ei1333の日記

ぺこい

a×b mod 1e9+7

ゆるせね〜〜〜〜〜〜〜

こどふぉの環境で、 {a \times b} {10^9 + 7} で割った余りを求めたいとき, なにがいいか

 {10^9!} {\pmod {10^9 + 7}} で求めるとき, 実行時間の比較

ついき:あとこだでも測りました

GNU G++17 7.3.0

ベンチマーク

#include <bits/stdc++.h>

using namespace std;

struct Timer {
  chrono::high_resolution_clock::time_point st;

  Timer() { reset(); }

  void reset() {
    st = chrono::high_resolution_clock::now();
  }

  chrono::milliseconds::rep elapsed() {
    auto ed = chrono::high_resolution_clock::now();
    return chrono::duration_cast< chrono::milliseconds >(ed - st).count();
  }
};

int main() {
  Timer timer;
  int N;
  cin >> N;
  factorial(N);
}

普通にやる

constexpr int mod = 1e9 + 7;

inline int mul(int a, int b) {
  return 1LL * a * b % mod;
}

void factorial(int n) {
  Timer timer;
  int ret = 1;
  timer.reset();
  for(int i = 1; i <= n; i++) ret = mul(ret, i);
  auto now = timer.elapsed();
  cout << n << "! = " << ret << " (" << now << " ms)" << endl;
}
  • こどふぉ  {11482 } ms
  • あとこだ  {4926} ms

unsigned でやる

constexpr unsigned mod = 1e9 + 7;

inline unsigned mul(unsigned a, unsigned b) {
  return 1uLL * a * b % mod;
}

void factorial(int n) {
  Timer timer;
  unsigned ret = 1;
  timer.reset();
  for(unsigned i = 1; i <= n; i++) ret = mul(ret, i);
  auto now = timer.elapsed();
  cout << n << "! = " << ret << " (" << now << " ms)" << endl;
}
  • こどふぉ  {9598} ms
  • あとこだ  {4214} ms

asm 命令(divl)

constexpr unsigned mod = 1e9 + 7;

inline unsigned mul(unsigned a, unsigned b) {
  unsigned long long x = 1uLL * a * b;
  unsigned xh = (unsigned) (x >> 32), xl = (unsigned) x, d, m;
  asm("divl %4; \n\t" : "=a" (d), "=d" (m) : "d" (xh), "a" (xl), "r" (mod));
  return m;
}

void factorial(int n) {
  Timer timer;
  unsigned ret = 1;
  timer.reset();
  for(unsigned i = 1; i <= n; i++) ret = mul(ret, i);
  auto now = timer.elapsed();
  cout << n << "! = " << ret << " (" << now << " ms)" << endl;
}
  • こどふぉ  {8688} ms
  • あとこだ  {9682} ms

こどふぉでは有効

モンゴメリ乗算

constexpr unsigned mod = 1e9 + 7;

static constexpr uint32_t mul_inv(uint32_t n, int e = 5, uint32_t x = 1) {
  return e == 0 ? x : mul_inv(n, e - 1, x * (2 - x * n));
}

template< uint32_t mod >
struct ModInt {
  using u32 = uint32_t;
  using u64 = uint64_t;

  static constexpr u32 inv = mul_inv(mod);
  static constexpr u32 r2 = -u64(mod) % mod;

  u32 x;

  ModInt() : x(0) {}

  ModInt(const u32 &x) : x(reduce(u64(x) * r2)) {}

  u32 reduce(const u64 &w) const {
    return u32(w >> 32) + mod - u32((u64(u32(w) * inv) * mod) >> 32);
  }

  ModInt &operator+=(const ModInt &p) {
    if(int(x += p.x - 2 * mod) < 0) x += 2 * mod;
    return *this;
  }

  ModInt &operator-=(const ModInt &p) {
    if(int(x -= p.x) < 0) x += 2 * mod;
    return *this;
  }

  ModInt &operator*=(const ModInt &p) {
    x = reduce(uint64_t(x) * p.x);
    return *this;
  }

  ModInt &operator/=(const ModInt &p) {
    *this *= p.inverse();
    return *this;
  }

  ModInt operator+(const ModInt &p) const { return ModInt(*this) += p; }

  ModInt operator-(const ModInt &p) const { return ModInt(*this) -= p; }

  ModInt operator*(const ModInt &p) const { return ModInt(*this) *= p; }

  ModInt operator/(const ModInt &p) const { return ModInt(*this) /= p; }

  bool operator==(const ModInt &p) const { return get() == p.get(); }

  bool operator!=(const ModInt &p) const { return get() != p.get(); }

  int get() const { return reduce(x) % mod; }

  ModInt pow(int64_t n) const {
    ModInt ret(1), mul(*this);
    while(n > 0) {
      if(n & 1) ret *= mul;
      mul *= mul;
      n >>= 1;
    }
    return ret;
  }

  ModInt inverse() const {
    return pow(mod - 2);
  }

  friend ostream &operator<<(ostream &os, const ModInt &p) {
    return os << p.get();
  }

  friend istream &operator>>(istream &is, ModInt &a) {
    u32 t;
    is >> t;
    a = ModInt< mod >(t);
    return (is);
  }

  static int get_mod() { return mod; }
};

using modint = ModInt< mod >;

void factorial(int n) {
  Timer timer;
  modint ret = 1;
  timer.reset();
  for(unsigned i = 1; i <= n; i++) ret *= i;
  auto now = timer.elapsed();
  cout << n << "! = " << ret.get() << " (" << now << " ms)" << endl;
}
  • こどふぉ  {3454} ms
  • あとこだ  {3701} ms

モンゴメリ乗算すごい!

じょえ

もっとはやいやりかたがあったらおしえて

ところで、これホント?→わかりません