This documentation is automatically generated by competitive-verifier/competitive-verifier
#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod"
#include "include/mtl/convolution.hpp"
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
int main() {
cin.tie(nullptr); ios::sync_with_stdio(false);
int N,M; cin>>N>>M;
vector<Modular998244353> A(N), B(M);
for (auto& a : A) cin>>a;
for (auto& b : B) cin>>b;
auto C = convolution(A, B);
C.resize(N+M-1);
for (int i = 0; i < N+M-1; i++)
cout << C[i] << " ";
cout << endl;
return 0;
}
#line 1 "test/yosupo/convolution.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod"
#line 2 "include/mtl/fft.hpp"
#include <vector>
#include <complex>
class Fft {
public:
using complex_t = std::complex<double>;
using value_type = complex_t;
private:
size_t n_;
public:
explicit Fft(size_t size) {
n_ = 1;
while (n_ < size)
n_ *= 2;
};
size_t n() const { return n_; }
void fft_inline(std::vector<complex_t>& f) const {
_fft(f);
}
std::vector<complex_t> fft(const std::vector<complex_t>& f) const {
auto ff = f;
ff.resize(n_, 0);
_fft(ff);
return ff;
}
void ifft_inline(std::vector<complex_t>& f) const {
_ifft(f);
}
std::vector<complex_t> ifft(const std::vector<complex_t>& f) const {
auto ff = f;
ff.resize(n_, 0);
_ifft(ff);
return ff;
}
private:
template <bool Forward>
void _fft_impl(std::vector<complex_t>& f) const {
// iterative bit reversal
for (size_t i = 0, j = 1; j < n_-1; j++) {
for (size_t k = n_ >> 1; k > (i ^= k); k >>= 1);
if (i < j) std::swap(f[i], f[j]);
}
// Cooley-Tukey FFT
for (size_t m = 1; m < n_; m *= 2) {
double theta0 = Forward ? M_PI/m : -M_PI/m;
for (size_t chunk = 0; chunk < n_; chunk += 2*m) {
for (size_t i = 0; i < m; i++) {
complex_t w = {cos(theta0*i), -sin(theta0*i)};
auto p = chunk + i;
auto a = f[p + 0];
auto b = f[p + m] * w;
f[p + 0] = a + b;
f[p + m] = a - b;
}
}
}
}
void _fft(std::vector<complex_t>& f) const {
_fft_impl<true>(f);
}
void _ifft(std::vector<complex_t>& f) const {
_fft_impl<false>(f);
for (auto& x : f) {
x /= n_;
}
}
};
#line 2 "include/mtl/bit_manip.hpp"
#include <cstdint>
#include <cassert>
#if __cplusplus >= 202002L
#ifndef MTL_CPP20
#define MTL_CPP20
#endif
#include <bit>
#endif
namespace bm {
/// Count 1s for each 8 bits
inline constexpr uint64_t popcnt_e8(uint64_t x) {
x = (x & 0x5555555555555555) + ((x>>1) & 0x5555555555555555);
x = (x & 0x3333333333333333) + ((x>>2) & 0x3333333333333333);
x = (x & 0x0F0F0F0F0F0F0F0F) + ((x>>4) & 0x0F0F0F0F0F0F0F0F);
return x;
}
/// Count 1s
inline constexpr unsigned popcnt(uint64_t x) {
#ifdef MTL_CPP20
return std::popcount(x);
#else
return (popcnt_e8(x) * 0x0101010101010101) >> 56;
#endif
}
/// Alias to mtl::popcnt(x)
constexpr unsigned popcount(uint64_t x) {
return popcnt(x);
}
/// Count trailing 0s. s.t. *11011000 -> 3
inline constexpr unsigned ctz(uint64_t x) {
#ifdef MTL_CPP20
return std::countr_zero(x);
#else
return popcnt((x & (-x)) - 1);
#endif
}
/// Alias to mtl::ctz(x)
constexpr unsigned countr_zero(uint64_t x) {
return ctz(x);
}
/// Count trailing 1s. s.t. *11011011 -> 2
inline constexpr unsigned cto(uint64_t x) {
#ifdef MTL_CPP20
return std::countr_one(x);
#else
return ctz(~x);
#endif
}
/// Alias to mtl::cto(x)
constexpr unsigned countr_one(uint64_t x) {
return cto(x);
}
inline constexpr unsigned ctz8(uint8_t x) {
return x == 0 ? 8 : popcnt_e8((x & (-x)) - 1);
}
/// [00..0](8bit) -> 0, [**..*](not only 0) -> 1
inline constexpr uint8_t summary(uint64_t x) {
constexpr uint64_t hmask = 0x8080808080808080ull;
constexpr uint64_t lmask = 0x7F7F7F7F7F7F7F7Full;
auto a = x & hmask;
auto b = x & lmask;
b = hmask - b;
b = ~b;
auto c = (a | b) & hmask;
c *= 0x0002040810204081ull;
return uint8_t(c >> 56);
}
/// Extract target area of bits
inline constexpr uint64_t bextr(uint64_t x, unsigned start, unsigned len) {
uint64_t mask = len < 64 ? (1ull<<len)-1 : 0xFFFFFFFFFFFFFFFFull;
return (x >> start) & mask;
}
/// 00101101 -> 00111111 -count_1s-> 6
inline constexpr unsigned log2p1(uint8_t x) {
if (x & 0x80)
return 8;
uint64_t p = uint64_t(x) * 0x0101010101010101ull;
p -= 0x8040201008040201ull;
p = ~p & 0x8080808080808080ull;
p = (p >> 7) * 0x0101010101010101ull;
p >>= 56;
return p;
}
/// 00101100 -mask_mssb-> 00100000 -to_index-> 5
inline constexpr unsigned mssb8(uint8_t x) {
assert(x != 0);
return log2p1(x) - 1;
}
/// 00101100 -mask_lssb-> 00000100 -to_index-> 2
inline constexpr unsigned lssb8(uint8_t x) {
assert(x != 0);
return popcnt_e8((x & -x) - 1);
}
/// Count leading 0s. 00001011... -> 4
inline constexpr unsigned clz(uint64_t x) {
#ifdef MTL_CPP20
return std::countl_zero(x);
#else
if (x == 0)
return 64;
auto i = mssb8(summary(x));
auto j = mssb8(bextr(x, 8 * i, 8));
return 63 - (8 * i + j);
#endif
}
/// Alias to mtl::clz(x)
constexpr unsigned countl_zero(uint64_t x) {
return clz(x);
}
/// Count leading 1s. 11110100... -> 4
inline constexpr unsigned clo(uint64_t x) {
#ifdef MTL_CPP20
return std::countl_one(x);
#else
return clz(~x);
#endif
}
/// Alias to mtl::clo(x)
constexpr unsigned countl_one(uint64_t x) {
return clo(x);
}
inline constexpr unsigned clz8(uint8_t x) {
return x == 0 ? 8 : 7 - mssb8(x);
}
inline constexpr uint64_t bit_reverse(uint64_t x) {
x = ((x & 0x00000000FFFFFFFF) << 32) | ((x & 0xFFFFFFFF00000000) >> 32);
x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x & 0xFFFF0000FFFF0000) >> 16);
x = ((x & 0x00FF00FF00FF00FF) << 8) | ((x & 0xFF00FF00FF00FF00) >> 8);
x = ((x & 0x0F0F0F0F0F0F0F0F) << 4) | ((x & 0xF0F0F0F0F0F0F0F0) >> 4);
x = ((x & 0x3333333333333333) << 2) | ((x & 0xCCCCCCCCCCCCCCCC) >> 2);
x = ((x & 0x5555555555555555) << 1) | ((x & 0xAAAAAAAAAAAAAAAA) >> 1);
return x;
}
/// Check if x is power of 2. 00100000 -> true, 00100001 -> false
constexpr bool has_single_bit(uint64_t x) noexcept {
#ifdef MTL_CPP20
return std::has_single_bit(x);
#else
return x != 0 && (x & (x - 1)) == 0;
#endif
}
/// Bit width needs to represent x. 00110110 -> 6
constexpr int bit_width(uint64_t x) noexcept {
#ifdef MTL_CPP20
return std::bit_width(x);
#else
return 64 - clz(x);
#endif
}
/// Ceil power of 2. 00110110 -> 01000000
constexpr uint64_t bit_ceil(uint64_t x) {
#ifdef MTL_CPP20
return std::bit_ceil(x);
#else
if (x == 0) return 1;
return 1ull << bit_width(x - 1);
#endif
}
/// Floor power of 2. 00110110 -> 00100000
constexpr uint64_t bit_floor(uint64_t x) {
#ifdef MTL_CPP20
return std::bit_floor(x);
#else
if (x == 0) return 0;
return 1ull << (bit_width(x) - 1);
#endif
}
} // namespace bm
#line 3 "include/mtl/modular.hpp"
#include <iostream>
#line 5 "include/mtl/modular.hpp"
template <int MOD>
class Modular {
private:
unsigned int val_;
public:
static constexpr unsigned int mod() { return MOD; }
template<class T>
static constexpr unsigned int safe_mod(T v) {
auto x = (long long)(v%(long long)mod());
if (x < 0) x += mod();
return (unsigned int) x;
}
constexpr Modular() : val_(0) {}
template<class T,
std::enable_if_t<
std::is_integral<T>::value && std::is_unsigned<T>::value
> * = nullptr>
constexpr Modular(T v) : val_(v%mod()) {}
template<class T,
std::enable_if_t<
std::is_integral<T>::value && !std::is_unsigned<T>::value
> * = nullptr>
constexpr Modular(T v) : val_(safe_mod(v)) {}
constexpr unsigned int val() const { return val_; }
constexpr Modular& operator+=(Modular x) {
val_ += x.val();
if (val_ >= mod()) val_ -= mod();
return *this;
}
constexpr Modular operator-() const { return {mod() - val_}; }
constexpr Modular& operator-=(Modular x) {
val_ += mod() - x.val();
if (val_ >= mod()) val_ -= mod();
return *this;
}
constexpr Modular& operator*=(Modular x) {
auto v = (long long) val_ * x.val();
if (v >= mod()) v %= mod();
val_ = v;
return *this;
}
constexpr Modular pow(long long p) const {
assert(p >= 0);
Modular t = 1;
Modular u = *this;
while (p) {
if (p & 1)
t *= u;
u *= u;
p >>= 1;
}
return t;
}
friend constexpr Modular pow(Modular x, long long p) {
return x.pow(p);
}
constexpr Modular inv() const { return pow(mod()-2); }
constexpr Modular& operator/=(Modular x) { return *this *= x.inv(); }
constexpr Modular operator+(Modular x) const { return Modular(*this) += x; }
constexpr Modular operator-(Modular x) const { return Modular(*this) -= x; }
constexpr Modular operator*(Modular x) const { return Modular(*this) *= x; }
constexpr Modular operator/(Modular x) const { return Modular(*this) /= x; }
constexpr Modular& operator++() { return *this += 1; }
constexpr Modular operator++(int) { Modular c = *this; ++(*this); return c; }
constexpr Modular& operator--() { return *this -= 1; }
constexpr Modular operator--(int) { Modular c = *this; --(*this); return c; }
constexpr bool operator==(Modular x) const { return val() == x.val(); }
constexpr bool operator!=(Modular x) const { return val() != x.val(); }
constexpr bool is_square() const {
return pow((mod()-1)/2) == 1;
}
/**
* Return x s.t. x * x = a mod p
* reference: https://zenn.dev/peria/articles/c6afc72b6b003c
*/
constexpr Modular sqrt() const {
if (!is_square())
throw std::runtime_error("not square");
auto mod_eight = mod() % 8;
if (mod_eight == 3 || mod_eight == 7) {
return pow((mod()+1)/4);
} else if (mod_eight == 5) {
auto x = pow((mod()+3)/8);
if (x * x != *this)
x *= Modular(2).pow((mod()-1)/4);
return x;
} else {
Modular d = 2;
while (d.is_square())
d += 1;
auto t = mod()-1;
int s = bm::ctz(t);
t >>= s;
auto a = pow(t);
auto D = d.pow(t);
int m = 0;
Modular dt = 1;
Modular du = D;
for (int i = 0; i < s; i++) {
if ((a*dt).pow(1u<<(s-1-i)) == -1) {
m |= 1u << i;
dt *= du;
}
du *= du;
}
return pow((t+1)/2) * D.pow(m/2);
}
}
friend std::ostream& operator<<(std::ostream& os, const Modular& x) {
return os << x.val();
}
friend std::istream& operator>>(std::istream& is, Modular& x) {
return is >> x.val_;
}
};
using Modular998244353 = Modular<998244353>;
using Modular1000000007 = Modular<(int)1e9+7>;
template<int Id=0>
class DynamicModular {
private:
static unsigned int mod_;
unsigned int val_;
public:
static unsigned int mod() { return mod_; }
static void set_mod(unsigned int m) { mod_ = m; }
template<class T>
static constexpr unsigned int safe_mod(T v) {
auto x = (long long)(v%(long long)mod());
if (x < 0) x += mod();
return (unsigned int) x;
}
constexpr DynamicModular() : val_(0) {}
template<class T,
std::enable_if_t<
std::is_integral<T>::value && std::is_unsigned<T>::value
> * = nullptr>
constexpr DynamicModular(T v) : val_(v%mod()) {}
template<class T,
std::enable_if_t<
std::is_integral<T>::value && !std::is_unsigned<T>::value
> * = nullptr>
constexpr DynamicModular(T v) : val_(safe_mod(v)) {}
constexpr unsigned int val() const { return val_; }
constexpr DynamicModular& operator+=(DynamicModular x) {
val_ += x.val();
if (val_ >= mod()) val_ -= mod();
return *this;
}
constexpr DynamicModular operator-() const { return {mod() - val_}; }
constexpr DynamicModular& operator-=(DynamicModular x) {
val_ += mod() - x.val();
if (val_ >= mod()) val_ -= mod();
return *this;
}
constexpr DynamicModular& operator*=(DynamicModular x) {
auto v = (long long) val_ * x.val();
if (v >= mod()) v %= mod();
val_ = v;
return *this;
}
constexpr DynamicModular pow(long long p) const {
assert(p >= 0);
DynamicModular t = 1;
DynamicModular u = *this;
while (p) {
if (p & 1)
t *= u;
u *= u;
p >>= 1;
}
return t;
}
friend constexpr DynamicModular pow(DynamicModular x, long long p) {
return x.pow(p);
}
// TODO: implement when mod is not prime
constexpr DynamicModular inv() const { return pow(mod()-2); }
constexpr DynamicModular& operator/=(DynamicModular x) { return *this *= x.inv(); }
constexpr DynamicModular operator+(DynamicModular x) const { return DynamicModular(*this) += x; }
constexpr DynamicModular operator-(DynamicModular x) const { return DynamicModular(*this) -= x; }
constexpr DynamicModular operator*(DynamicModular x) const { return DynamicModular(*this) *= x; }
constexpr DynamicModular operator/(DynamicModular x) const { return DynamicModular(*this) /= x; }
constexpr DynamicModular& operator++() { return *this += 1; }
constexpr DynamicModular operator++(int) { DynamicModular c = *this; ++(*this); return c; }
constexpr DynamicModular& operator--() { return *this -= 1; }
constexpr DynamicModular operator--(int) { DynamicModular c = *this; --(*this); return c; }
constexpr bool operator==(DynamicModular x) const { return val() == x.val(); }
constexpr bool operator!=(DynamicModular x) const { return val() != x.val(); }
constexpr bool is_square() const {
return val() == 0 or pow((mod()-1)/2) == 1;
}
/**
* Return x s.t. x * x = a mod p
* reference: https://zenn.dev/peria/articles/c6afc72b6b003c
*/
constexpr DynamicModular sqrt() const {
// assert mod is prime
if (!is_square())
throw std::runtime_error("not square");
if (val() < 2)
return val();
auto mod_eight = mod() % 8;
if (mod_eight == 3 || mod_eight == 7) {
return pow((mod()+1)/4);
} else if (mod_eight == 5) {
auto x = pow((mod()+3)/8);
if (x * x != *this)
x *= DynamicModular(2).pow((mod()-1)/4);
return x;
} else {
DynamicModular d = 2;
while (d.is_square())
++d;
auto t = mod()-1;
int s = bm::ctz(t);
t >>= s;
auto a = pow(t);
auto D = d.pow(t);
int m = 0;
DynamicModular dt = 1;
DynamicModular du = D;
for (int i = 0; i < s; i++) {
if ((a*dt).pow(1u<<(s-1-i)) == -1) {
m |= 1u << i;
dt *= du;
}
du *= du;
}
return pow((t+1)/2) * D.pow(m/2);
}
}
friend std::ostream& operator<<(std::ostream& os, const DynamicModular& x) {
return os << x.val();
}
friend std::istream& operator>>(std::istream& is, DynamicModular& x) {
return is >> x.val_;
}
};
template<int Id>
unsigned int DynamicModular<Id>::mod_;
#line 264 "include/mtl/modular.hpp"
template<class ModInt>
struct ModularUtil {
static constexpr int mod = ModInt::mod();
static struct inv_table {
std::vector<ModInt> tb{0,1};
inv_table() : tb({0,1}) {}
} inv_;
void set_inv(int n) {
int m = inv_.tb.size();
if (m > n) return;
inv_.tb.resize(n+1);
for (int i = m; i < n+1; i++)
inv_.tb[i] = -inv_.tb[mod % i] * (mod / i);
}
ModInt& inv(int i) {
set_inv(i);
return inv_.tb[i];
}
};
template<class ModInt>
typename ModularUtil<ModInt>::inv_table ModularUtil<ModInt>::inv_;
#include <array>
namespace math {
constexpr int mod_pow_constexpr(int x, int p, int m) {
long long t = 1;
long long u = x;
while (p) {
if (p & 1) {
t *= u;
t %= m;
}
u *= u;
u %= m;
p >>= 1;
}
return (int) t;
}
constexpr int primitive_root_constexpr(int m) {
if (m == 2) return 1;
if (m == 167772161) return 3;
if (m == 469762049) return 3;
if (m == 754974721) return 11;
if (m == 880803841) return 26;
if (m == 998244353) return 3;
std::array<int, 20> divs{};
int cnt = 0;
int x = m-1;
if (x % 2 == 0) {
divs[cnt++] = 2;
x >>= bm::ctz(x);
}
for (int d = 3; d*d <= x; d += 2) {
if (x % d == 0) {
divs[cnt++] = d;
while (x % d == 0)
x /= d;
}
}
if (x > 1) divs[cnt++] = x;
for (int g = 2; g < m; g++) {
bool ok = true;
for (int i = 0; i < cnt; i++) {
if (mod_pow_constexpr(g, (m-1) / divs[i], m) == 1) {
ok = false;
break;
}
}
if (ok) return g;
}
return -1;
}
template<int m>
constexpr int primitive_root = primitive_root_constexpr(m);
}
#line 6 "include/mtl/ntt.hpp"
namespace math {
constexpr int ceil_pow2(unsigned long long x) {
return x == 0 ? 0 : 64-bm::clz(x-1);
}
}
#ifndef MTL_ARRAY_SET_CONSTEXPR
#if __cplusplus >= 201703L
#define MTL_ARRAY_SET_CONSTEXPR constexpr
#else
#define MTL_ARRAY_SET_CONSTEXPR
#endif
#endif
namespace _ntt {
template<int mod>
struct ntt_info {
using mint = Modular<mod>;
static constexpr int primitive_root() {
return math::primitive_root<mod>;
}
static constexpr int log_n() {
return bm::ctz(mod-1);
}
static MTL_ARRAY_SET_CONSTEXPR
std::array<Modular<mod>, log_n()> coeff(bool forward) {
mint r = mint(primitive_root()).pow((mod-1) >> log_n());
std::array<mint, log_n()> coeff{};
mint iw = forward ? r.inv() : r;
for (int i = log_n()-1; i >= 0; i--) {
coeff[i] = iw;
iw *= iw;
}
return coeff;
}
};
template<class T>
void _iterative_bit_reverse(std::vector<T>& f) {
int log_n = math::ceil_pow2(f.size());
int n = 1 << log_n;
for (int i = 0, j = 1; j < n-1; j++) {
for (int k = n >> 1; k > (i ^= k); k >>= 1);
if (i < j) std::swap(f[i], f[j]);
}
}
template <bool Forward, bool BitReverse, int mod>
void _fft_impl(std::vector<Modular<mod>>& f) {
using mint = Modular<mod>;
int log_n = math::ceil_pow2(f.size());
using info = ntt_info<mod>;
assert(info::log_n() >= log_n);
int n = 1 << log_n;
f.resize(n, 0);
if constexpr (!Forward and BitReverse)
_iterative_bit_reverse(f);
// Cooley-Tukey FFT
static MTL_ARRAY_SET_CONSTEXPR auto coeff = info::coeff(Forward);
if constexpr (Forward) {
for (int log_m = log_n-1; log_m >= 0; log_m -= 2) {
int m = 1<<log_m;
mint w0 = coeff[log_m];
if (log_m == 0) {
for (int chunk = 0; chunk < n; chunk += 2*m) {
mint w = 1;
for (int i = 0; i < m; i++) {
auto p = chunk + i;
auto a = f[p + 0];
auto b = f[p + m];
f[p + 0] = (a + b);
f[p + m] = (a - b) * w;
w *= w0;
}
}
} else { // 4-base
auto w1 = coeff[log_m-1];
auto wh = w0.pow(m/2).val();
for (int chunk = 0; chunk < n; chunk += 2*m) {
mint w = 1, x = 1;
for (int i = 0; i < m/2; i++) {
auto p = chunk + i;
auto ia = p + 0*m/2;
auto ib = p + 1*m/2;
auto ic = p + 2*m/2;
auto id = p + 3*m/2;
long long a = f[ia].val(), b = f[ib].val(), c = f[ic].val(), d = f[id].val();
auto s = a + c, t = b + d, u = (a - c + mod) * w.val() % mod, v = (b - d + mod) * w.val() % mod * wh % mod;
f[ia] = (s + t);
f[ib] = (s - t) * x.val();
f[ic] = (u + v);
f[id] = (u - v) * x.val();
w *= w0;
x *= w1;
}
}
}
}
} else {
int _log_m = 0;
if (log_n % 2 == 1) {
for (int chunk = 0; chunk < n; chunk += 2) {
auto p = chunk;
auto a = f[p + 0];
auto b = f[p + 1];
f[p + 0] = a + b;
f[p + 1] = a - b;
}
_log_m = 1;
}
// 4-base
for (int log_m = _log_m; log_m < log_n; log_m += 2) {
int m = 1<<(log_m+1);
mint w0 = coeff[log_m];
auto w1 = coeff[log_m+1];
auto wh = w1.pow(m/2).val();
for (int chunk = 0; chunk < n; chunk += 2*m) {
mint w = 1, x = 1;
for (int i = 0; i < m/2; i++) {
auto p = chunk + i;
auto ia = p + 0*m/2;
auto ib = p + 1*m/2;
auto ic = p + 2*m/2;
auto id = p + 3*m/2;
long long a = f[ia].val(), b = (long long)f[ib].val() * w.val() % mod,
c = f[ic].val(), d = (long long)f[id].val() * w.val() % mod;
auto s = a + b, t = a - b + mod, u = (c + d) * x.val() % mod, v = (c - d + mod) * x.val() % mod * wh % mod;
f[ia] = s + u;
f[ib] = t + v;
f[ic] = s - u;
f[id] = t - v;
w *= w0;
x *= w1;
}
}
}
}
if constexpr (Forward and BitReverse)
_iterative_bit_reverse(f);
if constexpr (!Forward) {
mint inv = mint(n).inv();
for (int i = 0; i < n; i++) f[i] *= inv;
}
}
template <int mod>
void _fft(std::vector<Modular<mod>>& f) {
_fft_impl<true, true>(f);
}
template <int mod>
void _ifft(std::vector<Modular<mod>>& f) {
_fft_impl<false, true>(f);
}
template <int mod>
void _convolution_fft(std::vector<Modular<mod>>& f) {
_fft_impl<true, false>(f);
}
template <int mod>
void _convolution_ifft(std::vector<Modular<mod>>& f) {
_fft_impl<false, false>(f);
}
} // namespace _ntt
template<class mint>
void ntt_inline(std::vector<mint>& f) {
_ntt::_fft(f);
}
template<class mint>
std::vector<mint> ntt(std::vector<mint> f) {
_ntt::_fft(f);
return f;
}
template<class mint>
void intt_inline(std::vector<mint>& f) {
_ntt::_ifft(f);
}
template<class mint>
std::vector<mint> intt(std::vector<mint> f) {
_ntt::_ifft(f);
return f;
}
template<class mint>
void ntt_convolution_inline(std::vector<mint>& f) {
_ntt::_convolution_fft(f);
}
template<class mint>
std::vector<mint> ntt_convolution(std::vector<mint> f) {
_ntt::_convolution_fft(f);
return f;
}
template<class mint>
void intt_convolution_inline(std::vector<mint>& f) {
_ntt::_convolution_ifft(f);
}
template<class mint>
std::vector<mint> intt_convolution(std::vector<mint> f) {
_ntt::_convolution_ifft(f);
return f;
}
#line 4 "include/mtl/convolution.hpp"
const int CONVOLUTION_NAIVE_THRESHOLD = 60;
template<class T>
std::vector<T> convolution_naive(const std::vector<T>& f, const std::vector<T>& g) {
auto n = f.size(), m = g.size();
if (n == 0 or m == 0)
return {};
std::vector<T> h(n+m-1);
if (n < m) {
for (size_t j = 0; j < m; j++)
for (size_t i = 0; i < n; i++)
h[i+j] += f[i] * g[j];
} else {
for (size_t i = 0; i < n; i++)
for (size_t j = 0; j < m; j++)
h[i+j] += f[i] * g[j];
}
return h;
}
template<class T>
std::vector<T> convolution_fft(std::vector<T> f, std::vector<T> g) {
auto n = f.size(), m = g.size();
size_t len = n + m - 1;
size_t l = 1 << math::ceil_pow2(len);
if (n+m-3 < l/2) {
auto fb = f.back(), gb = g.back();
f.pop_back(); g.pop_back();
std::vector<T> h(n+m-1, 0);
h[n+m-2] = fb * gb;
for (size_t i = 0; i < n; i++)
h[i + m-1] += f[i] * gb;
for (size_t i = 0; i < m; i++)
h[i + n-1] += g[i] * fb;
auto fg = convolution(std::move(f), std::move(g));
for (size_t i = 0; i < n+m-3; i++)
h[i] += fg[i];
return h;
}
Fft fft(len);
f.resize(fft.n(), 0);
g.resize(fft.n(), 0);
auto same = f == g;
fft.fft_inline(f);
if (same) {
for (size_t i = 0; i < fft.n(); i++)
f[i] *= f[i];
} else {
fft.fft_inline(g);
for (size_t i = 0; i < fft.n(); i++)
f[i] *= g[i];
}
fft.ifft_inline(f);
std::vector<T> res(len);
for (size_t i = 0; i < len; i++)
res[i] = f[i].real();
return res;
}
template<int Mod>
std::vector<Modular<Mod>> convolution_ntt(std::vector<Modular<Mod>> f, std::vector<Modular<Mod>> g) {
auto n = f.size(), m = g.size();
size_t l = 1 << math::ceil_pow2(n + m - 1);
if (n+m-3 < l/2) {
auto fb = f.back(), gb = g.back();
f.pop_back(); g.pop_back();
std::vector<Modular<Mod>> h(n+m-1, 0);
h[n+m-2] = fb * gb;
for (size_t i = 0; i < n-1; i++)
h[i + m-1] += f[i] * gb;
for (size_t i = 0; i < m-1; i++)
h[i + n-1] += g[i] * fb;
auto fg = convolution(std::move(f), std::move(g));
for (size_t i = 0; i < fg.size(); i++)
h[i] += fg[i];
return h;
}
f.resize(l, 0);
g.resize(l, 0);
bool same = f == g;
ntt_convolution_inline(f);
if (same) {
for (size_t i = 0; i < l; i++)
f[i] *= f[i];
} else {
ntt_convolution_inline(g);
for (size_t i = 0; i < l; i++)
f[i] *= g[i];
}
intt_convolution_inline(f);
return f;
}
template<int Mod>
std::vector<Modular<Mod>> convolution_fft(std::vector<Modular<Mod>> f, std::vector<Modular<Mod>> g) {
return convolution_ntt(std::move(f), std::move(g));
}
std::vector<int> convolution_fft(const std::vector<int>& a, const std::vector<int>& b) {
auto n = a.size(), m = b.size();
std::vector<Modular998244353> f(n), g(m);
for (size_t i = 0; i < n; i++)
f[i] = a[i];
for (size_t i = 0; i < m; i++)
g[i] = b[i];
auto h = convolution_fft(std::move(f), std::move(g));
std::vector<int> res(n+m-1);
for (size_t i = 0; i < n+m-1; i++)
res[i] = h[i].val();
return res;
}
template<class T>
std::vector<T> convolution(const std::vector<T>& f, const std::vector<T>& g) {
auto n = f.size(), m = g.size();
if (n == 0 or m == 0)
return {};
if (std::min(n, m) <= CONVOLUTION_NAIVE_THRESHOLD)
return convolution_naive(f, g);
return convolution_fft(f, g);
}
template<class T>
std::vector<T> convolution(std::vector<T>&& f, std::vector<T>&& g) {
auto n = f.size(), m = g.size();
if (n == 0 or m == 0)
return {};
if (std::min(n, m) <= CONVOLUTION_NAIVE_THRESHOLD)
return convolution_naive(f, g);
return convolution_fft(std::move(f), std::move(g));
}
template<class mint>
std::vector<mint> convolution_garner(const std::vector<mint>& f,
const std::vector<mint>& g) {
auto n = f.size();
auto m = g.size();
if (n == 0 or m == 0)
return {};
constexpr long long nttprimes[] = {754974721, 167772161, 469762049};
using mint0 = Modular<754974721>;
using mint1 = Modular<167772161>;
using mint2 = Modular<469762049>;
std::vector<mint0> a0(n), b0(m);
std::vector<mint1> a1(n), b1(m);
std::vector<mint2> a2(n), b2(m);
for(size_t i = 0; i < n; i++) a0[i] = f[i].val(), a1[i] = f[i].val(), a2[i] = f[i].val();
for(size_t i = 0; i < m; i++) b0[i] = g[i].val(), b1[i] = g[i].val(), b2[i] = g[i].val();
auto c0 = convolution_ntt(a0, b0);
auto c1 = convolution_ntt(a1, b1);
auto c2 = convolution_ntt(a2, b2);
constexpr long long m01 = (long long) nttprimes[0] * nttprimes[1];
constexpr long long m0_inv_m1 = mint1(nttprimes[0]).inv().val();
constexpr long long m01_inv_m2 = mint2(m01).inv().val();
const int mod = mint::mod();
auto garner = [&](mint0 x0, mint1 x1, mint2 x2) -> mint {
int r0 = x0.val(), r1 = x1.val(), r2 = x2.val();
int v1 = (m0_inv_m1 * (r1 + nttprimes[1] - r0)) % nttprimes[1];
auto v2 = (mint2(r2) - r0 - mint2(nttprimes[0]) * v1) * mint2(m01_inv_m2);
return mint(r0 + 1LL * nttprimes[0] * v1 + m01 % mod * v2.val());
};
std::vector<mint> c(c0.size());
for(size_t i = 0; i < c.size(); i++) c[i] = garner(c0[i], c1[i], c2[i]);
return c;
}
template<int Mod>
std::vector<Modular<Mod>> convolution_large(const std::vector<Modular<Mod>>& f,
const std::vector<Modular<Mod>>& g) {
auto n = f.size();
auto m = g.size();
if (n == 0 or m == 0)
return {};
constexpr auto h = bm::ctz(Mod-1);
if (n+m-1 < 1u<<h)
return convolution(f ,g);
size_t l = h-1;
int cs = ((n-1)>>l)+1;
int ds = ((m-1)>>l)+1;
using result_type = std::vector<Modular<Mod>>;
std::vector<result_type> c(cs, result_type(2<<l));
std::vector<result_type> d(ds, result_type(2<<l));
size_t mask = (1<<l)-1;
for (size_t i = 0; i < n; i++)
c[i>>l][i&mask] = f[i];
for (size_t i = 0; i < m; i++)
d[i>>l][i&mask] = g[i];
for (auto& v: c) ntt_convolution_inline(v);
for (auto& v: d) ntt_convolution_inline(v);
result_type res(n+m-1);
result_type e(2<<l);
for (int i = 0; i < cs+ds-1; i++) {
e.assign(2<<l, 0);
for (int j = std::max(i-ds+1, 0); j < std::min(cs, i+1); j++) {
int k = i-j;
assert(k >= 0 and k < ds);
for (int x = 0; x < 2<<l; x++)
e[x] += c[j][x] * d[k][x];
}
intt_convolution_inline(e);
auto len = std::min((size_t)2<<l, n+m-1-(i<<l));
for (size_t x = 0; x < len; x++)
res[(i<<l) + x] += e[x];
}
return res;
}
size_t complexity_of_convolution(size_t fsize, size_t gsize) {
int lg = math::ceil_pow2(fsize + gsize - 1);
return lg << lg;
}
#line 3 "test/yosupo/convolution.test.cpp"
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
int main() {
cin.tie(nullptr); ios::sync_with_stdio(false);
int N,M; cin>>N>>M;
vector<Modular998244353> A(N), B(M);
for (auto& a : A) cin>>a;
for (auto& b : B) cin>>b;
auto C = convolution(A, B);
C.resize(N+M-1);
for (int i = 0; i < N+M-1; i++)
cout << C[i] << " ";
cout << endl;
return 0;
}
Env | Name | Status | Elapsed | Memory |
---|---|---|---|---|
g++ | example_00 |
![]() |
6 ms | 3 MB |
g++ | example_01 |
![]() |
5 ms | 3 MB |
g++ | fft_killer_00 |
![]() |
227 ms | 18 MB |
g++ | fft_killer_01 |
![]() |
232 ms | 18 MB |
g++ | max_ans_zero_00 |
![]() |
233 ms | 18 MB |
g++ | max_random_00 |
![]() |
227 ms | 18 MB |
g++ | max_random_01 |
![]() |
216 ms | 18 MB |
g++ | medium_00 |
![]() |
9 ms | 4 MB |
g++ | medium_01 |
![]() |
7 ms | 4 MB |
g++ | medium_02 |
![]() |
8 ms | 4 MB |
g++ | medium_all_zero_00 |
![]() |
7 ms | 4 MB |
g++ | medium_pre_suf_zero_00 |
![]() |
6 ms | 4 MB |
g++ | medium_pre_suf_zero_01 |
![]() |
6 ms | 4 MB |
g++ | medium_pre_suf_zero_02 |
![]() |
6 ms | 4 MB |
g++ | medium_pre_suf_zero_03 |
![]() |
6 ms | 4 MB |
g++ | medium_pre_suf_zero_04 |
![]() |
6 ms | 4 MB |
g++ | random_00 |
![]() |
200 ms | 16 MB |
g++ | random_01 |
![]() |
212 ms | 16 MB |
g++ | random_02 |
![]() |
98 ms | 11 MB |
g++ | signed_overflow_00 |
![]() |
6 ms | 3 MB |
g++ | small_00 |
![]() |
5 ms | 3 MB |
g++ | small_01 |
![]() |
5 ms | 3 MB |
g++ | small_02 |
![]() |
5 ms | 3 MB |
g++ | small_03 |
![]() |
5 ms | 3 MB |
g++ | small_04 |
![]() |
6 ms | 3 MB |
g++ | small_05 |
![]() |
6 ms | 3 MB |
g++ | small_06 |
![]() |
5 ms | 3 MB |
g++ | small_07 |
![]() |
5 ms | 3 MB |
g++ | small_08 |
![]() |
5 ms | 3 MB |
g++ | small_09 |
![]() |
5 ms | 3 MB |
g++ | small_10 |
![]() |
6 ms | 3 MB |
g++ | small_11 |
![]() |
5 ms | 3 MB |
g++ | small_12 |
![]() |
5 ms | 3 MB |
g++ | small_13 |
![]() |
6 ms | 3 MB |
g++ | small_14 |
![]() |
5 ms | 3 MB |
g++ | small_15 |
![]() |
5 ms | 3 MB |
g++ | small_and_large_00 |
![]() |
158 ms | 15 MB |
g++ | small_and_large_01 |
![]() |
160 ms | 15 MB |
g++ | small_and_large_02 |
![]() |
159 ms | 13 MB |
g++ | small_and_large_03 |
![]() |
160 ms | 13 MB |
g++ | unsigned_overflow_00 |
![]() |
6 ms | 3 MB |
clang++ | example_00 |
![]() |
6 ms | 3 MB |
clang++ | example_01 |
![]() |
5 ms | 3 MB |
clang++ | fft_killer_00 |
![]() |
219 ms | 18 MB |
clang++ | fft_killer_01 |
![]() |
216 ms | 18 MB |
clang++ | max_ans_zero_00 |
![]() |
225 ms | 18 MB |
clang++ | max_random_00 |
![]() |
229 ms | 18 MB |
clang++ | max_random_01 |
![]() |
220 ms | 18 MB |
clang++ | medium_00 |
![]() |
9 ms | 4 MB |
clang++ | medium_01 |
![]() |
7 ms | 4 MB |
clang++ | medium_02 |
![]() |
8 ms | 4 MB |
clang++ | medium_all_zero_00 |
![]() |
7 ms | 4 MB |
clang++ | medium_pre_suf_zero_00 |
![]() |
6 ms | 4 MB |
clang++ | medium_pre_suf_zero_01 |
![]() |
6 ms | 4 MB |
clang++ | medium_pre_suf_zero_02 |
![]() |
6 ms | 4 MB |
clang++ | medium_pre_suf_zero_03 |
![]() |
6 ms | 4 MB |
clang++ | medium_pre_suf_zero_04 |
![]() |
6 ms | 4 MB |
clang++ | random_00 |
![]() |
192 ms | 16 MB |
clang++ | random_01 |
![]() |
209 ms | 17 MB |
clang++ | random_02 |
![]() |
100 ms | 11 MB |
clang++ | signed_overflow_00 |
![]() |
6 ms | 3 MB |
clang++ | small_00 |
![]() |
5 ms | 3 MB |
clang++ | small_01 |
![]() |
5 ms | 3 MB |
clang++ | small_02 |
![]() |
5 ms | 3 MB |
clang++ | small_03 |
![]() |
5 ms | 3 MB |
clang++ | small_04 |
![]() |
5 ms | 3 MB |
clang++ | small_05 |
![]() |
5 ms | 3 MB |
clang++ | small_06 |
![]() |
5 ms | 3 MB |
clang++ | small_07 |
![]() |
6 ms | 3 MB |
clang++ | small_08 |
![]() |
5 ms | 3 MB |
clang++ | small_09 |
![]() |
5 ms | 3 MB |
clang++ | small_10 |
![]() |
5 ms | 3 MB |
clang++ | small_11 |
![]() |
5 ms | 3 MB |
clang++ | small_12 |
![]() |
5 ms | 3 MB |
clang++ | small_13 |
![]() |
5 ms | 3 MB |
clang++ | small_14 |
![]() |
5 ms | 3 MB |
clang++ | small_15 |
![]() |
5 ms | 3 MB |
clang++ | small_and_large_00 |
![]() |
169 ms | 16 MB |
clang++ | small_and_large_01 |
![]() |
163 ms | 16 MB |
clang++ | small_and_large_02 |
![]() |
162 ms | 13 MB |
clang++ | small_and_large_03 |
![]() |
165 ms | 13 MB |
clang++ | unsigned_overflow_00 |
![]() |
6 ms | 3 MB |