#include using namespace std; #define rep(i,x,y) for(int i=x;i<=y;++i) typedef long long int ll; typedef long long LL; /* * Following parts are NTT (Number theoritic transform) code belonging to Hengjie zhang, (zhj on codeforces) * His profile link is codeforces.com/profile/zhj. * His submission from which code is taken is http://codeforces.com/contest/438/submission/8172645 * It is recursive implementation of NTT, I think using non - recursive implementation would be even faster. * You can see code of Anton Lunyov for a faster implementation of DFT (NTT). */ const ll modulo = 998244353; int Mod=998244353; const int alp=3; const int MAX=(1<<21)+5; const int mod1=998244353 ; const int mod2=1300234241; ll powmod(ll a,ll b,ll m){ ll res=1%m; while(b){ if(b&1)res=(res*a)%m; a=(a*a)%m; b>>=1; } return res; } ll mulmod(ll a,ll b,ll m){ ll res=0; while(b){ if(b&1)res=(res+a)%m; a=(a+a)%m; b>>=1; } return res; } int MULTIPLICATION_COUNT = 0; int MAXDEG = 0; /* code is taken from internet, I will write my fft code later */ const int mo=998244353; const int maxS = 1<<21; int W[2][maxS+10],pos[22][maxS+10],tmp[maxS+10],c[maxS+10],b[maxS+10];; int n,m; int Pow(int x,int y){ int res=1; while(y){ if(y&1)res=1ll*x*res%mo; x=1ll*x*x%mo; y/=2; } return res; } void prep(){ int g=3,x=1; g=Pow(g,(mo-1)/maxS); rep(i,0,maxS-1){ W[0][i]=x; x=1ll*x*g%mo; } rep(i,0,maxS-1)W[1][i]=W[0][(maxS-i)%maxS]; rep(i,0,21) rep(j,0,(1<>k)&1)x|=1<i)swap(a[i],a[pos[cnt][i]]); for(int i=1;i>= 1; } return res; } LL digit_dp[60][2][MAXP]; // base 10 digital representation of a number n, Used in digit dp function. vector getDigits(LL n) { vector res; if (n == 0) { res.push_back(0); } while (n) { res.push_back(n % 10); n /= 10; } reverse(res.begin(), res.end()); return res; } // C(n, r) denotes binomial coefficient. LL C(LL n, LL r) { LL res = 1; for (int i = 0; i < r; i++) { res = (res * (LL) (n - i)) % mod; res = (res * inv[i + 1]) % mod; } return res; } int dp[2][MAXM][MAXP]; int f[MAXP][MAXM]; LL cnt[MAXP]; ll values[MAX]; int N, M, P; int tempRes[MAX]; // sanitize function takes output of product of two polynomials, it makes all the excess unneeded coefficients of the // resulting polynomial zero, so that the degree of the polynomial is bound be DEG always. int *sanitize(int *result) { memset(tempRes, 0, sizeof(int) * MAXDEG); for (int i = 0; i <= DEG; i++) { int op = i % (2 * M + 1); int t = i / (2 * M + 1); t %= P; if (op <= M) { int val = t * (2 * M + 1) + op; tempRes[val] += result[i]; if (tempRes[val] >= mod) tempRes[val] -= mod; } } for (int i = 0; i < MAXDEG; i++) { result[i] = tempRes[i]; } return result; } vector indices; // recursive product of several bi-variate polynomials. // Can be optimized to reduce all the leaf level products in the binary tree structure of the polynomials. // Note that you can optimization this function further by following tricks. // if (lo + 1 == hi), then instead of use direct O(M^2) with some optimizations. No need to use to represent that as bivariate polynomial. // One smart optimization everywhere can be done by storing i % (2 * M + 1) etc which will save you from a lot of modulo operators. int * doIt(int lo, int hi) { cerr << lo << " " << hi << endl; int *RES = (int*) malloc(sizeof(int)*(MAXDEG)) ; memset(RES, 0, sizeof(int) * MAXDEG); if (lo > hi) { RES[0] = 1; return RES; } if (lo == hi) { int idx = indices[lo]; for (int j = 0; j <= M; j++) { int t = ((idx * j) % P) * (2 * M + 1) + j; RES[t] += f[idx][j]; if(RES[t]>=mod) RES[t]-=mod; } return RES; } else { int mid = (lo + hi) / 2; int *RES1 = doIt(lo, mid); int *RES2 = doIt(mid + 1, hi); RES1 = sanitize(RES1); RES2 = sanitize(RES2); NTT(RES1, DEG + 1, RES2, DEG + 1, RES); return sanitize(RES); } } // digit dp is for calculating count of n digits numbers i such that 10^i % P = 0. void do_digit_dp() { for (int k = 0; k < P; k++) { memset(digit_dp, 0, sizeof(digit_dp)); vector d = getDigits(N - 1); digit_dp[d.size()][0][k] = 1; digit_dp[d.size()][1][k] = 1; for (int id = (int) d.size() - 1; id >= 0; id--) { for (int tight = 0; tight < 2; tight++) { for (int rem = 0; rem <= P; rem++) { for (int cur = 0; cur < 10; cur++) { int new_id = id + 1; int new_tight = tight; int new_rem = rem; if (tight && cur > d[id]) { continue; } if (tight) { if (cur < d[id]) { new_tight = false; } else if (cur == d[id]) { new_tight = true; } } new_rem = (modpow(rem, 10, P) * modpow(10, cur, P)) % P; LL &res = digit_dp[id][tight][rem]; res += digit_dp[new_id][new_tight][new_rem]; res %= mod; } } } } cnt[k] = digit_dp[0][1][1]; } } int ans_problem[MAXM]; const int MAXN = 1005; LL brute_dp[MAXN][MAXP][MAXP]; int a[MAXN]; // dp as described in the editorial for solving subtask 1. void smartBrute() { for (int i = 0; i < N; i++) { a[i] = modpow(10, i, P); } memset(brute_dp, 0, sizeof(brute_dp)); brute_dp[0][0][0] = 1; for (int i = 0; i < N; i++) { for (int k = 0; k <= M; k++) { for (int rem = 0; rem < P; rem++) { for (int cur = 0; cur < 10 && k + cur <= M; cur++) { LL &res = brute_dp[i + 1][k + cur][(rem + a[i] * cur) % P]; res += brute_dp[i][k][rem]; res %= mod; } } } } for (int k = 0; k <= M; k++) { ans_problem[k] = brute_dp[N][k][0]; } } // partitions based solution. vector partitions[MAXM + 1]; // Computing nCk directly. LL compute(LL n, LL k) { // n! / k! assert(n - k >= 0 && n - k <= M); LL res = 1; for (int i = n; i >= k + 1; i--) { res = (res * (LL) i) % mod; } return res; } // Bruteforce method for generating all partitions used in multinomial theorem. // Please see editorial for more details of this method. void genPartitions(int id, int operations, LL sum, int requiredSum, LL value, int min_t, int max_t) { if (id == 10) { if (sum == requiredSum) { partitions[operations].push_back(value); } } else { if (id != 0) { min_t = 0; max_t = M; } for (int cur_t = min_t; cur_t <= max_t; cur_t++) { int new_operations = operations + id * cur_t; if (new_operations <= M) { int new_sum = sum + cur_t; if (new_sum <= requiredSum) { LL new_value = 0; if (id == 0) { new_value = compute(requiredSum, cur_t); } else { new_value = (value * invFact[cur_t]) % mod; } genPartitions(id + 1, new_operations, new_sum, requiredSum, new_value, min_t, max_t); } else { break; } } else { break; } } } } void calc_f_using_partition() { map prev; memset(f, 0, sizeof(f)); for (int idx = 0; idx < P; idx++) { if (cnt[idx] == 0) { f[idx][0] = 1; continue; } if (prev.count(cnt[idx])) { for (int operations = 0; operations <= M; operations++) { f[idx][operations] = f[prev[cnt[idx]]][operations]; } continue; } for (int operations = 0; operations <= M; operations++) { partitions[operations].clear(); } genPartitions(0, 0, 0, cnt[idx], 1LL, max(0LL, cnt[idx] - M), cnt[idx]); for (int operations = 0; operations <= M; operations++) { int rem = (idx * operations) % P; for (int i = 0; i < partitions[operations].size(); i++) { LL val = partitions[operations][i]; int &res = f[idx][operations]; res += val; res %= mod; } } prev[cnt[idx]] = idx; } } LL brute_f[MAXP][MAXP][MAXP]; LL temp_dp[MAXN][MAXP][MAXP]; // It is only for subtask 1: Uses normal dp for calculating f. void calc_f_using_normal_dp() { memset(brute_f, 0, sizeof(brute_f)); for (int idx = 0; idx < P; idx++) { memset(temp_dp, 0, sizeof(temp_dp)); int total = cnt[idx]; temp_dp[0][0][0] = 1; for (int i = 0; i < total; i++) { for (int operations = 0; operations <= M; operations++) { for (int rem = 0; rem < P; rem++) { for (int cur = 0; cur < 10; cur++) { int new_rem = (rem + idx * cur) % P; int new_operations = operations + cur; if (new_operations <= M) { LL &res = temp_dp[i + 1][new_operations][new_rem]; res += temp_dp[i][operations][rem]; res %= mod; } } } } } for (int operations = 0; operations <= M; operations++) { for (int rem = 0; rem < P; rem++) { brute_f[idx][operations][rem] = temp_dp[total][operations][rem]; } } } for (int idx = 0; idx < P; idx++) { for (int operations = 0; operations <= M; operations++) { f[idx][operations] = brute_f[idx][operations][(idx * operations) % P]; } } } // f function is described in the editorial. // You can use bruteforce, monvariate or bivariate polynomials or combinatorics to get f. void calc_f_using_combinatorics() { // We use map to just iterate over all the idx, such that cnt[idx] is non zero. // Also using cyclicity property of 10^x, we can deduce that set containing values of cnt[idx] will have // very low cardinality. It can be proved that it would be at most 4. // So instead of computing each time, we will use the precalculated result. map prev; memset(f, 0, sizeof(f)); for (int idx = 0; idx < P; idx++) { int n = cnt[idx]; if (n == 0) { f[idx][0] = 1; continue; } if (prev.find(n) != prev.end()) { int t = prev[n]; for (int i = 0; i <= M; i++) { f[idx][i] = f[t][i]; } continue; } for (int i = 0; i <= M / 10; i++) { LL coeff1 = C(n, i); if ((n - i) % 2 != 0) { coeff1 *= -1; } if (coeff1 < 0) { coeff1 += mod; } LL prod = 1; for (int j = 0; 10 * i + j <= M; j++) { LL coeff2 = prod; prod = (prod * (n + j)) % mod; prod = (prod * inv[j + 1]) % mod; if (n % 2 != 0) { coeff2 *= -1; } if (coeff2 < 0) { coeff2 += mod; } LL coeff = (coeff1 * coeff2) % mod; int operations = 10 * i + j; f[idx][operations] += coeff; if (f[idx][operations] >= mod) { f[idx][operations] -= mod; } } } prev[n] = idx; } } // O(M^2 P^2) dp solution for solving subtask 3 after computing f. // Note that we are going to optimize this part only using FFT for solving subtask 4. void solve_simple() { memset(dp, 0, sizeof(dp)); dp[0][0][0] = 1; for (int idx = 0; idx < P; idx++) { cerr << "processing index " << idx << endl; memset(dp[(idx + 1) & 1], 0, sizeof(dp[0])); for (int operations = 0; operations <= M; operations++) { for (int rem = 0; rem < P; rem++) { for (int interOperations = 0; operations + interOperations <= M; interOperations++) { int interRem = (interOperations * idx) % P; int newOperations = operations + interOperations; int newRem = (rem + interRem); if (newRem >= P) { newRem -= P; } int &res = dp[(idx + 1) & 1][newOperations][newRem]; res = (res + f[idx][interOperations] * (LL) dp[idx & 1][operations][rem]) % mod; } } } } for (int i = 0; i <= M; i++) { ans_problem[i] = dp[P & 1][i][0]; } } // In this method, we solve the bivariate polynomial multiplications, We use few optimizations too which are stated in do_it function. void solve_fft_smart() { int base = 2 * M + 1; DEG = (2 * P - 2) * base + 2 * M; MAXDEG = 1; for(int i = 1;i <= DEG; i *= 2) MAXDEG *= 2; MAXDEG *= 2; cerr << "MAXDEG is " << MAXDEG << endl; indices.clear(); for (int i = 0; i < P; i++) { if (cnt[i] != 0) { indices.push_back(i); } } int * RES = doIt(0, (int) indices.size() - 1); memset(values, 0, sizeof(values)); for (int i = 0; i <= DEG; i++) { int j = i / (2 * M + 1); assert(j <= 2 * P - 2); if (j % P == 0) { int op = i % (2 * M + 1); if (op <= M) { values[op] += RES[i]; values[op] %= mod; } } } for (int i = 0; i <= M; i++) { ans_problem[i] = values[i]; } } int poly1[MAX], poly2[MAX], respoly[MAX], result[MAX]; // Direct multiplication of all the bivariate polynomials, no optimization, it will also pass all subtasks. void solve_fft_normal() { int base = 2 * M + 1; DEG = (2 * P - 2) * base + 2 * M; MAXDEG = 1; for(int i = 1;i <= DEG; i *= 2) MAXDEG *= 2; MAXDEG *= 2; cerr << "MAXDEG is " << MAXDEG << endl; indices.clear(); for (int i = 0; i < P; i++) { if (cnt[i] != 0) { indices.push_back(i); } } memset(respoly, 0, sizeof(respoly)); respoly[0] = 1; for (int idx = 0; idx < P; idx++) { if (cnt[idx] == 0) { continue; } cerr << "currently processing at " << idx << endl; memset(poly2, 0, sizeof(int) * MAXDEG); for (int j = 0; j <= M; j++) { int t = ((idx * j) % P) * (2 * M + 1) + j; poly2[t] += f[idx][j]; if (poly2[t] >= mod) { poly2[t] -= mod; } } for (int i = 0; i < MAXDEG; i++) { poly1[i] = respoly[i]; } NTT(poly1, DEG, poly2, DEG, result); memset(respoly, 0, sizeof(int) * MAXDEG); for (int i = 0; i <= DEG; i++) { int op = i % (2 * M + 1); int t = i / (2 * M + 1); while (t >= P) { t -= P; } if (op <= M) { int val = t * (2 * M + 1) + op; respoly[val] += result[i]; if (respoly[val] >= mod) { respoly[val] -= mod; } } } } memset(values, 0, sizeof(values)); for (int i = 0; i <= DEG; i++) { int j = i / (2 * M + 1); assert(j <= 2 * P - 2); if (j % P == 0) { int op = i % (2 * M + 1); if (op <= M) { values[op] += respoly[i]; values[op] %= mod; } } } for (int i = 0; i <= M; i++) { ans_problem[i] = values[i]; } } void print_sol() { int ans = 0; for (int i = 0; i <= M; i++) { ans += ans_problem[i]; ans %= mod; cout << ans << " "; } cout << endl; } void solve() { do_digit_dp(); // Instead of using digit dp, you can even make use of cyclicity of 10^i % P. if (N <= 1000 && P <= 50) { // subtask 1 calc_f_using_normal_dp(); smartBrute(); } else if (P <= 50 && M <= 50) { // subtask 2 calc_f_using_partition(); // You can also calculate f using matrix exponentiation. Please see tester's code for that. solve_simple(); } else if (P <= 50 && M <= 500) { // subtask 3 calc_f_using_combinatorics(); solve_simple(); } else { // subtask 4 calc_f_using_combinatorics(); // You can use any of the methods to pass the problem, but smarter method relies on some good optimizations. // Note that I have used bi-variate polynomial multiplication in this problem. // You can even solve it using monovariate polynomial multiplication for O(P * M) polynomials, but in reality they are quite less // so, that method would give you better time despite having low theoritical time complexity. // solve_fft_normal(); solve_fft_smart(); } // print the answer. print_sol(); } // pre computation of factorial and inverses. void pre() { fact[0] = 1; for (LL i = 1; i < MAXNN; i++) { fact[i] = (i * fact[i - 1]) % mod; } for (int i = 0; i < MAXNN; i++) { inv[i] = modpow(i, mod - 2, mod); } for (int i = 0; i < MAXNN; i++) { invFact[i] = modpow(fact[i], mod - 2, mod); } } int main() { int start_time = clock(); // precompute method. pre(); // prepare method for using DFT. prep(); cin >> N >> P >> M; solve(); cerr << endl; cerr << "Total multiplications done using FFT are " << MULTIPLICATION_COUNT << endl; cerr << "time taken " << (clock() - start_time) / (double) CLOCKS_PER_SEC << endl; return 0; }