置换数量是阶乘级别的,但容易发现本质不同的点的置换数量仅仅是n的整数拆分个数,OEIS(或者写个dp或者暴力)一下会发现不是很大,当n=53时约在3e5左右。
于是暴力枚举点的置换,并且发现根据点的置换我们得到的实际上是边的置换,暴力数一下循环节就好了。3e5*50*50,luogu上过掉了。诶怎么bzoj上开的时限总共只有4s啊?
考虑数边置换的循环节时不那么暴力。显然两端点在同一循环内的边和在不同循环内的边是不可能处于同一边的循环的,并且第一种情况只与该循环长度有关,第二种情况只与两循环长度有关,先预处理一下就可以了,当然事实上也能直接推出来。不过这复杂度上并没有优化。
#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstdlib>
#include<cstring>
#include<algorithm>
using namespace std;
#define ll long long
#define N 60
char getc(){char c=getchar();while ((c<'A'||c>'Z')&&(c<'a'||c>'z')&&(c<''||c>'')) c=getchar();return c;}
int gcd(int n,int m){return m==?n:gcd(m,n%m);}
int read()
{
int x=,f=;char c=getchar();
while (c<''||c>'') {if (c=='-') f=-;c=getchar();}
while (c>=''&&c<='') x=(x<<)+(x<<)+(c^),c=getchar();
return x*f;
}
int n,c,P,a[N],b[N],fac[N],inv[N],id[N][N],nxt[N*N],f[N],g[N][N],ans,T;
bool flag[N*N];
int ksm(int a,int k)
{
int s=;
for (;k;k>>=,a=1ll*a*a%P) if (k&) s=1ll*s*a%P;
return s;
}
int C(int n,int m){return 1ll*fac[n]*inv[m]%P*inv[n-m]%P;}
int calc(int m)
{
int s=,tot=n;
for (int i=;i<=m;i++)
{
int t=i;
while (t<m&&a[t+]==a[i]) t++;
s=1ll*s*inv[t-i+]%P;
for (int j=i;j<=t;j++)
s=1ll*s*C(tot,a[j])%P*fac[a[j]-]%P,tot-=a[j];
i=t;
}
tot=;
for (int i=;i<=m;i++) tot=(tot+f[a[i]])%P;
for (int i=;i<=m;i++)
for (int j=i+;j<=m;j++)
tot=(tot+g[a[i]][a[j]])%P;
return 1ll*s*ksm(c,tot)%P;
}
void dfs(int k,int n,int last)
{
if (n==) ans=(ans+calc(k-))%P;
if (n<last) return;
for (int i=last;i<=n;i++)
a[k]=i,dfs(k+,n-i,i);
}
int cycle()
{
int tot=;memset(flag,,T+);
for (int i=;i<=T;i++)
if (!flag[i])
{
int x=nxt[i];flag[i]=;tot++;
while (x!=i) flag[x]=,x=nxt[x];
}
return tot;
}
int main()
{
#ifndef ONLINE_JUDGE
freopen("bzoj1815.in","r",stdin);
freopen("bzoj1815.out","w",stdout);
const char LL[]="%I64d\n";
#else
const char LL[]="%lld\n";
#endif
n=read(),c=read(),P=read();
fac[]=;for (int i=;i<=n;i++) fac[i]=1ll*fac[i-]*i%P;
inv[]=inv[]=;for (int i=;i<=n;i++) inv[i]=P-1ll*(P/i)*inv[P%i]%P;
for (int i=;i<=n;i++) inv[i]=1ll*inv[i-]*inv[i]%P;
for (int i=;i<=n;i++)
{
T=;
for (int x=;x<=i;x++)
for (int y=x+;y<=i;y++)
id[x][y]=id[y][x]=++T;
for (int x=;x<=i;x++)
for (int y=x+;y<=i;y++)
nxt[id[x][y]]=id[x%i+][y%i+];
f[i]=cycle();
}
for (int i=;i<=n;i++)
for (int j=i;j<=n;j++)
{
T=;
for (int x=;x<=i;x++)
for (int y=;y<=j;y++)
id[x][y]=++T;
for (int x=;x<=i;x++)
for (int y=;y<=j;y++)
nxt[id[x][y]]=id[x%i+][y%j+];
g[i][j]=g[j][i]=cycle();
}
dfs(,n,);
cout<<1ll*ans*inv[n]%P;
return ;
}