memcpy?memmove?
//#pragma GCC optimize(2)
#include<bits/stdc++.h>
using namespace std;
const int n=;
int a[n+],b[n+],c[n+],d[n+],e[n+],f[n+];
int main()
{
int i;
srand();
for(i=;i<=n;i++) a[i]=rand();
clock_t st1=clock();
memcpy(b+,a+,sizeof(int)*n);
clock_t ed1=clock();
clock_t st2=clock();
memmove(c+,a+,sizeof(int)*n);
clock_t ed2=clock();
clock_t st3=clock();
for(i=;i<=n;i++) d[i]=a[i];
clock_t ed3=clock();
clock_t st4=clock();
for(i=;i<=n-;i+=)
{
e[i]=a[i];
e[i+]=a[i+];
e[i+]=a[i+];
e[i+]=a[i+];
}
(i<=n)&&(e[i]=a[i]);
(i+<=n)&&(e[i+]=a[i+]);
(i+<=n)&&(e[i+]=a[i+]);
clock_t ed4=clock();
clock_t st5=clock();
for(i=;i<=n-;i+=)
{
f[i]=a[i];
f[i+]=a[i+];
f[i+]=a[i+];
f[i+]=a[i+];
f[i+]=a[i+];
f[i+]=a[i+];
f[i+]=a[i+];
f[i+]=a[i+];
}
(i<=n)&&(f[i]=a[i]);
(i+<=n)&&(f[i+]=a[i+]);
(i+<=n)&&(f[i+]=a[i+]);
(i+<=n)&&(f[i+]=a[i+]);
(i+<=n)&&(f[i+]=a[i+]);
(i+<=n)&&(f[i+]=a[i+]);
(i+<=n)&&(f[i+]=a[i+]);
clock_t ed5=clock();
cout<<"time1:"<<ed1-st1<<' '<<memcmp(a+,b+,sizeof(int)*n)<<'\n';
cout<<"time2:"<<ed2-st2<<' '<<memcmp(a+,c+,sizeof(int)*n)<<'\n';
cout<<"time3:"<<ed3-st3<<' '<<memcmp(a+,d+,sizeof(int)*n)<<'\n';
cout<<"time4:"<<ed4-st4<<' '<<memcmp(a+,e+,sizeof(int)*n)<<'\n';
cout<<"time5:"<<ed5-st5<<' '<<memcmp(a+,f+,sizeof(int)*n)<<'\n';
return ;
}
不开优化:
time1:139254 0
time2:198093 0
time3:601853 0
time4:588247 0
time5:598584 0
O2:
time1:138256 0
time2:139235 0
time3:426570 0
time4:322532 0
time5:301933 0
Ofast:
time1:137893 0
time2:140585 0
time3:422154 0
time4:309306 0
time5:298620 0
很显然在大数据(n=50000000)下memcpy最快
另外,在小数据(比如n=26)下,测试得到明显直接赋值(time3)最快
在较小数据(比如n=1000)下,测试得到memmove最快?
快速乘
测试对比程序:
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
ll rd()
{
return rand()|(ll(rand())<<);
}
ll md;
ll mul1(ll x,ll y)
{
x%=md;y%=md;
ll t=x*y-ll((long double)x/md*y+0.5)*md;
return t<?t+md:t;
}
ll mul2(ll x,ll y)
{
x%=md;y%=md;
ll t=x*y-ll((long double)x*y/md+0.5)*md;
return t<?t+md:t;
}
ll mul3(ll x,ll y)
{
x%=md;y%=md;
ll t=x*y-ll((long double)x/md*y+1e-)*md;
return t<?t+md:t;
}
ll mul0(ll x,ll y)
{
return __int128(x)*y%md;
}
ll a,b;
int main()
{
int T=;
srand();
while()
{
T++;
ll a=rd(),b=rd();
md=rd();//%ll(1e18);
//cout<<a<<' '<<b<<' '<<md<<'\n';
ll t1=mul1(a,b),t2=mul0(a,b);//可将mul1改为mul2/mul3
//cout<<t1<<' '<<t2<<'\n';
if(t1!=t2)
{
printf("%d\n",T);
puts("test");
int t;cin>>t;
}
//int t;cin>>t;
}
return ;
}
经过一些测试,可以发现,mul3效果最差(在模数>=1e17时,100000组以内就拍出锅);应该是1e-8不够
mul2效果没有mul1好(模数不设额外上限时,100000组以内出锅;上限1e18时,20秒不出锅)
mul1效果最好(模数不设额外上限时,20秒不出锅)
原因就不知道了。。。