洛谷P1117 优秀的拆分【Hash】【字符串】【二分】【好难不会】

题目描述

如果一个字符串可以被拆分为AABBAABB的形式，其中 A和 B是任意非空字符串，则我们称该字符串的这种拆分是优秀的。

例如，对于字符串aabaabaaaabaabaa，如果令 A=aabA=aab，B=aB=a，我们就找到了这个字符串拆分成 AABBAABB的一种方式。

一个字符串可能没有优秀的拆分，也可能存在不止一种优秀的拆分。比如我们令 A=aA=a，B=baaB=baa，也可以用 AABBAABB表示出上述字符串；但是，字符串 abaabaaabaabaa 就没有优秀的拆分。

现在给出一个长度为 nn的字符串SS，我们需要求出，在它所有子串的所有拆分方式中，优秀拆分的总个数。这里的子串是指字符串中连续的一段。

以下事项需要注意：

出现在不同位置的相同子串，我们认为是不同的子串，它们的优秀拆分均会被记入答案。
在一个拆分中，允许出现A=BA=B。例如 cccccccc 存在拆分A=B=cA=B=c。
字符串本身也是它的一个子串。

输入输出格式

输入格式：

每个输入文件包含多组数据。

输入的第一行只有一个整数TT，表示数据的组数。保证 1≤T≤101≤T≤10。

接下来 TT行，每行包含一个仅由英文小写字母构成的字符串SS，意义如题所述。

输出格式：

输出 TT行，每行包含一个整数，表示字符串SS 所有子串的所有拆分中，总共有多少个是优秀的拆分。

输入输出样例

输入样例#1：复制

4

aabbbb

cccccc

aabaabaabaa

bbaabaababaaba

输出样例#1：复制

说明

我们用S_{i,j}Si,j表示字符串 SS第 ii个字符到第jj个字符的子串（从11开始计数）。

第一组数据中，共有 33个子串存在优秀的拆分：

S_{1,4}=aabbS1,4=aabb，优秀的拆分为A=aA=a，B=bB=b；

S_{3,6}=bbbbS3,6=bbbb，优秀的拆分为 A=bA=b，B=bB=b；

S_{1,6}=aabbbbS1,6=aabbbb，优秀的拆分为 A=aA=a，B=bbB=bb。

而剩下的子串不存在优秀的拆分，所以第一组数据的答案是 33。

第二组数据中，有两类，总共44个子串存在优秀的拆分：

对于子串 S_{1,4}=S_{2,5}=S_{3,6}=ccccS1,4=S2,5=S3,6=cccc，它们优秀的拆分相同，均为A=cA=c，B=cB=c，但由于这些子串位置不同，因此要计算33 次；

对于子串 S_{1,6}=ccccccS1,6=cccccc，它优秀的拆分有 22种：A=cA=c，B=ccB=cc和 A=ccA=cc，B=cB=c，它们是相同子串的不同拆分，也都要计入答案。

所以第二组数据的答案是3+2=53+2=5。

第三组数据中，S_{1,8}S1,8和 S_{4,11}S4,11 各有 22 种优秀的拆分，其中S_{1,8}S1,8 是问题描述中的例子，所以答案是2+2=42+2=4。

第四组数据中，S_{1,4},S_{6,11},S_{7,12},S_{2,11},S_{1,8}S1,4,S6,11,S7,12,S2,11,S1,8 各有 11种优秀的拆分，S_{3,14}S3,14 有22 种优秀的拆分，所以答案是 5+2=75+2=7。

对于全部的测试点,保证1≤T≤101≤T≤10。以下对数据的限制均是对于单组输入数据而言的，也就是说同一个测试点下的TT组数据均满足限制条件。

我们假定nn为字符串SS的长度，每个测试点的详细数据范围见下表：

洛谷P1117 优秀的拆分【Hash】【字符串】【二分】【好难不会】

暴力Hash 80分TLE

枚举子串，枚举A的长度，hash比较

 #include <iostream>

 #include <set>

 #include <cmath>

 #include <stdio.h>

 #include <cstring>

 #include <algorithm>

 #include <vector>

 #include <queue>

 #include <map>

 using namespace std;

 typedef long long LL;

 #define inf 0x7f7f7f7f

 const int maxn = 3e4 + ;

 int t;

 char s[maxn];

 unsigned long long h[maxn], p[maxn];

 int get_hash(int i, int j)

 {

     return h[j] - h[i - ] * p[j - i + ];

 }

 int main()

 {

     scanf("%d", &t);

     p[] = ;

     for(int i = ; i < maxn; i++){

         p[i] = p[i - ] * ;

     }

     while(t--){

         scanf("%s", s + );

         int len = strlen(s + );

         for(int i = ; i <= len; i++){

             h[i] = h[i - ] *  + s[i] - 'a' + ;

         }

         int ans = ;

         for(int i = ; i <= len; i++){

             for(int j = i + ; j <= len; j += ){

                 int l = j - i + ;

                 for(int x = ; x <= l /  - ; x++){

                     int y = l /  - x;

                     if(get_hash(i, i + x - ) == get_hash(i + x, i +  * x - )

                        && get_hash(i +  * x, i +  * x + y - ) == get_hash(i +  *  x + y, i + l - )){

                         ans++;

                     }

                 }

             }

         }

         printf("%d\n", ans);

     }

     return ;

 }

稍微优化了一下的暴力Hash 95分TLE

用l[i]表示以i为结尾的满足AA串的个数， r[i]表示以i+1为开头的满足BB串的个数

 #include <iostream>

 #include <set>

 #include <cmath>

 #include <stdio.h>

 #include <cstring>

 #include <algorithm>

 #include <vector>

 #include <queue>

 #include <map>

 using namespace std;

 typedef long long LL;

 #define inf 0x7f7f7f7f

 const int maxn = 3e4 + ;

 int t;

 char s[maxn];

 unsigned long long h[maxn], p[maxn];

 int l[maxn], r[maxn];

 int get_hash(int i, int j)

 {

     return h[j] - h[i - ] * p[j - i + ];

 }

 int main()

 {

     scanf("%d", &t);

     p[] = ;

     for(int i = ; i < maxn; i++){

         p[i] = p[i - ] * ;

     }

     while(t--){

         scanf("%s", s + );

         int len = strlen(s + );

         for(int i = ; i <= len; i++){

             h[i] = h[i - ] *  + s[i] - 'a' + ;

         }

         int ans = ;

         for(int i = ; i <= len; i++){

             l[i] = r[i] = ;

             for(int j = i / ; j >= ; j--){

                 if(get_hash(i -  * j + , i - j) == get_hash(i - j + , i)){

                     l[i]++;

                 }

             }

             for(int j = (len - i) / ; j >= ; j--){

                 if(get_hash(i + , i + j) == get_hash(i + j + , i +  * j)){

                     r[i]++;

                 }

             }

         }

         for(int i = ; i <= len; i++){

             ans += l[i] * r[i];

         }

         printf("%d\n", ans);

     }

     return ;

 }

还需要继续优化一下找l和r数组的过程，枚举AA串的一半长度L，把整个字符串都分成长为L的好几段。

比如我们现在看第i段的起始点，第i段和第i-1段找一个最长公共前缀，i-1段和第i-2段找一个最长公共后缀。

如果lcs和lcp的长度大于L的话，说明是存在这样一个AA串的【画个图就能理解了】

如果lcs+lcp=L，那么恰好有这么一个串，如果大于L，那么重叠多少，就有多少个这样的串。

用差分的思想进行统计。本来是这个区间都需要++的，现在只给这个区间的开头++，最后求和。那么区间结尾之后的都是多加了的，就给他--

要注意head应该是在[i-l~i]区间， tail在[i-i+L]区间，最后答案要使用longlong

还有题目给的数据范围3e4是不够的，会RE。

 #include <iostream>

 #include <set>

 #include <cmath>

 #include <stdio.h>

 #include <cstring>

 #include <algorithm>

 #include <vector>

 #include <queue>

 #include <map>

 using namespace std;

 typedef long long LL;

 #define inf 0x7f7f7f7f

 const int maxn = 5e4 + ;

 int t;

 char s[maxn];

 unsigned long long h[maxn], p[maxn];

 LL u[maxn], v[maxn];

 unsigned long long get_hash(int i, int j)

 {

     return h[i] - h[j] * p[j - i];

     //return h[j] - h[i - 1] * p[j - i + 1];

 }

 int main()

 {

     scanf("%d", &t);

     p[] = ;

     for(int i = ; i < maxn; i++){

         p[i] = p[i - ] * ;

     }

     while(t--){

         //memset(u, 0, sizeof(u));

         //memset(v, 0, sizeof(v));

         scanf("%s", s + );

         int len = strlen(s + );

         h[len + ] = ;

         for(int i = len; i >= ; i--){

             u[i] = v[i] = ;

             h[i] = h[i + ] *  + s[i] - 'a' + ;

         }

         /*for(int i = 1; i <= len; i++){

             u[i] = v[i] = 0;

             h[i] = h[i - 1] * 131 + s[i] - 'a' + 1;

         }*/

         for(int l = ; l *  <= len; l++){//枚举A的长度

             for(int i = l + l; i <= len; i += l){//分块

                 if(s[i] != s[i - l]){

                     continue;

                 }

                 //与上上个块求最长公共后缀

                 int st = , ed = l, last = i - l, pos = ;

                 while(st <= ed){

                     int mid = (st + ed) / ;

                     if(get_hash(last - mid + , last + ) == get_hash(i - mid + , i + )){

                         st = mid + ;

                         pos = mid;

                     }

                     else{

                         ed = mid - ;

                     }

                 }

                 int head = i - pos + ;

                 //与上一个块求最长公共前缀

                 st = ;ed = l; pos = ;

                 while(st <= ed){

                     int mid = (st + ed) / ;

                     if(get_hash(last, last + mid) == get_hash(i, i + mid)){

                         st = mid + ;

                         pos = mid;

                     }

                     else{

                         ed = mid - ;

                     }

                 }

                 int tail = i + pos - ;

                 head = max(head + l - , i);

                 tail = min(tail, i + l - );

                 if(head  <= tail){

                     u[head - l *  + ]++;

                     u[tail +  -  * l + ]--;

                     v[head]++;

                     v[tail + ]--;

                 }

             }

         }

         LL ans = ;

         for(int i = ; i <= len; i++){

             u[i] += u[i - ];

             v[i] += v[i - ];

         }

         for(int i = ; i < len; i++){

             ans += v[i] * u[i + ];

         }

         printf("%lld\n", ans);

     }

     return ;

 }