UVA 11019 Matrix Matcher ( 二维字符串匹配， AC自动机 || 二维Hash )

题目：传送门

题意：给你一个 n * m 的文本串 T，再给你一个 r * c 的模式串 S；

　　　问模式串 S 在文本串 T 中出现了多少次。

解：

法一： AC自动机（正解） 670ms

　　把模式串的每一行当成一个字符串，建一个AC自动机。

　　然后设cnt[ x ][ y ] 表示文本串中，以 (x, y) 这个点为矩阵右上角的点，且矩阵大小为 r * c的矩阵与模式串匹配了多少行。

　　那最终统计答案的时候，只需要 O(n * m) 枚举所有点，统计满足 cnt[ x ][ y ] == r 的点的个数（即模式串的 r 行全部匹配上），就是答案。

　　那我们建完AC自动机后，就可以枚举文本串的每一行，让其去跑建成的AC自动机，记录匹配情况即可。

　　若文本串的第 x 行在第 y 列结尾处匹配到了模式串的第 i 行，则 cnt[ x - i + 1 ][ y ]++；

　　我的代码里有一个 nx[ ] 数组，这个数组的作用是：

　　若模式串中，存在多行字符是完全相等的情况，则你文本串和当前字符串匹配，可能有多种情况。

　　比如，你模式串的第3行和第5行是完全相等的，那么，你要是文本串匹配到了模式串的第3行，那么你也同样匹配到了第5行。

　　所以，增加nx[]数组来存，同一字符串的不同编号。

#include <bits/stdc++.h>

#define LL long long

#define rep(i, j, k) for(int i = j; i <= k; i++)

#define dep(i, j, k) for(int i = k; i >= j; i--)

#define mem(i, j) memset(i, j, sizeof(i))

using namespace std;

// Size limits.
//   N bounds the 1-based row/column indices of the text matrix.
//   M bounds the number of automaton nodes (root + one node per pattern cell).
// NOTE(review): the integer literals of this listing were missing in the
// published copy; the values here are reconstructed assuming a text of at
// most 1000 x 1000 and a pattern of at most 100 x 100 -- confirm against the
// problem statement.
const int N = 1e3 + 5, M = 1e4 + 5;

// Aho-Corasick automaton over 'a'..'z' whose patterns are the rows of the
// pattern matrix.  While scanning text rows it accumulates, in cnt[x][y], how
// many pattern rows line up with the r-row window whose top row is x and whose
// right-most column is y.
struct Trie {
    static const int SIGMA = 26;  // alphabet size ('a'..'z')

    // ch   : goto table (after getFail() it is the full transition function)
    // val  : for a node ending a pattern row, the most recently inserted row
    //        index (1-based); 0 otherwise
    // Fail : failure link
    // tot  : index of the next unused node (node 0 is the root)
    // nx   : nx[row] = previously inserted row index with the same string,
    //        0 terminates the chain (handles duplicate pattern rows)
    // last : nearest node on the failure chain that ends a pattern row
    // cnt  : per (top row, right column) count of matched pattern rows
    int ch[M][SIGMA], val[M], Fail[M], tot, nx[M], last[M], cnt[N][N];

    // Reset the automaton and the match counters for a new test case.
    void init() {
        std::memset(ch[0], 0, sizeof(ch[0]));
        val[0] = 0;
        last[0] = 0;
        Fail[0] = 0;  // keep the root's failure link defined even off static storage
        tot = 1;
        std::memset(cnt, 0, sizeof(cnt));
        std::memset(nx, 0, sizeof(nx));
    }

    // Map a lowercase letter to its child slot.
    int get(char Q) {
        return Q - 'a';
    }

    // Insert the 0-indexed, NUL-terminated string `s` as pattern row `pos`
    // (pos >= 1).  Rows with identical text are chained through nx[].
    void join(char s[], int pos) {
        int now = 0;
        const int len = static_cast<int>(std::strlen(s));
        for (int i = 0; i < len; ++i) {
            const int id = get(s[i]);
            if (!ch[now][id]) {
                // Nodes are recycled between test cases, so wipe on allocation.
                std::memset(ch[tot], 0, sizeof(ch[tot]));
                val[tot] = 0;
                last[tot] = 0;
                ch[now][id] = tot++;
            }
            now = ch[now][id];
        }
        nx[pos] = val[now];  // remember the earlier row (if any) with this text
        val[now] = pos;
    }

    // Breadth-first construction of failure links, `last` links, and the
    // completed transition table.
    void getFail() {
        std::queue<int> pending;
        for (int c = 0; c < SIGMA; ++c) {
            const int child = ch[0][c];
            if (child) {
                Fail[child] = 0;
                last[child] = 0;
                pending.push(child);
            }
        }
        while (!pending.empty()) {
            const int now = pending.front();
            pending.pop();
            for (int c = 0; c < SIGMA; ++c) {
                const int u = ch[now][c];
                if (!u) {
                    // Missing edge: borrow the transition of the failure node.
                    ch[now][c] = ch[Fail[now]][c];
                    continue;
                }
                pending.push(u);
                Fail[u] = ch[Fail[now]][c];
                last[u] = val[Fail[u]] ? Fail[u] : last[Fail[u]];
            }
        }
    }

    // Record every pattern row that ends at text position (row x, column y),
    // starting from node u and following the `last` chain.  A match of pattern
    // row `id` contributes to the window whose top row is x - id + 1; windows
    // that would start above row 1 are ignored.
    void add_ans(int x, int y, int u) {
        for (; u; u = last[u]) {
            for (int id = val[u]; id; id = nx[id]) {
                const int top = x - id + 1;
                if (top >= 1) cnt[top][y]++;
            }
        }
    }

    // Run text row `x` through the automaton.  `s` is 1-indexed: the row's
    // characters start at s[1].
    void print(char s[], int x) {
        const int len = static_cast<int>(std::strlen(s + 1));
        int now = 0;
        for (int i = 1; i <= len; ++i) {
            now = ch[now][get(s[i])];
            if (val[now]) {
                add_ans(x, i, now);
            } else if (last[now]) {
                add_ans(x, i, last[now]);
            }
        }
    }
};

// Automaton instance; kept at namespace scope because it holds several MB of
// tables.
Trie AC;

// s  : text matrix, rows and columns stored 1-based
// ss : one pattern row, 0-based
// NOTE(review): buffer sizes were missing in the published listing; these
// assume a text of at most 1000 x 1000 and pattern rows of at most 100
// characters -- confirm against the problem statement.
char s[1005][1005], ss[105];

// Reads the number of test cases; for each one reads the n x m text and the
// r x c pattern, then prints how many times the pattern occurs in the text.
int main() {
    int _;
    if (scanf("%d", &_) != 1) return 0;
    while (_--) {
        AC.init();

        int n, m;
        scanf("%d %d", &n, &m);
        for (int i = 1; i <= n; ++i) scanf("%s", s[i] + 1);

        int r, c;
        scanf("%d %d", &r, &c);
        for (int i = 1; i <= r; ++i) {
            scanf("%s", ss);
            AC.join(ss, i);
        }

        AC.getFail();  // build the Aho-Corasick automaton

        // Run every text row through the automaton and record the matches.
        for (int i = 1; i <= n; ++i) {
            AC.print(s[i], i);
        }

        // A cell whose counter reached r is the top-right corner of a window
        // in which every pattern row matched.
        int ans = 0;
        for (int i = 1; i <= n; ++i) {
            for (int j = 1; j <= m; ++j) {
                if (AC.cnt[i][j] == r) ans++;
            }
        }
        printf("%d\n", ans);
    }
    return 0;
}

法二：二维Hash 40ms

　　参考：博客

#include <bits/stdc++.h>

#define LL long long

#define ULL unsigned long long

#define rep(i, j, k) for(int i = j; i <= k; i++)

#define dep(i, j, k) for(int i = k; i >= j; i--)

#define mem(i, j) memset(i, j, sizeof(i))

using namespace std;

// Upper bound on the 1-based row/column indices of the text matrix.
// NOTE(review): the integer literals of this listing were missing in the
// published copy; the values below are reconstructed -- confirm.
const int N = 1e3 + 5;

// Hash bases: hash1 weights the row direction, hash2 the column direction.
// All arithmetic is done in unsigned int, i.e. implicitly modulo 2^32.
// NOTE(review): the exact bases used by the author are unknown; any two
// distinct odd bases work with the formulas in this program.
const unsigned int hash1 = 1e9 + 7, hash2 = 1e9 + 9;

// a : text matrix, b : pattern matrix; both are filled 1-based.
// NOTE(review): the size of b assumes a pattern of at most 100 x 100 -- confirm.
char a[N][N], b[105][105];

// p1[k] = hash1^k and p2[k] = hash2^k (mod 2^32).
unsigned int p1[N], p2[N];

// hs[i][j] = 2D prefix hash of the text rectangle (1,1)..(i,j); row 0 and
// column 0 are never written and stay zero.
unsigned int hs[N][N];

// Precompute the powers of both hash bases for exponents 0 .. N-1.
void init() {
    p1[0] = 1;
    p2[0] = 1;
    for (int i = 1; i < N; ++i) {
        p1[i] = p1[i - 1] * hash1;
        p2[i] = p2[i - 1] * hash2;
    }
}

int main() {

    int _; scanf("%d", &_); init();

    while(_--) {

        int n, m; scanf("%d %d", &n, &m);

        rep(i, , n) scanf("%s", a[i] + );

        int x, y; scanf("%d %d", &x, &y);

        rep(i, , x) scanf("%s", b[i] + );

        rep(i, , n) rep(j, , m) { /// 预处理n * m矩阵的二维前缀和。（三个方向的前缀和，）

            hs[i][j] = hs[i - ][j - ] * hash1 * hash2 + (hs[i - ][j] - hs[i - ][j - ] * hash2) * hash1 + (hs[i][j - ] - hs[i - ][j - ] * hash1) * hash2 + a[i][j];

        }

        unsigned int S = , C;

        rep(i, , x) rep(j, , y) { ///算模式串的hash值

            S += b[i][j] * p1[x - i] * p2[y - j];

        }

        int ans = ;

        rep(i, x, n) rep(j, y, m) { ///枚举文本串所有x*y矩阵，o(1)算出它们的hash值

            C = hs[i][j] - hs[i - x][j - y] * p1[x] * p2[y] - (hs[i][j - y] - hs[i - x][j - y] * p1[x]) * p2[y] - (hs[i - x][j] - hs[i - x][j - y] * p2[y]) * p1[x];

            if(S == C) ans++;

        }

        printf("%d\n", ans);

    }

    return ;

}

秒客网

UVA 11019 Matrix Matcher ( 二维字符串匹配， AC自动机 || 二维Hash )

相关文章