POJ2778 DNA Sequence —— AC自动机 + 矩阵优化

时间:2022-09-15 19:12:13

题目链接:https://vjudge.net/problem/POJ-2778

DNA Sequence
Time Limit: 1000MS   Memory Limit: 65536K
Total Submissions: 18479   Accepted: 7112

Description

It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.

Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

Source

题意:

给出m个DNA序列,问长度为n且不含这m个序列的DNA有多少个?

题解:

1.把这m个序列插入到AC自动机中。

2.根据自动机中各个状态之间的关系,构成一张邻接矩阵A,但需要去除与“结束点”有关的边,这样就能保证不含有给出的序列。

3.长度为n,那么答案就是 A^n 中,初始状态那一行之和。

代码如下:

 #include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <vector>
#include <cmath>
#include <queue>
#include <stack>
#include <map>
#include <string>
#include <set>
using namespace std;
typedef long long LL;
const double EPS = 1e-8;        // generic template constant; not used by this solution
const int INF = 2e9;
const LL LNF = 9e18;
const int MOD = 100000;         // answers are reported modulo 100000
const int MAXN = 100 + 10;      // at most 10 patterns * 10 characters + root, with slack

int Size;                       // active dimension of every MA; set by Trie::query before use
int Map[256];                   // character code -> alphabet index (filled in by the caller)

// Square matrix of residues modulo MOD. Only the top-left Size x Size corner is used.
struct MA
{
    int mat[MAXN][MAXN];

    // Turn the active Size x Size corner into the identity matrix.
    void init()
    {
        for(int i = 0; i<Size; i++)
            for(int j = 0; j<Size; j++)
                mat[i][j] = (i==j);
    }
};

// Matrix product modulo MOD over the active Size x Size corner.
// Entries are expected to be non-negative and below MOD, so the 64-bit accumulator
// (at most MAXN * MOD^2, about 1.1e12) cannot overflow and one reduction per cell suffices.
MA operator*(const MA &x, const MA &y)
{
    MA ret;
    std::memset(ret.mat, 0, sizeof(ret.mat));
    for(int i = 0; i<Size; i++)
        for(int j = 0; j<Size; j++)
        {
            LL acc = 0;
            for(int k = 0; k<Size; k++)
                acc += 1LL*x.mat[i][k]*y.mat[k][j];
            ret.mat[i][j] = static_cast<int>(acc%MOD);
        }
    return ret;
}

// Binary exponentiation: returns x^y modulo MOD (the identity when y == 0).
MA qpow(MA x, int y)
{
    MA s;
    s.init();
    while(y)
    {
        if(y&1) s = s*x;
        x = x*x;
        y >>= 1;
    }
    return s;
}

// Aho-Corasick automaton over a 4-letter alphabet, used to count strings
// that avoid every inserted pattern.
struct Trie
{
    const static int sz = 4, base = 'A';
    int next[MAXN][sz], fail[MAXN], end[MAXN];
    int root, L;    // L = number of allocated nodes

    // Allocate a fresh node with no children and return its index.
    int newnode()
    {
        for(int i = 0; i<sz; i++)
            next[L][i] = -1;
        end[L++] = false;
        return L-1;
    }

    // Reset the automaton so that it contains only the root.
    void init()
    {
        L = 0;
        root = newnode();
    }

    // Insert one NUL-terminated pattern; its characters are translated through Map.
    void insert(const char buf[])
    {
        int now = root;
        for(int i = 0; buf[i] != '\0'; i++)
        {
            // Go through unsigned char so a signed char can never yield a negative index.
            const int c = Map[static_cast<unsigned char>(buf[i])];
            if(next[now][c] == -1) next[now][c] = newnode();
            now = next[now][c];
        }
        end[now] = true;
    }

    // Compute failure links breadth-first and replace every missing child with the
    // transition reached via the failure link, so next[][] becomes a total function.
    void build()
    {
        std::queue<int> Q;
        fail[root] = root;
        for(int i = 0; i<sz; i++)
        {
            if(next[root][i] == -1) next[root][i] = root;
            else fail[next[root][i]] = root, Q.push(next[root][i]);
        }
        while(!Q.empty())
        {
            int now = Q.front();
            Q.pop();
            // A state is also forbidden when some proper suffix of it is a pattern.
            end[now] |= end[fail[now]];
            for(int i = 0; i<sz; i++)
            {
                if(next[now][i] == -1) next[now][i] = next[fail[now]][i];
                else fail[next[now][i]] = next[fail[now]][i], Q.push(next[now][i]);
            }
        }
    }

    // Number of length-n strings over the alphabet that contain no inserted pattern,
    // modulo MOD. Must be called after build().
    int query(int n)
    {
        MA s;
        std::memset(s.mat, 0, sizeof(s.mat));
        for(int i = 0; i<L; i++)
        {
            if(end[i]) continue;                // forbidden states have no outgoing edges
            for(int j = 0; j<sz; j++)
            {
                if(end[next[i][j]]) continue;   // ...and no incoming edges
                // Several letters may lead to the same state (typically the root),
                // so an entry can be larger than 1.
                s.mat[i][next[i][j]]++;
            }
        }

        Size = L;
        s = qpow(s, n);

        // Sum the walks of length n from the root to every state (the root included).
        int ret = 0;
        for(int i = 0; i<L; i++)
            ret = (ret+s.mat[root][i])%MOD;
        return ret;
    }
};

Trie ac;
char buf[20];   // patterns are at most 10 characters long; extra room for the terminator

// Reads test cases "m n" followed by m patterns until input runs out, and prints for each
// case the number of length-n DNA strings avoiding all patterns, modulo 100000.
int main()
{
    // Map the four nucleotides onto the automaton's alphabet indices 0..3.
    Map['A'] = 0; Map['C'] = 1; Map['G'] = 2; Map['T'] = 3;
    int n, m;
    // Require both numbers to be parsed so malformed input cannot loop forever.
    while(scanf("%d%d", &m, &n) == 2)
    {
        ac.init();
        for(int i = 1; i<=m; i++)
        {
            scanf("%19s", buf);     // width bounded to the size of buf
            ac.insert(buf);
        }
        ac.build();
        int ans = ac.query(n);
        printf("%d\n", ans);
    }
    return 0;
}