POJ3691 DNA repair(AC自动机 DP)

时间:2023-03-09 06:44:20
POJ3691 DNA repair(AC自动机 DP)

给定N个长度不超过20的模式串,再给定一个长度为M的目标串S,求在目标串S上最少改变多少字符,可以使得它不包含任何的模式串

建立Trie图,求得每个节点是否是不可被包含的串,然后进行DP

dp[i][j]表示在Trie图上行走i步到达节点j的要修改的最小次数,则

dp[i + 1][chi[j][k]] = min(dp[i + 1][chi[j][k]], dp[i][j] + (ACGT[k] != str[i]))

chi[j][k]表示节点j的第k个子节点

#include<cstdio>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<algorithm>
#include<queue>
using namespace std; const int N = 2000, CH = 4;
const int INF = 0x3F3F3F3F;
const char *ACGT = "ACGT";
int Hash[128]; struct trie{
trie *next[CH];
trie *fail;
int cnt, id;
}tree[N]; bool flag[1000];
int chi[1008][4]; class AC_Auto{ public: int nxt;
trie *root; AC_Auto(){
root = &tree[0];
nxt=0;
memset(&tree[0], 0, sizeof(trie));
} void insert(char *s){
trie *p = root;
for(int i = 0; s[i]; i++){
int c = Hash[s[i]];
if(!p -> next[c]){
memset(&tree[++nxt], 0, sizeof(trie));
p -> next[c] = &tree[nxt];
p -> next[c] -> id = nxt;
}
p = p -> next[c];
}
p -> cnt = 1;
flag[p->id] = 1;
} void build(){
queue<trie *> q;
q.push(root);
root -> fail = NULL;
while(!q.empty()){
trie *cur = q.front();
q.pop();
if(cur -> fail && flag[cur->fail ->id ]){
flag[cur -> id] = 1;
}
for(int i = 0; i < CH; i++){
trie *son = cur -> next[i];
trie *tp = (cur == root)? root: cur -> fail->next[i];
if(son == NULL){
cur -> next[i] = tp;
}else{
son -> fail = tp;
q.push(son);
}
son = cur ->next[i];
chi[cur -> id][i] = son -> id;
}
}
} }; char str[1008];
int dp[1008][1000];
int main(){
Hash['A'] = 0;
Hash['C'] = 1;
Hash['G'] = 2;
Hash['T'] = 3; int n;
int t = 0;
while(~scanf("%d", &n) && n){
AC_Auto ac;
memset(flag, 0, sizeof(flag));
memset(dp, 0x3F, sizeof(dp));
for(int i = 0; i < n; i++){
scanf("%s", str);
ac.insert(str);
} ac.build(); dp[0][0] = 0;
scanf("%s", str);
int num = ac.nxt + 1; int len = 0;
for(int i = 0; str[i]; i++, len++){
for(int j = 0; j < num; j++){
for(int k = 0; k < CH; k++){
if(flag[chi[j][k]] == 0){
dp[i + 1][chi[j][k]] = min(dp[i + 1][chi[j][k]], dp[i][j] + (ACGT[k] != str[i]));
}
}
}
}
int ans = INF;
for(int i = 0; i < num ; i++){
ans = min(ans, dp[len][i]);
}
if(ans >= INF){
ans = -1;
}
printf("Case %d: %d\n", ++t, ans);
}
return 0;
}