iOS:使用莱文斯坦距离算法计算两串字符串的相似度

时间:2022-04-30 04:16:23

Levenshtein:莱文斯坦距离

Levenshtein的经典算法,参考http://en.wikipedia.org/wiki/Levenshtein_distance的伪代码实现的,同时参考了一些C++的实现,求字符串相似度。

下面求出结果是0.0~100.0,   表示为0%~100%。

static inline int min(int a, int b) {
return a < b ? a : b;
} +(float)likePercentByCompareOriginText:(NSString *)originText targetText:(NSString *)targetText{ //length
int n = (int)originText.length;
int m = (int)targetText.length;
if (n == || m == ) {
return 0.0;
} //Construct a matrix, need C99 support
int N = n+;
int **matrix;
matrix = (int **)malloc(sizeof(int *)*N); int M = m+;
for (int i = ; i < N; i++) {
matrix[i] = (int *)malloc(sizeof(int)*M);
} for (int i = ; i<N; i++) {
for (int j=; j<M; j++) {
matrix[i][j]=;
}
} for(int i=; i<=n; i++) {
matrix[i][]=i;
}
for(int i=; i<=m; i++) {
matrix[][i]=i;
}
for(int i=;i<=n;i++)
{
unichar si = [originText characterAtIndex:i-];
for(int j=;j<=m;j++)
{
unichar dj = [targetText characterAtIndex:j-];
int cost;
if(si==dj){
cost=;
}
else{
cost=;
}
const int above = matrix[i-][j]+;
const int left = matrix[i][j-]+;
const int diag = matrix[i-][j-]+cost;
matrix[i][j] = min(above, min(left,diag));
}
}
return 100.0 - 100.0*matrix[n][m]/MAX(m,n);
}