字符串相似度-C#

时间:2023-03-09 15:41:48
字符串相似度-C#

之前在做一个任务时, 需要比较字符串的相似度, 最终整理了一个出来, 以下:

  1 /*
2 * Copyright (c) 2013 Thyiad
3 * Author: Thyiad
4 * Create date: 2013/08/08
5 */
6
7 using System;
8
9 namespace Thyiad.Utility
10 {
11 /// <summary>
12 /// Operates about string.
13 /// </summary>
14 public static class StringUtil
15 {
16 /// <summary>
17 /// Compare with two string, return avg similar degree.
18 /// </summary>
19 /// <param name="str1"></param>
20 /// <param name="str2"></param>
21 /// <returns>A number of percent.</returns>
22 public static int StrSim(string str1, string str2)
23 {
24 try
25 {
26 if (str1 == null || str2 == null ||
27 (str1 == string.Empty && str2 != string.Empty) ||
28 (str1 != string.Empty && str2 == string.Empty))
29 {
30 return 0;
31 }
32 else if (str1.Equals(str2))
33 {
34 return 100;
35 }
36
37 int similar1 = 0;
38 int similar2 = 0;
39
40 similar1 = StrSimSub2(str1, str2);
41 similar2 = StrSimSub2(str2, str1);
42
43 return ((similar1 + similar2) / 2);
44 }
45 catch (Exception)
46 {
47 throw;
48 }
49 }
50
51 /// <summary>
52 /// Compare with two string, return similar degree.
53 /// </summary>
54 /// <param name="str1"></param>
55 /// <param name="str2"></param>
56 /// <returns>A number of percent.</returns>
57 private static int StrSimSub2(string str1, string str2)
58 {
59 try
60 {
61 int len1, len2;
62 int pos1, pos2;
63 char char1, char2;
64 int val1, val_min, val_max;
65
66 len1 = str1.Length;
67 len2 = str2.Length;
68 pos1 = 1;
69 val1 = 0;
70
71 if (len1 < len2)
72 {
73 val_max = len2 + 1;
74 }
75 else
76 {
77 val_max = len1 + 1;
78 }
79
80 while (pos1 <= len1)
81 {
82 char1 = str1[pos1 - 1];
83 pos2 = 1;
84 val_min = val_max;
85 while (pos2 <= len2)
86 {
87 char2 = str2[pos2 - 1];
88 if (char1 == char2)
89 {
90 if (Math.Abs(pos1 - pos2) < val_min)
91 {
92 val_min = Math.Abs(pos1 - pos2);
93 }
94 }
95 pos2++;
96 }
97 pos1++;
98 val1 = val1 + val_min;
99 }
100
101 return (100 - (val1 * 100 / (len1 * val_max)));
102 }
103 catch (Exception)
104 {
105 throw;
106 }
107 }
108 }
109 }