【转】java开源类库pinyin4j的使用

时间:2021-11-20 08:37:22

最近CMS系统为了增加查询的匹配率,需要增加拼音检索字段,在网上找到了pinyin4j的java开源类库,提供中文转汉语拼音(并且支持多音字), 呵呵,看了看他的demo,决定就用它了,因为我在实际使用的时候,需要考虑多音字的不同排列组合,下面的代码就是支持多音字的.

pinyin4j官方网址:http://pinyin4j.sourceforge.net/

maven依赖

<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.0</version>
</dependency>

使用范例

import java.util.HashSet;
import java.util.Set; import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination; public class pinyin4j { /**
* 字符串集合转换字符串(逗号分隔)
* @author wyh
* @param stringSet
* @return
*/
public static String makeStringByStringSet(Set<String> stringSet){
StringBuilder str = new StringBuilder();
int i=0;
for(String s : stringSet){
if(i == stringSet.size() - 1){
str.append(s);
}else{
str.append(s + ",");
}
i++;
}
return str.toString().toLowerCase();
} /**
* 获取拼音集合
* @author wyh
* @param src
* @return Set<String>
*/
public static Set<String> getPinyin(String src){
if(src!=null && !src.trim().equalsIgnoreCase("")){
char[] srcChar ;
srcChar=src.toCharArray();
//汉语拼音格式输出类
HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat(); //输出设置,大小写,音标方式等
hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE)
hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_V); String[][] temp = new String[src.length()][];
for(int i=0;i<srcChar.length;i++){
char c = srcChar[i];
//是中文或者a-z或者A-Z转换拼音(我的需求,是保留中文或者a-z或者A-Z)
if(String.valueOf(c).matches("[\\u4E00-\\u9FA5]+")){
try{
temp[i] = PinyinHelper.toHanyuPinyinStringArray(srcChar[i], hanYuPinOutputFormat);
}catch(BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
}
}else if(((int)c>=65 && (int)c<=90) || ((int)c>=97 && (int)c<=122)){
temp[i] = new String[]{String.valueOf(srcChar[i])};
}else{
temp[i] = new String[]{""};
}
}
String[] pingyinArray = Exchange(temp);
Set<String> pinyinSet = new HashSet<String>();
for(int i=0;i<pingyinArray.length;i++){
pinyinSet.add(pingyinArray[i]);
}
return pinyinSet;
}
return null;
} /**
* 递归
* @author wyh
* @param strJaggedArray
* @return
*/
public static String[] Exchange(String[][] strJaggedArray){
String[][] temp = DoExchange(strJaggedArray);
return temp[0];
} /**
* 递归
* @author wyh
* @param strJaggedArray
* @return
*/
private static String[][] DoExchange(String[][] strJaggedArray){
int len = strJaggedArray.length;
if(len >= 2){
int len1 = strJaggedArray[0].length;
int len2 = strJaggedArray[1].length;
int newlen = len1*len2;
String[] temp = new String[newlen];
int Index = 0;
for(int i=0;i<len1;i++){
for(int j=0;j<len2;j++){
temp[Index] = strJaggedArray[0][i] + strJaggedArray[1][j];
Index ++;
}
}
String[][] newArray = new String[len-1][];
for(int i=2;i<len;i++){
newArray[i-1] = strJaggedArray[i];
}
newArray[0] = temp;
return DoExchange(newArray);
}else{
return strJaggedArray;
}
} /**
* @param args
*/
public static void main(String[] args) {
String str = "单田芳";
System.out.println(makeStringByStringSet(getPinyin(str))); } }

控制台输出:dantianfang,shantianfang,chantianfang