在做网页爬虫时,难免会遇到编码乱码或需要进行编码转换的问题:将 Unicode 编码、JSON 编码转为中文

时间:2022-10-16 20:16:07

 

Unicode编码转中文

/// <summary>
/// Replaces every <c>\uXXXX</c> escape (four hexadecimal digits, case-insensitive)
/// in <paramref name="str"/> with the UTF-16 code unit it denotes.
/// </summary>
/// <remarks>
/// The text is decoded in a single pass, so characters produced by one escape are never
/// re-interpreted as part of another escape. Sequences that are not followed by exactly
/// four hexadecimal digits are left untouched. Escaped surrogate pairs are preserved
/// because each code unit is emitted as-is.
/// </remarks>
/// <param name="str">The string to decode; may be null or empty.</param>
/// <returns>The decoded string, or <paramref name="str"/> itself when it is null or empty.</returns>
private string NormalU2CC(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    // Only hex digits are accepted, so text such as "\username" is never parsed as an escape.
    return Regex.Replace(
        str,
        @"\\u([0-9a-f]{4})",
        match => ((char)int.Parse(
            match.Groups[1].Value,
            NumberStyles.AllowHexSpecifier,
            CultureInfo.InvariantCulture)).ToString(),
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
}

 

json字符串转中文

/// <summary>
/// Unescapes a JSON-style string fragment: <c>\"</c> becomes <c>"</c>, <c>\/</c> becomes
/// <c>/</c>, and every <c>\uXXXX</c> escape (four hexadecimal digits) becomes the UTF-16
/// code unit it denotes.
/// </summary>
/// <remarks>
/// A <c>\u</c> that is not followed by four hexadecimal digits is copied to the output
/// unchanged instead of throwing or being dropped. Other JSON escapes are not processed.
/// </remarks>
/// <param name="input">The escaped text; may be null or empty.</param>
/// <returns>The unescaped text, or <paramref name="input"/> itself when it is null or empty.</returns>
private string JsonToUnicode(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    input = input.Replace("\\\"", "\"").Replace("\\/", "/");

    // Ordinal search: the marker is a literal two-character sequence, not linguistic text.
    int firstEscape = input.IndexOf("\\u", StringComparison.Ordinal);
    if (firstEscape < 0)
    {
        return input;
    }

    StringBuilder decoded = new StringBuilder(input.Length);
    decoded.Append(input, 0, firstEscape);

    int position = firstEscape;
    while (position < input.Length)
    {
        char codeUnit;
        if (TryReadUnicodeEscape(input, position, out codeUnit))
        {
            decoded.Append(codeUnit);
            position += 6; // length of "\uXXXX"
        }
        else
        {
            decoded.Append(input[position]);
            position++;
        }
    }

    return decoded.ToString();
}

/// <summary>
/// Attempts to read a <c>\uXXXX</c> escape that starts at <paramref name="index"/>.
/// </summary>
/// <param name="text">The text being scanned.</param>
/// <param name="index">The position of the candidate backslash.</param>
/// <param name="value">The decoded code unit on success; <c>'\0'</c> otherwise.</param>
/// <returns><c>true</c> when a complete, valid escape starts at <paramref name="index"/>.</returns>
private static bool TryReadUnicodeEscape(string text, int index, out char value)
{
    value = '\0';
    if (index + 6 > text.Length || text[index] != '\\' || text[index + 1] != 'u')
    {
        return false;
    }

    // AllowHexSpecifier (unlike HexNumber) rejects surrounding whitespace, so only
    // four genuine hex digits are accepted.
    int codeUnit;
    if (!int.TryParse(
            text.Substring(index + 2, 4),
            NumberStyles.AllowHexSpecifier,
            CultureInfo.InvariantCulture,
            out codeUnit))
    {
        return false;
    }

    value = (char)codeUnit;
    return true;
}