【C#基础】URL编码与Unicode编码:编码、解码方法整理

时间:2023-12-20 20:27:56

1、URL编码:引用系统命名空间 System.Web(HttpUtility.UrlEncode)

using System.Web;
// Assemble the request payload: the SAML request is URL-encoded via HttpUtility.UrlEncode,
// then the RelayState parameter is appended.
// NOTE(review): RelayState is appended as-is, without URL-encoding — confirm the caller
// has already encoded it or that it never contains reserved characters such as '&' or '='.
string postdata = string.Concat(
    "SAMLRequest=", HttpUtility.UrlEncode(SAMLRequest),
    "&RelayState=", RelayState);

2、URL编码:自己封装的方法

     /// <summary>
     /// Percent-encodes every byte of the UTF-8 representation of <paramref name="str"/>.
     /// </summary>
     /// <remarks>
     /// Unlike <c>HttpUtility.UrlEncode</c>, no character is left unescaped: letters and digits
     /// are encoded as well, so "a" becomes "%61". Hex digits are written in lower case.
     /// </remarks>
     /// <param name="str">The text to encode.</param>
     /// <returns>A string made of one "%xx" triplet per UTF-8 byte; empty for empty input.</returns>
     /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
     public static string UrlEncode(string str)
     {
         if (str == null)
         {
             throw new ArgumentNullException("str");
         }

         byte[] utf8 = System.Text.Encoding.UTF8.GetBytes(str);
         StringBuilder sb = new StringBuilder(utf8.Length * 3);
         foreach (byte b in utf8)
         {
             // "x2" always yields two hex digits. Convert.ToString(b, 16) would emit a single
             // digit for bytes below 0x10 (e.g. "%a" for '\n'), which is not valid percent-encoding.
             sb.Append('%').Append(b.ToString("x2"));
         }
         return sb.ToString();
     }

3、\u4E2D\u56FD 转换成 "中国"

     /// <summary>
     /// Decodes the <c>\uXXXX</c> escape sequences embedded in <paramref name="input"/>; for example
     /// the twelve characters <c>\u4E2D\u56FD</c> become the two characters "中国".
     /// </summary>
     /// <remarks>
     /// Only a backslash followed by a lower-case <c>u</c> and exactly four hexadecimal digits is
     /// treated as an escape. Anything else (a lone backslash, other escapes such as <c>\n</c>,
     /// a truncated or non-hexadecimal sequence) is copied to the output unchanged.
     /// Each escape is decoded to one UTF-16 code unit, so two consecutive escapes holding a
     /// surrogate pair yield the corresponding supplementary character.
     /// </remarks>
     /// <param name="input">Text that may contain <c>\uXXXX</c> escapes.</param>
     /// <returns>The text with every well-formed escape replaced by the character it denotes.</returns>
     /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
     public static string NormalU2C(string input)
     {
         if (input == null)
         {
             throw new ArgumentNullException("input");
         }

         StringBuilder result = new StringBuilder(input.Length);
         int i = 0;
         while (i < input.Length)
         {
             char decoded;
             // An escape needs six characters: '\', 'u' and four hex digits.
             if (input[i] == '\\'
                 && i + 6 <= input.Length
                 && input[i + 1] == 'u'
                 && TryDecodeHex4(input, i + 2, out decoded))
             {
                 result.Append(decoded);
                 i += 6;
             }
             else
             {
                 // Not a well-formed escape: keep the character as it is and rescan from the next one.
                 result.Append(input[i]);
                 i++;
             }
         }
         return result.ToString();
     }

     /// <summary>
     /// Converts four hexadecimal digits (e.g. '4','E','2','D') into the one-character string
     /// whose UTF-16 code unit has that value.
     /// </summary>
     /// <param name="u4">Array whose first four elements are hexadecimal digits; extra elements are ignored.</param>
     /// <returns>A string of length one.</returns>
     /// <exception cref="ArgumentNullException"><paramref name="u4"/> is null.</exception>
     /// <exception cref="ArgumentException">
     /// <paramref name="u4"/> has fewer than four elements or one of the first four is not a hexadecimal digit.
     /// </exception>
     public static string UnicodeCode2Str(char[] u4)
     {
         if (u4 == null)
         {
             throw new ArgumentNullException("u4");
         }

         char decoded;
         if (u4.Length < 4 || !TryDecodeHex4(new string(u4, 0, 4), 0, out decoded))
         {
             throw new ArgumentException("It's not a unicode code array");
         }

         // Cast instead of Encoding.BigEndianUnicode.GetString: decoding a lone surrogate half
         // through an Encoding replaces it with U+FFFD, which destroys surrogate pairs.
         return decoded.ToString();
     }

     /// <summary>
     /// Reads four hexadecimal digits from <paramref name="text"/> starting at <paramref name="start"/>.
     /// </summary>
     /// <returns>true and the decoded code unit when all four characters are hex digits; otherwise false.</returns>
     private static bool TryDecodeHex4(string text, int start, out char value)
     {
         value = '\0';
         if (text == null || start < 0 || start + 4 > text.Length)
         {
             return false;
         }

         int code = 0;
         for (int k = 0; k < 4; k++)
         {
             char digit = text[start + k];
             if (!Uri.IsHexDigit(digit))
             {
                 return false;
             }
             code = (code << 4) | Uri.FromHex(digit);
         }

         value = (char)code;
         return true;
     }

4、网页ASCII转换成Unicode

    /// <summary>
    /// Writes <paramref name="htmltext"/> into a freshly created <c>HTMLDocumentClass</c> and
    /// returns the <c>innerText</c> of the resulting document body.
    /// </summary>
    /// <remarks>
    /// NOTE(review): IHTMLDocument2 / HTMLDocumentClass are presumably the mshtml COM interop
    /// types; they are declared outside this snippet — confirm the reference is available.
    /// </remarks>
    /// <param name="htmltext">HTML markup to load into the document.</param>
    /// <returns>The body's inner text as reported by the document object.</returns>
    public string HtmlEncoding(string htmltext)
    {
        IHTMLDocument2 document = new HTMLDocumentClass();
        object[] markup = new object[] { htmltext };
        document.write(markup);
        document.close();
        return document.body.innerText;
    }

5、解析html的NCR编码方法

/// <summary>
/// Extracts the hexadecimal numeric character references (<c>&amp;#xHHHH;</c>) from the body of an
/// HTML fragment and returns the characters they denote, concatenated in document order.
/// </summary>
/// <remarks>
/// All tabs, line breaks and spaces are removed and tags are stripped before the references are
/// read. Text that is not a hexadecimal reference is not copied to the result. References of one
/// to six hex digits terminated by ';' are accepted, as is an unterminated four-digit reference.
/// Decimal references (<c>&amp;#NNNN;</c>) are not decoded.
/// </remarks>
/// <param name="htmltext">HTML containing a <c>&lt;body&gt;</c> element.</param>
/// <returns>
/// The decoded characters, or the fixed message "解析html的NCR编码方法异常" when any step throws.
/// </returns>
public string NCRtoString(string htmltext)
{
    try
    {
        // RegexHelper is declared outside this snippet; presumably it overwrites htmltext with
        // the text captured by the pattern (the <body> content) — TODO confirm.
        RegexHelper.GetMatchStr(htmltext, "<body>(.*?)</body>", out htmltext);
        htmltext = htmltext.Replace("\t", "").Replace("\r", "").Replace("\n", "").Replace(" ", "");
        htmltext = Regex.Replace(htmltext, "<[^>]*>", "");

        System.Text.StringBuilder result = new System.Text.StringBuilder();
        // First alternative: a properly terminated reference. Second alternative: four hex
        // digits without the ';', which earlier versions of this method also accepted.
        foreach (Match reference in Regex.Matches(htmltext, "&#x([0-9A-Fa-f]{1,6});|&#x([0-9A-Fa-f]{4})"))
        {
            string hex = reference.Groups[1].Success
                ? reference.Groups[1].Value
                : reference.Groups[2].Value;
            int codePoint = Convert.ToInt32(hex, 16);
            if (codePoint <= 0xFFFF)
            {
                // Covers the BMP and also surrogate halves written as two separate references.
                result.Append((char)codePoint);
            }
            else
            {
                // Supplementary characters need a surrogate pair; values above 0x10FFFF throw
                // and are reported through the catch block below.
                result.Append(char.ConvertFromUtf32(codePoint));
            }
        }
        return result.ToString();
    }
    catch (Exception)
    {
        return "解析html的NCR编码方法异常";
    }
}

6、C#实现escape编码

     /// <summary>
     /// Encodes every UTF-16 code unit of <paramref name="s"/> as <c>%25uXXXX</c>, where XXXX is the
     /// code unit in upper-case hexadecimal.
     /// </summary>
     /// <remarks>
     /// The prefix is "%25u", i.e. the "%u" of JavaScript-style escape() output with its percent
     /// sign already percent-encoded. NOTE(review): presumably the result is meant to pass through
     /// one additional URL-decoding step — confirm against the consumer.
     /// Every character is encoded, including ASCII letters and digits.
     /// </remarks>
     /// <param name="s">The text to encode.</param>
     /// <returns>One "%25uXXXX" group per UTF-16 code unit; empty for empty input.</returns>
     public static string UrlEncode(string s)
     {
         byte[] utf16 = System.Text.Encoding.Unicode.GetBytes(s);
         StringBuilder sb = new StringBuilder(utf16.Length * 4);

         // Encoding.Unicode is little-endian: utf16[i] is the low byte and utf16[i + 1] the high
         // byte of each code unit, so the high byte is written first.
         for (int i = 0; i < utf16.Length; i += 2)
         {
             sb.Append("%25u");
             sb.Append(utf16[i + 1].ToString("X2"));
             sb.Append(utf16[i].ToString("X2"));
         }
         return sb.ToString();
     }

7、汉字与Unicode编码(\uXXXX)互相转换

        /// <summary>
        /// Converts every UTF-16 code unit of <paramref name="str"/> into a <c>\uxxxx</c> escape
        /// with four lower-case hexadecimal digits.
        /// </summary>
        /// <param name="str">The text to escape (for example Chinese characters).</param>
        /// <returns>The escaped text; empty for empty input.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static string ToUnicode(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            StringBuilder escaped = new StringBuilder(str.Length * 6);
            foreach (char unit in str)
            {
                // "x4" zero-pads to four digits, e.g. 'A' -> \u0041.
                escaped.Append("\\u").Append(((int)unit).ToString("x4"));
            }
            return escaped.ToString();
        }

        /// <summary>
        /// Decodes the <c>\uXXXX</c> escapes found in <paramref name="str"/> and returns the
        /// characters they denote, concatenated in order.
        /// </summary>
        /// <remarks>
        /// Despite its name, this method performs no GB2312 conversion; it returns an ordinary
        /// .NET string. Only the escapes are read: any text in <paramref name="str"/> that is not
        /// part of a <c>\u</c> + four-hex-digit sequence is not copied to the result. The match is
        /// case-insensitive, so <c>\U</c> and upper- or lower-case digits are accepted.
        /// </remarks>
        /// <param name="str">Text made of <c>\uXXXX</c> escapes.</param>
        /// <returns>The decoded characters; empty when no escape is found.</returns>
        public static string ToGB2312(string str)
        {
            // Restricting the groups to hex digits means malformed sequences such as \uZZZZ are
            // skipped instead of making the number parser throw.
            MatchCollection escapes = Regex.Matches(str, @"\\u([0-9a-f]{4})", RegexOptions.IgnoreCase);
            StringBuilder decoded = new StringBuilder(escapes.Count);
            foreach (Match escape in escapes)
            {
                // Cast rather than Encoding.Unicode.GetString: decoding one surrogate half at a
                // time through an Encoding turns it into U+FFFD and breaks surrogate pairs.
                decoded.Append((char)Convert.ToInt32(escape.Groups[1].Value, 16));
            }
            return decoded.ToString();
        }