【C#基础】实现URL Unicode编码，编码、解码相关整理

1、URL编码　　引用系统 System.Web（使用 HttpUtility.UrlEncode）

using System.Web;

// Assemble the "SAMLRequest=...&RelayState=..." string; only SAMLRequest goes through UrlEncode.
// NOTE(review): RelayState is appended exactly as supplied - confirm the caller has already encoded it.
string postdata = string.Concat(
    "SAMLRequest=",
    HttpUtility.UrlEncode(SAMLRequest),
    "&RelayState=",
    RelayState);

2、URL编码（按UTF-8字节逐个做百分号编码）　　自己封装的方法

　　　　 //实现URL编码

        /// <summary>
        /// Percent-encodes every UTF-8 byte of <paramref name="str"/> as <c>%xx</c>.
        /// </summary>
        /// <remarks>
        /// Every byte is escaped, including ASCII letters and digits. Hex digits are lowercase and
        /// always two characters wide, so a byte below 0x10 is written as <c>%0a</c> rather than
        /// the malformed <c>%a</c>.
        /// </remarks>
        /// <param name="str">Text to encode; must not be null.</param>
        /// <returns>The encoded text; an empty string when <paramref name="str"/> is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static string UrlEncode(string str)
        {
            // UTF-8 is chosen explicitly; Encoding.Default would depend on the machine's code page.
            byte[] utf8Bytes = System.Text.Encoding.UTF8.GetBytes(str);

            // Each input byte becomes exactly three output characters.
            StringBuilder sb = new StringBuilder(utf8Bytes.Length * 3);
            foreach (byte b in utf8Bytes)
            {
                // "x2" zero-pads to two digits, which Convert.ToString(b, 16) does not.
                sb.Append('%').Append(b.ToString("x2"));
            }

            return sb.ToString();
        }

3、\u4E2D\u56FD 转换成 "中国"

　　　　 /// <summary>
        /// Replaces each <c>\uXXXX</c> escape (a backslash, a lowercase <c>u</c> and four hex digits)
        /// in <paramref name="input"/> with the UTF-16 code unit it names. For example
        /// <c>\u4E2D\u56FD</c> becomes "中国".
        /// </summary>
        /// <remarks>
        /// Text that is not a complete, valid escape is copied through unchanged. That includes a
        /// backslash followed by something other than <c>u</c>, a truncated escape at the end of
        /// the input, and an escape containing non-hex characters. Surrogate halves are emitted
        /// as-is, so a pair such as <c>\uD83D\uDE00</c> recombines into one supplementary character.
        /// </remarks>
        /// <param name="input">Text that may contain <c>\uXXXX</c> escapes.</param>
        /// <returns>
        /// The decoded text. A null or empty <paramref name="input"/> is returned unchanged.
        /// </returns>
        public static string NormalU2C(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            StringBuilder decoded = new StringBuilder(input.Length);
            int pos = 0;
            while (pos < input.Length)
            {
                char unit;
                if (TryReadUnicodeEscape(input, pos, out unit))
                {
                    decoded.Append(unit);
                    pos += 6; // length of "\uXXXX"
                }
                else
                {
                    // Not an escape: keep the character exactly as it was.
                    decoded.Append(input[pos]);
                    pos++;
                }
            }

            return decoded.ToString();
        }

        // Tries to read a six-character \uXXXX escape beginning at text[start];
        // on success stores the decoded UTF-16 code unit in 'unit'.
        private static bool TryReadUnicodeEscape(string text, int start, out char unit)
        {
            unit = '\0';
            if (start + 6 > text.Length || text[start] != '\\' || text[start + 1] != 'u')
            {
                return false;
            }

            int code = 0;
            for (int k = start + 2; k < start + 6; k++)
            {
                if (!Uri.IsHexDigit(text[k]))
                {
                    return false;
                }

                code = (code << 4) | Uri.FromHex(text[k]);
            }

            unit = (char)code;
            return true;
        }

        /// <summary>
        /// Converts four hexadecimal digit characters (for example <c>'4','E','2','D'</c>) into a
        /// one-character string holding the UTF-16 code unit they spell.
        /// </summary>
        /// <param name="u4">
        /// At least four characters; only the first four are read. Hex letters may be upper or
        /// lower case.
        /// </param>
        /// <returns>A string of length one containing the decoded code unit.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="u4"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="u4"/> has fewer than four characters, or one of the first four is not a
        /// hex digit.
        /// </exception>
        public static string UnicodeCode2Str(char[] u4)
        {
            if (u4 == null)
            {
                throw new ArgumentNullException("u4");
            }

            if (u4.Length < 4)
            {
                throw new ArgumentException("It's not a unicode code array");
            }

            int code = 0;
            for (int i = 0; i < 4; i++)
            {
                if (!Uri.IsHexDigit(u4[i]))
                {
                    throw new ArgumentException("It's not a unicode code array");
                }

                // Shift in one hex digit (4 bits) at a time, most significant first.
                code = (code << 4) | Uri.FromHex(u4[i]);
            }

            // A direct cast keeps surrogate halves intact; decoding two bytes through
            // Encoding.BigEndianUnicode would replace a lone surrogate with U+FFFD.
            return ((char)code).ToString();
        }

4、网页ASCII转换成Unicode

　　　　//网页ASCII转换成Unicode

        /// <summary>
        /// Writes <paramref name="htmltext"/> into a new <c>HTMLDocumentClass</c> document, closes
        /// it, and returns the inner text of the document body.
        /// </summary>
        /// <param name="htmltext">HTML markup to write into the document.</param>
        /// <returns>The value of <c>body.innerText</c> read after the document is closed.</returns>
        public string HtmlEncoding(string htmltext)
        {
            IHTMLDocument2 document = new HTMLDocumentClass();
            object[] markup = new object[] { htmltext };

            document.write(markup);
            document.close();

            return document.body.innerText;
        }

5、解析html的NCR编码方法

//解析html的NCR编码方法

        /// <summary>
        /// Takes the <c>&lt;body&gt;</c> part of an HTML string, removes whitespace and tags, and
        /// decodes numeric character references (<c>&amp;#xHHHH;</c> and <c>&amp;#DDDD;</c>) into
        /// the characters they name.
        /// </summary>
        /// <remarks>
        /// Text outside the references is kept, so literal letters such as <c>u</c> no longer
        /// corrupt the result. References above U+10FFFF are left untouched.
        /// </remarks>
        /// <param name="htmltext">HTML containing a <c>&lt;body&gt;</c> element.</param>
        /// <returns>
        /// The decoded body text, or the fixed error message when any step throws.
        /// </returns>
        public string NCRtoString(string htmltext)
        {
            try
            {
                // NOTE(review): RegexHelper is defined elsewhere; presumably it writes the captured
                // body content back into htmltext - confirm against its implementation.
                RegexHelper.GetMatchStr(htmltext, "<body>(.*?)</body>", out htmltext);

                // Drop tabs, line breaks and spaces, then any remaining markup tags.
                htmltext = htmltext.Replace("\t", "").Replace("\r", "").Replace("\n", "").Replace(" ", "");
                htmltext = Regex.Replace(htmltext, "<[^>]*>", "");

                // Group 1 = hex digits after "&#x", group 2 = decimal digits after "&#".
                return Regex.Replace(
                    htmltext,
                    "&#(?:[xX]([0-9A-Fa-f]{1,6})|([0-9]{1,7}));",
                    DecodeNumericReference);
            }
            catch (Exception)
            {
                // Best-effort by design: callers receive this message instead of an exception.
                return "解析html的NCR编码方法异常";
            }
        }

        // Converts one matched numeric character reference into the text it represents.
        private static string DecodeNumericReference(Match reference)
        {
            int codePoint = reference.Groups[1].Success
                ? Convert.ToInt32(reference.Groups[1].Value, 16)
                : Convert.ToInt32(reference.Groups[2].Value, 10);

            if (codePoint <= 0xFFFF)
            {
                // Includes surrogate halves, so a pair written as two references recombines.
                return ((char)codePoint).ToString();
            }

            if (codePoint <= 0x10FFFF)
            {
                return char.ConvertFromUtf32(codePoint);
            }

            // Not a Unicode code point: leave the reference text as it was.
            return reference.Value;
        }

6、C#实现escape编码

　　　　 //C#实现escape编码

        /// <summary>
        /// Encodes every UTF-16 code unit of <paramref name="s"/> as <c>%25u</c> followed by four
        /// uppercase hex digits, e.g. <c>%25u4E2D</c>.
        /// </summary>
        /// <remarks>
        /// <c>%25</c> is the percent-encoded form of <c>%</c>, so decoding the result once as a URL
        /// yields escape-style <c>%uXXXX</c> sequences. Every character is encoded, including ASCII.
        /// </remarks>
        /// <param name="s">Text to encode; must not be null.</param>
        /// <returns>The encoded text; an empty string when <paramref name="s"/> is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public static string UrlEncode(string s)
        {
            // Encoding.Unicode is little-endian UTF-16: each code unit arrives as [low, high].
            byte[] utf16 = System.Text.Encoding.Unicode.GetBytes(s);

            // Two input bytes produce eight output characters.
            StringBuilder sb = new StringBuilder(utf16.Length * 4);
            for (int i = 0; i + 1 < utf16.Length; i += 2)
            {
                sb.Append("%25u");
                sb.Append(utf16[i + 1].ToString("X2")); // high byte first
                sb.Append(utf16[i].ToString("X2"));
            }

            return sb.ToString();
        }

7、汉字与Unicode编码（\uXXXX）互相转换

        /// <summary>
        /// Converts text into a sequence of <c>\uxxxx</c> escapes, one per UTF-16 code unit, using
        /// four lowercase hex digits each.
        /// </summary>
        /// <remarks>Every character is escaped, including plain ASCII.</remarks>
        /// <param name="str">Text to encode; must not be null.</param>
        /// <returns>The escaped text; an empty string when <paramref name="str"/> is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static string ToUnicode(string str)
        {
            // Encoding.Unicode is little-endian UTF-16: each code unit arrives as [low, high].
            byte[] utf16 = Encoding.Unicode.GetBytes(str);

            // Two input bytes produce six output characters.
            StringBuilder sb = new StringBuilder(utf16.Length * 3);
            for (int i = 0; i + 1 < utf16.Length; i += 2)
            {
                sb.Append("\\u")
                  .Append(utf16[i + 1].ToString("x2")) // high byte first
                  .Append(utf16[i].ToString("x2"));
            }

            return sb.ToString();
        }

        /// <summary>
        /// Decodes the <c>\uXXXX</c> escapes found in <paramref name="str"/> and returns the
        /// decoded characters concatenated in order.
        /// </summary>
        /// <remarks>
        /// Despite the name, no GB2312 conversion takes place; the result is an ordinary string.
        /// Only the escapes contribute to the result: text outside them is not copied. An escape
        /// must have exactly four hex digits; anything else is skipped rather than throwing.
        /// Matching is case-insensitive, so <c>\U</c> and upper- or lowercase hex are accepted.
        /// </remarks>
        /// <param name="str">Text containing <c>\uXXXX</c> escapes; must not be null.</param>
        /// <returns>The decoded characters; an empty string when no escape is found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static string ToGB2312(string str)
        {
            MatchCollection escapes = Regex.Matches(
                str,
                @"\\u([0-9a-f]{4})",
                RegexOptions.Compiled | RegexOptions.IgnoreCase);

            StringBuilder sb = new StringBuilder(escapes.Count);
            foreach (Match escape in escapes)
            {
                // A direct cast keeps surrogate halves intact so pairs recombine.
                sb.Append((char)Convert.ToInt32(escape.Groups[1].Value, 16));
            }

            return sb.ToString();
        }

秒客网

【C#基础】实现URL Unicode编码，编码、解码相关整理

相关文章