CI框架源码学习笔记7——Utf8.php

时间:2023-03-09 03:28:08
CI框架源码学习笔记7——Utf8.php

愉快的清明节假期结束了,继续回到CI框架源码的学习。这一节我们来看看Utf8.php文件,它主要负责UTF-8相关的处理:判断当前环境是否启用UTF-8支持、清洗字符串中无效的UTF-8字符,以及把其他编码的字符串转换为UTF-8。废话不多说,上代码。

class CI_Utf8 {

    /**
     * Class constructor
     *
     * Determines if UTF-8 support is to be enabled and publishes the result
     * through the UTF8_ENABLED constant.
     *
     * Relies on the ICONV_ENABLED and MB_ENABLED constants and on the
     * config_item() / log_message() functions, none of which are defined in
     * this class; they are expected to be provided elsewhere by the framework.
     *
     * Note: UTF8_ENABLED is define()d unconditionally, so this constructor is
     * meant to run once per request.
     *
     * @return void
     */
    public function __construct()
    {
        if (
            defined('PREG_BAD_UTF8_ERROR')                          // PCRE must have been built with UTF-8 support
            && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE)      // at least one of iconv / mbstring must be available
            && strtoupper(config_item('charset')) === 'UTF-8'       // the application charset must be configured as UTF-8
        )
        {
            // All three prerequisites hold: UTF-8 support is on.
            define('UTF8_ENABLED', TRUE);
            log_message('debug', 'UTF-8 Support Enabled');
        }
        else
        {
            // At least one prerequisite is missing: UTF-8 support is off.
            define('UTF8_ENABLED', FALSE);
            log_message('debug', 'UTF-8 Support Disabled');
        }

        log_message('info', 'Utf8 Class Initialized');
    }

    // --------------------------------------------------------------------

    /**
     * Clean UTF-8 strings
     *
     * Ensures strings contain only valid UTF-8 characters. Pure 7-bit ASCII
     * input is returned untouched, without consulting mbstring or iconv.
     *
     * @param  string  $str  String to clean
     * @return string  The cleaned string. NOTE(review): on the iconv path the
     *                 raw iconv() result is returned with warnings suppressed,
     *                 so a failed conversion would surface as FALSE.
     */
    public function clean_string($str)
    {
        // Only non-ASCII input can contain invalid UTF-8 sequences.
        if ($this->is_ascii($str) === FALSE)
        {
            // Prefer mbstring when it is available: a UTF-8 -> UTF-8
            // round trip is used to sanitise the string.
            if (MB_ENABLED)
            {
                $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            }
            // Otherwise fall back to iconv; //IGNORE asks it to discard
            // characters that cannot be represented in the target charset.
            elseif (ICONV_ENABLED)
            {
                $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
            }
        }

        return $str;
    }

    // --------------------------------------------------------------------

    /**
     * Remove ASCII control characters
     *
     * Removes all ASCII control characters except horizontal tabs,
     * line feeds, and carriage returns, as all others can cause
     * problems in XML.
     *
     * Delegates to remove_invisible_characters(), which is defined outside
     * this class.
     *
     * @param  string  $str  String to clean
     * @return string
     */
    public function safe_ascii_for_xml($str)
    {
        return remove_invisible_characters($str, FALSE);
    }

    // --------------------------------------------------------------------

    /**
     * Convert to UTF-8
     *
     * Attempts to convert a string to UTF-8. Unlike clean_string(), which
     * treats its input as UTF-8 already, this converts from the
     * caller-supplied source encoding.
     *
     * @param  string  $str       Input string
     * @param  string  $encoding  Input encoding
     * @return string  $str encoded in UTF-8 or FALSE on failure
     */
    public function convert_to_utf8($str, $encoding)
    {
        if (MB_ENABLED)
        {
            return mb_convert_encoding($str, 'UTF-8', $encoding);
        }
        elseif (ICONV_ENABLED)
        {
            return @iconv($encoding, 'UTF-8', $str);
        }

        // Neither mbstring nor iconv is available: conversion is impossible.
        return FALSE;
    }

    // --------------------------------------------------------------------

    /**
     * Is ASCII?
     *
     * Tests if a string is standard 7-bit ASCII or not.
     *
     * @param  string  $str  String to check
     * @return bool    TRUE when no byte outside \x00-\x7F is found
     */
    public function is_ascii($str)
    {
        // preg_match() returns 0 when nothing matched, i.e. no non-ASCII byte.
        return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
    }

}