维吾尔语基本区转换扩展区(2) · 开发者必备的知识

~~~
<?php
/**
 * Created by nur.cn
 * info: Uyghur_convert
 * Date: 2016/08/12
 * Time: 15:51
 */

/**
 * Conversions between the Uyghur "basic region" (ordinary Arabic-script
 * letters) and the "extended region" (contextual presentation-form glyphs),
 * plus a few helpers for Latin (ULY) transliteration, Photoshop-style
 * visual-order text and GD text wrapping.
 *
 * All methods are static; the class holds only lookup tables.
 */
class Uyghur_convert
{
    /** Letters after which the following letter is rendered in a joined form (see checkLinkBefore). */
    private static $uyghur_harp = array("ئ", "ب", "پ", "ت", "ج", "چ", "خ", "س", "ش", "غ", "ف", "ق", "ك", "گ", "ڭ", "ل", "م", "ن", "ھ", "ې", "ى", "ي");

    /** Letters before which the preceding letter may be rendered in a joined form (see checkLinkAfter). */
    private static $uyghur_all = array("ئ", "ا", "ب", "ە", "پ", "ت", "ج", "چ", "خ", "د", "ر", "ز", "ژ", "س", "ش", "غ", "ف", "ق", "ك", "گ", "ڭ", "ل", "م", "ن", "ھ", "و", "ۇ", "ۆ", "ۈ", "ۋ", "ې", "ى", "ي");

    /**
     * Base letter => its four contextual glyphs
     * (isoGlyph = isolated, iniGlyph = initial, midGlyph = medial, endGlyph = final).
     */
    private static $letters = array(
        "ا" => array("character" => "ا", "isoGlyph" => "ﺍ", "iniGlyph" => "ﺍ", "midGlyph" => "ﺎ", "endGlyph" => "ﺎ"),
        "ە" => array("character" => "ە", "isoGlyph" => "ﻩ", "iniGlyph" => "ﻩ", "midGlyph" => "ﻩ", "endGlyph" => "ﻪ"),
        "ب" => array("character" => "ب", "isoGlyph" => "ﺏ", "iniGlyph" => "ﺑ", "midGlyph" => "ﺒ", "endGlyph" => "ﺐ"),
        "پ" => array("character" => "پ", "isoGlyph" => "ﭖ", "iniGlyph" => "ﭘ", "midGlyph" => "ﭙ", "endGlyph" => "ﭗ"),
        "ت" => array("character" => "ت", "isoGlyph" => "ﺕ", "iniGlyph" => "ﺗ", "midGlyph" => "ﺘ", "endGlyph" => "ﺖ"),
        "ج" => array("character" => "ج", "isoGlyph" => "ﺝ", "iniGlyph" => "ﺟ", "midGlyph" => "ﺠ", "endGlyph" => "ﺞ"),
        "چ" => array("character" => "چ", "isoGlyph" => "ﭺ", "iniGlyph" => "ﭼ", "midGlyph" => "ﭽ", "endGlyph" => "ﭻ"),
        "خ" => array("character" => "خ", "isoGlyph" => "ﺥ", "iniGlyph" => "ﺧ", "midGlyph" => "ﺨ", "endGlyph" => "ﺦ"),
        "د" => array("character" => "د", "isoGlyph" => "ﺩ", "iniGlyph" => "ﺩ", "midGlyph" => "ﺪ", "endGlyph" => "ﺪ"),
        "ر" => array("character" => "ر", "isoGlyph" => "ﺭ", "iniGlyph" => "ﺭ", "midGlyph" => "ﺮ", "endGlyph" => "ﺮ"),
        "ز" => array("character" => "ز", "isoGlyph" => "ﺯ", "iniGlyph" => "ﺯ", "midGlyph" => "ﺰ", "endGlyph" => "ﺰ"),
        "ژ" => array("character" => "ژ", "isoGlyph" => "ﮊ", "iniGlyph" => "ﮊ", "midGlyph" => "ﮋ", "endGlyph" => "ﮋ"),
        "س" => array("character" => "س", "isoGlyph" => "ﺱ", "iniGlyph" => "ﺳ", "midGlyph" => "ﺴ", "endGlyph" => "ﺲ"),
        "ش" => array("character" => "ش", "isoGlyph" => "ﺵ", "iniGlyph" => "ﺷ", "midGlyph" => "ﺸ", "endGlyph" => "ﺶ"),
        "غ" => array("character" => "غ", "isoGlyph" => "ﻍ", "iniGlyph" => "ﻏ", "midGlyph" => "ﻐ", "endGlyph" => "ﻎ"),
        "ق" => array("character" => "ق", "isoGlyph" => "ﻕ", "iniGlyph" => "ﻗ", "midGlyph" => "ﻘ", "endGlyph" => "ﻖ"),
        "ف" => array("character" => "ف", "isoGlyph" => "ﻑ", "iniGlyph" => "ﻓ", "midGlyph" => "ﻔ", "endGlyph" => "ﻒ"),
        "ك" => array("character" => "ك", "isoGlyph" => "ﻙ", "iniGlyph" => "ﻛ", "midGlyph" => "ﻜ", "endGlyph" => "ﻚ"),
        "گ" => array("character" => "گ", "isoGlyph" => "ﮒ", "iniGlyph" => "ﮔ", "midGlyph" => "ﮕ", "endGlyph" => "ﮓ"),
        "ڭ" => array("character" => "ڭ", "isoGlyph" => "ﯓ", "iniGlyph" => "ﯕ", "midGlyph" => "ﯖ", "endGlyph" => "ﯔ"),
        "ل" => array("character" => "ل", "isoGlyph" => "ﻝ", "iniGlyph" => "ﻟ", "midGlyph" => "ﻠ", "endGlyph" => "ﻞ"),
        "م" => array("character" => "م", "isoGlyph" => "ﻡ", "iniGlyph" => "ﻣ", "midGlyph" => "ﻤ", "endGlyph" => "ﻢ"),
        "ن" => array("character" => "ن", "isoGlyph" => "ﻥ", "iniGlyph" => "ﻧ", "midGlyph" => "ﻨ", "endGlyph" => "ﻦ"),
        "ھ" => array("character" => "ھ", "isoGlyph" => "ﮪ", "iniGlyph" => "ﮪ", "midGlyph" => "ﮭ", "endGlyph" => "ﮭ"),
        "و" => array("character" => "و", "isoGlyph" => "ﻭ", "iniGlyph" => "ﻭ", "midGlyph" => "ﻮ", "endGlyph" => "ﻮ"),
        "ۇ" => array("character" => "ۇ", "isoGlyph" => "ﯗ", "iniGlyph" => "ﯗ", "midGlyph" => "ﯘ", "endGlyph" => "ﯘ"),
        "ۆ" => array("character" => "ۆ", "isoGlyph" => "ﯙ", "iniGlyph" => "ﯙ", "midGlyph" => "ﯚ", "endGlyph" => "ﯚ"),
        "ۈ" => array("character" => "ۈ", "isoGlyph" => "ﯛ", "iniGlyph" => "ﯛ", "midGlyph" => "ﯜ", "endGlyph" => "ﯜ"),
        "ۋ" => array("character" => "ۋ", "isoGlyph" => "ﯞ", "iniGlyph" => "ﯞ", "midGlyph" => "ﯟ", "endGlyph" => "ﯟ"),
        "ې" => array("character" => "ې", "isoGlyph" => "ﯤ", "iniGlyph" => "ﯦ", "midGlyph" => "ﯧ", "endGlyph" => "ﯥ"),
        "ى" => array("character" => "ى", "isoGlyph" => "ﻯ", "iniGlyph" => "ﯨ", "midGlyph" => "ﯩ", "endGlyph" => "ﻰ"),
        "ي" => array("character" => "ي", "isoGlyph" => "ﻱ", "iniGlyph" => "ﻳ", "midGlyph" => "ﻴ", "endGlyph" => "ﻲ"),
        "ئ" => array("character" => "ئ", "isoGlyph" => "ﺋ", "iniGlyph" => "ﺋ", "midGlyph" => "ﺌ", "endGlyph" => "ﺌ"),
    );

    /**
     * Converts extended-region text back to the basic region.
     *
     * Every contextual glyph is replaced by its base letter, the two lam-alef
     * ligatures are expanded to "لا", the full-width colon becomes " : " and
     * the horizontal ellipsis becomes "...".
     *
     * @param string|string[] $text text to normalise (passed straight to str_replace)
     * @return string|string[] the normalised text
     */
    public static function asasiy($text)
    {
        $text = str_replace(array('ا', 'ﺍ', 'ﺍ', 'ﺎ', 'ﺎ'), "ا", $text);
        $text = str_replace(array('ە', 'ﻩ', 'ﻩ', 'ﻩ', 'ﻪ'), "ە", $text);
        $text = str_replace(array('ب', 'ﺏ', 'ﺑ', 'ﺒ', 'ﺐ'), "ب", $text);
        $text = str_replace(array('پ', 'ﭖ', 'ﭘ', 'ﭙ', 'ﭗ'), "پ", $text);
        $text = str_replace(array('ت', 'ﺕ', 'ﺗ', 'ﺘ', 'ﺖ'), "ت", $text);
        $text = str_replace(array('ج', 'ﺝ', 'ﺟ', 'ﺠ', 'ﺞ'), "ج", $text);
        $text = str_replace(array('چ', 'ﭺ', 'ﭼ', 'ﭽ', 'ﭻ'), "چ", $text);
        $text = str_replace(array('خ', 'ﺥ', 'ﺧ', 'ﺨ', 'ﺦ'), "خ", $text);
        $text = str_replace(array('د', 'ﺩ', 'ﺩ', 'ﺪ', 'ﺪ'), "د", $text);
        $text = str_replace(array('ر', 'ﺭ', 'ﺭ', 'ﺮ', 'ﺮ'), "ر", $text);
        $text = str_replace(array('ز', 'ﺯ', 'ﺯ', 'ﺰ', 'ﺰ'), "ز", $text);
        $text = str_replace(array('ژ', 'ﮊ', 'ﮊ', 'ﮋ', 'ﮋ'), "ژ", $text);
        $text = str_replace(array('س', 'ﺱ', 'ﺳ', 'ﺴ', 'ﺲ'), "س", $text);
        $text = str_replace(array('ش', 'ﺵ', 'ﺷ', 'ﺸ', 'ﺶ'), "ش", $text);
        $text = str_replace(array('غ', 'ﻍ', 'ﻏ', 'ﻐ', 'ﻎ'), "غ", $text);
        $text = str_replace(array('ق', 'ﻕ', 'ﻗ', 'ﻘ', 'ﻖ'), "ق", $text);
        $text = str_replace(array('ف', 'ﻑ', 'ﻓ', 'ﻔ', 'ﻒ'), "ف", $text);
        $text = str_replace(array('ك', 'ﻙ', 'ﻛ', 'ﻜ', 'ﻚ'), "ك", $text);
        $text = str_replace(array('گ', 'ﮒ', 'ﮔ', 'ﮕ', 'ﮓ'), "گ", $text);
        $text = str_replace(array('ڭ', 'ﯓ', 'ﯕ', 'ﯖ', 'ﯔ'), "ڭ", $text);
        $text = str_replace(array('ل', 'ﻝ', 'ﻟ', 'ﻠ', 'ﻞ'), "ل", $text);
        $text = str_replace(array('م', 'ﻡ', 'ﻣ', 'ﻤ', 'ﻢ'), "م", $text);
        $text = str_replace(array('ن', 'ﻥ', 'ﻧ', 'ﻨ', 'ﻦ'), "ن", $text);
        $text = str_replace(array('ھ', 'ﮪ', 'ﮪ', 'ﮭ', 'ﮭ'), "ھ", $text);
        $text = str_replace(array('و', 'ﻭ', 'ﻭ', 'ﻮ', 'ﻮ'), "و", $text);
        $text = str_replace(array('ۇ', 'ﯗ', 'ﯗ', 'ﯘ', 'ﯘ'), "ۇ", $text);
        $text = str_replace(array('ۆ', 'ﯙ', 'ﯙ', 'ﯚ', 'ﯚ'), "ۆ", $text);
        $text = str_replace(array('ۈ', 'ﯛ', 'ﯛ', 'ﯜ', 'ﯜ'), "ۈ", $text);
        $text = str_replace(array('ۋ', 'ﯞ', 'ﯞ', 'ﯟ', 'ﯟ'), "ۋ", $text);
        $text = str_replace(array('ې', 'ﯤ', 'ﯦ', 'ﯧ', 'ﯥ'), "ې", $text);
        $text = str_replace(array('ى', 'ﻯ', 'ﯨ', 'ﯩ', 'ﻰ'), "ى", $text);
        $text = str_replace(array('ي', 'ﻱ', 'ﻳ', 'ﻴ', 'ﻲ'), "ي", $text);
        $text = str_replace(array('ئ', 'ﺋ', 'ﺋ', 'ﺌ', 'ﺌ'), "ئ", $text);
        // Lam-alef ligatures expand back to the two base letters.
        $text = str_replace(array('ﻻ', 'ﻼ'), "لا", $text);
        $text = str_replace('：', " : ", $text);
        $text = str_replace('…', "...", $text);

        return $text;
    }

    /**
     * Transliterates basic-region Uyghur text into Latin script (ULY).
     *
     * A space is prepended so that a hamza-carried vowel at the very start of
     * the input matches the " ئ?" word-initial patterns. That space is NOT
     * removed again, so non-empty results start with a space (kept as-is for
     * backward compatibility; callers may trim()).
     *
     * @param string $text basic-region Uyghur text
     * @return string the transliteration, or '' when $text is falsy
     */
    public static function ULYStr($text)
    {
        if (!$text) {
            return '';
        }

        $text = " " . $text;

        // Word-initial hamza + vowel pairs come first so they win over the single-letter rules.
        $uy = array(" ئا", " ئە", " ئې", " ئى", " ئو", " ئۇ", " ئۆ", " ئۈ", "ا", "ە", "ې", "ى", "و", "ۇ", "ۆ", "ۈ", "ش", "ڭ", "غ", "چ", "ب", "د", "ف", "گ", "ھ", "ج", "ك", "ل", "م", "ن", "پ", "ق", "ر", "س", "ت", "ۋ", "ي", "ز", "خ", "ژ", "،", "؟", "؛");
        $uly = array(" a", " e", " é", " i", " o", " u", " ö", " ü", "a", "e", "é", "i", "o", "u", "ö", "ü", "sh", "ng", "gh", "ch", "b", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "w", "y", "z", "x", "J", ",", "?", ";");

        return str_replace($uy, $uly, $text);
    }

    /**
     * Gets the current character from a UTF-8 string.
     *
     * Only the lead byte is inspected to decide the character's width; the
     * continuation bytes are not validated. A sequence truncated by the end of
     * the string is returned as-is (shorter than its nominal width).
     *
     * @param string  $string    the UTF-8 string
     * @param integer &$pos      the current byte position within the string;
     *                           advanced past the character on exit unless
     *                           $lookahead is true
     * @param integer $length    the byte length of $string
     * @param boolean $lookahead if true, $pos is left unchanged (peek);
     *                           if false, $pos is moved to the next character
     * @param string  $invalid   value returned in place of an invalid lead
     *                           byte, e.g. "?"; null is returned when this is null
     * @return string|false|null the UTF-8 character, $invalid for a bad lead
     *                           byte, or false if there are no more characters
     */
    private static function getChar($string, &$pos, $length, $lookahead = false, $invalid = '?')
    {
        if ($pos >= $length) {
            // No more characters to read.
            return false;
        }

        $start = $pos;
        $lead = ord($string[$start]);

        if ($lead < 0x80) {
            $size = 1;          // 1-byte (ASCII) character
        } elseif ($lead < 0xC0) {
            $size = 0;          // error: continuation byte used as a first byte
        } elseif ($lead < 0xE0) {
            $size = 2;          // 2-byte character
        } elseif ($lead < 0xF0) {
            $size = 3;          // 3-byte character
        } elseif ($lead < 0xF8) {
            $size = 4;          // 4-byte character
        } else {
            $size = 0;          // error: out of range as a first byte
        }

        if ($size === 0) {
            $char = $invalid;
            $pos = $start + 1;  // skip just the offending byte
        } else {
            // substr() tolerates a truncated trailing sequence without warnings.
            $char = substr($string, $start, $size);
            $pos = $start + $size;
        }

        if ($lookahead) {
            // Peek only: restore the caller's position.
            $pos = $start;
        }

        return $char;
    }

    /**
     * Tells whether the current letter joins to the letter before it.
     *
     * @param string $currentChar the letter being shaped (not used by the check)
     * @param mixed  $beforeChar  the preceding character
     * @return bool true when $beforeChar is one of self::$uyghur_harp
     */
    private static function checkLinkBefore($currentChar, $beforeChar)
    {
        return in_array($beforeChar, self::$uyghur_harp, true);
    }

    /**
     * Tells whether the current letter joins to the letter after it.
     *
     * @param string $currentChar the letter being shaped; "ە" never joins forward
     * @param mixed  $afterChar   the following character
     * @return bool true when $afterChar is one of self::$uyghur_all
     */
    private static function checkLinkAfter($currentChar, $afterChar)
    {
        if ($currentChar === "ە") {
            return false;
        }

        return in_array($afterChar, self::$uyghur_all, true);
    }

    /**
     * Shapes basic-region text into extended-region presentation glyphs.
     *
     * Walks the string with a three-character window (before / middle /
     * after), picks the isolated, initial, medial or final glyph for each
     * known letter, and merges lam + alef into a lam-alef ligature.
     * Characters that are not in self::$letters are copied through unchanged.
     *
     * @param string $string basic-region UTF-8 text
     * @return string the shaped text
     */
    private static function unicode_convert($string)
    {
        $result = "";
        $La = "";   // the piece most recently appended to $result
        $length_of_the_string = strlen($string);

        $postionBefore = 0;
        $postionMiddle = 0;
        $position = 0;

        $charaterBefore = "";
        $charaterMiddle = "";

        do {
            $postionBefore = $postionMiddle;
            $postionMiddle = $position;
            $charaterAfter = self::getChar($string, $position, $length_of_the_string, false, null);

            if (is_string($charaterMiddle) && isset(self::$letters[$charaterMiddle])) {
                $letter = self::$letters[$charaterMiddle];

                // The first character of the string has nothing before it to join to.
                if ($postionBefore == 0) {
                    $canLinkBefore = false;
                } else {
                    $canLinkBefore = self::checkLinkBefore($charaterMiddle, $charaterBefore);
                }

                // getChar() returns false once the input is exhausted.
                if ($charaterAfter === false) {
                    $canLinkAfter = false;
                } else {
                    $canLinkAfter = self::checkLinkAfter($charaterMiddle, $charaterAfter);
                }

                $isAlef = ($charaterMiddle === "ا");

                if ($isAlef && in_array($La, array("ل", "ﻝ", "ﻟ"), true)) {
                    // Unjoined lam + alef: swap the lam just written for the isolated ligature.
                    $LaPosition = strlen($result) - strlen($La);
                    $result = substr_replace($result, "ﻻ", $LaPosition);
                    // Record the ligature so a following alef is not merged into it again.
                    $La = "ﻻ";
                } elseif ($isAlef && in_array($La, array("ﻠ", "ﻞ"), true)) {
                    // Lam joined to the previous letter + alef: use the final-form ligature.
                    $LaPosition = strlen($result) - strlen($La);
                    $result = substr_replace($result, "ﻼ", $LaPosition);
                    $La = "ﻼ";
                } else {
                    if ($canLinkBefore && $canLinkAfter) {
                        $La = $letter['midGlyph'];
                    } elseif ($canLinkBefore) {
                        $La = $letter['endGlyph'];
                    } elseif ($canLinkAfter) {
                        $La = $letter['iniGlyph'];
                    } else {
                        $La = $letter['isoGlyph'];
                    }
                    $result .= $La;
                }
            } else {
                // Not a shapeable letter: pass through untouched.
                $result .= $charaterMiddle;
                $La = $charaterMiddle;
            }

            $charaterBefore = $charaterMiddle;
            $charaterMiddle = $charaterAfter;
        } while ($postionMiddle < $length_of_the_string);

        return $result;
    }

    /**
     * Shapes text into the extended region and decodes HTML entities in the result.
     *
     * @param string $string basic-region text, possibly containing HTML entities
     * @return string shaped text with entities decoded (quotes left encoded)
     */
    public static function html_convert($string)
    {
        return html_entity_decode(self::unicode_convert($string), ENT_NOQUOTES, "UTF-8");
    }

    /**
     * Extended-region conversion applied line by line.
     *
     * Each "\n"-separated line is converted with html_convert() and written
     * back followed by "\n" (so the result always ends with a newline).
     *
     * @param string $string
     * @return string
     */
    public static function html_convert_words($string)
    {
        $words_text = "";

        foreach (explode("\n", $string) as $words) {
            $words_text .= self::html_convert($words) . "\n";
        }

        return $words_text;
    }

    /**
     * Converts line by line, then filters some characters:
     * the Arabic semicolon becomes a space and the tatweel is removed.
     *
     * @param string $words_text
     * @return string
     */
    public static function html_filter_convert($words_text)
    {
        $words_text = self::html_convert_words($words_text);

        return str_replace(array('،', '؟', '؛', 'ـ'), array('،', '؟', ' ', ''), $words_text);
    }

    /**
     * Same filtering as html_filter_convert(), but converts the text in one
     * pass with html_convert() instead of line by line.
     *
     * @param string $words_text
     * @return string
     */
    public static function html_convert_format($words_text)
    {
        $words_text = self::html_convert($words_text);

        return str_replace(array('،', '؟', '؛', 'ـ'), array('،', '؟', ' ', ''), $words_text);
    }

    /***************************************/
    /***************************************/
    /***************************************/

    /**
     * UTF-8 aware str_split().
     *
     * @param string $str
     * @param int    $split_len number of characters per chunk (>= 1)
     * @return array|bool list of chunks, or FALSE when $split_len is invalid
     */
    private static function utf8_str_split($str, $split_len = 1)
    {
        if (!preg_match('/^[0-9]+$/', $split_len) || $split_len < 1) {
            return FALSE;
        }

        $len = mb_strlen($str, 'UTF-8');
        if ($len <= $split_len) {
            return array($str);
        }

        preg_match_all('/.{' . $split_len . '}|[^\x00]{1,' . $split_len . '}$/us', $str, $ar);

        return $ar[0];
    }

    /**
     * Reverses the characters of every space-separated word, keeping word order.
     *
     * @param string $string
     * @return string the words, each reversed, joined by single spaces and trimmed
     */
    public static function utf8_ps_reverse($string)
    {
        $content = ''; /* returned content */

        foreach (explode(" ", $string) as $value) {
            $content .= ' ' . implode("", array_reverse(self::utf8_str_split($value)));
        }

        return trim($content);
    }

    /**
     * Converts content to Photoshop text mode: the whole string is reversed,
     * then every run that is neither whitespace nor in U+FB00..U+FEFF is
     * reversed back so that it reads in its original order.
     *
     * @param string  $ThisText the text
     * @param boolean $convert  when exactly true, shape the text with html_convert() first
     * @return string|null
     */
    public static function convert_to_ps($ThisText, $convert = true)
    {
        if ($convert === true) {
            $ThisText = self::html_convert($ThisText);
        }

        $ThisText = self::_ReverseString($ThisText);

        return self::_ReverseAscii($ThisText);
    }

    /**
     * Reverses a UTF-8 string character by character.
     *
     * @param string $source
     * @return string
     */
    private static function _ReverseString($source)
    {
        return implode("", array_reverse(self::utf8_str_split($source)));
    }

    /**
     * Reverses every run of characters that are neither whitespace nor in
     * the U+FB00..U+FEFF range, leaving everything else in place.
     *
     * @param string $source
     * @return string|string[]|null
     */
    private static function _ReverseAscii($source)
    {
        return preg_replace_callback("/([^\x{FB00}-\x{FEFF}\s]+)/u", function ($word) {
            return self::_ReverseString($word[0]);
        }, $source);
    }

    /**
     * Automatic line wrapping for text drawn onto images with the GD library.
     *
     * Characters are appended one at a time; before each one, the candidate
     * string is measured with imagettfbbox() and a "\n" is inserted when it
     * would exceed $width.
     *
     * @param float  $fontsize font size
     * @param float  $angle    angle
     * @param string $fontface font file passed to imagettfbbox()
     * @param string $string   the text
     * @param int    $width    preset maximum width
     * @return string the wrapped text, encoded as HTML entities
     */
    public static function auto_wrap($fontsize, $angle, $fontface, $string, $width)
    {
        $content = "";
        $letter = self::utf8_str_split($string);

        foreach ($letter as $l) {
            $teststr = $content . "" . $l;
            $testbox = imagettfbbox($fontsize, $angle, $fontface, $teststr);

            /* Does the concatenated string exceed the preset width? */
            if (($testbox[2] > $width) && ($content !== "")) {
                $content .= "\n";
            }

            $content .= $l;
        }

        // NOTE(review): the "html-entities" pseudo-encoding is deprecated in newer PHP
        // releases; left unchanged here to keep the output identical.
        $content = mb_convert_encoding($content, "html-entities", "utf-8");

        return $content;
    }
}
~~~