```
/**
 * Collect the human-readable text fragments found in an HTML template.
 *
 * Text is taken from the values of the title/alt/placeholder attributes and
 * from the text between tags (comments, <style> and <script> blocks are
 * dropped first). Fragments that are purely numeric, contain no CJK/ASCII
 * alphanumeric character, or look like an icon entity such as &#xe61b; are
 * discarded.
 *
 * @param string $html HTML markup to scan.
 * @return array List of ['text' => string, 'len' => int] entries, de-duplicated
 *               and ordered by character length, longest first.
 */
public static function getResourceByTemp($html)
{
    // Attributes whose values carry visible text.
    $attrNames = ['title', 'alt', 'placeholder'];
    $attrTexts = [];
    foreach ($attrNames as $attrName) {
        preg_match_all("/{$attrName}=\"(.*?)\"/", $html, $found);
        if (!empty($found[1])) {
            $attrTexts = array_unique(array_merge($attrTexts, $found[1]));
        }
    }

    $html = preg_replace("/<!--.*?-->/is", '', $html);                // strip comments
    $html = preg_replace("/<style.*?>.*?<\/style>/is", '', $html);    // strip <style> blocks
    $html = preg_replace("/<script.*?>.*?<\/script>/is", '', $html);  // strip <script> blocks

    // Replace every remaining tag with a marker that is unlikely to occur in
    // real content, then split on it to obtain the text between tags.
    $separator = '::#::myself::#::';
    $html = preg_replace("/<.*?>/is", $separator, $html);

    $pieces = array_filter(explode($separator, $html));
    if (!empty($attrTexts)) {
        $pieces = array_filter(array_unique(array_merge($pieces, $attrTexts)));
    }

    // A fragment may span several lines; treat every line as its own entry.
    // (The original called strpos(PHP_EOL, $v) with haystack and needle
    // swapped, so this split never happened.) The alternation is spelled out
    // instead of using \R because, without the /u modifier, \R also matches
    // the single byte 0x85, which occurs inside multi-byte UTF-8 characters.
    $texts = [];
    foreach ($pieces as $piece) {
        foreach (preg_split('/\r\n|\r|\n/', $piece) as $line) {
            $line = trim($line);
            if ($line !== '') {
                $texts[] = $line;
            }
        }
    }

    $kept = [];
    foreach ($texts as $text) {
        // Purely numeric entries carry no text.
        if (is_numeric($text)) {
            continue;
        }
        // Entries without any CJK ideograph, ASCII letter or digit are pure
        // punctuation/symbols. preg_match() returns false (not 0) on invalid
        // UTF-8, in which case the entry is kept, as before.
        if (preg_match('/[\x{4e00}-\x{9fa5}A-Za-z0-9]/u', $text) === 0) {
            continue;
        }
        // Icon-font entities such as &#xe61b; (7 or 8 bytes long).
        $byteLen = strlen($text);
        if (strpos($text, '&#x') !== false && ($byteLen === 7 || $byteLen === 8)) {
            continue;
        }
        $kept[] = $text;
    }

    $result = [];
    foreach (array_unique($kept) as $text) {
        $result[] = ['text' => $text, 'len' => mb_strlen($text, 'UTF-8')];
    }
    if (empty($result)) {
        return $result;
    }

    // Longest entries first.
    $lengths = array_column($result, 'len');
    array_multisort($lengths, SORT_DESC, $result);

    return $result;
}
```
```
/**
 * Extract the title, keywords and description of an HTML page.
 *
 * @param string $data HTML markup, or a URL. When self::isUrl() accepts the
 *                     value, the page is downloaded with file_get_contents()
 *                     and the downloaded markup is parsed instead.
 * @return array Contains the keys 'title', 'keywords' and 'description' for
 *               whichever of them were found with a non-empty value; empty
 *               array when nothing could be read.
 */
public static function get_sitemeta($data)
{
    if (self::isUrl($data)) {
        // The original passed an undefined $url here, so a URL argument was
        // never actually fetched. A failed download is treated as "no data".
        $fetched = file_get_contents($data);
        $data = ($fetched === false) ? '' : $fetched;
    }

    $meta = array();
    if (empty($data)) {
        return $meta;
    }

    // Title (attributes on the <title> tag are tolerated).
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/si', $data, $matches) && !empty($matches[1])) {
        $meta['title'] = $matches[1];
    }

    // Keywords and description share the same set of accepted spellings,
    // tried in this order; the first non-empty capture wins.
    foreach (array('keywords', 'description') as $name) {
        $patterns = array(
            '/<META\s+name="' . $name . '"\s+content="([\w\W]*?)"/si',
            "/<META\\s+name='" . $name . "'\\s+content='([\\w\\W]*?)'/si",
            '/<META\s+content="([\w\W]*?)"\s+name="' . $name . '"/si',
            '/<META\s+http-equiv="' . $name . '"\s+content="([\w\W]*?)"/si',
        );
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $data, $matches) && !empty($matches[1])) {
                $meta[$name] = $matches[1];
                break;
            }
        }
    }

    return $meta;
}
```