1分钟部署网站📞AI智能客服,大模型训练自有数据,简单好用,有效降低客服成本 广告
```
/* Google Chrome remote-debugging client */
namespace util;

use WebSocket\Client;

/**
 * Drives a Chrome instance through its remote-debugging HTTP endpoints
 * (/json, /json/new, /json/close, /json/version) and a WebSocket connection
 * to a tab, in order to fetch the HTML of a page after the browser rendered it.
 */
class ChromeSocket
{
    /** @var string Chrome executable name or path used by openHost() */
    protected $filename;
    /** @var int Timeout in seconds for socket operations and wait loops */
    protected $timeout = 30;
    protected $host;
    protected $port;
    /** @var string "host:port" prefix of the debugging HTTP endpoints */
    protected $address;
    /** @var Client|null WebSocket connection created by websocket() */
    protected $socket;
    /** @var array|null Tab description as returned by newTab() */
    public $tab;
    /** @var array Resource types whose intercepted requests are continued with errorReason "Aborted" */
    static protected $passType = array('stylesheet', 'image', 'media', 'font');

    /**
     * @param string $host     Debugging host; empty means 127.0.0.1
     * @param int    $port     Debugging port; empty/0 means 9222
     * @param int    $timeout  Timeout in seconds; values <= 0 fall back to 30
     * @param string $filename Chrome executable; empty means "chrome"
     */
    public function __construct($host, $port, $timeout = 30, $filename = '')
    {
        $this->host = empty($host) ? '127.0.0.1' : $host;
        $port = intval($port);
        $this->port = empty($port) ? 9222 : $port;
        $this->address = $this->host . ($this->port ? (':' . $this->port) : '');
        $timeout = intval($timeout);
        $this->timeout = $timeout <= 0 ? 30 : $timeout;
        $this->filename = $filename ? $filename : 'chrome';
    }

    /**
     * Closes the tab opened through newTab(), if any.
     */
    public function __destruct()
    {
        if (!empty($this->tab) && isset($this->tab['id'])) {
            $this->closeTab($this->tab['id']);
        }
    }

    /**
     * Checks whether the debugging server answers on /json/version.
     *
     * @return bool true when the response contains a webSocketDebuggerUrl
     */
    public function hostIsOpen()
    {
        $data = get_html($this->address . '/json/version', null, array('timeout' => 5));
        if (empty($data)) {
            return false;
        }
        $data = json_decode($data, true);
        return is_array($data) && !empty($data['webSocketDebuggerUrl']);
    }

    /**
     * Starts a headless Chrome with remote debugging enabled on the configured port.
     * Does nothing unless the configured host is a local address.
     *
     * @throws \Exception when proc_open is unavailable or the process cannot be started
     */
    public function openHost()
    {
        if (!in_array(strtolower($this->host), array('localhost', '127.0.0.1', '0.0.0.0'))) {
            return;
        }
        $command = $this->filename;
        if (empty($command)) {
            $command = 'chrome';
        } elseif (file_exists($command)) {
            // An existing path may contain spaces: quote it. The Windows form is
            // kept as it was; elsewhere the shell's own escaping rules are used.
            $command = IS_WIN ? '"' . $command . '"' : escapeshellarg($command);
        }
        $commandStr = sprintf('%s --headless --remote-debugging-port=%s', $command, $this->port);
        if (!function_exists('proc_open')) {
            throw new \Exception('请开启proc_open函数或者手动执行命令:' . $commandStr);
        }
        $descriptorspec = array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('pipe', 'w')
        );
        $pipes = array();
        $handle = proc_open($commandStr, $descriptorspec, $pipes);
        if ($handle === false) {
            throw new \Exception('启动谷歌浏览器失败,请手动执行命令:' . $commandStr);
        }
        // The pipes are not used; close our ends right away.
        foreach ($pipes as $pipe) {
            if (is_resource($pipe)) {
                fclose($pipe);
            }
        }
    }

    /**
     * Opens the WebSocket connection to the browser.
     *
     * @param string $url     WebSocket URL; empty means the current tab's webSocketDebuggerUrl
     * @param array  $headers Extra headers, merged into $options['headers'] with lower-cased names
     * @param array  $options Options passed to the WebSocket client; "timeout" defaults to the instance timeout
     */
    public function websocket($url = '', $headers = array(), $options = array())
    {
        $headers = is_array($headers) ? $headers : array();
        $headers = array_change_key_case($headers, CASE_LOWER);
        $options = is_array($options) ? $options : array();
        $options['timeout'] = (isset($options['timeout']) && $options['timeout'] > 0)
            ? $options['timeout']
            : $this->timeout;
        if (!empty($headers)) {
            $baseHeaders = (isset($options['headers']) && is_array($options['headers']))
                ? $options['headers']
                : array();
            $options['headers'] = array_merge($baseHeaders, $headers);
        }
        if (empty($url)) {
            $url = isset($this->tab['webSocketDebuggerUrl']) ? $this->tab['webSocketDebuggerUrl'] : null;
        }
        $this->socket = new Client($url, $options);
    }

    /**
     * Sends one command over the WebSocket.
     *
     * @param string $method Command name, e.g. "Page.navigate"
     * @param array  $params Command parameters
     * @param int    $id     Message id; when empty an id is taken from a counter shared by all calls
     * @return array The message that was sent (id, method, params)
     */
    public function send($method, $params = array(), $id = 0)
    {
        if (empty($id)) {
            static $no = 1;
            $no++;
            $id = $no;
        }
        $data = array(
            'id' => $id,
            'method' => $method,
            'params' => $params
        );
        $payload = $data;
        if (is_array($params) && empty($params)) {
            // json_encode() would emit an empty PHP array as "[]"; the DevTools
            // protocol defines params as an object, so send "{}" instead.
            $payload['params'] = new \stdClass();
        }
        $this->socket->send(json_encode($payload));
        return $data;
    }

    /**
     * Loads a URL in the connected tab and returns the rendered document HTML.
     *
     * Requests for the resource types listed in self::$passType are aborted
     * while the page loads. When $postData is set, the page is requested by
     * submitting a generated POST form instead of navigating.
     *
     * @param string            $url        Target URL; "http://" is prepended when no scheme is present
     * @param array             $headers    Extra HTTP headers; a "cookie" header clears the browser cookies first
     * @param array             $options    Currently unused (a "proxy" entry is accepted but not acted on)
     * @param string|null       $fromEncode Charset for the POST form (accept-charset); ignored for auto/utf-8
     * @param array|string|null $postData   POST fields as array or "a=b&c=d" string; null means GET
     * @return string|null Rendered HTML, or null when the load event was not seen
     */
    public function getRenderHtml($url, $headers = array(), $options = array(), $fromEncode = null, $postData = null)
    {
        if (!preg_match('/^\w+\:\/\//', $url)) {
            $url = 'http://' . $url;
        }
        $this->send('Network.enable');
        if (!empty($headers)) {
            foreach ($headers as $k => $v) {
                if (strcasecmp($k, 'cookie') == 0) {
                    $this->send('Network.clearBrowserCookies');
                    break;
                }
            }
            $this->send('Network.setExtraHTTPHeaders', array('headers' => $headers));
        }
        $this->send('Network.setRequestInterception', array('patterns' => array(
            array('urlPattern' => '*', 'interceptionStage' => 'Request')
        )));
        $this->send('Page.enable');

        if (isset($postData)) {
            if (!is_array($postData)) {
                if (preg_match_all('/([^\&]+?)\=([^\&]*)/', $postData, $m_post_data)) {
                    $new_post_data = array();
                    foreach ($m_post_data[1] as $k => $v) {
                        $new_post_data[$v] = rawurldecode($m_post_data[2][$k]);
                    }
                    $postData = $new_post_data;
                } else {
                    $postData = array();
                }
            }
            // Field names and values are handed to the page as JSON data and
            // assigned through DOM properties, so no character in them can
            // terminate a string literal or inject markup.
            $fields = array();
            foreach ($postData as $k => $v) {
                $fields[] = array((string)$k, is_scalar($v) ? (string)$v : '');
            }
            $postForm = 'var postForm=document.createElement("form");';
            if (!empty($postData) && !empty($fromEncode)
                && !in_array(strtolower($fromEncode), array('auto', 'utf-8', 'utf8'))) {
                $postForm .= 'postForm.acceptCharset=' . self::jsLiteral((string)$fromEncode) . ';';
            }
            $postForm .= 'postForm.method="post";'
                . 'postForm.action=' . self::jsLiteral($url) . ';'
                . 'var postFields=' . self::jsLiteral($fields) . ';'
                . 'for(var i=0;i<postFields.length;i++){'
                . 'var postInput=document.createElement("input");'
                . 'postInput.type="text";'
                . 'postInput.name=postFields[i][0];'
                . 'postInput.value=postFields[i][1];'
                . 'postForm.appendChild(postInput);'
                . '}'
                . 'document.documentElement.appendChild(postForm);'
                . 'postForm.submit();';
            $this->send('Runtime.evaluate', array('expression' => $postForm));
        } else {
            $this->send('Page.navigate', array('url' => $url));
        }

        // Pump messages until the load event arrives, the timeout elapses,
        // or receive() yields nothing.
        $complete = false;
        $startTime = time();
        while ((time() - $startTime) <= $this->timeout) {
            $data = $this->receive();
            if (!$data) {
                break;
            }
            // Command responses carry an id but no method.
            $method = isset($data['method']) ? $data['method'] : null;
            if ($method === 'Page.loadEventFired') {
                $complete = true;
                break;
            } elseif ($method === 'Network.requestIntercepted') {
                $this->continueIntercepted($data);
            }
        }
        if (!$complete) {
            return null;
        }

        $sendData = $this->send('Runtime.evaluate', array('expression' => 'document.documentElement.outerHTML'));
        $data = $this->receiveById($sendData['id'], false);
        if (!isset($data['result']['result']['value'])) {
            return null;
        }
        $data = $data['result']['result']['value'];
        // When the text left after removing tags looks like a JSON-style object,
        // return that text instead of the markup wrapped around it.
        if (preg_match('/^\{(.+\:.+,*){1,}\}$/', strip_tags($data))) {
            $data = strip_tags($data);
        }
        return $data;
    }

    /**
     * Receives one frame from the WebSocket.
     *
     * @return array|null Decoded message, or null when nothing was received or receiving threw
     */
    public function receive()
    {
        try {
            $data = $this->socket->receive();
        } catch (\Exception $ex) {
            $data = null;
        }
        return $data ? json_decode($data, true) : null;
    }

    /**
     * Reads messages until the response with the given id arrives.
     * Intercepted requests seen in the meantime are answered.
     *
     * @param int  $id        Id of the command whose response is awaited
     * @param bool $returnAll When true, also return every other message read while waiting
     * @return array|null The response (or null), or array('all'=>..., 'result'=>...) when $returnAll is true
     */
    public function receiveById($id, $returnAll = false)
    {
        $startTime = time();
        $result = null;
        $all = array();
        while ((time() - $startTime) <= $this->timeout) {
            $data = $this->receive();
            if (!$data) {
                break;
            }
            // Events carry a method but no id.
            if (isset($data['id']) && $data['id'] == $id) {
                $result = $data;
                break;
            }
            if (isset($data['method']) && $data['method'] === 'Network.requestIntercepted') {
                $this->continueIntercepted($data);
            }
            if ($returnAll) {
                $all[] = $data;
            }
        }
        if ($returnAll) {
            return array('all' => $all, 'result' => $result);
        }
        return $result;
    }

    /**
     * Lists the open tabs via /json.
     *
     * @return array Decoded tab list; empty array when nothing usable was returned
     */
    public function getTabs()
    {
        $data = get_html($this->address . '/json');
        $data = empty($data) ? array() : json_decode($data, true);
        return is_array($data) ? $data : array();
    }

    /**
     * Opens a new tab via /json/new and remembers it in $this->tab.
     *
     * @return array Decoded tab description; empty array when nothing usable was returned
     */
    public function newTab()
    {
        $data = get_html($this->address . '/json/new');
        $data = empty($data) ? array() : json_decode($data, true);
        $data = is_array($data) ? $data : array();
        $this->tab = $data;
        return $data;
    }

    /**
     * Closes a tab via /json/close/{id}.
     *
     * @param string $id Tab id
     */
    public function closeTab($id)
    {
        get_html($this->address . '/json/close/' . $id, null, array('timeout' => 1));
    }

    /**
     * Answers a Network.requestIntercepted event: the request is continued,
     * with errorReason "Aborted" when its resource type is in self::$passType.
     *
     * @param array $event Decoded Network.requestIntercepted message
     */
    protected function continueIntercepted($event)
    {
        $params = isset($event['params']) && is_array($event['params']) ? $event['params'] : array();
        $ncParams = array('interceptionId' => isset($params['interceptionId']) ? $params['interceptionId'] : null);
        $resourceType = isset($params['resourceType']) ? strtolower((string)$params['resourceType']) : '';
        if (in_array($resourceType, self::$passType, true)) {
            $ncParams['errorReason'] = 'Aborted';
        }
        $this->send('Network.continueInterceptedRequest', $ncParams);
    }

    /**
     * Encodes a string or array as a JavaScript literal using JSON.
     * Input that is not valid UTF-8 is scrubbed with mbstring and encoded again.
     *
     * @param string|array $value
     * @return string JavaScript source for the value
     */
    protected static function jsLiteral($value)
    {
        $json = json_encode($value);
        if ($json === false) {
            $scrubbed = is_array($value) ? $value : array($value);
            array_walk_recursive($scrubbed, function (&$item) {
                if (is_string($item)) {
                    $item = mb_convert_encoding($item, 'UTF-8', 'UTF-8');
                }
            });
            $json = json_encode(is_array($value) ? $scrubbed : $scrubbed[0]);
        }
        return $json === false ? '""' : $json;
    }
}

/**
 * Fetches a URL and returns its body converted to UTF-8.
 *
 * @param string            $url        Target URL; "http://" is prepended when no scheme is present
 * @param array|null        $headers    Request headers as name => value pairs
 * @param array             $options    Options passed to \util\Curl; "useragent" and "timeout" get defaults,
 *                                      "max_bytes" triggers a HEAD request and skips the download when
 *                                      Content-Length is at least that large
 * @param string            $fromEncode Charset of the page; "auto" detects it from the meta tag,
 *                                      the Content-Type header or the content itself
 * @param array|string|null $post_data  POST fields; the request is a POST whenever this is set (isset)
 * @return string|false Body as UTF-8, or false on failure
 */
function get_html($url, $headers = null, $options = array(), $fromEncode = 'auto', $post_data = null)
{
    $headers = is_array($headers) ? $headers : array();
    $options = is_array($options) ? $options : array();
    if (!isset($options['useragent'])) {
        $options['useragent'] = 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70 Safari/537.36';
    }
    $options['timeout'] = (isset($options['timeout']) && $options['timeout'] > 0) ? $options['timeout'] : 30;

    // Turn name => value pairs into "Name: value" header lines.
    $curlHeaders = array();
    foreach ($headers as $k => $v) {
        $curlHeaders[] = $k . ': ' . $v;
    }
    $headers = $curlHeaders;
    unset($curlHeaders);

    if (!preg_match('/^\w+\:\/\//', $url)) {
        $url = 'http://' . $url;
    }

    $curl = null;
    try {
        if (!isset($post_data)) {
            $allow_get = true;
            if (!empty($options['max_bytes'])) {
                // Probe the size with HEAD before downloading.
                $max_bytes = intval($options['max_bytes']);
                unset($options['max_bytes']);
                $curl = \util\Curl::head($url, $headers, $options);
                if (preg_match('/\bContent-Length\s*:\s*(\d+)/i', $curl->header, $contLen)) {
                    if (intval($contLen[1]) >= $max_bytes) {
                        $allow_get = false;
                    }
                }
            }
            $curl = $allow_get ? \util\Curl::get($url, $headers, $options) : null;
        } else {
            if (!empty($post_data) && !empty($fromEncode)
                && !in_array(strtolower($fromEncode), array('auto', 'utf-8', 'utf8'))) {
                // The target expects another charset: convert every field from UTF-8.
                if (!is_array($post_data)) {
                    if (preg_match_all('/([^\&]+?)\=([^\&]*)/', $post_data, $m_post_data)) {
                        $new_post_data = array();
                        foreach ($m_post_data[1] as $k => $v) {
                            $new_post_data[$v] = rawurldecode($m_post_data[2][$k]);
                        }
                        $post_data = $new_post_data;
                    } else {
                        $post_data = array();
                    }
                }
                $post_data = is_array($post_data) ? $post_data : array();
                foreach ($post_data as $k => $v) {
                    $post_data[$k] = iconv('utf-8', $fromEncode . '//IGNORE', $v);
                }
            }
            $curl = \util\Curl::post($url, $headers, $options, $post_data);
        }
    } catch (\Exception $e) {
        $curl = null;
    }

    $html = null;
    if (!empty($curl) && $curl->isOk) {
        $html = $curl->body;
        if ($fromEncode == 'auto') {
            // Charset declared in the document itself.
            $htmlCharset = '';
            if (preg_match('/<meta[^<>]*?content=[\'\"]text\/html\;\s*charset=(?P<charset>[^\'\"\<\>]+?)[\'\"]/i', $html, $htmlCharset)
                || preg_match('/<meta[^<>]*?charset=[\'\"](?P<charset>[^\'\"\<\>]+?)[\'\"]/i', $html, $htmlCharset)) {
                $htmlCharset = strtolower(trim($htmlCharset['charset']));
                if ('utf8' == $htmlCharset) {
                    $htmlCharset = 'utf-8';
                }
            } else {
                $htmlCharset = '';
            }
            // Charset declared in the response headers.
            $headerCharset = '';
            if (preg_match('/\bContent-Type\s*:[^\r\n]*charset=(?P<charset>[\w\-]+)/i', $curl->header, $headerCharset)) {
                $headerCharset = strtolower(trim($headerCharset['charset']));
                if ('utf8' == $headerCharset) {
                    $headerCharset = 'utf-8';
                }
            } else {
                $headerCharset = '';
            }

            if (!empty($htmlCharset) && !empty($headerCharset) && strcasecmp($htmlCharset, $headerCharset) !== 0) {
                // The two declarations disagree.
                $zhCharset = array('gb18030', 'gbk', 'gb2312');
                if (in_array($htmlCharset, $zhCharset) && in_array($headerCharset, $zhCharset)) {
                    $fromEncode = 'gb18030';
                } else {
                    // Let content detection decide between the two declarations.
                    $autoEncode = (string)mb_detect_encoding($html, array('ASCII', 'UTF-8', 'GB2312', 'GBK', 'BIG5'));
                    if (strcasecmp($htmlCharset, $autoEncode) == 0) {
                        $fromEncode = $htmlCharset;
                    } elseif (strcasecmp($headerCharset, $autoEncode) == 0) {
                        $fromEncode = $headerCharset;
                    } else {
                        $fromEncode = $autoEncode;
                    }
                }
            } elseif (!empty($htmlCharset)) {
                $fromEncode = $htmlCharset;
            } elseif (!empty($headerCharset)) {
                $fromEncode = $headerCharset;
            } else {
                $fromEncode = mb_detect_encoding($html, array('ASCII', 'UTF-8', 'GB2312', 'GBK', 'BIG5'));
            }
        }
        // Detection may have produced false/null; normalise to a string before trimming.
        $fromEncode = trim((string)$fromEncode);
        if (!empty($fromEncode)) {
            $fromEncode = strtolower($fromEncode);
            switch ($fromEncode) {
                case 'utf8':
                    $fromEncode = 'utf-8';
                    break;
                case 'cp936':
                    $fromEncode = 'gbk';
                    break;
                case 'cp20936':
                    $fromEncode = 'gb2312';
                    break;
                case 'cp950':
                    $fromEncode = 'big5';
                    break;
            }
            if ($fromEncode != 'utf-8') {
                $html = iconv($fromEncode, 'utf-8//IGNORE', $html);
            }
        }
    }
    return isset($html) ? $html : false;
}
```