PHP常用采集函数
2022-06-04 · PHP
本文整理了 PHP 采集(网页抓取)中常用的函数,并附上相关示例,代码如下:
获取所有链接内容和地址
/**
 * Extracts the text and target of every simple anchor in an HTML fragment.
 *
 * Only anchors whose first attribute is `href` and whose content holds no
 * nested tags are matched, because the link text is captured with `[^>]+`.
 *
 * @param string $code HTML source to scan.
 *
 * @return array Map with two parallel lists: 'name' (link texts) and
 *               'url' (href values). Both are empty when nothing matches.
 */
function getAllURL($code) {
    // The published snippet lost its "<a ... </a>" anchors during HTML
    // extraction, leaving an unbalanced ")" that PCRE cannot compile.
    $pattern = '/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i';

    preg_match_all($pattern, $code, $arr);

    return array('name' => $arr[2], 'url' => $arr[1]);
}
/**
 * Converts an HTML table into a two-dimensional array of cell texts.
 *
 * Each closing row tag ends a row and each closing cell tag ends a cell.
 * All other markup is stripped, and every space is removed from the text.
 *
 * @param string $table HTML containing the table markup.
 *
 * @return array List of rows; each row is a list of cell strings.
 *               An empty list is returned when no complete row is found.
 */
function getTdArray($table) {
    // Drop the opening tags. The tag names were lost from the published
    // snippet during HTML extraction and are restored here.
    $table = preg_replace("'<table[^>]*?>'si", "", $table);
    $table = preg_replace("'<tr[^>]*?>'si", "", $table);
    $table = preg_replace("'<td[^>]*?>'si", "", $table);

    // Turn closing tags into row/cell separators. Matching is
    // case-insensitive so it agrees with the "i" flag used above.
    $table = str_ireplace("</tr>", "{tr}", $table);
    $table = str_ireplace("</td>", "{td}", $table);

    // Strip any remaining HTML tags.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove a line break together with the whitespace that follows it.
    $table = preg_replace('/[\r\n]\s+/', "", $table);

    // Remove spaces. The published snippet repeated the plain-space removal
    // twice; the second call presumably targeted a non-breaking space that
    // extraction flattened, so both of its forms are removed as well.
    $table = str_replace(array(" ", "&nbsp;", "\xC2\xA0"), "", $table);

    // The text after the last "{tr}" is not a complete row; discard it.
    $rows = explode('{tr}', $table);
    array_pop($rows);

    $td_array = array();
    foreach ($rows as $tr) {
        // Likewise, the text after the last "{td}" is not a complete cell.
        $td = explode('{td}', $tr);
        array_pop($td);
        $td_array[] = $td;
    }

    return $td_array;
}
/**
 * Converts an HTML table into a list with one string per row, for scraping
 * table data.
 *
 * Every cell is wrapped in double quotes and the cells of a row are joined
 * with commas, e.g. `"a","b"`. All other markup is stripped, and every space
 * is removed from the text.
 *
 * @param string $table HTML containing the table markup.
 *
 * @return array List of row strings; empty when no complete row is found.
 */
function getTrArray($table) {
    // Opening cell tag becomes an opening quote; closing cell tag becomes a
    // closing quote plus comma. The tag names were lost from the published
    // snippet during HTML extraction and are restored here.
    $table = preg_replace("'<td[^>]*?>'si", '"', $table);
    $table = str_ireplace("</td>", '",', $table);

    // Closing row tag becomes the row separator.
    $table = str_ireplace("</tr>", "{tr}", $table);

    // Strip any remaining HTML tags.
    $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

    // Remove a line break together with the whitespace that follows it.
    $table = preg_replace('/[\r\n]\s+/', "", $table);

    // Remove spaces. The published snippet repeated the plain-space removal
    // twice; the second call presumably targeted a non-breaking space that
    // extraction flattened, so both of its forms are removed as well.
    $table = str_replace(array(" ", "&nbsp;", "\xC2\xA0"), "", $table);

    // Splitting on ",{tr}" also swallows the trailing comma of each row.
    // The text after the last separator is not a complete row; discard it.
    $rows = explode(",{tr}", $table);
    array_pop($rows);

    return $rows;
}
/**
 * Returns all words in a string, sorted in ascending order.
 *
 * A word is any run of ASCII letters (a-z, A-Z). Comparison is
 * case-sensitive, so "The" and "the" count as different words.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When truthy (the default), duplicates are removed.
 *
 * @return array Sorted, zero-indexed list of words; empty when none are found.
 */
function splitEnStr($str,$distinct=true) {
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];

    if ($distinct == true) {
        $words = array_unique($words);
    }

    // sort() also re-indexes the list, closing gaps left by array_unique().
    sort($words);

    return $words;
}
|
[^>[^>\s+href=["|\']?([^>
很赞哦! ()
