php采集后的处理
<?php
/**
* @name 采集后的处理.php
* @date Sat Dec 22 02:07:45 CST 2007
* @copyright 马永占(MyZ)
* @author 马永占(MyZ)
* @link http://blog.csdn.net/mayongzhan/
*/
//采集后的文件,然后那来进行处理.这里的东西让我抄了5本书,是哪的不方便提供,自己找找吧.
header(''Content-Type:text/html;charset=utf8'');
function writer($content,$url)
{
$fp = fopen($url, ''ab'');
fwrite($fp, $content);
fclose($fp);
}
//从1到136页的内容一次合并.这个是最爽的...
for ($i=1;$i<136;$i++) {
$str = file_get_contents(''./myz/''.$i.''.shtml'');
preg_match("/(<h1>)(.*?)(</h1>)(.*?)(<div class="artibody" id="artibody">)(.*?)(</div>)/s",$str,$arr);
$arr[6] = preg_replace("/(<span[^>]+>.*?<a[^>]+>)(.*?)(</a></span>)/s","$2",preg_replace("/<p>|</p>/","rn",$arr[6]));
$result = "rn------------------------------------------------rn------------------------------------------------rn------------------------------------------------rn".$i."----------------马永占的目录编号:".$arr[2]."rn------------------------------------------------rn------------------------------------------------rn------------------------------------------------rn".$arr[6];
writer($result, "./myz/all.txt");
}
?>
补充:Php教程,Php常用代码