抓取“维库电子市场”供应商程序
<?php
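/**
* Main program for scraping suppliers from the "Weiku Electronic Market" (维库电子市场) website.
* @author Lee.
* Last modified: $Date: 2012-2-3 9:30:21 $
* Note: this program runs in the GB2312 encoding because the "Weiku Electronic Market" website is GB2312-encoded; the database must use the same encoding.
*/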
class weiku {
private $key; // 型号
private $pageNum; // 页码
/**
 * Entry point.
 *
 * Stores the search key, stores whatever getPageNum() reports as the page
 * count, and then runs getInfo() to process the pages.
 *
 * @param mixed $key Model (part number) to search for; kept in $this->key.
 */
public function go($key) {
    $this->key = $key;

    // The page count is resolved only after the key is stored, same order as before.
    $pageCount = $this->getPageNum();
    $this->pageNum = $pageCount;

    $this->getInfo();
}
/**
 * Walks every result page and hands the processed result to isAddSuccess().
 *
 * For each page the content returned by getContent() is passed through
 * shopUrlMatchReArr() and then shopAddContact(); the resulting array is
 * given to isAddSuccess(). Nothing is returned.
 *
 * A page count of exactly 1 calls getContent() without a page argument;
 * a larger count calls getContent($page) for pages 1..pageNum. Any other
 * page count does nothing.
 */
private function getInfo() {
    // Single-page case: getContent() is called with no argument.
    if ($this->pageNum == 1) {
        $html = $this->getContent();
        $shopUrls = $this->shopUrlMatchReArr($html);
        $this->isAddSuccess($this->shopAddContact($shopUrls));
        return;
    }

    // Zero or otherwise non-positive page count: nothing to do.
    if (!($this->pageNum > 1)) {
        return;
    }

    // Multi-page case: pages are numbered from 1 up to and including pageNum.
    $page = 1;
    while ($page <= $this->pageNum) {
        $html = $this->getContent($page);
        $shopUrls = $this->shopUrlMatchReArr($html);
        $this->isAddSuccess($this->shopAddContact($shopUrls));
        $page++;
    }
}
/**
 * Processes each entry of $arr and prints whether it was added.
 *
 * Every value is passed to getInfoByShopUrl(); the result is passed to
 * execAdd(). A truthy result from execAdd() echoes 'Add Success!!',
 * anything else echoes 'Add Faild!!'. Nothing is returned.
 *
 * @param array $arr Values to pass to getInfoByShopUrl() one by one.
 */
private function isAddSuccess($arr) {
    foreach ($arr as $shopUrl) {
        $info = $this->getInfoByShopUrl($shopUrl);
        echo $this->execAdd($info) ? 'Add Success!!' : 'Add Faild!!';
    }
}
/**
 * Inserts one supplier record into the `weiku` table.
 *
 * Changes from the previous version:
 *  - `emptyempty()` (an undefined function, fatal at runtime) is now `empty()`.
 *  - The INSERT uses a prepared statement, so scraped values containing
 *    quotes can neither break nor inject into the SQL.
 *  - The connection obtained from getDb() is closed before returning, and
 *    is no longer opened at all when there is no company name.
 *  - Keys missing from $infoArr are stored as empty strings instead of
 *    raising undefined-index notices.
 *
 * @param array $infoArr Record with the keys company, person, phone, fax,
 *                       mobile, qq, msn, email, address and shopUrl.
 * @return bool True when the row was inserted; false when the company name
 *              is empty, the company already exists, or the insert failed.
 */
private function execAdd($infoArr) {
    // Nothing to store without a company name; skip before opening a connection.
    if (empty($infoArr['company'])) {
        return false;
    }

    $mysqli = $this->getDb();

    // The record is already present.
    if ($this->isExists($mysqli, $infoArr)) {
        $mysqli->close();
        return false;
    }

    $stmt = $mysqli->prepare(
        'INSERT INTO weiku(company,person,phone,fax,mobile,qq,msn,email,address,shopUrl) '
        . 'VALUES (?,?,?,?,?,?,?,?,?,?)'
    );
    if ($stmt === false) {
        $mysqli->close();
        return false;
    }

    // Normalise every column to a string so bind_param() always gets a value.
    $columns = array('company', 'person', 'phone', 'fax', 'mobile', 'qq', 'msn', 'email', 'address', 'shopUrl');
    $values = array();
    foreach ($columns as $column) {
        $values[$column] = isset($infoArr[$column]) ? (string) $infoArr[$column] : '';
    }

    $stmt->bind_param(
        'ssssssssss',
        $values['company'],
        $values['person'],
        $values['phone'],
        $values['fax'],
        $values['mobile'],
        $values['qq'],
        $values['msn'],
        $values['email'],
        $values['address'],
        $values['shopUrl']
    );

    $inserted = $stmt->execute();

    $stmt->close();
    $mysqli->close();

    return $inserted;
}
/**
 * Opens a new connection to the `weiku` database using the GB2312 charset.
 *
 * Changes from the previous version:
 *  - A failed connection is reported immediately instead of returning an
 *    unusable handle on which every later query fails.
 *  - The charset is selected with mysqli::set_charset() rather than a raw
 *    `SET NAMES` query, so the client library also knows the charset.
 *
 * NOTE(review): host, user and password are hard-coded here (root account);
 * they should be moved to configuration outside the source file.
 *
 * @return mysqli A connected handle; each call creates a new connection.
 * @throws RuntimeException When connecting or selecting the charset fails.
 */
private function getDb() {
    $mysqli = new mysqli('localhost', 'root', '1715544', 'weiku');

    if ($mysqli->connect_errno) {
        throw new RuntimeException(
            'weiku: database connection failed: ' . $mysqli->connect_error
        );
    }

    // The scraped site is GB2312, so the connection must use the same charset.
    if (!$mysqli->set_charset('gb2312')) {
        $reason = $mysqli->error;
        $mysqli->close();
        throw new RuntimeException('weiku: unable to select the gb2312 charset: ' . $reason);
    }

    return $mysqli;
}
/**
 * Checks whether a row with the given company name is already stored.
 *
 * Changes from the previous version:
 *  - The lookup uses a prepared statement. Previously a company name
 *    containing a quote broke the SQL, the query failed, `affected_rows`
 *    became -1 (truthy) and the company was silently reported as existing.
 *  - The row count is read from the statement's stored result instead of
 *    `affected_rows`, and the result is freed afterwards.
 *  - A database error is now surfaced as a warning. The method still
 *    answers true in that case, as before, so the caller does not insert
 *    a possible duplicate.
 *
 * @param mysqli $mysqli  Open database connection.
 * @param array  $infoArr Record; only the 'company' key is read.
 * @return bool True when a matching row exists (or the lookup failed),
 *              false when no row matches.
 */
private function isExists($mysqli, $infoArr) {
    $company = isset($infoArr['company']) ? (string) $infoArr['company'] : '';

    $stmt = $mysqli->prepare('SELECT company FROM weiku WHERE company = ? LIMIT 1');
    if ($stmt === false) {
        trigger_error('weiku::isExists: prepare failed: ' . $mysqli->error, E_USER_WARNING);
        return true;
    }

    $stmt->bind_param('s', $company);

    if (!$stmt->execute()) {
        trigger_error('weiku::isExists: lookup failed: ' . $stmt->error, E_USER_WARNING);
        $stmt->close();
        return true;
    }

    // Buffer the result so num_rows is populated.
    $stmt->store_result();
    $found = $stmt->num_rows > 0;

    $stmt->free_result();
    $stmt->close();

    return $found;
}
/**
* 抓取信息
* @param $url
* @return ArrayObject
*/
private function getInfoByShopUrl($url) {
$re = $this->getUrlInfo($url);
preg_match_all('/<b>公司名称:<\/b><span>(.*)<\/span>/Usi', $re, $companyArr);
preg_match_all('/<b>联系人:<\/b><span>(.*)<\/span>/Usi', $re, $personArr);
preg_match_all('/<b>电话:<\/b><span>(.*)<\/span>/Usi', $re, $phoneArr);
preg_match_all('/<b>传真:<\/b><span>(.*)<\/span>/Usi', $re, $faxArr);
preg_match_all('/<b>手机:<\/b><span>(.*)<\/span>/Usi', $re, $mobileArr);
&n
补充:Web开发 , php ,