asp采集数据并自动判断网页编码并转换
做过采集的朋友都知道,采集过来的内容经常会出现乱码。下面我们来看一个用 ASP 采集数据、自动判断网页编码并完成转换的例子。
<%@LANGUAGE="JAVASCRIPT" CODEPAGE="65001"%>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>asp自动判断网页编码并转换</title>
</head>
<%Server.ScriptTimeout=9999999; // Raise the ASP script timeout to a very large value; presumably so a slow remote fetch cannot abort the page.
/**
 * Fetches a URL synchronously and returns its body decoded with the charset
 * the page declares.
 *
 * The charset is sniffed from the first `charset=...` declaration found in
 * the response text (covers both `<meta charset="x">` and
 * `content="text/html; charset=x"`, with or without quotes). The raw response
 * bytes are then handed to bytesToBSTR for the actual decoding.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {string} The decoded page text, or the literal "Erro" when the
 *     request did not reach ReadyState 4.
 */
function send_request(url){
  var codedtext;
  // Declared locally so the request object does not leak as an implicit global.
  var http_request = Server.CreateObject("Microsoft.XMLHTTP");
  http_request.Open("GET", url, false); // false = synchronous request
  http_request.Send(null);
  if (http_request.ReadyState == 4){
    // Charset auto-detection: the name may be bare or wrapped in quotes, and
    // whitespace around "=" is tolerated.
    var charresult = http_request.ResponseText.match(/charset\s*=\s*["']?([\w.:-]+)/i);
    // Pages that declare no charset are decoded as gb2312; change if needed.
    var Cset = (charresult != null) ? charresult[1] : "gb2312";
    codedtext = bytesToBSTR(http_request.ResponseBody, Cset);
  }else{
    codedtext = "Erro";
  }
  return codedtext;
}
function bytesToBSTR(body,Cset){
var objstream;
objstream = Server.CreateObject("Adodb.Stream");
objstream.Type = 1;
objstream.Mode = 3;
objstream.Open();
objstream.Write(body);
objstream.Position = 0;
objstream.Type = 2;
objstream.Charset = Cset;
bytesToBSTR = objstream.Readtext;
objstream.Close;
return(bytesToBSTR);
}%>
<body>
<%/* Fetch the sample URL and write the decoded text into the page body. */ Response.Write(send_request("http://www.zzzyk.com/404.htm"))%>
</body>
</html>
采集原理很简单:先用 ASP 的 XMLHTTP 组件抓取网页,再用 ADODB.Stream 按检测到的编码对采集到的数据进行转换。
补充:asp教程,ASP入门