HttpClient问题~~~新手求助!利用httpclient从丁丁地图上抓取上海所有公交线路名
这边是丁丁公交列表:http://www.ddmap.com/mstmap50007/g_bus_all.jsp?g_mapid=21 ,分别点开每条线路的链接,能看到该线路的所有站名。
求各位大侠帮助
代码如下。问题:为何第二次使用 get 方法(抓取每条线路的页面)时不成功?
报错:2012-11-6 22:39:48 org.apache.commons.httpclient.HttpMethodBase processCookieHeaders
警告: Cookie rejected: "$Version=0; JSESSIONID=A8BCF336844529C57C99113ECDEE435B.TSVR_LOCAL; $Path=/mstmap50007". Illegal path attribute "/mstmap50007". Path of origin: "/map/21/bus-key-%B5%D8%CC%FA11%BA%C5%CF%DF.htm"
package httpclient;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
/**
 * Scrapes the ddmap.com bus index page: downloads the index, follows the link of every
 * route found on it, and prints the stop names found on each route page.
 */
public class Test
{
    /** URL of the index page that lists all bus routes. */
    private static final String INDEX_URL =
            "http://www.ddmap.com/mstmap50007/g_bus_all.jsp?g_mapid=21";

    /** Prefix of a route page URL; the route key and ".htm" are appended to it. */
    private static final String ROUTE_URL_PREFIX = "http://www.ddmap.com/map/21/bus-key-";

    /** Pause between two route requests, in milliseconds. */
    private static final long PAUSE_MILLIS = 2;

    /**
     * Matches one route link on the index page. Group 1 is the route key exactly as it
     * appears in the href, so keys containing '-' or '.' are kept intact.
     */
    private static final Pattern ROUTE_LINK = Pattern.compile(
            "<a href='/map/21/bus-key-([^/']+)\\.htm' target='_blank'>[^/]+</a>");

    /**
     * Matches one stop link on a route page, e.g.
     * {@code <a href="/map/21/busstop-01-%B2%DC...-0.htm">name</a>}. Either quote style
     * is accepted around the href. Group 1 is the visible stop name.
     */
    private static final Pattern STOP_LINK = Pattern.compile(
            "<a href=['\"]/map/21/busstop-[^/'\"]+-[^/'\"]+-0\\.htm['\"]>([^/<]+)</a>");

    /** Body of the index page downloaded by the most recent call to {@link #main}. */
    static String response;

    /**
     * Downloads the index page, then downloads every route page exactly once and prints
     * the stop names found on it to standard output.
     *
     * @param argus command-line arguments (unused)
     */
    public static void main(String[] argus)
    {
        HttpClient httpClient = new HttpClient();
        try
        {
            // Step 1: fetch the index and pull out the key of every route link.
            response = fetch(httpClient, INDEX_URL);
            Matcher routeMatcher = ROUTE_LINK.matcher(response);
            while (routeMatcher.find())
            {
                // Step 2: fetch the route page once per route and print its stops.
                String routeUrl = ROUTE_URL_PREFIX + routeMatcher.group(1) + ".htm";
                String routePage = fetch(httpClient, routeUrl);
                Matcher stopMatcher = STOP_LINK.matcher(routePage);
                while (stopMatcher.find())
                {
                    System.out.print(stopMatcher.group(1));
                }
                Thread.sleep(PAUSE_MILLIS);
            }
        }
        catch (InterruptedException e)
        {
            // Restore the interrupt flag so callers up the stack can observe it.
            Thread.currentThread().interrupt();
            System.err.println("页面无法访问: " + e);
        }
        catch (Exception e)
        {
            // Include the exception so the actual cause of the failure is visible.
            System.err.println("页面无法访问: " + e);
        }
    }

    /**
     * Executes a GET request and returns the response body with all lines concatenated
     * (line terminators are dropped). The connection is always released before returning,
     * so the same client can issue the next request.
     *
     * @param httpClient client used to execute the request
     * @param url        absolute URL to download
     * @return the response body, or an empty string when the response has no body
     * @throws java.io.IOException if the request or reading the body fails
     */
    private static String fetch(HttpClient httpClient, String url) throws java.io.IOException
    {
        GetMethod getMethod = new GetMethod(url);
        try
        {
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK)
            {
                System.err.println("Method failed: " + getMethod.getStatusLine());
            }
            InputStream resStream = getMethod.getResponseBodyAsStream();
            if (resStream == null)
            {
                return "";
            }
            // NOTE(review): decodes with the platform default charset, as before;
            // confirm it matches the encoding the site actually serves.
            BufferedReader reader = new BufferedReader(new InputStreamReader(resStream));
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
            {
                body.append(line);
            }
            return body.toString();
        }
        finally
        {
            getMethod.releaseConnection();
        }
    }
}
补充:Java , Java相关