我这个截取网页源代码之间怎么一直截取的不对呢?
using System;using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
namespace WindowsApplication2
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
System.Net.HttpWebRequest req;
System.Net.HttpWebResponse res;
string url = "http://fanyi.yahoo.com.cn/translate_txt?trtext="+richTextBox2.Text+"&lp=en_zh";
req = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
res = (System.Net.HttpWebResponse)req.GetResponse();
System.IO.StreamReader strm = new System.IO.StreamReader(res.GetResponseStream(), Encoding.GetEncoding("UTF-8"));
string str = strm.ReadToEnd();
str = str.Substring(str.IndexOf("resultcon")+65, str.IndexOf("div"));
richTextBox1.Text = str;
}
}
}
结果为:你好,怎么样您 </div>
</div>
</div>
</div>
</form>
<!--}}}end:translated -->
<div class="clr"></div>
</div>
<!--}}end:translate area -->
<!--{{start:translate page -->
<div class="parea">
<div class="pform">
<h2>翻译网页</h2>
但是我指向要“你好,怎么样您”
这个要怎么弄呢? --------------------编程问答-------------------- 分析完整的返回的整体 html ,用正则匹配
你这样肯定不精确的,页面肯定多个 div
补充:.NET技术 , C#