跪求asp.net/c# 正则表达式 截取HTML!
<tr class='a1'> <td width='84' rowspan='1' bgcolor='#EFF6FB'><div align='center'>13层</div></td> <td height='68' width='140' bgcolor='#EFF6FB'><div align='center'>房号:13A</div> <div align='center'><a href='housedetail.aspx?id=925352' target=_blank><img src='imc/b2.gif' border='0'></a></div></td> <td height='68' width='140' bgcolor='#EFF6FB'><div align='center'>房号:13B</div> <div align='center'><a href='housedetail.aspx?id=925353' target=_blank><img src='imc/b1_2.gif' border='0'></a></div></td> <td height='68' width='140' bgcolor='#EFF6FB'><div align='center'>房号:13C</div> <div align='center'><a href='housedetail.aspx?id=925354' target=_blank><img src='imc/b1_2.gif' border='0'></a></div></td> <td height='68' width='140' bgcolor='#EFF6FB'><div align='center'>房号:13D</div> <div align='center'><a href='housedetail.aspx?id=925355' target=_blank><img src='imc/b3.gif' border='0'></a></div></td> <td height='68' width='140' bgcolor='#EFF6FB'><div align='center'>房号:13E</div> <div align='center'><a href='housedetail.aspx?id=925356' target=_blank><img src='imc/b1_2.gif' border='0'></a></div></td> </tr><tr class='a1'>......</tr>
<tr class='a1'>......</tr>
......许多层许多房间
我要截取出来的字段: 层数、房号、房号详细页链接地址(如 housedetail.aspx?id=925355)、房间状态图片(如 imc/b3.gif 或者 imc/b1_2.gif 等)
在线等ing.... --------------------编程问答-------------------- 先做个记号/. --------------------编程问答-------------------- 大大们都去哪儿了555 --------------------编程问答--------------------
--------------------编程问答-------------------- 我也很笨.. --------------------编程问答-------------------- string str = "<tr class='a1'> <td width='84' rowspan='1' bgcolor='#EFF6FB'> <div align='center'>13层 </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13A </div> <div align='center'> <a href='housedetail.aspx?id=925352' target=_blank> <img src='imc/b2.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13B </div> <div align='center'> <a href='housedetail.aspx?id=925353' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13C </div> <div align='center'> <a href='housedetail.aspx?id=925354' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13D </div> <div align='center'> <a href='housedetail.aspx?id=925355' target=_blank> <img src='imc/b3.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13E </div> <div align='center'> <a href='housedetail.aspx?id=925356' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td> </tr> <tr class='a1'>...... </tr> <tr class='a1'>...... </tr>";
//获得房间号码。参考。举一反三。其他的就不写了
static string[] GetRoomId(string hTMLCode)
{
string[] temproomId = hTMLCode.Split('房');
string[] roomId=null;
if (temproomId.Length != 0)
{
roomId = new string[temproomId.Length];
for (int i = 0; i < temproomId.Length; i++)
{
roomId[i] = temproomId[i].Substring(0, temproomId[i].IndexOf( '<')).Trim();
Console.WriteLine(roomId[i]);
}
}
return roomId;
}
string strs = System.Text.RegularExpressions.Regex.Replace(items, "<[^>]+>", "");
MessageBox.Show(strs); --------------------编程问答-------------------- items 改成str --------------------编程问答-------------------- 把熊猫的正则加以完善OK --------------------编程问答-------------------- 我试下 呵呵 --------------------编程问答--------------------
熊猫的不行,把<>里面的内容全部去掉的话,会把链接地址(如housedetail.aspx?id=925355)和房间状态(如imc/b3.gif或者imc/b1_2.gif等)
也一起去掉的 --------------------编程问答-------------------- 楼上说的对哦 555 各位大大再帮帮 呵呵 --------------------编程问答-------------------- (?<=(a href='|<img src='|<.+?>)).+?(?=(' target=_blank>|' border='0'>|<.+?>)) --------------------编程问答--------------------
您好! 我是新手这个怎么取啊 而且能取到我想要的吗? --------------------编程问答--------------------
string str = "<tr class='a1'> <td width='84' rowspan='1' bgcolor='#EFF6FB'> <div align='center'>13层 </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13A </div> <div align='center'> <a href='housedetail.aspx?id=925352' target=_blank> <img src='imc/b2.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13B </div> <div align='center'> <a href='housedetail.aspx?id=925353' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13C </div> <div align='center'> <a href='housedetail.aspx?id=925354' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13D </div> <div align='center'> <a href='housedetail.aspx?id=925355' target=_blank> <img src='imc/b3.gif' border='0'> </a> </div> </td> <td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13E </div> <div align='center'> <a href='housedetail.aspx?id=925356' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td> </tr>";
Regex reg = new Regex("(?<=(a href='|<img src='|<.+?>)).+?(?=(' target=_blank>|' border='0'>|<.+?>))");
string strRes = "";
for (int i = 0; i < reg.Matches(str).Count; i++)
{
strRes += reg.Matches(str)[i].ToString();
}
strRes就是结果. --------------------编程问答-------------------- 关注 --------------------编程问答--------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Globalization;
using System.IO;
using System.Data.SqlClient;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
string str = @"<tr class='a1'>
<td width='84' rowspan='1' bgcolor='#EFF6FB'> <div align='center'>13层 </div> </td>
<td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13A </div> <div align='center'><a href='housedetail.aspx?id=925352' target=_blank> <img src='imc/b2.gif' border='0'> </a> </div> </td>
<td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13B </div> <div align='center'><a href='housedetail.aspx?id=925353' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td>
<td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13C </div> <div align='center'><a href='housedetail.aspx?id=925354' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div> </td>
<td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13D </div> <div align='center'><a href='housedetail.aspx?id=925355' target=_blank> <img src='imc/b3.gif' border='0'> </a> </div> </td>
<td height='68' width='140' bgcolor='#EFF6FB'> <div align='center'>房号:13E </div> <div align='center'><a href='housedetail.aspx?id=925356' target=_blank> <img src='imc/b1_2.gif' border='0'> </a> </div>
</td> </tr>";
Regex reg = new Regex(@"<a\s*href='(?<href>[^']+)'[^>]*>|<img\s*src='(?<src>[^']+)'[^>]*>|<[^>]*>(?<name>[^<]*)");
MatchCollection mc = reg.Matches(str);
foreach(Match m in mc)
Console.WriteLine("{0} {1} {2}",m.Groups["href"].Value,m.Groups["src"].Value,m.Groups["name"].Value);
}
}
}
13层
房号:13A
housedetail.aspx?id=925352
imc/b2.gif
房号:13B
housedetail.aspx?id=925353
imc/b1_2.gif
房号:13C
housedetail.aspx?id=925354
imc/b1_2.gif
房号:13D
housedetail.aspx?id=925355
imc/b3.gif
房号:13E
housedetail.aspx?id=925356
imc/b1_2.gif
Press any key to continue . . . --------------------编程问答-------------------- 看来我还是写个丑陋的吧,
补充:.NET技术 , C#