当前位置:编程学习 > C#/ASP.NET >>

采集数据

分享我的数据采集代码,不知道大家还有啥好办法!


 public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void button1_Click(object sender, EventArgs e)
        {
            dt.Rows.Clear();
            //要抓取的URL地址
            string Url = "http://list.mp3.baidu.com/topso/mp3topsong.html?id=1#top2";

            //得到指定Url的源码
            string strWebContent = GetWebContent(Url);

            // richTextBox1.Text = strWebContent;
            //取出和数据有关的那段源码
            int iBodyStart = strWebContent.IndexOf("<body", 0);
            int iStart = strWebContent.IndexOf("歌曲TOP500", iBodyStart);
            int iTableStart = strWebContent.IndexOf("<table", iStart);
            int iTableEnd = strWebContent.IndexOf("</table>", iTableStart);
            string strWeb = strWebContent.Substring(iTableStart, iTableEnd - iTableStart + 8);

            //生成HtmlDocument
            WebBrowser webb = new WebBrowser();
            webb.Navigate("about:blank");
            HtmlDocument htmldoc = webb.Document.OpenNew(true);
            htmldoc.Write(strWeb);
            HtmlElementCollection htmlTR = htmldoc.GetElementsByTagName("TR");

            // DataRow row = null;
            foreach (HtmlElement tr in htmlTR)
            {
                string strID = tr.GetElementsByTagName("TD")[0].InnerText;
                string[] info = tr.GetElementsByTagName("TD")[1].InnerText.Split('(');
                string strName = "";
                string strSinger = "";
                if (info.Length != 2)
                {
                    strName = info[0];
                    strSinger = "未知";
                }
                else
                {
                    strName = info[0];
                    strSinger = info[1].Replace(")", "");
                }
                strID = strID.Replace(".", "");
                DataRow row = dt.NewRow();
                row["ID"] = strID;
                row["voide"] = strName;
                row["name"] = strSinger;
                dt.Rows.Add(row);


                string strID1 = tr.GetElementsByTagName("TD")[2].InnerText;
                string[] info1 = tr.GetElementsByTagName("TD")[3].InnerText.Split('(');
                string strName1 = "";
                string strSinger1 = "";
                if (info1.Length != 2)
                {
                    strName1 = info1[0];
                    strSinger1 = "未知";
                }
                else
                {
                    strName1 = info1[0];
                    strSinger1 = info1[1].Replace(")", "");
                }
                strID1 = strID1.Replace(".", "");
                DataRow row1 = dt.NewRow();
                row1["ID"] = strID1;
                row1["voide"] = strName1;
                row1["name"] = strSinger1;
                dt.Rows.Add(row1);



                //string strID2 = tr.GetElementsByTagName("TD")[3].InnerText;

                //string[] info2 = tr.GetElementsByTagName("TD")[4].InnerText.Split('(');
                //string strName2 = "";
                //string strSinger2 = "";
                //if (info1.Length != 2)
                //{
                //    strName2 = info1[0];
                //    strSinger2 = "未知";
                //}
                //else
                //{
                //    strName2 = info1[0];
                //    strSinger2 = info1[1].Replace(")", "");
                //}
                //strID1 = strID2.Replace(".", "");
                //DataRow row2 = dt.NewRow();
                //row2["ID"] = strID2;
                //row2["voide"] = strName2;
                //row2["name"] = strSinger2;
                //dt.Rows.Add(row2);

            }
            ////插入数据库
            //// InsertData(dt);

            dataGridView1.DataSource = dt.DefaultView;



        }



        private string GetWebContent(string Url)
        {
            string strResult = "";
            try
            {

                //创建访问目标
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                //声明一个HttpWebRequest请求
                request.Timeout = 30000;
                //设置连接超时时间
                request.Headers.Set("Pragma", "no-cache");

                //得到回应
                HttpWebResponse response = (HttpWebResponse)request.GetResponse();

                //得到数据流
                Stream streamReceive = response.GetResponseStream();

                //对获取到的数据流进行编码解析,让我们可以进行正常读取
                Encoding encoding = Encoding.GetEncoding("GB2312");
                StreamReader streamReader = new StreamReader(streamReceive, encoding);

                //读取出数据流中的信息
                strResult = streamReader.ReadToEnd();

                //关闭流

                streamReader.Close();

                //关闭网络响应流

                response.Close();
            }
            catch
            {
                MessageBox.Show("出错");
            }
            return strResult;
        }
        static DataTable dt = new DataTable();

        private void Form1_Load(object sender, EventArgs e)
        {
            DataColumn ID = new DataColumn();
            DataColumn voideName = new DataColumn();
            DataColumn name = new DataColumn();
            ID.ColumnName = "ID";
            voideName.ColumnName = "voide";
            name.ColumnName = "name";
            dt.Columns.Add(ID);
            dt.Columns.Add(voideName);
            dt.Columns.Add(name);
        }
    }


--------------------编程问答-------------------- 我晕,竟然没人回复! --------------------编程问答-------------------- up --------------------编程问答-------------------- 做成可配置的、通用的就好了 --------------------编程问答-------------------- 用不了
补充:.NET技术 ,  .NET Framework
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部份技术文章来自网络,