天天躁日日躁狠狠躁AV麻豆-天天躁人人躁人人躁狂躁-天天澡夜夜澡人人澡-天天影视香色欲综合网-国产成人女人在线视频观看-国产成人女人视频在线观看

asp.net(c#)做一個網頁數據采集工具

通過這個軟件,一兩天就完成了幾千條產品數據的錄入。可見很多工作不必一味用人工去做;作為一個程序員,就是要讓那些經常做重復性、繁瑣工作的人解放出來。下面只是寫了一些核心代碼,而且采集必須要和對應網站相掛鉤。作者:鄭少群

復制代碼 代碼如下:
// Extracts the links found on the product list page and writes them,
// one absolute URL per line, into textBox5.
//
// textBox1 holds the URL of the list page to fetch; textBox2 holds the site
// domain that is prepended to root-relative links ("/path").
private void button1_Click(object sender, EventArgs e)
{
    if (textBox1.Text.Trim() == "" || textBox2.Text.Trim() == "")
    {
        MessageBox.Show("網址和域名不能為空!", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
        return;
    }

    try
    {
        // Fetch the page the user entered (it was validated above) rather than a hard-coded URL.
        string Html = inc.GetHtml(textBox1.Text.Trim());

        // Match href attributes whose value is wrapped in single or double quotes.
        ArrayList al = inc.GetMatchesStr(Html, @"href\s*=\s*(?:[\'\""\s](?<1>[^\""\']*)[\'\""])");

        string domain = textBox2.Text.Trim();
        StringBuilder sb = new StringBuilder();
        foreach (object item in al)
        {
            // Each entry is the whole match (href="..."), so strip the quotes and the attribute name.
            string a = item.ToString().Replace("\"", "").Replace("'", "");
            // Allow whitespace around '=' because the match pattern above allows it too.
            a = Regex.Replace(a, @"href\s*=\s*", "", RegexOptions.IgnoreCase).Trim();

            // Root-relative link: prefix it with the site domain.
            if (a.StartsWith("/", StringComparison.Ordinal))
                a = domain + a;

            // Add a scheme only when there is none; https links must be left untouched.
            if (!a.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                !a.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                a = "http://" + a;
            }

            sb.Append(a).Append("\r\n");
        }

        // Show the extracted URLs in the text box, one link per line.
        textBox5.Text = sb.ToString();

        MessageBox.Show("共提取" + al.Count.ToString() + "個鏈接", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch (Exception err)
    {
        MessageBox.Show("提取出錯!原因:" + err.Message, "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
}




// Scrapes every product page listed in textBox5 (one URL per line): pulls the
// product name, model id and introduction out of the page HTML by string
// slicing, downloads the product image into the local "Images" folder, and
// saves the collected rows to the Tb_Product table through OLE DB.
//
// Failures for a single URL are collected and handed to ShowError at the end
// instead of aborting the whole run.
//
// NOTE(review): this handler reads textBox5/textBox2 and writes
// toolStripStatusLabel1 directly; if it runs on the BackgroundWorker thread
// (as a DoWork handler normally does) that is cross-thread UI access. Confirm
// whether DataBind/ShowError also touch controls and consider moving the UI
// work to ProgressChanged/RunWorkerCompleted.
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
    // Rebuild the product table from scratch on every run.
    Database.ExecuteNonQuery("delete from Tb_Product");

    // The worker is used for cancellation checks and progress reporting.
    BackgroundWorker worker = (BackgroundWorker)sender;

    // One URL per line. URLs are kept in their original case and are not split
    // on commas, because both can be significant inside a URL.
    string[] Urls = textBox5.Text.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    string domain = textBox2.Text.Trim();
    string ImageDir = AppDomain.CurrentDomain.BaseDirectory + "Images\\";
    StringBuilder ErrorStr = new StringBuilder();
    DataTable dt2 = new DataTable();

    using (OleDbConnection conn = new OleDbConnection(Database.ConnectionStrings))
    using (OleDbDataAdapter da = new OleDbDataAdapter("select * from Tb_Product", conn))
    // The command builder is not referenced again, but it must exist so that
    // da.Update can generate the INSERT command.
    using (OleDbCommandBuilder cb = new OleDbCommandBuilder(da))
    {
        // Fill loads the table schema; any rows that came with it are discarded.
        da.Fill(dt2);
        dt2.Rows.Clear();

        // Process each URL in turn.
        for (int i = 0; i < Urls.Length; i++)
        {
            // Stop early on cancellation; rows collected so far are still saved below.
            if (worker.CancellationPending)
                break;

            string url = Urls[i].Trim();
            if (url.Length == 0)
                continue; // skip blank entries instead of abandoning the whole run

            try
            {
                string html = inc.GetHtml(url);
                DataRow NewRow = dt2.NewRow();

                // Product name: the text between <title> and </title> (7 = length of "<title>").
                string ProductName = html.Substring(html.IndexOf("<title>") + 7);
                ProductName = ProductName.Remove(ProductName.IndexOf("</title>")).Trim();
                NewRow["ProductName"] = ProductName;

                // Model id: whatever follows "Model:" in the product name (6 = length of "Model:").
                NewRow["ModelId"] = ProductName.Substring(ProductName.IndexOf("Model:") + 6).Trim();

                // Product introduction: from 26 characters past the start of "Product Details"
                // up to and including the next </table>. The offset is tied to the markup of
                // the scraped site and has to be adapted for other sites.
                string Introduce = html.Substring(html.IndexOf("Product Details") + 26);
                Introduce = Introduce.Remove(Introduce.IndexOf("</table>") + 8).Trim();
                NewRow["Introduce"] = Introduce;

                // Product image: the src value of the first <img> that follows "align=center>",
                // prefixed with the site domain.
                string ProductImage = html.Substring(html.IndexOf("align=center><img") + 17);
                ProductImage = domain + ProductImage.Substring(ProductImage.IndexOf("src=\"") + 5);
                ProductImage = ProductImage.Remove(ProductImage.IndexOf("\""));

                // Save under the image's own file name inside the Images folder.
                string localPath = ImageDir + ProductImage.Substring(ProductImage.LastIndexOf("/") + 1);
                try
                {
                    inc.DownFile(ProductImage, localPath);
                }
                catch (Exception)
                {
                    // A failed image download is recorded but does not discard the product row.
                    ErrorStr.Append("下載圖片失敗,圖片地址:" + localPath + "\r\n");
                }

                dt2.Rows.Add(NewRow);

                worker.ReportProgress((i + 1) * 100 / Urls.Length, i);
                // NOTE(review): direct UI update from this handler — see the note above the method.
                toolStripStatusLabel1.Text = "處理進度:" + (i + 1).ToString() + "/" + Urls.Length.ToString();
            }
            catch (Exception err)
            {
                ErrorStr.Append("采集錯誤:" + err.Message + ";網址:" + url + "\r\n");
            }
        }

        // Persist all collected rows.
        da.Update(dt2);
    }

    DataBind(dt2);
    ShowError(ErrorStr.ToString());
}

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns the response body as text.
/// Original author: Zheng Shaoqun.
/// </summary>
/// <param name="url">URL to request; the response is cast to <see cref="HttpWebResponse"/>.</param>
/// <returns>
/// The response body, decoded with the character set reported by the response,
/// or with UTF-8 when no usable character set is reported.
/// </returns>
public static string GetHtml(string url)
{
    WebRequest request = WebRequest.Create(url);

    // Dispose the response and its stream even when reading fails part-way.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream body = response.GetResponseStream())
    {
        Encoding encoding = Encoding.UTF8;
        string charset = response.CharacterSet;
        if (!string.IsNullOrEmpty(charset))
        {
            try
            {
                // Tolerate quoted values such as charset="utf-8".
                encoding = Encoding.GetEncoding(charset.Trim('"', '\'', ' '));
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported character set name: keep the UTF-8 fallback.
            }
        }

        using (StreamReader reader = new StreamReader(body, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}


// Returns the distinct substrings of htmlCode that match strRegex, sorted.
// Matching is case-insensitive and uses multiline mode; duplicates are
// detected by exact string equality of the matched text.
public static ArrayList GetMatchesStr(string htmlCode, string strRegex)
{
    ArrayList unique = new ArrayList();

    MatchCollection found = Regex.Matches(htmlCode, strRegex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
    foreach (Match hit in found)
    {
        string text = hit.ToString();

        // Keep only the first occurrence of each matched string.
        if (unique.Contains(text))
        {
            continue;
        }

        unique.Add(text);
    }

    unique.Sort();
    return unique;
}

/// <summary>
/// Downloads the resource at <paramref name="Url"/> and writes it to the local file
/// <paramref name="Path"/>, replacing any existing file at that location.
/// </summary>
/// <param name="Url">HTTP URL of the resource to download.</param>
/// <param name="Path">Full path of the local file to write.</param>
public static void DownFile(string Url, string Path)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

    // Dispose the response and the network stream even when the copy fails.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    // FileMode.Create truncates an existing file, so a shorter download never
    // leaves stale bytes from a previous, larger file at the end.
    using (FileStream fs = new FileStream(Path, FileMode.Create, FileAccess.Write))
    {
        byte[] buffer = new byte[8192];
        int read;
        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            fs.Write(buffer, 0, read);
        }
    }
}

AspNet技術:asp.net(c#)做一個網頁數據采集工具,轉載需保留來源!

鄭重聲明:本文版權歸原作者所有,轉載文章僅為傳播更多信息之目的,如作者信息標記有誤,請第一時間聯系我們修改或刪除,多謝。

主站蜘蛛池模板: 午夜插插插 | 色久天| 色老99九久精品偷偷鲁 | 女女破视频在线观看 | 99精品视频在线观看免费播放 | 日韩AV爽爽爽久久久久久 | 国产精品野外AV久久久 | 青青草原社区 | 花蝴蝶免费版高清版 | 直插下身完整的欧美版 | 青春草久久 | 毛片网站视频 | 黑人性xxx| 欧美日韩午夜群交多人轮换 | 国产亚洲精品久久久999密臂 | 少妇高潮久久久久7777 | 精品熟女少妇AV免费观看 | 中文字幕a有搜索网站 | 在线国产a不卡 | 欧美 日韩 无码 有码 在线 | 久久精品亚洲 | 一个人的免费完整在线观看HD | 驯服有夫之妇HD中字日本 | 亚洲AV电影天堂男人的天堂 | 被老头下药玩好爽 | 免费毛片在线视频 | 欧美伊人久久大香线蕉综合69 | 98国产精品人妻无码免费 | 手机看片国产免费久久网 | 久久www成人看片 | 久久才是精品亚洲国产 | 国产精华av午夜在线观看 | 日本高清免费看 | 久久视热频国产这里只有精品23 | 三男强一女90分钟在线观看 | 亚洲精品自在线拍2019 | 成人欧美一区二区三区白人 | 亚洲国产精品特色大片观看 | 亚洲 日韩经典 中文字幕 | 老司机亚洲精品影院 | 猪蜜蜜网站在线观看电视剧 |