天天看點

笨笨圖檔批量抓取下載 V0.2 beta[C# | WinForm | 正規表達式 | HttpWebRequest | Async異步程式設計]

前言

     首次在博客園首頁發布文章。一直把自己的文章定位在新手區,也一直這樣看待自己。這段時間看博客園首頁挺多的,說實在的,有些文章很一般,卻得到了更多朋友以及高手的指點,所以鼓起勇氣把自己花了幾天時間改版的圖檔下載器貼上來,希望能在程式的性能以及適用性上得到更多寶貴的建議!先謝謝每位看客賞光了 : )

感謝

     msdn     msdn上異步網絡程式設計的例子是我0.2版的核心所在

運作環境

     .net framework2.0

開發工具

     microsoft visual studio 2005

正題

     一.     先貼一張圖,這個界面就是程式的主界面了:

笨笨圖檔批量抓取下載下傳 V0.2 beta[C# | WinForm | 正規表達式 | HttpWebRequest | Async異步程式設計]

    二.     部分代碼說明(主要講解異步分析和下載):

          異步分析下載採取的策略是邊分析邊下載,即不等資料全部分析完畢,就開始下載已經分析出來的圖檔連結。下載成功的連結,會在list框中該連結前面劃上√ ;未能下載的圖檔,有可能是分析錯誤或者是下載異常。

         1.     異步分析部分代碼

笨笨圖檔批量抓取下載下傳 V0.2 beta[C# | WinForm | 正規表達式 | HttpWebRequest | Async異步程式設計]

        /// <summary>

        /// Asynchronous analyze-and-download entry point: records the target URL and save
        /// path, starts the analysis timer, and kicks off an asynchronous fetch of the page.
        /// The fetched data is handled by asyncurianalyze when the download completes.

        /// </summary>
        /// <param name="url">Address of the page to fetch; stored in the uristring field.</param>
        /// <param name="savepath">Stored in the savepath field; asyncdownload reads it when saving images.</param>

        private void asyncanalyzeanddownload(string url, string savepath)

        {

            this.uristring = url;

            this.savepath = savepath;

            // Region: analysis timing starts. The counters are reset and the start tick is
            // captured in count; asyncurianalyze reads count/freq to compute the elapsed time.
            // (queryperformancefrequency/queryperformancecounter are declared elsewhere —
            // presumably the Win32 high-resolution timer imports; TODO confirm.)
            #region 分析計時開始

            count = 0;

            count1 = 0;

            freq = 0;

            result = 0;

            queryperformancefrequency(ref freq);

            queryperformancecounter(ref count);

            #endregion

            // NOTE(review): the using block ends (and disposes the client) right after the
            // asynchronous request is started, because the wait below is commented out —
            // confirm this is intended.
            using (webclient wclient = new webclient())

            {

                // Passed as the user state of the async call; asyncurianalyze retrieves it
                // from e.userstate and signals it when analysis finishes.
                autoresetevent waiter = new autoresetevent(false);

                wclient.credentials = credentialcache.defaultcredentials;

                wclient.downloaddatacompleted += new downloaddatacompletedeventhandler(asyncurianalyze);

                wclient.downloaddataasync(new uri(uristring), waiter);

                //waiter.waitone();     // blocks the current thread until a signal is received

            }

        }

        /// Asynchronous analysis: completion handler for the page download started in
        /// asyncanalyzeanddownload. It decodes the downloaded bytes as UTF-8, extracts image
        /// links with a regular expression, turns short paths into full URLs, lists each new
        /// link in the UI and immediately starts an asynchronous request for it
        /// (completion handled by asyncdownload). Finally it signals the waiter, records the
        /// elapsed analysis time in the status bar and sets isanalyzecomplete.
        ///
        /// NOTE(review): the braces of the method body and of the try/finally blocks are
        /// missing in this listing (apparently lost when the code was extracted); the code
        /// lines below are left exactly as found.

        protected void asyncurianalyze(object sender, downloaddatacompletedeventargs e)

            // The user state is the autoresetevent supplied to downloaddataasync.
            autoresetevent waiter = (autoresetevent)e.userstate;

            try

                // Only analyze when the download was neither cancelled nor failed.
                if (!e.cancelled && e.error == null)

                {

                    string dndir = string.empty;

                    string domainname = string.empty;

                    string uri = uristring;

                    // Get the domain name, e.g. http://www.sina.com/

                    match match = regex.match(uri, @"((http(s)?://)?)+[\w-.]+[^/]");//, regexoptions.ignorecase

                    domainname = match.value;

                    // Get the deepest directory of the URL, e.g. http://www.sina.com/mail/

                    if (domainname.equals(uri))

                        dndir = domainname;

                    else

                        dndir = uri.substring(0, uri.lastindexof('/'));

                    // dndir always ends with a slash from here on.
                    dndir += '/';

                    // Fetch the data: decode the downloaded bytes as UTF-8 text.

                    string pagedata = encoding.utf8.getstring(e.result);

                    // Links already seen during this analysis run (used for de-duplication).
                    list<string> urllist = new list<string>();

                    // Match full paths ending in one of the extensions in imagetype
                    // (imagetype is defined elsewhere and spliced into the pattern).

                    match = regex.match(pagedata, @"((http(s)?://)?)+(((/?)+[\w-.]+(/))*)+[\w-./]+\.+(" + imagetype + ")"); //, regexoptions.ignorecase

                    while (match.success)

                    {

                        string item = match.value;

                        // Short path handling: a link without a scheme is prefixed with the
                        // domain when it starts with '/', otherwise with the page's directory.

                        if (item.indexof("http://") == -1 && item.indexof("https://") == -1)

                            item = (item[0] == '/' ? domainname : dndir) + item;

                        if (!urllist.contains(item))

                        {

                            urllist.add(item);

                            // imgurllist holds the links not yet downloaded; asyncdownload
                            // removes an entry once its image has been saved.
                            imgurllist.add(item);

                            // Show the analysis result in real time.

                            addlbshowitem(item);

                            // Download while analyzing: start the request for this image now;
                            // the request itself is passed along as the async state.

                            webrequest hwr = webrequest.create(item);

                            hwr.begingetresponse(new asynccallback(asyncdownload), hwr);

                            //hwr.timeout = "0x30d40";        // default 0x186a0 -> 100000 0x30d40 -> 200000

                            //hwr.method = "post";

                            //hwr.contenttype = "application/x-www-form-urlencoded";

                            //hwr.maximumautomaticredirections = 3;

                            //hwr.accept ="image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";

                            //hwr.accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

                            //iasyncresult iar = hwr.begingetresponse(new asynccallback(asyncdownload), hwr);

                            //iar.asyncwaithandle.waitone();

                        }

                        match = match.nextmatch();

                    }

                }

            finally

                // Signal the event handed over by asyncanalyzeanddownload.
                waiter.set();

                // Region: analysis timing ends. Elapsed seconds = (end tick - start tick) / freq.
                #region 分析計時結束

                queryperformancecounter(ref count1);

                count = count1 - count;

                result = (double)(count) / (double)freq;

                toolstripstatuslabel1.text = "分析完畢!";

                toolstripstatuslabel2.text = string.format(" | 分析耗時:{0}秒", result);

                application.doevents();

                #endregion

                // Analysis finished.

                isanalyzecomplete = true;

笨笨圖檔批量抓取下載下傳 V0.2 beta[C# | WinForm | 正規表達式 | HttpWebRequest | Async異步程式設計]

     這兩個方法主要是用webclient發出請求,異步取得網址所傳回的資料,並對資料進行分析、提取圖檔連結。提取主要有兩種方式:一種是完整路徑的圖檔連結;一種是短路徑的連結,比如/images/bg.gif,程式會自動為其加上域名部分,組成完整的連結。

     2.     異步下載部分代碼

笨笨圖檔批量抓取下載下傳 V0.2 beta[C# | WinForm | 正規表達式 | HttpWebRequest | Async異步程式設計]

        /// Asynchronously receives data: callback for the image requests started in
        /// asyncurianalyze. It reads the response stream as an image, saves it under savepath
        /// with a timestamp-based file name and the URL's extension, removes the URL from the
        /// pending list and marks the matching list-box entry.
        ///
        /// NOTE(review): this listing is incomplete — braces and the "try" keyword are
        /// missing and the body of the trailing catch is not shown; the code lines below are
        /// left exactly as found.

        /// <param name="asyncresult">Result of begingetresponse; its asyncstate is the originating webrequest.</param>

        public  void asyncdownload(iasyncresult asyncresult)  

            // Region: download timing starts.
            #region 下載下傳計時開始

            // NOTE(review): both calls below are indented under the if, but no braces are
            // shown — presumably both were meant to run only on the first download; confirm.
            if (cfreq == 0)

                queryperformancefrequency(ref cfreq);

                queryperformancecounter(ref ccount);

            webrequest request = (webrequest)asyncresult.asyncstate;

            string url = request.requesturi.tostring();

                webresponse response = request.endgetresponse(asyncresult);

                using (stream stream = response.getresponsestream())

                    image img = image.fromstream(stream);

                    // The text after the last '.' of the URL is used as the file extension.
                    string[] tmpurl = url.split('.');

                    img.save(string.concat(savepath, "/", datetime.now.tostring("yyyymmddhhmmssfff"), ".", tmpurl[tmpurl.length - 1]));

                    img.dispose();

                    stream.close();

                alldone.set();

                // Remove the downloaded image from the list of not-yet-downloaded images.

                imgurllist.remove(url);

                // Update the list box.

                int indexitem = this.lbshow.items.indexof(url);

                if (indexitem >= 0 && indexitem <= this.lbshow.items.count)

                    setlbshowitem(indexitem);

            catch (exception)

笨笨圖檔批量抓取下載下傳 V0.2 beta[C# | WinForm | 正規表達式 | HttpWebRequest | Async異步程式設計]

     這部分就是異步下載圖檔並儲存的代碼。調用部分請看asyncurianalyze方法:圖檔連結比對成功後就開始下載圖檔,每下載完一張圖檔,就更新顯示在界面正下方的list框內(在連結前標記√ )。

程式和代碼:

後話

     寫這個東西很大一部分程度上是為了練習,裡面注釋比較多,就當是學習交流吧,歡迎多多指教: )

轉載:http://www.cnblogs.com/over140/archive/2008/08/22/1273587.html

繼續閱讀