天天看點

笨笨圖檔批量抓取下載 V0.2 beta[C# | WinForm | 正規表達式 | HttpWebRequest | Async異步程式設計]

一.     先貼一張圖,這個界面就是程式的主界面了:

 二.     部分代碼說明(主要講解異步分析和異步下載):

          異步分析下載採取的策略是邊分析邊下載,即不等待資料全部分析完畢,就開始下載已經分析出來的圖檔連結。下載成功的均在List框連結前面劃上了√,未能下載的圖檔有可能是分析錯誤或者是下載異常。

         1.     異步分析部分代碼

        /// <summary>

        /// 異步分析下載下傳

        /// </summary>

        private void AsyncAnalyzeAndDownload(string url, string savePath)

        {

            this.uriString = url;

            this.savePath = savePath;

            #region 分析計時開始

            count = 0;

            count1 = 0;

            freq = 0;

            result = 0;

            QueryPerformanceFrequency(ref freq);

            QueryPerformanceCounter(ref count);

            #endregion

            using (WebClient wClient = new WebClient())

            {

                AutoResetEvent waiter = new AutoResetEvent(false);

                wClient.Credentials = CredentialCache.DefaultCredentials;

                wClient.DownloadDataCompleted += new DownloadDataCompletedEventHandler(AsyncURIAnalyze);

                wClient.DownloadDataAsync(new Uri(uriString), waiter);

                //waiter.WaitOne();     //阻止目前線程,直到收到信号

            }

        }

        /// <summary>
        /// Asynchronous analysis: handles completion of the page download, extracts
        /// image links from the downloaded data with a regular expression, lists each
        /// new link and immediately starts an asynchronous request for it.
        /// </summary>
        /// <param name="sender">Event source; not used in the visible body.</param>
        /// <param name="e">Completion arguments: <c>UserState</c> is cast to the
        /// <c>AutoResetEvent</c>, <c>Result</c> holds the downloaded bytes.</param>
        /// <remarks>
        /// NOTE(review): in this listing the method's opening brace, the braces of the
        /// try/finally blocks and the closing braces are absent - presumably lost when
        /// the snippet was published; confirm against the original source.
        /// </remarks>

        protected void AsyncURIAnalyze(Object sender, DownloadDataCompletedEventArgs e)

            AutoResetEvent waiter = (AutoResetEvent)e.UserState;

            try

                // Only analyze when the download was neither cancelled nor failed.
                if (!e.Cancelled && e.Error == null)

                {

                    string dnDir = string.Empty;

                    string domainName = string.Empty;

                    string uri = uriString;

                    // Extract the domain part of the page URL (the original example gives http://www.sina.com/)

                    Match match = Regex.Match(uri, @"((http(s)?://)?)+[\w-.]+[^/]");//, RegexOptions.IgnoreCase

                    domainName = match.Value;

                    // Derive the deepest directory of the page URL, e.g. http://www.sina.com/mail/

                    if (domainName.Equals(uri))

                        dnDir = domainName;

                    else

                        dnDir = uri.Substring(0, uri.LastIndexOf('/'));

                    dnDir += '/';

                    // Decode the downloaded bytes as UTF-8 text

                    string pageData = Encoding.UTF8.GetString(e.Result);

                    // Links already seen during this analysis pass (used to skip duplicates)
                    List<string> urlList = new List<string>();

                    // Match image paths ending in an ImageType extension
                    // (presumably a '|'-separated list of extensions - confirm where ImageType is defined)

                    match = Regex.Match(pageData, @"((http(s)?://)?)+(((/?)+[\w-.]+(/))*)+[\w-./]+\.+(" + ImageType + ")"); //, RegexOptions.IgnoreCase

                    while (match.Success)

                    {

                        string item = match.Value;

                        // Short path handling: prefix the domain when the path starts with '/',
                        // otherwise prefix the page directory

                        if (item.IndexOf("http://") == -1 && item.IndexOf("https://") == -1)

                            item = (item[0] == '/' ? domainName : dnDir) + item;

                        if (!urlList.Contains(item))

                        {

                            urlList.Add(item);

                            imgUrlList.Add(item);

                            // Show the analysis result in real time

                            AddlbShowItem(item);

                            // Download while the analysis is still running

                            WebRequest hwr = WebRequest.Create(item);

                            hwr.BeginGetResponse(new AsyncCallback(AsyncDownLoad), hwr);

                            //hwr.Timeout = "0x30D40";        // default 0x186a0 -> 100000 0x30D40 -> 200000

                            //hwr.Method = "POST";

                            //hwr.ContentType = "application/x-www-form-urlencoded";

                            //hwr.MaximumAutomaticRedirections = 3;

                            //hwr.Accept ="image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";

                            //hwr.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

                            //IAsyncResult iar = hwr.BeginGetResponse(new AsyncCallback(AsyncDownLoad), hwr);

                            //iar.AsyncWaitHandle.WaitOne();

                        }

                        match = match.NextMatch();

                    }

                }

            finally

                // Signal the event that was passed as user state.
                waiter.Set();

                #region 分析計時結束

                // Stop the analysis timer: elapsed seconds = counter delta / counter frequency.
                QueryPerformanceCounter(ref count1);

                count = count1 - count;

                result = (double)(count) / (double)freq;

                toolStripStatusLabel1.Text = "分析完畢!";

                toolStripStatusLabel2.Text = string.Format(" | 分析耗時:{0}秒", result);

                Application.DoEvents();

                #endregion

                // Analysis finished

                isAnalyzeComplete = true;

     這兩個方法主要是用WebClient來請求然後異步獲得網址所傳回的資料并對資料分析,提取圖檔連結,提取主要有兩種方式:一種是完整路徑的圖檔連結;一種是短路徑的連結,比如/images/bg.gif,程式會自動為其加上域名部分組成完整的連結。

2.     異步下載部分代碼

        /// <summary>
        /// Receives image data asynchronously: completes the pending request, loads the
        /// response stream as an image, saves it under <c>savePath</c> and updates the
        /// pending list and the list box.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing shows a catch clause but no try keyword, no catch
        /// body and almost no braces - presumably lost when the snippet was published;
        /// confirm against the original source before relying on the structure below.
        /// </remarks>

        /// <param name="asyncResult">Async result whose <c>AsyncState</c> is cast to the <c>WebRequest</c>.</param>

        public  void AsyncDownLoad(IAsyncResult asyncResult)  

            #region 下載下傳計時開始

            // Start the download timer when cfreq is still 0.
            // NOTE(review): without braces only the first statement is guarded by the if;
            // the indentation suggests both were meant to be - confirm.
            if (cfreq == 0)

                QueryPerformanceFrequency(ref cfreq);

                QueryPerformanceCounter(ref ccount);

            WebRequest request = (WebRequest)asyncResult.AsyncState;

            string url = request.RequestUri.ToString();

                WebResponse response = request.EndGetResponse(asyncResult);

                using (Stream stream = response.GetResponseStream())

                    Image img = Image.FromStream(stream);

                    // The text after the last '.' in the URL is used as the file extension.
                    string[] tmpUrl = url.Split('.');

                    // File name: savePath + "/" + millisecond timestamp + "." + extension.
                    img.Save(string.Concat(savePath, "/", DateTime.Now.ToString("yyyyMMddHHmmssfff"), ".", tmpUrl[tmpUrl.Length - 1]));

                    img.Dispose();

                    stream.Close();

                allDone.Set();

                // Remove the downloaded image from the list of images not yet downloaded

                imgUrlList.Remove(url);

                // Update the list box
                // NOTE(review): lbShow is read here from the request callback - confirm this
                // is safe with respect to the UI thread.

                int indexItem = this.lbShow.Items.IndexOf(url);

                // NOTE(review): the upper bound uses <= Count; < Count looks intended - confirm.
                if (indexItem >= 0 && indexItem <= this.lbShow.Items.Count)

                    SetlbShowItem(indexItem);

            catch (Exception)

     這部分就是異步下載圖檔並儲存的代碼。調用部分請看AsyncURIAnalyze方法:圖檔連結比對成功後就開始下載圖檔,每下載完一張圖檔就更新界面正下方的List框(在連結前標記√)。

程式和代碼: