天天看點

Java爬蟲 50行代碼爬取一個網頁(最簡單的爬取)

最簡單、最基礎的寫法，沒有任何騷操作。

package regex;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

/**
 * Minimal single-page crawler: downloads one hard-coded web page, saves it
 * line by line to {@code a.txt}, and prints the page content to standard output.
 *
 * <p>Both the HTTP response and the output file are handled as GBK text.
 */
public class Test {
    /**
     * Fetches the page, mirrors every line into {@code a.txt}, and prints all
     * lines concatenated (without line separators) to standard output.
     *
     * <p>Any {@link IOException} raised while connecting, reading, or writing
     * is caught and its stack trace is printed; it is not propagated.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String [] args)
    {
        // Declared outside the try so the finally block can release it.
        HttpURLConnection connection = null;
        try
        {
            // Target page address.
            URL url = new URL("https://www.cangqionglongqi.com/xingjiyoulun/28543.html");
            // Open a connection to the URL.
            URLConnection urlConnection = url.openConnection();
            if (!(urlConnection instanceof HttpURLConnection))
            {
                System.out.println("請輸入 URL 位址");
                return;
            }
            // Downcast to get access to the HTTP-specific API.
            connection = (HttpURLConnection) urlConnection;

            // try-with-resources closes both streams on success and on failure.
            // The reader is opened first, so a.txt is only created/truncated
            // once the response stream has been obtained.
            try (BufferedReader in = new BufferedReader(
                         new InputStreamReader(connection.getInputStream(), "gbk"));
                 BufferedWriter wr = new BufferedWriter(
                         new OutputStreamWriter(new FileOutputStream("a.txt"), "gbk")))
            {
                // StringBuilder avoids quadratic copying from repeated String +=.
                StringBuilder page = new StringBuilder();
                String current;
                // Keep reading until the response is exhausted.
                while ((current = in.readLine()) != null)
                {
                    page.append(current);
                    // Mirror each line into the output file; the buffered
                    // writer is flushed when it is closed.
                    wr.write(current);
                    wr.newLine();
                }
                System.out.println(page);
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            if (connection != null)
            {
                // Release the underlying HTTP connection.
                connection.disconnect();
            }
        }
    }
}