天天看點

java擷取網頁源代碼

public static String getHtmlContent(URL url, String encode) {

   StringBuffer contentBuffer = new StringBuffer();

   int responseCode = -1;

   HttpURLConnection con = null;

   try {

    con = (HttpURLConnection) url.openConnection();

    con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");// IE代理進行下載下傳

    con.setConnectTimeout(60000);

    con.setReadTimeout(60000);

    // 獲得網頁傳回資訊碼

    responseCode = con.getResponseCode();

    if (responseCode == -1) {

     String re = url.toString() + " : connection is failure...";

     con.disconnect();

     return re;

    }

    if (responseCode >= 400) // 請求失敗

    {

     String re = "請求失敗:get response code: " + responseCode;

    InputStream inStr = con.getInputStream();

    InputStreamReader istreamReader = new InputStreamReader(inStr, encode);

    BufferedReader buffStr = new BufferedReader(istreamReader);

    String str = null;

    while ((str = buffStr.readLine()) != null)

     contentBuffer.append(str);

    inStr.close();

   } catch (IOException e) {

    e.printStackTrace();

    contentBuffer = null;

    System.out.println("error: " + url.toString());

   } finally {

    con.disconnect();

   }

   return contentBuffer.toString();

  }

  public static String getHtmlContent(String url, String encode) {

   if (!url.toLowerCase().startsWith("http://")) {

    url = "http://" + url;

    URL rUrl = new URL(url);

    return getHtmlContent(rUrl, encode);

   } catch (Exception e) {

    return "網址錯誤!";

  //測試

  /*public static void main(String argsp[]){

  }*/