天天看點

python爬蟲登入下載_Python3爬蟲之五網頁下載器的幾種方法【Python使用cookie模擬登入CSDN】...

(1)直接請求

from urllib import request

# Target URL
url = "http://www.zhihu.com"

# Send a plain request; the context manager closes the connection
# once the body has been consumed.
with request.urlopen(url) as response:
    # A status code of 200 means the request succeeded
    if response.getcode() == 200:
        # Read and print the raw response body
        print(response.read())

(2)使用Request添加data、http header等資料

from urllib import request

# Target URL
url = "http://www.zhihu.com"

# Extra data to attach to the request: a browser-like User-Agent header
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"}

# Build the Request object carrying the custom headers
req = request.Request(url, headers=header)

# Send the request; the context manager closes the connection
# once the body has been consumed.
with request.urlopen(req) as res:
    # A status code of 200 means the request succeeded
    if res.getcode() == 200:
        # Read and print the raw response body
        print(res.read())

(3)利用cookies模拟登入我的CSDN部落格

import urllib

import re

from urllib import request

import http.cookiejar

# Target URL (used both to fetch the login form and to post it back)
url = 'https://passport.csdn.net'

# Cookie container
cookie = http.cookiejar.CookieJar()

# Build an opener whose requests and responses pass through the cookie jar
opener = request.build_opener(request.HTTPCookieProcessor(cookie))

# Add an HTTP header so every request carries a browser-like User-Agent
opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36')]

# Fetch the page first: the "lt" and "execution" values embedded in it
# have to be sent back together with the credentials.
with opener.open(url) as login_page:
    h = login_page.read().decode("utf8")

pattern1 = re.compile(r'name="lt" value="(.*?)"')
pattern2 = re.compile(r'name="execution" value="(.*?)"')

b1 = pattern1.search(h)
b2 = pattern2.search(h)

# search() returns None when a field is missing (e.g. the page layout has
# changed); fail with a clear message instead of an AttributeError below.
if b1 is None or b2 is None:
    raise RuntimeError(
        "could not find the 'lt'/'execution' form fields in the page at " + url
    )

# Form data to submit
post_data = {
    'username': '***',
    'password': '***',
    'lt': b1.group(1),
    'execution': b2.group(1),
    '_eventId': 'submit',
}
post_data = urllib.parse.urlencode(post_data).encode('utf-8')

# Post the form through the cookie-aware opener. The response body is not
# used here, so the response is closed right away.
res = opener.open(url, post_data)
res.close()

# Request another page with the same opener (and therefore the same cookies)
with opener.open('http://my.csdn.net/my/mycsdn') as res2:
    text2 = res2.read().decode('utf-8')

print(text2)