新手爬蟲練習之 Instagram Explore - 利用selenium擷取cookies
Instagram的Explore界面內容是與賬號綁定的,我們需要擷取賬號對應的Cookie附加在請求頭上來擷取需要的資訊。
# Get cookies
import json
import os
import time

import click
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def _read_saved_cookies(file_path):
    """Return the cookie dict stored as JSON text in *file_path*."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _fetch_cookies_with_selenium(username, password):
    """Log in to Instagram with headless Chrome and return its cookies.

    Args:
        username: Value typed into the login form's ``username`` field.
        password: Value typed into the login form's ``password`` field.

    Returns:
        A ``{name: value}`` dict built from ``driver.get_cookies()``.

    Raises:
        Whatever Selenium raises when the browser cannot be started or a
        form field cannot be located; nothing is swallowed here.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('headless')  # run Chrome without a visible window
    # Create the driver before entering ``try``: if start-up fails there is
    # nothing to clean up, and ``finally`` must not touch an unbound name.
    driver = webdriver.Chrome(options=option)
    try:
        driver.get('https://www.instagram.com/accounts/login/?next=/explore/')
        driver.find_element(By.NAME, 'username').send_keys(username)
        driver.find_element(By.NAME, 'password').send_keys(password, Keys.ENTER)
        time.sleep(5)  # fixed wait for the login submission to complete
        # NOTE(review): BASE_URL is not defined in the visible part of this
        # file; it is presumably a module-level constant -- confirm it exists.
        driver.get(BASE_URL)
        time.sleep(5)  # fixed wait for the page to load before reading cookies
        cookies = driver.get_cookies()
    finally:
        # quit() ends the whole WebDriver session (close() would only close
        # the current window and leave the driver process running).
        driver.quit()
    return {cookie['name']: cookie['value'] for cookie in cookies}


def get_cookies():
    """Return the Instagram cookies for a user as a ``{name: value}`` dict.

    Prompts on stdin for a username and password. If a cookie file for that
    user already exists in the cookie folder it is read and returned;
    otherwise the cookies are obtained by logging in through Selenium and
    are then written to that file as JSON (plain text, unencrypted).

    Returns:
        dict: Cookie names mapped to cookie values.
    """
    username = input("please enter username: ")
    password = input("please enter password: ")

    # Raw string: in a normal literal ``\U`` starts a unicode escape and the
    # file would not even compile on Python 3.
    folder_path = r'C:\Users\Desktop\Python\savedcookies'
    # One file per account, named after the part of the login before any '@'.
    file_path = os.path.join(folder_path, '%s.txt' % username.split('@')[0])

    if not os.path.isdir(folder_path):  # create the cookie folder on first use
        click.echo("Create new folder to save cookies")
        time.sleep(1)
        os.makedirs(folder_path, exist_ok=True)

    if os.path.isfile(file_path):  # user already on record: reuse saved cookies
        click.echo("Read cookies from file")
        cookie_dict = _read_saved_cookies(file_path)
        click.echo("Complete...")
        return cookie_dict

    cookie_dict = _fetch_cookies_with_selenium(username, password)
    click.echo("Complete...")

    click.echo("Save cookies...")
    # Write mode, not append: the file must hold exactly one JSON document.
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(cookie_dict, f)
    return cookie_dict
代碼部分結束,均為自己編寫,新手入門,歡迎建議。