# 天天看點

# 51job

#-*-coding:utf-8-*-
"""
需求:51job,打開網頁,輸入“自動化測試”選擇地點為北京,點選搜尋,将搜尋結果中的薪資儲存到.txt檔案中
"""
import re
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
# Launch Chrome, open the 51job home page, enter the search keyword, restrict
# the work location to Beijing and submit the search.
driver = webdriver.Chrome()
try:
    # Implicit wait: element lookups keep polling for up to 10 seconds.
    driver.implicitly_wait(10)
    driver.maximize_window()
    driver.get('https://www.51job.com/')

    # Type the keyword into the search box.
    driver.find_element(By.ID, 'kwdselectid').send_keys('自動化測試')

    # Open the work-location picker; the fixed pause gives the popup time to
    # render before its contents are queried.
    driver.find_element(By.ID, 'work_position_input').click()
    time.sleep(3)

    # Clear the cities that are already selected: per the original author's
    # note, clicking a selected entry removes it from the selection.
    select_els = driver.find_elements(
        By.CSS_SELECTOR, '#work_position_click_multiple_selected>span')
    for el in select_els:
        el.click()

    # Select Beijing (category id ..._010000). The original script also
    # carried a disabled click on ..._070200, labelled as Nanjing.
    driver.find_element(
        By.ID, 'work_position_click_center_right_list_category_000000_010000'
    ).click()

    # Confirm the location selection.
    driver.find_element(By.ID, 'work_position_click_bottom_save').click()

    # Submit the search, then pause briefly while the result page loads.
    driver.find_element(
        By.CSS_SELECTOR,
        'body > div.content > div > div.fltr.radius_5 > div > button',
    ).click()
    time.sleep(2)
except BaseException:
    # Do not leave an orphaned browser behind if any setup step fails.
    driver.quit()
    raise

# Walk every result page, write the salary line of each job card to
# data2.txt, and always close the browser at the end.

# CSS selectors for the job cards on a result page and for the "next" link.
JOB_CARD_SELECTOR = 'body > div:nth-child(4) > div.j_result > div > div.leftbox > div:nth-child(4) > div.j_joblist > div'
NEXT_PAGE_SELECTOR = 'body > div:nth-child(4) > div.j_result > div > div.leftbox > div:nth-child(4) > div.j_page > div > div > div > ul > li.next > a'

try:
    # The first number in the text of the element with class "td" is used as
    # the total page count.
    page_info = driver.find_element(By.CLASS_NAME, 'td').text
    page_match = re.search(r'\d+', page_info)
    if page_match is None:
        raise ValueError(
            f'no page count found in pagination text: {page_info!r}')
    num = int(page_match.group())
    print(f'一共有{num}頁的資訊')

    job_collection = []  # every salary string collected across all pages

    # Open the output file once, before the loop. Re-opening it with mode 'w'
    # for every page would truncate it each time and keep only the last page.
    # UTF-8 is set explicitly so the Chinese text is written the same way on
    # every platform.
    with open('data2.txt', 'w', encoding='utf-8') as f:
        for i in range(num):
            print(f'開始寫第{i+1}頁的内容')
            time.sleep(3)  # give the result list time to render

            jobs = driver.find_elements(By.CSS_SELECTOR, JOB_CARD_SELECTOR)
            for job in jobs:
                # The script treats the third text line of a card as the
                # salary; cards with fewer lines are skipped, not fatal.
                lines = job.text.split('\n')
                if len(lines) < 3:
                    continue
                salary = lines[2]
                f.write(salary + '\n')
                job_collection.append(salary)

            if i < num - 1:
                # Not the last page yet: move on to the next one.
                driver.find_element(By.CSS_SELECTOR, NEXT_PAGE_SELECTOR).click()

            # Look up the result container before continuing; per the original
            # note this lets the page settle before the next page is handled.
            driver.find_element(By.CLASS_NAME, 'j_result')

    print(f'一共收集了{len(job_collection)}條職位資訊')
finally:
    # Close the browser whether scraping succeeded or failed.
    driver.quit()