天天看点

16.ajax_case01

# 抓取北京市2018年积分落户公示名单
# \'http://www.bjrbj.gov.cn/integralpublic/settlePerson\'

import csv
import json
import requests

fw = open(\'luohu.csv\', \'w\')
writer = csv.writer(fw)
writer.writerow([\'id\',\'name\',\'birthday\',\'company\',\'score\'])

def get_publicity(page_number):
    url = \'http://www.bjrbj.gov.cn/integralpublic/settlePerson/settlePersonJson?sort=pxid&order=asc&limit=100&offset=0&name=&rows=100&page={}\'.format(page_number*100)

    header = {
            \'Accept\':\'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\',
            \'User-Agent\':\'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36\'
    }

    response = requests.get(url,headers=header,timeout=5)

    result = json.loads(response.text)

    for item in result[\'rows\']:
        id = item[\'pxid\']
        name = item[\'name\']
        birthday = item[\'csrq\']
        company = item[\'unit\']
        score = item[\'score\']
        print(id,name,birthday,company,score)
        writer.writerow([id, name, birthday, company, score])

def main():
    for i in range(0,61):
        get_publicity(i)

if __name__ == \'__main__\':
    main()