# 天天看點

# 爬取大學排名

#_*_coding:utf-8_*_
# @Time          :2018/6/21$  {TIME}
#@Author        :[email protected]
#@File             :Example.py
import requests
from bs4 import  BeautifulSoup

def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request uses a 30-second timeout. The body is decoded with the
    encoding that requests detects from the content (``apparent_encoding``)
    instead of the one declared in the response headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection problem, timeout, invalid URL or HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into an exception so they take the
        # failure path below.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures mean "no page"; a bare ``except``
        # would also hide KeyboardInterrupt and genuine programming errors.
        return ''

import bs4
def fillUnivList(ulist, html):
    """Append one four-field record per table row found in ``html``.

    The first ``<tbody>`` of the document is scanned; every direct child
    that is a tag contributes the ``.string`` of its first four ``<td>``
    cells as a list appended to ``ulist``.

    Args:
        ulist: List that is extended in place with the extracted rows.
        html: HTML source to parse. May be empty.

    Returns:
        None. Results are delivered through ``ulist``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # No table body (e.g. empty page after a failed download):
        # leave ulist untouched instead of failing on ``None.children``.
        return
    for tr in tbody.children:
        # ``children`` also yields the text nodes between rows; keep tags only.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')  # calling a tag is shorthand for find_all
        if len(tds) < 4:
            # Row without the four expected cells; skip it rather than
            # raising IndexError.
            continue
        # NOTE: ``.string`` is None for a cell that holds more than one child.
        ulist.append([td.string for td in tds[:4]])


def printUnivList(ulist, num):
    """Print a header line followed by the first ``num`` rows of ``ulist``.

    Each line holds four tab-separated, centre-aligned columns with widths
    10, 20, 20 and 20.

    Args:
        ulist: Sequence of rows; the first four items of each row are shown.
        num: Maximum number of rows to print. If ``ulist`` is shorter, only
            the available rows are printed; values <= 0 print the header
            only.

    Returns:
        None. Output goes to standard output.
    """
    tplt = '{:^10}\t{:^20}\t{:^20}\t{:^20}'
    print(tplt.format('排名', '學校', '省份', '總分'))
    # Slicing clamps to the list length, so asking for more rows than exist
    # no longer raises IndexError; max() keeps a negative num printing nothing.
    for u in ulist[:max(num, 0)]:
        # None does not accept the '^' alignment spec, so show it as an
        # empty cell instead of raising TypeError.
        cells = ['' if u[k] is None else u[k] for k in range(4)]
        print(tplt.format(*cells))

def main():
    """Fetch the 2016 ranking page and print up to 30 rows of it.

    Downloads the page with ``getHTMLText``, extracts the rows with
    ``fillUnivList`` and prints them with ``printUnivList``. If the
    download yields no text, a short notice is printed and nothing else
    is attempted.
    """
    uinfo = []
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    if not html:
        # getHTMLText signals failure with an empty string; parsing it
        # would find no table, so stop here with a clear message.
        print('Failed to download the ranking page: ' + url)
        return
    fillUnivList(uinfo, html)
    # Never ask for more rows than were actually extracted.
    printUnivList(uinfo, min(30, len(uinfo)))

# Script entry point: fetch the ranking page and print the table.
# NOTE(review): there is no ``if __name__ == '__main__'`` guard, so this call
# also runs whenever the module is imported.
main()