python爬蟲爬取深交所資料
話不多說,直接上代碼:
import requests
from bs4 import BeautifulSoup
import xlwt
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36 Edg/88.0.705.63'}
def Get_data(url, save_path="C:/Users/DELL/Desktop/深交所基金市場概況.xls"):
    """Fetch the SZSE fund-market overview report, print it, and save it as .xls.

    Parameters:
        url: JSON report endpoint (the `lf_fund_scgk` catalog on fund.szse.cn).
        save_path: destination for the generated .xls file; defaults to the
            path the original script hard-coded, so existing callers are unchanged.

    Raises:
        requests.RequestException / requests.HTTPError on network failure,
        KeyError / IndexError if the response payload shape is unexpected.
    """
    # Fetch with an explicit timeout and fail loudly on HTTP errors instead of
    # silently parsing an error page as JSON.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    report = resp.json()[0]

    cols = report['metadata']['cols']
    rows = report['data']

    # The five fields used by the report, in display order.
    fields = ['lbmc', 'zqsl', 'zgb', 'sjzz', 'cjje']

    # Header row: the original kept 'lbmc' verbatim and replaced HTML <br>
    # line breaks with '/' in the remaining four column titles.
    header = [cols['lbmc']] + [cols[f].replace('<br>', '/') for f in fields[1:]]

    # The original trimmed a leading category prefix off the 'lbmc' cell of
    # rows 2-4 (keeping the last 3/3/5 characters); row 1 is kept whole.
    # Preserved exactly to keep output identical.
    name_slices = {1: slice(-3, None), 2: slice(-3, None), 3: slice(-5, None)}

    table = [header]
    for i, row in enumerate(rows[:4]):
        values = [row[f] for f in fields]
        trim = name_slices.get(i)
        if trim is not None:
            values[0] = values[0][trim]
        table.append(values)

    # Console preview of the table.
    print('*' * 101)
    for values in table:
        print('{0} {1} {2} {3} {4}'.format(*values))
    print('*' * 101)

    # Write header + data rows to a single worksheet.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet('深交所基金市場概況', cell_overwrite_ok=True)
    for r, values in enumerate(table):
        for c, value in enumerate(values):
            sheet.write(r, c, value)
    book.save(save_path)
if __name__ == '__main__':
    # SZSE fund-market overview JSON endpoint; the `random` query parameter
    # is a cache-buster baked into the captured request.
    target = ('http://fund.szse.cn/api/report/ShowReport/data'
              '?SHOWTYPE=JSON&CATALOGID=lf_fund_scgk'
              '&loading=first&random=0.9140249439976937')
    Get_data(target)
運作結果: