天天看点

Python 配合正则表达式爬取网站日历信息

import time
import requests as rq
import re
def local():
    """Return the current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    # With no explicit time tuple, strftime() formats the current local time.
    return time.strftime(timestamp_format)
# Download the calendar page once; every regex below scans this one response.
# The timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() reports an HTTP error instead of parsing an error page.
html = rq.get("https://wannianrili.51240.com", timeout=10)
html.raise_for_status()

# Two-digit day numbers, one per calendar cell.
day_pattern = r'<span class="wnrl_td_gl">(\d\d)</span>'
day = re.findall(day_pattern, html.text)

# Inner text of every "nongli" block (presumably the lunar date, going by the
# class name -- confirm against the page).
nongli_pattern = r'<div class="wnrl_k_you_id_wnrl_nongli">(.+?)</div>'
nongli = re.findall(nongli_pattern, html.text)

# Month label taken from the "<4 digits><sep><month><sep>" header span.
# The previous pattern, \d\d\d\d.\d(\d.)?, captured only what followed the first
# month digit, so "10月" came back as "0月" and an unpadded "3月" did not match
# at all.  Here an optional leading zero is skipped and the whole month number
# plus its suffix is captured: "03月" -> "3月" (as before), "10月" -> "10月".
month_pattern = r'<span class="wnrl_xuanze_top_wenzi">\d\d\d\d.0?(\d{1,2}.)</span>'
month = re.findall(month_pattern, html.text)

# Text of the per-cell note span in any of its three class variants
# (presumably the festival / solar-term name -- confirm against the page).
jieri_pattern = r'<span class=(?:"wnrl_td_bzl wnrl_td_bzl_hong"|"wnrl_td_bzl"|"wnrl_td_bzl wnrl_td_bzl_lv")>(.+?)</span>'
jieri = re.findall(jieri_pattern, html.text)

# Only the first month label is used for every row; fall back to an empty
# label rather than crashing when the header span was not found.
month_label = month[0] if month else ""

# zip() stops at the shortest list, so a mismatch between the three scraped
# lists truncates the table instead of raising IndexError.
text = "".join(
    month_label + day_number + "号" + "   " + lunar_text + "  " + note_text + "\n"
    for day_number, lunar_text, note_text in zip(day, nongli, jieri)
)
print(text)
# Headline of each day's detail panel: "<year><sep>MM月DD日 详细信息".
# NOTE(review): the original pattern required "月" right after the 4-digit
# year, which looks like a typo for "年"; both are accepted here so anything
# the old pattern matched still matches -- confirm against the live page.
reobject1 = re.compile(r"\d\d\d\d[年月]\d\d月\d\d日 详细信息")
# Label and body of each entry inside the detail panels.
reobject2 = re.compile(r'<span class="wnrl_k_xia_nr_wnrl_beizhu_biaoti">(.+?)</span>')
reobject3 = re.compile(r'<span class="wnrl_k_xia_nr_wnrl_beizhu_neirong">(.+?)</span>')

# The slicing below assumes every day contributes exactly this many
# label/body pairs to the page.
ENTRIES_PER_DAY = 16

# Zero-based index of today: characters 8-9 of "YYYY-MM-DD HH:MM:SS" are the
# day of the month.  Computed once so the headline and the entry slices can
# never disagree (e.g. if the clock rolls past midnight between two calls).
cishu = int(local()[8:10]) - 1
first_entry = ENTRIES_PER_DAY * cishu

biaoti = reobject1.findall(html.text)[cishu]
zbiaoti = reobject2.findall(html.text)[first_entry:first_entry + ENTRIES_PER_DAY]
content = reobject3.findall(html.text)[first_entry:first_entry + ENTRIES_PER_DAY]
count = 0
print(content)
def pdjs(x):
    """Return True when ``x`` is odd (``x % 2`` is non-zero), else False."""
    return x % 2 != 0
    
# Lay out today's detail entries two per line: an odd-numbered entry is
# followed by a six-space gap, an even-numbered one ends the line.
# Iterating over the pairs that were actually scraped (instead of a fixed
# range(16)) avoids an IndexError when fewer than 16 entries are available;
# with 16 entries the output is unchanged.
info = ""
for count, (entry_label, entry_body) in enumerate(zip(zbiaoti, content), start=1):
    info += entry_label + ":" + entry_body
    info += "      " if pdjs(count) else "\n"
print(biaoti + ":\n" + info)