# -*- coding: utf-8 -*-
# Python 3 (the script imports urllib.request, which does not exist on Python 2.7)
# XiaoDeng
# http://tieba.baidu.com/p/2460150866
# Tag operations with BeautifulSoup


import re
import urllib.request

from bs4 import BeautifulSoup

# For a live URL, the page can be fetched like this instead:
# html_doc = "http://tieba.baidu.com/p/2460150866"
# req = urllib.request.Request(html_doc)
# webpage = urllib.request.urlopen(req)
# html = webpage.read()



# Sample document that the rest of the script parses.
# NOTE(review): the HTML tags of this sample were lost when the listing was
# extracted -- only the text nodes survive. The listing line numbers that had
# been fused into the text have been removed here, but the markup itself
# (including the <div class="atcTit_more"> element the script searches for)
# still has to be restored from the original document -- TODO.
html="""

The Dormouse's story

The Dormouse's story
Once upon a time there were three little sisters; and their names were
,
Lacie and
Tillie;
Lacie
and they lived at the bottom of a well.

加載中…

個人資料

-
- 部落格等級:
- 部落格積分:0

-
- 部落格通路:3,971
- 關注人氣:0
- 獲贈金筆:0支
- 贈出金筆:0支
- 榮譽徽章:

更多>>
...
"""
soup = BeautifulSoup(html, 'html.parser')  # parsed document object


# Print every <div> whose class is 'atcTit_more'.
# The commented-out `string='更多'` argument would additionally filter the
# matches by their text content (per the original author's note: "a div with
# class xxx and text content hahaha").
for k in soup.find_all('div', class_='atcTit_more'):  # , string='更多'
    print(k)
# Sample output recorded in the original listing (its markup was lost in
# extraction; only the text survives): 更多>>