# -*- coding: utf-8 -*-
# Python 3 (the script uses urllib.request, which does not exist in Python 2.7)
# XiaoDeng
# http://tieba.baidu.com/p/2460150866
# Tag operations


import re
import urllib.request

from bs4 import BeautifulSoup

# If the input is a URL, the page can be fetched like this instead:
#html_doc = "http://tieba.baidu.com/p/2460150866"
#req = urllib.request.Request(html_doc)
#webpage = urllib.request.urlopen(req)
#html = webpage.read()



# Sample document fed to BeautifulSoup further down in this script.
# NOTE(review): this literal contains no "<" characters at all, so it holds no
# <div> (or any other) element and a tag search over it cannot match anything.
# Presumably the original markup was stripped, and line-number residue mixed
# into the text, when this file was extracted -- TODO restore the original
# HTML. The text is kept exactly as found because the lost tags cannot be
# reconstructed from what is visible here.
html = """
22
The Dormouse's story23 24
The Dormouse's story25
Once upon a time there were three little sisters; and their names were26 ,27 Lacie and28 Tillie;29 Lacie30 and they lived at the bottom of a well.
31
加载中… 32
33
34 个人资料35 36 37 38
- 39
- 博客等级: 40
- 博客积分:041
42
- 43
- 博客访问:3,97144
- 关注人气:045
- 获赠金笔:0支46
- 赠出金笔:0支47
- 荣誉徽章:48
49 50 更多>> 51
...
52 """
# Parse the sample markup into a document object with the "html.parser" backend.
soup = BeautifulSoup(html, 'html.parser')


# Print every <div> whose class is "atcTit_more".
# The author's stated goal was "a div with a given class AND a given text";
# the text filter (string='更多') was left commented out, so only the class
# is matched here.
for k in soup.find_all('div', class_='atcTit_more'):
    print(k)
# NOTE(review): the author's trailing sample-output comment lost its markup in
# extraction; only the text "更多>>" survives.