下载图片
package cn.tedu;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads every image referenced by the {@code <img>} tags of one fixed web page.
 *
 * <p>The page is fetched as text, its {@code <img>} tags are located with a regular
 * expression, every absolute URL inside those tags is extracted, and each URL is saved
 * as a sequentially numbered {@code .jpg} file in {@link #TARGET_DIR}.
 */
public class ImgTest {
    // Page whose images are downloaded.
    private static final String PAGE_URL = "http://www.ireader.com/index.php?ca=booksort.index&pid=92&cid=142&order=download&status=0&page=3";

    // Directory the numbered image files are written to.
    private static final String TARGET_DIR = "d:/library/history/";

    // One complete <img ...> tag carrying a src attribute. [^>]* keeps a match inside a
    // single tag, so several tags on the same line are reported separately.
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<img\\b[^>]*\\bsrc\\s*=[^>]*>", Pattern.CASE_INSENSITIVE);

    // An absolute URL (scheme://...). The match stops at whitespace, quotes and angle
    // brackets, so the surrounding attribute syntax is never part of the result.
    private static final Pattern IMG_SRC_PATTERN =
            Pattern.compile("[a-zA-Z]+://[^\\s\"'<>]+");

    // The charset parameter of a Content-Type header value.
    private static final Pattern CHARSET_PATTERN =
            Pattern.compile("charset\\s*=\\s*\"?([^\\s;\"]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Fetches {@link #PAGE_URL}, downloads all images found on it and prints the outcome.
     * The success message is printed only when the page was read and every image was saved.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            ImgTest cm = new ImgTest();
            // Fetch the HTML text of the page.
            String html = cm.getHtml(PAGE_URL);
            // Collect the <img> tags.
            List<String> imgTags = cm.getImageUrl(html);
            // Extract the image addresses from the tags.
            List<String> imgSrc = cm.getImageSrc(imgTags);
            int count = 120; // file numbering continues after 120, i.e. starts at 121.jpg
            int failed = 0;
            for (String src : imgSrc) {
                count++;
                String path = TARGET_DIR + count + ".jpg";
                // A single failed image must not abort the remaining downloads.
                if (!downloadPicture(src, path)) {
                    failed++;
                }
            }
            System.out.println(failed == 0 ? "下载成功" : "发生错误");
        } catch (Exception e) {
            System.out.println("发生错误");
            e.printStackTrace();
        }
    }

    /**
     * Reads the document at {@code url} as text.
     *
     * @param url address of the page to fetch
     * @return the page content, every line terminated by {@code '\n'}
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    private String getHtml(String url) throws Exception {
        URLConnection connection = new URL(url).openConnection();
        StringBuilder sb = new StringBuilder();
        try (InputStream in = connection.getInputStream();
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(in, charsetOf(connection)))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append('\n');
            }
        }
        return sb.toString();
    }

    /**
     * Picks the charset announced in the response's Content-Type header.
     *
     * @param connection an already connected URL connection
     * @return the announced charset, or UTF-8 when none is announced or it is not supported
     */
    private static java.nio.charset.Charset charsetOf(URLConnection connection) {
        String contentType = connection.getContentType();
        if (contentType != null) {
            Matcher matcher = CHARSET_PATTERN.matcher(contentType);
            if (matcher.find()) {
                try {
                    return java.nio.charset.Charset.forName(matcher.group(1));
                } catch (IllegalArgumentException ignored) {
                    // Unknown or malformed charset name: fall back to UTF-8 below.
                }
            }
        }
        return java.nio.charset.StandardCharsets.UTF_8;
    }

    /**
     * Finds all {@code <img>} tags that carry a {@code src} attribute.
     *
     * @param html HTML text to scan; {@code null} is treated as empty
     * @return the matched tags in document order, possibly empty
     */
    private List<String> getImageUrl(String html) {
        List<String> tags = new ArrayList<>();
        if (html == null) {
            return tags;
        }
        Matcher matcher = IMG_TAG_PATTERN.matcher(html);
        while (matcher.find()) {
            tags.add(matcher.group());
        }
        return tags;
    }

    /**
     * Extracts every absolute URL contained in the given tags.
     *
     * @param listimageurl {@code <img>} tags as returned by {@link #getImageUrl(String)}
     * @return the URLs in order of appearance, without quotes or other tag syntax
     */
    private List<String> getImageSrc(List<String> listimageurl) {
        List<String> sources = new ArrayList<>();
        for (String tag : listimageurl) {
            Matcher matcher = IMG_SRC_PATTERN.matcher(tag);
            while (matcher.find()) {
                sources.add(matcher.group());
            }
        }
        return sources;
    }

    /**
     * Downloads the resource at {@code urlList} and stores it in the file {@code path},
     * creating the parent directory when it is missing. I/O problems are reported on
     * standard error instead of being thrown.
     *
     * @param urlList address of the image to download
     * @param path    destination file
     * @return {@code true} if the file was written completely, {@code false} otherwise
     */
    private static boolean downloadPicture(String urlList, String path) {
        try {
            File target = new File(path);
            File parent = target.getParentFile();
            if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
                throw new IOException("cannot create directory " + parent);
            }
            // The input is opened first so that a failed connection leaves no empty file.
            try (InputStream in = new URL(urlList).openStream();
                 OutputStream out = new FileOutputStream(target)) {
                byte[] buffer = new byte[8192];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    out.write(buffer, 0, length);
                }
            }
            return true;
        } catch (IOException e) {
            // MalformedURLException is an IOException and is handled here as well.
            e.printStackTrace();
            return false;
        }
    }
}
获取书名和作者名
/**
 * Loads the listing page with Jsoup and prints two groups of element texts:
 * the text of every element matching {@code .bookMation h3 a}, labelled as a book title,
 * followed by the text of every element matching {@code .tryread}, labelled as an author.
 *
 * @throws Exception if the page cannot be fetched or parsed
 */
@Test
public void getMessage() throws Exception {
    final String pageUrl = "http://www.ireader.com/index.php?ca=booksort.index&pid=92&cid=142&order=download&status=0&page=3";
    Document page = Jsoup.connect(pageUrl).get();

    // First pass: links inside the h3 headings of the .bookMation containers.
    for (Element titleLink : page.select(".bookMation h3 a")) {
        System.out.println("书名: " + titleLink.text());
    }

    // Second pass: the .tryread elements.
    for (Element authorNode : page.select(".tryread")) {
        System.out.println("作者: " + authorNode.text());
    }
}