import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads the images referenced by {@code <img>} tags of a web page.
 *
 * <p>The page at {@code PAGE_URL} is fetched, its {@code <img>} tags are located with a regular
 * expression, the absolute {@code http}/{@code https} address of each tag's {@code src} attribute
 * is extracted, and every image is saved into the current working directory under the last path
 * segment of its address.
 *
 * <p>NOTE(review): the class name is kept exactly as found, because renaming a public class also
 * requires renaming its source file; Java naming convention would be {@code CatchImage}.
 *
 * @author swinglife
 */
public class catchimage {

    /** Address of the page whose images are downloaded. */
    private static final String PAGE_URL = "http://www.csdn.net";

    /** Character encoding used to decode the page body. */
    private static final Charset ENCODING = StandardCharsets.UTF_8;

    /** Size, in bytes, of the buffer used when copying streams. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Matches one complete {@code <img ...>} tag. {@code [^>]*} stops at the tag's own closing
     * {@code >}, so several tags on the same line are never merged into a single match.
     */
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Captures, as group 1, the absolute http(s) address held by a tag's {@code src} attribute.
     * The leading {@code \s} keeps attributes such as {@code data-src} from matching.
     */
    private static final Pattern IMG_SRC_PATTERN =
            Pattern.compile("\\ssrc\\s*=\\s*[\"']?(https?://[^\"'\\s>]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Fetches {@code PAGE_URL} and downloads every image it references.
     *
     * @param args ignored
     * @throws Exception if the page itself cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        catchimage cm = new catchimage();
        // Fetch the HTML text of the page.
        String html = cm.getHtml(PAGE_URL);
        // Collect the <img> tags.
        List<String> imgTags = cm.getImageUrl(html);
        // Extract the src address of each tag.
        List<String> imgSrcs = cm.getImageSrc(imgTags);
        // Download the images.
        cm.download(imgSrcs);
    }

    /**
     * Fetches the content behind {@code url} and decodes it with {@code ENCODING}.
     *
     * @param url address of the page to fetch
     * @return the decoded page body
     * @throws IOException if the address is malformed or the content cannot be read
     */
    private String getHtml(String url) throws IOException {
        URLConnection connection = new URL(url).openConnection();
        ByteArrayOutputStream body = new ByteArrayOutputStream();
        try (InputStream in = connection.getInputStream()) {
            copy(in, body);
        }
        // Decode once, after the whole body has been read: decoding buffer by buffer would
        // corrupt any multi-byte character that happens to straddle two reads.
        return new String(body.toByteArray(), ENCODING);
    }

    /**
     * Finds every {@code <img>} tag in {@code html}.
     *
     * @param html the page markup to scan
     * @return the text of each matched tag, in document order; empty if there is none
     */
    private List<String> getImageUrl(String html) {
        List<String> imgTags = new ArrayList<>();
        Matcher matcher = IMG_TAG_PATTERN.matcher(html);
        while (matcher.find()) {
            imgTags.add(matcher.group());
        }
        return imgTags;
    }

    /**
     * Extracts the absolute http(s) {@code src} address from each {@code <img>} tag.
     *
     * @param listImageUrl the tags to inspect
     * @return one address per tag that has an absolute http(s) {@code src}; tags without one
     *     are skipped
     */
    private List<String> getImageSrc(List<String> listImageUrl) {
        List<String> imgSrcs = new ArrayList<>();
        for (String tag : listImageUrl) {
            Matcher matcher = IMG_SRC_PATTERN.matcher(tag);
            // find() must succeed before a group is read; tags without a usable src are skipped.
            if (matcher.find()) {
                imgSrcs.add(matcher.group(1));
            }
        }
        return imgSrcs;
    }

    /**
     * Downloads every address in {@code listImgSrc} into the current working directory.
     *
     * <p>A failure is reported on standard output and affects only the image it occurred on;
     * the remaining images are still attempted.
     *
     * @param listImgSrc absolute addresses of the images to download
     */
    private void download(List<String> listImgSrc) {
        for (String src : listImgSrc) {
            String imageName = fileNameOf(src);
            System.out.println("开始下载:" + src);
            try {
                saveImage(src, new File(imageName));
                System.out.println(imageName + "下载完成");
            } catch (IOException e) {
                // Name the address and the cause so a failed download can be diagnosed.
                System.out.println("下载失败: " + src + " (" + e + ")");
            }
        }
    }

    /** Copies the content behind {@code src} into {@code target}, closing both streams. */
    private static void saveImage(String src, File target) throws IOException {
        try (InputStream in = new URL(src).openStream();
                FileOutputStream fo = new FileOutputStream(target)) {
            copy(in, fo);
        }
    }

    /** Returns the last path segment of {@code url}, without any query string or fragment. */
    private static String fileNameOf(String url) {
        String path = url;
        int query = path.indexOf('?');
        if (query >= 0) {
            path = path.substring(0, query);
        }
        int fragment = path.indexOf('#');
        if (fragment >= 0) {
            path = path.substring(0, fragment);
        }
        return path.substring(path.lastIndexOf('/') + 1);
    }

    /** Copies all remaining bytes of {@code in} to {@code out}; closes neither stream. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buf = new byte[BUFFER_SIZE];
        int length;
        while ((length = in.read(buf, 0, buf.length)) != -1) {
            // Write only the bytes actually read; the rest of the buffer is stale.
            out.write(buf, 0, length);
        }
    }
}