URL的全称是Uniform Resource Locator,例如:百度的URL为https://www.baidu.com
Java的URL类(java.net.URL)提供了在程序中访问URL所指向资源的方式。我们真正感兴趣的是URL所指向的数据,取得这些数据的方式有以下几种:
01
—
openStream
假定URL指向文本内容:
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
 * Demonstrates reading the content behind a URL with {@link URL#openStream()}
 * and copying it to standard output.
 */
public class Main {
    /**
     * Opens a stream to the hard-coded URL and copies every byte it delivers
     * to {@code System.out}.
     *
     * @param args unused
     * @throws IOException if the URL cannot be opened or a read fails
     */
    public static void main(String[] args) throws IOException {
        URL u = new URL("https://www.baidu.com");
        // try-with-resources closes the stream even when a read fails.
        try (InputStream in = u.openStream()) {
            byte[] b = new byte[1024];
            int n;
            // read() may fill only part of the buffer, so forward exactly the
            // n bytes it returned; otherwise stale bytes left over from an
            // earlier iteration would be printed again.
            while ((n = in.read(b)) != -1) {
                // Writing the raw bytes (instead of decoding each chunk into
                // its own String) keeps multi-byte characters intact when
                // they happen to be split across two reads, and avoids
                // inserting a line break after every chunk.
                System.out.write(b, 0, n);
            }
            System.out.flush();
        }
    }
}
02
—
openConnection
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

/**
 * Demonstrates reading the content behind a URL through
 * {@link URL#openConnection()} and copying it to standard output.
 */
public class Main {
    /**
     * Opens a {@link URLConnection} to the hard-coded URL and copies every
     * byte of its input stream to {@code System.out}.
     *
     * @param args unused
     * @throws IOException if the connection cannot be opened or a read fails
     */
    public static void main(String[] args) throws IOException {
        URL u = new URL("https://www.baidu.com");
        URLConnection c = u.openConnection();
        // try-with-resources closes the stream even when a read fails.
        try (InputStream in = c.getInputStream()) {
            byte[] b = new byte[1024];
            int n;
            // read() may fill only part of the buffer, so forward exactly the
            // n bytes it returned; otherwise stale bytes left over from an
            // earlier iteration would be printed again.
            while ((n = in.read(b)) != -1) {
                // Writing the raw bytes (instead of decoding each chunk into
                // its own String) keeps multi-byte characters intact when
                // they happen to be split across two reads, and avoids
                // inserting a line break after every chunk.
                System.out.write(b, 0, n);
            }
            System.out.flush();
        }
    }
}
03
—
实战:利用Jsoup获取网络图片
此处,我们希望把B站首页上的图片下载下来,保存到名为download_from_bilibili的文件夹里:
![](https://img.laitimes.com/img/__Qf2AjLwojIjJCLyojI0JCLicmbw5SZlRmYxgTM2UWZxImZ3gzM4MzMhdDNyQDN5ADZhRTYm9CX0JXZ252bj91Ztl2Lc52YucWbp5GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.png)
import org.jsoup.Connection;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import java.io.*;import java.net.MalformedURLException;import java.net.URL;import java.net.URLConnection;import java.net.URLEncoder;public class Main { /** * 下载图片到指定目录 * * @param filePath 文件路径 * @param imgUrl 图片URL */ public static void downImages(String filePath, String imgUrl) { // 若指定文件夹没有,则先创建 File dir = new File(filePath); if (!dir.exists()) { dir.mkdirs(); } // 截取图片文件名 String fileName = System.currentTimeMillis() + ".jpeg"; // 写出的路径 File file = new File(filePath + File.separator + fileName); try { // 获取图片URL URL url = new URL(imgUrl); // 获得连接 URLConnection connection = url.openConnection(); // 设置10秒的相应时间 connection.setConnectTimeout(10 * 1000); // 获得输入流 InputStream in = connection.getInputStream(); // 获得输出流 BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file)); // 构建缓冲区 byte[] buf = new byte[1024]; int size; // 写入到文件 while (-1 != (size = in.read(buf))) { out.write(buf, 0, size); } out.close(); in.close(); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) { // 利用Jsoup获得连接 Connection connect = Jsoup.connect("https://www.bilibili.com/"); try { // 得到Document对象 Document document = connect.get(); // 查找所有img标签 Elements imgs = document.getElementsByTag("img"); System.out.println("共检测到下列图片URL:"); System.out.println("开始下载"); // 遍历img标签并获得src的属性 for (Element element : imgs) { //获取每个img标签URL "abs:"表示绝对路径 String imgSrc = element.attr("abs:src"); // 打印URL System.out.println(imgSrc); //下载图片到本地 downImages("download_from_bilibili",imgSrc); } System.out.println("下载完成"); } catch (IOException e) { e.printStackTrace(); } }}
下载结果: