`

java-下载图片(正则表达式、HttpURLConnection、流、socket综合运用)

    博客分类:
  • java
阅读更多
package cn.itcast.httpserver;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads every image referenced by the {@code <img>} tags of one HTML page.
 *
 * <p>The page is fetched over HTTP, the {@code src} attribute of each
 * {@code <img>} tag is extracted with a regular expression, each value is
 * resolved against the page URL, and every resulting image is downloaded on
 * its own thread into {@link #DOWNLOAD_DIR}.
 */
public class DownloadImage {

	/** Local directory that downloaded images are written to. */
	private static final String DOWNLOAD_DIR = "d:\\image\\download";

	/**
	 * Matches an {@code <img>} tag up to and including its quoted {@code src}
	 * attribute; group 2 is the attribute value.
	 *
	 * <ul>
	 * <li>{@code [^>]*?} keeps the match inside a single tag, so tags that are
	 * not self-closing ({@code <img ...>}) are handled and neighbouring markup
	 * is never swallowed.</li>
	 * <li>{@code \s} in front of {@code src} rejects look-alike attributes such
	 * as {@code data-src}.</li>
	 * <li>{@code (["'])...\1} requires the closing quote to be the same
	 * character as the opening one.</li>
	 * </ul>
	 */
	private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
			"<img\\b[^>]*?\\ssrc\\s*=\\s*([\"'])(.*?)\\1",
			Pattern.CASE_INSENSITIVE);

	/** Address of the HTML page whose images are downloaded. */
	private final String url;

	/**
	 * @param url address of the HTML page to scan for images
	 */
	public DownloadImage(String url) {
		this.url = url;
	}

	/**
	 * Fetches the page, collects the {@code src} of every {@code <img>} tag and
	 * starts one download thread per image. The method returns as soon as the
	 * threads have been started; it does not wait for the downloads to finish.
	 */
	public void run() {
		// Fetch the HTML source of the page.
		String html = loadHtml(this.url);

		// Collect the src attribute of every img tag.
		List<String> imgPaths = parseImagePath(html);

		for (String imgPath : imgPaths) {
			// Turn the (possibly relative) src value into an absolute URL.
			String path = resolveImageUrl(imgPath);
			if (path == null) {
				continue;
			}

			Thread thread = new Thread(new ImageDownloadTask(path, DOWNLOAD_DIR));
			thread.start();
		}
	}

	/**
	 * Resolves an {@code src} value against the page URL, the way a browser
	 * would: relative paths are taken relative to the page, root-relative and
	 * absolute URLs are honoured as written.
	 *
	 * @param imgPath raw {@code src} attribute value
	 * @return the absolute http/https URL, or {@code null} when the value
	 *         cannot be resolved or does not use http/https (for example a
	 *         {@code data:} URI), in which case the image is skipped
	 */
	private String resolveImageUrl(String imgPath) {
		try {
			URL resolved = new URL(new URL(this.url), imgPath);
			String protocol = resolved.getProtocol();
			// The download task casts its connection to HttpURLConnection,
			// so only http(s) URLs can be handed to it.
			if ("http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol)) {
				return resolved.toString();
			}
			System.err.println("Skipping non-HTTP image source: " + imgPath);
		} catch (MalformedURLException e) {
			System.err.println("Skipping unresolvable image source: " + imgPath + " (" + e.getMessage() + ")");
		}
		return null;
	}

	/**
	 * Extracts the {@code src} attribute value of every {@code <img>} tag.
	 *
	 * @param html HTML source to scan; {@code null} is treated as empty
	 * @return the non-blank {@code src} values in document order; never
	 *         {@code null}
	 */
	private List<String> parseImagePath(String html) {
		List<String> imagePaths = new ArrayList<String>();
		if (html == null) {
			return imagePaths;
		}

		Matcher matcher = IMG_SRC_PATTERN.matcher(html);
		while (matcher.find()) {
			String path = matcher.group(2).trim();
			// An empty src would resolve to the page itself, so skip it.
			if (path.length() > 0) {
				imagePaths.add(path);
			}
		}
		return imagePaths;
	}

	/**
	 * Loads the HTML source found at the given address.
	 *
	 * <p>The response is decoded with the platform default charset, because the
	 * reader is created without an explicit one.
	 *
	 * @param urlString address of the page
	 * @return the page source with lines separated by {@code '\n'}, or an empty
	 *         string when the page could not be loaded (the failure is printed
	 *         to standard error)
	 */
	private String loadHtml(String urlString) {
		BufferedReader reader = null;
		HttpURLConnection connection = null;
		try {
			URL pageUrl = new URL(urlString);
			connection = (HttpURLConnection) pageUrl.openConnection();
			reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));

			StringBuilder sb = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				// Keep a separator between lines: without it a tag broken across
				// lines ("<img" + newline + "src=...") would collapse into "<imgsrc=...".
				sb.append(line).append('\n');
			}
			return sb.toString();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeResource(reader);
			if (connection != null) {
				connection.disconnect();
			}
		}
		return "";
	}

	/**
	 * Closes a resource if it was opened, reporting (but not propagating) a
	 * failure so that the remaining clean-up steps still run.
	 *
	 * @param resource resource to close; may be {@code null}
	 */
	private static void closeResource(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Downloads a single image over HTTP and stores it under a random file name
	 * in a local directory.
	 */
	public static class ImageDownloadTask implements Runnable {

		/** Extension used when none can be derived from the image URL. */
		private static final String DEFAULT_EXTENSION = ".gif";

		private String urlString;
		private String baseDir;

		/**
		 * @param url address of the image to download
		 * @param baseDir local directory the image is saved to; it is created
		 *        when it does not exist yet
		 */
		public ImageDownloadTask(String url, String baseDir) {
			this.urlString = url;
			this.baseDir = baseDir;
		}

		/**
		 * Performs the download. Any failure is printed to standard error and
		 * never propagates out of this method.
		 */
		@Override
		public void run() {

			HttpURLConnection httpURLConnection = null;
			BufferedInputStream input = null;
			BufferedOutputStream output = null;

			try {
				URL imageUrl = new URL(this.urlString);
				httpURLConnection = (HttpURLConnection) imageUrl.openConnection();
				input = new BufferedInputStream(httpURLConnection.getInputStream());

				// Target file: <baseDir>/<random UUID><extension taken from the URL path>
				File target = new File(this.baseDir,
						UUID.randomUUID().toString() + extensionOf(imageUrl.getPath()));

				// Create the target directory on first use. The result is ignored on
				// purpose: it is false when the directory already exists, and a real
				// failure surfaces when the output stream is opened below.
				File directory = target.getParentFile();
				if (directory != null) {
					directory.mkdirs();
				}

				output = new BufferedOutputStream(new FileOutputStream(target));

				byte[] buffer = new byte[1024];
				int len;
				// read() signals end of stream with -1.
				while ((len = input.read(buffer)) != -1) {
					output.write(buffer, 0, len);
				}
				output.flush();

			} catch (Exception e) {
				System.err.println("Failed to download " + this.urlString);
				e.printStackTrace();
			} finally {
				closeResource(input);
				closeResource(output);
				// The connection is still null when the URL itself was malformed.
				if (httpURLConnection != null) {
					httpURLConnection.disconnect();
				}
			}
		}

		/**
		 * Derives the file extension (including the leading dot) from the last
		 * segment of a URL path.
		 *
		 * @param path path component of the image URL; may be {@code null}
		 * @return the extension, e.g. {@code ".png"}; {@link #DEFAULT_EXTENSION}
		 *         when the last segment has no short alphanumeric extension
		 */
		private static String extensionOf(String path) {
			if (path != null) {
				int slash = path.lastIndexOf('/');
				int dot = path.lastIndexOf('.');
				// The dot must belong to the last path segment and be followed by something.
				if (dot > slash && dot < path.length() - 1) {
					String ext = path.substring(dot + 1);
					if (ext.length() <= 5 && ext.matches("[A-Za-z0-9]+")) {
						return "." + ext;
					}
				}
			}
			return DEFAULT_EXTENSION;
		}
	}

	/**
	 * Downloads the images of {@code http://127.0.0.1/index.html}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		DownloadImage downloadImage = new DownloadImage("http://127.0.0.1/index.html");
		downloadImage.run();
	}
}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics