// UrlDownloadTools.java

package eu.javaexperience.url;

import static eu.javaexperience.log.LogLevel.*;
import static eu.javaexperience.log.LoggingTools.*;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import eu.javaexperience.interfaces.simple.getBy.GetBy2;
import eu.javaexperience.log.JavaExperienceLoggingFacility;
import eu.javaexperience.log.Loggable;
import eu.javaexperience.log.Logger;
import eu.javaexperience.proxy.ProxyStorage;
import eu.javaexperience.proxy.TorProxySpawner.ProxySource;
import eu.javaexperience.reflect.Mirror;
import eu.javaexperience.semantic.references.MayNull;

public class UrlDownloadTools
{
	/** Logger used by the static helpers of this class; obtained for a {@link Loggable} named "UrlDownloadTools". */
	protected static final Logger LOG = JavaExperienceLoggingFacility.getLogger(new Loggable("UrlDownloadTools"));
	
	/**
	 * Downloads the content behind {@code url} without sending any extra request header.
	 *
	 * Convenience overload that forwards to {@link #download(String, Map)} with {@code null} headers.
	 *
	 * @param url address to fetch, in textual form
	 * @return the bytes returned by the delegated download
	 * @throws MalformedURLException if the delegated overloads cannot parse {@code url}
	 * @throws IOException if the delegated download fails
	 */
	public static byte[] download(String url) throws MalformedURLException, IOException
	{
		// Typed null keeps the overload choice explicit: (String, Map).
		final Map<String,String> noHeaders = null;
		return download(url, noHeaders);
	}
	
	/**
	 * Downloads the content behind {@code url}, sending the given request headers and no request body.
	 *
	 * Convenience overload that forwards to {@link #download(String, Map, String)} with {@code null} post data.
	 *
	 * @param url address to fetch, in textual form
	 * @param headers request headers to send, may be {@code null}
	 * @return the bytes returned by the delegated download
	 * @throws MalformedURLException if the delegated overload cannot parse {@code url}
	 * @throws IOException if the delegated download fails
	 */
	public static byte[] download(String url, @MayNull Map<String,String> headers) throws MalformedURLException, IOException
	{
		// Typed null selects the (String, Map, String) overload.
		final String noPostData = null;
		return download(url, headers, noPostData);
	}
	
	/**
	 * Downloads the content behind {@code url} through {@code proxy}, sending the given request headers and no request body.
	 *
	 * Convenience overload that forwards to {@link #download(Proxy, String, Map, String)} with {@code null} post data.
	 *
	 * @param proxy proxy handed on to the delegated overload
	 * @param url address to fetch, in textual form
	 * @param headers request headers to send, may be {@code null}
	 * @return the bytes returned by the delegated download
	 * @throws MalformedURLException if the delegated overload cannot parse {@code url}
	 * @throws IOException if the delegated download fails
	 */
	public static byte[] download(Proxy proxy, String url, @MayNull Map<String,String> headers) throws MalformedURLException, IOException
	{
		// Typed null selects the (Proxy, String, Map, String) overload.
		final String noPostData = null;
		return download(proxy, url, headers, noPostData);
	}
	
	
	/**
	 * Downloads the content behind {@code url} without a proxy, optionally sending {@code post_data} as request body.
	 *
	 * The text is parsed into a {@link URL} and the call is forwarded to
	 * {@link #download(Proxy, URL, Map, int, byte[])} with a {@code null} proxy and a timeout of {@code -1}
	 * (which that overload treats as "do not set a timeout").
	 *
	 * @param url address to fetch, in textual form
	 * @param headers request headers to send, may be {@code null}
	 * @param post_data request body; {@code null} sends no body. The text is encoded as UTF-8.
	 * @return the downloaded bytes
	 * @throws MalformedURLException if {@code url} cannot be parsed
	 * @throws IOException if the download fails
	 */
	public static byte[] download(String url, @MayNull Map<String,String> headers, String post_data) throws MalformedURLException, IOException
	{
		byte[] body = null;
		if(null != post_data)
		{
			// Explicit charset: the no-arg getBytes() depends on the platform default encoding,
			// which made the transmitted body differ from machine to machine.
			body = post_data.getBytes(java.nio.charset.StandardCharsets.UTF_8);
		}
		
		return download(null, new URL(url), headers, -1, body);
	}
	
	
	/**
	 * Downloads the content behind {@code url} through {@code proxy}, optionally sending {@code post_data} as request body.
	 *
	 * The text is parsed into a {@link URL} and the call is forwarded to
	 * {@link #download(Proxy, URL, Map, int, byte[])} with a timeout of {@code -1}
	 * (which that overload treats as "do not set a timeout").
	 *
	 * @param proxy proxy to connect through; {@code null} means a direct connection in the delegated overload
	 * @param url address to fetch, in textual form
	 * @param headers request headers to send, may be {@code null}
	 * @param post_data request body; {@code null} sends no body. The text is encoded as UTF-8.
	 * @return the downloaded bytes
	 * @throws MalformedURLException if {@code url} cannot be parsed
	 * @throws IOException if the download fails
	 */
	public static byte[] download(Proxy proxy, String url, @MayNull Map<String,String> headers, String post_data) throws MalformedURLException, IOException
	{
		byte[] body = null;
		if(null != post_data)
		{
			// Explicit charset: the no-arg getBytes() depends on the platform default encoding,
			// which made the transmitted body differ from machine to machine.
			body = post_data.getBytes(java.nio.charset.StandardCharsets.UTF_8);
		}
		
		return download(proxy, new URL(url), headers, -1, body);
	}
	
	
	
	/**
	 * Downloads the content of {@code url} without a proxy and without a request body.
	 *
	 * Forwards to {@link #download(Proxy, URL, Map, int, byte[])} with a {@code null} proxy,
	 * a timeout of {@code -1} and {@code null} post data.
	 *
	 * @param url address to fetch
	 * @param headers request headers to send, passed on unchanged
	 * @return the downloaded bytes
	 * @throws IOException if the download fails
	 */
	public static byte[] download(URL url, Map<String,String> headers) throws IOException
	{
		final Proxy noProxy = null;
		final byte[] noBody = null;
		return download(noProxy, url, headers, -1, noBody);
	}
	
	/**
	 * Downloads the content of {@code url} through {@code proxy} without a request body.
	 *
	 * Forwards to {@link #download(Proxy, URL, Map, int, byte[])} with a timeout of {@code -1}
	 * and {@code null} post data.
	 *
	 * @param proxy proxy handed on to the delegated overload
	 * @param url address to fetch
	 * @param headers request headers to send, passed on unchanged
	 * @return the downloaded bytes
	 * @throws IOException if the download fails
	 */
	public static byte[] download(Proxy proxy, URL url, Map<String,String> headers) throws IOException
	{
		final byte[] noBody = null;
		return download(proxy, url, headers, -1, noBody);
	}
	
	/**
	 * Downloads the content of {@code url} through {@code proxy}, optionally sending a request body.
	 *
	 * Forwards to {@link #download(Proxy, URL, Map, int, byte[])} with a timeout of 60000 milliseconds.
	 *
	 * @param proxy proxy handed on to the delegated overload
	 * @param url address to fetch
	 * @param headers request headers to send, passed on unchanged
	 * @param POST_data request body, passed on unchanged
	 * @return the downloaded bytes
	 * @throws IOException if the download fails
	 */
	public static byte[] download(Proxy proxy, URL url, Map<String,String> headers, byte[] POST_data) throws IOException
	{
		// Timeout applied by this overload, in milliseconds.
		final int timeoutMs = 60_000;
		return download(proxy, url, headers, timeoutMs, POST_data);
	}
	
	/**
	 * Fetches the content of {@code url} and returns it as a byte array.
	 *
	 * <p>The connection is opened directly, or through {@code proxy} when one is given. Every header
	 * whose value is not {@code null} is added as a request property. When {@code POST_data} is given,
	 * output is enabled on the connection and the bytes are written as request body.
	 *
	 * <p>For HTTP connections an answer with status 301, 302 or 303 is followed by this method itself:
	 * the {@code Location} header is resolved against the URL that produced it (so relative locations work)
	 * and the request, with the same headers and the same {@code POST_data}, is repeated against the new
	 * address. At most 20 such redirects are followed. Connections that are not HTTP connections
	 * (for example {@code file:} URLs) are simply read to the end.
	 *
	 * @param proxy proxy to connect through, {@code null} for a direct connection
	 * @param url address to fetch
	 * @param headers request headers, may be {@code null}; entries with a {@code null} value are skipped
	 * @param timeoutMs connect timeout in milliseconds; values {@code <= 0} leave the connection's setting untouched.
	 *        Only the connect timeout is set here, no read timeout.
	 * @param POST_data request body, {@code null} to send none
	 * @return the complete response body
	 * @throws MalformedURLException if a redirect answer carries no usable {@code Location} header
	 * @throws IOException if connecting, writing or reading fails, or more than 20 redirects are encountered
	 */
	public static byte[] download(Proxy proxy, URL url, Map<String,String> headers, int timeoutMs, byte[] POST_data) throws IOException
	{
		// Upper bound for manually followed redirects, so a redirect cycle ends with an error
		// instead of looping without end.
		final int maxRedirects = 20;
		
		URL target = url;
		
		for(int hop = 0; hop <= maxRedirects; ++hop)
		{
			URLConnection connection = null == proxy ? target.openConnection() : target.openConnection(proxy);
			
			if(timeoutMs > 0)
			{
				connection.setConnectTimeout(timeoutMs);
			}
			
			if(null != headers)
			{
				for(Entry<String, String> header:headers.entrySet())
				{
					if(null != header.getValue())
					{
						connection.addRequestProperty(header.getKey(), header.getValue());
					}
				}
			}
			
			if(null != POST_data)
			{
				connection.addRequestProperty("Content-Length", String.valueOf(POST_data.length));
				connection.setDoOutput(true);
				try(OutputStream os = connection.getOutputStream())
				{
					os.write(POST_data);
					os.flush();
				}
			}
			
			try(InputStream is = connection.getInputStream())
			{
				// Only HTTP connections have a status code; other protocols (file:, jar:, ...)
				// must not be cast and are read as they are.
				if(connection instanceof HttpURLConnection)
				{
					HttpURLConnection conn = (HttpURLConnection) connection;
					int status = conn.getResponseCode();
					if(status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_SEE_OTHER)
					{
						String location = conn.getHeaderField("Location");
						if(null == location)
						{
							throw new MalformedURLException("Redirect answer (HTTP "+status+") from "+target+" has no Location header");
						}
						
						// Resolving against the current URL also accepts relative locations.
						target = new URL(target, location);
						continue;
					}
				}
				
				int ep = 0;
				int read = 0;
				byte[] ret = new byte[10240];
				
				while((read = is.read(ret, ep, ret.length-ep)) >= 0)
				{
					// Grow before the buffer is completely full, so the next read never asks for 0 bytes.
					if(ep + read == ret.length)
					{
						ret = Arrays.copyOf(ret, ret.length*2);
					}
					
					ep+= read;
				}
				
				return Arrays.copyOf(ret, ep);
			}
		}
		
		throw new IOException("Too many redirects (more than "+maxRedirects+") while downloading "+url);
	}
	
	/**
	 * Performs an HTTP request and returns status, headers and body.
	 *
	 * Forwards to {@link #httpDownload(Proxy, URL, Map, int, byte[])} with a timeout of {@code 0},
	 * which that overload treats as "do not set a connect timeout".
	 *
	 * @param proxy proxy handed on to the delegated overload
	 * @param url address to request
	 * @param headers request headers, passed on unchanged
	 * @param POST_data request body, passed on unchanged
	 * @return the result produced by the delegated overload
	 * @throws IOException if the delegated request fails
	 */
	public static HttpRequestResult httpDownload(Proxy proxy, URL url, Map<String,String> headers, byte[] POST_data) throws IOException
	{
		final int noTimeout = 0;
		return httpDownload(proxy, url, headers, noTimeout, POST_data);
	}
	
	
	/**
	 * Performs an HTTP request and returns response code, status line, response headers and body.
	 *
	 * <p>The connection is opened directly, or through {@code proxy} when one is given. Every header
	 * whose value is not {@code null} is added as a request property. When {@code POST_data} is given,
	 * output is enabled on the connection and the bytes are written as request body.
	 *
	 * <p>Unlike {@link #download(Proxy, URL, Map, int, byte[])} this method does not fail on error
	 * answers: for response codes of 400 and above the body is taken from the connection's error stream,
	 * for every other code from its regular input stream. When no stream is available the body is an
	 * empty array.
	 *
	 * @param proxy proxy to connect through, {@code null} for a direct connection
	 * @param url address to request; the opened connection is cast to {@link HttpURLConnection},
	 *        so a URL of another protocol ends in a {@link ClassCastException}
	 * @param headers request headers, may be {@code null}; entries with a {@code null} value are skipped
	 * @param timeoutMs connect timeout in milliseconds; values {@code <= 0} leave the connection's setting untouched.
	 *        Only the connect timeout is set here, no read timeout.
	 * @param POST_data request body, {@code null} to send none
	 * @return a populated {@link HttpRequestResult}; {@code responseStatus} stays {@code null} when the
	 *         header map holds no entry under the {@code null} key
	 * @throws IOException if connecting, writing or reading fails
	 */
	public static HttpRequestResult httpDownload(Proxy proxy, URL url, Map<String,String> headers, int timeoutMs, byte[] POST_data) throws IOException
	{
		URLConnection connection = null == proxy ? url.openConnection() : url.openConnection(proxy);
		
		if(timeoutMs > 0)
		{
			connection.setConnectTimeout(timeoutMs);
		}
		
		if(null != headers)
		{
			for(Entry<String, String> header:headers.entrySet())
			{
				if(null != header.getValue())
				{
					connection.addRequestProperty(header.getKey(), header.getValue());
				}
			}
		}
		
		if(null != POST_data)
		{
			connection.addRequestProperty("Content-Length", String.valueOf(POST_data.length));
			connection.setDoOutput(true);
			try(OutputStream os = connection.getOutputStream())
			{
				os.write(POST_data);
				os.flush();
			}
		}
		
		HttpURLConnection httpConn = (HttpURLConnection) connection;
		
		HttpRequestResult res = new HttpRequestResult();
		
		res.responseCode = httpConn.getResponseCode();
		
		res.headers = httpConn.getHeaderFields();
		
		try
		{
			// The status line is looked up under the null key of the header map.
			List<String> statusLine = null == res.headers ? null : res.headers.get(null);
			if(null != statusLine && !statusLine.isEmpty())
			{
				res.responseStatus = statusLine.get(0);
			}
		}
		catch(RuntimeException ignored)
		{
			// Best effort only: a header map implementation that rejects null keys
			// must not make the whole request fail; responseStatus then stays null.
		}
		
		// Error answers (>= 400) expose their body on the error stream, every other answer on the
		// input stream. Testing for exactly 200 dropped the body of 201, 206 and similar answers.
		try(InputStream is = res.responseCode >= HttpURLConnection.HTTP_BAD_REQUEST ? httpConn.getErrorStream() : httpConn.getInputStream())
		{
			if(null != is)
			{
				int ep = 0;
				int read = 0;
				byte[] ret = new byte[10240];
				
				while((read = is.read(ret, ep, ret.length-ep))>0)
				{
					// Grow before the buffer is completely full, so the next read never asks for 0 bytes.
					if(ep + read == ret.length)
					{
						ret = Arrays.copyOf(ret, ret.length*2);
					}
					
					ep+= read;
				}
				
				res.data = Arrays.copyOf(ret, ep);
			}
			else
			{
				res.data = Mirror.emptyByteArray;
			}
		}
		
		return res;
	}
	
	/**
	 * Plain holder for the outcome of {@code httpDownload}: all fields are public and are
	 * filled in by that method.
	 */
	public static class HttpRequestResult
	{
		/** First value stored under the {@code null} key of {@link #headers}; stays {@code null} when that entry is missing. */
		public String responseStatus;
		/** Value reported by {@code HttpURLConnection.getResponseCode()}. */
		public int responseCode;
		/** Bytes read from the response stream; an empty array when no stream was available. */
		public byte[] data;
		/** Response header fields as returned by {@code HttpURLConnection.getHeaderFields()}. */
		public Map<String, List<String>> headers;
	}
	
	/**
	 * Downloads all addresses of {@code src} in parallel through proxies and stores the results in {@code dst}.
	 *
	 * Convenience overload of the eight-argument variant: no custom downloader and no request
	 * headers are supplied (both are passed as {@code null}).
	 *
	 * @param dst receives the downloaded bytes, keyed by the address text
	 * @param src addresses to download
	 * @param spawnerStorage source of the proxies
	 * @param proxies passed on unchanged to the eight-argument variant
	 * @param concurrency passed on unchanged to the eight-argument variant
	 * @param skip_exists passed on unchanged to the eight-argument variant
	 */
	public static void downloadPagesIntoParallelWithProxies
	(
		final Map<String, byte[]> dst,
		Collection<String> src,
		final ProxyStorage spawnerStorage,
		final int proxies,
		final int concurrency,
		final boolean skip_exists
	)
	{
		final GetBy2<byte[], Proxy, URL> builtInDownloader = null;
		final Map<String, String> noHeaders = null;
		
		downloadPagesIntoParallelWithProxies
		(
			builtInDownloader,
			dst,
			src,
			noHeaders,
			spawnerStorage,
			proxies,
			concurrency,
			skip_exists
		);
	}
	
	/**
	 * Downloads all addresses of {@code src} in parallel through proxies and stores the results in {@code dst}.
	 *
	 * <p>The addresses are put into a queue that is drained by {@code concurrency} worker threads; the
	 * method returns once every worker has finished. Worker number {@code k} asks {@code spawnerStorage}
	 * for the proxy at offset {@code k % proxies}. Each address is attempted up to 11 times; an attempt
	 * counts as failed when it throws or yields {@code null}. After the last failed attempt a warning is
	 * logged and the address is skipped. Successful results are stored in {@code dst} under the address text.
	 *
	 * <p>All accesses this method makes to {@code dst} are synchronized on {@code dst} itself, because
	 * several workers use the map at the same time.
	 *
	 * @param downloader custom download function (proxy, URL) -> bytes; when {@code null},
	 *        {@link #download(Proxy, String, Map)} is used with {@code headers}
	 * @param dst receives the downloaded bytes, keyed by the address text
	 * @param src addresses to download
	 * @param headers request headers for the built-in download, may be {@code null}; not used with a custom {@code downloader}
	 * @param spawnerStorage source of the proxies
	 * @param proxies modulus applied to the worker number to choose the proxy offset
	 * @param concurrency number of worker threads; a value {@code <= 0} starts none, so nothing is downloaded
	 * @param skip_exists when {@code true}, addresses that are already a key of {@code dst} are not downloaded
	 */
	public static void downloadPagesIntoParallelWithProxies
	(
		final @MayNull GetBy2<byte[], Proxy, URL> downloader,
		final Map<String, byte[]> dst,
		Collection<String> src,
		@MayNull Map<String, String> headers,
		final ProxyStorage spawnerStorage,
		final int proxies,
		final int concurrency,
		final boolean skip_exists
	)
	{
		final BlockingQueue<String> urls_queue = new LinkedBlockingQueue<>();
		urls_queue.addAll(src);
		
		// A non-positive concurrency starts no worker instead of blocking forever while
		// waiting for workers that never existed.
		final Thread[] workers = new Thread[Math.max(0, concurrency)];
		
		for(int i=0;i<workers.length;++i)
		{
			final int thread_ordinal = i;
			workers[i] = new Thread()
			{
				@Override
				public void run()
				{
					try
					{
						String toDownload = null;
						while(null != (toDownload = urls_queue.poll()))
						{
							if(skip_exists)
							{
								boolean alreadyPresent;
								// dst is shared between the workers and may be a plain, unsynchronized map.
								synchronized(dst)
								{
									alreadyPresent = dst.containsKey(toDownload);
								}
								
								if(alreadyPresent)
								{
									continue;
								}
							}
							
							try
							{
								long t0 = System.currentTimeMillis();
								byte[] data = null;
								Exception lastError = null;
								
								// The bound sits in the loop header, so attempts that return null
								// without throwing are counted as well and cannot retry forever.
								final int try_count = 10;
								for(int attempt=0;null == data && attempt <= try_count;++attempt)
								{
									try
									{
										ProxySource tp = spawnerStorage.getAtOffset(thread_ordinal % proxies);
										Proxy p = tp.getProxy();
										
										if(null == downloader)
										{
											data = download(p, toDownload, headers);
										}
										else
										{
											data = downloader.getBy(p, new URL(toDownload));
										}
									}
									catch(Exception e)
									{
										lastError = e;
									}
								}
								
								if(null != data)
								{
									tryLogFormat(LOG, MEASURE, "Url download \"%s\" took %s ms", toDownload, System.currentTimeMillis()-t0);
									synchronized(dst)
									{
										dst.put(toDownload, data);
									}
								}
								else
								{
									tryLogFormat(LOG, WARNING, "Can't download URL \"%s\" %s", toDownload, null == lastError ? "no data returned" : lastError.getMessage());
								}
							}
							catch(Exception e)
							{
								e.printStackTrace();
							}
						}
					}
					catch(Throwable e)
					{
						// Nothing may escape the worker: report and let the thread end normally.
						e.printStackTrace();
					}
				}
			};
			workers[i].start();
		}
		
		// Wait for every worker. An interrupt does not abort the wait (the workers keep writing
		// into dst), but the interrupt status is restored for the caller afterwards.
		boolean interrupted = false;
		for(Thread worker:workers)
		{
			for(;;)
			{
				try
				{
					worker.join();
					break;
				}
				catch (InterruptedException e)
				{
					interrupted = true;
				}
			}
		}
		
		if(interrupted)
		{
			Thread.currentThread().interrupt();
		}
	}
	
	/**
	 * Switches off certificate and host name verification for all {@link HttpsURLConnection}s of the JVM.
	 *
	 * <p>Installs, as process-wide defaults, an SSL socket factory whose trust manager accepts every
	 * certificate chain and a host name verifier that accepts every host.
	 *
	 * <p><b>Security warning:</b> after this call HTTPS connections opened through
	 * {@link HttpsURLConnection} no longer authenticate the server, which leaves them open to
	 * man-in-the-middle attacks. The change is global and is not undone by this class.
	 *
	 * <p>If the SSL context cannot be created or initialised, the stack trace is printed and the
	 * defaults are left unchanged.
	 */
	public static void disableSslVerification()
	{
		try
		{
			// Create a trust manager that does not validate certificate chains
			TrustManager[] trustAllCerts = new TrustManager[] {new X509TrustManager() {
				@Override
				public X509Certificate[] getAcceptedIssuers() {
					// The interface contract asks for a non-null (possibly empty) array.
					return new X509Certificate[0];
				}
				@Override
				public void checkClientTrusted(X509Certificate[] certs, String authType) {
					// accept everything
				}
				@Override
				public void checkServerTrusted(X509Certificate[] certs, String authType) {
					// accept everything
				}
			}
			};

			// Install the all-trusting trust manager
			SSLContext sc = SSLContext.getInstance("SSL");
			sc.init(null, trustAllCerts, new java.security.SecureRandom());
			HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());

			// Create all-trusting host name verifier
			HostnameVerifier allHostsValid = new HostnameVerifier() {
				@Override
				public boolean verify(String hostname, SSLSession session) {
					return true;
				}
			};

			// Install the all-trusting host verifier
			HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid);
		} catch (NoSuchAlgorithmException | KeyManagementException e) {
			e.printStackTrace();
		}
	}
}