package eu.javaexpereince.spider.selenium;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

import org.openqa.selenium.WebDriver;

import eu.javaexperience.collection.PublisherCollection;
import eu.javaexperience.spider.AbstractWebSpider;
import eu.javaexperience.url.UrlBuilder;

/**
 * Base class of a crawler that drives a Selenium {@link WebDriver} through a
 * queue of URLs and records, for every processed page, the links collected
 * on it.
 * <p>
 * Subclasses decide which links are interesting ({@link #needWisit(String)})
 * and how links are extracted from the loaded page
 * ({@link #selectUrls(Collection, WebDriver)}).
 * <p>
 * Every access this class itself makes to {@link #urlsToVisit} and
 * {@link #pageLinks} happens while holding this instance's monitor. The
 * fields are protected, so subclasses touching them directly should take the
 * same lock.
 */
public abstract class AbstractSeleniumSpider
{
	/** URLs waiting to be crawled. */
	protected Queue<String> urlsToVisit;
	
	/**
	 * Processed page URL mapped to the links collected on that page.
	 * The key set doubles as the "already visited" set.
	 */
	protected Map<String, List<String>> pageLinks;
	
	/**
	 * @param urlsToVisit queue the crawler takes its next URL from; stored as is, not copied
	 * @param pageLinks map receiving one entry per processed page; stored as is, not copied
	 */
	public AbstractSeleniumSpider(Queue<String> urlsToVisit, Map<String, List<String>> pageLinks)
	{
		this.urlsToVisit = urlsToVisit;
		this.pageLinks = pageLinks;
	}
	
	/**
	 * Filter applied to every (already normalized) URL offered by
	 * {@link #selectUrls(Collection, WebDriver)}.
	 *
	 * @param url normalized URL found on the current page
	 * @return {@code true} to keep the URL for enqueueing, {@code false} to drop it
	 */
	public abstract boolean needWisit(String url);
	
	/**
	 * Extracts links from the page currently loaded in {@code wd} and adds
	 * them to {@code dst}.
	 * <p>
	 * During {@link #continueCrawling(WebDriver)} the {@code add} method of
	 * {@code dst} normalizes and filters each URL and always returns
	 * {@code false}; only {@code add} is customized there, so implementations
	 * should publish links through {@code add}.
	 *
	 * @param dst sink for the discovered URLs
	 * @param wd driver with the page already loaded
	 */
	public abstract void selectUrls(Collection<String> dst, WebDriver wd);
	
	/**
	 * Hook invoked after the driver navigated to {@code link} and before the
	 * links of the page are selected. Does nothing by default.
	 *
	 * @param link URL the driver was navigated to
	 * @param wd driver with the page loaded
	 */
	public void sniffPage(String link, WebDriver wd)
	{
		
	}
	
	/**
	 * Appends every link that has no entry in {@link #pageLinks} to the
	 * queue. Links are taken as given (no normalization) and a link already
	 * waiting in the queue is not filtered out here; such duplicates are
	 * skipped later by the visited check in {@link #continueCrawling(WebDriver)}.
	 *
	 * @param links candidate URLs
	 */
	public synchronized void enqueueLinks(Collection<String> links)
	{
		for(String candidate:links)
		{
			if(!pageLinks.containsKey(candidate))
			{
				urlsToVisit.add(candidate);
			}
		}
	}
	
	/**
	 * @param url URL to look up, compared as is (no normalization)
	 * @return {@code true} if {@link #pageLinks} already has an entry for {@code url}
	 */
	public synchronized boolean isVisited(String url)
	{
		return pageLinks.containsKey(url);
	}
	
	/**
	 * Normalizes {@code url} and queues it unless it was already visited.
	 *
	 * @param url URL to schedule
	 */
	public synchronized void addPage(String url)
	{
		String normalized = normalizeUrl(url);
		if(!isVisited(normalized))
		{
			urlsToVisit.add(normalized);
		}
	}
	
	/**
	 * Normalization used by this spider; delegates to
	 * {@link #defaultNormalizeUrl(String)} unless overridden.
	 *
	 * @param url raw URL
	 * @return normalized form of {@code url}
	 */
	public String normalizeUrl(String url)
	{
		return defaultNormalizeUrl(url);
	}
	
	/**
	 * Passes {@code url} through {@code AbstractWebSpider.betterNormalizeURL}
	 * and then rebuilds the result with {@link UrlBuilder}.
	 *
	 * @param url raw URL
	 * @return string form of the rebuilt URL
	 */
	public static String defaultNormalizeUrl(String url)
	{
		String normalized = AbstractWebSpider.betterNormalizeURL.getBy(url);
		return new UrlBuilder(normalized).getUrl().toString();
	}
	
	/**
	 * Takes the next URL off the queue under this instance's monitor, the same
	 * lock that guards every insertion made by this class.
	 *
	 * @return the next queued URL or {@code null} when the queue is empty
	 */
	private synchronized String pollNextUrl()
	{
		return urlsToVisit.poll();
	}
	
	/**
	 * Crawls until the queue is empty: each not yet visited URL is loaded in
	 * {@code wd}, handed to {@link #sniffPage(String, WebDriver)}, its links
	 * are collected through {@link #selectUrls(Collection, WebDriver)},
	 * recorded in {@link #pageLinks} and finally enqueued.
	 *
	 * @param wd driver used to load the pages
	 */
	public void continueCrawling(WebDriver wd)
	{
		String link;
		while(null != (link = pollNextUrl()))
		{
			if(isVisited(link))
			{
				System.out.println("Skip visited page: "+link);
				continue;
			}
			
			wd.navigate().to(link);
			sniffPage(link, wd);
			
			// A set, so a link occurring several times on the page is kept once.
			final Set<String> found = new HashSet<>();
			selectUrls(new PublisherCollection<String>()
			{
				@Override
				public boolean add(String obj)
				{
					String normalized = normalizeUrl(obj);
					if(needWisit(normalized))
					{
						found.add(normalized);
					}
					return false;
				}
			}, wd);
			
			List<String> collected = new ArrayList<String>(found);
			synchronized(this)
			{
				// Recording the page is what marks it as visited.
				pageLinks.put(link, collected);
			}
			enqueueLinks(collected);
		}
	}
	
	/**
	 * Prints the default normalization of a few sample URLs.
	 *
	 * @param args unused
	 */
	public static void main(String[] args)
	{
		System.out.println(defaultNormalizeUrl("http://www.site.com/list?page=1&page=7"));
		System.out.println(defaultNormalizeUrl("http://www.site.com/"));
		System.out.println(defaultNormalizeUrl("http://www.site.com"));
		
	}
}
