Tokenizer.java

package eu.javaexperience.text.tokenize;

import java.util.ArrayList;
import java.util.Collection;

import eu.javaexperience.collection.CollectionTools;
import eu.javaexperience.text.StringTools;

/**
 * Splits a string into {@link ParsedToken}s by offering each position of the
 * input to an ordered list of {@link TokenGroup}s.
 *
 * At every position the groups are asked in list order and the first one that
 * yields a token wins; scanning then resumes right after that token.
 *
 * @param <T> type parameter shared by the token groups and the tokens they produce
 */
public class Tokenizer<T>
{
	/** Token groups, consulted in iteration order at every input position. */
	protected ArrayList<TokenGroup<T>> tokenizers = new ArrayList<>();
	
	/**
	 * Populates the internal group list from {@code grps} through
	 * {@code CollectionTools.copyInto}.
	 *
	 * @param grps the token groups to use, in the order they should be tried
	 */
	public Tokenizer(Collection<TokenGroup<T>> grps)
	{
		CollectionTools.copyInto(grps, tokenizers);
	}
	
	/**
	 * Tokenizes {@code source} from its first character to its end and appends
	 * every recognised token to {@code dst}.
	 *
	 * A match that consumes no characters is ignored and the next group is
	 * tried: accepting it would leave the position unchanged and make this
	 * method loop forever while filling {@code dst}.
	 *
	 * Tokens appended before a failure stay in {@code dst}.
	 *
	 * @param dst collection that receives the tokens in the order they are found
	 * @param source text to tokenize; an empty string produces no tokens
	 * @throws RuntimeException if no group produces a token that advances the
	 *         position; the message contains the offset and the source with a
	 *         caret marker
	 */
	public void tokenize(Collection<ParsedToken<T>> dst, String source)
	{
		int from = 0;
		
		final int len = source.length();
		out:while(from < len)
		{
			for(TokenGroup<T> t:tokenizers)
			{
				ParsedToken<T> ret = t.tryMatch(from, source);
				if(null == ret)
				{
					continue;
				}
				
				// Position after the token; the compound assignment mirrors how the offset has always been advanced.
				int next = from;
				next += ret.content.length();
				if(next <= from)
				{
					// No progress (empty token): skip it, otherwise the outer loop never terminates.
					continue;
				}
				
				dst.add(ret);
				from = next;
				continue out;
			}
			
			// Reached only when every group declined (or made no progress) at this position.
			throw new RuntimeException("Unrecognisable token at positon: "+from+", content: \n"+source+"\n"+StringTools.repeatChar(' ', from)+"^\n");
		}
	}
	
	/**
	 * Returns the default {@link Object#toString()} representation.
	 */
	@Override
	public String toString()
	{
		return super.toString();
	}
}