package org.aksw.simba.bengal.selector;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Sets;

/**
 * This triple selector is a hybrid approach based on the
 * {@link SimpleSummarySelector} and the {@link PathBasedTripleSelector}. It
 * randomly selects whether it should follow the star or the path pattern to
 * select the next statement that is added to the selected triples.
 * 
 * <p>
 * The selection is driven by a seeded {@link Random}, so two instances created
 * with the same seed draw the same sequence of random numbers.
 * </p>
 * 
 * @author Michael R&ouml;der (roeder@informatik.uni-leipzig.de)
 *
 */
public class HybridTripleSelector extends AbstractSelector {

	private static final Logger LOGGER = LoggerFactory.getLogger(HybridTripleSelector.class);

	/** Minimal number of statements used if no size is given. */
	private static final int DEFAULT_MIN_SIZE = 1;
	/** Maximal number of statements used if no size is given. */
	private static final int DEFAULT_MAX_SIZE = 5;
	/** Seed of the random number generator used if no seed is given. */
	private static final long DEFAULT_SEED = 20;
	/**
	 * Number of already selected statements that may be drawn before the
	 * selection is aborted.
	 */
	private static final int MAX_RETRIES = 10;

	private final Set<String> sourceClasses;
	/** Start resources; loaded lazily on the first selection. */
	private List<Resource> resources;
	private final Random r;
	private final int minSize;
	private final int maxSize;

	/**
	 * Constructor
	 * 
	 * @param sourceClasses
	 *            Classes for subjects
	 * @param targetClasses
	 *            Classes for objects
	 * @param endpoint
	 *            SPARQL endpoint
	 * @param graph
	 *            Graph to query (null if none)
	 * @param minSize
	 *            Minimal size of summary
	 * @param maxSize
	 *            Maximal size of summary. If it is smaller than minSize,
	 *            minSize + 1 is used instead.
	 * @param seed
	 *            Seed of the random number generator that drives the selection
	 */
	public HybridTripleSelector(Set<String> sourceClasses, Set<String> targetClasses, String endpoint, String graph,
			int minSize, int maxSize, long seed) {
		super(targetClasses, endpoint, graph);
		this.sourceClasses = sourceClasses;
		this.resources = null;
		this.minSize = minSize;
		this.maxSize = (maxSize < minSize) ? (minSize + 1) : maxSize;
		this.r = new Random(seed);
	}

	/**
	 * Constructor using the default size range (1 to 5 statements) and the
	 * default seed (20).
	 * 
	 * @param sourceClasses
	 *            Classes for subjects
	 * @param targetClasses
	 *            Classes for objects
	 * @param endpoint
	 *            SPARQL endpoint
	 * @param graph
	 *            Graph to query (null if none)
	 */
	public HybridTripleSelector(Set<String> sourceClasses, Set<String> targetClasses, String endpoint, String graph) {
		this(sourceClasses, targetClasses, endpoint, graph, DEFAULT_MIN_SIZE, DEFAULT_MAX_SIZE, DEFAULT_SEED);
	}

	/**
	 * Returns the next list of statements generated by this selector.
	 * 
	 * <p>
	 * A target length between minSize and maxSize (both inclusive) is drawn and a
	 * start resource is picked randomly. Statements of the current resource are
	 * then picked randomly; after picking a statement whose object is a
	 * resource, the selector randomly decides whether it moves on to that object
	 * (path pattern) or stays at the current resource (star pattern). If all
	 * statements of a newly visited resource have been selected already, the
	 * selector makes a single step back to the previous resource.
	 * </p>
	 * 
	 * @return List of selected statements. The list can be shorter than the
	 *         chosen length if the selector got stuck in a dead end or drew too
	 *         many already selected statements. <code>null</code> is returned if
	 *         the statements of a resource could not be retrieved.
	 * @throws IllegalStateException
	 *             if no start resources are available for the source classes
	 */
	@Override
	public List<Statement> getNextStatements() {
		if (resources == null) {
			resources = getResources(sourceClasses);
		}
		// Fail with a clear message instead of a division by zero (or a
		// NullPointerException) in the random choice below.
		if ((resources == null) || resources.isEmpty()) {
			throw new IllegalStateException(
					"Couldn't find any resource for the source classes " + sourceClasses + ".");
		}
		// pick a random length for the result list
		int size = minSize + r.nextInt(maxSize - minSize + 1);
		List<Statement> result = new ArrayList<>(size);

		// Choose the first resource randomly. The expression is kept in this
		// form (instead of nextInt(bound)) so that the values drawn from the
		// seeded generator stay the same.
		int counter = Math.abs(r.nextInt() % resources.size());
		Resource currentResource = resources.get(counter);

		Set<Statement> alreadySeenStatements = new HashSet<>();
		// true at the beginning since there is no previous resource to go back to
		boolean madeAStepBackBefore = true;
		Resource oldResource = null;
		List<Statement> oldStatements = null;
		List<Statement> statements = null;
		Statement statement;
		boolean resourceChanged = true;
		int retries = 0, statementsForThisResource = 0, processedStatementsOfThisResource = 0;
		while (result.size() < size) {
			// retrieve the statements of the (new) current resource
			if (resourceChanged) {
				oldStatements = statements;
				statements = getSummary(currentResource);
				if (statements == null) {
					// there was an error
					LOGGER.error("Got an empty list of statements for the resource \"{}\". Returning null.",
							currentResource.getURI());
					return null;
				}
				statementsForThisResource = statements.size();
				processedStatementsOfThisResource = countAlreadySeen(statements, alreadySeenStatements);
				// if all statements have been processed
				if (processedStatementsOfThisResource == statementsForThisResource) {
					// try to make a step back.
					if (!madeAStepBackBefore) {
						madeAStepBackBefore = true;
						statements = oldStatements;
						currentResource = oldResource;
						statementsForThisResource = statements.size();
						processedStatementsOfThisResource = countAlreadySeen(statements, alreadySeenStatements);
					} else {
						LOGGER.info("Got stuck in a dead end. Returning the statements I selected so far.");
						return result;
					}
				}
				resourceChanged = false;
			}

			// now pick a random statement
			counter = r.nextInt(statementsForThisResource);
			statement = statements.get(counter);
			// make sure that we haven't seen this statement before
			if (alreadySeenStatements.contains(statement)) {
				++retries;
				if (retries > MAX_RETRIES) {
					LOGGER.warn(
							"After {} retries I couldn't select a matching statement. Returning the statements I selected so far.",
							MAX_RETRIES);
					return result;
				}
			} else {
				result.add(statement);
				alreadySeenStatements.add(statement);
				++processedStatementsOfThisResource;
				// Choose whether we should go on with the path pattern. This is
				// only decided (and a random number is only drawn) if this was
				// not the last statement and its object is a resource.
				if ((result.size() < size) && statement.getObject().isResource()
						&& (r.nextInt(statementsForThisResource) <= processedStatementsOfThisResource)) {
					oldResource = currentResource;
					currentResource = statement.getObject().asResource();
					resourceChanged = true;
					madeAStepBackBefore = false;
					retries = 0;
					processedStatementsOfThisResource = 0;
				}
			}
		}
		LOGGER.debug("Selected statements: {}", result);
		return result;
	}

	/**
	 * Counts the distinct statements of the given list that have been selected
	 * already.
	 * 
	 * @param statements
	 *            the statements of a resource
	 * @param alreadySeenStatements
	 *            the statements that have been selected so far
	 * @return the number of distinct statements contained in both collections
	 */
	private static int countAlreadySeen(List<Statement> statements, Set<Statement> alreadySeenStatements) {
		return Sets.intersection(new HashSet<Statement>(statements), alreadySeenStatements).size();
	}
}
