/*  Sesame - Storage and Querying architecture for RDF and RDF Schema
 *  Copyright (C) 2001-2006 Aduna
 *
 *  Contact:
 *  	Aduna
 *  	Prinses Julianaplein 14 b
 *  	3817 CS Amersfoort
 *  	The Netherlands
 *  	tel. +33 (0)33 465 99 87
 *  	fax. +33 (0)33 465 99 87
 *
 *  	http://aduna-software.com/
 *  	http://www.openrdf.org/
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.openrdf.sesame.sailimpl.memory;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.GZIPInputStream;

import org.openrdf.vocabulary.RDF;
import org.openrdf.vocabulary.RDFS;

import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.BNodeImpl;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.URIImpl;

import org.openrdf.rio.ParseException;
import org.openrdf.rio.Parser;
import org.openrdf.rio.StatementHandlerException;
import org.openrdf.rio.ntriples.NTriplesParser;
import org.openrdf.rio.rdfxml.RdfXmlParser;
import org.openrdf.rio.turtle.TurtleParser;

import org.openrdf.sesame.constants.RDFFormat;
import org.openrdf.sesame.sail.NamespaceIterator;
import org.openrdf.sesame.sail.SailInitializationException;
import org.openrdf.sesame.sail.SailInternalException;
import org.openrdf.sesame.sail.StatementIterator;
import org.openrdf.sesame.sail.query.Query;
import org.openrdf.sesame.sail.query.QueryOptimizer;
import org.openrdf.sesame.sail.util.EmptyStatementIterator;

/**
 * An implementation of the RdfSource interface from the RDF Sail API that
 * stores its data in main memory and that can use a file for persistent
 * storage.
 * <p>
 * Caveat: Namespace prefix assignment is not guaranteed to be correct.
 * 
 * @author Arjohn Kampman
 */
public class RdfSource implements org.openrdf.sesame.sail.RdfSource, ValueFactory {

/*--------------+
| Constants     |
+--------------*/

	/**
	 * Configuration key whose value names the file that should be used for
	 * persistent storage.
	 **/
	public static final String FILE_KEY = "file";

	/**
	 * Configuration key whose value names the format of the data in the
	 * file. The value is resolved using <tt>RDFFormat.forValue()</tt>.
	 **/
	public static final String DATA_FORMAT_KEY = "dataFormat";

	/**
	 * Configuration key whose value indicates whether the data in the file
	 * is/should be compressed.
	 **/
	public static final String COMPRESS_FILE_KEY = "compressFile";

/*--------------+
| Variables     |
+--------------*/
	
	/** List containing all available statements. **/
	protected StatementList _statements;

	/**
	 * A Map containing identical URINode objects as key and value. This Map is
	 * used for sharing URINode objects: a shared URINode can be retrieved by
	 * using a URI or URINode object as search key.
	 */
	protected Map _uriNodesMap;

	/**
	 * A Map containing identical BNodeNode objects as key and value. This Map
	 * is used for sharing BNodeNode objects: a shared BNodeNode can be
	 * retrieved by using a BNode or BNodeNode object as search key.
	 */
	protected Map _bNodeNodesMap;

	/**
	 * A Map containing identical LiteralNode objects as key and value. This Map
	 * is used for sharing LiteralNode objects: a shared LiteralNode can be
	 * retrieved by using a Literal or LiteralNode object as search key.
	 */
	protected Map _literalNodesMap;

	/**
	 * A mapping from Strings representing namespaces to Namespace objects. Used
	 * for sharing the (relatively large) strings.
	 */
	protected Map _namespacesTable;

	/** List of all Namespace objects, in order of registration. **/
	protected List _namespacesList;

	/**
	 * Flag indicating whether the 'export' status of the registered namespaces
	 * is up to date. As long as this flag is <tt>false</tt>, getNamespaces()
	 * recalculates the export status before reporting the namespaces. The
	 * flag is reset to <tt>false</tt> after data has been read from file.
	 * NOTE(review): presumably subclasses also reset it when a transaction
	 * has been committed -- confirm against the subclasses.
	 **/
	protected boolean _exportStatusUpToDate;

	/** A counter for creating unique ("ns" + number) namespace prefixes. **/
	protected int _nextNsPrefixId;

	/** The file that should be used for persistent storage. **/
	protected File _file;

	/** The format of the data in the file. **/
	protected RDFFormat _dataFormat;

	/** Flag indicating whether the data in the file is/should be compressed. **/
	protected boolean _compressFile;

	/** The prefix for any new bnode IDs. **/
	private String _bnodePrefix;

	/** The ID for the next bnode that is created. **/
	private int _nextBNodeID;
	
/*--------------+
| Constructors  |
+--------------*/

	/**
	 * Creates a new, empty RdfSource. The shared-node maps, the namespace
	 * registry and the statement list are allocated first; after that the
	 * default namespaces are registered and an initial bnode prefix is
	 * generated.
	 **/
	public RdfSource() {
		// Lookup tables for shared value nodes
		_literalNodesMap = new HashMap();
		_bNodeNodesMap = new HashMap();
		_uriNodesMap = new HashMap();

		// Namespace registry; generated prefixes start at "ns1"
		_nextNsPrefixId = 1;
		_namespacesList = new ArrayList();
		_namespacesTable = new HashMap();

		// Statement storage (256 is presumably the initial capacity)
		_statements = new StatementList(256);

		// These two calls rely on the structures created above
		_addDefaultNamespaces();
		_updateBNodePrefix();
	}

	/**
	 * Registers the RDF and RDFS namespaces under the prefixes <tt>rdf</tt>
	 * and <tt>rdfs</tt> respectively, both with the export flag set.
	 **/
	void _addDefaultNamespaces() {
		// add default namespaces
		_registerNamespace("rdf", RDF.NAMESPACE, true);
		_registerNamespace("rdfs", RDFS.NAMESPACE, true);
	}

	/**
	 * Installs a fresh bnode prefix and restarts the bnode counter at
	 * <tt>1</tt>. The prefix has the form <tt>node&lt;t&gt;x</tt>, where
	 * <tt>&lt;t&gt;</tt> is the value of <tt>currentTimeMillis()</tt> written
	 * in radix 32.
	 **/
	protected void _updateBNodePrefix() {
		// The time stamp, combined with the sequential number that is appended
		// for each bnode created in this session, gives a unique identifier.
		String timeStamp = Long.toString(System.currentTimeMillis(), 32);

		_bnodePrefix = "node" + timeStamp + "x";
		_nextBNodeID = 1;
	}

	/**
	 * Initializes this RdfSource using the parameters in the supplied Map and
	 * delegates to {@link #initialize(File, RDFFormat, boolean)}. This method
	 * reads the following parameters, all of which are expected to be Strings:
	 * <table>
	 * <tr>
	 * <th>key</th>
	 * <th>value</th>
	 * </tr>
	 * <tr>
	 * <td>file</td>
	 * <td>The fully qualified name of the file that should be used for
	 * persistent data storage, e.g. <tt>c:\rdfdata\data.rdf</tt>. Optional;
	 * no file is used when this parameter is missing.</td>
	 * </tr>
	 * <tr>
	 * <td>dataFormat</td>
	 * <td>The format of the data in the file, as understood by
	 * <tt>RDFFormat.forValue()</tt>. Optional; <tt>RDFFormat.RDFXML</tt> is
	 * used when this parameter is missing.</td>
	 * </tr>
	 * <tr>
	 * <td>compressFile</td>
	 * <td>Flag indicating whether the data in the file is/should be
	 * compressed. Compression is enabled when this parameter has the value
	 * "yes", "true" or "on" (compared case-insensitively); any other value, or
	 * a missing parameter, disables it.</td>
	 * </tr>
	 * </table>
	 * <p>
	 * NOTE(review): earlier documentation of this method also listed a
	 * <tt>syncDelay</tt> parameter. This method does not read it; presumably
	 * it is handled by a subclass -- confirm.
	 *
	 * @param configParams The configuration parameters.
	 * @exception SailInitializationException If the dataFormat value is not
	 * recognized, or if the initialization using the resulting settings
	 * fails.
	 * @see org.openrdf.sesame.constants.RDFFormat
	 **/
	public void initialize(Map configParams)
		throws SailInitializationException
	{
		// Persistence file (optional)
		String fileStr = (String)configParams.get(FILE_KEY);
		File file = (fileStr == null) ? null : new File(fileStr);

		// Data format, RDF/XML unless specified otherwise
		String dataFormatStr = (String)configParams.get(DATA_FORMAT_KEY);
		RDFFormat dataFormat;
		if (dataFormatStr == null) {
			dataFormat = RDFFormat.RDFXML;
		}
		else {
			dataFormat = RDFFormat.forValue(dataFormatStr);
			if (dataFormat == null) {
				throw new SailInitializationException("Illegal dataFormat value: " + dataFormatStr);
			}
		}

		// Compression flag, off unless one of the recognized values is used
		String compressFileStr = (String)configParams.get(COMPRESS_FILE_KEY);
		boolean compressFile = compressFileStr != null &&
			(compressFileStr.equalsIgnoreCase("yes") ||
			 compressFileStr.equalsIgnoreCase("true") ||
			 compressFileStr.equalsIgnoreCase("on"));

		initialize(file, dataFormat, compressFile);
	}

	/**
	 * Initializes this repository without a file, a data format or
	 * compression. The contents of the repository will be volatile as no file
	 * will be used for persistent storage.
	 *
	 * @exception SailInitializationException Declared because this method
	 * delegates to <tt>initialize(File, RDFFormat, boolean)</tt>.
	 **/
	public void initialize()
		throws SailInitializationException
	{
		initialize(null, null, false);
	}

	/**
	 * Initializes this repository, using the specified file for persistent
	 * storage. The data format and compression flag are always recorded. If
	 * a file is supplied it is remembered, and if that file already exists
	 * its contents are read into this RdfSource.
	 *
	 * @param file The file to use for persistent storage, or <tt>null</tt>
	 * if no file should be used.
	 * @param dataFormat The format of the data in the file. Legal values are
	 * <tt>RDFFormat.RDFXML</tt>, <tt>RDFFormat.NTRIPLES</tt> and
	 * <tt>RDFFormat.TURTLE</tt>.
	 * @param compressFile Flag indicating whether the data in the file
	 * is/should be compressed.
	 * @exception SailInitializationException If the file exists but is not
	 * readable, or if reading the file failed.
	 */
	public void initialize(File file, RDFFormat dataFormat, boolean compressFile)
		throws SailInitializationException
	{
		// _readFromFile() depends on these two settings, so store them first
		_compressFile = compressFile;
		_dataFormat = dataFormat;

		if (file == null) {
			// No persistent storage requested
			return;
		}

		_file = file;

		if (!_file.exists()) {
			// Nothing to load yet
			return;
		}

		if (!_file.canRead()) {
			throw new SailInitializationException("File is not readable: " + _file.getPath());
		}

		_readFromFile();

		// Newly loaded statements invalidate the namespaces' export status
		_exportStatusUpToDate = false;
	}

	/**
	 * Implements Sail.shutDown(). This implementation does nothing.
	 **/
	public void shutDown() {
	}

	/**
	 * Implements RdfSource.getValueFactory(). This RdfSource acts as its own
	 * ValueFactory.
	 *
	 * @return This object.
	 **/
	public ValueFactory getValueFactory() {
		return this;
	}

	/**
	 * Implements RdfSource.getStatements(Resource, URI, Value). Each
	 * parameter may be <tt>null</tt> to indicate a wildcard. A non-null
	 * parameter for which no shared node exists in this RdfSource cannot
	 * match any statement, in which case an empty iterator is returned.
	 *
	 * @param subj The subject to match, or <tt>null</tt>.
	 * @param pred The predicate to match, or <tt>null</tt>.
	 * @param obj The object to match, or <tt>null</tt>.
	 * @return An iterator over the shortest applicable statement list,
	 * restricted by the resolved nodes.
	 **/
	public StatementIterator getStatements(Resource subj, URI pred, Value obj) {
		// Step 1: map the supplied values to shared nodes, one at a time. An
		// unknown value means that there are no matching statements.
		ResourceNode subjNode = (subj == null) ? null : _getResourceNode(subj);
		if (subj != null && subjNode == null) {
			return new EmptyStatementIterator();
		}

		URINode predNode = (pred == null) ? null : _getURINode(pred);
		if (pred != null && predNode == null) {
			return new EmptyStatementIterator();
		}

		ValueNode objNode = (obj == null) ? null : _getValueNode(obj);
		if (obj != null && objNode == null) {
			return new EmptyStatementIterator();
		}

		// Step 2: select the shortest list to iterate over, starting with the
		// list of all statements. A node's list only replaces the current
		// choice when it is strictly shorter.
		StatementList shortest = _statements;

		if (subjNode != null) {
			StatementList bySubject = subjNode.getSubjectStatementList();
			if (bySubject.size() < shortest.size()) {
				shortest = bySubject;
			}
		}

		if (predNode != null) {
			StatementList byPredicate = predNode.getPredicateStatementList();
			if (byPredicate.size() < shortest.size()) {
				shortest = byPredicate;
			}
		}

		if (objNode != null) {
			StatementList byObject = objNode.getObjectStatementList();
			if (byObject.size() < shortest.size()) {
				shortest = byObject;
			}
		}

		return new MemStatementIterator(shortest, subjNode, predNode, objNode);
	}

	/**
	 * Implements RdfSource.hasStatement(Resource, URI, Value). Checks
	 * whether getStatements() yields at least one statement for the supplied
	 * pattern.
	 *
	 * @return <tt>true</tt> if at least one statement matches.
	 **/
	public boolean hasStatement(Resource subj, URI pred, Value obj) {
		StatementIterator matches = getStatements(subj, pred, obj);
		boolean found = matches.hasNext();
		matches.close();
		return found;
	}

	/**
	 * Implements RdfSource.optimizeQuery(Query). Hands the supplied query to
	 * <tt>QueryOptimizer.optimizeQuery()</tt> and returns that same query
	 * object.
	 *
	 * @param qc The query to optimize.
	 * @return The supplied query object.
	 **/
	public Query optimizeQuery(Query qc) {
		// Apply the default optimizations
		QueryOptimizer.optimizeQuery(qc);
		return qc;
	}

	/**
	 * Implements RdfSource.getNamespaces(). The export status of the
	 * namespaces is recalculated first if it is not up to date.
	 *
	 * @return An iterator over the list of registered namespaces.
	 **/
	public NamespaceIterator getNamespaces() {
		// Lazily refresh the export flags
		if (!_exportStatusUpToDate) {
			_updateExportedNamespaces();
		}

		return new MemNamespaceIterator(_namespacesList);
	}

/*------------------------------------------+
| Methods specific for this implementation  |
+------------------------------------------*/

	/**
	 * Adds a statement to this RdfSource. If an identical statement is
	 * already present, nothing is added and the existing statement is
	 * returned instead. Shared nodes are created for any of the operands
	 * that are not yet known to this RdfSource.
	 *
	 * @return The statement that was added, or the equal statement that was
	 * already present.
	 */
	protected MemStatement _addStatement(Resource subj, URI pred, Value obj) {
		// Becomes false as soon as one of the operands requires a new node
		boolean allNodesKnown = true;

		// Look up each operand in turn, creating a node for it when needed
		ResourceNode subjNode = _getResourceNode(subj);
		if (subjNode == null) {
			allNodesKnown = false;
			subjNode = _createResourceNode(subj);
		}

		URINode predNode = _getURINode(pred);
		if (predNode == null) {
			allNodesKnown = false;
			predNode = _createURINode(pred);
		}

		ValueNode objNode = _getValueNode(obj);
		if (objNode == null) {
			allNodesKnown = false;
			objNode = _createValueNode(obj);
		}

		if (allNodesKnown) {
			// A statement can only be present already if none of its nodes
			// are new, so this is the only case that needs a lookup.
			StatementIterator existing = getStatements(subjNode, predNode, objNode);
			boolean present = existing.hasNext();
			MemStatement known = present ? (MemStatement)existing.next() : null;
			existing.close();

			if (present) {
				// Don't add the statement a second time
				return known;
			}
		}

		// New statement: store it and index it under each of its nodes
		MemStatement added = new MemStatement(subjNode, predNode, objNode);
		_statements.add(added);
		subjNode.addSubjectStatement(added);
		predNode.addPredicateStatement(added);
		objNode.addObjectStatement(added);

		return added;
	}

/*----------------------------------------------+
| Methods for acquiring and creating ValueNodes |
+----------------------------------------------*/

	/**
	 * Tells whether the supplied value is a <tt>ValueNode</tt> whose
	 * <tt>getRdfSource()</tt> is this very RdfSource object.
	 *
	 * @return <tt>true</tt> if the value is a ValueNode belonging to this
	 * RdfSource, <tt>false</tt> otherwise (including for <tt>null</tt>).
	 **/
	protected boolean _isOwnValueNode(Value value) {
		if (!(value instanceof ValueNode)) {
			return false;
		}

		return ((ValueNode)value).getRdfSource() == this;
	}

	/**
	 * Looks up the shared node that is equal to the supplied value.
	 *
	 * @param value A Resource or Literal.
	 * @return The shared ValueNode that is equal to the supplied value, or
	 * <tt>null</tt> if no such node exists.
	 * @exception RuntimeException If the value is neither a Resource nor a
	 * Literal.
	 */
	protected ValueNode _getValueNode(Value value) {
		if (value instanceof Resource) {
			return _getResourceNode((Resource)value);
		}

		if (value instanceof Literal) {
			return _getLiteralNode((Literal)value);
		}

		throw new RuntimeException("value is not a Resource or Literal: " + value);
	}

	/**
	 * Looks up the shared node that is equal to the supplied resource.
	 *
	 * @param resource A URI or BNode.
	 * @return The shared ResourceNode that is equal to the supplied resource,
	 * or <tt>null</tt> if no such node exists.
	 * @exception RuntimeException If the resource is neither a URI nor a
	 * BNode.
	 */
	protected ResourceNode _getResourceNode(Resource resource) {
		if (resource instanceof URI) {
			return _getURINode((URI)resource);
		}

		if (resource instanceof BNode) {
			return _getBNodeNode((BNode)resource);
		}

		throw new RuntimeException("resource is not a URI or BNode");
	}

	/**
	 * Looks up the shared URINode for the supplied URI. A URI that is itself
	 * a node of this RdfSource is returned as is; any other URI is used as
	 * search key in the map of shared URINodes.
	 *
	 * @return The shared URINode, or <tt>null</tt> if no such node exists.
	 */
	protected URINode _getURINode(URI uri) {
		return _isOwnValueNode(uri)
			? (URINode)uri
			: (URINode)_uriNodesMap.get(uri);
	}

	/**
	 * Looks up the shared BNodeNode for the supplied bnode. A bnode that is
	 * itself a node of this RdfSource is returned as is; any other bnode is
	 * used as search key in the map of shared BNodeNodes.
	 *
	 * @return The shared BNodeNode, or <tt>null</tt> if no such node exists.
	 */
	protected BNodeNode _getBNodeNode(BNode bNode) {
		return _isOwnValueNode(bNode)
			? (BNodeNode)bNode
			: (BNodeNode)_bNodeNodesMap.get(bNode);
	}

	/**
	 * Looks up the shared LiteralNode for the supplied literal. A literal
	 * that is itself a node of this RdfSource is returned as is; any other
	 * literal is used as search key in the map of shared LiteralNodes.
	 *
	 * @return The shared LiteralNode, or <tt>null</tt> if no such node exists.
	 */
	protected LiteralNode _getLiteralNode(Literal literal) {
		return _isOwnValueNode(literal)
			? (LiteralNode)literal
			: (LiteralNode)_literalNodesMap.get(literal);
	}

	/**
	 * Creates a shared node for the supplied value and returns it. The value
	 * must not have a shared node yet.
	 *
	 * @param value A Resource or Literal.
	 * @return The created ValueNode.
	 * @exception RuntimeException If the value is neither a Resource nor a
	 * Literal.
	 */
	protected ValueNode _createValueNode(Value value) {
		if (value instanceof Resource) {
			return _createResourceNode((Resource)value);
		}

		if (value instanceof Literal) {
			return _createLiteralNode((Literal)value);
		}

		throw new RuntimeException("value is not a Resource or Literal");
	}

	/**
	 * Creates a shared node for the supplied resource and returns it. The
	 * resource must not have a shared node yet.
	 *
	 * @param resource A URI or BNode.
	 * @return The created ResourceNode.
	 * @exception RuntimeException If the resource is neither a URI nor a
	 * BNode.
	 */
	protected ResourceNode _createResourceNode(Resource resource) {
		if (resource instanceof URI) {
			return _createURINode((URI)resource);
		}

		if (resource instanceof BNode) {
			return _createBNodeNode((BNode)resource);
		}

		throw new RuntimeException("resource is not a URI or BNode");
	}

	/**
	 * Creates a shared URINode for the supplied URI and returns it. The URI
	 * must not have a shared node yet. The URI's namespace is registered if
	 * it was not known before.
	 *
	 * @return The created URINode.
	 * @exception SailInternalException If an equal URINode turned out to be
	 * present already.
	 */
	protected URINode _createURINode(URI uri) {
		String namespace = uri.getNamespace();

		// Namespace strings are relatively large objects, so a single String
		// instance is shared between all URIs from the same namespace.
		Namespace knownNamespace = (Namespace)_namespacesTable.get(namespace);
		if (knownNamespace != null) {
			// Switch to the shared string
			namespace = knownNamespace.getName();
		}
		else {
			// First URI from this namespace: register it (not exported)
			_registerNamespace(namespace, false);
		}

		URINode created = new URINode(this, namespace, uri.getLocalName());

		// put() returns the previous mapping, which must not exist
		if (_uriNodesMap.put(created, created) != null) {
			throw new SailInternalException("Created a duplicate URINode for uri " + uri);
		}

		return created;
	}

	/**
	 * Creates a shared BNodeNode carrying the ID of the supplied bnode and
	 * returns it. The bnode must not have a shared node yet.
	 *
	 * @return The created BNodeNode.
	 * @exception SailInternalException If an equal BNodeNode turned out to be
	 * present already.
	 */
	protected BNodeNode _createBNodeNode(BNode bNode) {
		BNodeNode created = new BNodeNode(this, bNode.getID());

		// put() returns the previous mapping, which must not exist
		if (_bNodeNodesMap.put(created, created) != null) {
			throw new SailInternalException("Created a duplicate BNodeNode for bNode " + bNode);
		}

		return created;
	}

	/**
	 * Creates a shared LiteralNode for the supplied literal and returns it.
	 * The literal must not have a shared node yet. A datatype, if present,
	 * takes precedence over a language tag.
	 *
	 * @return The created LiteralNode.
	 * @exception SailInternalException If an equal LiteralNode turned out to
	 * be present already.
	 */
	protected LiteralNode _createLiteralNode(Literal literal) {
		LiteralNode created;

		URI datatype = literal.getDatatype();
		if (datatype != null) {
			// Datatyped literal
			created = new LiteralNode(this, literal.getLabel(), datatype);
		}
		else {
			String language = literal.getLanguage();
			if (language != null) {
				// Literal with a language tag
				created = new LiteralNode(this, literal.getLabel(), language);
			}
			else {
				// Plain literal
				created = new LiteralNode(this, literal.getLabel());
			}
		}

		// put() returns the previous mapping, which must not exist
		if (_literalNodesMap.put(created, created) != null) {
			throw new SailInternalException("Created a duplicate LiteralNode for literal " + literal);
		}

		return created;
	}

	/**
	 * Registers a namespace under a generated prefix of the form
	 * <tt>ns&lt;number&gt;</tt>, consuming the next value of the prefix
	 * counter.
	 *
	 * @param name The namespace name.
	 * @param export The initial value of the namespace's export flag.
	 * @return The registered Namespace object.
	 **/
	protected Namespace _registerNamespace(String name, boolean export) {
		String generatedPrefix = "ns" + _nextNsPrefixId;
		_nextNsPrefixId++;

		return _registerNamespace(generatedPrefix, name, export);
	}

	/**
	 * Registers a namespace under the supplied prefix. The new Namespace
	 * object is stored in the namespaces table, keyed by the namespace name,
	 * and appended to the namespaces list. No check is made here for an
	 * existing registration of the same name or prefix.
	 *
	 * @param prefix The namespace prefix.
	 * @param name The namespace name.
	 * @param export The initial value of the namespace's export flag.
	 * @return The registered Namespace object.
	 **/
	protected Namespace _registerNamespace(String prefix, String name, boolean export) {
		Namespace registered = new Namespace(prefix, name, export);

		_namespacesTable.put(name, registered);
		_namespacesList.add(registered);

		return registered;
	}

	/**
	 * Recalculates the export flag of all registered namespaces: a namespace
	 * is exported if, and only if, it is the namespace of the predicate of at
	 * least one statement. Afterwards the export status is marked as being up
	 * to date.
	 **/
	protected void _updateExportedNamespaces() {
		// Step 1: collect the namespace names of all predicates in use
		Set predicateNamespaces = new HashSet();

		int statementCount = _statements.size();
		for (int s = 0; s < statementCount; s++) {
			Statement statement = (Statement)_statements.get(s);
			predicateNamespaces.add(statement.getPredicate().getNamespace());
		}

		// Step 2: set or clear the export flag of every registered namespace
		int namespaceCount = _namespacesList.size();
		for (int n = 0; n < namespaceCount; n++) {
			Namespace registered = (Namespace)_namespacesList.get(n);
			boolean inUse = predicateNamespaces.contains(registered.getName());
			registered.setExported(inUse);
		}

		_exportStatusUpToDate = true;
	}

/*--------------------------+
| Reading from files        |
+--------------------------*/

	/**
	 * Reads the contents of <tt>_file</tt> into this RdfSource. A parser is
	 * chosen based on <tt>_dataFormat</tt>. The file is read through a
	 * <tt>GZIPInputStream</tt> when <tt>_compressFile</tt> is set. Parsed
	 * namespaces and statements are fed to this RdfSource through a
	 * <tt>NamespaceListener</tt> and a <tt>StatementHandler</tt>. Files of
	 * length 0 are silently skipped.
	 *
	 * @exception SailInitializationException If the data format is unknown
	 * or has not been set, if the file could not be parsed, or if an I/O error
	 * occurred while reading it.
	 * @exception RuntimeException If the statement handler unexpectedly
	 * reported an error; the original exception is available as the cause.
	 **/
	protected void _readFromFile()
		throws SailInitializationException
	{
		// Don't try to read files of length 0: with compression enabled this
		// will result in an IOException being thrown (gzip-files always contain
		// at least a header), and the file doesn't contain any data anyway.
		if (_file.length() == 0L) {
			return;
		}

		try {
			Parser parser = null;
			if (_dataFormat == RDFFormat.RDFXML) {
				parser = new RdfXmlParser(this);
			}
			else if (_dataFormat == RDFFormat.NTRIPLES) {
				parser = new NTriplesParser(this);
			}
			else if (_dataFormat == RDFFormat.TURTLE) {
				parser = new TurtleParser(this);
			}
			else {
				// _dataFormat may be null here (e.g. after initialize(file, null,
				// ...)). String concatenation copes with that, whereas calling
				// toString() on it would raise a NullPointerException.
				throw new SailInitializationException("Illegal value for data format: " + _dataFormat);
			}

			parser.setNamespaceListener(new NamespaceListener());
			parser.setStatementHandler(new StatementHandler());

			InputStream in = new FileInputStream(_file);
			try {
				// 'in' always refers to the outermost stream that was created
				// successfully, so closing it in the finally-block also
				// releases the underlying file.
				if (_compressFile) {
					in = new GZIPInputStream(in, 4096);
				}
				in = new BufferedInputStream(in, 4096);

				// NOTE(review): "file://" + path is not a well-formed URI for
				// relative or Windows-style paths. It is left unchanged because
				// it determines how relative URIs in existing data files are
				// resolved.
				parser.parse(in, "file://" + _file.getPath());
			}
			finally {
				in.close();
			}
		}
		catch (ParseException e) {
			throw new SailInitializationException("Parse error on line " + e.getLineNumber(), e);
		}
		catch (IOException e) {
			throw new SailInitializationException("I/O error on reading file " + _file, e);
		}
		catch (StatementHandlerException e) {
			// Not expected from this class's own StatementHandler. Keep the
			// original exception as cause so that its stack trace is not lost.
			throw new RuntimeException("Unexpected StatementHandlerException: " + e.getMessage(), e);
		}
	}

	/**
	 * Receives the namespace declarations reported by a parser and registers
	 * those namespaces that are not yet known to the enclosing RdfSource,
	 * using the reported prefix and with the export flag set.
	 **/
	class NamespaceListener implements org.openrdf.rio.NamespaceListener {
		public void handleNamespace(String prefix, String uri) {
			if (_namespacesTable.containsKey(uri)) {
				// Already registered, keep the existing registration
				return;
			}

			_registerNamespace(prefix, uri, true);
		}
	}

	/**
	 * Receives the statements reported by a parser and adds each of them to
	 * the enclosing RdfSource.
	 **/
	class StatementHandler implements org.openrdf.rio.StatementHandler {
		public void handleStatement(Resource subject, URI predicate, Value object) {
			// The statement returned by _addStatement() is not needed here
			_addStatement(subject, predicate, object);
		}
	}

	/**
	 * Implements ValueFactory#createURI(String). Returns the shared URINode
	 * for the supplied URI string, creating that node first if it does not
	 * exist yet.
	 **/
	public URI createURI(String uri) {
		// Temporary object, only used as search key and as template
		URI lookupKey = new URIImpl(uri);

		URINode shared = _getURINode(lookupKey);
		return (shared != null) ? shared : _createURINode(lookupKey);
	}
	
	/**
	 * Returns the shared URINode for the URI that consists of the supplied
	 * namespace and local name, creating that node first if it does not exist
	 * yet.
	 **/
	public URI createURI(String namespace, String localName) {
		// Temporary object, only used as search key and as template
		URI lookupKey = new URIImpl(namespace, localName);

		URINode shared = _getURINode(lookupKey);
		return (shared != null) ? shared : _createURINode(lookupKey);
	}

	/**
	 * Implements ValueFactory#createBNode(). The bnode's ID is the current
	 * bnode prefix followed by the next sequential number. A new prefix is
	 * generated first when the counter has reached
	 * <tt>Integer.MAX_VALUE</tt>.
	 **/
	public BNode createBNode() {
		if (_nextBNodeID == Integer.MAX_VALUE) {
			// Counter exhausted: continue with a new prefix
			_updateBNodePrefix();
		}

		String nodeId = _bnodePrefix + _nextBNodeID;
		_nextBNodeID++;

		return createBNode(nodeId);
	}
	
	/**
	 * Implements ValueFactory#createBNode(String). Returns the shared
	 * BNodeNode with the supplied ID, creating that node first if it does not
	 * exist yet.
	 **/
	public BNode createBNode(String nodeId) {
		// Temporary object, only used as search key and as template
		BNode lookupKey = new BNodeImpl(nodeId);

		BNodeNode shared = _getBNodeNode(lookupKey);
		return (shared != null) ? shared : _createBNodeNode(lookupKey);
	}

	/**
	 * Implements ValueFactory#createLiteral(String). Returns the shared
	 * LiteralNode for the supplied label, creating that node first if it does
	 * not exist yet.
	 **/
	public Literal createLiteral(String value) {
		// Temporary object, only used as search key and as template
		Literal lookupKey = new LiteralImpl(value);

		LiteralNode shared = _getLiteralNode(lookupKey);
		return (shared != null) ? shared : _createLiteralNode(lookupKey);
	}

	/**
	 * Implements ValueFactory#createLiteral(String, String). Returns the
	 * shared LiteralNode for the supplied label and language, creating that
	 * node first if it does not exist yet.
	 **/
	public Literal createLiteral(String value, String language) {
		// Temporary object, only used as search key and as template
		Literal lookupKey = new LiteralImpl(value, language);

		LiteralNode shared = _getLiteralNode(lookupKey);
		return (shared != null) ? shared : _createLiteralNode(lookupKey);
	}

	/**
	 * Implements ValueFactory#createLiteral(String, URI). Returns the shared
	 * LiteralNode for the supplied label and datatype, creating that node
	 * first if it does not exist yet.
	 **/
	public Literal createLiteral(String value, URI datatype) {
		// Temporary object, only used as search key and as template
		Literal lookupKey = new LiteralImpl(value, datatype);

		LiteralNode shared = _getLiteralNode(lookupKey);
		return (shared != null) ? shared : _createLiteralNode(lookupKey);
	}

	/**
	 * Implements ValueFactory#createStatement(Resource, URI, Value). If
	 * getStatements() yields a statement for the supplied subject, predicate
	 * and object, the first such statement is returned. Otherwise a new
	 * MemStatement is created, which is not added to this RdfSource.
	 **/
	public Statement createStatement(Resource subject, URI predicate, Value object) {
		// Prefer a statement that is already stored
		StatementIterator matches = getStatements(subject, predicate, object);
		Statement existing = matches.hasNext() ? matches.next() : null;
		matches.close();

		if (existing != null) {
			return existing;
		}

		return new MemStatement(subject, predicate, object);
	}
}
