/*  Sesame - Storage and Querying architecture for RDF and RDF Schema
 *  Copyright (C) 2001-2006 Aduna
 *
 *  Contact: 
 *  	Aduna
 *  	Prinses Julianaplein 14 b
 *  	3817 CS Amersfoort
 *  	The Netherlands
 *  	tel. +33 (0)33 465 99 87
 *  	fax. +33 (0)33 465 99 87
 *
 *  	http://aduna-software.com/
 *  	http://www.openrdf.org/
 *  
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.openrdf.sesame.admin;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.text.NumberFormat;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.openrdf.util.io.IOUtil;
import org.openrdf.util.log.ThreadLog;

import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.ValueFactoryImpl;

import org.openrdf.rio.NamespaceListener;
import org.openrdf.rio.ParseErrorListener;
import org.openrdf.rio.ParseException;
import org.openrdf.rio.Parser;
import org.openrdf.rio.StatementHandler;
import org.openrdf.rio.StatementHandlerException;
import org.openrdf.rio.ntriples.NTriplesParser;
import org.openrdf.rio.rdfxml.RdfXmlParser;
import org.openrdf.rio.turtle.TurtleParser;

import org.openrdf.sesame.constants.RDFFormat;
import org.openrdf.sesame.repository.local.LocalService;
import org.openrdf.sesame.sail.RdfRepository;
import org.openrdf.sesame.sail.SailInternalException;
import org.openrdf.sesame.sail.SailUpdateException;

/**
 * The RdfAdmin module that can upload files to an RdfRepository and remove sets
 * of statements from it.
 **/
public class RdfAdmin {

/*--------------------------------------+
| Variables                             |
+--------------------------------------*/

	/** Formats the byte, character and statement counts used in status messages. */
	private NumberFormat _nf = NumberFormat.getInstance();

	/** The repository that is administrated; the constructors reject <tt>null</tt>. */
	private RdfRepository _rdfRepository;

	/**
	 * Service used to create tmp files for uploaded data. Stays <tt>null</tt>
	 * when this object was created without a LocalService.
	 **/
	private LocalService _service;

/*--------------------------------------+
| Constructors                          |
+--------------------------------------*/

	/**
	 * Creates a new RdfAdmin that administrates the data in the supplied
	 * RdfRepository and uses the supplied LocalService to create tmp files for
	 * uploaded data.
	 *
	 * @param rdfRepository The repository to administrate.
	 * @param service The service that is asked for tmp files when uploaded data
	 * needs to be cached.
	 * @exception IllegalArgumentException If the supplied rdfRepository is equal
	 * to null.
	 **/
	public RdfAdmin(RdfRepository rdfRepository, LocalService service) {
		// The single-argument constructor validates and stores the repository
		this(rdfRepository);

		this._service = service;
	}

	/**
	 * Creates a new RdfAdmin that administrates the data in the supplied
	 * RdfRepository. No LocalService is set by this constructor.
	 *
	 * @param rdfRepository The repository to administrate.
	 * @exception IllegalArgumentException If the supplied rdfRepository is equal
	 * to null.
	 **/
	public RdfAdmin(RdfRepository rdfRepository) {
		if (null == rdfRepository) {
			throw new IllegalArgumentException("rdfRepository must not be null");
		}

		this._rdfRepository = rdfRepository;
	}

/*--------------------------------------+
| Methods                               |
+--------------------------------------*/

	/**
	 * Removes all statements that match the pattern (subject, predicate,
	 * object) from the repository inside a single transaction and reports the
	 * number of removed statements to the listener.
	 *
	 * @param subject the subject of a statement, or <tt>null</tt> to indicate a
	 * match with any resource.
	 * @param predicate the predicate of a statement, or <tt>null</tt> to
	 * indicate a match with any URI.
	 * @param object the object of a statement, or <tt>null</tt> to indicate a
	 * match with any value.
	 * @param listener An <tt>AdminListener</tt> that receives the transaction
	 * events, status messages and errors.
	 * @exception UpdateException If the repository reports a
	 * SailUpdateException.
	 **/
	public void removeStatements(Resource subject, URI predicate, Value object, AdminListener listener)
		throws UpdateException
	{
		// NOTE(review): when the removal fails after startTransaction(), no
		// commit is issued here -- confirm how the Sail treats the open
		// transaction.
		try {
			listener.transactionStart();
			_rdfRepository.startTransaction();

			listener.status("Removing statements...", -1, -1);
			int removedCount = _rdfRepository.removeStatements(subject, predicate, object);

			_rdfRepository.commitTransaction();

			String summary = "Removed " + removedCount + " statements";
			listener.notification(summary, -1, -1, null);
		}
		catch (SailUpdateException sailError) {
			String errMsg = "Failed to remove statements: " + sailError.getMessage();
			listener.error(errMsg, -1, -1, null);
			throw new UpdateException(sailError);
		}
		catch (RuntimeException unexpected) {
			// Report the problem to the listener, then let it propagate unchanged
			String errMsg = "Unexpected error while removing statements: " + unexpected.getMessage();
			listener.error(errMsg, -1, -1, null);
			throw unexpected;
		}
		finally {
			// The listener is always told that the transaction is over
			listener.transactionEnd();
		}
	}

	/**
	 * Clears the repository of this RdfAdmin inside a single transaction.
	 *
	 * @param listener An <tt>AdminListener</tt> to report administrative
	 * messages to.
	 * @exception UpdateException If the repository reports a
	 * SailUpdateException.
	 **/
	public void clearRepository(AdminListener listener)
		throws UpdateException
	{
		// NOTE(review): when clearing fails after startTransaction(), no commit
		// is issued here -- confirm how the Sail treats the open transaction.
		try {
			listener.transactionStart();
			_rdfRepository.startTransaction();

			listener.status("Clearing repository...", -1, -1);
			_rdfRepository.clearRepository();

			_rdfRepository.commitTransaction();

			listener.status("Repository cleared", -1, -1);
		}
		catch (SailUpdateException e) {
			listener.error("Failed to clear the repository: " + e.getMessage(), -1, -1, null);
			throw new UpdateException(e);
		}
		catch (RuntimeException e) {
			// Report the problem to the listener, then let it propagate unchanged.
			// The message names the clear operation; it used to say "removing
			// statements", which was copied from removeStatements().
			listener.error("Unexpected error while clearing the repository: " + e.getMessage(), -1, -1, null);
			throw e;
		}
		finally {
			// The listener is always told that the transaction is over
			listener.transactionEnd();
		}
	}

	/**
	 * Convenience variant of
	 * {@link #addRdfModel(InputStream, String, AdminListener, RDFFormat, boolean)}
	 * that treats the data from the supplied InputStream as RDF/XML.
	 *
	 * @param in The InputStream containing the RDF/XML data.
	 * @param baseURL The base URL of the data for resolving any relative URIs.
	 * @param listener An AdminListener that will receive feedback about the
	 * progress and of any errors in the data.
	 * @param verifyData If set to 'true', the data will first be checked for
	 * errors.
	 * @return the number of processed statements
	 **/
	public int addRdfModel(InputStream in, String baseURL, AdminListener listener, boolean verifyData)
		throws UpdateException, IOException
	{
		RDFFormat defaultFormat = RDFFormat.RDFXML;

		return addRdfModel(in, baseURL, listener, defaultFormat, verifyData);
	}

	/**
	 * Reads and parses an RDF file from the supplied InputStream and adds
	 * the triples to the RdfRepository of this RdfAdmin.
	 * <p>
	 * When <tt>verifyData</tt> is set, the data is first copied to a tmp file
	 * (or to main memory if no tmp file could be created), parsed once to check
	 * it for errors, and then parsed a second time from the cached copy to add
	 * the statements. The supplied stream is closed by this method.
	 *
	 * @param inputStream The InputStream containing the RDF data.
	 * @param baseURL The base URL of the data for resolving any relative URIs.
	 * @param listener An AdminListener that will receive feedback about the
	 * progress and of any errors in the data.
	 * @param dataFormat The format of the RDF data: RDFFormat.RDFXML,
	 * RDFFormat.NTRIPLES or RDFFormat.TURTLE.
	 * @param verifyData If set to 'true', the data will first be checked for
	 * errors. This flag should only be set to false if the input is known for
	 * sure to be correct.
	 * @return The number of processed statements, or 0 if verification found
	 * no statements in the data.
	 * @exception UpdateException If verification found errors in the data or
	 * if the data could not be processed.
	 * @exception IOException If an I/O error occurs while caching the data or
	 * while opening or closing a stream.
	 **/
	public int addRdfModel(InputStream inputStream, String baseURL, AdminListener listener,
		RDFFormat dataFormat, boolean verifyData)
		throws UpdateException, IOException
	{
		File tmpFile = null;
		int statementCount = 0;

		try {
			listener.transactionStart();

			if (verifyData) {
				// Try storing the data to disk to prevent double downloads, use
				// main memory as backup in case no tmp file could be created.
				tmpFile = _createUploadTmpFile();

				listener.status("Loading data", -1, -1);
				ThreadLog.trace("Loading data");

				byte[] data = null;
				long totalBytes;
				if (tmpFile != null) {
					// Store data to disk
					totalBytes = IOUtil.writeToFile(inputStream, tmpFile);
				}
				else {
					// Store data in memory
					listener.warning("Unable to store data in tmp file, attempting to store it in main memory", -1, -1, null);
					data = IOUtil.readFully(inputStream);
					// Without this the status below always reported 0 bytes
					totalBytes = data.length;
				}
				inputStream.close();

				listener.status("Data loaded (" + _nf.format(totalBytes) + " bytes)", -1, -1);

				// Verify the cached data
				inputStream = _openCachedData(tmpFile, data);
				statementCount = _verifyData(inputStream, baseURL, dataFormat, listener);
				inputStream.close();

				if (statementCount == 0) {
					listener.status("Data does not contain any statements", -1, -1);
					ThreadLog.trace("No statements found");
					return 0;
				}

				String msg = "Data is correct and contains " + _nf.format(statementCount) + " statements";
				listener.status(msg, -1, -1);
				ThreadLog.trace(msg);

				// Reopen the cached data for the actual processing
				inputStream = _openCachedData(tmpFile, data);
			}

			// Process the data
			statementCount = _processStatements(inputStream, baseURL, dataFormat, listener);
		}
		catch (UpdateException e) {
			listener.error("Failed to add statements: " + e.getMessage(), -1, -1, null);
			throw e;
		}
		catch (RuntimeException e) {
			listener.error("Unexpected error while adding statements: " + e.getMessage(), -1, -1, null);
			throw e;
		}
		finally {
			// Nested so that a failure in one clean-up step does not prevent
			// the steps after it from running.
			try {
				listener.transactionEnd();
			}
			finally {
				try {
					inputStream.close();
				}
				finally {
					// Delete the tmp file.
					if (tmpFile != null) {
						tmpFile.delete();
					}
				}
			}
		}

		return statementCount;
	}

	/**
	 * Opens a fresh InputStream on uploaded data that was cached either in the
	 * supplied tmp file or, if tmpFile is <tt>null</tt>, in the supplied byte
	 * array.
	 **/
	private InputStream _openCachedData(File tmpFile, byte[] data)
		throws IOException
	{
		if (tmpFile != null) {
			return new FileInputStream(tmpFile);
		}
		return new ByteArrayInputStream(data);
	}

	/**
	 * Reads and parses an RDF file from the supplied Reader and adds
	 * the triples to the RdfRepository of this RdfAdmin.
	 * <p>
	 * When <tt>verifyData</tt> is set, the data is first copied to a UTF-8
	 * encoded tmp file (or to main memory if no tmp file could be created),
	 * parsed once to check it for errors, and then parsed a second time from
	 * the cached copy to add the statements. The supplied reader is closed by
	 * this method.
	 *
	 * @param reader The Reader containing the RDF data.
	 * @param baseURL The base URL of the data for resolving any relative URIs.
	 * @param listener An AdminListener that will receive feedback about the
	 * progress and of any errors in the data.
	 * @param dataFormat The format of the RDF data: RDFFormat.RDFXML,
	 * RDFFormat.NTRIPLES or RDFFormat.TURTLE.
	 * @param verifyData If set to 'true', the data will first be checked for
	 * errors. This flag should only be set to false if the input is known for
	 * sure to be correct.
	 * @return The number of processed statements, or 0 if verification found
	 * no statements in the data.
	 * @exception UpdateException If verification found errors in the data or
	 * if the data could not be processed.
	 * @exception IOException If an I/O error occurs while caching the data or
	 * while opening or closing a reader.
	 **/
	public int addRdfModel(Reader reader, String baseURL, AdminListener listener,
		RDFFormat dataFormat, boolean verifyData)
		throws UpdateException, IOException
	{
		File tmpFile = null;
		int statementCount = 0;

		try {
			listener.transactionStart();

			if (verifyData) {
				// Try storing the data to disk to prevent double downloads, use
				// main memory as backup in case no tmp file could be created.
				tmpFile = _createUploadTmpFile();

				listener.status("Loading data", -1, -1);
				ThreadLog.trace("Loading data");

				String data = null;
				long totalChars;
				if (tmpFile != null) {
					// Store data to disk
					FileOutputStream out = new FileOutputStream(tmpFile);
					try {
						Writer writer = new OutputStreamWriter(out, "UTF-8");
						totalChars = IOUtil.transfer(reader, writer);
						// Flushes the encoder's buffer and closes 'out'
						writer.close();
					}
					catch (UnsupportedEncodingException e) {
						// UTF-8 should be supported on all platforms...
						ThreadLog.error("Unable to write data to tmp file using UTF-8", e);
						throw new RuntimeException(e);
					}
					finally {
						// Releases the file handle when the transfer failed, so
						// that the tmp file can still be deleted. Has no effect
						// if the writer was closed normally.
						out.close();
					}
				}
				else {
					// Store data in memory
					listener.warning("Unable to store data in tmp file, attempting to store it in main memory", -1, -1, null);
					data = IOUtil.readFully(reader);
					// Without this the status below always reported 0 characters
					totalChars = data.length();
				}
				reader.close();

				listener.status("Data loaded (" + _nf.format(totalChars) + " characters)", -1, -1);

				// Verify the cached data
				reader = _openCachedReader(tmpFile, data);
				statementCount = _verifyData(reader, baseURL, dataFormat, listener);
				reader.close();

				if (statementCount == 0) {
					listener.status("Data does not contain any statements", -1, -1);
					ThreadLog.trace("No statements found");
					return 0;
				}

				String msg = "Data is correct and contains " + _nf.format(statementCount) + " statements";
				listener.status(msg, -1, -1);
				ThreadLog.trace(msg);

				// Reopen the cached data for the actual processing
				reader = _openCachedReader(tmpFile, data);
			}

			// Process the data
			statementCount = _processStatements(reader, baseURL, dataFormat, listener);
		}
		catch (UpdateException e) {
			listener.error("Failed to add statements: " + e.getMessage(), -1, -1, null);
			throw e;
		}
		catch (RuntimeException e) {
			listener.error("Unexpected error while adding statements: " + e.getMessage(), -1, -1, null);
			throw e;
		}
		finally {
			// Nested so that a failure in one clean-up step does not prevent
			// the steps after it from running.
			try {
				listener.transactionEnd();
			}
			finally {
				try {
					reader.close();
				}
				finally {
					// Delete the tmp file.
					if (tmpFile != null) {
						tmpFile.delete();
					}
				}
			}
		}

		return statementCount;
	}

	/**
	 * Opens a fresh Reader on uploaded data that was cached either in the
	 * supplied UTF-8 encoded tmp file or, if tmpFile is <tt>null</tt>, in the
	 * supplied String.
	 **/
	private Reader _openCachedReader(File tmpFile, String data)
		throws IOException
	{
		if (tmpFile == null) {
			return new StringReader(data);
		}

		FileInputStream in = new FileInputStream(tmpFile);
		try {
			return new InputStreamReader(in, "UTF-8");
		}
		catch (UnsupportedEncodingException e) {
			// UTF-8 should be supported on all platforms...
			ThreadLog.error("Unable to read data from tmp file using UTF-8", e);
			throw new RuntimeException(e);
		}
	}

	/**
	 * Parses the supplied data once without storing anything, forwarding all
	 * parser warnings and errors to the listener.
	 *
	 * @param inputStreamOrReader The data, either an InputStream or a Reader.
	 * @return The number of statements found in the data.
	 * @exception UpdateException If the parser reported one or more errors, or
	 * if parsing failed with an I/O, parse or statement handler exception.
	 * @exception IllegalArgumentException If inputStreamOrReader is neither an
	 * InputStream nor a Reader.
	 **/
	private int _verifyData(Object inputStreamOrReader, String baseURL, RDFFormat dataFormat, AdminListener listener)
		throws UpdateException, IOException
	{
		listener.status("Checking data for errors", -1, -1);
		ThreadLog.trace("Checking data for errors");

		// A stand-alone value factory is used here, not the repository's one
		Parser parser = _createParser(dataFormat, baseURL, new ValueFactoryImpl());
		parser.setVerifyData(true);
		parser.setStopAtFirstError(false);
		parser.setDatatypeHandling(Parser.DT_VERIFY);

		// One object counts the statements and collects the parse errors
		DataVerifier verifier = new DataVerifier(listener);
		parser.setStatementHandler(verifier);
		parser.setParseErrorListener(verifier);

		try {
			if (inputStreamOrReader instanceof Reader) {
				parser.parse((Reader)inputStreamOrReader, baseURL);
			}
			else if (inputStreamOrReader instanceof InputStream) {
				parser.parse((InputStream)inputStreamOrReader, baseURL);
			}
			else {
				throw new IllegalArgumentException("inputStreamOrReader is of type: " + inputStreamOrReader.getClass());
			}

			// Refuse the data if the parser reported any error
			if (verifier.getErrorCount() > 0) {
				throw new UpdateException(verifier.getErrorCount() + " errors found, please fix these first");
			}

			return verifier.getStatementCount();
		}
		catch (IOException ioError) {
			ThreadLog.warning("unable to verify data", ioError);
			throw new UpdateException(ioError);
		}
		catch (ParseException parseError) {
			throw new UpdateException(parseError);
		}
		catch (StatementHandlerException handlerError) {
			throw new UpdateException(handlerError);
		}
	}

	/**
	 * Parses the supplied data and adds every reported statement to the
	 * repository inside a single transaction. Afterwards the namespace
	 * prefixes that were declared in the data are passed on to the repository.
	 * <p>
	 * The transaction is committed and the status reporter is stopped in all
	 * cases, including when parsing fails or the data format is not supported.
	 *
	 * @param inputStreamOrReader The data, either an InputStream or a Reader.
	 * @return The statement count as kept by the status reporter.
	 * @exception UpdateException If parsing failed with an I/O, parse or
	 * statement handler exception.
	 * @exception IllegalArgumentException If inputStreamOrReader is neither an
	 * InputStream nor a Reader, or if the data format is not supported.
	 **/
	private int _processStatements(Object inputStreamOrReader, String baseURL, RDFFormat dataFormat, AdminListener listener)
		throws UpdateException, IOException
	{
		listener.status("Processing statements from data", -1, -1);
		ThreadLog.trace("Processing statements");

		// StatusReporter will report progress every 60 seconds
		StatusReporter statusReporter = new StatusReporter(60 * 1000, listener);

		_rdfRepository.startTransaction();
		statusReporter.start();

		try {
			// The parser is created inside the try block: if the data format
			// is rejected, the finally block still stops the status reporter
			// and ends the transaction that was started above.
			Parser rdfParser = _createParser(dataFormat, baseURL, _rdfRepository.getValueFactory());
			rdfParser.setVerifyData(false);
			rdfParser.setStopAtFirstError(false);
			rdfParser.setDatatypeHandling(Parser.DT_VERIFY);

			// The StatementHandler
			StatementInserter statHandler = new StatementInserter(listener, statusReporter);
			rdfParser.setStatementHandler(statHandler);

			// The NamespaceListener
			NamespaceCollector nsCollector = new NamespaceCollector();
			rdfParser.setNamespaceListener(nsCollector);

			if (inputStreamOrReader instanceof InputStream) {
				rdfParser.parse((InputStream)inputStreamOrReader, baseURL);
			}
			else if (inputStreamOrReader instanceof Reader) {
				rdfParser.parse((Reader)inputStreamOrReader, baseURL);
			}
			else {
				throw new IllegalArgumentException("inputStreamOrReader is of type: " + inputStreamOrReader.getClass());
			}

			listener.status("Updating extracted namespace prefixes", -1, -1);
			ThreadLog.trace("Updating extracted namespace prefixes");

			Iterator entries = nsCollector.getNamespaces().entrySet().iterator();
			while (entries.hasNext()) {
				Map.Entry entry = (Map.Entry)entries.next();
				String prefix = (String)entry.getKey();
				String namespace = (String)entry.getValue();
				try {
					_rdfRepository.changeNamespacePrefix(namespace, prefix);
				}
				catch (SailUpdateException e) {
					// A prefix that cannot be set is reported but does not
					// abort the upload
					listener.warning("Unable to set namespace prefix '" + prefix +
							"' for namespace '" + namespace + "': " + e.getMessage(), -1, -1, null);
				}
			}
		}
		catch (IOException e) {
			ThreadLog.warning("Unable to process data", e);
			throw new UpdateException(e);
		}
		catch (ParseException e) {
			ThreadLog.warning("Parse error", e);
			throw new UpdateException(e);
		}
		catch (StatementHandlerException e) {
			ThreadLog.warning("Statement handler exception", e);
			throw new UpdateException(e);
		}
		catch (OutOfMemoryError e) {
			ThreadLog.warning("Out of memory");
			listener.error("Out of memory", -1, -1, null);
			throw e;
		}
		finally {
			statusReporter.stopRunning();

			listener.status("Committing transaction...", -1, -1);
			ThreadLog.trace("Committing transaction");
			try {
				_rdfRepository.commitTransaction();
			}
			catch (SailInternalException e) {
				// NOTE(review): a failed commit is only logged and reported as
				// a status message; the method still returns normally.
				ThreadLog.error("Internal Sail error", e);
				listener.status(e.getMessage(), -1, -1);
			}
		}
		return statusReporter.getStatementCount();
	}

	/**
	 * Creates the parser that matches the supplied data format: an RDF/XML,
	 * N-Triples or Turtle parser. The baseURL parameter is not used by this
	 * method.
	 *
	 * @param valFactory The value factory that is handed to the new parser.
	 * @exception IllegalArgumentException If dataFormat is none of
	 * RDFFormat.RDFXML, RDFFormat.NTRIPLES and RDFFormat.TURTLE.
	 **/
	private Parser _createParser(RDFFormat dataFormat, String baseURL, ValueFactory valFactory) {
		if (RDFFormat.RDFXML.equals(dataFormat)) {
			return new RdfXmlParser(valFactory);
		}

		if (RDFFormat.NTRIPLES.equals(dataFormat)) {
			return new NTriplesParser(valFactory);
		}

		if (RDFFormat.TURTLE.equals(dataFormat)) {
			return new TurtleParser(valFactory);
		}

		throw new IllegalArgumentException("Illegal value for parameter 'dataFormat'");
	}
	
	/**
	 * Asks the local service for a tmp file to cache uploaded data in.
	 *
	 * @return The tmp file, or <tt>null</tt> if no local service has been set
	 * or if the service failed to create the file; both cases are logged.
	 **/
	private File _createUploadTmpFile() {
		File uploadFile = null;

		try {
			if (_service == null) {
				ThreadLog.error("Unable to create tmp file for uploaded data: local service not set");
			}
			else {
				uploadFile = _service.createTmpFile("upload", ".tmp");
			}
		}
		catch (IOException ioError) {
			ThreadLog.error("Unable to create tmp file for uploaded data", ioError);
		}

		return uploadFile;
	}

/*-----------------------------------------------+
| Inner class DataVerifier                       |
+-----------------------------------------------*/

	private class DataVerifier implements StatementHandler, ParseErrorListener {

		private AdminListener _listener;
		private int _errorCount;
		private int _statementCount;
	
		public DataVerifier(AdminListener listener) {
			_listener = listener;
			_errorCount = 0;
			_statementCount = 0;
		}
	
		public int getErrorCount() {
			return _errorCount;
		}
	
		public int getStatementCount() {
			return _statementCount;
		}
	
		// Implements StatementHandler.handleStatement(...)
		public void handleStatement(Resource subject, URI predicate, Value object) {
			_statementCount++;
		}

		// Implements ParseErrorListener.warning(...)
		public void warning(String msg, int lineNo, int colNo) {
			_listener.notification(msg, lineNo, colNo, null);
		}
	
		// Implements ParseErrorListener.error(...)
		public void error(String msg, int lineNo, int colNo) {
			_listener.warning(msg, lineNo, colNo, null);
			_errorCount++;
		}
	
		// Implements ParseErrorListener.fatalError(...)
		public void fatalError(String msg, int lineNo, int colNo) {
			_listener.error(msg, lineNo, colNo, null);
			_errorCount++;
		}
	}

/*-----------------------------------------------+
| Inner class StatementInserter                  |
+-----------------------------------------------*/

	/**
	 * Statement handler that adds every reported statement to the repository
	 * of the enclosing RdfAdmin and notifies the status reporter of each added
	 * statement. A statement that the repository refuses is reported to the
	 * listener as a warning and then skipped.
	 **/
	private class StatementInserter implements StatementHandler {

		/** Receives a warning for every statement that could not be added. */
		private final AdminListener _listener;

		/** Is told about every statement that was added. */
		private final StatusReporter _statusReporter;

		public StatementInserter(AdminListener listener, StatusReporter statusReporter) {
			_listener = listener;
			_statusReporter = statusReporter;
		}

		// Implements StatementHandler.handleStatement(...)
		public void handleStatement(Resource s, URI p, Value o) {
			try {
				_rdfRepository.addStatement(s, p, o);
				_statusReporter.statementAdded();
			}
			catch (SailUpdateException e) {
				// An exception without a message must not abort the upload
				// with a NullPointerException, so fall back to its toString()
				String reason = e.getMessage();
				if (reason == null) {
					reason = e.toString();
				}

				_listener.warning("Unable to add statement; " + reason.trim(),
						-1, -1, new StatementImpl(s, p, o));
			}
		}
	}

/*-----------------------------------------------+
| Inner class NamespaceCollector                 |
+-----------------------------------------------*/

	/**
	 * Namespace listener that remembers the namespace of every non-empty
	 * prefix that the parser reports. When a prefix is reported more than
	 * once, the first declaration is kept.
	 **/
	private static class NamespaceCollector implements NamespaceListener {

		/** Maps a prefix to the namespace it was first declared for. */
		private Map _namespaceMap = new HashMap();

		// Implements NamespaceListener.handleNamespace(...)
		public void handleNamespace(String prefix, String uri) {
			// Ignore missing and blank prefixes
			if (prefix == null || prefix.trim().length() == 0) {
				return;
			}

			// Use the first declaration
			if (_namespaceMap.containsKey(prefix)) {
				return;
			}

			_namespaceMap.put(prefix, uri);
		}

		/**
		 * Returns the collected prefix-to-namespace map itself, not a copy.
		 **/
		public Map getNamespaces() {
			return _namespaceMap;
		}
	}
}
