/*  Sesame - Storage and Querying architecture for RDF and RDF Schema
 *  Copyright (C) 2001-2007 Aduna
 *
 *  Contact: 
 *  	Aduna
 *  	Prinses Julianaplein 14 b
 *  	3817 CS Amersfoort
 *  	The Netherlands
 *  	tel. +33 (0)33 465 99 87
 *  	fax. +33 (0)33 465 99 87
 *
 *  	http://aduna-software.com/
 *  	http://www.openrdf.org/
 *  
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.openrdf.sesame.sailimpl.nativerdf.datastore;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

/**
 * Class supplying access to a hash file. This class is a wrapper class. The
 * actual hash file stuff is implemented in the inner class HashFile0.
 *
 * @author Arjohn Kampman
 * @version $Revision: 1.10.4.4 $
 **/
public class HashFile {

/*-------------+
| Constants    |
+-------------*/

	// The size of an item (32-bit hash + 64-bit offset), in bytes
	private static final int ITEM_SIZE = 12;

	// The size of the header field in bytes; the header holds three 32-bit
	// integers: the bucket count, the bucket size and the item count
	private static final long HEADER_LENGTH = 12L;

	// The number of (non-overflow) buckets that a newly created hash file gets
	private static final int INIT_BUCKET_COUNT = 64;
	// The number of item slots per bucket in a newly created hash file
	private static final int INIT_BUCKET_SIZE = 8;

/*-------------+
| Variables    |
+-------------*/

	// The data file that was passed to the constructor
	private File _file;
	// The hash file on top of _file; used for reads that are not 'dirty'
	private HashFile0 _hashFile;

	// The working copy of the data file ("txn_" + name of the data file);
	// only assigned while an isolated transaction is being executed
	private File _txnFile;
	// The hash file that receives the updates of the current transaction: a
	// hash file on top of _txnFile for isolated transactions, _hashFile itself
	// for unisolated ones
	private HashFile0 _txnHashFile;

	/** Flag indicating whether the current transaction is/should be an isolated one. **/
	private boolean _isolatedTransaction;

/*-------------+
| Constructors |
+-------------*/

	/**
	 * Creates a HashFile that stores its data in the specified file. The file
	 * is created if it does not exist yet.
	 *
	 * @param file The file to store the hash table in.
	 * @throws IOException If the file could not be created or opened.
	 **/
	public HashFile(File file)
		throws IOException
	{
		// createNewFile() leaves existing files untouched, so this merely
		// guarantees that there is a file to open
		file.createNewFile();

		_file = file;
		_hashFile = new HashFile0(file);
	}

/*----------+
| Methods   |
+----------*/

	/**
	 * Starts a transaction on this hash file. For isolated transactions, the
	 * data file is copied to a working copy ("txn_" + file name, in the same
	 * directory) and all updates are applied to that copy until the
	 * transaction is committed or rolled back. Updates of unisolated
	 * transactions are applied to the data file directly.
	 *
	 * @param isolateTransaction Flag indicating whether the transaction
	 * should be an isolated one.
	 * @throws IOException If the working copy could not be created. In that
	 * case the working copy's file handle is closed and the (partial) working
	 * copy is deleted again.
	 **/
	public void startTransaction(boolean isolateTransaction)
		throws IOException
	{
		_isolatedTransaction = isolateTransaction;

		if (!isolateTransaction) {
			// Updates are applied to the data file directly
			_txnHashFile = _hashFile;
			return;
		}

		// Create working copy of the hash file
		File txnFile = new File(_file.getParentFile(), "txn_" + _file.getName());
		RandomAccessFile txnRaf = _createEmptyFile(txnFile);

		boolean started = false;
		try {
			// Copy the hash file data to the working copy
			_hashFile.sync();
			FileChannel channel = _hashFile.getFileChannel();
			TransferUtil.transferTo(channel, 0L, channel.size(), txnRaf.getChannel());

			_txnHashFile = new HashFile0(txnFile, txnRaf);
			_txnFile = txnFile;
			started = true;
		}
		finally {
			if (!started) {
				// Don't leak the file handle or leave a half-written
				// working copy behind
				try {
					txnRaf.close();
				}
				finally {
					txnFile.delete();
				}
			}
		}
	}

	/**
	 * Commits the current transaction. For isolated transactions, the data
	 * file is deleted and replaced by the working copy, after which the data
	 * file is reopened. For unisolated transactions, the updates are already
	 * in the data file and are only synced to disk.
	 *
	 * @throws IOException If an I/O error occurred, or if the working copy
	 * could not be renamed to the data file.
	 **/
	public void commitTransaction()
		throws IOException
	{
		if (!_isolatedTransaction) {
			// Transaction wasn't isolated so all changes have already been
			// applied to the data file
			_txnHashFile = null;
			_hashFile.sync();
			return;
		}

		// All file channels need to be closed before the txn file can be
		// renamed, so close both hash files first
		_hashFile.close();
		_hashFile = null;

		_txnHashFile.sync();
		_txnHashFile.close();
		_txnHashFile = null;

		// Replace the data file with the working copy
		_file.delete();
		if (!_txnFile.renameTo(_file)) {
			throw new IOException("Unable to rename file '"+_txnFile+"' to '"+_file+"'");
		}

		// Reopen the (replaced) data file
		_hashFile = new HashFile0(_file);
		_txnFile = null;
	}

	/**
	 * Rolls back the current transaction by closing and deleting the working
	 * copy. This is only possible for isolated transactions.
	 *
	 * @throws IOException If the current transaction is not an isolated one,
	 * or if the working copy could not be closed.
	 **/
	public void rollbackTransaction()
		throws IOException
	{
		if (!_isolatedTransaction) {
			throw new IOException("Unisolated transactions cannot be rolled back");
		}

		// Discard the working copy
		_txnHashFile.close();
		_txnHashFile = null;

		_txnFile.delete();
		_txnFile = null;
	}

	/**
	 * Stores the offset of a data entry under the specified hash code in the
	 * hash file of the current transaction. A transaction must have been
	 * started first; the transaction's hash file is not assigned otherwise.
	 *
	 * @param hash The hash code of the data entry.
	 * @param dataOffset The offset of the data entry.
	 * @throws IOException If an I/O error occurred.
	 **/
	public void storeOffset(int hash, long dataOffset)
		throws IOException
	{
		_txnHashFile.storeOffset(hash, dataOffset);
	}

	/**
	 * Removes all items from the hash file of the current transaction. A
	 * transaction must have been started first; the transaction's hash file
	 * is not assigned otherwise.
	 *
	 * @throws IOException If an I/O error occurred.
	 **/
	public void clear()
		throws IOException
	{
		// Clear the working copy, it will overwrite the existing data on commit
		// (for unisolated transactions the 'working copy' is the data file itself)
		_txnHashFile.clear();
	}

	/**
	 * Gets an iterator over the offsets that have been stored under the
	 * specified hash code.
	 *
	 * @param hash The hash code to look up.
	 * @param dirtyReads If <tt>true</tt>, the offsets are read from the hash
	 * file of the current transaction, otherwise from the data file.
	 * @throws IOException If the initial bucket could not be read.
	 **/
	public OffsetIterator getOffsetIterator(int hash, boolean dirtyReads)
		throws IOException
	{
		if (dirtyReads) {
			return new OffsetIterator(_txnHashFile, hash);
		}

		return new OffsetIterator(_hashFile, hash);
	}

	/**
	 * Closes the hash file. An isolated transaction that is still active is
	 * rolled back first.
	 *
	 * @throws IOException If an I/O error occurred.
	 **/
	public void close()
		throws IOException
	{
		boolean pendingIsolatedTxn = _isolatedTransaction && _txnHashFile != null;

		if (pendingIsolatedTxn) {
			rollbackTransaction();
		}

		_hashFile.close();
	}

	/**
	 * Opens the specified file in read-write mode and truncates it to a
	 * length of 0 bytes. The file is created if it does not exist yet.
	 *
	 * @param file The file to create and/or empty.
	 * @return A RandomAccessFile for the, now empty, file.
	 * @throws IOException If the file could not be created, opened or
	 * truncated.
	 **/
	private RandomAccessFile _createEmptyFile(File file)
		throws IOException
	{
		boolean fileMissing = !file.exists();
		if (fileMissing) {
			file.createNewFile();
		}

		// Existing files might contain old data, get rid of it
		RandomAccessFile emptyRaf = new RandomAccessFile(file, "rw");
		emptyRaf.setLength(0L);
		return emptyRaf;
	}

/*----------------------------------------------------+
| Inner class HashFile0, the actual hash file wrapper |
+----------------------------------------------------*/

class HashFile0 {

/*-------------+
| Variables    |
+-------------*/

	// The file that this hash file is stored in; also used to derive the
	// location and name of the temporary file that is used while rehashing
	private File _file;

	// Read-write handle on _file
	private RandomAccessFile _raf;

	// The channel of _raf; all reads and writes go through this channel
	private FileChannel _fileChannel;

	// The number of (non-overflow) buckets in the hash file
	private int _bucketCount;

	// The number of items that can be stored in a bucket
	private int _bucketSize;

	// The number of items in the hash file
	private int _itemCount;

	// Load factor (fixed, for now)
	private float _loadFactor = 0.75f;

	// _recordSize = ITEM_SIZE * _bucketSize + 4
	// (the items of a bucket, followed by the 32-bit ID of its overflow bucket)
	private int _recordSize;

	// Reusable buffer of _recordSize bytes, used for reading and updating
	// a bucket when storing an offset
	private ByteBuffer _txnBucket;

/*-------------+
| Constructors |
+-------------*/

	/**
	 * Creates a hash file on top of the specified file, opening the file in
	 * read-write mode.
	 *
	 * @param file The file that stores the hash table.
	 * @throws IOException If the file could not be opened, read or
	 * initialized.
	 **/
	public HashFile0(File file)
		throws IOException
	{
		this(file, new RandomAccessFile(file, "rw"));
	}

	public HashFile0(File file, RandomAccessFile raf)
		throws IOException
	{
		_file = file;
		_raf = raf;
		_fileChannel = raf.getChannel();

		if (_fileChannel.size() == 0L) {
			// Empty file, insert bucket count, bucket size
			// and item count at the start of the file
			_bucketCount = INIT_BUCKET_COUNT;
			_bucketSize = INIT_BUCKET_SIZE;
			_itemCount = 0;
			_recordSize = ITEM_SIZE * _bucketSize + 4;

			_writeFileHeader();

			// Initialize the file by writing <_bucketCount> empty buckets
			_writeEmptyBuckets(HEADER_LENGTH, _bucketCount);
		}
		else {
			// Read bucket count, bucket size and item count from the file
			_readFileHeader();

			_recordSize = ITEM_SIZE * _bucketSize + 4;
		}

		_txnBucket = ByteBuffer.allocate(_recordSize);
	}

/*----------+
| Methods   |
+----------*/

	/**
	 * Gets the channel that is used to read from and write to the hash file.
	 * The result is <tt>null</tt> after the hash file has been closed.
	 **/
	public FileChannel getFileChannel() {
		return _fileChannel;
	}

	/**
	 * Gets the number of (non-overflow) buckets in the hash file.
	 **/
	public int getBucketCount() {
		return _bucketCount;
	}

	/**
	 * Gets the number of items that can be stored in a single bucket.
	 **/
	public int getBucketSize() {
		return _bucketSize;
	}

	/**
	 * Gets the number of items that are stored in the hash file.
	 **/
	public int getItemCount() {
		return _itemCount;
	}

	/**
	 * Gets the size of a bucket record in bytes, i.e. the size of all of the
	 * bucket's item slots plus 4 bytes for the ID of its overflow bucket.
	 **/
	public int getRecordSize() {
		return _recordSize;
	}

	/**
	 * Stores the offset of a new data entry under the specified hash code.
	 * The hash table is doubled in size as soon as the number of stored items
	 * reaches the load factor times the table's capacity.
	 *
	 * @param hash The hash code of the data entry.
	 * @param dataOffset The offset of the data entry.
	 * @throws IOException If an I/O error occurred.
	 **/
	public void storeOffset(int hash, long dataOffset)
		throws IOException
	{
		// Start searching for a free slot in the initial bucket for the hash
		_storeOffset(_getBucketOffset(hash), hash, dataOffset);
		_itemCount++;

		float maxItemCount = _loadFactor * _bucketCount * _bucketSize;
		if (_itemCount >= maxItemCount) {
			_increaseHashTable();
		}
	}

	/**
	 * Stores an item in the first empty slot of the bucket at the specified
	 * offset, or of one of the overflow buckets that are chained to it. A new
	 * overflow bucket is appended to the chain if all buckets are full.
	 *
	 * @param bucketOffset The file offset of the bucket to start searching in.
	 * @param hash The hash code of the item.
	 * @param dataOffset The data offset of the item.
	 * @throws IOException If an I/O error occurred.
	 **/
	private void _storeOffset(long bucketOffset, int hash, long dataOffset)
		throws IOException
	{
		// Position of the overflow ID within a bucket record
		final int overflowIDPos = ITEM_SIZE*_bucketSize;

		long curOffset = bucketOffset;

		while (true) {
			_txnBucket.clear();
			_fileChannel.read(_txnBucket, curOffset);

			int slotID = _findEmptySlotInBucket(_txnBucket);

			if (slotID >= 0) {
				// Empty slot found, store the item in it and write
				// the updated bucket back to the file
				int slotPos = ITEM_SIZE*slotID;
				_txnBucket.putInt(slotPos, hash);
				_txnBucket.putLong(slotPos + 4, dataOffset);
				_txnBucket.rewind();
				_fileChannel.write(_txnBucket, curOffset);
				return;
			}

			// Bucket is full, continue with its overflow bucket
			int overflowID = _txnBucket.getInt(overflowIDPos);

			if (overflowID == 0) {
				// No overflow bucket yet, create one and link it to
				// the current bucket
				overflowID = _createOverflowBucket();

				_txnBucket.putInt(overflowIDPos, overflowID);
				_txnBucket.rewind();
				_fileChannel.write(_txnBucket, curOffset);
			}

			curOffset = _getOverflowBucketOffset(overflowID);
		}
	}

	/**
	 * Removes all items from the hash file. The overflow buckets are removed
	 * from the file; the number of normal buckets is not changed.
	 *
	 * @throws IOException If an I/O error occurred.
	 **/
	public void clear()
		throws IOException
	{
		// Everything beyond the normal buckets is overflow data, cut it off
		long tableEnd = HEADER_LENGTH + (long)_bucketCount*_recordSize;
		_fileChannel.truncate(tableEnd);

		// Wipe the contents of the normal buckets
		_writeEmptyBuckets(HEADER_LENGTH, _bucketCount);

		_itemCount = 0;
	}

	/**
	 * Syncs any unstored data to the hash file: the file header is rewritten
	 * with the current bucket count, bucket size and item count, after which
	 * the channel is forced to write its updates to the storage device.
	 *
	 * @throws IOException If an I/O error occurred.
	 **/
	public void sync()
		throws IOException
	{
		// Update the file header
		_writeFileHeader();
		// 'false': only the file's content needs to be forced, not its metadata
		_fileChannel.force(false);
	}

	/**
	 * Closes the file handle of this hash file and releases the references to
	 * it. The file header is not written by this method; call {@link #sync}
	 * first to store the current header values.
	 *
	 * @throws IOException If the file could not be closed.
	 **/
	public void close()
		throws IOException
	{
		// Closing the RandomAccessFile closes its channel as well
		_raf.close();
		_raf = null;
		_fileChannel = null;
	}

/*----------------+
| Utility methods |
+----------------*/

	/**
	 * Writes the file header, consisting of the bucket count, the bucket size
	 * and the item count (in that order, as 32-bit integers), to the start of
	 * the file.
	 *
	 * @throws IOException If the header could not be written.
	 **/
	private void _writeFileHeader()
		throws IOException
	{
		ByteBuffer header = ByteBuffer.allocate(12);
		header.putInt(_bucketCount);
		header.putInt(_bucketSize);
		header.putInt(_itemCount);

		// Prepare the buffer for being written
		header.flip();
		_fileChannel.write(header, 0L);
	}

	/**
	 * Reads the file header, consisting of the bucket count, the bucket size
	 * and the item count (in that order, as 32-bit integers), from the start
	 * of the file.
	 *
	 * @throws IOException If the header could not be read.
	 **/
	private void _readFileHeader()
		throws IOException
	{
		ByteBuffer header = ByteBuffer.allocate(12);
		_fileChannel.read(header, 0L);

		// Back to the start of the buffer; the limit is left at the buffer's
		// capacity so that all three fields can always be retrieved
		header.rewind();
		_bucketCount = header.getInt();
		_bucketSize = header.getInt();
		_itemCount = header.getInt();
	}

	/**
	 * Returns the file offset of the (non-overflow) bucket that the specified
	 * hash code maps to.
	 **/
	private long _getBucketOffset(int hash) {
		// The remainder is negative for negative hash codes; shift
		// these into the range [0, _bucketCount)
		int remainder = hash % _bucketCount;
		int bucketNo = (remainder < 0) ? remainder + _bucketCount : remainder;

		return HEADER_LENGTH + (long)bucketNo * _recordSize;
	}

	/**
	 * Returns the file offset of the overflow bucket with the specified ID.
	 * Overflow buckets are stored directly after the normal buckets and their
	 * IDs start at 1.
	 **/
	private long _getOverflowBucketOffset(int bucketID) {
		// Index of the record, counted from the first normal bucket
		long recordIndex = (long)_bucketCount + (long)bucketID - 1L;

		return HEADER_LENGTH + recordIndex * _recordSize;
	}

	/**
	 * Appends a new, empty overflow bucket to the end of the file and returns
	 * its ID.
	 *
	 * @throws IOException If the bucket could not be written.
	 **/
	private int _createOverflowBucket()
		throws IOException
	{
		// The new bucket is appended to the file
		long newBucketOffset = _fileChannel.size();
		_writeEmptyBuckets(newBucketOffset, 1);

		// Derive the overflow ID from the position of the new bucket
		int recordIndex = (int) ((newBucketOffset - HEADER_LENGTH) / _recordSize);
		return recordIndex - _bucketCount + 1;
	}

	/**
	 * Writes a number of consecutive empty (all zeros) buckets to the file.
	 *
	 * @param fileOffset The file offset to write the first bucket to.
	 * @param bucketCount The number of empty buckets to write.
	 * @throws IOException If the buckets could not be written.
	 **/
	private void _writeEmptyBuckets(long fileOffset, int bucketCount)
		throws IOException
	{
		// A freshly allocated buffer contains zeros only
		ByteBuffer emptyBucket = ByteBuffer.allocate(_recordSize);

		long writeOffset = fileOffset;

		for (int remaining = bucketCount; remaining > 0; remaining--) {
			// Make the entire buffer available for writing (again)
			emptyBucket.rewind();
			_fileChannel.write(emptyBucket, writeOffset);
			writeOffset += _recordSize;
		}
	}

	/**
	 * Finds the first empty slot in the supplied bucket. A slot is considered
	 * to be empty if its data offset is equal to 0.
	 *
	 * @param bucket A buffer containing a bucket record.
	 * @return The number of the first empty slot, or <tt>-1</tt> if the
	 * bucket is full.
	 **/
	private int _findEmptySlotInBucket(ByteBuffer bucket) {
		int slotNo = 0;

		// Skip all slots that have a non-zero offset
		while (slotNo < _bucketSize && bucket.getLong(ITEM_SIZE*slotNo + 4) != 0L) {
			slotNo++;
		}

		return (slotNo < _bucketSize) ? slotNo : -1;
	}

	/**
	 * Double the number of buckets in the hash file and rehashes the
	 * stored items. The existing overflow buckets are moved to a temporary
	 * file ("rehash_" + file name, in the same directory) first, after which
	 * the items in the normal buckets and the items in the moved overflow
	 * buckets are rehashed.
	 *
	 * @throws IOException If an I/O error occurred. The handle on the
	 * temporary file is closed in that case too; the temporary file itself is
	 * only deleted after a successful rehash.
	 **/
	private void _increaseHashTable()
		throws IOException
	{
		long oldTableSize = HEADER_LENGTH + (long)_bucketCount * _recordSize;
		long newTableSize = HEADER_LENGTH + (long)_bucketCount * _recordSize * 2;
		long oldFileSize = _fileChannel.size(); // includes overflow buckets

		// Move any overflow buckets out of the way to a temporary file
		File tmpFile = new File(_file.getParentFile(), "rehash_" + _file.getName());
		RandomAccessFile tmpRaf = _createEmptyFile(tmpFile);

		try {
			FileChannel tmpChannel = tmpRaf.getChannel();

			// Transfer the overflow buckets to the temp file
			// NOTE(review): if the third argument of transferTo() is a byte
			// count rather than an end position, this requests more bytes than
			// there are available beyond oldTableSize -- confirm that
			// TransferUtil stops at the end of the file.
			TransferUtil.transferTo(_fileChannel, oldTableSize, oldFileSize, tmpChannel);

			// Increase hash table by factor 2
			_writeEmptyBuckets(oldTableSize, _bucketCount);
			_bucketCount *= 2;

			// Discard any remaining overflow buffers
			_fileChannel.truncate(newTableSize);

			ByteBuffer bucket = ByteBuffer.allocate(_recordSize);
			ByteBuffer newBucket = ByteBuffer.allocate(_recordSize);

			// Rehash items in 'normal' buckets, half of these will move to a new location,
			// but none of them will trigger the creation of new overflow buckets. Any (now
			// deprecated) references to overflow buckets are removed too.

			// All items that are moved to a new location end up in one and the same new and
			// empty bucket. All items are divided between the old and the new bucket and the
			// changes to the buckets are written to disk only once.
			for (long bucketOffset = HEADER_LENGTH; bucketOffset < oldTableSize; bucketOffset += _recordSize) {
				_fileChannel.read(bucket, bucketOffset);

				boolean bucketChanged = false;
				long newBucketOffset = 0L;

				for (int slotNo = 0; slotNo < _bucketSize; slotNo++) {
					long dataOffset = bucket.getLong(ITEM_SIZE*slotNo + 4);

					if (dataOffset != 0L) {
						// Slot is not empty
						int hash = bucket.getInt(ITEM_SIZE*slotNo);
						long newOffset = _getBucketOffset(hash);

						if (newOffset != bucketOffset) {
							// Move this item to new bucket...
							newBucket.putInt(hash);
							newBucket.putLong(dataOffset);

							// ...and remove it from the current bucket
							bucket.putInt(ITEM_SIZE*slotNo, 0);
							bucket.putLong(ITEM_SIZE*slotNo + 4, 0L);

							bucketChanged = true;
							newBucketOffset = newOffset;
						}
					}
				}

				if (bucketChanged) {
					// Some of the items were moved to the new bucket, write it to the file.
					// Only the slots that have been filled are written; the remainder of
					// the new bucket has already been initialized as empty.
					newBucket.flip();
					_fileChannel.write(newBucket, newBucketOffset);
					newBucket.clear();
				}

				// Reset overflow ID in the old bucket to 0 if necessary
				if (bucket.getInt(ITEM_SIZE*_bucketSize) != 0) {
					bucket.putInt(ITEM_SIZE*_bucketSize, 0);
					bucketChanged = true;
				}

				if (bucketChanged) {
					// Some of the items were moved to the new bucket or the overflow
					// ID has been reset; write the bucket back to the file
					bucket.rewind();
					_fileChannel.write(bucket, bucketOffset);
				}

				bucket.clear();
			}

			// Rehash items in overflow buckets. This might trigger the creation of
			// new overflow buckets so we can't optimize this in the same way as we
			// rehash the normal buckets.
			long tmpFileSize = tmpChannel.size();
			for (long bucketOffset = 0L; bucketOffset < tmpFileSize; bucketOffset += _recordSize) {
				tmpChannel.read(bucket, bucketOffset);

				for (int slotNo = 0; slotNo < _bucketSize; slotNo++) {
					long dataOffset = bucket.getLong(ITEM_SIZE*slotNo + 4);

					if (dataOffset != 0L) {
						// Slot is not empty
						int hash = bucket.getInt(ITEM_SIZE*slotNo);
						long newBucketOffset = _getBucketOffset(hash);

						// Move this item to new location...
						_storeOffset(newBucketOffset, hash, dataOffset);

						// ...and remove it from the current bucket. This only clears
						// the slot in the buffer (the temp file is not updated), so
						// that the buffer never holds items that were stored already.
						bucket.putInt(ITEM_SIZE*slotNo, 0);
						bucket.putLong(ITEM_SIZE*slotNo + 4, 0L);
					}
				}

				bucket.clear();
			}
		}
		finally {
			// Release the file handle, also when rehashing failed
			tmpRaf.close();
		}

		// Discard the temp file
		tmpFile.delete();
	}

	/**
	 * Prints the header values and the contents of all normal and overflow
	 * buckets to the supplied stream, for debugging purposes. Note that this
	 * method changes the position of the file channel.
	 *
	 * @param out The stream to print the contents to.
	 * @throws IOException If the hash file could not be read.
	 **/
	public void dumpContents(PrintStream out)
		throws IOException
	{
		out.println();
		out.println("*** hash file contents ***");

		out.println("_bucketCount="+_bucketCount);
		out.println("_bucketSize="+_bucketSize);
		out.println("_itemCount="+_itemCount);

		// The buckets are read sequentially, starting directly after the header
		ByteBuffer buf = ByteBuffer.allocate(_recordSize);
		_fileChannel.position(HEADER_LENGTH);

		out.println("---Buckets---");

		for (int bucketNo = 1; bucketNo <= _bucketCount; bucketNo++) {
			buf.clear();
			_fileChannel.read(buf);
			_dumpBucket(out, bucketNo, buf);
		}

		out.println("---Overflow Buckets---");

		// Everything that follows the normal buckets is an overflow bucket
		int overflowNo = 0;
		while (_fileChannel.position() < _fileChannel.size()) {
			buf.clear();
			_fileChannel.read(buf);
			overflowNo++;
			_dumpBucket(out, overflowNo, buf);
		}

		out.println("*** end of hash file contents ***");
		out.println();
	}

	/**
	 * Prints a single line containing the number of a bucket, the
	 * [hash,offset]-pairs in its slots and the ID of its overflow bucket.
	 **/
	private void _dumpBucket(PrintStream out, int bucketNo, ByteBuffer buf) {
		out.print("Bucket " + bucketNo + ": ");

		for (int slotNo = 0; slotNo < _bucketSize; slotNo++) {
			int slotPos = ITEM_SIZE*slotNo;
			int hash = buf.getInt(slotPos);
			long offset = buf.getLong(slotPos + 4);

			if (slotNo > 0) {
				out.print(" ");
			}
			out.print("["+toHexString(hash)+","+offset+"]");
		}

		int overflowID = buf.getInt(ITEM_SIZE*_bucketSize);
		out.println("---> "+overflowID);
	}

	/**
	 * Converts an integer to its unsigned hexadecimal representation,
	 * left-padded with zeros to a length of 8 characters.
	 **/
	private String toHexString(int decimal) {
		String hex = Integer.toHexString(decimal);

		// Integer.toHexString() yields 1 to 8 digits; prepend the missing zeros
		return "00000000".substring(hex.length()) + hex;
	}

} // End inner class HashFile0

/*---------------------------+
| Inner class OffsetIterator |
+---------------------------*/

/**
 * Iterator over the data offsets that have been stored under a specific hash
 * code. The iterator starts in the initial bucket for the hash code and
 * follows the chain of overflow buckets from there.
 **/
public static class OffsetIterator {

	// The hash file that is being searched
	private HashFile0 _hashFile;
	// The hash code to search for
	private int _queryHash;

	// Buffer containing the current bucket, null once the search has ended
	private ByteBuffer _bucketBuffer;
	// The file offset of the current bucket
	private long _bucketOffset;
	// The number of the slot that was inspected last, -1 if none
	private int _slotNo;

	private OffsetIterator(HashFile0 hashFile, int hash)
		throws IOException
	{
		_hashFile = hashFile;
		_queryHash = hash;

		_bucketBuffer = ByteBuffer.allocate(_hashFile.getRecordSize());

		// Calculate offset for initial bucket
		_bucketOffset = _hashFile._getBucketOffset(hash);

		// Read initial bucket
		_hashFile.getFileChannel().read(_bucketBuffer, _bucketOffset);

		_slotNo = -1;
	}

	/**
	 * Returns the next offset that has been mapped to the specified hash
	 * code, or <tt>-1</tt> if no more offset were found. Slots whose offset
	 * is 0 are empty and are never returned.
	 *
	 * @throws IOException If an overflow bucket could not be read.
	 **/
	public long next()
		throws IOException
	{
		while (_bucketBuffer != null) {
			// Search through current bucket
			_slotNo++;
			while (_slotNo < _hashFile.getBucketSize()) {
				if (_bucketBuffer.getInt(ITEM_SIZE*_slotNo) == _queryHash) {
					long dataOffset = _bucketBuffer.getLong(ITEM_SIZE*_slotNo + 4);

					// Empty slots consist of zeros only, which makes them
					// 'match' hash code 0; a slot is in use only if its
					// offset is non-zero
					if (dataOffset != 0L) {
						return dataOffset;
					}
				}
				_slotNo++;
			}

			// No matching hash code in current bucket, check overflow bucket
			int overflowID = _bucketBuffer.getInt(ITEM_SIZE*_hashFile.getBucketSize());
			if (overflowID == 0) {
				// No overflow bucket, end the search
				_bucketBuffer = null;
				_bucketOffset = 0L;
			}
			else {
				// Continue with overflow bucket
				_bucketOffset = _hashFile._getOverflowBucketOffset(overflowID);
				_bucketBuffer.clear();
				_hashFile.getFileChannel().read(_bucketBuffer, _bucketOffset);
				_slotNo = -1;
			}
		}

		return -1;
	}
} // End inner class OffsetIterator

	/**
	 * Dumps the contents of a hash file to <tt>System.out</tt>. Note that the
	 * file is created and initialized if it does not exist yet.
	 *
	 * @param args The path of the hash file, as the first and only argument.
	 * @throws Exception If the hash file could not be opened or read.
	 **/
	public static void main(String[] args)
		throws Exception
	{
		if (args.length < 1) {
			System.err.println("Usage: java " + HashFile.class.getName() + " <hash-file>");
			return;
		}

		HashFile hashFile = new HashFile(new File(args[0]));
		try {
			hashFile._hashFile.dumpContents(System.out);
		}
		finally {
			// Release the file handle, also when dumping failed
			hashFile.close();
		}
	}

} // End class HashFile
