package de.tu.darmstadt.lt.ner.reader

import com.google.common.cache.{CacheBuilder, LoadingCache}
import de.tu.darmstadt.lt.ner.util.loadingCache
import grizzled.slf4j.Logging
import resource._

import scala.collection.convert.wrapAll._
import scala.collection.mutable.ListBuffer
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Promise}
import scala.util.{Failure, Success, Try}

import java.io.{BufferedReader, InputStreamReader}
import java.net.URL
import java.util
import java.util.function.{Function => JavaFunction}
import java.util.zip.ZipInputStream
import java.util.{StringTokenizer, HashMap => JavaHashMap, Map => JavaMap}


/**
  * Created by Markus Ackermann.
  * No rights reserved.
  *
  * Helpers for reading lookup data out of a zip archive and caching the resulting maps.
  * The archive location is published once through [[setDataZipURL]]; every reader in this
  * object resolves it through [[dataZipURL]], which blocks until the URL is available.
  */
object NERLookupCaching extends Logging {

  /** Promise that carries the URL of the data zip archive once it has been set. */
  val dataZipURLPromise: Promise[URL] = Promise[URL]()

  /**
    * Publishes the URL of the data zip archive.
    *
    * @param url location of the archive
    * @return `true` if this call completed the promise, `false` if it was already completed
    */
  def setDataZipURL(url: URL): Boolean = dataZipURLPromise.tryComplete(Success(url))

  /**
    * The URL of the data zip archive. Blocks for up to four minutes until
    * [[setDataZipURL]] has been called; `Await.result` throws if the wait times out.
    */
  def dataZipURL: URL = Await.result(dataZipURLPromise.future, Duration("4min"))

  /** Whether the data zip URL has already been set. */
  def dataZipUrlSet: Boolean = dataZipURLPromise.isCompleted

  /**
    * Scans the zip archive at `archiveURL` for the entry called `entryName` and, if it is
    * present, applies `converter` to a reader positioned on that entry's content.
    *
    * @param archiveURL location of the zip archive
    * @param entryName  exact name of the entry to look for
    * @param converter  turns the entry's content into the result; the reader is closed
    *                   once the converter returns
    * @tparam T result type of the conversion
    * @return `Some(result)` if the entry exists, `None` otherwise
    */
  def extractEntry[T](archiveURL: URL, entryName: String)(converter: BufferedReader => T): Option[T] = {

    managed(new ZipInputStream(archiveURL.openStream())).acquireAndGet { zipStream =>

      // `exists` stops pulling entries at the first match, which leaves the stream
      // positioned on the content of exactly that entry.
      val entryFound = Iterator
        .continually(zipStream.getNextEntry)
        .takeWhile(_ ne null)
        .exists(_.getName == entryName)

      if (entryFound) {
        // NOTE(review): the reader decodes with the platform default charset; consider an
        // explicit charset if the archive's encoding is known.
        managed(new BufferedReader(new InputStreamReader(zipStream)))
          .acquireAndGet(reader => Some(converter(reader)))
      } else {
        None
      }
    }
  }

  /**
    * Java-friendly variant of [[extractEntry]] that reads from the archive at [[dataZipURL]].
    *
    * @param entryName     exact name of the entry to look for
    * @param javaConverter turns the entry's content into the result
    * @tparam T result type of the conversion
    * @return the converted entry content
    * @throws NoSuchElementException if the archive contains no entry with that name
    */
  def extractEntry[T](entryName: String, javaConverter: JavaFunction[BufferedReader, T]): T = {
    val archiveURL = dataZipURL

    extractEntry[T](archiveURL, entryName)(javaConverter.apply(_)).getOrElse(
      throw new NoSuchElementException(s"entry '$entryName' not found in $archiveURL"))
  }

  /**
    * Folds all lines of the entry `entryName` (read from the archive at [[dataZipURL]])
    * into an immutable map and exposes the result as a Java map.
    * Fails with a `RuntimeException` (via `sys.error`) if the entry does not exist.
    */
  private def foldEntryLines[T](entryName: String)
                               (step: (Map[String, T], String) => Map[String, T]): JavaMap[String, T] = {

    val scalaMap = extractEntry(dataZipURL, entryName) { reader =>
      reader.lines().iterator().foldLeft(Map[String, T]())(step)
    }

    (scalaMap.getOrElse(sys.error(s"entry '$entryName' not found")): JavaMap[String, T])
  }

  /**
    * Builds a map from an archive entry in which every line yields exactly one key/value pair.
    * Pairs from later lines replace earlier pairs with the same key.
    *
    * @param entryName     name of the entry inside the data zip archive
    * @param defaultName   label used only in the debug log message
    * @param lineConverter converts one line into a key/value pair
    * @tparam T value type of the resulting map
    * @return the collected pairs as a Java map
    */
  def readMapData[T](entryName: String, defaultName: String,
                     lineConverter: JavaFunction[String, (String, T)]): JavaMap[String, T] = {

    debug(s"reading and convertion data for $defaultName from entry $entryName")

    foldEntryLines[T](entryName) { (collected, line) =>
      collected + lineConverter(line)
    }
  }

  /**
    * Builds a map from an archive entry in which every line may yield several key/value pairs.
    * Pairs from later lines replace earlier pairs with the same key.
    *
    * @param entryName     name of the entry inside the data zip archive
    * @param defaultName   label used only in the debug log message
    * @param lineConverter converts one line into a (possibly empty) map of pairs
    * @tparam T value type of the resulting map
    * @return the union of all per-line maps as a Java map
    */
  def readMapDataMulti[T](entryName: String, defaultName: String,
                          lineConverter: JavaFunction[String, util.Map[String, T]]): JavaMap[String, T] = {

    debug(s"reading and convertion data for $defaultName from entry $entryName")

    foldEntryLines[T](entryName) { (collected, line) =>
      collected ++ lineConverter(line)
    }
  }

  /**
    * Cache of two-column, tab-separated mappings, keyed by the name of the archive entry.
    * The first two tab-separated tokens of each line become key and value; lines with fewer
    * than two tokens are skipped and reported in a single warning per entry.
    * Loading fails (via `sys.error`) if the entry does not exist in the archive.
    */
  val twoColumMappingCache: LoadingCache[String, JavaMap[String, String]] = loadingCache { entryName: String =>

    debug(s"reading data from entry $entryName")

    val loadedMapping = extractEntry(dataZipURL, entryName) { reader =>

      // Collected on the side so that a single summary warning can be emitted after the fold.
      val malFormedLines = ListBuffer[String]()

      val mapping = reader.lines().iterator().foldLeft(Map[String, String]()) { (collected, line) =>

        val tokens = new StringTokenizer(line, "\t")

        // nextToken throws when the line has fewer than two tokens; such lines are malformed.
        Try((tokens.nextToken(), tokens.nextToken())) match {

          case Success((key, value)) => collected.updated(key, value)

          case Failure(_) =>
            malFormedLines += line
            collected
        }
      }

      if (malFormedLines.nonEmpty) {
        warn(s"there were ${malFormedLines.size} malformed FreeBase source lines in " +
          s" $entryName in ${dataZipURL}, examples:\n" + malFormedLines.take(4).mkString("\n"))
      }

      (mapping: util.Map[String, String])
    }

    (loadedMapping.getOrElse(sys.error(s"entry '$entryName' not found")): JavaMap[String, String])
  }

  /** Manually populated cache (no loader) for custom string-to-string maps, keyed by name. */
  val customMapCache = CacheBuilder.newBuilder().concurrencyLevel(4).build[String, JavaMap[String, String]]()
}
