/*******************************************************************************
 * Copyright 2014
 * FG Language Technology
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tu.darmstadt.lt.ner.preprocessing;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.pipeline.SimplePipeline.runPipeline;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import de.tu.darmstadt.lt.ner.reader.NERLookupCaching;
import de.tu.darmstadt.lt.ner.util.Configuration;
import de.tu.darmstadt.lt.ner.util.ModelLocking;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.apache.commons.io.IOUtils;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.cleartk.ml.CleartkSequenceAnnotator;
import org.cleartk.ml.crfsuite.CrfSuiteStringOutcomeDataWriter;
import org.cleartk.ml.jar.DefaultSequenceDataWriterFactory;
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
import org.cleartk.ml.jar.GenericJarClassifierFactory;
import org.cleartk.util.cr.FilesCollectionReader;

import de.tu.darmstadt.lt.ner.annotator.NERAnnotator;
import de.tu.darmstadt.lt.ner.reader.NERReader;
import de.tu.darmstadt.lt.ner.writer.EvaluatedNERWriter;
import de.tu.darmstadt.lt.ner.writer.SentenceToCRFTestFileWriter;

/**
 * Command-line entry point for GermaNER: trains a sequence model through the
 * ClearTK/CRFsuite data writer and/or tags a test file with a previously
 * trained model stored in the model directory.
 *
 * <p>Input files are passed through {@link ChangeColon#normalize} before they
 * are handed to the pipeline, and predictions are passed through
 * {@link ChangeColon#deNormalize} before they reach the final output file.
 */
@Slf4j
public class GermaNERMain {

    /** Raw value of the {@code -d} option; {@code null} or empty selects the default directory. */
    private String modelDirString;
    /** Directory holding model.jar, feature.xml and config.properties; set by {@link #prepareModelDir}. */
    private File modelDirectory;

    private String testFileName;
    private String trainFileName;
    /** Path given with {@code -r}; {@code null} means "use data.zip from the classpath". */
    private String dataZipFile;
    URL configSource = null;

    // The lazy getters below are evaluated on first access, i.e. only after the
    // command line has been parsed and the fields above have been populated.
    @Getter(lazy = true)
    private final String mode = computeMode();
    @Getter(lazy = true)
    private final URL dataZipURL = computeDataZipURL();
    @Getter(lazy = true)
    private final Configuration configuration = new Configuration(configSource.toExternalForm());

    private static final ChangeColon changeColon = new ChangeColon();

    private static final String USAGE = "USAGE: java -jar germanner.jar [-c config.properties] \n"
            + " [-f trainingFileName] -t testFileName -d modelOutputDirectory -o outputFile"
            + " -r data.zip file containing files used for different features";

    /**
     * Extracts training data from the given file into the model directory and
     * trains the model there.
     *
     * @param NER_TagFile the (already normalized) training file
     * @throws UIMAException if the UIMA pipeline fails
     * @throws IOException   if reading the input or writing the model fails
     */
    public void trainModel(File NER_TagFile)
            throws UIMAException, IOException {

        prepareLookupCaching();

        // NOTE: wrapping this call in ModelLocking.performWithModelLock(modelDirectory, ...)
        // is currently disabled; training runs without a model lock.
        trainUnderLocking(NER_TagFile);
    }

    /**
     * Runs the feature-extraction pipeline in training mode, stores the active
     * configuration next to the model and invokes the ClearTK trainer.
     *
     * @param NER_TagFile the training file to read
     * @return always {@code true}
     */
    @SneakyThrows
    private boolean trainUnderLocking(File NER_TagFile) {

        System.out.println("Start model generation");
        runPipeline(
                FilesCollectionReader.getCollectionReaderWithSuffixes(NER_TagFile.getAbsolutePath(),
                        NERReader.CONLL_VIEW, NER_TagFile.getName()),
                createEngine(NERReader.class, NERReader.DATA_ZIP_URL, getDataZipURL(),
                        NERReader.CONFIGURATION, configSource.toExternalForm()),
                createEngine(NERAnnotator.class,
                        NERAnnotator.PARAM_CONFIGURATION, configSource.toExternalForm(),
                        NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE,
                        modelDirectory.getAbsolutePath() + "/feature.xml",
                        CleartkSequenceAnnotator.PARAM_IS_TRAINING, true,
                        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
                        modelDirectory.getAbsolutePath(),
                        DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
                        CrfSuiteStringOutcomeDataWriter.class));

        // Persist the configuration used for training so that tagging can reuse it.
        getConfiguration().writeTo(modelDirectory.toPath().resolve("config.properties"));
        System.out.println("Model generation done");
        System.out.println("Start training");
        org.cleartk.ml.jar.Train.main(modelDirectory.getAbsolutePath());
        System.out.println("Training done");

        return true;
    }

    /**
     * Tags a test file with the model found in the model directory.
     *
     * <p>The input is first normalized into {@code <testPosFile>.normalized} and
     * that normalized copy is what gets tagged. Predictions are written to a
     * temporary file, de-normalized into {@code outputFile}, and the temporary
     * file is removed even when tagging fails. The configuration saved in the
     * model directory is preferred over {@code configSource} when it exists.
     *
     * @param testPosFile     the raw (not yet normalized) file to tag
     * @param outputFile      where the de-normalized predictions are written
     * @param aNodeResultFile passed through to {@link EvaluatedNERWriter}; may be {@code null}
     * @param aSentencesIds   passed through to {@link EvaluatedNERWriter}; may be {@code null}
     * @throws UIMAException if the UIMA pipeline fails
     * @throws IOException   if reading or writing any of the files fails
     */
    public void classifyTestFile(File testPosFile, File outputFile,
                                 File aNodeResultFile, List<Integer> aSentencesIds)
            throws UIMAException, IOException {

        prepareLookupCaching();

        val savedConfigFile = modelDirectory.toPath().resolve("config.properties");
        URL testConfigSource = Files.isRegularFile(savedConfigFile)
                ? savedConfigFile.normalize().toUri().toURL()
                : configSource;

        File normalizedTestFile = new File(testPosFile.getPath() + ".normalized");
        changeColon.normalize(testPosFile.getPath(), normalizedTestFile.getPath());

        log.debug("using config to classification from: " + testConfigSource);

        File normalizedOutputFile = predictionsTempFile(outputFile);
        try {
            // NOTE: wrapping this call in ModelLocking.performWithModelLock(modelDirectory, ...)
            // is currently disabled; tagging runs without a model lock.
            classifyUnderLocking(normalizedTestFile, normalizedOutputFile,
                    testConfigSource, aNodeResultFile, aSentencesIds);

            changeColon.deNormalize(normalizedOutputFile.getPath(), outputFile.getPath());
        } finally {
            // Never leave the intermediate predictions behind, even on failure.
            normalizedOutputFile.delete();
        }
    }

    /**
     * Runs the tagging pipeline (reader, annotator, writer) on the given file.
     *
     * @param testPosFile      the file to tag
     * @param outputFile       the file receiving the predictions
     * @param testConfigSource location of the configuration to use
     * @param aNodeResultFile  passed through to {@link EvaluatedNERWriter}
     * @param aSentencesIds    passed through to {@link EvaluatedNERWriter}
     * @return always {@code true}
     */
    @SneakyThrows({UIMAException.class, IOException.class})
    private boolean classifyUnderLocking(File testPosFile, File outputFile, URL testConfigSource,
                                         File aNodeResultFile, List<Integer> aSentencesIds) {

        System.out.println("Start tagging");

        runPipeline(
                FilesCollectionReader.getCollectionReaderWithSuffixes(testPosFile.getAbsolutePath(),
                        NERReader.CONLL_VIEW, testPosFile.getName()),
                createEngine(NERReader.class, NERReader.DATA_ZIP_URL, getDataZipURL(),
                        NERReader.CONFIGURATION, testConfigSource.toExternalForm()),
                createEngine(NERAnnotator.class,
                        NERAnnotator.PARAM_CONFIGURATION, testConfigSource.toExternalForm(),
                        NERAnnotator.PARAM_FEATURE_EXTRACTION_FILE,
                        modelDirectory.getAbsolutePath() + "/feature.xml",
                        NERAnnotator.PARAM_MODEL_LOCATION, modelDirectory.getAbsolutePath(),
                        NERAnnotator.PARAM_PREDICTION_OUTPUT, outputFile,
                        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
                        modelDirectory.getAbsolutePath() + "/model.jar"),
                createEngine(EvaluatedNERWriter.class, EvaluatedNERWriter.OUTPUT_FILE, outputFile,
                        EvaluatedNERWriter.IS_GOLD, false, EvaluatedNERWriter.NOD_OUTPUT_FILE,
                        aNodeResultFile, EvaluatedNERWriter.SENTENCES_ID, aSentencesIds));

        System.out.println("Tagging done");

        return true;
    }

    /**
     * Helper that can be called from NoD: runs {@link SentenceToCRFTestFileWriter}
     * over the given sentences to produce a CRF test file.
     *
     * @param sentences    plain sentences
     * @param aCRFFileName name of the CRF test file to write
     * @param aLanguage    language passed to the writer
     * @throws UIMAException            if the UIMA pipeline fails
     * @throws IllegalArgumentException if the writer rejects its parameters
     * @throws IOException              if writing the file fails
     */
    public static void sentenceToCRFFormat(List<String> sentences, String aCRFFileName,
                                           String aLanguage)
            throws UIMAException, IllegalArgumentException, IOException {
        SimplePipeline.runPipeline(JCasFactory.createJCas(),
                createEngine(SentenceToCRFTestFileWriter.class,
                        SentenceToCRFTestFileWriter.SENTENCE_ITERATOR, sentences,
                        SentenceToCRFTestFileWriter.CRF_TEST_FILE_NAME, aCRFFileName,
                        SentenceToCRFTestFileWriter.CRF_TEST_FILE_LANG, aLanguage));
    }

    /**
     * Parses the command line, then trains and/or tags depending on which of
     * {@code -f} (training file) and {@code -t} (test file) were supplied.
     *
     * @param args command-line arguments, see {@link #USAGE}
     * @throws Exception if argument parsing fails; failures during the run are
     *                   rethrown as {@link RuntimeException} carrying the arguments
     */
    public static void main(String[] args)
            throws Exception {
        long start = System.currentTimeMillis();

        GermaNERMain main = new GermaNERMain();
        List<String> argList = Arrays.asList(args);

        main.parseArguments(argList);

        try {
            main.prepareModelDir(false);

            // "f" = train only, "t" = tag only, "ft" = train then tag.
            String mode = main.getMode();

            if (mode.contains("f")) {
                String normalizedTrainFileName = main.trainFileName + ".normalized";
                changeColon.normalize(main.trainFileName, normalizedTrainFileName);
                main.trainModel(new File(normalizedTrainFileName));
            }
            if (mode.contains("t")) {
                // classifyTestFile normalizes the test file itself.
                main.classifyTestFile(new File(main.testFileName),
                        main.resolveOutputFile(argList), null, null);
            }
            long now = System.currentTimeMillis();
            UIMAFramework.getLogger().log(Level.INFO, "Time: " + (now - start) + "ms");
        } catch (Exception e) {
            throw new RuntimeException("error in GermaNER run:\n" +
                    String.join(" ", argList), e);
        }
        long totalTime = System.currentTimeMillis() - start;
        System.out.println("NER train/test done in " + totalTime / 1000 + " seconds");
    }

    /**
     * Returns the value following {@code flag} on the command line.
     *
     * @param args the command-line arguments
     * @param flag the option name, e.g. {@code "-t"}
     * @return the argument after the flag, or empty if the flag is absent or is
     *         the last argument
     */
    private static Optional<String> optionValue(List<String> args, String flag) {
        // indexOf yields -1 for an absent flag, so valueIndex == 0 means "not given".
        int valueIndex = args.indexOf(flag) + 1;
        if (valueIndex <= 0 || valueIndex >= args.size()) {
            return Optional.empty();
        }
        return Optional.ofNullable(args.get(valueIndex));
    }

    /**
     * Populates the option fields ({@code -r}, {@code -c}, {@code -t}, {@code -f},
     * {@code -d}) from the command line. Terminates the JVM when the file given
     * with {@code -t} does not exist.
     *
     * @param argList the command-line arguments
     * @throws MalformedURLException if the {@code -c} path cannot be turned into a URL
     */
    private void parseArguments(List<String> argList) throws MalformedURLException {
        optionValue(argList, "-r")
                .filter(path -> new File(path).exists())
                .ifPresent(path -> dataZipFile = path);

        Optional<String> configArg = optionValue(argList, "-c")
                .filter(path -> new File(path).isFile());
        if (configArg.isPresent()) {
            configSource = Paths.get(configArg.get()).toUri().toURL();
        } else {
            log.warn("Default configuration is read from the system\n");
            configSource = ClassLoader.getSystemResource("config.properties");
        }

        Optional<String> testArg = optionValue(argList, "-t");
        if (testArg.isPresent()) {
            if (!new File(testArg.get()).exists()) {
                log.error("There is no test file to tag");
                System.exit(1);
            }
            testFileName = testArg.get();
        }

        Optional<String> trainArg = optionValue(argList, "-f");
        if (trainArg.isPresent()) {
            if (new File(trainArg.get()).exists()) {
                trainFileName = trainArg.get();
            } else {
                log.error("The system is running in tagging mode. No training data provided");
            }
        }

        Optional<String> modelDirArg = optionValue(argList, "-d");
        if (modelDirArg.isPresent()) {
            File dir = new File(modelDirArg.get());
            if (dir.exists()) {
                modelDirString = modelDirArg.get();
            } else {
                dir.mkdirs();
                modelDirString = dir.getAbsolutePath();
            }
        }
    }

    /**
     * Determines the file that receives the predictions: the {@code -o} value
     * when given, otherwise {@code result.tsv} inside the model directory.
     *
     * @param argList the command-line arguments
     * @return the predictions output file
     */
    private File resolveOutputFile(List<String> argList) {
        Optional<String> outputArg = optionValue(argList, "-o");
        if (outputArg.isPresent()) {
            return new File(outputArg.get());
        }
        File fallback = new File(modelDirectory, "result.tsv");
        log.warn("No output file given via -o; predictions will be written to " + fallback);
        return fallback;
    }

    /**
     * Resolves and creates the model directory ({@code output} when no
     * {@code -d} value was given) and optionally seeds it with the model files
     * bundled on the classpath.
     *
     * @param copyFiles whether to copy model.jar, MANIFEST.MF, config.properties
     *                  and feature.xml from the classpath when they are missing
     * @throws FileNotFoundException if a bundled resource is not on the classpath
     * @throws IOException           if copying a resource fails
     */
    private void prepareModelDir(boolean copyFiles)
            throws IOException, FileNotFoundException {
        modelDirectory = (modelDirString == null || modelDirString.isEmpty())
                ? new File("output") : new File(modelDirString);
        modelDirectory.mkdirs();

        if (copyFiles) {
            copyResourceIfMissing("model/model.jar", "model.jar");
            copyResourceIfMissing("model/MANIFEST.MF", "MANIFEST.MF");
            copyResourceIfMissing("config.properties", "config.properties");
            copyResourceIfMissing("feature/feature.xml", "feature.xml");
        }
    }

    /**
     * Copies a classpath resource into the model directory unless a file of
     * that name already exists there.
     *
     * @param resourceName classpath location of the resource
     * @param targetName   file name to create inside the model directory
     * @throws FileNotFoundException if the resource is not on the classpath
     * @throws IOException           if the copy fails
     */
    private void copyResourceIfMissing(String resourceName, String targetName)
            throws IOException {
        File target = new File(modelDirectory, targetName);
        if (target.exists()) {
            return;
        }
        // try-with-resources closes the stream; a null resource is skipped on close.
        try (InputStream resource = ClassLoader.getSystemResourceAsStream(resourceName)) {
            if (resource == null) {
                throw new FileNotFoundException("Classpath resource not found: " + resourceName);
            }
            Files.copy(resource, target.toPath());
        }
    }

    /**
     * Derives the run mode from the files supplied on the command line.
     * Terminates the JVM when neither a training nor a test file was given.
     *
     * @return {@code "ft"} (train and tag), {@code "f"} (train) or {@code "t"} (tag)
     */
    private String computeMode() {

        if (trainFileName != null && testFileName != null) {
            return "ft";
        } else if (trainFileName != null) {
            return "f";
        } else if (testFileName != null) {
            return "t";
        } else {
            log.error("neither train nor prediction selected");
            log.error(USAGE);
            System.exit(1);
        }
        return null;
    }

    /**
     * Locates data.zip: the {@code -r} file when given, otherwise the
     * classpath resource of that name.
     *
     * @return the URL of data.zip, never {@code null}
     * @throws RuntimeException if no data.zip can be located
     */
    @SneakyThrows(MalformedURLException.class)
    private URL computeDataZipURL() {

        URL res = dataZipFile == null ? ClassLoader.getSystemResource("data.zip") :
                new File(dataZipFile).toURI().toURL();

        if (res == null) {
            throw new RuntimeException("Unable to locate data.zip");
        }

        return res;
    }

    /** Registers the data.zip URL with {@link NERLookupCaching} unless one is already set. */
    private void prepareLookupCaching() {

        if (!NERLookupCaching.dataZipUrlSet()) {
            NERLookupCaching.setDataZipURL(getDataZipURL());
        }
    }

    /**
     * Creates a temporary file next to the final predictions file, creating
     * the target directory first when necessary.
     *
     * @param finalPredictionsFile the file the predictions will finally be written to
     * @return a new temporary file, also scheduled for deletion on JVM exit
     */
    @SneakyThrows(IOException.class)
    private File predictionsTempFile(File finalPredictionsFile) {

        File targetDir = finalPredictionsFile.getParentFile();
        if (targetDir != null) {
            targetDir.mkdirs();
        } else {
            // No parent component: fall back to the empty (current-directory) path.
            targetDir = new File("");
        }

        File tmpFile = Files.createTempFile(targetDir.toPath(),
                finalPredictionsFile.getName() + ".", ".tmp").toFile();
        tmpFile.deleteOnExit();
        return tmpFile;
    }
}
