/*
 * Decompiled with CFR 0.152.
 */
package org.aksw.simba.tapioca.gen;

import java.io.File;
import java.io.IOException;
import org.aksw.simba.tapioca.gen.InitialCorpusCreation;
import org.aksw.simba.tapioca.gen.LDACorpusCreation;
import org.aksw.simba.tapioca.gen.MetaDataInformationCollector;
import org.aksw.simba.tapioca.gen.ModelGenerator;
import org.aksw.simba.tapioca.preprocessing.StringCountToSimpleTokenizedTextConvertingDocumentSupplierDecorator;
import org.aksw.simba.tapioca.preprocessing.UriCountMappingCreatingDocumentSupplierDecorator;
import org.aksw.simba.tapioca.preprocessing.labelretrieving.WorkerBasedLabelRetrievingDocumentSupplierDecorator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command line entry point that drives the generation of the files needed for
 * the topic-model based index of the {@value #CORPUS_NAME} corpus.
 *
 * <p>
 * The steps are chained lazily: {@link #run()} only creates the final corpus
 * file and the model file if they are missing, and each of these steps
 * triggers the creation of the intermediate files it depends on (first corpus
 * file, LDA corpus file) if those are missing as well.
 * </p>
 *
 * <p>
 * All locations are hard-coded in the constants of this class.
 * </p>
 */
public class TMBasedIndexGenerator {
    private static final Logger LOGGER = LoggerFactory.getLogger(TMBasedIndexGenerator.class);

    /** Base folder containing all generated files. */
    public static final String TAPIOCA_FOLDER = "/Daten/tapioca/";
    /** Name of the corpus that is processed. */
    public static final String CORPUS_NAME = "lodStats";
    /** First corpus file, written by {@link #generateFirstCorpusFile()}. */
    public static final String CORPUS_FILE = "/Daten/tapioca/lodStats.corpus";
    /** LDA corpus file whose existence is checked by {@link #checkLDACorpusExistence()}. */
    public static final String LDA_CORPUS_FILE = "/Daten/tapioca/lodStats_all_log.object";
    /** Cache files handed to the label retrieving decorator. */
    public static final File[] CACHE_FILES = new File[] {
            new File("/Daten/tapioca/cache/uriToLabelCache_1.object"),
            new File("/Daten/tapioca/cache/uriToLabelCache_2.object"),
            new File("/Daten/tapioca/cache/uriToLabelCache_3.object") };
    /**
     * Folder that is read when the first corpus file is created.
     * NOTE(review): unlike the other locations this is a Windows style
     * absolute path - confirm that this is intended.
     */
    public static final File INPUT_FOLDER = new File("C:/Daten/Dropbox/lodstats-rdf/23032015/void");
    /** Meta data file passed to the {@link MetaDataInformationCollector}. */
    public static final String META_DATA_FILE = "/Daten/tapioca/lodStats/datasets.nt";
    /** Stat result file passed to the {@link MetaDataInformationCollector}. */
    public static final String STAT_RESULT_FILE = "/Daten/tapioca/lodStats/statresult.nt";
    /** Folder in which the final corpus, the model and its meta data are stored. */
    public static final String OUTPUT_FOLDER = "/Daten/tapioca/lodStats_model";
    /** Name of the final corpus file inside {@link #OUTPUT_FOLDER}. */
    public static final String FINAL_CORPUS_FILE = "lodStats_final.corpus";
    /** Name of the model file inside {@link #OUTPUT_FOLDER}. */
    public static final String MODEL_FILE = "probAlgState.object";
    /** Name of the model meta data file inside {@link #OUTPUT_FOLDER}. */
    public static final String MODEL_META_DATA_FILE = "lodstats.nt";

    /**
     * Creates a generator and runs it.
     *
     * @param args
     *            command line arguments (not used)
     */
    public static void main(String[] args) {
        TMBasedIndexGenerator generator = new TMBasedIndexGenerator();
        generator.run();
    }

    /**
     * Makes sure that the output folder exists and generates the final corpus
     * file and the model file if they are not already present.
     */
    public void run() {
        File outputFolder = new File(OUTPUT_FOLDER);
        // A failed folder creation is only reported; the following steps are
        // still attempted (and will report their own problems).
        if (!outputFolder.exists() && !outputFolder.mkdirs()) {
            LOGGER.error("Couldn't create the output folder \"{}\".", outputFolder.getAbsolutePath());
        }

        File finalCorpusFile = new File(outputPath(FINAL_CORPUS_FILE));
        if (finalCorpusFile.exists()) {
            LOGGER.info("The final corpus file is already existing.");
        } else {
            generateFinalCorpusFile();
        }

        File modelFile = new File(outputPath(MODEL_FILE));
        if (modelFile.exists()) {
            LOGGER.info("The model file is already existing.");
        } else {
            generateModel();
        }
    }

    /**
     * Runs the {@link InitialCorpusCreation} on {@link #INPUT_FOLDER}, writing
     * to {@link #CORPUS_FILE}.
     */
    protected static void generateFirstCorpusFile() {
        InitialCorpusCreation creation = new InitialCorpusCreation();
        creation.run(INPUT_FOLDER, new File(CORPUS_FILE));
    }

    /**
     * Runs the {@link LDACorpusCreation} based on {@link #CORPUS_FILE}. If the
     * first corpus file is missing, it is generated beforehand; if it is still
     * missing afterwards, an error is logged and nothing else is done.
     */
    protected void generateLDACorpusFile() {
        File corpusFile = new File(CORPUS_FILE);
        if (!corpusFile.exists()) {
            LOGGER.info("The first corpus file is not existing. Trying to generate it...");
            generateFirstCorpusFile();
            if (!corpusFile.exists()) {
                LOGGER.error("The first corpus file is not existing and couldn't be generated.");
                return;
            }
        }
        LDACorpusCreation creation = new LDACorpusCreation(CORPUS_FILE,
                UriCountMappingCreatingDocumentSupplierDecorator.UriUsage.CLASSES_AND_PROPERTIES,
                StringCountToSimpleTokenizedTextConvertingDocumentSupplierDecorator.WordOccurence.LOG, null);
        // The try-with-resources statement closes the label retriever exactly
        // once, on success as well as on failure. No additional close() call
        // is needed (or wanted) in the catch block.
        try (WorkerBasedLabelRetrievingDocumentSupplierDecorator cachingLabelRetriever = new WorkerBasedLabelRetrievingDocumentSupplierDecorator(
                null, CACHE_FILES, new File[0])) {
            creation.run(cachingLabelRetriever);
        } catch (IOException e) {
            LOGGER.error("Exception while generating LDA corpus.", e);
        }
    }

    /**
     * Runs the {@link MetaDataInformationCollector} to create the final corpus
     * file and the model meta data file inside {@link #OUTPUT_FOLDER}. Nothing
     * is done if the LDA corpus file is missing and can't be generated.
     */
    protected void generateFinalCorpusFile() {
        if (!checkLDACorpusExistence()) {
            return;
        }
        MetaDataInformationCollector collector = new MetaDataInformationCollector();
        LOGGER.info("Generating final corpus file...");
        collector.run(META_DATA_FILE, LDA_CORPUS_FILE, STAT_RESULT_FILE, outputPath(FINAL_CORPUS_FILE),
                outputPath(MODEL_META_DATA_FILE));
    }

    /**
     * Runs the {@link ModelGenerator} on the final corpus file, writing to the
     * model file inside {@link #OUTPUT_FOLDER}. Nothing is done if the LDA
     * corpus file is missing and can't be generated.
     */
    protected void generateModel() {
        if (!checkLDACorpusExistence()) {
            return;
        }
        // NOTE(review): the meaning of the two constructor arguments is defined
        // by ModelGenerator and is not visible from this class.
        ModelGenerator generator = new ModelGenerator(1000, 1040);
        LOGGER.info("Generating Model file...");
        generator.run(outputPath(FINAL_CORPUS_FILE), outputPath(MODEL_FILE));
    }

    /**
     * Checks whether {@link #LDA_CORPUS_FILE} exists and tries to generate it
     * if it doesn't.
     *
     * @return {@code true} if the LDA corpus file exists when this method
     *         returns, {@code false} if it is missing and couldn't be
     *         generated
     */
    protected boolean checkLDACorpusExistence() {
        File ldaCorpusFile = new File(LDA_CORPUS_FILE);
        if (ldaCorpusFile.exists()) {
            return true;
        }
        LOGGER.warn("The LDA corpus file is not existing. Trying to generate it...");
        generateLDACorpusFile();
        if (ldaCorpusFile.exists()) {
            return true;
        }
        LOGGER.error("The LDA corpus file is not existing and couldn't be generated.");
        return false;
    }

    /** Builds the path of the file with the given name inside {@link #OUTPUT_FOLDER}. */
    private static String outputPath(String fileName) {
        return OUTPUT_FOLDER + File.separator + fileName;
    }
}

