/*
 * Decompiled with CFR 0.152.
 */
package org.aksw.simba.tapioca.gen;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import org.aksw.simba.tapioca.data.DatasetClassInfo;
import org.aksw.simba.tapioca.data.DatasetPropertyInfo;
import org.aksw.simba.tapioca.data.DatasetSpecialClassesInfo;
import org.aksw.simba.tapioca.data.DatasetVocabularies;
import org.aksw.simba.tapioca.preprocessing.JenaBasedVoidParsingSupplierDecorator;
import org.dice_research.topicmodeling.io.FolderReader;
import org.dice_research.topicmodeling.io.xml.XmlWritingDocumentConsumer;
import org.dice_research.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.DocumentFilteringSupplierDecorator;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.DocumentTextCreatingSupplierDecorator;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.PropertyRemovingSupplierDecorator;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.filter.DocumentFilter;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.filter.StringContainingDocumentPropertyBasedFilter;
import org.dice_research.topicmodeling.utils.doc.Document;
import org.dice_research.topicmodeling.utils.doc.DocumentName;
import org.dice_research.topicmodeling.utils.doc.DocumentRawData;
import org.dice_research.topicmodeling.utils.doc.DocumentText;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command line tool that reads the documents found in an input folder, runs
 * them through a chain of document supplier decorators and writes the
 * resulting documents to a single XML corpus file.
 *
 * <p>
 * Usage: {@code InitialCorpusCreation <input-directory> <output-corpus-file>}
 * </p>
 */
public class InitialCorpusCreation {
    private static final Logger LOGGER = LoggerFactory.getLogger(InitialCorpusCreation.class);

    /**
     * @deprecated hard-coded value that is not used by this class; kept only
     *             for backward compatibility.
     */
    @Deprecated
    public static final String CORPUS_NAME = "lodStats";

    /**
     * @deprecated hard-coded, machine-specific path that is not used by this
     *             class; pass the corpus file as second program argument
     *             instead.
     */
    @Deprecated
    public static final String CORPUS_FILE = "C:/Daten/tapioca/lodStats.corpus";

    /**
     * @deprecated hard-coded, machine-specific path that is not used by this
     *             class; pass the input folder as first program argument
     *             instead.
     */
    @Deprecated
    public static final File INPUT_FOLDER = new File("C:/Daten/Dropbox/lodstats-rdf/23032015/void");

    /** Number of consumed documents after which a progress message is logged. */
    private static final int PROGRESS_LOG_INTERVAL = 100;

    /**
     * Program entry point.
     *
     * @param args
     *            {@code args[0]} is the input directory, {@code args[1]} the
     *            corpus file that will be written. If fewer than two
     *            arguments are given, a usage message is printed to
     *            {@code System.err} and the JVM exits with status 1.
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Not enough arguments. Call the program as:");
            System.err.println("InitialCorpusCreation <input-directory> <output-corpus-file>");
            System.exit(1);
        }
        InitialCorpusCreation creation = new InitialCorpusCreation();
        creation.run(new File(args[0]), new File(args[1]));
    }

    /**
     * Reads all documents supplied by the preprocessing chain and hands them
     * to an XML writing consumer. Processing is aborted (and logged) as soon
     * as the consumer fails on a document. The consumer is closed in every
     * case, including the abort path.
     *
     * @param inputFolder
     *            the folder the documents are read from
     * @param corpusFile
     *            the file the corpus is written to
     */
    protected void run(File inputFolder, File corpusFile) {
        DocumentSupplier supplier = createSupplier(inputFolder);

        XmlWritingDocumentConsumer consumer = XmlWritingDocumentConsumer
                .createXmlWritingDocumentConsumer(corpusFile.getAbsoluteFile());
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetClassInfo.class);
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetSpecialClassesInfo.class);
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetPropertyInfo.class);
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetVocabularies.class);

        // NOTE(review): defensive guard - whether the factory can return null on
        // failure is not visible from here; confirm against the library.
        if (consumer == null) {
            LOGGER.error("Couldn't create a consumer for the corpus file {}. Aborting.",
                    corpusFile.getAbsolutePath());
            return;
        }

        int count = 0;
        try {
            Document document = supplier.getNextDocument();
            while (document != null) {
                try {
                    consumer.consumeDocument(document);
                } catch (Exception e) {
                    // The finally block below still closes the consumer.
                    LOGGER.error("Exception at document #{}. Aborting.", document.getDocumentId(), e);
                    return;
                }
                ++count;
                if (count % PROGRESS_LOG_INTERVAL == 0) {
                    LOGGER.info("Saw {} documents", count);
                }
                document = supplier.getNextDocument();
            }
            LOGGER.info("Saw {} documents", count);
        } finally {
            try {
                consumer.close();
            } catch (IOException e) {
                LOGGER.warn("Got an exception while closing the XML Writer.", e);
            }
        }
    }

    /**
     * Builds the supplier chain: folder reader, filter on document names
     * ending with ".ttl", text creation, VoID parsing and finally removal of
     * the {@link DocumentRawData} and {@link DocumentText} properties.
     *
     * @param inputFolder
     *            the folder the documents are read from
     * @return the outermost decorator of the supplier chain
     */
    private static DocumentSupplier createSupplier(File inputFolder) {
        FolderReader reader = new FolderReader(inputFolder);
        reader.setUseFolderNameAsCategory(true);
        // The chain variable has to be of the common supplier type; the
        // decorators are not FolderReader instances.
        DocumentSupplier supplier = reader;
        supplier = new DocumentFilteringSupplierDecorator(supplier,
                new StringContainingDocumentPropertyBasedFilter(
                        StringContainingDocumentPropertyBasedFilter.StringContainingDocumentPropertyBasedFilterType.ENDS_WITH,
                        DocumentName.class, ".ttl", true));
        supplier = new DocumentTextCreatingSupplierDecorator(supplier);
        supplier = new JenaBasedVoidParsingSupplierDecorator(supplier);
        supplier = new PropertyRemovingSupplierDecorator(supplier,
                Arrays.asList(DocumentRawData.class, DocumentText.class));
        return supplier;
    }
}

