/*
 * Decompiled with CFR 0.152.
 */
package org.aksw.simba.tapioca.gen;

import au.com.bytecode.opencsv.CSVReader;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.aksw.simba.tapioca.data.DatasetClassInfo;
import org.aksw.simba.tapioca.data.DatasetPropertyInfo;
import org.aksw.simba.tapioca.data.DatasetSpecialClassesInfo;
import org.aksw.simba.tapioca.data.DatasetVocabularies;
import org.dice_research.topicmodeling.io.xml.XmlWritingDocumentConsumer;
import org.dice_research.topicmodeling.io.xml.stream.StreamBasedXmlDocumentSupplier;
import org.dice_research.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.AbstractDocumentSupplierDecorator;
import org.dice_research.topicmodeling.utils.doc.Document;
import org.dice_research.topicmodeling.utils.doc.DocumentURI;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LaundromatCorpusUpdater {
    private static final Logger LOGGER = LoggerFactory.getLogger(LaundromatCorpusUpdater.class);

    /**
     * Command line entry point.
     * <p>
     * Expects at least three arguments: the Laundromat TSV file, the input
     * corpus file and the output corpus file (in this order). If fewer are
     * given, a usage message is printed to {@code System.err} and the JVM
     * exits with status 1.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        final int requiredArguments = 3;
        if (args.length < requiredArguments) {
            System.err.println("Not enough arguments. Call the program as:");
            System.err.println("LaundromatCorpusUpdater <laundromat-tsv-file> <input-corpus-file> <output-corpus-file>");
            System.exit(1);
        }
        File laundromatTsv = new File(args[0]);
        File inputCorpus = new File(args[1]);
        File outputCorpus = new File(args[2]);
        new LaundromatCorpusUpdater().run(laundromatTsv, inputCorpus, outputCorpus);
    }

    /**
     * Reads all documents of the input corpus, lets a
     * {@link LaundromatDocumentUpdater} rewrite their URIs and writes them to
     * the output file.
     * <p>
     * Progress is logged every 100 documents. Processing stops at the first
     * document the consumer fails to write. The XML writer is closed in every
     * case once it has been created, so a failure does not leave the output
     * file handle open.
     *
     * @param laundromatTSVFile file from which the hash-to-URI mapping is read
     * @param inputFolder       the corpus file handed to
     *                          {@code StreamBasedXmlDocumentSupplier.createReader(File)}
     *                          (the usage text of {@code main} calls it
     *                          {@code <input-corpus-file>})
     * @param outputFile        file the updated corpus is written to
     */
    protected void run(File laundromatTSVFile, File inputFolder, File outputFile) {
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetClassInfo.class);
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetSpecialClassesInfo.class);
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetPropertyInfo.class);
        XmlWritingDocumentConsumer.registerParseableDocumentProperty(DatasetVocabularies.class);

        // Typed as DocumentSupplier (not Object) so that getNextDocument() can be called on it.
        DocumentSupplier supplier = StreamBasedXmlDocumentSupplier.createReader(inputFolder);
        supplier = LaundromatDocumentUpdater.create(supplier, laundromatTSVFile);
        if (supplier == null) {
            LOGGER.error("The LaundromatDocumentUpdater couldn't be created. Aborting.");
            return;
        }

        XmlWritingDocumentConsumer consumer = XmlWritingDocumentConsumer
                .createXmlWritingDocumentConsumer(outputFile.getAbsoluteFile());
        // NOTE(review): the factory's failure behaviour is not visible here; guard against null
        // so that the close() in the finally block below cannot throw a NullPointerException.
        if (consumer == null) {
            LOGGER.error("The XML writer couldn't be created. Aborting.");
            return;
        }

        int count = 0;
        try {
            Document document = supplier.getNextDocument();
            while (document != null) {
                try {
                    consumer.consumeDocument(document);
                } catch (Exception e) {
                    LOGGER.error("Exception at document #{}. Aborting.", document.getDocumentId(), e);
                    return;
                }
                if (++count % 100 == 0) {
                    LOGGER.info("Saw {} documents", count);
                }
                document = supplier.getNextDocument();
            }
            LOGGER.info("Saw {} documents", count);
        } finally {
            // Runs on normal completion, on the early return above and on unexpected exceptions.
            try {
                consumer.close();
            } catch (IOException e) {
                LOGGER.warn("Got an exception while closing the XML Writer.", e);
            }
        }
    }

    public static class LaundromatDocumentUpdater
    extends AbstractDocumentSupplierDecorator {
        public static final String LAUNDROMAT_URI_PREFIX = "http://lodlaundromat.org/resource/";
        public static final int LAUNDROMAT_URI_PREFIX_LENGTH = "http://lodlaundromat.org/resource/".length();
        public static final int URI_ID = 14;
        public static final int HASH_ID = 6;
        protected Map<String, String> hash2Uri;

        /**
         * Creates an updater whose hash-to-URI mapping is read from the given
         * file (decoded as UTF-8).
         * <p>
         * The file is opened inside a try-with-resources statement, so it is
         * closed on every path and any exception raised while opening, reading
         * or closing it is logged and turned into a {@code null} result.
         *
         * @param documentSource the supplier that is decorated
         * @param tsvFile        the file containing the mapping
         * @return the new updater or {@code null} if the file could not be
         *         read
         */
        public static LaundromatDocumentUpdater create(DocumentSupplier documentSource, File tsvFile) {
            try (Reader reader = new InputStreamReader(new BufferedInputStream(new FileInputStream(tsvFile)),
                    StandardCharsets.UTF_8)) {
                return new LaundromatDocumentUpdater(documentSource, parseHashToUriMapping(reader));
            } catch (Exception e) {
                LOGGER.error("Exception while creating LaundromatDocumentUpdater. Returning null.", e);
                return null;
            }
        }

        /**
         * Parses whitespace separated records and maps the second token of
         * each record (the hash) to the third token (the URI).
         * <p>
         * States of the scanner:
         * <ul>
         * <li>0 - inside the first token, which is skipped</li>
         * <li>1 - whitespace before the hash</li>
         * <li>2 - inside the hash</li>
         * <li>3 - whitespace before the URI</li>
         * <li>4 - inside the URI</li>
         * <li>5 - rest of the line, skipped up to the next {@code '\n'}</li>
         * </ul>
         * In states 0 to 3 a line break is treated like any other whitespace.
         *
         * @param reader the character source; it is read until its end but not
         *               closed
         * @return the mapping from hash to URI
         * @throws IOException if reading fails
         */
        private static Map<String, String> parseHashToUriMapping(Reader reader) throws IOException {
            Map<String, String> hash2Uri = new HashMap<>();
            StringBuilder token = new StringBuilder();
            String hash = null;
            int state = 0;
            int read;
            // read() returning -1 is the reliable end-of-stream signal; ready() only tells
            // whether a read would block.
            while ((read = reader.read()) != -1) {
                char c = (char) read;
                boolean whitespace = Character.isWhitespace(c);
                switch (state) {
                case 0:
                    if (whitespace) {
                        state = 1;
                    }
                    break;
                case 1:
                case 3:
                    if (!whitespace) {
                        ++state;
                        token.append(c);
                    }
                    break;
                case 2:
                    if (whitespace) {
                        hash = token.toString();
                        token.setLength(0);
                        state = 3;
                    } else {
                        token.append(c);
                    }
                    break;
                case 4:
                    if (whitespace) {
                        hash2Uri.put(hash, token.toString());
                        token.setLength(0);
                        // If the URI was the last token of the line, the line is already
                        // finished; waiting for another '\n' would swallow the next record.
                        state = (c == '\n') ? 0 : 5;
                    } else {
                        token.append(c);
                    }
                    break;
                case 5:
                    if (c == '\n') {
                        state = 0;
                    }
                    break;
                default:
                    throw new IllegalStateException("Unknown state " + state);
                }
            }
            // The input ended directly after a URI (no trailing whitespace): keep that record.
            if (state == 4) {
                hash2Uri.put(hash, token.toString());
            }
            return hash2Uri;
        }

        /**
         * Creates an updater whose hash-to-URI mapping is read from the given
         * file with a {@link CSVReader} that uses a single space as separator.
         * <p>
         * Column {@link #HASH_ID} of every line is mapped to column
         * {@link #URI_ID}. Lines with too few columns are logged and skipped.
         * The file is decoded as UTF-8, matching
         * {@link #create(DocumentSupplier, File)}. It is opened inside a
         * try-with-resources statement, so it is closed on every path and any
         * exception raised while opening, reading or closing it is logged and
         * turned into a {@code null} result.
         *
         * @param documentSource the supplier that is decorated
         * @param tsvFile        the file containing the mapping
         * @return the new updater or {@code null} if the file could not be
         *         read
         * @deprecated No reason is recorded in this file;
         *             {@link #create(DocumentSupplier, File)} is the
         *             non-deprecated factory with the same signature.
         */
        @Deprecated
        public static LaundromatDocumentUpdater createWithCSVReader(DocumentSupplier documentSource, File tsvFile) {
            Map<String, String> hash2Uri = new HashMap<>();
            try (Reader fileReader = new InputStreamReader(new FileInputStream(tsvFile), StandardCharsets.UTF_8)) {
                // The CSVReader only wraps fileReader; the file handle itself is owned and
                // closed by the try-with-resources statement.
                CSVReader csvReader = new CSVReader(fileReader, ' ');
                String[] line;
                while ((line = csvReader.readNext()) != null) {
                    if (line.length > URI_ID && line.length > HASH_ID) {
                        hash2Uri.put(line[HASH_ID], line[URI_ID]);
                    } else {
                        LOGGER.info("Discarded {}", Arrays.toString(line));
                    }
                }
                return new LaundromatDocumentUpdater(documentSource, hash2Uri);
            } catch (Exception e) {
                LOGGER.error("Exception while creating LaundromatDocumentUpdater. Returning null.", e);
                return null;
            }
        }

        public LaundromatDocumentUpdater(DocumentSupplier documentSource, Map<String, String> hash2Uri) {
            super(documentSource);
            this.hash2Uri = hash2Uri;
            LOGGER.info("Initialized with " + hash2Uri.size() + " elements.");
        }

        /**
         * Replaces the value of the document's {@link DocumentURI} property
         * with the URI that {@code hash2Uri} holds for the hash extracted from
         * the current value.
         * <p>
         * If the document has no {@link DocumentURI} property or the hash is
         * not a key of the mapping, an error is logged and the document is
         * left unchanged.
         *
         * @param document the document to update
         * @return the same document instance
         */
        protected Document prepareDocument(Document document) {
            DocumentURI documentUri = (DocumentURI) document.getProperty(DocumentURI.class);
            if (documentUri != null) {
                String key = this.extractHash(documentUri);
                if (this.hash2Uri.containsKey(key)) {
                    documentUri.set(this.hash2Uri.get(key));
                } else {
                    LOGGER.error("Got a document with an unknown hash (\"{}\").", key);
                }
            } else {
                LOGGER.error("Got a document without the necessary DocumentURI property.");
            }
            return document;
        }

        /**
         * Extracts the hash from the given URI by trimming it and cutting off
         * its first {@link #LAUNDROMAT_URI_PREFIX_LENGTH} characters.
         * <p>
         * The cut is purely length based; it is not checked that the value
         * really starts with {@link #LAUNDROMAT_URI_PREFIX}. A value that is
         * shorter than the prefix is returned trimmed but otherwise unchanged,
         * and a {@code null} value yields the empty string. Neither case
         * throws, so a single malformed document does not abort the stream.
         *
         * @param uri the URI property of a document
         * @return the part of the trimmed URI behind the prefix
         */
        protected String extractHash(DocumentURI uri) {
            String value = (String) uri.get();
            String trimmed = (value == null) ? "" : value.trim();
            if (trimmed.length() < LAUNDROMAT_URI_PREFIX_LENGTH) {
                // Too short to contain the prefix; substring() would throw here.
                return trimmed;
            }
            return trimmed.substring(LAUNDROMAT_URI_PREFIX_LENGTH);
        }
    }
}

