/*
 * Decompiled with CFR 0.152.
 */
package org.aksw.simba.tapioca.gen;

import com.carrotsearch.hppc.IntObjectOpenHashMap;
import com.carrotsearch.hppc.ObjectIntOpenHashMap;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import org.aksw.simba.tapioca.extraction.AbstractExtractor;
import org.aksw.simba.tapioca.extraction.Extractor;
import org.aksw.simba.tapioca.extraction.RDF2ExtractionStreamer;
import org.aksw.simba.tapioca.extraction.voidex.DatasetDescription;
import org.aksw.simba.tapioca.gen.data.StatResult;
import org.aksw.simba.tapioca.gen.preprocessing.StatResultsReader;
import org.apache.commons.io.IOUtils;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.impl.ResourceImpl;
import org.apache.jena.riot.Lang;
import org.apache.jena.vocabulary.DC;
import org.apache.jena.vocabulary.OWL;
import org.dice_research.topicmodeling.io.gzip.GZipCorpusObjectReader;
import org.dice_research.topicmodeling.io.gzip.GZipCorpusObjectWriter;
import org.dice_research.topicmodeling.preprocessing.ListCorpusCreator;
import org.dice_research.topicmodeling.preprocessing.docsupplier.CorpusWrappingDocumentSupplier;
import org.dice_research.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.AbstractDocumentSupplierDecorator;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.DocumentFilteringSupplierDecorator;
import org.dice_research.topicmodeling.preprocessing.docsupplier.decorator.filter.DocumentFilter;
import org.dice_research.topicmodeling.utils.corpus.Corpus;
import org.dice_research.topicmodeling.utils.corpus.DocumentListCorpus;
import org.dice_research.topicmodeling.utils.doc.Document;
import org.dice_research.topicmodeling.utils.doc.DocumentDescription;
import org.dice_research.topicmodeling.utils.doc.DocumentName;
import org.dice_research.topicmodeling.utils.doc.DocumentProperty;
import org.dice_research.topicmodeling.utils.doc.DocumentURI;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MetaDataInformationCollector {
    private static final Logger LOGGER = LoggerFactory.getLogger(MetaDataInformationCollector.class);
    private static final String LOD_STATS_DOC_BASE_URI = "http://lodstats.aksw.org/rdfdocs/";
    private RDF2ExtractionStreamer streamer = new RDF2ExtractionStreamer();

    /**
     * Command line entry point.
     *
     * <p>Without arguments (or with an unexpected number of them) the
     * collector runs with the built-in default paths. With four or five
     * arguments the paths are taken from the command line in the order
     * {@code metaFile corpusFile statResultsFile corpusOutFile [additionalMetaDataFile]};
     * if the fifth argument is omitted, no additional meta data is used.
     *
     * @param args optional file paths as described above
     */
    public static void main(String[] args) {
        // Defaults keep the behaviour of an argument-less invocation unchanged.
        String metaFileName = "/Daten/Dropbox/lodstats-rdf/23032015/datasets.nt";
        String corpusFileName = "/Daten/tapioca/lodStats_BL.object";
        String statResultsFile = "/Daten/Dropbox/lodstats-rdf/23032015/statresult.nt";
        String corpusOutFileName = "/Daten/tapioca/test.corpus";
        String additionalMetaDataFile = "/Daten/tapioca/lodStats_model/lodstats.nt";

        if (args != null && (args.length == 4 || args.length == 5)) {
            metaFileName = args[0];
            corpusFileName = args[1];
            statResultsFile = args[2];
            corpusOutFileName = args[3];
            // run(...) skips the enrichment step when this is null.
            additionalMetaDataFile = args.length == 5 ? args[4] : null;
        }

        MetaDataInformationCollector collector = new MetaDataInformationCollector();
        collector.run(metaFileName, corpusFileName, statResultsFile, corpusOutFileName, additionalMetaDataFile);
    }

    /**
     * Reads the stat results and dataset descriptions, optionally enriches the
     * descriptions with an additional meta data file and writes a copy of the
     * corpus in which the documents carry the collected meta data.
     *
     * <p>If the stat results or the descriptions could not be read, the run is
     * aborted with an error message and no output corpus is written.
     *
     * @param metaFileName           file containing the dataset meta data
     * @param corpusFileName         file containing the input corpus
     * @param statResultsFile        file containing the stat results
     * @param corpusOutFileName      file to which the resulting corpus is written
     * @param additionalMetaDataFile optional file with additional meta data;
     *                               may be {@code null} to skip the enrichment
     */
    public void run(String metaFileName, String corpusFileName, String statResultsFile, String corpusOutFileName, String additionalMetaDataFile) {
        StatResultsReader reader = new StatResultsReader();
        IntObjectOpenHashMap<StatResult> statResults = reader.read(statResultsFile);
        // NOTE(review): StatResultsReader is not visible here; guarding against a
        // null result in case it signals read errors like readDescriptions does.
        if (statResults == null) {
            LOGGER.error("Couldn't read stat results from \"" + statResultsFile + "\". Aborting.");
            return;
        }
        // readDescriptions returns null if the meta data file couldn't be parsed.
        IntObjectOpenHashMap<DatasetDescription> descriptions = this.readDescriptions(metaFileName);
        if (descriptions == null) {
            LOGGER.error("Couldn't read dataset descriptions from \"" + metaFileName + "\". Aborting.");
            return;
        }
        if (additionalMetaDataFile != null) {
            this.enricheMetaData(additionalMetaDataFile, descriptions);
        }
        this.addDescriptionsToCorpus(descriptions, statResults, corpusFileName, corpusOutFileName);
    }

    /**
     * Builds a mapping from document ids to the position of the document inside
     * the corpus read from the given file. The document id is the document's
     * name cut off at its first {@code '.'} (if the dot is not the first
     * character); names without such a dot are used completely. Documents
     * without a {@link DocumentName} are logged and skipped but still counted.
     *
     * @param corpusFileName file from which the corpus is read
     * @return the mapping from document id to corpus position
     */
    @Deprecated
    private ObjectIntOpenHashMap<String> createDocIdCorpusIdMapping(String corpusFileName) {
        ObjectIntOpenHashMap<String> idByDocName = new ObjectIntOpenHashMap<String>();
        int corpusId = 0;
        for (Document document : this.readCorpus(corpusFileName)) {
            DocumentName docName = (DocumentName) document.getProperty(DocumentName.class);
            if (docName == null) {
                LOGGER.warn("Document #" + corpusId + " has no DocumentName property.");
            } else {
                String docId = (String) docName.get();
                int dotIndex = docId.indexOf('.');
                // Only strip the suffix if there is something left in front of the dot.
                String key = dotIndex > 0 ? docId.substring(0, dotIndex) : docId;
                idByDocName.put(key, corpusId);
            }
            ++corpusId;
        }
        return idByDocName;
    }

    /**
     * Reads a corpus from the given file using a {@link GZipCorpusObjectReader}.
     *
     * @param corpusFileName name of the file containing the corpus
     * @return the corpus provided by the reader after reading
     */
    protected Corpus readCorpus(String corpusFileName) {
        File corpusFile = new File(corpusFileName);
        GZipCorpusObjectReader corpusReader = new GZipCorpusObjectReader(corpusFile);
        corpusReader.readCorpus();
        return corpusReader.getCorpus();
    }

    /**
     * Streams the given meta data file through a
     * {@link LODStatsMetaDataExtractor} and returns the descriptions it
     * collected.
     *
     * @param metaFileName file containing the dataset meta data
     * @return the collected descriptions keyed by dataset id, or {@code null}
     *         if an exception occurred while reading or parsing the file
     */
    protected IntObjectOpenHashMap<DatasetDescription> readDescriptions(String metaFileName) {
        LODStatsMetaDataExtractor metaDataExtractor = new LODStatsMetaDataExtractor();
        InputStream input = null;
        try {
            input = new FileInputStream(metaFileName);
            this.streamer.runExtraction(input, LOD_STATS_DOC_BASE_URI, Lang.TTL, new Extractor[]{metaDataExtractor});
            return metaDataExtractor.descriptions;
        } catch (Exception e) {
            LOGGER.error("Error while parsing file \"" + metaFileName + "\". Aborting.", (Throwable) e);
            return null;
        } finally {
            // Closed quietly on every path so a failing close never changes the result.
            IOUtils.closeQuietly(input);
        }
    }

    /**
     * Enriches the given descriptions using an additional RDF file.
     *
     * <p>The file is loaded as Turtle into an in-memory model. For every
     * description, {@link #updateDescription} is called with the resource of
     * the description's URI if the model contains it, then with the object of
     * one of its {@code owl:sameAs} statements (if any) and finally with the
     * subject of one {@code owl:sameAs} statement pointing to the resource that
     * was looked at last. {@code owl:sameAs} partners that are not URI
     * resources (literals, blank nodes) are skipped.
     *
     * <p>If the file can't be read, an error is logged and the descriptions
     * are left untouched.
     *
     * @param additionalMetaDataFile file containing the additional meta data
     * @param descriptions           descriptions that should be enriched
     */
    protected void enricheMetaData(String additionalMetaDataFile, IntObjectOpenHashMap<DatasetDescription> descriptions) {
        if (descriptions == null) {
            LOGGER.warn("Got no descriptions that could be enriched. Ignoring the additional meta data.");
            return;
        }
        Model model = ModelFactory.createDefaultModel();
        try (InputStream in = new BufferedInputStream(new FileInputStream(additionalMetaDataFile))) {
            model.read(in, LOD_STATS_DOC_BASE_URI, "Turtle");
        } catch (IOException e) {
            LOGGER.error("Couldn't read model with additional meta data from file. Ignoring this file.", (Throwable) e);
            return;
        }
        // Walk over the internal slots of the map; only allocated slots hold a description.
        for (int i = 0; i < descriptions.allocated.length; ++i) {
            if (!descriptions.allocated[i]) {
                continue;
            }
            DatasetDescription description = (DatasetDescription) descriptions.values[i];
            Resource datasetResource = new ResourceImpl(description.uri);
            if (model.containsResource(datasetResource)) {
                this.updateDescription(description, datasetResource, model);
            }
            // Follow owl:sameAs from the dataset to another resource ...
            if (model.contains(datasetResource, OWL.sameAs, (RDFNode) null)) {
                Statement s = model.listStatements(datasetResource, OWL.sameAs, (RDFNode) null).next();
                RDFNode target = s.getObject();
                // asResource() would fail for literals and blank nodes have no URI to use.
                if (target.isURIResource()) {
                    datasetResource = target.asResource();
                    this.updateDescription(description, datasetResource, model);
                }
            }
            // ... and from another resource to the one looked at last.
            if (model.contains(null, OWL.sameAs, datasetResource)) {
                Statement s = model.listStatements(null, OWL.sameAs, datasetResource).next();
                Resource source = s.getSubject();
                if (source.isURIResource()) {
                    datasetResource = source;
                    this.updateDescription(description, datasetResource, model);
                }
            }
        }
    }

    /**
     * Replaces the URI of the given description with the URI of the given
     * resource unless that URI starts with the LODStats document base URI.
     * Resources without a URI (e.g. blank nodes) are ignored.
     *
     * @param description     the description that should be updated
     * @param datasetResource the resource providing the (possibly better) URI
     * @param model           the model containing the resource (currently unused)
     */
    protected void updateDescription(DatasetDescription description, Resource datasetResource, Model model) {
        String uri = datasetResource.getURI();
        // getURI() returns null for anonymous resources.
        if (uri != null && !uri.startsWith(LOD_STATS_DOC_BASE_URI)) {
            description.uri = uri;
        }
    }

    /**
     * Reads the corpus from {@code corpusFileName}, keeps only the documents
     * accepted by a {@link StatResultListBasedDocumentFilter}, lets a
     * {@link MetaDataAddingSupplierDecorator} add the meta data to them and
     * writes the resulting corpus (with the properties of the input corpus) to
     * {@code corpusOutFileName}.
     *
     * @param descriptions      dataset descriptions keyed by dataset id
     * @param statResults       stat results keyed by document id
     * @param corpusFileName    file containing the input corpus
     * @param corpusOutFileName file to which the new corpus is written
     */
    protected void addDescriptionsToCorpus(IntObjectOpenHashMap<DatasetDescription> descriptions, IntObjectOpenHashMap<StatResult> statResults, String corpusFileName, String corpusOutFileName) {
        Corpus inputCorpus = this.readCorpus(corpusFileName);

        // Supplier chain: corpus documents -> filter -> meta data decorator.
        DocumentSupplier supplier = new CorpusWrappingDocumentSupplier(inputCorpus);
        DocumentFilter filter = new StatResultListBasedDocumentFilter(statResults);
        supplier = new DocumentFilteringSupplierDecorator(supplier, filter);
        supplier = new MetaDataAddingSupplierDecorator(supplier, descriptions, statResults);

        ListCorpusCreator corpusCreator = new ListCorpusCreator(supplier, new DocumentListCorpus(new ArrayList()));
        Corpus outputCorpus = corpusCreator.getCorpus();
        outputCorpus.setProperties(inputCorpus.getProperties());

        File outputFile = new File(corpusOutFileName);
        new GZipCorpusObjectWriter(outputFile).writeCorpus(outputCorpus);
    }

    /**
     * Extracts the numeric id from a document name. The id is the part of the
     * name in front of its first {@code '.'}.
     *
     * @param name the document name, may be {@code null}
     * @return the parsed id or {@code -1} if the name is {@code null}, contains
     *         no dot or the part in front of the dot is not an integer
     */
    protected static int getIdFromDocumentName(DocumentName name) {
        if (name == null) {
            return -1;
        }
        String value = (String) name.get();
        int dotIndex = value.indexOf('.');
        if (dotIndex < 0) {
            return -1;
        }
        String idPart = value.substring(0, dotIndex);
        try {
            return Integer.parseInt(idPart);
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    protected static class LODStatsMetaDataExtractor
    extends AbstractExtractor {
        public static final String DCAT_ACCESS_URL_URI = "http://www.w3.org/ns/dcat#accessURL";
        public IntObjectOpenHashMap<DatasetDescription> descriptions = new IntObjectOpenHashMap();

        protected LODStatsMetaDataExtractor() {
        }

        public void handleTriple(Triple triple) {
            int datasetId;
            String subjUri;
            Node subject = triple.getSubject();
            if (!subject.isBlank() && (subjUri = subject.getURI()).startsWith(MetaDataInformationCollector.LOD_STATS_DOC_BASE_URI) && (datasetId = LODStatsMetaDataExtractor.getDatasetIdFromUri(subjUri)) >= 0) {
                DatasetDescription description;
                if (this.descriptions.containsKey(datasetId)) {
                    description = (DatasetDescription)this.descriptions.get(datasetId);
                } else {
                    description = new DatasetDescription(subjUri);
                    this.descriptions.put(datasetId, (Object)description);
                }
                if (triple.getPredicate().getURI().equals(DCAT_ACCESS_URL_URI)) {
                    description.title = triple.getObject().toString();
                } else if (triple.getPredicate().equals((Object)DC.source.asNode())) {
                    description.description = "Accessed through " + triple.getObject().toString();
                }
            }
        }

        protected static int getDatasetIdFromUri(String uri) {
            try {
                return Integer.parseInt(uri.substring(MetaDataInformationCollector.LOD_STATS_DOC_BASE_URI.length()));
            }
            catch (NumberFormatException e) {
                LOGGER.error("Couldn't extract the dataset id from URI \"" + uri + "\". Returning -1.", (Throwable)e);
                return -1;
            }
        }
    }

    /**
     * Decorator adding the meta data of a dataset (title, URI, description) as
     * properties to the document that belongs to the dataset.
     */
    protected static class MetaDataAddingSupplierDecorator
    extends AbstractDocumentSupplierDecorator {
        /** Dataset descriptions keyed by dataset id. */
        private IntObjectOpenHashMap<DatasetDescription> descriptions;
        /** Stat results keyed by document id. */
        private IntObjectOpenHashMap<StatResult> statResults;

        public MetaDataAddingSupplierDecorator(DocumentSupplier documentSource, IntObjectOpenHashMap<DatasetDescription> descriptions, IntObjectOpenHashMap<StatResult> statResults) {
            super(documentSource);
            this.statResults = statResults;
            this.descriptions = descriptions;
        }

        /**
         * Looks up the stat result of the document (via the id in its name)
         * and the description of the dataset the stat result points to. If a
         * description is found, its non-null title, URI and description are
         * added to the document. Otherwise a warning is logged and, if the
         * document has no {@link DocumentDescription} yet, a placeholder
         * description is added.
         *
         * @param document the document that should be enriched
         * @return the given document
         */
        protected Document prepareDocument(Document document) {
            DocumentName name = (DocumentName)document.getProperty(DocumentName.class);
            int docId = MetaDataInformationCollector.getIdFromDocumentName(name);
            StatResult statResult = this.statResults.get(docId);
            // A document without stat result is handled like one without description.
            int datasetId = statResult != null ? LODStatsMetaDataExtractor.getDatasetIdFromUri(statResult.getDatasetUri()) : -1;
            DatasetDescription description = this.descriptions.get(datasetId);
            if (description != null) {
                if (description.title != null) {
                    document.addProperty((DocumentProperty)new DocumentName(description.title));
                }
                if (description.uri != null) {
                    document.addProperty((DocumentProperty)new DocumentURI(description.uri));
                }
                if (description.description != null) {
                    document.addProperty((DocumentProperty)new DocumentDescription(description.description));
                }
            } else {
                LOGGER.warn("Document #{} has no description.", (Object)datasetId);
                if (document.getProperty(DocumentDescription.class) == null) {
                    document.addProperty((DocumentProperty)new DocumentDescription("Couldn't get meta data for this dataset."));
                }
            }
            return document;
        }
    }

    /**
     * Filter accepting only documents for which a stat result is available.
     */
    protected static class StatResultListBasedDocumentFilter
    implements DocumentFilter {
        /** Stat results keyed by document id. */
        private final IntObjectOpenHashMap<StatResult> statResults;

        public StatResultListBasedDocumentFilter(IntObjectOpenHashMap<StatResult> statResults) {
            this.statResults = statResults;
        }

        /**
         * Checks whether the id taken from the document's name is a key of
         * the stat results. Documents without such an id are rejected with a
         * warning.
         *
         * @param document the document that should be checked
         * @return {@code true} if a stat result exists for the document
         */
        public boolean isDocumentGood(Document document) {
            DocumentName docName = (DocumentName) document.getProperty(DocumentName.class);
            int id = MetaDataInformationCollector.getIdFromDocumentName(docName);
            if (id >= 0) {
                return this.statResults.containsKey(id);
            }
            LOGGER.warn("Document #" + document.getDocumentId() + " has no id in its name. Removing it.");
            return false;
        }
    }
}

