/*
 * Decompiled with CFR 0.152.
 */
package org.aksw.simba.bengal.utils;

import java.io.Closeable;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.aksw.gerbil.dataset.Dataset;
import org.aksw.gerbil.dataset.DatasetConfiguration;
import org.aksw.gerbil.dataset.impl.nif.NIFFileDatasetConfig;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.semantic.kb.UriKBClassifier;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.Meaning;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Computes simple statistics for datasets and writes them as comma separated
 * rows to a {@link PrintStream}.
 *
 * <p>
 * For every analyzed dataset one row is written containing the dataset name,
 * the average number of markings per document, the average number of markings
 * per token, the average number of tokens per document, the number of
 * documents, the number of markings and the number of {@link Meaning}
 * markings that have no URI accepted by the configured
 * {@link UriKBClassifier}.
 * </p>
 */
public class DatasetAnalyzer {
    private static final Logger LOGGER = LoggerFactory.getLogger(DatasetAnalyzer.class);

    /** Header line written by {@link #main(String[])} before the first row. */
    private static final String CSV_HEADER = "name,entitiesPerDoc, entitiesPerToken, avgDocumentLength,numberOfDocuments,numberOfEntities, numberOfEEs, amountOfPersons, amountOfOrganizations, amountOfLocations, amountOfOthers";

    /**
     * Experiment types that are tried by {@link #analyzeDataset(DatasetConfiguration)},
     * in this order. The first type the configuration is applicable for is used.
     */
    private static final ExperimentType[] ANALYSIS_TYPES = new ExperimentType[] { ExperimentType.D2KB,
            ExperimentType.A2KB, ExperimentType.ETyping, ExperimentType.OKE_Task2, ExperimentType.C2KB,
            ExperimentType.ERec };

    /** Stream the statistic rows are written to. It is not closed by this class. */
    private final PrintStream output;

    /**
     * Classifier used to decide whether a meaning has a URI of a known KB. May
     * be {@code null}, in which case the number of such meanings is not
     * determined.
     */
    private final UriKBClassifier classifier;

    /**
     * Analyzes a fixed list of datasets and writes the result to the file
     * {@code datasetAnalyzation.log} in the working directory.
     *
     * @param args
     *            command line arguments (not used)
     */
    public static void main(String[] args) {
        List<NIFFileDatasetConfig> datasetConfigs = new ArrayList<NIFFileDatasetConfig>();
        datasetConfigs.add(new NIFFileDatasetConfig("abstracts_nl0", "abstracts_nl0.ttl", false, ExperimentType.D2KB, null, null));
        PrintStream output = null;
        try {
            output = new PrintStream("datasetAnalyzation.log");
            output.println(CSV_HEADER);
            DatasetAnalyzer analyzer = new DatasetAnalyzer(output);
            for (DatasetConfiguration datasetConfiguration : datasetConfigs) {
                try {
                    analyzer.analyzeDataset(datasetConfiguration);
                } catch (GerbilException e) {
                    // A single broken dataset must not stop the analysis of the others.
                    LOGGER.error("Error while analyzing dataset " + datasetConfiguration.getName(), (Throwable) e);
                }
            }
        } catch (Exception e) {
            LOGGER.error("Error while writing the dataset analysis. Aborting.", (Throwable) e);
        } finally {
            // Single place in which the stream is closed, on every path.
            IOUtils.closeQuietly((OutputStream) output);
        }
    }

    /**
     * Creates an analyzer without a KB classifier. The column counting the
     * meanings without a known KB URI is left empty in the generated rows.
     *
     * @param output
     *            the stream the rows are written to
     */
    public DatasetAnalyzer(PrintStream output) {
        this(output, null);
    }

    /**
     * Creates an analyzer that uses the given classifier to count meanings
     * without a known KB URI.
     *
     * @param output
     *            the stream the rows are written to
     * @param classifier
     *            the classifier used to check the URIs of meanings, or
     *            {@code null} if this count should not be determined
     */
    public DatasetAnalyzer(PrintStream output, UriKBClassifier classifier) {
        this.output = output;
        this.classifier = classifier;
    }

    /**
     * Analyzes the given dataset using the first experiment type of
     * {@link #ANALYSIS_TYPES} the configuration is applicable for. If none of
     * the types is applicable, an error is logged and nothing is written.
     *
     * @param config
     *            the configuration of the dataset that should be analyzed
     * @throws GerbilException
     *             if the dataset can not be loaded
     */
    public void analyzeDataset(DatasetConfiguration config) throws GerbilException {
        for (ExperimentType type : ANALYSIS_TYPES) {
            if (config.isApplicableForExperiment(type)) {
                this.analyze(config, type);
                return;
            }
        }
        LOGGER.error("Can not analyze the dataset with the following config: " + config.toString());
    }

    /**
     * Counts the whitespace separated tokens of the given text.
     *
     * @param text
     *            the text that should be tokenized, may be {@code null}
     * @return the number of tokens found; {@code 0} for a {@code null} text. If
     *         the tokenization fails, the error is logged and the number of
     *         tokens counted up to this point is returned.
     */
    private int countTokensInText(String text) {
        if (text == null) {
            // A StringReader can not be created for null.
            return 0;
        }
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer((Reader) new StringReader(text));
        int tokens = 0;
        try {
            tokenizer.reset();
            while (tokenizer.incrementToken()) {
                ++tokens;
            }
            // Completes the reset/incrementToken/end/close workflow of the token stream.
            tokenizer.end();
        } catch (Exception e) {
            LOGGER.error("Error while tokenizing text. Returning.", (Throwable) e);
        } finally {
            IOUtils.closeQuietly((Closeable) tokenizer);
        }
        return tokens;
    }

    /**
     * Loads the dataset for the given experiment type, computes its statistics
     * and writes them as a single row to the output stream. Nothing is written
     * if the configuration returns no dataset.
     *
     * @param config
     *            the configuration of the dataset
     * @param type
     *            the experiment type used to load the dataset
     * @throws GerbilException
     *             if the dataset can not be loaded
     */
    private void analyze(DatasetConfiguration config, ExperimentType type) throws GerbilException {
        Dataset dataset = config.getDataset(type);
        if (dataset == null) {
            return;
        }
        List<Document> documents = dataset.getInstances();
        // Without a classifier the meanings can not be checked against a KB.
        boolean canClassify = this.classifier != null;
        int annotationsSum = 0;
        int tokensSum = 0;
        int eeCount = 0;
        for (Document document : documents) {
            annotationsSum += document.getMarkings().size();
            tokensSum += this.countTokensInText(document.getText());
            if (!canClassify) {
                continue;
            }
            for (Meaning meaning : document.getMarkings(Meaning.class)) {
                if (!this.classifier.containsKBUri(meaning.getUris())) {
                    ++eeCount;
                }
            }
        }
        int documentCount = documents.size();
        // The row is assembled first and written at once, so that a failure
        // during the analysis does not leave a partial row in the output.
        // The averages are floating point divisions: a zero divisor yields
        // NaN or Infinity instead of an exception.
        StringBuilder row = new StringBuilder();
        row.append(config.getName()).append(',');
        row.append((double) annotationsSum / (double) documentCount).append(',');
        row.append((double) annotationsSum / (double) tokensSum).append(',');
        row.append((double) tokensSum / (double) documentCount).append(',');
        row.append(documentCount).append(',');
        row.append(annotationsSum).append(',');
        if (canClassify) {
            row.append(eeCount);
        }
        row.append(',');
        this.output.println(row.toString());
    }
}

