/*
 * Decompiled with CFR 0.152.
 */
package org.aksw.simba.bengal.utils;

import au.com.bytecode.opencsv.CSVWriter;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import java.io.Closeable;
import java.io.FileWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import org.aksw.gerbil.dataset.Dataset;
import org.aksw.gerbil.dataset.DatasetConfiguration;
import org.aksw.gerbil.dataset.impl.nif.NIFFileDatasetConfig;
import org.aksw.gerbil.datatypes.ExperimentType;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.web.config.AdapterList;
import org.aksw.gerbil.web.config.DatasetsConfig;
import org.apache.commons.io.IOUtils;

public class PosTagAnalyzer {
    protected StanfordCoreNLP pipeline;

    public static void main(String[] args) {
        ArrayList<DatasetConfiguration> configs = new ArrayList<DatasetConfiguration>();
        AdapterList datasets = DatasetsConfig.datasets(null, null);
        configs.addAll(datasets.getAdaptersForName("ACE2004"));
        configs.addAll(datasets.getAdaptersForName("AIDA/CoNLL-Complete"));
        configs.addAll(datasets.getAdaptersForName("AIDA/CoNLL-Test A"));
        configs.addAll(datasets.getAdaptersForName("AIDA/CoNLL-Test B"));
        configs.addAll(datasets.getAdaptersForName("AIDA/CoNLL-Training"));
        configs.addAll(datasets.getAdaptersForName("AQUAINT"));
        configs.addAll(datasets.getAdaptersForName("DBpediaSpotlight"));
        configs.addAll(datasets.getAdaptersForName("IITB"));
        configs.addAll(datasets.getAdaptersForName("KORE50"));
        configs.addAll(datasets.getAdaptersForName("Microposts2014-Test"));
        configs.addAll(datasets.getAdaptersForName("Microposts2014-Train"));
        configs.addAll(datasets.getAdaptersForName("MSNBC"));
        configs.addAll(datasets.getAdaptersForName("N3-Reuters-128"));
        configs.addAll(datasets.getAdaptersForName("N3-RSS-500"));
        configs.addAll(datasets.getAdaptersForName("OKE 2015 Task 1 evaluation dataset"));
        configs.addAll(datasets.getAdaptersForName("OKE 2015 Task 1 gold standard sample"));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B1", "datasets/B1_bengal_path_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B2", "datasets/B2_bengal_path_para_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B3", "datasets/B3_bengal_star_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B4", "datasets/B4_bengal_star_para_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B5", "datasets/B5_bengal_sym_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B6", "datasets/B6_bengal_sym_para_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B7", "datasets/B7_bengal_hybrid_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B8", "datasets/B8_bengal_hybrid_para_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B9", "datasets/B9_bengal_summary_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B10", "datasets/B10_bengal_summary_para_100.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B11", "datasets/B11_bengal_hybrid_10000.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B12", "datasets/B12_bengal_hybrid_object_10.ttl", true, ExperimentType.A2KB, null, null));
        configs.add((DatasetConfiguration)new NIFFileDatasetConfig("B13", "datasets/B13_bengal_star_data_10_70+_sen.ttl", true, ExperimentType.A2KB, null, null));
        PosTagAnalyzer analyzer = new PosTagAnalyzer();
        analyzer.analyze(configs, "analyzation.csv");
    }

    public PosTagAnalyzer() {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos");
        this.pipeline = new StanfordCoreNLP(props);
    }

    public void analyze(List<DatasetConfiguration> configs, String outputFile) {
        ArrayList<Object2IntOpenHashMap<String>> results = new ArrayList<Object2IntOpenHashMap<String>>(configs.size());
        for (DatasetConfiguration config : configs) {
            try {
                Dataset dataset = config.getDataset(ExperimentType.A2KB);
                results.add(this.analyzeDocs(dataset.getInstances()));
            }
            catch (GerbilException e) {
                e.printStackTrace();
                results.add(null);
            }
        }
        this.writeOutput(outputFile, configs, results);
    }

    private Object2IntOpenHashMap<String> analyzeDocs(List<Document> documents) {
        Object2IntOpenHashMap result = new Object2IntOpenHashMap();
        for (Document document : documents) {
            Annotation annotation = new Annotation(document.getText());
            this.pipeline.annotate(annotation);
            List sentences = (List)annotation.get(CoreAnnotations.SentencesAnnotation.class);
            for (CoreMap sentence : sentences) {
                for (CoreLabel token : (List)sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                    result.addTo((Object)((String)token.get(CoreAnnotations.PartOfSpeechAnnotation.class)), 1);
                }
            }
        }
        return result;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private void writeOutput(String outputFile, List<DatasetConfiguration> configs, List<Object2IntOpenHashMap<String>> results) {
        block9: {
            HashSet posTags = new HashSet();
            for (Object2IntOpenHashMap<String> result : results) {
                if (result == null) continue;
                posTags.addAll(result.keySet());
            }
            Object[] keys = posTags.toArray(new String[posTags.size()]);
            Arrays.sort(keys);
            FileWriter fWriter = null;
            CSVWriter writer = null;
            try {
                fWriter = new FileWriter(outputFile);
                writer = new CSVWriter((Writer)fWriter);
                String[] line = new String[configs.size() + 1];
                line[0] = "pos tags";
                for (int i = 0; i < configs.size(); ++i) {
                    line[i + 1] = configs.get(i).getName();
                }
                writer.writeNext(line);
                for (int i = 0; i < keys.length; ++i) {
                    line[0] = keys[i];
                    for (int j = 0; j < results.size(); ++j) {
                        Object2IntOpenHashMap<String> result = results.get(j);
                        line[j + 1] = result != null ? Integer.toString(result.getInt((Object)line[0])) : "";
                    }
                    writer.writeNext(line);
                }
                IOUtils.closeQuietly((Closeable)writer);
            }
            catch (Exception e) {
                e.printStackTrace();
                break block9;
            }
            finally {
                IOUtils.closeQuietly(writer);
                IOUtils.closeQuietly((Writer)fWriter);
            }
            IOUtils.closeQuietly((Writer)fWriter);
        }
    }
}

