/*
 * Decompiled with CFR 0.152.
 */
package de.tu.darmstadt.lt.ner.reader;

import com.google.common.collect.Lists;
import de.tu.darmstadt.lt.ner.reader.NERLookupCaching;
import de.tu.darmstadt.lt.ner.types.FreeBaseMatch;
import de.tu.darmstadt.lt.ner.types.GoldNamedEntity;
import de.tu.darmstadt.lt.ner.types.PositionInSentence;
import de.tu.darmstadt.lt.ner.util.Configuration;
import de.tu.darmstadt.lt.ner.util.GenerateNgram;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="de.tu.darmstadt.lt.ner.reader.NERReader", description="Copyright 2014\nFG Language Technology\nTechnische Universit\u00e4t Darmstadt\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", version="0.2.0-2f18f8006ef564cdf2181c7754368976fefa3d94", vendor="DKPro Core Project", copyright="Copyright 2010\n\t\t\t\t\t\t\tUbiquitous Knowledge Processing\t(UKP) Lab\n\t\t\t\t\t\t\tTechnische Universit\u00e4t Darmstadt")
@EnhancedClassFile
public class NERReader
extends JCasAnnotator_ImplBase {
    private static final Logger log = LoggerFactory.getLogger(NERReader.class);
    public static final String CONLL_VIEW = "ConnlView";
    static final String FREEBASE_FILENAME = "freebase_2502.txt3";
    public static final String CONFIGURATION = "configuration";
    @ConfigurationParameter(name="configuration")
    private Configuration configuration = null;
    public static final String DATA_ZIP_URL = "DataZipURL";
    @ConfigurationParameter(name="DataZipURL", mandatory=false)
    private static URL dataZipURL = null;
    private org.apache.uima.util.Logger logger = null;

    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        this.logger = context.getLogger();
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        String tbText;
        JCas docView;
        log.info("NERReader#process");
        try {
            docView = jcas.getView("_InitialView");
            tbText = jcas.getView(CONLL_VIEW).getDocumentText();
        }
        catch (CASException e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
        if (tbText.charAt(0) != '\n') {
            tbText = "\n" + tbText;
        }
        String[] tokens = tbText.split("(\r\n|\n)");
        Sentence sentence = null;
        int idx = 0;
        Token token = null;
        LinkedList freeBaseMatches = Lists.newLinkedList();
        int positionIndex = 0;
        boolean initSentence = false;
        StringBuffer docText = new StringBuffer();
        StringBuffer sentenceSb = new StringBuffer();
        for (String line : tokens) {
            if (line.equals("")) {
                if (sentence != null && token != null) {
                    this.terminateSentence(sentence, token, docText);
                    docText.append("\n");
                    ++idx;
                    if (this.configuration.useFreeBase()) {
                        this.saveFreeBaseMatches(sentenceSb, freeBaseMatches);
                        freeBaseMatches.clear();
                    }
                    positionIndex = 0;
                }
                initSentence = true;
                sentenceSb = new StringBuffer();
                continue;
            }
            String[] tag = line.split("\\t");
            String word = tag[0];
            String namedEntity = tag[tag.length - 1];
            docText.append(word);
            sentenceSb.append(word + " ");
            token = new Token(docView, idx, idx + word.length());
            GoldNamedEntity namedEntityTag = new GoldNamedEntity(docView, idx, idx + word.length());
            if (this.configuration.useFreeBase()) {
                freeBaseMatches.add(new FreeBaseMatch(docView, idx, idx + word.length()));
            }
            if (this.configuration.usePosition()) {
                PositionInSentence positition = new PositionInSentence(docView, idx, idx + word.length());
                positition.setPosition(positionIndex);
                positition.addToIndexes();
                ++positionIndex;
            }
            docText.append(" ");
            ++idx;
            if (initSentence) {
                sentence = new Sentence(docView);
                sentence.setBegin(token.getBegin());
                initSentence = false;
            }
            idx += word.length();
            namedEntityTag.setNamedEntityType(namedEntity);
            namedEntityTag.addToIndexes();
            token.addToIndexes();
            this.logger.log(Level.FINE, "Token: [" + docText.substring(token.getBegin(), token.getEnd()) + "]" + token.getBegin() + "\t" + token.getEnd());
            this.logger.log(Level.FINE, "NamedEnity: [" + docText.substring(namedEntityTag.getBegin(), namedEntityTag.getEnd()) + "]" + namedEntityTag.getBegin() + "\t" + namedEntityTag.getEnd());
        }
        if (this.configuration.useFreeBase()) {
            this.saveFreeBaseMatches(sentenceSb, freeBaseMatches);
        }
        if (sentence != null && token != null) {
            this.terminateSentence(sentence, token, docText);
        }
        docView.setSofaDataString(docText.toString(), "text/plain");
    }

    private List<String> getngramBasedFreebaseList(StringBuffer sentenceSb) {
        ArrayList tagsForSentence = Lists.newArrayList();
        Map freeBaseMap = (Map)NERLookupCaching.twoColumMappingCache().get((Object)FREEBASE_FILENAME);
        block4: for (String sentToken : sentenceSb.toString().trim().split(" ")) {
            for (int i = 5; i > 0; --i) {
                try {
                    for (String nGramToken : GenerateNgram.generateNgramsUpto(sentenceSb.toString(), i)) {
                        String tag;
                        if (nGramToken.split(" ").length == 0 && !nGramToken.equals(sentToken) || !nGramToken.contains(sentToken) || freeBaseMap.get(nGramToken) == null) continue;
                        if (nGramToken.startsWith(sentToken)) {
                            tag = "B-" + (String)freeBaseMap.get(nGramToken);
                            tagsForSentence.add(tag);
                            continue block4;
                        }
                        tag = "I-" + (String)freeBaseMap.get(nGramToken);
                        tagsForSentence.add(tag);
                    }
                    continue;
                }
                catch (Exception e) {
                    tagsForSentence.add("none");
                }
                continue block4;
            }
            tagsForSentence.add("none");
        }
        return tagsForSentence;
    }

    private void saveFreeBaseMatches(StringBuffer sentenceSb, List<FreeBaseMatch> featureStructures) {
        if (sentenceSb.length() == 0) {
            return;
        }
        List<String> freeBaseTags = this.getngramBasedFreebaseList(sentenceSb);
        if (freeBaseTags.size() != featureStructures.size()) {
            throw new RuntimeException(String.format("length mismatch! %d != %d%n%s%n%s%n\"%s\"%n", freeBaseTags.size(), featureStructures.size(), freeBaseTags, featureStructures, sentenceSb));
        }
        Iterator<String> tagsIter = freeBaseTags.iterator();
        for (FreeBaseMatch freeBaseMatch : featureStructures) {
            freeBaseMatch.setBITag(tagsIter.next());
            freeBaseMatch.addToIndexes();
        }
    }

    private void terminateSentence(Sentence sentence, Token token, StringBuffer docText) {
        sentence.setEnd(token.getEnd());
        sentence.addToIndexes();
        this.logger.log(Level.FINE, "Sentence:[" + docText.substring(sentence.getBegin(), sentence.getEnd()) + "]\t" + sentence.getBegin() + "\t" + sentence.getEnd());
    }

    public Reader getReader(String aName) throws IOException {
        InputStream is = dataZipURL != null ? dataZipURL.openStream() : ClassLoader.getSystemResourceAsStream("data.zip");
        ZipInputStream zis = new ZipInputStream(is);
        ZipEntry entry = zis.getNextEntry();
        while (entry != null) {
            if (entry.toString().equals(aName)) {
                return new BufferedReader(new InputStreamReader(zis));
            }
            entry = zis.getNextEntry();
        }
        return null;
    }
}

