package de.tu.darmstadt.lt.ner.reader;

import de.tu.darmstadt.lt.ner.feature.variables.ClarkPosInduction;
import de.tu.darmstadt.lt.ner.feature.variables.FreeBaseFeature;
import de.tu.darmstadt.lt.ner.feature.variables.PositionFeature;
import de.tu.darmstadt.lt.ner.preprocessing.Configuration;
import de.tu.darmstadt.lt.ner.types.GoldNamedEntity;
import de.tu.darmstadt.lt.ner.util.GenerateNgram;
import de.tu.darmstadt.lt.ner.writer.EvaluatedNERWriter;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;

@ResourceMetaData(name = "de.tu.darmstadt.lt.ner.reader.NERReader", description = "Copyright 2014\nFG Language Technology\nTechnische Universität Darmstadt\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", version = "0.2.0-mack-SNAPSHOT", vendor = "DKPro Core Project", copyright = "Copyright 2010\n\t\t\t\t\t\t\tUbiquitous Knowledge Processing\t(UKP) Lab\n\t\t\t\t\t\t\tTechnische Universität Darmstadt")
@EnhancedClassFile
/* loaded from: input_file:de/tu/darmstadt/lt/ner/reader/NERReader.class */
/*
 * UIMA annotator that converts tab-separated CoNLL-style text into annotations.
 *
 * The input is the document text of the CONLL_VIEW view: one token per line, sentences
 * separated by empty lines. The first column of a line is the token text and the last column
 * is its gold named-entity type. For every token a Token and a GoldNamedEntity annotation are
 * created in the "_InitialView" view, tokens of one sentence are covered by a Sentence
 * annotation, and the view text is set to the tokens (each followed by a space) with a newline
 * after each sentence.
 *
 * Depending on the static Configuration switches the reader also fills the shared feature
 * stores PositionFeature.posistion, FreeBaseFeature.freebaseFeature and
 * ClarkPosInduction.posInduction.
 */
public class NERReader extends JCasAnnotator_ImplBase {
    /** Name of the CAS view holding the raw CoNLL text. */
    public static final String CONLL_VIEW = "ConnlView";

    /** Name of the optional configuration parameter pointing to the resource zip file. */
    public static final String DATA_ZIP_FILE = "datazipfile";

    /** Name of the zip entry holding the tab-separated Freebase list. */
    private static final String FREEBASE_ENTRY = "freebase_2502.txt3";

    /** Name of the zip entry holding the tab-separated Clark cluster list. */
    private static final String CLARK_ENTRY = "clark10m256";

    /** Largest n passed to {@code GenerateNgram.generateNgramsUpto} for the Freebase lookup. */
    private static final int MAX_NGRAM_LENGTH = 5;

    /** Label recorded for a token without a Freebase match. */
    private static final String NO_FREEBASE_LABEL = "none";

    private Logger logger = null;

    /** Freebase list: first column of a line mapped to its second column. */
    private Map<String, String> freebaseMap = new HashMap<String, String>();

    // Not read anywhere in this class; kept because the field existed before.
    private Map<String, String> suffixClassMap = new HashMap<String, String>();

    /** Set once the Clark clusters were read successfully, so they are not re-read per CAS. */
    private boolean clarkClustersLoaded = false;

    // NOTE(review): the field is static, so its value is shared by all instances of this
    // annotator - confirm that this is intended.
    @ConfigurationParameter(name = DATA_ZIP_FILE, mandatory = false)
    private static String datazipfile = null;

    /**
     * Initializes the annotator and stores the logger of the given context.
     *
     * @param uimaContext the UIMA context of this component
     * @throws ResourceInitializationException if the base class initialization fails
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.logger = uimaContext.getLogger();
    }

    /**
     * Reads the CoNLL text of the {@link #CONLL_VIEW} view and creates token, gold named-entity
     * and sentence annotations in the "_InitialView" view, whose text is set to the
     * reconstructed plain text.
     *
     * <p>An empty or missing CoNLL text produces an empty document. Lines that contain nothing
     * but tabs carry no token and are skipped with a warning.
     *
     * @param jCas the CAS to process
     * @throws AnalysisEngineProcessException if one of the views cannot be accessed
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        try {
            JCas view = jCas.getView("_InitialView");
            String documentText = jCas.getView(CONLL_VIEW).getDocumentText();
            if (documentText == null) {
                documentText = "";
            }
            // A leading empty line guarantees that the first token opens a sentence.
            if (documentText.isEmpty() || documentText.charAt(0) != '\n') {
                documentText = "\n" + documentText;
            }

            loadFreebaseListIfNeeded();
            loadClarkClustersIfNeeded();

            StringBuilder text = new StringBuilder(); // plain text of the initial view
            StringBuilder sentenceTokens = new StringBuilder(); // tokens of the current sentence
            Sentence sentence = null; // the currently open sentence, null when none is open
            Token token = null; // the most recently created token
            boolean startSentence = false;
            int offset = 0; // always equals text.length()
            int positionInSentence = 0;

            for (String line : documentText.split("(\r\n|\n)")) {
                if (line.isEmpty()) {
                    // Close the open sentence exactly once, even if several empty lines follow
                    // each other; otherwise the same sentence would be indexed repeatedly and
                    // Freebase labels would be produced for an empty sentence.
                    if (sentence != null) {
                        terminateSentence(sentence, token, text);
                        if (Configuration.useFreeBase) {
                            getngramBasedFreebaseList(sentenceTokens);
                        }
                        sentence = null;
                    }
                    if (token != null) {
                        text.append("\n");
                        offset++;
                    }
                    positionInSentence = 0;
                    startSentence = true;
                    sentenceTokens = new StringBuilder();
                    continue;
                }

                String[] columns = line.split("\\t");
                if (columns.length == 0) {
                    // split() drops trailing empty fields, so a line of tabs has no columns.
                    this.logger.log(Level.WARNING, "Skipping line without token: [" + line + "]");
                    continue;
                }
                String form = columns[0];
                // With a single column the token text itself is used as entity type.
                String entityType = columns[columns.length - 1];

                if (Configuration.usePosition) {
                    PositionFeature.posistion.add(Integer.valueOf(positionInSentence));
                    positionInSentence++;
                }

                int begin = offset;
                int end = begin + form.length();
                text.append(form).append(" ");
                sentenceTokens.append(form).append(" ");
                offset = end + 1; // token plus the separating space

                token = new Token(view, begin, end);
                GoldNamedEntity goldNamedEntity = new GoldNamedEntity(view, begin, end);
                if (startSentence) {
                    sentence = new Sentence(view);
                    sentence.setBegin(begin);
                    startSentence = false;
                }
                goldNamedEntity.setNamedEntityType(entityType);
                goldNamedEntity.addToIndexes();
                token.addToIndexes();

                if (this.logger.isLoggable(Level.FINE)) {
                    this.logger.log(Level.FINE, "Token: [" + text.substring(token.getBegin(), token.getEnd()) + "]" + token.getBegin() + EvaluatedNERWriter.TAB + token.getEnd());
                    this.logger.log(Level.FINE, "NamedEnity: [" + text.substring(goldNamedEntity.getBegin(), goldNamedEntity.getEnd()) + "]" + goldNamedEntity.getBegin() + EvaluatedNERWriter.TAB + goldNamedEntity.getEnd());
                }
            }

            // The last sentence is usually not followed by an empty line.
            if (sentenceTokens.length() > 0 && Configuration.useFreeBase) {
                getngramBasedFreebaseList(sentenceTokens);
            }
            if (sentence != null) {
                terminateSentence(sentence, token, text);
            }
            view.setSofaDataString(text.toString(), "text/plain");
        } catch (CASException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Loads the Freebase list if the feature is enabled and the list is still empty. Loading is
     * best effort: a failure is logged and processing continues without Freebase matches.
     */
    private void loadFreebaseListIfNeeded() {
        if (!Configuration.useFreeBase || !this.freebaseMap.isEmpty()) {
            return;
        }
        try {
            useFreeBase();
        } catch (Exception e) {
            this.logger.log(Level.WARNING, "Could not load the Freebase list " + FREEBASE_ENTRY, e);
        }
    }

    /**
     * Loads the Clark clusters if the feature is enabled and they were not loaded successfully
     * before. Loading is best effort: a failure is logged and processing continues.
     */
    private void loadClarkClustersIfNeeded() {
        if (!Configuration.useClarkPosInduction || this.clarkClustersLoaded) {
            return;
        }
        try {
            useClarkPosInduction();
            this.clarkClustersLoaded = true;
        } catch (Exception e) {
            this.logger.log(Level.WARNING, "Could not load the Clark clusters " + CLARK_ENTRY, e);
        }
    }

    /**
     * Appends exactly one Freebase label per token of the sentence to
     * {@code FreeBaseFeature.freebaseFeature}.
     *
     * @param sentenceTokens the tokens of one sentence, separated by single spaces
     */
    private void getngramBasedFreebaseList(StringBuilder sentenceTokens) {
        String sentenceText = sentenceTokens.toString();
        for (String tokenText : sentenceText.trim().split(" ")) {
            FreeBaseFeature.freebaseFeature.add(freebaseLabelFor(tokenText, sentenceText));
        }
    }

    /**
     * Determines the Freebase label of one token. N-grams of the sentence are requested with a
     * limit of {@link #MAX_NGRAM_LENGTH} down to 1; the first n-gram that contains the token
     * text and has an entry in the Freebase list decides the label.
     *
     * @param tokenText the token to label
     * @param sentenceText the space-separated sentence the token belongs to
     * @return "B-" plus the list value if the matching n-gram starts with the token, "I-" plus
     *         the list value if it only contains the token, and "none" if nothing matches or the
     *         n-gram generation fails
     */
    private String freebaseLabelFor(String tokenText, String sentenceText) {
        try {
            for (int n = MAX_NGRAM_LENGTH; n > 0; n--) {
                for (String ngram : GenerateNgram.generateNgramsUpto(sentenceText, n)) {
                    if (ngram.split(" ").length == 0 && !ngram.equals(tokenText)) {
                        continue;
                    }
                    // NOTE(review): contains() is a substring test, so a short token also
                    // matches inside a longer word of the n-gram - confirm this is intended.
                    if (ngram.contains(tokenText)) {
                        String freebaseType = this.freebaseMap.get(ngram);
                        if (freebaseType != null) {
                            return (ngram.startsWith(tokenText) ? "B-" : "I-") + freebaseType;
                        }
                    }
                }
            }
        } catch (Exception e) {
            // Best effort: a failing n-gram generation must neither abort the document nor be
            // retried endlessly; the token simply gets the "no match" label.
            this.logger.log(Level.WARNING, "Freebase lookup failed for token [" + tokenText + "]", e);
        }
        return NO_FREEBASE_LABEL;
    }

    /**
     * Ends the sentence at the end of the given token and adds it to the indexes.
     *
     * @param sentence the sentence to close
     * @param token the last token of the sentence
     * @param text the document text built so far, used for logging only
     */
    private void terminateSentence(Sentence sentence, Token token, StringBuilder text) {
        sentence.setEnd(token.getEnd());
        sentence.addToIndexes();
        if (this.logger.isLoggable(Level.FINE)) {
            this.logger.log(Level.FINE, "Sentence:[" + text.substring(sentence.getBegin(), sentence.getEnd()) + "]\t" + sentence.getBegin() + EvaluatedNERWriter.TAB + sentence.getEnd());
        }
    }

    /**
     * Reads the Freebase list into {@link #freebaseMap}. Each line is tokenized with
     * {@code EvaluatedNERWriter.TAB} as delimiter; the first token becomes the key and the
     * second the value. Lines with fewer than two tokens are ignored.
     *
     * @throws IOException if the list cannot be found or read
     */
    private void useFreeBase() throws IOException {
        BufferedReader reader = openEntry(FREEBASE_ENTRY);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer tokenizer = new StringTokenizer(line, EvaluatedNERWriter.TAB);
                if (tokenizer.countTokens() >= 2) {
                    String key = tokenizer.nextToken();
                    this.freebaseMap.put(key, tokenizer.nextToken());
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Reads the Clark cluster list into {@code ClarkPosInduction.posInduction}, mapping the
     * first tab-separated column of each line to the last one. Lines without any column are
     * ignored.
     *
     * @throws Exception if the list cannot be found or read
     */
    public void useClarkPosInduction() throws Exception {
        BufferedReader reader = openEntry(CLARK_ENTRY);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t");
                if (columns.length > 0) {
                    ClarkPosInduction.posInduction.put(columns[0], columns[columns.length - 1]);
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Opens a zip entry through {@link #getReader(String)} and makes sure it is buffered.
     *
     * @param entryName name of the zip entry
     * @return a buffered reader over the entry, never {@code null}
     * @throws IOException if the entry does not exist or the zip file cannot be read
     */
    private BufferedReader openEntry(String entryName) throws IOException {
        Reader reader = getReader(entryName);
        if (reader == null) {
            throw new IOException("Entry " + entryName + " not found in the data zip file");
        }
        return reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);
    }

    /**
     * Opens the zip entry with the given name. The zip file is the file configured through
     * {@link #DATA_ZIP_FILE} or, if none is configured, the system class path resource
     * "data.zip".
     *
     * @param str name of the zip entry to open
     * @return a reader over the entry, which the caller has to close, or {@code null} if the
     *         zip file has no entry with that name
     * @throws IOException if the zip file cannot be found or read
     */
    public Reader getReader(String str) throws IOException {
        java.io.InputStream rawStream = datazipfile != null ? new FileInputStream(new File(datazipfile)) : ClassLoader.getSystemResourceAsStream("data.zip");
        if (rawStream == null) {
            throw new IOException("Resource data.zip not found on the system class path");
        }
        ZipInputStream zipInputStream = new ZipInputStream(rawStream);
        boolean handedOut = false;
        try {
            for (ZipEntry entry = zipInputStream.getNextEntry(); entry != null; entry = zipInputStream.getNextEntry()) {
                if (entry.getName().equals(str)) {
                    // NOTE(review): the entry is decoded with the platform default charset;
                    // confirm the encoding of the data files and pass it explicitly.
                    Reader reader = new BufferedReader(new InputStreamReader(zipInputStream));
                    handedOut = true;
                    return reader;
                }
            }
            return null;
        } finally {
            // The stream stays open only when it backs the returned reader.
            if (!handedOut) {
                zipInputStream.close();
            }
        }
    }
}
