package edu.stanford.nlp.tagger.maxent;

import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.WordTag;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeTransformer;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.StringTokenizer;

/* loaded from: input_file:edu/stanford/nlp/tagger/maxent/ReadDataTagged.class */
/**
 * Reads tagged training data and turns it into a flat list of {@link DataWordTag} entries.
 *
 * <p>Data comes either from a plain-text file of {@code word<delimiter>tag} tokens (one sentence
 * per line) or from a treebank, depending on the constructor / configuration used. After every
 * sentence an extra end-of-sentence element ({@link #eosWord} / {@link #eosTag}) is appended.
 *
 * <p>As a side effect, loading also fills the shared {@code GlobalHolder.tagTokens},
 * {@code GlobalHolder.pairs}, {@code GlobalHolder.tags} and {@code GlobalHolder.dict} structures.
 */
public class ReadDataTagged {
    /** Path of the file (or treebank location) the data is read from. */
    private final String filename;
    /** One entry per element read, including the end-of-sentence elements; null after {@link #release()}. */
    private ArrayList<DataWordTag> v = new ArrayList<>();
    /** Total number of elements read so far (words plus one end-of-sentence element per sentence). */
    int numElements = 0;
    static final String eosWord = "EOS";
    static final String eosTag = "EOS";

    /**
     * Reads tagged text from a file.
     *
     * @param str  path of the file to read
     * @param str2 delimiter separating a word from its tag inside a token
     * @param str3 name of the character encoding of the file
     * @throws Exception if the file cannot be read; a {@link RuntimeException} is thrown when a
     *                   token does not contain the delimiter
     */
    public ReadDataTagged(String str, String str2, String str3) throws Exception {
        this.filename = str;
        init(str2, str3);
    }

    /**
     * Reads training data as described by the given configuration, either from a treebank or from
     * a tagged text file.
     *
     * <p>Any exception raised while reading is reported on {@code System.err} and not rethrown, so
     * the resulting object may hold only the data that was read before the failure.
     *
     * @param taggerConfig configuration supplying the file, input mode, delimiter and encoding
     */
    public ReadDataTagged(TaggerConfig taggerConfig) {
        this.filename = taggerConfig.getFile();
        try {
            if (taggerConfig.getInitFromTrees()) {
                initFromTrees(taggerConfig);
            } else {
                init(taggerConfig.getDelimiter(), taggerConfig.getEncoding());
            }
        } catch (Exception e) {
            System.err.println("Error reading data from " + this.filename);
            e.printStackTrace();
        }
    }

    /**
     * Drops the reference to the stored entries. {@link #get(int)} must not be called afterwards
     * (it would dereference null); {@link #getSize()} is unaffected.
     */
    public void release() {
        this.v = null;
    }

    /**
     * Returns the entry at the given position.
     *
     * @param i zero-based index into the list of elements read
     * @return the entry stored at that index
     */
    public DataWordTag get(int i) {
        return this.v.get(i);
    }

    /**
     * Loads training data from the treebank at {@link #filename}, optionally restricted to the
     * configured file number range, applying the configured normalizer and transformer (when
     * present) to each tree before taking its tagged yield.
     *
     * @param taggerConfig configuration supplying encoding, tree range, normalizer and transformer
     * @throws Exception propagated from the treebank / tree processing calls
     */
    private void initFromTrees(TaggerConfig taggerConfig) throws Exception {
        System.err.println("Training a tagger from treebank" + this.filename);
        ArrayList<String> words = new ArrayList<>();
        ArrayList<String> tags = new ArrayList<>();
        int numSentences = 0;
        int numWords = 0;
        int maxLen = Integer.MIN_VALUE;
        int minLen = Integer.MAX_VALUE;
        DiskTreebank treebank = new DiskTreebank(new LabeledScoredTreeReaderFactory(), taggerConfig.getEncoding());
        TreeTransformer transformer = taggerConfig.getTreeTransformer();
        TreeNormalizer normalizer = taggerConfig.getTreeNormalizer();
        if (taggerConfig.getTreeRange() != null) {
            treebank.loadPath(this.filename, new NumberRangesFileFilter(taggerConfig.getTreeRange(), true));
        } else {
            treebank.loadPath(this.filename);
        }
        Iterator<Tree> treeIt = treebank.iterator();
        while (treeIt.hasNext()) {
            Tree tree = treeIt.next();
            if (normalizer != null) {
                tree = normalizer.normalizeWholeTree(tree, tree.treeFactory());
            }
            if (transformer != null) {
                tree = tree.transform(transformer);
            }
            Sentence<TaggedWord> taggedYield = tree.taggedYield();
            Iterator<TaggedWord> wordIt = taggedYield.iterator();
            while (wordIt.hasNext()) {
                TaggedWord taggedWord = wordIt.next();
                if (taggedWord != null) {
                    words.add(taggedWord.word());
                    tags.add(taggedWord.tag());
                    recordTagToken(taggedWord.tag(), taggedWord.word());
                }
            }
            // Use the number of words actually collected rather than the yield length: null
            // entries are skipped above, and indexing by the yield length would then run past
            // the end of the collected lists.
            int sentenceLen = words.size();
            maxLen = Math.max(maxLen, sentenceLen);
            minLen = Math.min(minLen, sentenceLen);
            // Every earlier sentence contributed its words plus one EOS element.
            addSentence(words, tags, numWords + numSentences);
            numSentences++;
            numWords += sentenceLen;
            words.clear();
            tags.clear();
            if (numSentences % 100000 == 0) {
                System.err.println("Read " + numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words ... [still reading]");
            }
        }
        System.err.println("Read " + numWords + " words from " + this.filename + " [done].");
        System.err.println("Read " + numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words.");
    }

    /**
     * Loads training data from the text file at {@link #filename}. Each line is one sentence made
     * of whitespace-separated tokens; each token is split at the last occurrence of the delimiter
     * into word and tag. A line without tokens still counts as a sentence of length zero.
     *
     * @param delimiter string separating word and tag inside a token (may be longer than one char)
     * @param encoding  name of the character encoding used to decode the file
     * @throws IOException      if the file cannot be opened, decoded or read
     * @throws RuntimeException if a token does not contain the delimiter
     */
    private void init(String delimiter, String encoding) throws IOException {
        ArrayList<String> words = new ArrayList<>();
        ArrayList<String> tags = new ArrayList<>();
        int numSentences = 0;
        int numWords = 0;
        int nextStart = 0;
        int maxLen = Integer.MIN_VALUE;
        int minLen = Integer.MAX_VALUE;
        // Both resources are declared separately so the file stream is closed even when the
        // reader cannot be created (e.g. unsupported encoding) or a format error is thrown.
        try (FileInputStream in = new FileInputStream(this.filename);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding))) {
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    String token = tokenizer.nextToken();
                    numWords++;
                    int delimiterPos = token.lastIndexOf(delimiter);
                    if (delimiterPos < 0) {
                        throw new RuntimeException("Data format error: can't find delimiter \"" + delimiter + "\" in word \"" + token + "\" (line " + numSentences + " of " + this.filename + ')');
                    }
                    String word = token.substring(0, delimiterPos);
                    // Skip the whole delimiter, not just one character, so multi-character
                    // delimiters do not leak into the tag.
                    String tag = token.substring(delimiterPos + delimiter.length());
                    words.add(word);
                    tags.add(tag);
                    recordTagToken(tag, word);
                }
                int sentenceLen = words.size();
                if (sentenceLen > maxLen) {
                    maxLen = sentenceLen;
                }
                if (sentenceLen < minLen) {
                    minLen = sentenceLen;
                }
                addSentence(words, tags, nextStart);
                numSentences++;
                nextStart += sentenceLen + 1;
                words.clear();
                tags.clear();
                if (numSentences % 100000 == 0) {
                    System.err.println("Read " + numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words ... [still reading]");
                }
            }
        }
        System.err.println("Read " + numWords + " words from " + this.filename + " [done].");
        System.err.println("Read " + numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words.");
    }

    /** Registers {@code word} in the set of tokens seen with {@code tag} in {@code GlobalHolder.tagTokens}. */
    private static void recordTagToken(String tag, String word) {
        if (!GlobalHolder.tagTokens.containsKey(tag)) {
            GlobalHolder.tagTokens.put(tag, new HashSet<>());
        }
        GlobalHolder.tagTokens.get(tag).add(word);
    }

    /**
     * Appends the end-of-sentence element to the given sentence and records every element of it
     * (words and EOS) in {@link #v} and in the shared {@code GlobalHolder} structures.
     *
     * @param words words of the sentence, without EOS; EOS is appended by this method
     * @param tags  tags parallel to {@code words}; EOS is appended by this method
     * @param start global index of the sentence's first element
     */
    private void addSentence(ArrayList<String> words, ArrayList<String> tags, int start) {
        int sentenceLen = words.size();
        words.add(eosWord);
        tags.add(eosTag);
        this.numElements = this.numElements + sentenceLen + 1;
        for (int pos = 0; pos <= sentenceLen; pos++) {
            // History receives the sentence's first index, the index of its EOS element and the
            // index of the current element.
            History history = new History(start, start + sentenceLen, start + pos);
            WordTag wordTag = new WordTag();
            String tag = tags.get(pos);
            String word = words.get(pos);
            wordTag.setWord(word);
            wordTag.setTag(tag);
            GlobalHolder.pairs.add(wordTag);
            this.v.add(new DataWordTag(history, GlobalHolder.tags.add(tag)));
            GlobalHolder.dict.add(word, tag);
        }
    }

    /**
     * Returns the number of elements read, counting one end-of-sentence element per sentence.
     *
     * @return total element count
     */
    public int getSize() {
        return this.numElements;
    }

    /** Intentionally empty entry point. */
    public static void main(String[] strArr) {
    }
}
