package marmot.morph;

import java.io.File;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import lemming.lemma.BackupLemmatizer;
import lemming.lemma.GoldLemmaGenerator;
import lemming.lemma.LemmaCandidate;
import lemming.lemma.LemmaCandidateGenerator;
import lemming.lemma.LemmaCandidateSet;
import lemming.lemma.LemmaInstance;
import lemming.lemma.LemmaOptions;
import lemming.lemma.SimpleLemmatizer;
import lemming.lemma.SimpleLemmatizerTrainer;
import lemming.lemma.ranker.Ranker;
import lemming.lemma.ranker.RankerCandidate;
import lemming.lemma.ranker.RankerInstance;
import lemming.lemma.ranker.RankerModel;
import lemming.lemma.ranker.RankerTrainer;
import lemming.lemma.toutanova.EditTreeAligner;
import lemming.lemma.toutanova.EditTreeAlignerTrainer;
import marmot.core.Model;
import marmot.core.Options;
import marmot.core.Sequence;
import marmot.core.State;
import marmot.core.Tagger;
import marmot.core.Token;
import marmot.core.Trainer;
import marmot.core.TrainerFactory;
import marmot.morph.analyzer.Analyzer;
import marmot.morph.signature.Trie;
import marmot.util.Copy;
import marmot.util.Counter;
import marmot.util.FeatUtil;
import marmot.util.FileUtils;
import marmot.util.StringUtils;
import marmot.util.SymbolTable;

/* loaded from: input_file:marmot/morph/MorphModel.class */
public class MorphModel extends Model {
    // Serialization version identifier.
    private static final long serialVersionUID = 2;
    // Tag-level positions and category names. extractCategories() always registers "pos"
    // first and registers "morph" only when tag_morph_ is set.
    // NOTE(review): the visible methods use the literals 0 and 1 rather than the two index constants.
    private static final int POS_INDEX_ = 0;
    private static final int MORPH_INDEX_ = 1;
    private static final String POS_NAME_ = "pos";
    private static final String MORPH_NAME_ = "morph";
    // Symbol tables created in init(): normalized word forms, shape classes (only when shape_
    // is enabled), characters, and plain / weighted token features.
    private SymbolTable<String> word_table_;
    private SymbolTable<String> shape_table_;
    private SymbolTable<Character> char_table_;
    private SymbolTable<String> token_feature_table_;
    private SymbolTable<String> weighted_token_feature_table_;
    // Two slots (POS, morph); a slot stays null unless the matching split option is enabled.
    private List<SymbolTable<String>> subtag_tables_;
    // Word form -> signature cache; transient and lazily re-created by addSignature().
    private transient Map<String, Integer> signature_cache;
    // Occurrence count per word form index (see extractVocabulary()).
    private int[] vocab_;
    // Per tag level: every tag index except the boundary index (see extractTagClasses()).
    private int[][] tag_classes_;
    // Per POS index: sorted morph indexes seen with it; null when transitions are unrestricted.
    private int[][] transitions_;
    // Per tag level and tag index: the sub tag indexes of that tag (see extractSubTags()).
    private int[][][] tag_to_subtag_;
    // Per tag level: the (word, tag) combinations encoded by getBiIndex() that count as observed.
    private List<Set<Integer>> observed_sets_;
    // Per non-rare word form index: observed level-0 tags; only filled when
    // restrict_pos_tags_to_seen_combinations_ is set.
    private int[][] word_to_observed_tags_;
    // Shape trie, induced from the training data or loaded from file in init().
    private Trie trie_;
    // Settings copied from MorphOptions in init().
    private boolean verbose_;
    private boolean shape_;
    private boolean tag_morph_;
    private int num_folds_;
    private int rare_word_max_freq_;
    private boolean split_morphs_;
    private boolean split_pos_;
    private StringUtils.Mode normalize_forms_;
    // Optional internal analyzer; null when MorphOptions names none.
    private Analyzer analyzer_;
    // Lemmatizer state, set up by initLemmatizer().
    private RankerModel lemma_model_;
    private List<LemmaCandidateGenerator> generators_;
    // Word form -> ranker instances cache; transient and lazily re-created by addRankerInstances().
    private transient Map<String, List<RankerInstance>> lemma_instance_map_;
    // Characters for which an "unknown character" warning was already printed.
    private transient Set<Character> unseen_char_set_;
    private boolean special_signature_;
    private boolean skip_lemma_;
    private boolean marginalize_lemmas_;
    private boolean lemma_use_morph_;
    private boolean lemma_tag_dependent_;
    private boolean restrict_pos_tags_to_seen_combinations_;
    boolean lemma_prepruning_extraction_ = true;
    // Decompiler rendering of the compiler-generated flag behind the `assert` statements.
    static final /* synthetic */ boolean $assertionsDisabled;

    /**
     * Pairs a {@link MorphOptions} configuration with the {@link MorphResult} obtained for it.
     *
     * <p>The natural ordering is by descending result score, so sorting a list of entries
     * puts the highest scoring entry first.
     */
    public static class MorphEntry implements Comparable<MorphEntry> {
        private MorphOptions options_;
        private MorphResult result_;

        /**
         * @param morphOptions the options that were evaluated
         * @param morphResult the result obtained with these options
         */
        public MorphEntry(MorphOptions morphOptions, MorphResult morphResult) {
            options_ = morphOptions;
            result_ = morphResult;
        }

        /**
         * Orders entries by score, higher scores first.
         *
         * @param morphEntry the entry to compare against
         * @return a negative value if this entry has the higher score, a positive value if
         *         it has the lower score, and zero if both scores compare as equal
         */
        @Override
        public int compareTo(MorphEntry morphEntry) {
            double ownScore = result_.getScore();
            double otherScore = morphEntry.result_.getScore();
            // Swapping the arguments reverses the ordering without negating the result.
            return Double.compare(otherScore, ownScore);
        }

        /** @return the options this entry was created with */
        public MorphOptions getOptions() {
            return options_;
        }

        /** @return the result this entry was created with */
        public MorphResult getResult() {
            return result_;
        }
    }

    /**
     * Initializes the model from the training options and the training sentences.
     *
     * <p>The steps run in a fixed order because later ones read state written by earlier ones:
     * <ol>
     *   <li>copy the relevant settings from {@code morphOptions},</li>
     *   <li>run the two-argument {@code init} overload with the category table,</li>
     *   <li>create the symbol tables, the optional analyzer and the optional shape trie,</li>
     *   <li>index every training word,</li>
     *   <li>derive vocabulary counts, transitions, observed sets, tag classes and sub tags,</li>
     *   <li>assign word shapes, and</li>
     *   <li>initialize the lemmatizer if requested.</li>
     * </ol>
     *
     * @param morphOptions the training options
     * @param collection the training sentences; their tokens are cast to {@link Word}
     */
    public void init(MorphOptions morphOptions, Collection<Sequence> collection) {
        this.verbose_ = morphOptions.getVerbose();
        this.rare_word_max_freq_ = morphOptions.getRareWordMaxFreq();
        this.shape_ = morphOptions.getShape();
        this.tag_morph_ = morphOptions.getTagMorph();
        this.split_pos_ = morphOptions.getSplitPos();
        this.split_morphs_ = morphOptions.getSplitMorphs();
        this.normalize_forms_ = morphOptions.getNormalizeForms();
        this.special_signature_ = morphOptions.getSpecialSignature();
        this.num_folds_ = morphOptions.getNumFolds();
        this.restrict_pos_tags_to_seen_combinations_ = morphOptions.getRestrictPosTagsToSeenCombinations();

        // extractCategories() reads tag_morph_, so this call has to follow the assignments above.
        init(morphOptions, extractCategories(collection));

        // One slot per tag level; a slot stays null unless sub tag splitting is enabled for it.
        this.subtag_tables_ = new ArrayList<>();
        this.subtag_tables_.add(null);
        this.subtag_tables_.add(null);
        if (this.split_pos_) {
            this.subtag_tables_.set(POS_INDEX_, new SymbolTable<String>());
        }
        if (this.tag_morph_ && this.split_morphs_) {
            this.subtag_tables_.set(MORPH_INDEX_, new SymbolTable<String>());
        }

        this.word_table_ = new SymbolTable<>(true);
        this.char_table_ = new SymbolTable<>();
        if (this.shape_) {
            this.shape_table_ = new SymbolTable<>();
        }
        this.signature_cache = new HashMap<>();
        this.token_feature_table_ = new SymbolTable<>();
        this.weighted_token_feature_table_ = new SymbolTable<>();

        String internalAnalyzer = morphOptions.getInternalAnalyzer();
        if (internalAnalyzer != null) {
            this.analyzer_ = Analyzer.create(internalAnalyzer);
        }

        if (this.shape_) {
            String triePath = morphOptions.getShapeTriePath();
            // An empty path means that the trie is neither loaded from nor written to disk.
            File trieFile = triePath.isEmpty() ? null : new File(triePath);
            if (trieFile != null && trieFile.exists()) {
                System.err.format("Loading shape trie from: %s.\n", triePath);
                this.trie_ = (Trie) FileUtils.loadFromFile(triePath);
            } else {
                if (this.verbose_) {
                    System.err.println("Inducing shape trie.");
                }
                this.trie_ = Trie.train(collection, morphOptions.getVeryVerbose());
                if (trieFile != null) {
                    if (this.verbose_) {
                        System.err.format("Writing shape trie to: %s.\n", triePath);
                    }
                    FileUtils.saveToFile(this.trie_, triePath);
                }
            }
        }
        // Without a trie no shape can be computed, so the shape feature is switched off.
        if (this.trie_ == null) {
            this.shape_ = false;
        }

        for (Sequence sequence : collection) {
            for (Token token : sequence) {
                addIndexes((Word) token, true);
            }
        }

        this.vocab_ = extractVocabulary(morphOptions, collection);
        this.transitions_ = extractPossibleTransitions(morphOptions, collection);
        this.observed_sets_ = extractObservedSets(collection);
        this.tag_classes_ = extractTagClasses(getTagTables());
        this.tag_to_subtag_ = extractSubTags(morphOptions.getSubTagSeparator());

        // addShape() skips words while vocab_ is null, so shapes are assigned here, after the
        // vocabulary counts exist.
        for (Sequence sequence : collection) {
            for (Token token : sequence) {
                Word word = (Word) token;
                addShape(word, word.getWordForm(), true);
            }
        }

        if (morphOptions.getLemmatizer()) {
            initLemmatizer(morphOptions, collection);
        }
    }

    /**
     * Sets up the lemmatizer part of the model: copies the lemma related settings, selects the
     * lemma candidate generators, creates ranker instances for all training words and
     * initializes the {@link RankerModel}.
     *
     * @param morphOptions the training options
     * @param collection the training sentences
     */
    private void initLemmatizer(MorphOptions morphOptions, Collection<Sequence> collection) {
        this.lemma_use_morph_ = morphOptions.getLemmaUseMorph();
        this.marginalize_lemmas_ = morphOptions.getMarginalizeLemmas();
        this.lemma_prepruning_extraction_ = morphOptions.getLemmaPrePruningExtraction();
        this.lemma_tag_dependent_ = morphOptions.getLemmaTagDependent();

        // Options of the ranker that is trained jointly with the tagger.
        RankerTrainer.RankerTrainerOptions rankerOptions = new RankerTrainer.RankerTrainerOptions();
        rankerOptions.setOption(RankerTrainer.RankerTrainerOptions.UNIGRAM_FILE, morphOptions.getLemmaUnigramFile());
        rankerOptions.setOption(RankerTrainer.RankerTrainerOptions.IGNORE_FEATURES, morphOptions.getLemmaIgnoreFeatures());
        rankerOptions.setOption(RankerTrainer.RankerTrainerOptions.ASPELL_PATH, morphOptions.getLemmaAspellPath());
        rankerOptions.setOption(RankerTrainer.RankerTrainerOptions.ASPELL_LANG, morphOptions.getLemmaAspellLang());
        rankerOptions.setOption(RankerTrainer.RankerTrainerOptions.USE_SHAPE_LEXICON, Boolean.valueOf(morphOptions.getLemmaUseShapeLexicon()));
        rankerOptions.setOption(RankerTrainer.RankerTrainerOptions.CLUSTER_FILE, morphOptions.getLemmaClusterFile());
        rankerOptions.setOption("tag-dependent", Boolean.valueOf(this.lemma_tag_dependent_));
        rankerOptions.setOption(RankerTrainer.RankerTrainerOptions.OFFLINE_FEATURE_EXTRACTION, false);
        rankerOptions.setOption("use-hash-feature-table", Boolean.valueOf(morphOptions.getUseHashFeatureTable()));

        List<LemmaInstance> lemmaInstances = LemmaInstance.getInstances(collection, true, false);

        if (morphOptions.getGoldLemma()) {
            this.generators_ = Collections.singletonList(new GoldLemmaGenerator());
        } else if (morphOptions.getLemmaUseLemmingGenerator() > 0) {
            // Train a stand-alone ranker and use it, behind a simple lemmatizer, as the only
            // candidate generator.
            RankerTrainer.RankerTrainerOptions generatorOptions = new RankerTrainer.RankerTrainerOptions(rankerOptions);
            generatorOptions.setOption(RankerTrainer.RankerTrainerOptions.USE_MALLET, false);
            generatorOptions.setOption(RankerTrainer.RankerTrainerOptions.USE_PERCEPTRON, false);
            generatorOptions.setOption(LemmaOptions.USE_MORPH, false);
            generatorOptions.setOption(RankerTrainer.RankerTrainerOptions.USE_SHAPE_LEXICON, true);
            generatorOptions.setOption(RankerTrainer.RankerTrainerOptions.USE_CORE_FEATURES, true);
            generatorOptions.setOption(RankerTrainer.RankerTrainerOptions.USE_ALIGNMENT_FEATURES, true);
            generatorOptions.setOption(RankerTrainer.RankerTrainerOptions.OFFLINE_FEATURE_EXTRACTION, false);
            generatorOptions.setOption("tag-dependent", true);
            generatorOptions.setOption("use-hash-feature-table", true);

            RankerTrainer generatorTrainer = new RankerTrainer();
            generatorTrainer.setOptions(generatorOptions);
            Ranker generatorRanker = (Ranker) generatorTrainer.train(lemmaInstances, (List<LemmaInstance>) null);
            generatorRanker.setNumCandidates(morphOptions.getLemmaUseLemmingGenerator());

            SimpleLemmatizerTrainer simpleTrainer = new SimpleLemmatizerTrainer();
            simpleTrainer.getOptions().setOption(SimpleLemmatizerTrainer.SimpleLemmatizerTrainerOptions.USE_BACKUP, false);
            SimpleLemmatizer simpleLemmatizer = (SimpleLemmatizer) simpleTrainer.train(lemmaInstances, (List<LemmaInstance>) null);
            this.generators_ = Collections.singletonList(new BackupLemmatizer(simpleLemmatizer, generatorRanker));
        } else {
            this.generators_ = rankerOptions.getGenerators(lemmaInstances);
        }

        SymbolTable<String> posTable = getTagTables().get(0);

        // Fills lemma_instance_map_ with the ranker instances of every training word.
        for (Sequence sequence : collection) {
            for (Token token : sequence) {
                addRankerInstances((Word) token);
            }
        }

        SymbolTable<String> morphSubTagTable = null;
        if (this.subtag_tables_.size() > 1) {
            morphSubTagTable = this.subtag_tables_.get(1);
        }

        EditTreeAlignerTrainer alignerTrainer = new EditTreeAlignerTrainer(rankerOptions.getRandom(), false, 1, -1);
        EditTreeAligner aligner = (EditTreeAligner) alignerTrainer.train(lemmaInstances);

        // Flatten the per-form lists; slots of tags without an instance are null and skipped.
        LinkedList<RankerInstance> rankerInstances = new LinkedList<>();
        for (List<RankerInstance> perForm : this.lemma_instance_map_.values()) {
            for (RankerInstance instance : perForm) {
                if (instance != null) {
                    rankerInstances.add(instance);
                }
            }
        }

        this.lemma_model_ = new RankerModel();
        this.lemma_model_.init(rankerOptions, rankerInstances, aligner, posTable, morphSubTagTable);

        this.skip_lemma_ = morphOptions.getLemmaPretraining();
    }

    /**
     * Encodes a word index and a combined tag index into a single integer.
     *
     * @param i the word index
     * @param i2 the tag level; the sizes of the tag tables of levels {@code 0..i2} are multiplied
     * @param i3 the combined tag index, expected to be smaller than that product
     * @return {@code i * product + i3}
     */
    private int getBiIndex(int i, int i2, int i3) {
        int combinations = 1;
        int level = 0;
        while (level <= i2) {
            combinations *= getTagTables().get(level).size();
            level++;
        }
        if (!$assertionsDisabled && i3 >= combinations) {
            throw new AssertionError();
        }
        return i * combinations + i3;
    }

    /**
     * Tells whether a combination of word and tag is part of the observed set of a tag level.
     * Rare words are all mapped to the same index, the size of the word table.
     *
     * @param i the word index
     * @param i2 the tag level
     * @param i3 the combined tag index at that level
     * @return {@code true} if the combination is contained in the observed set
     */
    public boolean hasBeenObserved(int i, int i2, int i3) {
        int wordIndex = isRare(i) ? this.word_table_.size() : i;
        Set<Integer> observed = this.observed_sets_.get(i2);
        return observed.contains(Integer.valueOf(getBiIndex(wordIndex, i2, i3)));
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v3, types: [int[][], int[][][]] */
    private int[][][] extractSubTags(String str) {
        ?? r0 = new int[this.subtag_tables_.size()];
        int i = 0;
        for (int i2 = 0; i2 < this.subtag_tables_.size() && i2 < getTagTables().size(); i2++) {
            SymbolTable<String> symbolTable = getTagTables().get(i2);
            if (symbolTable != null && this.subtag_tables_.get(i2) != null) {
                r0[i2] = new int[symbolTable.size()];
                for (Map.Entry<String, Integer> entry : symbolTable.entrySet()) {
                    r0[i2][entry.getValue().intValue()] = getSubTags(entry.getKey(), i2, true, i, str);
                }
                i += this.subtag_tables_.get(i2).size();
            }
        }
        return r0;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v2, types: [int[], int[][]] */
    private int[][] extractTagClasses(List<SymbolTable<String>> list) {
        ?? r0 = new int[list.size()];
        for (int i = 0; i < list.size(); i++) {
            int size = list.get(i).size();
            r0[i] = new int[size - 1];
            int i2 = 0;
            for (int i3 = 0; i3 < size; i3++) {
                if (i3 != getBoundaryIndex()) {
                    r0[i][i2] = i3;
                    i2++;
                }
            }
        }
        return r0;
    }

    /**
     * Computes, for every tag level, the set of (word, tag) combinations that count as observed.
     *
     * <p>A combination is encoded with {@code getBiIndex}. The set of a level contains
     * <ul>
     *   <li>for the shared rare-word index (the size of the word table): the tags returned by
     *       {@code getOpenPosTagClassesCrossValidation}, and</li>
     *   <li>for every non-rare word form: the tags it occurs with in {@code collection}.</li>
     * </ul>
     * The tag of level {@code k} is the combination of the tag indexes of levels {@code 0..k}.
     *
     * <p>Side effect: when {@code restrict_pos_tags_to_seen_combinations_} is set,
     * {@code word_to_observed_tags_} is filled with the level-0 tags of each non-rare word form.
     * Reads {@code vocab_} (directly and through {@code isRare}), so the vocabulary has to be
     * extracted first.
     *
     * @param collection the training sentences
     * @return one set of encoded combinations per tag level
     */
    private List<Set<Integer>> extractObservedSets(Collection<Sequence> collection) {
        List<SymbolTable<String>> tagTables = getTagTables();
        int numLevels = tagTables.size();

        // Per level: word form index -> combined tags the word form occurs with.
        List<Map<Integer, Set<Integer>>> wordToTags = new ArrayList<>(numLevels);
        for (int level = 0; level < numLevels; level++) {
            wordToTags.add(new HashMap<Integer, Set<Integer>>());
        }
        for (Sequence sequence : collection) {
            for (Token token : sequence) {
                Word word = (Word) token;
                Integer wordFormIndex = Integer.valueOf(word.getWordFormIndex());
                int combinedTag = 0;
                for (int level = 0; level < numLevels; level++) {
                    combinedTag = (combinedTag * tagTables.get(level).size()) + word.getTagIndexes()[level];
                    Map<Integer, Set<Integer>> levelMap = wordToTags.get(level);
                    Set<Integer> tags = levelMap.get(wordFormIndex);
                    if (tags == null) {
                        tags = new HashSet<>();
                        levelMap.put(wordFormIndex, tags);
                    }
                    tags.add(Integer.valueOf(combinedTag));
                }
            }
        }

        if (this.restrict_pos_tags_to_seen_combinations_) {
            // Rows of rare word forms stay null.
            this.word_to_observed_tags_ = new int[this.vocab_.length][];
            for (Map.Entry<Integer, Set<Integer>> entry : wordToTags.get(0).entrySet()) {
                int wordFormIndex = entry.getKey().intValue();
                if (isRare(wordFormIndex)) {
                    continue;
                }
                Set<Integer> tags = entry.getValue();
                int[] tagArray = new int[tags.size()];
                int next = 0;
                for (Integer tag : tags) {
                    tagArray[next] = tag.intValue();
                    next++;
                }
                this.word_to_observed_tags_[wordFormIndex] = tagArray;
            }
        }

        List<List<Integer>> openClasses = getOpenPosTagClassesCrossValidation(collection, this.num_folds_, tagTables);
        List<Set<Integer>> observedSets = new ArrayList<>(numLevels);
        for (int level = 0; level < numLevels; level++) {
            Set<Integer> observed = new HashSet<>();
            observedSets.add(observed);

            // All rare words share the index word_table_.size() (see hasBeenObserved()).
            int rareWordIndex = this.word_table_.size();
            for (Integer tag : openClasses.get(level)) {
                observed.add(Integer.valueOf(getBiIndex(rareWordIndex, level, tag.intValue())));
            }

            for (Map.Entry<Integer, Set<Integer>> entry : wordToTags.get(level).entrySet()) {
                int wordFormIndex = entry.getKey().intValue();
                if (isRare(wordFormIndex)) {
                    continue;
                }
                for (Integer tag : entry.getValue()) {
                    observed.add(Integer.valueOf(getBiIndex(wordFormIndex, level, tag.intValue())));
                }
            }
        }
        return observedSets;
    }

    /**
     * Estimates, per tag level, which tags occur with word forms that are unknown to the rest
     * of the data.
     *
     * <p>The sentences are split into consecutive folds of {@code collection.size() / i}
     * sentences (at least one); a trailing remainder shorter than a fold is merged into the
     * last fold. For every fold, the tags of those tokens whose word form index does not occur
     * outside the fold are counted. A tag is reported if its share of all counted tags of its
     * level is greater than {@code 1.0E-4}.
     *
     * @param collection the sentences; tokens are cast to {@link Word}
     * @param i the number of folds; used as a divisor, so it must not be zero
     * @param list the tag tables, one per tag level
     * @return per tag level, the combined tag indexes (tag indexes of levels {@code 0..k}
     *         folded into one number) that passed the threshold
     */
    public static List<List<Integer>> getOpenPosTagClassesCrossValidation(Collection<Sequence> collection, int i, List<SymbolTable<String>> list) {
        int numSentences = collection.size();
        int foldSize = numSentences / i;
        if (foldSize == 0) {
            foldSize = 1;
        }

        Set<Integer> knownForms = new HashSet<>();
        List<Counter<Integer>> counters = new ArrayList<>(list.size());
        for (int level = 0; level < list.size(); level++) {
            counters.add(new Counter<Integer>());
        }

        int foldStart = 0;
        while (foldStart < numSentences) {
            int foldEnd = foldStart + foldSize;
            // Merge a remainder that would not fill a whole fold into this fold.
            if (foldEnd + foldSize >= numSentences) {
                foldEnd = numSentences;
            }

            // First pass: word forms that occur outside the current fold.
            knownForms.clear();
            int position = 0;
            for (Sequence sequence : collection) {
                if (position < foldStart || position >= foldEnd) {
                    for (Token token : sequence) {
                        knownForms.add(Integer.valueOf(((Word) token).getWordFormIndex()));
                    }
                }
                position++;
            }

            // Second pass: count the tags of fold tokens whose word form is unknown outside.
            position = 0;
            for (Sequence sequence : collection) {
                if (position >= foldStart && position < foldEnd) {
                    for (Token token : sequence) {
                        if (knownForms.contains(Integer.valueOf(((Word) token).getWordFormIndex()))) {
                            continue;
                        }
                        int combinedTag = 0;
                        for (int level = 0; level < list.size(); level++) {
                            combinedTag = (combinedTag * list.get(level).size()) + token.getTagIndexes()[level];
                            counters.get(level).increment(Integer.valueOf(combinedTag), Double.valueOf(1.0d));
                        }
                    }
                }
                position++;
            }

            foldStart = foldEnd;
        }

        List<List<Integer>> openClasses = new ArrayList<>(list.size());
        for (int level = 0; level < list.size(); level++) {
            Counter<Integer> counter = counters.get(level);
            double total = counter.totalCount().doubleValue();
            List<Integer> frequentTags = new LinkedList<>();
            for (Map.Entry<Integer, Double> entry : counter.entrySet()) {
                if (entry.getValue().doubleValue() / total > 1.0E-4d) {
                    frequentTags.add(entry.getKey());
                }
            }
            openClasses.add(frequentTags);
        }
        return openClasses;
    }

    /**
     * Counts how often every word form index occurs in the given sentences.
     *
     * @param morphOptions unused
     * @param collection the sentences; tokens are cast to {@link Word}
     * @return an array that maps a word form index to its number of occurrences; its length is
     *         the number of distinct word form indexes
     */
    private int[] extractVocabulary(MorphOptions morphOptions, Collection<Sequence> collection) {
        Counter<Integer> counts = new Counter<>();
        for (Sequence sequence : collection) {
            for (Token token : sequence) {
                counts.increment(Integer.valueOf(((Word) token).getWordFormIndex()), Double.valueOf(1.0d));
            }
        }
        // NOTE(review): indexing by key assumes the word form indexes are exactly
        // 0..counts.size()-1; confirm against how the word table assigns indexes.
        int[] vocabulary = new int[counts.size()];
        for (Map.Entry<Integer, Double> entry : counts.entrySet()) {
            vocabulary[entry.getKey().intValue()] = entry.getValue().intValue();
        }
        return vocabulary;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v14, types: [int[], int[][]] */
    private int[][] extractPossibleTransitions(MorphOptions morphOptions, Collection<Sequence> collection) {
        if (!morphOptions.getRestricTransitions() || !this.tag_morph_) {
            return (int[][]) null;
        }
        HashMap hashMap = new HashMap();
        Iterator<Sequence> it = collection.iterator();
        while (it.hasNext()) {
            for (Token token : it.next()) {
                int i = token.getTagIndexes()[0];
                int i2 = token.getTagIndexes()[1];
                Set set = (Set) hashMap.get(Integer.valueOf(i));
                if (set == null) {
                    set = new HashSet();
                    hashMap.put(Integer.valueOf(i), set);
                }
                set.add(Integer.valueOf(i2));
            }
        }
        ?? r0 = new int[hashMap.size() + 1];
        r0[0] = new int[1];
        for (Map.Entry entry : hashMap.entrySet()) {
            int intValue = ((Integer) entry.getKey()).intValue();
            int[] iArr = new int[((Set) entry.getValue()).size()];
            int i3 = 0;
            Iterator it2 = ((Set) entry.getValue()).iterator();
            while (it2.hasNext()) {
                int i4 = i3;
                i3++;
                iArr[i4] = ((Integer) it2.next()).intValue();
            }
            Arrays.sort(iArr);
            if (!$assertionsDisabled && r0[intValue] != 0) {
                throw new AssertionError();
            }
            r0[intValue] = iArr;
        }
        return r0;
    }

    /**
     * Creates the table of tag categories: always {@code "pos"}, followed by {@code "morph"}
     * when morphological tagging is enabled. Reads {@code tag_morph_}, which therefore has to
     * be set before this method is called.
     *
     * @param collection unused
     * @return the category table
     */
    private SymbolTable<String> extractCategories(Collection<Sequence> collection) {
        SymbolTable<String> categories = new SymbolTable<>(true);
        categories.toIndex(POS_NAME_, true);
        if (this.tag_morph_) {
            categories.toIndex(MORPH_NAME_, true);
        }
        return categories;
    }

    /**
     * Looks up the character indexes of a form and stores them on the word. In verbose mode a
     * warning is printed once for every character whose index is negative.
     *
     * @param word the word that receives the character indexes
     * @param str the form whose characters are looked up
     * @param z passed on to {@code FeatUtil.getCharIndexes} as its third argument
     */
    private void addCharIndexes(Word word, String str, boolean z) {
        final short[] indexes = FeatUtil.getCharIndexes(str, this.char_table_, z);
        if (!$assertionsDisabled && indexes == null) {
            throw new AssertionError();
        }
        for (int pos = 0; pos < str.length(); pos++) {
            char current = str.charAt(pos);
            boolean reportUnknown = indexes[pos] < 0 && this.verbose_;
            if (!reportUnknown) {
                continue;
            }
            if (this.unseen_char_set_ == null) {
                this.unseen_char_set_ = new HashSet<>();
            }
            Character boxed = Character.valueOf(current);
            if (this.unseen_char_set_.contains(boxed)) {
                // Already warned about this character.
                continue;
            }
            System.err.format("Warning: Unknown character: %c\n", boxed);
            this.unseen_char_set_.add(boxed);
        }
        word.setCharIndexes(indexes);
    }

    /**
     * Stores the signature of a form on the word. Signatures are computed once per form and
     * cached; the cache is re-created on demand because it is a transient field.
     *
     * @param word the word that receives the signature
     * @param str the form the signature is computed from
     * @param z unused
     */
    private void addSignature(Word word, String str, boolean z) {
        Map<String, Integer> cache = this.signature_cache;
        if (cache == null) {
            cache = new HashMap<>();
            this.signature_cache = cache;
        }
        Integer signature = cache.get(str);
        if (signature == null) {
            signature = Integer.valueOf(FeatUtil.getSignature(str, this.special_signature_));
            cache.put(str, signature);
        }
        word.setWordSignature(signature.intValue());
    }

    /**
     * Translates the token features of a word into indexes and stores them on {@code word}.
     *
     * <p>The plain feature indexes are built from the token features of {@code word2},
     * followed by the analyzer output for the form of {@code word2} if an analyzer is
     * configured. They are only set when at least one feature exists. The weighted feature
     * indexes are built from the weighted token features of {@code word}.
     *
     * @param word the word that receives the indexes and supplies the weighted features
     * @param word2 the word that supplies the plain token features and the form to analyze
     * @param z passed on to {@code SymbolTable.toIndex} as its third argument
     */
    private void addTokenFeatures(Word word, Word word2, boolean z) {
        String[] plainFeatures = word2.getTokenFeatures();
        List<String> analyses = null;
        if (this.analyzer_ != null) {
            analyses = this.analyzer_.analyze(word2.getWordForm());
        }

        int total = 0;
        if (plainFeatures != null) {
            total += plainFeatures.length;
        }
        if (analyses != null) {
            total += analyses.size();
        }

        if (total > 0) {
            int[] featureIndexes = new int[total];
            int next = 0;
            if (plainFeatures != null) {
                for (int k = 0; k < plainFeatures.length; k++) {
                    featureIndexes[next] = this.token_feature_table_.toIndex(plainFeatures[k], -1, z);
                    next++;
                }
            }
            if (analyses != null) {
                for (String analysis : analyses) {
                    featureIndexes[next] = this.token_feature_table_.toIndex(analysis, -1, z);
                    next++;
                }
            }
            word.setTokenFeatureIndexes(featureIndexes);
        }

        String[] weightedFeatures = word.getWeightedTokenFeatures();
        if (weightedFeatures != null && this.weighted_token_feature_table_ != null) {
            int[] weightedIndexes = new int[weightedFeatures.length];
            for (int k = 0; k < weightedFeatures.length; k++) {
                weightedIndexes[k] = this.weighted_token_feature_table_.toIndex(weightedFeatures[k], -1, z);
            }
            word.setWeightedTokenFeatureIndexes(weightedIndexes);
        }
    }

    /**
     * Annotates a word with all model indexes: tag indexes, signature, token features, shape,
     * word index of the normalized form and character indexes of the normalized form.
     *
     * @param word the word to annotate
     * @param z passed on to every lookup as its last argument
     */
    public void addIndexes(Word word, boolean z) {
        final String form = word.getWordForm();

        // These four work on the unnormalized form.
        addTagIndexes(word, -1, z);
        addSignature(word, form, z);
        addTokenFeatures(word, word, z);
        addShape(word, form, z);

        // Word and character indexes are based on the normalized form.
        final String normalized = StringUtils.normalize(form, this.normalize_forms_);
        final int wordIndex = this.word_table_.toIndex(normalized, -1, z);
        word.setWordIndex(wordIndex);
        addCharIndexes(word, normalized, z);
    }

    /**
     * Returns the ranker instance of a word for a given tag index, creating the instances of
     * the word first if it has none yet.
     *
     * @param word the word
     * @param i the tag index; ignored (slot 0 is used) when lemmas are not tag dependent
     * @param z unused
     * @return the ranker instance stored in the selected slot
     */
    private RankerInstance getRankerInstance(Word word, int i, boolean z) {
        List<RankerInstance> instances = word.getRankerIstances();
        if (instances == null) {
            instances = addRankerInstances(word);
        }
        int slot = this.lemma_tag_dependent_ ? i : 0;
        RankerInstance instance = instances.get(slot);
        if (!$assertionsDisabled && instance == null) {
            throw new AssertionError();
        }
        return instance;
    }

    /**
     * Attaches the ranker instances of a word to it, reusing the cached list of its word form
     * when one exists.
     *
     * <p>With tag dependent lemmas the list has one slot per entry of the level-0 tag table;
     * a slot stays {@code null} when tags are restricted to seen combinations and the tag was
     * not observed with this non-rare word. Otherwise the list holds a single instance created
     * for the tag {@code "_"}.
     *
     * @param word the word to annotate
     * @return the list that was set on the word
     */
    private List<RankerInstance> addRankerInstances(Word word) {
        if (this.lemma_instance_map_ == null) {
            this.lemma_instance_map_ = new HashMap<>();
        }
        List<RankerInstance> instances = this.lemma_instance_map_.get(word.getWordForm());
        if (instances == null) {
            SymbolTable<String> posTable = getTagTables().get(0);
            if (!this.lemma_tag_dependent_) {
                instances = Collections.singletonList(getRankerInstance(word, "_", (LemmaCandidateSet) null));
            } else {
                int numTags = posTable.size();
                instances = new ArrayList<>(numTags);
                for (int slot = 0; slot < numTags; slot++) {
                    instances.add(null);
                }
                // One candidate set is passed to the instances of all tags of this word.
                LemmaCandidateSet sharedCandidates = new LemmaCandidateSet();
                for (Map.Entry<String, Integer> tagEntry : posTable.entrySet()) {
                    int tagIndex = tagEntry.getValue().intValue();
                    String tag = tagEntry.getKey();
                    if (this.restrict_pos_tags_to_seen_combinations_
                            && !isRare(word.getWordFormIndex())
                            && !hasBeenObserved(word.getWordFormIndex(), 0, tagIndex)) {
                        continue;
                    }
                    instances.set(tagIndex, getRankerInstance(word, tag, sharedCandidates));
                }
            }
            this.lemma_instance_map_.put(word.getWordForm(), instances);
        }
        word.setRankerIstances(instances);
        return instances;
    }

    /**
     * Creates the ranker instance of a word for one POS tag.
     *
     * <p>If {@code lemmaCandidateSet} is given, the candidates of the new instance are replaced
     * by the candidates that {@code lemmaCandidateSet.getCandidate} returns for the same keys.
     * If the instance ends up without candidates, the word form itself becomes a candidate.
     *
     * @param word the word
     * @param str the POS tag that is set on the lemma instance while candidates are generated
     * @param lemmaCandidateSet the candidate set to take candidate objects from, or {@code null}
     * @return the new ranker instance
     */
    private RankerInstance getRankerInstance(Word word, String str, LemmaCandidateSet lemmaCandidateSet) {
        LemmaInstance lemmaInstance = LemmaInstance.getInstance(word, false, false);
        // The tag is only set while the candidates are generated.
        lemmaInstance.setPosTag(str);
        RankerInstance instance = RankerInstance.getInstance(lemmaInstance, this.generators_);
        lemmaInstance.setPosTag(null);

        final boolean useSharedSet = lemmaCandidateSet != null;
        if (useSharedSet) {
            LemmaCandidateSet replacement = new LemmaCandidateSet();
            for (Iterator<Map.Entry<String, LemmaCandidate>> it = instance.getCandidateSet().iterator(); it.hasNext(); ) {
                Map.Entry<String, LemmaCandidate> candidateEntry = it.next();
                replacement.addCandidate(candidateEntry.getKey(), lemmaCandidateSet.getCandidate(candidateEntry.getKey()));
            }
            instance.setCandidateSet(replacement);
        }

        if (instance.getCandidateSet().size() == 0) {
            if (useSharedSet) {
                instance.getCandidateSet().addCandidate(lemmaInstance.getForm(), lemmaCandidateSet.getCandidate(lemmaInstance.getForm()));
            } else {
                // The return value is discarded; presumably getCandidate() creates a missing
                // candidate - verify against LemmaCandidateSet.
                instance.getCandidateSet().getCandidate(lemmaInstance.getForm());
            }
        }
        return instance;
    }

    /**
     * Splits a tag into sub tags and returns their indexes.
     *
     * @param str the tag
     * @param i the tag level that selects the sub tag table
     * @param z passed on to {@code SymbolTable.toIndex} as its third argument
     * @param i2 the offset added to every sub tag index
     * @param str2 the separator; it is handed to {@link String#split(String)} and therefore
     *        interpreted as a regular expression
     * @return the shifted indexes of all non-empty sub tags with a non-negative index, or
     *         {@code null} if the tag is the border symbol or {@code "_"}, the level has no
     *         sub tag table, or splitting yields exactly one part
     */
    private int[] getSubTags(String str, int i, boolean z, int i2, String str2) {
        if (str.equals(Model.BORDER_SYMBOL_) || str.equals("_")) {
            return null;
        }
        if (i >= this.subtag_tables_.size()) {
            return null;
        }
        SymbolTable<String> subTagTable = this.subtag_tables_.get(i);
        if (subTagTable == null) {
            return null;
        }

        String[] parts = str.split(str2);
        if (parts.length == 1) {
            return null;
        }

        List<Integer> found = new ArrayList<>(parts.length);
        for (String part : parts) {
            if (part.length() == 0) {
                continue;
            }
            int subTagIndex = subTagTable.toIndex(part, -1, z);
            if (subTagIndex >= 0) {
                found.add(Integer.valueOf(subTagIndex));
            }
        }

        int[] shifted = new int[found.size()];
        for (int k = 0; k < shifted.length; k++) {
            shifted[k] = found.get(k).intValue() + i2;
        }
        return shifted;
    }

    /**
     * Looks up the POS and (if enabled) morphological tag of a word and stores the resulting
     * indexes on it. A missing tag is stored as {@code -1}.
     *
     * @param word the word to annotate
     * @param i unused
     * @param z passed on to {@code SymbolTable.toIndex} as its third argument
     */
    private void addTagIndexes(Word word, int i, boolean z) {
        List<SymbolTable<String>> tables = getTagTables();
        String pos = word.getPosTag();
        String morph = word.getMorphTag();

        int[] tagIndexes = new int[tables.size()];
        tagIndexes[POS_INDEX_] = (pos == null) ? -1 : tables.get(POS_INDEX_).toIndex(pos, -1, z);
        if (this.tag_morph_) {
            tagIndexes[MORPH_INDEX_] = (morph == null) ? -1 : tables.get(MORPH_INDEX_).toIndex(morph, -1, z);
        }
        word.setTagIndexes(tagIndexes);
    }

    /**
     * Assigns a shape index to a rare word. Nothing happens if the shape feature is disabled,
     * the vocabulary counts are not available yet, or the word is not rare.
     *
     * @param word the word to annotate
     * @param str the form that is classified by the shape trie
     * @param z passed on to {@code SymbolTable.toIndex} as its third argument
     */
    private void addShape(Word word, String str, boolean z) {
        if (!this.shape_) {
            return;
        }
        int wordFormIndex = word.getWordFormIndex();
        if (this.vocab_ == null || !isRare(wordFormIndex)) {
            return;
        }
        int shapeIndex = -1;
        if (this.trie_ != null) {
            String shapeClass = Integer.toString(this.trie_.classify(str));
            shapeIndex = this.shape_table_.toIndex(shapeClass, -1, z);
        }
        word.setWordShapeIndex(shapeIndex);
    }

    /**
     * Tells whether a word-form index counts as rare.
     *
     * @param i the word-form index
     * @return {@code true} if the index lies outside {@code vocab_} or its {@code vocab_} entry
     *         is below {@code rare_word_max_freq_}
     */
    public boolean isRare(int i) {
        boolean outOfVocabRange = i < 0 || i >= this.vocab_.length;
        if (outOfVocabRange) {
            return true;
        }
        return this.vocab_[i] < this.rare_word_max_freq_;
    }

    /**
     * @return the symbol table stored in {@code word_table_}
     */
    public SymbolTable<String> getWordTable() {
        return word_table_;
    }

    /**
     * Searches a grid of option values recursively and returns the best-scoring tagger.
     * For every value of the first parameter the options are cloned, the value is set, and the
     * search recurses over the remaining parameters. With a single parameter left the work is
     * delegated to the single-parameter overload.
     *
     * @param morphOptions base options; cloned before each modification
     * @param collection   training sentences
     * @param collection2  test sentences; must not be {@code null}
     * @param list         parameter names, one per entry of {@code list2}
     * @param list2        candidate values for each parameter name
     * @param list3        passed through to the single-parameter overload, which appends results
     * @return the tagger with the highest result score (the earlier one wins ties), or
     *         {@code null} if the first parameter has no candidate values
     * @throws InvalidParameterException if {@code collection2} is {@code null}
     */
    public static Tagger trainOptimal(MorphOptions morphOptions, Collection<Sequence> collection, Collection<Sequence> collection2, List<String> list, List<List<String>> list2, List<MorphEntry> list3) {
        if (collection2 == null) {
            throw new InvalidParameterException("test_sentences is null!");
        }
        if (!$assertionsDisabled && list.size() != list2.size()) {
            throw new AssertionError();
        }
        if (!$assertionsDisabled && list.isEmpty()) {
            throw new AssertionError();
        }
        if (list.size() == 1) {
            return trainOptimal(morphOptions, collection, collection2, list.get(0), list2.get(0), list3);
        }
        // Work on typed copies so the caller's lists are not modified by pollFirst().
        LinkedList<String> remainingParams = new LinkedList<String>(list);
        LinkedList<List<String>> remainingValues = new LinkedList<List<String>>(list2);
        String param = remainingParams.pollFirst();
        List<String> values = remainingValues.pollFirst();
        Tagger best = null;
        for (String value : values) {
            morphOptions = (MorphOptions) Copy.clone(morphOptions);
            morphOptions.setProperty(param, value);
            Tagger candidate = trainOptimal(morphOptions, collection, collection2, remainingParams, remainingValues, list3);
            // Strict comparison: the first tagger reaching the best score is kept.
            if (best == null || candidate.getResult().getScore() > best.getResult().getScore()) {
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Trains one tagger per candidate value of a single option and returns the best-scoring one.
     * Each run is recorded in {@code list} as a {@code MorphEntry} of the options used and the
     * result obtained.
     *
     * @param morphOptions base options; cloned before each modification
     * @param collection   training sentences
     * @param collection2  test sentences; must not be {@code null}
     * @param str          name of the option to vary
     * @param collection3  candidate values for the option
     * @param list         receives one entry per trained tagger
     * @return the tagger with the highest result score (the earlier one wins ties), or
     *         {@code null} if {@code collection3} is empty
     * @throws InvalidParameterException if {@code collection2} is {@code null}
     */
    public static Tagger trainOptimal(MorphOptions morphOptions, Collection<Sequence> collection, Collection<Sequence> collection2, String str, Collection<String> collection3, List<MorphEntry> list) {
        if (collection2 == null) {
            throw new InvalidParameterException("test_sentences is null!");
        }
        Tagger best = null;
        for (String value : collection3) {
            morphOptions = (MorphOptions) Copy.clone(morphOptions);
            morphOptions.setProperty(str, value);
            // Train on a separate clone so the options recorded in the entry are the ones
            // configured here, whatever train() does with its argument.
            Tagger candidate = train((MorphOptions) Copy.clone(morphOptions), collection, collection2);
            list.add(new MorphEntry(morphOptions, (MorphResult) candidate.getResult()));
            if (best == null || candidate.getResult().getScore() > best.getResult().getScore()) {
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Runs the grid search over a fixed grid of {@code Options.ORDER}, {@code "seed"} and
     * {@code "penalty"} values, prints the sorted results to {@code System.err} and returns the
     * best tagger found.
     *
     * @param morphOptions base options
     * @param list         training sentences
     * @param list2        test sentences; must not be {@code null}
     * @return the best-scoring tagger of the grid search
     * @throws InvalidParameterException if {@code list2} is {@code null}
     */
    public static Tagger trainOptimal(MorphOptions morphOptions, List<Sequence> list, List<Sequence> list2) {
        if (list2 == null) {
            throw new InvalidParameterException("test_sentences is null!");
        }
        List<String> params = Arrays.asList(Options.ORDER, "seed", "penalty");
        List<List<String>> grid = Arrays.<List<String>>asList(
                Arrays.asList("1", "3", "5"),
                Arrays.asList("41", "42", "43"),
                Arrays.asList("0.0", "0.05", "0.1", "0.5"));
        List<MorphEntry> results = new ArrayList<MorphEntry>();
        Tagger best = trainOptimal(morphOptions, list, list2, params, grid, results);
        Collections.sort(results);
        System.err.println("OPTIMAL OPTIONS AND RESULTS");
        for (MorphEntry entry : results) {
            // One line per run: "name:value, name:value, ...<TAB>score".
            StringBuilder line = new StringBuilder();
            for (String param : params) {
                if (line.length() > 0) {
                    line.append(", ");
                }
                line.append(param).append(':').append(entry.getOptions().getProperty(param));
            }
            line.append('\t').append(entry.getResult().getScore());
            System.err.println(line.toString());
        }
        return best;
    }

    /**
     * Builds a model from the training sentences and trains a tagger on them.
     * If test sentences are given, their words are indexed against the model (with
     * {@code false} as the second argument of {@code addIndexes}) and an evaluator over them is
     * handed to the trainer. When both the lemmatizer and lemma pretraining options are on,
     * {@code skip_lemma_} is cleared and a second training pass is run.
     *
     * @param morphOptions training options
     * @param collection   training sentences
     * @param collection2  test sentences, may be {@code null}
     * @return the trained tagger
     */
    public static Tagger train(MorphOptions morphOptions, Collection<Sequence> collection, Collection<Sequence> collection2) {
        MorphModel model = new MorphModel();
        model.init(morphOptions, collection);
        if (collection2 != null) {
            for (Sequence sentence : collection2) {
                for (Iterator<Token> tokens = sentence.iterator(); tokens.hasNext();) {
                    model.addIndexes((Word) tokens.next(), false);
                }
            }
        }
        MorphWeightVector weights = new MorphWeightVector(morphOptions);
        weights.init(model, collection);
        MorphTagger tagger = new MorphTagger(model, model.getOrder(), weights);
        Trainer trainer = TrainerFactory.create(morphOptions);
        MorphEvaluator evaluator = (collection2 == null) ? null : new MorphEvaluator(collection2);
        trainer.train(tagger, collection, evaluator);
        boolean secondPass = morphOptions.getLemmatizer() && morphOptions.getLemmaPretraining();
        if (secondPass) {
            model.skip_lemma_ = false;
            if (morphOptions.getVerbose()) {
                System.err.format("Training with lemmatizer.\n");
            }
            trainer.train(tagger, collection, evaluator);
        }
        return tagger;
    }

    /**
     * @return the symbol table stored in {@code char_table_}
     */
    public SymbolTable<Character> getCharTable() {
        return char_table_;
    }

    /**
     * @return the trie's index when a trie is present, otherwise the size of the shape table
     */
    public int getNumShapes() {
        if (this.trie_ != null) {
            return this.trie_.getIndex();
        }
        return this.shape_table_.size();
    }

    /**
     * @return the symbol table stored in {@code shape_table_}
     */
    public SymbolTable<String> getShapeTable() {
        return shape_table_;
    }

    /**
     * Tells whether a word-form index is out of vocabulary.
     *
     * @param i the word-form index
     * @return {@code true} if the index lies outside {@code vocab_} or its {@code vocab_} entry
     *         is zero
     */
    public boolean isOOV(int i) {
        // Same bounds handling as isRare(int): an index beyond the vocabulary array has no
        // recorded count, so it is reported as out of vocabulary instead of throwing.
        if (i < 0 || i >= this.vocab_.length) {
            return true;
        }
        return this.vocab_[i] == 0;
    }

    /**
     * @return the summed size of all non-null sub-tag tables, or 0 if there are none
     */
    public int getNumSubTags() {
        if (this.subtag_tables_ == null) {
            return 0;
        }
        int total = 0;
        for (SymbolTable<String> table : this.subtag_tables_) {
            if (table == null) {
                continue;
            }
            total += table.size();
        }
        return total;
    }

    /**
     * @return the symbol table stored in {@code token_feature_table_}
     */
    public SymbolTable<String> getTokenFeatureTable() {
        return token_feature_table_;
    }

    /**
     * @return the symbol table stored in {@code weighted_token_feature_table_}
     */
    public SymbolTable<String> getWeightedTokenFeatureTable() {
        return weighted_token_feature_table_;
    }

    /**
     * Returns the candidate tag indexes for the level following {@code state}.
     * The level is 0 for a {@code null} state and {@code state.getLevel() + 1} otherwise.
     * At level 1 the {@code transitions_} row of the state's index is used when available; at
     * level 0, if restriction to seen combinations is enabled and the word form is not rare, the
     * tags observed for that word form are used. In all other cases the full tag class of the
     * level is returned.
     *
     * @param sequence the sentence being tagged
     * @param i        position of the token within {@code sequence}
     * @param state    the state of the previous level, or {@code null}
     * @return the candidate tag indexes
     */
    @Override // marmot.core.Model
    public int[] getTagCandidates(Sequence sequence, int i, State state) {
        int level = 0;
        if (state != null) {
            level = state.getLevel() + 1;
        }
        if (level == 1 && this.transitions_ != null) {
            return this.transitions_[state.getIndex()];
        }
        if (level == 0 && this.restrict_pos_tags_to_seen_combinations_) {
            Word word = (Word) sequence.get(i);
            int formIndex = word.getWordFormIndex();
            if (!isRare(formIndex)) {
                return this.word_to_observed_tags_[formIndex];
            }
        }
        return this.tag_classes_[level];
    }

    /**
     * @return the array stored in {@code tag_to_subtag_}; the array itself is returned, not a copy
     */
    public int[][][] getTagToSubTags() {
        return tag_to_subtag_;
    }

    /**
     * Stores the given flag in {@code verbose_}.
     *
     * @param z the new value of the verbose flag
     */
    public void setVerbose(boolean z) {
        verbose_ = z;
    }

    /**
     * @return the result of {@code FeatUtil.getMaxSignature} for {@code special_signature_}
     */
    public int getMaxSignature() {
        int maxSignature = FeatUtil.getMaxSignature(this.special_signature_);
        return maxSignature;
    }

    /**
     * Trains a tagger without test sentences by delegating to the three-argument
     * {@code train} with {@code null} as the test set.
     *
     * @param morphOptions training options
     * @param list         training sentences
     * @return the trained tagger
     */
    public static Tagger train(MorphOptions morphOptions, List<Sequence> list) {
        Collection<Sequence> noTestSentences = null;
        return train(morphOptions, list, noTestSentences);
    }

    /**
     * Builds the scored lemma candidates of a level-0 state and stores them on the state.
     * Does nothing when no lemma model exists or when {@code z} differs from
     * {@code lemma_prepruning_extraction_}. Each candidate of the word's ranker instance is
     * scored with an empty sub-tag array and flagged as correct when its key equals the
     * lower-cased lemma of the word.
     *
     * @param token the token (a {@code Word}) whose lemma candidates are generated
     * @param state the level-0 state that receives the candidates
     * @param z     compared against {@code lemma_prepruning_extraction_} to decide whether to run
     * @param z2    forwarded to {@code getRankerInstance} and to the lemma model's
     *              {@code addIndexes}
     */
    @Override // marmot.core.Model
    public void setLemmaCandidates(Token token, State state, boolean z, boolean z2) {
        if (this.lemma_model_ == null || z != this.lemma_prepruning_extraction_) {
            return;
        }
        int tagIndex = state.getIndex();
        Word word = (Word) token;
        RankerInstance instance = getRankerInstance(word, tagIndex, z2);
        if (!$assertionsDisabled && instance == null) {
            throw new AssertionError();
        }
        LemmaCandidateSet candidateSet = instance.getCandidateSet();
        List<RankerCandidate> scored = new ArrayList<RankerCandidate>(candidateSet.size());
        if (!$assertionsDisabled && state.getLevel() != 0) {
            throw new AssertionError();
        }
        int[] noSubTags = RankerInstance.EMPTY_ARRAY;
        // A word without a lemma simply has no correct candidate; previously this
        // dereferenced the lemma unconditionally and failed with a NullPointerException.
        String lemma = word.getLemma();
        String lemmaKey = (lemma == null) ? null : lemma.toLowerCase();
        Iterator<Map.Entry<String, LemmaCandidate>> it = candidateSet.iterator();
        while (it.hasNext()) {
            Map.Entry<String, LemmaCandidate> entry = it.next();
            String key = entry.getKey();
            boolean correct = key.equals(lemmaKey);
            LemmaCandidate candidate = entry.getValue();
            if (!$assertionsDisabled && candidate == null) {
                throw new AssertionError();
            }
            double score = getLemmaCandidateScore(candidate, candidateSet, tagIndex, noSubTags, instance, z2);
            RankerCandidate ranked = new RankerCandidate(key, candidate, correct, score);
            if (!$assertionsDisabled && ranked.getCandidate() == null) {
                throw new AssertionError();
            }
            scored.add(ranked);
        }
        state.setLemmaCandidates(scored);
        state.setLemmaScoreSum();
        if (!$assertionsDisabled && state.getLemmaCandidates() == null) {
            throw new AssertionError();
        }
    }

    /**
     * Scores a lemma candidate with the lemma model, indexing the candidate set first if needed.
     * While {@code skip_lemma_} is set, the candidate is marked with the
     * {@code RankerInstance.EMPTY_ARRAY} sentinel and the score is 0. Otherwise, if the candidate
     * has no feature indexes yet ({@code null} or the sentinel, compared by identity), every
     * sentinel in the candidate set is reset to {@code null} and the lemma model's
     * {@code addIndexes} is called on the set before scoring.
     *
     * @param lemmaCandidate    the candidate to score
     * @param lemmaCandidateSet the set the candidate belongs to
     * @param i                 forwarded as the second argument of the lemma model's {@code score}
     * @param iArr              forwarded as the third argument of the lemma model's {@code score}
     * @param rankerInstance    the instance passed to {@code addIndexes}
     * @param z                 forwarded as the third argument of {@code addIndexes}
     * @return the lemma model's score, or 0 while lemma scoring is skipped
     */
    private double getLemmaCandidateScore(LemmaCandidate lemmaCandidate, LemmaCandidateSet lemmaCandidateSet, int i, int[] iArr, RankerInstance rankerInstance, boolean z) {
        if (this.skip_lemma_) {
            lemmaCandidate.setFeatureIndexes(RankerInstance.EMPTY_ARRAY);
            return 0.0d;
        }
        boolean alreadyIndexed = lemmaCandidate.getFeatureIndexes() != null
                && lemmaCandidate.getFeatureIndexes() != RankerInstance.EMPTY_ARRAY;
        if (alreadyIndexed) {
            return this.lemma_model_.score(lemmaCandidate, i, iArr);
        }
        // Clear the sentinel on every candidate before the set is indexed.
        for (Iterator<Map.Entry<String, LemmaCandidate>> reset = lemmaCandidateSet.iterator(); reset.hasNext();) {
            LemmaCandidate other = reset.next().getValue();
            if (other.getFeatureIndexes() == RankerInstance.EMPTY_ARRAY) {
                other.setFeatureIndexes(null);
            }
        }
        lemmaCandidate.setFeatureIndexes(null);
        this.lemma_model_.addIndexes(rankerInstance, lemmaCandidateSet, z);
        // Sanity checks: after indexing, every candidate must carry real feature indexes.
        for (Iterator<Map.Entry<String, LemmaCandidate>> check = lemmaCandidateSet.iterator(); check.hasNext();) {
            LemmaCandidate other = check.next().getValue();
            if (!$assertionsDisabled && other.getFeatureIndexes() == null) {
                throw new AssertionError();
            }
            if (!$assertionsDisabled && other.getFeatureIndexes() == RankerInstance.EMPTY_ARRAY) {
                throw new AssertionError();
            }
        }
        if (!$assertionsDisabled && lemmaCandidate.getFeatureIndexes() == null) {
            throw new AssertionError();
        }
        if (!$assertionsDisabled && lemmaCandidate.getFeatureIndexes() == RankerInstance.EMPTY_ARRAY) {
            throw new AssertionError();
        }
        return this.lemma_model_.score(lemmaCandidate, i, iArr);
    }

    /**
     * Re-scores the lemma candidates of a level-1 state's sub-level state and stores the
     * re-scored list on the level-1 state. Does nothing when no lemma model exists or when
     * {@code z} differs from {@code lemma_prepruning_extraction_}. The sub-tags of the state's
     * tag are passed to the lemma model only when {@code lemma_use_morph_} is set; otherwise an
     * empty array is used.
     *
     * @param state the level-1 state to update
     * @param z     compared against {@code lemma_prepruning_extraction_} to decide whether to run
     */
    @Override // marmot.core.Model
    public void setLemmaCandidates(State state, boolean z) {
        if (this.lemma_model_ == null || z != this.lemma_prepruning_extraction_) {
            return;
        }
        // Checked before the first dereference; the former position after state.getLevel()
        // made this assertion unreachable.
        if (!$assertionsDisabled && state == null) {
            throw new AssertionError();
        }
        if (!$assertionsDisabled && state.getLevel() != 1) {
            throw new AssertionError();
        }
        State subLevelState = state.getSubLevelState();
        if (!$assertionsDisabled && subLevelState == null) {
            throw new AssertionError();
        }
        if (!$assertionsDisabled && subLevelState.getOrder() != 1) {
            throw new AssertionError();
        }
        if (!$assertionsDisabled && state.getOrder() != 1) {
            throw new AssertionError();
        }
        List<RankerCandidate> previous = subLevelState.getLemmaCandidates();
        if (!$assertionsDisabled && previous == null) {
            throw new AssertionError();
        }
        if (!$assertionsDisabled && subLevelState.getLevel() != 0) {
            throw new AssertionError();
        }
        int posIndex = subLevelState.getIndex();
        // Only look up the sub-tags when they are actually used for scoring.
        int[] subTags = RankerInstance.EMPTY_ARRAY;
        if (this.lemma_use_morph_) {
            int[] tagSubTags = getTagToSubTags()[state.getLevel()][state.getIndex()];
            if (tagSubTags != null) {
                subTags = tagSubTags;
            }
        }
        List<RankerCandidate> rescored = new ArrayList<RankerCandidate>(previous.size());
        for (RankerCandidate old : previous) {
            LemmaCandidate candidate = old.getCandidate();
            double score = this.lemma_model_.score(candidate, posIndex, subTags);
            rescored.add(new RankerCandidate(old.getLemma(), candidate, old.isCorrect(), score));
        }
        state.setLemmaCandidates(rescored);
        state.setLemmaScoreSum();
    }

    /**
     * @return the ranker model stored in {@code lemma_model_}, which other methods of this
     *         class treat as possibly {@code null}
     */
    public RankerModel getLemmaModel() {
        return lemma_model_;
    }

    /**
     * @return the value of the {@code marginalize_lemmas_} flag
     */
    @Override // marmot.core.Model
    public boolean getMarganlizeLemmas() {
        return marginalize_lemmas_;
    }

    /**
     * @return the value of the {@code lemma_use_morph_} flag
     */
    public boolean getLemmaUseMorph() {
        return lemma_use_morph_;
    }

    // Initialises the flag consulted by the explicit assertion checks in this class:
    // it is true when assertions are NOT enabled for MorphModel.
    static {
        boolean assertionsEnabled = MorphModel.class.desiredAssertionStatus();
        $assertionsDisabled = !assertionsEnabled;
    }
}
