package edu.stanford.nlp.patterns.surface;

import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.Env;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.patterns.surface.ConstantsAndVariables;
import edu.stanford.nlp.patterns.surface.GetPatternsFromDataMultiClass;
import edu.stanford.nlp.patterns.surface.Pattern;
import edu.stanford.nlp.patterns.surface.PhraseScorer;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.stats.TwoDimensionalCounterInterface;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.Execution;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import javax.json.JsonReader;
import javax.json.JsonValue;

/* loaded from: input_file:edu/stanford/nlp/patterns/surface/ScorePhrases.class */
public class ScorePhrases<E extends Pattern> {
    /** Shared configuration and state for the run; assigned in the constructor. */
    ConstantsAndVariables<E> constVars;
    /** Scorer instance created reflectively from {@link #phraseScorerClass} in the constructor. */
    PhraseScorer phraseScorer;
    // Compiler-generated assertion switch, assigned in the static initializer of this class.
    // The explicit "!$assertionsDisabled && ..." checks in the methods are expanded assert statements.
    static final /* synthetic */ boolean $assertionsDisabled;
    /**
     * Per-label flag that is set to true after learnNewPhrasesPrivate has written words.json for that
     * label; when set, the next write first reloads the existing file content and appends to it.
     */
    Map<String, Boolean> writtenInJustification = new HashMap();

    /**
     * Scorer implementation to instantiate. Exposed as the option "phraseScorerClass" and handed to
     * Execution.fillOptions in the constructor before the scorer is created.
     */
    @Execution.Option(name = "phraseScorerClass")
    Class<? extends PhraseScorer> phraseScorerClass = ScorePhrasesAverageFeatures.class;

    /**
     * Creates the phrase-scoring driver and its underlying {@link PhraseScorer}.
     *
     * @param properties options applied (via {@code Execution.fillOptions}) to this object and then
     *     to the freshly created scorer
     * @param constantsAndVariables shared configuration; also passed to the scorer's constructor
     * @throws RuntimeException wrapping any reflective failure while instantiating the scorer
     */
    public ScorePhrases(Properties properties, ConstantsAndVariables constantsAndVariables) {
        // Options are filled before instantiation so that a configured phraseScorerClass
        // (presumably set by fillOptions) is the one that gets constructed.
        Execution.fillOptions(this, properties);
        this.constVars = constantsAndVariables;
        try {
            this.phraseScorer = this.phraseScorerClass
                    .getConstructor(ConstantsAndVariables.class)
                    .newInstance(this.constVars);
            Execution.fillOptions(this.phraseScorer, properties);
        } catch (IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Selects up to {@code constVars.numWordsToAdd} phrases from {@code counter}, visiting them in the
     * order produced by {@code Counters.toPriorityQueue} (presumably best score first).
     *
     * <p>Selection stops at the first phrase whose score is below {@code d}. A phrase is skipped when
     * it is a key of {@code counter2} and has fewer non-redundant patterns than
     * {@code constVars.thresholdNumPatternsApplied}, or when it fuzzily matches an entry of
     * {@code set}; in the latter case the phrase itself is added to {@code set}.
     *
     * @param counter phrase scores to choose from
     * @param twoDimensionalCounter phrase to pattern counts, used for the non-redundant-pattern check
     * @param counter2 phrases for which the pattern-count threshold is enforced
     * @param set words to fuzzy-match against; may be {@code null}, and is mutated when a match occurs
     * @param d minimum score a phrase must have to be selected
     * @return a new counter holding the selected phrases with their scores from {@code counter}
     */
    public Counter<String> chooseTopWords(Counter<String> counter, TwoDimensionalCounter<String, E> twoDimensionalCounter, Counter<String> counter2, Set<String> set, double d) {
        // The queue holds phrases (Strings), not patterns, so the iterator is typed accordingly.
        Iterator<String> ranked = Counters.toPriorityQueue(counter).iterator();
        Counter<String> chosen = new ClassicCounter<>();
        while (ranked.hasNext() && chosen.size() < this.constVars.numWordsToAdd) {
            String word = ranked.next();
            double score = counter.getCount(word);
            if (score < d) {
                break;
            }
            if (!$assertionsDisabled && score == Double.POSITIVE_INFINITY) {
                throw new AssertionError();
            }
            boolean enoughPatterns = !counter2.containsKey(word)
                    || numNonRedundantPatterns(twoDimensionalCounter, word) >= this.constVars.thresholdNumPatternsApplied;
            if (!enoughPatterns) {
                Redwood.log("extremePatDebug", "Not adding " + word + " because the number of non redundant patterns are below threshold: " + twoDimensionalCounter.getCounter(word).keySet());
                continue;
            }
            String fuzzyMatch = null;
            if (this.constVars.minLen4FuzzyForPattern > 0 && set != null) {
                fuzzyMatch = ConstantsAndVariables.containsFuzzy(set, word, this.constVars.minLen4FuzzyForPattern);
            }
            if (fuzzyMatch == null) {
                Redwood.log("extremePatDebug", "adding word " + word);
                chosen.setCount(word, score);
            } else {
                Redwood.log("extremePatDebug", "not adding " + word + " because it matched " + fuzzyMatch + " in common English word");
                // fuzzyMatch != null implies set != null (see the guard above).
                set.add(word);
            }
        }
        // Report (at most) the next five candidates that were not considered, for debugging only.
        StringBuilder nextFive = new StringBuilder();
        for (int shown = 0; shown < 5 && ranked.hasNext(); shown++) {
            String word = ranked.next();
            nextFive.append(";\t").append(word).append(MorphoFeatures.KEY_VAL_DELIM).append(counter.getCount(word));
        }
        Redwood.log(Redwood.DBG, "Next five phrases were " + nextFive);
        return chosen;
    }

    /**
     * Removes every first-level key contained in {@code collection} from the given counter.
     *
     * @param twoDimensionalCounter counter to remove entries from (modified in place)
     * @param collection keys to remove
     */
    public static <E, F> void removeKeys(TwoDimensionalCounter<E, F> twoDimensionalCounter, Collection<E> collection) {
        for (E key : collection) {
            twoDimensionalCounter.remove(key);
        }
    }

    /**
     * Counts the patterns recorded for {@code str} whose string form neither contains, nor is
     * contained in, the string form of any pattern that comes later in the key set's iteration order.
     *
     * @param twoDimensionalCounter phrase to pattern counts
     * @param str the phrase whose patterns are examined
     * @return the number of such "non-redundant" patterns (an integral value returned as a double)
     */
    private double numNonRedundantPatterns(TwoDimensionalCounter<String, E> twoDimensionalCounter, String str) {
        Set<E> patterns = twoDimensionalCounter.getCounter(str).keySet();
        // Render each pattern once up front instead of calling toString() inside the quadratic loop.
        List<String> rendered = new ArrayList<>(patterns.size());
        for (E pattern : patterns) {
            rendered.add(pattern.toString());
        }
        int nonRedundant = 0;
        int total = rendered.size();
        for (int i = 0; i < total; i++) {
            String current = rendered.get(i);
            boolean overlapsLater = false;
            for (int j = i + 1; j < total && !overlapsLater; j++) {
                String later = rendered.get(j);
                overlapsLater = later.contains(current) || current.contains(later);
            }
            if (!overlapsLater) {
                nonRedundant++;
            }
        }
        return nonRedundant;
    }

    /**
     * Learns new phrases for the label {@code str} and records them in the label dictionary.
     *
     * <p>If {@code Data.processedDataFreq} has not been created yet, an empty counter is installed
     * and the private worker is told to populate it. The current label-dictionary entry for
     * {@code str} is passed to the worker, and the keys of the returned counter are added to the
     * dictionary afterwards.
     *
     * @return the phrases selected by {@code learnNewPhrasesPrivate}, with their scores
     * @throws IOException propagated from the worker
     * @throws ClassNotFoundException propagated from the worker
     */
    public Counter<String> learnNewPhrases(String str, PatternsForEachToken patternsForEachToken, Counter<E> counter, Counter<E> counter2, CollectionValuedMap<E, Triple<String, Integer, Integer>> collectionValuedMap, Counter<String> counter3, TwoDimensionalCounter<String, E> twoDimensionalCounter, TwoDimensionalCounter<String, E> twoDimensionalCounter2, TwoDimensionalCounter<E, String> twoDimensionalCounter3, String str2, Set<String> set) throws IOException, ClassNotFoundException {
        final boolean needsProcessedFreq = Data.processedDataFreq == null;
        if (needsProcessedFreq) {
            Data.processedDataFreq = new ClassicCounter<>();
            // Expanded assert: raw frequencies must exist before processed ones can be derived.
            if (!$assertionsDisabled && Data.rawFreq == null) {
                throw new AssertionError();
            }
        }
        Set<String> knownWordsForLabel = this.constVars.getLabelDictionary().get(str);
        Counter<String> learned = learnNewPhrasesPrivate(
                str, patternsForEachToken, counter, counter2, knownWordsForLabel, collectionValuedMap,
                counter3, twoDimensionalCounter, twoDimensionalCounter2, twoDimensionalCounter3,
                str2, set, needsProcessedFreq);
        this.constVars.addLabelDictionary(str, learned.keySet());
        return learned;
    }

    /**
     * Applies one pattern to the given sentences, splitting the sentence ids into contiguous chunks
     * that are processed by {@code ApplyPatterns} tasks on a fixed thread pool.
     *
     * <p>The sentence ids are divided into at most {@code constVars.numThreads} disjoint chunks of
     * (nearly) equal size, so every sentence is handed to exactly one task. A single task is used
     * when there are fewer than 50 sentences. Each task's results are merged into
     * {@code twoDimensionalCounter} and {@code collectionValuedMap}.
     *
     * @param map sentence id to tokens for the sentences to search
     * @param str the label being learned; other labels become disallowed classes when
     *     {@code constVars.doNotExtractPhraseAnyWordLabeledOtherClass} is set
     * @param e the pattern to apply (cast to {@code SurfacePattern} to build the token-sequence regex)
     * @param twoDimensionalCounter receives the (first, second) pair to pattern counts from each task
     * @param collectionValuedMap receives the second component of each task result
     * @throws RuntimeException wrapping any failure of a task; the pool is shut down first
     */
    void runParallelApplyPats(Map<String, List<CoreLabel>> map, String str, E e, TwoDimensionalCounter<Pair<String, String>, E> twoDimensionalCounter, CollectionValuedMap<E, Triple<String, Integer, Integer>> collectionValuedMap) {
        Redwood.log(Redwood.DBG, "Applying pattern " + e + " to a total of " + map.size() + " sentences ");
        List<String> notAllowedClasses = new ArrayList<>();
        List<String> sentIds = CollectionUtils.toList(map.keySet());
        if (this.constVars.doNotExtractPhraseAnyWordLabeledOtherClass) {
            for (String otherLabel : this.constVars.getAnswerClass().keySet()) {
                if (!otherLabel.equals(str)) {
                    notAllowedClasses.add(otherLabel + MorphoFeatures.KEY_VAL_DELIM + otherLabel);
                }
            }
            notAllowedClasses.add("OTHERSEM:OTHERSEM");
        }
        Map<TokenSequencePattern, E> compiledPatterns = new HashMap<>();
        TokenSequencePattern compile = TokenSequencePattern.compile(this.constVars.env.get(str), ((SurfacePattern) e).toString(notAllowedClasses));
        compiledPatterns.put(compile, e);

        int total = sentIds.size();
        if (total == 0) {
            // Nothing to search; avoid spinning up a pool for no work.
            return;
        }
        // Small inputs are not worth parallelising. A non-positive thread setting falls back to one.
        int threads = (total < 50) ? 1 : Math.max(1, this.constVars.numThreads);
        // Ceiling division: chunks are disjoint and together cover every sentence exactly once.
        // (Dividing by threads - 1 gave two full-list tasks for 2 threads and empty chunks when
        // threads - 1 exceeded the sentence count.)
        int chunkSize = (total + threads - 1) / threads;

        ExecutorService pool = Executors.newFixedThreadPool(threads);
        try {
            List<Future> pending = new ArrayList<>();
            for (int from = 0; from < total; from += chunkSize) {
                int to = Math.min(total, from + chunkSize);
                List<String> chunk = (from == 0 && to == total) ? sentIds : sentIds.subList(from, to);
                pending.add(pool.submit(new ApplyPatterns(map, chunk, compiledPatterns, str, this.constVars.removeStopWordsFromSelectedPhrases, this.constVars.removePhrasesWithStopWords, this.constVars)));
            }
            for (Future task : pending) {
                try {
                    Pair pair = (Pair) task.get();
                    Redwood.log(ConstantsAndVariables.extremedebug, "Pattern " + compile + " extracted phrases " + pair.first());
                    twoDimensionalCounter.addAll((TwoDimensionalCounterInterface) pair.first());
                    collectionValuedMap.addAll((CollectionValuedMap<E, Triple<String, Integer, Integer>>) pair.second());
                } catch (Exception ex) {
                    if (ex instanceof InterruptedException) {
                        // Preserve the interrupt for callers further up the stack.
                        Thread.currentThread().interrupt();
                    }
                    pool.shutdownNow();
                    throw new RuntimeException(ex);
                }
            }
        } finally {
            // Always release the worker threads, including when submission or merging fails.
            pool.shutdown();
        }
    }

    /**
     * Resolves, for each pattern, the sentences (id to tokens) named by its sentence-id set.
     *
     * <p>When {@code constVars.batchProcessSents} is set, the files listed in
     * {@code Data.sentId2File} for the requested ids are deserialized and scanned; otherwise the
     * sentences are taken from {@code Data.sents}.
     *
     * @param map pattern to the ids of the sentences it should be applied to
     * @return pattern to (sentence id to tokens); every key of {@code map} is present in the result
     * @throws RuntimeException wrapping an {@code IOException} or {@code ClassNotFoundException}
     *     raised while reading a sentence file
     */
    protected Map<E, Map<String, List<CoreLabel>>> getSentences(Map<E, Set<String>> map) {
        try {
            Set<File> filesToRead = new HashSet<>();
            Map<E, Map<String, List<CoreLabel>>> sentsByPattern = new HashMap<>();
            CollectionValuedMap<String, E> patternsBySentId = new CollectionValuedMap<>();

            // Pass 1: invert the request (sentence id -> patterns) and collect the files to load.
            for (Map.Entry<E, Set<String>> request : map.entrySet()) {
                E pattern = request.getKey();
                if (!sentsByPattern.containsKey(pattern)) {
                    sentsByPattern.put(pattern, new HashMap<String, List<CoreLabel>>());
                }
                for (String sentId : request.getValue()) {
                    patternsBySentId.add(sentId, pattern);
                    if (this.constVars.batchProcessSents) {
                        File source = Data.sentId2File.get(sentId);
                        if (!$assertionsDisabled && source == null) {
                            throw new AssertionError("How come no file for sentence " + sentId);
                        }
                        filesToRead.add(source);
                    }
                }
            }

            // Pass 2: hand each available sentence to every pattern that asked for it.
            if (this.constVars.batchProcessSents) {
                for (File source : filesToRead) {
                    @SuppressWarnings("unchecked")
                    Map<String, List<CoreLabel>> loaded = (Map<String, List<CoreLabel>>) IOUtils.readObjectFromFile(source);
                    for (Map.Entry<String, List<CoreLabel>> sentence : loaded.entrySet()) {
                        for (E pattern : patternsBySentId.get(sentence.getKey())) {
                            sentsByPattern.get(pattern).put(sentence.getKey(), sentence.getValue());
                        }
                    }
                }
            } else {
                for (Map.Entry<String, List<CoreLabel>> sentence : Data.sents.entrySet()) {
                    for (E pattern : patternsBySentId.get(sentence.getKey())) {
                        sentsByPattern.get(pattern).put(sentence.getKey(), sentence.getValue());
                    }
                }
            }
            return sentsByPattern;
        } catch (IOException | ClassNotFoundException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Applies every pattern in {@code counter} to the sentences the inverted index returns for it.
     *
     * <p>Before matching, the variables of {@code Token.env} are copied into each environment held
     * in {@code constVars.env}.
     *
     * @param counter patterns to apply (only the key set is used here)
     * @param str the label being learned
     * @param twoDimensionalCounter receives the extracted (first, second) pair to pattern counts
     * @param collectionValuedMap receives the per-pattern match triples
     */
    public void applyPats(Counter<E> counter, String str, TwoDimensionalCounter<Pair<String, String>, E> twoDimensionalCounter, CollectionValuedMap<E, Triple<String, Integer, Integer>> collectionValuedMap) {
        for (Env labelEnv : this.constVars.env.values()) {
            labelEnv.getVariables().putAll(Token.env.getVariables());
        }
        Map<E, Map<String, List<CoreLabel>>> sentsByPattern =
                getSentences(this.constVars.invertedIndex.queryIndex(counter.keySet()));
        for (Map.Entry<E, Map<String, List<CoreLabel>>> work : sentsByPattern.entrySet()) {
            E pattern = work.getKey();
            Map<String, List<CoreLabel>> sentences = work.getValue();
            runParallelApplyPats(sentences, str, pattern, twoDimensionalCounter, collectionValuedMap);
        }
        Redwood.log(Redwood.DBG, "# words/lemma and pattern pairs are " + twoDimensionalCounter.size());
    }

    /**
     * Accumulates word/lemma to pattern counts from precomputed per-token patterns, without running
     * the patterns over the text.
     *
     * <p>For every token position of every sentence, each pattern in {@code counter} that is also in
     * that position's pattern set increments the count for the pair (token word, token lemma).
     *
     * @param map sentence id to tokens
     * @param patternsForEachToken source of the per-token pattern sets, queried by sentence id
     * @param counter the patterns of interest (only the key set is used)
     * @param twoDimensionalCounter receives the increments
     * @throws RuntimeException if no patterns are stored for one of the sentence ids
     */
    private void statsWithoutApplyingPatterns(Map<String, List<CoreLabel>> map, PatternsForEachToken patternsForEachToken, Counter<E> counter, TwoDimensionalCounter<Pair<String, String>, E> twoDimensionalCounter) {
        for (Map.Entry<String, List<CoreLabel>> sentence : map.entrySet()) {
            Map<?, ?> patternsByPosition = patternsForEachToken.getPatternsForAllTokens(sentence.getKey());
            if (patternsByPosition == null) {
                throw new RuntimeException("How come there are no patterns for " + sentence.getKey());
            }
            for (Map.Entry<?, ?> position : patternsByPosition.entrySet()) {
                Set<?> patternsHere = (Set<?>) position.getValue();
                // Looked up lazily: the token is only fetched once some pattern of interest matches.
                CoreLabel token = null;
                for (E candidate : counter.keySet()) {
                    if (!patternsHere.contains(candidate)) {
                        continue;
                    }
                    if (token == null) {
                        int index = ((Integer) position.getKey()).intValue();
                        token = sentence.getValue().get(index);
                    }
                    twoDimensionalCounter.incrementCount(new Pair<>(token.word(), token.lemma()), candidate);
                }
            }
        }
    }

    /**
     * Worker behind {@code learnNewPhrases}: gathers phrase/pattern evidence, scores the phrases and
     * returns the ones selected for the label {@code str}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Collect (word, lemma) to pattern counts, either from precomputed per-token patterns
     *       ({@code constVars.doNotApplyPatterns}) or by applying the patterns in {@code counter}.</li>
     *   <li>If {@code z} is set, fill {@code Data.processedDataFreq} from {@code Data.rawFreq}.</li>
     *   <li>For {@code WordScoring.BPB}, return the single best word by pattern weight.</li>
     *   <li>For {@code WordScoring.WEIGHTEDNORM}, merge the evidence into
     *       {@code twoDimensionalCounter} / {@code twoDimensionalCounter2}, score with the phrase
     *       scorer, pick the top words, optionally write {@code words.json}, and optionally log the
     *       justification.</li>
     * </ol>
     *
     * @param str the label being learned
     * @param patternsForEachToken per-token patterns; only consulted when patterns are not applied
     * @param counter patterns to apply; their counts serve as pattern weights in the BPB branch
     * @param counter2 passed through to the phrase scorer
     * @param set the label's current dictionary words; passed to the scorer, removed in the BPB branch
     * @param collectionValuedMap receives match triples when patterns are applied
     * @param counter3 output: cleared and refilled with the scores of all scored phrases
     * @param twoDimensionalCounter word to pattern counts used for scoring (updated in place)
     * @param twoDimensionalCounter2 word to pattern counts for all extracted words (updated in place)
     * @param twoDimensionalCounter3 pattern to words; used to list "reasonwords" in the JSON output
     * @param str2 identifier used as a sub-directory of {@code constVars.outDir}
     * @param set2 additional words to ignore when choosing; may be {@code null} or empty
     * @param z whether {@code Data.processedDataFreq} must be computed in this call
     * @return the selected phrases with their scores (possibly empty)
     * @throws IOException if writing or re-reading the JSON output fails
     * @throws ClassNotFoundException declared for compatibility with callers
     * @throws RuntimeException for an unknown word-scoring or normalization setting
     */
    private Counter<String> learnNewPhrasesPrivate(String str, PatternsForEachToken patternsForEachToken, Counter<E> counter, Counter<E> counter2, Set<String> set, CollectionValuedMap<E, Triple<String, Integer, Integer>> collectionValuedMap, Counter<String> counter3, TwoDimensionalCounter<String, E> twoDimensionalCounter, TwoDimensionalCounter<String, E> twoDimensionalCounter2, TwoDimensionalCounter<E, String> twoDimensionalCounter3, String str2, Set<String> set2, boolean z) throws IOException, ClassNotFoundException {
        TwoDimensionalCounter<Pair<String, String>, E> extracted = new TwoDimensionalCounter<>();
        if (this.constVars.doNotApplyPatterns) {
            ConstantsAndVariables.DataSentsIterator sentsIterator = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
            while (sentsIterator.hasNext()) {
                statsWithoutApplyingPatterns(sentsIterator.next().first(), patternsForEachToken, counter, extracted);
            }
        } else if (counter.size() > 0) {
            applyPats(counter, str, extracted, collectionValuedMap);
        }

        if (z) {
            computeProcessedDataFreq();
        }

        if (!this.constVars.wordScoring.equals(GetPatternsFromDataMultiClass.WordScoring.WEIGHTEDNORM)) {
            if (!this.constVars.wordScoring.equals(GetPatternsFromDataMultiClass.WordScoring.BPB)) {
                throw new RuntimeException("wordscoring " + this.constVars.wordScoring + " not identified");
            }
            // NOTE(review): this branch does not consult the evidence gathered above in this call;
            // it only merges the caller-supplied counters. Confirm that this is intended.
            return selectBestPatternWord(counter, set, twoDimensionalCounter, twoDimensionalCounter2);
        }

        // Words (or lemmas) known to belong to other semantic classes are kept out of the scoring
        // counter but are still recorded in the "all extracted" counter.
        for (Pair<String, String> wordAndLemma : extracted.firstKeySet()) {
            Set<String> otherClassWords = this.constVars.getOtherSemanticClassesWords();
            if (!otherClassWords.contains(wordAndLemma.first()) && !otherClassWords.contains(wordAndLemma.second())) {
                twoDimensionalCounter.addAll(wordAndLemma.first(), extracted.getCounter(wordAndLemma));
            }
            twoDimensionalCounter2.addAll(wordAndLemma.first(), extracted.getCounter(wordAndLemma));
        }
        removeKeys(twoDimensionalCounter, ConstantsAndVariables.getStopWords());

        Counter<String> phraseScores = this.phraseScorer.scorePhrases(str, twoDimensionalCounter, twoDimensionalCounter2, counter2, set, false);

        // Always work on a private copy: this set is extended below and may also be extended by
        // chooseTopWords, and getOtherSemanticClassesWords() may hand back the live shared set.
        // Mutating that set would make this label's own words look like other-class words.
        Set<String> ignoreWords;
        if (set2 == null || set2.isEmpty()) {
            ignoreWords = new HashSet<String>(this.constVars.getOtherSemanticClassesWords());
        } else {
            ignoreWords = CollectionUtils.unionAsSet(set2, this.constVars.getOtherSemanticClassesWords());
        }
        ignoreWords.addAll(this.constVars.getLabelDictionary().get(str));

        Counter<String> chosenWords = chooseTopWords(phraseScores, twoDimensionalCounter, phraseScores, ignoreWords, this.constVars.thresholdWordExtract);
        counter3.clear();
        Counters.addInPlace(counter3, phraseScores);
        Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Words for " + str + " : " + Counters.toSortedString(chosenWords, chosenWords.size(), "%1$s:%2$.2f", LinearClassifier.TEXT_SERIALIZATION_DELIMITER));

        if (this.constVars.outDir != null && !this.constVars.outDir.isEmpty()) {
            writeJustificationJson(str, str2, chosenWords, twoDimensionalCounter2, twoDimensionalCounter3);
        }

        if (this.constVars.justify) {
            Redwood.log(Redwood.DBG, "\nJustification for phrases:\n");
            for (String word : chosenWords.keySet()) {
                Counter<E> patternsForWord = twoDimensionalCounter2.getCounter(word);
                Redwood.log(Redwood.DBG, "Phrase " + word + " extracted because of patterns: \t" + Counters.toSortedString(patternsForWord, patternsForWord.size(), "%1$s:%2$f", "\n"));
            }
        }
        return chosenWords;
    }

    /** Fills {@code Data.processedDataFreq} from {@code Data.rawFreq} using the scorer's {@code wordFreqNorm}. */
    private void computeProcessedDataFreq() {
        if (this.phraseScorer.wordFreqNorm.equals(PhraseScorer.Normalization.NONE)) {
            Data.processedDataFreq = Data.rawFreq;
            return;
        }
        Redwood.log(Redwood.DBG, "computing processed freq");
        for (Map.Entry<String, Double> rawEntry : Data.rawFreq.entrySet()) {
            double raw = rawEntry.getValue().doubleValue();
            double normalized;
            if (this.phraseScorer.wordFreqNorm.equals(PhraseScorer.Normalization.SQRT)) {
                normalized = Math.sqrt(raw);
            } else if (this.phraseScorer.wordFreqNorm.equals(PhraseScorer.Normalization.LOG)) {
                normalized = 1.0d + Math.log(raw);
            } else {
                throw new RuntimeException("can't understand the normalization");
            }
            Data.processedDataFreq.setCount(rawEntry.getKey(), normalized);
        }
    }

    /**
     * BPB word selection: merges {@code wordsPatExtracted} into {@code terms}, then returns the single
     * word (not in {@code knownWords}) whose best pattern has the highest weight, breaking ties by
     * the word's count with that pattern. Returns an empty counter when no word qualifies.
     */
    private Counter<String> selectBestPatternWord(Counter<E> patternWeights, Set<String> knownWords, TwoDimensionalCounter<String, E> terms, TwoDimensionalCounter<String, E> wordsPatExtracted) {
        Counters.addInPlace(terms, wordsPatExtracted);
        Counter<String> bestWeightPerWord = new ClassicCounter<>();
        Map<String, E> bestPatternPerWord = new HashMap<>();
        for (Map.Entry<String, ClassicCounter<E>> wordEntry : terms.entrySet()) {
            Counter<E> weights = new ClassicCounter<>();
            for (E pattern : wordEntry.getValue().keySet()) {
                weights.setCount(pattern, patternWeights.getCount(pattern));
            }
            bestWeightPerWord.setCount(wordEntry.getKey(), Counters.max(weights));
            bestPatternPerWord.put(wordEntry.getKey(), Counters.argmax(weights));
        }
        Counters.removeKeys(bestWeightPerWord, knownWords);
        // Small epsilon so that words tied with the maximum (up to rounding) are all candidates.
        Set<String> topWords = Counters.keysAbove(bestWeightPerWord, Counters.max(bestWeightPerWord) - 1.0E-10d);
        if (topWords.isEmpty()) {
            return new ClassicCounter<String>();
        }
        String selected = null;
        if (topWords.size() == 1) {
            selected = topWords.iterator().next();
        } else {
            double bestCount = Double.NEGATIVE_INFINITY;
            for (String candidate : topWords) {
                double count = terms.getCount(candidate, bestPatternPerWord.get(candidate));
                if (count > bestCount) {
                    bestCount = count;
                    selected = candidate;
                }
            }
        }
        Redwood.log(ConstantsAndVariables.minimaldebug, "Selected Words: " + selected);
        return Counters.asCounter(Arrays.asList(selected));
    }

    /**
     * Writes (or appends to) {@code <outDir>/<identifier>/<label>/words.json}: one JSON array per
     * call, holding for each chosen word its reason words, patterns, score and entity text.
     * Marks the label in {@code writtenInJustification} once the file has been written.
     */
    private void writeJustificationJson(String label, String identifier, Counter<String> chosenWords, TwoDimensionalCounter<String, E> wordsPatExtracted, TwoDimensionalCounter<E, String> patternsAndWords) throws IOException {
        String outputDir = this.constVars.outDir + "/" + identifier + "/" + label;
        IOUtils.ensureDir(new File(outputDir));

        // word -> the words associated with any pattern that extracted it
        TwoDimensionalCounter<String, String> reasonForWords = new TwoDimensionalCounter<>();
        for (String word : chosenWords.keySet()) {
            for (E pattern : wordsPatExtracted.getCounter(word).keySet()) {
                Iterator<String> reasons = patternsAndWords.getCounter(pattern).iterator();
                while (reasons.hasNext()) {
                    reasonForWords.incrementCount(word, reasons.next());
                }
            }
        }

        Redwood.log(ConstantsAndVariables.minimaldebug, "Saving output in " + outputDir);
        String wordsFile = outputDir + "/words.json";
        JsonArrayBuilder allIterations = Json.createArrayBuilder();
        if (Boolean.TRUE.equals(this.writtenInJustification.get(label))) {
            // Carry over what earlier calls wrote; the reader is closed even if parsing fails.
            try (JsonReader reader = Json.createReader(new BufferedInputStream(new FileInputStream(wordsFile)))) {
                for (JsonValue previous : reader.readArray()) {
                    allIterations.add(previous);
                }
            }
        }

        JsonArrayBuilder thisIteration = Json.createArrayBuilder();
        for (String word : reasonForWords.firstKeySet()) {
            JsonArrayBuilder reasonWords = Json.createArrayBuilder();
            for (String reason : reasonForWords.getCounter(word).keySet()) {
                reasonWords.add(reason);
            }
            JsonArrayBuilder patterns = Json.createArrayBuilder();
            Iterator<E> patternIterator = wordsPatExtracted.getCounter(word).iterator();
            while (patternIterator.hasNext()) {
                patterns.add(patternIterator.next().toStringSimple());
            }
            JsonObjectBuilder wordObject = Json.createObjectBuilder();
            wordObject.add("reasonwords", reasonWords);
            wordObject.add("patterns", patterns);
            wordObject.add("score", chosenWords.getCount(word));
            wordObject.add("entity", word);
            thisIteration.add(wordObject.build());
        }
        allIterations.add(thisIteration);
        IOUtils.writeStringToFile(allIterations.build().toString(), wordsFile, "utf8");
        this.writtenInJustification.put(label, true);
    }

    /**
     * Returns whatever the underlying phrase scorer reports as its learned scores.
     *
     * @return the result of {@code phraseScorer.getLearnedScores()}
     */
    Counter<String> getLearnedScores() {
        final PhraseScorer scorer = this.phraseScorer;
        return scorer.getLearnedScores();
    }

    static {
        $assertionsDisabled = !ScorePhrases.class.desiredAssertionStatus();
    }
}
