package LbjTagger;

import LBJ2.parse.LinkedVector;
import StringStatisticsUtils.OccurrenceCounter;
import java.util.Hashtable;
import java.util.Vector;
import lbj.NETaggerLevel1;
import lbj.NETaggerLevel2;

/* loaded from: input_file:LbjTagger/GlobalFeatures.class */
/**
 * Static helpers that attach document-level aggregation data to {@link NEWord} tokens.
 *
 * <p>Two families of data are produced:
 * <ul>
 *   <li>{@link OccurrenceCounter}s stored in the {@code mostFrequentLevel1*} fields of a word,
 *       filled from the labels / entities of other tokens in a bounded window around it
 *       (only when {@code Parameters.featuresToUse} contains {@code "PredictionsLevel1"});</li>
 *   <li>string-keyed counts stored in {@code NEWord.nonLocalFeatures}, filled by
 *       {@link #annotate(NEWord)}.</li>
 * </ul>
 *
 * <p>All windows are walked through the {@code previousIgnoreSentenceBoundary} /
 * {@code nextIgnoreSentenceBoundary} links of {@link NEWord}.
 */
public class GlobalFeatures {
    /** Key in {@code Parameters.featuresToUse} that switches on the level-1 prediction aggregation. */
    private static final String PREDICTIONS_LEVEL1 = "PredictionsLevel1";

    /** Maximum number of link hops, in each direction, scanned by the level-1 aggregation methods. */
    private static final int PREDICTION_WINDOW = 1000;

    /** Maximum number of link hops, in each direction, scanned by {@link #annotate(NEWord)}. */
    private static final int CONTEXT_WINDOW = 200;

    /** Number of neighbours on each side of the target that {@link #annotate(NEWord)} excludes from gazetteer aggregation. */
    private static final int LOCAL_NEIGHBOURHOOD = 3;

    /** Number of tokens on each side of a matching occurrence that {@link #annotate(NEWord)} records as context. */
    private static final int CONTEXT_RADIUS = 2;

    /**
     * Prints, for every word of every sentence, its form, entity, entity type and the contents of
     * its seven {@code mostFrequentLevel1*} counters to standard output.
     *
     * <p>Counters that were never assigned (the aggregation methods in this class skip some words)
     * are printed as an empty section instead of raising a {@link NullPointerException}.
     *
     * @param vector the sentences to dump; each element is expected to contain {@link NEWord}s
     */
    public static void displayLevel1AggregationData(Vector<LinkedVector> vector) {
        for (int s = 0; s < vector.size(); s++) {
            LinkedVector sentence = vector.elementAt(s);
            for (int t = 0; t < sentence.size(); t++) {
                NEWord word = (NEWord) sentence.get(t);
                System.out.println("Word: " + word.form);
                System.out.println("\t entity: " + word.entity);
                System.out.println("\t enity type: " + word.entityType);
                printDistribution("\t token in entity supertype: ", word.mostFrequentLevel1TokenInEntityType);
                printDistribution("\t supertype: ", word.mostFrequentLevel1SuperEntityType);
                printDistribution("\t exact entity type: ", word.mostFrequentLevel1ExactEntityType);
                printDistribution("\t token maj: ", word.mostFrequentLevel1Prediction);
                printDistribution("\t token maj type: ", word.mostFrequentLevel1PredictionType);
                printDistribution("\t token maj not-O: ", word.mostFrequentLevel1NotOutsidePrediction);
                printDistribution("\t token maj type not-O: ", word.mostFrequentLevel1NotOutsidePredictionType);
            }
        }
    }

    /**
     * Prints a section header followed by one {@code token:count/total} line per token of the counter.
     *
     * @param header  line printed before the entries
     * @param counter counter to dump; {@code null} prints the header only
     */
    private static void printDistribution(String header, OccurrenceCounter counter) {
        System.out.println(header);
        if (counter == null) {
            return;
        }
        for (String token : counter.getTokens()) {
            // NOTE(review): if getCount() and totalTokens are both integral this division truncates;
            // confirm against OccurrenceCounter.
            System.out.println("\t\t" + token + ":" + (counter.getCount(token) / counter.totalTokens));
        }
    }

    /**
     * Fills the token-level prediction counters of every word in the document.
     *
     * @param vector the sentences to process; each element is expected to contain {@link NEWord}s
     */
    public static void aggregateLevel1Predictions(Vector<LinkedVector> vector) {
        for (int s = 0; s < vector.size(); s++) {
            LinkedVector sentence = vector.elementAt(s);
            for (int t = 0; t < sentence.size(); t++) {
                aggregateTokenLevelLevel1Predictions((NEWord) sentence.get(t));
            }
        }
    }

    /**
     * Collects the labels of the other capitalised occurrences of the same form (compared ignoring
     * case) inside the prediction window and stores them in the four token-level counters of the word.
     *
     * <p>Words whose form does not start with an upper-case character are left untouched. For
     * capitalised words the four counters are always assigned; they stay empty when the
     * {@code "PredictionsLevel1"} feature is disabled.
     *
     * @param nEWord the word whose counters are (re)computed
     */
    private static void aggregateTokenLevelLevel1Predictions(NEWord nEWord) {
        if (!startsUpperCase(nEWord.form)) {
            return;
        }
        OccurrenceCounter predictions = new OccurrenceCounter();
        OccurrenceCounter predictionTypes = new OccurrenceCounter();
        OccurrenceCounter notOutsidePredictions = new OccurrenceCounter();
        OccurrenceCounter notOutsidePredictionTypes = new OccurrenceCounter();

        if (Parameters.featuresToUse.containsKey(PREDICTIONS_LEVEL1)) {
            NEWord end = windowEnd(nEWord, PREDICTION_WINDOW);
            for (NEWord cur = windowStart(nEWord, PREDICTION_WINDOW); cur != end; cur = cur.nextIgnoreSentenceBoundary) {
                if (cur == nEWord || !cur.form.equalsIgnoreCase(nEWord.form) || !startsUpperCase(cur.form)) {
                    continue;
                }
                String label = level1Label(cur);
                // In training the label may be replaced by a random one, as decided by the
                // configured generator; the generator is only consulted in training mode.
                if (NETaggerLevel2.isTraining && Parameters.level1AggregationRandomGenerator.useNoise()) {
                    label = Parameters.level1AggregationRandomGenerator.randomLabel();
                }
                predictions.addToken(label);
                if (!label.equals("O")) {
                    notOutsidePredictions.addToken(label);
                }
                if (label.indexOf('-') > -1) {
                    // Drop the two-character prefix (e.g. "B-") so only the type remains.
                    label = label.substring(2);
                    notOutsidePredictionTypes.addToken(label);
                }
                predictionTypes.addToken(label);
            }
        }

        nEWord.mostFrequentLevel1Prediction = predictions;
        nEWord.mostFrequentLevel1NotOutsidePrediction = notOutsidePredictions;
        nEWord.mostFrequentLevel1PredictionType = predictionTypes;
        nEWord.mostFrequentLevel1NotOutsidePredictionType = notOutsidePredictionTypes;
    }

    /**
     * Marks the entities of the document (see {@link #annotatePredictionLevel1Entities(Vector)}) and
     * then computes the exact-entity, super-entity and token-in-entity counters of every word,
     * in that order.
     *
     * @param vector the sentences to process; each element is expected to contain {@link NEWord}s
     */
    public static void aggregateEntityLevelPredictions(Vector<LinkedVector> vector) {
        annotatePredictionLevel1Entities(vector);
        for (int s = 0; s < vector.size(); s++) {
            LinkedVector sentence = vector.elementAt(s);
            for (int t = 0; t < sentence.size(); t++) {
                NEWord word = (NEWord) sentence.get(t);
                setMajorityExactEntityFeatures(word);
                setMajoritySuperEntityFeatures(word);
                setMajorityTokenInEntityFeatures(word);
            }
        }
    }

    /**
     * Counts the types of entities in the prediction window whose text strictly contains the text
     * of this word's entity, and stores the result in {@code mostFrequentLevel1SuperEntityType}.
     *
     * <p>Does nothing when the {@code "PredictionsLevel1"} feature is disabled or the word has no entity.
     *
     * @param nEWord the word whose counter is computed
     */
    public static void setMajoritySuperEntityFeatures(NEWord nEWord) {
        if (!Parameters.featuresToUse.containsKey(PREDICTIONS_LEVEL1) || nEWord.entity == null) {
            return;
        }
        OccurrenceCounter counter = new OccurrenceCounter();
        NEWord end = windowEnd(nEWord, PREDICTION_WINDOW);
        NEWord cur = windowStart(nEWord, PREDICTION_WINDOW);
        while (cur != end) {
            boolean isSuperEntity = cur != nEWord
                    && cur.entity != null
                    && cur.entity.indexOf(nEWord.entity) > -1
                    && !cur.entity.equals(nEWord.entity);
            if (isSuperEntity) {
                counter.addToken(cur.entityType);
                // Count each entity once: jump over its remaining tokens.
                cur = skipPastEntity(cur, end);
            } else {
                cur = cur.nextIgnoreSentenceBoundary;
            }
        }
        nEWord.mostFrequentLevel1SuperEntityType = counter;
    }

    /**
     * Counts the types of entities in the prediction window that contain this word's lower-cased
     * form followed by a space without consisting of exactly that, and stores the result in
     * {@code mostFrequentLevel1TokenInEntityType}. Tokens of the word's own entity are excluded.
     *
     * <p>Does nothing when the {@code "PredictionsLevel1"} feature is disabled or the form does not
     * start with an upper-case character.
     *
     * @param nEWord the word whose counter is computed
     */
    public static void setMajorityTokenInEntityFeatures(NEWord nEWord) {
        if (!Parameters.featuresToUse.containsKey(PREDICTIONS_LEVEL1) || !startsUpperCase(nEWord.form)) {
            return;
        }
        OccurrenceCounter counter = new OccurrenceCounter();
        Hashtable<NEWord, Boolean> ownEntity = collectOwnEntityTokens(nEWord);
        // Entity strings are built as lower-cased forms each followed by a space.
        String needle = nEWord.form.toLowerCase() + " ";
        NEWord end = windowEnd(nEWord, PREDICTION_WINDOW);
        NEWord cur = windowStart(nEWord, PREDICTION_WINDOW);
        while (cur != end) {
            boolean containsToken = !ownEntity.containsKey(cur)
                    && cur.entity != null
                    && cur.entity.indexOf(needle) > -1
                    && !cur.entity.equals(needle);
            if (containsToken) {
                counter.addToken(cur.entityType);
                cur = skipPastEntity(cur, end);
            } else {
                cur = cur.nextIgnoreSentenceBoundary;
            }
        }
        nEWord.mostFrequentLevel1TokenInEntityType = counter;
    }

    /**
     * Counts the types of the other entities in the prediction window whose text equals the text of
     * this word's entity, and stores the result in {@code mostFrequentLevel1ExactEntityType}.
     * Entities whose text is just the current token's own form are not counted.
     *
     * <p>Does nothing when the {@code "PredictionsLevel1"} feature is disabled or the word has no entity.
     *
     * @param nEWord the word whose counter is computed
     */
    public static void setMajorityExactEntityFeatures(NEWord nEWord) {
        if (!Parameters.featuresToUse.containsKey(PREDICTIONS_LEVEL1) || nEWord.entity == null) {
            return;
        }
        OccurrenceCounter counter = new OccurrenceCounter();
        Hashtable<NEWord, Boolean> ownEntity = collectOwnEntityTokens(nEWord);
        NEWord end = windowEnd(nEWord, PREDICTION_WINDOW);
        NEWord cur = windowStart(nEWord, PREDICTION_WINDOW);
        while (cur != end) {
            boolean sameEntityElsewhere = !ownEntity.containsKey(cur)
                    && cur.entity != null
                    && cur.entity.equals(nEWord.entity)
                    && !cur.entity.equalsIgnoreCase((cur.form + " ").toLowerCase());
            if (sameEntityElsewhere) {
                counter.addToken(cur.entityType);
                cur = skipPastEntity(cur, end);
            } else {
                cur = cur.nextIgnoreSentenceBoundary;
            }
        }
        nEWord.mostFrequentLevel1ExactEntityType = counter;
    }

    /**
     * Walks the whole document and, for every run of tokens that starts with a label beginning with
     * {@code "B-"} or {@code "U-"}, sets {@code entity} (the lower-cased forms, each followed by a
     * space) and {@code entityType} on every token of the run.
     *
     * <p>The label is {@code neLabel} in training mode and {@code neTypeLevel1} otherwise. The type
     * may be replaced by a random one, as decided by {@code Parameters.level1AggregationRandomGenerator}.
     * An empty document is a no-op.
     *
     * @param vector the sentences to process; each element is expected to contain {@link NEWord}s
     */
    public static void annotatePredictionLevel1Entities(Vector<LinkedVector> vector) {
        NEWord word = firstWord(vector);
        while (word != null) {
            String label = level1Label(word);
            if (!label.startsWith("B-") && !label.startsWith("U-")) {
                word = word.nextIgnoreSentenceBoundary;
                continue;
            }
            String type = label.substring(2);
            StringBuilder text = new StringBuilder().append(word.form).append(' ');
            // Extend the run while the following labels carry the same type and do not open a new entity.
            NEWord end = word.nextIgnoreSentenceBoundary;
            while (end != null && continuesEntity(level1Label(end), type)) {
                text.append(end.form).append(' ');
                end = end.nextIgnoreSentenceBoundary;
            }
            if (Parameters.level1AggregationRandomGenerator.useNoise()) {
                type = Parameters.level1AggregationRandomGenerator.randomType();
            }
            String entity = text.toString().toLowerCase();
            while (word != end) {
                word.entity = entity;
                word.entityType = type;
                word = word.nextIgnoreSentenceBoundary;
            }
        }
    }

    /**
     * Updates {@code nEWord.nonLocalFeatures} with counts gathered from other occurrences of the
     * same form (compared ignoring case) within the context window.
     *
     * <p>Active only when {@code Parameters.featuresToUse} contains {@code "aggregateContext"} or
     * {@code "aggregateGazetteerMatches"}, and only for words whose form starts with an upper-case
     * character (no feature can fire otherwise). For each occurrence:
     * <ul>
     *   <li>lower-cased occurrence: {@code "appearsDownCased"};</li>
     *   <li>other capitalised occurrence, gazetteer aggregation enabled and occurrence not within
     *       three tokens of the target: one count per matched gazetteer entry type;</li>
     *   <li>other capitalised occurrence, context aggregation enabled: sentence-position features
     *       and {@code "context:"} / {@code "contextPath:"} features for the surrounding tokens.</li>
     * </ul>
     *
     * @param nEWord the word whose non-local features are updated
     */
    public static void annotate(NEWord nEWord) {
        boolean useContext = Parameters.featuresToUse.containsKey("aggregateContext");
        boolean useGazetteers = Parameters.featuresToUse.containsKey("aggregateGazetteerMatches");
        if (!useContext && !useGazetteers) {
            return;
        }
        // Every feature below requires a capitalised target, so skip the window scan otherwise.
        if (!startsUpperCase(nEWord.form)) {
            return;
        }

        Hashtable<NEWord, Boolean> nearby = collectNeighbourhood(nEWord, LOCAL_NEIGHBOURHOOD);
        NEWord end = windowEnd(nEWord, CONTEXT_WINDOW);
        for (NEWord cur = windowStart(nEWord, CONTEXT_WINDOW); cur != end; cur = cur.nextIgnoreSentenceBoundary) {
            if (!cur.form.equalsIgnoreCase(nEWord.form)) {
                continue;
            }
            // The target form is non-empty here, hence so is any form equal to it ignoring case.
            char first = cur.form.charAt(0);
            if (Character.isLowerCase(first)) {
                updateFeatureCounts(nEWord, "appearsDownCased");
            } else if (Character.isUpperCase(first) && cur != nEWord) {
                if (useGazetteers && !nearby.containsKey(cur)) {
                    aggregateGazetteerMatches(nEWord, cur);
                }
                if (useContext) {
                    aggregateContext(nEWord, cur);
                }
            }
        }
    }

    /**
     * Adds one count to the target for every gazetteer entry type matched at the occurrence.
     *
     * @param target     word whose {@code nonLocalFeatures} are updated
     * @param occurrence other occurrence of the same form
     */
    private static void aggregateGazetteerMatches(NEWord target, NEWord occurrence) {
        // NOTE(review): the bound is the size of matchedMultiTokenGazEntries while the elements come
        // from matchedMultiTokenGazEntryTypes; presumably the two vectors are parallel - confirm.
        for (int i = 0; i < occurrence.matchedMultiTokenGazEntries.size(); i++) {
            updateFeatureCounts(target, occurrence.matchedMultiTokenGazEntryTypes.elementAt(i));
        }
        for (int i = 0; i < occurrence.matchedMultiTokenGazEntryTypesIgnoreCase.size(); i++) {
            updateFeatureCounts(target, occurrence.matchedMultiTokenGazEntryTypesIgnoreCase.elementAt(i));
        }
    }

    /**
     * Adds the sentence-position feature and the surrounding-token features of one occurrence to
     * the target.
     *
     * @param target     word whose {@code nonLocalFeatures} are updated
     * @param occurrence other capitalised occurrence of the same form
     */
    private static void aggregateContext(NEWord target, NEWord occurrence) {
        NEWord previous = (NEWord) occurrence.previous;
        if (previous == null || previous.form.endsWith(".")) {
            updateFeatureCounts(target, "appearancesUpperStartSentence");
        } else {
            updateFeatureCounts(target, "appearancesUpperMiddleSentence");
        }

        NEWord contextEnd = windowEnd(occurrence, CONTEXT_RADIUS);
        NEWord context = occurrence;
        // offset is the signed position of 'context' relative to the occurrence.
        int offset = 0;
        while (offset > -CONTEXT_RADIUS && context.previousIgnoreSentenceBoundary != null) {
            context = context.previousIgnoreSentenceBoundary;
            offset--;
        }
        do {
            updateFeatureCounts(target, "context:" + offset + ":" + context.form);
            String[] prefixes = BrownClusters.getPrefixes(context.form);
            if (prefixes.length > 0) {
                updateFeatureCounts(target, "contextPath:" + offset + ":" + prefixes[0]);
            }
            context = context.nextIgnoreSentenceBoundary;
            offset++;
        } while (context != contextEnd);
    }

    /**
     * Increments the count stored under {@code str} in {@code nEWord.nonLocalFeatures}, starting at 1.
     *
     * @param nEWord word whose feature table is updated
     * @param str    feature name
     */
    private static void updateFeatureCounts(NEWord nEWord, String str) {
        Integer current = nEWord.nonLocalFeatures.get(str);
        nEWord.nonLocalFeatures.put(str, current == null ? 1 : current + 1);
    }

    /** Returns the word reached by following at most {@code maxSteps} backward links from {@code word}. */
    private static NEWord windowStart(NEWord word, int maxSteps) {
        NEWord start = word;
        for (int i = 0; i < maxSteps && start.previousIgnoreSentenceBoundary != null; i++) {
            start = start.previousIgnoreSentenceBoundary;
        }
        return start;
    }

    /**
     * Returns the exclusive end of the forward window: the successor of {@code word} advanced by at
     * most {@code maxSteps} further links, or {@code null} when the list ends first.
     */
    private static NEWord windowEnd(NEWord word, int maxSteps) {
        NEWord end = word.nextIgnoreSentenceBoundary;
        for (int i = 0; i < maxSteps && end != null; i++) {
            end = end.nextIgnoreSentenceBoundary;
        }
        return end;
    }

    /** Advances from {@code cur} past the consecutive tokens sharing its entity string, never beyond {@code end}. */
    private static NEWord skipPastEntity(NEWord cur, NEWord end) {
        String entity = cur.entity;
        while (cur != null && cur.entity != null && cur.entity.equals(entity) && cur != end) {
            cur = cur.nextIgnoreSentenceBoundary;
        }
        return cur;
    }

    /**
     * Returns a table whose keys are {@code word} and the adjacent tokens, in both directions,
     * that carry the same non-null entity string.
     */
    private static Hashtable<NEWord, Boolean> collectOwnEntityTokens(NEWord word) {
        Hashtable<NEWord, Boolean> members = new Hashtable<NEWord, Boolean>();
        members.put(word, Boolean.TRUE);
        if (word.entity == null) {
            return members;
        }
        for (NEWord next = word.nextIgnoreSentenceBoundary;
                next != null && word.entity.equals(next.entity);
                next = next.nextIgnoreSentenceBoundary) {
            members.put(next, Boolean.TRUE);
        }
        for (NEWord prev = word.previousIgnoreSentenceBoundary;
                prev != null && word.entity.equals(prev.entity);
                prev = prev.previousIgnoreSentenceBoundary) {
            members.put(prev, Boolean.TRUE);
        }
        return members;
    }

    /** Returns a table whose keys are {@code word} and up to {@code radius} tokens on each side of it. */
    private static Hashtable<NEWord, Boolean> collectNeighbourhood(NEWord word, int radius) {
        Hashtable<NEWord, Boolean> nearby = new Hashtable<NEWord, Boolean>();
        nearby.put(word, Boolean.TRUE);
        NEWord next = word.nextIgnoreSentenceBoundary;
        for (int i = 0; next != null && i < radius; i++) {
            nearby.put(next, Boolean.TRUE);
            next = next.nextIgnoreSentenceBoundary;
        }
        NEWord prev = word.previousIgnoreSentenceBoundary;
        for (int i = 0; prev != null && i < radius; i++) {
            nearby.put(prev, Boolean.TRUE);
            prev = prev.previousIgnoreSentenceBoundary;
        }
        return nearby;
    }

    /** Returns {@code neLabel} in training mode ({@code NETaggerLevel2.isTraining}) and {@code neTypeLevel1} otherwise. */
    private static String level1Label(NEWord word) {
        return NETaggerLevel2.isTraining ? word.neLabel : word.neTypeLevel1;
    }

    /** Tells whether a label ends with the given type and does not itself open a new entity. */
    private static boolean continuesEntity(String label, String type) {
        return label.endsWith(type) && !label.startsWith("B-") && !label.startsWith("U-");
    }

    /** Tells whether the form is non-empty and starts with an upper-case character. */
    private static boolean startsUpperCase(String form) {
        return form != null && form.length() > 0 && Character.isUpperCase(form.charAt(0));
    }

    /** Returns the first word of the first non-empty sentence, or {@code null} when there is none. */
    private static NEWord firstWord(Vector<LinkedVector> vector) {
        for (int s = 0; s < vector.size(); s++) {
            LinkedVector sentence = vector.elementAt(s);
            if (sentence.size() > 0) {
                return (NEWord) sentence.get(0);
            }
        }
        return null;
    }

    /**
     * Manual smoke test: loads a fixed configuration and data file, runs the level-1 aggregation
     * in training mode and dumps the result to standard output.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        System.out.println("Reading config");
        Parameters.readConfigAndLoadExternalData("Config/withLookaheadDualTokenizingBILOU.config");
        System.out.println("Reading data");
        Vector<LinkedVector> data = BracketFileManager.readAndAnnotate("Data/temp.txt");
        System.out.println("extracting non-local features");
        NETaggerLevel1.isTraining = true;
        NETaggerLevel2.isTraining = true;
        if (Parameters.featuresToUse.containsKey(PREDICTIONS_LEVEL1)) {
            aggregateLevel1Predictions(data);
            aggregateEntityLevelPredictions(data);
            displayLevel1AggregationData(data);
        }
    }
}
