package LbjTagger;

import LBJ2.parse.LinkedVector;
import StringStatisticsUtils.OccurrenceCounter;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;

/* loaded from: input_file:LbjTagger/PatternExtractor.class */
/**
 * Derives "left-context / entity-type / right-context" patterns from labelled
 * named-entity spans and marks words whose surroundings match a pattern that
 * recurs often enough nearby.
 *
 * <p>A pattern is stored as three tab-separated fields: the token preceding
 * the span, the entity type, and the token following the span. Context tokens
 * are either {@code "*null*"} (no neighbour inside the sentence) or a
 * letter-free neighbour form passed through {@link #normalizeDigits(String)}.
 */
public class PatternExtractor {
    /** Maximum number of words visited in each direction when collecting nearby patterns. */
    public static final int contextSize = 50;
    /** Minimum accumulated count a pattern needs before it is applied to a word. */
    public static final int appearanceThres = 3;

    /** Context token used when a word has no neighbour inside its sentence. */
    private static final String NULL_TOKEN = "*null*";
    /** Replacement emitted by {@link #normalizeDigits(String)} for numeric-looking tokens. */
    private static final String DIGITS_TOKEN = "*D*";
    /** Number of candidate span-end words tried when matching a pattern's right context. */
    private static final int MAX_SPAN_WORDS = 5;
    /** Probability with which a gold label is swapped for a random type in {@link #getPatterns}. */
    private static final double LABEL_NOISE_PROBABILITY = 0.3d;

    /**
     * Resets {@code activePatterns} on every word, then, for each word, counts
     * the patterns anchored within {@link #contextSize} words on either side
     * and applies those whose count reaches {@link #appearanceThres}.
     *
     * @param vector sentences to annotate; each element holds {@code NEWord}s
     * @param z      forwarded to {@link #getPatterns(Vector, boolean)}: when
     *               {@code true} patterns are built from {@code neLabel},
     *               otherwise from {@code neTypeLevel1}
     * @param z2     when {@code true}, prints every sentence followed by the
     *               patterns that matched in it to standard output
     */
    public static void annotate(Vector<LinkedVector> vector, boolean z, boolean z2) {
        Hashtable<NEWord, Vector<String>> patterns = getPatterns(vector, z);

        // Start from a clean slate so counts from an earlier run do not leak in.
        for (LinkedVector sentence : vector) {
            for (int pos = 0; pos < sentence.size(); pos++) {
                wordAt(sentence, pos).activePatterns = new OccurrenceCounter();
            }
        }

        for (LinkedVector sentence : vector) {
            Vector<String> debugLines = new Vector<>();
            for (int pos = 0; pos < sentence.size(); pos++) {
                NEWord word = wordAt(sentence, pos);

                // NOTE(review): both scans start at `word` itself, so patterns
                // anchored on this very word are counted twice. Kept as-is to
                // preserve existing behaviour - confirm whether it is intended.
                OccurrenceCounter nearby = new OccurrenceCounter();
                collectPatterns(patterns, word, true, nearby);
                collectPatterns(patterns, word, false, nearby);

                Iterator<String> candidates = nearby.getTokensIterator();
                while (candidates.hasNext()) {
                    String pattern = candidates.next();
                    double count = nearby.getCount(pattern);
                    if (count < appearanceThres) {
                        continue;
                    }
                    // Must run regardless of z2: it records the pattern on the matched words.
                    String rendered = annotateWithPattern(word, pattern, count);
                    if (rendered != null && z2) {
                        debugLines.addElement(rendered + " : " + count);
                    }
                }
            }

            if (z2) {
                for (int pos = 0; pos < sentence.size(); pos++) {
                    System.out.print(wordAt(sentence, pos).form + " ");
                }
                System.out.println("");
                for (String line : debugLines) {
                    System.out.println(line);
                }
            }
        }
    }

    /** Returns the element at {@code index} of {@code sentence} as an {@code NEWord}. */
    private static NEWord wordAt(LinkedVector sentence, int index) {
        return (NEWord) sentence.get(index);
    }

    /**
     * Adds to {@code counter} every pattern anchored on one of up to
     * {@link #contextSize} words starting at {@code start} (inclusive) and
     * following the sentence-boundary-ignoring links.
     *
     * @param forward {@code true} to follow {@code nextIgnoreSentenceBoundary},
     *                {@code false} to follow {@code previousIgnoreSentenceBoundary}
     */
    private static void collectPatterns(Hashtable<NEWord, Vector<String>> patterns, NEWord start,
            boolean forward, OccurrenceCounter counter) {
        NEWord cursor = start;
        for (int step = 0; step < contextSize && cursor != null; step++) {
            // Hashtable never stores null values, so a null result means "no entry".
            Vector<String> anchored = patterns.get(cursor);
            if (anchored != null) {
                for (String pattern : anchored) {
                    counter.addToken(pattern);
                }
            }
            if (forward) {
                cursor = cursor.nextIgnoreSentenceBoundary;
            } else {
                cursor = cursor.previousIgnoreSentenceBoundary;
            }
        }
    }

    /**
     * Tries to match {@code pattern} on a span that begins at {@code start}.
     *
     * <p>The word before {@code start} must equal the pattern's left context,
     * and one of the first {@link #MAX_SPAN_WORDS} words from {@code start}
     * onwards must be followed by the pattern's right context. The first such
     * word closes the span. The match is abandoned if the left context occurs
     * again immediately before any later word of the span. On success the
     * pattern is added, with weight {@code weight}, to {@code activePatterns}
     * of every word in the span.
     *
     * @param start   candidate first word of the span
     * @param pattern whitespace-separated "left type right" triple
     * @param weight  value passed to {@code activePatterns.addToken}
     * @return a printable rendering of the match, or {@code null} if the
     *         pattern does not apply
     * @throws java.util.NoSuchElementException if {@code pattern} has fewer
     *         than three whitespace-separated fields
     */
    private static String annotateWithPattern(NEWord start, String pattern, double weight) {
        StringTokenizer fields = new StringTokenizer(pattern);
        String left = fields.nextToken();
        String type = fields.nextToken();
        String right = fields.nextToken();

        String prevInSentence = start.previous != null ? normalizeDigits(start.previous.form) : NULL_TOKEN;
        String prevAcross = start.previousIgnoreSentenceBoundary != null
                ? normalizeDigits(start.previousIgnoreSentenceBoundary.form) : NULL_TOKEN;
        if (!prevInSentence.equals(left) && !prevAcross.equals(left)) {
            return null;
        }

        NEWord end = start;
        for (int tried = 0; tried < MAX_SPAN_WORDS && end != null; tried++) {
            String nextInSentence = end.next != null ? normalizeDigits(end.next.form) : NULL_TOKEN;
            String nextAcross = end.nextIgnoreSentenceBoundary != null
                    ? normalizeDigits(end.nextIgnoreSentenceBoundary.form) : NULL_TOKEN;

            if (right.equals(nextInSentence) || right.equals(nextAcross)) {
                // Reject spans in which the left context shows up again before a later word.
                NEWord probe = start;
                while (probe != end) {
                    probe = probe.nextIgnoreSentenceBoundary;
                    String before = probe.previous != null ? normalizeDigits(probe.previous.form) : NULL_TOKEN;
                    String beforeAcross = probe.previousIgnoreSentenceBoundary != null
                            ? normalizeDigits(probe.previousIgnoreSentenceBoundary.form) : NULL_TOKEN;
                    if (before.equals(left) || beforeAcross.equals(left)) {
                        return null;
                    }
                }

                // Record the pattern on every word of the span while building the debug text.
                StringBuilder rendered = new StringBuilder();
                rendered.append(left).append(" [").append(type).append(" ");
                NEWord member = start;
                rendered.append(" ").append(member.form);
                member.activePatterns.addToken(pattern, weight);
                while (member != end) {
                    member = member.nextIgnoreSentenceBoundary;
                    member.activePatterns.addToken(pattern, weight);
                    rendered.append(" ").append(member.form);
                }
                return rendered.append(" ] ").append(right).append("\t").toString();
            }
            end = end.nextIgnoreSentenceBoundary;
        }
        return null;
    }

    /**
     * Builds, for every word that opens an entity span (label starting with
     * {@code "B-"} or {@code "U-"}), the list of patterns describing that span.
     *
     * <p>Left contexts are taken from the word before the span and right
     * contexts from the word after it (the span is extended over following
     * {@code "I-"} / {@code "L-"} words of the same sentence). A context
     * contributes {@code "*null*"} when there is no in-sentence neighbour and,
     * independently, the normalized neighbour form when the
     * boundary-ignoring neighbour exists and contains no letters. One pattern
     * is produced per left/right combination.
     *
     * @param vector sentences whose elements are {@code NEWord}s
     * @param z      when {@code true} labels are read from {@code neLabel} and,
     *               with probability 0.3, the entity type is replaced by
     *               {@code ParametersForLbjCode.patternLabelRandomGenerator.randomType()};
     *               when {@code false} labels are read from {@code neTypeLevel1}
     * @return map from the first word of each span to its patterns; spans whose
     *         (possibly randomized) type is {@code "O"} are omitted
     */
    public static Hashtable<NEWord, Vector<String>> getPatterns(Vector<LinkedVector> vector, boolean z) {
        Hashtable<NEWord, Vector<String>> result = new Hashtable<>();
        for (LinkedVector sentence : vector) {
            for (int pos = 0; pos < sentence.size(); pos++) {
                NEWord first = wordAt(sentence, pos);
                String tag = z ? first.neLabel : first.neTypeLevel1;
                if (!tag.startsWith("B-") && !tag.startsWith("U-")) {
                    continue;
                }

                Vector<String> leftContexts = new Vector<>();
                if (first.previous == null) {
                    leftContexts.addElement(NULL_TOKEN);
                }
                if (first.previousIgnoreSentenceBoundary != null) {
                    String form = first.previousIgnoreSentenceBoundary.form;
                    if (hasNoLetters(form)) {
                        leftContexts.addElement(normalizeDigits(form));
                    }
                }

                // Advance to the last word of the span within this sentence.
                NEWord last = first;
                while (last.next != null
                        && continuesSpan(z ? last.next.neLabel : last.next.neTypeLevel1)) {
                    last = (NEWord) last.next;
                }

                Vector<String> rightContexts = new Vector<>();
                if (last.next == null) {
                    rightContexts.addElement(NULL_TOKEN);
                }
                if (last.nextIgnoreSentenceBoundary != null) {
                    String form = last.nextIgnoreSentenceBoundary.form;
                    if (hasNoLetters(form)) {
                        rightContexts.addElement(normalizeDigits(form));
                    }
                }

                // Drop the two-character "B-" / "U-" prefix to obtain the entity type.
                String type = tag.substring(2, tag.length());
                if (z && Math.random() < LABEL_NOISE_PROBABILITY) {
                    type = ParametersForLbjCode.patternLabelRandomGenerator.randomType();
                }
                if (type.equals("O")) {
                    continue;
                }

                Vector<String> spanPatterns = new Vector<>();
                for (String left : leftContexts) {
                    for (String right : rightContexts) {
                        spanPatterns.addElement(left + "\t" + type + "\t" + right);
                    }
                }
                result.put(first, spanPatterns);
            }
        }
        return result;
    }

    /** Returns whether {@code tag} marks a word inside or at the end of a span ("I-" or "L-"). */
    private static boolean continuesSpan(String tag) {
        return tag.startsWith("I-") || tag.startsWith("L-");
    }

    /**
     * Returns {@code true} when {@code str} contains no character for which
     * {@link Character#isLetter(char)} holds; the empty string qualifies.
     */
    private static boolean hasNoLetters(String str) {
        for (char c : str.toCharArray()) {
            if (Character.isLetter(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Collapses numeric-looking tokens to {@code "*D*"}.
     *
     * <p>A token is replaced when it contains at least one digit and at most
     * one character that is not a digit, {@code '.'} or {@code ','}; any other
     * token is returned unchanged.
     */
    private static String normalizeDigits(String str) {
        int otherChars = 0;
        boolean sawDigit = false;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (Character.isDigit(c)) {
                sawDigit = true;
            } else if (c != '.' && c != ',') {
                otherChars++;
            }
        }
        return (otherChars <= 1 && sawDigit) ? DIGITS_TOKEN : str;
    }

    /**
     * Loads the baseline configuration, reads the Reuters test file and runs
     * {@link #annotate(Vector, boolean, boolean)} on it with both flags set to
     * {@code true}, so matches are printed.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        Parameters.readConfigAndLoadExternalData("Config/baselineFeatures.config");
        annotate(new Reuters2003Parser("Data/GoldData/Reuters/BIO.testb").readAndAnnotate(), true, true);
    }
}
