package LbjTagger;

import IO.InFile;
import StringStatisticsUtils.MyString;
import java.io.File;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;

/* loaded from: input_file:LbjTagger/Gazzetteers.class */
/**
 * Holds gazetteer dictionaries loaded from a directory and tags {@code NEWord}s with the
 * dictionaries whose entries they match.
 *
 * <p>All state is kept in public static fields that {@link #init(String)} replaces wholesale.
 * Nothing here is synchronized beyond what {@link Vector} and {@link Hashtable} provide
 * themselves.
 */
public class Gazzetteers {
    /** Longest phrase, counted in tokens, that {@link #annotate} tries to match from a word. */
    private static final int MAX_PHRASE_TOKENS = 5;

    /** Minimum length of a cleaned token for it to be stored in a one-word dictionary. */
    private static final int MIN_PART_LENGTH = 5;

    /** Characters that {@link #hasPunctuation(String)} treats as punctuation. */
    private static final String PUNCTUATION = ".,:;-/?!\\\"`'[]{}()";

    /** File name of each loaded gazetteer; index-aligned with the three dictionary vectors. */
    public static Vector<String> dictNames = new Vector<>();

    /** Per gazetteer: every line of the file, stored verbatim. */
    public static Vector<Hashtable<String, Boolean>> dictionaries = null;

    /** Per gazetteer: lower-cased lines, except the excluded short words (see loader). */
    public static Vector<Hashtable<String, Boolean>> dictionariesIgnoreCase = null;

    /** Per gazetteer: single cleaned tokens that are long enough and start with an upper-case letter. */
    public static Vector<Hashtable<String, Boolean>> dictionariesOneWordIgnorePunctuation = null;

    /**
     * Discards any previously loaded gazetteers and loads every regular file directly inside
     * the given directory as one gazetteer (one entry per line). Progress is printed to
     * standard output.
     *
     * @param str path of the directory containing the gazetteer files
     * @throws IllegalArgumentException if {@code str} cannot be listed as a directory
     */
    public static void init(String str) {
        dictNames = new Vector<>();
        dictionaries = null;
        dictionariesIgnoreCase = null;
        dictionariesOneWordIgnorePunctuation = null;
        System.out.println("loading dazzetteers....");

        // File.list() returns null (not an empty array) when the path is not a directory or
        // cannot be read; fail with a message that names the path instead of a bare NPE.
        String[] list = new File(str).list();
        if (list == null) {
            throw new IllegalArgumentException("Gazetteer path is not a readable directory: " + str);
        }

        Vector<String> paths = new Vector<>();
        for (String fileName : list) {
            String path = str + "/" + fileName;
            if (new File(path).isFile()) {
                paths.addElement(path);
                dictNames.addElement(fileName);
            }
        }

        dictionaries = new Vector<>(paths.size());
        dictionariesIgnoreCase = new Vector<>(paths.size());
        dictionariesOneWordIgnorePunctuation = new Vector<>(paths.size());
        for (String path : paths) {
            System.out.println("\tloading gazzetteer:...." + path);
            Hashtable<String, Boolean> exact = new Hashtable<>();
            Hashtable<String, Boolean> ignoreCase = new Hashtable<>();
            Hashtable<String, Boolean> oneWord = new Hashtable<>();
            dictionaries.addElement(exact);
            dictionariesIgnoreCase.addElement(ignoreCase);
            dictionariesOneWordIgnorePunctuation.addElement(oneWord);
            loadGazetteer(path, exact, ignoreCase, oneWord);
        }
        System.out.println("found " + dictionaries.size() + " gazetteers");
    }

    /**
     * Reads one gazetteer file line by line into the three tables and always closes the file.
     */
    private static void loadGazetteer(String path,
                                      Hashtable<String, Boolean> exact,
                                      Hashtable<String, Boolean> ignoreCase,
                                      Hashtable<String, Boolean> oneWord) {
        InFile inFile = new InFile(path);
        try {
            // Stop at end of file (readLine() == null); the loop must terminate so the
            // reader below is actually closed.
            for (String line = inFile.readLine(); line != null; line = inFile.readLine()) {
                exact.put(line, Boolean.TRUE);
                if (!isExcludedFromIgnoreCase(line)) {
                    ignoreCase.put(line.toLowerCase(), Boolean.TRUE);
                }
                StringTokenizer tokens = new StringTokenizer(line, " ");
                while (tokens.hasMoreTokens()) {
                    String cleaned = MyString.cleanPunctuation(tokens.nextToken());
                    if (cleaned.length() >= MIN_PART_LENGTH && Character.isUpperCase(cleaned.charAt(0))) {
                        oneWord.put(cleaned, Boolean.TRUE);
                    }
                }
            }
        } finally {
            inFile.close();
        }
    }

    /**
     * Returns true for the entries "in", "on", "us", "or" and "am" in any casing; these are
     * kept out of the ignore-case tables (presumably because they collide with very common
     * lower-case words).
     */
    private static boolean isExcludedFromIgnoreCase(String entry) {
        return entry.equalsIgnoreCase("in")
                || entry.equalsIgnoreCase("on")
                || entry.equalsIgnoreCase("us")
                || entry.equalsIgnoreCase("or")
                || entry.equalsIgnoreCase("am");
    }

    /**
     * Attaches gazetteer tags to {@code nEWord} and, for multi-token matches, to the words
     * that follow it.
     *
     * <p>Tags added:
     * <ul>
     *   <li>{@code Part-<dict>} when the cleaned form of the word is in the one-word table;</li>
     *   <li>{@code U-<dict>} when the word alone is an entry;</li>
     *   <li>{@code B-}/{@code I-}/{@code L-<dict>} on the first/inner/last word of a matched
     *       phrase of two to {@value #MAX_PHRASE_TOKENS} space-joined forms.</li>
     * </ul>
     * Matches against the lower-cased tables get the same tags with an {@code (IC)} suffix.
     *
     * <p>NOTE(review): {@code nEWord.gazetteers} is replaced on entry, so tags that an
     * earlier call on a preceding word attached to this word are dropped; confirm this is
     * what callers expect.
     *
     * @param nEWord the word to annotate; {@link #init(String)} must have been called first
     */
    public static void annotate(NEWord nEWord) {
        nEWord.gazetteers = new Vector<>();
        int dictionaryCount = dictionaries.size();
        if (dictionaryCount > 0) {
            String cleanedForm = MyString.cleanPunctuation(nEWord.form);
            for (int d = 0; d < dictionaryCount; d++) {
                if (dictionariesOneWordIgnorePunctuation.elementAt(d).containsKey(cleanedForm)) {
                    nEWord.gazetteers.addElement("Part-" + dictNames.elementAt(d));
                }
            }
        }

        // 'end' is the first word NOT included in 'phrase'; both grow by one word per pass.
        NEWord end = (NEWord) nEWord.next;
        String phrase = nEWord.form;
        boolean extended = true;
        for (int lastIndex = 0; lastIndex < MAX_PHRASE_TOKENS && extended; lastIndex++) {
            extended = false;
            matchPhrase(nEWord, end, lastIndex, phrase);
            if (end != null) {
                phrase = phrase + " " + end.form;
                end = (NEWord) end.next;
                extended = true;
            }
        }
    }

    /**
     * Looks {@code phrase} up in every dictionary, exact first and then lower-cased, and tags
     * the words it covers for each hit.
     */
    private static void matchPhrase(NEWord first, NEWord end, int lastIndex, String phrase) {
        int dictionaryCount = dictionaries.size();
        if (dictionaryCount == 0) {
            return;
        }
        String lowerPhrase = phrase.toLowerCase();
        for (int d = 0; d < dictionaryCount; d++) {
            if (dictionaries.elementAt(d).containsKey(phrase)) {
                tagMatch(first, end, lastIndex, dictNames.elementAt(d), phrase, false);
            }
            if (dictionariesIgnoreCase.elementAt(d).containsKey(lowerPhrase)) {
                tagMatch(first, end, lastIndex, dictNames.elementAt(d) + "(IC)", lowerPhrase, true);
            }
        }
    }

    /**
     * Records one dictionary hit: a single {@code U-} tag for a one-word phrase, otherwise
     * {@code B-}/{@code I-}/{@code L-} tags on every word from {@code first} up to (not
     * including) {@code end}, together with the matched entry and tag in the word's
     * multi-token match lists.
     *
     * @param lastIndex  zero-based position of the last word of the phrase
     * @param label      dictionary name, already carrying the {@code (IC)} suffix if needed
     * @param entry      the matched dictionary key
     * @param ignoreCase whether to record into the ignore-case match lists
     */
    private static void tagMatch(NEWord first, NEWord end, int lastIndex, String label,
                                 String entry, boolean ignoreCase) {
        if (first.gazetteers == null) {
            first.gazetteers = new Vector<>();
        }
        if (lastIndex == 0) {
            first.gazetteers.addElement("U-" + label);
            return;
        }
        int position = 0;
        for (NEWord word = first; word != end; word = (NEWord) word.next, position++) {
            if (word.gazetteers == null) {
                word.gazetteers = new Vector<>();
            }
            String prefix = positionPrefix(position, lastIndex);
            if (prefix == null) {
                continue;
            }
            String tag = prefix + label;
            word.gazetteers.addElement(tag);
            if (ignoreCase) {
                word.matchedMultiTokenGazEntriesIgnoreCase.addElement(entry);
                word.matchedMultiTokenGazEntryTypesIgnoreCase.addElement(tag);
            } else {
                word.matchedMultiTokenGazEntries.addElement(entry);
                word.matchedMultiTokenGazEntryTypes.addElement(tag);
            }
        }
    }

    /**
     * Maps a position inside a multi-token phrase to its tag prefix: {@code "B-"} for the
     * first word, {@code "I-"} for inner words, {@code "L-"} for the last one, or
     * {@code null} for a position past the last word.
     */
    private static String positionPrefix(int position, int lastIndex) {
        if (position == 0) {
            return "B-";
        }
        if (position < lastIndex) {
            return "I-";
        }
        if (position == lastIndex) {
            return "L-";
        }
        return null;
    }

    /**
     * Tells whether the string contains at least one of the characters
     * {@code . , : ; - / ? ! \ " ` ' [ ] { } ( )}.
     *
     * @param str the text to inspect; must not be null
     * @return true if any of those characters occurs in {@code str}
     */
    public static boolean hasPunctuation(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (PUNCTUATION.indexOf(str.charAt(i)) > -1) {
                return true;
            }
        }
        return false;
    }
}
