package edu.northwestern.at.morphadorner;

import edu.northwestern.at.morphadorner.tools.ExtendedAdornedWord;
import edu.northwestern.at.morphadorner.tools.ExtendedAdornedWordFilter;
import edu.northwestern.at.morphadorner.tools.FilterAdornedFile;
import edu.northwestern.at.morphadorner.xgtagger.XGParser;
import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.Formatters;
import edu.northwestern.at.utils.IsCloseableObject;
import edu.northwestern.at.utils.ListFactory;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.RomanNumeralUtils;
import edu.northwestern.at.utils.SingleTagTaggedStrings;
import edu.northwestern.at.utils.SortedArrayList;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.TaggedStrings;
import edu.northwestern.at.utils.TaggedStringsSet;
import edu.northwestern.at.utils.TextFile;
import edu.northwestern.at.utils.URLUtils;
import edu.northwestern.at.utils.UTF8Properties;
import edu.northwestern.at.utils.UnicodeReader;
import edu.northwestern.at.utils.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.utils.corpuslinguistics.inputter.TextInputter;
import edu.northwestern.at.utils.corpuslinguistics.inputter.TextInputterFactory;
import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.LemmatizerFactory;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.LexiconFactory;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.UsesLexicon;
import edu.northwestern.at.utils.corpuslinguistics.namerecognizer.Names;
import edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizer;
import edu.northwestern.at.utils.corpuslinguistics.namestandardizer.NameStandardizerFactory;
import edu.northwestern.at.utils.corpuslinguistics.outputter.AdornedWordOutputter;
import edu.northwestern.at.utils.corpuslinguistics.outputter.AdornedWordOutputterFactory;
import edu.northwestern.at.utils.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.utils.corpuslinguistics.partsofspeech.PartOfSpeechTagsFactory;
import edu.northwestern.at.utils.corpuslinguistics.postagger.InvalidRuleException;
import edu.northwestern.at.utils.corpuslinguistics.postagger.PartOfSpeechRetagger;
import edu.northwestern.at.utils.corpuslinguistics.postagger.PartOfSpeechRetaggerFactory;
import edu.northwestern.at.utils.corpuslinguistics.postagger.PartOfSpeechTagger;
import edu.northwestern.at.utils.corpuslinguistics.postagger.PartOfSpeechTaggerFactory;
import edu.northwestern.at.utils.corpuslinguistics.postagger.guesser.PartOfSpeechGuesser;
import edu.northwestern.at.utils.corpuslinguistics.postagger.guesser.PartOfSpeechGuesserFactory;
import edu.northwestern.at.utils.corpuslinguistics.postagger.noopretagger.NoopRetagger;
import edu.northwestern.at.utils.corpuslinguistics.postagger.transitionmatrix.TransitionMatrix;
import edu.northwestern.at.utils.corpuslinguistics.sentencesplitter.SentenceSplitter;
import edu.northwestern.at.utils.corpuslinguistics.sentencesplitter.SentenceSplitterFactory;
import edu.northwestern.at.utils.corpuslinguistics.spellingmapper.SpellingMapper;
import edu.northwestern.at.utils.corpuslinguistics.spellingmapper.SpellingMapperFactory;
import edu.northwestern.at.utils.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.utils.corpuslinguistics.spellingstandardizer.SpellingStandardizerFactory;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.Abbreviations;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.PennTreebankTokenizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.WordTokenizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.WordTokenizerFactory;
import edu.northwestern.at.utils.logger.UsesLogger;
import edu.northwestern.at.utils.xml.DOMUtils;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.helpers.XMLReaderFactory;

/* loaded from: input_file:edu/northwestern/at/morphadorner/MorphAdorner.class */
public class MorphAdorner {
    // ---------------------------------------------------------------
    // Shared, static adornment state. Unless noted otherwise, these
    // are assigned by initializeAdornment().
    // ---------------------------------------------------------------

    /** Part of speech tag set; also the source of {@link #tagSeparator}. */
    protected static PartOfSpeechTags partOfSpeechTags;

    /** Part of speech tagger used by adornText() and readorn(). */
    protected static PartOfSpeechTagger tagger;

    /** Retagger attached to {@link #tagger} during initialization. */
    protected static PartOfSpeechRetagger retagger;

    /** Word lexicon, loaded by loadWordLexicon(). */
    protected static Lexicon wordLexicon;

    /** Part of speech guesser shared by the tagger and the sentence splitter. */
    protected static PartOfSpeechGuesser partOfSpeechGuesser;

    /** Suffix lexicon, loaded by loadSuffixLexicon() and handed to the guesser. */
    protected static Lexicon suffixLexicon;

    /** Transition matrix returned by loadTransitionMatrix(). */
    protected static TransitionMatrix transitionMatrix;

    /** Spelling standardizer; the code treats {@code null} as "none available". */
    protected static SpellingStandardizer spellingStandardizer;

    /** Spelling mapper applied to standardized spellings in adornText(). */
    protected static SpellingMapper spellingMapper;

    /** Name standardizer, created by createNameStandardizer(). */
    protected static NameStandardizer nameStandardizer;

    /** Lemmatizer; also the source of {@link #lemmaSeparator}. */
    protected static Lemmatizer lemmatizer;

    /** Factory for per-text sentence splitters. */
    protected static SentenceSplitterFactory sentenceSplitterFactory;

    /** Factory for adorned word outputters. */
    protected static AdornedWordOutputterFactory outputterFactory;

    /** Factory for word tokenizers passed to the sentence splitter. */
    protected static WordTokenizerFactory wordTokenizerFactory;

    /** Factory for text inputters used to load input files. */
    protected static TextInputterFactory inputterFactory;

    // NOTE(review): not referenced in the visible part of this file;
    // presumably a fallback KWIC width -- confirm against the rest of the class.
    protected static int defaultKWICWidth = 80;

    /** Resource name passed to getWordList() when the Latin word list is enabled. */
    protected static String latinWordsFileName = "resources/latinwords.txt";

    /** Resource name passed to getExtraWordsList() during initialization. */
    protected static String extraWordsFileName = "resources/extrawords.txt";

    /** Extra words list; null until initializeAdornment() has run. */
    protected static TaggedStrings extraWords = null;

    /** Matches a leading underscore followed by two uppercase ASCII letters (captured as groups 1 and 2). */
    protected static Pattern underlineCapCapPattern = Pattern.compile("^_([ABCDEFGHIJKLMNOPQRSTUVWXYZ])([ABCDEFGHIJKLMNOPQRSTUVWXYZ])");

    // Single shared matcher for the pattern above. java.util.regex.Matcher
    // instances are not safe for concurrent use by multiple threads.
    protected static final Matcher underlineCapCapMatcher = underlineCapCapPattern.matcher("");

    // NOTE(review): not referenced in the visible part of this file.
    protected static WordTokenizer spellingTokenizer = new PennTreebankTokenizer();

    /** Name lists (first names, surnames, place names) fed to the guesser and the spelling standardizer. */
    protected static Names names = new Names();

    /** Tag separator; starts as a vertical bar and is replaced by partOfSpeechTags.getTagSeparator(). */
    protected static String tagSeparator = CharUtils.VERTICAL_BAR_STRING;

    /** Lemma separator; starts as a vertical bar and is replaced by lemmatizer.getLemmaSeparator(). */
    protected static String lemmaSeparator = CharUtils.VERTICAL_BAR_STRING;

    /** JVM runtime handle used by logMemoryUsage(). */
    protected static Runtime runTime = Runtime.getRuntime();

    /**
     * Logs a "Memory_used" message carrying the given label together with
     * the JVM's current free and total memory, formatted with commas.
     *
     * @param str label placed first in the message arguments
     */
    protected static void logMemoryUsage(String str) {
        String freeMemory = Formatters.formatLongWithCommas(runTime.freeMemory());
        String totalMemory = Formatters.formatLongWithCommas(runTime.totalMemory());
        Object[] messageArguments = {str, freeMemory, totalMemory};
        MorphAdornerLogger.println("Memory_used", messageArguments);
    }

    /**
     * Creates a lexicon and, when a word lexicon URL is configured, loads
     * it as UTF-8. Logs the lexicon size and load time, then attaches the
     * MorphAdorner logger to the lexicon.
     *
     * @return the (possibly empty) word lexicon
     * @throws IOException if the lexicon cannot be read
     */
    protected static Lexicon loadWordLexicon() throws IOException {
        final long startTime = System.currentTimeMillis();
        final Lexicon lexicon = new LexiconFactory().newLexicon();

        if (MorphAdornerSettings.wordLexiconURL != null) {
            lexicon.loadLexicon(MorphAdornerSettings.wordLexiconURL, "utf-8");
        }

        String entryCount = Formatters.formatIntegerWithCommas(lexicon.getLexiconSize());
        String elapsed = durationString(startTime);
        MorphAdornerLogger.println("Loaded_word_lexicon", new Object[]{entryCount, elapsed});

        UsesLogger loggingLexicon = (UsesLogger) lexicon;
        loggingLexicon.setLogger(MorphAdornerLogger.getLogger());
        return lexicon;
    }

    /**
     * Creates a lexicon and, when a suffix lexicon URL is configured, loads
     * it as UTF-8. Logs the lexicon size and load time, then attaches the
     * MorphAdorner logger to the lexicon.
     *
     * @return the (possibly empty) suffix lexicon
     * @throws IOException if the lexicon cannot be read
     */
    protected static Lexicon loadSuffixLexicon() throws IOException {
        final long startTime = System.currentTimeMillis();
        final Lexicon lexicon = new LexiconFactory().newLexicon();

        if (MorphAdornerSettings.suffixLexiconURL != null) {
            lexicon.loadLexicon(MorphAdornerSettings.suffixLexiconURL, "utf-8");
        }

        String entryCount = Formatters.formatIntegerWithCommas(lexicon.getLexiconSize());
        String elapsed = durationString(startTime);
        MorphAdornerLogger.println("Loaded_suffix_lexicon", new Object[]{entryCount, elapsed});

        UsesLogger loggingLexicon = (UsesLogger) lexicon;
        loggingLexicon.setLogger(MorphAdornerLogger.getLogger());
        return lexicon;
    }

    /**
     * Creates a transition matrix and, when a matrix URL is configured and
     * the tagger uses transition probabilities, loads it (UTF-8,
     * tab-separated), hands it to the tagger, logs the load time and
     * attaches the MorphAdorner logger.
     *
     * @param partOfSpeechTagger tagger that receives the loaded matrix
     * @return the matrix; left unloaded when no URL is configured or the
     *         tagger does not use transition probabilities
     * @throws IOException if the matrix cannot be read
     */
    protected static TransitionMatrix loadTransitionMatrix(PartOfSpeechTagger partOfSpeechTagger) throws IOException {
        final TransitionMatrix matrix = new TransitionMatrix();

        boolean matrixWanted = MorphAdornerSettings.transitionMatrixURL != null
                && partOfSpeechTagger.usesTransitionProbabilities();
        if (!matrixWanted) {
            return matrix;
        }

        final long startTime = System.currentTimeMillis();
        matrix.loadTransitionMatrix(MorphAdornerSettings.transitionMatrixURL, "utf-8", '\t');
        partOfSpeechTagger.setTransitionMatrix(matrix);
        MorphAdornerLogger.println("Loaded_transition_matrix", new Object[]{durationString(startTime)});
        matrix.setLogger(MorphAdornerLogger.getLogger());
        return matrix;
    }

    /**
     * Loads context rules and lexical rules into the tagger. Each rule set
     * is read (as UTF-8) only when its URL is configured and the tagger
     * reports that it uses that kind of rule.
     *
     * @param partOfSpeechTagger tagger that receives the rules
     * @throws InvalidRuleException if the tagger rejects a rule
     * @throws IOException if a rules file cannot be read
     */
    protected static void loadTaggerRules(PartOfSpeechTagger partOfSpeechTagger) throws InvalidRuleException, IOException {
        boolean loadContextRules = MorphAdornerSettings.contextRulesURL != null
                && partOfSpeechTagger.usesContextRules();
        if (loadContextRules) {
            TextFile contextRulesFile = new TextFile(MorphAdornerSettings.contextRulesURL, "utf-8");
            partOfSpeechTagger.setContextRules(contextRulesFile.toArray());
        }

        boolean loadLexicalRules = MorphAdornerSettings.lexicalRulesURL != null
                && partOfSpeechTagger.usesLexicalRules();
        if (loadLexicalRules) {
            TextFile lexicalRulesFile = new TextFile(MorphAdornerSettings.lexicalRulesURL, "utf-8");
            partOfSpeechTagger.setLexicalRules(lexicalRulesFile.toArray());
        }
    }

    /**
     * Creates and populates the spelling standardizer.
     *
     * <p>Steps, in order: give the standardizer the lexicon (when it
     * implements {@link UsesLexicon}); load the standard spellings file
     * when configured; add first names, surnames and place names as
     * standard spellings; load each configured alternate spellings file
     * and each alternate-spellings-by-word-class file, logging how many
     * entries each file added; finally attach the MorphAdorner logger.</p>
     *
     * @param lexicon word lexicon handed to the standardizer
     * @return the standardizer, or {@code null} when the factory yields none
     * @throws IOException if a spellings file cannot be read
     */
    protected static SpellingStandardizer createSpellingStandardizer(Lexicon lexicon) throws IOException {
        SpellingStandardizer standardizer = new SpellingStandardizerFactory().newSpellingStandardizer();
        if (standardizer == null) {
            return null;
        }

        long startTime = System.currentTimeMillis();
        if (standardizer instanceof UsesLexicon) {
            ((UsesLexicon) standardizer).setLexicon(lexicon);
        }

        if (MorphAdornerSettings.spellingsURL != null) {
            standardizer.loadStandardSpellings(MorphAdornerSettings.spellingsURL, "utf-8");
            MorphAdornerLogger.println("Loaded_standard_spellings", new Object[]{Formatters.formatIntegerWithCommas(standardizer.getNumberOfStandardSpellings()), durationString(startTime)});
        }

        standardizer.addStandardSpellings(names.getFirstNames());
        standardizer.addStandardSpellings(names.getSurnames());
        standardizer.addStandardSpellings(names.getPlaceNames().keySet());

        if (MorphAdornerSettings.alternateSpellingsURLs != null) {
            // Running total, so each log line reports only what that file added.
            int previousCount = 0;
            for (int i = 0; i < MorphAdornerSettings.alternateSpellingsURLs.length; i++) {
                long fileStartTime = System.currentTimeMillis();
                standardizer.loadAlternativeSpellings(MorphAdornerSettings.alternateSpellingsURLs[i], "utf-8", "\t");
                MorphAdornerLogger.println("Loaded_alternate_spellings", new Object[]{Formatters.formatIntegerWithCommas(standardizer.getNumberOfAlternateSpellings() - previousCount), durationString(fileStartTime)});
                previousCount = standardizer.getNumberOfAlternateSpellings();
            }
        }

        if (MorphAdornerSettings.alternateSpellingsByWordClassURLs != null) {
            // Same delta reporting as above, for the two counters returned
            // by getNumberOfAlternateSpellingsByWordClass().
            int[] previousCounts = {0, 0};
            for (int i = 0; i < MorphAdornerSettings.alternateSpellingsByWordClassURLs.length; i++) {
                long fileStartTime = System.currentTimeMillis();
                standardizer.loadAlternativeSpellingsByWordClass(MorphAdornerSettings.alternateSpellingsByWordClassURLs[i], "utf-8");
                int[] currentCounts = standardizer.getNumberOfAlternateSpellingsByWordClass();
                MorphAdornerLogger.println("Loaded_alternate_spellings_by_word_class", new Object[]{Formatters.formatIntegerWithCommas(currentCounts[1] - previousCounts[1]), Formatters.formatIntegerWithCommas(currentCounts[0] - previousCounts[0]), durationString(fileStartTime)});
                previousCounts[0] = currentCounts[0];
                previousCounts[1] = currentCounts[1];
            }
        }

        // Check the object that is actually cast. The previous code tested
        // the unrelated static nameStandardizer, which is still null when
        // this method runs during initialization, so the logger was never set.
        if (standardizer instanceof UsesLogger) {
            ((UsesLogger) standardizer).setLogger(MorphAdornerLogger.getLogger());
        }
        return standardizer;
    }

    /**
     * Creates the spelling mapper via {@link SpellingMapperFactory}.
     *
     * @return whatever the factory produces
     * @throws IOException declared for callers; not thrown by the visible code
     */
    protected static SpellingMapper createSpellingMapper() throws IOException {
        SpellingMapperFactory mapperFactory = new SpellingMapperFactory();
        SpellingMapper mapper = mapperFactory.newSpellingMapper();
        return mapper;
    }

    /**
     * Creates the name standardizer. When a lexicon is supplied, names are
     * loaded from it and the count and load time are logged. The
     * MorphAdorner logger is attached when the standardizer implements
     * {@link UsesLogger}.
     *
     * @param lexicon lexicon to load names from; may be {@code null}
     * @return the standardizer, or {@code null} when the factory yields none
     * @throws IOException declared for callers
     */
    protected static NameStandardizer createNameStandardizer(Lexicon lexicon) throws IOException {
        final NameStandardizer standardizer = new NameStandardizerFactory().newNameStandardizer();
        if (standardizer == null) {
            return standardizer;
        }

        if (lexicon != null) {
            final long startTime = System.currentTimeMillis();
            standardizer.loadNamesFromLexicon(lexicon);
            String nameCount = Formatters.formatIntegerWithCommas(standardizer.getNumberOfNames());
            MorphAdornerLogger.println("Loaded_names", new Object[]{nameCount, durationString(startTime)});
        }

        if (standardizer instanceof UsesLogger) {
            UsesLogger loggingStandardizer = (UsesLogger) standardizer;
            loggingStandardizer.setLogger(MorphAdornerLogger.getLogger());
        }
        return standardizer;
    }

    /**
     * Builds all shared adornment components and stores them in the static
     * fields of this class.
     *
     * <p>Order matters: the tag set and tagger/retagger come first, then
     * the word lexicon, the part of speech guesser with its suffix lexicon
     * and auxiliary word lists, optional abbreviations, tagger rules and
     * transition matrix, then the spelling standardizer, spelling mapper
     * and name standardizer, and finally the lemmatizer.</p>
     *
     * <p>Any exception is printed to standard error and initialization
     * stops at that point; later fields are then left unassigned.</p>
     */
    protected static void initializeAdornment() {
        try {
            sentenceSplitterFactory = new SentenceSplitterFactory();
            outputterFactory = new AdornedWordOutputterFactory();
            wordTokenizerFactory = new WordTokenizerFactory();
            inputterFactory = new TextInputterFactory();

            partOfSpeechTags = new PartOfSpeechTagsFactory().newPartOfSpeechTags();
            tagSeparator = partOfSpeechTags.getTagSeparator();

            tagger = new PartOfSpeechTaggerFactory().newPartOfSpeechTagger();
            retagger = new PartOfSpeechRetaggerFactory().newPartOfSpeechRetagger();
            ((UsesLogger) tagger).setLogger(MorphAdornerLogger.getLogger());
            ((UsesLogger) retagger).setLogger(MorphAdornerLogger.getLogger());
            tagger.setRetagger(retagger);
            MorphAdornerLogger.println("Using", new Object[]{tagger.toString()});
            MorphAdornerLogger.println("Using", new Object[]{retagger.toString()});

            wordLexicon = loadWordLexicon();
            wordLexicon.setPartOfSpeechTags(partOfSpeechTags);

            partOfSpeechGuesser = new PartOfSpeechGuesserFactory().newPartOfSpeechGuesser();
            partOfSpeechGuesser.setCheckPossessives(MorphAdornerSettings.getBooleanProperty("partofspeechguesser.check_possessives", false));
            tagger.setPartOfSpeechGuesser(partOfSpeechGuesser);
            partOfSpeechGuesser.setWordLexicon(wordLexicon);
            ((UsesLogger) partOfSpeechGuesser).setLogger(MorphAdornerLogger.getLogger());

            suffixLexicon = loadSuffixLexicon();
            partOfSpeechGuesser.setSuffixLexicon(suffixLexicon);

            // Auxiliary word lists: extra words and the three name lists are
            // all tagged as singular proper nouns.
            extraWords = getExtraWordsList(extraWordsFileName, partOfSpeechTags.getSingularProperNounTag(), "Loaded_extra_words");
            partOfSpeechGuesser.addAuxiliaryWordList(extraWords);
            partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(names.getPlaceNames().keySet(), partOfSpeechTags.getSingularProperNounTag()));
            partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(names.getFirstNames(), partOfSpeechTags.getSingularProperNounTag()));
            partOfSpeechGuesser.addAuxiliaryWordList(new TaggedStringsSet(names.getSurnames(), partOfSpeechTags.getSingularProperNounTag()));
            if (MorphAdornerSettings.useLatinWordList) {
                partOfSpeechGuesser.addAuxiliaryWordList(getWordList(latinWordsFileName, partOfSpeechTags.getForeignWordTag("latin"), "Loaded_latin_words"));
            }

            if (MorphAdornerSettings.abbreviationsURL.length() > 0) {
                loadAbbreviations(URLUtils.getURLFromFileNameOrURL(MorphAdornerSettings.abbreviationsURL).toString(), "Loaded_abbreviations");
            }

            tagger.setLexicon(wordLexicon);
            loadTaggerRules(tagger);
            transitionMatrix = loadTransitionMatrix(tagger);

            spellingStandardizer = createSpellingStandardizer(wordLexicon);
            spellingMapper = createSpellingMapper();
            nameStandardizer = createNameStandardizer(wordLexicon);
            if (spellingStandardizer != null) {
                partOfSpeechGuesser.setSpellingStandardizer(spellingStandardizer);
            }

            lemmatizer = new LemmatizerFactory().newLemmatizer();
            lemmaSeparator = lemmatizer.getLemmaSeparator();
            lemmatizer.setLexicon(wordLexicon);
            // The standardizer may be absent (see the null check above).
            // Dereferencing it unconditionally used to raise a
            // NullPointerException that skipped the logger setup below.
            if (spellingStandardizer != null) {
                lemmatizer.setDictionary(spellingStandardizer.getStandardSpellings());
            }
            ((UsesLogger) lemmatizer).setLogger(MorphAdornerLogger.getLogger());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Adorns every file named in {@code MorphAdornerSettings.fileNames}.
     *
     * <p>When the "adorner.handle_xml" property is true, each file is
     * either re-adorned (if {@code isAdorned} reports it is already
     * adorned) or adorned as XML; otherwise it is adorned as plain text.
     * A failure on one file is printed to standard error and processing
     * continues with the next file.</p>
     */
    protected static void processInputFiles() {
        long startTime = System.currentTimeMillis();
        int fileCount = MorphAdornerSettings.fileNames.length;

        switch (fileCount) {
            case 0:
                MorphAdornerLogger.println("No_files_to_process");
                break;
            case 1:
                MorphAdornerLogger.println("One_file_to_process");
                break;
            default:
                MorphAdornerLogger.println("Number_of_files_to_process", new Object[]{Formatters.formatIntegerWithCommas(fileCount)});
                break;
        }

        boolean handleXML = MorphAdornerSettings.getBooleanProperty("adorner.handle_xml", false);
        logMemoryUsage("Before processing input texts: ");

        for (String fileName : MorphAdornerSettings.fileNames) {
            MorphAdornerLogger.println("Processing_file", new Object[]{fileName});
            // adornFile() declares IOException just like the XML paths throw,
            // so all three calls share one handler. Previously the plain-text
            // branch sat outside the try and its checked exception was unhandled.
            try {
                if (!handleXML) {
                    adornFile(fileName);
                } else if (isAdorned(fileName, 500)) {
                    readorn(fileName);
                } else {
                    adornXML(fileName);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        if (fileCount > 0) {
            MorphAdornerLogger.println("All_files_adorned", new Object[]{durationString(startTime)});
        }
    }

    /**
     * Adorns one XML file and writes the adorned XML to the output
     * directory.
     *
     * <p>The file is skipped when an output file of the same name already
     * exists and re-adorning existing files is disabled. Otherwise the
     * text is loaded and split into segments; every segment whose name
     * starts with "text" (but is not exactly "text") is parsed, adorned
     * and has its adornments merged back into the XML. The merged result
     * is written to a temporary file and then to the final output file.</p>
     *
     * <p>The text inputter is closed and the temporary file is removed
     * even when processing fails.</p>
     *
     * @param str file name or URL of the XML text to adorn
     * @throws Exception if loading, adorning or writing fails
     */
    public static void adornXML(String str) throws Exception {
        if (!MorphAdornerSettings.adornExistingXMLFiles && doesOutputFileNameExist(str)) {
            MorphAdornerLogger.println("Skipping_file_which_is_already_adorned", new Object[]{str});
            return;
        }

        TextInputter textInputter = inputterFactory.newTextInputter();
        try {
            textInputter.enableGapFixer(MorphAdornerSettings.fixGapTags);
            textInputter.enableOrigFixer(MorphAdornerSettings.fixOrigTags);
            textInputter.enableSplitWordsFixer(MorphAdornerSettings.fixSplitWords, MorphAdornerSettings.fixSplitWordsPatternReplacers);
            textInputter.loadText(URLUtils.getURLFromFileNameOrURL(str), "utf-8", MorphAdornerSettings.xmlSchema);

            int segmentCount = textInputter.getSegmentCount();
            String segmentCountText = Formatters.formatIntegerWithCommas(segmentCount);
            MorphAdornerLogger.println("Input_file_split", new Object[]{str, segmentCountText});

            // Totals carried across segments.
            int runningWordID = 0;
            int totalAdornedWords = 0;
            int totalPageBreaks = 0;
            // Entries from mergeAdornments() whose value exceeds 1.
            Map<Integer, Integer> multiPartEntries = MapFactory.createNewMap();

            for (int segmentIndex = 0; segmentIndex < segmentCount; segmentIndex++) {
                String segmentName = textInputter.getSegmentName(segmentIndex);
                if (!segmentName.startsWith("text") || segmentName.equals("text")) {
                    continue;
                }

                MorphAdornerLogger.println("Processing_segment", new Object[]{segmentName, Formatters.formatIntegerWithCommas(segmentIndex + 1), segmentCountText});
                Document segmentDocument = XGParser.textToDOM(MorphAdornerSettings.xgOptions, textInputter.getSegmentText(segmentName));
                fixEmptySoftTags(segmentDocument);
                fixSupTags(segmentDocument);
                totalPageBreaks += countPageBreaks(segmentDocument);

                // extractText() yields { extracted text, parser }.
                Object[] extracted = XGParser.extractText(MorphAdornerSettings.xgOptions, segmentDocument);
                XGParser segmentParser = (XGParser) extracted[1];
                segmentParser.setRunningWordID(runningWordID);
                AdornedWordOutputter segmentOutputter = adornText((String) extracted[0], null);

                MorphAdornerLogger.println("Inserting_adornments_into_xml");
                long mergeStartTime = System.currentTimeMillis();
                Map<Integer, Integer> merged = XGParser.mergeAdornments(MorphAdornerSettings.xgOptions, segmentParser, segmentDocument, segmentName, segmentOutputter, textInputter);
                for (Map.Entry<Integer, Integer> entry : merged.entrySet()) {
                    if (entry.getValue().intValue() > 1) {
                        multiPartEntries.put(entry.getKey(), entry.getValue());
                    }
                }
                MorphAdornerLogger.println("Inserted_adornments_into_xml", new Object[]{durationString(mergeStartTime)});

                runningWordID = segmentParser.getRunningWordID();
                totalAdornedWords += segmentParser.getNumberOfAdornedWords();
            }

            String outputFileName = getOutputFileName(str);
            long writeStartTime = System.currentTimeMillis();
            MorphAdornerLogger.println("Merging_adorned");

            File mergedFile = File.createTempFile("mad", null);
            mergedFile.deleteOnExit();
            try {
                String mergedPath = mergedFile.getAbsolutePath();
                mergeXML(textInputter, mergedPath);
                MorphAdornerLogger.println("Writing_merged", new Object[]{outputFileName});
                new MorphAdornerXMLWriterFactory().newMorphAdornerXMLWriter().writeXML(mergedPath, outputFileName, runningWordID, partOfSpeechTags, multiPartEntries, totalAdornedWords, totalPageBreaks);
            } finally {
                try {
                    mergedFile.delete();
                } catch (SecurityException ignored) {
                    // Best effort only: deleteOnExit() above is the fallback.
                }
            }

            MorphAdornerLogger.println("Adorned_XML_written", new Object[]{outputFileName, durationString(writeStartTime)});
        } finally {
            ((IsCloseableObject) textInputter).close();
        }

        logMemoryUsage("After completing " + str + ": ");
    }

    /**
     * Builds the output file name for an input file: the input's name
     * (path stripped) placed in the configured output directory, passed
     * through {@code FileNameUtils.createVersionedFileName}.
     *
     * @param str input file name
     * @return the versioned output file name
     * @throws IOException if the path for the output file cannot be created
     */
    public static String getOutputFileName(String str) throws IOException {
        String baseName = FileNameUtils.stripPathName(str);
        File outputFile = new File(MorphAdornerSettings.outputDirectoryName, baseName);
        String outputPath = outputFile.getPath();

        if (!FileUtils.createPathForFile(outputPath)) {
            throw new IOException(MorphAdornerSettings.getString("Unable_to_create_output_directory"));
        }
        return FileNameUtils.createVersionedFileName(outputPath);
    }

    /**
     * Reports whether a file with the input's name (path stripped) already
     * exists in the configured output directory.
     *
     * @param str input file name
     * @return true if the corresponding output file exists
     */
    public static boolean doesOutputFileNameExist(String str) {
        String baseName = FileNameUtils.stripPathName(str);
        File outputFile = new File(MorphAdornerSettings.outputDirectoryName, baseName);
        return outputFile.exists();
    }

    /**
     * Loads a text from a file name or URL and adorns it.
     *
     * <p>Only segment 0 of the loaded text is adorned. Any failure while
     * loading or adorning is reported with an "Unable_to_read_text"
     * message and yields {@code null}.</p>
     *
     * @param str file name or URL of the text
     * @return the outputter holding the adornments, or {@code null} when
     *         the name cannot be resolved or processing fails
     * @throws IOException declared for callers; failures inside the
     *         load/adorn step are caught and logged instead
     */
    public static AdornedWordOutputter adornFile(String str) throws IOException {
        MorphAdornerLogger.println("Tagging", new Object[]{str});
        URL textURL = URLUtils.getURLFromFileNameOrURL(str);
        if (textURL == null) {
            MorphAdornerLogger.println("Bad_file_name_or_URL", new Object[]{str});
            return null;
        }

        long startTime = System.currentTimeMillis();
        try {
            String text;
            TextInputter textInputter = inputterFactory.newTextInputter();
            try {
                textInputter.enableGapFixer(MorphAdornerSettings.fixGapTags);
                textInputter.enableOrigFixer(MorphAdornerSettings.fixOrigTags);
                textInputter.loadText(textURL, "utf-8", MorphAdornerSettings.xmlSchema);
                text = textInputter.getSegmentText(0);
            } finally {
                // Close the inputter even when loading fails; it was
                // previously left open on any exception.
                ((IsCloseableObject) textInputter).close();
            }
            MorphAdornerLogger.println("Loaded_text", new Object[]{str, durationString(startTime)});
            return adornText(text, textURL);
        } catch (Exception e) {
            MorphAdornerLogger.println("Unable_to_read_text", new Object[]{str});
            return null;
        }
    }

    /**
     * Adorns a text: splits it into sentences and words, tags the words,
     * and writes one tab-separated row of adornments per word.
     *
     * <p>Which columns appear in each row is controlled by the
     * {@code MorphAdornerSettings.output*} flags, in this order: sentence
     * number, word number, original token, spelling, part of speech,
     * standard spelling, lemma, end-of-sentence flag, and the left and
     * right KWIC context.</p>
     *
     * @param str the text to adorn
     * @param url source URL of the text; when non-null the rows go to the
     *            output file derived from it, otherwise to a temporary
     *            file that is deleted on JVM exit
     * @return the (closed) outputter the rows were written to
     * @throws IOException if the output file cannot be created or written
     */
    public static AdornedWordOutputter adornText(String str, URL url) throws IOException {
        long extractStartTime = System.currentTimeMillis();
        SentenceSplitter sentenceSplitter = sentenceSplitterFactory.newSentenceSplitter();
        ((UsesLogger) sentenceSplitter).setLogger(MorphAdornerLogger.getLogger());
        sentenceSplitter.setPartOfSpeechGuesser(partOfSpeechGuesser);
        List<List<String>> sentences = sentenceSplitter.extractSentences(str, wordTokenizerFactory.newWordTokenizer());

        // Index 0 and 1 are logged in reverse order; index 1 is also the
        // count used for the tagging rate below.
        int[] wordAndSentenceCounts = getWordAndSentenceCounts(sentences);
        int wordCount = wordAndSentenceCounts[1];
        MorphAdornerLogger.println("Extracted_words", new Object[]{Formatters.formatIntegerWithCommas(wordCount), Formatters.formatIntegerWithCommas(wordAndSentenceCounts[0]), durationString(extractStartTime)});

        if (partOfSpeechGuesser != null) {
            partOfSpeechGuesser.setTryStandardSpellings(MorphAdornerSettings.tryStandardSpellings);
        }
        boolean doOutputLemma = MorphAdornerSettings.outputLemma && lemmatizer != null;
        boolean doOutputStandardSpelling = MorphAdornerSettings.outputStandardSpelling && spellingStandardizer != null;
        MorphAdornerSettings.setXMLWordAttributes(MorphAdornerSettings.outputOriginalToken || MorphAdornerSettings.useXMLHandler, doOutputLemma, doOutputStandardSpelling);

        long tagStartTime = System.currentTimeMillis();
        List<List<AdornedWord>> taggedSentences = tagger.tagSentences(sentences);
        // Clamp to 1 ms and divide in floating point: the old integral
        // division threw ArithmeticException when tagging finished within
        // the same millisecond and truncated the rate otherwise.
        long taggingMillis = Math.max(1L, System.currentTimeMillis() - tagStartTime);
        int wordsPerSecond = (int) ((wordCount * 1000.0d) / taggingMillis);
        MorphAdornerLogger.println("Tagging_complete", new Object[]{durationString(tagStartTime), Formatters.formatIntegerWithCommas(wordsPerSecond)});

        MorphAdornerLogger.println("Generating_other_adornments");
        long adornStartTime = System.currentTimeMillis();
        AdornedWordOutputter outputter = outputterFactory.newAdornedWordOutputter();
        outputter.setWordAttributeNames(MorphAdornerSettings.getXMLWordAttributes());

        // Resolve the output name once so the name logged at the end is the
        // file that was actually written (the name is versioned).
        String outputFileName = null;
        if (url != null) {
            outputFileName = getOutputFileName(URLUtils.getFileNameFromURL(url, MorphAdornerSettings.outputDirectoryName));
            outputter.createOutputFile(outputFileName, "utf-8", '\t');
        } else {
            File tempFile = File.createTempFile("mad", null);
            tempFile.deleteOnExit();
            outputter.createOutputFile(tempFile.getAbsolutePath(), "utf-8", '\t');
        }

        int sentenceNumber = 0;
        int wordNumber = 0;
        // Declared outside the loops on purpose: when lemma output is off
        // the value from the previous word (initially "") is what the
        // consistency check below sees, exactly as before.
        String lemma = "";
        String surroundMarker = MorphAdornerSettings.xgOptions.getSurroundMarker().trim();
        String undeterminedTag = partOfSpeechTags.getUndeterminedTag();
        List<String> outputFields = ListFactory.createNewList();

        for (List<AdornedWord> sentence : taggedSentences) {
            sentenceNumber++;
            String sentenceNumberText = sentenceNumber + "";
            int lastWordIndex = sentence.size() - 1;
            // Word numbers restart per sentence unless running numbers are requested.
            if (!MorphAdornerSettings.outputRunningWordNumbers) {
                wordNumber = 0;
            }

            for (int wordIndex = 0; wordIndex < sentence.size(); wordIndex++) {
                outputFields.clear();
                if (MorphAdornerSettings.outputSentenceNumber) {
                    outputFields.add(sentenceNumberText);
                }
                wordNumber++;
                if (MorphAdornerSettings.outputWordNumber) {
                    outputFields.add(wordNumber + "");
                }

                AdornedWord adornedWord = sentence.get(wordIndex);
                String token = adornedWord.getToken();
                if (MorphAdornerSettings.outputOriginalToken) {
                    outputFields.add(token);
                }
                String spelling = adornedWord.getSpelling();
                String standardSpelling = adornedWord.getStandardSpelling();
                if (MorphAdornerSettings.outputSpelling) {
                    outputFields.add(spelling);
                }
                String partsOfSpeech = adornedWord.getPartsOfSpeech();

                if (doOutputStandardSpelling) {
                    standardSpelling = getStandardizedSpelling(spellingStandardizer, nameStandardizer, spelling, standardSpelling, partsOfSpeech);
                    if (spellingMapper != null) {
                        standardSpelling = spellingMapper.mapSpelling(standardSpelling);
                    }
                }

                if (doOutputLemma) {
                    // "*" means "no lexicon lemma": fall back to the lemmatizer.
                    lemma = !MorphAdornerSettings.ignoreLexiconEntriesForLemmatization ? wordLexicon.getLemma(spelling, partsOfSpeech) : "*";
                    if (lemma.equals("*") && lemmatizer != null) {
                        lemma = standardSpelling.length() > 0 ? getLemma(lemmatizer, standardSpelling, partsOfSpeech) : getLemma(lemmatizer, spelling, partsOfSpeech);
                    }
                    // Single lemmata are lowercased unless the word is a proper noun.
                    if (lemma.indexOf(lemmaSeparator) < 0 && !partOfSpeechTags.isProperNounTag(partsOfSpeech)) {
                        lemma = lemma.toLowerCase();
                    }
                }

                if (lemmatizer != null) {
                    // Tag count and lemma count must agree; otherwise the
                    // word is reset to the undetermined tag.
                    if (partOfSpeechTags.countTags(partsOfSpeech) != lemmatizer.countLemmata(lemma)) {
                        partsOfSpeech = undeterminedTag;
                    }
                    if (partsOfSpeech.equals(undeterminedTag) || lemma.length() == 0) {
                        lemma = spelling.toLowerCase();
                        standardSpelling = spelling;
                        partsOfSpeech = undeterminedTag;
                    }
                }

                if (MorphAdornerSettings.outputPartOfSpeech) {
                    outputFields.add(partsOfSpeech);
                }
                if (doOutputStandardSpelling) {
                    outputFields.add(standardSpelling);
                }
                if (doOutputLemma) {
                    outputFields.add(lemma);
                }

                if (MorphAdornerSettings.outputEOSFlag) {
                    String endOfSentenceFlag;
                    if (MorphAdornerSettings.useXMLHandler) {
                        endOfSentenceFlag = "0";
                        if (wordIndex >= lastWordIndex) {
                            endOfSentenceFlag = "1";
                        } else if (sentence.get(wordIndex + 1).getToken().equals(surroundMarker) && (token.endsWith(".") || token.endsWith("!") || token.endsWith("?") || token.endsWith("'") || token.endsWith("\"") || token.endsWith(CharUtils.RSQUOTE_STRING) || token.endsWith(CharUtils.RDQUOTE_STRING) || token.endsWith("}") || token.endsWith("]") || token.endsWith(")"))) {
                            // In XML mode a word followed by the surround marker and
                            // ending in closing punctuation also ends a sentence.
                            endOfSentenceFlag = "1";
                        }
                    } else {
                        endOfSentenceFlag = wordIndex >= lastWordIndex ? "1" : "0";
                    }
                    outputFields.add(endOfSentenceFlag);
                }

                if (MorphAdornerSettings.outputKWIC) {
                    // Elements 0 and 2 of the KWIC result are emitted.
                    String[] kwic = getKWIC(sentence, wordIndex, MorphAdornerSettings.outputKWICWidth);
                    outputFields.add(kwic[0]);
                    outputFields.add(kwic[2]);
                }

                outputter.outputWordAndAdornments(outputFields);
            }
        }

        outputter.close();
        if (url != null) {
            MorphAdornerLogger.println("Adornments_written_to", new Object[]{outputFileName, durationString(adornStartTime)});
        } else {
            MorphAdornerLogger.println("Adornments_generated", new Object[]{durationString(adornStartTime)});
        }

        // Release the sentence lists; only the outputter is returned.
        sentences.clear();
        taggedSentences.clear();
        return outputter;
    }

    /**
     * Re-adorns a previously adorned XML file.
     *
     * <p>The input is first filtered through a {@code StripWordAttributesFilter}
     * into a temporary file while an {@code ExtendedAdornedWordFilter} collects
     * the existing words and sentences. The sentences are then re-tagged, the
     * other adornments are recomputed, and the temporary file is filtered through
     * an {@code AddWordAttributesFilter} (optionally wrapped in a
     * {@code PseudoPageAdderFilter}) into the output file named by
     * {@code getOutputFileName(str)}.</p>
     *
     * @param str name of the previously adorned input file
     * @throws SAXException if XML reader creation fails
     * @throws IOException if the temporary file cannot be created
     * @throws FileNotFoundException declared by the original signature
     */
    public static void readorn(String str) throws SAXException, IOException, FileNotFoundException {
        MorphAdornerLogger.println("Loading_previously_adorned");
        long loadStart = System.currentTimeMillis();
        ExtendedAdornedWordFilter extendedAdornedWordFilter = new ExtendedAdornedWordFilter(XMLReaderFactory.createXMLReader());
        StripWordAttributesFilter stripWordAttributesFilter = new StripWordAttributesFilter(extendedAdornedWordFilter);
        File tempFile = File.createTempFile("mad", null);
        // Safety net only; the file is also removed explicitly in the finally block below.
        tempFile.deleteOnExit();
        try {
            String tempPath = tempFile.getAbsolutePath();
            new FilterAdornedFile(str, tempPath, stripWordAttributesFilter);
            List<List<ExtendedAdornedWord>> sentences = extendedAdornedWordFilter.getSentences();
            MorphAdornerLogger.println("Loaded_existing_words", new Object[]{Formatters.formatIntegerWithCommas(extendedAdornedWordFilter.getNumberOfWords()), Formatters.formatIntegerWithCommas(sentences.size()), durationString(loadStart)});

            // A retagger that can add or delete words is swapped for a no-op
            // while the existing sentences are re-tagged.
            PartOfSpeechRetagger savedRetagger = null;
            if (retagger.canAddOrDeleteWords()) {
                savedRetagger = retagger;
                tagger.setRetagger(new NoopRetagger());
                MorphAdornerLogger.println("Disabling_retagger", new Object[]{retagger.toString()});
            }
            long tagStart = System.currentTimeMillis();
            try {
                tagger.tagAdornedWordSentences(sentences);
            } finally {
                // Restore the real retagger even when tagging fails, so the
                // shared tagger is not left with the no-op installed.
                if (savedRetagger != null) {
                    tagger.setRetagger(savedRetagger);
                }
            }
            // Compute words/second in floating point and clamp the elapsed time to
            // at least 1 ms: the former integer division truncated the rate and
            // raised ArithmeticException when tagging finished within one millisecond.
            long tagMillis = Math.max(System.currentTimeMillis() - tagStart, 1L);
            int wordsPerSecond = (int) ((extendedAdornedWordFilter.getNumberOfWords() * 1000.0d) / tagMillis);
            MorphAdornerLogger.println("Tagging_complete", new Object[]{durationString(tagStart), Formatters.formatIntegerWithCommas(wordsPerSecond)});

            MorphAdornerLogger.println("Generating_other_adornments");
            long adornStart = System.currentTimeMillis();
            // NOTE(review): the standardized spelling and lemma computed in this
            // loop are never written back to the word in the visible code —
            // confirm whether setter calls are missing here.
            for (int i = 0; i < sentences.size(); i++) {
                List<ExtendedAdornedWord> sentence = sentences.get(i);
                for (int j = 0; j < sentence.size(); j++) {
                    ExtendedAdornedWord word = sentence.get(j);
                    String spelling = word.getSpelling();
                    String standardSpelling = word.getStandardSpelling();
                    String partsOfSpeech = word.getPartsOfSpeech();
                    String standardizedSpelling = getStandardizedSpelling(spellingStandardizer, nameStandardizer, spelling, standardSpelling, partsOfSpeech);
                    if (spellingMapper != null) {
                        standardizedSpelling = spellingMapper.mapSpelling(standardizedSpelling);
                    }
                    // "*" marks a lemma that the lexicon did not supply.
                    String lemma = !MorphAdornerSettings.ignoreLexiconEntriesForLemmatization ? wordLexicon.getLemma(spelling, partsOfSpeech) : "*";
                    if (lemma.equals("*") && lemmatizer != null) {
                        lemma = standardizedSpelling.length() > 0 ? getLemma(lemmatizer, standardizedSpelling, partsOfSpeech) : getLemma(lemmatizer, spelling, partsOfSpeech);
                    }
                    if (lemma.indexOf(lemmaSeparator) < 0 && !partOfSpeechTags.isProperNounTag(partsOfSpeech)) {
                        // Strings are immutable: the lowercased value must be assigned
                        // (the original call discarded its result).
                        lemma = lemma.toLowerCase();
                    }
                }
            }
            MorphAdornerLogger.println("Adornments_generated", new Object[]{durationString(adornStart)});

            XMLFilter addWordAttributesFilter = new AddWordAttributesFilter(XMLReaderFactory.createXMLReader(), extendedAdornedWordFilter);
            XMLFilter outputFilter = addWordAttributesFilter;
            if (MorphAdornerSettings.outputPseudoPageBoundaryMilestones) {
                outputFilter = new PseudoPageAdderFilter(addWordAttributesFilter, MorphAdornerSettings.pseudoPageSize, MorphAdornerSettings.pseudoPageContainerDivTypes);
            }
            String outputFileName = getOutputFileName(str);
            MorphAdornerLogger.println("Writing_merged", new Object[]{outputFileName});
            long writeStart = System.currentTimeMillis();
            new FilterAdornedFile(tempPath, outputFileName, outputFilter);
            MorphAdornerLogger.println("Adorned_XML_written", new Object[]{outputFileName, durationString(writeStart)});
        } finally {
            try {
                tempFile.delete();
            } catch (SecurityException ignored) {
                // Best effort: deleteOnExit() above remains as the fallback.
            }
        }
    }

    /**
     * Checks whether a file appears to be adorned already.
     *
     * <p>Reads at most {@code i} lines of the file as UTF-8 and reports
     * {@code true} as soon as a line containing {@code "<w "} yields a
     * non-empty third match group from {@code WordAttributePatterns.idReplacer}
     * applied to the third match group of {@code WordAttributePatterns.wReplacer}.
     * Any I/O or matching failure is treated as "not adorned" for that line/file.</p>
     *
     * @param str name of the file to inspect
     * @param i   maximum number of lines to examine
     * @return {@code true} if an adorned word element was found, else {@code false}
     */
    protected static boolean isAdorned(String str, int i) {
        boolean adorned = false;
        FileInputStream fileInputStream = null;
        BufferedReader reader = null;
        try {
            fileInputStream = new FileInputStream(str);
            reader = new BufferedReader(new UnicodeReader(fileInputStream, "utf-8"));
            int linesRead = 0;
            String line;
            while (!adorned && linesRead < i && (line = reader.readLine()) != null) {
                linesRead++;
                if (line.indexOf("<w ") < 0) {
                    continue;
                }
                try {
                    String[] wordGroups = WordAttributePatterns.wReplacer.matchGroups(line);
                    String[] idGroups = WordAttributePatterns.idReplacer.matchGroups(wordGroups[2]);
                    if (idGroups != null && idGroups[2] != null && idGroups[2].length() > 0) {
                        adorned = true;
                    }
                } catch (Exception ignored) {
                    // A line that cannot be parsed simply does not count as adorned;
                    // keep scanning the remaining lines.
                }
            }
        } catch (Exception ignored) {
            // Unreadable file: fall through with whatever was determined so far.
        } finally {
            // Always release the file handle; previously it leaked whenever
            // reading threw before the explicit close() was reached.
            try {
                if (reader != null) {
                    reader.close();
                } else if (fileInputStream != null) {
                    fileInputStream.close();
                }
            } catch (IOException ignored) {
                // Nothing useful can be done if close fails.
            }
        }
        return adorned;
    }

    /**
     * Loads abbreviations and logs how many were added.
     *
     * @param str  source passed to {@code Abbreviations.loadAbbreviations}
     * @param str2 logger message key; receives the number of abbreviations
     *             added and the elapsed-time string as arguments
     */
    public static void loadAbbreviations(String str, String str2) {
        final long startTime = System.currentTimeMillis();
        final int countBefore = Abbreviations.getAbbreviationsCount();
        Abbreviations.loadAbbreviations(str);
        final int added = Abbreviations.getAbbreviationsCount() - countBefore;
        Object[] messageArgs = {Formatters.formatIntegerWithCommas(added), durationString(startTime)};
        MorphAdornerLogger.println(str2, messageArgs);
    }

    /**
     * Reads a class-path resource as UTF-8 text and wraps its lines in a
     * {@code SingleTagTaggedStrings} that uses one tag for every entry.
     *
     * @param str  resource name, resolved relative to {@code MorphAdorner.class}
     * @param str2 tag handed to the {@code SingleTagTaggedStrings} constructor
     * @param str3 logger message key; receives the string count and the
     *             elapsed-time string as arguments
     * @return the tagged word list
     */
    public static TaggedStrings getWordList(String str, String str2, String str3) {
        final long startTime = System.currentTimeMillis();
        TextFile wordFile = new TextFile(MorphAdorner.class.getResourceAsStream(str), "utf-8");
        SingleTagTaggedStrings words = new SingleTagTaggedStrings(wordFile.toArray(), str2);
        Object[] messageArgs = {Formatters.formatIntegerWithCommas(words.getStringCount()), durationString(startTime)};
        MorphAdornerLogger.println(str3, messageArgs);
        return words;
    }

    /**
     * Loads an extra words list from a class-path resource into a
     * {@code UTF8Properties} object.
     *
     * <p>Loading is best-effort: any exception raised while creating or loading
     * the properties is ignored, and whatever was loaded up to that point is
     * returned. A log message is written only when at least one entry was
     * loaded.</p>
     *
     * @param str  resource name, resolved relative to {@code MorphAdorner.class}
     * @param str2 second argument handed to {@code UTF8Properties.load}
     * @param str3 logger message key; receives the string count and the
     *             elapsed-time string as arguments
     * @return the loaded properties, or {@code null} if they could not even be
     *         constructed
     */
    public static TaggedStrings getExtraWordsList(String str, String str2, String str3) {
        long startTime = System.currentTimeMillis();
        UTF8Properties extraWords = null;
        try {
            extraWords = new UTF8Properties();
            extraWords.load(MorphAdorner.class.getResourceAsStream(str), str2);
        } catch (Exception ignored) {
            // Deliberately best-effort: a missing or unreadable resource yields
            // an empty (or partially loaded) list rather than a failure.
        }
        // Guard against a null reference: previously a failure during
        // construction surfaced here as a NullPointerException.
        if (extraWords != null && extraWords.size() > 0) {
            MorphAdornerLogger.println(str3, new Object[]{Formatters.formatIntegerWithCommas(extraWords.getStringCount()), durationString(startTime)});
        }
        return extraWords;
    }

    /**
     * Builds a keyword-in-context triple for one word of a sentence.
     *
     * <p>Each side of the keyword gets a character budget of
     * {@code ((i2 - 4) - keywordLength) / 2}. Tokens are added whole, so a side
     * may run past its budget by the length of the last token added. Left
     * context tokens are separated by single spaces; every right context token
     * is followed by a space, including the last one.</p>
     *
     * @param list the words of the sentence
     * @param i    index of the keyword within {@code list}
     * @param i2   overall KWIC width used to derive the per-side budget
     * @return array of {left context, keyword token, right context}
     */
    public static String[] getKWIC(List<AdornedWord> list, int i, int i2) {
        final String keywordToken = list.get(i).getToken();
        final int sideBudget = ((i2 - 4) - keywordToken.length()) / 2;

        // Walk backwards from the keyword, prepending tokens.
        StringBuilder left = new StringBuilder();
        int usedChars = 0;
        int position = i - 1;
        while (usedChars < sideBudget && position >= 0) {
            String token = list.get(position).getToken();
            if (left.length() > 0) {
                left.insert(0, " ");
            }
            left.insert(0, token);
            usedChars += token.length() + 1;
            position--;
        }

        // Walk forwards from the keyword, appending tokens.
        StringBuilder right = new StringBuilder();
        final int wordCount = list.size();
        position = i + 1;
        while (right.length() < sideBudget && position < wordCount) {
            right.append(list.get(position).getToken()).append(" ");
            position++;
        }

        return new String[]{left.toString(), keywordToken, right.toString()};
    }

    /**
     * Sums the sizes of all nested lists.
     *
     * @param list a list whose elements are themselves {@code List}s
     *             (one per sentence)
     * @return total number of elements across all nested lists
     */
    protected static int getWordCount(List list) {
        int total = 0;
        for (Object sentence : list) {
            total += ((List) sentence).size();
        }
        return total;
    }

    /**
     * Counts sentences and words, ignoring trailing end-of-text-section markers.
     *
     * <p>For every nested list, trailing elements equal to
     * {@code CharUtils.CHAR_END_OF_TEXT_SECTION_STRING} are not counted. A nested
     * list that still has elements after that trimming counts as one sentence.</p>
     *
     * @param list a list whose elements are {@code List}s of word strings
     * @return a two-element array: index 0 holds the number of non-empty
     *         sentences, index 1 the total number of words
     */
    protected static int[] getWordAndSentenceCounts(List list) {
        int[] counts = {0, 0};
        for (int i = 0; i < list.size(); i++) {
            List sentence = (List) list.get(i);
            int size = sentence.size();
            // Check size before indexing: an empty sentence previously caused
            // get(-1) and an IndexOutOfBoundsException.
            while (size > 0 && CharUtils.CHAR_END_OF_TEXT_SECTION_STRING.equals(sentence.get(size - 1))) {
                size--;
            }
            if (size > 0) {
                counts[0]++;
            }
            counts[1] += size;
        }
        return counts;
    }

    /**
     * Formats the time elapsed since a start timestamp as a phrase in seconds.
     *
     * <p>The elapsed milliseconds are rounded up to whole seconds. The unit
     * word is looked up via {@code MorphAdornerSettings.getString} using the key
     * {@code "seconds"} when the count exceeds one and {@code "second"}
     * otherwise. A count below one is shown as {@code "< 1"}. The result ends
     * with a period.</p>
     *
     * @param j start time in milliseconds, as from {@code System.currentTimeMillis()}
     * @return the formatted duration
     */
    public static String durationString(long j) {
        final long elapsedMillis = System.currentTimeMillis() - j;
        final long seconds = (elapsedMillis + 999) / 1000;
        final String formattedSeconds = Formatters.formatLongWithCommas(seconds);
        final String shownCount = seconds < 1 ? "< 1" : formattedSeconds;
        String unitKey;
        if (seconds > 1) {
            unitKey = "seconds";
        } else {
            unitKey = "second";
        }
        return new StringBuilder()
            .append(shownCount)
            .append(" ")
            .append(MorphAdornerSettings.getString(unitKey))
            .append(".")
            .toString();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Merges the segments of a text inputter into a single UTF-8 XML file.
     *
     * <p>Segment names are sorted and written in that order. The closing
     * portion of the {@code "head"} segment (from {@code </eebo}, matched
     * case-insensitively, or else {@code </TEI}) is held back and written last.
     * For the {@code "text"} segment, {@code "/>"} is replaced by {@code ">"},
     * a matching closing tag for its opening {@code group}/{@code text} element
     * is queued ahead of the held-back tail, and a trailing
     * {@code </text>}/{@code </TEXT>} is removed. Trailing {@code " >"} sequences
     * are collapsed to {@code ">"}.</p>
     *
     * <p>Failures are reported with {@code printStackTrace()} and not
     * propagated.</p>
     *
     * @param textInputter source of the named text segments
     * @param str          name of the output file, which is overwritten
     */
    protected static void mergeXML(TextInputter textInputter, String str) {
        FileOutputStream fileOutputStream = null;
        OutputStreamWriter outputStreamWriter = null;
        try {
            fileOutputStream = new FileOutputStream(new File(str), false);
            outputStreamWriter = new OutputStreamWriter(new BufferedOutputStream(fileOutputStream), "utf-8");
            SortedArrayList segmentNames = new SortedArrayList();
            int segmentCount = textInputter.getSegmentCount();
            for (int i = 0; i < segmentCount; i++) {
                segmentNames.add(textInputter.getSegmentName(i));
            }
            // Closing markup to emit after all segments have been written.
            String tail = "";
            for (int i = 0; i < segmentNames.size(); i++) {
                String segmentName = (String) segmentNames.get(i);
                String segmentText = textInputter.getSegmentText(segmentName);
                if (segmentName.equals("head")) {
                    int closeIndex = StringUtils.indexOfIgnoreCase(segmentText, "</eebo");
                    if (closeIndex < 0) {
                        closeIndex = segmentText.indexOf("</TEI");
                    }
                    if (closeIndex >= 0) {
                        tail = segmentText.substring(closeIndex);
                        segmentText = segmentText.substring(0, closeIndex);
                    }
                } else if (segmentName.equals("text")) {
                    segmentText = StringUtils.replaceAll(segmentText.trim(), "/>", ">");
                    tail = segmentText.startsWith("<group") ? "</group>" + tail : segmentText.startsWith("<GROUP") ? "</GROUP>" + tail : segmentText.startsWith("<text") ? "</text>" + tail : "</TEXT>" + tail;
                    if (segmentText.endsWith("</text>") || segmentText.endsWith("</TEXT>")) {
                        // 7 == "</text>".length()
                        segmentText = segmentText.substring(0, segmentText.length() - 7);
                    }
                }
                while (segmentText.endsWith(" >")) {
                    segmentText = segmentText.substring(0, segmentText.length() - 2) + ">";
                }
                outputStreamWriter.write(segmentText, 0, segmentText.length());
            }
            String cleanedTail = StringUtils.replaceAll(tail, " >", ">");
            outputStreamWriter.write(cleanedTail, 0, cleanedTail.length());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the file handle is released even when a
            // segment read or write fails part-way through. Closing the writer
            // also flushes and closes the wrapped streams.
            try {
                if (outputStreamWriter != null) {
                    outputStreamWriter.close();
                } else if (fileOutputStream != null) {
                    fileOutputStream.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        System.gc();
    }

    /**
     * Removes marker characters from a spelling.
     *
     * <p>In order, every occurrence of the vertical bar string, {@code "{"},
     * {@code "}"} and {@code "+"} is removed, except that a spelling consisting
     * solely of one of those markers is left alone for that marker. After that,
     * when the spelling starts with {@code '_'}, is longer than one character,
     * and {@code underlineCapCapMatcher} finds a match, the underscore is
     * dropped and the character after the first remaining one is lowercased.</p>
     *
     * @param str the spelling to clean
     * @return the cleaned spelling
     */
    public static String fixSpelling(String str) {
        String cleaned = str;
        final String[] markers = {CharUtils.VERTICAL_BAR_STRING, "{", "}", "+"};
        for (String marker : markers) {
            // A spelling that *is* the marker must survive unchanged.
            if (!cleaned.equals(marker)) {
                cleaned = StringUtils.replaceAll(cleaned, marker, "");
            }
        }
        if (cleaned.length() <= 1 || cleaned.charAt(0) != '_') {
            return cleaned;
        }
        underlineCapCapMatcher.reset(cleaned);
        if (!underlineCapCapMatcher.find()) {
            return cleaned;
        }
        StringBuilder rebuilt = new StringBuilder();
        rebuilt.append(cleaned.charAt(1));
        rebuilt.append(Character.toLowerCase(cleaned.charAt(2)));
        if (cleaned.length() > 3) {
            rebuilt.append(cleaned.substring(3));
        }
        return rebuilt.toString();
    }

    /**
     * Determines the lemma for a spelling given its part of speech tag.
     *
     * <p>When the lemmatizer can handle the spelling and the tag's lemma word
     * class is not {@code "none"}:</p>
     * <ul>
     *   <li>for a compound tag, a {@code "compound"} lemmatization is tried
     *       first and returned if the lemmatizer reports it as a compound
     *       lemma;</li>
     *   <li>for a compound tag whose spelling does not tokenize to exactly one
     *       word, each word is lemmatized with the word class of the matching
     *       tag part and the results are joined with {@code lemmaSeparator};
     *       if the number of tag parts differs from the number of words, the
     *       result is the empty string;</li>
     *   <li>with an empty word class, {@code "compound"} lemmatization is tried
     *       and, if it leaves the spelling unchanged, class-less lemmatization
     *       is used;</li>
     *   <li>otherwise the spelling is lemmatized with its word class.</li>
     * </ul>
     * <p>When lemmatization does not apply and the tag is a number tag on a
     * loose Roman numeral, one leading and one trailing period are stripped.
     * In every other case the spelling is returned unchanged.</p>
     *
     * @param lemmatizer2 the lemmatizer to use
     * @param str         the spelling to lemmatize
     * @param str2        the part of speech tag of the spelling
     * @return the lemma
     */
    protected static String getLemma(Lemmatizer lemmatizer2, String str, String str2) {
        final String wordClass = partOfSpeechTags.getLemmaWordClass(str2);
        final boolean lemmatizable = !lemmatizer2.cantLemmatize(str) && !wordClass.equals("none");

        if (!lemmatizable) {
            if (!partOfSpeechTags.isNumberTag(str2) || !RomanNumeralUtils.isLooseRomanNumeral(str)) {
                return str;
            }
            String numeral = str;
            if (numeral.charAt(0) == '.') {
                numeral = numeral.substring(1);
            }
            if (numeral.charAt(numeral.length() - 1) == '.') {
                numeral = numeral.substring(0, numeral.length() - 1);
            }
            return numeral;
        }

        final boolean compoundTag = partOfSpeechTags.isCompoundTag(str2);
        if (compoundTag) {
            String compoundLemma = lemmatizer2.lemmatize(str, "compound");
            if (lemmatizer2.isCompoundLemma(compoundLemma)) {
                return compoundLemma;
            }
        }

        List<String> words = spellingTokenizer.extractWords(str);
        if (compoundTag && words.size() != 1) {
            String[] tagParts = partOfSpeechTags.splitTag(str2);
            if (tagParts.length != words.size()) {
                // Tag parts and words cannot be paired up: no lemma.
                return "";
            }
            StringBuilder joined = new StringBuilder();
            int index = 0;
            for (String word : words) {
                if (index > 0) {
                    joined.append(lemmaSeparator);
                }
                joined.append(lemmatizer2.lemmatize(word, partOfSpeechTags.getLemmaWordClass(tagParts[index])));
                index++;
            }
            return joined.toString();
        }

        if (wordClass.length() != 0) {
            return lemmatizer2.lemmatize(str, wordClass);
        }
        String lemma = lemmatizer2.lemmatize(str, "compound");
        if (lemma.equals(str)) {
            lemma = lemmatizer2.lemmatize(str);
        }
        return lemma;
    }

    /**
     * Determines the standardized form of a spelling from its part of speech.
     *
     * <ul>
     *   <li>Proper noun tags: the name standardizer's result is returned.</li>
     *   <li>Noun tags on spellings with internal capitals, and foreign word
     *       tags: the spelling is returned unchanged.</li>
     *   <li>Number tags: a loose Roman numeral has one leading and one trailing
     *       period stripped; other numbers are returned unchanged.</li>
     *   <li>Everything else: the spelling standardizer is applied with the
     *       tag's major word class; if the result differs from the input only
     *       in letter case, the input is returned.</li>
     * </ul>
     *
     * @param spellingStandardizer2 standardizer for ordinary spellings
     * @param nameStandardizer2     standardizer for proper names
     * @param str                   the original spelling
     * @param str2                  not used by this method
     * @param str3                  the part of speech tag
     * @return the standardized spelling
     */
    protected static String getStandardizedSpelling(SpellingStandardizer spellingStandardizer2, NameStandardizer nameStandardizer2, String str, String str2, String str3) {
        if (partOfSpeechTags.isProperNounTag(str3)) {
            return nameStandardizer2.standardizeProperName(str);
        }

        final boolean nounWithInternalCaps = partOfSpeechTags.isNounTag(str3) && CharUtils.hasInternalCaps(str);
        if (nounWithInternalCaps || partOfSpeechTags.isForeignWordTag(str3)) {
            return str;
        }

        if (partOfSpeechTags.isNumberTag(str3)) {
            if (!RomanNumeralUtils.isLooseRomanNumeral(str)) {
                return str;
            }
            String numeral = str;
            if (numeral.charAt(0) == '.') {
                numeral = numeral.substring(1);
            }
            if (numeral.charAt(numeral.length() - 1) == '.') {
                numeral = numeral.substring(0, numeral.length() - 1);
            }
            return numeral;
        }

        String standardized = spellingStandardizer2.standardizeSpelling(str, partOfSpeechTags.getMajorWordClass(str3));
        // Keep the author's capitalization when only the case changed.
        return standardized.equalsIgnoreCase(str) ? str : standardized;
    }

    /**
     * Sets the text of every empty soft-tag node in a document to a single
     * blank.
     *
     * <p>A node qualifies when {@code MorphAdornerSettings.xgOptions} reports
     * its name as a soft tag and {@code DOMUtils.getText} returns an empty
     * string for it.</p>
     *
     * @param document the document whose descendants are examined
     */
    public static void fixEmptySoftTags(Document document) {
        for (Node candidate : DOMUtils.getDescendants(document)) {
            if (!MorphAdornerSettings.xgOptions.isSoftTag(candidate.getNodeName())) {
                continue;
            }
            if (DOMUtils.getText(candidate).length() != 0) {
                continue;
            }
            DOMUtils.setText(candidate, " ");
        }
    }

    /**
     * Prefixes the text of selected {@code sup} elements with the superscript
     * text marker.
     *
     * <p>A {@code sup} element is marked when its text does not already start
     * with {@code "^"}, it has a previous sibling, and either:</p>
     * <ul>
     *   <li>the sibling's text content ends with {@code " y"} and the lowercased
     *       element text is one of e, t, c, en, ere, f, i, m, n, o, u; or</li>
     *   <li>the sibling's text content ends with {@code " w"} and the lowercased
     *       element text is ch or t.</li>
     * </ul>
     *
     * @param document the document whose {@code sup} descendants are examined
     */
    public static void fixSupTags(Document document) {
        for (Node supNode : DOMUtils.getDescendants(document, "sup")) {
            String supText = DOMUtils.getText(supNode);
            if (supText.startsWith("^")) {
                continue;
            }
            Node before = supNode.getPreviousSibling();
            if (before == null) {
                continue;
            }
            String precedingText = before.getTextContent();
            boolean mark;
            if (precedingText.endsWith(" y")) {
                mark = java.util.Arrays.asList("e", "t", "c", "en", "ere", "f", "i", "m", "n", "o", "u").contains(supText.toLowerCase());
            } else if (precedingText.endsWith(" w")) {
                mark = java.util.Arrays.asList("ch", "t").contains(supText.toLowerCase());
            } else {
                mark = false;
            }
            if (mark) {
                DOMUtils.setText(supNode, CharUtils.CHAR_SUP_TEXT_MARKER_STRING + supText);
            }
        }
    }

    /**
     * Counts the {@code pb} descendants of a document.
     *
     * @param document the document to examine
     * @return the size of the collection returned by
     *         {@code DOMUtils.getDescendants(document, "pb")}
     */
    public static int countPageBreaks(Document document) {
        return DOMUtils.getDescendants(document, "pb").size();
    }

    /**
     * Program entry point.
     *
     * <p>On the first call (while {@code MorphAdornerSettings.initialized} is
     * false) the settings and logger are initialized, the banner is printed and
     * adornment is initialized. Failures while initializing the logger or
     * reading the settings are printed and do not stop start-up. Afterwards the
     * input files are processed if any were given, and the logger is
     * terminated.</p>
     *
     * @param strArr command line arguments
     */
    public static void main(String[] strArr) {
        if (!MorphAdornerSettings.initialized) {
            MorphAdornerSettings.initializeSettings(strArr);
            try {
                MorphAdornerLogger.initialize("morphadornerlog.config", "/morphadorner/log");
            } catch (Exception loggerFailure) {
                loggerFailure.printStackTrace();
            }
            try {
                MorphAdornerSettings.getSettings(strArr);
            } catch (Exception settingsFailure) {
                settingsFailure.printStackTrace();
            }
            MorphAdornerLogger.println("programBanner");
            MorphAdornerLogger.println("Initializing_please_wait");
            initializeAdornment();
            MorphAdornerSettings.initialized = true;
        }

        final boolean nothingToProcess = MorphAdornerSettings.fileNames.length == 0;
        if (nothingToProcess) {
            MorphAdornerLogger.println("No_files_found_to_process");
        } else {
            processInputFiles();
        }
        MorphAdornerLogger.terminate();
    }

    /**
     * Protected no-argument constructor with an empty body; it cannot be called
     * from outside the package except by subclasses.
     */
    protected MorphAdorner() {
    }
}
