package edu.northwestern.at.morphadorner.examples;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.corpuslinguistics.adornedword.AdornedWord;
import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.DefaultLemmatizer;
import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.Lemmatizer;
import edu.northwestern.at.utils.corpuslinguistics.lexicon.Lexicon;
import edu.northwestern.at.utils.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.utils.corpuslinguistics.postagger.DefaultPartOfSpeechTagger;
import edu.northwestern.at.utils.corpuslinguistics.sentencesplitter.DefaultSentenceSplitter;
import edu.northwestern.at.utils.corpuslinguistics.spellingstandardizer.DefaultSpellingStandardizer;
import edu.northwestern.at.utils.corpuslinguistics.spellingstandardizer.SpellingStandardizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.DefaultWordTokenizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.PennTreebankTokenizer;
import edu.northwestern.at.utils.corpuslinguistics.tokenizer.WordTokenizer;
import java.util.List;

/* loaded from: input_file:edu/northwestern/at/morphadorner/examples/AdornAString.class */
/**
 * Example program: adorns a string with parts of speech, standardized
 * spellings and lemmata, and prints one line per token to standard output.
 *
 * <p>Only the first command-line argument is used as the text to adorn.</p>
 */
public class AdornAString {
    /** Separator placed between the lemmata of the parts of a compound word. */
    public static String lemmaSeparator = CharUtils.VERTICAL_BAR_STRING;

    /**
     * Program entry point. Delegates to {@link #adornText(String[])} and prints
     * the stack trace of any exception it throws.
     *
     * @param strArr command-line arguments; the first one is the text to adorn
     */
    public static void main(String[] strArr) {
        try {
            adornText(strArr);
        } catch (Exception e) {
            // Top-level boundary of a command-line example: report and fall through.
            e.printStackTrace();
        }
    }

    /**
     * Splits the first argument into sentences, tags every token with its part
     * of speech, then sets and prints the standard spelling and lemma of each
     * token.
     *
     * <p>When no argument is supplied, prints {@code "No text to adorn."} and
     * exits the JVM with status 1.</p>
     *
     * @param strArr arguments; only {@code strArr[0]} is read
     * @throws Exception if any of the linguistic components fails
     */
    public static void adornText(String[] strArr) throws Exception {
        if (strArr.length < 1) {
            System.out.println("No text to adorn.");
            System.exit(1);
        }
        String text = strArr[0];

        // The tagger supplies the lexicon, which in turn supplies the tag set.
        DefaultPartOfSpeechTagger tagger = new DefaultPartOfSpeechTagger();
        Lexicon lexicon = tagger.getLexicon();
        PartOfSpeechTags partOfSpeechTags = lexicon.getPartOfSpeechTags();

        // One tokenizer splits sentences into words; the other is handed to
        // setLemma to split a single spelling into its word parts.
        DefaultWordTokenizer sentenceTokenizer = new DefaultWordTokenizer();
        WordTokenizer spellingTokenizer = new PennTreebankTokenizer();

        DefaultSentenceSplitter sentenceSplitter = new DefaultSentenceSplitter();
        sentenceSplitter.setPartOfSpeechGuesser(tagger.getPartOfSpeechGuesser());

        Lemmatizer lemmatizer = new DefaultLemmatizer();
        SpellingStandardizer standardizer = new DefaultSpellingStandardizer();

        List<List<String>> sentences = sentenceSplitter.extractSentences(text, sentenceTokenizer);
        List<List<AdornedWord>> taggedSentences = tagger.tagSentences(sentences);

        String rule = StringUtils.dupl("-", 30);
        for (int sentenceIndex = 0; sentenceIndex < sentences.size(); sentenceIndex++) {
            List<AdornedWord> words = taggedSentences.get(sentenceIndex);
            System.out.println(rule + " " + (sentenceIndex + 1) + " " + rule);

            int wordNumber = 0;
            for (AdornedWord word : words) {
                wordNumber++;
                setStandardSpelling(word, standardizer, partOfSpeechTags);
                setLemma(word, lexicon, lemmatizer, partOfSpeechTags, spellingTokenizer);

                // Columns: word number, spelling, part of speech, standard spelling, lemma.
                System.out.println(
                    StringUtils.rpad(String.valueOf(wordNumber), 3) + ": "
                        + StringUtils.rpad(word.getSpelling(), 20)
                        + StringUtils.rpad(word.getPartsOfSpeech(), 8)
                        + StringUtils.rpad(word.getStandardSpelling(), 20)
                        + word.getLemmata());
            }
        }
    }

    /**
     * Sets the standard spelling of an adorned word.
     *
     * <p>The original spelling is kept unchanged for proper nouns, nouns with
     * internal capitals, foreign words and numbers. Otherwise the spelling is
     * passed to the standardizer; if the result differs from the original only
     * in letter case, the original spelling is kept.</p>
     *
     * @param adornedWord          word to update; its spelling and part of speech are read
     * @param spellingStandardizer standardizer to apply
     * @param partOfSpeechTags     tag set used to classify the word's part of speech
     */
    public static void setStandardSpelling(AdornedWord adornedWord, SpellingStandardizer spellingStandardizer, PartOfSpeechTags partOfSpeechTags) {
        String spelling = adornedWord.getSpelling();
        String partsOfSpeech = adornedWord.getPartsOfSpeech();

        boolean keepOriginal = partOfSpeechTags.isProperNounTag(partsOfSpeech)
            || (partOfSpeechTags.isNounTag(partsOfSpeech) && CharUtils.hasInternalCaps(spelling))
            || partOfSpeechTags.isForeignWordTag(partsOfSpeech)
            || partOfSpeechTags.isNumberTag(partsOfSpeech);
        if (keepOriginal) {
            adornedWord.setStandardSpelling(spelling);
            return;
        }

        String standardized = spellingStandardizer.standardizeSpelling(
            spelling, partOfSpeechTags.getMajorWordClass(partsOfSpeech));
        // A case-only difference is not treated as a respelling.
        if (standardized.equalsIgnoreCase(spelling)) {
            standardized = spelling;
        }
        adornedWord.setStandardSpelling(standardized);
    }

    /**
     * Sets the lemma (or compound lemma) of an adorned word.
     *
     * <p>The spelling itself is used as the lemma when the lemmatizer reports
     * it cannot lemmatize the spelling or when the lemma word class for the
     * part of speech is {@code "none"}. Otherwise the {@code "compound"} word
     * class is tried first; if that leaves the spelling unchanged, the word is
     * lemmatized either as a whole or, for a compound tag with several word
     * parts, part by part with the pieces joined by {@link #lemmaSeparator}.</p>
     *
     * @param adornedWord      word to update; its spelling and part of speech are read
     * @param lexicon          not used by this method; kept for signature compatibility
     * @param lemmatizer       lemmatizer to apply
     * @param partOfSpeechTags tag set used to map tags to lemma word classes
     * @param wordTokenizer    tokenizer used to split the spelling into word parts
     */
    public static void setLemma(AdornedWord adornedWord, Lexicon lexicon, Lemmatizer lemmatizer, PartOfSpeechTags partOfSpeechTags, WordTokenizer wordTokenizer) {
        String spelling = adornedWord.getSpelling();
        String partsOfSpeech = adornedWord.getPartsOfSpeech();
        String lemmaWordClass = partOfSpeechTags.getLemmaWordClass(partsOfSpeech);

        if (lemmatizer.cantLemmatize(spelling) || lemmaWordClass.equals("none")) {
            adornedWord.setLemmata(spelling);
            return;
        }

        String lemmata = lemmatizer.lemmatize(spelling, "compound");
        if (lemmata.equals(spelling)) {
            List<String> wordParts = wordTokenizer.extractWords(spelling);
            if (!partOfSpeechTags.isCompoundTag(partsOfSpeech) || wordParts.size() == 1) {
                lemmata = lemmaWordClass.isEmpty()
                    ? lemmatizer.lemmatize(spelling)
                    : lemmatizer.lemmatize(spelling, lemmaWordClass);
            } else {
                lemmata = lemmatizeCompound(spelling, partsOfSpeech, wordParts, lemmatizer, partOfSpeechTags);
            }
        }
        adornedWord.setLemmata(lemmata);
    }

    /**
     * Builds a compound lemma by lemmatizing each word part with the lemma
     * word class of the matching piece of the compound tag.
     *
     * @return the part lemmata joined by {@link #lemmaSeparator}, or
     *         {@code spelling} when the number of tag pieces does not match
     *         the number of word parts
     */
    private static String lemmatizeCompound(String spelling, String partsOfSpeech, List<String> wordParts, Lemmatizer lemmatizer, PartOfSpeechTags partOfSpeechTags) {
        String[] tagParts = partOfSpeechTags.splitTag(partsOfSpeech);
        if (tagParts.length != wordParts.size()) {
            // Tags and word parts cannot be paired up; fall back to the
            // spelling rather than emitting an empty lemma.
            return spelling;
        }

        StringBuilder compound = new StringBuilder();
        for (int i = 0; i < tagParts.length; i++) {
            if (i > 0) {
                compound.append(lemmaSeparator);
            }
            compound.append(lemmatizer.lemmatize(wordParts.get(i), partOfSpeechTags.getLemmaWordClass(tagParts[i])));
        }
        return compound.toString();
    }
}
