package edu.stanford.nlp.tagger.maxent;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.PrintFile;
import edu.stanford.nlp.ling.CoreAnnotations$AnswerAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.SentenceProcessor;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.objectbank.ReaderIteratorFactory;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.ListProcessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TransformXML;
import edu.stanford.nlp.process.WhitespaceTokenizer;
import edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:edu/stanford/nlp/tagger/maxent/MaxentTagger.class */
public class MaxentTagger implements Function<Sentence<? extends HasWord>, Sentence<TaggedWord>>, SentenceProcessor, ListProcessor<Sentence<? extends HasWord>, Sentence<TaggedWord>> {
    /** Set to true once {@link #init(String, TaggerConfig)} has loaded a model; later init calls then return immediately. */
    private static boolean isInitialized;
    /** Model path used when a tagging method is called before any explicit {@code init}. */
    public static final String DEFAULT_NLP_GROUP_MODEL_PATH = "/u/nlp/data/pos-tagger/wsj3t0-18-left3words/left3words-wsj-0-18.tagger";
    /** Alternative model path constant; not referenced anywhere else in this class. */
    public static final String DEFAULT_DISTRIBUTION_PATH = "models/left3words-wsj-0-18.tagger";
    /** Writer for the XML sentence output; opened in {@code runTagger} when an XML output path is configured, otherwise left null. */
    private static Writer outStream;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/tagger/maxent/MaxentTagger$TaggerWrapper.class */
    /**
     * String-to-string adapter around the tagger, handed to {@code TransformXML}
     * by {@code tagFromXML}. Each call tags one chunk of text and returns either
     * the plain tagged text or, when the shared XML output stream is open, the
     * XML rendering of the tagged sentences.
     */
    public static class TaggerWrapper implements Function<String, Object> {
        private final TaggerConfig config;
        private TokenizerFactory tokenizerFactory;
        /** Running sentence id; only advanced while XML output is being produced. */
        private int sentNum = 0;

        /**
         * Resolves the tokenizer factory from the configuration. A configured class
         * name is instantiated reflectively; otherwise the PTB factory is used when
         * tokenization is enabled. Any failure falls back to the PTB factory.
         *
         * @param taggerConfig configuration supplying the tokenizer settings
         */
        public TaggerWrapper(TaggerConfig taggerConfig) {
            this.config = taggerConfig;
            try {
                String factoryClassName = taggerConfig.getTokenizerFactory();
                boolean hasCustomFactory = factoryClassName.trim().length() != 0;
                if (hasCustomFactory) {
                    Class<?> factoryClass = Class.forName(taggerConfig.getTokenizerFactory());
                    this.tokenizerFactory = (TokenizerFactory) factoryClass.newInstance();
                } else if (taggerConfig.getTokenize()) {
                    this.tokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newPTBTokenizerFactory();
                }
            } catch (Exception ex) {
                System.err.println("Error in tokenizer factory instantiation for class: " + taggerConfig.getTokenizerFactory());
                ex.printStackTrace();
                this.tokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newPTBTokenizerFactory();
            }
        }

        /**
         * Tags the given text.
         *
         * @param str text to tag; tokenized first when the configuration asks for it
         * @return the XML form of the tagged sentences when the XML output stream is
         *         open, otherwise the plain tagged text
         */
        @Override // edu.stanford.nlp.util.Function
        public String apply(String str) {
            StringBuilder plainOutput = new StringBuilder();
            StringBuilder xmlOutput = new StringBuilder();
            if (!this.config.getTokenize()) {
                // Input is treated as a single pre-tokenized sentence.
                try {
                    TestSentence testSentence = new TestSentence(GlobalHolder.getLambdaSolve(), str);
                    if (MaxentTagger.outStream != null) {
                        MaxentTagger.writeXMLSentence(testSentence.getTaggedSentence(), this.sentNum);
                        xmlOutput.append(MaxentTagger.getXMLWords(testSentence.getTaggedSentence(), this.sentNum++)).append('\n');
                    }
                    plainOutput.append(testSentence.getTaggedNice()).append(' ');
                } catch (Exception ex) {
                    System.err.println("Error tagging string: " + str);
                    ex.printStackTrace();
                }
            } else {
                List<Sentence<? extends HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(str), this.tokenizerFactory);
                for (Sentence<? extends HasWord> sentence : sentences) {
                    Sentence<TaggedWord> tagged = MaxentTagger.tagSentence(sentence);
                    if (MaxentTagger.outStream != null) {
                        MaxentTagger.writeXMLSentence(tagged, this.sentNum);
                        xmlOutput.append(MaxentTagger.getXMLWords(tagged, this.sentNum++)).append('\n');
                    }
                    plainOutput.append(tagged.toString(false)).append(' ');
                }
            }
            if (MaxentTagger.outStream != null) {
                return xmlOutput.toString();
            }
            return plainOutput.toString();
        }
    }

    /**
     * Creates a tagger without loading a model. The instance {@code apply}
     * method loads the default model on first use if none has been initialized.
     */
    public MaxentTagger() {
    }

    /**
     * Creates a tagger and initializes the shared tagger state from the given model.
     *
     * @param str model location passed to {@link #init(String)}
     * @throws Exception if initialization fails
     */
    public MaxentTagger(String str) throws Exception {
        MaxentTagger.init(str);
    }

    /**
     * Initializes the shared tagger state from the given model, with no
     * explicit configuration.
     *
     * @param str model location
     * @throws Exception if initialization fails
     */
    public static void init(String str) throws Exception {
        final TaggerConfig noConfig = null;
        init(str, noConfig);
    }

    /**
     * Loads the model into the shared {@code GlobalHolder} state. Only the first
     * call does any work; once the model is loaded, later calls return
     * immediately regardless of their arguments.
     *
     * <p>The method is synchronized on the class so that the "already
     * initialized?" check and the initialization itself are atomic. Callers in
     * this class reach it while holding different monitors (the class lock in
     * {@code tagString}, the instance lock in {@code apply}) or none at all, so
     * without this two threads could both load the model and both apply the
     * {@code baseToken} adjustment.
     *
     * @param str          model location
     * @param taggerConfig configuration handed to the model reader; may be null
     * @throws Exception if reading or initializing the model fails
     */
    public static synchronized void init(String str, TaggerConfig taggerConfig) throws Exception {
        if (isInitialized) {
            return;
        }
        GlobalHolder.readModelAndInit(taggerConfig, str);
        CollectionTaggerOutputs.baseToken -= GlobalHolder.pairs.getSize();
        GlobalHolder.tFeature.init();
        // Flag is set last so a failed load is retried on the next call.
        isInitialized = true;
    }

    /**
     * Tags a string and returns the tagged text.
     * Loads the default model first if nothing has been initialized yet.
     *
     * @param str text to tag
     * @return the tagged text, or null if the tagger is not initialized or tagging fails
     * @throws Exception if loading the default model fails
     */
    public static synchronized String tagString(String str) throws Exception {
        if (!isInitialized) {
            init(DEFAULT_NLP_GROUP_MODEL_PATH);
            if (!isInitialized) {
                return null;
            }
        }
        String tagged = null;
        try {
            TestSentence testSentence = new TestSentence(GlobalHolder.getLambdaSolve(), str);
            tagged = testSentence.getTaggedNice();
        } catch (Exception ex) {
            // Tagging failures are reported on stderr and signalled by a null result.
            ex.printStackTrace();
        }
        return tagged;
    }

    /**
     * Tags a string and returns the result as a sentence of tagged words.
     * Loads the default model first if nothing has been initialized yet.
     *
     * @param str text to tag
     * @return the tagged sentence, or null if the tagger is not initialized or tagging fails
     * @throws Exception if loading the default model fails
     */
    public static synchronized Sentence<TaggedWord> tagStringTokenized(String str) throws Exception {
        if (!isInitialized) {
            init(DEFAULT_NLP_GROUP_MODEL_PATH);
            if (!isInitialized) {
                return null;
            }
        }
        Sentence<TaggedWord> tagged = null;
        try {
            TestSentence testSentence = new TestSentence(GlobalHolder.getLambdaSolve(), str);
            tagged = testSentence.getTaggedSentence();
        } catch (Exception ex) {
            // Tagging failures are reported on stderr and signalled by a null result.
            ex.printStackTrace();
        }
        return tagged;
    }

    @Override // edu.stanford.nlp.util.Function
    /**
     * Tags one sentence. Loads the default model first if nothing has been
     * initialized yet. A failure while loading or tagging prints the stack
     * trace and terminates the JVM with exit status -1.
     *
     * @param sentence sentence to tag
     * @return the tagged sentence, or null if the tagger is still uninitialized
     */
    public synchronized Sentence<TaggedWord> apply(Sentence<? extends HasWord> sentence) {
        if (!isInitialized) {
            try {
                init(DEFAULT_NLP_GROUP_MODEL_PATH);
            } catch (Exception initFailure) {
                initFailure.printStackTrace();
                System.exit(-1);
            }
            if (!isInitialized) {
                return null;
            }
        }
        Sentence<TaggedWord> tagged = null;
        try {
            TestSentence testSentence = new TestSentence();
            tagged = testSentence.tagSentence(GlobalHolder.getLambdaSolve(), sentence);
        } catch (Exception tagFailure) {
            tagFailure.printStackTrace();
            System.exit(-1);
        }
        return tagged;
    }

    @Override // edu.stanford.nlp.process.ListProcessor
    /**
     * Tags every sentence in the list, in order, reusing one {@code TestSentence}
     * for the whole batch.
     *
     * @param list sentences to tag
     * @return a new list holding the tagged sentence for each input sentence
     */
    public List<Sentence<TaggedWord>> process(List<Sentence<? extends HasWord>> list) {
        ArrayList taggedSentences = new ArrayList();
        TestSentence sharedTagger = new TestSentence();
        for (Sentence<? extends HasWord> sentence : list) {
            taggedSentences.add(sharedTagger.tagSentence(GlobalHolder.getLambdaSolve(), sentence));
        }
        return taggedSentences;
    }

    @Override // edu.stanford.nlp.ling.SentenceProcessor
    /**
     * {@code SentenceProcessor} entry point; delegates to {@link #tagSentence(Sentence)}.
     *
     * @param sentence sentence to tag
     * @return the tagged sentence
     */
    public Sentence<TaggedWord> processSentence(Sentence sentence) {
        Sentence<TaggedWord> tagged = tagSentence(sentence);
        return tagged;
    }

    /**
     * Tags one sentence with the currently loaded model. Unlike {@code apply},
     * this does not check or trigger initialization.
     *
     * @param sentence sentence to tag
     * @return the tagged sentence
     */
    public static Sentence<TaggedWord> tagSentence(Sentence<? extends HasWord> sentence) {
        TestSentence testSentence = new TestSentence();
        return testSentence.tagSentence(GlobalHolder.getLambdaSolve(), sentence);
    }

    /**
     * Splits the text from the reader into sentences using the default
     * {@code DocumentPreprocessor}.
     *
     * @param reader source of the text
     * @return the sentences read from the reader
     */
    public static List<Sentence<? extends HasWord>> tokenizeText(Reader reader) {
        final TokenizerFactory defaultFactory = null;
        return tokenizeText(reader, defaultFactory);
    }

    /**
     * Splits the text from the reader into sentences.
     *
     * @param reader           source of the text
     * @param tokenizerFactory factory for the tokenizer to use; when null a
     *                         default-constructed {@code DocumentPreprocessor} is used
     * @return a new list with one {@code Sentence} per sentence found
     */
    public static List<Sentence<? extends HasWord>> tokenizeText(Reader reader, TokenizerFactory tokenizerFactory) {
        DocumentPreprocessor preprocessor;
        if (tokenizerFactory != null) {
            preprocessor = new DocumentPreprocessor(tokenizerFactory);
        } else {
            preprocessor = new DocumentPreprocessor();
        }
        List<List<? extends HasWord>> wordLists = preprocessor.getSentencesFromText(reader);
        ArrayList sentences = new ArrayList(wordLists.size());
        for (List<? extends HasWord> words : wordLists) {
            sentences.add(new Sentence(words));
        }
        return sentences;
    }

    /**
     * CONVERT mode: dumps the configuration, then asks {@code GlobalHolder} to
     * convert the multi-file tagger. Failures are reported on stderr and not rethrown.
     *
     * @param taggerConfig configuration supplying the model and file names
     */
    private static void convertToSingleFileFormat(TaggerConfig taggerConfig) {
        try {
            taggerConfig.dump();
            String holderPath = taggerConfig.getModel() + ".holder";
            String targetFile = taggerConfig.getFile();
            GlobalHolder.convertMultifileTagger(holderPath, targetFile, taggerConfig);
        } catch (Exception ex) {
            System.err.println("An error occurred while converting to the new tagger format.");
            ex.printStackTrace();
        }
    }

    /**
     * DUMP mode: reads the model named by the configuration's file setting and
     * dumps it via {@code GlobalHolder}. Failures are printed and not rethrown.
     *
     * @param taggerConfig configuration supplying the model file
     */
    private static void dumpModel(TaggerConfig taggerConfig) {
        try {
            String modelFile = taggerConfig.getFile();
            GlobalHolder.readModelAndInit(taggerConfig, modelFile);
            GlobalHolder.dumpModel();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * TEST mode: optionally logs the invocation, enables the extra
     * {@code TestClassifier} output flags in debug mode, then runs the model test.
     * Failures are reported on stderr and not rethrown.
     *
     * @param taggerConfig configuration for the test run
     */
    private static void runTest(TaggerConfig taggerConfig) {
        boolean verbose = taggerConfig.getVerbose();
        if (verbose) {
            System.err.println("## tagger testing invoked at " + new Date() + " with arguments:");
            taggerConfig.dump();
        }
        boolean debug = taggerConfig.getDebug();
        if (debug) {
            TestClassifier.writeUnknDict = true;
            TestClassifier.writeWords = true;
            TestClassifier.writeTopWords = true;
        }
        try {
            TestClassifier.testModel(taggerConfig);
        } catch (Exception ex) {
            System.err.println("An error occured while testing the tagger.");
            ex.printStackTrace();
        }
    }

    /**
     * TRAIN mode: logs the invocation, records the configuration in
     * {@code <model>.props}, then trains and saves the model. Failures are
     * reported on stderr and not rethrown.
     *
     * @param taggerConfig configuration for the training run
     */
    private static void runTraining(TaggerConfig taggerConfig) {
        Date date = new Date();
        System.err.println("## tagger training invoked at " + date + " with arguments:");
        taggerConfig.dump();
        Timing timing = new Timing();
        try {
            PrintFile printFile = new PrintFile(taggerConfig.getModel() + ".props");
            try {
                printFile.println("## tagger training invoked at " + date + " with arguments:");
                taggerConfig.dump(printFile);
            } finally {
                // Release the props file even if writing the configuration fails.
                printFile.close();
            }
            TestClassifier.trainAndSaveModel(taggerConfig);
            timing.done("Training POS tagger");
        } catch (Exception e) {
            System.err.println("An error occurred while training a new tagger.");
            e.printStackTrace();
        }
    }

    /**
     * Flushes and closes the shared XML output stream. Does nothing when no
     * stream is open. I/O failures are printed and not rethrown.
     */
    private static void closeOutStream() {
        if (outStream == null) {
            return;
        }
        try {
            try {
                outStream.flush();
            } finally {
                // Close even when the flush fails, so the underlying file handle is released.
                outStream.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Renders a tagged sentence as an XML {@code <sentence>} element with one
     * {@code <word>} child per token. The word index goes in {@code wid}, the tag
     * in {@code pos}, and the word itself is the element text.
     *
     * <p>Tag and word text are XML-escaped so that tokens containing
     * {@code &}, {@code <}, {@code >} or quotes cannot break the markup.
     *
     * @param sentence tagged sentence to render
     * @param i        sentence id written to the {@code id} attribute
     * @return the XML fragment, without a trailing newline
     */
    public static String getXMLWords(Sentence<TaggedWord> sentence, int i) {
        StringBuilder sb = new StringBuilder();
        sb.append("<sentence id=\"").append(i).append("\">\n");
        int size = sentence.size();
        for (int wordIndex = 0; wordIndex < size; wordIndex++) {
            TaggedWord taggedWord = (TaggedWord) sentence.get(wordIndex);
            sb.append("\t<word wid=\"").append(wordIndex)
                    .append("\" pos=\"").append(escapeXMLText(taggedWord.tag()))
                    .append("\">").append(escapeXMLText(taggedWord.word()))
                    .append("</word>\n");
        }
        sb.append("</sentence>");
        return sb.toString();
    }

    /** Replaces the five XML special characters with their predefined entities; null is passed through. */
    private static String escapeXMLText(String text) {
        if (text == null) {
            return null;
        }
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int pos = 0; pos < text.length(); pos++) {
            char c = text.charAt(pos);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Writes the XML rendering of a tagged sentence to the shared XML output
     * stream. Any failure is reported on stderr together with the sentence and
     * is not rethrown.
     *
     * @param sentence tagged sentence to write
     * @param i        sentence id used in the XML and in the error message
     */
    public static void writeXMLSentence(Sentence<TaggedWord> sentence, int i) {
        try {
            Writer target = outStream;
            String xml = getXMLWords(sentence, i);
            target.write(xml);
        } catch (Exception ex) {
            String plainSentence = sentence.toString(false);
            System.err.println("Error writing sentence " + i + ": " + plainSentence);
            ex.printStackTrace();
        }
    }

    /**
     * Tags the text inside the configured XML elements of the input file and
     * writes the transformed document to standard output. A
     * {@code TaggerWrapper} does the tagging of each text chunk. Afterwards the
     * shared XML output stream, if open, is closed.
     *
     * <p>I/O failures are reported on stderr and not rethrown.
     *
     * @param taggerConfig configuration supplying the input file and the XML element names
     */
    public static void tagFromXML(TaggerConfig taggerConfig) {
        TransformXML transformXML = new TransformXML();
        try {
            FileInputStream fileInputStream = new FileInputStream(taggerConfig.getFile());
            try {
                transformXML.transformXML(taggerConfig.getXMLInput(), new TaggerWrapper(taggerConfig), fileInputStream, System.out, new TaggerSaxInterface());
            } finally {
                // Release the input file even when the transformation fails.
                fileInputStream.close();
            }
        } catch (FileNotFoundException e) {
            System.err.println("Input file not found: " + taggerConfig.getFile());
            e.printStackTrace();
        } catch (IOException e2) {
            System.err.println("tagFromXML: mysterious IO Exception");
            e2.printStackTrace();
        }
        if (outStream != null) {
            closeOutStream();
        }
    }

    /**
     * TAG mode: loads the model, selects a tokenizer, and tags the configured
     * input, writing tagged sentences to standard output and, when an XML output
     * path is configured, to the shared XML output stream as well.
     *
     * <p>Input handling, in order of precedence: XML input elements configured
     * (delegates to {@code tagFromXML}); SGML flag set (reads documents through an
     * {@code ObjectBank}); otherwise sentences come from a
     * {@code DocumentPreprocessor}. When the file name is "stdin" the outer loop
     * repeats, reading one line per iteration.
     *
     * <p>NOTE(review): this is decompiler output. Several locals (timing, str,
     * documentPreprocessor, i, i2, bufferedWriter) are assigned inside the first
     * try block and read after it, so the original try/catch most likely covered
     * the whole method body - confirm against the original source before editing.
     *
     * @param taggerConfig configuration for the tagging run
     */
    private static void runTagger(TaggerConfig taggerConfig) {
        Timing timing;
        String str;
        TokenizerFactory<Word> factory;
        DocumentPreprocessor documentPreprocessor;
        int i;
        int i2;
        BufferedWriter bufferedWriter;
        BufferedReader bufferedReader;
        if (taggerConfig.getVerbose()) {
            System.err.println("## tagger invoked at " + new Date() + " with arguments:");
            taggerConfig.dump();
        }
        try {
            timing = new Timing();
            System.err.print("Reading POS tagger from " + taggerConfig.getModel() + " ... ");
            init(taggerConfig.getModel(), taggerConfig);
            timing.done();
            timing.start();
            // str is later passed as the second argument of getSentencesFromText;
            // presumably a sentence delimiter - TODO confirm. It is only non-null
            // in the whitespace-tokenizer case below.
            str = null;
            if (taggerConfig.getTokenize() && taggerConfig.getTokenizerFactory().trim().length() != 0) {
                // A tokenizer factory class was named in the configuration: instantiate it reflectively.
                factory = (TokenizerFactory) Class.forName(taggerConfig.getTokenizerFactory()).newInstance();
            } else if (taggerConfig.getTokenize()) {
                factory = PTBTokenizer.PTBTokenizerFactory.newPTBTokenizerFactory();
            } else {
                // Tokenization disabled: split on whitespace only.
                factory = WhitespaceTokenizer.factory();
                str = "\n";
            }
            documentPreprocessor = new DocumentPreprocessor(factory);
            documentPreprocessor.setEncoding(taggerConfig.getEncoding());
            // i accumulates the number of words tagged; i2 is the sentence id used for XML output.
            i = 0;
            i2 = 0;
            bufferedWriter = new BufferedWriter(new OutputStreamWriter(System.out, taggerConfig.getEncoding()));
            if (taggerConfig.getXMLOutput().length() > 0) {
                try {
                    outStream = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(taggerConfig.getXMLOutput()), taggerConfig.getEncoding()));
                } catch (Exception e) {
                    // Failure to open the XML output is reported but tagging continues without it.
                    System.err.println("Error opening an output to " + taggerConfig.getXMLOutput() + " for XML.");
                    e.printStackTrace();
                }
            }
        } catch (Exception e2) {
            System.err.println("An error occurred while tagging.");
            e2.printStackTrace();
        }
        // XML input elements configured: hand the whole job to tagFromXML.
        if (taggerConfig.getXMLInput().length > 0) {
            tagFromXML(taggerConfig);
            return;
        }
        boolean equalsIgnoreCase = taggerConfig.getFile().trim().equalsIgnoreCase("stdin");
        // Runs once for file input; repeats for as long as the input is stdin.
        do {
            if (equalsIgnoreCase) {
                System.err.println("Type some text to tag, then EOF.");
                System.err.println("  (For EOF, use Return, Ctrl-D on Unix; Enter, Ctrl-Z, Enter on Windows.)");
                // NOTE(review): a fresh reader over System.in is created on every iteration,
                // and it uses the platform default charset rather than the configured encoding.
                bufferedReader = new BufferedReader(new InputStreamReader(System.in));
            } else {
                bufferedReader = IOUtils.readReaderFromString(taggerConfig.getFile(), taggerConfig.getEncoding());
            }
            if (taggerConfig.getSGML()) {
                Iterator it = new ObjectBank(new ReaderIteratorFactory(bufferedReader), new PlainTextDocumentReaderAndWriter()).iterator();
                while (it.hasNext()) {
                    List list = (List) it.next();
                    Sentence sentence = new Sentence(list);
                    i += sentence.length();
                    Sentence<TaggedWord> tagSentence = tagSentence(sentence);
                    // Copy each predicted tag back onto the corresponding input token.
                    Iterator it2 = list.iterator();
                    Iterator<T> it3 = tagSentence.iterator();
                    while (it3.hasNext()) {
                        ((CoreLabel) it2.next()).set(CoreAnnotations$AnswerAnnotation.class, ((TaggedWord) it3.next()).tag());
                    }
                    bufferedWriter.write(PlainTextDocumentReaderAndWriter.getAnswers(list));
                }
            } else {
                // Sentence source: XML elements named by getTagInside() when set; otherwise a
                // single line from stdin, or the whole reader for file input.
                // NOTE(review): readLine() returns null at end of input and that null is passed
                // straight to StringReader - confirm how stdin EOF is meant to end the loop.
                for (List<? extends HasWord> list2 : (taggerConfig.getTagInside() == null || taggerConfig.getTagInside().equals("")) ? equalsIgnoreCase ? documentPreprocessor.getSentencesFromText(new StringReader(bufferedReader.readLine())) : documentPreprocessor.getSentencesFromText(bufferedReader, str) : documentPreprocessor.getSentencesFromXML((Reader) bufferedReader, taggerConfig.getTagInside(), (String) null, false)) {
                    i += list2.size();
                    Sentence<TaggedWord> tagSentence2 = tagSentence(new Sentence(list2));
                    bufferedWriter.write(tagSentence2.toString(false));
                    bufferedWriter.write("\n");
                    if (equalsIgnoreCase) {
                        // Interactive use: add a blank line and flush so the result appears immediately.
                        bufferedWriter.newLine();
                        bufferedWriter.flush();
                    }
                    if (outStream != null) {
                        writeXMLSentence(tagSentence2, i2);
                    }
                    i2++;
                }
            }
        } while (equalsIgnoreCase);
        bufferedWriter.close();
        TestClassifier.printErrWordsPerSec(timing.stop(), i);
        if (outStream != null) {
            closeOutStream();
        }
    }

    /**
     * Command-line entry point. Builds a {@code TaggerConfig} from the arguments
     * and dispatches on its mode: TRAIN, TAG, TEST, CONVERT or DUMP. Any other
     * mode prints a message to stderr.
     *
     * @param strArr command-line arguments
     * @throws IOException declared by the signature
     */
    public static void main(String[] strArr) throws IOException {
        TaggerConfig taggerConfig = new TaggerConfig(strArr);
        TaggerConfig.Mode mode = taggerConfig.getMode();
        if (mode == TaggerConfig.Mode.TRAIN) {
            runTraining(taggerConfig);
        } else if (mode == TaggerConfig.Mode.TAG) {
            runTagger(taggerConfig);
        } else if (mode == TaggerConfig.Mode.TEST) {
            runTest(taggerConfig);
        } else if (mode == TaggerConfig.Mode.CONVERT) {
            convertToSingleFileFormat(taggerConfig);
        } else if (mode == TaggerConfig.Mode.DUMP) {
            dumpModel(taggerConfig);
        } else {
            System.err.println("Impossible: nothing to do. None of train, tag, test, or convert was specified.");
        }
    }
}
