package marmot.tokenize.cmd;

import chipmunk.segmenter.SegmenterOptions;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
import marmot.morph.MorphOptions;
import marmot.tokenize.RuleBasedTokenizer;
import marmot.tokenize.openlp.OpenNlpConverter;
import marmot.tokenize.openlp.OpenNlpTokenizerTrainer;
import marmot.tokenize.preprocess.WikiSelector;
import marmot.tokenize.rules.RuleProvider;
import marmot.util.FileUtils;

/* loaded from: input_file:marmot/tokenize/cmd/Trainer.class */
/**
 * Command-line entry point that trains a tokenizer model and saves it to disk.
 *
 * <p>The program parses its options with JSAP, runs an {@link OpenNlpConverter}
 * over a {@link WikiSelector} built from the tokenized and untokenized input
 * files, writes the converter output to a temporary file, trains on that file
 * with {@link OpenNlpTokenizerTrainer}, and finally saves the resulting
 * {@link RuleBasedTokenizer} to the requested model file.
 */
public class Trainer {
    private static final String TOKENIZED_FILE = "tokenized-file";
    private static final String UNTOKENIZED_FILE = "untokenized-file";
    private static final String NUM_SENTENCES = "num-sentences";
    private static final String VERBOSE = "verbose";

    /**
     * Parses the command line and runs the training pipeline.
     *
     * <p>On a parse failure the errors, usage and help text are printed to
     * standard error and the JVM exits with status 1.
     *
     * @param strArr command-line arguments (flags: tokenized-file, untokenized-file,
     *               the model-file and language options, num-sentences, verbose)
     * @throws IOException   if the temporary file or its writer cannot be created or closed
     * @throws JSAPException if the option definitions cannot be registered
     */
    public static void main(String[] strArr) throws IOException, JSAPException {
        JSAP jsap = createParser();
        JSAPResult options = jsap.parse(strArr);
        if (!options.success()) {
            reportParseFailure(jsap, options);
            System.exit(1);
        }

        String lang = options.getString(SegmenterOptions.LANG);
        String tokenizedFile = options.getString(TOKENIZED_FILE);
        String untokenizedFile = options.getString(UNTOKENIZED_FILE);
        String modelFile = options.getString(MorphOptions.MODEL_FILE);
        int numSentences = options.getInt(NUM_SENTENCES);
        int verbosity = options.getInt(VERBOSE);

        // NOTE(review): the boolean is true only for "de" and "es"; its meaning is
        // defined by WikiSelector's constructor, which is not visible here.
        boolean langFlag = lang.equalsIgnoreCase("de") || lang.equalsIgnoreCase("es");
        WikiSelector selector = new WikiSelector(untokenizedFile, tokenizedFile, langFlag, numSentences);
        RuleProvider ruleProvider = RuleProvider.createRuleProvider(lang);
        OpenNlpConverter converter = new OpenNlpConverter(ruleProvider);

        System.out.println("Starting alignment for '" + lang + "' textset");
        train(converter, selector, ruleProvider, verbosity, modelFile);
    }

    /** Builds the JSAP parser with every option this command accepts. */
    private static JSAP createParser() throws JSAPException {
        JSAP jsap = new JSAP();
        jsap.registerParameter(new FlaggedOption(TOKENIZED_FILE).setRequired(true).setLongFlag(TOKENIZED_FILE));
        jsap.registerParameter(new FlaggedOption(UNTOKENIZED_FILE).setRequired(true).setLongFlag(UNTOKENIZED_FILE));
        jsap.registerParameter(new FlaggedOption(MorphOptions.MODEL_FILE).setRequired(true).setLongFlag(MorphOptions.MODEL_FILE));
        jsap.registerParameter(new FlaggedOption(SegmenterOptions.LANG).setRequired(true).setLongFlag(SegmenterOptions.LANG));
        jsap.registerParameter(new FlaggedOption(NUM_SENTENCES).setRequired(true).setLongFlag(NUM_SENTENCES).setStringParser(JSAP.INTEGER_PARSER).setDefault("1000"));
        jsap.registerParameter(new FlaggedOption(VERBOSE).setRequired(true).setLongFlag(VERBOSE).setStringParser(JSAP.INTEGER_PARSER).setDefault("0"));
        return jsap;
    }

    /** Prints every parse error followed by the usage and help text to standard error. */
    private static void reportParseFailure(JSAP jsap, JSAPResult options) {
        Iterator<?> errors = options.getErrorMessageIterator();
        while (errors.hasNext()) {
            System.err.println("Error: " + errors.next());
        }
        System.err.println("Usage: ");
        System.err.println(jsap.getUsage());
        System.err.println(jsap.getHelp());
        System.err.println();
    }

    /**
     * Runs the converter into a temporary file, trains on that file and saves
     * the resulting tokenizer to {@code modelFile}.
     */
    private static void train(OpenNlpConverter converter, WikiSelector selector, RuleProvider ruleProvider,
            int verbosity, String modelFile) throws IOException {
        File trainingFile = File.createTempFile("openlp_file", ".txt");
        try {
            // try-with-resources closes the writer even when convert() throws;
            // it is closed before training starts, as in the original flow.
            try (Writer writer = FileUtils.openFileWriter(trainingFile.getAbsolutePath())) {
                converter.convert(selector, writer, verbosity);
            }
            new RuleBasedTokenizer(new OpenNlpTokenizerTrainer().train(trainingFile.getAbsolutePath()), ruleProvider)
                    .saveToFile(modelFile);
        } finally {
            // The intermediate file is only needed during training; do not leave
            // it behind in the temp directory. Fall back to removal at JVM exit.
            if (!trainingFile.delete()) {
                trainingFile.deleteOnExit();
            }
        }
    }
}
