/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.cmdline.doccat;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.doccat.TrainingParams;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.doccat.DocumentSampleStream;
import opennlp.tools.doccat.FeatureGenerator;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command line tool that trains a document categorizer model from a sample
 * data file and writes the resulting model to disk.
 */
public class DoccatTrainerTool
implements CmdLineTool {
    /**
     * Returns the name under which this tool is addressed on the command line.
     *
     * @return the constant tool name {@code "DoccatTrainer"}
     */
    @Override
    public String getName() {
        return "DoccatTrainer";
    }

    /**
     * Returns a one-line description of this tool.
     *
     * @return the short description text
     */
    @Override
    public String getShortDescription() {
        return "trainer for the learnable document categorizer";
    }

    /**
     * Builds the usage message: the tool name followed by the usage string
     * that {@link ArgumentParser#createUsage} produces for
     * {@link TrainerToolParams}.
     *
     * @return the usage/help text
     */
    @Override
    public String getHelp() {
        return "Usage: opennlp " + this.getName() + " " + ArgumentParser.createUsage(TrainerToolParams.class);
    }

    /**
     * Opens a sample data file as a stream of {@link DocumentSample}s.
     *
     * <p>The file is first passed to {@code CmdLineUtil.checkInputFile} (labelled
     * {@code sampleDataName + " Data"}), then opened and read line by line in
     * the given encoding.
     *
     * @param sampleDataName human-readable label of the data set, used in the
     *                       input file check
     * @param sampleDataFile the file containing the samples
     * @param encoding       the character encoding used to decode the file
     * @return a sample stream over the file; the caller is responsible for
     *         closing it
     */
    static ObjectStream<DocumentSample> openSampleData(String sampleDataName, File sampleDataFile, Charset encoding) {
        CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
        FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
        PlainTextByLineStream lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(), encoding);
        return new DocumentSampleStream(lineStream);
    }

    /**
     * Closes the training sample stream on a best-effort basis.
     *
     * <p>A failure to close is reported on {@code System.err} instead of being
     * silently discarded, but it is deliberately not propagated: it must
     * neither mask an error raised during training nor prevent a successfully
     * trained model from being written.
     *
     * @param sampleStream the stream to close
     */
    private static void closeSampleStream(ObjectStream<DocumentSample> sampleStream) {
        try {
            sampleStream.close();
        }
        catch (IOException e) {
            System.err.println("Warning: failed to close the training data stream: " + e.getMessage());
        }
    }

    /**
     * Runs the trainer with the given command line arguments.
     *
     * <p>Steps: validate and parse the arguments, load the optional training
     * parameters, check the model output file, open the training data, train
     * the model, close the training data and finally write the model.
     *
     * @param args the command line arguments, described by
     *             {@link TrainerToolParams}
     * @throws TerminateToolException with code {@code 1} if the arguments are
     *         invalid (after printing the help text to {@code System.err}), or
     *         with code {@code -1} if an {@link IOException} occurs while
     *         training
     */
    @Override
    public void run(String[] args) {
        if (!ArgumentParser.validateArguments(args, TrainerToolParams.class)) {
            System.err.println(this.getHelp());
            throw new TerminateToolException(1);
        }
        TrainerToolParams params = ArgumentParser.parse(args, TrainerToolParams.class);
        TrainingParameters mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
        File trainingDataInFile = params.getData();
        File modelOutFile = params.getModel();
        // Check the output location before the training data is opened and training starts.
        CmdLineUtil.checkOutputFile("document categorizer model", modelOutFile);
        ObjectStream<DocumentSample> sampleStream = DoccatTrainerTool.openSampleData("Training", trainingDataInFile, params.getEncoding());
        DoccatModel model;
        try {
            if (mlParams == null) {
                // No training parameters were loaded: use cutoff and iterations from the command line.
                model = DocumentCategorizerME.train(params.getLang(), sampleStream, params.getCutoff(), params.getIterations());
            } else {
                // Loaded training parameters take precedence; no custom feature generators are supplied.
                model = DocumentCategorizerME.train(params.getLang(), sampleStream, mlParams, new FeatureGenerator[0]);
            }
        }
        catch (IOException e) {
            CmdLineUtil.printTrainingIoError(e);
            throw new TerminateToolException(-1);
        }
        finally {
            DoccatTrainerTool.closeSampleStream(sampleStream);
        }
        CmdLineUtil.writeModel("document categorizer", modelOutFile, model);
    }

    /**
     * Combined command line parameters of this tool: the doccat-specific
     * {@link TrainingParams} plus the general {@link TrainingToolParams}.
     */
    static interface TrainerToolParams
    extends TrainingParams,
    TrainingToolParams {
    }
}

