/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.sentdetect;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.model.AbstractModel;
import opennlp.model.EventStream;
import opennlp.model.MaxentModel;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.sentdetect.EndOfSentenceScanner;
import opennlp.tools.sentdetect.SDContextGenerator;
import opennlp.tools.sentdetect.SDEventStream;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.sentdetect.SentenceSampleStream;
import opennlp.tools.sentdetect.lang.Factory;
import opennlp.tools.util.HashSumEventStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.StringUtil;
import opennlp.tools.util.model.ModelUtil;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Sentence detector that asks a maxent model, for each candidate
 * end-of-sentence position reported by an {@link EndOfSentenceScanner},
 * whether the text should be split there.
 *
 * <p>The probabilities of the most recent {@link #sentPosDetect(String)} call
 * are kept in an instance field, so calls on a shared instance overwrite each
 * other's probabilities.
 */
public class SentenceDetectorME implements SentenceDetector {
    /** Model outcome meaning "a sentence ends at this candidate". */
    public static final String SPLIT = "s";
    /** Model outcome meaning "no sentence boundary at this candidate". */
    public static final String NO_SPLIT = "n";
    /** Probability recorded for spans that were not produced by a model decision. */
    private static final Double ONE = Double.valueOf(1.0);
    private final MaxentModel model;
    private final SDContextGenerator cgen;
    private final EndOfSentenceScanner scanner;
    /** One probability per span returned by the last {@link #sentPosDetect(String)} call. */
    private List<Double> sentProbs = new ArrayList<Double>();
    /**
     * When {@code true}, a sentence boundary is moved to the end of the token
     * containing the candidate character before the next sentence start is
     * looked up.
     */
    protected boolean useTokenEnd;

    /**
     * Creates a detector for the given model using a default {@link Factory}.
     *
     * @param model the trained sentence model
     */
    public SentenceDetectorME(SentenceModel model) {
        this(model, new Factory());
    }

    /**
     * Creates a detector for the given model; the context generator and the
     * end-of-sentence scanner are obtained from {@code factory} for the
     * model's language.
     *
     * @param model the trained sentence model
     * @param factory source of the language specific context generator and scanner
     */
    public SentenceDetectorME(SentenceModel model, Factory factory) {
        this.model = model.getMaxentModel();
        this.cgen = factory.createSentenceContextGenerator(model.getLanguage());
        this.scanner = factory.createEndOfSentenceScanner(model.getLanguage());
        this.useTokenEnd = model.useTokenEnd();
    }

    /**
     * Splits the text into sentences.
     *
     * @param s the text to split
     * @return the text covered by each span of {@link #sentPosDetect(String)},
     *         in order; empty when no span was found
     */
    @Override
    public String[] sentDetect(String s) {
        Span[] spans = this.sentPosDetect(s);
        String[] sentences = new String[spans.length];
        for (int si = 0; si < spans.length; ++si) {
            sentences[si] = spans[si].getCoveredText(s).toString();
        }
        return sentences;
    }

    /**
     * Returns the index of the first whitespace character at or after
     * {@code pos}, or {@code s.length()} when there is none.
     */
    private int getFirstWS(String s, int pos) {
        while (pos < s.length() && !StringUtil.isWhitespace(s.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /**
     * Returns the index of the first non-whitespace character at or after
     * {@code pos}, or {@code s.length()} when there is none.
     */
    private int getFirstNonWS(String s, int pos) {
        while (pos < s.length() && StringUtil.isWhitespace(s.charAt(pos))) {
            ++pos;
        }
        return pos;
    }

    /**
     * Detects sentence spans in the text. Spans have surrounding whitespace
     * removed, except that the final span, when text remains after the last
     * detected boundary, runs to the end of the string.
     *
     * <p>As a side effect the probabilities returned by
     * {@link #getSentenceProbabilities()} are replaced.
     *
     * @param s the text to split
     * @return the sentence spans in text order; empty when the text contains
     *         only whitespace
     */
    @Override
    public Span[] sentPosDetect(String s) {
        this.sentProbs.clear();
        StringBuffer sb = new StringBuffer(s);
        List<Integer> enders = this.scanner.getPositions(s);
        List<Integer> positions = new ArrayList<Integer>(enders.size());
        int end = enders.size();
        // Position just after the last accepted split candidate.
        int index = 0;
        for (int i = 0; i < end; ++i) {
            int cint = enders.get(i);
            int fws = this.getFirstWS(s, cint + 1);
            // Several candidates inside one token: only the last one is evaluated.
            if (i + 1 < end && enders.get(i + 1) < fws) {
                continue;
            }
            double[] probs = this.model.eval(this.cgen.getContext(sb, cint));
            String bestOutcome = this.model.getBestOutcome(probs);
            if (!bestOutcome.equals(SPLIT) || !this.isAcceptableBreak(s, index, cint)) {
                continue;
            }
            if (index != cint) {
                if (this.useTokenEnd) {
                    positions.add(this.getFirstNonWS(s, this.getFirstWS(s, cint + 1)));
                } else {
                    // Search after the candidate: starting at cint itself would return
                    // cint for a non-whitespace terminator and push that terminator
                    // into the following sentence.
                    positions.add(this.getFirstNonWS(s, cint + 1));
                }
                this.sentProbs.add(Double.valueOf(probs[this.model.getIndex(bestOutcome)]));
            }
            index = cint + 1;
        }
        int[] starts = new int[positions.size()];
        for (int i = 0; i < starts.length; ++i) {
            starts[i] = positions.get(i);
        }
        if (starts.length == 0) {
            // No boundary found: the whole trimmed text is a single sentence.
            int start = 0;
            int textEnd = s.length();
            while (start < textEnd && Character.isWhitespace(s.charAt(start))) {
                ++start;
            }
            while (textEnd > 0 && Character.isWhitespace(s.charAt(textEnd - 1))) {
                --textEnd;
            }
            if (textEnd - start > 0) {
                // Keep probabilities aligned with the returned spans.
                this.sentProbs.add(ONE);
                return new Span[]{new Span(start, textEnd)};
            }
            return new Span[0];
        }
        // Text after the last detected boundary forms one more sentence.
        boolean leftover = starts[starts.length - 1] != s.length();
        Span[] spans = new Span[leftover ? starts.length + 1 : starts.length];
        for (int si = 0; si < starts.length; ++si) {
            int start;
            if (si == 0) {
                // Skip leading whitespace of the text, never running past its end.
                start = 0;
                while (start < s.length() && Character.isWhitespace(s.charAt(start))) {
                    ++start;
                }
            } else {
                start = starts[si - 1];
            }
            // Drop the whitespace separating this sentence from the next one.
            int spanEnd = starts[si];
            while (spanEnd > 0 && Character.isWhitespace(s.charAt(spanEnd - 1))) {
                --spanEnd;
            }
            spans[si] = new Span(start, spanEnd);
        }
        if (leftover) {
            spans[spans.length - 1] = new Span(starts[starts.length - 1], s.length());
            this.sentProbs.add(ONE);
        }
        return spans;
    }

    /**
     * Returns the probabilities recorded by the most recent
     * {@link #sentPosDetect(String)} (or {@link #sentDetect(String)}) call,
     * one per returned sentence. Sentences that did not result from a model
     * decision have probability 1.0.
     *
     * @return a new array holding the recorded probabilities
     */
    public double[] getSentenceProbabilities() {
        double[] sentProbArray = new double[this.sentProbs.size()];
        for (int i = 0; i < sentProbArray.length; ++i) {
            sentProbArray[i] = this.sentProbs.get(i);
        }
        return sentProbArray;
    }

    /**
     * Hook that lets subclasses veto a split the model proposed. This
     * implementation accepts every split.
     *
     * @param s the text being split
     * @param fromIndex position just after the previously accepted candidate
     *        (0 for the first one)
     * @param candidateIndex position of the candidate end-of-sentence character
     * @return {@code true} if a split at {@code candidateIndex} is allowed
     */
    protected boolean isAcceptableBreak(String s, int fromIndex, int candidateIndex) {
        return true;
    }

    /**
     * Trains a sentence model with a cutoff of 5 and 100 iterations.
     *
     * @param languageCode language of the training data
     * @param samples the training samples
     * @param useTokenEnd stored in the model; see {@link #useTokenEnd}
     * @param abbreviations abbreviation dictionary stored in the model
     * @return the trained model
     * @throws IOException declared for failures while reading the samples or training
     */
    public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations) throws IOException {
        return SentenceDetectorME.train(languageCode, samples, useTokenEnd, abbreviations, 5, 100);
    }

    /**
     * Trains a sentence model. The cutoff, the iteration count and a hash sum
     * of the training events are recorded in the model's manifest entries.
     *
     * @param languageCode language of the training data
     * @param samples the training samples
     * @param useTokenEnd stored in the model; see {@link #useTokenEnd}
     * @param abbreviations abbreviation dictionary stored in the model
     * @param cutoff cutoff value passed to {@code GIS.trainModel}
     * @param iterations number of iterations passed to {@code GIS.trainModel}
     * @return the trained model
     * @throws IOException declared for failures while reading the samples or training
     */
    public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException {
        HashMap<String, String> manifestInfoEntries = new HashMap<String, String>();
        ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations);
        Factory factory = new Factory();
        SDEventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode));
        HashSumEventStream hses = new HashSumEventStream((EventStream)eventStream);
        GISModel sentModel = GIS.trainModel((EventStream)hses, iterations, cutoff);
        manifestInfoEntries.put("Training-Eventhash", hses.calculateHashSum().toString(16));
        return new SentenceModel(languageCode, (AbstractModel)sentModel, useTokenEnd, abbreviations, manifestInfoEntries);
    }

    /** Prints the command line help to standard error and exits with status 1. */
    private static void usage() {
        System.err.println("Usage: SentenceDetectorME -encoding charset -lang language trainData modelName [cutoff iterations]");
        System.err.println("-encoding charset specifies the encoding which should be used ");
        System.err.println("                  for reading and writing text.");
        System.err.println("-lang language    specifies the language which ");
        System.err.println("                  is being processed.");
        System.err.println("trainData         specifies the name of the input training file");
        System.err.println("                  to train the resulting model.");
        System.err.println("modelName         specifies the resulting saved model after");
        System.err.println("                  training.");
        System.exit(1);
    }

    /**
     * Command line trainer: reads the training file with the given encoding,
     * trains a model with {@code useTokenEnd == true} and no abbreviation
     * dictionary, and writes it to the model file. Any arguments after the
     * model file name are not read. Exits with status 1 on invalid arguments
     * or when training or saving fails.
     *
     * @param args {@code -encoding charset -lang language trainData modelName}
     * @throws IOException declared for compatibility; failures are reported on
     *         standard error instead
     */
    public static void main(String[] args) throws IOException {
        int ai = 0;
        String encoding = null;
        String lang = null;
        while (ai < args.length && args[ai].startsWith("-")) {
            String option = args[ai++];
            if (ai >= args.length) {
                // Option without a value.
                SentenceDetectorME.usage();
                return;
            }
            if (option.equals("-encoding")) {
                encoding = args[ai++];
            } else if (option.equals("-lang")) {
                lang = args[ai++];
            } else {
                SentenceDetectorME.usage();
                return;
            }
        }
        // Both options and both file names are required.
        if (lang == null || encoding == null || args.length - ai < 2) {
            SentenceDetectorME.usage();
            return;
        }
        File inFile = new File(args[ai++]);
        File outFile = new File(args[ai++]);
        try {
            SentenceModel model;
            FileInputStream trainingIn = new FileInputStream(inFile);
            try {
                model = SentenceDetectorME.train(lang, new SentenceSampleStream(new PlainTextByLineStream(new InputStreamReader(trainingIn, encoding))), true, null);
            } finally {
                trainingIn.close();
            }
            System.out.println("Saving the model as: " + outFile);
            FileOutputStream modelOut = new FileOutputStream(outFile);
            try {
                model.serialize(modelOut);
            } finally {
                modelOut.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure to the calling process as well.
            System.exit(1);
        }
    }
}

