package chipmunk.test.segmenter;

import chipmunk.segmenter.ScoredSegmentationReading;
import chipmunk.segmenter.Scorer;
import chipmunk.segmenter.SegmentationDataReader;
import chipmunk.segmenter.Segmenter;
import chipmunk.segmenter.SegmenterOptions;
import chipmunk.segmenter.SegmenterTrainer;
import chipmunk.segmenter.StatSegmenter;
import chipmunk.segmenter.Word;
import java.util.List;
import java.util.logging.Logger;
import marmot.util.Copy;
import marmot.util.Numerics;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:chipmunk/test/segmenter/SegmenterTest.class */
/**
 * End-to-end tests for the chipmunk segmenter: each test trains a model on a
 * bundled resource data set and then checks the trained model's output.
 */
public class SegmenterTest {
    private static final Logger LOGGER = Logger.getLogger(SegmenterTest.class.getName());

    /** Resource prefix shared by the German training ("trn") and test ("tst") sets. */
    private static final String GER_DATA_PREFIX = "res:///chipmunk/test/segmenter/data/ger/";

    /** Lowest F-score (as reported by {@link Scorer#getFscore()}) the accuracy tests accept. */
    private static final double MIN_FSCORE = 99.0d;

    /** Upper bound on the number of words used by {@link #segmentHaus()}. */
    private static final int MAX_SAMPLE_WORDS = 1000;

    /**
     * Trains on the English training set with default options (only the
     * language is set) and scores the model on that same training set.
     */
    @Test
    public void trainAccuracyTest() {
        SegmentationDataReader trainReader =
                new SegmentationDataReader("res:///chipmunk/test/segmenter/data/eng/trn", "eng", 0);
        Segmenter segmenter = trainAndClone(newOptions("eng", false), trainReader);
        assertFscore(segmenter, trainReader);
    }

    /**
     * Trains on the German training set with CRF mode enabled and scores the
     * model on the separate German test set.
     */
    @Test
    public void crfTrainAccuracyTest() {
        SegmentationDataReader trainReader = new SegmentationDataReader(GER_DATA_PREFIX + "trn", "ger", 0);
        SegmentationDataReader testReader = new SegmentationDataReader(GER_DATA_PREFIX + "tst", "ger", 0);
        Segmenter segmenter = trainAndClone(newOptions("ger", true), trainReader);
        assertFscore(segmenter, testReader);
    }

    /**
     * Trains a CRF-mode German model on (at most) the first
     * {@value #MAX_SAMPLE_WORDS} training words and checks that every scored
     * reading of those words has a strictly negative log-likelihood.
     */
    @Test
    public void segmentHaus() {
        SegmentationDataReader trainReader = new SegmentationDataReader(GER_DATA_PREFIX + "trn", "ger", 0);
        List<Word> allWords = trainReader.getData();
        // Clamp the upper bound so a smaller data set does not raise IndexOutOfBoundsException.
        List<Word> sample = allWords.subList(0, Math.min(MAX_SAMPLE_WORDS, allWords.size()));

        Segmenter segmenter = new SegmenterTrainer(newOptions("ger", true)).train(sample);
        if (!(segmenter instanceof StatSegmenter)) {
            // Scores are only exposed by StatSegmenter; without one there is nothing to verify.
            // Make the skip visible instead of passing silently.
            LOGGER.warning("segmentHaus skipped: trained segmenter is not a StatSegmenter: "
                    + segmenter.getClass().getName());
            return;
        }

        StatSegmenter statSegmenter = (StatSegmenter) segmenter;
        for (Word word : sample) {
            ScoredSegmentationReading scored = statSegmenter.segmentWithScores(word);
            double logLikelihood = scored.logLikelihood;
            System.out.printf("p(y|x) for %s is %f(%e)%n", scored.reading, logLikelihood, logLikelihood);
            Assert.assertTrue(
                    "log-likelihood must be negative for " + scored.reading + " but was " + logLikelihood,
                    logLikelihood < 0.0d);
        }
    }

    /**
     * Builds options for the given language; CRF mode is only set explicitly
     * when requested, otherwise the option keeps its default.
     */
    private static SegmenterOptions newOptions(String lang, boolean crfMode) {
        SegmenterOptions options = new SegmenterOptions();
        options.setOption(SegmenterOptions.LANG, lang);
        if (crfMode) {
            options.setOption(SegmenterOptions.CRF_MODE, true);
        }
        return options;
    }

    /**
     * Trains a segmenter on the reader's data and returns a copy produced by
     * {@link Copy#clone}, so the tests score the cloned model rather than the
     * trainer's own instance.
     */
    private static Segmenter trainAndClone(SegmenterOptions options, SegmentationDataReader trainReader) {
        return (Segmenter) Copy.clone(new SegmenterTrainer(options).train(trainReader.getData()));
    }

    /**
     * Scores the segmenter on the reader's data, logs the scorer's report and
     * asserts that the F-score is approximately at least {@link #MIN_FSCORE}.
     */
    private static void assertFscore(Segmenter segmenter, SegmentationDataReader evalReader) {
        Scorer scorer = new Scorer();
        scorer.eval(evalReader.getData(), segmenter);
        LOGGER.info(scorer.report());
        double fscore = scorer.getFscore();
        Assert.assertTrue(
                "F-score " + fscore + " is below the required " + MIN_FSCORE,
                Numerics.approximatelyGreaterEqual(fscore, MIN_FSCORE));
    }
}
