package marmot.test.morph;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import junit.framework.AssertionFailedError;
import marmot.core.Model;
import marmot.core.Options;
import marmot.core.PerceptronTrainer;
import marmot.core.Sequence;
import marmot.core.Tagger;
import marmot.core.Token;
import marmot.morph.MorphEvaluator;
import marmot.morph.MorphModel;
import marmot.morph.MorphOptions;
import marmot.morph.MorphResult;
import marmot.morph.Sentence;
import marmot.morph.Word;
import marmot.morph.io.SentenceReader;
import marmot.util.Copy;
import marmot.util.FileUtils;
import marmot.util.StringUtils;
import org.junit.Test;

/* loaded from: input_file:marmot/test/morph/PipelineTest.class */
public class PipelineTest {
    // Flag marked synthetic that backs the assertion check in testOptimizerWithOptions.
    // It is assigned once in the static initializer to the negation of this class's
    // desired assertion status, so it is true when assertions are switched off.
    static final /* synthetic */ boolean $assertionsDisabled;

    /**
     * Builds the {@code res:///} location of a resource stored under this class's package.
     *
     * @param str file name of the resource, appended after the package path
     * @return a string of the form {@code res:///<package name with '/' for '.'>/<str>}
     */
    private String getResourceFile(String str) {
        String packageName = getClass().getPackage().getName();
        String packagePath = packageName.replace(".", "/");
        return String.format("res:///%s/%s", packagePath, str);
    }

    /**
     * Reads sentences through a {@link SentenceReader}, optionally capping how many are kept.
     *
     * @param str input specification handed unchanged to the {@link SentenceReader} constructor
     * @param i   maximum number of sentences to return; a negative value means "no limit"
     * @return the sentences in reader order; empty when {@code i} is 0
     */
    public static List<Sequence> getSentences(String str, int i) {
        List<Sequence> sentences = new LinkedList<Sequence>();
        if (i == 0) {
            // A limit of zero must yield nothing. The loop below checks the size only
            // after adding, so without this guard it would always keep one sentence.
            return sentences;
        }
        Iterator<Sequence> it = new SentenceReader(str).iterator();
        while (it.hasNext()) {
            sentences.add(it.next());
            // Stop as soon as the cap is reached so no further sentence is read.
            if (i > 0 && sentences.size() >= i) {
                break;
            }
        }
        return sentences;
    }

    /**
     * Builds the two-sentence toy training corpus used by the toy tests.
     *
     * <p>Each {@link Word} is created from three strings (as written here: word form,
     * tag, and a morphological feature string, with {@code "_"} for the full stop).
     *
     * @return a new list holding the two sentences, in the order they are built
     */
    public List<Sequence> getTrainSentences() {
        List<Sequence> corpus = new LinkedList<Sequence>();

        // "das ist ein Test ."
        List<Word> firstSentence = new LinkedList<Word>();
        firstSentence.add(new Word("das", "A", "c=N|n=S"));
        firstSentence.add(new Word("ist", "V", "n=S"));
        firstSentence.add(new Word("ein", "A", "c=N|n=S"));
        firstSentence.add(new Word("Test", "N", "c=N|n=S"));
        firstSentence.add(new Word(".", ".", "_"));
        corpus.add(new Sentence(firstSentence));

        // "die Rüben sind kalt ."
        List<Word> secondSentence = new LinkedList<Word>();
        secondSentence.add(new Word("die", "A", "c=N|n=P"));
        secondSentence.add(new Word("Rüben", "N", "c=N|n=P"));
        secondSentence.add(new Word("sind", "V", "n=P"));
        secondSentence.add(new Word("kalt", "J", "c=N|n=P"));
        secondSentence.add(new Word(".", ".", "_"));
        corpus.add(new Sentence(secondSentence));

        return corpus;
    }

    /**
     * Builds the single-sentence toy test corpus ("das ist mein Test .").
     *
     * @return a one-element list, created with {@link Collections#singletonList},
     *         holding the sentence
     */
    public List<Sequence> getTestSentences() {
        List<Word> words = new LinkedList<Word>();
        words.add(new Word("das", "A", "c=N|n=S"));
        words.add(new Word("ist", "V", "n=S"));
        words.add(new Word("mein", "A", "c=N|n=S"));
        words.add(new Word("Test", "N", "c=N|n=S"));
        words.add(new Word(".", ".", "_"));
        Sequence sentence = new Sentence(words);
        return Collections.singletonList(sentence);
    }

    /**
     * Creates a {@link MorphModel} and initializes it with the given options and sentences.
     *
     * @param collection   sentences passed to {@code MorphModel.init}
     * @param morphOptions options passed to {@code MorphModel.init}
     * @return the initialized model
     */
    public Model getModel(Collection<Sequence> collection, MorphOptions morphOptions) {
        MorphModel model = new MorphModel();
        model.init(morphOptions, collection);
        return model;
    }

    /**
     * Runs the toy train/test round trip three times on one shared options object:
     * without pruning, then with pruning and {@code TAG_MORPH} set to "false", then
     * additionally with a different candidates-per-state setting.
     */
    @Test
    public void toyPosTest() {
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "1024");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[9, 9, 9]");

        // Run 1: pruning disabled.
        options.setProperty(Options.PRUNE, "false");
        toyTestWithOptions(options);

        // Run 2: pruning enabled, TAG_MORPH turned off from here on.
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(MorphOptions.TAG_MORPH, "false");
        toyTestWithOptions(options);

        // Run 3: same as run 2 but with the alternative candidate schedule.
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        toyTestWithOptions(options);
    }

    /**
     * Runs the toy train/test round trip three times on one shared options object:
     * without pruning, with pruning, and with pruning plus a different
     * candidates-per-state setting.
     */
    @Test
    public void toyTest() {
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "1024");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[9, 9, 9]");

        // Run 1: pruning disabled.
        options.setProperty(Options.PRUNE, "false");
        toyTestWithOptions(options);

        // Run 2: pruning enabled.
        options.setProperty(Options.PRUNE, "true");
        toyTestWithOptions(options);

        // Run 3: pruning enabled with the alternative candidate schedule.
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        toyTestWithOptions(options);
    }

    /**
     * Trains on {@code trn.txt} and evaluates on {@code tst.txt} with the baseline
     * configuration (order 3, pruning, penalty .1, "use-hash-feature-table" off).
     * Requires at least 98.51 accuracy on the training sentences and 54.1 on the
     * test sentences.
     */
    @Test
    public void realTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty("use-hash-feature-table", "false");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 98.51d, 54.1d);
    }

    /**
     * Same setup as the baseline real-data test, but with a maximum affix length of 5
     * and an explicit feature-template list that includes {@code infix}. Requires at
     * least 99.0 accuracy on the training sentences and 51.5 on the test sentences.
     */
    @Test
    public void realInfixTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.MAX_AFFIX_LENGTH, "5");
        options.setProperty(MorphOptions.FEATURE_TEMPLATES, "form,rare,infix,context,sig,bigrams");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 99.0d, 51.5d);
    }

    /**
     * Real-data test that additionally supplies {@code svd_small.txt} as the
     * {@code FLOAT_TYPE_DICT} option. Requires at least 98.89 accuracy on the
     * training sentences and 54.52 on the test sentences.
     */
    @Test
    public void realFloatTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.FLOAT_TYPE_DICT, getResourceFile("svd_small.txt"));
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 98.89d, 54.52d);
    }

    /**
     * Builds the options for an iteration-count-optimizing run
     * ({@code OPTIMIZE_NUM_ITERATIONS} = "true") on {@code trn.txt} / {@code tst.txt}.
     *
     * <p>NOTE(review): this method only configures the options object. It never trains
     * a model or asserts anything, so as written the test cannot fail on tagging
     * quality. {@code realOptimizerTestWithOptions} exists but is not invoked here.
     */
    @Test
    public void realOptimizerTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.OPTIMIZE_NUM_ITERATIONS, "true");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
    }

    /**
     * Real-data test with {@code USE_HASH_VECTOR} set to "false" and a vector size
     * of 1. Requires at least 98.51 accuracy on the training sentences and 54.1 on
     * the test sentences (the same thresholds as {@code realTest}).
     */
    @Test
    public void realNonHashTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "1");
        options.setProperty(MorphOptions.USE_HASH_VECTOR, "false");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", "0.1");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 98.51d, 54.1d);
    }

    /**
     * Real-data test with {@code FORM_NORMALIZATION} set to the string form of
     * {@code StringUtils.Mode.lower}. Requires at least 99.11 accuracy on the
     * training sentences and 55.38 on the test sentences.
     */
    @Test
    public void realNormalizeFormTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        final String normalization = StringUtils.Mode.lower.toString();
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.FORM_NORMALIZATION, normalization);
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 99.11d, 55.38d);
    }

    /**
     * Real-data test with {@code SPECIAL_SIGNATURE} set to "true". Requires at least
     * 98.45 accuracy on the training sentences and 53.96 on the test sentences.
     */
    @Test
    public void realSpecialSignatureTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.SPECIAL_SIGNATURE, "true");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 98.45d, 53.96d);
    }

    /**
     * Real-data test that reads only the form and tag columns (no morph-index) and
     * sets {@code TAG_MORPH} to "false". Requires at least 99.66 accuracy on the
     * training sentences and 79.14 on the test sentences.
     */
    @Test
    public void realPosTest() {
        final String columns = "form-index=1,tag-index=4,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        options.setProperty(MorphOptions.TAG_MORPH, "false");
        realTestWithOptions(options, 99.66d, 79.14d);
    }

    /**
     * Real-data test that selects {@link PerceptronTrainer} as the trainer, with
     * order 1, no pruning and {@code TAG_MORPH} set to "false". Requires at least
     * 98.84 accuracy on the training sentences and 77.49 on the test sentences.
     */
    @Test
    public void realPerceptronPosTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        final String trainerClass = PerceptronTrainer.class.getCanonicalName();
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.PRUNE, "false");
        options.setProperty(Options.ORDER, "1");
        options.setProperty(Options.TRAINER, trainerClass);
        options.setProperty(MorphOptions.TAG_MORPH, "false");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 98.84d, 77.49d);
    }

    /**
     * Real-data test with the {@code ORACLE} option set to "true". Requires at least
     * 99.94 accuracy on the training sentences and 53.39 on the test sentences.
     */
    @Test
    public void realOracleTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(Options.ORACLE, "true");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.txt"));
        realTestWithOptions(options, 99.94d, 53.39d);
    }

    /**
     * Real-data test on the {@code *.fst.txt} files, which are read with an extra
     * {@code token-feature-index=7} column. Requires at least 99.33 accuracy on the
     * training sentences and 70.1 on the test sentences.
     */
    @Test
    public void realFstTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,token-feature-index=7,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.fst.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.fst.txt"));
        realTestWithOptions(options, 99.33d, 70.1d);
    }

    /**
     * Like {@code realFstTest}, but with {@code USE_DEFAULT_FEATURES} set to "false".
     * Requires at least 60.84 accuracy on the training sentences and 50.13 on the
     * test sentences.
     */
    @Test
    public void realFstNoDefaultFeaturesTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,token-feature-index=7,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.fst.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.fst.txt"));
        options.setProperty(MorphOptions.USE_DEFAULT_FEATURES, "false");
        realTestWithOptions(options, 60.84d, 50.13d);
    }

    /**
     * Real-data test on the {@code *.aramorph.txt} files with {@code INTERNAL_ANALYZER}
     * set to "ar". Requires 100.0 accuracy on the training sentences and at least
     * 66.26 on the test sentences.
     */
    @Test
    public void realAramorphBaselineTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(MorphOptions.INTERNAL_ANALYZER, "ar");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.aramorph.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.aramorph.txt"));
        realTestWithOptions(options, 100.0d, 66.26d);
    }

    /**
     * Like {@code realFstTest}, but with {@code MAX_TRANSITION_FEATURE_LEVEL} set
     * to "0". Requires at least 99.06 accuracy on the training sentences and 69.46
     * on the test sentences.
     */
    @Test
    public void realFstMaxLevelTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,token-feature-index=7,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(Options.MAX_TRANSITION_FEATURE_LEVEL, "0");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.fst.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.fst.txt"));
        realTestWithOptions(options, 99.06d, 69.46d);
    }

    /**
     * Like {@code realFstTest}, but with the {@code ORACLE} option set to "true".
     * Requires at least 99.83 accuracy on the training sentences and 70.94 on the
     * test sentences.
     */
    @Test
    public void realOracleFstTest() {
        final String columns = "form-index=1,tag-index=4,morph-index=6,token-feature-index=7,";
        MorphOptions options = new MorphOptions();
        options.setProperty("seed", "42");
        options.setProperty("num-iterations", "10");
        options.setProperty(Options.VECTOR_SIZE, "10000000");
        options.setProperty(Options.CANDIDATES_PER_STATE, "[4, 2, 1.5, 1.25]");
        options.setProperty(Options.PRUNE, "true");
        options.setProperty(Options.ORDER, "3");
        options.setProperty("penalty", ".1");
        options.setProperty(Options.ORACLE, "true");
        options.setProperty(MorphOptions.TRAIN_FILE, columns + getResourceFile("trn.fst.txt"));
        options.setProperty(MorphOptions.TEST_FILE, columns + getResourceFile("tst.fst.txt"));
        realTestWithOptions(options, 99.83d, 70.94d);
    }

    /**
     * Trains and evaluates on the in-memory toy corpus, requiring 100.0 morph
     * accuracy on both the training and the test sentences (lemma thresholds 0.0).
     *
     * <p>Sets {@code MorphOptions.SHAPE} to "false" on the passed-in options object
     * before training, so the caller's options are modified.
     *
     * @param morphOptions options to train with; mutated as described above
     */
    public void toyTestWithOptions(MorphOptions morphOptions) {
        morphOptions.setProperty(MorphOptions.SHAPE, "false");
        List<Sequence> trainSentences = getTrainSentences();
        List<Sequence> testSentences = getTestSentences();
        testWithOptions(morphOptions, trainSentences, testSentences, 100.0d, 100.0d, 0.0d, 0.0d);
    }

    /**
     * Runs the optimizer-based train/evaluate cycle on at most 100 sentences from
     * each of the configured train and test files.
     *
     * @param morphOptions options naming the train and test files
     * @param d            threshold passed on for the training sentences
     * @param d2           threshold passed on for the test sentences
     */
    public void realOptimizerTestWithOptions(MorphOptions morphOptions, double d, double d2) {
        List<Sequence> trainSentences = getSentences(morphOptions.getTrainFile(), 100);
        List<Sequence> testSentences = getSentences(morphOptions.getTestFile(), 100);
        testOptimizerWithOptions(morphOptions, trainSentences, testSentences, d, d2);
    }

    /**
     * Convenience overload that delegates to the five-argument variant with both
     * additional thresholds set to 0.0.
     *
     * @param morphOptions options naming the train and test files
     * @param d            threshold passed on for the training sentences
     * @param d2           threshold passed on for the test sentences
     */
    public static void realTestWithOptions(MorphOptions morphOptions, double d, double d2) {
        final double trainLemmaThreshold = 0.0d;
        final double testLemmaThreshold = 0.0d;
        realTestWithOptions(morphOptions, d, d2, trainLemmaThreshold, testLemmaThreshold);
    }

    /**
     * Loads at most 100 sentences from each of the configured train and test files
     * and runs {@code testWithOptions} on them.
     *
     * @param morphOptions options naming the train and test files
     * @param d            first threshold applied to the training sentences
     * @param d2           first threshold applied to the test sentences
     * @param d3           second threshold applied to the training sentences
     * @param d4           second threshold applied to the test sentences
     */
    public static void realTestWithOptions(MorphOptions morphOptions, double d, double d2, double d3, double d4) {
        List<Sequence> trainSentences = getSentences(morphOptions.getTrainFile(), 100);
        List<Sequence> testSentences = getSentences(morphOptions.getTestFile(), 100);
        testWithOptions(morphOptions, trainSentences, testSentences, d, d2, d3, d4);
    }

    /**
     * Trains a tagger on {@code list}, checks it against the training and test
     * sentences, then saves it to a temporary file, reloads it and re-checks the
     * reloaded tagger on the test sentences.
     *
     * <p>The reload check uses the four-argument assertion overload, so it applies
     * {@code d2} but not {@code d4}.
     *
     * @param morphOptions options handed to {@code MorphModel.train}
     * @param list         training sentences
     * @param list2        test sentences
     * @param d            morph-accuracy threshold for the training sentences
     * @param d2           morph-accuracy threshold for the test sentences
     * @param d3           lemma-accuracy threshold for the training sentences
     * @param d4           lemma-accuracy threshold for the test sentences
     * @throws RuntimeException wrapping any {@link IOException} from the save/reload step
     */
    public static void testWithOptions(MorphOptions morphOptions, List<Sequence> list, List<Sequence> list2, double d, double d2, double d3, double d4) {
        // The label comes from a fixed stack-trace index, so this lookup must stay
        // inline: moving it into a helper would shift which frame index 3 refers to.
        StackTraceElement[] frames = Thread.currentThread().getStackTrace();
        String label;
        if (frames.length > 3) {
            label = frames[3].getMethodName();
        } else {
            label = "None";
        }

        Tagger tagger = MorphModel.train(morphOptions, list, null);
        assertModelPerformanceOnTestset(label + " Train", tagger, list, d, d3);
        assertModelPerformanceOnTestset(label + " Test ", tagger, list2, d2, d4);

        try {
            File modelFile = File.createTempFile("tagger", ".marmot");
            modelFile.deleteOnExit();
            FileUtils.saveToFile(tagger, modelFile);
            Tagger reloaded = (Tagger) FileUtils.loadFromFile(modelFile);
            assertModelPerformanceOnTestset(label + " Test (reload) ", reloaded, list2, d2);
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
    }

    /**
     * Trains a tagger with {@code MorphModel.trainOptimal}, checks it against the
     * training and test sentences, then checks a clone of it (made with
     * {@code Copy.clone}) against the test sentences again.
     *
     * @param morphOptions options handed to {@code MorphModel.trainOptimal}
     * @param list         training sentences
     * @param list2        test sentences; must not be null when assertions are enabled
     * @param d            morph-accuracy threshold for the training sentences
     * @param d2           morph-accuracy threshold for the test sentences
     */
    public static void testOptimizerWithOptions(MorphOptions morphOptions, List<Sequence> list, List<Sequence> list2, double d, double d2) {
        // The label comes from a fixed stack-trace index, so this lookup must stay
        // inline: moving it into a helper would shift which frame index 3 refers to.
        StackTraceElement[] frames = Thread.currentThread().getStackTrace();
        String label;
        if (frames.length > 3) {
            label = frames[3].getMethodName();
        } else {
            label = "None";
        }

        // Hand-expanded form of an assertion that list2 is non-null.
        if (!$assertionsDisabled && list2 == null) {
            throw new AssertionError();
        }

        Tagger tagger = MorphModel.trainOptimal(morphOptions, list, null);
        assertModelPerformanceOnTestset(label + " Train", tagger, list, d);
        assertModelPerformanceOnTestset(label + " Test ", tagger, list2, d2);

        Tagger cloned = (Tagger) Copy.clone(tagger);
        assertModelPerformanceOnTestset(label + " Test (reload) ", cloned, list2, d2);
    }

    /**
     * Checks only the morph accuracy of {@code tagger} on {@code list}; delegates to
     * the five-argument overload with a lemma-accuracy threshold of 0.0.
     *
     * @param str    label for this check
     * @param tagger tagger under test
     * @param list   sentences to evaluate on
     * @param d      minimum morph accuracy, in percent
     */
    public static void assertModelPerformanceOnTestset(String str, Tagger tagger, List<Sequence> list, double d) {
        final double lemmaThreshold = 0.0d;
        assertModelPerformanceOnTestset(str, tagger, list, d, lemmaThreshold);
    }

    /**
     * Evaluates {@code tagger} on {@code list} and fails if the morph or lemma
     * accuracy (in percent) falls below its threshold by more than 1e-5.
     *
     * <p>Before each sentence is evaluated, every token is passed to
     * {@code MorphModel.addIndexes(word, false)} on the tagger's model. The
     * accumulated result is printed to {@code System.err}.
     *
     * <p>NOTE(review): both accuracies divide by {@code num_tokens}; with an empty
     * {@code list} this is presumably a 0/0 that makes both checks pass vacuously.
     * Confirm against {@code MorphResult}.
     *
     * @param str    label identifying this check; included in failure messages
     * @param tagger tagger under test; its model is cast to {@link MorphModel}
     * @param list   sentences to evaluate on; each is cast to {@link Sentence}
     * @param d      minimum morph accuracy, in percent
     * @param d2     minimum lemma accuracy, in percent
     * @throws AssertionFailedError if either accuracy is below its threshold
     */
    public static void assertModelPerformanceOnTestset(String str, Tagger tagger, List<Sequence> list, double d, double d2) {
        MorphResult morphResult = new MorphResult(tagger.getModel(), tagger.getNumLevels());
        MorphModel morphModel = (MorphModel) tagger.getModel();
        for (Sequence sequence : list) {
            Iterator<Token> it = sequence.iterator();
            while (it.hasNext()) {
                morphModel.addIndexes((Word) it.next(), false);
            }
            morphResult.increment(MorphEvaluator.eval(tagger, (Sentence) sequence));
        }
        double morphAccuracy = ((morphResult.num_tokens - morphResult.morph_errors) * 100.0d) / morphResult.num_tokens;
        double lemmaAccuracy = ((morphResult.num_tokens - morphResult.lemma_errors) * 100.0d) / morphResult.num_tokens;
        System.err.println(morphResult.toString());
        // The small negative tolerance absorbs floating-point noise in the percentages.
        if (morphAccuracy - d < -1.0E-5d) {
            throw new AssertionFailedError(str + ": morph accuracy " + morphAccuracy + " < " + d);
        }
        if (lemmaAccuracy - d2 < -1.0E-5d) {
            // Report the lemma figures here; the message previously echoed the morph
            // accuracy and morph threshold, which made lemma failures misleading.
            throw new AssertionFailedError(str + ": lemma accuracy " + lemmaAccuracy + " < " + d2);
        }
    }

    // Sets $assertionsDisabled to the negation of this class's desired assertion
    // status, i.e. true exactly when assertions are not enabled for PipelineTest.
    static {
        $assertionsDisabled = !PipelineTest.class.desiredAssertionStatus();
    }
}
