package de.tu.darmstadt.lt.ner.feature.extractor;

import de.tu.darmstadt.lt.ner.reader.NERLookupCaching;
import lombok.SneakyThrows;
import lombok.val;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.function.FeatureFunction;
import scala.Tuple2;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;

/**
 * Created by Markus Ackermann.
 * No rights reserved.
 *
 * <p>Feature function that maps the string form of an incoming feature value to an entry of a
 * tab-separated word list and emits that entry as a single new feature. Values without an entry
 * in the word list are mapped to {@code "O"}.
 *
 * <p>Implementations choose the word list via {@link #defaultName()} and the field used as the
 * looked-up value via {@link #column()}.
 */
public interface SimilarWordExtractor extends FeatureFunction {

    /**
     * @return the zero-based index of the tab-separated field that {@link #readData()} stores as
     *         the map value for each line
     */
    int column();

    /**
     * @return the name used both as the cache key in {@link #apply(Feature)} and as the second
     *         argument to {@code NERLookupCaching.readMapData} in {@link #readData()}
     */
    String defaultName();

    /**
     * @return the name given to the feature produced by {@link #apply(Feature)};
     *         {@code "SIMWO"} unless overridden
     */
    default String featureName() {

        return "SIMWO";
    }

    /**
     * Looks up the string form of {@code feature}'s value in this extractor's word map.
     *
     * @param feature the feature whose value is looked up; its value must not be {@code null}
     * @return a one-element list holding a feature named {@link #featureName()} whose value is the
     *         mapped entry, or {@code "O"} when the map has no entry for the value
     */
    @SneakyThrows(ExecutionException.class)
    default List<Feature> apply(Feature feature) {

        // NOTE(review): presumably the cache invokes the loader only when no map is cached yet
        // under defaultName() -- confirm against NERLookupCaching.customMapCache().
        val simWordMap = NERLookupCaching.customMapCache().get(defaultName(), () -> readData());

        val tokenString = feature.getValue().toString();

        // "O" is the fallback value for tokens without an entry in the map.
        val featureValue = simWordMap.getOrDefault(tokenString, "O");

        return Collections.singletonList(new Feature(featureName(), featureValue));
    }


    /**
     * Builds the word map by handing a line parser to {@code NERLookupCaching.readMapData}.
     * Each line is split on tab characters; the first field becomes the key and the field at
     * index {@link #column()} becomes the value.
     *
     * <p>NOTE(review): the exact meaning of the {@code "200k_2d_wordlists"} and
     * {@link #defaultName()} arguments (presumably a resource directory and a file name) is
     * defined by {@code readMapData} -- confirm there.
     *
     * @return the map produced by {@code NERLookupCaching.readMapData}
     * @throws ArrayIndexOutOfBoundsException if a line has no field at index {@link #column()}
     */
    @SneakyThrows
    default Map<String, String> readData() {

        return NERLookupCaching.readMapData("200k_2d_wordlists", defaultName(), line -> {

            // String.split drops trailing empty fields, so short lines are possible even when
            // the file nominally has enough columns.
            String[] sep = line.split("\\t");

            int valueColumn = column();

            // Fail with the same exception type as a raw array access would, but say which
            // word list and which line are at fault.
            if (valueColumn < 0 || valueColumn >= sep.length) {
                throw new ArrayIndexOutOfBoundsException(
                        "Word list '" + defaultName() + "': line has " + sep.length
                                + " tab-separated field(s) but column " + valueColumn
                                + " was requested: " + line);
            }

            return new Tuple2<>(sep[0], sep[valueColumn]);
        });
    }
}
