/*******************************************************************************
 * Copyright 2014
 * FG Language Technology
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tu.darmstadt.lt.ner.feature.extractor;

import java.io.BufferedReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;

import de.tu.darmstadt.lt.ner.reader.NERLookupCaching;
import lombok.SneakyThrows;
import lombok.val;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.function.FeatureFunction;

import de.tu.darmstadt.lt.ner.reader.NERReader;

/**
 * Feature function that tags a token with a location label taken from the
 * {@code location_wiki.list} lookup file.
 *
 * <p>The lookup data is a map from a single word to a tag of the form
 * {@code B-<type>} (first word of a listed name) or {@code I-<type>} (any
 * following word of a listed name). Tokens that are not in the map receive
 * the tag {@code "O"}.
 */
public class DBLocationListFeatureExtractor
        implements FeatureFunction {

    /** Key under which the lookup map is stored in the shared cache. */
    public static final String DEFAULT_NAME = "DBPLocationName";

    /**
     * Looks up the string form of the given feature's value in the location map.
     *
     * @param feature the feature whose value (converted via {@code toString()}) is the token to look up
     * @return a single-element list holding a feature named {@code "DBPLocation"} whose value is
     *         the tag stored for the token, or {@code "O"} when the token is not in the map
     */
    @Override
    @SneakyThrows(ExecutionException.class)
    public List<Feature> apply(Feature feature) {

        // Fetch the lookup map from the shared cache under DEFAULT_NAME; readData() is passed
        // as the loader (presumably only invoked when the entry is not cached yet).
        val dbLocationNameList = NERLookupCaching.customMapCache().get(DEFAULT_NAME, () -> readData());

        val tokenString = feature.getValue().toString();

        val featureValue = dbLocationNameList.getOrDefault(tokenString, "O");

        return Collections.singletonList(new Feature("DBPLocation", featureValue));
    }

    /**
     * Reads {@code location_wiki.list} through {@link NERLookupCaching#readMapDataMulti} and
     * converts every line into word-to-tag entries.
     *
     * <p>Each line is split on tab characters: column 0 is the (possibly multi-word) name and
     * column 1 is the type. The name is split on whitespace; its first word is mapped to
     * {@code B-<type>} and every following word to {@code I-<type>}.
     *
     * @return the map produced by {@code readMapDataMulti} from the per-line entries
     */
    @SneakyThrows
    private Map<String, String> readData() {

        return NERLookupCaching.readMapDataMulti("location_wiki.list", DEFAULT_NAME, line -> {

            val subMap = new HashMap<String, String>(4);

            String[] lineSplit = line.split("\\t");
            String wikiType = lineSplit[1];
            String[] multiWordSplit = lineSplit[0].split("\\s+");
            for (int i = 0; i < multiWordSplit.length; i++) {
                // Key by the individual word, not by the whole phrase: apply() looks up single
                // tokens, so a phrase-level key would never match a multi-word entry.
                String prefix = (i == 0) ? "B-" : "I-";
                subMap.put(multiWordSplit[i], prefix + wikiType);
            }

            return subMap;
        });
    }
}
