/*******************************************************************************
 * Copyright 2014
 * FG Language Technology
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tu.darmstadt.lt.ner.feature.extractor;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.concurrent.ExecutionException;

import de.tu.darmstadt.lt.ner.reader.NERLookupCaching;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.function.FeatureFunction;

import de.tu.darmstadt.lt.ner.reader.NERReader;
import scala.Tuple2;

/**
 * Feature function that looks up a token in a map built from {@code inDBperson.txt}
 * and emits a single feature named {@link #DEFAULT_NAME}.
 *
 * <p>The map is obtained through {@code NERLookupCaching.customMapCache()} under the key
 * {@link #DEFAULT_NAME}; {@link #readData()} is handed to the cache as the loader.
 * NOTE(review): how the cache decides when to invoke the loader, and where the file
 * name is resolved, is defined in {@code NERLookupCaching} and not visible here.
 */
@Slf4j
public class DBPersonListFeatureExtractor
        implements FeatureFunction {

    /** Name of the emitted feature; also used as the cache key for the token map. */
    public static final String DEFAULT_NAME = "DBPersonName";

    /** Feature value emitted when the token is not a key of the map. */
    private static final String NOT_LISTED_VALUE = "O";

    /** Value stored for every token read from the list file. */
    private static final String LISTED_VALUE = "BI";

    /**
     * Produces the list-membership feature for the token carried by {@code feature}.
     *
     * @param feature feature whose value's {@code toString()} is used as the lookup key
     * @return a singleton list holding a feature named {@link #DEFAULT_NAME} whose value is
     *         the map entry for the token, or {@code "O"} when the token is not in the map
     */
    @Override
    @SneakyThrows(ExecutionException.class)
    public List<Feature> apply(Feature feature) {

        val dbPersonNameList = NERLookupCaching.customMapCache().get(DEFAULT_NAME, () -> readData());

        val tokenString = feature.getValue().toString();

        val featureValue = dbPersonNameList.getOrDefault(tokenString, NOT_LISTED_VALUE);

        // Use the shared constant so the feature name and the cache key cannot drift apart.
        return Collections.singletonList(new Feature(DEFAULT_NAME, featureValue));
    }

    /**
     * Builds the token map by delegating to {@code NERLookupCaching.readMapDataMulti}
     * with a per-line callback that maps each whitespace-separated token to {@code "BI"}.
     *
     * @return the map returned by {@code NERLookupCaching.readMapDataMulti}
     *         (presumably the per-line maps merged together; verify against that helper)
     */
    private static Map<String, String> readData() {

        return NERLookupCaching.readMapDataMulti("inDBperson.txt", DEFAULT_NAME, line -> {

            val subMap = new HashMap<String, String>(4);

            for (String token : line.split("\\s+")) {
                // String.split yields a leading "" for blank lines or lines that start with
                // whitespace; registering "" would make an empty token look like a listed name.
                if (token.isEmpty()) {
                    continue;
                }
                subMap.put(token, LISTED_VALUE);
            }

            return subMap;

        });
    }
}
