/*
 * Decompiled with CFR 0.152.
 */
package org.dice_research.opal.metadata;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import opennlp.tools.langdetect.Language;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.vocabulary.DCTerms;
import org.dice_research.opal.common.interfaces.JenaModelProcessor;
import org.dice_research.opal.metadata.lang.LangDetector;

/**
 * Adds language tags to the {@code dcterms:title} and
 * {@code dcterms:description} literals of a dataset that do not carry one yet.
 *
 * <p>The language of an untagged literal is predicted with a shared
 * {@link LangDetector}. Only the languages returned by
 * {@link #getSupportedLanguages()} are considered, and a tag is only added if
 * the prediction reaches the confidence required for the respective property.
 *
 * <p>NOTE: the detector and the language tables are kept in static fields that
 * are lazily populated by {@link #initialize()} without any synchronization.
 */
public class LanguageDetection implements JenaModelProcessor {

    /** Minimum prediction confidence required to tag a title literal. */
    protected static final double REQUIRED_CONFIDENCE_TITLE = 0.01;

    /** Minimum prediction confidence required to tag a description literal. */
    protected static final double REQUIRED_CONFIDENCE_DESCRIPTION = 0.75;

    /** Shared detector instance, created on the first call to {@link #initialize()}. */
    protected static LangDetector langDetector;

    /** Maps the language codes reported by the detector to the tags written to literals. */
    protected static Map<String, String> supportedLanguages;

    /** Key set of {@link #supportedLanguages}, i.e. the detector codes that are accepted. */
    protected static Set<String> supportedIso_639_3;

    /**
     * Returns a copy of the given model in which untagged title and
     * description literals of the dataset have been replaced by
     * language-tagged literals where a sufficiently confident prediction
     * exists. The model passed in is not modified.
     *
     * @param model      the model containing the dataset metadata
     * @param datasetUri URI of the dataset resource whose literals are processed
     * @return a new model containing the (possibly updated) statements
     * @throws Exception if initialization or language prediction fails
     */
    public Model process(Model model, String datasetUri) throws Exception {
        this.initialize();

        // Work on a copy so that the caller's model stays untouched.
        Model result = ModelFactory.createDefaultModel().add(model);
        Resource dataset = ResourceFactory.createResource(datasetUri);

        // Iterate over a snapshot of the objects: the loop body removes and adds
        // statements in the very model the objects were listed from.
        for (RDFNode titleNode : result.listObjectsOfProperty(dataset, DCTerms.title).toList()) {
            if (!titleNode.isLiteral()) {
                continue;
            }
            Literal updated = this.updateLanguageTag(titleNode.asLiteral(), REQUIRED_CONFIDENCE_TITLE);
            // An unchanged literal would be removed and re-added as is; skip the no-op.
            if (!updated.equals(titleNode)) {
                result.remove(dataset, DCTerms.title, titleNode);
                result.add(dataset, DCTerms.title, updated);
            }
        }

        for (RDFNode descriptionNode : result.listObjectsOfProperty(dataset, DCTerms.description).toList()) {
            if (!descriptionNode.isLiteral()) {
                continue;
            }
            Literal updated = this.updateLanguageTag(descriptionNode.asLiteral(), REQUIRED_CONFIDENCE_DESCRIPTION);
            if (!updated.equals(descriptionNode)) {
                result.remove(dataset, DCTerms.description, descriptionNode);
                result.add(dataset, DCTerms.description, updated);
            }
        }

        return result;
    }

    /**
     * Returns a language-tagged version of the given literal if it has no
     * language tag yet and a supported language is predicted with at least the
     * required confidence. Otherwise the literal is returned unchanged.
     *
     * @param literal            the literal to inspect
     * @param requiredConfidence minimum confidence the prediction has to reach
     * @return a new language-tagged literal, or the given literal itself
     * @throws Exception if initialization or language prediction fails
     */
    protected Literal updateLanguageTag(Literal literal, double requiredConfidence) throws Exception {
        // Existing language tags are never overwritten.
        if (!literal.getLanguage().isEmpty()) {
            return literal;
        }

        Language lang = this.predictsupportedLanguage(literal.getString());
        if (lang == null || !(lang.getConfidence() >= requiredConfidence)) {
            return literal;
        }

        // Translate the detector's language code into the tag stored in the model.
        return ResourceFactory.createLangLiteral(literal.getString(), supportedLanguages.get(lang.getLang()));
    }

    /**
     * Predicts the language of the given text, restricted to the supported
     * languages.
     *
     * @param text the text to analyze
     * @return the supported language with the highest confidence (the later
     *         prediction wins on equal confidence), or {@code null} if none
     *         of the predictions is a supported language
     * @throws Exception if initialization or language prediction fails
     */
    protected Language predictsupportedLanguage(String text) throws Exception {
        this.initialize();

        Language best = null;
        for (Language candidate : langDetector.predictLanguages(text)) {
            if (!supportedIso_639_3.contains(candidate.getLang())) {
                continue;
            }
            if (best == null || candidate.getConfidence() >= best.getConfidence()) {
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Lazily creates the shared language detector and the tables of supported
     * languages. Calling this method again after a successful initialization
     * has no effect.
     *
     * @throws Exception if the language detector cannot be initialized
     */
    public void initialize() throws Exception {
        if (langDetector == null) {
            // Publish the detector only after it has been initialized successfully;
            // otherwise a failed attempt would leave an unusable instance cached
            // and later calls would never retry.
            LangDetector detector = new LangDetector();
            detector.initialize();
            langDetector = detector;
        }

        if (supportedLanguages == null) {
            supportedLanguages = this.getSupportedLanguages();
            supportedIso_639_3 = supportedLanguages.keySet();
        } else if (supportedIso_639_3 == null) {
            // The map was provided without its key set; derive it.
            supportedIso_639_3 = supportedLanguages.keySet();
        }
    }

    /**
     * Creates the table of supported languages.
     *
     * @return a new mutable map from the three-letter codes matched against
     *         the detector's predictions to the two-letter tags written to
     *         literals
     */
    protected Map<String, String> getSupportedLanguages() {
        Map<String, String> map = new HashMap<>();
        map.put("deu", "de");
        map.put("eng", "en");
        map.put("fra", "fr");
        map.put("spa", "es");
        return map;
    }
}

