/*
 * Decompiled with CFR 0.152.
 */
package org.dice_research.opal.catfish.cleaner;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.vocabulary.DCAT;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.RDF;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dice_research.opal.common.interfaces.ModelProcessor;
import org.dice_research.opal.common.vocabulary.Opal;

/**
 * Derives normalized format identifiers for the distributions of DCAT
 * datasets.
 *
 * <p>For every distribution of a dataset, the objects of
 * {@code dcat:mediaType} and {@code dcterms:format} are reduced to short
 * format strings (e.g. {@code text/csv} becomes {@code csv}). If neither
 * property yields a format, the file extension of {@code dcat:downloadURL}
 * is used, provided it is contained in the extensions whitelist. Each
 * resulting format is added to the model as a resource of type
 * {@link Opal#OPAL_FORMAT} and linked to the distribution via
 * {@code dcterms:format}.
 */
public class FormatCleaner
implements ModelProcessor {
    /** Maximum extension length. Not referenced inside this class. */
    public static final int EXT_MAX_LENGTH = 12;
    /** Prefix of vendor-specific subtypes, stripped by {@link #removeIanaVendor(String)}. */
    public static final String PREFIX_IANA_VENDOR = "vnd.";
    /** Classpath resource holding one whitelisted file extension per line. */
    public static final String RESOURCE_WHITELIST = "extensions-whiltelist.txt";
    /** Accepted final format: a letter followed by 2 to 11 letters, digits, '+' or '-'. */
    public static final Pattern PATTERN_FINAL = Pattern.compile("[a-zA-Z][a-zA-Z0-9+-]{2,11}");
    /** Matches values such as {@code zip (csv)}. Not referenced inside this class. */
    public static final Pattern PATTERN_ZIP = Pattern.compile("^zip[ ]*\\(([a-zA-Z0-9+]{2,5})\\)");
    /** Matches values such as {@code excel (.xlsx)}; group 1 is the outer token, group 2 the bracketed one. */
    public static final Pattern PATTERN_BRACKET = Pattern.compile("^([a-zA-Z0-9+]{2,12})[ ]+\\(([\\.]*[a-zA-Z0-9+]{2,12})\\)");
    /** Accepted file extension: a lowercase letter followed by 1 to 11 lowercase letters or digits. */
    public static final Pattern PATTERN_EXTENSION = Pattern.compile("^[a-z][a-z0-9]{1,11}");
    /** Top-level media type names whose {@code type/} prefix is stripped by {@link #removeIanaMimeType(String)}. */
    public static final List<String> IANA_MIMETYPES;
    /** File extensions accepted by {@link #cleanDownloadUrl(String)}; stays empty if the resource cannot be read. */
    public static final List<String> EXTENSIONS_WHILTELIST;
    private static final Logger LOGGER;

    /**
     * Reads the extensions whitelist from the classpath resource
     * {@link #RESOURCE_WHITELIST}.
     *
     * @return the non-blank lines of the resource, trimmed, in file order
     * @throws IOException if the resource is missing or cannot be read
     */
    protected static List<String> readExtensionsWhitelist() throws IOException {
        InputStream inputStream = FormatCleaner.class.getClassLoader().getResourceAsStream(RESOURCE_WHITELIST);
        if (inputStream == null) {
            // getResourceAsStream signals a missing resource with null; report it as an
            // IOException so the static initializer can handle it instead of failing
            // class loading with a NullPointerException.
            throw new IOException("Resource not found on classpath: " + RESOURCE_WHITELIST);
        }
        List<String> list = new LinkedList<String>();
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                // Blank or padded entries could never equal a matched extension.
                String extension = line.trim();
                if (!extension.isEmpty()) {
                    list.add(extension);
                }
            }
        }
        return list;
    }

    /**
     * Cleans the formats of every resource in the model that is typed as
     * {@code dcat:Dataset}.
     *
     * @param model      the model to read from and to extend
     * @param datasetUri not used by this implementation; all datasets in the
     *                   model are processed
     * @throws Exception declared for the interface; not thrown explicitly here
     */
    public void processModel(Model model, String datasetUri) throws Exception {
        List<Resource> dataSets = model.listResourcesWithProperty(RDF.type, (RDFNode)DCAT.Dataset).toList();
        for (Resource dataSet : dataSets) {
            this.clean(model, dataSet);
        }
    }

    /**
     * Adds cleaned format resources to every distribution of the given
     * dataset.
     *
     * <p>Formats are collected from {@code dcat:mediaType} and
     * {@code dcterms:format}; only if both yield nothing is the extension of
     * {@code dcat:downloadURL} consulted. Existing statements are not removed.
     *
     * @param model   the model to read from and to extend
     * @param dataset the dataset whose distributions are processed
     */
    @Deprecated
    public void clean(Model model, Resource dataset) {
        NodeIterator distributionIt = model.listObjectsOfProperty(dataset, DCAT.distribution);
        while (distributionIt.hasNext()) {
            RDFNode rdfNode = distributionIt.next();
            if (!rdfNode.isResource()) {
                continue;
            }
            Resource distribution = rdfNode.asResource();
            Set<String> allFormats = new HashSet<String>();
            this.addCleanedFormats(model.listObjectsOfProperty(distribution, DCAT.mediaType), allFormats);
            this.addCleanedFormats(model.listObjectsOfProperty(distribution, DCTerms.format), allFormats);
            if (allFormats.isEmpty()) {
                // Fallback: derive the format from the download URL's file extension.
                NodeIterator downloadUrlIt = model.listObjectsOfProperty(distribution, DCAT.downloadURL);
                while (downloadUrlIt.hasNext()) {
                    for (String value : this.getValues(downloadUrlIt.next(), true)) {
                        String extension = this.cleanDownloadUrl(value);
                        if (extension != null) {
                            allFormats.add(extension);
                        }
                    }
                }
            }
            for (String format : allFormats) {
                Resource formatResource = model.getResource("http://projekt-opal.de/format/" + format);
                model.add(formatResource, RDF.type, (RDFNode)Opal.OPAL_FORMAT);
                distribution.addProperty(DCTerms.format, (RDFNode)formatResource);
            }
        }
    }

    /** Cleans the values of all nodes of the iterator and adds the resulting formats to the target set. */
    private void addCleanedFormats(NodeIterator nodeIterator, Set<String> target) {
        while (nodeIterator.hasNext()) {
            for (String value : this.getValues(nodeIterator.next(), true)) {
                target.addAll(this.cleanInput(value));
            }
        }
    }

    /**
     * Extracts string values from a node.
     *
     * @param rdfNode         the node to inspect
     * @param checkNextTriple whether a blank node is expanded to the values of
     *                        the objects of its properties (one level only)
     * @return the URI of a URI resource, the lexical string of a literal, the
     *         values found below a blank node, or an empty set
     */
    protected Set<String> getValues(RDFNode rdfNode, boolean checkNextTriple) {
        Set<String> values = new HashSet<String>();
        if (rdfNode.isURIResource()) {
            values.add(rdfNode.asResource().getURI());
        } else if (rdfNode.isLiteral()) {
            values.add(rdfNode.asLiteral().getString());
        } else if (rdfNode.isAnon() && checkNextTriple) {
            StmtIterator stmtIterator = rdfNode.asResource().listProperties();
            while (stmtIterator.hasNext()) {
                Statement statement = stmtIterator.next();
                // false: do not follow blank nodes any deeper.
                values.addAll(this.getValues(statement.getObject(), false));
            }
        }
        return values;
    }

    /**
     * Reduces a raw media type / format value to a set of accepted formats.
     *
     * <p>The input is lowercased and trimmed. Values starting with
     * {@code http} are parsed as URLs and only the last path segment is
     * considered. The remainder is split and stripped by
     * {@link #cleanValue(String)} and filtered by {@link #PATTERN_FINAL}.
     *
     * @param string the raw value; may be {@code null}
     * @return the accepted formats; empty if none could be derived
     */
    public Set<String> cleanInput(String string) {
        Set<String> formats = new HashSet<String>();
        if (string == null) {
            return formats;
        }
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        string = string.toLowerCase(Locale.ROOT).trim();
        if (string.isEmpty()) {
            return formats;
        }
        if (string.startsWith("http")) {
            String path;
            try {
                path = new URL(string).getPath();
            }
            catch (MalformedURLException e) {
                return formats;
            }
            int index = path.lastIndexOf('/');
            if (index == -1) {
                // Also covers an empty path.
                return formats;
            }
            string = path.substring(index + 1);
            if (string.isEmpty()) {
                return formats;
            }
            string = string.replace("%20", " ");
        }
        for (String cleanedValue : this.cleanValue(string)) {
            this.finalize(formats, cleanedValue);
        }
        return formats;
    }

    /**
     * Splits a value into candidate formats and strips known prefixes.
     *
     * <p>Comma-separated lists and values of the form
     * {@code token (token)} are split and processed recursively. A single
     * value has its media type prefix, vendor prefix and one leading dot
     * removed.
     *
     * @param value the value to clean; must not be {@code null}
     * @return the candidate formats (not yet checked against {@link #PATTERN_FINAL})
     */
    protected Set<String> cleanValue(String value) {
        Set<String> values = new HashSet<String>();
        String[] parts = value.split("[ ]*,[ ]*");
        if (parts.length > 1) {
            for (String part : parts) {
                values.addAll(this.cleanValue(part));
            }
            return values;
        }
        if (parts.length == 1) {
            // split() drops trailing empty strings, so "csv," yields just "csv";
            // continue with that instead of the value including the separator.
            value = parts[0];
        }
        Matcher matcher = PATTERN_BRACKET.matcher(value);
        if (matcher.matches()) {
            values.addAll(this.cleanValue(matcher.group(1)));
            values.addAll(this.cleanValue(matcher.group(2)));
            return values;
        }
        value = this.removeIanaMimeType(value);
        value = this.removeIanaVendor(value);
        if (value.startsWith(".")) {
            value = value.substring(1);
        }
        values.add(value);
        return values;
    }

    /**
     * Removes the {@link #PREFIX_IANA_VENDOR} prefix and, if the remainder
     * still contains dots, everything up to and including the last dot.
     *
     * @param value the value to process
     * @return the stripped value, or the unchanged value if it has no vendor prefix
     */
    protected String removeIanaVendor(String value) {
        if (!value.startsWith(PREFIX_IANA_VENDOR)) {
            return value;
        }
        String remainder = value.substring(PREFIX_IANA_VENDOR.length());
        int index = remainder.lastIndexOf('.');
        return index == -1 ? remainder : remainder.substring(index + 1);
    }

    /**
     * Removes a leading {@code type/} if the value consists of exactly two
     * slash-separated parts and the first one is listed in
     * {@link #IANA_MIMETYPES}.
     *
     * @param value the value to process
     * @return the subtype, or the unchanged value
     */
    protected String removeIanaMimeType(String value) {
        String[] parts = value.split("/");
        if (parts.length == 2 && IANA_MIMETYPES.contains(parts[0])) {
            return parts[1];
        }
        return value;
    }

    /**
     * Adds the value to the set if it fully matches {@link #PATTERN_FINAL}.
     *
     * <p>Note: this is an overload unrelated to {@link Object#finalize()}.
     *
     * @param formats the set to add to
     * @param value   the candidate format
     */
    protected void finalize(Set<String> formats, String value) {
        if (PATTERN_FINAL.matcher(value).matches()) {
            formats.add(value);
        }
    }

    /**
     * Extracts a whitelisted file extension from a download URL.
     *
     * @param urlString the URL to inspect
     * @return the lowercased text after the last dot of the URL path if it
     *         matches {@link #PATTERN_EXTENSION} and is contained in
     *         {@link #EXTENSIONS_WHILTELIST}; {@code null} otherwise, including
     *         for malformed URLs
     */
    public String cleanDownloadUrl(String urlString) {
        URL url;
        try {
            url = new URL(urlString);
        }
        catch (MalformedURLException e) {
            return null;
        }
        String path = url.getPath();
        int index = path.lastIndexOf('.');
        if (index == -1) {
            return null;
        }
        // Lowercase after cutting so the index always refers to the string it was computed on.
        String candidate = path.substring(index + 1).toLowerCase(Locale.ROOT);
        if (PATTERN_EXTENSION.matcher(candidate).matches() && EXTENSIONS_WHILTELIST.contains(candidate)) {
            return candidate;
        }
        return null;
    }

    static {
        LOGGER = LogManager.getLogger();
        String[] mimeTypes = new String[]{"application", "audio", "font", "example", "image", "message", "model", "multipart", "text", "video"};
        IANA_MIMETYPES = Arrays.asList(mimeTypes);
        EXTENSIONS_WHILTELIST = new LinkedList<String>();
        try {
            EXTENSIONS_WHILTELIST.addAll(FormatCleaner.readExtensionsWhitelist());
        }
        catch (IOException e) {
            // Best effort: without the whitelist, download URLs simply yield no formats.
            LOGGER.error("Could not read extensions whitelist resource " + RESOURCE_WHITELIST, (Throwable)e);
        }
    }
}

