package org.dice_research.opal.catfish.cleaner;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.vocabulary.DCAT;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.RDF;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dice_research.opal.common.interfaces.ModelProcessor;
import org.dice_research.opal.common.vocabulary.Opal;

/* loaded from: input_file:org/dice_research/opal/catfish/cleaner/FormatCleaner.class */
/**
 * Normalizes the format information attached to DCAT distributions.
 *
 * <p>For every distribution of a dataset, the values of {@code dcat:mediaType} and
 * {@code dct:format} are reduced to short lower-case format tokens. If neither property yields a
 * token, the file extension of {@code dcat:downloadURL} is used, provided it is contained in the
 * extensions whitelist. Each resulting token is added to the distribution as a {@code dct:format}
 * resource in the {@code http://projekt-opal.de/format/} namespace; existing statements are not
 * removed.
 */
public class FormatCleaner implements ModelProcessor {
    /** Maximum extension length; not referenced by the code in this class. */
    public static final int EXT_MAX_LENGTH = 12;
    /** Prefix of IANA vendor-tree subtypes, e.g. {@code vnd.ms-excel}. */
    public static final String PREFIX_IANA_VENDOR = "vnd.";
    /** Name of the classpath resource listing the accepted download-URL extensions. */
    public static final String RESOURCE_WHITELIST = "extensions-whiltelist.txt";
    /** A token is only accepted as a format if it matches this pattern completely. */
    public static final Pattern PATTERN_FINAL = Pattern.compile("[a-zA-Z][a-zA-Z0-9+-]{2,11}");
    /** Pattern for values such as {@code zip (csv)}; not referenced by the code in this class. */
    public static final Pattern PATTERN_ZIP = Pattern.compile("^zip[ ]*\\(([a-zA-Z0-9+]{2,5})\\)");
    /** Pattern for values such as {@code excel (.xlsx)}: both parts are treated as candidates. */
    public static final Pattern PATTERN_BRACKET = Pattern.compile("^([a-zA-Z0-9+]{2,12})[ ]+\\(([\\.]*[a-zA-Z0-9+]{2,12})\\)");
    /** Shape a lower-cased download-URL extension must have before the whitelist lookup. */
    public static final Pattern PATTERN_EXTENSION = Pattern.compile("^[a-z][a-z0-9]{1,11}");
    private static final Logger LOGGER = LogManager.getLogger();
    /** Top-level media types that are stripped from values of the form {@code type/subtype}. */
    public static final List<String> IANA_MIMETYPES = Arrays.asList("application", "audio", "font", "example", "image", "message", "model", "multipart", "text", "video");
    /** Extensions accepted by {@link #cleanDownloadUrl(String)}; filled once when the class loads. */
    public static final List<String> EXTENSIONS_WHILTELIST = new LinkedList<>();

    /** Namespace of the format resources created by {@link #clean(Model, Resource)}. */
    private static final String FORMAT_NAMESPACE = "http://projekt-opal.de/format/";
    /** Separator of comma-separated format lists, including surrounding spaces. */
    private static final Pattern PATTERN_LIST_SEPARATOR = Pattern.compile("[ ]*,[ ]*");

    static {
        try {
            EXTENSIONS_WHILTELIST.addAll(readExtensionsWhitelist());
        } catch (IOException e) {
            // Best effort: without the whitelist no download-URL extension is accepted.
            LOGGER.error("Could not read extensions whitelist " + RESOURCE_WHITELIST, e);
        }
    }

    /**
     * Reads the extensions whitelist from the classpath, one entry per line.
     *
     * @return the lines of the resource in file order
     * @throws IOException if the resource is missing or cannot be read
     */
    protected static List<String> readExtensionsWhitelist() throws IOException {
        InputStream stream = FormatCleaner.class.getClassLoader().getResourceAsStream(RESOURCE_WHITELIST);
        if (stream == null) {
            // getResourceAsStream signals a missing resource with null; report it as an I/O
            // problem so that callers handling IOException (e.g. the static initializer) cope.
            throw new IOException("Classpath resource not found: " + RESOURCE_WHITELIST);
        }
        List<String> extensions = new LinkedList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                extensions.add(line);
            }
        }
        return extensions;
    }

    /**
     * Cleans the formats of all distributions of every {@code dcat:Dataset} in the model.
     *
     * @param model the model to update in place
     * @param str   not used by this implementation
     * @throws Exception declared for interface compatibility
     */
    public void processModel(Model model, String str) throws Exception {
        // toList() materializes the datasets first, so the model can be modified while looping.
        for (Resource dataset : model.listResourcesWithProperty(RDF.type, DCAT.Dataset).toList()) {
            clean(model, dataset);
        }
    }

    /**
     * Adds cleaned {@code dct:format} resources to every distribution of the given dataset.
     *
     * @param model    the model to update in place
     * @param resource the dataset whose {@code dcat:distribution} objects are processed
     * @deprecated marked as deprecated in the original class; {@link #processModel(Model, String)}
     *             applies this method to every dataset of a model
     */
    @Deprecated
    public void clean(Model model, Resource resource) {
        NodeIterator distributions = model.listObjectsOfProperty(resource, DCAT.distribution);
        while (distributions.hasNext()) {
            RDFNode node = distributions.next();
            if (!node.isResource()) {
                continue;
            }
            Resource distribution = node.asResource();

            Set<String> formats = new HashSet<>();
            collectFormats(model.listObjectsOfProperty(distribution, DCAT.mediaType), formats);
            collectFormats(model.listObjectsOfProperty(distribution, DCTerms.format), formats);

            // Fall back to the download URL only if the explicit properties gave nothing.
            if (formats.isEmpty()) {
                NodeIterator downloadUrls = model.listObjectsOfProperty(distribution, DCAT.downloadURL);
                while (downloadUrls.hasNext()) {
                    for (String url : getValues(downloadUrls.next(), true)) {
                        String extension = cleanDownloadUrl(url);
                        if (extension != null) {
                            formats.add(extension);
                        }
                    }
                }
            }

            for (String format : formats) {
                Resource formatResource = model.getResource(FORMAT_NAMESPACE + format);
                model.add(formatResource, RDF.type, Opal.OPAL_FORMAT);
                distribution.addProperty(DCTerms.format, formatResource);
            }
        }
    }

    /** Cleans every string value of the given nodes and adds the resulting tokens to {@code formats}. */
    private void collectFormats(NodeIterator nodes, Set<String> formats) {
        while (nodes.hasNext()) {
            for (String value : getValues(nodes.next(), true)) {
                formats.addAll(cleanInput(value));
            }
        }
    }

    /**
     * Extracts the string values of a node.
     *
     * @param rDFNode the node to inspect
     * @param z       whether the properties of a blank node are followed (one level only, as
     *                the nested call passes {@code false})
     * @return the URI of a URI resource, the lexical string of a literal, or the values of all
     *         property objects of a blank node; empty if none applies
     */
    protected Set<String> getValues(RDFNode rDFNode, boolean z) {
        Set<String> values = new HashSet<>();
        if (rDFNode.isURIResource()) {
            values.add(rDFNode.asResource().getURI());
        } else if (rDFNode.isLiteral()) {
            values.add(rDFNode.asLiteral().getString());
        } else if (rDFNode.isAnon() && z) {
            StmtIterator properties = rDFNode.asResource().listProperties();
            while (properties.hasNext()) {
                Statement statement = properties.next();
                values.addAll(getValues(statement.getObject(), false));
            }
        }
        return values;
    }

    /**
     * Turns a raw format or media-type value into a set of accepted format tokens.
     *
     * <p>The value is lower-cased and trimmed. For values starting with {@code http}, only the
     * last path segment of the URL is considered (with {@code %20} replaced by a space); if the
     * URL is malformed or has no such segment, the result is empty.
     *
     * @param str the raw value, may be {@code null}
     * @return the tokens matching {@link #PATTERN_FINAL}; never {@code null}
     */
    public Set<String> cleanInput(String str) {
        Set<String> formats = new HashSet<>();
        if (str == null || str.trim().isEmpty()) {
            return formats;
        }

        // Locale.ROOT keeps the result independent of the default locale (e.g. Turkish "I").
        String value = str.toLowerCase(Locale.ROOT).trim();
        if (value.startsWith("http")) {
            String segment = lastPathSegment(value);
            if (segment.isEmpty()) {
                return formats;
            }
            value = segment.replace("%20", " ");
        }

        for (String candidate : cleanValue(value)) {
            finalize(formats, candidate);
        }
        return formats;
    }

    /** Returns the text after the last {@code /} of the URL path, or an empty string if there is none. */
    private static String lastPathSegment(String uri) {
        try {
            String path = new URL(uri).getPath();
            int slashIndex = path.lastIndexOf('/');
            // An empty path has no slash either, so this covers both cases.
            return slashIndex == -1 ? "" : path.substring(slashIndex + 1);
        } catch (MalformedURLException e) {
            return "";
        }
    }

    /**
     * Splits and simplifies a pre-processed value into candidate tokens.
     *
     * <p>Comma-separated lists and values of the form {@code name (other)} are split and each
     * part is cleaned recursively. A single value loses its IANA top-level type, its vendor
     * prefix and one leading dot.
     *
     * @param str the lower-cased, trimmed value
     * @return the candidate tokens, not yet checked against {@link #PATTERN_FINAL}
     */
    protected Set<String> cleanValue(String str) {
        Set<String> candidates = new HashSet<>();

        String[] parts = PATTERN_LIST_SEPARATOR.split(str);
        if (parts.length > 1) {
            for (String part : parts) {
                candidates.addAll(cleanValue(part));
            }
            return candidates;
        }

        Matcher bracketMatcher = PATTERN_BRACKET.matcher(str);
        if (bracketMatcher.matches()) {
            candidates.addAll(cleanValue(bracketMatcher.group(1)));
            candidates.addAll(cleanValue(bracketMatcher.group(2)));
            return candidates;
        }

        String candidate = removeIanaVendor(removeIanaMimeType(str));
        if (candidate.startsWith(".")) {
            candidate = candidate.substring(1);
        }
        candidates.add(candidate);
        return candidates;
    }

    /**
     * Strips the {@code vnd.} prefix and keeps only the part after the last remaining dot.
     *
     * @param str the value to simplify
     * @return the simplified value, or {@code str} unchanged if it has no vendor prefix
     */
    protected String removeIanaVendor(String str) {
        if (!str.startsWith(PREFIX_IANA_VENDOR)) {
            return str;
        }
        String vendorPart = str.substring(PREFIX_IANA_VENDOR.length());
        int dotIndex = vendorPart.lastIndexOf('.');
        return dotIndex == -1 ? vendorPart : vendorPart.substring(dotIndex + 1);
    }

    /**
     * Reduces {@code type/subtype} to {@code subtype} if {@code type} is a known IANA top-level type.
     *
     * @param str the value to simplify
     * @return the subtype, or {@code str} unchanged if it is not of that form
     */
    protected String removeIanaMimeType(String str) {
        String[] parts = str.split("/");
        if (parts.length == 2 && IANA_MIMETYPES.contains(parts[0])) {
            return parts[1];
        }
        return str;
    }

    /**
     * Adds the candidate to the set if it matches {@link #PATTERN_FINAL} completely.
     *
     * <p>This is an overload and unrelated to {@link Object#finalize()}.
     *
     * @param set the set of accepted formats to extend
     * @param str the candidate token
     */
    protected void finalize(Set<String> set, String str) {
        if (PATTERN_FINAL.matcher(str).matches()) {
            set.add(str);
        }
    }

    /**
     * Extracts a whitelisted file extension from a download URL.
     *
     * @param str the download URL
     * @return the lower-cased text after the last dot of the URL path if it matches
     *         {@link #PATTERN_EXTENSION} and is contained in {@link #EXTENSIONS_WHILTELIST};
     *         {@code null} otherwise, including for malformed URLs
     */
    public String cleanDownloadUrl(String str) {
        String path;
        try {
            path = new URL(str).getPath();
        } catch (MalformedURLException e) {
            return null;
        }

        int dotIndex = path.lastIndexOf('.');
        if (dotIndex == -1) {
            return null;
        }

        // Cut first, then lower-case: lower-casing can change the string length, which would
        // invalidate an index computed on the original path.
        String extension = path.substring(dotIndex + 1).toLowerCase(Locale.ROOT);
        if (!PATTERN_EXTENSION.matcher(extension).matches()) {
            return null;
        }
        return EXTENSIONS_WHILTELIST.contains(extension) ? extension : null;
    }
}
