package org.dice_research.squirrel.data.uri.norm;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.client.utils.URIBuilder;
import org.apache.jena.sparql.sse.Tags;
import org.dice_research.squirrel.data.uri.CrawleableUri;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.BeanFactory;

/* loaded from: input_file:org/dice_research/squirrel/data/uri/norm/NormalizerImpl.class */
/**
 * {@link UriNormalizer} that rewrites a URI into a canonical form so that
 * equivalent URIs compare equal.
 *
 * <p>The following steps are applied:
 * <ul>
 * <li>the path is percent-decoded for unreserved characters and then passed
 * through {@link PathNormalization#normalize(String)}; an empty path becomes
 * {@code "/"},</li>
 * <li>query parameters are sorted and parameters whose (lower-cased) key is
 * contained in the configured session id list are removed,</li>
 * <li>scheme and host are lower-cased,</li>
 * <li>a port equal to the configured default port of the scheme is removed,</li>
 * <li>the fragment is removed.</li>
 * </ul>
 */
public class NormalizerImpl implements UriNormalizer {
    private static final Logger LOGGER = LoggerFactory.getLogger(NormalizerImpl.class);

    /** Matches a single percent-encoded octet, e.g. {@code %7E}. */
    private static final Pattern UNESCAPE_RULE_PATTERN = Pattern.compile("%([0-9A-Fa-f]{2})");

    /** Separator between two parameters of a query string. */
    private static final String QUERY_PARAMETER_SEPARATOR = "&";

    /** Separator between the key and the value of a query parameter. */
    private static final char KEY_VALUE_SEPARATOR = '=';

    /**
     * Characters that never need to be percent-encoded: letters, digits,
     * {@code '-'}, {@code '.'}, {@code '_'} and {@code '~'}.
     */
    private static final BitSet UNESCAPED_CHARS = new BitSet(127);

    static {
        // BitSet.set(from, to) treats 'to' as EXCLUSIVE, hence the "+ 1".
        UNESCAPED_CHARS.set('-', '.' + 1);
        UNESCAPED_CHARS.set('0', '9' + 1);
        UNESCAPED_CHARS.set('A', 'Z' + 1);
        UNESCAPED_CHARS.set('a', 'z' + 1);
        UNESCAPED_CHARS.set('_');
        UNESCAPED_CHARS.set('~');
    }

    private final List<String> sessionIDs;
    private final Map<String, Integer> defaultPortMap;

    /**
     * Creates a normalizer.
     *
     * @param list lower-case query parameter keys that are treated as session
     *             ids and removed from the query
     * @param map  mapping from lower-case scheme to the default port of that
     *             scheme
     */
    public NormalizerImpl(List<String> list, Map<String, Integer> map) {
        this.sessionIDs = list;
        this.defaultPortMap = map;
    }

    /**
     * Normalizes the URI wrapped by the given object.
     *
     * @param crawleableUri the URI that should be normalized
     * @return the given instance if its URI already is in normal form (or if
     *         the normalized URI could not be built); otherwise a new
     *         {@link CrawleableUri} carrying the normalized URI and the data
     *         of the given instance
     */
    @Override
    public CrawleableUri normalize(CrawleableUri crawleableUri) {
        URI uri = crawleableUri.getUri();
        // Opaque URIs (e.g. "mailto:a@b.org") have neither path, query nor
        // authority, so none of the rules below can be applied to them.
        if (uri.isOpaque()) {
            return crawleableUri;
        }
        boolean changed = false;

        String path = uri.getRawPath();
        if (path != null) {
            String normalizedPath = normalizePath(path);
            if (!path.equals(normalizedPath)) {
                path = normalizedPath;
                changed = true;
            }
        }

        String query = uri.getQuery();
        if (query != null) {
            String normalizedQuery = normalizeQuery(query);
            if (!query.equals(normalizedQuery)) {
                query = normalizedQuery;
                changed = true;
            }
        }

        // Lower-case the scheme BEFORE the default port lookup, otherwise
        // "HTTP://host:80/" would never match the "http" entry of the map.
        String scheme = uri.getScheme();
        if (scheme != null) {
            String lowerCaseScheme = scheme.toLowerCase(Locale.ROOT);
            if (!scheme.equals(lowerCaseScheme)) {
                scheme = lowerCaseScheme;
                changed = true;
            }
        }

        String host = uri.getHost();
        if (host != null) {
            String lowerCaseHost = host.toLowerCase(Locale.ROOT);
            if (!host.equals(lowerCaseHost)) {
                host = lowerCaseHost;
                changed = true;
            }
        }

        int port = uri.getPort();
        if (port != -1 && scheme != null) {
            Integer defaultPort = defaultPortMap.get(scheme);
            if (defaultPort != null && defaultPort.intValue() == port) {
                port = -1;
                changed = true;
            }
        }

        // Any fragment, including an empty one ("...#"), has to be removed.
        if (uri.getFragment() != null) {
            changed = true;
        }

        if (!changed) {
            return crawleableUri;
        }

        URIBuilder builder = new URIBuilder(uri);
        builder.setFragment(null);
        // NOTE(review): 'path' is the raw (still percent-encoded) path. Confirm
        // that the URIBuilder version in use does not encode it a second time.
        builder.setPath(path);
        builder.setCustomQuery(query);
        builder.setPort(port);
        // Only override components the URI really has; pushing an empty
        // scheme or host into the builder would invent a component.
        if (host != null) {
            builder.setHost(host);
        }
        if (scheme != null) {
            builder.setScheme(scheme);
        }
        try {
            CrawleableUri normalizedUri = new CrawleableUri(builder.build());
            normalizedUri.setData(crawleableUri.getData());
            return normalizedUri;
        } catch (URISyntaxException e) {
            LOGGER.error("Exception while normalizing URI. Returning original URI.", e);
            return crawleableUri;
        }
    }

    /**
     * Sorts the parameters of the given query and drops session id parameters.
     *
     * @param query the decoded query of a URI, must not be {@code null}
     * @return the normalized query or {@code null} if no parameter is left
     */
    private String normalizeQuery(String query) {
        if (query.isEmpty()) {
            return null;
        }
        String[] parameters = query.split(QUERY_PARAMETER_SEPARATOR);
        Arrays.sort(parameters);
        List<String> kept = new ArrayList<>(parameters.length);
        for (String parameter : parameters) {
            // indexOf instead of split("=")[0]: splitting "=" yields an empty
            // array and would fail with an ArrayIndexOutOfBoundsException.
            int separatorPos = parameter.indexOf(KEY_VALUE_SEPARATOR);
            String key = separatorPos < 0 ? parameter : parameter.substring(0, separatorPos);
            if (!sessionIDs.contains(key.toLowerCase(Locale.ROOT))) {
                kept.add(parameter);
            }
        }
        // An empty query string would leave a dangling "?" behind.
        return kept.isEmpty() ? null : String.join(QUERY_PARAMETER_SEPARATOR, kept);
    }

    /**
     * Normalizes a raw URI path: percent-encoded octets that represent an
     * unreserved character are decoded, all other escape sequences are kept
     * as they are, and the result is handed to
     * {@link PathNormalization#normalize(String)}.
     *
     * @param str the raw path, must not be {@code null}
     * @return {@code "/"} for an empty path, otherwise the result of the path
     *         normalization
     */
    public String normalizePath(String str) {
        if (str.equals("")) {
            return "/";
        }
        Matcher matcher = UNESCAPE_RULE_PATTERN.matcher(str);
        if (!matcher.find()) {
            return PathNormalization.normalize(str);
        }
        StringBuilder unescaped = new StringBuilder(str.length());
        int copiedUpTo = 0;
        do {
            int start = matcher.start();
            unescaped.append(str, copiedUpTo, start);
            int value = getHexValue(str.charAt(start + 1), str.charAt(start + 2));
            if (UNESCAPED_CHARS.get(value)) {
                unescaped.append((char) value);
            } else {
                unescaped.append(str, start, start + 3);
            }
            copiedUpTo = start + 3;
        } while (matcher.find());
        unescaped.append(str, copiedUpTo, str.length());
        return PathNormalization.normalize(unescaped.toString());
    }

    /**
     * Combines two hexadecimal digits into the value they encode.
     *
     * @param c  the high-order hex digit ({@code 0-9}, {@code A-F} or {@code a-f})
     * @param c2 the low-order hex digit ({@code 0-9}, {@code A-F} or {@code a-f})
     * @return the value of the two digits, e.g. {@code 0x7E} for {@code '7', 'E'}
     */
    protected static int getHexValue(char c, char c2) {
        return (hexDigit(c) << 4) | hexDigit(c2);
    }

    /**
     * Returns the value of a single hex digit. The result is only meaningful
     * for {@code 0-9}, {@code A-F} and {@code a-f}; callers validate the input.
     */
    private static int hexDigit(char c) {
        if (c <= '9') {
            return c - '0';
        }
        // 'A' - 10 == '7' and 'a' - 10 == 'W'
        return c <= 'F' ? c - ('A' - 10) : c - ('a' - 10);
    }
}
