package crawlercommons.filters.basic;

import crawlercommons.filters.URLFilter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:crawlercommons/filters/basic/BasicURLNormalizer.class */
/**
 * URL filter that rewrites a URL string into a normalized form.
 *
 * <p>For every parseable URL the file part (path plus query, as returned by
 * {@link URL#getFile()}) is re-encoded: percent-escapes of unreserved
 * characters are decoded, remaining escapes are upper-cased, and octets that
 * must not appear literally are percent-encoded as UTF-8.
 *
 * <p>For {@code http}, {@code https} and {@code ftp} URLs the filter
 * additionally lower-cases the host, drops a port equal to the protocol's
 * default port, replaces an empty file with {@code "/"}, removes the fragment
 * and normalizes dot segments and duplicate slashes in the path.
 */
public class BasicURLNormalizer extends URLFilter {
    public static final Logger LOG = LoggerFactory.getLogger(BasicURLNormalizer.class);

    /** Detects paths worth normalizing: they contain "/.", "//" or "./". */
    private static final Pattern hasNormalizablePathPattern = Pattern.compile("/[./]|[.]/");

    /** Matches one percent-encoded octet; group 1 holds the two hex digits. */
    private static final Pattern unescapeRulePattern = Pattern.compile("%([0-9A-Fa-f]{2})");

    /** Charset used to turn the file part into octets before escaping. */
    private static final Charset utf8 = Charset.forName("UTF-8");

    /**
     * Lookup table indexed by ASCII code: {@code true} for characters that
     * should appear literally instead of percent-encoded (letters, digits,
     * '-', '.', '_' and '~').
     */
    private static final boolean[] unescapedCharacters = new boolean[128];

    static {
        for (int c = 0; c < unescapedCharacters.length; c++) {
            unescapedCharacters[c] = ('A' <= c && c <= 'Z')
                    || ('a' <= c && c <= 'z')
                    || ('0' <= c && c <= '9')
                    || c == '-' || c == '.' || c == '_' || c == '~';
        }
    }

    /**
     * Normalizes the given URL string.
     *
     * @param str the URL to normalize; surrounding whitespace is trimmed
     * @return the normalized URL; the empty string if {@code str} is empty;
     *         or {@code null} if {@code str} is {@code null} or cannot be
     *         parsed or rebuilt as a URL
     */
    @Override // crawlercommons.filters.URLFilter
    public String filter(String str) {
        if (str == null) {
            // Treat a missing URL like an unparseable one instead of failing with an NPE.
            return null;
        }
        if (str.isEmpty()) {
            // The empty string is passed through untouched.
            return str;
        }

        String urlString = str.trim();
        URL url;
        try {
            url = new URL(urlString);
        } catch (MalformedURLException e) {
            LOG.info("Malformed URL {}", urlString);
            return null;
        }

        String protocol = url.getProtocol();
        String host = url.getHost();
        int port = url.getPort();
        String file = url.getFile();

        // A protocol that differs from the input prefix means the URL has to be rebuilt.
        boolean changed = !urlString.startsWith(protocol);

        if ("http".equals(protocol) || "https".equals(protocol) || "ftp".equals(protocol)) {
            if (host != null) {
                String lowerCaseHost = host.toLowerCase(Locale.ROOT);
                if (!host.equals(lowerCaseHost)) {
                    host = lowerCaseHost;
                    changed = true;
                }
            }
            if (port == url.getDefaultPort()) {
                // -1 makes the rebuilt URL omit the port.
                port = -1;
                changed = true;
            }
            if (file == null || file.isEmpty()) {
                file = "/";
                changed = true;
            }
            if (url.getRef() != null) {
                // Rebuilding from protocol/host/port/file drops the fragment.
                changed = true;
            }

            String normalizedFile;
            try {
                normalizedFile = getFileWithNormalizedPath(url);
            } catch (MalformedURLException e) {
                LOG.info("Malformed URL {}", url);
                return null;
            }
            if (!file.equals(normalizedFile)) {
                file = normalizedFile;
                changed = true;
            }
        }

        // Canonical percent-encoding is applied for every protocol.
        String reencodedFile = escapePath(unescapePath(file));
        if (!file.equals(reencodedFile)) {
            file = reencodedFile;
            changed = true;
        }

        if (!changed) {
            return urlString;
        }
        try {
            return new URL(protocol, host, port, file).toString();
        } catch (MalformedURLException e) {
            LOG.info("Malformed URL {}{}{}{}", protocol, host, port, file);
            return null;
        }
    }

    /**
     * Returns the file part of {@code url} with its path normalized.
     *
     * <p>The path is only normalized when it matches
     * {@link #hasNormalizablePathPattern}; if the URL cannot be converted to a
     * URI the file part is returned as is.
     *
     * @param url the parsed URL
     * @return the file part, never empty ({@code "/"} replaces an empty result)
     * @throws MalformedURLException if the normalized URI cannot be converted
     *         back to a URL
     */
    private String getFileWithNormalizedPath(URL url) throws MalformedURLException {
        String file;
        if (hasNormalizablePathPattern.matcher(url.getPath()).find()) {
            try {
                file = url.toURI().normalize().toURL().getFile();
                // Strip leading "/.." segments that are still present after normalize().
                int start = 0;
                while (file.startsWith("/../", start)) {
                    start += 3;
                }
                if (start > 0) {
                    file = file.substring(start);
                }
            } catch (URISyntaxException e) {
                // Not a valid URI: fall back to the file part without normalization.
                file = url.getFile();
            }
        } else {
            file = url.getFile();
        }
        return file.isEmpty() ? "/" : file;
    }

    /**
     * Decodes percent-escapes of characters flagged in
     * {@link #unescapedCharacters} and upper-cases the hex digits of every
     * other percent-escape.
     *
     * @param path the file part to process
     * @return the processed file part
     */
    private String unescapePath(String path) {
        StringBuilder sb = new StringBuilder(path.length());
        Matcher matcher = unescapeRulePattern.matcher(path);
        int copiedUpTo = 0;
        while (matcher.find()) {
            // Copy the literal text between the previous escape and this one.
            sb.append(path, copiedUpTo, matcher.start());
            int octet = Integer.parseInt(matcher.group(1), 16);
            if (octet < unescapedCharacters.length && unescapedCharacters[octet]) {
                sb.append((char) octet);
            } else {
                sb.append(matcher.group().toUpperCase(Locale.ROOT));
            }
            copiedUpTo = matcher.end();
        }
        sb.append(path, copiedUpTo, path.length());
        return sb.toString();
    }

    /**
     * Encodes {@code path} as UTF-8 and percent-encodes every octet that is a
     * control character (including DEL), a space, a non-ASCII octet, '[' or
     * ']'. All other octets are appended as ASCII characters.
     *
     * @param path the file part to escape
     * @return the escaped file part
     */
    private String escapePath(String path) {
        StringBuilder sb = new StringBuilder(path.length());
        for (byte b : path.getBytes(utf8)) {
            // Bytes are signed, so "b < 0x21" also covers the non-ASCII octets 0x80-0xFF.
            if (b < 0x21 || b == '[' || b == ']' || b == 0x7F) {
                int octet = b & 0xFF;
                sb.append('%');
                if (octet < 0x10) {
                    sb.append('0');
                }
                sb.append(Integer.toHexString(octet).toUpperCase(Locale.ROOT));
            } else {
                sb.append((char) b);
            }
        }
        return sb.toString();
    }

    /**
     * Reads URLs line by line from standard input (decoded as UTF-8) and logs
     * each one together with its normalized form, then exits with status 0.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException if reading from standard input fails
     */
    public static void main(String[] strArr) throws IOException {
        BasicURLNormalizer normalizer = new BasicURLNormalizer();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, utf8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                LOG.info("{} => {}", line, normalizer.filter(line));
            }
        }
        System.exit(0);
    }
}
