package org.dice_research.squirrel.data.uri.filter;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.dice_research.squirrel.data.uri.CrawleableUri;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/dice_research/squirrel/data/uri/filter/RDBRegexBasedWhiteListFilter.class */
/**
 * A {@link RDBKnownUriFilter} that additionally requires a URI to match at least one regular
 * expression of a whitelist.
 *
 * <p>The whitelist is read from a file containing one regular expression per line. The
 * expressions are compiled once when the filter is created and are applied case-insensitively
 * using {@link java.util.regex.Matcher#find()}, i.e. an expression only has to match a part of
 * the URI. Blank lines and lines that are not valid regular expressions are ignored.
 *
 * <p>If no whitelist is available (the filter was created without a file, the file could not be
 * read, or it contained no usable expression), {@link #isUriGood(CrawleableUri)} rejects every
 * URI.
 */
public class RDBRegexBasedWhiteListFilter extends RDBKnownUriFilter {
    private static final Logger LOGGER = LoggerFactory.getLogger(RDBRegexBasedWhiteListFilter.class);

    /** Offset in milliseconds (one hour) added to the current time by {@link #add(CrawleableUri)}. */
    private static final long DEFAULT_ADD_OFFSET_MILLIS = 3600000L;

    /**
     * Flags used for all whitelist expressions. Case-insensitivity is requested from the regex
     * engine instead of lower-casing the expression text, because lower-casing would silently
     * change the meaning of escapes such as {@code \S}, {@code \D}, {@code \W} or {@code \B}.
     */
    private static final int WHITE_LIST_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /** Compiled whitelist expressions in file order; never {@code null}, possibly empty. */
    private final List<Pattern> whiteList;

    /**
     * Creates a filter without a whitelist. Such a filter rejects every URI in
     * {@link #isUriGood(CrawleableUri)}.
     *
     * @param str passed unchanged to the superclass constructor
     * @param num passed unchanged to the superclass constructor
     */
    public RDBRegexBasedWhiteListFilter(String str, Integer num) {
        super(str, num);
        this.whiteList = new ArrayList<>();
    }

    /**
     * Creates a filter with the given whitelist file, passing {@code false} as the boolean
     * argument of the superclass constructor.
     *
     * @param str  passed unchanged to the superclass constructor
     * @param num  passed unchanged to the superclass constructor
     * @param file file containing one regular expression per line
     */
    public RDBRegexBasedWhiteListFilter(String str, Integer num, File file) {
        this(str, num, false, file);
    }

    /**
     * Creates a filter with the given whitelist file. If the file cannot be read, the problem is
     * logged and the filter is created with an empty whitelist (rejecting every URI).
     *
     * @param str  passed unchanged to the superclass constructor
     * @param num  passed unchanged to the superclass constructor
     * @param z    passed unchanged to the superclass constructor
     * @param file file containing one regular expression per line
     */
    public RDBRegexBasedWhiteListFilter(String str, Integer num, boolean z, File file) {
        super(str, num, z);
        List<Pattern> patterns = new ArrayList<>();
        try {
            patterns = compileWhiteList(loadWhiteList(file));
        } catch (IOException e) {
            // Keep the cause so that the reason for the missing whitelist can be diagnosed.
            LOGGER.error("A problem was found when loading the WhiteList", e);
        }
        this.whiteList = patterns;
    }

    /**
     * Accepts a URI only if the superclass accepts it and at least one whitelist expression is
     * found within the URI's string representation (case-insensitive).
     *
     * @param crawleableUri the URI to check
     * @return {@code true} if the superclass check passes and a whitelist expression matches,
     *         {@code false} otherwise (including the case of an empty whitelist)
     */
    @Override
    public boolean isUriGood(CrawleableUri crawleableUri) {
        if (!super.isUriGood(crawleableUri) || this.whiteList.isEmpty()) {
            return false;
        }
        String uri = crawleableUri.getUri().toString();
        for (Pattern pattern : this.whiteList) {
            if (pattern.matcher(uri).find()) {
                LOGGER.trace("The URI {} fits to the pattern {} of the whitelist", uri, pattern.pattern());
                return true;
            }
        }
        LOGGER.warn("The URI {} is itself a good URI, but no of the {} patterns of the whitelist matches! (in {})",
                uri, this.whiteList.size(), this);
        return false;
    }

    /**
     * Reads the distinct lines of the given file in the order of their first occurrence.
     *
     * @param file the whitelist file (read with the platform default charset)
     * @return the distinct lines of the file
     * @throws IOException if the file cannot be opened or read
     */
    private Set<String> loadWhiteList(File file) throws IOException {
        Set<String> lines = new LinkedHashSet<>();
        // try-with-resources closes the reader even if readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        return lines;
    }

    /**
     * Compiles the given expressions once so that they do not have to be compiled for every URI.
     * Blank expressions are skipped because an empty expression would be found in every URI and
     * thereby disable the whitelist. Invalid expressions are logged and skipped.
     *
     * @param expressions the regular expressions to compile
     * @return the compiled patterns in iteration order of {@code expressions}
     */
    private static List<Pattern> compileWhiteList(Set<String> expressions) {
        List<Pattern> patterns = new ArrayList<>(expressions.size());
        for (String expression : expressions) {
            if (expression.trim().isEmpty()) {
                continue;
            }
            try {
                patterns.add(Pattern.compile(expression, WHITE_LIST_FLAGS));
            } catch (PatternSyntaxException e) {
                LOGGER.error("Ignoring the invalid whitelist pattern \"" + expression + "\"", e);
            }
        }
        return patterns;
    }

    /**
     * Adds the URI by delegating to the superclass {@code add(CrawleableUri, long)} with the
     * current time plus one hour as second argument.
     * NOTE(review): the second argument is presumably the time of the next crawl; confirm
     * against the superclass.
     *
     * @param crawleableUri the URI to add
     */
    @Override
    public void add(CrawleableUri crawleableUri) {
        super.add(crawleableUri, System.currentTimeMillis() + DEFAULT_ADD_OFFSET_MILLIS);
    }

    /**
     * Delegates unchanged to the superclass implementation.
     *
     * @param crawleableUri the URI to add
     * @param j             passed unchanged to the superclass
     */
    @Override
    public void add(CrawleableUri crawleableUri, long j) {
        super.add(crawleableUri, j);
    }

    /**
     * Delegates unchanged to the superclass implementation.
     *
     * @param crawleableUri the URI to add
     * @param j             passed unchanged to the superclass
     * @param j2            passed unchanged to the superclass
     */
    @Override
    public void add(CrawleableUri crawleableUri, long j, long j2) {
        super.add(crawleableUri, j, j2);
    }

    /** Delegates unchanged to the superclass implementation. */
    @Override
    public void close() {
        super.close();
    }

    /** Delegates unchanged to the superclass implementation. */
    @Override
    public void open() {
        super.open();
    }
}
