package org.dice_research.squirrel.data.uri.filter;

import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Indexes;
import java.io.Closeable;
import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.bson.Document;
import org.dice_research.squirrel.data.uri.CrawleableUri;
import org.dice_research.squirrel.deduplication.hashing.HashValue;
import org.dice_research.squirrel.deduplication.hashing.UriHashCustodian;
import org.dice_research.squirrel.frontier.impl.FrontierImpl;
import org.dice_research.squirrel.mongodb.MongodbConnectionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link KnownUriFilter} that keeps track of already crawled URIs in the MongoDB collection
 * {@code knownurifilter} of the database {@code squirrel}.
 *
 * <p>Each stored document holds the URI, its type, the timestamps of the last and the next
 * crawl, a "crawling in process" flag and a (currently constant) hash value placeholder.
 *
 * <p>{@link #open()} must be called before any method that accesses the database. Unless the
 * environment variable {@code QUEUE_FILTER_PERSIST} parses to {@code true}, the collection is
 * dropped again when the filter is closed.
 */
public class MongoDBKnowUriFilter implements KnownUriFilter, Cloneable, Closeable, UriHashCustodian {
    private static final Logger LOGGER = LoggerFactory.getLogger(MongoDBKnowUriFilter.class);

    public static final String DB_NAME = "squirrel";
    public static final String COLLECTION_NAME = "knownurifilter";
    public static final String COLUMN_TIMESTAMP_LAST_CRAWL = "timestampLastCrawl";
    public static final String COLUMN_URI = "uri";
    public static final String COLUMN_CRAWLING_IN_PROCESS = "crawlingInProcess";
    public static final String COLUMN_TIMESTAMP_NEXT_CRAWL = "timestampNextCrawl";
    public static final String COLUMN_IP = "ipAddress";
    public static final String COLUMN_TYPE = "type";
    public static final String COLUMN_HASH_VALUE = "hashValue";

    /** Whether the collection survives {@link #close()}; {@code parseBoolean(null)} yields false. */
    private static final boolean PERSIST = Boolean.parseBoolean(System.getenv("QUEUE_FILTER_PERSIST"));
    /** Placeholder written to {@link #COLUMN_HASH_VALUE} for every added URI. */
    private static final String DUMMY_HASH_VALUE = "dummyValue";

    /** Minimum age (milliseconds, 7 days) of the last crawl before a known URI is good again. */
    private final long recrawlEveryWeek = 604800000L;

    private MongoClient client;
    /** Set by {@link #open()}; {@code null} until then. */
    private MongoDatabase mongoDB;

    /**
     * Creates the filter and obtains a client from {@link MongodbConnectionFactory}. No database
     * access happens until {@link #open()} is called.
     *
     * @param host first argument handed to {@code MongodbConnectionFactory.getConnection}
     *             (presumably the MongoDB host name - confirm against the factory)
     * @param port second argument handed to {@code MongodbConnectionFactory.getConnection}
     *             (presumably the MongoDB port - confirm against the factory)
     */
    public MongoDBKnowUriFilter(String host, Integer port) {
        LOGGER.info("Filter Persistance: {}", PERSIST);
        this.client = MongodbConnectionFactory.getConnection(host, port);
    }

    /**
     * Decides whether the given URI should be crawled.
     *
     * @param crawleableUri the URI to check
     * @return {@code true} if the URI is not stored yet, or if its last crawl is at least one
     *         week old; {@code false} otherwise
     */
    @Override
    public boolean isUriGood(CrawleableUri crawleableUri) {
        boolean known;
        Object lastCrawl = null;
        Document query = new Document(COLUMN_URI, crawleableUri.getUri().toString());
        // try-with-resources guarantees the cursor is released even if reading fails.
        try (MongoCursor<Document> cursor = this.mongoDB.getCollection(COLLECTION_NAME).find(query).iterator()) {
            known = cursor.hasNext();
            if (known) {
                lastCrawl = cursor.next().get(COLUMN_TIMESTAMP_LAST_CRAWL);
            }
        }

        boolean good;
        if (!known || lastCrawl == null) {
            // Unknown URI, or a record without a last-crawl timestamp: nothing forbids crawling.
            good = true;
        } else {
            long lastCrawlMillis = (lastCrawl instanceof Number)
                    ? ((Number) lastCrawl).longValue()
                    : Long.parseLong(lastCrawl.toString());
            good = System.currentTimeMillis() - lastCrawlMillis >= this.recrawlEveryWeek;
        }

        // Log the actual outcome; a known URI may still be good if it is due for a recrawl.
        if (good) {
            LOGGER.debug("URI {} is good", crawleableUri.toString());
        } else {
            LOGGER.debug("URI {} is not good", crawleableUri.toString());
        }
        return good;
    }

    /**
     * Stores the URI using the current time as the timestamp of its last crawl.
     *
     * @param crawleableUri the URI to store
     * @param nextCrawlTimestamp value stored as the timestamp of the next crawl
     */
    @Override
    public void add(CrawleableUri crawleableUri, long nextCrawlTimestamp) {
        add(crawleableUri, System.currentTimeMillis(), nextCrawlTimestamp);
    }

    /**
     * Builds the base document for a URI, containing only its string form and its type.
     *
     * @param crawleableUri the URI to convert
     * @return a new document with the {@code uri} and {@code type} fields set
     */
    public Document crawleableUriToMongoDocument(CrawleableUri crawleableUri) {
        Document document = new Document(COLUMN_URI, crawleableUri.getUri().toString());
        document.append(COLUMN_TYPE, crawleableUri.getType().toString());
        return document;
    }

    /**
     * Drops the collection (unless persistence is enabled) and closes the MongoDB client.
     * The client is closed even if {@link #open()} was never called or dropping fails.
     */
    @Override
    public void close() throws IOException {
        try {
            if (!PERSIST && this.mongoDB != null) {
                this.mongoDB.getCollection(COLLECTION_NAME).drop();
            }
        } finally {
            this.client.close();
        }
    }

    /**
     * Selects the database and, if the collection does not exist yet, creates it together with
     * an ascending index on the {@code uri} field.
     */
    @Override
    public void open() {
        this.mongoDB = this.client.getDatabase(DB_NAME);
        if (knowUriTableExists()) {
            return;
        }
        this.mongoDB.createCollection(COLLECTION_NAME);
        this.mongoDB.getCollection(COLLECTION_NAME)
                .createIndex(Indexes.compoundIndex(Indexes.ascending(COLUMN_URI)));
    }

    /**
     * Checks whether the collection of this filter exists in the database.
     *
     * @return {@code true} if any collection name equals {@code knownurifilter}, ignoring case
     */
    public boolean knowUriTableExists() {
        // All names have to be inspected; the collection is not necessarily listed first.
        try (MongoCursor<String> names = this.mongoDB.listCollectionNames().iterator()) {
            while (names.hasNext()) {
                if (COLLECTION_NAME.equalsIgnoreCase(names.next())) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Inserts a new document for the URI. The document is marked as not being crawled and gets
     * the dummy hash value.
     *
     * @param crawleableUri the URI to store
     * @param lastCrawlTimestamp value stored as the timestamp of the last crawl
     * @param nextCrawlTimestamp value stored as the timestamp of the next crawl
     */
    @Override
    public void add(CrawleableUri crawleableUri, long lastCrawlTimestamp, long nextCrawlTimestamp) {
        Document document = crawleableUriToMongoDocument(crawleableUri)
                .append(COLUMN_TIMESTAMP_LAST_CRAWL, Long.valueOf(lastCrawlTimestamp))
                .append(COLUMN_TIMESTAMP_NEXT_CRAWL, Long.valueOf(nextCrawlTimestamp))
                .append(COLUMN_CRAWLING_IN_PROCESS, false)
                .append(COLUMN_HASH_VALUE, DUMMY_HASH_VALUE);
        this.mongoDB.getCollection(COLLECTION_NAME).insertOne(document);
        LOGGER.debug("Adding URI {} to the known uri filter list", crawleableUri.toString());
    }

    /**
     * Not implemented: hash values are not persisted by this filter, the call has no effect on
     * the stored documents.
     *
     * @param list the URIs whose hash values should be stored (ignored)
     */
    @Override
    public void addHashValuesForUris(List<CrawleableUri> list) {
        LOGGER.debug("addHashValuesForUris is not implemented for this filter; call ignored");
    }

    /** Drops the collection of this filter, removing all stored URIs. */
    public void purge() {
        this.mongoDB.getCollection(COLLECTION_NAME).drop();
    }

    /**
     * Returns the stored URIs that are due for a recrawl and marks them as being crawled.
     *
     * <p>A URI is selected when its next-crawl timestamp has passed AND it is either not marked
     * as being crawled, or its last crawl is older than three times the general recrawl time
     * (the larger of {@code FrontierImpl.DEFAULT_GENERAL_RECRAWL_TIME} and
     * {@code FrontierImpl.getGeneralRecrawlTime()}). Documents whose URI or IP address cannot be
     * parsed are skipped with a warning.
     *
     * @return the outdated URIs; never {@code null}
     */
    @Override
    public List<CrawleableUri> getOutdatedUris() {
        long now = System.currentTimeMillis();
        long generalRecrawlTime = Math.max(FrontierImpl.DEFAULT_GENERAL_RECRAWL_TIME,
                FrontierImpl.getGeneralRecrawlTime());
        long staleLastCrawl = now - generalRecrawlTime * 3;

        List<CrawleableUri> outdatedUris = new ArrayList<>();
        // The filter must use the real field names and range comparisons; matching a timestamp
        // for equality with the current millisecond would practically never succeed.
        try (MongoCursor<Document> cursor = this.mongoDB.getCollection(COLLECTION_NAME)
                .find(Filters.and(
                        Filters.lte(COLUMN_TIMESTAMP_NEXT_CRAWL, Long.valueOf(now)),
                        Filters.or(
                                Filters.eq(COLUMN_CRAWLING_IN_PROCESS, false),
                                Filters.lte(COLUMN_TIMESTAMP_LAST_CRAWL, Long.valueOf(staleLastCrawl)))))
                .iterator()) {
            while (cursor.hasNext()) {
                Document document = cursor.next();
                Object storedUri = document.get(COLUMN_URI);
                if (storedUri == null) {
                    LOGGER.warn("Skipping stored document without a URI");
                    continue;
                }
                try {
                    InetAddress address = parseStoredIp(document.get(COLUMN_IP));
                    outdatedUris.add(new CrawleableUri(new URI(storedUri.toString()), address));
                } catch (URISyntaxException | UnknownHostException e) {
                    LOGGER.warn("Skipping stored URI that could not be restored: " + storedUri, e);
                }
            }
        }

        BasicDBObject markInProcess = new BasicDBObject();
        markInProcess.append("$set", new BasicDBObject().append(COLUMN_CRAWLING_IN_PROCESS, true));
        for (CrawleableUri outdatedUri : outdatedUris) {
            this.mongoDB.getCollection(COLLECTION_NAME).updateMany(
                    new BasicDBObject().append(COLUMN_URI, outdatedUri.getUri().toString()),
                    markInProcess);
        }
        return outdatedUris;
    }

    /**
     * Converts a stored IP value into an address. Values of the form {@code name/address} are
     * reduced to the part after the first slash.
     *
     * @return the address, or {@code null} if no (non-empty) IP value is stored; documents
     *         written by {@link #add(CrawleableUri, long, long)} carry no IP field
     */
    private static InetAddress parseStoredIp(Object storedIp) throws UnknownHostException {
        if (storedIp == null) {
            return null;
        }
        String ip = storedIp.toString();
        int slash = ip.indexOf('/');
        if (slash >= 0) {
            ip = ip.substring(slash + 1);
        }
        return ip.isEmpty() ? null : InetAddress.getByName(ip);
    }

    /**
     * Not implemented: always reports zero, independent of the collection content.
     *
     * @return {@code 0}
     */
    @Override
    public long count() {
        return 0L;
    }

    /**
     * Not implemented: no lookup is performed.
     *
     * @param set the hash values to search for (ignored)
     * @return always {@code null}; callers must handle this
     */
    @Override
    public Set<CrawleableUri> getUrisWithSameHashValues(Set<HashValue> set) {
        return null;
    }
}
