package org.dice_research.squirrel.frontier.impl;

import java.net.UnknownHostException;
import java.util.Iterator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import org.dice_research.squirrel.Constants;
import org.dice_research.squirrel.data.uri.CrawleableUri;
import org.dice_research.squirrel.data.uri.filter.SchemeBasedUriFilter;
import org.dice_research.squirrel.data.uri.filter.UriFilterComposer;
import org.dice_research.squirrel.data.uri.info.URIReferences;
import org.dice_research.squirrel.data.uri.norm.UriGenerator;
import org.dice_research.squirrel.data.uri.norm.UriNormalizer;
import org.dice_research.squirrel.frontier.Frontier;
import org.dice_research.squirrel.frontier.recrawling.OutDatedUriRetriever;
import org.dice_research.squirrel.graph.GraphLogger;
import org.dice_research.squirrel.queue.BlockingQueue;
import org.dice_research.squirrel.queue.UriQueue;
import org.dice_research.squirrel.uri.processing.UriProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/dice_research/squirrel/frontier/impl/FrontierImpl.class */
/**
 * Standard {@link Frontier} implementation backed by a {@link UriQueue}.
 *
 * <p>New URIs are normalized, optionally expanded through the configured
 * {@link UriGenerator}s, checked against the {@link UriFilterComposer} and a
 * {@link SchemeBasedUriFilter}, enriched with their IP address and finally
 * handed to the queue. If recrawling is enabled and an
 * {@link OutDatedUriRetriever} is available, a {@link Timer} periodically
 * re-adds the URIs reported by the retriever to the queue.
 *
 * <p>Note that the general recrawl time is kept in a <em>static</em> field that
 * is overwritten by every constructor call, i.e. the most recently constructed
 * instance determines the value returned by {@link #getGeneralRecrawlTime()}.
 */
public class FrontierImpl implements Frontier {
    private static final Logger LOGGER = LoggerFactory.getLogger(FrontierImpl.class);

    /** Normalizer applied to every URI before it is filtered and queued. */
    protected UriNormalizer normalizer;
    /** Filter deciding whether a URI is "good"; also provides the known-URI filter. */
    protected UriFilterComposer uriFilter;
    /** Source of URIs that should be crawled again; may be {@code null}. */
    protected OutDatedUriRetriever outDatedUriRetriever;
    /** Optional URI reference store; may be {@code null}. */
    protected URIReferences uriReferences;
    /** Filter rejecting URIs whose scheme is not supported. */
    protected SchemeBasedUriFilter schemeUriFilter;
    /** Queue receiving all accepted URIs. */
    protected UriQueue queue;
    /** Helper used to recognize the IP address and the type of a URI. */
    protected UriProcessor uriProcessor;
    /** Generators producing additional variants of a newly added URI. */
    protected List<UriGenerator> uriGenerator;
    /** Optional graph logger; may be {@code null}. */
    protected GraphLogger graphLogger;

    /** Recrawling flag exactly as passed to the constructor. */
    private final boolean doesRecrawling;
    /** Timer driving the recrawling task; {@code null} if no task was scheduled. */
    private final Timer timerRecrawling;
    /** Shared across all instances; written by the constructor. */
    private static long generalRecrawlTime;
    /** Delay and period (in milliseconds) of the recrawling timer. */
    private final long timerPeriod;

    /** Default general recrawl time in milliseconds (one week). */
    public static final long DEFAULT_GENERAL_RECRAWL_TIME = 604800000;
    /** Default period of the recrawling timer in milliseconds (one hour). */
    private static final long DEFAULT_TIMER_PERIOD = 3600000;

    /**
     * Creates a frontier without URI references and without an
     * {@link OutDatedUriRetriever}.
     *
     * @param normalizer         normalizer applied to new URIs
     * @param uriFilter          filter deciding which URIs are accepted
     * @param queue              queue receiving the accepted URIs
     * @param uriGenerators      generators for additional URI variants
     * @param graphLogger        graph logger, may be {@code null}
     * @param doesRecrawling     value reported by {@link #doesRecrawling()}
     * @param generalRecrawlTime general recrawl time in milliseconds
     * @param timerPeriod        delay and period of the recrawling timer in milliseconds
     */
    public FrontierImpl(UriNormalizer normalizer, UriFilterComposer uriFilter, UriQueue queue,
            List<UriGenerator> uriGenerators, GraphLogger graphLogger, boolean doesRecrawling,
            long generalRecrawlTime, long timerPeriod) {
        this(normalizer, uriFilter, null, queue, uriGenerators, graphLogger, doesRecrawling, generalRecrawlTime,
                timerPeriod, null);
    }

    /**
     * Creates a frontier without graph logger, URI references and
     * {@link OutDatedUriRetriever}.
     *
     * @param normalizer         normalizer applied to new URIs
     * @param uriFilter          filter deciding which URIs are accepted
     * @param queue              queue receiving the accepted URIs
     * @param uriGenerators      generators for additional URI variants
     * @param doesRecrawling     value reported by {@link #doesRecrawling()}
     * @param generalRecrawlTime general recrawl time in milliseconds
     * @param timerPeriod        delay and period of the recrawling timer in milliseconds
     */
    public FrontierImpl(UriNormalizer normalizer, UriFilterComposer uriFilter, UriQueue queue,
            List<UriGenerator> uriGenerators, boolean doesRecrawling, long generalRecrawlTime, long timerPeriod) {
        this(normalizer, uriFilter, queue, uriGenerators, null, doesRecrawling, generalRecrawlTime, timerPeriod);
    }

    /**
     * Creates a frontier with URI references and an {@link OutDatedUriRetriever},
     * using the default recrawl time and timer period.
     *
     * @param normalizer           normalizer applied to new URIs
     * @param uriFilter            filter deciding which URIs are accepted
     * @param uriReferences        URI reference store, may be {@code null}
     * @param queue                queue receiving the accepted URIs
     * @param uriGenerators        generators for additional URI variants
     * @param doesRecrawling       value reported by {@link #doesRecrawling()}
     * @param outDatedUriRetriever source of URIs that should be crawled again
     */
    public FrontierImpl(UriNormalizer normalizer, UriFilterComposer uriFilter, URIReferences uriReferences,
            UriQueue queue, List<UriGenerator> uriGenerators, boolean doesRecrawling,
            OutDatedUriRetriever outDatedUriRetriever) {
        this(normalizer, uriFilter, uriReferences, queue, uriGenerators, null, doesRecrawling,
                DEFAULT_GENERAL_RECRAWL_TIME, DEFAULT_TIMER_PERIOD, outDatedUriRetriever);
    }

    /**
     * Creates a frontier with the default recrawl time and timer period.
     *
     * @param normalizer     normalizer applied to new URIs
     * @param uriFilter      filter deciding which URIs are accepted
     * @param queue          queue receiving the accepted URIs
     * @param uriGenerators  generators for additional URI variants
     * @param doesRecrawling value reported by {@link #doesRecrawling()}
     */
    public FrontierImpl(UriNormalizer normalizer, UriFilterComposer uriFilter, UriQueue queue,
            List<UriGenerator> uriGenerators, boolean doesRecrawling) {
        this(normalizer, uriFilter, queue, uriGenerators, null, doesRecrawling, DEFAULT_GENERAL_RECRAWL_TIME,
                DEFAULT_TIMER_PERIOD);
    }

    /**
     * Creates a frontier that does not recrawl, using the default recrawl time
     * and timer period.
     *
     * @param normalizer    normalizer applied to new URIs
     * @param uriFilter     filter deciding which URIs are accepted
     * @param queue         queue receiving the accepted URIs
     * @param uriGenerators generators for additional URI variants
     */
    public FrontierImpl(UriNormalizer normalizer, UriFilterComposer uriFilter, UriQueue queue,
            List<UriGenerator> uriGenerators) {
        this(normalizer, uriFilter, queue, uriGenerators, null, false, DEFAULT_GENERAL_RECRAWL_TIME,
                DEFAULT_TIMER_PERIOD);
    }

    /**
     * Full constructor. Opens the given queue and, if recrawling is enabled and
     * a retriever is available, starts the recrawling timer.
     *
     * @param normalizer           normalizer applied to new URIs
     * @param uriFilter            filter deciding which URIs are accepted
     * @param uriReferences        URI reference store, may be {@code null}
     * @param queue                queue receiving the accepted URIs; it is opened here
     * @param uriGenerators        generators for additional URI variants
     * @param graphLogger          graph logger, may be {@code null}
     * @param doesRecrawling       value reported by {@link #doesRecrawling()}
     * @param generalRecrawlTime   general recrawl time in milliseconds; stored in a
     *                             static field shared by all instances
     * @param timerPeriod          delay and period of the recrawling timer in
     *                             milliseconds
     * @param outDatedUriRetriever source of URIs that should be crawled again; if
     *                             {@code null}, no recrawling task is scheduled
     */
    public FrontierImpl(UriNormalizer normalizer, UriFilterComposer uriFilter, URIReferences uriReferences,
            final UriQueue queue, List<UriGenerator> uriGenerators, GraphLogger graphLogger, boolean doesRecrawling,
            long generalRecrawlTime, long timerPeriod, final OutDatedUriRetriever outDatedUriRetriever) {
        this.schemeUriFilter = new SchemeBasedUriFilter();
        this.normalizer = normalizer;
        this.uriFilter = uriFilter;
        this.uriReferences = uriReferences;
        this.uriGenerator = uriGenerators;
        this.queue = queue;
        this.uriProcessor = new UriProcessor();
        this.graphLogger = graphLogger;
        this.outDatedUriRetriever = outDatedUriRetriever;
        this.queue.open();
        this.doesRecrawling = doesRecrawling;
        this.timerPeriod = timerPeriod;
        FrontierImpl.generalRecrawlTime = generalRecrawlTime;
        this.timerRecrawling = doesRecrawling ? startRecrawling(queue, outDatedUriRetriever) : null;
    }

    /**
     * Schedules the periodic task that re-adds outdated URIs to the queue.
     *
     * @param targetQueue queue the outdated URIs are added to
     * @param retriever   source of the outdated URIs, may be {@code null}
     * @return the started timer, or {@code null} if there is no retriever
     */
    private Timer startRecrawling(final UriQueue targetQueue, final OutDatedUriRetriever retriever) {
        if (retriever == null) {
            // Without a retriever every timer run would fail with a NullPointerException.
            LOGGER.warn("Recrawling is enabled but no OutDatedUriRetriever is available. "
                    + "No outdated URIs will be re-added to the queue.");
            return null;
        }
        Timer timer = new Timer();
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                // An exception escaping run() would terminate the timer thread and
                // silently stop all future recrawling, so failures are logged instead.
                try {
                    List<CrawleableUri> outdatedUris = retriever.getUriToRecrawl();
                    if (outdatedUris == null) {
                        return;
                    }
                    for (CrawleableUri outdatedUri : outdatedUris) {
                        targetQueue.addUri(FrontierImpl.this.uriProcessor.recognizeUriType(outdatedUri));
                    }
                } catch (RuntimeException e) {
                    LOGGER.error("Exception while re-adding outdated URIs to the queue. "
                            + "It will be tried again with the next timer run.", e);
                }
            }
        }, this.timerPeriod, this.timerPeriod);
        return timer;
    }

    /**
     * Returns the next URIs as provided by the underlying queue.
     */
    @Override
    public List<CrawleableUri> getNextUris() {
        return this.queue.getNextUris();
    }

    /**
     * Adds every URI of the given list via {@link #addNewUri(CrawleableUri)}.
     *
     * @param list the URIs to add
     */
    @Override
    public void addNewUris(List<CrawleableUri> list) {
        for (CrawleableUri uri : list) {
            addNewUri(uri);
        }
    }

    /**
     * Normalizes the given URI and adds it. Afterwards, every configured
     * {@link UriGenerator} is asked for a variant of the normalized URI; each
     * non-{@code null} variant is normalized and added as well. A failure while
     * handling one generator is logged and does not affect the other generators.
     *
     * @param crawleableUri the URI to add
     */
    @Override
    public void addNewUri(CrawleableUri crawleableUri) {
        CrawleableUri normalized = this.normalizer.normalize(crawleableUri);
        addNormalizedUri(normalized);
        if (this.uriGenerator == null) {
            return;
        }
        for (UriGenerator generator : this.uriGenerator) {
            try {
                // Ask the generator only once; it is not visible here whether it is cheap or stateless.
                CrawleableUri variant = generator.getUriVariant(normalized);
                if (variant != null) {
                    addNormalizedUri(this.normalizer.normalize(variant));
                }
            } catch (Exception e) {
                LOGGER.warn("Exception happened while generating additional URI variant for URI: "
                        + normalized.getUri(), e);
            }
        }
    }

    /**
     * Filters an already normalized URI and adds it to the queue.
     *
     * <p>The URI is dropped if the general filter or the scheme filter rejects
     * it. Otherwise its IP address is determined; the URI is only queued if an
     * IP address is available. In both cases (queued or not) the URI is
     * registered with the known-URI filter using the current time.
     *
     * @param crawleableUri the normalized URI
     */
    protected void addNormalizedUri(CrawleableUri crawleableUri) {
        if (!this.uriFilter.isUriGood(crawleableUri)) {
            LOGGER.debug("addNewUri({}): URI is not good [{}]. Will not be added!", crawleableUri, this.uriFilter);
            return;
        }
        LOGGER.debug("addNewUri({}): URI is good [{}]", crawleableUri, this.uriFilter);
        if (!this.schemeUriFilter.isUriGood(crawleableUri)) {
            // Concatenation is kept on purpose: the return type of getSchemes() is not
            // visible here and parameterized logging would render arrays differently.
            LOGGER.warn("addNewUri(" + crawleableUri + "): " + crawleableUri.getUri().getScheme() + " is not supported, only " + this.schemeUriFilter.getSchemes() + ". Will not added!");
            return;
        }
        LOGGER.trace("addNewUri({}): URI schemes is OK [{}]", crawleableUri.getUri(), this.schemeUriFilter);

        CrawleableUri resolvedUri = crawleableUri;
        try {
            resolvedUri = this.uriProcessor.recognizeInetAddress(crawleableUri);
        } catch (UnknownHostException e) {
            LOGGER.error("Could not recognize IP for {}, unknown host", crawleableUri.getUri());
        }
        if (resolvedUri.getIpAddress() != null) {
            this.queue.addUri(this.uriProcessor.recognizeUriType(resolvedUri));
        } else {
            LOGGER.error("Couldn't determine the Inet address of \"{}\". It will be ignored.", resolvedUri.getUri());
        }
        this.uriFilter.getKnownUriFilter().add(resolvedUri, System.currentTimeMillis());
    }

    /**
     * Handles URIs a worker has finished crawling.
     *
     * <p>If the queue is a {@link BlockingQueue}, the URIs are marked as
     * accessible again. A URI whose preferred recrawl time is set and already in
     * the past is added again as a fresh URI (keeping only its URI, IP address
     * and type); every other URI is registered with the known-URI filter using
     * the current time.
     *
     * @param list the URIs that have been crawled
     */
    @Override
    public void crawlingDone(List<CrawleableUri> list) {
        LOGGER.info("One worker finished his work and crawled {} URIs.", list.size());
        if (this.queue instanceof BlockingQueue) {
            ((BlockingQueue) this.queue).markUrisAsAccessible(list);
        }
        for (CrawleableUri crawledUri : list) {
            Long recrawlOn = (Long) crawledUri.getData(Constants.URI_PREFERRED_RECRAWL_ON);
            boolean recrawlNow = recrawlOn != null && recrawlOn.longValue() < System.currentTimeMillis();
            if (recrawlNow) {
                // Create a new URI object that reuses only the meta data that is still useful.
                CrawleableUri recrawlUri = new CrawleableUri(crawledUri.getUri(), crawledUri.getIpAddress());
                recrawlUri.addData("type", crawledUri.getData("type"));
                addNewUri(recrawlUri);
            } else {
                this.uriFilter.getKnownUriFilter().add(crawledUri, System.currentTimeMillis());
            }
        }
    }

    /**
     * Returns the number of blocked keys if the queue is a
     * {@link BlockingQueue}, otherwise {@code 0}.
     */
    @Override
    public int getNumberOfPendingUris() {
        if (this.queue instanceof BlockingQueue) {
            return ((BlockingQueue) this.queue).getNumberOfBlockedKeys();
        }
        return 0;
    }

    /**
     * Returns the recrawling flag this frontier has been created with.
     */
    @Override
    public boolean doesRecrawling() {
        return this.doesRecrawling;
    }

    /**
     * Cancels the recrawling timer if one has been started. Safe to call on a
     * frontier that does not recrawl.
     */
    @Override
    public void close() {
        if (this.timerRecrawling != null) {
            this.timerRecrawling.cancel();
        }
    }

    /**
     * Returns the general recrawl time in milliseconds as set by the most
     * recently constructed instance.
     */
    public static long getGeneralRecrawlTime() {
        return generalRecrawlTime;
    }

    /**
     * Returns the queue used by this frontier.
     */
    public UriQueue getQueue() {
        return this.queue;
    }
}
