package org.dice_research.squirrel.components;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.Semaphore;
import org.apache.jena.sparql.sse.Tags;
import org.dice_research.squirrel.Constants;
import org.dice_research.squirrel.configurator.MongoConfiguration;
import org.dice_research.squirrel.configurator.SeedConfiguration;
import org.dice_research.squirrel.configurator.WebConfiguration;
import org.dice_research.squirrel.configurator.WhiteListConfiguration;
import org.dice_research.squirrel.data.uri.CrawleableUri;
import org.dice_research.squirrel.data.uri.UriSeedReader;
import org.dice_research.squirrel.data.uri.filter.InMemoryKnownUriFilter;
import org.dice_research.squirrel.data.uri.filter.RegexBasedWhiteListFilter;
import org.dice_research.squirrel.data.uri.filter.UriFilterComposer;
import org.dice_research.squirrel.data.uri.info.URIReferences;
import org.dice_research.squirrel.data.uri.norm.UriGenerator;
import org.dice_research.squirrel.data.uri.norm.UriNormalizer;
import org.dice_research.squirrel.data.uri.serialize.Serializer;
import org.dice_research.squirrel.data.uri.serialize.java.GzipJavaUriSerializer;
import org.dice_research.squirrel.frontier.ExtendedFrontier;
import org.dice_research.squirrel.frontier.Frontier;
import org.dice_research.squirrel.frontier.impl.ExtendedFrontierImpl;
import org.dice_research.squirrel.frontier.impl.FrontierImpl;
import org.dice_research.squirrel.frontier.impl.FrontierSenderToWebservice;
import org.dice_research.squirrel.frontier.impl.QueueBasedTerminationCheck;
import org.dice_research.squirrel.frontier.impl.TerminationCheck;
import org.dice_research.squirrel.frontier.impl.WorkerGuard;
import org.dice_research.squirrel.frontier.recrawling.OutDatedUriRetriever;
import org.dice_research.squirrel.queue.InMemoryQueue;
import org.dice_research.squirrel.queue.UriQueue;
import org.dice_research.squirrel.rabbit.RPCServer;
import org.dice_research.squirrel.rabbit.RespondingDataHandler;
import org.dice_research.squirrel.rabbit.ResponseHandler;
import org.dice_research.squirrel.rabbit.msgs.CrawlingResult;
import org.dice_research.squirrel.rabbit.msgs.UriSet;
import org.dice_research.squirrel.rabbit.msgs.UriSetRequest;
import org.dice_research.squirrel.worker.AliveMessage;
import org.dice_research.squirrel.worker.WorkerInfo;
import org.hobbit.core.components.AbstractComponent;
import org.hobbit.core.data.RabbitQueue;
import org.hobbit.core.rabbit.DataHandler;
import org.hobbit.core.rabbit.DataReceiver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Component;

/**
 * Component that hosts the crawler's {@link Frontier}.
 *
 * <p>
 * During {@link #init()} it opens (or replaces) the URI queue and known-URI
 * filter, builds the frontier, starts an {@link RPCServer} on the frontier
 * queue and optionally starts a thread that sends frontier information to the
 * web service. Incoming messages are handled in
 * {@link #handleData(byte[], ResponseHandler, String, String)}.
 * {@link #run()} blocks until the {@link TerminatorTask} releases the
 * termination semaphore.
 * </p>
 */
@Component
@Qualifier("frontierComponent")
public class FrontierComponent extends AbstractComponent implements RespondingDataHandler {
    private static final Logger LOGGER = LoggerFactory.getLogger(FrontierComponent.class);

    /** Delay before the first termination check, in milliseconds. */
    private static final long TERMINATION_CHECK_DELAY_MS = 5000L;

    /** Interval between two termination checks, in milliseconds. */
    private static final long TERMINATION_CHECK_PERIOD_MS = 5000L;

    @Autowired
    @Qualifier("queueBean")
    protected UriQueue queue;

    @Autowired
    @Qualifier("UriFilterBean")
    private UriFilterComposer uriFilter;
    private Frontier frontier;
    private RabbitQueue rabbitQueue;
    private DataReceiver receiver;

    // NOTE: init() replaces the injected instance with a GzipJavaUriSerializer.
    @Autowired
    @Qualifier("serializerBean")
    private Serializer serializer;

    @Autowired
    @Qualifier("normalizerBean")
    private UriNormalizer normalizer;

    @Autowired
    @Qualifier("uriRetrieverBean")
    protected OutDatedUriRetriever outDatedUriRetriever;

    // Assigned through the @Autowired setter setGenerator(List).
    @Qualifier("listUriGenerator")
    private List<UriGenerator> uriGenerator;

    /**
     * Timer driving the {@link TerminatorTask}. It is created lazily when the
     * first {@link UriSet} arrives and therefore may still be {@code null} when
     * {@link #close()} is called. Guarded by {@link #terminatorLock}.
     */
    private Timer timerTerminator;

    /** Lock protecting creation and cancellation of {@link #timerTerminator}. */
    private final Object terminatorLock = new Object();

    public static final boolean RECRAWLING_ACTIVE = true;
    private URIReferences uriReferences = null;

    /** Starts without permits, so {@link #run()} blocks until a permit is released. */
    private final Semaphore terminationMutex = new Semaphore(0);
    private final WorkerGuard workerGuard = new WorkerGuard(this);
    private final boolean doRecrawling = true;

    /** 2,592,000,000 ms = 30 days; handed to the in-memory known-URI filter. */
    private long recrawlingTime = 2592000000L;

    /**
     * Periodic task that releases the termination semaphore once no worker has
     * URIs in progress and the {@link TerminationCheck} reports that the frontier
     * should terminate for the given queue.
     */
    public static class TerminatorTask extends TimerTask {
        private final UriQueue queue;
        private final TerminationCheck terminationCheck = new QueueBasedTerminationCheck();
        private final Semaphore terminationMutex;
        private final WorkerGuard workerGuard;

        /**
         * @param uriQueue    queue handed to the termination check
         * @param semaphore   semaphore that is released when termination is detected
         * @param workerGuard source of the per-worker crawling state
         */
        public TerminatorTask(UriQueue uriQueue, Semaphore semaphore, WorkerGuard workerGuard) {
            this.queue = uriQueue;
            this.terminationMutex = semaphore;
            this.workerGuard = workerGuard;
        }

        /**
         * Releases one permit of the termination semaphore if no worker is
         * crawling and the termination check agrees. The termination check is
         * only consulted when no worker has URIs in progress.
         */
        @Override
        public void run() {
            for (WorkerInfo workerInfo : this.workerGuard.getMapWorkerInfo().values()) {
                if (workerInfo.getUrisCrawling().size() > 0) {
                    // At least one worker is still busy; try again on the next tick.
                    return;
                }
            }
            if (this.terminationCheck.shouldFrontierTerminate(this.queue)) {
                this.terminationMutex.release();
            }
        }
    }

    /**
     * Initializes the component: prepares queue and known-URI filter, creates
     * the frontier and the RPC receiver, processes the optional seed file and,
     * if enabled in the web configuration, starts the web service sender thread.
     *
     * @throws Exception if the super class initialization or one of the used
     *                   resources fails
     */
    @Override
    public void init() throws Exception {
        super.init();
        this.serializer = new GzipJavaUriSerializer();
        MongoConfiguration mDBConfiguration = MongoConfiguration.getMDBConfiguration();
        WebConfiguration webConfiguration = WebConfiguration.getWebConfiguration();
        if (mDBConfiguration != null) {
            this.queue.open();
            this.uriFilter.getKnownUriFilter().open();
            WhiteListConfiguration whiteListConfiguration = WhiteListConfiguration.getWhiteListConfiguration();
            if (whiteListConfiguration != null) {
                // Wrap the known-URI filter with the white list read from the configured file.
                File whiteListFile = new File(whiteListConfiguration.getWhiteListURI());
                this.uriFilter.setKnownUriFilter(
                        RegexBasedWhiteListFilter.create(this.uriFilter.getKnownUriFilter(), whiteListFile));
            }
        } else {
            LOGGER.warn("Couldn't get MDBConfiguration. An in-memory queue will be used.");
            this.queue = new InMemoryQueue();
            this.uriFilter.setKnownUriFilter(new InMemoryKnownUriFilter(true, this.recrawlingTime));
        }

        this.frontier = new ExtendedFrontierImpl(this.normalizer, this.uriFilter, this.uriReferences, this.queue,
                this.uriGenerator, true, this.outDatedUriRetriever);
        this.rabbitQueue = this.incomingDataQueueFactory.createDefaultRabbitQueue(Constants.FRONTIER_QUEUE_NAME);
        this.receiver = new RPCServer.Builder().responseQueueFactory(this.outgoingDataQueuefactory)
                .dataHandler((DataHandler) this).maxParallelProcessedMsgs(100).queue(this.rabbitQueue).build();

        SeedConfiguration seedConfiguration = SeedConfiguration.getSeedConfiguration();
        if (seedConfiguration != null) {
            processSeedFile(seedConfiguration.getSeedFile());
        }
        LOGGER.info("Frontier initialized.");

        if (!webConfiguration.isCommunicationWithWebserviceEnabled()) {
            LOGGER.info("webConfiguration.isCommunicationWithWebserviceEnabled is set to " + webConfiguration.isCommunicationWithWebserviceEnabled() + "/" + webConfiguration.isVisualizationOfCrawledGraphEnabled() + ". No WebServiceSenderThread will be started!");
            return;
        }

        FrontierSenderToWebservice frontierSenderToWebservice = new FrontierSenderToWebservice(
                this.outgoingDataQueuefactory, this.workerGuard, this.queue, this.uriFilter.getKnownUriFilter(),
                this.uriReferences);
        LOGGER.trace("FrontierSenderToWebservice -> sendCrawledGraph is set to " + webConfiguration.isVisualizationOfCrawledGraphEnabled());
        Thread thread = new Thread(frontierSenderToWebservice);
        thread.setName("Sender to the Webservice via RabbitMQ (current information from the Frontier)");
        thread.start();
        // NOTE(review): Tags.symGT is presumably the literal ">" that the decompiler
        // mapped to a Jena constant - confirm and replace it with a plain literal.
        LOGGER.info("Started thread [" + thread.getName() + "] <ID " + thread.getId() + " in the state " + thread.getState() + " with the priority " + thread.getPriority() + Tags.symGT);
    }

    /**
     * Blocks until the {@link TerminatorTask} releases the termination
     * semaphore.
     *
     * @throws Exception if the waiting thread is interrupted
     */
    @Override
    public void run() throws Exception {
        this.terminationMutex.acquire();
    }

    /**
     * Cancels the termination timer (if it has been started) and closes the
     * receiver, the queue, the URI references, the URI filter, the worker guard,
     * the frontier and finally the super class.
     *
     * @throws IOException if one of the closed resources fails to close
     */
    @Override
    public void close() throws IOException {
        LOGGER.info("Closing Frontier Component.");
        // The timer only exists after the first UriSet has been received. Without
        // this check, closing an idle component failed with a NullPointerException
        // and none of the resources below were released.
        synchronized (this.terminatorLock) {
            if (this.timerTerminator != null) {
                this.timerTerminator.cancel();
            }
        }
        if (this.receiver != null) {
            this.receiver.close();
        }
        if (this.queue != null) {
            this.queue.close();
        }
        if (this.uriReferences != null) {
            this.uriReferences.close();
        }
        if (this.uriFilter instanceof Closeable) {
            ((Closeable) this.uriFilter).close();
        }
        this.workerGuard.shutdown();
        if (this.frontier != null) {
            this.frontier.close();
        }
        super.close();
        LOGGER.info("Frontier Component Closed.");
    }

    /**
     * Handles a message for which no response can be sent. Delegates to
     * {@link #handleData(byte[], ResponseHandler, String, String)} with
     * {@code null} for the response related arguments.
     *
     * @param bArr the serialized message
     */
    @Override
    public void handleData(byte[] bArr) {
        handleData(bArr, null, null, null);
    }

    /**
     * Deserializes the given message and dispatches it according to its type:
     * <ul>
     * <li>{@link UriSetRequest}: answers with the next URIs of the frontier</li>
     * <li>{@link UriSet}: starts the termination timer if necessary and adds the
     * URIs to the frontier</li>
     * <li>{@link CrawlingResult}: reports the crawled URIs to the frontier and
     * removes them from the worker guard</li>
     * <li>{@link AliveMessage}: refreshes the timestamp of the sending worker</li>
     * </ul>
     * A {@code null} result of the deserialization is ignored silently, any
     * other type is logged and ignored.
     *
     * @param bArr            the serialized message
     * @param responseHandler handler used to answer a {@link UriSetRequest}; may
     *                        be {@code null}
     * @param str             forwarded unchanged to
     *                        {@code ResponseHandler.sendResponse} (presumably
     *                        reply routing information - confirm against
     *                        {@link RespondingDataHandler})
     * @param str2            forwarded unchanged to
     *                        {@code ResponseHandler.sendResponse} (see
     *                        {@code str})
     */
    @Override
    public void handleData(byte[] bArr, ResponseHandler responseHandler, String str, String str2) {
        try {
            Object deserialize = this.serializer.deserialize(bArr);
            if (deserialize == null) {
                return;
            }
            if (deserialize instanceof UriSetRequest) {
                responseToUriSetRequest(responseHandler, str, str2, (UriSetRequest) deserialize);
            } else if (deserialize instanceof UriSet) {
                startTerminatorIfNeeded();
                this.frontier.addNewUris(((UriSet) deserialize).uris);
            } else if (deserialize instanceof CrawlingResult) {
                CrawlingResult crawlingResult = (CrawlingResult) deserialize;
                LOGGER.warn("Received the message that the crawling for {} URIs is done.", crawlingResult.uris.size());
                this.frontier.crawlingDone(crawlingResult.uris);
                this.workerGuard.removeUrisForWorker(crawlingResult.idOfWorker, crawlingResult.uris);
            } else if (deserialize instanceof AliveMessage) {
                String workerId = ((AliveMessage) deserialize).getWorkerId();
                LOGGER.warn("Received alive message from worker with id " + workerId);
                this.workerGuard.putNewTimestamp(workerId);
            } else {
                LOGGER.warn("Received an unknown object {}. It will be ignored.", deserialize.toString());
            }
        } catch (IOException e) {
            LOGGER.error("Error while trying to deserialize incoming data. It will be ignored.", e);
        }
    }

    /**
     * Creates and schedules the termination timer exactly once.
     *
     * <p>
     * The receiver is configured with up to 100 messages processed in parallel,
     * so an unsynchronized check-then-create could start several timers of which
     * only the last one would be cancelled in {@link #close()}.
     * NOTE(review): confirm that RPCServer really dispatches on several threads.
     * </p>
     */
    private void startTerminatorIfNeeded() {
        synchronized (this.terminatorLock) {
            if (this.timerTerminator != null) {
                return;
            }
            LOGGER.info("Initializing Terminator task...");
            TerminatorTask terminatorTask = new TerminatorTask(this.queue, this.terminationMutex, this.workerGuard);
            this.timerTerminator = new Timer();
            this.timerTerminator.schedule(terminatorTask, TERMINATION_CHECK_DELAY_MS, TERMINATION_CHECK_PERIOD_MS);
        }
    }

    /**
     * Answers a {@link UriSetRequest} with the next URIs of the frontier and
     * registers non-empty URI lists for the requesting worker at the worker
     * guard. Nothing is sent if no response handler is available.
     *
     * @param responseHandler handler used to send the response; may be
     *                        {@code null}
     * @param str             forwarded unchanged to {@code sendResponse}
     * @param str2            forwarded unchanged to {@code sendResponse}
     * @param uriSetRequest   the request that is answered
     */
    private void responseToUriSetRequest(ResponseHandler responseHandler, String str, String str2, UriSetRequest uriSetRequest) {
        if (responseHandler == null) {
            LOGGER.warn("Got a UriSetRequest object without a ResponseHandler. No response will be sent.");
            return;
        }
        try {
            List<CrawleableUri> nextUris = this.frontier.getNextUris();
            LOGGER.trace("Responding with a list of {} uris.", nextUris == null ? "null" : Integer.toString(nextUris.size()));
            responseHandler.sendResponse(this.serializer.serialize(new UriSet(nextUris)), str, str2);
            // Only track URIs that have really been handed to the worker.
            if (nextUris != null && !nextUris.isEmpty()) {
                this.workerGuard.putUrisForWorker(uriSetRequest.getWorkerId(), uriSetRequest.workerSendsAliveMessages(), nextUris);
            }
        } catch (IOException e) {
            LOGGER.error("Couldn't serialize new URI set.", e);
        }
    }

    /**
     * Stores the value 1 under the key {@link Constants#URI_DEPTH} in every
     * given URI.
     *
     * @param list the URIs to update (modified in place)
     * @return the given list
     */
    private List<CrawleableUri> initializeDepth(List<CrawleableUri> list) {
        for (CrawleableUri crawleableUri : list) {
            crawleableUri.addData(Constants.URI_DEPTH, 1);
        }
        return list;
    }

    /**
     * Reads the URIs of the given seed file and adds them to the frontier. Any
     * exception is logged and the seed file is ignored.
     *
     * @param str location of the seed file as accepted by {@link UriSeedReader}
     */
    protected void processSeedFile(String str) {
        try {
            List<CrawleableUri> seedUris = initializeDepth(new UriSeedReader(str).getUris());
            if (!seedUris.isEmpty()) {
                this.frontier.addNewUris(seedUris);
            }
        } catch (Exception e) {
            LOGGER.error("Couldn't process seed file. It will be ignored.", e);
        }
    }

    /**
     * Forwards the information about a dead worker to the frontier if the
     * frontier is an {@link ExtendedFrontier}; otherwise nothing happens.
     *
     * @param str  the id handed to {@code informAboutDeadWorker}
     * @param list the URIs handed to {@code informAboutDeadWorker}
     */
    public void informFrontierAboutDeadWorker(String str, List<CrawleableUri> list) {
        if (this.frontier instanceof ExtendedFrontier) {
            ((ExtendedFrontier) this.frontier).informAboutDeadWorker(str, list);
        }
    }

    /**
     * Setter used for the injection of the URI generators.
     *
     * @param list the URI generators handed to the frontier in {@link #init()}
     */
    @Autowired
    private void setGenerator(List<UriGenerator> list) {
        this.uriGenerator = list;
    }

    /**
     * Replaces the frontier used by this component.
     *
     * @param frontierImpl the new frontier
     */
    public void setFrontier(FrontierImpl frontierImpl) {
        this.frontier = frontierImpl;
    }

    /**
     * @return the worker guard created by this component
     */
    public WorkerGuard getWorkerGuard() {
        return this.workerGuard;
    }
}
