package edu.uci.ics.crawler4j.crawler;

import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.frontier.DocIDServer;
import edu.uci.ics.crawler4j.frontier.Frontier;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
import edu.uci.ics.crawler4j.url.WebURL;
import edu.uci.ics.crawler4j.util.IO;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/uci/ics/crawler4j/crawler/CrawlController.class */
/**
 * Coordinates a crawling session: it owns the frontier and doc-id storage,
 * starts the crawler threads, and runs a monitor thread that recreates dead
 * crawler threads and performs the final clean-up once no crawler is working
 * and the frontier queue is empty (or a shutdown was requested).
 *
 * <p>{@link #finished} and {@link #shuttingDown} are {@code volatile} because
 * they are written by one thread (the monitor thread / the caller of
 * {@link #shutdown()}) and read by others.
 */
public class CrawlController extends Configurable {
    static final Logger logger = Logger.getLogger(CrawlController.class.getName());

    /** Arbitrary user data; only stored and returned by this class. */
    protected Object customData;

    /** Values collected from each crawler's {@code getMyLocalData()} when a crawl ends. */
    protected List<Object> crawlersLocalData = new ArrayList<Object>();

    /** Set to {@code true} by the monitor thread after the final clean-up. */
    protected volatile boolean finished;

    /** Set to {@code true} by {@link #shutdown()}. */
    protected volatile boolean shuttingDown;

    protected PageFetcher pageFetcher;
    protected RobotstxtServer robotstxtServer;
    protected Frontier frontier;
    protected DocIDServer docIdServer;

    /** Monitor used by {@link #waitUntilFinish()} and the monitor thread. */
    protected final Object waitingLock = new Object();

    /**
     * Validates the configuration, prepares the storage folders and opens the
     * database environment backing the doc-id server and the frontier.
     *
     * <p>The environment is transactional and uses locking only when the
     * configuration asks for resumable crawling; otherwise the contents of the
     * {@code frontier} folder are deleted before the environment is opened.
     *
     * @param crawlConfig     crawl configuration; validated here
     * @param pageFetcher     fetcher shut down when the crawl finishes
     * @param robotstxtServer consulted before seeds are scheduled
     * @throws Exception if a storage folder cannot be created, or if validation
     *                   or opening the storage fails
     */
    public CrawlController(CrawlConfig crawlConfig, PageFetcher pageFetcher, RobotstxtServer robotstxtServer) throws Exception {
        super(crawlConfig);
        crawlConfig.validate();

        File storageFolder = new File(crawlConfig.getCrawlStorageFolder());
        if (!storageFolder.exists() && !storageFolder.mkdirs()) {
            throw new Exception("Couldn't create this folder: " + storageFolder.getAbsolutePath());
        }

        boolean resumable = crawlConfig.isResumableCrawling();
        EnvironmentConfig envConfig = new EnvironmentConfig();
        envConfig.setAllowCreate(true);
        envConfig.setTransactional(resumable);
        envConfig.setLocking(resumable);

        File envHome = new File(crawlConfig.getCrawlStorageFolder() + "/frontier");
        if (!envHome.exists() && !envHome.mkdir()) {
            throw new Exception("Couldn't create this folder: " + envHome.getAbsolutePath());
        }
        if (!resumable) {
            // A non-resumable crawl starts from scratch: drop whatever a previous run left behind.
            IO.deleteFolderContents(envHome);
        }

        Environment env = new Environment(envHome, envConfig);
        this.docIdServer = new DocIDServer(env, crawlConfig);
        this.frontier = new Frontier(env, crawlConfig, this.docIdServer);
        this.pageFetcher = pageFetcher;
        this.robotstxtServer = robotstxtServer;
        this.finished = false;
        this.shuttingDown = false;
    }

    /**
     * Starts the crawl and blocks until it has finished.
     *
     * @param cls crawler class; instantiated once per crawler thread
     * @param i   number of crawler threads to start
     */
    public <T extends WebCrawler> void start(Class<T> cls, int i) {
        start(cls, i, true);
    }

    /**
     * Starts the crawl and returns immediately; use {@link #waitUntilFinish()}
     * or {@link #isFinished()} to observe completion.
     *
     * @param cls crawler class; instantiated once per crawler thread
     * @param i   number of crawler threads to start
     */
    public <T extends WebCrawler> void startNonBlocking(Class<T> cls, int i) {
        start(cls, i, false);
    }

    /**
     * Creates and starts {@code i} crawler threads plus one monitor thread.
     *
     * <p>The monitor thread wakes up every 10 seconds, recreates crawler threads
     * that have died (unless shutting down) and, once no crawler is working,
     * re-checks after further 10-second pauses before finishing the frontier,
     * collecting each crawler's local data, closing the storage and shutting
     * down the page fetcher. Any exception raised while starting is logged, not
     * propagated.
     *
     * @param cls crawler class; instantiated via {@code newInstance()}
     * @param i   number of crawler threads to start
     * @param z   {@code true} to block until the crawl has finished
     */
    protected <T extends WebCrawler> void start(final Class<T> cls, int i, boolean z) {
        try {
            this.finished = false;
            this.crawlersLocalData.clear();
            final List<Thread> threads = new ArrayList<Thread>();
            final List<T> crawlers = new ArrayList<T>();

            for (int crawlerId = 1; crawlerId <= i; crawlerId++) {
                T crawler = cls.newInstance();
                Thread thread = new Thread(crawler, "Crawler " + crawlerId);
                crawler.setThread(thread);
                crawler.init(crawlerId, this);
                thread.start();
                crawlers.add(crawler);
                threads.add(thread);
                logger.info("Crawler " + crawlerId + " started.");
            }

            Thread monitorThread = new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        // NOTE: the lock is held for the whole monitoring loop, so a thread in
                        // waitUntilFinish() either blocks entering its synchronized block or,
                        // if it got there first, is woken by the notifyAll() below.
                        synchronized (CrawlController.this.waitingLock) {
                            while (true) {
                                CrawlController.sleep(10);
                                boolean someoneIsWorking = false;
                                for (int idx = 0; idx < threads.size(); idx++) {
                                    if (threads.get(idx).isAlive()) {
                                        if (crawlers.get(idx).isNotWaitingForNewURLs()) {
                                            someoneIsWorking = true;
                                        }
                                    } else if (!CrawlController.this.shuttingDown) {
                                        CrawlController.logger.info("Thread " + idx + " was dead, I'll recreate it.");
                                        T replacement = cls.newInstance();
                                        Thread replacementThread = new Thread(replacement, "Crawler " + (idx + 1));
                                        threads.set(idx, replacementThread);
                                        replacement.setThread(replacementThread);
                                        // Must be the controller, not the enclosing Runnable.
                                        replacement.init(idx + 1, CrawlController.this);
                                        replacementThread.start();
                                        crawlers.set(idx, replacement);
                                    }
                                }
                                if (someoneIsWorking) {
                                    continue;
                                }

                                CrawlController.logger.info("It looks like no thread is working, waiting for 10 seconds to make sure...");
                                CrawlController.sleep(10);
                                if (isAnyCrawlerWorking(threads, crawlers)) {
                                    continue;
                                }
                                if (CrawlController.this.shuttingDown) {
                                    break;
                                }
                                if (CrawlController.this.frontier.getQueueLength() <= 0) {
                                    CrawlController.logger.info("No thread is working and no more URLs are in queue waiting for another 10 seconds to make sure...");
                                    CrawlController.sleep(10);
                                    if (CrawlController.this.frontier.getQueueLength() <= 0) {
                                        break;
                                    }
                                }
                            }

                            CrawlController.logger.info("All of the crawlers are stopped. Finishing the process...");
                            CrawlController.this.frontier.finish();
                            for (T crawler : crawlers) {
                                crawler.onBeforeExit();
                                CrawlController.this.crawlersLocalData.add(crawler.getMyLocalData());
                            }
                            CrawlController.logger.info("Waiting for 10 seconds before final clean up...");
                            CrawlController.sleep(10);
                            CrawlController.this.frontier.close();
                            CrawlController.this.docIdServer.close();
                            CrawlController.this.pageFetcher.shutDown();
                            CrawlController.this.finished = true;
                            CrawlController.this.waitingLock.notifyAll();
                        }
                    } catch (Exception e) {
                        // NOTE(review): after a failure here 'finished' is never set, so
                        // waitUntilFinish() callers keep waiting — same as before this change.
                        CrawlController.logger.error("Unexpected Error", e);
                    }
                }
            });
            monitorThread.start();

            if (z) {
                waitUntilFinish();
            }
        } catch (Exception e) {
            logger.error("Error happened", e);
        }
    }

    /**
     * Reports whether at least one crawler thread is alive and its crawler is
     * not waiting for new URLs. Every crawler is inspected (no early exit).
     */
    private static boolean isAnyCrawlerWorking(List<Thread> threads, List<? extends WebCrawler> crawlers) {
        boolean working = false;
        for (int idx = 0; idx < threads.size(); idx++) {
            if (threads.get(idx).isAlive() && crawlers.get(idx).isNotWaitingForNewURLs()) {
                working = true;
            }
        }
        return working;
    }

    /**
     * Blocks the calling thread until {@link #isFinished()} becomes {@code true}.
     *
     * <p>An interrupt does not end the wait; the interrupt status is restored
     * on the calling thread before this method returns.
     */
    public void waitUntilFinish() {
        if (this.finished) {
            return;
        }
        boolean interrupted = false;
        try {
            synchronized (this.waitingLock) {
                while (!this.finished) {
                    try {
                        this.waitingLock.wait();
                    } catch (InterruptedException e) {
                        // Keep waiting, but remember the interrupt so it is not lost.
                        interrupted = true;
                    }
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Returns the list filled with each crawler's local data at the end of a
     * crawl. This is the live internal list, not a copy.
     */
    public List<Object> getCrawlersLocalData() {
        return this.crawlersLocalData;
    }

    /**
     * Sleeps for the given number of seconds. Negative values return
     * immediately. An interrupt ends the sleep early and is otherwise ignored.
     *
     * @param i number of seconds to sleep
     */
    protected static void sleep(int i) {
        if (i < 0) {
            return;
        }
        try {
            // Multiply as long so large second counts do not overflow int.
            Thread.sleep(i * 1000L);
        } catch (InterruptedException ignored) {
            // Deliberately not re-interrupting: the monitor loop relies on these sleeps for
            // pacing, and a pending interrupt would make every later sleep return at once.
        }
    }

    /**
     * Adds a seed URL, letting the doc-id server assign its document id.
     *
     * @param str seed URL
     */
    public void addSeed(String str) {
        addSeed(str, -1);
    }

    /**
     * Adds a seed URL at depth 0 and schedules it in the frontier if the
     * robots.txt server allows it.
     *
     * <p>With a negative {@code i} a new doc id is requested, unless the
     * canonical URL already has a positive doc id, in which case nothing is
     * scheduled. With a non-negative {@code i} the URL/doc-id pair is
     * registered first; a registration failure is logged and the seed is still
     * scheduled (behavior kept as-is).
     *
     * @param str seed URL; an invalid URL is logged and ignored
     * @param i   doc id to use, or a negative value to have one assigned
     */
    public void addSeed(String str, int i) {
        String canonicalUrl = URLCanonicalizer.getCanonicalURL(str);
        if (canonicalUrl == null) {
            logger.error("Invalid seed URL: " + str);
            return;
        }

        int docId = i;
        if (docId < 0) {
            if (this.docIdServer.getDocId(canonicalUrl) > 0) {
                // Presumably this URL has been seen before; nothing to schedule.
                return;
            }
            docId = this.docIdServer.getNewDocID(canonicalUrl);
        } else {
            try {
                this.docIdServer.addUrlAndDocId(canonicalUrl, docId);
            } catch (Exception e) {
                logger.error("Could not add seed: " + e.getMessage(), e);
            }
        }

        WebURL seed = new WebURL();
        seed.setURL(canonicalUrl);
        seed.setDocid(docId);
        seed.setDepth((short) 0);
        if (this.robotstxtServer.allows(seed)) {
            this.frontier.schedule(seed);
        } else {
            logger.info("Robots.txt does not allow this seed: " + str);
        }
    }

    /**
     * Registers a URL/doc-id pair with the doc-id server without scheduling it.
     * Failures are logged, not propagated.
     *
     * @param str URL; an invalid URL is logged and ignored
     * @param i   doc id to associate with the URL
     */
    public void addSeenUrl(String str, int i) {
        String canonicalUrl = URLCanonicalizer.getCanonicalURL(str);
        if (canonicalUrl == null) {
            logger.error("Invalid Url: " + str);
            return;
        }
        try {
            this.docIdServer.addUrlAndDocId(canonicalUrl, i);
        } catch (Exception e) {
            logger.error("Could not add seen url: " + e.getMessage(), e);
        }
    }

    public PageFetcher getPageFetcher() {
        return this.pageFetcher;
    }

    public void setPageFetcher(PageFetcher pageFetcher) {
        this.pageFetcher = pageFetcher;
    }

    public RobotstxtServer getRobotstxtServer() {
        return this.robotstxtServer;
    }

    public void setRobotstxtServer(RobotstxtServer robotstxtServer) {
        this.robotstxtServer = robotstxtServer;
    }

    public Frontier getFrontier() {
        return this.frontier;
    }

    public void setFrontier(Frontier frontier) {
        this.frontier = frontier;
    }

    public DocIDServer getDocIdServer() {
        return this.docIdServer;
    }

    public void setDocIdServer(DocIDServer docIDServer) {
        this.docIdServer = docIDServer;
    }

    public Object getCustomData() {
        return this.customData;
    }

    public void setCustomData(Object obj) {
        this.customData = obj;
    }

    /** Returns {@code true} once the monitor thread has completed the final clean-up. */
    public boolean isFinished() {
        return this.finished;
    }

    /** Returns {@code true} once {@link #shutdown()} has been called. */
    public boolean isShuttingDown() {
        return this.shuttingDown;
    }

    /**
     * Requests the crawl to stop: flags the controller as shutting down and
     * finishes the frontier. The monitor thread performs the actual clean-up.
     */
    public void shutdown() {
        logger.info("Shutting down...");
        this.shuttingDown = true;
        this.frontier.finish();
    }
}
