package org.dbpedia.extraction.live.util.iterators;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TimeZone;
import org.apache.log4j.Logger;
import org.dbpedia.extraction.live.util.DBPediaXPathUtil;
import org.dbpedia.extraction.live.util.XPathUtil;
import org.w3c.dom.Document;

/* loaded from: input_file:org/dbpedia/extraction/live/util/iterators/DuplicateOAIRecordRemoverIterator.class */
/**
 * Iterator decorator that drops documents whose identifier has already been
 * emitted for the most recent timestamp seen so far.
 *
 * <p>For every document pulled from the wrapped iterator, the identifier and
 * timestamp are extracted via the XPath expressions supplied by
 * {@link DBPediaXPathUtil}. Whenever a document carries a timestamp strictly
 * later than the latest one seen, the set of remembered identifiers is reset.
 * A document is passed through only if its identifier is not yet in that set;
 * otherwise it is logged at debug level and skipped.
 *
 * <p>Not thread-safe: the instance holds mutable state and a
 * {@link SimpleDateFormat}.
 */
public class DuplicateOAIRecordRemoverIterator extends PrefetchIterator<Document> {
    /**
     * Timestamp layout: four-digit year, month, day, 'T', 24h time, literal 'Z'.
     * The month field must be upper-case {@code MM}; lower-case {@code mm} is
     * minute-in-hour and would leave the month unparsed.
     */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";

    private static final Logger logger = Logger.getLogger(DuplicateOAIRecordRemoverIterator.class);

    /** Source of the documents to be filtered. */
    private final Iterator<Document> iterator;

    /** Latest timestamp observed so far; starts at the epoch. */
    private Date currentTimestamp = new Date(0);

    /** Identifiers already emitted since {@link #currentTimestamp} was last advanced. */
    private final Set<String> currentIdentifiers = new HashSet<>();

    /** Parser for the timestamp strings; per-instance because SimpleDateFormat is not thread-safe. */
    private final DateFormat dateFormat = createDateFormat();

    /**
     * @param it the iterator whose documents should be de-duplicated
     */
    public DuplicateOAIRecordRemoverIterator(Iterator<Document> it) {
        this.iterator = it;
    }

    /**
     * Builds the timestamp parser. The trailing 'Z' in the pattern is a quoted
     * literal, so the formatter is pinned to UTC explicitly; otherwise values
     * would be interpreted in the JVM default zone and ordering could be
     * distorted around daylight-saving transitions.
     */
    private static DateFormat createDateFormat() {
        SimpleDateFormat format = new SimpleDateFormat(TIMESTAMP_PATTERN);
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        return format;
    }

    /** Evaluates the identifier XPath expression against the document. */
    private String getIdentifier(Document document) {
        return XPathUtil.evalToString(document, DBPediaXPathUtil.getOAIIdentifierExpr());
    }

    /**
     * Evaluates the timestamp XPath expression against the document and parses it.
     *
     * @throws Exception if the extracted text does not match the expected layout
     */
    private Date getTimestamp(Document document) throws Exception {
        return this.dateFormat.parse(XPathUtil.evalToString(document, DBPediaXPathUtil.getTimestampExpr()));
    }

    /**
     * Advances the wrapped iterator until a non-duplicate document is found.
     *
     * @return a single-element iterator over the next non-duplicate document,
     *         or {@code null} once the wrapped iterator is exhausted
     * @throws Exception if a document's timestamp cannot be extracted or parsed
     */
    @Override // org.dbpedia.extraction.live.util.iterators.PrefetchIterator
    /* renamed from: prefetch */
    protected Iterator<Document> prefetch2() throws Exception {
        while (this.iterator.hasNext()) {
            Document next = this.iterator.next();
            String identifier = getIdentifier(next);
            Date timestamp = getTimestamp(next);

            // A strictly newer timestamp starts a fresh de-duplication window.
            if (timestamp.after(this.currentTimestamp)) {
                this.currentTimestamp = timestamp;
                this.currentIdentifiers.clear();
            }

            // Set.add returns true only when the identifier was not yet present.
            if (this.currentIdentifiers.add(identifier)) {
                return Collections.singleton(next).iterator();
            }
            logger.debug("Skipping duplicate: " + timestamp + ": " + identifier);
        }
        return null;
    }
}
