/*
 * Decompiled with CFR 0.152.
 */
package org.dbpedia.extraction.live.util.iterators;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TimeZone;
import org.apache.log4j.Logger;
import org.dbpedia.extraction.live.util.DBPediaXPathUtil;
import org.dbpedia.extraction.live.util.XPathUtil;
import org.dbpedia.extraction.live.util.iterators.PrefetchIterator;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

/**
 * Wraps an iterator of OAI record documents and drops records whose identifier
 * has already been emitted for the newest timestamp seen so far.
 *
 * <p>The set of remembered identifiers is reset every time a record with a
 * strictly later timestamp than the current one is encountered. Records whose
 * timestamp is not later than the current one are checked against that set.
 */
public class DuplicateOAIRecordRemoverIterator
extends PrefetchIterator<Document> {
    private static final Logger logger = Logger.getLogger(DuplicateOAIRecordRemoverIterator.class);

    /**
     * Timestamp layout expected in the records. {@code MM} is month-in-year;
     * the lower-case {@code mm} is minute-in-hour and must only be used in the
     * time part, otherwise the month is silently ignored while parsing.
     */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";

    private final Iterator<Document> iterator;
    private Date currentTimestamp = new Date(0L);
    private final Set<String> currentIdentifiers = new HashSet<String>();
    private final DateFormat dateFormat = newTimestampFormat();

    /**
     * @param iterator source of record documents to be de-duplicated
     */
    public DuplicateOAIRecordRemoverIterator(Iterator<Document> iterator) {
        this.iterator = iterator;
    }

    /**
     * Builds the formatter used to parse record timestamps.
     *
     * <p>The trailing {@code 'Z'} in the pattern is matched as a literal, so the
     * formatter is pinned to UTC explicitly; otherwise values would be
     * interpreted in the JVM default time zone.
     */
    private static DateFormat newTimestampFormat() {
        SimpleDateFormat format = new SimpleDateFormat(TIMESTAMP_PATTERN);
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        return format;
    }

    /** Extracts the OAI identifier string from the given document. */
    private String getIdentifier(Document document) {
        return XPathUtil.evalToString((Node)document, DBPediaXPathUtil.getOAIIdentifierExpr());
    }

    /**
     * Extracts and parses the record timestamp from the given document.
     *
     * @throws Exception if the extracted text does not match the expected
     *                   timestamp pattern
     */
    private Date getTimestamp(Document document) throws Exception {
        String str = XPathUtil.evalToString((Node)document, DBPediaXPathUtil.getTimestampExpr());
        return this.dateFormat.parse(str);
    }

    /**
     * Advances the wrapped iterator until a non-duplicate record is found.
     *
     * @return a single-element iterator holding the next non-duplicate
     *         document, or {@code null} once the wrapped iterator has no more
     *         elements (presumably the end-of-data signal for
     *         {@code PrefetchIterator} - confirm against the base class)
     * @throws Exception if a record's timestamp cannot be parsed
     */
    @Override
    protected Iterator<Document> prefetch() throws Exception {
        while (this.iterator.hasNext()) {
            Document document = this.iterator.next();
            String identifier = this.getIdentifier(document);
            Date timestamp = this.getTimestamp(document);
            // A strictly newer timestamp starts a fresh de-duplication window.
            if (timestamp.after(this.currentTimestamp)) {
                this.currentTimestamp = timestamp;
                this.currentIdentifiers.clear();
            }
            // Set.add returns false when the identifier was already present.
            if (!this.currentIdentifiers.add(identifier)) {
                logger.debug((Object)("Skipping duplicate: " + timestamp + ": " + identifier));
                continue;
            }
            return Collections.singleton(document).iterator();
        }
        return null;
    }
}

