/*
 * Decompiled with CFR 0.152.
 */
package it.tsoru.ppjoinhandler;

import it.tsoru.ppjoinhandler.Pair;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import jp.ndca.similarity.join.PPJoin;
import jp.ndca.similarity.join.StringItem;
import jp.ndca.similarity.join.Tokenizer;

/**
 * Thin wrapper around {@link PPJoin} that collects distinct strings, tokenizes
 * them, and returns the pairs of strings that {@code PPJoin.extractPairs}
 * reports for the configured threshold.
 *
 * <p>Each distinct string passed to {@link #addEntry(String)} is assigned a
 * sequential integer id (0, 1, 2, ...) which is used to map the
 * {@link StringItem}s returned by the join back to the original strings.
 *
 * <p>NOTE(review): the exact similarity measure behind the threshold is defined
 * by the PPJoin library and is not visible here — confirm against its docs.
 */
public class PPJoinHandler {
    /** Id -> original string, used to translate join results back to strings. */
    private final Map<Integer, String> dataset = new HashMap<Integer, String>();
    /** Original string -> id, used to detect entries that were already added. */
    private final Map<String, Integer> idsByEntry = new HashMap<String, Integer>();
    private final PPJoin ppjoin = new PPJoin();
    private final Tokenizer tok = this.ppjoin.getTokenizer();
    /** Tokenized form of every registered entry, in insertion order. */
    private final List<StringItem> stringItems = new ArrayList<StringItem>();
    private double threshold;

    /**
     * Creates a handler with the given threshold.
     *
     * @param threshold value forwarded to {@code PPJoin.extractPairs} by {@link #run()}
     */
    public PPJoinHandler(double threshold) {
        this.threshold = threshold;
    }

    /**
     * Registers a string for the join. A string equal to one that was already
     * added is ignored, so every distinct string gets exactly one id.
     *
     * @param entry the string to register
     */
    public void addEntry(String entry) {
        // The id map is keyed by Integer, so duplicates must be looked up in the
        // reverse (String-keyed) index; querying the id map with a String never matches.
        if (this.idsByEntry.containsKey(entry)) {
            return;
        }
        // Tokenize before touching any state so that a tokenizer failure does not
        // leave a half-registered entry behind.
        String[] tokens = this.tok.tokenize(entry, false);
        Arrays.sort(tokens);

        Integer id = this.dataset.size();
        this.dataset.put(id, entry);
        this.idsByEntry.put(entry, id);
        this.stringItems.add(new StringItem(tokens, id));
    }

    /**
     * Runs the join over all registered entries.
     *
     * @return one {@link Pair} of original strings for every pair of items
     *         returned by {@code PPJoin.extractPairs}; empty if there are none
     */
    public List<Pair<String, String>> run() {
        StringItem[] strDatum = this.stringItems.toArray(new StringItem[this.stringItems.size()]);
        // The items are sorted here, so the library's own sort step is switched off below.
        Arrays.sort(strDatum);
        this.ppjoin.setUseSortAtExtractPairs(false);
        List<Map.Entry<StringItem, StringItem>> result = this.ppjoin.extractPairs(strDatum, this.threshold);

        List<Pair<String, String>> res = new ArrayList<Pair<String, String>>(result.size());
        for (Map.Entry<StringItem, StringItem> match : result) {
            // Translate the item ids back to the strings they were created from.
            String left = this.dataset.get(match.getKey().getId());
            String right = this.dataset.get(match.getValue().getId());
            res.add(new Pair<String, String>(left, right));
        }
        return res;
    }

    /**
     * @return the threshold currently used by {@link #run()}
     */
    public double getThreshold() {
        return this.threshold;
    }

    /**
     * @param threshold the threshold to use for subsequent calls to {@link #run()}
     */
    public void setThreshold(double threshold) {
        this.threshold = threshold;
    }
}

