/*
 * Decompiled with CFR 0.152.
 */
package jp.ndca.similarity.join;

import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.Arrays;
import java.util.List;
import jp.ndca.similarity.distance.Jaccard;
import jp.ndca.similarity.join.IntItem;
import jp.ndca.similarity.join.NgramTokenizer;
import jp.ndca.similarity.join.SimilarityJoin;
import jp.ndca.similarity.join.StringItem;
import jp.ndca.similarity.join.Tokenizer;

/**
 * Base class for {@link SimilarityJoin} implementations.
 *
 * <p>Provides the conversion of raw input (strings or {@code int} token arrays) into
 * {@link StringItem}/{@link IntItem} objects, argument validation, and helpers that merge a
 * group of items into an existing group whose representative (first element) is similar
 * enough.
 */
public abstract class AbstractSimilarityJoin
implements SimilarityJoin {
    /** Shared similarity calculator used by the union helpers. */
    protected static final Jaccard jaccard = new Jaccard();

    /** Tokenizer applied to string input; defaults to {@code new NgramTokenizer(2)}. */
    protected Tokenizer tokenizer = new NgramTokenizer(2);

    /** Flag forwarded verbatim as the second argument of {@link Tokenizer#tokenize}. */
    protected boolean useConvertingTypeData = false;

    /**
     * @return the tokenizer currently used for string input
     */
    public Tokenizer getTokenizer() {
        return this.tokenizer;
    }

    /**
     * @return the flag that is passed to the tokenizer on every {@code tokenize} call
     */
    public boolean isUseConvertingTypeData() {
        return this.useConvertingTypeData;
    }

    /**
     * Replaces the tokenizer used for string input.
     *
     * @param tokenizer the tokenizer to use from now on
     */
    public void setTokenizer(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    /**
     * Sets the flag that is passed to the tokenizer on every {@code tokenize} call.
     *
     * @param useConvertingTypeData the new flag value
     */
    public void setUseConvertingTypeData(boolean useConvertingTypeData) {
        this.useConvertingTypeData = useConvertingTypeData;
    }

    /**
     * Tokenizes a single string and wraps the tokens in a {@link StringItem}.
     *
     * @param data the raw string to tokenize
     * @param id   the id to attach to the resulting item
     * @return a new item holding the tokens of {@code data} and the given id
     */
    @Override
    public StringItem strConvert(String data, int id) {
        return new StringItem(this.tokenizer.tokenize(data, this.useConvertingTypeData), id);
    }

    /**
     * Tokenizes every string of {@code dataSet} and returns the resulting items sorted by
     * their natural ordering. Each item's id is the index of its string in {@code dataSet}.
     *
     * @param dataSet the raw strings to convert
     * @return the converted items, sorted with {@link Arrays#sort(Object[])}
     */
    @Override
    public StringItem[] strConvert(List<String> dataSet) {
        int len = dataSet.size();
        // Must be typed StringItem[] (not Object[]) so that it can be returned as such.
        StringItem[] items = new StringItem[len];
        for (int i = 0; i < len; ++i) {
            items[i] = new StringItem(this.tokenizer.tokenize(dataSet.get(i), this.useConvertingTypeData), i);
        }
        Arrays.sort(items);
        return items;
    }

    /**
     * Wraps an {@code int} token array in an {@link IntItem}.
     *
     * <p>Note: {@code data} is sorted in place, so the caller's array is modified, and the
     * same array instance is handed to the item.
     *
     * @param data the tokens; sorted in place by this call
     * @param id   the id to attach to the resulting item
     * @return a new item holding the sorted tokens and the given id
     */
    @Override
    public IntItem intConvert(int[] data, int id) {
        Arrays.sort(data);
        return new IntItem(data, id);
    }

    /**
     * Wraps every {@code int} token array of {@code dataset} in an {@link IntItem}. Each
     * item's id is the position of its array in the iteration order of {@code dataset}.
     *
     * <p>Note: every array in {@code dataset} is sorted in place. Unlike
     * {@link #strConvert(List)}, the returned items are not sorted.
     *
     * @param dataset the token arrays to convert; each one is sorted in place
     * @return the converted items, in the iteration order of {@code dataset}
     */
    @Override
    public IntItem[] intConvert(List<int[]> dataset) {
        IntItem[] items = new IntItem[dataset.size()];
        int i = 0;
        for (int[] data : dataset) {
            Arrays.sort(data);
            // Use the same index for the slot and the id, then advance exactly once.
            // Incrementing twice per element skips slots and overruns the array.
            items[i] = new IntItem(data, i);
            ++i;
        }
        return items;
    }

    /**
     * Checks that {@code threshold} lies strictly between 0 and 1 and optionally sorts
     * {@code dataSet} in place.
     *
     * @param dataSet   the items to sort when {@code useSort} is {@code true}
     * @param threshold the similarity threshold; must satisfy {@code 0 < threshold < 1}
     * @param useSort   whether to sort {@code dataSet} by its natural ordering
     * @throws IllegalArgumentException if {@code threshold} is not strictly between 0 and 1
     *                                  (this includes {@code NaN})
     */
    protected <K extends Comparable<K>> void validation(K[] dataSet, double threshold, boolean useSort) {
        // Positive form of the range check: every comparison with NaN is false, so NaN is
        // rejected here instead of slipping through.
        if (!(0.0 < threshold && threshold < 1.0)) {
            throw new IllegalArgumentException("argumenrt \"threshold\" is between 0 and 1.0");
        }
        if (useSort) {
            Arrays.sort(dataSet);
        }
    }

    /**
     * Appends all items of {@code S} to the first group in {@code result} whose first
     * element is similar enough to the first element of {@code S}.
     *
     * <p>A group is a match when neither token count is smaller than {@code threshold}
     * times the other one, and the score returned by {@code jaccard.calcByMerge} for the two
     * token arrays is at least {@code threshold}. Only the first element of each group is
     * compared.
     *
     * @param S         the group to merge; its first element is used as the query
     * @param result    the existing groups; the first matching one receives all of {@code S}
     * @param threshold the minimum score required for a merge
     * @param buffer    not used by this implementation
     * @return {@code true} if {@code S} was appended to a group of {@code result},
     *         {@code false} if no group matched
     * @throws IndexOutOfBoundsException if {@code S} or a visited group is empty
     */
    protected boolean strUnion(List<StringItem> S, List<List<StringItem>> result, double threshold, IntSet buffer) {
        String[] query = S.get(0).getTokens();
        int querySize = query.length;
        for (List<StringItem> group : result) {
            String[] candidate = group.get(0).getTokens();
            int candidateSize = candidate.length;
            // Cheap length filter: skip the score computation when the sizes differ too much.
            if (querySize < threshold * candidateSize || candidateSize < threshold * querySize) {
                continue;
            }
            if (threshold <= jaccard.calcByMerge(query, candidate)) {
                group.addAll(S);
                return true;
            }
        }
        return false;
    }

    /**
     * Appends all items of {@code S} to the first group in {@code result} whose first
     * element is similar enough to the first element of {@code S}.
     *
     * <p>A group is a match when neither token count is smaller than {@code threshold}
     * times the other one, and the score returned by {@code jaccard.calcByMerge} for the two
     * token arrays is at least {@code threshold}. Only the first element of each group is
     * compared.
     *
     * @param S         the group to merge; its first element is used as the query
     * @param result    the existing groups; the first matching one receives all of {@code S}
     * @param threshold the minimum score required for a merge
     * @param buffer    not used by this implementation
     * @return {@code true} if {@code S} was appended to a group of {@code result},
     *         {@code false} if no group matched
     * @throws IndexOutOfBoundsException if {@code S} or a visited group is empty
     */
    protected boolean intUnion(List<IntItem> S, List<List<IntItem>> result, double threshold, IntSet buffer) {
        int[] query = S.get(0).getTokens();
        int querySize = query.length;
        for (List<IntItem> group : result) {
            int[] candidate = group.get(0).getTokens();
            int candidateSize = candidate.length;
            // Cheap length filter: skip the score computation when the sizes differ too much.
            if (querySize < threshold * candidateSize || candidateSize < threshold * querySize) {
                continue;
            }
            if (threshold <= jaccard.calcByMerge(query, candidate)) {
                group.addAll(S);
                return true;
            }
        }
        return false;
    }
}

