/*
 * Decompiled with CFR 0.152.
 */
package jp.ndca.similarity.join;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import jp.ndca.similarity.join.Tokenizer;

/**
 * {@link Tokenizer} that splits a string into its contiguous character n-grams.
 *
 * <p>An n-gram is every substring of exactly {@code n} UTF-16 {@code char} units, taken at
 * each start offset of the input. The returned tokens are always sorted in the natural
 * ({@link String#compareTo}) order; they are not returned in order of occurrence.
 */
public class NgramTokenizer
implements Tokenizer {
    /** N-gram length used when the caller does not supply one (bigrams). */
    private static final int DEFAULT_N = 2;

    /** Length, in {@code char} units, of every produced n-gram; always at least 1. */
    private int n = DEFAULT_N;

    /**
     * Returns the n-gram length currently in use.
     *
     * @return the n-gram length
     */
    public int getN() {
        return this.n;
    }

    /**
     * Changes the n-gram length used by subsequent {@link #tokenize} calls.
     *
     * @param n the new n-gram length; must be at least 1
     * @throws IllegalArgumentException if {@code n} is less than 1
     */
    public void setN(int n) {
        this.n = NgramTokenizer.requirePositive(n);
    }

    /** Creates a tokenizer that produces bigrams ({@code n == 2}). */
    public NgramTokenizer() {
    }

    /**
     * Creates a tokenizer that produces n-grams of the given length.
     *
     * @param n the n-gram length; must be at least 1
     * @throws IllegalArgumentException if {@code n} is less than 1
     */
    public NgramTokenizer(int n) {
        this.n = NgramTokenizer.requirePositive(n);
    }

    /**
     * Splits {@code str} into sorted n-grams.
     *
     * @param str the text to tokenize; a string shorter than {@code n} yields an empty array
     * @param allowDuplicate if {@code true}, an n-gram is returned once per occurrence;
     *        if {@code false}, each distinct n-gram is returned exactly once
     * @return the n-grams of {@code str} in ascending natural order; never {@code null}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override
    public String[] tokenize(String str, boolean allowDuplicate) {
        if (allowDuplicate) {
            return this.tokenizeWithDuplication(str);
        }
        return this.tokenizeWithoutDuplication(str);
    }

    /** Returns the distinct n-grams of {@code str}, sorted. */
    private String[] tokenizeWithoutDuplication(String str) {
        HashSet<String> distinct = new HashSet<String>(Arrays.asList(this.extractNgrams(str)));
        String[] tokens = distinct.toArray(new String[distinct.size()]);
        // HashSet iteration order is unspecified, so sort to give callers a stable result.
        Arrays.sort(tokens);
        return tokens;
    }

    /** Returns every n-gram occurrence of {@code str}, sorted. */
    private String[] tokenizeWithDuplication(String str) {
        String[] tokens = this.extractNgrams(str);
        Arrays.sort(tokens);
        return tokens;
    }

    /** Collects the n-grams of {@code str} in order of occurrence, one per start offset. */
    private String[] extractNgrams(String str) {
        int size = this.n;
        // A string of length L holds L - n + 1 windows of width n, or none when L < n.
        int count = Math.max(0, str.length() - size + 1);
        String[] ngrams = new String[count];
        for (int start = 0; start < count; ++start) {
            ngrams[start] = str.substring(start, start + size);
        }
        return ngrams;
    }

    /** Returns {@code n} unchanged when it is a usable n-gram length, otherwise throws. */
    private static int requirePositive(int n) {
        if (n < 1) {
            throw new IllegalArgumentException("n must be at least 1, but was " + n);
        }
        return n;
    }
}

