package de.uni_leipzig.simba.mapper.atomic.fastngram;

import java.util.HashSet;
import java.util.Set;

/* loaded from: input_file:de/uni_leipzig/simba/mapper/atomic/fastngram/NGramTokenizer.class */
/**
 * {@link Tokenizer} implementation that breaks a string into the set of its
 * character n-grams.
 *
 * <p>Before the n-grams are extracted the input is normalized: runs of
 * consecutive space characters are collapsed into a single space, the result is
 * trimmed with {@link String#trim()}, and strings shorter than the requested
 * n-gram length are right-padded with {@code '_'}.
 */
public class NGramTokenizer implements Tokenizer {
    /** Character appended to inputs that are shorter than the requested n-gram length. */
    private static final char PADDING = '_';

    /**
     * Returns the distinct substrings of length {@code i} of the normalized input.
     *
     * <p>Examples: {@code tokenize("abc", 2)} yields {@code {"ab", "bc"}};
     * {@code tokenize("a", 3)} yields {@code {"a__"}}; {@code tokenize(null, 2)}
     * yields {@code {"__"}}.
     *
     * @param str the text to tokenize; {@code null} is treated as the empty string
     * @param i   the n-gram length. For {@code 0} the result contains only the
     *            empty string. Negative values are not supported and normally
     *            make the underlying {@link String#substring(int, int)} call
     *            throw a {@link StringIndexOutOfBoundsException}.
     * @return a new, mutable set holding every distinct n-gram of the normalized input
     */
    @Override // de.uni_leipzig.simba.mapper.atomic.fastngram.Tokenizer
    public Set<String> tokenize(String str, int i) {
        final String normalized = collapseSpaces(str == null ? "" : str).trim();

        // Pad on the right so that at least one full n-gram can be extracted.
        final StringBuilder padded = new StringBuilder(normalized);
        while (padded.length() < i) {
            padded.append(PADDING);
        }
        final String text = padded.toString();

        final Set<String> ngrams = new HashSet<String>();
        // Number of sliding-window positions; written as in the original
        // bound so integer wrap-around for extreme values of i is unchanged.
        final int windowCount = (text.length() - i) + 1;
        for (int start = 0; start < windowCount; start++) {
            ngrams.add(text.substring(start, start + i));
        }
        return ngrams;
    }

    /**
     * Collapses every run of two or more consecutive space characters
     * ({@code ' '} only; tabs and other whitespace are left alone) into a
     * single space, in one pass over the text.
     */
    private static String collapseSpaces(String text) {
        final StringBuilder collapsed = new StringBuilder(text.length());
        boolean previousWasSpace = false;
        for (int pos = 0; pos < text.length(); pos++) {
            final char current = text.charAt(pos);
            final boolean isSpace = current == ' ';
            // Keep the first space of a run and drop the ones that follow it.
            if (!(isSpace && previousWasSpace)) {
                collapsed.append(current);
            }
            previousWasSpace = isSpace;
        }
        return collapsed.toString();
    }
}
