/*
 * Decompiled with CFR 0.152.
 */
package jp.ndca.similarity.join;

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import jp.ndca.similarity.join.AbstractSimilarityJoin;
import jp.ndca.similarity.join.IntItem;
import jp.ndca.similarity.join.IntLinkedInvertedIndex;
import jp.ndca.similarity.join.LinkedPositions;
import jp.ndca.similarity.join.StringItem;
import jp.ndca.similarity.join.StringLinkedInvertedIndex;

public class MPJoin
extends AbstractSimilarityJoin {
    private boolean useSortAtSearch = true;
    private boolean useSortAtExtractPairs = true;
    private boolean useSortAtExtractBulks = true;

    public boolean isUseSortAtSearch() {
        return this.useSortAtSearch;
    }

    public boolean isUseSortAtExtractPairs() {
        return this.useSortAtExtractPairs;
    }

    public boolean isUseSortAtExtractBulks() {
        return this.useSortAtExtractBulks;
    }

    public void setUseSortAtSearch(boolean useSortAtSearch) {
        this.useSortAtSearch = useSortAtSearch;
    }

    public void setUseSortAtExtractPairs(boolean useSortAtExtractPairs) {
        this.useSortAtExtractPairs = useSortAtExtractPairs;
    }

    public void setUseSortAtExtractBulks(boolean useSortAtExtractBulks) {
        this.useSortAtExtractBulks = useSortAtExtractBulks;
    }

    @Override
    public List<Map.Entry<StringItem, StringItem>> extractPairs(StringItem[] dataSet, double threshold) {
        this.validation(dataSet, threshold, this.useSortAtExtractPairs);
        double coff = threshold / (1.0 + threshold);
        ArrayList<Map.Entry<StringItem, StringItem>> S = new ArrayList<Map.Entry<StringItem, StringItem>>();
        int dataSetSize = dataSet.length;
        StringLinkedInvertedIndex index = new StringLinkedInvertedIndex();
        int[] prefixLengths = new int[dataSetSize];
        int[] minOrverlap = new int[dataSetSize];
        PrefixOverlap[] M = new PrefixOverlap[dataSetSize];
        for (int i = 0; i < dataSetSize; ++i) {
            M[i] = new PrefixOverlap();
        }
        for (int xDataSetID = 0; xDataSetID < dataSetSize; ++xDataSetID) {
            int midPrefixLength;
            StringItem x = dataSet[xDataSetID];
            for (int i = 0; i < xDataSetID; ++i) {
                M[i].i = 0;
                M[i].j = 0;
                M[i].overlap = 0;
            }
            int xSize = x.size();
            if (xSize == 0) continue;
            int maxPrefixLength = xSize - (int)Math.ceil((double)xSize * threshold) + 1;
            for (int xPos = 0; xPos < maxPrefixLength; ++xPos) {
                LinkedPositions.Node next;
                String w = x.get(xPos);
                LinkedPositions positions = index.get(w);
                if (positions == null) continue;
                LinkedPositions.Node node = positions.getRootNode();
                while ((next = node.getNext()) != null) {
                    int minPrefixLength;
                    int yID = next.getId();
                    int yPos = next.getPosition();
                    int ySize = dataSet[yID].size();
                    if ((double)ySize < (double)xSize * threshold) {
                        next.remove();
                        continue;
                    }
                    minOrverlap[yID] = (int)Math.ceil(coff * (double)(ySize + xSize));
                    prefixLengths[yID] = minPrefixLength = ySize - minOrverlap[yID] + 1;
                    if (prefixLengths[yID] < yPos + 1) {
                        next.remove();
                        continue;
                    }
                    ++M[yID].overlap;
                    M[yID].i = xPos;
                    M[yID].j = yPos;
                    int remx = xSize - xPos - 1;
                    int remy = ySize - yPos - 1;
                    int ubound = Math.min(remx, remy);
                    if (M[yID].overlap + ubound < minOrverlap[yID]) {
                        M[yID].overlap = Integer.MIN_VALUE;
                    }
                    node = next;
                }
            }
            this.veryfy(xDataSetID, dataSet, maxPrefixLength, M, prefixLengths, minOrverlap, S);
            prefixLengths[xDataSetID] = midPrefixLength = xSize - (int)Math.ceil(2.0 / (1.0 + threshold) * threshold * (double)xSize) + 1;
            for (int xPos = 0; xPos < midPrefixLength; ++xPos) {
                String w = x.get(xPos);
                index.put(w, xDataSetID, xPos);
            }
        }
        return S;
    }

    private void veryfy(int xDataSetID, StringItem[] dataSet, int xMaxPrefixLength, PrefixOverlap[] M, int[] prefixLengths, int[] minOverlap, List<Map.Entry<StringItem, StringItem>> S) {
        StringItem x = dataSet[xDataSetID];
        String wx_lastPrefix = x.get(xMaxPrefixLength - 1);
        for (int yDataSetID = 0; yDataSetID < xDataSetID; ++yDataSetID) {
            if (M[yDataSetID].overlap <= 0) continue;
            StringItem y = dataSet[yDataSetID];
            String wy_lastPrefix = y.get(prefixLengths[yDataSetID] - 1);
            int xOffset = 0;
            int yOffset = 0;
            if (wx_lastPrefix.compareTo(wy_lastPrefix) < 0) {
                xOffset = xMaxPrefixLength;
                yOffset = M[yDataSetID].j + 1;
            } else {
                xOffset = M[yDataSetID].i + 1;
                yOffset = prefixLengths[yDataSetID];
            }
            int overlapValue = this.overLap(x.getTokens(), xOffset, y.getTokens(), yOffset, M[yDataSetID].overlap, (double)minOverlap[yDataSetID]);
            if (minOverlap[yDataSetID] > overlapValue) continue;
            S.add(new AbstractMap.SimpleEntry<StringItem, StringItem>(x, y));
        }
    }

    private int overLap(String[] x, int xOffSet, String[] y, int yOffSet, int defaultOverlap, double minOverlap) {
        int overlap = defaultOverlap;
        int i = xOffSet;
        int j = yOffSet;
        while (i < x.length && j < y.length) {
            if (x[i].equals(y[j])) {
                ++overlap;
                ++i;
                ++j;
                continue;
            }
            if (x[i].compareTo(y[j]) < 0) {
                if ((double)(overlap + (x.length - i - 1)) < minOverlap) break;
                ++i;
                continue;
            }
            if ((double)(overlap + (y.length - j - 1)) < minOverlap) break;
            ++j;
        }
        return overlap;
    }

    @Override
    public List<StringItem> search(StringItem query, StringItem[] dataSet, double threshold) {
        this.validation(dataSet, threshold, this.useSortAtSearch);
        StringItem x = query;
        ArrayList<StringItem> S = new ArrayList<StringItem>();
        int dataSetSize = dataSet.length;
        int[] prefixLengths = new int[dataSetSize];
        int[] minOverlap = new int[dataSetSize];
        int xSize = x.size();
        if (xSize == 0) {
            return S;
        }
        int xPrefixLength = xSize - (int)Math.ceil((double)xSize * threshold) + 1;
        HashSet<String> xPrefixSet = new HashSet<String>();
        for (int xPos = 0; xPos < xPrefixLength; ++xPos) {
            String w = x.get(xPos);
            xPrefixSet.add(w);
        }
        double coeff = threshold / (1.0 + threshold);
        StringLinkedInvertedIndex index = new StringLinkedInvertedIndex();
        for (int yID = 0; yID < dataSetSize; ++yID) {
            int yMinPrefixLength;
            StringItem y = dataSet[yID];
            int ySize = y.size();
            if (ySize == 0 || (double)ySize < (double)xSize * threshold || (double)xSize < (double)ySize * threshold) continue;
            minOverlap[yID] = (int)Math.ceil(coeff * (double)(ySize + xSize));
            prefixLengths[yID] = yMinPrefixLength = ySize - minOverlap[yID] + 1;
            for (int yPos = 0; yPos < yMinPrefixLength; ++yPos) {
                String w = y.get(yPos);
                if (!xPrefixSet.contains(w)) continue;
                index.put(w, yID, yPos);
            }
        }
        PrefixOverlap[] M = new PrefixOverlap[dataSetSize];
        for (int i = 0; i < dataSetSize; ++i) {
            M[i] = new PrefixOverlap();
        }
        for (int xPos = 0; xPos < xPrefixLength; ++xPos) {
            LinkedPositions.Node next;
            String w = x.get(xPos);
            LinkedPositions positions = index.get(w);
            if (positions == null) continue;
            LinkedPositions.Node node = positions.getRootNode();
            while ((next = node.getNext()) != null) {
                int yID = next.getId();
                if (M[yID].overlap == Integer.MIN_VALUE) {
                    next.remove();
                    continue;
                }
                StringItem y = dataSet[yID];
                int ySize = y.size();
                int yPos = node.getPosition();
                int unbound = Math.min(xSize - xPos, ySize - yPos) - 1;
                ++M[yID].overlap;
                M[yID].i = xPos;
                M[yID].j = yPos;
                if (M[yID].overlap + unbound < minOverlap[yID]) {
                    M[yID].overlap = Integer.MIN_VALUE;
                }
                node = next;
            }
        }
        this.veryfy(x, xPrefixLength, dataSet, M, prefixLengths, minOverlap, S);
        return S;
    }

    private void veryfy(StringItem x, int xMaxPrefixLength, StringItem[] dataSet, PrefixOverlap[] M, int[] prefixLengths, int[] minOverlap, Collection<StringItem> S) {
        String wx_lastPrefix = x.get(xMaxPrefixLength - 1);
        for (int yID = 0; yID < dataSet.length; ++yID) {
            if (M[yID].overlap <= 0) continue;
            StringItem y = dataSet[yID];
            String wy_lastPrefix = y.get(prefixLengths[yID] - 1);
            int xOffset = 0;
            int yOffset = 0;
            if (wx_lastPrefix.compareTo(wy_lastPrefix) < 0) {
                xOffset = xMaxPrefixLength;
                yOffset = M[yID].j + 1;
            } else {
                xOffset = M[yID].i + 1;
                yOffset = prefixLengths[yID];
            }
            int overlapValue = this.overLap(x.getTokens(), xOffset, y.getTokens(), yOffset, M[yID].overlap, (double)minOverlap[yID]);
            if (minOverlap[yID] > overlapValue) continue;
            S.add(y);
        }
    }

    @Override
    public List<List<StringItem>> extractBulks(StringItem[] dataSet, double threshold) {
        this.validation(dataSet, threshold, this.useSortAtExtractBulks);
        double coeff = threshold / (1.0 + threshold);
        IntOpenHashSet buffer = new IntOpenHashSet();
        ArrayList<List<StringItem>> result = new ArrayList<List<StringItem>>();
        int dataSetSize = dataSet.length;
        StringLinkedInvertedIndex index = new StringLinkedInvertedIndex();
        int[] prefixLengths = new int[dataSetSize];
        int[] minOrverlap = new int[dataSetSize];
        PrefixOverlap[] M = new PrefixOverlap[dataSetSize];
        for (int i = 0; i < dataSetSize; ++i) {
            M[i] = new PrefixOverlap();
        }
        for (int xDataSetID = 0; xDataSetID < dataSetSize; ++xDataSetID) {
            int midPrefixLength;
            boolean isUnioned;
            StringItem x = dataSet[xDataSetID];
            int xSize = x.size();
            if (xSize == 0) {
                buffer.add(xDataSetID);
                continue;
            }
            for (int i = 0; i < xDataSetID; ++i) {
                M[i].i = 0;
                M[i].j = 0;
                M[i].overlap = 0;
            }
            int maxPrefixLength = xSize - (int)Math.ceil((double)xSize * threshold) + 1;
            for (int xPos = 0; xPos < maxPrefixLength; ++xPos) {
                LinkedPositions.Node next;
                String w = x.get(xPos);
                LinkedPositions positions = index.get(w);
                if (positions == null) continue;
                LinkedPositions.Node node = positions.getRootNode();
                while ((next = node.getNext()) != null) {
                    int minPrefixLength;
                    int yID = next.getId();
                    if (buffer.contains(yID)) {
                        next.remove();
                        continue;
                    }
                    int yPos = next.getPosition();
                    int ySize = dataSet[yID].size();
                    if ((double)ySize < (double)xSize * threshold) {
                        next.remove();
                        continue;
                    }
                    minOrverlap[yID] = (int)Math.ceil(coeff * (double)(ySize + xSize));
                    prefixLengths[yID] = minPrefixLength = ySize - minOrverlap[yID] + 1;
                    if (prefixLengths[yID] < yPos + 1) {
                        next.remove();
                        continue;
                    }
                    ++M[yID].overlap;
                    M[yID].i = xPos;
                    M[yID].j = yPos;
                    int remx = xSize - xPos - 1;
                    int remy = ySize - yPos - 1;
                    int ubound = Math.min(remx, remy);
                    if (M[yID].overlap + ubound < minOrverlap[yID]) {
                        M[yID].overlap = Integer.MIN_VALUE;
                    }
                    node = next;
                }
            }
            ArrayList<StringItem> S = new ArrayList<StringItem>();
            this.veryfy(xDataSetID, dataSet, maxPrefixLength, M, prefixLengths, minOrverlap, S, (Set<Integer>)buffer);
            if (0 < S.size()) {
                buffer.add(x.getId());
                S.add(x);
                isUnioned = this.strUnion(S, result, threshold, buffer);
                if (isUnioned) continue;
                result.add(S);
                continue;
            }
            S.add(x);
            isUnioned = this.strUnion(S, result, threshold, buffer);
            if (isUnioned) continue;
            prefixLengths[xDataSetID] = midPrefixLength = xSize - (int)Math.ceil(2.0 * coeff * (double)xSize) + 1;
            for (int xPos = 0; xPos < midPrefixLength; ++xPos) {
                String w = x.get(xPos);
                index.put(w, xDataSetID, xPos);
            }
        }
        Collections.sort(result, new Comparator<List<StringItem>>(){

            @Override
            public int compare(List<StringItem> o1, List<StringItem> o2) {
                int size2;
                int size1 = o1.size();
                if (size1 < (size2 = o2.size())) {
                    return 1;
                }
                if (size2 < size1) {
                    return -1;
                }
                return 0;
            }
        });
        return result;
    }

    private void veryfy(int xDataSetID, StringItem[] dataSet, int xMaxPrefixLength, PrefixOverlap[] M, int[] prefixLengths, int[] minOverlap, List<StringItem> S, Set<Integer> buffer) {
        StringItem x = dataSet[xDataSetID];
        String wx_lastPrefix = x.get(xMaxPrefixLength - 1);
        for (int yDataSetID = 0; yDataSetID < xDataSetID; ++yDataSetID) {
            if (M[yDataSetID].overlap <= 0) continue;
            StringItem y = dataSet[yDataSetID];
            String wy_lastPrefix = y.get(prefixLengths[yDataSetID] - 1);
            int xOffset = 0;
            int yOffset = 0;
            if (wx_lastPrefix.compareTo(wy_lastPrefix) < 0) {
                xOffset = xMaxPrefixLength;
                yOffset = M[yDataSetID].j + 1;
            } else {
                xOffset = M[yDataSetID].i + 1;
                yOffset = prefixLengths[yDataSetID];
            }
            int overlapValue = this.overLap(x.getTokens(), xOffset, y.getTokens(), yOffset, M[yDataSetID].overlap, (double)minOverlap[yDataSetID]);
            if (minOverlap[yDataSetID] > overlapValue) continue;
            S.add(y);
            buffer.add(yDataSetID);
        }
    }

    @Override
    public List<Map.Entry<IntItem, IntItem>> extractPairs(IntItem[] dataSet, double threshold) {
        this.validation(dataSet, threshold, this.useSortAtExtractPairs);
        double coff = threshold / (1.0 + threshold);
        ArrayList<Map.Entry<IntItem, IntItem>> S = new ArrayList<Map.Entry<IntItem, IntItem>>();
        int dataSetSize = dataSet.length;
        IntLinkedInvertedIndex index = new IntLinkedInvertedIndex();
        int[] prefixLengths = new int[dataSetSize];
        int[] minOrverlap = new int[dataSetSize];
        PrefixOverlap[] M = new PrefixOverlap[dataSetSize];
        for (int i = 0; i < dataSetSize; ++i) {
            M[i] = new PrefixOverlap();
        }
        for (int xDataSetID = 0; xDataSetID < dataSetSize; ++xDataSetID) {
            int midPrefixLength;
            IntItem x = dataSet[xDataSetID];
            for (int i = 0; i < xDataSetID; ++i) {
                M[i].i = 0;
                M[i].j = 0;
                M[i].overlap = 0;
            }
            int xSize = x.size();
            if (xSize == 0) continue;
            int maxPrefixLength = xSize - (int)Math.ceil((double)xSize * threshold) + 1;
            for (int xPos = 0; xPos < maxPrefixLength; ++xPos) {
                LinkedPositions.Node next;
                int w = x.get(xPos);
                LinkedPositions positions = index.get(w);
                if (positions == null) continue;
                LinkedPositions.Node node = positions.getRootNode();
                while ((next = node.getNext()) != null) {
                    int minPrefixLength;
                    int yID = next.getId();
                    int yPos = next.getPosition();
                    int ySize = dataSet[yID].size();
                    if ((double)ySize < (double)xSize * threshold) {
                        next.remove();
                        continue;
                    }
                    minOrverlap[yID] = (int)Math.ceil(coff * (double)(ySize + xSize));
                    prefixLengths[yID] = minPrefixLength = ySize - minOrverlap[yID] + 1;
                    if (prefixLengths[yID] < yPos + 1) {
                        next.remove();
                        continue;
                    }
                    ++M[yID].overlap;
                    M[yID].i = xPos;
                    M[yID].j = yPos;
                    int remx = xSize - xPos - 1;
                    int remy = ySize - yPos - 1;
                    int ubound = Math.min(remx, remy);
                    if (M[yID].overlap + ubound < minOrverlap[yID]) {
                        M[yID].overlap = Integer.MIN_VALUE;
                    }
                    node = next;
                }
            }
            this.veryfy(xDataSetID, dataSet, maxPrefixLength, M, prefixLengths, minOrverlap, S);
            prefixLengths[xDataSetID] = midPrefixLength = xSize - (int)Math.ceil(2.0 / (1.0 + threshold) * threshold * (double)xSize) + 1;
            for (int xPos = 0; xPos < midPrefixLength; ++xPos) {
                int w = x.get(xPos);
                index.put(w, xDataSetID, xPos);
            }
        }
        return S;
    }

    private void veryfy(int xDataSetID, IntItem[] dataSet, int xMaxPrefixLength, PrefixOverlap[] M, int[] prefixLengths, int[] minOverlap, List<Map.Entry<IntItem, IntItem>> S) {
        IntItem x = dataSet[xDataSetID];
        int wx_lastPrefix = x.get(xMaxPrefixLength - 1);
        for (int yDataSetID = 0; yDataSetID < xDataSetID; ++yDataSetID) {
            if (M[yDataSetID].overlap <= 0) continue;
            IntItem y = dataSet[yDataSetID];
            int wy_lastPrefix = y.get(prefixLengths[yDataSetID] - 1);
            int xOffset = 0;
            int yOffset = 0;
            if (wx_lastPrefix < wy_lastPrefix) {
                xOffset = xMaxPrefixLength;
                yOffset = M[yDataSetID].j + 1;
            } else {
                xOffset = M[yDataSetID].i + 1;
                yOffset = prefixLengths[yDataSetID];
            }
            int overlapValue = this.overLap(x.getTokens(), xOffset, y.getTokens(), yOffset, M[yDataSetID].overlap, (double)minOverlap[yDataSetID]);
            if (minOverlap[yDataSetID] > overlapValue) continue;
            S.add(new AbstractMap.SimpleEntry<IntItem, IntItem>(x, y));
        }
    }

    private int overLap(int[] x, int xOffSet, int[] y, int yOffSet, int defaultOverlap, double minOverlap) {
        int overlap = defaultOverlap;
        int i = xOffSet;
        int j = yOffSet;
        while (i < x.length && j < y.length) {
            if (x[i] == y[j]) {
                ++overlap;
                ++i;
                ++j;
                continue;
            }
            if (x[i] < y[j]) {
                if ((double)(overlap + (x.length - i - 1)) < minOverlap) break;
                ++i;
                continue;
            }
            if ((double)(overlap + (y.length - j - 1)) < minOverlap) break;
            ++j;
        }
        return overlap;
    }

    @Override
    public List<IntItem> search(IntItem query, IntItem[] dataSet, double threshold) {
        this.validation(dataSet, threshold, this.useSortAtSearch);
        IntItem x = query;
        ArrayList<IntItem> S = new ArrayList<IntItem>();
        int dataSetSize = dataSet.length;
        int[] prefixLengths = new int[dataSetSize];
        int[] minOverlap = new int[dataSetSize];
        int xSize = x.size();
        if (xSize == 0) {
            return S;
        }
        int xPrefixLength = xSize - (int)Math.ceil((double)xSize * threshold) + 1;
        IntOpenHashSet xPrefixSet = new IntOpenHashSet();
        for (int xPos = 0; xPos < xPrefixLength; ++xPos) {
            int w = x.get(xPos);
            xPrefixSet.add(w);
        }
        double coeff = threshold / (1.0 + threshold);
        IntLinkedInvertedIndex index = new IntLinkedInvertedIndex();
        for (int yID = 0; yID < dataSetSize; ++yID) {
            int yMinPrefixLength;
            IntItem y = dataSet[yID];
            int ySize = y.size();
            if (ySize == 0 || (double)ySize < (double)xSize * threshold || (double)xSize < (double)ySize * threshold) continue;
            minOverlap[yID] = (int)Math.ceil(coeff * (double)(ySize + xSize));
            prefixLengths[yID] = yMinPrefixLength = ySize - minOverlap[yID] + 1;
            for (int yPos = 0; yPos < yMinPrefixLength; ++yPos) {
                int w = y.get(yPos);
                if (!xPrefixSet.contains(w)) continue;
                index.put(w, yID, yPos);
            }
        }
        PrefixOverlap[] M = new PrefixOverlap[dataSetSize];
        for (int i = 0; i < dataSetSize; ++i) {
            M[i] = new PrefixOverlap();
        }
        for (int xPos = 0; xPos < xPrefixLength; ++xPos) {
            LinkedPositions.Node next;
            int w = x.get(xPos);
            LinkedPositions positions = index.get(w);
            if (positions == null) continue;
            LinkedPositions.Node node = positions.getRootNode();
            while ((next = node.getNext()) != null) {
                int yID = next.getId();
                if (M[yID].overlap == Integer.MIN_VALUE) {
                    next.remove();
                    continue;
                }
                IntItem y = dataSet[yID];
                int ySize = y.size();
                int yPos = node.getPosition();
                int unbound = Math.min(xSize - xPos, ySize - yPos) - 1;
                ++M[yID].overlap;
                M[yID].i = xPos;
                M[yID].j = yPos;
                if (M[yID].overlap + unbound < minOverlap[yID]) {
                    M[yID].overlap = Integer.MIN_VALUE;
                }
                node = next;
            }
        }
        this.veryfy(x, xPrefixLength, dataSet, M, prefixLengths, minOverlap, S);
        return S;
    }

    private void veryfy(IntItem x, int xMaxPrefixLength, IntItem[] dataSet, PrefixOverlap[] M, int[] prefixLengths, int[] minOverlap, Collection<IntItem> S) {
        int wx_lastPrefix = x.get(xMaxPrefixLength - 1);
        for (int yID = 0; yID < dataSet.length; ++yID) {
            if (M[yID].overlap <= 0) continue;
            IntItem y = dataSet[yID];
            int wy_lastPrefix = y.get(prefixLengths[yID] - 1);
            int xOffset = 0;
            int yOffset = 0;
            if (wx_lastPrefix < wy_lastPrefix) {
                xOffset = xMaxPrefixLength;
                yOffset = M[yID].j + 1;
            } else {
                xOffset = M[yID].i + 1;
                yOffset = prefixLengths[yID];
            }
            int overlapValue = this.overLap(x.getTokens(), xOffset, y.getTokens(), yOffset, M[yID].overlap, (double)minOverlap[yID]);
            if (minOverlap[yID] > overlapValue) continue;
            S.add(y);
        }
    }

    @Override
    public List<List<IntItem>> extractBulks(IntItem[] dataSet, double threshold) {
        this.validation(dataSet, threshold, this.useSortAtExtractBulks);
        double coeff = threshold / (1.0 + threshold);
        IntOpenHashSet buffer = new IntOpenHashSet();
        ArrayList<List<IntItem>> result = new ArrayList<List<IntItem>>();
        int dataSetSize = dataSet.length;
        IntLinkedInvertedIndex index = new IntLinkedInvertedIndex();
        int[] prefixLengths = new int[dataSetSize];
        int[] minOrverlap = new int[dataSetSize];
        PrefixOverlap[] M = new PrefixOverlap[dataSetSize];
        for (int i = 0; i < dataSetSize; ++i) {
            M[i] = new PrefixOverlap();
        }
        for (int xDataSetID = 0; xDataSetID < dataSetSize; ++xDataSetID) {
            int midPrefixLength;
            boolean isUnioned;
            IntItem x = dataSet[xDataSetID];
            int xSize = x.size();
            if (xSize == 0) {
                buffer.add(xDataSetID);
                continue;
            }
            for (int i = 0; i < xDataSetID; ++i) {
                M[i].i = 0;
                M[i].j = 0;
                M[i].overlap = 0;
            }
            int maxPrefixLength = xSize - (int)Math.ceil((double)xSize * threshold) + 1;
            for (int xPos = 0; xPos < maxPrefixLength; ++xPos) {
                LinkedPositions.Node next;
                int w = x.get(xPos);
                LinkedPositions positions = index.get(w);
                if (positions == null) continue;
                LinkedPositions.Node node = positions.getRootNode();
                while ((next = node.getNext()) != null) {
                    int minPrefixLength;
                    int yID = next.getId();
                    if (buffer.contains(yID)) {
                        next.remove();
                        continue;
                    }
                    int yPos = next.getPosition();
                    int ySize = dataSet[yID].size();
                    if ((double)ySize < (double)xSize * threshold) {
                        next.remove();
                        continue;
                    }
                    minOrverlap[yID] = (int)Math.ceil(coeff * (double)(ySize + xSize));
                    prefixLengths[yID] = minPrefixLength = ySize - minOrverlap[yID] + 1;
                    if (prefixLengths[yID] < yPos + 1) {
                        next.remove();
                        continue;
                    }
                    ++M[yID].overlap;
                    M[yID].i = xPos;
                    M[yID].j = yPos;
                    int remx = xSize - xPos - 1;
                    int remy = ySize - yPos - 1;
                    int ubound = Math.min(remx, remy);
                    if (M[yID].overlap + ubound < minOrverlap[yID]) {
                        M[yID].overlap = Integer.MIN_VALUE;
                    }
                    node = next;
                }
            }
            ArrayList<IntItem> S = new ArrayList<IntItem>();
            this.veryfy(xDataSetID, dataSet, maxPrefixLength, M, prefixLengths, minOrverlap, S, buffer);
            if (0 < S.size()) {
                buffer.add(x.getId());
                S.add(x);
                isUnioned = this.intUnion(S, result, threshold, buffer);
                if (isUnioned) continue;
                result.add(S);
                continue;
            }
            S.add(x);
            isUnioned = this.intUnion(S, result, threshold, buffer);
            if (isUnioned) continue;
            prefixLengths[xDataSetID] = midPrefixLength = xSize - (int)Math.ceil(2.0 * coeff * (double)xSize) + 1;
            for (int xPos = 0; xPos < midPrefixLength; ++xPos) {
                int w = x.get(xPos);
                index.put(w, xDataSetID, xPos);
            }
        }
        Collections.sort(result, new Comparator<List<IntItem>>(){

            @Override
            public int compare(List<IntItem> o1, List<IntItem> o2) {
                int size2;
                int size1 = o1.size();
                if (size1 < (size2 = o2.size())) {
                    return 1;
                }
                if (size2 < size1) {
                    return -1;
                }
                return 0;
            }
        });
        return result;
    }

    private void veryfy(int xDataSetID, IntItem[] dataSet, int xMaxPrefixLength, PrefixOverlap[] M, int[] prefixLengths, int[] minOverlap, List<IntItem> S, IntSet buffer) {
        IntItem x = dataSet[xDataSetID];
        int wx_lastPrefix = x.get(xMaxPrefixLength - 1);
        for (int yDataSetID = 0; yDataSetID < xDataSetID; ++yDataSetID) {
            if (M[yDataSetID].overlap <= 0) continue;
            IntItem y = dataSet[yDataSetID];
            int wy_lastPrefix = y.get(prefixLengths[yDataSetID] - 1);
            int xOffset = 0;
            int yOffset = 0;
            if (wx_lastPrefix < wy_lastPrefix) {
                xOffset = xMaxPrefixLength;
                yOffset = M[yDataSetID].j + 1;
            } else {
                xOffset = M[yDataSetID].i + 1;
                yOffset = prefixLengths[yDataSetID];
            }
            int overlapValue = this.overLap(x.getTokens(), xOffset, y.getTokens(), yOffset, M[yDataSetID].overlap, (double)minOverlap[yDataSetID]);
            if (minOverlap[yDataSetID] > overlapValue) continue;
            S.add(y);
            buffer.add(yDataSetID);
        }
    }

    class PrefixOverlap {
        int overlap;
        int i;
        int j;

        PrefixOverlap() {
        }
    }
}

