/*
 * Decompiled with CFR 0.152.
 */
package org.aksw.limes.core.ml.algorithm.eagle.coala;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.aksw.limes.core.datastrutures.PairSimilar;
import org.aksw.limes.core.io.cache.ACache;
import org.aksw.limes.core.io.mapping.AMapping;
import org.aksw.limes.core.measures.measure.AMeasure;
import org.aksw.limes.core.measures.measure.phoneticmeasure.SoundexMeasure;
import org.aksw.limes.core.measures.measure.string.StringMeasure;
import org.aksw.limes.core.ml.algorithm.eagle.coala.ClusterInstance;
import org.aksw.limes.core.ml.algorithm.eagle.core.ALDecider;
import org.aksw.limes.core.ml.algorithm.eagle.util.PropertyMapping;
import org.apache.log4j.Logger;
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.Clusterer;
import weka.clusterers.EM;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 * Builds a Weka similarity space from link candidates and hosts a small,
 * self-contained experiment that clusters a synthetic dataset with Weka's EM.
 *
 * <p>Each candidate becomes one numeric row: the candidate's own similarity
 * followed by one value per string property pair of the {@link PropertyMapping},
 * computed with {@link #measure}.
 */
public class Clustering {
    static Logger logger = Logger.getLogger(Clustering.class);
    /** Cache that is queried with the source URI of a candidate. */
    ACache sourceCache;
    /** Cache that is queried with the target URI of a candidate. */
    ACache targetCache;
    /** Supplies the string property pairs that span the similarity space. */
    PropertyMapping propMap;
    /** Not read or written by any code in this class. */
    AMapping reference;
    /** Measure applied to every string property pair. */
    StringMeasure measure = new SoundexMeasure();
    /** Not read by any code in this class. */
    double threshold = 0.6;

    /**
     * Creates a clustering helper working on the given caches.
     *
     * @param sourceCache cache used to look up source instances
     * @param targetCache cache used to look up target instances
     * @param propMap     property mapping whose string property pairs define the attributes
     */
    public Clustering(ACache sourceCache, ACache targetCache, PropertyMapping propMap) {
        this.sourceCache = sourceCache;
        this.targetCache = targetCache;
        this.propMap = propMap;
    }

    /**
     * Converts the candidates into a Weka dataset named {@code "dataset"}.
     *
     * <p>The first attribute is {@code "match"} and holds the candidate's own
     * similarity; it is followed by one attribute named {@code "<a>-<b>"} per
     * string property pair, in the iteration order of
     * {@code propMap.stringPropPairs}.
     *
     * @param candidates candidates to convert; one row is produced per element
     * @return the dataset with one instance (weight 1.0) per candidate
     */
    public Instances computeSimilaritySpace(List<ALDecider.Triple> candidates) {
        int pairCount = this.propMap.stringPropPairs.size();
        // Weka's Instances constructor requires an ArrayList, not just a List.
        ArrayList<Attribute> attributes = new ArrayList<Attribute>(pairCount + 1);
        attributes.add(new Attribute("match"));
        for (PairSimilar<String> pair : this.propMap.stringPropPairs) {
            attributes.add(new Attribute(pair.a + "-" + pair.b));
        }

        // Exactly one row is added per candidate, so that is the capacity needed.
        Instances data = new Instances("dataset", attributes, candidates.size());
        boolean debugEnabled = logger.isDebugEnabled();
        for (ALDecider.Triple t : candidates) {
            double[] values = new double[attributes.size()];
            values[0] = t.getSimilarity();
            int column = 1;
            for (PairSimilar<String> pair : this.propMap.stringPropPairs) {
                values[column++] = this.getSimilarity(this.measure, pair, t);
            }
            boolean added = data.add(new DenseInstance(1.0, values));

            // Only pay for building the (potentially long) message when it is actually logged.
            if (debugEnabled) {
                StringBuilder debug = new StringBuilder("Triple (").append(t.toString()).append(")");
                for (double value : values) {
                    debug.append("\t").append(value);
                }
                logger.debug("new Inst:" + debug + " added?" + added);
            }
        }
        return data;
    }

    /**
     * Runs an EM clustering experiment on a synthetic dataset and prints the
     * results to standard output.
     *
     * <p>Steps: build 100 synthetic rows, remove the two string attributes,
     * cluster with {@link EM}, print the evaluation and every instance's cluster
     * and distribution, print each cluster's members in sorted order, and finally
     * print up to 10 instance numbers picked round-robin over the clusters.
     *
     * @throws Exception if filtering, clustering or evaluation fails inside Weka
     */
    public static void wekaTest() throws Exception {
        Instances dataset = buildTestDataset();

        // The URI string attributes (1-based indices 1 and 2) are dropped before clustering.
        Remove filter = new Remove();
        filter.setAttributeIndices("1,2");
        filter.setInputFormat(dataset);
        Instances dataClusterer = Filter.useFilter(dataset, filter);

        EM clusterer = new EM();
        clusterer.buildClusterer(dataClusterer);
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(dataClusterer);
        System.out.println(eval.clusterResultsToString());
        System.out.println("# - cluster - distribution");

        // group.get(c) collects the members assigned to cluster c.
        int numClusters = eval.getNumClusters();
        List<List<ClusterInstance>> group = new ArrayList<List<ClusterInstance>>(numClusters);
        for (int c = 0; c < numClusters; ++c) {
            group.add(new ArrayList<ClusterInstance>());
        }
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            int cluster = clusterer.clusterInstance(dataClusterer.instance(i));
            double[] distribution = clusterer.distributionForInstance(dataClusterer.instance(i));
            group.get(cluster).add(new ClusterInstance(i, cluster, distribution[cluster]));
            System.out.println((i + 1) + " - " + cluster + " - " + Utils.arrayToString(distribution));
        }

        int cl = 0;
        for (List<ClusterInstance> members : group) {
            // Natural ordering of ClusterInstance; presumably by cluster probability - verify in ClusterInstance.
            Collections.sort(members);
            System.out.println("Cluster " + cl);
            for (ClusterInstance ci : members) {
                System.out.println("\t" + ci);
            }
            ++cl;
        }

        int k = 10;
        List<Integer> representatives = new ArrayList<Integer>();
        int rounds = Math.min(k, dataset.size());
        for (int round = 0; round < rounds; ++round) {
            // Visit the clusters round-robin and take the last (in sorted order) member not yet chosen.
            List<ClusterInstance> cluster = group.get(round % group.size());
            for (int r = cluster.size() - 1; r >= 0; --r) {
                int candidate = cluster.get(r).instanceNumber;
                if (!representatives.contains(candidate)) {
                    representatives.add(candidate);
                    break;
                }
            }
            // NOTE(review): a round whose cluster has no unselected member adds nothing,
            // so fewer than k instance numbers may be printed.
        }
        System.out.println("k most representatives..." + representatives);
    }

    /**
     * Builds the synthetic dataset used by {@link #wekaTest()}: 100 rows with two
     * string attributes ({@code uri1}, {@code uri2}) and two numeric attributes
     * ({@code dist1 = i / j} with {@code j} counting down from 200, and
     * {@code dist2 = dist1 * random}).
     */
    private static Instances buildTestDataset() {
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        // A null value list makes these string attributes.
        attributes.add(new Attribute("uri1", (List<String>) null));
        attributes.add(new Attribute("uri2", (List<String>) null));
        attributes.add(new Attribute("dist1"));
        attributes.add(new Attribute("dist2"));
        Instances dataset = new Instances("test-dataset", attributes, 0);

        Random rand = new Random();
        int j = 200;
        for (int i = 0; i < 100; ++i) {
            double[] values = new double[4];
            values[0] = dataset.attribute(0).addStringValue("uri1_" + i);
            values[1] = dataset.attribute(1).addStringValue("uri2_" + j);
            values[2] = (double) i / (double) j;
            values[3] = values[2] * rand.nextDouble();
            dataset.add(new DenseInstance(1.0, values));
            --j;
        }
        return dataset;
    }

    /**
     * Computes the similarity of one property pair for a candidate.
     *
     * @param measure measure to apply
     * @param pair    property pair; {@code pair.a} is read from the source instance,
     *                {@code pair.b} from the target instance
     * @param t       candidate providing the source and target URIs
     * @return the value returned by {@code measure.getSimilarity} for the two property values
     */
    private double getSimilarity(AMeasure measure, PairSimilar<String> pair, ALDecider.Triple t) {
        // NOTE(review): no null check on the cache lookups - presumably both URIs are always cached; confirm.
        return measure.getSimilarity(
                this.sourceCache.getInstance(t.getSourceUri()).getProperty(pair.a),
                this.targetCache.getInstance(t.getTargetUri()).getProperty(pair.b));
    }

    /**
     * Command-line entry point; runs {@link #wekaTest()}.
     *
     * @param args ignored
     * @throws Exception propagated from {@link #wekaTest()}
     */
    public static void main(String[] args) throws Exception {
        Clustering.wekaTest();
    }
}

