package org.dice_research.glisten;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.vocabulary.RDFS;

import com.carrotsearch.hppc.ObjectLongOpenHashMap;

/**
 * Small command line tool that loads an RDF file into an in-memory Jena
 * {@link Model} and prints, for every statement of a count property whose
 * literal value is at least {@link #MIN_COUNT}, a tab-separated line containing
 * the subject URI, the subject's <code>rdfs:label</code> and the count.
 */
public class WikipediaCategoryReporter {

    /** Path of the CSV file with class counts (currently not read by this class). */
    private static final String INPUT_COUNT_CSV_FILE = "/home/micha/data/WikidataClassCounts.csv";
    /** Path of the RDF file that is loaded into the model. */
    private static final String INPUT_SUBCLASS_HIERARCHY = "/home/micha/data/WikidataSubclasses.nt";

    /** Path of the output CSV file (currently not written by this class). */
    private static final String OUTPUT_CSV_FILE = "";

    /** Minimum count (inclusive) a statement needs to be reported. */
    private static final long MIN_COUNT = 100;

    /**
     * Reads {@link #INPUT_SUBCLASS_HIERARCHY}, prints the number of triples that
     * have been read and afterwards prints one line per statement that has the
     * count property as predicate and a literal object with a long value of at
     * least {@link #MIN_COUNT}.
     *
     * @param args command line arguments (ignored)
     * @throws FileNotFoundException if the input file does not exist
     * @throws IOException           if the input file cannot be read or closed
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        Model model = ModelFactory.createDefaultModel();
        // NOTE(review): the input has an .nt extension but is parsed with the "TTL"
        // reader; this presumably works because N-Triples is accepted by the Turtle
        // parser - confirm.
        try (InputStream is = new BufferedInputStream(new FileInputStream(INPUT_SUBCLASS_HIERARCHY))) {
            model.read(is, "", "TTL");
        }
        System.out.println("Read " + model.size() + " triples.");

        // NOTE(review): the property URI is empty, which looks like a placeholder
        // for the real count property - confirm and fill in.
        StmtIterator iterator = model.listStatements(null, model.getProperty(""), (RDFNode) null);
        try {
            while (iterator.hasNext()) {
                Statement s = iterator.next();
                if (s.getObject().isLiteral()) {
                    long count = s.getObject().asLiteral().getLong();
                    if (count >= MIN_COUNT) {
                        System.out.print(s.getSubject().getURI());
                        System.out.print("\t");
                        System.out.print(getLabel(model, s.getSubject()));
                        System.out.print("\t");
                        System.out.println(count);
                    }
                }
            }
        } finally {
            // Release the iterator even if reading a literal value fails half-way.
            iterator.close();
        }
        // TODO write the report to OUTPUT_CSV_FILE instead of standard out.
    }

    /**
     * Returns the label of the given {@link Resource} if it is present in the given
     * {@link Model}.
     *
     * @param model    the model that should contain the label
     * @param resource the resource for which the label is requested
     * @return the label of the resource or <code>null</code> if such a label does
     *         not exist
     */
    public static String getLabel(Model model, Resource resource) {
        return getStringValue(model, resource, RDFS.label);
    }

    /**
     * Returns the object as String of the first triple that has the given subject
     * and predicate and that can be found in the given model. Literal objects are
     * returned as their string value, all other nodes via their
     * <code>toString()</code> representation.
     *
     * @param model     the model that should contain the triple
     * @param subject   the subject of the triple <code>null</code> works like a
     *                  wildcard.
     * @param predicate the predicate of the triple <code>null</code> works like a
     *                  wildcard.
     * @return object of the triple as String or <code>null</code> if such a triple
     *         couldn't be found or the given model is <code>null</code>
     */
    public static String getStringValue(Model model, Resource subject, Property predicate) {
        if (model == null) {
            return null;
        }
        NodeIterator nodeIterator = model.listObjectsOfProperty(subject, predicate);
        try {
            if (!nodeIterator.hasNext()) {
                return null;
            }
            RDFNode node = nodeIterator.next();
            if (node.isLiteral()) {
                return node.asLiteral().getString();
            }
            return node.toString();
        } finally {
            // Only the first element is consumed, so the iterator has to be closed
            // explicitly to free the resources it may hold.
            nodeIterator.close();
        }
    }
}
