/*
 * Decompiled with CFR 0.152.
 */
package de.tudarmstadt.ukp.wikipedia.parser.statistics;

import de.tudarmstadt.ukp.wikipedia.api.DatabaseConfiguration;
import de.tudarmstadt.ukp.wikipedia.api.Page;
import de.tudarmstadt.ukp.wikipedia.api.WikiConstants;
import de.tudarmstadt.ukp.wikipedia.api.Wikipedia;
import de.tudarmstadt.ukp.wikipedia.parser.ParsedPage;
import de.tudarmstadt.ukp.wikipedia.parser.SectionContainer;
import de.tudarmstadt.ukp.wikipedia.parser.Table;
import de.tudarmstadt.ukp.wikipedia.parser.TableElement;
import de.tudarmstadt.ukp.wikipedia.parser.Template;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.ShowTemplateNamesAndParameters;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Command-line tool that walks over all articles of a Wikipedia database, parses each one
 * with the MediaWiki parser and collects simple statistics about template usage and about
 * tables whose cells contain sections. The results are written as plain-text report files
 * below {@link #path}.
 */
public class Statistics {
    /** Directory (with trailing slash) the report files are written to. */
    public static final String path = "./data/parsedpage/statistics/";
    /** Total number of pages as reported by the Wikipedia meta data; used for progress output. */
    static long nrOfPages;
    /** Number of parsed pages that contain at least one table (pages, not individual tables). */
    static int nrOfTables;
    /** Total number of template occurrences seen on all parsed pages. */
    static int nrOfTemplates;
    /** Number of pages fetched from the iterator so far (includes skipped and unparsable pages). */
    static int nrOfAnalyzedPages;
    /** Occurrence count per template; parallel to {@link #templateNames}. */
    static List<Integer> templateNrOfOccurence;
    /** Name of the page a template was first seen on; parallel to {@link #templateNames}. */
    static List<String> templateNameOfFirstOccurence;
    /** Distinct, normalized template names. */
    static List<String> templateNames;
    /** Names of pages that have at least one table element holding sections. */
    static List<String> pagesWithTableSections;
    /** Number of leading pages that are only reported as skipped and not analyzed. */
    static final int skipPages = 0;
    /** Milliseconds added to the measured run time when estimating the remaining time. */
    static final long offsetTime = 0L;
    /** Not referenced anywhere in this class. */
    static final boolean debug = false;

    /** Namespace prefixes removed from template names (names are lower-cased before the check). */
    private static final String GERMAN_TEMPLATE_PREFIX = "vorlage:";
    private static final String ENGLISH_TEMPLATE_PREFIX = "template:";
    /** A progress line is printed every this many pages. */
    private static final int PROGRESS_INTERVAL = 1024;

    /**
     * Runs the analysis over all articles and writes the report files.
     *
     * @param argv ignored
     * @throws Exception if the database cannot be accessed or a report file cannot be written
     */
    public static void main(String[] argv) throws Exception {
        // NOTE(review): connection settings, including the credentials, are hard-coded here.
        DatabaseConfiguration dbConfig = new DatabaseConfiguration();
        dbConfig.setDatabase("wikiapi_en");
        dbConfig.setHost("bender.ukp.informatik.tu-darmstadt.de");
        dbConfig.setUser("student");
        dbConfig.setPassword("student");
        dbConfig.setLanguage(WikiConstants.Language.english);
        Wikipedia wiki = new Wikipedia(dbConfig);

        MediaWikiParserFactory pf = new MediaWikiParserFactory();
        pf.setTemplateParserClass(ShowTemplateNamesAndParameters.class);
        pf.setShowImageText(true);
        pf.setShowMathTagContent(true);
        pf.setDeleteTags(false);
        pf.getImageIdentifers().add("IMAGE");
        pf.setCalculateSrcSpans(false);
        MediaWikiParser parser = pf.createParser();

        Iterator<Page> pageIt = wiki.getArticles().iterator();
        nrOfPages = wiki.getMetaData().getNumberOfPages();
        nrOfTables = 0;
        nrOfTemplates = 0;
        templateNames = new ArrayList<String>();
        templateNameOfFirstOccurence = new ArrayList<String>();
        templateNrOfOccurence = new ArrayList<Integer>();
        pagesWithTableSections = new ArrayList<String>();
        // Maps a template name to its position in the three parallel lists, so that each
        // occurrence costs one hash lookup instead of a linear scan of templateNames.
        Map<String, Integer> templatePositions = new HashMap<String, Integer>();
        long startTime = System.currentTimeMillis();
        nrOfAnalyzedPages = 0;

        System.out.println("ANALYSING ...");
        while (pageIt.hasNext()) {
            Page currentPage = pageIt.next();
            if (++nrOfAnalyzedPages <= skipPages) {
                System.out.println("Skipped: " + currentPage.getPageId());
                continue;
            }
            if (nrOfAnalyzedPages % PROGRESS_INTERVAL == 0) {
                printProgress(startTime);
            }
            String name = currentPage.getTitle().getPlainTitle();
            String src = currentPage.getText();
            ParsedPage pp = parser.parse(src);
            if (pp == null) {
                continue;
            }
            pp.setName(name);
            countTemplates(pp, templatePositions);
            if (pp.nrOfTables() != 0) {
                ++nrOfTables;
            }
            if (hasTableWithSections(pp)) {
                pagesWithTableSections.add(pp.getName());
            }
        }
        System.out.println("Finished.");

        sortTemplates();
        writeFiles("statistics");
        restructureTemplateNames();
        sortTemplates();
        writeTemplates("statistics.restructured");
        System.out.println("check the Results ;-)\nnow...");
    }

    /** Prints the progress so far and a linear estimate of the remaining run time. */
    private static void printProgress(long startTime) {
        long elapsed = System.currentTimeMillis() - startTime + offsetTime;
        long estimatedTotal = elapsed * nrOfPages / (long) nrOfAnalyzedPages;
        System.out.println(percentString(nrOfAnalyzedPages, nrOfPages) + " -> " + nrOfAnalyzedPages
                + " of " + nrOfPages + " pages in " + elapsed / 1000L + "sec" + " -> "
                + (estimatedTotal - elapsed) / 60000L + "min left");
    }

    /**
     * Adds every template of the given page to the template statistics.
     *
     * @param pp                parsed page whose templates are counted
     * @param templatePositions index from normalized template name to its list position;
     *                          updated when a new template name is registered
     */
    private static void countTemplates(ParsedPage pp, Map<String, Integer> templatePositions) {
        for (Template t : pp.getTemplates()) {
            ++nrOfTemplates;
            // Locale.ROOT keeps the normalization independent of the JVM's default locale.
            String templateName = stripNamespacePrefix(t.getName().toLowerCase(Locale.ROOT));
            Integer known = templatePositions.get(templateName);
            if (known != null) {
                int pos = known.intValue();
                templateNrOfOccurence.set(pos, templateNrOfOccurence.get(pos) + 1);
                continue;
            }
            templatePositions.put(templateName, templateNames.size());
            templateNrOfOccurence.add(1);
            templateNames.add(templateName);
            templateNameOfFirstOccurence.add(pp.getName());
        }
    }

    /** Removes a leading "vorlage:" or "template:" prefix from an already lower-cased name. */
    private static String stripNamespacePrefix(String lowerCaseName) {
        if (lowerCaseName.startsWith(GERMAN_TEMPLATE_PREFIX)) {
            return lowerCaseName.substring(GERMAN_TEMPLATE_PREFIX.length());
        }
        if (lowerCaseName.startsWith(ENGLISH_TEMPLATE_PREFIX)) {
            return lowerCaseName.substring(ENGLISH_TEMPLATE_PREFIX.length());
        }
        return lowerCaseName;
    }

    /**
     * Tells whether any table element of the page has more than one section or has a first
     * section whose class is exactly {@link SectionContainer}.
     */
    private static boolean hasTableWithSections(ParsedPage pp) {
        for (Table t : pp.getTables()) {
            for (int i = 0; i < t.nrOfTableElements(); ++i) {
                TableElement te = t.getTableElement(i);
                if (te.nrOfSections() > 1 || te.getSection(0).getClass() == SectionContainer.class) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Reorders the three parallel template lists by descending occurrence count. Templates
     * with the same count are placed in reverse order of their previous list position.
     */
    private static void sortTemplates() {
        System.out.println("Sort Template List");
        final List<Integer> counts = templateNrOfOccurence;
        int n = counts.size();
        Integer[] order = new Integer[n];
        for (int i = 0; i < n; ++i) {
            order[i] = i;
        }
        Arrays.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer a, Integer b) {
                int countA = counts.get(a.intValue());
                int countB = counts.get(b.intValue());
                if (countA != countB) {
                    return countA > countB ? -1 : 1;
                }
                // Equal counts: the entry with the larger previous index comes first.
                return b.compareTo(a);
            }
        });

        List<String> sortedNames = new ArrayList<String>(n);
        List<Integer> sortedCounts = new ArrayList<Integer>(n);
        List<String> sortedFirstOccurence = new ArrayList<String>(n);
        for (Integer boxedIndex : order) {
            int i = boxedIndex.intValue();
            sortedNames.add(templateNames.get(i));
            sortedCounts.add(counts.get(i));
            sortedFirstOccurence.add(templateNameOfFirstOccurence.get(i));
        }
        templateNames = sortedNames;
        templateNrOfOccurence = sortedCounts;
        templateNameOfFirstOccurence = sortedFirstOccurence;
    }

    /**
     * Writes both the template report and the table report.
     *
     * @param fileName base name of the report files, without extension
     * @throws IOException if a report file cannot be written
     */
    private static void writeFiles(String fileName) throws IOException {
        System.out.print("writeFiles() " + fileName);
        writeTemplates(fileName);
        writeTables(fileName);
    }

    /**
     * Opens a UTF-8 writer for a report file below {@link #path}, creating the directory first
     * if necessary. UTF-8 is used explicitly because page titles are not limited to characters
     * the platform default charset can encode.
     */
    private static BufferedWriter openReportWriter(String fileNameWithExtension) throws IOException {
        File target = new File(path + fileNameWithExtension);
        File directory = target.getParentFile();
        if (directory != null) {
            // The result is ignored on purpose: if the directory is still missing,
            // opening the stream below fails with a descriptive exception.
            directory.mkdirs();
        }
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target), "UTF-8"));
    }

    /**
     * Writes the template report: one line per template with its count, the page it was first
     * seen on and the running sum of counts.
     *
     * @param fileName base name of the report file; ".template" is appended
     * @throws IOException if the file cannot be written
     */
    private static void writeTemplates(String fileName) throws IOException {
        BufferedWriter bw = openReportWriter(fileName + ".template");
        try {
            bw.write("Analyzed Pages: " + nrOfAnalyzedPages + "\n\n");
            bw.write("Found " + nrOfTemplates + " Templates\n");
            bw.write("Found " + templateNames.size() + " different Templates\n\n");
            int sum = 0;
            for (int i = 0; i < templateNames.size(); ++i) {
                int count = templateNrOfOccurence.get(i);
                sum += count;
                bw.write(count + " x {{" + templateNames.get(i) + "}}");
                bw.write(" @" + templateNameOfFirstOccurence.get(i));
                bw.write(" sum=" + sum);
                bw.write("\n");
            }
        } finally {
            // Release the file handle even when a write fails.
            bw.close();
        }
    }

    /**
     * Writes the table report: summary figures followed by the names of all pages that have
     * table elements holding sections.
     *
     * @param fileName base name of the report file; ".table" is appended
     * @throws IOException if the file cannot be written
     */
    private static void writeTables(String fileName) throws IOException {
        BufferedWriter bw = openReportWriter(fileName + ".table");
        try {
            int sections = pagesWithTableSections.size();
            // Both figures are page counts: nrOfTables is incremented once per page with tables.
            bw.write("Analyzed Pages: " + nrOfAnalyzedPages + "\n" + "\n"
                    + "Found " + nrOfTables + " Tables\n"
                    + "-> " + percentString(nrOfTables, nrOfAnalyzedPages) + " @Pages\n" + "\n"
                    + "Found " + sections + " Sections in Tables\n"
                    + "-> " + percentString(sections, nrOfTables) + " @Tables\n"
                    + "-> " + percentString(sections, nrOfAnalyzedPages) + " @Pages\n" + "\n");
            bw.write("-=Pages with Tables and Sections---------------------------------------------------\n");
            for (String pageName : pagesWithTableSections) {
                bw.write(pageName + "\n");
            }
        } finally {
            // Release the file handle even when a write fails.
            bw.close();
        }
        System.out.println(" --> OK");
    }

    /**
     * Merges templates that share the same leading word: every name is cut at its first space
     * or underscore and the counts of names that become equal are added up. The first-occurrence
     * page of a merged entry is the one of the first name encountered in the current list order.
     */
    private static void restructureTemplateNames() {
        System.out.println("restructure Template Names");
        List<String> newTemplateNames = new ArrayList<String>();
        List<Integer> newTemplateNrOfOccurence = new ArrayList<Integer>();
        List<String> newTemplateNameOfFirstOccurence = new ArrayList<String>();
        // Index from shortened name to its position in the new lists (replaces List.indexOf).
        Map<String, Integer> positions = new HashMap<String, Integer>();
        for (int i = 0; i < templateNames.size(); ++i) {
            String shortName = leadingWord(templateNames.get(i));
            Integer known = positions.get(shortName);
            if (known != null) {
                int index = known.intValue();
                newTemplateNrOfOccurence.set(index,
                        newTemplateNrOfOccurence.get(index) + templateNrOfOccurence.get(i));
                continue;
            }
            positions.put(shortName, newTemplateNames.size());
            newTemplateNames.add(shortName);
            newTemplateNrOfOccurence.add(templateNrOfOccurence.get(i));
            newTemplateNameOfFirstOccurence.add(templateNameOfFirstOccurence.get(i));
        }
        templateNames = newTemplateNames;
        templateNrOfOccurence = newTemplateNrOfOccurence;
        templateNameOfFirstOccurence = newTemplateNameOfFirstOccurence;
    }

    /** Returns the part of the name before its first space or underscore, or the whole name. */
    private static String leadingWord(String templateName) {
        int cut = templateName.indexOf(' ');
        int underscore = templateName.indexOf('_');
        if (cut == -1 || underscore != -1 && underscore < cut) {
            cut = underscore;
        }
        return cut == -1 ? templateName : templateName.substring(0, cut);
    }

    /**
     * Formats {@code a / nr} as a percentage with two decimal places, truncated rather than
     * rounded (for example {@code "12.34%"}).
     *
     * @param a  the part
     * @param nr the whole; when it is not positive the result is {@code "0.00%"}
     * @return the formatted percentage
     */
    private static String percentString(long a, long nr) {
        long hundredths = nr > 0L ? a * 10000L / nr : 0L;
        return hundredths / 100L + "." + hundredths / 10L % 10L + "" + hundredths % 10L + "%";
    }
}

