package de.tudarmstadt.ukp.wikipedia.wikimachine.dump.xml;

import de.tudarmstadt.ukp.wikipedia.mwdumper.importer.Contributor;
import de.tudarmstadt.ukp.wikipedia.mwdumper.importer.DumpWriter;
import de.tudarmstadt.ukp.wikipedia.mwdumper.importer.NamespaceSet;
import de.tudarmstadt.ukp.wikipedia.mwdumper.importer.Page;
import de.tudarmstadt.ukp.wikipedia.mwdumper.importer.Revision;
import de.tudarmstadt.ukp.wikipedia.mwdumper.importer.Siteinfo;
import de.tudarmstadt.ukp.wikipedia.mwdumper.importer.Title;
import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import java.util.TimeZone;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:de/tudarmstadt/ukp/wikipedia/wikimachine/dump/xml/AbstractXmlDumpReader.class */
/**
 * SAX handler that reads an XML dump from an {@link InputStream} and forwards what it parses
 * (site info, pages and revisions) to a {@link DumpWriter}.
 *
 * <p>Subclasses decide which elements are processed by filling {@link #startElements} and
 * {@link #endElements}. Both maps are keyed by the qualified element name reported by the
 * parser; the mapped value is one of the element-name constants of this class and selects the
 * handler that is run for that element. Elements without a mapping are ignored.
 *
 * <p>When an {@code <id>} element is found outside of a page, revision or contributor, the reader
 * enters an error state (see {@link #readId()}). While in that state, elements listed in the
 * "forbidden" maps are skipped; the first recognized element that is not listed there clears
 * the state again.
 */
public abstract class AbstractXmlDumpReader extends DefaultHandler {
    // Element-name constants. They double as the values stored in the element maps.
    protected static final String SITENAME = "sitename";
    protected static final String GENERATOR = "generator";
    protected static final String CASE = "case";
    protected static final String BASE = "base";
    protected static final String NAMESPACE = "namespace";
    protected static final String NAMESPACES = "namespaces";
    protected static final String SITEINFO = "siteinfo";
    protected static final String MEDIAWIKI = "mediawiki";
    protected static final String USERNAME = "username";
    protected static final String TITLE = "title";
    protected static final String TIMESTAMP = "timestamp";
    protected static final String TEXT = "text";
    protected static final String RESTRICTIONS = "restrictions";
    protected static final String PAGE = "page";
    protected static final String MINOR = "minor";
    protected static final String IP = "ip";
    protected static final String ID = "id";
    protected static final String COMMENT = "comment";
    protected static final String THREAD_TYPE = "ThreadType";
    protected static final String THREAD_EDIT_STATUS = "ThreadEditStatus";
    protected static final String THREAD_AUTHOR = "ThreadAuthor";
    protected static final String THREAD_SUMMARY_PAGE = "ThreadSummaryPage";
    protected static final String THREAD_ID = "ThreadID";
    protected static final String THREAD_PAGE = "ThreadPage";
    protected static final String THREAD_ANCESTOR = "ThreadAncestor";
    protected static final String THREAD_PARENT = "ThreadParent";
    protected static final String THREAD_SUBJECT = "ThreadSubject";
    protected static final String CONTRIBUTOR = "contributor";
    protected static final String REVISION = "revision";

    /** Stream the XML dump is read from. */
    InputStream input;

    /** Receiver of the parsed dump content. */
    DumpWriter writer;

    /** Whether {@link #characters} has been called since the last start tag. */
    private boolean hasContent;

    /** Site info being built; {@code null} outside of a {@code <siteinfo>} scope. */
    Siteinfo siteinfo;

    /** Page currently being read, or {@code null} when no page is open. */
    Page page;

    /** Whether the start of the current page has already been written to {@link #writer}. */
    boolean pageSent;

    /** Contributor currently being read, or {@code null} when none is open. */
    Contributor contrib;

    /** Revision currently being read, or {@code null} when none is open. */
    Revision rev;

    /** Key of the namespace element currently being read. */
    int nskey;

    /** Set by {@link #abort()}; checked at every start tag. */
    boolean abortFlag;

    private static final TimeZone utc = TimeZone.getTimeZone("UTC");

    /** Whether the most recent start tag carried the attribute {@code deleted="deleted"}. */
    private boolean deleted = false;

    /** Set when an {@code <id>} was found outside any page, revision or contributor. */
    boolean errorState = false;

    /** Qualified start-tag name mapped to the element constant that selects its handler. */
    protected Map<String, String> startElements = new HashMap<String, String>(64);

    /** Qualified end-tag name mapped to the element constant that selects its handler. */
    protected Map<String, String> endElements = new HashMap<String, String>(64);

    /** Start elements that are skipped while {@link #errorState} is set. */
    protected Map<String, String> forbiddenIdStartElements = new HashMap<String, String>(64);

    /** End elements that are skipped while {@link #errorState} is set. */
    protected Map<String, String> forbiddenIdEndElements = new HashMap<String, String>(64);

    /** Accumulates character data between tags; grown on demand by {@link #characters}. */
    private char[] buffer = new char[4096];

    /** Number of valid characters in {@link #buffer}. */
    private int len = 0;

    /** Registers the start elements that are skipped while the reader is in the error state. */
    protected void setupForbiddenStartElements() {
        this.forbiddenIdStartElements.put(REVISION, REVISION);
        this.forbiddenIdStartElements.put(CONTRIBUTOR, CONTRIBUTOR);
    }

    /** Registers the end elements that are skipped while the reader is in the error state. */
    protected void setupForbiddenEndElements() {
        this.forbiddenIdEndElements.put(THREAD_SUBJECT, THREAD_SUBJECT);
        this.forbiddenIdEndElements.put(THREAD_PARENT, THREAD_PARENT);
        this.forbiddenIdEndElements.put(THREAD_ANCESTOR, THREAD_ANCESTOR);
        this.forbiddenIdEndElements.put(THREAD_PAGE, THREAD_PAGE);
        this.forbiddenIdEndElements.put(THREAD_ID, THREAD_ID);
        this.forbiddenIdEndElements.put(THREAD_SUMMARY_PAGE, THREAD_SUMMARY_PAGE);
        this.forbiddenIdEndElements.put(THREAD_AUTHOR, THREAD_AUTHOR);
        this.forbiddenIdEndElements.put(THREAD_EDIT_STATUS, THREAD_EDIT_STATUS);
        this.forbiddenIdEndElements.put(THREAD_TYPE, THREAD_TYPE);
        this.forbiddenIdEndElements.put(COMMENT, COMMENT);
        this.forbiddenIdEndElements.put(CONTRIBUTOR, CONTRIBUTOR);
        this.forbiddenIdEndElements.put(ID, ID);
        this.forbiddenIdEndElements.put(IP, IP);
        this.forbiddenIdEndElements.put(MINOR, MINOR);
        this.forbiddenIdEndElements.put(PAGE, PAGE);
        this.forbiddenIdEndElements.put(RESTRICTIONS, RESTRICTIONS);
        this.forbiddenIdEndElements.put(REVISION, REVISION);
        this.forbiddenIdEndElements.put(TEXT, TEXT);
        this.forbiddenIdEndElements.put(TIMESTAMP, TIMESTAMP);
        this.forbiddenIdEndElements.put(TITLE, TITLE);
        this.forbiddenIdEndElements.put(USERNAME, USERNAME);
    }

    /**
     * Fills {@link #startElements} with the start tags this reader should handle.
     *
     * <p>Called from the constructor, so implementations must not rely on state that the
     * subclass initializes in its own constructor or field initializers.
     */
    protected abstract void setupStartElements();

    /**
     * Fills {@link #endElements} with the end tags this reader should handle.
     *
     * <p>Called from the constructor, so implementations must not rely on state that the
     * subclass initializes in its own constructor or field initializers.
     */
    protected abstract void setupEndElements();

    /**
     * Creates a reader and populates all four element maps.
     *
     * @param inputStream stream the XML dump is read from
     * @param dumpWriter receiver of the parsed content
     */
    public AbstractXmlDumpReader(InputStream inputStream, DumpWriter dumpWriter) {
        this.input = inputStream;
        this.writer = dumpWriter;
        this.hasContent = false;
        setupStartElements();
        setupEndElements();
        setupForbiddenStartElements();
        setupForbiddenEndElements();
    }

    /**
     * Parses the whole input with this object as SAX handler and closes the writer afterwards.
     *
     * <p>The writer is closed only when parsing completed without an exception.
     *
     * @throws IOException if reading fails, or wrapping any parser configuration or SAX error
     */
    public void readDump() throws IOException {
        try {
            SAXParserFactory.newInstance().newSAXParser().parse(this.input, this);
            this.writer.close();
        } catch (ParserConfigurationException e) {
            throw new IOException(e);
        } catch (SAXException e) {
            throw new IOException(e);
        }
    }

    /** Requests that parsing stops; the next start tag raises a {@link SAXException}. */
    public void abort() {
        this.abortFlag = true;
    }

    /**
     * Updates and returns the error state for a start element.
     *
     * <p>The state stays set only if it was already set and {@code element} is present in both
     * {@link #startElements} and {@link #forbiddenIdStartElements}; otherwise it is cleared.
     *
     * @param element element constant looked up for the current start tag
     * @return {@code true} if the element must be skipped
     */
    private boolean notAllowedStart(String element) {
        this.errorState = this.errorState
                && this.startElements.containsKey(element)
                && this.forbiddenIdStartElements.containsKey(element);
        return this.errorState;
    }

    /**
     * Updates and returns the error state for an end element.
     *
     * <p>The state stays set only if it was already set and {@code element} is present in both
     * {@link #endElements} and {@link #forbiddenIdEndElements}; otherwise it is cleared.
     *
     * @param element element constant looked up for the current end tag
     * @return {@code true} if the element must be skipped
     */
    private boolean notAllowedEnd(String element) {
        this.errorState = this.errorState
                && this.endElements.containsKey(element)
                && this.forbiddenIdEndElements.containsKey(element);
        return this.errorState;
    }

    /**
     * Resets the character buffer, records the {@code deleted} attribute and dispatches to the
     * matching {@code open...} handler.
     *
     * @throws SAXException if {@link #abort()} was called, or wrapping an {@link IOException}
     *         raised by a handler
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        this.len = 0;
        this.hasContent = false;
        if (this.abortFlag) {
            throw new SAXException("XmlDumpReader set abort flag.");
        }
        this.deleted = "deleted".equals(attributes.getValue("deleted"));
        try {
            String element = this.startElements.get(qName);
            if (element == null || notAllowedStart(element)) {
                return;
            }
            // Compare by value: subclasses are free to register map values that are equal to,
            // but not the same instance as, the constants of this class.
            if (REVISION.equals(element)) {
                openRevision();
            } else if (CONTRIBUTOR.equals(element)) {
                openContributor();
            } else if (PAGE.equals(element)) {
                openPage();
            } else if (MEDIAWIKI.equals(element)) {
                openMediaWiki();
            } else if (SITEINFO.equals(element)) {
                openSiteinfo();
            } else if (NAMESPACES.equals(element)) {
                openNamespaces();
            } else if (NAMESPACE.equals(element)) {
                openNamespace(attributes);
            }
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    /**
     * Appends the reported characters to the internal buffer, growing it when necessary, and
     * marks the current element as having content.
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        int required = this.len + length;
        if (this.buffer.length < required) {
            // Grow to at least double the current size so repeated appends stay cheap.
            char[] grown = new char[Math.max(this.buffer.length * 2, required)];
            System.arraycopy(this.buffer, 0, grown, 0, this.len);
            this.buffer = grown;
        }
        System.arraycopy(ch, start, this.buffer, this.len, length);
        this.len = required;
        this.hasContent = true;
    }

    /**
     * Dispatches to the matching {@code read...} or {@code close...} handler for the end tag.
     *
     * @throws SAXException wrapping an {@link IOException} raised by a handler
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        try {
            String element = this.endElements.get(qName);
            if (element == null || notAllowedEnd(element)) {
                return;
            }
            // Compare by value, see startElement.
            if (ID.equals(element)) {
                readId();
            } else if (REVISION.equals(element)) {
                closeRevision();
            } else if (TIMESTAMP.equals(element)) {
                readTimestamp();
            } else if (TEXT.equals(element)) {
                readText();
            } else if (CONTRIBUTOR.equals(element)) {
                closeContributor();
            } else if (USERNAME.equals(element)) {
                readUsername();
            } else if (IP.equals(element)) {
                readIp();
            } else if (COMMENT.equals(element)) {
                readComment();
            } else if (MINOR.equals(element)) {
                readMinor();
            } else if (PAGE.equals(element)) {
                closePage();
            } else if (TITLE.equals(element)) {
                readTitle();
            } else if (RESTRICTIONS.equals(element)) {
                readRestrictions();
            } else if (element.startsWith("Thread")) {
                threadAttribute(element);
            } else if (MEDIAWIKI.equals(element)) {
                closeMediaWiki();
            } else if (SITEINFO.equals(element)) {
                closeSiteinfo();
            } else if (SITENAME.equals(element)) {
                readSitename();
            } else if (BASE.equals(element)) {
                readBase();
            } else if (GENERATOR.equals(element)) {
                readGenerator();
            } else if (CASE.equals(element)) {
                readCase();
            } else if (NAMESPACES.equals(element)) {
                closeNamespaces();
            } else if (NAMESPACE.equals(element)) {
                closeNamespace();
            }
        } catch (IOException e) {
            // Hand the cause to SAXException directly so it is reported by the exception itself,
            // while keeping the original message.
            throw new SAXException(e.getMessage(), e);
        }
    }

    /**
     * Stores the buffered text as discussion-threading info of the current page under the given
     * key; the value for {@link #THREAD_PAGE} is stored as a {@link Title}.
     *
     * @param str element constant, used as the key
     */
    void threadAttribute(String str) throws IOException {
        if (str.equals(THREAD_PAGE)) {
            this.page.DiscussionThreadingInfo.put(str, new Title(bufferContents(), this.siteinfo.Namespaces));
        } else {
            this.page.DiscussionThreadingInfo.put(str, bufferContents());
        }
    }

    /** Discards any previous site info and tells the writer that the wiki starts. */
    void openMediaWiki() throws IOException {
        this.siteinfo = null;
        this.writer.writeStartWiki();
    }

    /** Tells the writer that the wiki ends and discards the site info. */
    void closeMediaWiki() throws IOException {
        this.writer.writeEndWiki();
        this.siteinfo = null;
    }

    /** Starts a fresh {@link Siteinfo}. */
    void openSiteinfo() {
        this.siteinfo = new Siteinfo();
    }

    /** Passes the collected site info to the writer. */
    void closeSiteinfo() throws IOException {
        this.writer.writeSiteinfo(this.siteinfo);
    }

    /**
     * Returns the buffered text, or {@code null} if no character data was reported since the
     * last start tag.
     */
    private String bufferContentsOrNull() {
        return this.hasContent ? bufferContents() : null;
    }

    /**
     * Returns the buffered text with every backslash doubled, or the empty string if the buffer
     * is empty.
     */
    private String bufferContents() {
        if (this.len == 0) {
            return "";
        }
        return new String(this.buffer, 0, this.len).replace("\\", "\\\\");
    }

    void readSitename() {
        this.siteinfo.Sitename = bufferContents();
    }

    void readBase() {
        this.siteinfo.Base = bufferContents();
    }

    void readGenerator() {
        this.siteinfo.Generator = bufferContents();
    }

    void readCase() {
        this.siteinfo.Case = bufferContents();
    }

    /** Starts a fresh namespace set on the current site info. */
    void openNamespaces() {
        this.siteinfo.Namespaces = new NamespaceSet();
    }

    /** Remembers the numeric {@code key} attribute of the namespace being opened. */
    void openNamespace(Attributes attributes) {
        this.nskey = Integer.parseInt(attributes.getValue("key"));
    }

    /** Adds the buffered namespace name under the key remembered by {@link #openNamespace}. */
    void closeNamespace() {
        this.siteinfo.Namespaces.add(this.nskey, bufferContents());
    }

    /** Nothing to do when the namespace list ends. */
    void closeNamespaces() {
    }

    /** Starts a new page; its start is written lazily by {@link #openRevision()}. */
    void openPage() {
        this.page = new Page();
        this.pageSent = false;
    }

    /** Writes the page end if the page start was written, then forgets the page. */
    void closePage() throws IOException {
        if (this.pageSent) {
            this.writer.writeEndPage();
        }
        this.page = null;
    }

    void readTitle() {
        this.page.Title = new Title(bufferContents(), this.siteinfo.Namespaces);
    }

    /**
     * Assigns the buffered numeric id to the innermost open object: the contributor if one is
     * open, otherwise the revision, otherwise the page.
     *
     * <p>If none of them is open, the event is logged at debug level and the reader enters the
     * error state.
     *
     * @throws NumberFormatException if the buffered text is not a parsable integer
     */
    void readId() {
        int id = Integer.parseInt(bufferContents());
        if (this.contrib != null) {
            this.contrib.Id = id;
        } else if (this.rev != null) {
            this.rev.Id = id;
        } else if (this.page != null) {
            this.page.Id = id;
        } else {
            Logger.getLogger(AbstractXmlDumpReader.class.getName()).debug("Unexpected <id> outside a <page>, <revision>, or <contributor>");
            this.errorState = true;
            this.contrib = null;
            this.rev = null;
            this.page = null;
        }
    }

    void readRestrictions() {
        this.page.Restrictions = bufferContents();
    }

    /** Writes the page start if that has not happened yet, then starts a new revision. */
    void openRevision() throws IOException {
        if (!this.pageSent) {
            this.writer.writeStartPage(this.page);
            this.pageSent = true;
        }
        this.rev = new Revision();
    }

    /** Passes the finished revision to the writer and forgets it. */
    void closeRevision() throws IOException {
        this.writer.writeRevision(this.rev);
        this.rev = null;
    }

    void readTimestamp() {
        this.rev.Timestamp = parseUTCTimestamp(bufferContents());
    }

    /**
     * Stores the revision comment. An element without character data yields the empty string,
     * unless it was marked as deleted, in which case the comment stays {@code null}.
     */
    void readComment() {
        String comment = bufferContentsOrNull();
        if (comment == null && !this.deleted) {
            comment = "";
        }
        this.rev.Comment = comment;
    }

    /** Marks the current revision as minor. */
    void readMinor() {
        this.rev.Minor = true;
    }

    /**
     * Stores the revision text. An element without character data yields the empty string,
     * unless it was marked as deleted, in which case the text stays {@code null}.
     */
    void readText() {
        String text = bufferContentsOrNull();
        if (text == null && !this.deleted) {
            text = "";
        }
        this.rev.Text = text;
    }

    /** Starts a new contributor. */
    void openContributor() {
        this.contrib = new Contributor();
    }

    /** Attaches the finished contributor to the current revision and forgets it. */
    void closeContributor() {
        this.rev.Contributor = this.contrib;
        this.contrib = null;
    }

    /** Stores the user name, or {@code null} if the element had no character data. */
    void readUsername() {
        this.contrib.Username = bufferContentsOrNull();
    }

    /** Stores the IP address as the user name and flags the contributor as an IP. */
    void readIp() {
        this.contrib.Username = bufferContents();
        this.contrib.isIP = true;
    }

    /**
     * Parses a timestamp whose fields sit at fixed offsets: year at 0-4, month at 5-7, day at
     * 8-10, hour at 11-13, minute at 14-16 and second at 17-19 (for example
     * {@code 2001-01-15T13:15:00Z}). Leading and trailing whitespace is ignored.
     *
     * @param str timestamp text
     * @return a UTC calendar set to the parsed instant, with milliseconds set to zero
     * @throws NumberFormatException if a field is not a parsable integer
     * @throws StringIndexOutOfBoundsException if the trimmed text is shorter than 19 characters
     */
    private static Calendar parseUTCTimestamp(String str) {
        String trimmed = str.trim();
        int year = Integer.parseInt(trimmed.substring(0, 4));
        int month = Integer.parseInt(trimmed.substring(5, 7)) - 1; // Calendar months are 0-based
        int day = Integer.parseInt(trimmed.substring(8, 10));
        int hour = Integer.parseInt(trimmed.substring(11, 13));
        int minute = Integer.parseInt(trimmed.substring(14, 16));
        int second = Integer.parseInt(trimmed.substring(17, 19));

        GregorianCalendar calendar = new GregorianCalendar(utc);
        // A new calendar holds the current time and set(...) leaves untouched fields as they
        // are, so without clear() the result would carry the wall-clock milliseconds.
        calendar.clear();
        calendar.set(year, month, day, hour, minute, second);
        return calendar;
    }
}
