package org.aksw.simba.topicmodeling.io.xml;

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.aksw.simba.topicmodeling.utils.doc.DocumentMultipleCategories;
import org.aksw.simba.topicmodeling.utils.doc.DocumentProperty;
import org.aksw.simba.topicmodeling.utils.doc.DocumentText;
import org.aksw.simba.topicmodeling.utils.doc.ParseableDocumentProperty;
import org.aksw.simba.topicmodeling.utils.doc.StringContainingDocumentProperty;
import org.aksw.simba.topicmodeling.utils.doc.ner.NamedEntitiesInText;
import org.aksw.simba.topicmodeling.utils.doc.ner.SignedNamedEntityInText;
import org.apache.commons.lang3.StringEscapeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/io/xml/AbstractDocumentXmlWriter.class */
/**
 * Shared serialization logic for writers that emit {@link Document} instances as XML.
 *
 * <p>A document is written as one element named {@link CorpusXmlTagHelper#DOCUMENT_TAG_NAME}
 * carrying the document id as {@code id} attribute. Inside it, the parseable properties are
 * written first (in iteration order), followed by the categories and finally the text, which is
 * interleaved with named entity markup if the document has named entities.
 */
abstract class AbstractDocumentXmlWriter {
    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractDocumentXmlWriter.class);

    /**
     * Writes the given document, including all properties this writer knows how to serialize.
     *
     * <p>{@link DocumentText}, {@link NamedEntitiesInText} and {@link DocumentMultipleCategories}
     * are collected during the iteration and written after all other properties. Properties that
     * are neither one of these nor a {@link ParseableDocumentProperty} are silently skipped.
     * (Decompiler note: this method was originally declared {@code protected}.)
     *
     * @param outputStreamWriter the writer receiving the XML
     * @param document the document to serialize
     * @throws IOException if the underlying writer fails
     */
    public void writeDocument(OutputStreamWriter outputStreamWriter, Document document) throws IOException {
        outputStreamWriter.write("<" + CorpusXmlTagHelper.DOCUMENT_TAG_NAME + " id=\"" + document.getDocumentId() + "\">\n");
        DocumentText documentText = null;
        NamedEntitiesInText namedEntitiesInText = null;
        DocumentMultipleCategories documentMultipleCategories = null;
        Iterator<?> properties = document.iterator();
        while (properties.hasNext()) {
            DocumentProperty documentProperty = (DocumentProperty) properties.next();
            // The order of these checks matters: the three special properties must be picked up
            // before the generic ParseableDocumentProperty branch gets a chance to write them.
            if (documentProperty instanceof DocumentText) {
                documentText = (DocumentText) documentProperty;
            } else if (documentProperty instanceof NamedEntitiesInText) {
                namedEntitiesInText = (NamedEntitiesInText) documentProperty;
            } else if (documentProperty instanceof DocumentMultipleCategories) {
                documentMultipleCategories = (DocumentMultipleCategories) documentProperty;
            } else if (documentProperty instanceof ParseableDocumentProperty) {
                writeDocumentProperty(outputStreamWriter, (ParseableDocumentProperty) documentProperty);
            }
        }
        if (documentMultipleCategories != null) {
            outputStreamWriter.write("<DocumentMultipleCategories>\n");
            writeArray(outputStreamWriter, documentMultipleCategories.getCategories(), CorpusXmlTagHelper.DOCUMENT_CATEGORIES_SINGLE_CATEGORY_TAG_NAME);
            outputStreamWriter.write("</DocumentMultipleCategories>\n");
        }
        if (documentText != null) {
            if (namedEntitiesInText != null) {
                // Opening and closing tag are both derived from the same constant so that they
                // can never get out of sync.
                outputStreamWriter.write("<" + CorpusXmlTagHelper.TEXT_WITH_NAMED_ENTITIES_TAG_NAME + ">"
                        + prepareText(documentText, namedEntitiesInText)
                        + "</" + CorpusXmlTagHelper.TEXT_WITH_NAMED_ENTITIES_TAG_NAME + ">\n");
            } else {
                writeDocumentProperty(outputStreamWriter, documentText);
            }
        }
        outputStreamWriter.write("</" + CorpusXmlTagHelper.DOCUMENT_TAG_NAME + ">\n");
    }

    /**
     * Writes a single property as {@code <tag>escaped value</tag>}.
     *
     * <p>The tag name is looked up via
     * {@link CorpusXmlTagHelper#getTagNameOfParseableDocumentProperty(Class)}. If no tag name is
     * known for the property's class, an error is logged and nothing is written.
     *
     * @param outputStreamWriter the writer receiving the XML
     * @param parseableDocumentProperty the property to serialize
     * @throws IOException if the underlying writer fails
     */
    protected void writeDocumentProperty(OutputStreamWriter outputStreamWriter, ParseableDocumentProperty parseableDocumentProperty) throws IOException {
        String tagName = CorpusXmlTagHelper.getTagNameOfParseableDocumentProperty(parseableDocumentProperty.getClass());
        if (tagName == null) {
            LOGGER.error("There is no XML tag name defined for the ParseableDocumentProperty class {}. Discarding this property.",
                    parseableDocumentProperty.getClass().getCanonicalName());
            return;
        }
        String rawValue;
        if (parseableDocumentProperty instanceof StringContainingDocumentProperty) {
            rawValue = ((StringContainingDocumentProperty) parseableDocumentProperty).getStringValue();
        } else {
            rawValue = parseableDocumentProperty.getValue().toString();
        }
        outputStreamWriter.write("<" + tagName + ">");
        outputStreamWriter.write(StringEscapeUtils.escapeXml11(rawValue));
        outputStreamWriter.write("</" + tagName + ">\n");
    }

    /**
     * Writes every element of the array as its own {@code <str>value</str>} line.
     *
     * <p>The string form of each element is XML-escaped, in line with all other text content
     * written by this class. A {@code null} array writes nothing and {@code null} elements are
     * skipped.
     *
     * @param outputStreamWriter the writer receiving the XML
     * @param objArr the values to write; may be {@code null}
     * @param str the tag name used for every element
     * @throws IOException if the underlying writer fails
     */
    protected void writeArray(OutputStreamWriter outputStreamWriter, Object[] objArr, String str) throws IOException {
        if (objArr == null) {
            return;
        }
        for (Object element : objArr) {
            if (element == null) {
                continue;
            }
            outputStreamWriter.write("<" + str + ">" + StringEscapeUtils.escapeXml11(element.toString()) + "</" + str + ">\n");
        }
    }

    /**
     * Builds the inner markup of the text element: the document text split into plain text parts
     * and named entity elements.
     *
     * <p>The entities are processed in their natural order on a <em>copy</em> of the entity list,
     * so the list held by {@code namedEntitiesInText} is not reordered. The markup is assembled
     * back to front: fragments are collected in reverse and concatenated from the last collected
     * fragment to the first.
     *
     * @param documentText the text of the document
     * @param namedEntitiesInText the named entities located in that text
     * @return the XML fragment representing the annotated text
     */
    protected String prepareText(DocumentText documentText, NamedEntitiesInText namedEntitiesInText) {
        // Fragments in reverse output order; see the final loop.
        List<String> reversedParts = new ArrayList<String>();
        // Sort a copy instead of the property's own list to avoid a side effect on the document.
        List<SignedNamedEntityInText> namedEntities = new ArrayList<SignedNamedEntityInText>(namedEntitiesInText.getNamedEntities());
        // NOTE(review): the loop below only works if the natural order yields the entities from
        // the end of the text towards its start - presumably compareTo is defined that way; verify.
        Collections.sort(namedEntities);
        String text = documentText.getText();
        // Start position of the previously handled entity (initially the end of the text).
        int startOfFormerEntity = text.length();
        for (SignedNamedEntityInText entity : namedEntities) {
            // Entities reaching beyond the start of the previously handled one are skipped.
            if (startOfFormerEntity < entity.getEndPos()) {
                continue;
            }
            // Always true for the element type declared here; kept so the unsigned tag name is
            // still chosen correctly should the element type ever be widened.
            boolean signed = entity instanceof SignedNamedEntityInText;
            String entityTagName = signed ? CorpusXmlTagHelper.SIGNED_NAMED_ENTITY_IN_TEXT_TAG_NAME : CorpusXmlTagHelper.NAMED_ENTITY_IN_TEXT_TAG_NAME;

            // Plain text between this entity and the previously handled one (reversed).
            reversedParts.add(">");
            reversedParts.add(CorpusXmlTagHelper.TEXT_PART_TAG_NAME);
            reversedParts.add("</");
            try {
                reversedParts.add(StringEscapeUtils.escapeXml11(text.substring(entity.getEndPos(), startOfFormerEntity)));
            } catch (StringIndexOutOfBoundsException e) {
                LOGGER.error("Got a wrong named entity (" + entity.toString() + ")", e);
                reversedParts.add("<AN_ERROR_OCCURED/>");
            }
            reversedParts.add(">");
            reversedParts.add(CorpusXmlTagHelper.TEXT_PART_TAG_NAME);
            reversedParts.add("<");

            // The entity element itself (reversed): closing tag, content, attributes, tag name.
            reversedParts.add(">");
            reversedParts.add(entityTagName);
            reversedParts.add("</");
            try {
                reversedParts.add(StringEscapeUtils.escapeXml11(text.substring(entity.getStartPos(), entity.getEndPos())));
            } catch (StringIndexOutOfBoundsException e) {
                LOGGER.error("Got a wrong named entity (" + entity.toString() + ")", e);
                reversedParts.add("<AN_ERROR_OCCURED/>");
            }
            reversedParts.add("\">");
            // NOTE(review): the source and uri attribute values are written unescaped, so a value
            // containing '"', '&' or '<' yields malformed XML. They are left as they are because
            // it is not visible here whether the matching reader unescapes attribute values.
            if (signed) {
                reversedParts.add(entity.getSource());
                reversedParts.add("\" source=\"");
            }
            reversedParts.add(entity.getNamedEntityUri());
            reversedParts.add(" uri=\"");
            reversedParts.add(entityTagName);
            reversedParts.add("<");
            startOfFormerEntity = entity.getStartPos();
        }
        // Remaining text in front of the first entity (or the whole text if there is none).
        if (startOfFormerEntity > 0) {
            reversedParts.add("</SimpleTextPart>");
            reversedParts.add(StringEscapeUtils.escapeXml11(text.substring(0, startOfFormerEntity)));
            reversedParts.add("<SimpleTextPart>");
        }
        StringBuilder markup = new StringBuilder();
        for (int i = reversedParts.size() - 1; i >= 0; i--) {
            markup.append(reversedParts.get(i));
        }
        return markup.toString();
    }

    /**
     * Registers a property class with {@link CorpusXmlTagHelper}; this method only delegates.
     *
     * @param cls the property class to register
     */
    public static void registerParseableDocumentProperty(Class<? extends ParseableDocumentProperty> cls) {
        CorpusXmlTagHelper.registerParseableDocumentProperty(cls);
    }
}
