package org.aksw.simba.topicmodeling.preprocessing.docsupplier.decorator;

import org.aksw.simba.topicmodeling.lang.Term;
import org.aksw.simba.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.aksw.simba.topicmodeling.utils.doc.DocumentText;
import org.aksw.simba.topicmodeling.utils.doc.DocumentTextWithTermInfo;
import org.aksw.simba.topicmodeling.utils.doc.TermTokenizedText;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/preprocessing/docsupplier/decorator/DocumentTextWithTermInfoCreatingSupplierDecorator.class */
/**
 * Decorator that derives a {@link DocumentTextWithTermInfo} property from a document's
 * {@link DocumentText} and {@link TermTokenizedText} properties.
 *
 * <p>The generated text is the original document text in which every term that could be located
 * is replaced by an inline annotation of the form
 * {@code [wordForm|lemma|posTag|properties]}, where {@code properties} is the decimal
 * representation of {@code term.properties.getAsLong()}. Text between the terms is kept, and
 * every occurrence of one of the four reserved characters ({@link #TERM_START_CHAR},
 * {@link #TERM_END_CHAR}, {@link #SEPARATION_CHAR}, {@link #ESCAPE_CHAR}) in the text or in a
 * term's string fields is prefixed with {@link #ESCAPE_CHAR}.
 */
public class DocumentTextWithTermInfoCreatingSupplierDecorator extends AbstractPropertyAppendingDocumentSupplierDecorator<DocumentTextWithTermInfo> {
    /** Character that opens a term annotation. */
    public static final char TERM_START_CHAR = '[';
    /** Character that closes a term annotation. */
    public static final char TERM_END_CHAR = ']';
    /** Character that separates the fields inside a term annotation. */
    public static final char SEPARATION_CHAR = '|';
    /** Character that is put in front of a reserved character to escape it. */
    public static final char ESCAPE_CHAR = '\\';
    private static final Logger LOGGER = LoggerFactory.getLogger(DocumentTextWithTermInfoCreatingSupplierDecorator.class);
    /** Maximum number of characters of the unmatched remaining text that are written to the log. */
    private static final int LOG_PREVIEW_LENGTH = 50;

    /**
     * Creates a decorator wrapping the given supplier.
     *
     * @param documentSupplier the supplier that is handed to the super class constructor
     */
    public DocumentTextWithTermInfoCreatingSupplierDecorator(DocumentSupplier documentSupplier) {
        super(documentSupplier);
    }

    /**
     * Builds the annotated text property for the given document.
     *
     * @param document the document that has to carry a {@link DocumentText} and a
     *                 {@link TermTokenizedText} property
     * @return the annotated text, or {@code null} if one of the two required properties is
     *         missing (an error is logged in that case; how the super class treats a
     *         {@code null} result is not visible in this class)
     */
    @Override
    public DocumentTextWithTermInfo createPropertyForDocument(Document document) {
        DocumentText documentText = (DocumentText) document.getProperty(DocumentText.class);
        if (documentText == null) {
            LOGGER.error("Got a document without the needed DocumentText property. Ignoring this document.");
            return null;
        }
        TermTokenizedText termTokenizedText = (TermTokenizedText) document.getProperty(TermTokenizedText.class);
        if (termTokenizedText == null) {
            LOGGER.error("Got a document without the needed TermTokenizedText property. Ignoring this document.");
            return null;
        }
        return createTextWithTermInfo(documentText, termTokenizedText);
    }

    /**
     * Walks through the terms in their given order, searches each term's word form in the text
     * starting behind the previously matched term and replaces it by its annotation. Terms whose
     * word form can not be found from the current position on are logged and skipped without
     * moving the search position.
     *
     * <p>NOTE(review): lemma and POS tag are passed to {@link #escapeString(String)} unchecked,
     * so a {@code null} value would raise a {@link NullPointerException} - confirm that
     * {@code Term} guarantees non-null values.
     */
    private DocumentTextWithTermInfo createTextWithTermInfo(DocumentText documentText, TermTokenizedText termTokenizedText) {
        String text = documentText.getText();
        StringBuilder annotated = new StringBuilder(text.length());
        // Index of the first character of the text that has not been copied to the result yet.
        int cursor = 0;
        for (Term term : termTokenizedText.getTermTokenizedText()) {
            String wordForm = term.getWordForm();
            int termStart = text.indexOf(wordForm, cursor);
            if (termStart < 0) {
                LOGGER.error("Couldn't find a term inside the given text. Ignoring this term.\nTerm = {}\nremaining text = \"{}\".",
                        term, previewOfRemainingText(text, cursor));
                continue;
            }
            // Keep the (escaped) text that lies between the previous term and this one.
            if (cursor < termStart) {
                annotated.append(escapeString(text.substring(cursor, termStart)));
            }
            annotated.append(TERM_START_CHAR)
                    .append(escapeString(wordForm))
                    .append(SEPARATION_CHAR)
                    .append(escapeString(term.getLemma()))
                    .append(SEPARATION_CHAR)
                    .append(escapeString(term.getPosTag()))
                    .append(SEPARATION_CHAR)
                    .append(term.properties.getAsLong())
                    .append(TERM_END_CHAR);
            cursor = termStart + wordForm.length();
        }
        // Copy whatever follows the last matched term.
        if (cursor < text.length()) {
            annotated.append(escapeString(text.substring(cursor)));
        }
        return new DocumentTextWithTermInfo(annotated.toString());
    }

    /**
     * Returns the part of the text starting at {@code from} for log output. If at least
     * {@link #LOG_PREVIEW_LENGTH} characters remain, only the first {@link #LOG_PREVIEW_LENGTH}
     * of them are returned, followed by {@code "..."}.
     */
    private static String previewOfRemainingText(String text, int from) {
        if (text.length() - from < LOG_PREVIEW_LENGTH) {
            return text.substring(from);
        }
        return text.substring(from, from + LOG_PREVIEW_LENGTH) + "...";
    }

    /**
     * Prefixes every occurrence of {@link #TERM_START_CHAR}, {@link #TERM_END_CHAR},
     * {@link #SEPARATION_CHAR} and {@link #ESCAPE_CHAR} with {@link #ESCAPE_CHAR}.
     *
     * @param str the string that should be escaped; must not be {@code null}
     * @return the escaped string, or the very same instance if {@code str} contains none of the
     *         reserved characters
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String escapeString(String str) {
        final int length = str.length();
        // Allocated lazily: the typical input needs no escaping and is returned unchanged.
        StringBuilder escaped = null;
        // Start of the region of str that has not been copied into the builder yet.
        int pendingStart = 0;
        for (int pos = 0; pos < length; pos++) {
            switch (str.charAt(pos)) {
                case TERM_START_CHAR:
                case ESCAPE_CHAR:
                case TERM_END_CHAR:
                case SEPARATION_CHAR:
                    if (escaped == null) {
                        escaped = new StringBuilder(length + 8);
                    }
                    // Copy everything in front of the reserved character and add the escape
                    // character. The reserved character itself stays pending and is copied
                    // together with the following region.
                    escaped.append(str, pendingStart, pos).append(ESCAPE_CHAR);
                    pendingStart = pos;
                    break;
                default:
                    break;
            }
        }
        if (escaped == null) {
            return str;
        }
        // pendingStart points at the last reserved character, so this region is never empty.
        return escaped.append(str, pendingStart, length).toString();
    }
}
