package org.aksw.simba.topicmodeling.preprocessing.docsupplier.decorator;

import com.carrotsearch.hppc.IntArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.aksw.simba.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.aksw.simba.topicmodeling.utils.doc.DocumentText;

/* loaded from: input_file:org/aksw/simba/topicmodeling/preprocessing/docsupplier/decorator/UseNetTextExtractingSupplierDecorator.class */
/**
 * Decorator that reduces the {@link DocumentText} of every document to the body of a UseNet
 * message.
 *
 * <p>Two strategies are applied, in this order:
 * <ol>
 * <li>If the text contains a PGP clear-sign start marker followed by a PGP signature marker,
 * the text between the two markers is used as-is.</li>
 * <li>Otherwise the leading run of known header lines and blank lines is cut off and every
 * whitespace-delimited token containing an {@code '@'} is removed from the remainder.</li>
 * </ol>
 */
public class UseNetTextExtractingSupplierDecorator extends AbstractDocumentSupplierDecorator {
    /** Marker line that precedes the signed content of a PGP clear-signed message. */
    private static final String PGP_MESSAGE_START = "-----BEGIN PGP SIGNED MESSAGE-----";

    /** Marker line that opens the PGP signature block, i.e. the signed content ends here. */
    private static final String PGP_MESSAGE_END = "-----BEGIN PGP SIGNATURE-----";

    /**
     * Lower-case header keys (the part of a line before the first {@code ':'}) that mark a
     * line as a message header. The list contains a few odd spellings such as
     * {@code "foolowup-to"} next to {@code "followup-to"}; presumably they mirror misspelled
     * headers found in the processed corpus, so they are kept verbatim.
     */
    private static final Set<String> USE_NET_HEADING_KEYS = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "article-i.d.",
                    "date",
                    "disclaimer",
                    "distribution",
                    "expires",
                    "followup-to",
                    "foolowup-to",
                    "from",
                    "important-info",
                    "in-reply-to",
                    "keywords",
                    "lines",
                    "message-id",
                    "news-software",
                    "newsgroups",
                    "nf-from",
                    "nf-id",
                    "nntp-posting-host",
                    "nntp-posting-user",
                    "organization",
                    "originator",
                    "path",
                    "references",
                    "reply-to",
                    "sender",
                    "subject",
                    "summary",
                    "supersedes",
                    "to",
                    "was subject",
                    "x-added",
                    "x-alt.reply-address",
                    "x-disclaimer",
                    "x-gated-by",
                    "x-header",
                    "x-mailer",
                    "x-md4-signature",
                    "x-newsreader",
                    "x-received",
                    "x-sender",
                    "x-sequence",
                    "x-telephone",
                    "x-to",
                    "x-useragent",
                    "x-us-mail",
                    "x-xxdate",
                    "x-xxmessage-id",
                    "xref",
                    "xuseragent",
                    "xxxdate")));

    /**
     * Creates a decorator around the given supplier.
     *
     * @param documentSupplier the supplier whose documents are processed
     */
    public UseNetTextExtractingSupplierDecorator(DocumentSupplier documentSupplier) {
        super(documentSupplier);
    }

    /**
     * Replaces the text of the document's {@link DocumentText} property with the extracted
     * message body and returns the same document instance.
     *
     * @param document the document to process
     * @return the given document, with its {@link DocumentText} updated in place
     * @throws IllegalArgumentException if the document has no {@link DocumentText} property
     */
    @Override // org.aksw.simba.topicmodeling.preprocessing.docsupplier.decorator.AbstractDocumentSupplierDecorator
    public Document prepareDocument(Document document) {
        DocumentText textProperty = document.getProperty(DocumentText.class);
        if (textProperty == null) {
            throw new IllegalArgumentException("Got a document without the needed DocumentText property.");
        }
        String rawText = textProperty.getText();
        String body = identifyTextUsingPgpStatements(rawText);
        if (body == null) {
            // No PGP envelope: fall back to header stripping plus address removal.
            body = removeAddresses(rawText.substring(getTextStartIndex(rawText)));
        }
        textProperty.setText(body);
        return document;
    }

    /**
     * Finds the offset of the first line that is neither a known header line nor blank.
     *
     * @param str the complete message text
     * @return the start offset of the first body line, or {@code str.length()} if the text
     *         consists only of header lines and blank lines
     */
    private int getTextStartIndex(String str) {
        final int length = str.length();
        int lineStart = 0;
        while (true) {
            int newline = str.indexOf('\n', lineStart);
            int lineEnd = (newline < 0) ? length : newline;
            String line = str.substring(lineStart, lineEnd);
            // A line is skipped if it is a header or blank; anything else starts the body.
            if (!isHeaderLine(line) && !line.trim().isEmpty()) {
                return lineStart;
            }
            if (newline < 0) {
                // The final line was a header or blank, so there is no body at all.
                return length;
            }
            lineStart = newline + 1;
        }
    }

    /**
     * Tells whether the part of the line before its first {@code ':'} is a known header key.
     * The comparison is case-insensitive and uses {@link Locale#ROOT} so that the result does
     * not depend on the default locale of the JVM.
     *
     * @param line a single line without its line terminator
     * @return {@code true} if the line starts with a known header key followed by a colon
     */
    private static boolean isHeaderLine(String line) {
        int colon = line.indexOf(':');
        if (colon <= 0) {
            return false;
        }
        return USE_NET_HEADING_KEYS.contains(line.substring(0, colon).toLowerCase(Locale.ROOT));
    }

    /**
     * Skips an attribution line such as {@code "someone writes:"}. The line starting at
     * {@code i} is skipped if it contains a colon and the text before the first colon ends
     * with {@code "writes"} or {@code "wrote"}.
     *
     * <p>This helper is not called from within this class.
     *
     * @param str the text to inspect
     * @param i the offset at which the candidate line starts
     * @return the offset of the following line if the line is an attribution line,
     *         otherwise {@code i} unchanged
     */
    private int removeAuthorLine(String str, int i) {
        int newline = str.indexOf('\n', i);
        int colon = str.indexOf(':', i);
        // The colon has to be located on the line starting at i, i.e. before its newline.
        boolean colonOnLine = colon > 0 && colon < newline;
        if (!colonOnLine) {
            return i;
        }
        String prefix = str.substring(i, colon);
        boolean attribution = prefix.endsWith("writes") || prefix.endsWith("wrote");
        return attribution ? newline + 1 : i;
    }

    /**
     * Removes every whitespace-delimited token that contains an {@code '@'} character. The
     * whitespace surrounding a removed token is kept.
     *
     * @param str the text to clean
     * @return the text without such tokens; the same instance if it contains no {@code '@'}
     */
    private String removeAddresses(String str) {
        int at = str.indexOf('@');
        if (at < 0) {
            return str;
        }
        final int length = str.length();
        StringBuilder cleaned = new StringBuilder(length);
        int copyFrom = 0;
        while (at >= 0) {
            // Widen [tokenStart, tokenEnd) around the '@' up to the surrounding whitespace.
            int tokenStart = at;
            while (tokenStart > 0 && !Character.isWhitespace(str.charAt(tokenStart - 1))) {
                tokenStart--;
            }
            int tokenEnd = at;
            while (tokenEnd < length && !Character.isWhitespace(str.charAt(tokenEnd))) {
                tokenEnd++;
            }
            cleaned.append(str, copyFrom, tokenStart);
            copyFrom = tokenEnd;
            at = str.indexOf('@', tokenEnd);
        }
        cleaned.append(str, copyFrom, length);
        return cleaned.toString();
    }

    /**
     * Extracts the text enclosed by the PGP clear-sign start marker and the first PGP
     * signature marker that follows it.
     *
     * @param str the complete message text
     * @return the text between the two markers, or {@code null} if the start marker is
     *         missing or no signature marker follows it
     */
    private String identifyTextUsingPgpStatements(String str) {
        int startMarker = str.indexOf(PGP_MESSAGE_START);
        if (startMarker < 0) {
            return null;
        }
        int bodyStart = startMarker + PGP_MESSAGE_START.length();
        // Search behind the start marker only, so that the substring bounds are always ordered.
        int bodyEnd = str.indexOf(PGP_MESSAGE_END, bodyStart);
        if (bodyEnd < 0) {
            return null;
        }
        return str.substring(bodyStart, bodyEnd);
    }
}
