package org.aksw.simba.topicmodeling.preprocessing.docsupplier.decorator;

import com.carrotsearch.hppc.ObjectIntOpenHashMap;
import org.aksw.simba.topicmodeling.preprocessing.docsupplier.DocumentSupplier;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.aksw.simba.topicmodeling.utils.doc.DocumentCategory;
import org.aksw.simba.topicmodeling.utils.doc.DocumentText;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/preprocessing/docsupplier/decorator/NSFTextAndCategoryExtractingSupplierDecorator.class */
/**
 * Document supplier decorator that parses the raw text of a document as a
 * sequence of {@code key : value} entries, extracts the value of the
 * {@value #CATEGORY_KEY} entry as the document's category and the value of the
 * {@value #TEXT_KEY} entry as the document's text.
 *
 * <p>
 * Documents are skipped (the next document of the decorated supplier is
 * returned instead) if no category could be extracted, if the category
 * contains {@value #FILTER_CATEGORY}, or if the extracted text is the
 * placeholder {@value #EMPTY_TEXT}.
 * </p>
 */
public class NSFTextAndCategoryExtractingSupplierDecorator extends AbstractDocumentSupplierDecorator {
    private static final Logger LOGGER = LoggerFactory.getLogger(NSFTextAndCategoryExtractingSupplierDecorator.class);

    /** Not referenced by the code of this class as it stands; documents without a category are always removed. */
    private static final boolean REMOVE_DOCUMENTS_WITHOUT_CATEGORY = true;
    /** Key of the entry holding the category. */
    private static final String CATEGORY_KEY = "Fld Applictn";
    /** Key of the entry holding the text. */
    private static final String TEXT_KEY = "Abstract";
    /** Placeholder value marking a document that has no real text. */
    private static final String EMPTY_TEXT = "Not Available";
    /** Documents whose category contains this String are removed. */
    private static final String FILTER_CATEGORY = "Other";

    /** Number of accepted documents per category. */
    private final ObjectIntOpenHashMap<String> categories = new ObjectIntOpenHashMap<>();

    /**
     * Creates the decorator.
     *
     * @param documentSupplier
     *            the supplier that is decorated
     */
    public NSFTextAndCategoryExtractingSupplierDecorator(DocumentSupplier documentSupplier) {
        super(documentSupplier);
    }

    /**
     * Extracts category and text from the {@link DocumentText} of the given
     * document and adds them as {@link DocumentCategory} and
     * {@link DocumentText} properties.
     *
     * @param document
     *            the document to process; must have a {@link DocumentText}
     *            property
     * @return the given document with the extracted properties, or the result
     *         of {@code getNextDocument()} if the given document is filtered
     *         out
     * @throws IllegalArgumentException
     *             if the document has no {@link DocumentText} property
     */
    @Override
    public Document prepareDocument(Document document) {
        DocumentText property = document.getProperty(DocumentText.class);
        if (property == null) {
            throw new IllegalArgumentException("Got a Document without the needed DocumentText property.");
        }
        String rawText = property.getText();

        String category = extractCategory(rawText);
        if (category.isEmpty() || category.contains(FILTER_CATEGORY)) {
            LOGGER.error("Got a document without a category. Removing it.");
            return getNextDocument();
        }
        document.addProperty(new DocumentCategory(category));

        String extractedText = extractText(rawText);
        // extractText() joins the lines with blanks (and ends with one), so the
        // placeholder has to be compared against the trimmed text. Comparing the
        // untrimmed text could never match.
        if (extractedText.trim().equals(EMPTY_TEXT)) {
            LOGGER.error("Got a document with an empty Text. Removing it.");
            return getNextDocument();
        }

        this.categories.putOrAdd(category, 1, 1);
        document.addProperty(new DocumentText(extractedText));
        return document;
    }

    /**
     * Returns the last non-empty line of the {@value #CATEGORY_KEY} value that
     * is not the literal {@code "null"}.
     *
     * @param text
     *            the raw document text
     * @return the category or an empty String if none could be found
     */
    private String extractCategory(String text) {
        String[] lines = getTrimmedLinesOfValue(getValueForKey(text, CATEGORY_KEY));
        for (int i = lines.length - 1; i >= 0; i--) {
            String line = lines[i];
            if (line != null && !line.isEmpty() && !line.equals("null")) {
                return line;
            }
        }
        LOGGER.warn("Couldn't extract the category from the text. Returning empty String");
        return "";
    }

    /**
     * Returns the {@value #TEXT_KEY} value as a single line: every line of the
     * value is trimmed and followed by a single blank.
     *
     * @param text
     *            the raw document text
     * @return the joined lines (ending with a blank)
     */
    private String extractText(String text) {
        String[] lines = getTrimmedLinesOfValue(getValueForKey(text, TEXT_KEY));
        int length = 0;
        for (String line : lines) {
            length += line.length();
        }
        // one additional character per line for the separating blank
        StringBuilder result = new StringBuilder(length + lines.length);
        for (String line : lines) {
            result.append(line);
            result.append(' ');
        }
        return result.toString();
    }

    /**
     * Splits the given value at line breaks and trims every line.
     *
     * @param value
     *            the value to split
     * @return the trimmed lines
     */
    private String[] getTrimmedLinesOfValue(String value) {
        String[] lines = value.split("\n");
        for (int i = 0; i < lines.length; i++) {
            lines[i] = lines[i].trim();
        }
        return lines;
    }

    /**
     * Returns the value following the given key. The value starts behind the
     * first {@code ':'} after the key (or directly behind the key if the key
     * itself contains a {@code ':'}). It ends at the last line break in front
     * of the next {@code ':'}, since that colon is assumed to belong to the
     * next key, or at the end of the text if there is no further colon.
     *
     * @param text
     *            the text that is searched
     * @param key
     *            the key of the requested value
     * @return the value, or an empty String if the key or its separator
     *         couldn't be found
     */
    private String getValueForKey(String text, String key) {
        int keyPos = text.indexOf(key);
        if (keyPos < 0) {
            LOGGER.error("Couldn't find value for key \"{}\" in the text \"{}\". Returning empty String", key, text);
            return "";
        }

        int valueStart;
        if (key.contains(":")) {
            valueStart = keyPos + key.length();
        } else {
            int separatorPos = text.indexOf(':', keyPos);
            if (separatorPos < 0) {
                // Without this check the start position would silently become 0
                // and the complete text would be returned as value.
                LOGGER.error("Couldn't find the ':' following the key \"{}\". Returning empty String", key);
                return "";
            }
            valueStart = separatorPos + 1;
        }

        int nextColonPos = text.indexOf(':', valueStart);
        if (nextColonPos < 0) {
            // there is no other key following
            return text.substring(valueStart);
        }

        // search the last line break in front of the colon
        int lastLineBreakPos = text.lastIndexOf('\n', nextColonPos - 1);
        if (lastLineBreakPos < valueStart) {
            // No line break between the start of the value and the next colon.
            // Fall back to the text in front of the colon instead of failing
            // with a StringIndexOutOfBoundsException.
            return text.substring(valueStart, nextColonPos);
        }
        return text.substring(valueStart, lastLineBreakPos);
    }
}
