package org.aksw.simba.topicmodeling.io.reuters;

import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import org.aksw.simba.topicmodeling.automaton.AutomatonCallback;
import org.aksw.simba.topicmodeling.automaton.BricsAutomatonManager;
import org.aksw.simba.topicmodeling.automaton.MultiPatternAutomaton;
import org.aksw.simba.topicmodeling.utils.doc.Document;
import org.aksw.simba.topicmodeling.utils.doc.DocumentMultipleCategories;
import org.aksw.simba.topicmodeling.utils.doc.DocumentName;
import org.aksw.simba.topicmodeling.utils.doc.DocumentText;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/aksw/simba/topicmodeling/io/reuters/ReutersDocumentCreator.class */
/**
 * Turns Reuters-style markup into {@link Document} objects.
 *
 * <p>The text handed to {@link #createDocuments(String)} is scanned by a
 * {@link MultiPatternAutomaton} for everything that looks like a tag
 * ({@code <...>}). For every tag found, the automaton calls back into
 * {@link #foundPattern(int, int, int)}, which drives a small state machine:
 *
 * <ul>
 * <li>{@code <REUTERS ...>} starts a new document, {@code </REUTERS>} appends
 * it to the queue given to the constructor,</li>
 * <li>{@code <TOPICS>} ... {@code </TOPICS>} collects the content of the
 * nested {@code <D>} elements as categories,</li>
 * <li>{@code <TITLE>} and {@code <BODY>} elements become the document's name
 * and text.</li>
 * </ul>
 *
 * <p>All other tags are ignored. A tag that shows up in a state in which it is
 * not expected is logged as an error and skipped; the state is left untouched
 * in that case.
 *
 * <p>NOTE(review): the parsing state lives in instance fields, so an instance
 * is presumably meant to be used by one thread at a time - confirm with the
 * callers.
 */
public class ReutersDocumentCreator implements AutomatonCallback {
    private static final Logger LOGGER = LoggerFactory.getLogger(ReutersDocumentCreator.class);

    /*
     * The states are kept as plain ints (instead of an enum) because their
     * numeric value is part of the "wrong state" log message.
     */
    /** No document is open. */
    private static final int STATE_OUTSIDE_DOCUMENT = 0;
    /** Inside a REUTERS element, but not inside one of the handled child elements. */
    private static final int STATE_IN_DOCUMENT = 1;
    /** Inside the TOPICS element of the current document. */
    private static final int STATE_IN_TOPICS = 2;
    /** Inside the TITLE element of the current document. */
    private static final int STATE_IN_TITLE = 3;
    /** Inside the BODY element of the current document. */
    private static final int STATE_IN_BODY = 4;

    /** Start of the attribute of the REUTERS tag that carries the document id. */
    private static final String NEW_ID_ATTRIBUTE = "NEWID=\"";

    /** Receives every completed document. */
    private final Deque<Document> queue;
    /** The text that is currently being parsed. */
    private String text;
    /** The document that is currently being assembled. */
    private Document currentDocument;
    /** Current state of the state machine (one of the {@code STATE_*} constants). */
    private int state;
    /** Position in {@link #text} directly behind the most recent opening D, TITLE or BODY tag. */
    private int dataStartPos;
    /** Categories collected inside the current TOPICS element. */
    private final List<String> categories = new ArrayList<String>();
    /** Automaton with a single pattern (id 0) that matches anything of the form {@code <...>}. */
    private final MultiPatternAutomaton automaton = new BricsAutomatonManager(this, new String[]{"\\<[^\\<\\>]*\\>"});
    private final ReutersStringParser stringParser = new ReutersStringParser();

    /**
     * @param deque
     *            the queue to which every completely parsed document is added
     */
    public ReutersDocumentCreator(Deque<Document> deque) {
        this.queue = deque;
    }

    /**
     * Parses the given text and adds every complete document found in it to
     * the queue. The state machine is reset before parsing starts.
     *
     * @param str
     *            the marked-up text that should be parsed
     */
    public void createDocuments(String str) {
        this.text = str;
        this.state = STATE_OUTSIDE_DOCUMENT;
        this.automaton.parseText(str);
    }

    /**
     * Callback of the automaton. Dispatches on the name of the tag that has
     * been found.
     *
     * @param patternId
     *            id of the matched pattern; only 0 (the tag pattern) is handled
     * @param startPos
     *            position of the opening {@code '<'} of the match inside the
     *            parsed text
     * @param length
     *            length of the match including both angle brackets
     */
    public void foundPattern(int patternId, int startPos, int length) {
        if (patternId != 0) {
            return;
        }
        int endPos = startPos + length;
        // strip the surrounding '<' and '>'
        String tag = this.text.substring(startPos + 1, endPos - 1);

        // The opening REUTERS tag carries attributes, hence the prefix check.
        // "/REUTERS" does not match it and is handled at the end of the chain.
        if (tag.startsWith("REUTERS")) {
            openDocument(tag);
        } else if (tag.equals("TOPICS")) {
            if (isInState(STATE_IN_DOCUMENT, tag)) {
                this.categories.clear();
                this.state = STATE_IN_TOPICS;
            }
        } else if (tag.equals("/TOPICS")) {
            if (isInState(STATE_IN_TOPICS, tag)) {
                this.currentDocument.addProperty(
                        new DocumentMultipleCategories(this.categories.toArray(new String[this.categories.size()])));
                this.state = STATE_IN_DOCUMENT;
            }
        } else if (tag.equals("D")) {
            // D elements outside of TOPICS are skipped silently
            if (this.state == STATE_IN_TOPICS) {
                this.dataStartPos = endPos;
            }
        } else if (tag.equals("/D")) {
            if (this.state == STATE_IN_TOPICS) {
                this.categories.add(readData(startPos));
            }
        } else if (tag.equals("TITLE")) {
            openDataElement(tag, endPos, STATE_IN_TITLE);
        } else if (tag.equals("/TITLE")) {
            if (isInState(STATE_IN_TITLE, tag)) {
                this.currentDocument.addProperty(new DocumentName(readData(startPos)));
                this.state = STATE_IN_DOCUMENT;
            }
        } else if (tag.equals("BODY")) {
            openDataElement(tag, endPos, STATE_IN_BODY);
        } else if (tag.equals("/BODY")) {
            if (isInState(STATE_IN_BODY, tag)) {
                this.currentDocument.addProperty(new DocumentText(readData(startPos)));
                this.state = STATE_IN_DOCUMENT;
            }
        } else if (tag.equals("/REUTERS")) {
            if (isInState(STATE_IN_DOCUMENT, tag)) {
                this.state = STATE_OUTSIDE_DOCUMENT;
                this.queue.add(this.currentDocument);
            }
        }
    }

    /** Starts a new document for an opening REUTERS tag if no other document is open. */
    private void openDocument(String tag) {
        if (!isInState(STATE_OUTSIDE_DOCUMENT, tag)) {
            return;
        }
        this.currentDocument = createDocument(tag);
        this.state = STATE_IN_DOCUMENT;
    }

    /**
     * Creates the document for the given REUTERS tag. The document gets the
     * id given by the tag's NEWID attribute; if the attribute is missing or
     * its value is not an int, a warning is logged and a document without an
     * explicit id is created.
     */
    private Document createDocument(String tag) {
        int attributePos = tag.lastIndexOf(NEW_ID_ATTRIBUTE);
        if (attributePos >= 0) {
            int valueStart = attributePos + NEW_ID_ATTRIBUTE.length();
            // Read up to the closing quote of the attribute itself, so that the
            // id is found even if NEWID is not the very last thing in the tag.
            int valueEnd = tag.indexOf('"', valueStart);
            if (valueEnd >= 0) {
                int documentId;
                try {
                    documentId = Integer.parseInt(tag.substring(valueStart, valueEnd));
                } catch (NumberFormatException e) {
                    LOGGER.warn("Couldn't parse documentId.", e);
                    return new Document();
                }
                return new Document(documentId);
            }
        }
        LOGGER.warn("Couldn't parse documentId.");
        return new Document();
    }

    /**
     * Handles the opening tag of a TITLE or BODY element: remembers where its
     * content starts and switches to the given state.
     */
    private void openDataElement(String tag, int contentStartPos, int newState) {
        if (isInState(STATE_IN_DOCUMENT, tag)) {
            this.dataStartPos = contentStartPos;
            this.state = newState;
        }
    }

    /**
     * Returns the parsed content between the last remembered start position
     * and the given end position (the start of the closing tag).
     */
    private String readData(int dataEndPos) {
        return this.stringParser.parseString(this.text.substring(this.dataStartPos, dataEndPos));
    }

    /**
     * Checks whether the state machine is in the expected state and logs an
     * error mentioning the offending tag if it is not.
     */
    private boolean isInState(int expectedState, String tag) {
        if (this.state == expectedState) {
            return true;
        }
        LOGGER.error("found \"{}\" but I'm in the wrong state ({})", tag, this.state);
        return false;
    }
}
