public class ReutersDocumentCreator extends Object implements org.dice_research.topicmodeling.automaton.AutomatonCallback
| Modifier and Type | Field and Description |
|---|---|
| private org.dice_research.topicmodeling.automaton.MultiPatternAutomaton | automaton |
| private List<String> | categories |
| private StringBuilder | currentData |
| private org.dice_research.topicmodeling.utils.doc.Document | currentDocument |
| private int | dataStartPos |
| private static org.slf4j.Logger | LOGGER |
| private Deque<org.dice_research.topicmodeling.utils.doc.Document> | queue |
| private int | state<br>0 - start state<br>1 - found document<br>2 - found topics<br>3 - found title<br>4 - found text (body) |
| private ReutersStringParser | stringParser |
| private String | text |
| Constructor and Description |
|---|
| ReutersDocumentCreator(Deque<org.dice_research.topicmodeling.utils.doc.Document> queue) |
| Modifier and Type | Method and Description |
|---|---|
| void | createDocuments(String sgmlText) |
| void | foundPattern(int patternId, int startPos, int length) |
private static final org.slf4j.Logger LOGGER
private org.dice_research.topicmodeling.automaton.MultiPatternAutomaton automaton
private ReutersStringParser stringParser
private Deque<org.dice_research.topicmodeling.utils.doc.Document> queue
private String text
private org.dice_research.topicmodeling.utils.doc.Document currentDocument
private int state
private int dataStartPos
private StringBuilder currentData
public ReutersDocumentCreator(Deque<org.dice_research.topicmodeling.utils.doc.Document> queue)
public void createDocuments(String sgmlText)
public void foundPattern(int patternId,
int startPos,
int length)
Specified by: foundPattern in interface org.dice_research.topicmodeling.automaton.AutomatonCallback

Copyright © 2015–2020. All rights reserved.