| Modifier and Type | Constant Field | Value |
|---|---|---|
| public static final String | SYSTEM_PROPERTY_NAME_FILE_APPENDIX | "CorpusFileNameAppendix" |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| public static final char | ESCAPECHAR | 0 |
| public static final char | QUOTECHAR | 0 |
| public static final char | SEPARATOR | 59 |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final String | DEFAULT_FILE_ENDING | ".html" |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final String | SEPARATING_TOP_WORDS_FILE | "separating_top_words.csv" |
| private static final String | TOP_WORDS_FILE | "top_words.csv" |
| private static final String | TOPIC_PROBS_FILE_NAME | "topic_probabilities.csv" |
| private static final String | TOPICS_FOR_DOCUMENTS_FILE | "topics_for_documents.csv" |
| private static final String | WORD_TOPIC_PROBS_FILE_NAME | "word_topic_probabilities.csv" |
| private static final String | WORD_TOPIC_SEPARATING_PROBS_FILE_NAME | "word_topic_separating_probabilities.csv" |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final int | DEFAULT_BUFFER_SIZE | 512 |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final double | FREQUENCY_CLASS_WEIGHTING | 0.8 |
| private static final int | MAX_NUMBER_OF_TOP_WORDS | 30 |
| private static final String | SEPARATING_TOP_WORDS_FILE | "zipf_based_top_words.csv" |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final int | MAX_SEGMENT_ID | 21 |
| private static final String | REUTERS_FILE_ENDING | ".sgm" |
| private static final String | REUTERS_PATH | "/data/Reuters/reut2-0" |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| public static final String | CORPUS_TAG_NAME | "corpus" |
| public static final String | DOCUMENT_CATEGORIES_SINGLE_CATEGORY_TAG_NAME | "Category" |
| public static final String | DOCUMENT_CATEGORIES_TAG_NAME | "DocumentMultipleCategories" |
| public static final String | NAMESPACE | "http://cube-research.org/topicmodeling/xml-corpus-schema-2013" |
| public static final String | NAMESPACE_DECLARATION | "xmlns=\"http://cube-research.org/topicmodeling/xml-corpus-schema-2013\"" |
| public static final String | SOURCE_ATTRIBUTE_NAME | "source" |
| public static final String | TEXT_PART_TAG_NAME | "SimpleTextPart" |
| public static final String | TEXT_WITH_NAMED_ENTITIES_TAG_NAME | "TextWithNamedEntities" |
| public static final String | URI_ATTRIBUTE_NAME | "uri" |
| public static final String | XML_FILE_HEAD | "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| public static final String | PART_FILE_PREFIX | "part" |
| public static final String | PART_FILE_SUFFIX | ".xml" |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| public static final int | XML_COMMENT_PATTERN_ID | 0 |
| public static final int | XML_ENCODED_CHAR_PATTERN_ID | 2 |
| public static final int | XML_TAG_PATTERN_ID | 1 |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final boolean | USE_DOCUMENT_IDS_FROM_FILE_DEFAULT | true |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final boolean | USE_DOCUMENT_IDS_FROM_FILE_DEFAULT | true |

| Modifier and Type | Constant Field | Value |
|---|---|---|
| private static final long | serialVersionUID | 207762970663404304L |

Copyright © 2015–2020. All rights reserved.