public class MalletLdaWrapper extends Object implements org.dice_research.topicmodeling.algorithms.ModelingAlgorithm, org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
| Modifier and Type | Class and Description |
|---|---|
| protected static class | MalletLdaWrapper.MalletLDATopicModeler |
| Modifier and Type | Field and Description |
|---|---|
| protected MalletAlphabetWrapper | alphabet |
| private static org.slf4j.Logger | LOGGER |
| private static String | MALLET_REGEX_TOKEN |
| private String | malletRegexToken |
| protected long | seed |
| private static long | serialVersionUID |
| protected MalletLdaWrapper.MalletLDATopicModeler | topicModel |
| protected org.dice_research.topicmodeling.algorithms.WordCounter | wordCounter |
| Constructor and Description |
|---|
| MalletLdaWrapper(int numberOfTopics) |
| MalletLdaWrapper(int numberOfTopics, double alphaSum, double beta) |
| MalletLdaWrapper(int numberOfTopics, double alphaSum, double beta, long seed) |
| MalletLdaWrapper(int numberOfTopics, long seed) |
| MalletLdaWrapper(cc.mallet.types.LabelAlphabet topicAlphabet, double alphaSum, double beta) |
| MalletLdaWrapper(cc.mallet.types.LabelAlphabet topicAlphabet, double alphaSum, double beta, long seed) |
| Modifier and Type | Method and Description |
|---|---|
| protected cc.mallet.types.Instance | createInstanceFromDocument(org.dice_research.topicmodeling.utils.doc.Document document, cc.mallet.types.Alphabet alphabet) |
| protected void | createMultipleSpellingVocabulary(org.dice_research.topicmodeling.utils.corpus.DocumentListCorpus<?> corpus) |
| org.dice_research.topicmodeling.preprocessing.Preprocessor | createPreprocessor(org.dice_research.topicmodeling.preprocessing.docsupplier.DocumentSupplier supplier, org.dice_research.topicmodeling.lang.Language lang) |
| private void | directInitialization(org.dice_research.topicmodeling.utils.corpus.Corpus corpus, org.dice_research.topicmodeling.utils.vocabulary.Vocabulary vocabulary) |
| org.dice_research.topicmodeling.algorithms.Model | getModel() |
| int | getNumberOfDocuments() |
| int | getNumberOfTopics() |
| int | getNumberOfWords() |
| long | getSeed() |
| protected String[] | getTokenizedTermsAsText(org.dice_research.topicmodeling.utils.corpus.DocumentListCorpus<?> corpus) |
| org.dice_research.topicmodeling.utils.vocabulary.Vocabulary | getVocabulary() |
| org.dice_research.topicmodeling.algorithms.WordCounter | getWordCounts() |
| int[] | getWordsOfDocument(int documentId) |
| int[] | getWordTopicAssignmentForDocument(int documentId) |
| void | initialize(org.dice_research.topicmodeling.utils.corpus.Corpus corpus) |
| void | performNextStep() |
| void | setMalletRegexToken(String malletRegexToken) |
| void | setOptimizeInterval(int interval) Set to 0 to turn optimization off. |
private static final long serialVersionUID
private static final org.slf4j.Logger LOGGER
private static final String MALLET_REGEX_TOKEN
protected MalletLdaWrapper.MalletLDATopicModeler topicModel
protected MalletAlphabetWrapper alphabet
protected long seed
protected transient org.dice_research.topicmodeling.algorithms.WordCounter wordCounter
private String malletRegexToken
public MalletLdaWrapper(int numberOfTopics)
public MalletLdaWrapper(int numberOfTopics,
long seed)
public MalletLdaWrapper(int numberOfTopics,
double alphaSum,
double beta)
public MalletLdaWrapper(int numberOfTopics,
double alphaSum,
double beta,
long seed)
public MalletLdaWrapper(cc.mallet.types.LabelAlphabet topicAlphabet,
double alphaSum,
double beta)
public MalletLdaWrapper(cc.mallet.types.LabelAlphabet topicAlphabet,
double alphaSum,
double beta,
long seed)
public org.dice_research.topicmodeling.preprocessing.Preprocessor createPreprocessor(org.dice_research.topicmodeling.preprocessing.docsupplier.DocumentSupplier supplier,
org.dice_research.topicmodeling.lang.Language lang)
Specified by: createPreprocessor in interface org.dice_research.topicmodeling.preprocessing.PreprocessorFactory
public void initialize(org.dice_research.topicmodeling.utils.corpus.Corpus corpus)
Specified by: initialize in interface org.dice_research.topicmodeling.algorithms.ModelingAlgorithm
protected String[] getTokenizedTermsAsText(org.dice_research.topicmodeling.utils.corpus.DocumentListCorpus<?> corpus)
private void directInitialization(org.dice_research.topicmodeling.utils.corpus.Corpus corpus,
org.dice_research.topicmodeling.utils.vocabulary.Vocabulary vocabulary)
protected cc.mallet.types.Instance createInstanceFromDocument(org.dice_research.topicmodeling.utils.doc.Document document,
cc.mallet.types.Alphabet alphabet)
protected void createMultipleSpellingVocabulary(org.dice_research.topicmodeling.utils.corpus.DocumentListCorpus<?> corpus)
public void performNextStep()
Specified by: performNextStep in interface org.dice_research.topicmodeling.algorithms.ModelingAlgorithm
public org.dice_research.topicmodeling.algorithms.Model getModel()
Specified by: getModel in interface org.dice_research.topicmodeling.algorithms.ModelingAlgorithm
public int[] getWordTopicAssignmentForDocument(int documentId)
Specified by: getWordTopicAssignmentForDocument in interface org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
public int[] getWordsOfDocument(int documentId)
Specified by: getWordsOfDocument in interface org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
public int getNumberOfTopics()
Specified by: getNumberOfTopics in interface org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
public int getNumberOfDocuments()
Specified by: getNumberOfDocuments in interface org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
public int getNumberOfWords()
Specified by: getNumberOfWords in interface org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
public org.dice_research.topicmodeling.utils.vocabulary.Vocabulary getVocabulary()
Specified by: getVocabulary in interface org.dice_research.topicmodeling.algorithms.VocabularyContaining
public long getSeed()
Specified by: getSeed in interface org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
public org.dice_research.topicmodeling.algorithms.WordCounter getWordCounts()
Specified by: getWordCounts in interface org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier
public void setMalletRegexToken(String malletRegexToken)
public void setOptimizeInterval(int interval)
Parameters: interval - the optimization interval; set to 0 to turn optimization off.

Copyright © 2015–2020. All rights reserved.