public class ZipfBasedTopWordWriter extends AbstractModelFilesWriter implements CSVFileProcessor
| Modifier and Type | Field and Description |
|---|---|
| private String | fileNamePrefix |
| private static double | FREQUENCY_CLASS_WEIGHTING |
| private static double | LOG2 |
| private static org.slf4j.Logger | LOGGER |
| private static int | MAX_NUMBER_OF_TOP_WORDS |
| private org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier | probTopicStateSupplier |
| private static String | SEPARATING_TOP_WORDS_FILE |

folder

ESCAPECHAR, QUOTECHAR, SEPARATOR

| Constructor and Description |
|---|
| ZipfBasedTopWordWriter(File folder, String fileNamePrefix) |
| ZipfBasedTopWordWriter(org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier probTopicStateSupplier, File folder) |

| Modifier and Type | Method and Description |
|---|---|
| private int[] | determineBestWords(int[] wordCounts, com.carrotsearch.hppc.IntDoubleOpenHashMap frequencyClasses, com.carrotsearch.hppc.IntDoubleOpenHashMap differences, double maxBackgroundClass, double maxForegroundClass) |
| private double | determineFrequencyClasses(int[] wordCounts, com.carrotsearch.hppc.IntDoubleOpenHashMap frequencyClasses) |
| protected com.carrotsearch.hppc.IntDoubleOpenHashMap | getClassDifferences(com.carrotsearch.hppc.IntDoubleOpenHashMap foregroundFrequencyClasses, com.carrotsearch.hppc.IntDoubleOpenHashMap backgroundFrequencyClasses, double maxBackgroundClass) |
| private int[] | getCountDifference(int[] backgroundCount, int[] foregroundCount) |
| protected int[] | getFacetsForSingleTopic(int[] foregroundWordCounts, int[] backgroundWordCounts) |
| private double | getMaxCount(int[] wordCounts) |
| void | writeModelToFiles(org.dice_research.topicmodeling.algorithms.Model model) |
| private void | writeTopWords(int[][] topWords, org.dice_research.topicmodeling.utils.vocabulary.Vocabulary vocabulary) |

getFolder, setFolder

private static final org.slf4j.Logger LOGGER
private static final String SEPARATING_TOP_WORDS_FILE
private static final int MAX_NUMBER_OF_TOP_WORDS
private static final double FREQUENCY_CLASS_WEIGHTING
private static final double LOG2
private String fileNamePrefix
private org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier probTopicStateSupplier
public ZipfBasedTopWordWriter(org.dice_research.topicmodeling.algorithms.ProbTopicModelingAlgorithmStateSupplier probTopicStateSupplier,
File folder)
public void writeModelToFiles(org.dice_research.topicmodeling.algorithms.Model model)
Specified by: writeModelToFiles in interface ModelWriter

private void writeTopWords(int[][] topWords,
org.dice_research.topicmodeling.utils.vocabulary.Vocabulary vocabulary)
protected int[] getFacetsForSingleTopic(int[] foregroundWordCounts,
int[] backgroundWordCounts)
private double determineFrequencyClasses(int[] wordCounts,
com.carrotsearch.hppc.IntDoubleOpenHashMap frequencyClasses)
private int[] getCountDifference(int[] backgroundCount,
int[] foregroundCount)
private double getMaxCount(int[] wordCounts)
protected com.carrotsearch.hppc.IntDoubleOpenHashMap getClassDifferences(com.carrotsearch.hppc.IntDoubleOpenHashMap foregroundFrequencyClasses,
com.carrotsearch.hppc.IntDoubleOpenHashMap backgroundFrequencyClasses,
double maxBackgroundClass)
private int[] determineBestWords(int[] wordCounts,
com.carrotsearch.hppc.IntDoubleOpenHashMap frequencyClasses,
com.carrotsearch.hppc.IntDoubleOpenHashMap differences,
double maxBackgroundClass,
double maxForegroundClass)
Copyright © 2015–2020. All rights reserved.