public class Qald7CreationTool extends Object
| Modifier and Type | Field and Description |
|---|---|
(package private) int |
badQuestionCounter |
private static String |
DBO_URI |
private int |
duplicate |
static Set<Dataset> |
HYBRID_SETS |
static Set<Dataset> |
MULTILINGUAL_TRAIN_TEST_SETS
QALD1 and QALD2 are not multilingual!
|
private static String |
RES_URI |
private ThreadedSPARQL |
sparql |
| Constructor and Description |
|---|
Qald7CreationTool() |
Qald7CreationTool(String sparqlEndpoint,
int timeout) |
| Modifier and Type | Method and Description |
|---|---|
private void |
addSave(Map<String,List<Qald7Question>> map,
String question,
Qald7Question q) |
private boolean |
checkAnswertypeSet(IQuestion q) |
private boolean |
checkAtleastSixLanguages(IQuestion q) |
private boolean |
checkIsOnlydbo(String sparqlQuery) |
private boolean |
checkKeywordsPresent(IQuestion q) |
private boolean |
checkSparqlPresent(IQuestion q) |
void |
createFileReport(Set<Qald7Question> allQuestions,
String pathAndFilenameWithExtension,
Set<Fail> ignoreFlags) |
void |
createFileReportForTestQuestions(Set<Dataset> datasets,
boolean autocorrectOnlydbo,
String pathAndFilenameWithExtension,
Set<Fail> ignoreFlags)
Creates a file report of all bad questions in the given datasets.
|
private void |
createQald7Dataset(Set<Qald7Question> allQuestions,
String path,
String filenameWithoutExtension) |
void |
createQald7HybridDataset(Set<Dataset> hybridDatasets,
String path,
String filenameWithoutExtension)
Creates the hybrid datasets.
|
void |
createQald7MultilingualTrainDataset(Set<Dataset> datasets,
boolean fileReport,
boolean autocorrectOnlydbo,
String path,
String filenameWithoutExtension)
Creates the multilingual train datasets.
|
void |
destroy()
Call this if you don't need this object anymore.
|
private Set<Qald7Question> |
extractBadQuestionsFromAnnotated(Set<Qald7Question> questions,
Set<Fail> ignoreFlags) |
private Set<Qald7Question> |
extractGoodTrainQuestionsFromAnnotated(Set<Qald7Question> questions) |
private Set<Qald7Question> |
findAndSelectBestDuplicate(List<Qald7Question> questions) |
Set<String> |
getAnswersFromServer(IQuestion q)
Returns the answers from the official DBpedia endpoint for the SPARQL query stored in the given
IQuestion |
Set<Qald7Question> |
getQald7HybridQuestions(Set<Dataset> datasets)
Returns all Hybrid questions for Qald7 (Loads all previous qald hybrid questions and drops duplicates).
|
Set<Qald7Question> |
getQald7MultilingualTrainQuestions(Set<Dataset> datasets,
boolean autocorrectOnlydbo)
Loads all questions from the given datasets and checks question integrity (is the stored answer set still identical to the one returned for the given SPARQL query, is a SPARQL query present and parseable, are
at least 6 languages available with keywords, is an answer type set, ...).
|
Set<Qald7Question> |
loadAndAnnotateTrain(Set<Dataset> datasets,
boolean autocorrectOnlyDBO) |
static void |
main(String[] args) |
private static final String DBO_URI
private static final String RES_URI
private int duplicate
public static final Set<Dataset> MULTILINGUAL_TRAIN_TEST_SETS
private final ThreadedSPARQL sparql
int badQuestionCounter
public Qald7CreationTool()
public Qald7CreationTool(String sparqlEndpoint, int timeout)
public Set<Qald7Question> getQald7HybridQuestions(Set<Dataset> datasets)
datasets - All datasets from which questions should be extracted
public void createQald7HybridDataset(Set<Dataset> hybridDatasets, String path, String filenameWithoutExtension)
hybridDatasets - The sets the questions are taken from.
path - The path to write the datasets to.
filenameWithoutExtension - The name of the new dataset
public void createQald7MultilingualTrainDataset(Set<Dataset> datasets, boolean fileReport, boolean autocorrectOnlydbo, String path, String filenameWithoutExtension)
datasets - The sets the questions are taken from.
autocorrectOnlydbo - Is a bad Onlydbo-flag an exclusion criterion (question won't appear in file) for a question, or should it be autofixed?
path - The path to write the datasets to.
filenameWithoutExtension - The name of the new dataset
public Set<Qald7Question> getQald7MultilingualTrainQuestions(Set<Dataset> datasets, boolean autocorrectOnlydbo)
No question annotated with a Fail will be in the returned
set, so the returned questions are all clean. To get a duplicate-free dataset annotated with Fail flags, use loadAndAnnotateTrain(Set, boolean)
datasets - The datasets from which the questions are gathered
autocorrectOnlydbo - Is a bad Onlydbo-flag an exclusion criterion (question won't appear in file) for a question, or should it be autofixed?
public Set<Qald7Question> loadAndAnnotateTrain(Set<Dataset> datasets, boolean autocorrectOnlyDBO)
private boolean checkSparqlPresent(IQuestion q)
private boolean checkAnswertypeSet(IQuestion q)
private boolean checkKeywordsPresent(IQuestion q)
private boolean checkAtleastSixLanguages(IQuestion q)
public Set<String> getAnswersFromServer(IQuestion q) throws ExecutionException
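Returns the answers from the official DBpedia endpoint for the SPARQL query stored in the given IQuestion.
q - Question to be answered
Throws: ExecutionException
private void addSave(Map<String,List<Qald7Question>> map, String question, Qald7Question q)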
private Set<Qald7Question> findAndSelectBestDuplicate(List<Qald7Question> questions)
private Set<Qald7Question> extractGoodTrainQuestionsFromAnnotated(Set<Qald7Question> questions)
private Set<Qald7Question> extractBadQuestionsFromAnnotated(Set<Qald7Question> questions, Set<Fail> ignoreFlags)
private void createQald7Dataset(Set<Qald7Question> allQuestions, String path, String filenameWithoutExtension)
public void createFileReportForTestQuestions(Set<Dataset> datasets, boolean autocorrectOnlydbo, String pathAndFilenameWithExtension, Set<Fail> ignoreFlags)
datasets - All datasets to be checked
autocorrectOnlydbo - Is a bad Onlydbo-flag an exclusion criterion (question won't appear in file) for a question, or should it be autofixed?
pathAndFilenameWithExtension - Path and name of the new file report
skipQuestionsWithTooLittleLanguages - Normally, multilingual datasets have at least six languages. When this flag is set, all questions with fewer languages will be ignored; otherwise it is a Fail error and the
question goes into the report
public void createFileReport(Set<Qald7Question> allQuestions, String pathAndFilenameWithExtension, Set<Fail> ignoreFlags)
private boolean checkIsOnlydbo(String sparqlQuery) throws org.apache.jena.query.QueryParseException
Throws: org.apache.jena.query.QueryParseException
public void destroy()
public static void main(String[] args)
Copyright © 2016–2017 Pivotal Software, Inc. All rights reserved.