001 /*
002 * LuceneSearch.java
003 *
004 * Copyright (c) 1995-2010, The University of Sheffield. See the file
005 * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
006 *
007 * This file is part of GATE (see http://gate.ac.uk/), and is free
008 * software, licenced under the GNU Library General Public License,
009 * Version 2, June 1991 (in the distribution as file licence.html,
010 * and also available at http://gate.ac.uk/gate/licence.html).
011 *
012 * Rosen Marinov, 19/Apr/2002
013 *
014 */
015
016 package gate.creole.ir.lucene;
017
018 import java.io.File;
019 import java.util.List;
020 import java.util.Vector;
021
022 import org.apache.lucene.analysis.SimpleAnalyzer;
023 import org.apache.lucene.queryParser.QueryParser;
024 import org.apache.lucene.search.*;
025 import org.apache.lucene.store.FSDirectory;
026 import org.apache.lucene.util.Version;
027
028 import gate.creole.ir.*;
029
030 /** This class represents Lucene implementation of serching in index. */
031 public class LuceneSearch implements Search {
032
033 /** Default number of maximum results when no limit is specified
034 * in a search method call
035 */
036 private static int DEFAULTMAXRESULTS = 1000000;
037
038 /** An instance of indexed corpus*/
039 private IndexedCorpus indexedCorpus;
040
041 /** Set the indexed corpus resource for searching. */
042 public void setCorpus(IndexedCorpus ic){
043 this.indexedCorpus = ic;
044 }
045
046 /** Search in corpus with this query.
047 * Result length is limited by DEFAULTMAXRESULTS */
048 public QueryResultList search(String query)
049 throws IndexException, SearchException{
050 return search(query, DEFAULTMAXRESULTS);
051 }
052
053 /** Search in corpus with this query.
054 * Size of the result list is limited. */
055 public QueryResultList search(String query, int limit)
056 throws IndexException, SearchException{
057 return search(query, limit, null);
058 }
059
060 /** Search in corpus with this query.
061 * In each QueryResult will be added values of theise fields.
062 * Result length is limited by DEFAULTMAXRESULTS. */
063 public QueryResultList search(String query, List fieldNames)
064 throws IndexException, SearchException{
065 return search(query, DEFAULTMAXRESULTS, fieldNames);
066 }
067
068 /** Search in corpus with this query.
069 * In each QueryResult will be added values of these fields.
070 * Result length is limited. */
071 public QueryResultList search(String query, int limit, List fieldNames)
072 throws IndexException, SearchException{
073 Vector result = new Vector();
074
075 try {
076 IndexSearcher searcher = new IndexSearcher(
077 FSDirectory.open(
078 new File(indexedCorpus.getIndexDefinition().getIndexLocation())
079 ),
080 true);
081 QueryParser parser = new QueryParser(
082 Version.LUCENE_29,
083 "body",
084 new SimpleAnalyzer());
085 Query luceneQuery = parser.parse(query);
086
087 // JP was for lucene 2.2
088 // Hits hits = searcher.search(luceneQuery);
089 //int resultlength = hits.length();
090 //if (limit>-1) {
091 // resultlength = Math.min(limit,resultlength);
092 //}
093 TopDocs topDocs = searcher.search(luceneQuery, limit);
094 ScoreDoc[] hits = topDocs.scoreDocs;
095 int resultlength = hits.length;
096
097
098 Vector fieldValues = null;
099 for (int i=0; i<resultlength; i++) {
100
101 if (fieldNames != null){
102 fieldValues = new Vector();
103 for (int j=0; j<fieldNames.size(); j++){
104 fieldValues.add(new gate.creole.ir.Term(
105 fieldNames.get(j).toString(),
106 searcher.doc(hits[i].doc).get(fieldNames.get(j).toString()))
107 );
108 }
109 }
110
111 result.add(new QueryResult(
112 searcher.doc(hits[i].doc).get(LuceneIndexManager.DOCUMENT_ID),
113 hits[i].score,fieldValues));
114 }// for (all search hints)
115
116 searcher.close();
117
118 return new QueryResultList(query, indexedCorpus, result);
119 }
120 catch (java.io.IOException ioe) {
121 throw new IndexException(ioe.getMessage());
122 }
123 catch (org.apache.lucene.queryParser.ParseException pe) {
124 throw new SearchException(pe.getMessage());
125 }
126 }
127 }
|