import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;
/**
* @author WilKey
* @see "A Beginner's Guide for Lucene"
*
*/
public class HelloWorld {
//源数据路径
String dsPath = "D:\workspace\LuceneExample\luceneds\笔记.txt";
//存放索引文件位置,及索引库
String indexPath = "D:\workspace\LuceneExample\luceneindex";
//分词器
Analyzer analyzer = new StandardAnalyzer();
/**
* 创建索引
*
* IndexWriter //用来操作(CRU)索引库的
*/
@Test
public void createIndex() throws Exception{
File file = new File(dsPath);
//Document存放经过组织后的数据源,只有转换为Document对象才可以被索引和搜索到
Document doc = new Document();
//文件名称
doc.add(new Field("name",file.getName(),Store.YES,Index.ANALYZED));
//检索到的内容
doc.add(new Field("content",readFileContent(file),Store.YES,Index.ANALYZED));
//文件大小
doc.add(new Field("size",NumberTools.longToString(file.length()),Store.YES,Index.NOT_ANALYZED));
//检索到的文件位置
doc.add(new Field("path",file.getAbsolutePath(),Store.YES,Index.NOT_ANALYZED));
//建立索引
IndexWriter indexWriter = new IndexWriter(indexPath,analyzer,true,MaxFieldLength.LIMITED);
indexWriter.addDocument(doc);
indexWriter.close();
}
/**
* 搜索
*
* IndexSearcher 用来在索引库中进行查询
*/
@Test
public void search() throws Exception{
//请求字段
//String queryString = "document";
String queryString = "东乡";
// 1、把要搜索的文本解析为 Query
String[] fields = {"name","content"};
QueryParser queryParser = new MultiFieldQueryParser(fields,analyzer);
Query query = queryParser.parse(queryString);
// 2、进行查询,从索引库中查找
IndexSearcher indexSearcher = new IndexSearcher(indexPath);
Filter filter = null;
TopDocs topDocs = indexSearcher.search(query,filter,10000);
System.out.println("总共有["+topDocs.totalHits+"]条匹配结果。");
// 3、打印结果
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
//文档内部编号
int index = scoreDoc.doc;
//根据编号取出相应的文档
Document doc = indexSearcher.doc(index);
System.out.println("==================================");
System.out.println("name = "+doc.get("name"));
System.out.println("content = "+doc.get("content"));
System.out.println("size = "+doc.get("size"));
System.out.println("path = "+doc.get("path"));
}
}
/**
* 读取文件内容
*
*/
/**
* @param file
* @return
*/
public static String readFileContent(File file){
StringBuffer content = null;
BufferedReader bfreader = null;
try {
bfreader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
content = new StringBuffer();
for (String line = null; (line = bfreader.readLine()) !=null;) {
content.append(line).append("\n");
}
} catch (Exception e) {
// TODO: handle exception
System.out.println("RuntimeException :" + e.getMessage());
}finally{
try {
bfreader.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return content.toString();
}
}
Comments | NOTHING