A Beginner's Guide for Lucene

发布于 2011-04-01  2.5k 次阅读


import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

 

/**
 * @author WilKey
 * @see A Beginner's Guide for Lucene
 *
 */
public class HelloWorld {
 //源数据路径
 String dsPath = "D:\workspace\LuceneExample\luceneds\笔记.txt";
 
 //存放索引文件位置,及索引库
 String indexPath = "D:\workspace\LuceneExample\luceneindex";
 
 //分词器
 Analyzer analyzer = new StandardAnalyzer();
 
 /**
  * 创建索引
  *
  * IndexWriter //用来操作(CRU)索引库的
  */
 
 @Test
 public void createIndex() throws Exception{
  File file = new File(dsPath);
  
  //Document存放经过组织后的数据源,只有转换为Document对象才可以被索引和搜索到
  Document doc = new Document();
  
  //文件名称
  doc.add(new Field("name",file.getName(),Store.YES,Index.ANALYZED));
  
  //检索到的内容
  doc.add(new Field("content",readFileContent(file),Store.YES,Index.ANALYZED));
  
  //文件大小
  doc.add(new Field("size",NumberTools.longToString(file.length()),Store.YES,Index.NOT_ANALYZED));
  
  //检索到的文件位置
  doc.add(new Field("path",file.getAbsolutePath(),Store.YES,Index.NOT_ANALYZED));
  
  //建立索引
  IndexWriter indexWriter = new IndexWriter(indexPath,analyzer,true,MaxFieldLength.LIMITED);
  indexWriter.addDocument(doc);
  indexWriter.close();
 }
 
 
 /**
  * 搜索
  *
  * IndexSearcher 用来在索引库中进行查询
  */
 @Test
 public void search() throws Exception{
  //请求字段
  //String queryString = "document";
  String queryString = "东乡";
  // 1、把要搜索的文本解析为 Query
  String[] fields = {"name","content"};
  QueryParser queryParser = new MultiFieldQueryParser(fields,analyzer);
  Query query  = queryParser.parse(queryString);
  // 2、进行查询,从索引库中查找
  IndexSearcher indexSearcher = new IndexSearcher(indexPath);
  Filter filter = null;
  
  TopDocs topDocs = indexSearcher.search(query,filter,10000);
  System.out.println("总共有["+topDocs.totalHits+"]条匹配结果。");
  
  // 3、打印结果
  for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
   //文档内部编号
   int index = scoreDoc.doc;
   //根据编号取出相应的文档
   Document doc  = indexSearcher.doc(index);
   System.out.println("==================================");
   System.out.println("name = "+doc.get("name"));
   System.out.println("content = "+doc.get("content"));
   System.out.println("size = "+doc.get("size"));
   System.out.println("path = "+doc.get("path"));
  }
 }
 
 /**
  * 读取文件内容
  *
  */
 /**
  * @param file
  * @return
  */
 public static String readFileContent(File file){
  StringBuffer content = null;
  BufferedReader bfreader = null;
  try {
   bfreader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
    content = new StringBuffer();
   
   for (String line = null; (line = bfreader.readLine()) !=null;) {
    content.append(line).append("\n");
   }
   
  } catch (Exception e) {
   // TODO: handle exception
   System.out.println("RuntimeException :" + e.getMessage());
  }finally{
   try {
    bfreader.close();
   } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
   }
  }
  return content.toString();
 }
}