package com.bjsxt.indexsearch;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import com.bjsxt.utils.File2DocumentUtil;
/**
 * Small Lucene demo: builds an index from a single text file and runs
 * multi-field keyword searches against it, optionally highlighting the
 * matched keywords in the "content" field of each hit.
 *
 * <p>All paths are hard-coded in the instance fields below; adjust them to
 * the local environment before running.
 */
public class IndexAndSearch {

    /** Maximum number of hits requested from a single search. */
    private static final int MAX_HITS = 1000;

    /** Maximum length, in characters, of a highlighted fragment or fallback summary. */
    private static final int FRAGMENT_SIZE = 50;

    /** Name of the document field whose value is highlighted in {@link #search(String)}. */
    private static final String CONTENT_FIELD = "content";

    /** Path of the text file that gets converted into a document and indexed. */
    String filePath = "D:\\flexWorkespace\\LuceneDemo\\luceneDataSource\\小笑话_总统的房间 Room .txt";

    /** Path of the directory that holds the on-disk index. */
    String indexPath = "D:\\flexWorkespace\\LuceneDemo\\indexPath";

    /**
     * Analyzer shared by indexing, query parsing and highlighting.
     * {@code new StandardAnalyzer()} was the previously used alternative.
     */
    Analyzer analyzer = new MMAnalyzer();

    /**
     * Entry point: searches the existing index for a fixed keyword.
     * Call {@link #createIndexByDir()} (or {@link #createIndex()}) first if
     * the index has not been built yet.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if parsing the query or reading the index fails
     */
    public static void main(String[] args) throws Exception {
        new IndexAndSearch().search("房间");
    }

    /**
     * Converts the file at {@link #filePath} into a document and writes it to
     * the index at {@link #indexPath}. The writer is opened with the
     * {@code create} flag set to {@code true}.
     *
     * @throws Exception if the file cannot be converted or the index cannot be written
     */
    public void createIndex() throws Exception {
        Document doc = File2DocumentUtil.file2Document(filePath);
        // IndexWriter is the object used to modify (add/delete/update) the index.
        IndexWriter writer = new IndexWriter(indexPath, analyzer, true, MaxFieldLength.LIMITED);
        try {
            writer.addDocument(doc);
        } finally {
            // Close even when addDocument fails so the writer is not leaked.
            writer.close();
        }
    }

    /**
     * Adds the document built from {@link #filePath} to the index by way of an
     * in-memory copy: the on-disk index is loaded into a {@code RAMDirectory},
     * the document is added there, and the in-memory index is then written
     * back to disk, committed and optimized.
     *
     * @throws Exception if the file cannot be converted or either index cannot be accessed
     */
    public void createIndexByDir() throws Exception {
        // 1. On start-up, load the file-system index into memory.
        Directory fsDir = FSDirectory.getDirectory(indexPath);
        try {
            Directory ramDir = new RAMDirectory(fsDir);

            // The in-memory writer is opened without the create flag, so the
            // index copied from disk is kept.
            IndexWriter ramWriter = new IndexWriter(ramDir, analyzer, MaxFieldLength.LIMITED);
            try {
                ramWriter.addDocument(File2DocumentUtil.file2Document(filePath));
            } finally {
                ramWriter.close();
            }

            // 2. On exit, save the in-memory index back to disk. The
            // file-system writer is opened with create == true, i.e. the
            // existing index files are replaced.
            IndexWriter fsWriter = new IndexWriter(fsDir, analyzer, true, MaxFieldLength.LIMITED);
            try {
                fsWriter.addIndexesNoOptimize(new Directory[] {ramDir});
                // Commit what came from memory before optimizing.
                fsWriter.commit();
                // Optimize the index files (merges multiple cfs files into one).
                fsWriter.optimize();
            } finally {
                fsWriter.close();
            }
        } finally {
            fsDir.close();
        }
    }

    /**
     * Searches the "content" and "name" fields of the index at
     * {@link #indexPath}, prints the total hit count and then prints every
     * returned document with its "content" value replaced by a summary: the
     * best highlighted fragment, or the first {@value #FRAGMENT_SIZE}
     * characters when the keyword does not occur in the content. A hit with
     * no stored "content" value is printed unchanged.
     *
     * @param queryStr query text in query-parser syntax
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public void search(String queryStr) throws Exception {
        Query query = parseQuery(new String[] {"content", "name"}, queryStr);
        Filter filter = null;
        IndexSearcher searcher = new IndexSearcher(indexPath);
        try {
            TopDocs topDocs = searcher.search(query, filter, MAX_HITS);
            System.out.println("总共有" + topDocs.totalHits + "条记录:");

            // The highlighter depends only on the query, so build it once
            // instead of once per hit.
            Highlighter highlighter = createHighlighter(query);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                String content = doc.get(CONTENT_FIELD);
                if (content != null) {
                    // getBestFragment returns null when the keyword does not
                    // occur in this field value; fall back to a plain prefix.
                    String summary = highlighter.getBestFragment(analyzer, CONTENT_FIELD, content);
                    if (summary == null) {
                        summary = content.substring(0, Math.min(FRAGMENT_SIZE, content.length()));
                    }
                    doc.getField(CONTENT_FIELD).setValue(summary);
                }
                File2DocumentUtil.printDocumentInfo(doc);
            }
        } finally {
            // Release the searcher's underlying index reader.
            searcher.close();
        }
    }

    /**
     * Searches the "name" and "content" fields through an explicitly opened
     * file-system {@code Directory}, prints the total hit count and then
     * prints every returned document without highlighting.
     *
     * @param queryStr query text in query-parser syntax
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public void searchByDir(String queryStr) throws Exception {
        Query query = parseQuery(new String[] {"name", "content"}, queryStr);
        Filter filter = null;
        Directory fsDir = FSDirectory.getDirectory(indexPath);
        try {
            IndexSearcher searcher = new IndexSearcher(fsDir);
            try {
                TopDocs topDocs = searcher.search(query, filter, MAX_HITS);
                System.out.println("总共有" + topDocs.totalHits + "条记录:");
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    File2DocumentUtil.printDocumentInfo(searcher.doc(scoreDoc.doc));
                }
            } finally {
                searcher.close();
            }
        } finally {
            fsDir.close();
        }
    }

    /** Parses {@code queryStr} against the given fields using the shared analyzer. */
    private Query parseQuery(String[] fields, String queryStr) throws Exception {
        QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
        return queryParser.parse(queryStr);
    }

    /** Builds a highlighter that wraps query matches in a red font tag and cuts fragments of FRAGMENT_SIZE characters. */
    private Highlighter createHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }
}
分享到:
相关推荐
NULL 博文链接:https://sunlongan666.iteye.com/blog/580380
高光插件,支持各种模型高亮效果,使用相机后处理边缘高光特效
Highlighter.js 是一个小型(纯 javascript)的库,可以让你轻松导航、选择以及高亮 DOM 元素。用法:document.addEventListener("DOMContentLoaded", function() { var Highlighter = new window....
VS 高亮 插件 Highlighter.zip
lucene-highlighter-3.5.0.jar lucene高亮包
AE脚本-文字层代码高亮突出颜色控制FUI脚本AEscripts ASH Syntax Highlighter 1.0.0AE脚本-文字层代码高亮突出颜色控制FUI脚本AEscripts ASH Syntax Highlighter 1.0.0AE脚本-文字层代码高亮突出颜色控制FUI脚本...
crayon-syntax-highlighter 是一个 WordPress 代码高亮插件,可用于 PHP 项目下。支持多语言,多主体,多字体的高亮插件。用 PHP 和 jQuery 写成。 标签:crayon
unity 高亮插件highlightingV5.0,支持模型高亮,.....
Crayon Syntax Highlighter这个功能全面、使用方便的代码高亮显示插件, 不仅能帮你高亮显示php, java, asp, python等各种代码, 而且在wordpress文件编辑器里直接就有一个按钮可以很方便的帮助你插入代码。
Highlighter能够对任何UI对象中的文本进行高亮,可以神奇地找到在您UITableViewCell或其他类中的UILabel,UITextView,UITexTfield,UIButton等UI对象。
PHP实例开发源码—WordPress代码高亮插件(Crayon Syntax Highlighter).zip PHP实例开发源码—WordPress代码高亮插件(Crayon Syntax Highlighter).zip PHP实例开发源码—WordPress代码高亮插件(Crayon Syntax ...
1.emmet 2.jQuery 3.sublimecodeintel 4.ColorHighlighter显示颜色 ...6.AlignTab正则对其 7.Sidebarenhancements侧边栏 8.DeleteBlankLines 删除空行 ...17.BracketHighlighter高亮括号 18.Babel es6高亮 19.bootstrap
lucene3.0-highlighter.jar lucene3.0的高亮jar包,从lucene3.0源码中导出来的
一款不错的js高亮插件,支持多种语言且有多种高亮效果,使用时需引入以下 <link rel="stylesheet" href="styles/default.css"> <script src="highlight.pack.js"></script> <script>hljs.initHighlightingOnLoad();</script> 高亮...
我写的google protobuf 的visual studio2008 语法高亮插件,目前只有语法高亮,没有自动完成等功能,后续考虑添加。
基于PHP的WordPress代码高亮插件(Crayon Syntax Highlighter).zip
npm install react-syntax-highlighter --save 为什么要这样一个? React还有其他语法高亮器,为什么要使用它呢? 最大的原因是,所有其他原因都依赖于在componentDidMount和componentDidUpdate触发调用以突出显示...
lucene3.6.2及highlighter jar包 可以把关键字在搜索结果中高亮显示
m_highlighter=this.GetComponent<Highlighter>(); } void OnMouseEnter(){ m_highlighter.FlashingOn();//调用Highlighter脚本的开始高亮函数 } void OnMouseExit(){ m_highlighter.FlashingOff();//调用...
主要介绍了Crayon Syntax Highlighter代码高亮插件与fancybox图片暗箱冲突的解决方法,需要的朋友可以参考下