如何将Lucene索引写入Hadoop
2015-08-08
展开全部
package indexhadoop;
import hdfs.HdfsDirectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
/**
*
* @author qindongliang
* 将索引存储在HDFS上的demo
* 支持hadoop1.x的版本
*
* **/
public class MyIndex {
public static void main(String[] args) throws Exception {
// Indexing run with timing, kept for reference:
// long start = System.currentTimeMillis();
// add();
// System.out.println("耗时: " + (System.currentTimeMillis() - start) + "毫秒");
query("中国");
//delete("3");// delete the document with the given ID
}
/***
* 得到HDFS的writer
*
* **/
/**
 * Builds an {@link IndexWriter} whose index lives on HDFS.
 *
 * @return a writer backed by an HDFS directory
 * @throws Exception if the HDFS directory or writer cannot be created
 */
public static IndexWriter getIndexWriter() throws Exception {
// Analyzer that tokenizes Chinese text.
Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46);
IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
// Lucene directory stored on HDFS at the configured namenode path.
Path indexPath = new Path("hdfs://192.168.75.130:9000/root/index");
HdfsDirectory hdfsDir = new HdfsDirectory(indexPath, new Configuration());
return new IndexWriter(hdfsDir, writerConfig);
}
/**
* 建索引的方法
*
* **/
public static void add()throws Exception{
IndexWriter writer=getIndexWriter();
import hdfs.HdfsDirectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
/**
*
* @author qindongliang
* 将索引存储在HDFS上的demo
* 支持hadoop1.x的版本
*
* **/
public class MyIndex {
public static void main(String[] args) throws Exception {
// Indexing run with timing, kept for reference:
// long start = System.currentTimeMillis();
// add();
// System.out.println("耗时: " + (System.currentTimeMillis() - start) + "毫秒");
query("中国");
//delete("3");// delete the document with the given ID
}
/***
* 得到HDFS的writer
*
* **/
/**
 * Builds an {@link IndexWriter} whose index lives on HDFS.
 *
 * @return a writer backed by an HDFS directory
 * @throws Exception if the HDFS directory or writer cannot be created
 */
public static IndexWriter getIndexWriter() throws Exception {
// Analyzer that tokenizes Chinese text.
Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46);
IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
// Lucene directory stored on HDFS at the configured namenode path.
Path indexPath = new Path("hdfs://192.168.75.130:9000/root/index");
HdfsDirectory hdfsDir = new HdfsDirectory(indexPath, new Configuration());
return new IndexWriter(hdfsDir, writerConfig);
}
/**
* 建索引的方法
*
* **/
public static void add()throws Exception{
IndexWriter writer=getIndexWriter();
本回答被提问者和网友采纳
已赞过
已踩过
评论
收起
你对这个回答的评价是?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询