高分求代码!!用 Java 如何检索一个文件夹里的所有 txt 文件,找出其中包含输入文字的文件,显示该文字在各文件中出现的次数,并按次数排序呢?
从中检索包含“计算机”的 txt 文件,并按“计算机”在各 txt 中出现的次数,以从大到小的顺序输出 txt 名字以及相应的次数。txt 可以命名为 1.txt、2.txt……20.txt。
这个很简单,124行代码完美呈现。
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
/**
 * Scans every {@code .txt} file directly inside a folder, counts how many times a keyword
 * occurs in each one, and prints the files ordered by that count, highest first.
 *
 * <p>The folder and the keyword are configured through the {@code filePath} and
 * {@code keyWord} fields. All of the work is done by the constructor.
 */
public class Search {
    /** Encoding used to decode the scanned files. */
    private static final Charset FILE_CHARSET=Charset.forName("GB2312");

    // Path of the folder to scan.
    private String filePath="E:\\content5_5";
    // Keyword to search for.
    private String keyWord="计算机";
    // Per-file results, kept sorted by ascending count (see addMessage).
    private List<message> msgList;

    /**
     * Scans the configured folder and prints one line per {@code .txt} file, ordered by
     * descending keyword count.
     */
    public Search(){
        msgList=new ArrayList<message>();
        File rootFile=new File(filePath);
        File[] listFile=rootFile.listFiles();
        if(listFile==null){
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list folder: "+rootFile.getPath());
            listFile=new File[0];
        }
        for(int i=0;i<listFile.length;i++){
            File candidate=listFile[i];
            // Only regular files whose name ends with ".txt" are scanned.
            if(!candidate.isFile() || !candidate.getName().endsWith(".txt")){
                continue;
            }
            getKeyWordCount(candidate);
        }
        // The header names the configured keyword rather than a hard-coded one.
        System.out.println("'"+keyWord+"'在各个文件中出现的次数降序排列:");
        // msgList is ascending, so walk it backwards -- down to and INCLUDING index 0.
        for(int i=msgList.size()-1;i>=0;i--){
            msgList.get(i).display();
        }
    }

    /**
     * Counts the occurrences of the keyword in one file and records the result.
     *
     * <p>If the file cannot be read, an error is reported on {@code System.err} and no
     * result is recorded for it.
     *
     * @param pFile the file to scan
     */
    protected void getKeyWordCount(File pFile){
        BufferedReader in=null;
        try {
            in=new BufferedReader(new InputStreamReader(new FileInputStream(pFile),FILE_CHARSET));
            // Read the whole file so the keyword can be located with a real substring
            // search; a char-by-char matcher must restart on mismatch to be correct.
            StringBuilder content=new StringBuilder();
            char[] buffer=new char[4096];
            int read;
            while((read=in.read(buffer))!=-1){
                content.append(buffer,0,read);
            }
            addMessage(new message(pFile.getName(),countOccurrences(content,keyWord)));
        } catch (IOException e) {
            System.err.println("Failed to read "+pFile.getPath()+": "+e);
        } finally {
            if(in!=null){
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Counts the non-overlapping, contiguous occurrences of {@code word} in {@code text}.
     *
     * @return the number of occurrences; {@code 0} when {@code word} is empty
     */
    private static int countOccurrences(StringBuilder text,String word){
        if(word.length()==0){
            return 0;
        }
        int count=0;
        int from=text.indexOf(word,0);
        while(from!=-1){
            count++;
            // Continue after the match so occurrences do not overlap.
            from=text.indexOf(word,from+word.length());
        }
        return count;
    }

    /**
     * Inserts a result into the list, keeping the list sorted by ascending count.
     *
     * @param pMsg the result to insert
     */
    public void addMessage(message pMsg){
        for(int i=0;i<msgList.size();i++){
            if(msgList.get(i).count>pMsg.count){
                msgList.add(i, pMsg);
                return;
            }
        }
        msgList.add(pMsg);
    }

    /** Runs the search with the configured folder and keyword. */
    public static void main(String[] args){
        new Search();
    }
}
/**
 * One search result: a file name paired with the number of keyword hits found in it.
 */
class message{
    /** Number of keyword occurrences counted in the file. */
    public int count;
    /** Name of the file the count belongs to. */
    public String fileName;

    /** Creates an empty result (no file name, zero hits). */
    public message(){
        this(null,0);
    }

    /**
     * Creates a result for the given file.
     *
     * @param pName  name of the file
     * @param pCount number of keyword occurrences in that file
     */
    public message(String pName,int pCount){
        fileName=pName;
        count=pCount;
    }

    /** Prints this result to standard output as {@code <fileName> count:<count>}. */
    public void display(){
        StringBuilder line=new StringBuilder();
        line.append(fileName).append(" count:").append(count);
        System.out.println(line.toString());
    }
}
配置好你的文件夹路径和关键字,运行主方法就好了!
呵呵,改完了,好使。那我要是想在文件名后面添加文件里面的内容,该改哪里呢?
难道楼主是说新文件命名为1.txt,2.txt……20.txt。并且1.txt里面的内容是:count+原来这个文件里面的文本内容。
排序_次数.txt
---------------------------------------------------------------------
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.text.DecimalFormat;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Searches the files directly inside a folder for a piece of text, ranks the files that
 * contain it by number of occurrences (highest first), and copies each of them into a
 * {@code result} sub-folder under the name {@code <rank>_<count>.txt}.
 */
public class JTest {
    public static void main(String args[]) throws Exception {
        execute(new File("H:\\test\\data\\Cartoon"), "您看到");
    }

    /**
     * Ranks the files in {@code root} by how often they contain {@code findStr} and copies
     * the matching ones into {@code root/result}.
     *
     * @param root    folder whose direct, non-directory children are scanned
     * @param findStr literal text to search for
     * @throws IllegalArgumentException if {@code root} cannot be listed as a directory
     * @throws Exception if a file cannot be read or copied
     */
    public static void execute(File root, String findStr) throws Exception {
        List<Bean> list = new ArrayList<Bean>();
        File result = new File(root, "result");
        result.mkdirs();
        File[] files = root.listFiles();
        if (files == null) {
            // listFiles() returns null when root is not a directory or cannot be read.
            throw new IllegalArgumentException("Not a readable directory: " + root);
        }
        for (int i = 0; i < files.length; i++) {
            // Skips sub-folders, including the "result" folder created above.
            if (files[i].isDirectory()) {
                continue;
            }
            fileHandler(files[i], findStr, list);
        }
        Collections.sort(list, new Comparator<Bean>() {
            public int compare(Bean b1, Bean b2) {
                // Descending by count; explicit comparison instead of int subtraction.
                if (b1.count == b2.count) {
                    return 0;
                }
                return b1.count < b2.count ? 1 : -1;
            }
        });
        // Target file name: <rank>_<count>.txt, both parts zero-padded to a common width.
        int sortLen = (list.size() + "").length();
        if (sortLen == 1) {
            sortLen = 2;
        }
        int maxCount = 0;
        for (int i = 0; i < list.size(); i++) {
            Bean bean = list.get(i);
            if (maxCount < bean.count) {
                maxCount = bean.count;
            }
        }
        int maxLen = (maxCount + "").length();
        for (int i = 0; i < list.size(); i++) {
            Bean bean = list.get(i);
            String sort = getSeques(sortLen, (i + 1));
            String count = getSeques(maxLen, bean.count);
            String filename = getFileName(sort, count);
            copy(new File(root, bean.filename), new File(result, filename));
        }
    }

    /**
     * Counts the occurrences of {@code findStr} in {@code file} and, when there is at least
     * one, appends a {@link Bean} describing the file to {@code list}.
     *
     * @param file    file to scan, read line by line with the platform default charset
     * @param findStr literal text to search for
     * @param list    receives one entry for the file if the text was found
     * @throws Exception if the file cannot be opened or read
     */
    public static void fileHandler(File file, String findStr, List<Bean> list)
            throws Exception {
        // Compile once per file instead of once per line.
        Pattern literal = Pattern.compile(Pattern.quote(findStr));
        int count = 0;
        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            String line;
            // readLine() returning null is the end-of-stream signal; ready() is not.
            while ((line = br.readLine()) != null) {
                count += countMatches(literal, line);
            }
        } finally {
            br.close();
        }
        if (count == 0) {
            return;
        }
        Bean bean = new Bean();
        bean.filename = file.getName();
        bean.count = count;
        list.add(bean);
    }

    /**
     * Counts the non-overlapping occurrences of {@code findStr} in {@code line}.
     *
     * <p>{@code findStr} is matched literally: regex metacharacters such as {@code .} or
     * {@code +} have no special meaning.
     *
     * @param line    text to search in
     * @param findStr literal text to search for
     * @return number of occurrences; {@code 0} when {@code findStr} is empty
     */
    public static int findCount(String line, String findStr) {
        return countMatches(Pattern.compile(Pattern.quote(findStr)), line);
    }

    /** Counts the non-empty matches of {@code p} in {@code line}. */
    private static int countMatches(Pattern p, String line) {
        Matcher m = p.matcher(line);
        int count = 0;
        while (m.find()) {
            if (!"".equals(m.group())) {
                count++;
            }
        }
        return count;
    }

    /**
     * Copies the bytes of {@code src} to {@code dest}, overwriting {@code dest}.
     *
     * @throws Exception if either file cannot be opened, read or written
     */
    public static void copy(File src, File dest) throws Exception {
        FileInputStream fin = new FileInputStream(src);
        try {
            FileOutputStream fout = new FileOutputStream(dest);
            try {
                byte[] buff = new byte[4096];
                int len = 0;
                while ((len = fin.read(buff)) != -1) {
                    fout.write(buff, 0, len);
                }
            } finally {
                fout.close();
            }
        } finally {
            fin.close();
        }
    }

    /**
     * Formats {@code index} with a {@link DecimalFormat} pattern of {@code len} zeros,
     * i.e. left-padded with zeros to at least {@code len} digits.
     *
     * @param len   minimum number of digits
     * @param index number to format
     * @return the formatted number, e.g. {@code getSeques(3, 7)} gives {@code "007"}
     */
    public static String getSeques(int len, int index) {
        StringBuilder pattern = new StringBuilder();
        for (int i = 0; i < len; i++) {
            pattern.append('0');
        }
        DecimalFormat df = new DecimalFormat(pattern.toString());
        return df.format(index);
    }

    /**
     * Builds the result file name {@code <sort>_<count>.txt}.
     *
     * @param sort  rank of the file, already formatted
     * @param count occurrence count of the file, already formatted
     */
    public static String getFileName(String sort, String count) {
        String pattern = "{0}_{1}.txt";
        return MessageFormat.format(pattern, sort, count);
    }
}

/** Holder for one matching file: its name and the number of occurrences found in it. */
class Bean {
    String filename = null;
    int count = 0;
}
是啊,但是我是要不用lucene的那种,O(∩_∩)O~