C++编程,从一个文件中统计所有出现过的单词,并按次数从大到小输出
展开全部
问题可以分为2个部分:
统计出现过的所有单词
按次数从大到小输出
#include<algorithm>
#include<cstdio>
#include<fstream>
#include<iostream>
#include<string>
#include<unordered_map>
#include<utility>
#include<vector>
using namespace std;
// Ordering predicate for (count, word) pairs, usable as a std::sort comparator.
// Returns true when `a` must be placed before `b`: the pair with the higher
// count comes first, and pairs with equal counts are ordered by ascending word.
// Both arguments are taken by const reference so that a comparison does not
// copy the strings.
bool mysort(const std::pair<int, std::string>& a, const std::pair<int, std::string>& b){
    if (a.first != b.first)
        return a.first > b.first;
    return a.second < b.second;
}
// Counts every whitespace-separated word in "input.in" and prints each
// distinct word to stdout, one per line, in the order defined by mysort
// (most frequent first, ties by ascending word).
// Returns 1 with a message on stderr if the input file cannot be opened,
// 0 otherwise.
int main(){
    // Count the occurrences of every word.
    std::ifstream ifs("input.in", std::ifstream::in);
    if (!ifs) {
        std::cerr << "cannot open input.in\n";
        return 1;
    }
    std::unordered_map<std::string, int> um;
    std::string s;
    while (ifs >> s)
        ++um[s];  // operator[] value-initializes a missing count to 0, so one lookup suffices
    ifs.close();

    // Sort: highest count first; words with the same count are ordered
    // lexicographically ascending.
    std::vector<std::pair<int, std::string> > v;
    v.reserve(um.size());
    for (const auto& entry : um)
        v.push_back(std::make_pair(entry.second, entry.first));
    std::sort(v.begin(), v.end(), mysort);

    // Output: only the word is printed; puts appends the newline.
    for (const auto& entry : v)
        std::puts(entry.second.c_str());
    return 0;
}
几点小贴士:
如果只输出字符串的话,puts 通常是最快的标准库输出函数之一(按作者的经验比 printf 快大概10倍,而默认设置下 printf 往往又比 cout 要快;具体差距取决于编译器和运行库,需要实测),不过要记得 puts 只能输出 C 风格字符串,并且会在末尾自动加上换行符,所以要输出 string 的时候记得用 .c_str() 函数。
unordered_map 比 map 要快上很多,因为它使用哈希表(单次查找/插入的平均时间是 O(1),而 map 基于平衡二叉树,单次查找/插入的时间是 O(log n)),但是代价就是它不是按顺序储存的。
展开全部
(1)程序的输入文本为每行切词后的结果,每个单词(term)之间以制表符分隔。
(2)使用hashmap计算单词和其出现的个数,key为term,value为其出现次数。统计完后将hashmap转换为vector,使用STL的标准排序算法
#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <algorithm>
#include <ext/hash_map>
using namespace std;
using namespace __gnu_cxx;
// Hash functor for std::string keys; it is passed as the hash parameter of
// the hash_map instantiations in this program.
// Folds the characters from left to right as acc = 5 * acc + c, starting
// from 0, so the empty string hashes to 0.
struct str_hash{
    size_t operator()(const std::string& str) const
    {
        // Named `acc` rather than `__h`: identifiers containing a double
        // underscore are reserved for the implementation.
        unsigned long acc = 0;
        for (size_t i = 0; i < str.size(); ++i)
            acc = 5 * acc + str[i];
        return size_t(acc);
    }
};
// Comparator for sorting (count, term) pairs by descending count.
// Despite the name, it returns true when lhs has the LARGER count, so that
// std::sort places the most frequent terms first. Pairs with equal counts are
// ordered by ascending term, which makes the sorted result deterministic
// instead of depending on the iteration order of the hash table.
bool less_first(std::pair<int, std::string> const& lhs, std::pair<int, std::string> const& rhs)
{
    if (lhs.first != rhs.first)
        return lhs.first > rhs.first;
    return lhs.second < rhs.second;
}
vector<pair<int, string> > mirror_map(hash_map<string, int, str_hash> const& m)
{
vector<pair<int, string> > mirror;
for (hash_map<string, int, str_hash>::const_iterator i = m.begin(); i != m.end(); ++i)
{
mirror.push_back(pair<int, string>(i->second, i->first));
}
std::sort(mirror.begin(), mirror.end(), less_first);
return mirror;
}
int main()
{
ifstream input;
ofstream output;
input.open("word.txt");
output.open("wordfrequency.txt");
string eachline;
hash_map<string,int,str_hash> termtime;
while(getline(input,eachline) )
{
string::size_type start = 0;
string::size_type end = eachline.find_first_of("\t");
while(end != string::npos){
string term = eachline.substr(start, end - start);
termtime[term]++;
start = end + 1;
end = eachline.find_first_of("\t",start);
}
if(start != eachline.size()){
string term = eachline.substr(start);
termtime[term]++;
}
}
vector<pair<int, string> > mirror = mirror_map(termtime);
for(vector<pair<int, string> >::iterator myiter= mirror.begin(); myiter != mirror.end(); ++myiter){
output<<myiter->second<<"\t"<<myiter->first<<endl;
}
input.close();
output.close();
cout << "Done" << endl;
system("pause");
return 0;
}
(2)使用hashmap计算单词和其出现的个数,key为term,value为其出现次数。统计完后将hashmap转换为vector,使用STL的标准排序算法
#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <algorithm>
#include <ext/hash_map>
using namespace std;
using namespace __gnu_cxx;
// Hash functor for std::string keys; it is passed as the hash parameter of
// the hash_map instantiations in this program.
// Folds the characters from left to right as acc = 5 * acc + c, starting
// from 0, so the empty string hashes to 0.
struct str_hash{
    size_t operator()(const std::string& str) const
    {
        // Named `acc` rather than `__h`: identifiers containing a double
        // underscore are reserved for the implementation.
        unsigned long acc = 0;
        for (size_t i = 0; i < str.size(); ++i)
            acc = 5 * acc + str[i];
        return size_t(acc);
    }
};
// Comparator for sorting (count, term) pairs by descending count.
// Despite the name, it returns true when lhs has the LARGER count, so that
// std::sort places the most frequent terms first. Pairs with equal counts are
// ordered by ascending term, which makes the sorted result deterministic
// instead of depending on the iteration order of the hash table.
bool less_first(std::pair<int, std::string> const& lhs, std::pair<int, std::string> const& rhs)
{
    if (lhs.first != rhs.first)
        return lhs.first > rhs.first;
    return lhs.second < rhs.second;
}
vector<pair<int, string> > mirror_map(hash_map<string, int, str_hash> const& m)
{
vector<pair<int, string> > mirror;
for (hash_map<string, int, str_hash>::const_iterator i = m.begin(); i != m.end(); ++i)
{
mirror.push_back(pair<int, string>(i->second, i->first));
}
std::sort(mirror.begin(), mirror.end(), less_first);
return mirror;
}
int main()
{
ifstream input;
ofstream output;
input.open("word.txt");
output.open("wordfrequency.txt");
string eachline;
hash_map<string,int,str_hash> termtime;
while(getline(input,eachline) )
{
string::size_type start = 0;
string::size_type end = eachline.find_first_of("\t");
while(end != string::npos){
string term = eachline.substr(start, end - start);
termtime[term]++;
start = end + 1;
end = eachline.find_first_of("\t",start);
}
if(start != eachline.size()){
string term = eachline.substr(start);
termtime[term]++;
}
}
vector<pair<int, string> > mirror = mirror_map(termtime);
for(vector<pair<int, string> >::iterator myiter= mirror.begin(); myiter != mirror.end(); ++myiter){
output<<myiter->second<<"\t"<<myiter->first<<endl;
}
input.close();
output.close();
cout << "Done" << endl;
system("pause");
return 0;
}
追问
我目前只学了vector,map,没有学到hashmap,有没有更低级一点的解法,谢谢啊···
本回答被提问者和网友采纳
已赞过
已踩过
评论
收起
你对这个回答的评价是?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询