求助,关于一个c++词频统计编程代码
1个回答
展开全部
这个代码扩展了《C++ Primer 中文版(第5版)》shared_ptr 章节的单词查询程序。原程序只能查询某个单词出现了几次、分别出现在哪些行,以及这些行的内容。
这个程序在保留上述功能的基础上,增加了统计词频的功能(这里的"词频"指包含该单词的行数,同一行内重复出现只计一次),代码如下:
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// Zero-based line index into the vector that stores the text of the input file.
using CLine = vector<string>::size_type;
// Outcome of looking up a single word: the word itself, the set of line
// indices on which it was found, and the shared text of all lines.
class CQueryResult
{
    // Streams a summary line followed by every matching line of text.
    friend ostream& operator<<(ostream& os, const CQueryResult& _queryResult);

public:
    // _queryWord : the word that was searched for.
    // _lines     : zero-based indices of the lines containing the word.
    // _linesText : text of every line of the input, indexed by those indices.
    CQueryResult(const string& _queryWord, shared_ptr<set<CLine> > _lines, shared_ptr<vector<string> > _linesText);

private:
    string m_queryWord;                      // word that was queried
    shared_ptr<set<CLine> > m_linesFound;    // indices of lines containing the word
    shared_ptr<vector<string> > m_linesText; // full text, shared with the producer of this result
};
// Builds a query result from the searched word, the indices of the lines it
// was found on, and the shared text of all lines.
//
// Both shared_ptr arguments are taken by value (the caller's copy already
// added an owner), so they are moved into the members instead of being
// copied again; this avoids a redundant atomic reference-count round trip.
CQueryResult::CQueryResult(const string& _queryWord, shared_ptr<set<CLine> > _lines, shared_ptr<vector<string> > _linesText)
    : m_queryWord(_queryWord),
      m_linesFound(std::move(_lines)),
      m_linesText(std::move(_linesText))
{
}
// Returns _original with _append attached when _n is greater than 1,
// otherwise returns _original unchanged (used to pluralise "time").
string makeAppend(int _n, const string& _original, const string& _append)
{
    if (_n <= 1)
    {
        return _original;
    }
    return _original + _append;
}
ostream& operator<<(ostream& os, const CQueryResult& _queryResult)
{
os<<"\""<<_queryResult.m_queryWord<<"\" occurs "<<_queryResult.m_linesFound->size()<<" "
<<makeAppend(_queryResult.m_linesFound->size(), "time", "s")<<" :"<<endl;
for (auto itr = _queryResult.m_linesFound->cbegin(); itr != _queryResult.m_linesFound->cend(); ++itr)
{
os<<" "<<setw(3)<<*itr + 1<<" "<<_queryResult.m_linesText->at(*itr)<<endl;
}
return os;
}
// Indexes a text file word by word. Supports looking up the lines on which a
// word occurs and printing every word ordered by how many lines contain it.
class CTextQuery
{
    // Streams every indexed word with its frequency, highest frequency first.
    friend ostream& operator<<(ostream& os, const CTextQuery& _queryResult);

public:
    // One entry of the frequency table: a word and the number of lines
    // that contain it.
    struct CFrequency
    {
        string m_word;
        int m_frequency;
    };

    // Reads _fin line by line and builds the word index and frequency table.
    CTextQuery(ifstream& _fin);

    // Returns the lines on which _queryWord occurs (an empty result if none).
    CQueryResult query(const string& _queryWord) const;

private:
    // Fills m_wordFrequency from m_wordMap; called once by the constructor.
    void InitFrequency();

    shared_ptr<vector<string> > m_linesText;         // text of every input line
    map<string, shared_ptr<set<CLine> > > m_wordMap; // word -> indices of lines containing it
    vector<list<CFrequency>> m_wordFrequency;        // bucket i holds words with frequency i + 1
    size_t m_maxFrequency;                           // largest frequency seen; number of buckets
};
// Reads _fin line by line, stores every line, and records for each
// whitespace-separated word the set of zero-based line indices on which it
// appears. While indexing it tracks the largest such set, then builds the
// frequency table via InitFrequency().
//
// Note: a word's frequency is the number of distinct lines containing it;
// repeated occurrences on the same line are counted once because the line
// indices are kept in a set.
CTextQuery::CTextQuery(ifstream& _fin)
    : m_linesText(make_shared<vector<string>>()), m_maxFrequency(0)
{
    string lineText;
    // CLine (unsigned) rather than int: the value is stored in a set<CLine>,
    // so this avoids a signed/unsigned conversion on every insert.
    CLine currentLine = 0;
    while (getline(_fin, lineText))
    {
        // Debug aid: print currentLine + 1 and lineText here to echo the
        // input together with its line numbers.
        m_linesText->push_back(lineText);

        istringstream iss(lineText);
        string word;
        while (iss >> word)
        {
            // operator[] inserts an empty shared_ptr for an unseen word. The
            // reference matters: assigning through it updates the pointer
            // stored in the map rather than a local copy.
            auto& lines = m_wordMap[word];
            if (!lines)
            {
                lines = make_shared<set<CLine>>();
            }
            lines->insert(currentLine);
            if (lines->size() > m_maxFrequency)
            {
                m_maxFrequency = lines->size();
            }
        }
        ++currentLine;
    }
    InitFrequency();
}
void CTextQuery::InitFrequency(void)
{
m_wordFrequency.resize(m_maxFrequency);
for (auto itr = m_wordMap.cbegin(); itr != m_wordMap.cend(); ++itr)
{
CFrequency item;
item.m_frequency = itr->second->size();
item.m_word = itr->first;
m_wordFrequency[item.m_frequency-1].push_back(item);
}
}
// Looks up _queryWord in the word index and returns a result that carries the
// word, the lines it was found on, and the shared text of all lines.
CQueryResult CTextQuery::query(const string& _queryWord) const
{
    // A single empty set shared by every unsuccessful lookup. Being a
    // function-local static it is created once, and it lets "not found" be
    // represented as a result with zero lines.
    static const shared_ptr<set<CLine>> noLines = make_shared<set<CLine>>();

    const auto found = m_wordMap.find(_queryWord);
    if (found == m_wordMap.end())
    {
        return CQueryResult(_queryWord, noLines, m_linesText);
    }
    return CQueryResult(_queryWord, found->second, m_linesText);
}
// Writes every indexed word with its frequency, one per line, in the form
//   "<word>" occurs <frequency>
// walking the frequency buckets from the highest frequency down to 1.
ostream& operator<<(ostream& os, const CTextQuery& _queryResult)
{
    const auto& buckets = _queryResult.m_wordFrequency;
    for (auto bucket = buckets.crbegin(); bucket != buckets.crend(); ++bucket)
    {
        for (const auto& entry : *bucket)
        {
            os << "\"" << entry.m_word << "\" occurs " << entry.m_frequency << endl;
        }
    }
    return os;
}
int main()
{
ifstream fin("CPlusPlus.txt");
CTextQuery textQuery(fin);
cout<<textQuery<<endl;
CQueryResult queryResult = textQuery.query("C++");
cout<<queryResult<<endl;
return 0;
}
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询