有一个中文文件,如何分别提取出文件中的所有句子?要求用 C++ 实现。多谢!比较急,在线等!
你的文件是什么样的?每句话以什么结尾?是"。"还是".",又或者是其他符号?给个样本看看!
使用 VC6.0 编译。
使用方法示例:app c:\\1.txt(路径中的反斜杠一定要写成双反斜杠 \\)
#include <stdio.h>
#include <string.h>

#include <iostream>
#include <string>
using namespace std;
// Byte sequences that end a sentence: the ASCII and full-width forms of the
// question mark and exclamation mark, plus the ideographic full stop.
// Each terminator's length is taken from the literal itself, so nothing here
// assumes a fixed number of bytes per punctuation mark.
// NOTE(review): matching is byte-wise, so the input file is assumed to use the
// same encoding as this source file; in double-byte encodings a terminator
// could in principle be matched across a character boundary - confirm against
// the real input data.
static const char *const kSentenceEnds[] = { "?", "!", "。", "?", "!" };

// Returns the byte length of the sentence terminator that begins at text[pos],
// or 0 when no terminator begins there. pos must be less than text.size().
static std::string::size_type TerminatorLengthAt(const std::string &text,
                                                 std::string::size_type pos)
{
    const size_t count = sizeof(kSentenceEnds) / sizeof(kSentenceEnds[0]);
    for (size_t i = 0; i < count; ++i)
    {
        const std::string::size_type len = strlen(kSentenceEnds[i]);
        // compare() clips the range at the end of the text, so a terminator
        // that would run past the end simply compares unequal.
        if (text.compare(pos, len, kSentenceEnds[i]) == 0)
            return len;
    }
    return 0;
}

// Reads the file named by argv[1], echoes its whole content, then prints every
// sentence (text up to and including a terminator) on its own line.
//
// Returns 0 on success, 1 when the argument count is wrong or the file cannot
// be opened or read.
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        std::cout << "Usag: <应用程序> <文件路径>" << std::endl;
        return 1;
    }

    FILE *fp = fopen(argv[1], "r");
    if (fp == NULL)
    {
        std::cout << "打开文件" << argv[1] << "错误!" << std::endl;
        return 1;
    }

    // Read in chunks and trust only fread's return value. In text mode the
    // number of bytes delivered can be smaller than the size reported by
    // ftell (line-ending translation), so sizing by ftell would leave
    // unread garbage at the end of the buffer.
    std::string text;
    char chunk[4096];
    size_t got;
    while ((got = fread(chunk, 1, sizeof(chunk), fp)) > 0)
        text.append(chunk, got);
    const bool readFailed = ferror(fp) != 0;
    fclose(fp);
    if (readFailed)
    {
        std::cout << "读取文件" << argv[1] << "错误!" << std::endl;
        return 1;
    }

    // Echo the raw content first.
    std::cout << text << std::endl;

    // Single pass over the text. Sentences are written straight from the
    // text, so there is no fixed-size line buffer to overflow.
    std::string::size_type start = 0;
    std::string::size_type pos = 0;
    while (pos < text.size())
    {
        const std::string::size_type len = TerminatorLengthAt(text, pos);
        if (len == 0)
        {
            ++pos;
            continue;
        }
        pos += len; // keep the terminator as part of the sentence
        std::cout.write(text.data() + start,
                        static_cast<std::streamsize>(pos - start));
        std::cout << std::endl;
        start = pos;
    }
    // Text after the last terminator is not a complete sentence and is
    // intentionally not printed.

    return 0;
}