
求KNN文本分类算法java实现源代码【散分了!!!!】
最近在做一个文本分类的需要KNN算法。虽然基本原理都懂,自己也写了一个代码。我的没有用到训练集。测试文本由950个,但是根据相似度值(如果小于0.3,就新建一个类),结果...
最近在做一个文本分类的需要KNN算法。虽然基本原理都懂,自己也写了一个代码。我的没有用到训练集。测试文本由950个,但是根据相似度值(如果小于0.3,就新建一个类),结果得到了950个类。很郁闷!!!特此上来求教高手解答。!!!!有满意答案另外加100分。绝对保证信用!!!
展开
若以下回答无法解决问题,邀请你更新回答
2个回答
2013-04-13
展开全部
#include <iostream>
#include <cmath>
#include <fstream>
using namespace std;
#define NATTRS 5 //number of attributes
#define MAXSZ 1700 //max size of training set
#define MAXVALUE 10000.0 //the biggest attribute's value is below 10000(int)
#define K 5
struct vector {
double attributes[NATTRS];
double classlabel;
};
struct item {
double distance;
double classlabel;
};
struct vector trSet[MAXSZ];//global variable,the training set
struct item knn[K];//global variable,the k-neareast-neighbour set
int curTSize = 0; //current size of the training set
int AddtoTSet(struct vector v)
{
if(curTSize>=MAXSZ) {
cout<<endl<<"The training set has "<<MAXSZ<<" examples!"<<endl<<endl;
return 0;
}
trSet[curTSize] = v;
curTSize++;
return 1;
}
double Distance(struct vector v1,struct vector v2)
{
double d = 0.0;
double tem = 0.0;
for(int i = 0;i < NATTRS;i++)
tem += (v1.attributes[i]-v2.attributes[i])*(v1.attributes[i]-v2.attributes[i]);
d = sqrt(tem);
return d;
}
int max(struct item knn[]) //return the no. of the item which has biggest distance(
//should be replaced)
{
int maxNo = 0;
if(K > 1)
for(int i = 1;i < K;i++)
if(knn[i].distance>knn[maxNo].distance)
maxNo = i;
return maxNo;
}double Classify(struct vector v)//decide which class label will be assigned to
//a given input vetor with the knn method
{
double dd = 0;
int maxn = 0;
int freq[K];
double mfreqC = 0;//the class label appears most frequently
int i;
for(i = 0;i < K;i++)
knn[i].distance = MAXVALUE;
for(i = 0;i < curTSize;i++)
{
dd = Distance(trSet[i],v);
maxn = max(knn);//for every new state of the training set should update maxn
if(dd < knn[maxn].distance) {
knn[maxn].distance = dd;
knn[maxn].classlabel = trSet[i].classlabel;
}
}
for(i = 0;i < K;i++)//freq[i] represents knn[i].classlabel appears how many times
freq[i] = 1;
for(i = 0;i < K;i++)
for(int j = 0;j < K;j++)
if((i!=j)&&(knn[i].classlabel == knn[j].classlabel))
freq[i]+=1;
int mfreq = 1;
mfreqC = knn[0].classlabel;
for(i = 0;i < K;i++)
if(freq[i] > mfreq) {
mfreq = freq[i];//mfreq represents the most frepuences
mfreqC = knn[i].classlabel; //mfreqNo is the item no. with the most frequent
//classlabel
}
return mfreqC;
}
void main()
{ double classlabel;
double c;
double n;
struct vector trExmp;
int i;
ifstream filein("G:\\data\\for knn\\data.txt");
if(filein.fail()){cout<<"Can't open data.txt"<<endl; return;}
while(!filein.eof()) {
filein>>c;
trExmp.classlabel = c;
cout<<trExmp.classlabel<<" "; for(int i = 0;i < NATTRS;i++) {
filein>>n;
trExmp.attributes[i] = n;
cout<<trExmp.attributes[i]<<" ";
} cout<<endl;
if(!AddtoTSet(trExmp))
break;
}filein.close();struct vector testv={{142,188,11,1159,0.5513196},17};
classlabel = Classify(testv);
cout<<"The classlable of the testv is: ";
cout<<classlabel<<endl;
for(i = 0;i < K;i++)
cout<<knn[i].distance<<"\t"<<knn[i].classlabel<<endl;
//cout<<max(knn);
}
#include <cmath>
#include <fstream>
using namespace std;
#define NATTRS 5 //number of attributes
#define MAXSZ 1700 //max size of training set
#define MAXVALUE 10000.0 //the biggest attribute's value is below 10000(int)
#define K 5
struct vector {
double attributes[NATTRS];
double classlabel;
};
struct item {
double distance;
double classlabel;
};
struct vector trSet[MAXSZ];//global variable,the training set
struct item knn[K];//global variable,the k-neareast-neighbour set
int curTSize = 0; //current size of the training set
int AddtoTSet(struct vector v)
{
if(curTSize>=MAXSZ) {
cout<<endl<<"The training set has "<<MAXSZ<<" examples!"<<endl<<endl;
return 0;
}
trSet[curTSize] = v;
curTSize++;
return 1;
}
double Distance(struct vector v1,struct vector v2)
{
double d = 0.0;
double tem = 0.0;
for(int i = 0;i < NATTRS;i++)
tem += (v1.attributes[i]-v2.attributes[i])*(v1.attributes[i]-v2.attributes[i]);
d = sqrt(tem);
return d;
}
int max(struct item knn[]) //return the no. of the item which has biggest distance(
//should be replaced)
{
int maxNo = 0;
if(K > 1)
for(int i = 1;i < K;i++)
if(knn[i].distance>knn[maxNo].distance)
maxNo = i;
return maxNo;
}double Classify(struct vector v)//decide which class label will be assigned to
//a given input vetor with the knn method
{
double dd = 0;
int maxn = 0;
int freq[K];
double mfreqC = 0;//the class label appears most frequently
int i;
for(i = 0;i < K;i++)
knn[i].distance = MAXVALUE;
for(i = 0;i < curTSize;i++)
{
dd = Distance(trSet[i],v);
maxn = max(knn);//for every new state of the training set should update maxn
if(dd < knn[maxn].distance) {
knn[maxn].distance = dd;
knn[maxn].classlabel = trSet[i].classlabel;
}
}
for(i = 0;i < K;i++)//freq[i] represents knn[i].classlabel appears how many times
freq[i] = 1;
for(i = 0;i < K;i++)
for(int j = 0;j < K;j++)
if((i!=j)&&(knn[i].classlabel == knn[j].classlabel))
freq[i]+=1;
int mfreq = 1;
mfreqC = knn[0].classlabel;
for(i = 0;i < K;i++)
if(freq[i] > mfreq) {
mfreq = freq[i];//mfreq represents the most frepuences
mfreqC = knn[i].classlabel; //mfreqNo is the item no. with the most frequent
//classlabel
}
return mfreqC;
}
void main()
{ double classlabel;
double c;
double n;
struct vector trExmp;
int i;
ifstream filein("G:\\data\\for knn\\data.txt");
if(filein.fail()){cout<<"Can't open data.txt"<<endl; return;}
while(!filein.eof()) {
filein>>c;
trExmp.classlabel = c;
cout<<trExmp.classlabel<<" "; for(int i = 0;i < NATTRS;i++) {
filein>>n;
trExmp.attributes[i] = n;
cout<<trExmp.attributes[i]<<" ";
} cout<<endl;
if(!AddtoTSet(trExmp))
break;
}filein.close();struct vector testv={{142,188,11,1159,0.5513196},17};
classlabel = Classify(testv);
cout<<"The classlable of the testv is: ";
cout<<classlabel<<endl;
for(i = 0;i < K;i++)
cout<<knn[i].distance<<"\t"<<knn[i].classlabel<<endl;
//cout<<max(knn);
}
本回答被网友采纳
已赞过
已踩过<
评论
收起
你对这个回答的评价是?
展开全部
坐等答案啊
已赞过
已踩过<
评论
收起
你对这个回答的评价是?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询