k均值算法c++语言实现代码
//k-mean.h
#ifndef KMEAN_HEAD
#define KMEAN_HEAD
#include <vector>
#include <map>
//空间点的定义
class Node
{
public:
double pos_x;
double pos_y;
double pos_z;
Node()
{
pos_x = 0.0;
pos_y = 0.0;
pos_z = 0.0;
}
Node(double x,double y,double z)
{
pos_x = x;
pos_y = y;
pos_z = z;
}
friend bool operator < (const Node& first,const Node& second)
{
//对x轴的比较
if(first.pos_x < second.pos_x)
{
return true;
}
else if (first.pos_x > second.pos_x)
{
return false;
}
//对y轴的比较
else
{
if(first.pos_y < second.pos_y)
{
return true;
}
else if (first.pos_y > second.pos_y)
{
return false;
}
//对z轴的比较
else
{
if(first.pos_z < second.pos_z)
{
return true;
}
else if (first.pos_z >= second.pos_z)
{
return false;
}
}
}
}
friend bool operator == (const Node& first,const Node& second)
{
if(first.pos_x == second.pos_x && first.pos_y == second.pos_y && first.pos_z == second.pos_z)
{
return true;
}
else
{
return false;
}
}
};
class KMean
{
private:
int cluster_num;//生成的簇的数量。
std:: vector<Node> mean_nodes;//均值点
std:: vector<Node> data;//所有的数据点
std:: map <int , std:: vector<Node> > cluster;//簇,key为簇的下标,value为该簇中所有点
void Init();//初始化函数(首先随即生成代表点)
void ClusterProcess();//聚类过程,将空间中的点分到不同的簇中
Node GetMean(int cluster_index);//生成均值
void NewCluster();//确定新的簇过程,该函数会调用ClusterProcess函数。
double Kdistance(Node active,Node other);//判断两个点之间的距离
public:
KMean(int c_num,std:: vector<Node> node_vector);
void Star();//启动k均值算法
};
#endif // KMEAN_HEAD
//k-mean.h
#ifndef KMEAN_HEAD
#define KMEAN_HEAD
#include <vector>
#include <map>
//空间点的定义
class Node
{
public:
double pos_x;
double pos_y;
double pos_z;
Node()
{
pos_x = 0.0;
pos_y = 0.0;
pos_z = 0.0;
}
Node(double x,double y,double z)
{
pos_x = x;
pos_y = y;
pos_z = z;
}
friend bool operator < (const Node& first,const Node& second)
{
//对x轴的比较
if(first.pos_x < second.pos_x)
{
return true;
}
else if (first.pos_x > second.pos_x)
{
return false;
}
//对y轴的比较
else
{
if(first.pos_y < second.pos_y)
{
return true;
}
else if (first.pos_y > second.pos_y)
{
return false;
}
//对z轴的比较
else
{
if(first.pos_z < second.pos_z)
{
return true;
}
else if (first.pos_z >= second.pos_z)
{
return false;
}
}
}
}
friend bool operator == (const Node& first,const Node& second)
{
if(first.pos_x == second.pos_x && first.pos_y == second.pos_y && first.pos_z == second.pos_z)
{
return true;
}
else
{
return false;
}
}
};
class KMean
{
private:
int cluster_num;//生成的簇的数量。
std:: vector<Node> mean_nodes;//均值点
std:: vector<Node> data;//所有的数据点
std:: map <int , std:: vector<Node> > cluster;//簇,key为簇的下标,value为该簇中所有点
void Init();//初始化函数(首先随即生成代表点)
void ClusterProcess();//聚类过程,将空间中的点分到不同的簇中
Node GetMean(int cluster_index);//生成均值
void NewCluster();//确定新的簇过程,该函数会调用ClusterProcess函数。
double Kdistance(Node active,Node other);//判断两个点之间的距离
public:
KMean(int c_num,std:: vector<Node> node_vector);
void Star();//启动k均值算法
};
#endif // KMEAN_HEAD
#include "k-mean.h"
#include <vector>
#include <map>
#include <ctime>
#include <cstdlib>
#include <algorithm>
#include <cmath>
#include <iostream>
using namespace std;
#define MAXDISTANCE 1000000
KMean::KMean(int c_num,vector<Node> node_vector)
{
cluster_num = c_num;
data = node_vector;
srand((int)time(0));
Init();
}
void KMean::Init()//初始化函数(首先随即生成代表点)
{
for(int i =0 ;i<cluster_num;)
{
int pos = rand() % data.size();
bool insert_flag = true;
//首先判断选中的点是否是中心点
for(unsigned int j = 0;j<mean_nodes.size();j++)
{
if(mean_nodes[j] == data[i])
{
insert_flag = false;
break;
}
}
if(insert_flag )
{
mean_nodes.push_back(data[pos]);
i++;
}
}
ClusterProcess();//进行聚类过程
}
void KMean::ClusterProcess()//聚类过程,将空间中的点分到不同的簇中
{
//遍历空间上所有的点
for( unsigned int i = 0 ; i < data.size();i++)
{
//忽略中心点
bool continue_flag = false;
for(unsigned int j = 0;j<mean_nodes.size();j++)
{
if(mean_nodes[j] == data[i])
{
continue_flag = true;
break;
}
}
if(continue_flag)
{
continue;
}
//下面是聚类过程
//首先找到离当前点最近的中心点,并记录下该中心点所在的簇
int min_kdistance = MAXDISTANCE;
int index = 0 ;
for(unsigned int j = 0;j < mean_nodes.size();j++)
{
double dis = Kdistance(data[i],mean_nodes[j]);
if(dis < min_kdistance)
{
min_kdistance = dis;
index = j;
}
}
//先将当前点从原先的簇中删除
map<int,vector<Node> >::iterator iter;
//搜索所有的簇
for(iter = cluster.begin();iter != cluster.end();++iter)
{
vector<Node>::iterator node_iter;
bool jump_flag = false;
//对每个簇中的vector进行搜索
for(node_iter = iter->second.begin();node_iter != iter->second.end();node_iter++)
{
if(*node_iter == data[i])
{
//如果当前点就在更新的簇中,则忽略后面的操作
if(index == iter->first)
{
continue_flag = true;
}
else
{
iter->second.erase(node_iter);
}
jump_flag = true;
break;
}
}
if(jump_flag)
{
break;
}
}
if(continue_flag)
{
continue;
}
//将当前点插入到中心点所对应的簇中
//查看中心点是否已经存在map中
bool insert_flag = true;
for(iter = cluster.begin(); iter != cluster.end();++iter)
{
if(iter->first == index)
{
iter->second.push_back(data[i]);
insert_flag = false;
break;
}
}
//不存在则创建新的元素对象
if(insert_flag)
{
vector<Node> cluster_node_vector;
cluster_node_vector.push_back(data[i]);
cluster.insert(make_pair(index,cluster_node_vector));
}
}
}
double KMean::Kdistance(Node active,Node other)
{
return sqrt(pow((active.pos_x-other.pos_x),2) + pow((active.pos_y - other.pos_y),2) + pow((active.pos_z - other.pos_z),2));
}
Node KMean::GetMean(int cluster_index)
{
//对传入的参数进行判断,查看是否越界
if( cluster_num <0 || unsigned (cluster_index) >= mean_nodes.size() )
{
Node new_node;
new_node.pos_x = -1.0;
new_node.pos_y = -1.0;
new_node.pos_z = -1.0;
return new_node;
}
//求出簇中所有点的均值
Node sum_node;
Node aver_node;
for(int j = 0;j < cluster[cluster_index].size();j++)
{
sum_node.pos_x += cluster[cluster_index].at(j).pos_x;
sum_node.pos_y += cluster[cluster_index].at(j).pos_y;
sum_node.pos_z += cluster[cluster_index].at(j).pos_z;
}
aver_node.pos_x = sum_node.pos_x*1.0/ cluster[cluster_index].size();
aver_node.pos_y = sum_node.pos_y*1.0 / cluster[cluster_index].size();
aver_node.pos_z = sum_node.pos_z*1.0 / cluster[cluster_index].size();
//找到与均值最近的点
double min_dis = MAXDISTANCE;
Node new_mean_doc;
for(unsigned int i = 0;i< cluster[cluster_index].size();i++)
{
double dis = Kdistance(aver_node,cluster[cluster_index].at(i));
if(min_dis > dis)
{
min_dis = dis;
new_mean_doc = cluster[cluster_index].at(i);
}
}
return new_mean_doc;
}
void KMean::NewCluster()//确定新的中心点
{
for (unsigned int i = 0;i < mean_nodes.size();i++)
{
Node new_node =GetMean(i);
mean_nodes[i] = new_node;
}
ClusterProcess();
}
void KMean::Star()
{
for (int i = 0;i<100;i++)
{
NewCluster();
cout << "no:"<< i<<endl;
for(int j = 0;j < mean_nodes.size();j++)
{
cout << cluster[j].size()<<endl;
}
}
}
#include <iostream>
#include <vector>
#include "k-mean.h"
#include <ctime>
#include <cstdlib>
using namespace std;
int main()
{
srand((int) time(0));
vector<Node> data;
for(int i =0;i<100;i++)
{
Node node;
node.pos_x = (random() % 17 )*1.2;
node.pos_y = (random() % 19 )*1.2;
node.pos_z = (random() % 21) *1.2;
data.push_back(node);
}
KMean kmean(3,data);
kmean.Star();
return 0;
}