% KNN classification with hand-written min-max normalization.
%
% Reads two workspace variables that are defined outside this script:
%   wine        - sample matrix; columns 1..13 are used as features
%   wine_labels - one class label per row of `wine`
% Leaves train/test splits, predict_labels (1 x numTest) and accuracy in the
% workspace and draws a figure comparing actual and predicted labels.
%
% Alternative fixed (non-random) split, kept for reference:
% train=[wine(1:30,:);wine(60:89,:);wine(131:160,:)];
% test=[wine(31:59,:);wine(90:130,:);wine(161:178,:)];
% train_labels=[wine_labels(1:30,:);wine_labels(60:89,:);wine_labels(131:160,:)];
% test_labels=[wine_labels(31:59,:);wine_labels(90:130,:);wine_labels(161:178,:)];

num_features = 13;              % feature columns in `wine`
num_train    = 100;             % rows used for training after shuffling
label_col    = num_features + 1;

% Append the labels as an extra column so features and labels are shuffled together.
wine(:,label_col) = wine_labels;

% Shuffle the rows randomly.
wine_data = wine(randperm(size(wine,1)),:);

% First num_train rows form the training set, the remaining rows the test set.
train        = wine_data(1:num_train,1:num_features);
train_labels = wine_data(1:num_train,label_col);
test         = wine_data(num_train+1:end,1:num_features);
test_labels  = wine_data(num_train+1:end,label_col);

% Min-max normalization: the scaling parameters come from the training set
% only and are then applied unchanged to the test set.
h = max(train,[],1);
l = min(train,[],1);
span = h - l;
span(span == 0) = 1;            % constant feature: avoid 0/0 -> NaN
for col = 1:num_features
    train(:,col) = (train(:,col) - l(col)) ./ span(col);
    test(:,col)  = (test(:,col)  - l(col)) ./ span(col);
end
% Equivalent normalization using the toolbox function:
%[train,strc]=mapminmax(train');
%test=mapminmax.apply(test',strc);

% Euclidean distance between every test sample (rows) and every training
% sample (columns).
all_distance = dist(test,train');

% Sort each row in ascending order; B holds the training-sample indices.
[~,B] = sort(all_distance,2);

% Labels of the K nearest neighbours, one row per test sample.
K = 3;
K = min(K,size(train,1));
% reshape keeps the (numTest x K) layout even when there is one test sample or K == 1.
all_labels = reshape(train_labels(B(:,1:K)),size(B,1),K);

% Majority vote. mode returns the actual label value (not an index into the
% list of unique labels) and breaks ties in favour of the smallest label.
predict_labels = mode(all_labels,2)';

% Fraction of correctly classified test samples (displayed on purpose).
accuracy = 1 - nnz(predict_labels ~= test_labels')/numel(test_labels)

% Plot actual versus predicted labels.
figure;
hold on;
plot(test_labels,'o');
plot(predict_labels,'*');
xlabel('测试集数据','FontSize',12);
ylabel('类别标签','FontSize',12);
legend('实际测试集分类','预测测试集分类');
title('测试集的实际分类和预测分类图','FontSize',12);
grid on;
% KNN classification with normalization done by mapminmax.
%
% Reads two workspace variables that are defined outside this script:
%   wine        - sample matrix; columns 1..13 are used as features
%   wine_labels - one class label per row of `wine`
% Leaves train/test (transposed: features x samples), predict_labels
% (1 x numTest) and accuracy in the workspace and draws a figure comparing
% actual and predicted labels.
%
% Alternative fixed (non-random) split, kept for reference:
% train=[wine(1:30,:);wine(60:89,:);wine(131:160,:)];
% test=[wine(31:59,:);wine(90:130,:);wine(161:178,:)];
% train_labels=[wine_labels(1:30,:);wine_labels(60:89,:);wine_labels(131:160,:)];
% test_labels=[wine_labels(31:59,:);wine_labels(90:130,:);wine_labels(161:178,:)];

num_features = 13;              % feature columns in `wine`
num_train    = 100;             % rows used for training after shuffling
label_col    = num_features + 1;

% Append the labels as an extra column so features and labels are shuffled together.
wine(:,label_col) = wine_labels;

% Shuffle the rows randomly.
wine_data = wine(randperm(size(wine,1)),:);

% First num_train rows form the training set, the remaining rows the test set.
train        = wine_data(1:num_train,1:num_features);
train_labels = wine_data(1:num_train,label_col);
test         = wine_data(num_train+1:end,1:num_features);
test_labels  = wine_data(num_train+1:end,label_col);

% Normalization. mapminmax works row-wise, so the data is transposed to
% (features x samples); strc stores the settings fitted on the training set.
[train,strc] = mapminmax(train');
% Apply the training-set settings to the test set.
test = mapminmax('apply',test',strc);

% Euclidean distance between every test sample (rows) and every training
% sample (columns); both matrices are (features x samples) at this point.
all_distance = dist(test',train);

% Sort each row in ascending order; B holds the training-sample indices.
[~,B] = sort(all_distance,2);

% Labels of the K nearest neighbours, one row per test sample.
K = 5;
K = min(K,size(train,2));
% reshape keeps the (numTest x K) layout even when there is one test sample or K == 1.
all_labels = reshape(train_labels(B(:,1:K)),size(B,1),K);

% Majority vote. mode returns the actual label value (not an index into the
% list of unique labels) and breaks ties in favour of the smallest label.
predict_labels = mode(all_labels,2)';

% Fraction of correctly classified test samples (displayed on purpose).
accuracy = 1 - nnz(predict_labels ~= test_labels')/numel(test_labels)

% Plot actual versus predicted labels.
figure;
hold on;
plot(test_labels,'o');
plot(predict_labels,'*');
xlabel('测试集数据','FontSize',12);
ylabel('类别标签','FontSize',12);
legend('实际测试集分类','预测测试集分类');
title('测试集的实际分类和预测分类图','FontSize',12);
grid on;
复制代码
|