[Python]绘图简化
码了SOFM(Self Organized Feature Mapping)代码,课上演示结束,通常要进行复盘。本篇博文谈谈绘图的一个问题。代码最终要显示原始数据和权值更新(此处不对算法进行说明)后的数据,姑且表示为A和B。
首先遇到的一个问题,关于数据拷贝的问题。
A是ndarray类型(NumPy的多维数组,可以当作矩阵使用,但并不等同于numpy.matrix类型)。使用
B = A.copy()
将B作为A的数据副本,此时对A的更新不会影响到B。而直接赋值,如:
B = A
A,B是指向同一块内存区域的。同样,切片操作得到的也只是原数组的视图(view),指向相同的内存区域。也就是说,对ndarray类型的数据要想实现数据拷贝,需要显式调用copy()函数。
对于列表的拷贝,直接赋值同样和原始变量指向同一块内存区域,要实现数据拷贝,请使用切片slice(此处不同于ndarray类型)。
第二个问题是关于绘图。代码需求要画出多幅图像的时候怎样处理?先上代码:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import collections | |
import matplotlib.pyplot as plt | |
def scatterDats(idx, dats, titles=None):
    '''
    Desc: scatter-plot every dataset in dats side by side in one figure
    Param:
        idx: the index of the figure
        dats: sequence of 2-D arrays; column 0 is drawn as x, column 1 as y
        titles: optional sequence of subplot titles, matched to dats by
                position. Defaults to ['a', 'b']. Datasets that have no
                matching title are drawn without one.
    '''
    if titles is None:
        titles = ['a', 'b']
    num = len(dats)
    plt.figure(idx)
    for j, dat in enumerate(dats):
        # plt.subplot makes the new axes current, so the scatter and
        # title calls below land on subplot j.
        plt.subplot(1, num, j + 1)
        plt.scatter(dat[:, 0], dat[:, 1])
        # Guard the lookup: a fixed title list used to raise IndexError
        # as soon as more datasets than titles were passed in.
        if j < len(titles):
            plt.title(titles[j])
    plt.show()
def _scatterDats(idx, dats):
    '''
    Desc: scatter-plot each entry of dats in its own subplot, titled by key
    Param:
        idx: the index of the figure
        dats: mapping of title -> 2-D array; column 0 is drawn as x,
              column 1 as y. Subplots follow the mapping's iteration order.
    '''
    total = len(dats)
    plt.figure(idx)
    # Lay out one row of `total` subplots up front.
    panels = [plt.subplot(1, total, pos + 1) for pos in range(total)]
    for panel, name in zip(panels, dats):
        plt.sca(panel)
        points = dats[name]
        plt.scatter(points[:, 0], points[:, 1])
        plt.title(name)
    plt.show()
if __name__ == '__main__':
    # Demo data. It has to be created before either method below uses it;
    # previously it was defined only after the list-based call, which
    # raised NameError.
    a = np.array([[1,2],[3,4]])
    b = np.array([[5,6],[7,8]])
    # Method List
    dats = []
    dats.append(a)
    dats.append(b)
    scatterDats(2,dats)
    # Method OrderedDict
    _dats = collections.OrderedDict()
    _dats['a'] = a
    _dats['b'] = b
    _scatterDats(1,_dats)
原始的思路是采用列表结构,由于列表结构中存放的数据类型一致,直接放数据,使得图像名称和数据分离,没有实现代码解耦,添加新的打印图像的时候,需要在scatterDats()中显式添加图像名称。
于是,一个直接的想法是:将图像名称和数据绑定。但是此时需要考虑字典是无序的(Python 3.7之前的普通dict不保证按插入顺序遍历),也就是说虽然按顺序加入了要打印的图像,结果显示却可能是乱序。解决方案是OrderedDict数据结构,意如其名。
第三个问题是,变量命名问题。
为了设置调试开关,原来使用的是
DEBUG=True
其实更好的表达是:
verbose=True
为了表达迭代次数,原来是:
iterNums
更好的表达:
epoch
实际上epoch和iteration也是有区别的。
结合上篇写BP代码的编程复盘,这次的代码风格如下:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
@Desc: Self Organized Feature Mapping Algorithm | |
@Author: zhpmatrix | |
@Date: 2016.12.18 13:56 | |
''' | |
import random | |
import math | |
import time | |
import collections | |
import numpy as np | |
import matplotlib.pyplot as plt | |
def getSamples(lowS, highS, num, dim=2):
    '''
    Desc: draw a random sample matrix with np.random.uniform
    Param:
        lowS: lower bound passed to np.random.uniform
        highS: upper bound passed to np.random.uniform
        num: number of samples (rows)
        dim: dimension of each sample (columns), 2 by default
    Return: array of shape (num, dim)
    '''
    shape = (num, dim)
    return np.random.uniform(lowS, highS, size=shape)
def setNormOne(dat):
    '''
    Desc: scale every row of dat to Euclidean norm one, in place
    Param:
        dat: 2-D array; it is modified directly
    Return: the same array object that was passed in
    '''
    for idx, row in enumerate(dat):
        length = math.sqrt(sum(row ** 2))
        dat[idx] = row / length
    return dat
def getUW(samples, num):
    '''
    Desc: build initial unit weights by picking rows of samples at random
    Param:
        samples: 2-D array to pick rows from
        num: number of rows to pick (the same row may be picked repeatedly)
    Return: new array holding the picked rows
    '''
    last = samples.shape[0] - 1
    picks = [samples[random.randint(0, last)] for _ in range(num)]
    return np.array(picks)
def getUL(lowU, highU):
    '''
    Desc: list the [row, col] locations of a highU x highU grid of units
    Param:
        lowU: not read by this function; the grid always starts at 0
              NOTE(review): presumably meant as the lower grid bound - confirm
        highU: grid size along each axis (exclusive upper bound)
    Return: array of [i, j] pairs in row-major order
    '''
    grid = [[row, col] for row in range(highU) for col in range(highU)]
    return np.array(grid)
def findWinner(sample, uw):
    '''
    Desc: find the unit whose weight vector has the largest inner product
          with the sample
    Param:
        sample: 1-D sample vector
        uw: unit weight matrix, one weight vector per row
    Return: row index of the winning unit. On ties the highest index wins.
            Returns 0 when uw has no rows.
    '''
    # Start below every possible inner product. Starting at 0 made the
    # function return index 0 whenever all inner products were negative.
    maxVal = float('-inf')
    winner = 0
    for i in range(uw.shape[0]):
        ip = sample.dot(uw[i])
        if ip >= maxVal:
            maxVal = ip
            winner = i
    return winner
def getLR(iniLR, ctimes, epoch):
    '''
    Desc: learning ratio for the given epoch, iniLR * exp(-epoch / ctimes)
    Param:
        iniLR: initial learning ratio
        ctimes: constant that divides epoch inside the exponential
        epoch: current epoch number
    Return: the decayed learning ratio
    '''
    decay = math.exp(-epoch / ctimes)
    return iniLR * decay
def getArea(unit, winner, radius, ctimes, epoch):
    '''
    Desc: Gaussian neighbourhood value of a unit relative to the winner
    Param:
        unit: location vector of the unit being updated
        winner: location vector of the winning unit
        radius: initial neighbourhood radius
        ctimes: constant times used by the radius schedule
        epoch: current epoch number
    Return: exp(-d^2 / (2 * sigma^2)), where d is the Euclidean distance
            between unit and winner and
            sigma = radius * exp(-epoch * log(radius) / ctimes)
    '''
    # The sum of squared differences already is d^2. It used to be squared
    # a second time, which put d^4 into the Gaussian.
    sqDist = sum((unit - winner) ** 2)
    if radius == 1.0:
        # log(1.0) is 0 and would make the schedule below divide by zero.
        radius += 0.01
    sigma = radius * math.exp(-epoch / (ctimes / math.log(radius)))
    return math.exp(-sqDist / (2.0 * sigma ** 2))
def matShuffle(dat):
    '''
    Desc: return a copy of dat with its rows in random order
    Param:
        dat: array to shuffle; the input itself is not modified
    Return: new array built from the shuffled rows
    '''
    rows = dat.tolist()
    random.shuffle(rows)
    shuffled = np.array(rows)
    return shuffled
def sofm(samples, uw, ul, iniLR, radius, ctimes, lrGap, epochNums, verbose=None):
    '''
    Desc: update weights for all the units
    Param:
        samples: input samples with norm equals one
        uw: unit weight matrix; it is updated in place and returned
        ul: unit location matrix
        iniLR: initial learning ratio
        radius: radius for area
        ctimes: constant times for lr and area functions
        lrGap: training stops once the learning ratio is <= this bound
        epochNums: maximum number of epochs to run
        verbose: debug switch forwarded to debugInfo. When None, the
                 module-level `verbose` flag is used if it exists,
                 otherwise False.
    Return: uw after training
    '''
    if verbose is None:
        # Keep honouring the flag set by the script entry point, without
        # raising NameError when the module is used without it.
        verbose = globals().get('verbose', False)
    numS = samples.shape[0]
    numU = uw.shape[0]
    for i in range(epochNums):
        lr = getLR(iniLR, ctimes, i)
        if lr <= lrGap:
            print('LR OK!')
            # Stop training but still return the weights. Calling exit()
            # here ended the whole process and lost the result.
            break
        samples = matShuffle(samples)
        for j in range(numS):
            winnerIdx = findWinner(samples[j], uw)
            for k in range(numU):
                areaVal = getArea(ul[k], ul[winnerIdx], radius, ctimes, i)
                uw[k] = uw[k] + lr * areaVal * (samples[j] - uw[k])
                # Debug info
                debugInfo(verbose, str(i), str(lr), str(j), str(winnerIdx), str(k), str(uw[k]))
    return uw
def debugInfo(verbose, epochnum, lr, snum, winner, unum, uw):
    '''
    Desc: print one line of training state and append it to the file 'log'
    Param:
        verbose: debug switch; nothing happens when it is falsy
        epochnum: current epoch number
        lr: current learning ratio
        snum: index of the current sample
        winner: index of the winning unit
        unum: index of the unit being updated
        uw: weight vector of that unit
    '''
    if not verbose:
        return
    fields = ['EpochNum:', str(epochnum), ' LR:', str(lr),
              ' SNum:', str(snum), ' Winnner:', str(winner),
              ' UNum:', str(unum), ' UW:', str(uw)]
    # Joining with a single space reproduces the output of the former
    # comma-separated print statement and runs on Python 2 and 3 alike.
    print(' '.join(fields))
    # Pause so the console output can be followed while it scrolls.
    time.sleep(0.2)
    # open() replaces the Python 2-only file() builtin; the with-block
    # closes the log even if the write fails.
    with open('log', 'a+') as log:
        log.writelines(fields + ['\n'])
def scatterDats(dats, titles=None):
    '''
    Desc: scatter-plot every dataset in dats side by side in figure 1
    Param:
        dats: sequence of 2-D arrays; column 0 is drawn as x, column 1 as y
        titles: optional sequence of subplot titles, matched to dats by
                position. Defaults to
                ['samples', 'samplesNO', 'uwNO', 'fUW']. Datasets that have
                no matching title are drawn without one.
    '''
    if titles is None:
        titles = ['samples', 'samplesNO', 'uwNO', 'fUW']
    num = len(dats)
    plt.figure(1)
    for j, dat in enumerate(dats):
        # plt.subplot makes the new axes current, so the scatter and
        # title calls below land on subplot j.
        plt.subplot(1, num, j + 1)
        plt.scatter(dat[:, 0], dat[:, 1])
        # Guard the lookup: a fixed title list used to raise IndexError
        # as soon as more datasets than titles were passed in.
        if j < len(titles):
            plt.title(titles[j])
    plt.show()
def _scatterDats(dats):
    '''
    Desc: scatter-plot each entry of dats in its own subplot of figure 1,
          titled by its key
    Param:
        dats: mapping of title -> 2-D array; column 0 is drawn as x,
              column 1 as y. Subplots follow the mapping's iteration order.
    '''
    total = len(dats)
    plt.figure(1)
    # Lay out one row of `total` subplots up front.
    panels = [plt.subplot(1, total, pos + 1) for pos in range(total)]
    for panel, name in zip(panels, dats):
        plt.sca(panel)
        points = dats[name]
        plt.scatter(points[:, 0], points[:, 1])
        plt.title(name)
    plt.show()
if __name__ == '__main__':
    verbose = True
    lowS = 0        # Low bound of samples
    highS = 1       # High bound of samples
    lowU = 0        # Low bound of units
    highU = 2       # High bound of units
    num = 10        # Number of samples
    iniLR = 0.1     # Initialize learning ratio
    ctimes = 1000.0 # Constant times for lr and area functions
    radius = 2.0    # Radius for area function
    lrGap = 0.001   # Learning ratio bound
    iterNums = 1    # Number of iter
    dats = collections.OrderedDict()  # Collections for dats to show
    # Get samples
    samples = getSamples(lowS,highS,num)
    samplesNO = samples.copy()
    #samplesNO = setNormOne(samplesNO)
    # Get units with location
    ul = getUL(lowU,highU)
    # Get units with weights
    uw = getUW(samples,highU**2)
    # setNormOne works in place, so normalise a copy to keep uw untouched.
    uwNO = setNormOne(uw.copy())
    # Get final units weight
    # sofm also updates its weight matrix in place. Train on a copy so that
    # uwNO still holds the initial weights; without it the 'uwNO' and 'fUW'
    # plots showed the same array.
    fUW = sofm(samplesNO,uwNO.copy(),ul,iniLR,radius,ctimes,lrGap,iterNums)
    dats['samples'] = samples
    dats['samplesNO'] = samplesNO
    dats['uwNO'] = uwNO
    dats['fUW'] = fUW
    _scatterDats(dats)
从封装角度来说,main函数中基本实现了自己想要的样子。可是高质量的代码,依然路途遥远,心向往之。