天天看点

求解惩罚线性回归-LARS算法源码

import urllib
import urllib.request
from math import sqrt

import matplotlib.pyplot as plot
import numpy as np
from sklearn import datasets, linear_model

# Forward stagewise regression on the UCI red-wine-quality data set:
# download the CSV, standardize the attributes and the labels, repeatedly
# nudge the coefficient of the attribute most correlated with the current
# residuals by a fixed step, and finally plot every coefficient against the
# step number.
target_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
nSteps = 350
StepSize = 0.004


def parse_wine_rows(lines):
    """Parse semicolon-separated rows whose last field is the label.

    Args:
        lines: iterable of ``bytes`` or ``str`` lines. The first line is the
            header; every following non-blank line is one numeric data row.

    Returns:
        ``(names, xList, labels)`` where ``names`` is the list of header
        fields with surrounding double quotes removed, ``xList`` is a list of
        attribute rows (lists of floats, label column excluded) and
        ``labels`` is the list of float labels.

    Raises:
        ValueError: if there is no data row after the header, or a field
            cannot be converted to ``float``.
    """
    def fields_of(raw):
        # Decode real text instead of parsing the ``str(bytes)`` repr, which
        # leaks ``b'`` and ``\\n'`` artefacts into the fields.
        text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
        return text.strip().split(";")

    rows = iter(lines)
    header = next(rows, None)
    names = [] if header is None else [f.strip('"') for f in fields_of(header)]

    xList = []
    labels = []
    for raw in rows:
        fields = fields_of(raw)
        if fields == [""]:
            # Blank line (for example a trailing newline at end of file).
            continue
        labels.append(float(fields[-1]))
        xList.append([float(value) for value in fields[:-1]])

    if not xList:
        raise ValueError("no data rows found after the header line")
    return names, xList, labels


def standardize(values):
    """Return ``values`` shifted to zero mean and scaled to unit deviation.

    Works column-wise for a 2-D list of rows and over the whole sequence for
    a 1-D list. The population standard deviation (divide by ``n``) is used.

    Returns:
        A (nested) list of floats with the same layout as ``values``.
    """
    arr = np.asarray(values, dtype=float)
    return ((arr - arr.mean(axis=0)) / arr.std(axis=0)).tolist()


def forward_stagewise(x, y, n_steps, step_size):
    """Trace a coefficient path with fixed-size forward stagewise steps.

    At every step the residuals of the current linear fit are correlated
    with each attribute column, and the coefficient of the column with the
    largest absolute correlation (first one wins on ties) moves by
    ``step_size`` in the direction of that correlation's sign.

    Args:
        x: 2-D sequence of attribute rows (one row per sample).
        y: 1-D sequence of labels, one per row of ``x``.
        n_steps: number of steps to take.
        step_size: magnitude of each coefficient increment.

    Returns:
        A list of ``n_steps + 1`` coefficient lists: the all-zero start
        followed by the coefficients after each step.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n_rows, n_cols = x.shape

    beta = np.zeros(n_cols)
    path = [beta.tolist()]
    for _ in range(n_steps):
        residuals = y - x @ beta
        corr = x.T @ residuals / n_rows
        # argmax returns the first maximum, matching a strict ``<`` scan.
        i_star = int(np.argmax(np.abs(corr)))
        # np.sign yields 0 for a zero correlation, so a fully converged fit
        # leaves beta unchanged instead of producing NaN from 0 / 0.
        beta[i_star] += step_size * np.sign(corr[i_star])
        path.append(beta.tolist())
    return path


if __name__ == "__main__":
    # Close the HTTP response as soon as all rows have been read.
    with urllib.request.urlopen(target_url) as data:
        names, xList, labels = parse_wine_rows(data)

    xNormalized = standardize(xList)
    labelNormalize = standardize(labels)

    betaMat = forward_stagewise(xNormalized, labelNormalize, nSteps, StepSize)
    beta = betaMat[-1]

    # betaMat holds nSteps + 1 rows (start point plus one per step); plot all
    # of them so the coefficients after the final step are shown too.
    xaxis = range(len(betaMat))
    for coefCurve in zip(*betaMat):
        plot.plot(xaxis, coefCurve)
    plot.show()

           

继续阅读