# 使用python和numpy的梯度下降

• X：特征矩阵
• y：目标值
• w：重量/值
• N：训练集的大小

``````import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import random

def generateSample(N, variance=100):
X = np.matrix(range(N)).T + 1
Y = np.matrix([random.random() * variance + i * 10 + 900 for i in range(len(X))]).T
return X, Y

N = len(x)
w = np.zeros((x.shape[1], 1))
eta = 0.0001

maxIteration = 100000
for i in range(maxIteration):
error = x * w - y
gradient = x.T * error / N
w = w - eta * gradient
return w

def plotModel(x, y, w):
plt.plot(x[:,1], y, "x")
plt.plot(x[:,1], x * w, "r-")
plt.show()

def test(N, variance, modelFunction):
X, Y = generateSample(N, variance)
X = np.hstack([np.matrix(np.ones(len(X))).T, X])
w = modelFunction(X, Y)
plotModel(X, Y, w)

``````

Question
``````def gradient(X_norm,y,theta,alpha,m,n,num_it):
temp=np.array(np.zeros_like(theta,float))
for i in range(0,num_it):
h=np.dot(X_norm,theta)
#temp[j]=theta[j]-(alpha/m)*(  np.sum( (h-y)*X_norm[:,j][np.newaxis,:] )  )
temp[0]=theta[0]-(alpha/m)*(np.sum(h-y))
temp[1]=theta[1]-(alpha/m)*(np.sum((h-y)*X_norm[:,1]))
theta=temp
return theta

X_norm,mean,std=featureScale(X)
#length of X (number of rows)
m=len(X)
X_norm=np.array([np.ones(m),X_norm])
n,m=np.shape(X_norm)
num_it=1500
alpha=0.01
theta=np.zeros(n,float)[:,np.newaxis]
X_norm=X_norm.transpose()
print theta
``````

``````1 10 1000
2 20 2500
3 25 3500
4 40 5500
5 60 6200
``````

``````%
% Linear Regression with multiple variables
%

% Alpha for learning curve
alphaNum = 0.0005;

% Number of features
n = 2;

% Number of iterations for Gradient Descent algorithm
iterations = 10000

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% No need to update after here
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Initial theta values
theta = ones(n + 1, 1);

% Number of training samples
m = length(DATA(:, 1));

% X with one mor column (x0 filled with '1's)
X = ones(m, 1);
for i = 1:n
X = [X, DATA(:,i)];
endfor

% Expected data must go always in the last column
y = DATA(:, n + 1)

function gradientDescent(x, y, theta, alphaNum, iterations)
iterations = [];
costs = [];

m = length(y);

for iteration = 1:10000
hypothesis = x * theta;

loss = hypothesis - y;

% J(theta)
cost = sum(loss.^2) / (2 * m);

% Save for the graphic to see if the algorithm did work
iterations = [iterations, iteration];
costs = [costs, cost];

gradient = (x' * loss) / m; % /m is for the average

theta = theta - (alphaNum * gradient);
endfor

% Show final theta values
display(theta)

% Show J(theta) graphic evolution to check it worked, tendency must be zero
plot(iterations, costs);

endfunction

``````