



$g$ 代表一个常用的逻辑函数(logistic function),为 S 型函数(sigmoid function),公式为:

$$g(z) = \frac{1}{1+e^{-z}}$$


$$h_{\theta}(x) = g(\theta^{T}x) = \frac{1}{1+e^{-\theta^{T}x}}$$

Logistic Regression 的代价函数为:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log\left(h_{\theta}(x^{(i)})\right) + \left(1-y^{(i)}\right)\log\left(1 - h_{\theta}(x^{(i)})\right)\right]$$



Gradient Descent(梯度下降)

$$\frac{\partial}{\partial \theta_{j}}J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left(h_{\theta}(x^{(i)})-y^{(i)}\right)x_{j}^{(i)}$$


使用 SciPy 的截断牛顿法(truncated Newton, TNC)实现 `scipy.optimize.fmin_tnc` 寻找最优参数,具体使用方法参见 SciPy 官方文档。

编写一个函数,用我们所学的参数theta来为数据集X输出预测。然后,我们可以使用这个函数来给我们的分类器的训练精度打分。 逻辑回归模型的假设函数:

$$h_{\theta}(x) = \frac{1}{1 + e^{-\theta^{T}x}}$$

当 $h_{\theta}(x)$ 大于等于 0.5 时,预测 $y=1$

当 $h_{\theta}(x)$ 小于 0.5 时,预测 $y=0$

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import scipy.optimize as opt

# NOTE(review): hard-coded absolute path from the author's machine; point it at
# your own copy of ex2data1.txt before running.
path = 'G:/machine learning/吴恩达/学习笔记/Lecture_2/ex2data1.txt'
# header=None: the first line is read as data, so column names are given here.
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])


# Split the rows by label (used only by the optional plot below).
positive = data[data["Admitted"].isin([1])]  # rows with Admitted == 1
negative = data[data["Admitted"].isin([0])]  # rows with Admitted == 0

# Optional scatter plot of both classes (currently disabled):
# fig, ax = plt.subplots(figsize=(8, 6))
# ax.scatter(positive['Exam 1'], positive['Exam 2'], s=50, c='b', marker='o', label='Admitted')
# ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')
# ax.legend()
# ax.set_xlabel('Exam 1 Score')
# ax.set_ylabel('Exam 2 Score')
# plt.show()

# Sigmoid (logistic) function
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)) of ``z``.

    ``z`` may be a scalar or a NumPy array; arrays are handled
    element-wise and the result has the same shape as the input.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# Optional plot of the sigmoid curve (currently disabled):
# nums = np.arange(-10, 10, step=1)
# fig, ax = plt.subplots(figsize=(8,6))
# ax.plot(nums, sigmoid(nums), 'r')
# plt.show()

# Prepend a constant column of ones so that theta[0] acts as the intercept.
data.insert(0, 'Ones', 1)

# The last column is the label; every column before it is a feature.
cols = data.shape[1]
X = data.iloc[:, :cols - 1].to_numpy()  # columns: Ones, Exam 1, Exam 2
y = data.iloc[:, cols - 1:].to_numpy()  # kept 2-D: one label column
# One parameter per feature column, stored as a column vector of zeros.
theta = np.zeros((X.shape[1], 1))

# Cost function for logistic regression
def computeCost(theta, X, y):
    """Return the (unregularised) logistic-regression cost at ``theta``.

    Computes -(1/m) * sum(y*log(h) + (1-y)*log(1-h)) with
    h = sigmoid(X . theta) and m = len(X).

    Args:
        theta: parameter vector; used as passed (flat or column).
        X: design matrix with one row per sample.
        y: labels; assumed to be an (m, 1) column - TODO confirm at call sites.

    Returns:
        The cost wrapped in a one-element NumPy array (the result of the
        dot products), not a Python float.
    """
    hypothesis = sigmoid(np.dot(X, theta))
    positive_term = np.dot(np.log(hypothesis).T, y)
    negative_term = np.dot(np.log(1 - hypothesis).T, 1 - y)
    return -1 / len(X) * (positive_term + negative_term)

# Sanity check: print the cost at the initial all-zero theta.
print(computeCost(theta, X, y))
# Gradient of the logistic-regression cost
def gradientdescent(theta, X, y):
    """Return the gradient of the logistic-regression cost at ``theta``.

    Despite its name, this function performs no descent step: it only
    evaluates the partial derivatives
    (1/m) * sum((h(x_i) - y_i) * x_ij), which is what an optimiser such
    as ``scipy.optimize.fmin_tnc`` expects for ``fprime``.

    Args:
        theta: parameters of shape (n,) or (n, 1). Optimisers pass a
            flat vector, so it is reshaped to a column before use.
        X: design matrix with one row per sample and n columns.
        y: labels; assumed to be an (m, 1) column so the subtraction
            below stays element-wise - TODO confirm at call sites.

    Returns:
        The gradient as an (n, 1) NumPy array.
    """
    # A flat theta would broadcast against the (m, 1) labels and blow up
    # the shapes, so force a column vector first.
    theta = theta.reshape(theta.shape[0], 1)
    error = sigmoid(np.dot(X, theta)) - y
    # One matrix product gives every partial derivative at once; this
    # replaces a Python loop that computed the same dot product per column.
    return np.dot(X.T, error) / len(X)

# Minimise the cost with SciPy's truncated Newton (TNC) routine, supplying the
# analytic gradient; the fitted parameters are read from result[0] below.
result = opt.fmin_tnc(func=computeCost, x0=theta, fprime=gradientdescent, args=(X, y))

# Prediction function for logistic regression
def predict(X, theta):
    """Return a list of 0/1 class predictions, one per row of ``X``.

    A row is labelled 1 when sigmoid(X . theta) is at least 0.5 and
    0 otherwise.
    """
    probabilities = sigmoid(np.dot(X, theta))
    return [1 if probability >= 0.5 else 0 for probability in probabilities]

# Score the fitted classifier on the training set.
predictions = predict(X, np.array(result[0]))
# Predictions are always 0 or 1, so "both 1 or both 0" is plain equality.
# Each row of y is a one-element array, which compares fine with a scalar.
correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
# Percentage of correct predictions. True division is required here: a
# modulo would report 0 when every prediction is correct and is only
# numerically right when there are exactly 100 samples.
accuracy = 100 * sum(correct) / len(correct)
print('accuracy = {0}%'.format(accuracy))

Regularized Logistic Regression(正则化逻辑回归)







$$J(\theta)=\frac{1}{m} \sum_{i=1}^{m}\left[-y^{(i)} \log \left(h_{\theta}\left(x^{(i)}\right)\right)-\left(1-y^{(i)}\right) \log \left(1-h_{\theta}\left(x^{(i)}\right)\right)\right]+\frac{\lambda}{2 m} \sum_{j=1}^{n} \theta_{j}^{2}$$


$$
\begin{aligned}
&\text{Repeat until convergence } \{\\
&\quad\theta_{0}:=\theta_{0}-a \frac{1}{m} \sum_{i=1}^{m}\left[h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right] x_{0}^{(i)} \\
&\quad\theta_{j}:=\theta_{j}-a \left[\frac{1}{m} \sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{j}^{(i)}+\frac{\lambda}{m} \theta_{j}\right] \qquad (j=1,2,\dots,n)\\
&\}
\end{aligned}
$$

对上面的算法中 $j=1,2,\dots,n$ 时的更新式子进行调整可得:

$$\theta_{j}:=\theta_{j}\left(1-a \frac{\lambda}{m}\right)-a \frac{1}{m} \sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right) x_{j}^{(i)}$$


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import scipy.optimize as opt

# NOTE(review): hard-coded absolute path from the author's machine; point it at
# your own copy of ex2data2.txt before running.
path = 'G:/machine learning/吴恩达/学习笔记/Lecture_2/ex2data2.txt'
# header=None: the first line is read as data, so column names are given here.
data = pd.read_csv(path, header=None, names=['Test 1', 'Test 2', 'Accepted'])

# Split the rows by label (used only by the optional plot below).
positive = data[data["Accepted"].isin([1])]  # rows with Accepted == 1
negative = data[data["Accepted"].isin([0])]  # rows with Accepted == 0

# Optional scatter plot of both classes (currently disabled):
# fig, ax = plt.subplots(figsize=(8,6))
# ax.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')
# ax.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Rejected')
# ax.legend()
# ax.set_xlabel('Test 1 Score')
# ax.set_ylabel('Test 2 Score')
# plt.show()

# Sigmoid (logistic) function
def sigmoid(z):
    """Map ``z`` (scalar or NumPy array) through 1 / (1 + e^(-z)).

    Arrays are processed element-wise, so the output shape matches the
    input shape.
    """
    decay = np.exp(-z)
    return 1 / (1 + decay)

# Regularised cost function for logistic regression
def computeCost(theta, X, y, learningRate):
    """Return the L2-regularised logistic-regression cost at ``theta``.

    The data term is -(1/m) * sum(y*log(h) + (1-y)*log(1-h)) with
    h = sigmoid(X . theta); the penalty is
    learningRate / (2m) * sum(theta[1:]**2), i.e. theta[0] is not
    penalised.

    Args:
        theta: parameters, shape (n,) or (n, 1); reshaped to a column.
        X: design matrix with one row per sample.
        y: labels; assumed to be an (m, 1) column - TODO confirm.
        learningRate: despite the name, this is the regularisation
            strength (lambda) multiplying the penalty term.

    Returns:
        The cost wrapped in a (1, 1) NumPy array.
    """
    theta = theta.reshape((theta.shape[0], 1))
    m = len(X)
    hypothesis = sigmoid(np.dot(X, theta))
    log_likelihood = (np.dot(np.log(hypothesis).T, y)
                      + np.dot(np.log(1 - hypothesis).T, 1 - y))
    # Skip row 0 so the first parameter stays unpenalised.
    penalty = (learningRate / (2 * m)) * np.sum(np.power(theta[1:, :], 2))
    return -1 / m * log_likelihood + penalty

# Gradient of the regularised logistic-regression cost
def gradient(theta, X, y, learningRate):
    """Return the gradient of the L2-regularised logistic cost at ``theta``.

    For every parameter j the data term is
    (1/m) * sum((h(x_i) - y_i) * x_ij). Parameters j >= 1 additionally
    receive the penalty (learningRate / m) * theta_j; parameter 0 is left
    unpenalised, matching a cost function that excludes theta[0] from the
    regularisation sum.

    Args:
        theta: parameters of shape (n,) or (n, 1). Optimisers pass a
            flat vector, so it is reshaped to a column before use.
        X: design matrix with one row per sample and n columns.
        y: labels; assumed to be an (m, 1) column so the subtraction
            below stays element-wise - TODO confirm at call sites.
        learningRate: despite the name, this is the regularisation
            strength (lambda), not an optimiser step size.

    Returns:
        The gradient as an (n, 1) NumPy array.
    """
    # A flat theta would broadcast against the (m, 1) labels and blow up
    # the shapes, so force a column vector first.
    theta = theta.reshape(theta.shape[0], 1)
    m = len(X)
    error = sigmoid(np.dot(X, theta)) - y

    # Data term for all parameters in a single matrix product.
    grad = np.dot(X.T, error) / m

    # Penalty term; the multiplication allocates a new array, so zeroing
    # its first entry does not modify the caller's theta.
    penalty = (learningRate / m) * theta
    penalty[0, 0] = 0.0  # parameter 0 must not be regularised

    return grad + penalty

# Prediction function for logistic regression
def predict(X, theta):
    """Return a list of 0/1 class predictions, one per row of ``X``.

    ``theta`` may arrive flat (as returned by the optimiser), so it is
    reshaped to a column vector first. A row is labelled 1 when
    sigmoid(X . theta) is at least 0.5 and 0 otherwise.
    """
    column_theta = theta.reshape(theta.shape[0], 1)
    probabilities = sigmoid(np.dot(X, column_theta))
    return [1 if probability >= 0.5 else 0 for probability in probabilities]

# Build polynomial features from the two raw test scores.
degree = 5
x1 = data['Test 1']
x2 = data['Test 2']

# Constant column of ones, placed after the three original columns.
data.insert(3, 'Ones', 1)

# Add columns F10, F20, F21, F30, ... where F<i><j> = x1**(i-j) * x2**j.
# NOTE(review): i stops at degree - 1 and j at i - 1, so degree-5 terms and
# pure powers of x2 are never generated - confirm this is intended.
for i in range(1, degree):
    for j in range(i):
        column_name = 'F' + str(i) + str(j)
        data[column_name] = np.power(x1, i - j) * np.power(x2, j)

# The raw scores are replaced by the generated features.
data.drop(['Test 1', 'Test 2'], axis=1, inplace=True)

# Initialise the parameters.
cols = data.shape[1]
# Column 0 is the 'Accepted' label; every remaining column is a feature.
y = data.iloc[:, :1].to_numpy()
X = data.iloc[:, 1:].to_numpy()

# One parameter per feature column, stored as a column vector of zeros.
theta = np.zeros((X.shape[1], 1))

# Regularisation strength passed on to computeCost / gradient.
learningrate = 1

# Minimise the regularised cost with SciPy's truncated Newton (TNC) routine.
result = opt.fmin_tnc(
    func=computeCost,
    x0=theta,
    fprime=gradient,
    args=(X, y, learningrate),
)

# Score the fitted parameters (result[0]) on the training set.
predictions = predict(X, np.array(result[0]))
# Predictions are always 0 or 1, so "both 1 or both 0" is plain equality.
correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
accuracy = sum(correct) / len(correct)
print('accuracy = {0}%'.format(100 * accuracy))


# Cross-check with scikit-learn: linear_model provides LogisticRegression
# (a classifier, not linear regression).
from sklearn import linear_model
# L2-penalised logistic regression; C is scikit-learn's inverse
# regularisation strength.
model = linear_model.LogisticRegression(penalty='l2', C=1.0)
# fit() wants a flat label vector, hence ravel() on the column-shaped y.
model.fit(X, y.ravel())
# score() reports the mean accuracy on the given data (here the training set).
print(model.score(X, y))