function J = computeCost(X, y, theta)
%COMPUTECOST Compute cost for linear regression
%   J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
%   parameter for linear regression to fit the data points in X and y:
%
%       J = 1 / (2*m) * sum((X * theta - y) .^ 2)
%
%   X     - m-by-n design matrix. The intercept column (all ones) must
%           already be part of X; it is used as given, not assumed.
%   y     - vector with the m target values.
%   theta - vector with the n parameters (row or column shaped).
%
%   The hypothesis is evaluated as X * theta, so the function works for any
%   number of features and not only for the single-feature case.

m = length(y); % number of training examples

% Residual of every training example under the hypothesis h(x) = X * theta.
% (:) forces column vectors so that a row-shaped theta or y is accepted too.
residuals = X * theta(:) - y(:);

% Halved mean of the squared residuals.
J = sum(residuals .^ 2) / (2 * m);

end
+% +% x refers to the population size in 10,000s +% y refers to the profit in $10,000s +% + +%% Initialization +clear ; close all; clc + +%% ==================== Part 1: Basic Function ==================== +% Complete warmUpExercise.m +fprintf('Running warmUpExercise ... \n'); +fprintf('5x5 Identity Matrix: \n'); +warmUpExercise() + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% ======================= Part 2: Plotting ======================= +fprintf('Plotting Data ...\n') +data = load('ex1data1.txt'); +X = data(:, 1); y = data(:, 2); +m = length(y); % number of training examples + +% Plot Data +% Note: You have to complete the code in plotData.m +plotData(X, y); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% =================== Part 3: Gradient descent =================== +fprintf('Running Gradient Descent ...\n') + +X = [ones(m, 1), data(:,1)]; % Add a column of ones to x +theta = zeros(2, 1); % initialize fitting parameters + +% Some gradient descent settings +iterations = 1500; +alpha = 0.01; + +% compute and display initial cost +computeCost(X, y, theta) + +% run gradient descent +theta = gradientDescent(X, y, theta, alpha, iterations); + +% print theta to screen +fprintf('Theta found by gradient descent: '); +fprintf('%f %f \n', theta(1), theta(2)); + +% Plot the linear fit +hold on; % keep previous plot visible +plot(X(:,2), X*theta, '-') +legend('Training data', 'Linear regression') +hold off % don't overlay any more plots on this figure + +% Predict values for population sizes of 35,000 and 70,000 +predict1 = [1, 3.5] *theta; +fprintf('For population = 35,000, we predict a profit of %f\n',... + predict1*10000); +predict2 = [1, 7] * theta; +fprintf('For population = 70,000, we predict a profit of %f\n',... + predict2*10000); + +fprintf('Program paused. 
%% Machine Learning Online Class
%  Exercise 1: Linear regression with multiple variables
%
%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  linear regression exercise.
%
%  You will need to complete the following functions in this
%  exercise:
%
%     warmUpExercise.m
%     plotData.m
%     gradientDescent.m
%     computeCost.m
%     gradientDescentMulti.m
%     computeCostMulti.m
%     featureNormalize.m
%     normalEqn.m
%
%  For this part of the exercise, you will need to change some
%  parts of the code below for various experiments (e.g., changing
%  learning rates).
%

%% Initialization

%% ================ Part 1: Feature Normalization ================

%% Clear and Close Figures
clear ; close all; clc

fprintf('Loading data ...\n');

%% Load Data
% Columns of ex1data2.txt: the two input features, then the target in
% column 3 (the script treats them as size in sq-ft, bedrooms and price).
data = load('ex1data2.txt');
X = data(:, 1:2);
y = data(:, 3);
m = length(y);

% Print out some data points
fprintf('First 10 examples from the dataset: \n');
fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]');

fprintf('Program paused. Press enter to continue.\n');
pause;

% Scale features and set them to zero mean
fprintf('Normalizing Features ...\n');

[X mu sigma] = featureNormalize(X);

% Add intercept term to X
X = [ones(m, 1) X];


%% ================ Part 2: Gradient Descent ================

% Runs gradient descent with a particular learning rate (alpha) on the
% normalized features, plots the convergence curve and then predicts the
% price of a 1650 sq-ft, 3 br house.
%
% Try different values of alpha to see which one converges best.
%
% Hint: By using the 'hold on' command, you can plot multiple
%       graphs on the same figure.
%
% Hint: At prediction, make sure you do the same feature normalization.
%

fprintf('Running gradient descent ...\n');

% Choose some alpha value
alpha = 0.01;
num_iters = 400;

% Init Theta and Run Gradient Descent
theta = zeros(3, 1);
[theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters);

% Plot the convergence graph
figure;
plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2);
xlabel('Number of iterations');
ylabel('Cost J');

% Display gradient descent's result
fprintf('Theta computed from gradient descent: \n');
fprintf(' %f \n', theta);
fprintf('\n');

% Estimate the price of a 1650 sq-ft, 3 br house.
% theta was learned on normalized features, so the query has to be scaled
% with the same per-feature statistics. The intercept term is prepended
% afterwards and is not normalized.
house = [1650 3];
if numel(mu) ~= numel(house) || numel(sigma) ~= numel(house)
    % Guard against a featureNormalize that does not hand back one mean and
    % one standard deviation per feature: recompute them from the raw
    % columns so the query is scaled exactly like the training data.
    warning('ex1_multi:normalizationStats', ...
            'featureNormalize did not return per-feature mu/sigma; recomputing them from the data.');
    mu = mean(data(:, 1:2), 1);
    sigma = std(data(:, 1:2), 0, 1);
end
house_norm = (house - mu(:)') ./ sigma(:)';
price = [1, house_norm] * theta;

fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
         '(using gradient descent):\n $%f\n'], price);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 3: Normal Equations ================

fprintf('Solving with normal equations...\n');

% Computes the closed-form solution for linear regression using the
% normal equations (see normalEqn.m) and predicts the price of the same
% 1650 sq-ft, 3 br house.
%

%% Load Data
data = csvread('ex1data2.txt');
X = data(:, 1:2);
y = data(:, 3);
m = length(y);

% Add intercept term to X
X = [ones(m, 1) X];

% Calculate the parameters from the normal equation
theta = normalEqn(X, y);

% Display normal equation's result
fprintf('Theta computed from the normal equations: \n');
fprintf(' %f \n', theta);
fprintf('\n');


% Estimate the price of a 1650 sq-ft, 3 br house.
% The normal-equation theta was fitted on the raw (unnormalized) features,
% so the query is used as is, with only the intercept term prepended.
price = [1, 1650, 3] * theta;

fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
         '(using normal equations):\n $%f\n'], price);
function [X_norm, mu, sigma] = featureNormalize(X)
%FEATURENORMALIZE Normalizes the features in X
%   [X_norm, mu, sigma] = FEATURENORMALIZE(X) returns a normalized version
%   of X where the mean value of each feature is 0 and the standard
%   deviation is 1. This is often a good preprocessing step to do when
%   working with learning algorithms.
%
%   X      - m-by-n matrix; each column is a feature, each row an example.
%   X_norm - m-by-n matrix with the normalized features.
%   mu     - 1-by-n row vector holding the mean of every column of X.
%   sigma  - 1-by-n row vector holding the sample standard deviation of
%            every column of X. A column whose standard deviation is 0
%            (constant feature) is reported with sigma = 1, so that it is
%            mapped to 0 instead of NaN.
%
%   A new example x (1-by-n) is normalized the same way with
%   (x - mu) ./ sigma.

% Per-column statistics. The dimension is given explicitly so that a
% single-row X is still treated as one example with n features.
mu = mean(X, 1);
sigma = std(X, 0, 1);

% A constant feature has zero spread; dividing by 1 avoids 0/0.
sigma(sigma == 0) = 1;

% bsxfun expands the 1-by-n statistics over all m rows.
X_norm = bsxfun(@rdivide, bsxfun(@minus, X, mu), sigma);

end
+ % + temp0 = theta(1) - alpha / m * sum(theta(1) + theta(2) * X(:, 2) - y); + temp1 = theta(2) - alpha / m * sum((theta(1) + theta(2) * X(:, 2) - y) .* X(:, 2)); + theta(1) = temp0; + theta(2) = temp1; + + + + + + % ============================================================ + + % Save the cost J in every iteration + J_history(iter) = computeCost(X, y, theta); + +end + +end diff --git a/ex1/ex1/gradientDescentMulti.m b/ex1/ex1/gradientDescentMulti.m new file mode 100644 index 0000000..e1313d3 --- /dev/null +++ b/ex1/ex1/gradientDescentMulti.m @@ -0,0 +1,38 @@ +function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters) +%GRADIENTDESCENTMULTI Performs gradient descent to learn theta +% theta = GRADIENTDESCENTMULTI(x, y, theta, alpha, num_iters) updates theta by +% taking num_iters gradient steps with learning rate alpha + +% Initialize some useful values +m = length(y); % number of training examples +J_history = zeros(num_iters, 1); + +for iter = 1:num_iters + theta -= alpha / m * (X' * X * theta - X' * y); + + % ====================== YOUR CODE HERE ====================== + % Instructions: Perform a single gradient step on the parameter vector + % theta. + % + % Hint: While debugging, it can be useful to print out the values + % of the cost function (computeCostMulti) and gradient here. 
function plotData(x, y)
%PLOTDATA Plots the data points x and y into a new figure
%   PLOTDATA(x,y) plots the data points and gives the figure axes labels of
%   population and profit.
%
%   x - values for the horizontal axis (labelled as population).
%   y - values for the vertical axis (labelled as profit).
%
%   The figure opened here stays the current figure after the call, so a
%   caller can overlay further plots on the data with 'hold on'.

% Open the new figure window BEFORE plotting; opening it afterwards would
% leave an empty figure as the current one and the data in the previous one.
figure;

% Red crosses, enlarged for readability.
plot(x, y, 'rx', 'MarkerSize', 10);
ylabel('Profit in $10,000s');
xlabel('Population of City in 10,000s');

% Fixed viewport: [xmin xmax ymin ymax].
axis([4,24,-5,25]);

end
Submission Cancelled\n'); + return + end + + fprintf('\n== Connecting to ml-class ... '); + if exist('OCTAVE_VERSION') + fflush(stdout); + end + + % Setup submit list + if partId == numel(partNames) + 1 + submitParts = 1:numel(partNames); + else + submitParts = [partId]; + end + + for s = 1:numel(submitParts) + thisPartId = submitParts(s); + if (~webSubmit) % submit directly to server + [login, ch, signature, auxstring] = getChallenge(login, thisPartId); + if isempty(login) || isempty(ch) || isempty(signature) + % Some error occured, error string in first return element. + fprintf('\n!! Error: %s\n\n', login); + return + end + + % Attempt Submission with Challenge + ch_resp = challengeResponse(login, password, ch); + + [result, str] = submitSolution(login, ch_resp, thisPartId, ... + output(thisPartId, auxstring), source(thisPartId), signature); + + partName = partNames{thisPartId}; + + fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ... + homework_id(), thisPartId, partName); + fprintf('== %s\n', strtrim(str)); + + if exist('OCTAVE_VERSION') + fflush(stdout); + end + else + [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ... + source(thisPartId)); + result = base64encode(result); + + fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ... + homework_id(), thisPartId); + saveAsFile = input('', 's'); + if (isempty(saveAsFile)) + saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId); + end + + fid = fopen(saveAsFile, 'w'); + if (fid) + fwrite(fid, result); + fclose(fid); + fprintf('\nSaved your solutions to %s.\n\n', saveAsFile); + fprintf(['You can now submit your solutions through the web \n' ... + 'form in the programming exercises. Select the corresponding \n' ... + 'programming exercise to access the form.\n']); + + else + fprintf('Unable to save to %s\n\n', saveAsFile); + fprintf(['You can create a submission file by saving the \n' ... 
function out = output(partId, auxstring)
  % OUTPUT Formats the result of one exercise part for submission.
  %   partId selects which function is run (1-7); the numeric result is
  %   rendered with '%0.5f ' per element. auxstring is accepted but not
  %   used here. For any other partId no value is assigned to out.

  % Fixed inputs shared by all parts.
  X1 = [ones(20,1) (exp(1) + exp(2) * (0.1:0.1:2))'];
  Y1 = X1(:,2) + sin(X1(:,1)) + cos(X1(:,2));
  X2 = [X1 X1(:,2).^0.5 X1(:,2).^0.25];
  Y2 = Y1.^0.5 + Y1;

  switch partId
    case 1
      out = sprintf('%0.5f ', warmUpExercise());
    case 2
      out = sprintf('%0.5f ', computeCost(X1, Y1, [0.5 -0.5]'));
    case 3
      out = sprintf('%0.5f ', gradientDescent(X1, Y1, [0.5 -0.5]', 0.01, 10));
    case 4
      out = sprintf('%0.5f ', featureNormalize(X2(:,2:4)));
    case 5
      out = sprintf('%0.5f ', computeCostMulti(X2, Y2, [0.1 0.2 0.3 0.4]'));
    case 6
      out = sprintf('%0.5f ', gradientDescentMulti(X2, Y2, [-0.1 -0.2 -0.3 -0.4]', 0.01, 10));
    case 7
      out = sprintf('%0.5f ', normalEqn(X2, Y2));
  end
end
function partId = promptPart()
  % PROMPTPART Lists the submittable parts and asks the user to pick one.
  %   Returns the chosen part number, where numel(validParts()) + 1 stands
  %   for "All of the above". Returns -1 when the answer is not a whole
  %   number accepted by isValidPartId.
  fprintf('== Select which part(s) to submit:\n');
  partNames = validParts();
  srcFiles = sources();
  for i = 1:numel(partNames)
    fprintf('== %d) %s [', i, partNames{i});
    fprintf(' %s ', srcFiles{i}{:});
    fprintf(']\n');
  end
  fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
          numel(partNames) + 1, numel(partNames) + 1);
  selPart = input('', 's');

  % str2double only parses a single number and yields NaN for anything
  % else. str2num would evaluate the typed text as an expression and could
  % return an empty or non-scalar value.
  partId = str2double(selPart);

  % NaN fails the range check in isValidPartId; the second test rejects
  % fractional answers such as 2.5, which cannot index the part list.
  if ~isValidPartId(partId) || partId ~= fix(partId)
    partId = -1;
  end
end
function [result, str] = submitSolution(email, ch_resp, part, output, ...
                                        source, signature)
  % SUBMITSOLUTION Posts one part's output and source to the submit URL.
  %   str is the raw text returned by urlread. result is always 0; the
  %   response is not parsed here.

  partSid = [homework_id() '-' num2str(part)];
  encodedOutput = base64encode(output, '');  % '' = no line breaks
  encodedSource = base64encode(source, '');

  % Name/value pairs of the POST request.
  params = {'assignment_part_sid', partSid, ...
            'email_address', email, ...
            'submission', encodedOutput, ...
            'submission_aux', encodedSource, ...
            'challenge_response', ch_resp, ...
            'state', signature};

  str = urlread(submit_url(), 'post', params);

  % Success / failure is not derived from str.
  result = 0;

end
function [str] = challengeResponse(email, passwd, challenge)
  % CHALLENGERESPONSE Builds the response string for a server challenge.
  %   Returns sha1 of the challenge immediately followed by the password.
  %   The email argument is accepted but not used.
  payload = [challenge passwd];
  str = sha1(payload);
end
function ret = bitadd(iA, iB)
  % BITADD Adds two values with the carry out of bit 32 discarded.
  %   The sum is formed in double precision, bit 33 is cleared, and the
  %   result is converted to uint32.
  total = bitset(double(iA) + double(iB), 33, 0);
  ret = uint32(total);
end
+% +% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending +% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10). +% The returned encoded string is broken into lines of no more than 76 +% characters each, and each line will end with EOL unless it is empty. Let +% EOL be empty if you do not want the encoded string broken into lines. +% +% STR and EOL don't have to be strings (i.e., char arrays). The only +% requirement is that they are vectors containing values in the range 0-255. +% +% This function may be used to encode strings into the Base64 encoding +% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The +% Base64 encoding is designed to represent arbitrary sequences of octets in a +% form that need not be humanly readable. A 65-character subset +% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per +% printable character. +% +% Examples +% -------- +% +% If you want to encode a large file, you should encode it in chunks that are +% a multiple of 57 bytes. This ensures that the base64 lines line up and +% that you do not end up with padding in the middle. 57 bytes of data fills +% one complete base64 line (76 == 57*4/3): +% +% If ifid and ofid are two file identifiers opened for reading and writing, +% respectively, then you can base64 encode the data with +% +% while ~feof(ifid) +% fwrite(ofid, base64encode(fread(ifid, 60*57))); +% end +% +% or, if you have enough memory, +% +% fwrite(ofid, base64encode(fread(ifid))); +% +% See also BASE64DECODE. 
+ +% Author: Peter John Acklam +% Time-stamp: 2004-02-03 21:36:56 +0100 +% E-mail: pjacklam@online.no +% URL: http://home.online.no/~pjacklam + + if isnumeric(x) + x = num2str(x); + end + + % make sure we have the EOL value + if nargin < 2 + eol = sprintf('\n'); + else + if sum(size(eol) > 1) > 1 + error('EOL must be a vector.'); + end + if any(eol(:) > 255) + error('EOL can not contain values larger than 255.'); + end + end + + if sum(size(x) > 1) > 1 + error('STR must be a vector.'); + end + + x = uint8(x); + eol = uint8(eol); + + ndbytes = length(x); % number of decoded bytes + nchunks = ceil(ndbytes / 3); % number of chunks/groups + nebytes = 4 * nchunks; % number of encoded bytes + + % add padding if necessary, to make the length of x a multiple of 3 + if rem(ndbytes, 3) + x(end+1 : 3*nchunks) = 0; + end + + x = reshape(x, [3, nchunks]); % reshape the data + y = repmat(uint8(0), 4, nchunks); % for the encoded data + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Split up every 3 bytes into 4 pieces + % + % aaaaaabb bbbbcccc ccdddddd + % + % to form + % + % 00aaaaaa 00bbbbbb 00cccccc 00dddddd + % + y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:) + + y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:) + y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:) + + y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:) + y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:) + + y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:) + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Now perform the following mapping + % + % 0 - 25 -> A-Z + % 26 - 51 -> a-z + % 52 - 61 -> 0-9 + % 62 -> + + % 63 -> / + % + % We could use a mapping vector like + % + % ['A':'Z', 'a':'z', '0':'9', '+/'] + % + % but that would require an index vector of class double. 
+ % + z = repmat(uint8(0), size(y)); + i = y <= 25; z(i) = 'A' + double(y(i)); + i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i)); + i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i)); + i = y == 62; z(i) = '+'; + i = y == 63; z(i) = '/'; + y = z; + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Add padding if necessary. + % + npbytes = 3 * nchunks - ndbytes; % number of padding bytes + if npbytes + y(end-npbytes+1 : end) = '='; % '=' is used for padding + end + + if isempty(eol) + + % reshape to a row vector + y = reshape(y, [1, nebytes]); + + else + + nlines = ceil(nebytes / 76); % number of lines + neolbytes = length(eol); % number of bytes in eol string + + % pad data so it becomes a multiple of 76 elements + y = [y(:) ; zeros(76 * nlines - numel(y), 1)]; + y(nebytes + 1 : 76 * nlines) = 0; + y = reshape(y, 76, nlines); + + % insert eol strings + eol = eol(:); + y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines)); + + % remove padding, but keep the last eol string + m = nebytes + neolbytes * (nlines - 1); + n = (76+neolbytes)*nlines - neolbytes; + y(m+1 : n) = ''; + + % extract and reshape to row vector + y = reshape(y, 1, m+neolbytes); + + end + + % output is a character array + y = char(y); + +end diff --git a/ex1/ex1/submitWeb.m b/ex1/ex1/submitWeb.m new file mode 100644 index 0000000..e429365 --- /dev/null +++ b/ex1/ex1/submitWeb.m @@ -0,0 +1,20 @@ +% submitWeb Creates files from your code and output for web submission. +% +% If the submit function does not work for you, use the web-submission mechanism. +% Call this function to produce a file for the part you wish to submit. Then, +% submit the file to the class servers using the "Web Submission" button on the +% Programming Exercises page on the course website. +% +% You should call this function without arguments (submitWeb), to receive +% an interactive prompt for submission; optionally you can call it with the partID +% if you so wish. 
Make sure your working directory is set to the directory +% containing the submitWeb.m file and your assignment files. + +function submitWeb(partId) + if ~exist('partId', 'var') || isempty(partId) + partId = []; + end + + submit(partId, 1); +end + diff --git a/ex1/ex1/warmUpExercise.m b/ex1/ex1/warmUpExercise.m new file mode 100644 index 0000000..68eb65c --- /dev/null +++ b/ex1/ex1/warmUpExercise.m @@ -0,0 +1,21 @@ +function A = warmUpExercise() +%WARMUPEXERCISE Example function in octave +% A = WARMUPEXERCISE() is an example function that returns the 5x5 identity matrix + + +% ============= YOUR CODE HERE ============== +% Instructions: Return the 5x5 identity matrix +% In octave, we return values by defining which variables +% represent the return values (at the top of the file) +% and then set them accordingly. + +A = eye(5); % semicolon suppresses the echo; the caller decides what to display + + + + + +% =========================================== + + +end diff --git a/ex2/.DS_Store b/ex2/.DS_Store new file mode 100644 index 0000000..8612614 Binary files /dev/null and b/ex2/.DS_Store differ diff --git a/ex2/ex2.pdf b/ex2/ex2.pdf new file mode 100644 index 0000000..8964f6e Binary files /dev/null and b/ex2/ex2.pdf differ diff --git a/ex2/ex2/.DS_Store b/ex2/ex2/.DS_Store new file mode 100644 index 0000000..b5a8b53 Binary files /dev/null and b/ex2/ex2/.DS_Store differ diff --git a/ex2/ex2/costFunction.m b/ex2/ex2/costFunction.m new file mode 100644 index 0000000..7dfba5e --- /dev/null +++ b/ex2/ex2/costFunction.m @@ -0,0 +1,34 @@ +function [J, grad] = costFunction(theta, X, y) +%COSTFUNCTION Compute cost and gradient for logistic regression +% J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the +% parameter for logistic regression and the gradient of the cost +% w.r.t. the parameters.
+ +% Initialize some useful values +m = length(y); % number of training examples + +% You need to return the following variables correctly +J = 0; +grad = zeros(size(theta)); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Compute the cost of a particular choice of theta. +% You should set J to the cost. +% Compute the partial derivatives and set grad to the partial +% derivatives of the cost w.r.t. each parameter in theta +% +% Note: grad should have the same dimensions as theta +% + +J = 1./m * ( -y' * log( sigmoid(X * theta) ) - ( 1 - y' ) * log ( 1 - sigmoid( X * theta)) ); % cross-entropy cost; ';' suppresses the console echo on each call +grad = 1./m * X' * (sigmoid(X * theta) - y); % column vector with one entry per parameter; ';' suppresses the echo + + + + + + + +% ============================================================= + +end diff --git a/ex2/ex2/costFunctionReg.m b/ex2/ex2/costFunctionReg.m new file mode 100644 index 0000000..ff7d569 --- /dev/null +++ b/ex2/ex2/costFunctionReg.m @@ -0,0 +1,31 @@ +function [J, grad] = costFunctionReg(theta, X, y, lambda) +%COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization +% J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using +% theta as the parameter for regularized logistic regression and the +% gradient of the cost w.r.t. the parameters. + +% Initialize some useful values +m = length(y); % number of training examples + +% You need to return the following variables correctly +J = 0; +grad = zeros(size(theta)); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Compute the cost of a particular choice of theta. +% You should set J to the cost. +% Compute the partial derivatives and set grad to the partial +% derivatives of the cost w.r.t. each parameter in theta + +% for j = 2:n +J = 1./m * ( -y' * log( sigmoid(X * theta) ) - ( 1 - y' ) * log ( 1 - sigmoid( X * theta)) ) +lambda / 2.
/ m * ( theta' * theta - theta(1)^2 ); +t = ones(size(theta)); +t(1) = 0; +grad = 1./m * X' * (sigmoid(X * theta) - y) + lambda * (theta .* t)/ m; + + + + +% ============================================================= + +end diff --git a/ex2/ex2/ex2.m b/ex2/ex2/ex2.m new file mode 100644 index 0000000..1cbf04a --- /dev/null +++ b/ex2/ex2/ex2.m @@ -0,0 +1,135 @@ +%% Machine Learning Online Class - Exercise 2: Logistic Regression +% +% Instructions +% ------------ +% +% This file contains code that helps you get started on the logistic +% regression exercise. You will need to complete the following functions +% in this exercise: +% +% sigmoid.m +% costFunction.m +% predict.m +% costFunctionReg.m +% +% For this exercise, you will not need to change any code in this file, +% or any other files other than those mentioned above. +% + +%% Initialization +clear ; close all; clc + +%% Load Data +% The first two columns contain the exam scores and the third column +% contains the label. + +data = load('ex2data1.txt'); +X = data(:, [1, 2]); y = data(:, 3); + +%% ==================== Part 1: Plotting ==================== +% We start the exercise by first plotting the data to understand the +% problem we are working with. + +fprintf(['Plotting data with + indicating (y = 1) examples and o ' ... + 'indicating (y = 0) examples.\n']); + +plotData(X, y); + +% Put some labels +hold on; +% Labels and Legend +xlabel('Exam 1 score') +ylabel('Exam 2 score') + +% Specified in plot order +legend('Admitted', 'Not admitted') +hold off; + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + + +%% ============ Part 2: Compute Cost and Gradient ============ +% In this part of the exercise, you will implement the cost and gradient +% for logistic regression.
You need to complete the code in +% costFunction.m + +% Setup the data matrix appropriately, and add ones for the intercept term +[m, n] = size(X); + +% Add intercept term to X +X = [ones(m, 1) X]; + +% Initialize fitting parameters +initial_theta = zeros(n + 1, 1); + +% Compute and display initial cost and gradient +[cost, grad] = costFunction(initial_theta, X, y); + +fprintf('Cost at initial theta (zeros): %f\n', cost); +fprintf('Gradient at initial theta (zeros): \n'); +fprintf(' %f \n', grad); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + + +%% ============= Part 3: Optimizing using fminunc ============= +% In this exercise, you will use a built-in function (fminunc) to find the +% optimal parameters theta. + +% Set options for fminunc +options = optimset('GradObj', 'on', 'MaxIter', 400); + +% Run fminunc to obtain the optimal theta +% This function will return theta and the cost +[theta, cost] = ... + fminunc(@(t)(costFunction(t, X, y)), initial_theta, options); + +% Print theta to screen +fprintf('Cost at theta found by fminunc: %f\n', cost); +fprintf('theta: \n'); +fprintf(' %f \n', theta); + +% Plot Boundary +plotDecisionBoundary(theta, X, y); + +% Put some labels +hold on; +% Labels and Legend +xlabel('Exam 1 score') +ylabel('Exam 2 score') + +% Specified in plot order +legend('Admitted', 'Not admitted') +hold off; + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + +%% ============== Part 4: Predict and Accuracies ============== +% After learning the parameters, you will want to use them to predict the outcomes +% on unseen data. In this part, you will use the logistic regression model +% to predict the probability that a student with score 45 on exam 1 and +% score 85 on exam 2 will be admitted. +% +% Furthermore, you will compute the training and test set accuracies of +% our model.
+% +% Your task is to complete the code in predict.m + +% Predict probability for a student with score 45 on exam 1 +% and score 85 on exam 2 + +prob = sigmoid([1 45 85] * theta); +fprintf(['For a student with scores 45 and 85, we predict an admission ' ... + 'probability of %f\n\n'], prob); + +% Compute accuracy on our training set +p = predict(theta, X); + +fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + diff --git a/ex2/ex2/ex2_reg.m b/ex2/ex2/ex2_reg.m new file mode 100644 index 0000000..d83dffe --- /dev/null +++ b/ex2/ex2/ex2_reg.m @@ -0,0 +1,116 @@ +%% Machine Learning Online Class - Exercise 2: Logistic Regression +% +% Instructions +% ------------ +% +% This file contains code that helps you get started on the second part +% of the exercise which covers regularization with logistic regression. +% +% You will need to complete the following functions in this exercise: +% +% sigmoid.m +% costFunction.m +% predict.m +% costFunctionReg.m +% +% For this exercise, you will not need to change any code in this file, +% or any other files other than those mentioned above. +% + +%% Initialization +clear ; close all; clc + +%% Load Data +% The first two columns contain the X values and the third column +% contains the label (y). + +data = load('ex2data2.txt'); +X = data(:, [1, 2]); y = data(:, 3); + +plotData(X, y); + +% Put some labels +hold on; + +% Labels and Legend +xlabel('Microchip Test 1') +ylabel('Microchip Test 2') + +% Specified in plot order +legend('y = 1', 'y = 0') +hold off; + + +%% =========== Part 1: Regularized Logistic Regression ============ +% In this part, you are given a dataset with data points that are not +% linearly separable. However, you would still like to use logistic +% regression to classify the data points.
+% +% To do so, you introduce more features to use -- in particular, you add +% polynomial features to our data matrix (similar to polynomial +% regression). +% + +% Add Polynomial Features + +% Note that mapFeature also adds a column of ones for us, so the intercept +% term is handled +X = mapFeature(X(:,1), X(:,2)); + +% Initialize fitting parameters +initial_theta = zeros(size(X, 2), 1); + +% Set regularization parameter lambda to 1 +lambda = 1; + +% Compute and display initial cost and gradient for regularized logistic +% regression +[cost, grad] = costFunctionReg(initial_theta, X, y, lambda); + +fprintf('Cost at initial theta (zeros): %f\n', cost); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + +%% ============= Part 2: Regularization and Accuracies ============= +% Optional Exercise: +% In this part, you will get to try different values of lambda and +% see how regularization affects the decision boundary. +% +% Try the following values of lambda (0, 1, 10, 100). +% +% How does the decision boundary change when you vary lambda? How does +% the training set accuracy vary? +% + +% Initialize fitting parameters +initial_theta = zeros(size(X, 2), 1); + +% Set regularization parameter lambda to 1 (you should vary this) +lambda = 1; + +% Set Options +options = optimset('GradObj', 'on', 'MaxIter', 400); + +% Optimize +[theta, J, exit_flag] = ...
+ fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options); + +% Plot Boundary +plotDecisionBoundary(theta, X, y); +hold on; +title(sprintf('lambda = %g', lambda)) + +% Labels and Legend +xlabel('Microchip Test 1') +ylabel('Microchip Test 2') + +legend('y = 1', 'y = 0', 'Decision boundary') +hold off; + +% Compute accuracy on our training set +p = predict(theta, X); + +fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100); + + diff --git a/ex2/ex2/ex2data1.txt b/ex2/ex2/ex2data1.txt new file mode 100644 index 0000000..3a5f952 --- /dev/null +++ b/ex2/ex2/ex2data1.txt @@ -0,0 +1,100 @@ +34.62365962451697,78.0246928153624,0 +30.28671076822607,43.89499752400101,0 +35.84740876993872,72.90219802708364,0 +60.18259938620976,86.30855209546826,1 +79.0327360507101,75.3443764369103,1 +45.08327747668339,56.3163717815305,0 +61.10666453684766,96.51142588489624,1 +75.02474556738889,46.55401354116538,1 +76.09878670226257,87.42056971926803,1 +84.43281996120035,43.53339331072109,1 +95.86155507093572,38.22527805795094,0 +75.01365838958247,30.60326323428011,0 +82.30705337399482,76.48196330235604,1 +69.36458875970939,97.71869196188608,1 +39.53833914367223,76.03681085115882,0 +53.9710521485623,89.20735013750205,1 +69.07014406283025,52.74046973016765,1 +67.94685547711617,46.67857410673128,0 +70.66150955499435,92.92713789364831,1 +76.97878372747498,47.57596364975532,1 +67.37202754570876,42.83843832029179,0 +89.67677575072079,65.79936592745237,1 +50.534788289883,48.85581152764205,0 +34.21206097786789,44.20952859866288,0 +77.9240914545704,68.9723599933059,1 +62.27101367004632,69.95445795447587,1 +80.1901807509566,44.82162893218353,1 +93.114388797442,38.80067033713209,0 +61.83020602312595,50.25610789244621,0 +38.78580379679423,64.99568095539578,0 +61.379289447425,72.80788731317097,1 +85.40451939411645,57.05198397627122,1 +52.10797973193984,63.12762376881715,0 +52.04540476831827,69.43286012045222,1 +40.23689373545111,71.16774802184875,0 
+54.63510555424817,52.21388588061123,0 +33.91550010906887,98.86943574220611,0 +64.17698887494485,80.90806058670817,1 +74.78925295941542,41.57341522824434,0 +34.1836400264419,75.2377203360134,0 +83.90239366249155,56.30804621605327,1 +51.54772026906181,46.85629026349976,0 +94.44336776917852,65.56892160559052,1 +82.36875375713919,40.61825515970618,0 +51.04775177128865,45.82270145776001,0 +62.22267576120188,52.06099194836679,0 +77.19303492601364,70.45820000180959,1 +97.77159928000232,86.7278223300282,1 +62.07306379667647,96.76882412413983,1 +91.56497449807442,88.69629254546599,1 +79.94481794066932,74.16311935043758,1 +99.2725269292572,60.99903099844988,1 +90.54671411399852,43.39060180650027,1 +34.52451385320009,60.39634245837173,0 +50.2864961189907,49.80453881323059,0 +49.58667721632031,59.80895099453265,0 +97.64563396007767,68.86157272420604,1 +32.57720016809309,95.59854761387875,0 +74.24869136721598,69.82457122657193,1 +71.79646205863379,78.45356224515052,1 +75.3956114656803,85.75993667331619,1 +35.28611281526193,47.02051394723416,0 +56.25381749711624,39.26147251058019,0 +30.05882244669796,49.59297386723685,0 +44.66826172480893,66.45008614558913,0 +66.56089447242954,41.09209807936973,0 +40.45755098375164,97.53518548909936,1 +49.07256321908844,51.88321182073966,0 +80.27957401466998,92.11606081344084,1 +66.74671856944039,60.99139402740988,1 +32.72283304060323,43.30717306430063,0 +64.0393204150601,78.03168802018232,1 +72.34649422579923,96.22759296761404,1 +60.45788573918959,73.09499809758037,1 +58.84095621726802,75.85844831279042,1 +99.82785779692128,72.36925193383885,1 +47.26426910848174,88.47586499559782,1 +50.45815980285988,75.80985952982456,1 +60.45555629271532,42.50840943572217,0 +82.22666157785568,42.71987853716458,0 +88.9138964166533,69.80378889835472,1 +94.83450672430196,45.69430680250754,1 +67.31925746917527,66.58935317747915,1 +57.23870631569862,59.51428198012956,1 +80.36675600171273,90.96014789746954,1 +68.46852178591112,85.59430710452014,1 
+42.0754545384731,78.84478600148043,0 +75.47770200533905,90.42453899753964,1 +78.63542434898018,96.64742716885644,1 +52.34800398794107,60.76950525602592,0 +94.09433112516793,77.15910509073893,1 +90.44855097096364,87.50879176484702,1 +55.48216114069585,35.57070347228866,0 +74.49269241843041,84.84513684930135,1 +89.84580670720979,45.35828361091658,1 +83.48916274498238,48.38028579728175,1 +42.2617008099817,87.10385094025457,1 +99.31500880510394,68.77540947206617,1 +55.34001756003703,64.9319380069486,1 +74.77589300092767,89.52981289513276,1 diff --git a/ex2/ex2/ex2data2.txt b/ex2/ex2/ex2data2.txt new file mode 100644 index 0000000..a888992 --- /dev/null +++ b/ex2/ex2/ex2data2.txt @@ -0,0 +1,118 @@ +0.051267,0.69956,1 +-0.092742,0.68494,1 +-0.21371,0.69225,1 +-0.375,0.50219,1 +-0.51325,0.46564,1 +-0.52477,0.2098,1 +-0.39804,0.034357,1 +-0.30588,-0.19225,1 +0.016705,-0.40424,1 +0.13191,-0.51389,1 +0.38537,-0.56506,1 +0.52938,-0.5212,1 +0.63882,-0.24342,1 +0.73675,-0.18494,1 +0.54666,0.48757,1 +0.322,0.5826,1 +0.16647,0.53874,1 +-0.046659,0.81652,1 +-0.17339,0.69956,1 +-0.47869,0.63377,1 +-0.60541,0.59722,1 +-0.62846,0.33406,1 +-0.59389,0.005117,1 +-0.42108,-0.27266,1 +-0.11578,-0.39693,1 +0.20104,-0.60161,1 +0.46601,-0.53582,1 +0.67339,-0.53582,1 +-0.13882,0.54605,1 +-0.29435,0.77997,1 +-0.26555,0.96272,1 +-0.16187,0.8019,1 +-0.17339,0.64839,1 +-0.28283,0.47295,1 +-0.36348,0.31213,1 +-0.30012,0.027047,1 +-0.23675,-0.21418,1 +-0.06394,-0.18494,1 +0.062788,-0.16301,1 +0.22984,-0.41155,1 +0.2932,-0.2288,1 +0.48329,-0.18494,1 +0.64459,-0.14108,1 +0.46025,0.012427,1 +0.6273,0.15863,1 +0.57546,0.26827,1 +0.72523,0.44371,1 +0.22408,0.52412,1 +0.44297,0.67032,1 +0.322,0.69225,1 +0.13767,0.57529,1 +-0.0063364,0.39985,1 +-0.092742,0.55336,1 +-0.20795,0.35599,1 +-0.20795,0.17325,1 +-0.43836,0.21711,1 +-0.21947,-0.016813,1 +-0.13882,-0.27266,1 +0.18376,0.93348,0 +0.22408,0.77997,0 +0.29896,0.61915,0 +0.50634,0.75804,0 +0.61578,0.7288,0 +0.60426,0.59722,0 +0.76555,0.50219,0 
+0.92684,0.3633,0 +0.82316,0.27558,0 +0.96141,0.085526,0 +0.93836,0.012427,0 +0.86348,-0.082602,0 +0.89804,-0.20687,0 +0.85196,-0.36769,0 +0.82892,-0.5212,0 +0.79435,-0.55775,0 +0.59274,-0.7405,0 +0.51786,-0.5943,0 +0.46601,-0.41886,0 +0.35081,-0.57968,0 +0.28744,-0.76974,0 +0.085829,-0.75512,0 +0.14919,-0.57968,0 +-0.13306,-0.4481,0 +-0.40956,-0.41155,0 +-0.39228,-0.25804,0 +-0.74366,-0.25804,0 +-0.69758,0.041667,0 +-0.75518,0.2902,0 +-0.69758,0.68494,0 +-0.4038,0.70687,0 +-0.38076,0.91886,0 +-0.50749,0.90424,0 +-0.54781,0.70687,0 +0.10311,0.77997,0 +0.057028,0.91886,0 +-0.10426,0.99196,0 +-0.081221,1.1089,0 +0.28744,1.087,0 +0.39689,0.82383,0 +0.63882,0.88962,0 +0.82316,0.66301,0 +0.67339,0.64108,0 +1.0709,0.10015,0 +-0.046659,-0.57968,0 +-0.23675,-0.63816,0 +-0.15035,-0.36769,0 +-0.49021,-0.3019,0 +-0.46717,-0.13377,0 +-0.28859,-0.060673,0 +-0.61118,-0.067982,0 +-0.66302,-0.21418,0 +-0.59965,-0.41886,0 +-0.72638,-0.082602,0 +-0.83007,0.31213,0 +-0.72062,0.53874,0 +-0.59389,0.49488,0 +-0.48445,0.99927,0 +-0.0063364,0.99927,0 +0.63265,-0.030612,0 diff --git a/ex2/ex2/mapFeature.m b/ex2/ex2/mapFeature.m new file mode 100644 index 0000000..d02a72a --- /dev/null +++ b/ex2/ex2/mapFeature.m @@ -0,0 +1,21 @@ +function out = mapFeature(X1, X2) +% MAPFEATURE Feature mapping function to polynomial features +% +% MAPFEATURE(X1, X2) maps the two input features +% to quadratic features used in the regularization exercise. +% +% Returns a new feature array with more features, comprising of +% X1, X2, X1.^2, X2.^2, X1*X2, X1*X2.^2, etc.. 
+% +% Inputs X1, X2 must be the same size +% + +degree = 6; +out = ones(size(X1(:,1))); +for i = 1:degree + for j = 0:i + out(:, end+1) = (X1.^(i-j)).*(X2.^j); + end +end + +end \ No newline at end of file diff --git a/ex2/ex2/ml_login_data.mat b/ex2/ex2/ml_login_data.mat new file mode 100644 index 0000000..cc0bb8e --- /dev/null +++ b/ex2/ex2/ml_login_data.mat @@ -0,0 +1,15 @@ +# Created by Octave 3.4.0, Tue Sep 04 22:10:18 2012 CDT +# name: login +# type: sq_string +# elements: 1 +# length: 21 +zhang349@illinois.edu + + +# name: password +# type: sq_string +# elements: 1 +# length: 10 +3pBDppG8Gd + + diff --git a/ex2/ex2/octave-core b/ex2/ex2/octave-core new file mode 100644 index 0000000..53c119d Binary files /dev/null and b/ex2/ex2/octave-core differ diff --git a/ex2/ex2/plotData.m b/ex2/ex2/plotData.m new file mode 100644 index 0000000..b904cda --- /dev/null +++ b/ex2/ex2/plotData.m @@ -0,0 +1,37 @@ +function plotData(X, y) +%PLOTDATA Plots the data points X and y into a new figure +% PLOTDATA(x,y) plots the data points with + for the positive examples +% and o for the negative examples. X is assumed to be a Mx2 matrix. + +% Create New Figure +figure; hold on; + +% ====================== YOUR CODE HERE ====================== +% Instructions: Plot the positive and negative examples on a +% 2D plot, using the option 'k+' for the positive +% examples and 'ko' for the negative examples. +% + +% Find Indices of Positive and Negative Examples +pos = find(y==1); +neg = find(y==0); +% Plot Examples +plot(X(pos, 1), X(pos, 2), 'k+','LineWidth', 2, ... +'MarkerSize', 7); +plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', ... 
+'MarkerSize', 7); + + + + + + + + +% ========================================================================= + + + +hold off; + +end diff --git a/ex2/ex2/plotDecisionBoundary.m b/ex2/ex2/plotDecisionBoundary.m new file mode 100644 index 0000000..cd36314 --- /dev/null +++ b/ex2/ex2/plotDecisionBoundary.m @@ -0,0 +1,48 @@ +function plotDecisionBoundary(theta, X, y) +%PLOTDECISIONBOUNDARY Plots the data points X and y into a new figure with +%the decision boundary defined by theta +% PLOTDECISIONBOUNDARY(theta, X,y) plots the data points with + for the +% positive examples and o for the negative examples. X is assumed to be +% either +% 1) Mx3 matrix, where the first column is an all-ones column for the +% intercept. +% 2) MxN, N>3 matrix, where the first column is all-ones + +% Plot Data +plotData(X(:,2:3), y); +hold on + +if size(X, 2) <= 3 + % Only need 2 points to define a line, so choose two endpoints + plot_x = [min(X(:,2))-2, max(X(:,2))+2]; + + % Calculate the decision boundary line + plot_y = (-1./theta(3)).*(theta(2).*plot_x + theta(1)); + + % Plot, and adjust axes for better viewing + plot(plot_x, plot_y) + + % Legend, specific for the exercise + legend('Admitted', 'Not admitted', 'Decision Boundary') + axis([30, 100, 30, 100]) +else + % Here is the grid range + u = linspace(-1, 1.5, 50); + v = linspace(-1, 1.5, 50); + + z = zeros(length(u), length(v)); + % Evaluate z = theta*x over the grid + for i = 1:length(u) + for j = 1:length(v) + z(i,j) = mapFeature(u(i), v(j))*theta; + end + end + z = z'; % important to transpose z before calling contour + + % Plot z = 0 + % Notice you need to specify the range [0, 0] + contour(u, v, z, [0, 0], 'LineWidth', 2) +end +hold off + +end diff --git a/ex2/ex2/predict.m b/ex2/ex2/predict.m new file mode 100644 index 0000000..e4078c3 --- /dev/null +++ b/ex2/ex2/predict.m @@ -0,0 +1,29 @@ +function p = predict(theta, X) +%PREDICT Predict whether the label is 0 or 1 using learned logistic +%regression parameters theta +%
p = PREDICT(theta, X) computes the predictions for X using a +% threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1) + +m = size(X, 1); % Number of training examples + +% You need to return the following variables correctly +p = round(sigmoid(X * theta)); % round() maps probabilities >= 0.5 to 1 and the rest to 0; ';' suppresses the echo + +% ====================== YOUR CODE HERE ====================== +% Instructions: Complete the following code to make predictions using +% your learned logistic regression parameters. +% You should set p to a vector of 0's and 1's +% +%prob = sigmoid(X * theta); +%p = ones(m, 1); % initialize all predictions to ones first +%p( find(prob<0.5) ) = 0; + + + + + + +% ========================================================================= + + +end diff --git a/ex2/ex2/sigmoid.m b/ex2/ex2/sigmoid.m new file mode 100644 index 0000000..5b980dc --- /dev/null +++ b/ex2/ex2/sigmoid.m @@ -0,0 +1,18 @@ +function g = sigmoid(z) +%SIGMOID Compute sigmoid function +% g = SIGMOID(z) computes the sigmoid of z. + +% You need to return the following variables correctly +g = zeros(size(z)); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Compute the sigmoid of each value of z (z can be a matrix, +% vector or scalar). +% exp() and ./ operate element-wise, so no loop over z is needed +g = 1.0 ./ (1.0 + exp(-z)); + + + +% ============================================================= + +end diff --git a/ex2/ex2/submit.m b/ex2/ex2/submit.m new file mode 100644 index 0000000..50a1f5d --- /dev/null +++ b/ex2/ex2/submit.m @@ -0,0 +1,574 @@ +function submit(partId, webSubmit) +%SUBMIT Submit your code and output to the ml-class servers +% SUBMIT() will connect to the ml-class server and submit your solution + + fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
+ homework_id()); + if ~exist('partId', 'var') || isempty(partId) + partId = promptPart(); + end + + if ~exist('webSubmit', 'var') || isempty(webSubmit) + webSubmit = 0; % submit directly by default + end + + % Check valid partId + partNames = validParts(); + if ~isValidPartId(partId) + fprintf('!! Invalid homework part selected.\n'); + fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1); + fprintf('!! Submission Cancelled\n'); + return + end + + if ~exist('ml_login_data.mat','file') + [login password] = loginPrompt(); + save('ml_login_data.mat','login','password'); + else + load('ml_login_data.mat'); + [login password] = quickLogin(login, password); + save('ml_login_data.mat','login','password'); + end + + if isempty(login) + fprintf('!! Submission Cancelled\n'); + return + end + + fprintf('\n== Connecting to ml-class ... '); + if exist('OCTAVE_VERSION') + fflush(stdout); + end + + % Setup submit list + if partId == numel(partNames) + 1 + submitParts = 1:numel(partNames); + else + submitParts = [partId]; + end + + for s = 1:numel(submitParts) + thisPartId = submitParts(s); + if (~webSubmit) % submit directly to server + [login, ch, signature, auxstring] = getChallenge(login, thisPartId); + if isempty(login) || isempty(ch) || isempty(signature) + % Some error occured, error string in first return element. + fprintf('\n!! Error: %s\n\n', login); + return + end + + % Attempt Submission with Challenge + ch_resp = challengeResponse(login, password, ch); + + [result, str] = submitSolution(login, ch_resp, thisPartId, ... + output(thisPartId, auxstring), source(thisPartId), signature); + + partName = partNames{thisPartId}; + + fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ... + homework_id(), thisPartId, partName); + fprintf('== %s\n', strtrim(str)); + + if exist('OCTAVE_VERSION') + fflush(stdout); + end + else + [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ... 
+ source(thisPartId)); + result = base64encode(result); + + fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ... + homework_id(), thisPartId); + saveAsFile = input('', 's'); + if (isempty(saveAsFile)) + saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId); + end + + fid = fopen(saveAsFile, 'w'); + if (fid ~= -1) % fopen returns -1 on failure, which a bare if (fid) treats as true + fwrite(fid, result); + fclose(fid); + fprintf('\nSaved your solutions to %s.\n\n', saveAsFile); + fprintf(['You can now submit your solutions through the web \n' ... + 'form in the programming exercises. Select the corresponding \n' ... + 'programming exercise to access the form.\n']); + + else + fprintf('Unable to save to %s\n\n', saveAsFile); + fprintf(['You can create a submission file by saving the \n' ... + 'following text in a file: (press enter to continue)\n\n']); + pause; + fprintf('%s', result); % print the payload as data, never as a format string + end + end + end +end + +% ================== CONFIGURABLES FOR EACH HOMEWORK ================== + +function id = homework_id() + id = '2'; +end + +function [partNames] = validParts() + partNames = { 'Sigmoid Function ', ... + 'Logistic Regression Cost', ... + 'Logistic Regression Gradient', ... + 'Predict', ... + 'Regularized Logistic Regression Cost', ... + 'Regularized Logistic Regression Gradient' ... + }; +end + +function srcs = sources() + % Separated by part + srcs = { { 'sigmoid.m' }, ... + { 'costFunction.m' }, ... + { 'costFunction.m' }, ... + { 'predict.m' }, ... + { 'costFunctionReg.m' }, ...
function partId = promptPart()
%PROMPTPART Interactively ask which homework part to submit.
%   partId = PROMPTPART() prints one menu entry per name returned by
%   validParts() (with the source files listed by sources()), followed by
%   an "All of the above" entry numbered numel(validParts()) + 1, then
%   reads one line from the keyboard.
%
%   Returns the chosen part number, or -1 when the entry is not a whole
%   number accepted by isValidPartId.

  fprintf('== Select which part(s) to submit:\n');
  partNames = validParts();
  srcFiles = sources();
  numParts = numel(partNames);
  for i = 1:numParts
    fprintf('== %d) %s [', i, partNames{i});
    fprintf(' %s ', srcFiles{i}{:});
    fprintf(']\n');
  end
  fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
          numParts + 1, numParts + 1);
  selPart = input('', 's');

  % str2double never evaluates the text (str2num runs it through eval) and
  % always yields a scalar: NaN for anything that is not a single number.
  partId = str2double(strtrim(selPart));

  % Reject non-numbers and fractions such as 2.5, which would pass the range
  % check but cannot be used as an index into the part list.
  if isnan(partId) || partId ~= fix(partId) || ~isValidPartId(partId)
    partId = -1;
  end
end
function hash = sha1(str)
%SHA1 Compute the SHA-1 digest of a character string.
%   hash = SHA1(str) returns a 1x40 lowercase hexadecimal char row built
%   from the five 32-bit state words h0..h4.
%
%   Relies on the file-local helpers bitadd (32-bit wrap-around addition)
%   and bitrotate (32-bit left rotation).

  % Initialize variables
  % (the standard SHA-1 initial state: 0x67452301, 0xEFCDAB89, 0x98BADCFE,
  %  0x10325476, 0xC3D2E1F0, written in decimal)
  h0 = uint32(1732584193);
  h1 = uint32(4023233417);
  h2 = uint32(2562383102);
  h3 = uint32(271733878);
  h4 = uint32(3285377520);

  % Convert to word array
  strlen = numel(str);

  % Break string into chars and append the bit 1 to the message
  % (128 == binary 10000000, i.e. a single 1 bit followed by zeros)
  mC = [double(str) 128];
  % Zero-pad so that whole 4-byte words can be read below. When numel(mC) is
  % already a multiple of 4 this appends 4 extra zero bytes rather than 0.
  % NOTE(review): concatenating with a uint8 array presumably turns mC into
  % uint8, which would saturate character codes above 255 - confirm if
  % non-Latin-1 input is ever hashed.
  mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')];

  numB = strlen * 8;                      % message length in bits
  % Number of 512-bit chunks needed for message + 1 marker bit + 64-bit length
  if exist('idivide')
    numC = idivide(uint32(numB + 65), 512, 'ceil');
  else
    numC = ceil(double(numB + 65)/512);
  end
  numW = numC * 16;                       % 16 32-bit words per chunk
  mW = zeros(numW, 1, 'uint32');

  % Pack each group of 4 bytes into one big-endian 32-bit word
  idx = 1;
  for i = 1:4:strlen + 1
    mW(idx) = bitor(bitor(bitor( ...
                bitshift(uint32(mC(i)), 24), ...
                bitshift(uint32(mC(i+1)), 16)), ...
                bitshift(uint32(mC(i+2)), 8)), ...
                uint32(mC(i+3)));
    idx = idx + 1;
  end

  % Append length of message
  % (high 32 bits, then low 32 bits, of the bit count in the last two words)
  mW(numW - 1) = uint32(bitshift(uint64(numB), -32));
  mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32));

  % Process the message in successive 512-bit chs
  for cId = 1 : double(numC)
    cSt = (cId - 1) * 16 + 1;
    cEnd = cId * 16;
    ch = mW(cSt : cEnd);

    % Extend the sixteen 32-bit words into eighty 32-bit words
    % ch(j) = rotl1(ch(j-3) xor ch(j-8) xor ch(j-14) xor ch(j-16))
    for j = 17 : 80
      ch(j) = ch(j - 3);
      ch(j) = bitxor(ch(j), ch(j - 8));
      ch(j) = bitxor(ch(j), ch(j - 14));
      ch(j) = bitxor(ch(j), ch(j - 16));
      ch(j) = bitrotate(ch(j), 1);
    end

    % Initialize hash value for this ch
    a = h0;
    b = h1;
    c = h2;
    d = h3;
    e = h4;

    % Main loop
    % Four rounds of 20 steps, each with its own mixing function f and
    % round constant k.
    for i = 1 : 80
      if(i >= 1 && i <= 20)
        f = bitor(bitand(b, c), bitand(bitcmp(b), d));
        k = uint32(1518500249);
      elseif(i >= 21 && i <= 40)
        f = bitxor(bitxor(b, c), d);
        k = uint32(1859775393);
      elseif(i >= 41 && i <= 60)
        f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d));
        k = uint32(2400959708);
      elseif(i >= 61 && i <= 80)
        f = bitxor(bitxor(b, c), d);
        k = uint32(3395469782);
      end

      % t = rotl5(a) + f + e + k + ch(i), all modulo 2^32 via bitadd
      t = bitrotate(a, 5);
      t = bitadd(t, f);
      t = bitadd(t, e);
      t = bitadd(t, k);
      t = bitadd(t, ch(i));
      e = d;
      d = c;
      c = bitrotate(b, 30);
      b = a;
      a = t;

    end
    % Fold this chunk's result into the running state
    h0 = bitadd(h0, a);
    h1 = bitadd(h1, b);
    h2 = bitadd(h2, c);
    h3 = bitadd(h3, d);
    h4 = bitadd(h4, e);

  end

  % dec2hex yields one 8-digit row per word; transpose + reshape concatenates
  % the rows left to right into a single 40-character row.
  hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]);

  hash = lower(hash);

end
function y = base64encode(x, eol)
%BASE64ENCODE Perform base64 encoding on a string.
%
%   BASE64ENCODE(STR, EOL) encode the given string STR.  EOL is the line ending
%   sequence to use; it is optional and defaults to '\n' (ASCII decimal 10).
%   The returned encoded string is broken into lines of no more than 76
%   characters each, and each line will end with EOL unless it is empty.  Let
%   EOL be empty if you do not want the encoded string broken into lines.
%
%   STR and EOL don't have to be strings (i.e., char arrays).  The only
%   requirement is that they are vectors containing values in the range 0-255.
%
%   NOTE: this copy converts any numeric STR to text with num2str before
%   encoding (see the first statement of the body), so a numeric vector is
%   encoded as its printed decimal representation, not as raw byte values.
%   Pass a char array to encode bytes directly.
%
%   This function may be used to encode strings into the Base64 encoding
%   specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions).  The
%   Base64 encoding is designed to represent arbitrary sequences of octets in a
%   form that need not be humanly readable.  A 65-character subset
%   ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per
%   printable character.
%
%   Examples
%   --------
%
%   If you want to encode a large file, you should encode it in chunks that are
%   a multiple of 57 bytes.  This ensures that the base64 lines line up and
%   that you do not end up with padding in the middle.  57 bytes of data fills
%   one complete base64 line (76 == 57*4/3):
%
%   If ifid and ofid are two file identifiers opened for reading and writing,
%   respectively, then you can base64 encode the data with
%
%      while ~feof(ifid)
%         fwrite(ofid, base64encode(fread(ifid, 60*57)));
%      end
%
%   or, if you have enough memory,
%
%      fwrite(ofid, base64encode(fread(ifid)));
%
%   See also BASE64DECODE.

%   Author:      Peter John Acklam
%   Time-stamp:  2004-02-03 21:36:56 +0100
%   E-mail:      pjacklam@online.no
%   URL:         http://home.online.no/~pjacklam

  % Numeric input (including integer classes) is turned into its text form
  % first; see the NOTE in the help text above.
  if isnumeric(x)
    x = num2str(x);
  end

  % make sure we have the EOL value
  if nargin < 2
    eol = sprintf('\n');
  else
    if sum(size(eol) > 1) > 1
      error('EOL must be a vector.');
    end
    if any(eol(:) > 255)
      error('EOL can not contain values larger than 255.');
    end
  end

  if sum(size(x) > 1) > 1
    error('STR must be a vector.');
  end

  x   = uint8(x);
  eol = uint8(eol);

  ndbytes = length(x);                 % number of decoded bytes
  nchunks = ceil(ndbytes / 3);         % number of chunks/groups
  nebytes = 4 * nchunks;               % number of encoded bytes

  % add padding if necessary, to make the length of x a multiple of 3
  if rem(ndbytes, 3)
    x(end+1 : 3*nchunks) = 0;
  end

  x = reshape(x, [3, nchunks]);        % reshape the data
  y = repmat(uint8(0), 4, nchunks);    % for the encoded data

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % Split up every 3 bytes into 4 pieces
  %
  %    aaaaaabb bbbbcccc ccdddddd
  %
  % to form
  %
  %    00aaaaaa 00bbbbbb 00cccccc 00dddddd
  %
  y(1,:) = bitshift(x(1,:), -2);                  % 6 highest bits of x(1,:)

  y(2,:) = bitshift(bitand(x(1,:), 3), 4);        % 2 lowest bits of x(1,:)
  y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4));   % 4 highest bits of x(2,:)

  y(3,:) = bitshift(bitand(x(2,:), 15), 2);       % 4 lowest bits of x(2,:)
  y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6));   % 2 highest bits of x(3,:)

  y(4,:) = bitand(x(3,:), 63);                    % 6 lowest bits of x(3,:)

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % Now perform the following mapping
  %
  %   0  - 25  ->  A-Z
  %   26 - 51  ->  a-z
  %   52 - 61  ->  0-9
  %   62       ->  +
  %   63       ->  /
  %
  % We could use a mapping vector like
  %
  %   ['A':'Z', 'a':'z', '0':'9', '+/']
  %
  % but that would require an index vector of class double.
  %
  z = repmat(uint8(0), size(y));
  i =           y <= 25;  z(i) = 'A'      + double(y(i));
  i = 26 <= y & y <= 51;  z(i) = 'a' - 26 + double(y(i));
  i = 52 <= y & y <= 61;  z(i) = '0' - 52 + double(y(i));
  i =           y == 62;  z(i) = '+';
  i =           y == 63;  z(i) = '/';
  y = z;

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  % Add padding if necessary.
  %
  % The zero bytes added to x above produced trailing 'A' characters; the
  % last npbytes of them are overwritten with '='.
  npbytes = 3 * nchunks - ndbytes;     % number of padding bytes
  if npbytes
    y(end-npbytes+1 : end) = '=';      % '=' is used for padding
  end

  if isempty(eol)

    % reshape to a row vector
    y = reshape(y, [1, nebytes]);

  else

    nlines = ceil(nebytes / 76);       % number of lines
    neolbytes = length(eol);           % number of bytes in eol string

    % pad data so it becomes a multiple of 76 elements
    y = [y(:) ; zeros(76 * nlines - numel(y), 1)];
    y(nebytes + 1 : 76 * nlines) = 0;
    y = reshape(y, 76, nlines);        % one column per output line

    % insert eol strings
    % (appends the eol bytes as extra rows under every column)
    eol = eol(:);
    y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines));

    % remove padding, but keep the last eol string
    % (in column-major order the zero fill sits just before the final eol)
    m = nebytes + neolbytes * (nlines - 1);
    n = (76+neolbytes)*nlines - neolbytes;
    y(m+1 : n) = '';

    % extract and reshape to row vector
    y = reshape(y, 1, m+neolbytes);

  end

  % output is a character array
  y = char(y);

end
function [h, display_array] = displayData(X, example_width)
%DISPLAYDATA Display 2D data in a nice grid
%   [h, display_array] = DISPLAYDATA(X, example_width) treats every row of
%   X as one image, reshapes it to example_height x example_width (with
%   example_height = size(X, 2) / example_width), scales it by its largest
%   absolute value and tiles all images into one array separated by a
%   1-pixel border of value -1.
%
%   example_width defaults to round(sqrt(size(X, 2))).
%
%   Returns h, the handle returned by imagesc, and display_array, the tiled
%   array that was drawn.
%
%   Raises an error when size(X, 2) is not a multiple of example_width.

[m, n] = size(X);

% Set example_width automatically if not passed in
if ~exist('example_width', 'var') || isempty(example_width)
    example_width = round(sqrt(n));
end

% Every example must split into a whole number of pixel rows; fail early
% with a clear message instead of a size/reshape error further down.
example_height = n / example_width;
if example_height ~= fix(example_height)
    error('displayData: %d features cannot be split into rows of width %d.', ...
          n, example_width);
end

% Gray Image
colormap(gray);

% Grid layout: display_rows * display_cols >= m, so every example fits
display_rows = floor(sqrt(m));
display_cols = ceil(m / display_rows);

% Between images padding
pad = 1;

% Blank display filled with -1 (the lower end of the colour range used below)
display_array = - ones(pad + display_rows * (example_height + pad), ...
                       pad + display_cols * (example_width + pad));

% Copy each example into its patch, filling the grid row by row
for curr_ex = 1:m
    grid_row = ceil(curr_ex / display_cols);
    grid_col = curr_ex - (grid_row - 1) * display_cols;

    % Scale by the largest magnitude in the patch. An all-zero example
    % would otherwise become 0/0 = NaN, so leave it as zeros.
    max_val = max(abs(X(curr_ex, :)));
    if max_val == 0
        max_val = 1;
    end

    rows = pad + (grid_row - 1) * (example_height + pad) + (1:example_height);
    cols = pad + (grid_col - 1) * (example_width + pad) + (1:example_width);
    display_array(rows, cols) = ...
        reshape(X(curr_ex, :), example_height, example_width) / max_val;
end

% Display Image
h = imagesc(display_array, [-1 1]);

% Do not show axis
axis image off

drawnow;

end
%% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all

%  Instructions
%  ------------
%
%  This file contains code that helps you get started on this
%  exercise. You will need to complete the following functions
%  in this exercise:
%
%     lrCostFunction.m (logistic regression cost function)
%     oneVsAll.m
%     predictOneVsAll.m
%     predict.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
clear ; close all; clc

%% Setup the parameters you will use for this part of the exercise
input_layer_size  = 400;  % 20x20 Input Images of Digits
num_labels = 10;          % 10 labels, from 1 to 10
                          % (note that we have mapped "0" to label 10)

%% =========== Part 1: Loading and Visualizing Data =============
%  We start the exercise by first loading and visualizing the dataset.
%  You will be working with a dataset that contains handwritten digits.
%

% Load Training Data
fprintf('Loading and Visualizing Data ...\n')

load('ex3data1.mat'); % training data stored in arrays X, y
m = size(X, 1);

% Randomly select 100 data points to display
rand_indices = randperm(m);
sel = X(rand_indices(1:100), :);

displayData(sel);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ============ Part 2: Vectorize Logistic Regression ============
%  In this part of the exercise, you will reuse your logistic regression
%  code from the last exercise. Your task here is to make sure that your
%  regularized logistic regression implementation is vectorized. After
%  that, you will implement one-vs-all classification for the handwritten
%  digit dataset.
%

fprintf('\nTraining One-vs-All Logistic Regression...\n')

lambda = 0.1;   % regularization strength passed to oneVsAll
[all_theta] = oneVsAll(X, y, num_labels, lambda);

fprintf('Program paused. Press enter to continue.\n');
pause;


%% ================ Part 3: Predict for One-Vs-All ================
%  After training, use the one-vs-all classifiers to predict a label for
%  every training example and report the training set accuracy in percent.
pred = predictOneVsAll(all_theta, X);

fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
%% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks

%  Instructions
%  ------------
%
%  This file contains code that helps you get started on this
%  exercise. You will need to complete the following functions
%  in this exercise:
%
%     lrCostFunction.m (logistic regression cost function)
%     oneVsAll.m
%     predictOneVsAll.m
%     predict.m
%
%  (Of these, this script itself only calls predict.m.)
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
clear ; close all; clc

%% Setup the parameters you will use for this exercise
input_layer_size  = 400;  % 20x20 Input Images of Digits
hidden_layer_size = 25;   % 25 hidden units
num_labels = 10;          % 10 labels, from 1 to 10
                          % (note that we have mapped "0" to label 10)

%% =========== Part 1: Loading and Visualizing Data =============
%  We start the exercise by first loading and visualizing the dataset.
%  You will be working with a dataset that contains handwritten digits.
%

% Load Training Data
fprintf('Loading and Visualizing Data ...\n')

load('ex3data1.mat');
m = size(X, 1);

% Randomly select 100 data points to display
sel = randperm(size(X, 1));
sel = sel(1:100);

displayData(X(sel, :));

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 2: Loading Parameters ================
% In this part of the exercise, we load some pre-initialized
% neural network parameters.

fprintf('\nLoading Saved Neural Network Parameters ...\n')

% Load the weights into variables Theta1 and Theta2
load('ex3weights.mat');

%% ================= Part 3: Implement Predict =================
%  After training the neural network, we would like to use it to predict
%  the labels. You will now implement the "predict" function to use the
%  neural network to predict the labels of the training set. This lets
%  you compute the training set accuracy.

pred = predict(Theta1, Theta2, X);

fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);

fprintf('Program paused. Press enter to continue.\n');
pause;

%  To give you an idea of the network's output, you can also run
%  through the examples one at a time to see what it is predicting.

%  Randomly permute examples
rp = randperm(m);

for i = 1:m
    % Display
    fprintf('\nDisplaying Example Image\n');
    displayData(X(rp(i), :));

    % mod(pred, 10) turns label 10 back into the digit 0 for printing
    pred = predict(Theta1, Theta2, X(rp(i),:));
    fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10));

    % Pause
    fprintf('Program paused. Press enter to continue.\n');
    pause;
end
function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
% Minimize a continuous differentialble multivariate function. Starting point
% is given by "X" (D by 1), and the function named in the string "f", must
% return a function value and a vector of partial derivatives. The Polack-
% Ribiere flavour of conjugate gradients is used to compute search directions,
% and a line search using quadratic and cubic polynomial approximations and the
% Wolfe-Powell stopping criteria is used together with the slope ratio method
% for guessing initial step sizes. Additionally a bunch of checks are made to
% make sure that exploration is taking place and that extrapolation will not
% be unboundedly large. The "length" gives the length of the run: if it is
% positive, it gives the maximum number of line searches, if negative its
% absolute gives the maximum allowed number of function evaluations. You can
% (optionally) give "length" a second component, which will indicate the
% reduction in function value to be expected in the first line-search (defaults
% to 1.0). The function returns when either its length is up, or if no further
% progress can be made (ie, we are at a minimum, or so close that due to
% numerical problems, we cannot get any closer). If the function terminates
% within a few iterations, it could be an indication that the function value
% and derivatives are not consistent (ie, there may be a bug in the
% implementation of your "f" function). The function returns the found
% solution "X", a vector of function values "fX" indicating the progress made
% and "i" the number of iterations (line searches or function evaluations,
% depending on the sign of "length") used.
%
% Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
%
% See also: checkgrad
%
% Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13
%
%
% (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
%
% Permission is granted for anyone to copy, use, or modify these
% programs and accompanying documents for purposes of research or
% education, provided this copyright notice is retained, and note is
% made of any changes that have been made.
%
% These programs and documents are distributed without any warranty,
% express or implied. As the programs were written for research
% purposes only, they have not been tested to the degree that would be
% advisable in any important application. All use of these programs is
% entirely at the user's own risk.
%
% [ml-class] Changes Made:
% 1) Function name and argument specifications
% 2) Output display
% 3) Explanatory comments added (no code changes)
%
% In this version "length" is not an argument: it is taken from
% options.MaxIter when that field exists, and is 100 otherwise.
%

% Read options
% (note: the variable "length" shadows the built-in length() in this function)
if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter')
    length = options.MaxIter;
else
    length = 100;
end


RHO = 0.01;                            % a bunch of constants for line searches
SIG = 0.5;       % RHO and SIG are the constants in the Wolfe-Powell conditions
INT = 0.1;    % don't reevaluate within 0.1 of the limit of the current bracket
EXT = 3.0;                    % extrapolate maximum 3 times the current bracket
MAX = 20;                         % max 20 function evaluations per line search
RATIO = 100;                                      % maximum allowed slope ratio

% The objective is called through eval(argstr). One ',Pk' is appended for
% every argument beyond the first three, so P1..P5 are forwarded to f.
argstr = ['feval(f, X'];                      % compose string used to call function
for i = 1:(nargin - 3)
  argstr = [argstr, ',P', int2str(i)];
end
argstr = [argstr, ')'];

% A two-element "length" carries the expected first-step reduction "red"
if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end
S=['Iteration '];

i = 0;                                            % zero the run length counter
ls_failed = 0;                             % no previous line search has failed
fX = [];
[f1 df1] = eval(argstr);                      % get function value and gradient
i = i + (length<0);                                            % count epochs?!
s = -df1;                                        % search direction is steepest
d1 = -s'*s;                                                 % this is the slope
z1 = red/(1-d1);                                  % initial step is red/(|s|+1)

while i < abs(length)                                      % while not finished
  i = i + (length>0);                                      % count iterations?!

  X0 = X; f0 = f1; df0 = df1;                   % make a copy of current values
  X = X + z1*s;                                             % begin line search
  [f2 df2] = eval(argstr);
  i = i + (length<0);                                          % count epochs?!
  d2 = df2'*s;
  f3 = f1; d3 = d1; z3 = -z1;             % initialize point 3 equal to point 1
  if length>0, M = MAX; else M = min(MAX, -length-i); end
  % limit = -1 is the "no upper bracket yet" marker tested as limit < -0.5
  success = 0; limit = -1;                     % initialize quanteties
  while 1
    % Interpolation phase: shrink the step while the new point violates
    % either Wolfe-Powell condition and evaluations remain
    while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0)
      limit = z1;                                         % tighten the bracket
      if f2 > f1
        z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3);                 % quadratic fit
      else
        A = 6*(f2-f3)/z3+3*(d2+d3);                                 % cubic fit
        B = 3*(f3-f2)-z3*(d3+2*d2);
        z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A;       % numerical error possible - ok!
      end
      if isnan(z2) | isinf(z2)
        z2 = z3/2;                  % if we had a numerical problem then bisect
      end
      z2 = max(min(z2, INT*z3),(1-INT)*z3);  % don't accept too close to limits
      z1 = z1 + z2;                                           % update the step
      X = X + z2*s;
      [f2 df2] = eval(argstr);
      M = M - 1; i = i + (length<0);                           % count epochs?!
      d2 = df2'*s;
      z3 = z3-z2;                    % z3 is now relative to the location of z2
    end
    if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1
      break;                                                % this is a failure
    elseif d2 > SIG*d1
      success = 1; break;                                             % success
    elseif M == 0
      break;                                                          % failure
    end
    % Extrapolation phase: the slope is still too negative, so step further
    A = 6*(f2-f3)/z3+3*(d2+d3);                      % make cubic extrapolation
    B = 3*(f3-f2)-z3*(d3+2*d2);
    z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3));        % num. error possible - ok!
    if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0   % num prob or wrong sign?
      if limit < -0.5                               % if we have no upper limit
        z2 = z1 * (EXT-1);                 % the extrapolate the maximum amount
      else
        z2 = (limit-z1)/2;                                   % otherwise bisect
      end
    elseif (limit > -0.5) & (z2+z1 > limit)          % extraplation beyond max?
      z2 = (limit-z1)/2;                                               % bisect
    elseif (limit < -0.5) & (z2+z1 > z1*EXT)       % extrapolation beyond limit
      z2 = z1*(EXT-1.0);                           % set to extrapolation limit
    elseif z2 < -z3*INT
      z2 = -z3*INT;
    elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT))   % too close to limit?
      z2 = (limit-z1)*(1.0-INT);
    end
    f3 = f2; d3 = d2; z3 = -z2;                  % set point 3 equal to point 2
    z1 = z1 + z2; X = X + z2*s;                      % update current estimates
    [f2 df2] = eval(argstr);
    M = M - 1; i = i + (length<0);                             % count epochs?!
    d2 = df2'*s;
  end                                                      % end of line search

  if success                                         % if line search succeeded
    f1 = f2; fX = [fX' f1]';             % append the new cost to the history
    fprintf('%s %4i | Cost: %4.6e\r', S, i, f1);
    s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2;      % Polack-Ribiere direction
    tmp = df1; df1 = df2; df2 = tmp;                         % swap derivatives
    d2 = df1'*s;
    if d2 > 0                                      % new slope must be negative
      s = -df1;                              % otherwise use steepest direction
      d2 = -s'*s;
    end
    z1 = z1 * min(RATIO, d1/(d2-realmin));          % slope ratio but max RATIO
    d1 = d2;
    ls_failed = 0;                              % this line search did not fail
  else
    X = X0; f1 = f0; df1 = df0;  % restore point from before failed line search
    if ls_failed | i > abs(length)          % line search failed twice in a row
      break;                             % or we ran out of time, so we give up
    end
    tmp = df1; df1 = df2; df2 = tmp;                         % swap derivatives
    s = -df1;                                                    % try steepest
    d1 = -s'*s;
    z1 = 1/(1-d1);
    ls_failed = 1;                                    % this line search failed
  end
  % Octave buffers output; flush so the progress line shows up immediately
  if exist('OCTAVE_VERSION')
    fflush(stdout);
  end
end
fprintf('\n');
function [J, grad] = lrCostFunction(theta, X, y, lambda)
%LRCOSTFUNCTION Compute cost and gradient for logistic regression with
%regularization
%   [J, grad] = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of
%   using theta as the parameter for regularized logistic regression and
%   the gradient of that cost with respect to theta.
%
%   theta  - parameter column vector; theta(1) is excluded from the
%            regularization term
%   X      - design matrix, one example per row, with as many columns as
%            theta has elements
%   y      - column of 0/1 targets, one per row of X
%   lambda - regularization strength
%
%   J is a scalar and grad is a column vector with one entry per parameter.

m = length(y); % number of training examples

% Evaluate the hypothesis once and reuse it for both the cost and the
% gradient (it was previously recomputed three times).
h = sigmoid(X * theta);

% Cross-entropy part of the cost, before the 1/m scaling
data_term = -y' * log(h) - (1 - y') * log(1 - h);

% Squared norm of theta without the first (bias) parameter
penalty = theta' * theta - theta(1)^2;

J = 1./m * data_term + lambda / 2. / m * penalty;

% Mask that switches regularization off for theta(1)
reg_mask = ones(size(theta));
reg_mask(1) = 0;

grad = 1./m * X' * (h - y) + lambda * (theta .* reg_mask) / m;
grad = grad(:);

end
function [all_theta] = oneVsAll(X, y, num_labels, lambda)
%ONEVSALL trains multiple logistic regression classifiers and returns all
%the classifiers in a matrix all_theta, where the i-th row of all_theta
%corresponds to the classifier for label i
%   [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels
%   logistic regression classifiers and returns each of these classifiers
%   in a matrix all_theta, where the i-th row of all_theta corresponds
%   to the classifier for label i
%
%   X is given without a bias column; one is prepended here, so all_theta
%   has size(X, 2) + 1 columns. Classifier c is trained on the binary
%   target (y == c) with fmincg, starting from all zeros, for at most 50
%   iterations.

% Some useful variables
m = size(X, 1);
n = size(X, 2);

% You need to return the following variables correctly
all_theta = zeros(num_labels, n + 1);

% Add ones to the X data matrix
X = [ones(m, 1) X];

% Every classifier starts from the same all-zero parameter vector
initial_theta = zeros(n + 1, 1);

% Set options for fmincg (only MaxIter is read by fmincg)
options = optimset('GradObj', 'on', 'MaxIter', 50);

for c = 1:num_labels
    % Train "label c versus the rest"; fmincg returns theta as a column
    theta_c = fmincg(@(t)(lrCostFunction(t, X, (y == c), lambda)), ...
                     initial_theta, options);

    % Store it explicitly as row c
    all_theta(c, :) = theta_c(:)';
end

end
function p = predict(Theta1, Theta2, X)
%PREDICT Predict the label of an input given a trained neural network
%   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
%   trained weights of a neural network (Theta1, Theta2)
%
%   X holds one example per row, without a bias column. Theta1 maps the
%   biased input to the hidden layer and Theta2 maps the biased hidden
%   layer to the output layer, one row per unit. (For the ex3 data the
%   original notes give X as 5000 x 400, Theta1 as 25 x 401 and Theta2 as
%   10 x 26.)
%
%   p is a column vector with one label per row of X, each between 1 and
%   size(Theta2, 1): the index of the largest output activation.

m = size(X, 1);

% Input layer with bias column
a1 = [ones(m, 1) X];

% Hidden layer activations with bias column
a2 = [ones(m, 1) sigmoid(a1 * Theta1')];

% Output layer activations
a3 = sigmoid(a2 * Theta2');

% Pick the strongest output per example. The activations are used as they
% are: squashing them through sigmoid a second time cannot change which
% one is largest and can only blur near-ties.
[best_activation, p] = max(a3, [], 2);

end
+%
+% Vectorized: row i of X * all_theta' holds one score per class for example
+% i, and max(..., [], 2) returns the column index of each row's largest
+% score, which is the predicted label. p comes back as an m x 1 column.
+[best_score, p] = max(X * all_theta', [], 2);
+
+% m = 5000
+% num_labels = 10
+% p = 5000 * 1
+% X = 5000 * 401
+% all_theta = 10 * 401
+% all_theta' = 401 * 10
+% X * all_theta' = (5000 * 401) * (401 * 10) => 5000 * 10
+% one row of class scores per example, no loop needed
+
+
+
+
+
+
+% =========================================================================
+
+
+end
diff --git a/ex3/ex3/sigmoid.m b/ex3/ex3/sigmoid.m
new file mode 100644
index 0000000..6deca13
--- /dev/null
+++ b/ex3/ex3/sigmoid.m
@@ -0,0 +1,6 @@
+function g = sigmoid(z)
+%SIGMOID Compute sigmoid function
+%   g = SIGMOID(z) computes the sigmoid of z, element-wise.
+
+g = 1.0 ./ (1.0 + exp(-z));
+end
diff --git a/ex3/ex3/submit.m b/ex3/ex3/submit.m
new file mode 100644
index 0000000..18d7005
--- /dev/null
+++ b/ex3/ex3/submit.m
@@ -0,0 +1,574 @@
+function submit(partId, webSubmit)
+%SUBMIT Submit your code and output to the ml-class servers
+%   SUBMIT() will connect to the ml-class server and submit your solution
+
+  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
+          homework_id());
+  if ~exist('partId', 'var') || isempty(partId)
+    partId = promptPart();
+  end
+
+  if ~exist('webSubmit', 'var') || isempty(webSubmit)
+    webSubmit = 0; % submit directly by default
+  end
+
+  % Check valid partId
+  partNames = validParts();
+  if ~isValidPartId(partId)
+    fprintf('!! Invalid homework part selected.\n');
+    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  if ~exist('ml_login_data.mat','file')
+    [login password] = loginPrompt();
+    save('ml_login_data.mat','login','password');
+  else
+    load('ml_login_data.mat');
+    [login password] = quickLogin(login, password);
+    save('ml_login_data.mat','login','password');
+  end
+
+  if isempty(login)
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  fprintf('\n== Connecting to ml-class ... 
');
+  if exist('OCTAVE_VERSION')
+    fflush(stdout);
+  end
+
+  % Setup submit list
+  if partId == numel(partNames) + 1
+    submitParts = 1:numel(partNames);
+  else
+    submitParts = [partId];
+  end
+
+  for s = 1:numel(submitParts)
+    thisPartId = submitParts(s);
+    if (~webSubmit) % submit directly to server
+      [login, ch, signature, auxstring] = getChallenge(login, thisPartId);
+      if isempty(login) || isempty(ch) || isempty(signature)
+        % Some error occurred, error string in first return element.
+        fprintf('\n!! Error: %s\n\n', login);
+        return
+      end
+
+      % Attempt Submission with Challenge
+      ch_resp = challengeResponse(login, password, ch);
+
+      [result, str] = submitSolution(login, ch_resp, thisPartId, ...
+             output(thisPartId, auxstring), source(thisPartId), signature);
+
+      partName = partNames{thisPartId};
+
+      fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ...
+        homework_id(), thisPartId, partName);
+      fprintf('== %s\n', strtrim(str));
+
+      if exist('OCTAVE_VERSION')
+        fflush(stdout);
+      end
+    else
+      [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ...
+                            source(thisPartId));
+      result = base64encode(result);
+
+      fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ...
+        homework_id(), thisPartId);
+      saveAsFile = input('', 's');
+      if (isempty(saveAsFile))
+        saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId);
+      end
+
+      fid = fopen(saveAsFile, 'w');
+      if (fid ~= -1) % fopen returns -1 on failure, and -1 is truthy, so compare explicitly
+        fwrite(fid, result);
+        fclose(fid);
+        fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
+        fprintf(['You can now submit your solutions through the web \n' ...
+                 'form in the programming exercises. Select the corresponding \n' ...
+                 'programming exercise to access the form.\n']);
+
+      else
+        fprintf('Unable to save to %s\n\n', saveAsFile);
+        fprintf(['You can create a submission file by saving the \n' ... 
+ 'following text in a file: (press enter to continue)\n\n']); + pause; + fprintf(result); + end + end + end +end + +% ================== CONFIGURABLES FOR EACH HOMEWORK ================== + +function id = homework_id() + id = '3'; +end + +function [partNames] = validParts() + partNames = { 'Vectorized Logistic Regression ', ... + 'One-vs-all classifier training', ... + 'One-vs-all classifier prediction', ... + 'Neural network prediction function' ... + }; +end + +function srcs = sources() + % Separated by part + srcs = { { 'lrCostFunction.m' }, ... + { 'oneVsAll.m' }, ... + { 'predictOneVsAll.m' }, ... + { 'predict.m' } }; +end + +function out = output(partId, auxdata) + % Random Test Cases + X = [ones(20,1) (exp(1) * sin(1:1:20))' (exp(0.5) * cos(1:1:20))']; + y = sin(X(:,1) + X(:,2)) > 0; + Xm = [ -1 -1 ; -1 -2 ; -2 -1 ; -2 -2 ; ... + 1 1 ; 1 2 ; 2 1 ; 2 2 ; ... + -1 1 ; -1 2 ; -2 1 ; -2 2 ; ... + 1 -1 ; 1 -2 ; -2 -1 ; -2 -2 ]; + ym = [ 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 ]'; + t1 = sin(reshape(1:2:24, 4, 3)); + t2 = cos(reshape(1:2:40, 4, 5)); + + if partId == 1 + [J, grad] = lrCostFunction([0.25 0.5 -0.5]', X, y, 0.1); + out = sprintf('%0.5f ', J); + out = [out sprintf('%0.5f ', grad)]; + elseif partId == 2 + out = sprintf('%0.5f ', oneVsAll(Xm, ym, 4, 0.1)); + elseif partId == 3 + out = sprintf('%0.5f ', predictOneVsAll(t1, Xm)); + elseif partId == 4 + out = sprintf('%0.5f ', predict(t1, t2, Xm)); + end +end + + +% ====================== SERVER CONFIGURATION =========================== + +% ***************** REMOVE -staging WHEN YOU DEPLOY ********************* +function url = site_url() + url = 'http://class.coursera.org/ml-2012-002'; +end + +function url = challenge_url() + url = [site_url() '/assignment/challenge']; +end + +function url = submit_url() + url = [site_url() '/assignment/submit']; +end + +% ========================= CHALLENGE HELPERS ========================= + +function src = source(partId) + src = ''; + src_files = sources(); + if partId <= 
numel(src_files) + flist = src_files{partId}; + for i = 1:numel(flist) + fid = fopen(flist{i}); + if (fid == -1) + error('Error opening %s (is it missing?)', flist{i}); + end + line = fgets(fid); + while ischar(line) + src = [src line]; + line = fgets(fid); + end + fclose(fid); + src = [src '||||||||']; + end + end +end + +function ret = isValidPartId(partId) + partNames = validParts(); + ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1); +end + +function partId = promptPart() + fprintf('== Select which part(s) to submit:\n'); + partNames = validParts(); + srcFiles = sources(); + for i = 1:numel(partNames) + fprintf('== %d) %s [', i, partNames{i}); + fprintf(' %s ', srcFiles{i}{:}); + fprintf(']\n'); + end + fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ... + numel(partNames) + 1, numel(partNames) + 1); + selPart = input('', 's'); + partId = str2num(selPart); + if ~isValidPartId(partId) + partId = -1; + end +end + +function [email,ch,signature,auxstring] = getChallenge(email, part) + str = urlread(challenge_url(), 'post', {'email_address', email, 'assignment_part_sid', [homework_id() '-' num2str(part)], 'response_encoding', 'delim'}); + + str = strtrim(str); + r = struct; + while(numel(str) > 0) + [f, str] = strtok (str, '|'); + [v, str] = strtok (str, '|'); + r = setfield(r, f, v); + end + + email = getfield(r, 'email_address'); + ch = getfield(r, 'challenge_key'); + signature = getfield(r, 'state'); + auxstring = getfield(r, 'challenge_aux_data'); +end + +function [result, str] = submitSolutionWeb(email, part, output, source) + + result = ['{"assignment_part_sid":"' base64encode([homework_id() '-' num2str(part)], '') '",' ... + '"email_address":"' base64encode(email, '') '",' ... + '"submission":"' base64encode(output, '') '",' ... + '"submission_aux":"' base64encode(source, '') '"' ... + '}']; + str = 'Web-submission'; +end + +function [result, str] = submitSolution(email, ch_resp, part, output, ... 
+ source, signature) + + params = {'assignment_part_sid', [homework_id() '-' num2str(part)], ... + 'email_address', email, ... + 'submission', base64encode(output, ''), ... + 'submission_aux', base64encode(source, ''), ... + 'challenge_response', ch_resp, ... + 'state', signature}; + + str = urlread(submit_url(), 'post', params); + + % Parse str to read for success / failure + result = 0; + +end + +% =========================== LOGIN HELPERS =========================== + +function [login password] = loginPrompt() + % Prompt for password + [login password] = basicPrompt(); + + if isempty(login) || isempty(password) + login = []; password = []; + end +end + + +function [login password] = basicPrompt() + login = input('Login (Email address): ', 's'); + password = input('Password: ', 's'); +end + +function [login password] = quickLogin(login,password) + disp(['You are currently logged in as ' login '.']); + cont_token = input('Is this you? (y/n - type n to reenter password)','s'); + if(isempty(cont_token) || cont_token(1)=='Y'||cont_token(1)=='y') + return; + else + [login password] = loginPrompt(); + end +end + +function [str] = challengeResponse(email, passwd, challenge) + str = sha1([challenge passwd]); +end + +% =============================== SHA-1 ================================ + +function hash = sha1(str) + + % Initialize variables + h0 = uint32(1732584193); + h1 = uint32(4023233417); + h2 = uint32(2562383102); + h3 = uint32(271733878); + h4 = uint32(3285377520); + + % Convert to word array + strlen = numel(str); + + % Break string into chars and append the bit 1 to the message + mC = [double(str) 128]; + mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')]; + + numB = strlen * 8; + if exist('idivide') + numC = idivide(uint32(numB + 65), 512, 'ceil'); + else + numC = ceil(double(numB + 65)/512); + end + numW = numC * 16; + mW = zeros(numW, 1, 'uint32'); + + idx = 1; + for i = 1:4:strlen + 1 + mW(idx) = bitor(bitor(bitor( ... + bitshift(uint32(mC(i)), 24), ... 
+ bitshift(uint32(mC(i+1)), 16)), ... + bitshift(uint32(mC(i+2)), 8)), ... + uint32(mC(i+3))); + idx = idx + 1; + end + + % Append length of message + mW(numW - 1) = uint32(bitshift(uint64(numB), -32)); + mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32)); + + % Process the message in successive 512-bit chs + for cId = 1 : double(numC) + cSt = (cId - 1) * 16 + 1; + cEnd = cId * 16; + ch = mW(cSt : cEnd); + + % Extend the sixteen 32-bit words into eighty 32-bit words + for j = 17 : 80 + ch(j) = ch(j - 3); + ch(j) = bitxor(ch(j), ch(j - 8)); + ch(j) = bitxor(ch(j), ch(j - 14)); + ch(j) = bitxor(ch(j), ch(j - 16)); + ch(j) = bitrotate(ch(j), 1); + end + + % Initialize hash value for this ch + a = h0; + b = h1; + c = h2; + d = h3; + e = h4; + + % Main loop + for i = 1 : 80 + if(i >= 1 && i <= 20) + f = bitor(bitand(b, c), bitand(bitcmp(b), d)); + k = uint32(1518500249); + elseif(i >= 21 && i <= 40) + f = bitxor(bitxor(b, c), d); + k = uint32(1859775393); + elseif(i >= 41 && i <= 60) + f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d)); + k = uint32(2400959708); + elseif(i >= 61 && i <= 80) + f = bitxor(bitxor(b, c), d); + k = uint32(3395469782); + end + + t = bitrotate(a, 5); + t = bitadd(t, f); + t = bitadd(t, e); + t = bitadd(t, k); + t = bitadd(t, ch(i)); + e = d; + d = c; + c = bitrotate(b, 30); + b = a; + a = t; + + end + h0 = bitadd(h0, a); + h1 = bitadd(h1, b); + h2 = bitadd(h2, c); + h3 = bitadd(h3, d); + h4 = bitadd(h4, e); + + end + + hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]); + + hash = lower(hash); + +end + +function ret = bitadd(iA, iB) + ret = double(iA) + double(iB); + ret = bitset(ret, 33, 0); + ret = uint32(ret); +end + +function ret = bitrotate(iA, places) + t = bitshift(iA, places - 32); + ret = bitshift(iA, places); + ret = bitor(ret, t); +end + +% =========================== Base64 Encoder ============================ +% Thanks to Peter John Acklam +% + +function y = base64encode(x, eol) +%BASE64ENCODE Perform 
base64 encoding on a string. +% +% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending +% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10). +% The returned encoded string is broken into lines of no more than 76 +% characters each, and each line will end with EOL unless it is empty. Let +% EOL be empty if you do not want the encoded string broken into lines. +% +% STR and EOL don't have to be strings (i.e., char arrays). The only +% requirement is that they are vectors containing values in the range 0-255. +% +% This function may be used to encode strings into the Base64 encoding +% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The +% Base64 encoding is designed to represent arbitrary sequences of octets in a +% form that need not be humanly readable. A 65-character subset +% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per +% printable character. +% +% Examples +% -------- +% +% If you want to encode a large file, you should encode it in chunks that are +% a multiple of 57 bytes. This ensures that the base64 lines line up and +% that you do not end up with padding in the middle. 57 bytes of data fills +% one complete base64 line (76 == 57*4/3): +% +% If ifid and ofid are two file identifiers opened for reading and writing, +% respectively, then you can base64 encode the data with +% +% while ~feof(ifid) +% fwrite(ofid, base64encode(fread(ifid, 60*57))); +% end +% +% or, if you have enough memory, +% +% fwrite(ofid, base64encode(fread(ifid))); +% +% See also BASE64DECODE. 
+ +% Author: Peter John Acklam +% Time-stamp: 2004-02-03 21:36:56 +0100 +% E-mail: pjacklam@online.no +% URL: http://home.online.no/~pjacklam + + if isnumeric(x) + x = num2str(x); + end + + % make sure we have the EOL value + if nargin < 2 + eol = sprintf('\n'); + else + if sum(size(eol) > 1) > 1 + error('EOL must be a vector.'); + end + if any(eol(:) > 255) + error('EOL can not contain values larger than 255.'); + end + end + + if sum(size(x) > 1) > 1 + error('STR must be a vector.'); + end + + x = uint8(x); + eol = uint8(eol); + + ndbytes = length(x); % number of decoded bytes + nchunks = ceil(ndbytes / 3); % number of chunks/groups + nebytes = 4 * nchunks; % number of encoded bytes + + % add padding if necessary, to make the length of x a multiple of 3 + if rem(ndbytes, 3) + x(end+1 : 3*nchunks) = 0; + end + + x = reshape(x, [3, nchunks]); % reshape the data + y = repmat(uint8(0), 4, nchunks); % for the encoded data + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Split up every 3 bytes into 4 pieces + % + % aaaaaabb bbbbcccc ccdddddd + % + % to form + % + % 00aaaaaa 00bbbbbb 00cccccc 00dddddd + % + y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:) + + y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:) + y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:) + + y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:) + y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:) + + y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:) + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Now perform the following mapping + % + % 0 - 25 -> A-Z + % 26 - 51 -> a-z + % 52 - 61 -> 0-9 + % 62 -> + + % 63 -> / + % + % We could use a mapping vector like + % + % ['A':'Z', 'a':'z', '0':'9', '+/'] + % + % but that would require an index vector of class double. 
+ % + z = repmat(uint8(0), size(y)); + i = y <= 25; z(i) = 'A' + double(y(i)); + i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i)); + i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i)); + i = y == 62; z(i) = '+'; + i = y == 63; z(i) = '/'; + y = z; + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Add padding if necessary. + % + npbytes = 3 * nchunks - ndbytes; % number of padding bytes + if npbytes + y(end-npbytes+1 : end) = '='; % '=' is used for padding + end + + if isempty(eol) + + % reshape to a row vector + y = reshape(y, [1, nebytes]); + + else + + nlines = ceil(nebytes / 76); % number of lines + neolbytes = length(eol); % number of bytes in eol string + + % pad data so it becomes a multiple of 76 elements + y = [y(:) ; zeros(76 * nlines - numel(y), 1)]; + y(nebytes + 1 : 76 * nlines) = 0; + y = reshape(y, 76, nlines); + + % insert eol strings + eol = eol(:); + y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines)); + + % remove padding, but keep the last eol string + m = nebytes + neolbytes * (nlines - 1); + n = (76+neolbytes)*nlines - neolbytes; + y(m+1 : n) = ''; + + % extract and reshape to row vector + y = reshape(y, 1, m+neolbytes); + + end + + % output is a character array + y = char(y); + +end diff --git a/ex3/ex3/submitWeb.m b/ex3/ex3/submitWeb.m new file mode 100644 index 0000000..e429365 --- /dev/null +++ b/ex3/ex3/submitWeb.m @@ -0,0 +1,20 @@ +% submitWeb Creates files from your code and output for web submission. +% +% If the submit function does not work for you, use the web-submission mechanism. +% Call this function to produce a file for the part you wish to submit. Then, +% submit the file to the class servers using the "Web Submission" button on the +% Programming Exercises page on the course website. +% +% You should call this function without arguments (submitWeb), to receive +% an interactive prompt for submission; optionally you can call it with the partID +% if you so wish. 
Make sure your working directory is set to the directory +% containing the submitWeb.m file and your assignment files. + +function submitWeb(partId) + if ~exist('partId', 'var') || isempty(partId) + partId = []; + end + + submit(partId, 1); +end + diff --git a/ex4/.DS_Store b/ex4/.DS_Store new file mode 100644 index 0000000..9f33919 Binary files /dev/null and b/ex4/.DS_Store differ diff --git a/ex4/ex4.pdf b/ex4/ex4.pdf new file mode 100644 index 0000000..d833004 Binary files /dev/null and b/ex4/ex4.pdf differ diff --git a/ex4/ex4/.DS_Store b/ex4/ex4/.DS_Store new file mode 100644 index 0000000..0710ce6 Binary files /dev/null and b/ex4/ex4/.DS_Store differ diff --git a/ex4/ex4/checkNNGradients.m b/ex4/ex4/checkNNGradients.m new file mode 100644 index 0000000..f9930aa --- /dev/null +++ b/ex4/ex4/checkNNGradients.m @@ -0,0 +1,52 @@ +function checkNNGradients(lambda) +%CHECKNNGRADIENTS Creates a small neural network to check the +%backpropagation gradients +% CHECKNNGRADIENTS(lambda) Creates a small neural network to check the +% backpropagation gradients, it will output the analytical gradients +% produced by your backprop code and the numerical gradients (computed +% using computeNumericalGradient). These two gradient computations should +% result in very similar values. +% + +if ~exist('lambda', 'var') || isempty(lambda) + lambda = 0; +end + +input_layer_size = 3; +hidden_layer_size = 5; +num_labels = 3; +m = 5; + +% We generate some 'random' test data +Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size); +Theta2 = debugInitializeWeights(num_labels, hidden_layer_size); +% Reusing debugInitializeWeights to generate X +X = debugInitializeWeights(m, input_layer_size - 1); +y = 1 + mod(1:m, num_labels)'; + +% Unroll parameters +nn_params = [Theta1(:) ; Theta2(:)]; + +% Short hand for cost function +costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ... 
+ num_labels, X, y, lambda); + +[cost, grad] = costFunc(nn_params); +numgrad = computeNumericalGradient(costFunc, nn_params); + +% Visually examine the two gradient computations. The two columns +% you get should be very similar. +disp([numgrad grad]); +fprintf(['The above two columns you get should be very similar.\n' ... + '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); + +% Evaluate the norm of the difference between two solutions. +% If you have a correct implementation, and assuming you used EPSILON = 0.0001 +% in computeNumericalGradient.m, then diff below should be less than 1e-9 +diff = norm(numgrad-grad)/norm(numgrad+grad); + +fprintf(['If your backpropagation implementation is correct, then \n' ... + 'the relative difference will be small (less than 1e-9). \n' ... + '\nRelative Difference: %g\n'], diff); + +end diff --git a/ex4/ex4/computeNumericalGradient.m b/ex4/ex4/computeNumericalGradient.m new file mode 100644 index 0000000..c3abeac --- /dev/null +++ b/ex4/ex4/computeNumericalGradient.m @@ -0,0 +1,29 @@ +function numgrad = computeNumericalGradient(J, theta) +%COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" +%and gives us a numerical estimate of the gradient. +% numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical +% gradient of the function J around theta. Calling y = J(theta) should +% return the function value at theta. + +% Notes: The following code implements numerical gradient checking, and +% returns the numerical gradient.It sets numgrad(i) to (a numerical +% approximation of) the partial derivative of J with respect to the +% i-th input argument, evaluated at theta. (i.e., numgrad(i) should +% be the (approximately) the partial derivative of J with respect +% to theta(i).) 
+% + +numgrad = zeros(size(theta)); +perturb = zeros(size(theta)); +e = 1e-4; +for p = 1:numel(theta) + % Perturb only component p; every other entry of perturb stays zero + perturb(p) = e; + loss1 = J(theta - perturb); + loss2 = J(theta + perturb); + % Central difference: (J(theta + e) - J(theta - e)) / (2*e) + numgrad(p) = (loss2 - loss1) / (2*e); + perturb(p) = 0; +end + +end diff --git a/ex4/ex4/debugInitializeWeights.m b/ex4/ex4/debugInitializeWeights.m new file mode 100644 index 0000000..a71b5ab --- /dev/null +++ b/ex4/ex4/debugInitializeWeights.m @@ -0,0 +1,22 @@ +function W = debugInitializeWeights(fan_out, fan_in) +%DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in +%incoming connections and fan_out outgoing connections using a fixed +%strategy, this will help you later in debugging +% W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) initializes the weights +% of a layer with fan_in incoming connections and fan_out outgoing +% connections using a fixed set of values +% +% Note that W is a matrix of size (fan_out, 1 + fan_in); the extra +% column accounts for the "bias" terms +% + +% Allocate W with its final shape; numel(W) and size(W) are reused below +W = zeros(fan_out, 1 + fan_in); + +% Initialize W using "sin", this ensures that W is always of the same +% values and will be useful for debugging +W = reshape(sin(1:numel(W)), size(W)) / 10; + +% ========================================================================= + +end diff --git a/ex4/ex4/displayData.m b/ex4/ex4/displayData.m new file mode 100644 index 0000000..160697e --- /dev/null +++ b/ex4/ex4/displayData.m @@ -0,0 +1,59 @@ +function [h, display_array] = displayData(X, example_width) +%DISPLAYDATA Display 2D data in a nice grid +% [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data +% stored in X in a nice grid. It returns the image handle h and the +% displayed array if requested. 
+ +% Set example_width automatically if not passed in +if ~exist('example_width', 'var') || isempty(example_width) + example_width = round(sqrt(size(X, 2))); +end + +% Gray Image +colormap(gray); + +% Compute rows, cols +[m n] = size(X); +example_height = (n / example_width); + +% Compute number of items to display +display_rows = floor(sqrt(m)); +display_cols = ceil(m / display_rows); + +% Between images padding +pad = 1; + +% Setup blank display +display_array = - ones(pad + display_rows * (example_height + pad), ... + pad + display_cols * (example_width + pad)); + +% Copy each example into a patch on the display array +curr_ex = 1; +for j = 1:display_rows + for i = 1:display_cols + if curr_ex > m, + break; + end + % Copy the patch + + % Get the max value of the patch + max_val = max(abs(X(curr_ex, :))); + display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ... + pad + (i - 1) * (example_width + pad) + (1:example_width)) = ... + reshape(X(curr_ex, :), example_height, example_width) / max_val; + curr_ex = curr_ex + 1; + end + if curr_ex > m, + break; + end +end + +% Display Image +h = imagesc(display_array, [-1 1]); + +% Do not show axis +axis image off + +drawnow; + +end diff --git a/ex4/ex4/ex4.m b/ex4/ex4/ex4.m new file mode 100644 index 0000000..f7b3fa9 --- /dev/null +++ b/ex4/ex4/ex4.m @@ -0,0 +1,234 @@ +%% Machine Learning Online Class - Exercise 4 Neural Network Learning + +% Instructions +% ------------ +% +% This file contains code that helps you get started on the +% linear exercise. You will need to complete the following functions +% in this exericse: +% +% sigmoidGradient.m +% randInitializeWeights.m +% nnCostFunction.m +% +% For this exercise, you will not need to change any code in this file, +% or any other files other than those mentioned above. 
+% + +%% Initialization +clear ; close all; clc + +%% Setup the parameters you will use for this exercise +input_layer_size = 400; % 20x20 Input Images of Digits +hidden_layer_size = 25; % 25 hidden units +num_labels = 10; % 10 labels, from 1 to 10 + % (note that we have mapped "0" to label 10) + +%% =========== Part 1: Loading and Visualizing Data ============= +% We start the exercise by first loading and visualizing the dataset. +% You will be working with a dataset that contains handwritten digits. +% + +% Load Training Data +fprintf('Loading and Visualizing Data ...\n') + +load('ex4data1.mat'); +m = size(X, 1); + +% Randomly select 100 data points to display +sel = randperm(size(X, 1)); +sel = sel(1:100); + +displayData(X(sel, :)); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% ================ Part 2: Loading Parameters ================ +% In this part of the exercise, we load some pre-initialized +% neural network parameters. + +fprintf('\nLoading Saved Neural Network Parameters ...\n') + +% Load the weights into variables Theta1 and Theta2 +load('ex4weights.mat'); + +% Unroll parameters +nn_params = [Theta1(:) ; Theta2(:)]; + +%% ================ Part 3: Compute Cost (Feedforward) ================ +% To the neural network, you should first start by implementing the +% feedforward part of the neural network that returns the cost only. You +% should complete the code in nnCostFunction.m to return cost. After +% implementing the feedforward to compute the cost, you can verify that +% your implementation is correct by verifying that you get the same cost +% as us for the fixed debugging parameters. +% +% We suggest implementing the feedforward cost *without* regularization +% first so that it will be easier for you to debug. Later, in part 4, you +% will get to implement the regularized cost. +% +fprintf('\nFeedforward Using Neural Network ...\n') + +% Weight regularization parameter (we set this to 0 here). 
+lambda = 0; + +J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... + num_labels, X, y, lambda); + +fprintf(['Cost at parameters (loaded from ex4weights): %f '... + '\n(this value should be about 0.287629)\n'], J); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + +%% =============== Part 4: Implement Regularization =============== +% Once your cost function implementation is correct, you should now +% continue to implement the regularization with the cost. +% + +fprintf('\nChecking Cost Function (w/ Regularization) ... \n') + +% Weight regularization parameter (we set this to 1 here). +lambda = 1; + +J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ... + num_labels, X, y, lambda); + +fprintf(['Cost at parameters (loaded from ex4weights): %f '... + '\n(this value should be about 0.383770)\n'], J); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% ================ Part 5: Sigmoid Gradient ================ +% Before you start implementing the neural network, you will first +% implement the gradient for the sigmoid function. You should complete the +% code in the sigmoidGradient.m file. +% + +fprintf('\nEvaluating sigmoid gradient...\n') + +g = sigmoidGradient([1 -0.5 0 0.5 1]); +fprintf('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n '); +fprintf('%f ', g); +fprintf('\n\n'); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% ================ Part 6: Initializing Pameters ================ +% In this part of the exercise, you will be starting to implment a two +% layer neural network that classifies digits. 
You will start by
+%  implementing a function to initialize the weights of the neural network
+%  (randInitializeWeights.m)
+
+fprintf('\nInitializing Neural Network Parameters ...\n')
+
+initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size);
+initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels);
+
+% Unroll parameters
+initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)];
+
+
+%% =============== Part 7: Implement Backpropagation ===============
+%  Once your cost matches up with ours, you should proceed to implement the
+%  backpropagation algorithm for the neural network. You should add to the
+%  code you've written in nnCostFunction.m to return the partial
+%  derivatives of the parameters.
+%
+fprintf('\nChecking Backpropagation... \n');
+
+%  Check gradients by running checkNNGradients
+checkNNGradients;
+
+fprintf('\nProgram paused. Press enter to continue.\n');
+pause;
+
+
+%% =============== Part 8: Implement Regularization ===============
+%  Once your backpropagation implementation is correct, you should now
+%  continue to implement the regularization with the cost and gradient.
+%
+
+fprintf('\nChecking Backpropagation (w/ Regularization) ... \n')
+
+%  Check gradients by running checkNNGradients
+lambda = 3;
+checkNNGradients(lambda);
+
+% Also output the costFunction debugging values (report the lambda actually used)
+debug_J  = nnCostFunction(nn_params, input_layer_size, ...
+                          hidden_layer_size, num_labels, X, y, lambda);
+
+fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = %g): %f ' ...
+         '\n(this value should be about 0.576051)\n\n'], lambda, debug_J);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% =================== Part 8: Training NN ===================
+%  You have now implemented all the code necessary to train a neural
+%  network. To train your neural network, we will now use "fmincg", which
+%  is a function which works similarly to "fminunc". 
Recall that these +% advanced optimizers are able to train our cost functions efficiently as +% long as we provide them with the gradient computations. +% +fprintf('\nTraining Neural Network... \n') + +% After you have completed the assignment, change the MaxIter to a larger +% value to see how more training helps. +options = optimset('MaxIter', 50); + +% You should also try different values of lambda +lambda = 1; + +% Create "short hand" for the cost function to be minimized +costFunction = @(p) nnCostFunction(p, ... + input_layer_size, ... + hidden_layer_size, ... + num_labels, X, y, lambda); + +% Now, costFunction is a function that takes in only one argument (the +% neural network parameters) +[nn_params, cost] = fmincg(costFunction, initial_nn_params, options); + +% Obtain Theta1 and Theta2 back from nn_params +Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... + hidden_layer_size, (input_layer_size + 1)); + +Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... + num_labels, (hidden_layer_size + 1)); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% ================= Part 9: Visualize Weights ================= +% You can now "visualize" what the neural network is learning by +% displaying the hidden units to see what features they are capturing in +% the data. + +fprintf('\nVisualizing Neural Network... \n') + +displayData(Theta1(:, 2:end)); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + +%% ================= Part 10: Implement Predict ================= +% After training the neural network, we would like to use it to predict +% the labels. You will now implement the "predict" function to use the +% neural network to predict the labels of the training set. This lets +% you compute the training set accuracy. 
+ +pred = predict(Theta1, Theta2, X); + +fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100); + + diff --git a/ex4/ex4/ex4data1.mat b/ex4/ex4/ex4data1.mat new file mode 100644 index 0000000..371bd0c Binary files /dev/null and b/ex4/ex4/ex4data1.mat differ diff --git a/ex4/ex4/ex4weights.mat b/ex4/ex4/ex4weights.mat new file mode 100644 index 0000000..ace2a09 Binary files /dev/null and b/ex4/ex4/ex4weights.mat differ diff --git a/ex4/ex4/fmincg.m b/ex4/ex4/fmincg.m new file mode 100644 index 0000000..34bf539 --- /dev/null +++ b/ex4/ex4/fmincg.m @@ -0,0 +1,175 @@ +function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) +% Minimize a continuous differentialble multivariate function. Starting point +% is given by "X" (D by 1), and the function named in the string "f", must +% return a function value and a vector of partial derivatives. The Polack- +% Ribiere flavour of conjugate gradients is used to compute search directions, +% and a line search using quadratic and cubic polynomial approximations and the +% Wolfe-Powell stopping criteria is used together with the slope ratio method +% for guessing initial step sizes. Additionally a bunch of checks are made to +% make sure that exploration is taking place and that extrapolation will not +% be unboundedly large. The "length" gives the length of the run: if it is +% positive, it gives the maximum number of line searches, if negative its +% absolute gives the maximum allowed number of function evaluations. You can +% (optionally) give "length" a second component, which will indicate the +% reduction in function value to be expected in the first line-search (defaults +% to 1.0). The function returns when either its length is up, or if no further +% progress can be made (ie, we are at a minimum, or so close that due to +% numerical problems, we cannot get any closer). 
If the function terminates +% within a few iterations, it could be an indication that the function value +% and derivatives are not consistent (ie, there may be a bug in the +% implementation of your "f" function). The function returns the found +% solution "X", a vector of function values "fX" indicating the progress made +% and "i" the number of iterations (line searches or function evaluations, +% depending on the sign of "length") used. +% +% Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) +% +% See also: checkgrad +% +% Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 +% +% +% (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen +% +% Permission is granted for anyone to copy, use, or modify these +% programs and accompanying documents for purposes of research or +% education, provided this copyright notice is retained, and note is +% made of any changes that have been made. +% +% These programs and documents are distributed without any warranty, +% express or implied. As the programs were written for research +% purposes only, they have not been tested to the degree that would be +% advisable in any important application. All use of these programs is +% entirely at the user's own risk. 
+% +% [ml-class] Changes Made: +% 1) Function name and argument specifications +% 2) Output display +% + +% Read options +if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') + length = options.MaxIter; +else + length = 100; +end + + +RHO = 0.01; % a bunch of constants for line searches +SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions +INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket +EXT = 3.0; % extrapolate maximum 3 times the current bracket +MAX = 20; % max 20 function evaluations per line search +RATIO = 100; % maximum allowed slope ratio + +argstr = ['feval(f, X']; % compose string used to call function +for i = 1:(nargin - 3) + argstr = [argstr, ',P', int2str(i)]; +end +argstr = [argstr, ')']; + +if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end +S=['Iteration ']; + +i = 0; % zero the run length counter +ls_failed = 0; % no previous line search has failed +fX = []; +[f1 df1] = eval(argstr); % get function value and gradient +i = i + (length<0); % count epochs?! +s = -df1; % search direction is steepest +d1 = -s'*s; % this is the slope +z1 = red/(1-d1); % initial step is red/(|s|+1) + +while i < abs(length) % while not finished + i = i + (length>0); % count iterations?! + + X0 = X; f0 = f1; df0 = df1; % make a copy of current values + X = X + z1*s; % begin line search + [f2 df2] = eval(argstr); + i = i + (length<0); % count epochs?! + d2 = df2'*s; + f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 + if length>0, M = MAX; else M = min(MAX, -length-i); end + success = 0; limit = -1; % initialize quanteties + while 1 + while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0) + limit = z1; % tighten the bracket + if f2 > f1 + z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit + else + A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit + B = 3*(f3-f2)-z3*(d3+2*d2); + z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 
+ end + if isnan(z2) | isinf(z2) + z2 = z3/2; % if we had a numerical problem then bisect + end + z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits + z1 = z1 + z2; % update the step + X = X + z2*s; + [f2 df2] = eval(argstr); + M = M - 1; i = i + (length<0); % count epochs?! + d2 = df2'*s; + z3 = z3-z2; % z3 is now relative to the location of z2 + end + if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1 + break; % this is a failure + elseif d2 > SIG*d1 + success = 1; break; % success + elseif M == 0 + break; % failure + end + A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation + B = 3*(f3-f2)-z3*(d3+2*d2); + z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! + if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0 % num prob or wrong sign? + if limit < -0.5 % if we have no upper limit + z2 = z1 * (EXT-1); % the extrapolate the maximum amount + else + z2 = (limit-z1)/2; % otherwise bisect + end + elseif (limit > -0.5) & (z2+z1 > limit) % extraplation beyond max? + z2 = (limit-z1)/2; % bisect + elseif (limit < -0.5) & (z2+z1 > z1*EXT) % extrapolation beyond limit + z2 = z1*(EXT-1.0); % set to extrapolation limit + elseif z2 < -z3*INT + z2 = -z3*INT; + elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT)) % too close to limit? + z2 = (limit-z1)*(1.0-INT); + end + f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 + z1 = z1 + z2; X = X + z2*s; % update current estimates + [f2 df2] = eval(argstr); + M = M - 1; i = i + (length<0); % count epochs?! 
+ d2 = df2'*s; + end % end of line search + + if success % if line search succeeded + f1 = f2; fX = [fX' f1]'; + fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); + s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction + tmp = df1; df1 = df2; df2 = tmp; % swap derivatives + d2 = df1'*s; + if d2 > 0 % new slope must be negative + s = -df1; % otherwise use steepest direction + d2 = -s'*s; + end + z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO + d1 = d2; + ls_failed = 0; % this line search did not fail + else + X = X0; f1 = f0; df1 = df0; % restore point from before failed line search + if ls_failed | i > abs(length) % line search failed twice in a row + break; % or we ran out of time, so we give up + end + tmp = df1; df1 = df2; df2 = tmp; % swap derivatives + s = -df1; % try steepest + d1 = -s'*s; + z1 = 1/(1-d1); + ls_failed = 1; % this line search failed + end + if exist('OCTAVE_VERSION') + fflush(stdout); + end +end +fprintf('\n'); diff --git a/ex4/ex4/ml_login_data.mat b/ex4/ex4/ml_login_data.mat new file mode 100644 index 0000000..1e1a869 --- /dev/null +++ b/ex4/ex4/ml_login_data.mat @@ -0,0 +1,15 @@ +# Created by Octave 3.4.0, Thu Sep 27 11:19:55 2012 CDT +# name: login +# type: sq_string +# elements: 1 +# length: 21 +zhang349@illinois.edu + + +# name: password +# type: sq_string +# elements: 1 +# length: 10 +3pBDppG8Gd + + diff --git a/ex4/ex4/nnCostFunction.m b/ex4/ex4/nnCostFunction.m new file mode 100644 index 0000000..5e80ed2 --- /dev/null +++ b/ex4/ex4/nnCostFunction.m @@ -0,0 +1,116 @@ +function [J grad] = nnCostFunction(nn_params, ... + input_layer_size, ... + hidden_layer_size, ... + num_labels, ... + X, y, lambda) +%NNCOSTFUNCTION Implements the neural network cost function for a two layer +%neural network which performs classification +% [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ... +% X, y, lambda) computes the cost and gradient of the neural network. 
The +% parameters for the neural network are "unrolled" into the vector +% nn_params and need to be converted back into the weight matrices. +% +% The returned parameter grad should be a "unrolled" vector of the +% partial derivatives of the neural network. +% + +% Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices +% for our 2 layer neural network +Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ... + hidden_layer_size, (input_layer_size + 1)); + +Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ... + num_labels, (hidden_layer_size + 1)); + +% Setup some useful variables +m = size(X, 1); + +% You need to return the following variables correctly +J = 0; +Theta1_grad = zeros(size(Theta1)); +Theta2_grad = zeros(size(Theta2)); + +% ====================== YOUR CODE HERE ====================== +% Instructions: You should complete the code by working through the +% following parts. +% +% Part 1: Feedforward the neural network and return the cost in the +% variable J. After implementing Part 1, you can verify that your +% cost function computation is correct by verifying the cost +% computed in ex4.m +% +% Part 2: Implement the backpropagation algorithm to compute the gradients +% Theta1_grad and Theta2_grad. You should return the partial derivatives of +% the cost function with respect to Theta1 and Theta2 in Theta1_grad and +% Theta2_grad, respectively. After implementing Part 2, you can check +% that your implementation is correct by running checkNNGradients +% +% Note: The vector y passed into the function is a vector of labels +% containing values from 1..K. You need to map this vector into a +% binary vector of 1's and 0's to be used with the neural network +% cost function. +% +% Hint: We recommend implementing backpropagation using a for-loop +% over the training examples if you are implementing it for the +% first time. 
+%
+% Part 3: Implement regularization with the cost function and gradients.
+%
+%         Hint: You can implement this around the code for
+%               backpropagation. That is, you can compute the gradients for
+%               the regularization separately and then add them to Theta1_grad
+%               and Theta2_grad from Part 2.
+%
+
+% One-hot encode the labels: row i of y becomes row y(i) of the identity
+% matrix. The identity is indexed through a named temporary because chained
+% indexing such as eye(K)(y,:) is an Octave-only extension.
+label_rows = eye(num_labels);
+y = label_rows(y,:);
+
+% Forward pass (one training example per row).
+a_1 = [ones(m,1) X];
+z_2 = (Theta1 * a_1')';
+
+a_2 = sigmoid(z_2);
+a_2 = [ones(m, 1) a_2];   % prepend the bias unit; m rows, same as a_1
+
+a_3 = sigmoid(Theta2 * a_2')';
+
+% Cross-entropy cost plus L2 penalty; the bias columns are not penalized.
+J_reg = lambda/(2*m)*(sum(sum(Theta1(:,2:end).^2)) + sum(sum(Theta2(:,2:end).^2)));
+J = (1/m)*sum(sum(-y.*log(a_3) - (1-y).*log(1-a_3))) + J_reg;
+
+
+%Backprop:
+
+d_3 = a_3 - y;
+d_2 = ((d_3*Theta2(:,2:end)).*sigmoidGradient(z_2));
+
+Theta1_grad = 1/m * d_2' * a_1;
+Theta2_grad = 1/m * d_3' * a_2;
+
+%add regularization:
+
+Theta1_grad(:,2:end) = Theta1_grad(:,2:end) + lambda/m*Theta1(:,2:end);
+Theta2_grad(:,2:end) = Theta2_grad(:,2:end) + lambda/m*Theta2(:,2:end);
+
+
+
+
+
+
+
+
+
+
+
+% -------------------------------------------------------------
+
+% =========================================================================
+
+% Unroll gradients
+grad = [Theta1_grad(:) ; Theta2_grad(:)];
+
+
+end
diff --git a/ex4/ex4/octave-core b/ex4/ex4/octave-core
new file mode 100644
index 0000000..8d42fb6
Binary files /dev/null and b/ex4/ex4/octave-core differ
diff --git a/ex4/ex4/predict.m b/ex4/ex4/predict.m
new file mode 100644
index 0000000..9ec3f6d
--- /dev/null
+++ b/ex4/ex4/predict.m
@@ -0,0 +1,20 @@
+function p = predict(Theta1, Theta2, X)
+%PREDICT Predict the label of an input given a trained neural network
+%   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
+%   trained weights of a neural network (Theta1, Theta2)
+
+% Useful values
+m = size(X, 1);
+num_labels = size(Theta2, 1);
+
+% You need to return the following variables correctly
+p = zeros(size(X, 1), 1);
+
+h1 = sigmoid([ones(m, 1) X] * Theta1');
+h2 = sigmoid([ones(m, 1) h1] * Theta2');
+[dummy, p] = max(h2, [], 2);
+
+% =========================================================================
+
+
+end
diff --git a/ex4/ex4/randInitializeWeights.m b/ex4/ex4/randInitializeWeights.m
new file mode 100644
index 0000000..59eaec7
--- /dev/null
+++ b/ex4/ex4/randInitializeWeights.m
@@ -0,0 +1,32 @@
+function W = randInitializeWeights(L_in, L_out)
+%RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
+%incoming connections and L_out outgoing connections
+%   W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
+%   of a layer with L_in incoming connections and L_out outgoing
+%   connections.
+%
+%   Note that W should be set to a matrix of size(L_out, 1 + L_in) as
+%   the first column of W handles the "bias" terms
+%
+
+% You need to return the following variables correctly
+W = zeros(L_out, 1 + L_in);
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Initialize W randomly so that we break the symmetry while
+%               training the neural network.
+%
+% Note: The first column of W corresponds to the parameters for the bias units
+%
+
+% Draw every weight uniformly from [-epsilon_init, epsilon_init]. Leaving W
+% at zero would give all units of the layer identical gradients, which is
+% the symmetry the instructions above ask us to break.
+epsilon_init = 0.12;
+W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
+
+
+
+% =========================================================================
+
+end
diff --git a/ex4/ex4/sigmoid.m b/ex4/ex4/sigmoid.m
new file mode 100644
index 0000000..6deca13
--- /dev/null
+++ b/ex4/ex4/sigmoid.m
@@ -0,0 +1,6 @@
+function g = sigmoid(z)
+%SIGMOID Compute sigmoid function
+%   g = SIGMOID(z) computes the sigmoid of z.
+
+g = 1.0 ./ (1.0 + exp(-z));
+end
diff --git a/ex4/ex4/sigmoidGradient.m b/ex4/ex4/sigmoidGradient.m
new file mode 100644
index 0000000..b80fdf6
--- /dev/null
+++ b/ex4/ex4/sigmoidGradient.m
@@ -0,0 +1,34 @@
+function g = sigmoidGradient(z)
+%SIGMOIDGRADIENT returns the gradient of the sigmoid function
+%evaluated at z
+%   g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
+%   evaluated at z. This should work regardless if z is a matrix or a
+%   vector. 
In particular, if z is a vector or matrix, you should return
+%   the gradient for each element.
+
+g = zeros(size(z));
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Compute the gradient of the sigmoid function evaluated at
+%               each value of z (z can be a matrix, vector or scalar).
+
+% Element-wise logistic value s, then the identity s' = s .* (1 - s).
+s = 1 ./ (1 + exp(-z));
+g = s .* (1 - s);
+
+
+
+
+
+
+
+
+
+
+
+% =============================================================
+
+
+
+
+end
diff --git a/ex4/ex4/submit.m b/ex4/ex4/submit.m
new file mode 100644
index 0000000..e4d2166
--- /dev/null
+++ b/ex4/ex4/submit.m
@@ -0,0 +1,578 @@
+function submit(partId, webSubmit)
+%SUBMIT Submit your code and output to the ml-class servers
+%   SUBMIT() will connect to the ml-class server and submit your solution
+
+  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
+          homework_id());
+  if ~exist('partId', 'var') || isempty(partId)
+    partId = promptPart();
+  end
+
+  if ~exist('webSubmit', 'var') || isempty(webSubmit)
+    webSubmit = 0; % submit directly by default
+  end
+
+  % Check valid partId
+  partNames = validParts();
+  if ~isValidPartId(partId)
+    fprintf('!! Invalid homework part selected.\n');
+    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  if ~exist('ml_login_data.mat','file')
+    [login password] = loginPrompt();
+    save('ml_login_data.mat','login','password');
+  else
+    load('ml_login_data.mat');
+    [login password] = quickLogin(login, password);
+    save('ml_login_data.mat','login','password');
+  end
+
+  if isempty(login)
+    fprintf('!! Submission Cancelled\n');
+    return
+  end
+
+  fprintf('\n== Connecting to ml-class ... 
');
+  if exist('OCTAVE_VERSION')
+    fflush(stdout);
+  end
+
+  % Setup submit list
+  if partId == numel(partNames) + 1
+    submitParts = 1:numel(partNames);
+  else
+    submitParts = [partId];
+  end
+
+  for s = 1:numel(submitParts)
+    thisPartId = submitParts(s);
+    if (~webSubmit) % submit directly to server
+      [login, ch, signature, auxstring] = getChallenge(login, thisPartId);
+      if isempty(login) || isempty(ch) || isempty(signature)
+        % Some error occurred, error string in first return element.
+        fprintf('\n!! Error: %s\n\n', login);
+        return
+      end
+
+      % Attempt Submission with Challenge
+      ch_resp = challengeResponse(login, password, ch);
+
+      [result, str] = submitSolution(login, ch_resp, thisPartId, ...
+             output(thisPartId, auxstring), source(thisPartId), signature);
+
+      partName = partNames{thisPartId};
+
+      fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ...
+        homework_id(), thisPartId, partName);
+      fprintf('== %s\n', strtrim(str));
+
+      if exist('OCTAVE_VERSION')
+        fflush(stdout);
+      end
+    else
+      [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ...
+                            source(thisPartId));
+      result = base64encode(result);
+
+      fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ...
+        homework_id(), thisPartId);
+      saveAsFile = input('', 's');
+      if (isempty(saveAsFile))
+        saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId);
+      end
+
+      fid = fopen(saveAsFile, 'w');
+      if (fid ~= -1)  % fopen reports failure as -1, which is nonzero and so tests true
+        fwrite(fid, result);
+        fclose(fid);
+        fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
+        fprintf(['You can now submit your solutions through the web \n' ...
+                 'form in the programming exercises. Select the corresponding \n' ...
+                 'programming exercise to access the form.\n']);
+
+      else
+        fprintf('Unable to save to %s\n\n', saveAsFile);
+        fprintf(['You can create a submission file by saving the \n' ... 
+ 'following text in a file: (press enter to continue)\n\n']); + pause; + fprintf(result); + end + end + end +end + +% ================== CONFIGURABLES FOR EACH HOMEWORK ================== + +function id = homework_id() + id = '4'; +end + +function [partNames] = validParts() + partNames = { 'Feedforward and Cost Function', ... + 'Regularized Cost Function', ... + 'Sigmoid Gradient', ... + 'Neural Network Gradient (Backpropagation)' ... + 'Regularized Gradient' ... + }; +end + +function srcs = sources() + % Separated by part + srcs = { { 'nnCostFunction.m' }, ... + { 'nnCostFunction.m' }, ... + { 'sigmoidGradient.m' }, ... + { 'nnCostFunction.m' }, ... + { 'nnCostFunction.m' } }; +end + +function out = output(partId, auxstring) + % Random Test Cases + X = reshape(3 * sin(1:1:30), 3, 10); + Xm = reshape(sin(1:32), 16, 2) / 5; + ym = 1 + mod(1:16,4)'; + t1 = sin(reshape(1:2:24, 4, 3)); + t2 = cos(reshape(1:2:40, 4, 5)); + t = [t1(:) ; t2(:)]; + if partId == 1 + [J] = nnCostFunction(t, 2, 4, 4, Xm, ym, 0); + out = sprintf('%0.5f ', J); + elseif partId == 2 + [J] = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5); + out = sprintf('%0.5f ', J); + elseif partId == 3 + out = sprintf('%0.5f ', sigmoidGradient(X)); + elseif partId == 4 + [J, grad] = nnCostFunction(t, 2, 4, 4, Xm, ym, 0); + out = sprintf('%0.5f ', J); + out = [out sprintf('%0.5f ', grad)]; + elseif partId == 5 + [J, grad] = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5); + out = sprintf('%0.5f ', J); + out = [out sprintf('%0.5f ', grad)]; + end +end + + +% ====================== SERVER CONFIGURATION =========================== + +% ***************** REMOVE -staging WHEN YOU DEPLOY ********************* +function url = site_url() + url = 'http://class.coursera.org/ml-2012-002'; +end + +function url = challenge_url() + url = [site_url() '/assignment/challenge']; +end + +function url = submit_url() + url = [site_url() '/assignment/submit']; +end + +% ========================= CHALLENGE HELPERS ========================= + 
+function src = source(partId) + src = ''; + src_files = sources(); + if partId <= numel(src_files) + flist = src_files{partId}; + for i = 1:numel(flist) + fid = fopen(flist{i}); + if (fid == -1) + error('Error opening %s (is it missing?)', flist{i}); + end + line = fgets(fid); + while ischar(line) + src = [src line]; + line = fgets(fid); + end + fclose(fid); + src = [src '||||||||']; + end + end +end + +function ret = isValidPartId(partId) + partNames = validParts(); + ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1); +end + +function partId = promptPart() + fprintf('== Select which part(s) to submit:\n'); + partNames = validParts(); + srcFiles = sources(); + for i = 1:numel(partNames) + fprintf('== %d) %s [', i, partNames{i}); + fprintf(' %s ', srcFiles{i}{:}); + fprintf(']\n'); + end + fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ... + numel(partNames) + 1, numel(partNames) + 1); + selPart = input('', 's'); + partId = str2num(selPart); + if ~isValidPartId(partId) + partId = -1; + end +end + +function [email,ch,signature,auxstring] = getChallenge(email, part) + str = urlread(challenge_url(), 'post', {'email_address', email, 'assignment_part_sid', [homework_id() '-' num2str(part)], 'response_encoding', 'delim'}); + + str = strtrim(str); + r = struct; + while(numel(str) > 0) + [f, str] = strtok (str, '|'); + [v, str] = strtok (str, '|'); + r = setfield(r, f, v); + end + + email = getfield(r, 'email_address'); + ch = getfield(r, 'challenge_key'); + signature = getfield(r, 'state'); + auxstring = getfield(r, 'challenge_aux_data'); +end + +function [result, str] = submitSolutionWeb(email, part, output, source) + + result = ['{"assignment_part_sid":"' base64encode([homework_id() '-' num2str(part)], '') '",' ... + '"email_address":"' base64encode(email, '') '",' ... + '"submission":"' base64encode(output, '') '",' ... + '"submission_aux":"' base64encode(source, '') '"' ... 
+ '}']; + str = 'Web-submission'; +end + +function [result, str] = submitSolution(email, ch_resp, part, output, ... + source, signature) + + params = {'assignment_part_sid', [homework_id() '-' num2str(part)], ... + 'email_address', email, ... + 'submission', base64encode(output, ''), ... + 'submission_aux', base64encode(source, ''), ... + 'challenge_response', ch_resp, ... + 'state', signature}; + + str = urlread(submit_url(), 'post', params); + + % Parse str to read for success / failure + result = 0; + +end + +% =========================== LOGIN HELPERS =========================== + +function [login password] = loginPrompt() + % Prompt for password + [login password] = basicPrompt(); + + if isempty(login) || isempty(password) + login = []; password = []; + end +end + + +function [login password] = basicPrompt() + login = input('Login (Email address): ', 's'); + password = input('Password: ', 's'); +end + +function [login password] = quickLogin(login,password) + disp(['You are currently logged in as ' login '.']); + cont_token = input('Is this you? 
(y/n - type n to reenter password)','s'); + if(isempty(cont_token) || cont_token(1)=='Y'||cont_token(1)=='y') + return; + else + [login password] = loginPrompt(); + end +end + +function [str] = challengeResponse(email, passwd, challenge) + str = sha1([challenge passwd]); +end + +% =============================== SHA-1 ================================ + +function hash = sha1(str) + + % Initialize variables + h0 = uint32(1732584193); + h1 = uint32(4023233417); + h2 = uint32(2562383102); + h3 = uint32(271733878); + h4 = uint32(3285377520); + + % Convert to word array + strlen = numel(str); + + % Break string into chars and append the bit 1 to the message + mC = [double(str) 128]; + mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')]; + + numB = strlen * 8; + if exist('idivide') + numC = idivide(uint32(numB + 65), 512, 'ceil'); + else + numC = ceil(double(numB + 65)/512); + end + numW = numC * 16; + mW = zeros(numW, 1, 'uint32'); + + idx = 1; + for i = 1:4:strlen + 1 + mW(idx) = bitor(bitor(bitor( ... + bitshift(uint32(mC(i)), 24), ... + bitshift(uint32(mC(i+1)), 16)), ... + bitshift(uint32(mC(i+2)), 8)), ... 
+ uint32(mC(i+3))); + idx = idx + 1; + end + + % Append length of message + mW(numW - 1) = uint32(bitshift(uint64(numB), -32)); + mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32)); + + % Process the message in successive 512-bit chs + for cId = 1 : double(numC) + cSt = (cId - 1) * 16 + 1; + cEnd = cId * 16; + ch = mW(cSt : cEnd); + + % Extend the sixteen 32-bit words into eighty 32-bit words + for j = 17 : 80 + ch(j) = ch(j - 3); + ch(j) = bitxor(ch(j), ch(j - 8)); + ch(j) = bitxor(ch(j), ch(j - 14)); + ch(j) = bitxor(ch(j), ch(j - 16)); + ch(j) = bitrotate(ch(j), 1); + end + + % Initialize hash value for this ch + a = h0; + b = h1; + c = h2; + d = h3; + e = h4; + + % Main loop + for i = 1 : 80 + if(i >= 1 && i <= 20) + f = bitor(bitand(b, c), bitand(bitcmp(b), d)); + k = uint32(1518500249); + elseif(i >= 21 && i <= 40) + f = bitxor(bitxor(b, c), d); + k = uint32(1859775393); + elseif(i >= 41 && i <= 60) + f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d)); + k = uint32(2400959708); + elseif(i >= 61 && i <= 80) + f = bitxor(bitxor(b, c), d); + k = uint32(3395469782); + end + + t = bitrotate(a, 5); + t = bitadd(t, f); + t = bitadd(t, e); + t = bitadd(t, k); + t = bitadd(t, ch(i)); + e = d; + d = c; + c = bitrotate(b, 30); + b = a; + a = t; + + end + h0 = bitadd(h0, a); + h1 = bitadd(h1, b); + h2 = bitadd(h2, c); + h3 = bitadd(h3, d); + h4 = bitadd(h4, e); + + end + + hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]); + + hash = lower(hash); + +end + +function ret = bitadd(iA, iB) + ret = double(iA) + double(iB); + ret = bitset(ret, 33, 0); + ret = uint32(ret); +end + +function ret = bitrotate(iA, places) + t = bitshift(iA, places - 32); + ret = bitshift(iA, places); + ret = bitor(ret, t); +end + +% =========================== Base64 Encoder ============================ +% Thanks to Peter John Acklam +% + +function y = base64encode(x, eol) +%BASE64ENCODE Perform base64 encoding on a string. 
+% +% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending +% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10). +% The returned encoded string is broken into lines of no more than 76 +% characters each, and each line will end with EOL unless it is empty. Let +% EOL be empty if you do not want the encoded string broken into lines. +% +% STR and EOL don't have to be strings (i.e., char arrays). The only +% requirement is that they are vectors containing values in the range 0-255. +% +% This function may be used to encode strings into the Base64 encoding +% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The +% Base64 encoding is designed to represent arbitrary sequences of octets in a +% form that need not be humanly readable. A 65-character subset +% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per +% printable character. +% +% Examples +% -------- +% +% If you want to encode a large file, you should encode it in chunks that are +% a multiple of 57 bytes. This ensures that the base64 lines line up and +% that you do not end up with padding in the middle. 57 bytes of data fills +% one complete base64 line (76 == 57*4/3): +% +% If ifid and ofid are two file identifiers opened for reading and writing, +% respectively, then you can base64 encode the data with +% +% while ~feof(ifid) +% fwrite(ofid, base64encode(fread(ifid, 60*57))); +% end +% +% or, if you have enough memory, +% +% fwrite(ofid, base64encode(fread(ifid))); +% +% See also BASE64DECODE. 
+ +% Author: Peter John Acklam +% Time-stamp: 2004-02-03 21:36:56 +0100 +% E-mail: pjacklam@online.no +% URL: http://home.online.no/~pjacklam + + if isnumeric(x) + x = num2str(x); + end + + % make sure we have the EOL value + if nargin < 2 + eol = sprintf('\n'); + else + if sum(size(eol) > 1) > 1 + error('EOL must be a vector.'); + end + if any(eol(:) > 255) + error('EOL can not contain values larger than 255.'); + end + end + + if sum(size(x) > 1) > 1 + error('STR must be a vector.'); + end + + x = uint8(x); + eol = uint8(eol); + + ndbytes = length(x); % number of decoded bytes + nchunks = ceil(ndbytes / 3); % number of chunks/groups + nebytes = 4 * nchunks; % number of encoded bytes + + % add padding if necessary, to make the length of x a multiple of 3 + if rem(ndbytes, 3) + x(end+1 : 3*nchunks) = 0; + end + + x = reshape(x, [3, nchunks]); % reshape the data + y = repmat(uint8(0), 4, nchunks); % for the encoded data + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Split up every 3 bytes into 4 pieces + % + % aaaaaabb bbbbcccc ccdddddd + % + % to form + % + % 00aaaaaa 00bbbbbb 00cccccc 00dddddd + % + y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:) + + y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:) + y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:) + + y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:) + y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:) + + y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:) + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Now perform the following mapping + % + % 0 - 25 -> A-Z + % 26 - 51 -> a-z + % 52 - 61 -> 0-9 + % 62 -> + + % 63 -> / + % + % We could use a mapping vector like + % + % ['A':'Z', 'a':'z', '0':'9', '+/'] + % + % but that would require an index vector of class double. 
+ % + z = repmat(uint8(0), size(y)); + i = y <= 25; z(i) = 'A' + double(y(i)); + i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i)); + i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i)); + i = y == 62; z(i) = '+'; + i = y == 63; z(i) = '/'; + y = z; + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Add padding if necessary. + % + npbytes = 3 * nchunks - ndbytes; % number of padding bytes + if npbytes + y(end-npbytes+1 : end) = '='; % '=' is used for padding + end + + if isempty(eol) + + % reshape to a row vector + y = reshape(y, [1, nebytes]); + + else + + nlines = ceil(nebytes / 76); % number of lines + neolbytes = length(eol); % number of bytes in eol string + + % pad data so it becomes a multiple of 76 elements + y = [y(:) ; zeros(76 * nlines - numel(y), 1)]; + y(nebytes + 1 : 76 * nlines) = 0; + y = reshape(y, 76, nlines); + + % insert eol strings + eol = eol(:); + y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines)); + + % remove padding, but keep the last eol string + m = nebytes + neolbytes * (nlines - 1); + n = (76+neolbytes)*nlines - neolbytes; + y(m+1 : n) = ''; + + % extract and reshape to row vector + y = reshape(y, 1, m+neolbytes); + + end + + % output is a character array + y = char(y); + +end diff --git a/ex4/ex4/submitWeb.m b/ex4/ex4/submitWeb.m new file mode 100644 index 0000000..e429365 --- /dev/null +++ b/ex4/ex4/submitWeb.m @@ -0,0 +1,20 @@ +% submitWeb Creates files from your code and output for web submission. +% +% If the submit function does not work for you, use the web-submission mechanism. +% Call this function to produce a file for the part you wish to submit. Then, +% submit the file to the class servers using the "Web Submission" button on the +% Programming Exercises page on the course website. +% +% You should call this function without arguments (submitWeb), to receive +% an interactive prompt for submission; optionally you can call it with the partID +% if you so wish. 
Make sure your working directory is set to the directory +% containing the submitWeb.m file and your assignment files. + +function submitWeb(partId) + if ~exist('partId', 'var') || isempty(partId) + partId = []; + end + + submit(partId, 1); +end + diff --git a/ex5/.DS_Store b/ex5/.DS_Store new file mode 100644 index 0000000..5074184 Binary files /dev/null and b/ex5/.DS_Store differ diff --git a/ex5/ex5.pdf b/ex5/ex5.pdf new file mode 100644 index 0000000..3d6391f Binary files /dev/null and b/ex5/ex5.pdf differ diff --git a/ex5/ex5/.DS_Store b/ex5/ex5/.DS_Store new file mode 100644 index 0000000..93db9b2 Binary files /dev/null and b/ex5/ex5/.DS_Store differ diff --git a/ex5/ex5/ex5.m b/ex5/ex5/ex5.m new file mode 100644 index 0000000..c62e800 --- /dev/null +++ b/ex5/ex5/ex5.m @@ -0,0 +1,220 @@ +%% Machine Learning Online Class +% Exercise 5 | Regularized Linear Regression and Bias-Variance +% +% Instructions +% ------------ +% +% This file contains code that helps you get started on the +% exercise. You will need to complete the following functions: +% +% linearRegCostFunction.m +% learningCurve.m +% validationCurve.m +% +% For this exercise, you will not need to change any code in this file, +% or any other files other than those mentioned above. +% + +%% Initialization +clear ; close all; clc + +%% =========== Part 1: Loading and Visualizing Data ============= +% We start the exercise by first loading and visualizing the dataset. +% The following code will load the dataset into your environment and plot +% the data. +% + +% Load Training Data +fprintf('Loading and Visualizing Data ...\n') + +% Load from ex5data1: +% You will have X, y, Xval, yval, Xtest, ytest in your environment +load ('ex5data1.mat'); + +% m = Number of examples +m = size(X, 1); + +% Plot training data +plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); +xlabel('Change in water level (x)'); +ylabel('Water flowing out of the dam (y)'); + +fprintf('Program paused. 
Press enter to continue.\n'); +pause; + +%% =========== Part 2: Regularized Linear Regression Cost ============= +% You should now implement the cost function for regularized linear +% regression. +% + +theta = [1 ; 1]; +J = linearRegCostFunction([ones(m, 1) X], y, theta, 1); + +fprintf(['Cost at theta = [1 ; 1]: %f '... + '\n(this value should be about 303.993192)\n'], J); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% =========== Part 3: Regularized Linear Regression Gradient ============= +% You should now implement the gradient for regularized linear +% regression. +% + +theta = [1 ; 1]; +[J, grad] = linearRegCostFunction([ones(m, 1) X], y, theta, 1); + +fprintf(['Gradient at theta = [1 ; 1]: [%f; %f] '... + '\n(this value should be about [-15.303016; 598.250744])\n'], ... + grad(1), grad(2)); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% =========== Part 4: Train Linear Regression ============= +% Once you have implemented the cost and gradient correctly, the +% trainLinearReg function will use your cost function to train +% regularized linear regression. +% +% Write Up Note: The data is non-linear, so this will not give a great +% fit. +% + +% Train linear regression with lambda = 0 +lambda = 0; +[theta] = trainLinearReg([ones(m, 1) X], y, lambda); + +% Plot fit over the data +plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); +xlabel('Change in water level (x)'); +ylabel('Water flowing out of the dam (y)'); +hold on; +plot(X, [ones(m, 1) X]*theta, '--', 'LineWidth', 2) +hold off; + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% =========== Part 5: Learning Curve for Linear Regression ============= +% Next, you should implement the learningCurve function. +% +% Write Up Note: Since the model is underfitting the data, we expect to +% see a graph with "high bias" -- slide 8 in ML-advice.pdf +% + +lambda = 0; +[error_train, error_val] = ... + learningCurve([ones(m, 1) X], y, ... 
+ [ones(size(Xval, 1), 1) Xval], yval, ... + lambda); + +plot(1:m, error_train, 1:m, error_val); +title('Learning curve for linear regression') +legend('Train', 'Cross Validation') +xlabel('Number of training examples') +ylabel('Error') +axis([0 13 0 150]) + +fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); +for i = 1:m + fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); +end + +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% =========== Part 6: Feature Mapping for Polynomial Regression ============= +% One solution to this is to use polynomial regression. You should now +% complete polyFeatures to map each example into its powers +% + +p = 8; + +% Map X onto Polynomial Features and Normalize +X_poly = polyFeatures(X, p); +[X_poly, mu, sigma] = featureNormalize(X_poly); % Normalize +X_poly = [ones(m, 1), X_poly]; % Add Ones + +% Map X_poly_test and normalize (using mu and sigma) +X_poly_test = polyFeatures(Xtest, p); +X_poly_test = bsxfun(@minus, X_poly_test, mu); +X_poly_test = bsxfun(@rdivide, X_poly_test, sigma); +X_poly_test = [ones(size(X_poly_test, 1), 1), X_poly_test]; % Add Ones + +% Map X_poly_val and normalize (using mu and sigma) +X_poly_val = polyFeatures(Xval, p); +X_poly_val = bsxfun(@minus, X_poly_val, mu); +X_poly_val = bsxfun(@rdivide, X_poly_val, sigma); +X_poly_val = [ones(size(X_poly_val, 1), 1), X_poly_val]; % Add Ones + +fprintf('Normalized Training Example 1:\n'); +fprintf(' %f \n', X_poly(1, :)); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + + + +%% =========== Part 7: Learning Curve for Polynomial Regression ============= +% Now, you will get to experiment with polynomial regression with multiple +% values of lambda. The code below runs polynomial regression with +% lambda = 0. You should try running the code with different values of +% lambda to see how the fit and learning curve change. 
+% + +lambda = 0; +[theta] = trainLinearReg(X_poly, y, lambda); + +% Plot training data and fit +figure(1); +plot(X, y, 'rx', 'MarkerSize', 10, 'LineWidth', 1.5); +plotFit(min(X), max(X), mu, sigma, theta, p); +xlabel('Change in water level (x)'); +ylabel('Water flowing out of the dam (y)'); +title (sprintf('Polynomial Regression Fit (lambda = %f)', lambda)); + +figure(2); +[error_train, error_val] = ... + learningCurve(X_poly, y, X_poly_val, yval, lambda); +plot(1:m, error_train, 1:m, error_val); + +title(sprintf('Polynomial Regression Learning Curve (lambda = %f)', lambda)); +xlabel('Number of training examples') +ylabel('Error') +axis([0 13 0 100]) +legend('Train', 'Cross Validation') + +fprintf('Polynomial Regression (lambda = %f)\n\n', lambda); +fprintf('# Training Examples\tTrain Error\tCross Validation Error\n'); +for i = 1:m + fprintf(' \t%d\t\t%f\t%f\n', i, error_train(i), error_val(i)); +end + +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% =========== Part 8: Validation for Selecting Lambda ============= +% You will now implement validationCurve to test various values of +% lambda on a validation set. You will then use this to select the +% "best" lambda value. +% + +[lambda_vec, error_train, error_val] = ... + validationCurve(X_poly, y, X_poly_val, yval); + +close all; +plot(lambda_vec, error_train, lambda_vec, error_val); +legend('Train', 'Cross Validation'); +xlabel('lambda'); +ylabel('Error'); + +fprintf('lambda\t\tTrain Error\tValidation Error\n'); +for i = 1:length(lambda_vec) + fprintf(' %f\t%f\t%f\n', ... + lambda_vec(i), error_train(i), error_val(i)); +end + +fprintf('Program paused. 
function [X_norm, mu, sigma] = featureNormalize(X)
%FEATURENORMALIZE Normalizes the features in X
%   [X_norm, mu, sigma] = FEATURENORMALIZE(X) returns a version of X in
%   which every column (feature) has mean 0 and standard deviation 1.
%
%   X      - data matrix, one example per row and one feature per column
%   X_norm - normalized copy of X, same size as X
%   mu     - 1 x n row vector with the mean of each column of X
%   sigma  - 1 x n row vector with the scale applied to each column
%
%   Columns with zero spread (constant features, or any column when X has
%   a single row) are given sigma = 1 instead of 0, so they map to 0
%   rather than NaN/Inf here and in callers that divide by sigma.

% Reduce along dimension 1 explicitly: for a single-row X, mean(X) and
% std(X) would otherwise collapse the row to one scalar.
mu = mean(X, 1);
X_norm = bsxfun(@minus, X, mu);

sigma = std(X_norm, 0, 1);
sigma(sigma == 0) = 1;          % avoid division by zero for constant columns
X_norm = bsxfun(@rdivide, X_norm, sigma);

end
You can +% (optionally) give "length" a second component, which will indicate the +% reduction in function value to be expected in the first line-search (defaults +% to 1.0). The function returns when either its length is up, or if no further +% progress can be made (ie, we are at a minimum, or so close that due to +% numerical problems, we cannot get any closer). If the function terminates +% within a few iterations, it could be an indication that the function value +% and derivatives are not consistent (ie, there may be a bug in the +% implementation of your "f" function). The function returns the found +% solution "X", a vector of function values "fX" indicating the progress made +% and "i" the number of iterations (line searches or function evaluations, +% depending on the sign of "length") used. +% +% Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) +% +% See also: checkgrad +% +% Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 +% +% +% (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen +% +% Permission is granted for anyone to copy, use, or modify these +% programs and accompanying documents for purposes of research or +% education, provided this copyright notice is retained, and note is +% made of any changes that have been made. +% +% These programs and documents are distributed without any warranty, +% express or implied. As the programs were written for research +% purposes only, they have not been tested to the degree that would be +% advisable in any important application. All use of these programs is +% entirely at the user's own risk. 
+% +% [ml-class] Changes Made: +% 1) Function name and argument specifications +% 2) Output display +% + +% Read options +if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') + length = options.MaxIter; +else + length = 100; +end + + +RHO = 0.01; % a bunch of constants for line searches +SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions +INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket +EXT = 3.0; % extrapolate maximum 3 times the current bracket +MAX = 20; % max 20 function evaluations per line search +RATIO = 100; % maximum allowed slope ratio + +argstr = ['feval(f, X']; % compose string used to call function +for i = 1:(nargin - 3) + argstr = [argstr, ',P', int2str(i)]; +end +argstr = [argstr, ')']; + +if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end +S=['Iteration ']; + +i = 0; % zero the run length counter +ls_failed = 0; % no previous line search has failed +fX = []; +[f1 df1] = eval(argstr); % get function value and gradient +i = i + (length<0); % count epochs?! +s = -df1; % search direction is steepest +d1 = -s'*s; % this is the slope +z1 = red/(1-d1); % initial step is red/(|s|+1) + +while i < abs(length) % while not finished + i = i + (length>0); % count iterations?! + + X0 = X; f0 = f1; df0 = df1; % make a copy of current values + X = X + z1*s; % begin line search + [f2 df2] = eval(argstr); + i = i + (length<0); % count epochs?! + d2 = df2'*s; + f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 + if length>0, M = MAX; else M = min(MAX, -length-i); end + success = 0; limit = -1; % initialize quanteties + while 1 + while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0) + limit = z1; % tighten the bracket + if f2 > f1 + z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit + else + A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit + B = 3*(f3-f2)-z3*(d3+2*d2); + z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 
+ end + if isnan(z2) | isinf(z2) + z2 = z3/2; % if we had a numerical problem then bisect + end + z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits + z1 = z1 + z2; % update the step + X = X + z2*s; + [f2 df2] = eval(argstr); + M = M - 1; i = i + (length<0); % count epochs?! + d2 = df2'*s; + z3 = z3-z2; % z3 is now relative to the location of z2 + end + if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1 + break; % this is a failure + elseif d2 > SIG*d1 + success = 1; break; % success + elseif M == 0 + break; % failure + end + A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation + B = 3*(f3-f2)-z3*(d3+2*d2); + z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! + if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0 % num prob or wrong sign? + if limit < -0.5 % if we have no upper limit + z2 = z1 * (EXT-1); % the extrapolate the maximum amount + else + z2 = (limit-z1)/2; % otherwise bisect + end + elseif (limit > -0.5) & (z2+z1 > limit) % extraplation beyond max? + z2 = (limit-z1)/2; % bisect + elseif (limit < -0.5) & (z2+z1 > z1*EXT) % extrapolation beyond limit + z2 = z1*(EXT-1.0); % set to extrapolation limit + elseif z2 < -z3*INT + z2 = -z3*INT; + elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT)) % too close to limit? + z2 = (limit-z1)*(1.0-INT); + end + f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 + z1 = z1 + z2; X = X + z2*s; % update current estimates + [f2 df2] = eval(argstr); + M = M - 1; i = i + (length<0); % count epochs?! 
+ d2 = df2'*s; + end % end of line search + + if success % if line search succeeded + f1 = f2; fX = [fX' f1]'; + fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); + s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction + tmp = df1; df1 = df2; df2 = tmp; % swap derivatives + d2 = df1'*s; + if d2 > 0 % new slope must be negative + s = -df1; % otherwise use steepest direction + d2 = -s'*s; + end + z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO + d1 = d2; + ls_failed = 0; % this line search did not fail + else + X = X0; f1 = f0; df1 = df0; % restore point from before failed line search + if ls_failed | i > abs(length) % line search failed twice in a row + break; % or we ran out of time, so we give up + end + tmp = df1; df1 = df2; df2 = tmp; % swap derivatives + s = -df1; % try steepest + d1 = -s'*s; + z1 = 1/(1-d1); + ls_failed = 1; % this line search failed + end + if exist('OCTAVE_VERSION') + fflush(stdout); + end +end +fprintf('\n'); diff --git a/ex5/ex5/learningCurve.m b/ex5/ex5/learningCurve.m new file mode 100644 index 0000000..dcc2201 --- /dev/null +++ b/ex5/ex5/learningCurve.m @@ -0,0 +1,71 @@ +function [error_train, error_val] = ... + learningCurve(X, y, Xval, yval, lambda) +%LEARNINGCURVE Generates the train and cross validation set errors needed +%to plot a learning curve +% [error_train, error_val] = ... +% LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and +% cross validation set errors for a learning curve. In particular, +% it returns two vectors of the same length - error_train and +% error_val. Then, error_train(i) contains the training error for +% i examples (and similarly for error_val(i)). +% +% In this function, you will compute the train and test errors for +% dataset sizes from 1 up to m. In practice, when working with larger +% datasets, you might want to do this in larger intervals. 
+% + +% Number of training examples +m = size(X, 1); + +% You need to return these values correctly +error_train = zeros(m, 1); +error_val = zeros(m, 1); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Fill in this function to return training errors in +% error_train and the cross validation errors in error_val. +% i.e., error_train(i) and +% error_val(i) should give you the errors +% obtained after training on i examples. +% +% Note: You should evaluate the training error on the first i training +% examples (i.e., X(1:i, :) and y(1:i)). +% +% For the cross-validation error, you should instead evaluate on +% the _entire_ cross validation set (Xval and yval). +% +% Note: If you are using your cost function (linearRegCostFunction) +% to compute the training and cross validation error, you should +% call the function with the lambda argument set to 0. +% Do note that you will still need to use lambda when running +% the training to obtain the theta parameters. +% +% Hint: You can loop over the examples with the following: +% +% for i = 1:m +% % Compute train/cross validation errors using training examples +% % X(1:i, :) and y(1:i), storing the result in +% % error_train(i) and error_val(i) +% .... 
function [J, grad] = linearRegCostFunction(X, y, theta, lambda)
%LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear
%regression with multiple variables
%   [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the
%   cost of using theta as the parameter for linear regression to fit the
%   data points in X and y. Returns the cost in J and the gradient in grad.
%
%   X      - design matrix; X * theta is evaluated, so theta must be a
%            column vector with one entry per column of X
%   y      - target values, one per row of X
%   theta  - parameter vector; theta(1) is excluded from regularization
%   lambda - regularization strength
%
%   J      - scalar cost: sum of squared residuals plus the lambda-weighted
%            squared parameters (first one excluded), both over 2*m
%   grad   - column vector, gradient of J with respect to theta

m = length(y); % number of training examples

residual = X * theta - y;

% Copy of theta with the first (bias) entry zeroed, so one vector serves
% both the cost penalty and the gradient penalty.
thetaReg = theta;
thetaReg(1) = 0;

J = sum(residual .^ 2) / (2 * m) + lambda * sum(thetaReg .^ 2) / (2 * m);

% Plain assignment instead of "+=": that operator is an Octave extension
% and a syntax error in MATLAB.
grad = X' * residual / m + (lambda * thetaReg) / m;
grad = grad(:);

end
function submit(partId, webSubmit)
%SUBMIT Submit your code and output to the ml-class servers
%   SUBMIT() will connect to the ml-class server and submit your solution
%
%   partId    - (optional) part to submit; when missing or empty the user
%               is prompted. numel(validParts()) + 1 means "all parts".
%   webSubmit - (optional) 0 (default) submits directly to the server;
%               non-zero writes a base64-encoded submission file instead.

  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
          homework_id());
  if ~exist('partId', 'var') || isempty(partId)
    partId = promptPart();
  end

  if ~exist('webSubmit', 'var') || isempty(webSubmit)
    webSubmit = 0; % submit directly by default
  end

  % Check valid partId
  partNames = validParts();
  if ~isValidPartId(partId)
    fprintf('!! Invalid homework part selected.\n');
    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
    fprintf('!! Submission Cancelled\n');
    return
  end

  % NOTE(review): login and password are written to ml_login_data.mat as-is
  % (no hashing or encryption). Keep that file out of version control.
  if ~exist('ml_login_data.mat','file')
    [login password] = loginPrompt();
    save('ml_login_data.mat','login','password');
  else
    load('ml_login_data.mat');
    [login password] = quickLogin(login, password);
    save('ml_login_data.mat','login','password');
  end

  if isempty(login)
    fprintf('!! Submission Cancelled\n');
    return
  end

  fprintf('\n== Connecting to ml-class ... ');
  if exist('OCTAVE_VERSION')
    fflush(stdout);
  end

  % Setup submit list: the index one past the last part means "everything"
  if partId == numel(partNames) + 1
    submitParts = 1:numel(partNames);
  else
    submitParts = [partId];
  end

  for s = 1:numel(submitParts)
    thisPartId = submitParts(s);
    if (~webSubmit) % submit directly to server
      [login, ch, signature, auxstring] = getChallenge(login, thisPartId);
      if isempty(login) || isempty(ch) || isempty(signature)
        % Some error occurred, error string in first return element.
        fprintf('\n!! Error: %s\n\n', login);
        return
      end

      % Attempt Submission with Challenge
      ch_resp = challengeResponse(login, password, ch);

      [result, str] = submitSolution(login, ch_resp, thisPartId, ...
             output(thisPartId, auxstring), source(thisPartId), signature);

      partName = partNames{thisPartId};

      fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ...
        homework_id(), thisPartId, partName);
      fprintf('== %s\n', strtrim(str));

      if exist('OCTAVE_VERSION')
        fflush(stdout);
      end
    else
      [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ...
                            source(thisPartId));
      result = base64encode(result);

      fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ...
        homework_id(), thisPartId);
      saveAsFile = input('', 's');
      if (isempty(saveAsFile))
        saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId);
      end

      fid = fopen(saveAsFile, 'w');
      % fopen signals failure with -1, which is "true" in a bare if; compare
      % explicitly so the fallback branch below is reachable.
      if (fid ~= -1)
        fwrite(fid, result);
        fclose(fid);
        fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
        fprintf(['You can now submit your solutions through the web \n' ...
                 'form in the programming exercises. Select the corresponding \n' ...
                 'programming exercise to access the form.\n']);

      else
        fprintf('Unable to save to %s\n\n', saveAsFile);
        fprintf(['You can create a submission file by saving the \n' ...
                 'following text in a file: (press enter to continue)\n\n']);
        pause;
        % Print the payload as data, never as a format string.
        fprintf('%s', result);
      end
    end
  end
end
function partId = promptPart()
  % Lists the submittable parts with their source files, reads the user's
  % choice from the keyboard, and returns it as a number.
  %
  % Returns -1 when the entry is not a single integer that
  % isValidPartId() accepts.
  fprintf('== Select which part(s) to submit:\n');
  partNames = validParts();
  srcFiles = sources();
  for i = 1:numel(partNames)
    fprintf('== %d) %s [', i, partNames{i});
    fprintf(' %s ', srcFiles{i}{:});
    fprintf(']\n');
  end
  fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
          numel(partNames) + 1, numel(partNames) + 1);
  selPart = input('', 's');

  % str2double instead of str2num: str2num evaluates the typed text as
  % code and may return a vector or empty value. str2double always gives
  % one scalar, NaN for anything that is not a plain number.
  partId = str2double(strtrim(selPart));
  if isnan(partId) || partId ~= fix(partId) || ~isValidPartId(partId)
    partId = -1;
  end
end
function [login password] = quickLogin(login,password)
  % Shows the cached login and asks the user to confirm it. An empty reply
  % or one starting with 'y'/'Y' keeps the given login and password; any
  % other reply replaces both with the result of loginPrompt().
  disp(['You are currently logged in as ' login '.']);
  reply = input('Is this you? (y/n - type n to reenter password)','s');

  keepCurrent = isempty(reply) || reply(1) == 'Y' || reply(1) == 'y';
  if ~keepCurrent
    [login password] = loginPrompt();
  end
end
function ret = bitadd(iA, iB)
  % Adds two uint32 values with wrap-around (result modulo 2^32).
  % The sum is formed in double precision so it cannot saturate; for
  % uint32 operands it is below 2^33, so reducing modulo 2^32 drops
  % exactly the carry bit.
  wideSum = double(iA) + double(iB);
  ret = uint32(mod(wideSum, 4294967296));
end

function ret = bitrotate(iA, places)
  % Rotates iA left by `places` bits within a 32-bit word: the bits shifted
  % out on the left (recovered by shifting right by 32 - places) are OR-ed
  % back in on the right.
  wrapped = bitshift(iA, places - 32);
  shifted = bitshift(iA, places);
  ret = bitor(shifted, wrapped);
end
+% +% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending +% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10). +% The returned encoded string is broken into lines of no more than 76 +% characters each, and each line will end with EOL unless it is empty. Let +% EOL be empty if you do not want the encoded string broken into lines. +% +% STR and EOL don't have to be strings (i.e., char arrays). The only +% requirement is that they are vectors containing values in the range 0-255. +% +% This function may be used to encode strings into the Base64 encoding +% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The +% Base64 encoding is designed to represent arbitrary sequences of octets in a +% form that need not be humanly readable. A 65-character subset +% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per +% printable character. +% +% Examples +% -------- +% +% If you want to encode a large file, you should encode it in chunks that are +% a multiple of 57 bytes. This ensures that the base64 lines line up and +% that you do not end up with padding in the middle. 57 bytes of data fills +% one complete base64 line (76 == 57*4/3): +% +% If ifid and ofid are two file identifiers opened for reading and writing, +% respectively, then you can base64 encode the data with +% +% while ~feof(ifid) +% fwrite(ofid, base64encode(fread(ifid, 60*57))); +% end +% +% or, if you have enough memory, +% +% fwrite(ofid, base64encode(fread(ifid))); +% +% See also BASE64DECODE. 
+ +% Author: Peter John Acklam +% Time-stamp: 2004-02-03 21:36:56 +0100 +% E-mail: pjacklam@online.no +% URL: http://home.online.no/~pjacklam + + if isnumeric(x) + x = num2str(x); + end + + % make sure we have the EOL value + if nargin < 2 + eol = sprintf('\n'); + else + if sum(size(eol) > 1) > 1 + error('EOL must be a vector.'); + end + if any(eol(:) > 255) + error('EOL can not contain values larger than 255.'); + end + end + + if sum(size(x) > 1) > 1 + error('STR must be a vector.'); + end + + x = uint8(x); + eol = uint8(eol); + + ndbytes = length(x); % number of decoded bytes + nchunks = ceil(ndbytes / 3); % number of chunks/groups + nebytes = 4 * nchunks; % number of encoded bytes + + % add padding if necessary, to make the length of x a multiple of 3 + if rem(ndbytes, 3) + x(end+1 : 3*nchunks) = 0; + end + + x = reshape(x, [3, nchunks]); % reshape the data + y = repmat(uint8(0), 4, nchunks); % for the encoded data + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Split up every 3 bytes into 4 pieces + % + % aaaaaabb bbbbcccc ccdddddd + % + % to form + % + % 00aaaaaa 00bbbbbb 00cccccc 00dddddd + % + y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:) + + y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:) + y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:) + + y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:) + y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:) + + y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:) + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Now perform the following mapping + % + % 0 - 25 -> A-Z + % 26 - 51 -> a-z + % 52 - 61 -> 0-9 + % 62 -> + + % 63 -> / + % + % We could use a mapping vector like + % + % ['A':'Z', 'a':'z', '0':'9', '+/'] + % + % but that would require an index vector of class double. 
+ % + z = repmat(uint8(0), size(y)); + i = y <= 25; z(i) = 'A' + double(y(i)); + i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i)); + i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i)); + i = y == 62; z(i) = '+'; + i = y == 63; z(i) = '/'; + y = z; + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Add padding if necessary. + % + npbytes = 3 * nchunks - ndbytes; % number of padding bytes + if npbytes + y(end-npbytes+1 : end) = '='; % '=' is used for padding + end + + if isempty(eol) + + % reshape to a row vector + y = reshape(y, [1, nebytes]); + + else + + nlines = ceil(nebytes / 76); % number of lines + neolbytes = length(eol); % number of bytes in eol string + + % pad data so it becomes a multiple of 76 elements + y = [y(:) ; zeros(76 * nlines - numel(y), 1)]; + y(nebytes + 1 : 76 * nlines) = 0; + y = reshape(y, 76, nlines); + + % insert eol strings + eol = eol(:); + y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines)); + + % remove padding, but keep the last eol string + m = nebytes + neolbytes * (nlines - 1); + n = (76+neolbytes)*nlines - neolbytes; + y(m+1 : n) = ''; + + % extract and reshape to row vector + y = reshape(y, 1, m+neolbytes); + + end + + % output is a character array + y = char(y); + +end diff --git a/ex5/ex5/submitWeb.m b/ex5/ex5/submitWeb.m new file mode 100644 index 0000000..e429365 --- /dev/null +++ b/ex5/ex5/submitWeb.m @@ -0,0 +1,20 @@ +% submitWeb Creates files from your code and output for web submission. +% +% If the submit function does not work for you, use the web-submission mechanism. +% Call this function to produce a file for the part you wish to submit. Then, +% submit the file to the class servers using the "Web Submission" button on the +% Programming Exercises page on the course website. +% +% You should call this function without arguments (submitWeb), to receive +% an interactive prompt for submission; optionally you can call it with the partID +% if you so wish. 
% ===== ex5/ex5/submitWeb.m =====
function submitWeb(partId)
%SUBMITWEB Produce a submission file for the web-submission mechanism.
%   SUBMITWEB() asks interactively which part to submit; SUBMITWEB(partId)
%   selects the part directly.  Run it from the directory that holds
%   submitWeb.m and the assignment files.  The work is delegated to
%   submit(partId, 1).
    if nargin < 1 || isempty(partId)
        partId = [];
    end

    submit(partId, 1);
end

% ===== ex5/ex5/trainLinearReg.m =====
function [theta] = trainLinearReg(X, y, lambda)
%TRAINLINEARREG Fit regularized linear regression with fmincg.
%   theta = TRAINLINEARREG(X, y, lambda) minimizes linearRegCostFunction
%   over the dataset (X, y) with regularization parameter lambda, starting
%   from an all-zero parameter vector, and returns the trained parameters.

% One starting parameter per column of X
numParams = size(X, 2);

% Bind the data and lambda so the optimizer sees a one-argument function
objective = @(t) linearRegCostFunction(X, y, t, lambda);

% At most 200 iterations; the objective also supplies its gradient
solverOptions = optimset('MaxIter', 200, 'GradObj', 'on');

theta = fmincg(objective, zeros(numParams, 1), solverOptions);

end

% ===== ex5/ex5/validationCurve.m =====
function [lambda_vec, error_train, error_val] = ...
    validationCurve(X, y, Xval, yval)
%VALIDATIONCURVE Train/validation error for a fixed ladder of lambdas.
%   [lambda_vec, error_train, error_val] = VALIDATIONCURVE(X, y, Xval, yval)
%   trains on (X, y) once per entry of lambda_vec and reports the
%   unregularized cost (lambda = 0) of each fitted model on the training
%   set and on the validation set (Xval, yval).

% Selected values of lambda (you should not change this)
lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]';

numLambdas  = length(lambda_vec);
error_train = zeros(numLambdas, 1);
error_val   = zeros(numLambdas, 1);

for idx = 1:numLambdas
    fitted = trainLinearReg(X, y, lambda_vec(idx));

    % Errors are measured without the regularization term
    error_train(idx) = linearRegCostFunction(X, y, fitted, 0);
    error_val(idx)   = linearRegCostFunction(Xval, yval, fitted, 0);
end

end

% ===== ex6/ex6/dataset3Params.m =====
function [C, sigma] = dataset3Params(X, y, Xval, yval)
%DATASET3PARAMS Grid-search C and sigma for the RBF-kernel SVM.
%   [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) trains one SVM per
%   (C, sigma) pair taken from the same eight candidate values, scores each
%   model by its misclassification rate on (Xval, yval) and returns the
%   pair with the lowest rate.  Ties go to the smallest sigma index, then
%   the smallest C index.

candidates    = [0.01 0.03 0.1 0.3 1 3 10 30]';
numCandidates = size(candidates, 1);

% cvError(cIdx, sIdx): validation error for C = candidates(cIdx),
% sigma = candidates(sIdx)
cvError = zeros(numCandidates, numCandidates);

for cIdx = 1:numCandidates
    for sIdx = 1:numCandidates
        kernelFn = @(x1, x2) gaussianKernel(x1, x2, candidates(sIdx));
        model    = svmTrain(X, y, candidates(cIdx), kernelFn);
        guesses  = svmPredict(model, Xval);
        cvError(cIdx, sIdx) = mean(double(guesses ~= yval));
    end
end

% First minimum in column-major order: lowest column (sigma) index first,
% then lowest row (C) index within that column.
[lowestError, flatIdx] = min(cvError(:));
[bestC, bestSigma]     = ind2sub(size(cvError), flatIdx);

C     = candidates(bestC);
sigma = candidates(bestSigma);

end
% ===== ex6/ex6/emailFeatures.m =====
function x = emailFeatures(word_indices)
%EMAILFEATURES Build a binary bag-of-words feature vector for one email.
%   x = EMAILFEATURES(word_indices) returns a 1899-by-1 vector (one entry
%   per word of the fixed dictionary) in which x(i) = 1 when index i occurs
%   in word_indices and 0 otherwise.
%
%   word_indices may be a row vector, a column vector or empty; repeated
%   indices are harmless.  For example, if 'the' is word 60 and appears in
%   the email, then x(60) = 1:
%
%      word_indices = [60 100 33 44 10 53 60 58 5]
%      x            = [0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..]'

% Total number of words in the dictionary
n = 1899;

x = zeros(n, 1);

% Set every listed position in one indexed assignment.  The previous loop
% ran over 1:size(word_indices), which takes only the first element of the
% size vector as the upper bound and therefore processed a single entry
% whenever word_indices was a row vector.
x(word_indices(:)) = 1;

end
% ===== ex6/ex6/ex6.m =====
%% Machine Learning Online Class
%  Exercise 6 | Support Vector Machines
%
%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  exercise. You will need to complete the following functions:
%
%     gaussianKernel.m
%     dataset3Params.m
%     processEmail.m
%     emailFeatures.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
clear ; close all; clc

%% =============== Part 1: Loading and Visualizing Data ================
%  We start the exercise by first loading and visualizing the dataset.
%  The following code will load the dataset into your environment and plot
%  the data.
%

fprintf('Loading and Visualizing Data ...\n')

% Load from ex6data1:
% You will have X, y in your environment
load('ex6data1.mat');

% Plot training data
plotData(X, y);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ==================== Part 2: Training Linear SVM ====================
%  The following code will train a linear SVM on the dataset and plot the
%  decision boundary learned.
%

% Load from ex6data1:
% You will have X, y in your environment
load('ex6data1.mat');

fprintf('\nTraining Linear SVM ...\n')

% You should try to change the C value below and see how the decision
% boundary varies (e.g., try C = 1000)
C = 1;
model = svmTrain(X, y, C, @linearKernel, 1e-3, 20);
visualizeBoundaryLinear(X, y, model);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% =============== Part 3: Implementing Gaussian Kernel ===============
%  You will now implement the Gaussian kernel to use
%  with the SVM. You should complete the code in gaussianKernel.m
%
fprintf('\nEvaluating the Gaussian Kernel ...\n')

x1 = [1 2 1]; x2 = [0 4 -1]; sigma = 2;
sim = gaussianKernel(x1, x2, sigma);

% Print the sigma that was actually used.  The message used to hard-code
% "sigma = 0.5", which disagreed with sigma = 2 above; the reference value
% 0.324652 is the kernel value for sigma = 2.
fprintf(['Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = %f :' ...
         '\n\t%f\n(for sigma = 2, this value should be about 0.324652)\n'], sigma, sim);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% =============== Part 4: Visualizing Dataset 2 ================
%  The following code will load the next dataset into your environment and
%  plot the data.
%

fprintf('Loading and Visualizing Data ...\n')

% Load from ex6data2:
% You will have X, y in your environment
load('ex6data2.mat');

% Plot training data
plotData(X, y);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
%  After you have implemented the kernel, we can now use it to train the
%  SVM classifier.
%
fprintf('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...\n');

% Load from ex6data2:
% You will have X, y in your environment
load('ex6data2.mat');

% SVM Parameters
C = 1; sigma = 0.1;

% No tolerance or max_passes arguments are passed here, so svmTrain runs
% with whatever defaults it defines.  In practice, you will want to run the
% training to convergence.
model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
visualizeBoundary(X, y, model);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% =============== Part 6: Visualizing Dataset 3 ================
%  The following code will load the next dataset into your environment and
%  plot the data.
%

fprintf('Loading and Visualizing Data ...\n')

% Load from ex6data3:
% You will have X, y in your environment
load('ex6data3.mat');

% Plot training data
plotData(X, y);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========

%  This is a different dataset that you can use to experiment with. Try
%  different values of C and sigma here.
%

% Load from ex6data3:
% You will have X, y in your environment
load('ex6data3.mat');

% Try different SVM Parameters here
[C, sigma] = dataset3Params(X, y, Xval, yval);

% Train the SVM
model= svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
visualizeBoundary(X, y, model);

fprintf('Program paused. Press enter to continue.\n');
pause;

% ===== ex6/ex6/ex6_spam.m (opening section of the script) =====
%% Machine Learning Online Class
%  Exercise 6 | Spam Classification with SVMs
%
%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  exercise. You will need to complete the following functions:
%
%     gaussianKernel.m
%     dataset3Params.m
%     processEmail.m
%     emailFeatures.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
clear ; close all; clc

%% ==================== Part 1: Email Preprocessing ====================
%  To use an SVM to classify emails into Spam vs. Non-Spam, you first need
%  to convert each email into a vector of features. In this part, you will
%  implement the preprocessing steps for each email. You should
%  complete the code in processEmail.m to produce a word indices vector
%  for a given email.
% ===== ex6/ex6/ex6_spam.m (Parts 1-5 of the script) =====

fprintf('\nPreprocessing sample email (emailSample1.txt)\n');

% Extract Features
file_contents = readFile('emailSample1.txt');
word_indices  = processEmail(file_contents);

% Print Stats: one " %d" is emitted per element of word_indices
fprintf('Word Indices: \n');
fprintf(' %d', word_indices);
fprintf('\n\n');

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ==================== Part 2: Feature Extraction ====================
%  Now, you will convert each email into a vector of features in R^n.
%  You should complete the code in emailFeatures.m to produce a feature
%  vector for a given email.

fprintf('\nExtracting features from sample email (emailSample1.txt)\n');

% Extract Features (the sample email is read and processed again here)
file_contents = readFile('emailSample1.txt');
word_indices  = processEmail(file_contents);
features      = emailFeatures(word_indices);

% Print Stats
fprintf('Length of feature vector: %d\n', length(features));
fprintf('Number of non-zero entries: %d\n', sum(features > 0));

fprintf('Program paused. Press enter to continue.\n');
pause;

%% =========== Part 3: Train Linear SVM for Spam Classification ========
%  In this section, you will train a linear classifier to determine if an
%  email is Spam or Not-Spam.

% Load the Spam Email dataset
% You will have X, y in your environment
load('spamTrain.mat');

fprintf('\nTraining Linear SVM (Spam Classification)\n')
fprintf('(this may take 1 to 2 minutes) ...\n')

C = 0.1;
model = svmTrain(X, y, C, @linearKernel);

% Predict on the same data the model was trained on
p = svmPredict(model, X);

% Fraction of matching labels, reported as a percentage
fprintf('Training Accuracy: %f\n', mean(double(p == y)) * 100);

%% =================== Part 4: Test Spam Classification ================
%  After training the classifier, we can evaluate it on a test set. We have
%  included a test set in spamTest.mat

% Load the test dataset
% You will have Xtest, ytest in your environment
load('spamTest.mat');

fprintf('\nEvaluating the trained Linear SVM on a test set ...\n')

p = svmPredict(model, Xtest);

fprintf('Test Accuracy: %f\n', mean(double(p == ytest)) * 100);
% Waits for a key press; unlike the other parts, no "Program paused"
% message is printed before this pause.
pause;


%% ================= Part 5: Top Predictors of Spam ====================
%  Since the model we are training is a linear SVM, we can inspect the
%  weights learned by the model to understand better how it is determining
%  whether an email is spam or not. The following code finds the words with
%  the highest weights in the classifier. Informally, the classifier
%  'thinks' that these words are the most likely indicators of spam.
%

% Sort the weights in descending order and obtain the vocabulary list;
% idx maps each sorted position back to its word number
[weight, idx] = sort(model.w, 'descend');
vocabList = getVocabList();

fprintf('\nTop predictors of spam: \n');
% Print the 15 words with the largest weights
for i = 1:15
    fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i));
end

fprintf('\n\n');
fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% =================== Part 6: Try Your Own Emails =====================
%  Now that you've trained the spam classifier, you can use it on your own
%  emails! In the starter code, we have included spamSample1.txt,
%  spamSample2.txt, emailSample1.txt and emailSample2.txt as examples.
%  The following code reads in one of these emails and then uses your
%  learned SVM classifier to determine whether the email is Spam or
%  Not Spam

% Set the file to be read in (change this to spamSample2.txt,
% emailSample1.txt or emailSample2.txt to see different predictions on
% different email types). Try your own emails as well!
% ===== ex6/ex6/ex6_spam.m (closing statements of Part 6) =====
filename = 'spamSample1.txt';

% Read and predict
file_contents = readFile(filename);
word_indices  = processEmail(file_contents);
x             = emailFeatures(word_indices);
p = svmPredict(model, x);

fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p);
fprintf('(1 indicates spam, 0 indicates not spam)\n\n');

% ===== ex6/ex6/gaussianKernel.m =====
function sim = gaussianKernel(x1, x2, sigma)
%GAUSSIANKERNEL Gaussian (RBF) similarity between two vectors.
%   sim = GAUSSIANKERNEL(x1, x2, sigma) returns
%
%      exp( -||x1 - x2||^2 / (2 * sigma^2) )
%
%   x1 and x2 may be row or column vectors with the same number of
%   elements; sigma is the kernel bandwidth.

% Ensure that x1 and x2 are column vectors
x1 = x1(:); x2 = x2(:);

% Squared Euclidean distance as an inner product of the difference
delta = x1 - x2;
sim = exp(-(delta' * delta) / (2 * sigma^2));

end

% ===== ex6/ex6/getVocabList.m =====
function vocabList = getVocabList()
%GETVOCABLIST Read the fixed vocabulary list stored in vocab.txt.
%   vocabList = GETVOCABLIST() returns a 1899-by-1 cell array in which
%   vocabList{i} is the i-th word read from vocab.txt.  Each entry of the
%   file is read as an integer followed by a word.
%
%   Raises getVocabList:cannotOpen when vocab.txt cannot be opened.

%% Read the fixed vocabulary list
fid = fopen('vocab.txt');
if fid == -1
    % fopen signals failure with -1; stop here with a clear message rather
    % than letting the first fscanf fail on an invalid file identifier.
    error('getVocabList:cannotOpen', ...
          'Could not open vocab.txt; make sure it is in the working directory or on the path.');
end

n = 1899;  % Total number of words in the dictionary

% The words are stored in a cell array addressed by word number
vocabList = cell(n, 1);
for i = 1:n
    % Word Index (can ignore since it will be = i)
    fscanf(fid, '%d', 1);
    % Actual Word
    vocabList{i} = fscanf(fid, '%s', 1);
end
fclose(fid);

end

% ===== ex6/ex6/linearKernel.m =====
function sim = linearKernel(x1, x2)
%LINEARKERNEL Linear kernel (dot product) between two vectors.
%   sim = LINEARKERNEL(x1, x2) returns x1' * x2 after reshaping both
%   inputs to column vectors.

% Ensure that x1 and x2 are column vectors
x1 = x1(:); x2 = x2(:);

% Compute the kernel
sim = x1' * x2;  % dot product

end
% ===== ex6/ex6/plotData.m =====
function plotData(X, y)
%PLOTDATA Scatter-plot a two-feature dataset, split by label.
%   PLOTDATA(X, y) draws the rows of X with y == 1 as black '+' markers and
%   the rows with y == 0 as yellow-filled circles, using columns 1 and 2 of
%   X as coordinates.  Labels are expected to be 1 or 0.

% Masks for positive and negative examples
isPositive = (y == 1);
isNegative = (y == 0);

% Plot examples
plot(X(isPositive, 1), X(isPositive, 2), 'k+','LineWidth', 1, 'MarkerSize', 7)
hold on;
plot(X(isNegative, 1), X(isNegative, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7)
hold off;

% ===== ex6/ex6/porterStemmer.m =====
function stem = porterStemmer(inString)
% Applies the Porter Stemming algorithm as presented in the following
% paper:
% Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
%   no. 3, pp 130-137

% Original code modeled after the C version provided at:
% http://www.tartarus.org/~martin/PorterStemmer/c.txt

% The word is lower-cased and, when longer than two characters, passed
% through step1ab, step1c, step2, step3, step4 and step5 in that order.
% Every step takes the current word, its end index and the start index
% (always 1) and hands back a {word, endIndex} pair that feeds the next
% step.  The global j is shared scratch state used by the step helpers.
%
% Words of length 1 or 2 are returned lower-cased without stemming; remove
% the length test below to match the published algorithm.

global j;

word       = lower(inString);
firstIndex = 1;
lastIndex  = length(word);
j          = lastIndex;

stem = word;
if lastIndex <= 2
    return
end

state = step1ab(word, lastIndex, firstIndex);

remainingSteps = {@step1c, @step2, @step3, @step4, @step5};
for s = 1:numel(remainingSteps)
    state = feval(remainingSteps{s}, state{1}, state{2}, firstIndex);
end

stem = state{1};

% cons(i) is TRUE <=> b(i) is a consonant.  The letters a, e, i, o, u are
% never consonants; 'y' is a consonant at the start of the word (i == k0)
% or when the letter before it is not a consonant.
function c = cons(i, b, k0)
letter = b(i);
if any(letter == 'aeiou')
    c = false;
elseif letter == 'y'
    c = (i == k0) || ~cons(i - 1, b, k0);
else
    c = true;
end

% measure() counts the number of consonant sequences between k0 and j. If
% c is a consonant sequence and v a vowel sequence, and [..] indicates
% arbitrary presence,

%      [c][v]       gives 0
%      [c]vc[v]     gives 1
%      [c]vcvc[v]   gives 2
%      [c]vcvcvc[v] gives 3
%      ....
function n = measure(b, k0)
% Counts the vowel-to-consonant boundaries in b(k0..j), i.e. how many
% positions hold a consonant directly preceded by a non-consonant.  j is
% the shared global end-of-stem index.
global j;
positions = k0:j;
isCons = false(size(positions));
for p = 1:numel(positions)
    isCons(p) = cons(positions(p), b, k0);
end
n = sum(~isCons(1:end-1) & isCons(2:end));


% vowelinstem() is TRUE <=> k0,...j contains a vowel
function vis = vowelinstem(b, k0)
global j;
vis = false;
pos = k0;
% stop at the first non-consonant
while pos <= j && ~vis
    vis = ~cons(pos, b, k0);
    pos = pos + 1;
end

% doublec(i) is TRUE <=> i,(i-1) contain a double consonant.
function dc = doublec(i, b, k0)
% The tests short-circuit left to right: position check first, then the
% letter comparison, and only then the consonant test.
dc = (i >= k0 + 1) && (b(i) == b(i-1)) && cons(i, b, k0);


% cvc(i) is TRUE <=> i-2,i-1,i has the form consonant - vowel - consonant
% and also if the second c is not w,x or y. this is used when trying to
% restore an e at the end of a short word. e.g.
%
%      cav(e), lov(e), hop(e), crim(e), but
%      snow, box, tray.

function c1 = cvc(i, b, k0)
c1 = false;
if i < (k0 + 2)
    return
end
if cons(i, b, k0) && ~cons(i-1, b, k0) && cons(i-2, b, k0)
    c1 = ~any(b(i) == 'wxy');
end

% ends(s) is TRUE <=> k0,...k ends with the string s.  On a match the
% global j is set to the index just before the matched suffix; otherwise j
% is left untouched.
function s = ends(str, b, k)
global j;
suffixLen = length(str);
s = false;
% cheap rejections: last letter differs, or the suffix is longer than the word
if (str(suffixLen) ~= b(k)) || (suffixLen > k)
    return
end
if strcmp(b(k-suffixLen+1:k), str)
    s = true;
    j = k - suffixLen;
end

% setto(s) sets (j+1),...k to the characters in the string s, readjusting
% k accordingly.  Returns {newWord, newEndIndex}.

function so = setto(s, b, k)
global j;
replLen = length(s);
if replLen > 0
    b(j+1:j+replLen) = s;
end
% drop whatever is left of the old suffix beyond the replacement
if k > j + replLen
    b((j+replLen+1):k) = '';
end
k = length(b);
so = {b, k};

% rs(s) is used further down.
% [Note: possible null/value for r if rs is called]
% rs() returns the {word, endIndex} pair unchanged unless measure(b, k0)
% is positive, in which case the result of setto(str, b, k) is returned.
% Both measure() and setto() read the global j, so the outcome depends on
% the value j holds when rs() is entered.
function r = rs(str, b, k, k0)
r = {b, k};
if measure(b, k0) > 0
    r = setto(str, b, k);
end

% step1ab() gets rid of plurals and -ed or -ing. e.g.

%       caresses  ->  caress
%       ponies    ->  poni
%       ties      ->  ti
%       caress    ->  caress
%       cats      ->  cat

%       feed      ->  feed
%       agreed    ->  agree
%       disabled  ->  disable

%       matting   ->  mat
%       mating    ->  mate
%       meeting   ->  meet
%       milling   ->  mill
%       messing   ->  mess

%       meetings  ->  meet

% Returns {b(k0:k), k} and leaves the global j equal to the new k.
function s1ab = step1ab(b, k, k0)
global j;
% --- plural endings ---
if b(k) == 's'
    if ends('sses', b, k)
        k = k-2;                      % shorten the end index by two
    elseif ends('ies', b, k)
        retVal = setto('i', b, k);    % replace the matched 'ies' by 'i'
        b = retVal{1};
        k = retVal{2};
    elseif (b(k-1) ~= 's')
        k = k-1;                      % drop a final 's' not preceded by 's'
    end
end
% --- -eed / -ed / -ing ---
if ends('eed', b, k)
    % ends() has set j to just before 'eed'; measure() works on b(k0..j)
    if measure(b, k0) > 0;
        k = k-1;
    end
elseif (ends('ed', b, k) || ends('ing', b, k)) && vowelinstem(b, k0)
    k = j;                            % cut the word back to the stem
    retVal = {b, k};
    if ends('at', b, k)
        retVal = setto('ate', b(k0:k), k);
    elseif ends('bl', b, k)
        retVal = setto('ble', b(k0:k), k);
    elseif ends('iz', b, k)
        retVal = setto('ize', b(k0:k), k);
    elseif doublec(k, b, k0)
        % doubled final consonant: step the end index back by one ...
        retVal = {b, k-1};
        % ... but restore it when that letter is l, s or z
        if b(retVal{2}) == 'l' || b(retVal{2}) == 's' || ...
                b(retVal{2}) == 'z'
            retVal = {retVal{1}, retVal{2}+1};
        end
    elseif measure(b, k0) == 1 && cvc(k, b, k0)
        retVal = setto('e', b(k0:k), k);
    end
    k = retVal{2};
    b = retVal{1}(k0:k);
end
j = k;
s1ab = {b(k0:k), k};

%  step1c() turns terminal y to i when there is another vowel in the stem.
%  ends('y', ...) sets j to just before the 'y', so vowelinstem() only
%  examines the letters ahead of it.  Returns {b, k} with j reset to k.
function s1c = step1c(b, k, k0)
global j;
if ends('y', b, k) && vowelinstem(b, k0)
    b(k) = 'i';
end
j = k;
s1c = {b, k};

% step2() maps double suffices to single ones. so -ization ( = -ize plus
% -ation) maps to -ize etc. note that the string before the suffix must give
% m() > 0.
+function s2 = step2(b, k, k0) +global j; +s2 = {b, k}; +switch b(k-1) + case {'a'} + if ends('ational', b, k) s2 = rs('ate', b, k, k0); + elseif ends('tional', b, k) s2 = rs('tion', b, k, k0); end; + case {'c'} + if ends('enci', b, k) s2 = rs('ence', b, k, k0); + elseif ends('anci', b, k) s2 = rs('ance', b, k, k0); end; + case {'e'} + if ends('izer', b, k) s2 = rs('ize', b, k, k0); end; + case {'l'} + if ends('bli', b, k) s2 = rs('ble', b, k, k0); + elseif ends('alli', b, k) s2 = rs('al', b, k, k0); + elseif ends('entli', b, k) s2 = rs('ent', b, k, k0); + elseif ends('eli', b, k) s2 = rs('e', b, k, k0); + elseif ends('ousli', b, k) s2 = rs('ous', b, k, k0); end; + case {'o'} + if ends('ization', b, k) s2 = rs('ize', b, k, k0); + elseif ends('ation', b, k) s2 = rs('ate', b, k, k0); + elseif ends('ator', b, k) s2 = rs('ate', b, k, k0); end; + case {'s'} + if ends('alism', b, k) s2 = rs('al', b, k, k0); + elseif ends('iveness', b, k) s2 = rs('ive', b, k, k0); + elseif ends('fulness', b, k) s2 = rs('ful', b, k, k0); + elseif ends('ousness', b, k) s2 = rs('ous', b, k, k0); end; + case {'t'} + if ends('aliti', b, k) s2 = rs('al', b, k, k0); + elseif ends('iviti', b, k) s2 = rs('ive', b, k, k0); + elseif ends('biliti', b, k) s2 = rs('ble', b, k, k0); end; + case {'g'} + if ends('logi', b, k) s2 = rs('log', b, k, k0); end; +end +j = s2{2}; + +% step3() deals with -ic-, -full, -ness etc. similar strategy to step2. +function s3 = step3(b, k, k0) +global j; +s3 = {b, k}; +switch b(k) + case {'e'} + if ends('icate', b, k) s3 = rs('ic', b, k, k0); + elseif ends('ative', b, k) s3 = rs('', b, k, k0); + elseif ends('alize', b, k) s3 = rs('al', b, k, k0); end; + case {'i'} + if ends('iciti', b, k) s3 = rs('ic', b, k, k0); end; + case {'l'} + if ends('ical', b, k) s3 = rs('ic', b, k, k0); + elseif ends('ful', b, k) s3 = rs('', b, k, k0); end; + case {'s'} + if ends('ness', b, k) s3 = rs('', b, k, k0); end; +end +j = s3{2}; + +% step4() takes off -ant, -ence etc., in context vcvc. 
+function s4 = step4(b, k, k0) +global j; +switch b(k-1) + case {'a'} + if ends('al', b, k) end; + case {'c'} + if ends('ance', b, k) + elseif ends('ence', b, k) end; + case {'e'} + if ends('er', b, k) end; + case {'i'} + if ends('ic', b, k) end; + case {'l'} + if ends('able', b, k) + elseif ends('ible', b, k) end; + case {'n'} + if ends('ant', b, k) + elseif ends('ement', b, k) + elseif ends('ment', b, k) + elseif ends('ent', b, k) end; + case {'o'} + if ends('ion', b, k) + if j == 0 + elseif ~(strcmp(b(j),'s') || strcmp(b(j),'t')) + j = k; + end + elseif ends('ou', b, k) end; + case {'s'} + if ends('ism', b, k) end; + case {'t'} + if ends('ate', b, k) + elseif ends('iti', b, k) end; + case {'u'} + if ends('ous', b, k) end; + case {'v'} + if ends('ive', b, k) end; + case {'z'} + if ends('ize', b, k) end; +end +if measure(b, k0) > 1 + s4 = {b(k0:j), j}; +else + s4 = {b(k0:k), k}; +end + +% step5() removes a final -e if m() > 1, and changes -ll to -l if m() > 1. +function s5 = step5(b, k, k0) +global j; +j = k; +if b(k) == 'e' + a = measure(b, k0); + if (a > 1) || ((a == 1) && ~cvc(k-1, b, k0)) + k = k-1; + end +end +if (b(k) == 'l') && doublec(k, b, k0) && (measure(b, k0) > 1) + k = k-1; +end +s5 = {b(k0:k), k}; diff --git a/ex6/ex6/processEmail.m b/ex6/ex6/processEmail.m new file mode 100644 index 0000000..8588f3c --- /dev/null +++ b/ex6/ex6/processEmail.m @@ -0,0 +1,127 @@ +function word_indices = processEmail(email_contents) +%PROCESSEMAIL preprocesses a the body of an email and +%returns a list of word_indices +% word_indices = PROCESSEMAIL(email_contents) preprocesses +% the body of an email and returns a list of indices of the +% words contained in the email. 
+% + +% Load Vocabulary +vocabList = getVocabList(); + +% Init return value +word_indices = []; + +% ========================== Preprocess Email =========================== + +% Find the Headers ( \n\n and remove ) +% Uncomment the following lines if you are working with raw emails with the +% full headers + +% hdrstart = strfind(email_contents, ([char(10) char(10)])); +% email_contents = email_contents(hdrstart(1):end); + +% Lower case +email_contents = lower(email_contents); + +% Strip all HTML +% Looks for any expression that starts with < and ends with > and replace +% and does not have any < or > in the tag it with a space +email_contents = regexprep(email_contents, '<[^<>]+>', ' '); + +% Handle Numbers +% Look for one or more characters between 0-9 +email_contents = regexprep(email_contents, '[0-9]+', 'number'); + +% Handle URLS +% Look for strings starting with http:// or https:// +email_contents = regexprep(email_contents, ... + '(http|https)://[^\s]*', 'httpaddr'); + +% Handle Email Addresses +% Look for strings with @ in the middle +email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr'); + +% Handle $ sign +email_contents = regexprep(email_contents, '[$]+', 'dollar'); + + +% ========================== Tokenize Email =========================== + +% Output the email to screen as well +fprintf('\n==== Processed Email ====\n\n'); + +% Process file +l = 0; + +while ~isempty(email_contents) + + % Tokenize and also get rid of any punctuation + [str, email_contents] = ... + strtok(email_contents, ... 
+ [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]); + + % Remove any non alphanumeric characters + str = regexprep(str, '[^a-zA-Z0-9]', ''); + + % Stem the word + % (the porterStemmer sometimes has issues, so we use a try catch block) + try str = porterStemmer(strtrim(str)); + catch str = ''; continue; + end; + + % Skip the word if it is too short + if length(str) < 1 + continue; + end + + % Look up the word in the dictionary and add to word_indices if + % found + % ====================== YOUR CODE HERE ====================== + % Instructions: Fill in this function to add the index of str to + % word_indices if it is in the vocabulary. At this point + % of the code, you have a stemmed word from the email in + % the variable str. You should look up str in the + % vocabulary list (vocabList). If a match exists, you + % should add the index of the word to the word_indices + % vector. Concretely, if str = 'action', then you should + % look up the vocabulary list to find where in vocabList + % 'action' appears. For example, if vocabList{18} = + % 'action', then, you should add 18 to the word_indices + % vector (e.g., word_indices = [word_indices ; 18]; ). + % + % Note: vocabList{idx} returns a the word with index idx in the + % vocabulary list. + % + % Note: You can use strcmp(str1, str2) to compare two strings (str1 and + % str2). It will return 1 only if the two strings are equivalent. 
+ % + %[contains, index] = ismember(str, vocabList); + %if(contains) + % word_indices = [word_indices; index]; + %end; + for i = 1:length(vocabList) + if(strcmp(vocabList{i}, str)) + word_indices = [word_indices; i]; + end + end + + + + % ============================================================= + + + % Print to screen, ensuring that the output lines are not too long + if (l + length(str) + 1) > 78 + fprintf('\n'); + l = 0; + end + fprintf('%s ', str); + l = l + length(str) + 1; + +end + +% Print footer +fprintf('\n\n=========================\n'); + +end diff --git a/ex6/ex6/readFile.m b/ex6/ex6/readFile.m new file mode 100644 index 0000000..08686d6 --- /dev/null +++ b/ex6/ex6/readFile.m @@ -0,0 +1,18 @@ +function file_contents = readFile(filename) +%READFILE reads a file and returns its entire contents +% file_contents = READFILE(filename) reads a file and returns its entire +% contents in file_contents +% + +% Load File +fid = fopen(filename); +if fid + file_contents = fscanf(fid, '%c', inf); + fclose(fid); +else + file_contents = ''; + fprintf('Unable to open %s\n', filename); +end + +end + diff --git a/ex6/ex6/spamSample1.txt b/ex6/ex6/spamSample1.txt new file mode 100644 index 0000000..bab0ca2 --- /dev/null +++ b/ex6/ex6/spamSample1.txt @@ -0,0 +1,42 @@ +Do You Want To Make $1000 Or More Per Week? + + + +If you are a motivated and qualified individual - I +will personally demonstrate to you a system that will +make you $1,000 per week or more! This is NOT mlm. + + + +Call our 24 hour pre-recorded number to get the +details. + + + +000-456-789 + + + +I need people who want to make serious money. Make +the call and get the facts. + +Invest 2 minutes in yourself now! + + + +000-456-789 + + + +Looking forward to your call and I will introduce you +to people like yourself who +are currently making $10,000 plus per week! 
+ + + +000-456-789 + + + +3484lJGv6-241lEaN9080lRmS6-271WxHo7524qiyT5-438rjUv5615hQcf0-662eiDB9057dMtVl72 + diff --git a/ex6/ex6/spamSample2.txt b/ex6/ex6/spamSample2.txt new file mode 100644 index 0000000..f8e8fce --- /dev/null +++ b/ex6/ex6/spamSample2.txt @@ -0,0 +1,8 @@ +Best Buy Viagra Generic Online + +Viagra 100mg x 60 Pills $125, Free Pills & Reorder Discount, Top Selling 100% Quality & Satisfaction guaranteed! + +We accept VISA, Master & E-Check Payments, 90000+ Satisfied Customers! +http://medphysitcstech.ru + + diff --git a/ex6/ex6/spamTest.mat b/ex6/ex6/spamTest.mat new file mode 100644 index 0000000..b7bf953 Binary files /dev/null and b/ex6/ex6/spamTest.mat differ diff --git a/ex6/ex6/spamTrain.mat b/ex6/ex6/spamTrain.mat new file mode 100644 index 0000000..1b9c81f Binary files /dev/null and b/ex6/ex6/spamTrain.mat differ diff --git a/ex6/ex6/submit.m b/ex6/ex6/submit.m new file mode 100644 index 0000000..147be6f --- /dev/null +++ b/ex6/ex6/submit.m @@ -0,0 +1,573 @@ +function submit(partId, webSubmit) +%SUBMIT Submit your code and output to the ml-class servers +% SUBMIT() will connect to the ml-class server and submit your solution + + fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ... + homework_id()); + if ~exist('partId', 'var') || isempty(partId) + partId = promptPart(); + end + + if ~exist('webSubmit', 'var') || isempty(webSubmit) + webSubmit = 0; % submit directly by default + end + + % Check valid partId + partNames = validParts(); + if ~isValidPartId(partId) + fprintf('!! Invalid homework part selected.\n'); + fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1); + fprintf('!! 
Submission Cancelled\n'); + return + end + + if ~exist('ml_login_data.mat','file') + [login password] = loginPrompt(); + save('ml_login_data.mat','login','password'); + else + load('ml_login_data.mat'); + [login password] = quickLogin(login, password); + save('ml_login_data.mat','login','password'); + end + + if isempty(login) + fprintf('!! Submission Cancelled\n'); + return + end + + fprintf('\n== Connecting to ml-class ... '); + if exist('OCTAVE_VERSION') + fflush(stdout); + end + + % Setup submit list + if partId == numel(partNames) + 1 + submitParts = 1:numel(partNames); + else + submitParts = [partId]; + end + + for s = 1:numel(submitParts) + thisPartId = submitParts(s); + if (~webSubmit) % submit directly to server + [login, ch, signature, auxstring] = getChallenge(login, thisPartId); + if isempty(login) || isempty(ch) || isempty(signature) + % Some error occured, error string in first return element. + fprintf('\n!! Error: %s\n\n', login); + return + end + + % Attempt Submission with Challenge + ch_resp = challengeResponse(login, password, ch); + + [result, str] = submitSolution(login, ch_resp, thisPartId, ... + output(thisPartId, auxstring), source(thisPartId), signature); + + partName = partNames{thisPartId}; + + fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ... + homework_id(), thisPartId, partName); + fprintf('== %s\n', strtrim(str)); + + if exist('OCTAVE_VERSION') + fflush(stdout); + end + else + [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ... + source(thisPartId)); + result = base64encode(result); + + fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ... 
+ homework_id(), thisPartId); + saveAsFile = input('', 's'); + if (isempty(saveAsFile)) + saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId); + end + + fid = fopen(saveAsFile, 'w'); + if (fid) + fwrite(fid, result); + fclose(fid); + fprintf('\nSaved your solutions to %s.\n\n', saveAsFile); + fprintf(['You can now submit your solutions through the web \n' ... + 'form in the programming exercises. Select the corresponding \n' ... + 'programming exercise to access the form.\n']); + + else + fprintf('Unable to save to %s\n\n', saveAsFile); + fprintf(['You can create a submission file by saving the \n' ... + 'following text in a file: (press enter to continue)\n\n']); + pause; + fprintf(result); + end + end + end +end + +% ================== CONFIGURABLES FOR EACH HOMEWORK ================== + +function id = homework_id() + id = '6'; +end + +function [partNames] = validParts() + partNames = { 'Gaussian Kernel', ... + 'Parameters (C, sigma) for Dataset 3', ... + 'Email Preprocessing' ... + 'Email Feature Extraction' ... + }; +end + +function srcs = sources() + % Separated by part + srcs = { { 'gaussianKernel.m' }, ... + { 'dataset3Params.m' }, ... + { 'processEmail.m' }, ... 
+ { 'emailFeatures.m' } }; +end + +function out = output(partId, auxstring) + % Random Test Cases + x1 = sin(1:10)'; + x2 = cos(1:10)'; + ec = 'the quick brown fox jumped over the lazy dog'; + wi = 1 + abs(round(x1 * 1863)); + wi = [wi ; wi]; + if partId == 1 + sim = gaussianKernel(x1, x2, 2); + out = sprintf('%0.5f ', sim); + elseif partId == 2 + load('ex6data3.mat'); + [C, sigma] = dataset3Params(X, y, Xval, yval); + out = sprintf('%0.5f ', C); + out = [out sprintf('%0.5f ', sigma)]; + elseif partId == 3 + word_indices = processEmail(ec); + out = sprintf('%d ', word_indices); + elseif partId == 4 + x = emailFeatures(wi); + out = sprintf('%d ', x); + end +end + + +% ====================== SERVER CONFIGURATION =========================== + +% ***************** REMOVE -staging WHEN YOU DEPLOY ********************* +function url = site_url() + url = 'http://class.coursera.org/ml-2012-002'; +end + +function url = challenge_url() + url = [site_url() '/assignment/challenge']; +end + +function url = submit_url() + url = [site_url() '/assignment/submit']; +end + +% ========================= CHALLENGE HELPERS ========================= + +function src = source(partId) + src = ''; + src_files = sources(); + if partId <= numel(src_files) + flist = src_files{partId}; + for i = 1:numel(flist) + fid = fopen(flist{i}); + if (fid == -1) + error('Error opening %s (is it missing?)', flist{i}); + end + line = fgets(fid); + while ischar(line) + src = [src line]; + line = fgets(fid); + end + fclose(fid); + src = [src '||||||||']; + end + end +end + +function ret = isValidPartId(partId) + partNames = validParts(); + ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1); +end + +function partId = promptPart() + fprintf('== Select which part(s) to submit:\n'); + partNames = validParts(); + srcFiles = sources(); + for i = 1:numel(partNames) + fprintf('== %d) %s [', i, partNames{i}); + fprintf(' %s ', srcFiles{i}{:}); + fprintf(']\n'); + end + fprintf('== %d) All of 
the above \n==\nEnter your choice [1-%d]: ', ... + numel(partNames) + 1, numel(partNames) + 1); + selPart = input('', 's'); + partId = str2num(selPart); + if ~isValidPartId(partId) + partId = -1; + end +end + +function [email,ch,signature,auxstring] = getChallenge(email, part) + str = urlread(challenge_url(), 'post', {'email_address', email, 'assignment_part_sid', [homework_id() '-' num2str(part)], 'response_encoding', 'delim'}); + + str = strtrim(str); + r = struct; + while(numel(str) > 0) + [f, str] = strtok (str, '|'); + [v, str] = strtok (str, '|'); + r = setfield(r, f, v); + end + + email = getfield(r, 'email_address'); + ch = getfield(r, 'challenge_key'); + signature = getfield(r, 'state'); + auxstring = getfield(r, 'challenge_aux_data'); +end + +function [result, str] = submitSolutionWeb(email, part, output, source) + + result = ['{"assignment_part_sid":"' base64encode([homework_id() '-' num2str(part)], '') '",' ... + '"email_address":"' base64encode(email, '') '",' ... + '"submission":"' base64encode(output, '') '",' ... + '"submission_aux":"' base64encode(source, '') '"' ... + '}']; + str = 'Web-submission'; +end + +function [result, str] = submitSolution(email, ch_resp, part, output, ... + source, signature) + + params = {'assignment_part_sid', [homework_id() '-' num2str(part)], ... + 'email_address', email, ... + 'submission', base64encode(output, ''), ... + 'submission_aux', base64encode(source, ''), ... + 'challenge_response', ch_resp, ... 
+ 'state', signature}; + + str = urlread(submit_url(), 'post', params); + + % Parse str to read for success / failure + result = 0; + +end + +% =========================== LOGIN HELPERS =========================== + +function [login password] = loginPrompt() + % Prompt for password + [login password] = basicPrompt(); + + if isempty(login) || isempty(password) + login = []; password = []; + end +end + + +function [login password] = basicPrompt() + login = input('Login (Email address): ', 's'); + password = input('Password: ', 's'); +end + +function [login password] = quickLogin(login,password) + disp(['You are currently logged in as ' login '.']); + cont_token = input('Is this you? (y/n - type n to reenter password)','s'); + if(isempty(cont_token) || cont_token(1)=='Y'||cont_token(1)=='y') + return; + else + [login password] = loginPrompt(); + end +end + +function [str] = challengeResponse(email, passwd, challenge) + str = sha1([challenge passwd]); +end + +% =============================== SHA-1 ================================ + +function hash = sha1(str) + + % Initialize variables + h0 = uint32(1732584193); + h1 = uint32(4023233417); + h2 = uint32(2562383102); + h3 = uint32(271733878); + h4 = uint32(3285377520); + + % Convert to word array + strlen = numel(str); + + % Break string into chars and append the bit 1 to the message + mC = [double(str) 128]; + mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')]; + + numB = strlen * 8; + if exist('idivide') + numC = idivide(uint32(numB + 65), 512, 'ceil'); + else + numC = ceil(double(numB + 65)/512); + end + numW = numC * 16; + mW = zeros(numW, 1, 'uint32'); + + idx = 1; + for i = 1:4:strlen + 1 + mW(idx) = bitor(bitor(bitor( ... + bitshift(uint32(mC(i)), 24), ... + bitshift(uint32(mC(i+1)), 16)), ... + bitshift(uint32(mC(i+2)), 8)), ... 
+ uint32(mC(i+3))); + idx = idx + 1; + end + + % Append length of message + mW(numW - 1) = uint32(bitshift(uint64(numB), -32)); + mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32)); + + % Process the message in successive 512-bit chs + for cId = 1 : double(numC) + cSt = (cId - 1) * 16 + 1; + cEnd = cId * 16; + ch = mW(cSt : cEnd); + + % Extend the sixteen 32-bit words into eighty 32-bit words + for j = 17 : 80 + ch(j) = ch(j - 3); + ch(j) = bitxor(ch(j), ch(j - 8)); + ch(j) = bitxor(ch(j), ch(j - 14)); + ch(j) = bitxor(ch(j), ch(j - 16)); + ch(j) = bitrotate(ch(j), 1); + end + + % Initialize hash value for this ch + a = h0; + b = h1; + c = h2; + d = h3; + e = h4; + + % Main loop + for i = 1 : 80 + if(i >= 1 && i <= 20) + f = bitor(bitand(b, c), bitand(bitcmp(b), d)); + k = uint32(1518500249); + elseif(i >= 21 && i <= 40) + f = bitxor(bitxor(b, c), d); + k = uint32(1859775393); + elseif(i >= 41 && i <= 60) + f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d)); + k = uint32(2400959708); + elseif(i >= 61 && i <= 80) + f = bitxor(bitxor(b, c), d); + k = uint32(3395469782); + end + + t = bitrotate(a, 5); + t = bitadd(t, f); + t = bitadd(t, e); + t = bitadd(t, k); + t = bitadd(t, ch(i)); + e = d; + d = c; + c = bitrotate(b, 30); + b = a; + a = t; + + end + h0 = bitadd(h0, a); + h1 = bitadd(h1, b); + h2 = bitadd(h2, c); + h3 = bitadd(h3, d); + h4 = bitadd(h4, e); + + end + + hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]); + + hash = lower(hash); + +end + +function ret = bitadd(iA, iB) + ret = double(iA) + double(iB); + ret = bitset(ret, 33, 0); + ret = uint32(ret); +end + +function ret = bitrotate(iA, places) + t = bitshift(iA, places - 32); + ret = bitshift(iA, places); + ret = bitor(ret, t); +end + +% =========================== Base64 Encoder ============================ +% Thanks to Peter John Acklam +% + +function y = base64encode(x, eol) +%BASE64ENCODE Perform base64 encoding on a string. 
+% +% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending +% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10). +% The returned encoded string is broken into lines of no more than 76 +% characters each, and each line will end with EOL unless it is empty. Let +% EOL be empty if you do not want the encoded string broken into lines. +% +% STR and EOL don't have to be strings (i.e., char arrays). The only +% requirement is that they are vectors containing values in the range 0-255. +% +% This function may be used to encode strings into the Base64 encoding +% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The +% Base64 encoding is designed to represent arbitrary sequences of octets in a +% form that need not be humanly readable. A 65-character subset +% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per +% printable character. +% +% Examples +% -------- +% +% If you want to encode a large file, you should encode it in chunks that are +% a multiple of 57 bytes. This ensures that the base64 lines line up and +% that you do not end up with padding in the middle. 57 bytes of data fills +% one complete base64 line (76 == 57*4/3): +% +% If ifid and ofid are two file identifiers opened for reading and writing, +% respectively, then you can base64 encode the data with +% +% while ~feof(ifid) +% fwrite(ofid, base64encode(fread(ifid, 60*57))); +% end +% +% or, if you have enough memory, +% +% fwrite(ofid, base64encode(fread(ifid))); +% +% See also BASE64DECODE. 
+ +% Author: Peter John Acklam +% Time-stamp: 2004-02-03 21:36:56 +0100 +% E-mail: pjacklam@online.no +% URL: http://home.online.no/~pjacklam + + if isnumeric(x) + x = num2str(x); + end + + % make sure we have the EOL value + if nargin < 2 + eol = sprintf('\n'); + else + if sum(size(eol) > 1) > 1 + error('EOL must be a vector.'); + end + if any(eol(:) > 255) + error('EOL can not contain values larger than 255.'); + end + end + + if sum(size(x) > 1) > 1 + error('STR must be a vector.'); + end + + x = uint8(x); + eol = uint8(eol); + + ndbytes = length(x); % number of decoded bytes + nchunks = ceil(ndbytes / 3); % number of chunks/groups + nebytes = 4 * nchunks; % number of encoded bytes + + % add padding if necessary, to make the length of x a multiple of 3 + if rem(ndbytes, 3) + x(end+1 : 3*nchunks) = 0; + end + + x = reshape(x, [3, nchunks]); % reshape the data + y = repmat(uint8(0), 4, nchunks); % for the encoded data + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Split up every 3 bytes into 4 pieces + % + % aaaaaabb bbbbcccc ccdddddd + % + % to form + % + % 00aaaaaa 00bbbbbb 00cccccc 00dddddd + % + y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:) + + y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:) + y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:) + + y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:) + y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:) + + y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:) + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Now perform the following mapping + % + % 0 - 25 -> A-Z + % 26 - 51 -> a-z + % 52 - 61 -> 0-9 + % 62 -> + + % 63 -> / + % + % We could use a mapping vector like + % + % ['A':'Z', 'a':'z', '0':'9', '+/'] + % + % but that would require an index vector of class double. 
+ % + z = repmat(uint8(0), size(y)); + i = y <= 25; z(i) = 'A' + double(y(i)); + i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i)); + i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i)); + i = y == 62; z(i) = '+'; + i = y == 63; z(i) = '/'; + y = z; + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Add padding if necessary. + % + npbytes = 3 * nchunks - ndbytes; % number of padding bytes + if npbytes + y(end-npbytes+1 : end) = '='; % '=' is used for padding + end + + if isempty(eol) + + % reshape to a row vector + y = reshape(y, [1, nebytes]); + + else + + nlines = ceil(nebytes / 76); % number of lines + neolbytes = length(eol); % number of bytes in eol string + + % pad data so it becomes a multiple of 76 elements + y = [y(:) ; zeros(76 * nlines - numel(y), 1)]; + y(nebytes + 1 : 76 * nlines) = 0; + y = reshape(y, 76, nlines); + + % insert eol strings + eol = eol(:); + y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines)); + + % remove padding, but keep the last eol string + m = nebytes + neolbytes * (nlines - 1); + n = (76+neolbytes)*nlines - neolbytes; + y(m+1 : n) = ''; + + % extract and reshape to row vector + y = reshape(y, 1, m+neolbytes); + + end + + % output is a character array + y = char(y); + +end diff --git a/ex6/ex6/submitWeb.m b/ex6/ex6/submitWeb.m new file mode 100644 index 0000000..e429365 --- /dev/null +++ b/ex6/ex6/submitWeb.m @@ -0,0 +1,20 @@ +% submitWeb Creates files from your code and output for web submission. +% +% If the submit function does not work for you, use the web-submission mechanism. +% Call this function to produce a file for the part you wish to submit. Then, +% submit the file to the class servers using the "Web Submission" button on the +% Programming Exercises page on the course website. +% +% You should call this function without arguments (submitWeb), to receive +% an interactive prompt for submission; optionally you can call it with the partID +% if you so wish. 
Make sure your working directory is set to the directory +% containing the submitWeb.m file and your assignment files. + +function submitWeb(partId) + if ~exist('partId', 'var') || isempty(partId) + partId = []; + end + + submit(partId, 1); +end + diff --git a/ex6/ex6/svmPredict.m b/ex6/ex6/svmPredict.m new file mode 100644 index 0000000..ec8ef77 --- /dev/null +++ b/ex6/ex6/svmPredict.m @@ -0,0 +1,54 @@ +function pred = svmPredict(model, X) +%SVMPREDICT returns a vector of predictions using a trained SVM model +%(svmTrain). +% pred = SVMPREDICT(model, X) returns a vector of predictions using a +% trained SVM model (svmTrain). X is a mxn matrix where there each +% example is a row. model is a svm model returned from svmTrain. +% predictions pred is a m x 1 column of predictions of {0, 1} values. +% + +% Check if we are getting a column vector, if so, then assume that we only +% need to do prediction for a single example +if (size(X, 2) == 1) + % Examples should be in rows + X = X'; +end + +% Dataset +m = size(X, 1); +p = zeros(m, 1); +pred = zeros(m, 1); + +if strcmp(func2str(model.kernelFunction), 'linearKernel') + % We can use the weights and bias directly if working with the + % linear kernel + p = X * model.w + model.b; +elseif strfind(func2str(model.kernelFunction), 'gaussianKernel') + % Vectorized RBF Kernel + % This is equivalent to computing the kernel on every pair of examples + X1 = sum(X.^2, 2); + X2 = sum(model.X.^2, 2)'; + K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X')); + K = model.kernelFunction(1, 0) .^ K; + K = bsxfun(@times, model.y', K); + K = bsxfun(@times, model.alphas', K); + p = sum(K, 2); +else + % Other Non-linear kernel + for i = 1:m + prediction = 0; + for j = 1:size(model.X, 1) + prediction = prediction + ... + model.alphas(j) * model.y(j) * ... 
+ model.kernelFunction(X(i,:)', model.X(j,:)'); + end + p(i) = prediction + model.b; + end +end + +% Convert predictions into 0 / 1 +pred(p >= 0) = 1; +pred(p < 0) = 0; + +end + diff --git a/ex6/ex6/svmTrain.m b/ex6/ex6/svmTrain.m new file mode 100644 index 0000000..2b2f169 --- /dev/null +++ b/ex6/ex6/svmTrain.m @@ -0,0 +1,192 @@ +function [model] = svmTrain(X, Y, C, kernelFunction, ... + tol, max_passes) +%SVMTRAIN Trains an SVM classifier using a simplified version of the SMO +%algorithm. +% [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an +% SVM classifier and returns trained model. X is the matrix of training +% examples. Each row is a training example, and the jth column holds the +% jth feature. Y is a column matrix containing 1 for positive examples +% and 0 for negative examples. C is the standard SVM regularization +% parameter. tol is a tolerance value used for determining equality of +% floating point numbers. max_passes controls the number of iterations +% over the dataset (without changes to alpha) before the algorithm quits. +% +% Note: This is a simplified version of the SMO algorithm for training +% SVMs. In practice, if you want to train an SVM classifier, we +% recommend using an optimized package such as: +% +% LIBSVM (http://www.csie.ntu.edu.tw/~cjlin/libsvm/) +% SVMLight (http://svmlight.joachims.org/) +% +% + +if ~exist('tol', 'var') || isempty(tol) + tol = 1e-3; +end + +if ~exist('max_passes', 'var') || isempty(max_passes) + max_passes = 5; +end + +% Data parameters +m = size(X, 1); +n = size(X, 2); + +% Map 0 to -1 +Y(Y==0) = -1; + +% Variables +alphas = zeros(m, 1); +b = 0; +E = zeros(m, 1); +passes = 0; +eta = 0; +L = 0; +H = 0; + +% Pre-compute the Kernel Matrix since our dataset is small +% (in practice, optimized SVM packages that handle large datasets +% gracefully will _not_ do this) +% +% We have implemented optimized vectorized version of the Kernels here so +% that the svm training will run faster. 
+if strcmp(func2str(kernelFunction), 'linearKernel') + % Vectorized computation for the Linear Kernel + % This is equivalent to computing the kernel on every pair of examples + K = X*X'; +elseif strfind(func2str(kernelFunction), 'gaussianKernel') + % Vectorized RBF Kernel + % This is equivalent to computing the kernel on every pair of examples + X2 = sum(X.^2, 2); + K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X'))); + K = kernelFunction(1, 0) .^ K; +else + % Pre-compute the Kernel Matrix + % The following can be slow due to the lack of vectorization + K = zeros(m); + for i = 1:m + for j = i:m + K(i,j) = kernelFunction(X(i,:)', X(j,:)'); + K(j,i) = K(i,j); %the matrix is symmetric + end + end +end + +% Train +fprintf('\nTraining ...'); +dots = 12; +while passes < max_passes, + + num_changed_alphas = 0; + for i = 1:m, + + % Calculate Ei = f(x(i)) - y(i) using (2). + % E(i) = b + sum (X(i, :) * (repmat(alphas.*Y,1,n).*X)') - Y(i); + E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i); + + if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0)), + + % In practice, there are many heuristics one can use to select + % the i and j. In this simplified code, we select them randomly. + j = ceil(m * rand()); + while j == i, % Make sure i \neq j + j = ceil(m * rand()); + end + + % Calculate Ej = f(x(j)) - y(j) using (2). + E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j); + + % Save old alphas + alpha_i_old = alphas(i); + alpha_j_old = alphas(j); + + % Compute L and H by (10) or (11). + if (Y(i) == Y(j)), + L = max(0, alphas(j) + alphas(i) - C); + H = min(C, alphas(j) + alphas(i)); + else + L = max(0, alphas(j) - alphas(i)); + H = min(C, C + alphas(j) - alphas(i)); + end + + if (L == H), + % continue to next i. + continue; + end + + % Compute eta by (14). + eta = 2 * K(i,j) - K(i,i) - K(j,j); + if (eta >= 0), + % continue to next i. + continue; + end + + % Compute and clip new value for alpha j using (12) and (15). 
+ alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta; + + % Clip + alphas(j) = min (H, alphas(j)); + alphas(j) = max (L, alphas(j)); + + % Check if change in alpha is significant + if (abs(alphas(j) - alpha_j_old) < tol), + % continue to next i. + % replace anyway + alphas(j) = alpha_j_old; + continue; + end + + % Determine value for alpha i using (16). + alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j)); + + % Compute b1 and b2 using (17) and (18) respectively. + b1 = b - E(i) ... + - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... + - Y(j) * (alphas(j) - alpha_j_old) * K(i,j)'; + b2 = b - E(j) ... + - Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ... + - Y(j) * (alphas(j) - alpha_j_old) * K(j,j)'; + + % Compute b by (19). + if (0 < alphas(i) && alphas(i) < C), + b = b1; + elseif (0 < alphas(j) && alphas(j) < C), + b = b2; + else + b = (b1+b2)/2; + end + + num_changed_alphas = num_changed_alphas + 1; + + end + + end + + if (num_changed_alphas == 0), + passes = passes + 1; + else + passes = 0; + end + + fprintf('.'); + dots = dots + 1; + if dots > 78 + dots = 0; + fprintf('\n'); + end + if exist('OCTAVE_VERSION') + fflush(stdout); + end +end +fprintf(' Done! 
\n\n');
+
+% Save the model
+idx = alphas > 0;
+model.X= X(idx,:);
+model.y= Y(idx);
+model.kernelFunction = kernelFunction;
+model.b= b;
+model.alphas= alphas(idx);
+model.w = ((alphas.*Y)'*X)';
+
+end
diff --git a/ex6/ex6/visualizeBoundary.m b/ex6/ex6/visualizeBoundary.m
new file mode 100644
index 0000000..b2020a7
--- /dev/null
+++ b/ex6/ex6/visualizeBoundary.m
@@ -0,0 +1,24 @@
+function visualizeBoundary(X, y, model, varargin)
+%VISUALIZEBOUNDARY plots a non-linear decision boundary learned by the SVM
+% VISUALIZEBOUNDARY(X, y, model) plots a non-linear decision
+% boundary learned by the SVM and overlays the data on it. Extra arguments (varargin) are accepted but not used.
+
+% Plot the training data first; the boundary is drawn over it below
+plotData(X, y)
+
+% Make classification predictions over a 100 x 100 grid spanning the range of X(:,1) and X(:,2), one grid column per svmPredict call
+x1plot = linspace(min(X(:,1)), max(X(:,1)), 100)';
+x2plot = linspace(min(X(:,2)), max(X(:,2)), 100)';
+[X1, X2] = meshgrid(x1plot, x2plot);
+vals = zeros(size(X1));
+for i = 1:size(X1, 2)
+    this_X = [X1(:, i), X2(:, i)];
+    vals(:, i) = svmPredict(model, this_X);
+end
+
+% Plot the SVM boundary as the level-0 contour of the predictions. NOTE(review): if svmPredict returns 0/1 labels, a mid level such as [0.5 0.5] may be what is intended -- confirm
+hold on
+contour(X1, X2, vals, [0 0], 'Color', 'b');
+hold off;
+
+end
diff --git a/ex6/ex6/visualizeBoundaryLinear.m b/ex6/ex6/visualizeBoundaryLinear.m
new file mode 100644
index 0000000..f17f5ca
--- /dev/null
+++ b/ex6/ex6/visualizeBoundaryLinear.m
@@ -0,0 +1,16 @@
+function visualizeBoundaryLinear(X, y, model)
+%VISUALIZEBOUNDARYLINEAR plots a linear decision boundary learned by the
+%SVM
+% VISUALIZEBOUNDARYLINEAR(X, y, model) plots a linear decision boundary
+% learned by the SVM and overlays the data on it. The line drawn is w(1)*x1 + w(2)*x2 + b = 0, solved for x2, so it divides by w(2).
+
+w = model.w;
+b = model.b;
+xp = linspace(min(X(:,1)), max(X(:,1)), 100);
+yp = - (w(1)*xp + b)/w(2);
+plotData(X, y);
+hold on;
+plot(xp, yp, '-b');
+hold off
+
+end
diff --git a/ex6/ex6/vocab.txt b/ex6/ex6/vocab.txt
new file mode 100644
index 0000000..27f64a3
--- /dev/null
+++ b/ex6/ex6/vocab.txt
@@ -0,0 +1,1899 @@
+1 aa
+2 ab
+3 abil
+4 abl
+5 about
+6 abov
+7 absolut
+8 abus
+9 ac
+10 accept
+11 access
+12
accord +13 account +14 achiev +15 acquir +16 across +17 act +18 action +19 activ +20 actual +21 ad +22 adam +23 add +24 addit +25 address +26 administr +27 adult +28 advanc +29 advantag +30 advertis +31 advic +32 advis +33 ae +34 af +35 affect +36 affili +37 afford +38 africa +39 after +40 ag +41 again +42 against +43 agenc +44 agent +45 ago +46 agre +47 agreement +48 aid +49 air +50 al +51 alb +52 align +53 all +54 allow +55 almost +56 alon +57 along +58 alreadi +59 alsa +60 also +61 altern +62 although +63 alwai +64 am +65 amaz +66 america +67 american +68 among +69 amount +70 amp +71 an +72 analysi +73 analyst +74 and +75 ani +76 anim +77 announc +78 annual +79 annuiti +80 anoth +81 answer +82 anti +83 anumb +84 anybodi +85 anymor +86 anyon +87 anyth +88 anywai +89 anywher +90 aol +91 ap +92 apolog +93 app +94 appar +95 appear +96 appl +97 appli +98 applic +99 appreci +100 approach +101 approv +102 apt +103 ar +104 archiv +105 area +106 aren +107 argument +108 arial +109 arm +110 around +111 arrai +112 arriv +113 art +114 articl +115 artist +116 as +117 ascii +118 ask +119 asset +120 assist +121 associ +122 assum +123 assur +124 at +125 atol +126 attach +127 attack +128 attempt +129 attent +130 attornei +131 attract +132 audio +133 aug +134 august +135 author +136 auto +137 autom +138 automat +139 avail +140 averag +141 avoid +142 awai +143 awar +144 award +145 ba +146 babi +147 back +148 background +149 backup +150 bad +151 balanc +152 ban +153 bank +154 bar +155 base +156 basenumb +157 basi +158 basic +159 bb +160 bc +161 bd +162 be +163 beat +164 beberg +165 becaus +166 becom +167 been +168 befor +169 begin +170 behalf +171 behavior +172 behind +173 believ +174 below +175 benefit +176 best +177 beta +178 better +179 between +180 bf +181 big +182 bill +183 billion +184 bin +185 binari +186 bit +187 black +188 blank +189 block +190 blog +191 blood +192 blue +193 bnumber +194 board +195 bodi +196 boi +197 bonu +198 book +199 boot +200 border +201 boss +202 
boston +203 botan +204 both +205 bottl +206 bottom +207 boundari +208 box +209 brain +210 brand +211 break +212 brian +213 bring +214 broadcast +215 broker +216 browser +217 bug +218 bui +219 build +220 built +221 bulk +222 burn +223 bush +224 busi +225 but +226 button +227 by +228 byte +229 ca +230 cabl +231 cach +232 calcul +233 california +234 call +235 came +236 camera +237 campaign +238 can +239 canada +240 cannot +241 canon +242 capabl +243 capillari +244 capit +245 car +246 card +247 care +248 career +249 carri +250 cartridg +251 case +252 cash +253 cat +254 catch +255 categori +256 caus +257 cb +258 cc +259 cd +260 ce +261 cell +262 cent +263 center +264 central +265 centuri +266 ceo +267 certain +268 certainli +269 cf +270 challeng +271 chanc +272 chang +273 channel +274 char +275 charact +276 charg +277 charset +278 chat +279 cheap +280 check +281 cheer +282 chief +283 children +284 china +285 chip +286 choic +287 choos +288 chri +289 citi +290 citizen +291 civil +292 claim +293 class +294 classifi +295 clean +296 clear +297 clearli +298 click +299 client +300 close +301 clue +302 cnet +303 cnumber +304 co +305 code +306 collect +307 colleg +308 color +309 com +310 combin +311 come +312 comfort +313 command +314 comment +315 commentari +316 commerci +317 commiss +318 commit +319 common +320 commun +321 compani +322 compar +323 comparison +324 compat +325 compet +326 competit +327 compil +328 complet +329 comprehens +330 comput +331 concentr +332 concept +333 concern +334 condit +335 conf +336 confer +337 confid +338 confidenti +339 config +340 configur +341 confirm +342 conflict +343 confus +344 congress +345 connect +346 consid +347 consolid +348 constitut +349 construct +350 consult +351 consum +352 contact +353 contain +354 content +355 continu +356 contract +357 contribut +358 control +359 conveni +360 convers +361 convert +362 cool +363 cooper +364 copi +365 copyright +366 core +367 corpor +368 correct +369 correspond +370 cost +371 could +372 couldn 
+373 count +374 countri +375 coupl +376 cours +377 court +378 cover +379 coverag +380 crash +381 creat +382 creativ +383 credit +384 critic +385 cross +386 cultur +387 current +388 custom +389 cut +390 cv +391 da +392 dagga +393 dai +394 daili +395 dan +396 danger +397 dark +398 data +399 databas +400 datapow +401 date +402 dave +403 david +404 dc +405 de +406 dead +407 deal +408 dear +409 death +410 debt +411 decad +412 decid +413 decis +414 declar +415 declin +416 decor +417 default +418 defend +419 defens +420 defin +421 definit +422 degre +423 delai +424 delet +425 deliv +426 deliveri +427 dell +428 demand +429 democrat +430 depart +431 depend +432 deposit +433 describ +434 descript +435 deserv +436 design +437 desir +438 desktop +439 despit +440 detail +441 detect +442 determin +443 dev +444 devel +445 develop +446 devic +447 di +448 dial +449 did +450 didn +451 diet +452 differ +453 difficult +454 digit +455 direct +456 directli +457 director +458 directori +459 disabl +460 discount +461 discov +462 discoveri +463 discuss +464 disk +465 displai +466 disposit +467 distanc +468 distribut +469 dn +470 dnumber +471 do +472 doc +473 document +474 doe +475 doer +476 doesn +477 dollar +478 dollarac +479 dollarnumb +480 domain +481 don +482 done +483 dont +484 doubl +485 doubt +486 down +487 download +488 dr +489 draw +490 dream +491 drive +492 driver +493 drop +494 drug +495 due +496 dure +497 dvd +498 dw +499 dynam +500 ea +501 each +502 earli +503 earlier +504 earn +505 earth +506 easi +507 easier +508 easili +509 eat +510 eb +511 ebai +512 ec +513 echo +514 econom +515 economi +516 ed +517 edg +518 edit +519 editor +520 educ +521 eff +522 effect +523 effici +524 effort +525 either +526 el +527 electron +528 elimin +529 els +530 email +531 emailaddr +532 emerg +533 empir +534 employ +535 employe +536 en +537 enabl +538 encod +539 encourag +540 end +541 enemi +542 enenkio +543 energi +544 engin +545 english +546 enhanc +547 enjoi +548 enough +549 ensur +550 enter 
+551 enterpris +552 entertain +553 entir +554 entri +555 enumb +556 environ +557 equal +558 equip +559 equival +560 error +561 especi +562 essenti +563 establish +564 estat +565 estim +566 et +567 etc +568 euro +569 europ +570 european +571 even +572 event +573 eventu +574 ever +575 everi +576 everyon +577 everyth +578 evid +579 evil +580 exactli +581 exampl +582 excel +583 except +584 exchang +585 excit +586 exclus +587 execut +588 exercis +589 exist +590 exmh +591 expand +592 expect +593 expens +594 experi +595 expert +596 expir +597 explain +598 explor +599 express +600 extend +601 extens +602 extra +603 extract +604 extrem +605 ey +606 fa +607 face +608 fact +609 factor +610 fail +611 fair +612 fall +613 fals +614 famili +615 faq +616 far +617 fast +618 faster +619 fastest +620 fat +621 father +622 favorit +623 fax +624 fb +625 fd +626 featur +627 feder +628 fee +629 feed +630 feedback +631 feel +632 femal +633 few +634 ffffff +635 ffnumber +636 field +637 fight +638 figur +639 file +640 fill +641 film +642 filter +643 final +644 financ +645 financi +646 find +647 fine +648 finish +649 fire +650 firewal +651 firm +652 first +653 fit +654 five +655 fix +656 flag +657 flash +658 flow +659 fnumber +660 focu +661 folder +662 folk +663 follow +664 font +665 food +666 for +667 forc +668 foreign +669 forev +670 forget +671 fork +672 form +673 format +674 former +675 fortun +676 forward +677 found +678 foundat +679 four +680 franc +681 free +682 freedom +683 french +684 freshrpm +685 fri +686 fridai +687 friend +688 from +689 front +690 ftoc +691 ftp +692 full +693 fulli +694 fun +695 function +696 fund +697 further +698 futur +699 ga +700 gain +701 game +702 gari +703 garrigu +704 gave +705 gcc +706 geek +707 gener +708 get +709 gif +710 gift +711 girl +712 give +713 given +714 global +715 gnome +716 gnu +717 gnupg +718 go +719 goal +720 god +721 goe +722 gold +723 gone +724 good +725 googl +726 got +727 govern +728 gpl +729 grand +730 grant +731 graphic +732 great 
+733 greater +734 ground +735 group +736 grow +737 growth +738 gt +739 guarante +740 guess +741 gui +742 guid +743 ha +744 hack +745 had +746 half +747 ham +748 hand +749 handl +750 happen +751 happi +752 hard +753 hardwar +754 hat +755 hate +756 have +757 haven +758 he +759 head +760 header +761 headlin +762 health +763 hear +764 heard +765 heart +766 heaven +767 hei +768 height +769 held +770 hello +771 help +772 helvetica +773 her +774 herba +775 here +776 hermio +777 hettinga +778 hi +779 high +780 higher +781 highli +782 highlight +783 him +784 histori +785 hit +786 hold +787 home +788 honor +789 hope +790 host +791 hot +792 hour +793 hous +794 how +795 howev +796 hp +797 html +798 http +799 httpaddr +800 huge +801 human +802 hundr +803 ibm +804 id +805 idea +806 ident +807 identifi +808 idnumb +809 ie +810 if +811 ignor +812 ii +813 iii +814 iiiiiiihnumberjnumberhnumberjnumberhnumb +815 illeg +816 im +817 imag +818 imagin +819 immedi +820 impact +821 implement +822 import +823 impress +824 improv +825 in +826 inc +827 includ +828 incom +829 increas +830 incred +831 inde +832 independ +833 index +834 india +835 indian +836 indic +837 individu +838 industri +839 info +840 inform +841 initi +842 inlin +843 innov +844 input +845 insert +846 insid +847 instal +848 instanc +849 instant +850 instead +851 institut +852 instruct +853 insur +854 int +855 integr +856 intel +857 intellig +858 intend +859 interact +860 interest +861 interfac +862 intern +863 internet +864 interview +865 into +866 intro +867 introduc +868 inumb +869 invest +870 investig +871 investor +872 invok +873 involv +874 ip +875 ireland +876 irish +877 is +878 island +879 isn +880 iso +881 isp +882 issu +883 it +884 item +885 itself +886 jabber +887 jame +888 java +889 jim +890 jnumberiiiiiiihepihepihf +891 job +892 joe +893 john +894 join +895 journal +896 judg +897 judgment +898 jul +899 juli +900 jump +901 june +902 just +903 justin +904 keep +905 kei +906 kept +907 kernel +908 kevin +909 
keyboard +910 kid +911 kill +912 kind +913 king +914 kingdom +915 knew +916 know +917 knowledg +918 known +919 la +920 lack +921 land +922 languag +923 laptop +924 larg +925 larger +926 largest +927 laser +928 last +929 late +930 later +931 latest +932 launch +933 law +934 lawrenc +935 le +936 lead +937 leader +938 learn +939 least +940 leav +941 left +942 legal +943 lender +944 length +945 less +946 lesson +947 let +948 letter +949 level +950 lib +951 librari +952 licens +953 life +954 lifetim +955 light +956 like +957 limit +958 line +959 link +960 linux +961 list +962 listen +963 littl +964 live +965 ll +966 lo +967 load +968 loan +969 local +970 locat +971 lock +972 lockergnom +973 log +974 long +975 longer +976 look +977 lose +978 loss +979 lost +980 lot +981 love +982 low +983 lower +984 lowest +985 lt +986 ma +987 mac +988 machin +989 made +990 magazin +991 mai +992 mail +993 mailer +994 main +995 maintain +996 major +997 make +998 maker +999 male +1000 man +1001 manag +1002 mani +1003 manual +1004 manufactur +1005 map +1006 march +1007 margin +1008 mark +1009 market +1010 marshal +1011 mass +1012 master +1013 match +1014 materi +1015 matter +1016 matthia +1017 mayb +1018 me +1019 mean +1020 measur +1021 mechan +1022 media +1023 medic +1024 meet +1025 member +1026 membership +1027 memori +1028 men +1029 mention +1030 menu +1031 merchant +1032 messag +1033 method +1034 mh +1035 michael +1036 microsoft +1037 middl +1038 might +1039 mike +1040 mile +1041 militari +1042 million +1043 mime +1044 mind +1045 mine +1046 mini +1047 minimum +1048 minut +1049 miss +1050 mistak +1051 mobil +1052 mode +1053 model +1054 modem +1055 modifi +1056 modul +1057 moment +1058 mon +1059 mondai +1060 monei +1061 monitor +1062 month +1063 monthli +1064 more +1065 morn +1066 mortgag +1067 most +1068 mostli +1069 mother +1070 motiv +1071 move +1072 movi +1073 mpnumber +1074 mr +1075 ms +1076 msg +1077 much +1078 multi +1079 multipart +1080 multipl +1081 murphi +1082 music +1083 must 
+1084 my +1085 myself +1086 name +1087 nation +1088 natur +1089 nbsp +1090 near +1091 nearli +1092 necessari +1093 need +1094 neg +1095 net +1096 netscap +1097 network +1098 never +1099 new +1100 newslett +1101 next +1102 nextpart +1103 nice +1104 nigeria +1105 night +1106 no +1107 nobodi +1108 non +1109 none +1110 nor +1111 normal +1112 north +1113 not +1114 note +1115 noth +1116 notic +1117 now +1118 nt +1119 null +1120 number +1121 numbera +1122 numberam +1123 numberanumb +1124 numberb +1125 numberbit +1126 numberc +1127 numbercb +1128 numbercbr +1129 numbercfont +1130 numbercli +1131 numbercnumb +1132 numbercp +1133 numberctd +1134 numberd +1135 numberdari +1136 numberdnumb +1137 numberenumb +1138 numberf +1139 numberfb +1140 numberff +1141 numberffont +1142 numberfp +1143 numberftd +1144 numberk +1145 numberm +1146 numbermb +1147 numberp +1148 numberpd +1149 numberpm +1150 numberpx +1151 numberst +1152 numberth +1153 numbertnumb +1154 numberx +1155 object +1156 oblig +1157 obtain +1158 obvious +1159 occur +1160 oct +1161 octob +1162 of +1163 off +1164 offer +1165 offic +1166 offici +1167 often +1168 oh +1169 ok +1170 old +1171 on +1172 onc +1173 onli +1174 onlin +1175 open +1176 oper +1177 opinion +1178 opportun +1179 opt +1180 optim +1181 option +1182 or +1183 order +1184 org +1185 organ +1186 origin +1187 os +1188 osdn +1189 other +1190 otherwis +1191 our +1192 out +1193 outlook +1194 output +1195 outsid +1196 over +1197 own +1198 owner +1199 oz +1200 pacif +1201 pack +1202 packag +1203 page +1204 pai +1205 paid +1206 pain +1207 palm +1208 panel +1209 paper +1210 paragraph +1211 parent +1212 part +1213 parti +1214 particip +1215 particular +1216 particularli +1217 partit +1218 partner +1219 pass +1220 password +1221 past +1222 patch +1223 patent +1224 path +1225 pattern +1226 paul +1227 payment +1228 pc +1229 peac +1230 peopl +1231 per +1232 percent +1233 percentag +1234 perfect +1235 perfectli +1236 perform +1237 perhap +1238 period +1239 perl +1240 perman 
+1241 permiss +1242 person +1243 pgp +1244 phone +1245 photo +1246 php +1247 phrase +1248 physic +1249 pick +1250 pictur +1251 piec +1252 piiiiiiii +1253 pipe +1254 pjnumber +1255 place +1256 plai +1257 plain +1258 plan +1259 planet +1260 plant +1261 planta +1262 platform +1263 player +1264 pleas +1265 plu +1266 plug +1267 pm +1268 pocket +1269 point +1270 polic +1271 polici +1272 polit +1273 poor +1274 pop +1275 popul +1276 popular +1277 port +1278 posit +1279 possibl +1280 post +1281 potenti +1282 pound +1283 powel +1284 power +1285 powershot +1286 practic +1287 pre +1288 predict +1289 prefer +1290 premium +1291 prepar +1292 present +1293 presid +1294 press +1295 pretti +1296 prevent +1297 previou +1298 previous +1299 price +1300 principl +1301 print +1302 printabl +1303 printer +1304 privaci +1305 privat +1306 prize +1307 pro +1308 probabl +1309 problem +1310 procedur +1311 process +1312 processor +1313 procmail +1314 produc +1315 product +1316 profession +1317 profil +1318 profit +1319 program +1320 programm +1321 progress +1322 project +1323 promis +1324 promot +1325 prompt +1326 properti +1327 propos +1328 proprietari +1329 prospect +1330 protect +1331 protocol +1332 prove +1333 proven +1334 provid +1335 proxi +1336 pub +1337 public +1338 publish +1339 pudg +1340 pull +1341 purchas +1342 purpos +1343 put +1344 python +1345 qnumber +1346 qualifi +1347 qualiti +1348 quarter +1349 question +1350 quick +1351 quickli +1352 quit +1353 quot +1354 radio +1355 ragga +1356 rais +1357 random +1358 rang +1359 rate +1360 rather +1361 ratio +1362 razor +1363 razornumb +1364 re +1365 reach +1366 read +1367 reader +1368 readi +1369 real +1370 realiz +1371 realli +1372 reason +1373 receiv +1374 recent +1375 recipi +1376 recommend +1377 record +1378 red +1379 redhat +1380 reduc +1381 refer +1382 refin +1383 reg +1384 regard +1385 region +1386 regist +1387 regul +1388 regular +1389 rel +1390 relat +1391 relationship +1392 releas +1393 relev +1394 reliabl +1395 remain +1396 
rememb +1397 remot +1398 remov +1399 replac +1400 repli +1401 report +1402 repositori +1403 repres +1404 republ +1405 request +1406 requir +1407 research +1408 reserv +1409 resid +1410 resourc +1411 respect +1412 respond +1413 respons +1414 rest +1415 result +1416 retail +1417 return +1418 reveal +1419 revenu +1420 revers +1421 review +1422 revok +1423 rh +1424 rich +1425 right +1426 risk +1427 road +1428 robert +1429 rock +1430 role +1431 roll +1432 rom +1433 roman +1434 room +1435 root +1436 round +1437 rpm +1438 rss +1439 rule +1440 run +1441 sa +1442 safe +1443 sai +1444 said +1445 sale +1446 same +1447 sampl +1448 san +1449 saou +1450 sat +1451 satellit +1452 save +1453 saw +1454 scan +1455 schedul +1456 school +1457 scienc +1458 score +1459 screen +1460 script +1461 se +1462 search +1463 season +1464 second +1465 secret +1466 section +1467 secur +1468 see +1469 seed +1470 seek +1471 seem +1472 seen +1473 select +1474 self +1475 sell +1476 seminar +1477 send +1478 sender +1479 sendmail +1480 senior +1481 sens +1482 sensit +1483 sent +1484 sep +1485 separ +1486 septemb +1487 sequenc +1488 seri +1489 serif +1490 seriou +1491 serv +1492 server +1493 servic +1494 set +1495 setup +1496 seven +1497 seventh +1498 sever +1499 sex +1500 sexual +1501 sf +1502 shape +1503 share +1504 she +1505 shell +1506 ship +1507 shop +1508 short +1509 shot +1510 should +1511 show +1512 side +1513 sign +1514 signatur +1515 signific +1516 similar +1517 simpl +1518 simpli +1519 sinc +1520 sincer +1521 singl +1522 sit +1523 site +1524 situat +1525 six +1526 size +1527 skeptic +1528 skill +1529 skin +1530 skip +1531 sleep +1532 slow +1533 small +1534 smart +1535 smoke +1536 smtp +1537 snumber +1538 so +1539 social +1540 societi +1541 softwar +1542 sold +1543 solut +1544 solv +1545 some +1546 someon +1547 someth +1548 sometim +1549 son +1550 song +1551 soni +1552 soon +1553 sorri +1554 sort +1555 sound +1556 sourc +1557 south +1558 space +1559 spain +1560 spam +1561 spamassassin +1562 
spamd +1563 spammer +1564 speak +1565 spec +1566 special +1567 specif +1568 specifi +1569 speech +1570 speed +1571 spend +1572 sponsor +1573 sport +1574 spot +1575 src +1576 ssh +1577 st +1578 stabl +1579 staff +1580 stai +1581 stand +1582 standard +1583 star +1584 start +1585 state +1586 statement +1587 statu +1588 step +1589 steve +1590 still +1591 stock +1592 stop +1593 storag +1594 store +1595 stori +1596 strategi +1597 stream +1598 street +1599 string +1600 strip +1601 strong +1602 structur +1603 studi +1604 stuff +1605 stupid +1606 style +1607 subject +1608 submit +1609 subscrib +1610 subscript +1611 substanti +1612 success +1613 such +1614 suffer +1615 suggest +1616 suit +1617 sum +1618 summari +1619 summer +1620 sun +1621 super +1622 suppli +1623 support +1624 suppos +1625 sure +1626 surpris +1627 suse +1628 suspect +1629 sweet +1630 switch +1631 system +1632 tab +1633 tabl +1634 tablet +1635 tag +1636 take +1637 taken +1638 talk +1639 tape +1640 target +1641 task +1642 tax +1643 teach +1644 team +1645 tech +1646 technic +1647 techniqu +1648 technolog +1649 tel +1650 telecom +1651 telephon +1652 tell +1653 temperatur +1654 templ +1655 ten +1656 term +1657 termin +1658 terror +1659 terrorist +1660 test +1661 texa +1662 text +1663 than +1664 thank +1665 that +1666 the +1667 thei +1668 their +1669 them +1670 themselv +1671 then +1672 theori +1673 there +1674 therefor +1675 these +1676 thi +1677 thing +1678 think +1679 thinkgeek +1680 third +1681 those +1682 though +1683 thought +1684 thousand +1685 thread +1686 threat +1687 three +1688 through +1689 thu +1690 thursdai +1691 ti +1692 ticket +1693 tim +1694 time +1695 tip +1696 tire +1697 titl +1698 tm +1699 to +1700 todai +1701 togeth +1702 token +1703 told +1704 toll +1705 tom +1706 toner +1707 toni +1708 too +1709 took +1710 tool +1711 top +1712 topic +1713 total +1714 touch +1715 toward +1716 track +1717 trade +1718 tradit +1719 traffic +1720 train +1721 transact +1722 transfer +1723 travel +1724 treat +1725 
tree +1726 tri +1727 trial +1728 trick +1729 trip +1730 troubl +1731 true +1732 truli +1733 trust +1734 truth +1735 try +1736 tue +1737 tuesdai +1738 turn +1739 tv +1740 two +1741 type +1742 uk +1743 ultim +1744 un +1745 under +1746 understand +1747 unfortun +1748 uniqu +1749 unison +1750 unit +1751 univers +1752 unix +1753 unless +1754 unlik +1755 unlimit +1756 unseen +1757 unsolicit +1758 unsubscrib +1759 until +1760 up +1761 updat +1762 upgrad +1763 upon +1764 urgent +1765 url +1766 us +1767 usa +1768 usag +1769 usb +1770 usd +1771 usdollarnumb +1772 useless +1773 user +1774 usr +1775 usual +1776 util +1777 vacat +1778 valid +1779 valu +1780 valuabl +1781 var +1782 variabl +1783 varieti +1784 variou +1785 ve +1786 vendor +1787 ventur +1788 veri +1789 verifi +1790 version +1791 via +1792 video +1793 view +1794 virtual +1795 visa +1796 visit +1797 visual +1798 vnumber +1799 voic +1800 vote +1801 vs +1802 vulner +1803 wa +1804 wai +1805 wait +1806 wake +1807 walk +1808 wall +1809 want +1810 war +1811 warm +1812 warn +1813 warranti +1814 washington +1815 wasn +1816 wast +1817 watch +1818 water +1819 we +1820 wealth +1821 weapon +1822 web +1823 weblog +1824 websit +1825 wed +1826 wednesdai +1827 week +1828 weekli +1829 weight +1830 welcom +1831 well +1832 went +1833 were +1834 west +1835 what +1836 whatev +1837 when +1838 where +1839 whether +1840 which +1841 while +1842 white +1843 whitelist +1844 who +1845 whole +1846 whose +1847 why +1848 wi +1849 wide +1850 width +1851 wife +1852 will +1853 william +1854 win +1855 window +1856 wing +1857 winner +1858 wireless +1859 wish +1860 with +1861 within +1862 without +1863 wnumberp +1864 woman +1865 women +1866 won +1867 wonder +1868 word +1869 work +1870 worker +1871 world +1872 worldwid +1873 worri +1874 worst +1875 worth +1876 would +1877 wouldn +1878 write +1879 written +1880 wrong +1881 wrote +1882 www +1883 ximian +1884 xml +1885 xp +1886 yahoo +1887 ye +1888 yeah +1889 year +1890 yesterdai +1891 yet +1892 york +1893 
you
+1894 young
+1895 your
+1896 yourself
+1897 zdnet
+1898 zero
+1899 zip
diff --git a/ex7/ex7.pdf b/ex7/ex7.pdf
new file mode 100644
index 0000000..371e707
Binary files /dev/null and b/ex7/ex7.pdf differ
diff --git a/ex7/ex7/.DS_Store b/ex7/ex7/.DS_Store
new file mode 100644
index 0000000..2e039c4
Binary files /dev/null and b/ex7/ex7/.DS_Store differ
diff --git a/ex7/ex7/bird_small.mat b/ex7/ex7/bird_small.mat
new file mode 100644
index 0000000..04c224c
Binary files /dev/null and b/ex7/ex7/bird_small.mat differ
diff --git a/ex7/ex7/bird_small.png b/ex7/ex7/bird_small.png
new file mode 100644
index 0000000..a3cd00c
Binary files /dev/null and b/ex7/ex7/bird_small.png differ
diff --git a/ex7/ex7/computeCentroids.m b/ex7/ex7/computeCentroids.m
new file mode 100644
index 0000000..968806a
--- /dev/null
+++ b/ex7/ex7/computeCentroids.m
@@ -0,0 +1,39 @@
+function centroids = computeCentroids(X, idx, K)
+%COMPUTECENTROIDS returns the new centroids by computing the means of the
+%data points assigned to each centroid.
+% centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by
+% computing the means of the data points assigned to each centroid. It is
+% given a dataset X where each row is a single data point, a vector
+% idx of centroid assignments (i.e. each entry in range [1..K]) for each
+% example, and K, the number of centroids. You should return a matrix
+% centroids, where each row of centroids is the mean of the data points
+% assigned to it.
+%
+
+% Useful variables
+[m n] = size(X);
+
+% You need to return the following variables correctly.
+centroids = zeros(K, n);
+
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: Go over every centroid and compute mean of all points that
+% belong to it. Concretely, the row vector centroids(i, :)
+% should contain the mean of the data points assigned to
+% centroid i.
+%
+% Note: A for-loop over the centroids is used to compute this.
+% sum(..., 1) always reduces over rows, so a cluster with a single point keeps its 1 x n shape; a cluster with no points yields a NaN row (0/0).
+for i = 1:K
+    cluster_examples = X(idx == i, :);
+    centroids(i, :) = sum(cluster_examples, 1) / size(cluster_examples, 1);
+end
+
+
+% =============================================================
+
+
+end
+
diff --git a/ex7/ex7/displayData.m b/ex7/ex7/displayData.m
new file mode 100644
index 0000000..160697e
--- /dev/null
+++ b/ex7/ex7/displayData.m
@@ -0,0 +1,59 @@
+function [h, display_array] = displayData(X, example_width)
+%DISPLAYDATA Display 2D data in a nice grid
+% [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data
+% stored in X in a nice grid. It returns the figure handle h and the
+% displayed array if requested.
+
+% Set example_width automatically if not passed in
+if ~exist('example_width', 'var') || isempty(example_width)
+    example_width = round(sqrt(size(X, 2)));
+end
+
+% Gray Image
+colormap(gray);
+
+% Compute rows, cols
+[m n] = size(X);
+example_height = (n / example_width);
+
+% Compute number of items to display
+display_rows = floor(sqrt(m));
+display_cols = ceil(m / display_rows);
+
+% Between images padding
+pad = 1;
+
+% Setup blank display
+display_array = - ones(pad + display_rows * (example_height + pad), ...
+                       pad + display_cols * (example_width + pad));
+
+% Copy each example into a patch on the display array
+curr_ex = 1;
+for j = 1:display_rows
+    for i = 1:display_cols
+        if curr_ex > m,
+            break;
+        end
+        % Copy the patch
+
+        % Get the max value of the patch
+        max_val = max(abs(X(curr_ex, :)));
+        display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
+                      pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
+                        reshape(X(curr_ex, :), example_height, example_width) / max_val;
+        curr_ex = curr_ex + 1;
+    end
+    if curr_ex > m,
+        break;
+    end
+end
+
+% Display Image with color limits fixed to [-1 1]
+h = imagesc(display_array, [-1 1]);
+
+% Do not show axis
+axis image off
+
+drawnow;
+
+end
diff --git a/ex7/ex7/drawLine.m b/ex7/ex7/drawLine.m
new file mode 100644
index 0000000..85e6c41
--- /dev/null
+++ b/ex7/ex7/drawLine.m
@@ -0,0 +1,8 @@
+function drawLine(p1, p2, varargin)
+%DRAWLINE Draws a line from point p1 to point p2
+% DRAWLINE(p1, p2, ...) draws a line from point p1 to point p2; any extra
+% arguments are forwarded to plot. It does not itself call hold.
+
+plot([p1(1) p2(1)], [p1(2) p2(2)], varargin{:});
+
+end
\ No newline at end of file
diff --git a/ex7/ex7/ex7.m b/ex7/ex7/ex7.m
new file mode 100644
index 0000000..3a095ae
--- /dev/null
+++ b/ex7/ex7/ex7.m
@@ -0,0 +1,174 @@
+%% Machine Learning Online Class
+% Exercise 7 | Principal Component Analysis and K-Means Clustering
+%
+% Instructions
+% ------------
+%
+% This file contains code that helps you get started on the
+% exercise. You will need to complete the following functions:
+%
+% pca.m
+% projectData.m
+% recoverData.m
+% computeCentroids.m
+% findClosestCentroids.m
+% kMeansInitCentroids.m
+%
+% For this exercise, you will not need to change any code in this file,
+% or any other files other than those mentioned above.
+%
+
+%% Initialization
+clear ; close all; clc
+
+%% ================= Part 1: Find Closest Centroids ====================
+% To help you implement K-Means, we have divided the learning algorithm
+% into two functions -- findClosestCentroids and computeCentroids. In this
+% part, you should complete the code in the findClosestCentroids function.
+%
+fprintf('Finding closest centroids.\n\n');
+
+% Load an example dataset that we will be using
+load('ex7data2.mat');
+
+% Select an initial set of centroids
+K = 3; % 3 Centroids
+initial_centroids = [3 3; 6 2; 8 5];
+
+% Find the closest centroids for the examples using the
+% initial_centroids
+idx = findClosestCentroids(X, initial_centroids);
+
+fprintf('Closest centroids for the first 3 examples: \n')
+fprintf(' %d', idx(1:3));
+fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ===================== Part 2: Compute Means =========================
+% After implementing the closest centroids function, you should now
+% complete the computeCentroids function.
+%
+fprintf('\nComputing centroids means.\n\n');
+
+% Compute means based on the closest centroids found in the previous part.
+centroids = computeCentroids(X, idx, K);
+
+fprintf('Centroids computed after initial finding of closest centroids: \n')
+fprintf(' %f %f \n' , centroids');
+fprintf('\n(the centroids should be\n');
+fprintf(' [ 2.428301 3.157924 ]\n');
+fprintf(' [ 5.813503 2.633656 ]\n');
+fprintf(' [ 7.119387 3.616684 ]\n\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% =================== Part 3: K-Means Clustering ======================
+% After you have completed the two functions computeCentroids and
+% findClosestCentroids, you have all the necessary pieces to run the
+% kMeans algorithm. In this part, you will run the K-Means algorithm on
+% the example dataset we have provided.
+%
+fprintf('\nRunning K-Means clustering on example dataset.\n\n');
+
+% Load an example dataset
+load('ex7data2.mat');
+
+% Settings for running K-Means
+K = 3;
+max_iters = 10;
+
+% For consistency, here we set centroids to specific values
+% but in practice you want to generate them automatically, such as by
+% setting them to be random examples (as can be seen in
+% kMeansInitCentroids).
+initial_centroids = [3 3; 6 2; 8 5];
+
+% Run K-Means algorithm. The 'true' at the end tells our function to plot
+% the progress of K-Means
+[centroids, idx] = runkMeans(X, initial_centroids, max_iters, true);
+fprintf('\nK-Means Done.\n\n');
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+%% ============= Part 4: K-Means Clustering on Pixels ===============
+% In this exercise, you will use K-Means to compress an image. To do this,
+% you will first run K-Means on the colors of the pixels in the image and
+% then you will map each pixel on to its closest centroid.
+%
+% You should now complete the code in kMeansInitCentroids.m
+%
+
+fprintf('\nRunning K-Means clustering on pixels from an image.\n\n');
+
+% Load an image of a bird
+A = double(imread('bird_small.png'));
+
+% If imread does not work for you, you can try instead
+% load ('bird_small.mat');
+
+A = A / 255; % Divide by 255 so that all values are in the range 0 - 1
+
+% Size of the image
+img_size = size(A);
+
+% Reshape the image into an Nx3 matrix where N = number of pixels.
+% Each row will contain the Red, Green and Blue pixel values
+% This gives us our dataset matrix X that we will use K-Means on.
+X = reshape(A, img_size(1) * img_size(2), 3);
+
+% Run your K-Means algorithm on this data
+% You should try different values of K and max_iters here
+K = 16;
+max_iters = 10;
+
+% When using K-Means, it is important to initialize the centroids
+% randomly.
+% You should complete the code in kMeansInitCentroids.m before proceeding
+initial_centroids = kMeansInitCentroids(X, K);
+
+% Run K-Means
+[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
+
+%% ================= Part 5: Image Compression ======================
+% In this part of the exercise, you will use the clusters of K-Means to
+% compress an image. To do this, we first find the closest clusters for
+% each example. After that, we map each pixel to the value of its assigned centroid.
+
+fprintf('\nApplying K-Means to compress an image.\n\n');
+
+% Find closest cluster members
+idx = findClosestCentroids(X, centroids);
+
+% Essentially, now we have represented the image X in terms of the
+% indices in idx.
+
+% We can now recover the image from the indices (idx) by mapping each pixel
+% (specified by its index in idx) to the centroid value
+X_recovered = centroids(idx,:);
+
+% Reshape the recovered image into proper dimensions
+X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3);
+
+% Display the original image
+subplot(1, 2, 1);
+imagesc(A);
+title('Original');
+
+% Display compressed image side by side
+subplot(1, 2, 2);
+imagesc(X_recovered)
+title(sprintf('Compressed, with %d colors.', K));
+
+
+fprintf('Program paused. Press enter to continue.\n');
+pause;
+
diff --git a/ex7/ex7/ex7_pca.m b/ex7/ex7/ex7_pca.m
new file mode 100644
index 0000000..de98b13
--- /dev/null
+++ b/ex7/ex7/ex7_pca.m
@@ -0,0 +1,235 @@
+%% Machine Learning Online Class
+% Exercise 7 | Principle Component Analysis and K-Means Clustering
+%
+% Instructions
+% ------------
+%
+% This file contains code that helps you get started on the
+% exercise. You will need to complete the following functions:
+%
+% pca.m
+% projectData.m
+% recoverData.m
+% computeCentroids.m
+% findClosestCentroids.m
+% kMeansInitCentroids.m
+%
+% For this exercise, you will not need to change any code in this file,
+% or any other files other than those mentioned above.
+% + +%% Initialization +clear ; close all; clc + +%% ================== Part 1: Load Example Dataset =================== +% We start this exercise by using a small dataset that is easily to +% visualize +% +fprintf('Visualizing example dataset for PCA.\n\n'); + +% The following command loads the dataset. You should now have the +% variable X in your environment +load ('ex7data1.mat'); + +% Visualize the example dataset +plot(X(:, 1), X(:, 2), 'bo'); +axis([0.5 6.5 2 8]); axis square; + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% =============== Part 2: Principal Component Analysis =============== +% You should now implement PCA, a dimension reduction technique. You +% should complete the code in pca.m +% +fprintf('\nRunning PCA on example dataset.\n\n'); + +% Before running PCA, it is important to first normalize X +[X_norm, mu, sigma] = featureNormalize(X); + +% Run PCA +[U, S] = pca(X_norm); + +% Compute mu, the mean of the each feature + +% Draw the eigenvectors centered at mean of data. These lines show the +% directions of maximum variations in the dataset. +hold on; +drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2); +drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2); +hold off; + +fprintf('Top eigenvector: \n'); +fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1)); +fprintf('\n(you should expect to see -0.707107 -0.707107)\n'); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% =================== Part 3: Dimension Reduction =================== +% You should now implement the projection step to map the data onto the +% first k eigenvectors. The code will then plot the data in this reduced +% dimensional space. This will show you what the data looks like when +% using only the corresponding eigenvectors to reconstruct it. 
+% +% You should complete the code in projectData.m +% +fprintf('\nDimension reduction on example dataset.\n\n'); + +% Plot the normalized dataset (returned from pca) +plot(X_norm(:, 1), X_norm(:, 2), 'bo'); +axis([-4 3 -4 3]); axis square + +% Project the data onto K = 1 dimension +K = 1; +Z = projectData(X_norm, U, K); +fprintf('Projection of the first example: %f\n', Z(1)); +fprintf('\n(this value should be about 1.481274)\n\n'); + +X_rec = recoverData(Z, U, K); +fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2)); +fprintf('\n(this value should be about -1.047419 -1.047419)\n\n'); + +% Draw lines connecting the projected points to the original points +hold on; +plot(X_rec(:, 1), X_rec(:, 2), 'ro'); +for i = 1:size(X_norm, 1) + drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1); +end +hold off + +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% =============== Part 4: Loading and Visualizing Face Data ============= +% We start the exercise by first loading and visualizing the dataset. +% The following code will load the dataset into your environment +% +fprintf('\nLoading face dataset.\n\n'); + +% Load Face dataset +load ('ex7faces.mat') + +% Display the first 100 faces in the dataset +displayData(X(1:100, :)); + +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% =========== Part 5: PCA on Face Data: Eigenfaces =================== +% Run PCA and visualize the eigenvectors which are in this case eigenfaces +% We display the first 36 eigenfaces. +% +fprintf(['\nRunning PCA on face dataset.\n' ... + '(this mght take a minute or two ...)\n\n']); + +% Before running PCA, it is important to first normalize X by subtracting +% the mean value from each feature +[X_norm, mu, sigma] = featureNormalize(X); + +% Run PCA +[U, S] = pca(X_norm); + +% Visualize the top 36 eigenvectors found +displayData(U(:, 1:36)'); + +fprintf('Program paused. 
Press enter to continue.\n'); +pause; + + +%% ============= Part 6: Dimension Reduction for Faces ================= +% Project images to the eigen space using the top k eigenvectors +% If you are applying a machine learning algorithm +fprintf('\nDimension reduction for face dataset.\n\n'); + +K = 100; +Z = projectData(X_norm, U, K); + +fprintf('The projected data Z has a size of: ') +fprintf('%d ', size(Z)); + +fprintf('\n\nProgram paused. Press enter to continue.\n'); +pause; + +%% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ==== +% Project images to the eigen space using the top K eigen vectors and +% visualize only using those K dimensions +% Compare to the original input, which is also displayed + +fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n'); + +K = 100; +X_rec = recoverData(Z, U, K); + +% Display normalized data +subplot(1, 2, 1); +displayData(X_norm(1:100,:)); +title('Original faces'); +axis square; + +% Display reconstructed data from only k eigenfaces +subplot(1, 2, 2); +displayData(X_rec(1:100,:)); +title('Recovered faces'); +axis square; + +fprintf('Program paused. Press enter to continue.\n'); +pause; + + +%% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization === +% One useful application of PCA is to use it to visualize high-dimensional +% data. In the last K-Means exercise you ran K-Means on 3-dimensional +% pixel colors of an image. We first visualize this output in 3D, and then +% apply PCA to obtain a visualization in 2D. 
+ +close all; close all; clc + +% Re-load the image from the previous exercise and run K-Means on it +% For this to work, you need to complete the K-Means assignment first +A = double(imread('bird_small.png')); + +% If imread does not work for you, you can try instead +% load ('bird_small.mat'); + +A = A / 255; +img_size = size(A); +X = reshape(A, img_size(1) * img_size(2), 3); +K = 16; +max_iters = 10; +initial_centroids = kMeansInitCentroids(X, K); +[centroids, idx] = runkMeans(X, initial_centroids, max_iters); + +% Sample 1000 random indexes (since working with all the data is +% too expensive. If you have a fast computer, you may increase this. +sel = floor(rand(1000, 1) * size(X, 1)) + 1; + +% Setup Color Palette +palette = hsv(K); +colors = palette(idx(sel), :); + +% Visualize the data and centroid memberships in 3D +figure; +scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors); +title('Pixel dataset plotted in 3D. Color shows centroid memberships'); +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === +% Use PCA to project this cloud to 2D for visualization + +% Subtract the mean to use PCA +[X_norm, mu, sigma] = featureNormalize(X); + +% PCA and project the data to 2D +[U, S] = pca(X_norm); +Z = projectData(X_norm, U, 2); + +% Plot in 2D +figure; +plotDataPoints(Z(sel, :), idx(sel), K); +title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction'); +fprintf('Program paused. 
Press enter to continue.\n');
+pause;
diff --git a/ex7/ex7/ex7data1.mat b/ex7/ex7/ex7data1.mat
new file mode 100644
index 0000000..f9c3961
Binary files /dev/null and b/ex7/ex7/ex7data1.mat differ
diff --git a/ex7/ex7/ex7data2.mat b/ex7/ex7/ex7data2.mat
new file mode 100644
index 0000000..de3f5b9
Binary files /dev/null and b/ex7/ex7/ex7data2.mat differ
diff --git a/ex7/ex7/ex7faces.mat b/ex7/ex7/ex7faces.mat
new file mode 100644
index 0000000..3965bd1
Binary files /dev/null and b/ex7/ex7/ex7faces.mat differ
diff --git a/ex7/ex7/featureNormalize.m b/ex7/ex7/featureNormalize.m
new file mode 100644
index 0000000..da03bee
--- /dev/null
+++ b/ex7/ex7/featureNormalize.m
@@ -0,0 +1,26 @@
+function [X_norm, mu, sigma] = featureNormalize(X)
+%FEATURENORMALIZE Normalizes the features in X
+%   [X_norm, mu, sigma] = FEATURENORMALIZE(X) returns a normalized version
+%   of X in which every feature (column) has mean 0 and standard
+%   deviation 1. This is often a good preprocessing step to do when
+%   working with learning algorithms.
+%
+%   mu    - 1 x n row vector of the column means subtracted from X
+%   sigma - 1 x n row vector of the column standard deviations X was
+%           divided by. A constant column has zero spread; its sigma is
+%           returned as 1, so the column is centered but not scaled and
+%           X_norm never contains NaN from a 0/0 division.
+
+% Reduce along dimension 1 explicitly so a single-row X is still treated
+% as one example with n features rather than as a vector of n samples.
+mu = mean(X, 1);
+X_norm = bsxfun(@minus, X, mu);
+
+sigma = std(X_norm, 0, 1);
+% Guard the division: zero-variance features would otherwise yield NaN.
+sigma(sigma == 0) = 1;
+X_norm = bsxfun(@rdivide, X_norm, sigma);
+
+% ============================================================
+
+end
diff --git a/ex7/ex7/findClosestCentroids.m b/ex7/ex7/findClosestCentroids.m
new file mode 100644
index 0000000..f74c96f
--- /dev/null
+++ b/ex7/ex7/findClosestCentroids.m
@@ -0,0 +1,43 @@
+function idx = findClosestCentroids(X, centroids)
+%FINDCLOSESTCENTROIDS computes the centroid memberships for every example
+%   idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids
+%   in idx for a dataset X where each row is a single example. idx = m x 1
+%   vector of centroid assignments (i.e. each entry in range [1..K])
+%
+
+% Set K
+K = size(centroids, 1);
+
+% You need to return the following variables correctly.
+idx = zeros(size(X,1), 1); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Go over every example, find its closest centroid, and store +% the index inside idx at the appropriate location. +% Concretely, idx(i) should contain the index of the centroid +% closest to example i. Hence, it should be a value in the +% range 1..K +% +% Note: You can use a for-loop over the examples to compute this. +% + +m = size(X, 1); +for i = 1:m + deltas = zeros(K, 1); + rep_examples = repmat(X(i, :), K, 1); + for j = 1:K + rep_examples(j, :) = rep_examples(j, :) - centroids(j, :); + deltas(j, 1) = rep_examples(j, :) * rep_examples(j, :)'; + end + [min_delta, min_delta_index] = min(deltas); + idx(i, 1) = min_delta_index; +end + + + + + +% ============================================================= + +end + diff --git a/ex7/ex7/kMeansInitCentroids.m b/ex7/ex7/kMeansInitCentroids.m new file mode 100644 index 0000000..6bb1376 --- /dev/null +++ b/ex7/ex7/kMeansInitCentroids.m @@ -0,0 +1,27 @@ +function centroids = kMeansInitCentroids(X, K) +%KMEANSINITCENTROIDS This function initializes K centroids that are to be +%used in K-Means on the dataset X +% centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be +% used with the K-Means on the dataset X +% + +% You should return this values correctly +centroids = zeros(K, size(X, 2)); + +% ====================== YOUR CODE HERE ====================== +% Instructions: You should set centroids to randomly chosen examples from +% the dataset X +% + +randidx = randperm(size(X, 1)); +centroids = X(randidx(1:K), :); + + + + + + +% ============================================================= + +end + diff --git a/ex7/ex7/ml_login_data.mat b/ex7/ex7/ml_login_data.mat new file mode 100644 index 0000000..e61d956 --- /dev/null +++ b/ex7/ex7/ml_login_data.mat @@ -0,0 +1,15 @@ +# Created by Octave 3.4.0, Sun Oct 14 17:41:27 2012 CDT +# name: login +# type: sq_string +# elements: 1 +# length: 21 
+redacted1@example.com
+
+
+# name: password
+# type: sq_string
+# elements: 1
+# length: 10
+<REDACTED>
+
+
diff --git a/ex7/ex7/octave-core b/ex7/ex7/octave-core
new file mode 100644
index 0000000..6551ee7
Binary files /dev/null and b/ex7/ex7/octave-core differ
diff --git a/ex7/ex7/pca.m b/ex7/ex7/pca.m
new file mode 100644
index 0000000..743c77f
--- /dev/null
+++ b/ex7/ex7/pca.m
@@ -0,0 +1,32 @@
+function [U, S] = pca(X)
+%PCA Run principal component analysis on the dataset X
+%   [U, S] = pca(X) computes eigenvectors of the covariance matrix of X
+%   Returns the eigenvectors U, the eigenvalues (on diagonal) in S
+%
+
+% Useful values
+[m, n] = size(X);
+
+% You need to return the following variables correctly.
+U = zeros(n);
+S = zeros(n);
+
+% ====================== YOUR CODE HERE ======================
+% Instructions: You should first compute the covariance matrix. Then, you
+%               should use the "svd" function to compute the eigenvectors
+%               and eigenvalues of the covariance matrix.
+%
+% Note: When computing the covariance matrix, remember to divide by m (the
+%       number of examples).
+% + +Sigma = 1/m * X' * X; +[U, S, V] = svd(Sigma); + + + + + +% ========================================================================= + +end diff --git a/ex7/ex7/plotDataPoints.m b/ex7/ex7/plotDataPoints.m new file mode 100644 index 0000000..77c4623 --- /dev/null +++ b/ex7/ex7/plotDataPoints.m @@ -0,0 +1,14 @@ +function plotDataPoints(X, idx, K) +%PLOTDATAPOINTS plots data points in X, coloring them so that those with the same +%index assignments in idx have the same color +% PLOTDATAPOINTS(X, idx, K) plots data points in X, coloring them so that those +% with the same index assignments in idx have the same color + +% Create palette +palette = hsv(K + 1); +colors = palette(idx, :); + +% Plot the data +scatter(X(:,1), X(:,2), 15, colors); + +end diff --git a/ex7/ex7/plotProgresskMeans.m b/ex7/ex7/plotProgresskMeans.m new file mode 100644 index 0000000..f14d1c7 --- /dev/null +++ b/ex7/ex7/plotProgresskMeans.m @@ -0,0 +1,27 @@ +function plotProgresskMeans(X, centroids, previous, idx, K, i) +%PLOTPROGRESSKMEANS is a helper function that displays the progress of +%k-Means as it is running. It is intended for use only with 2D data. +% PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data +% points with colors assigned to each centroid. With the previous +% centroids, it also plots a line between the previous locations and +% current locations of the centroids. +% + +% Plot the examples +plotDataPoints(X, idx, K); + +% Plot the centroids as black x's +plot(centroids(:,1), centroids(:,2), 'x', ... + 'MarkerEdgeColor','k', ... 
+ 'MarkerSize', 10, 'LineWidth', 3); + +% Plot the history of the centroids with lines +for j=1:size(centroids,1) + drawLine(centroids(j, :), previous(j, :)); +end + +% Title +title(sprintf('Iteration number %d', i)) + +end + diff --git a/ex7/ex7/projectData.m b/ex7/ex7/projectData.m new file mode 100644 index 0000000..396d6d1 --- /dev/null +++ b/ex7/ex7/projectData.m @@ -0,0 +1,26 @@ +function Z = projectData(X, U, K) +%PROJECTDATA Computes the reduced data representation when projecting only +%on to the top k eigenvectors +% Z = projectData(X, U, K) computes the projection of +% the normalized inputs X into the reduced dimensional space spanned by +% the first K columns of U. It returns the projected examples in Z. +% + +% You need to return the following variables correctly. +Z = zeros(size(X, 1), K); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Compute the projection of the data using only the top K +% eigenvectors in U (first K columns). +% For the i-th example X(i,:), the projection on to the k-th +% eigenvector is given as follows: +% x = X(i, :)'; +% projection_k = x' * U(:, k); +% + +U_reduce = U(:, 1:K); +Z = X * U_reduce; + +% ============================================================= + +end diff --git a/ex7/ex7/recoverData.m b/ex7/ex7/recoverData.m new file mode 100644 index 0000000..ff09099 --- /dev/null +++ b/ex7/ex7/recoverData.m @@ -0,0 +1,28 @@ +function X_rec = recoverData(Z, U, K) +%RECOVERDATA Recovers an approximation of the original data when using the +%projected data +% X_rec = RECOVERDATA(Z, U, K) recovers an approximation the +% original data that has been reduced to K dimensions. It returns the +% approximate reconstruction in X_rec. +% + +% You need to return the following variables correctly. 
+X_rec = zeros(size(Z, 1), size(U, 1)); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Compute the approximation of the data by projecting back +% onto the original space using the top K eigenvectors in U. +% +% For the i-th example Z(i,:), the (approximate) +% recovered data for dimension j is given as follows: +% v = Z(i, :)'; +% recovered_j = v' * U(j, 1:K)'; +% +% Notice that U(j, 1:K) is a row vector. +% +U_reduce = U(:, 1:K); +X_rec = Z * U_reduce'; + +% ============================================================= + +end diff --git a/ex7/ex7/runkMeans.m b/ex7/ex7/runkMeans.m new file mode 100644 index 0000000..fc22c1b --- /dev/null +++ b/ex7/ex7/runkMeans.m @@ -0,0 +1,64 @@ +function [centroids, idx] = runkMeans(X, initial_centroids, ... + max_iters, plot_progress) +%RUNKMEANS runs the K-Means algorithm on data matrix X, where each row of X +%is a single example +% [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, ... +% plot_progress) runs the K-Means algorithm on data matrix X, where each +% row of X is a single example. It uses initial_centroids used as the +% initial centroids. max_iters specifies the total number of interactions +% of K-Means to execute. plot_progress is a true/false flag that +% indicates if the function should also plot its progress as the +% learning happens. This is set to false by default. runkMeans returns +% centroids, a Kxn matrix of the computed centroids and idx, a m x 1 +% vector of centroid assignments (i.e. 
each entry in range [1..K]) +% + +% Set default value for plot progress +if ~exist('plot_progress', 'var') || isempty(plot_progress) + plot_progress = false; +end + +% Plot the data if we are plotting progress +if plot_progress + figure; + hold on; +end + +% Initialize values +[m n] = size(X); +K = size(initial_centroids, 1); +centroids = initial_centroids; +previous_centroids = centroids; +idx = zeros(m, 1); + +% Run K-Means +for i=1:max_iters + + % Output progress + fprintf('K-Means iteration %d/%d...\n', i, max_iters); + if exist('OCTAVE_VERSION') + fflush(stdout); + end + + % For each example in X, assign it to the closest centroid + idx = findClosestCentroids(X, centroids); + + % Optionally, plot progress here + if plot_progress + plotProgresskMeans(X, centroids, previous_centroids, idx, K, i); + previous_centroids = centroids; + fprintf('Press enter to continue.\n'); + pause; + end + + % Given the memberships, compute new centroids + centroids = computeCentroids(X, idx, K); +end + +% Hold off if we are plotting progress +if plot_progress + hold off; +end + +end + diff --git a/ex7/ex7/submit.m b/ex7/ex7/submit.m new file mode 100644 index 0000000..e6e77ca --- /dev/null +++ b/ex7/ex7/submit.m @@ -0,0 +1,576 @@ +function submit(partId, webSubmit) +%SUBMIT Submit your code and output to the ml-class servers +% SUBMIT() will connect to the ml-class server and submit your solution + + fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ... + homework_id()); + if ~exist('partId', 'var') || isempty(partId) + partId = promptPart(); + end + + if ~exist('webSubmit', 'var') || isempty(webSubmit) + webSubmit = 0; % submit directly by default + end + + % Check valid partId + partNames = validParts(); + if ~isValidPartId(partId) + fprintf('!! Invalid homework part selected.\n'); + fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1); + fprintf('!! 
Submission Cancelled\n'); + return + end + + if ~exist('ml_login_data.mat','file') + [login password] = loginPrompt(); + save('ml_login_data.mat','login','password'); + else + load('ml_login_data.mat'); + [login password] = quickLogin(login, password); + save('ml_login_data.mat','login','password'); + end + + if isempty(login) + fprintf('!! Submission Cancelled\n'); + return + end + + fprintf('\n== Connecting to ml-class ... '); + if exist('OCTAVE_VERSION') + fflush(stdout); + end + + % Setup submit list + if partId == numel(partNames) + 1 + submitParts = 1:numel(partNames); + else + submitParts = [partId]; + end + + for s = 1:numel(submitParts) + thisPartId = submitParts(s); + if (~webSubmit) % submit directly to server + [login, ch, signature, auxstring] = getChallenge(login, thisPartId); + if isempty(login) || isempty(ch) || isempty(signature) + % Some error occured, error string in first return element. + fprintf('\n!! Error: %s\n\n', login); + return + end + + % Attempt Submission with Challenge + ch_resp = challengeResponse(login, password, ch); + + [result, str] = submitSolution(login, ch_resp, thisPartId, ... + output(thisPartId, auxstring), source(thisPartId), signature); + + partName = partNames{thisPartId}; + + fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ... + homework_id(), thisPartId, partName); + fprintf('== %s\n', strtrim(str)); + + if exist('OCTAVE_VERSION') + fflush(stdout); + end + else + [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ... + source(thisPartId)); + result = base64encode(result); + + fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ... 
+              homework_id(), thisPartId);
+      saveAsFile = input('', 's');
+      if (isempty(saveAsFile))
+        saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId);
+      end
+
+      fid = fopen(saveAsFile, 'w');
+      if (fid ~= -1)  % fopen reports failure as -1, which is nonzero and would test as true
+        fwrite(fid, result);
+        fclose(fid);
+        fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
+        fprintf(['You can now submit your solutions through the web \n' ...
+                 'form in the programming exercises. Select the corresponding \n' ...
+                 'programming exercise to access the form.\n']);
+
+      else
+        fprintf('Unable to save to %s\n\n', saveAsFile);
+        fprintf(['You can create a submission file by saving the \n' ...
+                 'following text in a file: (press enter to continue)\n\n']);
+        pause;
+        fprintf('%s', result);  % emit the payload as data, never as the format string
+      end
+    end
+  end
+end
+
+% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
+
+function id = homework_id()
+  id = '7';
+end
+
+function [partNames] = validParts()
+  partNames = {
+                'Find Closest Centroids (k-Means)', ...
+                'Compute Centroid Means (k-Means)' ...
+                'PCA', ...
+                'Project Data (PCA)', ...
+                'Recover Data (PCA)' ...
+              };
+end
+
+function srcs = sources()
+  % Separated by part
+  srcs = { { 'findClosestCentroids.m' }, ...
+           { 'computeCentroids.m' }, ...
+           { 'pca.m' }, ...
+           { 'projectData.m' }, ...
+           { 'recoverData.m' } ...
+ }; +end + +function out = output(partId, auxstring) + % Random Test Cases + X = reshape(sin(1:165), 15, 11); + Z = reshape(cos(1:121), 11, 11); + C = Z(1:5, :); + idx = (1 + mod(1:15, 3))'; + if partId == 1 + idx = findClosestCentroids(X, C); + out = sprintf('%0.5f ', idx(:)); + elseif partId == 2 + centroids = computeCentroids(X, idx, 3); + out = sprintf('%0.5f ', centroids(:)); + elseif partId == 3 + [U, S] = pca(X); + out = sprintf('%0.5f ', abs([U(:); S(:)])); + elseif partId == 4 + X_proj = projectData(X, Z, 5); + out = sprintf('%0.5f ', X_proj(:)); + elseif partId == 5 + X_rec = recoverData(X(:,1:5), Z, 5); + out = sprintf('%0.5f ', X_rec(:)); + end +end + +% ====================== SERVER CONFIGURATION =========================== + +% ***************** REMOVE -staging WHEN YOU DEPLOY ********************* +function url = site_url() + url = 'http://class.coursera.org/ml-2012-002'; +end + +function url = challenge_url() + url = [site_url() '/assignment/challenge']; +end + +function url = submit_url() + url = [site_url() '/assignment/submit']; +end + +% ========================= CHALLENGE HELPERS ========================= + +function src = source(partId) + src = ''; + src_files = sources(); + if partId <= numel(src_files) + flist = src_files{partId}; + for i = 1:numel(flist) + fid = fopen(flist{i}); + if (fid == -1) + error('Error opening %s (is it missing?)', flist{i}); + end + line = fgets(fid); + while ischar(line) + src = [src line]; + line = fgets(fid); + end + fclose(fid); + src = [src '||||||||']; + end + end +end + +function ret = isValidPartId(partId) + partNames = validParts(); + ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1); +end + +function partId = promptPart() + fprintf('== Select which part(s) to submit:\n'); + partNames = validParts(); + srcFiles = sources(); + for i = 1:numel(partNames) + fprintf('== %d) %s [', i, partNames{i}); + fprintf(' %s ', srcFiles{i}{:}); + fprintf(']\n'); + end + fprintf('== %d) All of 
the above \n==\nEnter your choice [1-%d]: ', ...
+          numel(partNames) + 1, numel(partNames) + 1);
+  selPart = input('', 's');
+  partId = str2double(selPart);  % str2num would eval() the typed text; str2double only parses a scalar (NaN if invalid)
+  if ~isValidPartId(partId) || partId ~= fix(partId)  % NaN, out-of-range and fractional choices are rejected
+    partId = -1;
+  end
+end
+
+function [email,ch,signature,auxstring] = getChallenge(email, part)
+  str = urlread(challenge_url(), 'post', {'email_address', email, 'assignment_part_sid', [homework_id() '-' num2str(part)], 'response_encoding', 'delim'});
+
+  str = strtrim(str);
+  r = struct;
+  while(numel(str) > 0)
+    [f, str] = strtok (str, '|');
+    [v, str] = strtok (str, '|');
+    r = setfield(r, f, v);
+  end
+
+  email = getfield(r, 'email_address');
+  ch = getfield(r, 'challenge_key');
+  signature = getfield(r, 'state');
+  auxstring = getfield(r, 'challenge_aux_data');
+end
+
+function [result, str] = submitSolutionWeb(email, part, output, source)
+
+  result = ['{"assignment_part_sid":"' base64encode([homework_id() '-' num2str(part)], '') '",' ...
+            '"email_address":"' base64encode(email, '') '",' ...
+            '"submission":"' base64encode(output, '') '",' ...
+            '"submission_aux":"' base64encode(source, '') '"' ...
+            '}'];
+  str = 'Web-submission';
+end
+
+function [result, str] = submitSolution(email, ch_resp, part, output, ...
+                                        source, signature)
+
+  params = {'assignment_part_sid', [homework_id() '-' num2str(part)], ...
+            'email_address', email, ...
+            'submission', base64encode(output, ''), ...
+            'submission_aux', base64encode(source, ''), ...
+            'challenge_response', ch_resp, ...
+ 'state', signature}; + + str = urlread(submit_url(), 'post', params); + + % Parse str to read for success / failure + result = 0; + +end + +% =========================== LOGIN HELPERS =========================== + +function [login password] = loginPrompt() + % Prompt for password + [login password] = basicPrompt(); + + if isempty(login) || isempty(password) + login = []; password = []; + end +end + + +function [login password] = basicPrompt() + login = input('Login (Email address): ', 's'); + password = input('Password: ', 's'); +end + +function [login password] = quickLogin(login,password) + disp(['You are currently logged in as ' login '.']); + cont_token = input('Is this you? (y/n - type n to reenter password)','s'); + if(isempty(cont_token) || cont_token(1)=='Y'||cont_token(1)=='y') + return; + else + [login password] = loginPrompt(); + end +end + +function [str] = challengeResponse(email, passwd, challenge) + str = sha1([challenge passwd]); +end + +% =============================== SHA-1 ================================ + +function hash = sha1(str) + + % Initialize variables + h0 = uint32(1732584193); + h1 = uint32(4023233417); + h2 = uint32(2562383102); + h3 = uint32(271733878); + h4 = uint32(3285377520); + + % Convert to word array + strlen = numel(str); + + % Break string into chars and append the bit 1 to the message + mC = [double(str) 128]; + mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')]; + + numB = strlen * 8; + if exist('idivide') + numC = idivide(uint32(numB + 65), 512, 'ceil'); + else + numC = ceil(double(numB + 65)/512); + end + numW = numC * 16; + mW = zeros(numW, 1, 'uint32'); + + idx = 1; + for i = 1:4:strlen + 1 + mW(idx) = bitor(bitor(bitor( ... + bitshift(uint32(mC(i)), 24), ... + bitshift(uint32(mC(i+1)), 16)), ... + bitshift(uint32(mC(i+2)), 8)), ... 
+ uint32(mC(i+3))); + idx = idx + 1; + end + + % Append length of message + mW(numW - 1) = uint32(bitshift(uint64(numB), -32)); + mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32)); + + % Process the message in successive 512-bit chs + for cId = 1 : double(numC) + cSt = (cId - 1) * 16 + 1; + cEnd = cId * 16; + ch = mW(cSt : cEnd); + + % Extend the sixteen 32-bit words into eighty 32-bit words + for j = 17 : 80 + ch(j) = ch(j - 3); + ch(j) = bitxor(ch(j), ch(j - 8)); + ch(j) = bitxor(ch(j), ch(j - 14)); + ch(j) = bitxor(ch(j), ch(j - 16)); + ch(j) = bitrotate(ch(j), 1); + end + + % Initialize hash value for this ch + a = h0; + b = h1; + c = h2; + d = h3; + e = h4; + + % Main loop + for i = 1 : 80 + if(i >= 1 && i <= 20) + f = bitor(bitand(b, c), bitand(bitcmp(b), d)); + k = uint32(1518500249); + elseif(i >= 21 && i <= 40) + f = bitxor(bitxor(b, c), d); + k = uint32(1859775393); + elseif(i >= 41 && i <= 60) + f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d)); + k = uint32(2400959708); + elseif(i >= 61 && i <= 80) + f = bitxor(bitxor(b, c), d); + k = uint32(3395469782); + end + + t = bitrotate(a, 5); + t = bitadd(t, f); + t = bitadd(t, e); + t = bitadd(t, k); + t = bitadd(t, ch(i)); + e = d; + d = c; + c = bitrotate(b, 30); + b = a; + a = t; + + end + h0 = bitadd(h0, a); + h1 = bitadd(h1, b); + h2 = bitadd(h2, c); + h3 = bitadd(h3, d); + h4 = bitadd(h4, e); + + end + + hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]); + + hash = lower(hash); + +end + +function ret = bitadd(iA, iB) + ret = double(iA) + double(iB); + ret = bitset(ret, 33, 0); + ret = uint32(ret); +end + +function ret = bitrotate(iA, places) + t = bitshift(iA, places - 32); + ret = bitshift(iA, places); + ret = bitor(ret, t); +end + +% =========================== Base64 Encoder ============================ +% Thanks to Peter John Acklam +% + +function y = base64encode(x, eol) +%BASE64ENCODE Perform base64 encoding on a string. 
+% +% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending +% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10). +% The returned encoded string is broken into lines of no more than 76 +% characters each, and each line will end with EOL unless it is empty. Let +% EOL be empty if you do not want the encoded string broken into lines. +% +% STR and EOL don't have to be strings (i.e., char arrays). The only +% requirement is that they are vectors containing values in the range 0-255. +% +% This function may be used to encode strings into the Base64 encoding +% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The +% Base64 encoding is designed to represent arbitrary sequences of octets in a +% form that need not be humanly readable. A 65-character subset +% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per +% printable character. +% +% Examples +% -------- +% +% If you want to encode a large file, you should encode it in chunks that are +% a multiple of 57 bytes. This ensures that the base64 lines line up and +% that you do not end up with padding in the middle. 57 bytes of data fills +% one complete base64 line (76 == 57*4/3): +% +% If ifid and ofid are two file identifiers opened for reading and writing, +% respectively, then you can base64 encode the data with +% +% while ~feof(ifid) +% fwrite(ofid, base64encode(fread(ifid, 60*57))); +% end +% +% or, if you have enough memory, +% +% fwrite(ofid, base64encode(fread(ifid))); +% +% See also BASE64DECODE. 
+ +% Author: Peter John Acklam +% Time-stamp: 2004-02-03 21:36:56 +0100 +% E-mail: pjacklam@online.no +% URL: http://home.online.no/~pjacklam + + if isnumeric(x) + x = num2str(x); + end + + % make sure we have the EOL value + if nargin < 2 + eol = sprintf('\n'); + else + if sum(size(eol) > 1) > 1 + error('EOL must be a vector.'); + end + if any(eol(:) > 255) + error('EOL can not contain values larger than 255.'); + end + end + + if sum(size(x) > 1) > 1 + error('STR must be a vector.'); + end + + x = uint8(x); + eol = uint8(eol); + + ndbytes = length(x); % number of decoded bytes + nchunks = ceil(ndbytes / 3); % number of chunks/groups + nebytes = 4 * nchunks; % number of encoded bytes + + % add padding if necessary, to make the length of x a multiple of 3 + if rem(ndbytes, 3) + x(end+1 : 3*nchunks) = 0; + end + + x = reshape(x, [3, nchunks]); % reshape the data + y = repmat(uint8(0), 4, nchunks); % for the encoded data + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Split up every 3 bytes into 4 pieces + % + % aaaaaabb bbbbcccc ccdddddd + % + % to form + % + % 00aaaaaa 00bbbbbb 00cccccc 00dddddd + % + y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:) + + y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:) + y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:) + + y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:) + y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:) + + y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:) + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Now perform the following mapping + % + % 0 - 25 -> A-Z + % 26 - 51 -> a-z + % 52 - 61 -> 0-9 + % 62 -> + + % 63 -> / + % + % We could use a mapping vector like + % + % ['A':'Z', 'a':'z', '0':'9', '+/'] + % + % but that would require an index vector of class double. 
+ % + z = repmat(uint8(0), size(y)); + i = y <= 25; z(i) = 'A' + double(y(i)); + i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i)); + i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i)); + i = y == 62; z(i) = '+'; + i = y == 63; z(i) = '/'; + y = z; + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Add padding if necessary. + % + npbytes = 3 * nchunks - ndbytes; % number of padding bytes + if npbytes + y(end-npbytes+1 : end) = '='; % '=' is used for padding + end + + if isempty(eol) + + % reshape to a row vector + y = reshape(y, [1, nebytes]); + + else + + nlines = ceil(nebytes / 76); % number of lines + neolbytes = length(eol); % number of bytes in eol string + + % pad data so it becomes a multiple of 76 elements + y = [y(:) ; zeros(76 * nlines - numel(y), 1)]; + y(nebytes + 1 : 76 * nlines) = 0; + y = reshape(y, 76, nlines); + + % insert eol strings + eol = eol(:); + y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines)); + + % remove padding, but keep the last eol string + m = nebytes + neolbytes * (nlines - 1); + n = (76+neolbytes)*nlines - neolbytes; + y(m+1 : n) = ''; + + % extract and reshape to row vector + y = reshape(y, 1, m+neolbytes); + + end + + % output is a character array + y = char(y); + +end diff --git a/ex7/ex7/submitWeb.m b/ex7/ex7/submitWeb.m new file mode 100644 index 0000000..e429365 --- /dev/null +++ b/ex7/ex7/submitWeb.m @@ -0,0 +1,20 @@ +% submitWeb Creates files from your code and output for web submission. +% +% If the submit function does not work for you, use the web-submission mechanism. +% Call this function to produce a file for the part you wish to submit. Then, +% submit the file to the class servers using the "Web Submission" button on the +% Programming Exercises page on the course website. +% +% You should call this function without arguments (submitWeb), to receive +% an interactive prompt for submission; optionally you can call it with the partID +% if you so wish. 
Make sure your working directory is set to the directory +% containing the submitWeb.m file and your assignment files. + +function submitWeb(partId) + if ~exist('partId', 'var') || isempty(partId) + partId = []; + end + + submit(partId, 1); +end + diff --git a/ex8/.DS_Store b/ex8/.DS_Store new file mode 100644 index 0000000..5e5cb9c Binary files /dev/null and b/ex8/.DS_Store differ diff --git a/ex8/ex8.pdf b/ex8/ex8.pdf new file mode 100644 index 0000000..7cd963c Binary files /dev/null and b/ex8/ex8.pdf differ diff --git a/ex8/ex8/.DS_Store b/ex8/ex8/.DS_Store new file mode 100644 index 0000000..65e631d Binary files /dev/null and b/ex8/ex8/.DS_Store differ diff --git a/ex8/ex8/checkCostFunction.m b/ex8/ex8/checkCostFunction.m new file mode 100644 index 0000000..e72fe32 --- /dev/null +++ b/ex8/ex8/checkCostFunction.m @@ -0,0 +1,48 @@ +function checkCostFunction(lambda) +%CHECKCOSTFUNCTION Creates a collaborative filering problem +%to check your cost function and gradients +% CHECKCOSTFUNCTION(lambda) Creates a collaborative filering problem +% to check your cost function and gradients, it will output the +% analytical gradients produced by your code and the numerical gradients +% (computed using computeNumericalGradient). These two gradient +% computations should result in very similar values. + +% Set lambda +if ~exist('lambda', 'var') || isempty(lambda) + lambda = 0; +end + +%% Create small problem +X_t = rand(4, 3); +Theta_t = rand(5, 3); + +% Zap out most entries +Y = X_t * Theta_t'; +Y(rand(size(Y)) > 0.5) = 0; +R = zeros(size(Y)); +R(Y ~= 0) = 1; + +%% Run Gradient Checking +X = randn(size(X_t)); +Theta = randn(size(Theta_t)); +num_users = size(Y, 2); +num_movies = size(Y, 1); +num_features = size(Theta_t, 2); + +numgrad = computeNumericalGradient( ... + @(t) cofiCostFunc(t, Y, R, num_users, num_movies, ... + num_features, lambda), [X(:); Theta(:)]); + +[cost, grad] = cofiCostFunc([X(:); Theta(:)], Y, R, num_users, ... 
+ num_movies, num_features, lambda); + +disp([numgrad grad]); +fprintf(['The above two columns you get should be very similar.\n' ... + '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']); + +diff = norm(numgrad-grad)/norm(numgrad+grad); +fprintf(['If your backpropagation implementation is correct, then \n' ... + 'the relative difference will be small (less than 1e-9). \n' ... + '\nRelative Difference: %g\n'], diff); + +end \ No newline at end of file diff --git a/ex8/ex8/cofiCostFunc.m b/ex8/ex8/cofiCostFunc.m new file mode 100644 index 0000000..cb152c6 --- /dev/null +++ b/ex8/ex8/cofiCostFunc.m @@ -0,0 +1,58 @@ +function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ... + num_features, lambda) +%COFICOSTFUNC Collaborative filtering cost function +% [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ... +% num_features, lambda) returns the cost and gradient for the +% collaborative filtering problem. +% + +% Unfold the U and W matrices from params +X = reshape(params(1:num_movies*num_features), num_movies, num_features); +Theta = reshape(params(num_movies*num_features+1:end), ... + num_users, num_features); + + +% You need to return the following values correctly +J = 0; +X_grad = zeros(size(X)); +Theta_grad = zeros(size(Theta)); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Compute the cost function and gradient for collaborative +% filtering. Concretely, you should first implement the cost +% function (without regularization) and make sure it is +% matches our costs. After that, you should implement the +% gradient and use the checkCostFunction routine to check +% that the gradient is correct. Finally, you should implement +% regularization. 
+% +% Notes: X - num_movies x num_features matrix of movie features +% Theta - num_users x num_features matrix of user features +% Y - num_movies x num_users matrix of user ratings of movies +% R - num_movies x num_users matrix, where R(i, j) = 1 if the +% i-th movie was rated by the j-th user +% +% You should set the following variables correctly: +% +% X_grad - num_movies x num_features matrix, containing the +% partial derivatives w.r.t. to each element of X +% Theta_grad - num_users x num_features matrix, containing the +% partial derivatives w.r.t. to each element of Theta +% + +J = sum(sum(((X*Theta' - Y) .* R) .^2)) / 2; +X_grad = ((X*Theta' - Y) .* R) * Theta; +Theta_grad = ((X*Theta' - Y) .* R)' * X; + +J = J + sum(sum(Theta.^2))*lambda/2 + sum(sum(X.^2))*lambda/2; +X_grad = X_grad + lambda * X; +Theta_grad = Theta_grad + lambda * Theta; + + + + +% ============================================================= + +grad = [X_grad(:); Theta_grad(:)]; + +end diff --git a/ex8/ex8/computeNumericalGradient.m b/ex8/ex8/computeNumericalGradient.m new file mode 100644 index 0000000..c3abeac --- /dev/null +++ b/ex8/ex8/computeNumericalGradient.m @@ -0,0 +1,29 @@ +function numgrad = computeNumericalGradient(J, theta) +%COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences" +%and gives us a numerical estimate of the gradient. +% numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical +% gradient of the function J around theta. Calling y = J(theta) should +% return the function value at theta. + +% Notes: The following code implements numerical gradient checking, and +% returns the numerical gradient.It sets numgrad(i) to (a numerical +% approximation of) the partial derivative of J with respect to the +% i-th input argument, evaluated at theta. (i.e., numgrad(i) should +% be the (approximately) the partial derivative of J with respect +% to theta(i).) 
+% + +numgrad = zeros(size(theta)); +perturb = zeros(size(theta)); +e = 1e-4; +for p = 1:numel(theta) + % Set perturbation vector + perturb(p) = e; + loss1 = J(theta - perturb); + loss2 = J(theta + perturb); + % Compute Numerical Gradient + numgrad(p) = (loss2 - loss1) / (2*e); + perturb(p) = 0; +end + +end diff --git a/ex8/ex8/estimateGaussian.m b/ex8/ex8/estimateGaussian.m new file mode 100644 index 0000000..4d03ded --- /dev/null +++ b/ex8/ex8/estimateGaussian.m @@ -0,0 +1,38 @@ +function [mu sigma2] = estimateGaussian(X) +%ESTIMATEGAUSSIAN This function estimates the parameters of a +%Gaussian distribution using the data in X +% [mu sigma2] = estimateGaussian(X), +% The input X is the dataset with each n-dimensional data point in one row +% The output is an n-dimensional vector mu, the mean of the data set +% and the variances sigma^2, an n x 1 vector +% + +% Useful variables +[m, n] = size(X); + +% You should return these values correctly +mu = zeros(n, 1); +sigma2 = zeros(n, 1); + +% ====================== YOUR CODE HERE ====================== +% Instructions: Compute the mean of the data and the variances +% In particular, mu(i) should contain the mean of +% the data for the i-th feature and sigma2(i) +% should contain variance of the i-th feature. +% + +mu = sum(X) * (1 / m); +for i = 1:n + sigma2(i) = sum(((X(:,i) - mu(i)) .^ 2)) * (1 / m); + + + + + + + + +% ============================================================= + + +end diff --git a/ex8/ex8/ex8.m b/ex8/ex8/ex8.m new file mode 100644 index 0000000..54bcdf2 --- /dev/null +++ b/ex8/ex8/ex8.m @@ -0,0 +1,123 @@ +%% Machine Learning Online Class +% Exercise 8 | Anomaly Detection and Collaborative Filtering +% +% Instructions +% ------------ +% +% This file contains code that helps you get started on the +% exercise. 
You will need to complete the following functions: +% +% estimateGaussian.m +% selectThreshold.m +% cofiCostFunc.m +% +% For this exercise, you will not need to change any code in this file, +% or any other files other than those mentioned above. +% + +%% Initialization +clear ; close all; clc + +%% ================== Part 1: Load Example Dataset =================== +% We start this exercise by using a small dataset that is easy to +% visualize. +% +% Our example case consists of 2 network server statistics across +% several machines: the latency and throughput of each machine. +% This exercise will help us find possibly faulty (or very fast) machines. +% + +fprintf('Visualizing example dataset for outlier detection.\n\n'); + +% The following command loads the dataset. You should now have the +% variables X, Xval, yval in your environment +load('ex8data1.mat'); + +% Visualize the example dataset +plot(X(:, 1), X(:, 2), 'bx'); +axis([0 30 0 30]); +xlabel('Latency (ms)'); +ylabel('Throughput (mb/s)'); + +fprintf('Program paused. Press enter to continue.\n'); +pause + + +%% ================== Part 2: Estimate the dataset statistics =================== +% For this exercise, we assume a Gaussian distribution for the dataset. +% +% We first estimate the parameters of our assumed Gaussian distribution, +% then compute the probabilities for each of the points and then visualize +% both the overall distribution and where each of the points falls in +% terms of that distribution. +% +fprintf('Visualizing Gaussian fit.\n\n'); + +% Estimate my and sigma2 +[mu sigma2] = estimateGaussian(X); + +% Returns the density of the multivariate normal at each data point (row) +% of X +p = multivariateGaussian(X, mu, sigma2); + +% Visualize the fit +visualizeFit(X, mu, sigma2); +xlabel('Latency (ms)'); +ylabel('Throughput (mb/s)'); + +fprintf('Program paused. 
Press enter to continue.\n'); +pause; + +%% ================== Part 3: Find Outliers =================== +% Now you will find a good epsilon threshold using a cross-validation set +% probabilities given the estimated Gaussian distribution +% + +pval = multivariateGaussian(Xval, mu, sigma2); + +[epsilon F1] = selectThreshold(yval, pval); +fprintf('Best epsilon found using cross-validation: %e\n', epsilon); +fprintf('Best F1 on Cross Validation Set: %f\n', F1); +fprintf(' (you should see a value epsilon of about 8.99e-05)\n\n'); + +% Find the outliers in the training set and plot the +outliers = find(p < epsilon); + +% Draw a red circle around those outliers +hold on +plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10); +hold off + +fprintf('Program paused. Press enter to continue.\n'); +pause; + +%% ================== Part 4: Multidimensional Outliers =================== +% We will now use the code from the previous part and apply it to a +% harder problem in which more features describe each datapoint and only +% some features indicate whether a point is an outlier. +% + +% Loads the second dataset. 
You should now have the +% variables X, Xval, yval in your environment +load('ex8data2.mat'); + +% Apply the same steps to the larger dataset +[mu sigma2] = estimateGaussian(X); + +% Training set +p = multivariateGaussian(X, mu, sigma2); + +% Cross-validation set +pval = multivariateGaussian(Xval, mu, sigma2); + +% Find the best threshold +[epsilon F1] = selectThreshold(yval, pval); + +fprintf('Best epsilon found using cross-validation: %e\n', epsilon); +fprintf('Best F1 on Cross Validation Set: %f\n', F1); +fprintf('# Outliers found: %d\n', sum(p < epsilon)); +fprintf(' (you should see a value epsilon of about 1.38e-18)\n\n'); +pause + + + diff --git a/ex8/ex8/ex8_cofi.m b/ex8/ex8/ex8_cofi.m new file mode 100644 index 0000000..d8f224f --- /dev/null +++ b/ex8/ex8/ex8_cofi.m @@ -0,0 +1,237 @@ +%% Machine Learning Online Class +% Exercise 8 | Anomaly Detection and Collaborative Filtering +% +% Instructions +% ------------ +% +% This file contains code that helps you get started on the +% exercise. You will need to complete the following functions: +% +% estimateGaussian.m +% selectThreshold.m +% cofiCostFunc.m +% +% For this exercise, you will not need to change any code in this file, +% or any other files other than those mentioned above. +% + +%% =============== Part 1: Loading movie ratings dataset ================ +% You will start by loading the movie ratings dataset to understand the +% structure of the data. +% +fprintf('Loading movie ratings dataset.\n\n'); + +% Load data +load ('ex8_movies.mat'); + +% Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies on +% 943 users +% +% R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a +% rating to movie i + +% From the matrix, we can compute statistics like average rating. +fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ... 
+ mean(Y(1, R(1, :)))); + +% We can "visualize" the ratings matrix by plotting it with imagesc +imagesc(Y); +ylabel('Movies'); +xlabel('Users'); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + +%% ============ Part 2: Collaborative Filtering Cost Function =========== +% You will now implement the cost function for collaborative filtering. +% To help you debug your cost function, we have included set of weights +% that we trained on that. Specifically, you should complete the code in +% cofiCostFunc.m to return J. + +% Load pre-trained weights (X, Theta, num_users, num_movies, num_features) +load ('ex8_movieParams.mat'); + +% Reduce the data set size so that this runs faster +num_users = 4; num_movies = 5; num_features = 3; +X = X(1:num_movies, 1:num_features); +Theta = Theta(1:num_users, 1:num_features); +Y = Y(1:num_movies, 1:num_users); +R = R(1:num_movies, 1:num_users); + +% Evaluate cost function +J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... + num_features, 0); + +fprintf(['Cost at loaded parameters: %f '... + '\n(this value should be about 22.22)\n'], J); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + + +%% ============== Part 3: Collaborative Filtering Gradient ============== +% Once your cost function matches up with ours, you should now implement +% the collaborative filtering gradient function. Specifically, you should +% complete the code in cofiCostFunc.m to return the grad argument. +% +fprintf('\nChecking Gradients (without regularization) ... \n'); + +% Check gradients by running checkNNGradients +checkCostFunction; + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + + +%% ========= Part 4: Collaborative Filtering Cost Regularization ======== +% Now, you should implement regularization for the cost function for +% collaborative filtering. You can implement it by adding the cost of +% regularization to the original cost computation. 
+% + +% Evaluate cost function +J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ... + num_features, 1.5); + +fprintf(['Cost at loaded parameters (lambda = 1.5): %f '... + '\n(this value should be about 31.34)\n'], J); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + + +%% ======= Part 5: Collaborative Filtering Gradient Regularization ====== +% Once your cost matches up with ours, you should proceed to implement +% regularization for the gradient. +% + +% +fprintf('\nChecking Gradients (with regularization) ... \n'); + +% Check gradients by running checkNNGradients +checkCostFunction(1.5); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + + +%% ============== Part 6: Entering ratings for a new user =============== +% Before we will train the collaborative filtering model, we will first +% add ratings that correspond to a new user that we just observed. This +% part of the code will also allow you to put in your own ratings for the +% movies in our dataset! +% +movieList = loadMovieList(); + +% Initialize my ratings +my_ratings = zeros(1682, 1); + +% Check the file movie_idx.txt for id of each movie in our dataset +% For example, Toy Story (1995) has ID 1, so to rate it "4", you can set +my_ratings(1) = 4; + +% Or suppose did not enjoy Silence of the Lambs (1991), you can set +my_ratings(98) = 2; + +% We have selected a few movies we liked / did not like and the ratings we +% gave are as follows: +my_ratings(7) = 3; +my_ratings(12)= 5; +my_ratings(54) = 4; +my_ratings(64)= 5; +my_ratings(66)= 3; +my_ratings(69) = 5; +my_ratings(183) = 4; +my_ratings(226) = 5; +my_ratings(355)= 5; + +fprintf('\n\nNew user ratings:\n'); +for i = 1:length(my_ratings) + if my_ratings(i) > 0 + fprintf('Rated %d for %s\n', my_ratings(i), ... + movieList{i}); + end +end + +fprintf('\nProgram paused. 
Press enter to continue.\n'); +pause; + + +%% ================== Part 7: Learning Movie Ratings ==================== +% Now, you will train the collaborative filtering model on a movie rating +% dataset of 1682 movies and 943 users +% + +fprintf('\nTraining collaborative filtering...\n'); + +% Load data +load('ex8_movies.mat'); + +% Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by +% 943 users +% +% R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a +% rating to movie i + +% Add our own ratings to the data matrix +Y = [my_ratings Y]; +R = [(my_ratings ~= 0) R]; + +% Normalize Ratings +[Ynorm, Ymean] = normalizeRatings(Y, R); + +% Useful Values +num_users = size(Y, 2); +num_movies = size(Y, 1); +num_features = 10; + +% Set Initial Parameters (Theta, X) +X = randn(num_movies, num_features); +Theta = randn(num_users, num_features); + +initial_parameters = [X(:); Theta(:)]; + +% Set options for fmincg +options = optimset('GradObj', 'on', 'MaxIter', 100); + +% Set Regularization +lambda = 10; +theta = fmincg (@(t)(cofiCostFunc(t, Y, R, num_users, num_movies, ... + num_features, lambda)), ... + initial_parameters, options); + +% Unfold the returned theta back into U and W +X = reshape(theta(1:num_movies*num_features), num_movies, num_features); +Theta = reshape(theta(num_movies*num_features+1:end), ... + num_users, num_features); + +fprintf('Recommender system learning completed.\n'); + +fprintf('\nProgram paused. Press enter to continue.\n'); +pause; + +%% ================== Part 8: Recommendation for you ==================== +% After training the model, you can now make recommendations by computing +% the predictions matrix. +% + +p = X * Theta'; +my_predictions = p(:,1) + Ymean; + +movieList = loadMovieList(); + +[r, ix] = sort(my_predictions, 'descend'); +fprintf('\nTop recommendations for you:\n'); +for i=1:10 + j = ix(i); + fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ... 
+ movieList{j}); +end + +fprintf('\n\nOriginal ratings provided:\n'); +for i = 1:length(my_ratings) + if my_ratings(i) > 0 + fprintf('Rated %d for %s\n', my_ratings(i), ... + movieList{i}); + end +end diff --git a/ex8/ex8/ex8_movieParams.mat b/ex8/ex8/ex8_movieParams.mat new file mode 100644 index 0000000..2dea689 Binary files /dev/null and b/ex8/ex8/ex8_movieParams.mat differ diff --git a/ex8/ex8/ex8_movies.mat b/ex8/ex8/ex8_movies.mat new file mode 100644 index 0000000..31ecd00 Binary files /dev/null and b/ex8/ex8/ex8_movies.mat differ diff --git a/ex8/ex8/ex8data1.mat b/ex8/ex8/ex8data1.mat new file mode 100644 index 0000000..1f08123 Binary files /dev/null and b/ex8/ex8/ex8data1.mat differ diff --git a/ex8/ex8/ex8data2.mat b/ex8/ex8/ex8data2.mat new file mode 100644 index 0000000..fe48db3 Binary files /dev/null and b/ex8/ex8/ex8data2.mat differ diff --git a/ex8/ex8/fmincg.m b/ex8/ex8/fmincg.m new file mode 100644 index 0000000..34bf539 --- /dev/null +++ b/ex8/ex8/fmincg.m @@ -0,0 +1,175 @@ +function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) +% Minimize a continuous differentialble multivariate function. Starting point +% is given by "X" (D by 1), and the function named in the string "f", must +% return a function value and a vector of partial derivatives. The Polack- +% Ribiere flavour of conjugate gradients is used to compute search directions, +% and a line search using quadratic and cubic polynomial approximations and the +% Wolfe-Powell stopping criteria is used together with the slope ratio method +% for guessing initial step sizes. Additionally a bunch of checks are made to +% make sure that exploration is taking place and that extrapolation will not +% be unboundedly large. The "length" gives the length of the run: if it is +% positive, it gives the maximum number of line searches, if negative its +% absolute gives the maximum allowed number of function evaluations. 
You can +% (optionally) give "length" a second component, which will indicate the +% reduction in function value to be expected in the first line-search (defaults +% to 1.0). The function returns when either its length is up, or if no further +% progress can be made (ie, we are at a minimum, or so close that due to +% numerical problems, we cannot get any closer). If the function terminates +% within a few iterations, it could be an indication that the function value +% and derivatives are not consistent (ie, there may be a bug in the +% implementation of your "f" function). The function returns the found +% solution "X", a vector of function values "fX" indicating the progress made +% and "i" the number of iterations (line searches or function evaluations, +% depending on the sign of "length") used. +% +% Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5) +% +% See also: checkgrad +% +% Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13 +% +% +% (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen +% +% Permission is granted for anyone to copy, use, or modify these +% programs and accompanying documents for purposes of research or +% education, provided this copyright notice is retained, and note is +% made of any changes that have been made. +% +% These programs and documents are distributed without any warranty, +% express or implied. As the programs were written for research +% purposes only, they have not been tested to the degree that would be +% advisable in any important application. All use of these programs is +% entirely at the user's own risk. 
+% +% [ml-class] Changes Made: +% 1) Function name and argument specifications +% 2) Output display +% + +% Read options +if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter') + length = options.MaxIter; +else + length = 100; +end + + +RHO = 0.01; % a bunch of constants for line searches +SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions +INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket +EXT = 3.0; % extrapolate maximum 3 times the current bracket +MAX = 20; % max 20 function evaluations per line search +RATIO = 100; % maximum allowed slope ratio + +argstr = ['feval(f, X']; % compose string used to call function +for i = 1:(nargin - 3) + argstr = [argstr, ',P', int2str(i)]; +end +argstr = [argstr, ')']; + +if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end +S=['Iteration ']; + +i = 0; % zero the run length counter +ls_failed = 0; % no previous line search has failed +fX = []; +[f1 df1] = eval(argstr); % get function value and gradient +i = i + (length<0); % count epochs?! +s = -df1; % search direction is steepest +d1 = -s'*s; % this is the slope +z1 = red/(1-d1); % initial step is red/(|s|+1) + +while i < abs(length) % while not finished + i = i + (length>0); % count iterations?! + + X0 = X; f0 = f1; df0 = df1; % make a copy of current values + X = X + z1*s; % begin line search + [f2 df2] = eval(argstr); + i = i + (length<0); % count epochs?! + d2 = df2'*s; + f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1 + if length>0, M = MAX; else M = min(MAX, -length-i); end + success = 0; limit = -1; % initialize quanteties + while 1 + while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0) + limit = z1; % tighten the bracket + if f2 > f1 + z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit + else + A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit + B = 3*(f3-f2)-z3*(d3+2*d2); + z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok! 
+ end + if isnan(z2) | isinf(z2) + z2 = z3/2; % if we had a numerical problem then bisect + end + z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits + z1 = z1 + z2; % update the step + X = X + z2*s; + [f2 df2] = eval(argstr); + M = M - 1; i = i + (length<0); % count epochs?! + d2 = df2'*s; + z3 = z3-z2; % z3 is now relative to the location of z2 + end + if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1 + break; % this is a failure + elseif d2 > SIG*d1 + success = 1; break; % success + elseif M == 0 + break; % failure + end + A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation + B = 3*(f3-f2)-z3*(d3+2*d2); + z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok! + if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0 % num prob or wrong sign? + if limit < -0.5 % if we have no upper limit + z2 = z1 * (EXT-1); % the extrapolate the maximum amount + else + z2 = (limit-z1)/2; % otherwise bisect + end + elseif (limit > -0.5) & (z2+z1 > limit) % extraplation beyond max? + z2 = (limit-z1)/2; % bisect + elseif (limit < -0.5) & (z2+z1 > z1*EXT) % extrapolation beyond limit + z2 = z1*(EXT-1.0); % set to extrapolation limit + elseif z2 < -z3*INT + z2 = -z3*INT; + elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT)) % too close to limit? + z2 = (limit-z1)*(1.0-INT); + end + f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2 + z1 = z1 + z2; X = X + z2*s; % update current estimates + [f2 df2] = eval(argstr); + M = M - 1; i = i + (length<0); % count epochs?! 
+ d2 = df2'*s; + end % end of line search + + if success % if line search succeeded + f1 = f2; fX = [fX' f1]'; + fprintf('%s %4i | Cost: %4.6e\r', S, i, f1); + s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction + tmp = df1; df1 = df2; df2 = tmp; % swap derivatives + d2 = df1'*s; + if d2 > 0 % new slope must be negative + s = -df1; % otherwise use steepest direction + d2 = -s'*s; + end + z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO + d1 = d2; + ls_failed = 0; % this line search did not fail + else + X = X0; f1 = f0; df1 = df0; % restore point from before failed line search + if ls_failed | i > abs(length) % line search failed twice in a row + break; % or we ran out of time, so we give up + end + tmp = df1; df1 = df2; df2 = tmp; % swap derivatives + s = -df1; % try steepest + d1 = -s'*s; + z1 = 1/(1-d1); + ls_failed = 1; % this line search failed + end + if exist('OCTAVE_VERSION') + fflush(stdout); + end +end +fprintf('\n'); diff --git a/ex8/ex8/loadMovieList.m b/ex8/ex8/loadMovieList.m new file mode 100644 index 0000000..161321d --- /dev/null +++ b/ex8/ex8/loadMovieList.m @@ -0,0 +1,25 @@ +function movieList = loadMovieList() +%GETMOVIELIST reads the fixed movie list in movie.txt and returns a +%cell array of the words +% movieList = GETMOVIELIST() reads the fixed movie list in movie.txt +% and returns a cell array of the words in movieList. 
+ + +%% Read the fixed movieulary list +fid = fopen('movie_ids.txt'); + +% Store all movies in cell array movie{} +n = 1682; % Total number of movies + +movieList = cell(n, 1); +for i = 1:n + % Read line + line = fgets(fid); + % Word Index (can ignore since it will be = i) + [idx, movieName] = strtok(line, ' '); + % Actual Word + movieList{i} = strtrim(movieName); +end +fclose(fid); + +end diff --git a/ex8/ex8/ml_login_data.mat b/ex8/ex8/ml_login_data.mat new file mode 100644 index 0000000..a876340 --- /dev/null +++ b/ex8/ex8/ml_login_data.mat @@ -0,0 +1,15 @@ +# Created by Octave 3.4.0, Sun Oct 28 23:17:49 2012 CDT +# name: login +# type: sq_string +# elements: 1 +# length: 21 +zhang349@illinois.edu + + +# name: password +# type: sq_string +# elements: 1 +# length: 10 +3pBDppG8Gd + + diff --git a/ex8/ex8/movie_ids.txt b/ex8/ex8/movie_ids.txt new file mode 100644 index 0000000..392427a --- /dev/null +++ b/ex8/ex8/movie_ids.txt @@ -0,0 +1,1682 @@ +1 Toy Story (1995) +2 GoldenEye (1995) +3 Four Rooms (1995) +4 Get Shorty (1995) +5 Copycat (1995) +6 Shanghai Triad (Yao a yao yao dao waipo qiao) (1995) +7 Twelve Monkeys (1995) +8 Babe (1995) +9 Dead Man Walking (1995) +10 Richard III (1995) +11 Seven (Se7en) (1995) +12 Usual Suspects, The (1995) +13 Mighty Aphrodite (1995) +14 Postino, Il (1994) +15 Mr. 
Holland's Opus (1995) +16 French Twist (Gazon maudit) (1995) +17 From Dusk Till Dawn (1996) +18 White Balloon, The (1995) +19 Antonia's Line (1995) +20 Angels and Insects (1995) +21 Muppet Treasure Island (1996) +22 Braveheart (1995) +23 Taxi Driver (1976) +24 Rumble in the Bronx (1995) +25 Birdcage, The (1996) +26 Brothers McMullen, The (1995) +27 Bad Boys (1995) +28 Apollo 13 (1995) +29 Batman Forever (1995) +30 Belle de jour (1967) +31 Crimson Tide (1995) +32 Crumb (1994) +33 Desperado (1995) +34 Doom Generation, The (1995) +35 Free Willy 2: The Adventure Home (1995) +36 Mad Love (1995) +37 Nadja (1994) +38 Net, The (1995) +39 Strange Days (1995) +40 To Wong Foo, Thanks for Everything! Julie Newmar (1995) +41 Billy Madison (1995) +42 Clerks (1994) +43 Disclosure (1994) +44 Dolores Claiborne (1994) +45 Eat Drink Man Woman (1994) +46 Exotica (1994) +47 Ed Wood (1994) +48 Hoop Dreams (1994) +49 I.Q. (1994) +50 Star Wars (1977) +51 Legends of the Fall (1994) +52 Madness of King George, The (1994) +53 Natural Born Killers (1994) +54 Outbreak (1995) +55 Professional, The (1994) +56 Pulp Fiction (1994) +57 Priest (1994) +58 Quiz Show (1994) +59 Three Colors: Red (1994) +60 Three Colors: Blue (1993) +61 Three Colors: White (1994) +62 Stargate (1994) +63 Santa Clause, The (1994) +64 Shawshank Redemption, The (1994) +65 What's Eating Gilbert Grape (1993) +66 While You Were Sleeping (1995) +67 Ace Ventura: Pet Detective (1994) +68 Crow, The (1994) +69 Forrest Gump (1994) +70 Four Weddings and a Funeral (1994) +71 Lion King, The (1994) +72 Mask, The (1994) +73 Maverick (1994) +74 Faster Pussycat! Kill! Kill! (1965) +75 Brother Minister: The Assassination of Malcolm X (1994) +76 Carlito's Way (1993) +77 Firm, The (1993) +78 Free Willy (1993) +79 Fugitive, The (1993) +80 Hot Shots! Part Deux (1993) +81 Hudsucker Proxy, The (1994) +82 Jurassic Park (1993) +83 Much Ado About Nothing (1993) +84 Robert A. 
Heinlein's The Puppet Masters (1994) +85 Ref, The (1994) +86 Remains of the Day, The (1993) +87 Searching for Bobby Fischer (1993) +88 Sleepless in Seattle (1993) +89 Blade Runner (1982) +90 So I Married an Axe Murderer (1993) +91 Nightmare Before Christmas, The (1993) +92 True Romance (1993) +93 Welcome to the Dollhouse (1995) +94 Home Alone (1990) +95 Aladdin (1992) +96 Terminator 2: Judgment Day (1991) +97 Dances with Wolves (1990) +98 Silence of the Lambs, The (1991) +99 Snow White and the Seven Dwarfs (1937) +100 Fargo (1996) +101 Heavy Metal (1981) +102 Aristocats, The (1970) +103 All Dogs Go to Heaven 2 (1996) +104 Theodore Rex (1995) +105 Sgt. Bilko (1996) +106 Diabolique (1996) +107 Moll Flanders (1996) +108 Kids in the Hall: Brain Candy (1996) +109 Mystery Science Theater 3000: The Movie (1996) +110 Operation Dumbo Drop (1995) +111 Truth About Cats & Dogs, The (1996) +112 Flipper (1996) +113 Horseman on the Roof, The (Hussard sur le toit, Le) (1995) +114 Wallace & Gromit: The Best of Aardman Animation (1996) +115 Haunted World of Edward D. Wood Jr., The (1995) +116 Cold Comfort Farm (1995) +117 Rock, The (1996) +118 Twister (1996) +119 Maya Lin: A Strong Clear Vision (1994) +120 Striptease (1996) +121 Independence Day (ID4) (1996) +122 Cable Guy, The (1996) +123 Frighteners, The (1996) +124 Lone Star (1996) +125 Phenomenon (1996) +126 Spitfire Grill, The (1996) +127 Godfather, The (1972) +128 Supercop (1992) +129 Bound (1996) +130 Kansas City (1996) +131 Breakfast at Tiffany's (1961) +132 Wizard of Oz, The (1939) +133 Gone with the Wind (1939) +134 Citizen Kane (1941) +135 2001: A Space Odyssey (1968) +136 Mr. 
Smith Goes to Washington (1939) +137 Big Night (1996) +138 D3: The Mighty Ducks (1996) +139 Love Bug, The (1969) +140 Homeward Bound: The Incredible Journey (1993) +141 20,000 Leagues Under the Sea (1954) +142 Bedknobs and Broomsticks (1971) +143 Sound of Music, The (1965) +144 Die Hard (1988) +145 Lawnmower Man, The (1992) +146 Unhook the Stars (1996) +147 Long Kiss Goodnight, The (1996) +148 Ghost and the Darkness, The (1996) +149 Jude (1996) +150 Swingers (1996) +151 Willy Wonka and the Chocolate Factory (1971) +152 Sleeper (1973) +153 Fish Called Wanda, A (1988) +154 Monty Python's Life of Brian (1979) +155 Dirty Dancing (1987) +156 Reservoir Dogs (1992) +157 Platoon (1986) +158 Weekend at Bernie's (1989) +159 Basic Instinct (1992) +160 Glengarry Glen Ross (1992) +161 Top Gun (1986) +162 On Golden Pond (1981) +163 Return of the Pink Panther, The (1974) +164 Abyss, The (1989) +165 Jean de Florette (1986) +166 Manon of the Spring (Manon des sources) (1986) +167 Private Benjamin (1980) +168 Monty Python and the Holy Grail (1974) +169 Wrong Trousers, The (1993) +170 Cinema Paradiso (1988) +171 Delicatessen (1991) +172 Empire Strikes Back, The (1980) +173 Princess Bride, The (1987) +174 Raiders of the Lost Ark (1981) +175 Brazil (1985) +176 Aliens (1986) +177 Good, The Bad and The Ugly, The (1966) +178 12 Angry Men (1957) +179 Clockwork Orange, A (1971) +180 Apocalypse Now (1979) +181 Return of the Jedi (1983) +182 GoodFellas (1990) +183 Alien (1979) +184 Army of Darkness (1993) +185 Psycho (1960) +186 Blues Brothers, The (1980) +187 Godfather: Part II, The (1974) +188 Full Metal Jacket (1987) +189 Grand Day Out, A (1992) +190 Henry V (1989) +191 Amadeus (1984) +192 Raging Bull (1980) +193 Right Stuff, The (1983) +194 Sting, The (1973) +195 Terminator, The (1984) +196 Dead Poets Society (1989) +197 Graduate, The (1967) +198 Nikita (La Femme Nikita) (1990) +199 Bridge on the River Kwai, The (1957) +200 Shining, The (1980) +201 Evil Dead II (1987) +202 Groundhog Day 
(1993) +203 Unforgiven (1992) +204 Back to the Future (1985) +205 Patton (1970) +206 Akira (1988) +207 Cyrano de Bergerac (1990) +208 Young Frankenstein (1974) +209 This Is Spinal Tap (1984) +210 Indiana Jones and the Last Crusade (1989) +211 M*A*S*H (1970) +212 Unbearable Lightness of Being, The (1988) +213 Room with a View, A (1986) +214 Pink Floyd - The Wall (1982) +215 Field of Dreams (1989) +216 When Harry Met Sally... (1989) +217 Bram Stoker's Dracula (1992) +218 Cape Fear (1991) +219 Nightmare on Elm Street, A (1984) +220 Mirror Has Two Faces, The (1996) +221 Breaking the Waves (1996) +222 Star Trek: First Contact (1996) +223 Sling Blade (1996) +224 Ridicule (1996) +225 101 Dalmatians (1996) +226 Die Hard 2 (1990) +227 Star Trek VI: The Undiscovered Country (1991) +228 Star Trek: The Wrath of Khan (1982) +229 Star Trek III: The Search for Spock (1984) +230 Star Trek IV: The Voyage Home (1986) +231 Batman Returns (1992) +232 Young Guns (1988) +233 Under Siege (1992) +234 Jaws (1975) +235 Mars Attacks! (1996) +236 Citizen Ruth (1996) +237 Jerry Maguire (1996) +238 Raising Arizona (1987) +239 Sneakers (1992) +240 Beavis and Butt-head Do America (1996) +241 Last of the Mohicans, The (1992) +242 Kolya (1996) +243 Jungle2Jungle (1997) +244 Smilla's Sense of Snow (1997) +245 Devil's Own, The (1997) +246 Chasing Amy (1997) +247 Turbo: A Power Rangers Movie (1997) +248 Grosse Pointe Blank (1997) +249 Austin Powers: International Man of Mystery (1997) +250 Fifth Element, The (1997) +251 Shall We Dance? 
(1996) +252 Lost World: Jurassic Park, The (1997) +253 Pillow Book, The (1995) +254 Batman & Robin (1997) +255 My Best Friend's Wedding (1997) +256 When the Cats Away (Chacun cherche son chat) (1996) +257 Men in Black (1997) +258 Contact (1997) +259 George of the Jungle (1997) +260 Event Horizon (1997) +261 Air Bud (1997) +262 In the Company of Men (1997) +263 Steel (1997) +264 Mimic (1997) +265 Hunt for Red October, The (1990) +266 Kull the Conqueror (1997) +267 unknown +268 Chasing Amy (1997) +269 Full Monty, The (1997) +270 Gattaca (1997) +271 Starship Troopers (1997) +272 Good Will Hunting (1997) +273 Heat (1995) +274 Sabrina (1995) +275 Sense and Sensibility (1995) +276 Leaving Las Vegas (1995) +277 Restoration (1995) +278 Bed of Roses (1996) +279 Once Upon a Time... When We Were Colored (1995) +280 Up Close and Personal (1996) +281 River Wild, The (1994) +282 Time to Kill, A (1996) +283 Emma (1996) +284 Tin Cup (1996) +285 Secrets & Lies (1996) +286 English Patient, The (1996) +287 Marvin's Room (1996) +288 Scream (1996) +289 Evita (1996) +290 Fierce Creatures (1997) +291 Absolute Power (1997) +292 Rosewood (1997) +293 Donnie Brasco (1997) +294 Liar Liar (1997) +295 Breakdown (1997) +296 Promesse, La (1996) +297 Ulee's Gold (1997) +298 Face/Off (1997) +299 Hoodlum (1997) +300 Air Force One (1997) +301 In & Out (1997) +302 L.A. Confidential (1997) +303 Ulee's Gold (1997) +304 Fly Away Home (1996) +305 Ice Storm, The (1997) +306 Mrs. Brown (Her Majesty, Mrs. 
Brown) (1997) +307 Devil's Advocate, The (1997) +308 FairyTale: A True Story (1997) +309 Deceiver (1997) +310 Rainmaker, The (1997) +311 Wings of the Dove, The (1997) +312 Midnight in the Garden of Good and Evil (1997) +313 Titanic (1997) +314 3 Ninjas: High Noon At Mega Mountain (1998) +315 Apt Pupil (1998) +316 As Good As It Gets (1997) +317 In the Name of the Father (1993) +318 Schindler's List (1993) +319 Everyone Says I Love You (1996) +320 Paradise Lost: The Child Murders at Robin Hood Hills (1996) +321 Mother (1996) +322 Murder at 1600 (1997) +323 Dante's Peak (1997) +324 Lost Highway (1997) +325 Crash (1996) +326 G.I. Jane (1997) +327 Cop Land (1997) +328 Conspiracy Theory (1997) +329 Desperate Measures (1998) +330 187 (1997) +331 Edge, The (1997) +332 Kiss the Girls (1997) +333 Game, The (1997) +334 U Turn (1997) +335 How to Be a Player (1997) +336 Playing God (1997) +337 House of Yes, The (1997) +338 Bean (1997) +339 Mad City (1997) +340 Boogie Nights (1997) +341 Critical Care (1997) +342 Man Who Knew Too Little, The (1997) +343 Alien: Resurrection (1997) +344 Apostle, The (1997) +345 Deconstructing Harry (1997) +346 Jackie Brown (1997) +347 Wag the Dog (1997) +348 Desperate Measures (1998) +349 Hard Rain (1998) +350 Fallen (1998) +351 Prophecy II, The (1998) +352 Spice World (1997) +353 Deep Rising (1998) +354 Wedding Singer, The (1998) +355 Sphere (1998) +356 Client, The (1994) +357 One Flew Over the Cuckoo's Nest (1975) +358 Spawn (1997) +359 Assignment, The (1997) +360 Wonderland (1997) +361 Incognito (1997) +362 Blues Brothers 2000 (1998) +363 Sudden Death (1995) +364 Ace Ventura: When Nature Calls (1995) +365 Powder (1995) +366 Dangerous Minds (1995) +367 Clueless (1995) +368 Bio-Dome (1996) +369 Black Sheep (1996) +370 Mary Reilly (1996) +371 Bridges of Madison County, The (1995) +372 Jeffrey (1995) +373 Judge Dredd (1995) +374 Mighty Morphin Power Rangers: The Movie (1995) +375 Showgirls (1995) +376 Houseguest (1994) +377 Heavyweights (1994) +378 
Miracle on 34th Street (1994) +379 Tales From the Crypt Presents: Demon Knight (1995) +380 Star Trek: Generations (1994) +381 Muriel's Wedding (1994) +382 Adventures of Priscilla, Queen of the Desert, The (1994) +383 Flintstones, The (1994) +384 Naked Gun 33 1/3: The Final Insult (1994) +385 True Lies (1994) +386 Addams Family Values (1993) +387 Age of Innocence, The (1993) +388 Beverly Hills Cop III (1994) +389 Black Beauty (1994) +390 Fear of a Black Hat (1993) +391 Last Action Hero (1993) +392 Man Without a Face, The (1993) +393 Mrs. Doubtfire (1993) +394 Radioland Murders (1994) +395 Robin Hood: Men in Tights (1993) +396 Serial Mom (1994) +397 Striking Distance (1993) +398 Super Mario Bros. (1993) +399 Three Musketeers, The (1993) +400 Little Rascals, The (1994) +401 Brady Bunch Movie, The (1995) +402 Ghost (1990) +403 Batman (1989) +404 Pinocchio (1940) +405 Mission: Impossible (1996) +406 Thinner (1996) +407 Spy Hard (1996) +408 Close Shave, A (1995) +409 Jack (1996) +410 Kingpin (1996) +411 Nutty Professor, The (1996) +412 Very Brady Sequel, A (1996) +413 Tales from the Crypt Presents: Bordello of Blood (1996) +414 My Favorite Year (1982) +415 Apple Dumpling Gang, The (1975) +416 Old Yeller (1957) +417 Parent Trap, The (1961) +418 Cinderella (1950) +419 Mary Poppins (1964) +420 Alice in Wonderland (1951) +421 William Shakespeare's Romeo and Juliet (1996) +422 Aladdin and the King of Thieves (1996) +423 E.T. 
the Extra-Terrestrial (1982) +424 Children of the Corn: The Gathering (1996) +425 Bob Roberts (1992) +426 Transformers: The Movie, The (1986) +427 To Kill a Mockingbird (1962) +428 Harold and Maude (1971) +429 Day the Earth Stood Still, The (1951) +430 Duck Soup (1933) +431 Highlander (1986) +432 Fantasia (1940) +433 Heathers (1989) +434 Forbidden Planet (1956) +435 Butch Cassidy and the Sundance Kid (1969) +436 American Werewolf in London, An (1981) +437 Amityville 1992: It's About Time (1992) +438 Amityville 3-D (1983) +439 Amityville: A New Generation (1993) +440 Amityville II: The Possession (1982) +441 Amityville Horror, The (1979) +442 Amityville Curse, The (1990) +443 Birds, The (1963) +444 Blob, The (1958) +445 Body Snatcher, The (1945) +446 Burnt Offerings (1976) +447 Carrie (1976) +448 Omen, The (1976) +449 Star Trek: The Motion Picture (1979) +450 Star Trek V: The Final Frontier (1989) +451 Grease (1978) +452 Jaws 2 (1978) +453 Jaws 3-D (1983) +454 Bastard Out of Carolina (1996) +455 Jackie Chan's First Strike (1996) +456 Beverly Hills Ninja (1997) +457 Free Willy 3: The Rescue (1997) +458 Nixon (1995) +459 Cry, the Beloved Country (1995) +460 Crossing Guard, The (1995) +461 Smoke (1995) +462 Like Water For Chocolate (Como agua para chocolate) (1992) +463 Secret of Roan Inish, The (1994) +464 Vanya on 42nd Street (1994) +465 Jungle Book, The (1994) +466 Red Rock West (1992) +467 Bronx Tale, A (1993) +468 Rudy (1993) +469 Short Cuts (1993) +470 Tombstone (1993) +471 Courage Under Fire (1996) +472 Dragonheart (1996) +473 James and the Giant Peach (1996) +474 Dr. 
Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963) +475 Trainspotting (1996) +476 First Wives Club, The (1996) +477 Matilda (1996) +478 Philadelphia Story, The (1940) +479 Vertigo (1958) +480 North by Northwest (1959) +481 Apartment, The (1960) +482 Some Like It Hot (1959) +483 Casablanca (1942) +484 Maltese Falcon, The (1941) +485 My Fair Lady (1964) +486 Sabrina (1954) +487 Roman Holiday (1953) +488 Sunset Blvd. (1950) +489 Notorious (1946) +490 To Catch a Thief (1955) +491 Adventures of Robin Hood, The (1938) +492 East of Eden (1955) +493 Thin Man, The (1934) +494 His Girl Friday (1940) +495 Around the World in 80 Days (1956) +496 It's a Wonderful Life (1946) +497 Bringing Up Baby (1938) +498 African Queen, The (1951) +499 Cat on a Hot Tin Roof (1958) +500 Fly Away Home (1996) +501 Dumbo (1941) +502 Bananas (1971) +503 Candidate, The (1972) +504 Bonnie and Clyde (1967) +505 Dial M for Murder (1954) +506 Rebel Without a Cause (1955) +507 Streetcar Named Desire, A (1951) +508 People vs. 
Larry Flynt, The (1996) +509 My Left Foot (1989) +510 Magnificent Seven, The (1954) +511 Lawrence of Arabia (1962) +512 Wings of Desire (1987) +513 Third Man, The (1949) +514 Annie Hall (1977) +515 Boot, Das (1981) +516 Local Hero (1983) +517 Manhattan (1979) +518 Miller's Crossing (1990) +519 Treasure of the Sierra Madre, The (1948) +520 Great Escape, The (1963) +521 Deer Hunter, The (1978) +522 Down by Law (1986) +523 Cool Hand Luke (1967) +524 Great Dictator, The (1940) +525 Big Sleep, The (1946) +526 Ben-Hur (1959) +527 Gandhi (1982) +528 Killing Fields, The (1984) +529 My Life as a Dog (Mitt liv som hund) (1985) +530 Man Who Would Be King, The (1975) +531 Shine (1996) +532 Kama Sutra: A Tale of Love (1996) +533 Daytrippers, The (1996) +534 Traveller (1997) +535 Addicted to Love (1997) +536 Ponette (1996) +537 My Own Private Idaho (1991) +538 Anastasia (1997) +539 Mouse Hunt (1997) +540 Money Train (1995) +541 Mortal Kombat (1995) +542 Pocahontas (1995) +543 Misérables, Les (1995) +544 Things to Do in Denver when You're Dead (1995) +545 Vampire in Brooklyn (1995) +546 Broken Arrow (1996) +547 Young Poisoner's Handbook, The (1995) +548 NeverEnding Story III, The (1994) +549 Rob Roy (1995) +550 Die Hard: With a Vengeance (1995) +551 Lord of Illusions (1995) +552 Species (1995) +553 Walk in the Clouds, A (1995) +554 Waterworld (1995) +555 White Man's Burden (1995) +556 Wild Bill (1995) +557 Farinelli: il castrato (1994) +558 Heavenly Creatures (1994) +559 Interview with the Vampire (1994) +560 Kid in King Arthur's Court, A (1995) +561 Mary Shelley's Frankenstein (1994) +562 Quick and the Dead, The (1995) +563 Stephen King's The Langoliers (1995) +564 Tales from the Hood (1995) +565 Village of the Damned (1995) +566 Clear and Present Danger (1994) +567 Wes Craven's New Nightmare (1994) +568 Speed (1994) +569 Wolf (1994) +570 Wyatt Earp (1994) +571 Another Stakeout (1993) +572 Blown Away (1994) +573 Body Snatchers (1993) +574 Boxing Helena (1993) +575 City Slickers 
II: The Legend of Curly's Gold (1994) +576 Cliffhanger (1993) +577 Coneheads (1993) +578 Demolition Man (1993) +579 Fatal Instinct (1993) +580 Englishman Who Went Up a Hill, But Came Down a Mountain, The (1995) +581 Kalifornia (1993) +582 Piano, The (1993) +583 Romeo Is Bleeding (1993) +584 Secret Garden, The (1993) +585 Son in Law (1993) +586 Terminal Velocity (1994) +587 Hour of the Pig, The (1993) +588 Beauty and the Beast (1991) +589 Wild Bunch, The (1969) +590 Hellraiser: Bloodline (1996) +591 Primal Fear (1996) +592 True Crime (1995) +593 Stalingrad (1993) +594 Heavy (1995) +595 Fan, The (1996) +596 Hunchback of Notre Dame, The (1996) +597 Eraser (1996) +598 Big Squeeze, The (1996) +599 Police Story 4: Project S (Chao ji ji hua) (1993) +600 Daniel Defoe's Robinson Crusoe (1996) +601 For Whom the Bell Tolls (1943) +602 American in Paris, An (1951) +603 Rear Window (1954) +604 It Happened One Night (1934) +605 Meet Me in St. Louis (1944) +606 All About Eve (1950) +607 Rebecca (1940) +608 Spellbound (1945) +609 Father of the Bride (1950) +610 Gigi (1958) +611 Laura (1944) +612 Lost Horizon (1937) +613 My Man Godfrey (1936) +614 Giant (1956) +615 39 Steps, The (1935) +616 Night of the Living Dead (1968) +617 Blue Angel, The (Blaue Engel, Der) (1930) +618 Picnic (1955) +619 Extreme Measures (1996) +620 Chamber, The (1996) +621 Davy Crockett, King of the Wild Frontier (1955) +622 Swiss Family Robinson (1960) +623 Angels in the Outfield (1994) +624 Three Caballeros, The (1945) +625 Sword in the Stone, The (1963) +626 So Dear to My Heart (1949) +627 Robin Hood: Prince of Thieves (1991) +628 Sleepers (1996) +629 Victor/Victoria (1982) +630 Great Race, The (1965) +631 Crying Game, The (1992) +632 Sophie's Choice (1982) +633 Christmas Carol, A (1938) +634 Microcosmos: Le peuple de l'herbe (1996) +635 Fog, The (1980) +636 Escape from New York (1981) +637 Howling, The (1981) +638 Return of Martin Guerre, The (Retour de Martin Guerre, Le) (1982) +639 Tin Drum, The 
(Blechtrommel, Die) (1979) +640 Cook the Thief His Wife & Her Lover, The (1989) +641 Paths of Glory (1957) +642 Grifters, The (1990) +643 The Innocent (1994) +644 Thin Blue Line, The (1988) +645 Paris Is Burning (1990) +646 Once Upon a Time in the West (1969) +647 Ran (1985) +648 Quiet Man, The (1952) +649 Once Upon a Time in America (1984) +650 Seventh Seal, The (Sjunde inseglet, Det) (1957) +651 Glory (1989) +652 Rosencrantz and Guildenstern Are Dead (1990) +653 Touch of Evil (1958) +654 Chinatown (1974) +655 Stand by Me (1986) +656 M (1931) +657 Manchurian Candidate, The (1962) +658 Pump Up the Volume (1990) +659 Arsenic and Old Lace (1944) +660 Fried Green Tomatoes (1991) +661 High Noon (1952) +662 Somewhere in Time (1980) +663 Being There (1979) +664 Paris, Texas (1984) +665 Alien 3 (1992) +666 Blood For Dracula (Andy Warhol's Dracula) (1974) +667 Audrey Rose (1977) +668 Blood Beach (1981) +669 Body Parts (1991) +670 Body Snatchers (1993) +671 Bride of Frankenstein (1935) +672 Candyman (1992) +673 Cape Fear (1962) +674 Cat People (1982) +675 Nosferatu (Nosferatu, eine Symphonie des Grauens) (1922) +676 Crucible, The (1996) +677 Fire on the Mountain (1996) +678 Volcano (1997) +679 Conan the Barbarian (1981) +680 Kull the Conqueror (1997) +681 Wishmaster (1997) +682 I Know What You Did Last Summer (1997) +683 Rocket Man (1997) +684 In the Line of Fire (1993) +685 Executive Decision (1996) +686 Perfect World, A (1993) +687 McHale's Navy (1997) +688 Leave It to Beaver (1997) +689 Jackal, The (1997) +690 Seven Years in Tibet (1997) +691 Dark City (1998) +692 American President, The (1995) +693 Casino (1995) +694 Persuasion (1995) +695 Kicking and Screaming (1995) +696 City Hall (1996) +697 Basketball Diaries, The (1995) +698 Browning Version, The (1994) +699 Little Women (1994) +700 Miami Rhapsody (1995) +701 Wonderful, Horrible Life of Leni Riefenstahl, The (1993) +702 Barcelona (1994) +703 Widows' Peak (1994) +704 House of the Spirits, The (1993) +705 Singin' in 
the Rain (1952) +706 Bad Moon (1996) +707 Enchanted April (1991) +708 Sex, Lies, and Videotape (1989) +709 Strictly Ballroom (1992) +710 Better Off Dead... (1985) +711 Substance of Fire, The (1996) +712 Tin Men (1987) +713 Othello (1995) +714 Carrington (1995) +715 To Die For (1995) +716 Home for the Holidays (1995) +717 Juror, The (1996) +718 In the Bleak Midwinter (1995) +719 Canadian Bacon (1994) +720 First Knight (1995) +721 Mallrats (1995) +722 Nine Months (1995) +723 Boys on the Side (1995) +724 Circle of Friends (1995) +725 Exit to Eden (1994) +726 Fluke (1995) +727 Immortal Beloved (1994) +728 Junior (1994) +729 Nell (1994) +730 Queen Margot (Reine Margot, La) (1994) +731 Corrina, Corrina (1994) +732 Dave (1993) +733 Go Fish (1994) +734 Made in America (1993) +735 Philadelphia (1993) +736 Shadowlands (1993) +737 Sirens (1994) +738 Threesome (1994) +739 Pretty Woman (1990) +740 Jane Eyre (1996) +741 Last Supper, The (1995) +742 Ransom (1996) +743 Crow: City of Angels, The (1996) +744 Michael Collins (1996) +745 Ruling Class, The (1972) +746 Real Genius (1985) +747 Benny & Joon (1993) +748 Saint, The (1997) +749 MatchMaker, The (1997) +750 Amistad (1997) +751 Tomorrow Never Dies (1997) +752 Replacement Killers, The (1998) +753 Burnt By the Sun (1994) +754 Red Corner (1997) +755 Jumanji (1995) +756 Father of the Bride Part II (1995) +757 Across the Sea of Time (1995) +758 Lawnmower Man 2: Beyond Cyberspace (1996) +759 Fair Game (1995) +760 Screamers (1995) +761 Nick of Time (1995) +762 Beautiful Girls (1996) +763 Happy Gilmore (1996) +764 If Lucy Fell (1996) +765 Boomerang (1992) +766 Man of the Year (1995) +767 Addiction, The (1995) +768 Casper (1995) +769 Congo (1995) +770 Devil in a Blue Dress (1995) +771 Johnny Mnemonic (1995) +772 Kids (1995) +773 Mute Witness (1994) +774 Prophecy, The (1995) +775 Something to Talk About (1995) +776 Three Wishes (1995) +777 Castle Freak (1995) +778 Don Juan DeMarco (1995) +779 Drop Zone (1994) +780 Dumb & Dumber (1994) 
+781 French Kiss (1995) +782 Little Odessa (1994) +783 Milk Money (1994) +784 Beyond Bedlam (1993) +785 Only You (1994) +786 Perez Family, The (1995) +787 Roommates (1995) +788 Relative Fear (1994) +789 Swimming with Sharks (1995) +790 Tommy Boy (1995) +791 Baby-Sitters Club, The (1995) +792 Bullets Over Broadway (1994) +793 Crooklyn (1994) +794 It Could Happen to You (1994) +795 Richie Rich (1994) +796 Speechless (1994) +797 Timecop (1994) +798 Bad Company (1995) +799 Boys Life (1995) +800 In the Mouth of Madness (1995) +801 Air Up There, The (1994) +802 Hard Target (1993) +803 Heaven & Earth (1993) +804 Jimmy Hollywood (1994) +805 Manhattan Murder Mystery (1993) +806 Menace II Society (1993) +807 Poetic Justice (1993) +808 Program, The (1993) +809 Rising Sun (1993) +810 Shadow, The (1994) +811 Thirty-Two Short Films About Glenn Gould (1993) +812 Andre (1994) +813 Celluloid Closet, The (1995) +814 Great Day in Harlem, A (1994) +815 One Fine Day (1996) +816 Candyman: Farewell to the Flesh (1995) +817 Frisk (1995) +818 Girl 6 (1996) +819 Eddie (1996) +820 Space Jam (1996) +821 Mrs. Winterbourne (1996) +822 Faces (1968) +823 Mulholland Falls (1996) +824 Great White Hype, The (1996) +825 Arrival, The (1996) +826 Phantom, The (1996) +827 Daylight (1996) +828 Alaska (1996) +829 Fled (1996) +830 Power 98 (1995) +831 Escape from L.A. (1996) +832 Bogus (1996) +833 Bulletproof (1996) +834 Halloween: The Curse of Michael Myers (1995) +835 Gay Divorcee, The (1934) +836 Ninotchka (1939) +837 Meet John Doe (1941) +838 In the Line of Duty 2 (1987) +839 Loch Ness (1995) +840 Last Man Standing (1996) +841 Glimmer Man, The (1996) +842 Pollyanna (1960) +843 Shaggy Dog, The (1959) +844 Freeway (1996) +845 That Thing You Do! 
(1996) +846 To Gillian on Her 37th Birthday (1996) +847 Looking for Richard (1996) +848 Murder, My Sweet (1944) +849 Days of Thunder (1990) +850 Perfect Candidate, A (1996) +851 Two or Three Things I Know About Her (1966) +852 Bloody Child, The (1996) +853 Braindead (1992) +854 Bad Taste (1987) +855 Diva (1981) +856 Night on Earth (1991) +857 Paris Was a Woman (1995) +858 Amityville: Dollhouse (1996) +859 April Fool's Day (1986) +860 Believers, The (1987) +861 Nosferatu a Venezia (1986) +862 Jingle All the Way (1996) +863 Garden of Finzi-Contini, The (Giardino dei Finzi-Contini, Il) (1970) +864 My Fellow Americans (1996) +865 Ice Storm, The (1997) +866 Michael (1996) +867 Whole Wide World, The (1996) +868 Hearts and Minds (1996) +869 Fools Rush In (1997) +870 Touch (1997) +871 Vegas Vacation (1997) +872 Love Jones (1997) +873 Picture Perfect (1997) +874 Career Girls (1997) +875 She's So Lovely (1997) +876 Money Talks (1997) +877 Excess Baggage (1997) +878 That Darn Cat! (1997) +879 Peacemaker, The (1997) +880 Soul Food (1997) +881 Money Talks (1997) +882 Washington Square (1997) +883 Telling Lies in America (1997) +884 Year of the Horse (1997) +885 Phantoms (1998) +886 Life Less Ordinary, A (1997) +887 Eve's Bayou (1997) +888 One Night Stand (1997) +889 Tango Lesson, The (1997) +890 Mortal Kombat: Annihilation (1997) +891 Bent (1997) +892 Flubber (1997) +893 For Richer or Poorer (1997) +894 Home Alone 3 (1997) +895 Scream 2 (1997) +896 Sweet Hereafter, The (1997) +897 Time Tracers (1995) +898 Postman, The (1997) +899 Winter Guest, The (1997) +900 Kundun (1997) +901 Mr. Magoo (1997) +902 Big Lebowski, The (1998) +903 Afterglow (1997) +904 Ma vie en rose (My Life in Pink) (1997) +905 Great Expectations (1998) +906 Oscar & Lucinda (1997) +907 Vermin (1998) +908 Half Baked (1998) +909 Dangerous Beauty (1998) +910 Nil By Mouth (1997) +911 Twilight (1998) +912 U.S. 
Marshalls (1998) +913 Love and Death on Long Island (1997) +914 Wild Things (1998) +915 Primary Colors (1998) +916 Lost in Space (1998) +917 Mercury Rising (1998) +918 City of Angels (1998) +919 City of Lost Children, The (1995) +920 Two Bits (1995) +921 Farewell My Concubine (1993) +922 Dead Man (1995) +923 Raise the Red Lantern (1991) +924 White Squall (1996) +925 Unforgettable (1996) +926 Down Periscope (1996) +927 Flower of My Secret, The (Flor de mi secreto, La) (1995) +928 Craft, The (1996) +929 Harriet the Spy (1996) +930 Chain Reaction (1996) +931 Island of Dr. Moreau, The (1996) +932 First Kid (1996) +933 Funeral, The (1996) +934 Preacher's Wife, The (1996) +935 Paradise Road (1997) +936 Brassed Off (1996) +937 Thousand Acres, A (1997) +938 Smile Like Yours, A (1997) +939 Murder in the First (1995) +940 Airheads (1994) +941 With Honors (1994) +942 What's Love Got to Do with It (1993) +943 Killing Zoe (1994) +944 Renaissance Man (1994) +945 Charade (1963) +946 Fox and the Hound, The (1981) +947 Big Blue, The (Grand bleu, Le) (1988) +948 Booty Call (1997) +949 How to Make an American Quilt (1995) +950 Georgia (1995) +951 Indian in the Cupboard, The (1995) +952 Blue in the Face (1995) +953 Unstrung Heroes (1995) +954 Unzipped (1995) +955 Before Sunrise (1995) +956 Nobody's Fool (1994) +957 Pushing Hands (1992) +958 To Live (Huozhe) (1994) +959 Dazed and Confused (1993) +960 Naked (1993) +961 Orlando (1993) +962 Ruby in Paradise (1993) +963 Some Folks Call It a Sling Blade (1993) +964 Month by the Lake, A (1995) +965 Funny Face (1957) +966 Affair to Remember, An (1957) +967 Little Lord Fauntleroy (1936) +968 Inspector General, The (1949) +969 Winnie the Pooh and the Blustery Day (1968) +970 Hear My Song (1991) +971 Mediterraneo (1991) +972 Passion Fish (1992) +973 Grateful Dead (1995) +974 Eye for an Eye (1996) +975 Fear (1996) +976 Solo (1996) +977 Substitute, The (1996) +978 Heaven's Prisoners (1996) +979 Trigger Effect, The (1996) +980 Mother Night (1996) 
+981 Dangerous Ground (1997) +982 Maximum Risk (1996) +983 Rich Man's Wife, The (1996) +984 Shadow Conspiracy (1997) +985 Blood & Wine (1997) +986 Turbulence (1997) +987 Underworld (1997) +988 Beautician and the Beast, The (1997) +989 Cats Don't Dance (1997) +990 Anna Karenina (1997) +991 Keys to Tulsa (1997) +992 Head Above Water (1996) +993 Hercules (1997) +994 Last Time I Committed Suicide, The (1997) +995 Kiss Me, Guido (1997) +996 Big Green, The (1995) +997 Stuart Saves His Family (1995) +998 Cabin Boy (1994) +999 Clean Slate (1994) +1000 Lightning Jack (1994) +1001 Stupids, The (1996) +1002 Pest, The (1997) +1003 That Darn Cat! (1997) +1004 Geronimo: An American Legend (1993) +1005 Double vie de Véronique, La (Double Life of Veronique, The) (1991) +1006 Until the End of the World (Bis ans Ende der Welt) (1991) +1007 Waiting for Guffman (1996) +1008 I Shot Andy Warhol (1996) +1009 Stealing Beauty (1996) +1010 Basquiat (1996) +1011 2 Days in the Valley (1996) +1012 Private Parts (1997) +1013 Anaconda (1997) +1014 Romy and Michele's High School Reunion (1997) +1015 Shiloh (1997) +1016 Con Air (1997) +1017 Trees Lounge (1996) +1018 Tie Me Up! Tie Me Down! (1990) +1019 Die xue shuang xiong (Killer, The) (1989) +1020 Gaslight (1944) +1021 8 1/2 (1963) +1022 Fast, Cheap & Out of Control (1997) +1023 Fathers' Day (1997) +1024 Mrs. 
Dalloway (1997) +1025 Fire Down Below (1997) +1026 Lay of the Land, The (1997) +1027 Shooter, The (1995) +1028 Grumpier Old Men (1995) +1029 Jury Duty (1995) +1030 Beverly Hillbillies, The (1993) +1031 Lassie (1994) +1032 Little Big League (1994) +1033 Homeward Bound II: Lost in San Francisco (1996) +1034 Quest, The (1996) +1035 Cool Runnings (1993) +1036 Drop Dead Fred (1991) +1037 Grease 2 (1982) +1038 Switchback (1997) +1039 Hamlet (1996) +1040 Two if by Sea (1996) +1041 Forget Paris (1995) +1042 Just Cause (1995) +1043 Rent-a-Kid (1995) +1044 Paper, The (1994) +1045 Fearless (1993) +1046 Malice (1993) +1047 Multiplicity (1996) +1048 She's the One (1996) +1049 House Arrest (1996) +1050 Ghost and Mrs. Muir, The (1947) +1051 Associate, The (1996) +1052 Dracula: Dead and Loving It (1995) +1053 Now and Then (1995) +1054 Mr. Wrong (1996) +1055 Simple Twist of Fate, A (1994) +1056 Cronos (1992) +1057 Pallbearer, The (1996) +1058 War, The (1994) +1059 Don't Be a Menace to South Central While Drinking Your Juice in the Hood (1996) +1060 Adventures of Pinocchio, The (1996) +1061 Evening Star, The (1996) +1062 Four Days in September (1997) +1063 Little Princess, A (1995) +1064 Crossfire (1947) +1065 Koyaanisqatsi (1983) +1066 Balto (1995) +1067 Bottle Rocket (1996) +1068 Star Maker, The (Uomo delle stelle, L') (1995) +1069 Amateur (1994) +1070 Living in Oblivion (1995) +1071 Party Girl (1995) +1072 Pyromaniac's Love Story, A (1995) +1073 Shallow Grave (1994) +1074 Reality Bites (1994) +1075 Man of No Importance, A (1994) +1076 Pagemaster, The (1994) +1077 Love and a .45 (1994) +1078 Oliver & Company (1988) +1079 Joe's Apartment (1996) +1080 Celestial Clockwork (1994) +1081 Curdled (1996) +1082 Female Perversions (1996) +1083 Albino Alligator (1996) +1084 Anne Frank Remembered (1995) +1085 Carried Away (1996) +1086 It's My Party (1995) +1087 Bloodsport 2 (1995) +1088 Double Team (1997) +1089 Speed 2: Cruise Control (1997) +1090 Sliver (1993) +1091 Pete's Dragon (1977) 
+1092 Dear God (1996) +1093 Live Nude Girls (1995) +1094 Thin Line Between Love and Hate, A (1996) +1095 High School High (1996) +1096 Commandments (1997) +1097 Hate (Haine, La) (1995) +1098 Flirting With Disaster (1996) +1099 Red Firecracker, Green Firecracker (1994) +1100 What Happened Was... (1994) +1101 Six Degrees of Separation (1993) +1102 Two Much (1996) +1103 Trust (1990) +1104 C'est arrivé près de chez vous (1992) +1105 Firestorm (1998) +1106 Newton Boys, The (1998) +1107 Beyond Rangoon (1995) +1108 Feast of July (1995) +1109 Death and the Maiden (1994) +1110 Tank Girl (1995) +1111 Double Happiness (1994) +1112 Cobb (1994) +1113 Mrs. Parker and the Vicious Circle (1994) +1114 Faithful (1996) +1115 Twelfth Night (1996) +1116 Mark of Zorro, The (1940) +1117 Surviving Picasso (1996) +1118 Up in Smoke (1978) +1119 Some Kind of Wonderful (1987) +1120 I'm Not Rappaport (1996) +1121 Umbrellas of Cherbourg, The (Parapluies de Cherbourg, Les) (1964) +1122 They Made Me a Criminal (1939) +1123 Last Time I Saw Paris, The (1954) +1124 Farewell to Arms, A (1932) +1125 Innocents, The (1961) +1126 Old Man and the Sea, The (1958) +1127 Truman Show, The (1998) +1128 Heidi Fleiss: Hollywood Madam (1995) +1129 Chungking Express (1994) +1130 Jupiter's Wife (1994) +1131 Safe (1995) +1132 Feeling Minnesota (1996) +1133 Escape to Witch Mountain (1975) +1134 Get on the Bus (1996) +1135 Doors, The (1991) +1136 Ghosts of Mississippi (1996) +1137 Beautiful Thing (1996) +1138 Best Men (1997) +1139 Hackers (1995) +1140 Road to Wellville, The (1994) +1141 War Room, The (1993) +1142 When We Were Kings (1996) +1143 Hard Eight (1996) +1144 Quiet Room, The (1996) +1145 Blue Chips (1994) +1146 Calendar Girl (1993) +1147 My Family (1995) +1148 Tom & Viv (1994) +1149 Walkabout (1971) +1150 Last Dance (1996) +1151 Original Gangstas (1996) +1152 In Love and War (1996) +1153 Backbeat (1993) +1154 Alphaville (1965) +1155 Rendezvous in Paris (Rendez-vous de Paris, Les) (1995) +1156 Cyclo (1995) 
+1157 Relic, The (1997) +1158 Fille seule, La (A Single Girl) (1995) +1159 Stalker (1979) +1160 Love! Valour! Compassion! (1997) +1161 Palookaville (1996) +1162 Phat Beach (1996) +1163 Portrait of a Lady, The (1996) +1164 Zeus and Roxanne (1997) +1165 Big Bully (1996) +1166 Love & Human Remains (1993) +1167 Sum of Us, The (1994) +1168 Little Buddha (1993) +1169 Fresh (1994) +1170 Spanking the Monkey (1994) +1171 Wild Reeds (1994) +1172 Women, The (1939) +1173 Bliss (1997) +1174 Caught (1996) +1175 Hugo Pool (1997) +1176 Welcome To Sarajevo (1997) +1177 Dunston Checks In (1996) +1178 Major Payne (1994) +1179 Man of the House (1995) +1180 I Love Trouble (1994) +1181 Low Down Dirty Shame, A (1994) +1182 Cops and Robbersons (1994) +1183 Cowboy Way, The (1994) +1184 Endless Summer 2, The (1994) +1185 In the Army Now (1994) +1186 Inkwell, The (1994) +1187 Switchblade Sisters (1975) +1188 Young Guns II (1990) +1189 Prefontaine (1997) +1190 That Old Feeling (1997) +1191 Letter From Death Row, A (1998) +1192 Boys of St. 
Vincent, The (1993) +1193 Before the Rain (Pred dozhdot) (1994) +1194 Once Were Warriors (1994) +1195 Strawberry and Chocolate (Fresa y chocolate) (1993) +1196 Savage Nights (Nuits fauves, Les) (1992) +1197 Family Thing, A (1996) +1198 Purple Noon (1960) +1199 Cemetery Man (Dellamorte Dellamore) (1994) +1200 Kim (1950) +1201 Marlene Dietrich: Shadow and Light (1996) +1202 Maybe, Maybe Not (Bewegte Mann, Der) (1994) +1203 Top Hat (1935) +1204 To Be or Not to Be (1942) +1205 Secret Agent, The (1996) +1206 Amos & Andrew (1993) +1207 Jade (1995) +1208 Kiss of Death (1995) +1209 Mixed Nuts (1994) +1210 Virtuosity (1995) +1211 Blue Sky (1994) +1212 Flesh and Bone (1993) +1213 Guilty as Sin (1993) +1214 In the Realm of the Senses (Ai no corrida) (1976) +1215 Barb Wire (1996) +1216 Kissed (1996) +1217 Assassins (1995) +1218 Friday (1995) +1219 Goofy Movie, A (1995) +1220 Higher Learning (1995) +1221 When a Man Loves a Woman (1994) +1222 Judgment Night (1993) +1223 King of the Hill (1993) +1224 Scout, The (1994) +1225 Angus (1995) +1226 Night Falls on Manhattan (1997) +1227 Awfully Big Adventure, An (1995) +1228 Under Siege 2: Dark Territory (1995) +1229 Poison Ivy II (1995) +1230 Ready to Wear (Pret-A-Porter) (1994) +1231 Marked for Death (1990) +1232 Madonna: Truth or Dare (1991) +1233 Nénette et Boni (1996) +1234 Chairman of the Board (1998) +1235 Big Bang Theory, The (1994) +1236 Other Voices, Other Rooms (1997) +1237 Twisted (1996) +1238 Full Speed (1996) +1239 Cutthroat Island (1995) +1240 Ghost in the Shell (Kokaku kidotai) (1995) +1241 Van, The (1996) +1242 Old Lady Who Walked in the Sea, The (Vieille qui marchait dans la mer, La) (1991) +1243 Night Flier (1997) +1244 Metro (1997) +1245 Gridlock'd (1997) +1246 Bushwhacked (1995) +1247 Bad Girls (1994) +1248 Blink (1994) +1249 For Love or Money (1993) +1250 Best of the Best 3: No Turning Back (1995) +1251 A Chef in Love (1996) +1252 Contempt (Mépris, Le) (1963) +1253 Tie That Binds, The (1995) +1254 Gone Fishin' 
(1997) +1255 Broken English (1996) +1256 Designated Mourner, The (1997) +1257 Designated Mourner, The (1997) +1258 Trial and Error (1997) +1259 Pie in the Sky (1995) +1260 Total Eclipse (1995) +1261 Run of the Country, The (1995) +1262 Walking and Talking (1996) +1263 Foxfire (1996) +1264 Nothing to Lose (1994) +1265 Star Maps (1997) +1266 Bread and Chocolate (Pane e cioccolata) (1973) +1267 Clockers (1995) +1268 Bitter Moon (1992) +1269 Love in the Afternoon (1957) +1270 Life with Mikey (1993) +1271 North (1994) +1272 Talking About Sex (1994) +1273 Color of Night (1994) +1274 Robocop 3 (1993) +1275 Killer (Bulletproof Heart) (1994) +1276 Sunset Park (1996) +1277 Set It Off (1996) +1278 Selena (1997) +1279 Wild America (1997) +1280 Gang Related (1997) +1281 Manny & Lo (1996) +1282 Grass Harp, The (1995) +1283 Out to Sea (1997) +1284 Before and After (1996) +1285 Princess Caraboo (1994) +1286 Shall We Dance? (1937) +1287 Ed (1996) +1288 Denise Calls Up (1995) +1289 Jack and Sarah (1995) +1290 Country Life (1994) +1291 Celtic Pride (1996) +1292 Simple Wish, A (1997) +1293 Star Kid (1997) +1294 Ayn Rand: A Sense of Life (1997) +1295 Kicked in the Head (1997) +1296 Indian Summer (1996) +1297 Love Affair (1994) +1298 Band Wagon, The (1953) +1299 Penny Serenade (1941) +1300 'Til There Was You (1997) +1301 Stripes (1981) +1302 Late Bloomers (1996) +1303 Getaway, The (1994) +1304 New York Cop (1996) +1305 National Lampoon's Senior Trip (1995) +1306 Delta of Venus (1994) +1307 Carmen Miranda: Bananas Is My Business (1994) +1308 Babyfever (1994) +1309 Very Natural Thing, A (1974) +1310 Walk in the Sun, A (1945) +1311 Waiting to Exhale (1995) +1312 Pompatus of Love, The (1996) +1313 Palmetto (1998) +1314 Surviving the Game (1994) +1315 Inventing the Abbotts (1997) +1316 Horse Whisperer, The (1998) +1317 Journey of August King, The (1995) +1318 Catwalk (1995) +1319 Neon Bible, The (1995) +1320 Homage (1995) +1321 Open Season (1996) +1322 Metisse (Café au Lait) (1993) +1323 
Wooden Man's Bride, The (Wu Kui) (1994) +1324 Loaded (1994) +1325 August (1996) +1326 Boys (1996) +1327 Captives (1994) +1328 Of Love and Shadows (1994) +1329 Low Life, The (1994) +1330 An Unforgettable Summer (1994) +1331 Last Klezmer: Leopold Kozlowski, His Life and Music, The (1995) +1332 My Life and Times With Antonin Artaud (En compagnie d'Antonin Artaud) (1993) +1333 Midnight Dancers (Sibak) (1994) +1334 Somebody to Love (1994) +1335 American Buffalo (1996) +1336 Kazaam (1996) +1337 Larger Than Life (1996) +1338 Two Deaths (1995) +1339 Stefano Quantestorie (1993) +1340 Crude Oasis, The (1995) +1341 Hedd Wyn (1992) +1342 Convent, The (Convento, O) (1995) +1343 Lotto Land (1995) +1344 Story of Xinghua, The (1993) +1345 Day the Sun Turned Cold, The (Tianguo niezi) (1994) +1346 Dingo (1992) +1347 Ballad of Narayama, The (Narayama Bushiko) (1958) +1348 Every Other Weekend (1990) +1349 Mille bolle blu (1993) +1350 Crows and Sparrows (1949) +1351 Lover's Knot (1996) +1352 Shadow of Angels (Schatten der Engel) (1976) +1353 1-900 (1994) +1354 Venice/Venice (1992) +1355 Infinity (1996) +1356 Ed's Next Move (1996) +1357 For the Moment (1994) +1358 The Deadly Cure (1996) +1359 Boys in Venice (1996) +1360 Sexual Life of the Belgians, The (1994) +1361 Search for One-eye Jimmy, The (1996) +1362 American Strays (1996) +1363 Leopard Son, The (1996) +1364 Bird of Prey (1996) +1365 Johnny 100 Pesos (1993) +1366 JLG/JLG - autoportrait de décembre (1994) +1367 Faust (1994) +1368 Mina Tannenbaum (1994) +1369 Forbidden Christ, The (Cristo proibito, Il) (1950) +1370 I Can't Sleep (J'ai pas sommeil) (1994) +1371 Machine, The (1994) +1372 Stranger, The (1994) +1373 Good Morning (1971) +1374 Falling in Love Again (1980) +1375 Cement Garden, The (1993) +1376 Meet Wally Sparks (1997) +1377 Hotel de Love (1996) +1378 Rhyme & Reason (1997) +1379 Love and Other Catastrophes (1996) +1380 Hollow Reed (1996) +1381 Losing Chase (1996) +1382 Bonheur, Le (1965) +1383 Second Jungle Book: Mowgli & 
Baloo, The (1997) +1384 Squeeze (1996) +1385 Roseanna's Grave (For Roseanna) (1997) +1386 Tetsuo II: Body Hammer (1992) +1387 Fall (1997) +1388 Gabbeh (1996) +1389 Mondo (1996) +1390 Innocent Sleep, The (1995) +1391 For Ever Mozart (1996) +1392 Locusts, The (1997) +1393 Stag (1997) +1394 Swept from the Sea (1997) +1395 Hurricane Streets (1998) +1396 Stonewall (1995) +1397 Of Human Bondage (1934) +1398 Anna (1996) +1399 Stranger in the House (1997) +1400 Picture Bride (1995) +1401 M. Butterfly (1993) +1402 Ciao, Professore! (1993) +1403 Caro Diario (Dear Diary) (1994) +1404 Withnail and I (1987) +1405 Boy's Life 2 (1997) +1406 When Night Is Falling (1995) +1407 Specialist, The (1994) +1408 Gordy (1995) +1409 Swan Princess, The (1994) +1410 Harlem (1993) +1411 Barbarella (1968) +1412 Land Before Time III: The Time of the Great Giving (1995) (V) +1413 Street Fighter (1994) +1414 Coldblooded (1995) +1415 Next Karate Kid, The (1994) +1416 No Escape (1994) +1417 Turning, The (1992) +1418 Joy Luck Club, The (1993) +1419 Highlander III: The Sorcerer (1994) +1420 Gilligan's Island: The Movie (1998) +1421 My Crazy Life (Mi vida loca) (1993) +1422 Suture (1993) +1423 Walking Dead, The (1995) +1424 I Like It Like That (1994) +1425 I'll Do Anything (1994) +1426 Grace of My Heart (1996) +1427 Drunks (1995) +1428 SubUrbia (1997) +1429 Sliding Doors (1998) +1430 Ill Gotten Gains (1997) +1431 Legal Deceit (1997) +1432 Mighty, The (1998) +1433 Men of Means (1998) +1434 Shooting Fish (1997) +1435 Steal Big, Steal Little (1995) +1436 Mr. Jones (1993) +1437 House Party 3 (1994) +1438 Panther (1995) +1439 Jason's Lyric (1994) +1440 Above the Rim (1994) +1441 Moonlight and Valentino (1995) +1442 Scarlet Letter, The (1995) +1443 8 Seconds (1994) +1444 That Darn Cat! 
(1965) +1445 Ladybird Ladybird (1994) +1446 Bye Bye, Love (1995) +1447 Century (1993) +1448 My Favorite Season (1993) +1449 Pather Panchali (1955) +1450 Golden Earrings (1947) +1451 Foreign Correspondent (1940) +1452 Lady of Burlesque (1943) +1453 Angel on My Shoulder (1946) +1454 Angel and the Badman (1947) +1455 Outlaw, The (1943) +1456 Beat the Devil (1954) +1457 Love Is All There Is (1996) +1458 Damsel in Distress, A (1937) +1459 Madame Butterfly (1995) +1460 Sleepover (1995) +1461 Here Comes Cookie (1935) +1462 Thieves (Voleurs, Les) (1996) +1463 Boys, Les (1997) +1464 Stars Fell on Henrietta, The (1995) +1465 Last Summer in the Hamptons (1995) +1466 Margaret's Museum (1995) +1467 Saint of Fort Washington, The (1993) +1468 Cure, The (1995) +1469 Tom and Huck (1995) +1470 Gumby: The Movie (1995) +1471 Hideaway (1995) +1472 Visitors, The (Visiteurs, Les) (1993) +1473 Little Princess, The (1939) +1474 Nina Takes a Lover (1994) +1475 Bhaji on the Beach (1993) +1476 Raw Deal (1948) +1477 Nightwatch (1997) +1478 Dead Presidents (1995) +1479 Reckless (1995) +1480 Herbie Rides Again (1974) +1481 S.F.W. 
(1994) +1482 Gate of Heavenly Peace, The (1995) +1483 Man in the Iron Mask, The (1998) +1484 Jerky Boys, The (1994) +1485 Colonel Chabert, Le (1994) +1486 Girl in the Cadillac (1995) +1487 Even Cowgirls Get the Blues (1993) +1488 Germinal (1993) +1489 Chasers (1994) +1490 Fausto (1993) +1491 Tough and Deadly (1995) +1492 Window to Paris (1994) +1493 Modern Affair, A (1995) +1494 Mostro, Il (1994) +1495 Flirt (1995) +1496 Carpool (1996) +1497 Line King: Al Hirschfeld, The (1996) +1498 Farmer & Chase (1995) +1499 Grosse Fatigue (1994) +1500 Santa with Muscles (1996) +1501 Prisoner of the Mountains (Kavkazsky Plennik) (1996) +1502 Naked in New York (1994) +1503 Gold Diggers: The Secret of Bear Mountain (1995) +1504 Bewegte Mann, Der (1994) +1505 Killer: A Journal of Murder (1995) +1506 Nelly & Monsieur Arnaud (1995) +1507 Three Lives and Only One Death (1996) +1508 Babysitter, The (1995) +1509 Getting Even with Dad (1994) +1510 Mad Dog Time (1996) +1511 Children of the Revolution (1996) +1512 World of Apu, The (Apur Sansar) (1959) +1513 Sprung (1997) +1514 Dream With the Fishes (1997) +1515 Wings of Courage (1995) +1516 Wedding Gift, The (1994) +1517 Race the Sun (1996) +1518 Losing Isaiah (1995) +1519 New Jersey Drive (1995) +1520 Fear, The (1995) +1521 Mr. 
Wonderful (1993) +1522 Trial by Jury (1994) +1523 Good Man in Africa, A (1994) +1524 Kaspar Hauser (1993) +1525 Object of My Affection, The (1998) +1526 Witness (1985) +1527 Senseless (1998) +1528 Nowhere (1997) +1529 Underground (1995) +1530 Jefferson in Paris (1995) +1531 Far From Home: The Adventures of Yellow Dog (1995) +1532 Foreign Student (1994) +1533 I Don't Want to Talk About It (De eso no se habla) (1993) +1534 Twin Town (1997) +1535 Enfer, L' (1994) +1536 Aiqing wansui (1994) +1537 Cosi (1996) +1538 All Over Me (1997) +1539 Being Human (1993) +1540 Amazing Panda Adventure, The (1995) +1541 Beans of Egypt, Maine, The (1994) +1542 Scarlet Letter, The (1926) +1543 Johns (1996) +1544 It Takes Two (1995) +1545 Frankie Starlight (1995) +1546 Shadows (Cienie) (1988) +1547 Show, The (1995) +1548 The Courtyard (1995) +1549 Dream Man (1995) +1550 Destiny Turns on the Radio (1995) +1551 Glass Shield, The (1994) +1552 Hunted, The (1995) +1553 Underneath, The (1995) +1554 Safe Passage (1994) +1555 Secret Adventures of Tom Thumb, The (1993) +1556 Condition Red (1995) +1557 Yankee Zulu (1994) +1558 Aparajito (1956) +1559 Hostile Intentions (1994) +1560 Clean Slate (Coup de Torchon) (1981) +1561 Tigrero: A Film That Was Never Made (1994) +1562 Eye of Vichy, The (Oeil de Vichy, L') (1993) +1563 Promise, The (Versprechen, Das) (1994) +1564 To Cross the Rubicon (1991) +1565 Daens (1992) +1566 Man from Down Under, The (1943) +1567 Careful (1992) +1568 Vermont Is For Lovers (1992) +1569 Vie est belle, La (Life is Rosey) (1987) +1570 Quartier Mozart (1992) +1571 Touki Bouki (Journey of the Hyena) (1973) +1572 Wend Kuuni (God's Gift) (1982) +1573 Spirits of the Dead (Tre passi nel delirio) (1968) +1574 Pharaoh's Army (1995) +1575 I, Worst of All (Yo, la peor de todas) (1990) +1576 Hungarian Fairy Tale, A (1987) +1577 Death in the Garden (Mort en ce jardin, La) (1956) +1578 Collectionneuse, La (1967) +1579 Baton Rouge (1988) +1580 Liebelei (1933) +1581 Woman in Question, The 
(1950) +1582 T-Men (1947) +1583 Invitation, The (Zaproszenie) (1986) +1584 Symphonie pastorale, La (1946) +1585 American Dream (1990) +1586 Lashou shentan (1992) +1587 Terror in a Texas Town (1958) +1588 Salut cousin! (1996) +1589 Schizopolis (1996) +1590 To Have, or Not (1995) +1591 Duoluo tianshi (1995) +1592 Magic Hour, The (1998) +1593 Death in Brunswick (1991) +1594 Everest (1998) +1595 Shopping (1994) +1596 Nemesis 2: Nebula (1995) +1597 Romper Stomper (1992) +1598 City of Industry (1997) +1599 Someone Else's America (1995) +1600 Guantanamera (1994) +1601 Office Killer (1997) +1602 Price Above Rubies, A (1998) +1603 Angela (1995) +1604 He Walked by Night (1948) +1605 Love Serenade (1996) +1606 Deceiver (1997) +1607 Hurricane Streets (1998) +1608 Buddy (1997) +1609 B*A*P*S (1997) +1610 Truth or Consequences, N.M. (1997) +1611 Intimate Relations (1996) +1612 Leading Man, The (1996) +1613 Tokyo Fist (1995) +1614 Reluctant Debutante, The (1958) +1615 Warriors of Virtue (1997) +1616 Desert Winds (1995) +1617 Hugo Pool (1997) +1618 King of New York (1990) +1619 All Things Fair (1996) +1620 Sixth Man, The (1997) +1621 Butterfly Kiss (1995) +1622 Paris, France (1993) +1623 Cérémonie, La (1995) +1624 Hush (1998) +1625 Nightwatch (1997) +1626 Nobody Loves Me (Keiner liebt mich) (1994) +1627 Wife, The (1995) +1628 Lamerica (1994) +1629 Nico Icon (1995) +1630 Silence of the Palace, The (Saimt el Qusur) (1994) +1631 Slingshot, The (1993) +1632 Land and Freedom (Tierra y libertad) (1995) +1633 Á köldum klaka (Cold Fever) (1994) +1634 Etz Hadomim Tafus (Under the Domin Tree) (1994) +1635 Two Friends (1986) +1636 Brothers in Trouble (1995) +1637 Girls Town (1996) +1638 Normal Life (1996) +1639 Bitter Sugar (Azucar Amargo) (1996) +1640 Eighth Day, The (1996) +1641 Dadetown (1995) +1642 Some Mother's Son (1996) +1643 Angel Baby (1995) +1644 Sudden Manhattan (1996) +1645 Butcher Boy, The (1998) +1646 Men With Guns (1997) +1647 Hana-bi (1997) +1648 Niagara, Niagara (1997) +1649 
Big One, The (1997) +1650 Butcher Boy, The (1998) +1651 Spanish Prisoner, The (1997) +1652 Temptress Moon (Feng Yue) (1996) +1653 Entertaining Angels: The Dorothy Day Story (1996) +1654 Chairman of the Board (1998) +1655 Favor, The (1994) +1656 Little City (1998) +1657 Target (1995) +1658 Substance of Fire, The (1996) +1659 Getting Away With Murder (1996) +1660 Small Faces (1995) +1661 New Age, The (1994) +1662 Rough Magic (1995) +1663 Nothing Personal (1995) +1664 8 Heads in a Duffel Bag (1997) +1665 Brother's Kiss, A (1997) +1666 Ripe (1996) +1667 Next Step, The (1995) +1668 Wedding Bell Blues (1996) +1669 MURDER and murder (1996) +1670 Tainted (1998) +1671 Further Gesture, A (1996) +1672 Kika (1993) +1673 Mirage (1995) +1674 Mamma Roma (1962) +1675 Sunchaser, The (1996) +1676 War at Home, The (1996) +1677 Sweet Nothing (1995) +1678 Mat' i syn (1997) +1679 B. Monkey (1998) +1680 Sliding Doors (1998) +1681 You So Crazy (1994) +1682 Scream of Stone (Schrei aus Stein) (1991) diff --git a/ex8/ex8/multivariateGaussian.m b/ex8/ex8/multivariateGaussian.m new file mode 100644 index 0000000..5fb029e --- /dev/null +++ b/ex8/ex8/multivariateGaussian.m @@ -0,0 +1,22 @@ +function p = multivariateGaussian(X, mu, Sigma2) +%MULTIVARIATEGAUSSIAN Computes the probability density function of the +%multivariate gaussian distribution. +% p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability +% density function of the examples X under the multivariate gaussian +% distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is +% treated as the covariance matrix. If Sigma2 is a vector, it is treated +% as the \sigma^2 values of the variances in each dimension (a diagonal +% covariance matrix) +% + +k = length(mu); + +if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1) + Sigma2 = diag(Sigma2); +end + +X = bsxfun(@minus, X, mu(:)'); +p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ... 
+ exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2)); + +end \ No newline at end of file diff --git a/ex8/ex8/normalizeRatings.m b/ex8/ex8/normalizeRatings.m new file mode 100644 index 0000000..d4e0940 --- /dev/null +++ b/ex8/ex8/normalizeRatings.m @@ -0,0 +1,17 @@ +function [Ynorm, Ymean] = normalizeRatings(Y, R) +%NORMALIZERATINGS Preprocess data by subtracting mean rating for every +%movie (every row) +% [Ynorm, Ymean] = NORMALIZERATINGS(Y, R) normalized Y so that each movie +% has a rating of 0 on average, and returns the mean rating in Ymean. +% + +[m, n] = size(Y); +Ymean = zeros(m, 1); +Ynorm = zeros(size(Y)); +for i = 1:m + idx = find(R(i, :) == 1); + Ymean(i) = mean(Y(i, idx)); + Ynorm(i, idx) = Y(i, idx) - Ymean(i); +end + +end diff --git a/ex8/ex8/octave-core b/ex8/ex8/octave-core new file mode 100644 index 0000000..6551ee7 Binary files /dev/null and b/ex8/ex8/octave-core differ diff --git a/ex8/ex8/selectThreshold.m b/ex8/ex8/selectThreshold.m new file mode 100644 index 0000000..8476c7a --- /dev/null +++ b/ex8/ex8/selectThreshold.m @@ -0,0 +1,75 @@ +function [bestEpsilon bestF1] = selectThreshold(yval, pval) +%SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting +%outliers +% [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best +% threshold to use for selecting outliers based on the results from a +% validation set (pval) and the ground truth (yval). +% + +bestEpsilon = 0; +bestF1 = 0; +F1 = 0; + +stepsize = (max(pval) - min(pval)) / 1000; +for epsilon = min(pval):stepsize:max(pval) + + % ====================== YOUR CODE HERE ====================== + % Instructions: Compute the F1 score of choosing epsilon as the + % threshold and place the value in F1. The code at the + % end of the loop will compare the F1 score for this + % choice of epsilon and set it to be the best epsilon if + % it is better than the current choice of epsilon. 
+ % + % Note: You can use predictions = (pval < epsilon) to get a binary vector + % of 0's and 1's of the outlier predictions + +%predictions = (pval < epsilon); +%tp = sum(predictions == 1 & yval == 1); +%fp = sum(predictions == 1 & yval == 0); +%fn = sum(predictions == 0 & yval == 1); +%prec = tp / (tp + fp); +%rec = tp / (tp + fn); +%F1 = (2 * prec * rec) / (prec + rec); +pred = pval < epsilon; +nyval = 1 - yval; +npred = 1 - pred; + +tp = sum( pred & yval); +tn = sum(npred & nyval); +fp = sum( pred & nyval); +fn = sum(npred & yval); +%total=tp+tn+fp+fn + +precision = 0; +recall = 0; +F1 = 0; + +if tp+fp > 0 + precision = tp/(tp+fp); +end +if tp+fn > 0 + recall = tp/(tp+fn); +end +if precision + recall > 0 + F1 = 2 * precision * recall / (precision + recall) ; +end + + + + + + + + + + + + % ============================================================= + + if F1 > bestF1 + bestF1 = F1; + bestEpsilon = epsilon; + end +end + +end diff --git a/ex8/ex8/submit.m b/ex8/ex8/submit.m new file mode 100644 index 0000000..98971d4 --- /dev/null +++ b/ex8/ex8/submit.m @@ -0,0 +1,588 @@ +function submit(partId, webSubmit) +%SUBMIT Submit your code and output to the ml-class servers +% SUBMIT() will connect to the ml-class server and submit your solution + + fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ... + homework_id()); + if ~exist('partId', 'var') || isempty(partId) + partId = promptPart(); + end + + if ~exist('webSubmit', 'var') || isempty(webSubmit) + webSubmit = 0; % submit directly by default + end + + % Check valid partId + partNames = validParts(); + if ~isValidPartId(partId) + fprintf('!! Invalid homework part selected.\n'); + fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1); + fprintf('!! 
Submission Cancelled\n'); + return + end + + if ~exist('ml_login_data.mat','file') + [login password] = loginPrompt(); + save('ml_login_data.mat','login','password'); + else + load('ml_login_data.mat'); + [login password] = quickLogin(login, password); + save('ml_login_data.mat','login','password'); + end + + if isempty(login) + fprintf('!! Submission Cancelled\n'); + return + end + + fprintf('\n== Connecting to ml-class ... '); + if exist('OCTAVE_VERSION') + fflush(stdout); + end + + % Setup submit list + if partId == numel(partNames) + 1 + submitParts = 1:numel(partNames); + else + submitParts = [partId]; + end + + for s = 1:numel(submitParts) + thisPartId = submitParts(s); + if (~webSubmit) % submit directly to server + [login, ch, signature, auxstring] = getChallenge(login, thisPartId); + if isempty(login) || isempty(ch) || isempty(signature) + % Some error occured, error string in first return element. + fprintf('\n!! Error: %s\n\n', login); + return + end + + % Attempt Submission with Challenge + ch_resp = challengeResponse(login, password, ch); + + [result, str] = submitSolution(login, ch_resp, thisPartId, ... + output(thisPartId, auxstring), source(thisPartId), signature); + + partName = partNames{thisPartId}; + + fprintf('\n== [ml-class] Submitted Assignment %s - Part %d - %s\n', ... + homework_id(), thisPartId, partName); + fprintf('== %s\n', strtrim(str)); + + if exist('OCTAVE_VERSION') + fflush(stdout); + end + else + [result] = submitSolutionWeb(login, thisPartId, output(thisPartId), ... + source(thisPartId)); + result = base64encode(result); + + fprintf('\nSave as submission file [submit_ex%s_part%d.txt (enter to accept default)]:', ... 
+ homework_id(), thisPartId); + saveAsFile = input('', 's'); + if (isempty(saveAsFile)) + saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), thisPartId); + end + + fid = fopen(saveAsFile, 'w'); + if (fid) + fwrite(fid, result); + fclose(fid); + fprintf('\nSaved your solutions to %s.\n\n', saveAsFile); + fprintf(['You can now submit your solutions through the web \n' ... + 'form in the programming exercises. Select the corresponding \n' ... + 'programming exercise to access the form.\n']); + + else + fprintf('Unable to save to %s\n\n', saveAsFile); + fprintf(['You can create a submission file by saving the \n' ... + 'following text in a file: (press enter to continue)\n\n']); + pause; + fprintf(result); + end + end + end +end + +% ================== CONFIGURABLES FOR EACH HOMEWORK ================== + +function id = homework_id() + id = '8'; +end + +function [partNames] = validParts() + partNames = { 'Estimate Gaussian Parameters', ... + 'Select Threshold' ... + 'Collaborative Filtering Cost', ... + 'Collaborative Filtering Gradient', ... + 'Regularized Cost', ... + 'Regularized Gradient' ... + }; +end + +function srcs = sources() + % Separated by part + srcs = { { 'estimateGaussian.m' }, ... + { 'selectThreshold.m' }, ... + { 'cofiCostFunc.m' }, ... + { 'cofiCostFunc.m' }, ... + { 'cofiCostFunc.m' }, ... + { 'cofiCostFunc.m' }, ... + }; +end + +function out = output(partId, auxstring) + % Random Test Cases + n_u = 3; n_m = 4; n = 5; + X = reshape(sin(1:n_m*n), n_m, n); + Theta = reshape(cos(1:n_u*n), n_u, n); + Y = reshape(sin(1:2:2*n_m*n_u), n_m, n_u); + R = Y > 0.5; + pval = [abs(Y(:)) ; 0.001; 1]; + yval = [R(:) ; 1; 0]; + params = [X(:); Theta(:)]; + if partId == 1 + [mu sigma2] = estimateGaussian(X); + out = sprintf('%0.5f ', [mu(:); sigma2(:)]); + elseif partId == 2 + [bestEpsilon bestF1] = selectThreshold(yval, pval); + out = sprintf('%0.5f ', [bestEpsilon(:); bestF1(:)]); + elseif partId == 3 + [J] = cofiCostFunc(params, Y, R, n_u, n_m, ... 
+ n, 0); + out = sprintf('%0.5f ', J(:)); + elseif partId == 4 + [J, grad] = cofiCostFunc(params, Y, R, n_u, n_m, ... + n, 0); + out = sprintf('%0.5f ', grad(:)); + elseif partId == 5 + [J] = cofiCostFunc(params, Y, R, n_u, n_m, ... + n, 1.5); + out = sprintf('%0.5f ', J(:)); + elseif partId == 6 + [J, grad] = cofiCostFunc(params, Y, R, n_u, n_m, ... + n, 1.5); + out = sprintf('%0.5f ', grad(:)); + end +end + +% ====================== SERVER CONFIGURATION =========================== + +% ***************** REMOVE -staging WHEN YOU DEPLOY ********************* +function url = site_url() + url = 'http://class.coursera.org/ml-2012-002'; +end + +function url = challenge_url() + url = [site_url() '/assignment/challenge']; +end + +function url = submit_url() + url = [site_url() '/assignment/submit']; +end + +% ========================= CHALLENGE HELPERS ========================= + +function src = source(partId) + src = ''; + src_files = sources(); + if partId <= numel(src_files) + flist = src_files{partId}; + for i = 1:numel(flist) + fid = fopen(flist{i}); + if (fid == -1) + error('Error opening %s (is it missing?)', flist{i}); + end + line = fgets(fid); + while ischar(line) + src = [src line]; + line = fgets(fid); + end + fclose(fid); + src = [src '||||||||']; + end + end +end + +function ret = isValidPartId(partId) + partNames = validParts(); + ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1); +end + +function partId = promptPart() + fprintf('== Select which part(s) to submit:\n'); + partNames = validParts(); + srcFiles = sources(); + for i = 1:numel(partNames) + fprintf('== %d) %s [', i, partNames{i}); + fprintf(' %s ', srcFiles{i}{:}); + fprintf(']\n'); + end + fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ... 
+ numel(partNames) + 1, numel(partNames) + 1); + selPart = input('', 's'); + partId = str2num(selPart); + if ~isValidPartId(partId) + partId = -1; + end +end + +function [email,ch,signature,auxstring] = getChallenge(email, part) + str = urlread(challenge_url(), 'post', {'email_address', email, 'assignment_part_sid', [homework_id() '-' num2str(part)], 'response_encoding', 'delim'}); + + str = strtrim(str); + r = struct; + while(numel(str) > 0) + [f, str] = strtok (str, '|'); + [v, str] = strtok (str, '|'); + r = setfield(r, f, v); + end + + email = getfield(r, 'email_address'); + ch = getfield(r, 'challenge_key'); + signature = getfield(r, 'state'); + auxstring = getfield(r, 'challenge_aux_data'); +end + +function [result, str] = submitSolutionWeb(email, part, output, source) + + result = ['{"assignment_part_sid":"' base64encode([homework_id() '-' num2str(part)], '') '",' ... + '"email_address":"' base64encode(email, '') '",' ... + '"submission":"' base64encode(output, '') '",' ... + '"submission_aux":"' base64encode(source, '') '"' ... + '}']; + str = 'Web-submission'; +end + +function [result, str] = submitSolution(email, ch_resp, part, output, ... + source, signature) + + params = {'assignment_part_sid', [homework_id() '-' num2str(part)], ... + 'email_address', email, ... + 'submission', base64encode(output, ''), ... + 'submission_aux', base64encode(source, ''), ... + 'challenge_response', ch_resp, ... 
+ 'state', signature}; + + str = urlread(submit_url(), 'post', params); + + % Parse str to read for success / failure + result = 0; + +end + +% =========================== LOGIN HELPERS =========================== + +function [login password] = loginPrompt() + % Prompt for password + [login password] = basicPrompt(); + + if isempty(login) || isempty(password) + login = []; password = []; + end +end + + +function [login password] = basicPrompt() + login = input('Login (Email address): ', 's'); + password = input('Password: ', 's'); +end + +function [login password] = quickLogin(login,password) + disp(['You are currently logged in as ' login '.']); + cont_token = input('Is this you? (y/n - type n to reenter password)','s'); + if(isempty(cont_token) || cont_token(1)=='Y'||cont_token(1)=='y') + return; + else + [login password] = loginPrompt(); + end +end + +function [str] = challengeResponse(email, passwd, challenge) + str = sha1([challenge passwd]); +end + +% =============================== SHA-1 ================================ + +function hash = sha1(str) + + % Initialize variables + h0 = uint32(1732584193); + h1 = uint32(4023233417); + h2 = uint32(2562383102); + h3 = uint32(271733878); + h4 = uint32(3285377520); + + % Convert to word array + strlen = numel(str); + + % Break string into chars and append the bit 1 to the message + mC = [double(str) 128]; + mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')]; + + numB = strlen * 8; + if exist('idivide') + numC = idivide(uint32(numB + 65), 512, 'ceil'); + else + numC = ceil(double(numB + 65)/512); + end + numW = numC * 16; + mW = zeros(numW, 1, 'uint32'); + + idx = 1; + for i = 1:4:strlen + 1 + mW(idx) = bitor(bitor(bitor( ... + bitshift(uint32(mC(i)), 24), ... + bitshift(uint32(mC(i+1)), 16)), ... + bitshift(uint32(mC(i+2)), 8)), ... 
+ uint32(mC(i+3))); + idx = idx + 1; + end + + % Append length of message + mW(numW - 1) = uint32(bitshift(uint64(numB), -32)); + mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32)); + + % Process the message in successive 512-bit chs + for cId = 1 : double(numC) + cSt = (cId - 1) * 16 + 1; + cEnd = cId * 16; + ch = mW(cSt : cEnd); + + % Extend the sixteen 32-bit words into eighty 32-bit words + for j = 17 : 80 + ch(j) = ch(j - 3); + ch(j) = bitxor(ch(j), ch(j - 8)); + ch(j) = bitxor(ch(j), ch(j - 14)); + ch(j) = bitxor(ch(j), ch(j - 16)); + ch(j) = bitrotate(ch(j), 1); + end + + % Initialize hash value for this ch + a = h0; + b = h1; + c = h2; + d = h3; + e = h4; + + % Main loop + for i = 1 : 80 + if(i >= 1 && i <= 20) + f = bitor(bitand(b, c), bitand(bitcmp(b), d)); + k = uint32(1518500249); + elseif(i >= 21 && i <= 40) + f = bitxor(bitxor(b, c), d); + k = uint32(1859775393); + elseif(i >= 41 && i <= 60) + f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d)); + k = uint32(2400959708); + elseif(i >= 61 && i <= 80) + f = bitxor(bitxor(b, c), d); + k = uint32(3395469782); + end + + t = bitrotate(a, 5); + t = bitadd(t, f); + t = bitadd(t, e); + t = bitadd(t, k); + t = bitadd(t, ch(i)); + e = d; + d = c; + c = bitrotate(b, 30); + b = a; + a = t; + + end + h0 = bitadd(h0, a); + h1 = bitadd(h1, b); + h2 = bitadd(h2, c); + h3 = bitadd(h3, d); + h4 = bitadd(h4, e); + + end + + hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]); + + hash = lower(hash); + +end + +function ret = bitadd(iA, iB) + ret = double(iA) + double(iB); + ret = bitset(ret, 33, 0); + ret = uint32(ret); +end + +function ret = bitrotate(iA, places) + t = bitshift(iA, places - 32); + ret = bitshift(iA, places); + ret = bitor(ret, t); +end + +% =========================== Base64 Encoder ============================ +% Thanks to Peter John Acklam +% + +function y = base64encode(x, eol) +%BASE64ENCODE Perform base64 encoding on a string. 
+% +% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending +% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10). +% The returned encoded string is broken into lines of no more than 76 +% characters each, and each line will end with EOL unless it is empty. Let +% EOL be empty if you do not want the encoded string broken into lines. +% +% STR and EOL don't have to be strings (i.e., char arrays). The only +% requirement is that they are vectors containing values in the range 0-255. +% +% This function may be used to encode strings into the Base64 encoding +% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The +% Base64 encoding is designed to represent arbitrary sequences of octets in a +% form that need not be humanly readable. A 65-character subset +% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per +% printable character. +% +% Examples +% -------- +% +% If you want to encode a large file, you should encode it in chunks that are +% a multiple of 57 bytes. This ensures that the base64 lines line up and +% that you do not end up with padding in the middle. 57 bytes of data fills +% one complete base64 line (76 == 57*4/3): +% +% If ifid and ofid are two file identifiers opened for reading and writing, +% respectively, then you can base64 encode the data with +% +% while ~feof(ifid) +% fwrite(ofid, base64encode(fread(ifid, 60*57))); +% end +% +% or, if you have enough memory, +% +% fwrite(ofid, base64encode(fread(ifid))); +% +% See also BASE64DECODE. 
+ +% Author: Peter John Acklam +% Time-stamp: 2004-02-03 21:36:56 +0100 +% E-mail: pjacklam@online.no +% URL: http://home.online.no/~pjacklam + + if isnumeric(x) + x = num2str(x); + end + + % make sure we have the EOL value + if nargin < 2 + eol = sprintf('\n'); + else + if sum(size(eol) > 1) > 1 + error('EOL must be a vector.'); + end + if any(eol(:) > 255) + error('EOL can not contain values larger than 255.'); + end + end + + if sum(size(x) > 1) > 1 + error('STR must be a vector.'); + end + + x = uint8(x); + eol = uint8(eol); + + ndbytes = length(x); % number of decoded bytes + nchunks = ceil(ndbytes / 3); % number of chunks/groups + nebytes = 4 * nchunks; % number of encoded bytes + + % add padding if necessary, to make the length of x a multiple of 3 + if rem(ndbytes, 3) + x(end+1 : 3*nchunks) = 0; + end + + x = reshape(x, [3, nchunks]); % reshape the data + y = repmat(uint8(0), 4, nchunks); % for the encoded data + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Split up every 3 bytes into 4 pieces + % + % aaaaaabb bbbbcccc ccdddddd + % + % to form + % + % 00aaaaaa 00bbbbbb 00cccccc 00dddddd + % + y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:) + + y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:) + y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:) + + y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:) + y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:) + + y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:) + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Now perform the following mapping + % + % 0 - 25 -> A-Z + % 26 - 51 -> a-z + % 52 - 61 -> 0-9 + % 62 -> + + % 63 -> / + % + % We could use a mapping vector like + % + % ['A':'Z', 'a':'z', '0':'9', '+/'] + % + % but that would require an index vector of class double. 
+ % + z = repmat(uint8(0), size(y)); + i = y <= 25; z(i) = 'A' + double(y(i)); + i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i)); + i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i)); + i = y == 62; z(i) = '+'; + i = y == 63; z(i) = '/'; + y = z; + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % Add padding if necessary. + % + npbytes = 3 * nchunks - ndbytes; % number of padding bytes + if npbytes + y(end-npbytes+1 : end) = '='; % '=' is used for padding + end + + if isempty(eol) + + % reshape to a row vector + y = reshape(y, [1, nebytes]); + + else + + nlines = ceil(nebytes / 76); % number of lines + neolbytes = length(eol); % number of bytes in eol string + + % pad data so it becomes a multiple of 76 elements + y = [y(:) ; zeros(76 * nlines - numel(y), 1)]; + y(nebytes + 1 : 76 * nlines) = 0; + y = reshape(y, 76, nlines); + + % insert eol strings + eol = eol(:); + y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines)); + + % remove padding, but keep the last eol string + m = nebytes + neolbytes * (nlines - 1); + n = (76+neolbytes)*nlines - neolbytes; + y(m+1 : n) = ''; + + % extract and reshape to row vector + y = reshape(y, 1, m+neolbytes); + + end + + % output is a character array + y = char(y); + +end diff --git a/ex8/ex8/submitWeb.m b/ex8/ex8/submitWeb.m new file mode 100644 index 0000000..e429365 --- /dev/null +++ b/ex8/ex8/submitWeb.m @@ -0,0 +1,20 @@ +% submitWeb Creates files from your code and output for web submission. +% +% If the submit function does not work for you, use the web-submission mechanism. +% Call this function to produce a file for the part you wish to submit. Then, +% submit the file to the class servers using the "Web Submission" button on the +% Programming Exercises page on the course website. +% +% You should call this function without arguments (submitWeb), to receive +% an interactive prompt for submission; optionally you can call it with the partID +% if you so wish. 
Make sure your working directory is set to the directory
+%   containing the submitWeb.m file and your assignment files.
+
+function submitWeb(partId)
+  % Treat a missing or empty partId as [] before handing it to submit.
+  if ~exist('partId', 'var') || isempty(partId)
+    partId = [];
+  end
+  % NOTE(review): the 1 presumably selects web-submission mode; see submit.m.
+  submit(partId, 1);
+end
diff --git a/ex8/ex8/visualizeFit.m b/ex8/ex8/visualizeFit.m
new file mode 100644
index 0000000..8917a1e
--- /dev/null
+++ b/ex8/ex8/visualizeFit.m
@@ -0,0 +1,30 @@
+function visualizeFit(X, mu, sigma2)
+%VISUALIZEFIT Visualize the dataset and its estimated distribution.
+%   VISUALIZEFIT(X, mu, sigma2) plots the first two columns of X as blue
+%   crosses and overlays contour lines of the Gaussian density evaluated
+%   by multivariateGaussian(..., mu, sigma2) on a grid covering 0..35 in
+%   both dimensions. Each example has a location (x1, x2) that depends on
+%   its feature values.
+%
+%   X      - data matrix; only columns 1 and 2 are plotted.
+%   mu     - passed unchanged to multivariateGaussian (presumably the
+%            per-feature mean; confirm against that function).
+%   sigma2 - passed unchanged to multivariateGaussian (presumably the
+%            variances or covariance matrix; confirm against that function).
+
+% Evaluate the density on a 0:0.5:35 grid in both dimensions.
+[X1, X2] = meshgrid(0:.5:35);
+Z = multivariateGaussian([X1(:) X2(:)], mu, sigma2);
+Z = reshape(Z, size(X1));
+
+plot(X(:, 1), X(:, 2), 'bx');
+hold on;
+% Skip the contours if any grid value is infinite. Z is a matrix, so test
+% every element explicitly rather than relying on a vector-valued IF.
+if ~any(isinf(Z(:)))
+    % Contour levels are spaced by powers of ten: 1e-20, 1e-17, ..., 1e-2.
+    contour(X1, X2, Z, 10.^(-20:3:0)');
+end
+hold off;
+
+end
\ No newline at end of file