Saturday, June 20, 2015

Dimensionality Reduction - K-Means Clustering and PCA - Machine Learning

My solutions to the Week 8 exercises:


Part 1 : Find Closest Centroids

function idx = findClosestCentroids(X, centroids)
%FINDCLOSESTCENTROIDS computes the centroid memberships for every example
%   idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids
%   in idx for a dataset X where each row is a single example. idx = m x 1 
%   vector of centroid assignments (i.e. each entry in range [1..K])
%

% Set K
K = size(centroids, 1);

% You need to return the following variables correctly.
idx = zeros(size(X,1), 1);

% ====================== YOUR CODE HERE ======================
% Instructions: Go over every example, find its closest centroid, and store
%               the index inside idx at the appropriate location.
%               Concretely, idx(i) should contain the index of the centroid
%               closest to example i. Hence, it should be a value in the 
%               range 1..K
%
% Note: You can use a for-loop over the examples to compute this.
%
for i = 1:size(X, 1)
 minDistance = Inf;
 minIndex = -1;
 for j = 1:K
  thisDistance = sum((X(i, :) - centroids(j, :)).^2);
  if thisDistance < minDistance, minDistance = thisDistance; minIndex = j; end
 end
 idx(i) = minIndex;
end

% =============================================================

end
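
A quick sanity check on made-up data (my own toy example, not part of the exercise):

X = [1 1; 1 2; 2 1; 8 8; 9 8; 8 9];
centroids = [1 1; 8 8];
idx = findClosestCentroids(X, centroids)
% The first three points are nearest to centroid 1 and the last three to
% centroid 2, so idx should come out as [1; 1; 1; 2; 2; 2].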



Part 2 : Compute Centroid Means

function centroids = computeCentroids(X, idx, K)
%COMPUTECENTROIDS returns the new centroids by computing the means of the 
%data points assigned to each centroid.
%   centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by 
%   computing the means of the data points assigned to each centroid. It is
%   given a dataset X where each row is a single data point, a vector
%   idx of centroid assignments (i.e. each entry in range [1..K]) for each
%   example, and K, the number of centroids. You should return a matrix
%   centroids, where each row of centroids is the mean of the data points
%   assigned to it.
%

% Useful variables
[m n] = size(X);

% You need to return the following variables correctly.
centroids = zeros(K, n);


% ====================== YOUR CODE HERE ======================
% Instructions: Go over every centroid and compute mean of all points that
%               belong to it. Concretely, the row vector centroids(i, :)
%               should contain the mean of the data points assigned to
%               centroid i.
%
% Note: You can use a for-loop over the centroids to compute this.
%

for i = 1:K
 add=zeros(1, n);
 count=0;
 for j=1:m 
  if idx(j)==i 
  add = add+X(j,:); 
  count = count+1; 
  end
 end
 centroids(i, :) = add/count;
end

% =============================================================

end
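
The two functions above make up one K-means iteration. A minimal sketch of a full run (the number of iterations and the random initialization are my own choices for illustration, not part of the submitted code):

max_iters = 10;
perm = randperm(size(X, 1));
centroids = X(perm(1:K), :);   % seed with K distinct examples
for iter = 1:max_iters
 idx = findClosestCentroids(X, centroids);
 centroids = computeCentroids(X, idx, K);
end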



Part 3 : PCA

function [U, S] = pca(X)
%PCA Run principal component analysis on the dataset X
%   [U, S] = pca(X) computes eigenvectors of the covariance matrix of X
%   Returns the eigenvectors U, the eigenvalues (on diagonal) in S
%

% Useful values
[m, n] = size(X);

% You need to return the following variables correctly.
U = zeros(n);
S = zeros(n);

% ====================== YOUR CODE HERE ======================
% Instructions: You should first compute the covariance matrix. Then, you
%               should use the "svd" function to compute the eigenvectors
%               and eigenvalues of the covariance matrix. 
%
% Note: When computing the covariance matrix, remember to divide by m (the
%       number of examples).
%

covarianceMatrixSigma = (X'*X)/m;
[U S V] = svd(covarianceMatrixSigma);

% =========================================================================

end
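
Note that pca assumes the input has already been normalized. A usage sketch (featureNormalize is the helper shipped with the exercise, if I remember its signature correctly):

[X_norm, mu, sigma] = featureNormalize(X);
[U, S] = pca(X_norm);
% S holds the eigenvalues on its diagonal; the fraction of variance retained
% by the top K components is the sum of the first K diagonal entries of S
% divided by the sum of all of them.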



Part 4 : Project Data

function Z = projectData(X, U, K)
%PROJECTDATA Computes the reduced data representation when projecting only 
%on to the top k eigenvectors
%   Z = projectData(X, U, K) computes the projection of 
%   the normalized inputs X into the reduced dimensional space spanned by
%   the first K columns of U. It returns the projected examples in Z.
%

% You need to return the following variables correctly.
Z = zeros(size(X, 1), K);

% ====================== YOUR CODE HERE ======================
% Instructions: Compute the projection of the data using only the top K 
%               eigenvectors in U (first K columns). 
%               For the i-th example X(i,:), the projection on to the k-th 
%               eigenvector is given as follows:
%                    x = X(i, :)';
%                    projection_k = x' * U(:, k);
%

Ureduced = U(:, 1:K);
Z = X*Ureduced; 

% =============================================================

end



Part 5 : Recover Data

function X_rec = recoverData(Z, U, K)
%RECOVERDATA Recovers an approximation of the original data when using the 
%projected data
%   X_rec = RECOVERDATA(Z, U, K) recovers an approximation of the 
%   original data that has been reduced to K dimensions. It returns the
%   approximate reconstruction in X_rec.
%

% You need to return the following variables correctly.
X_rec = zeros(size(Z, 1), size(U, 1)); 
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the approximation of the data by projecting back
%               onto the original space using the top K eigenvectors in U.
%
%               For the i-th example Z(i,:), the (approximate)
%               recovered data for dimension j is given as follows:
%                    v = Z(i, :)';
%                    recovered_j = v' * U(j, 1:K)';
%
%               Notice that U(j, 1:K) is a row vector.
%               

U_reducedT = U(:, 1:K)';
X_rec = Z*U_reducedT;  
% =============================================================

end
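
Putting projectData and recoverData together (continuing the sketch above with the normalized data and the U from pca; K = 1 is just an example value):

K = 1;
Z = projectData(X_norm, U, K);
X_rec = recoverData(Z, U, K);
% X_rec lives back in the original n-dimensional space, but only the variance
% captured by the first K eigenvectors survives the round trip.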

Thursday, June 18, 2015

Support Vector Machines - Machine Learning

My solutions to Week 7 Exercises:

Part 1 : Gaussian Kernel

function sim = gaussianKernel(x1, x2, sigma)
%RBFKERNEL returns a radial basis function kernel between x1 and x2
%   sim = gaussianKernel(x1, x2) returns a gaussian kernel between x1 and x2
%   and returns the value in sim

% Ensure that x1 and x2 are column vectors
x1 = x1(:); x2 = x2(:);

% You need to return the following variables correctly.
sim = 0;

% ====================== YOUR CODE HERE ======================
% Instructions: Fill in this function to return the similarity between x1
%               and x2 computed using a Gaussian kernel with bandwidth
%               sigma
%
%

differenceSquared = (x1-x2) .^ 2; 
sim = exp(-sum(differenceSquared)/(2*sigma^2));


% =============================================================
    
end
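
A hand-checkable example (the same numbers ex6.m evaluates, if I recall correctly):

sim = gaussianKernel([1 2 1], [0 4 -1], 2)
% sum((x1 - x2).^2) = 1 + 4 + 4 = 9, so sim = exp(-9 / (2 * 2^2)) = exp(-1.125),
% roughly 0.3247.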


Part 2 : Parameters (C, sigma) for Dataset 3

Helpful : Training Sets, Validation Sets, Test Sets - Explanation

function [C, sigma] = dataset3Params(X, y, Xval, yval)
%EX6PARAMS returns your choice of C and sigma for Part 3 of the exercise
%where you select the optimal (C, sigma) learning parameters to use for SVM
%with RBF kernel
%   [C, sigma] = EX6PARAMS(X, y, Xval, yval) returns your choice of C and 
%   sigma. You should complete this function to return the optimal C and 
%   sigma based on a cross-validation set.
%

% You need to return the following variables correctly.
C = 1;
sigma = 0.3;

 
% ====================== YOUR CODE HERE ======================
% Instructions: Fill in this function to return the optimal C and sigma
%               learning parameters found using the cross validation set.
%               You can use svmPredict to predict the labels on the cross
%               validation set. For example, 
%                   predictions = svmPredict(model, Xval);
%               will return the predictions on the cross validation set.
%
%  Note: You can compute the prediction error using 
%        mean(double(predictions ~= yval))
%

cvals = [0.01 0.03 0.1 0.3 1 3 10 30];
sigmavals = cvals;
errorMin = Inf;

for i = 1:length(cvals)
 cval = cvals(i);
 for j = 1:length(sigmavals)
  sigmaval = sigmavals(j);
  model = svmTrain(X, y, cval, @(x, l) gaussianKernel(x, l, sigmaval));
  predictions = svmPredict(model, Xval);
  err = mean(double(predictions ~= yval));   % avoid shadowing the built-in error()
  if err < errorMin, C = cval; sigma = sigmaval; errorMin = err; end
 end
end

% =========================================================================

end


Part 3 : Email Preprocessing

function word_indices = processEmail(email_contents)
%PROCESSEMAIL preprocesses the body of an email and
%returns a list of word_indices 
%   word_indices = PROCESSEMAIL(email_contents) preprocesses 
%   the body of an email and returns a list of indices of the 
%   words contained in the email. 
%

% Load Vocabulary
vocabList = getVocabList();

% Init return value
word_indices = [];

% ========================== Preprocess Email ===========================

% Find the Headers ( \n\n and remove )
% Uncomment the following lines if you are working with raw emails with the
% full headers

% hdrstart = strfind(email_contents, ([char(10) char(10)]));
% email_contents = email_contents(hdrstart(1):end);

% Lower case
email_contents = lower(email_contents);

% Strip all HTML
% Look for any expression that starts with <, ends with >, and contains no
% < or > inside the tag, and replace it with a space
email_contents = regexprep(email_contents, '<[^<>]+>', ' ');

% Handle Numbers
% Look for one or more characters between 0-9
email_contents = regexprep(email_contents, '[0-9]+', 'number');

% Handle URLS
% Look for strings starting with http:// or https://
email_contents = regexprep(email_contents, ...
                           '(http|https)://[^\s]*', 'httpaddr');

% Handle Email Addresses
% Look for strings with @ in the middle
email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr');

% Handle $ sign
email_contents = regexprep(email_contents, '[$]+', 'dollar');


% ========================== Tokenize Email ===========================

% Output the email to screen as well
fprintf('\n==== Processed Email ====\n\n');

% Process file
l = 0;

while ~isempty(email_contents)

    % Tokenize and also get rid of any punctuation
    [str, email_contents] = ...
       strtok(email_contents, ...
              [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]);
   
    % Remove any non alphanumeric characters
    str = regexprep(str, '[^a-zA-Z0-9]', '');

    % Stem the word 
    % (the porterStemmer sometimes has issues, so we use a try catch block)
    try str = porterStemmer(strtrim(str)); 
    catch str = ''; continue;
    end;

    % Skip the word if it is too short
    if length(str) < 1
       continue;
    end

    % Look up the word in the dictionary and add to word_indices if
    % found
    % ====================== YOUR CODE HERE ======================
    % Instructions: Fill in this function to add the index of str to
    %               word_indices if it is in the vocabulary. At this point
    %               of the code, you have a stemmed word from the email in
    %               the variable str. You should look up str in the
    %               vocabulary list (vocabList). If a match exists, you
    %               should add the index of the word to the word_indices
    %               vector. Concretely, if str = 'action', then you should
    %               look up the vocabulary list to find where in vocabList
    %               'action' appears. For example, if vocabList{18} =
    %               'action', then, you should add 18 to the word_indices 
    %               vector (e.g., word_indices = [word_indices ; 18]; ).
    % 
    % Note: vocabList{idx} returns the word with index idx in the
    %       vocabulary list.
    % 
    % Note: You can use strcmp(str1, str2) to compare two strings (str1 and
    %       str2). It will return 1 only if the two strings are equivalent.
    %
 for i = 1:length(vocabList)
  if strcmp(vocabList{i}, str)==1; word_indices = [word_indices; i]; break; end
 end 
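 % Note: an equivalent, loop-free lookup (strcmp accepts a cell array and
 % returns a logical vector over vocabList):
 %   match = find(strcmp(vocabList, str));
 %   if ~isempty(match), word_indices = [word_indices; match]; end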
 
    % =============================================================


    % Print to screen, ensuring that the output lines are not too long
    if (l + length(str) + 1) > 78
        fprintf('\n');
        l = 0;
    end
    fprintf('%s ', str);
    l = l + length(str) + 1;

end

% Print footer
fprintf('\n\n=========================\n');

end


Part 4 : Email Feature Extraction

function x = emailFeatures(word_indices)
%EMAILFEATURES takes in a word_indices vector and produces a feature vector
%from the word indices
%   x = EMAILFEATURES(word_indices) takes in a word_indices vector and 
%   produces a feature vector from the word indices. 

% Total number of words in the dictionary
n = 1899;

% You need to return the following variables correctly.
x = zeros(n, 1);

% ====================== YOUR CODE HERE ======================
% Instructions: Fill in this function to return a feature vector for the
%               given email (word_indices). To help make it easier to 
%               process the emails, we have already pre-processed each
%               email and converted each word in the email into an index in
%               a fixed dictionary (of 1899 words). The variable
%               word_indices contains the list of indices of the words
%               which occur in one email.
% 
%               Concretely, if an email has the text:
%
%                  The quick brown fox jumped over the lazy dog.
%
%               Then, the word_indices vector for this text might look 
%               like:
%               
%                   60  100   33   44   10     53  60  58   5
%
%               where, we have mapped each word onto a number, for example:
%
%                   the   -- 60
%                   quick -- 100
%                   ...
%
%              (note: the above numbers are just an example and are not the
%               actual mappings).
%
%              Your task is to take one such word_indices vector and construct
%              a binary feature vector that indicates whether a particular
%              word occurs in the email. That is, x(i) = 1 when word i
%              is present in the email. Concretely, if the word 'the' (say,
%              index 60) appears in the email, then x(60) = 1. The feature
%              vector should look like:
%
%              x = [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..];
%
%

for i = 1:size(word_indices, 1)
 x(word_indices(i)) = 1;
end
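
% Equivalent one-liner, since a vector of indices can be used to index x directly:
%   x(word_indices) = 1;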

% =========================================================================
    
end

Tuesday, June 16, 2015

Regularized Linear Regression and Bias/Variance : Machine Learning

My solutions to Week 6 Exercises:

1, 2 : Regularized Linear Regression Cost Function and Gradient

function [J, grad] = linearRegCostFunction(X, y, theta, lambda)
%LINEARREGCOSTFUNCTION Compute cost and gradient for regularized linear 
%regression with multiple variables
%   [J, grad] = LINEARREGCOSTFUNCTION(X, y, theta, lambda) computes the 
%   cost of using theta as the parameter for linear regression to fit the 
%   data points in X and y. Returns the cost in J and the gradient in grad

% Initialize some useful values
m = length(y); % number of training examples

% You need to return the following variables correctly 
J = 0;
grad = zeros(size(theta));

% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost and gradient of regularized linear 
%               regression for a particular choice of theta.
%
%               You should set J to the cost and grad to the gradient.
%

predictions = X*theta;
squaredErrorSum = sum((predictions-y) .^ 2);
J = (squaredErrorSum/(2*m));

regularization = ((sum(theta .^ 2) - theta(1)^2)*lambda)/(2*m);
J = J+regularization;


difference = predictions-y; 
grad = ((X'*difference)+(lambda*theta))/m;
grad(1) = grad(1)-(lambda*theta(1))/m; 
% =========================================================================

grad = grad(:);

end
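
A small made-up check of the cost (my own toy numbers, not from the assignment; lambda = 1):

X_toy = [ones(3, 1) (1:3)'];   % bias column plus one feature
y_toy = [2; 4; 6];
theta_toy = [0; 1];
[J_toy, grad_toy] = linearRegCostFunction(X_toy, y_toy, theta_toy, 1);
% predictions = [1; 2; 3], errors = [-1; -2; -3]; unregularized cost = 14/6,
% regularization adds 1^2/(2*3) = 1/6, so J_toy should be 2.5.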

3: Learning Curve

function [error_train, error_val] = ...
    learningCurve(X, y, Xval, yval, lambda)
%LEARNINGCURVE Generates the train and cross validation set errors needed 
%to plot a learning curve
%   [error_train, error_val] = ...
%       LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and
%       cross validation set errors for a learning curve. In particular, 
%       it returns two vectors of the same length - error_train and 
%       error_val. Then, error_train(i) contains the training error for
%       i examples (and similarly for error_val(i)).
%
%   In this function, you will compute the train and test errors for
%   dataset sizes from 1 up to m. In practice, when working with larger
%   datasets, you might want to do this in larger intervals.
%

% Number of training examples
m = size(X, 1);

% You need to return these values correctly
error_train = zeros(m, 1);
error_val   = zeros(m, 1);

% ====================== YOUR CODE HERE ======================
% Instructions: Fill in this function to return training errors in 
%               error_train and the cross validation errors in error_val. 
%               i.e., error_train(i) and 
%               error_val(i) should give you the errors
%               obtained after training on i examples.
%
% Note: You should evaluate the training error on the first i training
%       examples (i.e., X(1:i, :) and y(1:i)).
%
%       For the cross-validation error, you should instead evaluate on
%       the _entire_ cross validation set (Xval and yval).
%
% Note: If you are using your cost function (linearRegCostFunction)
%       to compute the training and cross validation error, you should 
%       call the function with the lambda argument set to 0. 
%       Do note that you will still need to use lambda when running
%       the training to obtain the theta parameters.
%
% Hint: You can loop over the examples with the following:
%
%       for i = 1:m
%           % Compute train/cross validation errors using training examples 
%           % X(1:i, :) and y(1:i), storing the result in 
%           % error_train(i) and error_val(i)
%           ....
%           
%       end
%

% ---------------------- Sample Solution ----------------------

 
 
for i=1:m
 examples = X(1:i, :);
 Y = y(1:i);
 theta = trainLinearReg(examples, Y, lambda);
 error_train(i) = linearRegCostFunction(examples, Y, theta, 0);
 error_val(i) = linearRegCostFunction(Xval, yval, theta, 0);
end 

% -------------------------------------------------------------

% =========================================================================

end

4: Polynomial Feature Mapping

function [X_poly] = polyFeatures(X, p)
%POLYFEATURES Maps X (1D vector) into the p-th power
%   [X_poly] = POLYFEATURES(X, p) takes a data matrix X (size m x 1) and
%   maps each example into its polynomial features where
%   X_poly(i, :) = [X(i) X(i).^2 X(i).^3 ...  X(i).^p];
%


% You need to return the following variables correctly.
X_poly = zeros(numel(X), p);

% ====================== YOUR CODE HERE ======================
% Instructions: Given a vector X, return a matrix X_poly where the j-th
%               column contains the values of X to the j-th power, for
%               j = 1..p.
%
% 

X_poly = X;

for i=2:p
 X_poly = [X_poly X.^i];
end  

% =========================================================================

end
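
For example:

polyFeatures([1; 2; 3], 3)
% ans =
%    1    1    1
%    2    4    8
%    3    9   27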

5: Cross Validation Curve


function [lambda_vec, error_train, error_val] = ...
    validationCurve(X, y, Xval, yval)
%VALIDATIONCURVE Generate the train and validation errors needed to
%plot a validation curve that we can use to select lambda
%   [lambda_vec, error_train, error_val] = ...
%       VALIDATIONCURVE(X, y, Xval, yval) returns the train
%       and validation errors (in error_train, error_val)
%       for different values of lambda. You are given the training set (X,
%       y) and validation set (Xval, yval).
%

% Selected values of lambda (you should not change this)
lambda_vec = [0 0.001 0.003 0.01 0.03 0.1 0.3 1 3 10]';

% You need to return these variables correctly.
error_train = zeros(length(lambda_vec), 1);
error_val = zeros(length(lambda_vec), 1);

% ====================== YOUR CODE HERE ======================
% Instructions: Fill in this function to return training errors in 
%               error_train and the validation errors in error_val. The 
%               vector lambda_vec contains the different lambda parameters 
%               to use for each calculation of the errors, i.e, 
%               error_train(i), and error_val(i) should give 
%               you the errors obtained after training with 
%               lambda = lambda_vec(i)
%
% Note: You can loop over lambda_vec with the following:
%
%       for i = 1:length(lambda_vec)
%           lambda = lambda_vec(i);
%           % Compute train / val errors when training linear 
%           % regression with regularization parameter lambda
%           % You should store the result in error_train(i)
%           % and error_val(i)
%           ....
%           
%       end
%
%   

for i=1:length(lambda_vec) 
 theta = trainLinearReg(X, y, lambda_vec(i));  
 error_train(i) = linearRegCostFunction(X, y, theta, 0);
 error_val(i) = linearRegCostFunction(Xval, yval, theta, 0);
end 

% =========================================================================

end
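
The calling script can then pick the lambda with the lowest cross-validation error, along these lines (a sketch, not part of the submitted function):

[minErr, best] = min(error_val);
bestLambda = lambda_vec(best)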

Sunday, June 14, 2015

TREEGAME SPOJ solution

Problem: http://www.spoj.com/problems/TREEGAME/

My solution: Java (AC):

import java.io.*;
import java.util.*;

class TREEGAME {

    static int[] order;
    static int[] tree;
    
    public static void process(int node, int target, int beg, int end, int setVal){
//        IO.println("node = "+node+" target = "+target+" beg = "+beg+" end = "+end+" mid = "+(beg+end)/2+" setVal = "+setVal);
        if(beg==end){
//            IO.println("Set leaf "+node+" to "+setVal);
            tree[node] = setVal;
            IO.print(setVal+" ");
            return;
        }
        
        int left = tree[node*2], right = tree[node*2+1], mid = (beg+end)/2;
        int nextVal=setVal;
        
        if(setVal==0 && (left==1 || right==1))nextVal=1;
        else if(setVal==1 && (left==1 || right == 1))nextVal = 0;
        else if(setVal==0 && (left==-1 && right == -1))nextVal = 1; 
        
        if(target<=mid) process(node*2, target, beg, mid, nextVal);
        else process(node*2+1, target, mid+1, end, nextVal);   
        
        //Set this parent node's value only if both the children have their values set.
        if(tree[node*2]!=-1 && tree[node*2+1]!=-1)tree[node] = setVal; 
    }
    
    public static void main(String[] args) throws Exception{
        int n = IO.nextInt();
        int nLeaves = (int)Math.pow(2, n); 
        order = IO.nextIntArray(nLeaves, " ");
                
        int nNodes = (int)Math.pow(2,(n+1))-1;
        tree = new int[nNodes+1]; 
        Arrays.fill(tree, -1);
        
        for(int i=1;i<=nLeaves-1;++i){
            process(1, order[i], 1, nLeaves, 1);
        }
//        IO.print("\n");
    }

 
    static class IO {

        static BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

        public static int[][] next2dInt(int rows, int cols, String seperator) throws Exception {
            int[][] arr = new int[rows + 1][cols + 1];
            for (int i = 1; i <= rows; ++i) {
                arr[i] = nextIntArray(cols, seperator);
            }
            return arr;
        }

        public static int[] nextIntArray(int nInts, String seperator) throws IOException {
            String ints = br.readLine();
            String[] sArray = ints.split(seperator);
            int[] array = new int[nInts + 1];
            for (int i = 1; i <= nInts; ++i) {
                array[i] = Integer.parseInt(sArray[i - 1]);
            }
            return array;
        }

        public static long[] nextLongArray(int nLongs, String seperator) throws IOException {
            String longs = br.readLine();
            String[] sArray = longs.split(seperator);
            long[] array = new long[nLongs + 1];
            for (int i = 1; i <= nLongs; ++i) {
                array[i] = Long.parseLong(sArray[i - 1]);
            }
            return array;
        }

        public static double[] nextDoubleArray(int nDoubles, String seperator) throws IOException {
            String doubles = br.readLine();
            String[] sArray = doubles.split(seperator);
            double[] array = new double[nDoubles + 1];
            for (int i = 1; i <= nDoubles; ++i) {
                array[i] = Double.parseDouble(sArray[i - 1]);
            }
            return array;
        }

        public static char[] nextCharArray(int nChars, String seperator) throws IOException {
            String chars = br.readLine();
            String[] sArray = chars.split(seperator);
            char[] array = new char[nChars + 1];
            for (int i = 1; i <= nChars; ++i) {
                array[i] = sArray[i - 1].charAt(0);
            }
            return array;
        }

        public static int nextInt() throws IOException {
            String in = br.readLine();
            return Integer.parseInt(in);
        }

        public static double nextDouble() throws IOException {
            String in = br.readLine();
            return Double.parseDouble(in);
        }

        public static long nextLong() throws IOException {
            String in = br.readLine();
            return Long.parseLong(in);
        }

        public static int nextChar() throws IOException {
            String in = br.readLine();
            return in.charAt(0);
        }

        public static String nextString() throws IOException {
            return br.readLine();
        }

        public static void print(Object... o) {
            for (Object os : o) {
                System.out.print(os);
            }
        }

        public static void println(Object... o) {
            for (Object os : o) {
                System.out.print(os);
            }
            System.out.print("\n");
        }

        public static void printlnSeperate(String seperator, Object... o) {
            StringBuilder sb = new StringBuilder();
            for (Object os : o) {
                sb.append(os).append(seperator);
            }
            if (sb.length() >= seperator.length()) {
                sb.delete(sb.length() - seperator.length(), sb.length());
            }
            System.out.println(sb);
        }
    }
}

Friday, June 12, 2015

Neural Network Learning : Machine Learning

My solutions to Week 5 assignment questions.

Helpful links : https://github.com/jcgillespie/Coursera-Machine-Learning/tree/master/ex4

sigmoidGradient.m (#3):

function g = sigmoidGradient(z)
%SIGMOIDGRADIENT returns the gradient of the sigmoid function
%evaluated at z
%   g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
%   evaluated at z. This should work regardless if z is a matrix or a
%   vector. In particular, if z is a vector or matrix, you should return
%   the gradient for each element.

g = zeros(size(z));

% ====================== YOUR CODE HERE ======================
% Instructions: Compute the gradient of the sigmoid function evaluated at
%               each value of z (z can be a matrix, vector or scalar).

sigmoidZ = zeros(size(z, 1), size(z, 2));
for i=1:size(sigmoidZ, 1)
 for j=1:size(sigmoidZ, 2)
  sigmoidZ(i,j) = sigmoid(z(i,j));
 end
end

oneMinus = 1-sigmoidZ;

for i=1:size(g,1)
 for j=1:size(g,2)
  g(i,j) = sigmoidZ(i,j)*oneMinus(i, j);
 end
end

% =============================================================

end
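
The double loop works, but since sigmoid already operates element-wise on matrices, the whole body collapses to one equivalent line:

g = sigmoid(z) .* (1 - sigmoid(z));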




nnCostFunction.m (#1, #2, #4, #5):

function [J grad] = nnCostFunction(nn_params, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, ...
                                   X, y, lambda)
%NNCOSTFUNCTION Implements the neural network cost function for a two layer
%neural network which performs classification
%   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
%   X, y, lambda) computes the cost and gradient of the neural network. The
%   parameters for the neural network are "unrolled" into the vector
%   nn_params and need to be converted back into the weight matrices. 
% 
%   The returned parameter grad should be a "unrolled" vector of the
%   partial derivatives of the neural network.
%

% Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
% for our 2 layer neural network
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));

Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1)); 
% Setup some useful variables
m = size(X, 1);
      
% You need to return the following variables correctly 
J = 0;
Theta1_grad = zeros(size(Theta1));
Theta2_grad = zeros(size(Theta2));

% ====================== YOUR CODE HERE ======================
% Instructions: You should complete the code by working through the
%               following parts.
%
% Part 1: Feedforward the neural network and return the cost in the
%         variable J. After implementing Part 1, you can verify that your
%         cost function computation is correct by verifying the cost
%         computed in ex4.m
%
% Part 2: Implement the backpropagation algorithm to compute the gradients
%         Theta1_grad and Theta2_grad. You should return the partial derivatives of
%         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
%         Theta2_grad, respectively. After implementing Part 2, you can check
%         that your implementation is correct by running checkNNGradients
%
%         Note: The vector y passed into the function is a vector of labels
%               containing values from 1..K. You need to map this vector into a 
%               binary vector of 1's and 0's to be used with the neural network
%               cost function.
%
%         Hint: We recommend implementing backpropagation using a for-loop
%               over the training examples if you are implementing it for the 
%               first time.
%
% Part 3: Implement regularization with the cost function and gradients.
%
%         Hint: You can implement this around the code for
%               backpropagation. That is, you can compute the gradients for
%               the regularization separately and then add them to Theta1_grad
%               and Theta2_grad from Part 2.
%




% Ex1 
X = [ones(m, 1) X];
Y = zeros(size(y, 1), num_labels);

for i = 1:size(Y, 1), Y(i, y(i)) = 1; end
 

a2 = (Theta1*X');
z2 = a2;
a2 = sigmoid(a2); 

a2 = a2';
a2WithoutOnes = a2;
a2 = [ones(size(a2, 1), 1) a2]; 
a3 = a2*Theta2'; 
a3 = sigmoid(a3); 

predictions = a3;
logPredictions = log(predictions); 
 
tempLeftProd = zeros(size(a3, 1), 1);
tempRightProd = zeros(size(a3, 1), 1);

oneMinusY = 1-Y;
oneMinusPredictions = 1-predictions;

for i=1:size(a3, 1)
 tempLeftProd(i) = logPredictions(i,:)*Y(i,:)';
 tempRightProd(i) = log(oneMinusPredictions(i,:))*(oneMinusY(i,:)');
end

brackets = tempLeftProd+tempRightProd;
sumAllExamples = sum(brackets);
J = (-1/m)*sumAllExamples;





% Ex2
regularizationAdd = 0;
regAddLeft = zeros(hidden_layer_size, 1);

for i=1:hidden_layer_size
 for j=1:size(Theta1, 2)-1
  regAddLeft(i) = regAddLeft(i) + Theta1(i, j+1)^2;
 end
end

regAddRight = zeros(num_labels, 1);
for i=1:num_labels
 for j=1:size(Theta2, 2)-1 
  regAddRight(i) = regAddRight(i) + Theta2(i, j+1)^2;
 end
end

regularizationAdd = (lambda*(sum(regAddLeft)+sum(regAddRight)))/(2*m);
J = J+regularizationAdd; 






% Ex4
Delta1=0;
Delta2=0;

Theta1WithoutBias = Theta1(:, 2:end);
Theta2WithoutBias = Theta2(:, 2:end);

for t=1:m
 a1 = X(t, :)';
 z2 = Theta1*a1;
 a2 = [1; sigmoid(z2)];
 z3 = Theta2*a2;
 a3 = [sigmoid(z3)];
 
 d3 = a3-Y(t, :)'; 
 
 d2 = Theta2WithoutBias'*d3 .* sigmoidGradient(z2);
 %d2 = d2(2:end); % Not needed: Theta2WithoutBias and z2 already have the right
 %                % dimensions (the bias term is added to a2, not to z2).
 
 Delta2 = Delta2 + d3*a2';
 Delta1 = Delta1 + d2*a1';
end

Theta1_grad = Delta1/m;
Theta2_grad = Delta2/m;





% Ex5 - regularization
Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end)+(lambda/m)*Theta1WithoutBias;
Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end)+(lambda/m)*Theta2WithoutBias;
% -------------------------------------------------------------

% =========================================================================

% Unroll gradients
grad = [Theta1_grad(:) ; Theta2_grad(:)]; 

end
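
As the template's comments suggest, checkNNGradients is the quickest way to validate the backpropagation and regularization parts: it compares the analytic gradients above with a numerical estimate. A usage sketch, assuming the exercise's helper scripts are on the path (the helper accepts a lambda argument in ex4, if I recall correctly):

checkNNGradients;      % unregularized gradients (lambda = 0)
checkNNGradients(3);   % regularized gradients with lambda = 3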


Monday, June 8, 2015

Neural Networks : Representation : Machine Learning : Week 4

My solutions to Week 4 assignments:

Part 1: Regularized Logistic Regression

function [J, grad] = lrCostFunction(theta, X, y, lambda)
%LRCOSTFUNCTION Compute cost and gradient for logistic regression with 
%regularization
%   J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
%   theta as the parameter for regularized logistic regression and the
%   gradient of the cost w.r.t. to the parameters. 

% Initialize some useful values
m = length(y); % number of training examples

% You need to return the following variables correctly 
J = 0;
grad = zeros(size(theta));

% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost of a particular choice of theta.
%               You should set J to the cost.
%               Compute the partial derivatives and set grad to the partial
%               derivatives of the cost w.r.t. each parameter in theta
%
% Hint: The computation of the cost function and gradients can be
%       efficiently vectorized. For example, consider the computation
%
%           sigmoid(X * theta)
%
%       Each row of the resulting matrix will contain the value of the
%       prediction for that example. You can make use of this to vectorize
%       the cost function and gradient computations. 
%
% Hint: When computing the gradient of the regularized cost function, 
%       there're many possible vectorized solutions, but one solution
%       looks like:
%           grad = (unregularized gradient for logistic regression)
%           temp = theta; 
%           temp(1) = 0;   % because we don't add anything for j = 0  
%           grad = grad + YOUR_CODE_HERE (using the temp variable)
% 

thetaTx = (theta'*X')';
h = sigmoid(thetaTx);  
leftJ = -(1/m)*(sum(y'*log(h)+(1-y)'*log(1-h)));
 
rightJ = (lambda/(2*m))*sum(theta(2:end).^2);
 
J = leftJ+rightJ;

error = h-y; 
grad = ((error'*X)' + (lambda*theta))/m;
grad(1) = (error'*X(:,1))/m ; 

% =============================================================

grad = grad(:);

end

Part 2: One-vs-all classifier training

function [all_theta] = oneVsAll(X, y, num_labels, lambda)
%ONEVSALL trains multiple logistic regression classifiers and returns all
%the classifiers in a matrix all_theta, where the i-th row of all_theta 
%corresponds to the classifier for label i
%   [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels
%   logisitc regression classifiers and returns each of these classifiers
%   in a matrix all_theta, where the i-th row of all_theta corresponds 
%   to the classifier for label i

% Some useful variables
m = size(X, 1);
n = size(X, 2);

% You need to return the following variables correctly 
all_theta = zeros(num_labels, n + 1);

% Add ones to the X data matrix
X = [ones(m, 1) X];

% ====================== YOUR CODE HERE ======================
% Instructions: You should complete the following code to train num_labels
%               logistic regression classifiers with regularization
%               parameter lambda. 
%
% Hint: theta(:) will return a column vector.
%
% Hint: You can use y == c to obtain a vector of 1's and 0's that tell use 
%       whether the ground truth is true/false for this class.
%
% Note: For this assignment, we recommend using fmincg to optimize the cost
%       function. It is okay to use a for-loop (for c = 1:num_labels) to
%       loop over the different classes.
%
%       fmincg works similarly to fminunc, but is more efficient when we
%       are dealing with large number of parameters.
%
% Example Code for fmincg:
%
%     % Set Initial theta
%     initial_theta = zeros(n + 1, 1);
%     
%     % Set options for fminunc
%     options = optimset('GradObj', 'on', 'MaxIter', 50);
% 
%     % Run fmincg to obtain the optimal theta
%     % This function will return theta and the cost 
%     [theta] = ...
%         fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ...
%                 initial_theta, options);
%


options = optimset('GradObj', 'on', 'MaxIter', 50);

for c = 1:num_labels
 initial_theta = zeros(n+1, 1);
 [theta] = fmincg(@(t)(lrCostFunction(t, X, (y==c), lambda)), initial_theta, options);
 all_theta(c,:) = theta;
end


% =========================================================================


end

Part 3: One-vs-all classifier prediction

function p = predictOneVsAll(all_theta, X)
%PREDICT Predict the label for a trained one-vs-all classifier. The labels 
%are in the range 1..K, where K = size(all_theta, 1). 
%  p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions
%  for each example in the matrix X. Note that X contains the examples in
%  rows. all_theta is a matrix where the i-th row is a trained logistic
%  regression theta vector for the i-th class. You should set p to a vector
%  of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2
%  for 4 examples) 

m = size(X, 1);
num_labels = size(all_theta, 1);

% You need to return the following variables correctly 
p = zeros(size(X, 1), 1);

% Add ones to the X data matrix
X = [ones(m, 1) X];

% ====================== YOUR CODE HERE ======================
% Instructions: Complete the following code to make predictions using
%               your learned logistic regression parameters (one-vs-all).
%               You should set p to a vector of predictions (from 1 to
%               num_labels).
%
% Hint: This code can be done all vectorized using the max function.
%       In particular, the max function can also return the index of the 
%       max element, for more information see 'help max'. If your examples 
%       are in rows, then, you can use max(A, [], 2) to obtain the max 
%       for each row.
%       
  
z = X*all_theta';

for i = 1:size(z, 1)
 for j = 1:size(z, 2)
  z(i, j) = 1 / (1 + exp(-z(i, j)));
 end
end

pSigmoid = z;

[maxProbabilities indices] = max(pSigmoid, [], 2);
p = indices;

% =========================================================================

end
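
Since the same sigmoid used in lrCostFunction above is already vectorized, the element-wise loop can be collapsed; this sketch should give the same predictions:

pSigmoid = sigmoid(X * all_theta');
[maxProbabilities, p] = max(pSigmoid, [], 2);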

Part 4: Neural Network Prediction Function

function p = predict(Theta1, Theta2, X)
%PREDICT Predict the label of an input given a trained neural network
%   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
%   trained weights of a neural network (Theta1, Theta2)

% Useful values
m = size(X, 1);
num_labels = size(Theta2, 1);

% You need to return the following variables correctly 
p = zeros(size(X, 1), 1);

% ====================== YOUR CODE HERE ======================
% Instructions: Complete the following code to make predictions using
%               your learned neural network. You should set p to a 
%               vector containing labels between 1 to num_labels.
%
% Hint: The max function might come in useful. In particular, the max
%       function can also return the index of the max element, for more
%       information see 'help max'. If your examples are in rows, then, you
%       can use max(A, [], 2) to obtain the max for each row.
%

X = [ones(m, 1) X];
z = X*Theta1';

for i = 1:size(z, 1)
 for j = 1:size(z, 2)
  z(i, j) = 1 / (1 + exp(-z(i, j)));
 end
end

pSigmoid = z; 
pSigmoid = [ones(size(pSigmoid, 1), 1) pSigmoid];

z1 = pSigmoid*Theta2';
[maxProbabilities indices] = max(z1, [], 2);
p = indices; 

% =========================================================================


end

Saturday, June 6, 2015

Logistic Regression with Regularization - Machine Learning

My solutions to Exercises for Week 3 : Regularization - Logistic Regression (Coursera.org - Machine Learning Course):

1. Sigmoid Function

function g = sigmoid(z)
%SIGMOID Compute sigmoid function
%   J = SIGMOID(z) computes the sigmoid of z.

% You need to return the following variables correctly 
g = zeros(size(z));

% ====================== YOUR CODE HERE ======================
% Instructions: Compute the sigmoid of each value of z (z can be a matrix,
%               vector or scalar).

for i = 1:size(z, 1)
 for j = 1:size(z, 2)
  g(i, j) = 1 / (1 + exp(-z(i, j)));
 end
end
% =============================================================

end

2, 3. Cost and gradient for logistic regression

function [J, grad] = costFunction(theta, X, y)
%COSTFUNCTION Compute cost and gradient for logistic regression
%   J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
%   parameter for logistic regression and the gradient of the cost
%   w.r.t. to the parameters.

% Initialize some useful values
m = length(y); % number of training examples

% You need to return the following variables correctly 
J = 0;
grad = zeros(size(theta));




% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost of a particular choice of theta.
%               You should set J to the cost.
%               Compute the partial derivatives and set grad to the partial
%               derivatives of the cost w.r.t. each parameter in theta
%
% Note: grad should have the same dimensions as theta
% 
thetaTx = (theta'*X')';
h = sigmoid(thetaTx);  
J = -(1/m)*(sum(y'*log(h)+(1-y)'*log(1-h)));
grad = (1/m)*(((h-y)'*X)');
% =============================================================

end


4. Predict Function

function p = predict(theta, X)
%PREDICT Predict whether the label is 0 or 1 using learned logistic 
%regression parameters theta
%   p = PREDICT(theta, X) computes the predictions for X using a 
%   threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1)

m = size(X, 1); % Number of training examples

% You need to return the following variables correctly
p = zeros(m, 1);

% ====================== YOUR CODE HERE ======================
% Instructions: Complete the following code to make predictions using
%               your learned logistic regression parameters. 
%               You should set p to a vector of 0's and 1's
%

thetaTx = (theta'*X')';
h = sigmoid(thetaTx);

for i = 1:m
 if h(i) >= 0.5, p(i) = 1; else p(i) = 0; end
end

% =========================================================================
end

5, 6. Cost and gradient for regularized logistic regression

function [J, grad] = costFunctionReg(theta, X, y, lambda)
%COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
%   J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
%   theta as the parameter for regularized logistic regression and the
%   gradient of the cost w.r.t. to the parameters. 

% Initialize some useful values
m = length(y); % number of training examples 
% You need to return the following variables correctly 
J = 0;
grad = zeros(size(theta));

% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost of a particular choice of theta.
%               You should set J to the cost.
%               Compute the partial derivatives and set grad to the partial
%               derivatives of the cost w.r.t. each parameter in theta


thetaTx = (theta'*X')';
h = sigmoid(thetaTx);  
leftJ = -(1/m)*(sum(y'*log(h)+(1-y)'*log(1-h)));
 
rightJ = (lambda/(2*m))*sum(theta(2:end).^2);
 
J = leftJ+rightJ;

error = h-y;
grad(1) = (error'*X(:,1))/m ;
for i=2:size(grad, 1)
 grad(i) = (error'*X(:,i) + (lambda*theta(i)))/m;
end

% =============================================================

end
