# calculateCost: Calculates and returns the cost function J(theta)
#
calculateCost <- function(X, y, theta){
  # Number of observations
  m <- length(y)

  return( sum((X %*% theta - y)^2) / (2*m) )
} # calculateCost

#
# Gradient descent algorithm
#
# Parameters:
#   X        : m x (k+1) matrix of the independent variables,
#              where k is the number of independent variables and the
#              first column is all 1s (for the constant term)
#   y        : m x 1 vector of the dependent variable
#   theta    : (k+1) x 1 vector of initial coefficient values
#   alpha    : learning rate
#   numIters : number of iterations to execute
#
gradientDescent <- function(X, y, theta, alpha=0.01, numIters=90){
  # Number of observations
  m <- length(y)

  # Values of the cost function after each iteration
  costHistory <- rep(0, numIters)

  # Start the gradient descent algorithm: each iteration steps against
  # the gradient of J(theta), i.e. theta <- theta - (alpha/m) * t(X) %*% (X %*% theta - y)
  for(i in 1:numIters){
    theta <- theta - alpha * (1/m) * (t(X) %*% (X %*% theta - y))

    # Calculate the cost and save it to the history vector
    costHistory[i] <- calculateCost(X, y, theta)
  } # for

  # Here, the algorithm is done; return the coefficients and the cost history
  gdResults <- list("coefficients"=theta, "costs"=costHistory)
  return(gdResults)
} # gradientDescent


# Read the csv file
foodConsumptionData <- read.csv("FoodConsumption.csv", sep=",", header=TRUE)

# How many observations do we have?
numObs <- nrow(foodConsumptionData)

# The dependent variable
dependentVariable <- foodConsumptionData$FoodExpenditure

# Create the data matrix, with a leading column of 1s for the constant term
independentVariableMatrix <- cbind(rep(1, numObs),
                                   foodConsumptionData$Income,
                                   foodConsumptionData$FamilySize)

# Initialize the theta vector with 3 random values
thetas <- cbind(runif(3))

# The learning rate alpha (very small, to keep the updates from diverging)
alpha <- 8e-11

# Number of iterations to do
numIterations <- 15000

# Call the gradient descent algorithm
gdOutput <- gradientDescent(independentVariableMatrix, dependentVariable,
                            thetas, alpha, numIterations)

# Plot the cost function
plot(gdOutput$costs, ylab="J(θ)", xlab="Iterations")

# Print the estimated coefficients
print(gdOutput$coefficients)
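
# Optional sanity check (a sketch, not part of the original script): the
# coefficients from gradient descent can be compared against R's built-in
# closed-form least-squares fit. This assumes the CSV really has the columns
# used above (FoodExpenditure, Income, FamilySize). With a learning rate this
# small and only 15000 iterations, gradient descent may still be far from the
# lm() solution, so expect only rough agreement at best.
olsFit <- lm(FoodExpenditure ~ Income + FamilySize, data=foodConsumptionData)
print(coef(olsFit))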