Monday, August 10, 2015

R Programming - Matrix Inversion Cache. Solution

My submission:
## Creates a special "matrix" object that can cache its inverse.
##
## Args:
##   x: the matrix to wrap (defaults to an empty 1x1 matrix).
##
## Returns a list of four accessor functions that share one environment:
##   get()   - returns the stored matrix
##   set(m)  - replaces the stored matrix and discards any cached inverse
##   getI()  - returns the cached inverse, or NULL when nothing is cached
##   setI(i) - stores `i` as the cached inverse
makeCacheMatrix <- function(x = matrix()) {
        inverse <- NULL

        # The default is spelled base::matrix() because the parameter itself
        # is named `matrix`; a bare matrix() default would refer to its own
        # promise and fail with a recursive default argument error.
        setMatrix <- function(matrix = base::matrix()) {
                x <<- matrix
                # The cached inverse belongs to the previous matrix, so it
                # must not survive a replacement.
                inverse <<- NULL
        }

        getMatrix <- function() x

        setInverse <- function(inverseMatrix = matrix()) {
                inverse <<- inverseMatrix
        }

        getInverse <- function() inverse

        list(get = getMatrix, set = setMatrix, getI = getInverse, setI = setInverse)
}
 
 
## Returns the inverse of the matrix wrapped by a makeCacheMatrix object.
## If the object already holds a cached inverse, that value is returned;
## otherwise the inverse is computed with solve(), cached, and returned.
##
## Args:
##   x:   a list as returned by makeCacheMatrix (needs get, getI and setI).
##   ...: further arguments forwarded to solve() when the inverse has to be
##        computed. The cache is not keyed on these arguments, so they have
##        no effect once a value is cached.
##
## Prints a status line saying whether the cache was used.

cacheSolve <- function(x, ...) {
        ## Return a matrix that is the inverse of 'x'

        cached <- x$getI()
        if (!is.null(cached)) {
                print("Found in cache")
                return(cached)
        }

        print("Not cached. Recomputing...")
        inverse <- solve(x$get(), ...)
        x$setI(inverse)
        inverse
}

Sunday, August 9, 2015

R Programming Week 3 Solutions

My solutions to week 3 exercises

1. Finding the best hospital in a state (best.R)

## Finds the hospital in `state` with the lowest value for `outcome`.
##
## Reads "outcome-of-care-measures.csv" from the working directory. Column 2
## is used as the hospital name and columns 11, 17 and 23 as the values for
## "heart attack", "heart failure" and "pneumonia" respectively.
##
## Args:
##   state:   value to match against the "State" column.
##   outcome: one of "heart attack", "heart failure", "pneumonia".
##
## Returns the hospital name (character). Ties on the outcome value are
## broken by hospital name in ascending order. Hospitals whose value is
## "Not Available" are ignored; if no hospital in the state has a value,
## NA_character_ is returned.
##
## Stops with "invalid state" or "invalid outcome" on bad arguments.
best <- function(state, outcome) {
        data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

        outcomeCols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
        hospitalNamesCol <- 2

        if (!(state %in% data[["State"]])) {
                stop("invalid state")
        }
        if (!(outcome %in% names(outcomeCols))) {
                stop("invalid outcome")
        }
        diseaseCol <- outcomeCols[[outcome]]

        dataState <- data[data[["State"]] %in% state, ]

        # Turn the textual sentinel into NA before converting, so that
        # as.numeric() only warns about values that are really malformed.
        rates <- dataState[, diseaseCol]
        rates[rates %in% "Not Available"] <- NA
        rates <- as.numeric(rates)
        hospitals <- dataState[, hospitalNamesCol]

        hasValue <- !is.na(rates)
        if (!any(hasValue)) {
                return(NA_character_)
        }

        # Order by value first, then by name, and take the top entry.
        ranking <- order(rates[hasValue], hospitals[hasValue])
        hospitals[hasValue][ranking[1]]
}

2. Ranking hospitals by outcome in a state (rankhospital.R)

## Returns the hospital holding a given rank for `outcome` within `state`.
##
## Reads "outcome-of-care-measures.csv" from the working directory. Column 2
## is used as the hospital name and columns 11, 17 and 23 as the values for
## "heart attack", "heart failure" and "pneumonia" respectively.
##
## Args:
##   state:   value to match against the "State" column.
##   outcome: one of "heart attack", "heart failure", "pneumonia".
##   num:     "best", "worst", or a numeric rank (1 = lowest value).
##
## Hospitals are ranked by ascending outcome value, ties broken by hospital
## name; hospitals whose value is "Not Available" are left out of the ranking.
## Returns the hospital name, or NA_character_ when the requested rank does
## not exist (rank outside 1..number of ranked hospitals, an unrecognised
## `num`, or no hospital with data).
##
## Stops with "invalid state" or "invalid outcome" on bad arguments.
rankhospital <- function(state, outcome, num = "best") {
        data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

        outcomeCols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
        hospitalNamesCol <- 2

        if (!(state %in% data[["State"]])) {
                stop("invalid state")
        }
        if (!(outcome %in% names(outcomeCols))) {
                stop("invalid outcome")
        }
        diseaseCol <- outcomeCols[[outcome]]

        dataState <- data[data[["State"]] %in% state, ]

        # Turn the textual sentinel into NA before converting, so that
        # as.numeric() only warns about values that are really malformed.
        rates <- dataState[, diseaseCol]
        rates[rates %in% "Not Available"] <- NA
        rates <- as.numeric(rates)
        hospitals <- dataState[, hospitalNamesCol]

        # Only hospitals with a value take part in the ranking.
        hasValue <- !is.na(rates)
        ranked <- hospitals[hasValue][order(rates[hasValue], hospitals[hasValue])]
        total <- length(ranked)

        if (total == 0) {
                NA_character_
        } else if (identical(num, "best")) {
                ranked[1]
        } else if (identical(num, "worst")) {
                ranked[total]
        } else if (is.numeric(num) && num >= 1 && num <= total) {
                ranked[num]
        } else {
                NA_character_
        }
}

3. Ranking hospitals in all states (rankall.R)

## For every state, returns the hospital holding a given rank for `outcome`.
##
## Reads "outcome-of-care-measures.csv" from the working directory. Column 2
## is used as the hospital name, the "State" column as the grouping key, and
## columns 11, 17 and 23 as the values for "heart attack", "heart failure"
## and "pneumonia" respectively.
##
## Args:
##   outcome: one of "heart attack", "heart failure", "pneumonia".
##   num:     "best", "worst", or a numeric rank (1 = lowest value).
##
## Within each state hospitals are ranked by ascending outcome value, ties
## broken by hospital name; hospitals whose value is "Not Available" are left
## out of the ranking.
##
## Returns a data frame with one row per state and the columns `hospital`
## and `state`. `hospital` is NA for states where the requested rank does
## not exist (including states with no usable data).
##
## Stops with "invalid outcome" on a bad `outcome`.
rankall <- function(outcome, num = "best") {
        data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

        outcomeCols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
        hospitalNamesCol <- 2

        if (!(outcome %in% names(outcomeCols))) {
                stop("invalid outcome")
        }
        diseaseCol <- outcomeCols[[outcome]]

        # Turn the textual sentinel into NA before converting, so that
        # as.numeric() only warns about values that are really malformed.
        rates <- data[, diseaseCol]
        rates[rates %in% "Not Available"] <- NA
        rates <- as.numeric(rates)

        slim <- data.frame(hospital = data[, hospitalNamesCol],
                           state = data[["State"]],
                           rate = rates,
                           stringsAsFactors = FALSE)
        byState <- split(slim, slim$state)

        # Picks the hospital at the requested rank inside one state's rows.
        pickRanked <- function(group) {
                group <- group[!is.na(group$rate), ]
                ranked <- group$hospital[order(group$rate, group$hospital)]
                total <- length(ranked)

                if (total == 0) {
                        NA_character_
                } else if (identical(num, "best")) {
                        ranked[1]
                } else if (identical(num, "worst")) {
                        ranked[total]
                } else if (is.numeric(num) && num >= 1 && num <= total) {
                        ranked[num]
                } else {
                        NA_character_
                }
        }

        hNames <- vapply(byState, pickRanked, character(1), USE.NAMES = FALSE)
        sNames <- as.character(names(byState))

        data.frame(hospital = hNames, state = sNames)
}

Friday, August 7, 2015

R Programming Week 2 Solutions

My solutions to Exercises:

#1 (pollutantmean.R)
## Computes the mean of one pollutant column across a set of monitor files,
## ignoring missing values.
##
## Args:
##   directory: folder containing the monitor files, which are named by
##              zero-padded three-digit id ("001.csv", "002.csv", ...).
##   pollutant: name of the column to average.
##   id:        monitor ids to include (defaults to 1:332).
##
## Returns the sum of all non-missing values divided by their count, i.e.
## one mean over all selected files together (NaN when there are no values).
pollutantmean <- function(directory, pollutant, id = 1:332) {
        # file.path() keeps the separator portable; the previous hard-coded
        # backslash only resolved on Windows.
        paths <- file.path(directory, sprintf("%03d.csv", as.integer(id)))

        values <- unlist(lapply(paths, function(path) {
                read.csv(path)[[pollutant]]
        }))
        values <- values[!is.na(values)]

        sum(values) / length(values)
}
#2 (complete.R)

## Counts, for each monitor file, the rows in which both "sulfate" and
## "nitrate" are present.
##
## Args:
##   directory: folder containing the monitor files, which are named by
##              zero-padded three-digit id ("001.csv", "002.csv", ...).
##   id:        monitor ids to report on (defaults to 1:332).
##
## Returns a data frame with the columns `id` (the ids in the order given)
## and `nobs` (the number of complete rows in that file).
complete <- function(directory, id = 1:332) {
        # file.path() keeps the separator portable; the previous hard-coded
        # backslash only resolved on Windows.
        paths <- file.path(directory, sprintf("%03d.csv", as.integer(id)))

        numberOfElements <- vapply(paths, function(path) {
                data <- read.csv(path)
                sum(complete.cases(data[["sulfate"]], data[["nitrate"]]))
        }, integer(1), USE.NAMES = FALSE)

        data.frame(id = id, nobs = numberOfElements)
}
#3 (corr.R)

## Computes the correlation between "sulfate" and "nitrate" for every monitor
## file that has more complete rows than `threshold`.
##
## Args:
##   directory: folder containing the monitor files, which are named by
##              zero-padded three-digit id ("001.csv", "002.csv", ...).
##   threshold: a file is included only when its number of rows with both
##              values present is strictly greater than this (default 0).
##   id:        monitor ids to scan (defaults to 1:332).
##
## Returns a numeric vector with one correlation per qualifying file, in the
## order of `id`; an empty numeric vector when no file qualifies.
corr <- function(directory, threshold = 0, id = 1:332) {
        # file.path() keeps the separator portable; the previous hard-coded
        # backslash only resolved on Windows.
        paths <- file.path(directory, sprintf("%03d.csv", as.integer(id)))

        perFile <- lapply(paths, function(path) {
                data <- read.csv(path)
                d1 <- data[["sulfate"]]
                d2 <- data[["nitrate"]]

                good <- complete.cases(d1, d2)
                # NULL entries vanish in unlist(), which skips the file.
                if (sum(good) > threshold) cor(d1[good], d2[good]) else NULL
        })

        cors <- unlist(perFile)
        if (is.null(cors)) numeric(0) else cors
}