My solutions to week 3 exercises
1. Finding the best hospital in a state (best.R)
# Find the hospital with the lowest outcome value in a given state.
#
# Reads "outcome-of-care-measures.csv" from the working directory. The
# hospital name is taken from column 2 and the outcome value from column
# 11 ("heart attack"), 17 ("heart failure") or 23 ("pneumonia").
#
# Args:
#   state:   value to look up in the "State" column of the file.
#   outcome: one of "heart attack", "heart failure", "pneumonia".
#
# Returns:
#   The name of the hospital with the smallest outcome value in `state`.
#   Ties are broken by taking the alphabetically first hospital name.
#   Returns NA_character_ when no hospital in the state has a usable value.
#
# Raises:
#   "invalid state" if `state` does not occur in the file,
#   "invalid outcome" if `outcome` is not one of the three supported values.
best <- function(state, outcome) {
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
  outcome_cols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

  if (!(state %in% data[["State"]])) {
    stop("invalid state")
  }
  if (!(outcome %in% names(outcome_cols))) {
    stop("invalid outcome")
  }

  outcome_col <- outcome_cols[[outcome]]
  hospital_col <- 2

  # %in% (unlike ==) never yields NA, so rows with a missing state are dropped
  # instead of turning into all-NA rows.
  in_state <- data[data[["State"]] %in% state, ]
  hospitals <- in_state[, hospital_col]

  # Mark the file's placeholder as missing *before* converting, so the
  # conversion is warning-free and every outcome column is treated the same.
  values <- in_state[, outcome_col]
  values[values == "Not Available"] <- NA
  values <- as.numeric(values)

  has_value <- !is.na(values)
  if (!any(has_value)) {
    return(NA_character_)
  }
  values <- values[has_value]
  hospitals <- hospitals[has_value]

  # Sort by value first, then by name, and keep the top entry.
  hospitals[order(values, hospitals)[1]]
}
2. Ranking hospitals by outcome in a state (rankhospital.R)
# Return the hospital holding a given rank for an outcome within one state.
#
# Reads "outcome-of-care-measures.csv" from the working directory. The
# hospital name is taken from column 2 and the outcome value from column
# 11 ("heart attack"), 17 ("heart failure") or 23 ("pneumonia"). Hospitals
# are ranked by ascending outcome value, ties broken alphabetically by name;
# hospitals whose value is "Not Available" are excluded from the ranking.
#
# Args:
#   state:   value to look up in the "State" column of the file.
#   outcome: one of "heart attack", "heart failure", "pneumonia".
#   num:     "best" (rank 1), "worst" (last rank) or a numeric rank.
#
# Returns:
#   The hospital name at the requested rank, or NA_character_ when the rank
#   is out of range (including below 1), the state has no usable values, or
#   `num` is not a recognised value.
#
# Raises:
#   "invalid state" if `state` does not occur in the file,
#   "invalid outcome" if `outcome` is not one of the three supported values.
rankhospital <- function(state, outcome, num = "best") {
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
  outcome_cols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

  if (!(state %in% data[["State"]])) {
    stop("invalid state")
  }
  if (!(outcome %in% names(outcome_cols))) {
    stop("invalid outcome")
  }

  outcome_col <- outcome_cols[[outcome]]
  hospital_col <- 2

  in_state <- data[data[["State"]] %in% state, ]
  hospitals <- in_state[, hospital_col]

  # Vectorised replacement of the placeholder; conversion afterwards is
  # warning-free for well-formed files.
  values <- in_state[, outcome_col]
  values[values == "Not Available"] <- NA
  values <- as.numeric(values)

  # na.last = NA drops hospitals without a value from the ranking entirely.
  ranked <- hospitals[order(values, hospitals, na.last = NA)]
  n_ranked <- length(ranked)

  if (n_ranked == 0) {
    return(NA_character_)
  }
  if (isTRUE(num == "best")) {
    return(ranked[1])
  }
  if (isTRUE(num == "worst")) {
    return(ranked[n_ranked])
  }
  # Only ranks that actually exist in the filtered ranking are answered;
  # everything else (too large, < 1, NA, unknown strings) yields NA.
  if (is.numeric(num) && length(num) == 1 && !is.na(num) &&
      num >= 1 && num <= n_ranked) {
    return(ranked[num])
  }
  NA_character_
}
3. Ranking hospitals in all states (rankall.R)
# For every state in the file, return the hospital holding a given rank
# for an outcome.
#
# Reads "outcome-of-care-measures.csv" from the working directory. The
# hospital name is taken from column 2 and the outcome value from column
# 11 ("heart attack"), 17 ("heart failure") or 23 ("pneumonia"). Within each
# state, hospitals are ranked by ascending outcome value, ties broken
# alphabetically by name; hospitals whose value is "Not Available" are
# excluded from the ranking.
#
# Args:
#   outcome: one of "heart attack", "heart failure", "pneumonia".
#   num:     "best" (rank 1), "worst" (last rank) or a numeric rank.
#
# Returns:
#   A data frame with one row per state (in the order produced by split())
#   and two columns: `hospital` (name at the requested rank, NA when the
#   state has no hospital at that rank) and `state`.
#
# Raises:
#   "invalid outcome" if `outcome` is not one of the three supported values.
rankall <- function(outcome, num = "best") {
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
  outcome_cols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

  if (!(outcome %in% names(outcome_cols))) {
    stop("invalid outcome")
  }

  outcome_col <- outcome_cols[[outcome]]
  hospital_col <- 2

  hospitals <- data[, hospital_col]

  # Vectorised replacement of the placeholder for the whole file at once.
  values <- data[, outcome_col]
  values[values == "Not Available"] <- NA
  values <- as.numeric(values)

  # Pick the hospital at rank `num` among the given row indices.
  pick_hospital <- function(rows) {
    state_hospitals <- hospitals[rows]
    # na.last = NA drops hospitals without a value from the ranking, so
    # "best"/"worst" can never land on a hospital that has no data.
    ranked <- state_hospitals[order(values[rows], state_hospitals, na.last = NA)]
    n_ranked <- length(ranked)

    if (n_ranked == 0) {
      return(NA_character_)
    }
    if (isTRUE(num == "best")) {
      return(ranked[1])
    }
    if (isTRUE(num == "worst")) {
      return(ranked[n_ranked])
    }
    if (is.numeric(num) && length(num) == 1 && !is.na(num) &&
        num >= 1 && num <= n_ranked) {
      return(ranked[num])
    }
    NA_character_
  }

  # Group row indices by state; the list names are the state values, which
  # are used directly for the `state` column of the result.
  rows_by_state <- split(seq_len(nrow(data)), data$State)
  hospital_names <- vapply(rows_by_state, pick_hospital, character(1),
                           USE.NAMES = FALSE)

  data.frame(hospital = hospital_names,
             state = as.character(names(rows_by_state)))
}