library(RCurl)
library(reshape)
library(htmltab)
library(ggplot2)
library(stringr)
library(scales)
# Download the results page, then pull out the table selected by which = 7.
# NOTE(review): the URL below has no scheme or host, and SSL peer
# verification is switched off for the request -- confirm both are intended.
theurl <- getURL(
  url = "/info/en/?search=Results_of_the_Republican_Party_presidential_primaries,_2016",
  ssl.verifyPeer = FALSE
)
table <- htmltab(doc = theurl, which = 7)
# Keep only the useful rows and columns (rows 3 to 61, every column except the
# last) and name the first two columns.
# NOTE(review): the row range and the column positions used below are tied to
# the layout of the scraped table -- confirm against the current page.
df <- table[3:61, seq_len(ncol(table) - 1)]
names(df)[1:2] <- c("Date", "State")
# Fail early with a clear message if the table is narrower than expected.
stopifnot(ncol(df) >= 14)

# Extract the delegate count from every candidate column: the first number
# that is directly followed by " delegate". Thousands separators are removed
# so that as.numeric() does not turn a value such as "1,237" into NA.
for (i in 3:ncol(df)) {
  counts <- str_extract(df[[i]], "[0-9,]+(?= delegate)")
  df[[i]] <- as.numeric(str_replace_all(counts, ",", ""))
}

# Sum up the minor candidates (columns 7 to 14) into a single "Others" column.
df[[7]] <- rowSums(df[, 7:14], na.rm = TRUE)
names(df)[7] <- "Others"
df <- df[, 1:7]

# Extract state names by stripping the contest-type text that follows them.
# The patterns are applied one after another, in this order.
contest_suffixes <- c(
  "Binding[A-Za-z ]+",
  "Territorial[A-Za-z ]+",
  "State[A-Za-z ]+",
  "District Co[A-Za-z ]+",
  "Conv[A-Za-z ]+"
)
for (suffix in contest_suffixes) {
  df[[2]] <- str_replace(df[[2]], suffix, "")
}

# Parse the date. The cell only carries month and day, and as.Date() would
# otherwise fill in the current year, so the year is pinned to 2016 (the year
# named in the page title). The year is prepended rather than appended so any
# text trailing the day is still ignored by the parser.
df$Date <- as.Date(paste("2016", df$Date), format = "%Y %b %d")

# Replace NA with 0 in the delegate-count columns only, so that the Date and
# State columns are never overwritten with 0.
count_cols <- 3:7
df[count_cols][is.na(df[count_cols])] <- 0

# Remove contests that haven't happened (no delegates awarded to anyone).
df <- df[rowSums(df[, count_cols], na.rm = TRUE) != 0, ]
# Build two data frames with one row per contest date: df2 holds the delegates
# won on that date, df3 the running (cumulative) count up to that date.
candidate_cols <- names(df)[3:7]
# Sorted so the cumulative sums run chronologically; an NA date is kept last.
contest_dates <- sort(unique(df$Date), na.last = TRUE)
df2 <- data.frame(Date = contest_dates)
df3 <- df2
for (candidate in candidate_cols) {
  # which() drops NA comparisons, so a row with an NA date joins no group.
  won_per_date <- vapply(
    seq_along(contest_dates),
    function(j) sum(df[[candidate]][which(df$Date == contest_dates[j])]),
    numeric(1)
  )
  df2[[candidate]] <- won_per_date
  df3[[candidate]] <- cumsum(won_per_date)
}
# Row totals across the five candidate columns (columns 2 to 6).
df2$Total <- rowSums(df2[, 2:6], na.rm = TRUE)
df3$Total <- rowSums(df3[, 2:6], na.rm = TRUE)
# Reshape both data frames to long form so that the candidate becomes a
# variable; Date and Total are kept as identifier columns.
mdata <- melt(df2, id.vars = c("Date", "Total"))
mdata2 <- melt(df3, id.vars = c("Date", "Total"))
names(mdata) <- c("Date", "Total", "Candidate", "Delegates")
# Attach the cumulative figures from the melted df3: its 4th column is the
# melted value and its 2nd column is the Total identifier column.
mdata[["Cumulative"]] <- mdata2[, 4]
mdata[["CTotal"]] <- mdata2[, 2]
results <- mdata
# Fill/outline colours and legend labels passed to the manual scales.
# NOTE(review): both vectors are matched to the Candidate levels by position
# -- confirm the order agrees with the candidate columns of the table.
colors <- c("#283681", "#DAA520", "#29AB87", "#C60E3B", "#000000")
labels <- c("Trump", "Cruz", "Kasich", "Rubio", "Other")

# Area chart of Cumulative / CTotal over time, one area per candidate, with
# the y axis formatted as a percentage.
d <- ggplot(
  data = results,
  mapping = aes(
    x = Date,
    y = Cumulative / CTotal,
    fill = Candidate,
    colour = Candidate
  )
) +
  geom_area() +
  scale_colour_manual(values = colors, labels = labels) +
  scale_fill_manual(values = colors, labels = labels) +
  scale_y_continuous(labels = percent) +
  labs(x = "Date", y = "% Delegates", title = "Share of delegates") +
  labs(fill = "Candidate", colour = "Candidate")

# Display the plot.
d