######################################################################
# Introduction to Probability in R
#####################################################################
##### A few reminders about R #####
# R is case-sensitive
# Use # to start a comment line.
# Command-enter to evaluate
# It's not cool to use = for assignment. Use <-
# R likes vectors. To create a fector, use "c(...)". For example:
# nums<-c(2,3,67,11)
# Like Mathematica, R is happiest when used as a functional language.
# It too likes to operate on lists as a single entity whenver possible
# The basic R is ok but to make R really useful you need to use packages.
######################################################################
##### R makes it very easy to generate random values. #####
# For example, suppose we want to simulate flipping a coin a large number of times.
# To do so, we can *sample* from the *list* c("H","T") a large number of times,
# with replacement (so we get to use "H" and "T" over and #over again).
N <- 100
flips <- sample( c("H","T"), N, replace=T)
flips
# Question: How many H and T did we get?
# One way to find the answer: Use the "table" function.
table(flips)
# To find out more about the table function, use ?:
?table
# You can get the max value from a table like this:
max(table(flips))
# Let's find the proportion of H's:
propH <- mean(flips=="H")
propH
# If we did this repeatedly, how would the proportion of H's vary?
M <- 10000 # number of times to repeat
vals <- rep(0,M)
for(i in 1:M){
flips <- sample(c("H","T"), N, replace=T)
vals[i] <- mean(flips == "H")
}
vals
# We can plot a histogram easily.
hist(vals, breaks = 100)
min(vals)
max(vals)
####################################################################
## This is an example of the *Central Limit Theorem* in action! ##
####################################################################
# Here's another way to generate a histogram, this time using the more advanced ggplot package.
# The ggplot commands work best with what R calls a *data frame.*
library(ggplot2)
var.df <- data.frame(p = vals)
ggplot(var.df, aes(p)) +
geom_histogram(color="blue", fill="white", binwidth=.01) +
ggtitle("Proportion of H's in fair coin flip")
##### Rolling Dice #####
# The sample function can simulate rolling a die:
sample(1:6, 1)
sample(1:6, 1)
sample(1:6, 1)
# The sample function can also simulate rolling many dice:
sample(1:6, 5, replace=T)
# we can write our own function to make this a bit simpler to use:
myDice <- function(n){
sample(1:6, n, replace=T)
}
myDice(8)
# Suppose we want the sum of many dice:
dice <- function(n=2){
vals <- myDice(n)
sum(vals)
}
# The sum of two dice:
dice()
dice()
dice()
# The sum of ten dice:
dice(10)
dice(10)
dice(10)
# How about a distribution of the sum of two dice?
M <- 10000 # number of repitions.
vals <- rep(0,M)
for(i in 1:M){
vals[i] <- dice()
}
hist(vals, main="Histogram of Totals of two dice")
# Here's a slightly more illustrative plot using ggplot:
vals.df <- data.frame(table(vals))
ggplot(vals.df) +
geom_segment(aes(x=vals, xend=vals, y=0, yend=Freq)) +
geom_point(aes(x=vals, y=Freq), size=7, color="red") +
ggtitle("Histogram of totals of two dice")