# part (A)

N <- 2000
coinTosses  <- sample(c(-1,1), N, replace = TRUE)

# here we toss 2000 coins, -1 represents tails and +1 represents heads; we get 
# back a list of 2000 numbers all of which are -1 or +1

print('for 2000 coin tosses')
print('mean')
print(mean(coinTosses))

print('median')
print(median(coinTosses))

print('std dev')
print(sd(coinTosses))

N <- 50000
coinTosses  <- sample(c(-1,1), N, replace = TRUE)

print('for 50000 coin tosses')
print('mean')
print(mean(coinTosses))

print('median')
print(median(coinTosses))

print('std dev')
print(sd(coinTosses))

#interpretation: mean over large number of samples is zero (equally likely to have 
# -1 or 1). median can be -1 or 1 depending on number of samples, odd or even. 
# std dev should converge to one. see std deviation formua for population 
# when mean is zero, you get sqrt(sum |x - 0|^2/N) = sqrt(sum |1|^2/N) = 
# sqrt((sum_{1,..N} of 1)/N) = sqrt(N/N) = sqrt(1) = 1
# this uses the fact that all observations x are either plus one or minus one
# note that this holds only for N sufficiently large, for smaller N the mean 
# is not close to zero necessarily so the std deviation doesn't need to be 
# very close to one for small N

# part (B)

# To compute the total profit or loss you sum up the minus ones and plus ones corresponding 
# to all the N coin tosses


# part (C)
plot(cumsum(coinTosses) , type = 'l')

# This tells you the profit or loss after a certain number of tosses, the rightmost 
# value tells you the net profit/loss at end of the game
# x axis: n: toss number from 1 to N; y-axis: net profit (or loss) after n tosses


# part (D)

N <- 2000;
num_trials <- 1000;
net_profits <- list();
for(tn in 1:num_trials) {
    coinTossResults   <- cumsum(sample(c(-1,1), N, replace = TRUE))
    net_profits[[tn]] <- coinTossResults[length(coinTossResults)]
}
#
#This simulates the game over 1000 trials using 2000 coin tosses in each trial.
#The first two lines set the parameters. The third line sets net_profits to be 
#an empty list. We will record the net profit or loss of the game there, 
#for each trial. Next we have a for loop where tn (trial number) ranges 
#from 1 to num_trial (which equals 1000). The for loop construct executes 
#the code inside the loop 1000 times, once for each value of tn. 
#The line coinTossResults   <- cumsum(sample(c(-1,1), N, replace = TRUE))
#produces a cumulative sum of the tosses. The cumulative sum is a vector 
#of the same length as the input. The first entry contains the original 
#entry, the second is the sum of the first two entries, and the last entry 
#is the sum of them all. The last entry is the entry that corresponds to the 
#sum of all the tosses and is hence the net profit. Hence, we assign the 
#very last member of the coinTossResults array to the net_profits list for 
#the corresponding trial tn.

# part (e)

N <- 2000;
num_trials <- 10000;
net_profits <- list();
for(tn in 1:num_trials) {
    coinTossResults   <- cumsum(sample(c(-1,1), N, replace = TRUE))
    net_profits[[tn]] <- coinTossResults[length(coinTossResults)]
}

x <- unlist(net_profits)
h<-hist(x, breaks=40, col="red", xlab="Profit",
   main="Histogram of profit/loss distribution")
xfit<-seq(min(x),max(x),length=length(x))
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x))
yfit <- yfit*diff(h$mids[1:2])*length(x)
lines(xfit, yfit, col="blue", lwd=2)

# this part plots a histogram of the net profits (over the specified number of trials) 
# with a normal curve overlayed 
#on top of the histogram. Notice that sometimes the profits are negative (losses), 
#sometimes positive. For a large number of trials the mean goes to zero and 
#the histogram becomes more and more bell shaped! In fact almost perfectly 
#bell shaped as num_trials goes over 50,000. This is a consequence of the 
#central limit theorem in statistics.