# part (A) N <- 2000 coinTosses <- sample(c(-1,1), N, replace = TRUE) # here we toss 2000 coins, -1 represents tails and +1 represents heads; we get # back a list of 2000 numbers all of which are -1 or +1 print('for 2000 coin tosses') print('mean') print(mean(coinTosses)) print('median') print(median(coinTosses)) print('std dev') print(sd(coinTosses)) N <- 50000 coinTosses <- sample(c(-1,1), N, replace = TRUE) print('for 50000 coin tosses') print('mean') print(mean(coinTosses)) print('median') print(median(coinTosses)) print('std dev') print(sd(coinTosses)) #interpretation: mean over large number of samples is zero (equally likely to have # -1 or 1). median can be -1 or 1 depending on number of samples, odd or even. # std dev should converge to one. see std deviation formua for population # when mean is zero, you get sqrt(sum |x - 0|^2/N) = sqrt(sum |1|^2/N) = # sqrt((sum_{1,..N} of 1)/N) = sqrt(N/N) = sqrt(1) = 1 # this uses the fact that all observations x are either plus one or minus one # note that this holds only for N sufficiently large, for smaller N the mean # is not close to zero necessarily so the std deviation doesn't need to be # very close to one for small N # part (B) # To compute the total profit or loss you sum up the minus ones and plus ones corresponding # to all the N coin tosses # part (C) plot(cumsum(coinTosses) , type = 'l') # This tells you the profit or loss after a certain number of tosses, the rightmost # value tells you the net profit/loss at end of the game # x axis: n: toss number from 1 to N; y-axis: net profit (or loss) after n tosses # part (D) N <- 2000; num_trials <- 1000; net_profits <- list(); for(tn in 1:num_trials) { coinTossResults <- cumsum(sample(c(-1,1), N, replace = TRUE)) net_profits[[tn]] <- coinTossResults[length(coinTossResults)] } # #This simulates the game over 1000 trials using 2000 coin tosses in each trial. #The first two lines set the parameters. The third line sets net_profits to be #an empty list. We will record the net profit or loss of the game there, #for each trial. Next we have a for loop where tn (trial number) ranges #from 1 to num_trial (which equals 1000). The for loop construct executes #the code inside the loop 1000 times, once for each value of tn. #The line coinTossResults <- cumsum(sample(c(-1,1), N, replace = TRUE)) #produces a cumulative sum of the tosses. The cumulative sum is a vector #of the same length as the input. The first entry contains the original #entry, the second is the sum of the first two entries, and the last entry #is the sum of them all. The last entry is the entry that corresponds to the #sum of all the tosses and is hence the net profit. Hence, we assign the #very last member of the coinTossResults array to the net_profits list for #the corresponding trial tn. # part (e) N <- 2000; num_trials <- 10000; net_profits <- list(); for(tn in 1:num_trials) { coinTossResults <- cumsum(sample(c(-1,1), N, replace = TRUE)) net_profits[[tn]] <- coinTossResults[length(coinTossResults)] } x <- unlist(net_profits) h<-hist(x, breaks=40, col="red", xlab="Profit", main="Histogram of profit/loss distribution") xfit<-seq(min(x),max(x),length=length(x)) yfit<-dnorm(xfit,mean=mean(x),sd=sd(x)) yfit <- yfit*diff(h$mids[1:2])*length(x) lines(xfit, yfit, col="blue", lwd=2) # this part plots a histogram of the net profits (over the specified number of trials) # with a normal curve overlayed #on top of the histogram. Notice that sometimes the profits are negative (losses), #sometimes positive. For a large number of trials the mean goes to zero and #the histogram becomes more and more bell shaped! In fact almost perfectly #bell shaped as num_trials goes over 50,000. This is a consequence of the #central limit theorem in statistics.