#Written by Erik Gregory# #egregory2007@yahoo.com# #California State University Sacramento# #19 July 2010# #Dr. Taylor wants to estimate the proportion of teenage patients in her practice# #currently experiencing flu symptoms. On one day she collects all of the data for# #her study to determine the mean length of time with flu symptoms.# #Dr. Taylor observes that patients seem to be more likely to have flu symptoms for# #a longer period of time than the CDC predicts. She wants to know if this measurement# #is statistically significant or not.# #Suppose the true number of individuals in Dr. Taylor's practice with flu symptoms# #starting within the last week is 50. The simulation proceeds as follows.# #Random number generator seed...(anyone get the reference?)# set.seed(48151623) #variable definitions# categories <- 0*(1:8) l <- 0*(1:50) m <- 0*(1:50) #the 8 possible durations# averages <- 1:8 #weights on our categories# weights <- 8/averages #sample size# N = 50 #start dates between 0 and 7 days# start.date <- round(runif(N, 0, 7)-0.5) #durations of symptoms between 1 and 7 days# duration <- round(runif(N, 1, 8)) patients <- data.frame(start.date = start.date, duration = duration, end.date = start.date + duration) #sort out our sampled individuals# patients <- patients[order(patients$end.date),] row.names(patients) <- 1:N j <- 1 while(patients$end.date[j] < 7) { j <- j+1 } w <- j sampled <- patients[w:N,] row.names(sampled) <- 1:(N-w+1) #sorts the indicies of our sampled/unsampled individuals (to be used later)# patients <- patients[order(patients$start.date),] row.names(patients) <- 1:N j <- 1 while(j<=N) { if(patients$end.date[j] >= 7) { l[j] <- j } else { m[j] <- j } j <- j+1 } l <- l[l>0] m <- m[m>0] #Sorts our sampled individuals into categories by duration# sampled <- sampled[order(sampled$duration),] row.names(sampled) <- 1:(N-w+1) j <- 1 i <- 1 while(j<=N-w+1) { k <- 0 while(sampled$duration[j] == i && j<=N-w+1) { k <- k+1 categories[i] <- k j <- j+1 } i <- i+1 } #The uncorrected mean duration# uncorrected <- mean(sampled$duration) #Actual mean duration# true <- mean(patients$duration) #Corrected mean duration# corrected <- sum(categories)/sum(categories/averages) #Plot of the longitudinal lines# plot(patients$start.date, 1:N, main = "Patients", xlim = c(0, 16), xlab = "Time", cex = .01, ylab = "Patient") points(patients$end.date, 1:N, cex = 0.01) segments(patients$start.date[l], l , patients$end.date[l], l, col = "blue") segments(patients$start.date[m], m, patients$end.date[m], m) abline(v = 7, col = "red")