#Written by Erik Gregory#
#egregory2007@yahoo.com#
#California State University Sacramento#
#19 July 2010#
#Dr. Taylor wants to estimate the proportion of teenage patients in her practice#
#currently experiencing flu symptoms. On one day she collects all of the data for#
#her study to determine the mean length of time with flu symptoms.#
#Dr. Taylor observes that patients seem to be more likely to have flu symptoms for#
#a longer period of time than the CDC predicts. She wants to know if this measurement#
#is statistically significant or not.#
#Suppose the true number of individuals in Dr. Taylor's practice with flu symptoms#
#starting within the last week is 50. The simulation proceeds as follows.#
#Random number generator seed...(anyone get the reference?)#
set.seed(48151623)
#variable definitions#
categories <- 0*(1:8)
l <- 0*(1:50)
m <- 0*(1:50)
#the 8 possible durations#
averages <- 1:8
#weights on our categories#
weights <- 8/averages
#sample size#
N = 50
#start dates between 0 and 7 days#
start.date <- round(runif(N, 0, 7)-0.5)
#durations of symptoms between 1 and 7 days#
duration <- round(runif(N, 1, 8))
patients <- data.frame(start.date = start.date, duration = duration, end.date = start.date + duration)
#sort out our sampled individuals#
patients <- patients[order(patients$end.date),]
row.names(patients) <- 1:N
j <- 1
while(patients$end.date[j] < 7) {
j <- j+1
}
w <- j
sampled <- patients[w:N,]
row.names(sampled) <- 1:(N-w+1)
#sorts the indicies of our sampled/unsampled individuals (to be used later)#
patients <- patients[order(patients$start.date),]
row.names(patients) <- 1:N
j <- 1
while(j<=N) {
if(patients$end.date[j] >= 7) {
l[j] <- j
}
else {
m[j] <- j
}
j <- j+1
}
l <- l[l>0]
m <- m[m>0]
#Sorts our sampled individuals into categories by duration#
sampled <- sampled[order(sampled$duration),]
row.names(sampled) <- 1:(N-w+1)
j <- 1
i <- 1
while(j<=N-w+1) {
k <- 0
while(sampled$duration[j] == i && j<=N-w+1) {
k <- k+1
categories[i] <- k
j <- j+1
}
i <- i+1
}
#The uncorrected mean duration#
uncorrected <- mean(sampled$duration)
#Actual mean duration#
true <- mean(patients$duration)
#Corrected mean duration#
corrected <- sum(categories)/sum(categories/averages)
#Plot of the longitudinal lines#
plot(patients$start.date, 1:N, main = "Patients", xlim = c(0, 16), xlab = "Time", cex = .01, ylab = "Patient")
points(patients$end.date, 1:N, cex = 0.01)
segments(patients$start.date[l], l , patients$end.date[l], l, col = "blue")
segments(patients$start.date[m], m, patients$end.date[m], m)
abline(v = 7, col = "red")