Using the stats program R to solve the problems.

Problem 1: a between subjects t-test

1) An investigator believes that caffeine facilitates performance on a simple spelling test. Two groups of subjects are given either 200 mg of caffeine or a placebo. What test should be applied to see if these two groups differ, if the results are as follows?
0 mg 	200 mg.
24	24
25	29
27	26
26	23
26	25
22	28
21	27
22	24
23	27
25	28
25	27
25	26

#read in the data: the 12 (placebo, caffeine) pairs of scores, entered row by row
scores= c(24, 24,  25, 29,  27, 26,  26, 23,
   26, 25,  22, 28,  21, 27,  22, 24,  23,
   27,  25, 28,  25, 27,  25, 26)
data=matrix(scores,nrow=12,ncol=2,byrow=TRUE)    #convert the data to a 2 dimensional matrix
data             #show the data

placebo=data[,1]     #first column  (0 mg)
caffeine=data[,2]    #second column (200 mg)

#between subjects (independent groups) t-test, assuming equal variances
 t.test(placebo,caffeine,var.equal=TRUE)

#What if the data were correlated (i.e., the same subjects took both conditions)?

data.df=data.frame(placebo,caffeine)  #make a "data frame" of the data
data.df            #show the data again

#CAUTION: t.test applied to the whole data frame is NOT a paired test.
#It pools all 24 scores and does a one sample t-test of their mean against 0.
t.test(data.df)

#The paired (within subjects) t-test compares the two columns row by row;
#it is equivalent to a one sample t-test on the 12 difference scores.
t.test(placebo,caffeine,paired=TRUE)

Produces this output:
> scores= c(24,	24,  25,	29,  27,	26,  26,	23, 
+    26,	25,  22,	28,  21,	27,  22,	24,  23,	
+    27,  25,	28,  25,	27,  25,	26)         #read in the data
> data=matrix(scores,nrow=12,ncol=2,byrow=TRUE)    #convert the data to a 2 dimensional matrix
> data             #show the data 
      [,1] [,2]
 [1,]   24   24
 [2,]   25   29
 [3,]   27   26
 [4,]   26   23
 [5,]   26   25
 [6,]   22   28
 [7,]   21   27
 [8,]   22   24
 [9,]   23   27
[10,]   25   28
[11,]   25   27
[12,]   25   26
> placebo=data[,1]     #first column
> caffeine=data[,2]    #second column
> 
> 
>  t.test(placebo,caffeine,var.equal=TRUE)

	Two Sample t-test

data:  placebo and caffeine 
t = -2.5273, df = 22, p-value = 0.01918
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -3.4894314 -0.3439019 
sample estimates:
mean of x mean of y 
 24.25000  26.16667 

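But if the data were correlated (i.e., the same subjects took both conditions), a paired analysis would be appropriate, and it can be more powerful when the two sets of scores are positively correlated. Note, however, that the call shown below, t.test(data.df), is not a paired test: it pools all 24 scores and tests whether their overall mean differs from 0 (hence "One Sample t-test" with df = 23). For a paired test use t.test(placebo,caffeine,paired=TRUE):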
 
> data.df=data.frame(placebo,caffeine)  #make a "data frame" of the data
> data.df            #show the data again
   placebo caffeine
1       24       24
2       25       29
3       27       26
4       26       23
5       26       25
6       22       28
7       21       27
8       22       24
9       23       27
10      25       28
11      25       27
12      25       26
> t.test(data.df)

	One Sample t-test

data:  data.df 
t = 59.8402, df = 23, p-value = < 2.2e-16
alternative hypothesis: true mean is not equal to 0 
95 percent confidence interval:
 24.33689 26.07978 
sample estimates:
mean of x 
 25.20833 
 
 
# For comparison: the two sample t-test for uncorrelated (independent) groups.
# var.equal=FALSE is the default of t.test and is only spelled out here;
# it requests the Welch test, which does not assume equal variances.
 t.test(x=placebo,y=caffeine,var.equal=FALSE)

Problem 2: correlation

Another investigator believes that introversion/extraversion has a linear relationship to spelling ability and reports the following data. What test should be applied?
Introversion	Spelling Ability

21	31
14	33
13	39
13	24
20	35
21	37
11	36
15	20
23	46
12	31
17	44
26	44
 
Read in the data, organize the data, then report and graph the correlation.
#read in the data: the 12 (introversion, spelling) pairs, entered row by row
scores = c(21, 31,  14, 33,  13, 39,  13, 24,
           20, 35,  21, 37,  11, 36,  15, 20,
           23, 46,  12, 31,  17, 44,  26, 44)

data=matrix(scores,nrow=12,ncol=2,byrow=TRUE)    #convert the data to a 2 dimensional matrix
data             #show the data

introversion=data[,1]     #first column
spelling=data[,2]    #second column
data.df=data.frame(introversion,spelling)  #make a "data frame" of the data
data.df            #show the data again
plot(data.df)     #draw the scatter plot
cor(data.df)      #what is the correlation
                  #too many decimals! round it off
round(cor(data.df),2) #round to two decimals

#the test that answers the question: is the correlation reliably different from 0?
#cor.test reports r with a t statistic on n-2 = 10 degrees of freedom and a p-value
cor.test(introversion,spelling)

summary(data.df)  #basic descriptives for both variables
boxplot(data.df)  #side by side box plots of the two variables

Produces the following output
 > scores = c( 21,	31,  14,	33,  13,	39,  13,	24,  20,
+ 	35,  21,	37,  11,	36,  15,	20,  23,	46,  12,
+ 		31,  17,	44,  26,	44)  
> 		
> data=matrix(scores,nrow=12,ncol=2,byrow=TRUE)    #convert the data to a 2 dimensional matrix
> data             #show the data 
      [,1] [,2]
 [1,]   21   31
 [2,]   14   33
 [3,]   13   39
 [4,]   13   24
 [5,]   20   35
 [6,]   21   37
 [7,]   11   36
 [8,]   15   20
 [9,]   23   46
[10,]   12   31
[11,]   17   44
[12,]   26   44
> 
> introversion=data[,1]     #first column
> spelling=data[,2]    #second column
> data.df=data.frame(introversion,spelling)  #make a "data frame" of the data
> data.df            #show the data again
   introversion spelling
1            21       31
2            14       33
3            13       39
4            13       24
5            20       35
6            21       37
7            11       36
8            15       20
9            23       46
10           12       31
11           17       44
12           26       44
> plot(data.df)     #draw the scatter plot
> cor(data.df)      #what is the correlation
             introversion  spelling
introversion    1.0000000 0.5102348
spelling        0.5102348 1.0000000
>                   #too many decimals! round it off
> round(cor(data.df),2) #round to two decimals  
             introversion spelling
introversion         1.00     0.51
spelling             0.51     1.00
> 
> summary(data.df)
  introversion      spelling    
 Min.   :11.00   Min.   :20.00  
 1st Qu.:13.00   1st Qu.:31.00  
 Median :16.00   Median :35.50  
 Mean   :17.17   Mean   :35.00  
 3rd Qu.:21.00   3rd Qu.:40.25  
 Max.   :26.00   Max.   :46.00  
> boxplot(data.df)
>

Problem 3: 2 Way analysis of variance (between subjects)
3) Still another investigator believes that spelling performance is a function of the interaction of caffeine and time of day.  She gives 0 or 200 mg of caffeine to subjects at 9 am and 9 pm.  If the results are as below, what statistical test should be applied to test her hypothesis?
9am - 0 mg	9 am 200 mg	9pm 0 mg	9pm 200 mg
26	27	28	24
27	30	27	23
25	28	25	25
22	32	25	21
27	25	31	23
23	29	32	21
21	31	25	25
28	28	32	21
21	28	26	26
23	26	25	22
20	29	27	23
23	31	26	26
Can be solved using the following R code:
#first save the data as a tab delimited text file (e.g., exported from Excel)
#or an equivalent file in your text editor
#now read the file --note that header = TRUE implies that you have labelled each column
#(the path is hard coded; change it to wherever you saved your copy of the file)
data=read.table("Bill's TI:Users:bill:Library:Favorites:syllabi:205:R.tutorial:anovadata.txt",header=TRUE)

data    #show the data
summary(data)     #basic descriptives are always useful

stackdata= stack(data)    #stack the columns: one column of scores (values), one of column labels (ind)
numcases=nrow(data)        #How many subjects are there in each condition? (counted from the data)
numvariables=ncol(data)    #how many variables (conditions)

#the labels built below assume four columns in the order
#am 0 mg, am 200 mg, pm 0 mg, pm 200 mg -- stop early if the file is shaped differently
stopifnot(numvariables==4)

#create a data frame  -- this applies condition labels to the cases
#drug alternates placebo/caffeine in runs of numcases; tod is am for the first two runs, pm for the last two

score.df =data.frame( data=stackdata,
             drug=factor(rep(rep(c("placebo", "caffeine"), c(numcases, numcases)), 2)),
             tod=factor(rep(c("am", "pm"), c(numcases*2, numcases*2))))

score.df     #look at the data to make sure it makes sense!

timedrug.aov=aov(data.values~drug*tod,data=score.df)     #do the ANOVA (main effects and interaction)

summary(timedrug.aov)                                    #summary output
print(model.tables(timedrug.aov,"means"),digits=3)      #find the cell means


This produces the following rather extensive output 


> #first save the data as an excel tab limited text file 
> #or an equivalent file in your text editor
> #now read the file --note that header = TRUE implies that you have labelled each column
> data=read.table("Bill's TI:Users:bill:Library:Favorites:syllabi:205:R.tutorial:anovadata.txt",header=TRUE)
> 
> data    #show the data
   X9am0mg X9am200mg X9pm0mg X9pm200mg
1       26        27      28        24
2       27        30      27        23
3       25        28      25        25
4       22        32      25        21
5       27        25      31        23
6       23        29      32        21
7       21        31      25        25
8       28        28      32        21
9       21        28      26        26
10      23        26      25        22
11      20        29      27        23
12      23        31      26        26
> 
> stackdata= stack(data)    #convert to a column vector  
> numcases=12                #How many subjects are there?
> numvariables=4             #how many variables
> 
> #create a data frame  -- this applies condition labels to the cases  
> 
> score.df =data.frame( data=stackdata, 
+              drug=factor(rep(rep(c("placebo", "caffeine"), c(numcases, numcases)), 2)),  	
+ 			 tod=factor(rep(c("am", "pm"), c(numcases*2, numcases*2))))
> 			 
> score.df     #look at the data to make sure it makes sense!
   data.values  data.ind     drug tod
1           26   X9am0mg  placebo  am
2           27   X9am0mg  placebo  am
3           25   X9am0mg  placebo  am
[snip..]
11          20   X9am0mg  placebo  am
12          23   X9am0mg  placebo  am
13          27 X9am200mg caffeine  am
[snip..]
24          31 X9am200mg caffeine  am
25          28   X9pm0mg  placebo  pm
[snip..]
35          27   X9pm0mg  placebo  pm
36          26   X9pm0mg  placebo  pm
37          24 X9pm200mg caffeine  pm
[snip..]
47          23 X9pm200mg caffeine  pm
48          26 X9pm200mg caffeine  pm
>  
> timedrug.aov=aov(data.values~drug*tod,data=score.df)     #do the ANOVA
> 
> summary(timedrug.aov)                                    #summary output
            Df  Sum Sq Mean Sq F value    Pr(>F)    
drug         1   1.688   1.688  0.2971    0.5885    
tod          1   9.187   9.187  1.6175    0.2101    
drug:tod     1 238.521 238.521 41.9937 6.633e-08 ***
Residuals   44 249.917   5.680                      
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 
> print(model.tables(timedrug.aov,"means"),digits=3)      #find the cell means
Tables of means
Grand mean
        
25.8125 

 drug 
    caffeine placebo
          26    25.6
rep       24    24.0

 tod 
      am   pm
    26.2 25.4
rep 24.0 24.0

 drug:tod 
          tod
drug am    pm   
  caffeine 28.67 23.33
  rep      12.00 12.00
  placebo  23.83 27.42
  rep      12.00 12.00
> 
We can also do the ANOVA in terms of three separate t-tests:
am vs. pm
drug vs. placebo
(am placebo + pm drug)vs (am drug + pm placebo)

These are the corresponding contrasts (one row per t-test, one column per condition):
 1  1 -1 -1
 1 -1  1 -1
 1 -1 -1  1
 
 
 #read the raw table again (one column per condition)
 data=read.table("Bill's TI:Users:bill:Library:Favorites:syllabi:205:R.tutorial:anovadata.txt",header=TRUE)

 #each contrast pools two columns into each of two groups and compares them
 #with an equal variance two sample t-test

 #time of day: columns 1 and 2 versus columns 3 and 4
 morning=c(data[[1]],data[[2]])
 evening=c(data[[3]],data[[4]])
 t.test(morning,evening,var.equal=TRUE)

 #drug: columns 1 and 3 versus columns 2 and 4
 placebo=c(data[[1]],data[[3]])
 drug=c(data[[2]],data[[4]])
 t.test(placebo,drug,var.equal=TRUE)

 #interaction: columns 1 and 4 versus columns 2 and 3
 inter1=c(data[[1]],data[[4]])
 inter2=c(data[[2]],data[[3]])
 t.test(inter1,inter2,var.equal=TRUE)
 
 yields the following
 
>  morning=c(data[,1],data[,2])
>  evening=c(data[,3],data[,4])
>  placebo=c(data[,1],data[,3])
>  drug=c(data[,2],data[,4])
>  inter1=c(data[,1],data[,4])
>  inter2=c(data[,2],data[,3])
>  t.test(morning,evening,var.equal=TRUE)

	Two Sample t-test

data:  morning and evening 
t = 0.9286, df = 46, p-value = 0.3579
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -1.021728  2.771728 
sample estimates:
mean of x mean of y 
   26.250    25.375 

>  t.test(placebo,drug,var.equal=TRUE)

	Two Sample t-test

data:  placebo and drug 
t = -0.395, df = 46, p-value = 0.6947
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -2.286185  1.536185 
sample estimates:
mean of x mean of y 
   25.625    26.000 

>  t.test(inter1,inter2,var.equal=TRUE)

	Two Sample t-test

data:  inter1 and inter2 
t = -6.4863, df = 46, p-value = 5.388e-08
alternative hypothesis: true difference in means is not equal to 0 
95 percent confidence interval:
 -5.841895 -3.074772 
sample estimates:
mean of x mean of y 
 23.58333  28.04167 

 
 
Compare the t-test results with the Anova results. Note that they differ somewhat. Why is this? Find the variance within each cell.