# Mike Hammond's tutorial exercise on searching the Brown Corpus
# for words that begin with /b, d/ versus /n, m/ ...
#
# Steps: read the corpus file, count the entries whose spelling starts with
# a stop letter (b, d) and those that start with a nasal letter (n, m, or a
# "kn" spelling), draw a bar plot of the two counts, and run a chi-squared
# test on them.

# Path to the corpus file. The original script called setwd() on a
# machine-specific directory whose name was mis-encoded; instead, run the
# script from the directory that contains bc.txt or edit this path.
corpus_file <- "bc.txt"

x <- read.table(corpus_file, as.is = TRUE)
dim(x)
# [1] 1021281 1   (output recorded by the original author)
names(x) <- "fact"
# Kept as an explicit character column; `fact` and `char` are both retained
# because later code may refer to either name.
x$char <- as.character(x$fact)

# Matching is on spelling, not pronunciation. Both cases of each letter are
# listed so capitalised words are counted. The stop class previously read
# "[bdBG]", which skipped "D..." words and counted "G..." words; it is now
# "[bdBD]" to agree with the /b, d/ description above.
# grepl() yields FALSE for NA entries, so they are never counted.
stopVnasal <- c(
  stops = sum(grepl("^[bdBD]", x$char)),
  nasals = sum(grepl("^([nmNM]|[kK]n)", x$char))
)
stopVnasal
# Recorded by the original author with the old "[bdBG]" stop pattern:
#   stops 75943, nasals 63629 -- re-run to refresh the stops count.

# dev.new() opens the platform's default graphics device (windows() exists
# only on Windows builds of R) and forwards the size settings it supports.
dev.new(height = 3, width = 4, pointsize = 12)
# `mar` needs four values (bottom, left, top, right); the single value used
# before is rejected by par(). NOTE(review): the intended margins are not
# known -- these leave room for the bar names and the y-axis labels.
par(family = "serif", oma = rep(0, 4), mar = c(2.5, 4, 0.5, 0.5))
# 80000 is kept as the minimum axis height; the axis grows if a count
# would otherwise be clipped.
barplot(stopVnasal, ylim = c(0, max(80000, 1.05 * max(stopVnasal))))
box()

# With a single vector of counts, chisq.test() performs a goodness-of-fit
# test against equal expected frequencies for the two categories.
chisq.test(stopVnasal)
# Recorded with the old counts: X-squared = 1086.426, df = 1,
# p-value < 2.2e-16 -- re-run to refresh.