Admin@ARMBP:/cygdrive/c/Users/Admin/Desktop/DMU/SEE Texts/SEE Experiments/Experiment 1 Kinney Chapter> cat KingLear1623.txt | tr "(, " "\n\n\n" | grep -i which | sort | uniq -c
     40 which$6#1$
     30 Which$6#1$
      2 which$6#2$
      3 Which$6#2$

Admin@ARMBP:/cygdrive/c/Users/Admin/Desktop/DMU/SEE Texts/SEE Experiments/Experiment 1 Kinney Chapter> cat KingLear1608.txt | tr "(, " "\n\n\n" | grep -i which | sort | uniq -c
     45 which$6#1$
     19 Which$6#1$
      2 which$6#2$
      3 Which$6#2$

Haskell Script:

{-# OPTIONS_GHC -fno-warn-tabs #-}

import Utilities
import Data.List
import Data.Char

-- Entry point: passes the whole of standard input through doStuff and
-- writes the resulting string to standard output.
main :: IO ()
main = interact doStuff

-- Renders the results for one division as text: one line per result entry,
-- each consisting of the shown division id, a comma, and the entry as
-- formatted by sh1.
sh (divId, result) = unlines (map row result)
  where
    prefix  = show divId ++ ","
    row res = prefix ++ sh1 res

-- Formats one collated entry (keyword, positions, count) by passing the
-- keyword's group label, the shown keyword and the shown count to joinAll
-- with "," (joinAll comes from Utilities; presumably a separator join --
-- confirm). The position list is accepted but not emitted; appending
-- `show _positions` to the field list would include it.
sh1 (kw, _positions, count) = joinAll "," fields
  where
    fields = [getGroup kw, show kw, show count]

-- Whole-input transformation used by main.
--   1. lower-case the input;
--   2. split it with prs "<div" "</div" (prs is defined in Utilities;
--      presumably it yields the text of each div -- confirm);
--   3. number the pieces from 1, analyse each, and render each with sh.
-- Each rendered piece already ends in a newline (sh uses unlines), so the
-- outer unlines leaves an empty line after every division's rows.
-- A disabled `takes 10000` truncation step used to sit between steps 1 and 2.
doStuff s = unlines [ sh (analyse numbered) | numbered <- zip [1..] divisions ]
  where
    divisions = prs "<div" "</div" (map toLower s)

-- Keyword tables. Entries are matched against lower-cased, whitespace-
-- separated tokens of the input, so tagged forms such as "that$6#1$" are
-- listed literally (the meaning of the $n#m$ suffixes is not defined here).
thats, whichs, whos :: [String]
thats  = ["that", "that$3$", "that$6#1$", "that$6#2$"]
whichs = ["which", "which$6#1$", "which$6#2$"]
whos   = ["who", "who$6#1$", "who$6#2$"]

-- Group 1: which / that / who forms.
group1 :: [String]
group1 = concat [whichs, thats, whos]

-- Groups 2-4: forms of "do", demonstratives, and thy/thine.
group2, group3, group4 :: [String]
group2 = ["does", "doth", "doest", "do"]
group3 = ["these", "this", "those"]
group4 = ["thy", "thine"]

-- Every keyword of every group, in group order.
keywords :: [String]
keywords = group1 ++ group2 ++ group3 ++ group4

-- Returns the label ("1".."4") of the first keyword group that contains w,
-- or "0" when w belongs to none of them. Groups are tried in numeric order.
getGroup :: String -> String
getGroup w = case [label | (label, members) <- labelled, w `elem` members] of
    (label:_) -> label
    []        -> "0"
  where
    labelled =
      [ ("1", group1)
      , ("2", group2)
      , ("3", group3)
      , ("4", group4)
      ]

-- True when the word of a (position, word) pair is one of the keywords;
-- the position component is ignored.
isKeyword :: (a, String) -> Bool
isKeyword entry = snd entry `elem` keywords

-- Analyses the text of one division. The text is split into words, each
-- word is paired with its 1-based position, non-keywords are dropped, and
-- the remaining pairs are sorted and grouped by word. Every group is then
-- summarised by collate. The division id is passed through unchanged.
analyse (divId, s) = (divId, map collate grouped)
  where
    positioned = zip [1..] (words s)
    hits       = filter isKeyword positioned
    -- groupBy only merges adjacent equal items, hence the sort by word first.
    grouped    = groupBy sameWord (sortBy byWord hits)

    byWord a b   = compare (snd a) (snd b)
    sameWord a b = snd a == snd b

-- Summarises a non-empty run of (position, word) pairs as
-- (word of the first pair, all positions in order, number of pairs).
-- There is deliberately no equation for the empty list; analyse only feeds
-- it groups produced by groupBy.
collate :: [(a, b)] -> (b, [a], Int)
collate hits@(firstHit:_) = (snd firstHit, map fst hits, length hits)


Processing Script:

# Pull in the user's shell setup. `rh` (used below) is not defined in this
# script -- presumably an alias/function from ~/.bashrc that runs a Haskell
# script; confirm before running this non-interactively.
source ~/.bashrc

# For each edition: put every token on its own line (splitting on "(", ",",
# ")" and space), run the keyword counter, keep only the CSV rows (the
# counter also emits blank separator lines), and save them per edition
# while echoing to the terminal.
tr "(,) " "\n\n\n\n" < KingLear1608.txt | rh exp1.hs | grep "," | tee LrQ.csv
tr "(,) " "\n\n\n\n" < KingLear1623.txt | rh exp1.hs | grep "," | tee LrF.csv

# Merge both editions into one CSV, turning grep's "LrX.csv:" filename
# prefix into the two leading columns "Lr,X,".
#  * -H forces the filename prefix even if only one file matches the glob.
#  * The sed pattern is anchored and escapes the dot, so only the prefix is
#    rewritten; colons or "csv" elsewhere on a line are left alone.
grep -H "," Lr*.csv | sed -e 's/^Lr\([^.:]*\)\.csv:/Lr,\1,/' > kinney.csv

Analysis in R:

# Load the merged keyword counts. The file has no header row, so the column
# names are supplied here.
a <- read.table(
  "kinney.csv",
  sep = ",",
  header = FALSE,
  # Keywords such as which$6#1$ contain '#'; switch comment handling off so
  # a row can never be truncated at that character.
  comment.char = "",
  col.names = c("Play", "Edition", "Act", "WordGroup", "Keyword", "Count")
)

# WordGroup and Act are read as numbers but are used as categories below.
a$WordGroup <- factor(a$WordGroup)
a$Act <- factor(a$Act)

# Overall cross-tabulation of counts across all word groups.
# The original built a 2-way table (WordGroup x Keyword) and then asked
# ftable() for row variables 3 and 4, which a 2-way table does not have.
# The table is therefore built 4-way, in the same variable order as the
# per-group loop, so that Edition and Act exist and can be used as rows.
# NOTE(review): this assumes the 4-way layout was the intent -- confirm.
x <- xtabs(Count ~ WordGroup + Keyword + Edition + Act, data = a)
x <- addmargins(x, 2)  # add a "Sum" level over Keyword (dimension 2)
f <- ftable(x, row.vars = c("Edition", "Act"))

# Print one flat contingency table per word group: rows are
# WordGroup / Keyword / Edition, columns are the Acts plus a "Sum" column.
for (i in levels(a$WordGroup)) {
  # Keep only this group's rows and drop factor levels that no longer occur,
  # so other groups do not show up as empty rows.
  b <- droplevels(a[a$WordGroup == i, ])
  #b$Keyword <- factor(b$Keyword)
  x <- xtabs(Count ~ WordGroup + Keyword + Edition + Act, data = b)
  x <- addmargins(x, 4)  # add a "Sum" level over Act (dimension 4)
  # The original passed the misspelt `col.vsrs=c(1,4)`, which ftable()
  # ignores, so it fell back to its default layout (last variable as
  # columns). Request that layout explicitly.
  f <- ftable(x, col.vars = "Act")
  # Values are not auto-printed inside a for loop.
  print(f)
}


                             Act   1   2   3   4   5 Sum
WordGroup Keyword    Edition                            
1         that$3$    F            32  18  14  13  12  89
                     Q            33  19  13  19  12  96
          that$6#1$  F            39  30  35  26  19 149
                     Q            48  31  35  26  25 165
          that$6#2$  F            26   8  10  23   9  76
                     Q            30   7  12  24  12  85
          which$6#1$ F            23  18  10  12   7  70
                     Q            18  16   9  14   7  64
          which$6#2$ F             1   0   0   2   2   5
                     Q             1   0   0   2   2   5
          who$6#1$   F             8   5  10   6   7  36
                     Q             7   4   8  11   8  38
          who$6#2$   F             5   3   7   4   6  25
                     Q             5   3   6   6   7  27
                          Act   1   2   3   4   5 Sum
WordGroup Keyword Edition                            
2         do      F            22  21  12  32  13 100
                  Q            27  19  13  37  13 109
          does    F             4   2   0   8   2  16
                  Q             1   1   0   5   2   9
          doth    F             4   4   3   1   1  13
                  Q             7   4   3   2   1  17
                          Act   1   2   3   4   5 Sum
WordGroup Keyword Edition                            
3         these   F             9   5   6   8   6  34
                  Q             8   5   5   9   5  32
          this    F            52  43  37  33  37 202
                  Q            59  40  47  38  39 223
          those   F             3   4   1   2   0  10
                  Q             4   4   2   4   2  16
                          Act   1   2   3   4   5 Sum
WordGroup Keyword Edition                            
4         thine   F             8   1   3   6   4  22
                  Q             5   1   2   3   5  16
          thy     F            41  18  28  26  34 147
                  Q            46  16  37  32  33 164

Kinney's analysis of alternative words ("which" for "that", etc.)