|
@ -4,10 +4,11 @@ import qualified Data.Map as Dm |
|
|
import Control.Applicative |
|
|
import Control.Applicative |
|
|
import Control.Monad |
|
|
import Control.Monad |
|
|
|
|
|
|
|
|
-- | Slide a window of width @n@ over @xs@ and return every window,
-- lower-cased.
--
-- @len@ is the number of elements still remaining in @xs@; the @ngrams@
-- wrapper passes @length xs@ so the length is only computed once.
--
-- When fewer than @n@ elements remain there is no complete window, so the
-- result is empty.  Without this case the @len == n@ stop condition is never
-- reached for short inputs and the recursion does not terminate.
ngrams' :: Int -> Int -> String -> [String]
ngrams' n len xs
  | len < n   = []
  -- Exactly one window left: lower-case it like every other window.
  | len == n  = [map toLower xs]
  | otherwise = map toLower (take n xs) : ngrams' n (len - 1) (drop 1 xs)
|
|
|
|
|
|
|
|
-- | Build the n-grams of width @n@ for @xs@ by delegating to @ngrams'@,
-- seeding its remaining-length counter with the length of @xs@.
ngrams :: Int -> String -> [String]
ngrams n xs = ngrams' n total xs
  where
    total = length xs
|
|
|
|
|
|
|
|
-- | The n-grams of width 2 of the input, as produced by @ngrams@.
digrams :: String -> [String]
digrams xs = ngrams 2 xs
|
@ -27,7 +28,8 @@ startsP' letter dgs = foldr check (0, 0) dgs where |
|
|
_ -> (a, n + 1) |
|
|
_ -> (a, n + 1) |
|
|
check (first:[]) (a, n) = (a, n + 1) |
|
|
check (first:[]) (a, n) = (a, n + 1) |
|
|
|
|
|
|
|
|
-- | Run @startsP'@ for @letter@ over the digrams of @dgs@ and return the
-- first resulting counter divided by the second.
--
-- NOTE(review): presumably the first counter is the number of digrams that
-- start with @letter@ and the second is the total number of digrams seen --
-- confirm against @startsP'@.
--
-- There is no guard on the denominator: if the second counter is zero the
-- result is whatever the result type's @(/)@ gives for division by zero.
startsP letter dgs = fromIntegral hits / fromIntegral total
  where
    (hits, total) = startsP' letter (digrams dgs)
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -58,9 +60,11 @@ out fname n (d, k) = appendFile fname $ (show d) ++ ":" ++ (show $ k/n) ++ "," |
|
|
-- | Count how often each known n-gram occurs in a token stream.
--
-- Arguments, in order:
--
--   1. @k@      - running total of matched tokens (the caller picks the
--                 starting value)
--   2. @ngrams@ - Map from every possible n-gram to its current count
--   3. the tokenized n-grams from the corpus
--
-- Tokens that are not already keys of the Map are skipped: they are neither
-- inserted nor added to the running total.  Returns the final total paired
-- with the updated Map.
ngramProbs :: (Num a, Num b, Ord key) => a -> Dm.Map key b -> [key] -> (a, Dm.Map key b)
ngramProbs k ngrams [] = (k, ngrams)
ngramProbs k ngrams (n:ns) =
  case Dm.lookup n ngrams of
    Nothing -> ngramProbs k ngrams ns
    Just count ->
      let count' = count + 1
          k'     = k + 1
      -- Force both counters before recursing; left lazy, each one grows a
      -- chain of (+1) thunks proportional to the size of the corpus.
      in count' `seq` k' `seq` ngramProbs k' (Dm.insert n count' ngrams) ns
|
|
|
|
|
|
|
|
-- buildProbabilities :: (Fractional a, Ord k) => [k] -> Dm.Map k a |
|
|
-- buildProbabilities :: (Fractional a, Ord k) => [k] -> Dm.Map k a |
|
|