From b1657484fa06e1eaad13c8c53737c300870e4336 Mon Sep 17 00:00:00 2001 From: Wesley Kerfoot Date: Tue, 18 Jun 2013 12:33:42 -0400 Subject: [PATCH] cleaned code up a bit --- tokenize.hs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tokenize.hs b/tokenize.hs index 975d699..9778797 100644 --- a/tokenize.hs +++ b/tokenize.hs @@ -4,10 +4,11 @@ import qualified Data.Map as Dm import Control.Applicative import Control.Monad -ngrams' n len xs = let next = (take n xs) - in case len == n of - False -> (toLower <$> next) : (ngrams' n (len - 1) $ drop 1 xs) - _ -> return xs +ngrams' n len xs = + let calcNext next + | len == n = return xs + | otherwise = (toLower <$> next) : (ngrams' n (len - 1) $ drop 1 xs) + ngrams n xs = ngrams' n (length xs) xs digrams = ngrams 2 @@ -27,8 +28,9 @@ startsP' letter dgs = foldr check (0, 0) dgs where _ -> (a, n + 1) check (first:[]) (a, n) = (a, n + 1) -startsP letter dgs = let (n, k) = startsP' letter (digrams dgs) - in (fromIntegral n) / (fromIntegral k) +startsP letter dgs = + let (n, k) = startsP' letter (digrams dgs) + in (fromIntegral n) / (fromIntegral k) select [] = [] @@ -58,10 +60,12 @@ out fname n (d, k) = appendFile fname $ (show d) ++ ":" ++ (show $ k/n) ++ "," -- first argument is all possible ngrams in a Map -- second argument is all of the tokenized ngrams from the corpus ngramProbs k ngrams [] = (k, ngrams) -ngramProbs k ngrams (n:ns) = case (Dm.lookup n ngrams) of - Nothing -> ngramProbs k ngrams ns - (Just count) -> let ngrams' = Dm.insert n (count+1) ngrams - in ngramProbs (k+1) ngrams' ns +ngramProbs k ngrams (n:ns) = + case (Dm.lookup n ngrams) of + Nothing -> ngramProbs k ngrams ns + (Just count) -> + let ngrams' = Dm.insert n (count+1) ngrams + in ngramProbs (k+1) ngrams' ns -- buildProbabilities :: (Fractional a, Ord k) => [k] -> Dm.Map k a buildProbabilities ngrams = Dm.fromList [(ngram, 0) | ngram <- ngrams] @@ -71,4 +75,4 @@ main = do let (n, ngramMap) = ngramProbs 0 (buildProbabilities englishQuintgrams) (quintgrams corpus) appendFile "./quadgrams.json" "{" mapM_ (out "./quadgrams.json" n) $ [(d,k) | (d,k) <- Dm.toList ngramMap, k /= 0] - appendFile "./quadgrams.json" "}" \ No newline at end of file + appendFile "./quadgrams.json" "}"