From 837047814ef8797c4e5be16bf98a7e9a4e9d56bb Mon Sep 17 00:00:00 2001
From: wes
Date: Sun, 4 Dec 2016 17:13:57 -0500
Subject: [PATCH] initial commit of prefix-tree matcher

---
 pftokenize.rkt | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 pftokenize.rkt

diff --git a/pftokenize.rkt b/pftokenize.rkt
new file mode 100644
index 0000000..01f0256
--- /dev/null
+++ b/pftokenize.rkt
@@ -0,0 +1,44 @@
+#! /usr/bin/racket
+#lang racket
+
+; prefix-tree based tokenizer
+
+(struct PFTree ; record with two fields: root and children
+  (root children)
+  #:transparent) ; transparent: instances print with their field values and compare structurally with equal?
+
+(define (str-head str) ; first character of str, or the NUL character (code point 0) when str is empty
+  (if (= (string-length str) 0) (integer->char 0)
+    (string-ref str 0)))
+
+(define (str-tail str) ; str without its first character
+  (if (= (string-length str) 0) (integer->char 0) ; NOTE(review): empty str yields the NUL *character*, not "" -- confirm this sentinel is intended
+    (substring str 1
+               (string-length str))))
+
+(define str-heads (curry map str-head)) ; list of strings -> list of their str-head results
+
+(define str-tails (curry map str-tail)) ; list of strings -> list of their str-tail results
+
+(define (prefixes strs) ; one (head . tail) pair per string in strs, in the same order
+  (map cons
+       (str-heads strs)
+       (str-tails strs)))
+
+; Create a prefix-tree from a list of strings
+(define (prefix-tree strs)
+  (match strs
+    [(list x) x]
+    [_
+      (define ps (prefixes strs))
+      (define sorted-ps
+        (sort ps #:key car char