schream/tokenize.py


								#! /usr/bin/python2


								from ctypes import *


								# Handle to the native tokenizer shared library; every C entry point used
								# below (tokenize, peek_token, pop_token, release_tokens) is looked up on it.
								# NOTE(review): "./tokenize.so" is not tied to this module's location --
								# presumably it is resolved against the current working directory; confirm.
								tokenizer = cdll.LoadLibrary("./tokenize.so")


								# Maps the integer stored in TokenT.token_type to a readable name.  The
								# position of each name in the tuple is its numeric code, so the order
								# must not change.  tokenize() also uses the name to pick the TokenValT
								# member to read.
								# NOTE(review): presumably mirrors an enum in the C library -- confirm
								# against the native header.
								toktypes = dict(enumerate((
								    "symbol",       # 0
								    "identifier",   # 1
								    "integer",      # 2
								    "floating",     # 3
								    "quote",        # 4
								    "whitespace",   # 5
								    "parenthesis",  # 6
								)))


								class TokenValT(Union):
								    """Value part of a token, declared as a ctypes union.

								    All members share the same storage.  Six of them are C strings
								    (``c_char_p``) and three are booleans (``c_bool``).  tokenize()
								    reads the member whose name matches the token's type name.
								    """

								    # Names and types are paired positionally: the first six names are
								    # the string members, the last three are the boolean members.
								    _fields_ = list(zip(
								        ("symbol", "identifier", "integer", "floating", "parenthesis",
								         "string", "quote", "whitespace", "null_token"),
								        (c_char_p,) * 6 + (c_bool,) * 3
								    ))


								class TokenT(Structure):
								    """One token: an integer type code followed by its value union."""

								    _fields_ = [
								        # Numeric type code; tokenize() translates it through toktypes.
								        ("token_type", c_int),
								        # Value storage; which member to read depends on token_type.
								        ("token", TokenValT),
								    ]


								class TokStream(Structure):
								    """Token stream structure returned by value from the native tokenize()."""

								    _fields_ = [
								        # tokenize() keeps iterating while this is greater than zero.
								        ("length", c_size_t),
								        # NOTE(review): presumably the allocated capacity -- confirm
								        # against the C definition.
								        ("max_length", c_size_t),
								        # Pointer to TokenT records owned by the native library.
								        ("tokens", POINTER(TokenT)),
								        # Untyped pointer; its purpose is not visible from Python.
								        ("memo", c_void_p),
								    ]


								# Declare the return type of each native entry point.  Without this ctypes
								# treats every foreign function as returning a C int, which would be wrong
								# for the two functions that return structures by value.
								# NOTE(review): no argtypes are declared here, so arguments are converted by
								# ctypes' default rules -- confirm they match the C prototypes.

								# Returns a TokStream structure by value.
								tokenizer.tokenize.restype = TokStream

								# Returns a TokenT structure by value.
								tokenizer.peek_token.restype = TokenT

								# Both return a C bool; tokenize() below ignores the result.
								tokenizer.pop_token.restype = c_bool

								tokenizer.release_tokens.restype = c_bool


								def tokenize(source):
								    """Lazily yield ``(type_name, value)`` pairs for the tokens in *source*.

								    The native ``tokenize`` function is called once with
								    ``(source, 0, len(source))``.  The stream it returns is then consumed
								    one token at a time: peek at the current token, yield it, and pop it
								    once the consumer asks for the next one.

								    ``type_name`` is the ``toktypes`` entry for the token's numeric type.
								    ``value`` is the ``TokenValT`` member with that same name.

								    The native stream is released when the generator finishes for any
								    reason: exhaustion, an exception, or the consumer abandoning or
								    closing it early.  Previously it was released only on exhaustion.

								    Raises:
								        KeyError: if the library reports a token type that is missing
								            from ``toktypes``.
								    """
								    tokens = tokenizer.tokenize(source, 0, len(source))
								    # The C functions take the stream by pointer.
								    # NOTE(review): presumably pop_token updates tokens.length through
								    # this pointer, which is what ends the loop -- confirm in the C source.
								    tp = pointer(tokens)
								    try:
								        while tokens.length > 0:
								            tok = tokenizer.peek_token(tp)
								            ttype = toktypes[tok.token_type]
								            yield (ttype, getattr(tok.token, ttype))
								            # Pop only after the consumer resumes us, so the token just
								            # yielded is not discarded while the caller is still using it.
								            tokenizer.pop_token(tp)
								    finally:
								        # Also runs on generator.close() / garbage collection, so a
								        # partially consumed stream no longer leaks.
								        tokenizer.release_tokens(tp)


								# Smoke test: tokenize a small sample and print the resulting list of
								# (type_name, value) pairs.  There is no __main__ guard, so this runs
								# whenever the module is executed or imported.
								line = " '''' a b"

								# list() drains the generator completely before anything is printed.
								xs = list(tokenize(line))

								print(xs)