tiny scheme compiler that aims to be fast and correct
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.
 
 
 

53 lines
1.5 KiB

#! /usr/bin/python2
from ctypes import *
# Handle to the native tokenizer library. The "./" path is resolved against
# the current working directory, so the script must be started from the
# directory that contains tokenize.so.
tokenizer = CDLL("./tokenize.so")
# Token kind names indexed by the integer code stored in TokenT.token_type.
# Each name is also the TokenValT member that tokenize() reads for a token
# of that kind, so the spelling must match the union's field names.
toktypes = dict(enumerate((
    "symbol",
    "identifier",
    "integer",
    "floating",
    "quote",
    "whitespace",
    "parenthesis",
)))
class TokenValT(Union):
    """Value carried by a token, as a ctypes union.

    Six members are C strings (``c_char_p``) and three are booleans
    (``c_bool``). All members share the same storage, so only the member
    that matches the token's kind should be read.
    """

    _fields_ = [
        # Variants whose value is a C string.
        ("symbol", c_char_p),
        ("identifier", c_char_p),
        ("integer", c_char_p),
        ("floating", c_char_p),
        ("parenthesis", c_char_p),
        ("string", c_char_p),
    ] + [
        # Variants whose value is a plain flag.
        ("quote", c_bool),
        ("whitespace", c_bool),
        ("null_token", c_bool),
    ]
class TokenT(Structure):
    """A single token: an integer kind code followed by its value union.

    ``token_type`` is the code that tokenize() looks up in ``toktypes``;
    ``token`` is the ``TokenValT`` holding the value for that kind.
    """

    _fields_ = [
        ("token_type", c_int),
        ("token", TokenValT),
    ]
class TokStream(Structure):
    """Token stream structure returned by the native ``tokenize`` call.

    Fields:
        length:     size_t; tokenize() keeps reading while this is > 0
                    (presumably the number of tokens left -- confirm
                    against the C source).
        max_length: size_t; not read by this script.
        tokens:     pointer to ``TokenT`` elements.
        memo:       opaque ``void *``; not read by this script.
    """

    _fields_ = [
        ("length", c_size_t),
        ("max_length", c_size_t),
        ("tokens", POINTER(TokenT)),
        ("memo", c_void_p),
    ]
# Declare the return type of each native entry point; without this ctypes
# would treat every result as a C int.
# NOTE(review): no argtypes are declared, so arguments are converted using
# ctypes' default rules -- confirm against the C prototypes.
for _fn_name, _result_type in (
    ("tokenize", TokStream),
    ("peek_token", TokenT),
    ("pop_token", c_bool),
    ("release_tokens", c_bool),
):
    getattr(tokenizer, _fn_name).restype = _result_type
# Do not leave the loop temporaries behind as module attributes.
del _fn_name, _result_type
def tokenize(source):
    """Lazily yield ``(kind, value)`` pairs for the tokens in *source*.

    The native ``tokenize`` function is called once with *source*, ``0``
    and ``len(source)`` and returns a ``TokStream``. While that stream
    reports a non-zero ``length``, the front token is peeked, its
    ``token_type`` is translated through ``toktypes``, the pair is yielded,
    and the token is popped.

    The native stream is released in a ``finally`` clause, so it is freed
    when the generator is exhausted, when the consumer stops early (the
    generator is closed or garbage collected), and when an error occurs
    while decoding a token.

    Args:
        source: text to tokenize, handed to the native library unchanged.

    Yields:
        Tuples ``(kind, value)`` where ``kind`` is a name from ``toktypes``
        and ``value`` is the ``TokenValT`` member of the same name.

    Raises:
        KeyError: if the library reports a ``token_type`` that is missing
            from ``toktypes``.
    """
    tokens = tokenizer.tokenize(source, 0, len(source))
    tp = pointer(tokens)
    try:
        while tokens.length > 0:
            tok = tokenizer.peek_token(tp)
            ttype = toktypes[tok.token_type]
            # The kind name doubles as the union member to read.
            yield (ttype, getattr(tok.token, ttype))
            tokenizer.pop_token(tp)
    finally:
        # Runs on exhaustion, on generator close and on errors alike, so
        # the native allocation is never leaked.
        tokenizer.release_tokens(tp)
# Smoke test: tokenize a short sample line and print every (kind, value)
# pair that the native tokenizer produced for it.
line = " '''' a b"
xs = []
xs.extend(tokenize(line))
print(xs)