You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
53 lines
1.5 KiB
53 lines
1.5 KiB
#! /usr/bin/python2
|
|
|
|
from ctypes import *
|
|
|
|
tokenizer = cdll.LoadLibrary("./tokenize.so")
|
|
|
|
toktypes = { 0 : "symbol",
|
|
1 : "identifier",
|
|
2 : "integer",
|
|
3 : "floating",
|
|
4 : "quote",
|
|
5 : "whitespace",
|
|
6 : "parenthesis"}
|
|
|
|
class TokenValT(Union):
|
|
_fields_ = [("symbol", c_char_p),
|
|
("identifier", c_char_p),
|
|
("integer", c_char_p),
|
|
("floating", c_char_p),
|
|
("parenthesis", c_char_p),
|
|
("string", c_char_p),
|
|
("quote", c_bool),
|
|
("whitespace", c_bool),
|
|
("null_token", c_bool)]
|
|
|
|
class TokenT(Structure):
|
|
_fields_ = [("token_type", c_int),
|
|
("token", TokenValT)]
|
|
|
|
class TokStream(Structure):
|
|
_fields_ = [("length", c_size_t),
|
|
("max_length", c_size_t),
|
|
("tokens", POINTER(TokenT)),
|
|
("memo", c_void_p)]
|
|
|
|
tokenizer.tokenize.restype = TokStream
|
|
tokenizer.peek_token.restype = TokenT
|
|
tokenizer.pop_token.restype = c_bool
|
|
tokenizer.release_tokens.restype = c_bool
|
|
|
|
def tokenize(source):
|
|
tokens = tokenizer.tokenize(source, 0, len(source))
|
|
tp = pointer(tokens)
|
|
while tokens.length > 0:
|
|
tok = tokenizer.peek_token(tp)
|
|
ttype = toktypes[tok.token_type]
|
|
yield (ttype, getattr(tok.token, ttype))
|
|
tokenizer.pop_token(tp)
|
|
tokenizer.release_tokens(tp)
|
|
|
|
line = " '''' a b"
|
|
xs = list(tokenize(line))
|
|
print(xs)
|
|
|