# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++. This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------

# Token names
#
# Names of every token type this module knows about.  Each name NAME is
# paired with a module-level pattern `t_NAME` further down, with one
# exception visible here: 'TYPEID' has no pattern in this file, so the
# importing tokenizer presumably assigns it itself (e.g. by re-typing an
# ID that names a typedef) -- TODO confirm against callers.
# No reserved words are listed here.

tokens = [
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]

# Operators
#
# One regular expression per operator token.  Several patterns are
# prefixes of others ('<' / '<<' / '<=', '|' / '||', '&' / '&&', ...), so
# the lexer consuming these must try the longer pattern first.
# NOTE(review): ply.lex is presumably the consumer; it orders string
# rules by decreasing pattern length -- confirm if another engine is used.
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'        # bitwise or
t_AND = r'&'        # bitwise and
t_NOT = r'~'        # bitwise complement
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'     # logical or
t_LAND = r'&&'      # logical and
t_LNOT = r'!'       # logical not
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='

# Assignment operators
#
# Plain '=' plus the compound forms.  Regex metacharacters ('*', '+',
# '|', '^') are escaped; everything else is matched literally.

t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='

# Increment/decrement
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'

# Structure dereference: ->
t_ARROW = r'->'

# Ternary operator: ?  (escaped because '?' is a regex quantifier)
t_TERNARY = r'\?'

# Delimiters
#
# Brackets and '.' are regex metacharacters and are therefore escaped.
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
# '...' starts with the PERIOD pattern, so the consuming lexer must try
# ELLIPSIS before PERIOD for three dots to form a single token.
t_ELLIPSIS = r'\.\.\.'

# Identifiers: a letter or underscore followed by letters, digits or
# underscores.
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional u / l / ul / lu suffix
# (either case).  Hexadecimal prefixes and 'll' suffixes are not covered
# by this pattern.
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: either digits '.' digits with an optional exponent,
# or digits with a mandatory exponent; an optional l/L/f/F suffix follows.
# The pattern is written without embedded whitespace so that it means the
# same thing whether or not it is compiled with re.VERBOSE, and the
# exponent marker may be 'e' or 'E', both of which C allows.
t_FLOAT = r'((\d+)(\.\d+)([eE](\+|-)?(\d+))?|(\d+)[eE](\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted, no raw newline inside; a backslash
# escapes the character that follows it (so \" does not end the string).
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''

# Comment (C-Style)
#
# The function's docstring is the match pattern, not documentation: it
# matches '/*', then the shortest run of any characters (newlines
# included), then '*/'.  The body adds the number of newlines inside the
# comment to `t.lexer.lineno` and returns the token unchanged, so the
# comment is handed on to the caller rather than dropped.
# NOTE(review): 'COMMENT' is not in the `tokens` list visible in this
# file; a tokenizer importing this rule presumably has to add it --
# confirm against callers.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')
    return t


# Comment (C++-Style)
#
# The function's docstring is the match pattern, not documentation: it
# matches '//' and the rest of the line.  The terminating newline is
# optional so that a comment on the last line of input, with no newline
# after it, is still recognised.  `t.lexer.lineno` advances by the number
# of newlines actually consumed (one, or zero at end of input), and the
# token is returned to the caller rather than dropped.
# NOTE(review): 'CPPCOMMENT' is not in the `tokens` list visible in this
# file; a tokenizer importing this rule presumably has to add it --
# confirm against callers.
def t_CPPCOMMENT(t):
    r'//.*\n?'
    t.lexer.lineno += t.value.count('\n')
    return t