2025-07-22 15:56:40 +02:00
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
class Token:
    """A single lexical unit produced by the Lexer: a category tag plus the lexeme's value."""

    def __init__(self, type, value):
        # Category tag, e.g. 'INTEGER', 'PLUS', 'ID', 'EOF'.
        self.type = type
        # Payload: int for INTEGER tokens, str for most others, None for EOF.
        self.value = value

    def __repr__(self):
        # Same rendering as before, e.g. Token(INTEGER, 5) / Token(STRING, 'hi').
        return 'Token({}, {!r})'.format(self.type, self.value)
|
|
|
|
|
|
|
|
|
|
class Lexer:
    """Hand-rolled scanner that turns Femboy-language source text into Tokens.

    ``get_next_token`` inspects the character at ``self.pos``, dispatches to
    the matching rule, and advances ``self.pos`` past the consumed lexeme.
    ``tokenize`` drains the input into a list ending with an EOF token.
    """

    # Keyword table, scanned in order.  The trailing \b stops a keyword from
    # matching the prefix of a longer identifier (e.g. 'Femboycoreness' lexes
    # as an ID, not FEMBOYCORE).  Patterns are compiled once at class
    # definition time instead of being rebuilt on every token, and matching
    # uses Pattern.match(text, pos) so no O(n) slice copy is made per attempt.
    _KEYWORDS = (
        (re.compile(r'Femboy Feminine\b'), 'FEMBOY_FEMININE', 'Femboy Feminine'),
        (re.compile(r'UwU Boy\b'), 'PRINT', 'UwU Boy'),
        (re.compile(r'Androgyny\b'), 'ANDROGYNY', 'Androgyny'),
        (re.compile(r'is\b'), 'ASSIGN', 'is'),
        (re.compile(r'Femboycore\b'), 'FEMBOYCORE', 'Femboycore'),
        (re.compile(r'Periodt\b'), 'PERIODT', 'Periodt'),
    )

    # Identifier rule: a letter or underscore, then letters/digits/underscores.
    _IDENT = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')

    # Single-character arithmetic operators: char -> token type.
    _SINGLE_OPS = {'+': 'PLUS', '-': 'MINUS', '*': 'MUL', '/': 'DIV'}

    # Comparison operators: char -> (type when followed by '=', type when
    # alone).  A bare '=' or '!' is not a valid token (None), so it falls
    # through to the keyword/identifier rules and ultimately to error().
    _COMPARE_OPS = {
        '=': ('EQ', None),
        '!': ('NEQ', None),
        '>': ('GTE', 'GT'),
        '<': ('LTE', 'LT'),
    }

    def __init__(self, text):
        # Full source text and the index of the next unread character.
        self.text = text
        self.pos = 0

    def error(self):
        """Raise on a character that starts no valid token."""
        raise Exception('Invalid character')

    def get_next_token(self):
        """Return the next Token, or Token('EOF', None) at end of input.

        Raises:
            Exception: via error() when no lexing rule matches.
        """
        self._skip_whitespace()
        if self.pos >= len(self.text):
            return Token('EOF', None)

        current_char = self.text[self.pos]

        if current_char == '"':
            return self._read_string()
        if current_char.isdigit():
            return self._read_integer()

        token = self._read_operator(current_char)
        if token is not None:
            return token

        token = self._read_keyword()
        if token is not None:
            return token

        match = self._IDENT.match(self.text, self.pos)
        if match:
            value = match.group(0)
            self.pos = match.end()
            return Token('ID', value)

        self.error()

    def _skip_whitespace(self):
        # Advance past any run of whitespace; may leave pos at end of text.
        while self.pos < len(self.text) and self.text[self.pos].isspace():
            self.pos += 1

    def _read_string(self):
        # Called with pos on the opening '"'.  No escape sequences are
        # supported: the literal runs to the next '"' verbatim.
        self.pos += 1
        closing = self.text.find('"', self.pos)
        if closing == -1:
            self.error()  # unterminated string literal
        value = self.text[self.pos:closing]
        self.pos = closing + 1
        return Token('STRING', value)

    def _read_integer(self):
        # Greedily consume a run of ASCII digits; value is stored as int.
        start = self.pos
        while self.pos < len(self.text) and self.text[self.pos].isdigit():
            self.pos += 1
        return Token('INTEGER', int(self.text[start:self.pos]))

    def _read_operator(self, char):
        """Return an operator Token, or None when char starts no operator."""
        if char in self._SINGLE_OPS:
            self.pos += 1
            return Token(self._SINGLE_OPS[char], char)
        if char in self._COMPARE_OPS:
            double_type, single_type = self._COMPARE_OPS[char]
            # Slicing (rather than indexing pos+1) is safe at end of text.
            if self.text[self.pos:self.pos + 2] == char + '=':
                self.pos += 2
                return Token(double_type, char + '=')
            if single_type is not None:
                self.pos += 1
                return Token(single_type, char)
        return None

    def _read_keyword(self):
        """Return a keyword Token, or None when no keyword matches here."""
        for pattern, token_type, lexeme in self._KEYWORDS:
            if pattern.match(self.text, self.pos):
                # Advance by the lexeme length (replaces the old magic
                # numbers like `self.pos += 7`).
                self.pos += len(lexeme)
                return Token(token_type, lexeme)
        return None

    def tokenize(self):
        """Lex the whole input; the returned list always ends with an EOF token."""
        tokens = []
        while True:
            token = self.get_next_token()
            tokens.append(token)
            if token.type == 'EOF':
                break
        return tokens
|