type position = { line : int; col : int; offset : int } type token = | Num of float | Var of string | Ident of string | Plus | Minus | Star | Slash | Caret | LParen | RParen | Comma | EOF type token_with_pos = { token : token; start_pos : position; end_pos : position } let is_digit c = c >= '0' && c <= '9' let is_alpha c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') let is_alphanum c = is_alpha c || is_digit c || c = '_' let needs_implicit_mult tok1 tok2 = match (tok1, tok2) with | (Num _ | Var _ | RParen), (Num _ | Var _ | LParen | Ident _) -> true | _ -> false let tokenize str = let len = String.length str in let line = ref 1 in let col = ref 1 in let rec aux i acc = if i >= len then let pos = { line = !line; col = !col; offset = i } in List.rev ({ token = EOF; start_pos = pos; end_pos = pos } :: acc) else let start_pos = { line = !line; col = !col; offset = i } in match str.[i] with | ' ' | '\t' -> col := !col + 1; aux (i + 1) acc | '\n' -> line := !line + 1; col := 1; aux (i + 1) acc | '+' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = Plus; start_pos; end_pos } :: acc) | '-' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = Minus; start_pos; end_pos } :: acc) | '*' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = Star; start_pos; end_pos } :: acc) | '/' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = Slash; start_pos; end_pos } :: acc) | '^' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = Caret; start_pos; end_pos } :: acc) | '(' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = LParen; start_pos; end_pos } :: acc) | ')' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = RParen; start_pos; end_pos } :: acc) | ',' -> col := !col + 1; let end_pos = { line = !line; col = !col; offset = i + 1 } in aux (i + 1) ({ token = Comma; start_pos; end_pos } :: acc) | c when is_digit c || c = '.' -> let j = ref (i + 1) in while !j < len && (is_digit str.[!j] || str.[!j] = '.') do incr j done; let num_str = String.sub str i (!j - i) in col := !col + (!j - i); let end_pos = { line = !line; col = !col; offset = !j } in aux !j ({ token = Num (float_of_string num_str); start_pos; end_pos } :: acc) | c when is_alpha c -> let j = ref (i + 1) in while !j < len && is_alphanum str.[!j] do incr j done; let id = String.sub str i (!j - i) in col := !col + (!j - i); let end_pos = { line = !line; col = !col; offset = !j } in let tok = if !j < len && str.[!j] = '(' then Ident id else Var id in aux !j ({ token = tok; start_pos; end_pos } :: acc) | c -> failwith (Printf.sprintf "unexpected character: %c at line %d, column %d" c !line !col) in let tokens = aux 0 [] in let rec insert_implicit_mult = function | [] -> [] | [t] -> [t] | t1 :: t2 :: rest -> if needs_implicit_mult t1.token t2.token then let mult_pos = t2.start_pos in t1 :: { token = Star; start_pos = mult_pos; end_pos = mult_pos } :: insert_implicit_mult (t2 :: rest) else t1 :: insert_implicit_mult (t2 :: rest) in insert_implicit_mult tokens