112 lines
4 KiB
OCaml
112 lines
4 KiB
OCaml
type position = { line : int; col : int; offset : int }
|
|
|
|
type token =
|
|
| Num of float
|
|
| Var of string
|
|
| Ident of string
|
|
| Plus | Minus | Star | Slash | Caret
|
|
| LParen | RParen
|
|
| Comma
|
|
| EOF
|
|
|
|
type token_with_pos = { token : token; start_pos : position; end_pos : position }
|
|
|
|
let is_digit c = c >= '0' && c <= '9'
|
|
let is_alpha c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
|
|
let is_alphanum c = is_alpha c || is_digit c || c = '_'
|
|
|
|
let needs_implicit_mult tok1 tok2 =
|
|
match (tok1, tok2) with
|
|
| (Num _ | Var _ | RParen), (Num _ | Var _ | LParen | Ident _) -> true
|
|
| _ -> false
|
|
|
|
let tokenize str =
|
|
let len = String.length str in
|
|
let line = ref 1 in
|
|
let col = ref 1 in
|
|
|
|
let rec aux i acc =
|
|
if i >= len then
|
|
let pos = { line = !line; col = !col; offset = i } in
|
|
List.rev ({ token = EOF; start_pos = pos; end_pos = pos } :: acc)
|
|
else
|
|
let start_pos = { line = !line; col = !col; offset = i } in
|
|
match str.[i] with
|
|
| ' ' | '\t' ->
|
|
col := !col + 1;
|
|
aux (i + 1) acc
|
|
| '\n' ->
|
|
line := !line + 1;
|
|
col := 1;
|
|
aux (i + 1) acc
|
|
| '+' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = Plus; start_pos; end_pos } :: acc)
|
|
| '-' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = Minus; start_pos; end_pos } :: acc)
|
|
| '*' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = Star; start_pos; end_pos } :: acc)
|
|
| '/' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = Slash; start_pos; end_pos } :: acc)
|
|
| '^' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = Caret; start_pos; end_pos } :: acc)
|
|
| '(' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = LParen; start_pos; end_pos } :: acc)
|
|
| ')' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = RParen; start_pos; end_pos } :: acc)
|
|
| ',' ->
|
|
col := !col + 1;
|
|
let end_pos = { line = !line; col = !col; offset = i + 1 } in
|
|
aux (i + 1) ({ token = Comma; start_pos; end_pos } :: acc)
|
|
| c when is_digit c || c = '.' ->
|
|
let j = ref (i + 1) in
|
|
while !j < len && (is_digit str.[!j] || str.[!j] = '.') do
|
|
incr j
|
|
done;
|
|
let num_str = String.sub str i (!j - i) in
|
|
col := !col + (!j - i);
|
|
let end_pos = { line = !line; col = !col; offset = !j } in
|
|
aux !j ({ token = Num (float_of_string num_str); start_pos; end_pos } :: acc)
|
|
| c when is_alpha c ->
|
|
let j = ref (i + 1) in
|
|
while !j < len && is_alphanum str.[!j] do
|
|
incr j
|
|
done;
|
|
let id = String.sub str i (!j - i) in
|
|
col := !col + (!j - i);
|
|
let end_pos = { line = !line; col = !col; offset = !j } in
|
|
let tok = if !j < len && str.[!j] = '(' then Ident id else Var id in
|
|
aux !j ({ token = tok; start_pos; end_pos } :: acc)
|
|
| c ->
|
|
failwith (Printf.sprintf "unexpected character: %c at line %d, column %d"
|
|
c !line !col)
|
|
in
|
|
|
|
let tokens = aux 0 [] in
|
|
|
|
let rec insert_implicit_mult = function
|
|
| [] -> []
|
|
| [t] -> [t]
|
|
| t1 :: t2 :: rest ->
|
|
if needs_implicit_mult t1.token t2.token then
|
|
let mult_pos = t2.start_pos in
|
|
t1 :: { token = Star; start_pos = mult_pos; end_pos = mult_pos } ::
|
|
insert_implicit_mult (t2 :: rest)
|
|
else
|
|
t1 :: insert_implicit_mult (t2 :: rest)
|
|
in
|
|
|
|
insert_implicit_mult tokens
|