Syntax tree parser in LISP
I’ve finally made it. Got really a lot of experience in functional-like thinking (and partially programming) and recalled bits of LISP I knew.
Parser can build a tree with concatenation, multiplication and OR nodes. It supports braces and symbol escapement. Here are couple of examples:
$ ./stree.lisp "\[a\*" (cat * (cat a [)) $ ./stree.lisp "zxc*" (mult (cat c (cat x z))) $ ./stree.lisp "[a|b]*qwe" (cat e (cat w (cat q (mult (or b a)))))
It uses rather dumb parser which does not differentiate priority or the control symbols, so string like “123*” will have multiplication operator applied to all previous symbols (namely concatenation of 1, 2 and 3). There are no error checks either, which I will likely work to fix, especially implement graceful exiting for non-closed braces.
Next step is to build a deterministic finite automate out of this LISP objects.
Code in LISP under the link.
I believe I’m not that bad as I started to believe.
#!/usr/bin/clisp
(defconstant *control-esc* #\\)
(defconstant *control-mult* #\*)
(defconstant *control-or* #\|)
(defconstant *control-cat-id* "cat")
(defconstant *control-or-id* "or")
(defconstant *control-mult-id* "mult")
(defconstant *control-object-open* #\[)
(defconstant *control-object-close* #\])
(defconstant *control-string* (concatenate 'string (list *control-mult*
*control-or*
*control-object-open*
*control-object-close*)))
(defun control-object-p (obj)
(and (characterp obj) (or (char= obj *control-mult*) (char= obj *control-or*))))
(defun read-object (str len l total esc)
(let ((sym (elt str 0))
(skip 1))
(cond
(esc
(push (list sym) l)
(setf esc nil))
((char= sym *control-object-open*)
(let* ((ret (read-object (subseq str skip) (- len skip) '() 0 nil))
(obj (first ret))
(obj_skip (second ret)))
(setf skip (+ skip obj_skip))
(push obj l)))
((char= sym *control-object-close*))
((char= sym *control-esc*)
(setf esc 1))
(t
(push sym l)))
(setf total (+ total skip))
(if (or (<= len skip) (char= sym *control-object-close*))
(list l total)
(read-object (subseq str skip) (- len skip) l total esc))))
(defun wrap-control (obj)
(cond
((not (characterp obj))
*control-cat-id*)
((char= obj *control-mult*)
*control-mult-id*)
((char= obj *control-or*)
*control-or-id*)
(t
*control-cat-id*)))
(defun unwind-list (obj)
(if (and (listp obj) (= (length obj) 1))
(first obj)
obj))
(defun parse-list (l)
(if (null l)
(list nil nil)
(let* ((obj (pop l))
(lret (parse-list l))
(ret (first lret))
(is-or (second lret))
(l '()))
(if (listp obj)
(setf obj (first (parse-list obj))))
(if (= (length ret) 1)
(setf ret (first ret)))
(if (null ret)
(push (unwind-list obj) l)
(progn
(cond
(is-or
(push ret l)
(push (unwind-list obj) l)
(push *control-or-id* l)
(setf is-or nil))
((and (characterp obj) (char= obj *control-or*))
(setf is-or 1)
(push ret l))
(t
(push ret l)
(unless (control-object-p obj)
(push (unwind-list obj) l))
(push (wrap-control obj) l)))))
(list l is-or))))
(dolist (str *args*)
(let ((ret (read-object str (length str) '() 0 nil)))
(format t "~A~%" (first (parse-list (first ret))))))

Comments are currently closed.