; ; Copyright 2009+ Evgeniy Polyakov ; All rights reserved. ; ; This program is free software; you can redistribute it and/or modify ; it under the terms of the GNU General Public License as published by ; the Free Software Foundation; either version 2 of the License, or ; (at your option) any later version. ; (defpackage :net.ioremap.regexp (:use :common-lisp) (:export :regexp-search)) (defconstant *control-esc* #\\) (defconstant *control-star* #\*) (defconstant *control-or* #\|) (defconstant *control-object-open* #\[) (defconstant *control-object-close* #\]) (defconstant *object-type-symbol* 0) (defconstant *object-type-object* 1) (defconstant *object-type-control-or* 2) (defconstant *object-type-control-star* 3) (defconstant *object-type-control-cat* 4) (defun object-type-from-symbol (sym &optional value-type) (cond (value-type value-type) ((not (characterp sym)) *object-type-object*) ((char= sym *control-star*) *object-type-control-star*) ((char= sym *control-or*) *object-type-control-or*) (t *object-type-symbol*))) (defparameter *global-object-storage* (make-array 5 :fill-pointer 0 :adjustable t :element-type 'list)) (defparameter *global-id-counter* 0) (defclass symbol-instance () ((value :initform (error "Must provide symbol value.") :initarg :value :accessor value) (id :initform nil :reader id) (firstpos :initform '() :accessor firstpos) (lastpos :initform '() :accessor lastpos) (nullable :initform nil :accessor nullable) (value-type :initform (error "Must provide symbol value-type.") :initarg :type :accessor value-type))) (defmethod initialize-instance :after ((obj symbol-instance) &key) (with-slots (value-type id value) obj (when (= value-type *object-type-symbol*) (setf id (incf *global-id-counter*)) (vector-push-extend (list value '()) *global-object-storage*)))) (defun new-symbol (sym &optional value-type) (let ((obj (make-instance 'symbol-instance :value sym :type (object-type-from-symbol sym value-type)))) ;(format t "make: ~A, value: ~A, type: ~A~%" obj (object-value obj) (object-type obj)) obj)) (defun control-object-p (obj) (with-slots (value-type) obj (and (/= value-type *object-type-symbol*) (/= value-type *object-type-object*)))) (defun complex-object-p (obj) (= (slot-value obj 'value-type) *object-type-object*)) (defun or-object-p (obj) (= (slot-value obj 'value-type) *object-type-control-or*)) (defun cat-object-p (obj) (= (slot-value obj 'value-type) *object-type-control-cat*)) (defun star-object-p (obj) (= (slot-value obj 'value-type) *object-type-control-star*)) (defun object-value (obj) (slot-value obj 'value)) (defun object-type (obj) (slot-value obj 'value-type)) (defun object-id (obj) (slot-value obj 'id)) (defun read-object (str len l total esc) (let ((sym (elt str 0)) (skip 1)) (cond (esc (push (new-symbol sym) l) (setf esc nil)) ((char= sym *control-object-open*) (let* ((ret (read-object (subseq str skip) (- len skip) '() 0 nil)) (obj (first ret)) (obj_skip (second ret))) (setf skip (+ skip obj_skip)) (push (new-symbol obj) l))) ((char= sym *control-object-close*)) ((char= sym *control-esc*) (setf esc 1)) (t (push (new-symbol sym) l))) (setf total (+ total skip)) (if (or (<= len skip) (char= sym *control-object-close*)) (list l total) (read-object (subseq str skip) (- len skip) l total esc)))) (defun parse-list (l) (if (null l) (list nil nil) (let* ((obj (pop l)) (lret (parse-list l)) (ret (first lret)) (is-or (second lret)) (l '())) (if (complex-object-p obj) (setf obj (first (parse-list (object-value obj))))) (if (null ret) (list obj is-or) (cond (is-or (push obj ret) (setf is-or nil) (list (new-symbol (reverse ret) *object-type-control-or*) is-or)) ((and (or-object-p obj) (not (listp (object-value obj)))) (setf is-or 1) (push ret l) (list l is-or)) ((and (star-object-p obj) (not (listp (object-value obj)))) (push ret l) (list (new-symbol l (object-type obj)) is-or)) (t (push obj l) (push ret l) (list (new-symbol l *object-type-control-cat*) is-or))))))) (defun drop-last-symbol () (setf *global-object-storage* (subseq *global-object-storage* 0 (1- (length *global-object-storage*))))) (defun object-nullable (node) (let ((value (object-value node)) (value-type (object-type node)) (id (object-id node)) (n nil)) (cond ((null value) (setf n t)) (id (setf n nil)) ((= value-type *object-type-control-star*) (setf n t)) (t (let ((c1 (object-nullable (first value))) (c2 (object-nullable (second value)))) (cond ((= value-type *object-type-control-or*) (setf n (or c1 c2))) ((= value-type *object-type-control-cat*) (setf n (and c1 c2))))))) (setf (slot-value node 'nullable) n))) (defmacro copy-object-list (src dst) `(setf ,dst (concatenate 'list ,src ,dst))) (defun object-setpos (node lpos) (let ((value (object-value node)) (value-type (object-type node)) (id (object-id node)) (f (list))) (cond ((null value) (setf f '())) (id (push id f)) ((= value-type *object-type-control-star*) (setf f (object-setpos (first value) lpos))) (t (let* ((o1 (first value)) (o2 (second value))) (cond ((= value-type *object-type-control-or*) (copy-object-list (object-setpos o2 lpos) f) (copy-object-list (object-setpos o1 lpos) f)) ((= value-type *object-type-control-cat*) (when lpos (setf o1 (second value)) (setf o2 (first value))) (if (slot-value o1 'nullable) (progn (copy-object-list (object-setpos o2 lpos) f) (copy-object-list (object-setpos o1 lpos) f)) (copy-object-list (object-setpos o1 lpos) f))))))) (if lpos (setf (slot-value node 'lastpos) f) (setf (slot-value node 'firstpos) f)))) (defun object-firstpos (node) (object-setpos node nil)) (defun object-lastpos (node) (object-setpos node t)) (defun followpos-merge (c1 c2) (let ((fp (slot-value c2 'firstpos))) (dolist (lp (slot-value c1 'lastpos)) (let* ((obj (elt *global-object-storage* (1- lp))) (value (first obj)) (fpos (second obj))) (setf (elt *global-object-storage* (1- lp)) (list value (merge 'list fp fpos #'<))))))) (defun object-followpos (node) (cond ((cat-object-p node) (followpos-merge (first (object-value node)) (second (object-value node)))) ((star-object-p node) (followpos-merge (first (object-value node)) (first (object-value node)))))) (defun dump-object (obj recursion) (dotimes (i recursion) (format t " ")) (if (listp (object-value obj)) (progn (incf recursion) (format t "type: ~a~%" (object-type obj)) (dolist (o (object-value obj)) (dump-object o recursion))) (progn (format t "value: ~a, type: ~a, id: ~A~%" (object-value obj) (object-type obj) (object-id obj))))) (defun dump-object-tree (l) (let ((recursion 0)) (dolist (obj l) (dump-object obj recursion)))) (defun object-set-dfa-parameters (obj) (object-nullable obj) (object-firstpos obj) (object-lastpos obj)) (defun with-object (obj func) (let ((inner-object-name (object-value obj))) (funcall func obj) (if (listp inner-object-name) (dolist (obj inner-object-name) (with-object obj func))))) (defun get-followpos-for-symbol (l sym arr) (let ((ret (list))) (dolist (e l) (when (< (1- e) (length arr)) (let ((a (elt arr (1- e)))) (when (char= (first a) sym) (copy-object-list (second a) ret))))) (sort ret #'<))) (defun list= (a b elements-are-equal) (let ((ret t)) (cond ((/= (length a) (length b)) (setf ret nil)) (t (mapcar #'(lambda (x y) (unless (funcall elements-are-equal x y) (setf ret nil)) ret) a b))) ret)) (defun num-list= (a b) (list= a b #'=)) (defun mapped-list-index (st mlst) (position st mlst :test #'num-list=)) (defun simplify-transform-list (trans) (let ((map-list (list))) (map 'list #'(lambda (tr) (let* ((ist (first tr)) (sym (second tr)) (fst (third tr)) (ist_index (mapped-list-index ist map-list)) (fst_index (mapped-list-index fst map-list))) (when ist (unless ist_index (setf map-list (append map-list (list ist))) (setf ist_index (mapped-list-index ist map-list)) (when fst (setf fst_index (mapped-list-index fst map-list))))) (when fst (unless fst_index (setf map-list (append map-list (list fst))) (setf fst_index (mapped-list-index fst map-list)))) ;(when fst_index ; (format t "~A: ~A:~A -> ~A:~A~%" sym ist fst ist_index fst_index)) (list ist_index sym fst_index))) trans))) (defun generate-symbol-list (start stop) (let ((ret (list))) (do ((i start (incf i))) ((> i stop)) (push (int-char i) ret)) (reverse ret))) (defun generate-english-symbol-list () (generate-symbol-list (char-int #\ ) (char-int #\z))) (defun generate-ascii-symbol-list () (generate-symbol-list 0 255)) ;(defparameter *symbol-list* (list #\a #\b #\c)) (defparameter *symbol-list* (generate-english-symbol-list)) ;(defparameter *symbol-list* (generate-ascii-symbol-list)) (defun symbol-int (s) (- (char-int s) (char-int (first *symbol-list*)))) (defun check-state-presence (a b) (and (num-list= (first a) (first b)) (num-list= (third a) (third b)) (char= (second a) (second b)))) (defun generate-state-transformation (str) (let* ((ret (read-object str (length str) '() 0 nil)) (obj (first (parse-list (first ret))))) ;(dump-object obj 0) (with-object obj 'object-set-dfa-parameters) (with-object obj 'object-followpos) (let ((states (list (slot-value obj 'firstpos))) (done-states (list)) (trans (list)) (map-list (list))) (drop-last-symbol) (do ((state)) ((null states)) (let ((state (pop states))) (push state done-states) (dolist (sym *symbol-list*) (let* ((pos (get-followpos-for-symbol state sym *global-object-storage*))) (push (list state sym pos) trans) (when (and pos (= 0 (count pos done-states :test #'num-list=))) (push pos states)))))) ;(format t "trans: ~A~%" trans) ;(let* ((trans (simplify-transform-list (reverse (delete-duplicates trans :test #'check-state-presence)))) (let* ((trans (simplify-transform-list (reverse trans))) (trans-len (length trans)) (N (length *symbol-list*)) (S (/ trans-len N)) (dtrans (make-array (* S N) :initial-element 0))) ;(format t "trans:~A, S: ~A, trans-len: ~A~%" trans S trans-len) (dolist (tr trans) (let* ((ist (first tr)) (sym (second tr)) (fst (third tr)) (idx (+ (* ist N) (symbol-int sym)))) ;(when fst ; (format t "~A trans: ~A:~A(~A):~A~%" idx ist sym (symbol-int sym) fst)) (setf (elt dtrans idx) fst))) ;(format t "~A~%" dtrans) dtrans)))) (defun symbol-index (sym) (let ((idx nil) (char-idx (char-int sym))) (when (and (>= char-idx (char-int (first *symbol-list*))) (<= char-idx (char-int (first (last *symbol-list*))))) (setf idx (- char-idx (char-int (first *symbol-list*))))) idx)) (defun regexp-search (regexp str &optional (dtrans nil)) (unless dtrans (setf dtrans (generate-state-transformation (concatenate 'string regexp "#")))) (let* ((ret (list)) (state 0) (N (length *symbol-list*)) (S (1- (/ (length dtrans) N))) (start nil) (end nil)) (dotimes (i (length str)) (let* ((sym (elt str i)) (idx (symbol-index sym))) ;(format t "sym: ~A, idx: ~A, state: ~A/~A, didx: ~A~%" sym idx state S (elt dtrans (+ (* state N) idx))) (if (and idx state) (progn (setf idx (+ (* state N) idx)) (setf state (elt dtrans idx)) (when state (unless start (setf start i)) (when (= state S) (setf end (1+ i)))) (unless state (setf state 0))) (progn (setf state 0))) (when (or (= state 0) (and (= state S) (= i (1- (length str))))) (when end (push (list start end) ret)) (setf start nil) (setf end nil)))) (reverse ret)))