;;;
;;;   A set of routines for defining YACC-like grammar rules
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;        Copyright (c) 1993  Alan W Black (awb@ed.ac.uk)
;;;
;;; This file is part of an implementation of ASTL a computational
;;; situation theoretic language.  This implementation of ASTL is free
;;; software; you can redistribute it and/or modify it under the terms
;;; of the GNU General Public License as published by the Free
;;; Software Foundation; either version 1, or (at your option) any
;;; later version.
;;;
;;; ASTL is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY.  No author or distributor accepts
;;; responsibility to anyone for the consequences of using it or for
;;; whether it serves any particular purpose or works at all, unless
;;; he says so in writing.  Refer to the GNU Genral Public License for
;;; full details.
;;;
;;; Everyone is granted permission to copy, modify and redistribute
;;; this file, but only under the conditions described in the
;;; GNU General Public License.   A copy of this license is
;;; supposed to have been given to you along with this program so you
;;; can know your rights and responsibilities.  It should be in a
;;; file named COPYING.  Among other things, the copyright notice
;;; and this notice must be preserved on all copies.
;;;
;;; The author gratefully acknowledges financial support for the
;;; research this implementation is based on from an SERC studnetship
;;; (number 89313458) and Esprit Basic Research Action Project 6852
;;; (DYANA-2).
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;   Alan W Black (awb@ed) -- October 1989
;;;
;;;   This allows you define "rules" which can be used to parse
;;;   things, in particular to parse lexical entries and define
;;;   transformations on them like lexical redundancy rules
;;;
;;;   The main definition for is a macro of the form
;;;
;;;   (defprule <name> 
;;;      ((<daughter1> <daughter2> ...)
;;;          <statement> <statement> ...)
;;;      ((<daughter1> ...)
;;;          <statement> <statement> ...))
;;;
;;;   The definitions are held in the global prule.rules as an assoc
;;;   list of atom names and lambda function.  I don't want to use
;;;   the normal oblist as you might define a rule called list which
;;;   would redefined the lisp #'list.
;;;
;;;   This defines a function <name> that takes *one* argument a
;;;   stream to read from (I wonder how initialisation happens ..)
;;;
;;;   Daughters can be names of other rules, or quoted symbols to
;;;   denote literals, there are some special symbols too
;;;      symbol  -- simply the current symbol, 
;;;      eof     -- end of file
;;;
;;;   The rule function can refer to the arguments (results of
;;;   parsing the children using $1 $2 ...)  The rule definition
;;;   should return something for the other rules that use it
;;;
;;;   Various other hacky things exist, if there is more than one
;;;   possible rewrite then the first daughter must distinguish the
;;;   the choice.  That is other than the last choice the first daughter
;;;   must be a symbol (not a daughter)
;;;

(defvar prule.debug nil)
(defvar prule.rules nil)
(defvar prule.ruleexpansions nil)
(defvar prule.currentsym nil)
(defvar prule.linebuf nil)
(defvar prule.lineindex 0)
(defvar prule.linecount 0)
(defvar gc.readtable nil)
(defvar gc.singlecharsymbols nil)

(defun pruleevaldaughters (daughters body)
;;;
;;; does the intersting bit of calling the daughters and setting the 
;;; values, then evaluating the extraposition part
;;;
  `(let* ,(mapcar
	  #'(lambda (value name)
	      (cond
	       ((and (atom value) (not (eql 'symbol value)))
		;; its a rule its self
		`( ,name (funcall (prulederef ',value) prule.fd)))
	       (t ;; its a lexical item
		`( ,name (pruleswallow prule.fd ,value)))))
	  (cdr daughters) (prulenewnames 2 (cdr daughters)))
    ,@body))

(defmacro defprule (name &rest rewrites)
;;;
;;;
;;; Records a function in the global list rules (prule.rules) The
;;; function first calls all the daughters in the daughters list then
;;; evals the body with $1 $2 etc set rewrites is a list of the form
;;;
;;;   ((daughter1 d2 d3 ...) <statement> <statement> ...)
;;;
  `(progn
     (push 
      (list (quote ,name)
	    #'(lambda (prule.fd)
		(let ($1)
;;	  (format t "~%Call: ~A cs ~A" ',name prule.currentsym)
		(cond
		 ,@(mapcar
		    #'(lambda (rewrite)
			(cond
                         ((null (car rewrite))
                            `(t ,@(cdr rewrite)))
			 ((atom (caar rewrite))
			  ;; a daughter node
			  `((catch 'prule
			      (setq $1 (funcall (prulederef ',(caar rewrite))
						prule.fd)) t)
			    ,(pruleevaldaughters (car rewrite)
						 (cdr rewrite))))
			 ((equal ''symbol (caar rewrite))
			  ;; an indentifier node -- ???
			  `((or t (prulechecksymbol $1))
			    (setq $1 (pruleswallow prule.fd ,(caar rewrite)))
			    ,(pruleevaldaughters (car rewrite) 
						 (cdr rewrite))))
			 (t ;; a literal first symbol
			  `((eql prule.currentsym 
				 (setq $1 ,(caar rewrite)))
			     (pruleswallow prule.fd $1)
			    ,(pruleevaldaughters (car rewrite) 
						 (cdr rewrite))))))
		    rewrites)
		 (t
		  (throw 'prule nil))))))
      prule.rules)
     (quote ,name)))

(defmacro defrrule (name &rest rewrites)
;;;
;;; Basically the same as a prule but and rrule can be used backwards
;;; That is its second argument to the rewrite is treated as an s-expression
;;; and the $n values are filled in.  This can word backwards too (he
;;; says confidently hoping you can't remember who told you that when the
;;; crunch comes).
;;;
  `(progn
     (push (list (quote ,name)
		 (list
		 ,@(mapcar
		    #'(lambda (rewrite)
			`(list
			 (quote ,(car rewrite))  ;; daughters
                         (quote ,(cadr rewrite)) ;; pattern
			 #'(lambda ($$)
			      ,(cond
				((null (cddr rewrite))
				 ;; no condition so give a default one
				 `t)
				(t
				 (caddr rewrite))))))
		    rewrites)))
	   prule.ruleexpansions)
     (push 
      (list (quote ,name)
	    #'(lambda (prule.fd)
		(let (($1 ',name))
;;		  (format t "~%Call: ~A cs ~A" ',name prule.currentsym)
		(cond
		 ,@(mapcar
		    #'(lambda (rewrite)
			(cond
                         ((null (car rewrite))
                            `(t ,@(cdr rewrite)))
			 ((atom (caar rewrite))
			  ;; a daughter node
			  `((catch 'prule
			      (setq $1 (funcall (prulederef ',(caar rewrite))
						prule.fd)) t)
			    ,(pruleevalrdaughters (car rewrite)
						  (cadr rewrite))))
			 ((equal ''symbol (caar rewrite))
			  ;; an indentifier node -- ???
			  `(,(cond
			      ((cddr rewrite)
			       `(let (($$ prule.currentsym))
				  ,(caddr rewrite)))
			       (t `t))
			    (setq $1 (pruleswallow prule.fd ,(caar rewrite)))
			    ,(pruleevalrdaughters (car rewrite) 
						 (cadr rewrite))))
			 (t ;; a literal first symbol
			  `((eql prule.currentsym 
				 (setq $1 ,(caar rewrite)))
			     (pruleswallow prule.fd $1)
			    ,(pruleevalrdaughters (car rewrite) 
						 (cadr rewrite))))))
		    rewrites)
		 (t
		  (throw 'prule nil))))))
      prule.rules)
     (quote ,name)))

(defun pruleevalrdaughters (daughters patternbody)
;;;
;;; This is does the interesting bit with rrules.  The second arguement
;;; is passed with the bindings of the daughters and they are merged
;;; 
  `(pruledollarreplace
    (list 
     (list '$1 $1)
     ,@(mapcar
	#'(lambda (value name)
	      (cond
	       ((atom value)             ; its a rule 
		`(list ',name 
                       (funcall (prulederef ',value) prule.fd)))
	       (t ;; its a lexical item
		 `(list ',name (pruleswallow prule.fd ,value)))))
	  (cdr daughters) (prulenewnames 2 (cdr daughters))))
    ',patternbody))

(defun prulechecksymbol (symbol)
;;;
;;;  Checks to see if this symbol could be a normal symbol
;;;  that is something that is not declared to be a single
;;;  character symbol (as those are probably punctuation etc)
;;;
  (if (and (= 1 (length (string symbol)))
	   (member (string symbol) gc.singlecharsymbols
		   :test #'string-equal))
      t))

(defun prulederef (name)
;;;
;;; find the function for the given name
;;;
   (let ((func (assoc name prule.rules)))
     (cond
      (func (cadr func))
      (t (error (format nil "No rule defined for: ~S" name))))))

(defun prulenewnames (number daughters)
;;;
;;;  returns a list of atoms of the form ($1 $2 $3 ...)
;;;  equal to the length of daughters
;;;
   (cond
    ((null daughters) nil)
    (t
     (cons (intern (format nil "$~S" number))
	   (prulenewnames (1+ number) (cdr daughters))))))

(defun pruleinit-eachtime ()
;;;
;;;  Initialise for each parse
;;;
   (setq prule.currentsym nil)
   (setq prule.linebuf nil)
   (setq prule.lineindex 0)
   (setq prule.linecount 0)
)

(defun pruleinit ()
;;;
;;;  Sets the global variables properly
;;;
   (setq prule.rules nil)
   (setq prule.ruleexpansions nil)
   (setq prule.currentsym nil)
   (setq prule.linebuf nil)
   (setq prule.lineindex 0)
   (setq prule.linecount 0)
   (prulertinit)
   )

(defun prulertinit ()
;;;
;;;  sets up a read table for reading the grammar file
;;;
   (setq gc.readtable (copy-readtable))
   (mapc
    #'(lambda (char)
       (set-macro-character char #'prulertident nil gc.readtable))
    gc.singlecharsymbols)
   t)

(defun prulertident (stream char)
;;;
;;;  simply returns char as symbol -- due to bugs in some
;;;  lisps,  I do this carefully (the let is necessary because
;;;  intern returns multiple values which in at least one lisp
;;;  get passed up).
;;;
   (let ((sym (intern (string char))))
     stream           ; cmucl does not work without mentioning stream
     sym))

(defun pruleswallow (fd thing)
;;;
;;;  If thing is 'symbol then returns current symbol, if it is
;;;  something else current symbol must match it other wise an error
;;;  is signalled.  If all is well the current symbol is moved to 
;;;  the next one
;;;
   (let ((cs prule.currentsym))
     (cond
      ((eql thing 'symbol)
       ;; its a symbol so don't check it with thing
       )
      ((eql thing cs)
       ;; its a required symbol
       )
      (t
       ;; its not the required symbol
       (pruleperror (format nil "Found ~S but expected ~S" cs thing))))
     (prulegetnextsym fd)
     cs))

(defun prulegetnextsym (fd)
;;;
;;;  get the next symbol
;;;
   (let ()
     (if (equal prule.currentsym 'eof)
	 prule.currentsym
	 (setq prule.currentsym (prulebaseread fd)))
     prule.currentsym))

(defun prulebaseread (fd)
;;;
;;;  Base read function.  Reads the file into a buffer line
;;;  by line so that a check can be kept on the current position
;;;
;;;
  (if (null prule.linebuf)
      (progn
	(setq prule.linecount (1+ prule.linecount))
	(setq prule.linebuf
	      (let ((*readtable* gc.readtable))
		(read-line fd nil '(eof))))
	(if (and prule.debug (not (eq 't fd)))
	    (progn
	      (terpri) (princ prule.linebuf)))
	(setq prule.lineindex 0)
	(if (equal prule.linebuf '(eof))
	    (setq prule.currentsym 'eof)
	    (prulebaseread fd)))
      (progn
	(let ((*readtable* gc.readtable))
	  (multiple-value-setq
	   (prule.currentsym prule.lineindex)
	   (read-from-string prule.linebuf
			     nil '(eof)
			     :start prule.lineindex)))
	(if (equal prule.currentsym '(eof))
	    (progn
	      (setq prule.linebuf nil)
	      (prulebaseread fd)))
	prule.currentsym)))
			       
(defun pruleparse (symbol filename)
;;;
;;;  parses the given file with respect to the given distinguished
;;;  symbol
;;;
  (let (result)
    (pruleinit-eachtime)
   ;;(pruleinit)            ;; initialise the globals
    (cond
     ((eq filename 't)      ;; read from standard input
      (prulegetnextsym 't)
      (setq result (funcall (prulederef symbol) 't)))
     (t                     ;; read from a file
      (with-open-file (fd filename :direction :input)
       (prulegetnextsym fd)
       (setq result (funcall (prulederef symbol) fd))
       (if (not (equal prule.currentsym 'eof))
	   (error (format nil "Found: ~S but end of file expected" 
			  prule.currentsym)))
       )))
   result))

(defun pruleunparse (cat)
;;;
;;;  This unparses a category back into its input form
;;;  This uses the lambda expressions in prule.unrules, as build by
;;;  an rrule
;;;  Each lambda function takes one argument, the thing it has to 
;;;  unparse
;;;  Returns a string at present 
;;;
  (let ((result (catch 'prule
		  (prulereverseparse 
		   (cadr (assoc 'cc prule.ruleexpansions)) cat))))
    (cond
     (result result)
     (t 
      (error (format nil "Couldn't unparse: ~S" cat))))))

(defun prulereverseparse (ruleexpansions struct)
;;;
;;;  Tries each rule expansion in turn for this rule name until
;;;  one matches struct
;;; 
  (let (bindings)
    (cond
     ((null ruleexpansions) 
      ;; failed -- but may ok further up
      (throw 'prule nil))
     ((and (setq bindings (prulematch (prulenewnames 1 (caar ruleexpansions))
				 struct (cadar ruleexpansions)
				 (pruleinitialbindings
				  (caar ruleexpansions))))
	   (funcall (caddr (car ruleexpansions)) 
		    (cdr (assoc '$1 bindings)))
	   ;; matched the extraposition part of this rule matched the current
	   ;; structure.  So now call the sub parts of this rule with the 
	   ;; appropriate parts of of the structure
	   ;; Note the calls are part of the conditions because
	   ;; if it fails it can continue checking other rules
	   ;; at this level
	   (catch 'prule
	     (format nil "~{ ~A~}"
	      (mapcar
	       #'(lambda (subrule value)
		   (cond
		    ((atom subrule)
		     (prulereverseparse
		      (cadr (assoc subrule prule.ruleexpansions))
		      (cdr (assoc value bindings))))
		    (t
		     ;; a literal
		     (cdr (assoc value bindings)))))
	       (caar ruleexpansions)
	       (prulenewnames 1 (caar ruleexpansions)))))))
     (t
      ;; doesn't match this one so continue searching
      (prulereverseparse (cdr ruleexpansions) struct)))))

(defun pruleinitialbindings (subnodes)
;;;
;;;  returns a set of initial binds.  Any quoted symbol in the
;;;  subnodes get included in teh initial bindings, as it might
;;;  not otherwise be used
;;;
   (let ((vars (prulenewnames 1 subnodes)))
     (append
      (mapcan
      #'(lambda (var node)
	  (cond
	   ((atom node)
	    ;; a sub rule node
            nil)
	   ((equal ''symbol node) nil)
	   (t  ;; a quoted node
	    (list (cons var (cadr node))))))
      vars subnodes)
      '((T . T)))))

(defun pruleperror (msg)
;;;
;;; Called when there is an error
;;;
   (terpri) (format t "At Line: ~a" prule.linecount) 
   (terpri) (format t "~a" prule.linebuf)
   (terpri) (format t "~a^" (make-string prule.lineindex :initial-element #\ ))
   (error msg))

(defun pruledollarreplace (bindings structure)
;;;
;;;  Takes an arbitrary s-expression (structure) and replaces
;;;  all dollar names to their vaules in bindings
;;;  
;;;  This could probably be done better using backquote
;;;
   (let ((newthing (copy-tree structure)))
     (mapc
      #'(lambda (binding)
	  (setq newthing (subst (cadr binding) (car binding)
				newthing)))
      bindings)
     newthing))

(defun prulematch (vars thing pattern bindings)
;;;
;;;  Matches two structures thing with pattern.  Pattern may contain
;;;  variables (those atoms in vars).  Returns a set of bindings
;;;  or nil if fails -- note bindings must always be non-nil
;;;
  (cond
   ((eql thing pattern) bindings)
   ((member pattern vars)
    (let ((actual (prulederefvar pattern bindings)))
     (cond
     ((member actual vars)
      ;; new binding
      (cons (cons actual thing) bindings))
     ((eql actual thing)
      bindings)
     (t ;; nope it just doesn't want to match
      nil))))
   ((or (atom thing) (atom pattern)) 
    ;; either one is an atom and they don't match
    nil)
   (t
    (let ((carbinds (prulematch vars (car thing) (car pattern) bindings)))
      (cond
       (carbinds 
	;; car matches so continue match on cdr
	(prulematch vars (cdr thing) (cdr pattern) carbinds))
       (t 
	;; cars failed to match to pass failure up
	nil))))))

(defun prulederefvar (var bindings)
;;;
;;;  dereferences this variable with respect to the bindings
;;;  Note there can never be changes of bindings in this stuff
;;;
  (let ((deref (assoc var bindings)))
   (cond
    ((null deref) var)
    (t (cdr deref)))))

