Filtering...

parse-xml

books/xdoc/parse-xml
other
(in-package "XDOC")
other
(include-book "autolink")
other
(set-state-ok t)
other
(program)
opentok-pfunction
(defun opentok-p (x) (eq (first x) :open))
opentok-namefunction
(defun opentok-name (x) (second x))
opentok-attsfunction
(defun opentok-atts (x) (third x))
opentok-locfunction
(defun opentok-loc (x) (fourth x))
closetok-pfunction
(defun closetok-p
  (x)
  (eq (first x) :close))
closetok-namefunction
(defun closetok-name (x) (second x))
entitytok-pfunction
(defun entitytok-p
  (x)
  (eq (first x) :entity))
entitytok-typefunction
(defun entitytok-type (x) (second x))
texttok-pfunction
(defun texttok-p (x) (eq (first x) :text))
mcflattenfunction
(defun mcflatten
  (x acc)
  (if (atom x)
    (cons x acc)
    (mcflatten (car x)
      (mcflatten (cdr x) acc))))
texttok-texttreefunction
(defun texttok-texttree (x) (second x))
texttok-textfunction
(defun texttok-text
  (x)
  (let ((tree (texttok-texttree x)))
    (if (atom tree)
      tree
      (fast-string-append-lst (mcflatten (texttok-texttree x) nil)))))
other
(defconst *nls*
  (coerce (list #\
) 'string))
read-untilfunction
(defun read-until
  (x n xl stop-chars acc)
  "Returns (MV FOUNDP N ACC)"
  (b* (((when (>= n xl)) (mv nil n acc)) (char-n (char x n))
      ((when (member char-n stop-chars)) (mv t n acc)))
    (read-until x
      (+ 1 n)
      xl
      stop-chars
      (cons char-n acc))))
read-name-auxfunction
(defun read-name-aux
  (x n xl acc)
  "Returns (MV N ACC)"
  (b* (((when (>= n xl)) (mv n acc)) (char-n (char x n))
      ((when (or (and (char<= #\a char-n) (char<= char-n #\z))
           (and (char<= #\Z char-n) (char<= char-n #\Z))
           (and (char<= #\0 char-n) (char<= char-n #\9))
           (eql char-n #\_)
           (eql char-n #\-))) (read-name-aux x
          (+ 1 n)
          xl
          (cons char-n acc))))
    (mv n acc)))
read-namefunction
(defun read-name
  (x n xl)
  "Returns (MV ERR N NAME-STR)"
  (b* (((mv n rchars) (read-name-aux x n xl nil)) ((unless (consp rchars)) (mv (cat "Expected a name, but found unexpected character "
            (coerce (list (char x n)) 'string)
            *nls*
            "Nearby text: {"
            (error-context x n xl)
            "}"
            *nls*)
          n
          nil))
      (str (rchars-to-string rchars)))
    (mv nil n str)))
read-attrvalfunction
(defun read-attrval
  (x n xl)
  "Returns (MV ERR N VAL-STR)"
  (b* ((saved-n n) (quote-char (char x n))
      ((unless (or (eql quote-char #\") (eql quote-char #\'))) (mv (cat "Expected value to start with a quote, but found "
            (coerce (list (char x n)) 'string)
            *nls*
            "Nearby text: {"
            (error-context x n xl)
            "}"
            *nls*)
          n
          nil))
      (n (+ 1 n))
      ((mv found-endp n chars) (read-until x
          n
          xl
          (list quote-char)
          nil))
      ((unless found-endp) (mv (cat "Attribute value never ends."
            *nls*
            "Nearby text: {"
            (error-context x saved-n xl)
            "}"
            *nls*)
          n
          nil))
      (n (+ n 1))
      (val-str (rchars-to-string chars)))
    (mv nil n val-str)))
read-tag-attributesfunction
(defun read-tag-attributes
  (x n xl tag-start-n atts)
  "Returns (MV ERR N ATTS CLOSEP)"
  (b* ((n (skip-past-ws x n xl)) ((when (>= n xl)) (mv (cat "Tag never closes."
            *nls*
            "Nearby text: {"
            (error-context x tag-start-n xl)
            "}"
            *nls*)
          n
          nil
          nil))
      ((when (eql (char x n) #\>)) (mv nil (+ 1 n) atts nil))
      ((when (eql (char x n) #\/)) (b* ((n (+ 1 n)) ((unless (and (< n xl) (eql (char x n) #\>))) (mv (cat "Stray / in tag."
                  *nls*
                  "Nearby text: {"
                  (error-context x tag-start-n xl)
                  "}"
                  *nls*)
                n
                nil
                nil)))
          (mv nil (+ 1 n) atts t)))
      ((mv err n key1) (read-name x n xl))
      ((when err) (mv err n nil nil))
      (n (skip-past-ws x n xl))
      ((when (>= n xl)) (mv (cat "Tag never closes."
            *nls*
            "Nearby text: {"
            (error-context x tag-start-n xl)
            "}"
            *nls*)
          n
          nil
          nil))
      ((unless (eql (char x n) #\=)) (read-tag-attributes x
          n
          xl
          tag-start-n
          (acons key1 nil atts)))
      (n (+ n 1))
      (n (skip-past-ws x n xl))
      ((mv err n val1) (read-attrval x n xl))
      ((when err) (mv err n nil nil)))
    (read-tag-attributes x
      n
      xl
      tag-start-n
      (acons key1 val1 atts))))
read-close-tagfunction
(defun read-close-tag
  (x n xl)
  "Returns (MV ERR N TOKEN-LIST)"
  (b* ((saved-n n) (n (skip-past-ws x n xl))
      ((mv err n name) (read-name x n xl))
      ((when err) (mv err n nil))
      (n (skip-past-ws x n xl))
      ((unless (and (< n xl) (eql (char x n) #\>))) (mv (cat "Invalid closing tag."
            *nls*
            "Nearby text: {"
            (error-context x saved-n xl)
            "}"
            *nls*)
          n
          nil))
      (n (+ 1 n))
      (close (list :close name)))
    (mv nil n (list close))))
skip-declarationfunction
(defun skip-declaration
  (x n xl start-n)
  "Returns (MV ERR N NIL)"
  (b* (((when (>= (+ 1 n) xl)) (mv (cat "<? ... ?> declaration never closes."
           *nls*
           "Nearby text: {"
           (error-context x start-n xl)
           "}"
           *nls*)
         n
         nil)) ((unless (and (eql (char x n) #\?)
           (eql (char x (+ n 1)) #\>))) (skip-declaration x
          (+ n 1)
          xl
          start-n)))
    (mv nil (+ n 2) nil)))
skip-entity-stufffunction
(defun skip-entity-stuff
  (x n xl start-n)
  "Returns (MV ERR N NIL)"
  (b* (((when (>= (+ 1 n) xl)) (mv (cat "<! ... ]> declaration never closes."
           *nls*
           "Nearby text: {"
           (error-context x start-n xl)
           "}"
           *nls*)
         n
         nil)) ((unless (and (eql (char x n) #\])
           (eql (char x (+ n 1)) #\>))) (skip-entity-stuff x
          (+ n 1)
          xl
          start-n)))
    (mv nil (+ n 2) nil)))
read-tagfunction
(defun read-tag
  (x n xl)
  "Returns (MV ERR N TOKEN-LIST)"
  (b* ((tag-start-n n) (n (+ 1 n))
      (n (skip-past-ws x n xl))
      ((when (>= n xl)) (mv (cat "Tag never closes."
            *nls*
            "Nearby text: {"
            (error-context x tag-start-n xl)
            "}"
            *nls*)
          n
          nil))
      ((when (eql (char x n) #\?)) (skip-declaration x
          n
          xl
          tag-start-n))
      ((when (eql (char x n) #\!)) (skip-entity-stuff x
          n
          xl
          tag-start-n))
      ((when (eql (char x n) #\/)) (read-close-tag x (+ 1 n) xl))
      ((mv err n name) (read-name x n xl))
      ((when err) (mv err n nil))
      ((mv err n atts closep) (read-tag-attributes x
          n
          xl
          tag-start-n
          nil))
      ((when err) (mv err n nil))
      (open (list :open name
          (reverse atts)
          tag-start-n))
      ((when (not closep)) (mv nil n (list open)))
      (close (list :close name)))
    (mv nil n (list open close))))
keyword-from-entity-stringfunction
(defun keyword-from-entity-string
  (s)
  (let ((name (if (and (not (equal s "")) (char<= (char s 0) #\Z))
         s
         (string-upcase s))))
    (intern name "KEYWORD")))
entity-strings-to-tokens-falfunction
(defun entity-strings-to-tokens-fal
  (entity-info fal)
  (cond ((endp entity-info) fal)
    (t (entity-strings-to-tokens-fal (cdr entity-info)
        (let* ((entry (car entity-info)) (string (car entry)))
          (hons-acons string
            (list :entity (keyword-from-entity-string string))
            fal))))))
entity-keywords-to-strings-falfunction
(defun entity-keywords-to-strings-fal
  (entity-info fal)
  (cond ((endp entity-info) fal)
    (t (entity-keywords-to-strings-fal (cdr entity-info)
        (let* ((entry (car entity-info)) (string (car entry)))
          (hons-acons (keyword-from-entity-string string)
            (concatenate 'string "&" string ";")
            fal))))))
other
(defconst *entity-info*
  '(("amp" "&" :built-in) ("lt" "<" :built-in)
    ("gt" ">" :built-in)
    ("quot" """ :built-in)
    ("apos" "'" :built-in)
    ("nbsp" " " 160)
    ("ndash" "--" 8211)
    ("mdash" "---" 8212)
    ("larr" "<--" 8592)
    ("rarr" "-->" 8594)
    ("harr" "<->" 8596)
    ("lang" "<" 9001)
    ("rang" ">" 9002)
    ("hellip" "..." 8230)
    ("lsquo" "`" 8216)
    ("rsquo" "'" 8217)
    ("ldquo" "``" 8220)
    ("rdquo" "''" 8221)
    ("and" "&" 8743)
    ("or" "\/" 8744)
    ("not" "~" 172)
    ("ne" "!=" 8800)
    ("le" "<=" 8804)
    ("ge" ">=" 8805)
    ("mid" "|" 8739)
    ("times" "\times" 215)
    ("Alpha" "\Alpha" 913)
    ("Beta" "\Beta" 914)
    ("Gamma" "\Gamma" 915)
    ("Delta" "\Delta" 916)
    ("Epsilon" "\Epsilon" 917)
    ("Zeta" "\Zeta" 918)
    ("Eta" "\Eta" 919)
    ("Theta" "\Theta" 920)
    ("Iota" "\Iota" 921)
    ("Kappa" "\Kappa" 922)
    ("Lambda" "\Lambda" 923)
    ("Mu" "\Mu" 924)
    ("Nu" "\Nu" 925)
    ("Xi" "\Xi" 926)
    ("Omicron" "\Omicron" 927)
    ("Pi" "\Pi" 928)
    ("Rho" "\Rho" 929)
    ("Sigma" "\Sigma" 931)
    ("Tau" "\Tau" 932)
    ("Upsilon" "\Upsilon" 933)
    ("Phi" "\Phi" 934)
    ("Chi" "\Chi" 935)
    ("Psi" "\Psi" 936)
    ("Omega" "\Omega" 937)
    ("alpha" "\alpha" 945)
    ("beta" "\beta" 946)
    ("gamma" "\gamma" 947)
    ("delta" "\delta" 948)
    ("epsilon" "\epsilon" 949)
    ("zeta" "\zeta" 950)
    ("eta" "\eta" 951)
    ("theta" "\theta" 952)
    ("iota" "\iota" 953)
    ("kappa" "\kappa" 954)
    ("lambda" "\lambda" 955)
    ("mu" "\mu" 956)
    ("nu" "\nu" 957)
    ("xi" "\xi" 958)
    ("omicron" "\omicron" 959)
    ("pi" "\pi" 960)
    ("rho" "\rho" 961)
    ("sigma" "\sigma" 963)
    ("tau" "\tau" 964)
    ("upsilon" "\upsilon" 965)
    ("phi" "\phi" 966)
    ("chi" "\chi" 967)
    ("psi" "\psi" 968)
    ("omega" "\omega" 969)
    ("Ccedil" "\Ccedil" 199)
    ("Aacute" "\Aacute" 193)
    ("Agrave" "\Agrave" 192)
    ("Acirc" "\Acirc" 194)
    ("Atilde" "\Atilde" 195)
    ("Auml" "\Auml" 196)
    ("Aring" "\Aring" 197)
    ("Egrave" "\Egrave" 200)
    ("Eacute" "\Eacute" 201)
    ("Ecirc" "\Ecirc" 202)
    ("Euml" "\Euml" 203)
    ("Igrave" "\Igrave" 204)
    ("Iacute" "\Iacute" 205)
    ("Icirc" "\Icirc" 206)
    ("Iuml" "\Iuml" 207)
    ("Ntilde" "\Ntilde" 209)
    ("Ograve" "\Ograve" 210)
    ("Oacute" "\Oacute" 211)
    ("Ocirc" "\Ocirc" 212)
    ("Otilde" "\Otilde" 213)
    ("Ouml" "\Ouml" 214)
    ("Ugrave" "\Ugrave" 217)
    ("Uacute" "\Uacute" 218)
    ("Ucirc" "\Ucirc" 219)
    ("Uuml" "\Uuml" 220)
    ("Aogon" "\Aogon" 260)
    ("Eogon" "\Eogon" 280)
    ("Iogon" "\Iogon" 302)
    ("Uogon" "\Uogon" 370)
    ("Nacute" "\Nacute" 323)
    ("ccedil" "\ccedil" 231)
    ("aacute" "\aacute" 225)
    ("agrave" "\agrave" 224)
    ("acirc" "\acirc" 226)
    ("atilde" "\atilde" 227)
    ("auml" "\auml" 228)
    ("aring" "\aring" 229)
    ("egrave" "\egrave" 232)
    ("eacute" "\eacute" 233)
    ("ecirc" "\ecirc" 234)
    ("euml" "\euml" 235)
    ("igrave" "\igrave" 236)
    ("iacute" "\iacute" 237)
    ("icirc" "\icirc" 238)
    ("iuml" "\iuml" 239)
    ("ntilde" "\ntilde" 241)
    ("ograve" "\ograve" 242)
    ("oacute" "\oacute" 243)
    ("ocirc" "\ocirc" 244)
    ("otilde" "\otilde" 245)
    ("ouml" "\ouml" 246)
    ("ugrave" "\ugrave" 249)
    ("uacute" "\uacute" 250)
    ("ucirc" "\ucirc" 251)
    ("uuml" "\uuml" 252)
    ("aogon" "\aogon" 261)
    ("eogon" "\eogon" 281)
    ("iogon" "\iogon" 303)
    ("uogon" "\uogon" 371)
    ("nacute" "\nacute" 324)
    ("forall" "\forall" 8704)
    ("exist" "\exist" 8707)
    ("empty" "\empty" 8709)
    ("isin" "\isin" 8712)
    ("notin" "\notin" 8713)
    ("prod" "\prod" 8719)
    ("sum" "\sum" 8721)))
other
(defconst *entity-strings-to-tokens-fal*
  (entity-strings-to-tokens-fal *entity-info* nil))
other
(defconst *entity-keywords-to-strings-fal*
  (let* ((fal (entity-keywords-to-strings-fal *entity-info*
         nil)) (dup (duplicate-keysp-eq fal)))
    (if dup
      (er hard
        'top
        "Implementation error: Found duplicate keyword, ~x0!  See ~
             books/xdoc/parse-xml.lisp ."
        dup)
      fal)))
read-entityfunction
(defun read-entity
  (x n xl)
  "Returns (MV ERR N TOK)"
  (b* ((saved-n n) (n (+ 1 n))
      ((mv foundp n rchars) (read-until x n xl (list #\;) nil))
      ((unless foundp) (mv (cat "Entity does not have a closing semicolon."
            *nls*
            "Nearby text: {"
            (error-context x saved-n xl)
            "}"
            *nls*)
          n
          nil))
      (n (+ 1 n))
      (str (rchars-to-string rchars))
      (doublet (cdr (hons-get str
            *entity-strings-to-tokens-fal*)))
      ((when doublet) (mv nil n doublet)))
    (mv (cat "Unsupported entity: &"
        str
        ";"
        *nls*
        "Nearby text: {"
        (error-context x saved-n xl)
        "}"
        *nls*)
      n
      nil)))
parse-xml-auxfunction
(defun parse-xml-aux
  (x n xl acc)
  "Returns (MV ERR TOKENS)"
  (b* (((when (>= n xl)) (mv nil acc)) (char-n (char x n))
      ((when (eql char-n #\<)) (b* (((mv err n token-list) (read-tag x n xl)) ((when err) (mv err nil))
            (acc (revappend token-list acc)))
          (parse-xml-aux x n xl acc)))
      ((when (eql char-n #\&)) (b* (((mv err n token) (read-entity x n xl)) ((when err) (mv err nil)))
          (parse-xml-aux x
            n
            xl
            (cons token acc))))
      ((when (eql char-n #\>)) (mv (cat "Stray end-of-tag character '>'"
            *nls*
            "Nearby text: {"
            (error-context x n xl)
            "}"
            *nls*)
          nil))
      ((mv ?foundp n rchars) (read-until x
          n
          xl
          '(#\< #\& #\>)
          nil))
      (token (list :text (rchars-to-string rchars))))
    (parse-xml-aux x
      n
      xl
      (cons token acc))))
entitytok-as-entityfunction
(defun entitytok-as-entity
  (x)
  (cdr (hons-get (entitytok-type x)
      *entity-keywords-to-strings-fal*)))
entitytok-as-plaintext-falfunction
(defun entitytok-as-plaintext-fal
  (entity-info fal)
  (cond ((endp entity-info) fal)
    (t (entitytok-as-plaintext-fal (cdr entity-info)
        (let* ((entry (car entity-info)) (string (car entry))
            (plaintext (second entry)))
          (hons-acons (keyword-from-entity-string string)
            plaintext
            fal))))))
other
(defconst *entitytok-as-plaintext-fal*
  (entitytok-as-plaintext-fal *entity-info* nil))
entitytok-as-plaintextfunction
(defun entitytok-as-plaintext
  (x)
  (cdr (hons-get (entitytok-type x)
      *entitytok-as-plaintext-fal*)))
flatten-token-for-errormsgfunction
(defun flatten-token-for-errormsg
  (x)
  (cond ((opentok-p x) (cat "<"
        (opentok-name x)
        (if (opentok-atts x)
          " [...]>"
          ">")))
    ((closetok-p x) (cat "</" (closetok-name x) ">"))
    ((entitytok-p x) (entitytok-as-entity x))
    ((texttok-p x) (texttok-text x))
    (t (er hard?
        'flatten-token-for-errormsg
        "Invalid token ~x0"
        x))))
flatten-tokens-for-errormsgfunction
(defun flatten-tokens-for-errormsg
  (x)
  (if (atom x)
    ""
    (cat (flatten-token-for-errormsg (car x))
      (flatten-tokens-for-errormsg (cdr x)))))
nearby-textfunction
(defun nearby-text
  (index str)
  (let* ((strlen (length str)) (start index)
      (stop (min strlen (+ index 60))))
    (cat (substitute #\ 
        #\

        (subseq str start stop))
      (if (equal stop strlen)
        ""
        "..."))))
open-tag-backtrace-entryfunction
(defun open-tag-backtrace-entry
  (orig-str open-tok)
  (declare (type string orig-str))
  (let* ((n (opentok-loc open-tok)) (name (opentok-name open-tok))
      (name-len (length name))
      (spaces-len (max 0 (- 6 name-len)))
      (pad (coerce (make-list spaces-len :initial-element #\ )
          'string))
      (nearby (nearby-text n orig-str)))
    (cat "   <"
      name
      "> "
      pad
      nearby
      *nls*)))
open-tags-backtracefunction
(defun open-tags-backtrace
  (orig-str open-tags)
  (if (atom open-tags)
    ""
    (cat (open-tag-backtrace-entry orig-str
        (car open-tags))
      (open-tags-backtrace orig-str
        (cdr open-tags)))))
find-tag-imbalancefunction
(defun find-tag-imbalance
  (x open-tags loc)
  "Returns (MV ERROR/NIL LOC/NIL STILL-OPEN-TAGS/NIL)"
  (b* (((when (atom x)) (if open-tags
         (mv (cat (opentok-name (car open-tags))
             " tag is never closed.")
           loc
           open-tags)
         (mv nil nil nil))) ((when (opentok-p (car x))) (find-tag-imbalance (cdr x)
          (cons (car x) open-tags)
          (+ 1 loc)))
      ((when (closetok-p (car x))) (b* ((close-name (closetok-name (car x))) ((unless (consp open-tags)) (mv (cat "Found </"
                  close-name
                  "> with no matching opening tag.")
                loc
                open-tags))
            (open-name (opentok-name (car open-tags)))
            ((unless (equal close-name open-name)) (mv (cat "Found <"
                  open-name
                  "> with mismatched </"
                  close-name
                  ">.")
                loc
                open-tags)))
          (find-tag-imbalance (cdr x)
            (cdr open-tags)
            (+ 1 loc)))))
    (find-tag-imbalance (cdr x)
      open-tags
      (+ 1 loc))))
parse-xmlfunction
(defun parse-xml
  (x)
  (declare (type string x))
  (b* (((mv err rtokens) (parse-xml-aux x 0 (length x) nil)) ((when err) (mv err nil))
      (tokens (reverse rtokens))
      ((mv err loc open-tags) (find-tag-imbalance tokens nil 0))
      ((when err) (b* (((when (not loc)) (mv err nil)) (back-one (max 0 (- loc 4)))
            (start-ctx (nthcdr back-one tokens))
            (context (take (min 8 (len start-ctx))
                start-ctx))
            (nearby (flatten-tokens-for-errormsg context)))
          (mv (cat err
              *nls*
              "Nearby text: {"
              nearby
              "}"
              *nls*
              *nls*
              (if open-tags
                (cat "Open tags stack:"
                  *nls*
                  (open-tags-backtrace x
                    (reverse open-tags)))
                ""))
            nil))))
    (mv err tokens)))