Filtering...

html-encode

books/std/strings/html-encode
other
(in-package "STR")
other
(include-book "cat")
other
(include-book "std/util/bstar" :dir :system)
other
(include-book "misc/definline" :dir :system)
other
(include-book "centaur/fty/fixequiv" :dir :system)
other
(include-book "centaur/fty/basetypes" :dir :system)
other
(local (include-book "std/basic/arith-equivs" :dir :system))
other
(local (include-book "arithmetic"))
other
(local (add-default-post-define-hook :fix))
other
(defsection html-encoding
  :parents (std/strings)
  :short "Routines to encode HTML entities, e.g., @('<') becomes @('&lt;')."
  :long "<p>In principle, our conversion routines may not be entirely
legitimate in the sense of some precise HTML specification, because we do not
account for non-printable characters or other similarly unlikely garbage in the
input.  But it seems like what we implement is pretty reasonable, and handles
most ordinary characters.</p>

<p>Note that we convert @('#\Newline') characters into the sequence
@('<br/>#\Newline').  This may not be the desired behavior in certain
applications, but seems basically reasonable for converting plain text into
HTML.</p>")
other
(local (set-default-parents html-encoding))
other
(order-subtopics html-encoding
  (html-encode-string html-encode-string-basic))
html-spacemacro
(defmacro html-space
  nil
  (list 'quote (coerce "&nbsp;" 'list)))
html-newlinemacro
(defmacro html-newline
  nil
  (list 'quote
    (append (coerce "<br/>" 'list) (list #\
))))
html-lessmacro
(defmacro html-less
  nil
  (list 'quote (coerce "&lt;" 'list)))
html-greatermacro
(defmacro html-greater
  nil
  (list 'quote (coerce "&gt;" 'list)))
html-ampmacro
(defmacro html-amp
  nil
  (list 'quote (coerce "&amp;" 'list)))
html-quotemacro
(defmacro html-quote
  nil
  (list 'quote (coerce "&quot;" 'list)))
other
(define html-encode-char-basic
  :short "HTML encode a single character (simple version, no tab support)."
  ((x characterp "Character to encode.") (acc "Accumulator for output characters, reverse order."))
  :returns (new-acc character-listp
    :hyp (character-listp acc))
  :split-types t
  :inline t
  (declare (type character x))
  (b* (((the character x) (mbe :logic (char-fix x) :exec x)))
    (case x
      (#\  (if (or (atom acc)
            (eql (car acc) #\ )
            (eql (car acc) #\
))
          (revappend (html-space) acc)
          (cons #\  acc)))
      (#\
 (revappend (html-newline) acc))
      (#\< (revappend (html-less) acc))
      (#\> (revappend (html-greater) acc))
      (#\& (revappend (html-amp) acc))
      (#\" (revappend (html-quote) acc))
      (otherwise (cons x acc)))))
other
(define html-encode-chars-basic-aux
  :short "Convert a character list into HTML (simple version, no column/tabsize support)."
  ((x character-listp "The characters to convert.") (acc "Accumulator for output characters, reverse order."))
  :returns (new-acc character-listp
    :hyp (character-listp acc))
  (b* (((when (atom x)) acc) (acc (html-encode-char-basic (car x) acc)))
    (html-encode-chars-basic-aux (cdr x) acc)))
other
(define html-encode-string-basic-aux
  :short "Convert a string into HTML (simple version, no tab support)."
  ((x stringp "String we're encoding.") (n natp
      "Current position in @('x').  Should typically start as 0.")
    (xl (eql xl (length x))
      "Precomputed length of @('x').")
    (acc "Accumulator for output characters, reverse order."))
  :guard (<= n xl)
  :enabled t
  :verify-guards nil
  :split-types t
  (declare (type unsigned-byte n xl))
  (mbe :logic (html-encode-chars-basic-aux (nthcdr n (explode x))
      acc)
    :exec (b* (((when (mbe :logic (zp (- (nfix xl) (nfix n)))
            :exec (eql n xl))) acc) (acc (html-encode-char-basic (char x n) acc))
        ((the unsigned-byte n) (mbe :logic (+ 1 (lnfix n))
            :exec (+ 1 n))))
      (html-encode-string-basic-aux x
        n
        xl
        acc)))
  ///
  (verify-guards html-encode-string-basic-aux
    :hints (("Goal" :in-theory (enable html-encode-chars-basic-aux)))))
other
(define html-encode-string-basic
  :short "Convert a string into HTML (simple version, no tab support)."
  ((x stringp))
  :returns (html-string stringp :rule-classes :type-prescription)
  (rchars-to-string (html-encode-string-basic-aux x
      0
      (length x)
      nil)))
other
(define repeated-revappend
  ((n natp) x y)
  :parents (html-encode-push)
  (if (zp n)
    y
    (repeated-revappend (- n 1)
      x
      (revappend-without-guard x y)))
  ///
  (defthm character-listp-of-repeated-revappend
    (implies (and (character-listp x)
        (character-listp y))
      (character-listp (repeated-revappend n x y)))))
other
(define distance-to-tab
  ((col natp) (tabsize posp))
  :parents (html-encode-push)
  :inline t
  :split-types t
  (declare (type unsigned-byte col tabsize))
  (mbe :logic (b* ((col (nfix col)) (tabsize (pos-fix tabsize)))
      (nfix (- tabsize (rem col tabsize))))
    :exec (- tabsize
      (the unsigned-byte (rem col tabsize))))
  :prepwork ((local (include-book "ihs/quotient-remainder-lemmas" :dir :system)) (local (in-theory (disable rem)))))
other
(define html-encode-next-col
  :parents (html-encode-push)
  :short "Compute where we'll be after printing a character, accounting for tab
          sizes and newlines."
  ((char1 characterp "Character to be printed.") (col natp
      "Current column number before printing char1.")
    (tabsize posp))
  :returns (new-col natp
    :rule-classes :type-prescription "New column number after printing char1.")
  :inline t
  (b* ((char1 (mbe :logic (char-fix char1)
         :exec char1)) (col (lnfix col)))
    (cond ((eql char1 #\
) 0)
      ((eql char1 #\	) (+ col (distance-to-tab col tabsize)))
      (t (+ 1 col)))))
other
(define html-encode-push
  :short "HTML encode a single character, with column/tabsize support."
  ((char1 characterp "Character to be printed.") (col natp
      "Current column number before printing char1 (for tab computations).")
    (tabsize posp)
    (acc "Reverse order characters we're building."))
  :returns (new-acc character-listp
    :hyp (character-listp acc))
  (b* (((the character char1) (mbe :logic (char-fix char1)
         :exec char1)))
    (case char1
      (#\  (if (or (atom acc)
            (eql (car acc) #\ )
            (eql (car acc) #\
))
          (revappend (html-space) acc)
          (cons #\  acc)))
      (#\
 (revappend (html-newline) acc))
      (#\< (revappend (html-less) acc))
      (#\> (revappend (html-greater) acc))
      (#\& (revappend (html-amp) acc))
      (#\" (revappend (html-quote) acc))
      (#\	 (repeated-revappend (distance-to-tab col tabsize)
          (html-space)
          acc))
      (otherwise (cons char1 acc)))))
other
(define html-encode-chars-aux
  :short "Convert a character list into HTML.  Outputs to an accumulator.
Tracks the column number to handle tab characters."
  ((x character-listp "The characters to convert.") (col natp "Current column number.")
    (tabsize posp "Width of tab characters.")
    (acc "Accumulator for output characters, reverse order."))
  :returns (mv (new-col natp
      :rule-classes :type-prescription "Updated column number after printing @('x').")
    (new-acc character-listp
      :hyp (character-listp acc)
      "Updated output."))
  :split-types t
  (declare (type unsigned-byte col tabsize))
  (b* (((when (atom x)) (mv (lnfix col) acc)) (acc (html-encode-push (car x)
          col
          tabsize
          acc))
      (col (html-encode-next-col (car x)
          col
          tabsize)))
    (html-encode-chars-aux (cdr x)
      col
      tabsize
      acc)))
other
(define html-encode-string-aux
  :short "Core of converting strings into HTML, output to an accumulator."
  ((x stringp "String we're encoding.") (n natp
      "Current position in @('x').  Should typically start as 0.")
    (xl (eql xl (length x)))
    (col natp "Current column we're printing to.")
    (tabsize posp)
    (acc))
  :guard (<= n xl)
  :returns (mv (new-col natp :rule-classes :type-prescription)
    (new-acc character-listp
      :hyp (character-listp acc)))
  :split-types t
  (declare (type string x)
    (type unsigned-byte n xl col tabsize))
  :long "<p>This is similar to @(see html-encode-chars-aux), but encodes part
of a the string @('x') instead of a character list.</p>"
  :enabled t
  :verify-guards nil
  (mbe :logic (html-encode-chars-aux (nthcdr n (explode x))
      col
      tabsize
      acc)
    :exec (b* (((when (mbe :logic (zp (- (length (str-fix x)) (nfix n)))
            :exec (eql n xl))) (mv (lnfix col) acc)) (char1 (char x n))
        (acc (html-encode-push char1
            col
            tabsize
            acc))
        (col (html-encode-next-col char1 col tabsize)))
      (html-encode-string-aux x
        (+ 1 (lnfix n))
        xl
        col
        tabsize
        acc)))
  ///
  (verify-guards html-encode-string-aux
    :hints (("Goal" :in-theory (enable html-encode-chars-aux)))))
other
(define html-encode-string
  :short "@(call html-encode-string) converts the string @('x') into HTML, and
returns the result as a new string.  Tracks the column number to handle tab
characters."
  ((x stringp) (tabsize posp))
  :returns (html-encoded stringp :rule-classes :type-prescription)
  (b* ((x (mbe :logic (str-fix x) :exec x)) ((mv ?col acc) (html-encode-string-aux x
          0
          (length x)
          0
          tabsize
          nil)))
    (rchars-to-string acc)))