Filtering...

charset

books/std/strings/charset
other
(in-package "STR")
other
(include-book "std/util/define" :dir :system)
other
(include-book "std/util/deflist" :dir :system)
other
(include-book "xdoc/names" :dir :system)
other
(include-book "case-conversion")
other
(local (include-book "std/basic/code-char-char-code-with-force" :dir :system))
other
(define charset-p
  (x)
  :parents (std/strings)
  :short "A way to represent a fixed set of characters."
  :long "<p>When writing a lexer, it is often useful to introduce character
sets that recognize sets of characters such as whitespace, alphabetic
characters, digits, and so forth.</p>

<p>A @('charset-p') represents such a set of characters as a natural number.
In this representation, the character whose code is @('i') is a member of the
set @('x') exactly when the @('i')th bit of @('x') is 1.  This may as well be
thought of as a bit-array lookup.</p>

<p>To introduce new sets of characters, e.g., to recognize "whitespace
characters," or "hex digits," or whatever, we use the @(see defcharset)
macro.</p>

<p>We generally treat character sets as opaque.  It would be quite odd to,
e.g., allow the theorem prover to expand a character set's definition into its
bit-mask form, or to reason about functions like @(see logbitp) in conjunction
with character sets.  If you find yourself doing this, something is probably
wrong.</p>"
  :returns (bool booleanp :rule-classes :type-prescription)
  (natp x))
other
(local (defthm charset-p-cr
    (implies (charset-p x) (natp x))
    :rule-classes :compound-recognizer :hints (("Goal" :in-theory (enable charset-p)))))
other
(local (set-default-parents charset-p))
other
(define char-in-charset-p
  ((char :type character) (set charset-p))
  :short "@(call char-in-charset-p) determines if the character @('char') is a
member of the character set @('set')."
  :inline t
  (mbe :logic (and (characterp char) (logbitp (char-code char) set))
    :exec (logbitp (the (unsigned-byte 8) (char-code char)) set))
  ///
  (defthm char-in-charset-p-when-not-character
    (implies (not (characterp char))
      (not (char-in-charset-p char set)))))
other
(define code-in-charset-p
  ((code :type (unsigned-byte 8)) (set charset-p))
  :short "@(call code-in-charset-p) determines if the character whose code is
@('code') is a member of the character set @('set')."
  :long "<p>Typically there's no reason to use this.  But if you already have
the character code available for some reason, this may be slightly more
efficient than turning it back into a character and then calling @(see
char-in-charset-p).</p>"
  :inline t
  :enabled t
  (mbe :logic (char-in-charset-p (code-char code) set)
    :exec (logbitp code set))
  :guard-hints (("Goal" :in-theory (enable char-in-charset-p))))
other
(deflist chars-in-charset-p
  (x set)
  (char-in-charset-p x set)
  :short "@(call chars-in-charset-p) recognizes lists of characters @('x')
where every character is a member of the @(see charset-p) @('set')."
  :guard (and (character-listp x) (charset-p set)))
other
(defxdoc defcharset
  :short "Define a recognizer for a particular set of characters."
  :long "<p>@('Defcharset') is a macro for introducing a @(see charset-p) and
proving that it recognizes the correct characters.  </p>

<h5>Example</h5>
@({
 (defcharset whitespace
   (or (eql x #\Newline)
       (eql x #\Space)
       (eql x #\Tab)))
})

<p>This example introduces:</p>

<ul>

<li>@('(whitespace-char-p x)') &mdash; a "slow" function for recognizing
newline, space, and tab characters</li>

<li>@('(whitespace-chars)') &mdash; a @(see charset-p) that is proven to
correspond to @('whitespace-char-p'),</li>

<li>@('(whitespace-charlist-p x)') &mdash; an ordinary @(see std::deflist) to
recognize lists whose every character satisfies @('whitespace-char-p').</li>

</ul>

<h5>General Form</h5>
@({
 (defcharset prefix criteria
   [:in-package-of package]
   [:parents ...]
   [:short ...]
   [:long ...]
})

<p>All functions will be introduced in @('pkg'), determined as follows:</p>

<ul>

<li>If an @(':in-package-of') argument is provided, then the corresponding
@('package') must be a symbol, and we will use its package.</li>

<li>Otherwise, the package of @('prefix') will be used.</li>

</ul>

<p>The @('prefix') is a symbol that is used for name generation.  Some common
examples would be @('whitespace'), @('alpha'), @('digit'), etc.</p>

<p>The @('criteria') is some term involving the variable @('pkg::x').  The
criteria term may assume that @('x') is a character, and is responsible for
determining whether @('x') is a member of the desired set.  Normally you should
not worry about the efficiency of @('criteria').  Although the term you write
here <i>does</i> become part of recognizers like @('whitespace-char-p') and
@('whitespace-charlist-p'), the actual character set, i.e.,
@('whitespace-chars'), is represented as a bit mask, and the speed of your
@('criteria') term will not have any bearing on how fast it is to look up its
bits.</p>

<p>The @(':parents'), @(':short'), and @(':long') options are as in @(see
defxdoc), and allow you to provide documentation to the character recognizer,
e.g., @('whitespace-char-p').  The other functions are documented
automatically.</p>")
defcharsetmacro
(defmacro defcharset
  (prefix criteria
    &key
    in-package-of
    parents
    short
    long)
  (b* ((in-package-of (or in-package-of prefix)) (foo-char-p (intern-in-package-of-symbol (cat (symbol-name prefix) "-CHAR-P")
          in-package-of))
      (foo-charlist-p (intern-in-package-of-symbol (cat (symbol-name prefix) "-CHARLIST-P")
          in-package-of))
      (foo-chars (intern-in-package-of-symbol (cat (symbol-name prefix) "-CHARS")
          in-package-of))
      (make-foo-chars (intern-in-package-of-symbol (cat "MAKE-" (symbol-name prefix) "-CHARS")
          in-package-of))
      (foo-char-p-url (rchars-to-string (file-name-mangle foo-char-p nil)))
      (x (intern-in-package-of-symbol "X" in-package-of)))
    `(progn (defsection ,STR::FOO-CHAR-P
        ,@(AND STR::PARENTS `(:PARENTS ,STR::PARENTS))
        ,@(AND STR::SHORT `(:SHORT ,STR::SHORT))
        ,@(AND STR::LONG `(:LONG ,STR::LONG))
        (defund ,STR::FOO-CHAR-P
          (,STR::X)
          (declare (xargs :guard t :normalize nil))
          (and (characterp ,STR::X) ,STR::CRITERIA))
        (in-theory (disable (:type-prescription ,STR::FOO-CHAR-P)))
        (local (in-theory (enable ,STR::FOO-CHAR-P)))
        (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL
  (STR::CAT "BOOLEANP-OF-" (SYMBOL-NAME STR::FOO-CHAR-P)) STR::IN-PACKAGE-OF)
          (booleanp (,STR::FOO-CHAR-P ,STR::X))
          :rule-classes :type-prescription)
        (local (in-theory (theory 'minimal-theory)))
        (local (in-theory (enable booleanp
              booleanp-compound-recognizer
              ,STR::FOO-CHAR-P
              ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL
  (STR::CAT "BOOLEANP-OF-" (SYMBOL-NAME STR::FOO-CHAR-P)) STR::IN-PACKAGE-OF))))
        (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL
  (STR::CAT "CHARACTERP-WHEN-" (SYMBOL-NAME STR::FOO-CHAR-P))
  STR::IN-PACKAGE-OF)
          (implies (,STR::FOO-CHAR-P ,STR::X)
            (characterp ,STR::X))
          :rule-classes :compound-recognizer))
      (defsection ,STR::FOO-CHARS
        :parents (,STR::FOO-CHAR-P)
        :short ,(STR::CAT "A character set for <see topic='" STR::FOO-CHAR-P-URL "'>"
  (STR::DOWNCASE-STRING (SYMBOL-NAME STR::FOO-CHAR-P)) "</see>.")
        (local (defund ,STR::MAKE-FOO-CHARS
            (n)
            (declare (xargs :guard (and (natp n) (< n 256))
                :ruler-extenders :all))
            (logior (if (,STR::FOO-CHAR-P (code-char n))
                (ash 1 n)
                0)
              (if (zp n)
                0
                (,STR::MAKE-FOO-CHARS (- n 1))))))
        (make-event (let ((foo-chars ',STR::FOO-CHARS) (charset (,STR::MAKE-FOO-CHARS 255)))
            `(defund-inline ,STR::FOO-CHARS
              nil
              (declare (xargs :guard t))
              ,STR::CHARSET)))
        (in-theory (disable (:e ,STR::FOO-CHARS)
            (:t ,STR::FOO-CHARS)
            (:e ,STR::FOO-CHAR-P)
            (:e char-in-charset-p)
            (:e code-in-charset-p)
            (:e code-char)
            (:e char-code)
            (:e <)))
        (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL
  (STR::CAT "CHARSET-P-OF-" (SYMBOL-NAME STR::FOO-CHARS)) STR::IN-PACKAGE-OF)
          (charset-p (,STR::FOO-CHARS))
          :hints (("Goal" :in-theory (enable ,STR::FOO-CHARS charset-p))))
        (local (defun defcharset-tester
            (n)
            (declare (xargs :ruler-extenders :all))
            (and (equal (code-in-charset-p n (,STR::FOO-CHARS))
                (,STR::FOO-CHAR-P (code-char n)))
              (or (zp n) (defcharset-tester (- n 1))))))
        (local (defthmd defcharset-lemma1
            (implies (and (natp n)
                (natp i)
                (<= i n)
                (defcharset-tester n))
              (equal (code-in-charset-p i (,STR::FOO-CHARS))
                (,STR::FOO-CHAR-P (code-char i))))
            :hints (("Goal" :induct (defcharset-tester n)))))
        (local (defthmd defcharset-lemma2
            (implies (and (natp i) (<= i 255))
              (equal (code-in-charset-p i (,STR::FOO-CHARS))
                (,STR::FOO-CHAR-P (code-char i))))
            :hints (("Goal" :use ((:instance defcharset-lemma1
                  (i i)
                  (n 255)))))))
        (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL
  (STR::CAT "CHAR-IN-CHARSET-P-OF-" (SYMBOL-NAME STR::FOO-CHARS))
  STR::IN-PACKAGE-OF)
          (equal (char-in-charset-p ,STR::X (,STR::FOO-CHARS))
            (,STR::FOO-CHAR-P ,STR::X))
          :hints (("Goal" :in-theory (enable code-in-charset-p)
             :use ((:instance defcharset-lemma2
                (i (char-code ,STR::X))))))))
      (deflist ,STR::FOO-CHARLIST-P
        (,STR::X)
        (,STR::FOO-CHAR-P ,STR::X)
        :guard t
        :parents (,STR::FOO-CHAR-P)
        :rest ((defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL
  (STR::CAT "CHARS-IN-CHARSET-P-OF-" (SYMBOL-NAME STR::FOO-CHARS))
  STR::IN-PACKAGE-OF)
           (equal (chars-in-charset-p ,STR::X (,STR::FOO-CHARS))
             (,STR::FOO-CHARLIST-P ,STR::X))
           :hints (("Goal" :induct (len ,STR::X)))))))))
other
(local (progn (include-book "decimal")
    (defcharset whitespace
      (or (eql x #\
)
        (eql x #\ )
        (eql x #\	)))
    (defcharset nondigit (not (digit-char-p x)))
    (defcharset any t)
    (defcharset no nil)))