other
(in-package "STR")
other
(include-book "std/util/define" :dir :system)
other
(include-book "std/util/deflist" :dir :system)
other
(include-book "xdoc/names" :dir :system)
other
(include-book "case-conversion")
other
(local (include-book "std/basic/code-char-char-code-with-force" :dir :system))
other
(define charset-p (x) :parents (std/strings) :short "A way to represent a fixed set of characters." :long "<p>When writing a lexer, it is often useful to introduce character sets that recognize sets of characters such as whitespace, alphabetic characters, digits, and so forth.</p> <p>A @('charset-p') represents such a set of characters as a natural number. In this representation, the character whose code is @('i') is a member of the set @('x') exactly when the @('i')th bit of @('x') is 1. This may as well be thought of as a bit-array lookup.</p> <p>To introduce new sets of characters, e.g., to recognize "whitespace characters," or "hex digits," or whatever, we use the @(see defcharset) macro.</p> <p>We generally treat character sets as opaque. It would be quite odd to, e.g., allow the theorem prover to expand a character set's definition into its bit-mask form, or to reason about functions like @(see logbitp) in conjunction with character sets. If you find yourself doing this, something is probably wrong.</p>" :returns (bool booleanp :rule-classes :type-prescription) (natp x))
other
(local (defthm charset-p-cr (implies (charset-p x) (natp x)) :rule-classes :compound-recognizer :hints (("Goal" :in-theory (enable charset-p)))))
other
(local (set-default-parents charset-p))
other
(define char-in-charset-p ((char :type character) (set charset-p)) :short "@(call char-in-charset-p) determines if the character @('char') is a member of the character set @('set')." :inline t (mbe :logic (and (characterp char) (logbitp (char-code char) set)) :exec (logbitp (the (unsigned-byte 8) (char-code char)) set)) /// (defthm char-in-charset-p-when-not-character (implies (not (characterp char)) (not (char-in-charset-p char set)))))
other
(define code-in-charset-p ((code :type (unsigned-byte 8)) (set charset-p)) :short "@(call code-in-charset-p) determines if the character whose code is @('code') is a member of the character set @('set')." :long "<p>Typically there's no reason to use this. But if you already have the character code available for some reason, this may be slightly more efficient than turning it back into a character and then calling @(see char-in-charset-p).</p>" :inline t :enabled t (mbe :logic (char-in-charset-p (code-char code) set) :exec (logbitp code set)) :guard-hints (("Goal" :in-theory (enable char-in-charset-p))))
other
(deflist chars-in-charset-p (x set) (char-in-charset-p x set) :short "@(call chars-in-charset-p) recognizes lists of characters @('x') where every character is a member of the @(see charset-p) @('set')." :guard (and (character-listp x) (charset-p set)))
other
(defxdoc defcharset :short "Define a recognizer for a particular set of characters." :long "<p>@('Defcharset') is a macro for introducing a @(see charset-p) and proving that it recognizes the correct characters. </p> <h5>Example</h5> @({ (defcharset whitespace (or (eql x #\Newline) (eql x #\Space) (eql x #\Tab))) }) <p>This example introduces:</p> <ul> <li>@('(whitespace-char-p x)') — a "slow" function for recognizing newline, space, and tab characters</li> <li>@('(whitespace-chars)') — a @(see charset-p) that is proven to correspond to @('whitespace-char-p'),</li> <li>@('(whitespace-charlist-p x)') — an ordinary @(see std::deflist) to recognize lists whose every character satisfies @('whitespace-char-p').</li> </ul> <h5>General Form</h5> @({ (defcharset prefix criteria [:in-package-of package] [:parents ...] [:short ...] [:long ...] }) <p>All functions will be introduced in @('pkg'), determined as follows:</p> <ul> <li>If an @(':in-package-of') argument is provided, then the corresponding @('package') must be a symbol, and we will use its package.</li> <li>Otherwise, the package of @('prefix') will be used.</li> </ul> <p>The @('prefix') is a symbol that is used for name generation. Some common examples would be @('whitespace'), @('alpha'), @('digit'), etc.</p> <p>The @('criteria') is some term involving the variable @('pkg::x'). The criteria term may assume that @('x') is a character, and is responsible for determining whether @('x') is a member of the desired set. Normally you should not worry about the efficiency of @('criteria'). Although the term you write here <i>does</i> become part of recognizers like @('whitespace-char-p') and @('whitespace-charlist-p'), the actual character set, i.e., @('whitespace-chars'), is represented as a bit mask, and the speed of your @('criteria') term will not have any bearing on how fast it is to look up its bits.</p> <p>The @(':parents'), @(':short'), and @(':long') options are as in @(see defxdoc), and allow you to provide documentation to the character recognizer, e.g., @('whitespace-char-p'). The other functions are documented automatically.</p>")
defcharsetmacro
(defmacro defcharset (prefix criteria &key in-package-of parents short long) (b* ((in-package-of (or in-package-of prefix)) (foo-char-p (intern-in-package-of-symbol (cat (symbol-name prefix) "-CHAR-P") in-package-of)) (foo-charlist-p (intern-in-package-of-symbol (cat (symbol-name prefix) "-CHARLIST-P") in-package-of)) (foo-chars (intern-in-package-of-symbol (cat (symbol-name prefix) "-CHARS") in-package-of)) (make-foo-chars (intern-in-package-of-symbol (cat "MAKE-" (symbol-name prefix) "-CHARS") in-package-of)) (foo-char-p-url (rchars-to-string (file-name-mangle foo-char-p nil))) (x (intern-in-package-of-symbol "X" in-package-of))) `(progn (defsection ,STR::FOO-CHAR-P ,@(AND STR::PARENTS `(:PARENTS ,STR::PARENTS)) ,@(AND STR::SHORT `(:SHORT ,STR::SHORT)) ,@(AND STR::LONG `(:LONG ,STR::LONG)) (defund ,STR::FOO-CHAR-P (,STR::X) (declare (xargs :guard t :normalize nil)) (and (characterp ,STR::X) ,STR::CRITERIA)) (in-theory (disable (:type-prescription ,STR::FOO-CHAR-P))) (local (in-theory (enable ,STR::FOO-CHAR-P))) (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL (STR::CAT "BOOLEANP-OF-" (SYMBOL-NAME STR::FOO-CHAR-P)) STR::IN-PACKAGE-OF) (booleanp (,STR::FOO-CHAR-P ,STR::X)) :rule-classes :type-prescription) (local (in-theory (theory 'minimal-theory))) (local (in-theory (enable booleanp booleanp-compound-recognizer ,STR::FOO-CHAR-P ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL (STR::CAT "BOOLEANP-OF-" (SYMBOL-NAME STR::FOO-CHAR-P)) STR::IN-PACKAGE-OF)))) (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL (STR::CAT "CHARACTERP-WHEN-" (SYMBOL-NAME STR::FOO-CHAR-P)) STR::IN-PACKAGE-OF) (implies (,STR::FOO-CHAR-P ,STR::X) (characterp ,STR::X)) :rule-classes :compound-recognizer)) (defsection ,STR::FOO-CHARS :parents (,STR::FOO-CHAR-P) :short ,(STR::CAT "A character set for <see topic='" STR::FOO-CHAR-P-URL "'>" (STR::DOWNCASE-STRING (SYMBOL-NAME STR::FOO-CHAR-P)) "</see>.") (local (defund ,STR::MAKE-FOO-CHARS (n) (declare (xargs :guard (and (natp n) (< n 256)) :ruler-extenders :all)) (logior (if (,STR::FOO-CHAR-P (code-char n)) (ash 1 n) 0) (if (zp n) 0 (,STR::MAKE-FOO-CHARS (- n 1)))))) (make-event (let ((foo-chars ',STR::FOO-CHARS) (charset (,STR::MAKE-FOO-CHARS 255))) `(defund-inline ,STR::FOO-CHARS nil (declare (xargs :guard t)) ,STR::CHARSET))) (in-theory (disable (:e ,STR::FOO-CHARS) (:t ,STR::FOO-CHARS) (:e ,STR::FOO-CHAR-P) (:e char-in-charset-p) (:e code-in-charset-p) (:e code-char) (:e char-code) (:e <))) (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL (STR::CAT "CHARSET-P-OF-" (SYMBOL-NAME STR::FOO-CHARS)) STR::IN-PACKAGE-OF) (charset-p (,STR::FOO-CHARS)) :hints (("Goal" :in-theory (enable ,STR::FOO-CHARS charset-p)))) (local (defun defcharset-tester (n) (declare (xargs :ruler-extenders :all)) (and (equal (code-in-charset-p n (,STR::FOO-CHARS)) (,STR::FOO-CHAR-P (code-char n))) (or (zp n) (defcharset-tester (- n 1)))))) (local (defthmd defcharset-lemma1 (implies (and (natp n) (natp i) (<= i n) (defcharset-tester n)) (equal (code-in-charset-p i (,STR::FOO-CHARS)) (,STR::FOO-CHAR-P (code-char i)))) :hints (("Goal" :induct (defcharset-tester n))))) (local (defthmd defcharset-lemma2 (implies (and (natp i) (<= i 255)) (equal (code-in-charset-p i (,STR::FOO-CHARS)) (,STR::FOO-CHAR-P (code-char i)))) :hints (("Goal" :use ((:instance defcharset-lemma1 (i i) (n 255))))))) (defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL (STR::CAT "CHAR-IN-CHARSET-P-OF-" (SYMBOL-NAME STR::FOO-CHARS)) STR::IN-PACKAGE-OF) (equal (char-in-charset-p ,STR::X (,STR::FOO-CHARS)) (,STR::FOO-CHAR-P ,STR::X)) :hints (("Goal" :in-theory (enable code-in-charset-p) :use ((:instance defcharset-lemma2 (i (char-code ,STR::X)))))))) (deflist ,STR::FOO-CHARLIST-P (,STR::X) (,STR::FOO-CHAR-P ,STR::X) :guard t :parents (,STR::FOO-CHAR-P) :rest ((defthm ,(STR::INTERN-IN-PACKAGE-OF-SYMBOL (STR::CAT "CHARS-IN-CHARSET-P-OF-" (SYMBOL-NAME STR::FOO-CHARS)) STR::IN-PACKAGE-OF) (equal (chars-in-charset-p ,STR::X (,STR::FOO-CHARS)) (,STR::FOO-CHARLIST-P ,STR::X)) :hints (("Goal" :induct (len ,STR::X)))))))))
other
(local (progn (include-book "decimal") (defcharset whitespace (or (eql x #\ ) (eql x #\ ) (eql x #\ ))) (defcharset nondigit (not (digit-char-p x))) (defcharset any t) (defcharset no nil)))