other
(in-package "STR")
other
(include-book "decimal")
other
(include-book "tools/mv-nth" :dir :system)
other
(local (include-book "arithmetic"))
other
(defsection charlistnat< :parents (ordering) :short "Mixed alphanumeric character-list less-than test." :long "<p>@(call charlistnat<) determines if the character list @('x') is "smaller" than the character list @('y'), using an ordering that is nice for humans.</p> <p>This is almost an ordinary case-sensitive lexicographic ordering. But, unlike a simple lexicographic order, we identify sequences of natural number digits and group them together so that they can be sorted numerically.</p> <p>Even though this function operates on character lists, let's just talk about strings instead since they are easier to write down. If you give most string sorts a list of inputs like "x0" through "x11", they will end up in a peculiar order:</p> @({"x0", "x1", "x10", "x11", "x2", "x3", ... "x9"}) <p>But in @('charlistnat<'), we see the adjacent digits as bundles and sort them as numbers. This leads to a nicer ordering:</p> @({"x0", "x1", "x2", ..., "x9", "x10", "x11"}) <p>This is almost entirely straightforward. One twist is how to accommodate leading zeroes. Our approach is: instead of grouping adjacent digits and treating them as a natural number, treat them as a pair with a value and a length. We then sort these pairs first by value, and then by length. Hence, a string such as "x0" is considered to be less than "x00", etc.</p> <p>See also @(see strnat<) and @(see icharlist<).</p>" (local (in-theory (disable nth-when-bigger negative-when-natp default-+-2 default-+-1 default-<-2 commutativity-of-+ default-<-1 |x < y => 0 < y-x| char<-trichotomy-strong char<-antisymmetric char<-trichotomy-weak char<-transitive expt default-car default-cdr))) (defund charlistnat< (x y) (declare (xargs :guard (and (character-listp x) (character-listp y)) :measure (len x))) (cond ((atom y) nil) ((atom x) t) ((and (dec-digit-char-p (car x)) (dec-digit-char-p (car y))) (b* (((mv v1 l1 rest-x) (parse-nat-from-charlist x 0 0)) ((mv v2 l2 rest-y) (parse-nat-from-charlist y 0 0))) (cond ((or (< v1 v2) (and (int= v1 v2) (< l1 l2))) t) ((or (< v2 v1) (and (int= v1 v2) (< l2 l1))) nil) (t (charlistnat< rest-x rest-y))))) ((char< (car x) (car y)) t) ((char< (car y) (car x)) nil) (t (charlistnat< (cdr x) (cdr y))))) (local (in-theory (enable charlistnat<))) (defcong charlisteqv equal (charlistnat< x y) 1) (defcong charlisteqv equal (charlistnat< x y) 2) (defthm charlistnat<-irreflexive (not (charlistnat< x x))) (defthm charlistnat<-antisymmetric (implies (charlistnat< x y) (not (charlistnat< y x))) :hints (("goal" :in-theory (enable char<-antisymmetric)))) (encapsulate nil (local (defthm char<-nonsense-2 (implies (and (char< a y) (not (dec-digit-char-p a)) (dec-digit-char-p y) (dec-digit-char-p z)) (char< a z)) :rule-classes ((:rewrite :backchain-limit-lst 0)) :hints (("Goal" :in-theory (enable char< dec-digit-char-p))))) (local (defthm char<-nonsense-3 (implies (and (char< y a) (not (dec-digit-char-p a)) (dec-digit-char-p x) (dec-digit-char-p y)) (char< x a)) :rule-classes ((:rewrite :backchain-limit-lst 0)) :hints (("Goal" :in-theory (enable char< dec-digit-char-p))))) (local (defthm char<-nonsense-4 (implies (and (char< x y) (not (dec-digit-char-p y)) (dec-digit-char-p x) (dec-digit-char-p z)) (not (char< y z))) :rule-classes ((:rewrite :backchain-limit-lst 0)) :hints (("Goal" :in-theory (enable dec-digit-char-p char<))))) (defthm charlistnat<-transitive (implies (and (charlistnat< x y) (charlistnat< y z)) (charlistnat< x z)) :hints (("Goal" :in-theory (e/d ((:induction charlistnat<) char<-trichotomy-strong char<-transitive) (expt charlistnat<-antisymmetric take-leading-dec-digit-chars-when-dec-digit-char-list*p default-+-2 default-+-1 bound-of-len-of-take-leading-dec-digit-chars len-of-parse-nat-from-charlist)) :induct t :expand ((:free (y) (charlistnat< x y)) (:free (z) (charlistnat< y z))))))) (encapsulate nil (local (defthm dec-digit-chars-value-max (< (dec-digit-chars-value x) (expt 10 (len x))) :hints (("Goal" :in-theory (enable dec-digit-chars-value))) :rule-classes :linear)) (local (defthmd equal-of-sum-digits-lemma1 (implies (and (posp b) (natp big1) (natp big2) (natp little1) (natp little2) (< little1 b) (< little2 b) (not (equal big1 big2))) (not (equal (+ (* big1 b) little1) (+ (* big2 b) little2)))) :hints (("goal" :use ((:instance distributivity (x b) (y big1) (z (- big2))) (:instance distributivity (x b) (y big2) (z (- big1)))) :in-theory (disable distributivity)) (and stable-under-simplificationp '(:use ((:instance <-*-y-x-y (y b) (x (- big1 big2))) (:instance <-*-y-x-y (y b) (x (- big2 big1)))) :in-theory (disable distributivity <-*-y-x-y) :cases ((and (< (+ big1 (- big2)) 1) (< (+ big2 (- big1)) 1)))))) :otf-flg t)) (local (defthm equal-of-sum-digits (implies (and (posp b) (natp big1) (natp big2) (natp little1) (natp little2) (< little1 b) (< little2 b)) (iff (equal (+ (* big1 b) little1) (+ (* big2 b) little2)) (and (equal big1 big2) (equal little1 little2)))) :hints (("goal" :use ((:instance equal-of-sum-digits-lemma1)))))) (local (defun my-induction (x y) (if (and (consp x) (consp y)) (my-induction (cdr x) (cdr y)) nil))) (local (defthm lemma-2 (implies (and (equal (len x) (len y)) (character-listp x) (character-listp y) (dec-digit-char-list*p x) (dec-digit-char-list*p y)) (equal (equal (dec-digit-chars-value x) (dec-digit-chars-value y)) (equal x y))) :hints (("Goal" :induct (my-induction x y) :in-theory (enable dec-digit-char-list*p dec-digit-chars-value commutativity-of-+)) (and stable-under-simplificationp '(:use ((:instance equal-of-sum-digits (b (expt 10 (len (cdr x)))) (little1 (dec-digit-chars-value (cdr x))) (little2 (dec-digit-chars-value (cdr y))) (big1 (dec-digit-char-value (car x))) (big2 (dec-digit-char-value (car y)))))))))) (local (defthm crock (implies (and (equal (take-leading-dec-digit-chars y) (take-leading-dec-digit-chars x)) (charlisteqv (skip-leading-digits x) (skip-leading-digits y))) (charlisteqv x y)) :hints (("Goal" :induct (my-induction x y) :expand ((take-leading-dec-digit-chars x) (take-leading-dec-digit-chars y) (skip-leading-digits x) (skip-leading-digits y)))))) (local (defthm lemma-3 (implies (and (equal (len (take-leading-dec-digit-chars y)) (len (take-leading-dec-digit-chars x))) (equal (dec-digit-chars-value (take-leading-dec-digit-chars y)) (dec-digit-chars-value (take-leading-dec-digit-chars x))) (charlisteqv (skip-leading-digits x) (skip-leading-digits y))) (equal (charlisteqv x y) t)) :hints (("Goal" :in-theory (enable chareqv take-leading-dec-digit-chars skip-leading-digits dec-digit-chars-value))))) (defthm charlistnat<-trichotomy-weak (implies (and (not (charlistnat< x y)) (not (charlistnat< y x))) (equal (charlisteqv x y) t)) :hints (("Goal" :in-theory (e/d (char<-trichotomy-strong) (bound-of-len-of-take-leading-dec-digit-chars take-leading-dec-digit-chars-when-dec-digit-char-list*p right-cancellation-for-+ charlistnat<-antisymmetric charlistnat<-irreflexive))))) (defthm charlistnat<-trichotomy-strong (equal (charlistnat< x y) (and (not (charlisteqv x y)) (not (charlistnat< y x)))) :rule-classes ((:rewrite :loop-stopper ((x y)))))))
other
(defsection strnat<-aux :parents (strnat<) :short "Implementation of @(see strnat<)." :long "<p>@(call strnat<-aux) is basically the adaptation of @(see charlistnat<) for strings. Here, X and Y are the strings being compared, and XL and YL are their pre-computed lengths. XN and YN are the indices into the two strings that are our current positions.</p> <p>BOZO why do we have XN and YN separately? It seems like we should only need one index.</p>" (local (in-theory (disable nth-when-bigger negative-when-natp default-+-2 default-+-1 default-<-2 commutativity-of-+ default-<-1 |x < y => 0 < y-x| |x < y => 0 < -x+y| char<-trichotomy-strong char<-antisymmetric char<-trichotomy-weak char<-transitive negative-when-natp natp-rw expt default-car default-cdr))) (defund strnat<-aux (x y xn yn xl yl) (declare (type string x) (type string y) (type integer xn) (type integer yn) (type integer xl) (type integer yl) (xargs :guard (and (stringp x) (stringp y) (natp xn) (natp yn) (equal xl (length x)) (equal yl (length y)) (<= xn xl) (<= yn yl)) :verify-guards nil :measure (let* ((x (if (stringp x) x "")) (y (if (stringp y) y "")) (xn (nfix xn)) (yn (nfix yn)) (xl (length x)) (yl (length y))) (nfix (+ (- yl yn) (- xl xn)))) :hints (("Goal" :in-theory (disable)))) (ignorable xl yl)) (mbe :logic (let* ((x (if (stringp x) x "")) (y (if (stringp y) y "")) (xn (nfix xn)) (yn (nfix yn)) (xl (length x)) (yl (length y))) (cond ((zp (- yl yn)) nil) ((zp (- xl xn)) t) ((and (dec-digit-char-p (char x xn)) (dec-digit-char-p (char y yn))) (b* (((mv v1 l1) (parse-nat-from-string x 0 0 xn xl)) ((mv v2 l2) (parse-nat-from-string y 0 0 yn yl))) (cond ((or (< v1 v2) (and (int= v1 v2) (< l1 l2))) t) ((or (< v2 v1) (and (int= v1 v2) (< l2 l1))) nil) (t (strnat<-aux x y (+ xn l1) (+ yn l2) xl yl))))) ((char< (char x xn) (char y yn)) t) ((char< (char y yn) (char x xn)) nil) (t (strnat<-aux x y (+ 1 xn) (+ 1 yn) xl yl)))) :exec (cond ((int= yn yl) nil) ((int= xn xl) t) (t (let* ((char-x (the character (char (the string x) (the integer xn)))) (char-y (the character (char (the string y) (the integer yn)))) (code-x (the (unsigned-byte 8) (char-code (the character char-x)))) (code-y (the (unsigned-byte 8) (char-code (the character char-y))))) (declare (type character char-x) (type character char-y) (type (unsigned-byte 8) code-x) (type (unsigned-byte 8) code-y)) (cond ((and (<= (the (unsigned-byte 8) 48) (the (unsigned-byte 8) code-x)) (<= (the (unsigned-byte 8) code-x) (the (unsigned-byte 8) 57)) (<= (the (unsigned-byte 8) 48) (the (unsigned-byte 8) code-y)) (<= (the (unsigned-byte 8) code-y) (the (unsigned-byte 8) 57))) (b* (((mv v1 l1) (parse-nat-from-string (the string x) (the integer 0) (the integer 0) (the integer xn) (the integer xl))) ((mv v2 l2) (parse-nat-from-string (the string y) (the integer 0) (the integer 0) (the integer yn) (the integer yl)))) (cond ((or (< (the integer v1) (the integer v2)) (and (int= v1 v2) (< (the integer l1) (the integer l2)))) t) ((or (< (the integer v2) (the integer v1)) (and (int= v1 v2) (< (the integer l2) (the integer l1)))) nil) (t (strnat<-aux (the string x) (the string y) (the integer (+ (the integer xn) (the integer l1))) (the integer (+ (the integer yn) (the integer l2))) (the integer xl) (the integer yl)))))) ((< (the (unsigned-byte 8) code-x) (the (unsigned-byte 8) code-y)) t) ((< (the (unsigned-byte 8) code-y) (the (unsigned-byte 8) code-x)) nil) (t (strnat<-aux (the string x) (the string y) (the integer (+ (the integer 1) (the integer xn))) (the integer (+ (the integer 1) (the integer yn))) (the integer xl) (the integer yl))))))))) (local (in-theory (enable strnat<-aux))) (set-inhibit-warnings "theory") (encapsulate nil (local (in-theory (disable nth-when-bigger take-leading-dec-digit-chars-when-dec-digit-char-list*p dec-digit-char-list*p-when-not-consp (:type-prescription character-listp) (:type-prescription eqlable-listp) (:type-prescription atom-listp) (:type-prescription dec-digit-char-p$inline) (:type-prescription strnat<-aux) (:type-prescription char<) default-char-code char<-antisymmetric char<-trichotomy-strong default-<-1 default-<-2 default-+-1 default-+-2 open-small-nthcdr nthcdr-when-zp nthcdr-when-atom |x < y => 0 < -x+y| nthcdr len nth not strnat<-aux natp-fc-1 natp-fc-2 (:forward-chaining eqlable-listp-forward-to-atom-listp) (:forward-chaining character-listp-forward-to-eqlable-listp) (:forward-chaining atom-listp-forward-to-true-listp)))) (verify-guards strnat<-aux :hints ((and stable-under-simplificationp '(:in-theory (enable dec-digit-char-p dec-digit-char-value char-fix char<)))))) (local (defthm skip-leading-digits-to-nthcdr (implies (force (true-listp x)) (equal (skip-leading-digits x) (nthcdr (len (take-leading-dec-digit-chars x)) x))) :hints (("Goal" :in-theory (enable skip-leading-digits take-leading-dec-digit-chars))))) (defthm strnat<-aux-correct (implies (and (stringp x) (stringp y) (natp xn) (natp yn) (equal xl (length x)) (equal yl (length y)) (<= xn xl) (<= yn yl)) (equal (strnat<-aux x y xn yn xl yl) (charlistnat< (nthcdr xn (explode x)) (nthcdr yn (explode y))))) :hints (("Goal" :induct (strnat<-aux x y xn yn xl yl) :expand ((charlistnat< (nthcdr xn (explode x)) (nthcdr yn (explode y))) (:free (xl yl) (strnat<-aux x y xn yn xl yl))) :in-theory (e/d (charlistnat< commutativity-of-+) (charlistnat<-antisymmetric charlistnat<-trichotomy-strong take-leading-dec-digit-chars-when-dec-digit-char-list*p dec-digit-char-list*p-when-not-consp charlistnat< (:definition strnat<-aux) default-+-1 default-+-2 nth-when-bigger)) :do-not '(generalize fertilize)))))
other
(define strnat< ((x :type string) (y :type string)) :parents (ordering) :short "Mixed alphanumeric string less-than test." :long "<p>@(call strnat<) determines if the string @('x') is "smaller" than the string @('y'), using an ordering that is nice for humans.</p> <p>See @(see charlistnat<) for a description of this order.</p> <p>We avoid coercing the strings into character lists, and this is altogether pretty fast.</p>" :inline t (mbe :logic (charlistnat< (explode x) (explode y)) :exec (strnat<-aux (the string x) (the string y) (the integer 0) (the integer 0) (the integer (length (the string x))) (the integer (length (the string y))))) /// (defcong streqv equal (strnat< x y) 1) (defcong streqv equal (strnat< x y) 2) (defthm strnat<-irreflexive (not (strnat< x x))) (defthm strnat<-antisymmetric (implies (strnat< x y) (not (strnat< y x)))) (defthm strnat<-transitive (implies (and (strnat< x y) (strnat< y z)) (strnat< x z))) (defthm strnat<-transitive-alt (implies (and (strnat< y z) (strnat< x y)) (strnat< x z))))