Add new file ietf-drums-date.el

* lisp/mail/ietf-drums-date.el: parse-time-string replacement which is
compatible but can be made stricter if desired.
* test/lisp/mail/ietf-drums-date-tests.el (added): Add tests for
ietf-drums-parse-date-string.
* lisp/mail/ietf-drums.el (ietf-drums-parse-date): Use
ietf-drums-parse-date-string.
This commit is contained in:
Bob Rogers 2022-02-25 13:03:20 +01:00 committed by Lars Ingebrigtsen
parent 2b8bb05383
commit ca3858563c
4 changed files with 475 additions and 1 deletions

View file

@ -1158,6 +1158,12 @@ functions.
* Lisp Changes in Emacs 29.1
---
** New function 'ietf-drums-parse-date-string'.
This function parses RFC5322 (and RFC822) date strings, and should be
used instead of 'parse-time-string' when parsing data that's standards
compliant.
+++
** New macro 'setopt'.
This is like 'setq', but uses 'customize-set-variable' to set the

View file

@ -0,0 +1,274 @@
;;; ietf-drums-date.el --- parse time/date for ietf-drums.el -*- lexical-binding: t -*-
;; Copyright (C) 2022 Free Software Foundation, Inc.
;; Author: Bob Rogers <rogers@rgrjr.com>
;; Keywords: mail, util
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; 'ietf-drums-parse-date-string' parses a time and/or date in a
;; string and returns a list of values, just like `decode-time', where
;; unspecified elements in the string are returned as nil (except
;; unspecified DST is returned as -1). `encode-time' may be applied
;; on these values to obtain an internal time value.
;; Historically, `parse-time-string' was used for this purpose, but it
;; was gradually but imperfectly extended to handle other date
;; formats. 'ietf-drums-parse-date-string' is compatible in that it
;; uses the same return value format and parses the same email date
;; formats by default, but can be made stricter if desired.
;;; Code:
(require 'cl-lib)
(require 'parse-time)
(define-error 'date-parse-error "Date/time parse error" 'error)
(defconst ietf-drums-date--slot-names
'(second minute hour day month year weekday dst zone)
"Names of return value slots, for better error messages
See the decoded-time defstruct.")
(defconst ietf-drums-date--slot-ranges
'((0 60) (0 59) (0 23) (1 31) (1 12) (1 9999))
"Numeric slot ranges, for bounds checking.
Note that RFC5322 explicitly requires that seconds go up to 60,
to allow for leap seconds (see Mills, D., 'Network Time
Protocol', STD 12, RFC 1119, September 1989).")
(defsubst ietf-drums-date--ignore-char-p (char)
;; Ignore whitespace and commas.
(memq char '(?\s ?\t ?\r ?\n ?,)))
(defun ietf-drums-date--tokenize-string (string &optional comment-eof)
"Turn STRING into tokens, separated only by whitespace and commas.
Multiple commas are ignored. Pure digit sequences are turned
into integers. If COMMENT-EOF is true, then a comment as
defined by RFC5322 (strictly, the CFWS production that also
accepts comments) is treated as an end-of-file, and no further
tokens are recognized, otherwise we strip out all comments and
treat them as whitespace (per RFC822)."
(let ((index 0)
(end (length string))
(list ()))
(cl-flet ((skip-ignored ()
;; Skip ignored characters at index (the scan
;; position). Skip RFC822 comments in matched parens,
;; but do not complain about unterminated comments.
(let ((char nil)
(nest 0))
(while (and (< index end)
(setq char (aref string index))
(or (> nest 0)
(ietf-drums-date--ignore-char-p char)
(and (not comment-eof) (eql char ?\())))
(cl-incf index)
;; FWS bookkeeping.
(cond ((and (eq char ?\\)
(< (1+ index) end))
;; Move to the next char but don't check
;; it to see if it might be a paren.
(cl-incf index))
((eq char ?\() (cl-incf nest))
((eq char ?\)) (cl-decf nest)))))))
(skip-ignored) ;; Skip leading whitespace.
(while (and (< index end)
(not (and comment-eof
(eq (aref string index) ?\())))
(let* ((start index)
(char (aref string index))
(all-digits (<= ?0 char ?9)))
;; char is valid; look for more valid characters.
(when (and (eq char ?\\)
(< (1+ index) end))
;; Escaped character, which might be a "(". If so, we are
;; correct to include it in the token, even though the
;; caller is sure to barf. If not, we violate RFC2?822 by
;; not removing the backslash, but no characters in valid
;; RFC2?822 dates need escaping anyway, so it shouldn't
;; matter that this is not done strictly correctly. --
;; rgr, 24-Dec-21.
(cl-incf index))
(while (and (< (cl-incf index) end)
(setq char (aref string index))
(not (or (ietf-drums-date--ignore-char-p char)
(eq char ?\())))
(unless (<= ?0 char ?9)
(setq all-digits nil))
(when (and (eq char ?\\)
(< (1+ index) end))
;; Escaped character, see above.
(cl-incf index)))
(push (if all-digits
(cl-parse-integer string :start start :end index)
(substring string start index))
list)
(skip-ignored)))
(nreverse list))))
(defun ietf-drums-parse-date-string (time-string &optional error no-822)
"Parse an RFC5322 or RFC822 date, passed as TIME-STRING.
The optional ERROR parameter causes syntax errors to be flagged
by signalling an instance of the date-parse-error condition. The
optional NO-822 parameter disables the more lax RFC822 syntax,
which is permitted by default.
The result is a list of (SEC MIN HOUR DAY MON YEAR DOW DST TZ),
which can be accessed as a decoded-time defstruct (q.v.),
e.g. `decoded-time-year' to extract the year, and turned into an
Emacs timestamp by `encode-time'.
The strict syntax for RFC5322 is as follows:
[ day-of-week \",\" ] day FWS month-name FWS year FWS time [CFWS]
where the \"time\" production is:
2DIGIT \":\" 2DIGIT [ \":\" 2DIGIT ] FWS ( \"+\" / \"-\" ) 4DIGIT
and FWS is \"folding white space,\" and CFWS is \"comments and/or
folding white space\", where comments are included in nesting
parentheses and are equivalent to white space. RFC822 also
accepts comments in random places (all of which is handled by
ietf-drums-date--tokenize-string) and two-digit years. For
two-digit years, 50 and up are interpreted as 1950 through 1999
and 00 through 49 as 200 through 2049.
We are somewhat more lax in what we accept (specifically, the
hours don't have to be two digits, and the TZ and the comma after
the DOW are optional), but we do insist that the items that are
present do appear in this order. Unspecified/unrecognized
elements in the string are returned as nil (except unspecified
DST is returned as -1)."
(let ((tokens (ietf-drums-date--tokenize-string (downcase time-string)
no-822))
(time (list nil nil nil nil nil nil nil -1 nil)))
(cl-labels ((set-matched-slot (slot index token)
;; Assign a slot value from match data if index is
;; non-nil, else from token, signalling an error if
;; enabled and it's out of range.
(let ((value (if index
(cl-parse-integer (match-string index token))
token)))
(when error
(let ((range (nth slot ietf-drums-date--slot-ranges)))
(when (and range
(not (<= (car range) value (cadr range))))
(signal 'date-parse-error
(list "Slot out of range"
(nth slot ietf-drums-date--slot-names)
token (car range) (cadr range))))))
(setf (nth slot time) value)))
(set-numeric (slot token)
;; Only assign the slot if the token is a number.
(cond ((natnump token)
(set-matched-slot slot nil token))
(error
(signal 'date-parse-error
(list "Not a number"
(nth slot ietf-drums-date--slot-names)
token))))))
;; Check for weekday.
(let ((dow (assoc (car tokens) parse-time-weekdays)))
(when dow
;; Day of the week.
(set-matched-slot 6 nil (cdr dow))
(pop tokens)))
;; Day.
(set-numeric 3 (pop tokens))
;; Alphabetic month.
(let* ((month (pop tokens))
(match (assoc month parse-time-months)))
(cond (match
(set-matched-slot 4 nil (cdr match)))
(error
(signal 'date-parse-error
(list "Expected an alphabetic month" month)))
(t
(push month tokens))))
;; Year.
(let ((year (pop tokens)))
;; Check the year for the right number of digits.
(cond ((not (natnump year))
(when error
(signal 'date-parse-error
(list "Expected a year" year)))
(push year tokens))
((>= year 1000)
(set-numeric 5 year))
((or no-822
(>= year 100))
(when error
(signal 'date-parse-error
(list "Four-digit years are required" year)))
(push year tokens))
((>= year 50)
;; second half of the 20th century.
(set-numeric 5 (+ 1900 year)))
(t
;; first half of the 21st century.
(set-numeric 5 (+ 2000 year)))))
;; Time.
(let ((time (pop tokens)))
(cond ((or (null time) (natnump time))
(when error
(signal 'date-parse-error
(list "Expected a time" time)))
(push time tokens))
((string-match
"^\\([0-9][0-9]?\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)$"
time)
(set-matched-slot 2 1 time)
(set-matched-slot 1 2 time)
(set-matched-slot 0 3 time))
((string-match "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\)$" time)
;; Time without seconds.
(set-matched-slot 2 1 time)
(set-matched-slot 1 2 time)
(set-matched-slot 0 nil 0))
(error
(signal 'date-parse-error
(list "Expected a time" time)))))
;; Timezone.
(let* ((zone (pop tokens))
(match (assoc zone parse-time-zoneinfo)))
(cond (match
(set-matched-slot 8 nil (cadr match))
(set-matched-slot 7 nil (caddr match)))
((and (stringp zone)
(string-match "^[-+][0-9][0-9][0-9][0-9]$" zone))
;; Numeric time zone.
(set-matched-slot
8 nil
(* 60
(+ (cl-parse-integer zone :start 3 :end 5)
(* 60 (cl-parse-integer zone :start 1 :end 3)))
(if (= (aref zone 0) ?-) -1 1))))
((and zone error)
(signal 'date-parse-error
(list "Expected a timezone" zone)))))
(when (and tokens error)
(signal 'date-parse-error
(list "Extra token(s)" (car tokens)))))
time))
(provide 'ietf-drums-date)
;;; ietf-drums-date.el ends here

View file

@ -294,9 +294,13 @@ a list of address strings."
(replace-match " " t t))
(goto-char (point-min)))
(declare-function ietf-drums-parse-date-string "ietf-drums-date"
(time-string &optional error? no-822?))
(defun ietf-drums-parse-date (string)
"Return an Emacs time spec from STRING."
(encode-time (parse-time-string string)))
(require 'ietf-drums-date)
(encode-time (ietf-drums-parse-date-string string)))
(defun ietf-drums-narrow-to-header ()
"Narrow to the header section in the current buffer."

View file

@ -0,0 +1,190 @@
;;; ietf-drums-date-tests.el --- Test suite for ietf-drums-date.el -*- lexical-binding:t -*-
;; Copyright (C) 2022 Free Software Foundation, Inc.
;; Author: Bob Rogers <rogers@rgrjr.com>
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;;; Code:
(require 'ert)
(require 'ietf-drums)
(require 'ietf-drums-date)
(ert-deftest ietf-drums-date-tests ()
"Test basic ietf-drums-parse-date-string functionality."
;; Test tokenization.
(should (equal (ietf-drums-date--tokenize-string " ") '()))
(should (equal (ietf-drums-date--tokenize-string " a b") '("a" "b")))
(should (equal (ietf-drums-date--tokenize-string "a bbc dde")
'("a" "bbc" "dde")))
(should (equal (ietf-drums-date--tokenize-string " , a 27 b,, c 14:32 ")
'("a" 27 "b" "c" "14:32")))
;; Some folding whitespace tests.
(should (equal (ietf-drums-date--tokenize-string " a b (end) c" t)
'("a" "b")))
(should (equal (ietf-drums-date--tokenize-string "(quux)a (foo (bar)) b(baz)")
'("a" "b")))
(should (equal (ietf-drums-date--tokenize-string "a b\\cde")
;; Strictly incorrect, but strictly unnecessary syntax.
'("a" "b\\cde")))
(should (equal (ietf-drums-date--tokenize-string "a b\\ de")
'("a" "b\\ de")))
(should (equal (ietf-drums-date--tokenize-string "a \\de \\(f")
'("a" "\\de" "\\(f")))
;; Start with some compatible RFC822 dates.
(dolist (case '(("Mon, 22 Feb 2016 19:35:42 +0100"
(42 35 19 22 2 2016 1 -1 3600)
(22219 21758))
("22 Feb 2016 19:35:42 +0100"
(42 35 19 22 2 2016 nil -1 3600)
(22219 21758))
("Mon, 22 February 2016 19:35:42 +0100"
(42 35 19 22 2 2016 1 -1 3600)
(22219 21758))
("Mon, 22 feb 2016 19:35:42 +0100"
(42 35 19 22 2 2016 1 -1 3600)
(22219 21758))
("Monday, 22 february 2016 19:35:42 +0100"
(42 35 19 22 2 2016 1 -1 3600)
(22219 21758))
("Monday, 22 february 2016 19:35:42 PST"
(42 35 19 22 2 2016 1 nil -28800)
(22219 54158))
("Friday, 21 Sep 2018 13:47:58 PDT"
(58 47 13 21 9 2018 5 t -25200)
(23461 22782))
("Friday, 21 Sep 2018 13:47:58 EDT"
(58 47 13 21 9 2018 5 t -14400)
(23461 11982))))
(let* ((input (car case))
(parsed (cadr case))
(encoded (caddr case)))
;; The input should parse the same without RFC822.
(should (equal (ietf-drums-parse-date-string input) parsed))
(should (equal (ietf-drums-parse-date-string input nil t) parsed))
;; Check the encoded date (the official output, though the
;; decoded-time is easier to debug).
(should (equal (ietf-drums-parse-date input) encoded))))
;; Test a few without timezones.
(dolist (case '(("Mon, 22 Feb 2016 19:35:42"
(42 35 19 22 2 2016 1 -1 nil))
("Friday, 21 Sep 2018 13:47:58"
(58 47 13 21 9 2018 5 -1 nil))))
(let* ((input (car case))
(parsed (cadr case)))
;; The input should parse the same without RFC822.
(should (equal (ietf-drums-parse-date-string input) parsed))
(should (equal (ietf-drums-parse-date-string input nil t) parsed))
;; We can't check the encoded date here because it will differ
;; depending on the TZ of the test environment.
))
;; Two-digit years are not allowed by the "modern" format.
(should (equal (ietf-drums-parse-date-string "22 Feb 16 19:35:42 +0100")
'(42 35 19 22 2 2016 nil -1 3600)))
(should (equal (ietf-drums-parse-date-string "22 Feb 16 19:35:42 +0100" nil t)
'(nil nil nil 22 2 nil nil -1 nil)))
(should (equal (should-error (ietf-drums-parse-date-string
"22 Feb 16 19:35:42 +0100" t t))
'(date-parse-error "Four-digit years are required" 16)))
(should (equal (ietf-drums-parse-date-string "22 Feb 96 19:35:42 +0100")
'(42 35 19 22 2 1996 nil -1 3600)))
(should (equal (ietf-drums-parse-date-string "22 Feb 96 19:35:42 +0100" nil t)
'(nil nil nil 22 2 nil nil -1 nil)))
(should (equal (should-error (ietf-drums-parse-date-string
"22 Feb 96 19:35:42 +0100" t t))
'(date-parse-error "Four-digit years are required" 96)))
;; Try some dates with comments.
(should (equal (ietf-drums-parse-date-string
"22 Feb (today) 16 19:35:42 +0100")
'(42 35 19 22 2 2016 nil -1 3600)))
(should (equal (ietf-drums-parse-date-string
"22 Feb (today) 16 19:35:42 +0100" nil t)
'(nil nil nil 22 2 nil nil -1 nil)))
(should (equal (should-error (ietf-drums-parse-date-string
"22 Feb (today) 16 19:35:42 +0100" t t))
'(date-parse-error "Expected a year" nil)))
(should (equal (ietf-drums-parse-date-string
"22 Feb 96 (long ago) 19:35:42 +0100")
'(42 35 19 22 2 1996 nil -1 3600)))
(should (equal (ietf-drums-parse-date-string
"Friday, 21 Sep(comment \\) with \\( parens)18 19:35:42")
'(42 35 19 21 9 2018 5 -1 nil)))
(should (equal (ietf-drums-parse-date-string
"Friday, 21 Sep 18 19:35:42 (unterminated comment")
'(42 35 19 21 9 2018 5 -1 nil)))
;; Test some RFC822 error cases
(dolist (test '(("33 1 2022" ("Slot out of range" day 33 1 31))
("0 1 2022" ("Slot out of range" day 0 1 31))
("1 1 2020 2021" ("Expected an alphabetic month" 1))
("1 Jan 2020 2021" ("Expected a time" 2021))
("1 Jan 2020 20:21 2000" ("Expected a timezone" 2000))
("1 Jan 2020 20:21 +0200 33" ("Extra token(s)" 33))))
(should (equal (should-error (ietf-drums-parse-date-string (car test) t))
(cons 'date-parse-error (cadr test)))))
(dolist (test '(("22 Feb 196" nil ;; bad year
("Four-digit years are required" 196))
("22 Feb 16 19:35:24" t ;; two-digit year
("Four-digit years are required" 16))
("22 Feb 96 19:35:42" t ;; two-digit year
("Four-digit years are required" 96))
("2 Feb 2021 1996" nil
("Expected a time" 1996))
("22 Fub 1996" nil
("Expected an alphabetic month" "fub"))
("1 Jan 2020 30" nil
("Expected a time" 30))
("1 Jan 2020 16:47 15:15" nil
("Expected a timezone" "15:15"))
("1 Jan 2020 16:47 +0800 -0800" t
("Extra token(s)" "-0800"))
;; Range tests
("32 Dec 2021" nil
("Slot out of range" day 32 1 31))
("0 Dec 2021" nil
("Slot out of range" day 0 1 31))
("3 13 2021" nil
("Expected an alphabetic month" 13))
("3 Dec 0000" t
("Four-digit years are required" 0))
("3 Dec 20021" nil
("Slot out of range" year 20021 1 9999))
("1 Jan 2020 24:21:14" nil
("Slot out of range" hour "24:21:14" 0 23))
("1 Jan 2020 14:60:21" nil
("Slot out of range" minute "14:60:21" 0 59))
("1 Jan 2020 14:21:61" nil
("Slot out of range" second "14:21:61" 0 60))))
(should (equal (should-error
(ietf-drums-parse-date-string (car test) t (cadr test)))
(cons 'date-parse-error (caddr test)))))
(should (equal (ietf-drums-parse-date-string
"1 Jan 2020 14:21:60") ;; a leap second!
'(60 21 14 1 1 2020 nil -1 nil))))
(provide 'ietf-drums-date-tests)
;;; ietf-drums-date-tests.el ends here