dom-print: Fix missing entities quoting

Also use `?\s` for the space character.

* lisp/dom.el (dom-print): Properly quote special characters to avoid
generating invalid HTML/XML.
(dom-tag, dom-attributes, dom-children, dom-node)
(dom-add-child-before): Simplify.
(dom-set-attribute): Add at beginning rather than at end (slightly
more efficient and less destructive).
This commit is contained in:
Stefan Monnier 2022-10-30 09:37:23 -04:00
parent 2fa8f1b77a
commit 311c95fd67

View file

@ -30,23 +30,17 @@
(defsubst dom-tag (node)
"Return the NODE tag."
;; Called on a list of nodes. Use the first.
(if (consp (car node))
(caar node)
(car node)))
(car (if (consp (car node)) (car node) node)))
(defsubst dom-attributes (node)
"Return the NODE attributes."
;; Called on a list of nodes. Use the first.
(if (consp (car node))
(cadr (car node))
(cadr node)))
(cadr (if (consp (car node)) (car node) node)))
(defsubst dom-children (node)
"Return the NODE children."
;; Called on a list of nodes. Use the first.
(if (consp (car node))
(cddr (car node))
(cddr node)))
(cddr (if (consp (car node)) (car node) node)))
(defun dom-non-text-children (node)
"Return all non-text-node children of NODE."
@ -62,10 +56,11 @@
(defun dom-set-attribute (node attribute value)
"Set ATTRIBUTE in NODE to VALUE."
(setq node (dom-ensure-node node))
(let ((old (assoc attribute (cadr node))))
(let* ((attributes (cadr node))
(old (assoc attribute attributes)))
(if old
(setcdr old value)
(setcar (cdr node) (nconc (cadr node) (list (cons attribute value)))))))
(setcar (cdr node) (cons (cons attribute value) attributes)))))
(defun dom-remove-attribute (node attribute)
"Remove ATTRIBUTE from NODE."
@ -80,7 +75,7 @@ A typical attribute is `href'."
(defun dom-text (node)
"Return all the text bits in the current node concatenated."
(mapconcat 'identity (cl-remove-if-not 'stringp (dom-children node)) " "))
(mapconcat #'identity (cl-remove-if-not #'stringp (dom-children node)) " "))
(defun dom-texts (node &optional separator)
"Return all textual data under NODE concatenated with SEPARATOR in-between."
@ -195,9 +190,7 @@ ATTRIBUTE would typically be `class', `id' or the like."
(defun dom-node (tag &optional attributes &rest children)
"Return a DOM node with TAG and ATTRIBUTES."
(if children
`(,tag ,attributes ,@children)
(list tag attributes)))
`(,tag ,attributes ,@children))
(defun dom-append-child (node child)
"Append CHILD to the end of NODE's children."
@ -215,11 +208,7 @@ If BEFORE is nil, make CHILD NODE's first child."
(let ((pos (if before
(cl-position before children)
0)))
(if (zerop pos)
;; First child.
(setcdr (cdr node) (cons child (cddr node)))
(setcdr (nthcdr (1- pos) children)
(cons child (nthcdr pos children))))))
(push child (nthcdr (+ 2 pos) node))))
node)
(defun dom-ensure-node (node)
@ -247,7 +236,7 @@ white-space."
(insert (format "(%S . %S)" (car elem) (cdr elem)))
(if (zerop (cl-decf times))
(insert ")")
(insert "\n" (make-string column ? ))))))
(insert "\n" (make-string column ?\s))))))
(let* ((children (if remove-empty
(cl-remove-if
(lambda (child)
@ -258,16 +247,16 @@ white-space."
(times (length children)))
(if (null children)
(insert ")")
(insert "\n" (make-string (1+ column) ? ))
(insert "\n" (make-string (1+ column) ?\s))
(dolist (child children)
(if (stringp child)
(if (or (not remove-empty)
(not (string-match "\\`[\n\r\t  ]*\\'" child)))
(if (not (and remove-empty
(string-match "\\`[\n\r\t  ]*\\'" child)))
(insert (format "%S" child)))
(dom-pp child remove-empty))
(if (zerop (cl-decf times))
(insert ")")
(insert "\n" (make-string (1+ column) ? ))))))))
(insert "\n" (make-string (1+ column) ?\s))))))))
(defun dom-print (dom &optional pretty xml)
"Print DOM at point as HTML/XML.
@ -279,18 +268,19 @@ If XML, generate XML instead of HTML."
(dolist (elem attr)
;; In HTML, these are boolean attributes that should not have
;; an = value.
(if (and (memq (car elem)
'(async autofocus autoplay checked
contenteditable controls default
defer disabled formNoValidate frameborder
hidden ismap itemscope loop
multiple muted nomodule novalidate open
readonly required reversed
scoped selected typemustmatch))
(cdr elem)
(not xml))
(insert (format " %s" (car elem)))
(insert (format " %s=%S" (car elem) (cdr elem))))))
(insert (if (and (memq (car elem)
'(async autofocus autoplay checked
contenteditable controls default
defer disabled formNoValidate frameborder
hidden ismap itemscope loop
multiple muted nomodule novalidate open
readonly required reversed
scoped selected typemustmatch))
(cdr elem)
(not xml))
(format " %s" (car elem))
(format " %s=\"%s\"" (car elem)
(url-insert-entities-in-string (cdr elem)))))))
(let* ((children (dom-children dom))
(non-text nil))
(if (null children)
@ -301,7 +291,7 @@ If XML, generate XML instead of HTML."
(insert child)
(setq non-text t)
(when pretty
(insert "\n" (make-string (+ column 2) ? )))
(insert "\n" (make-string (+ column 2) ?\s)))
(dom-print child pretty xml)))
;; If we inserted non-text child nodes, or a text node that
;; ends with a newline, then we indent the end tag.
@ -310,7 +300,7 @@ If XML, generate XML instead of HTML."
non-text))
(unless (bolp)
(insert "\n"))
(insert (make-string column ? )))
(insert (make-string column ?\s)))
(insert (format "</%s>" (dom-tag dom)))))))
(provide 'dom)