* lisp/xml.el: Handle entity and character reference expansion correctly.
(xml-default-ns): New variable. (xml-entity-alist): Use XML spec definitions for lt and amp. (xml-parse-region): Make first two arguments optional. Discard text properties. (xml-parse-tag-1): New function, spun off from xml-parse-tag. All callers changed. (xml-parse-tag): Call xml-parse-tag-1. For backward compatibility, this function should not modify buffer contents. (xml-parse-tag-1): Fix opening-tag regexp. (xml-parse-string): Rewrite, handling entity and character references properly. (xml--entity-replacement-text): Signal an error if a parameter entity is undefined. * test/automated/xml-parse-tests.el (xml-parse-tests--data): More testcases.
This commit is contained in:
parent
2b5208f181
commit
a7aef6f5c6
4 changed files with 220 additions and 190 deletions
|
@ -1,3 +1,21 @@
|
|||
2012-07-02 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* xml.el: Fix entity and character reference expansion, allowing
|
||||
them to expand into markup as per XML spec.
|
||||
(xml-default-ns): New variable.
|
||||
(xml-entity-alist): Use XML spec definitions for lt and amp.
|
||||
(xml-parse-region): Make first two arguments optional. Discard
|
||||
text properties.
|
||||
(xml-parse-tag-1): New function, spun off from xml-parse-tag. All
|
||||
callers changed.
|
||||
(xml-parse-tag): Call xml-parse-tag-1. For backward
|
||||
compatibility, this function should not modify buffer contents.
|
||||
(xml-parse-tag-1): Fix opening-tag regexp.
|
||||
(xml-parse-string): Rewrite, handling entity and character
|
||||
references properly.
|
||||
(xml--entity-replacement-text): Signal an error if a parameter
|
||||
entity is undefined.
|
||||
|
||||
2012-07-02 Stefan Monnier <monnier@iro.umontreal.ca>
|
||||
|
||||
* comint.el (comint-output-filter): Filter out repeated prompts.
|
||||
|
|
372
lisp/xml.el
372
lisp/xml.el
|
@ -80,22 +80,23 @@
|
|||
;; a worthwhile tradeoff especially since we're usually parsing files
|
||||
;; instead of hand-crafted XML.
|
||||
|
||||
;;*******************************************************************
|
||||
;;**
|
||||
;;** Macros to parse the list
|
||||
;;**
|
||||
;;*******************************************************************
|
||||
;;; Macros to parse the list
|
||||
|
||||
(defconst xml-undefined-entity "?"
|
||||
"What to substitute for undefined entities")
|
||||
|
||||
(defconst xml-default-ns '(("" . "")
|
||||
("xml" . "http://www.w3.org/XML/1998/namespace")
|
||||
("xmlns" . "http://www.w3.org/2000/xmlns/"))
|
||||
"Alist mapping default XML namespaces to their URIs.")
|
||||
|
||||
(defvar xml-entity-alist
|
||||
'(("lt" . "<")
|
||||
'(("lt" . "<")
|
||||
("gt" . ">")
|
||||
("apos" . "'")
|
||||
("quot" . "\"")
|
||||
("amp" . "&"))
|
||||
"Alist of defined XML entities.")
|
||||
("amp" . "&"))
|
||||
"Alist mapping XML entities to their replacement text.")
|
||||
|
||||
(defvar xml-parameter-entity-alist nil
|
||||
"Alist of defined XML parametric entities.")
|
||||
|
@ -156,11 +157,7 @@ An empty string is returned if the attribute was not found.
|
|||
See also `xml-get-attribute-or-nil'."
|
||||
(or (xml-get-attribute-or-nil node attribute) ""))
|
||||
|
||||
;;*******************************************************************
|
||||
;;**
|
||||
;;** Creating the list
|
||||
;;**
|
||||
;;*******************************************************************
|
||||
;;; Creating the list
|
||||
|
||||
;;;###autoload
|
||||
(defun xml-parse-file (file &optional parse-dtd parse-ns)
|
||||
|
@ -299,8 +296,10 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
|
|||
;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
|
||||
|
||||
;;;###autoload
|
||||
(defun xml-parse-region (beg end &optional buffer parse-dtd parse-ns)
|
||||
(defun xml-parse-region (&optional beg end buffer parse-dtd parse-ns)
|
||||
"Parse the region from BEG to END in BUFFER.
|
||||
If BEG is nil, it defaults to `point-min'.
|
||||
If END is nil, it defaults to `point-max'.
|
||||
If BUFFER is nil, it defaults to the current buffer.
|
||||
Returns the XML list for the region, or raises an error if the region
|
||||
is not well-formed XML.
|
||||
|
@ -312,7 +311,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
|
|||
(unless buffer
|
||||
(setq buffer (current-buffer)))
|
||||
(with-temp-buffer
|
||||
(insert-buffer-substring buffer beg end)
|
||||
(insert-buffer-substring-no-properties buffer beg end)
|
||||
(xml--parse-buffer parse-dtd parse-ns)))
|
||||
|
||||
(defun xml--parse-buffer (parse-dtd parse-ns)
|
||||
|
@ -327,7 +326,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
|
|||
(if (search-forward "<" nil t)
|
||||
(progn
|
||||
(forward-char -1)
|
||||
(setq result (xml-parse-tag parse-dtd parse-ns))
|
||||
(setq result (xml-parse-tag-1 parse-dtd parse-ns))
|
||||
(cond
|
||||
((null result)
|
||||
;; Not looking at an xml start tag.
|
||||
|
@ -379,8 +378,7 @@ specify that the name shouldn't be given a namespace."
|
|||
(xml-parameter-entity-alist xml-parameter-entity-alist)
|
||||
children)
|
||||
(while (not (eobp))
|
||||
(let ((bit (xml-parse-tag
|
||||
parse-dtd parse-ns)))
|
||||
(let ((bit (xml-parse-tag-1 parse-dtd parse-ns)))
|
||||
(if children
|
||||
(setq children (append (list bit) children))
|
||||
(if (stringp bit)
|
||||
|
@ -392,30 +390,32 @@ specify that the name shouldn't be given a namespace."
|
|||
"Parse the tag at point.
|
||||
If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
|
||||
returned as the first element in the list.
|
||||
If PARSE-NS is non-nil, then QNAMES are expanded.
|
||||
Returns one of:
|
||||
If PARSE-NS is non-nil, expand QNAMES; if the value of PARSE-NS
|
||||
is a list, use it as an alist mapping namespaces to URIs.
|
||||
|
||||
Return one of:
|
||||
- a list : the matching node
|
||||
- nil : the point is not looking at a tag.
|
||||
- a pair : the first element is the DTD, the second is the node."
|
||||
(let ((buf (current-buffer))
|
||||
(pos (point)))
|
||||
(with-temp-buffer
|
||||
(insert-buffer-substring-no-properties buf pos)
|
||||
(goto-char (point-min))
|
||||
(xml-parse-tag-1 parse-dtd parse-ns))))
|
||||
|
||||
(defun xml-parse-tag-1 (&optional parse-dtd parse-ns)
|
||||
"Like `xml-parse-tag', but possibly modify the buffer while working."
|
||||
(let ((xml-validating-parser (or parse-dtd xml-validating-parser))
|
||||
(xml-ns (if (consp parse-ns)
|
||||
parse-ns
|
||||
(if parse-ns
|
||||
(list
|
||||
;; Default for empty prefix is no namespace
|
||||
(cons "" "")
|
||||
;; "xml" namespace
|
||||
(cons "xml" "http://www.w3.org/XML/1998/namespace")
|
||||
;; We need to seed the xmlns namespace
|
||||
(cons "xmlns" "http://www.w3.org/2000/xmlns/"))))))
|
||||
(xml-ns (cond ((consp parse-ns) parse-ns)
|
||||
(parse-ns xml-default-ns))))
|
||||
(cond
|
||||
;; Processing instructions (like the <?xml version="1.0"?> tag at the
|
||||
;; beginning of a document).
|
||||
;; Processing instructions, like <?xml version="1.0"?>.
|
||||
((looking-at "<\\?")
|
||||
(search-forward "?>")
|
||||
(skip-syntax-forward " ")
|
||||
(xml-parse-tag parse-dtd xml-ns))
|
||||
;; Character data (CDATA) sections, in which no tag should be interpreted
|
||||
(xml-parse-tag-1 parse-dtd xml-ns))
|
||||
;; Character data (CDATA) sections, in which no tag should be interpreted
|
||||
((looking-at "<!\\[CDATA\\[")
|
||||
(let ((pos (match-end 0)))
|
||||
(unless (search-forward "]]>" nil t)
|
||||
|
@ -423,33 +423,32 @@ Returns one of:
|
|||
(concat
|
||||
(buffer-substring-no-properties pos (match-beginning 0))
|
||||
(xml-parse-string))))
|
||||
;; DTD for the document
|
||||
;; DTD for the document
|
||||
((looking-at "<!DOCTYPE[ \t\n\r]")
|
||||
(let ((dtd (xml-parse-dtd parse-ns)))
|
||||
(skip-syntax-forward " ")
|
||||
(if xml-validating-parser
|
||||
(cons dtd (xml-parse-tag nil xml-ns))
|
||||
(xml-parse-tag nil xml-ns))))
|
||||
;; skip comments
|
||||
(cons dtd (xml-parse-tag-1 nil xml-ns))
|
||||
(xml-parse-tag-1 nil xml-ns))))
|
||||
;; skip comments
|
||||
((looking-at "<!--")
|
||||
(search-forward "-->")
|
||||
;; FIXME: This loses the skipped-over spaces.
|
||||
(skip-syntax-forward " ")
|
||||
(unless (eobp)
|
||||
(let ((xml-sub-parser t))
|
||||
(xml-parse-tag parse-dtd xml-ns))))
|
||||
;; end tag
|
||||
(xml-parse-tag-1 parse-dtd xml-ns))))
|
||||
;; end tag
|
||||
((looking-at "</")
|
||||
'())
|
||||
;; opening tag
|
||||
((looking-at "<\\([^/>[:space:]]+\\)")
|
||||
;; opening tag
|
||||
((looking-at (eval-when-compile (concat "<\\(" xml-name-re "\\)")))
|
||||
(goto-char (match-end 1))
|
||||
|
||||
;; Parse this node
|
||||
(let* ((node-name (match-string-no-properties 1))
|
||||
;; Parse the attribute list.
|
||||
(attrs (xml-parse-attlist xml-ns))
|
||||
children)
|
||||
|
||||
;; add the xmlns:* attrs to our cache
|
||||
(when (consp xml-ns)
|
||||
(dolist (attr attrs)
|
||||
|
@ -458,70 +457,114 @@ Returns one of:
|
|||
(caar attr)))
|
||||
(push (cons (cdar attr) (cdr attr))
|
||||
xml-ns))))
|
||||
|
||||
(setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
|
||||
(cond
|
||||
;; is this an empty element ?
|
||||
((looking-at "/>")
|
||||
(forward-char 2)
|
||||
(nreverse children))
|
||||
;; is this a valid start tag ?
|
||||
((eq (char-after) ?>)
|
||||
(forward-char 1)
|
||||
;; Now check that we have the right end-tag.
|
||||
(let ((end (concat "</" node-name "\\s-*>")))
|
||||
(while (not (looking-at end))
|
||||
(cond
|
||||
((eobp)
|
||||
(error "XML: (Not Well-Formed) End of buffer while reading element `%s'"
|
||||
node-name))
|
||||
((looking-at "</")
|
||||
(forward-char 2)
|
||||
(error "XML: (Not Well-Formed) Invalid end tag `%s' (expecting `%s')"
|
||||
(let ((pos (point)))
|
||||
(buffer-substring pos (if (re-search-forward "\\s-*>" nil t)
|
||||
(match-beginning 0)
|
||||
(point-max))))
|
||||
node-name))
|
||||
;; Read a sub-element and push it onto CHILDREN.
|
||||
((= (char-after) ?<)
|
||||
(let ((tag (xml-parse-tag-1 nil xml-ns)))
|
||||
(when tag
|
||||
(push tag children))))
|
||||
;; Read some character data.
|
||||
(t
|
||||
(let ((expansion (xml-parse-string)))
|
||||
(push (if (stringp (car children))
|
||||
;; If two strings were separated by a
|
||||
;; comment, concat them.
|
||||
(concat (pop children) expansion)
|
||||
expansion)
|
||||
children)))))
|
||||
;; Move point past the end-tag.
|
||||
(goto-char (match-end 0))
|
||||
(nreverse children)))
|
||||
;; Otherwise this was an invalid start tag (expected ">" not found.)
|
||||
(t
|
||||
(error "XML: (Well-Formed) Couldn't parse tag: %s"
|
||||
(buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
|
||||
|
||||
;; is this an empty element ?
|
||||
(if (looking-at "/>")
|
||||
(progn
|
||||
(forward-char 2)
|
||||
(nreverse children))
|
||||
|
||||
;; is this a valid start tag ?
|
||||
(if (eq (char-after) ?>)
|
||||
(progn
|
||||
(forward-char 1)
|
||||
;; Now check that we have the right end-tag. Note that this
|
||||
;; one might contain spaces after the tag name
|
||||
(let ((end (concat "</" node-name "\\s-*>")))
|
||||
(while (not (looking-at end))
|
||||
(cond
|
||||
((looking-at "</")
|
||||
(error "XML: (Not Well-Formed) Invalid end tag (expecting %s) at pos %d"
|
||||
node-name (point)))
|
||||
((= (char-after) ?<)
|
||||
(let ((tag (xml-parse-tag nil xml-ns)))
|
||||
(when tag
|
||||
(push tag children))))
|
||||
(t
|
||||
(let ((expansion (xml-parse-string)))
|
||||
(setq children
|
||||
(if (stringp expansion)
|
||||
(if (stringp (car children))
|
||||
;; The two strings were separated by a comment.
|
||||
(setq children (append (list (concat (car children) expansion))
|
||||
(cdr children)))
|
||||
(setq children (append (list expansion) children)))
|
||||
(setq children (append expansion children))))))))
|
||||
|
||||
(goto-char (match-end 0))
|
||||
(nreverse children)))
|
||||
;; This was an invalid start tag (Expected ">", but didn't see it.)
|
||||
(error "XML: (Well-Formed) Couldn't parse tag: %s"
|
||||
(buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
|
||||
(t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
|
||||
(unless xml-sub-parser ; Usually, we error out.
|
||||
;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
|
||||
(t
|
||||
(unless xml-sub-parser ; Usually, we error out.
|
||||
(error "XML: (Well-Formed) Invalid character"))
|
||||
|
||||
;; However, if we're parsing incrementally, then we need to deal
|
||||
;; with stray CDATA.
|
||||
(xml-parse-string)))))
|
||||
|
||||
(defun xml-parse-string ()
|
||||
"Parse the next whatever. Could be a string, or an element."
|
||||
(let* ((pos (point))
|
||||
(string (progn (skip-chars-forward "^<")
|
||||
(buffer-substring-no-properties pos (point)))))
|
||||
;; Clean up the string. As per XML specifications, the XML
|
||||
;; processor should always pass the whole string to the
|
||||
;; application. But \r's should be replaced:
|
||||
;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
|
||||
(setq pos 0)
|
||||
(while (string-match "\r\n?" string pos)
|
||||
(setq string (replace-match "\n" t t string))
|
||||
(setq pos (1+ (match-beginning 0))))
|
||||
|
||||
(xml-substitute-special string)))
|
||||
"Parse character data at point, and return it as a string.
|
||||
Leave point at the start of the next thing to parse. This
|
||||
function can modify the buffer by expanding entity and character
|
||||
references."
|
||||
(let ((start (point))
|
||||
ref val)
|
||||
(while (and (not (eobp))
|
||||
(not (looking-at "<")))
|
||||
;; Find the next < or & character.
|
||||
(skip-chars-forward "^<&")
|
||||
(when (eq (char-after) ?&)
|
||||
;; If we find an entity or character reference, expand it.
|
||||
(unless (looking-at (eval-when-compile
|
||||
(concat "&\\(?:#\\([0-9]+\\)\\|#x\\([0-9a-fA-F]+\\)\\|\\("
|
||||
xml-name-re "\\)\\);")))
|
||||
(error "XML: (Not Well-Formed) Invalid entity reference"))
|
||||
;; For a character reference, the next entity or character
|
||||
;; reference must be after the replacement. [4.6] "Numerical
|
||||
;; character references are expanded immediately when
|
||||
;; recognized and MUST be treated as character data."
|
||||
(cond ((setq ref (match-string 1))
|
||||
;; Decimal character reference
|
||||
(setq val (save-match-data
|
||||
(decode-char 'ucs (string-to-number ref))))
|
||||
(and (null val)
|
||||
xml-validating-parser
|
||||
(error "XML: (Validity) Invalid character `%s'" ref))
|
||||
(replace-match (or (string val) xml-undefined-entity) t t))
|
||||
;; Hexadecimal character reference
|
||||
((setq ref (match-string 2))
|
||||
(setq val (save-match-data
|
||||
(decode-char 'ucs (string-to-number ref 16))))
|
||||
(and (null val)
|
||||
xml-validating-parser
|
||||
(error "XML: (Validity) Invalid character `x%s'" ref))
|
||||
(replace-match (or (string val) xml-undefined-entity) t t))
|
||||
;; For an entity reference, search again from the start
|
||||
;; of the replaced text, since the replacement can
|
||||
;; contain entity or character references, or markup.
|
||||
((setq ref (match-string 3))
|
||||
(setq val (assoc ref xml-entity-alist))
|
||||
(and (null val)
|
||||
xml-validating-parser
|
||||
(error "XML: (Validity) Undefined entity `%s'" ref))
|
||||
(replace-match (cdr val) t t)
|
||||
(goto-char (match-beginning 0))))))
|
||||
;; [2.11] Clean up line breaks.
|
||||
(let ((end-marker (point-marker)))
|
||||
(goto-char start)
|
||||
(while (re-search-forward "\r\n?" end-marker t)
|
||||
(replace-match "\n" t t))
|
||||
(goto-char end-marker)
|
||||
(buffer-substring start (point)))))
|
||||
|
||||
(defun xml-parse-attlist (&optional xml-ns)
|
||||
"Return the attribute-list after point.
|
||||
|
@ -564,15 +607,11 @@ Leave point at the first non-blank character after the tag."
|
|||
(skip-syntax-forward " "))
|
||||
(nreverse attlist)))
|
||||
|
||||
;;*******************************************************************
|
||||
;;**
|
||||
;;** The DTD (document type declaration)
|
||||
;;** The following functions know how to skip or parse the DTD of
|
||||
;;** a document
|
||||
;;**
|
||||
;;*******************************************************************
|
||||
;;; DTD (document type declaration)
|
||||
|
||||
;; Fixme: This fails at least if the DTD contains conditional sections.
|
||||
;; The following functions know how to skip or parse the DTD of a
|
||||
;; document. FIXME: it fails at least if the DTD contains conditional
|
||||
;; sections.
|
||||
|
||||
(defun xml-skip-dtd ()
|
||||
"Skip the DTD at point.
|
||||
|
@ -789,9 +828,10 @@ references and parameter-entity references."
|
|||
;; Parameter entity reference
|
||||
((setq ref (match-string 3 string))
|
||||
(setq val (assoc ref xml-parameter-entity-alist))
|
||||
(if val
|
||||
(push (cdr val) children)
|
||||
(push (concat "%" ref ";") children))))
|
||||
(and (null val)
|
||||
xml-validating-parser
|
||||
(error "XML: (Validity) Undefined parameter entity `%s'" ref))
|
||||
(push (or (cdr val) xml-undefined-entity) children)))
|
||||
(setq string remainder)))
|
||||
(mapconcat 'identity (nreverse (cons string children)) "")))
|
||||
|
||||
|
@ -828,79 +868,40 @@ references and parameter-entity references."
|
|||
(t
|
||||
elem))))
|
||||
|
||||
;;*******************************************************************
|
||||
;;**
|
||||
;;** Substituting special XML sequences
|
||||
;;**
|
||||
;;*******************************************************************
|
||||
;;; Substituting special XML sequences
|
||||
|
||||
(defun xml-substitute-special (string)
|
||||
"Return STRING, after substituting entity references."
|
||||
;; This originally made repeated passes through the string from the
|
||||
;; beginning, which isn't correct, since then either "&amp;" or
|
||||
;; "&amp;" won't DTRT.
|
||||
|
||||
(let ((point 0)
|
||||
children end-point)
|
||||
(while (string-match "&\\([^;]*\\);" string point)
|
||||
(setq end-point (match-end 0))
|
||||
(let* ((this-part (match-string-no-properties 1 string))
|
||||
(prev-part (substring string point (match-beginning 0)))
|
||||
(entity (assoc this-part xml-entity-alist))
|
||||
(expansion
|
||||
(cond ((string-match "#\\([0-9]+\\)" this-part)
|
||||
(let ((c (decode-char
|
||||
'ucs
|
||||
(string-to-number (match-string-no-properties 1 this-part)))))
|
||||
(if c (string c))))
|
||||
((string-match "#x\\([[:xdigit:]]+\\)" this-part)
|
||||
(let ((c (decode-char
|
||||
'ucs
|
||||
(string-to-number (match-string-no-properties 1 this-part) 16))))
|
||||
(if c (string c))))
|
||||
(entity
|
||||
(cdr entity))
|
||||
((eq (length this-part) 0)
|
||||
(error "XML: (Not Well-Formed) No entity given"))
|
||||
(t
|
||||
(if xml-validating-parser
|
||||
(error "XML: (Validity) Undefined entity `%s'"
|
||||
this-part)
|
||||
xml-undefined-entity)))))
|
||||
|
||||
(cond ((null children)
|
||||
;; FIXME: If we have an entity that expands into XML, this won't work.
|
||||
(setq children
|
||||
(concat prev-part expansion)))
|
||||
((stringp children)
|
||||
(if (stringp expansion)
|
||||
(setq children (concat children prev-part expansion))
|
||||
(setq children (list expansion (concat prev-part children)))))
|
||||
((and (stringp expansion)
|
||||
(stringp (car children)))
|
||||
(setcar children (concat prev-part expansion (car children))))
|
||||
((stringp expansion)
|
||||
(setq children (append (concat prev-part expansion)
|
||||
children)))
|
||||
((stringp (car children))
|
||||
(setcar children (concat (car children) prev-part))
|
||||
(setq children (append expansion children)))
|
||||
(t
|
||||
(setq children (list expansion
|
||||
prev-part
|
||||
children))))
|
||||
(setq point end-point)))
|
||||
(cond ((stringp children)
|
||||
(concat children (substring string point)))
|
||||
((stringp (car (last children)))
|
||||
(concat (car (last children)) (substring string point)))
|
||||
((null children)
|
||||
string)
|
||||
(t
|
||||
(concat (mapconcat 'identity
|
||||
(nreverse children)
|
||||
"")
|
||||
(substring string point))))))
|
||||
"Return STRING, after substituting entity and character references.
|
||||
STRING is assumed to occur in an XML attribute value."
|
||||
(let ((ref-re (eval-when-compile
|
||||
(concat "&\\(?:#\\(x\\)?\\([0-9]+\\)\\|\\("
|
||||
xml-name-re "\\)\\);")))
|
||||
children)
|
||||
(while (string-match ref-re string)
|
||||
(push (substring string 0 (match-beginning 0)) children)
|
||||
(let* ((remainder (substring string (match-end 0)))
|
||||
(ref (match-string 2 string)))
|
||||
(if ref
|
||||
;; [4.6] Character references are included as
|
||||
;; character data.
|
||||
(let ((val (decode-char 'ucs (string-to-number
|
||||
ref (if (match-string 1 string) 16)))))
|
||||
(push (cond (val (string val))
|
||||
(xml-validating-parser
|
||||
(error "XML: (Validity) Undefined character `x%s'" ref))
|
||||
(t xml-undefined-entity))
|
||||
children)
|
||||
(setq string remainder))
|
||||
;; [4.4.5] Entity references are "included in literal".
|
||||
;; Note that we don't need do anything special to treat
|
||||
;; quotes as normal data characters.
|
||||
(setq ref (match-string 3 string))
|
||||
(let ((val (or (cdr (assoc ref xml-entity-alist))
|
||||
(if xml-validating-parser
|
||||
(error "XML: (Validity) Undefined entity `%s'" ref)
|
||||
xml-undefined-entity))))
|
||||
(setq string (concat val remainder))))))
|
||||
(mapconcat 'identity (nreverse (cons string children)) "")))
|
||||
|
||||
(defun xml-substitute-numeric-entities (string)
|
||||
"Substitute SGML numeric entities by their respective utf characters.
|
||||
|
@ -921,12 +922,7 @@ by \"*\"."
|
|||
string)
|
||||
nil))
|
||||
|
||||
;;*******************************************************************
|
||||
;;**
|
||||
;;** Printing a tree.
|
||||
;;** This function is intended mainly for debugging purposes.
|
||||
;;**
|
||||
;;*******************************************************************
|
||||
;;; Printing a parse tree (mainly for debugging).
|
||||
|
||||
(defun xml-debug-print (xml &optional indent-string)
|
||||
"Outputs the XML in the current buffer.
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2012-07-02 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* automated/xml-parse-tests.el (xml-parse-tests--data): More
|
||||
testcases.
|
||||
|
||||
2012-07-01 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* automated/xml-parse-tests.el: New file.
|
||||
|
|
|
@ -33,15 +33,26 @@
|
|||
'(;; General entity substitution
|
||||
("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
|
||||
((foo ((a . "b")) (bar nil "AbC;"))))
|
||||
("<?xml version=\"1.0\"?><foo>&amp;&apos;'<>"</foo>" .
|
||||
((foo () "&''<>\"")))
|
||||
;; Parameter entity substitution
|
||||
("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
|
||||
((foo ((a . "b")) (bar nil "AbC;"))))
|
||||
;; Tricky parameter entity substitution (like XML spec Appendix D)
|
||||
("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '%zz;'><!ENTITY % zz '<!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" .
|
||||
((foo nil "AbC")))
|
||||
((foo () "AbC")))
|
||||
;; Bug#7172
|
||||
("<?xml version=\"1.0\"?><!DOCTYPE foo [ <!ELEMENT EXAM_PLE EMPTY> ]><foo></foo>" .
|
||||
((foo nil))))
|
||||
((foo ())))
|
||||
;; Entities referencing entities, in character data
|
||||
("<!DOCTYPE foo [ <!ENTITY b \"B\"><!ENTITY abc \"a&b;c\">]><foo>&abc;</foo>" .
|
||||
((foo () "aBc")))
|
||||
;; Entities referencing entities, in attribute values
|
||||
("<!DOCTYPE foo [ <!ENTITY b \"B\"><!ENTITY abc \"a&b;c\">]><foo a=\"-&abc;-\">1</foo>" .
|
||||
((foo ((a . "-aBc-")) "1")))
|
||||
;; Character references must be treated as character data
|
||||
("<foo>AT&T;</foo>" . ((foo () "AT&T;")))
|
||||
("<foo>&amp;</foo>" . ((foo () "&"))))
|
||||
"Alist of XML strings and their expected parse trees.")
|
||||
|
||||
(ert-deftest xml-parse-tests ()
|
||||
|
|
Loading…
Add table
Reference in a new issue