'libxml-parse(html|xml)-region': new optional param 'discard-comments'.
* doc/lispref/text.texi (Parsing HTML/XML): Document new optional parameter 'discard-comments' of 'libxml-parse(html|xml)-region'. * src/xml.c (parse_region): Take care of new optional parameter 'discard-comments' of 'libxml-parse(html|xml)-region'. (Flibxml_parse_html_region, Flibxml_parse_xml_region): New optional parameter 'discard-comments'. * test/automated/libxml-tests.el (libxml-tests--data-comments-preserved): Renamed from 'libxml-tests--data'. (libxml-tests--data-comments-discarded): New. (libxml-tests): Check whether 'libxml-parse-xml-region' is discarding comments correctly.
This commit is contained in:
parent
e14c4354cf
commit
c39443c1d6
6 changed files with 73 additions and 26 deletions
|
@ -1,3 +1,8 @@
|
|||
2014-11-21 Ulf Jasper <ulf.jasper@web.de>
|
||||
|
||||
* text.texi (Parsing HTML/XML): Document new optional parameter
|
||||
'discard-comments' of 'libxml-parse(html|xml)-region'.
|
||||
|
||||
2014-11-18 Leo Liu <sdl.web@gmail.com>
|
||||
|
||||
* functions.texi (Advising Named Functions): Document
|
||||
|
|
|
@ -4324,7 +4324,7 @@ coding instead.
|
|||
When Emacs is compiled with libxml2 support, the following functions
|
||||
are available to parse HTML or XML text into Lisp object trees.
|
||||
|
||||
@defun libxml-parse-html-region start end &optional base-url
|
||||
@defun libxml-parse-html-region start end &optional base-url discard-comments
|
||||
This function parses the text between @var{start} and @var{end} as
|
||||
HTML, and returns a list representing the HTML @dfn{parse tree}. It
|
||||
attempts to handle ``real world'' HTML by robustly coping with syntax
|
||||
|
@ -4333,6 +4333,9 @@ mistakes.
|
|||
The optional argument @var{base-url}, if non-@code{nil}, should be a
|
||||
string specifying the base URL for relative URLs occurring in links.
|
||||
|
||||
If the optional argument @var{discard-comments} is non-@code{nil},
|
||||
then the parse tree is created without any comments.
|
||||
|
||||
In the parse tree, each HTML node is represented by a list in which
|
||||
the first element is a symbol representing the node name, the second
|
||||
element is an alist of node attributes, and the remaining elements are
|
||||
|
@ -4368,7 +4371,7 @@ buffer. The argument @var{dom} should be a list as generated by
|
|||
@end defun
|
||||
|
||||
@cindex parsing xml
|
||||
@defun libxml-parse-xml-region start end &optional base-url
|
||||
@defun libxml-parse-xml-region start end &optional base-url discard-comments
|
||||
This function is the same as @code{libxml-parse-html-region}, except
|
||||
that it parses the text as XML rather than HTML (so it is stricter
|
||||
about syntax).
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2014-11-21 Ulf Jasper <ulf.jasper@web.de>
|
||||
|
||||
* xml.c (parse_region): Take care of new optional parameter
|
||||
'discard-comments' of 'libxml-parse(html|xml)-region'.
|
||||
(Flibxml_parse_html_region, Flibxml_parse_xml_region): New
|
||||
optional parameter 'discard-comments'.
|
||||
|
||||
2014-11-17 Paul Eggert <eggert@cs.ucla.edu>
|
||||
|
||||
Improve time stamp handling, and be more consistent about it.
|
||||
|
|
45
src/xml.c
45
src/xml.c
|
@ -175,7 +175,7 @@ make_dom (xmlNode *node)
|
|||
}
|
||||
|
||||
static Lisp_Object
|
||||
parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int htmlp)
|
||||
parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments, int htmlp)
|
||||
{
|
||||
xmlDoc *doc;
|
||||
Lisp_Object result = Qnil;
|
||||
|
@ -214,21 +214,24 @@ parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int html
|
|||
|
||||
if (doc != NULL)
|
||||
{
|
||||
/* If the document is just comments, then this should get us the
|
||||
nodes anyway. */
|
||||
xmlNode *n = doc->children;
|
||||
Lisp_Object r = Qnil;
|
||||
if (NILP(discard_comments))
|
||||
{
|
||||
/* If the document has toplevel comments, then this should
|
||||
get us the nodes and the comments. */
|
||||
xmlNode *n = doc->children;
|
||||
|
||||
while (n) {
|
||||
if (!NILP (r))
|
||||
result = Fcons (r, result);
|
||||
r = make_dom (n);
|
||||
n = n->next;
|
||||
}
|
||||
while (n) {
|
||||
if (!NILP (r))
|
||||
result = Fcons (r, result);
|
||||
r = make_dom (n);
|
||||
n = n->next;
|
||||
}
|
||||
}
|
||||
|
||||
if (NILP (result)) {
|
||||
/* The document isn't just comments, so get the tree the
|
||||
proper way. */
|
||||
/* The document doesn't have toplevel comments or we discarded
|
||||
them. Get the tree the proper way. */
|
||||
xmlNode *node = fn_xmlDocGetRootElement (doc);
|
||||
if (node != NULL)
|
||||
result = make_dom (node);
|
||||
|
@ -251,25 +254,27 @@ xml_cleanup_parser (void)
|
|||
|
||||
DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region,
|
||||
Slibxml_parse_html_region,
|
||||
2, 3, 0,
|
||||
2, 4, 0,
|
||||
doc: /* Parse the region as an HTML document and return the parse tree.
|
||||
If BASE-URL is non-nil, it is used to expand relative URLs. */)
|
||||
(Lisp_Object start, Lisp_Object end, Lisp_Object base_url)
|
||||
If BASE-URL is non-nil, it is used to expand relative URLs.
|
||||
If DISCARD-COMMENTS is non-nil, all HTML comments are discarded. */)
|
||||
(Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments)
|
||||
{
|
||||
if (init_libxml2_functions ())
|
||||
return parse_region (start, end, base_url, 1);
|
||||
return parse_region (start, end, base_url, discard_comments, 1);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region,
|
||||
Slibxml_parse_xml_region,
|
||||
2, 3, 0,
|
||||
2, 4, 0,
|
||||
doc: /* Parse the region as an XML document and return the parse tree.
|
||||
If BASE-URL is non-nil, it is used to expand relative URLs. */)
|
||||
(Lisp_Object start, Lisp_Object end, Lisp_Object base_url)
|
||||
If BASE-URL is non-nil, it is used to expand relative URLs.
|
||||
If DISCARD-COMMENTS is non-nil, all XML comments are discarded. */)
|
||||
(Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object discard_comments)
|
||||
{
|
||||
if (init_libxml2_functions ())
|
||||
return parse_region (start, end, base_url, 0);
|
||||
return parse_region (start, end, base_url, discard_comments, 0);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
2014-11-21 Ulf Jasper <ulf.jasper@web.de>
|
||||
|
||||
* automated/libxml-tests.el
|
||||
(libxml-tests--data-comments-preserved): Renamed from
|
||||
'libxml-tests--data'.
|
||||
(libxml-tests--data-comments-discarded): New.
|
||||
(libxml-tests): Check whether 'libxml-parse-xml-region' is
|
||||
discarding comments correctly.
|
||||
|
||||
2014-11-17 Michal Nazarewicz <mina86@mina86.com>
|
||||
|
||||
* automated/tildify-tests.el (tildify-test-html, tildify-test-xml):
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
|
||||
(require 'ert)
|
||||
|
||||
(defvar libxml-tests--data
|
||||
(defvar libxml-tests--data-comments-preserved
|
||||
`(;; simple case
|
||||
("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>"
|
||||
. (foo ((baz . "true")) "bar"))
|
||||
|
@ -40,17 +40,35 @@
|
|||
"<bar>blub</bar></foo><!--comment-b--><!--comment-c-->")
|
||||
. (top nil (comment nil "comment-a") (foo ((a . "b")) (bar nil "blub"))
|
||||
(comment nil "comment-b") (comment nil "comment-c"))))
|
||||
"Alist of XML strings and their expected parse trees.")
|
||||
"Alist of XML strings and their expected parse trees for preserved comments.")
|
||||
|
||||
(defvar libxml-tests--data-comments-discarded
|
||||
`(;; simple case
|
||||
("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>"
|
||||
. (foo ((baz . "true")) "bar"))
|
||||
;; toplevel comments -- first document child must not get lost
|
||||
(,(concat "<?xml version=\"1.0\"?><foo>bar</foo><!--comment-1-->"
|
||||
"<!--comment-2-->")
|
||||
. (foo nil "bar"))
|
||||
(,(concat "<?xml version=\"1.0\"?><!--comment-a--><foo a=\"b\">"
|
||||
"<bar>blub</bar></foo><!--comment-b--><!--comment-c-->")
|
||||
. (foo ((a . "b")) (bar nil "blub"))))
|
||||
"Alist of XML strings and their expected parse trees for discarded comments.")
|
||||
|
||||
|
||||
(ert-deftest libxml-tests ()
|
||||
"Test libxml."
|
||||
(when (fboundp 'libxml-parse-xml-region)
|
||||
(with-temp-buffer
|
||||
(dolist (test libxml-tests--data)
|
||||
(dolist (test libxml-tests--data-comments-preserved)
|
||||
(erase-buffer)
|
||||
(insert (car test))
|
||||
(should (equal (cdr test)
|
||||
(libxml-parse-xml-region (point-min) (point-max))))))))
|
||||
(libxml-parse-xml-region (point-min) (point-max)))))
|
||||
(dolist (test libxml-tests--data-comments-discarded)
|
||||
(erase-buffer)
|
||||
(insert (car test))
|
||||
(should (equal (cdr test)
|
||||
(libxml-parse-xml-region (point-min) (point-max) nil t)))))))
|
||||
|
||||
;;; libxml-tests.el ends here
|
||||
|
|
Loading…
Add table
Reference in a new issue