Add a new, somewhat experimental "readability" command to eww
* net/eww.el (eww-readable): New command and keystroke. * net/shr.el (shr-retransform-dom): New function.
This commit is contained in:
parent
816cad6e24
commit
2e8259b044
4 changed files with 86 additions and 0 deletions
6
etc/NEWS
6
etc/NEWS
|
@ -133,6 +133,12 @@ result of the calculation into the current buffer.
|
|||
*** New minor mode global-eldoc-mode
|
||||
*** eldoc-documentation-function now defaults to nil
|
||||
|
||||
** eww
|
||||
|
||||
*** A new command `R' (`eww-readable') will try do identify the main
|
||||
textual parts of a web page and display only that, leaving menus and
|
||||
the like off the page.
|
||||
|
||||
** Message mode
|
||||
|
||||
*** text/html messages that contain inline image parts will be
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
2014-11-02 Lars Magne Ingebrigtsen <larsi@gnus.org>
|
||||
|
||||
* net/eww.el (eww-readable): New command and keystroke.
|
||||
|
||||
* net/shr.el (shr-retransform-dom): New function.
|
||||
|
||||
* net/eww.el (eww-display-html): Set `eww-current-source' in the
|
||||
correct buffer.
|
||||
(eww-view-source): Use it.
|
||||
|
|
|
@ -402,6 +402,7 @@ word(s) will be searched for via `eww-search-prefix'."
|
|||
(setq-local eww-contents-url nil))
|
||||
|
||||
(defun eww-view-source ()
|
||||
"View the HTML source code of the current page."
|
||||
(interactive)
|
||||
(let ((buf (get-buffer-create "*eww-source*"))
|
||||
(source eww-current-source))
|
||||
|
@ -413,6 +414,60 @@ word(s) will be searched for via `eww-search-prefix'."
|
|||
(html-mode)))
|
||||
(view-buffer buf)))
|
||||
|
||||
(defun eww-readable ()
|
||||
"View the main \"readable\" parts of the current web page.
|
||||
This command uses heuristics to find the parts of the web page that
|
||||
contains the main textual portion, leaving out navigation menus and
|
||||
the like."
|
||||
(interactive)
|
||||
(let* ((source eww-current-source)
|
||||
(dom (shr-transform-dom
|
||||
(with-temp-buffer
|
||||
(insert source)
|
||||
(libxml-parse-html-region (point-min) (point-max))))))
|
||||
(eww-score-readability dom)
|
||||
(eww-display-html 'utf-8 nil (shr-retransform-dom
|
||||
(eww-highest-readability dom)))
|
||||
(setq eww-current-source source)))
|
||||
|
||||
(defun eww-score-readability (node)
|
||||
(let ((score -1))
|
||||
(cond
|
||||
((memq (car node) '(script head))
|
||||
(setq score -2))
|
||||
((eq (car node) 'meta)
|
||||
(setq score -1))
|
||||
((eq (car node) 'a)
|
||||
(setq score (- (length (split-string
|
||||
(or (cdr (assoc 'text (cdr node))) ""))))))
|
||||
(t
|
||||
(dolist (elem (cdr node))
|
||||
(cond
|
||||
((eq (car elem) 'text)
|
||||
(setq score (+ score (length (split-string (cdr elem))))))
|
||||
((consp (cdr elem))
|
||||
(setq score (+ score
|
||||
(or (cdr (assoc :eww-readability-score (cdr elem)))
|
||||
(eww-score-readability elem)))))))))
|
||||
;; Cache the score of the node to avoid recomputing all the time.
|
||||
(setcdr node (cons (cons :eww-readability-score score) (cdr node)))
|
||||
score))
|
||||
|
||||
(defun eww-highest-readability (node)
|
||||
(let ((result node)
|
||||
highest)
|
||||
(dolist (elem (cdr node))
|
||||
(when (and (consp (cdr elem))
|
||||
(> (or (cdr (assoc
|
||||
:eww-readability-score
|
||||
(setq highest
|
||||
(eww-highest-readability elem))))
|
||||
most-negative-fixnum)
|
||||
(or (cdr (assoc :eww-readability-score (cdr result)))
|
||||
most-negative-fixnum)))
|
||||
(setq result highest)))
|
||||
result))
|
||||
|
||||
(defvar eww-mode-map
|
||||
(let ((map (make-sparse-keymap)))
|
||||
(suppress-keymap map)
|
||||
|
@ -435,6 +490,7 @@ word(s) will be searched for via `eww-search-prefix'."
|
|||
(define-key map "w" 'eww-copy-page-url)
|
||||
(define-key map "C" 'url-cookie-list)
|
||||
(define-key map "v" 'eww-view-source)
|
||||
(define-key map "R" 'eww-readable)
|
||||
(define-key map "H" 'eww-list-histories)
|
||||
|
||||
(define-key map "b" 'eww-add-bookmark)
|
||||
|
|
|
@ -370,6 +370,26 @@ size, and full-buffer size."
|
|||
(push (shr-transform-dom sub) result)))
|
||||
(nreverse result)))
|
||||
|
||||
(defun shr-retransform-dom (dom)
|
||||
"Transform the shr DOM back into the libxml DOM."
|
||||
(let ((tag (car dom))
|
||||
(attributes nil)
|
||||
(text nil)
|
||||
(sub-nodes nil))
|
||||
(dolist (elem (cdr dom))
|
||||
(cond
|
||||
((eq (car elem) 'text)
|
||||
(setq text (cdr elem)))
|
||||
((not (consp (cdr elem)))
|
||||
(push (cons (intern (substring (symbol-name (car elem)) 1) obarray)
|
||||
(cdr elem))
|
||||
attributes))
|
||||
(t
|
||||
(push (shr-retransform-dom elem) sub-nodes))))
|
||||
(append (list tag (nreverse attributes))
|
||||
(nreverse sub-nodes)
|
||||
(and text (list text)))))
|
||||
|
||||
(defsubst shr-generic (cont)
|
||||
(dolist (sub cont)
|
||||
(cond
|
||||
|
|
Loading…
Add table
Reference in a new issue