Support Posix-standard pax extended header in tar files
* lisp/tar-mode.el (pax-extended-attribute-record-regexp) (tar-attr-vector): New variables. (pax-gid-index, pax-gname-index, pax-linkpath-index) (pax-mtime-index, pax-path-index, pax-size-index, pax-uid-index) (pax-uname-index): New constants. (pax-header-gid, pax-header-gname, pax-header-linkpath) (pax-header-mtime, pax-header-path, pax-header-size) (pax-header-uid, pax-header-uname): New accessors to pax header. (pax-decode-string, tar-parse-pax-extended-header): New functions. (tar-header-block-tokenize): Recognize and handle Posix-standard pax extended header, and use its attributes instead of those in the standard tar header. (Bug#64686)
This commit is contained in:
parent
b936ff0963
commit
2dc5f17c3e
1 changed files with 198 additions and 60 deletions
258
lisp/tar-mode.el
258
lisp/tar-mode.el
|
@ -215,6 +215,99 @@ Preserve the modified states of the buffers and set `tar-data-swapped'."
|
|||
"Round S up to the next multiple of 512."
|
||||
(ash (ash (+ s 511) -9) 9))
|
||||
|
||||
;; Reference:
|
||||
;; https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_02
|
||||
(defconst pax-extended-attribute-record-regexp
|
||||
;; We omit attributes that are "reserved" by Posix, since no
|
||||
;; processing has been defined for them.
|
||||
"\\([0-9]+\\) \\(gid\\|gname\\|hdrcharset\\|linkpath\\|mtime\\|path\\|size\\|uid\\|uname\\)="
|
||||
"Regular expression for looking up extended attributes in a
|
||||
Posix-standard pax extended header of a tar file.
|
||||
Only attributes that `tar-mode' can grok are mentioned.")
|
||||
|
||||
(defconst pax-gid-index 0)
|
||||
(defconst pax-gname-index 1)
|
||||
(defconst pax-linkpath-index 2)
|
||||
(defconst pax-mtime-index 3)
|
||||
(defconst pax-path-index 4)
|
||||
(defconst pax-size-index 5)
|
||||
(defconst pax-uid-index 6)
|
||||
(defconst pax-uname-index 7)
|
||||
(defsubst pax-header-gid (attr-vec)
|
||||
(aref attr-vec pax-gid-index))
|
||||
(defsubst pax-header-gname (attr-vec)
|
||||
(aref attr-vec pax-gname-index))
|
||||
(defsubst pax-header-linkpath (attr-vec)
|
||||
(aref attr-vec pax-linkpath-index))
|
||||
(defsubst pax-header-mtime (attr-vec)
|
||||
(aref attr-vec pax-mtime-index))
|
||||
(defsubst pax-header-path (attr-vec)
|
||||
(aref attr-vec pax-path-index))
|
||||
(defsubst pax-header-size (attr-vec)
|
||||
(aref attr-vec pax-size-index))
|
||||
(defsubst pax-header-uid (attr-vec)
|
||||
(aref attr-vec pax-uid-index))
|
||||
(defsubst pax-header-uname (attr-vec)
|
||||
(aref attr-vec pax-uid-index))
|
||||
|
||||
(defsubst pax-decode-string (str coding)
|
||||
(if str
|
||||
(decode-coding-string str coding)
|
||||
str))
|
||||
|
||||
(defvar tar-attr-vector '[nil nil nil nil nil nil nil nil])
|
||||
(defun tar-parse-pax-extended-header (pos)
|
||||
"Parse a pax external header of a Posix-format tar file."
|
||||
(let ((end (+ pos 512))
|
||||
(result tar-attr-vector)
|
||||
(coding 'utf-8-unix)
|
||||
attr value record-len value-len)
|
||||
(dotimes (i 8)
|
||||
(aset result i nil))
|
||||
(goto-char pos)
|
||||
(while (and (< pos end)
|
||||
(re-search-forward pax-extended-attribute-record-regexp
|
||||
end 'move))
|
||||
(setq record-len (string-to-number (match-string 1))
|
||||
attr (match-string 2)
|
||||
value-len (- record-len
|
||||
(length (match-string 1))
|
||||
1
|
||||
(length (match-string 2))
|
||||
2)
|
||||
value (buffer-substring (point) (+ (point) value-len)))
|
||||
(setq pos (goto-char (+ (point) value-len 1)))
|
||||
(cond
|
||||
((equal attr "gid")
|
||||
(aset result pax-gid-index value))
|
||||
((equal attr "gname")
|
||||
(aset result pax-gname-index value))
|
||||
((equal attr "linkpath")
|
||||
(aset result pax-linkpath-index value))
|
||||
((equal attr "mtime")
|
||||
(aset result pax-mtime-index (string-to-number value)))
|
||||
((equal attr "path")
|
||||
(aset result pax-path-index value))
|
||||
((equal attr "size")
|
||||
(aset result pax-size-index value))
|
||||
((equal attr "uid")
|
||||
(aset result pax-uid-index value))
|
||||
((equal attr "uname")
|
||||
(aset result pax-uname-index value))
|
||||
((equal attr "hdrcharset")
|
||||
(setq coding (if (equal value "BINARY") 'no-conversion 'utf-8-unix))))
|
||||
(setq pos (+ pos (skip-chars-forward "\000"))))
|
||||
;; Decode string-valued attributes.
|
||||
(aset result pax-gname-index
|
||||
(pax-decode-string (aref result pax-gname-index) coding))
|
||||
(aset result pax-linkpath-index
|
||||
(pax-decode-string (aref result pax-linkpath-index) coding))
|
||||
(aset result pax-path-index
|
||||
(pax-decode-string (aref result pax-path-index) coding))
|
||||
(aset result pax-uname-index
|
||||
(pax-decode-string (aref result pax-uname-index) coding))
|
||||
result))
|
||||
|
||||
(defun tar-header-block-tokenize (pos coding &optional disable-slash)
|
||||
"Return a `tar-header' structure.
|
||||
This is a list of name, mode, uid, gid, size,
|
||||
|
@ -271,67 +364,112 @@ of the file header. This is used for \"old GNU\" Tar format."
|
|||
(if (and (null link-p) (null disable-slash) (string-match "/\\'" name))
|
||||
(setq link-p 5)) ; directory
|
||||
|
||||
(if (and (equal name "././@LongLink")
|
||||
;; Supposedly @LongLink is only used for GNUTAR
|
||||
;; format (i.e. "ustar ") but some POSIX Tar files
|
||||
;; (with "ustar\0") have been seen using it as well.
|
||||
(member magic-str '("ustar " "ustar\0")))
|
||||
;; This is a GNU Tar long-file-name header.
|
||||
(let* ((size (tar-parse-octal-integer
|
||||
string tar-size-offset tar-time-offset))
|
||||
;; The long name is in the next 512-byte block.
|
||||
;; We've already moved POS there, when we computed
|
||||
;; STRING above.
|
||||
(name (decode-coding-string
|
||||
;; -1 so as to strip the terminating 0 byte.
|
||||
(buffer-substring pos (+ pos size -1)) coding))
|
||||
;; Tokenize the header of the _real_ file entry,
|
||||
;; which is further 512 bytes into the archive.
|
||||
(descriptor (tar-header-block-tokenize
|
||||
(+ pos (tar-roundup-512 size)) coding
|
||||
;; Don't intuit directories from
|
||||
;; the trailing slash, because the
|
||||
;; truncated name might by chance end
|
||||
;; in a slash.
|
||||
'ignore-trailing-slash)))
|
||||
;; Fix the descriptor of the real file entry by using
|
||||
;; the information from the long name entry.
|
||||
(cond
|
||||
((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME.
|
||||
(setf (tar-header-name descriptor) name))
|
||||
((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK.
|
||||
(setf (tar-header-link-name descriptor) name))
|
||||
(t
|
||||
(message "Unrecognized GNU Tar @LongLink format")))
|
||||
;; Fix the "link-type" attribute, based on the long name.
|
||||
(if (and (null (tar-header-link-type descriptor))
|
||||
(string-match "/\\'" name))
|
||||
(setf (tar-header-link-type descriptor) 5)) ; directory
|
||||
(setf (tar-header-header-start descriptor)
|
||||
(copy-marker (- pos 512) t))
|
||||
descriptor)
|
||||
(if (member magic-str '("ustar " "ustar\0"))
|
||||
(if (equal name "././@LongLink")
|
||||
;; Supposedly @LongLink is only used for GNUTAR
|
||||
;; format (i.e. "ustar ") but some POSIX Tar files
|
||||
;; (with "ustar\0") have been seen using it as well.
|
||||
;; This is a GNU Tar long-file-name header.
|
||||
(let* ((size (tar-parse-octal-integer
|
||||
string tar-size-offset tar-time-offset))
|
||||
;; The long name is in the next 512-byte block.
|
||||
;; We've already moved POS there, when we
|
||||
;; computed STRING above.
|
||||
(name (decode-coding-string
|
||||
;; -1 so as to strip the terminating 0 byte.
|
||||
(buffer-substring pos (+ pos size -1)) coding))
|
||||
;; Tokenize the header of the _real_ file entry,
|
||||
;; which is further 512 bytes into the archive.
|
||||
(descriptor (tar-header-block-tokenize
|
||||
(+ pos (tar-roundup-512 size)) coding
|
||||
;; Don't intuit directories from
|
||||
;; the trailing slash, because the
|
||||
;; truncated name might by chance end
|
||||
;; in a slash.
|
||||
'ignore-trailing-slash)))
|
||||
;; Fix the descriptor of the real file entry by using
|
||||
;; the information from the long name entry.
|
||||
(cond
|
||||
((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME.
|
||||
(setf (tar-header-name descriptor) name))
|
||||
((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK.
|
||||
(setf (tar-header-link-name descriptor) name))
|
||||
(t
|
||||
(message "Unrecognized GNU Tar @LongLink format")))
|
||||
;; Fix the "link-type" attribute, based on the long name.
|
||||
(if (and (null (tar-header-link-type descriptor))
|
||||
(string-match "/\\'" name))
|
||||
(setf (tar-header-link-type descriptor) 5)) ; directory
|
||||
(setf (tar-header-header-start descriptor)
|
||||
(copy-marker (- pos 512) t))
|
||||
descriptor)
|
||||
;; Posix pax extended header. FIXME: support ?g as well.
|
||||
(if (eq link-p (- ?x ?0))
|
||||
;; Get whatever attributes are in the extended header,
|
||||
(let* ((pax-attrs (tar-parse-pax-extended-header pos))
|
||||
(gid (pax-header-gid pax-attrs))
|
||||
(gname (pax-header-gname pax-attrs))
|
||||
(linkpath (pax-header-linkpath pax-attrs))
|
||||
(mtime (pax-header-mtime pax-attrs))
|
||||
(path (pax-header-path pax-attrs))
|
||||
(size (pax-header-size pax-attrs))
|
||||
(uid (pax-header-uid pax-attrs))
|
||||
(uname (pax-header-uname pax-attrs))
|
||||
;; Tokenize the header of the _real_ file entry,
|
||||
;; which is further 512 bytes into the archive.
|
||||
(descriptor
|
||||
(tar-header-block-tokenize (+ pos 512) coding
|
||||
'ignore-trailing-slash)))
|
||||
;; Fix the descriptor of the real file entry by
|
||||
;; overriding some of the fields with the information
|
||||
;; from the extended header.
|
||||
(if gid
|
||||
(setf (tar-header-gid descriptor) gid))
|
||||
(if gname
|
||||
(setf (tar-header-gname descriptor) gname))
|
||||
(if linkpath
|
||||
(setf (tar-header-link-name descriptor) linkpath))
|
||||
(if mtime
|
||||
(setf (tar-header-date descriptor) mtime))
|
||||
(if path
|
||||
(setf (tar-header-name descriptor) path))
|
||||
(if size
|
||||
(setf (tar-header-size descriptor) size))
|
||||
(if uid
|
||||
(setf (tar-header-uid descriptor) uid))
|
||||
(if uname
|
||||
(setf (tar-header-uname descriptor) uname))
|
||||
descriptor)
|
||||
|
||||
(make-tar-header
|
||||
(copy-marker pos nil)
|
||||
name
|
||||
(tar-parse-octal-integer string tar-mode-offset tar-uid-offset)
|
||||
(tar-parse-octal-integer string tar-uid-offset tar-gid-offset)
|
||||
(tar-parse-octal-integer string tar-gid-offset tar-size-offset)
|
||||
(tar-parse-octal-integer string tar-size-offset tar-time-offset)
|
||||
(tar-parse-octal-integer string tar-time-offset tar-chk-offset)
|
||||
(tar-parse-octal-integer string tar-chk-offset tar-linkp-offset)
|
||||
link-p
|
||||
linkname
|
||||
uname-valid-p
|
||||
(when uname-valid-p
|
||||
(decode-coding-string
|
||||
(substring string tar-uname-offset uname-end) coding))
|
||||
(when uname-valid-p
|
||||
(decode-coding-string
|
||||
(substring string tar-gname-offset gname-end) coding))
|
||||
(tar-parse-octal-integer string tar-dmaj-offset tar-dmin-offset)
|
||||
(tar-parse-octal-integer string tar-dmin-offset tar-prefix-offset)
|
||||
))))))
|
||||
(make-tar-header
|
||||
(copy-marker pos nil)
|
||||
name
|
||||
(tar-parse-octal-integer string tar-mode-offset
|
||||
tar-uid-offset)
|
||||
(tar-parse-octal-integer string tar-uid-offset
|
||||
tar-gid-offset)
|
||||
(tar-parse-octal-integer string tar-gid-offset
|
||||
tar-size-offset)
|
||||
(tar-parse-octal-integer string tar-size-offset
|
||||
tar-time-offset)
|
||||
(tar-parse-octal-integer string tar-time-offset
|
||||
tar-chk-offset)
|
||||
(tar-parse-octal-integer string tar-chk-offset
|
||||
tar-linkp-offset)
|
||||
link-p
|
||||
linkname
|
||||
uname-valid-p
|
||||
(when uname-valid-p
|
||||
(decode-coding-string
|
||||
(substring string tar-uname-offset uname-end) coding))
|
||||
(when uname-valid-p
|
||||
(decode-coding-string
|
||||
(substring string tar-gname-offset gname-end) coding))
|
||||
(tar-parse-octal-integer string tar-dmaj-offset
|
||||
tar-dmin-offset)
|
||||
(tar-parse-octal-integer string tar-dmin-offset
|
||||
tar-prefix-offset)
|
||||
))))))))
|
||||
|
||||
;; Pseudo-field.
|
||||
(defun tar-header-data-end (descriptor)
|
||||
|
|
Loading…
Add table
Reference in a new issue