Support Posix-standard pax extended header in tar files

* lisp/tar-mode.el (pax-extended-attribute-record-regexp)
(tar-attr-vector): New variables.
(pax-gid-index, pax-gname-index, pax-linkpath-index)
(pax-mtime-index, pax-path-index, pax-size-index, pax-uid-index)
(pax-uname-index): New constants.
(pax-header-gid, pax-header-gname, pax-header-linkpath)
(pax-header-mtime, pax-header-path, pax-header-size)
(pax-header-uid, pax-header-uname): New accessors to pax header.
(pax-decode-string, tar-parse-pax-extended-header): New functions.
(tar-header-block-tokenize): Recognize and handle Posix-standard
pax extended header, and use its attributes instead of those in
the standard tar header.  (Bug#64686)
This commit is contained in:
Eli Zaretskii 2023-07-27 11:36:00 +03:00
parent b936ff0963
commit 2dc5f17c3e

View file

@ -215,6 +215,99 @@ Preserve the modified states of the buffers and set `tar-data-swapped'."
"Round S up to the next multiple of 512."
(ash (ash (+ s 511) -9) 9))
;; Reference:
;; https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_02
(defconst pax-extended-attribute-record-regexp
;; We omit attributes that are "reserved" by Posix, since no
;; processing has been defined for them.
"\\([0-9]+\\) \\(gid\\|gname\\|hdrcharset\\|linkpath\\|mtime\\|path\\|size\\|uid\\|uname\\)="
"Regular expression for looking up extended attributes in a
Posix-standard pax extended header of a tar file.
Only attributes that `tar-mode' can grok are mentioned.")
(defconst pax-gid-index 0)
(defconst pax-gname-index 1)
(defconst pax-linkpath-index 2)
(defconst pax-mtime-index 3)
(defconst pax-path-index 4)
(defconst pax-size-index 5)
(defconst pax-uid-index 6)
(defconst pax-uname-index 7)
(defsubst pax-header-gid (attr-vec)
(aref attr-vec pax-gid-index))
(defsubst pax-header-gname (attr-vec)
(aref attr-vec pax-gname-index))
(defsubst pax-header-linkpath (attr-vec)
(aref attr-vec pax-linkpath-index))
(defsubst pax-header-mtime (attr-vec)
(aref attr-vec pax-mtime-index))
(defsubst pax-header-path (attr-vec)
(aref attr-vec pax-path-index))
(defsubst pax-header-size (attr-vec)
(aref attr-vec pax-size-index))
(defsubst pax-header-uid (attr-vec)
(aref attr-vec pax-uid-index))
(defsubst pax-header-uname (attr-vec)
(aref attr-vec pax-uid-index))
(defsubst pax-decode-string (str coding)
(if str
(decode-coding-string str coding)
str))
(defvar tar-attr-vector '[nil nil nil nil nil nil nil nil])
(defun tar-parse-pax-extended-header (pos)
"Parse a pax external header of a Posix-format tar file."
(let ((end (+ pos 512))
(result tar-attr-vector)
(coding 'utf-8-unix)
attr value record-len value-len)
(dotimes (i 8)
(aset result i nil))
(goto-char pos)
(while (and (< pos end)
(re-search-forward pax-extended-attribute-record-regexp
end 'move))
(setq record-len (string-to-number (match-string 1))
attr (match-string 2)
value-len (- record-len
(length (match-string 1))
1
(length (match-string 2))
2)
value (buffer-substring (point) (+ (point) value-len)))
(setq pos (goto-char (+ (point) value-len 1)))
(cond
((equal attr "gid")
(aset result pax-gid-index value))
((equal attr "gname")
(aset result pax-gname-index value))
((equal attr "linkpath")
(aset result pax-linkpath-index value))
((equal attr "mtime")
(aset result pax-mtime-index (string-to-number value)))
((equal attr "path")
(aset result pax-path-index value))
((equal attr "size")
(aset result pax-size-index value))
((equal attr "uid")
(aset result pax-uid-index value))
((equal attr "uname")
(aset result pax-uname-index value))
((equal attr "hdrcharset")
(setq coding (if (equal value "BINARY") 'no-conversion 'utf-8-unix))))
(setq pos (+ pos (skip-chars-forward "\000"))))
;; Decode string-valued attributes.
(aset result pax-gname-index
(pax-decode-string (aref result pax-gname-index) coding))
(aset result pax-linkpath-index
(pax-decode-string (aref result pax-linkpath-index) coding))
(aset result pax-path-index
(pax-decode-string (aref result pax-path-index) coding))
(aset result pax-uname-index
(pax-decode-string (aref result pax-uname-index) coding))
result))
(defun tar-header-block-tokenize (pos coding &optional disable-slash)
"Return a `tar-header' structure.
This is a list of name, mode, uid, gid, size,
@ -271,67 +364,112 @@ of the file header. This is used for \"old GNU\" Tar format."
(if (and (null link-p) (null disable-slash) (string-match "/\\'" name))
(setq link-p 5)) ; directory
(if (and (equal name "././@LongLink")
;; Supposedly @LongLink is only used for GNUTAR
;; format (i.e. "ustar ") but some POSIX Tar files
;; (with "ustar\0") have been seen using it as well.
(member magic-str '("ustar " "ustar\0")))
;; This is a GNU Tar long-file-name header.
(let* ((size (tar-parse-octal-integer
string tar-size-offset tar-time-offset))
;; The long name is in the next 512-byte block.
;; We've already moved POS there, when we computed
;; STRING above.
(name (decode-coding-string
;; -1 so as to strip the terminating 0 byte.
(buffer-substring pos (+ pos size -1)) coding))
;; Tokenize the header of the _real_ file entry,
;; which is further 512 bytes into the archive.
(descriptor (tar-header-block-tokenize
(+ pos (tar-roundup-512 size)) coding
;; Don't intuit directories from
;; the trailing slash, because the
;; truncated name might by chance end
;; in a slash.
'ignore-trailing-slash)))
;; Fix the descriptor of the real file entry by using
;; the information from the long name entry.
(cond
((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME.
(setf (tar-header-name descriptor) name))
((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK.
(setf (tar-header-link-name descriptor) name))
(t
(message "Unrecognized GNU Tar @LongLink format")))
;; Fix the "link-type" attribute, based on the long name.
(if (and (null (tar-header-link-type descriptor))
(string-match "/\\'" name))
(setf (tar-header-link-type descriptor) 5)) ; directory
(setf (tar-header-header-start descriptor)
(copy-marker (- pos 512) t))
descriptor)
(if (member magic-str '("ustar " "ustar\0"))
(if (equal name "././@LongLink")
;; Supposedly @LongLink is only used for GNUTAR
;; format (i.e. "ustar ") but some POSIX Tar files
;; (with "ustar\0") have been seen using it as well.
;; This is a GNU Tar long-file-name header.
(let* ((size (tar-parse-octal-integer
string tar-size-offset tar-time-offset))
;; The long name is in the next 512-byte block.
;; We've already moved POS there, when we
;; computed STRING above.
(name (decode-coding-string
;; -1 so as to strip the terminating 0 byte.
(buffer-substring pos (+ pos size -1)) coding))
;; Tokenize the header of the _real_ file entry,
;; which is further 512 bytes into the archive.
(descriptor (tar-header-block-tokenize
(+ pos (tar-roundup-512 size)) coding
;; Don't intuit directories from
;; the trailing slash, because the
;; truncated name might by chance end
;; in a slash.
'ignore-trailing-slash)))
;; Fix the descriptor of the real file entry by using
;; the information from the long name entry.
(cond
((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME.
(setf (tar-header-name descriptor) name))
((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK.
(setf (tar-header-link-name descriptor) name))
(t
(message "Unrecognized GNU Tar @LongLink format")))
;; Fix the "link-type" attribute, based on the long name.
(if (and (null (tar-header-link-type descriptor))
(string-match "/\\'" name))
(setf (tar-header-link-type descriptor) 5)) ; directory
(setf (tar-header-header-start descriptor)
(copy-marker (- pos 512) t))
descriptor)
;; Posix pax extended header. FIXME: support ?g as well.
(if (eq link-p (- ?x ?0))
;; Get whatever attributes are in the extended header,
(let* ((pax-attrs (tar-parse-pax-extended-header pos))
(gid (pax-header-gid pax-attrs))
(gname (pax-header-gname pax-attrs))
(linkpath (pax-header-linkpath pax-attrs))
(mtime (pax-header-mtime pax-attrs))
(path (pax-header-path pax-attrs))
(size (pax-header-size pax-attrs))
(uid (pax-header-uid pax-attrs))
(uname (pax-header-uname pax-attrs))
;; Tokenize the header of the _real_ file entry,
;; which is further 512 bytes into the archive.
(descriptor
(tar-header-block-tokenize (+ pos 512) coding
'ignore-trailing-slash)))
;; Fix the descriptor of the real file entry by
;; overriding some of the fields with the information
;; from the extended header.
(if gid
(setf (tar-header-gid descriptor) gid))
(if gname
(setf (tar-header-gname descriptor) gname))
(if linkpath
(setf (tar-header-link-name descriptor) linkpath))
(if mtime
(setf (tar-header-date descriptor) mtime))
(if path
(setf (tar-header-name descriptor) path))
(if size
(setf (tar-header-size descriptor) size))
(if uid
(setf (tar-header-uid descriptor) uid))
(if uname
(setf (tar-header-uname descriptor) uname))
descriptor)
(make-tar-header
(copy-marker pos nil)
name
(tar-parse-octal-integer string tar-mode-offset tar-uid-offset)
(tar-parse-octal-integer string tar-uid-offset tar-gid-offset)
(tar-parse-octal-integer string tar-gid-offset tar-size-offset)
(tar-parse-octal-integer string tar-size-offset tar-time-offset)
(tar-parse-octal-integer string tar-time-offset tar-chk-offset)
(tar-parse-octal-integer string tar-chk-offset tar-linkp-offset)
link-p
linkname
uname-valid-p
(when uname-valid-p
(decode-coding-string
(substring string tar-uname-offset uname-end) coding))
(when uname-valid-p
(decode-coding-string
(substring string tar-gname-offset gname-end) coding))
(tar-parse-octal-integer string tar-dmaj-offset tar-dmin-offset)
(tar-parse-octal-integer string tar-dmin-offset tar-prefix-offset)
))))))
(make-tar-header
(copy-marker pos nil)
name
(tar-parse-octal-integer string tar-mode-offset
tar-uid-offset)
(tar-parse-octal-integer string tar-uid-offset
tar-gid-offset)
(tar-parse-octal-integer string tar-gid-offset
tar-size-offset)
(tar-parse-octal-integer string tar-size-offset
tar-time-offset)
(tar-parse-octal-integer string tar-time-offset
tar-chk-offset)
(tar-parse-octal-integer string tar-chk-offset
tar-linkp-offset)
link-p
linkname
uname-valid-p
(when uname-valid-p
(decode-coding-string
(substring string tar-uname-offset uname-end) coding))
(when uname-valid-p
(decode-coding-string
(substring string tar-gname-offset gname-end) coding))
(tar-parse-octal-integer string tar-dmaj-offset
tar-dmin-offset)
(tar-parse-octal-integer string tar-dmin-offset
tar-prefix-offset)
))))))))
;; Pseudo-field.
(defun tar-header-data-end (descriptor)