Avoid U+FFFD in commit messages

* build-aux/git-hooks/commit-msg:
Also check against U+FFFD REPLACEMENT CHARACTER in commit messages.
This commit is contained in:
Paul Eggert 2024-12-23 13:38:51 -08:00
parent b1de495eea
commit 28c420afab

View file

@ -31,6 +31,8 @@ fi
# Use U+00A2 CENT SIGN to test whether the locale works.
# The *_utf8_format variables are printf format strings whose octal
# escapes spell out the UTF-8 encoding of one character followed by a
# newline; command substitution then strips that trailing newline.
# \302\242 is bytes C2 A2, the UTF-8 encoding of U+00A2.
cent_sign_utf8_format='\302\242\n'
cent_sign=`printf "$cent_sign_utf8_format"`
# \357\277\275 is bytes EF BF BD, the UTF-8 encoding of U+FFFD
# REPLACEMENT CHARACTER.
replacement_character_utf8_format='\357\277\275\n'
replacement_character=`printf "$replacement_character_utf8_format"`
# Build an awk program that prints everything from the second character
# of the string "<cent sign>@".  An awk that treats the two-byte cent
# sign as a single character prints just "@"; an awk that works byte by
# byte prints the second byte of the cent sign followed by "@".
print_at_sign='BEGIN {print substr("'$cent_sign'@", 2)}'
# Run the probe with empty input and discard diagnostics, so that a
# failing awk simply yields a value other than "@".
at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null`
if test "$at_sign" != @; then
@ -44,7 +46,12 @@ if test "$at_sign" != @; then
fi
# Check the log entry.
exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
exec $awk \
-v at_sign="$at_sign" \
-v cent_sign="$cent_sign" \
-v file="$1" \
-v replacement_character="$replacement_character" \
'
BEGIN {
# These regular expressions assume traditional Unix unibyte behavior.
# They are needed for old or broken versions of awk, e.g.,
@ -137,6 +144,10 @@ exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
print "Unprintable character in commit message"
status = 1
}
# Reject any line that contains U+FFFD REPLACEMENT CHARACTER.
# The variable replacement_character is set with -v on the awk command
# line and is used here as a dynamic regular expression matched against
# the whole line.
# NOTE(review): presumably the END rule turns a nonzero status into a
# failing exit code - confirm against the rest of the program.
$0 ~ replacement_character {
print "Replacement character in commit message"
status = 1
}
END {
if (nlines == 0) {