From 8441bc5323fa3c4bc753340b0faa1d668335ad9c Mon Sep 17 00:00:00 2001
From: Jasper Lievisse Adriaanse <j@jasper.la>
Date: Wed, 1 Jul 2020 09:10:21 +0200
Subject: [PATCH 01/29] Remove references to sourceforge

The mailing lists have been migrated to lists.nasm.us

Signed-off-by: Jasper Lievisse Adriaanse <j@jasper.la>
---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 704fba14..e9b3dc30 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,5 @@ This means its development is open to even wider society of programmers
 wishing to improve their lovely assembler.
 
 Visit our [nasm.us](https://www.nasm.us/) website for more details.
-We are gradually moving services away from Sourceforge. For our remaining
-Sourceforge services see [here](https://sourceforge.net/projects/nasm/).
 
 With best regards, the NASM crew.

From 174c8ccbad10fa021a2ad2dfe929d6850db503f2 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Thu, 9 Jul 2020 21:15:16 -0700
Subject: [PATCH 02/29] NASM 2.15.03rc3

---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index cb648feb..8d4ac6e4 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.15.03rc2
+2.15.03rc3

From 23abe9fe88a0d045e063627e682a540a5d06779c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 01:52:49 -0700
Subject: [PATCH 03/29] preproc: correctly handle %00 capturing a label

defining->dstk.mmac should point back to "defining" when the topmost
definition block is a %macro block.

Otherwise %00 will not inhibit label emission.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 asm/preproc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/asm/preproc.c b/asm/preproc.c
index 81c72042..4fcdb359 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -4035,7 +4035,13 @@ issue_error:
         nasm_assert(!defining);
         nasm_new(def);
         def->casesense = casesense;
-        def->dstk.mmac = defining;
+        /*
+         * dstk.mstk points to the previous definition bracket,
+         * whereas dstk.mmac points to the topmost mmacro, which
+         * in this case is the one we are just starting to create.
+         */
+        def->dstk.mstk = defining;
+        def->dstk.mmac = def;
         if (op == PP_RMACRO)
             def->max_depth = nasm_limit[LIMIT_MACRO_LEVELS];
         if (!parse_mmacro_spec(tline, def, dname)) {

From b3c554555687886e2a521cf0fb0b27ef9d51d306 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 01:54:38 -0700
Subject: [PATCH 04/29] test/Makefile: fix command line for .obj format

The command line for .obj had a stray -F.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 test/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Makefile b/test/Makefile
index 6b6ffbfe..7d09b346 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -34,7 +34,7 @@ $(NASM):
 	$(NASM) $(NASMOPT) -f aout -o $@ -MD $@.dep -l $@.lst $<
 
 %.obj: %.asm $(NASMDEP)
-	$(NASM) $(NASMOPT) -f obj -gborland -F -o $@ -MD $@.dep -l $@.lst $<
+	$(NASM) $(NASMOPT) -f obj -gborland -o $@ -MD $@.dep -l $@.lst $<
 
 %.rdf: %.asm $(NASMDEP)
 	$(NASM) $(NASMOPT) -f rdf -o $@ -MD $@.dep -l $@.lst $<

From 22a3f567c0d7b4801a1b0838231073572d766795 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 01:59:53 -0700
Subject: [PATCH 05/29] changes.src: document %00 fix

Add %00 fix to release note.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 doc/changes.src | 2 ++
 doc/nasmdoc.src | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/doc/changes.src b/doc/changes.src
index 47fb03b4..3dc85deb 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -25,6 +25,8 @@ section type.
 where one or more parts result from empty token expansion, resulting
 in \c{%+} tokens at the beginning or end, or multiple ones in a row.
 
+\b Fix macro label capture (\c{%00}, \k{percent00}).
+
 \b Portability fixes.
 
 \S{cl-2.15.02} Version 2.15.02
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index 62a70d57..26e4db27 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -2935,6 +2935,10 @@ Examples are given in \k{rotate}.
 label must be on the same line as the macro invocation, may be a local label
 (see \k{locallab}), and need not end in a colon.
 
+If \c{%00} is present anywhere in the macro body, the label itself
+will not be emitted by NASM. You can, of course, put \c{%00:}
+explicitly at the beginning of your macro.
+
 
 \S{rotate} \i\c{%rotate}: \i{Rotating Macro Parameters}
 

From 543069acec0e978a9dafe22041887b1ba2cd58ff Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 02:00:31 -0700
Subject: [PATCH 06/29] NASM 2.15.03rc4

---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index 8d4ac6e4..9c729c89 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.15.03rc3
+2.15.03rc4

From 254a56acca1511afadb30caa5e432b575f54ea43 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 02:44:33 -0700
Subject: [PATCH 07/29] assemble: use proper rel/abs state for lea reg,imm

When using the LEA instruction with immediate syntax instead of memory
operand syntax, the IP_REL flag will not have made it into the operand
type. Make it do so.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 asm/assemble.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/asm/assemble.c b/asm/assemble.c
index 2c47ba58..49faa6b8 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -2776,14 +2776,23 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
         if (input->basereg == -1 &&
             (input->indexreg == -1 || input->scale == 0)) {
             /*
-             * It's a pure offset.
+             * It's a pure offset. If it is an IMMEDIATE, it is a pattern
+             * in insns.dat which allows an immediate to be used as a memory
+             * address, in which case apply the default REL/ABS.
              */
-            if (bits == 64 && ((input->type & IP_REL) == IP_REL)) {
-                if (input->segment == NO_SEG ||
-                    (input->opflags & OPFLAG_RELATIVE)) {
-                    nasm_warn(WARN_OTHER|ERR_PASS2, "absolute address can not be RIP-relative");
-                    input->type &= ~IP_REL;
-                    input->type |= MEMORY;
+            if (bits == 64) {
+                if (is_class(IMMEDIATE, input->type)) {
+                    if (!(input->eaflags & EAF_ABS) &&
+                        ((input->eaflags & EAF_REL) || globalrel))
+                        input->type |= IP_REL;
+                }
+                if ((input->type & IP_REL) == IP_REL) {
+                    if (input->segment == NO_SEG ||
+                        (input->opflags & OPFLAG_RELATIVE)) {
+                        nasm_warn(WARN_OTHER|ERR_PASS2, "absolute address can not be RIP-relative");
+                        input->type &= ~IP_REL;
+                        input->type |= MEMORY;
+                    }
                 }
             }
 

From be1be3f627d82a1352738eb26c6e53281fc924cc Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 02:46:23 -0700
Subject: [PATCH 08/29] %use masm: much better documentation

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 doc/changes.src   |  3 +++
 doc/nasmdoc.src   | 56 ++++++++++++++++++++++++++++++++++++++++++-----
 test/masmdisp.asm |  6 ++++-
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/doc/changes.src b/doc/changes.src
index 3dc85deb..714299d0 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -27,6 +27,9 @@ in \c{%+} tokens at the beginning or end, or multiple ones in a row.
 
 \b Fix macro label capture (\c{%00}, \k{percent00}).
 
+\b Much better documentation for the MASM compatiblity package,
+\c{%use masm} (see \k{pkg_masm}).
+
 \b Portability fixes.
 
 \S{cl-2.15.02} Version 2.15.02
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index 26e4db27..b52f854a 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -4664,17 +4664,61 @@ functionality, as intended to be used primarily with machine-generated code.
 It does not include any "programmer-friendly" shortcuts, nor does it in any way
 support ASSUME, symbol typing, or MASM-style structures.
 
-Currently, the MASM compatibility package emulates only the PTR
-keyword and recognize syntax displacement[index] for memory
-operations.
 To enable the package, use the directive:
 
 \c{%use masm}
 
-In addition, NASM now natively supports the MASM \c{?} and
-\c{DUP} syntax for the \c{DB} etc data declaration directives,
-regardless of if this package is included or not. See \k{db}.
+Currently, the MASM compatibility package emulates:
 
+\b The \c{FLAT} and \c{OFFSET} keywords are recognized and ignored.
+
+\b The \c{PTR} keyword signifies a memory reference, as if the
+argument had been put in square brackets:
+
+\c      mov eax,[foo]               ; memory reference
+\c      mov eax,dword ptr foo       ; memory reference
+\c      mov eax,dword ptr flat:foo  ; memory reference
+\c      mov eax,offset foo          ; address
+\c      mov eax,foo                 ; address (ambiguous syntax in MASM)
+
+\b The \c{SEGMENT} ... \c{ENDS} syntax:
+
+\c    segname SEGMENT
+\c        ...
+\c    segname ENDS
+
+\b The \c{PROC} ... \c{ENDP} syntax:
+
+\c    procname PROC [FAR]
+\c         ...
+\c    procname ENDP
+
+\> \c{PROC} will also define \c{RET} as a macro expanding to either
+\c{RETF} if \c{FAR} is specified or \c{RETN} otherwise. Any keyword
+after \c{PROC} other than \c{FAR} is ignored.
+
+\b The \c{TBYTE} keyword as an alias for \c{TWORD} (see \k{qsother}).
+
+\b The \c{END} directive is ignored.
+
+\b In 64-bit mode relative addressing is the default (\c{DEFAULT REL},
+see \k{REL & ABS}).
+
+In addition, NASM now natively supports, regardless of whether this
+package is used or not:
+
+\b \c{?} and \c{DUP} syntax for the \c{DB} etc data declaration
+directives (see \k{db}).
+
+\b \c{displacement[base+index]} syntax for memory operations, instead
+of \c{[base+index+displacement]}.
+
+\b \c{seg:[addr]} instead of \c{[seg:addr]} syntax.
+
+\b A pure offset can be given to \c{LEA} without square brackets:
+
+\c      lea rax,[foo]               ; standard syntax
+\c      lea rax,foo                 ; also accepted
 
 \C{directive} \i{Assembler Directives}
 
diff --git a/test/masmdisp.asm b/test/masmdisp.asm
index 295d88d7..c5e9af4f 100644
--- a/test/masmdisp.asm
+++ b/test/masmdisp.asm
@@ -14,6 +14,7 @@ fproc	proc far
 	lea rsi,dword ptr foo
 	lea rsi,[foo]
 	lea rsi,dword [foo]
+	mov rdi,gs:[rbx]
 	ret
 fproc	endp
 
@@ -21,6 +22,8 @@ nproc	proc near
 	mov eax,dword ptr foo
 	mov rdx,offset foo
 	mov ecx,bar[rbx]
+	mov rdi,[gs:foo]
+	mov rdi,qword ptr gs:foo
 	ret
 nproc	endp
 
@@ -31,6 +34,7 @@ nxx	dd 80
 foo	dd 100
 _DATA	ends
 
-_BSS	segment nobits
+	segment _BSS nobits
 bar	resd 100
+xyzzy	dd 64 dup (?)
 _BSS	ends

From 861f2cf2692b850a784a34d81c289a9a5cf9e803 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 02:49:14 -0700
Subject: [PATCH 09/29] changes.src: document LEA fix

Document fix of LEA without square brackets.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 doc/changes.src | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/changes.src b/doc/changes.src
index 714299d0..db58bdf6 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -30,6 +30,8 @@ in \c{%+} tokens at the beginning or end, or multiple ones in a row.
 \b Much better documentation for the MASM compatiblity package,
 \c{%use masm} (see \k{pkg_masm}).
 
+\b Fix \c{LEA} without square brackets, for MASM compatibility.
+
 \b Portability fixes.
 
 \S{cl-2.15.02} Version 2.15.02

From 015ddc1b33d3b40d16e018f80528bbd15979b873 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 02:50:51 -0700
Subject: [PATCH 10/29] NASM 2.15.03rc5

---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index 9c729c89..413fe765 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.15.03rc4
+2.15.03rc5

From a79a700208d771fb3b8e6e7f03fcc195c2d1831c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 02:55:50 -0700
Subject: [PATCH 11/29] preproc: add a %null directive for the masm macro
 package

Instead of %pragma ignore, use a new %null directive which ignores the
rest of the line, without bothering to expand it.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 asm/pptok.dat   | 1 +
 asm/preproc.c   | 5 +++++
 macros/masm.mac | 4 ++--
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/asm/pptok.dat b/asm/pptok.dat
index b6285c36..0fdbbad6 100644
--- a/asm/pptok.dat
+++ b/asm/pptok.dat
@@ -89,6 +89,7 @@
 %include
 %line
 %local
+%null
 %pop
 %pragma
 %push
diff --git a/asm/preproc.c b/asm/preproc.c
index 4fcdb359..ccb00f3b 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -4649,6 +4649,11 @@ issue_error:
     case PP_LINE:
         nasm_panic("`%s' directive not preprocessed early", dname);
         break;
+
+    case PP_NULL:
+        /* Goes nowhere, does nothing... */
+        break;
+
     }
 
 done:
diff --git a/macros/masm.mac b/macros/masm.mac
index da7e6eea..6bd27273 100644
--- a/macros/masm.mac
+++ b/macros/masm.mac
@@ -50,7 +50,7 @@ USE: masm
 %endmacro
 
 %imacro ends 0+.nolist
-  %pragma ignore ends %00
+  %null ends %00
 %endmacro
 
 %imacro proc 0-*.nolist
@@ -65,7 +65,7 @@ USE: masm
 %endmacro
 
 %imacro endp 0.nolist
-  %pragma ignore endp %00
+  %null endp %00
   %undef ret
 %endmacro
 

From fcd3cb88615a200fbee85e5906e37e265a8d297d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 17:22:47 -0700
Subject: [PATCH 12/29] preproc: preserve %[...] in listings

When generating list output, preserve %[...] in the output if we list
a TOK_INDIRECT. The tokenization process removes these delimiters,
so we have to explicitly put them back.

This doesn't affect assembly output, which will only ever be generated
after all TOK_INDIRECT tokens have been removed, but it does affect
some of the listing modes.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 asm/preproc.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/asm/preproc.c b/asm/preproc.c
index ccb00f3b..693cbcbb 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -2000,6 +2000,16 @@ static char *detoken(Token * tlist, bool expand_locals)
 	    }
 	    break;
 
+        case TOK_INDIRECT:
+            /*
+             * This won't happen when emitting to the assembler,
+             * but can happen when emitting output for some of the
+             * list options. The token string doesn't actually include
+             * the brackets in this case.
+             */
+            len += 3;           /* %[] */
+            break;
+
 	default:
 	    break;		/* No modifications */
         }
@@ -2019,8 +2029,19 @@ static char *detoken(Token * tlist, bool expand_locals)
 
     p = line = nasm_malloc(len + 1);
 
-    list_for_each(t, tlist)
-	p = mempcpy(p, tok_text(t), t->len);
+    list_for_each(t, tlist) {
+        switch (t->type) {
+        case TOK_INDIRECT:
+            *p++ = '%';
+            *p++ = '[';
+            p = mempcpy(p, tok_text(t), t->len);
+            *p++ = ']';
+            break;
+
+        default:
+            p = mempcpy(p, tok_text(t), t->len);
+        }
+    }
     *p = '\0';
 
     return line;

From bb3156533b9baf836b746c638111fca82c65f98a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 17:24:43 -0700
Subject: [PATCH 13/29] ppindirect.asm: make it possible to assemble to a
 binary

Add a couple of dd/db directives to ppindirect.asm to make it possible
to actually run it through the assembler.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 test/ppindirect.asm | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/test/ppindirect.asm b/test/ppindirect.asm
index 0a30d075..2785378a 100644
--- a/test/ppindirect.asm
+++ b/test/ppindirect.asm
@@ -2,6 +2,8 @@
 
 ; Fun tests of the preprocessor indirection mode...
 
+	bits 64
+	
 %assign foo1		11
 %assign foo11		1111
 %assign foo2		22
@@ -9,34 +11,34 @@
 %assign foo3		33
 %assign foo33		3333
 %assign n		2
-foo%[foo%[n]]*100
-foo%[n]*100
+	dd	foo%[foo%[n]]*100
+	dd	foo%[n]*100
 %assign foo%[foo%[n]]	foo%[foo%[n]]*100
 ;%assign foo%[n]		foo%[n]*100
 
-	foo1
-	foo2
-	foo3
-	foo11
-	foo22
-	foo33
+	dd	foo1
+	dd	foo2
+	dd	foo3
+	dd	foo11
+	dd	foo22
+	dd	foo33
 
 %define foo33bar	999999
-	%[foo%[foo3]bar]
+	dd	%[foo%[foo3]bar]
 	
 %assign bctr 0
 %macro bluttan 0
 %assign bctr bctr+1
 %assign bluttan%[bctr]	bctr
 %defstr bstr bluttan%[bctr]
-	bluttan%[bctr]
-	bstr
+	db bluttan%[bctr]
+	db bstr
 %endmacro
 
 %rep 20
 	bluttan
 %endrep
 %rep 20
-	bluttan%[bctr]
+	db bluttan%[bctr]
 %assign bctr bctr-1
 %endrep

From baaa5ca4413e7ac73fe98e682be13f2da529e2cf Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 18:14:09 -0700
Subject: [PATCH 14/29] outcoff: don't drop align= option alone on a section
 line

If the section/segment directive *only* contained an align= directive,
it would get lost. Fix that.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 output/outcoff.c | 68 +++++++++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/output/outcoff.c b/output/outcoff.c
index de22fb88..bcd9ff3f 100644
--- a/output/outcoff.c
+++ b/output/outcoff.c
@@ -72,11 +72,11 @@
  * (2) Win32 doesn't bother putting any flags in the header flags
  * field (at offset 0x12 into the file).
  *
- * (3) Win32 uses some extra flags into the section header table:
+ * (3) Win32/64 uses some extra flags into the section header table:
  * it defines flags 0x80000000 (writable), 0x40000000 (readable)
  * and 0x20000000 (executable), and uses them in the expected
- * combinations. It also defines 0x00100000 through 0x00700000 for
- * section alignments of 1 through 64 bytes.
+ * combinations. It also defines 0x00100000 through 0x00f00000 for
+ * section alignments of 1 through 8192 bytes.
  *
  * (4) Both standard COFF and Win32 COFF seem to use the DWORD
  * field directly after the section name in the section header
@@ -285,14 +285,22 @@ int coff_make_section(char *name, uint32_t flags)
     return coff_nsects - 1;
 }
 
+/*
+ * Convert an alignment value to the corresponding flags.
+ * An alignment value of 0 means no flags should be set.
+ */
 static inline uint32_t coff_sectalign_flags(unsigned int align)
 {
-    return (ilog2_32(align) + 1) << 20;
+    return (alignlog2_32(align) + 1) << 20;
 }
 
+/*
+ * Get the alignment value from a flags field.
+ * Returns 0 if no alignment defined.
+ */
 static inline unsigned int coff_alignment(uint32_t flags)
 {
-    return 1U << (((flags & IMAGE_SCN_ALIGN_MASK) >> 20) - 1);
+    return (1U << ((flags & IMAGE_SCN_ALIGN_MASK) >> 20)) >> 1;
 }
 
 static int32_t coff_section_names(char *name, int *bits)
@@ -364,10 +372,13 @@ static int32_t coff_section_names(char *name, int *bits)
                 nasm_nonfatal("argument to `align' is not numeric");
             else {
                 unsigned int align = atoi(q + 6);
-                if (!align || ((align - 1) & align)) {
+                /* Allow align=0 meaning use default */
+                if (!align) {
+                    align_flags = 0;
+                } else if (!is_power2(align)) {
                     nasm_nonfatal("argument to `align' is not a"
                                   " power of two");
-                } else if (align > 8192) {
+                } else if (align > COFF_MAX_ALIGNMENT) {
                     nasm_nonfatal("maximum alignment in COFF is %d bytes",
                                   COFF_MAX_ALIGNMENT);
                 } else {
@@ -382,30 +393,31 @@ static int32_t coff_section_names(char *name, int *bits)
             break;
     if (i == coff_nsects) {
         if (!flags) {
-            if (!strcmp(name, ".data"))
+            flags = TEXT_FLAGS;
+
+            if (!strcmp(name, ".data")) {
                 flags = DATA_FLAGS;
-            else if (!strcmp(name, ".rdata"))
+            } else if (!strcmp(name, ".rdata")) {
                 flags = RDATA_FLAGS;
-            else if (!strcmp(name, ".bss"))
+            } else if (!strcmp(name, ".bss")) {
                 flags = BSS_FLAGS;
-            else if (win64 && !strcmp(name, ".pdata"))
-                flags = PDATA_FLAGS;
-            else if (win64 && !strcmp(name, ".xdata"))
-                flags = XDATA_FLAGS;
-            else
-                flags = TEXT_FLAGS;
+            } else if (win64) {
+                if (!strcmp(name, ".pdata"))
+                    flags = PDATA_FLAGS;
+                else if (!strcmp(name, ".xdata"))
+                    flags = XDATA_FLAGS;
+            }
         }
         i = coff_make_section(name, flags);
-        if (flags)
-            coff_sects[i]->flags = flags;
-    } else if (flags) {
-        /* Check if any flags are respecified */
-
-        /* Warn if non-alignment flags differ */
-        if ((flags ^ coff_sects[i]->flags) & ~IMAGE_SCN_ALIGN_MASK &&
-            coff_sects[i]->pass_last_seen == pass_count()) {
-            nasm_warn(WARN_OTHER, "section attributes changed on"
-                      " redeclaration of section `%s'", name);
+        coff_sects[i]->align_flags = align_flags;
+    } else {
+        if (flags) {
+            /* Warn if non-alignment flags differ */
+            if (((flags ^ coff_sects[i]->flags) & ~IMAGE_SCN_ALIGN_MASK) &&
+                coff_sects[i]->pass_last_seen == pass_count()) {
+                nasm_warn(WARN_OTHER, "section attributes changed on"
+                          " redeclaration of section `%s'", name);
+            }
         }
 
         /* Check if alignment might be needed */
@@ -419,6 +431,7 @@ static int32_t coff_section_names(char *name, int *bits)
             if (align_flags > sect_align_flags) {
                 coff_sects[i]->align_flags = align_flags;
             }
+
             /* Check if not already aligned */
             /* XXX: other formats don't do this... */
             if (coff_sects[i]->len % align) {
@@ -428,9 +441,6 @@ static int32_t coff_section_names(char *name, int *bits)
 
                 nasm_assert(padding <= sizeof buffer);
 
-                if (pass_final())
-                    nasm_nonfatal("section alignment changed during code generation");
-
                 if (coff_sects[i]->flags & IMAGE_SCN_CNT_CODE) {
                     /* Fill with INT 3 instructions */
                     memset(buffer, 0xCC, padding);

From 42a73b776a6f987e1720256b502139cf1bec9e68 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 18:14:54 -0700
Subject: [PATCH 15/29] test/winalign.asm: simple test for COFF alignment

Simple test case based on debugging BR 3392692.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 test/winalign.asm | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 test/winalign.asm

diff --git a/test/winalign.asm b/test/winalign.asm
new file mode 100644
index 00000000..62abf827
--- /dev/null
+++ b/test/winalign.asm
@@ -0,0 +1,45 @@
+	section .pdata rdata align=2
+	dd 1
+	dd 2
+	dd 3
+
+	section .rdata align=16
+	dd 4
+	dd 5
+	dd 6
+
+	section ultra
+	dd 10
+	dd 11
+	dd 12
+
+	section infra rdata
+	dd 20
+	dd 21
+	dd 22
+
+	section omega rdata align=1
+	dd 90
+	dd 91
+	dd 92
+	
+	section .xdata
+	dd 7
+	dd 8
+	dd 9
+
+	section ultra align=8
+	dd 13
+	dd 14
+	dd 15
+
+	section infra rdata align=1
+	dd 23
+	dd 24
+	dd 25
+
+	section omega rdata
+	sectalign 2
+	dd 93
+	dd 94
+	dd 95

From d9ea17fb47909ef27811c945b4c6e4b0742796d4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 18:40:05 -0700
Subject: [PATCH 16/29] preproc: saner handling of cpp-style line directives

NASM now supports a proper superset of cpp line number markers, so
there is no need to hack around them using the
"prepreprocessor". Instead, just put a quick test in do_directive()
treating it just like %line, except convert a "-quoted string into a
`-quoted string.

(This can break if there is a ` or \" sequence in the string... fix
that at some point. This is still much better than what there is now.)

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 asm/preproc.c   | 43 +++++++++++++++++++++++++------------------
 doc/nasmdoc.src | 15 ++++++++++++++-
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/asm/preproc.c b/asm/preproc.c
index 693cbcbb..8230f16f 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -945,26 +945,12 @@ static char *check_tasm_directive(char *line)
  * flags') into NASM preprocessor line number indications (`%line
  * lineno file').
  */
-static char *prepreproc(char *line)
+static inline char *prepreproc(char *line)
 {
-    int lineno, fnlen;
-    char *fname, *oldline;
-
-    if (line[0] == '#' && line[1] == ' ') {
-        oldline = line;
-        fname = oldline + 2;
-        lineno = atoi(fname);
-        fname += strspn(fname, "0123456789 ");
-        if (*fname == '"')
-            fname++;
-        fnlen = strcspn(fname, "\"");
-        line = nasm_malloc(20 + fnlen);
-        snprintf(line, 20 + fnlen, "%%line %d %.*s", lineno, fnlen, fname);
-        nasm_free(oldline);
-    }
-    if (tasm_compatible_mode)
+    if (unlikely(tasm_compatible_mode))
         return check_tasm_directive(line);
-    return line;
+    else
+        return line;
 }
 
 /*
@@ -3426,6 +3412,14 @@ static int do_directive(Token *tline, Token **output)
     *output = NULL;             /* No output generated */
     origline = tline;
 
+    if (tok_is(tline, '#')) {
+        /* cpp-style line directive */
+        if (!tok_white(tline->next))
+            return NO_DIRECTIVE_FOUND;
+        dname = tok_text(tline);
+        goto pp_line;
+    }
+
     tline = skip_white(tline);
     if (!tline || !tok_type(tline, TOK_PREPROC_ID))
 	return NO_DIRECTIVE_FOUND;
@@ -3448,6 +3442,7 @@ static int do_directive(Token *tline, Token **output)
      * in externally preprocessed sources.
      */
     if (op == PP_LINE) {
+    pp_line:
         /*
          * Syntax is `%line nnn[+mmm] [filename]'
          */
@@ -3478,7 +3473,19 @@ static int do_directive(Token *tline, Token **output)
         tline = skip_white(tline);
         if (tline) {
             if (tline->type == TOK_STRING) {
+                if (dname[0] == '#') {
+                    /* cpp version: treat double quotes like NASM backquotes */
+                    char *txt = tok_text_buf(tline);
+                    if (txt[0] == '"') {
+                        txt[0] = '`';
+                        txt[tline->len - 1] = '`';
+                    }
+                }
                 src_set_fname(unquote_token(tline));
+                /*
+                 * Anything after the string is ignored by design (for cpp
+                 * compatibility and future extensions.)
+                 */
             } else {
                 char *fname = detoken(tline, false);
                 src_set_fname(fname);
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index b52f854a..cfa92fd1 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -2753,6 +2753,11 @@ interfering with the local label mechanism, as described in
 (the \c{..@} prefix, then a number, then another period) in case
 they interfere with macro-local labels.
 
+These labels are really macro-local \e{tokens}, and can be used for
+other purposes where a token unique to each macro invocation is
+desired, e.g. to name single-line macros without using the context
+feature (\k{ctxlocal}).
+
 
 \S{mlmacgre} \i{Greedy Macro Parameters}
 
@@ -4047,7 +4052,8 @@ which specifies a line increment value; each line of the input file
 read in is considered to correspond to \c{mmm} lines of the original
 source file.  Finally, \c{filename} is an optional parameter which
 specifies the file name of the original source file. It may be a
-quoted string.
+quoted string, in which case any additional argument after the quoted
+string will be ignored.
 
 After reading a \c{%line} preprocessor directive, NASM will report
 all file name and line numbers relative to the values specified
@@ -4060,6 +4066,13 @@ code. See \k{opt-no-line}.
 Starting in NASM 2.15, \c{%line} directives are processed before any
 other processing takes place.
 
+For compatibility with the output from some other preprocessors,
+including many C preprocessors, a \c{#} character followed by
+whitespace \e{at the very beginning of a line} is also treated as a
+\c{%line} directive, except that double quotes surrounding the
+filename are treated like NASM backquotes, with \c{\\}-escaped
+sequences decoded.
+
 \# This isn't a directive, it should be moved elsewhere...
 \S{getenv} \i\c{%!}\e{variable}: Read an Environment Variable.
 

From bc5fc72d5bdaa10ea93fe2bb2b11892ef82bbbff Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Fri, 10 Jul 2020 18:46:12 -0700
Subject: [PATCH 17/29] NASM 2.15.03rc6

---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index 413fe765..65d7cb37 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.15.03rc5
+2.15.03rc6

From 65c6ba87166e3dd1143cc399ef535aa86145a450 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sat, 11 Jul 2020 19:12:13 +0300
Subject: [PATCH 18/29] BR 3392696: nasm_quote -- fixup callers

In 41e9682efed7cd1df133b1b4ac806e07723f1486 we changed the
nasm_quote arguments, but not all callers were converted,
which could lead to a nil dereference.

[hpa: no need to call strlen() for the asm/preproc.c chunk]

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 asm/nasm.c    | 4 +++-
 asm/preproc.c | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/asm/nasm.c b/asm/nasm.c
index c5d9aff1..a0e17193 100644
--- a/asm/nasm.c
+++ b/asm/nasm.c
@@ -455,6 +455,7 @@ static char *nasm_quote_filename(const char *fn)
 {
     const unsigned char *p =
         (const unsigned char *)fn;
+    size_t len;
 
     if (!p || !*p)
         return nasm_strdup("\"\"");
@@ -478,7 +479,8 @@ static char *nasm_quote_filename(const char *fn)
     return nasm_strdup(fn);
 
 quote:
-    return nasm_quote(fn, NULL);
+    len = strlen(fn);
+    return nasm_quote(fn, &len);
 }
 
 static void timestamp(void)
diff --git a/asm/preproc.c b/asm/preproc.c
index 8230f16f..8415d572 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -796,7 +796,7 @@ Token *quote_token(Token *t)
  */
 static Token *quote_any_token(Token *t)
 {
-    size_t len;
+    size_t len = t->len;
     char *p;
 
     p = nasm_quote(tok_text(t), &len);

From f1cf95480d9df9e7854a4be6844f86ff854e03d2 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 12 Jul 2020 14:04:44 +0300
Subject: [PATCH 19/29] travis: weirdpaste -- add nil dereference test

To address BR 3392696

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 travis/test/weirdpaste.asm   |  10 ++++++++++
 travis/test/weirdpaste.bin.t | Bin 42 -> 45 bytes
 travis/test/weirdpaste.i.t   |  10 ++++++++++
 3 files changed, 20 insertions(+)

diff --git a/travis/test/weirdpaste.asm b/travis/test/weirdpaste.asm
index 353ef8a8..e43d855e 100644
--- a/travis/test/weirdpaste.asm
+++ b/travis/test/weirdpaste.asm
@@ -24,3 +24,13 @@
 
 %define N 1e%++%+ 5
 	dd N, 1e+5
+
+;
+; test nil dereference, since we're
+; modifying with %line keep it last
+; in the file
+;
+; BR 3392696
+;
+%line 1 "`weirdpaste.asm"
+mov eax, eax
diff --git a/travis/test/weirdpaste.bin.t b/travis/test/weirdpaste.bin.t
index db0468ed1f8aa557a898f0f3ed5835cdee7a570e..23061d3d9814d0185394c375baee94dd711c3e4b 100644
GIT binary patch
delta 8
PcmdPWouI{>)_DK`3Dp9`

delta 4
LcmdPZnxF*$0)PP$

diff --git a/travis/test/weirdpaste.i.t b/travis/test/weirdpaste.i.t
index bf2acd6c..1df996dc 100644
--- a/travis/test/weirdpaste.i.t
+++ b/travis/test/weirdpaste.i.t
@@ -21,3 +21,13 @@
 
 
  dd 1e+5, 1e+5
+
+
+
+
+
+
+
+
+%line 2+1 '`weirdpaste.asm'
+mov eax, eax

From 4c0bd9e73696c2643e7823489e56ad3b70290a4a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 15 Jul 2020 16:16:57 -0700
Subject: [PATCH 20/29] preproc: BR 2292698: fix handling of whitespace around
 %+

The previous code to fix whitespace around and multiple %+ symbols in
a row (checkin 122c5fb75986adc37dfb147cc2a613e3ebc66e80) had some
seriously broken pointer handling when zapping tokens. This could
cause paste_tokens() to go into an infinite loop because it would
attach %+ to another token and then immediately break them apart
again, over and over.

Reported-by: <alexfru@gmail.com>
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 asm/preproc.c | 55 +++++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/asm/preproc.c b/asm/preproc.c
index 8415d572..2f8fb3cd 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -4728,7 +4728,7 @@ static inline bool pp_concat_match(const Token *t, unsigned int mask)
 static bool paste_tokens(Token **head, const struct tokseq_match *m,
                          size_t mnum, bool handle_explicit)
 {
-    Token *tok, *t, *next, **prev_next, **prev_nonspace;
+    Token *tok, *t, *next, **prev_next, **prev_nonspace, **nextp;
     bool pasted = false;
     char *buf, *p;
     size_t len, i;
@@ -4765,30 +4765,28 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m,
             if (!handle_explicit)
                 break;
 
-            /* Left pasting token is start of line, just drop %+ */
-            if (!prev_nonspace) {
-                tok = delete_Token(tok);
-                break;
-            }
-
             did_paste = true;
 
-            prev_next = prev_nonspace;
-            t = *prev_nonspace;
-
-            /* Delete leading whitespace */
-            next = zap_white(t->next);
+            /* Left pasting token is start of line, just drop %+ */
+            if (!prev_nonspace) {
+                prev_next = nextp = head;
+                t = NULL;
+            } else {
+                prev_next = prev_nonspace;
+                t = *prev_next;
+                nextp = &t->next;
+            }
 
             /*
-             * Delete the %+ token itself, followed by any whitespace.
+             * Delete the %+ token itself plus any whitespace.
              * In a sequence of %+ ... %+ ... %+ pasting sequences where
              * some expansions in the middle have ended up empty,
              * we can end up having multiple %+ tokens in a row;
              * just drop whem in that case.
              */
-            while (next) {
+            while ((next = *nextp)) {
                 if (next->type == TOK_PASTE || next->type == TOK_WHITESPACE)
-                    next = delete_Token(next);
+                    *nextp = delete_Token(next);
                 else
                     break;
             }
@@ -4796,11 +4794,16 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m,
             /*
              * Nothing after? Just leave the existing token.
              */
-            if (!next) {
-                t->next = tok = NULL; /* End of line */
+            if (!next)
+                break;
+
+            if (!t) {
+                /* Nothing to actually paste, just zapping the paste */
+                *prev_next = tok = next;
                 break;
             }
 
+            /* An actual paste */
             p = buf = nasm_malloc(t->len + next->len + 1);
             p = mempcpy(p, tok_text(t), t->len);
             p = mempcpy(p, tok_text(next), next->len);
@@ -4814,10 +4817,10 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m,
                  * No output at all? Replace with a single whitespace.
                  * This should never happen.
                  */
-                t = new_White(NULL);
+                tok = t = new_White(NULL);
+            } else {
+                *prev_nonspace = tok = t;
             }
-
-            *prev_nonspace = tok = t;
             while (t->next)
                 t = t->next;    /* Find the last token produced */
 
@@ -4825,7 +4828,7 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m,
             t->next = delete_Token(next);
 
             /* We want to restart from the head of the pasted token */
-            next = tok;
+            *prev_next = next = tok;
             break;
 
         default:
@@ -4861,10 +4864,14 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m,
              * Connect pasted into original stream,
              * ie A -> new-tokens -> B
              */
-            while (t->next)
-                t = t->next;
+            while ((tok = t->next)) {
+                if (tok->type != TOK_WHITESPACE && tok->type != TOK_PASTE)
+                    prev_nonspace = &t->next;
+                t = tok;
+            }
+
             t->next = next;
-            prev_next = prev_nonspace = &t->next;
+            prev_next = &t->next;
             did_paste = true;
             break;
         }

From 271dc7a7f7d6192b2a0ec6f660027cc13127b13e Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 15 Jul 2020 16:21:08 -0700
Subject: [PATCH 21/29] NASM 2.15.03rc7

---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index 65d7cb37..f3632bf4 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.15.03rc6
+2.15.03rc7

From 36814f1fc83c4876ebb6a1cfec5ee000316a1127 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 16 Jul 2020 20:26:58 -0700
Subject: [PATCH 22/29] autoconf: look for _Decltype as yet another alias for
 typeof().

If the past is any indication, the final standardization of typeof()
probably will be _Decltype().

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 autoconf/m4/pa_c_typeof.m4 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/autoconf/m4/pa_c_typeof.m4 b/autoconf/m4/pa_c_typeof.m4
index d182ea2c..87b9ed60 100644
--- a/autoconf/m4/pa_c_typeof.m4
+++ b/autoconf/m4/pa_c_typeof.m4
@@ -7,7 +7,7 @@ dnl --------------------------------------------------------------------------
 AC_DEFUN([PA_C_TYPEOF],
 [AC_CACHE_CHECK([if $CC supports typeof], [pa_cv_typeof],
  [pa_cv_typeof=no
- for pa_typeof_try in typeof __typeof __typeof__ decltype __decltype __decltype__
+ for pa_typeof_try in typeof __typeof __typeof__ decltype __decltype __decltype__ _Decltype
  do
   AS_IF([test $pa_cv_typeof = no],
         [AC_COMPILE_IFELSE([AC_LANG_SOURCE([
@@ -29,4 +29,4 @@ int testme(int x)
 	AS_IF([test $pa_cv_typeof = typeof],
 	      [],
 	      [AC_DEFINE_UNQUOTED([typeof], [$pa_cv_typeof],
-	        [Define if your typeof operator is not named typeof.])])])])
+	        [Define if your typeof operator is not named `typeof'.])])])])

From b31a4c9906459215d406de6ce116b77c09af5635 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 16 Jul 2020 21:48:28 -0700
Subject: [PATCH 23/29] Add support for new instructions from ISE June 2020

Add support for new instructions as defined in the Instruction Set
Extensions manual as of June 2020.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 asm/assemble.c    | 41 +++++++++++++++++++++++++++++------------
 disasm/disasm.c   | 18 ++++++++++++++++++
 doc/changes.src   |  3 +++
 include/opflags.h | 32 +++++++++++++++++---------------
 test/amx.asm      | 36 ++++++++++++++++++++++++++++++++++++
 x86/iflags.ph     | 10 ++++++++++
 x86/insns.dat     | 45 +++++++++++++++++++++++++++++++++++++++++++++
 x86/insns.pl      | 20 +++++++++++---------
 x86/regs.dat      |  3 +++
 9 files changed, 172 insertions(+), 36 deletions(-)
 create mode 100644 test/amx.asm

diff --git a/asm/assemble.c b/asm/assemble.c
index 49faa6b8..c82fcb1d 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -63,17 +63,18 @@
  *                                          assembly mode or the operand-size override on the operand
  * \70..\73         rel32                   a long relative operand, from operand 0..3
  * \74..\77         seg                     a word constant, from the _segment_ part of operand 0..3
- * \1ab                                     a ModRM, calculated on EA in operand a, with the spare
+ * \1ab             /r                      a ModRM, calculated on EA in operand a, with the reg
  *                                          field the register value of operand b.
- * \172\ab                                  the register number from operand a in bits 7..4, with
+ * \171\mab         /mrb (e.g /3r0)         a ModRM, with the reg field taken from operand a, and the m
+ *                                          and b fields set to the specified values.
+ * \172\ab          /is4                    the register number from operand a in bits 7..4, with
  *                                          the 4-bit immediate from operand b in bits 3..0.
  * \173\xab                                 the register number from operand a in bits 7..4, with
  *                                          the value b in bits 3..0.
  * \174..\177                               the register number from operand 0..3 in bits 7..4, and
  *                                          an arbitrary value in bits 3..0 (assembled as zero.)
- * \2ab                                     a ModRM, calculated on EA in operand a, with the spare
+ * \2ab             /b                      a ModRM, calculated on EA in operand a, with the reg
  *                                          field equal to digit b.
- *
  * \240..\243                               this instruction uses EVEX rather than REX or VEX/XOP, with the
  *                                          V field taken from operand 0..3.
  * \250                                     this instruction uses EVEX rather than REX or VEX/XOP, with the
@@ -103,12 +104,11 @@
  *                tup is tuple type for Disp8*N from %tuple_codes in insns.pl
  *                    (compressed displacement encoding)
  *
- * \254..\257       id,s                        a signed 32-bit operand to be extended to 64 bits.
- * \260..\263                                   this instruction uses VEX/XOP rather than REX, with the
- *                                              V field taken from operand 0..3.
- * \270                                         this instruction uses VEX/XOP rather than REX, with the
- *                                              V field set to 1111b.
- *
+ * \254..\257       id,s                    a signed 32-bit operand to be extended to 64 bits.
+ * \260..\263                               this instruction uses VEX/XOP rather than REX, with the
+ *                                          V field taken from operand 0..3.
+ * \270                                     this instruction uses VEX/XOP rather than REX, with the
+ *                                          V field set to 1111b.
  * VEX/XOP prefixes are followed by the sequence:
  * \tmm\wlp        where mm is the M field; and wlp is:
  *                 00 wwl lpp
@@ -1317,6 +1317,14 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             length += 2;
             break;
 
+        case 0171:
+            c = *codes++;
+            op2 = (op2 & ~3) | ((c >> 3) & 3);
+            opx = &ins->oprs[op2];
+            ins->rex |= op_rexflags(opx, REX_R|REX_H|REX_P|REX_W);
+            length++;
+            break;
+
         case 0172:
         case 0173:
             codes++;
@@ -1951,6 +1959,15 @@ static void gencode(struct out_data *data, insn *ins)
             out_segment(data, opx);
             break;
 
+        case 0171:
+            c = *codes++;
+            op2 = (op2 & ~3) | ((c >> 3) & 3);
+            opx = &ins->oprs[op2];
+            r = nasm_regvals[opx->basereg];
+            c = (c & ~070) | ((r & 7) << 3);
+            out_rawbyte(data, c);
+            break;
+
         case 0172:
         {
             int mask = ins->prefixes[PPS_VEX] == P_EVEX ? 7 : 15;
@@ -2807,7 +2824,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                  input->disp_size != (addrbits != 16 ? 32 : 16)))
                 nasm_warn(WARN_OTHER, "displacement size ignored on absolute address");
 
-            if (bits == 64 && (~input->type & IP_REL)) {
+            if ((eaflags & EAF_MIB) || (bits == 64 && (~input->type & IP_REL))) {
                 output->sib_present = true;
                 output->sib         = GEN_SIB(0, 4, 5);
                 output->bytes       = 4;
@@ -3026,7 +3043,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
                 output->rex |= rexflags(it, ix, REX_X);
                 output->rex |= rexflags(bt, bx, REX_B);
 
-                if (it == -1 && (bt & 7) != REG_NUM_ESP) {
+                if (it == -1 && (bt & 7) != REG_NUM_ESP && !(eaflags & EAF_MIB)) {
                     /* no SIB needed */
                     int mod, rm;
 
diff --git a/disasm/disasm.c b/disasm/disasm.c
index e1b5ebc3..7c6ea816 100644
--- a/disasm/disasm.c
+++ b/disasm/disasm.c
@@ -203,6 +203,8 @@ static enum reg_enum whichreg(opflags_t regflags, int regval, int rex)
         return GET_REGISTER(nasm_rd_opmaskreg, regval);
     if (!(BNDREG & ~regflags))
         return GET_REGISTER(nasm_rd_bndreg, regval);
+    if (!(TMMREG & ~regflags))
+        return GET_REGISTER(nasm_rd_tmmreg, regval);
 
 #undef GET_REGISTER
     return 0;
@@ -679,6 +681,22 @@ static int matches(const struct itemplate *t, uint8_t *data,
             break;
         }
 
+        case 0171:
+        {
+            uint8_t t = *r++;
+            uint8_t d = *data++;
+            if ((d ^ t) & ~070) {
+                return 0;
+            } else {
+                op2 = (op2 & ~3) | ((t >> 3) & 3);
+                opy = &ins->oprs[op2];
+                opy->basereg = ((d >> 3) & 7) +
+                    (ins->rex & REX_R ? 8 : 0);
+                opy->segment |= SEG_RMREG;
+            }
+            break;
+        }
+
         case 0172:
             {
                 uint8_t ximm = *data++;
diff --git a/doc/changes.src b/doc/changes.src
index db58bdf6..cf95224a 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -9,6 +9,9 @@ since 2007.
 
 \S{cl-2.15.03} Version 2.15.03
 
+\b Add instructions from the Intel Instruction Set Extensions and
+Future Features Programming Reference, June 2020.
+
 \b Properly display warnings in preprocess-only mode.
 
 \b Fix copy-and-paste of examples from the PDF documentation.
diff --git a/include/opflags.h b/include/opflags.h
index 28bb236f..f5dd50ba 100644
--- a/include/opflags.h
+++ b/include/opflags.h
@@ -81,19 +81,19 @@
 /*
  * Register classes.
  *
- * Bits: 7 - 16
+ * Bits: 7 - 17
  */
 #define REG_CLASS_SHIFT         (7)
-#define REG_CLASS_BITS          (10)
+#define REG_CLASS_BITS          (11)
 #define REG_CLASS_MASK          OP_GENMASK(REG_CLASS_BITS, REG_CLASS_SHIFT)
 #define GEN_REG_CLASS(bit)      OP_GENBIT(bit, REG_CLASS_SHIFT)
 
 /*
  * Subclasses. Depends on type of operand.
  *
- * Bits: 17 - 24
+ * Bits: 18 - 25
  */
-#define SUBCLASS_SHIFT          (17)
+#define SUBCLASS_SHIFT          (18)
 #define SUBCLASS_BITS           (8)
 #define SUBCLASS_MASK           OP_GENMASK(SUBCLASS_BITS, SUBCLASS_SHIFT)
 #define GEN_SUBCLASS(bit)       OP_GENBIT(bit, SUBCLASS_SHIFT)
@@ -101,9 +101,9 @@
 /*
  * Special flags. Context dependant.
  *
- * Bits: 25 - 31
+ * Bits: 26 - 32
  */
-#define SPECIAL_SHIFT           (25)
+#define SPECIAL_SHIFT           (26)
 #define SPECIAL_BITS            (7)
 #define SPECIAL_MASK            OP_GENMASK(SPECIAL_BITS, SPECIAL_SHIFT)
 #define GEN_SPECIAL(bit)        OP_GENBIT(bit, SPECIAL_SHIFT)
@@ -111,9 +111,9 @@
 /*
  * Sizes of the operands and attributes.
  *
- * Bits: 32 - 42
+ * Bits: 33 - 43
  */
-#define SIZE_SHIFT              (32)
+#define SIZE_SHIFT              (33)
 #define SIZE_BITS               (11)
 #define SIZE_MASK               OP_GENMASK(SIZE_BITS, SIZE_SHIFT)
 #define GEN_SIZE(bit)           OP_GENBIT(bit, SIZE_SHIFT)
@@ -121,9 +121,9 @@
 /*
  * Register set count
  *
- * Bits: 47 - 43
+ * Bits: 44 - 48
  */
-#define REGSET_SHIFT            (43)
+#define REGSET_SHIFT            (44)
 #define REGSET_BITS             (5)
 #define REGSET_MASK             OP_GENMASK(REGSET_BITS, REGSET_SHIFT)
 #define GEN_REGSET(bit)         OP_GENBIT(bit, REGSET_SHIFT)
@@ -138,11 +138,11 @@
  *
  * ............................................................1111 optypes
  * .........................................................111.... modifiers
- * ...............................................1111111111....... register classes
- * .......................................11111111................. subclasses
- * ................................1111111......................... specials
- * .....................11111111111................................ sizes
- * ................11111........................................... regset count
+ * ..............................................11111111111....... register classes
+ * ......................................11111111.................. subclasses
+ * ...............................1111111.......................... specials
+ * ....................11111111111................................. sizes
+ * ...............11111............................................ regset count
  */
 
 #define REGISTER                GEN_OPTYPE(0)                   /* register number in 'basereg' */
@@ -176,6 +176,7 @@
 #define REG_CLASS_RM_ZMM        GEN_REG_CLASS(7)
 #define REG_CLASS_OPMASK        GEN_REG_CLASS(8)
 #define REG_CLASS_BND           GEN_REG_CLASS(9)
+#define REG_CLASS_RM_TMM	GEN_REG_CLASS(10)
 
 static inline bool is_class(opflags_t class, opflags_t op)
 {
@@ -217,6 +218,7 @@ static inline bool is_reg_class(opflags_t class, opflags_t reg)
 #define KREG                    OPMASKREG
 #define RM_BND                  (                  REG_CLASS_BND              | REGMEM)                 /* Bounds operand */
 #define BNDREG                  (                  REG_CLASS_BND              | REGMEM | REGISTER)      /* Bounds register */
+#define TMMREG                  (                  REG_CLASS_RM_TMM           | REGMEM | REGISTER)      /* TMM (AMX) register */
 #define REG_CDT                 (                  REG_CLASS_CDT    | BITS32           | REGISTER)      /* CRn, DRn and TRn */
 #define REG_CREG                (GEN_SUBCLASS(1) | REG_CLASS_CDT    | BITS32           | REGISTER)      /* CRn */
 #define REG_DREG                (GEN_SUBCLASS(2) | REG_CLASS_CDT    | BITS32           | REGISTER)      /* DRn */
diff --git a/test/amx.asm b/test/amx.asm
new file mode 100644
index 00000000..88455508
--- /dev/null
+++ b/test/amx.asm
@@ -0,0 +1,36 @@
+	bits 64
+
+%macro amx 1
+  %define treg tmm %+ %1
+
+	ldtilecfg [rsi]
+	sttilecfg [rdi]
+
+	tilezero treg
+
+	tileloadd treg, [rax]
+	tileloadd treg, [rax,rdx]
+	tileloadd treg, [rax,rdx*2]
+
+	tileloaddt1 treg, [rax]
+	tileloaddt1 treg, [rax,rdx]
+	tileloaddt1 treg, [rax,rdx*2]
+
+	tdpbf16ps treg, treg, treg
+	tdpbssd treg, treg, treg
+	tdpbusd treg, treg, treg
+	tdpbsud treg, treg, treg
+	tdpbuud treg, treg, treg
+
+	tilestored [rax], treg
+	tilestored [rax,rdx], treg
+	tilestored [rax,rdx*2], treg
+
+	tilerelease
+%endmacro
+
+%assign n 0
+  %rep 8
+	amx n
+    %assign n n+1
+  %endrep
diff --git a/x86/iflags.ph b/x86/iflags.ph
index 2c05b293..7067d740 100644
--- a/x86/iflags.ph
+++ b/x86/iflags.ph
@@ -84,6 +84,16 @@ if_("AVX5124FMAPS",      "AVX-512 4-iteration multiply-add");
 if_("AVX5124VNNIW",      "AVX-512 4-iteration dot product");
 if_("SGX",               "Intel Software Guard Extensions (SGX)");
 if_("CET",               "Intel Control-Flow Enforcement Technology (CET)");
+if_("ENQCMD",            "Enqueue command instructions");
+if_("PCONFIG",           "Platform configuration instruction");
+if_("WBNOINVD",          "Writeback and do not invalidate instruction");
+if_("TSXLDTRK",          "TSX suspend load address tracking");
+if_("SERIALIZE",         "SERIALIZE instruction");
+if_("AVX512BF16",        "AVX-512 bfloat16");
+if_("AVX512VP2INTERSECT", "AVX-512 VP2INTERSECT instructions");
+if_("AMXTILE",           "AMX tile configuration instructions");
+if_("AMXBF16",           "AMX bfloat16 multiplication");
+if_("AMXINT8",           "AMX 8-bit integer multiplication");
 
 # Put these last [hpa: why?]
 if_("OBSOLETE",          "Instruction removed from architecture");
diff --git a/x86/insns.dat b/x86/insns.dat
index 980c5943..2776cfdf 100644
--- a/x86/insns.dat
+++ b/x86/insns.dat
@@ -5999,6 +5999,51 @@ WRUSSQ		mem,reg64			[mr:	o64 66 0f 38 f5 /r]			CET,FUTURE,X64
 WRSSD		mem,reg32			[mr:	o32 0f 38 f6 /r]			CET,FUTURE
 WRSSQ		mem,reg64			[mr:	o64 0f 38 f6 /r]			CET,FUTURE,X64
 
+;# Instructions from ISE doc 319433-040, June 2020
+ENQCMD		reg16,mem512			[rm:	a16 f2 0f 38 f8 /r]			ENQCMD,FUTURE
+ENQCMD		reg32,mem512			[rm:	a16 f2 0f 38 f8 /r]			ENQCMD,FUTURE,ND
+ENQCMD		reg32,mem512			[rm:	a32 f2 0f 38 f8 /r]			ENQCMD,FUTURE
+ENQCMD		reg64,mem512			[rm:	a64 f2 0f 38 f8 /r]			ENQCMD,FUTURE,X64
+ENQCMDS		reg16,mem512			[rm:	a16 f2 0f 38 f8 /r]			ENQCMD,FUTURE,PRIV
+ENQCMDS		reg32,mem512			[rm:	a16 f2 0f 38 f8 /r]			ENQCMD,FUTURE,PRIV,ND
+ENQCMDS		reg32,mem512			[rm:	a32 f2 0f 38 f8 /r]			ENQCMD,FUTURE,PRIV
+ENQCMDS		reg64,mem512			[rm:	a64 f2 0f 38 f8 /r]			ENQCMD,FUTURE,PRIV,X64
+PCONFIG		void				[	np 0f 01 c5]				PCONFIG,FUTURE,PRIV
+SERIALIZE	void				[	np 0f 01 e8]				SERIALIZE,FUTURE
+WBNOINVD	void				[	f3 0f 09]				WBNOINVD,FUTURE,PRIV
+XRESLDTRK	void				[	f2 0f 01 e9]				TSXLDTRK,FUTURE
+XSUSLDTRK	void				[	f2 0f 01 e8]				TSXLDTRK,FUTURE
+
+;# AVX512 Bfloat16 instructions
+VCVTNE2PS2BF16	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:	evex.128.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	ymmreg|mask|z,ymmreg*,ymmrm256|b32	[rvm:	evex.256.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	zmmreg|mask|z,zmmreg*,zmmrm512|b32	[rvm:	evex.512.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:	evex.128.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	ymmreg|mask|z,ymmreg*,ymmrm256|b32	[rvm:	evex.256.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	zmmreg|mask|z,zmmreg*,zmmrm512|b32	[rvm:	evex.512.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VDPBF16PS	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:	evex.128.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
+VDPBF16PS	ymmreg|mask|z,ymmreg*,ymmrm128|b32	[rvm:	evex.256.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
+VDPBF16PS	zmmreg|mask|z,zmmreg*,zmmrm128|b32	[rvm:	evex.512.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
+
+;# AVX512 mask intersect instructions
+VP2INTERSECTD	kreg|rs2,xmmreg,xmmrm128|b32		[rvm:	evex.nds.128.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
+VP2INTERSECTD	kreg|rs2,ymmreg,ymmrm128|b32		[rvm:	evex.nds.256.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
+VP2INTERSECTD	kreg|rs2,zmmreg,zmmrm128|b32		[rvm:	evex.nds.512.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
+
+;# Intel Advanced Matrix Extensions (AMX)
+LDTILECFG	mem512				[m:	vex.128.np.0f38.w0 49 /0]		AMXTILE,FUTURE,SZ,X64
+STTILECFG	mem512				[m:	vex.128.66.0f38.w0 49 /0]		AMXTILE,FUTURE,SZ,X64
+TDPBF16PS	tmmreg,tmmreg,tmmreg		[rmv:	vex.128.f3.0f38.w0 5c /r]		AMXBF16,FUTURE,X64
+TDPBSSD		tmmreg,tmmreg,tmmreg		[rmv:	vex.128.f2.0f38.w0 5e /r]		AMXINT8,FUTURE,X64
+TDPBSUD		tmmreg,tmmreg,tmmreg		[rmv:	vex.128.f3.0f38.w0 5e /r]		AMXINT8,FUTURE,X64
+TDPBUSD		tmmreg,tmmreg,tmmreg		[rmv:	vex.128.66.0f38.w0 5e /r]		AMXINT8,FUTURE,X64
+TDPBUUD		tmmreg,tmmreg,tmmreg		[rmv:	vex.128.np.0f38.w0 5e /r]		AMXINT8,FUTURE,X64
+TILELOADD	tmmreg,mem			[rm:	vex.128.f2.0f38.w0 4b /r]		AMXTILE,MIB,FUTURE,SX,X64
+TILELOADDT1	tmmreg,mem			[rm:	vex.128.f2.0f38.w0 4b /r]		AMXTILE,MIB,FUTURE,SX,X64
+TILERELEASE	void				[	vex.128.np.0f38.w0 49 c0]		AMXTILE,FUTURE,X64
+TILESTORED	mem,tmmreg			[mr:	vex.128.f3.0f38.w0 4b /r]		AMXTILE,MIB,FUTURE,SX,X64
+TILEZERO	tmmreg				[r:	vex.128.f2.0f38.w0 49 /3r0]		AMXTILE,FUTURE,X64
+
 ;# Systematic names for the hinting nop instructions
 ; These should be last in the file
 HINT_NOP0	rm16				[m:	o16 0f 18 /0]				P6,UNDOC
diff --git a/x86/insns.pl b/x86/insns.pl
index cd9aaf4f..911ef7eb 100755
--- a/x86/insns.pl
+++ b/x86/insns.pl
@@ -880,11 +880,19 @@ sub byte_code_compile($$) {
             $prefix_ok = 0;
         } elsif ($op =~ m:^/([0-7])$:) {
             if (!defined($oppos{'m'})) {
-                die "$fname:$line: $op requires m operand\n";
+                die "$fname:$line: $op requires an m operand\n";
             }
             push(@codes, 06) if ($oppos{'m'} & 4);
             push(@codes, 0200 + (($oppos{'m'} & 3) << 3) + $1);
             $prefix_ok = 0;
+	} elsif ($op =~ m:^/([0-3]?)r([0-7])$:) {
+	    if (!defined($oppos{'r'})) {
+                die "$fname:$line: $op requires an r operand\n";
+	    }
+	    push(@codes, 05) if ($oppos{'r'} & 4);
+	    push(@codes, 0171);
+	    push(@codes, (($1+0) << 6) + (($oppos{'r'} & 3) << 3) + $2);
+	    $prefix_ok = 0;
         } elsif ($op =~ /^(vex|xop)(|\..*)$/) {
             my $vexname = $1;
             my $c = $vexmap{$vexname};
@@ -907,7 +915,7 @@ sub byte_code_compile($$) {
                         $w = 2;
                     } elsif ($oq eq 'ww') {
                         $w = 3;
-                    } elsif ($oq eq 'p0') {
+                    } elsif ($oq eq 'np' || $oq eq 'p0') {
                         $p = 0;
                     } elsif ($oq eq '66' || $oq eq 'p1') {
                         $p = 1;
@@ -935,9 +943,6 @@ sub byte_code_compile($$) {
             if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) {
                 die "$fname:$line: missing fields in \U$vexname\E specification\n";
             }
-            if (defined($oppos{'v'}) && !$has_nds) {
-                die "$fname:$line: 'v' operand without ${vexname}.nds or ${vexname}.ndd\n";
-            }
 	    my $minmap = ($c == 1) ? 8 : 0; # 0-31 for VEX, 8-31 for XOP
 	    if ($m < $minmap || $m > 31) {
 		die "$fname:$line: Only maps ${minmap}-31 are valid for \U${vexname}\n";
@@ -966,7 +971,7 @@ sub byte_code_compile($$) {
                         $w = 2;
                     } elsif ($oq eq 'ww') {
                         $w = 3;
-                    } elsif ($oq eq 'p0') {
+                    } elsif ($oq eq 'np' || $oq eq 'p0') {
                         $p = 0;
                     } elsif ($oq eq '66' || $oq eq 'p1') {
                         $p = 1;
@@ -994,9 +999,6 @@ sub byte_code_compile($$) {
             if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) {
                 die "$fname:$line: missing fields in EVEX specification\n";
             }
-            if (defined($oppos{'v'}) && !$has_nds) {
-                die "$fname:$line: 'v' operand without evex.nds or evex.ndd\n";
-            }
 	    if ($m > 15) {
 		die "$fname:$line: Only maps 0-15 are valid for EVEX\n";
 	    }
diff --git a/x86/regs.dat b/x86/regs.dat
index 723f6a44..cec8420f 100644
--- a/x86/regs.dat
+++ b/x86/regs.dat
@@ -130,6 +130,9 @@ zmm0	ZMM0		zmmreg		0
 zmm1-15	ZMM_L16		zmmreg		1
 zmm16-31	ZMMREG		zmmreg		16
 
+# AMX tile registers
+tmm0-7	TMMREG		tmmreg		0
+
 # Opmask registers
 k0	OPMASK0		opmaskreg	0
 k1-7	OPMASKREG	opmaskreg	1   TFLAG_BRC_OPT

From e830e92b7792a3a8c0e81774c0fb6b3414398753 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 16 Jul 2020 22:50:33 -0700
Subject: [PATCH 24/29] preproc: when printing errors, don't descend into
 unlisted macros

If macros are nolisted, *or* they don't have any filename associated
with them, it is absolutely pointless to try to descend into them for
error messages, so just don't, even if -Lb is provided.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 asm/preproc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/asm/preproc.c b/asm/preproc.c
index 2f8fb3cd..fec9520c 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -7046,6 +7046,8 @@ static void pp_error_list_macros(errflags severity)
     severity |= ERR_PP_LISTMACRO | ERR_NO_SEVERITY | ERR_HERE;
 
     while ((m = src_error_down())) {
+        if ((m->nolist & NL_LIST) || !m->where.filename)
+            break;
 	nasm_error(severity, "... from macro `%s' defined", m->name);
     }
 

From d081f0db5d491ee473fdb97b109dd9810b68d9b7 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 16 Jul 2020 23:11:03 -0700
Subject: [PATCH 25/29] fp: support bfloat16 constants

Support generating bfloat16 constants. This is a bit awkward, as "DW"
already generates IEEE half precision constants; therefore there is no
longer a single floating-point format for each size. This requires
some replumbing.

Fortunately bfloat16 fits in 64 bits, so support generating them with
a macro that uses __?bfloat16?__() to convert to integers first before
passing them to DW.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 asm/eval.c      | 25 +++++++----------
 asm/floats.c    | 74 ++++++++++++++++++++++++++-----------------------
 asm/floats.h    | 17 ++++++++++--
 asm/parser.c    |  7 +++--
 asm/tokens.dat  |  3 ++
 doc/changes.src |  3 ++
 doc/nasmdoc.src | 20 +++++++++++--
 include/nasm.h  |  5 +++-
 macros/fp.mac   | 10 ++++++-
 test/float.asm  | 36 +++++++++++++++++++++++-
 10 files changed, 140 insertions(+), 60 deletions(-)

diff --git a/asm/eval.c b/asm/eval.c
index cd3c526d..5d6ee1e7 100644
--- a/asm/eval.c
+++ b/asm/eval.c
@@ -694,21 +694,13 @@ static expr *expr5(void)
 static expr *eval_floatize(enum floatize type)
 {
     uint8_t result[16], *p;     /* Up to 128 bits */
-    static const struct {
-        int bytes, start, len;
-    } formats[] = {
-        {  1, 0, 1 },           /* FLOAT_8 */
-        {  2, 0, 2 },           /* FLOAT_16 */
-        {  4, 0, 4 },           /* FLOAT_32 */
-        {  8, 0, 8 },           /* FLOAT_64 */
-        { 10, 0, 8 },           /* FLOAT_80M */
-        { 10, 8, 2 },           /* FLOAT_80E */
-        { 16, 0, 8 },           /* FLOAT_128L */
-        { 16, 8, 8 },           /* FLOAT_128H */
-    };
     int sign = 1;
     int64_t val;
+    size_t len;
     int i;
+    const struct ieee_format *fmt;
+
+    fmt = &fp_formats[type];
 
     scan();
     if (tt != '(') {
@@ -724,7 +716,7 @@ static expr *eval_floatize(enum floatize type)
         nasm_nonfatal("expecting floating-point number");
         return NULL;
     }
-    if (!float_const(tokval->t_charptr, sign, result, formats[type].bytes))
+    if (!float_const(tokval->t_charptr, sign, result, type))
         return NULL;
     scan();
     if (tt != ')') {
@@ -732,9 +724,12 @@ static expr *eval_floatize(enum floatize type)
         return NULL;
     }
 
-    p = result+formats[type].start+formats[type].len;
+    len = fmt->bytes - fmt->offset;
+    if (len > 8)
+        len = 8;                /* Max 64 bits */
+    p = result + len;
     val = 0;
-    for (i = formats[type].len; i; i--) {
+    for (i = len; i; i--) {
         p--;
         val = (val << 8) + *p;
     }
diff --git a/asm/floats.c b/asm/floats.c
index adc6afbf..27180bdc 100644
--- a/asm/floats.c
+++ b/asm/floats.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -629,13 +629,6 @@ static void ieee_shr(fp_limb *mant, int i)
    - the sign bit plus exponent fit in 16 bits.
    - the exponent bias is 2^(n-1)-1 for an n-bit exponent */
 
-struct ieee_format {
-    int bytes;
-    int mantissa;               /* Fractional bits in the mantissa */
-    int explicit;               /* Explicit integer */
-    int exponent;               /* Bits in the exponent */
-};
-
 /*
  * The 16- and 128-bit formats are expected to be in IEEE 754r.
  * AMD SSE5 uses the 16-bit format.
@@ -646,13 +639,31 @@ struct ieee_format {
  *
  * The 8-bit format appears to be the consensus 8-bit floating-point
  * format.  It is apparently used in graphics applications.
+ *
+ * The b16 format is a 16-bit format with smaller mantissa and larger
+ * exponent field.  It is effectively a truncated version of the standard
+ * IEEE 32-bit (single) format, but is explicitly supported here in
+ * order to support proper rounding.
+ *
+ * This array must correspond to enum floatize in include/nasm.h.
+ * Note that there are some formats which have more than one enum;
+ * both need to be listed here with the appropriate offset into the
+ * floating-point byte array (used for the floatize operators.)
+ *
+ * FLOAT_ERR is a value that both represents "invalid format" and the
+ * size of this array.
  */
-static const struct ieee_format ieee_8   = {  1,   3, 0,  4 };
-static const struct ieee_format ieee_16  = {  2,  10, 0,  5 };
-static const struct ieee_format ieee_32  = {  4,  23, 0,  8 };
-static const struct ieee_format ieee_64  = {  8,  52, 0, 11 };
-static const struct ieee_format ieee_80  = { 10,  63, 1, 15 };
-static const struct ieee_format ieee_128 = { 16, 112, 0, 15 };
+const struct ieee_format fp_formats[FLOAT_ERR] = {
+    {  1,   3, 0,  4, 0 },         /* FLOAT_8 */
+    {  2,  10, 0,  5, 0 },         /* FLOAT_16 */
+    {  2,   7, 0,  8, 0 },         /* FLOAT_B16 */
+    {  4,  23, 0,  8, 0 },         /* FLOAT_32 */
+    {  8,  52, 0, 11, 0 },         /* FLOAT_64 */
+    { 10,  63, 1, 15, 0 },         /* FLOAT_80M */
+    { 10,  63, 1, 15, 8 },         /* FLOAT_80E */
+    { 16, 112, 0, 15, 0 },         /* FLOAT_128L */
+    { 16, 112, 0, 15, 8 }          /* FLOAT_128H */
+};
 
 /* Types of values we can generate */
 enum floats {
@@ -672,7 +683,7 @@ static int to_packed_bcd(const char *str, const char *p,
     char c;
     int tv = -1;
 
-    if (fmt != &ieee_80) {
+    if (fmt->bytes != 10) {
         nasm_nonfatal("packed BCD requires an 80-bit format");
         return 0;
     }
@@ -711,9 +722,9 @@ static int to_packed_bcd(const char *str, const char *p,
     return 1;                   /* success */
 }
 
-static int to_float(const char *str, int s, uint8_t *result,
-                    const struct ieee_format *fmt)
+int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt)
 {
+    const struct ieee_format *fmt = &fp_formats[ffmt];
     fp_limb mant[MANT_LIMBS];
     int32_t exponent = 0;
     const int32_t expmax = 1 << (fmt->exponent - 1);
@@ -902,25 +913,20 @@ static int to_float(const char *str, int s, uint8_t *result,
     return 1;                   /* success */
 }
 
-int float_const(const char *number, int sign, uint8_t *result, int bytes)
+/*
+ * Get the default floating point format for this specific field size.
+ * Used for the Dx pseudoops.
+ */
+enum floatize float_deffmt(int bytes)
 {
-    switch (bytes) {
-    case 1:
-        return to_float(number, sign, result, &ieee_8);
-    case 2:
-        return to_float(number, sign, result, &ieee_16);
-    case 4:
-        return to_float(number, sign, result, &ieee_32);
-    case 8:
-        return to_float(number, sign, result, &ieee_64);
-    case 10:
-        return to_float(number, sign, result, &ieee_80);
-    case 16:
-        return to_float(number, sign, result, &ieee_128);
-    default:
-        nasm_panic("strange value %d passed to float_const", bytes);
-        return 0;
+    enum floatize type;
+
+    for (type = 0; type < FLOAT_ERR; type++) {
+        if (fp_formats[type].bytes == bytes)
+            break;
     }
+
+    return type;                /* FLOAT_ERR if invalid */
 }
 
 /* Set floating-point options */
diff --git a/asm/floats.h b/asm/floats.h
index 4f80acac..c4635136 100644
--- a/asm/floats.h
+++ b/asm/floats.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *   
- *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -48,7 +48,20 @@ enum float_round {
     FLOAT_RC_UP
 };
 
-int float_const(const char *string, int sign, uint8_t *result, int bytes);
+/* Note: enum floatize and FLOAT_ERR are defined in nasm.h */
+
+/* Floating-point format description */
+struct ieee_format {
+    int bytes;                  /* Total bytes */
+    int mantissa;               /* Fractional bits in the mantissa */
+    int explicit;               /* Explicit integer */
+    int exponent;               /* Bits in the exponent */
+    int offset;                 /* Offset into byte array for floatize op */
+};
+extern const struct ieee_format fp_formats[FLOAT_ERR];
+
+int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt);
+enum floatize float_deffmt(int bytes);
 int float_option(const char *option);
 
 #endif /* NASM_FLOATS_H */
diff --git a/asm/parser.c b/asm/parser.c
index 47b46ecd..dbd2240c 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -531,10 +531,12 @@ static int parse_eops(extop **result, bool critical, int elem)
                 goto is_float;
             }
         } else if (i == TOKEN_FLOAT) {
+            enum floatize fmt;
         is_float:
             eop->type = EOT_DB_FLOAT;
 
-            if (eop->elem > 16) {
+            fmt = float_deffmt(eop->elem);
+            if (fmt == FLOAT_ERR) {
                 nasm_nonfatal("no %d-bit floating-point format supported",
                               eop->elem << 3);
                 eop->val.string.len = 0;
@@ -552,8 +554,7 @@ static int parse_eops(extop **result, bool critical, int elem)
                 eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len);
                 eop->val.string.data = (char *)eop + sizeof(extop);
                 if (!float_const(tokval.t_charptr, sign,
-                                 (uint8_t *)eop->val.string.data,
-                                 eop->val.string.len))
+                                 (uint8_t *)eop->val.string.data, fmt))
                     eop->val.string.len = 0;
             }
             if (!eop->val.string.len)
diff --git a/asm/tokens.dat b/asm/tokens.dat
index ab37dcc1..356b39a2 100644
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@@ -113,6 +113,9 @@ __?float80e?__
 __?float128l?__
 __?float128h?__
 
+% TOKEN_FLOATIZE, 0, 0, FLOAT_B{__?bfloat*?__}
+__?bfloat16?__
+
 % TOKEN_STRFUNC, 0, 0, STRFUNC_{__?*?__}
 __?utf16?__
 __?utf16le?__
diff --git a/doc/changes.src b/doc/changes.src
index cf95224a..c1459231 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -12,6 +12,9 @@ since 2007.
 \b Add instructions from the Intel Instruction Set Extensions and
 Future Features Programming Reference, June 2020.
 
+\b Support for \c{bfloat16} floating-point constants. See \k{fltconst}
+and \k{pkg_fp}.
+
 \b Properly display warnings in preprocess-only mode.
 
 \b Fix copy-and-paste of examples from the PDF documentation.
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index cfa92fd1..e3d503c5 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -1692,9 +1692,9 @@ context.
 \i{Floating-point} constants are acceptable only as arguments to
 \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as
 arguments to the special operators \i\c{__?float8?__},
-\i\c{__?float16?__}, \i\c{__?float32?__}, \i\c{__?float64?__},
-\i\c{__?float80m?__}, \i\c{__?float80e?__}, \i\c{__?float128l?__}, and
-\i\c{__?float128h?__}.
+\i\c{__?float16?__}, \i\c{__?bfloat16?__}, \i\c{__?float32?__},
+\i\c{__?float64?__}, \i\c{__?float80m?__}, \i\c{__?float80e?__},
+\i\c{__?float128l?__}, and \i\c{__?float128h?__}. See also \k{pkg_fp}.
 
 Floating-point constants are expressed in the traditional form:
 digits, then a period, then optionally more digits, then optionally an
@@ -1733,6 +1733,13 @@ appears to be the most frequently used 8-bit floating-point format,
 although it is not covered by any formal standard.  This is sometimes
 called a "\i{minifloat}."
 
+The \i\c{bfloat16} format is effectively a compressed version of the
+32-bit single precision format, with a reduced mantissa. It is
+effectively the same as truncating the 32-bit format to the upper 16
+bits, except for rounding. There is no \c{D}\e{x} directive that
+corresponds to \c{bfloat16} as it obviously has the same size as the
+IEEE standard 16-bit half precision format, see however \k{pkg_fp}.
+
 The special operators are used to produce floating-point numbers in
 other contexts.  They produce the binary representation of a specific
 floating-point number as an integer, and can use anywhere integer
@@ -4633,6 +4640,7 @@ This packages contains the following floating-point convenience macros:
 \c
 \c %define float8(x)       __?float8?__(x)
 \c %define float16(x)      __?float16?__(x)
+\c %define bfloat16(x)     __?bfloat16?__(x)
 \c %define float32(x)      __?float32?__(x)
 \c %define float64(x)      __?float64?__(x)
 \c %define float80m(x)     __?float80m?__(x)
@@ -4640,6 +4648,12 @@ This packages contains the following floating-point convenience macros:
 \c %define float128l(x)    __?float128l?__(x)
 \c %define float128h(x)    __?float128h?__(x)
 
+It also defines a multi-line macro \i\c{bf16} that can be used
+in a similar way to the \c{D}\e{x} directives for the other
+floating-point numbers:
+
+\c      bf16 -3.1415, NaN, 2000.0, +Inf
+
 
 \H{pkg_ifunc} \i\c{ifunc}: \i{Integer functions}
 
diff --git a/include/nasm.h b/include/nasm.h
index 6cffaf5d..950ac45b 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -196,15 +196,18 @@ enum token_type { /* token types, other than chars */
     TOKEN_OPMASK        /* translated token for opmask registers */
 };
 
+/* Must match the fp_formats[] array in asm/floats.c */
 enum floatize {
     FLOAT_8,
     FLOAT_16,
+    FLOAT_B16,
     FLOAT_32,
     FLOAT_64,
     FLOAT_80M,
     FLOAT_80E,
     FLOAT_128L,
-    FLOAT_128H
+    FLOAT_128H,
+    FLOAT_ERR                   /* Invalid format, MUST BE LAST */
 };
 
 /* Must match the list in string_transform(), in strfunc.c */
diff --git a/macros/fp.mac b/macros/fp.mac
index eb297014..3a094a5c 100644
--- a/macros/fp.mac
+++ b/macros/fp.mac
@@ -1,6 +1,6 @@
 ;; --------------------------------------------------------------------------
 ;;   
-;;   Copyright 2010 The NASM Authors - All Rights Reserved
+;;   Copyright 2010-2020 The NASM Authors - All Rights Reserved
 ;;   See the file AUTHORS included with the NASM distribution for
 ;;   the specific copyright holders.
 ;;
@@ -46,9 +46,17 @@ USE: fp
 
 %define float8(x)	__?float8?__(x)
 %define float16(x)	__?float16?__(x)
+%define bfloat16(x)     __?bfloat16?__(x)
 %define float32(x)	__?float32?__(x)
 %define float64(x)	__?float64?__(x)
 %define float80m(x)	__?float80m?__(x)
 %define float80e(x)	__?float80e?__(x)
 %define float128l(x)	__?float128l?__(x)
 %define float128h(x)	__?float128h?__(x)
+
+%imacro bf16 1-*.nolist
+  %rep %0
+    dw __?bfloat16?__(%1)
+    %rotate 1
+  %endrep
+%endmacro
diff --git a/test/float.asm b/test/float.asm
index 88519b2e..1dd92a96 100644
--- a/test/float.asm
+++ b/test/float.asm
@@ -5,6 +5,8 @@
 ; Test of floating-point formats
 ;
 
+%use fp
+
 ; 8-bit
 	db 1.0
 	db +1.0
@@ -65,6 +67,37 @@
 	dw __SNaN__
 	dw 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
 
+; 16-bit bfloat
+	bf16 1.0
+	bf16 +1.0
+	bf16 -1.0
+	bf16 1.5
+	bf16 +1.5
+	bf16 -1.5
+	bf16 0.0
+	bf16 +0.0
+	bf16 -0.0
+	bf16 1.83203125
+	bf16 +1.83203125
+	bf16 -1.83203125
+	bf16 1.83203125e15
+	bf16 +1.83203125e15
+	bf16 -1.83203125e15
+	bf16 1.83203125e-15
+	bf16 +1.83203125e-15
+	bf16 -1.83203125e-15
+	bf16 1.83203125e-40		; Denormal!
+	bf16 +1.83203125e-40		; Denormal!
+	bf16 -1.83203125e-40		; Denormal!
+	bf16 __Infinity__
+	bf16 +__Infinity__
+	bf16 -__Infinity__
+	bf16 __NaN__
+	bf16 __QNaN__
+	bf16 __SNaN__
+	bf16 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
+	bf16 -3.1415, NaN, 2000.0, +Inf
+
 ; 32-bit
 	dd 1.0
 	dd +1.0
@@ -94,6 +127,7 @@
 	dd __QNaN__
 	dd __SNaN__
 	dd 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
+	dd -3.1415, NaN, 2000.0, +Inf
 
 ; 64-bit
 	dq 1.0
@@ -124,7 +158,7 @@
 	dq __QNaN__
 	dq __SNaN__
 	dq 3.1415926535_8979323846_2643383279_5028841971_6939937510_5
-	
+
 ; 80-bit
 	dt 1.0
 	dt +1.0

From 848b1657fd52f6d4b71814047deaebfd91a7dbed Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 16 Jul 2020 23:13:24 -0700
Subject: [PATCH 26/29] NASM 2.15.03rc8

---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index f3632bf4..0f7f97eb 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.15.03rc7
+2.15.03rc8

From 1d8c09b24e4204767f2d05047e2ac16dcdf46bfb Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 17 Jul 2020 17:44:27 -0700
Subject: [PATCH 27/29] x86/insns.dat: add tuple type for the latest AVX512
 instructions

Add missing tuple type (all are Full - fv:) for the latest AVX512
instructions.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 x86/insns.dat | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/x86/insns.dat b/x86/insns.dat
index 2776cfdf..141d68b3 100644
--- a/x86/insns.dat
+++ b/x86/insns.dat
@@ -6015,20 +6015,20 @@ XRESLDTRK	void				[	f2 0f 01 e9]				TSXLDTRK,FUTURE
 XSUSLDTRK	void				[	f2 0f 01 e8]				TSXLDTRK,FUTURE
 
 ;# AVX512 Bfloat16 instructions
-VCVTNE2PS2BF16	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:	evex.128.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
-VCVTNE2PS2BF16	ymmreg|mask|z,ymmreg*,ymmrm256|b32	[rvm:	evex.256.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
-VCVTNE2PS2BF16	zmmreg|mask|z,zmmreg*,zmmrm512|b32	[rvm:	evex.512.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
-VCVTNE2PS2BF16	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:	evex.128.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
-VCVTNE2PS2BF16	ymmreg|mask|z,ymmreg*,ymmrm256|b32	[rvm:	evex.256.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
-VCVTNE2PS2BF16	zmmreg|mask|z,zmmreg*,zmmrm512|b32	[rvm:	evex.512.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
-VDPBF16PS	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:	evex.128.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
-VDPBF16PS	ymmreg|mask|z,ymmreg*,ymmrm128|b32	[rvm:	evex.256.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
-VDPBF16PS	zmmreg|mask|z,zmmreg*,zmmrm128|b32	[rvm:	evex.512.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:fv:	evex.128.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	ymmreg|mask|z,ymmreg*,ymmrm256|b32	[rvm:fv:	evex.256.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	zmmreg|mask|z,zmmreg*,zmmrm512|b32	[rvm:fv:	evex.512.f2.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:fv:	evex.128.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	ymmreg|mask|z,ymmreg*,ymmrm256|b32	[rvm:fv:	evex.256.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VCVTNE2PS2BF16	zmmreg|mask|z,zmmreg*,zmmrm512|b32	[rvm:fv:	evex.512.f3.0f38.w0 72 /r]	AVX512BF16,FUTURE
+VDPBF16PS	xmmreg|mask|z,xmmreg*,xmmrm128|b32	[rvm:fv:	evex.128.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
+VDPBF16PS	ymmreg|mask|z,ymmreg*,ymmrm128|b32	[rvm:fv:	evex.256.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
+VDPBF16PS	zmmreg|mask|z,zmmreg*,zmmrm128|b32	[rvm:fv:	evex.512.f3.0f38.w0 52 /r]	AVX512BF16,FUTURE
 
 ;# AVX512 mask intersect instructions
-VP2INTERSECTD	kreg|rs2,xmmreg,xmmrm128|b32		[rvm:	evex.nds.128.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
-VP2INTERSECTD	kreg|rs2,ymmreg,ymmrm128|b32		[rvm:	evex.nds.256.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
-VP2INTERSECTD	kreg|rs2,zmmreg,zmmrm128|b32		[rvm:	evex.nds.512.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
+VP2INTERSECTD	kreg|rs2,xmmreg,xmmrm128|b32		[rvm:fv:	evex.nds.128.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
+VP2INTERSECTD	kreg|rs2,ymmreg,ymmrm128|b32		[rvm:fv:	evex.nds.256.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
+VP2INTERSECTD	kreg|rs2,zmmreg,zmmrm128|b32		[rvm:fv:	evex.nds.512.f2.0f38.w0 68 /r]	AVX512BF16,FUTURE
 
 ;# Intel Advanced Matrix Extensions (AMX)
 LDTILECFG	mem512				[m:	vex.128.np.0f38.w0 49 /0]		AMXTILE,FUTURE,SZ,X64

From ec204170028cdd582de1d2db28a365085d57424c Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 17 Jul 2020 17:46:45 -0700
Subject: [PATCH 28/29] changes.src: slightly better description of new
 instructions

Describe what the new instructions actually are.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
---
 doc/changes.src | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/changes.src b/doc/changes.src
index c1459231..d1182271 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -10,7 +10,9 @@ since 2007.
 \S{cl-2.15.03} Version 2.15.03
 
 \b Add instructions from the Intel Instruction Set Extensions and
-Future Features Programming Reference, June 2020.
+Future Features Programming Reference, June 2020. This includes
+AVX512 \c{bfloat16}, AVX512 mask intersect, and Intel Advanced Matrix
+Extensions (AMX).
 
 \b Support for \c{bfloat16} floating-point constants. See \k{fltconst}
 and \k{pkg_fp}.

From d27427846f8e61bf194721c9ccd72a95547289e8 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Fri, 17 Jul 2020 17:53:37 -0700
Subject: [PATCH 29/29] NASM 2.15.03

---
 version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/version b/version
index 0f7f97eb..3f699026 100644
--- a/version
+++ b/version
@@ -1 +1 @@
-2.15.03rc8
+2.15.03