From 8441bc5323fa3c4bc753340b0faa1d668335ad9c Mon Sep 17 00:00:00 2001 From: Jasper Lievisse Adriaanse Date: Wed, 1 Jul 2020 09:10:21 +0200 Subject: [PATCH 01/29] Remove references to sourceforge The mailinglists have been migrated to lists.nasm.us Signed-off-by: Jasper Lievisse Adriaanse --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 704fba14..e9b3dc30 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,5 @@ This means its development is open to even wider society of programmers wishing to improve their lovely assembler. Visit our [nasm.us](https://www.nasm.us/) website for more details. -We are gradually moving services away from Sourceforge. For our remaining -Sourceforge services see [here](https://sourceforge.net/projects/nasm/). With best regards, the NASM crew. From 174c8ccbad10fa021a2ad2dfe929d6850db503f2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Thu, 9 Jul 2020 21:15:16 -0700 Subject: [PATCH 02/29] NASM 2.15.03rc3 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index cb648feb..8d4ac6e4 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.15.03rc2 +2.15.03rc3 From 23abe9fe88a0d045e063627e682a540a5d06779c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 01:52:49 -0700 Subject: [PATCH 03/29] preproc: correctly handle %00 capturing a label defining->dstk.mmac should point back to "defining" when the topmost definition block is a %macro block. Otherwise %00 will not inhibit label emission. Signed-off-by: H. 
Peter Anvin (Intel) --- asm/preproc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/asm/preproc.c b/asm/preproc.c index 81c72042..4fcdb359 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -4035,7 +4035,13 @@ issue_error: nasm_assert(!defining); nasm_new(def); def->casesense = casesense; - def->dstk.mmac = defining; + /* + * dstk.mstk points to the previous definition bracket, + * whereas dstk.mmac points to the topmost mmacro, which + * in this case is the one we are just starting to create. + */ + def->dstk.mstk = defining; + def->dstk.mmac = def; if (op == PP_RMACRO) def->max_depth = nasm_limit[LIMIT_MACRO_LEVELS]; if (!parse_mmacro_spec(tline, def, dname)) { From b3c554555687886e2a521cf0fb0b27ef9d51d306 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 01:54:38 -0700 Subject: [PATCH 04/29] test/Makefile: fix command line for .obj format The command line for .obj had a stray -F. Signed-off-by: H. Peter Anvin (Intel) --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 6b6ffbfe..7d09b346 100644 --- a/test/Makefile +++ b/test/Makefile @@ -34,7 +34,7 @@ $(NASM): $(NASM) $(NASMOPT) -f aout -o $@ -MD $@.dep -l $@.lst $< %.obj: %.asm $(NASMDEP) - $(NASM) $(NASMOPT) -f obj -gborland -F -o $@ -MD $@.dep -l $@.lst $< + $(NASM) $(NASMOPT) -f obj -gborland -o $@ -MD $@.dep -l $@.lst $< %.rdf: %.asm $(NASMDEP) $(NASM) $(NASMOPT) -f rdf -o $@ -MD $@.dep -l $@.lst $< From 22a3f567c0d7b4801a1b0838231073572d766795 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 01:59:53 -0700 Subject: [PATCH 05/29] changes.src: document %00 fix Add %00 fix to release note. Signed-off-by: H. 
Peter Anvin (Intel) --- doc/changes.src | 2 ++ doc/nasmdoc.src | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/doc/changes.src b/doc/changes.src index 47fb03b4..3dc85deb 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -25,6 +25,8 @@ section type. where one or more parts result from empty token expansion, resulting in \c{%+} tokens at the beginning or end, or multiple ones in a row. +\b Fix macro label capture (\c{%00}, \k{percent00}). + \b Portability fixes. \S{cl-2.15.02} Version 2.15.02 diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 62a70d57..26e4db27 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -2935,6 +2935,10 @@ Examples are given in \k{rotate}. label must be on the same line as the macro invocation, may be a local label (see \k{locallab}), and need not end in a colon. +If \c{%00} is present anywhere in the macro body, the label itself +will not be emitted by NASM. You can, of course, put \c{%00:} +explicitly at the beginning of your macro. + \S{rotate} \i\c{%rotate}: \i{Rotating Macro Parameters} From 543069acec0e978a9dafe22041887b1ba2cd58ff Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 02:00:31 -0700 Subject: [PATCH 06/29] NASM 2.15.03rc4 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 8d4ac6e4..9c729c89 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.15.03rc3 +2.15.03rc4 From 254a56acca1511afadb30caa5e432b575f54ea43 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 02:44:33 -0700 Subject: [PATCH 07/29] assemble: use proper rel/abs state for lea reg,imm When using the LEA instruction with immediate syntax instead of memory operand syntax, the IP_REL flag will not have made it into the operand type. Make it do so. Signed-off-by: H. 
Peter Anvin (Intel) --- asm/assemble.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/asm/assemble.c b/asm/assemble.c index 2c47ba58..49faa6b8 100644 --- a/asm/assemble.c +++ b/asm/assemble.c @@ -2776,14 +2776,23 @@ static enum ea_type process_ea(operand *input, ea *output, int bits, if (input->basereg == -1 && (input->indexreg == -1 || input->scale == 0)) { /* - * It's a pure offset. + * It's a pure offset. If it is an IMMEDIATE, it is a pattern + * in insns.dat which allows an immediate to be used as a memory + * address, in which case apply the default REL/ABS. */ - if (bits == 64 && ((input->type & IP_REL) == IP_REL)) { - if (input->segment == NO_SEG || - (input->opflags & OPFLAG_RELATIVE)) { - nasm_warn(WARN_OTHER|ERR_PASS2, "absolute address can not be RIP-relative"); - input->type &= ~IP_REL; - input->type |= MEMORY; + if (bits == 64) { + if (is_class(IMMEDIATE, input->type)) { + if (!(input->eaflags & EAF_ABS) && + ((input->eaflags & EAF_REL) || globalrel)) + input->type |= IP_REL; + } + if ((input->type & IP_REL) == IP_REL) { + if (input->segment == NO_SEG || + (input->opflags & OPFLAG_RELATIVE)) { + nasm_warn(WARN_OTHER|ERR_PASS2, "absolute address can not be RIP-relative"); + input->type &= ~IP_REL; + input->type |= MEMORY; + } } } From be1be3f627d82a1352738eb26c6e53281fc924cc Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 02:46:23 -0700 Subject: [PATCH 08/29] %use masm: much better documentation Signed-off-by: H. Peter Anvin (Intel) --- doc/changes.src | 3 +++ doc/nasmdoc.src | 56 ++++++++++++++++++++++++++++++++++++++++++----- test/masmdisp.asm | 6 ++++- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/doc/changes.src b/doc/changes.src index 3dc85deb..714299d0 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -27,6 +27,9 @@ in \c{%+} tokens at the beginning or end, or multiple ones in a row. \b Fix macro label capture (\c{%00}, \k{percent00}). 
+\b Much better documentation for the MASM compatiblity package, +\c{%use masm} (see \k{pkg_masm}). + \b Portability fixes. \S{cl-2.15.02} Version 2.15.02 diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 26e4db27..b52f854a 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -4664,17 +4664,61 @@ functionality, as intended to be used primarily with machine-generated code. It does not include any "programmer-friendly" shortcuts, nor does it in any way support ASSUME, symbol typing, or MASM-style structures. -Currently, the MASM compatibility package emulates only the PTR -keyword and recognize syntax displacement[index] for memory -operations. To enable the package, use the directive: \c{%use masm} -In addition, NASM now natively supports the MASM \c{?} and -\c{DUP} syntax for the \c{DB} etc data declaration directives, -regardless of if this package is included or not. See \k{db}. +Currently, the MASM compatibility package emulates: +\b The \c{FLAT} and \c{OFFSET} keywords are recognized and ignored. + +\b The \c{PTR} keyword signifies a memory reference, as if the +argument had been put in square brackets: + +\c mov eax,[foo] ; memory reference +\c mov eax,dword ptr foo ; memory reference +\c mov eax,dowrd ptr flat:foo ; memory reference +\c mov eax,offset foo ; address +\c mov eax,foo ; address (ambiguous syntax in MASM) + +\b The \c{SEGMENT} ... \c{ENDS} syntax: + +\c segname SEGMENT +\c ... +\c segname ENDS + +\b The \c{PROC} ... \c{ENDP} syntax: + +\c procname PROC [FAR] +\c ... +\c procname ENDP + +\> \c{PROC} will also define \c{RET} as a macro expanding to either +\c{RETF} if \c{FAR} is specified and \c{RETN} otherwise. Any keyword +after \c{PROC} other than \c{FAR} is ignored. + +\b The \c{TBYTE} keyword as an alias for \c{TWORD} (see \k{qsother}). + +\b The \c{END} directive is ignored. + +\b In 64-bit mode relative addressing is the default (\c{DEFAULT REL}, +see \k{REL & ABS}). 
+ +In addition, NASM now natively supports, regardless of whether this +package is used or not: + +\b \c{?} and \c{DUP} syntax for the \c{DB} etc data declaration +directives (see \k{db}). + +\b \c{displacement[base+index]} syntax for memory operations, instead +of \c{[base+index+displacement]}. + +\b \c{seg:[addr]} instead of \c{[seg:addr]} syntax. + +\b A pure offset can be given to \c{LEA} without square brackets: + +\c lea rax,[foo] ; standard syntax +\c lea rax,foo ; also accepted \C{directive} \i{Assembler Directives} diff --git a/test/masmdisp.asm b/test/masmdisp.asm index 295d88d7..c5e9af4f 100644 --- a/test/masmdisp.asm +++ b/test/masmdisp.asm @@ -14,6 +14,7 @@ fproc proc far lea rsi,dword ptr foo lea rsi,[foo] lea rsi,dword [foo] + mov rdi,gs:[rbx] ret fproc endp @@ -21,6 +22,8 @@ nproc proc near mov eax,dword ptr foo mov rdx,offset foo mov ecx,bar[rbx] + mov rdi,[gs:foo] + mov rdi,qword ptr gs:foo ret nproc endp @@ -31,6 +34,7 @@ nxx dd 80 foo dd 100 _DATA ends -_BSS segment nobits + segment _BSS nobits bar resd 100 +xyzzy dd 64 dup (?) _BSS ends From 861f2cf2692b850a784a34d81c289a9a5cf9e803 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 02:49:14 -0700 Subject: [PATCH 09/29] changes.src: document LEA fix Document fix of LEA without square brackets. Signed-off-by: H. Peter Anvin (Intel) --- doc/changes.src | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/changes.src b/doc/changes.src index 714299d0..db58bdf6 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -30,6 +30,8 @@ in \c{%+} tokens at the beginning or end, or multiple ones in a row. \b Much better documentation for the MASM compatiblity package, \c{%use masm} (see \k{pkg_masm}). +\b Fix \c{LEA} without square brackets, for MASM compatibility. + \b Portability fixes. \S{cl-2.15.02} Version 2.15.02 From 015ddc1b33d3b40d16e018f80528bbd15979b873 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 02:50:51 -0700 Subject: [PATCH 10/29] NASM 2.15.03rc5 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 9c729c89..413fe765 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.15.03rc4 +2.15.03rc5 From a79a700208d771fb3b8e6e7f03fcc195c2d1831c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 02:55:50 -0700 Subject: [PATCH 11/29] preproc: add a %null directive for the masm macro package Instead of %pragma ignore, use a new %null directive which ignores the rest of the line, without bothering to expand it. Signed-off-by: H. Peter Anvin (Intel) --- asm/pptok.dat | 1 + asm/preproc.c | 5 +++++ macros/masm.mac | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/asm/pptok.dat b/asm/pptok.dat index b6285c36..0fdbbad6 100644 --- a/asm/pptok.dat +++ b/asm/pptok.dat @@ -89,6 +89,7 @@ %include %line %local +%null %pop %pragma %push diff --git a/asm/preproc.c b/asm/preproc.c index 4fcdb359..ccb00f3b 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -4649,6 +4649,11 @@ issue_error: case PP_LINE: nasm_panic("`%s' directive not preprocessed early", dname); break; + + case PP_NULL: + /* Goes nowhere, does nothing... */ + break; + } done: diff --git a/macros/masm.mac b/macros/masm.mac index da7e6eea..6bd27273 100644 --- a/macros/masm.mac +++ b/macros/masm.mac @@ -50,7 +50,7 @@ USE: masm %endmacro %imacro ends 0+.nolist - %pragma ignore ends %00 + %null ends %00 %endmacro %imacro proc 0-*.nolist @@ -65,7 +65,7 @@ USE: masm %endmacro %imacro endp 0.nolist - %pragma ignore endp %00 + %null endp %00 %undef ret %endmacro From fcd3cb88615a200fbee85e5906e37e265a8d297d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 17:22:47 -0700 Subject: [PATCH 12/29] preproc: preserve %[...] in listings When generating list output, preserve %[...] in the output if we list a TOK_INDIRECT. 
The tokenization process removes these delimiters, so we have to explicitly put them back. This doesn't affect assembly output, which will only ever be generated after all TOK_INDIRECT tokens have been removed, but it does affect some of the listing modes. Signed-off-by: H. Peter Anvin (Intel) --- asm/preproc.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/asm/preproc.c b/asm/preproc.c index ccb00f3b..693cbcbb 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -2000,6 +2000,16 @@ static char *detoken(Token * tlist, bool expand_locals) } break; + case TOK_INDIRECT: + /* + * This won't happen in when emitting to the assembler, + * but can happen when emitting output for some of the + * list options. The token string doesn't actually include + * the brackets in this case. + */ + len += 3; /* %[] */ + break; + default: break; /* No modifications */ } @@ -2019,8 +2029,19 @@ static char *detoken(Token * tlist, bool expand_locals) p = line = nasm_malloc(len + 1); - list_for_each(t, tlist) - p = mempcpy(p, tok_text(t), t->len); + list_for_each(t, tlist) { + switch (t->type) { + case TOK_INDIRECT: + *p++ = '%'; + *p++ = '['; + p = mempcpy(p, tok_text(t), t->len); + *p++ = ']'; + break; + + default: + p = mempcpy(p, tok_text(t), t->len); + } + } *p = '\0'; return line; From bb3156533b9baf836b746c638111fca82c65f98a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 17:24:43 -0700 Subject: [PATCH 13/29] ppindirect.asm: make it possible to assemble to a binary Add a couple of dd/db directives to ppindirect.asm to make it possible to actually run it through the assembler. Signed-off-by: H. 
Peter Anvin (Intel) --- test/ppindirect.asm | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/test/ppindirect.asm b/test/ppindirect.asm index 0a30d075..2785378a 100644 --- a/test/ppindirect.asm +++ b/test/ppindirect.asm @@ -2,6 +2,8 @@ ; Fun tests of the preprocessor indirection mode... + bits 64 + %assign foo1 11 %assign foo11 1111 %assign foo2 22 @@ -9,34 +11,34 @@ %assign foo3 33 %assign foo33 3333 %assign n 2 -foo%[foo%[n]]*100 -foo%[n]*100 + dd foo%[foo%[n]]*100 + dd foo%[n]*100 %assign foo%[foo%[n]] foo%[foo%[n]]*100 ;%assign foo%[n] foo%[n]*100 - foo1 - foo2 - foo3 - foo11 - foo22 - foo33 + dd foo1 + dd foo2 + dd foo3 + dd foo11 + dd foo22 + dd foo33 %define foo33bar 999999 - %[foo%[foo3]bar] + dd %[foo%[foo3]bar] %assign bctr 0 %macro bluttan 0 %assign bctr bctr+1 %assign bluttan%[bctr] bctr %defstr bstr bluttan%[bctr] - bluttan%[bctr] - bstr + db bluttan%[bctr] + db bstr %endmacro %rep 20 bluttan %endrep %rep 20 - bluttan%[bctr] + db bluttan%[bctr] %assign bctr bctr-1 %endrep From baaa5ca4413e7ac73fe98e682be13f2da529e2cf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 18:14:09 -0700 Subject: [PATCH 14/29] outcoff: don't drop align= option alone on a section line If the section/segment directive *only* contained an align= directive, it would get lost. Fix that. Signed-off-by: H. Peter Anvin (Intel) --- output/outcoff.c | 68 +++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/output/outcoff.c b/output/outcoff.c index de22fb88..bcd9ff3f 100644 --- a/output/outcoff.c +++ b/output/outcoff.c @@ -72,11 +72,11 @@ * (2) Win32 doesn't bother putting any flags in the header flags * field (at offset 0x12 into the file). 
* - * (3) Win32 uses some extra flags into the section header table: + * (3) Win32/64 uses some extra flags into the section header table: * it defines flags 0x80000000 (writable), 0x40000000 (readable) * and 0x20000000 (executable), and uses them in the expected - * combinations. It also defines 0x00100000 through 0x00700000 for - * section alignments of 1 through 64 bytes. + * combinations. It also defines 0x00100000 through 0x00f00000 for + * section alignments of 1 through 8192 bytes. * * (4) Both standard COFF and Win32 COFF seem to use the DWORD * field directly after the section name in the section header @@ -285,14 +285,22 @@ int coff_make_section(char *name, uint32_t flags) return coff_nsects - 1; } +/* + * Convert an alignment value to the corresponding flags. + * An alignment value of 0 means no flags should be set. + */ static inline uint32_t coff_sectalign_flags(unsigned int align) { - return (ilog2_32(align) + 1) << 20; + return (alignlog2_32(align) + 1) << 20; } +/* + * Get the alignment value from a flags field. + * Returns 0 if no alignment defined. 
+ */ static inline unsigned int coff_alignment(uint32_t flags) { - return 1U << (((flags & IMAGE_SCN_ALIGN_MASK) >> 20) - 1); + return (1U << ((flags & IMAGE_SCN_ALIGN_MASK) >> 20)) >> 1; } static int32_t coff_section_names(char *name, int *bits) @@ -364,10 +372,13 @@ static int32_t coff_section_names(char *name, int *bits) nasm_nonfatal("argument to `align' is not numeric"); else { unsigned int align = atoi(q + 6); - if (!align || ((align - 1) & align)) { + /* Allow align=0 meaning use default */ + if (!align) { + align_flags = 0; + } else if (!is_power2(align)) { nasm_nonfatal("argument to `align' is not a" " power of two"); - } else if (align > 8192) { + } else if (align > COFF_MAX_ALIGNMENT) { nasm_nonfatal("maximum alignment in COFF is %d bytes", COFF_MAX_ALIGNMENT); } else { @@ -382,30 +393,31 @@ static int32_t coff_section_names(char *name, int *bits) break; if (i == coff_nsects) { if (!flags) { - if (!strcmp(name, ".data")) + flags = TEXT_FLAGS; + + if (!strcmp(name, ".data")) { flags = DATA_FLAGS; - else if (!strcmp(name, ".rdata")) + } else if (!strcmp(name, ".rdata")) { flags = RDATA_FLAGS; - else if (!strcmp(name, ".bss")) + } else if (!strcmp(name, ".bss")) { flags = BSS_FLAGS; - else if (win64 && !strcmp(name, ".pdata")) - flags = PDATA_FLAGS; - else if (win64 && !strcmp(name, ".xdata")) - flags = XDATA_FLAGS; - else - flags = TEXT_FLAGS; + } else if (win64) { + if (!strcmp(name, ".pdata")) + flags = PDATA_FLAGS; + else if (!strcmp(name, ".xdata")) + flags = XDATA_FLAGS; + } } i = coff_make_section(name, flags); - if (flags) - coff_sects[i]->flags = flags; - } else if (flags) { - /* Check if any flags are respecified */ - - /* Warn if non-alignment flags differ */ - if ((flags ^ coff_sects[i]->flags) & ~IMAGE_SCN_ALIGN_MASK && - coff_sects[i]->pass_last_seen == pass_count()) { - nasm_warn(WARN_OTHER, "section attributes changed on" - " redeclaration of section `%s'", name); + coff_sects[i]->align_flags = align_flags; + } else { + if (flags) { + /* 
Warn if non-alignment flags differ */ + if (((flags ^ coff_sects[i]->flags) & ~IMAGE_SCN_ALIGN_MASK) && + coff_sects[i]->pass_last_seen == pass_count()) { + nasm_warn(WARN_OTHER, "section attributes changed on" + " redeclaration of section `%s'", name); + } } /* Check if alignment might be needed */ @@ -419,6 +431,7 @@ static int32_t coff_section_names(char *name, int *bits) if (align_flags > sect_align_flags) { coff_sects[i]->align_flags = align_flags; } + /* Check if not already aligned */ /* XXX: other formats don't do this... */ if (coff_sects[i]->len % align) { @@ -428,9 +441,6 @@ static int32_t coff_section_names(char *name, int *bits) nasm_assert(padding <= sizeof buffer); - if (pass_final()) - nasm_nonfatal("section alignment changed during code generation"); - if (coff_sects[i]->flags & IMAGE_SCN_CNT_CODE) { /* Fill with INT 3 instructions */ memset(buffer, 0xCC, padding); From 42a73b776a6f987e1720256b502139cf1bec9e68 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 18:14:54 -0700 Subject: [PATCH 15/29] test/winalign.asm: simple test for COFF alignment Simple test case based on debugging BR 3392692. Signed-off-by: H. 
Peter Anvin (Intel) --- test/winalign.asm | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 test/winalign.asm diff --git a/test/winalign.asm b/test/winalign.asm new file mode 100644 index 00000000..62abf827 --- /dev/null +++ b/test/winalign.asm @@ -0,0 +1,45 @@ + section .pdata rdata align=2 + dd 1 + dd 2 + dd 3 + + section .rdata align=16 + dd 4 + dd 5 + dd 6 + + section ultra + dd 10 + dd 11 + dd 12 + + section infra rdata + dd 20 + dd 21 + dd 22 + + section omega rdata align=1 + dd 90 + dd 91 + dd 92 + + section .xdata + dd 7 + dd 8 + dd 9 + + section ultra align=8 + dd 13 + dd 14 + dd 15 + + section infra rdata align=1 + dd 23 + dd 24 + dd 25 + + section omega rdata + sectalign 2 + dd 93 + dd 94 + dd 95 From d9ea17fb47909ef27811c945b4c6e4b0742796d4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 18:40:05 -0700 Subject: [PATCH 16/29] preproc: saner handling of cpp-style line directives NASM now supports a proper superset of cpp line number markers, so there is no need to hack around them using the "prepreprocessor". Instead, just put a quick test in do_directive() treating it just like %line, except convert a "-quoted string into a `-quoted string. (This can break if there is a ` or \" sequence in the string... fix that at some point. This is still much better than what there is now.) Signed-off-by: H. Peter Anvin (Intel) --- asm/preproc.c | 43 +++++++++++++++++++++++++------------------ doc/nasmdoc.src | 15 ++++++++++++++- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/asm/preproc.c b/asm/preproc.c index 693cbcbb..8230f16f 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -945,26 +945,12 @@ static char *check_tasm_directive(char *line) * flags') into NASM preprocessor line number indications (`%line * lineno file'). 
*/ -static char *prepreproc(char *line) +static inline char *prepreproc(char *line) { - int lineno, fnlen; - char *fname, *oldline; - - if (line[0] == '#' && line[1] == ' ') { - oldline = line; - fname = oldline + 2; - lineno = atoi(fname); - fname += strspn(fname, "0123456789 "); - if (*fname == '"') - fname++; - fnlen = strcspn(fname, "\""); - line = nasm_malloc(20 + fnlen); - snprintf(line, 20 + fnlen, "%%line %d %.*s", lineno, fnlen, fname); - nasm_free(oldline); - } - if (tasm_compatible_mode) + if (unlikely(tasm_compatible_mode)) return check_tasm_directive(line); - return line; + else + return line; } /* @@ -3426,6 +3412,14 @@ static int do_directive(Token *tline, Token **output) *output = NULL; /* No output generated */ origline = tline; + if (tok_is(tline, '#')) { + /* cpp-style line directive */ + if (!tok_white(tline->next)) + return NO_DIRECTIVE_FOUND; + dname = tok_text(tline); + goto pp_line; + } + tline = skip_white(tline); if (!tline || !tok_type(tline, TOK_PREPROC_ID)) return NO_DIRECTIVE_FOUND; @@ -3448,6 +3442,7 @@ static int do_directive(Token *tline, Token **output) * in externally preprocessed sources. */ if (op == PP_LINE) { + pp_line: /* * Syntax is `%line nnn[+mmm] [filename]' */ @@ -3478,7 +3473,19 @@ static int do_directive(Token *tline, Token **output) tline = skip_white(tline); if (tline) { if (tline->type == TOK_STRING) { + if (dname[0] == '#') { + /* cpp version: treat double quotes like NASM backquotes */ + char *txt = tok_text_buf(tline); + if (txt[0] == '"') { + txt[0] = '`'; + txt[tline->len - 1] = '`'; + } + } src_set_fname(unquote_token(tline)); + /* + * Anything after the string is ignored by design (for cpp + * compatibility and future extensions.) 
+ */ } else { char *fname = detoken(tline, false); src_set_fname(fname); diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index b52f854a..cfa92fd1 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -2753,6 +2753,11 @@ interfering with the local label mechanism, as described in (the \c{..@} prefix, then a number, then another period) in case they interfere with macro-local labels. +These labels are really macro-local \e{tokens}, and can be used for +other purposes where a token unique to each macro invocation is +desired, e.g. to name single-line macros without using the context +feature (\k{ctxlocal}). + \S{mlmacgre} \i{Greedy Macro Parameters} @@ -4047,7 +4052,8 @@ which specifies a line increment value; each line of the input file read in is considered to correspond to \c{mmm} lines of the original source file. Finally, \c{filename} is an optional parameter which specifies the file name of the original source file. It may be a -quoted string. +quoted string, in which case any additional argument after the quoted +string will be ignored. After reading a \c{%line} preprocessor directive, NASM will report all file name and line numbers relative to the values specified @@ -4060,6 +4066,13 @@ code. See \k{opt-no-line}. Starting in NASM 2.15, \c{%line} directives are processed before any other processing takes place. +For compatibility with the output from some other preprocessors, +including many C preprocessors, a \c{#} character followed by +whitespace \e{at the very beginning of a line} is also treated as a +\c{%line} directive, except that double quotes surrounding the +filename are treated like NASM backquotes, with \c{\\}-escaped +sequences decoded. + \# This isn't a directive, it should be moved elsewhere... \S{getenv} \i\c{%!}\e{variable}: Read an Environment Variable. From bc5fc72d5bdaa10ea93fe2bb2b11892ef82bbbff Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin (Intel)" Date: Fri, 10 Jul 2020 18:46:12 -0700 Subject: [PATCH 17/29] NASM 2.15.03rc6 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 413fe765..65d7cb37 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.15.03rc5 +2.15.03rc6 From 65c6ba87166e3dd1143cc399ef535aa86145a450 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 11 Jul 2020 19:12:13 +0300 Subject: [PATCH 18/29] BR 3392696: nasm_quote -- fixup callers In 41e9682efed7cd1df133b1b4ac806e07723f1486 we've changed the nasm_quote arguments still not all callers were converted which could lead to nil dereference. [hpa: no need to call strlen() for the asm/preproc.c chunk] Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- asm/nasm.c | 4 +++- asm/preproc.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/asm/nasm.c b/asm/nasm.c index c5d9aff1..a0e17193 100644 --- a/asm/nasm.c +++ b/asm/nasm.c @@ -455,6 +455,7 @@ static char *nasm_quote_filename(const char *fn) { const unsigned char *p = (const unsigned char *)fn; + size_t len; if (!p || !*p) return nasm_strdup("\"\""); @@ -478,7 +479,8 @@ static char *nasm_quote_filename(const char *fn) return nasm_strdup(fn); quote: - return nasm_quote(fn, NULL); + len = strlen(fn); + return nasm_quote(fn, &len); } static void timestamp(void) diff --git a/asm/preproc.c b/asm/preproc.c index 8230f16f..8415d572 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -796,7 +796,7 @@ Token *quote_token(Token *t) */ static Token *quote_any_token(Token *t) { - size_t len; + size_t len = t->len; char *p; p = nasm_quote(tok_text(t), &len); From f1cf95480d9df9e7854a4be6844f86ff854e03d2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 12 Jul 2020 14:04:44 +0300 Subject: [PATCH 19/29] travis: weirdpaste -- add nil dereference test To address BR 3392696 Signed-off-by: Cyrill Gorcunov --- travis/test/weirdpaste.asm | 10 ++++++++++ travis/test/weirdpaste.bin.t | Bin 42 -> 45 bytes 
travis/test/weirdpaste.i.t | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/travis/test/weirdpaste.asm b/travis/test/weirdpaste.asm index 353ef8a8..e43d855e 100644 --- a/travis/test/weirdpaste.asm +++ b/travis/test/weirdpaste.asm @@ -24,3 +24,13 @@ %define N 1e%++%+ 5 dd N, 1e+5 + +; +; test nil dereference, since we're +; modifying with %line keep it last +; in the file +; +; BR 3392696 +; +%line 1 "`weirdpaste.asm" +mov eax, eax diff --git a/travis/test/weirdpaste.bin.t b/travis/test/weirdpaste.bin.t index db0468ed1f8aa557a898f0f3ed5835cdee7a570e..23061d3d9814d0185394c375baee94dd711c3e4b 100644 GIT binary patch delta 8 PcmdPWouI{>)_DK`3Dp9` delta 4 LcmdPZnxF*$0)PP$ diff --git a/travis/test/weirdpaste.i.t b/travis/test/weirdpaste.i.t index bf2acd6c..1df996dc 100644 --- a/travis/test/weirdpaste.i.t +++ b/travis/test/weirdpaste.i.t @@ -21,3 +21,13 @@ dd 1e+5, 1e+5 + + + + + + + + +%line 2+1 '`weirdpaste.asm' +mov eax, eax From 4c0bd9e73696c2643e7823489e56ad3b70290a4a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Wed, 15 Jul 2020 16:16:57 -0700 Subject: [PATCH 20/29] preproc: BR 2292698: fix handling of whitespace around %+ The previous code to fix whitespace around and multiple %+ symbols in a row (checkin 122c5fb75986adc37dfb147cc2a613e3ebc66e80) had some seriously broken pointer handling when zapping tokens. This could cause paste_tokens() to go into an infinite loop because it would attach %+ to another token and then immediately break them apart again, over and over. Reported-by: Signed-off-by: H. 
Peter Anvin (Intel) --- asm/preproc.c | 55 +++++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/asm/preproc.c b/asm/preproc.c index 8415d572..2f8fb3cd 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -4728,7 +4728,7 @@ static inline bool pp_concat_match(const Token *t, unsigned int mask) static bool paste_tokens(Token **head, const struct tokseq_match *m, size_t mnum, bool handle_explicit) { - Token *tok, *t, *next, **prev_next, **prev_nonspace; + Token *tok, *t, *next, **prev_next, **prev_nonspace, **nextp; bool pasted = false; char *buf, *p; size_t len, i; @@ -4765,30 +4765,28 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m, if (!handle_explicit) break; - /* Left pasting token is start of line, just drop %+ */ - if (!prev_nonspace) { - tok = delete_Token(tok); - break; - } - did_paste = true; - prev_next = prev_nonspace; - t = *prev_nonspace; - - /* Delete leading whitespace */ - next = zap_white(t->next); + /* Left pasting token is start of line, just drop %+ */ + if (!prev_nonspace) { + prev_next = nextp = head; + t = NULL; + } else { + prev_next = prev_nonspace; + t = *prev_next; + nextp = &t->next; + } /* - * Delete the %+ token itself, followed by any whitespace. + * Delete the %+ token itself plus any whitespace. * In a sequence of %+ ... %+ ... %+ pasting sequences where * some expansions in the middle have ended up empty, * we can end up having multiple %+ tokens in a row; * just drop whem in that case. */ - while (next) { + while ((next = *nextp)) { if (next->type == TOK_PASTE || next->type == TOK_WHITESPACE) - next = delete_Token(next); + *nextp = delete_Token(next); else break; } @@ -4796,11 +4794,16 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m, /* * Nothing after? Just leave the existing token. 
*/ - if (!next) { - t->next = tok = NULL; /* End of line */ + if (!next) + break; + + if (!t) { + /* Nothing to actually paste, just zapping the paste */ + *prev_next = tok = next; break; } + /* An actual paste */ p = buf = nasm_malloc(t->len + next->len + 1); p = mempcpy(p, tok_text(t), t->len); p = mempcpy(p, tok_text(next), next->len); @@ -4814,10 +4817,10 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m, * No output at all? Replace with a single whitespace. * This should never happen. */ - t = new_White(NULL); + tok = t = new_White(NULL); + } else { + *prev_nonspace = tok = t; } - - *prev_nonspace = tok = t; while (t->next) t = t->next; /* Find the last token produced */ @@ -4825,7 +4828,7 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m, t->next = delete_Token(next); /* We want to restart from the head of the pasted token */ - next = tok; + *prev_next = next = tok; break; default: @@ -4861,10 +4864,14 @@ static bool paste_tokens(Token **head, const struct tokseq_match *m, * Connect pasted into original stream, * ie A -> new-tokens -> B */ - while (t->next) - t = t->next; + while ((tok = t->next)) { + if (tok->type != TOK_WHITESPACE && tok->type != TOK_PASTE) + prev_nonspace = &t->next; + t = tok; + } + t->next = next; - prev_next = prev_nonspace = &t->next; + prev_next = &t->next; did_paste = true; break; } From 271dc7a7f7d6192b2a0ec6f660027cc13127b13e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Wed, 15 Jul 2020 16:21:08 -0700 Subject: [PATCH 21/29] NASM 2.15.03rc7 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 65d7cb37..f3632bf4 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.15.03rc6 +2.15.03rc7 From 36814f1fc83c4876ebb6a1cfec5ee000316a1127 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 16 Jul 2020 20:26:58 -0700 Subject: [PATCH 22/29] autoconf: look for _Decltype as yet another alias for typeof(). 
If the past is any indication, the final standardization of typeof() probably will be _Decltype(). Signed-off-by: H. Peter Anvin --- autoconf/m4/pa_c_typeof.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autoconf/m4/pa_c_typeof.m4 b/autoconf/m4/pa_c_typeof.m4 index d182ea2c..87b9ed60 100644 --- a/autoconf/m4/pa_c_typeof.m4 +++ b/autoconf/m4/pa_c_typeof.m4 @@ -7,7 +7,7 @@ dnl -------------------------------------------------------------------------- AC_DEFUN([PA_C_TYPEOF], [AC_CACHE_CHECK([if $CC supports typeof], [pa_cv_typeof], [pa_cv_typeof=no - for pa_typeof_try in typeof __typeof __typeof__ decltype __decltype __decltype__ + for pa_typeof_try in typeof __typeof __typeof__ decltype __decltype __decltype__ _Decltype do AS_IF([test $pa_cv_typeof = no], [AC_COMPILE_IFELSE([AC_LANG_SOURCE([ @@ -29,4 +29,4 @@ int testme(int x) AS_IF([test $pa_cv_typeof = typeof], [], [AC_DEFINE_UNQUOTED([typeof], [$pa_cv_typeof], - [Define if your typeof operator is not named typeof.])])])]) + [Define if your typeof operator is not named `typeof'.])])])]) From b31a4c9906459215d406de6ce116b77c09af5635 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 16 Jul 2020 21:48:28 -0700 Subject: [PATCH 23/29] Add support for new instructions from ISE June 2020 Add support for new instructions as defined in the Instruction Set Extensions manual as of June 2020. Signed-off-by: H. 
Peter Anvin --- asm/assemble.c | 41 +++++++++++++++++++++++++++++------------ disasm/disasm.c | 18 ++++++++++++++++++ doc/changes.src | 3 +++ include/opflags.h | 32 +++++++++++++++++--------------- test/amx.asm | 36 ++++++++++++++++++++++++++++++++++++ x86/iflags.ph | 10 ++++++++++ x86/insns.dat | 45 +++++++++++++++++++++++++++++++++++++++++++++ x86/insns.pl | 20 +++++++++++--------- x86/regs.dat | 3 +++ 9 files changed, 172 insertions(+), 36 deletions(-) create mode 100644 test/amx.asm diff --git a/asm/assemble.c b/asm/assemble.c index 49faa6b8..c82fcb1d 100644 --- a/asm/assemble.c +++ b/asm/assemble.c @@ -63,17 +63,18 @@ * assembly mode or the operand-size override on the operand * \70..\73 rel32 a long relative operand, from operand 0..3 * \74..\77 seg a word constant, from the _segment_ part of operand 0..3 - * \1ab a ModRM, calculated on EA in operand a, with the spare + * \1ab /r a ModRM, calculated on EA in operand a, with the reg * field the register value of operand b. - * \172\ab the register number from operand a in bits 7..4, with + * \171\mab /mrb (e.g /3r0) a ModRM, with the reg field taken from operand a, and the m + * and b fields set to the specified values. + * \172\ab /is4 the register number from operand a in bits 7..4, with * the 4-bit immediate from operand b in bits 3..0. * \173\xab the register number from operand a in bits 7..4, with * the value b in bits 3..0. * \174..\177 the register number from operand 0..3 in bits 7..4, and * an arbitrary value in bits 3..0 (assembled as zero.) - * \2ab a ModRM, calculated on EA in operand a, with the spare + * \2ab /b a ModRM, calculated on EA in operand a, with the reg * field equal to digit b. - * * \240..\243 this instruction uses EVEX rather than REX or VEX/XOP, with the * V field taken from operand 0..3. 
* \250 this instruction uses EVEX rather than REX or VEX/XOP, with the @@ -103,12 +104,11 @@ * tup is tuple type for Disp8*N from %tuple_codes in insns.pl * (compressed displacement encoding) * - * \254..\257 id,s a signed 32-bit operand to be extended to 64 bits. - * \260..\263 this instruction uses VEX/XOP rather than REX, with the - * V field taken from operand 0..3. - * \270 this instruction uses VEX/XOP rather than REX, with the - * V field set to 1111b. - * + * \254..\257 id,s a signed 32-bit operand to be extended to 64 bits. + * \260..\263 this instruction uses VEX/XOP rather than REX, with the + * V field taken from operand 0..3. + * \270 this instruction uses VEX/XOP rather than REX, with the + * V field set to 1111b. * VEX/XOP prefixes are followed by the sequence: * \tmm\wlp where mm is the M field; and wlp is: * 00 wwl lpp @@ -1317,6 +1317,14 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, length += 2; break; + case 0171: + c = *codes++; + op2 = (op2 & ~3) | ((c >> 3) & 3); + opx = &ins->oprs[op2]; + ins->rex |= op_rexflags(opx, REX_R|REX_H|REX_P|REX_W); + length++; + break; + case 0172: case 0173: codes++; @@ -1951,6 +1959,15 @@ static void gencode(struct out_data *data, insn *ins) out_segment(data, opx); break; + case 0171: + c = *codes++; + op2 = (op2 & ~3) | ((c >> 3) & 3); + opx = &ins->oprs[op2]; + r = nasm_regvals[opx->basereg]; + c = (c & ~070) | ((r & 7) << 3); + out_rawbyte(data, c); + break; + case 0172: { int mask = ins->prefixes[PPS_VEX] == P_EVEX ? 7 : 15; @@ -2807,7 +2824,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits, input->disp_size != (addrbits != 16 ? 
32 : 16))) nasm_warn(WARN_OTHER, "displacement size ignored on absolute address"); - if (bits == 64 && (~input->type & IP_REL)) { + if ((eaflags & EAF_MIB) || (bits == 64 && (~input->type & IP_REL))) { output->sib_present = true; output->sib = GEN_SIB(0, 4, 5); output->bytes = 4; @@ -3026,7 +3043,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits, output->rex |= rexflags(it, ix, REX_X); output->rex |= rexflags(bt, bx, REX_B); - if (it == -1 && (bt & 7) != REG_NUM_ESP) { + if (it == -1 && (bt & 7) != REG_NUM_ESP && !(eaflags & EAF_MIB)) { /* no SIB needed */ int mod, rm; diff --git a/disasm/disasm.c b/disasm/disasm.c index e1b5ebc3..7c6ea816 100644 --- a/disasm/disasm.c +++ b/disasm/disasm.c @@ -203,6 +203,8 @@ static enum reg_enum whichreg(opflags_t regflags, int regval, int rex) return GET_REGISTER(nasm_rd_opmaskreg, regval); if (!(BNDREG & ~regflags)) return GET_REGISTER(nasm_rd_bndreg, regval); + if (!(TMMREG & ~regflags)) + return GET_REGISTER(nasm_rd_tmmreg, regval); #undef GET_REGISTER return 0; @@ -679,6 +681,22 @@ static int matches(const struct itemplate *t, uint8_t *data, break; } + case 0171: + { + uint8_t t = *r++; + uint8_t d = *data++; + if ((d ^ t) & ~070) { + return 0; + } else { + op2 = (op2 & ~3) | ((t >> 3) & 3); + opy = &ins->oprs[op2]; + opy->basereg = ((d >> 3) & 7) + + (ins->rex & REX_R ? 8 : 0); + opy->segment |= SEG_RMREG; + } + break; + } + case 0172: { uint8_t ximm = *data++; diff --git a/doc/changes.src b/doc/changes.src index db58bdf6..cf95224a 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -9,6 +9,9 @@ since 2007. \S{cl-2.15.03} Version 2.15.03 +\b Add instructions from the Intel Instruction Set Extensions and +Future Features Programming Reference, June 2020. + \b Properly display warnings in preprocess-only mode. \b Fix copy-and-paste of examples from the PDF documentation. 
diff --git a/include/opflags.h b/include/opflags.h index 28bb236f..f5dd50ba 100644 --- a/include/opflags.h +++ b/include/opflags.h @@ -81,19 +81,19 @@ /* * Register classes. * - * Bits: 7 - 16 + * Bits: 7 - 17 */ #define REG_CLASS_SHIFT (7) -#define REG_CLASS_BITS (10) +#define REG_CLASS_BITS (11) #define REG_CLASS_MASK OP_GENMASK(REG_CLASS_BITS, REG_CLASS_SHIFT) #define GEN_REG_CLASS(bit) OP_GENBIT(bit, REG_CLASS_SHIFT) /* * Subclasses. Depends on type of operand. * - * Bits: 17 - 24 + * Bits: 18 - 25 */ -#define SUBCLASS_SHIFT (17) +#define SUBCLASS_SHIFT (18) #define SUBCLASS_BITS (8) #define SUBCLASS_MASK OP_GENMASK(SUBCLASS_BITS, SUBCLASS_SHIFT) #define GEN_SUBCLASS(bit) OP_GENBIT(bit, SUBCLASS_SHIFT) @@ -101,9 +101,9 @@ /* * Special flags. Context dependant. * - * Bits: 25 - 31 + * Bits: 26 - 32 */ -#define SPECIAL_SHIFT (25) +#define SPECIAL_SHIFT (26) #define SPECIAL_BITS (7) #define SPECIAL_MASK OP_GENMASK(SPECIAL_BITS, SPECIAL_SHIFT) #define GEN_SPECIAL(bit) OP_GENBIT(bit, SPECIAL_SHIFT) @@ -111,9 +111,9 @@ /* * Sizes of the operands and attributes. * - * Bits: 32 - 42 + * Bits: 33 - 43 */ -#define SIZE_SHIFT (32) +#define SIZE_SHIFT (33) #define SIZE_BITS (11) #define SIZE_MASK OP_GENMASK(SIZE_BITS, SIZE_SHIFT) #define GEN_SIZE(bit) OP_GENBIT(bit, SIZE_SHIFT) @@ -121,9 +121,9 @@ /* * Register set count * - * Bits: 47 - 43 + * Bits: 44 - 48 */ -#define REGSET_SHIFT (43) +#define REGSET_SHIFT (44) #define REGSET_BITS (5) #define REGSET_MASK OP_GENMASK(REGSET_BITS, REGSET_SHIFT) #define GEN_REGSET(bit) OP_GENBIT(bit, REGSET_SHIFT) @@ -138,11 +138,11 @@ * * ............................................................1111 optypes * .........................................................111.... modifiers - * ...............................................1111111111....... register classes - * .......................................11111111................. subclasses - * ................................1111111......................... 
specials - * .....................11111111111................................ sizes - * ................11111........................................... regset count + * ..............................................11111111111....... register classes + * ......................................11111111.................. subclasses + * ...............................1111111.......................... specials + * ....................11111111111................................. sizes + * ...............11111............................................ regset count */ #define REGISTER GEN_OPTYPE(0) /* register number in 'basereg' */ @@ -176,6 +176,7 @@ #define REG_CLASS_RM_ZMM GEN_REG_CLASS(7) #define REG_CLASS_OPMASK GEN_REG_CLASS(8) #define REG_CLASS_BND GEN_REG_CLASS(9) +#define REG_CLASS_RM_TMM GEN_REG_CLASS(10) static inline bool is_class(opflags_t class, opflags_t op) { @@ -217,6 +218,7 @@ static inline bool is_reg_class(opflags_t class, opflags_t reg) #define KREG OPMASKREG #define RM_BND ( REG_CLASS_BND | REGMEM) /* Bounds operand */ #define BNDREG ( REG_CLASS_BND | REGMEM | REGISTER) /* Bounds register */ +#define TMMREG ( REG_CLASS_RM_TMM | REGMEM | REGISTER) /* TMM (AMX) register */ #define REG_CDT ( REG_CLASS_CDT | BITS32 | REGISTER) /* CRn, DRn and TRn */ #define REG_CREG (GEN_SUBCLASS(1) | REG_CLASS_CDT | BITS32 | REGISTER) /* CRn */ #define REG_DREG (GEN_SUBCLASS(2) | REG_CLASS_CDT | BITS32 | REGISTER) /* DRn */ diff --git a/test/amx.asm b/test/amx.asm new file mode 100644 index 00000000..88455508 --- /dev/null +++ b/test/amx.asm @@ -0,0 +1,36 @@ + bits 64 + +%macro amx 1 + %define treg tmm %+ %1 + + ldtilecfg [rsi] + sttilecfg [rdi] + + tilezero treg + + tileloadd treg, [rax] + tileloadd treg, [rax,rdx] + tileloadd treg, [rax,rdx*2] + + tileloaddt1 treg, [rax] + tileloaddt1 treg, [rax,rdx] + tileloaddt1 treg, [rax,rdx*2] + + tdpbf16ps treg, treg, treg + tdpbssd treg, treg, treg + tdpbusd treg, treg, treg + tdpbsud treg, treg, treg + tdpbuud treg, treg, 
treg + + tilestored [rax], treg + tilestored [rax,rdx], treg + tilestored [rax,rdx*2], treg + + tilerelease +%endmacro + +%assign n 0 + %rep 8 + amx n + %assign n n+1 + %endrep diff --git a/x86/iflags.ph b/x86/iflags.ph index 2c05b293..7067d740 100644 --- a/x86/iflags.ph +++ b/x86/iflags.ph @@ -84,6 +84,16 @@ if_("AVX5124FMAPS", "AVX-512 4-iteration multiply-add"); if_("AVX5124VNNIW", "AVX-512 4-iteration dot product"); if_("SGX", "Intel Software Guard Extensions (SGX)"); if_("CET", "Intel Control-Flow Enforcement Technology (CET)"); +if_("ENQCMD", "Enqueue command instructions"); +if_("PCONFIG", "Platform configuration instruction"); +if_("WBNOINVD", "Writeback and do not invalidate instruction"); +if_("TSXLDTRK", "TSX suspend load address tracking"); +if_("SERIALIZE", "SERIALIZE instruction"); +if_("AVX512BF16", "AVX-512 bfloat16"); +if_("AVX512VP2INTERSECT", "AVX-512 VP2INTERSECT instructions"); +if_("AMXTILE", "AMX tile configuration instructions"); +if_("AMXBF16", "AMX bfloat16 multiplication"); +if_("AMXINT8", "AMX 8-bit integer multiplication"); # Put these last [hpa: why?] 
if_("OBSOLETE", "Instruction removed from architecture"); diff --git a/x86/insns.dat b/x86/insns.dat index 980c5943..2776cfdf 100644 --- a/x86/insns.dat +++ b/x86/insns.dat @@ -5999,6 +5999,51 @@ WRUSSQ mem,reg64 [mr: o64 66 0f 38 f5 /r] CET,FUTURE,X64 WRSSD mem,reg32 [mr: o32 0f 38 f6 /r] CET,FUTURE WRSSQ mem,reg64 [mr: o64 0f 38 f6 /r] CET,FUTURE,X64 +;# Instructions from ISE doc 319433-040, June 2020 +ENQCMD reg16,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE +ENQCMD reg32,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE,ND +ENQCMD reg32,mem512 [rm: a32 f2 0f 38 f8 /r] ENQCMD,FUTURE +ENQCMD reg64,mem512 [rm: a64 f2 0f 38 f8 /r] ENQCMD,FUTURE,X64 +ENQCMDS reg16,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE,PRIV +ENQCMDS reg32,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE,PRIV,ND +ENQCMDS reg32,mem512 [rm: a32 f2 0f 38 f8 /r] ENQCMD,FUTURE,PRIV +ENQCMDS reg64,mem512 [rm: a64 f2 0f 38 f8 /r] ENQCMD,FUTURE,PRIV,X64 +PCONFIG void [ np 0f 01 c5] PCONFIG,FUTURE,PRIV +SERIALIZE void [ np 0f 01 e8] SERIALIZE,FUTURE +WBNOINVD void [ f3 0f 09] WBNOINVD,FUTURE,PRIV +XRESLDTRK void [ f2 0f 01 e9] TSXLDTRK,FUTURE +XSUSLDTRK void [ f2 0f 01 e8] TSXLDTRK,FUTURE + +;# AVX512 Bfloat16 instructions +VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm: evex.256.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm: evex.512.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm: evex.256.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm: evex.512.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VDPBF16PS xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f3.0f38.w0 52 /r] AVX512BF16,FUTURE +VDPBF16PS ymmreg|mask|z,ymmreg*,ymmrm128|b32 [rvm: evex.256.f3.0f38.w0 52 /r] 
AVX512BF16,FUTURE +VDPBF16PS zmmreg|mask|z,zmmreg*,zmmrm128|b32 [rvm: evex.512.f3.0f38.w0 52 /r] AVX512BF16,FUTURE + +;# AVX512 mask intersect instructions +VP2INTERSECTD kreg|rs2,xmmreg,xmmrm128|b32 [rvm: evex.nds.128.f2.0f38.w0 68 /r] AVX512BF16,FUTURE +VP2INTERSECTD kreg|rs2,ymmreg,ymmrm128|b32 [rvm: evex.nds.256.f2.0f38.w0 68 /r] AVX512BF16,FUTURE +VP2INTERSECTD kreg|rs2,zmmreg,zmmrm128|b32 [rvm: evex.nds.512.f2.0f38.w0 68 /r] AVX512BF16,FUTURE + +;# Intel Advanced Matrix Extensions (AMX) +LDTILECFG mem512 [m: vex.128.np.0f38.w0 49 /0] AMXTILE,FUTURE,SZ,X64 +STTILECFG mem512 [m: vex.128.66.0f38.w0 49 /0] AMXTILE,FUTURE,SZ,X64 +TDPBF16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5c /r] AMXBF16,FUTURE,X64 +TDPBSSD tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TDPBSUD tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TDPBUSD tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TDPBUUD tmmreg,tmmreg,tmmreg [rmv: vex.128.np.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TILELOADD tmmreg,mem [rm: vex.128.f2.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64 +TILELOADDT1 tmmreg,mem [rm: vex.128.f2.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64 +TILERELEASE void [ vex.128.np.0f38.w0 49 c0] AMXTILE,FUTURE,X64 +TILESTORED mem,tmmreg [mr: vex.128.f3.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64 +TILEZERO tmmreg [r: vex.128.f2.0f38.w0 49 /3r0] AMXTILE,FUTURE,X64 + ;# Systematic names for the hinting nop instructions ; These should be last in the file HINT_NOP0 rm16 [m: o16 0f 18 /0] P6,UNDOC diff --git a/x86/insns.pl b/x86/insns.pl index cd9aaf4f..911ef7eb 100755 --- a/x86/insns.pl +++ b/x86/insns.pl @@ -880,11 +880,19 @@ sub byte_code_compile($$) { $prefix_ok = 0; } elsif ($op =~ m:^/([0-7])$:) { if (!defined($oppos{'m'})) { - die "$fname:$line: $op requires m operand\n"; + die "$fname:$line: $op requires an m operand\n"; } push(@codes, 06) if ($oppos{'m'} & 4); push(@codes, 0200 + (($oppos{'m'} & 3) << 3) + $1); $prefix_ok = 0; + 
} elsif ($op =~ m:^/([0-3]?)r([0-7])$:) { + if (!defined($oppos{'r'})) { + die "$fname:$line: $op requires an r operand\n"; + } + push(@codes, 05) if ($oppos{'r'} & 4); + push(@codes, 0171); + push(@codes, (($1+0) << 6) + (($oppos{'r'} & 3) << 3) + $2); + $prefix_ok = 0; } elsif ($op =~ /^(vex|xop)(|\..*)$/) { my $vexname = $1; my $c = $vexmap{$vexname}; @@ -907,7 +915,7 @@ sub byte_code_compile($$) { $w = 2; } elsif ($oq eq 'ww') { $w = 3; - } elsif ($oq eq 'p0') { + } elsif ($oq eq 'np' || $oq eq 'p0') { $p = 0; } elsif ($oq eq '66' || $oq eq 'p1') { $p = 1; @@ -935,9 +943,6 @@ sub byte_code_compile($$) { if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) { die "$fname:$line: missing fields in \U$vexname\E specification\n"; } - if (defined($oppos{'v'}) && !$has_nds) { - die "$fname:$line: 'v' operand without ${vexname}.nds or ${vexname}.ndd\n"; - } my $minmap = ($c == 1) ? 8 : 0; # 0-31 for VEX, 8-31 for XOP if ($m < $minmap || $m > 31) { die "$fname:$line: Only maps ${minmap}-31 are valid for \U${vexname}\n"; @@ -966,7 +971,7 @@ sub byte_code_compile($$) { $w = 2; } elsif ($oq eq 'ww') { $w = 3; - } elsif ($oq eq 'p0') { + } elsif ($oq eq 'np' || $oq eq 'p0') { $p = 0; } elsif ($oq eq '66' || $oq eq 'p1') { $p = 1; @@ -994,9 +999,6 @@ sub byte_code_compile($$) { if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) { die "$fname:$line: missing fields in EVEX specification\n"; } - if (defined($oppos{'v'}) && !$has_nds) { - die "$fname:$line: 'v' operand without evex.nds or evex.ndd\n"; - } if ($m > 15) { die "$fname:$line: Only maps 0-15 are valid for EVEX\n"; } diff --git a/x86/regs.dat b/x86/regs.dat index 723f6a44..cec8420f 100644 --- a/x86/regs.dat +++ b/x86/regs.dat @@ -130,6 +130,9 @@ zmm0 ZMM0 zmmreg 0 zmm1-15 ZMM_L16 zmmreg 1 zmm16-31 ZMMREG zmmreg 16 +# AMX tile registers +tmm0-7 TMMREG tmmreg 0 + # Opmask registers k0 OPMASK0 opmaskreg 0 k1-7 OPMASKREG opmaskreg 1 TFLAG_BRC_OPT From e830e92b7792a3a8c0e81774c0fb6b3414398753 Mon 
Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 16 Jul 2020 22:50:33 -0700 Subject: [PATCH 24/29] preproc: when printing errors, don't descend into unlisted macros If macros are nolisted, *or* they don't have any filename associated with them, it is absolutely pointless to try to descend into them for error messages, so just don't, even if -Lb is provided. Signed-off-by: H. Peter Anvin --- asm/preproc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/asm/preproc.c b/asm/preproc.c index 2f8fb3cd..fec9520c 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -7046,6 +7046,8 @@ static void pp_error_list_macros(errflags severity) severity |= ERR_PP_LISTMACRO | ERR_NO_SEVERITY | ERR_HERE; while ((m = src_error_down())) { + if ((m->nolist & NL_LIST) || !m->where.filename) + break; nasm_error(severity, "... from macro `%s' defined", m->name); } From d081f0db5d491ee473fdb97b109dd9810b68d9b7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 16 Jul 2020 23:11:03 -0700 Subject: [PATCH 25/29] fp: support bfloat16 constants Support generating bfloat16 constants. This is a bit awkward, as "DW" already generates IEEE half precision constants; therefore there is no longer a single floating-point format for each size. This requires some replumbing. Fortunately bfloat16 fits in 64 bits, so support generating them with a macro that uses __?bfloat16?__() to convert to integers first before passing them to DW. Signed-off-by: H. 
Peter Anvin --- asm/eval.c | 25 +++++++---------- asm/floats.c | 74 ++++++++++++++++++++++++++----------------------- asm/floats.h | 17 ++++++++++-- asm/parser.c | 7 +++-- asm/tokens.dat | 3 ++ doc/changes.src | 3 ++ doc/nasmdoc.src | 20 +++++++++++-- include/nasm.h | 5 +++- macros/fp.mac | 10 ++++++- test/float.asm | 36 +++++++++++++++++++++++- 10 files changed, 140 insertions(+), 60 deletions(-) diff --git a/asm/eval.c b/asm/eval.c index cd3c526d..5d6ee1e7 100644 --- a/asm/eval.c +++ b/asm/eval.c @@ -694,21 +694,13 @@ static expr *expr5(void) static expr *eval_floatize(enum floatize type) { uint8_t result[16], *p; /* Up to 128 bits */ - static const struct { - int bytes, start, len; - } formats[] = { - { 1, 0, 1 }, /* FLOAT_8 */ - { 2, 0, 2 }, /* FLOAT_16 */ - { 4, 0, 4 }, /* FLOAT_32 */ - { 8, 0, 8 }, /* FLOAT_64 */ - { 10, 0, 8 }, /* FLOAT_80M */ - { 10, 8, 2 }, /* FLOAT_80E */ - { 16, 0, 8 }, /* FLOAT_128L */ - { 16, 8, 8 }, /* FLOAT_128H */ - }; int sign = 1; int64_t val; + size_t len; int i; + const struct ieee_format *fmt; + + fmt = &fp_formats[type]; scan(); if (tt != '(') { @@ -724,7 +716,7 @@ static expr *eval_floatize(enum floatize type) nasm_nonfatal("expecting floating-point number"); return NULL; } - if (!float_const(tokval->t_charptr, sign, result, formats[type].bytes)) + if (!float_const(tokval->t_charptr, sign, result, type)) return NULL; scan(); if (tt != ')') { @@ -732,9 +724,12 @@ static expr *eval_floatize(enum floatize type) return NULL; } - p = result+formats[type].start+formats[type].len; + len = fmt->bytes - fmt->offset; + if (len > 8) + len = 8; /* Max 64 bits */ + p = result + len; val = 0; - for (i = formats[type].len; i; i--) { + for (i = len; i; i--) { p--; val = (val << 8) + *p; } diff --git a/asm/floats.c b/asm/floats.c index adc6afbf..27180bdc 100644 --- a/asm/floats.c +++ b/asm/floats.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2018 The NASM Authors - All 
Rights Reserved + * Copyright 1996-2020 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * @@ -629,13 +629,6 @@ static void ieee_shr(fp_limb *mant, int i) - the sign bit plus exponent fit in 16 bits. - the exponent bias is 2^(n-1)-1 for an n-bit exponent */ -struct ieee_format { - int bytes; - int mantissa; /* Fractional bits in the mantissa */ - int explicit; /* Explicit integer */ - int exponent; /* Bits in the exponent */ -}; - /* * The 16- and 128-bit formats are expected to be in IEEE 754r. * AMD SSE5 uses the 16-bit format. @@ -646,13 +639,31 @@ struct ieee_format { * * The 8-bit format appears to be the consensus 8-bit floating-point * format. It is apparently used in graphics applications. + * + * The b16 format is a 16-bit format with smaller mantissa and larger + * exponent field. It is effectively a truncated version of the standard + * IEEE 32-bit (single) format, but is explicitly supported here in + * order to support proper rounding. + * + * This array must correspond to enum floatize in include/nasm.h. + * Note that there are some formats which have more than one enum; + * both need to be listed here with the appropriate offset into the + * floating-point byte array (use for the floatize operators.) + * + * FLOAT_ERR is a value that both represents "invalid format" and the + * size of this array. 
*/ -static const struct ieee_format ieee_8 = { 1, 3, 0, 4 }; -static const struct ieee_format ieee_16 = { 2, 10, 0, 5 }; -static const struct ieee_format ieee_32 = { 4, 23, 0, 8 }; -static const struct ieee_format ieee_64 = { 8, 52, 0, 11 }; -static const struct ieee_format ieee_80 = { 10, 63, 1, 15 }; -static const struct ieee_format ieee_128 = { 16, 112, 0, 15 }; +const struct ieee_format fp_formats[FLOAT_ERR] = { + { 1, 3, 0, 4, 0 }, /* FLOAT_8 */ + { 2, 10, 0, 5, 0 }, /* FLOAT_16 */ + { 2, 7, 0, 8, 0 }, /* FLOAT_B16 */ + { 4, 23, 0, 8, 0 }, /* FLOAT_32 */ + { 8, 52, 0, 11, 0 }, /* FLOAT_64 */ + { 10, 63, 1, 15, 0 }, /* FLOAT_80M */ + { 10, 63, 1, 15, 8 }, /* FLOAT_80E */ + { 16, 112, 0, 15, 0 }, /* FLOAT_128L */ + { 16, 112, 0, 15, 8 } /* FLOAT_128H */ +}; /* Types of values we can generate */ enum floats { @@ -672,7 +683,7 @@ static int to_packed_bcd(const char *str, const char *p, char c; int tv = -1; - if (fmt != &ieee_80) { + if (fmt->bytes != 10) { nasm_nonfatal("packed BCD requires an 80-bit format"); return 0; } @@ -711,9 +722,9 @@ static int to_packed_bcd(const char *str, const char *p, return 1; /* success */ } -static int to_float(const char *str, int s, uint8_t *result, - const struct ieee_format *fmt) +int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt) { + const struct ieee_format *fmt = &fp_formats[ffmt]; fp_limb mant[MANT_LIMBS]; int32_t exponent = 0; const int32_t expmax = 1 << (fmt->exponent - 1); @@ -902,25 +913,20 @@ static int to_float(const char *str, int s, uint8_t *result, return 1; /* success */ } -int float_const(const char *number, int sign, uint8_t *result, int bytes) +/* + * Get the default floating point format for this specific field size. + * Used for the Dx pseudoops. 
+ */ +enum floatize float_deffmt(int bytes) { - switch (bytes) { - case 1: - return to_float(number, sign, result, &ieee_8); - case 2: - return to_float(number, sign, result, &ieee_16); - case 4: - return to_float(number, sign, result, &ieee_32); - case 8: - return to_float(number, sign, result, &ieee_64); - case 10: - return to_float(number, sign, result, &ieee_80); - case 16: - return to_float(number, sign, result, &ieee_128); - default: - nasm_panic("strange value %d passed to float_const", bytes); - return 0; + enum floatize type; + + for (type = 0; type < FLOAT_ERR; type++) { + if (fp_formats[type].bytes == bytes) + break; } + + return type; /* FLOAT_ERR if invalid */ } /* Set floating-point options */ diff --git a/asm/floats.h b/asm/floats.h index 4f80acac..c4635136 100644 --- a/asm/floats.h +++ b/asm/floats.h @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2009 The NASM Authors - All Rights Reserved + * Copyright 1996-2020 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. 
* @@ -48,7 +48,20 @@ enum float_round { FLOAT_RC_UP }; -int float_const(const char *string, int sign, uint8_t *result, int bytes); +/* Note: enum floatize and FLOAT_ERR are defined in nasm.h */ + +/* Floating-point format description */ +struct ieee_format { + int bytes; /* Total bytes */ + int mantissa; /* Fractional bits in the mantissa */ + int explicit; /* Explicit integer */ + int exponent; /* Bits in the exponent */ + int offset; /* Offset into byte array for floatize op */ +}; +extern const struct ieee_format fp_formats[FLOAT_ERR]; + +int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt); +enum floatize float_deffmt(int bytes); int float_option(const char *option); #endif /* NASM_FLOATS_H */ diff --git a/asm/parser.c b/asm/parser.c index 47b46ecd..dbd2240c 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -531,10 +531,12 @@ static int parse_eops(extop **result, bool critical, int elem) goto is_float; } } else if (i == TOKEN_FLOAT) { + enum floatize fmt; is_float: eop->type = EOT_DB_FLOAT; - if (eop->elem > 16) { + fmt = float_deffmt(eop->elem); + if (fmt == FLOAT_ERR) { nasm_nonfatal("no %d-bit floating-point format supported", eop->elem << 3); eop->val.string.len = 0; @@ -552,8 +554,7 @@ static int parse_eops(extop **result, bool critical, int elem) eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len); eop->val.string.data = (char *)eop + sizeof(extop); if (!float_const(tokval.t_charptr, sign, - (uint8_t *)eop->val.string.data, - eop->val.string.len)) + (uint8_t *)eop->val.string.data, fmt)) eop->val.string.len = 0; } if (!eop->val.string.len) diff --git a/asm/tokens.dat b/asm/tokens.dat index ab37dcc1..356b39a2 100644 --- a/asm/tokens.dat +++ b/asm/tokens.dat @@ -113,6 +113,9 @@ __?float80e?__ __?float128l?__ __?float128h?__ +% TOKEN_FLOATIZE, 0, 0, FLOAT_B{__?bfloat*?__} +__?bfloat16?__ + % TOKEN_STRFUNC, 0, 0, STRFUNC_{__?*?__} __?utf16?__ __?utf16le?__ diff --git a/doc/changes.src b/doc/changes.src index cf95224a..c1459231 
100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -12,6 +12,9 @@ since 2007. \b Add instructions from the Intel Instruction Set Extensions and Future Features Programming Reference, June 2020. +\b Support for \c{bfloat16} floating-point constants. See \k{fltconst} +and \k{pkg_fp}. + \b Properly display warnings in preprocess-only mode. \b Fix copy-and-paste of examples from the PDF documentation. diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index cfa92fd1..e3d503c5 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -1692,9 +1692,9 @@ context. \i{Floating-point} constants are acceptable only as arguments to \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as arguments to the special operators \i\c{__?float8?__}, -\i\c{__?float16?__}, \i\c{__?float32?__}, \i\c{__?float64?__}, -\i\c{__?float80m?__}, \i\c{__?float80e?__}, \i\c{__?float128l?__}, and -\i\c{__?float128h?__}. +\i\c{__?float16?__}, \i\c{__?bfloat16?__}, \i\c{__?float32?__}, +\i\c{__?float64?__}, \i\c{__?float80m?__}, \i\c{__?float80e?__}, +\i\c{__?float128l?__}, and \i\c{__?float128h?__}. See also \k{pkg_fp}. Floating-point constants are expressed in the traditional form: digits, then a period, then optionally more digits, then optionally an @@ -1733,6 +1733,13 @@ appears to be the most frequently used 8-bit floating-point format, although it is not covered by any formal standard. This is sometimes called a "\i{minifloat}." +The \i\c{bfloat16} format is effectively a compressed version of the +32-bit single precision format, with a reduced mantissa. It is +effectively the same as truncating the 32-bit format to the upper 16 +bits, except for rounding. There is no \c{D}\e{x} directive that +corresponds to \c{bfloat16} as it obviously has the same size as the +IEEE standard 16-bit half precision format, see however \k{pkg_fp}. + The special operators are used to produce floating-point numbers in other contexts. 
They produce the binary representation of a specific floating-point number as an integer, and can use anywhere integer @@ -4633,6 +4640,7 @@ This packages contains the following floating-point convenience macros: \c \c %define float8(x) __?float8?__(x) \c %define float16(x) __?float16?__(x) +\c %define bfloat16(x) __?bfloat16?__(x) \c %define float32(x) __?float32?__(x) \c %define float64(x) __?float64?__(x) \c %define float80m(x) __?float80m?__(x) @@ -4640,6 +4648,12 @@ This packages contains the following floating-point convenience macros: \c %define float128l(x) __?float128l?__(x) \c %define float128h(x) __?float128h?__(x) +It also defines a multi-line macro \i\c{bf16} that can be used +in a similar way to the \c{D}\e{x} directives for the other +floating-point numbers: + +\c bf16 -3.1415, NaN, 2000.0, +Inf + \H{pkg_ifunc} \i\c{ifunc}: \i{Integer functions} diff --git a/include/nasm.h b/include/nasm.h index 6cffaf5d..950ac45b 100644 --- a/include/nasm.h +++ b/include/nasm.h @@ -196,15 +196,18 @@ enum token_type { /* token types, other than chars */ TOKEN_OPMASK /* translated token for opmask registers */ }; +/* Must match the fp_formats[] array in asm/floats.c */ enum floatize { FLOAT_8, FLOAT_16, + FLOAT_B16, FLOAT_32, FLOAT_64, FLOAT_80M, FLOAT_80E, FLOAT_128L, - FLOAT_128H + FLOAT_128H, + FLOAT_ERR /* Invalid format, MUST BE LAST */ }; /* Must match the list in string_transform(), in strfunc.c */ diff --git a/macros/fp.mac b/macros/fp.mac index eb297014..3a094a5c 100644 --- a/macros/fp.mac +++ b/macros/fp.mac @@ -1,6 +1,6 @@ ;; -------------------------------------------------------------------------- ;; -;; Copyright 2010 The NASM Authors - All Rights Reserved +;; Copyright 2010-2020 The NASM Authors - All Rights Reserved ;; See the file AUTHORS included with the NASM distribution for ;; the specific copyright holders. 
;; @@ -46,9 +46,17 @@ USE: fp %define float8(x) __?float8?__(x) %define float16(x) __?float16?__(x) +%define bfloat16(x) __?bfloat16?__(x) %define float32(x) __?float32?__(x) %define float64(x) __?float64?__(x) %define float80m(x) __?float80m?__(x) %define float80e(x) __?float80e?__(x) %define float128l(x) __?float128l?__(x) %define float128h(x) __?float128h?__(x) + +%imacro bf16 1-*.nolist + %rep %0 + dw __?bfloat16?__(%1) + %rotate 1 + %endrep +%endmacro diff --git a/test/float.asm b/test/float.asm index 88519b2e..1dd92a96 100644 --- a/test/float.asm +++ b/test/float.asm @@ -5,6 +5,8 @@ ; Test of floating-point formats ; +%use fp + ; 8-bit db 1.0 db +1.0 @@ -65,6 +67,37 @@ dw __SNaN__ dw 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 +; 16-bit bfloat + bf16 1.0 + bf16 +1.0 + bf16 -1.0 + bf16 1.5 + bf16 +1.5 + bf16 -1.5 + bf16 0.0 + bf16 +0.0 + bf16 -0.0 + bf16 1.83203125 + bf16 +1.83203125 + bf16 -1.83203125 + bf16 1.83203125e15 + bf16 +1.83203125e15 + bf16 -1.83203125e15 + bf16 1.83203125e-15 + bf16 +1.83203125e-15 + bf16 -1.83203125e-15 + bf16 1.83203125e-40 ; Denormal! + bf16 +1.83203125e-40 ; Denormal! + bf16 -1.83203125e-40 ; Denormal! + bf16 __Infinity__ + bf16 +__Infinity__ + bf16 -__Infinity__ + bf16 __NaN__ + bf16 __QNaN__ + bf16 __SNaN__ + bf16 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 + bf16 -3.1415, NaN, 2000.0, +Inf + ; 32-bit dd 1.0 dd +1.0 @@ -94,6 +127,7 @@ dd __QNaN__ dd __SNaN__ dd 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 + dd -3.1415, NaN, 2000.0, +Inf ; 64-bit dq 1.0 @@ -124,7 +158,7 @@ dq __QNaN__ dq __SNaN__ dq 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 - + ; 80-bit dt 1.0 dt +1.0 From 848b1657fd52f6d4b71814047deaebfd91a7dbed Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Thu, 16 Jul 2020 23:13:24 -0700 Subject: [PATCH 26/29] NASM 2.15.03rc8 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index f3632bf4..0f7f97eb 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.15.03rc7 +2.15.03rc8 From 1d8c09b24e4204767f2d05047e2ac16dcdf46bfb Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 17 Jul 2020 17:44:27 -0700 Subject: [PATCH 27/29] x86/insns.dat: add tuple type for the latest AVX512 instructions Add missing tuple type (all are Full - fv:) for the latest AVX512 instructions. Signed-off-by: H. Peter Anvin (Intel) --- x86/insns.dat | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/x86/insns.dat b/x86/insns.dat index 2776cfdf..141d68b3 100644 --- a/x86/insns.dat +++ b/x86/insns.dat @@ -6015,20 +6015,20 @@ XRESLDTRK void [ f2 0f 01 e9] TSXLDTRK,FUTURE XSUSLDTRK void [ f2 0f 01 e8] TSXLDTRK,FUTURE ;# AVX512 Bfloat16 instructions -VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f2.0f38.w0 72 /r] AVX512BF16,FUTURE -VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm: evex.256.f2.0f38.w0 72 /r] AVX512BF16,FUTURE -VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm: evex.512.f2.0f38.w0 72 /r] AVX512BF16,FUTURE -VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f3.0f38.w0 72 /r] AVX512BF16,FUTURE -VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm: evex.256.f3.0f38.w0 72 /r] AVX512BF16,FUTURE -VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm: evex.512.f3.0f38.w0 72 /r] AVX512BF16,FUTURE -VDPBF16PS xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm: evex.128.f3.0f38.w0 52 /r] AVX512BF16,FUTURE -VDPBF16PS ymmreg|mask|z,ymmreg*,ymmrm128|b32 [rvm: evex.256.f3.0f38.w0 52 /r] AVX512BF16,FUTURE -VDPBF16PS zmmreg|mask|z,zmmreg*,zmmrm128|b32 [rvm: evex.512.f3.0f38.w0 52 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f2.0f38.w0 72 /r] AVX512BF16,FUTURE 
+VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm:fv: evex.512.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm:fv: evex.512.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VDPBF16PS xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f3.0f38.w0 52 /r] AVX512BF16,FUTURE +VDPBF16PS ymmreg|mask|z,ymmreg*,ymmrm128|b32 [rvm:fv: evex.256.f3.0f38.w0 52 /r] AVX512BF16,FUTURE +VDPBF16PS zmmreg|mask|z,zmmreg*,zmmrm128|b32 [rvm:fv: evex.512.f3.0f38.w0 52 /r] AVX512BF16,FUTURE ;# AVX512 mask intersect instructions -VP2INTERSECTD kreg|rs2,xmmreg,xmmrm128|b32 [rvm: evex.nds.128.f2.0f38.w0 68 /r] AVX512BF16,FUTURE -VP2INTERSECTD kreg|rs2,ymmreg,ymmrm128|b32 [rvm: evex.nds.256.f2.0f38.w0 68 /r] AVX512BF16,FUTURE -VP2INTERSECTD kreg|rs2,zmmreg,zmmrm128|b32 [rvm: evex.nds.512.f2.0f38.w0 68 /r] AVX512BF16,FUTURE +VP2INTERSECTD kreg|rs2,xmmreg,xmmrm128|b32 [rvm:fv: evex.nds.128.f2.0f38.w0 68 /r] AVX512BF16,FUTURE +VP2INTERSECTD kreg|rs2,ymmreg,ymmrm128|b32 [rvm:fv: evex.nds.256.f2.0f38.w0 68 /r] AVX512BF16,FUTURE +VP2INTERSECTD kreg|rs2,zmmreg,zmmrm128|b32 [rvm:fv: evex.nds.512.f2.0f38.w0 68 /r] AVX512BF16,FUTURE ;# Intel Advanced Matrix Extensions (AMX) LDTILECFG mem512 [m: vex.128.np.0f38.w0 49 /0] AMXTILE,FUTURE,SZ,X64 From ec204170028cdd582de1d2db28a365085d57424c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 17 Jul 2020 17:46:45 -0700 Subject: [PATCH 28/29] changes.src: slighly better description of new instructions Describe what the new instructions actually are. Signed-off-by: H. 
Peter Anvin (Intel) --- doc/changes.src | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/changes.src b/doc/changes.src index c1459231..d1182271 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -10,7 +10,9 @@ since 2007. \S{cl-2.15.03} Version 2.15.03 \b Add instructions from the Intel Instruction Set Extensions and -Future Features Programming Reference, June 2020. +Future Features Programming Reference, June 2020. This includes +AVX512 \c{bfloat16}, AVX512 mask intersect, and Intel Advanced Matrix +Extensions (AMX). \b Support for \c{bfloat16} floating-point constants. See \k{fltconst} and \k{pkg_fp}. From d27427846f8e61bf194721c9ccd72a95547289e8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 17 Jul 2020 17:53:37 -0700 Subject: [PATCH 29/29] NASM 2.15.03 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 0f7f97eb..3f699026 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.15.03rc8 +2.15.03