diff --git a/asm/assemble.c b/asm/assemble.c index 1d28c0ad..cd9222c4 100644 --- a/asm/assemble.c +++ b/asm/assemble.c @@ -63,17 +63,18 @@ * assembly mode or the operand-size override on the operand * \70..\73 rel32 a long relative operand, from operand 0..3 * \74..\77 seg a word constant, from the _segment_ part of operand 0..3 - * \1ab a ModRM, calculated on EA in operand a, with the spare + * \1ab /r a ModRM, calculated on EA in operand a, with the reg * field the register value of operand b. - * \172\ab the register number from operand a in bits 7..4, with + * \171\mab /mrb (e.g /3r0) a ModRM, with the reg field taken from operand a, and the m + * and b fields set to the specified values. + * \172\ab /is4 the register number from operand a in bits 7..4, with * the 4-bit immediate from operand b in bits 3..0. * \173\xab the register number from operand a in bits 7..4, with * the value b in bits 3..0. * \174..\177 the register number from operand 0..3 in bits 7..4, and * an arbitrary value in bits 3..0 (assembled as zero.) - * \2ab a ModRM, calculated on EA in operand a, with the spare + * \2ab /b a ModRM, calculated on EA in operand a, with the reg * field equal to digit b. - * * \240..\243 this instruction uses EVEX rather than REX or VEX/XOP, with the * V field taken from operand 0..3. * \250 this instruction uses EVEX rather than REX or VEX/XOP, with the @@ -103,12 +104,11 @@ * tup is tuple type for Disp8*N from %tuple_codes in insns.pl * (compressed displacement encoding) * - * \254..\257 id,s a signed 32-bit operand to be extended to 64 bits. - * \260..\263 this instruction uses VEX/XOP rather than REX, with the - * V field taken from operand 0..3. - * \270 this instruction uses VEX/XOP rather than REX, with the - * V field set to 1111b. - * + * \254..\257 id,s a signed 32-bit operand to be extended to 64 bits. + * \260..\263 this instruction uses VEX/XOP rather than REX, with the + * V field taken from operand 0..3. 
+ * \270 this instruction uses VEX/XOP rather than REX, with the + * V field set to 1111b. * VEX/XOP prefixes are followed by the sequence: * \tmm\wlp where mm is the M field; and wlp is: * 00 wwl lpp @@ -1337,6 +1337,14 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, length += 2; break; + case 0171: + c = *codes++; + op2 = (op2 & ~3) | ((c >> 3) & 3); + opx = &ins->oprs[op2]; + ins->rex |= op_rexflags(opx, REX_R|REX_H|REX_P|REX_W); + length++; + break; + case 0172: case 0173: codes++; @@ -1971,6 +1979,15 @@ static void gencode(struct out_data *data, insn *ins) out_segment(data, opx); break; + case 0171: + c = *codes++; + op2 = (op2 & ~3) | ((c >> 3) & 3); + opx = &ins->oprs[op2]; + r = nasm_regvals[opx->basereg]; + c = (c & ~070) | ((r & 7) << 3); + out_rawbyte(data, c); + break; + case 0172: { int mask = ins->prefixes[PPS_VEX] == P_EVEX ? 7 : 15; @@ -2796,14 +2813,23 @@ static enum ea_type process_ea(operand *input, ea *output, int bits, if (input->basereg == -1 && (input->indexreg == -1 || input->scale == 0)) { /* - * It's a pure offset. + * It's a pure offset. If it is an IMMEDIATE, it is a pattern + * in insns.dat which allows an immediate to be used as a memory + * address, in which case apply the default REL/ABS. 
*/ - if (bits == 64 && ((input->type & IP_REL) == IP_REL)) { - if (input->segment == NO_SEG || - (input->opflags & OPFLAG_RELATIVE)) { - nasm_warn(WARN_OTHER|ERR_PASS2, "absolute address can not be RIP-relative"); - input->type &= ~IP_REL; - input->type |= MEMORY; + if (bits == 64) { + if (is_class(IMMEDIATE, input->type)) { + if (!(input->eaflags & EAF_ABS) && + ((input->eaflags & EAF_REL) || globalrel)) + input->type |= IP_REL; + } + if ((input->type & IP_REL) == IP_REL) { + if (input->segment == NO_SEG || + (input->opflags & OPFLAG_RELATIVE)) { + nasm_warn(WARN_OTHER|ERR_PASS2, "absolute address can not be RIP-relative"); + input->type &= ~IP_REL; + input->type |= MEMORY; + } } } @@ -2818,7 +2844,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits, input->disp_size != (addrbits != 16 ? 32 : 16))) nasm_warn(WARN_OTHER, "displacement size ignored on absolute address"); - if (bits == 64 && (~input->type & IP_REL)) { + if ((eaflags & EAF_MIB) || (bits == 64 && (~input->type & IP_REL))) { output->sib_present = true; output->sib = GEN_SIB(0, 4, 5); output->bytes = 4; @@ -3037,7 +3063,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits, output->rex |= rexflags(it, ix, REX_X); output->rex |= rexflags(bt, bx, REX_B); - if (it == -1 && (bt & 7) != REG_NUM_ESP) { + if (it == -1 && (bt & 7) != REG_NUM_ESP && !(eaflags & EAF_MIB)) { /* no SIB needed */ int mod, rm; diff --git a/asm/eval.c b/asm/eval.c index cd3c526d..5d6ee1e7 100644 --- a/asm/eval.c +++ b/asm/eval.c @@ -694,21 +694,13 @@ static expr *expr5(void) static expr *eval_floatize(enum floatize type) { uint8_t result[16], *p; /* Up to 128 bits */ - static const struct { - int bytes, start, len; - } formats[] = { - { 1, 0, 1 }, /* FLOAT_8 */ - { 2, 0, 2 }, /* FLOAT_16 */ - { 4, 0, 4 }, /* FLOAT_32 */ - { 8, 0, 8 }, /* FLOAT_64 */ - { 10, 0, 8 }, /* FLOAT_80M */ - { 10, 8, 2 }, /* FLOAT_80E */ - { 16, 0, 8 }, /* FLOAT_128L */ - { 16, 8, 8 }, /* FLOAT_128H */ - }; int sign = 1; 
int64_t val; + size_t len; int i; + const struct ieee_format *fmt; + + fmt = &fp_formats[type]; scan(); if (tt != '(') { @@ -724,7 +716,7 @@ static expr *eval_floatize(enum floatize type) nasm_nonfatal("expecting floating-point number"); return NULL; } - if (!float_const(tokval->t_charptr, sign, result, formats[type].bytes)) + if (!float_const(tokval->t_charptr, sign, result, type)) return NULL; scan(); if (tt != ')') { @@ -732,9 +724,12 @@ static expr *eval_floatize(enum floatize type) return NULL; } - p = result+formats[type].start+formats[type].len; + len = fmt->bytes - fmt->offset; + if (len > 8) + len = 8; /* Max 64 bits */ + p = result + len; val = 0; - for (i = formats[type].len; i; i--) { + for (i = len; i; i--) { p--; val = (val << 8) + *p; } diff --git a/asm/floats.c b/asm/floats.c index adc6afbf..27180bdc 100644 --- a/asm/floats.c +++ b/asm/floats.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2018 The NASM Authors - All Rights Reserved + * Copyright 1996-2020 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * @@ -629,13 +629,6 @@ static void ieee_shr(fp_limb *mant, int i) - the sign bit plus exponent fit in 16 bits. - the exponent bias is 2^(n-1)-1 for an n-bit exponent */ -struct ieee_format { - int bytes; - int mantissa; /* Fractional bits in the mantissa */ - int explicit; /* Explicit integer */ - int exponent; /* Bits in the exponent */ -}; - /* * The 16- and 128-bit formats are expected to be in IEEE 754r. * AMD SSE5 uses the 16-bit format. @@ -646,13 +639,31 @@ struct ieee_format { * * The 8-bit format appears to be the consensus 8-bit floating-point * format. It is apparently used in graphics applications. + * + * The b16 format is a 16-bit format with smaller mantissa and larger + * exponent field. 
It is effectively a truncated version of the standard + * IEEE 32-bit (single) format, but is explicitly supported here in + * order to support proper rounding. + * + * This array must correspond to enum floatize in include/nasm.h. + * Note that there are some formats which have more than one enum; + * both need to be listed here with the appropriate offset into the + * floating-point byte array (use for the floatize operators.) + * + * FLOAT_ERR is a value that both represents "invalid format" and the + * size of this array. */ -static const struct ieee_format ieee_8 = { 1, 3, 0, 4 }; -static const struct ieee_format ieee_16 = { 2, 10, 0, 5 }; -static const struct ieee_format ieee_32 = { 4, 23, 0, 8 }; -static const struct ieee_format ieee_64 = { 8, 52, 0, 11 }; -static const struct ieee_format ieee_80 = { 10, 63, 1, 15 }; -static const struct ieee_format ieee_128 = { 16, 112, 0, 15 }; +const struct ieee_format fp_formats[FLOAT_ERR] = { + { 1, 3, 0, 4, 0 }, /* FLOAT_8 */ + { 2, 10, 0, 5, 0 }, /* FLOAT_16 */ + { 2, 7, 0, 8, 0 }, /* FLOAT_B16 */ + { 4, 23, 0, 8, 0 }, /* FLOAT_32 */ + { 8, 52, 0, 11, 0 }, /* FLOAT_64 */ + { 10, 63, 1, 15, 0 }, /* FLOAT_80M */ + { 10, 63, 1, 15, 8 }, /* FLOAT_80E */ + { 16, 112, 0, 15, 0 }, /* FLOAT_128L */ + { 16, 112, 0, 15, 8 } /* FLOAT_128H */ +}; /* Types of values we can generate */ enum floats { @@ -672,7 +683,7 @@ static int to_packed_bcd(const char *str, const char *p, char c; int tv = -1; - if (fmt != &ieee_80) { + if (fmt->bytes != 10) { nasm_nonfatal("packed BCD requires an 80-bit format"); return 0; } @@ -711,9 +722,9 @@ static int to_packed_bcd(const char *str, const char *p, return 1; /* success */ } -static int to_float(const char *str, int s, uint8_t *result, - const struct ieee_format *fmt) +int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt) { + const struct ieee_format *fmt = &fp_formats[ffmt]; fp_limb mant[MANT_LIMBS]; int32_t exponent = 0; const int32_t expmax = 1 << (fmt->exponent - 1); 
@@ -902,25 +913,20 @@ static int to_float(const char *str, int s, uint8_t *result, return 1; /* success */ } -int float_const(const char *number, int sign, uint8_t *result, int bytes) +/* + * Get the default floating point format for this specific field size. + * Used for the Dx pseudoops. + */ +enum floatize float_deffmt(int bytes) { - switch (bytes) { - case 1: - return to_float(number, sign, result, &ieee_8); - case 2: - return to_float(number, sign, result, &ieee_16); - case 4: - return to_float(number, sign, result, &ieee_32); - case 8: - return to_float(number, sign, result, &ieee_64); - case 10: - return to_float(number, sign, result, &ieee_80); - case 16: - return to_float(number, sign, result, &ieee_128); - default: - nasm_panic("strange value %d passed to float_const", bytes); - return 0; + enum floatize type; + + for (type = 0; type < FLOAT_ERR; type++) { + if (fp_formats[type].bytes == bytes) + break; } + + return type; /* FLOAT_ERR if invalid */ } /* Set floating-point options */ diff --git a/asm/floats.h b/asm/floats.h index 4f80acac..c4635136 100644 --- a/asm/floats.h +++ b/asm/floats.h @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2009 The NASM Authors - All Rights Reserved + * Copyright 1996-2020 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. 
* @@ -48,7 +48,20 @@ enum float_round { FLOAT_RC_UP }; -int float_const(const char *string, int sign, uint8_t *result, int bytes); +/* Note: enum floatize and FLOAT_ERR are defined in nasm.h */ + +/* Floating-point format description */ +struct ieee_format { + int bytes; /* Total bytes */ + int mantissa; /* Fractional bits in the mantissa */ + int explicit; /* Explicit integer */ + int exponent; /* Bits in the exponent */ + int offset; /* Offset into byte array for floatize op */ +}; +extern const struct ieee_format fp_formats[FLOAT_ERR]; + +int float_const(const char *str, int s, uint8_t *result, enum floatize ffmt); +enum floatize float_deffmt(int bytes); int float_option(const char *option); #endif /* NASM_FLOATS_H */ diff --git a/asm/parser.c b/asm/parser.c index 47b46ecd..dbd2240c 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -531,10 +531,12 @@ static int parse_eops(extop **result, bool critical, int elem) goto is_float; } } else if (i == TOKEN_FLOAT) { + enum floatize fmt; is_float: eop->type = EOT_DB_FLOAT; - if (eop->elem > 16) { + fmt = float_deffmt(eop->elem); + if (fmt == FLOAT_ERR) { nasm_nonfatal("no %d-bit floating-point format supported", eop->elem << 3); eop->val.string.len = 0; @@ -552,8 +554,7 @@ static int parse_eops(extop **result, bool critical, int elem) eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len); eop->val.string.data = (char *)eop + sizeof(extop); if (!float_const(tokval.t_charptr, sign, - (uint8_t *)eop->val.string.data, - eop->val.string.len)) + (uint8_t *)eop->val.string.data, fmt)) eop->val.string.len = 0; } if (!eop->val.string.len) diff --git a/asm/pptok.dat b/asm/pptok.dat index 3dede0bc..21144973 100644 --- a/asm/pptok.dat +++ b/asm/pptok.dat @@ -90,6 +90,7 @@ %include %line %local +%null %pop %pragma %push diff --git a/asm/preproc.c b/asm/preproc.c index cfa8df15..b64c8fab 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -1906,6 +1906,16 @@ static char *detoken(Token * tlist, bool expand_locals) } break; + case 
TOKEN_INDIRECT: + /* + * This won't happen in when emitting to the assembler, + * but can happen when emitting output for some of the + * list options. The token string doesn't actually include + * the brackets in this case. + */ + len += 3; /* %[] */ + break; + default: break; /* No modifications */ } @@ -1925,8 +1935,19 @@ static char *detoken(Token * tlist, bool expand_locals) p = line = nasm_malloc(len + 1); - list_for_each(t, tlist) - p = mempcpy(p, tok_text(t), t->len); + list_for_each(t, tlist) { + switch (t->type) { + case TOKEN_INDIRECT: + *p++ = '%'; + *p++ = '['; + p = mempcpy(p, tok_text(t), t->len); + *p++ = ']'; + break; + + default: + p = mempcpy(p, tok_text(t), t->len); + } + } *p = '\0'; return line; @@ -3534,8 +3555,7 @@ static int do_directive(Token *tline, Token **output) * unconditionally, as they are intended to reflect position * in externally preprocessed sources. */ - if (op == PP_LINE) - return line_directive(origline, tline); + return line_directive(origline, tline); default: break; @@ -4127,7 +4147,13 @@ issue_error: nasm_assert(!defining); nasm_new(def); def->casesense = casesense; - def->dstk.mmac = defining; + /* + * dstk.mstk points to the previous definition bracket, + * whereas dstk.mmac points to the topmost mmacro, which + * in this case is the one we are just starting to create. + */ + def->dstk.mstk = defining; + def->dstk.mmac = def; if (op == PP_RMACRO) def->max_depth = nasm_limit[LIMIT_MACRO_LEVELS]; if (!parse_mmacro_spec(tline, def, dname)) { @@ -4709,6 +4735,11 @@ issue_error: case PP_LINE: nasm_panic("`%s' directive not preprocessed early", dname); break; + + case PP_NULL: + /* Goes nowhere, does nothing... 
*/ + break; + } done: @@ -4803,7 +4834,7 @@ static inline bool pp_concat_match(const Token *t, enum concat_flags mask) static bool paste_tokens(Token **head, const struct concat_mask *m, size_t mnum, bool handle_explicit) { - Token *tok, *t, *next, **prev_next, **prev_nonspace; + Token *tok, *t, *next, **prev_next, **prev_nonspace, **nextp; bool pasted = false; char *buf, *p; size_t len, i; @@ -4842,29 +4873,24 @@ static bool paste_tokens(Token **head, const struct concat_mask *m, did_paste = true; + /* Left pasting token is start of line, just drop %+ */ if (!prev_nonspace) { - /* - * Left pasting token is start of line, just drop %+ - * and any whitespace leading up to it. - */ - *head = next = delete_Token(tok); - break; + prev_next = nextp = head; + t = NULL; + } else { + prev_next = prev_nonspace; + t = *prev_next; + nextp = &t->next; } - prev_next = prev_nonspace; - t = *prev_nonspace; - - /* Delete leading whitespace */ - next = zap_white(t->next); - /* - * Delete the %+ token itself, followed by any whitespace. + * Delete the %+ token itself plus any whitespace. * In a sequence of %+ ... %+ ... %+ pasting sequences where * some expansions in the middle have ended up empty, * we can end up having multiple %+ tokens in a row; * just drop whem in that case. */ - while (next) { + while ((next = *nextp)) { if (next->type == TOKEN_PASTE || next->type == TOKEN_WHITESPACE) next = delete_Token(next); else @@ -4874,11 +4900,16 @@ static bool paste_tokens(Token **head, const struct concat_mask *m, /* * Nothing after? Just leave the existing token. 
*/ - if (!next) { - t->next = next = NULL; /* End of line */ + if (!next) + break; + + if (!t) { + /* Nothing to actually paste, just zapping the paste */ + *prev_next = tok = next; break; } + /* An actual paste */ p = buf = nasm_malloc(t->len + next->len + 1); p = mempcpy(p, tok_text(t), t->len); p = mempcpy(p, tok_text(next), next->len); @@ -4903,7 +4934,7 @@ static bool paste_tokens(Token **head, const struct concat_mask *m, t->next = delete_Token(next); /* We want to restart from the head of the pasted token */ - next = tok; + *prev_next = next = tok; break; default: @@ -4939,10 +4970,14 @@ static bool paste_tokens(Token **head, const struct concat_mask *m, * Connect pasted into original stream, * ie A -> new-tokens -> B */ - while (t->next) - t = t->next; + while ((tok = t->next)) { + if (tok->type != TOKEN_WHITESPACE && tok->type != TOKEN_PASTE) + prev_nonspace = &t->next; + t = tok; + } + t->next = next; - tok = t; + prev_next = &t->next; did_paste = true; break; } @@ -7331,6 +7366,8 @@ void pp_error_list_macros(errflags severity) severity |= ERR_PP_LISTMACRO | ERR_NO_SEVERITY | ERR_HERE; while ((m = src_error_down())) { + if ((m->nolist & NL_LIST) || !m->where.filename) + break; nasm_error(severity, "... 
from macro `%s' defined", m->name); } diff --git a/asm/tokens.dat b/asm/tokens.dat index ab37dcc1..356b39a2 100644 --- a/asm/tokens.dat +++ b/asm/tokens.dat @@ -113,6 +113,9 @@ __?float80e?__ __?float128l?__ __?float128h?__ +% TOKEN_FLOATIZE, 0, 0, FLOAT_B{__?bfloat*?__} +__?bfloat16?__ + % TOKEN_STRFUNC, 0, 0, STRFUNC_{__?*?__} __?utf16?__ __?utf16le?__ diff --git a/autoconf/m4/pa_c_typeof.m4 b/autoconf/m4/pa_c_typeof.m4 index d182ea2c..87b9ed60 100644 --- a/autoconf/m4/pa_c_typeof.m4 +++ b/autoconf/m4/pa_c_typeof.m4 @@ -7,7 +7,7 @@ dnl -------------------------------------------------------------------------- AC_DEFUN([PA_C_TYPEOF], [AC_CACHE_CHECK([if $CC supports typeof], [pa_cv_typeof], [pa_cv_typeof=no - for pa_typeof_try in typeof __typeof __typeof__ decltype __decltype __decltype__ + for pa_typeof_try in typeof __typeof __typeof__ decltype __decltype __decltype__ _Decltype do AS_IF([test $pa_cv_typeof = no], [AC_COMPILE_IFELSE([AC_LANG_SOURCE([ @@ -29,4 +29,4 @@ int testme(int x) AS_IF([test $pa_cv_typeof = typeof], [], [AC_DEFINE_UNQUOTED([typeof], [$pa_cv_typeof], - [Define if your typeof operator is not named typeof.])])])]) + [Define if your typeof operator is not named `typeof'.])])])]) diff --git a/disasm/disasm.c b/disasm/disasm.c index e1b5ebc3..7c6ea816 100644 --- a/disasm/disasm.c +++ b/disasm/disasm.c @@ -203,6 +203,8 @@ static enum reg_enum whichreg(opflags_t regflags, int regval, int rex) return GET_REGISTER(nasm_rd_opmaskreg, regval); if (!(BNDREG & ~regflags)) return GET_REGISTER(nasm_rd_bndreg, regval); + if (!(TMMREG & ~regflags)) + return GET_REGISTER(nasm_rd_tmmreg, regval); #undef GET_REGISTER return 0; @@ -679,6 +681,22 @@ static int matches(const struct itemplate *t, uint8_t *data, break; } + case 0171: + { + uint8_t t = *r++; + uint8_t d = *data++; + if ((d ^ t) & ~070) { + return 0; + } else { + op2 = (op2 & ~3) | ((t >> 3) & 3); + opy = &ins->oprs[op2]; + opy->basereg = ((d >> 3) & 7) + + (ins->rex & REX_R ? 
8 : 0); + opy->segment |= SEG_RMREG; + } + break; + } + case 0172: { uint8_t ximm = *data++; diff --git a/doc/changes.src b/doc/changes.src index 47fb03b4..d1182271 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -9,6 +9,14 @@ since 2007. \S{cl-2.15.03} Version 2.15.03 +\b Add instructions from the Intel Instruction Set Extensions and +Future Features Programming Reference, June 2020. This includes +AVX512 \c{bfloat16}, AVX512 mask intersect, and Intel Advanced Matrix +Extensions (AMX). + +\b Support for \c{bfloat16} floating-point constants. See \k{fltconst} +and \k{pkg_fp}. + \b Properly display warnings in preprocess-only mode. \b Fix copy-and-paste of examples from the PDF documentation. @@ -25,6 +33,13 @@ section type. where one or more parts result from empty token expansion, resulting in \c{%+} tokens at the beginning or end, or multiple ones in a row. +\b Fix macro label capture (\c{%00}, \k{percent00}). + +\b Much better documentation for the MASM compatibility package, +\c{%use masm} (see \k{pkg_masm}). + +\b Fix \c{LEA} without square brackets, for MASM compatibility. + \b Portability fixes. \S{cl-2.15.02} Version 2.15.02 diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index 62a70d57..e3d503c5 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -1692,9 +1692,9 @@ context. \i{Floating-point} constants are acceptable only as arguments to \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}, or as arguments to the special operators \i\c{__?float8?__}, -\i\c{__?float16?__}, \i\c{__?float32?__}, \i\c{__?float64?__}, -\i\c{__?float80m?__}, \i\c{__?float80e?__}, \i\c{__?float128l?__}, and -\i\c{__?float128h?__}. +\i\c{__?float16?__}, \i\c{__?bfloat16?__}, \i\c{__?float32?__}, +\i\c{__?float64?__}, \i\c{__?float80m?__}, \i\c{__?float80e?__}, +\i\c{__?float128l?__}, and \i\c{__?float128h?__}. See also \k{pkg_fp}. 
Floating-point constants are expressed in the traditional form: digits, then a period, then optionally more digits, then optionally an @@ -1733,6 +1733,13 @@ appears to be the most frequently used 8-bit floating-point format, although it is not covered by any formal standard. This is sometimes called a "\i{minifloat}." +The \i\c{bfloat16} format is effectively a compressed version of the +32-bit single precision format, with a reduced mantissa. It is +effectively the same as truncating the 32-bit format to the upper 16 +bits, except for rounding. There is no \c{D}\e{x} directive that +corresponds to \c{bfloat16} as it obviously has the same size as the +IEEE standard 16-bit half precision format, see however \k{pkg_fp}. + The special operators are used to produce floating-point numbers in other contexts. They produce the binary representation of a specific floating-point number as an integer, and can use anywhere integer @@ -2753,6 +2760,11 @@ interfering with the local label mechanism, as described in (the \c{..@} prefix, then a number, then another period) in case they interfere with macro-local labels. +These labels are really macro-local \e{tokens}, and can be used for +other purposes where a token unique to each macro invocation is +desired, e.g. to name single-line macros without using the context +feature (\k{ctxlocal}). + \S{mlmacgre} \i{Greedy Macro Parameters} @@ -2935,6 +2947,10 @@ Examples are given in \k{rotate}. label must be on the same line as the macro invocation, may be a local label (see \k{locallab}), and need not end in a colon. +If \c{%00} is present anywhere in the macro body, the label itself +will not be emitted by NASM. You can, of course, put \c{%00:} +explicitly at the beginning of your macro. + \S{rotate} \i\c{%rotate}: \i{Rotating Macro Parameters} @@ -4043,7 +4059,8 @@ which specifies a line increment value; each line of the input file read in is considered to correspond to \c{mmm} lines of the original source file. 
Finally, \c{filename} is an optional parameter which specifies the file name of the original source file. It may be a -quoted string. +quoted string, in which case any additional argument after the quoted +string will be ignored. After reading a \c{%line} preprocessor directive, NASM will report all file name and line numbers relative to the values specified @@ -4056,6 +4073,13 @@ code. See \k{opt-no-line}. Starting in NASM 2.15, \c{%line} directives are processed before any other processing takes place. +For compatibility with the output from some other preprocessors, +including many C preprocessors, a \c{#} character followed by +whitespace \e{at the very beginning of a line} is also treated as a +\c{%line} directive, except that double quotes surrounding the +filename are treated like NASM backquotes, with \c{\\}-escaped +sequences decoded. + \# This isn't a directive, it should be moved elsewhere... \S{getenv} \i\c{%!}\e{variable}: Read an Environment Variable. @@ -4616,6 +4640,7 @@ This packages contains the following floating-point convenience macros: \c \c %define float8(x) __?float8?__(x) \c %define float16(x) __?float16?__(x) +\c %define bfloat16(x) __?bfloat16?__(x) \c %define float32(x) __?float32?__(x) \c %define float64(x) __?float64?__(x) \c %define float80m(x) __?float80m?__(x) @@ -4623,6 +4648,12 @@ This packages contains the following floating-point convenience macros: \c %define float128l(x) __?float128l?__(x) \c %define float128h(x) __?float128h?__(x) +It also defines a multi-line macro \i\c{bf16} that can be used +in a similar way to the \c{D}\e{x} directives for the other +floating-point numbers: + +\c bf16 -3.1415, NaN, 2000.0, +Inf + \H{pkg_ifunc} \i\c{ifunc}: \i{Integer functions} @@ -4660,17 +4691,61 @@ functionality, as intended to be used primarily with machine-generated code. It does not include any "programmer-friendly" shortcuts, nor does it in any way support ASSUME, symbol typing, or MASM-style structures. 
-Currently, the MASM compatibility package emulates only the PTR -keyword and recognize syntax displacement[index] for memory -operations. To enable the package, use the directive: \c{%use masm} -In addition, NASM now natively supports the MASM \c{?} and -\c{DUP} syntax for the \c{DB} etc data declaration directives, -regardless of if this package is included or not. See \k{db}. +Currently, the MASM compatibility package emulates: +\b The \c{FLAT} and \c{OFFSET} keywords are recognized and ignored. + +\b The \c{PTR} keyword signifies a memory reference, as if the +argument had been put in square brackets: + +\c mov eax,[foo] ; memory reference +\c mov eax,dword ptr foo ; memory reference +\c mov eax,dword ptr flat:foo ; memory reference +\c mov eax,offset foo ; address +\c mov eax,foo ; address (ambiguous syntax in MASM) + +\b The \c{SEGMENT} ... \c{ENDS} syntax: + +\c segname SEGMENT +\c ... +\c segname ENDS + +\b The \c{PROC} ... \c{ENDP} syntax: + +\c procname PROC [FAR] +\c ... +\c procname ENDP + +\> \c{PROC} will also define \c{RET} as a macro expanding to either +\c{RETF} if \c{FAR} is specified or \c{RETN} otherwise. Any keyword +after \c{PROC} other than \c{FAR} is ignored. + +\b The \c{TBYTE} keyword as an alias for \c{TWORD} (see \k{qsother}). + +\b The \c{END} directive is ignored. + +\b In 64-bit mode relative addressing is the default (\c{DEFAULT REL}, +see \k{REL & ABS}). + +In addition, NASM now natively supports, regardless of whether this +package is used or not: + +\b \c{?} and \c{DUP} syntax for the \c{DB} etc data declaration +directives (see \k{db}). + +\b \c{displacement[base+index]} syntax for memory operations, instead +of \c{[base+index+displacement]}. + +\b \c{seg:[addr]} instead of \c{[seg:addr]} syntax. 
+ +\b A pure offset can be given to \c{LEA} without square brackets: + +\c lea rax,[foo] ; standard syntax +\c lea rax,foo ; also accepted \C{directive} \i{Assembler Directives} diff --git a/include/nasm.h b/include/nasm.h index a06c71d5..011c1f8f 100644 --- a/include/nasm.h +++ b/include/nasm.h @@ -259,15 +259,18 @@ enum token_type { /* token types, other than chars */ TOKEN_MAX = INT_MAX /* Keep compiler from reducing the range */ }; +/* Must match the fp_formats[] array in asm/floats.c */ enum floatize { FLOAT_8, FLOAT_16, + FLOAT_B16, FLOAT_32, FLOAT_64, FLOAT_80M, FLOAT_80E, FLOAT_128L, - FLOAT_128H + FLOAT_128H, + FLOAT_ERR /* Invalid format, MUST BE LAST */ }; /* Must match the list in string_transform(), in strfunc.c */ diff --git a/include/opflags.h b/include/opflags.h index 28bb236f..f5dd50ba 100644 --- a/include/opflags.h +++ b/include/opflags.h @@ -81,19 +81,19 @@ /* * Register classes. * - * Bits: 7 - 16 + * Bits: 7 - 17 */ #define REG_CLASS_SHIFT (7) -#define REG_CLASS_BITS (10) +#define REG_CLASS_BITS (11) #define REG_CLASS_MASK OP_GENMASK(REG_CLASS_BITS, REG_CLASS_SHIFT) #define GEN_REG_CLASS(bit) OP_GENBIT(bit, REG_CLASS_SHIFT) /* * Subclasses. Depends on type of operand. * - * Bits: 17 - 24 + * Bits: 18 - 25 */ -#define SUBCLASS_SHIFT (17) +#define SUBCLASS_SHIFT (18) #define SUBCLASS_BITS (8) #define SUBCLASS_MASK OP_GENMASK(SUBCLASS_BITS, SUBCLASS_SHIFT) #define GEN_SUBCLASS(bit) OP_GENBIT(bit, SUBCLASS_SHIFT) @@ -101,9 +101,9 @@ /* * Special flags. Context dependant. * - * Bits: 25 - 31 + * Bits: 26 - 32 */ -#define SPECIAL_SHIFT (25) +#define SPECIAL_SHIFT (26) #define SPECIAL_BITS (7) #define SPECIAL_MASK OP_GENMASK(SPECIAL_BITS, SPECIAL_SHIFT) #define GEN_SPECIAL(bit) OP_GENBIT(bit, SPECIAL_SHIFT) @@ -111,9 +111,9 @@ /* * Sizes of the operands and attributes. 
* - * Bits: 32 - 42 + * Bits: 33 - 43 */ -#define SIZE_SHIFT (32) +#define SIZE_SHIFT (33) #define SIZE_BITS (11) #define SIZE_MASK OP_GENMASK(SIZE_BITS, SIZE_SHIFT) #define GEN_SIZE(bit) OP_GENBIT(bit, SIZE_SHIFT) @@ -121,9 +121,9 @@ /* * Register set count * - * Bits: 47 - 43 + * Bits: 44 - 48 */ -#define REGSET_SHIFT (43) +#define REGSET_SHIFT (44) #define REGSET_BITS (5) #define REGSET_MASK OP_GENMASK(REGSET_BITS, REGSET_SHIFT) #define GEN_REGSET(bit) OP_GENBIT(bit, REGSET_SHIFT) @@ -138,11 +138,11 @@ * * ............................................................1111 optypes * .........................................................111.... modifiers - * ...............................................1111111111....... register classes - * .......................................11111111................. subclasses - * ................................1111111......................... specials - * .....................11111111111................................ sizes - * ................11111........................................... regset count + * ..............................................11111111111....... register classes + * ......................................11111111.................. subclasses + * ...............................1111111.......................... specials + * ....................11111111111................................. sizes + * ...............11111............................................ 
regset count */ #define REGISTER GEN_OPTYPE(0) /* register number in 'basereg' */ @@ -176,6 +176,7 @@ #define REG_CLASS_RM_ZMM GEN_REG_CLASS(7) #define REG_CLASS_OPMASK GEN_REG_CLASS(8) #define REG_CLASS_BND GEN_REG_CLASS(9) +#define REG_CLASS_RM_TMM GEN_REG_CLASS(10) static inline bool is_class(opflags_t class, opflags_t op) { @@ -217,6 +218,7 @@ static inline bool is_reg_class(opflags_t class, opflags_t reg) #define KREG OPMASKREG #define RM_BND ( REG_CLASS_BND | REGMEM) /* Bounds operand */ #define BNDREG ( REG_CLASS_BND | REGMEM | REGISTER) /* Bounds register */ +#define TMMREG ( REG_CLASS_RM_TMM | REGMEM | REGISTER) /* TMM (AMX) register */ #define REG_CDT ( REG_CLASS_CDT | BITS32 | REGISTER) /* CRn, DRn and TRn */ #define REG_CREG (GEN_SUBCLASS(1) | REG_CLASS_CDT | BITS32 | REGISTER) /* CRn */ #define REG_DREG (GEN_SUBCLASS(2) | REG_CLASS_CDT | BITS32 | REGISTER) /* DRn */ diff --git a/macros/fp.mac b/macros/fp.mac index eb297014..3a094a5c 100644 --- a/macros/fp.mac +++ b/macros/fp.mac @@ -1,6 +1,6 @@ ;; -------------------------------------------------------------------------- ;; -;; Copyright 2010 The NASM Authors - All Rights Reserved +;; Copyright 2010-2020 The NASM Authors - All Rights Reserved ;; See the file AUTHORS included with the NASM distribution for ;; the specific copyright holders. 
;; @@ -46,9 +46,17 @@ USE: fp %define float8(x) __?float8?__(x) %define float16(x) __?float16?__(x) +%define bfloat16(x) __?bfloat16?__(x) %define float32(x) __?float32?__(x) %define float64(x) __?float64?__(x) %define float80m(x) __?float80m?__(x) %define float80e(x) __?float80e?__(x) %define float128l(x) __?float128l?__(x) %define float128h(x) __?float128h?__(x) + +%imacro bf16 1-*.nolist + %rep %0 + dw __?bfloat16?__(%1) + %rotate 1 + %endrep +%endmacro diff --git a/macros/masm.mac b/macros/masm.mac index da7e6eea..6bd27273 100644 --- a/macros/masm.mac +++ b/macros/masm.mac @@ -50,7 +50,7 @@ USE: masm %endmacro %imacro ends 0+.nolist - %pragma ignore ends %00 + %null ends %00 %endmacro %imacro proc 0-*.nolist @@ -65,7 +65,7 @@ USE: masm %endmacro %imacro endp 0.nolist - %pragma ignore endp %00 + %null endp %00 %undef ret %endmacro diff --git a/output/outcoff.c b/output/outcoff.c index de22fb88..bcd9ff3f 100644 --- a/output/outcoff.c +++ b/output/outcoff.c @@ -72,11 +72,11 @@ * (2) Win32 doesn't bother putting any flags in the header flags * field (at offset 0x12 into the file). * - * (3) Win32 uses some extra flags into the section header table: + * (3) Win32/64 uses some extra flags into the section header table: * it defines flags 0x80000000 (writable), 0x40000000 (readable) * and 0x20000000 (executable), and uses them in the expected - * combinations. It also defines 0x00100000 through 0x00700000 for - * section alignments of 1 through 64 bytes. + * combinations. It also defines 0x00100000 through 0x00f00000 for + * section alignments of 1 through 8192 bytes. * * (4) Both standard COFF and Win32 COFF seem to use the DWORD * field directly after the section name in the section header @@ -285,14 +285,22 @@ int coff_make_section(char *name, uint32_t flags) return coff_nsects - 1; } +/* + * Convert an alignment value to the corresponding flags. + * An alignment value of 0 means no flags should be set. 
+ */ static inline uint32_t coff_sectalign_flags(unsigned int align) { - return (ilog2_32(align) + 1) << 20; + return (alignlog2_32(align) + 1) << 20; } +/* + * Get the alignment value from a flags field. + * Returns 0 if no alignment defined. + */ static inline unsigned int coff_alignment(uint32_t flags) { - return 1U << (((flags & IMAGE_SCN_ALIGN_MASK) >> 20) - 1); + return (1U << ((flags & IMAGE_SCN_ALIGN_MASK) >> 20)) >> 1; } static int32_t coff_section_names(char *name, int *bits) @@ -364,10 +372,13 @@ static int32_t coff_section_names(char *name, int *bits) nasm_nonfatal("argument to `align' is not numeric"); else { unsigned int align = atoi(q + 6); - if (!align || ((align - 1) & align)) { + /* Allow align=0 meaning use default */ + if (!align) { + align_flags = 0; + } else if (!is_power2(align)) { nasm_nonfatal("argument to `align' is not a" " power of two"); - } else if (align > 8192) { + } else if (align > COFF_MAX_ALIGNMENT) { nasm_nonfatal("maximum alignment in COFF is %d bytes", COFF_MAX_ALIGNMENT); } else { @@ -382,30 +393,31 @@ static int32_t coff_section_names(char *name, int *bits) break; if (i == coff_nsects) { if (!flags) { - if (!strcmp(name, ".data")) + flags = TEXT_FLAGS; + + if (!strcmp(name, ".data")) { flags = DATA_FLAGS; - else if (!strcmp(name, ".rdata")) + } else if (!strcmp(name, ".rdata")) { flags = RDATA_FLAGS; - else if (!strcmp(name, ".bss")) + } else if (!strcmp(name, ".bss")) { flags = BSS_FLAGS; - else if (win64 && !strcmp(name, ".pdata")) - flags = PDATA_FLAGS; - else if (win64 && !strcmp(name, ".xdata")) - flags = XDATA_FLAGS; - else - flags = TEXT_FLAGS; + } else if (win64) { + if (!strcmp(name, ".pdata")) + flags = PDATA_FLAGS; + else if (!strcmp(name, ".xdata")) + flags = XDATA_FLAGS; + } } i = coff_make_section(name, flags); - if (flags) - coff_sects[i]->flags = flags; - } else if (flags) { - /* Check if any flags are respecified */ - - /* Warn if non-alignment flags differ */ - if ((flags ^ coff_sects[i]->flags) & 
~IMAGE_SCN_ALIGN_MASK && - coff_sects[i]->pass_last_seen == pass_count()) { - nasm_warn(WARN_OTHER, "section attributes changed on" - " redeclaration of section `%s'", name); + coff_sects[i]->align_flags = align_flags; + } else { + if (flags) { + /* Warn if non-alignment flags differ */ + if (((flags ^ coff_sects[i]->flags) & ~IMAGE_SCN_ALIGN_MASK) && + coff_sects[i]->pass_last_seen == pass_count()) { + nasm_warn(WARN_OTHER, "section attributes changed on" + " redeclaration of section `%s'", name); + } } /* Check if alignment might be needed */ @@ -419,6 +431,7 @@ static int32_t coff_section_names(char *name, int *bits) if (align_flags > sect_align_flags) { coff_sects[i]->align_flags = align_flags; } + /* Check if not already aligned */ /* XXX: other formats don't do this... */ if (coff_sects[i]->len % align) { @@ -428,9 +441,6 @@ static int32_t coff_section_names(char *name, int *bits) nasm_assert(padding <= sizeof buffer); - if (pass_final()) - nasm_nonfatal("section alignment changed during code generation"); - if (coff_sects[i]->flags & IMAGE_SCN_CNT_CODE) { /* Fill with INT 3 instructions */ memset(buffer, 0xCC, padding); diff --git a/test/Makefile b/test/Makefile index 6b6ffbfe..7d09b346 100644 --- a/test/Makefile +++ b/test/Makefile @@ -34,7 +34,7 @@ $(NASM): $(NASM) $(NASMOPT) -f aout -o $@ -MD $@.dep -l $@.lst $< %.obj: %.asm $(NASMDEP) - $(NASM) $(NASMOPT) -f obj -gborland -F -o $@ -MD $@.dep -l $@.lst $< + $(NASM) $(NASMOPT) -f obj -gborland -o $@ -MD $@.dep -l $@.lst $< %.rdf: %.asm $(NASMDEP) $(NASM) $(NASMOPT) -f rdf -o $@ -MD $@.dep -l $@.lst $< diff --git a/test/amx.asm b/test/amx.asm new file mode 100644 index 00000000..88455508 --- /dev/null +++ b/test/amx.asm @@ -0,0 +1,36 @@ + bits 64 + +%macro amx 1 + %define treg tmm %+ %1 + + ldtilecfg [rsi] + sttilecfg [rdi] + + tilezero treg + + tileloadd treg, [rax] + tileloadd treg, [rax,rdx] + tileloadd treg, [rax,rdx*2] + + tileloaddt1 treg, [rax] + tileloaddt1 treg, [rax,rdx] + tileloaddt1 treg, 
[rax,rdx*2] + + tdpbf16ps treg, treg, treg + tdpbssd treg, treg, treg + tdpbusd treg, treg, treg + tdpbsud treg, treg, treg + tdpbuud treg, treg, treg + + tilestored [rax], treg + tilestored [rax,rdx], treg + tilestored [rax,rdx*2], treg + + tilerelease +%endmacro + +%assign n 0 + %rep 8 + amx n + %assign n n+1 + %endrep diff --git a/test/float.asm b/test/float.asm index 88519b2e..1dd92a96 100644 --- a/test/float.asm +++ b/test/float.asm @@ -5,6 +5,8 @@ ; Test of floating-point formats ; +%use fp + ; 8-bit db 1.0 db +1.0 @@ -65,6 +67,37 @@ dw __SNaN__ dw 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 +; 16-bit bfloat + bf16 1.0 + bf16 +1.0 + bf16 -1.0 + bf16 1.5 + bf16 +1.5 + bf16 -1.5 + bf16 0.0 + bf16 +0.0 + bf16 -0.0 + bf16 1.83203125 + bf16 +1.83203125 + bf16 -1.83203125 + bf16 1.83203125e15 + bf16 +1.83203125e15 + bf16 -1.83203125e15 + bf16 1.83203125e-15 + bf16 +1.83203125e-15 + bf16 -1.83203125e-15 + bf16 1.83203125e-40 ; Denormal! + bf16 +1.83203125e-40 ; Denormal! + bf16 -1.83203125e-40 ; Denormal! 
+ bf16 __Infinity__ + bf16 +__Infinity__ + bf16 -__Infinity__ + bf16 __NaN__ + bf16 __QNaN__ + bf16 __SNaN__ + bf16 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 + bf16 -3.1415, NaN, 2000.0, +Inf + ; 32-bit dd 1.0 dd +1.0 @@ -94,6 +127,7 @@ dd __QNaN__ dd __SNaN__ dd 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 + dd -3.1415, NaN, 2000.0, +Inf ; 64-bit dq 1.0 @@ -124,7 +158,7 @@ dq __QNaN__ dq __SNaN__ dq 3.1415926535_8979323846_2643383279_5028841971_6939937510_5 - + ; 80-bit dt 1.0 dt +1.0 diff --git a/test/masmdisp.asm b/test/masmdisp.asm index 295d88d7..c5e9af4f 100644 --- a/test/masmdisp.asm +++ b/test/masmdisp.asm @@ -14,6 +14,7 @@ fproc proc far lea rsi,dword ptr foo lea rsi,[foo] lea rsi,dword [foo] + mov rdi,gs:[rbx] ret fproc endp @@ -21,6 +22,8 @@ nproc proc near mov eax,dword ptr foo mov rdx,offset foo mov ecx,bar[rbx] + mov rdi,[gs:foo] + mov rdi,qword ptr gs:foo ret nproc endp @@ -31,6 +34,7 @@ nxx dd 80 foo dd 100 _DATA ends -_BSS segment nobits + segment _BSS nobits bar resd 100 +xyzzy dd 64 dup (?) _BSS ends diff --git a/test/ppindirect.asm b/test/ppindirect.asm index 0a30d075..2785378a 100644 --- a/test/ppindirect.asm +++ b/test/ppindirect.asm @@ -2,6 +2,8 @@ ; Fun tests of the preprocessor indirection mode... 
+ bits 64 + %assign foo1 11 %assign foo11 1111 %assign foo2 22 @@ -9,34 +11,34 @@ %assign foo3 33 %assign foo33 3333 %assign n 2 -foo%[foo%[n]]*100 -foo%[n]*100 + dd foo%[foo%[n]]*100 + dd foo%[n]*100 %assign foo%[foo%[n]] foo%[foo%[n]]*100 ;%assign foo%[n] foo%[n]*100 - foo1 - foo2 - foo3 - foo11 - foo22 - foo33 + dd foo1 + dd foo2 + dd foo3 + dd foo11 + dd foo22 + dd foo33 %define foo33bar 999999 - %[foo%[foo3]bar] + dd %[foo%[foo3]bar] %assign bctr 0 %macro bluttan 0 %assign bctr bctr+1 %assign bluttan%[bctr] bctr %defstr bstr bluttan%[bctr] - bluttan%[bctr] - bstr + db bluttan%[bctr] + db bstr %endmacro %rep 20 bluttan %endrep %rep 20 - bluttan%[bctr] + db bluttan%[bctr] %assign bctr bctr-1 %endrep diff --git a/test/winalign.asm b/test/winalign.asm new file mode 100644 index 00000000..62abf827 --- /dev/null +++ b/test/winalign.asm @@ -0,0 +1,45 @@ + section .pdata rdata align=2 + dd 1 + dd 2 + dd 3 + + section .rdata align=16 + dd 4 + dd 5 + dd 6 + + section ultra + dd 10 + dd 11 + dd 12 + + section infra rdata + dd 20 + dd 21 + dd 22 + + section omega rdata align=1 + dd 90 + dd 91 + dd 92 + + section .xdata + dd 7 + dd 8 + dd 9 + + section ultra align=8 + dd 13 + dd 14 + dd 15 + + section infra rdata align=1 + dd 23 + dd 24 + dd 25 + + section omega rdata + sectalign 2 + dd 93 + dd 94 + dd 95 diff --git a/x86/iflags.ph b/x86/iflags.ph index 2c05b293..7067d740 100644 --- a/x86/iflags.ph +++ b/x86/iflags.ph @@ -84,6 +84,16 @@ if_("AVX5124FMAPS", "AVX-512 4-iteration multiply-add"); if_("AVX5124VNNIW", "AVX-512 4-iteration dot product"); if_("SGX", "Intel Software Guard Extensions (SGX)"); if_("CET", "Intel Control-Flow Enforcement Technology (CET)"); +if_("ENQCMD", "Enqueue command instructions"); +if_("PCONFIG", "Platform configuration instruction"); +if_("WBNOINVD", "Writeback and do not invalidate instruction"); +if_("TSXLDTRK", "TSX suspend load address tracking"); +if_("SERIALIZE", "SERIALIZE instruction"); +if_("AVX512BF16", "AVX-512 bfloat16"); 
+if_("AVX512VP2INTERSECT", "AVX-512 VP2INTERSECT instructions"); +if_("AMXTILE", "AMX tile configuration instructions"); +if_("AMXBF16", "AMX bfloat16 multiplication"); +if_("AMXINT8", "AMX 8-bit integer multiplication"); # Put these last [hpa: why?] if_("OBSOLETE", "Instruction removed from architecture"); diff --git a/x86/insns.dat b/x86/insns.dat index 980c5943..141d68b3 100644 --- a/x86/insns.dat +++ b/x86/insns.dat @@ -5999,6 +5999,51 @@ WRUSSQ mem,reg64 [mr: o64 66 0f 38 f5 /r] CET,FUTURE,X64 WRSSD mem,reg32 [mr: o32 0f 38 f6 /r] CET,FUTURE WRSSQ mem,reg64 [mr: o64 0f 38 f6 /r] CET,FUTURE,X64 +;# Instructions from ISE doc 319433-040, June 2020 +ENQCMD reg16,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE +ENQCMD reg32,mem512 [rm: a16 f2 0f 38 f8 /r] ENQCMD,FUTURE,ND +ENQCMD reg32,mem512 [rm: a32 f2 0f 38 f8 /r] ENQCMD,FUTURE +ENQCMD reg64,mem512 [rm: a64 f2 0f 38 f8 /r] ENQCMD,FUTURE,X64 +ENQCMDS reg16,mem512 [rm: a16 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV +ENQCMDS reg32,mem512 [rm: a16 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV,ND +ENQCMDS reg32,mem512 [rm: a32 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV +ENQCMDS reg64,mem512 [rm: a64 f3 0f 38 f8 /r] ENQCMD,FUTURE,PRIV,X64 +PCONFIG void [ np 0f 01 c5] PCONFIG,FUTURE,PRIV +SERIALIZE void [ np 0f 01 e8] SERIALIZE,FUTURE +WBNOINVD void [ f3 0f 09] WBNOINVD,FUTURE,PRIV +XRESLDTRK void [ f2 0f 01 e9] TSXLDTRK,FUTURE +XSUSLDTRK void [ f2 0f 01 e8] TSXLDTRK,FUTURE + +;# AVX512 Bfloat16 instructions +VCVTNE2PS2BF16 xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNE2PS2BF16 zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm:fv: evex.512.f2.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNEPS2BF16 xmmreg|mask|z,xmmrm128|b32 [rm:fv: evex.128.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNEPS2BF16 xmmreg|mask|z,ymmrm256|b32 [rm:fv: evex.256.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VCVTNEPS2BF16
ymmreg|mask|z,zmmrm512|b32 [rm:fv: evex.512.f3.0f38.w0 72 /r] AVX512BF16,FUTURE +VDPBF16PS xmmreg|mask|z,xmmreg*,xmmrm128|b32 [rvm:fv: evex.128.f3.0f38.w0 52 /r] AVX512BF16,FUTURE +VDPBF16PS ymmreg|mask|z,ymmreg*,ymmrm256|b32 [rvm:fv: evex.256.f3.0f38.w0 52 /r] AVX512BF16,FUTURE +VDPBF16PS zmmreg|mask|z,zmmreg*,zmmrm512|b32 [rvm:fv: evex.512.f3.0f38.w0 52 /r] AVX512BF16,FUTURE + +;# AVX512 mask intersect instructions +VP2INTERSECTD kreg|rs2,xmmreg,xmmrm128|b32 [rvm:fv: evex.nds.128.f2.0f38.w0 68 /r] AVX512VP2INTERSECT,FUTURE +VP2INTERSECTD kreg|rs2,ymmreg,ymmrm256|b32 [rvm:fv: evex.nds.256.f2.0f38.w0 68 /r] AVX512VP2INTERSECT,FUTURE +VP2INTERSECTD kreg|rs2,zmmreg,zmmrm512|b32 [rvm:fv: evex.nds.512.f2.0f38.w0 68 /r] AVX512VP2INTERSECT,FUTURE + +;# Intel Advanced Matrix Extensions (AMX) +LDTILECFG mem512 [m: vex.128.np.0f38.w0 49 /0] AMXTILE,FUTURE,SZ,X64 +STTILECFG mem512 [m: vex.128.66.0f38.w0 49 /0] AMXTILE,FUTURE,SZ,X64 +TDPBF16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5c /r] AMXBF16,FUTURE,X64 +TDPBSSD tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TDPBSUD tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TDPBUSD tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TDPBUUD tmmreg,tmmreg,tmmreg [rmv: vex.128.np.0f38.w0 5e /r] AMXINT8,FUTURE,X64 +TILELOADD tmmreg,mem [rm: vex.128.f2.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64 +TILELOADDT1 tmmreg,mem [rm: vex.128.66.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64 +TILERELEASE void [ vex.128.np.0f38.w0 49 c0] AMXTILE,FUTURE,X64 +TILESTORED mem,tmmreg [mr: vex.128.f3.0f38.w0 4b /r] AMXTILE,MIB,FUTURE,SX,X64 +TILEZERO tmmreg [r: vex.128.f2.0f38.w0 49 /3r0] AMXTILE,FUTURE,X64 + ;# Systematic names for the hinting nop instructions ; These should be last in the file HINT_NOP0 rm16 [m: o16 0f 18 /0] P6,UNDOC diff --git a/x86/insns.pl b/x86/insns.pl index cd9aaf4f..911ef7eb 100755 --- a/x86/insns.pl +++ b/x86/insns.pl @@ -880,11 +880,19 @@ sub byte_code_compile($$) {
$prefix_ok = 0; } elsif ($op =~ m:^/([0-7])$:) { if (!defined($oppos{'m'})) { - die "$fname:$line: $op requires m operand\n"; + die "$fname:$line: $op requires an m operand\n"; } push(@codes, 06) if ($oppos{'m'} & 4); push(@codes, 0200 + (($oppos{'m'} & 3) << 3) + $1); $prefix_ok = 0; + } elsif ($op =~ m:^/([0-3]?)r([0-7])$:) { + if (!defined($oppos{'r'})) { + die "$fname:$line: $op requires an r operand\n"; + } + push(@codes, 05) if ($oppos{'r'} & 4); + push(@codes, 0171); + push(@codes, (($1+0) << 6) + (($oppos{'r'} & 3) << 3) + $2); + $prefix_ok = 0; } elsif ($op =~ /^(vex|xop)(|\..*)$/) { my $vexname = $1; my $c = $vexmap{$vexname}; @@ -907,7 +915,7 @@ sub byte_code_compile($$) { $w = 2; } elsif ($oq eq 'ww') { $w = 3; - } elsif ($oq eq 'p0') { + } elsif ($oq eq 'np' || $oq eq 'p0') { $p = 0; } elsif ($oq eq '66' || $oq eq 'p1') { $p = 1; @@ -935,9 +943,6 @@ sub byte_code_compile($$) { if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) { die "$fname:$line: missing fields in \U$vexname\E specification\n"; } - if (defined($oppos{'v'}) && !$has_nds) { - die "$fname:$line: 'v' operand without ${vexname}.nds or ${vexname}.ndd\n"; - } my $minmap = ($c == 1) ? 
8 : 0; # 0-31 for VEX, 8-31 for XOP if ($m < $minmap || $m > 31) { die "$fname:$line: Only maps ${minmap}-31 are valid for \U${vexname}\n"; @@ -966,7 +971,7 @@ sub byte_code_compile($$) { $w = 2; } elsif ($oq eq 'ww') { $w = 3; - } elsif ($oq eq 'p0') { + } elsif ($oq eq 'np' || $oq eq 'p0') { $p = 0; } elsif ($oq eq '66' || $oq eq 'p1') { $p = 1; @@ -994,9 +999,6 @@ sub byte_code_compile($$) { if (!defined($m) || !defined($w) || !defined($l) || !defined($p)) { die "$fname:$line: missing fields in EVEX specification\n"; } - if (defined($oppos{'v'}) && !$has_nds) { - die "$fname:$line: 'v' operand without evex.nds or evex.ndd\n"; - } if ($m > 15) { die "$fname:$line: Only maps 0-15 are valid for EVEX\n"; } diff --git a/x86/regs.dat b/x86/regs.dat index 723f6a44..cec8420f 100644 --- a/x86/regs.dat +++ b/x86/regs.dat @@ -130,6 +130,9 @@ zmm0 ZMM0 zmmreg 0 zmm1-15 ZMM_L16 zmmreg 1 zmm16-31 ZMMREG zmmreg 16 +# AMX tile registers +tmm0-7 TMMREG tmmreg 0 + # Opmask registers k0 OPMASK0 opmaskreg 0 k1-7 OPMASKREG opmaskreg 1 TFLAG_BRC_OPT