The package rpms/luajit.git has added or updated architecture-specific content in its
spec file (ExclusiveArch/ExcludeArch or %ifarch/%ifnarch) in commit(s):
https://src.fedoraproject.org/cgit/rpms/luajit.git/commit/?id=3bc1e4725fb....
Change:
+ExclusiveArch: %{arm} %{ix86} x86_64 %{mips} aarch64
Thanks.
Full change:
============
commit ee6c7ab93f1b70e0297868d285935cae3aea43b9
Author: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
Date: Mon Nov 29 18:35:24 2021 +0530
Update dates in changelog for F35
diff --git a/luajit.spec b/luajit.spec
index 49540fc..4d1d67a 100644
--- a/luajit.spec
+++ b/luajit.spec
@@ -94,10 +94,10 @@ make check || true
%{_libdir}/pkgconfig/%{name}.pc
%changelog
-* Tue Oct 26 2021 Siddhesh Poyarekar <siddhesh(a)gotplt.org> - 2.1.0-0.22beta3
+* Mon Nov 29 2021 Siddhesh Poyarekar <siddhesh(a)gotplt.org> - 2.1.0-0.22beta3
- Bring back the earlier code to do ln -sf.
-* Tue Oct 12 2021 Andreas Schneider <asn(a)redhat.com> - 2.1.0-0.21beta3
+* Mon Nov 29 2021 Andreas Schneider <asn(a)redhat.com> - 2.1.0-0.21beta3
- Rebase onto
https://github.com/LuaJIT/LuaJIT/tree/v2.1
- Dropped support for ppc64le
- Dropped support for s390x
commit 74fc89321e0bd683e944f505dde072fd73d8a2ce
Author: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
Date: Tue Oct 26 21:38:28 2021 +0530
Bring back the earlier code to do ln -sf
diff --git a/luajit.spec b/luajit.spec
index 48300de..49540fc 100644
--- a/luajit.spec
+++ b/luajit.spec
@@ -4,7 +4,7 @@ Name: luajit
Version: 2.1.0
%global apiver %(v=%{version}; echo ${v%.${v#[0-9].[0-9].}})
%global srcver %{version}%{?rctag:-%{rctag}}
-Release: 0.21%{?rctag:%{rctag}}%{?dist}
+Release: 0.22%{?rctag:%{rctag}}%{?dist}
Summary: Just-In-Time Compiler for Lua
License: MIT
URL:
http://luajit.org/
@@ -60,14 +60,17 @@ make amalg Q= E=@: PREFIX=%{_prefix} TARGET_STRIP=: \
%make_install PREFIX=%{_prefix} \
MULTILIB=%{_lib}
-ln -sf luajit-2.1.0-beta3 %{buildroot}%{_bindir}/luajit
-
rm -rf _tmp_html ; mkdir _tmp_html
cp -a doc _tmp_html/html
# Remove static .a
find %{buildroot} -type f -name *.a -delete -print
+%if %{defined rctag}
+# Development versions are not doing such symlink
+ln -s %{name}-%{srcver} %{buildroot}%{_bindir}/%{name}
+%endif
+
%ldconfig_scriptlets
%check
@@ -91,6 +94,9 @@ make check || true
%{_libdir}/pkgconfig/%{name}.pc
%changelog
+* Tue Oct 26 2021 Siddhesh Poyarekar <siddhesh(a)gotplt.org> - 2.1.0-0.22beta3
+- Bring back the earlier code to do ln -sf.
+
* Tue Oct 12 2021 Andreas Schneider <asn(a)redhat.com> - 2.1.0-0.21beta3
- Rebase onto
https://github.com/LuaJIT/LuaJIT/tree/v2.1
- Dropped support for ppc64le
commit 3bc1e4725fb68d9b8b5a528673b143d437084948
Author: Andreas Schneider <asn(a)cryptomilk.org>
Date: Tue Oct 12 22:14:42 2021 +0200
Apply patches from
https://github.com/LuaJIT/LuaJIT/
diff --git a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch
b/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch
deleted file mode 100644
index 16aca3b..0000000
--- a/0001-Fix-LJ_MAX_JSLOTS-assertion-in-rec_check_slots.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-commit 31afda31814ec02f82ffb0519bee496c87eeaa89
-Merge: 8271c64 1c89933
-Author: Mike Pall <mike>
-Date: Tue May 9 21:01:23 2017 +0200
-
- Merge branch 'master' into v2.1
-
-commit 1c89933f129dde76944336c6bfd05297b8d67730
-Author: Mike Pall <mike>
-Date: Tue May 9 20:59:37 2017 +0200
-
- Fix LJ_MAX_JSLOTS assertion in rec_check_slots().
-
- Thanks to Yichun Zhang.
-
-diff --git a/src/lj_record.c b/src/lj_record.c
-index 9d0469c..c2d0274 100644
---- a/src/lj_record.c
-+++ b/src/lj_record.c
-@@ -87,9 +87,9 @@ static void rec_check_slots(jit_State *J)
- BCReg s, nslots = J->baseslot + J->maxslot;
- int32_t depth = 0;
- cTValue *base = J->L->base - J->baseslot;
-- lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot <
LJ_MAX_JSLOTS);
-+ lua_assert(J->baseslot >= 1+LJ_FR2);
- lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] &
TREF_FRAME));
-- lua_assert(nslots < LJ_MAX_JSLOTS);
-+ lua_assert(nslots <= LJ_MAX_JSLOTS);
- for (s = 0; s < nslots; s++) {
- TRef tr = J->slot[s];
- if (tr) {
diff --git a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch
b/0002-Add-missing-LJ_MAX_JSLOTS-check.patch
deleted file mode 100644
index 70ccfd5..0000000
--- a/0002-Add-missing-LJ_MAX_JSLOTS-check.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-commit 6259c0b909a8c00fabe3c7e6bd81150ee08cbf9f
-Merge: 31afda3 630ff31
-Author: Mike Pall <mike>
-Date: Wed May 17 17:38:53 2017 +0200
-
- Merge branch 'master' into v2.1
-
-commit 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c
-Author: Mike Pall <mike>
-Date: Wed May 17 17:37:35 2017 +0200
-
- Add missing LJ_MAX_JSLOTS check.
-
- Thanks to Yichun Zhang.
-
-From 630ff3196a06353c6a7ccd1e9ac3958f4a8ca13c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 17 May 2017 17:37:35 +0200
-Subject: [PATCH 02/72] Add missing LJ_MAX_JSLOTS check.
-
-Thanks to Yichun Zhang.
----
- src/lj_record.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/lj_record.c b/src/lj_record.c
-index cecacd2..bc4e8a6 100644
---- a/src/lj_record.c
-+++ b/src/lj_record.c
-@@ -633,6 +633,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
- J->framedepth++;
- J->base += func+1+LJ_FR2;
- J->baseslot += func+1+LJ_FR2;
-+ if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
-+ lj_trace_err(J, LJ_TRERR_STACKOV);
- }
-
- /* Record tail call. */
---
-2.20.1
diff --git a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch
b/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch
deleted file mode 100644
index 9d8300f..0000000
--- a/0003-MIPS-Use-precise-search-for-exit-jump-patching.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-From 7381b620358c2561e8690149f1d25828fdad6675 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 7 Jun 2017 19:16:22 +0200
-Subject: [PATCH 03/72] MIPS: Use precise search for exit jump patching.
-
-Contributed by Djordje Kovacevic and Stefan Pejic.
----
- src/lj_asm_mips.h | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index 03270cc..d0a1ca5 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -1933,7 +1933,11 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno,
MCode *target)
- MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
- for (p++; p < pe; p++) {
- if (*p == exitload) { /* Look for load of exit number. */
-- if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */
-+ /* Look for exitstub branch. Yes, this covers all used branch variants. */
-+ if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
-+ ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
-+ (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
-+ (p[-1] & 0xffe00000u) == MIPSI_BC1F)) {
- ptrdiff_t delta = target - p;
- if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
- patchbranch:
---
-2.20.1
-
diff --git a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch
b/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch
deleted file mode 100644
index 4da6b4d..0000000
--- a/0004-MIPS-Fix-handling-of-spare-long-range-jump-slots.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From c7c3c4da432ddb543d4b0a9abbb245f11b26afd0 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 7 Jun 2017 19:36:46 +0200
-Subject: [PATCH 04/72] MIPS: Fix handling of spare long-range jump slots.
-
-Contributed by Djordje Kovacevic and Stefan Pejic.
----
- src/lj_asm_mips.h | 9 +++++----
- src/lj_jit.h | 6 ++++++
- src/lj_mcode.c | 6 ------
- 3 files changed, 11 insertions(+), 10 deletions(-)
-
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index d0a1ca5..7631190 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -65,10 +65,9 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
- static void asm_sparejump_setup(ASMState *as)
- {
- MCode *mxp = as->mcbot;
-- /* Assumes sizeof(MCLink) == 8. */
-- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) {
-+ if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) {
- lua_assert(MIPSI_NOP == 0);
-- memset(mxp+2, 0, MIPS_SPAREJUMP*8);
-+ memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
- mxp += MIPS_SPAREJUMP*2;
- lua_assert(mxp < as->mctop);
- lj_mcode_sync(as->mcbot, mxp);
-@@ -1947,7 +1946,9 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno,
MCode *target)
- if (!cstart) cstart = p-1;
- } else { /* Branch out of range. Use spare jump slot in mcarea. */
- int i;
-- for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) {
-+ for (i = (int)(sizeof(MCLink)/sizeof(MCode));
-+ i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2);
-+ i += 2) {
- if (mcarea[i] == tjump) {
- delta = mcarea+i - p;
- goto patchbranch;
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index a2e8fd9..3f38d28 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -155,6 +155,12 @@ typedef uint8_t MCode;
- typedef uint32_t MCode;
- #endif
-
-+/* Linked list of MCode areas. */
-+typedef struct MCLink {
-+ MCode *next; /* Next area. */
-+ size_t size; /* Size of current area. */
-+} MCLink;
-+
- /* Stack snapshot header. */
- typedef struct SnapShot {
- uint16_t mapofs; /* Offset into snapshot map. */
-diff --git a/src/lj_mcode.c b/src/lj_mcode.c
-index f0a1f69..5ea89f6 100644
---- a/src/lj_mcode.c
-+++ b/src/lj_mcode.c
-@@ -272,12 +272,6 @@ static void *mcode_alloc(jit_State *J, size_t sz)
-
- /* -- MCode area management ----------------------------------------------- */
-
--/* Linked list of MCode areas. */
--typedef struct MCLink {
-- MCode *next; /* Next area. */
-- size_t size; /* Size of current area. */
--} MCLink;
--
- /* Allocate a new MCode area. */
- static void mcode_allocarea(jit_State *J)
- {
---
-2.20.1
-
diff --git a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch
b/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch
deleted file mode 100644
index dda4ae2..0000000
--- a/0005-MIPS64-Add-soft-float-support-to-JIT-compiler-backen.patch
+++ /dev/null
@@ -1,982 +0,0 @@
-From a057a07ab702e225e21848d4f918886c5b0ac06b Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 7 Jun 2017 23:56:54 +0200
-Subject: [PATCH 05/72] MIPS64: Add soft-float support to JIT compiler backend.
-
-Contributed by Djordje Kovacevic and Stefan Pejic from
RT-RK.com.
-Sponsored by Cisco Systems, Inc.
----
- src/lj_arch.h | 4 +-
- src/lj_asm.c | 8 +-
- src/lj_asm_mips.h | 217 +++++++++++++++++++++++++++++++++++++--------
- src/lj_crecord.c | 4 +-
- src/lj_emit_mips.h | 2 +
- src/lj_ffrecord.c | 2 +-
- src/lj_ircall.h | 43 ++++++---
- src/lj_iropt.h | 2 +-
- src/lj_jit.h | 4 +-
- src/lj_obj.h | 3 +
- src/lj_opt_split.c | 2 +-
- src/lj_snap.c | 21 +++--
- src/vm_mips64.dasc | 49 ++++++++++
- 13 files changed, 286 insertions(+), 75 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index c8d7138..b770564 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -337,9 +337,6 @@
- #define LJ_ARCH_BITS 32
- #define LJ_TARGET_MIPS32 1
- #else
--#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU
--#define LJ_ARCH_NOJIT 1 /* NYI */
--#endif
- #define LJ_ARCH_BITS 64
- #define LJ_TARGET_MIPS64 1
- #define LJ_TARGET_GC64 1
-@@ -512,6 +509,7 @@
- #define LJ_ABI_SOFTFP 0
- #endif
- #define LJ_SOFTFP (!LJ_ARCH_HASFPU)
-+#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
-
- #if LJ_ARCH_ENDIAN == LUAJIT_BE
- #define LJ_LE 0
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index c2cf5a9..bed2268 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -338,7 +338,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
- ra_modified(as, r);
- ir->r = RID_INIT; /* Do not keep any hint. */
- RA_DBGX((as, "remat $i $r", ir, r));
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (ir->o == IR_KNUM) {
- emit_loadk64(as, r, ir);
- } else
-@@ -1305,7 +1305,7 @@ static void asm_call(ASMState *as, IRIns *ir)
- asm_gencall(as, ci, args);
- }
-
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
- {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
-@@ -1652,10 +1652,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
- case IR_MUL: asm_mul(as, ir); break;
- case IR_MOD: asm_mod(as, ir); break;
- case IR_NEG: asm_neg(as, ir); break;
--#if LJ_SOFTFP
-+#if LJ_SOFTFP32
- case IR_DIV: case IR_POW: case IR_ABS:
- case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
-- lua_assert(0); /* Unused for LJ_SOFTFP. */
-+ lua_assert(0); /* Unused for LJ_SOFTFP32. */
- break;
- #else
- case IR_DIV: asm_div(as, ir); break;
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index 05af3d0..1406a87 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -290,7 +290,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef
*args)
- {
- ra_leftov(as, gpr, ref);
- gpr++;
--#if LJ_64
-+#if LJ_64 && !LJ_SOFTFP
- fpr++;
- #endif
- }
-@@ -301,7 +301,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef
*args)
- emit_spstore(as, ir, r, ofs);
- ofs += irt_isnum(ir->t) ? 8 : 4;
- #else
-- emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR)
&& !irt_is64(ir->t)) ? 4 : 0));
-+ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) &&
!irt_is64(ir->t)) ? 4 : 0));
- ofs += 8;
- #endif
- }
-@@ -312,7 +312,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef
*args)
- #endif
- if (gpr <= REGARG_LASTGPR) {
- gpr++;
--#if LJ_64
-+#if LJ_64 && !LJ_SOFTFP
- fpr++;
- #endif
- } else {
-@@ -461,12 +461,36 @@ static void asm_tobit(ASMState *as, IRIns *ir)
- emit_tg(as, MIPSI_MFC1, dest, tmp);
- emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
- }
-+#elif LJ_64 /* && LJ_SOFTFP */
-+static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
-+{
-+ /* The modified regs must match with the *.dasc implementation. */
-+ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
-+ RID2RSET(RID_R1)|RID2RSET(RID_R12);
-+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
-+ ra_evictset(as, drop);
-+ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
-+ ra_destreg(as, ir, RID_RET);
-+ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO);
-+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0);
-+ if (r == RID_NONE)
-+ ra_leftov(as, REGARG_FIRSTGPR, ir->op1);
-+ else if (r != REGARG_FIRSTGPR)
-+ emit_move(as, REGARG_FIRSTGPR, r);
-+}
-+
-+static void asm_tobit(ASMState *as, IRIns *ir)
-+{
-+ Reg dest = ra_dest(as, ir, RSET_GPR);
-+ emit_dta(as, MIPSI_SLL, dest, dest, 0);
-+ asm_callid(as, ir, IRCALL_lj_vm_tobit);
-+}
- #endif
-
- static void asm_conv(ASMState *as, IRIns *ir)
- {
- IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- int stfp = (st == IRT_NUM || st == IRT_FLOAT);
- #endif
- #if LJ_64
-@@ -477,12 +501,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
- lua_assert(!(irt_isint64(ir->t) ||
- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
- #endif
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP32
- /* FP conversions are handled by SPLIT. */
- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
- /* Can't check for same types: SPLIT uses CONV
int.int + BXOR for sfp NEG. */
- #else
- lua_assert(irt_type(ir->t) != st);
-+#if !LJ_SOFTFP
- if (irt_isfp(ir->t)) {
- Reg dest = ra_dest(as, ir, RSET_FPR);
- if (stfp) { /* FP to FP conversion. */
-@@ -608,6 +633,42 @@ static void asm_conv(ASMState *as, IRIns *ir)
- }
- }
- } else
-+#else
-+ if (irt_isfp(ir->t)) {
-+#if LJ_64 && LJ_HASFFI
-+ if (stfp) { /* FP to FP conversion. */
-+ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d :
-+ IRCALL_softfp_d2f);
-+ } else { /* Integer to FP conversion. */
-+ IRCallID cid = ((IRT_IS64 >> st) & 1) ?
-+ (irt_isnum(ir->t) ?
-+ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) :
-+ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) :
-+ (irt_isnum(ir->t) ?
-+ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) :
-+ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f));
-+ asm_callid(as, ir, cid);
-+ }
-+#else
-+ asm_callid(as, ir, IRCALL_softfp_i2d);
-+#endif
-+ } else if (stfp) { /* FP to integer conversion. */
-+ if (irt_isguard(ir->t)) {
-+ /* Checked conversions are only supported from number to int. */
-+ lua_assert(irt_isint(ir->t) && st == IRT_NUM);
-+ asm_tointg(as, ir, RID_NONE);
-+ } else {
-+ IRCallID cid = irt_is64(ir->t) ?
-+ ((st == IRT_NUM) ?
-+ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
-+ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
-+ ((st == IRT_NUM) ?
-+ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
-+ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
-+ asm_callid(as, ir, cid);
-+ }
-+ } else
-+#endif
- #endif
- {
- Reg dest = ra_dest(as, ir, RSET_GPR);
-@@ -665,7 +726,7 @@ static void asm_strto(ASMState *as, IRIns *ir)
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
- IRRef args[2];
- int32_t ofs = 0;
--#if LJ_SOFTFP
-+#if LJ_SOFTFP32
- ra_evictset(as, RSET_SCRATCH);
- if (ra_used(ir)) {
- if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
-@@ -806,7 +867,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- MCLabel l_end, l_loop, l_next;
-
- rset_clear(allow, tab);
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP32
- if (!isk) {
- key = ra_alloc1(as, refkey, allow);
- rset_clear(allow, key);
-@@ -826,7 +887,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- }
- }
- #else
-- if (irt_isnum(kt)) {
-+ if (!LJ_SOFTFP && irt_isnum(kt)) {
- key = ra_alloc1(as, refkey, RSET_FPR);
- tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
- } else if (!irt_ispri(kt)) {
-@@ -882,6 +943,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
- emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
-+ } else if (LJ_SOFTFP && irt_isnum(kt)) {
-+ emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
-+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
- } else if (irt_isaddr(kt)) {
- Reg refk = tmp2;
- if (isk) {
-@@ -960,7 +1024,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
- if (irt_isnum(kt)) {
- emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
-- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
-+ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0);
- emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
- #if !LJ_SOFTFP
- emit_tg(as, MIPSI_DMFC1, tmp1, key);
-@@ -1123,7 +1187,7 @@ static MIPSIns asm_fxloadins(IRIns *ir)
- case IRT_U8: return MIPSI_LBU;
- case IRT_I16: return MIPSI_LH;
- case IRT_U16: return MIPSI_LHU;
-- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1;
- case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
- default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
- }
-@@ -1134,7 +1198,7 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
- switch (irt_type(ir->t)) {
- case IRT_I8: case IRT_U8: return MIPSI_SB;
- case IRT_I16: case IRT_U16: return MIPSI_SH;
-- case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1;
- case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
- default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
- }
-@@ -1199,7 +1263,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
-
- static void asm_ahuvload(ASMState *as, IRIns *ir)
- {
-- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
-+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
- Reg dest = RID_NONE, type = RID_TMP, idx;
- RegSet allow = RSET_GPR;
- int32_t ofs = 0;
-@@ -1212,7 +1276,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
- }
- }
- if (ra_used(ir)) {
-- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
- irt_isint(ir->t) || irt_isaddr(ir->t));
- dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
-@@ -1261,10 +1325,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
- int32_t ofs = 0;
- if (ir->r == RID_SINK)
- return;
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-- src = ra_alloc1(as, ir->op2, RSET_FPR);
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-- emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
-+ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs);
- } else {
- #if LJ_32
- if (!irt_ispri(ir->t)) {
-@@ -1312,7 +1376,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
- IRType1 t = ir->t;
- #if LJ_32
- int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
-- int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP);
-+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
- if (hiop)
- t.irt = IRT_NUM;
- #else
-@@ -1320,7 +1384,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
- #endif
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP32
- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
- if (hiop && ra_used(ir+1)) {
- type = ra_dest(as, ir+1, allow);
-@@ -1328,29 +1392,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
- }
- #else
- if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) &&
irt_isint(t)) {
-- dest = ra_scratch(as, RSET_FPR);
-+ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR);
- asm_tointg(as, ir, dest);
- t.irt = IRT_NUM; /* Continue with a regular number type check. */
- } else
- #endif
- if (ra_used(ir)) {
-- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-+ lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
- irt_isint(ir->t) || irt_isaddr(ir->t));
- dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
- base = ra_alloc1(as, REF_BASE, allow);
- rset_clear(allow, base);
-- if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
-+ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) {
- if (irt_isint(t)) {
-- Reg tmp = ra_scratch(as, RSET_FPR);
-+ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
-+#if LJ_SOFTFP
-+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
-+ ra_destreg(as, ir, RID_RET);
-+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0);
-+ if (tmp != REGARG_FIRSTGPR)
-+ emit_move(as, REGARG_FIRSTGPR, tmp);
-+#else
- emit_tg(as, MIPSI_MFC1, dest, tmp);
- emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
-+#endif
- dest = tmp;
- t.irt = IRT_NUM; /* Check for original type. */
- } else {
- Reg tmp = ra_scratch(as, RSET_GPR);
-+#if LJ_SOFTFP
-+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
-+ ra_destreg(as, ir, RID_RET);
-+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0);
-+ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0);
-+#else
- emit_fg(as, MIPSI_CVT_D_W, dest, dest);
- emit_tg(as, MIPSI_MTC1, tmp, dest);
-+#endif
- dest = tmp;
- t.irt = IRT_INT; /* Check for original type. */
- }
-@@ -1399,7 +1478,7 @@ dotypecheck:
- if (irt_isnum(t)) {
- asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
- emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
-- if (ra_hasreg(dest))
-+ if (!LJ_SOFTFP && ra_hasreg(dest))
- emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
- } else {
- asm_guard(as, MIPSI_BNE, RID_TMP,
-@@ -1409,7 +1488,7 @@ dotypecheck:
- }
- emit_tsi(as, MIPSI_LD, type, base, ofs);
- } else if (ra_hasreg(dest)) {
-- if (irt_isnum(t))
-+ if (!LJ_SOFTFP && irt_isnum(t))
- emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
- else
- emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
-@@ -1548,26 +1627,40 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
- Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
- emit_fg(as, mi, dest, left);
- }
-+#endif
-
-+#if !LJ_SOFTFP32
- static void asm_fpmath(ASMState *as, IRIns *ir)
- {
- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
- return;
-+#if !LJ_SOFTFP
- if (ir->op2 <= IRFPM_TRUNC)
- asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
- else if (ir->op2 == IRFPM_SQRT)
- asm_fpunary(as, ir, MIPSI_SQRT_D);
- else
-+#endif
- asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
- }
- #endif
-
-+#if !LJ_SOFTFP
-+#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D)
-+#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D)
-+#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D)
-+#elif LJ_64 /* && LJ_SOFTFP */
-+#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add)
-+#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub)
-+#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul)
-+#endif
-+
- static void asm_add(ASMState *as, IRIns *ir)
- {
- IRType1 t = ir->t;
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (irt_isnum(t)) {
-- asm_fparith(as, ir, MIPSI_ADD_D);
-+ asm_fpadd(as, ir);
- } else
- #endif
- {
-@@ -1589,9 +1682,9 @@ static void asm_add(ASMState *as, IRIns *ir)
-
- static void asm_sub(ASMState *as, IRIns *ir)
- {
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (irt_isnum(ir->t)) {
-- asm_fparith(as, ir, MIPSI_SUB_D);
-+ asm_fpsub(as, ir);
- } else
- #endif
- {
-@@ -1605,9 +1698,9 @@ static void asm_sub(ASMState *as, IRIns *ir)
-
- static void asm_mul(ASMState *as, IRIns *ir)
- {
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- if (irt_isnum(ir->t)) {
-- asm_fparith(as, ir, MIPSI_MUL_D);
-+ asm_fpmul(as, ir);
- } else
- #endif
- {
-@@ -1634,7 +1727,7 @@ static void asm_mod(ASMState *as, IRIns *ir)
- asm_callid(as, ir, IRCALL_lj_vm_modi);
- }
-
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- static void asm_pow(ASMState *as, IRIns *ir)
- {
- #if LJ_64 && LJ_HASFFI
-@@ -1654,7 +1747,11 @@ static void asm_div(ASMState *as, IRIns *ir)
- IRCALL_lj_carith_divu64);
- else
- #endif
-+#if !LJ_SOFTFP
- asm_fparith(as, ir, MIPSI_DIV_D);
-+#else
-+ asm_callid(as, ir, IRCALL_softfp_div);
-+#endif
- }
- #endif
-
-@@ -1664,6 +1761,13 @@ static void asm_neg(ASMState *as, IRIns *ir)
- if (irt_isnum(ir->t)) {
- asm_fpunary(as, ir, MIPSI_NEG_D);
- } else
-+#elif LJ_64 /* && LJ_SOFTFP */
-+ if (irt_isnum(ir->t)) {
-+ Reg dest = ra_dest(as, ir, RSET_GPR);
-+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-+ emit_dst(as, MIPSI_XOR, dest, left,
-+ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest)));
-+ } else
- #endif
- {
- Reg dest = ra_dest(as, ir, RSET_GPR);
-@@ -1673,7 +1777,17 @@ static void asm_neg(ASMState *as, IRIns *ir)
- }
- }
-
-+#if !LJ_SOFTFP
- #define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
-+#elif LJ_64 /* && LJ_SOFTFP */
-+static void asm_abs(ASMState *as, IRIns *ir)
-+{
-+ Reg dest = ra_dest(as, ir, RSET_GPR);
-+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
-+ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0);
-+}
-+#endif
-+
- #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
- #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
-
-@@ -1918,15 +2032,21 @@ static void asm_bror(ASMState *as, IRIns *ir)
- }
- }
-
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP
- static void asm_sfpmin_max(ASMState *as, IRIns *ir)
- {
- CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin :
IRCALL_lj_vm_sfmax];
-+#if LJ_64
-+ IRRef args[2];
-+ args[0] = ir->op1;
-+ args[1] = ir->op2;
-+#else
- IRRef args[4];
- args[0^LJ_BE] = ir->op1;
- args[1^LJ_BE] = (ir+1)->op1;
- args[2^LJ_BE] = ir->op2;
- args[3^LJ_BE] = (ir+1)->op2;
-+#endif
- asm_setupresult(as, ir, &ci);
- emit_call(as, (void *)ci.func, 0);
- ci.func = NULL;
-@@ -1936,7 +2056,10 @@ static void asm_sfpmin_max(ASMState *as, IRIns *ir)
-
- static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
- {
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ asm_sfpmin_max(as, ir);
-+#else
- Reg dest = ra_dest(as, ir, RSET_FPR);
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
- right = (left >> 8); left &= 255;
-@@ -1947,6 +2070,7 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
- if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
- }
- emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
-+#endif
- } else {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg right, left = ra_alloc2(as, ir, RSET_GPR);
-@@ -1967,18 +2091,24 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
-
- /* -- Comparisons --------------------------------------------------------- */
-
--#if LJ_32 && LJ_SOFTFP
-+#if LJ_SOFTFP
- /* SFP comparisons. */
- static void asm_sfpcomp(ASMState *as, IRIns *ir)
- {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
- RegSet drop = RSET_SCRATCH;
- Reg r;
-+#if LJ_64
-+ IRRef args[2];
-+ args[0] = ir->op1;
-+ args[1] = ir->op2;
-+#else
- IRRef args[4];
- args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
- args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
-+#endif
-
-- for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
-+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) {
- if (!rset_test(as->freeset, r) &&
- regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
- rset_clear(drop, r);
-@@ -2032,11 +2162,15 @@ static void asm_comp(ASMState *as, IRIns *ir)
- {
- /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
- IROp op = ir->o;
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ asm_sfpcomp(as, ir);
-+#else
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
- right = (left >> 8); left &= 255;
- asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
- emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
-+#endif
- } else {
- Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
- if (op == IR_ABC) op = IR_UGT;
-@@ -2068,9 +2202,13 @@ static void asm_equal(ASMState *as, IRIns *ir)
- Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
- RSET_FPR : RSET_GPR);
- right = (left >> 8); left &= 255;
-- if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ asm_sfpcomp(as, ir);
-+#else
- asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
- emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
-+#endif
- } else {
- asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
- }
-@@ -2263,7 +2401,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if ((sn & SNAP_NORESTORE))
- continue;
- if (irt_isnum(ir->t)) {
--#if LJ_SOFTFP
-+#if LJ_SOFTFP32
- Reg tmp;
- RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
-@@ -2272,6 +2410,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
- tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
- emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
-+#elif LJ_SOFTFP /* && LJ_64 */
-+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
-+ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs);
- #else
- Reg src = ra_alloc1(as, ref, RSET_FPR);
- emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
-diff --git a/src/lj_crecord.c b/src/lj_crecord.c
-index e32ae23..fd59e28 100644
---- a/src/lj_crecord.c
-+++ b/src/lj_crecord.c
-@@ -212,7 +212,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp,
- ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0);
- ml[i].trofs = trofs;
- i++;
-- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1;
-+ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1;
- if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */
- rwin = 0;
- for ( ; j < i; j++) {
-@@ -1130,7 +1130,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
- else
- tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
- }
-- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4)
{
-+ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size >
4) {
- lj_needsplit(J);
- }
- #if LJ_TARGET_X86
-diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
-index 8a9ee24..bb6593a 100644
---- a/src/lj_emit_mips.h
-+++ b/src/lj_emit_mips.h
-@@ -12,6 +12,8 @@ static intptr_t get_k64val(IRIns *ir)
- return (intptr_t)ir_kgc(ir);
- } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
- return (intptr_t)ir_kptr(ir);
-+ } else if (LJ_SOFTFP && ir->o == IR_KNUM) {
-+ return (intptr_t)ir_knum(ir)->u64;
- } else {
- lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
- return ir->i; /* Sign-extended. */
-diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
-index dfdee2d..849d7a2 100644
---- a/src/lj_ffrecord.c
-+++ b/src/lj_ffrecord.c
-@@ -1012,7 +1012,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J,
RecordFFData *rd)
- handle_num:
- tra = lj_ir_tonum(J, tra);
- tr = lj_ir_call(J, id, tr, trsf, tra);
-- if (LJ_SOFTFP) lj_needsplit(J);
-+ if (LJ_SOFTFP32) lj_needsplit(J);
- break;
- case STRFMT_STR:
- if (!tref_isstr(tra)) {
-diff --git a/src/lj_ircall.h b/src/lj_ircall.h
-index 973c36e..7312006 100644
---- a/src/lj_ircall.h
-+++ b/src/lj_ircall.h
-@@ -51,7 +51,7 @@ typedef struct CCallInfo {
- #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
- #define CCI_XA (1u << CCI_XARGS_SHIFT)
-
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- #define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
- #else
- #define CCI_XNARGS(ci) CCI_NARGS((ci))
-@@ -78,13 +78,19 @@ typedef struct CCallInfo {
- #define IRCALLCOND_SOFTFP_FFI(x) NULL
- #endif
-
--#if LJ_SOFTFP && LJ_TARGET_MIPS32
-+#if LJ_SOFTFP && LJ_TARGET_MIPS
- #define IRCALLCOND_SOFTFP_MIPS(x) x
- #else
- #define IRCALLCOND_SOFTFP_MIPS(x) NULL
- #endif
-
--#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32)
-+#if LJ_SOFTFP && LJ_TARGET_MIPS64
-+#define IRCALLCOND_SOFTFP_MIPS64(x) x
-+#else
-+#define IRCALLCOND_SOFTFP_MIPS64(x) NULL
-+#endif
-+
-+#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
-
- #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
- #define IRCALLCOND_FP64_FFI(x) x
-@@ -112,6 +118,14 @@ typedef struct CCallInfo {
- #define XA2_FP 0
- #endif
-
-+#if LJ_SOFTFP32
-+#define XA_FP32 CCI_XA
-+#define XA2_FP32 (CCI_XA+CCI_XA)
-+#else
-+#define XA_FP32 0
-+#define XA2_FP32 0
-+#endif
-+
- #if LJ_32
- #define XA_64 CCI_XA
- #define XA2_64 (CCI_XA+CCI_XA)
-@@ -181,20 +195,21 @@ typedef struct CCallInfo {
- _(ANY, pow, 2, N, NUM, XA2_FP) \
- _(ANY, atan2, 2, N, NUM, XA2_FP) \
- _(ANY, ldexp, 2, N, NUM, XA_FP) \
-- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
-- _(SOFTFP, softfp_add, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_div, 4, N, NUM, 0) \
-- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \
-+ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
-+ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \
- _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \
-- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \
-- _(SOFTFP_MIPS, lj_vm_sfmin, 4, N, NUM, 0) \
-- _(SOFTFP_MIPS, lj_vm_sfmax, 4, N, NUM, 0) \
-+ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \
-+ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \
-+ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
- _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
- _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
-- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \
-- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \
-+ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
-+ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
- _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
- _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
- _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
-diff --git a/src/lj_iropt.h b/src/lj_iropt.h
-index 73aef0e..a59ba3f 100644
---- a/src/lj_iropt.h
-+++ b/src/lj_iropt.h
-@@ -150,7 +150,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
- /* Optimization passes. */
- LJ_FUNC void lj_opt_dce(jit_State *J);
- LJ_FUNC int lj_opt_loop(jit_State *J);
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- LJ_FUNC void lj_opt_split(jit_State *J);
- #else
- #define lj_opt_split(J) UNUSED(J)
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index 2fa8efc..f37e792 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -374,7 +374,7 @@ enum {
- ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
-
- /* Set/reset flag to activate the SPLIT pass for the current trace. */
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- #define lj_needsplit(J) (J->needsplit = 1)
- #define lj_resetsplit(J) (J->needsplit = 0)
- #else
-@@ -437,7 +437,7 @@ typedef struct jit_State {
- MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
-
- PostProc postproc; /* Required post-processing after execution. */
--#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
-+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
- uint8_t needsplit; /* Need SPLIT pass. */
- #endif
- uint8_t retryrec; /* Retry recording. */
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index 52372c3..c7e4742 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -924,6 +924,9 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue
*o2)
-
- #if LJ_SOFTFP
- LJ_ASMF int32_t lj_vm_tobit(double x);
-+#if LJ_TARGET_MIPS64
-+LJ_ASMF int32_t lj_vm_tointg(double x);
-+#endif
- #endif
-
- static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
-index fc93520..79ac3cc 100644
---- a/src/lj_opt_split.c
-+++ b/src/lj_opt_split.c
-@@ -8,7 +8,7 @@
-
- #include "lj_obj.h"
-
--#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
-+#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
-
- #include "lj_err.h"
- #include "lj_buf.h"
-diff --git a/src/lj_snap.c b/src/lj_snap.c
-index bb063c2..44fa379 100644
---- a/src/lj_snap.c
-+++ b/src/lj_snap.c
-@@ -93,7 +93,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg
nslots)
- (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
- sn |= SNAP_NORESTORE;
- }
-- if (LJ_SOFTFP && irt_isnum(ir->t))
-+ if (LJ_SOFTFP32 && irt_isnum(ir->t))
- sn |= SNAP_SOFTFPNUM;
- map[n++] = sn;
- }
-@@ -374,7 +374,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
- break;
- }
- }
-- } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
-+ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
- ref++;
- } else if (ir->o == IR_PVAL) {
- ref = ir->op1 + REF_BIAS;
-@@ -486,7 +486,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
- } else {
- IRType t = irt_type(ir->t);
- uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
-- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
-+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
- if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
- tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
- }
-@@ -520,7 +520,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
- if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
- if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
- snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
-- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
-+ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
- irs+1 < irlast && (irs+1)->o == IR_HIOP)
- snap_pref(J, T, map, nent, seen, (irs+1)->op2);
- }
-@@ -579,10 +579,10 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
- lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
- val = snap_pref(J, T, map, nent, seen, irc->op1);
- val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
-- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
-+ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
- irs+1 < irlast && (irs+1)->o == IR_HIOP) {
- IRType t = IRT_I64;
-- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
-+ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
- t = IRT_NUM;
- lj_needsplit(J);
- if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
-@@ -635,7 +635,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
- int32_t *sps = &ex->spill[regsp_spill(rs)];
- if (irt_isinteger(t)) {
- setintV(o, *sps);
--#if !LJ_SOFTFP
-+#if !LJ_SOFTFP32
- } else if (irt_isnum(t)) {
- o->u64 = *(uint64_t *)sps;
- #endif
-@@ -660,6 +660,9 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
- #if !LJ_SOFTFP
- } else if (irt_isnum(t)) {
- setnumV(o, ex->fpr[r-RID_MIN_FPR]);
-+#elif LJ_64 /* && LJ_SOFTFP */
-+ } else if (irt_isnum(t)) {
-+ o->u64 = ex->gpr[r-RID_MIN_GPR];
- #endif
- #if LJ_64 && !LJ_GC64
- } else if (irt_is64(t)) {
-@@ -813,7 +816,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
- val = lj_tab_set(J->L, t, &tmp);
- /* NOBARRIER: The table is new (marked white). */
- snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
-- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o ==
IR_HIOP) {
-+ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o
== IR_HIOP) {
- snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
- val->u32.hi = tmp.u32.lo;
- }
-@@ -874,7 +877,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
- continue;
- }
- snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
-- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
-+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
- TValue tmp;
- snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
- o->u32.hi = tmp.u32.lo;
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index c06270a..75b38de 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -1980,6 +1980,38 @@ static void build_subroutines(BuildCtx *ctx)
- |1:
- | jr ra
- |. move CRET1, r0
-+ |
-+ |// FP number to int conversion with a check for soft-float.
-+ |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
-+ |->vm_tointg:
-+ |.if JIT
-+ | dsll CRET2, CARG1, 1
-+ | beqz CRET2, >2
-+ |. li TMP0, 1076
-+ | dsrl AT, CRET2, 53
-+ | dsubu TMP0, TMP0, AT
-+ | sltiu AT, TMP0, 54
-+ | beqz AT, >1
-+ |. dextm CRET2, CRET2, 0, 20
-+ | dinsu CRET2, AT, 21, 21
-+ | slt AT, CARG1, r0
-+ | dsrlv CRET1, CRET2, TMP0
-+ | dsubu CARG1, r0, CRET1
-+ | movn CRET1, CARG1, AT
-+ | li CARG1, 64
-+ | subu TMP0, CARG1, TMP0
-+ | dsllv CRET2, CRET2, TMP0 // Integer check.
-+ | sextw AT, CRET1
-+ | xor AT, CRET1, AT // Range check.
-+ | jr ra
-+ |. movz CRET2, AT, CRET2
-+ |1:
-+ | jr ra
-+ |. li CRET2, 1
-+ |2:
-+ | jr ra
-+ |. move CRET1, r0
-+ |.endif
- |.endif
- |
- |.macro .ffunc_bit, name
-@@ -2665,6 +2697,23 @@ static void build_subroutines(BuildCtx *ctx)
- |. li CRET1, 0
- |.endif
- |
-+ |.macro sfmin_max, name, intins
-+ |->vm_sf .. name:
-+ |.if JIT and not FPU
-+ | move TMP2, ra
-+ | bal ->vm_sfcmpolt
-+ |. nop
-+ | move ra, TMP2
-+ | move TMP0, CRET1
-+ | move CRET1, CARG1
-+ | jr ra
-+ |. intins CRET1, CARG2, TMP0
-+ |.endif
-+ |.endmacro
-+ |
-+ | sfmin_max min, movz
-+ | sfmin_max max, movn
-+ |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
- |//-----------------------------------------------------------------------
---
-2.20.1
-
diff --git a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch
b/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch
deleted file mode 100644
index 133018d..0000000
--- a/0006-FreeBSD-x64-Avoid-changing-resource-limits-if-not-ne.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From b0ecc6dd65a0b40e1868f20719c4f7c4880dc32d Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 8 Jun 2017 00:15:15 +0200
-Subject: [PATCH 06/72] FreeBSD/x64: Avoid changing resource limits, if not
- needed.
-
----
- src/lj_alloc.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_alloc.c b/src/lj_alloc.c
-index 95d15d0..9fc761c 100644
---- a/src/lj_alloc.c
-+++ b/src/lj_alloc.c
-@@ -343,7 +343,7 @@ static void *CALL_MMAP(size_t size)
- }
- #endif
-
--#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__
< 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-
- #include <sys/resource.h>
-
---
-2.20.1
-
diff --git a/0007-Remove-unused-define.patch b/0007-Remove-unused-define.patch
deleted file mode 100644
index c4729e1..0000000
--- a/0007-Remove-unused-define.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 6a71e71c1430e5a8f794a52cb2da66e2693db796 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 11 Jun 2017 10:02:08 +0200
-Subject: [PATCH 07/72] Remove unused define.
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Suggested by 罗泽轩.
----
- src/lj_def.h | 1 -
- 1 file changed, 1 deletion(-)
-
-diff --git a/src/lj_def.h b/src/lj_def.h
-index 2d8fff6..e67bb24 100644
---- a/src/lj_def.h
-+++ b/src/lj_def.h
-@@ -80,7 +80,6 @@ typedef unsigned int uintptr_t;
- #define LJ_MIN_SBUF 32 /* Min. string buffer length. */
- #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
- #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
--#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
-
- /* JIT compiler limits. */
- #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
---
-2.20.1
-
diff --git a/0008-Modify-fix-for-warning-from-ar.patch
b/0008-Modify-fix-for-warning-from-ar.patch
deleted file mode 100644
index 4d9b0e4..0000000
--- a/0008-Modify-fix-for-warning-from-ar.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 82151a4514e6538086f3f5e01cb8d4b22287b14f Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 12 Jun 2017 09:24:00 +0200
-Subject: [PATCH 08/72] Modify fix for warning from 'ar'.
-
----
- src/Makefile | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index f7f81a4..24e8c0e 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -208,7 +208,7 @@ TARGET_CC= $(STATIC_CC)
- TARGET_STCC= $(STATIC_CC)
- TARGET_DYNCC= $(DYNAMIC_CC)
- TARGET_LD= $(CROSS)$(CC)
--TARGET_AR= $(CROSS)ar rcus 2>/dev/null
-+TARGET_AR= $(CROSS)ar rcus
- TARGET_STRIP= $(CROSS)strip
-
- TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
-@@ -293,6 +293,7 @@ ifeq (Windows,$(TARGET_SYS))
- TARGET_XSHLDFLAGS= -shared
- TARGET_DYNXLDOPTS=
- else
-+ TARGET_AR+= 2>/dev/null
- ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector
2>/dev/null || echo 1))
- TARGET_XCFLAGS+= -fno-stack-protector
- endif
---
-2.20.1
-
diff --git a/0009-x64-LJ_GC64-Fix-emit_rma.patch b/0009-x64-LJ_GC64-Fix-emit_rma.patch
deleted file mode 100644
index ff59f09..0000000
--- a/0009-x64-LJ_GC64-Fix-emit_rma.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 7e662e4f87134f1e84f7bea80933e033c5bf53a3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 26 Jul 2017 09:52:53 +0200
-Subject: [PATCH 09/72] x64/LJ_GC64: Fix emit_rma().
-
----
- src/lj_emit_x86.h | 24 +++++++++++++++++++++---
- 1 file changed, 21 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
-index 5207f9d..5b139bd 100644
---- a/src/lj_emit_x86.h
-+++ b/src/lj_emit_x86.h
-@@ -343,9 +343,27 @@ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void
*addr)
- emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
- } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
- emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
-- } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
-- emit_rmro(as, xo, rr, rr, 0);
-- emit_loadu64(as, rr, (uintptr_t)addr);
-+ } else if (!checki32((intptr_t)addr)) {
-+ Reg ra = (rr & 15);
-+ if (xo != XO_MOV) {
-+ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
-+ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
-+ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
-+ ra = RID_DISPATCH;
-+ if (checku32(dispaddr)) {
-+ emit_loadi(as, ra, (int32_t)dispaddr);
-+ } else { /* Full-size 64 bit load. */
-+ MCode *p = as->mcp;
-+ *(uint64_t *)(p-8) = dispaddr;
-+ p[-9] = (MCode)(XI_MOVri+(ra&7));
-+ p[-10] = 0x48 + ((ra>>3)&1);
-+ p -= 10;
-+ as->mcp = p;
-+ }
-+ if (xo == XO_GROUP3b) emit_i8(as, i8);
-+ }
-+ emit_rmro(as, xo, rr, ra, 0);
-+ emit_loadu64(as, ra, (uintptr_t)addr);
- } else
- #endif
- {
---
-2.20.1
-
diff --git a/0010-PPC-Add-soft-float-support-to-interpreter.patch
b/0010-PPC-Add-soft-float-support-to-interpreter.patch
deleted file mode 100644
index 52d3638..0000000
--- a/0010-PPC-Add-soft-float-support-to-interpreter.patch
+++ /dev/null
@@ -1,2761 +0,0 @@
-From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 26 Jul 2017 09:52:19 +0200
-Subject: [PATCH 10/72] PPC: Add soft-float support to interpreter.
-
-Contributed by Djordje Kovacevic and Stefan Pejic from
RT-RK.com.
-Sponsored by Cisco Systems, Inc.
----
- src/host/buildvm_asm.c | 2 +-
- src/lj_arch.h | 29 +-
- src/lj_ccall.c | 38 +-
- src/lj_ccall.h | 4 +-
- src/lj_ccallback.c | 30 +-
- src/lj_frame.h | 2 +-
- src/lj_ircall.h | 2 +-
- src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++-------
- 8 files changed, 1101 insertions(+), 255 deletions(-)
-
-diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
-index ffd1490..43595b3 100644
---- a/src/host/buildvm_asm.c
-+++ b/src/host/buildvm_asm.c
-@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
- #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
- fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX
"progbits\n");
- #endif
--#if LJ_TARGET_PPC && !LJ_TARGET_PS3
-+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
- /* Hard-float ABI. */
- fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
- #endif
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index b770564..0145a7c 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -254,6 +254,29 @@
- #else
- #define LJ_ARCH_BITS 32
- #define LJ_ARCH_NAME "ppc"
-+
-+#if !defined(LJ_ARCH_HASFPU)
-+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-+#define LJ_ARCH_HASFPU 0
-+#else
-+#define LJ_ARCH_HASFPU 1
-+#endif
-+#endif
-+
-+#if !defined(LJ_ABI_SOFTFP)
-+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-+#define LJ_ABI_SOFTFP 1
-+#else
-+#define LJ_ABI_SOFTFP 0
-+#endif
-+#endif
-+#endif
-+
-+#if LJ_ABI_SOFTFP
-+#define LJ_ARCH_NOJIT 1 /* NYI */
-+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-+#else
-+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
- #endif
-
- #define LJ_TARGET_PPC 1
-@@ -262,7 +285,6 @@
- #define LJ_TARGET_MASKSHIFT 0
- #define LJ_TARGET_MASKROT 1
- #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
--#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
-
- #if LJ_TARGET_CONSOLE
- #define LJ_ARCH_PPC32ON64 1
-@@ -415,16 +437,13 @@
- #error "No support for ILP32 model on ARM64"
- #endif
- #elif LJ_TARGET_PPC
--#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
--#error "No support for PowerPC CPUs without double-precision FPU"
--#endif
- #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
- #error "No support for little-endian PPC32"
- #endif
- #if LJ_ARCH_PPC64
- #error "No support for PowerPC 64 bit mode (yet)"
- #endif
--#ifdef __NO_FPRS__
-+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
- #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
- #endif
- #elif LJ_TARGET_MIPS32
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index 5c252e5..799be48 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -387,6 +387,24 @@
- #define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex by value in 2 or 4 GPRs. */
-
-+#define CCALL_HANDLE_GPR \
-+ /* Try to pass argument in GPRs. */ \
-+ if (n > 1) { \
-+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
-+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
-+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-+ else if (ngpr + n > maxgpr) \
-+ ngpr = maxgpr; /* Prevent reordering. */ \
-+ } \
-+ if (ngpr + n <= maxgpr) { \
-+ dp = &cc->gpr[ngpr]; \
-+ ngpr += n; \
-+ goto done; \
-+ } \
-+
-+#if LJ_ABI_SOFTFP
-+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
-+#else
- #define CCALL_HANDLE_REGARG \
- if (isfp) { /* Try to pass argument in FPRs. */ \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
-@@ -395,24 +413,16 @@
- d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
- goto done; \
- } \
-- } else { /* Try to pass argument in GPRs. */ \
-- if (n > 1) { \
-- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
-- if (ctype_isinteger(d->info)) \
-- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-- else if (ngpr + n > maxgpr) \
-- ngpr = maxgpr; /* Prevent reordering. */ \
-- } \
-- if (ngpr + n <= maxgpr) { \
-- dp = &cc->gpr[ngpr]; \
-- ngpr += n; \
-- goto done; \
-- } \
-+ } else { \
-+ CCALL_HANDLE_GPR \
- }
-+#endif
-
-+#if !LJ_ABI_SOFTFP
- #define CCALL_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
-+#endif
-
- #elif LJ_TARGET_MIPS32
- /* -- MIPS o32 calling conventions ---------------------------------------- */
-@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- }
- if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
-
--#if LJ_TARGET_X64 || LJ_TARGET_PPC
-+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
- cc->nfpr = nfpr; /* Required for vararg functions. */
- #endif
- cc->nsp = nsp;
-diff --git a/src/lj_ccall.h b/src/lj_ccall.h
-index 59f6648..6efa48c 100644
---- a/src/lj_ccall.h
-+++ b/src/lj_ccall.h
-@@ -86,9 +86,9 @@ typedef union FPRArg {
- #elif LJ_TARGET_PPC
-
- #define CCALL_NARG_GPR 8
--#define CCALL_NARG_FPR 8
-+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
- #define CCALL_NRET_GPR 4 /* For complex double. */
--#define CCALL_NRET_FPR 1
-+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
- #define CCALL_SPS_EXTRA 4
- #define CCALL_SPS_FREE 0
-
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index 846827b..03494a7 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *cts)
-
- #elif LJ_TARGET_PPC
-
-+#define CALLBACK_HANDLE_GPR \
-+ if (n > 1) { \
-+ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */
\
-+ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
-+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-+ } \
-+ if (ngpr + n <= maxgpr) { \
-+ sp = &cts->cb.gpr[ngpr]; \
-+ ngpr += n; \
-+ goto done; \
-+ }
-+
-+#if LJ_ABI_SOFTFP
-+#define CALLBACK_HANDLE_REGARG \
-+ CALLBACK_HANDLE_GPR \
-+ UNUSED(isfp);
-+#else
- #define CALLBACK_HANDLE_REGARG \
- if (isfp) { \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
-@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *cts)
- goto done; \
- } \
- } else { /* Try to pass argument in GPRs. */ \
-- if (n > 1) { \
-- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
-- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
-- } \
-- if (ngpr + n <= maxgpr) { \
-- sp = &cts->cb.gpr[ngpr]; \
-- ngpr += n; \
-- goto done; \
-- } \
-+ CALLBACK_HANDLE_GPR \
- }
-+#endif
-
-+#if !LJ_ABI_SOFTFP
- #define CALLBACK_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
-+#endif
-
- #elif LJ_TARGET_MIPS32
-
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 19c49a4..04cb5a3 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special
continuations. */
- #define CFRAME_OFS_L 36
- #define CFRAME_OFS_PC 32
- #define CFRAME_OFS_MULTRES 28
--#define CFRAME_SIZE 272
-+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
- #define CFRAME_SHIFT_MULTRES 3
- #endif
- #elif LJ_TARGET_MIPS32
-diff --git a/src/lj_ircall.h b/src/lj_ircall.h
-index 7312006..9b3883b 100644
---- a/src/lj_ircall.h
-+++ b/src/lj_ircall.h
-@@ -287,7 +287,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
- #define fp64_f2l __aeabi_f2lz
- #define fp64_f2ul __aeabi_f2ulz
- #endif
--#elif LJ_TARGET_MIPS
-+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
- #define softfp_add __adddf3
- #define softfp_sub __subdf3
- #define softfp_mul __muldf3
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index b4260eb..0839668 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -103,6 +103,18 @@
- |// Fixed register assignments for the interpreter.
- |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
- |
-+|.macro .FPU, a, b
-+|.if FPU
-+| a, b
-+|.endif
-+|.endmacro
-+|
-+|.macro .FPU, a, b, c
-+|.if FPU
-+| a, b, c
-+|.endif
-+|.endmacro
-+|
- |// The following must be C callee-save (but BASE is often refetched).
- |.define BASE, r14 // Base of current Lua stack frame.
- |.define KBASE, r15 // Constants of current Lua function.
-@@ -116,8 +128,10 @@
- |.define TISNUM, r22
- |.define TISNIL, r23
- |.define ZERO, r24
-+|.if FPU
- |.define TOBIT, f30 // 2^52 + 2^51.
- |.define TONUM, f31 // 2^52 + 2^51 + 2^31.
-+|.endif
- |
- |// The following temporaries are not saved across C calls, except for RA.
- |.define RA, r20 // Callee-save.
-@@ -133,6 +147,7 @@
- |
- |// Saved temporaries.
- |.define SAVE0, r21
-+|.define SAVE1, r25
- |
- |// Calling conventions.
- |.define CARG1, r3
-@@ -141,8 +156,10 @@
- |.define CARG4, r6 // Overlaps TMP3.
- |.define CARG5, r7 // Overlaps INS.
- |
-+|.if FPU
- |.define FARG1, f1
- |.define FARG2, f2
-+|.endif
- |
- |.define CRET1, r3
- |.define CRET2, r4
-@@ -213,10 +230,16 @@
- |.endif
- |.else
- |
-+|.if FPU
- |.define SAVE_LR, 276(sp)
- |.define CFRAME_SPACE, 272 // Delta for sp.
- |// Back chain for sp: 272(sp) <-- sp entering interpreter
- |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
-+|.else
-+|.define SAVE_LR, 132(sp)
-+|.define CFRAME_SPACE, 128 // Delta for sp.
-+|// Back chain for sp: 128(sp) <-- sp entering interpreter
-+|.endif
- |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
- |.define SAVE_CR, 52(sp) // 32 bit CR save.
- |.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
-@@ -226,16 +249,25 @@
- |.define SAVE_PC, 32(sp)
- |.define SAVE_MULTRES, 28(sp)
- |.define UNUSED1, 24(sp)
-+|.if FPU
- |.define TMPD_LO, 20(sp)
- |.define TMPD_HI, 16(sp)
- |.define TONUM_LO, 12(sp)
- |.define TONUM_HI, 8(sp)
-+|.else
-+|.define SFSAVE_4, 20(sp)
-+|.define SFSAVE_3, 16(sp)
-+|.define SFSAVE_2, 12(sp)
-+|.define SFSAVE_1, 8(sp)
-+|.endif
- |// Next frame lr: 4(sp)
- |// Back chain for sp: 0(sp) <-- sp while in interpreter
- |
-+|.if FPU
- |.define TMPD_BLO, 23(sp)
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
-+|.endif
- |
- |.endif
- |
-@@ -245,7 +277,7 @@
- |.else
- | stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
- |.endif
--| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
-+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- |.endmacro
- |.macro rest_, reg
- |.if GPR64
-@@ -253,7 +285,7 @@
- |.else
- | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
- |.endif
--| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
-+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
- |.endmacro
- |
- |.macro saveregs
-@@ -323,6 +355,7 @@
- |// Trap for not-yet-implemented parts.
- |.macro NYI; tw 4, sp, sp; .endmacro
- |
-+|.if FPU
- |// int/FP conversions.
- |.macro tonum_i, freg, reg
- | xoris reg, reg, 0x8000
-@@ -346,6 +379,7 @@
- |.macro toint, reg, freg
- | toint reg, freg, freg
- |.endmacro
-+|.endif
- |
- |//-----------------------------------------------------------------------
- |
-@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx)
- | beq >2
- |1:
- | addic. TMP1, TMP1, -8
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ |.endif
- | addi RA, RA, 8
-+ |.if FPU
- | stfd f0, 0(BASE)
-+ |.else
-+ | stw CARG1, 0(BASE)
-+ | stw CARG2, 4(BASE)
-+ |.endif
- | addi BASE, BASE, 8
- | bney <1
- |
-@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx)
- | .toc ld TOCREG, SAVE_TOC
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp BASE, L->base
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | li ZERO, 0
-- | stw TMP3, TMPD
-+ | .FPU stw TMP3, TMPD
- | li TMP1, LJ_TFALSE
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | li TISNIL, LJ_TNIL
- | li_vmstate INTERP
-- | lfs TOBIT, TMPD
-+ | .FPU lfs TOBIT, TMPD
- | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | la RA, -8(BASE) // Results start at BASE-8.
-- | stw TMP3, TMPD
-+ | .FPU stw TMP3, TMPD
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | stw TMP1, 0(RA) // Prepend false to error message.
- | li RD, 16 // 2 results: false + error message.
- | st_vmstate
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | b ->vm_returnc
- |
- |//-----------------------------------------------------------------------
-@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *ctx)
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp TMP1, L->top
- | lwz PC, FRAME_PC(BASE)
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | stb CARG3, L->status
-- | stw TMP3, TMPD
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | lfs TOBIT, TMPD
-+ | .FPU stw TMP3, TMPD
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU lfs TOBIT, TMPD
- | sub RD, TMP1, BASE
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | addi RD, RD, 8
-- | stw TMP0, TONUM_HI
-+ | .FPU stw TMP0, TONUM_HI
- | li_vmstate INTERP
- | li ZERO, 0
- | st_vmstate
- | andix. TMP0, PC, FRAME_TYPE
- | mr MULTRES, RD
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | li TISNIL, LJ_TNIL
- | beq ->BC_RET_Z
- | b ->vm_return
-@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *ctx)
- | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp TMP1, L->top
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | add PC, PC, BASE
-- | stw TMP3, TMPD
-+ | .FPU stw TMP3, TMPD
- | li ZERO, 0
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | lfs TOBIT, TMPD
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU lfs TOBIT, TMPD
- | sub PC, PC, TMP2 // PC = frame delta + frame type
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | sub NARGS8:RC, TMP1, BASE
-- | stw TMP0, TONUM_HI
-+ | .FPU stw TMP0, TONUM_HI
- | li_vmstate INTERP
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | li TISNIL, LJ_TNIL
- | st_vmstate
- |
-@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz INS, -4(PC)
- | subi CARG2, RB, 16
- | decode_RB8 SAVE0, INS
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz TMP2, 0(RA)
-+ | lwz TMP3, 4(RA)
-+ |.endif
- | add TMP1, BASE, SAVE0
- | stp BASE, L->base
- | cmplw TMP1, CARG2
- | sub CARG3, CARG2, TMP1
- | decode_RA8 RA, INS
-+ |.if FPU
- | stfd f0, 0(CARG2)
-+ |.else
-+ | stw TMP2, 0(CARG2)
-+ | stw TMP3, 4(CARG2)
-+ |.endif
- | bney ->BC_CAT_Z
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux TMP2, RA, BASE
-+ | stw TMP3, 4(RA)
-+ |.endif
- | b ->cont_nop
- |
- |//-- Table indexing metamethods -----------------------------------------
-@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- | beq >3
-+ |.if FPU
- | lfd f0, 0(CRET1)
-+ |.else
-+ | lwz TMP0, 0(CRET1)
-+ | lwz TMP1, 4(CRET1)
-+ |.endif
- | ins_next1
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- |
- |3: // Call __index metamethod.
-@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *ctx)
- | // Returns cTValue * or NULL.
- | cmplwi CRET1, 0
- | beq >1
-+ |.if FPU
- | lfd f14, 0(CRET1)
-+ |.else
-+ | lwz SAVE0, 0(CRET1)
-+ | lwz SAVE1, 4(CRET1)
-+ |.endif
- | b ->BC_TGETR_Z
- |1:
- | stwx TISNIL, BASE, RA
-@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx)
- | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
-+ |.if FPU
- | lfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP2, RA, BASE
-+ | lwz TMP3, 4(RA)
-+ |.endif
- | beq >3
- | // NOBARRIER: lj_meta_tset ensures the table is not black.
- | ins_next1
-+ |.if FPU
- | stfd f0, 0(CRET1)
-+ |.else
-+ | stw TMP2, 0(CRET1)
-+ | stw TMP3, 4(CRET1)
-+ |.endif
- | ins_next2
- |
- |3: // Call __newindex metamethod.
-@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *ctx)
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 24 // 3 args for func(t, k, v)
-+ |.if FPU
- | stfd f0, 16(BASE) // Copy value to third argument.
-+ |.else
-+ | stw TMP2, 16(BASE)
-+ | stw TMP3, 20(BASE)
-+ |.endif
- | b ->vm_call_dispatch_f
- |
- |->vmeta_tsetr:
-@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *ctx)
- | stw PC, SAVE_PC
- | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
- | // Returns TValue *.
-+ |.if FPU
- | stfd f14, 0(CRET1)
-+ |.else
-+ | stw SAVE0, 0(CRET1)
-+ | stw SAVE1, 4(CRET1)
-+ |.endif
- | b ->cont_nop
- |
- |//-- Comparison metamethods ---------------------------------------------
-@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_ra: // RA = resultptr
- | lwz INS, -4(PC)
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ |.endif
- | decode_RA8 TMP1, INS
-+ |.if FPU
- | stfdx f0, BASE, TMP1
-+ |.else
-+ | stwux CARG1, TMP1, BASE
-+ | stw CARG2, 4(TMP1)
-+ |.endif
- | b ->cont_nop
- |
- |->cont_condt: // RA = resultptr
-@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_n, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG1, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | blt ->fff_fallback
-- | checknum CARG3; bge ->fff_fallback
-+ | checknum CARG1; bge ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_nn, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG1, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG3, 8(BASE)
- | lfd FARG2, 8(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ | lwz CARG3, 8(BASE)
-+ | lwz CARG4, 12(BASE)
-+ |.endif
- | blt ->fff_fallback
-+ | checknum CARG1; bge ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
-- | checknum CARG4; bge ->fff_fallback
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
-@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
- | bge cr1, ->fff_fallback
- | stw CARG3, 0(RA)
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
-+ | addi TMP1, BASE, 8
-+ | add TMP2, RA, NARGS8:RC
- | stw CARG1, 4(RA)
- | beq ->fff_res // Done if exactly 1 argument.
-- | li TMP1, 8
-- | subi RC, RC, 8
- |1:
-- | cmplw TMP1, RC
-- | lfdx f0, BASE, TMP1
-- | stfdx f0, RA, TMP1
-+ | cmplw TMP1, TMP2
-+ |.if FPU
-+ | lfd f0, 0(TMP1)
-+ | stfd f0, -8(TMP1) // Store one slot below the source, like the soft-float path.
-+ |.else
-+ | lwz CARG1, 0(TMP1)
-+ | lwz CARG2, 4(TMP1)
-+ | stw CARG1, -8(TMP1)
-+ | stw CARG2, -4(TMP1)
-+ |.endif
- | addi TMP1, TMP1, 8
- | bney <1
- | b ->fff_res
-@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
- | orc TMP1, TMP2, TMP0
- | addi TMP1, TMP1, ~LJ_TISNUM+1
- | slwi TMP1, TMP1, 3
-+ |.if FPU
- | la TMP2, CFUNC:RB->upvalue
- | lfdx FARG1, TMP2, TMP1
-+ |.else
-+ | add TMP1, CFUNC:RB, TMP1
-+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
-+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
-+ |.endif
- | b ->fff_resn
- |
- |//-- Base library: getters and setters ---------------------------------
-@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG1, L
- | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
- | // Returns cTValue *.
-+ |.if FPU
- | lfd FARG1, 0(CRET1)
-+ |.else
-+ | lwz CARG2, 4(CRET1)
-+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
-+ |.endif
- | b ->fff_resn
- |
- |//-- Base library: conversions ------------------------------------------
-@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
- | // Only handles the number case inline (without a base argument).
- | cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | bne ->fff_fallback // Exactly one argument.
- | checknum CARG1; bgt ->fff_fallback
- | b ->fff_resn
-@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx)
- | cmplwi CRET1, 0
- | li CARG3, LJ_TNIL
- | beq ->fff_restv // End of traversal: return nil.
-- | lfd f0, 8(BASE) // Copy key and value to results.
- | la RA, -8(BASE)
-+ |.if FPU
-+ | lfd f0, 8(BASE) // Copy key and value to results.
- | lfd f1, 16(BASE)
- | stfd f0, 0(RA)
-- | li RD, (2+1)*8
- | stfd f1, 8(RA)
-+ |.else
-+ | lwz CARG1, 8(BASE)
-+ | lwz CARG2, 12(BASE)
-+ | lwz CARG3, 16(BASE)
-+ | lwz CARG4, 20(BASE)
-+ | stw CARG1, 0(RA)
-+ | stw CARG2, 4(RA)
-+ | stw CARG3, 8(RA)
-+ | stw CARG4, 12(RA)
-+ |.endif
-+ | li RD, (2+1)*8
- | b ->fff_res
- |
- |.ffunc_1 pairs
-@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx)
- | bne ->fff_fallback
- #if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
- #else
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | la RA, -8(BASE)
- #endif
- | stw TISNIL, 8(BASE)
- | li RD, (3+1)*8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw TMP0, 0(RA)
-+ | stw TMP1, 4(RA)
-+ |.endif
- | b ->fff_res
- |
- |.ffunc ipairs_aux
-@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx)
- | stfd FARG2, 0(RA)
- |.endif
- | ble >2 // Not in array part?
-+ |.if FPU
- | lwzx TMP2, TMP1, TMP3
- | lfdx f0, TMP1, TMP3
-+ |.else
-+ | lwzux TMP2, TMP1, TMP3
-+ | lwz TMP3, 4(TMP1)
-+ |.endif
- |1:
- | checknil TMP2
- | li RD, (0+1)*8
- | beq ->fff_res // End of iteration, return 0 results.
- | li RD, (2+1)*8
-+ |.if FPU
- | stfd f0, 8(RA)
-+ |.else
-+ | stw TMP2, 8(RA)
-+ | stw TMP3, 12(RA)
-+ |.endif
- | b ->fff_res
- |2: // Check for empty hash part first. Otherwise call C function.
- | lwz TMP0, TAB:CARG1->hmask
-@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx)
- | li RD, (0+1)*8
- | beq ->fff_res
- | lwz TMP2, 0(CRET1)
-+ |.if FPU
- | lfd f0, 0(CRET1)
-+ |.else
-+ | lwz TMP3, 4(CRET1)
-+ |.endif
- | b <1
- |
- |.ffunc_1 ipairs
-@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx)
- | bne ->fff_fallback
- #if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
- #else
-+ |.if FPU
- | lfd f0, CFUNC:RB->upvalue[0]
-+ |.else
-+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
-+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
-+ |.endif
- | la RA, -8(BASE)
- #endif
- |.if DUALNUM
-@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | stw ZERO, 12(BASE)
- | li RD, (3+1)*8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw TMP0, 0(RA)
-+ | stw TMP1, 4(RA)
-+ |.endif
- | b ->fff_res
- |
- |//-- Base library: catch errors ----------------------------------------
-@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc xpcall
- | cmplwi NARGS8:RC, 16
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG3, 8(BASE)
-+ |.if FPU
- | lfd FARG2, 8(BASE)
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ | lwz CARG4, 12(BASE)
-+ |.endif
- | blt ->fff_fallback
- | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
- | mr TMP2, BASE
-- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
-+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
- | la BASE, 16(BASE)
- | // Remember active hook before pcall.
- | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
-+ |.if FPU
- | stfd FARG2, 0(TMP2) // Swap function and traceback.
-- | subi NARGS8:RC, NARGS8:RC, 16
- | stfd FARG1, 8(TMP2)
-+ |.else
-+ | stw CARG3, 0(TMP2)
-+ | stw CARG4, 4(TMP2)
-+ | stw CARG1, 8(TMP2)
-+ | stw CARG2, 12(TMP2)
-+ |.endif
-+ | subi NARGS8:RC, NARGS8:RC, 16
- | addi PC, TMP1, 16+FRAME_PCALL
- | b ->vm_call_dispatch
- |
-@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx)
- | stp BASE, L->top
- |2: // Move args to coroutine.
- | cmpw TMP1, NARGS8:RC
-+ |.if FPU
- | lfdx f0, BASE, TMP1
-+ |.else
-+ | add CARG3, BASE, TMP1
-+ | lwz TMP2, 0(CARG3)
-+ | lwz TMP3, 4(CARG3)
-+ |.endif
- | beq >3
-+ |.if FPU
- | stfdx f0, CARG2, TMP1
-+ |.else
-+ | add CARG3, CARG2, TMP1
-+ | stw TMP2, 0(CARG3)
-+ | stw TMP3, 4(CARG3)
-+ |.endif
- | addi TMP1, TMP1, 8
- | b <2
- |3:
-@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx)
- | stp TMP2, L:SAVE0->top // Clear coroutine stack.
- |5: // Move results from coroutine.
- | cmplw TMP1, TMP3
-+ |.if FPU
- | lfdx f0, TMP2, TMP1
- | stfdx f0, BASE, TMP1
-+ |.else
-+ | add CARG3, TMP2, TMP1
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ | add CARG3, BASE, TMP1
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | addi TMP1, TMP1, 8
- | bne <5
- |6:
-@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx)
- | andix. TMP0, PC, FRAME_TYPE
- | la TMP3, -8(TMP3)
- | li TMP1, LJ_TFALSE
-+ |.if FPU
- | lfd f0, 0(TMP3)
-+ |.else
-+ | lwz CARG1, 0(TMP3)
-+ | lwz CARG2, 4(TMP3)
-+ |.endif
- | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
- | li RD, (2+1)*8
- | stw TMP1, -8(BASE) // Prepend false to results.
- | la RA, -8(BASE)
-+ |.if FPU
- | stfd f0, 0(BASE) // Copy error message.
-+ |.else
-+ | stw CARG1, 0(BASE) // Copy error message.
-+ | stw CARG2, 4(BASE)
-+ |.endif
- | b <7
- |.else
- | mr CARG1, L
-@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx)
- | lus CARG1, 0x8000 // -(2^31).
- | beqy ->fff_resi
- |5:
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | blex func
- | b ->fff_resn
- |.endmacro
-@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc math_log
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lfd FARG1, 0(BASE)
-+ | lwz CARG1, 0(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
-- | checknum CARG3; bge ->fff_fallback
-+ | checknum CARG1; bge ->fff_fallback
-+ |.if FPU
-+ | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | blex log
- | b ->fff_resn
- |
-@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- |.ffunc math_ldexp
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-+ | lwz TMP0, 0(BASE)
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-- | lwz CARG4, 8(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ |.endif
-+ | lwz TMP1, 8(BASE)
- |.if GPR64
- | lwz CARG2, 12(BASE)
-- |.else
-+ |.elif FPU
- | lwz CARG1, 12(BASE)
-+ |.else
-+ | lwz CARG3, 12(BASE)
- |.endif
- | blt ->fff_fallback
-- | checknum CARG3; bge ->fff_fallback
-- | checknum CARG4; bne ->fff_fallback
-+ | checknum TMP0; bge ->fff_fallback
-+ | checknum TMP1; bne ->fff_fallback
- |.else
- |.ffunc_nn math_ldexp
- |.if GPR64
-@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc_n math_frexp
- |.if GPR64
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
-- |.else
-+ |.elif FPU
- | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
-+ |.else
-+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- |.endif
- | lwz PC, FRAME_PC(BASE)
- | blex frexp
-@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.if not DUALNUM
- | tonum_i FARG2, TMP1
- |.endif
-+ |.if FPU
- | stfd FARG1, 0(RA)
-+ |.else
-+ | stw CRET1, 0(RA)
-+ | stw CRET2, 4(RA)
-+ |.endif
- | li RD, (2+1)*8
- |.if DUALNUM
- | stw TISNUM, 8(RA)
-@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc_n math_modf
- |.if GPR64
- | la CARG2, -8(BASE)
-- |.else
-+ |.elif FPU
- | la CARG1, -8(BASE)
-+ |.else
-+ | la CARG3, -8(BASE)
- |.endif
- | lwz PC, FRAME_PC(BASE)
- | blex modf
- | la RA, -8(BASE)
-+ |.if FPU
- | stfd FARG1, 0(BASE)
-+ |.else
-+ | stw CRET1, 0(BASE)
-+ | stw CRET2, 4(BASE)
-+ |.endif
- | li RD, (2+1)*8
- | b ->fff_res
- |
-@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- | .ffunc_1 name
- | checknum CARG3
-- | addi TMP1, BASE, 8
-- | add TMP2, BASE, NARGS8:RC
-+ | addi SAVE0, BASE, 8
-+ | add SAVE1, BASE, NARGS8:RC
- | bne >4
- |1: // Handle integers.
-- | lwz CARG4, 0(TMP1)
-- | cmplw cr1, TMP1, TMP2
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 0(SAVE0)
-+ | cmplw cr1, SAVE0, SAVE1
-+ | lwz CARG2, 4(SAVE0)
- | bge cr1, ->fff_resi
- | checknum CARG4
- | xoris TMP0, CARG1, 0x8000
-@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx)
- |.if GPR64
- | rldicl CARG1, CARG1, 0, 32
- |.endif
-- | addi TMP1, TMP1, 8
-+ | addi SAVE0, SAVE0, 8
- | b <1
- |3:
- | bge ->fff_fallback
- | // Convert intermediate result to number and continue below.
-+ |.if FPU
- | tonum_i FARG1, CARG1
-- | lfd FARG2, 0(TMP1)
-+ | lfd FARG2, 0(SAVE0)
-+ |.else
-+ | mr CARG2, CARG1
-+ | bl ->vm_sfi2d_1
-+ | lwz CARG3, 0(SAVE0)
-+ | lwz CARG4, 4(SAVE0)
-+ |.endif
- | b >6
- |4:
-+ |.if FPU
- | lfd FARG1, 0(BASE)
-+ |.else
-+ | lwz CARG1, 0(BASE)
-+ | lwz CARG2, 4(BASE)
-+ |.endif
- | bge ->fff_fallback
- |5: // Handle numbers.
-- | lwz CARG4, 0(TMP1)
-- | cmplw cr1, TMP1, TMP2
-- | lfd FARG2, 0(TMP1)
-+ | lwz CARG3, 0(SAVE0)
-+ | cmplw cr1, SAVE0, SAVE1
-+ |.if FPU
-+ | lfd FARG2, 0(SAVE0)
-+ |.else
-+ | lwz CARG4, 4(SAVE0)
-+ |.endif
- | bge cr1, ->fff_resn
-- | checknum CARG4; bge >7
-+ | checknum CARG3; bge >7
- |6:
-+ | addi SAVE0, SAVE0, 8
-+ |.if FPU
- | fsub f0, FARG1, FARG2
-- | addi TMP1, TMP1, 8
- |.if ismax
- | fsel FARG1, f0, FARG1, FARG2
- |.else
- | fsel FARG1, f0, FARG2, FARG1
- |.endif
-+ |.else
-+ | stw CARG1, SFSAVE_1
-+ | stw CARG2, SFSAVE_2
-+ | stw CARG3, SFSAVE_3
-+ | stw CARG4, SFSAVE_4
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ |.if ismax
-+ | blt >8
-+ |.else
-+ | bge >8
-+ |.endif
-+ | lwz CARG1, SFSAVE_1
-+ | lwz CARG2, SFSAVE_2
-+ | b <5
-+ |8:
-+ | lwz CARG1, SFSAVE_3
-+ | lwz CARG2, SFSAVE_4
-+ |.endif
- | b <5
- |7: // Convert integer to number and continue above.
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG3, 4(SAVE0)
- | bne ->fff_fallback
-- | tonum_i FARG2, CARG2
-+ |.if FPU
-+ | tonum_i FARG2, CARG3
-+ |.else
-+ | bl ->vm_sfi2d_2
-+ |.endif
- | b <6
- |.else
- | .ffunc_n name
-@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name
-- | addi TMP1, BASE, 8
-- | add TMP2, BASE, NARGS8:RC
-+ | addi SAVE0, BASE, 8
-+ | add SAVE1, BASE, NARGS8:RC
- |1:
-- | lwz CARG4, 0(TMP1)
-- | cmplw cr1, TMP1, TMP2
-+ | lwz CARG4, 0(SAVE0)
-+ | cmplw cr1, SAVE0, SAVE1
- |.if DUALNUM
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG2, 4(SAVE0)
- |.else
-- | lfd FARG1, 0(TMP1)
-+ | lfd FARG1, 0(SAVE0)
- |.endif
- | bgey cr1, ->fff_resi
- | checknum CARG4
- |.if DUALNUM
-+ |.if FPU
- | bnel ->fff_bitop_fb
- |.else
-+ | beq >3
-+ | stw CARG1, SFSAVE_1
-+ | bl ->fff_bitop_fb
-+ | mr CARG2, CARG1
-+ | lwz CARG1, SFSAVE_1
-+ |3:
-+ |.endif
-+ |.else
- | fadd FARG1, FARG1, TOBIT
- | bge ->fff_fallback
- | stfd FARG1, TMPD
- | lwz CARG2, TMPD_LO
- |.endif
- | ins CARG1, CARG1, CARG2
-- | addi TMP1, TMP1, 8
-+ | addi SAVE0, SAVE0, 8
- | b <1
- |.endmacro
- |
-@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_bit_sh, name, ins, shmod
- |.if DUALNUM
- | .ffunc_2 bit_..name
-+ |.if FPU
- | checknum CARG3; bnel ->fff_tobit_fb
-+ |.else
-+ | checknum CARG3; beq >1
-+ | bl ->fff_tobit_fb
-+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
-+ |1:
-+ |.endif
- | // Note: no inline conversion from number for 2nd argument!
- | checknum CARG4; bne ->fff_fallback
- |.else
-@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_resn:
- | lwz PC, FRAME_PC(BASE)
- | la RA, -8(BASE)
-+ |.if FPU
- | stfd FARG1, -8(BASE)
-+ |.else
-+ | stw CARG1, -8(BASE)
-+ | stw CARG2, -4(BASE)
-+ |.endif
- | b ->fff_res1
- |
- |// Fallback FP number to bit conversion.
- |->fff_tobit_fb:
- |.if DUALNUM
-+ |.if FPU
- | lfd FARG1, 0(BASE)
- | bgt ->fff_fallback
- | fadd FARG1, FARG1, TOBIT
- | stfd FARG1, TMPD
- | lwz CARG1, TMPD_LO
- | blr
-+ |.else
-+ | bgt ->fff_fallback
-+ | mr CARG2, CARG1
-+ | mr CARG1, CARG3
-+ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
-+ |->vm_tobit:
-+ | slwi TMP2, CARG1, 1
-+ | addis TMP2, TMP2, 0x0020
-+ | cmpwi TMP2, 0
-+ | bge >2
-+ | li TMP1, 0x3e0
-+ | srawi TMP2, TMP2, 21
-+ | not TMP1, TMP1
-+ | sub. TMP2, TMP1, TMP2
-+ | cmpwi cr7, CARG1, 0
-+ | blt >1
-+ | slwi TMP1, CARG1, 11
-+ | srwi TMP0, CARG2, 21
-+ | oris TMP1, TMP1, 0x8000
-+ | or TMP1, TMP1, TMP0
-+ | srw CARG1, TMP1, TMP2
-+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
-+ | neg CARG1, CARG1
-+ | blr
-+ |1:
-+ | addi TMP2, TMP2, 21
-+ | srw TMP1, CARG2, TMP2
-+ | slwi CARG2, CARG1, 12
-+ | subfic TMP2, TMP2, 20
-+ | slw TMP0, CARG2, TMP2
-+ | or CARG1, TMP1, TMP0
-+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
-+ | neg CARG1, CARG1
-+ | blr
-+ |2:
-+ | li CARG1, 0
-+ | blr
-+ |.endif
- |.endif
- |->fff_bitop_fb:
- |.if DUALNUM
-- | lfd FARG1, 0(TMP1)
-+ |.if FPU
-+ | lfd FARG1, 0(SAVE0)
- | bgt ->fff_fallback
- | fadd FARG1, FARG1, TOBIT
- | stfd FARG1, TMPD
- | lwz CARG2, TMPD_LO
- | blr
-+ |.else
-+ | bgt ->fff_fallback
-+ | mr CARG1, CARG4
-+ | b ->vm_tobit
-+ |.endif
- |.endif
- |
- |//-----------------------------------------------------------------------
-@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *ctx)
- | decode_RA8 RC, INS // Call base.
- | beq >2
- |1: // Move results down.
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ |.endif
- | addic. TMP1, TMP1, -8
- | addi RA, RA, 8
-+ |.if FPU
- | stfdx f0, BASE, RC
-+ |.else
-+ | add CARG3, BASE, RC
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | addi RC, RC, 8
- | bne <1
- |2:
-@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |.macro savex_, a, b, c, d
-+ |.if FPU
- | stfd f..a, 16+a*8(sp)
- | stfd f..b, 16+b*8(sp)
- | stfd f..c, 16+c*8(sp)
- | stfd f..d, 16+d*8(sp)
-+ |.endif
- |.endmacro
- |
- |->vm_exit_handler:
-@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // Setup type comparison constants.
- | li TISNUM, LJ_TISNUM
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-- | stw TMP3, TMPD
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU stw TMP3, TMPD
- | li ZERO, 0
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | lfs TOBIT, TMPD
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU lfs TOBIT, TMPD
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | li TISNIL, LJ_TNIL
-- | stw TMP0, TONUM_HI
-- | lfs TONUM, TMPD
-+ | .FPU stw TMP0, TONUM_HI
-+ | .FPU lfs TONUM, TMPD
- | // Modified copy of ins_next which handles function header dispatch, too.
- | lwz INS, 0(PC)
- | addi PC, PC, 4
-@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- Math helper functions ----------------------------------------------
- |//-----------------------------------------------------------------------
- |
-- |// NYI: Use internal implementations of floor, ceil, trunc.
-+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
-+ |
-+ |.macro sfi2d, AHI, ALO
-+ |.if not FPU
-+ | mr. AHI, ALO
-+ | bclr 12, 2 // Handle zero first.
-+ | srawi TMP0, ALO, 31
-+ | xor TMP1, ALO, TMP0
-+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
-+ | cntlzw AHI, TMP1
-+ | andix. TMP0, TMP0, 0x800 // Mask sign bit.
-+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
-+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
-+ | slwi ALO, TMP1, 21
-+ | or AHI, AHI, TMP0 // Sign | Exponent.
-+ | srwi TMP1, TMP1, 11
-+ | slwi AHI, AHI, 20 // Align left.
-+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
-+ | blr
-+ |.endif
-+ |.endmacro
-+ |
-+ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
-+ |->vm_sfi2d_1:
-+ | sfi2d CARG1, CARG2
-+ |
-+ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
-+ |->vm_sfi2d_2:
-+ | sfi2d CARG3, CARG4
- |
- |->vm_modi:
- | divwo. TMP0, CARG1, CARG2
-@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx)
- | addi DISPATCH, r12, GG_G2DISP
- | stw r11, CTSTATE->cb.slot
- | stw r3, CTSTATE->cb.gpr[0]
-- | stfd f1, CTSTATE->cb.fpr[0]
-+ | .FPU stfd f1, CTSTATE->cb.fpr[0]
- | stw r4, CTSTATE->cb.gpr[1]
-- | stfd f2, CTSTATE->cb.fpr[1]
-+ | .FPU stfd f2, CTSTATE->cb.fpr[1]
- | stw r5, CTSTATE->cb.gpr[2]
-- | stfd f3, CTSTATE->cb.fpr[2]
-+ | .FPU stfd f3, CTSTATE->cb.fpr[2]
- | stw r6, CTSTATE->cb.gpr[3]
-- | stfd f4, CTSTATE->cb.fpr[3]
-+ | .FPU stfd f4, CTSTATE->cb.fpr[3]
- | stw r7, CTSTATE->cb.gpr[4]
-- | stfd f5, CTSTATE->cb.fpr[4]
-+ | .FPU stfd f5, CTSTATE->cb.fpr[4]
- | stw r8, CTSTATE->cb.gpr[5]
-- | stfd f6, CTSTATE->cb.fpr[5]
-+ | .FPU stfd f6, CTSTATE->cb.fpr[5]
- | stw r9, CTSTATE->cb.gpr[6]
-- | stfd f7, CTSTATE->cb.fpr[6]
-+ | .FPU stfd f7, CTSTATE->cb.fpr[6]
- | stw r10, CTSTATE->cb.gpr[7]
-- | stfd f8, CTSTATE->cb.fpr[7]
-+ | .FPU stfd f8, CTSTATE->cb.fpr[7]
- | addi TMP0, sp, CFRAME_SPACE+8
- | stw TMP0, CTSTATE->cb.stack
- | mr CARG1, CTSTATE
-@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx)
- | lp BASE, L:CRET1->base
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
- | lp RC, L:CRET1->top
-- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | li ZERO, 0
- | mr L, CRET1
-- | stw TMP3, TMPD
-- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
-+ | .FPU stw TMP3, TMPD
-+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
-- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-- | stw TMP0, TONUM_HI
-+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
-+ | .FPU stw TMP0, TONUM_HI
- | li TISNIL, LJ_TNIL
- | li_vmstate INTERP
-- | lfs TOBIT, TMPD
-- | stw TMP3, TMPD
-+ | .FPU lfs TOBIT, TMPD
-+ | .FPU stw TMP3, TMPD
- | sub RC, RC, BASE
- | st_vmstate
-- | lfs TONUM, TMPD
-+ | .FPU lfs TONUM, TMPD
- | ins_callt
- |.endif
- |
-@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG2, RA
- | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
- | lwz CRET1, CTSTATE->cb.gpr[0]
-- | lfd FARG1, CTSTATE->cb.fpr[0]
-+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
- | lwz CRET2, CTSTATE->cb.gpr[1]
- | b ->vm_leave_unw
- |.endif
-@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx)
- | bge <1
- |2:
- | bney cr1, >3
-- | lfd f1, CCSTATE->fpr[0]
-- | lfd f2, CCSTATE->fpr[1]
-- | lfd f3, CCSTATE->fpr[2]
-- | lfd f4, CCSTATE->fpr[3]
-- | lfd f5, CCSTATE->fpr[4]
-- | lfd f6, CCSTATE->fpr[5]
-- | lfd f7, CCSTATE->fpr[6]
-- | lfd f8, CCSTATE->fpr[7]
-+ | .FPU lfd f1, CCSTATE->fpr[0]
-+ | .FPU lfd f2, CCSTATE->fpr[1]
-+ | .FPU lfd f3, CCSTATE->fpr[2]
-+ | .FPU lfd f4, CCSTATE->fpr[3]
-+ | .FPU lfd f5, CCSTATE->fpr[4]
-+ | .FPU lfd f6, CCSTATE->fpr[5]
-+ | .FPU lfd f7, CCSTATE->fpr[6]
-+ | .FPU lfd f8, CCSTATE->fpr[7]
- |3:
- | lp TMP0, CCSTATE->func
- | lwz CARG2, CCSTATE->gpr[1]
-@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz TMP2, -4(r14)
- | lwz TMP0, 4(r14)
- | stw CARG1, CCSTATE:TMP1->gpr[0]
-- | stfd FARG1, CCSTATE:TMP1->fpr[0]
-+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
- | stw CARG2, CCSTATE:TMP1->gpr[1]
- | mtlr TMP0
- | stw CARG3, CCSTATE:TMP1->gpr[2]
-@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, BASE
-+ | lwzux CARG3, RD, BASE
- | lwz TMP2, -4(PC)
-- | checknum cr0, TMP0
-- | lwz CARG3, 4(RD)
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RD)
- | decode_RD4 TMP2, TMP2
-- | checknum cr1, TMP1
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | checknum cr1, CARG3
-+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
- | bne cr0, >7
- | bne cr1, >8
-- | cmpw CARG2, CARG3
-+ | cmpw CARG2, CARG4
- if (op == BC_ISLT) {
- | bge >2
- } else if (op == BC_ISGE) {
-@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ble >2
- }
- |1:
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |2:
- | ins_next
- |
- |7: // RA is not an integer.
- | bgt cr0, ->vmeta_comp
- | // RA is a number.
-- | lfd f0, 0(RA)
-+ | .FPU lfd f0, 0(RA)
- | bgt cr1, ->vmeta_comp
- | blt cr1, >4
- | // RA is a number, RD is an integer.
-- | tonum_i f1, CARG3
-+ |.if FPU
-+ | tonum_i f1, CARG4
-+ |.else
-+ | bl ->vm_sfi2d_2
-+ |.endif
- | b >5
- |
- |8: // RA is an integer, RD is not an integer.
- | bgt cr1, ->vmeta_comp
- | // RA is an integer, RD is a number.
-+ |.if FPU
- | tonum_i f0, CARG2
-+ |.else
-+ | bl ->vm_sfi2d_1
-+ |.endif
- |4:
-- | lfd f1, 0(RD)
-+ | .FPU lfd f1, 0(RD)
- |5:
-+ |.if FPU
- | fcmpu cr0, f0, f1
-+ |.else
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ |.endif
- if (op == BC_ISLT) {
- | bge <2
- } else if (op == BC_ISGE) {
-@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, BASE
-- | checknum cr0, TMP0
-- | lwz TMP2, -4(PC)
-- | checknum cr1, TMP1
-- | decode_RD4 TMP2, TMP2
-- | lwz CARG3, 4(RD)
-+ | lwzux CARG3, RD, BASE
-+ | checknum cr0, CARG1
-+ | lwz SAVE0, -4(PC)
-+ | checknum cr1, CARG3
-+ | decode_RD4 SAVE0, SAVE0
-+ | lwz CARG4, 4(RD)
- | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
- | ble cr7, ->BC_ISEQN_Z
- } else {
- | ble cr7, ->BC_ISNEN_Z
- }
- |.else
-- | lwzux TMP0, RA, BASE
-- | lwz TMP2, 0(PC)
-+ | lwzux CARG1, RA, BASE
-+ | lwz SAVE0, 0(PC)
- | lfd f0, 0(RA)
- | addi PC, PC, 4
-- | lwzux TMP1, RD, BASE
-- | checknum cr0, TMP0
-- | decode_RD4 TMP2, TMP2
-+ | lwzux CARG3, RD, BASE
-+ | checknum cr0, CARG1
-+ | decode_RD4 SAVE0, SAVE0
- | lfd f1, 0(RD)
-- | checknum cr1, TMP1
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | checknum cr1, CARG3
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- | bge cr0, >5
- | bge cr1, >5
- | fcmpu cr0, f0, f1
- if (vk) {
- | bne >1
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- } else {
- | beq >1
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- }
- |1:
- | ins_next
-@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5: // Either or both types are not numbers.
- |.if not DUALNUM
- | lwz CARG2, 4(RA)
-- | lwz CARG3, 4(RD)
-+ | lwz CARG4, 4(RD)
- |.endif
- |.if FFI
-- | cmpwi cr7, TMP0, LJ_TCDATA
-- | cmpwi cr5, TMP1, LJ_TCDATA
-+ | cmpwi cr7, CARG1, LJ_TCDATA
-+ | cmpwi cr5, CARG3, LJ_TCDATA
- |.endif
-- | not TMP3, TMP0
-- | cmplw TMP0, TMP1
-- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
-+ | not TMP2, CARG1
-+ | cmplw CARG1, CARG3
-+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
- |.if FFI
- | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
- |.endif
-- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
-+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
- |.if FFI
- | beq cr7, ->vmeta_equal_cd
- |.endif
-- | cmplw cr5, CARG2, CARG3
-+ | cmplw cr5, CARG2, CARG4
- | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
- | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
-- | mr SAVE0, PC
-+ | mr SAVE1, PC
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
- if (vk) {
- | bne cr0, >6
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |6:
- } else {
- | beq cr0, >6
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |6:
- }
- |.if DUALNUM
-@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |
- | // Different tables or userdatas. Need to check __eq metamethod.
- | // Field metatable must be at same offset for GCtab and GCudata!
-+ | mr CARG3, CARG4
- | lwz TAB:TMP2, TAB:CARG2->metatable
- | li CARG4, 1-vk // ne = 0 or 1.
- | cmplwi TAB:TMP2, 0
-@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lbz TMP2, TAB:TMP2->nomm
- | andix. TMP2, TMP2, 1<<MM_eq
- | bne <1 // Or 'no __eq' flag set?
-- | mr PC, SAVE0 // Restore old PC.
-+ | mr PC, SAVE1 // Restore old PC.
- | b ->vmeta_equal // Handle __eq metamethod.
- break;
-
-@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzux CARG1, RA, BASE
- | addi PC, PC, 4
- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, KBASE
-- | checknum cr0, TMP0
-- | lwz TMP2, -4(PC)
-- | checknum cr1, TMP1
-- | decode_RD4 TMP2, TMP2
-- | lwz CARG3, 4(RD)
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | lwzux CARG3, RD, KBASE
-+ | checknum cr0, CARG1
-+ | lwz SAVE0, -4(PC)
-+ | checknum cr1, CARG3
-+ | decode_RD4 SAVE0, SAVE0
-+ | lwz CARG4, 4(RD)
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- if (vk) {
- |->BC_ISEQN_Z:
- } else {
-@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | bne cr0, >7
- | bne cr1, >8
-- | cmpw CARG2, CARG3
-+ | cmpw CARG2, CARG4
- |4:
- |.else
- if (vk) {
-@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- } else {
- |->BC_ISNEN_Z: // Dummy label.
- }
-- | lwzx TMP0, BASE, RA
-+ | lwzx CARG1, BASE, RA
- | addi PC, PC, 4
- | lfdx f0, BASE, RA
-- | lwz TMP2, -4(PC)
-+ | lwz SAVE0, -4(PC)
- | lfdx f1, KBASE, RD
-- | decode_RD4 TMP2, TMP2
-- | checknum TMP0
-- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-+ | decode_RD4 SAVE0, SAVE0
-+ | checknum CARG1
-+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
- | bge >3
- | fcmpu cr0, f0, f1
- |.endif
- if (vk) {
- | bne >1
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |1:
- |.if not FFI
- |3:
-@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.if not FFI
- |3:
- |.endif
-- | add PC, PC, TMP2
-+ | add PC, PC, SAVE0
- |2:
- }
- | ins_next
- |.if FFI
- |3:
-- | cmpwi TMP0, LJ_TCDATA
-+ | cmpwi CARG1, LJ_TCDATA
- | beq ->vmeta_equal_cd
- | b <1
- |.endif
-@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |7: // RA is not an integer.
- | bge cr0, <3
- | // RA is a number.
-- | lfd f0, 0(RA)
-+ | .FPU lfd f0, 0(RA)
- | blt cr1, >1
- | // RA is a number, RD is an integer.
-- | tonum_i f1, CARG3
-+ |.if FPU
-+ | tonum_i f1, CARG4
-+ |.else
-+ | bl ->vm_sfi2d_2
-+ |.endif
- | b >2
- |
- |8: // RA is an integer, RD is a number.
-+ |.if FPU
- | tonum_i f0, CARG2
-+ |.else
-+ | bl ->vm_sfi2d_1
-+ |.endif
- |1:
-- | lfd f1, 0(RD)
-+ | .FPU lfd f1, 0(RD)
- |2:
-+ |.if FPU
- | fcmpu cr0, f0, f1
-+ |.else
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ |.endif
- | b <4
- |.endif
- break;
-@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add PC, PC, TMP2
- } else {
- | li TMP1, LJ_TFALSE
-+ |.if FPU
- | lfdx f0, BASE, RD
-+ |.else
-+ | lwzux CARG1, RD, BASE
-+ | lwz CARG2, 4(RD)
-+ |.endif
- | cmplw TMP0, TMP1
- if (op == BC_ISTC) {
- | bge >1
-@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | addis PC, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux CARG1, RA, BASE
-+ | stw CARG2, 4(RA)
-+ |.endif
- | add PC, PC, TMP2
- |1:
- }
-@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_MOV:
- | // RA = dst*8, RD = src*8
- | ins_next1
-+ |.if FPU
- | lfdx f0, BASE, RD
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP0, RD, BASE
-+ | lwz TMP1, 4(RD)
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- break;
- case BC_NOT:
-@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzx TMP1, BASE, RB
-+ | lwzx CARG1, BASE, RB
- | .if DUALNUM
-- | lwzx TMP2, KBASE, RC
-+ | lwzx CARG3, KBASE, RC
- | .endif
-+ | .if FPU
- | lfdx f14, BASE, RB
- | lfdx f15, KBASE, RC
-+ | .else
-+ | add TMP1, BASE, RB
-+ | add TMP2, KBASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ | .endif
- | .if DUALNUM
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vn
- | .else
-- | checknum TMP1; bge ->vmeta_arith_vn
-+ | checknum CARG1; bge ->vmeta_arith_vn
- | .endif
- || break;
- ||case 1:
-- | lwzx TMP1, BASE, RB
-+ | lwzx CARG1, BASE, RB
- | .if DUALNUM
-- | lwzx TMP2, KBASE, RC
-+ | lwzx CARG3, KBASE, RC
- | .endif
-+ | .if FPU
- | lfdx f15, BASE, RB
- | lfdx f14, KBASE, RC
-+ | .else
-+ | add TMP1, BASE, RB
-+ | add TMP2, KBASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ | .endif
- | .if DUALNUM
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_nv
- | .else
-- | checknum TMP1; bge ->vmeta_arith_nv
-+ | checknum CARG1; bge ->vmeta_arith_nv
- | .endif
- || break;
- ||default:
-- | lwzx TMP1, BASE, RB
-- | lwzx TMP2, BASE, RC
-+ | lwzx CARG1, BASE, RB
-+ | lwzx CARG3, BASE, RC
-+ | .if FPU
- | lfdx f14, BASE, RB
- | lfdx f15, BASE, RC
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ | .else
-+ | add TMP1, BASE, RB
-+ | add TMP2, BASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ | .endif
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vv
- || break;
-@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | fsub a, b, a // b - floor(b/c)*c
- |.endmacro
- |
-+ |.macro sfpmod
-+ |->BC_MODVN_Z:
-+ | stw CARG1, SFSAVE_1
-+ | stw CARG2, SFSAVE_2
-+ | mr SAVE0, CARG3
-+ | mr SAVE1, CARG4
-+ | blex __divdf3
-+ | blex floor
-+ | mr CARG3, SAVE0
-+ | mr CARG4, SAVE1
-+ | blex __muldf3
-+ | mr CARG3, CRET1
-+ | mr CARG4, CRET2
-+ | lwz CARG1, SFSAVE_1
-+ | lwz CARG2, SFSAVE_2
-+ | blex __subdf3
-+ |.endmacro
-+ |
- |.macro ins_arithfp, fpins
- | ins_arithpre
- |.if "fpins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
-- |.else
-+ |.elif FPU
- | fpins f0, f14, f15
- | ins_next1
- | stfdx f0, BASE, RA
- | ins_next2
-+ |.else
-+ | blex __divdf3 // Only soft-float div uses this macro.
-+ | ins_next1
-+ | stwux CRET1, RA, BASE
-+ | stw CRET2, 4(RA)
-+ | ins_next2
- |.endif
- |.endmacro
- |
-- |.macro ins_arithdn, intins, fpins
-+ |.macro ins_arithdn, intins, fpins, fpcall
- | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, KBASE
-- | lwz CARG1, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG2, 4(RC)
-+ | lwzux CARG1, RB, BASE
-+ | lwzux CARG3, RC, KBASE
-+ | lwz CARG2, 4(RB)
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RC)
-+ | checknum cr1, CARG3
- || break;
- ||case 1:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, KBASE
-- | lwz CARG2, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG1, 4(RC)
-+ | lwzux CARG3, RB, BASE
-+ | lwzux CARG1, RC, KBASE
-+ | lwz CARG4, 4(RB)
-+ | checknum cr0, CARG3
-+ | lwz CARG2, 4(RC)
-+ | checknum cr1, CARG1
- || break;
- ||default:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, BASE
-- | lwz CARG1, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG2, 4(RC)
-+ | lwzux CARG1, RB, BASE
-+ | lwzux CARG3, RC, BASE
-+ | lwz CARG2, 4(RB)
-+ | checknum cr0, CARG1
-+ | lwz CARG4, 4(RC)
-+ | checknum cr1, CARG3
- || break;
- ||}
-- | checknum cr1, TMP2
- | bne >5
- | bne cr1, >5
-- | intins CARG1, CARG1, CARG2
-+ |.if "intins" == "intmod"
-+ | mr CARG1, CARG2
-+ | mr CARG2, CARG4
-+ |.endif
-+ | intins CARG1, CARG2, CARG4
- | bso >4
- |1:
- | ins_next1
-@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | checkov TMP0, <1 // Ignore unrelated overflow.
- | ins_arithfallback b
- |5: // FP variant.
-+ |.if FPU
- ||if (vk == 1) {
- | lfd f15, 0(RB)
-- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | lfd f14, 0(RC)
- ||} else {
- | lfd f14, 0(RB)
-- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | lfd f15, 0(RC)
- ||}
-+ |.endif
-+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | ins_arithfallback bge
- |.if "fpins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |.else
-+ |.if FPU
- | fpins f0, f14, f15
-- | ins_next1
- | stfdx f0, BASE, RA
-+ |.else
-+ |.if "fpcall" == "sfpmod"
-+ | sfpmod
-+ |.else
-+ | blex fpcall
-+ |.endif
-+ | stwux CRET1, RA, BASE
-+ | stw CRET2, 4(RA)
-+ |.endif
-+ | ins_next1
- | b <2
- |.endif
- |.endmacro
- |
-- |.macro ins_arith, intins, fpins
-+ |.macro ins_arith, intins, fpins, fpcall
- |.if DUALNUM
-- | ins_arithdn intins, fpins
-+ | ins_arithdn intins, fpins, fpcall
- |.else
- | ins_arithfp fpins
- |.endif
-@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addo. TMP0, TMP0, TMP3
- | add y, a, b
- |.endmacro
-- | ins_arith addo32., fadd
-+ | ins_arith addo32., fadd, __adddf3
- |.else
-- | ins_arith addo., fadd
-+ | ins_arith addo., fadd, __adddf3
- |.endif
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | subo. TMP0, TMP0, TMP3
- | sub y, a, b
- |.endmacro
-- | ins_arith subo32., fsub
-+ | ins_arith subo32., fsub, __subdf3
- |.else
-- | ins_arith subo., fsub
-+ | ins_arith subo., fsub, __subdf3
- |.endif
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
-- | ins_arith mullwo., fmul
-+ | ins_arith mullwo., fmul, __muldf3
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arithfp fdiv
- break;
- case BC_MODVN:
-- | ins_arith intmod, fpmod
-+ | ins_arith intmod, fpmod, sfpmod
- break;
- case BC_MODNV: case BC_MODVV:
-- | ins_arith intmod, fpmod_
-+ | ins_arith intmod, fpmod_, sfpmod
- break;
- case BC_POW:
- | // NYI: (partial) integer arithmetic.
-- | lwzx TMP1, BASE, RB
-+ | lwzx CARG1, BASE, RB
-+ | lwzx CARG3, BASE, RC
-+ |.if FPU
- | lfdx FARG1, BASE, RB
-- | lwzx TMP2, BASE, RC
- | lfdx FARG2, BASE, RC
-- | checknum cr0, TMP1
-- | checknum cr1, TMP2
-+ |.else
-+ | add TMP1, BASE, RB
-+ | add TMP2, BASE, RC
-+ | lwz CARG2, 4(TMP1)
-+ | lwz CARG4, 4(TMP2)
-+ |.endif
-+ | checknum cr0, CARG1
-+ | checknum cr1, CARG3
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | bge ->vmeta_arith_vv
- | blex pow
- | ins_next1
-+ |.if FPU
- | stfdx FARG1, BASE, RA
-+ |.else
-+ | stwux CARG1, RA, BASE
-+ | stw CARG2, 4(RA)
-+ |.endif
- | ins_next2
- break;
-
-@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lp BASE, L->base
- | bne ->vmeta_binop
- | ins_next1
-+ |.if FPU
- | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP0, SAVE0, BASE
-+ | lwz TMP1, 4(SAVE0)
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- break;
-
-@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_KNUM:
- | // RA = dst*8, RD = num_const*8
- | ins_next1
-+ |.if FPU
- | lfdx f0, KBASE, RD
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwzux TMP0, RD, KBASE
-+ | lwz TMP1, 4(RD)
-+ | stwux TMP0, RA, BASE
-+ | stw TMP1, 4(RA)
-+ |.endif
- | ins_next2
- break;
- case BC_KPRI:
-@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwzx UPVAL:RB, LFUNC:RB, RD
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
-+ |.if FPU
- | lfd f0, 0(TMP1)
- | stfdx f0, BASE, RA
-+ |.else
-+ | lwz TMP2, 0(TMP1)
-+ | lwz TMP3, 4(TMP1)
-+ | stwux TMP2, RA, BASE
-+ | stw TMP3, 4(RA)
-+ |.endif
- | ins_next2
- break;
- case BC_USETV:
-@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
-+ |.if FPU
- | lfdux f0, RD, BASE
-+ |.else
-+ | lwzux CARG1, RD, BASE
-+ | lwz CARG3, 4(RD)
-+ |.endif
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | lbz TMP3, UPVAL:RB->marked
- | lwz CARG2, UPVAL:RB->v
- | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP0, UPVAL:RB->closed
- | lwz TMP2, 0(RD)
-+ |.if FPU
- | stfd f0, 0(CARG2)
-+ |.else
-+ | stw CARG1, 0(CARG2)
-+ | stw CARG3, 4(CARG2)
-+ |.endif
- | cmplwi cr1, TMP0, 0
- | lwz TMP1, 4(RD)
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
-@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
-+ |.if FPU
- | lfdx f0, KBASE, RD
-+ |.else
-+ | lwzux TMP2, RD, KBASE
-+ | lwz TMP3, 4(RD)
-+ |.endif
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
-+ |.if FPU
- | stfd f0, 0(TMP1)
-+ |.else
-+ | stw TMP2, 0(TMP1)
-+ | stw TMP3, 4(TMP1)
-+ |.endif
- | ins_next2
- break;
- case BC_USETP:
-@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- | ble ->vmeta_tgetv // Integer key and in array part?
- | lwzx TMP0, TMP1, TMP2
-+ |.if FPU
- | lfdx f14, TMP1, TMP2
-+ |.else
-+ | lwzux SAVE0, TMP1, TMP2
-+ | lwz SAVE1, 4(TMP1)
-+ |.endif
- | checknil TMP0; beq >2
- |1:
- | ins_next1
-+ |.if FPU
- | stfdx f14, BASE, RA
-+ |.else
-+ | stwux SAVE0, RA, BASE
-+ | stw SAVE1, 4(RA)
-+ |.endif
- | ins_next2
- |
- |2: // Check for __index if table value is nil.
-@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | cmplw TMP0, TMP1; bge ->vmeta_tgetb
-+ |.if FPU
- | lwzx TMP1, TMP2, RC
- | lfdx f0, TMP2, RC
-+ |.else
-+ | lwzux TMP1, TMP2, RC
-+ | lwz TMP3, 4(TMP2)
-+ |.endif
- | checknil TMP1; beq >5
- |1:
- | ins_next1
-+ |.if FPU
- | stfdx f0, BASE, RA
-+ |.else
-+ | stwux TMP1, RA, BASE
-+ | stw TMP3, 4(RA)
-+ |.endif
- | ins_next2
- |
- |5: // Check for __index if table value is nil.
-@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cmplw TMP0, CARG2
- | slwi TMP2, CARG2, 3
- | ble ->vmeta_tgetr // In array part?
-+ |.if FPU
- | lfdx f14, TMP1, TMP2
-+ |.else
-+ | lwzux SAVE0, TMP2, TMP1
-+ | lwz SAVE1, 4(TMP2)
-+ |.endif
- |->BC_TGETR_Z:
- | ins_next1
-+ |.if FPU
- | stfdx f14, BASE, RA
-+ |.else
-+ | stwux SAVE0, RA, BASE
-+ | stw SAVE1, 4(RA)
-+ |.endif
- | ins_next2
- break;
-
-@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ble ->vmeta_tsetv // Integer key and in array part?
- | lwzx TMP2, TMP1, TMP0
- | lbz TMP3, TAB:RB->marked
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | add SAVE1, BASE, RA
-+ | lwz SAVE0, 0(SAVE1)
-+ | lwz SAVE1, 4(SAVE1)
-+ |.endif
- | checknil TMP2; beq >3
- |1:
- | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
-+ |.if FPU
- | stfdx f14, TMP1, TMP0
-+ |.else
-+ | stwux SAVE0, TMP1, TMP0
-+ | stw SAVE1, 4(TMP1)
-+ |.endif
- | bne >7
- |2:
- | ins_next
-@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz NODE:TMP2, TAB:RB->node
- | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | add CARG2, BASE, RA
-+ | lwz SAVE0, 0(CARG2)
-+ | lwz SAVE1, 4(CARG2)
-+ |.endif
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
-@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | checknil CARG2; beq >4 // Key found, but nil value?
- |2:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
-+ |.if FPU
- | stfd f14, NODE:TMP2->val
-+ |.else
-+ | stw SAVE0, NODE:TMP2->val.u32.hi
-+ | stw SAVE1, NODE:TMP2->val.u32.lo
-+ |.endif
- | bne >7
- |3:
- | ins_next
-@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
- | // Returns TValue *.
- | lp BASE, L->base
-+ |.if FPU
- | stfd f14, 0(CRET1)
-+ |.else
-+ | stw SAVE0, 0(CRET1)
-+ | stw SAVE1, 4(CRET1)
-+ |.endif
- | b <3 // No 2nd write barrier needed.
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz TMP2, TAB:RB->array
- | lbz TMP3, TAB:RB->marked
- | cmplw TMP0, TMP1
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | add CARG2, BASE, RA
-+ | lwz SAVE0, 0(CARG2)
-+ | lwz SAVE1, 4(CARG2)
-+ |.endif
- | bge ->vmeta_tsetb
- | lwzx TMP1, TMP2, RC
- | checknil TMP1; beq >5
- |1:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
-+ |.if FPU
- | stfdx f14, TMP2, RC
-+ |.else
-+ | stwux SAVE0, RC, TMP2
-+ | stw SAVE1, 4(RC)
-+ |.endif
- | bne >7
- |2:
- | ins_next
-@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |2:
- | cmplw TMP0, CARG3
- | slwi TMP2, CARG3, 3
-+ |.if FPU
- | lfdx f14, BASE, RA
-+ |.else
-+ | lwzux SAVE0, RA, BASE
-+ | lwz SAVE1, 4(RA)
-+ |.endif
- | ble ->vmeta_tsetr // In array part?
- | ins_next1
-+ |.if FPU
- | stfdx f14, TMP1, TMP2
-+ |.else
-+ | stwux SAVE0, TMP1, TMP2
-+ | stw SAVE1, 4(TMP1)
-+ |.endif
- | ins_next2
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add TMP1, TMP1, TMP0
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- |3: // Copy result slots to table.
-+ |.if FPU
- | lfd f0, 0(RA)
-+ |.else
-+ | lwz SAVE0, 0(RA)
-+ | lwz SAVE1, 4(RA)
-+ |.endif
- | addi RA, RA, 8
- | cmpw cr1, RA, TMP2
-+ |.if FPU
- | stfd f0, 0(TMP1)
-+ |.else
-+ | stw SAVE0, 0(TMP1)
-+ | stw SAVE1, 4(TMP1)
-+ |.endif
- | addi TMP1, TMP1, 8
- | blt cr1, <3
- | bne >7
-@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | beq cr1, >3
- |2:
- | addi TMP3, TMP2, 8
-+ |.if FPU
- | lfdx f0, RA, TMP2
-+ |.else
-+ | add CARG3, RA, TMP2
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | cmplw cr1, TMP3, NARGS8:RC
-+ |.if FPU
- | stfdx f0, BASE, TMP2
-+ |.else
-+ | stwux CARG1, TMP2, BASE
-+ | stw CARG2, 4(TMP2)
-+ |.endif
- | mr TMP2, TMP3
- | bne cr1, <2
- |3:
-@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add BASE, BASE, RA
- | lwz TMP1, -24(BASE)
- | lwz LFUNC:RB, -20(BASE)
-+ |.if FPU
- | lfd f1, -8(BASE)
- | lfd f0, -16(BASE)
-+ |.else
-+ | lwz CARG1, -8(BASE)
-+ | lwz CARG2, -4(BASE)
-+ | lwz CARG3, -16(BASE)
-+ | lwz CARG4, -12(BASE)
-+ |.endif
- | stw TMP1, 0(BASE) // Copy callable.
- | stw LFUNC:RB, 4(BASE)
- | checkfunc TMP1
-- | stfd f1, 16(BASE) // Copy control var.
- | li NARGS8:RC, 16 // Iterators get 2 arguments.
-+ |.if FPU
-+ | stfd f1, 16(BASE) // Copy control var.
- | stfdu f0, 8(BASE) // Copy state.
-+ |.else
-+ | stw CARG1, 16(BASE) // Copy control var.
-+ | stw CARG2, 20(BASE)
-+ | stwu CARG3, 8(BASE) // Copy state.
-+ | stw CARG4, 4(BASE)
-+ |.endif
- | bne ->vmeta_call
- | ins_call
- break;
-@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi TMP3, RC, 3
- | bge >5 // Index points after array part?
- | lwzx TMP2, TMP1, TMP3
-+ |.if FPU
- | lfdx f0, TMP1, TMP3
-+ |.else
-+ | lwzux CARG1, TMP3, TMP1
-+ | lwz CARG2, 4(TMP3)
-+ |.endif
- | checknil TMP2
- | lwz INS, -4(PC)
- | beq >4
-@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- | addi RC, RC, 1
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
-+ |.if FPU
- | stfd f0, 8(RA)
-+ |.else
-+ | stw CARG1, 8(RA)
-+ | stw CARG2, 12(RA)
-+ |.endif
- | decode_RD4 TMP1, INS
- | stw RC, -4(RA) // Update control var.
- | add PC, TMP1, TMP3
-@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi RB, RC, 3
- | sub TMP3, TMP3, RB
- | lwzx RB, TMP2, TMP3
-+ |.if FPU
- | lfdx f0, TMP2, TMP3
-+ |.else
-+ | add CARG3, TMP2, TMP3
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | add NODE:TMP3, TMP2, TMP3
- | checknil RB
- | lwz INS, -4(PC)
- | beq >7
-+ |.if FPU
- | lfd f1, NODE:TMP3->key
-+ |.else
-+ | lwz CARG3, NODE:TMP3->key.u32.hi
-+ | lwz CARG4, NODE:TMP3->key.u32.lo
-+ |.endif
- | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
-+ |.if FPU
- | stfd f0, 8(RA)
-+ |.else
-+ | stw CARG1, 8(RA)
-+ | stw CARG2, 12(RA)
-+ |.endif
- | add RC, RC, TMP0
- | decode_RD4 TMP1, INS
-+ |.if FPU
- | stfd f1, 0(RA)
-+ |.else
-+ | stw CARG3, 0(RA)
-+ | stw CARG4, 4(RA)
-+ |.endif
- | addi RC, RC, 1
- | add PC, TMP1, TMP2
- | stw RC, -4(RA) // Update control var.
-@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | subi TMP2, TMP2, 16
- | ble >2 // No vararg slots?
- |1: // Copy vararg slots to destination slots.
-+ |.if FPU
- | lfd f0, 0(RC)
-+ |.else
-+ | lwz CARG1, 0(RC)
-+ | lwz CARG2, 4(RC)
-+ |.endif
- | addi RC, RC, 8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw CARG1, 0(RA)
-+ | stw CARG2, 4(RA)
-+ |.endif
- | cmplw RA, TMP2
- | cmplw cr1, RC, TMP3
- | bge >3 // All destination slots filled?
-@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addi MULTRES, TMP1, 8
- | bgt >7
- |6:
-+ |.if FPU
- | lfd f0, 0(RC)
-+ |.else
-+ | lwz CARG1, 0(RC)
-+ | lwz CARG2, 4(RC)
-+ |.endif
- | addi RC, RC, 8
-+ |.if FPU
- | stfd f0, 0(RA)
-+ |.else
-+ | stw CARG1, 0(RA)
-+ | stw CARG2, 4(RA)
-+ |.endif
- | cmplw RC, TMP3
- | addi RA, RA, 8
- | blt <6 // More vararg slots?
-@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | li TMP1, 0
- |2:
- | addi TMP3, TMP1, 8
-+ |.if FPU
- | lfdx f0, RA, TMP1
-+ |.else
-+ | add CARG3, RA, TMP1
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | cmpw TMP3, RC
-+ |.if FPU
- | stfdx f0, TMP2, TMP1
-+ |.else
-+ | add CARG3, TMP2, TMP1
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | beq >3
- | addi TMP1, TMP3, 8
-+ |.if FPU
- | lfdx f1, RA, TMP3
-+ |.else
-+ | add CARG3, RA, TMP3
-+ | lwz CARG1, 0(CARG3)
-+ | lwz CARG2, 4(CARG3)
-+ |.endif
- | cmpw TMP1, RC
-+ |.if FPU
- | stfdx f1, TMP2, TMP3
-+ |.else
-+ | add CARG3, TMP2, TMP3
-+ | stw CARG1, 0(CARG3)
-+ | stw CARG2, 4(CARG3)
-+ |.endif
- | bne <2
- |3:
- |5:
-@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | subi TMP2, BASE, 8
- | decode_RB8 RB, INS
- if (op == BC_RET1) {
-+ |.if FPU
- | lfd f0, 0(RA)
- | stfd f0, 0(TMP2)
-+ |.else
-+ | lwz CARG1, 0(RA)
-+ | lwz CARG2, 4(RA)
-+ | stw CARG1, 0(TMP2)
-+ | stw CARG2, 4(TMP2)
-+ |.endif
- }
- |5:
- | cmplw RB, RD
-@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |4:
- | stw CARG1, FORL_IDX*8+4(RA)
- } else {
-- | lwz TMP3, FORL_STEP*8(RA)
-+ | lwz SAVE0, FORL_STEP*8(RA)
- | lwz CARG3, FORL_STEP*8+4(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | lwz CARG2, FORL_STOP*8+4(RA)
-- | cmplw cr7, TMP3, TISNUM
-+ | cmplw cr7, SAVE0, TISNUM
- | cmplw cr1, TMP2, TISNUM
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
-@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- if (vk) {
- |.if DUALNUM
- |9: // FP loop.
-+ |.if FPU
- | lfd f1, FORL_IDX*8(RA)
- |.else
-+ | lwz CARG1, FORL_IDX*8(RA)
-+ | lwz CARG2, FORL_IDX*8+4(RA)
-+ |.endif
-+ |.else
- | lfdux f1, RA, BASE
- |.endif
-+ |.if FPU
- | lfd f3, FORL_STEP*8(RA)
- | lfd f2, FORL_STOP*8(RA)
-- | lwz TMP3, FORL_STEP*8(RA)
- | fadd f1, f1, f3
- | stfd f1, FORL_IDX*8(RA)
-+ |.else
-+ | lwz CARG3, FORL_STEP*8(RA)
-+ | lwz CARG4, FORL_STEP*8+4(RA)
-+ | mr SAVE1, RD
-+ | blex __adddf3
-+ | mr RD, SAVE1
-+ | stw CRET1, FORL_IDX*8(RA)
-+ | stw CRET2, FORL_IDX*8+4(RA)
-+ | lwz CARG3, FORL_STOP*8(RA)
-+ | lwz CARG4, FORL_STOP*8+4(RA)
-+ |.endif
-+ | lwz SAVE0, FORL_STEP*8(RA)
- } else {
- |.if DUALNUM
- |9: // FP loop.
- |.else
- | lwzux TMP1, RA, BASE
-- | lwz TMP3, FORL_STEP*8(RA)
-+ | lwz SAVE0, FORL_STEP*8(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | cmplw cr0, TMP1, TISNUM
-- | cmplw cr7, TMP3, TISNUM
-+ | cmplw cr7, SAVE0, TISNUM
- | cmplw cr1, TMP2, TISNUM
- |.endif
-+ |.if FPU
- | lfd f1, FORL_IDX*8(RA)
-+ |.else
-+ | lwz CARG1, FORL_IDX*8(RA)
-+ | lwz CARG2, FORL_IDX*8+4(RA)
-+ |.endif
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
-+ |.if FPU
- | lfd f2, FORL_STOP*8(RA)
-+ |.else
-+ | lwz CARG3, FORL_STOP*8(RA)
-+ | lwz CARG4, FORL_STOP*8+4(RA)
-+ |.endif
- | bge ->vmeta_for
- }
-- | cmpwi cr6, TMP3, 0
-+ | cmpwi cr6, SAVE0, 0
- if (op != BC_JFORL) {
- | srwi RD, RD, 1
- }
-+ |.if FPU
- | stfd f1, FORL_EXT*8(RA)
-+ |.else
-+ | stw CARG1, FORL_EXT*8(RA)
-+ | stw CARG2, FORL_EXT*8+4(RA)
-+ |.endif
- if (op != BC_JFORL) {
- | add RD, PC, RD
- }
-+ |.if FPU
- | fcmpu cr0, f1, f2
-+ |.else
-+ | mr SAVE1, RD
-+ | blex __ledf2
-+ | cmpwi CRET1, 0
-+ | mr RD, SAVE1
-+ |.endif
- if (op == BC_JFORI) {
- | addis PC, RD, -(BCBIAS_J*4 >> 16)
- }
---
-2.20.1
-
diff --git a/0011-Use-https-for-freelists.org-links.patch b/0011-Use-https-for-freelists.org-links.patch
deleted file mode 100644
index c0c2a19..0000000
--- a/0011-Use-https-for-freelists.org-links.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From f3d75075ed91137699c6071abe49e2252e794a9c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Fri, 18 Aug 2017 12:52:14 +0200
-Subject: [PATCH 11/72] Use https for freelists.org links.
-
----
- doc/ext_ffi_semantics.html | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
-index 899640c..ae3c037 100644
---- a/doc/ext_ffi_semantics.html
-+++ b/doc/ext_ffi_semantics.html
-@@ -844,7 +844,7 @@ place of a type, you'd need to use <tt>ffi.typeof("int")</tt> instead.
- <p>
- The main use for parameterized types are libraries implementing abstract
- data types
--(<a
href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8&...
class="ext">»</span> example</a>),
-+(<a
href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8...>),
- similar to what can be achieved with C++ template metaprogramming.
- Another use case are derived types of anonymous structs, which avoids
- pollution of the global struct namespace.
---
-2.20.1
-
diff --git a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch b/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch
deleted file mode 100644
index 80ca5b0..0000000
--- a/0012-x64-LJ_GC64-Fix-fallback-case-of-asm_fuseloadk64.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From 6b0824852677cc12570c20a3211fbfe0e4f0ce14 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 28 Aug 2017 10:43:37 +0200
-Subject: [PATCH 12/72] x64/LJ_GC64: Fix fallback case of asm_fuseloadk64().
-
-Contributed by Peter Cawley.
----
- src/lj_asm_x86.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
-index 3e189b1..55c02d2 100644
---- a/src/lj_asm_x86.h
-+++ b/src/lj_asm_x86.h
-@@ -387,6 +387,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
- ir->i = (int32_t)(as->mctop - as->mcbot);
- as->mcbot += 8;
- as->mclim = as->mcbot + MCLIM_REDZONE;
-+ lj_mcode_commitbot(as->J, as->mcbot);
- }
- as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
- as->mrm.base = RID_RIP;
---
-2.20.1
-
diff --git a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch b/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch
deleted file mode 100644
index faaa94a..0000000
--- a/0013-PPC-Add-soft-float-support-to-JIT-compiler-backend.patch
+++ /dev/null
@@ -1,751 +0,0 @@
-From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 3 Sep 2017 23:20:53 +0200
-Subject: [PATCH 13/72] PPC: Add soft-float support to JIT compiler backend.
-
-Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-Sponsored by Cisco Systems, Inc.
----
- src/lj_arch.h | 1 -
- src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++-------
- 2 files changed, 278 insertions(+), 44 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 0145a7c..5962f3a 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -273,7 +273,6 @@
- #endif
-
- #if LJ_ABI_SOFTFP
--#define LJ_ARCH_NOJIT 1 /* NYI */
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
- #else
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
-diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
-index 6daa861..1955429 100644
---- a/src/lj_asm_ppc.h
-+++ b/src/lj_asm_ppc.h
-@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
- emit_tab(as, pi, rt, left, right);
- }
-
-+#if !LJ_SOFTFP
- /* Fuse to multiply-add/sub instruction. */
- static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
- {
-@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
- }
- return 0;
- }
-+#endif
-
- /* -- Calls --------------------------------------------------------------- */
-
-@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
- {
- uint32_t n, nargs = CCI_XNARGS(ci);
- int32_t ofs = 8;
-- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
-+ Reg gpr = REGARG_FIRSTGPR;
-+#if !LJ_SOFTFP
-+ Reg fpr = REGARG_FIRSTFPR;
-+#endif
- if ((void *)ci->func)
- emit_call(as, (void *)ci->func);
- for (n = 0; n < nargs; n++) { /* Setup args. */
- IRRef ref = args[n];
- if (ref) {
- IRIns *ir = IR(ref);
-+#if !LJ_SOFTFP
- if (irt_isfp(ir->t)) {
- if (fpr <= REGARG_LASTFPR) {
- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
-@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
- emit_spstore(as, ir, r, ofs);
- ofs += irt_isnum(ir->t) ? 8 : 4;
- }
-- } else {
-+ } else
-+#endif
-+ {
- if (gpr <= REGARG_LASTGPR) {
- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
- ra_leftov(as, gpr, ref);
-@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
- }
- checkmclim(as);
- }
-+#if !LJ_SOFTFP
- if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
- emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
-+#endif
- }
-
- /* Setup result reg/sp for call. Evict scratch regs. */
-@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
- {
- RegSet drop = RSET_SCRATCH;
- int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
-+#if !LJ_SOFTFP
- if ((ci->flags & CCI_NOFPRCLOBBER))
- drop &= ~RSET_FPR;
-+#endif
- if (ra_hasreg(ir->r))
- rset_clear(drop, ir->r); /* Dest reg handled below. */
- if (hiop && ra_hasreg((ir+1)->r))
-@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
- ra_evictset(as, drop); /* Evictions must be performed first. */
- if (ra_used(ir)) {
- lua_assert(!irt_ispri(ir->t));
-- if (irt_isfp(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
- if ((ci->flags & CCI_CASTU64)) {
- /* Use spill slot or temp slots. */
- int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
-@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
-
- /* -- Type conversions ---------------------------------------------------- */
-
-+#if !LJ_SOFTFP
- static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
- {
- RegSet allow = RSET_FPR;
-@@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir)
- emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
- emit_fab(as, PPCI_FADD, tmp, left, right);
- }
-+#endif
-
- static void asm_conv(ASMState *as, IRIns *ir)
- {
- IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
-+#if !LJ_SOFTFP
- int stfp = (st == IRT_NUM || st == IRT_FLOAT);
-+#endif
- IRRef lref = ir->op1;
-- lua_assert(irt_type(ir->t) != st);
- lua_assert(!(irt_isint64(ir->t) ||
- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
-+#if LJ_SOFTFP
-+ /* FP conversions are handled by SPLIT. */
-+ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
-+  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
-+#else
-+ lua_assert(irt_type(ir->t) != st);
- if (irt_isfp(ir->t)) {
- Reg dest = ra_dest(as, ir, RSET_FPR);
- if (stfp) { /* FP to FP conversion. */
-@@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
- emit_fb(as, PPCI_FCTIWZ, tmp, left);
- }
- }
-- } else {
-+ } else
-+#endif
-+ {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
- Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
-@@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIns *ir)
- {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
- IRRef args[2];
-- int32_t ofs;
-+ int32_t ofs = SPOFS_TMP;
-+#if LJ_SOFTFP
-+ ra_evictset(as, RSET_SCRATCH);
-+ if (ra_used(ir)) {
-+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
-+ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
-+ int i;
-+ for (i = 0; i < 2; i++) {
-+ Reg r = (ir+i)->r;
-+ if (ra_hasreg(r)) {
-+ ra_free(as, r);
-+ ra_modified(as, r);
-+ emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
-+ }
-+ }
-+ ofs = sps_scale(ir->s & ~1);
-+ } else {
-+ Reg rhi = ra_dest(as, ir+1, RSET_GPR);
-+ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
-+ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
-+ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
-+ }
-+ }
-+#else
- RegSet drop = RSET_SCRATCH;
- if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
- ra_evictset(as, drop);
-+ if (ir->s) ofs = sps_scale(ir->s);
-+#endif
- asm_guardcc(as, CC_EQ);
- emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
- args[0] = ir->op1; /* GCstr *str */
- args[1] = ASMREF_TMP1; /* TValue *n */
- asm_gencall(as, ci, args);
- /* Store the result to the spill slot or temp slots. */
-- ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
- emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
- }
-
-@@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
- Reg src = ra_alloc1(as, ref, allow);
- emit_setgl(as, src, tmptv.gcr);
- }
-- type = ra_allock(as, irt_toitype(ir->t), allow);
-+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-+ type = ra_alloc1(as, ref+1, allow);
-+ else
-+ type = ra_allock(as, irt_toitype(ir->t), allow);
- emit_setgl(as, type, tmptv.it);
- }
- }
-@@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- Reg tisnum = RID_NONE, tmpnum = RID_NONE;
- IRRef refkey = ir->op2;
- IRIns *irkey = IR(refkey);
-+ int isk = irref_isk(refkey);
- IRType1 kt = irkey->t;
- uint32_t khash;
- MCLabel l_end, l_loop, l_next;
-
- rset_clear(allow, tab);
-+#if LJ_SOFTFP
-+ if (!isk) {
-+ key = ra_alloc1(as, refkey, allow);
-+ rset_clear(allow, key);
-+ if (irkey[1].o == IR_HIOP) {
-+ if (ra_hasreg((irkey+1)->r)) {
-+ tmpnum = (irkey+1)->r;
-+ ra_noweak(as, tmpnum);
-+ } else {
-+ tmpnum = ra_allocref(as, refkey+1, allow);
-+ }
-+ rset_clear(allow, tmpnum);
-+ }
-+ }
-+#else
- if (irt_isnum(kt)) {
- key = ra_alloc1(as, refkey, RSET_FPR);
- tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
-@@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- key = ra_alloc1(as, refkey, allow);
- rset_clear(allow, key);
- }
-+#endif
- tmp2 = ra_scratch(as, allow);
- rset_clear(allow, tmp2);
-
-@@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- asm_guardcc(as, CC_EQ);
- else
- emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
-- if (irt_isnum(kt)) {
-+ if (!LJ_SOFTFP && irt_isnum(kt)) {
- emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
- emit_condbranch(as, PPCI_BC, CC_GE, l_next);
- emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
-@@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_ab(as, PPCI_CMPW, tmp2, key);
- emit_condbranch(as, PPCI_BC, CC_NE, l_next);
- }
-- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
-+ if (LJ_SOFTFP && ra_hasreg(tmpnum))
-+ emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
-+ else
-+ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
- if (!irt_ispri(kt))
- emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
- }
-@@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- (((char *)as->mcp-(char *)l_loop) & 0xffffu);
-
- /* Load main position relative to tab->node into dest. */
-- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
-+ khash = isk ? ir_khash(irkey) : 1;
- if (khash == 0) {
- emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
- } else {
- Reg tmphash = tmp1;
-- if (irref_isk(refkey))
-+ if (isk)
- tmphash = ra_allock(as, khash, allow);
- emit_tab(as, PPCI_ADD, dest, dest, tmp1);
- emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
- emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
- emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
- emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
-- if (irref_isk(refkey)) {
-+ if (isk) {
- /* Nothing to do. */
- } else if (irt_isstr(kt)) {
- emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
-@@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
- emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
- emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
-- if (irt_isnum(kt)) {
-+ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
-+#if LJ_SOFTFP
-+ emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
-+ emit_rotlwi(as, dest, tmp1, HASH_ROT1);
-+ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
-+#else
- int32_t ofs = ra_spill(as, irkey);
- emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
- emit_rotlwi(as, dest, tmp1, HASH_ROT1);
- emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
- emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
- emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
-+#endif
- } else {
- emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
- emit_rotlwi(as, dest, tmp1, HASH_ROT1);
-@@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
- case IRT_U8: return PPCI_LBZ;
- case IRT_I16: return PPCI_LHA;
- case IRT_U16: return PPCI_LHZ;
-- case IRT_NUM: return PPCI_LFD;
-- case IRT_FLOAT: return PPCI_LFS;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
-+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
- default: return PPCI_LWZ;
- }
- }
-@@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
- switch (irt_type(ir->t)) {
- case IRT_I8: case IRT_U8: return PPCI_STB;
- case IRT_I16: case IRT_U16: return PPCI_STH;
-- case IRT_NUM: return PPCI_STFD;
-- case IRT_FLOAT: return PPCI_STFS;
-+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
-+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
- default: return PPCI_STW;
- }
- }
-@@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRIns *ir)
-
- static void asm_xload(ASMState *as, IRIns *ir)
- {
-- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-+ Reg dest = ra_dest(as, ir,
-+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
- if (irt_isi8(ir->t))
- emit_as(as, PPCI_EXTSB, dest, dest);
-@@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
- Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
- asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
- } else {
-- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-+ Reg src = ra_alloc1(as, ir->op2,
-+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
- rset_exclude(RSET_GPR, src), ofs);
- }
-@@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
- Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
- RegSet allow = RSET_GPR;
- int32_t ofs = AHUREF_LSX;
-+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
-+ t.irt = IRT_NUM;
-+ if (ra_used(ir+1)) {
-+ type = ra_dest(as, ir+1, allow);
-+ rset_clear(allow, type);
-+ }
-+ ofs = 0;
-+ }
- if (ra_used(ir)) {
-- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-- if (!irt_isnum(t)) ofs = 0;
-- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
-+ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
-+ irt_isint(ir->t) || irt_isaddr(ir->t));
-+ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
-+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
- }
- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-@@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
- asm_guardcc(as, CC_GE);
- emit_ab(as, PPCI_CMPLW, type, tisnum);
- if (ra_hasreg(dest)) {
-- if (ofs == AHUREF_LSX) {
-+ if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
- tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
- (idx&255)), (idx>>8)));
- emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
- } else {
-- emit_fai(as, PPCI_LFD, dest, idx, ofs);
-+ emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
-+ ofs+4*LJ_SOFTFP);
- }
- }
- } else {
-@@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
- int32_t ofs = AHUREF_LSX;
- if (ir->r == RID_SINK)
- return;
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- src = ra_alloc1(as, ir->op2, RSET_FPR);
- } else {
- if (!irt_ispri(ir->t)) {
-@@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
- rset_clear(allow, src);
- ofs = 0;
- }
-- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
-+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-+ type = ra_alloc1(as, (ir+1)->op2, allow);
-+ else
-+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
- rset_clear(allow, type);
- }
- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- if (ofs == AHUREF_LSX) {
- emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
- emit_slwi(as, RID_TMP, (idx>>8), 3);
-@@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir)
- IRType1 t = ir->t;
- Reg dest = RID_NONE, type = RID_NONE, base;
- RegSet allow = RSET_GPR;
-+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
-+ if (hiop)
-+ t.irt = IRT_NUM;
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
-- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
-+ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
- lua_assert(LJ_DUALNUM ||
- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
-+#if LJ_SOFTFP
-+ lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
-+ if (hiop && ra_used(ir+1)) {
-+ type = ra_dest(as, ir+1, allow);
-+ rset_clear(allow, type);
-+ }
-+#else
- if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
- dest = ra_scratch(as, RSET_FPR);
- asm_tointg(as, ir, dest);
- t.irt = IRT_NUM; /* Continue with a regular number type check. */
-- } else if (ra_used(ir)) {
-+ } else
-+#endif
-+ if (ra_used(ir)) {
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
-+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
- rset_clear(allow, dest);
- base = ra_alloc1(as, REF_BASE, allow);
- rset_clear(allow, base);
-- if ((ir->op2 & IRSLOAD_CONVERT)) {
-+ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
- if (irt_isint(t)) {
- emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
- dest = ra_scratch(as, RSET_FPR);
-@@ -994,10 +1097,13 @@ dotypecheck:
- if ((ir->op2 & IRSLOAD_TYPECHECK)) {
- Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
- asm_guardcc(as, CC_GE);
-- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
-+#if !LJ_SOFTFP
- type = RID_TMP;
-+#endif
-+ emit_ab(as, PPCI_CMPLW, type, tisnum);
- }
-- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
-+ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
-+ base, ofs-(LJ_SOFTFP?0:4));
- } else {
- if ((ir->op2 & IRSLOAD_TYPECHECK)) {
- asm_guardcc(as, CC_NE);
-@@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
-
- /* -- Arithmetic and logic operations ------------------------------------- */
-
-+#if !LJ_SOFTFP
- static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
- {
- Reg dest = ra_dest(as, ir, RSET_FPR);
-@@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
- else
- asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
- }
-+#endif
-
- static void asm_add(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
- asm_fparith(as, ir, PPCI_FADD);
-- } else {
-+ } else
-+#endif
-+ {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
- PPCIns pi;
-@@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir)
-
- static void asm_sub(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
- asm_fparith(as, ir, PPCI_FSUB);
-- } else {
-+ } else
-+#endif
-+ {
- PPCIns pi = PPCI_SUBF;
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg left, right;
-@@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
-
- static void asm_mul(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- asm_fparith(as, ir, PPCI_FMUL);
-- } else {
-+ } else
-+#endif
-+ {
- PPCIns pi = PPCI_MULLW;
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
-@@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns *ir)
-
- static void asm_neg(ASMState *as, IRIns *ir)
- {
-+#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) {
- asm_fpunary(as, ir, PPCI_FNEG);
-- } else {
-+ } else
-+#endif
-+ {
- Reg dest, left;
- PPCIns pi = PPCI_NEG;
- if (as->flagmcp == as->mcp) {
-@@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
- PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
- #define asm_bror(as, ir) lua_assert(0)
-
-+#if LJ_SOFTFP
-+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
-+{
-+ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
-+ IRRef args[4];
-+ MCLabel l_right, l_end;
-+ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
-+ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
-+ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
-+ PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
-+ righthi = (lefthi >> 8); lefthi &= 255;
-+ rightlo = (leftlo >> 8); leftlo &= 255;
-+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
-+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
-+ l_end = emit_label(as);
-+ if (desthi != righthi) emit_mr(as, desthi, righthi);
-+ if (destlo != rightlo) emit_mr(as, destlo, rightlo);
-+ l_right = emit_label(as);
-+ if (l_end != l_right) emit_jmp(as, l_end);
-+ if (desthi != lefthi) emit_mr(as, desthi, lefthi);
-+ if (destlo != leftlo) emit_mr(as, destlo, leftlo);
-+ if (l_right == as->mcp+1) {
-+ cond ^= 4; l_right = l_end; ++as->mcp;
-+ }
-+ emit_condbranch(as, PPCI_BC, cond, l_right);
-+ ra_evictset(as, RSET_SCRATCH);
-+ emit_cmpi(as, RID_RET, 1);
-+ asm_gencall(as, &ci, args);
-+}
-+#endif
-+
- static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
- {
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- Reg dest = ra_dest(as, ir, RSET_FPR);
- Reg tmp = dest;
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
-@@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
- static void asm_comp(ASMState *as, IRIns *ir)
- {
- PPCCC cc = asm_compmap[ir->o];
-- if (irt_isnum(ir->t)) {
-+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
- Reg right, left = ra_alloc2(as, ir, RSET_FPR);
- right = (left >> 8); left &= 255;
- asm_guardcc(as, (cc >> 4));
-@@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns *ir)
-
- #define asm_equal(as, ir) asm_comp(as, ir)
-
-+#if LJ_SOFTFP
-+/* SFP comparisons. */
-+static void asm_sfpcomp(ASMState *as, IRIns *ir)
-+{
-+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
-+ RegSet drop = RSET_SCRATCH;
-+ Reg r;
-+ IRRef args[4];
-+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
-+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
-+
-+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
-+ if (!rset_test(as->freeset, r) &&
-+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
-+ rset_clear(drop, r);
-+ }
-+ ra_evictset(as, drop);
-+ asm_setupresult(as, ir, ci);
-+ switch ((IROp)ir->o) {
-+ case IR_ULT:
-+ asm_guardcc(as, CC_EQ);
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
-+ case IR_ULE:
-+ asm_guardcc(as, CC_EQ);
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 1);
-+ break;
-+ case IR_GE: case IR_GT:
-+ asm_guardcc(as, CC_EQ);
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 2);
-+ default:
-+ asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
-+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
-+ break;
-+ }
-+ asm_gencall(as, ci, args);
-+}
-+#endif
-+
- #if LJ_HASFFI
- /* 64 bit integer comparisons. */
- static void asm_comp64(ASMState *as, IRIns *ir)
-@@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRIns *ir)
- /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
- static void asm_hiop(ASMState *as, IRIns *ir)
- {
--#if LJ_HASFFI
-+#if LJ_HASFFI || LJ_SOFTFP
- /* HIOP is marked as a store because it needs its own DCE logic. */
- int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
- if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
- if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
- as->curins--; /* Always skip the CONV. */
-+#if LJ_HASFFI && !LJ_SOFTFP
- if (usehi || uselo)
- asm_conv64(as, ir);
- return;
-+#endif
- } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
- as->curins--; /* Always skip the loword comparison. */
-+#if LJ_SOFTFP
-+ if (!irt_isint(ir->t)) {
-+ asm_sfpcomp(as, ir-1);
-+ return;
-+ }
-+#endif
-+#if LJ_HASFFI
- asm_comp64(as, ir);
-+#endif
-+ return;
-+#if LJ_SOFTFP
-+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
-+ as->curins--; /* Always skip the loword min/max. */
-+ if (uselo || usehi)
-+ asm_sfpmin_max(as, ir-1);
- return;
-+#endif
- } else if ((ir-1)->o == IR_XSTORE) {
- as->curins--; /* Handle both stores here. */
- if ((ir-1)->r != RID_SINK) {
-@@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns *ir)
- }
- if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
- switch ((ir-1)->o) {
-+#if LJ_HASFFI
- case IR_ADD: as->curins--; asm_add64(as, ir); break;
- case IR_SUB: as->curins--; asm_sub64(as, ir); break;
- case IR_NEG: as->curins--; asm_neg64(as, ir); break;
-+#endif
-+#if LJ_SOFTFP
-+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
-+ case IR_STRTO:
-+ if (!uselo)
-+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
-+ break;
-+#endif
- case IR_CALLN:
-+ case IR_CALLS:
- case IR_CALLXS:
- if (!uselo)
- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
- break;
-+#if LJ_SOFTFP
-+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-+#endif
- case IR_CNEWI:
- /* Nothing to do here. Handled by lo op itself. */
- break;
-@@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if ((sn & SNAP_NORESTORE))
- continue;
- if (irt_isnum(ir->t)) {
-+#if LJ_SOFTFP
-+ Reg tmp;
-+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
-+ lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
-+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
-+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
-+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
-+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
-+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
-+#else
- Reg src = ra_alloc1(as, ref, RSET_FPR);
- emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
-+#endif
- } else {
- Reg type;
- RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
-@@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
- if ((sn & (SNAP_CONT|SNAP_FRAME))) {
- if (s == 0) continue; /* Do not overwrite link to previous frame. */
- type = ra_allock(as, (int32_t)(*flinks--), allow);
-+#if LJ_SOFTFP
-+ } else if ((sn & SNAP_SOFTFPNUM)) {
-+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
-+#endif
- } else {
- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
- }
-@@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
- int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
- asm_collectargs(as, ir, ci, args);
- for (i = 0; i < nargs; i++)
-- if (args[i] && irt_isfp(IR(args[i])->t)) {
-+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
- if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
- } else {
- if (ngpr > 0) ngpr--; else nslots++;
- }
- if (nslots > as->evenspill) /* Leave room for args in stack slots. */
- as->evenspill = nslots;
-- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
-+ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
-+ REGSP_HINT(RID_RET);
- }
-
- static void asm_setup_target(ASMState *as)
---
-2.20.1
-
diff --git a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch b/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch
deleted file mode 100644
index 7e9dd8a..0000000
--- a/0014-x64-LJ_GC64-Fix-type-check-only-variant-of-SLOAD.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 05fbdf565c700365d22e38f11478101a0d92a23e Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 10 Sep 2017 14:05:30 +0200
-Subject: [PATCH 14/72] x64/LJ_GC64: Fix type-check-only variant of SLOAD.
-
-Thanks to Peter Cawley.
----
- src/lj_asm_x86.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
-index 55c02d2..af54dc7 100644
---- a/src/lj_asm_x86.h
-+++ b/src/lj_asm_x86.h
-@@ -1759,7 +1759,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
- emit_i8(as, irt_toitype(t));
- emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
- emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
-- emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4);
-+ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs);
- #else
- } else {
- emit_i8(as, irt_toitype(t));
---
-2.20.1
-
diff --git a/0015-MIPS64-Hide-internal-function.patch b/0015-MIPS64-Hide-internal-function.patch
deleted file mode 100644
index 0e2f4fd..0000000
--- a/0015-MIPS64-Hide-internal-function.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From bf12f1dafb157008b963f829b57b2472b6993cc8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 18 Sep 2017 09:50:22 +0200
-Subject: [PATCH 15/72] MIPS64: Hide internal function.
-
----
- src/lj_ccall.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index 799be48..25e938c 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -848,7 +848,8 @@ noth: /* Not a homogeneous float/double aggregate. */
- return 0; /* Struct is in GPRs. */
- }
-
--void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft)
-+static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
-+ int ft)
- {
- if (LJ_ABI_SOFTFP ? ft :
- ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
---
-2.20.1
-
diff --git a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch b/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch
deleted file mode 100644
index 66f5bf0..0000000
--- a/0016-DynASM-x86-Fix-potential-REL_A-overflow.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-commit 6a2d8b0b4d49eb5aac600c219e5903420806e56e
-Merge: bf12f1d 0c0e7b1
-Author: Mike Pall <mike>
-Date: Wed Sep 20 19:42:34 2017 +0200
-
- Merge branch 'master' into v2.1
-
-From 0c0e7b168ea147866835954267c151ef789f64fb Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 20 Sep 2017 19:39:50 +0200
-Subject: [PATCH 16/72] DynASM/x86: Fix potential REL_A overflow.
-
-Thanks to Joshua Haberman.
----
- dynasm/dasm_x86.h | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
-index 90dc5d1..f9260b0 100644
---- a/dynasm/dasm_x86.h
-+++ b/dynasm/dasm_x86.h
-@@ -395,7 +395,8 @@ int dasm_encode(Dst_DECL, void *buffer)
- }
- case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
- b++; n = (int)(ptrdiff_t)D->globals[-n];
-- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
-+ case DASM_REL_A: rel_a:
-+ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
- case DASM_REL_PC: rel_pc: {
- int shrink = *b++;
- int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
---
-2.20.1
-
diff --git a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch b/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch
deleted file mode 100644
index aff6f20..0000000
--- a/0017-LJ_GC64-Fix-ir_khash-for-non-string-GCobj.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From b4ed3219a1a98dd9fe7d1e3eeea3b82f5a780948 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 2 Oct 2017 09:22:46 +0200
-Subject: [PATCH 17/72] LJ_GC64: Fix ir_khash for non-string GCobj.
-
-Contributed by Peter Cawley.
----
- src/lj_asm.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index bed2268..d961927 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -1017,7 +1017,11 @@ static uint32_t ir_khash(IRIns *ir)
- } else {
- lua_assert(irt_isgcv(ir->t));
- lo = u32ptr(ir_kgc(ir));
-+#if LJ_GC64
-+ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
-+#else
- hi = lo + HASH_BIAS;
-+#endif
- }
- return hashrot(lo, hi);
- }
---
-2.20.1
-
diff --git a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch b/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch
deleted file mode 100644
index d604876..0000000
--- a/0018-LJ_GC64-Make-ASMREF_L-references-64-bit.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 850f8c59d3d04a9847f21f32a6c36d8269b5b6b1 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 2 Oct 2017 23:10:56 +0200
-Subject: [PATCH 18/72] LJ_GC64: Make ASMREF_L references 64 bit.
-
-Reported by Yichun Zhang.
----
- src/lj_asm.c | 1 +
- src/lj_ir.h | 4 +++-
- src/lj_opt_sink.c | 1 +
- 3 files changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index d961927..753fe6b 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -2015,6 +2015,7 @@ static void asm_setup_regsp(ASMState *as)
- ir->prev = REGSP_INIT;
- if (irt_is64(ir->t) && ir->o != IR_KNULL) {
- #if LJ_GC64
-+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
- ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
- #else
- /* Make life easier for backends by putting address of constant in i. */
-diff --git a/src/lj_ir.h b/src/lj_ir.h
-index 34c2785..8057a75 100644
---- a/src/lj_ir.h
-+++ b/src/lj_ir.h
-@@ -377,10 +377,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
- #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
-
- #if LJ_GC64
-+/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */
- #define IRT_IS64 \
- ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
- (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
-- (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA))
-+ (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\
-+ (1u<<IRT_NIL))
- #elif LJ_64
- #define IRT_IS64 \
- ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
-diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
-index 929ccb6..a16d112 100644
---- a/src/lj_opt_sink.c
-+++ b/src/lj_opt_sink.c
-@@ -219,6 +219,7 @@ static void sink_sweep_ins(jit_State *J)
- for (ir = IR(J->cur.nk); ir < irbase; ir++) {
- irt_clearmark(ir->t);
- ir->prev = REGSP_INIT;
-+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
- if (irt_is64(ir->t) && ir->o != IR_KNULL)
- ir++;
- }
---
-2.20.1
-
diff --git a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch b/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch
deleted file mode 100644
index c999ce8..0000000
--- a/0019-Fix-FOLD-rule-for-strength-reduction-of-widening.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 9f0caad0e43f97a4613850b3874b851cb1bc301d Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 8 Nov 2017 12:53:05 +0100
-Subject: [PATCH 19/72] Fix FOLD rule for strength reduction of widening.
-
-Reported by Matthew Burk.
----
- src/lj_opt_fold.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
-index 3d0e35a..5dc7ae3 100644
---- a/src/lj_opt_fold.c
-+++ b/src/lj_opt_fold.c
-@@ -1052,7 +1052,7 @@ LJFOLDF(simplify_conv_sext)
- if (ref == J->scev.idx) {
- IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
- lua_assert(irt_isint(J->scev.t));
-- if (lo && IR(lo)->i + ofs >= 0) {
-+ if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
- ok_reduce:
- #if LJ_TARGET_X64
- /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
---
-2.20.1
-
diff --git a/0020-ARM64-Fix-assembly-of-HREFK.patch b/0020-ARM64-Fix-assembly-of-HREFK.patch
deleted file mode 100644
index 3200304..0000000
--- a/0020-ARM64-Fix-assembly-of-HREFK.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From 06cd9fce7df440323647174f1ca4a01281ec8acd Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 8 Nov 2017 12:53:48 +0100
-Subject: [PATCH 20/72] ARM64: Fix assembly of HREFK.
-
-Reported by Jason Teplitz.
----
- src/lj_asm_arm64.h | 11 +++++------
- 1 file changed, 5 insertions(+), 6 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 8fd92e7..cbb186d 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -869,14 +869,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
- int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
- int32_t kofs = ofs + (int32_t)offsetof(Node, key);
- int bigofs = !emit_checkofs(A64I_LDRx, ofs);
-- RegSet allow = RSET_GPR;
- Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
-- Reg node = ra_alloc1(as, ir->op1, allow);
-- Reg key = ra_scratch(as, rset_clear(allow, node));
-- Reg idx = node;
-+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
-+ Reg key, idx = node;
-+ RegSet allow = rset_exclude(RSET_GPR, node);
- uint64_t k;
- lua_assert(ofs % sizeof(Node) == 0);
-- rset_clear(allow, key);
- if (bigofs) {
- idx = dest;
- rset_clear(allow, dest);
-@@ -892,7 +890,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
- } else {
- k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
- }
-- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow));
-+ key = ra_scratch(as, allow);
-+ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
- emit_lso(as, A64I_LDRx, key, idx, kofs);
- if (bigofs)
- emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
---
-2.20.1
-
diff --git a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch b/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch
deleted file mode 100644
index 80fad2f..0000000
--- a/0021-MIPS64-Fix-register-allocation-in-assembly-of-HREF.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From 99cdfbf6a1e8856f64908072ef10443a7eab14f2 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 8 Nov 2017 12:54:03 +0100
-Subject: [PATCH 21/72] MIPS64: Fix register allocation in assembly of HREF.
-
-Contributed by James Cowgill.
----
- src/lj_asm_mips.h | 42 +++++++++++++++++++++++++-----------------
- 1 file changed, 25 insertions(+), 17 deletions(-)
-
-diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
-index 1406a87..3a4679b 100644
---- a/src/lj_asm_mips.h
-+++ b/src/lj_asm_mips.h
-@@ -859,6 +859,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- Reg dest = ra_dest(as, ir, allow);
- Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
- Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
-+#if LJ_64
-+ Reg cmp64 = RID_NONE;
-+#endif
- IRRef refkey = ir->op2;
- IRIns *irkey = IR(refkey);
- int isk = irref_isk(refkey);
-@@ -901,6 +904,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- #endif
- tmp2 = ra_scratch(as, allow);
- rset_clear(allow, tmp2);
-+#if LJ_64
-+ if (LJ_SOFTFP || !irt_isnum(kt)) {
-+ /* Allocate cmp64 register used for 64-bit comparisons */
-+ if (LJ_SOFTFP && irt_isnum(kt)) {
-+ cmp64 = key;
-+ } else if (!isk && irt_isaddr(kt)) {
-+ cmp64 = tmp2;
-+ } else {
-+ int64_t k;
-+ if (isk && irt_isaddr(kt)) {
-+ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
-+ } else {
-+ lua_assert(irt_ispri(kt) && !irt_isnil(kt));
-+ k = ~((int64_t)~irt_toitype(ir->t) << 47);
-+ }
-+ cmp64 = ra_allock(as, k, allow);
-+ rset_clear(allow, cmp64);
-+ }
-+ }
-+#endif
-
- /* Key not found in chain: jump to exit (if merged) or load niltv. */
- l_end = emit_label(as);
-@@ -943,24 +966,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
- emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
- emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
-- } else if (LJ_SOFTFP && irt_isnum(kt)) {
-- emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
-- emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
-- } else if (irt_isaddr(kt)) {
-- Reg refk = tmp2;
-- if (isk) {
-- int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
-- refk = ra_allock(as, k, allow);
-- rset_clear(allow, refk);
-- }
-- emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end);
-- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
- } else {
-- Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
-- rset_clear(allow, pri);
-- lua_assert(irt_ispri(kt) && !irt_isnil(kt));
-- emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end);
-- emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key));
-+ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end);
-+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
- }
- *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
- if (!isk && irt_isaddr(kt)) {
---
-2.20.1
-
diff --git a/0022-ARM64-Fix-xpcall-error-case.patch b/0022-ARM64-Fix-xpcall-error-case.patch
deleted file mode 100644
index ec05a7c..0000000
--- a/0022-ARM64-Fix-xpcall-error-case.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 33082a6f4778aa152f6a4a684a7fe79436f1ecb6 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 16 Nov 2017 12:53:34 +0100
-Subject: [PATCH 22/72] ARM64: Fix xpcall() error case.
-
-Thanks to Stefan Pejic.
----
- src/vm_arm64.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
-index 3eaf376..241c58a 100644
---- a/src/vm_arm64.dasc
-+++ b/src/vm_arm64.dasc
-@@ -1185,12 +1185,12 @@ static void build_subroutines(BuildCtx *ctx)
- | subs NARGS8:RC, NARGS8:RC, #16
- | blo ->fff_fallback
- | mov RB, BASE
-- | add BASE, BASE, #24
- | asr ITYPE, CARG2, #47
- | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
- | cmn ITYPE, #-LJ_TFUNC
- | add PC, TMP0, #24+FRAME_PCALL
- | bne ->fff_fallback // Traceback must be a function.
-+ | add BASE, BASE, #24
- | stp CARG2, CARG1, [RB] // Swap function and traceback.
- | cbz NARGS8:RC, ->vm_call_dispatch
- | b <1
---
-2.20.1
-
diff --git a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch b/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch
deleted file mode 100644
index 740a5a7..0000000
--- a/0023-Fix-saved-bytecode-encapsulated-in-ELF-objects.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 7dbf0b05f1228c1c719866db5e5f3d58f87f74c8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 16 Nov 2017 12:58:12 +0100
-Subject: [PATCH 23/72] Fix saved bytecode encapsulated in ELF objects.
-
-Thanks to Dimitry Andric.
----
- src/jit/bcsave.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
-index aa677df..c94064e 100644
---- a/src/jit/bcsave.lua
-+++ b/src/jit/bcsave.lua
-@@ -275,7 +275,7 @@ typedef struct {
- o.sect[2].size = fofs(ofs)
- o.sect[3].type = f32(3) -- .strtab
- o.sect[3].ofs = fofs(sofs + ofs)
-- o.sect[3].size = fofs(#symname+1)
-+ o.sect[3].size = fofs(#symname+2)
- ffi.copy(o.space+ofs+1, symname)
- ofs = ofs + #symname + 2
- o.sect[4].type = f32(1) -- .rodata
---
-2.20.1
-
diff --git a/0024-ARM64-Fix-xpcall-error-case-really.patch b/0024-ARM64-Fix-xpcall-error-case-really.patch
deleted file mode 100644
index ab518e1..0000000
--- a/0024-ARM64-Fix-xpcall-error-case-really.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-From d417ded17945b4211608d497d50b509e0274f5e0 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sat, 18 Nov 2017 12:23:57 +0100
-Subject: [PATCH 24/72] ARM64: Fix xpcall() error case (really).
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Thanks to François Perrad and Stefan Pejic.
----
- src/vm_arm64.dasc | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
-index 241c58a..c55794a 100644
---- a/src/vm_arm64.dasc
-+++ b/src/vm_arm64.dasc
-@@ -1182,7 +1182,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc xpcall
- | ldp CARG1, CARG2, [BASE]
- | ldrb TMP0w, GL->hookmask
-- | subs NARGS8:RC, NARGS8:RC, #16
-+ | subs NARGS8:TMP1, NARGS8:RC, #16
- | blo ->fff_fallback
- | mov RB, BASE
- | asr ITYPE, CARG2, #47
-@@ -1190,6 +1190,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cmn ITYPE, #-LJ_TFUNC
- | add PC, TMP0, #24+FRAME_PCALL
- | bne ->fff_fallback // Traceback must be a function.
-+ | mov NARGS8:RC, NARGS8:TMP1
- | add BASE, BASE, #24
- | stp CARG2, CARG1, [RB] // Swap function and traceback.
- | cbz NARGS8:RC, ->vm_call_dispatch
---
-2.20.1
-
diff --git a/0025-MIPS64-Fix-xpcall-error-case.patch b/0025-MIPS64-Fix-xpcall-error-case.patch
deleted file mode 100644
index 5b17e81..0000000
--- a/0025-MIPS64-Fix-xpcall-error-case.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From ea7071d3c30b6432bfe6f8a9d263e0285cec25e3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sat, 18 Nov 2017 12:25:35 +0100
-Subject: [PATCH 25/72] MIPS64: Fix xpcall() error case.
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Thanks to François Perrad and Stefan Pejic.
----
- src/vm_mips64.dasc | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index 75b38de..a78cd25 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -1399,15 +1399,16 @@ static void build_subroutines(BuildCtx *ctx)
- |. nop
- |
- |.ffunc xpcall
-- | daddiu NARGS8:RC, NARGS8:RC, -16
-+ | daddiu NARGS8:TMP0, NARGS8:RC, -16
- | ld CARG1, 0(BASE)
- | ld CARG2, 8(BASE)
-- | bltz NARGS8:RC, ->fff_fallback
-+ | bltz NARGS8:TMP0, ->fff_fallback
- |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
- | gettp AT, CARG2
- | daddiu AT, AT, -LJ_TFUNC
- | bnez AT, ->fff_fallback // Traceback must be a function.
- |. move TMP2, BASE
-+ | move NARGS8:RC, NARGS8:TMP0
- | daddiu BASE, BASE, 24
- | // Remember active hook before pcall.
- | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
---
-2.20.1
-
diff --git a/0026-Fix-IR_BUFPUT-assembly.patch b/0026-Fix-IR_BUFPUT-assembly.patch
deleted file mode 100644
index c942467..0000000
--- a/0026-Fix-IR_BUFPUT-assembly.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 58d0dde0a2df49abc991decbabff15230010829a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Jan 2018 13:57:00 +0100
-Subject: [PATCH 26/72] Fix IR_BUFPUT assembly.
-
-Thanks to Peter Cawley.
----
- src/lj_asm.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index 753fe6b..5f83779 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -1119,7 +1119,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
- IRRef args[3];
- IRIns *irs;
-- int kchar = -1;
-+ int kchar = -129;
- args[0] = ir->op1; /* SBuf * */
- args[1] = ir->op2; /* GCstr * */
- irs = IR(ir->op2);
-@@ -1127,7 +1127,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
- if (irs->o == IR_KGC) {
- GCstr *s = ir_kstr(irs);
- if (s->len == 1) { /* Optimize put of single-char string constant. */
-- kchar = strdata(s)[0];
-+ kchar = (int8_t)strdata(s)[0]; /* Signed! */
- args[1] = ASMREF_TMP1; /* int, truncated to char */
- ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
- }
-@@ -1154,7 +1154,7 @@ static void asm_bufput(ASMState *as, IRIns *ir)
- asm_gencall(as, ci, args);
- if (args[1] == ASMREF_TMP1) {
- Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
-- if (kchar == -1)
-+ if (kchar == -129)
- asm_tvptr(as, tmp, irs->op1);
- else
- ra_allockreg(as, kchar, tmp);
---
-2.20.1
-
diff --git a/0027-Fix-string.format-c-0.patch b/0027-Fix-string.format-c-0.patch
deleted file mode 100644
index caece09..0000000
--- a/0027-Fix-string.format-c-0.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-commit 4660dbfa8a4f9eea5218b739075d04faadfeeef6
-Merge: 58d0dde 430d9f8
-Author: Mike Pall <mike>
-Date: Sun Jan 14 14:26:10 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From 430d9f8f7ebb779948dbd43944b876b1a3f58551 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Jan 2018 14:11:59 +0100
-Subject: [PATCH 27/72] Fix string.format("%c", 0).
-
----
- src/lib_string.c | 15 ++++++++-------
- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/0028-Fix-ARMv8-32-bit-subset-detection.patch b/0028-Fix-ARMv8-32-bit-subset-detection.patch
deleted file mode 100644
index 00687af..0000000
--- a/0028-Fix-ARMv8-32-bit-subset-detection.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 9eaad8574f5b2271b981cd31966b1e832cd8de12 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 18 Jan 2018 12:24:36 +0100
-Subject: [PATCH 28/72] Fix ARMv8 (32 bit subset) detection.
-
-Thanks to Markus Oberhumber.
----
- src/lj_arch.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 5962f3a..fcebd84 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -201,7 +201,7 @@
- #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-
--#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
-+#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
- #define LJ_ARCH_VERSION 80
- #elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
- #define LJ_ARCH_VERSION 70
---
-2.20.1
-
diff --git a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch b/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch
deleted file mode 100644
index 70ae35a..0000000
--- a/0029-Fix-LuaJIT-API-docs-for-LUAJIT_MODE_.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From c88602f080dcafea6ba222a2f7cc1ea0e41ef3cc Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 18 Jan 2018 12:29:39 +0100
-Subject: [PATCH 29/72] Fix LuaJIT API docs for LUAJIT_MODE_*.
-
-Thanks to sunfishgao.
----
- doc/ext_c_api.html | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
-index 041a722..4bb8251 100644
---- a/doc/ext_c_api.html
-+++ b/doc/ext_c_api.html
-@@ -89,8 +89,8 @@ other Lua/C API functions).
- </p>
- <p>
- The third argument specifies the mode, which is 'or'ed with a flag.
--The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on,
--<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or
-+The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off,
-+<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or
- <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
- </p>
- <p>
---
-2.20.1
-
diff --git a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch b/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch
deleted file mode 100644
index 8ee3a17..0000000
--- a/0030-MIPS64-Fix-soft-float-0.0-vs.-0.0-comparison.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 8071aa4ad65cf09e3b7adda4a7787d8897e5314c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 12:12:29 +0100
-Subject: [PATCH 30/72] MIPS64: Fix soft-float +-0.0 vs. +-0.0 comparison.
-
-Thanks to Stefan Pejic.
----
- src/vm_mips64.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index a78cd25..0a3f8e5 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -2661,7 +2661,7 @@ static void build_subroutines(BuildCtx *ctx)
- |. slt CRET1, CARG2, CARG1
- |8:
- | jr ra
-- |. nop
-+ |. li CRET1, 0
- |9:
- | jr ra
- |. move CRET1, CRET2
---
-2.20.1
-
diff --git a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch b/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch
deleted file mode 100644
index b95ca0c..0000000
--- a/0031-FFI-Don-t-assert-on-1LL-5.2-compatibility-mode-only.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-commit 74c544d68c07bcd416225598cdf15f88e62fd457
-Merge: 8071aa4 b03a56f
-Author: Mike Pall <mike>
-Date: Mon Jan 29 12:53:42 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From b03a56f28ec360bbcf43091afd0607890a4a33c7 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 12:47:08 +0100
-Subject: [PATCH 31/72] FFI: Don't assert on #1LL (5.2 compatibility mode
- only).
-
-Reported by Denis Golovan.
----
- src/lib_ffi.c | 2 +-
- src/lj_carith.c | 9 +++++++++
- src/lj_carith.h | 1 +
- 3 files changed, 11 insertions(+), 1 deletion(-)
-
-diff --git a/src/lib_ffi.c b/src/lib_ffi.c
-index f2f2ede..83483d9 100644
---- a/src/lib_ffi.c
-+++ b/src/lib_ffi.c
-@@ -193,7 +193,7 @@ LJLIB_CF(ffi_meta___eq) LJLIB_REC(cdata_arith MM_eq)
-
- LJLIB_CF(ffi_meta___len) LJLIB_REC(cdata_arith MM_len)
- {
-- return ffi_arith(L);
-+ return lj_carith_len(L);
- }
-
- LJLIB_CF(ffi_meta___lt) LJLIB_REC(cdata_arith MM_lt)
-diff --git a/src/lj_carith.c b/src/lj_carith.c
-index 6224dee..c34596c 100644
---- a/src/lj_carith.c
-+++ b/src/lj_carith.c
-@@ -272,6 +272,15 @@ int lj_carith_op(lua_State *L, MMS mm)
- return lj_carith_meta(L, cts, &ca, mm);
- }
-
-+/* No built-in functionality for length of cdata. */
-+int lj_carith_len(lua_State *L)
-+{
-+ CTState *cts = ctype_cts(L);
-+ CDArith ca;
-+ carith_checkarg(L, cts, &ca);
-+ return lj_carith_meta(L, cts, &ca, MM_len);
-+}
-+
- /* -- 64 bit bit operations helpers --------------------------------------- */
-
- #if LJ_64
-diff --git a/src/lj_carith.h b/src/lj_carith.h
-index 3c15591..82fc824 100644
---- a/src/lj_carith.h
-+++ b/src/lj_carith.h
-@@ -11,6 +11,7 @@
- #if LJ_HASFFI
-
- LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
-+LJ_FUNC int lj_carith_len(lua_State *L);
-
- #if LJ_32
- LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
-
---
-2.20.1
-
diff --git a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch b/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch
deleted file mode 100644
index 192f271..0000000
--- a/0032-Fix-GCC-7-Wimplicit-fallthrough-warnings.patch
+++ /dev/null
@@ -1,291 +0,0 @@
-commit 0bf46e1edf94c43795b5e491efe682ab70974ce7
-Merge: 74c544d d4ee803
-Author: Mike Pall <mike>
-Date: Mon Jan 29 13:19:30 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From d4ee80342770d1281e2ce877f8ae8ab1d99e6528 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 13:06:13 +0100
-Subject: [PATCH 32/72] Fix GCC 7 -Wimplicit-fallthrough warnings.
-
----
- dynasm/dasm_arm.h | 2 ++
- dynasm/dasm_mips.h | 1 +
- dynasm/dasm_ppc.h | 1 +
- dynasm/dasm_x86.h | 14 ++++++++++++--
- src/lj_asm.c | 3 ++-
- src/lj_cparse.c | 10 ++++++++++
- src/lj_err.c | 1 +
- src/lj_opt_sink.c | 2 +-
- src/lj_parse.c | 3 ++-
- src/luajit.c | 1 +
- 10 files changed, 33 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
-index a43f7c6..1d404cc 100644
---- a/dynasm/dasm_arm.h
-+++ b/dynasm/dasm_arm.h
-@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...)
- case DASM_IMMV8:
- CK((n & 3) == 0, RANGE_I);
- n >>= 2;
-+ /* fallthrough */
- case DASM_IMML8:
- case DASM_IMML12:
- CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
-@@ -371,6 +372,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;
-diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
-index 7eac669..46af034 100644
---- a/dynasm/dasm_mips.h
-+++ b/dynasm/dasm_mips.h
-@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n);
-diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
-index 6110361..81b9a76 100644
---- a/dynasm/dasm_ppc.h
-+++ b/dynasm/dasm_ppc.h
-@@ -350,6 +350,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
-diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
-index f9260b0..8ae911d 100644
---- a/dynasm/dasm_x86.h
-+++ b/dynasm/dasm_x86.h
-@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...)
- switch (action) {
- case DASM_DISP:
- if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
-- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
-+ /* fallthrough */
-+ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
- case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
- case DASM_IMM_D: ofs += 4; break;
- case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
- case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
-- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
-+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
- case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
- case DASM_SPACE: p++; ofs += n; break;
- case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
-@@ -323,11 +324,14 @@ int dasm_link(Dst_DECL, size_t *szp)
- pos += 2;
- break;
- }
-+ /* fallthrough */
- case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
-+ /* fallthrough */
- case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
- case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
- case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
- case DASM_LABEL_LG: p++;
-+ /* fallthrough */
- case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
- case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
- case DASM_EXTERN: p += 2; break;
-@@ -385,12 +389,15 @@ int dasm_encode(Dst_DECL, void *buffer)
- if (mrm != 5) { mm[-1] -= 0x80; break; } }
- if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
- }
-+ /* fallthrough */
- case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
- case DASM_IMM_DB: if (((n+128)&-256) == 0) {
- db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
- } else mark = NULL;
-+ /* fallthrough */
- case DASM_IMM_D: wd: dasmd(n); break;
- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
-+ /* fallthrough */
- case DASM_IMM_W: dasmw(n); break;
- case DASM_VREG: {
- int t = *p++;
-@@ -397,6 +404,7 @@
- }
- case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
- b++; n = (int)(ptrdiff_t)D->globals[-n];
-+ /* fallthrough */
- case DASM_REL_A: rel_a:
- n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
- case DASM_REL_PC: rel_pc: {
-@@ -407,6 +415,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- }
- case DASM_IMM_LG:
- p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
-+ /* fallthrough */
- case DASM_IMM_PC: {
- int *pb = DASM_POS2PTR(D, n);
- n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
-@@ -427,6 +436,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
- case DASM_MARK: mark = cp; break;
- case DASM_ESC: action = *p++;
-+ /* fallthrough */
- default: *cp++ = action; break;
- case DASM_SECTION: case DASM_STOP: goto stop;
- }
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index 02714d4..dd7186f 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -2136,6 +2136,7 @@ static void asm_setup_regsp(ASMState *as)
- case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
- if (REGARG_NUMGPR < 3 && as->evenspill < 3)
- as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
-+ /* fallthrough */
- #if LJ_TARGET_X86 && LJ_HASFFI
- if (0) {
- case IR_CNEW:
-@@ -2176,7 +2177,7 @@ static void asm_setup_regsp(ASMState *as)
- continue;
- #endif
- }
-- /* fallthrough for integer POW */
-+ /* fallthrough */ /* for integer POW */
- case IR_DIV: case IR_MOD:
- if (!irt_isnum(ir->t)) {
- ir->prev = REGSP_HINT(RID_RET);
-diff --git a/src/lj_cparse.c b/src/lj_cparse.c
-index 2ba50a7..f111537 100644
---- a/src/lj_cparse.c
-+++ b/src/lj_cparse.c
-@@ -590,28 +590,34 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- k->id = k2.id > k3.id ? k2.id : k3.id;
- continue;
- }
-+ /* fallthrough */
- case 1:
- if (cp_opt(cp, CTOK_OROR)) {
- cp_expr_sub(cp, &k2, 2); k->i32 = k->u32 || k2.u32; k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 2:
- if (cp_opt(cp, CTOK_ANDAND)) {
- cp_expr_sub(cp, &k2, 3); k->i32 = k->u32 && k2.u32; k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 3:
- if (cp_opt(cp, '|')) {
- cp_expr_sub(cp, &k2, 4); k->u32 = k->u32 | k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 4:
- if (cp_opt(cp, '^')) {
- cp_expr_sub(cp, &k2, 5); k->u32 = k->u32 ^ k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 5:
- if (cp_opt(cp, '&')) {
- cp_expr_sub(cp, &k2, 6); k->u32 = k->u32 & k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 6:
- if (cp_opt(cp, CTOK_EQ)) {
- cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 == k2.u32; k->id = CTID_INT32;
-@@ -620,6 +626,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- cp_expr_sub(cp, &k2, 7); k->i32 = k->u32 != k2.u32; k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 7:
- if (cp_opt(cp, '<')) {
- cp_expr_sub(cp, &k2, 8);
-@@ -654,6 +661,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- k->id = CTID_INT32;
- continue;
- }
-+ /* fallthrough */
- case 8:
- if (cp_opt(cp, CTOK_SHL)) {
- cp_expr_sub(cp, &k2, 9); k->u32 = k->u32 << k2.u32;
-@@ -666,6 +674,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- k->u32 = k->u32 >> k2.u32;
- continue;
- }
-+ /* fallthrough */
- case 9:
- if (cp_opt(cp, '+')) {
- cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 + k2.u32;
-@@ -675,6 +684,7 @@ static void cp_expr_infix(CPState *cp, CPValue *k, int pri)
- } else if (cp_opt(cp, '-')) {
- cp_expr_sub(cp, &k2, 10); k->u32 = k->u32 - k2.u32; goto arith_result;
- }
-+ /* fallthrough */
- case 10:
- if (cp_opt(cp, '*')) {
- cp_expr_unary(cp, &k2); k->u32 = k->u32 * k2.u32; goto arith_result;
-diff --git a/src/lj_err.c b/src/lj_err.c
-index 54f42c3..13a1ded 100644
---- a/src/lj_err.c
-+++ b/src/lj_err.c
-@@ -153,6 +153,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
- case FRAME_CONT: /* Continuation frame. */
- if (frame_iscont_fficb(frame))
- goto unwind_c;
-+ /* fallthrough */
- case FRAME_VARG: /* Vararg frame. */
- frame = frame_prevd(frame);
- break;
-diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
-index 6a00d04..4efe395 100644
---- a/src/lj_opt_sink.c
-+++ b/src/lj_opt_sink.c
-@@ -100,8 +100,8 @@ static void sink_mark_ins(jit_State *J)
- (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP &&
- !sink_checkphi(J, ir, (ir+1)->op2))))
- irt_setmark(ir->t); /* Mark ineligible allocation. */
-- /* fallthrough */
- #endif
-+ /* fallthrough */
- case IR_USTORE:
- irt_setmark(IR(ir->op2)->t); /* Mark stored value. */
- break;
-diff --git a/src/lj_parse.c b/src/lj_parse.c
-index 9e5976f..6785495 100644
---- a/src/lj_parse.c
-+++ b/src/lj_parse.c
-@@ -2696,7 +2696,8 @@ static int parse_stmt(LexState *ls)
- lj_lex_next(ls);
- parse_goto(ls);
- break;
-- } /* else: fallthrough */
-+ }
-+ /* fallthrough */
- default:
- parse_call_assign(ls);
- break;
-diff --git a/src/luajit.c b/src/luajit.c
-index 9e15b26..0e18dc5 100644
---- a/src/luajit.c
-+++ b/src/luajit.c
-@@ -419,6 +419,7 @@ static int collectargs(char **argv, int *flags)
- break;
- case 'e':
- *flags |= FLAGS_EXEC;
-+ /* fallthrough */
- case 'j': /* LuaJIT extension */
- case 'l':
- *flags |= FLAGS_OPTION;
---
-2.20.1
-
diff --git a/0033-Clear-stack-after-print_jit_status-in-CLI.patch b/0033-Clear-stack-after-print_jit_status-in-CLI.patch
deleted file mode 100644
index 53a4acf..0000000
--- a/0033-Clear-stack-after-print_jit_status-in-CLI.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-commit fddef924097f28c46a0a5b45483a6086b33cab81
-Merge: 0bf46e1 03cd5aa
-Author: Mike Pall <mike>
-Date: Mon Jan 29 13:28:53 2018 +0100
-
- Merge branch 'master' into v2.1
-
-From 03cd5aa749c1bc3bb4b7d4289236b6096cb3dc85 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Mon, 29 Jan 2018 13:25:51 +0100
-Subject: [PATCH 33/72] Clear stack after print_jit_status() in CLI.
-
-Suggested by Hydroque.
----
- src/luajit.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/luajit.c b/src/luajit.c
-index 0e18dc5..9ede59c 100644
---- a/src/luajit.c
-+++ b/src/luajit.c
-@@ -151,6 +151,7 @@ static void print_jit_status(lua_State *L)
- fputs(s, stdout);
- }
- putc('\n', stdout);
-+ lua_settop(L, 0); /* clear stack */
- }
-
- static void createargtable(lua_State *L, char **argv, int argc, int argf)
---
-2.20.1
-
diff --git a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch b/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch
deleted file mode 100644
index 1b90fb3..0000000
--- a/0034-Fix-rechaining-of-pseudo-resurrected-string-keys.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 046129dbdda5261c1b17469a2895a113d14c070a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 27 Feb 2018 23:02:23 +0100
-Subject: [PATCH 34/72] Fix rechaining of pseudo-resurrected string keys.
-
-This is a serious bug. But extremely hard to reproduce, so it went
-undetected for 8 years. One needs two resurrections with different
-main nodes, which are both in a hash chain which gets relinked on
-key insertion where the colliding node is in a non-main position. Phew.
-
-Thanks to lbeiming.
----
- src/lj_tab.c | 23 +++++++++++++++++++++++
- 1 file changed, 23 insertions(+)
-
-diff --git a/src/lj_tab.c b/src/lj_tab.c
-index 50f447e..f2f3c0b 100644
---- a/src/lj_tab.c
-+++ b/src/lj_tab.c
-@@ -457,6 +457,29 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
- freenode->next = nn->next;
- nn->next = n->next;
- setmref(n->next, nn);
-+ /*
-+ ** Rechaining a resurrected string key creates a new dilemma:
-+ ** Another string key may have originally been resurrected via
-+ ** _any_ of the previous nodes as a chain anchor. Including
-+ ** a node that had to be moved, which makes them unreachable.
-+ ** It's not feasible to check for all previous nodes, so rechain
-+ ** any string key that's currently in a non-main positions.
-+ */
-+ while ((nn = nextnode(freenode))) {
-+ if (tvisstr(&nn->key) && !tvisnil(&nn->val)) {
-+ Node *mn = hashstr(t, strV(&nn->key));
-+ if (mn != freenode) {
-+ freenode->next = nn->next;
-+ nn->next = mn->next;
-+ setmref(mn->next, nn);
-+ } else {
-+ freenode = nn;
-+ }
-+ } else {
-+ freenode = nn;
-+ }
-+ }
-+ break;
- } else {
- freenode = nn;
- }
---
-2.20.1
-
diff --git a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch b/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch
deleted file mode 100644
index 832809e..0000000
--- a/0035-DynASM-x86-Add-BMI1-and-BMI2-instructions.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From fe651bf6e2b4d02b624be3c289378c08bab2fa9b Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 27 Feb 2018 23:22:40 +0100
-Subject: [PATCH 35/72] DynASM/x86: Add BMI1 and BMI2 instructions.
-
-Thanks to Peter Cawley.
----
- dynasm/dasm_x86.lua | 19 +++++++++++++++++++
- 1 file changed, 19 insertions(+)
-
-diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
-index 4c031e2..c1d267a 100644
---- a/dynasm/dasm_x86.lua
-+++ b/dynasm/dasm_x86.lua
-@@ -955,6 +955,7 @@ end
- -- "u" Use VEX encoding, vvvv unused.
- -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
- -- removed from the list used by future characters).
-+-- "w" Use VEX encoding, vvvv from 3rd operand.
- -- "L" Force VEX.L
- --
- -- All of the following characters force a flush of the opcode:
-@@ -1677,6 +1678,24 @@ local map_op = {
- -- Intel ADX
- adcx_2 = "rmqd:660F38F6rM",
- adox_2 = "rmqd:F30F38F6rM",
-+
-+ -- BMI1
-+ andn_3 = "rrmqd:0F38VF2rM",
-+ bextr_3 = "rmrqd:0F38wF7rM",
-+ blsi_2 = "rmqd:0F38vF33m",
-+ blsmsk_2 = "rmqd:0F38vF32m",
-+ blsr_2 = "rmqd:0F38vF31m",
-+ tzcnt_2 = "rmqdw:F30FBCrM",
-+
-+ -- BMI2
-+ bzhi_3 = "rmrqd:0F38wF5rM",
-+ mulx_3 = "rrmqd:F20F38VF6rM",
-+ pdep_3 = "rrmqd:F20F38VF5rM",
-+ pext_3 = "rrmqd:F30F38VF5rM",
-+ rorx_3 = "rmSqd:F20F3AuF0rMS",
-+ sarx_3 = "rmrqd:F30F38wF7rM",
-+ shrx_3 = "rmrqd:F20F38wF7rM",
-+ shlx_3 = "rmrqd:660F38wF7rM",
- }
-
- ------------------------------------------------------------------------------
---
-2.20.1
-
diff --git a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch b/0036-Give-expected-results-for-negative-non-base-10-numbe.patch
deleted file mode 100644
index 3279dfe..0000000
--- a/0036-Give-expected-results-for-negative-non-base-10-numbe.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From f3cf0d6e15240098147437fed7bd436ff55fdf8c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 22 Apr 2018 13:14:28 +0200
-Subject: [PATCH 36/72] Give expected results for negative non-base-10 numbers
- in tonumber().
-
-This was undefined in Lua 5.1, but it's defined in 5.2.
----
- src/lib_base.c | 27 ++++++++++++++++++---------
- 1 file changed, 18 insertions(+), 9 deletions(-)
-
-diff --git a/src/lib_base.c b/src/lib_base.c
-index 3a75787..d61e876 100644
---- a/src/lib_base.c
-+++ b/src/lib_base.c
-@@ -287,18 +287,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
- } else {
- const char *p = strdata(lj_lib_checkstr(L, 1));
- char *ep;
-+ unsigned int neg = 0;
- unsigned long ul;
- if (base < 2 || base > 36)
- lj_err_arg(L, 2, LJ_ERR_BASERNG);
-- ul = strtoul(p, &ep, base);
-- if (p != ep) {
-- while (lj_char_isspace((unsigned char)(*ep))) ep++;
-- if (*ep == '\0') {
-- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
-- setintV(L->base-1-LJ_FR2, (int32_t)ul);
-- else
-- setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
-- return FFH_RES(1);
-+ while (lj_char_isspace((unsigned char)(*p))) p++;
-+ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
-+ if (lj_char_isalnum((unsigned char)(*p))) {
-+ ul = strtoul(p, &ep, base);
-+ if (p != ep) {
-+ while (lj_char_isspace((unsigned char)(*ep))) ep++;
-+ if (*ep == '\0') {
-+ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
-+ if (neg) ul = -ul;
-+ setintV(L->base-1-LJ_FR2, (int32_t)ul);
-+ } else {
-+ lua_Number n = (lua_Number)ul;
-+ if (neg) n = -n;
-+ setnumV(L->base-1-LJ_FR2, n);
-+ }
-+ return FFH_RES(1);
-+ }
- }
- }
- }
---
-2.20.1
-
diff --git a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch b/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch
deleted file mode 100644
index c0406a5..0000000
--- a/0037-FFI-Add-tonumber-specialization-for-failed-conversio.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 02b521981a1ab919ff2cd4d9bcaee80baf77dce2 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 22 Apr 2018 13:27:25 +0200
-Subject: [PATCH 37/72] FFI: Add tonumber() specialization for failed
- conversions.
-
-Contributed by Javier Guerra Giraldez.
----
- src/lj_crecord.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/lj_crecord.c b/src/lj_crecord.c
-index 84fc49e..bc88d63 100644
---- a/src/lj_crecord.c
-+++ b/src/lj_crecord.c
-@@ -1661,6 +1661,8 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
- d = ctype_get(cts, CTID_DOUBLE);
- J->base[0] = crec_ct_tv(J, d, 0, J->base[0], &rd->argv[0]);
- } else {
-+ /* Specialize to the ctype that couldn't be converted. */
-+ argv2cdata(J, J->base[0], &rd->argv[0]);
- J->base[0] = TREF_NIL;
- }
- }
---
-2.20.1
-
diff --git a/0038-Bump-copyright-date-to-2018.patch b/0038-Bump-copyright-date-to-2018.patch
deleted file mode 100644
index 1f9e5eb..0000000
--- a/0038-Bump-copyright-date-to-2018.patch
+++ /dev/null
@@ -1,387 +0,0 @@
-From cf7a0540a3a9f80fc729211eb21d1e9b72acc89c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 25 Apr 2018 12:07:08 +0200
-Subject: [PATCH 38/72] Bump copyright date to 2018.
-
----
- doc/bluequad-print.css | 2 +-
- doc/bluequad.css | 2 +-
- doc/changes.html | 5 ++---
- doc/contact.html | 7 +++----
- doc/ext_c_api.html | 5 ++---
- doc/ext_ffi.html | 5 ++---
- doc/ext_ffi_api.html | 5 ++---
- doc/ext_ffi_semantics.html | 5 ++---
- doc/ext_ffi_tutorial.html | 5 ++---
- doc/ext_jit.html | 5 ++---
- doc/extensions.html | 5 ++---
- doc/faq.html | 5 ++---
- doc/install.html | 5 ++---
- doc/luajit.html | 7 +++----
- doc/running.html | 5 ++---
- doc/status.html | 5 ++---
- 16 files changed, 32 insertions(+), 46 deletions(-)
-
-diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css
-index 62e1c16..d5a3ea3 100644
---- a/doc/bluequad-print.css
-+++ b/doc/bluequad-print.css
-@@ -1,4 +1,4 @@
--/* Copyright (C) 2004-2017 Mike Pall.
-+/* Copyright (C) 2004-2018 Mike Pall.
- *
- * You are welcome to use the general ideas of this design for your own sites.
- * But please do not steal the stylesheet, the layout or the color scheme.
-diff --git a/doc/bluequad.css b/doc/bluequad.css
-index be2c4bf..cfc889a 100644
---- a/doc/bluequad.css
-+++ b/doc/bluequad.css
-@@ -1,4 +1,4 @@
--/* Copyright (C) 2004-2017 Mike Pall.
-+/* Copyright (C) 2004-2018 Mike Pall.
- *
- * You are welcome to use the general ideas of this design for your own sites.
- * But please do not steal the stylesheet, the layout or the color scheme.
-diff --git a/doc/changes.html b/doc/changes.html
-index 4a4d4fb..c1848e8 100644
---- a/doc/changes.html
-+++ b/doc/changes.html
-@@ -3,8 +3,7 @@
- <head>
- <title>LuaJIT Change History</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -1010,7 +1009,7 @@ This is the initial non-public release of LuaJIT.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/contact.html b/doc/contact.html
-index 5e07bde..54ddf74 100644
---- a/doc/contact.html
-+++ b/doc/contact.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Contact</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -91,7 +90,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
- <h2>Copyright</h2>
- <p>
- All documentation is
--Copyright © 2005-2017 Mike Pall.
-+Copyright © 2005-2018 Mike Pall.
- </p>
-
-
-@@ -99,7 +98,7 @@ Copyright © 2005-2017 Mike Pall.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
-index 4bb8251..3825956 100644
---- a/doc/ext_c_api.html
-+++ b/doc/ext_c_api.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Lua/C API Extensions</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -177,7 +176,7 @@ Also note that this mechanism is not without overhead.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
-index d48d77f..74ca294 100644
---- a/doc/ext_ffi.html
-+++ b/doc/ext_ffi.html
-@@ -3,8 +3,7 @@
- <head>
- <title>FFI Library</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -320,7 +319,7 @@ without undue conversion penalties.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
-index 566897c..10f2d02 100644
---- a/doc/ext_ffi_api.html
-+++ b/doc/ext_ffi_api.html
-@@ -3,8 +3,7 @@
- <head>
- <title>ffi.* API Functions</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -556,7 +555,7 @@ named <tt>i</tt>.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
-index ae3c037..218049d 100644
---- a/doc/ext_ffi_semantics.html
-+++ b/doc/ext_ffi_semantics.html
-@@ -3,8 +3,7 @@
- <head>
- <title>FFI Semantics</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -1235,7 +1234,7 @@ compiled.</li>
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
-index 29cf549..cd455cf 100644
---- a/doc/ext_ffi_tutorial.html
-+++ b/doc/ext_ffi_tutorial.html
-@@ -3,8 +3,7 @@
- <head>
- <title>FFI Tutorial</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -591,7 +590,7 @@ it to a local variable in the function scope is unnecessary.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/ext_jit.html b/doc/ext_jit.html
-index 5017e3c..ce6dcd6 100644
---- a/doc/ext_jit.html
-+++ b/doc/ext_jit.html
-@@ -3,8 +3,7 @@
- <head>
- <title>jit.* Library</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -189,7 +188,7 @@ if you want to know more.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/extensions.html b/doc/extensions.html
-index 3d9e82b..fa412e0 100644
---- a/doc/extensions.html
-+++ b/doc/extensions.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Extensions</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -398,7 +397,7 @@ lead to the termination of the process.</li>
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/faq.html b/doc/faq.html
-index afeff94..9338be4 100644
---- a/doc/faq.html
-+++ b/doc/faq.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Frequently Asked Questions (FAQ)</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -174,7 +173,7 @@ the development of certain features, if they are important to you.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/install.html b/doc/install.html
-index 4bcc506..befffa7 100644
---- a/doc/install.html
-+++ b/doc/install.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Installation</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -636,7 +635,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/luajit.html b/doc/luajit.html
-index 0003008..d8f531d 100644
---- a/doc/luajit.html
-+++ b/doc/luajit.html
-@@ -3,8 +3,7 @@
- <head>
- <title>LuaJIT</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -150,7 +149,7 @@ Lua is a powerful, dynamic and light-weight programming language.
- It may be embedded or used as a general-purpose, stand-alone language.
- </p>
- <p>
--LuaJIT is Copyright © 2005-2017 Mike Pall, released under the
-+LuaJIT is Copyright © 2005-2018 Mike Pall, released under the
- <a
href="http://www.opensource.org/licenses/mit-license.php">&l...
class="ext">»</span> MIT open source
license</a>.
- </p>
- <p>
-@@ -224,7 +223,7 @@ Please select a sub-topic in the navigation bar to learn more about
LuaJIT.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/running.html b/doc/running.html
-index 331c22d..08d7f71 100644
---- a/doc/running.html
-+++ b/doc/running.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Running LuaJIT</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -296,7 +295,7 @@ Here are the parameters and their default settings:
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
-diff --git a/doc/status.html b/doc/status.html
-index aa8df93..ea61db1 100644
---- a/doc/status.html
-+++ b/doc/status.html
-@@ -3,8 +3,7 @@
- <head>
- <title>Status</title>
- <meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
--<meta name="Author" content="Mike Pall">
--<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
-+<meta name="Copyright" content="Copyright (C) 2005-2018">
- <meta name="Language" content="en">
- <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
- <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
-@@ -100,7 +99,7 @@ garbage collector.
- </div>
- <div id="foot">
- <hr class="hide">
--Copyright © 2005-2017 Mike Pall
-+Copyright © 2005-2018
- <span class="noprint">
- ·
- <a href="contact.html">Contact</a>
---
-2.20.1
-
diff --git a/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch
b/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch
deleted file mode 100644
index ee383da..0000000
--- a/0039-FFI-Make-FP-to-U64-conversions-match-JIT-backend-beh.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-commit 362f034c1b91d52ea2cf971314ed4e0c24348bff
-Merge: 260b9b4 f5d424a
-Author: Mike Pall <mike>
-Date: Sun May 20 12:28:10 2018 +0200
-
- Merge branch 'master' into v2.1
-
-From f5d424afe8b9395f0df05aba905e0e1f6a2262b8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 20 May 2018 12:25:36 +0200
-Subject: [PATCH 39/72] FFI: Make FP to U64 conversions match JIT backend
- behavior.
-
----
- src/lj_obj.h | 18 +++++++++++++-----
- 1 file changed, 13 insertions(+), 5 deletions(-)
-
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index e70b003..2ee526c 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -816,14 +816,22 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-
- #define lj_num2int(n) ((int32_t)(n))
-
-+/*
-+** This must match the JIT backend behavior. In particular for archs
-+** that don't have a common hardware instruction for this conversion.
-+** Note that signed FP to unsigned int conversions have an undefined
-+** result and should never be relied upon in portable FFI code.
-+** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
-+*/
- static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
- {
--#ifdef _MSC_VER
-- if (n >= 9223372036854775808.0) /* They think it's a feature. */
-- return (uint64_t)(int64_t)(n - 18446744073709551616.0);
-- else
-+#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
-+ int64_t i = (int64_t)n;
-+ if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
-+ return (uint64_t)i;
-+#else
-+ return (uint64_t)n;
- #endif
-- return (uint64_t)n;
- }
-
- static LJ_AINLINE int32_t numberVint(cTValue *o)
---
-2.20.1
-
diff --git a/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch
b/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch
deleted file mode 100644
index 01dd836..0000000
--- a/0040-x86-x64-Check-for-jcc-when-using-xor-r-r-in-emit_loa.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From fb5e522fbc0750c838ef6a926b11c5d870826183 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 20 May 2018 12:40:33 +0200
-Subject: [PATCH 40/72] x86/x64: Check for jcc when using xor r,r in
- emit_loadi().
-
-Thanks to Peter Cawley.
----
- src/lj_emit_x86.h | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
-index bcceb93..9c371a9 100644
---- a/src/lj_emit_x86.h
-+++ b/src/lj_emit_x86.h
-@@ -268,10 +268,12 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs,
int32_t i)
- /* mov r, i / xor r, r */
- static void emit_loadi(ASMState *as, Reg r, int32_t i)
- {
-- /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
-+ /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. */
- if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
- (as->curins+1 < as->T->nins &&
-- IR(as->curins+1)->o == IR_HIOP)))) {
-+ IR(as->curins+1)->o == IR_HIOP))) &&
-+ !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) ||
-+ (*as->mcp & 0xf0) == XI_JCCs)) {
- emit_rr(as, XO_ARITH(XOg_XOR), r, r);
- } else {
- MCode *p = as->mcp;
---
-2.20.1
-
diff --git a/0041-PPC-NetBSD-Fix-endianess-check.patch
b/0041-PPC-NetBSD-Fix-endianess-check.patch
deleted file mode 100644
index 6800a89..0000000
--- a/0041-PPC-NetBSD-Fix-endianess-check.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-commit d36afcfea57c29fb51060c24679f3b2c07806545
-Merge: b708297 b025b01
-Author: Mike Pall <mike>
-Date: Tue Jun 5 11:39:10 2018 +0200
-
- Merge branch 'master' into v2.1
-
-From b025b01c5b9d23f6218c7d72b7aafa3f1ab1e08a Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 11:36:18 +0200
-Subject: [PATCH 41/72] PPC/NetBSD: Fix endianess check.
-
-Thanks to he32 and daurnimator.
----
- src/lj_arch.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index e04c4ee..5f7e445 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -339,7 +339,7 @@
- #error "No support for ILP32 model on ARM64"
- #endif
- #elif LJ_TARGET_PPC
--#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
-+#if !LJ_ARCH_PPC64 && (defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER)
|| (_BYTE_ORDER == _LITTLE_ENDIAN)))
- #error "No support for little-endian PPC32"
- #endif
- #if LJ_ARCH_PPC64
---
-2.20.1
-
diff --git a/0042-DynASM-x86-Add-FMA3-instructions.patch
b/0042-DynASM-x86-Add-FMA3-instructions.patch
deleted file mode 100644
index 0fe390a..0000000
--- a/0042-DynASM-x86-Add-FMA3-instructions.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From cc299958bb412f229844e53473a035c280544ec3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 12:23:13 +0200
-Subject: [PATCH 42/72] DynASM/x86: Add FMA3 instructions.
-
-Thanks to Alexander Nasonov.
----
- dynasm/dasm_x86.lua | 67 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 67 insertions(+)
-
-diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
-index c1d267a..73502f6 100644
---- a/dynasm/dasm_x86.lua
-+++ b/dynasm/dasm_x86.lua
-@@ -1696,6 +1696,73 @@ local map_op = {
- sarx_3 = "rmrqd:F30F38wF7rM",
- shrx_3 = "rmrqd:F20F38wF7rM",
- shlx_3 = "rmrqd:660F38wF7rM",
-+
-+ -- FMA3
-+ vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
-+ vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
-+ vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
-+ vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
-+ vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
-+ vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
-+
-+ vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
-+ vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
-+ vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
-+ vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
-+ vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
-+ vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
-+
-+ vfmadd132pd_3 = "rrmoy:660F38VX98rM",
-+ vfmadd132ps_3 = "rrmoy:660F38V98rM",
-+ vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
-+ vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
-+ vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
-+ vfmadd213ps_3 = "rrmoy:660F38VA8rM",
-+ vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
-+ vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
-+ vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
-+ vfmadd231ps_3 = "rrmoy:660F38VB8rM",
-+ vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
-+ vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
-+
-+ vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
-+ vfmsub132ps_3 = "rrmoy:660F38V9ArM",
-+ vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
-+ vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
-+ vfmsub213pd_3 = "rrmoy:660F38VXAArM",
-+ vfmsub213ps_3 = "rrmoy:660F38VAArM",
-+ vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
-+ vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
-+ vfmsub231pd_3 = "rrmoy:660F38VXBArM",
-+ vfmsub231ps_3 = "rrmoy:660F38VBArM",
-+ vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
-+ vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
-+
-+ vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
-+ vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
-+ vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
-+ vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
-+ vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
-+ vfnmadd213ps_3 = "rrmoy:660F38VACrM",
-+ vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
-+ vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
-+ vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
-+ vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
-+ vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
-+ vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
-+
-+ vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
-+ vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
-+ vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
-+ vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
-+ vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
-+ vfnmsub213ps_3 = "rrmoy:660F38VAErM",
-+ vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
-+ vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
-+ vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
-+ vfnmsub231ps_3 = "rrmoy:660F38VBErM",
-+ vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
-+ vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
- }
-
- ------------------------------------------------------------------------------
---
-2.20.1
-
diff --git a/0043-x86-Disassemble-FMA3-instructions.patch
b/0043-x86-Disassemble-FMA3-instructions.patch
deleted file mode 100644
index 1d64c0a..0000000
--- a/0043-x86-Disassemble-FMA3-instructions.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 55f70823242aa4e6acc248bde5cf8194ba1b27e3 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 12:23:29 +0200
-Subject: [PATCH 43/72] x86: Disassemble FMA3 instructions.
-
-Thanks to Alexander Nasonov.
----
- src/jit/dis_x86.lua | 24 +++++++++++++++++++++++-
- 1 file changed, 23 insertions(+), 1 deletion(-)
-
-diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
-index 4371233..3a68c93 100644
---- a/src/jit/dis_x86.lua
-+++ b/src/jit/dis_x86.lua
-@@ -239,6 +239,24 @@
nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
- --8x
- [0x8c] = "||pmaskmovXrvVSm",
- [0x8e] = "||pmaskmovVSmXvr",
-+--9x
-+[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
-+[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
-+[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
-+[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
-+[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
-+--Ax
-+[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
-+[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
-+[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
-+[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
-+[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
-+--Bx
-+[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
-+[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
-+[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
-+[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
-+[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
- --Dx
- [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
- [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
-@@ -483,7 +501,7 @@ local function putpat(ctx, name, pat)
- local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
- local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
-
-- -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
-+ -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
- for p in gmatch(pat, ".") do
- local x = nil
- if p == "V" or p == "U" then
-@@ -506,6 +524,9 @@ local function putpat(ctx, name, pat)
- sz = ctx.o16 and "X" or "M"; ctx.o16 = false
- if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
- regs = map_regs[sz]
-+ elseif p == "H" then
-+ name = name..(ctx.rexw and "d" or "s")
-+ ctx.rexw = false
- elseif p == "S" then
- name = name..lower(sz)
- elseif p == "s" then
-@@ -735,6 +756,7 @@ map_act = {
- V = putpat, U = putpat, T = putpat,
- M = putpat, X = putpat, P = putpat,
- F = putpat, G = putpat, Y = putpat,
-+ H = putpat,
-
- -- Collect prefixes.
- [":"] = function(ctx, name, pat)
---
-2.20.1
-
diff --git a/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch
b/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch
deleted file mode 100644
index 315b528..0000000
--- a/0044-From-Lua-5.3-assert-accepts-any-type-of-error-object.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-From a5a89ab586a3b5bb4f266949bbf3dc2b140e2374 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 12:23:56 +0200
-Subject: [PATCH 44/72] From Lua 5.3: assert() accepts any type of error
- object.
-
----
- doc/extensions.html | 1 +
- src/lib_base.c | 10 +++++-----
- 2 files changed, 6 insertions(+), 5 deletions(-)
-
-diff --git a/doc/extensions.html b/doc/extensions.html
-index 55c4b70..7379041 100644
---- a/doc/extensions.html
-+++ b/doc/extensions.html
-@@ -373,6 +373,7 @@ LuaJIT supports some extensions from Lua 5.3:
- <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8
encoding in string literals.</li>
- <li>The argument table <tt>arg</tt> can be read (and modified) by
<tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
- <li><tt>io.read()</tt> and <tt>file:read()</tt> accept
formats with or without a leading <tt>*</tt>.</li>
-+<li><tt>assert()</tt> accepts any type of error object.</li>
- <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
- <li><tt>coroutine.isyieldable()</tt>.</li>
- <li>Lua/C API extensions:
-diff --git a/src/lib_base.c b/src/lib_base.c
-index d61e876..1cd8305 100644
---- a/src/lib_base.c
-+++ b/src/lib_base.c
-@@ -42,13 +42,13 @@
-
- LJLIB_ASM(assert) LJLIB_REC(.)
- {
-- GCstr *s;
- lj_lib_checkany(L, 1);
-- s = lj_lib_optstr(L, 2);
-- if (s)
-- lj_err_callermsg(L, strdata(s));
-- else
-+ if (L->top == L->base+1)
- lj_err_caller(L, LJ_ERR_ASSERT);
-+ else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
-+ lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
-+ else
-+ lj_err_run(L);
- return FFH_UNREACHABLE;
- }
-
---
-2.20.1
-
diff --git a/0045-Windows-Add-UWP-support-part-1.patch
b/0045-Windows-Add-UWP-support-part-1.patch
deleted file mode 100644
index fcb91fb..0000000
--- a/0045-Windows-Add-UWP-support-part-1.patch
+++ /dev/null
@@ -1,359 +0,0 @@
-From c3c54ce1aef782823936808a75460e6b53aada2c Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Tue, 5 Jun 2018 17:03:08 +0200
-Subject: [PATCH 45/72] Windows: Add UWP support, part 1.
-
-Contributed by Ben Pye.
----
- doc/ext_ffi_api.html | 2 ++
- src/lib_ffi.c | 3 +++
- src/lib_io.c | 4 ++--
- src/lib_package.c | 24 +++++++++++++++++++++++-
- src/lj_alloc.c | 6 +++---
- src/lj_arch.h | 19 +++++++++++++++++++
- src/lj_ccallback.c | 4 ++--
- src/lj_clib.c | 20 ++++++++++++++++----
- src/lj_mcode.c | 8 ++++----
- src/lj_profile.c | 8 ++++----
- 10 files changed, 78 insertions(+), 20 deletions(-)
-
-diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
-index 25cc974..54ff0ce 100644
---- a/doc/ext_ffi_api.html
-+++ b/doc/ext_ffi_api.html
-@@ -468,6 +468,8 @@ otherwise. The following parameters are currently defined:
- <tr class="odd">
- <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
- <tr class="even">
-+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
-+<tr class="odd">
- <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
- </table>
-
-diff --git a/src/lib_ffi.c b/src/lib_ffi.c
-index 199cfc9..8032411 100644
---- a/src/lib_ffi.c
-+++ b/src/lib_ffi.c
-@@ -746,6 +746,9 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
- #endif
- #if LJ_ABI_WIN
- case H_(4ab624a8,4ab624a8): b = 1; break; /* win */
-+#endif
-+#if LJ_TARGET_UWP
-+ case H_(a40f0bcb,a40f0bcb): b = 1; break; /* uwp */
- #endif
- case H_(3af93066,1f001464): b = 1; break; /* le/be */
- #if LJ_GC64
-diff --git a/src/lib_io.c b/src/lib_io.c
-index 9763ed4..73fd932 100644
---- a/src/lib_io.c
-+++ b/src/lib_io.c
-@@ -99,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
- int stat = -1;
- #if LJ_TARGET_POSIX
- stat = pclose(iof->fp);
--#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
-+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
- stat = _pclose(iof->fp);
- #else
- lua_assert(0);
-@@ -406,7 +406,7 @@ LJLIB_CF(io_open)
-
- LJLIB_CF(io_popen)
- {
--#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
-+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
- const char *fname = strdata(lj_lib_checkstr(L, 1));
- GCstr *s = lj_lib_optstr(L, 2);
- const char *mode = s ? strdata(s) : "r";
-diff --git a/src/lib_package.c b/src/lib_package.c
-index 6fac43e..bedd6d7 100644
---- a/src/lib_package.c
-+++ b/src/lib_package.c
-@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
- BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
- #endif
-
-+#if LJ_TARGET_UWP
-+void *LJ_WIN_LOADLIBA(const char *path)
-+{
-+ DWORD err = GetLastError();
-+ wchar_t wpath[256];
-+ HANDLE lib = NULL;
-+ if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
-+ lib = LoadPackagedLibrary(wpath, 0);
-+ }
-+ SetLastError(err);
-+ return lib;
-+}
-+#endif
-+
- #undef setprogdir
-
- static void setprogdir(lua_State *L)
-@@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib)
-
- static void *ll_load(lua_State *L, const char *path, int gl)
- {
-- HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
-+ HINSTANCE lib = LJ_WIN_LOADLIBA(path);
- if (lib == NULL) pusherror(L);
- UNUSED(gl);
- return lib;
-@@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
- return f;
- }
-
-+#if LJ_TARGET_UWP
-+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
-+#endif
-+
- static const char *ll_bcsym(void *lib, const char *sym)
- {
- if (lib) {
- return (const char *)GetProcAddress((HINSTANCE)lib, sym);
- } else {
-+#if LJ_TARGET_UWP
-+ return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
-+#else
- HINSTANCE h = GetModuleHandleA(NULL);
- const char *p = (const char *)GetProcAddress(h, sym);
- if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
- (const char *)ll_bcsym, &h))
- p = (const char *)GetProcAddress(h, sym);
- return p;
-+#endif
- }
- }
-
-diff --git a/src/lj_alloc.c b/src/lj_alloc.c
-index 9fc761c..f3b6a54 100644
---- a/src/lj_alloc.c
-+++ b/src/lj_alloc.c
-@@ -167,7 +167,7 @@ static void *DIRECT_MMAP(size_t size)
- static void *CALL_MMAP(size_t size)
- {
- DWORD olderr = GetLastError();
-- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
-+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
- SetLastError(olderr);
- return ptr ? ptr : MFAIL;
- }
-@@ -176,8 +176,8 @@ static void *CALL_MMAP(size_t size)
- static void *DIRECT_MMAP(size_t size)
- {
- DWORD olderr = GetLastError();
-- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-- PAGE_READWRITE);
-+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-+ PAGE_READWRITE);
- SetLastError(olderr);
- return ptr ? ptr : MFAIL;
- }
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index e796912..31a1159 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -135,6 +135,13 @@
- #define LJ_TARGET_GC64 1
- #endif
-
-+#ifdef _UWP
-+#define LJ_TARGET_UWP 1
-+#if LUAJIT_TARGET == LUAJIT_ARCH_X64
-+#define LJ_TARGET_GC64 1
-+#endif
-+#endif
-+
- #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
- #define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
- #define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
-@@ -570,6 +577,18 @@
- #define LJ_NO_UNWIND 1
- #endif
-
-+#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_UWP
-+#define LJ_WIN_VALLOC VirtualAllocFromApp
-+#define LJ_WIN_VPROTECT VirtualProtectFromApp
-+extern void *LJ_WIN_LOADLIBA(const char *path);
-+#else
-+#define LJ_WIN_VALLOC VirtualAlloc
-+#define LJ_WIN_VPROTECT VirtualProtect
-+#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
-+#endif
-+#endif
-+
- /* Compatibility with Lua 5.1 vs. 5.2. */
- #ifdef LUAJIT_ENABLE_LUA52COMPAT
- #define LJ_52 1
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index 03494a7..412dbf8 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -267,7 +267,7 @@ static void callback_mcode_new(CTState *cts)
- if (CALLBACK_MAX_SLOT == 0)
- lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
- #if LJ_TARGET_WINDOWS
-- p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
-+ p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
- if (!p)
- lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
- #elif LJ_TARGET_POSIX
-@@ -285,7 +285,7 @@ static void callback_mcode_new(CTState *cts)
- #if LJ_TARGET_WINDOWS
- {
- DWORD oprot;
-- VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot);
-+ LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
- }
- #elif LJ_TARGET_POSIX
- mprotect(p, sz, (PROT_READ|PROT_EXEC));
-diff --git a/src/lj_clib.c b/src/lj_clib.c
-index 6142659..f016b06 100644
---- a/src/lj_clib.c
-+++ b/src/lj_clib.c
-@@ -158,11 +158,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
- /* Default libraries. */
- enum {
- CLIB_HANDLE_EXE,
-+#if !LJ_TARGET_UWP
- CLIB_HANDLE_DLL,
- CLIB_HANDLE_CRT,
- CLIB_HANDLE_KERNEL32,
- CLIB_HANDLE_USER32,
- CLIB_HANDLE_GDI32,
-+#endif
- CLIB_HANDLE_MAX
- };
-
-@@ -208,7 +210,7 @@ static const char *clib_extname(lua_State *L, const char *name)
- static void *clib_loadlib(lua_State *L, const char *name, int global)
- {
- DWORD oldwerr = GetLastError();
-- void *h = (void *)LoadLibraryExA(clib_extname(L, name), NULL, 0);
-+ void *h = LJ_WIN_LOADLIBA(clib_extname(L, name));
- if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
- SetLastError(oldwerr);
- UNUSED(global);
-@@ -218,6 +220,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global)
- static void clib_unloadlib(CLibrary *cl)
- {
- if (cl->handle == CLIB_DEFHANDLE) {
-+#if !LJ_TARGET_UWP
- MSize i;
- for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) {
- void *h = clib_def_handle[i];
-@@ -226,11 +229,16 @@ static void clib_unloadlib(CLibrary *cl)
- FreeLibrary((HINSTANCE)h);
- }
- }
-+#endif
- } else if (cl->handle) {
- FreeLibrary((HINSTANCE)cl->handle);
- }
- }
-
-+#if LJ_TARGET_UWP
-+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
-+#endif
-+
- static void *clib_getsym(CLibrary *cl, const char *name)
- {
- void *p = NULL;
-@@ -239,6 +247,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
- for (i = 0; i < CLIB_HANDLE_MAX; i++) {
- HINSTANCE h = (HINSTANCE)clib_def_handle[i];
- if (!(void *)h) { /* Resolve default library handles (once). */
-+#if LJ_TARGET_UWP
-+ h = (HINSTANCE)&__ImageBase;
-+#else
- switch (i) {
- case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break;
- case CLIB_HANDLE_DLL:
-@@ -249,11 +260,12 @@ static void *clib_getsym(CLibrary *cl, const char *name)
- GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
- (const char *)&_fmode, &h);
- break;
-- case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break;
-- case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break;
-- case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break;
-+ case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break;
-+ case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break;
-+ case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break;
- }
- if (!h) continue;
-+#endif
- clib_def_handle[i] = (void *)h;
- }
- p = (void *)GetProcAddress(h, name);
-diff --git a/src/lj_mcode.c b/src/lj_mcode.c
-index e46e3ef..64b0ca9 100644
---- a/src/lj_mcode.c
-+++ b/src/lj_mcode.c
-@@ -66,8 +66,8 @@ void lj_mcode_sync(void *start, void *end)
-
- static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
- {
-- void *p = VirtualAlloc((void *)hint, sz,
-- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
-+ void *p = LJ_WIN_VALLOC((void *)hint, sz,
-+ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
- if (!p && !hint)
- lj_trace_err(J, LJ_TRERR_MCODEAL);
- return p;
-@@ -82,7 +82,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
- static int mcode_setprot(void *p, size_t sz, DWORD prot)
- {
- DWORD oprot;
-- return !VirtualProtect(p, sz, prot, &oprot);
-+ return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
- }
-
- #elif LJ_TARGET_POSIX
-@@ -255,7 +255,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
- /* All memory addresses are reachable by relative jumps. */
- static void *mcode_alloc(jit_State *J, size_t sz)
- {
--#ifdef __OpenBSD__
-+#if defined(__OpenBSD__) || LJ_TARGET_UWP
- /* Allow better executable memory allocation for OpenBSD W^X mode. */
- void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
- if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
-diff --git a/src/lj_profile.c b/src/lj_profile.c
-index 116998e..3223697 100644
---- a/src/lj_profile.c
-+++ b/src/lj_profile.c
-@@ -247,7 +247,7 @@ static DWORD WINAPI profile_thread(void *psx)
- {
- ProfileState *ps = (ProfileState *)psx;
- int interval = ps->interval;
--#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
- ps->wmm_tbp(interval);
- #endif
- while (1) {
-@@ -255,7 +255,7 @@ static DWORD WINAPI profile_thread(void *psx)
- if (ps->abort) break;
- profile_trigger(ps);
- }
--#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
- ps->wmm_tep(interval);
- #endif
- return 0;
-@@ -264,9 +264,9 @@ static DWORD WINAPI profile_thread(void *psx)
- /* Start profiling timer thread. */
- static void profile_timer_start(ProfileState *ps)
- {
--#if LJ_TARGET_WINDOWS
-+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
- if (!ps->wmm) { /* Load WinMM library on-demand. */
-- ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0);
-+ ps->wmm = LJ_WIN_LOADLIBA("winmm.dll");
- if (ps->wmm) {
- ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
- ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
---
-2.20.1
-
diff --git a/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch b/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch
deleted file mode 100644
index 69eea9a..0000000
--- a/0046-ARM64-Fix-write-barrier-in-BC_USETS.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From c785131ca5a6d24adc519e5e0bf1b69b671d912f Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 24 Jun 2018 13:18:03 +0200
-Subject: [PATCH 46/72] ARM64: Fix write barrier in BC_USETS.
-
-Contributed by Javier Guerra Giraldez.
----
- src/vm_arm64.dasc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
-index c55794a..fb226e3 100644
---- a/src/vm_arm64.dasc
-+++ b/src/vm_arm64.dasc
-@@ -2780,7 +2780,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |2: // Check if string is white and ensure upvalue is closed.
- | ldrb TMP0w, UPVAL:CARG1->closed
- | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
-- | ccmp TMP0w, #0, #0, ne
-+ | ccmp TMP0w, #0, #4, ne
- | beq <1
- | // Crossed a write barrier. Move the barrier forward.
- | mov CARG1, GL
---
-2.20.1
-
diff --git a/0047-ARM64-Fix-exit-stub-patching.patch b/0047-ARM64-Fix-exit-stub-patching.patch
deleted file mode 100644
index 740d52e..0000000
--- a/0047-ARM64-Fix-exit-stub-patching.patch
+++ /dev/null
@@ -1,238 +0,0 @@
-From 9da06535092d6d9dec442641a26c64bce5574322 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 24 Jun 2018 14:08:59 +0200
-Subject: [PATCH 47/72] ARM64: Fix exit stub patching.
-
-Contributed by Javier Guerra Giraldez.
----
- src/lj_asm_arm64.h | 64 +++++++++++++++++++++++++------------------
- src/lj_emit_arm64.h | 18 ++++++------
- src/lj_target_arm64.h | 7 +++--
- 3 files changed, 51 insertions(+), 38 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index cbb186d..baafa21 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
- asm_mclimit(as);
- /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
- for (i = nexits-1; (int32_t)i >= 0; i--)
-- *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
-- *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
-+ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
-+ *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
- mxp--;
-- *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
-- *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
-+ *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
-+ *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
- as->mctop = mxp;
- }
-
-@@ -77,7 +77,7 @@ static void asm_guardcc(ASMState *as, A64CC cc)
- MCode *p = as->mcp;
- if (LJ_UNLIKELY(p == as->invmcp)) {
- as->loopinv = 1;
-- *p = A64I_B | ((target-p) & 0x03ffffffu);
-+ *p = A64I_B | A64F_S26(target-p);
- emit_cond_branch(as, cc^1, p-1);
- return;
- }
-@@ -91,7 +91,7 @@ static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
- MCode *p = as->mcp;
- if (LJ_UNLIKELY(p == as->invmcp)) {
- as->loopinv = 1;
-- *p = A64I_B | ((target-p) & 0x03ffffffu);
-+ *p = A64I_B | A64F_S26(target-p);
- emit_tnb(as, ai^0x01000000u, r, bit, p-1);
- return;
- }
-@@ -105,7 +105,7 @@ static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
- MCode *p = as->mcp;
- if (LJ_UNLIKELY(p == as->invmcp)) {
- as->loopinv = 1;
-- *p = A64I_B | ((target-p) & 0x03ffffffu);
-+ *p = A64I_B | A64F_S26(target-p);
- emit_cnb(as, ai^0x01000000u, r, p-1);
- return;
- }
-@@ -1850,7 +1850,7 @@ static void asm_loop_fixup(ASMState *as)
- p[-2] |= ((uint32_t)delta & mask) << 5;
- } else {
- ptrdiff_t delta = target - (p - 1);
-- p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu);
-+ p[-1] = A64I_B | A64F_S26(delta);
- }
- }
-
-@@ -1919,7 +1919,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
- }
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
-- p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu);
-+ p[-1] = A64I_B | A64F_S26((target-p)+1);
- }
-
- /* Prepare tail of code. */
-@@ -1982,40 +1982,50 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
- {
- MCode *p = T->mcode;
- MCode *pe = (MCode *)((char *)p + T->szmcode);
-- MCode *cstart = NULL, *cend = p;
-+ MCode *cstart = NULL;
- MCode *mcarea = lj_mcode_patch(J, p, 0);
- MCode *px = exitstub_trace_addr(T, exitno);
-+ /* Note: this assumes a trace exit is only ever patched once. */
- for (; p < pe; p++) {
- /* Look for exitstub branch, replace with branch to target. */
-+ ptrdiff_t delta = target - p;
- MCode ins = A64I_LE(*p);
- if ((ins & 0xff000000u) == 0x54000000u &&
- ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
-- /* Patch bcc exitstub. */
-- *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
-- cend = p+1;
-- if (!cstart) cstart = p;
-+ /* Patch bcc, if within range. */
-+ if (A64F_S_OK(delta, 19)) {
-+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
-+ if (!cstart) cstart = p;
-+ }
- } else if ((ins & 0xfc000000u) == 0x14000000u &&
- ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
-- /* Patch b exitstub. */
-- *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
-- cend = p+1;
-+ /* Patch b. */
-+ lua_assert(A64F_S_OK(delta, 26));
-+ *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
- if (!cstart) cstart = p;
- } else if ((ins & 0x7e000000u) == 0x34000000u &&
- ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
-- /* Patch cbz/cbnz exitstub. */
-- *p = A64I_LE((ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u));
-- cend = p+1;
-- if (!cstart) cstart = p;
-+ /* Patch cbz/cbnz, if within range. */
-+ if (A64F_S_OK(delta, 19)) {
-+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
-+ if (!cstart) cstart = p;
-+ }
- } else if ((ins & 0x7e000000u) == 0x36000000u &&
- ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
-- /* Patch tbz/tbnz exitstub. */
-- *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
-- cend = p+1;
-- if (!cstart) cstart = p;
-+ /* Patch tbz/tbnz, if within range. */
-+ if (A64F_S_OK(delta, 14)) {
-+ *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
-+ if (!cstart) cstart = p;
-+ }
- }
- }
-- lua_assert(cstart != NULL);
-- lj_mcode_sync(cstart, cend);
-+ { /* Always patch long-range branch in exit stub itself. */
-+ ptrdiff_t delta = target - px;
-+ lua_assert(A64F_S_OK(delta, 26));
-+ *px = A64I_B | A64F_S26(delta);
-+ if (!cstart) cstart = px;
-+ }
-+ lj_mcode_sync(cstart, px+1);
- lj_mcode_patch(J, mcarea, 1);
- }
-
-diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
-index 6da4c7d..1001b1d 100644
---- a/src/lj_emit_arm64.h
-+++ b/src/lj_emit_arm64.h
-@@ -241,7 +241,7 @@ static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
- #define mcpofs(as, k) \
- ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
- #define checkmcpofs(as, k) \
-- ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0)
-+ (A64F_S_OK(mcpofs(as, k)>>2, 19))
-
- static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
-
-@@ -312,7 +312,7 @@ static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(((delta + 0x40000) >> 19) == 0);
-+ lua_assert(A64F_S_OK(delta, 19));
- *p = A64I_BCC | A64F_S19(delta) | cond;
- }
-
-@@ -320,24 +320,24 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(((delta + 0x02000000) >> 26) == 0);
-- *p = ai | ((uint32_t)delta & 0x03ffffffu);
-+ lua_assert(A64F_S_OK(delta, 26));
-+ *p = ai | A64F_S26(delta);
- }
-
- static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0);
-+ lua_assert(bit < 63 && A64F_S_OK(delta, 14));
- if (bit > 31) ai |= A64I_X;
-- *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r;
-+ *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
- }
-
- static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = target - p;
-- lua_assert(((delta + 0x40000) >> 19) == 0);
-+ lua_assert(A64F_S_OK(delta, 19));
- *p = ai | A64F_S19(delta) | r;
- }
-
-@@ -347,8 +347,8 @@ static void emit_call(ASMState *as, void *target)
- {
- MCode *p = --as->mcp;
- ptrdiff_t delta = (char *)target - (char *)p;
-- if ((((delta>>2) + 0x02000000) >> 26) == 0) {
-- *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu);
-+ if (A64F_S_OK(delta>>2, 26)) {
-+ *p = A64I_BL | A64F_S26(delta>>2);
- } else { /* Target out of range: need indirect call. But don't use R0-R7. */
- Reg r = ra_allock(as, i64ptr(target),
- RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
-index 520023a..a207a2b 100644
---- a/src/lj_target_arm64.h
-+++ b/src/lj_target_arm64.h
-@@ -132,9 +132,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
- #define A64F_IMMR(x) ((x) << 16)
- #define A64F_U16(x) ((x) << 5)
- #define A64F_U12(x) ((x) << 10)
--#define A64F_S26(x) (x)
-+#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
- #define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
--#define A64F_S14(x) ((x) << 5)
-+#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
- #define A64F_S9(x) ((x) << 12)
- #define A64F_BIT(x) ((x) << 19)
- #define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
-@@ -145,6 +145,9 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
- #define A64F_LSL16(x) (((x) / 16) << 21)
- #define A64F_BSH(sh) ((sh) << 10)
-
-+/* Check for valid field range. */
-+#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
-+
- typedef enum A64Ins {
- A64I_S = 0x20000000,
- A64I_X = 0x80000000,
---
-2.20.1
-
diff --git a/0048-DynASM-Fix-warning.patch b/0048-DynASM-Fix-warning.patch
deleted file mode 100644
index 4af1c32..0000000
--- a/0048-DynASM-Fix-warning.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 9b41062156779160b88fe5e1eb1ece1ee1fe6a74 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 24 Jun 2018 14:10:21 +0200
-Subject: [PATCH 48/72] DynASM: Fix warning.
-
----
- dynasm/dasm_arm64.h | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
-index 47e1e07..ff21236 100644
---- a/dynasm/dasm_arm64.h
-+++ b/dynasm/dasm_arm64.h
-@@ -427,6 +427,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-+ /* fallthrough */
- case DASM_REL_PC:
- CK(n >= 0, UNDEF_PC);
- n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
---
-2.20.1
-
diff --git a/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch b/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch
deleted file mode 100644
index 40bcbce..0000000
--- a/0049-DynASM-x86-Fix-vroundps-vroundpd-encoding.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 26f1023819efb843e10014232cd88bb1d52ea4f5 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Wed, 22 Aug 2018 13:35:41 +0200
-Subject: [PATCH 49/72] DynASM/x86: Fix vroundps/vroundpd encoding.
-
-Thanks to Alexander Nasonov.
----
- dynasm/dasm_x86.lua | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
-index 73502f6..7f536af 100644
---- a/dynasm/dasm_x86.lua
-+++ b/dynasm/dasm_x86.lua
-@@ -1537,8 +1537,8 @@ local map_op = {
- vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
- vrsqrtps_2 = "rmoy:0Fu52rM",
- vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
-- vroundpd_3 = "rmioy:660F3AV09rMU",
-- vroundps_3 = "rmioy:660F3AV08rMU",
-+ vroundpd_3 = "rmioy:660F3Au09rMU",
-+ vroundps_3 = "rmioy:660F3Au08rMU",
- vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
- vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
- vshufpd_4 = "rrmioy:660FVC6rMU",
---
-2.20.1
-
diff --git a/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch b/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch
deleted file mode 100644
index 9b29c4e..0000000
--- a/0050-Fix-memory-probing-allocator-to-check-for-valid-end-.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 646148e747759f0af3b47f9bd287cedd7e174631 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 13 Sep 2018 17:58:50 +0200
-Subject: [PATCH 50/72] Fix memory probing allocator to check for valid end
- address, too.
-
----
- src/lj_alloc.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_alloc.c b/src/lj_alloc.c
-index f3b6a54..33a2eb8 100644
---- a/src/lj_alloc.c
-+++ b/src/lj_alloc.c
-@@ -255,7 +255,8 @@ static void *mmap_probe(size_t size)
- for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
- void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
- uintptr_t addr = (uintptr_t)p;
-- if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) {
-+ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
-+ ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
- /* We got a suitable address. Bump the hint address. */
- hint_addr = addr + size;
- errno = olderr;
---
-2.20.1
-
diff --git a/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch b/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch
deleted file mode 100644
index 3226e33..0000000
--- a/0051-MIPS-MIPS64-Fix-TSETR-barrier-again.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 9c1b637898f38dd4606da08ba1a82a174c3e64b6 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Oct 2018 15:12:59 +0200
-Subject: [PATCH 51/72] MIPS/MIPS64: Fix TSETR barrier (again).
-
----
- src/vm_mips.dasc | 2 +-
- src/vm_mips64.dasc | 2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
-index 1afd611..f324812 100644
---- a/src/vm_mips.dasc
-+++ b/src/vm_mips.dasc
-@@ -4317,7 +4317,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next2
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:CARG2, TMP3, TMP0, <2
-+ | barrierback TAB:CARG2, TMP3, CRET1, <2
- break;
-
- case BC_TSETM:
-diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
-index 0a3f8e5..1682c81 100644
---- a/src/vm_mips64.dasc
-+++ b/src/vm_mips64.dasc
-@@ -4263,7 +4263,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next2
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-- | barrierback TAB:CARG2, TMP3, TMP0, <2
-+ | barrierback TAB:CARG2, TMP3, CRET1, <2
- break;
-
- case BC_TSETM:
---
-2.20.1
-
diff --git a/0052-Actually-implement-maxirconst-trace-limit.patch b/0052-Actually-implement-maxirconst-trace-limit.patch
deleted file mode 100644
index 5281168..0000000
--- a/0052-Actually-implement-maxirconst-trace-limit.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From 0a9ff94c4a1fcec2c310dcb092da694f23186e23 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Sun, 14 Oct 2018 15:21:37 +0200
-Subject: [PATCH 52/72] Actually implement maxirconst trace limit.
-
-Suggested by spacewander.
----
- src/lj_record.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/lj_record.c b/src/lj_record.c
-index 1a2b1c5..7f37d6c 100644
---- a/src/lj_record.c
-+++ b/src/lj_record.c
-@@ -2470,8 +2470,9 @@ void lj_record_ins(jit_State *J)
- #undef rbv
- #undef rcv
-
-- /* Limit the number of recorded IR instructions. */
-- if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
-+ /* Limit the number of recorded IR instructions and constants. */
-+ if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] ||
-+ J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst])
- lj_trace_err(J, LJ_TRERR_TRACEOV);
- }
-
---
-2.20.1
-
diff --git a/0053-Better-detection-of-MinGW-build.patch b/0053-Better-detection-of-MinGW-build.patch
deleted file mode 100644
index 9805f09..0000000
--- a/0053-Better-detection-of-MinGW-build.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 3404183e2387f48e3464bd79116d3e8021ca781e Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:02:15 +0100
-Subject: [PATCH 53/72] Better detection of MinGW build.
-
----
- src/Makefile | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/Makefile b/src/Makefile
-index 24e8c0e..962aa94 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -165,6 +165,10 @@ else
- HOST_SYS= Windows
- HOST_MSYS= mingw
- endif
-+ ifneq (,$(findstring MSYS,$(HOST_SYS)))
-+ HOST_SYS= Windows
-+ HOST_MSYS= mingw
-+ endif
- ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
- HOST_SYS= Windows
- HOST_MSYS= cygwin
---
-2.20.1
-
diff --git a/0054-Fix-overflow-of-snapshot-map-offset.patch b/0054-Fix-overflow-of-snapshot-map-offset.patch
deleted file mode 100644
index 723cb74..0000000
--- a/0054-Fix-overflow-of-snapshot-map-offset.patch
+++ /dev/null
@@ -1,131 +0,0 @@
-commit 749e99ce2a88bf337bd2f6279940d6761ce5f616
-Merge: e2cc89b 380e440
-Author: Mike Pall <mike>
-Date: Thu Jan 10 12:24:17 2019 +0100
-
- Merge branch 'master' into v2.1
-
-From 380e4409a70725df85034f02c968b6ebd7a5e513 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:19:30 +0100
-Subject: [PATCH 54/72] Fix overflow of snapshot map offset.
-
-Thanks to Yichun Zhang.
----
- src/lj_jit.h | 10 +++++-----
- src/lj_opt_loop.c | 8 ++++----
- src/lj_snap.c | 6 +++---
- 3 files changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/src/lj_jit.h b/src/lj_jit.h
-index 3f38d28..0bc6258 100644
---- a/src/lj_jit.h
-+++ b/src/lj_jit.h
-@@ -163,7 +163,7 @@ typedef struct MCLink {
-
- /* Stack snapshot header. */
- typedef struct SnapShot {
-- uint16_t mapofs; /* Offset into snapshot map. */
-+ uint32_t mapofs; /* Offset into snapshot map. */
- IRRef1 ref; /* First IR ref for this snapshot. */
- uint8_t nslots; /* Number of valid slots. */
- uint8_t topslot; /* Maximum frame extent. */
-@@ -217,17 +217,15 @@ typedef enum {
- /* Trace object. */
- typedef struct GCtrace {
- GCHeader;
-- uint8_t topslot; /* Top stack slot already checked to be allocated. */
-- uint8_t linktype; /* Type of link. */
-+ uint16_t nsnap; /* Number of snapshots. */
- IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
- #if LJ_GC64
- uint32_t unused_gc64;
- #endif
- GCRef gclist;
- IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
- IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
-- uint16_t nsnap; /* Number of snapshots. */
-- uint16_t nsnapmap; /* Number of snapshot map elements. */
-+ uint32_t nsnapmap; /* Number of snapshot map elements. */
- SnapShot *snap; /* Snapshot array. */
- SnapEntry *snapmap; /* Snapshot map. */
- GCRef startpt; /* Starting prototype. */
-@@ -241,6 +239,8 @@ typedef struct GCtrace {
- TraceNo1 nextroot; /* Next root trace for same prototype. */
- TraceNo1 nextside; /* Next side trace of same root trace. */
- uint8_t sinktags; /* Trace has SINK tags. */
-+ uint8_t topslot; /* Top stack slot already checked to be allocated. */
-+ uint8_t linktype; /* Type of link. */
- uint8_t unused1;
- #ifdef LUAJIT_USE_GDBJIT
- void *gdbjit_entry; /* GDB JIT entry. */
-diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
-index 36317b3..cc88111 100644
---- a/src/lj_opt_loop.c
-+++ b/src/lj_opt_loop.c
-@@ -223,7 +223,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
- }
- J->guardemit.irt = 0;
- /* Setup new snapshot. */
-- snap->mapofs = (uint16_t)nmapofs;
-+ snap->mapofs = (uint32_t)nmapofs;
- snap->ref = (IRRef1)J->cur.nins;
- snap->nslots = nslots;
- snap->topslot = osnap->topslot;
-@@ -251,7 +251,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
- nmap += nn;
- while (omap < nextmap) /* Copy PC + frame links. */
- *nmap++ = *omap++;
-- J->cur.nsnapmap = (uint16_t)(nmap - J->cur.snapmap);
-+ J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap);
- }
-
- typedef struct LoopState {
-@@ -362,7 +362,7 @@ static void loop_unroll(jit_State *J)
- }
- }
- if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
-- J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
-+ J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs;
- lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
- *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
-
-@@ -376,7 +376,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
- SnapShot *snap = &J->cur.snap[nsnap-1];
- SnapEntry *map = J->cur.snapmap;
- map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */
-- J->cur.nsnapmap = (uint16_t)nsnapmap;
-+ J->cur.nsnapmap = (uint32_t)nsnapmap;
- J->cur.nsnap = nsnap;
- J->guardemit.irt = 0;
- lj_ir_rollback(J, ins);
-diff --git a/src/lj_snap.c b/src/lj_snap.c
-index e891f7a..73f2500 100644
---- a/src/lj_snap.c
-+++ b/src/lj_snap.c
-@@ -129,11 +129,11 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
- nent = snapshot_slots(J, p, nslots);
- snap->nent = (uint8_t)nent;
- nent += snapshot_framelinks(J, p + nent, &snap->topslot);
-- snap->mapofs = (uint16_t)nsnapmap;
-+ snap->mapofs = (uint32_t)nsnapmap;
- snap->ref = (IRRef1)J->cur.nins;
- snap->nslots = (uint8_t)nslots;
- snap->count = 0;
-- J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
-+ J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
- }
-
- /* Add or merge a snapshot. */
-@@ -294,7 +294,7 @@ void lj_snap_shrink(jit_State *J)
- snap->nent = (uint8_t)m;
- nlim = J->cur.nsnapmap - snap->mapofs - 1;
- while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
-- J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */
-+ J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */
- }
-
- /* -- Snapshot access ----------------------------------------------------- */
---
-2.20.1
-
diff --git a/0055-DynASM-PPC-Fix-shadowed-variable.patch b/0055-DynASM-PPC-Fix-shadowed-variable.patch
deleted file mode 100644
index 4f80b3a..0000000
--- a/0055-DynASM-PPC-Fix-shadowed-variable.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 20e4c529458fa42ef6651a0042e3955723ee20c2 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:28:24 +0100
-Subject: [PATCH 55/72] DynASM/PPC: Fix shadowed variable.
-
-Cleanup only, bug cannot trigger.
-Thanks to Domingo Alvarez Duarte.
----
- dynasm/dasm_ppc.lua | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
-index 4e1656e..77031fb 100644
---- a/dynasm/dasm_ppc.lua
-+++ b/dynasm/dasm_ppc.lua
-@@ -1056,9 +1056,9 @@ map_op[".template__"] = function(params, template, nparams)
- elseif p == "M" then
- op = op + parse_shiftmask(params[n], false); n = n + 1
- elseif p == "J" or p == "K" then
-- local mode, n, s = parse_label(params[n], false)
-- if p == "K" then n = n + 2048 end
-- waction("REL_"..mode, n, s, 1)
-+ local mode, m, s = parse_label(params[n], false)
-+ if p == "K" then m = m + 2048 end
-+ waction("REL_"..mode, m, s, 1)
- n = n + 1
- elseif p == "0" then
- if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
---
-2.20.1
-
diff --git a/0056-DynASM-MIPS-Fix-shadowed-variable.patch b/0056-DynASM-MIPS-Fix-shadowed-variable.patch
deleted file mode 100644
index e3fc081..0000000
--- a/0056-DynASM-MIPS-Fix-shadowed-variable.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 5c911998a3c85d024a8006feafc68d0b4c962fd8 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:32:08 +0100
-Subject: [PATCH 56/72] DynASM/MIPS: Fix shadowed variable.
-
-Cleanup only, bug cannot trigger.
-Thanks to Domingo Alvarez Duarte.
----
- dynasm/dasm_mips.lua | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
-index 8e250ce..af53042 100644
---- a/dynasm/dasm_mips.lua
-+++ b/dynasm/dasm_mips.lua
-@@ -757,9 +757,9 @@ map_op[".template__"] = function(params, template, nparams)
- elseif p == "X" then
- op = op + parse_index(params[n]); n = n + 1
- elseif p == "B" or p == "J" then
-- local mode, n, s = parse_label(params[n], false)
-- if p == "B" then n = n + 2048 end
-- waction("REL_"..mode, n, s, 1)
-+ local mode, m, s = parse_label(params[n], false)
-+ if p == "B" then m = m + 2048 end
-+ waction("REL_"..mode, m, s, 1)
- n = n + 1
- elseif p == "A" then
- op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
---
-2.20.1
-
diff --git a/0057-Fix-MinGW-build.patch b/0057-Fix-MinGW-build.patch
deleted file mode 100644
index d23aa4c..0000000
--- a/0057-Fix-MinGW-build.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 61464b0a5b685489bee7b6680c0e9663f2143a84 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:37:09 +0100
-Subject: [PATCH 57/72] Fix MinGW build.
-
-Thanks to Victor Bombi.
----
- src/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index 962aa94..2c780de 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -194,7 +194,7 @@ CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
- LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
-
- HOST_CC= $(CC)
--HOST_RM= rm -f
-+HOST_RM?= rm -f
- # If left blank, minilua is built and used. You can supply an installed
- # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
- HOST_LUA=
---
-2.20.1
-
diff --git a/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch b/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch
deleted file mode 100644
index 40324b7..0000000
--- a/0058-Fix-os.date-for-wider-libc-strftime-compatibility.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From fc63c938b522e147ea728b75f385728bf4a8fc35 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 12:47:28 +0100
-Subject: [PATCH 58/72] Fix os.date() for wider libc strftime() compatibility.
-
-Thanks to Jesper Lundgren.
----
- src/lib_os.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/lib_os.c b/src/lib_os.c
-index 9e78d49..ffbc3fd 100644
---- a/src/lib_os.c
-+++ b/src/lib_os.c
-@@ -205,12 +205,12 @@ LJLIB_CF(os_date)
- setboolfield(L, "isdst", stm->tm_isdst);
- } else if (*s) {
- SBuf *sb = &G(L)->tmpbuf;
-- MSize sz = 0;
-+ MSize sz = 0, retry = 4;
- const char *q;
- for (q = s; *q; q++)
- sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
- setsbufL(sb, L);
-- for (;;) {
-+ while (retry--) { /* Limit growth for invalid format or empty result. */
- char *buf = lj_buf_need(sb, sz);
- size_t len = strftime(buf, sbufsz(sb), s, stm);
- if (len) {
---
-2.20.1
-
diff --git a/0059-Improve-luaL_addlstring.patch b/0059-Improve-luaL_addlstring.patch
deleted file mode 100644
index 0bf7cf5..0000000
--- a/0059-Improve-luaL_addlstring.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From f0e865dd4861520258299d0f2a56491bd9d602e1 Mon Sep 17 00:00:00 2001
-From: Mike Pall <mike>
-Date: Thu, 10 Jan 2019 13:09:17 +0100
-Subject: [PATCH 59/72] Improve luaL_addlstring().
-
-Thanks to Domingo Alvarez Duarte.
----
- src/lib_aux.c | 11 +++++++++--
- 1 file changed, 9 insertions(+), 2 deletions(-)
-
-diff --git a/src/lib_aux.c b/src/lib_aux.c
-index c40565c..2682a38 100644
---- a/src/lib_aux.c
-+++ b/src/lib_aux.c
-@@ -218,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
-
- LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
- {
-- while (l--)
-- luaL_addchar(B, *s++);
-+ if (l <= bufffree(B)) {
-+ memcpy(B->p, s, l);
-+ B->p += l;
-+ } else {
-+ emptybuffer(B);
-+ lua_pushlstring(B->L, s, l);
-+ B->lvl++;
-+ adjuststack(B);
-+ }
- }
-
- LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
---
-2.20.1
-
diff --git a/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch b/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch
deleted file mode 100644
index 8fe6e8e..0000000
--- a/0060-Fix-arm64-register-allocation-issue-for-XLOAD.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 43a3893b0d7d82bfbfd13bf458a5906f755989c9 Mon Sep 17 00:00:00 2001
-From: Patrick Galizia <pgalizia.qdt(a)qualcommdatacenter.com>
-Date: Fri, 24 Aug 2018 11:02:15 -0400
-Subject: [PATCH 60/72] Fix arm64 register allocation issue for XLOAD.
-
-For the arm64 implementation of asm_xload(), it is possible for
-the dest register selected to be the same as one of the source
-registers generated in the asm_fusexref() call. To prevent this,
-exclude the dest register from the list of allowed registers for
-that call.
-
-Thanks to Javier for guidance as well as his script to replicate
-the issue.
----
- src/lj_asm_arm64.h | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index baafa21..045f260 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -1008,7 +1008,8 @@ static void asm_xload(ASMState *as, IRIns *ir)
- {
- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
-+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1,
-+ rset_exclude(RSET_GPR, dest));
- }
-
- static void asm_xstore(ASMState *as, IRIns *ir)
---
-2.20.1
-
diff --git a/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch b/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch
deleted file mode 100644
index 1f58f88..0000000
--- a/0061-Fix-arm64-register-allocation-issue-for-XLOAD.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From ec0d8427ade9346d356623072fcb91c2d11e3bda Mon Sep 17 00:00:00 2001
-From: Patrick Galizia <pgalizia.qdt(a)qualcommdatacenter.com>
-Date: Wed, 28 Nov 2018 14:14:35 -0500
-Subject: [PATCH 61/72] Fix arm64 register allocation issue for XLOAD.
-
-For arm64, it's possible for both IRRefs to fail asm_isk32(), but
-one of them pass irref_isk(). Add a secondary check for the latter
-call if both asm_isk32() calls fail.
----
- src/lj_asm_arm64.h | 18 +++++++++++++-----
- 1 file changed, 13 insertions(+), 5 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 045f260..ce49cde 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -295,9 +295,18 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
- } else if (asm_isk32(as, ir->op1, &ofs)) {
- ref = ir->op2;
- } else {
-- Reg rn = ra_alloc1(as, ir->op1, allow);
-- IRIns *irr = IR(ir->op2);
-+ IRRef ref1 = ir->op1;
-+ IRRef ref2 = ir->op2;
-+ Reg rn;
-+ IRIns *irr;
- uint32_t m;
-+
-+ if (irref_isk(ir->op1)) {
-+ ref1 = ir->op2;
-+ ref2 = ir->op1;
-+ }
-+ rn = ra_alloc1(as, ref1, allow);
-+ irr = IR(ref2);
- if (irr+1 == ir && !ra_used(irr) &&
- irr->o == IR_ADD && irref_isk(irr->op2)) {
- ofs = sizeof(GCstr) + IR(irr->op2)->i;
-@@ -307,7 +316,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
- goto skipopm;
- }
- }
-- m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
-+ m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn));
- ofs = sizeof(GCstr);
- skipopm:
- emit_lso(as, ai, rd, rd, ofs);
-@@ -1008,8 +1017,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
- {
- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
-- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1,
-- rset_exclude(RSET_GPR, dest));
-+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
- }
-
- static void asm_xstore(ASMState *as, IRIns *ir)
---
-2.20.1
-
diff --git a/0062-Remove-redundant-emit_check_ofs.patch b/0062-Remove-redundant-emit_check_ofs.patch
deleted file mode 100644
index 9b34eab..0000000
--- a/0062-Remove-redundant-emit_check_ofs.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 1fae7b08e319ba4028d303b09de72b026109a269 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Fri, 22 Feb 2019 19:05:38 +0000
-Subject: [PATCH 62/72] Remove redundant emit_check_ofs
-
-Even if the offset is a constant, it is not 32-bit since it failed
-that check earlier before it came here. The code is thus useless and
-hence removed. This also fixes inconsistencies with op1/op2 renaming
-that were introduced in PR #438. They were never triggered because
-the code path is effectively dead for arm64.
----
- src/lj_asm_arm64.h | 15 +--------------
- 1 file changed, 1 insertion(+), 14 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index ce49cde..c214e10 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -298,27 +298,14 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
- IRRef ref1 = ir->op1;
- IRRef ref2 = ir->op2;
- Reg rn;
-- IRIns *irr;
-- uint32_t m;
-
- if (irref_isk(ir->op1)) {
- ref1 = ir->op2;
- ref2 = ir->op1;
- }
- rn = ra_alloc1(as, ref1, allow);
-- irr = IR(ref2);
-- if (irr+1 == ir && !ra_used(irr) &&
-- irr->o == IR_ADD && irref_isk(irr->op2)) {
-- ofs = sizeof(GCstr) + IR(irr->op2)->i;
-- if (emit_checkofs(ai, ofs)) {
-- Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
-- m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
-- goto skipopm;
-- }
-- }
-- m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn));
-+ uint32_t m = asm_fuseopm(as, 0, ref2, rset_exclude(allow, rn));
- ofs = sizeof(GCstr);
-- skipopm:
- emit_lso(as, ai, rd, rd, ofs);
- emit_dn(as, A64I_ADDx^m, rd, rn);
- return;
---
-2.20.1
-
diff --git a/0063-aarch64-Use-the-xzr-register-whenever-possible.patch b/0063-aarch64-Use-the-xzr-register-whenever-possible.patch
deleted file mode 100644
index c2b0505..0000000
--- a/0063-aarch64-Use-the-xzr-register-whenever-possible.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 8fc4ce1c981967fccd5366ace6add6d14cfcde89 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)gotplt.org>
-Date: Mon, 25 Feb 2019 14:40:39 +0000
-Subject: [PATCH 63/72] aarch64: Use the xzr register whenever possible
-
-Using the xzr register for store inputs and the second operand of
-arithmetic operations frees up a register for use elsewhere.
----
- src/lj_asm_arm64.h | 31 ++++++++++++++++++++++++++++---
- 1 file changed, 28 insertions(+), 3 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index c214e10..a826687 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -1007,10 +1007,30 @@ static void asm_xload(ASMState *as, IRIns *ir)
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
- }
-
-+static int maybe_zero_val(ASMState *as, IRRef ref)
-+{
-+ IRIns *ir = IR(ref);
-+
-+ switch(ir->o) {
-+ case IR_KNULL:
-+ return 1;
-+ case IR_KINT:
-+ return 0 == ir->i;
-+ case IR_KINT64:
-+ return 0 == ir_kint64(ir)->u64;
-+ }
-+
-+ return 0;
-+}
-+
- static void asm_xstore(ASMState *as, IRIns *ir)
- {
- if (ir->r != RID_SINK) {
-- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
-+ Reg src;
-+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
-+ src = RID_ZERO;
-+ else
-+ src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
- rset_exclude(RSET_GPR, src));
- }
-@@ -1198,7 +1218,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
- /* Initialize immutable cdata object. */
- if (ir->o == IR_CNEWI) {
- int32_t ofs = sizeof(GCcdata);
-- Reg r = ra_alloc1(as, ir->op2, allow);
-+ Reg r;
-+ if (irref_isk(ir->op2) && maybe_zero_val(as, ir->op2))
-+ r = RID_ZERO;
-+ else
-+ r = ra_alloc1(as, ir->op2, allow);
-+
- lua_assert(sz == 4 || sz == 8);
- emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
- } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
-@@ -1214,7 +1239,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
-
- /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
- {
-- Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
-+ Reg r = id == 0 ? RID_ZERO : (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
- emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
- emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
- emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
---
-2.20.1
-
diff --git a/0065-Add-support-for-FNMADD-and-FNMSUB.patch b/0065-Add-support-for-FNMADD-and-FNMSUB.patch
deleted file mode 100644
index c1762f4..0000000
--- a/0065-Add-support-for-FNMADD-and-FNMSUB.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From e99ac1bc2df5c1d138bbc98d35d1a1892144cf2b Mon Sep 17 00:00:00 2001
-From: Sameera Deshpande <sameera.deshpande(a)linaro.org>
-Date: Fri, 15 Feb 2019 07:46:16 +0530
-Subject: [PATCH 65/72] Add support for FNMADD and FNMSUB.
-
----
- src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++-
- 1 file changed, 31 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index a826687..470e65d 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -344,6 +344,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
- return 0;
- }
-
-+/* Fuse FP neg-multiply-add/sub. */
-+static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
-+{
-+ IRRef ref = ir->op1;
-+ IRIns *irn = IR(ref);
-+ if (irn->o != IR_ADD && irn->o != IR_SUB)
-+ return 0;
-+
-+ if (!mayfuse(as, ref))
-+ return 0;
-+
-+ IRRef lref = irn->op1, rref = irn->op2;
-+ IRIns *irm;
-+ if (lref != rref &&
-+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
-+ ra_noreg(irm->r)) ||
-+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
-+ (rref = lref, ra_noreg(irm->r))))) {
-+ Reg dest = ra_dest(as, ir, RSET_FPR);
-+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
-+ Reg left = ra_alloc2(as, irm,
-+ rset_exclude(rset_exclude(RSET_FPR, dest), add));
-+ Reg right = (left >> 8); left &= 255;
-+ emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31));
-+ return 1;
-+ }
-+ return 0;
-+}
-+
- /* Fuse BAND + BSHL/BSHR into UBFM. */
- static int asm_fuseandshift(ASMState *as, IRIns *ir)
- {
-@@ -1481,7 +1510,8 @@ static void asm_mod(ASMState *as, IRIns *ir)
- static void asm_neg(ASMState *as, IRIns *ir)
- {
- if (irt_isnum(ir->t)) {
-- asm_fpunary(as, ir, A64I_FNEGd);
-+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
-+ asm_fpunary(as, ir, A64I_FNEGd);
- return;
- }
- asm_intneg(as, ir);
---
-2.20.1
-
diff --git a/0066-Fix-os.date-for-timezone-change-awareness.patch b/0066-Fix-os.date-for-timezone-change-awareness.patch
deleted file mode 100644
index afab3fe..0000000
--- a/0066-Fix-os.date-for-timezone-change-awareness.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 372bb8b22546663ba57e69fad75c97cfd004ac63 Mon Sep 17 00:00:00 2001
-From: Vivien HENRIET <bubuabu(a)bubuabu.org>
-Date: Wed, 30 Jan 2019 23:44:51 +0100
-Subject: [PATCH 66/72] Fix os.date() for timezone change awareness
-
-On POSIX targets, system timezone changes are not taken into account.
-To reproduce,
-1. call os.date()
-2. change your timezone
-3. call os.date() within the same luajit instance
-
-On POSIX targets, os.date uses localtime_r to retrieve the time.
-On other targets, the function localtime is used. But there is a behaviour
-difference between these two functions: localtime acts as if it called tzset,
-which localtime_r doesn't.
-
-To fix the issue tzset is called before localtime_r.
----
- src/lib_os.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/lib_os.c b/src/lib_os.c
-index ffbc3fd..09dc737 100644
---- a/src/lib_os.c
-+++ b/src/lib_os.c
-@@ -185,6 +185,7 @@ LJLIB_CF(os_date)
- #endif
- } else {
- #if LJ_TARGET_POSIX
-+ tzset();
- stm = localtime_r(&t, &rtm);
- #else
- stm = localtime(&t);
---
-2.20.1
-
diff --git a/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch b/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch
deleted file mode 100644
index 7f27204..0000000
--- a/0067-Revert-FFI-Make-FP-to-U64-conversions-match-JIT-back.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 49f19e7b31fc033ac1e9208580b5be31e2b66b19 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Thu, 14 Mar 2019 23:08:24 +0530
-Subject: [PATCH 67/72] Revert "FFI: Make FP to U64 conversions match JIT
- backend behavior."
-
-This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8.
-
-The patch breaks test 279, i.e.
-
- assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL")
-
-The patch was put in to make the JIT and interpreter behaviour
-consistent[1] for float to unsigned int conversions but it ended up
-making things worse. There needs to be a better fix for this.
-
-[1] https://github.com/LuaJIT/LuaJIT/pull/415
----
- src/lj_obj.h | 18 +++++-------------
- 1 file changed, 5 insertions(+), 13 deletions(-)
-
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index 72b7ace..c7e4742 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -942,22 +942,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-
- #define lj_num2int(n) ((int32_t)(n))
-
--/*
--** This must match the JIT backend behavior. In particular for archs
--** that don't have a common hardware instruction for this conversion.
--** Note that signed FP to unsigned int conversions have an undefined
--** result and should never be relied upon in portable FFI code.
--** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
--*/
- static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
- {
--#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
-- int64_t i = (int64_t)n;
-- if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
-- return (uint64_t)i;
--#else
-- return (uint64_t)n;
-+#ifdef _MSC_VER
-+ if (n >= 9223372036854775808.0) /* They think it's a feature. */
-+ return (uint64_t)(int64_t)(n - 18446744073709551616.0);
-+ else
- #endif
-+ return (uint64_t)n;
- }
-
- static LJ_AINLINE int32_t numberVint(cTValue *o)
---
-2.20.1
-
diff --git a/0068-bench-Fix-build-warnings.patch b/0068-bench-Fix-build-warnings.patch
deleted file mode 100644
index 5ee8bc7..0000000
--- a/0068-bench-Fix-build-warnings.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-commit 0513e634f0013083d29af9f5762b225297d3ad6c (HEAD -> v2.1, origin/v2.1)
-Author: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Fri Apr 12 20:42:55 2019 +0530
-
- Remove built binary from git
-
- Oops.
-
-From 9b4f498707569f3ecf81a0561a0d3d91570cec3d Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Fri, 15 Mar 2019 15:51:02 +0530
-Subject: [PATCH 68/72] bench: Fix build warnings
-
----
- bench/Makefile | 2 +-
- bench/luajit-bench | Bin 571144 -> 571224 bytes
- bench/luajit-bench.c | 1 +
- 3 files changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/bench/Makefile b/bench/Makefile
-index d0c1e8d..87d213a 100644
---- a/bench/Makefile
-+++ b/bench/Makefile
-@@ -44,7 +44,7 @@ endif
- LUAJIT_A = ../src/$(FILE_A)
-
- $(BENCH_BIN): $(LUAJIT_A) $(BENCH_BIN).c Makefile
-- $(CC) $@.c $(DURATION) -g -O3 -c -o $@.o -I ../src
-+ $(CC) $@.c -std=gnu11 $(DURATION) -g -O3 -c -o $@.o -I ../src
- $(CC) $@.o -lpthread $< -lm -ldl -o $@
-
- # Build the luajit static library if it doesn't exist.
-diff --git a/bench/luajit-bench.c b/bench/luajit-bench.c
-index e7b068d..6603132 100644
---- a/bench/luajit-bench.c
-+++ b/bench/luajit-bench.c
-@@ -39,6 +39,7 @@
- #include <argp.h>
- #include <sys/param.h>
- #include <string.h>
-+#include <time.h>
-
- #include "lua.h"
- #include "lualib.h"
---
-2.20.1
-
diff --git a/0069-Guard-against-undefined-behaviour-when-casting-from-.patch b/0069-Guard-against-undefined-behaviour-when-casting-from-.patch
deleted file mode 100644
index e498f62..0000000
--- a/0069-Guard-against-undefined-behaviour-when-casting-from-.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 454bea87cff4ff3cd2fd9ae34a3718dd200ce0fb Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Sun, 17 Mar 2019 11:34:04 +0530
-Subject: [PATCH 69/72] Guard against undefined behaviour when casting from
- float to unsigned
-
-Only the range (-1.0, UINT64_MAX) can be safely converted to unsigned
-directly, and (-INT64_MAX, INT64_MAX) through a cast to int64_t first.
-The remaining range is undefined.
-
-TODO: Do the same for JIT as well as for float to other ranges.
----
- src/lj_obj.h | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_obj.h b/src/lj_obj.h
-index c7e4742..4ff5944 100644
---- a/src/lj_obj.h
-+++ b/src/lj_obj.h
-@@ -944,12 +944,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
-
- static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
- {
-+ /* Undefined behaviour. This is deliberately not a full check because we
-+ don't want to slow down compliant code. */
-+ lua_assert(n >= -9223372036854775809.0);
- #ifdef _MSC_VER
- if (n >= 9223372036854775808.0) /* They think it's a feature. */
- return (uint64_t)(int64_t)(n - 18446744073709551616.0);
- else
- #endif
-- return (uint64_t)n;
-+ if (n > -1.0)
-+ return (uint64_t)n;
-+ else
-+ return (uint64_t)(int64_t)n;
- }
-
- static LJ_AINLINE int32_t numberVint(cTValue *o)
---
-2.20.1
-
diff --git a/0070-Fix-build-erro-with-fnmsub-fusing.patch b/0070-Fix-build-erro-with-fnmsub-fusing.patch
deleted file mode 100644
index a506e4f..0000000
--- a/0070-Fix-build-erro-with-fnmsub-fusing.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From ddca2290b8fa73fc32e88f83105219a1f2be75ff Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Mon, 25 Mar 2019 17:56:53 +0530
-Subject: [PATCH 70/72] Fix build erro with fnmsub fusing
-
----
- src/lj_asm_arm64.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 470e65d..42a4fae 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -1510,7 +1510,7 @@ static void asm_mod(ASMState *as, IRIns *ir)
- static void asm_neg(ASMState *as, IRIns *ir)
- {
- if (irt_isnum(ir->t)) {
-- if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
-+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd))
- asm_fpunary(as, ir, A64I_FNEGd);
- return;
- }
---
-2.20.1
-
diff --git a/0071-aarch64-better-float-to-unsigned-int-conversion.patch b/0071-aarch64-better-float-to-unsigned-int-conversion.patch
deleted file mode 100644
index 305f07b..0000000
--- a/0071-aarch64-better-float-to-unsigned-int-conversion.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From 70e65633d892765bcbaad3493e5b690abd5402f2 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Thu, 28 Mar 2019 09:19:34 +0530
-Subject: [PATCH 71/72] aarch64: better float to unsigned int conversion
-
-A straight float to unsigned conversion has a limited range of (-1.0,
-UTYPE_MAX) which should be fine in general but for the sake of
-consistency across the interpreter and the JIT compiler, it is
-necessary to work a wee bit harder to expand this range to (TYPE_MIN,
-UTYPE_MAX), which can be done with a simple range check. This adds a
-couple of branches but only one of the branches should have a
-noticeable performance impact on most processors with branch
-predictors, and that too only if the input number varies wildly in
-range.
-
-This currently works only for 64-bit conversions, 32-bit is still WIP.
----
- src/lj_asm_arm64.h | 30 ++++++++++++++++++++++--------
- src/lj_target_arm64.h | 1 +
- 2 files changed, 23 insertions(+), 8 deletions(-)
-
-diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
-index 42a4fae..c72144a 100644
---- a/src/lj_asm_arm64.h
-+++ b/src/lj_asm_arm64.h
-@@ -594,14 +594,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
- } else {
- Reg left = ra_alloc1(as, lref, RSET_FPR);
- Reg dest = ra_dest(as, ir, RSET_GPR);
-- A64Ins ai = irt_is64(ir->t) ?
-- (st == IRT_NUM ?
-- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
-- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
-- (st == IRT_NUM ?
-- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
-- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
-- emit_dn(as, ai, dest, (left & 31));
-+
-+ A64Ins ai_signed = st == IRT_NUM ?
-+ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) :
-+ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32);
-+
-+ if (irt_isi64(ir->t) || irt_isint(ir->t))
-+ emit_dn(as, ai_signed, dest, (left & 31));
-+ else {
-+ A64Ins ai_unsigned = st == IRT_NUM ?
-+ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) :
-+ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32);
-+
-+ MCLabel l_done = emit_label(as);
-+ emit_dn(as, ai_unsigned, dest, (left & 31));
-+ MCLabel l_signed = emit_label(as);
-+ emit_jmp(as, l_done);
-+ emit_dn(as, ai_signed, dest, (left & 31));
-+ /* The valid range for float to unsigned int conversion is (-1.0,
-+ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */
-+ emit_cond_branch(as, CC_PL, l_signed);
-+ emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0);
-+ }
- }
- } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
- Reg dest = ra_dest(as, ir, RSET_GPR);
-diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
-index a207a2b..2f8357f 100644
---- a/src/lj_target_arm64.h
-+++ b/src/lj_target_arm64.h
-@@ -279,6 +279,7 @@ typedef enum A64Ins {
- A64I_STPs = 0x2d000000,
- A64I_STPd = 0x6d000000,
- A64I_FCMPd = 0x1e602000,
-+ A64I_FCMPZs = 0x1e202008,
- A64I_FCMPZd = 0x1e602008,
- A64I_FCSELd = 0x1e600c00,
- A64I_FRINTMd = 0x1e654000,
---
-2.20.1
-
diff --git a/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch b/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch
deleted file mode 100644
index 20cb957..0000000
--- a/0072-Better-behaviour-for-float-to-uint32_t-conversions.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From f2779155495aee6583abaff4700a7acda80864ef Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Thu, 28 Mar 2019 10:50:23 +0530
-Subject: [PATCH 72/72] Better behaviour for float to uint32_t conversions
-
-This is the uint32_t part of the float to unsigned int conversions for
-the interpreter. The cast ends up working correctly for x86 but not
-for aarch64 since fcvtzu sets the result to zero on negative inputs.
-Work slightly harder to make sure that negative number inputs behave
-like x86.
-
-This fixes the interpreter but not the JIT compiler, which errors out
-during the narrowing pass.
----
- src/lj_cconv.c | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_cconv.c b/src/lj_cconv.c
-index 13b8230..bf8f8e8 100644
---- a/src/lj_cconv.c
-+++ b/src/lj_cconv.c
-@@ -196,7 +196,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
- else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
- else *(int8_t *)dp = (int8_t)i;
- } else if (dsize == 4) {
-- *(uint32_t *)dp = (uint32_t)n;
-+ /* Undefined behaviour. This is deliberately not a full check because we
-+ * don't want to slow down compliant code. */
-+ lua_assert(n >= -2147483649.0);
-+ if (n > -1.0)
-+ *(uint32_t *)dp = (uint32_t)n;
-+ else
-+ *(uint32_t *)dp = (uint32_t)(int32_t)n;
- } else if (dsize == 8) {
- if (!(dinfo & CTF_UNSIGNED))
- *(int64_t *)dp = (int64_t)n;
---
-2.20.1
-
diff --git a/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch b/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch
deleted file mode 100644
index 44aeea4..0000000
--- a/arm-Fix-up-condition-codes-for-conditional-arithmeti.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From 24429cc95657332e3953a21581d3220884da3d75 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Wed, 8 May 2019 22:14:00 +0530
-Subject: [PATCH] arm: Fix up condition codes for conditional arithmetic insn
-
-When an arithmetic instruction such as add or sub is combined with a
-subsequent compare with zero, its following conditional branch code
-needs fixing up. This is necessary because one could generate an add
-with a subtract of the negative but such a substitution, while correct
-on its own, will change the effect on condition flags since while
-addition of two positive numbers may signal an overflow, addition of a
-positive and a negative number may not. So if earlier the condition
-code was GE, it needs to be fixed up to PL to remain correct.
-
-We did that for bit operations but not for arithmetic, so do that now.
----
- src/lj_asm_arm.h | 38 ++++++++++++++++++++------------------
- 1 file changed, 20 insertions(+), 18 deletions(-)
-
-diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
-index 37bfa40f..e585b4c2 100644
---- a/src/lj_asm_arm.h
-+++ b/src/lj_asm_arm.h
-@@ -1412,13 +1412,28 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
- emit_dn(as, ai^m, dest, left);
- }
-
--static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
-+static ARMIns maybe_drop_zero_cmp(ASMState *as, ARMIns ai)
- {
-- if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
-+ if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
-+ uint32_t cc = (as->mcp[1] >> 28);
- as->flagmcp = NULL;
-- as->mcp++;
-- ai |= ARMI_S;
-+ if (cc <= CC_NE) {
-+ as->mcp++;
-+ ai |= ARMI_S;
-+ } else if (cc == CC_GE) {
-+ *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-+ ai |= ARMI_S;
-+ } else if (cc == CC_LT) {
-+ *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-+ ai |= ARMI_S;
-+ } /* else: other conds don't work with bit ops. */
- }
-+ return ai;
-+}
-+
-+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
-+{
-+ ai = maybe_drop_zero_cmp(as, ai);
- asm_intop(as, ir, ai);
- }
-
-@@ -1514,20 +1529,7 @@ static void asm_neg(ASMState *as, IRIns *ir)
-
- static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
- {
-- if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
-- uint32_t cc = (as->mcp[1] >> 28);
-- as->flagmcp = NULL;
-- if (cc <= CC_NE) {
-- as->mcp++;
-- ai |= ARMI_S;
-- } else if (cc == CC_GE) {
-- *++as->mcp ^= ((CC_GE^CC_PL) << 28);
-- ai |= ARMI_S;
-- } else if (cc == CC_LT) {
-- *++as->mcp ^= ((CC_LT^CC_MI) << 28);
-- ai |= ARMI_S;
-- } /* else: other conds don't work with bit ops. */
-- }
-+ ai = maybe_drop_zero_cmp(as, ai);
- if (ir->op2 == 0) {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
---
-2.21.0
-
diff --git a/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch b/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch
deleted file mode 100644
index 939ac87..0000000
--- a/bugfix-fixed-a-segfault-when-unsinking-64-bit-pointers.patch
+++ /dev/null
@@ -1,160 +0,0 @@
-From a6a2720ddc22f9f62f119325881d05722c4f392e Mon Sep 17 00:00:00 2001
-From: Thibault Charbonnier <thibaultcha(a)me.com>
-Date: Tue, 19 Mar 2019 13:52:51 -0700
-Subject: [PATCH 1/3] bugfix: fixed a segfault when unsinking 64-bit pointers.
-
-The unsinking code was not using the correct layout for GC64 IR
-constants (value in adjacent slot) for this case.
-
-This patch is a derivative of
-https://github.com/raptorjit/raptorjit/pull/246 ported for LuaJIT
-itself.
-
-Fixed after an intense debugging session with @lukego.
-
-Co-authored-by: Luke Gorrie <lukego(a)gmail.com>
----
- src/lj_ir.h | 12 ++++++------
- src/lj_snap.c | 2 +-
- 2 files changed, 7 insertions(+), 7 deletions(-)
-
-diff --git a/src/lj_ir.h b/src/lj_ir.h
-index 8057a750..a46b561f 100644
---- a/src/lj_ir.h
-+++ b/src/lj_ir.h
-@@ -562,6 +562,11 @@ typedef union IRIns {
- TValue tv; /* TValue constant (overlaps entire slot). */
- } IRIns;
-
-+#define ir_isk64(ir) ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
-+ (LJ_GC64 && \
-+ ((ir)->o == IR_KGC || \
-+ (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)))
-+
- #define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
- #define ir_kstr(ir) (gco2str(ir_kgc((ir))))
- #define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
-@@ -569,12 +574,7 @@ typedef union IRIns {
- #define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
- #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
- #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
--#define ir_k64(ir) \
-- check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
-- (LJ_GC64 && \
-- ((ir)->o == IR_KGC || \
-- (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \
-- &(ir)[1].tv)
-+#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv)
- #define ir_kptr(ir) \
- check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
- mref((ir)[LJ_GC64].ptr, void))
-diff --git a/src/lj_snap.c b/src/lj_snap.c
-index ceaf2ca5..75888d80 100644
---- a/src/lj_snap.c
-+++ b/src/lj_snap.c
-@@ -688,7 +688,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
- int32_t *src;
- uint64_t tmp;
- if (irref_isk(ref)) {
-- if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
-+ if (ir_isk64(ir)) {
- src = (int32_t *)&ir[1];
- } else if (sz == 8) {
- tmp = (uint64_t)(uint32_t)ir->i;
---
-2.21.0
-
-
-From f36cddf49b664d713bfa7c332673bdc66861d2ad Mon Sep 17 00:00:00 2001
-From: Thibault Charbonnier <thibaultcha(a)me.com>
-Date: Tue, 19 Mar 2019 13:49:18 -0700
-Subject: [PATCH 2/3] tests: ffi: added a test case unsinking a 64-bit pointer
- from a constant.
-
-This test case reproduces the issue observed at:
-https://github.com/openresty/lua-resty-core/issues/232 and was
-contributed by @lukego and myself.
-
-Co-authored-by: Luke Gorrie <lukego(a)gmail.com>
----
- test/ffi/unsink_64_kptr.lua | 26 ++++++++++++++++++++++++++
- 1 file changed, 26 insertions(+)
- create mode 100644 test/ffi/unsink_64_kptr.lua
-
-diff --git a/test/ffi/unsink_64_kptr.lua b/test/ffi/unsink_64_kptr.lua
-new file mode 100644
-index 00000000..7fab0e89
---- /dev/null
-+++ b/test/ffi/unsink_64_kptr.lua
-@@ -0,0 +1,26 @@
-+local ffi = require("ffi")
-+
-+local array = ffi.new("struct { int x; } [1]")
-+
-+-- This test forces the VM to unsink a pointer that was constructed
-+-- from a constant. The IR will include a 'cnewi' instruction to
-+-- allocate an FFI pointer object, the pointer value will be an IR
-+-- constant, the allocation will be sunk, and the allocation will
-+-- at some point be "unsunk" due to a reference in the snapshot for
-+-- a taken exit.
-+
-+-- Note: JIT will recognize <array> as a "singleton" and allow its
-+-- address to be inlined ("constified") instead of looking up the
-+-- upvalue at runtime.
-+
-+local function fn(i)
-+ local struct = array[0] -- Load pointer that the JIT will constify.
-+ if i == 1000 then end -- Force trace exit when i==1000.
-+ struct.x = 0 -- Ensure that 'struct' is live after exit.
-+end
-+
-+-- Loop over the function to make it compile and take a trace exit
-+-- during the final iteration.
-+for i = 1, 1000 do
-+ fn(i)
-+end
---
-2.21.0
-
-
-From 7b2f874b8061f206b22c04aee336b15030213637 Mon Sep 17 00:00:00 2001
-From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Tue, 14 May 2019 22:01:37 +0530
-Subject: [PATCH 3/3] Make unsink_64_kptr usable in the testsuite
-
----
- test/lib/ffi/index | 1 +
- test/{ => lib}/ffi/unsink_64_kptr.lua | 6 ++++--
- 2 files changed, 5 insertions(+), 2 deletions(-)
- rename test/{ => lib}/ffi/unsink_64_kptr.lua (93%)
-
-diff --git a/test/lib/ffi/index b/test/lib/ffi/index
-index 59e36dd8..7933c5a7 100644
---- a/test/lib/ffi/index
-+++ b/test/lib/ffi/index
-@@ -10,3 +10,4 @@ jit_struct.lua
- meta_tostring.lua
- redir.lua
- type_punning.lua
-+unsink_64_kptr.lua
-diff --git a/test/ffi/unsink_64_kptr.lua b/test/lib/ffi/unsink_64_kptr.lua
-similarity index 93%
-rename from test/ffi/unsink_64_kptr.lua
-rename to test/lib/ffi/unsink_64_kptr.lua
-index 7fab0e89..f285d9ff 100644
---- a/test/ffi/unsink_64_kptr.lua
-+++ b/test/lib/ffi/unsink_64_kptr.lua
-@@ -21,6 +21,8 @@ end
-
- -- Loop over the function to make it compile and take a trace exit
- -- during the final iteration.
--for i = 1, 1000 do
-- fn(i)
-+do --- unsink 64-bit pointers
-+ for i = 1, 1000 do
-+ fn(i)
-+ end
- end
---
-2.21.0
-
diff --git a/0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch b/luajit-2.1-fedora.patch
similarity index 99%
rename from 0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch
rename to luajit-2.1-fedora.patch
index fb2b611..e84dfa1 100644
--- a/0064-Merge-in-LuaJIT-test-cleanup-into-the-main-repo.patch
+++ b/luajit-2.1-fedora.patch
@@ -1,15 +1,7 @@
-commit 0513e634f0013083d29af9f5762b225297d3ad6c (HEAD -> v2.1, origin/v2.1)
-Author: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Fri Apr 12 20:42:55 2019 +0530
-
- Remove built binary from git
-
- Oops.
-
-From 48eb69061df1da9d843707ec1d6b854255a3c87d Mon Sep 17 00:00:00 2001
+From 86a1a5033a3eb07e694f8e7f7024550928191024 Mon Sep 17 00:00:00 2001
From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
-Date: Tue, 12 Mar 2019 12:56:01 +0530
-Subject: [PATCH 64/72] Merge in LuaJIT-test-cleanup into the main repo
+Date: Thu, 21 Oct 2021 11:04:58 +0200
+Subject: [PATCH 01/10] Merge in LuaJIT-test-cleanup into the main repo
The tests and benchmarks in the LuaJIT-test-cleanup repo are more or
less complete and with scaffolding added, they can now be called
@@ -24,7 +16,6 @@ taken) and LuaJIT itself to allow for a more succint copyright notice
that credits authors in addition to Mike Pall in the COPYRIGHT file.
---
CONTRIBUTORS | 17 +
- COPYRIGHT | 3 +-
Makefile | 19 +-
bench/FASTA_10000 | 1671 +
bench/FASTA_1000000 | 166671 ++++++++++++++++++++++
@@ -38,9 +29,9 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
bench/SUMCOL_1.txt | 1000 +
bench/SUMCOL_100 | 100 +
bench/SUMCOL_1000 | 1000 +
- bench/TEST_md5sum.txt | 20 +
+ bench/TEST_md5sum.txt | 19 +
bench/TEST_md5sum_arm64.txt | 15 +
- bench/array3d.lua | 59 +
+ bench/array3d.lua | 58 +
bench/binary-trees.lua | 47 +
bench/chameneos.lua | 68 +
bench/coroutine-ring.lua | 42 +
@@ -49,12 +40,11 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
bench/fasta.lua | 95 +
bench/k-nucleotide.lua | 62 +
bench/life.lua | 111 +
- bench/luajit-bench | Bin 0 -> 571144 bytes
- bench/luajit-bench.c | 283 +
+ bench/luajit-bench.c | 284 +
bench/luajit-bench.lua | 53 +
bench/mandelbrot-bit.lua | 33 +
bench/mandelbrot.lua | 23 +
- bench/md5.lua | 183 +
+ bench/md5.lua | 182 +
bench/meteor.lua | 220 +
bench/nbody.lua | 119 +
bench/nsieve-bit-fp.lua | 37 +
@@ -66,7 +56,7 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
bench/recursive-ack.lua | 8 +
bench/recursive-fib.lua | 7 +
bench/revcomp.lua | 39 +
- bench/scimark-2010-12-20.lua | 400 +
+ bench/scimark-2010-12-20.lua | 399 +
bench/scimark-fft.lua | 1 +
bench/scimark-lu.lua | 1 +
bench/scimark-sor.lua | 1 +
@@ -79,14 +69,14 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/bc/constov.lua | 16 +
test/bc/index | 1 +
test/common/expect_error.lua | 16 +
- test/common/ffi_util.inc | 41 +
+ test/common/ffi_util.inc | 40 +
test/common/test_runner_canary.lua | 1 +
test/computations.lua | 113 +
test/index | 6 +
test/lang/andor.lua | 61 +
test/lang/assignment.lua | 46 +
test/lang/compare.lua | 323 +
- test/lang/compare_nan.lua | 99 +
+ test/lang/compare_nan.lua | 98 +
test/lang/concat.lua | 112 +
test/lang/constant/index | 2 +
test/lang/constant/number.lua | 12 +
@@ -100,9 +90,9 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lang/meta/arith.lua | 118 +
test/lang/meta/arith_jit.lua | 68 +
test/lang/meta/call.lua | 81 +
- test/lang/meta/cat.lua | 61 +
+ test/lang/meta/cat.lua | 60 +
test/lang/meta/comp.lua | 120 +
- test/lang/meta/comp_jit.lua | 104 +
+ test/lang/meta/comp_jit.lua | 103 +
test/lang/meta/debuginfo.lua | 81 +
test/lang/meta/eq.lua | 30 +
test/lang/meta/eq_jit.lua | 35 +
@@ -111,14 +101,14 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lang/meta/index.lua | 60 +
test/lang/meta/len.lua | 42 +
test/lang/meta/newindex.lua | 69 +
- test/lang/meta/nomm.lua | 21 +
+ test/lang/meta/nomm.lua | 20 +
test/lang/modulo.lua | 46 +
test/lang/self.lua | 19 +
test/lang/table.lua | 32 +
test/lang/tail_recursion.lua | 20 +
test/lang/upvalue/closure.lua | 84 +
test/lang/upvalue/index | 1 +
- test/lang/vararg_jit.lua | 95 +
+ test/lang/vararg_jit.lua | 94 +
test/lib/base/assert.lua | 33 +
test/lib/base/error.lua | 43 +
test/lib/base/getfenv.lua | 13 +
@@ -135,36 +125,36 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lib/contents.lua | 158 +
test/lib/coroutine/index | 1 +
test/lib/coroutine/yield.lua | 109 +
- test/lib/ffi/bit64.lua | 130 +
+ test/lib/ffi/bit64.lua | 129 +
test/lib/ffi/cdata_var.lua | 47 +
- test/lib/ffi/copy_fill.lua | 64 +
- test/lib/ffi/err.lua | 35 +
- test/lib/ffi/ffi_arith_ptr.lua | 106 +
- test/lib/ffi/ffi_bitfield.lua | 108 +
- test/lib/ffi/ffi_call.lua | 266 +
- test/lib/ffi/ffi_callback.lua | 158 +
- test/lib/ffi/ffi_const.lua | 113 +
- test/lib/ffi/ffi_convert.lua | 787 +
- test/lib/ffi/ffi_enum.lua | 57 +
- test/lib/ffi/ffi_gcstep_recursive.lua | 66 +
- test/lib/ffi/ffi_jit_arith.lua | 155 +
- test/lib/ffi/ffi_jit_call.lua | 154 +
+ test/lib/ffi/copy_fill.lua | 63 +
+ test/lib/ffi/err.lua | 34 +
+ test/lib/ffi/ffi_arith_ptr.lua | 105 +
+ test/lib/ffi/ffi_bitfield.lua | 107 +
+ test/lib/ffi/ffi_call.lua | 265 +
+ test/lib/ffi/ffi_callback.lua | 157 +
+ test/lib/ffi/ffi_const.lua | 112 +
+ test/lib/ffi/ffi_convert.lua | 786 +
+ test/lib/ffi/ffi_enum.lua | 56 +
+ test/lib/ffi/ffi_gcstep_recursive.lua | 65 +
+ test/lib/ffi/ffi_jit_arith.lua | 154 +
+ test/lib/ffi/ffi_jit_call.lua | 153 +
test/lib/ffi/ffi_jit_conv.lua | 277 +
- test/lib/ffi/ffi_lex_number.lua | 51 +
- test/lib/ffi/ffi_metatype.lua | 245 +
- test/lib/ffi/ffi_new.lua | 106 +
- test/lib/ffi/ffi_parse_array.lua | 78 +
- test/lib/ffi/ffi_parse_basic.lua | 131 +
- test/lib/ffi/ffi_parse_cdef.lua | 77 +
- test/lib/ffi/ffi_parse_struct.lua | 259 +
+ test/lib/ffi/ffi_lex_number.lua | 50 +
+ test/lib/ffi/ffi_metatype.lua | 244 +
+ test/lib/ffi/ffi_new.lua | 105 +
+ test/lib/ffi/ffi_parse_array.lua | 77 +
+ test/lib/ffi/ffi_parse_basic.lua | 130 +
+ test/lib/ffi/ffi_parse_cdef.lua | 76 +
+ test/lib/ffi/ffi_parse_struct.lua | 258 +
test/lib/ffi/ffi_tabov.lua | 12 +
test/lib/ffi/index | 12 +
test/lib/ffi/istype.lua | 88 +
test/lib/ffi/jit_array.lua | 104 +
test/lib/ffi/jit_complex.lua | 109 +
test/lib/ffi/jit_misc.lua | 109 +
- test/lib/ffi/jit_struct.lua | 201 +
- test/lib/ffi/meta_tostring.lua | 55 +
+ test/lib/ffi/jit_struct.lua | 200 +
+ test/lib/ffi/meta_tostring.lua | 54 +
test/lib/ffi/redir.lua | 19 +
test/lib/ffi/type_punning.lua | 138 +
test/lib/index | 8 +
@@ -187,50 +177,50 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/lib/string/sub.lua | 189 +
test/lib/table/concat.lua | 55 +
test/lib/table/index | 6 +
- test/lib/table/insert.lua | 17 +
- test/lib/table/misc.lua | 58 +
+ test/lib/table/insert.lua | 16 +
+ test/lib/table/misc.lua | 55 +
test/lib/table/new.lua | 11 +
test/lib/table/pack.lua | 7 +
test/lib/table/remove.lua | 42 +
test/lib/table/sort.lua | 27 +
- test/misc/alias_alloc.lua | 54 +
- test/misc/api_call.lua | 98 +
- test/misc/catch_wrap.lua | 45 +
- test/misc/coro_traceback.lua | 8 +
- test/misc/coro_yield.lua | 111 +
+ test/misc/alias_alloc.lua | 53 +
+ test/misc/api_call.lua | 97 +
+ test/misc/catch_wrap.lua | 44 +
+ test/misc/coro_traceback.lua | 7 +
+ test/misc/coro_yield.lua | 110 +
test/misc/debug_gc.lua | 47 +
- test/misc/dualnum.lua | 47 +
- test/misc/for_dir.lua | 13 +
- test/misc/fori_coerce.lua | 33 +
- test/misc/gc_rechain.lua | 32 +
- test/misc/gc_trace.lua | 37 +
- test/misc/gcstep.lua | 33 +
- test/misc/hook_active.lua | 95 +
- test/misc/hook_line.lua | 41 +
- test/misc/hook_norecord.lua | 12 +
- test/misc/hook_record.lua | 8 +
- test/misc/hook_top.lua | 55 +
- test/misc/jit_flush.lua | 50 +
- test/misc/lightud.lua | 88 +
- test/misc/loop_unroll.lua | 35 +
+ test/misc/dualnum.lua | 46 +
+ test/misc/for_dir.lua | 12 +
+ test/misc/fori_coerce.lua | 32 +
+ test/misc/gc_rechain.lua | 31 +
+ test/misc/gc_trace.lua | 36 +
+ test/misc/gcstep.lua | 32 +
+ test/misc/hook_active.lua | 94 +
+ test/misc/hook_line.lua | 40 +
+ test/misc/hook_norecord.lua | 11 +
+ test/misc/hook_record.lua | 7 +
+ test/misc/hook_top.lua | 54 +
+ test/misc/jit_flush.lua | 49 +
+ test/misc/lightud.lua | 87 +
+ test/misc/loop_unroll.lua | 34 +
test/misc/parse_comp.lua | 13 +
test/misc/parse_esc.lua | 7 +
- test/misc/parse_misc.lua | 31 +
- test/misc/phi_conv.lua | 53 +
- test/misc/recurse_deep.lua | 29 +
- test/misc/recurse_tail.lua | 22 +
- test/misc/stack_gc.lua | 15 +
- test/misc/stack_purge.lua | 25 +
- test/misc/stackov.lua | 40 +
+ test/misc/parse_misc.lua | 30 +
+ test/misc/phi_conv.lua | 52 +
+ test/misc/recurse_deep.lua | 28 +
+ test/misc/recurse_tail.lua | 21 +
+ test/misc/stack_gc.lua | 14 +
+ test/misc/stack_purge.lua | 24 +
+ test/misc/stackov.lua | 39 +
test/misc/stackovc.lua | 4 +
- test/misc/tcall_base.lua | 20 +
+ test/misc/tcall_base.lua | 19 +
test/misc/tcall_loop.lua | 8 +
- test/misc/tonumber_scan.lua | 180 +
- test/misc/uclo.lua | 91 +
- test/misc/unordered_jit.lua | 96 +
+ test/misc/tonumber_scan.lua | 179 +
+ test/misc/uclo.lua | 90 +
+ test/misc/unordered_jit.lua | 95 +
test/misc/wbarrier.lua | 7 +
- test/misc/wbarrier_jit.lua | 18 +
- test/misc/wbarrier_obar.lua | 22 +
+ test/misc/wbarrier_jit.lua | 17 +
+ test/misc/wbarrier_obar.lua | 21 +
test/opt/dse/array.lua | 197 +
test/opt/dse/field.lua | 70 +
test/opt/dse/index | 2 +
@@ -251,11 +241,11 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/opt/sink/nosink.lua | 109 +
test/src/cpptest.cpp | 129 +
test/src/ctest.c | 339 +
- test/sysdep/catch_cpp.lua | 71 +
+ test/sysdep/catch_cpp.lua | 70 +
test/sysdep/ffi_include_gtk.lua | 9 +
- test/sysdep/ffi_include_std.lua | 36 +
- test/sysdep/ffi_lib_c.lua | 87 +
- test/sysdep/ffi_lib_z.lua | 107 +
+ test/sysdep/ffi_include_std.lua | 35 +
+ test/sysdep/ffi_lib_c.lua | 86 +
+ test/sysdep/ffi_lib_z.lua | 106 +
test/test.lua | 416 +
test/trace/exit_frame.lua | 79 +
test/trace/exit_growstack.lua | 28 +
@@ -269,8 +259,8 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
test/trace/snap.lua | 47 +
test/trace/stitch.lua | 19 +
test/unportable/ffi_arith_int64.lua | 68 +
- test/unportable/math_special.lua | 55 +
- 247 files changed, 186644 insertions(+), 5 deletions(-)
+ test/unportable/math_special.lua | 54 +
+ 245 files changed, 186570 insertions(+), 4 deletions(-)
create mode 100644 CONTRIBUTORS
create mode 100644 bench/FASTA_10000
create mode 100644 bench/FASTA_1000000
@@ -295,7 +285,6 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
create mode 100644 bench/fasta.lua
create mode 100644 bench/k-nucleotide.lua
create mode 100644 bench/life.lua
- create mode 100755 bench/luajit-bench
create mode 100644 bench/luajit-bench.c
create mode 100644 bench/luajit-bench.lua
create mode 100644 bench/mandelbrot-bit.lua
@@ -519,7 +508,7 @@ that credits authors in addition to Mike Pall in the COPYRIGHT file.
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
-index 0000000..a1c9209
+index 00000000..a1c9209b
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,17 @@
@@ -540,22 +529,8 @@ index 0000000..a1c9209
+Siddhesh Poyarekar
+Vlad Krasnov
+William Adams
-diff --git a/COPYRIGHT b/COPYRIGHT
-index 6ed4002..1e5c442 100644
---- a/COPYRIGHT
-+++ b/COPYRIGHT
-@@ -1,7 +1,8 @@
- ===============================================================================
- LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
-
--Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-+Copyright (C) 2005-2019 Mike Pall. All rights reserved.
-+Copyright (C) 2015-2019 LuaJIT Contributors, see CONTRIBUTORS file for a list.
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
diff --git a/Makefile b/Makefile
-index 0f93308..923bf72 100644
+index aa1b84bd..cb2b3418 100644
--- a/Makefile
+++ b/Makefile
@@ -106,14 +106,14 @@ endif
@@ -604,7 +579,7 @@ index 0f93308..923bf72 100644
##############################################################################
diff --git a/bench/FASTA_10000 b/bench/FASTA_10000
new file mode 100644
-index 0000000..fb23263
+index 00000000..fb232633
--- /dev/null
+++ b/bench/FASTA_10000
@@ -0,0 +1,1671 @@
@@ -2281,7 +2256,7 @@ index 0000000..fb23263
+gagatacctttgcaattttt
diff --git a/bench/FASTA_1000000 b/bench/FASTA_1000000
new file mode 100644
-index 0000000..bafe0c5
+index 00000000..bafe0c5d
--- /dev/null
+++ b/bench/FASTA_1000000
@@ -0,0 +1,166671 @@
@@ -168958,7 +168933,7 @@ index 0000000..bafe0c5
+tacactgatacgaattattt
diff --git a/bench/Makefile b/bench/Makefile
new file mode 100644
-index 0000000..d0c1e8d
+index 00000000..87d213a5
--- /dev/null
+++ b/bench/Makefile
@@ -0,0 +1,56 @@
@@ -169008,7 +168983,7 @@ index 0000000..d0c1e8d
+LUAJIT_A = ../src/$(FILE_A)
+
+$(BENCH_BIN): $(LUAJIT_A) $(BENCH_BIN).c Makefile
-+ $(CC) $@.c $(DURATION) -g -O3 -c -o $@.o -I ../src
++ $(CC) $@.c -std=gnu11 $(DURATION) -g -O3 -c -o $@.o -I ../src
+ $(CC) $@.o -lpthread $< -lm -ldl -o $@
+
+# Build the luajit static library if it doesn't exist.
@@ -169020,7 +168995,7 @@ index 0000000..d0c1e8d
+endif
diff --git a/bench/PARAM_arm.txt b/bench/PARAM_arm.txt
new file mode 100644
-index 0000000..a07fd01
+index 00000000..a07fd010
--- /dev/null
+++ b/bench/PARAM_arm.txt
@@ -0,0 +1,29 @@
@@ -169055,7 +169030,7 @@ index 0000000..a07fd01
+sum-file 1000 SUMCOL_1000
diff --git a/bench/PARAM_arm64.txt b/bench/PARAM_arm64.txt
new file mode 100644
-index 0000000..1c27638
+index 00000000..1c276385
--- /dev/null
+++ b/bench/PARAM_arm64.txt
@@ -0,0 +1,29 @@
@@ -169090,7 +169065,7 @@ index 0000000..1c27638
+sum-file 2e7 SUMCOL_1000
diff --git a/bench/PARAM_mips.txt b/bench/PARAM_mips.txt
new file mode 100644
-index 0000000..e6bcadb
+index 00000000..e6bcadba
--- /dev/null
+++ b/bench/PARAM_mips.txt
@@ -0,0 +1,29 @@
@@ -169125,7 +169100,7 @@ index 0000000..e6bcadb
+sum-file 100 SUMCOL_100
diff --git a/bench/PARAM_ppc.txt b/bench/PARAM_ppc.txt
new file mode 100644
-index 0000000..c8319a1
+index 00000000..c8319a15
--- /dev/null
+++ b/bench/PARAM_ppc.txt
@@ -0,0 +1,29 @@
@@ -169160,7 +169135,7 @@ index 0000000..c8319a1
+sum-file 1000 SUMCOL_1000
diff --git a/bench/PARAM_x86.txt b/bench/PARAM_x86.txt
new file mode 100644
-index 0000000..87088d7
+index 00000000..87088d7b
--- /dev/null
+++ b/bench/PARAM_x86.txt
@@ -0,0 +1,29 @@
@@ -169195,7 +169170,7 @@ index 0000000..87088d7
+sum-file 5000 SUMCOL_5000
diff --git a/bench/README b/bench/README
new file mode 100644
-index 0000000..16f55cb
+index 00000000..16f55cbb
--- /dev/null
+++ b/bench/README
@@ -0,0 +1,37 @@
@@ -169238,7 +169213,7 @@ index 0000000..16f55cb
+results using the benchmark binary with that of the script.
diff --git a/bench/SUMCOL_1.txt b/bench/SUMCOL_1.txt
new file mode 100644
-index 0000000..956aba1
+index 00000000..956aba14
--- /dev/null
+++ b/bench/SUMCOL_1.txt
@@ -0,0 +1,1000 @@
@@ -170244,7 +170219,7 @@ index 0000000..956aba1
+264
diff --git a/bench/SUMCOL_100 b/bench/SUMCOL_100
new file mode 100644
-index 0000000..daf0c7b
+index 00000000..daf0c7bb
--- /dev/null
+++ b/bench/SUMCOL_100
@@ -0,0 +1,100 @@
@@ -170350,7 +170325,7 @@ index 0000000..daf0c7b
+264
diff --git a/bench/SUMCOL_1000 b/bench/SUMCOL_1000
new file mode 100644
-index 0000000..956aba1
+index 00000000..956aba14
--- /dev/null
+++ b/bench/SUMCOL_1000
@@ -0,0 +1,1000 @@
@@ -171356,10 +171331,10 @@ index 0000000..956aba1
+264
diff --git a/bench/TEST_md5sum.txt b/bench/TEST_md5sum.txt
new file mode 100644
-index 0000000..15aa8a1
+index 00000000..7d417a88
--- /dev/null
+++ b/bench/TEST_md5sum.txt
-@@ -0,0 +1,20 @@
+@@ -0,0 +1,19 @@
+binarytrees 10 7202f4e13df7abc5ad8c07f05fe9d644
+chameneos 1e5 a629ce12f63050c6656bce175258cf8f
+cheapconcr 1000 d29799d1e263810a4db7bbf43ca66499
@@ -171379,10 +171354,9 @@ index 0000000..15aa8a1
+revcomp x 47de276e2f72519b57b82da39f4c7592 <FASTA_10000
+spectralnorm 200 25f44bd552ccd9faa0ee2ae5617947e2
+sumfile x 2ebd3caa45b31a2e74e436b645eab4b0 <SUMCOL_100
-+
diff --git a/bench/TEST_md5sum_arm64.txt b/bench/TEST_md5sum_arm64.txt
new file mode 100644
-index 0000000..deab02e
+index 00000000..deab02e5
--- /dev/null
+++ b/bench/TEST_md5sum_arm64.txt
@@ -0,0 +1,15 @@
@@ -171403,10 +171377,10 @@ index 0000000..deab02e
+sum-file x 5d6b881128665a84e8863cac991b18a2 SUMCOL_100
diff --git a/bench/array3d.lua b/bench/array3d.lua
new file mode 100644
-index 0000000..d638e4d
+index 00000000..0c83c6c8
--- /dev/null
+++ b/bench/array3d.lua
-@@ -0,0 +1,59 @@
+@@ -0,0 +1,58 @@
+
+local function array_set(self, x, y, z, p)
+ assert(x >= 0 and x < self.nx, "x outside PA")
@@ -171465,10 +171439,9 @@ index 0000000..d638e4d
+ arr:set(x, y, z, x*x)
+end
+assert(arr.image[dim^3-1] == (dim-1)^2)
-+
diff --git a/bench/binary-trees.lua b/bench/binary-trees.lua
new file mode 100644
-index 0000000..bf04046
+index 00000000..bf040466
--- /dev/null
+++ b/bench/binary-trees.lua
@@ -0,0 +1,47 @@
@@ -171521,7 +171494,7 @@ index 0000000..bf04046
+ maxdepth, ItemCheck(longlivedtree)))
diff --git a/bench/chameneos.lua b/bench/chameneos.lua
new file mode 100644
-index 0000000..78b64c3
+index 00000000..78b64c3f
--- /dev/null
+++ b/bench/chameneos.lua
@@ -0,0 +1,68 @@
@@ -171595,7 +171568,7 @@ index 0000000..78b64c3
+io.write(schedule(threads), "\n")
diff --git a/bench/coroutine-ring.lua b/bench/coroutine-ring.lua
new file mode 100644
-index 0000000..1e8c5ef
+index 00000000..1e8c5ef6
--- /dev/null
+++ b/bench/coroutine-ring.lua
@@ -0,0 +1,42 @@
@@ -171643,7 +171616,7 @@ index 0000000..1e8c5ef
+io.write(id, "\n")
diff --git a/bench/euler14-bit.lua b/bench/euler14-bit.lua
new file mode 100644
-index 0000000..537f2bf
+index 00000000..537f2bf3
--- /dev/null
+++ b/bench/euler14-bit.lua
@@ -0,0 +1,22 @@
@@ -171671,7 +171644,7 @@ index 0000000..537f2bf
+io.write("Found ", n, " (chain length: ", m, ")\n")
diff --git a/bench/fannkuch.lua b/bench/fannkuch.lua
new file mode 100644
-index 0000000..2a4cd42
+index 00000000..2a4cd426
--- /dev/null
+++ b/bench/fannkuch.lua
@@ -0,0 +1,50 @@
@@ -171727,7 +171700,7 @@ index 0000000..2a4cd42
+io.write("Pfannkuchen(", n, ") = ", fannkuch(n), "\n")
diff --git a/bench/fasta.lua b/bench/fasta.lua
new file mode 100644
-index 0000000..7ce6080
+index 00000000..7ce60804
--- /dev/null
+++ b/bench/fasta.lua
@@ -0,0 +1,95 @@
@@ -171828,7 +171801,7 @@ index 0000000..7ce6080
+make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, N*5)
diff --git a/bench/k-nucleotide.lua b/bench/k-nucleotide.lua
new file mode 100644
-index 0000000..b97e394
+index 00000000..b97e394c
--- /dev/null
+++ b/bench/k-nucleotide.lua
@@ -0,0 +1,62 @@
@@ -171896,7 +171869,7 @@ index 0000000..b97e394
+count(seq, "GGTATTTTAATTTATAGT")
diff --git a/bench/life.lua b/bench/life.lua
new file mode 100644
-index 0000000..911d9fe
+index 00000000..911d9fe1
--- /dev/null
+++ b/bench/life.lua
@@ -0,0 +1,111 @@
@@ -172013,10 +171986,10 @@ index 0000000..911d9fe
+LIFE(40,20)
diff --git a/bench/luajit-bench.c b/bench/luajit-bench.c
new file mode 100644
-index 0000000..e7b068d
+index 00000000..6603132b
--- /dev/null
+++ b/bench/luajit-bench.c
-@@ -0,0 +1,283 @@
+@@ -0,0 +1,284 @@
+/* Benchmark driver.
+ *
+ * Copyright (C) 2019 Vlad Krasnov
@@ -172058,6 +172031,7 @@ index 0000000..e7b068d
+#include <argp.h>
+#include <sys/param.h>
+#include <string.h>
++#include <time.h>
+
+#include "lua.h"
+#include "lualib.h"
@@ -172302,7 +172276,7 @@ index 0000000..e7b068d
+}
diff --git a/bench/luajit-bench.lua b/bench/luajit-bench.lua
new file mode 100644
-index 0000000..7238725
+index 00000000..72387254
--- /dev/null
+++ b/bench/luajit-bench.lua
@@ -0,0 +1,53 @@
@@ -172361,7 +172335,7 @@ index 0000000..7238725
+end
diff --git a/bench/mandelbrot-bit.lua b/bench/mandelbrot-bit.lua
new file mode 100644
-index 0000000..91d9697
+index 00000000..91d96975
--- /dev/null
+++ b/bench/mandelbrot-bit.lua
@@ -0,0 +1,33 @@
@@ -172400,7 +172374,7 @@ index 0000000..91d9697
+end
diff --git a/bench/mandelbrot.lua b/bench/mandelbrot.lua
new file mode 100644
-index 0000000..0ef595a
+index 00000000..0ef595a2
--- /dev/null
+++ b/bench/mandelbrot.lua
@@ -0,0 +1,23 @@
@@ -172429,10 +172403,10 @@ index 0000000..0ef595a
+end
diff --git a/bench/md5.lua b/bench/md5.lua
new file mode 100644
-index 0000000..fdf6b4a
+index 00000000..c4c087ee
--- /dev/null
+++ b/bench/md5.lua
-@@ -0,0 +1,183 @@
+@@ -0,0 +1,182 @@
+
+local bit = require("bit")
+local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot
@@ -172615,10 +172589,9 @@ index 0000000..fdf6b4a
+ res = md5(txt)
+end
+assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd')
-+
diff --git a/bench/meteor.lua b/bench/meteor.lua
new file mode 100644
-index 0000000..80588ab
+index 00000000..80588ab5
--- /dev/null
+++ b/bench/meteor.lua
@@ -0,0 +1,220 @@
@@ -172844,7 +172817,7 @@ index 0000000..80588ab
+printresult()
diff --git a/bench/nbody.lua b/bench/nbody.lua
new file mode 100644
-index 0000000..e0ff8f7
+index 00000000..e0ff8f77
--- /dev/null
+++ b/bench/nbody.lua
@@ -0,0 +1,119 @@
@@ -172969,7 +172942,7 @@ index 0000000..e0ff8f7
+io.write( string.format("%0.9f",energy(bodies, nbody)), "\n")
diff --git a/bench/nsieve-bit-fp.lua b/bench/nsieve-bit-fp.lua
new file mode 100644
-index 0000000..3971ec1
+index 00000000..3971ec1f
--- /dev/null
+++ b/bench/nsieve-bit-fp.lua
@@ -0,0 +1,37 @@
@@ -173012,7 +172985,7 @@ index 0000000..3971ec1
+end
diff --git a/bench/nsieve-bit.lua b/bench/nsieve-bit.lua
new file mode 100644
-index 0000000..820a372
+index 00000000..820a3726
--- /dev/null
+++ b/bench/nsieve-bit.lua
@@ -0,0 +1,27 @@
@@ -173045,7 +173018,7 @@ index 0000000..820a372
+end
diff --git a/bench/nsieve.lua b/bench/nsieve.lua
new file mode 100644
-index 0000000..6de0524
+index 00000000..6de0524f
--- /dev/null
+++ b/bench/nsieve.lua
@@ -0,0 +1,21 @@
@@ -173072,7 +173045,7 @@ index 0000000..6de0524
+end
diff --git a/bench/partialsums.lua b/bench/partialsums.lua
new file mode 100644
-index 0000000..09ac02f
+index 00000000..09ac02f9
--- /dev/null
+++ b/bench/partialsums.lua
@@ -0,0 +1,29 @@
@@ -173107,7 +173080,7 @@ index 0000000..09ac02f
+pr("%.9f\tGregory\n", a9)
diff --git a/bench/pidigits-nogmp.lua b/bench/pidigits-nogmp.lua
new file mode 100644
-index 0000000..63a1cb0
+index 00000000..63a1cb0e
--- /dev/null
+++ b/bench/pidigits-nogmp.lua
@@ -0,0 +1,100 @@
@@ -173213,7 +173186,7 @@ index 0000000..63a1cb0
+end
diff --git a/bench/ray.lua b/bench/ray.lua
new file mode 100644
-index 0000000..86f159b
+index 00000000..873cc995
--- /dev/null
+++ b/bench/ray.lua
@@ -0,0 +1,135 @@
@@ -173346,7 +173319,7 @@ index 0000000..86f159b
+ for d = y, y+.99, iss do
+ for e = x, x+.99, iss do
+ dir[1], dir[2], dir[3] = unitise(e, d, n)
-+ g = g + ray_trace(light, camera, dir, scene)
++ g = g + ray_trace(light, camera, dir, scene)
+ end
+ end
+ io.write(string.char(math.floor(0.5 + g*gf)))
@@ -173354,7 +173327,7 @@ index 0000000..86f159b
+end
diff --git a/bench/recursive-ack.lua b/bench/recursive-ack.lua
new file mode 100644
-index 0000000..fad3058
+index 00000000..fad30589
--- /dev/null
+++ b/bench/recursive-ack.lua
@@ -0,0 +1,8 @@
@@ -173368,7 +173341,7 @@ index 0000000..fad3058
+io.write("Ack(3,", N ,"): ", Ack(3,N), "\n")
diff --git a/bench/recursive-fib.lua b/bench/recursive-fib.lua
new file mode 100644
-index 0000000..53b6f96
+index 00000000..53b6f96c
--- /dev/null
+++ b/bench/recursive-fib.lua
@@ -0,0 +1,7 @@
@@ -173381,7 +173354,7 @@ index 0000000..53b6f96
+io.write(string.format("Fib(%d): %d\n", n, fib(n)))
diff --git a/bench/revcomp.lua b/bench/revcomp.lua
new file mode 100644
-index 0000000..90b3d5c
+index 00000000..90b3d5c5
--- /dev/null
+++ b/bench/revcomp.lua
@@ -0,0 +1,39 @@
@@ -173426,10 +173399,10 @@ index 0000000..90b3d5c
+writerev(t, n)
diff --git a/bench/scimark-2010-12-20.lua b/bench/scimark-2010-12-20.lua
new file mode 100644
-index 0000000..353acb7
+index 00000000..25f34eeb
--- /dev/null
+++ b/bench/scimark-2010-12-20.lua
-@@ -0,0 +1,400 @@
+@@ -0,0 +1,399 @@
+------------------------------------------------------------------------------
+-- Lua SciMark (2010-12-20).
+--
@@ -173829,38 +173802,37 @@ index 0000000..353acb7
+end
+printf("\nSciMark %8.2f [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT)
+io.flush()
-+
diff --git a/bench/scimark-fft.lua b/bench/scimark-fft.lua
new file mode 100644
-index 0000000..c05bb69
+index 00000000..c05bb69a
--- /dev/null
+++ b/bench/scimark-fft.lua
@@ -0,0 +1 @@
+require("scimark_lib").FFT(1024)(tonumber(arg and arg[1]) or 50000)
diff --git a/bench/scimark-lu.lua b/bench/scimark-lu.lua
new file mode 100644
-index 0000000..7636d99
+index 00000000..7636d994
--- /dev/null
+++ b/bench/scimark-lu.lua
@@ -0,0 +1 @@
+require("scimark_lib").LU(100)(tonumber(arg and arg[1]) or 5000)
diff --git a/bench/scimark-sor.lua b/bench/scimark-sor.lua
new file mode 100644
-index 0000000..e537e98
+index 00000000..e537e986
--- /dev/null
+++ b/bench/scimark-sor.lua
@@ -0,0 +1 @@
+require("scimark_lib").SOR(100)(tonumber(arg and arg[1]) or 50000)
diff --git a/bench/scimark-sparse.lua b/bench/scimark-sparse.lua
new file mode 100644
-index 0000000..01a2258
+index 00000000..01a2258d
--- /dev/null
+++ b/bench/scimark-sparse.lua
@@ -0,0 +1 @@
+require("scimark_lib").SPARSE(1000, 5000)(tonumber(arg and arg[1]) or 150000)
diff --git a/bench/scimark_lib.lua b/bench/scimark_lib.lua
new file mode 100644
-index 0000000..aeffd75
+index 00000000..aeffd75a
--- /dev/null
+++ b/bench/scimark_lib.lua
@@ -0,0 +1,297 @@
@@ -174163,7 +174135,7 @@ index 0000000..aeffd75
+return benchmarks
diff --git a/bench/series.lua b/bench/series.lua
new file mode 100644
-index 0000000..f766cb3
+index 00000000..f766cb32
--- /dev/null
+++ b/bench/series.lua
@@ -0,0 +1,34 @@
@@ -174203,7 +174175,7 @@ index 0000000..f766cb3
+ n, tm, (2*n-1)/tm))
diff --git a/bench/spectral-norm.lua b/bench/spectral-norm.lua
new file mode 100644
-index 0000000..ecc8011
+index 00000000..ecc80112
--- /dev/null
+++ b/bench/spectral-norm.lua
@@ -0,0 +1,40 @@
@@ -174249,7 +174221,7 @@ index 0000000..ecc8011
+io.write(string.format("%0.9f\n", math.sqrt(vBv / vv)))
diff --git a/bench/sum-file.lua b/bench/sum-file.lua
new file mode 100644
-index 0000000..a16632b
+index 00000000..a16632b1
--- /dev/null
+++ b/bench/sum-file.lua
@@ -0,0 +1,8 @@
@@ -174263,7 +174235,7 @@ index 0000000..a16632b
+io.write(sum, "\n")
diff --git a/test/README.md b/test/README.md
new file mode 100644
-index 0000000..ff16ac8
+index 00000000..ff16ac8e
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,110 @@
@@ -174379,7 +174351,7 @@ index 0000000..ff16ac8
+After that, consult the README file by Mike in the directory above this one.
diff --git a/test/bc/constov.lua b/test/bc/constov.lua
new file mode 100644
-index 0000000..5827840
+index 00000000..5827840b
--- /dev/null
+++ b/test/bc/constov.lua
@@ -0,0 +1,16 @@
@@ -174401,14 +174373,14 @@ index 0000000..5827840
+end
diff --git a/test/bc/index b/test/bc/index
new file mode 100644
-index 0000000..dead10f
+index 00000000..dead10f5
--- /dev/null
+++ b/test/bc/index
@@ -0,0 +1 @@
+constov.lua +slow
diff --git a/test/common/expect_error.lua b/test/common/expect_error.lua
new file mode 100644
-index 0000000..e155090
+index 00000000..e155090e
--- /dev/null
+++ b/test/common/expect_error.lua
@@ -0,0 +1,16 @@
@@ -174430,10 +174402,10 @@ index 0000000..e155090
+end
diff --git a/test/common/ffi_util.inc b/test/common/ffi_util.inc
new file mode 100644
-index 0000000..1eee8dd
+index 00000000..1fa28f3b
--- /dev/null
+++ b/test/common/ffi_util.inc
-@@ -0,0 +1,41 @@
+@@ -0,0 +1,40 @@
+-- This should be turned into a proper module and not use globals.
+-- Or combined into a generiv test utility module. With FFI
+-- functionality turned off, if the FFI module is not built-in.
@@ -174474,17 +174446,16 @@ index 0000000..1eee8dd
+ fp:close()
+ ffi.cdef(s)
+end
-+
diff --git a/test/common/test_runner_canary.lua b/test/common/test_runner_canary.lua
new file mode 100644
-index 0000000..fc9cadc
+index 00000000..fc9cadc6
--- /dev/null
+++ b/test/common/test_runner_canary.lua
@@ -0,0 +1 @@
+return "canary is alive"
diff --git a/test/computations.lua b/test/computations.lua
new file mode 100644
-index 0000000..4fce7fc
+index 00000000..64b36af1
--- /dev/null
+++ b/test/computations.lua
@@ -0,0 +1,113 @@
@@ -174494,7 +174465,7 @@ index 0000000..4fce7fc
+ if n == 0 then return Ack(m-1, 1) end
+ return Ack(m-1, (Ack(m, n-1))) -- The parentheses are deliberate.
+ end
-+
++
+ assert(Ack(3,5) == 253)
+end
+
@@ -174504,7 +174475,7 @@ index 0000000..4fce7fc
+ if n == 0 then return Ack(m-1, 1) end
+ return (Ack(m-1, (Ack(m, n-1)))) -- The parentheses are deliberate.
+ end
-+
++
+ assert(Ack(3,5) == 253)
+end
+
@@ -174516,7 +174487,7 @@ index 0000000..4fce7fc
+ end
+ return x
+ end
-+
++
+ assert(fac(10) == 3628800)
+end
+
@@ -174565,7 +174536,7 @@ index 0000000..4fce7fc
+ end
+ return count
+ end
-+
++
+ assert(nsieve(100) == 25)
+ assert(nsieve(12345) == 1474)
+end
@@ -174575,7 +174546,7 @@ index 0000000..4fce7fc
+ if n == 1 then return 1 end
+ return n + sum(n-1)
+ end
-+
++
+ for i=1, 100 do
+ assert(sum(i) == i*(i+1)/2)
+ end
@@ -174587,7 +174558,7 @@ index 0000000..4fce7fc
+ if n == 1 then return 1 end
+ return abs(n + sum(n-1))
+ end
-+
++
+ for i=1, 100 do
+ assert(sum(i) == i*(i+1)/2)
+ end
@@ -174603,7 +174574,7 @@ index 0000000..4fce7fc
+end
diff --git a/test/index b/test/index
new file mode 100644
-index 0000000..bd4081e
+index 00000000..bd4081e3
--- /dev/null
+++ b/test/index
@@ -0,0 +1,6 @@
@@ -174615,7 +174586,7 @@ index 0000000..bd4081e
+opt +jit
diff --git a/test/lang/andor.lua b/test/lang/andor.lua
new file mode 100644
-index 0000000..55b2c75
+index 00000000..55b2c756
--- /dev/null
+++ b/test/lang/andor.lua
@@ -0,0 +1,61 @@
@@ -174682,7 +174653,7 @@ index 0000000..55b2c75
+end
diff --git a/test/lang/assignment.lua b/test/lang/assignment.lua
new file mode 100644
-index 0000000..e9745ef
+index 00000000..e9745ef6
--- /dev/null
+++ b/test/lang/assignment.lua
@@ -0,0 +1,46 @@
@@ -174734,7 +174705,7 @@ index 0000000..e9745ef
+end
diff --git a/test/lang/compare.lua b/test/lang/compare.lua
new file mode 100644
-index 0000000..09c5488
+index 00000000..09c5488d
--- /dev/null
+++ b/test/lang/compare.lua
@@ -0,0 +1,323 @@
@@ -175063,10 +175034,10 @@ index 0000000..09c5488
+end
diff --git a/test/lang/compare_nan.lua b/test/lang/compare_nan.lua
new file mode 100644
-index 0000000..878f39a
+index 00000000..dd152fab
--- /dev/null
+++ b/test/lang/compare_nan.lua
-@@ -0,0 +1,99 @@
+@@ -0,0 +1,98 @@
+
+local function check(a, b)
+ if a ~= b then
@@ -175165,10 +175136,9 @@ index 0000000..878f39a
+ check(not (1==nan), true)
+ check(not (1~=nan), false)
+end
-+
diff --git a/test/lang/concat.lua b/test/lang/concat.lua
new file mode 100644
-index 0000000..04d665b
+index 00000000..04d665b2
--- /dev/null
+++ b/test/lang/concat.lua
@@ -0,0 +1,112 @@
@@ -175286,7 +175256,7 @@ index 0000000..04d665b
+end
diff --git a/test/lang/constant/index b/test/lang/constant/index
new file mode 100644
-index 0000000..e738357
+index 00000000..e738357d
--- /dev/null
+++ b/test/lang/constant/index
@@ -0,0 +1,2 @@
@@ -175294,7 +175264,7 @@ index 0000000..e738357
+table.lua
diff --git a/test/lang/constant/number.lua b/test/lang/constant/number.lua
new file mode 100644
-index 0000000..fb67356
+index 00000000..fb67356e
--- /dev/null
+++ b/test/lang/constant/number.lua
@@ -0,0 +1,12 @@
@@ -175312,7 +175282,7 @@ index 0000000..fb67356
+end
diff --git a/test/lang/constant/table.lua b/test/lang/constant/table.lua
new file mode 100644
-index 0000000..899d0f6
+index 00000000..899d0f67
--- /dev/null
+++ b/test/lang/constant/table.lua
@@ -0,0 +1,15 @@
@@ -175333,7 +175303,7 @@ index 0000000..899d0f6
+end
diff --git a/test/lang/coroutine.lua b/test/lang/coroutine.lua
new file mode 100644
-index 0000000..405135c
+index 00000000..405135c9
--- /dev/null
+++ b/test/lang/coroutine.lua
@@ -0,0 +1,8 @@
@@ -175347,7 +175317,7 @@ index 0000000..405135c
+end
diff --git a/test/lang/for.lua b/test/lang/for.lua
new file mode 100644
-index 0000000..4982b32
+index 00000000..4982b32b
--- /dev/null
+++ b/test/lang/for.lua
@@ -0,0 +1,45 @@
@@ -175398,7 +175368,7 @@ index 0000000..4982b32
+end
diff --git a/test/lang/gc.lua b/test/lang/gc.lua
new file mode 100644
-index 0000000..35e6a1f
+index 00000000..35e6a1f3
--- /dev/null
+++ b/test/lang/gc.lua
@@ -0,0 +1,42 @@
@@ -175446,7 +175416,7 @@ index 0000000..35e6a1f
+end
diff --git a/test/lang/goto.lua b/test/lang/goto.lua
new file mode 100644
-index 0000000..1563a23
+index 00000000..978476c8
--- /dev/null
+++ b/test/lang/goto.lua
@@ -0,0 +1,149 @@
@@ -175460,7 +175430,7 @@ index 0000000..1563a23
+ assert(ok, err)
+ end
+end
-+
++
+do --- Basic goto and label semantics.
+ -- Error: duplicate label.
+ expect("::a:: ::a::", "'a'")
@@ -175601,7 +175571,7 @@ index 0000000..1563a23
+end
diff --git a/test/lang/index b/test/lang/index
new file mode 100644
-index 0000000..88e2edf
+index 00000000..88e2edfa
--- /dev/null
+++ b/test/lang/index
@@ -0,0 +1,18 @@
@@ -175625,7 +175595,7 @@ index 0000000..88e2edf
+meta
diff --git a/test/lang/length.lua b/test/lang/length.lua
new file mode 100644
-index 0000000..67c68ae
+index 00000000..67c68ae7
--- /dev/null
+++ b/test/lang/length.lua
@@ -0,0 +1,23 @@
@@ -175654,7 +175624,7 @@ index 0000000..67c68ae
+end
diff --git a/test/lang/meta/arith.lua b/test/lang/meta/arith.lua
new file mode 100644
-index 0000000..17de4c8
+index 00000000..17de4c8c
--- /dev/null
+++ b/test/lang/meta/arith.lua
@@ -0,0 +1,118 @@
@@ -175778,7 +175748,7 @@ index 0000000..17de4c8
+end
diff --git a/test/lang/meta/arith_jit.lua b/test/lang/meta/arith_jit.lua
new file mode 100644
-index 0000000..2cb35db
+index 00000000..2cb35dbb
--- /dev/null
+++ b/test/lang/meta/arith_jit.lua
@@ -0,0 +1,68 @@
@@ -175852,7 +175822,7 @@ index 0000000..2cb35db
+end
diff --git a/test/lang/meta/call.lua b/test/lang/meta/call.lua
new file mode 100644
-index 0000000..c77c0dd
+index 00000000..c77c0dd8
--- /dev/null
+++ b/test/lang/meta/call.lua
@@ -0,0 +1,81 @@
@@ -175939,10 +175909,10 @@ index 0000000..c77c0dd
+end
diff --git a/test/lang/meta/cat.lua b/test/lang/meta/cat.lua
new file mode 100644
-index 0000000..48a89e4
+index 00000000..3a5db6fc
--- /dev/null
+++ b/test/lang/meta/cat.lua
-@@ -0,0 +1,61 @@
+@@ -0,0 +1,60 @@
+local function create(cat, v1, v2)
+ local meta = { __concat = cat }
+ return setmetatable({v1}, meta), setmetatable({v2}, meta)
@@ -176003,10 +175973,9 @@ index 0000000..48a89e4
+ for i=1,100 do y = a..b.. 1 .. "z" end
+ assert(y == "ab1z")
+end
-+
diff --git a/test/lang/meta/comp.lua b/test/lang/meta/comp.lua
new file mode 100644
-index 0000000..23f18b0
+index 00000000..23f18b08
--- /dev/null
+++ b/test/lang/meta/comp.lua
@@ -0,0 +1,120 @@
@@ -176132,10 +176101,10 @@ index 0000000..23f18b0
+end
diff --git a/test/lang/meta/comp_jit.lua b/test/lang/meta/comp_jit.lua
new file mode 100644
-index 0000000..d0a19d8
+index 00000000..0bf07b9f
--- /dev/null
+++ b/test/lang/meta/comp_jit.lua
-@@ -0,0 +1,104 @@
+@@ -0,0 +1,103 @@
+do --- coverage
+ local lt, le = false, false
+ local t, u = {}, {}
@@ -176239,10 +176208,9 @@ index 0000000..d0a19d8
+ assert(not ok)
+ end
+end
-+
diff --git a/test/lang/meta/debuginfo.lua b/test/lang/meta/debuginfo.lua
new file mode 100644
-index 0000000..a99941f
+index 00000000..a99941fa
--- /dev/null
+++ b/test/lang/meta/debuginfo.lua
@@ -0,0 +1,81 @@
@@ -176329,7 +176297,7 @@ index 0000000..a99941f
+end
diff --git a/test/lang/meta/eq.lua b/test/lang/meta/eq.lua
new file mode 100644
-index 0000000..ebf6043
+index 00000000..ebf60435
--- /dev/null
+++ b/test/lang/meta/eq.lua
@@ -0,0 +1,30 @@
@@ -176365,7 +176333,7 @@ index 0000000..ebf6043
+end
diff --git a/test/lang/meta/eq_jit.lua b/test/lang/meta/eq_jit.lua
new file mode 100644
-index 0000000..47e1420
+index 00000000..47e14207
--- /dev/null
+++ b/test/lang/meta/eq_jit.lua
@@ -0,0 +1,35 @@
@@ -176406,7 +176374,7 @@ index 0000000..47e1420
+end
diff --git a/test/lang/meta/framegap.lua b/test/lang/meta/framegap.lua
new file mode 100644
-index 0000000..0080633
+index 00000000..0080633a
--- /dev/null
+++ b/test/lang/meta/framegap.lua
@@ -0,0 +1,24 @@
@@ -176436,7 +176404,7 @@ index 0000000..0080633
+end
diff --git a/test/lang/meta/index b/test/lang/meta/index
new file mode 100644
-index 0000000..f114e78
+index 00000000..f114e78d
--- /dev/null
+++ b/test/lang/meta/index
@@ -0,0 +1,14 @@
@@ -176456,7 +176424,7 @@ index 0000000..f114e78
+debuginfo.lua
diff --git a/test/lang/meta/index.lua b/test/lang/meta/index.lua
new file mode 100644
-index 0000000..4d6d0ff
+index 00000000..4d6d0ffe
--- /dev/null
+++ b/test/lang/meta/index.lua
@@ -0,0 +1,60 @@
@@ -176522,7 +176490,7 @@ index 0000000..4d6d0ff
+end
diff --git a/test/lang/meta/len.lua b/test/lang/meta/len.lua
new file mode 100644
-index 0000000..2410daa
+index 00000000..2410daa6
--- /dev/null
+++ b/test/lang/meta/len.lua
@@ -0,0 +1,42 @@
@@ -176570,7 +176538,7 @@ index 0000000..2410daa
+end
diff --git a/test/lang/meta/newindex.lua b/test/lang/meta/newindex.lua
new file mode 100644
-index 0000000..6c46b8c
+index 00000000..6c46b8cb
--- /dev/null
+++ b/test/lang/meta/newindex.lua
@@ -0,0 +1,69 @@
@@ -176645,10 +176613,10 @@ index 0000000..6c46b8c
+end
diff --git a/test/lang/meta/nomm.lua b/test/lang/meta/nomm.lua
new file mode 100644
-index 0000000..2b3db86
+index 00000000..e41f72f4
--- /dev/null
+++ b/test/lang/meta/nomm.lua
-@@ -0,0 +1,21 @@
+@@ -0,0 +1,20 @@
+
+do --- untitled
+ local keys = {}
@@ -176669,10 +176637,9 @@ index 0000000..2b3db86
+ end
+ assert(x == 95)
+end
-+
diff --git a/test/lang/modulo.lua b/test/lang/modulo.lua
new file mode 100644
-index 0000000..eddaea7
+index 00000000..eddaea77
--- /dev/null
+++ b/test/lang/modulo.lua
@@ -0,0 +1,46 @@
@@ -176724,7 +176691,7 @@ index 0000000..eddaea7
+end
diff --git a/test/lang/self.lua b/test/lang/self.lua
new file mode 100644
-index 0000000..d374666
+index 00000000..d3746664
--- /dev/null
+++ b/test/lang/self.lua
@@ -0,0 +1,19 @@
@@ -176749,7 +176716,7 @@ index 0000000..d374666
+end
diff --git a/test/lang/table.lua b/test/lang/table.lua
new file mode 100644
-index 0000000..3ff38cf
+index 00000000..3ff38cfe
--- /dev/null
+++ b/test/lang/table.lua
@@ -0,0 +1,32 @@
@@ -176787,7 +176754,7 @@ index 0000000..3ff38cf
+end
diff --git a/test/lang/tail_recursion.lua b/test/lang/tail_recursion.lua
new file mode 100644
-index 0000000..78f071f
+index 00000000..78f071fd
--- /dev/null
+++ b/test/lang/tail_recursion.lua
@@ -0,0 +1,20 @@
@@ -176813,7 +176780,7 @@ index 0000000..78f071f
+end
diff --git a/test/lang/upvalue/closure.lua b/test/lang/upvalue/closure.lua
new file mode 100644
-index 0000000..faa4de1
+index 00000000..faa4de1c
--- /dev/null
+++ b/test/lang/upvalue/closure.lua
@@ -0,0 +1,84 @@
@@ -176903,17 +176870,17 @@ index 0000000..faa4de1
+end
diff --git a/test/lang/upvalue/index b/test/lang/upvalue/index
new file mode 100644
-index 0000000..3c170db
+index 00000000..3c170db9
--- /dev/null
+++ b/test/lang/upvalue/index
@@ -0,0 +1 @@
+closure.lua
diff --git a/test/lang/vararg_jit.lua b/test/lang/vararg_jit.lua
new file mode 100644
-index 0000000..4e78f96
+index 00000000..50729f5e
--- /dev/null
+++ b/test/lang/vararg_jit.lua
-@@ -0,0 +1,95 @@
+@@ -0,0 +1,94 @@
+
+do --- 1
+ local function f(a, b, c, ...)
@@ -177008,10 +176975,9 @@ index 0000000..4e78f96
+ f(nil)
+ f()
+end
-+
diff --git a/test/lib/base/assert.lua b/test/lib/base/assert.lua
new file mode 100644
-index 0000000..9c30ba0
+index 00000000..9c30ba02
--- /dev/null
+++ b/test/lib/base/assert.lua
@@ -0,0 +1,33 @@
@@ -177050,7 +177016,7 @@ index 0000000..9c30ba0
+end
diff --git a/test/lib/base/error.lua b/test/lib/base/error.lua
new file mode 100644
-index 0000000..9193085
+index 00000000..91930854
--- /dev/null
+++ b/test/lib/base/error.lua
@@ -0,0 +1,43 @@
@@ -177099,7 +177065,7 @@ index 0000000..9193085
+end
diff --git a/test/lib/base/getfenv.lua b/test/lib/base/getfenv.lua
new file mode 100644
-index 0000000..9c00ed7
+index 00000000..9c00ed7c
--- /dev/null
+++ b/test/lib/base/getfenv.lua
@@ -0,0 +1,13 @@
@@ -177118,7 +177084,7 @@ index 0000000..9c00ed7
+end
diff --git a/test/lib/base/getsetmetatable.lua b/test/lib/base/getsetmetatable.lua
new file mode 100644
-index 0000000..7d57343
+index 00000000..7d57343e
--- /dev/null
+++ b/test/lib/base/getsetmetatable.lua
@@ -0,0 +1,33 @@
@@ -177157,7 +177123,7 @@ index 0000000..7d57343
+end
diff --git a/test/lib/base/index b/test/lib/base/index
new file mode 100644
-index 0000000..942c53c
+index 00000000..942c53c0
--- /dev/null
+++ b/test/lib/base/index
@@ -0,0 +1,11 @@
@@ -177174,7 +177140,7 @@ index 0000000..942c53c
+xpcall_jit.lua +compat5.2
diff --git a/test/lib/base/ipairs.lua b/test/lib/base/ipairs.lua
new file mode 100644
-index 0000000..a9de087
+index 00000000..a9de087e
--- /dev/null
+++ b/test/lib/base/ipairs.lua
@@ -0,0 +1,41 @@
@@ -177221,7 +177187,7 @@ index 0000000..a9de087
+end
diff --git a/test/lib/base/next.lua b/test/lib/base/next.lua
new file mode 100644
-index 0000000..0e40615
+index 00000000..0e40615a
--- /dev/null
+++ b/test/lib/base/next.lua
@@ -0,0 +1,17 @@
@@ -177244,7 +177210,7 @@ index 0000000..0e40615
+end
diff --git a/test/lib/base/pairs.lua b/test/lib/base/pairs.lua
new file mode 100644
-index 0000000..4d89d42
+index 00000000..4d89d42d
--- /dev/null
+++ b/test/lib/base/pairs.lua
@@ -0,0 +1,73 @@
@@ -177323,7 +177289,7 @@ index 0000000..4d89d42
+end
diff --git a/test/lib/base/pcall_jit.lua b/test/lib/base/pcall_jit.lua
new file mode 100644
-index 0000000..dc9cd5f
+index 00000000..dc9cd5fa
--- /dev/null
+++ b/test/lib/base/pcall_jit.lua
@@ -0,0 +1,74 @@
@@ -177403,13 +177369,13 @@ index 0000000..dc9cd5f
+end
diff --git a/test/lib/base/select.lua b/test/lib/base/select.lua
new file mode 100644
-index 0000000..8278e5e
+index 00000000..8b2b9467
--- /dev/null
+++ b/test/lib/base/select.lua
@@ -0,0 +1,105 @@
+
+do --- select #
-+-- Test whether select("#", 3, 4) returns the correct number of arguments.
++-- Test whether select("#", 3, 4) returns the correct number of arguments.
+ local x = 0
+ for i=1,100 do
+ x = x + select("#", 3, 4)
@@ -177427,7 +177393,7 @@ index 0000000..8278e5e
+ assert(x == 200)
+end
+
-+do --- select 1
++do --- select 1
+ local x = 0
+ for i=1,100 do
+ x = x + select(1, i)
@@ -177514,7 +177480,7 @@ index 0000000..8278e5e
+end
diff --git a/test/lib/base/tonumber_tostring.lua b/test/lib/base/tonumber_tostring.lua
new file mode 100644
-index 0000000..e7f576c
+index 00000000..e7f576ce
--- /dev/null
+++ b/test/lib/base/tonumber_tostring.lua
@@ -0,0 +1,81 @@
@@ -177601,7 +177567,7 @@ index 0000000..e7f576c
+end
diff --git a/test/lib/base/xpcall_jit.lua b/test/lib/base/xpcall_jit.lua
new file mode 100644
-index 0000000..f4993cc
+index 00000000..f4993cc6
--- /dev/null
+++ b/test/lib/base/xpcall_jit.lua
@@ -0,0 +1,83 @@
@@ -177690,7 +177656,7 @@ index 0000000..f4993cc
+end
diff --git a/test/lib/bit.lua b/test/lib/bit.lua
new file mode 100644
-index 0000000..1adf550
+index 00000000..1adf5507
--- /dev/null
+++ b/test/lib/bit.lua
@@ -0,0 +1,98 @@
@@ -177794,7 +177760,7 @@ index 0000000..1adf550
+end
diff --git a/test/lib/contents.lua b/test/lib/contents.lua
new file mode 100644
-index 0000000..2baacd5
+index 00000000..2baacd5c
--- /dev/null
+++ b/test/lib/contents.lua
@@ -0,0 +1,158 @@
@@ -177958,14 +177924,14 @@ index 0000000..2baacd5
+end
diff --git a/test/lib/coroutine/index b/test/lib/coroutine/index
new file mode 100644
-index 0000000..9c5c17e
+index 00000000..9c5c17ec
--- /dev/null
+++ b/test/lib/coroutine/index
@@ -0,0 +1 @@
+yield.lua
diff --git a/test/lib/coroutine/yield.lua b/test/lib/coroutine/yield.lua
new file mode 100644
-index 0000000..d995bf8
+index 00000000..d995bf87
--- /dev/null
+++ b/test/lib/coroutine/yield.lua
@@ -0,0 +1,109 @@
@@ -178080,10 +178046,10 @@ index 0000000..d995bf8
+end
diff --git a/test/lib/ffi/bit64.lua b/test/lib/ffi/bit64.lua
new file mode 100644
-index 0000000..d1b47be
+index 00000000..ffec0def
--- /dev/null
+++ b/test/lib/ffi/bit64.lua
-@@ -0,0 +1,130 @@
+@@ -0,0 +1,129 @@
+local ffi = require("ffi")
+local bit = require("bit")
+
@@ -178213,10 +178179,9 @@ index 0000000..d1b47be
+ end
+ assert(b == -8881785180777266821LL)
+end
-+
diff --git a/test/lib/ffi/cdata_var.lua b/test/lib/ffi/cdata_var.lua
new file mode 100644
-index 0000000..42d6028
+index 00000000..42d6028a
--- /dev/null
+++ b/test/lib/ffi/cdata_var.lua
@@ -0,0 +1,47 @@
@@ -178269,10 +178234,10 @@ index 0000000..42d6028
+end
diff --git a/test/lib/ffi/copy_fill.lua b/test/lib/ffi/copy_fill.lua
new file mode 100644
-index 0000000..2956381
+index 00000000..d50d7cda
--- /dev/null
+++ b/test/lib/ffi/copy_fill.lua
-@@ -0,0 +1,64 @@
+@@ -0,0 +1,63 @@
+local ffi = require("ffi")
+
+do --- misc
@@ -178336,13 +178301,12 @@ index 0000000..2956381
+ end
+ assert(x == "d" and y == "~")
+end
-+
diff --git a/test/lib/ffi/err.lua b/test/lib/ffi/err.lua
new file mode 100644
-index 0000000..4472365
+index 00000000..8cdf9623
--- /dev/null
+++ b/test/lib/ffi/err.lua
-@@ -0,0 +1,35 @@
+@@ -0,0 +1,34 @@
+local ffi = require("ffi")
+
+do --- error in FFI metamethod: don't print metamethod frame.
@@ -178377,13 +178341,12 @@ index 0000000..4472365
+ local line = debug.getinfo(foo).linedefined+3
+ assert(string.match(err, "traceback:[^:]*:"..line..":"))
+end
-+
diff --git a/test/lib/ffi/ffi_arith_ptr.lua b/test/lib/ffi/ffi_arith_ptr.lua
new file mode 100644
-index 0000000..8cf890c
+index 00000000..82535551
--- /dev/null
+++ b/test/lib/ffi/ffi_arith_ptr.lua
-@@ -0,0 +1,106 @@
+@@ -0,0 +1,105 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -178489,13 +178452,12 @@ index 0000000..8cf890c
+ local ok, err = pcall(function(p) return p[1] end, p)
+ assert(not ok and err:match("size.*unknown"))
+end
-+
diff --git a/test/lib/ffi/ffi_bitfield.lua b/test/lib/ffi/ffi_bitfield.lua
new file mode 100644
-index 0000000..cd0b181
+index 00000000..20b89ad8
--- /dev/null
+++ b/test/lib/ffi/ffi_bitfield.lua
-@@ -0,0 +1,108 @@
+@@ -0,0 +1,107 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -178603,13 +178565,12 @@ index 0000000..cd0b181
+ end
+
+end
-+
diff --git a/test/lib/ffi/ffi_call.lua b/test/lib/ffi/ffi_call.lua
new file mode 100644
-index 0000000..1eb5e90
+index 00000000..1a7f4b1b
--- /dev/null
+++ b/test/lib/ffi/ffi_call.lua
-@@ -0,0 +1,266 @@
+@@ -0,0 +1,265 @@
+
+local ffi = require("ffi")
+
@@ -178875,13 +178836,12 @@ index 0000000..1eb5e90
+ assert(C.stdcall_ff(12.5, -3.25) == 12.5-3.25)
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_callback.lua b/test/lib/ffi/ffi_callback.lua
new file mode 100644
-index 0000000..1fd14bd
+index 00000000..3e2759e2
--- /dev/null
+++ b/test/lib/ffi/ffi_callback.lua
-@@ -0,0 +1,158 @@
+@@ -0,0 +1,157 @@
+
+local ffi = require("ffi")
+
@@ -179039,13 +178999,12 @@ index 0000000..1fd14bd
+ debug.sethook(function() debug.sethook(nil, "", 0); f() end, "",
1)
+ local x
+end
-+
diff --git a/test/lib/ffi/ffi_const.lua b/test/lib/ffi/ffi_const.lua
new file mode 100644
-index 0000000..d42133a
+index 00000000..b2b256d4
--- /dev/null
+++ b/test/lib/ffi/ffi_const.lua
-@@ -0,0 +1,113 @@
+@@ -0,0 +1,112 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -179158,13 +179117,12 @@ index 0000000..d42133a
+ x.ccp = ccxa
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_convert.lua b/test/lib/ffi/ffi_convert.lua
new file mode 100644
-index 0000000..bd3fb1f
+index 00000000..1945760a
--- /dev/null
+++ b/test/lib/ffi/ffi_convert.lua
-@@ -0,0 +1,787 @@
+@@ -0,0 +1,786 @@
+local ffi = require("ffi")
+
+local ctest = require("ctest")
@@ -179951,13 +179909,12 @@ index 0000000..bd3fb1f
+ jit.off(f)
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_enum.lua b/test/lib/ffi/ffi_enum.lua
new file mode 100644
-index 0000000..e8e40ad
+index 00000000..9b63b4ec
--- /dev/null
+++ b/test/lib/ffi/ffi_enum.lua
-@@ -0,0 +1,57 @@
+@@ -0,0 +1,56 @@
+
+local ffi = require("ffi")
+
@@ -180014,13 +179971,12 @@ index 0000000..e8e40ad
+ assert(f("II"))
+ assert(not f(0))
+end
-+
diff --git a/test/lib/ffi/ffi_gcstep_recursive.lua
b/test/lib/ffi/ffi_gcstep_recursive.lua
new file mode 100644
-index 0000000..cb19df1
+index 00000000..22eb81af
--- /dev/null
+++ b/test/lib/ffi/ffi_gcstep_recursive.lua
-@@ -0,0 +1,66 @@
+@@ -0,0 +1,65 @@
+-- From Robert G. Jakabosky, 2012-03-20
+
+local N=tonumber(arg[1] or 10000)
@@ -180086,13 +180042,12 @@ index 0000000..cb19df1
+ end
+ cdata = nil
+end
-+
diff --git a/test/lib/ffi/ffi_jit_arith.lua b/test/lib/ffi/ffi_jit_arith.lua
new file mode 100644
-index 0000000..0554fe6
+index 00000000..0f502784
--- /dev/null
+++ b/test/lib/ffi/ffi_jit_arith.lua
-@@ -0,0 +1,155 @@
+@@ -0,0 +1,154 @@
+local ffi = require("ffi")
+
+do
@@ -180247,13 +180202,12 @@ index 0000000..0554fe6
+ assert(x == 1650)
+ assert(y == 970)
+end
-+
diff --git a/test/lib/ffi/ffi_jit_call.lua b/test/lib/ffi/ffi_jit_call.lua
new file mode 100644
-index 0000000..b79d60b
+index 00000000..ab1e26e3
--- /dev/null
+++ b/test/lib/ffi/ffi_jit_call.lua
-@@ -0,0 +1,154 @@
+@@ -0,0 +1,153 @@
+
+local ffi = require("ffi")
+
@@ -180407,10 +180361,9 @@ index 0000000..b79d60b
+ for i=1,100 do assert(lib.stdcall_ff(12.5, -3.25) == 12.5-3.25) end
+ end
+end
-+
diff --git a/test/lib/ffi/ffi_jit_conv.lua b/test/lib/ffi/ffi_jit_conv.lua
new file mode 100644
-index 0000000..d4707db
+index 00000000..d4707db7
--- /dev/null
+++ b/test/lib/ffi/ffi_jit_conv.lua
@@ -0,0 +1,277 @@
@@ -180693,10 +180646,10 @@ index 0000000..d4707db
+end
diff --git a/test/lib/ffi/ffi_lex_number.lua b/test/lib/ffi/ffi_lex_number.lua
new file mode 100644
-index 0000000..e26650e
+index 00000000..1737a8ba
--- /dev/null
+++ b/test/lib/ffi/ffi_lex_number.lua
-@@ -0,0 +1,51 @@
+@@ -0,0 +1,50 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -180747,13 +180700,12 @@ index 0000000..e26650e
+ ".0ll",
+ "0ii",
+}, function(s) assert(loadstring("return "..s)) end)
-+
diff --git a/test/lib/ffi/ffi_metatype.lua b/test/lib/ffi/ffi_metatype.lua
new file mode 100644
-index 0000000..2db717f
+index 00000000..1d3a20bc
--- /dev/null
+++ b/test/lib/ffi/ffi_metatype.lua
-@@ -0,0 +1,245 @@
+@@ -0,0 +1,244 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -180998,13 +180950,12 @@ index 0000000..2db717f
+ local o = xt()
+ assert(o.x == 99)
+end
-+
diff --git a/test/lib/ffi/ffi_new.lua b/test/lib/ffi/ffi_new.lua
new file mode 100644
-index 0000000..9cdbd53
+index 00000000..5f0c88b4
--- /dev/null
+++ b/test/lib/ffi/ffi_new.lua
-@@ -0,0 +1,106 @@
+@@ -0,0 +1,105 @@
+local ffi = require("ffi")
+local bit = require("bit")
+
@@ -181110,13 +181061,12 @@ index 0000000..9cdbd53
+ local p = ffi.gc(ffi.new("int[1]"), function(x) assert(type(x) ==
"cdata") end)
+ -- test for lua_close() cleanup.
+end
-+
diff --git a/test/lib/ffi/ffi_parse_array.lua b/test/lib/ffi/ffi_parse_array.lua
new file mode 100644
-index 0000000..3a9616d
+index 00000000..08176223
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_array.lua
-@@ -0,0 +1,78 @@
+@@ -0,0 +1,77 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181194,13 +181144,12 @@ index 0000000..3a9616d
+ assert(ffi.sizeof(id, 0x40000000) == nil)
+ assert(ffi.sizeof(id, 0x3ffffffd) == 4+2*0x3ffffffd)
+end
-+
diff --git a/test/lib/ffi/ffi_parse_basic.lua b/test/lib/ffi/ffi_parse_basic.lua
new file mode 100644
-index 0000000..c054bcf
+index 00000000..774d6143
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_basic.lua
-@@ -0,0 +1,131 @@
+@@ -0,0 +1,130 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181331,13 +181280,12 @@ index 0000000..c054bcf
+ 1, 2, "char __attribute__((aligned(8))) const __attribute__((aligned(2)))",
+ 1, 16, "char __attribute__((aligned(8))) const
__attribute__((aligned(16)))",
+}
-+
diff --git a/test/lib/ffi/ffi_parse_cdef.lua b/test/lib/ffi/ffi_parse_cdef.lua
new file mode 100644
-index 0000000..4bb5d90
+index 00000000..43206f31
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_cdef.lua
-@@ -0,0 +1,77 @@
+@@ -0,0 +1,76 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181414,13 +181362,12 @@ index 0000000..4bb5d90
+int ext1;
+extern int ext2;
+]]
-+
diff --git a/test/lib/ffi/ffi_parse_struct.lua b/test/lib/ffi/ffi_parse_struct.lua
new file mode 100644
-index 0000000..16a3d05
+index 00000000..02b17dd6
--- /dev/null
+++ b/test/lib/ffi/ffi_parse_struct.lua
-@@ -0,0 +1,259 @@
+@@ -0,0 +1,258 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -181679,10 +181626,9 @@ index 0000000..16a3d05
+ assert(ffi.sizeof("struct foo_packintalign8") == 6)
+ assert(ffi.sizeof("struct foo_packintalign1") == 5)
+end
-+
diff --git a/test/lib/ffi/ffi_tabov.lua b/test/lib/ffi/ffi_tabov.lua
new file mode 100644
-index 0000000..ba62196
+index 00000000..ba621960
--- /dev/null
+++ b/test/lib/ffi/ffi_tabov.lua
@@ -0,0 +1,12 @@
@@ -181700,7 +181646,7 @@ index 0000000..ba62196
+assert(last > 20000)
diff --git a/test/lib/ffi/index b/test/lib/ffi/index
new file mode 100644
-index 0000000..59e36dd
+index 00000000..59e36dd8
--- /dev/null
+++ b/test/lib/ffi/index
@@ -0,0 +1,12 @@
@@ -181718,7 +181664,7 @@ index 0000000..59e36dd
+type_punning.lua
diff --git a/test/lib/ffi/istype.lua b/test/lib/ffi/istype.lua
new file mode 100644
-index 0000000..5aba775
+index 00000000..5aba7759
--- /dev/null
+++ b/test/lib/ffi/istype.lua
@@ -0,0 +1,88 @@
@@ -181812,7 +181758,7 @@ index 0000000..5aba775
+end
diff --git a/test/lib/ffi/jit_array.lua b/test/lib/ffi/jit_array.lua
new file mode 100644
-index 0000000..e8de4af
+index 00000000..e8de4af1
--- /dev/null
+++ b/test/lib/ffi/jit_array.lua
@@ -0,0 +1,104 @@
@@ -181922,7 +181868,7 @@ index 0000000..e8de4af
+end
diff --git a/test/lib/ffi/jit_complex.lua b/test/lib/ffi/jit_complex.lua
new file mode 100644
-index 0000000..3296f0c
+index 00000000..3296f0cb
--- /dev/null
+++ b/test/lib/ffi/jit_complex.lua
@@ -0,0 +1,109 @@
@@ -182037,7 +181983,7 @@ index 0000000..3296f0c
+end
diff --git a/test/lib/ffi/jit_misc.lua b/test/lib/ffi/jit_misc.lua
new file mode 100644
-index 0000000..41e4737
+index 00000000..41e4737b
--- /dev/null
+++ b/test/lib/ffi/jit_misc.lua
@@ -0,0 +1,109 @@
@@ -182152,10 +182098,10 @@ index 0000000..41e4737
+end
diff --git a/test/lib/ffi/jit_struct.lua b/test/lib/ffi/jit_struct.lua
new file mode 100644
-index 0000000..8aa64c1
+index 00000000..ab7ab07f
--- /dev/null
+++ b/test/lib/ffi/jit_struct.lua
-@@ -0,0 +1,201 @@
+@@ -0,0 +1,200 @@
+local ffi = require("ffi")
+
+ffi.cdef[[
@@ -182356,13 +182302,12 @@ index 0000000..8aa64c1
+ end
+ f()
+end
-+
diff --git a/test/lib/ffi/meta_tostring.lua b/test/lib/ffi/meta_tostring.lua
new file mode 100644
-index 0000000..bb065e1
+index 00000000..968eaddf
--- /dev/null
+++ b/test/lib/ffi/meta_tostring.lua
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,54 @@
+local ffi = require("ffi")
+
+ffi.cdef[[
@@ -182417,10 +182362,9 @@ index 0000000..bb065e1
+ x.f[1] = -753.125
+ assert(tostring(x.cf) == "12.5-753.125i")
+end
-+
diff --git a/test/lib/ffi/redir.lua b/test/lib/ffi/redir.lua
new file mode 100644
-index 0000000..c492055
+index 00000000..c492055a
--- /dev/null
+++ b/test/lib/ffi/redir.lua
@@ -0,0 +1,19 @@
@@ -182445,7 +182389,7 @@ index 0000000..c492055
+end
diff --git a/test/lib/ffi/type_punning.lua b/test/lib/ffi/type_punning.lua
new file mode 100644
-index 0000000..ac70b4b
+index 00000000..ac70b4b4
--- /dev/null
+++ b/test/lib/ffi/type_punning.lua
@@ -0,0 +1,138 @@
@@ -182589,7 +182533,7 @@ index 0000000..ac70b4b
+end
diff --git a/test/lib/index b/test/lib/index
new file mode 100644
-index 0000000..cc9d7d7
+index 00000000..cc9d7d73
--- /dev/null
+++ b/test/lib/index
@@ -0,0 +1,8 @@
@@ -182604,7 +182548,7 @@ index 0000000..cc9d7d7
\ No newline at end of file
diff --git a/test/lib/math/abs.lua b/test/lib/math/abs.lua
new file mode 100644
-index 0000000..4223a78
+index 00000000..4223a780
--- /dev/null
+++ b/test/lib/math/abs.lua
@@ -0,0 +1,16 @@
@@ -182626,7 +182570,7 @@ index 0000000..4223a78
+end
diff --git a/test/lib/math/constants.lua b/test/lib/math/constants.lua
new file mode 100644
-index 0000000..ec35b4c
+index 00000000..ec35b4ce
--- /dev/null
+++ b/test/lib/math/constants.lua
@@ -0,0 +1,8 @@
@@ -182640,7 +182584,7 @@ index 0000000..ec35b4c
+end
diff --git a/test/lib/math/index b/test/lib/math/index
new file mode 100644
-index 0000000..944e1ae
+index 00000000..944e1aeb
--- /dev/null
+++ b/test/lib/math/index
@@ -0,0 +1,3 @@
@@ -182649,7 +182593,7 @@ index 0000000..944e1ae
+random.lua
diff --git a/test/lib/math/random.lua b/test/lib/math/random.lua
new file mode 100644
-index 0000000..dc2ca00
+index 00000000..dc2ca00b
--- /dev/null
+++ b/test/lib/math/random.lua
@@ -0,0 +1,47 @@
@@ -182702,7 +182646,7 @@ index 0000000..dc2ca00
+end
diff --git a/test/lib/string/byte.lua b/test/lib/string/byte.lua
new file mode 100644
-index 0000000..697a2c2
+index 00000000..697a2c2e
--- /dev/null
+++ b/test/lib/string/byte.lua
@@ -0,0 +1,92 @@
@@ -182800,7 +182744,7 @@ index 0000000..697a2c2
+end
diff --git a/test/lib/string/char.lua b/test/lib/string/char.lua
new file mode 100644
-index 0000000..544767d
+index 00000000..544767de
--- /dev/null
+++ b/test/lib/string/char.lua
@@ -0,0 +1,29 @@
@@ -182835,7 +182779,7 @@ index 0000000..544767d
+end
diff --git a/test/lib/string/dump.lua b/test/lib/string/dump.lua
new file mode 100644
-index 0000000..216c6eb
+index 00000000..216c6eb8
--- /dev/null
+++ b/test/lib/string/dump.lua
@@ -0,0 +1,31 @@
@@ -182872,14 +182816,14 @@ index 0000000..216c6eb
+end
diff --git a/test/lib/string/format/index b/test/lib/string/format/index
new file mode 100644
-index 0000000..4408853
+index 00000000..44088536
--- /dev/null
+++ b/test/lib/string/format/index
@@ -0,0 +1 @@
+num.lua
diff --git a/test/lib/string/format/num.lua b/test/lib/string/format/num.lua
new file mode 100644
-index 0000000..e8cb33f
+index 00000000..e8cb33f3
--- /dev/null
+++ b/test/lib/string/format/num.lua
@@ -0,0 +1,184 @@
@@ -183069,7 +183013,7 @@ index 0000000..e8cb33f
+end
diff --git a/test/lib/string/index b/test/lib/string/index
new file mode 100644
-index 0000000..c0638e9
+index 00000000..c0638e9c
--- /dev/null
+++ b/test/lib/string/index
@@ -0,0 +1,11 @@
@@ -183086,7 +183030,7 @@ index 0000000..c0638e9
+sub.lua
diff --git a/test/lib/string/len.lua b/test/lib/string/len.lua
new file mode 100644
-index 0000000..8ed7e8a
+index 00000000..8ed7e8ae
--- /dev/null
+++ b/test/lib/string/len.lua
@@ -0,0 +1,14 @@
@@ -183106,7 +183050,7 @@ index 0000000..8ed7e8a
+end
diff --git a/test/lib/string/lower_upper.lua b/test/lib/string/lower_upper.lua
new file mode 100644
-index 0000000..7370c44
+index 00000000..7370c44c
--- /dev/null
+++ b/test/lib/string/lower_upper.lua
@@ -0,0 +1,51 @@
@@ -183163,7 +183107,7 @@ index 0000000..7370c44
+end
diff --git a/test/lib/string/metatable.lua b/test/lib/string/metatable.lua
new file mode 100644
-index 0000000..d39ed43
+index 00000000..d39ed432
--- /dev/null
+++ b/test/lib/string/metatable.lua
@@ -0,0 +1,3 @@
@@ -183172,7 +183116,7 @@ index 0000000..d39ed43
+end
diff --git a/test/lib/string/multiple_functions.lua
b/test/lib/string/multiple_functions.lua
new file mode 100644
-index 0000000..7b9d0f1
+index 00000000..7b9d0f13
--- /dev/null
+++ b/test/lib/string/multiple_functions.lua
@@ -0,0 +1,16 @@
@@ -183194,7 +183138,7 @@ index 0000000..7b9d0f1
+end
diff --git a/test/lib/string/rep.lua b/test/lib/string/rep.lua
new file mode 100644
-index 0000000..550c15b
+index 00000000..550c15b8
--- /dev/null
+++ b/test/lib/string/rep.lua
@@ -0,0 +1,68 @@
@@ -183268,7 +183212,7 @@ index 0000000..550c15b
+end
diff --git a/test/lib/string/reverse.lua b/test/lib/string/reverse.lua
new file mode 100644
-index 0000000..deaade7
+index 00000000..deaade7c
--- /dev/null
+++ b/test/lib/string/reverse.lua
@@ -0,0 +1,13 @@
@@ -183287,7 +183231,7 @@ index 0000000..deaade7
+end
diff --git a/test/lib/string/sub.lua b/test/lib/string/sub.lua
new file mode 100644
-index 0000000..ecb8021
+index 00000000..ecb80216
--- /dev/null
+++ b/test/lib/string/sub.lua
@@ -0,0 +1,189 @@
@@ -183482,7 +183426,7 @@ index 0000000..ecb8021
+end
diff --git a/test/lib/table/concat.lua b/test/lib/table/concat.lua
new file mode 100644
-index 0000000..1f2a2f9
+index 00000000..1f2a2f92
--- /dev/null
+++ b/test/lib/table/concat.lua
@@ -0,0 +1,55 @@
@@ -183543,7 +183487,7 @@ index 0000000..1f2a2f9
+end
diff --git a/test/lib/table/index b/test/lib/table/index
new file mode 100644
-index 0000000..bd3af0b
+index 00000000..bd3af0be
--- /dev/null
+++ b/test/lib/table/index
@@ -0,0 +1,6 @@
@@ -183555,10 +183499,10 @@ index 0000000..bd3af0b
+sort.lua
diff --git a/test/lib/table/insert.lua b/test/lib/table/insert.lua
new file mode 100644
-index 0000000..91d4dd8
+index 00000000..30db18c7
--- /dev/null
+++ b/test/lib/table/insert.lua
-@@ -0,0 +1,17 @@
+@@ -0,0 +1,16 @@
+local tinsert = table.insert
+local assert = assert
+
@@ -183575,13 +183519,12 @@ index 0000000..91d4dd8
+ for i=101,200 do tinsert(t, i, i) end
+ assert(#t == 300 and t[101] == 101 and t[200] == 200 and t[300] == 200)
+end
-+
diff --git a/test/lib/table/misc.lua b/test/lib/table/misc.lua
new file mode 100644
-index 0000000..e0e2fc5
+index 00000000..c54188e1
--- /dev/null
+++ b/test/lib/table/misc.lua
-@@ -0,0 +1,58 @@
+@@ -0,0 +1,55 @@
+-- TODO: Organise
+
+-- ABC elim
@@ -183637,12 +183580,9 @@ index 0000000..e0e2fc5
+ assert(t[1] == 1 and t[2] == 2 and t[3] == 3 and t[4] == 9 and t[5] == 10 and
+ t[6] == nil)
+end
-+
-+
-+
diff --git a/test/lib/table/new.lua b/test/lib/table/new.lua
new file mode 100644
-index 0000000..483c129
+index 00000000..483c1298
--- /dev/null
+++ b/test/lib/table/new.lua
@@ -0,0 +1,11 @@
@@ -183659,7 +183599,7 @@ index 0000000..483c129
+end
diff --git a/test/lib/table/pack.lua b/test/lib/table/pack.lua
new file mode 100644
-index 0000000..5bd6ecb
+index 00000000..5bd6ecbe
--- /dev/null
+++ b/test/lib/table/pack.lua
@@ -0,0 +1,7 @@
@@ -183672,7 +183612,7 @@ index 0000000..5bd6ecb
+end
diff --git a/test/lib/table/remove.lua b/test/lib/table/remove.lua
new file mode 100644
-index 0000000..1b24a4f
+index 00000000..1b24a4fb
--- /dev/null
+++ b/test/lib/table/remove.lua
@@ -0,0 +1,42 @@
@@ -183720,7 +183660,7 @@ index 0000000..1b24a4f
+end
diff --git a/test/lib/table/sort.lua b/test/lib/table/sort.lua
new file mode 100644
-index 0000000..6a86fcf
+index 00000000..6a86fcf3
--- /dev/null
+++ b/test/lib/table/sort.lua
@@ -0,0 +1,27 @@
@@ -183753,10 +183693,10 @@ index 0000000..6a86fcf
+end
diff --git a/test/misc/alias_alloc.lua b/test/misc/alias_alloc.lua
new file mode 100644
-index 0000000..02fe618
+index 00000000..6c89baad
--- /dev/null
+++ b/test/misc/alias_alloc.lua
-@@ -0,0 +1,54 @@
+@@ -0,0 +1,53 @@
+
+do
+ local t = {1}
@@ -183810,13 +183750,12 @@ index 0000000..02fe618
+ w[1] = t[1]
+ end
+end
-+
diff --git a/test/misc/api_call.lua b/test/misc/api_call.lua
new file mode 100644
-index 0000000..7dbd5e4
+index 00000000..28ce7d2b
--- /dev/null
+++ b/test/misc/api_call.lua
-@@ -0,0 +1,98 @@
+@@ -0,0 +1,97 @@
+local ctest = require("ctest")
+
+local function ret0() end
@@ -183914,13 +183853,12 @@ index 0000000..7dbd5e4
+test_yield(ctest.resume, coroutine.yield)
+test_yield(coroutine.resume, ctest.yield)
+test_yield(ctest.resume, ctest.yield)
-+
diff --git a/test/misc/catch_wrap.lua b/test/misc/catch_wrap.lua
new file mode 100644
-index 0000000..7f656bc
+index 00000000..8b934066
--- /dev/null
+++ b/test/misc/catch_wrap.lua
-@@ -0,0 +1,45 @@
+@@ -0,0 +1,44 @@
+
+local cp = require("cpptest")
+cp.wrapon()
@@ -183965,13 +183903,12 @@ index 0000000..7f656bc
+ assert(a == false and b == "foo")
+ if unwind then assert(cp.isalloc() == false) end
+end
-+
diff --git a/test/misc/coro_traceback.lua b/test/misc/coro_traceback.lua
new file mode 100644
-index 0000000..2676d2c
+index 00000000..e075f5dd
--- /dev/null
+++ b/test/misc/coro_traceback.lua
-@@ -0,0 +1,8 @@
+@@ -0,0 +1,7 @@
+
+local co = coroutine.create(function()
+ local x = nil
@@ -183979,13 +183916,12 @@ index 0000000..2676d2c
+end)
+assert(coroutine.resume(co) == false)
+debug.traceback(co)
-+
diff --git a/test/misc/coro_yield.lua b/test/misc/coro_yield.lua
new file mode 100644
-index 0000000..ae3206e
+index 00000000..602ba7d5
--- /dev/null
+++ b/test/misc/coro_yield.lua
-@@ -0,0 +1,111 @@
+@@ -0,0 +1,110 @@
+local create = coroutine.create
+local wrap = coroutine.wrap
+local resume = coroutine.resume
@@ -184096,10 +184032,9 @@ index 0000000..ae3206e
+ end,
+ 42)
+end
-+
diff --git a/test/misc/debug_gc.lua b/test/misc/debug_gc.lua
new file mode 100644
-index 0000000..30fb2b9
+index 00000000..30fb2b99
--- /dev/null
+++ b/test/misc/debug_gc.lua
@@ -0,0 +1,47 @@
@@ -184152,10 +184087,10 @@ index 0000000..30fb2b9
+caught = "end"
diff --git a/test/misc/dualnum.lua b/test/misc/dualnum.lua
new file mode 100644
-index 0000000..5f1288c
+index 00000000..059bf21d
--- /dev/null
+++ b/test/misc/dualnum.lua
-@@ -0,0 +1,47 @@
+@@ -0,0 +1,46 @@
+
+-- Positive overflow
+do
@@ -184202,13 +184137,12 @@ index 0000000..5f1288c
+ assert(fmax(-1, -3) == -1)
+ assert(fmax(-3, -1) == -1)
+end
-+
diff --git a/test/misc/for_dir.lua b/test/misc/for_dir.lua
new file mode 100644
-index 0000000..4dd38de
+index 00000000..3146df13
--- /dev/null
+++ b/test/misc/for_dir.lua
-@@ -0,0 +1,13 @@
+@@ -0,0 +1,12 @@
+
+local a,b,c = 10,1,-1
+for i=1,20 do
@@ -184221,13 +184155,12 @@ index 0000000..4dd38de
+ for i=a,b,c do for j=1,10 do end x=x+1 end
+ assert(x == 10)
+end
-+
diff --git a/test/misc/fori_coerce.lua b/test/misc/fori_coerce.lua
new file mode 100644
-index 0000000..7330943
+index 00000000..03dc37cc
--- /dev/null
+++ b/test/misc/fori_coerce.lua
-@@ -0,0 +1,33 @@
+@@ -0,0 +1,32 @@
+
+do
+ local n = 1
@@ -184260,13 +184193,12 @@ index 0000000..7330943
+ end
+ assert(not pcall(f))
+end
-+
diff --git a/test/misc/gc_rechain.lua b/test/misc/gc_rechain.lua
new file mode 100644
-index 0000000..285f408
+index 00000000..c98fa5af
--- /dev/null
+++ b/test/misc/gc_rechain.lua
-@@ -0,0 +1,32 @@
+@@ -0,0 +1,31 @@
+
+do
+ local k
@@ -184298,13 +184230,12 @@ index 0000000..285f408
+
+ assert(t[k] == 4)
+end
-+
diff --git a/test/misc/gc_trace.lua b/test/misc/gc_trace.lua
new file mode 100644
-index 0000000..bc38ce0
+index 00000000..e394bd49
--- /dev/null
+++ b/test/misc/gc_trace.lua
-@@ -0,0 +1,37 @@
+@@ -0,0 +1,36 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184341,13 +184272,12 @@ index 0000000..bc38ce0
+ end
+ jit.attach(reccb)
+end
-+
diff --git a/test/misc/gcstep.lua b/test/misc/gcstep.lua
new file mode 100644
-index 0000000..533356b
+index 00000000..7ee5565b
--- /dev/null
+++ b/test/misc/gcstep.lua
-@@ -0,0 +1,33 @@
+@@ -0,0 +1,32 @@
+
+local function testgc(what, func)
+ collectgarbage()
@@ -184380,13 +184310,12 @@ index 0000000..533356b
+ local s = "x"..i
+ end
+end)
-+
diff --git a/test/misc/hook_active.lua b/test/misc/hook_active.lua
new file mode 100644
-index 0000000..37dfc37
+index 00000000..57532568
--- /dev/null
+++ b/test/misc/hook_active.lua
-@@ -0,0 +1,95 @@
+@@ -0,0 +1,94 @@
+local ctest = require("ctest")
+
+local called = 0
@@ -184481,13 +184410,12 @@ index 0000000..37dfc37
+called = 2
+do local x = 1 end
+assert(called == 2)
-+
diff --git a/test/misc/hook_line.lua b/test/misc/hook_line.lua
new file mode 100644
-index 0000000..36f7108
+index 00000000..6106e492
--- /dev/null
+++ b/test/misc/hook_line.lua
-@@ -0,0 +1,41 @@
+@@ -0,0 +1,40 @@
+local lines = {}
+local function hook()
+ lines[#lines+1] = debug.getinfo(2).currentline
@@ -184528,13 +184456,12 @@ index 0000000..36f7108
+f()
+debug.sethook(nil, "", 0)
+for i=1,#lines do assert(lines[i] ~= 36) end
-+
diff --git a/test/misc/hook_norecord.lua b/test/misc/hook_norecord.lua
new file mode 100644
-index 0000000..8e7cba0
+index 00000000..004f3a3a
--- /dev/null
+++ b/test/misc/hook_norecord.lua
-@@ -0,0 +1,12 @@
+@@ -0,0 +1,11 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184546,13 +184473,12 @@ index 0000000..8e7cba0
+assert(called)
+-- Check that no trace was generated.
+assert(require("jit.util").traceinfo(1) == nil)
-+
diff --git a/test/misc/hook_record.lua b/test/misc/hook_record.lua
new file mode 100644
-index 0000000..6f1646d
+index 00000000..f4283d12
--- /dev/null
+++ b/test/misc/hook_record.lua
-@@ -0,0 +1,8 @@
+@@ -0,0 +1,7 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184560,13 +184486,12 @@ index 0000000..6f1646d
+for i=1,10 do end
+debug.sethook()
+assert((require("jit.util").traceinfo(1)))
-+
diff --git a/test/misc/hook_top.lua b/test/misc/hook_top.lua
new file mode 100644
-index 0000000..f809fce
+index 00000000..3cc7e651
--- /dev/null
+++ b/test/misc/hook_top.lua
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,54 @@
+
+local t = {}
+for i=1,26 do t[i] = string.char(96+i) end
@@ -184621,13 +184546,12 @@ index 0000000..f809fce
+ assert(a == "bar")
+end
+foo5()
-+
diff --git a/test/misc/jit_flush.lua b/test/misc/jit_flush.lua
new file mode 100644
-index 0000000..ead1e4e
+index 00000000..fe1021ce
--- /dev/null
+++ b/test/misc/jit_flush.lua
-@@ -0,0 +1,50 @@
+@@ -0,0 +1,49 @@
+
+if not jit or not jit.status or not jit.status() then return end
+
@@ -184677,13 +184601,12 @@ index 0000000..ead1e4e
+jit.flush(2) -- ignored
+jit.flush(1) -- ok
+jit.flush(1) -- crashes
-+
diff --git a/test/misc/lightud.lua b/test/misc/lightud.lua
new file mode 100644
-index 0000000..4974d50
+index 00000000..261b106f
--- /dev/null
+++ b/test/misc/lightud.lua
-@@ -0,0 +1,88 @@
+@@ -0,0 +1,87 @@
+local ctest = require("ctest")
+
+local lightud = ctest.lightud
@@ -184771,13 +184694,12 @@ index 0000000..4974d50
+ end, t))
+ assert(x == 16110)
+end
-+
diff --git a/test/misc/loop_unroll.lua b/test/misc/loop_unroll.lua
new file mode 100644
-index 0000000..1700fac
+index 00000000..3dcb3d22
--- /dev/null
+++ b/test/misc/loop_unroll.lua
-@@ -0,0 +1,35 @@
+@@ -0,0 +1,34 @@
+
+-- type instability on loop unroll -> record unroll
+do
@@ -184812,10 +184734,9 @@ index 0000000..1700fac
+ j = j+1
+ until true
+end
-+
diff --git a/test/misc/parse_comp.lua b/test/misc/parse_comp.lua
new file mode 100644
-index 0000000..5e1948d
+index 00000000..5e1948da
--- /dev/null
+++ b/test/misc/parse_comp.lua
@@ -0,0 +1,13 @@
@@ -184834,7 +184755,7 @@ index 0000000..5e1948d
+end
diff --git a/test/misc/parse_esc.lua b/test/misc/parse_esc.lua
new file mode 100644
-index 0000000..4bcce0e
+index 00000000..4bcce0e8
--- /dev/null
+++ b/test/misc/parse_esc.lua
@@ -0,0 +1,7 @@
@@ -184847,10 +184768,10 @@ index 0000000..4bcce0e
+ def"]])() == "abc def")
diff --git a/test/misc/parse_misc.lua b/test/misc/parse_misc.lua
new file mode 100644
-index 0000000..8031ec1
+index 00000000..2c9949e3
--- /dev/null
+++ b/test/misc/parse_misc.lua
-@@ -0,0 +1,31 @@
+@@ -0,0 +1,30 @@
+
+-- Ambiguous syntax: function call vs. new statement.
+if os.getenv("LUA52") then
@@ -184881,13 +184802,12 @@ index 0000000..8031ec1
+assert(#"aäa" == 4)
+assert(#"äöü·€晶" == 14)
+]]))()
-+
diff --git a/test/misc/phi_conv.lua b/test/misc/phi_conv.lua
new file mode 100644
-index 0000000..8d7bea5
+index 00000000..0b7261c8
--- /dev/null
+++ b/test/misc/phi_conv.lua
-@@ -0,0 +1,53 @@
+@@ -0,0 +1,52 @@
+
+local bit = require("bit")
+
@@ -184940,13 +184860,12 @@ index 0000000..8d7bea5
+if jit and jit.status and jit.status() then jit.opt.start("hotloop=1") end
+
+test()
-+
diff --git a/test/misc/recurse_deep.lua b/test/misc/recurse_deep.lua
new file mode 100644
-index 0000000..9b9af29
+index 00000000..f18ff5cb
--- /dev/null
+++ b/test/misc/recurse_deep.lua
-@@ -0,0 +1,29 @@
+@@ -0,0 +1,28 @@
+
+do
+ local function sum(n)
@@ -184975,13 +184894,12 @@ index 0000000..9b9af29
+ end
+ assert(fib(15) == 987)
+end
-+
diff --git a/test/misc/recurse_tail.lua b/test/misc/recurse_tail.lua
new file mode 100644
-index 0000000..ef76443
+index 00000000..d6296e2b
--- /dev/null
+++ b/test/misc/recurse_tail.lua
-@@ -0,0 +1,22 @@
+@@ -0,0 +1,21 @@
+
+do
+ local tr1
@@ -185003,13 +184921,12 @@ index 0000000..ef76443
+ end
+ assert(tr2(200) == 0)
+end
-+
diff --git a/test/misc/stack_gc.lua b/test/misc/stack_gc.lua
new file mode 100644
-index 0000000..656a06a
+index 00000000..f212fec5
--- /dev/null
+++ b/test/misc/stack_gc.lua
-@@ -0,0 +1,15 @@
+@@ -0,0 +1,14 @@
+
+do
+ local t = setmetatable({}, { __index=function(t, k)
@@ -185024,13 +184941,12 @@ index 0000000..656a06a
+ end})
+ local x = t[50]
+end
-+
diff --git a/test/misc/stack_purge.lua b/test/misc/stack_purge.lua
new file mode 100644
-index 0000000..bfaee0f
+index 00000000..de53dea4
--- /dev/null
+++ b/test/misc/stack_purge.lua
-@@ -0,0 +1,25 @@
+@@ -0,0 +1,24 @@
+
+-- Must preserve the modified function slot in the RET snapshot.
+local function a()
@@ -185055,13 +184971,12 @@ index 0000000..bfaee0f
+
+jit.off(c)
+c()
-+
diff --git a/test/misc/stackov.lua b/test/misc/stackov.lua
new file mode 100644
-index 0000000..ef105af
+index 00000000..65c68d95
--- /dev/null
+++ b/test/misc/stackov.lua
-@@ -0,0 +1,40 @@
+@@ -0,0 +1,39 @@
+
+local function f()
+ f()
@@ -185101,10 +185016,9 @@ index 0000000..ef105af
+
+local err, s = xpcall(vcall, debug.traceback, 1)
+assert(err == false)
-+
diff --git a/test/misc/stackovc.lua b/test/misc/stackovc.lua
new file mode 100644
-index 0000000..c00bcbd
+index 00000000..c00bcbd8
--- /dev/null
+++ b/test/misc/stackovc.lua
@@ -0,0 +1,4 @@
@@ -185114,10 +185028,10 @@ index 0000000..c00bcbd
+assert(not ok and string.find(err, "unpack"))
diff --git a/test/misc/tcall_base.lua b/test/misc/tcall_base.lua
new file mode 100644
-index 0000000..c6c4ae1
+index 00000000..52882519
--- /dev/null
+++ b/test/misc/tcall_base.lua
-@@ -0,0 +1,20 @@
+@@ -0,0 +1,19 @@
+
+local r = 0
+local function g()
@@ -185137,10 +185051,9 @@ index 0000000..c6c4ae1
+g() -- Compile this loop first.
+for i=1,50 do f() end
+assert(r == 51)
-+
diff --git a/test/misc/tcall_loop.lua b/test/misc/tcall_loop.lua
new file mode 100644
-index 0000000..d3c6f1a
+index 00000000..d3c6f1a6
--- /dev/null
+++ b/test/misc/tcall_loop.lua
@@ -0,0 +1,8 @@
@@ -185154,10 +185067,10 @@ index 0000000..d3c6f1a
+assert(x == 100)
diff --git a/test/misc/tonumber_scan.lua b/test/misc/tonumber_scan.lua
new file mode 100644
-index 0000000..78e1ca3
+index 00000000..a4f51cee
--- /dev/null
+++ b/test/misc/tonumber_scan.lua
-@@ -0,0 +1,180 @@
+@@ -0,0 +1,179 @@
+local ffi = require("ffi")
+local bit = require("bit")
+
@@ -185337,13 +185250,12 @@ index 0000000..78e1ca3
+-- print(" "..tohex64(u.x)..", \""..s.."\",")
+ end
+end
-+
diff --git a/test/misc/uclo.lua b/test/misc/uclo.lua
new file mode 100644
-index 0000000..bd9bd24
+index 00000000..6b36127e
--- /dev/null
+++ b/test/misc/uclo.lua
-@@ -0,0 +1,91 @@
+@@ -0,0 +1,90 @@
+
+local function test_for()
+ local z1, z2
@@ -185434,13 +185346,12 @@ index 0000000..bd9bd24
+ x = 2
+ assert(f() == 200)
+end
-+
diff --git a/test/misc/unordered_jit.lua b/test/misc/unordered_jit.lua
new file mode 100644
-index 0000000..5ff1a1b
+index 00000000..78ce72d3
--- /dev/null
+++ b/test/misc/unordered_jit.lua
-@@ -0,0 +1,96 @@
+@@ -0,0 +1,95 @@
+
+local nan = 0/0
+local t = {}
@@ -185536,10 +185447,9 @@ index 0000000..5ff1a1b
+do local z; for i=1,100 do z = not (nan >= nan) end; assert(z == true) end
+do local z; for i=1,100 do z = not (nan >= 1) end; assert(z == true) end
+do local z; for i=1,100 do z = not (1 >= nan) end; assert(z == true) end
-+
diff --git a/test/misc/wbarrier.lua b/test/misc/wbarrier.lua
new file mode 100644
-index 0000000..5536625
+index 00000000..5536625a
--- /dev/null
+++ b/test/misc/wbarrier.lua
@@ -0,0 +1,7 @@
@@ -185552,10 +185462,10 @@ index 0000000..5536625
+end
diff --git a/test/misc/wbarrier_jit.lua b/test/misc/wbarrier_jit.lua
new file mode 100644
-index 0000000..2c8dd7f
+index 00000000..bf1fc1e7
--- /dev/null
+++ b/test/misc/wbarrier_jit.lua
-@@ -0,0 +1,18 @@
+@@ -0,0 +1,17 @@
+
+do
+ local t = {[0]={}}
@@ -185573,13 +185483,12 @@ index 0000000..2c8dd7f
+ end
+ f()
+end
-+
diff --git a/test/misc/wbarrier_obar.lua b/test/misc/wbarrier_obar.lua
new file mode 100644
-index 0000000..258db21
+index 00000000..d6504995
--- /dev/null
+++ b/test/misc/wbarrier_obar.lua
-@@ -0,0 +1,22 @@
+@@ -0,0 +1,21 @@
+-- DSE of USTORE must eliminate OBAR, too.
+
+if jit and jit.opt then pcall(jit.opt.start, "-sink") end
@@ -185601,10 +185510,9 @@ index 0000000..258db21
+collectgarbage("setstepmul", 1)
+collectgarbage("restart")
+f()
-+
diff --git a/test/opt/dse/array.lua b/test/opt/dse/array.lua
new file mode 100644
-index 0000000..8c76624
+index 00000000..8c766248
--- /dev/null
+++ b/test/opt/dse/array.lua
@@ -0,0 +1,197 @@
@@ -185807,7 +185715,7 @@ index 0000000..8c76624
+end
diff --git a/test/opt/dse/field.lua b/test/opt/dse/field.lua
new file mode 100644
-index 0000000..d8a5411
+index 00000000..d8a5411c
--- /dev/null
+++ b/test/opt/dse/field.lua
@@ -0,0 +1,70 @@
@@ -185883,7 +185791,7 @@ index 0000000..d8a5411
+end
diff --git a/test/opt/dse/index b/test/opt/dse/index
new file mode 100644
-index 0000000..7b8ad1f
+index 00000000..7b8ad1f4
--- /dev/null
+++ b/test/opt/dse/index
@@ -0,0 +1,2 @@
@@ -185891,14 +185799,14 @@ index 0000000..7b8ad1f
+field.lua
diff --git a/test/opt/fold/index b/test/opt/fold/index
new file mode 100644
-index 0000000..8b4648c
+index 00000000..8b4648c7
--- /dev/null
+++ b/test/opt/fold/index
@@ -0,0 +1 @@
+kfold.lua
diff --git a/test/opt/fold/kfold.lua b/test/opt/fold/kfold.lua
new file mode 100644
-index 0000000..9cd3919
+index 00000000..9cd39190
--- /dev/null
+++ b/test/opt/fold/kfold.lua
@@ -0,0 +1,81 @@
@@ -185985,7 +185893,7 @@ index 0000000..9cd3919
+end
diff --git a/test/opt/fuse.lua b/test/opt/fuse.lua
new file mode 100644
-index 0000000..a68381e
+index 00000000..a68381ef
--- /dev/null
+++ b/test/opt/fuse.lua
@@ -0,0 +1,5 @@
@@ -185996,7 +185904,7 @@ index 0000000..a68381e
+end
diff --git a/test/opt/fwd/hrefk_rollback.lua b/test/opt/fwd/hrefk_rollback.lua
new file mode 100644
-index 0000000..5a6ad87
+index 00000000..5a6ad876
--- /dev/null
+++ b/test/opt/fwd/hrefk_rollback.lua
@@ -0,0 +1,32 @@
@@ -186034,7 +185942,7 @@ index 0000000..5a6ad87
+end
diff --git a/test/opt/fwd/index b/test/opt/fwd/index
new file mode 100644
-index 0000000..5bb1537
+index 00000000..5bb1537f
--- /dev/null
+++ b/test/opt/fwd/index
@@ -0,0 +1,3 @@
@@ -186043,7 +185951,7 @@ index 0000000..5bb1537
+upval.lua
diff --git a/test/opt/fwd/tnew_tdup.lua b/test/opt/fwd/tnew_tdup.lua
new file mode 100644
-index 0000000..9e18fa3
+index 00000000..9e18fa3b
--- /dev/null
+++ b/test/opt/fwd/tnew_tdup.lua
@@ -0,0 +1,69 @@
@@ -186118,7 +186026,7 @@ index 0000000..9e18fa3
+end
diff --git a/test/opt/fwd/upval.lua b/test/opt/fwd/upval.lua
new file mode 100644
-index 0000000..a3e83df
+index 00000000..a3e83dff
--- /dev/null
+++ b/test/opt/fwd/upval.lua
@@ -0,0 +1,50 @@
@@ -186174,7 +186082,7 @@ index 0000000..a3e83df
+end
diff --git a/test/opt/index b/test/opt/index
new file mode 100644
-index 0000000..94d50ae
+index 00000000..94d50aec
--- /dev/null
+++ b/test/opt/index
@@ -0,0 +1,6 @@
@@ -186186,14 +186094,14 @@ index 0000000..94d50ae
+sink +sink
diff --git a/test/opt/loop/index b/test/opt/loop/index
new file mode 100644
-index 0000000..e582023
+index 00000000..e5820234
--- /dev/null
+++ b/test/opt/loop/index
@@ -0,0 +1 @@
+unroll.lua
diff --git a/test/opt/loop/unroll.lua b/test/opt/loop/unroll.lua
new file mode 100644
-index 0000000..6fbd565
+index 00000000..6fbd565a
--- /dev/null
+++ b/test/opt/loop/unroll.lua
@@ -0,0 +1,32 @@
@@ -186231,7 +186139,7 @@ index 0000000..6fbd565
+end
diff --git a/test/opt/sink/alloc.lua b/test/opt/sink/alloc.lua
new file mode 100644
-index 0000000..bb2a0f7
+index 00000000..bb2a0f72
--- /dev/null
+++ b/test/opt/sink/alloc.lua
@@ -0,0 +1,126 @@
@@ -186363,7 +186271,7 @@ index 0000000..bb2a0f7
+end
diff --git a/test/opt/sink/ffi.lua b/test/opt/sink/ffi.lua
new file mode 100644
-index 0000000..0bba097
+index 00000000..0bba0978
--- /dev/null
+++ b/test/opt/sink/ffi.lua
@@ -0,0 +1,121 @@
@@ -186490,7 +186398,7 @@ index 0000000..0bba097
+end
diff --git a/test/opt/sink/ffi_nosink.lua b/test/opt/sink/ffi_nosink.lua
new file mode 100644
-index 0000000..8f7cced
+index 00000000..8f7cced2
--- /dev/null
+++ b/test/opt/sink/ffi_nosink.lua
@@ -0,0 +1,45 @@
@@ -186541,7 +186449,7 @@ index 0000000..8f7cced
+end
diff --git a/test/opt/sink/index b/test/opt/sink/index
new file mode 100644
-index 0000000..8bfa370
+index 00000000..8bfa370e
--- /dev/null
+++ b/test/opt/sink/index
@@ -0,0 +1,4 @@
@@ -186551,7 +186459,7 @@ index 0000000..8bfa370
+ffi_nosink.lua +ffi
diff --git a/test/opt/sink/nosink.lua b/test/opt/sink/nosink.lua
new file mode 100644
-index 0000000..762aace
+index 00000000..762aaced
--- /dev/null
+++ b/test/opt/sink/nosink.lua
@@ -0,0 +1,109 @@
@@ -186666,7 +186574,7 @@ index 0000000..762aace
+end
diff --git a/test/src/cpptest.cpp b/test/src/cpptest.cpp
new file mode 100644
-index 0000000..a5893ed
+index 00000000..a5893ed6
--- /dev/null
+++ b/test/src/cpptest.cpp
@@ -0,0 +1,129 @@
@@ -186801,7 +186709,7 @@ index 0000000..a5893ed
+}
diff --git a/test/src/ctest.c b/test/src/ctest.c
new file mode 100644
-index 0000000..d257567
+index 00000000..d257567b
--- /dev/null
+++ b/test/src/ctest.c
@@ -0,0 +1,339 @@
@@ -187146,10 +187054,10 @@ index 0000000..d257567
+}
diff --git a/test/sysdep/catch_cpp.lua b/test/sysdep/catch_cpp.lua
new file mode 100644
-index 0000000..b225100
+index 00000000..f2cfca64
--- /dev/null
+++ b/test/sysdep/catch_cpp.lua
-@@ -0,0 +1,71 @@
+@@ -0,0 +1,70 @@
+
+local cp = require("cpptest")
+
@@ -187220,10 +187128,9 @@ index 0000000..b225100
+ local a,b,c,d,e,f = cp.usereg(100, 50, foo, false)
+ assert(a==164 and b==312 and c==428 and d==3696 and e==404 and f==404)
+end
-+
diff --git a/test/sysdep/ffi_include_gtk.lua b/test/sysdep/ffi_include_gtk.lua
new file mode 100644
-index 0000000..a4bfcea
+index 00000000..a4bfceac
--- /dev/null
+++ b/test/sysdep/ffi_include_gtk.lua
@@ -0,0 +1,9 @@
@@ -187238,10 +187145,10 @@ index 0000000..a4bfcea
+include"/usr/include/gtk-2.0/gtk/gtk.h"
diff --git a/test/sysdep/ffi_include_std.lua b/test/sysdep/ffi_include_std.lua
new file mode 100644
-index 0000000..b88c82b
+index 00000000..5ef1affa
--- /dev/null
+++ b/test/sysdep/ffi_include_std.lua
-@@ -0,0 +1,36 @@
+@@ -0,0 +1,35 @@
+local ffi = require("ffi")
+
+dofile("../common/ffi_util.inc")
@@ -187277,13 +187184,12 @@ index 0000000..b88c82b
+ os.remove("/tmp/__tmp.c")
+ ffi.cdef(s)
+end
-+
diff --git a/test/sysdep/ffi_lib_c.lua b/test/sysdep/ffi_lib_c.lua
new file mode 100644
-index 0000000..a368d75
+index 00000000..f0ff0ad9
--- /dev/null
+++ b/test/sysdep/ffi_lib_c.lua
-@@ -0,0 +1,87 @@
+@@ -0,0 +1,86 @@
+local ffi = require("ffi")
+
+ffi.cdef[[
@@ -187370,13 +187276,12 @@ index 0000000..a368d75
+ ffi.load("pthread")
+ end
+end
-+
diff --git a/test/sysdep/ffi_lib_z.lua b/test/sysdep/ffi_lib_z.lua
new file mode 100644
-index 0000000..69a19ae
+index 00000000..91b1272d
--- /dev/null
+++ b/test/sysdep/ffi_lib_z.lua
-@@ -0,0 +1,107 @@
+@@ -0,0 +1,106 @@
+local ffi = require("ffi")
+
+local compress, uncompress
@@ -187483,10 +187388,9 @@ index 0000000..69a19ae
+assert(2*#c < #txt)
+local txt2 = uncompress(c, #txt)
+assert(txt2 == txt)
-+
diff --git a/test/test.lua b/test/test.lua
new file mode 100644
-index 0000000..b064eff
+index 00000000..f5131ba2
--- /dev/null
+++ b/test/test.lua
@@ -0,0 +1,416 @@
@@ -187501,7 +187405,7 @@ index 0000000..b064eff
+
+local function default_tags()
+ local tags = {}
-+
++
+ -- Lua version and features
+ tags.lua = tonumber(_VERSION:match"%d+%.%d+")
+ if table.pack then
@@ -187535,7 +187439,7 @@ index 0000000..b064eff
+ tags[flag:lower()] = true
+ end
+ end
-+
++
+ -- Environment
+ if dirsep == "\\" then
+ tags.windows = true
@@ -187557,7 +187461,7 @@ index 0000000..b064eff
+ tags["abi".. (bytecode:byte(9, 9) * 8)] = true
+ end
+ end
-+
++
+ return tags
+end
+
@@ -187594,7 +187498,7 @@ index 0000000..b064eff
+ want_meta = want_meta,
+ }
+ local result = opts
-+
++
+ local i, tlen = 1, #t
+ local joinedval = ""
+ local function flagval()
@@ -187609,7 +187513,7 @@ index 0000000..b064eff
+ end
+ return val
+ end
-+
++
+ while i <= tlen do
+ local arg = t[i]
+ i = i + 1
@@ -187800,7 +187704,7 @@ index 0000000..b064eff
+ end
+ end
+ seal(_G)
-+
++
+ if getmetatable(package.loaded) == sealed_mt then
+ setmetatable(package.loaded, nil)
+ end
@@ -187908,7 +187812,7 @@ index 0000000..b064eff
+end
diff --git a/test/trace/exit_frame.lua b/test/trace/exit_frame.lua
new file mode 100644
-index 0000000..9537c56
+index 00000000..9537c563
--- /dev/null
+++ b/test/trace/exit_frame.lua
@@ -0,0 +1,79 @@
@@ -187993,7 +187897,7 @@ index 0000000..9537c56
+end
diff --git a/test/trace/exit_growstack.lua b/test/trace/exit_growstack.lua
new file mode 100644
-index 0000000..658a31a
+index 00000000..4aa6fae8
--- /dev/null
+++ b/test/trace/exit_growstack.lua
@@ -0,0 +1,28 @@
@@ -188002,7 +187906,7 @@ index 0000000..658a31a
+ local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a;
+ local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a;
+ local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a;
-+ if i==90 then return end
++ if i==90 then return end
+ end
+ for j=1,5 do
+ collectgarbage() -- Shrink stack.
@@ -188027,7 +187931,7 @@ index 0000000..658a31a
+end
diff --git a/test/trace/exit_jfuncf.lua b/test/trace/exit_jfuncf.lua
new file mode 100644
-index 0000000..67ad7c3
+index 00000000..67ad7c36
--- /dev/null
+++ b/test/trace/exit_jfuncf.lua
@@ -0,0 +1,30 @@
@@ -188063,7 +187967,7 @@ index 0000000..67ad7c3
+end
diff --git a/test/trace/gc64_slot_revival.lua b/test/trace/gc64_slot_revival.lua
new file mode 100644
-index 0000000..40b9d87
+index 00000000..40b9d871
--- /dev/null
+++ b/test/trace/gc64_slot_revival.lua
@@ -0,0 +1,18 @@
@@ -188087,7 +187991,7 @@ index 0000000..40b9d87
+end
diff --git a/test/trace/index b/test/trace/index
new file mode 100644
-index 0000000..ea7a22e
+index 00000000..ea7a22e0
--- /dev/null
+++ b/test/trace/index
@@ -0,0 +1,7 @@
@@ -188100,7 +188004,7 @@ index 0000000..ea7a22e
+stitch.lua
diff --git a/test/trace/phi/copyspill.lua b/test/trace/phi/copyspill.lua
new file mode 100644
-index 0000000..17a8698
+index 00000000..17a8698f
--- /dev/null
+++ b/test/trace/phi/copyspill.lua
@@ -0,0 +1,53 @@
@@ -188159,7 +188063,7 @@ index 0000000..17a8698
+end
diff --git a/test/trace/phi/index b/test/trace/phi/index
new file mode 100644
-index 0000000..74a0733
+index 00000000..74a07333
--- /dev/null
+++ b/test/trace/phi/index
@@ -0,0 +1,3 @@
@@ -188168,7 +188072,7 @@ index 0000000..74a0733
+rotate.lua
diff --git a/test/trace/phi/ref.lua b/test/trace/phi/ref.lua
new file mode 100644
-index 0000000..3662912
+index 00000000..3662912d
--- /dev/null
+++ b/test/trace/phi/ref.lua
@@ -0,0 +1,131 @@
@@ -188305,7 +188209,7 @@ index 0000000..3662912
+end
diff --git a/test/trace/phi/rotate.lua b/test/trace/phi/rotate.lua
new file mode 100644
-index 0000000..cb751e0
+index 00000000..cb751e0b
--- /dev/null
+++ b/test/trace/phi/rotate.lua
@@ -0,0 +1,149 @@
@@ -188460,7 +188364,7 @@ index 0000000..cb751e0
+end
diff --git a/test/trace/snap.lua b/test/trace/snap.lua
new file mode 100644
-index 0000000..ba26326
+index 00000000..ba26326e
--- /dev/null
+++ b/test/trace/snap.lua
@@ -0,0 +1,47 @@
@@ -188513,7 +188417,7 @@ index 0000000..ba26326
+end
diff --git a/test/trace/stitch.lua b/test/trace/stitch.lua
new file mode 100644
-index 0000000..3f7f973
+index 00000000..3f7f9734
--- /dev/null
+++ b/test/trace/stitch.lua
@@ -0,0 +1,19 @@
@@ -188538,7 +188442,7 @@ index 0000000..3f7f973
+end
diff --git a/test/unportable/ffi_arith_int64.lua b/test/unportable/ffi_arith_int64.lua
new file mode 100644
-index 0000000..c05e02a
+index 00000000..c05e02a9
--- /dev/null
+++ b/test/unportable/ffi_arith_int64.lua
@@ -0,0 +1,68 @@
@@ -188612,10 +188516,10 @@ index 0000000..c05e02a
+{471871,702627,720692,1385612,1803393,1171039,1772007,763817,1583994,4486762,2380423,566647,1265370,2319256,770581,1990479,4566660,2319835,566647,1265370,2319256,770581,1990479,4566660,2319835,830322,4833809,4644705,1071753,2822313,7709069,4647021,})
diff --git a/test/unportable/math_special.lua b/test/unportable/math_special.lua
new file mode 100644
-index 0000000..4916101
+index 00000000..ef4de129
--- /dev/null
+++ b/test/unportable/math_special.lua
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,54 @@
+
+local inp = { 0, -"0", 0.5, -0.5, 1, -1, 1/0, -1/0, 0/0 }
+
@@ -188670,7 +188574,443 @@ index 0000000..4916101
+
+-- Pointless: deg, rad, min, max, pow
+-- LATER: %, fmod, frexp, ldexp, modf, sinh, cosh, tanh
+--
+2.31.1
+
+
+From 6508eeb5c841344e3f128267a04e8150dd36f926 Mon Sep 17 00:00:00 2001
+From: Sameera Deshpande <sameera.deshpande(a)linaro.org>
+Date: Fri, 15 Feb 2019 07:46:16 +0530
+Subject: [PATCH 02/10] Add support for FNMADD and FNMSUB.
+
+---
+ src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++-
+ 1 file changed, 31 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 67c53ee2..0e913fa5 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -353,6 +353,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+ return 0;
+ }
+
++/* Fuse FP neg-multiply-add/sub. */
++static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
++{
++ IRRef ref = ir->op1;
++ IRIns *irn = IR(ref);
++ if (irn->o != IR_ADD && irn->o != IR_SUB)
++ return 0;
++
++ if (!mayfuse(as, ref))
++ return 0;
++
++ IRRef lref = irn->op1, rref = irn->op2;
++ IRIns *irm;
++ if (lref != rref &&
++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
++ ra_noreg(irm->r)) ||
++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
++ (rref = lref, ra_noreg(irm->r))))) {
++ Reg dest = ra_dest(as, ir, RSET_FPR);
++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
++ Reg left = ra_alloc2(as, irm,
++ rset_exclude(rset_exclude(RSET_FPR, dest), add));
++ Reg right = (left >> 8); left &= 255;
++ emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31),
(right & 31), (add & 31));
++ return 1;
++ }
++ return 0;
++}
+
+ /* Fuse BAND + BSHL/BSHR into UBFM. */
+ static int asm_fuseandshift(ASMState *as, IRIns *ir)
+ {
+@@ -1466,7 +1495,8 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
+ if (irt_isnum(ir->t)) {
+- asm_fpunary(as, ir, A64I_FNEGd);
++ if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
++ asm_fpunary(as, ir, A64I_FNEGd);
+ return;
+ }
+ asm_intneg(as, ir);
+--
+2.31.1
+
+
+From aa0b2a0c837af307d26468fce05a7c24ab6045d3 Mon Sep 17 00:00:00 2001
+From: Vivien HENRIET <bubuabu(a)bubuabu.org>
+Date: Wed, 30 Jan 2019 23:44:51 +0100
+Subject: [PATCH 03/10] Fix os.date() for timezone change awareness
+
+On POSIX targets, system timezone changes are not taken into account.
+To reproduce,
+1. call os.date()
+2. change your timezone
+3. call os.date() within the same luajit instance
+
+On POSIX targets, os.date uses localtime_r to retrieve the time.
+On other targets, the function localtime is used. But there is a behaviour
+difference between these two functions: localtime acts as if it called tzset,
+which localtime_r doesn't.
+
+To fix the issue tzset is called before localtime_r.
+---
+ src/lib_os.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/lib_os.c b/src/lib_os.c
+index f19b831c..609cb2ec 100644
+--- a/src/lib_os.c
++++ b/src/lib_os.c
+@@ -185,6 +185,7 @@ LJLIB_CF(os_date)
+ #endif
+ } else {
+ #if LJ_TARGET_POSIX
++ tzset();
+ stm = localtime_r(&t, &rtm);
+ #else
+ stm = localtime(&t);
+--
+2.31.1
+
+
+From a62dc6306c4e5a4b672040067c169143da804a4f Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Thu, 14 Mar 2019 23:08:24 +0530
+Subject: [PATCH 04/10] Revert "FFI: Make FP to U64 conversions match JIT
+ backend behavior."
+
+This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8.
+
+The patch breaks test 279, i.e.
+
+ assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL")
+
+The patch was put in to make the JIT and interpreter behaviour
+consistent[1] for float to unsigned int conversions but it ended up
+making things worse. There needs to be a better fix for this.
+
+[1] https://github.com/LuaJIT/LuaJIT/pull/415
+---
+ src/lj_obj.h | 18 +++++-------------
+ 1 file changed, 5 insertions(+), 13 deletions(-)
+
+diff --git a/src/lj_obj.h b/src/lj_obj.h
+index 1a6445fc..97885683 100644
+--- a/src/lj_obj.h
++++ b/src/lj_obj.h
+@@ -995,22 +995,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
+
+ #define lj_num2int(n) ((int32_t)(n))
+
+-/*
+-** This must match the JIT backend behavior. In particular for archs
+-** that don't have a common hardware instruction for this conversion.
+-** Note that signed FP to unsigned int conversions have an undefined
+-** result and should never be relied upon in portable FFI code.
+-** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
+-*/
+ static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
+ {
+-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
+- int64_t i = (int64_t)n;
+- if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
+- return (uint64_t)i;
+-#else
+- return (uint64_t)n;
++#ifdef _MSC_VER
++ if (n >= 9223372036854775808.0) /* They think it's a feature. */
++ return (uint64_t)(int64_t)(n - 18446744073709551616.0);
++ else
+ #endif
++ return (uint64_t)n;
+ }
+
+ static LJ_AINLINE int32_t numberVint(cTValue *o)
+--
+2.31.1
+
+
+From fe2399a76bab67b32409fda1de82c34e8d5d7904 Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Sun, 17 Mar 2019 11:34:04 +0530
+Subject: [PATCH 05/10] Guard against undefined behaviour when casting from
+ float to unsigned
+
+Only range (-1.0, UINT64_MAX) can be safely converted to unsigned
+directly, and (-INT64_MAX, INT64_MAX) through a cast to int64_t first.
+The remaining range is undefined.
+
+TODO: Do the same for JIT as well as for float to other ranges.
+---
+ src/lj_obj.h | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_obj.h b/src/lj_obj.h
+index 97885683..9878059f 100644
+--- a/src/lj_obj.h
++++ b/src/lj_obj.h
+@@ -997,12 +997,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
+
+ static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
+ {
++ /* Undefined behaviour. This is deliberately not a full check because we
++ don't want to slow down compliant code. */
++ lj_assertX(n >= -9223372036854775809.0, "Overflow");
+ #ifdef _MSC_VER
+ if (n >= 9223372036854775808.0) /* They think it's a feature. */
+ return (uint64_t)(int64_t)(n - 18446744073709551616.0);
+ else
+ #endif
+- return (uint64_t)n;
++ if (n > -1.0)
++ return (uint64_t)n;
++ else
++ return (uint64_t)(int64_t)n;
+ }
+
+ static LJ_AINLINE int32_t numberVint(cTValue *o)
+--
+2.31.1
+
+
+From c193115e16a138dac69f774a7f57a5b4cc7f1097 Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Mon, 25 Mar 2019 17:56:53 +0530
+Subject: [PATCH 06/10] Fix build error with fnmsub fusing
+
+---
+ src/lj_asm_arm64.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 0e913fa5..4c7bf401 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -1495,7 +1495,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
+ if (irt_isnum(ir->t)) {
+- if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
++ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd))
+ asm_fpunary(as, ir, A64I_FNEGd);
+ return;
+ }
+--
+2.31.1
+
+
+From b5c2492406bf07ab80e134f351c1066e8f6224f2 Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Thu, 28 Mar 2019 09:19:34 +0530
+Subject: [PATCH 07/10] aarch64: better float to unsigned int conversion
+
+A straight float to unsigned conversion has a limited range of (-1.0,
+UTYPE_MAX) which should be fine in general but for the sake of
+consistency across the interpreter and the JIT compiler, it is
+necessary to work a wee bit harder to expand this range to (TYPE_MIN,
+UTYPE_MAX), which can be done with a simple range check. This adds a
+couple of branches but only one of the branches should have a
+noticeable performance impact on most processors with branch
+predictors, and that too only if the input number varies wildly in
+range.
+
+This currently works only for 64-bit conversions, 32-bit is still WIP.
+---
+ src/lj_asm_arm64.h | 30 ++++++++++++++++++++++--------
+ src/lj_target_arm64.h | 1 +
+ 2 files changed, 23 insertions(+), 8 deletions(-)
+
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 4c7bf401..e7e744a8 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -626,14 +626,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+- A64Ins ai = irt_is64(ir->t) ?
+- (st == IRT_NUM ?
+- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
+- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
+- (st == IRT_NUM ?
+- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
+- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
+- emit_dn(as, ai, dest, (left & 31));
++
++ A64Ins ai_signed = st == IRT_NUM ?
++ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) :
++ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32);
++
++ if (irt_isi64(ir->t) || irt_isint(ir->t))
++ emit_dn(as, ai_signed, dest, (left & 31));
++ else {
++ A64Ins ai_unsigned = st == IRT_NUM ?
++ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) :
++ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32);
++
++ MCLabel l_done = emit_label(as);
++ emit_dn(as, ai_unsigned, dest, (left & 31));
++ MCLabel l_signed = emit_label(as);
++ emit_jmp(as, l_done);
++ emit_dn(as, ai_signed, dest, (left & 31));
++ /* The valid range for float to unsigned int conversion is (-1.0,
++ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */
++ emit_cond_branch(as, CC_PL, l_signed);
++ emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0);
++ }
+ }
+ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer.
*/
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
+index 6d39ffb8..370699d9 100644
+--- a/src/lj_target_arm64.h
++++ b/src/lj_target_arm64.h
+@@ -283,6 +283,7 @@ typedef enum A64Ins {
+ A64I_STPs = 0x2d000000,
+ A64I_STPd = 0x6d000000,
+ A64I_FCMPd = 0x1e602000,
++ A64I_FCMPZs = 0x1e202008,
+ A64I_FCMPZd = 0x1e602008,
+ A64I_FCSELd = 0x1e600c00,
+ A64I_FRINTMd = 0x1e654000,
+--
+2.31.1
+
+
+From bd79b1d4596ed6780470c8d02f77b8398d80cd3a Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Thu, 28 Mar 2019 10:50:23 +0530
+Subject: [PATCH 08/10] Better behaviour for float to uint32_t conversions
+
+This is the uint32_t part of the float to unsigned int conversions for
+the interpreter. The cast ends up working correctly for x86 but not
+for aarch64 since fcvtzu sets the result to zero on negative inputs.
+Work slightly harder to make sure that negative number inputs behave
+like x86.
+
+This fixes the interpreter but not the JIT compiler, which errors out
+during the narrowing pass.
+---
+ src/lj_cconv.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/src/lj_cconv.c b/src/lj_cconv.c
+index 613f66e2..7e8a8b92 100644
+--- a/src/lj_cconv.c
++++ b/src/lj_cconv.c
+@@ -203,7 +203,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
+ else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
+ else *(int8_t *)dp = (int8_t)i;
+ } else if (dsize == 4) {
+- *(uint32_t *)dp = (uint32_t)n;
++ /* Undefined behaviour. This is deliberately not a full check because we
++ * don't want to slow down compliant code. */
++ lj_assertX(n >= -2147483649.0, "Overflow");
++ if (n > -1.0)
++ *(uint32_t *)dp = (uint32_t)n;
++ else
++ *(uint32_t *)dp = (uint32_t)(int32_t)n;
+ } else if (dsize == 8) {
+ if (!(dinfo & CTF_UNSIGNED))
+ *(int64_t *)dp = (int64_t)n;
+--
+2.31.1
+
+
+From a1636c6e1879b5eeb55a51ebba796501c93614dd Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh(a)sourceware.org>
+Date: Mon, 17 Jun 2019 13:50:57 +0530
+Subject: [PATCH 09/10] test: Check for package.searchers only in compat5.2
+
+LuaJIT version check for lua will return true for +lua<5.2 since it
+does not fully implement 5.2. Move the (not package.searchers) check
+to +compat5.2 instead of the version check since it is implemented by
+compat5.2.
+---
+ test/lib/contents.lua | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/test/lib/contents.lua b/test/lib/contents.lua
+index 2baacd5c..09866f6f 100644
+--- a/test/lib/contents.lua
++++ b/test/lib/contents.lua
+@@ -121,10 +121,13 @@ end
+
+ do --- pre-5.2 package +lua<5.2
+ assert(package.loaders)
+- assert(not package.searchers)
+ assert(package.seeall)
+ end
+
++do --- 5.2 compat package +compat5.2
++ assert(package.searchers)
++end
++
+ do --- 5.2 package +lua>=5.2
+ assert(not package.loaders)
+ assert(package.searchers)
+--
+2.31.1
+
+
+From ec04137a0873c09eef216b32f3df3b66209f47d5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej(a)sury.org>
+Date: Thu, 19 Nov 2015 16:29:02 +0200
+Subject: [PATCH 10/10] Get rid of LUAJIT_VERSION_SYM that changes ABI on every
+ patch release
+
+---
+ src/lj_dispatch.c | 5 -----
+ src/luajit.c | 2 --
+ src/luajit.h | 3 ---
+ 3 files changed, 10 deletions(-)
+
+diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
+index 7b73d3dd..3f69d0d1 100644
+--- a/src/lj_dispatch.c
++++ b/src/lj_dispatch.c
+@@ -318,11 +318,6 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
+ return 1; /* OK. */
+ }
+
+-/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */
+-LUA_API void LUAJIT_VERSION_SYM(void)
+-{
+-}
+-
+ /* -- Hooks --------------------------------------------------------------- */
+
+ /* This function can be called asynchronously (e.g. during a signal). */
+diff --git a/src/luajit.c b/src/luajit.c
+index 6aed5337..a4a11cb1 100644
+--- a/src/luajit.c
++++ b/src/luajit.c
+@@ -518,8 +518,6 @@ static int pmain(lua_State *L)
+ globalL = L;
+ if (argv[0] && argv[0][0]) progname = argv[0];
+
+- LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
+-
+ argn = collectargs(argv, &flags);
+ if (argn < 0) { /* Invalid args? */
+ print_usage();
+diff --git a/src/luajit.h b/src/luajit.h
+index 2ee1f908..04f6b456 100644
+--- a/src/luajit.h
++++ b/src/luajit.h
+@@ -73,7 +73,4 @@ LUA_API void luaJIT_profile_stop(lua_State *L);
+ LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+ int depth, size_t *len);
+
+-/* Enforce (dynamic) linker error for version mismatches. Call from main. */
+-LUA_API void LUAJIT_VERSION_SYM(void);
+-
+ #endif
--
-2.20.1
+2.31.1
diff --git a/luajit-2.1-update.patch b/luajit-2.1-update.patch
new file mode 100644
index 0000000..93fa207
--- /dev/null
+++ b/luajit-2.1-update.patch
@@ -0,0 +1,37572 @@
+diff --git a/COPYRIGHT b/COPYRIGHT
+index 6ed40025..9c2bca55 100644
+--- a/COPYRIGHT
++++ b/COPYRIGHT
+@@ -1,7 +1,7 @@
+ ===============================================================================
+-LuaJIT -- a Just-In-Time Compiler for Lua.
http://luajit.org/
++LuaJIT -- a Just-In-Time Compiler for Lua.
https://luajit.org/
+
+-Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+@@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+
+-[ MIT license:
http://www.opensource.org/licenses/mit-license.php ]
++[ MIT license:
https://www.opensource.org/licenses/mit-license.php ]
+
+ ===============================================================================
+ [ LuaJIT includes code from Lua 5.1/5.2, which has this license statement: ]
+@@ -51,6 +51,6 @@ THE SOFTWARE.
+
+ This is a version (aka dlmalloc) of malloc/free/realloc written by
+ Doug Lea and released to the public domain, as explained at
+-http://creativecommons.org/licenses/publicdomain
++https://creativecommons.org/licenses/publicdomain
+
+ ===============================================================================
+diff --git a/Makefile b/Makefile
+index 0f933089..aa1b84bd 100644
+--- a/Makefile
++++ b/Makefile
+@@ -10,7 +10,7 @@
+ # For MSVC, please follow the instructions given in src/msvcbuild.bat.
+ # For MinGW and Cygwin, cd to src and run make with the Makefile there.
+ #
+-# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ ##############################################################################
+
+ MAJVER= 2
+@@ -75,7 +75,7 @@ SYMLINK= ln -sf
+ INSTALL_X= install -m 0755
+ INSTALL_F= install -m 0644
+ UNINSTALL= $(RM)
+-LDCONFIG= ldconfig -n
++LDCONFIG= ldconfig -n 2>/dev/null
+ SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \
+ -e "s|^multilib=.*|multilib=$(MULTILIB)|"
+
+@@ -121,7 +121,7 @@ install: $(INSTALL_DEP)
+ $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+ cd src && test -f $(FILE_SO) && \
+ $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
+- $(LDCONFIG) $(INSTALL_LIB) && \
++ ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \
+ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \
+ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || :
+ cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN)
+diff --git a/README b/README
+index 2b9ae9d2..c9f7d9ad 100644
+--- a/README
++++ b/README
+@@ -3,9 +3,9 @@ README for LuaJIT 2.1.0-beta3
+
+ LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
+
+-Project Homepage:
http://luajit.org/
++Project Homepage:
https://luajit.org/
+
+-LuaJIT is Copyright (C) 2005-2017 Mike Pall.
++LuaJIT is Copyright (C) 2005-2021 Mike Pall.
+ LuaJIT is free software, released under the MIT license.
+ See full Copyright Notice in the COPYRIGHT file or in luajit.h.
+
+diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css
+index 62e1c165..0b385cee 100644
+--- a/doc/bluequad-print.css
++++ b/doc/bluequad-print.css
+@@ -1,4 +1,4 @@
+-/* Copyright (C) 2004-2017 Mike Pall.
++/* Copyright (C) 2004-2021 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+diff --git a/doc/bluequad.css b/doc/bluequad.css
+index be2c4bf2..86cd9ac0 100644
+--- a/doc/bluequad.css
++++ b/doc/bluequad.css
+@@ -1,4 +1,4 @@
+-/* Copyright (C) 2004-2017 Mike Pall.
++/* Copyright (C) 2004-2021 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+diff --git a/doc/changes.html b/doc/changes.html
+deleted file mode 100644
+index a66a8d95..00000000
+--- a/doc/changes.html
++++ /dev/null
+@@ -1,883 +0,0 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
+-<html>
+-<head>
+-<title>LuaJIT Change History</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
+-<meta name="Language" content="en">
+-<link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+-<link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+-<style type="text/css">
+-div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
+-</style>
+-</head>
+-<body>
+-<div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+-</div>
+-<div id="head">
+-<h1>LuaJIT Change History</h1>
+-</div>
+-<div id="nav">
+-<ul><li>
+-<a href="luajit.html">LuaJIT</a>
+-<ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
+-</li><li>
+-<a href="install.html">Installation</a>
+-</li><li>
+-<a href="running.html">Running</a>
+-</li></ul>
+-</li><li>
+-<a href="extensions.html">Extensions</a>
+-<ul><li>
+-<a href="ext_ffi.html">FFI Library</a>
+-<ul><li>
+-<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+-</li><li>
+-<a href="ext_ffi_api.html">ffi.* API</a>
+-</li><li>
+-<a href="ext_ffi_semantics.html">FFI Semantics</a>
+-</li></ul>
+-</li><li>
+-<a href="ext_jit.html">jit.* Library</a>
+-</li><li>
+-<a href="ext_c_api.html">Lua/C API</a>
+-</li><li>
+-<a href="ext_profiler.html">Profiler</a>
+-</li></ul>
+-</li><li>
+-<a href="status.html">Status</a>
+-<ul><li>
+-<a class="current" href="changes.html">Changes</a>
+-</li></ul>
+-</li><li>
+-<a href="faq.html">FAQ</a>
+-</li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+-<a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+-</li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+-</li></ul>
+-</div>
+-<div id="main">
+-<p>
+-This is a list of changes between the released versions of LuaJIT.<br>
+-The current <span style="color: #0000c0;">stable version</span> is
<strong>LuaJIT 2.0.5</strong>.<br>
+-</p>
+-<p>
+-Please check the
+-<a
href="http://luajit.org/changes.html"><span
class="ext">»</span> Online Change
History</a>
+-to see whether newer versions are available.
+-</p>
+-
+-<div class="major" style="background: #d0d0ff;">
+-<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 —
2017-05-01</h2>
+-<ul>
+-<li>Rewrite memory block allocator.</li>
+-<li>Add various extension from Lua 5.2/5.3.</li>
+-<li>Remove old Lua 5.0 compatibility defines.</li>
+-<li>Set arg table before evaluating <tt>LUA_INIT</tt> and
<tt>-e</tt> chunks.</li>
+-<li>Fix FOLD rules for <tt>math.abs()</tt> and FP
negation.</li>
+-<li>Fix soft-float <tt>math.abs()</tt> and negation.</li>
+-<li>Fix formatting of some small denormals at low precision.</li>
+-<li>LJ_GC64: Add JIT compiler support.</li>
+-<li>x64/LJ_GC64: Add JIT compiler backend.</li>
+-<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li>
+-<li>Windows/x86: Add full exception interoperability.</li>
+-<li>ARM64: Add big-endian support.</li>
+-<li>ARM64: Add JIT compiler backend.</li>
+-<li>MIPS: Fix <tt>TSETR</tt> barrier.</li>
+-<li>MIPS: Support MIPS16 interlinking.</li>
+-<li>MIPS soft-float: Fix code generation for
<tt>HREF</tt>.</li>
+-<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li>
+-<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li>
+-<li>FFI: Compile bitfield loads/stores.</li>
+-<li>Various fixes common with the 2.0 branch.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 —
2016-03-03</h2>
+-<ul>
+-<li>Enable trace stitching.</li>
+-<li>Use internal implementation for converting FP numbers to strings.</li>
+-<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string
literals.</li>
+-<li>Add MIPS soft-float support.</li>
+-<li>Switch MIPS port to dual-number mode.</li>
+-<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
+-<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
+-<li>FFI: Parse <tt>#line NN</tt> and
<tt>#NN</tt>.</li>
+-<li>Various minor fixes.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 —
2015-08-25</h2>
+-<p>
+-This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
+-Please take a look at the commit history for more details.
+-</p>
+-<ul>
+-<li>Changes to the VM core:
+-<ul>
+-<li>Add low-overhead profiler (<tt>-jp</tt>).</li>
+-<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47
bit). Interpreter-only for now.</li>
+-<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by
<tt>LJ_GC64</tt> mode.</li>
+-<li>Add <tt>table.new()</tt> and
<tt>table.clear()</tt>.</li>
+-<li>Parse binary number literals (<tt>0bxxx</tt>).</li>
+-</ul></li>
+-<li>Improvements to the JIT compiler:
+-<ul>
+-<li>Add trace stitching (disabled for now).</li>
+-<li>Compile various builtins: <tt>string.char()</tt>,
<tt>string.reverse()</tt>, <tt>string.lower()</tt>,
<tt>string.upper()</tt>, <tt>string.rep()</tt>,
<tt>string.format()</tt>, <tt>table.concat()</tt>,
<tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>,
<tt>debug.getmetatable()</tt>.</li>
+-<li>Compile <tt>string.find()</tt> for fixed string searches (no
patterns).</li>
+-<li>Compile <tt>BC_TSETM</tt>, e.g.
<tt>{1,2,3,f()}</tt>.</li>
+-<li>Compile string concatenations (<tt>BC_CAT</tt>).</li>
+-<li>Compile <tt>__concat</tt> metamethod.</li>
+-<li>Various minor optimizations.</li>
+-</ul></li>
+-<li>Internal Changes:
+-<ul>
+-<li>Add support for embedding LuaJIT bytecode for builtins.</li>
+-<li>Replace various builtins with embedded bytecode.</li>
+-<li>Refactor string buffers and string formatting.</li>
+-<li>Remove obsolete non-truncating number to integer conversions.</li>
+-</ul></li>
+-<li>Ports:
+-<ul>
+-<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li>
+-<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt>
mode).</li>
+-<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt>
mode.</li>
+-<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
+-<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
+-<li>PPC/e500: Drop support for this architecture.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>FFI: Add 64 bit bitwise operations.</li>
+-<li>FFI: Compile VLA/VLS and large cdata allocations with default
initialization.</li>
+-<li>FFI: Compile conversions from functions to function pointers.</li>
+-<li>FFI: Compile lightuserdata to <tt>void *</tt>
conversion.</li>
+-<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
+-<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
+-</ul></li>
+-</ul>
+-</div>
+-
+-<div class="major" style="background: #ffffd0;">
+-<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 — 2017-05-01</h2>
+-<ul>
+-<li>Add workaround for MSVC 2015 stdio changes.</li>
+-<li>Limit mcode alloc probing, depending on the available pool size.</li>
+-<li>Fix overly restrictive range calculation in mcode allocation.</li>
+-<li>Fix out-of-scope goto handling in parser.</li>
+-<li>Remove internal <tt>__mode = "K"</tt> and replace with
safe check.</li>
+-<li>Add "proto" field to
<tt>jit.util.funcinfo()</tt>.</li>
+-<li>Fix GC step size calculation.</li>
+-<li>Initialize <tt>uv->immutable</tt> for upvalues of loaded
chunks.</li>
+-<li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li>
+-<li>Drop leftover regs in 'for' iterator assignment, too.</li>
+-<li>Fix PHI remarking in SINK pass.</li>
+-<li>Don't try to record outermost <tt>pcall()</tt> return to lower
frame.</li>
+-<li>Add guard for obscure aliasing between open upvalues and SSA
slots.</li>
+-<li>Remove assumption that <tt>lj_math_random_step()</tt> doesn't
clobber FPRs.</li>
+-<li>Fix handling of non-numeric strings in arithmetic coercions.</li>
+-<li>Fix recording of <tt>select(n, ...)</tt> with off-trace
varargs</li>
+-<li>Fix install for cross-builds.</li>
+-<li>Don't allocate unused 2nd result register in JIT compiler
backend.</li>
+-<li>Drop marks from replayed instructions when sinking.</li>
+-<li>Fix unsinking check.</li>
+-<li>Properly handle OOM in <tt>trace_save()</tt>.</li>
+-<li>Limit number of arguments given to <tt>io.lines()</tt> and
<tt>fp:lines()</tt>.</li>
+-<li>Fix narrowing of <tt>TOBIT</tt>.</li>
+-<li>OSX: Fix build with recent XCode.</li>
+-<li>x86/x64: Don't spill an explicit <tt>REF_BASE</tt> in the
IR.</li>
+-<li>x86/x64: Fix instruction length decoder.</li>
+-<li>x86/x64: Search for exit jumps with instruction length decoder.</li>
+-<li>ARM: Fix <tt>BLX</tt> encoding for Thumb interworking
calls.</li>
+-<li>MIPS: Don't use <tt>RID_GP</tt> as a scratch
register.</li>
+-<li>MIPS: Fix emitted code for U32 to float conversion.</li>
+-<li>MIPS: Backport workaround for compact unwind tables.</li>
+-<li>MIPS: Fix cross-endian jit.bcsave.</li>
+-<li>MIPS: Fix <tt>BC_ISNEXT</tt> fallback path.</li>
+-<li>MIPS: Fix use of ffgccheck delay slots in interpreter.</li>
+-<li>FFI: Fix FOLD rules for <tt>int64_t</tt> comparisons.</li>
+-<li>FFI: Fix SPLIT pass for <tt>CONV i64.u64</tt>.</li>
+-<li>FFI: Fix <tt>ipairs()</tt> recording.</li>
+-<li>FFI: Don't propagate qualifiers into subtypes of complex.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.4">LuaJIT 2.0.4 — 2015-05-14</h2>
+-<ul>
+-<li>Fix stack check in narrowing optimization.</li>
+-<li>Fix Lua/C API typecheck error for special indexes.</li>
+-<li>Fix string to number conversion.</li>
+-<li>Fix lexer error for chunks without tokens.</li>
+-<li>Don't compile <tt>IR_RETF</tt> after
<tt>CALLT</tt> to ff with-side effects.</li>
+-<li>Fix <tt>BC_UCLO</tt>/<tt>BC_JMP</tt> join optimization
in Lua parser.</li>
+-<li>Fix corner case in string to number conversion.</li>
+-<li>Gracefully handle <tt>lua_error()</tt> for a suspended
coroutine.</li>
+-<li>Avoid error messages when building with Clang.</li>
+-<li>Fix snapshot #0 handling for traces with a stack check on entry.</li>
+-<li>Fix fused constant loads under high register pressure.</li>
+-<li>Invalidate backpropagation cache after DCE.</li>
+-<li>Fix ABC elimination.</li>
+-<li>Fix debug info for main chunk of stripped bytecode.</li>
+-<li>Fix FOLD rule for <tt>string.sub(s, ...) == k</tt>.</li>
+-<li>Fix FOLD rule for <tt>STRREF</tt> of
<tt>SNEW</tt>.</li>
+-<li>Fix frame traversal while searching for error function.</li>
+-<li>Prevent GC estimate miscalculation due to buffer growth.</li>
+-<li>Prevent adding side traces for stack checks.</li>
+-<li>Fix top slot calculation for snapshots with continuations.</li>
+-<li>Fix check for reuse of SCEV results in <tt>FORL</tt>.</li>
+-<li>Add PS Vita port.</li>
+-<li>Fix compatibility issues with Illumos.</li>
+-<li>Fix DragonFly build (unsupported).</li>
+-<li>OpenBSD/x86: Better executable memory allocation for W^X mode.</li>
+-<li>x86: Fix argument checks for <tt>ipairs()</tt>
iterator.</li>
+-<li>x86: <tt>lj_math_random_step()</tt> clobbers XMM regs on OSX
Clang.</li>
+-<li>x86: Fix code generation for unused result of
<tt>math.random()</tt>.</li>
+-<li>x64: Allow building with <tt>LUAJIT_USE_SYSMALLOC</tt> and
<tt>LUAJIT_USE_VALGRIND</tt>.</li>
+-<li>x86/x64: Fix argument check for bit shifts.</li>
+-<li>x86/x64: Fix code generation for fused test/arith ops.</li>
+-<li>ARM: Fix write barrier check in <tt>BC_USETS</tt>.</li>
+-<li>PPC: Fix red zone overflow in machine code generation.</li>
+-<li>PPC: Don't use <tt>mcrxr</tt> on PPE.</li>
+-<li>Various archs: Fix excess stack growth in interpreter.</li>
+-<li>FFI: Fix FOLD rule for <tt>TOBIT</tt> + <tt>CONV
num.u32</tt>.</li>
+-<li>FFI: Prevent DSE across <tt>ffi.string()</tt>.</li>
+-<li>FFI: No meta fallback when indexing pointer to incomplete struct.</li>
+-<li>FFI: Fix initialization of unions of subtypes.</li>
+-<li>FFI: Fix cdata vs. non-cdata arithmetic and comparisons.</li>
+-<li>FFI: Fix <tt>__index</tt>/<tt>__newindex</tt>
metamethod resolution for ctypes.</li>
+-<li>FFI: Fix compilation of reference field access.</li>
+-<li>FFI: Fix frame traversal for backtraces with FFI callbacks.</li>
+-<li>FFI: Fix recording of indexing a struct pointer ctype object
itself.</li>
+-<li>FFI: Allow non-scalar cdata to be compared for equality by
address.</li>
+-<li>FFI: Fix pseudo type conversions for type punning.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.3">LuaJIT 2.0.3 — 2014-03-12</h2>
+-<ul>
+-<li>Add PS4 port.</li>
+-<li>Add support for multilib distro builds.</li>
+-<li>Fix OSX build.</li>
+-<li>Fix MinGW build.</li>
+-<li>Fix Xbox 360 build.</li>
+-<li>Improve ULOAD forwarding for open upvalues.</li>
+-<li>Fix GC steps threshold handling when called by JIT-compiled code.</li>
+-<li>Fix argument checks for <tt>math.deg()</tt> and
<tt>math.rad()</tt>.</li>
+-<li>Fix <tt>jit.flush(func|true)</tt>.</li>
+-<li>Respect <tt>jit.off(func)</tt> when returning to a function,
too.</li>
+-<li>Fix compilation of <tt>string.byte(s, nil, n)</tt>.</li>
+-<li>Fix line number for relocated bytecode after closure fixup</li>
+-<li>Fix frame traversal for backtraces.</li>
+-<li>Fix ABC elimination.</li>
+-<li>Fix handling of redundant PHIs.</li>
+-<li>Fix snapshot restore for exit to function header.</li>
+-<li>Fix type punning alias analysis for constified pointers</li>
+-<li>Fix call unroll checks in the presence of metamethod frames.</li>
+-<li>Fix initial maxslot for down-recursive traces.</li>
+-<li>Prevent BASE register coalescing if parent uses
<tt>IR_RETF</tt>.</li>
+-<li>Don't purge modified function from stack slots in
<tt>BC_RET</tt>.</li>
+-<li>Fix recording of <tt>BC_VARG</tt>.</li>
+-<li>Don't access dangling reference to reallocated IR.</li>
+-<li>Fix frame depth display for bytecode dump in
<tt>-jdump</tt>.</li>
+-<li>ARM: Fix register allocation when rematerializing FPRs.</li>
+-<li>x64: Fix store to upvalue for lightuserdata values.</li>
+-<li>FFI: Add missing GC steps for callback argument conversions.</li>
+-<li>FFI: Properly unload loaded DLLs.</li>
+-<li>FFI: Fix argument checks for <tt>ffi.string()</tt>.</li>
+-<li>FFI/x64: Fix passing of vector arguments to calls.</li>
+-<li>FFI: Rehash finalizer table after GC cycle, if needed.</li>
+-<li>FFI: Fix <tt>cts-&gt;L</tt> for cdata unsinking in snapshot restore.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.2">LuaJIT 2.0.2 — 2013-06-03</h2>
+-<ul>
+-<li>Fix memory access check for fast string interning.</li>
+-<li>Fix MSVC intrinsics for older versions.</li>
+-<li>Add missing GC steps for <tt>io.*</tt> functions.</li>
+-<li>Fix spurious red zone overflows in machine code generation.</li>
+-<li>Fix jump-range constrained mcode allocation.</li>
+-<li>Inhibit DSE for implicit loads via calls.</li>
+-<li>Fix builtin string to number conversion for overflow digits.</li>
+-<li>Fix optional argument handling while recording builtins.</li>
+-<li>Fix optional argument handling in
<tt>table.concat()</tt>.</li>
+-<li>Add partial support for building with MingW64 GCC 4.8-SEH.</li>
+-<li>Add missing PHI barrier to <tt>string.sub(str, a, b) == kstr</tt>
FOLD rule.</li>
+-<li>Fix compatibility issues with Illumos.</li>
+-<li>ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.</li>
+-<li>MIPS: Fix cache flush/sync for JIT-compiled code jump area.</li>
+-<li>PPC: Add <tt>plt</tt> suffix for external calls from assembler
code.</li>
+-<li>FFI: Fix snapshot substitution in SPLIT pass.</li>
+-<li>FFI/x86: Fix register allocation for 64 bit comparisons.</li>
+-<li>FFI: Fix tailcall in lowest frame to C function with bool
result.</li>
+-<li>FFI: Ignore <tt>long</tt> type specifier in
<tt>ffi.istype()</tt>.</li>
+-<li>FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct
returns).</li>
+-<li>FFI: Fix calling conventions for ARM hard-float EABI (nested
structs).</li>
+-<li>FFI: Improve error messages for arithmetic and comparison
operators.</li>
+-<li>FFI: Insert no-op type conversion for pointer to integer cast.</li>
+-<li>FFI: Fix unroll limit for <tt>ffi.fill()</tt>.</li>
+-<li>FFI: Must sink <tt>XBAR</tt> together with
<tt>XSTORE</tt>s.</li>
+-<li>FFI: Preserve intermediate string for
<tt>const char *</tt> conversion.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.1">LuaJIT 2.0.1 — 2013-02-19</h2>
+-<ul>
+-<li>Don't clear frame for out-of-memory error.</li>
+-<li>Leave hook when resume catches error thrown from hook.</li>
+-<li>Add missing GC steps for template table creation.</li>
+-<li>Fix discharge order of comparisons in Lua parser.</li>
+-<li>Improve buffer handling for <tt>io.read()</tt>.</li>
+-<li>OSX: Add support for Mach-O object files to <tt>-b</tt>
option.</li>
+-<li>Fix PS3 port.</li>
+-<li>Fix/enable Xbox 360 port.</li>
+-<li>x86/x64: Always mark ref for shift count as non-weak.</li>
+-<li>x64: Don't fuse implicitly 32-to-64 extended operands.</li>
+-<li>ARM: Fix armhf call argument handling.</li>
+-<li>ARM: Fix code generation for integer math.min/math.max.</li>
+-<li>PPC/e500: Fix <tt>lj_vm_floor()</tt> for Inf/NaN.</li>
+-<li>FFI: Change priority of table initializer variants for structs.</li>
+-<li>FFI: Fix code generation for bool call result check on x86/x64.</li>
+-<li>FFI: Load FFI library on-demand for bytecode with cdata literals.</li>
+-<li>FFI: Fix handling of qualified transparent structs/unions.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0">LuaJIT 2.0.0 — 2012-11-08</h2>
+-<ul>
+-<li>Correctness and completeness:
+-<ul>
+- <li>Fix Android/x86 build.</li>
+- <li>Fix recording of equality comparisons with <tt>__eq</tt>
metamethods.</li>
+- <li>Fix detection of immutable upvalues.</li>
+- <li>Replace error with PANIC for callbacks from JIT-compiled code.</li>
+- <li>Fix builtin string to number conversion for
<tt>INT_MIN</tt>.</li>
+- <li>Don't create unneeded array part for template tables.</li>
+- <li>Fix <tt>CONV.num.int</tt> sinking.</li>
+- <li>Don't propagate implicitly widened number to index
metamethods.</li>
+- <li>ARM: Fix ordered comparisons of number vs. non-number.</li>
+- <li>FFI: Fix code generation for replay of sunk float fields.</li>
+- <li>FFI: Fix signedness of bool.</li>
+- <li>FFI: Fix recording of bool call result check on x86/x64.</li>
+- <li>FFI: Fix stack-adjustment for <tt>__thiscall</tt>
callbacks.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta11">LuaJIT 2.0.0-beta11 —
2012-10-16</h2>
+-<ul>
+-<li>New features:
+-<ul>
+- <li>Use ARM VFP instructions, if available (build-time detection).</li>
+- <li>Add support for ARM hard-float EABI
(<tt>armhf</tt>).</li>
+- <li>Add PS3 port.</li>
+- <li>Add many features from Lua 5.2, e.g.
<tt>goto</tt>/labels.
+- Refer to <a href="extensions.html#lua52">this
list</a>.</li>
+- <li>FFI: Add parameterized C types.</li>
+- <li>FFI: Add support for copy constructors.</li>
+- <li>FFI: Equality comparisons never raise an error (treat as unequal
instead).</li>
+- <li>FFI: Box all accessed or returned enums.</li>
+- <li>FFI: Check for <tt>__new</tt> metamethod when calling a
constructor.</li>
+- <li>FFI: Handle <tt>__pairs</tt>/<tt>__ipairs</tt>
metamethods for cdata objects.</li>
+- <li>FFI: Convert <tt>io.*</tt> file handle to <tt>FILE
*</tt> pointer (but as a <tt>void *</tt>).</li>
+- <li>FFI: Detect and support type punning through unions.</li>
+- <li>FFI: Improve various error messages.</li>
+-</ul></li>
+-<li>Build-system reorganization:
+-<ul>
+- <li>Reorganize directory layout:<br>
+- <tt>lib/*</tt> → <tt>src/jit/*</tt><br>
+- <tt>src/buildvm_*.dasc</tt> →
<tt>src/vm_*.dasc</tt><br>
+- <tt>src/buildvm_*.h</tt> → removed<br>
+- <tt>src/buildvm*</tt> →
<tt>src/host/*</tt></li>
+- <li>Add minified Lua interpreter plus Lua BitOp (<tt>minilua</tt>)
to run DynASM.</li>
+- <li>Change DynASM bit operations to use Lua BitOp</li>
+- <li>Translate only <tt>vm_*.dasc</tt> for detected target
architecture.</li>
+- <li>Improve target detection for <tt>msvcbuild.bat</tt>.</li>
+- <li>Fix build issues on Cygwin and MinGW with optional MSys.</li>
+- <li>Handle cross-compiles with FPU/no-FPU or hard-fp/soft-fp ABI
mismatch.</li>
+- <li>Remove some library functions for no-JIT/no-FFI builds.</li>
+- <li>Add uninstall target to top-level Makefile.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+- <li>Preserve snapshot #0 PC for all traces.</li>
+- <li>Fix argument checks for <tt>coroutine.create()</tt>.</li>
+- <li>Command line prints version and JIT status to <tt>stdout</tt>,
not <tt>stderr</tt>.</li>
+- <li>Fix userdata <tt>__gc</tt> separations at Lua state
close.</li>
+- <li>Fix <tt>TDUP</tt> to <tt>HLOAD</tt> forwarding for
<tt>LJ_DUALNUM</tt> builds.</li>
+- <li>Fix buffer check in bytecode writer.</li>
+- <li>Make <tt>os.date()</tt> thread-safe.</li>
+- <li>Add missing declarations for MSVC intrinsics.</li>
+- <li>Fix dispatch table modifications for return hooks.</li>
+- <li>Workaround for MSVC conversion bug (<tt>double</tt> →
<tt>uint32_t</tt> → <tt>int32_t</tt>).</li>
+- <li>Fix FOLD rule <tt>(i-j)-i =&gt; 0-j</tt>.</li>
+- <li>Never use DWARF unwinder on Windows.</li>
+- <li>Fix shrinking of direct mapped blocks in builtin allocator.</li>
+- <li>Limit recursion depth in <tt>string.match()</tt> et
al.</li>
+- <li>Fix late despecialization of <tt>ITERN</tt> after loop has been
entered.</li>
+- <li>Fix <tt>'f'</tt> and <tt>'L'</tt>
options for <tt>debug.getinfo()</tt> and
<tt>lua_getinfo()</tt>.</li>
+- <li>Fix <tt>package.searchpath()</tt>.</li>
+- <li>OSX: Change dylib names to be consistent with other platforms.</li>
+- <li>Android: Workaround for broken
<tt>sprintf("%g", -0.0)</tt>.</li>
+- <li>x86: Remove support for ancient CPUs without <tt>CMOV</tt>
(before Pentium Pro).</li>
+- <li>x86: Fix register allocation for calls returning register pair.</li>
+- <li>x86/x64: Fix fusion of unsigned byte comparisons with swapped
operands.</li>
+- <li>ARM: Fix <tt>tonumber()</tt> argument check.</li>
+- <li>ARM: Fix modulo operator and
<tt>math.floor()</tt>/<tt>math.ceil()</tt> for
<tt>inf</tt>/<tt>nan</tt>.</li>
+- <li>ARM: Invoke SPLIT pass for leftover
<tt>IR_TOBIT</tt>.</li>
+- <li>ARM: Fix BASE register coalescing.</li>
+- <li>PPC: Fix interpreter state setup in callbacks.</li>
+- <li>PPC: Fix <tt>string.sub()</tt> range check.</li>
+- <li>MIPS: Support generation of MIPS/MIPSEL bytecode object files.</li>
+- <li>MIPS: Fix calls to
<tt>floor()</tt>/<tt>ceil()</tt><tt>/trunc()</tt>.</li>
+- <li>ARM/PPC: Detect more target architecture variants.</li>
+- <li>ARM/PPC/e500/MIPS: Fix tailcalls from fast functions, esp.
<tt>tostring()</tt>.</li>
+- <li>ARM/PPC/MIPS: Fix rematerialization of FP constants.</li>
+- <li>FFI: Don't call <tt>FreeLibrary()</tt> on our own
EXE/DLL.</li>
+- <li>FFI: Resolve metamethods for constructors, too.</li>
+- <li>FFI: Properly disable callbacks on iOS (would require executable
memory).</li>
+- <li>FFI: Fix cdecl string parsing during recording.</li>
+- <li>FFI: Show address pointed to for <tt>tostring(ref)</tt>,
too.</li>
+- <li>FFI: Fix alignment of C call argument/return structure.</li>
+- <li>FFI: Initialize all fields of standard types.</li>
+- <li>FFI: Fix callback handling when new C types are declared in
callback.</li>
+- <li>FFI: Fix recording of constructors for pointers.</li>
+- <li>FFI: Always resolve metamethods for pointers to structs.</li>
+- <li>FFI: Correctly propagate alignment when interning nested types.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+- <li>Add allocation sinking and store sinking optimization.</li>
+- <li>Constify immutable upvalues.</li>
+- <li>Add builtin string to integer or FP number conversion. Improves
cross-platform consistency and correctness.</li>
+- <li>Create string hash slots in template tables for non-const values, too.
Avoids later table resizes.</li>
+- <li>Eliminate <tt>HREFK</tt> guard for template table
references.</li>
+- <li>Add various new FOLD rules.</li>
+- <li>Don't use stack unwinding for <tt>lua_yield()</tt> (slow on
x64).</li>
+- <li>ARM, PPC, MIPS: Improve <tt>XLOAD</tt> operand fusion and
register hinting.</li>
+- <li>PPC, MIPS: Compile <tt>math.sqrt()</tt> to sqrt instruction, if
available.</li>
+- <li>FFI: Fold <tt>KPTR</tt> + constant offset in SPLIT
pass.</li>
+- <li>FFI: Optimize/inline <tt>ffi.copy()</tt> and
<tt>ffi.fill()</tt>.</li>
+- <li>FFI: Compile and optimize array/struct copies.</li>
+- <li>FFI: Compile <tt>ffi.typeof(cdata|ctype)</tt>,
<tt>ffi.sizeof()</tt>, <tt>ffi.alignof()</tt>,
<tt>ffi.offsetof()</tt> and <tt>ffi.gc()</tt>.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta10">LuaJIT 2.0.0-beta10 —
2012-05-09</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>The MIPS of LuaJIT is complete. It requires a CPU conforming to the
+-MIPS32 R1 architecture with hardware FPU. O32 hard-fp ABI,
+-little-endian or big-endian.</li>
+-<li>Auto-detect target arch via cross-compiler. No need for
+-<tt>TARGET=arch</tt> anymore.</li>
+-<li>Make DynASM compatible with Lua 5.2.</li>
+-<li>From Lua 5.2: Try <tt>__tostring</tt> metamethod on non-string error
+-messages..</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix parsing of hex literals with exponents.</li>
+-<li>Fix bytecode dump for certain number constants.</li>
+-<li>Fix argument type in error message for relative arguments.</li>
+-<li>Fix argument error handling on Lua stacks without a frame.</li>
+-<li>Add missing mcode limit check in assembler backend.</li>
+-<li>Fix compilation on OpenBSD.</li>
+-<li>Avoid recursive GC steps after GC-triggered trace exit.</li>
+-<li>Replace <tt>&lt;unwind.h&gt;</tt> definitions with our own.</li>
+-<li>Fix OSX build issues. Bump minimum required OSX version to 10.4.</li>
+-<li>Fix discharge order of comparisons in Lua parser.</li>
+-<li>Ensure running <tt>__gc</tt> of userdata created in
<tt>__gc</tt>
+-at state close.</li>
+-<li>Limit number of userdata <tt>__gc</tt> separations at state
close.</li>
+-<li>Fix bytecode <tt>JMP</tt> slot range when optimizing
+-<tt>and</tt>/<tt>or</tt> with constant LHS.</li>
+-<li>Fix DSE of <tt>USTORE</tt>.</li>
+-<li>Make <tt>lua_concat()</tt> work from C hook with partial
frame.</li>
+-<li>Add required PHIs for implicit conversions, e.g. via
<tt>XREF</tt>
+-forwarding.</li>
+-<li>Add more comparison variants to Valgrind suppressions file.</li>
+-<li>Disable loading bytecode with an extra header (BOM or
<tt>#!</tt>).</li>
+-<li>Fix PHI stack slot syncing.</li>
+-<li>ARM: Reorder type/value tests to silence Valgrind.</li>
+-<li>ARM: Fix register allocation for <tt>ldrd</tt>-optimized
+-<tt>HREFK</tt>.</li>
+-<li>ARM: Fix conditional branch fixup for <tt>OBAR</tt>.</li>
+-<li>ARM: Invoke SPLIT pass for <tt>double</tt> args in FFI
call.</li>
+-<li>ARM: Handle all <tt>CALL*</tt> ops with
<tt>double</tt> results in
+-SPLIT pass.</li>
+-<li>ARM: Fix rejoin of <tt>POW</tt> in SPLIT pass.</li>
+-<li>ARM: Fix compilation of <tt>math.sinh</tt>,
<tt>math.cosh</tt>,
+-<tt>math.tanh</tt>.</li>
+-<li>ARM, PPC: Avoid pointless arg clearing in
<tt>BC_IFUNCF</tt>.</li>
+-<li>PPC: Fix resume after yield from hook.</li>
+-<li>PPC: Fix argument checking for <tt>rawget()</tt>.</li>
+-<li>PPC: Fix fusion of floating-point
<tt>XLOAD</tt>/<tt>XSTORE</tt>.</li>
+-<li>PPC: Fix <tt>HREFK</tt> code generation for huge
tables.</li>
+-<li>PPC: Use builtin D-Cache/I-Cache sync code.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>Ignore empty statements in <tt>ffi.cdef()</tt>.</li>
+-<li>Ignore number parsing errors while skipping definitions.</li>
+-<li>Don't touch frame in callbacks with tailcalls to fast
functions.</li>
+-<li>Fix library unloading on POSIX systems.</li>
+-<li>Finalize cdata before userdata when closing the state.</li>
+-<li>Change <tt>ffi.load()</tt> library name resolution for
Cygwin.</li>
+-<li>Fix resolving of function name redirects on Windows/x86.</li>
+-<li>Fix symbol resolving error messages on Windows.</li>
+-<li>Fix blacklisting of C functions calling callbacks.</li>
+-<li>Fix result type of pointer difference.</li>
+-<li>Use correct PC in FFI metamethod error message.</li>
+-<li>Allow <tt>'typedef _Bool int BOOL;'</tt> for the Windows
API.</li>
+-<li>Don't record test for bool result of call, if ignored.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta9">LuaJIT 2.0.0-beta9 —
2011-12-14</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>PPC port of LuaJIT is complete. Default is the dual-number port
+-(usually faster). Single-number port selectable via <tt>src/Makefile</tt>
+-at build time.</li>
+-<li>Add FFI callback support.</li>
+-<li>Extend <tt>-b</tt> to generate <tt>.c</tt>,
<tt>.h</tt> or <tt>.obj/.o</tt>
+-files with embedded bytecode.</li>
+-<li>Allow loading embedded bytecode with
<tt>require()</tt>.</li>
+-<li>From Lua 5.2: Change to <tt>'\z'</tt> escape. Reject
undefined escape
+-sequences.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix OSX 10.7 build. Fix <tt>install_name</tt> and versioning on
OSX.</li>
+-<li>Fix iOS build.</li>
+-<li>Install <tt>dis_arm.lua</tt>, too.</li>
+-<li>Mark installed shared library as executable.</li>
+-<li>Add debug option to <tt>msvcbuild.bat</tt> and improve error
handling.</li>
+-<li>Fix data-flow analysis for iterators.</li>
+-<li>Fix forced unwinding triggered by external unwinder.</li>
+-<li>Record missing <tt>for</tt> loop slot loads (return to lower
frame).</li>
+-<li>Always use ANSI variants of Windows system functions.</li>
+-<li>Fix GC barrier for multi-result table constructor
(<tt>TSETM</tt>).</li>
+-<li>Fix/add various FOLD rules.</li>
+-<li>Add potential PHI for number conversions due to type instability.</li>
+-<li>Do not eliminate PHIs only referenced from other PHIs.</li>
+-<li>Correctly anchor implicit number to string conversions in Lua/C
API.</li>
+-<li>Fix various stack limit checks.</li>
+-<li>x64: Use thread-safe exceptions for external unwinding (GCC
platforms).</li>
+-<li>x64: Fix result type of cdata index conversions.</li>
+-<li>x64: Fix <tt>math.random()</tt> and
<tt>bit.bswap()</tt> code generation.</li>
+-<li>x64: Fix <tt>lightuserdata</tt> comparisons.</li>
+-<li>x64: Always extend stack-passed arguments to pointer size.</li>
+-<li>ARM: Many fixes to code generation backend.</li>
+-<li>PPC/e500: Fix dispatch for binop metamethods.</li>
+-<li>PPC/e500: Save/restore condition registers when entering/leaving the
VM.</li>
+-<li>PPC/e500: Fix write barrier in stores of strings to upvalues.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>Fix C comment parsing.</li>
+-<li>Fix snapshot optimization for cdata comparisons.</li>
+-<li>Fix recording of const/enum lookups in namespaces.</li>
+-<li>Fix call argument and return handling for <tt>I8/U8/I16/U16</tt>
types.</li>
+-<li>Fix unfused loads of float fields.</li>
+-<li>Fix <tt>ffi.string()</tt> recording.</li>
+-<li>Save <tt>GetLastError()</tt> around
<tt>ffi.load()</tt> and symbol
+-resolving, too.</li>
+-<li>Improve ld script detection in <tt>ffi.load()</tt>.</li>
+-<li>Record loads/stores to external variables in namespaces.</li>
+-<li>Compile calls to stdcall, fastcall and vararg functions.</li>
+-<li>Treat function ctypes like pointers in comparisons.</li>
+-<li>Resolve <tt>__call</tt> metamethod for pointers, too.</li>
+-<li>Record C function calls with bool return values.</li>
+-<li>Record <tt>ffi.errno()</tt>.</li>
+-<li>x86: Fix number to <tt>uint32_t</tt> conversion
rounding.</li>
+-<li>x86: Fix 64 bit arithmetic in assembler backend.</li>
+-<li>x64: Fix struct-by-value calling conventions.</li>
+-<li>ARM: Ensure invocation of SPLIT pass for float conversions.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Display trace types with <tt>-jv</tt> and
<tt>-jdump</tt>.</li>
+-<li>Record isolated calls. But prefer recording loops over calls.</li>
+-<li>Specialize to prototype for non-monomorphic functions. Solves the
+-trace-explosion problem for closure-heavy programming styles.</li>
+-<li>Always generate a portable <tt>vmdef.lua</tt>. Easier for
distros.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta8">LuaJIT 2.0.0-beta8 —
2011-06-23</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>Soft-float ARM port of LuaJIT is complete.</li>
+-<li>Add support for bytecode loading/saving and <tt>-b</tt> command
line
+-option.</li>
+-<li>From Lua 5.2: <tt>__len</tt> metamethod for tables
+-(disabled by default).</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>ARM: Misc. fixes for interpreter.</li>
+-<li>x86/x64: Fix <tt>bit.*</tt> argument checking in
interpreter.</li>
+-<li>Catch early out-of-memory in memory allocator initialization.</li>
+-<li>Fix data-flow analysis for paths leading to an upvalue close.</li>
+-<li>Fix check for missing arguments in
<tt>string.format()</tt>.</li>
+-<li>Fix Solaris/x86 build (note: not a supported target).</li>
+-<li>Fix recording of loops with instable directions in side traces.</li>
+-<li>x86/x64: Fix fusion of comparisons with
<tt>u8</tt>/<tt>u16</tt>
+-<tt>XLOAD</tt>.</li>
+-<li>x86/x64: Fix register allocation for variable shifts.</li>
+-</ul></li>
+-<li>FFI library:
+-<ul>
+-<li>Add <tt>ffi.errno()</tt>. Save
<tt>errno</tt>/<tt>GetLastError()</tt>
+-around allocations etc.</li>
+-<li>Fix <tt>__gc</tt> for VLA/VLS cdata objects.</li>
+-<li>Fix recording of casts from 32 bit cdata pointers to integers.</li>
+-<li><tt>tonumber(cdata)</tt> returns <tt>nil</tt> for
non-numbers.</li>
+-<li>Show address pointed to for
<tt>tostring(pointer)</tt>.</li>
+-<li>Print <tt>NULL</tt> pointers as <tt>"cdata&lt;... *&gt;: NULL"</tt>.</li>
+-<li>Support <tt>__tostring</tt> metamethod for pointers to structs,
too.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>More tuning for loop unrolling heuristics.</li>
+-<li>Flatten and compress in-memory debug info (saves ~70%).</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta7">LuaJIT 2.0.0-beta7 —
2011-05-05</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>ARM port of the LuaJIT interpreter is complete.</li>
+-<li>FFI library: Add <tt>ffi.gc()</tt>,
<tt>ffi.metatype()</tt>,
+-<tt>ffi.istype()</tt>.</li>
+-<li>FFI library: Resolve ld script redirection in
<tt>ffi.load()</tt>.</li>
+-<li>From Lua 5.2: <tt>package.searchpath()</tt>,
<tt>fp:read("*L")</tt>,
+-<tt>load(string)</tt>.</li>
+-<li>From Lua 5.2, disabled by default: empty statement,
+-<tt>table.unpack()</tt>, modified
<tt>coroutine.running()</tt>.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>FFI library: numerous fixes.</li>
+-<li>Fix type mismatches in store-to-load forwarding.</li>
+-<li>Fix error handling within metamethods.</li>
+-<li>Fix <tt>table.maxn()</tt>.</li>
+-<li>Improve accuracy of <tt>x^-k</tt> on x64.</li>
+-<li>Fix code generation for Intel Atom in x64 mode.</li>
+-<li>Fix narrowing of POW.</li>
+-<li>Fix recording of retried fast functions.</li>
+-<li>Fix code generation for <tt>bit.bnot()</tt> and
multiplies.</li>
+-<li>Fix error location within cpcall frames.</li>
+-<li>Add workaround for old libgcc unwind bug.</li>
+-<li>Fix <tt>lua_yield()</tt> and
<tt>getmetatable(lightuserdata)</tt> on x64.</li>
+-<li>Misc. fixes for PPC/e500 interpreter.</li>
+-<li>Fix stack slot updates for down-recursion.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Add dual-number mode (int/double) for the VM. Enabled for ARM.</li>
+-<li>Improve narrowing of arithmetic operators and <tt>for</tt>
loops.</li>
+-<li>Tune loop unrolling heuristics and increase trace recorder limits.</li>
+-<li>Eliminate dead slots in snapshots using bytecode data-flow
analysis.</li>
+-<li>Avoid phantom stores to proxy tables.</li>
+-<li>Optimize lookups in empty proxy tables.</li>
+-<li>Improve bytecode optimization of
<tt>and</tt>/<tt>or</tt> operators.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta6">LuaJIT 2.0.0-beta6 —
2011-02-11</h2>
+-<ul>
+-<li>New features:
+-<ul>
+-<li>PowerPC/e500v2 port of the LuaJIT interpreter is complete.</li>
+-<li>Various minor features from Lua 5.2: Hex escapes in literals,
+-<tt>'\*'</tt> escape, reversible
<tt>string.format("%q",s)</tt>,
+-<tt>"%g"</tt> pattern, <tt>table.sort</tt> checks
callbacks,
+-<tt>os.exit(status|true|false[,close])</tt>.</li>
+-<li>Lua 5.2 <tt>__pairs</tt> and <tt>__ipairs</tt>
metamethods
+-(disabled by default).</li>
+-<li>Initial release of the FFI library.</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix <tt>string.format()</tt> for non-finite numbers.</li>
+-<li>Fix memory leak when compiled to use the built-in allocator.</li>
+-<li>x86/x64: Fix unnecessary resize in <tt>TSETM</tt>
bytecode.</li>
+-<li>Fix various GC issues with traces and
<tt>jit.flush()</tt>.</li>
+-<li>x64: Fix fusion of indexes for array references.</li>
+-<li>x86/x64: Fix stack overflow handling for coroutine results.</li>
+-<li>Enable low-2GB memory allocation on FreeBSD/x64.</li>
+-<li>Fix <tt>collectgarbage("count")</tt> result if more than
2GB is in use.</li>
+-<li>Fix parsing of hex floats.</li>
+-<li>x86/x64: Fix loop branch inversion with trailing
+-<tt>HREF+NE/EQ</tt>.</li>
+-<li>Add <tt>jit.os</tt> string.</li>
+-<li><tt>coroutine.create()</tt> permits running C functions,
too.</li>
+-<li>Fix OSX build to work with newer ld64 versions.</li>
+-<li>Fix bytecode optimization of <tt>and</tt>/<tt>or</tt>
operators.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Emit specialized bytecode for
<tt>pairs()</tt>/<tt>next()</tt>.</li>
+-<li>Improve bytecode coalescing of <tt>nil</tt> constants.</li>
+-<li>Compile calls to vararg functions.</li>
+-<li>Compile <tt>select()</tt>.</li>
+-<li>Improve alias analysis, esp. for loads from allocations.</li>
+-<li>Tuning of various compiler heuristics.</li>
+-<li>Refactor and extend IR conversion instructions.</li>
+-<li>x86/x64: Various backend enhancements related to the FFI.</li>
+-<li>Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta5">LuaJIT 2.0.0-beta5 —
2010-08-24</h2>
+-<ul>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix trace exit dispatch to function headers.</li>
+-<li>Fix Windows and OSX builds with LUAJIT_DISABLE_JIT.</li>
+-<li>Reorganize and fix placement of generated machine code on x64.</li>
+-<li>Fix TNEW in x64 interpreter.</li>
+-<li>Do not eliminate PHIs for values only referenced from side exits.</li>
+-<li>OS-independent canonicalization of strings for non-finite numbers.</li>
+-<li>Fix <tt>string.char()</tt> range check on x64.</li>
+-<li>Fix <tt>tostring()</tt> resolving within
<tt>print()</tt>.</li>
+-<li>Fix error handling for <tt>next()</tt>.</li>
+-<li>Fix passing of constant arguments to external calls on x64.</li>
+-<li>Fix interpreter argument check for two-argument SSE math
functions.</li>
+-<li>Fix C frame chain corruption caused by
<tt>lua_cpcall()</tt>.</li>
+-<li>Fix return from <tt>pcall()</tt> within active hook.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Replace on-trace GC frame syncing with interpreter exit.</li>
+-<li>Improve hash lookup specialization by not removing dead keys during
GC.</li>
+-<li>Turn traces into true GC objects.</li>
+-<li>Avoid starting a GC cycle immediately after library init.</li>
+-<li>Add weak guards to improve dead-code elimination.</li>
+-<li>Speed up string interning.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta4">LuaJIT 2.0.0-beta4 —
2010-03-28</h2>
+-<ul>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix precondition for on-trace creation of table keys.</li>
+-<li>Fix <tt>{f()}</tt> on x64 when table is resized.</li>
+-<li>Fix folding of ordered comparisons with same references.</li>
+-<li>Fix snapshot restores for multi-result bytecodes.</li>
+-<li>Fix potential hang when recording bytecode with nested closures.</li>
+-<li>Fix recording of <tt>getmetatable()</tt>,
<tt>tonumber()</tt> and bad argument types.</li>
+-<li>Fix SLOAD fusion across returns to lower frames.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Add array bounds check elimination. <tt>-Oabc</tt> is enabled by
default.</li>
+-<li>More tuning for x64, e.g. smaller table objects.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta3">LuaJIT 2.0.0-beta3 —
2010-03-07</h2>
+-<ul>
+-<li>LuaJIT x64 port:
+-<ul>
+-<li>Port integrated memory allocator to Linux/x64, Windows/x64 and
OSX/x64.</li>
+-<li>Port interpreter and JIT compiler to x64.</li>
+-<li>Port DynASM to x64.</li>
+-<li>Many 32/64 bit cleanups in the VM.</li>
+-<li>Allow building the interpreter with either x87 or SSE2
arithmetics.</li>
+-<li>Add external unwinding and C++ exception interop (default on x64).</li>
+-</ul></li>
+-<li>Correctness and completeness:
+-<ul>
+-<li>Fix constructor bytecode generation for certain conditional
values.</li>
+-<li>Fix some cases of ordered string comparisons.</li>
+-<li>Fix <tt>lua_tocfunction()</tt>.</li>
+-<li>Fix cutoff register in JMP bytecode for some conditional
expressions.</li>
+-<li>Fix PHI marking algorithm for references from variant slots.</li>
+-<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li>
+-<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li>
+-<li>Drive the GC forward on string allocations in the parser.</li>
+-<li>Implement call/return hooks (zero-cost if disabled).</li>
+-<li>Implement yield from C hooks.</li>
+-<li>Disable JIT compiler on older non-SSE2 CPUs instead of aborting.</li>
+-</ul></li>
+-<li>Structural and performance enhancements:
+-<ul>
+-<li>Compile recursive code (tail-, up- and down-recursion).</li>
+-<li>Improve heuristics for bytecode penalties and blacklisting.</li>
+-<li>Split CALL/FUNC recording and clean up fast function call
semantics.</li>
+-<li>Major redesign of internal function call handling.</li>
+-<li>Improve FOR loop const specialization and integerness checks.</li>
+-<li>Switch to pre-initialized stacks. Avoid frame-clearing.</li>
+-<li>Colocation of prototypes and related data: bytecode, constants, debug
info.</li>
+-<li>Cleanup parser and streamline bytecode generation.</li>
+-<li>Add support for weak IR references to register allocator.</li>
+-<li>Switch to compressed, extensible snapshots.</li>
+-<li>Compile returns to frames below the start frame.</li>
+-<li>Improve alias analysis of upvalues using a disambiguation hash
value.</li>
+-<li>Compile floor/ceil/trunc to SSE2 helper calls or SSE4.1
instructions.</li>
+-<li>Add generic C call handling to IR and backend.</li>
+-<li>Improve KNUM fuse vs. load heuristics.</li>
+-<li>Compile various <tt>io.*()</tt> functions.</li>
+-<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>,
<tt>math.tanh()</tt>
+-and <tt>math.random()</tt>.</li>
+-</ul></li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 —
2009-11-09</h2>
+-<ul>
+-<li>Reorganize build system. Build static+shared library on POSIX.</li>
+-<li>Allow C++ exception conversion on all platforms
+-using a wrapper function.</li>
+-<li>Automatically catch C++ exceptions and rethrow Lua error
+-(DWARF2 only).</li>
+-<li>Check for the correct x87 FPU precision at strategic points.</li>
+-<li>Always use wrappers for libm functions.</li>
+-<li>Resurrect metamethod name strings before copying them.</li>
+-<li>Mark current trace, even if compiler is idle.</li>
+-<li>Ensure FILE metatable is created only once.</li>
+-<li>Fix type comparisons when different integer types are involved.</li>
+-<li>Fix <tt>getmetatable()</tt> recording.</li>
+-<li>Fix TDUP with dead keys in template table.</li>
+-<li><tt>jit.flush(tr)</tt> returns status.
+-Prevent manual flush of a trace that's still linked.</li>
+-<li>Improve register allocation heuristics for invariant references.</li>
+-<li>Compile the push/pop variants of <tt>table.insert()</tt> and
+-<tt>table.remove()</tt>.</li>
+-<li>Compatibility with MSVC <tt>link /debug</tt>.</li>
+-<li>Fix <tt>lua_iscfunction()</tt>.</li>
+-<li>Fix <tt>math.random()</tt> when compiled with
<tt>-fpic</tt> (OSX).</li>
+-<li>Fix <tt>table.maxn()</tt>.</li>
+-<li>Bump <tt>MACOSX_DEPLOYMENT_TARGET</tt> to
<tt>10.4</tt></li>
+-<li><tt>luaL_check*()</tt> and <tt>luaL_opt*()</tt> now
support
+-negative arguments, too.<br>
+-This matches the behavior of Lua 5.1, but not the specification.</li>
+-</ul>
+-
+-<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 —
2009-10-31</h2>
+-<ul>
+-<li>This is the first public release of LuaJIT 2.0.</li>
+-<li>The whole VM has been rewritten from the ground up, so there's
+-no point in listing differences over earlier versions.</li>
+-</ul>
+-</div>
+-<br class="flush">
+-</div>
+-<div id="foot">
+-<hr class="hide">
+-Copyright © 2005-2017 Mike Pall
+-<span class="noprint">
+-·
+-<a href="contact.html">Contact</a>
+-</span>
+-</div>
+-</body>
+-</html>
+diff --git a/doc/contact.html b/doc/contact.html
+index fe4751c0..c253a08b 100644
+--- a/doc/contact.html
++++ b/doc/contact.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Contact</title>
+-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Contact</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,28 +47,23 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+ If you want to report bugs, propose fixes or suggest enhancements,
+ please use the
+-<a
href="https://github.com/LuaJIT/LuaJIT/issues">GitHub issue
tracker</a>.
++<a
href="https://github.com/LuaJIT/LuaJIT/issues"><span
class="ext">»</span> GitHub issue
tracker</a>.
+ </p>
+ <p>
+ Please send general questions to the
+-<a
href="http://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>.
++<a
href="https://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>.
+ </p>
+ <p>
+ You can also send any questions you have directly to me:
+@@ -93,7 +89,7 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
+ <h2>Copyright</h2>
+ <p>
+ All documentation is
+-Copyright © 2005-2017 Mike Pall.
++Copyright © 2005-2021 Mike Pall.
+ </p>
+
+
+@@ -101,7 +97,7 @@ Copyright © 2005-2017 Mike Pall.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html
+new file mode 100644
+index 00000000..63c2efe3
+--- /dev/null
++++ b/doc/ext_buffer.html
+@@ -0,0 +1,693 @@
++<!DOCTYPE html>
++<html>
++<head>
++<title>String Buffer Library</title>
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
++<meta name="Language" content="en">
++<link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
++<link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
++<style type="text/css">
++.lib {
++ vertical-align: middle;
++ margin-left: 5px;
++ padding: 0 5px;
++ font-size: 60%;
++ border-radius: 5px;
++ background: #c5d5ff;
++ color: #000;
++}
++</style>
++</head>
++<body>
++<div id="site">
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++</div>
++<div id="head">
++<h1>String Buffer Library</h1>
++</div>
++<div id="nav">
++<ul><li>
++<a href="luajit.html">LuaJIT</a>
++<ul><li>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
++</li><li>
++<a href="install.html">Installation</a>
++</li><li>
++<a href="running.html">Running</a>
++</li></ul>
++</li><li>
++<a href="extensions.html">Extensions</a>
++<ul><li>
++<a href="ext_ffi.html">FFI Library</a>
++<ul><li>
++<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
++</li><li>
++<a href="ext_ffi_api.html">ffi.* API</a>
++</li><li>
++<a href="ext_ffi_semantics.html">FFI Semantics</a>
++</li></ul>
++</li><li>
++<a class="current" href="ext_buffer.html">String
Buffers</a>
++</li><li>
++<a href="ext_jit.html">jit.* Library</a>
++</li><li>
++<a href="ext_c_api.html">Lua/C API</a>
++</li><li>
++<a href="ext_profiler.html">Profiler</a>
++</li></ul>
++</li><li>
++<a href="status.html">Status</a>
++</li><li>
++<a href="faq.html">FAQ</a>
++</li><li>
++<a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
++</li><li>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++</li></ul>
++</div>
++<div id="main">
++<p>
++The string buffer library allows <b>high-performance manipulation of
++string-like data</b>.
++</p>
++<p>
++Unlike Lua strings, which are constants, string buffers are
++<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
++can be stored, formatted and encoded into a string buffer and later
++converted, extracted or decoded.
++</p>
++<p>
++The convenient string buffer API simplifies common string manipulation
++tasks that would otherwise require creating many intermediate strings.
++String buffers improve performance by eliminating redundant memory
++copies, object creation, string interning and garbage collection
++overhead. In conjunction with the FFI library, they allow zero-copy
++operations.
++</p>
++<p>
++The string buffer library also includes a high-performance
++<a href="#serialize">serializer</a> for Lua objects.
++</p>
++
++<h2 id="wip" style="color:#ff0000">Work in
Progress</h2>
++<p>
++<b style="color:#ff0000">This library is a work in progress. More
++functionality will be added soon.</b>
++</p>
++
++<h2 id="use">Using the String Buffer Library</h2>
++<p>
++The string buffer library is built into LuaJIT by default, but it's not
++loaded by default. Add this to the start of every Lua file that needs
++one of its functions:
++</p>
++<pre class="code">
++local buffer = require("string.buffer")
++</pre>
++<p>
++The convention for the syntax shown on this page is that <tt>buffer</tt>
++refers to the buffer library and <tt>buf</tt> refers to an individual
++buffer object.
++</p>
++<p>
++Please note the difference between a Lua function call, e.g.
++<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g.
++<tt>buf:reset()</tt> (with a colon).
++</p>
++
++<h3 id="buffer_object">Buffer Objects</h3>
++<p>
++A buffer object is a garbage-collected Lua object. After creation with
++<tt>buffer.new()</tt>, it can (and should) be reused for many operations.
++When the last reference to a buffer object is gone, it will eventually
++be freed by the garbage collector, along with the allocated buffer
++space.
++</p>
++<p>
++Buffers operate like a FIFO (first-in first-out) data structure. Data
++can be appended (written) to the end of the buffer and consumed (read)
++from the front of the buffer. These operations may be freely mixed.
++</p>
++<p>
++The buffer space that holds the characters is managed automatically
++— it grows as needed and already consumed space is recycled. Use
++<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need
more
++control.
++</p>
++<p>
++The maximum size of a single buffer is the same as the maximum size of a
++Lua string, which is slightly below two gigabytes. For huge data sizes,
++neither strings nor buffers are the right data structure — use the
++FFI library to directly map memory or files up to the virtual memory
++limit of your OS.
++</p>
++
++<h3 id="buffer_overview">Buffer Method Overview</h3>
++<ul>
++<li>
++The <tt>buf:put*()</tt>-like methods append (write) characters to the
++end of the buffer.
++</li>
++<li>
++The <tt>buf:get*()</tt>-like methods consume (read) characters from the
++front of the buffer.
++</li>
++<li>
++Other methods, like <tt>buf:tostring()</tt> only read the buffer
++contents, but don't change the buffer.
++</li>
++<li>
++The <tt>buf:set()</tt> method allows zero-copy consumption of a string
++or an FFI cdata object as a buffer.
++</li>
++<li>
++The FFI-specific methods allow zero-copy read/write-style operations or
++modifying the buffer contents in-place. Please check the
++<a href="#ffi_caveats">FFI caveats</a> below, too.
++</li>
++<li>
++Methods that don't need to return anything specific, return the buffer
++object itself as a convenience. This allows method chaining, e.g.:
++<tt>buf:reset():encode(obj)</tt> or
<tt>buf:skip(len):get()</tt>
++</li>
++</ul>
++
++<h2 id="create">Buffer Creation and Management</h2>
++
++<h3 id="buffer_new"><tt>local buf = buffer.new([size
[,options]])<br>
++local buf = buffer.new([options])</tt></h3>
++<p>
++Creates a new buffer object.
++</p>
++<p>
++The optional <tt>size</tt> argument ensures a minimum initial buffer
++size. This is strictly an optimization when the required buffer size is
++known beforehand. The buffer space will grow as needed, in any case.
++</p>
++<p>
++The optional table <tt>options</tt> sets various
++<a href="#serialize_options">serialization options</a>.
++</p>
++
++<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3>
++<p>
++Reset (empty) the buffer. The allocated buffer space is not freed and
++may be reused.
++</p>
++
++<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
++<p>
++The buffer space of the buffer object is freed. The object itself
++remains intact, empty and may be reused.
++</p>
++<p>
++Note: you normally don't need to use this method. The garbage collector
++automatically frees the buffer space, when the buffer object is
++collected. Use this method, if you need to free the associated memory
++immediately.
++</p>
++
++<h2 id="write">Buffer Writers</h2>
++
++<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj]
[,…])</tt></h3>
++<p>
++Appends a string <tt>str</tt>, a number <tt>num</tt> or any
object
++<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the
buffer.
++Multiple arguments are appended in the given order.
++</p>
++<p>
++Appending a buffer to a buffer is possible and short-circuited
++internally. But it still involves a copy. Better combine the buffer
++writes to use a single buffer.
++</p>
++
++<h3 id="buffer_putf"><tt>buf = buf:putf(format,
…)</tt></h3>
++<p>
++Appends the formatted arguments to the buffer. The <tt>format</tt>
++string supports the same options as <tt>string.format()</tt>.
++</p>
++
++<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata,
len)</tt><span class="lib">FFI</span></h3>
++<p>
++Appends the given <tt>len</tt> number of bytes from the memory pointed
++to by the FFI <tt>cdata</tt> object to the buffer. The object needs to
++be convertible to a (constant) pointer.
++</p>
++
++<h3 id="buffer_set"><tt>buf = buf:set(str)<br>
++buf = buf:set(cdata, len)</tt><span
class="lib">FFI</span></h3>
++<p>
++This method allows zero-copy consumption of a string or an FFI cdata
++object as a buffer. It stores a reference to the passed string
++<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any
buffer
++space originally allocated is freed. This is <i>not</i> an append
++operation, unlike the <tt>buf:put*()</tt> methods.
++</p>
++<p>
++After calling this method, the buffer behaves as if
++<tt>buf:free():put(str)</tt> or
<tt>buf:free():put(cdata, len)</tt>
++had been called. However, the data is only referenced and not copied, as
++long as the buffer is only consumed.
++</p>
++<p>
++In case the buffer is written to later on, the referenced data is copied
++and the object reference is removed (copy-on-write semantics).
++</p>
++<p>
++The stored reference is an anchor for the garbage collector and keeps the
++originally passed string or FFI cdata object alive.
++</p>
++
++<h3 id="buffer_reserve"><tt>ptr, len =
buf:reserve(size)</tt><span class="lib">FFI</span><br>
++<tt>buf = buf:commit(used)</tt><span
class="lib">FFI</span></h3>
++<p>
++The <tt>reserve</tt> method reserves at least <tt>size</tt>
bytes of
++write space in the buffer. It returns a <tt>uint8_t *</tt> FFI
++cdata pointer <tt>ptr</tt> that points to this space.
++</p>
++<p>
++The available length in bytes is returned in <tt>len</tt>. This is at
++least <tt>size</tt> bytes, but may be more to facilitate efficient
++buffer growth. You can either make use of the additional space or ignore
++<tt>len</tt> and only use <tt>size</tt> bytes.
++</p>
++<p>
++The <tt>commit</tt> method appends the <tt>used</tt> bytes of
the
++previously returned write space to the buffer data.
++</p>
++<p>
++This pair of methods allows zero-copy use of C read-style APIs:
++</p>
++<pre class="code">
++local MIN_SIZE = 65536
++repeat
++ local ptr, len = buf:reserve(MIN_SIZE)
++ local n = C.read(fd, ptr, len)
++ if n == 0 then break end -- EOF.
++ if n < 0 then error("read error") end
++ buf:commit(n)
++until false
++</pre>
++<p>
++The reserved write space is <i>not</i> initialized. At least the
++<tt>used</tt> bytes <b>must</b> be written to before calling
the
++<tt>commit</tt> method. There's no need to call the
<tt>commit</tt>
++method, if nothing is added to the buffer (e.g. on error).
++</p>
++
++<h2 id="read">Buffer Readers</h2>
++
++<h3 id="buffer_length"><tt>len = #buf</tt></h3>
++<p>
++Returns the current length of the buffer data in bytes.
++</p>
++
++<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf
[…]</tt></h3>
++<p>
++The Lua concatenation operator <tt>..</tt> also accepts buffers, just
++like strings or numbers. It always returns a string and not a buffer.
++</p>
++<p>
++Note that although this is supported for convenience, this thwarts one
++of the main reasons to use buffers, which is to avoid string
++allocations. Rewrite it with <tt>buf:put()</tt> and
<tt>buf:get()</tt>.
++</p>
++<p>
++Mixing this with unrelated objects that have a <tt>__concat</tt>
++metamethod may not work, since these probably only expect strings.
++</p>
++
++<h3 id="buffer_skip"><tt>buf =
buf:skip(len)</tt></h3>
++<p>
++Skips (consumes) <tt>len</tt> bytes from the buffer up to the current
++length of the buffer data.
++</p>
++
++<h3 id="buffer_get"><tt>str, … = buf:get([len|nil]
[,…])</tt></h3>
++<p>
++Consumes the buffer data and returns one or more strings. If called
++without arguments, the whole buffer data is consumed. If called with a
++number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt>
argument
++consumes the remaining buffer space (this only makes sense as the last
++argument). Multiple arguments consume the buffer data in the given
++order.
++</p>
++<p>
++Note: a zero length or no remaining buffer data returns an empty string
++and not <tt>nil</tt>.
++</p>
++
++<h3 id="buffer_tostring"><tt>str = buf:tostring()<br>
++str = tostring(buf)</tt></h3>
++<p>
++Creates a string from the buffer data, but doesn't consume it. The
++buffer remains unchanged.
++</p>
++<p>
++Buffer objects also define a <tt>__tostring</tt> metamethod. This means
++buffers can be passed to the global <tt>tostring()</tt> function and
++many other functions that accept this in place of strings. The important
++internal uses in functions like <tt>io.write()</tt> are short-circuited
++to avoid the creation of an intermediate string object.
++</p>
++
++<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span
class="lib">FFI</span></h3>
++<p>
++Returns a <tt>uint8_t *</tt> FFI cdata pointer
<tt>ptr</tt> that
++points to the buffer data. The length of the buffer data in bytes is
++returned in <tt>len</tt>.
++</p>
++<p>
++The returned pointer can be directly passed to C functions that expect a
++buffer and a length. You can also do bytewise reads
++(<tt>local x = ptr[i]</tt>) or writes
++(<tt>ptr[i] = 0x40</tt>) of the buffer data.
++</p>
++<p>
++In conjunction with the <tt>skip</tt> method, this allows zero-copy use
++of C write-style APIs:
++</p>
++<pre class="code">
++repeat
++ local ptr, len = buf:ref()
++ if len == 0 then break end
++ local n = C.write(fd, ptr, len)
++ if n < 0 then error("write error") end
++ buf:skip(n)
++until n >= len
++</pre>
++<p>
++Unlike Lua strings, buffer data is <i>not</i> implicitly
++zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that
++expect zero-terminated strings. If you're not using <tt>len</tt>, then
++you're doing something wrong.
++</p>
++
++<h2 id="serialize">Serialization of Lua Objects</h2>
++<p>
++The following functions and methods allow <b>high-speed serialization</b>
++(encoding) of a Lua object into a string and decoding it back to a Lua
++object. This allows convenient storage and transport of <b>structured
++data</b>.
++</p>
++<p>
++The encoded data is in an <a href="#serialize_format">internal binary
++format</a>. The data can be stored in files, binary-transparent
++databases or transmitted to other LuaJIT instances across threads,
++processes or networks.
++</p>
++<p>
++Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
++server-class system, even when serializing many small objects. Decoding
++speed is mostly constrained by object creation cost.
++</p>
++<p>
++The serializer handles most Lua types, common FFI number types and
++nested structures. Functions, thread objects, other FFI cdata and full
++userdata cannot be serialized (yet).
++</p>
++<p>
++The encoder serializes nested structures as trees. Multiple references
++to a single object will be stored separately and create distinct objects
++after decoding. Circular references cause an error.
++</p>
++
++<h3 id="serialize_methods">Serialization Functions and
Methods</h3>
++
++<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br>
++buf = buf:encode(obj)</tt></h3>
++<p>
++Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone
++function returns a string <tt>str</tt>. The buffer method appends the
++encoding to the buffer.
++</p>
++<p>
++<tt>obj</tt> can be any of the supported Lua types — it
doesn't
++need to be a Lua table.
++</p>
++<p>
++This function may throw an error when attempting to serialize
++unsupported object types, circular references or deeply nested tables.
++</p>
++
++<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br>
++obj = buf:decode()</tt></h3>
++<p>
++The stand-alone function de-serializes (decodes) the string
++<tt>str</tt>, the buffer method de-serializes one object from the
++buffer. Both return a Lua object <tt>obj</tt>.
++</p>
++<p>
++The returned object may be any of the supported Lua types —
++even <tt>nil</tt>.
++</p>
++<p>
++This function may throw an error when fed with malformed or incomplete
++encoded data. The stand-alone function throws when there's left-over
++data after decoding a single top-level object. The buffer method leaves
++any left-over data in the buffer.
++</p>
++
++<h3 id="serialize_options">Serialization Options</h3>
++<p>
++The <tt>options</tt> table passed to <tt>buffer.new()</tt> may
contain
++the following members (all optional):
++</p>
++<ul>
++<li>
++<tt>dict</tt> is a Lua table holding a <b>dictionary of
strings</b> that
++commonly occur as table keys of objects you are serializing. These keys
++are compactly encoded as indexes during serialization. A well chosen
++dictionary saves space and improves serialization performance.
++</li>
++<li>
++<tt>metatable</tt> is a Lua table holding a <b>dictionary of
metatables</b>
++for the table objects you are serializing.
++</li>
++</ul>
++<p>
++<tt>dict</tt> needs to be an array of strings and
<tt>metatable</tt> needs
++to be an array of tables. Both must start at index 1 and have no holes (no
++<tt>nil</tt> in between). The tables are anchored in the buffer object and
++internally modified into a two-way index (don't do this yourself, just pass
++a plain array). The tables must not be modified after they have been passed
++to <tt>buffer.new()</tt>.
++</p>
++<p>
++The <tt>dict</tt> and <tt>metatable</tt> tables used by the
encoder and
++decoder must be the same. Put the most common entries at the front. Extend
++at the end to ensure backwards-compatibility — older encodings can
++then still be read. You may also set some indexes to <tt>false</tt> to
++explicitly drop backwards-compatibility. Old encodings that use these
++indexes will throw an error when decoded.
++</p>
++<p>
++Metatables that are not found in the <tt>metatable</tt> dictionary are
++ignored when encoding. Decoding returns a table with a <tt>nil</tt>
++metatable.
++</p>
++<p>
++Note: parsing and preparation of the options table is somewhat
++expensive. Create a buffer object only once and recycle it for multiple
++uses. Avoid mixing encoder and decoder buffers, since the
++<tt>buf:set()</tt> method frees the already allocated buffer space:
++</p>
++<pre class="code">
++local options = {
++ dict = { "commonly", "used", "string", "keys"
},
++}
++local buf_enc = buffer.new(options)
++local buf_dec = buffer.new(options)
++
++local function encode(obj)
++ return buf_enc:reset():encode(obj):get()
++end
++
++local function decode(str)
++ return buf_dec:set(str):decode()
++end
++</pre>
++
++<h3 id="serialize_stream">Streaming Serialization</h3>
++<p>
++In some contexts, it's desirable to do piecewise serialization of large
++datasets, also known as <i>streaming</i>.
++</p>
++<p>
++This serialization format can be safely concatenated and supports streaming.
++Multiple encodings can simply be appended to a buffer and later decoded
++individually:
++</p>
++<pre class="code">
++local buf = buffer.new()
++buf:encode(obj1)
++buf:encode(obj2)
++local copy1 = buf:decode()
++local copy2 = buf:decode()
++</pre>
++<p>
++Here's how to iterate over a stream:
++</p>
++<pre class="code">
++while #buf ~= 0 do
++ local obj = buf:decode()
++ -- Do something with obj.
++end
++</pre>
++<p>
++Since the serialization format doesn't prepend a length to its encoding,
++network applications may need to transmit the length, too.
++</p>
++
++<h3 id="serialize_format">Serialization Format Specification</h3>
++<p>
++This serialization format is designed for <b>internal use</b> by LuaJIT
++applications. Serialized data is upwards-compatible and portable across
++all supported LuaJIT platforms.
++</p>
++<p>
++It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
++embedded zeroes and stores embedded Lua string objects unmodified, which
++are 8-bit-clean, too. Encoded data can be safely concatenated for
++streaming and later decoded one top-level object at a time.
++</p>
++<p>
++The encoding is reasonably compact, but tuned for maximum performance,
++not for minimum space usage. It compresses well with any of the common
++byte-oriented data compression algorithms.
++</p>
++<p>
++Although documented here for reference, this format is explicitly
++<b>not</b> intended to be a 'public standard' for structured data
++interchange across computer languages (like JSON or MessagePack). Please
++do not use it as such.
++</p>
++<p>
++The specification is given below as a context-free grammar with a
++top-level <tt>object</tt> as the starting point. Alternatives are
++separated by the <tt>|</tt> symbol and <tt>*</tt> indicates
repeats.
++Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
++either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
++suffix.
++</p>
++<pre>
++object → nil | false | true
++ | null | lightud32 | lightud64
++ | int | num | tab | tab_mt
++ | int64 | uint64 | complex
++ | string
++
++nil → 0x00
++false → 0x01
++true → 0x02
++
++null → 0x03 // NULL lightuserdata
++lightud32 → 0x04 data.I // 32 bit lightuserdata
++lightud64 → 0x05 data.L // 64 bit lightuserdata
++
++int → 0x06 int.I // int32_t
++num → 0x07 double.L
++
++tab → 0x08 // Empty table
++ | 0x09 h.U h*{object object} // Key/value hash
++ | 0x0a a.U a*object // 0-based array
++ | 0x0b a.U a*object h.U h*{object object} // Mixed
++ | 0x0c a.U (a-1)*object // 1-based array
++ | 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
++tab_mt → 0x0e (index-1).U tab // Metatable dict entry
++
++int64 → 0x10 int.L // FFI int64_t
++uint64 → 0x11 uint.L // FFI uint64_t
++complex → 0x12 re.L im.L // FFI complex
++
++string → (0x20+len).U len*char.B
++ | 0x0f (index-1).U // String dict entry
++
++.B = 8 bit
++.I = 32 bit little-endian
++.L = 64 bit little-endian
++.U = prefix-encoded 32 bit unsigned number n:
++ 0x00..0xdf → n.B
++ 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
++ 0x1fe0.. → 0xff n.I
++</pre>
++
++<h2 id="error">Error handling</h2>
++<p>
++Many of the buffer methods can throw an error. Out-of-memory or usage
++errors are best caught with an outer wrapper for larger parts of code.
++There's not much one can do after that, anyway.
++</p>
++<p>
++OTOH you may want to catch some errors individually. Buffer methods need
++to receive the buffer object as the first argument. The Lua colon-syntax
++<tt>obj:method()</tt> does that implicitly. But to wrap a method with
++<tt>pcall()</tt>, the arguments need to be passed like this:
++</p>
++<pre class="code">
++local ok, err = pcall(buf.encode, buf, obj)
++if not ok then
++ -- Handle error in err.
++end
++</pre>
++
++<h2 id="ffi_caveats">FFI caveats</h2>
++<p>
++The string buffer library has been designed to work well together with
++the FFI library. But due to the low-level nature of the FFI library,
++some care needs to be taken:
++</p>
++<p>
++First, please remember that FFI pointers are zero-indexed. The space
++returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts
at the
++returned pointer and ends before <tt>len</tt> bytes after that.
++</p>
++<p>
++I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index
++is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no
valid
++index at all. The returned pointer may even be <tt>NULL</tt>.
++</p>
++<p>
++The space pointed to by the returned pointer is only valid as long as
++the buffer is not modified in any way (neither append, nor consume, nor
++reset, etc.). The pointer is also not a GC anchor for the buffer object
++itself.
++</p>
++<p>
++Buffer data is only guaranteed to be byte-aligned. Casting the returned
++pointer to a data type with higher alignment may cause unaligned
++accesses. It depends on the CPU architecture whether this is allowed or
++not (it's always OK on x86/x64 and mostly OK on other modern
++architectures).
++</p>
++<p>
++FFI pointers or references do not count as GC anchors for an underlying
++object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt>
is
++anchored by <tt>buf:set(array, len)</tt>, but not by
++<tt>buf:set(array+offset, len)</tt>. The addition of the offset
++creates a new pointer, even when the offset is zero. In this case, you
++need to make sure there's still a reference to the original array as
++long as its contents are in use by the buffer.
++</p>
++<p>
++Even though each LuaJIT VM instance is single-threaded (but you can
++create multiple VMs), FFI data structures can be accessed concurrently.
++Be careful when reading/writing FFI cdata from/to buffers to avoid
++concurrent accesses or modifications. In particular, the memory
++referenced by <tt>buf:set(cdata, len)</tt> must not be modified
++while buffer readers are working on it. Shared, but read-only memory
++mappings of files are OK, but only if the file does not change.
++</p>
++<br class="flush">
++</div>
++<div id="foot">
++<hr class="hide">
++Copyright © 2005-2021
++<span class="noprint">
++·
++<a href="contact.html">Contact</a>
++</span>
++</div>
++</body>
++</html>
+diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
+index ad462c63..9f1ad212 100644
+--- a/doc/ext_c_api.html
++++ b/doc/ext_c_api.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Lua/C API Extensions</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Lua/C API Extensions</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a class="current" href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -91,8 +87,8 @@ other Lua/C API functions).
+ </p>
+ <p>
+ The third argument specifies the mode, which is 'or'ed with a flag.
+-The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature on,
+-<tt>LUAJIT_MODE_ON</tt> to turn a feature off, or
++The flag can be <tt>LUAJIT_MODE_OFF</tt> to turn a feature off,
++<tt>LUAJIT_MODE_ON</tt> to turn a feature on, or
+ <tt>LUAJIT_MODE_FLUSH</tt> to flush cached code.
+ </p>
+ <p>
+@@ -179,7 +175,7 @@ Also note that this mechanism is not without overhead.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
+index 5e1daaf5..b934dc78 100644
+--- a/doc/ext_ffi.html
++++ b/doc/ext_ffi.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>FFI Library</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>FFI Library</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -322,7 +318,7 @@ without undue conversion penalties.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
+index 91af2e1d..061cc42a 100644
+--- a/doc/ext_ffi_api.html
++++ b/doc/ext_ffi_api.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>ffi.* API Functions</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -16,7 +15,7 @@ td.abiparam { font-weight: bold; width: 6em; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1><tt>ffi.*</tt> API Functions</h1>
+@@ -25,7 +24,7 @@ td.abiparam { font-weight: bold; width: 6em; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -43,6 +42,8 @@ td.abiparam { font-weight: bold; width: 6em; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -51,17 +52,12 @@ td.abiparam { font-weight: bold; width: 6em; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -261,7 +257,7 @@ contents of an <tt>__index</tt> table (if any) may be
modified
+ afterwards. The associated metatable automatically applies to all uses
+ of this type, no matter how the objects are created or where they
+ originate from. Note that pre-defined operations on types have
+-precedence (e.g. declared field names cannot be overriden).
++precedence (e.g. declared field names cannot be overridden).
+ </p>
+ <p>
+ All standard Lua metamethods are implemented. These are called directly,
+@@ -469,6 +465,8 @@ otherwise. The following parameters are currently defined:
+ <tr class="odd">
+ <td class="abiparam">win</td><td
class="abidesc">Windows variant of the standard ABI</td></tr>
+ <tr class="even">
++<td class="abiparam">uwp</td><td
class="abidesc">Universal Windows Platform</td></tr>
++<tr class="odd">
+ <td class="abiparam">gc64</td><td
class="abidesc">64 bit GC references</td></tr>
+ </table>
+
+@@ -560,7 +558,7 @@ named <tt>i</tt>.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
+index 800b6b18..fef39c32 100644
+--- a/doc/ext_ffi_semantics.html
++++ b/doc/ext_ffi_semantics.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>FFI Semantics</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -16,7 +15,7 @@ td.convop { font-style: italic; width: 40%; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>FFI Semantics</h1>
+@@ -25,7 +24,7 @@ td.convop { font-style: italic; width: 40%; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -43,6 +42,8 @@ td.convop { font-style: italic; width: 40%; }
+ <a class="current" href="ext_ffi_semantics.html">FFI
Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -51,17 +52,12 @@ td.convop { font-style: italic; width: 40%; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -678,7 +674,7 @@ through unions is explicitly detected and allowed.
+ <a href="ext_ffi_api.html#ffi_new">constructor</a>. This is
equivalent
+ to <tt>ffi.new(ct, ...)</tt>, unless a <tt>__new</tt> metamethod
is
+ defined. The <tt>__new</tt> metamethod is called with the ctype object
+-plus any other arguments passed to the contructor. Note that you have to
++plus any other arguments passed to the constructor. Note that you have to
+ use <tt>ffi.new</tt> inside of it, since calling
<tt>ct(...)</tt> would
+ cause infinite recursion.</li>
+
+@@ -864,7 +860,7 @@ place of a type, you'd need to use
<tt>ffi.typeof("int")</tt> instead.
+ <p>
+ The main use for parameterized types are libraries implementing abstract
+ data types
+-(<a
href="http://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8&...
class="ext">»</span> example</a>),
++(<a
href="https://www.freelists.org/post/luajit/ffi-type-of-pointer-to,8...
class="ext">»</span> example</a>),
+ similar to what can be achieved with C++ template metaprogramming.
+ Another use case are derived types of anonymous structs, which avoids
+ pollution of the global struct namespace.
+@@ -1225,7 +1221,7 @@ suboptimal performance, especially when used in inner loops:
+ <li>Table initializers.</li>
+ <li>Initialization of nested
<tt>struct</tt>/<tt>union</tt> types.</li>
+ <li>Non-default initialization of VLA/VLS or large C types
+-(> 128 bytes or > 16 array elements.</li>
++(> 128 bytes or > 16 array elements).</li>
+ <li>Bitfield initializations.</li>
+ <li>Pointer differences for element sizes that are not a power of
+ two.</li>
+@@ -1252,7 +1248,7 @@ compiled.</li>
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
+index 36500664..ca71be4d 100644
+--- a/doc/ext_ffi_tutorial.html
++++ b/doc/ext_ffi_tutorial.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>FFI Tutorial</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -18,7 +17,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>FFI Tutorial</h1>
+@@ -27,7 +26,7 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -45,6 +44,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -53,17 +54,12 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -222,7 +218,7 @@ a fascinating best-selling game is left as an exercise for the
reader.
+ <h2 id="zlib">Accessing the zlib Compression Library</h2>
+ <p>
+ The following code shows how to access the <a
+-href="http://zlib.net/">zlib</a> compression library from Lua code.
++href="https://zlib.net/"><span
class="ext">»</span> zlib</a> compression
library from Lua code.
+ We'll define two convenience wrapper functions that take a string and
+ compress or uncompress it to another string:
+ </p>
+@@ -305,7 +301,7 @@ comes pre-installed. Since <tt>ffi.load()</tt>
automatically adds any
+ missing standard prefixes/suffixes, we can simply load the
+ <tt>"z"</tt> library. On Windows it's named
<tt>zlib1.dll</tt> and
+ you'll have to download it first from the
+-<a
href="http://zlib.net/"><span
class="ext">»</span> zlib site</a>. The check
for
++<a
href="https://zlib.net/"><span
class="ext">»</span> zlib site</a>. The check
for
+ <tt>ffi.os</tt> makes sure we pass the right name to
+ <tt>ffi.load()</tt>.
+ </p>
+@@ -593,7 +589,7 @@ it to a local variable in the function scope is unnecessary.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_jit.html b/doc/ext_jit.html
+index e4088bcb..6dd54c70 100644
+--- a/doc/ext_jit.html
++++ b/doc/ext_jit.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>jit.* Library</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1><tt>jit.*</tt> Library</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a class="current" href="ext_jit.html">jit.*
Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -153,7 +149,7 @@ Contains the target OS name:
+ <h3 id="jit_arch"><tt>jit.arch</tt></h3>
+ <p>
+ Contains the target architecture name:
+-"x86", "x64", "arm", "arm64", "ppc",
"mips" or "mips64".
++"x86", "x64", "arm", "arm64",
"arm64be", "ppc", "mips", "mipsel",
"mips64", "mips64el", "mips64r6", "mips64r6el".
+ </p>
+
+ <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler
optimization control</h2>
+@@ -191,7 +187,7 @@ if you want to know more.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html
+index 71b8c033..2783abdb 100644
+--- a/doc/ext_profiler.html
++++ b/doc/ext_profiler.html
+@@ -1,17 +1,16 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Profiler</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Profiler</h1>
+@@ -20,7 +19,7 @@
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -38,6 +37,8 @@
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -46,17 +47,12 @@
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -355,7 +351,7 @@ use.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/extensions.html b/doc/extensions.html
+index d7cc9693..748c1793 100644
+--- a/doc/extensions.html
++++ b/doc/extensions.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Extensions</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -28,7 +27,7 @@ td.excinterop {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Extensions</h1>
+@@ -37,7 +36,7 @@ td.excinterop {
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -55,6 +54,8 @@ td.excinterop {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -63,25 +64,20 @@ td.excinterop {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+ LuaJIT is fully upwards-compatible with Lua 5.1. It supports all
+-<a
href="http://www.lua.org/manual/5.1/manual.html#5"><span
class="ext">»</span> standard Lua
++<a
href="https://www.lua.org/manual/5.1/manual.html#5"><span
class="ext">»</span> standard Lua
+ library functions</a> and the full set of
+-<a
href="http://www.lua.org/manual/5.1/manual.html#3"><span
class="ext">»</span> Lua/C API
++<a
href="https://www.lua.org/manual/5.1/manual.html#3"><span
class="ext">»</span> Lua/C API
+ functions</a>.
+ </p>
+ <p>
+@@ -105,7 +101,7 @@ LuaJIT comes with several built-in extension modules:
+ <h3 id="bit"><tt>bit.*</tt> — Bitwise
operations</h3>
+ <p>
+ LuaJIT supports all bitwise operations as defined by
+-<a href="http://bitop.luajit.org"><span
class="ext">»</span> Lua BitOp</a>:
++<a href="https://bitop.luajit.org"><span
class="ext">»</span> Lua BitOp</a>:
+ </p>
+ <pre class="code">
+ bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor
+@@ -114,7 +110,7 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
+ <p>
+ This module is a LuaJIT built-in — you don't need to download or
+ install Lua BitOp. The Lua BitOp site has full documentation for all
+-<a
href="http://bitop.luajit.org/api.html"><span
class="ext">»</span> Lua BitOp API
functions</a>.
++<a
href="https://bitop.luajit.org/api.html"><span
class="ext">»</span> Lua BitOp API
functions</a>.
+ The FFI adds support for
+ <a href="ext_ffi_semantics.html#cdata_arith">64 bit bitwise
operations</a>,
+ using the same API functions.
+@@ -209,9 +205,8 @@ bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
+ </p>
+ <p>
+ Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which
implies
+-a different, incompatible bytecode format for ports that use this mode (e.g.
+-ARM64 or MIPS64) or when explicitly enabled for x64. This may be rectified
+-in the future.
++a different, incompatible bytecode format for all 64 bit ports. This may be
++rectified in the future.
+ </p>
+
+ <h3 id="table_new"><tt>table.new(narray, nhash)</tt>
allocates a pre-sized table</h3>
+@@ -254,6 +249,10 @@ for every call. The result is uniformly distributed between 0.0 and
1.0.
+ It's correctly scaled up and rounded for
<tt>math.random(n [,m])</tt> to
+ preserve uniformity.
+ </p>
++<p>
++Important: Neither this nor any other PRNG based on the simplistic
++<tt>math.random()</tt> API is suitable for cryptographic use.
++</p>
+
+ <h3 id="io"><tt>io.*</tt> functions handle 64 bit
file offsets</h3>
+ <p>
+@@ -374,6 +373,7 @@ LuaJIT supports some extensions from Lua 5.3:
+ <li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8
encoding in string literals.</li>
+ <li>The argument table <tt>arg</tt> can be read (and modified) by
<tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
+ <li><tt>io.read()</tt> and <tt>file:read()</tt> accept
formats with or without a leading <tt>*</tt>.</li>
++<li><tt>assert()</tt> accepts any type of error object.</li>
+ <li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
+ <li><tt>coroutine.isyieldable()</tt>.</li>
+ <li>Lua/C API extensions:
+@@ -394,29 +394,19 @@ the toolchain used to compile LuaJIT:
+ <td class="excinterop">Interoperability</td>
+ </tr>
+ <tr class="odd separate">
+-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
+-<td class="exccompiler">GCC 4.3+, Clang</td>
++<td class="excplatform">External frame unwinding</td>
++<td class="exccompiler">GCC, Clang, MSVC</td>
+ <td class="excinterop"><b style="color:
#00a000;">Full</b></td>
+ </tr>
+ <tr class="even">
+-<td class="excplatform">ARM
<tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
+-<td class="exccompiler">GCC, Clang</td>
+-<td class="excinterop"><b style="color:
#00a000;">Full</b></td>
+-</tr>
+-<tr class="odd">
+-<td class="excplatform">Other platforms, DWARF2 unwinding</td>
++<td class="excplatform">Internal frame unwinding + DWARF2</td>
+ <td class="exccompiler">GCC, Clang</td>
+ <td class="excinterop"><b style="color:
#c06000;">Limited</b></td>
+ </tr>
+-<tr class="even">
+-<td class="excplatform">Windows/x64</td>
+-<td class="exccompiler">MSVC or WinSDK</td>
+-<td class="excinterop"><b style="color:
#00a000;">Full</b></td>
+-</tr>
+ <tr class="odd">
+-<td class="excplatform">Windows/x86</td>
+-<td class="exccompiler">Any</td>
+-<td class="excinterop"><b style="color:
#00a000;">Full</b></td>
++<td class="excplatform">Windows 64 bit</td>
++<td class="exccompiler">non-MSVC</td>
++<td class="excinterop"><b style="color:
#c06000;">Limited</b></td>
+ </tr>
+ <tr class="even">
+ <td class="excplatform">Other platforms</td>
+@@ -437,7 +427,9 @@ the toolchain used to compile LuaJIT:
+ on the C stack. The contents of the C++ exception object
+ pass through unmodified.</li>
+ <li>Lua errors can be caught on the C++ side with
<tt>catch(...)</tt>.
+-The corresponding Lua error message can be retrieved from the Lua stack.</li>
++The corresponding Lua error message can be retrieved from the Lua stack.<br>
++For MSVC for Windows 64 bit this requires compilation of your C++ code
++with <tt>/EHa</tt>.</li>
+ <li>Throwing Lua errors across C++ frames is safe. C++ destructors
+ will be called.</li>
+ </ul>
+@@ -472,7 +464,7 @@ C++ destructors.</li>
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/faq.html b/doc/faq.html
+index 2c930743..1b7cb371 100644
+--- a/doc/faq.html
++++ b/doc/faq.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Frequently Asked Questions (FAQ)</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -14,7 +13,7 @@ dd { margin-left: 1.5em; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Frequently Asked Questions (FAQ)</h1>
+@@ -23,7 +22,7 @@ dd { margin-left: 1.5em; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -41,6 +40,8 @@ dd { margin-left: 1.5em; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -49,67 +50,60 @@ dd { margin-left: 1.5em; }
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a class="current" href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+-<dl>
++<dl id="info">
+ <dt>Q: Where can I learn more about LuaJIT and Lua?</dt>
+ <dd>
+ <ul style="padding: 0;">
+-<li>The <a
href="http://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>
focuses on topics
++<li>The <a
href="https://luajit.org/list.html"><span
class="ext">»</span> LuaJIT mailing list</a>
focuses on topics
+ related to LuaJIT.</li>
+ <li>The <a
href="http://wiki.luajit.org/"><span
class="ext">»</span> LuaJIT wiki</a> gathers
community
+ resources about LuaJIT.</li>
+ <li>News about Lua itself can be found at the
+-<a
href="http://www.lua.org/lua-l.html"><span
class="ext">»</span> Lua mailing list</a>.
++<a
href="https://www.lua.org/lua-l.html"><span
class="ext">»</span> Lua mailing list</a>.
+ The mailing list archives are worth checking out for older postings
+ about LuaJIT.</li>
+-<li>The <a href="http://lua.org"><span
class="ext">»</span> main
Lua.org site</a> has
complete
+-<a
href="http://www.lua.org/docs.html"><span
class="ext">»</span> documentation</a> of the
language
++<li>The <a href="https://lua.org"><span
class="ext">»</span> main
Lua.org site</a> has
complete
++<a
href="https://www.lua.org/docs.html"><span
class="ext">»</span> documentation</a> of the
language
+ and links to books and papers about Lua.</li>
+ <li>The community-managed <a
href="http://lua-users.org/wiki/"><span
class="ext">»</span> Lua Wiki</a>
+ has information about diverse topics.</li>
+-</ul>
++</ul></dd>
+ </dl>
+
+-<dl>
++<dl id="tech">
+ <dt>Q: Where can I learn more about the compiler technology used by
LuaJIT?</dt>
+ <dd>
+-I'm planning to write more documentation about the internals of LuaJIT.
+-In the meantime, please use the following Google Scholar searches
+-to find relevant papers:<br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=Trace+Compiler">&l...
class="ext">»</span> Trace
Compiler</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=JIT+Compiler"><...
class="ext">»</span> JIT
Compiler</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizat...
class="ext">»</span> Dynamic Language
Optimizations</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=SSA+Form"><span
class="ext">»</span> SSA Form</a><br>
+-Search for: <a
href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Alloc...
class="ext">»</span> Linear Scan Register
Allocation</a><br>
+-Here is a list of the <a
href="http://article.gmane.org/gmane.comp.lang.lua.general/58908&quo...
class="ext">»</span> innovative features in
LuaJIT</a>.<br>
+-And, you know, reading the source is of course the only way to enlightenment. :-)
++Please use the following Google Scholar searches to find relevant papers:<br>
++Search for: <a
href="https://scholar.google.com/scholar?q=Trace+Compiler">&...
class="ext">»</span> Trace
Compiler</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=JIT+Compiler"><...
class="ext">»</span> JIT
Compiler</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=Dynamic+Language+Optimiza...
class="ext">»</span> Dynamic Language
Optimizations</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=SSA+Form"><span
class="ext">»</span> SSA Form</a><br>
++Search for: <a
href="https://scholar.google.com/scholar?q=Linear+Scan+Register+Allo...
class="ext">»</span> Linear Scan Register
Allocation</a><br>
++Here is a list of the <a
href="http://lua-users.org/lists/lua-l/2009-11/msg00089.html"&g...
class="ext">»</span> innovative features in
LuaJIT</a>.<br>
++And, you know, reading the source is of course the only way to enlightenment.
+ </dd>
+ </dl>
+
+-<dl>
++<dl id="arg">
+ <dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil
value)"?<br>
+ Q: My vararg functions fail after switching to LuaJIT!</dt>
+ <dd>LuaJIT is compatible to the Lua 5.1 language standard. It doesn't
+ support the implicit <tt>arg</tt> parameter for old-style vararg
+ functions from Lua 5.0.<br>Please convert your code to the
+-<a
href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span
class="ext">»</span> Lua 5.1
++<a
href="https://www.lua.org/manual/5.1/manual.html#2.5.9"><...
class="ext">»</span> Lua 5.1
+ vararg syntax</a>.</dd>
+ </dl>
+
+-<dl>
++<dl id="x87">
+ <dt>Q: Why do I get this error: "bad FPU precision"?<br>
+ <dt>Q: I get weird behavior after initializing Direct3D.<br>
+ <dt>Q: Some FPU operations crash after I load a Delphi DLL.<br>
+@@ -127,56 +121,75 @@ Consider testing your application with older versions,
too.<br>
+
+ Similarly, the Borland/Delphi runtime modifies the FPU control word and
+ enables FP exceptions. Of course this violates the Windows ABI, too.
+-Please check the Delphi docs for the Set8087CW method.
+-
++Please check the Delphi docs for the Set8087CW method.</dd>
+ </dl>
+
+-<dl>
++<dl id="ctrlc">
+ <dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt>
+ <dd>The interrupt signal handler sets a Lua debug hook. But this is
+-currently ignored by compiled code (this will eventually be fixed). If
+-your program is running in a tight loop and never falls back to the
+-interpreter, the debug hook never runs and can't throw the
+-"interrupted!" error.<br> In the meantime you have to press Ctrl-C
+-twice to get stop your program. That's similar to when it's stuck
+-running inside a C function under the Lua interpreter.</dd>
++ignored by compiled code. If your program is running in a tight loop
++and never falls back to the interpreter, the debug hook never runs and
++can't throw the "interrupted!" error.<br>
++You have to press Ctrl-C twice to stop your program. That's similar
++to when it's stuck running inside a C function under the Lua
interpreter.</dd>
+ </dl>
+
+-<dl>
+-<dt>Q: Why doesn't my favorite power-patch for Lua apply against
LuaJIT?</dt>
+-<dd>Because it's a completely redesigned VM and has very little code
+-in common with Lua anymore. Also, if the patch introduces changes to
+-the Lua semantics, these would need to be reflected everywhere in the
+-VM, from the interpreter up to all stages of the compiler.<br> Please
+-use only standard Lua language constructs. For many common needs you
+-can use source transformations or use wrapper or proxy functions.
+-The compiler will happily optimize away such indirections.</dd>
++<dl id="order">
++<dt>Q: Table iteration with <tt>pairs()</tt> does not result in the
same order?</dt>
++<dd>The order of table iteration is explicitly <b>undefined</b> by
++the Lua language standard.<br>
++Different Lua implementations or versions may use different orders for
++otherwise identical tables. Different ways of constructing a table may
++result in different orders, too.<br>
++Due to improved VM security, LuaJIT 2.1 may even use a different order
++on separate VM invocations or when string keys are newly interned.<br><br>
++If your program relies on a deterministic order, it has a bug. Rewrite it,
++so it doesn't rely on the key order. Or sort the table keys, if you
must.</dd>
+ </dl>
+
+-<dl>
++<dl id="sandbox">
++<dt>Q: Can Lua code be safely sandboxed?</dt>
++<dd>
++Maybe for an extremely restricted subset of Lua and if you relentlessly
++scrutinize every single interface function you offer to the untrusted code.<br>
++
++Although Lua provides some sandboxing functionality (<tt>setfenv()</tt>,
hooks),
++it's very hard to get this right even for the Lua core libraries. Of course,
++you'll need to inspect any extension library, too. And there are libraries
++that are inherently unsafe, e.g. the <a href="ext_ffi.html">FFI
library</a>.<br>
++
++More reading material at the <a
href="http://lua-users.org/wiki/SandBoxes"><span
class="ext">»</span> Lua Wiki</a> and <a
href="https://en.wikipedia.org/wiki/Sandbox_(computer_security)"><span
class="ext">»</span> Wikipedia</a>.<br><br>
++
++Relatedly, <b>loading untrusted bytecode is not safe!</b><br>
++
++It's trivial to crash the Lua or LuaJIT VM with maliciously crafted bytecode.
++This is well known and there's no bytecode verification on purpose, so please
++don't report a bug about it. Check the <tt>mode</tt> parameter for the
++<tt>load*()</tt> functions to disable loading of
bytecode.<br><br>
++
++<b>In general, the only promising approach is to sandbox Lua code at the
++process level and not the VM level.</b>
++</dd>
++</dl>
++
++<dl id="arch">
+ <dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt>
+ <dd>Because it's a compiler — it needs to generate native
+ machine code. This means the code generator must be ported to each
+ architecture. And the fast interpreter is written in assembler and
+ must be ported, too. This is quite an undertaking.<br>
+ The <a href="install.html">install documentation</a> shows the
supported
+-architectures. Other architectures will follow based on sufficient user
+-demand and/or sponsoring.</dd>
+-</dl>
+-
+-<dl>
+-<dt>Q: When will feature X be added? When will the next version be
released?</dt>
+-<dd>When it's ready.<br>
+-C'mon, it's open source — I'm doing it on my own time and
you're
+-getting it for free. You can either contribute a patch or sponsor
+-the development of certain features, if they are important to you.
+-</dd>
++architectures.<br>
++Other architectures may follow based on sufficient user demand and
++market-relevance of the architecture. Sponsoring is required to develop
++the port itself, to integrate it and to continuously maintain it in the
++actively developed branches.</dd>
+ </dl>
+ <br class="flush">
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/install.html b/doc/install.html
+index c491c601..e4af9dde 100644
+--- a/doc/install.html
++++ b/doc/install.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Installation</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -39,7 +38,7 @@ td.compatno {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Installation</h1>
+@@ -48,7 +47,7 @@ td.compatno {
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a class="current" href="install.html">Installation</a>
+ </li><li>
+@@ -66,6 +65,8 @@ td.compatno {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -74,17 +75,12 @@ td.compatno {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -109,22 +105,22 @@ operating systems, CPUs and compilers:
+ <td class="compatcpu">CPU / OS</td>
+ <td class="compatos"><a href="#posix">Linux</a>
or<br><a href="#android">Android</a></td>
+ <td class="compatos"><a href="#posix">*BSD,
Other</a></td>
+-<td class="compatos"><a href="#posix">OSX
10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td>
+-<td class="compatos"><a
href="#windows">Windows<br>XP/Vista/7</a></td>
++<td class="compatos"><a href="#posix">macOS
10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td>
++<td class="compatos"><a href="#windows">Windows
7<br>or later</a></td>
+ </tr>
+ <tr class="odd separate">
+ <td class="compatcpu">x86 (32 bit)</td>
+ <td class="compatos">GCC 4.2+</td>
+ <td class="compatos">GCC 4.2+</td>
+ <td class="compatos">XCode 5.0+<br>Clang</td>
+-<td class="compatos">MSVC, MSVC/EE<br>WinSDK<br>MinGW,
Cygwin</td>
++<td class="compatos">MSVC<br>MinGW, Cygwin</td>
+ </tr>
+ <tr class="even">
+ <td class="compatcpu">x64 (64 bit)</td>
+ <td class="compatos">GCC 4.2+</td>
+ <td class="compatos">GCC 4.2+<br>ORBIS (<a
href="#ps4">PS4</a>)</td>
+ <td class="compatos">XCode 5.0+<br>Clang</td>
+-<td class="compatos">MSVC + SDK v7.0<br>WinSDK
v7.0<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
++<td class="compatos">MSVC<br>Durango (<a
href="#xboxone">Xbox One</a>)</td>
+ </tr>
+ <tr class="odd">
+ <td class="compatcpu"><a
href="#cross2">ARMv5+<br>ARM9E+</a></td>
+@@ -134,7 +130,7 @@ operating systems, CPUs and compilers:
+ <td class="compatos compatno"> </td>
+ </tr>
+ <tr class="even">
+-<td class="compatcpu"><a
href="#cross2">ARM64</a></td>
++<td class="compatcpu"><a
href="#cross2">ARM64<br>ARM64be</a></td>
+ <td class="compatos">GCC 4.8+</td>
+ <td class="compatos compatno"> </td>
+ <td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
+@@ -148,7 +144,7 @@ operating systems, CPUs and compilers:
+ <td class="compatos">XEDK (<a href="#xbox360">Xbox
360</a>)</td>
+ </tr>
+ <tr class="even">
+-<td class="compatcpu"><a
href="#cross2">MIPS32<br>MIPS64</a></td>
++<td class="compatcpu"><a
href="#cross2">MIPS32<br>MIPS64<br>MIPS64r6</a></td>
+ <td class="compatos">GCC 4.3+</td>
+ <td class="compatos">GCC 4.3+</td>
+ <td class="compatos compatno"> </td>
+@@ -169,22 +165,21 @@ only).</li>
+ <li><tt>src/Makefile</tt> has settings for
<b>compiling</b> LuaJIT
+ under POSIX, MinGW or Cygwin.</li>
+ <li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
+-MSVC or WinSDK.</li>
++MSVC (Visual Studio).</li>
+ </ul>
+ <p>
+ Please read the instructions given in these files, before changing
+ any settings.
+ </p>
+ <p>
+-LuaJIT on x64 currently uses 32 bit GC objects by default.
+-<tt>LJ_GC64</tt> mode may be explicitly enabled:
+-add <tt>XCFLAGS=-DLUAJIT_ENABLE_GC64</tt> to the make command or run
+-<tt>msvcbuild gc64</tt> for MSVC/WinSDK. Please check the note
+-about the <a href="extensions.html#string_dump">bytecode
format</a>
+-differences, too.
++All LuaJIT 64 bit ports use 64 bit GC objects by default
(<tt>LJ_GC64</tt>).
++For x64, you can select the old 32-on-64 bit mode by adding
++<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command.
++Please check the note about the
++<a href="extensions.html#string_dump">bytecode format</a>
differences, too.
+ </p>
+
+-<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
++<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2>
+ <h3>Prerequisites</h3>
+ <p>
+ Depending on your distribution, you may need to install a package for
+@@ -192,14 +187,19 @@ GCC, the development headers and/or a complete SDK. E.g. on a
current
+ Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager.
+ </p>
+ <p>
+-Download the current source package of LuaJIT (pick the .tar.gz),
+-if you haven't already done so. Move it to a directory of your choice,
+-open a terminal window and change to this directory. Now unpack the archive
+-and change to the newly created directory:
++The recommended way to fetch the latest version is to do a pull from
++the git repository.
++</p>
++<p>
++Alternatively download the latest source package of LuaJIT (pick the .tar.gz).
++Move it to a directory of your choice, open a terminal window and change
++to this directory. Now unpack the archive and change to the newly created
++directory (replace XX.YY.ZZ with the version you downloaded):
+ </p>
+ <pre class="code">
+-tar zxf LuaJIT-2.0.5.tar.gz
+-cd LuaJIT-2.0.5</pre>
++tar zxf LuaJIT-XX.YY.ZZ.tar.gz
++cd LuaJIT-XX.YY.ZZ
++</pre>
+ <h3>Building LuaJIT</h3>
+ <p>
+ The supplied Makefiles try to auto-detect the settings needed for your
+@@ -223,9 +223,12 @@ You can add an extra prefix to the search paths by appending the
+ make PREFIX=/home/myself/lj2
+ </pre>
+ <p>
+-Note for OSX: if the <tt>MACOSX_DEPLOYMENT_TARGET</tt> environment
+-variable is not set, then it's forced to <tt>10.4</tt>.
++Note for macOS: you <b>must</b> set the
<tt>MACOSX_DEPLOYMENT_TARGET</tt>
++environment variable to a value supported by your toolchain:
+ </p>
++<pre class="code">
++MACOSX_DEPLOYMENT_TARGET=XX.YY make
++</pre>
+ <h3>Installing LuaJIT</h3>
+ <p>
+ The top-level Makefile installs LuaJIT by default under
+@@ -252,27 +255,18 @@ Obviously the prefixes given during build and installation need to
be the same.
+ <p>
+ Either install one of the open source SDKs
+ (<a
href="http://mingw.org/"><span
class="ext">»</span> MinGW</a> or
+-<a
href="http://www.cygwin.com/"><span
class="ext">»</span> Cygwin</a>), which come
with a modified
++<a
href="https://www.cygwin.com/"><span
class="ext">»</span> Cygwin</a>), which come
with a modified
+ GCC plus the required development headers.
++Or install Microsoft's Visual Studio (MSVC).
+ </p>
+ <p>
+-Or install Microsoft's Visual C++ (MSVC). The freely downloadable
+-<a
href="http://www.microsoft.com/Express/VC/"><span
class="ext">»</span> Express Edition</a>
+-works just fine, but only contains an x86 compiler.
+-</p>
+-<p>
+-The freely downloadable
+-<a
href="http://msdn.microsoft.com/en-us/windowsserver/bb980924.aspx&qu...
class="ext">»</span> Windows SDK</a>
+-only comes with command line tools, but this is all you need to build LuaJIT.
+-It contains x86 and x64 compilers.
+-</p>
+-<p>
+-Next, download the source package and unpack it using an archive manager
+-(e.g. the Windows Explorer) to a directory of your choice.
++Next, pull from the git repository or download the source package and
++unpack it using an archive manager (e.g. the Windows Explorer) to
++a directory of your choice.
+ </p>
+ <h3>Building with MSVC</h3>
+ <p>
+-Open a "Visual Studio .NET Command Prompt", <tt>cd</tt> to the
++Open a "Visual Studio Command Prompt" (either x86 or x64),
<tt>cd</tt> to the
+ directory where you've unpacked the sources and run these commands:
+ </p>
+ <pre class="code">
+@@ -280,37 +274,14 @@ cd src
+ msvcbuild
+ </pre>
+ <p>
+-Then follow the installation instructions below.
+-</p>
+-<h3>Building with the Windows SDK</h3>
+-<p>
+-Open a "Windows SDK Command Shell" and select the x86 compiler:
+-</p>
+-<pre class="code">
+-setenv /release /x86
+-</pre>
+-<p>
+-Or select the x64 compiler:
+-</p>
+-<pre class="code">
+-setenv /release /x64
+-</pre>
+-<p>
+-Then <tt>cd</tt> to the directory where you've unpacked the sources
+-and run these commands:
+-</p>
+-<pre class="code">
+-cd src
+-msvcbuild
+-</pre>
+-<p>
++Check the <tt>msvcbuild.bat</tt> file for more options.
+ Then follow the installation instructions below.
+ </p>
+ <h3>Building with MinGW or Cygwin</h3>
+ <p>
+ Open a command prompt window and make sure the MinGW or Cygwin programs
+-are in your path. Then <tt>cd</tt> to the directory where
+-you've unpacked the sources and run this command for MinGW:
++are in your path. Then <tt>cd</tt> to the directory of the git repository
++or where you've unpacked the sources. Then run this command for MinGW:
+ </p>
+ <pre class="code">
+ mingw32-make
+@@ -365,7 +336,7 @@ You need to specify <tt>TARGET_SYS</tt> whenever the host
OS and the
+ target OS differ, or you'll get assembler or linker errors:
+ </p>
+ <ul>
+-<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or
Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples
below.</li>
++<li>E.g. if you're compiling on a Windows or macOS host for embedded Linux or
Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples
below.</li>
+ <li>For a minimal target OS, you may need to disable the built-in allocator in
<tt>src/Makefile</tt> and use
<tt>TARGET_SYS=Other</tt>.</li>
+ <li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the
install step, too.</li>
+ </ul>
+@@ -428,52 +399,31 @@ make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2
-mabi=64"
+ make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
+ </pre>
+ <p>
+-You can cross-compile for <b id="android">Android</b> using the
<a
href="https://developer.android.com/ndk/index.html">Android
NDK</a>.
+-The environment variables need to match the install locations and the
+-desired target platform. E.g. Android 4.0 corresponds to ABI
level 14.
+-For details check the folder <tt>docs</tt> in the NDK directory.
+-</p>
+-<p>
+-Only a few common variations for the different CPUs, ABIs and platforms
+-are listed. Please use your own judgement for which combination you want
+-to build/deploy or which lowest common denominator you want to pick:
++You can cross-compile for <b id="android">Android</b> using the
<a
href="https://developer.android.com/ndk/"><span
class="ext">»</span> Android NDK</a>.
++Please adapt the environment variables to match the install locations and the
++desired target platform. E.g. Android 4.1 corresponds to ABI
level 16.
+ </p>
+ <pre class="code">
+-# Android/ARM, armeabi (ARMv5TE soft-float), Android 2.2+ (Froyo)
+-NDK=/opt/android/ndk
+-NDKABI=8
+-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+-
+-# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.0+ (ICS)
+-NDK=/opt/android/ndk
+-NDKABI=14
+-NDKVER=$NDK/toolchains/arm-linux-androideabi-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/arm-linux-androideabi-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-arm"
+-NDKARCH="-march=armv7-a -mfloat-abi=softfp -Wl,--fix-cortex-a8"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF $NDKARCH"
++# Android/ARM64, aarch64, Android 5.0+ (L)
++NDKDIR=/opt/android/ndk
++NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
++NDKCROSS=$NDKBIN/aarch64-linux-android-
++NDKCC=$NDKBIN/aarch64-linux-android21-clang
++make CROSS=$NDKCROSS \
++ STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
++ TARGET_LD=$NDKCC
+
+-# Android/MIPS, mipsel (MIPS32R1 hard-float), Android 4.0+ (ICS)
+-NDK=/opt/android/ndk
+-NDKABI=14
+-NDKVER=$NDK/toolchains/mipsel-linux-android-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/mipsel-linux-android-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-mips"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
+-
+-# Android/x86, x86 (i686 SSE3), Android 4.0+ (ICS)
+-NDK=/opt/android/ndk
+-NDKABI=14
+-NDKVER=$NDK/toolchains/x86-4.9
+-NDKP=$NDKVER/prebuilt/linux-x86/bin/i686-linux-android-
+-NDKF="--sysroot $NDK/platforms/android-$NDKABI/arch-x86"
+-make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
++# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
++NDKDIR=/opt/android/ndk
++NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
++NDKCROSS=$NDKBIN/arm-linux-androideabi-
++NDKCC=$NDKBIN/armv7a-linux-androideabi16-clang
++make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
++ STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
++ TARGET_LD=$NDKCC
+ </pre>
+ <p>
+-You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad)
using the <a
href="http://developer.apple.com/devcenter/ios/index.action">...
class="ext">»</span> iOS SDK</a>:
++You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad)
using the <a
href="https://developer.apple.com/ios/"><span
class="ext">»</span> iOS SDK</a>:
+ </p>
+ <p style="font-size: 8pt;">
+ Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
+@@ -483,13 +433,6 @@ much slower than the JIT compiler. Please complain to Apple, not
me.
+ Or use Android. :-p
+ </p>
+ <pre class="code">
+-# iOS/ARM (32 bit)
+-ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+-ICC=$(xcrun --sdk iphoneos --find clang)
+-ISDKF="-arch armv7 -isysroot $ISDKP"
+-make DEFAULT_CC=clang HOST_CC="clang -m32 -arch i386" \
+- CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+-
+ # iOS/ARM64
+ ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ ICC=$(xcrun --sdk iphoneos --find clang)
+@@ -590,14 +533,6 @@ the DLL). You may link LuaJIT statically on Windows only if you
don't
+ intend to load Lua/C modules at runtime.
+ </li></ul>
+ </li>
+-<li>
+-If you're building a 64 bit application on OSX which links directly or
+-indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode,
+-you need to link your main executable with these flags:
+-<pre class="code">
+--pagezero_size 10000 -image_base 100000000
+-</pre>
+-</li>
+ </ul>
+ <p>Additional hints for initializing LuaJIT using the C API functions:</p>
+ <ul>
+@@ -606,7 +541,7 @@ you need to link your main executable with these flags:
+ for embedding Lua or LuaJIT into your application.</li>
+ <li>Make sure you use <tt>luaL_newstate</tt>. Avoid using
+ <tt>lua_newstate</tt>, since this uses the (slower) default memory
+-allocator from your system (no support for this on x64).</li>
++allocator from your system (no support for this on 64 bit
architectures).</li>
+ <li>Make sure you use <tt>luaL_openlibs</tt> and not the old Lua 5.0
style
+ of calling <tt>luaopen_base</tt> etc. directly.</li>
+ <li>To change or extend the list of standard libraries to load, copy
+@@ -615,7 +550,7 @@ Make sure the <tt>jit</tt> library is loaded or the JIT
compiler
+ will not be activated.</li>
+ <li>The <tt>bit.*</tt> module for bitwise operations
+ is already built-in. There's no need to statically link
+-<a
href="http://bitop.luajit.org/"><span
class="ext">»</span> Lua BitOp</a> to your
application.</li>
++<a
href="https://bitop.luajit.org/"><span
class="ext">»</span> Lua BitOp</a> to your
application.</li>
+ </ul>
+
+ <h2 id="distro">Hints for Distribution Maintainers</h2>
+@@ -682,7 +617,7 @@ to me (the upstream) and not you (the package maintainer), anyway.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/luajit.html b/doc/luajit.html
+index ef5b824c..a25267a6 100644
+--- a/doc/luajit.html
++++ b/doc/luajit.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>LuaJIT</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -96,7 +95,7 @@ table.feature small {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>LuaJIT</h1>
+@@ -105,7 +104,7 @@ table.feature small {
+ <ul><li>
+ <a class="current" href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -123,6 +122,8 @@ table.feature small {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -131,36 +132,31 @@ table.feature small {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+ LuaJIT is a <b>Just-In-Time Compiler</b> (JIT) for the
+-<a
href="http://www.lua.org/"><span
class="ext">»</span> Lua</a> programming
language.
++<a
href="https://www.lua.org/"><span
class="ext">»</span> Lua</a> programming
language.
+ Lua is a powerful, dynamic and light-weight programming language.
+ It may be embedded or used as a general-purpose, stand-alone language.
+ </p>
+ <p>
+-LuaJIT is Copyright © 2005-2017 Mike Pall, released under the
+-<a
href="http://www.opensource.org/licenses/mit-license.php">&l...
class="ext">»</span> MIT open source
license</a>.
++LuaJIT is Copyright © 2005-2021 Mike Pall, released under the
++<a
href="https://www.opensource.org/licenses/mit-license.php"><span
class="ext">»</span> MIT open source
license</a>.
+ </p>
+ <p>
+ </p>
+
+ <h2>Compatibility</h2>
+ <table class="feature os os1">
+-<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>OSX</td><td>POSIX</td></tr>
++<tr><td>Windows</td><td>Linux</td><td>BSD</td><td>macOS</td><td>POSIX</td></tr>
+ </table>
+ <table class="feature os os2">
+ <tr><td><span
style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
+@@ -194,7 +190,7 @@ LuaJIT has been successfully used as a <b>scripting
middleware</b> in
+ games, appliances, network and graphics apps, numerical simulations,
+ trading platforms and many other specialty applications. It scales from
+ embedded devices, smartphones, desktops up to server farms. It combines
+-high flexibility with <a
href="http://luajit.org/performance.html"><span
class="ext">»</span> high performance</a>
++high flexibility with high performance
+ and an unmatched <b>low memory footprint</b>.
+ </p>
+ <p>
+@@ -226,7 +222,7 @@ Please select a sub-topic in the navigation bar to learn more about
LuaJIT.
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/running.html b/doc/running.html
+index 666b0abc..b55b8439 100644
+--- a/doc/running.html
++++ b/doc/running.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Running LuaJIT</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -33,7 +32,7 @@ td.param_default {
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Running LuaJIT</h1>
+@@ -42,7 +41,7 @@ td.param_default {
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -60,6 +59,8 @@ td.param_default {
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -68,17 +69,12 @@ td.param_default {
+ </li></ul>
+ </li><li>
+ <a href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+@@ -95,7 +91,7 @@ The <tt>luajit</tt> stand-alone executable is just a
slightly modified
+ version of the regular <tt>lua</tt> stand-alone executable.
+ It supports the same basic options, too. <tt>luajit -h</tt>
+ prints a short list of the available options. Please have a look at the
+-<a
href="http://www.lua.org/manual/5.1/manual.html#6"><span
class="ext">»</span> Lua manual</a>
++<a
href="https://www.lua.org/manual/5.1/manual.html#6"><span
class="ext">»</span> Lua manual</a>
+ for details.
+ </p>
+ <p>
+@@ -189,8 +185,8 @@ itself. For a description of their options and output format, please
+ read the comment block at the start of their source.
+ They can be found in the <tt>lib</tt> directory of the source
+ distribution or installed under the <tt>jit</tt> directory. By default
+-this is <tt>/usr/local/share/luajit-2.0.5/jit</tt> on POSIX
+-systems.
++this is <tt>/usr/local/share/luajit-XX.YY.ZZ/jit</tt> on POSIX
++systems (replace XX.YY.ZZ with the installed version).
+ </p>
+
+ <h3 id="opt_O"><tt>-O[level]</tt><br>
+@@ -299,7 +295,7 @@ Here are the parameters and their default settings:
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/doc/status.html b/doc/status.html
+index cad6ca65..1d3ba984 100644
+--- a/doc/status.html
++++ b/doc/status.html
+@@ -1,10 +1,9 @@
+-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>Status</title>
+-<meta http-equiv="Content-Type" content="text/html;
charset=iso-8859-1">
+-<meta name="Author" content="Mike Pall">
+-<meta name="Copyright" content="Copyright (C) 2005-2017, Mike
Pall">
++<meta charset="utf-8">
++<meta name="Copyright" content="Copyright (C) 2005-2021">
+ <meta name="Language" content="en">
+ <link rel="stylesheet" type="text/css"
href="bluequad.css" media="screen">
+ <link rel="stylesheet" type="text/css"
href="bluequad-print.css" media="print">
+@@ -14,7 +13,7 @@ ul li { padding-bottom: 0.3em; }
+ </head>
+ <body>
+ <div id="site">
+-<a href="http://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
++<a href="https://luajit.org"><span>Lua<span
id="logo">JIT</span></span></a>
+ </div>
+ <div id="head">
+ <h1>Status</h1>
+@@ -23,7 +22,7 @@ ul li { padding-bottom: 0.3em; }
+ <ul><li>
+ <a href="luajit.html">LuaJIT</a>
+ <ul><li>
+-<a
href="http://luajit.org/download.html">Download <span
class="ext">»</span></a>
++<a
href="https://luajit.org/download.html">Download <span
class="ext">»</span></a>
+ </li><li>
+ <a href="install.html">Installation</a>
+ </li><li>
+@@ -41,6 +40,8 @@ ul li { padding-bottom: 0.3em; }
+ <a href="ext_ffi_semantics.html">FFI Semantics</a>
+ </li></ul>
+ </li><li>
++<a href="ext_buffer.html">String Buffers</a>
++</li><li>
+ <a href="ext_jit.html">jit.* Library</a>
+ </li><li>
+ <a href="ext_c_api.html">Lua/C API</a>
+@@ -49,24 +50,25 @@ ul li { padding-bottom: 0.3em; }
+ </li></ul>
+ </li><li>
+ <a class="current" href="status.html">Status</a>
+-<ul><li>
+-<a href="changes.html">Changes</a>
+-</li></ul>
+ </li><li>
+ <a href="faq.html">FAQ</a>
+ </li><li>
+-<a
href="http://luajit.org/performance.html">Performance <span
class="ext">»</span></a>
+-</li><li>
+ <a
href="http://wiki.luajit.org/">Wiki <span
class="ext">»</span></a>
+ </li><li>
+-<a
href="http://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
++<a
href="https://luajit.org/list.html">Mailing List <span
class="ext">»</span></a>
+ </li></ul>
+ </div>
+ <div id="main">
+ <p>
+-<span style="color: #0000c0;">LuaJIT 2.0</span> is the
current
+-<span style="color: #0000c0;">stable branch</span>. This branch is
in
+-feature-freeze — new features will only be added to LuaJIT 2.1.
++This documentation is for LuaJIT 2.1.0-beta3. Please check the <tt>doc</tt>
++directory in each git branch for the version-specific documentation.
++</p>
++<p>
++The currently developed branches are LuaJIT 2.1 and LuaJIT 2.0.
++</p>
++<p>
++LuaJIT 2.0 is in feature-freeze — new features will only
++be added to LuaJIT 2.1.
+ </p>
+
+ <h2>Current Status</h2>
+@@ -90,30 +92,12 @@ The Lua <b>debug API</b> is missing a couple of features (return
+ hooks for non-Lua functions) and shows slightly different behavior
+ in LuaJIT (no per-coroutine hooks, no tail call counting).
+ </li>
+-<li>
+-Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
+-handled correctly. The error may fall through an on-trace
+-<tt>pcall</tt> or it may be passed on to the function set with
+-<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new
+-garbage collector.
+-</li>
+-<li>
+-LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the
+-<b>legacy <tt>lightuserdata</tt></b> data type.
+-This is only relevant on x64 systems which use the negative part of the
+-virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems
+-configured with a 48 bit or 52 bit VA.
+-Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside
+-of that range, e.g. variables on the stack. In general, avoid this data
+-type for new code and replace it with (much more performant) FFI bindings.
+-FFI cdata pointers can address the full 64 bit range.
+-</li>
+ </ul>
+ <br class="flush">
+ </div>
+ <div id="foot">
+ <hr class="hide">
+-Copyright © 2005-2017 Mike Pall
++Copyright © 2005-2021
+ <span class="noprint">
+ ·
+ <a href="contact.html">Contact</a>
+diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
+index a43f7c66..ebcf4ac0 100644
+--- a/dynasm/dasm_arm.h
++++ b/dynasm/dasm_arm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM ARM encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -254,6 +254,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ case DASM_IMMV8:
+ CK((n & 3) == 0, RANGE_I);
+ n >>= 2;
++ /* fallthrough */
+ case DASM_IMML8:
+ case DASM_IMML12:
+ CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :
+@@ -293,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -370,7 +371,11 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4;
+diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
+index 32f595af..0c775ae2 100644
+--- a/dynasm/dasm_arm.lua
++++ b/dynasm/dasm_arm.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM ARM module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -9,9 +9,9 @@
+ local _info = {
+ arch = "arm",
+ description = "DynASM ARM module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
+index 47e1e074..d6da4528 100644
+--- a/dynasm/dasm_arm64.h
++++ b/dynasm/dasm_arm64.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM ARM64 encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -21,8 +21,9 @@ enum {
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+- DASM_REL_PC, DASM_LABEL_PC,
++ DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
+ DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
++ DASM_IMMV, DASM_VREG,
+ DASM__MAX
+ };
+
+@@ -39,6 +40,7 @@ enum {
+ #define DASM_S_RANGE_LG 0x13000000
+ #define DASM_S_RANGE_PC 0x14000000
+ #define DASM_S_RANGE_REL 0x15000000
++#define DASM_S_RANGE_VREG 0x16000000
+ #define DASM_S_UNDEF_LG 0x21000000
+ #define DASM_S_UNDEF_PC 0x22000000
+
+@@ -247,7 +249,7 @@ void dasm_put(Dst_DECL, int start, ...)
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+- case DASM_REL_EXT: break;
++ case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+@@ -268,6 +270,11 @@ void dasm_put(Dst_DECL, int start, ...)
+ *pl = pos;
+ }
+ pos++;
++ if ((ins & 0x8000)) ofs += 8;
++ break;
++ case DASM_REL_A:
++ b[pos++] = n;
++ b[pos++] = va_arg(ap, int);
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+@@ -312,13 +319,21 @@ void dasm_put(Dst_DECL, int start, ...)
+ }
+ case DASM_IMML: {
+ #ifdef DASM_CHECKS
+- int scale = (p[-2] >> 30);
++ int scale = (ins & 3);
+ CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+ (unsigned int)(n+256) < 512, RANGE_I);
+ #endif
+ b[pos++] = n;
+ break;
+ }
++ case DASM_IMMV:
++ ofs += 4;
++ b[pos++] = n;
++ break;
++ case DASM_VREG:
++ CK(n < 32, RANGE_VREG);
++ b[pos++] = n;
++ break;
+ }
+ }
+ }
+@@ -348,7 +363,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -375,8 +390,8 @@ int dasm_link(Dst_DECL, size_t *szp)
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+- case DASM_IMML: pos++; break;
+- case DASM_IMM13X: pos += 2; break;
++ case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
++ case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
+ }
+ }
+ stop: (void)0;
+@@ -426,7 +441,13 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
++ n = (int)na;
++ CK((ptrdiff_t)n == na, RANGE_REL);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+@@ -445,8 +466,24 @@ int dasm_encode(Dst_DECL, void *buffer)
+ } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
+ CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+ cp[-1] |= ((n << 3) & 0x0007ffe0);
++ } else if ((ins & 0x8000)) { /* absolute */
++ cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
++ cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
++ cp += 2;
+ }
+ break;
++ case DASM_REL_A: {
++ ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
++ if ((ins & 0x3000) == 0x3000) { /* ADRP */
++ ins &= ~0x1000;
++ na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
++ } else {
++ na = na - (ptrdiff_t)cp + 4;
++ }
++ n = (int)na;
++ CK((ptrdiff_t)n == na, RANGE_REL);
++ goto patchrel;
++ }
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+ break;
+@@ -467,11 +504,17 @@ int dasm_encode(Dst_DECL, void *buffer)
+ cp[-1] |= (dasm_imm13(n, *b++) << 10);
+ break;
+ case DASM_IMML: {
+- int scale = (p[-2] >> 30);
++ int scale = (ins & 3);
+ cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+ ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+ break;
+ }
++ case DASM_IMMV:
++ *cp++ = n;
++ break;
++ case DASM_VREG:
++ cp[-1] |= (n & 0x1f) << (ins & 0x1f);
++ break;
+ default: *cp++ = ins; break;
+ }
+ }
+diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
+index 8a5f735d..cb82dc4a 100644
+--- a/dynasm/dasm_arm64.lua
++++ b/dynasm/dasm_arm64.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM ARM64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -9,9 +9,9 @@
+ local _info = {
+ arch = "arm",
+ description = "DynASM ARM64 module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -23,12 +23,12 @@ local _M = { _info = _info }
+ local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+ local assert, setmetatable, rawget = assert, setmetatable, rawget
+ local _s = string
+-local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
++local format, byte, char = _s.format, _s.byte, _s.char
+ local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+ local concat, sort, insert = table.concat, table.sort, table.insert
+ local bit = bit or require("bit")
+ local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+-local ror, tohex = bit.ror, bit.tohex
++local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
+
+ -- Inherited tables and callbacks.
+ local g_opt, g_arch
+@@ -39,7 +39,9 @@ local wline, werror, wfatal, wwarn
+ local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+- "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
++ "REL_PC", "LABEL_PC", "REL_A",
++ "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
++ "VREG",
+ }
+
+ -- Maximum number of section buffer positions for dasm_put().
+@@ -246,9 +248,12 @@ local map_cond = {
+
+ local parse_reg_type
+
+-local function parse_reg(expr)
++local function parse_reg(expr, shift)
+ if not expr then werror("expected register name") end
+ local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
++ if not tname then
++ tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
++ end
+ local tp = map_type[tname or expr]
+ if tp then
+ local reg = ovreg or tp.reg
+@@ -266,18 +271,28 @@ local function parse_reg(expr)
+ elseif parse_reg_type ~= rt then
+ werror("register size mismatch")
+ end
+- return r, tp
++ return shl(r, shift), tp
++ end
++ end
++ local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
++ if vreg then
++ if not parse_reg_type then
++ parse_reg_type = vrt
++ elseif parse_reg_type ~= vrt then
++ werror("register size mismatch")
+ end
++ if shift then waction("VREG", shift, vreg) end
++ return 0
+ end
+ werror("bad register name `"..expr.."'")
+ end
+
+ local function parse_reg_base(expr)
+ if expr == "sp" then return 0x3e0 end
+- local base, tp = parse_reg(expr)
++ local base, tp = parse_reg(expr, 5)
+ if parse_reg_type ~= "x" then werror("bad register type") end
+ parse_reg_type = false
+- return shl(base, 5), tp
++ return base, tp
+ end
+
+ local parse_ctx = {}
+@@ -297,7 +312,7 @@ local function parse_number(n)
+ local code = loadenv("return "..n)
+ if code then
+ local ok, y = pcall(code)
+- if ok then return y end
++ if ok and type(y) == "number" then return y end
+ end
+ return nil
+ end
+@@ -403,7 +418,7 @@ local function parse_imm_load(imm, scale)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+- waction("IMML", 0, imm)
++ waction("IMML", scale, imm)
+ return 0
+ end
+ end
+@@ -462,6 +477,7 @@ end
+
+ local function parse_load(params, nparams, n, op)
+ if params[n+2] then werror("too many operands") end
++ local scale = shr(op, 30)
+ local pn, p2 = params[n], params[n+1]
+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+ if not p1 then
+@@ -470,14 +486,13 @@ local function parse_load(params, nparams, n, op)
+ if reg and tailr ~= "" then
+ local base, tp = parse_reg_base(reg)
+ if tp then
+- waction("IMML", 0, format(tp.ctypefmt, tailr))
++ waction("IMML", scale, format(tp.ctypefmt, tailr))
+ return op + base
+ end
+ end
+ end
+ werror("expected address operand")
+ end
+- local scale = shr(op, 30)
+ if p2 then
+ if wb == "!" then werror("bad use of '!'") end
+ op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
+@@ -494,7 +509,7 @@ local function parse_load(params, nparams, n, op)
+ op = op + parse_imm_load(imm, scale)
+ else
+ local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
+- op = op + shl(parse_reg(p2b), 16) + 0x00200800
++ op = op + parse_reg(p2b, 16) + 0x00200800
+ if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
+ werror("bad index register type")
+ end
+@@ -561,14 +576,14 @@ local function parse_load_pair(params, nparams, n, op)
+ end
+
+ local function parse_label(label, def)
+- local prefix = sub(label, 1, 2)
++ local prefix = label:sub(1, 2)
+ -- =>label (pc label reference)
+ if prefix == "=>" then
+- return "PC", 0, sub(label, 3)
++ return "PC", 0, label:sub(3)
+ end
+ -- ->name (global label reference)
+ if prefix == "->" then
+- return "LG", map_global[sub(label, 3)]
++ return "LG", map_global[label:sub(3)]
+ end
+ if def then
+ -- [1-9] (local label definition)
+@@ -586,8 +601,11 @@ local function parse_label(label, def)
+ if extname then
+ return "EXT", map_extern[extname]
+ end
++ -- &expr (pointer)
++ if label:sub(1, 1) == "&" then
++ return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
++ end
+ end
+- werror("bad label `"..label.."'")
+ end
+
+ local function branch_type(op)
+@@ -620,24 +638,24 @@ local function alias_bfx(p)
+ end
+
+ local function alias_bfiz(p)
+- parse_reg(p[1])
++ parse_reg(p[1], 0)
+ if parse_reg_type == "w" then
+- p[3] = "#-("..p[3]:sub(2)..")%32"
++ p[3] = "#(32-("..p[3]:sub(2).."))%32"
+ p[4] = "#("..p[4]:sub(2)..")-1"
+ else
+- p[3] = "#-("..p[3]:sub(2)..")%64"
++ p[3] = "#(64-("..p[3]:sub(2).."))%64"
+ p[4] = "#("..p[4]:sub(2)..")-1"
+ end
+ end
+
+ local alias_lslimm = op_alias("ubfm_4", function(p)
+- parse_reg(p[1])
++ parse_reg(p[1], 0)
+ local sh = p[3]:sub(2)
+ if parse_reg_type == "w" then
+- p[3] = "#-("..sh..")%32"
++ p[3] = "#(32-("..sh.."))%32"
+ p[4] = "#31-("..sh..")"
+ else
+- p[3] = "#-("..sh..")%64"
++ p[3] = "#(64-("..sh.."))%64"
+ p[4] = "#63-("..sh..")"
+ end
+ end)
+@@ -881,25 +899,25 @@ end
+
+ -- Handle opcodes defined with template strings.
+ local function parse_template(params, template, nparams, pos)
+- local op = tonumber(sub(template, 1, 8), 16)
++ local op = tonumber(template:sub(1, 8), 16)
+ local n = 1
+ local rtt = {}
+
+ parse_reg_type = false
+
+ -- Process each character.
+- for p in gmatch(sub(template, 9), ".") do
++ for p in gmatch(template:sub(9), ".") do
+ local q = params[n]
+ if p == "D" then
+- op = op + parse_reg(q); n = n + 1
++ op = op + parse_reg(q, 0); n = n + 1
+ elseif p == "N" then
+- op = op + shl(parse_reg(q), 5); n = n + 1
++ op = op + parse_reg(q, 5); n = n + 1
+ elseif p == "M" then
+- op = op + shl(parse_reg(q), 16); n = n + 1
++ op = op + parse_reg(q, 16); n = n + 1
+ elseif p == "A" then
+- op = op + shl(parse_reg(q), 10); n = n + 1
++ op = op + parse_reg(q, 10); n = n + 1
+ elseif p == "m" then
+- op = op + shl(parse_reg(params[n-1]), 16)
++ op = op + parse_reg(params[n-1], 16)
+
+ elseif p == "p" then
+ if q == "sp" then params[n] = "@x31" end
+@@ -930,8 +948,14 @@ local function parse_template(params, template, nparams, pos)
+
+ elseif p == "B" then
+ local mode, v, s = parse_label(q, false); n = n + 1
++ if not mode then werror("bad label `"..q.."'") end
+ local m = branch_type(op)
+- waction("REL_"..mode, v+m, s, 1)
++ if mode == "A" then
++ waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
++ actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
++ else
++ waction("REL_"..mode, v+m, s, 1)
++ end
+
+ elseif p == "I" then
+ op = op + parse_imm12(q); n = n + 1
+@@ -977,8 +1001,8 @@ function op_template(params, template, nparams)
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+- -- A single opcode needs a maximum of 3 positions.
+- if secpos+3 > maxsecpos then wflush() end
++ -- A single opcode needs a maximum of 4 positions.
++ if secpos+4 > maxsecpos then wflush() end
+ local pos = wpos()
+ local lpos, apos, spos = #actlist, #actargs, secpos
+
+@@ -990,9 +1014,11 @@ function op_template(params, template, nparams)
+ actlist[lpos+1] = nil
+ actlist[lpos+2] = nil
+ actlist[lpos+3] = nil
++ actlist[lpos+4] = nil
+ actargs[apos+1] = nil
+ actargs[apos+2] = nil
+ actargs[apos+3] = nil
++ actargs[apos+4] = nil
+ end
+ error(err, 0)
+ end
+@@ -1036,23 +1062,50 @@ map_op[".label_1"] = function(params)
+ if not params then return "[1-9] | ->global | =>pcexpr" end
+ if secpos+1 > maxsecpos then wflush() end
+ local mode, n, s = parse_label(params[1], true)
+- if mode == "EXT" then werror("bad label definition") end
++ if not mode or mode == "EXT" then werror("bad label definition") end
+ waction("LABEL_"..mode, n, s, 1)
+ end
+
+ ------------------------------------------------------------------------------
+
+ -- Pseudo-opcodes for data storage.
+-map_op[".long_*"] = function(params)
++local function op_data(params)
+ if not params then return "imm..." end
++ local sz = params.op == ".long" and 4 or 8
+ for _,p in ipairs(params) do
+- local n = tonumber(p)
+- if not n then werror("bad immediate `"..p.."'") end
+- if n < 0 then n = n + 2^32 end
+- wputw(n)
++ local imm = parse_number(p)
++ if imm then
++ local n = tobit(imm)
++ if n == imm or (n < 0 and n + 2^32 == imm) then
++ wputw(n < 0 and n + 2^32 or n)
++ if sz == 8 then
++ wputw(imm < 0 and 0xffffffff or 0)
++ end
++ elseif sz == 4 then
++ werror("bad immediate `"..p.."'")
++ else
++ imm = nil
++ end
++ end
++ if not imm then
++ local mode, v, s = parse_label(p, false)
++ if sz == 4 then
++ if mode then werror("label does not fit into .long") end
++ waction("IMMV", 0, p)
++ elseif mode and mode ~= "A" then
++ waction("REL_"..mode, v+0x8000, s, 1)
++ else
++ if mode == "A" then p = s end
++ waction("IMMV", 0, format("(unsigned int)(%s)", p))
++ waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p))
++ end
++ end
+ if secpos+2 > maxsecpos then wflush() end
+ end
+ end
++map_op[".long_*"] = op_data
++map_op[".quad_*"] = op_data
++map_op[".addr_*"] = op_data
+
+ -- Alignment pseudo-opcode.
+ map_op[".align_1"] = function(params)
+diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
+index 4b49fd8c..b99b56b0 100644
+--- a/dynasm/dasm_mips.h
++++ b/dynasm/dasm_mips.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM MIPS encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -273,7 +273,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -349,19 +349,24 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n);
+ if (ins & 2048)
+- n = n - (int)((char *)cp - base);
+- else
+ n = (n + (int)(size_t)base) & 0x0fffffff;
+- patchrel:
++ else
++ n = n - (int)((char *)cp - base);
++ patchrel: {
++ unsigned int e = 16 + ((ins >> 12) & 15);
+ CK((n & 3) == 0 &&
+- ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
+- ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL);
+- cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff));
++ ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
++ cp[-1] |= ((n>>2) & ((1<<e)-1));
++ }
+ break;
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
+index 78a4e34a..59147015 100644
+--- a/dynasm/dasm_mips.lua
++++ b/dynasm/dasm_mips.lua
+@@ -1,19 +1,20 @@
+ ------------------------------------------------------------------------------
+ -- DynASM MIPS32/MIPS64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+ local mips64 = mips64
++local mipsr6 = _map_def.MIPSR6
+
+ -- Module information:
+ local _info = {
+ arch = mips64 and "mips64" or "mips",
+ description = "DynASM MIPS32/MIPS64 module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2016-05-24",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -238,7 +239,6 @@ local map_op = {
+ bne_3 = "14000000STB",
+ blez_2 = "18000000SB",
+ bgtz_2 = "1c000000SB",
+- addi_3 = "20000000TSI",
+ li_2 = "24000000TI",
+ addiu_3 = "24000000TSI",
+ slti_3 = "28000000TSI",
+@@ -248,40 +248,22 @@ local map_op = {
+ ori_3 = "34000000TSU",
+ xori_3 = "38000000TSU",
+ lui_2 = "3c000000TU",
+- beqzl_2 = "50000000SB",
+- beql_3 = "50000000STB",
+- bnezl_2 = "54000000SB",
+- bnel_3 = "54000000STB",
+- blezl_2 = "58000000SB",
+- bgtzl_2 = "5c000000SB",
+- daddi_3 = mips64 and "60000000TSI",
+ daddiu_3 = mips64 and "64000000TSI",
+ ldl_2 = mips64 and "68000000TO",
+ ldr_2 = mips64 and "6c000000TO",
+ lb_2 = "80000000TO",
+ lh_2 = "84000000TO",
+- lwl_2 = "88000000TO",
+ lw_2 = "8c000000TO",
+ lbu_2 = "90000000TO",
+ lhu_2 = "94000000TO",
+- lwr_2 = "98000000TO",
+ lwu_2 = mips64 and "9c000000TO",
+ sb_2 = "a0000000TO",
+ sh_2 = "a4000000TO",
+- swl_2 = "a8000000TO",
+ sw_2 = "ac000000TO",
+- sdl_2 = mips64 and "b0000000TO",
+- sdr_2 = mips64 and "b1000000TO",
+- swr_2 = "b8000000TO",
+- cache_2 = "bc000000NO",
+- ll_2 = "c0000000TO",
+ lwc1_2 = "c4000000HO",
+- pref_2 = "cc000000NO",
+ ldc1_2 = "d4000000HO",
+ ld_2 = mips64 and "dc000000TO",
+- sc_2 = "e0000000TO",
+ swc1_2 = "e4000000HO",
+- scd_2 = mips64 and "f0000000TO",
+ sdc1_2 = "f4000000HO",
+ sd_2 = mips64 and "fc000000TO",
+
+@@ -289,10 +271,6 @@ local map_op = {
+ nop_0 = "00000000",
+ sll_3 = "00000000DTA",
+ sextw_2 = "00000000DT",
+- movf_2 = "00000001DS",
+- movf_3 = "00000001DSC",
+- movt_2 = "00010001DS",
+- movt_3 = "00010001DSC",
+ srl_3 = "00000002DTA",
+ rotr_3 = "00200002DTA",
+ sra_3 = "00000003DTA",
+@@ -301,31 +279,16 @@ local map_op = {
+ rotrv_3 = "00000046DTS",
+ drotrv_3 = mips64 and "00000056DTS",
+ srav_3 = "00000007DTS",
+- jr_1 = "00000008S",
+ jalr_1 = "0000f809S",
+ jalr_2 = "00000009DS",
+- movz_3 = "0000000aDST",
+- movn_3 = "0000000bDST",
+ syscall_0 = "0000000c",
+ syscall_1 = "0000000cY",
+ break_0 = "0000000d",
+ break_1 = "0000000dY",
+ sync_0 = "0000000f",
+- mfhi_1 = "00000010D",
+- mthi_1 = "00000011S",
+- mflo_1 = "00000012D",
+- mtlo_1 = "00000013S",
+ dsllv_3 = mips64 and "00000014DTS",
+ dsrlv_3 = mips64 and "00000016DTS",
+ dsrav_3 = mips64 and "00000017DTS",
+- mult_2 = "00000018ST",
+- multu_2 = "00000019ST",
+- div_2 = "0000001aST",
+- divu_2 = "0000001bST",
+- dmult_2 = mips64 and "0000001cST",
+- dmultu_2 = mips64 and "0000001dST",
+- ddiv_2 = mips64 and "0000001eST",
+- ddivu_2 = mips64 and "0000001fST",
+ add_3 = "00000020DST",
+ move_2 = mips64 and "00000025DS" or "00000021DS",
+ addu_3 = "00000021DST",
+@@ -369,32 +332,9 @@ local map_op = {
+ bgez_2 = "04010000SB",
+ bltzl_2 = "04020000SB",
+ bgezl_2 = "04030000SB",
+- tgei_2 = "04080000SI",
+- tgeiu_2 = "04090000SI",
+- tlti_2 = "040a0000SI",
+- tltiu_2 = "040b0000SI",
+- teqi_2 = "040c0000SI",
+- tnei_2 = "040e0000SI",
+- bltzal_2 = "04100000SB",
+ bal_1 = "04110000B",
+- bgezal_2 = "04110000SB",
+- bltzall_2 = "04120000SB",
+- bgezall_2 = "04130000SB",
+ synci_1 = "041f0000O",
+
+- -- Opcode SPECIAL2.
+- madd_2 = "70000000ST",
+- maddu_2 = "70000001ST",
+- mul_3 = "70000002DST",
+- msub_2 = "70000004ST",
+- msubu_2 = "70000005ST",
+- clz_2 = "70000020DS=",
+- clo_2 = "70000021DS=",
+- dclz_2 = mips64 and "70000024DS=",
+- dclo_2 = mips64 and "70000025DS=",
+- sdbbp_0 = "7000003f",
+- sdbbp_1 = "7000003fY",
+-
+ -- Opcode SPECIAL3.
+ ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
+ dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
+@@ -445,15 +385,6 @@ local map_op = {
+ ctc1_2 = "44c00000TG",
+ mthc1_2 = "44e00000TG",
+
+- bc1f_1 = "45000000B",
+- bc1f_2 = "45000000CB",
+- bc1t_1 = "45010000B",
+- bc1t_2 = "45010000CB",
+- bc1fl_1 = "45020000B",
+- bc1fl_2 = "45020000CB",
+- bc1tl_1 = "45030000B",
+- bc1tl_2 = "45030000CB",
+-
+ ["add.s_3"] = "46000000FGH",
+ ["sub.s_3"] = "46000001FGH",
+ ["mul.s_3"] = "46000002FGH",
+@@ -470,51 +401,11 @@ local map_op = {
+ ["trunc.w.s_2"] = "4600000dFG",
+ ["ceil.w.s_2"] = "4600000eFG",
+ ["floor.w.s_2"] = "4600000fFG",
+- ["movf.s_2"] = "46000011FG",
+- ["movf.s_3"] = "46000011FGC",
+- ["movt.s_2"] = "46010011FG",
+- ["movt.s_3"] = "46010011FGC",
+- ["movz.s_3"] = "46000012FGT",
+- ["movn.s_3"] = "46000013FGT",
+ ["recip.s_2"] = "46000015FG",
+ ["rsqrt.s_2"] = "46000016FG",
+ ["cvt.d.s_2"] = "46000021FG",
+ ["cvt.w.s_2"] = "46000024FG",
+ ["cvt.l.s_2"] = "46000025FG",
+- ["cvt.ps.s_3"] = "46000026FGH",
+- ["c.f.s_2"] = "46000030GH",
+- ["c.f.s_3"] = "46000030VGH",
+- ["c.un.s_2"] = "46000031GH",
+- ["c.un.s_3"] = "46000031VGH",
+- ["c.eq.s_2"] = "46000032GH",
+- ["c.eq.s_3"] = "46000032VGH",
+- ["c.ueq.s_2"] = "46000033GH",
+- ["c.ueq.s_3"] = "46000033VGH",
+- ["c.olt.s_2"] = "46000034GH",
+- ["c.olt.s_3"] = "46000034VGH",
+- ["c.ult.s_2"] = "46000035GH",
+- ["c.ult.s_3"] = "46000035VGH",
+- ["c.ole.s_2"] = "46000036GH",
+- ["c.ole.s_3"] = "46000036VGH",
+- ["c.ule.s_2"] = "46000037GH",
+- ["c.ule.s_3"] = "46000037VGH",
+- ["c.sf.s_2"] = "46000038GH",
+- ["c.sf.s_3"] = "46000038VGH",
+- ["c.ngle.s_2"] = "46000039GH",
+- ["c.ngle.s_3"] = "46000039VGH",
+- ["c.seq.s_2"] = "4600003aGH",
+- ["c.seq.s_3"] = "4600003aVGH",
+- ["c.ngl.s_2"] = "4600003bGH",
+- ["c.ngl.s_3"] = "4600003bVGH",
+- ["c.lt.s_2"] = "4600003cGH",
+- ["c.lt.s_3"] = "4600003cVGH",
+- ["c.nge.s_2"] = "4600003dGH",
+- ["c.nge.s_3"] = "4600003dVGH",
+- ["c.le.s_2"] = "4600003eGH",
+- ["c.le.s_3"] = "4600003eVGH",
+- ["c.ngt.s_2"] = "4600003fGH",
+- ["c.ngt.s_3"] = "4600003fVGH",
+-
+ ["add.d_3"] = "46200000FGH",
+ ["sub.d_3"] = "46200001FGH",
+ ["mul.d_3"] = "46200002FGH",
+@@ -531,130 +422,410 @@ local map_op = {
+ ["trunc.w.d_2"] = "4620000dFG",
+ ["ceil.w.d_2"] = "4620000eFG",
+ ["floor.w.d_2"] = "4620000fFG",
+- ["movf.d_2"] = "46200011FG",
+- ["movf.d_3"] = "46200011FGC",
+- ["movt.d_2"] = "46210011FG",
+- ["movt.d_3"] = "46210011FGC",
+- ["movz.d_3"] = "46200012FGT",
+- ["movn.d_3"] = "46200013FGT",
+ ["recip.d_2"] = "46200015FG",
+ ["rsqrt.d_2"] = "46200016FG",
+ ["cvt.s.d_2"] = "46200020FG",
+ ["cvt.w.d_2"] = "46200024FG",
+ ["cvt.l.d_2"] = "46200025FG",
+- ["c.f.d_2"] = "46200030GH",
+- ["c.f.d_3"] = "46200030VGH",
+- ["c.un.d_2"] = "46200031GH",
+- ["c.un.d_3"] = "46200031VGH",
+- ["c.eq.d_2"] = "46200032GH",
+- ["c.eq.d_3"] = "46200032VGH",
+- ["c.ueq.d_2"] = "46200033GH",
+- ["c.ueq.d_3"] = "46200033VGH",
+- ["c.olt.d_2"] = "46200034GH",
+- ["c.olt.d_3"] = "46200034VGH",
+- ["c.ult.d_2"] = "46200035GH",
+- ["c.ult.d_3"] = "46200035VGH",
+- ["c.ole.d_2"] = "46200036GH",
+- ["c.ole.d_3"] = "46200036VGH",
+- ["c.ule.d_2"] = "46200037GH",
+- ["c.ule.d_3"] = "46200037VGH",
+- ["c.sf.d_2"] = "46200038GH",
+- ["c.sf.d_3"] = "46200038VGH",
+- ["c.ngle.d_2"] = "46200039GH",
+- ["c.ngle.d_3"] = "46200039VGH",
+- ["c.seq.d_2"] = "4620003aGH",
+- ["c.seq.d_3"] = "4620003aVGH",
+- ["c.ngl.d_2"] = "4620003bGH",
+- ["c.ngl.d_3"] = "4620003bVGH",
+- ["c.lt.d_2"] = "4620003cGH",
+- ["c.lt.d_3"] = "4620003cVGH",
+- ["c.nge.d_2"] = "4620003dGH",
+- ["c.nge.d_3"] = "4620003dVGH",
+- ["c.le.d_2"] = "4620003eGH",
+- ["c.le.d_3"] = "4620003eVGH",
+- ["c.ngt.d_2"] = "4620003fGH",
+- ["c.ngt.d_3"] = "4620003fVGH",
+-
+- ["add.ps_3"] = "46c00000FGH",
+- ["sub.ps_3"] = "46c00001FGH",
+- ["mul.ps_3"] = "46c00002FGH",
+- ["abs.ps_2"] = "46c00005FG",
+- ["mov.ps_2"] = "46c00006FG",
+- ["neg.ps_2"] = "46c00007FG",
+- ["movf.ps_2"] = "46c00011FG",
+- ["movf.ps_3"] = "46c00011FGC",
+- ["movt.ps_2"] = "46c10011FG",
+- ["movt.ps_3"] = "46c10011FGC",
+- ["movz.ps_3"] = "46c00012FGT",
+- ["movn.ps_3"] = "46c00013FGT",
+- ["cvt.s.pu_2"] = "46c00020FG",
+- ["cvt.s.pl_2"] = "46c00028FG",
+- ["pll.ps_3"] = "46c0002cFGH",
+- ["plu.ps_3"] = "46c0002dFGH",
+- ["pul.ps_3"] = "46c0002eFGH",
+- ["puu.ps_3"] = "46c0002fFGH",
+- ["c.f.ps_2"] = "46c00030GH",
+- ["c.f.ps_3"] = "46c00030VGH",
+- ["c.un.ps_2"] = "46c00031GH",
+- ["c.un.ps_3"] = "46c00031VGH",
+- ["c.eq.ps_2"] = "46c00032GH",
+- ["c.eq.ps_3"] = "46c00032VGH",
+- ["c.ueq.ps_2"] = "46c00033GH",
+- ["c.ueq.ps_3"] = "46c00033VGH",
+- ["c.olt.ps_2"] = "46c00034GH",
+- ["c.olt.ps_3"] = "46c00034VGH",
+- ["c.ult.ps_2"] = "46c00035GH",
+- ["c.ult.ps_3"] = "46c00035VGH",
+- ["c.ole.ps_2"] = "46c00036GH",
+- ["c.ole.ps_3"] = "46c00036VGH",
+- ["c.ule.ps_2"] = "46c00037GH",
+- ["c.ule.ps_3"] = "46c00037VGH",
+- ["c.sf.ps_2"] = "46c00038GH",
+- ["c.sf.ps_3"] = "46c00038VGH",
+- ["c.ngle.ps_2"] = "46c00039GH",
+- ["c.ngle.ps_3"] = "46c00039VGH",
+- ["c.seq.ps_2"] = "46c0003aGH",
+- ["c.seq.ps_3"] = "46c0003aVGH",
+- ["c.ngl.ps_2"] = "46c0003bGH",
+- ["c.ngl.ps_3"] = "46c0003bVGH",
+- ["c.lt.ps_2"] = "46c0003cGH",
+- ["c.lt.ps_3"] = "46c0003cVGH",
+- ["c.nge.ps_2"] = "46c0003dGH",
+- ["c.nge.ps_3"] = "46c0003dVGH",
+- ["c.le.ps_2"] = "46c0003eGH",
+- ["c.le.ps_3"] = "46c0003eVGH",
+- ["c.ngt.ps_2"] = "46c0003fGH",
+- ["c.ngt.ps_3"] = "46c0003fVGH",
+-
+ ["cvt.s.w_2"] = "46800020FG",
+ ["cvt.d.w_2"] = "46800021FG",
+-
+ ["cvt.s.l_2"] = "46a00020FG",
+ ["cvt.d.l_2"] = "46a00021FG",
+-
+- -- Opcode COP1X.
+- lwxc1_2 = "4c000000FX",
+- ldxc1_2 = "4c000001FX",
+- luxc1_2 = "4c000005FX",
+- swxc1_2 = "4c000008FX",
+- sdxc1_2 = "4c000009FX",
+- suxc1_2 = "4c00000dFX",
+- prefx_2 = "4c00000fMX",
+- ["alnv.ps_4"] = "4c00001eFGHS",
+- ["madd.s_4"] = "4c000020FRGH",
+- ["madd.d_4"] = "4c000021FRGH",
+- ["madd.ps_4"] = "4c000026FRGH",
+- ["msub.s_4"] = "4c000028FRGH",
+- ["msub.d_4"] = "4c000029FRGH",
+- ["msub.ps_4"] = "4c00002eFRGH",
+- ["nmadd.s_4"] = "4c000030FRGH",
+- ["nmadd.d_4"] = "4c000031FRGH",
+- ["nmadd.ps_4"] = "4c000036FRGH",
+- ["nmsub.s_4"] = "4c000038FRGH",
+- ["nmsub.d_4"] = "4c000039FRGH",
+- ["nmsub.ps_4"] = "4c00003eFRGH",
+ }
+
++if mipsr6 then -- Instructions added with MIPSR6.
++
++ for k,v in pairs({
++
++ -- Add immediate to upper bits.
++ aui_3 = "3c000000TSI",
++ daui_3 = mips64 and "74000000TSI",
++ dahi_2 = mips64 and "04060000SI",
++ dati_2 = mips64 and "041e0000SI",
++
++ -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
++
++ -- Compact branches.
++ blezalc_2 = "18000000TB", -- rt != 0.
++ bgezalc_2 = "18000000T=SB", -- rt != 0.
++ bgtzalc_2 = "1c000000TB", -- rt != 0.
++ bltzalc_2 = "1c000000T=SB", -- rt != 0.
++
++ blezc_2 = "58000000TB", -- rt != 0.
++ bgezc_2 = "58000000T=SB", -- rt != 0.
++ bgec_3 = "58000000STB", -- rs != rt.
++ blec_3 = "58000000TSB", -- rt != rs.
++
++ bgtzc_2 = "5c000000TB", -- rt != 0.
++ bltzc_2 = "5c000000T=SB", -- rt != 0.
++ bltc_3 = "5c000000STB", -- rs != rt.
++ bgtc_3 = "5c000000TSB", -- rt != rs.
++
++ bgeuc_3 = "18000000STB", -- rs != rt.
++ bleuc_3 = "18000000TSB", -- rt != rs.
++ bltuc_3 = "1c000000STB", -- rs != rt.
++ bgtuc_3 = "1c000000TSB", -- rt != rs.
++
++ beqzalc_2 = "20000000TB", -- rt != 0.
++ bnezalc_2 = "60000000TB", -- rt != 0.
++ beqc_3 = "20000000STB", -- rs < rt.
++ bnec_3 = "60000000STB", -- rs < rt.
++ bovc_3 = "20000000STB", -- rs >= rt.
++ bnvc_3 = "60000000STB", -- rs >= rt.
++
++ beqzc_2 = "d8000000SK", -- rs != 0.
++ bnezc_2 = "f8000000SK", -- rs != 0.
++ jic_2 = "d8000000TI",
++ jialc_2 = "f8000000TI",
++ bc_1 = "c8000000L",
++ balc_1 = "e8000000L",
++
++ -- Opcode SPECIAL.
++ jr_1 = "00000009S",
++ sdbbp_0 = "0000000e",
++ sdbbp_1 = "0000000eY",
++ lsa_4 = "00000005DSTA",
++ dlsa_4 = mips64 and "00000015DSTA",
++ seleqz_3 = "00000035DST",
++ selnez_3 = "00000037DST",
++ clz_2 = "00000050DS",
++ clo_2 = "00000051DS",
++ dclz_2 = mips64 and "00000052DS",
++ dclo_2 = mips64 and "00000053DS",
++ mul_3 = "00000098DST",
++ muh_3 = "000000d8DST",
++ mulu_3 = "00000099DST",
++ muhu_3 = "000000d9DST",
++ div_3 = "0000009aDST",
++ mod_3 = "000000daDST",
++ divu_3 = "0000009bDST",
++ modu_3 = "000000dbDST",
++ dmul_3 = mips64 and "0000009cDST",
++ dmuh_3 = mips64 and "000000dcDST",
++ dmulu_3 = mips64 and "0000009dDST",
++ dmuhu_3 = mips64 and "000000ddDST",
++ ddiv_3 = mips64 and "0000009eDST",
++ dmod_3 = mips64 and "000000deDST",
++ ddivu_3 = mips64 and "0000009fDST",
++ dmodu_3 = mips64 and "000000dfDST",
++
++ -- Opcode SPECIAL3.
++ align_4 = "7c000220DSTA",
++ dalign_4 = mips64 and "7c000224DSTA",
++ bitswap_2 = "7c000020DT",
++ dbitswap_2 = mips64 and "7c000024DT",
++
++ -- Opcode COP1.
++ bc1eqz_2 = "45200000HB",
++ bc1nez_2 = "45a00000HB",
++
++ ["sel.s_3"] = "46000010FGH",
++ ["seleqz.s_3"] = "46000014FGH",
++ ["selnez.s_3"] = "46000017FGH",
++ ["maddf.s_3"] = "46000018FGH",
++ ["msubf.s_3"] = "46000019FGH",
++ ["rint.s_2"] = "4600001aFG",
++ ["class.s_2"] = "4600001bFG",
++ ["min.s_3"] = "4600001cFGH",
++ ["mina.s_3"] = "4600001dFGH",
++ ["max.s_3"] = "4600001eFGH",
++ ["maxa.s_3"] = "4600001fFGH",
++ ["cmp.af.s_3"] = "46800000FGH",
++ ["cmp.un.s_3"] = "46800001FGH",
++ ["cmp.or.s_3"] = "46800011FGH",
++ ["cmp.eq.s_3"] = "46800002FGH",
++ ["cmp.une.s_3"] = "46800012FGH",
++ ["cmp.ueq.s_3"] = "46800003FGH",
++ ["cmp.ne.s_3"] = "46800013FGH",
++ ["cmp.lt.s_3"] = "46800004FGH",
++ ["cmp.ult.s_3"] = "46800005FGH",
++ ["cmp.le.s_3"] = "46800006FGH",
++ ["cmp.ule.s_3"] = "46800007FGH",
++ ["cmp.saf.s_3"] = "46800008FGH",
++ ["cmp.sun.s_3"] = "46800009FGH",
++ ["cmp.sor.s_3"] = "46800019FGH",
++ ["cmp.seq.s_3"] = "4680000aFGH",
++ ["cmp.sune.s_3"] = "4680001aFGH",
++ ["cmp.sueq.s_3"] = "4680000bFGH",
++ ["cmp.sne.s_3"] = "4680001bFGH",
++ ["cmp.slt.s_3"] = "4680000cFGH",
++ ["cmp.sult.s_3"] = "4680000dFGH",
++ ["cmp.sle.s_3"] = "4680000eFGH",
++ ["cmp.sule.s_3"] = "4680000fFGH",
++
++ ["sel.d_3"] = "46200010FGH",
++ ["seleqz.d_3"] = "46200014FGH",
++ ["selnez.d_3"] = "46200017FGH",
++ ["maddf.d_3"] = "46200018FGH",
++ ["msubf.d_3"] = "46200019FGH",
++ ["rint.d_2"] = "4620001aFG",
++ ["class.d_2"] = "4620001bFG",
++ ["min.d_3"] = "4620001cFGH",
++ ["mina.d_3"] = "4620001dFGH",
++ ["max.d_3"] = "4620001eFGH",
++ ["maxa.d_3"] = "4620001fFGH",
++ ["cmp.af.d_3"] = "46a00000FGH",
++ ["cmp.un.d_3"] = "46a00001FGH",
++ ["cmp.or.d_3"] = "46a00011FGH",
++ ["cmp.eq.d_3"] = "46a00002FGH",
++ ["cmp.une.d_3"] = "46a00012FGH",
++ ["cmp.ueq.d_3"] = "46a00003FGH",
++ ["cmp.ne.d_3"] = "46a00013FGH",
++ ["cmp.lt.d_3"] = "46a00004FGH",
++ ["cmp.ult.d_3"] = "46a00005FGH",
++ ["cmp.le.d_3"] = "46a00006FGH",
++ ["cmp.ule.d_3"] = "46a00007FGH",
++ ["cmp.saf.d_3"] = "46a00008FGH",
++ ["cmp.sun.d_3"] = "46a00009FGH",
++ ["cmp.sor.d_3"] = "46a00019FGH",
++ ["cmp.seq.d_3"] = "46a0000aFGH",
++ ["cmp.sune.d_3"] = "46a0001aFGH",
++ ["cmp.sueq.d_3"] = "46a0000bFGH",
++ ["cmp.sne.d_3"] = "46a0001bFGH",
++ ["cmp.slt.d_3"] = "46a0000cFGH",
++ ["cmp.sult.d_3"] = "46a0000dFGH",
++ ["cmp.sle.d_3"] = "46a0000eFGH",
++ ["cmp.sule.d_3"] = "46a0000fFGH",
++
++ }) do map_op[k] = v end
++
++else -- Instructions removed by MIPSR6.
++
++ for k,v in pairs({
++ -- Traps, don't use.
++ addi_3 = "20000000TSI",
++ daddi_3 = mips64 and "60000000TSI",
++
++ -- Branch on likely, don't use.
++ beqzl_2 = "50000000SB",
++ beql_3 = "50000000STB",
++ bnezl_2 = "54000000SB",
++ bnel_3 = "54000000STB",
++ blezl_2 = "58000000SB",
++ bgtzl_2 = "5c000000SB",
++
++ lwl_2 = "88000000TO",
++ lwr_2 = "98000000TO",
++ swl_2 = "a8000000TO",
++ sdl_2 = mips64 and "b0000000TO",
++ sdr_2 = mips64 and "b1000000TO",
++ swr_2 = "b8000000TO",
++ cache_2 = "bc000000NO",
++ ll_2 = "c0000000TO",
++ pref_2 = "cc000000NO",
++ sc_2 = "e0000000TO",
++ scd_2 = mips64 and "f0000000TO",
++
++ -- Opcode SPECIAL.
++ movf_2 = "00000001DS",
++ movf_3 = "00000001DSC",
++ movt_2 = "00010001DS",
++ movt_3 = "00010001DSC",
++ jr_1 = "00000008S",
++ movz_3 = "0000000aDST",
++ movn_3 = "0000000bDST",
++ mfhi_1 = "00000010D",
++ mthi_1 = "00000011S",
++ mflo_1 = "00000012D",
++ mtlo_1 = "00000013S",
++ mult_2 = "00000018ST",
++ multu_2 = "00000019ST",
++ div_3 = "0000001aST",
++ divu_3 = "0000001bST",
++ ddiv_3 = mips64 and "0000001eST",
++ ddivu_3 = mips64 and "0000001fST",
++ dmult_2 = mips64 and "0000001cST",
++ dmultu_2 = mips64 and "0000001dST",
++
++ -- Opcode REGIMM.
++ tgei_2 = "04080000SI",
++ tgeiu_2 = "04090000SI",
++ tlti_2 = "040a0000SI",
++ tltiu_2 = "040b0000SI",
++ teqi_2 = "040c0000SI",
++ tnei_2 = "040e0000SI",
++ bltzal_2 = "04100000SB",
++ bgezal_2 = "04110000SB",
++ bltzall_2 = "04120000SB",
++ bgezall_2 = "04130000SB",
++
++ -- Opcode SPECIAL2.
++ madd_2 = "70000000ST",
++ maddu_2 = "70000001ST",
++ mul_3 = "70000002DST",
++ msub_2 = "70000004ST",
++ msubu_2 = "70000005ST",
++ clz_2 = "70000020D=TS",
++ clo_2 = "70000021D=TS",
++ dclz_2 = mips64 and "70000024D=TS",
++ dclo_2 = mips64 and "70000025D=TS",
++ sdbbp_0 = "7000003f",
++ sdbbp_1 = "7000003fY",
++
++ -- Opcode COP1.
++ bc1f_1 = "45000000B",
++ bc1f_2 = "45000000CB",
++ bc1t_1 = "45010000B",
++ bc1t_2 = "45010000CB",
++ bc1fl_1 = "45020000B",
++ bc1fl_2 = "45020000CB",
++ bc1tl_1 = "45030000B",
++ bc1tl_2 = "45030000CB",
++
++ ["movf.s_2"] = "46000011FG",
++ ["movf.s_3"] = "46000011FGC",
++ ["movt.s_2"] = "46010011FG",
++ ["movt.s_3"] = "46010011FGC",
++ ["movz.s_3"] = "46000012FGT",
++ ["movn.s_3"] = "46000013FGT",
++ ["cvt.ps.s_3"] = "46000026FGH",
++ ["c.f.s_2"] = "46000030GH",
++ ["c.f.s_3"] = "46000030VGH",
++ ["c.un.s_2"] = "46000031GH",
++ ["c.un.s_3"] = "46000031VGH",
++ ["c.eq.s_2"] = "46000032GH",
++ ["c.eq.s_3"] = "46000032VGH",
++ ["c.ueq.s_2"] = "46000033GH",
++ ["c.ueq.s_3"] = "46000033VGH",
++ ["c.olt.s_2"] = "46000034GH",
++ ["c.olt.s_3"] = "46000034VGH",
++ ["c.ult.s_2"] = "46000035GH",
++ ["c.ult.s_3"] = "46000035VGH",
++ ["c.ole.s_2"] = "46000036GH",
++ ["c.ole.s_3"] = "46000036VGH",
++ ["c.ule.s_2"] = "46000037GH",
++ ["c.ule.s_3"] = "46000037VGH",
++ ["c.sf.s_2"] = "46000038GH",
++ ["c.sf.s_3"] = "46000038VGH",
++ ["c.ngle.s_2"] = "46000039GH",
++ ["c.ngle.s_3"] = "46000039VGH",
++ ["c.seq.s_2"] = "4600003aGH",
++ ["c.seq.s_3"] = "4600003aVGH",
++ ["c.ngl.s_2"] = "4600003bGH",
++ ["c.ngl.s_3"] = "4600003bVGH",
++ ["c.lt.s_2"] = "4600003cGH",
++ ["c.lt.s_3"] = "4600003cVGH",
++ ["c.nge.s_2"] = "4600003dGH",
++ ["c.nge.s_3"] = "4600003dVGH",
++ ["c.le.s_2"] = "4600003eGH",
++ ["c.le.s_3"] = "4600003eVGH",
++ ["c.ngt.s_2"] = "4600003fGH",
++ ["c.ngt.s_3"] = "4600003fVGH",
++ ["movf.d_2"] = "46200011FG",
++ ["movf.d_3"] = "46200011FGC",
++ ["movt.d_2"] = "46210011FG",
++ ["movt.d_3"] = "46210011FGC",
++ ["movz.d_3"] = "46200012FGT",
++ ["movn.d_3"] = "46200013FGT",
++ ["c.f.d_2"] = "46200030GH",
++ ["c.f.d_3"] = "46200030VGH",
++ ["c.un.d_2"] = "46200031GH",
++ ["c.un.d_3"] = "46200031VGH",
++ ["c.eq.d_2"] = "46200032GH",
++ ["c.eq.d_3"] = "46200032VGH",
++ ["c.ueq.d_2"] = "46200033GH",
++ ["c.ueq.d_3"] = "46200033VGH",
++ ["c.olt.d_2"] = "46200034GH",
++ ["c.olt.d_3"] = "46200034VGH",
++ ["c.ult.d_2"] = "46200035GH",
++ ["c.ult.d_3"] = "46200035VGH",
++ ["c.ole.d_2"] = "46200036GH",
++ ["c.ole.d_3"] = "46200036VGH",
++ ["c.ule.d_2"] = "46200037GH",
++ ["c.ule.d_3"] = "46200037VGH",
++ ["c.sf.d_2"] = "46200038GH",
++ ["c.sf.d_3"] = "46200038VGH",
++ ["c.ngle.d_2"] = "46200039GH",
++ ["c.ngle.d_3"] = "46200039VGH",
++ ["c.seq.d_2"] = "4620003aGH",
++ ["c.seq.d_3"] = "4620003aVGH",
++ ["c.ngl.d_2"] = "4620003bGH",
++ ["c.ngl.d_3"] = "4620003bVGH",
++ ["c.lt.d_2"] = "4620003cGH",
++ ["c.lt.d_3"] = "4620003cVGH",
++ ["c.nge.d_2"] = "4620003dGH",
++ ["c.nge.d_3"] = "4620003dVGH",
++ ["c.le.d_2"] = "4620003eGH",
++ ["c.le.d_3"] = "4620003eVGH",
++ ["c.ngt.d_2"] = "4620003fGH",
++ ["c.ngt.d_3"] = "4620003fVGH",
++ ["add.ps_3"] = "46c00000FGH",
++ ["sub.ps_3"] = "46c00001FGH",
++ ["mul.ps_3"] = "46c00002FGH",
++ ["abs.ps_2"] = "46c00005FG",
++ ["mov.ps_2"] = "46c00006FG",
++ ["neg.ps_2"] = "46c00007FG",
++ ["movf.ps_2"] = "46c00011FG",
++ ["movf.ps_3"] = "46c00011FGC",
++ ["movt.ps_2"] = "46c10011FG",
++ ["movt.ps_3"] = "46c10011FGC",
++ ["movz.ps_3"] = "46c00012FGT",
++ ["movn.ps_3"] = "46c00013FGT",
++ ["cvt.s.pu_2"] = "46c00020FG",
++ ["cvt.s.pl_2"] = "46c00028FG",
++ ["pll.ps_3"] = "46c0002cFGH",
++ ["plu.ps_3"] = "46c0002dFGH",
++ ["pul.ps_3"] = "46c0002eFGH",
++ ["puu.ps_3"] = "46c0002fFGH",
++ ["c.f.ps_2"] = "46c00030GH",
++ ["c.f.ps_3"] = "46c00030VGH",
++ ["c.un.ps_2"] = "46c00031GH",
++ ["c.un.ps_3"] = "46c00031VGH",
++ ["c.eq.ps_2"] = "46c00032GH",
++ ["c.eq.ps_3"] = "46c00032VGH",
++ ["c.ueq.ps_2"] = "46c00033GH",
++ ["c.ueq.ps_3"] = "46c00033VGH",
++ ["c.olt.ps_2"] = "46c00034GH",
++ ["c.olt.ps_3"] = "46c00034VGH",
++ ["c.ult.ps_2"] = "46c00035GH",
++ ["c.ult.ps_3"] = "46c00035VGH",
++ ["c.ole.ps_2"] = "46c00036GH",
++ ["c.ole.ps_3"] = "46c00036VGH",
++ ["c.ule.ps_2"] = "46c00037GH",
++ ["c.ule.ps_3"] = "46c00037VGH",
++ ["c.sf.ps_2"] = "46c00038GH",
++ ["c.sf.ps_3"] = "46c00038VGH",
++ ["c.ngle.ps_2"] = "46c00039GH",
++ ["c.ngle.ps_3"] = "46c00039VGH",
++ ["c.seq.ps_2"] = "46c0003aGH",
++ ["c.seq.ps_3"] = "46c0003aVGH",
++ ["c.ngl.ps_2"] = "46c0003bGH",
++ ["c.ngl.ps_3"] = "46c0003bVGH",
++ ["c.lt.ps_2"] = "46c0003cGH",
++ ["c.lt.ps_3"] = "46c0003cVGH",
++ ["c.nge.ps_2"] = "46c0003dGH",
++ ["c.nge.ps_3"] = "46c0003dVGH",
++ ["c.le.ps_2"] = "46c0003eGH",
++ ["c.le.ps_3"] = "46c0003eVGH",
++ ["c.ngt.ps_2"] = "46c0003fGH",
++ ["c.ngt.ps_3"] = "46c0003fVGH",
++
++ -- Opcode COP1X.
++ lwxc1_2 = "4c000000FX",
++ ldxc1_2 = "4c000001FX",
++ luxc1_2 = "4c000005FX",
++ swxc1_2 = "4c000008FX",
++ sdxc1_2 = "4c000009FX",
++ suxc1_2 = "4c00000dFX",
++ prefx_2 = "4c00000fMX",
++ ["alnv.ps_4"] = "4c00001eFGHS",
++ ["madd.s_4"] = "4c000020FRGH",
++ ["madd.d_4"] = "4c000021FRGH",
++ ["madd.ps_4"] = "4c000026FRGH",
++ ["msub.s_4"] = "4c000028FRGH",
++ ["msub.d_4"] = "4c000029FRGH",
++ ["msub.ps_4"] = "4c00002eFRGH",
++ ["nmadd.s_4"] = "4c000030FRGH",
++ ["nmadd.d_4"] = "4c000031FRGH",
++ ["nmadd.ps_4"] = "4c000036FRGH",
++ ["nmsub.s_4"] = "4c000038FRGH",
++ ["nmsub.d_4"] = "4c000039FRGH",
++ ["nmsub.ps_4"] = "4c00003eFRGH",
++
++ }) do map_op[k] = v end
++
++end
++
+ ------------------------------------------------------------------------------
+
+ local function parse_gpr(expr)
+@@ -808,10 +979,12 @@ map_op[".template__"] = function(params, template, nparams)
+ op = op + parse_disp(params[n]); n = n + 1
+ elseif p == "X" then
+ op = op + parse_index(params[n]); n = n + 1
+- elseif p == "B" or p == "J" then
+- local mode, n, s = parse_label(params[n], false)
+- if p == "B" then n = n + 2048 end
+- waction("REL_"..mode, n, s, 1)
++ elseif p == "B" or p == "J" or p == "K" or p == "L" then
++ local mode, m, s = parse_label(params[n], false)
++ if p == "J" then m = m + 0xa800
++ elseif p == "K" then m = m + 0x5000
++ elseif p == "L" then m = m + 0xa000 end
++ waction("REL_"..mode, m, s, 1)
+ n = n + 1
+ elseif p == "A" then
+ op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
+@@ -833,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
+ elseif p == "Z" then
+ op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
+ elseif p == "=" then
+- op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo.
++ n = n - 1 -- Re-use previous parameter for next template char.
+ else
+ assert(false)
+ end
+diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua
+index 5636b23a..8ab5d33a 100644
+--- a/dynasm/dasm_mips64.lua
++++ b/dynasm/dasm_mips64.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM MIPS64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+ -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
+diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
+index 3a7ee9b0..35264f2e 100644
+--- a/dynasm/dasm_ppc.h
++++ b/dynasm/dasm_ppc.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM PPC/PPC64 encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -277,7 +277,7 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+@@ -353,7 +353,11 @@ int dasm_encode(Dst_DECL, void *buffer)
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
+ break;
+ case DASM_REL_LG:
+- CK(n >= 0, UNDEF_LG);
++ if (n < 0) {
++ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
++ goto patchrel;
++ }
++ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
+index f73974dd..ee2afb2e 100644
+--- a/dynasm/dasm_ppc.lua
++++ b/dynasm/dasm_ppc.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM PPC/PPC64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ --
+ -- Support for various extensions contributed by Caio Souza Oliveira.
+@@ -11,9 +11,9 @@
+ local _info = {
+ arch = "ppc",
+ description = "DynASM PPC module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -1722,9 +1722,9 @@ op_template = function(params, template, nparams)
+ elseif p == "M" then
+ op = op + parse_shiftmask(params[n], false); n = n + 1
+ elseif p == "J" or p == "K" then
+- local mode, n, s = parse_label(params[n], false)
+- if p == "K" then n = n + 2048 end
+- waction("REL_"..mode, n, s, 1)
++ local mode, m, s = parse_label(params[n], false)
++ if p == "K" then m = m + 2048 end
++ waction("REL_"..mode, m, s, 1)
+ n = n + 1
+ elseif p == "0" then
+ if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end
+diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
+index 59d9e2b2..8914596a 100644
+--- a/dynasm/dasm_proto.h
++++ b/dynasm/dasm_proto.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM encoding engine prototypes.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -10,8 +10,8 @@
+ #include <stddef.h>
+ #include <stdarg.h>
+
+-#define DASM_IDENT "DynASM 1.4.0"
+-#define DASM_VERSION 10400 /* 1.4.0 */
++#define DASM_IDENT "DynASM 1.5.0"
++#define DASM_VERSION 10500 /* 1.5.0 */
+
+ #ifndef Dst_DECL
+ #define Dst_DECL dasm_State **Dst
+diff --git a/dynasm/dasm_x64.lua b/dynasm/dasm_x64.lua
+index e8bdeb37..2c0a0e86 100644
+--- a/dynasm/dasm_x64.lua
++++ b/dynasm/dasm_x64.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM x64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+ -- This module just sets 64 bit mode for the combined x86/x64 module.
+diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
+index bc636357..d8d4928c 100644
+--- a/dynasm/dasm_x86.h
++++ b/dynasm/dasm_x86.h
+@@ -1,6 +1,6 @@
+ /*
+ ** DynASM x86 encoding engine.
+-** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ ** Released under the MIT license. See dynasm.lua for full copyright notice.
+ */
+
+@@ -194,12 +194,13 @@ void dasm_put(Dst_DECL, int start, ...)
+ switch (action) {
+ case DASM_DISP:
+ if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
+- case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
++ /* fallthrough */
++ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
+ case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
+ case DASM_IMM_D: ofs += 4; break;
+ case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
+ case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
+- case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
++ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
+ case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
+ case DASM_SPACE: p++; ofs += n; break;
+ case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
+@@ -207,8 +208,8 @@ void dasm_put(Dst_DECL, int start, ...)
+ if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+ if (*p < 0x20 && (n&7) == 4) ofs++;
+ switch ((*p++ >> 3) & 3) {
+- case 3: n |= b[pos-3];
+- case 2: n |= b[pos-2];
++ case 3: n |= b[pos-3]; /* fallthrough */
++ case 2: n |= b[pos-2]; /* fallthrough */
+ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+ }
+ continue;
+@@ -238,8 +239,11 @@ void dasm_put(Dst_DECL, int start, ...)
+ }
+ pos++;
+ ofs += 4; /* Maximum offset needed. */
+- if (action == DASM_REL_LG || action == DASM_REL_PC)
++ if (action == DASM_REL_LG || action == DASM_REL_PC) {
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
++ } else if (sizeof(ptrdiff_t) == 8) {
++ ofs += 4;
++ }
+ break;
+ case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+@@ -304,11 +308,13 @@ int dasm_link(Dst_DECL, size_t *szp)
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
++ int op = 0;
+ while (1) {
+- int op, action = *p++;
++ int action = *p++;
+ switch (action) {
+- case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
+- case DASM_REL_PC: op = p[-2]; rel_pc: {
++ case DASM_REL_LG: p++;
++ /* fallthrough */
++ case DASM_REL_PC: {
+ int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
+ if (shrink) { /* Shrinkable branch opcode? */
+ int lofs, lpos = b[pos];
+@@ -329,17 +335,21 @@ int dasm_link(Dst_DECL, size_t *szp)
+ pos += 2;
+ break;
+ }
++ /* fallthrough */
+ case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
++ /* fallthrough */
+ case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
+ case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
+ case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
+ case DASM_LABEL_LG: p++;
++ /* fallthrough */
+ case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
+ case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
+ case DASM_EXTERN: p += 2; break;
+- case DASM_ESC: p++; break;
++ case DASM_ESC: op = *p++; break;
+ case DASM_MARK: break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
++ default: op = action; break;
+ }
+ }
+ stop: (void)0;
+@@ -358,10 +368,22 @@ int dasm_link(Dst_DECL, size_t *szp)
+ do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
+ #define dasmd(x) \
+ do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
++#define dasmq(x) \
++ do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
+ #else
+ #define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
+ #define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
++#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0)
+ #endif
++static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
++{
++ if (sizeof(ptrdiff_t) == 8)
++ dasmq((unsigned long long)x);
++ else
++ dasmd((unsigned int)x);
++ return cp;
++}
++#define dasma(x) (cp = dasma_(cp, (x)))
+
+ /* Pass 3: Encode sections. */
+ int dasm_encode(Dst_DECL, void *buffer)
+@@ -391,12 +413,15 @@ int dasm_encode(Dst_DECL, void *buffer)
+ if (mrm != 5) { mm[-1] -= 0x80; break; } }
+ if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
+ }
++ /* fallthrough */
+ case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
+ case DASM_IMM_DB: if (((n+128)&-256) == 0) {
+ db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
+ } else mark = NULL;
++ /* fallthrough */
+ case DASM_IMM_D: wd: dasmd(n); break;
+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
++ /* fallthrough */
+ case DASM_IMM_W: dasmw(n); break;
+ case DASM_VREG: {
+ int t = *p++;
+@@ -421,7 +446,9 @@ int dasm_encode(Dst_DECL, void *buffer)
+ }
+ case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
+ b++; n = (int)(ptrdiff_t)D->globals[-n];
+- case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
++ /* fallthrough */
++ case DASM_REL_A: rel_a:
++ n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
+ case DASM_REL_PC: rel_pc: {
+ int shrink = *b++;
+ int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
+@@ -431,11 +458,13 @@ int dasm_encode(Dst_DECL, void *buffer)
+ goto wb;
+ }
+ case DASM_IMM_LG:
+- p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
++ p++;
++ if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
++ /* fallthrough */
+ case DASM_IMM_PC: {
+ int *pb = DASM_POS2PTR(D, n);
+- n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
+- goto wd;
++ dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
++ break;
+ }
+ case DASM_LABEL_LG: {
+ int idx = *p++;
+@@ -452,6 +481,7 @@ int dasm_encode(Dst_DECL, void *buffer)
+ case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
+ case DASM_MARK: mark = cp; break;
+ case DASM_ESC: action = *p++;
++ /* fallthrough */
+ default: *cp++ = action; break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
+ }
+diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
+index 4c031e2c..fe2cf579 100644
+--- a/dynasm/dasm_x86.lua
++++ b/dynasm/dasm_x86.lua
+@@ -1,7 +1,7 @@
+ ------------------------------------------------------------------------------
+ -- DynASM x86/x64 module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See dynasm.lua for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -11,9 +11,9 @@ local x64 = x64
+ local _info = {
+ arch = x64 and "x64" or "x86",
+ description = "DynASM x86/x64 module",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+ license = "MIT",
+ }
+@@ -484,6 +484,22 @@ local function wputdarg(n)
+ end
+ end
+
++-- Put signed or unsigned qword or arg.
++local function wputqarg(n)
++ local tn = type(n)
++ if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
++ wputb(band(n, 255))
++ wputb(band(shr(n, 8), 255))
++ wputb(band(shr(n, 16), 255))
++ wputb(shr(n, 24))
++ local sign = n < 0 and 255 or 0
++ wputb(sign); wputb(sign); wputb(sign); wputb(sign)
++ else
++ waction("IMM_D", format("(unsigned int)(%s)", n))
++ waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
++ end
++end
++
+ -- Put operand-size dependent number or arg (defaults to dword).
+ local function wputszarg(sz, n)
+ if not sz or sz == "d" or sz == "q" then wputdarg(n)
+@@ -663,10 +679,16 @@ local function opmodestr(op, args)
+ end
+
+ -- Convert number to valid integer or nil.
+-local function toint(expr)
++local function toint(expr, isqword)
+ local n = tonumber(expr)
+ if n then
+- if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
++ if n % 1 ~= 0 then
++ werror("not an integer number `"..expr.."'")
++ elseif isqword then
++ if n < -2147483648 or n > 2147483647 then
++ n = nil -- Handle it as an expression to avoid precision loss.
++ end
++ elseif n < -2147483648 or n > 4294967295 then
+ werror("bad integer number `"..expr.."'")
+ end
+ return n
+@@ -749,7 +771,7 @@ local function rtexpr(expr)
+ end
+
+ -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
+-local function parseoperand(param)
++local function parseoperand(param, isqword)
+ local t = {}
+
+ local expr = param
+@@ -810,7 +832,7 @@ local function parseoperand(param)
+ if t.disp then break end
+
+ -- [reg+xreg...]
+- local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
++ local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
+ xreg, t.xreg, tp = rtexpr(xreg)
+ if not t.xreg then
+ -- [reg+-expr]
+@@ -837,7 +859,7 @@ local function parseoperand(param)
+ t.disp = dispexpr(tailx)
+ else
+ -- imm or opsize*imm
+- local imm = toint(expr)
++ local imm = toint(expr, isqword)
+ if not imm and sub(expr, 1, 1) == "*" and t.opsize then
+ imm = toint(sub(expr, 2))
+ if imm then
+@@ -955,6 +977,7 @@ end
+ -- "u" Use VEX encoding, vvvv unused.
+ -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the
operand is
+ -- removed from the list used by future characters).
++-- "w" Use VEX encoding, vvvv from 3rd operand.
+ -- "L" Force VEX.L
+ --
+ -- All of the following characters force a flush of the opcode:
+@@ -1536,8 +1559,8 @@ local map_op = {
+ vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
+ vrsqrtps_2 = "rmoy:0Fu52rM",
+ vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
+- vroundpd_3 = "rmioy:660F3AV09rMU",
+- vroundps_3 = "rmioy:660F3AV08rMU",
++ vroundpd_3 = "rmioy:660F3Au09rMU",
++ vroundps_3 = "rmioy:660F3Au08rMU",
+ vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
+ vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
+ vshufpd_4 = "rrmioy:660FVC6rMU",
+@@ -1677,6 +1700,91 @@ local map_op = {
+ -- Intel ADX
+ adcx_2 = "rmqd:660F38F6rM",
+ adox_2 = "rmqd:F30F38F6rM",
++
++ -- BMI1
++ andn_3 = "rrmqd:0F38VF2rM",
++ bextr_3 = "rmrqd:0F38wF7rM",
++ blsi_2 = "rmqd:0F38vF33m",
++ blsmsk_2 = "rmqd:0F38vF32m",
++ blsr_2 = "rmqd:0F38vF31m",
++ tzcnt_2 = "rmqdw:F30FBCrM",
++
++ -- BMI2
++ bzhi_3 = "rmrqd:0F38wF5rM",
++ mulx_3 = "rrmqd:F20F38VF6rM",
++ pdep_3 = "rrmqd:F20F38VF5rM",
++ pext_3 = "rrmqd:F30F38VF5rM",
++ rorx_3 = "rmSqd:F20F3AuF0rMS",
++ sarx_3 = "rmrqd:F30F38wF7rM",
++ shrx_3 = "rmrqd:F20F38wF7rM",
++ shlx_3 = "rmrqd:660F38wF7rM",
++
++ -- FMA3
++ vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
++ vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
++ vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
++ vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
++ vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
++ vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
++
++ vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
++ vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
++ vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
++ vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
++ vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
++ vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
++
++ vfmadd132pd_3 = "rrmoy:660F38VX98rM",
++ vfmadd132ps_3 = "rrmoy:660F38V98rM",
++ vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
++ vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
++ vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
++ vfmadd213ps_3 = "rrmoy:660F38VA8rM",
++ vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
++ vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
++ vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
++ vfmadd231ps_3 = "rrmoy:660F38VB8rM",
++ vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
++ vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
++
++ vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
++ vfmsub132ps_3 = "rrmoy:660F38V9ArM",
++ vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
++ vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
++ vfmsub213pd_3 = "rrmoy:660F38VXAArM",
++ vfmsub213ps_3 = "rrmoy:660F38VAArM",
++ vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
++ vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
++ vfmsub231pd_3 = "rrmoy:660F38VXBArM",
++ vfmsub231ps_3 = "rrmoy:660F38VBArM",
++ vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
++ vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
++
++ vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
++ vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
++ vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
++ vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
++ vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
++ vfnmadd213ps_3 = "rrmoy:660F38VACrM",
++ vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
++ vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
++ vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
++ vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
++ vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
++ vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
++
++ vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
++ vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
++ vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
++ vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
++ vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
++ vfnmsub213ps_3 = "rrmoy:660F38VAErM",
++ vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
++ vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
++ vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
++ vfnmsub231ps_3 = "rrmoy:660F38VBErM",
++ vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
++ vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
+ }
+
+ ------------------------------------------------------------------------------
+@@ -1766,7 +1874,7 @@ end
+
+ ------------------------------------------------------------------------------
+
+-local map_vexarg = { u = false, v = 1, V = 2 }
++local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
+
+ -- Process pattern string.
+ local function dopattern(pat, args, sz, op, needrex)
+@@ -1866,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex)
+ local a = args[narg]
+ narg = narg + 1
+ local mode, imm = a.mode, a.imm
+- if mode == "iJ" and not match("iIJ", c) then
++ if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
+ werror("bad operand size for label")
+ end
+ if c == "S" then
+@@ -2058,14 +2166,16 @@ end
+ local function op_data(params)
+ if not params then return "imm..." end
+ local sz = sub(params.op, 2, 2)
+- if sz == "a" then sz = addrsize end
++ if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
+ for _,p in ipairs(params) do
+- local a = parseoperand(p)
++ local a = parseoperand(p, sz == "q")
+ if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
+ werror("bad mode or size in `"..p.."'")
+ end
+ if a.mode == "iJ" then
+ wputlabel("IMM_", a.imm, 1)
++ elseif sz == "q" then
++ wputqarg(a.imm)
+ else
+ wputszarg(sz, a.imm)
+ end
+@@ -2077,7 +2187,11 @@ map_op[".byte_*"] = op_data
+ map_op[".sbyte_*"] = op_data
+ map_op[".word_*"] = op_data
+ map_op[".dword_*"] = op_data
++map_op[".qword_*"] = op_data
+ map_op[".aword_*"] = op_data
++map_op[".long_*"] = op_data
++map_op[".quad_*"] = op_data
++map_op[".addr_*"] = op_data
+
+ ------------------------------------------------------------------------------
+
+diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
+index 5ec21a79..f4e71eca 100644
+--- a/dynasm/dynasm.lua
++++ b/dynasm/dynasm.lua
+@@ -2,7 +2,7 @@
+ -- DynASM. A dynamic assembler for code generation engines.
+ -- Originally designed and implemented for LuaJIT.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- See below for full copyright notice.
+ ------------------------------------------------------------------------------
+
+@@ -10,14 +10,14 @@
+ local _info = {
+ name = "DynASM",
+ description = "A dynamic assembler for code generation engines",
+- version = "1.4.0",
+- vernum = 10400,
+- release = "2015-10-18",
++ version = "1.5.0",
++ vernum = 10500,
++ release = "2021-05-02",
+ author = "Mike Pall",
+- url = "http://luajit.org/dynasm.html",
++ url = "https://luajit.org/dynasm.html",
+ license = "MIT",
+ copyright = [[
+-Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+@@ -38,7 +38,7 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
++[ MIT license: https://www.opensource.org/licenses/mit-license.php ]
+ ]],
+ }
+
+@@ -630,6 +630,7 @@ end
+ -- Load architecture-specific module.
+ local function loadarch(arch)
+ if not match(arch, "^[%w_]+$") then return "bad arch name" end
++ _G._map_def = map_def
+ local ok, m_arch = pcall(require, "dasm_"..arch)
+ if not ok then return "cannot load module: "..m_arch end
+ g_arch = m_arch
+diff --git a/etc/luajit.1 b/etc/luajit.1
+index 0d263db7..2846d073 100644
+--- a/etc/luajit.1
++++ b/etc/luajit.1
+@@ -6,7 +6,7 @@ luajit \- Just-In-Time Compiler for the Lua Language
+ .B luajit
+ [\fIoptions\fR]... [\fIscript\fR [\fIargs\fR]...]
+ .SH "WEB SITE"
+-.IR http://luajit.org
++.IR https://luajit.org
+ .SH DESCRIPTION
+ .PP
+ This is the command-line program to run Lua programs with \fBLuaJIT\fR.
+@@ -74,15 +74,15 @@ luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end"
+ Runs some nested loops and shows the resulting traces.
+ .SH COPYRIGHT
+ .PP
+-\fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall.
++\fBLuaJIT\fR is Copyright \(co 2005-2021 Mike Pall.
+ .br
+ \fBLuaJIT\fR is open source software, released under the MIT license.
+ .SH SEE ALSO
+ .PP
+ More details in the provided HTML docs or at:
+-.IR http://luajit.org
++.IR https://luajit.org
+ .br
+ More about the Lua language can be found at:
+-.IR http://lua.org/docs.html
++.IR https://lua.org/docs.html
+ .PP
+ lua(1)
+diff --git a/etc/luajit.pc b/etc/luajit.pc
+index a78f1746..39e1e577 100644
+--- a/etc/luajit.pc
++++ b/etc/luajit.pc
+@@ -17,7 +17,7 @@ INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver}
+
+ Name: LuaJIT
+ Description: Just-in-time compiler for Lua
+-URL: http://luajit.org
++URL: https://luajit.org
+ Version: ${version}
+ Requires:
+ Libs: -L${libdir} -l${libname}
+diff --git a/src/Makefile b/src/Makefile
+index f56465d1..2538503f 100644
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -7,7 +7,7 @@
+ # Also works with MinGW and Cygwin on Windows.
+ # Please check msvcbuild.bat for building with MSVC on Windows.
+ #
+-# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ ##############################################################################
+
+ MAJVER= 2
+@@ -110,8 +110,8 @@ XCFLAGS=
+ #XCFLAGS+= -DLUAJIT_NUMMODE=1
+ #XCFLAGS+= -DLUAJIT_NUMMODE=2
+ #
+-# Enable GC64 mode for x64.
+-#XCFLAGS+= -DLUAJIT_ENABLE_GC64
++# Disable LJ_GC64 mode for x64.
++#XCFLAGS+= -DLUAJIT_DISABLE_GC64
+ #
+ ##############################################################################
+
+@@ -132,7 +132,6 @@ XCFLAGS=
+ #
+ # This define is required to run LuaJIT under Valgrind. The Valgrind
+ # header files must be installed. You should enable debug information, too.
+-# Use --suppressions=lj.supp to avoid some false positives.
+ #XCFLAGS+= -DLUAJIT_USE_VALGRIND
+ #
+ # This is the client for the GDB JIT API. GDB 7.0 or higher is required
+@@ -158,13 +157,16 @@ XCFLAGS=
+
+ ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
+ HOST_SYS= Windows
+- HOST_RM= del
+ else
+ HOST_SYS:= $(shell uname -s)
+ ifneq (,$(findstring MINGW,$(HOST_SYS)))
+ HOST_SYS= Windows
+ HOST_MSYS= mingw
+ endif
++ ifneq (,$(findstring MSYS,$(HOST_SYS)))
++ HOST_SYS= Windows
++ HOST_MSYS= mingw
++ endif
+ ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
+ HOST_SYS= Windows
+ HOST_MSYS= cygwin
+@@ -191,7 +193,7 @@ CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
+ LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
+
+ HOST_CC= $(CC)
+-HOST_RM= rm -f
++HOST_RM?= rm -f
+ # If left blank, minilua is built and used. You can supply an installed
+ # copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua
+ HOST_LUA=
+@@ -209,7 +211,7 @@ TARGET_CC= $(STATIC_CC)
+ TARGET_STCC= $(STATIC_CC)
+ TARGET_DYNCC= $(DYNAMIC_CC)
+ TARGET_LD= $(CROSS)$(CC)
+-TARGET_AR= $(CROSS)ar rcus 2>/dev/null
++TARGET_AR= $(CROSS)ar rcus
+ TARGET_STRIP= $(CROSS)strip
+
+ TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
+@@ -217,6 +219,7 @@ TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER)
+ TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib
+ TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME)
+ TARGET_DLLNAME= lua$(NODOTABIVER).dll
++TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a
+ TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME)
+ TARGET_DYNXLDOPTS=
+
+@@ -305,24 +308,22 @@ endif
+ TARGET_SYS?= $(HOST_SYS)
+ ifeq (Windows,$(TARGET_SYS))
+ TARGET_STRIP+= --strip-unneeded
+- TARGET_XSHLDFLAGS= -shared
++ TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME)
+ TARGET_DYNXLDOPTS=
+ else
++ TARGET_AR+= 2>/dev/null
+ ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
+ TARGET_XCFLAGS+= -fno-stack-protector
+ endif
+ ifeq (Darwin,$(TARGET_SYS))
+ ifeq (,$(MACOSX_DEPLOYMENT_TARGET))
+- export MACOSX_DEPLOYMENT_TARGET=10.4
++ $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
+ endif
+ TARGET_STRIP+= -x
++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+ TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
+ TARGET_DYNXLDOPTS=
+ TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+- ifeq (x64,$(TARGET_LJARCH))
+- TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
+- TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
+- endif
+ else
+ ifeq (iOS,$(TARGET_SYS))
+ TARGET_STRIP+= -x
+@@ -333,6 +334,13 @@ ifeq (iOS,$(TARGET_SYS))
+ TARGET_XCFLAGS+= -fno-omit-frame-pointer
+ endif
+ else
++ ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
++ # Find out whether the target toolchain always generates unwind tables.
++ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
++ ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
++ endif
++ endif
+ ifneq (SunOS,$(TARGET_SYS))
+ ifneq (PS3,$(TARGET_SYS))
+ TARGET_XLDFLAGS+= -Wl,-E
+@@ -359,7 +367,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS))
+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
+ else
+ ifeq (iOS,$(TARGET_SYS))
+- HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
++ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1
+ else
+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER
+ endif
+@@ -439,6 +447,9 @@ ifeq (arm,$(TARGET_LJARCH))
+ DASM_AFLAGS+= -D IOS
+ endif
+ else
++ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
++ DASM_AFLAGS+= -D MIPSR6
++endif
+ ifeq (ppc,$(TARGET_LJARCH))
+ ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D SQRT
+@@ -452,9 +463,6 @@ ifeq (ppc,$(TARGET_LJARCH))
+ ifeq (PS3,$(TARGET_SYS))
+ DASM_AFLAGS+= -D PPE -D TOC
+ endif
+- ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
+- DASM_ARCH= ppc64
+- endif
+ endif
+ endif
+ endif
+@@ -476,13 +484,15 @@ LJVM_BOUT= $(LJVM_S)
+ LJVM_MODE= elfasm
+
+ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
+- lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
++ lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
++ lib_buffer.o
+ LJLIB_C= $(LJLIB_O:.o=.c)
+
+-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
++LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
+ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
+- lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
+- lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
++ lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
++ lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
++ lj_api.o lj_profile.o \
+ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
+ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
+@@ -557,6 +567,7 @@ ifeq (Windows,$(HOST_SYS))
+ MINILUA_X= host\minilua
+ BUILDVM_X= host\buildvm
+ ALL_RM:= $(subst /,\,$(ALL_RM))
++ HOST_RM= del
+ endif
+ endif
+
+@@ -597,7 +608,6 @@ E= @echo
+ default all: $(TARGET_T)
+
+ amalg:
+- @grep "^[+|]" ljamalg.c
+ $(MAKE) all "LJCORE_O=ljamalg.o"
+
+ clean:
+@@ -631,7 +641,7 @@ $(MINILUA_T): $(MINILUA_O)
+ $(E) "HOSTLINK $@"
+ $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
+
+-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua
++host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h
+ $(E) "DYNASM $@"
+ $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
+
+diff --git a/src/Makefile.dep b/src/Makefile.dep
+index 2b1cb5ef..1ad6701a 100644
+--- a/src/Makefile.dep
++++ b/src/Makefile.dep
+@@ -1,15 +1,19 @@
+ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
+- lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
++ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h
+ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
+- lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \
+- lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
+- lj_strfmt.h lj_lib.h lj_libdef.h
++ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
++ lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
++ lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \
++ lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h
+ lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
++lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
++ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
++ lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \
++ lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h
+ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
+ lj_libdef.h
+@@ -28,7 +32,7 @@ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
+ lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+- lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
++ lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h
+ lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
+ lj_libdef.h
+@@ -41,16 +45,18 @@ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+-lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
++lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \
++ lj_prng.h
+ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
+ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+- lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
+- lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+- lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \
+- lj_asm_*.h
++ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
++ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
++ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
++ lj_emit_*.h lj_asm_*.h
++lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
+ lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
+ lj_bcdef.h
+ lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+@@ -75,8 +81,8 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h
+ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
+- lj_ccallback.h
++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \
++ lj_cdata.h lj_cconv.h lj_ccallback.h
+ lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
+ lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
+@@ -108,10 +114,10 @@ lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
+ lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_strfmt.h
+ lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
+- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+- lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
+- lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
++ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
++ lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
++ lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
+ lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h lj_vm.h
+@@ -125,21 +131,21 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
+- lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
++ lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
+ lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
+ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
+ lj_strfmt.h
+ lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
+- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
+- lj_bcdump.h lj_lib.h
++ lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \
++ lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h
+ lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+ lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
+- lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
++ lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h
+ lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
+@@ -155,7 +161,7 @@ lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
+ lj_vm.h
+ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
++ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h
+ lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_strscan.h
+@@ -168,6 +174,7 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
+ lj_vm.h lj_vmevent.h
++lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
+ lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
+@@ -175,7 +182,10 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+- lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
++ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
++lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
++ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
++ lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
+ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
+@@ -183,11 +193,13 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
+- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
++ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
++ lj_alloc.h luajit.h
+ lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+- lj_err.h lj_errmsg.h lj_str.h lj_char.h
++ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
+ lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
++ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \
++ lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h
+ lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
+ lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+@@ -198,36 +210,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \
+ lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
+- lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h
++ lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
+ lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+- lj_gc.h lj_udata.h
++ lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
+ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
+ lj_vm.h lj_vmevent.h
+ lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.h lj_vm.h
+-ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
+- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
+- lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+- lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
+- lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
+- lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
+- lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
+- lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
+- lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
+- lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
+- lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
+- lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
+- lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
+- lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
+- lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
+- lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
+- lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
+- lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
+- lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+- lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+- lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+- lib_ffi.c lib_init.c
++ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
++ lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \
++ lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \
++ lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
++ lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
++ lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
++ lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
++ lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \
++ lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
++ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \
++ lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
++ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
++ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
++ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
++ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
++ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
++ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
++ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
++ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
++ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
++ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
++ lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
++ lib_buffer.c lib_init.c
+ luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
+ host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
+diff --git a/src/host/buildvm.c b/src/host/buildvm.c
+index de23fabd..9dc328fc 100644
+--- a/src/host/buildvm.c
++++ b/src/host/buildvm.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** This is a tool to build the hand-tuned assembler code required for
+ ** LuaJIT's bytecode interpreter. It supports a variety of output formats
+diff --git a/src/host/buildvm.h b/src/host/buildvm.h
+index b90428dc..f81ef7e0 100644
+--- a/src/host/buildvm.h
++++ b/src/host/buildvm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _BUILDVM_H
+diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
+index ffd14903..01a1ba06 100644
+--- a/src/host/buildvm_asm.c
++++ b/src/host/buildvm_asm.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: Assembler source code emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "buildvm.h"
+@@ -144,14 +144,6 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
+ fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
+ (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+ } else if ((ins >> 26) == 18) {
+-#if LJ_ARCH_PPC64
+- const char *suffix = strchr(sym, '@');
+- if (suffix && suffix[1] == 'h') {
+- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
+- } else if (suffix && suffix[1] == 'l') {
+- fprintf(ctx->fp, "\tld 12, %s\n", sym);
+- } else
+-#endif
+ fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
+ } else {
+ fprintf(stderr,
+@@ -250,9 +242,6 @@ void emit_asm(BuildCtx *ctx)
+ int i, rel;
+
+ fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+-#if LJ_ARCH_PPC64
+- fprintf(ctx->fp, "\t.abiversion 2\n");
+-#endif
+ fprintf(ctx->fp, "\t.text\n");
+ emit_asm_align(ctx, 4);
+
+@@ -338,7 +327,7 @@ void emit_asm(BuildCtx *ctx)
+ #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
+ fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
+ #endif
+-#if LJ_TARGET_PPC && !LJ_TARGET_PS3
++#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
+ /* Hard-float ABI. */
+ fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
+ #endif
+diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c
+index d579f4d4..02b51c4e 100644
+--- a/src/host/buildvm_fold.c
++++ b/src/host/buildvm_fold.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: IR folding hash table generator.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "buildvm.h"
+diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
+index 2956fdb6..20bb77cd 100644
+--- a/src/host/buildvm_lib.c
++++ b/src/host/buildvm_lib.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: library definition compiler.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include "buildvm.h"
+@@ -385,6 +385,8 @@ void emit_lib(BuildCtx *ctx)
+ ok = LJ_HASJIT;
+ else if (!strcmp(buf, "#if LJ_HASFFI\n"))
+ ok = LJ_HASFFI;
++ else if (!strcmp(buf, "#if LJ_HASBUFFER\n"))
++ ok = LJ_HASBUFFER;
+ if (!ok) {
+ int lvl = 1;
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
+index 2eb2bb7b..aa061e6e 100644
+--- a/src/host/buildvm_peobj.c
++++ b/src/host/buildvm_peobj.c
+@@ -1,6 +1,6 @@
+ /*
+ ** LuaJIT VM builder: PE object emitter.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Only used for building on Windows, since we cannot assume the presence
+ ** of a suitable assembler. The host and target byte order must match.
+@@ -9,7 +9,7 @@
+ #include "buildvm.h"
+ #include "lj_bc.h"
+
+-#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC
++#if LJ_TARGET_X86ORX64
+
+ /* Context for PE object emitter. */
+ static char *strtab;
+@@ -93,12 +93,6 @@ typedef struct PEsymaux {
+ #define PEOBJ_RELOC_ADDR32NB 0x03
+ #define PEOBJ_RELOC_OFS 0
+ #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
+-#elif LJ_TARGET_PPC
+-#define PEOBJ_ARCH_TARGET 0x01f2
+-#define PEOBJ_RELOC_REL32 0x06
+-#define PEOBJ_RELOC_DIR32 0x02
+-#define PEOBJ_RELOC_OFS (-4)
+-#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */
+ #endif
+
+ /* Section numbers (0-based). */
+@@ -251,15 +245,8 @@ void emit_peobj(BuildCtx *ctx)
+ /* Write .text section. */
+ host_endian.u = 1;
+ if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
+-#if LJ_TARGET_PPC
+- uint32_t *p = (uint32_t *)ctx->code;
+- int n = (int)(ctx->codesz >> 2);
+- for (i = 0; i < n; i++, p++)
+- *p = lj_bswap(*p); /* Byteswap .text section. */
+-#else
+ fprintf(stderr, "Error: different byte order for host and target\n");
+ exit(1);
+-#endif
+ }
+ owrite(ctx, ctx->code, ctx->codesz);
+ for (i = 0; i < ctx->nreloc; i++) {
+diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
+index 6f5a05cc..921769f7 100644
+--- a/src/host/genlibbc.lua
++++ b/src/host/genlibbc.lua
+@@ -2,7 +2,7 @@
+ -- Lua script to dump the bytecode of the library functions written in Lua.
+ -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+ ----------------------------------------------------------------------------
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+
+diff --git a/src/host/genminilua.lua b/src/host/genminilua.lua
+index 50feff01..c05ed63c 100644
+--- a/src/host/genminilua.lua
++++ b/src/host/genminilua.lua
+@@ -2,7 +2,7 @@
+ -- Lua script to generate a customized, minified version of Lua.
+ -- The resulting 'minilua' is used for the build process of LuaJIT.
+ ----------------------------------------------------------------------------
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+
+diff --git a/src/host/minilua.c b/src/host/minilua.c
+index 79150286..cfc7491d 100644
+--- a/src/host/minilua.c
++++ b/src/host/minilua.c
+@@ -1134,7 +1134,7 @@ if(!cl->isC){
+ CallInfo*ci;
+ StkId st,base;
+ Proto*p=cl->p;
+-luaD_checkstack(L,p->maxstacksize);
++luaD_checkstack(L,p->maxstacksize+p->numparams);
+ func=restorestack(L,funcr);
+ if(!p->is_vararg){
+ base=func+1;
+diff --git a/src/jit/bc.lua b/src/jit/bc.lua
+index 193cf01f..e58a3fef 100644
+--- a/src/jit/bc.lua
++++ b/src/jit/bc.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT bytecode listing module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
+index c17c88e0..ab13667a 100644
+--- a/src/jit/bcsave.lua
++++ b/src/jit/bcsave.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT module to save/list bytecode.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+@@ -17,6 +17,10 @@ local bit = require("bit")
+ -- Symbol name prefix for LuaJIT bytecode.
+ local LJBC_PREFIX = "luaJIT_BC_"
+
++local type, assert = type, assert
++local format = string.format
++local tremove, tconcat = table.remove, table.concat
++
+ ------------------------------------------------------------------------------
+
+ local function usage()
+@@ -63,8 +67,18 @@ local map_type = {
+ }
+
+ local map_arch = {
+- x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true,
+- ppc = true, mips = true, mipsel = true,
++ x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
++ x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
++ arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
++ arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
++ arm64be = { e = "be", b = 64, m = 183, },
++ ppc = { e = "be", b = 32, m = 20, },
++ mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
++ mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
++ mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
++ mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
++ mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
++ mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
+ }
+
+ local map_os = {
+@@ -73,33 +87,33 @@ local map_os = {
+ }
+
+ local function checkarg(str, map, err)
+- str = string.lower(str)
++ str = str:lower()
+ local s = check(map[str], "unknown ", err)
+- return s == true and str or s
++ return type(s) == "string" and s or str
+ end
+
+ local function detecttype(str)
+- local ext = string.match(string.lower(str), "%.(%a+)$")
++ local ext = str:lower():match("%.(%a+)$")
+ return map_type[ext] or "raw"
+ end
+
+ local function checkmodname(str)
+- check(string.match(str, "^[%w_.%-]+$"), "bad module name")
+- return string.gsub(str, "[%.%-]", "_")
++ check(str:match("^[%w_.%-]+$"), "bad module name")
++ return str:gsub("[%.%-]", "_")
+ end
+
+ local function detectmodname(str)
+ if type(str) == "string" then
+- local tail = string.match(str, "[^/\\]+$")
++ local tail = str:match("[^/\\]+$")
+ if tail then str = tail end
+- local head = string.match(str, "^(.*)%.[^.]*$")
++ local head = str:match("^(.*)%.[^.]*$")
+ if head then str = head end
+- str = string.match(str, "^[%w_.%-]+")
++ str = str:match("^[%w_.%-]+")
+ else
+ str = nil
+ end
+ check(str, "cannot derive module name, use -n name")
+- return string.gsub(str, "[%.%-]", "_")
++ return str:gsub("[%.%-]", "_")
+ end
+
+ ------------------------------------------------------------------------------
+@@ -118,8 +132,8 @@ end
+ local function bcsave_c(ctx, output, s)
+ local fp = savefile(output, "w")
+ if ctx.type == "c" then
+- fp:write(string.format([[
+-#ifdef _cplusplus
++ fp:write(format([[
++#ifdef __cplusplus
+ extern "C"
+ #endif
+ #ifdef _WIN32
+@@ -128,7 +142,7 @@ __declspec(dllexport)
+ const unsigned char %s%s[] = {
+ ]], LJBC_PREFIX, ctx.modname))
+ else
+- fp:write(string.format([[
++ fp:write(format([[
+ #define %s%s_SIZE %d
+ static const unsigned char %s%s[] = {
+ ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
+@@ -138,13 +152,13 @@ static const unsigned char %s%s[] = {
+ local b = tostring(string.byte(s, i))
+ m = m + #b + 1
+ if m > 78 then
+- fp:write(table.concat(t, ",", 1, n), ",\n")
++ fp:write(tconcat(t, ",", 1, n), ",\n")
+ n, m = 0, #b + 1
+ end
+ n = n + 1
+ t[n] = b
+ end
+- bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n")
++ bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
+ end
+
+ local function bcsave_elfobj(ctx, output, s, ffi)
+@@ -199,12 +213,8 @@ typedef struct {
+ } ELF64obj;
+ ]]
+ local symname = LJBC_PREFIX..ctx.modname
+- local is64, isbe = false, false
+- if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then
+- is64 = true
+- elseif ctx.arch == "ppc" or ctx.arch == "mips" then
+- isbe = true
+- end
++ local ai = assert(map_arch[ctx.arch])
++ local is64, isbe = ai.b == 64, ai.e == "be"
+
+ -- Handle different host/target endianess.
+ local function f32(x) return x end
+@@ -237,10 +247,8 @@ typedef struct {
+ hdr.eendian = isbe and 2 or 1
+ hdr.eversion = 1
+ hdr.type = f16(1)
+- hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch])
+- if ctx.arch == "mips" or ctx.arch == "mipsel" then
+- hdr.flags = f32(0x50001006)
+- end
++ hdr.machine = f16(ai.m)
++ hdr.flags = f32(ai.f or 0)
+ hdr.version = f32(1)
+ hdr.shofs = fofs(ffi.offsetof(o, "sect"))
+ hdr.ehsize = f16(ffi.sizeof(hdr))
+@@ -275,7 +283,7 @@ typedef struct {
+ o.sect[2].size = fofs(ofs)
+ o.sect[3].type = f32(3) -- .strtab
+ o.sect[3].ofs = fofs(sofs + ofs)
+- o.sect[3].size = fofs(#symname+1)
++ o.sect[3].size = fofs(#symname+2)
+ ffi.copy(o.space+ofs+1, symname)
+ ofs = ofs + #symname + 2
+ o.sect[4].type = f32(1) -- .rodata
+@@ -336,12 +344,8 @@ typedef struct {
+ } PEobj;
+ ]]
+ local symname = LJBC_PREFIX..ctx.modname
+- local is64 = false
+- if ctx.arch == "x86" then
+- symname = "_"..symname
+- elseif ctx.arch == "x64" then
+- is64 = true
+- end
++ local ai = assert(map_arch[ctx.arch])
++ local is64 = ai.b == 64
+ local symexport = " /EXPORT:"..symname..",DATA "
+
+ -- The file format is always little-endian. Swap if the host is big-endian.
+@@ -355,7 +359,7 @@ typedef struct {
+ -- Create PE object and fill in header.
+ local o = ffi.new("PEobj")
+ local hdr = o.hdr
+- hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch])
++ hdr.arch = f16(assert(ai.p))
+ hdr.nsects = f16(2)
+ hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
+ hdr.nsyms = f32(6)
+@@ -605,16 +609,16 @@ local function docmd(...)
+ local n = 1
+ local list = false
+ local ctx = {
+- strip = true, arch = jit.arch, os = string.lower(jit.os),
++ strip = true, arch = jit.arch, os = jit.os:lower(),
+ type = false, modname = false,
+ }
+ while n <= #arg do
+ local a = arg[n]
+- if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then
+- table.remove(arg, n)
++ if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
++ tremove(arg, n)
+ if a == "--" then break end
+ for m=2,#a do
+- local opt = string.sub(a, m, m)
++ local opt = a:sub(m, m)
+ if opt == "l" then
+ list = true
+ elseif opt == "s" then
+@@ -627,13 +631,13 @@ local function docmd(...)
+ if n ~= 1 then usage() end
+ arg[1] = check(loadstring(arg[1]))
+ elseif opt == "n" then
+- ctx.modname = checkmodname(table.remove(arg, n))
++ ctx.modname = checkmodname(tremove(arg, n))
+ elseif opt == "t" then
+- ctx.type = checkarg(table.remove(arg, n), map_type, "file type")
++ ctx.type = checkarg(tremove(arg, n), map_type, "file type")
+ elseif opt == "a" then
+- ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture")
++ ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
+ elseif opt == "o" then
+- ctx.os = checkarg(table.remove(arg, n), map_os, "OS name")
++ ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
+ else
+ usage()
+ end
+diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
+index c2dd7769..ba79c47e 100644
+--- a/src/jit/dis_arm.lua
++++ b/src/jit/dis_arm.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT ARM disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
+index a7173326..ad909fbd 100644
+--- a/src/jit/dis_arm64.lua
++++ b/src/jit/dis_arm64.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT ARM64 disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ --
+ -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+@@ -1089,7 +1089,7 @@ local function disass_ins(ctx)
+ last = "#"..(sf+32 - immr)
+ operands[#operands] = last
+ x = x + 1
+- elseif x >= immr then
++ else
+ name = a2
+ x = x - immr + 1
+ end
+diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
+index 7eb389e2..edcbffa8 100644
+--- a/src/jit/dis_arm64be.lua
++++ b/src/jit/dis_arm64be.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT ARM64BE disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- ARM64 instructions are always little-endian. So just forward to the
+diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
+index a12b8e62..6ad17f54 100644
+--- a/src/jit/dis_mips.lua
++++ b/src/jit/dis_mips.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPS disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT/X license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
+ local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+
+ ------------------------------------------------------------------------------
+--- Primary and extended opcode maps
++-- Extended opcode maps common to all MIPS releases
+ ------------------------------------------------------------------------------
+
+-local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
+ local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
+ local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
+
++local map_cop0 = {
++ shift = 25, mask = 1,
++ [0] = {
++ shift = 21, mask = 15,
++ [0] = "mfc0TDW", [4] = "mtc0TDW",
++ [10] = "rdpgprDT",
++ [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
++ [14] = "wrpgprDT",
++ }, {
++ shift = 0, mask = 63,
++ [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
++ [24] = "eret", [31] = "deret",
++ [32] = "wait",
++ },
++}
++
++------------------------------------------------------------------------------
++-- Primary and extended opcode maps for MIPS R1-R5
++------------------------------------------------------------------------------
++
++local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
++
+ local map_special = {
+ shift = 0, mask = 63,
+ [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
+@@ -87,22 +108,6 @@ local map_regimm = {
+ false, false, false, "synciSO",
+ }
+
+-local map_cop0 = {
+- shift = 25, mask = 1,
+- [0] = {
+- shift = 21, mask = 15,
+- [0] = "mfc0TDW", [4] = "mtc0TDW",
+- [10] = "rdpgprDT",
+- [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
+- [14] = "wrpgprDT",
+- }, {
+- shift = 0, mask = 63,
+- [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
+- [24] = "eret", [31] = "deret",
+- [32] = "wait",
+- },
+-}
+-
+ local map_cop1s = {
+ shift = 0, mask = 63,
+ [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
+@@ -233,6 +238,208 @@ local map_pri = {
+ false, "sdc1HSO", "sdc2TSO", "sdTSO",
+ }
+
++------------------------------------------------------------------------------
++-- Primary and extended opcode maps for MIPS R6
++------------------------------------------------------------------------------
++
++local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
++local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
++local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
++local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
++local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
++local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
++local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
++local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
++
++local map_special_r6 = {
++ shift = 0, mask = 63,
++ [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
++ false, map_srl, "sraDTA",
++ "sllvDTS", false, map_srlv, "sravDTS",
++ "jrS", "jalrD1S", false, false,
++ "syscallY", "breakY", false, "sync",
++ "clzDS", "cloDS", "dclzDS", "dcloDS",
++ "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
++ map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
++ map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
++ "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
++ "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
++ false, false, "sltDST", "sltuDST",
++ "daddDST", "dadduDST", "dsubDST", "dsubuDST",
++ "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
++ "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
++ "dsllDTA", false, "dsrlDTA", "dsraDTA",
++ "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
++}
++
++local map_bshfl_r6 = {
++ shift = 9, mask = 3,
++ [1] = "alignDSTa",
++ _ = {
++ shift = 6, mask = 31,
++ [0] = "bitswapDT",
++ [2] = "wsbhDT",
++ [16] = "sebDT",
++ [24] = "sehDT",
++ }
++}
++
++local map_dbshfl_r6 = {
++ shift = 9, mask = 3,
++ [1] = "dalignDSTa",
++ _ = {
++ shift = 6, mask = 31,
++ [0] = "dbitswapDT",
++ [2] = "dsbhDT",
++ [5] = "dshdDT",
++ }
++}
++
++local map_special3_r6 = {
++ shift = 0, mask = 63,
++ [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
++ [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
++ [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
++}
++
++local map_regimm_r6 = {
++ shift = 16, mask = 31,
++ [0] = "bltzSB", [1] = "bgezSB",
++ [6] = "dahiSI", [30] = "datiSI",
++ [23] = "sigrieI", [31] = "synciSO",
++}
++
++local map_pcrel_r6 = {
++ shift = 19, mask = 3,
++ [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
++ shift = 18, mask = 1,
++ [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
++ }
++}
++
++local map_cop1s_r6 = {
++ shift = 0, mask = 63,
++ [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
++ "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
++ "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
++ "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
++ "sel.sFGH", false, false, false,
++ "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
++ "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
++ "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
++ false, "cvt.d.sFG", false, false,
++ "cvt.w.sFG", "cvt.l.sFG",
++}
++
++local map_cop1d_r6 = {
++ shift = 0, mask = 63,
++ [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
++ "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
++ "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
++ "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
++ "sel.dFGH", false, false, false,
++ "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
++ "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
++ "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
++ "cvt.s.dFG", false, false, false,
++ "cvt.w.dFG", "cvt.l.dFG",
++}
++
++local map_cop1w_r6 = {
++ shift = 0, mask = 63,
++ [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
++ "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
++ "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
++ "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
++ false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
++ false, false, false, false,
++ false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
++ false, false, false, false,
++ "cvt.s.wFG", "cvt.d.wFG",
++}
++
++local map_cop1l_r6 = {
++ shift = 0, mask = 63,
++ [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
++ "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
++ "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
++ "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
++ false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
++ false, false, false, false,
++ false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
++ false, false, false, false,
++ "cvt.s.lFG", "cvt.d.lFG",
++}
++
++local map_cop1_r6 = {
++ shift = 21, mask = 31,
++ [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
++ "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
++ false, "bc1eqzHB", false, false,
++ false, "bc1nezHB", false, false,
++ map_cop1s_r6, map_cop1d_r6, false, false,
++ map_cop1w_r6, map_cop1l_r6,
++}
++
++local function maprs_popTS(rs, rt)
++ if rt == 0 then return 0 elseif rs == 0 then return 1
++ elseif rs == rt then return 2 else return 3 end
++end
++
++local map_pop06_r6 = {
++ maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
++}
++local map_pop07_r6 = {
++ maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
++}
++local map_pop26_r6 = {
++ maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
++}
++local map_pop27_r6 = {
++ maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
++}
++
++local function maprs_popS(rs, rt)
++ if rs == 0 then return 0 else return 1 end
++end
++
++local map_pop66_r6 = {
++ maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
++}
++local map_pop76_r6 = {
++ maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
++}
++
++local function maprs_popST(rs, rt)
++ if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end
++end
++
++local map_pop10_r6 = {
++ maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
++}
++local map_pop30_r6 = {
++ maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
++}
++
++local map_pri_r6 = {
++ [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
++ "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
++ map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
++ "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
++ map_cop0, map_cop1_r6, false, false,
++ false, false, map_pop26_r6, map_pop27_r6,
++ map_pop30_r6, "daddiuTSI", false, false,
++ false, "dauiTSI", false, map_special3_r6,
++ "lbTSO", "lhTSO", false, "lwTSO",
++ "lbuTSO", "lhuTSO", false, false,
++ "sbTSO", "shTSO", false, "swTSO",
++ false, false, false, false,
++ false, "lwc1HSO", "bc#", false,
++ false, "ldc1HSO", map_pop66_r6, "ldTSO",
++ false, "swc1HSO", "balc#", map_pcrel_r6,
++ false, "sdc1HSO", map_pop76_r6, "sdTSO",
++}
++
+ ------------------------------------------------------------------------------
+
+ local map_gpr = {
+@@ -287,10 +494,14 @@ local function disass_ins(ctx)
+ ctx.op = op
+ ctx.rel = nil
+
+- local opat = map_pri[rshift(op, 26)]
++ local opat = ctx.map_pri[rshift(op, 26)]
+ while type(opat) ~= "string" do
+ if not opat then return unknown(ctx) end
+- opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
++ if opat.maprs then
++ opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
++ else
++ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
++ end
+ end
+ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+@@ -314,6 +525,8 @@ local function disass_ins(ctx)
+ x = "f"..band(rshift(op, 21), 31)
+ elseif p == "A" then
+ x = band(rshift(op, 6), 31)
++ elseif p == "a" then
++ x = band(rshift(op, 6), 7)
+ elseif p == "E" then
+ x = band(rshift(op, 6), 31) + 32
+ elseif p == "M" then
+@@ -333,6 +546,10 @@ local function disass_ins(ctx)
+ x = band(rshift(op, 11), 31) - last + 33
+ elseif p == "I" then
+ x = arshift(lshift(op, 16), 16)
++ elseif p == "2" then
++ x = arshift(lshift(op, 13), 11)
++ elseif p == "3" then
++ x = arshift(lshift(op, 14), 11)
+ elseif p == "U" then
+ x = band(op, 0xffff)
+ elseif p == "O" then
+@@ -342,7 +559,15 @@ local function disass_ins(ctx)
+ local index = map_gpr[band(rshift(op, 16), 31)]
+ operands[#operands] = format("%s(%s)", index, last)
+ elseif p == "B" then
+- x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
++ x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
++ ctx.rel = x
++ x = format("0x%08x", x)
++ elseif p == "b" then
++ x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
++ ctx.rel = x
++ x = format("0x%08x", x)
++ elseif p == "#" then
++ x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
+ ctx.rel = x
+ x = format("0x%08x", x)
+ elseif p == "J" then
+@@ -408,6 +633,7 @@ local function create(code, addr, out)
+ ctx.disass = disass_block
+ ctx.hexdump = 8
+ ctx.get = get_be
++ ctx.map_pri = map_pri
+ return ctx
+ end
+
+@@ -417,6 +643,19 @@ local function create_el(code, addr, out)
+ return ctx
+ end
+
++local function create_r6(code, addr, out)
++ local ctx = create(code, addr, out)
++ ctx.map_pri = map_pri_r6
++ return ctx
++end
++
++local function create_r6_el(code, addr, out)
++ local ctx = create(code, addr, out)
++ ctx.get = get_le
++ ctx.map_pri = map_pri_r6
++ return ctx
++end
++
+ -- Simple API: disassemble code (a string) at address and output via out.
+ local function disass(code, addr, out)
+ create(code, addr, out):disass()
+@@ -426,6 +665,14 @@ local function disass_el(code, addr, out)
+ create_el(code, addr, out):disass()
+ end
+
++local function disass_r6(code, addr, out)
++ create_r6(code, addr, out):disass()
++end
++
++local function disass_r6_el(code, addr, out)
++ create_r6_el(code, addr, out):disass()
++end
++
+ -- Return register name for RID.
+ local function regname(r)
+ if r < 32 then return map_gpr[r] end
+@@ -436,8 +683,12 @@ end
+ return {
+ create = create,
+ create_el = create_el,
++ create_r6 = create_r6,
++ create_r6_el = create_r6_el,
+ disass = disass,
+ disass_el = disass_el,
++ disass_r6 = disass_r6,
++ disass_r6_el = disass_r6_el,
+ regname = regname
+ }
+
+diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
+index c4374928..5ad48f8f 100644
+--- a/src/jit/dis_mips64.lua
++++ b/src/jit/dis_mips64.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPS64 disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the big-endian functions from the
+diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
+index 2b1470af..d50e3a18 100644
+--- a/src/jit/dis_mips64el.lua
++++ b/src/jit/dis_mips64el.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPS64EL disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the little-endian functions from the
+diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64r6.lua
+similarity index 67%
+copy from src/jit/dis_mips64el.lua
+copy to src/jit/dis_mips64r6.lua
+index 2b1470af..921b3cbe 100644
+--- a/src/jit/dis_mips64el.lua
++++ b/src/jit/dis_mips64r6.lua
+@@ -1,17 +1,17 @@
+ ----------------------------------------------------------------------------
+--- LuaJIT MIPS64EL disassembler wrapper module.
++-- LuaJIT MIPS64R6 disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+--- This module just exports the little-endian functions from the
++-- This module just exports the r6 big-endian functions from the
+ -- MIPS disassembler module. All the interesting stuff is there.
+ ------------------------------------------------------------------------------
+
+ local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+ return {
+- create = dis_mips.create_el,
+- disass = dis_mips.disass_el,
++ create = dis_mips.create_r6,
++ disass = dis_mips.disass_r6,
+ regname = dis_mips.regname
+ }
+
+diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64r6el.lua
+similarity index 66%
+copy from src/jit/dis_mips64el.lua
+copy to src/jit/dis_mips64r6el.lua
+index 2b1470af..aadef9f3 100644
+--- a/src/jit/dis_mips64el.lua
++++ b/src/jit/dis_mips64r6el.lua
+@@ -1,17 +1,17 @@
+ ----------------------------------------------------------------------------
+--- LuaJIT MIPS64EL disassembler wrapper module.
++-- LuaJIT MIPS64R6EL disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+--- This module just exports the little-endian functions from the
++-- This module just exports the r6 little-endian functions from the
+ -- MIPS disassembler module. All the interesting stuff is there.
+ ------------------------------------------------------------------------------
+
+ local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+ return {
+- create = dis_mips.create_el,
+- disass = dis_mips.disass_el,
++ create = dis_mips.create_r6_el,
++ disass = dis_mips.disass_r6_el,
+ regname = dis_mips.regname
+ }
+
+diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
+index f69b11f0..52cebefb 100644
+--- a/src/jit/dis_mipsel.lua
++++ b/src/jit/dis_mipsel.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT MIPSEL disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the little-endian functions from the
+diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
+index 2aeb1b29..08d742f1 100644
+--- a/src/jit/dis_ppc.lua
++++ b/src/jit/dis_ppc.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT PPC disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT/X license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
+index d5714ee1..2d37423e 100644
+--- a/src/jit/dis_x64.lua
++++ b/src/jit/dis_x64.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT x64 disassembler wrapper module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This module just exports the 64 bit functions from the combined
+diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
+index 4371233d..5480854c 100644
+--- a/src/jit/dis_x86.lua
++++ b/src/jit/dis_x86.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT x86/x64 disassembler module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ -- This is a helper module used by the LuaJIT machine code dumper module.
+@@ -239,6 +239,24 @@ nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+ --8x
+ [0x8c] = "||pmaskmovXrvVSm",
+ [0x8e] = "||pmaskmovVSmXvr",
++--9x
++[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
++[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
++[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
++[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
++[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
++--Ax
++[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
++[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
++[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
++[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
++[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
++--Bx
++[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
++[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
++[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
++[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
++[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
+ --Dx
+ [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
+ [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
+@@ -483,7 +501,7 @@ local function putpat(ctx, name, pat)
+ local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
+ local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
+
+- -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
++ -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
+ for p in gmatch(pat, ".") do
+ local x = nil
+ if p == "V" or p == "U" then
+@@ -506,6 +524,9 @@ local function putpat(ctx, name, pat)
+ sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+ if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
+ regs = map_regs[sz]
++ elseif p == "H" then
++ name = name..(ctx.rexw and "d" or "s")
++ ctx.rexw = false
+ elseif p == "S" then
+ name = name..lower(sz)
+ elseif p == "s" then
+@@ -735,6 +756,7 @@ map_act = {
+ V = putpat, U = putpat, T = putpat,
+ M = putpat, X = putpat, P = putpat,
+ F = putpat, G = putpat, Y = putpat,
++ H = putpat,
+
+ -- Collect prefixes.
+ [":"] = function(ctx, name, pat)
+diff --git a/src/jit/dump.lua b/src/jit/dump.lua
+index 2bea652b..9eda08c4 100644
+--- a/src/jit/dump.lua
++++ b/src/jit/dump.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT compiler dump module.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+@@ -102,10 +102,12 @@ end
+ local function fillsymtab(tr, nexit)
+ local t = symtab
+ if nexitsym == 0 then
++ local maskaddr = jit.arch == "arm" and -2
+ local ircall = vmdef.ircall
+ for i=0,#ircall do
+ local addr = ircalladdr(i)
+ if addr ~= 0 then
++ if maskaddr then addr = band(addr, maskaddr) end
+ if addr < 0 then addr = addr + 2^32 end
+ t[addr] = ircall[i]
+ end
+@@ -217,8 +219,10 @@ local function colorize_text(s)
+ return s
+ end
+
+-local function colorize_ansi(s, t)
+- return format(colortype_ansi[t], s)
++local function colorize_ansi(s, t, extra)
++ local out = format(colortype_ansi[t], s)
++ if extra then out = "\027[3m"..out end
++ return out
+ end
+
+ local irtype_ansi = setmetatable({},
+@@ -227,9 +231,10 @@ local irtype_ansi = setmetatable({},
+
+ local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
+
+-local function colorize_html(s, t)
++local function colorize_html(s, t, extra)
+ s = gsub(s, "[<>&]", html_escape)
+- return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
++ return format('<span class="irt_%s%s">%s</span>',
++ irtype_text[t], extra and " irt_extra" or "", s)
+ end
+
+ local irtype_html = setmetatable({},
+@@ -254,6 +259,7 @@ span.irt_tab { color: #c00000; }
+ span.irt_udt, span.irt_lud { color: #00c0c0; }
+ span.irt_num { color: #4040c0; }
+ span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
++span.irt_extra { font-style: italic; }
+ </style>
+ ]]
+
+@@ -269,6 +275,7 @@ local litname = {
+ if band(mode, 8) ~= 0 then s = s.."C" end
+ if band(mode, 16) ~= 0 then s = s.."R" end
+ if band(mode, 32) ~= 0 then s = s.."I" end
++ if band(mode, 64) ~= 0 then s = s.."K" end
+ t[mode] = s
+ return s
+ end}),
+@@ -277,15 +284,18 @@ local litname = {
+ local s = irtype[band(mode, 31)]
+ s = irtype[band(shr(mode, 5), 31)].."."..s
+ if band(mode, 0x800) ~= 0 then s = s.." sext" end
+- local c = shr(mode, 14)
+- if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
++ local c = shr(mode, 12)
++ if c == 1 then s = s.." none"
++ elseif c == 2 then s = s.." index"
++ elseif c == 3 then s = s.." check" end
+ t[mode] = s
+ return s
+ end}),
+ ["FLOAD "] = vmdef.irfield,
+ ["FREF "] = vmdef.irfield,
+ ["FPMATH"] = vmdef.irfpm,
+- ["BUFHDR"] = { [0] = "RESET", "APPEND" },
++ ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
++ ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
+ ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
+ }
+
+@@ -315,7 +325,9 @@ local function formatk(tr, idx, sn)
+ local tn = type(k)
+ local s
+ if tn == "number" then
+- if band(sn or 0, 0x30000) ~= 0 then
++ if t < 12 then
++ s = k == 0 and "NULL" or format("[0x%08x]", k)
++ elseif band(sn or 0, 0x30000) ~= 0 then
+ s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
+ elseif k == 2^52+2^51 then
+ s = "bias"
+@@ -343,7 +355,7 @@ local function formatk(tr, idx, sn)
+ else
+ s = tostring(k) -- For primitives.
+ end
+- s = colorize(format("%-4s", s), t)
++ s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0)
+ if slot then
+ s = format("%s @%d", s, slot)
+ end
+@@ -363,7 +375,7 @@ local function printsnap(tr, snap)
+ out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
+ else
+ local m, ot, op1, op2 = traceir(tr, ref)
+- out:write(colorize(format("%04d", ref), band(ot, 31)))
++ out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0))
+ end
+ out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
+ else
+@@ -582,7 +594,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
+ end
+
+ -- Dump recorded bytecode.
+-local function dump_record(tr, func, pc, depth, callee)
++local function dump_record(tr, func, pc, depth)
+ if depth ~= recdepth then
+ recdepth = depth
+ recprefix = rep(" .", depth)
+@@ -593,7 +605,6 @@ local function dump_record(tr, func, pc, depth, callee)
+ if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
+ else
+ line = "0000 "..recprefix.." FUNCC \n"
+- callee = func
+ end
+ if pc <= 0 then
+ out:write(sub(line, 1, -2), " ; ", fmtfunc(func), "\n")
+@@ -607,12 +618,15 @@ end
+
+ ------------------------------------------------------------------------------
+
++local gpr64 = jit.arch:match("64")
++local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel"
++
+ -- Dump taken trace exits.
+ local function dump_texit(tr, ex, ngpr, nfpr, ...)
+ out:write("---- TRACE ", tr, " exit ", ex, "\n")
+ if dumpmode.X then
+ local regs = {...}
+- if jit.arch == "x64" then
++ if gpr64 then
+ for i=1,ngpr do
+ out:write(format(" %016x", regs[i]))
+ if i % 4 == 0 then out:write("\n") end
+@@ -623,7 +637,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
+ if i % 8 == 0 then out:write("\n") end
+ end
+ end
+- if jit.arch == "mips" or jit.arch == "mipsel" then
++ if fprmips32 then
+ for i=1,nfpr,2 do
+ out:write(format(" %+17.14g", regs[ngpr+i]))
+ if i % 8 == 7 then out:write("\n") end
+diff --git a/src/jit/p.lua b/src/jit/p.lua
+index 7be10586..c9ec1d8b 100644
+--- a/src/jit/p.lua
++++ b/src/jit/p.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT profiler.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+@@ -238,6 +238,7 @@ local function prof_finish()
+ prof_count1 = nil
+ prof_count2 = nil
+ prof_ud = nil
++ if out ~= stdout then out:close() end
+ end
+ end
+
+diff --git a/src/jit/v.lua b/src/jit/v.lua
+index 934de985..83589143 100644
+--- a/src/jit/v.lua
++++ b/src/jit/v.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- Verbose mode of the LuaJIT compiler.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+diff --git a/src/jit/zone.lua b/src/jit/zone.lua
+index fa702c4e..94357854 100644
+--- a/src/jit/zone.lua
++++ b/src/jit/zone.lua
+@@ -1,7 +1,7 @@
+ ----------------------------------------------------------------------------
+ -- LuaJIT profiler zones.
+ --
+--- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
++-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+ -- Released under the MIT license. See Copyright Notice in luajit.h
+ ----------------------------------------------------------------------------
+ --
+diff --git a/src/lib_aux.c b/src/lib_aux.c
+index c40565c3..4ef55581 100644
+--- a/src/lib_aux.c
++++ b/src/lib_aux.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Auxiliary library for the Lua/C API.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major parts taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -218,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
+
+ LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
+ {
+- while (l--)
+- luaL_addchar(B, *s++);
++ if (l <= bufffree(B)) {
++ memcpy(B->p, s, l);
++ B->p += l;
++ } else {
++ emptybuffer(B);
++ lua_pushlstring(B->L, s, l);
++ B->lvl++;
++ adjuststack(B);
++ }
+ }
+
+ LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
+@@ -338,17 +345,13 @@ LUALIB_API lua_State *luaL_newstate(void)
+
+ #else
+
+-#include "lj_alloc.h"
+-
+ LUALIB_API lua_State *luaL_newstate(void)
+ {
+ lua_State *L;
+- void *ud = lj_alloc_create();
+- if (ud == NULL) return NULL;
+ #if LJ_64 && !LJ_GC64
+- L = lj_state_newstate(lj_alloc_f, ud);
++ L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL);
+ #else
+- L = lua_newstate(lj_alloc_f, ud);
++ L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL);
+ #endif
+ if (L) G(L)->panic = panic;
+ return L;
+diff --git a/src/lib_base.c b/src/lib_base.c
+index 3a757870..55e3c6b8 100644
+--- a/src/lib_base.c
++++ b/src/lib_base.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Base and coroutine library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -19,6 +19,7 @@
+ #include "lj_gc.h"
+ #include "lj_err.h"
+ #include "lj_debug.h"
++#include "lj_buf.h"
+ #include "lj_str.h"
+ #include "lj_tab.h"
+ #include "lj_meta.h"
+@@ -42,13 +43,13 @@
+
+ LJLIB_ASM(assert) LJLIB_REC(.)
+ {
+- GCstr *s;
+ lj_lib_checkany(L, 1);
+- s = lj_lib_optstr(L, 2);
+- if (s)
+- lj_err_callermsg(L, strdata(s));
+- else
++ if (L->top == L->base+1)
+ lj_err_caller(L, LJ_ERR_ASSERT);
++ else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
++ lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
++ else
++ lj_err_run(L);
+ return FFH_UNREACHABLE;
+ }
+
+@@ -75,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.)
+ /* This solves a circular dependency problem -- change FF_next_N as needed. */
+ LJ_STATIC_ASSERT((int)FF_next == FF_next_N);
+
+-LJLIB_ASM(next)
++LJLIB_ASM(next) LJLIB_REC(.)
+ {
+ lj_lib_checktab(L, 1);
++ lj_err_msg(L, LJ_ERR_NEXTIDX);
+ return FFH_UNREACHABLE;
+ }
+
+@@ -224,9 +226,11 @@ LJLIB_CF(unpack)
+ int32_t n, i = lj_lib_optint(L, 2, 1);
+ int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ?
+ lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t);
++ uint32_t nu;
+ if (i > e) return 0;
+- n = e - i + 1;
+- if (n <= 0 || !lua_checkstack(L, n))
++ nu = (uint32_t)e - (uint32_t)i;
++ n = (int32_t)(nu+1);
++ if (nu >= LUAI_MAXCSTACK || !lua_checkstack(L, n))
+ lj_err_caller(L, LJ_ERR_UNPACK);
+ do {
+ cTValue *tv = lj_tab_getint(t, i);
+@@ -287,18 +291,27 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
+ } else {
+ const char *p = strdata(lj_lib_checkstr(L, 1));
+ char *ep;
++ unsigned int neg = 0;
+ unsigned long ul;
+ if (base < 2 || base > 36)
+ lj_err_arg(L, 2, LJ_ERR_BASERNG);
+- ul = strtoul(p, &ep, base);
+- if (p != ep) {
+- while (lj_char_isspace((unsigned char)(*ep))) ep++;
+- if (*ep == '\0') {
+- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
+- setintV(L->base-1-LJ_FR2, (int32_t)ul);
+- else
+- setnumV(L->base-1-LJ_FR2, (lua_Number)ul);
+- return FFH_RES(1);
++ while (lj_char_isspace((unsigned char)(*p))) p++;
++ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
++ if (lj_char_isalnum((unsigned char)(*p))) {
++ ul = strtoul(p, &ep, base);
++ if (p != ep) {
++ while (lj_char_isspace((unsigned char)(*ep))) ep++;
++ if (*ep == '\0') {
++ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
++ if (neg) ul = (unsigned long)-(long)ul;
++ setintV(L->base-1-LJ_FR2, (int32_t)ul);
++ } else {
++ lua_Number n = (lua_Number)ul;
++ if (neg) n = -n;
++ setnumV(L->base-1-LJ_FR2, n);
++ }
++ return FFH_RES(1);
++ }
+ }
+ }
+ }
+@@ -395,10 +408,22 @@ LJLIB_CF(load)
+ GCstr *name = lj_lib_optstr(L, 2);
+ GCstr *mode = lj_lib_optstr(L, 3);
+ int status;
+- if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) {
+- GCstr *s = lj_lib_checkstr(L, 1);
++ if (L->base < L->top &&
++ (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) {
++ const char *s;
++ MSize len;
++ if (tvisbuf(L->base)) {
++ SBufExt *sbx = bufV(L->base);
++ s = sbx->r;
++ len = sbufxlen(sbx);
++ if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */
++ } else {
++ GCstr *str = lj_lib_checkstr(L, 1);
++ s = strdata(str);
++ len = str->len;
++ }
+ lua_settop(L, 4); /* Ensure env arg exists. */
+- status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s),
++ status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s,
+ mode ? strdata(mode) : NULL);
+ } else {
+ lj_lib_checkfunc(L, 1);
+@@ -493,7 +518,8 @@ LJLIB_CF(print)
+ lua_gettable(L, LUA_GLOBALSINDEX);
+ tv = L->top-1;
+ }
+- shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
++ shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) &&
++ !gcrefu(basemt_it(G(L), LJ_TNUMX));
+ for (i = 0; i < nargs; i++) {
+ cTValue *o = &L->base[i];
+ const char *str;
+diff --git a/src/lib_bit.c b/src/lib_bit.c
+index c979a448..6fb8ad47 100644
+--- a/src/lib_bit.c
++++ b/src/lib_bit.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Bit manipulation library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lib_bit_c
+diff --git a/src/lib_buffer.c b/src/lib_buffer.c
+new file mode 100644
+index 00000000..2e364861
+--- /dev/null
++++ b/src/lib_buffer.c
+@@ -0,0 +1,356 @@
++/*
++** Buffer library.
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
++*/
++
++#define lib_buffer_c
++#define LUA_LIB
++
++#include "lua.h"
++#include "lauxlib.h"
++#include "lualib.h"
++
++#include "lj_obj.h"
++
++#if LJ_HASBUFFER
++#include "lj_gc.h"
++#include "lj_err.h"
++#include "lj_buf.h"
++#include "lj_str.h"
++#include "lj_tab.h"
++#include "lj_udata.h"
++#include "lj_meta.h"
++#if LJ_HASFFI
++#include "lj_ctype.h"
++#include "lj_cdata.h"
++#include "lj_cconv.h"
++#endif
++#include "lj_strfmt.h"
++#include "lj_serialize.h"
++#include "lj_lib.h"
++
++/* -- Helper functions ---------------------------------------------------- */
++
++/* Check that the first argument is a string buffer. */
++static SBufExt *buffer_tobuf(lua_State *L)
++{
++ if (!(L->base < L->top && tvisbuf(L->base)))
++ lj_err_argtype(L, 1, "buffer");
++ return bufV(L->base);
++}
++
++/* Ditto, but for writers. */
++static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ setsbufXL_(sbx, L);
++ return sbx;
++}
++
++#define buffer_toudata(sbx) ((GCudata *)(sbx)-1)
++
++/* -- Buffer methods ------------------------------------------------------ */
++
++#define LJLIB_MODULE_buffer_method
++
++LJLIB_CF(buffer_method_free)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ lj_bufx_free(L, sbx);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_reset) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ lj_bufx_reset(sbx);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_skip) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
++ MSize len = sbufxlen(sbx);
++ if (n < len) {
++ sbx->r += n;
++ } else {
++ sbx->r = sbx->w = sbx->b;
++ }
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_set) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ GCobj *ref;
++ const char *p;
++ MSize len;
++#if LJ_HASFFI
++ if (tviscdata(L->base+1)) {
++ CTState *cts = ctype_cts(L);
++ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
++ L->base+1, CCF_ARG(2));
++ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
++ } else
++#endif
++ {
++ GCstr *str = lj_lib_checkstrx(L, 2);
++ p = strdata(str);
++ len = str->len;
++ }
++ lj_bufx_free(L, sbx);
++ lj_bufx_set_cow(L, sbx, p, len);
++ ref = gcV(L->base+1);
++ setgcref(sbx->cowref, ref);
++ lj_gc_objbarrier(L, buffer_toudata(sbx), ref);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_put) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ ptrdiff_t arg, narg = L->top - L->base;
++ for (arg = 1; arg < narg; arg++) {
++ cTValue *o = &L->base[arg], *mo = NULL;
++ retry:
++ if (tvisstr(o)) {
++ lj_buf_putstr((SBuf *)sbx, strV(o));
++ } else if (tvisint(o)) {
++ lj_strfmt_putint((SBuf *)sbx, intV(o));
++ } else if (tvisnum(o)) {
++ lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o));
++ } else if (tvisbuf(o)) {
++ SBufExt *sbx2 = bufV(o);
++ if (sbx2 == sbx) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
++ lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2));
++ } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
++ /* Call __tostring metamethod inline. */
++ copyTV(L, L->top++, mo);
++ copyTV(L, L->top++, o);
++ lua_call(L, 1, 1);
++ o = &L->base[arg]; /* The stack may have been reallocated. */
++ copyTV(L, &L->base[arg], L->top-1);
++ L->top = L->base + narg;
++ goto retry; /* Retry with the result. */
++ } else {
++ lj_err_argtype(L, arg+1, "string/number/__tostring");
++ }
++ /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */
++ }
++ L->top = L->base+1; /* Chain buffer object. */
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method_putf) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
++ L->top = L->base+1; /* Chain buffer object. */
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method_get) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ ptrdiff_t arg, narg = L->top - L->base;
++ if (narg == 1) {
++ narg++;
++ setnilV(L->top++); /* get() is the same as get(nil). */
++ }
++ for (arg = 1; arg < narg; arg++) {
++ TValue *o = &L->base[arg];
++ MSize n = tvisnil(o) ? LJ_MAX_BUF :
++ (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF);
++ MSize len = sbufxlen(sbx);
++ if (n > len) n = len;
++ setstrV(L, o, lj_str_new(L, sbx->r, n));
++ sbx->r += n;
++ }
++ if (sbx->r == sbx->w) sbx->r = sbx->w = sbx->b;
++ lj_gc_check(L);
++ return narg-1;
++}
++
++#if LJ_HASFFI
++LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ const char *p;
++ MSize len;
++ if (tviscdata(L->base+1)) {
++ CTState *cts = ctype_cts(L);
++ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
++ L->base+1, CCF_ARG(2));
++ } else {
++ lj_err_argtype(L, 2, "cdata");
++ }
++ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
++ lj_buf_putmem((SBuf *)sbx, p, len);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_reserve) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
++ GCcdata *cd;
++ lj_buf_more((SBuf *)sbx, sz);
++ ctype_loadffi(L);
++ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
++ *(void **)cdataptr(cd) = sbx->w;
++ setcdataV(L, L->top++, cd);
++ setintV(L->top++, sbufleft(sbx));
++ return 2;
++}
++
++LJLIB_CF(buffer_method_commit) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
++ if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG);
++ sbx->w += len;
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_ref) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ GCcdata *cd;
++ ctype_loadffi(L);
++ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
++ *(void **)cdataptr(cd) = sbx->r;
++ setcdataV(L, L->top++, cd);
++ setintV(L->top++, sbufxlen(sbx));
++ return 2;
++}
++#endif
++
++LJLIB_CF(buffer_method_encode) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ cTValue *o = lj_lib_checkany(L, 2);
++ lj_serialize_put(sbx, o);
++ lj_gc_check(L);
++ L->top = L->base+1; /* Chain buffer object. */
++ return 1;
++}
++
++LJLIB_CF(buffer_method_decode) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobufw(L);
++ setnilV(L->top++);
++ sbx->r = lj_serialize_get(sbx, L->top-1);
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method___gc)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ lj_bufx_free(L, sbx);
++ return 0;
++}
++
++LJLIB_CF(buffer_method___tostring) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_method___len) LJLIB_REC(.)
++{
++ SBufExt *sbx = buffer_tobuf(L);
++ setintV(L->top-1, (int32_t)sbufxlen(sbx));
++ return 1;
++}
++
++LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
++LJLIB_PUSH(top-1) LJLIB_SET(__index)
++
++/* -- Buffer library functions -------------------------------------------- */
++
++#define LJLIB_MODULE_buffer
++
++LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
++
++LJLIB_CF(buffer_new)
++{
++ MSize sz = 0;
++ int targ = 1;
++ GCtab *env, *dict_str = NULL, *dict_mt = NULL;
++ GCudata *ud;
++ SBufExt *sbx;
++ if (L->base < L->top && !tvistab(L->base)) {
++ targ = 2;
++ if (!tvisnil(L->base))
++ sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
++ }
++ if (L->base+targ-1 < L->top) {
++ GCtab *options = lj_lib_checktab(L, targ);
++ cTValue *opt_dict, *opt_mt;
++ opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
++ if (opt_dict && tvistab(opt_dict)) {
++ dict_str = tabV(opt_dict);
++ lj_serialize_dict_prep_str(L, dict_str);
++ }
++ opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
++ if (opt_mt && tvistab(opt_mt)) {
++ dict_mt = tabV(opt_mt);
++ lj_serialize_dict_prep_mt(L, dict_mt);
++ }
++ }
++ env = tabref(curr_func(L)->c.env);
++ ud = lj_udata_new(L, sizeof(SBufExt), env);
++ ud->udtype = UDTYPE_BUFFER;
++ /* NOBARRIER: The GCudata is new (marked white). */
++ setgcref(ud->metatable, obj2gco(env));
++ setudataV(L, L->top++, ud);
++ sbx = (SBufExt *)uddata(ud);
++ lj_bufx_init(L, sbx);
++ setgcref(sbx->dict_str, obj2gco(dict_str));
++ setgcref(sbx->dict_mt, obj2gco(dict_mt));
++ if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
++ return 1;
++}
++
++LJLIB_CF(buffer_encode) LJLIB_REC(.)
++{
++ cTValue *o = lj_lib_checkany(L, 1);
++ setstrV(L, L->top++, lj_serialize_encode(L, o));
++ lj_gc_check(L);
++ return 1;
++}
++
++LJLIB_CF(buffer_decode) LJLIB_REC(.)
++{
++ GCstr *str = lj_lib_checkstrx(L, 1);
++ setnilV(L->top++);
++ lj_serialize_decode(L, L->top-1, str);
++ return 1;
++}
++
++/* ------------------------------------------------------------------------ */
++
++#include "lj_libdef.h"
++
++int luaopen_string_buffer(lua_State *L)
++{
++ LJ_LIB_REG(L, NULL, buffer_method);
++ lua_getfield(L, -1, "__tostring");
++ lua_setfield(L, -2, "tostring");
++ LJ_LIB_REG(L, NULL, buffer);
++ return 1;
++}
++
++#endif
+diff --git a/src/lib_debug.c b/src/lib_debug.c
+index f112b5bc..a6acc6f2 100644
+--- a/src/lib_debug.c
++++ b/src/lib_debug.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Debug library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid)
+ int32_t n = lj_lib_checkint(L, 2) - 1;
+ if ((uint32_t)n >= fn->l.nupvalues)
+ lj_err_arg(L, 2, LJ_ERR_IDXRNG);
+- setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
+- (void *)&fn->c.upvalue[n]);
++ lua_pushlightuserdata(L, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
++ (void *)&fn->c.upvalue[n]);
+ return 1;
+ }
+
+@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
+
+ /* ------------------------------------------------------------------------ */
+
+-#define KEY_HOOK ((void *)0x3004)
++#define KEY_HOOK (U64x(80000000,00000000)|'h')
+
+ static void hookf(lua_State *L, lua_Debug *ar)
+ {
+ static const char *const hooknames[] =
+ {"call", "return", "line", "count", "tail return"};
+- lua_pushlightuserdata(L, KEY_HOOK);
++ (L->top++)->u64 = KEY_HOOK;
+ lua_rawget(L, LUA_REGISTRYINDEX);
+ if (lua_isfunction(L, -1)) {
+ lua_pushstring(L, hooknames[(int)ar->event]);
+@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
+ count = luaL_optint(L, arg+3, 0);
+ func = hookf; mask = makemask(smask, count);
+ }
+- lua_pushlightuserdata(L, KEY_HOOK);
++ (L->top++)->u64 = KEY_HOOK;
+ lua_pushvalue(L, arg+1);
+ lua_rawset(L, LUA_REGISTRYINDEX);
+ lua_sethook(L, func, mask, count);
+@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
+ if (hook != NULL && hook != hookf) { /* external hook? */
+ lua_pushliteral(L, "external hook");
+ } else {
+- lua_pushlightuserdata(L, KEY_HOOK);
++ (L->top++)->u64 = KEY_HOOK;
+ lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */
+ }
+ lua_pushstring(L, unmakemask(mask, buff));
+@@ -369,7 +369,8 @@ LJLIB_CF(debug_debug)
+ return 0;
+ if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") ||
+ lua_pcall(L, 0, 0, 0)) {
+- fputs(lua_tostring(L, -1), stderr);
++ const char *s = lua_tostring(L, -1);
++ fputs(s ? s : "(error object is not a string)", stderr);
+ fputs("\n", stderr);
+ }
+ lua_settop(L, 0); /* remove eventual returns */
+diff --git a/src/lib_ffi.c b/src/lib_ffi.c
+index 136e98e8..b4321048 100644
+--- a/src/lib_ffi.c
++++ b/src/lib_ffi.c
+@@ -1,6 +1,6 @@
+ /*
+ ** FFI library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lib_ffi_c
+@@ -573,6 +573,7 @@ LJLIB_CF(ffi_typeinfo)
+ setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+ if (gcref(ct->name)) {
+ GCstr *s = gco2str(gcref(ct->name));
++ if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s));
+ setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+ }
+ lj_gc_check(L);
+@@ -720,47 +721,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.)
+ return 0;
+ }
+
+-#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
+-
+ /* Test ABI string. */
+ LJLIB_CF(ffi_abi) LJLIB_REC(.)
+ {
+ GCstr *s = lj_lib_checkstr(L, 1);
+- int b = 0;
+- switch (s->hash) {
++ int b = lj_cparse_case(s,
+ #if LJ_64
+- case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */
++ "\00564bit"
+ #else
+- case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */
++ "\00532bit"
+ #endif
+ #if LJ_ARCH_HASFPU
+- case H_(e33ee463,e33ee463): b = 1; break; /* fpu */
++ "\003fpu"
+ #endif
+ #if LJ_ABI_SOFTFP
+- case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */
++ "\006softfp"
+ #else
+- case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */
++ "\006hardfp"
+ #endif
+ #if LJ_ABI_EABI
+- case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */
++ "\004eabi"
+ #endif
+ #if LJ_ABI_WIN
+- case H_(4ab624a8,4ab624a8): b = 1; break; /* win */
++ "\003win"
++#endif
++#if LJ_TARGET_UWP
++ "\003uwp"
++#endif
++#if LJ_LE
++ "\002le"
++#else
++ "\002be"
+ #endif
+- case H_(3af93066,1f001464): b = 1; break; /* le/be */
+ #if LJ_GC64
+- case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */
++ "\004gc64"
+ #endif
+- default:
+- break;
+- }
++ ) >= 0;
+ setboolV(L->top-1, b);
+ setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */
+ return 1;
+ }
+
+-#undef H_
+-
+ LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
+
+ LJLIB_CF(ffi_metatype)
+diff --git a/src/lib_init.c b/src/lib_init.c
+index 2ed370e9..56e0619a 100644
+--- a/src/lib_init.c
++++ b/src/lib_init.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Library initialization.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major parts taken verbatim from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+diff --git a/src/lib_io.c b/src/lib_io.c
+index 9763ed46..b9d8cc75 100644
+--- a/src/lib_io.c
++++ b/src/lib_io.c
+@@ -1,6 +1,6 @@
+ /*
+ ** I/O library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -60,12 +60,12 @@ static IOFileUD *io_tofile(lua_State *L)
+ return iof;
+ }
+
+-static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
++static IOFileUD *io_stdfile(lua_State *L, ptrdiff_t id)
+ {
+ IOFileUD *iof = IOSTDF_IOF(L, id);
+ if (iof->fp == NULL)
+ lj_err_caller(L, LJ_ERR_IOSTDCL);
+- return iof->fp;
++ return iof;
+ }
+
+ static IOFileUD *io_file_new(lua_State *L)
+@@ -99,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
+ int stat = -1;
+ #if LJ_TARGET_POSIX
+ stat = pclose(iof->fp);
+-#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE
++#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
+ stat = _pclose(iof->fp);
+-#else
+- lua_assert(0);
+- return 0;
+ #endif
+ #if LJ_52
+ iof->fp = NULL;
+@@ -112,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
+ ok = (stat != -1);
+ #endif
+ } else {
+- lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
++ lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF,
++ "close of unknown FILE* type");
+ setnilV(L->top++);
+ lua_pushliteral(L, "cannot close standard file");
+ return 2;
+@@ -180,7 +178,7 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
+ MSize n = (MSize)fread(buf, 1, m, fp);
+ setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
+ lj_gc_check(L);
+- return (n > 0 || m == 0);
++ return n > 0;
+ } else {
+ int c = getc(fp);
+ ungetc(c, fp);
+@@ -189,8 +187,9 @@ static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
+ }
+ }
+
+-static int io_file_read(lua_State *L, FILE *fp, int start)
++static int io_file_read(lua_State *L, IOFileUD *iof, int start)
+ {
++ FILE *fp = iof->fp;
+ int ok, n, nargs = (int)(L->top - L->base) - start;
+ clearerr(fp);
+ if (nargs == 0) {
+@@ -226,8 +225,9 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
+ return n - start;
+ }
+
+-static int io_file_write(lua_State *L, FILE *fp, int start)
++static int io_file_write(lua_State *L, IOFileUD *iof, int start)
+ {
++ FILE *fp = iof->fp;
+ cTValue *tv;
+ int status = 1;
+ for (tv = L->base+start; tv < L->top; tv++) {
+@@ -255,13 +255,11 @@ static int io_file_iter(lua_State *L)
+ lj_err_caller(L, LJ_ERR_IOCLFL);
+ L->top = L->base;
+ if (n) { /* Copy upvalues with options to stack. */
+- if (n > LUAI_MAXCSTACK)
+- lj_err_caller(L, LJ_ERR_STKOV);
+ lj_state_checkstack(L, (MSize)n);
+ memcpy(L->top, &fn->c.upvalue[1], n*sizeof(TValue));
+ L->top += n;
+ }
+- n = io_file_read(L, iof->fp, 0);
++ n = io_file_read(L, iof, 0);
+ if (ferror(iof->fp))
+ lj_err_callermsg(L, strVdata(L->top-2));
+ if (tvisnil(L->base) && (iof->type & IOFILE_FLAG_CLOSE)) {
+@@ -286,19 +284,25 @@ static int io_file_lines(lua_State *L)
+
+ LJLIB_CF(io_method_close)
+ {
+- IOFileUD *iof = L->base < L->top ? io_tofile(L) :
+- IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
++ IOFileUD *iof;
++ if (L->base < L->top) {
++ iof = io_tofile(L);
++ } else {
++ iof = IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
++ if (iof->fp == NULL)
++ lj_err_caller(L, LJ_ERR_IOCLFL);
++ }
+ return io_file_close(L, iof);
+ }
+
+ LJLIB_CF(io_method_read)
+ {
+- return io_file_read(L, io_tofile(L)->fp, 1);
++ return io_file_read(L, io_tofile(L), 1);
+ }
+
+ LJLIB_CF(io_method_write) LJLIB_REC(io_write 0)
+ {
+- return io_file_write(L, io_tofile(L)->fp, 1);
++ return io_file_write(L, io_tofile(L), 1);
+ }
+
+ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
+@@ -306,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
+ return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
+ }
+
++#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24
++/* The Android NDK is such an unmatched marvel of engineering. */
++extern int fseeko32(FILE *, long int, int) __asm__("fseeko");
++extern long int ftello32(FILE *) __asm__("ftello");
++#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence)))
++#define ftello(fp) (ftello32((fp)))
++#endif
++
+ LJLIB_CF(io_method_seek)
+ {
+ FILE *fp = io_tofile(L)->fp;
+@@ -406,7 +418,7 @@ LJLIB_CF(io_open)
+
+ LJLIB_CF(io_popen)
+ {
+-#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE)
++#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ GCstr *s = lj_lib_optstr(L, 2);
+ const char *mode = s ? strdata(s) : "r";
+@@ -452,7 +464,7 @@ LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT)
+
+ LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
+ {
+- return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
++ return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)->fp) == 0, NULL);
+ }
+
+ static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
+diff --git a/src/lib_jit.c b/src/lib_jit.c
+index 22ca0a1a..817c2967 100644
+--- a/src/lib_jit.c
++++ b/src/lib_jit.c
+@@ -1,6 +1,6 @@
+ /*
+ ** JIT library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lib_jit_c
+@@ -104,8 +104,8 @@ LJLIB_CF(jit_status)
+ jit_State *J = L2J(L);
+ L->top = L->base;
+ setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
+- flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
+- flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
++ flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
++ flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
+ return (int)(L->top - L->base);
+ #else
+ setboolV(L->top++, 0);
+@@ -113,6 +113,13 @@ LJLIB_CF(jit_status)
+ #endif
+ }
+
++LJLIB_CF(jit_security)
++{
++ int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING);
++ setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3));
++ return 1;
++}
++
+ LJLIB_CF(jit_attach)
+ {
+ #ifdef LUAJIT_DISABLE_VMEVENT
+@@ -227,7 +234,7 @@ LJLIB_CF(jit_util_funcbc)
+ if (pc < pt->sizebc) {
+ BCIns ins = proto_bc(pt)[pc];
+ BCOp op = bc_op(ins);
+- lua_assert(op < BC__MAX);
++ lj_assertL(op < BC__MAX, "bad bytecode op %d", op);
+ setintV(L->top, ins);
+ setintV(L->top+1, lj_bc_mode[op]);
+ L->top += 2;
+@@ -339,11 +346,7 @@ LJLIB_CF(jit_util_tracek)
+ ir = &T->ir[ir->op1];
+ }
+ #if LJ_HASFFI
+- if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
+- ptrdiff_t oldtop = savestack(L, L->top);
+- luaopen_ffi(L); /* Load FFI library on-demand. */
+- L->top = restorestack(L, oldtop);
+- }
++ if (ir->o == IR_KINT64) ctype_loadffi(L);
+ #endif
+ lj_ir_kvalue(L, L->top-2, ir);
+ setintV(L->top-1, (int32_t)irt_type(ir->t));
+@@ -471,7 +474,7 @@ static int jitopt_flag(jit_State *J, const char *str)
+ str += str[2] == '-' ? 3 : 2;
+ set = 0;
+ }
+- for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
++ for (opt = JIT_F_OPT; ; opt <<= 1) {
+ size_t len = *(const uint8_t *)lst;
+ if (len == 0)
+ break;
+@@ -491,7 +494,7 @@ static int jitopt_param(jit_State *J, const char *str)
+ int i;
+ for (i = 0; i < JIT_P__MAX; i++) {
+ size_t len = *(const uint8_t *)lst;
+- lua_assert(len != 0);
++ lj_assertJ(len != 0, "bad JIT_P_STRING");
+ if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
+ int32_t n = 0;
+ const char *p = &str[len+1];
+@@ -540,15 +543,15 @@ LJLIB_CF(jit_opt_start)
+
+ /* Not loaded by default, use: local profile = require("jit.profile") */
+
+-static const char KEY_PROFILE_THREAD = 't';
+-static const char KEY_PROFILE_FUNC = 'f';
++#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t')
++#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f')
+
+ static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+ int vmstate)
+ {
+ TValue key;
+ cTValue *tv;
+- setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
++ key.u64 = KEY_PROFILE_FUNC;
+ tv = lj_tab_get(L, tabV(registry(L)), &key);
+ if (tvisfunc(tv)) {
+ char vmst = (char)vmstate;
+@@ -575,9 +578,9 @@ LJLIB_CF(jit_profile_start)
+ lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
+ TValue key;
+ /* Anchor thread and function in registry. */
+- setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
++ key.u64 = KEY_PROFILE_THREAD;
+ setthreadV(L, lj_tab_set(L, registry, &key), L2);
+- setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
++ key.u64 = KEY_PROFILE_FUNC;
+ setfuncV(L, lj_tab_set(L, registry, &key), func);
+ lj_gc_anybarriert(L, registry);
+ luaJIT_profile_start(L, mode ? strdata(mode) : "",
+@@ -592,9 +595,9 @@ LJLIB_CF(jit_profile_stop)
+ TValue key;
+ luaJIT_profile_stop(L);
+ registry = tabV(registry(L));
+- setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
++ key.u64 = KEY_PROFILE_THREAD;
+ setnilV(lj_tab_set(L, registry, &key));
+- setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
++ key.u64 = KEY_PROFILE_FUNC;
+ setnilV(lj_tab_set(L, registry, &key));
+ lj_gc_anybarriert(L, registry);
+ return 0;
+@@ -640,59 +643,41 @@ JIT_PARAMDEF(JIT_PARAMINIT)
+ #undef JIT_PARAMINIT
+ 0
+ };
+-#endif
+
+ #if LJ_TARGET_ARM && LJ_TARGET_LINUX
+ #include <sys/utsname.h>
+ #endif
+
+-/* Arch-dependent CPU detection. */
+-static uint32_t jit_cpudetect(lua_State *L)
++/* Arch-dependent CPU feature detection. */
++static uint32_t jit_cpudetect(void)
+ {
+ uint32_t flags = 0;
+ #if LJ_TARGET_X86ORX64
++
+ uint32_t vendor[4];
+ uint32_t features[4];
+ if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
+-#if !LJ_HASJIT
+-#define JIT_F_SSE2 2
+-#endif
+- flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
+-#if LJ_HASJIT
+ flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
+ flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
+- if (vendor[2] == 0x6c65746e) { /* Intel. */
+- if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
+- flags |= JIT_F_LEA_AGU;
+- } else if (vendor[2] == 0x444d4163) { /* AMD. */
+- uint32_t fam = (features[0] & 0x0ff00f00);
+- if (fam >= 0x00000f00) /* K8, K10. */
+- flags |= JIT_F_PREFER_IMUL;
+- }
+ if (vendor[0] >= 7) {
+ uint32_t xfeatures[4];
+ lj_vm_cpuid(7, xfeatures);
+ flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
+ }
+-#endif
+ }
+- /* Check for required instruction set support on x86 (unnecessary on x64). */
+-#if LJ_TARGET_X86
+- if (!(flags & JIT_F_SSE2))
+- luaL_error(L, "CPU with SSE2 required");
+-#endif
++ /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
++
+ #elif LJ_TARGET_ARM
+-#if LJ_HASJIT
++
+ int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
+ #if LJ_TARGET_LINUX
+ if (ver < 70) { /* Runtime ARM CPU detection. */
+ struct utsname ut;
+ uname(&ut);
+ if (strncmp(ut.machine, "armv", 4) == 0) {
+- if (ut.machine[4] >= '7')
+- ver = 70;
+- else if (ut.machine[4] == '6')
+- ver = 60;
++ if (ut.machine[4] >= '8') ver = 80;
++ else if (ut.machine[4] == '7') ver = 70;
++ else if (ut.machine[4] == '6') ver = 60;
+ }
+ }
+ #endif
+@@ -700,20 +685,22 @@ static uint32_t jit_cpudetect(lua_State *L)
+ ver >= 61 ? JIT_F_ARMV6T2_ :
+ ver >= 60 ? JIT_F_ARMV6_ : 0;
+ flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
+-#endif
++
+ #elif LJ_TARGET_ARM64
++
+ /* No optional CPU features to detect (for now). */
++
+ #elif LJ_TARGET_PPC
+-#if LJ_HASJIT
++
+ #if LJ_ARCH_SQRT
+ flags |= JIT_F_SQRT;
+ #endif
+ #if LJ_ARCH_ROUND
+ flags |= JIT_F_ROUND;
+ #endif
+-#endif
++
+ #elif LJ_TARGET_MIPS
+-#if LJ_HASJIT
++
+ /* Compile-time MIPS CPU detection. */
+ #if LJ_ARCH_VERSION >= 20
+ flags |= JIT_F_MIPSXXR2;
+@@ -731,31 +718,28 @@ static uint32_t jit_cpudetect(lua_State *L)
+ if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
+ }
+ #endif
+-#endif
++
+ #else
+ #error "Missing CPU detection for this architecture"
+ #endif
+- UNUSED(L);
+ return flags;
+ }
+
+ /* Initialize JIT compiler. */
+ static void jit_init(lua_State *L)
+ {
+- uint32_t flags = jit_cpudetect(L);
+-#if LJ_HASJIT
+ jit_State *J = L2J(L);
+- J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
++ J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
+ memcpy(J->param, jit_param_default, sizeof(J->param));
+ lj_dispatch_update(G(L));
+-#else
+- UNUSED(flags);
+-#endif
+ }
++#endif
+
+ LUALIB_API int luaopen_jit(lua_State *L)
+ {
++#if LJ_HASJIT
+ jit_init(L);
++#endif
+ lua_pushliteral(L, LJ_OS_NAME);
+ lua_pushliteral(L, LJ_ARCH_NAME);
+ lua_pushinteger(L, LUAJIT_VERSION_NUM);
+diff --git a/src/lib_math.c b/src/lib_math.c
+index ef9dda2d..e9a0b597 100644
+--- a/src/lib_math.c
++++ b/src/lib_math.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Math library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #include <math.h>
+@@ -15,6 +15,7 @@
+ #include "lj_obj.h"
+ #include "lj_lib.h"
+ #include "lj_vm.h"
++#include "lj_prng.h"
+
+ /* ------------------------------------------------------------------------ */
+
+@@ -33,19 +34,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT)
+ lj_lib_checknum(L, 1);
+ return FFH_RETRY;
+ }
+-LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10)
+-LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP)
+-LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN)
+-LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS)
+-LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
+-LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
+-LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
+-LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
+-LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh)
+-LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh)
+-LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
++LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10)
++LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp)
++LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin)
++LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos)
++LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan)
++LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin)
++LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos)
++LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan)
++LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh)
++LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh)
++LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh)
+ LJLIB_ASM_(math_frexp)
+-LJLIB_ASM_(math_modf) LJLIB_REC(.)
++LJLIB_ASM_(math_modf)
+
+ LJLIB_ASM(math_log) LJLIB_REC(math_log)
+ {
+@@ -105,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge)
+ ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
+ */
+
+-/* PRNG state. */
+-struct RandomState {
+- uint64_t gen[4]; /* State of the 4 LFSR generators. */
+- int valid; /* State is valid. */
+-};
+-
+ /* Union needed for bit-pattern conversion between uint64_t and double. */
+ typedef union { uint64_t u64; double d; } U64double;
+
+-/* Update generator i and compute a running xor of all states. */
+-#define TW223_GEN(i, k, q, s) \
+- z = rs->gen[i]; \
+- z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
+- r ^= z; rs->gen[i] = z;
+-
+-/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
+-LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
+-{
+- uint64_t z, r = 0;
+- TW223_GEN(0, 63, 31, 18)
+- TW223_GEN(1, 58, 19, 28)
+- TW223_GEN(2, 55, 24, 7)
+- TW223_GEN(3, 47, 21, 8)
+- return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
+-}
+-
+-/* PRNG initialization function. */
+-static void random_init(RandomState *rs, double d)
++/* PRNG seeding function. */
++static void random_seed(PRNGState *rs, double d)
+ {
+ uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
+ int i;
+@@ -141,24 +119,22 @@ static void random_init(RandomState *rs, double d)
+ uint32_t m = 1u << (r&255);
+ r >>= 8;
+ u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
+- if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
+- rs->gen[i] = u.u64;
++ if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */
++ rs->u[i] = u.u64;
+ }
+- rs->valid = 1;
+ for (i = 0; i < 10; i++)
+- lj_math_random_step(rs);
++ (void)lj_prng_u64(rs);
+ }
+
+ /* PRNG extract function. */
+-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
++LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
+ LJLIB_CF(math_random) LJLIB_REC(.)
+ {
+ int n = (int)(L->top - L->base);
+- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
++ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ U64double u;
+ double d;
+- if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
+- u.u64 = lj_math_random_step(rs);
++ u.u64 = lj_prng_u64d(rs);
+ d = u.d - 1.0;
+ if (n > 0) {
+ #if LJ_DUALNUM
+@@ -203,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.)
+ }
+
+ /* PRNG seed function. */
+-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
++LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
+ LJLIB_CF(math_randomseed)
+ {
+- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+- random_init(rs, lj_lib_checknum(L, 1));
++ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
++ random_seed(rs, lj_lib_checknum(L, 1));
+ return 0;
+ }
+
+@@ -217,9 +193,8 @@ LJLIB_CF(math_randomseed)
+
+ LUALIB_API int luaopen_math(lua_State *L)
+ {
+- RandomState *rs;
+- rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
+- rs->valid = 0; /* Use lazy initialization to save some time on startup. */
++ PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState));
++ lj_prng_seed_fixed(rs);
+ LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
+ return 1;
+ }
+diff --git a/src/lib_os.c b/src/lib_os.c
+index 9e78d49a..f19b831c 100644
+--- a/src/lib_os.c
++++ b/src/lib_os.c
+@@ -1,6 +1,6 @@
+ /*
+ ** OS library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -205,12 +205,12 @@ LJLIB_CF(os_date)
+ setboolfield(L, "isdst", stm->tm_isdst);
+ } else if (*s) {
+ SBuf *sb = &G(L)->tmpbuf;
+- MSize sz = 0;
++ MSize sz = 0, retry = 4;
+ const char *q;
+ for (q = s; *q; q++)
+ sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
+ setsbufL(sb, L);
+- for (;;) {
++ while (retry--) { /* Limit growth for invalid format or empty result. */
+ char *buf = lj_buf_need(sb, sz);
+ size_t len = strftime(buf, sbufsz(sb), s, stm);
+ if (len) {
+diff --git a/src/lib_package.c b/src/lib_package.c
+index 6fac43ec..2068a098 100644
+--- a/src/lib_package.c
++++ b/src/lib_package.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Package library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
+ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
+ #endif
+
++#if LJ_TARGET_UWP
++void *LJ_WIN_LOADLIBA(const char *path)
++{
++ DWORD err = GetLastError();
++ wchar_t wpath[256];
++ HANDLE lib = NULL;
++ if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
++ lib = LoadPackagedLibrary(wpath, 0);
++ }
++ SetLastError(err);
++ return lib;
++}
++#endif
++
+ #undef setprogdir
+
+ static void setprogdir(lua_State *L)
+@@ -119,7 +133,7 @@ static void ll_unloadlib(void *lib)
+
+ static void *ll_load(lua_State *L, const char *path, int gl)
+ {
+- HINSTANCE lib = LoadLibraryExA(path, NULL, 0);
++ HINSTANCE lib = LJ_WIN_LOADLIBA(path);
+ if (lib == NULL) pusherror(L);
+ UNUSED(gl);
+ return lib;
+@@ -132,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+ return f;
+ }
+
++#if LJ_TARGET_UWP
++EXTERN_C IMAGE_DOS_HEADER __ImageBase;
++#endif
++
+ static const char *ll_bcsym(void *lib, const char *sym)
+ {
+ if (lib) {
+ return (const char *)GetProcAddress((HINSTANCE)lib, sym);
+ } else {
++#if LJ_TARGET_UWP
++ return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
++#else
+ HINSTANCE h = GetModuleHandleA(NULL);
+ const char *p = (const char *)GetProcAddress(h, sym);
+ if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ (const char *)ll_bcsym, &h))
+ p = (const char *)GetProcAddress(h, sym);
+ return p;
++#endif
+ }
+ }
+
+@@ -215,7 +237,12 @@ static const char *mksymname(lua_State *L, const char *modname,
+
+ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
+ {
+- void **reg = ll_register(L, path);
++ void **reg;
++ if (strlen(path) >= 4096) {
++ lua_pushliteral(L, "path too long");
++ return PACKAGE_ERR_LIB;
++ }
++ reg = ll_register(L, path);
+ if (*reg == NULL) *reg = ll_load(L, path, (*name == '*'));
+ if (*reg == NULL) {
+ return PACKAGE_ERR_LIB; /* Unable to load library. */
+@@ -233,7 +260,7 @@ static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r)
+ const char *bcdata = ll_bcsym(*reg, mksymname(L, name, SYMPREFIX_BC));
+ lua_pop(L, 1);
+ if (bcdata) {
+- if (luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
++ if (luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+ return PACKAGE_ERR_LOAD;
+ return 0;
+ }
+@@ -390,7 +417,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
+ if (lua_isnil(L, -1)) { /* Not found? */
+ const char *bcname = mksymname(L, name, SYMPREFIX_BC);
+ const char *bcdata = ll_bcsym(NULL, bcname);
+- if (bcdata == NULL || luaL_loadbuffer(L, bcdata, LJ_MAX_BUF, name) != 0)
++ if (bcdata == NULL || luaL_loadbuffer(L, bcdata, ~(size_t)0, name) != 0)
+ lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
+ }
+ return 1;
+@@ -398,7 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
+
+ /* ------------------------------------------------------------------------ */
+
+-#define sentinel ((void *)0x4004)
++#define KEY_SENTINEL (U64x(80000000,00000000)|'s')
+
+ static int lj_cf_package_require(lua_State *L)
+ {
+@@ -408,7 +435,7 @@ static int lj_cf_package_require(lua_State *L)
+ lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+ lua_getfield(L, 2, name);
+ if (lua_toboolean(L, -1)) { /* is it there? */
+- if (lua_touserdata(L, -1) == sentinel) /* check loops */
++ if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */
+ luaL_error(L, "loop or previous error loading module " LUA_QS, name);
+ return 1; /* package is already loaded */
+ }
+@@ -431,14 +458,14 @@ static int lj_cf_package_require(lua_State *L)
+ else
+ lua_pop(L, 1);
+ }
+- lua_pushlightuserdata(L, sentinel);
++ (L->top++)->u64 = KEY_SENTINEL;
+ lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
+ lua_pushstring(L, name); /* pass name as argument to module */
+ lua_call(L, 1, 1); /* run loaded module */
+ if (!lua_isnil(L, -1)) /* non-nil return? */
+ lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
+ lua_getfield(L, 2, name);
+- if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */
++ if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */
+ lua_pushboolean(L, 1); /* use true as result */
+ lua_pushvalue(L, -1); /* extra copy to be returned */
+ lua_setfield(L, 2, name); /* _LOADED[name] = true */
+diff --git a/src/lib_string.c b/src/lib_string.c
+index 76b0730a..75d855d6 100644
+--- a/src/lib_string.c
++++ b/src/lib_string.c
+@@ -1,6 +1,6 @@
+ /*
+ ** String library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -136,7 +136,7 @@ LJLIB_CF(string_dump)
+ /* ------------------------------------------------------------------------ */
+
+ /* macro to `unsign' a character */
+-#define uchar(c) ((unsigned char)(c))
++#define uchar(c) ((unsigned char)(c))
+
+ #define CAP_UNFINISHED (-1)
+ #define CAP_POSITION (-2)
+@@ -640,89 +640,14 @@ LJLIB_CF(string_gsub)
+
+ /* ------------------------------------------------------------------------ */
+
+-/* Emulate tostring() inline. */
+-static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
+-{
+- TValue *o = L->base+arg-1;
+- cTValue *mo;
+- lua_assert(o < L->top); /* Caller already checks for existence. */
+- if (LJ_LIKELY(tvisstr(o)))
+- return strV(o);
+- if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+- copyTV(L, L->top++, mo);
+- copyTV(L, L->top++, o);
+- lua_call(L, 1, 1);
+- copyTV(L, L->base+arg-1, --L->top);
+- return NULL; /* Buffer may be overwritten, retry. */
+- }
+- return lj_strfmt_obj(L, o);
+-}
+-
+ LJLIB_CF(string_format) LJLIB_REC(.)
+ {
+- int arg, top = (int)(L->top - L->base);
+- GCstr *fmt;
+- SBuf *sb;
+- FormatState fs;
+- SFormat sf;
+ int retry = 0;
+-again:
+- arg = 1;
+- sb = lj_buf_tmp_(L);
+- fmt = lj_lib_checkstr(L, arg);
+- lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+- while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+- if (sf == STRFMT_LIT) {
+- lj_buf_putmem(sb, fs.str, fs.len);
+- } else if (sf == STRFMT_ERR) {
+- lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
+- } else {
+- if (++arg > top)
+- luaL_argerror(L, arg, lj_obj_typename[0]);
+- switch (STRFMT_TYPE(sf)) {
+- case STRFMT_INT:
+- if (tvisint(L->base+arg-1)) {
+- int32_t k = intV(L->base+arg-1);
+- if (sf == STRFMT_INT)
+- lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
+- else
+- lj_strfmt_putfxint(sb, sf, k);
+- } else {
+- lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+- }
+- break;
+- case STRFMT_UINT:
+- if (tvisint(L->base+arg-1))
+- lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
+- else
+- lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
+- break;
+- case STRFMT_NUM:
+- lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
+- break;
+- case STRFMT_STR: {
+- GCstr *str = string_fmt_tostring(L, arg, retry);
+- if (str == NULL)
+- retry = 1;
+- else if ((sf & STRFMT_T_QUOTED))
+- lj_strfmt_putquoted(sb, str); /* No formatting. */
+- else
+- lj_strfmt_putfstr(sb, sf, str);
+- break;
+- }
+- case STRFMT_CHAR:
+- lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+- break;
+- case STRFMT_PTR: /* No formatting. */
+- lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
+- break;
+- default:
+- lua_assert(0);
+- break;
+- }
+- }
+- }
+- if (retry++ == 1) goto again;
++ SBuf *sb;
++ do {
++ sb = lj_buf_tmp_(L);
++ retry = lj_strfmt_putarg(L, sb, 1, -retry);
++ } while (retry > 0);
+ setstrV(L, L->top-1, lj_buf_str(L, sb));
+ lj_gc_check(L);
+ return 1;
+@@ -743,6 +668,9 @@ LUALIB_API int luaopen_string(lua_State *L)
+ setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
+ settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
+ mt->nomm = (uint8_t)(~(1u<<MM_index));
++#if LJ_HASBUFFER
++ lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
++#endif
+ return 1;
+ }
+
+diff --git a/src/lib_table.c b/src/lib_table.c
+index 0450f1f6..0214bb40 100644
+--- a/src/lib_table.c
++++ b/src/lib_table.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Table library.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -159,7 +159,7 @@ LJLIB_CF(table_concat) LJLIB_REC(.)
+ SBuf *sb = lj_buf_tmp_(L);
+ SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
+ if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
+- int32_t idx = (int32_t)(intptr_t)sbufP(sb);
++ int32_t idx = (int32_t)(intptr_t)sb->w;
+ cTValue *o = lj_tab_getint(t, idx);
+ lj_err_callerv(L, LJ_ERR_TABCAT,
+ lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
+diff --git a/src/lj.supp b/src/lj.supp
+deleted file mode 100644
+index 217f7c89..00000000
+--- a/src/lj.supp
++++ /dev/null
+@@ -1,41 +0,0 @@
+-# Valgrind suppression file for LuaJIT 2.0.
+-{
+- Optimized string compare
+- Memcheck:Addr4
+- fun:lj_str_cmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr1
+- fun:lj_str_cmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr4
+- fun:lj_str_new
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr1
+- fun:lj_str_new
+-}
+-{
+- Optimized string compare
+- Memcheck:Cond
+- fun:lj_str_new
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr4
+- fun:str_fastcmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Addr1
+- fun:str_fastcmp
+-}
+-{
+- Optimized string compare
+- Memcheck:Cond
+- fun:str_fastcmp
+-}
+diff --git a/src/lj_alloc.c b/src/lj_alloc.c
+index 95d15d04..165203fa 100644
+--- a/src/lj_alloc.c
++++ b/src/lj_alloc.c
+@@ -6,7 +6,7 @@
+ **
+ ** This is a version (aka dlmalloc) of malloc/free/realloc written by
+ ** Doug Lea and released to the public domain, as explained at
+-** http://creativecommons.org/licenses/publicdomain.
++** https://creativecommons.org/licenses/publicdomain.
+ **
+ ** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee)
+ **
+@@ -16,8 +16,8 @@
+ ** If you want to use dlmalloc in another project, you should get
+ ** the original from: ftp://gee.cs.oswego.edu/pub/misc/
+ ** For thread-safe derivatives, take a look at:
+-** - ptmalloc: http://www.malloc.de/
+-** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/
++** - ptmalloc: https://www.malloc.de/
++** - nedmalloc: https://www.nedprod.com/programs/portable/nedmalloc/
+ */
+
+ #define lj_alloc_c
+@@ -31,6 +31,7 @@
+ #include "lj_def.h"
+ #include "lj_arch.h"
+ #include "lj_alloc.h"
++#include "lj_prng.h"
+
+ #ifndef LUAJIT_USE_SYSMALLOC
+
+@@ -123,7 +124,7 @@
+
+ #if LJ_ALLOC_NTAVM
+ /* Undocumented, but hey, that's what we all love so much about Windows. */
+-typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
++typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits,
+ size_t *size, ULONG alloctype, ULONG prot);
+ static PNTAVM ntavm;
+
+@@ -140,7 +141,7 @@ static void init_mmap(void)
+ #define INIT_MMAP() init_mmap()
+
+ /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
+-static void *CALL_MMAP(size_t size)
++static void *mmap_plain(size_t size)
+ {
+ DWORD olderr = GetLastError();
+ void *ptr = NULL;
+@@ -151,7 +152,7 @@ static void *CALL_MMAP(size_t size)
+ }
+
+ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+-static void *DIRECT_MMAP(size_t size)
++static void *direct_mmap(size_t size)
+ {
+ DWORD olderr = GetLastError();
+ void *ptr = NULL;
+@@ -164,26 +165,29 @@ static void *DIRECT_MMAP(size_t size)
+ #else
+
+ /* Win32 MMAP via VirtualAlloc */
+-static void *CALL_MMAP(size_t size)
++static void *mmap_plain(size_t size)
+ {
+ DWORD olderr = GetLastError();
+- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
++ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ SetLastError(olderr);
+ return ptr ? ptr : MFAIL;
+ }
+
+ /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+-static void *DIRECT_MMAP(size_t size)
++static void *direct_mmap(size_t size)
+ {
+ DWORD olderr = GetLastError();
+- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+- PAGE_READWRITE);
++ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
++ PAGE_READWRITE);
+ SetLastError(olderr);
+ return ptr ? ptr : MFAIL;
+ }
+
+ #endif
+
++#define CALL_MMAP(prng, size) mmap_plain(size)
++#define DIRECT_MMAP(prng, size) direct_mmap(size)
++
+ /* This function supports releasing coalesed segments */
+ static int CALL_MUNMAP(void *ptr, size_t size)
+ {
+@@ -226,36 +230,17 @@ static int CALL_MUNMAP(void *ptr, size_t size)
+
+ #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
+
+-/* No point in a giant ifdef mess. Just try to open /dev/urandom.
+-** It doesn't really matter if this fails, since we get some ASLR bits from
+-** every unsuitable allocation, too. And we prefer linear allocation, anyway.
+-*/
+-#include <fcntl.h>
+-#include <unistd.h>
+-
+-static uintptr_t mmap_probe_seed(void)
+-{
+- uintptr_t val;
+- int fd = open("/dev/urandom", O_RDONLY);
+- if (fd != -1) {
+- int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
+- (void)close(fd);
+- if (ok) return val;
+- }
+- return 1; /* Punt. */
+-}
+-
+-static void *mmap_probe(size_t size)
++static void *mmap_probe(PRNGState *rs, size_t size)
+ {
+ /* Hint for next allocation. Doesn't need to be thread-safe. */
+ static uintptr_t hint_addr = 0;
+- static uintptr_t hint_prng = 0;
+ int olderr = errno;
+ int retry;
+ for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
+ void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
+ uintptr_t addr = (uintptr_t)p;
+- if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) {
++ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
++ ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
+ /* We got a suitable address. Bump the hint address. */
+ hint_addr = addr + size;
+ errno = olderr;
+@@ -280,15 +265,8 @@ static void *mmap_probe(size_t size)
+ }
+ }
+ /* Finally, try pseudo-random probing. */
+- if (LJ_UNLIKELY(hint_prng == 0)) {
+- hint_prng = mmap_probe_seed();
+- }
+- /* The unsuitable address we got has some ASLR PRNG bits. */
+- hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
+- do { /* The PRNG itself is very weak, but see above. */
+- hint_prng = hint_prng * 1103515245 + 12345;
+- hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
+- hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
++ do {
++ hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE);
+ } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
+ }
+ errno = olderr;
+@@ -299,18 +277,22 @@ static void *mmap_probe(size_t size)
+
+ #if LJ_ALLOC_MMAP32
+
+-#if defined(__sun__)
++#if LJ_TARGET_SOLARIS
+ #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
+ #else
+ #define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
+ #endif
+
++#if LJ_ALLOC_MMAP_PROBE
++static void *mmap_map32(PRNGState *rs, size_t size)
++#else
+ static void *mmap_map32(size_t size)
++#endif
+ {
+ #if LJ_ALLOC_MMAP_PROBE
+ static int fallback = 0;
+ if (fallback)
+- return mmap_probe(size);
++ return mmap_probe(rs, size);
+ #endif
+ {
+ int olderr = errno;
+@@ -320,7 +302,7 @@ static void *mmap_map32(size_t size)
+ #if LJ_ALLOC_MMAP_PROBE
+ if (ptr == MFAIL) {
+ fallback = 1;
+- return mmap_probe(size);
++ return mmap_probe(rs, size);
+ }
+ #endif
+ return ptr;
+@@ -330,20 +312,25 @@ static void *mmap_map32(size_t size)
+ #endif
+
+ #if LJ_ALLOC_MMAP32
+-#define CALL_MMAP(size) mmap_map32(size)
++#if LJ_ALLOC_MMAP_PROBE
++#define CALL_MMAP(prng, size) mmap_map32(prng, size)
++#else
++#define CALL_MMAP(prng, size) mmap_map32(size)
++#endif
+ #elif LJ_ALLOC_MMAP_PROBE
+-#define CALL_MMAP(size) mmap_probe(size)
++#define CALL_MMAP(prng, size) mmap_probe(prng, size)
+ #else
+-static void *CALL_MMAP(size_t size)
++static void *mmap_plain(size_t size)
+ {
+ int olderr = errno;
+ void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
+ errno = olderr;
+ return ptr;
+ }
++#define CALL_MMAP(prng, size) mmap_plain(size)
+ #endif
+
+-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
++#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+
+ #include <sys/resource.h>
+
+@@ -378,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
+ #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
+ #define CALL_MREMAP_NOMOVE 0
+ #define CALL_MREMAP_MAYMOVE 1
+-#if LJ_64 && !LJ_GC64
++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
+ #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
+ #else
+ #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
+@@ -393,7 +380,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
+ #endif
+
+ #ifndef DIRECT_MMAP
+-#define DIRECT_MMAP(s) CALL_MMAP(s)
++#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s)
+ #endif
+
+ #ifndef CALL_MREMAP
+@@ -552,6 +539,7 @@ struct malloc_state {
+ mchunkptr smallbins[(NSMALLBINS+1)*2];
+ tbinptr treebins[NTREEBINS];
+ msegment seg;
++ PRNGState *prng;
+ };
+
+ typedef struct malloc_state *mstate;
+@@ -609,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss)
+ noncontiguous segments are added.
+ */
+ #define TOP_FOOT_SIZE\
+- (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
++ (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+ /* ---------------------------- Indexing Bins ---------------------------- */
+
+@@ -834,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss)
+
+ /* ----------------------- Direct-mmapping chunks ----------------------- */
+
+-static void *direct_alloc(size_t nb)
++static void *direct_alloc(mstate m, size_t nb)
+ {
+ size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */
+- char *mm = (char *)(DIRECT_MMAP(mmsize));
++ char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize));
+ if (mm != CMFAIL) {
+ size_t offset = align_offset(chunk2mem(mm));
+ size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
+@@ -850,6 +838,7 @@ static void *direct_alloc(size_t nb)
+ return chunk2mem(p);
+ }
+ }
++ UNUSED(m);
+ return NULL;
+ }
+
+@@ -998,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb)
+
+ /* Directly map large chunks */
+ if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
+- void *mem = direct_alloc(nb);
++ void *mem = direct_alloc(m, nb);
+ if (mem != 0)
+ return mem;
+ }
+@@ -1007,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb)
+ size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
+ size_t rsize = granularity_align(req);
+ if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
+- char *mp = (char *)(CALL_MMAP(rsize));
++ char *mp = (char *)(CALL_MMAP(m->prng, rsize));
+ if (mp != CMFAIL) {
+ tbase = mp;
+ tsize = rsize;
+@@ -1234,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb)
+
+ /* ----------------------------------------------------------------------- */
+
+-void *lj_alloc_create(void)
++void *lj_alloc_create(PRNGState *rs)
+ {
+ size_t tsize = DEFAULT_GRANULARITY;
+ char *tbase;
+ INIT_MMAP();
+- tbase = (char *)(CALL_MMAP(tsize));
++ UNUSED(rs);
++ tbase = (char *)(CALL_MMAP(rs, tsize));
+ if (tbase != CMFAIL) {
+ size_t msize = pad_request(sizeof(struct malloc_state));
+ mchunkptr mn;
+@@ -1258,6 +1248,12 @@ void *lj_alloc_create(void)
+ return NULL;
+ }
+
++void lj_alloc_setprng(void *msp, PRNGState *rs)
++{
++ mstate ms = (mstate)msp;
++ ms->prng = rs;
++}
++
+ void lj_alloc_destroy(void *msp)
+ {
+ mstate ms = (mstate)msp;
+diff --git a/src/lj_alloc.h b/src/lj_alloc.h
+index f87a7cf3..669f50b7 100644
+--- a/src/lj_alloc.h
++++ b/src/lj_alloc.h
+@@ -9,7 +9,8 @@
+ #include "lj_def.h"
+
+ #ifndef LUAJIT_USE_SYSMALLOC
+-LJ_FUNC void *lj_alloc_create(void);
++LJ_FUNC void *lj_alloc_create(PRNGState *rs);
++LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs);
+ LJ_FUNC void lj_alloc_destroy(void *msp);
+ LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
+ #endif
+diff --git a/src/lj_api.c b/src/lj_api.c
+index d17a5754..8c60c058 100644
+--- a/src/lj_api.c
++++ b/src/lj_api.c
+@@ -1,6 +1,6 @@
+ /*
+ ** Public Lua/C API.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Major portions taken verbatim or adapted from the Lua interpreter.
+ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+@@ -28,8 +28,8 @@
+
+ /* -- Common helper functions --------------------------------------------- */
+
+-#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base))
+-#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L))
++#define lj_checkapi_slot(idx) \
++ lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx))
+
+ static TValue *index2adr(lua_State *L, int idx)
+ {
+@@ -37,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx)
+ TValue *o = L->base + (idx - 1);
+ return o < L->top ? o : niltv(L);
+ } else if (idx > LUA_REGISTRYINDEX) {
+- api_check(L, idx != 0 && -idx <= L->top - L->base);
++ lj_checkapi(idx != 0 && -idx <= L->top - L->base,
++ "bad stack slot %d", idx);
+ return L->top + idx;
+ } else if (idx == LUA_GLOBALSINDEX) {
+ TValue *o = &G(L)->tmptv;
+@@ -47,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx)
+ return registry(L);
+ } else {
+ GCfunc *fn = curr_func(L);
+- api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn));
++ lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn),
++ "calling frame is not a C function");
+ if (idx == LUA_ENVIRONINDEX) {
+ TValue *o = &G(L)->tmptv;
+ settabV(L, o, tabref(fn->c.env));
+@@ -59,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx)
+ }
+ }
+
+-static TValue *stkindex2adr(lua_State *L, int idx)
++static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx)
++{
++ TValue *o = index2adr(L, idx);
++ lj_checkapi(o != niltv(L), "invalid stack slot %d", idx);
++ return o;
++}
++
++static TValue *index2adr_stack(lua_State *L, int idx)
+ {
+ if (idx > 0) {
+ TValue *o = L->base + (idx - 1);
++ if (o < L->top) {
++ return o;
++ } else {
++ lj_checkapi(0, "invalid stack slot %d", idx);
++ return niltv(L);
++ }
+ return o < L->top ? o : niltv(L);
+ } else {
+- api_check(L, idx != 0 && -idx <= L->top - L->base);
++ lj_checkapi(idx != 0 && -idx <= L->top - L->base,
++ "invalid stack slot %d", idx);
+ return L->top + idx;
+ }
+ }
+@@ -99,17 +115,17 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
+ lj_err_callerv(L, LJ_ERR_STKOVM, msg);
+ }
+
+-LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
++LUA_API void lua_xmove(lua_State *L, lua_State *to, int n)
+ {
+ TValue *f, *t;
+- if (from == to) return;
+- api_checknelems(from, n);
+- api_check(from, G(from) == G(to));
++ if (L == to) return;
++ lj_checkapi_slot(n);
++ lj_checkapi(G(L) == G(to), "move across global states");
+ lj_state_checkstack(to, (MSize)n);
+- f = from->top;
++ f = L->top;
+ t = to->top = to->top + n;
+ while (--n >= 0) copyTV(to, --t, --f);
+- from->top = f;
++ L->top = f;
+ }
+
+ LUA_API const lua_Number *lua_version(lua_State *L)
+@@ -129,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L)
+ LUA_API void lua_settop(lua_State *L, int idx)
+ {
+ if (idx >= 0) {
+- api_check(L, idx <= tvref(L->maxstack) - L->base);
++ lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx);
+ if (L->base + idx > L->top) {
+ if (L->base + idx >= tvref(L->maxstack))
+ lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
+@@ -138,23 +154,21 @@ LUA_API void lua_settop(lua_State *L, int idx)
+ L->top = L->base + idx;
+ }
+ } else {
+- api_check(L, -(idx+1) <= (L->top - L->base));
++ lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx);
+ L->top += idx+1; /* Shrinks top (idx < 0). */
+ }
+ }
+
+ LUA_API void lua_remove(lua_State *L, int idx)
+ {
+- TValue *p = stkindex2adr(L, idx);
+- api_checkvalidindex(L, p);
++ TValue *p = index2adr_stack(L, idx);
+ while (++p < L->top) copyTV(L, p-1, p);
+ L->top--;
+ }
+
+ LUA_API void lua_insert(lua_State *L, int idx)
+ {
+- TValue *q, *p = stkindex2adr(L, idx);
+- api_checkvalidindex(L, p);
++ TValue *q, *p = index2adr_stack(L, idx);
+ for (q = L->top; q > p; q--) copyTV(L, q, q-1);
+ copyTV(L, p, L->top);
+ }
+@@ -162,19 +176,18 @@ LUA_API void lua_insert(lua_State *L, int idx)
+ static void copy_slot(lua_State *L, TValue *f, int idx)
+ {
+ if (idx == LUA_GLOBALSINDEX) {
+- api_check(L, tvistab(f));
++ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
+ /* NOBARRIER: A thread (i.e. L) is never black. */
+ setgcref(L->env, obj2gco(tabV(f)));
+ } else if (idx == LUA_ENVIRONINDEX) {
+ GCfunc *fn = curr_func(L);
+ if (fn->c.gct != ~LJ_TFUNC)
+ lj_err_msg(L, LJ_ERR_NOENV);
+- api_check(L, tvistab(f));
++ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
+ setgcref(fn->c.env, obj2gco(tabV(f)));
+ lj_gc_barrier(L, fn, f);
+ } else {
+- TValue *o = index2adr(L, idx);
+- api_checkvalidindex(L, o);
++ TValue *o = index2adr_check(L, idx);
+ copyTV(L, o, f);
+ if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
+ lj_gc_barrier(L, curr_func(L), f);
+@@ -183,7 +196,7 @@ static void copy_slot(lua_State *L, TValue *f, int idx)
+
+ LUA_API void lua_replace(lua_State *L, int idx)
+ {
+- api_checknelems(L, 1);
++ lj_checkapi_slot(1);
+ copy_slot(L, L->top - 1, idx);
+ L->top--;
+ }
+@@ -219,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx)
+ #else
+ int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u);
+ #endif
+- lua_assert(tt != LUA_TNIL || tvisnil(o));
++ lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion");
+ return tt;
+ }
+ }
+@@ -595,7 +608,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx)
+ if (tvisudata(o))
+ return uddata(udataV(o));
+ else if (tvislightud(o))
+- return lightudV(o);
++ return lightudV(G(L), o);
+ else
+ return NULL;
+ }
+@@ -608,7 +621,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
+
+ LUA_API const void *lua_topointer(lua_State *L, int idx)
+ {
+- return lj_obj_ptr(index2adr(L, idx));
++ return lj_obj_ptr(G(L), index2adr(L, idx));
+ }
+
+ /* -- Stack setters (object creation) ------------------------------------- */
+@@ -677,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
+ {
+ GCfunc *fn;
+ lj_gc_check(L);
+- api_checknelems(L, n);
++ lj_checkapi_slot(n);
+ fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
+ fn->c.f = f;
+ L->top -= n;
+ while (n--)
+ copyTV(L, &fn->c.upvalue[n], L->top+n);
+ setfuncV(L, L->top, fn);
+- lua_assert(iswhite(obj2gco(fn)));
++ lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white");
+ incr_top(L);
+ }
+
+@@ -696,7 +709,10 @@ LUA_API void lua_pushboolean(lua_State *L, int b)
+
+ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
+ {
+- setlightudV(L->top, checklightudptr(L, p));
++#if LJ_64
++ p = lj_lightud_intern(L, p);
++#endif
++ setrawlightudV(L->top, p);
+ incr_top(L);
+ }
+
+@@ -754,7 +770,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size)
+
+ LUA_API void lua_concat(lua_State *L, int n)
+ {
+- api_checknelems(L, n);
++ lj_checkapi_slot(n);
+ if (n >= 2) {
+ n--;
+ do {
+@@ -780,9 +796,8 @@ LUA_API void lua_concat(lua_State *L, int n)
+
+ LUA_API void lua_gettable(lua_State *L, int idx)
+ {
+- cTValue *v, *t = index2adr(L, idx);
+- api_checkvalidindex(L, t);
+- v = lj_meta_tget(L, t, L->top-1);
++ cTValue *t = index2adr_check(L, idx);
++ cTValue *v = lj_meta_tget(L, t, L->top-1);
+ if (v == NULL) {
+ L->top += 2;
+ lj_vm_call(L, L->top-2, 1+1);
+@@ -794,9 +809,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
+
+ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
+ {
+- cTValue *v, *t = index2adr(L, idx);
++ cTValue *v, *t = index2adr_check(L, idx);
+ TValue key;
+- api_checkvalidindex(L, t);
+ setstrV(L, &key, lj_str_newz(L, k));
+ v = lj_meta_tget(L, t, &key);
+ if (v == NULL) {
+@@ -812,14 +826,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
+ LUA_API void lua_rawget(lua_State *L, int idx)
+ {
+ cTValue *t = index2adr(L, idx);
+- api_check(L, tvistab(t));
++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
+ copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
+ }
+
+ LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
+ {
+ cTValue *v, *t = index2adr(L, idx);
+- api_check(L, tvistab(t));
++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
+ v = lj_tab_getint(tabV(t), n);
+ if (v) {
+ copyTV(L, L->top, v);
+@@ -861,8 +875,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
+
+ LUA_API void lua_getfenv(lua_State *L, int idx)
+ {
+- cTValue *o = index2adr(L, idx);
+- api_checkvalidindex(L, o);
++ cTValue *o = index2adr_check(L, idx);
+ if (tvisfunc(o)) {
+ settabV(L, L->top, tabref(funcV(o)->c.env));
+ } else if (tvisudata(o)) {
+@@ -879,12 +892,14 @@ LUA_API int lua_next(lua_State *L, int idx)
+ {
+ cTValue *t = index2adr(L, idx);
+ int more;
+- api_check(L, tvistab(t));
+- more = lj_tab_next(L, tabV(t), L->top-1);
+- if (more) {
++ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
++ more = lj_tab_next(tabV(t), L->top-1, L->top-1);
++ if (more > 0) {
+ incr_top(L); /* Return new key and value slot. */
+- } else { /* End of traversal. */
++ } else if (!more) { /* End of traversal. */
+ L->top--; /* Remove key slot. */
++ } else {
++ lj_err_msg(L, LJ_ERR_NEXTIDX);
+ }
+ return more;
+ }
+@@ -892,7 +907,8 @@ LUA_API int lua_next(lua_State *L, int idx)
+ LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n)
+ {
+ TValue *val;
+- const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val);
++ GCobj *o;
++ const char *name = lj_debug_uvnamev(index2adr(L, idx), (uint32_t)(n-1), &val, &o);
+ if (name) {
+ copyTV(L, L->top, val);
+ incr_top(L);
+@@ -904,7 +920,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n)
+ {
+ GCfunc *fn = funcV(index2adr(L, idx));
+ n--;
+- api_check(L, (uint32_t)n < fn->l.nupvalues);
++ lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n);
+ return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
+ (void *)&fn->c.upvalue[n];
+ }
+@@ -914,8 +930,10 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
+ GCfunc *fn1 = funcV(index2adr(L, idx1));
+ GCfunc *fn2 = funcV(index2adr(L, idx2));
+ n1--; n2--;
+- api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues);
+- api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues);
++ lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1);
++ lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2);
++ lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1);
++ lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1);
+ setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]);
+ lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
+ }
+@@ -944,9 +962,8 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+ LUA_API void lua_settable(lua_State *L, int idx)
+ {
+ TValue *o;
+- cTValue *t = index2adr(L, idx);
+- api_checknelems(L, 2);
+- api_checkvalidindex(L, t);
++ cTValue *t = index2adr_check(L, idx);
++ lj_checkapi_slot(2);
+ o = lj_meta_tset(L, t, L->top-2);
+ if (o) {
+ /* NOBARRIER: lj_meta_tset ensures the table is not black. */
+@@ -965,9 +982,8 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
+ {
+ TValue *o;
+ TValue key;
+- cTValue *t = index2adr(L, idx);
+- api_checknelems(L, 1);
+- api_checkvalidindex(L, t);
++ cTValue *t = index2adr_check(L, idx);
++ lj_checkapi_slot(1);
+ setstrV(L, &key, lj_str_newz(L, k));
+ o = lj_meta_tset(L, t, &key);
+ if (o) {
+@@ -986,7 +1002,7 @@ LUA_API void lua_rawset(lua_State *L, int idx)
+ {
+ GCtab *t = tabV(index2adr(L, idx));
+ TValue *dst, *key;
+- api_checknelems(L, 2);
++ lj_checkapi_slot(2);
+ key = L->top-2;
+ dst = lj_tab_set(L, t, key);
+ copyTV(L, dst, key+1);
+@@ -998,7 +1014,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n)
+ {
+ GCtab *t = tabV(index2adr(L, idx));
+ TValue *dst, *src;
+- api_checknelems(L, 1);
++ lj_checkapi_slot(1);
+ dst = lj_tab_setint(L, t, n);
+ src = L->top-1;
+ copyTV(L, dst, src);
+@@ -1010,13 +1026,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
+ {
+ global_State *g;
+ GCtab *mt;
+- cTValue *o = index2adr(L, idx);
+- api_checknelems(L, 1);
+- api_checkvalidindex(L, o);
++ cTValue *o = index2adr_check(L, idx);
++ lj_checkapi_slot(1);
+ if (tvisnil(L->top-1)) {
+ mt = NULL;
+ } else {
+- api_check(L, tvistab(L->top-1));
++ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
+ mt = tabV(L->top-1);
+ }
+ g = G(L);
+@@ -1053,11 +1068,10 @@ LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
+
+ LUA_API int lua_setfenv(lua_State *L, int idx)
+ {
+- cTValue *o = index2adr(L, idx);
++ cTValue *o = index2adr_check(L, idx);
+ GCtab *t;
+- api_checknelems(L, 1);
+- api_checkvalidindex(L, o);
+- api_check(L, tvistab(L->top-1));
++ lj_checkapi_slot(1);
++ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
+ t = tabV(L->top-1);
+ if (tvisfunc(o)) {
+ setgcref(funcV(o)->c.env, obj2gco(t));
+@@ -1078,13 +1092,14 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
+ {
+ cTValue *f = index2adr(L, idx);
+ TValue *val;
++ GCobj *o;
+ const char *name;
+- api_checknelems(L, 1);
+- name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val);
++ lj_checkapi_slot(1);
++ name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o);
+ if (name) {
+ L->top--;
+ copyTV(L, val, L->top);
+- lj_gc_barrier(L, funcV(f), L->top);
++ lj_gc_barrier(L, o, L->top);
+ }
+ return name;
+ }
+@@ -1106,8 +1121,9 @@ static TValue *api_call_base(lua_State *L, int nargs)
+
+ LUA_API void lua_call(lua_State *L, int nargs, int nresults)
+ {
+- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
+- api_checknelems(L, nargs+1);
++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
++ "thread called in wrong state %d", L->status);
++ lj_checkapi_slot(nargs+1);
+ lj_vm_call(L, api_call_base(L, nargs), nresults+1);
+ }
+
+@@ -1117,13 +1133,13 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
+ uint8_t oldh = hook_save(g);
+ ptrdiff_t ef;
+ int status;
+- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
+- api_checknelems(L, nargs+1);
++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
++ "thread called in wrong state %d", L->status);
++ lj_checkapi_slot(nargs+1);
+ if (errfunc == 0) {
+ ef = 0;
+ } else {
+- cTValue *o = stkindex2adr(L, errfunc);
+- api_checkvalidindex(L, o);
++ cTValue *o = index2adr_stack(L, errfunc);
+ ef = savestack(L, o);
+ }
+ status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
+@@ -1138,7 +1154,10 @@ static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
+ fn->c.f = func;
+ setfuncV(L, top++, fn);
+ if (LJ_FR2) setnilV(top++);
+- setlightudV(top++, checklightudptr(L, ud));
++#if LJ_64
++ ud = lj_lightud_intern(L, ud);
++#endif
++ setrawlightudV(top++, ud);
+ cframe_nres(L->cframe) = 1+0; /* Zero results. */
+ L->top = top;
+ return top-1; /* Now call the newly allocated C function. */
+@@ -1149,7 +1168,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
+ global_State *g = G(L);
+ uint8_t oldh = hook_save(g);
+ int status;
+- api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
++ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
++ "thread called in wrong state %d", L->status);
+ status = lj_vm_cpcall(L, func, ud, cpcall);
+ if (status) hook_restore(g, oldh);
+ return status;
+@@ -1198,11 +1218,12 @@ LUA_API int lua_yield(lua_State *L, int nresults)
+ setcont(top, lj_cont_hook);
+ if (LJ_FR2) top++;
+ setframe_pc(top, cframe_pc(cf)-1);
+- if (LJ_FR2) top++;
++ top++;
+ setframe_gc(top, obj2gco(L), LJ_TTHREAD);
++ if (LJ_FR2) top++;
+ setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+ L->top = L->base = top+1;
+-#if LJ_TARGET_X64
++#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS
+ lj_err_throw(L, LUA_YIELD);
+ #else
+ L->cframe = NULL;
+diff --git a/src/lj_arch.h b/src/lj_arch.h
+index c8d7138e..ae999467 100644
+--- a/src/lj_arch.h
++++ b/src/lj_arch.h
+@@ -1,6 +1,6 @@
+ /*
+ ** Target architecture selection.
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_ARCH_H
+@@ -8,6 +8,8 @@
+
+ #include "lua.h"
+
++/* -- Target definitions -------------------------------------------------- */
++
+ /* Target endianess. */
+ #define LUAJIT_LE 0
+ #define LUAJIT_BE 1
+@@ -38,6 +40,14 @@
+ #define LUAJIT_OS_BSD 4
+ #define LUAJIT_OS_POSIX 5
+
++/* Number mode. */
++#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
++#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
++#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
++#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
++
++/* -- Target detection ---------------------------------------------------- */
++
+ /* Select native target if no target defined. */
+ #ifndef LUAJIT_TARGET
+
+@@ -69,12 +79,16 @@
+ #elif defined(__linux__)
+ #define LUAJIT_OS LUAJIT_OS_LINUX
+ #elif defined(__MACH__) && defined(__APPLE__)
++#include "TargetConditionals.h"
+ #define LUAJIT_OS LUAJIT_OS_OSX
+ #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
+ defined(__NetBSD__) || defined(__OpenBSD__) || \
+ defined(__DragonFly__)) && !defined(__ORBIS__)
+ #define LUAJIT_OS LUAJIT_OS_BSD
+-#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__)
++#elif (defined(__sun__) && defined(__svr4__))
++#define LJ_TARGET_SOLARIS 1
++#define LUAJIT_OS LUAJIT_OS_POSIX
++#elif defined(__HAIKU__)
+ #define LUAJIT_OS LUAJIT_OS_POSIX
+ #elif defined(__CYGWIN__)
+ #define LJ_TARGET_CYGWIN 1
+@@ -103,10 +117,16 @@
+ #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
+ #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
+ #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
+-#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
++#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
+ #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
+ #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
+
++#if TARGET_OS_IPHONE
++#define LJ_TARGET_IOS 1
++#else
++#define LJ_TARGET_IOS 0
++#endif
++
+ #ifdef __CELLOS_LV2__
+ #define LJ_TARGET_PS3 1
+ #define LJ_TARGET_CONSOLE 1
+@@ -135,10 +155,14 @@
+ #define LJ_TARGET_GC64 1
+ #endif
+
+-#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
+-#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
+-#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
+-#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
++#ifdef _UWP
++#define LJ_TARGET_UWP 1
++#if LUAJIT_TARGET == LUAJIT_ARCH_X64
++#define LJ_TARGET_GC64 1
++#endif
++#endif
++
++/* -- Arch-specific settings ---------------------------------------------- */
+
+ /* Set target architecture properties. */
+ #if LUAJIT_TARGET == LUAJIT_ARCH_X86
+@@ -146,14 +170,10 @@
+ #define LJ_ARCH_NAME "x86"
+ #define LJ_ARCH_BITS 32
+ #define LJ_ARCH_ENDIAN LUAJIT_LE
+-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+-#define LJ_ABI_WIN 1
+-#else
+-#define LJ_ABI_WIN 0
+-#endif
+ #define LJ_TARGET_X86 1
+ #define LJ_TARGET_X86ORX64 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 8
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNALIGNED 1
+@@ -164,21 +184,19 @@
+ #define LJ_ARCH_NAME "x64"
+ #define LJ_ARCH_BITS 64
+ #define LJ_ARCH_ENDIAN LUAJIT_LE
+-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+-#define LJ_ABI_WIN 1
+-#else
+-#define LJ_ABI_WIN 0
+-#endif
+ #define LJ_TARGET_X64 1
+ #define LJ_TARGET_X86ORX64 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 16
+ #define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNALIGNED 1
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
+-#ifdef LUAJIT_ENABLE_GC64
++#ifndef LUAJIT_DISABLE_GC64
+ #define LJ_TARGET_GC64 1
++#elif LJ_TARGET_OSX
++#error "macOS requires GC64 -- don't disable it"
+ #endif
+
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
+@@ -195,19 +213,20 @@
+ #define LJ_ABI_EABI 1
+ #define LJ_TARGET_ARM 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 14
+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
+ #define LJ_TARGET_MASKSHIFT 0
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
+-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
++#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
+ #define LJ_ARCH_VERSION 80
+-#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
++#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
+ #define LJ_ARCH_VERSION 70
+ #elif __ARM_ARCH_6T2__
+ #define LJ_ARCH_VERSION 61
+-#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
++#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
+ #define LJ_ARCH_VERSION 60
+ #else
+ #define LJ_ARCH_VERSION 50
+@@ -225,6 +244,7 @@
+ #endif
+ #define LJ_TARGET_ARM64 1
+ #define LJ_TARGET_EHRETREG 0
++#define LJ_TARGET_EHRAREG 30
+ #define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+@@ -254,23 +274,43 @@
+ #else
+ #define LJ_ARCH_BITS 32
+ #define LJ_ARCH_NAME "ppc"
++
++#if !defined(LJ_ARCH_HASFPU)
++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
++#define LJ_ARCH_HASFPU 0
++#else
++#define LJ_ARCH_HASFPU 1
++#endif
++#endif
++
++#if !defined(LJ_ABI_SOFTFP)
++#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
++#define LJ_ABI_SOFTFP 1
++#else
++#define LJ_ABI_SOFTFP 0
++#endif
++#endif
++#endif
++
++#if LJ_ABI_SOFTFP
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
++#else
++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
+ #endif
+
+ #define LJ_TARGET_PPC 1
+ #define LJ_TARGET_EHRETREG 3
++#define LJ_TARGET_EHRAREG 65
+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
+ #define LJ_TARGET_MASKSHIFT 0
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
+-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
+
+ #if LJ_TARGET_CONSOLE
+ #define LJ_ARCH_PPC32ON64 1
+ #define LJ_ARCH_NOFFI 1
+ #elif LJ_ARCH_BITS == 64
+-#define LJ_ARCH_PPC64 1
+-#define LJ_TARGET_GC64 1
+-#define LJ_ARCH_NOJIT 1 /* NYI */
++#error "No support for PPC64"
+ #endif
+
+ #if _ARCH_PWR7
+@@ -302,18 +342,38 @@
+ #elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
+
+ #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
++#if __mips_isa_rev >= 6
++#define LJ_TARGET_MIPSR6 1
++#define LJ_TARGET_UNALIGNED 1
++#endif
+ #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips32r6el"
++#else
+ #define LJ_ARCH_NAME "mipsel"
++#endif
++#else
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips64r6el"
+ #else
+ #define LJ_ARCH_NAME "mips64el"
+ #endif
++#endif
+ #define LJ_ARCH_ENDIAN LUAJIT_LE
+ #else
+ #if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips32r6"
++#else
+ #define LJ_ARCH_NAME "mips"
++#endif
++#else
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_NAME "mips64r6"
+ #else
+ #define LJ_ARCH_NAME "mips64"
+ #endif
++#endif
+ #define LJ_ARCH_ENDIAN LUAJIT_BE
+ #endif
+
+@@ -337,22 +397,22 @@
+ #define LJ_ARCH_BITS 32
+ #define LJ_TARGET_MIPS32 1
+ #else
+-#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU
+-#define LJ_ARCH_NOJIT 1 /* NYI */
+-#endif
+ #define LJ_ARCH_BITS 64
+ #define LJ_TARGET_MIPS64 1
+ #define LJ_TARGET_GC64 1
+ #endif
+ #define LJ_TARGET_MIPS 1
+ #define LJ_TARGET_EHRETREG 4
++#define LJ_TARGET_EHRAREG 31
+ #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
+ #define LJ_TARGET_MASKSHIFT 1
+ #define LJ_TARGET_MASKROT 1
+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
+-#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
++#if LJ_TARGET_MIPSR6
++#define LJ_ARCH_VERSION 60
++#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
+ #define LJ_ARCH_VERSION 20
+ #else
+ #define LJ_ARCH_VERSION 10
+@@ -362,9 +422,7 @@
+ #error "No target architecture defined"
+ #endif
+
+-#ifndef LJ_PAGESIZE
+-#define LJ_PAGESIZE 4096
+-#endif
++/* -- Checks for requirements --------------------------------------------- */
+
+ /* Check for minimum required compiler versions. */
+ #if defined(__GNUC__)
+@@ -418,29 +476,30 @@
+ #error "No support for ILP32 model on ARM64"
+ #endif
+ #elif LJ_TARGET_PPC
+-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+-#error "No support for PowerPC CPUs without double-precision FPU"
+-#endif
+-#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
++#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
+ #error "No support for little-endian PPC32"
+ #endif
+-#if LJ_ARCH_PPC64
+-#error "No support for PowerPC 64 bit mode (yet)"
+-#endif
+-#ifdef __NO_FPRS__
++#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
+ #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
+ #endif
+ #elif LJ_TARGET_MIPS32
+ #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
+ #error "Only o32 ABI supported for MIPS32"
+ #endif
++#if LJ_TARGET_MIPSR6
++/* Not that useful, since most available r6 CPUs are 64 bit. */
++#error "No support for MIPS32R6"
++#endif
+ #elif LJ_TARGET_MIPS64
+ #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
++/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
+ #error "Only n64 ABI supported for MIPS64"
+ #endif
+ #endif
+ #endif
+
++/* -- Derived defines ----------------------------------------------------- */
++
+ /* Enable or disable the dual-number mode for the VM. */
+ #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \
+ (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1)
+@@ -490,6 +549,13 @@
+ #define LJ_HASFFI 1
+ #endif
+
++/* Disable or enable the string buffer extension. */
++#if defined(LUAJIT_DISABLE_BUFFER)
++#define LJ_HASBUFFER 0
++#else
++#define LJ_HASBUFFER 1
++#endif
++
+ #if defined(LUAJIT_DISABLE_PROFILE)
+ #define LJ_HASPROFILE 0
+ #elif LJ_TARGET_POSIX
+@@ -512,6 +578,7 @@
+ #define LJ_ABI_SOFTFP 0
+ #endif
+ #define LJ_SOFTFP (!LJ_ARCH_HASFPU)
++#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
+
+ #if LJ_ARCH_ENDIAN == LUAJIT_BE
+ #define LJ_LE 0
+@@ -537,26 +604,52 @@
+ #define LJ_TARGET_UNALIGNED 0
+ #endif
+
++#ifndef LJ_PAGESIZE
++#define LJ_PAGESIZE 4096
++#endif
++
+ /* Various workarounds for embedded operating systems or weak C runtimes. */
+ #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
+ #define LUAJIT_NO_LOG2
+ #endif
+-#if defined(__symbian__) || LJ_TARGET_WINDOWS
+-#define LUAJIT_NO_EXP2
+-#endif
+ #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
+ #define LJ_NO_SYSTEM 1
+ #endif
+
+-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
+-/* NYI: no support for compact unwind specification, yet. */
+-#define LUAJIT_NO_UNWIND 1
++#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
++#define LJ_ABI_WIN 1
++#else
++#define LJ_ABI_WIN 0
+ #endif
+
+-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
++#if LJ_TARGET_WINDOWS
++#if LJ_TARGET_UWP
++#define LJ_WIN_VALLOC VirtualAllocFromApp
++#define LJ_WIN_VPROTECT VirtualProtectFromApp
++extern void *LJ_WIN_LOADLIBA(const char *path);
++#else
++#define LJ_WIN_VALLOC VirtualAlloc
++#define LJ_WIN_VPROTECT VirtualProtect
++#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
++#endif
++#endif
++
++#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
+ #define LJ_NO_UNWIND 1
+ #endif
+
++#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
++#define LJ_UNWIND_EXT 1
++#else
++#define LJ_UNWIND_EXT 0
++#endif
++
++#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
++#define LJ_UNWIND_JIT 1
++#else
++#define LJ_UNWIND_JIT 0
++#endif
++
+ /* Compatibility with Lua 5.1 vs. 5.2. */
+ #ifdef LUAJIT_ENABLE_LUA52COMPAT
+ #define LJ_52 1
+@@ -564,4 +657,46 @@
+ #define LJ_52 0
+ #endif
+
++/* -- VM security --------------------------------------------------------- */
++
++/* Don't make any changes here. Instead build with:
++** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value"
++**
++** Important note to distro maintainers: DO NOT change the defaults for a
++** regular distro build -- neither upwards, nor downwards!
++** These build-time configurable security flags are intended for embedders
++** who may have specific needs wrt. security vs. performance.
++*/
++
++/* Security defaults. */
++#ifndef LUAJIT_SECURITY_PRNG
++/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */
++#define LUAJIT_SECURITY_PRNG 1
++#endif
++
++#ifndef LUAJIT_SECURITY_STRHASH
++/* String hash: 0 = sparse only, 1 = sparse + dense. */
++#define LUAJIT_SECURITY_STRHASH 1
++#endif
++
++#ifndef LUAJIT_SECURITY_STRID
++/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */
++#define LUAJIT_SECURITY_STRID 1
++#endif
++
++#ifndef LUAJIT_SECURITY_MCODE
++/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */
++#define LUAJIT_SECURITY_MCODE 1
++#endif
++
++#define LJ_SECURITY_MODE \
++ ( 0u \
++ | ((LUAJIT_SECURITY_PRNG & 3) << 0) \
++ | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \
++ | ((LUAJIT_SECURITY_STRID & 3) << 4) \
++ | ((LUAJIT_SECURITY_MCODE & 3) << 6) \
++ )
++#define LJ_SECURITY_MODESTRING \
++ "\004prng\007strhash\005strid\005mcode"
++
+ #endif
+diff --git a/src/lj_asm.c b/src/lj_asm.c
+index c2cf5a95..5968c5e3 100644
+--- a/src/lj_asm.c
++++ b/src/lj_asm.c
+@@ -1,6 +1,6 @@
+ /*
+ ** IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #define lj_asm_c
+@@ -11,6 +11,7 @@
+ #if LJ_HASJIT
+
+ #include "lj_gc.h"
++#include "lj_buf.h"
+ #include "lj_str.h"
+ #include "lj_tab.h"
+ #include "lj_frame.h"
+@@ -22,7 +23,6 @@
+ #include "lj_ircall.h"
+ #include "lj_iropt.h"
+ #include "lj_mcode.h"
+-#include "lj_iropt.h"
+ #include "lj_trace.h"
+ #include "lj_snap.h"
+ #include "lj_asm.h"
+@@ -72,6 +72,8 @@ typedef struct ASMState {
+ IRRef snaprename; /* Rename highwater mark for snapshot check. */
+ SnapNo snapno; /* Current snapshot number. */
+ SnapNo loopsnapno; /* Loop snapshot number. */
++ int snapalloc; /* Current snapshot needs allocation. */
++ BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */
+
+ IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
+ IRRef sectref; /* Section base reference (loopref or 0). */
+@@ -85,6 +87,7 @@ typedef struct ASMState {
+
+ MCode *mcbot; /* Bottom of reserved MCode. */
+ MCode *mctop; /* Top of generated MCode. */
++ MCode *mctoporig; /* Original top of generated MCode. */
+ MCode *mcloop; /* Pointer to loop MCode (or NULL). */
+ MCode *invmcp; /* Points to invertible loop branch (or NULL). */
+ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
+@@ -97,6 +100,12 @@ typedef struct ASMState {
+ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */
+ } ASMState;
+
++#ifdef LUA_USE_ASSERT
++#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
++#else
++#define lj_assertA(c, ...) ((void)as)
++#endif
++
+ #define IR(ref) (&as->ir[(ref)])
+
+ #define ASMREF_TMP1 REF_TRUE /* Temp. register. */
+@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
+ #ifdef LUA_USE_ASSERT
+ if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
+ IRIns *ir = IR(as->curins+1);
+- fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
+- as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
+- lua_assert(0);
++ lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
++ as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
+ }
+ #endif
+ if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
+@@ -244,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
+ *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
+ } else {
+ *p++ = '?';
+- lua_assert(0);
++ lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
+ }
+ } else if (e[1] == 'f' || e[1] == 'i') {
+ IRRef ref;
+@@ -262,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
+ } else if (e[1] == 'x') {
+ p += sprintf(p, "%08x", va_arg(argp, int32_t));
+ } else {
+- lua_assert(0);
++ lj_assertA(0, "bad debug format code");
+ }
+ fmt = e+2;
+ }
+@@ -321,7 +329,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ Reg r;
+ if (ra_iskref(ref)) {
+ r = ra_krefreg(ref);
+- lua_assert(!rset_test(as->freeset, r));
++ lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
+ ra_free(as, r);
+ ra_modified(as, r);
+ #if LJ_64
+@@ -333,12 +341,14 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ }
+ ir = IR(ref);
+ r = ir->r;
+- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
++ lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
++ lj_assertA(!ra_hasspill(ir->s),
++ "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
+ ra_free(as, r);
+ ra_modified(as, r);
+ ir->r = RID_INIT; /* Do not keep any hint. */
+ RA_DBGX((as, "remat $i $r", ir, r));
+-#if !LJ_SOFTFP
++#if !LJ_SOFTFP32
+ if (ir->o == IR_KNUM) {
+ emit_loadk64(as, r, ir);
+ } else
+@@ -347,7 +357,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
+ emit_getgl(as, r, jit_base);
+ } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
+- lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */
++ /* REF_NIL stores ASMREF_L register. */
++ lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
+ emit_getgl(as, r, cur_L);
+ #if LJ_64
+ } else if (ir->o == IR_KINT64) {
+@@ -360,8 +371,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ #endif
+ #endif
+ } else {
+- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
++ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
++ "rematk of bad IR op %d", ir->o);
+ emit_loadi(as, r, ir->i);
+ }
+ return r;
+@@ -371,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
+ static int32_t ra_spill(ASMState *as, IRIns *ir)
+ {
+ int32_t slot = ir->s;
+- lua_assert(ir >= as->ir + REF_TRUE);
++ lj_assertA(ir >= as->ir + REF_TRUE,
++ "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
+ if (!ra_hasspill(slot)) {
+ if (irt_is64(ir->t)) {
+ slot = as->evenspill;
+@@ -396,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
+ {
+ IRIns *ir = IR(ref);
+ Reg r = ir->r;
+- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
++ lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
++ lj_assertA(!ra_hasspill(ir->s),
++ "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
+ ra_free(as, r);
+ ra_modified(as, r);
+ ir->r = RID_INIT;
+@@ -412,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
+ IRIns *ir = IR(ref);
+ int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */
+ Reg r = ir->r;
+- lua_assert(ra_hasreg(r));
++ lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
+ ra_sethint(ir->r, r); /* Keep hint. */
+ ra_free(as, r);
+ if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
+@@ -441,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
+ {
+ IRRef ref;
+ RegCost cost = ~(RegCost)0;
+- lua_assert(allow != RSET_EMPTY);
++ lj_assertA(allow != RSET_EMPTY, "evict from empty set");
+ if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
+ GPRDEF(MINCOST)
+ } else {
+ FPRDEF(MINCOST)
+ }
+ ref = regcost_ref(cost);
+- lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
++ lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
++ "evict of out-of-range IR %04d", ref - REF_BIAS);
+ /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
+ if (!irref_isk(ref) && (as->weakset & allow)) {
+ IRIns *ir = IR(ref);
+@@ -606,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
+ IRIns *ir = IR(ref);
+ RegSet pick = as->freeset & allow;
+ Reg r;
+- lua_assert(ra_noreg(ir->r));
++ lj_assertA(ra_noreg(ir->r),
++ "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
+ if (pick) {
+ /* First check register hint from propagation or PHI. */
+ if (ra_hashint(ir->r)) {
+@@ -670,8 +687,10 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
+ IRIns *ir = IR(ref);
+ ir->r = (uint8_t)up;
+ as->cost[down] = 0;
+- lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
+- lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
++ lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
++ "rename between GPR/FPR %d and %d", down, up);
++ lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
++ lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
+ ra_free(as, down); /* 'down' is free ... */
+ ra_modified(as, down);
+ rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
+@@ -679,7 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
+ RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
+ emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
+ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
+- ra_addrename(as, down, ref, as->snapno);
++ /*
++ ** The rename is effective at the subsequent (already emitted) exit
++ ** branch. This is for the current snapshot (as->snapno). Except if we
++ ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
++ ** then it belongs to the next snapshot.
++ ** See also the discussion at asm_snap_checkrename().
++ */
++ ra_addrename(as, down, ref, as->snapno + as->snapalloc);
+ }
+ }
+
+@@ -712,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
+ {
+ Reg dest = ra_dest(as, ir, RID2RSET(r));
+ if (dest != r) {
+- lua_assert(rset_test(as->freeset, r));
++ lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
+ ra_modified(as, r);
+ emit_movrr(as, ir, dest, r);
+ }
+@@ -745,8 +771,9 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
+ #endif
+ #endif
+ } else if (ir->o != IR_KPRI) {
+- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
++ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
++ "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
+ emit_loadi(as, dest, ir->i);
+ return;
+ }
+@@ -791,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
+ }
+ #endif
+
+-#if !LJ_64
+ /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
+ static void ra_destpair(ASMState *as, IRIns *ir)
+ {
+ Reg destlo = ir->r, desthi = (ir+1)->r;
++ IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
+ /* First spill unrelated refs blocking the destination registers. */
+ if (!rset_test(as->freeset, RID_RETLO) &&
+ destlo != RID_RETLO && desthi != RID_RETLO)
+@@ -819,29 +846,28 @@ static void ra_destpair(ASMState *as, IRIns *ir)
+ /* Check for conflicts and shuffle the registers as needed. */
+ if (destlo == RID_RETHI) {
+ if (desthi == RID_RETLO) {
+-#if LJ_TARGET_X86
+- *--as->mcp = XI_XCHGa + RID_RETHI;
++#if LJ_TARGET_X86ORX64
++ *--as->mcp = REX_64IR(irx, XI_XCHGa + RID_RETHI);
+ #else
+- emit_movrr(as, ir, RID_RETHI, RID_TMP);
+- emit_movrr(as, ir, RID_RETLO, RID_RETHI);
+- emit_movrr(as, ir, RID_TMP, RID_RETLO);
++ emit_movrr(as, irx, RID_RETHI, RID_TMP);
++ emit_movrr(as, irx, RID_RETLO, RID_RETHI);
++ emit_movrr(as, irx, RID_TMP, RID_RETLO);
+ #endif
+ } else {
+- emit_movrr(as, ir, RID_RETHI, RID_RETLO);
+- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
++ emit_movrr(as, irx, RID_RETHI, RID_RETLO);
++ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
+ }
+ } else if (desthi == RID_RETLO) {
+- emit_movrr(as, ir, RID_RETLO, RID_RETHI);
+- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
++ emit_movrr(as, irx, RID_RETLO, RID_RETHI);
++ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
+ } else {
+- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
++ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
++ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
+ }
+ /* Restore spill slots (if any). */
+ if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
+ if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
+ }
+-#endif
+
+ /* -- Snapshot handling --------- ----------------------------------------- */
+
+@@ -876,7 +902,10 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
+ static void asm_snap_alloc1(ASMState *as, IRRef ref)
+ {
+ IRIns *ir = IR(ref);
+- if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) {
++ if (!irref_isk(ref) && ir->r != RID_SUNK) {
++ bloomset(as->snapfilt1, ref);
++ bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
++ if (ra_used(ir)) return;
+ if (ir->r == RID_SINK) {
+ ir->r = RID_SUNK;
+ #if LJ_HASFFI
+@@ -888,11 +917,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
+ #endif
+ { /* Allocate stored values for TNEW, TDUP and CNEW. */
+ IRIns *irs;
+- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
++ lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
++ "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
+ for (irs = IR(as->snapref-1); irs > ir; irs--)
+ if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
+- lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+- irs->o == IR_FSTORE || irs->o == IR_XSTORE);
++ lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
++ irs->o == IR_FSTORE || irs->o == IR_XSTORE,
++ "sunk store IR %04d has bad op %d",
++ (int)(irs - as->ir) - REF_BIAS, irs->o);
+ asm_snap_alloc1(as, irs->op2);
+ if (LJ_32 && (irs+1)->o == IR_HIOP)
+ asm_snap_alloc1(as, (irs+1)->op2);
+@@ -928,18 +960,21 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
+ }
+
+ /* Allocate refs escaping to a snapshot. */
+-static void asm_snap_alloc(ASMState *as)
++static void asm_snap_alloc(ASMState *as, int snapno)
+ {
+- SnapShot *snap = &as->T->snap[as->snapno];
++ SnapShot *snap = &as->T->snap[snapno];
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
+ MSize n, nent = snap->nent;
++ as->snapfilt1 = as->snapfilt2 = 0;
+ for (n = 0; n < nent; n++) {
+ SnapEntry sn = map[n];
+ IRRef ref = snap_ref(sn);
+ if (!irref_isk(ref)) {
+ asm_snap_alloc1(as, ref);
+ if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
+- lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
++ lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
++ "snap %d[%d] points to bad SOFTFP IR %04d",
++ snapno, n, ref - REF_BIAS);
+ asm_snap_alloc1(as, ref+1);
+ }
+ }
+@@ -955,35 +990,26 @@ static void asm_snap_alloc(ASMState *as)
+ */
+ static int asm_snap_checkrename(ASMState *as, IRRef ren)
+ {
+- SnapShot *snap = &as->T->snap[as->snapno];
+- SnapEntry *map = &as->T->snapmap[snap->mapofs];
+- MSize n, nent = snap->nent;
+- for (n = 0; n < nent; n++) {
+- SnapEntry sn = map[n];
+- IRRef ref = snap_ref(sn);
+- if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) {
+- IRIns *ir = IR(ref);
+- ra_spill(as, ir); /* Register renamed, so force a spill slot. */
+- RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
+- return 1; /* Found. */
+- }
++ if (bloomtest(as->snapfilt1, ren) &&
++ bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) {
++ IRIns *ir = IR(ren);
++ ra_spill(as, ir); /* Register renamed, so force a spill slot. */
++ RA_DBGX((as, "snaprensp $f $s", ren, ir->s));
++ return 1; /* Found. */
+ }
+ return 0; /* Not found. */
+ }
+
+-/* Prepare snapshot for next guard instruction. */
++/* Prepare snapshot for next guard or throwing instruction. */
+ static void asm_snap_prep(ASMState *as)
+ {
+- if (as->curins < as->snapref) {
+- do {
+- if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */
+- as->snapno--;
+- as->snapref = as->T->snap[as->snapno].ref;
+- } while (as->curins < as->snapref);
+- asm_snap_alloc(as);
++ if (as->snapalloc) {
++ /* Alloc on first invocation for each snapshot. */
++ as->snapalloc = 0;
++ asm_snap_alloc(as, as->snapno);
+ as->snaprename = as->T->nins;
+ } else {
+- /* Process any renames above the highwater mark. */
++ /* Check any renames above the highwater mark. */
+ for (; as->snaprename < as->T->nins; as->snaprename++) {
+ IRIns *ir = &as->T->ir[as->snaprename];
+ if (asm_snap_checkrename(as, ir->op1))
+@@ -992,6 +1018,35 @@ static void asm_snap_prep(ASMState *as)
+ }
+ }
+
++/* Move to previous snapshot when we cross the current snapshot ref. */
++static void asm_snap_prev(ASMState *as)
++{
++ if (as->curins < as->snapref) {
++ uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);
++ if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
++ do {
++ if (as->snapno == 0) return;
++ as->snapno--;
++ as->snapref = as->T->snap[as->snapno].ref;
++ as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */
++ } while (as->curins < as->snapref); /* May have no ins inbetween. */
++ as->snapalloc = 1;
++ }
++}
++
++/* Fixup snapshot mcode offsets. */
++static void asm_snap_fixup_mcofs(ASMState *as)
++{
++ uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
++ SnapShot *snap = as->T->snap;
++ SnapNo i;
++ for (i = as->T->nsnap-1; i > 0; i--) {
++ /* Compute offset from mcode start and store in correct snapshot. */
++ snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
++ }
++ snap[0].mcofs = 0;
++}
++
+ /* -- Miscellaneous helpers ----------------------------------------------- */
+
+ /* Calculate stack adjustment. */
+@@ -1003,21 +1058,26 @@ static int32_t asm_stack_adjust(ASMState *as)
+ }
+
+ /* Must match with hash*() in lj_tab.c. */
+-static uint32_t ir_khash(IRIns *ir)
++static uint32_t ir_khash(ASMState *as, IRIns *ir)
+ {
+ uint32_t lo, hi;
++ UNUSED(as);
+ if (irt_isstr(ir->t)) {
+- return ir_kstr(ir)->hash;
++ return ir_kstr(ir)->sid;
+ } else if (irt_isnum(ir->t)) {
+ lo = ir_knum(ir)->u32.lo;
+ hi = ir_knum(ir)->u32.hi << 1;
+ } else if (irt_ispri(ir->t)) {
+- lua_assert(!irt_isnil(ir->t));
++ lj_assertA(!irt_isnil(ir->t), "hash of nil key");
+ return irt_type(ir->t)-IRT_FALSE;
+ } else {
+- lua_assert(irt_isgcv(ir->t));
++ lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
+ lo = u32ptr(ir_kgc(ir));
++#if LJ_GC64
++ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
++#else
+ hi = lo + HASH_BIAS;
++#endif
+ }
+ return hashrot(lo, hi);
+ }
+@@ -1031,6 +1091,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
+ IRRef args[3];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* const char *str */
+ args[2] = ir->op2; /* size_t len */
+@@ -1043,6 +1104,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
+ IRRef args[2];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* uint32_t ahsize */
+ as->gcsteps++;
+@@ -1055,6 +1117,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
+ IRRef args[2];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* const GCtab *kt */
+ as->gcsteps++;
+@@ -1080,28 +1143,43 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
+
+ /* -- Buffer operations --------------------------------------------------- */
+
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb);
++#endif
+
+ static void asm_bufhdr(ASMState *as, IRIns *ir)
+ {
+ Reg sb = ra_dest(as, ir, RSET_GPR);
+- if ((ir->op2 & IRBUFHDR_APPEND)) {
++ switch (ir->op2) {
++ case IRBUFHDR_RESET: {
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irbp;
++ irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */
++ emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
++ emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
++ break;
++ }
++ case IRBUFHDR_APPEND: {
+ /* Rematerialize const buffer pointer instead of likely spill. */
+ IRIns *irp = IR(ir->op1);
+ if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+ (irp == ir-2 && !ra_used(ir-1)))) {
+- while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
++ while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
+ irp = IR(irp->op1);
+ if (irref_isk(irp->op1)) {
+ ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+ ir = irp;
+ }
+ }
+- } else {
+- Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+- /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
+- emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
+- emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
++ break;
++ }
++#if LJ_HASBUFFER
++ case IRBUFHDR_WRITE:
++ asm_bufhdr_write(as, sb);
++ break;
++#endif
++ default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
+ }
+ #if LJ_TARGET_X86ORX64
+ ra_left(as, sb, ir->op1);
+@@ -1115,15 +1193,16 @@ static void asm_bufput(ASMState *as, IRIns *ir)
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+ IRRef args[3];
+ IRIns *irs;
+- int kchar = -1;
++ int kchar = -129;
+ args[0] = ir->op1; /* SBuf * */
+ args[1] = ir->op2; /* GCstr * */
+ irs = IR(ir->op2);
+- lua_assert(irt_isstr(irs->t));
++ lj_assertA(irt_isstr(irs->t),
++ "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
+ if (irs->o == IR_KGC) {
+ GCstr *s = ir_kstr(irs);
+ if (s->len == 1) { /* Optimize put of single-char string constant. */
+- kchar = strdata(s)[0];
++ kchar = (int8_t)strdata(s)[0]; /* Signed! */
+ args[1] = ASMREF_TMP1; /* int, truncated to char */
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+ }
+@@ -1133,7 +1212,8 @@ static void asm_bufput(ASMState *as, IRIns *ir)
+ args[1] = ASMREF_TMP1; /* TValue * */
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+ } else {
+- lua_assert(irt_isinteger(IR(irs->op1)->t));
++ lj_assertA(irt_isinteger(IR(irs->op1)->t),
++ "TOSTR of non-numeric IR %04d", irs->op1);
+ args[1] = irs->op1; /* int */
+ if (irs->op2 == IRTOSTR_INT)
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+@@ -1150,8 +1230,8 @@ static void asm_bufput(ASMState *as, IRIns *ir)
+ asm_gencall(as, ci, args);
+ if (args[1] == ASMREF_TMP1) {
+ Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+- if (kchar == -1)
+- asm_tvptr(as, tmp, irs->op1);
++ if (kchar == -129)
++ asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
+ else
+ ra_allockreg(as, kchar, tmp);
+ }
+@@ -1173,6 +1253,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
+ {
+ const CCallInfo *ci;
+ IRRef args[2];
++ asm_snap_prep(as);
+ args[0] = ASMREF_L;
+ as->gcsteps++;
+ if (ir->op2 == IRTOSTR_NUM) {
+@@ -1188,7 +1269,7 @@ static void asm_tostr(ASMState *as, IRIns *ir)
+ asm_setupresult(as, ir, ci); /* GCstr * */
+ asm_gencall(as, ci, args);
+ if (ir->op2 == IRTOSTR_NUM)
+- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
++ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
+ }
+
+ #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+@@ -1198,7 +1279,8 @@ static void asm_conv64(ASMState *as, IRIns *ir)
+ IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+ IRCallID id;
+ IRRef args[2];
+- lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
++ lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
++ "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
+ args[LJ_BE] = (ir-1)->op1;
+ args[LJ_LE] = ir->op1;
+ if (st == IRT_NUM || st == IRT_FLOAT) {
+@@ -1228,12 +1310,19 @@ static void asm_newref(ASMState *as, IRIns *ir)
+ IRRef args[3];
+ if (ir->r == RID_SINK)
+ return;
++ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* GCtab *t */
+ args[2] = ASMREF_TMP1; /* cTValue *key */
+ asm_setupresult(as, ir, ci); /* TValue * */
+ asm_gencall(as, ci, args);
+- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
++ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
++}
++
++static void asm_tmpref(ASMState *as, IRIns *ir)
++{
++ Reg r = ra_dest(as, ir, RSET_GPR);
++ asm_tvptr(as, r, ir->op1, ir->op2);
+ }
+
+ static void asm_lref(ASMState *as, IRIns *ir)
+@@ -1253,15 +1342,16 @@ static void asm_collectargs(ASMState *as, IRIns *ir,
+ const CCallInfo *ci, IRRef *args)
+ {
+ uint32_t n = CCI_XNARGS(ci);
+- lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
++ /* Account for split args. */
++ lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
+ if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+ while (n-- > 1) {
+ ir = IR(ir->op1);
+- lua_assert(ir->o == IR_CARG);
++ lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
+ args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+ }
+ args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+- lua_assert(IR(ir->op1)->o != IR_CARG);
++ lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
+ }
+
+ /* Reconstruct CCallInfo flags for CALLX*. */
+@@ -1305,32 +1395,6 @@ static void asm_call(ASMState *as, IRIns *ir)
+ asm_gencall(as, ci, args);
+ }
+
+-#if !LJ_SOFTFP
+-static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
+-{
+- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
+- IRRef args[2];
+- args[0] = lref;
+- args[1] = rref;
+- asm_setupresult(as, ir, ci);
+- asm_gencall(as, ci, args);
+-}
+-
+-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
+-{
+- IRIns *irp = IR(ir->op1);
+- if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+- IRIns *irpp = IR(irp->op1);
+- if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+- irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+- asm_fppow(as, ir, irpp->op1, irp->op2);
+- return 1;
+- }
+- }
+- return 0;
+-}
+-#endif
+-
+ /* -- PHI and loop handling ----------------------------------------------- */
+
+ /* Break a PHI cycle by renaming to a free register (evict if needed). */
+@@ -1601,6 +1665,68 @@ static void asm_loop(ASMState *as)
+ #error "Missing assembler for target CPU"
+ #endif
+
++/* -- Common instruction helpers ------------------------------------------ */
++
++#if !LJ_SOFTFP32
++#if !LJ_TARGET_X86ORX64
++#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
++#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
++#endif
++
++static void asm_pow(ASMState *as, IRIns *ir)
++{
++#if LJ_64 && LJ_HASFFI
++ if (!irt_isnum(ir->t))
++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
++ IRCALL_lj_carith_powu64);
++ else
++#endif
++ if (irt_isnum(IR(ir->op2)->t))
++ asm_callid(as, ir, IRCALL_pow);
++ else
++ asm_fppowi(as, ir);
++}
++
++static void asm_div(ASMState *as, IRIns *ir)
++{
++#if LJ_64 && LJ_HASFFI
++ if (!irt_isnum(ir->t))
++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
++ IRCALL_lj_carith_divu64);
++ else
++#endif
++ asm_fpdiv(as, ir);
++}
++#endif
++
++static void asm_mod(ASMState *as, IRIns *ir)
++{
++#if LJ_64 && LJ_HASFFI
++ if (!irt_isint(ir->t))
++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
++ IRCALL_lj_carith_modu64);
++ else
++#endif
++ asm_callid(as, ir, IRCALL_lj_vm_modi);
++}
++
++static void asm_fuseequal(ASMState *as, IRIns *ir)
++{
++ /* Fuse HREF + EQ/NE. */
++ if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
++ as->curins--;
++ asm_href(as, ir-1, (IROp)ir->o);
++ } else {
++ asm_equal(as, ir);
++ }
++}
++
++static void asm_alen(ASMState *as, IRIns *ir)
++{
++ asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
++ IRCALL_lj_tab_len_hint);
++}
++
+ /* -- Instruction dispatch ------------------------------------------------ */
+
+ /* Assemble a single instruction. */
+@@ -1609,7 +1735,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ switch ((IROp)ir->o) {
+ /* Miscellaneous ops. */
+ case IR_LOOP: asm_loop(as); break;
+- case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
++ case IR_NOP: case IR_XBAR:
++ lj_assertA(!ra_used(ir),
++ "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
++ break;
+ case IR_USE:
+ ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+ case IR_PHI: asm_phi(as, ir); break;
+@@ -1623,14 +1752,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_ABC:
+ asm_comp(as, ir);
+ break;
+- case IR_EQ: case IR_NE:
+- if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+- as->curins--;
+- asm_href(as, ir-1, (IROp)ir->o);
+- } else {
+- asm_equal(as, ir);
+- }
+- break;
++ case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
+
+ case IR_RETF: asm_retf(as, ir); break;
+
+@@ -1652,16 +1774,17 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_MUL: asm_mul(as, ir); break;
+ case IR_MOD: asm_mod(as, ir); break;
+ case IR_NEG: asm_neg(as, ir); break;
+-#if LJ_SOFTFP
++#if LJ_SOFTFP32
+ case IR_DIV: case IR_POW: case IR_ABS:
+- case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+- lua_assert(0); /* Unused for LJ_SOFTFP. */
++ case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
++ /* Unused for LJ_SOFTFP32. */
++ lj_assertA(0, "IR %04d with unused op %d",
++ (int)(ir - as->ir) - REF_BIAS, ir->o);
+ break;
+ #else
+ case IR_DIV: asm_div(as, ir); break;
+ case IR_POW: asm_pow(as, ir); break;
+ case IR_ABS: asm_abs(as, ir); break;
+- case IR_ATAN2: asm_atan2(as, ir); break;
+ case IR_LDEXP: asm_ldexp(as, ir); break;
+ case IR_FPMATH: asm_fpmath(as, ir); break;
+ case IR_TOBIT: asm_tobit(as, ir); break;
+@@ -1681,6 +1804,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_NEWREF: asm_newref(as, ir); break;
+ case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+ case IR_FREF: asm_fref(as, ir); break;
++ case IR_TMPREF: asm_tmpref(as, ir); break;
+ case IR_STRREF: asm_strref(as, ir); break;
+ case IR_LREF: asm_lref(as, ir); break;
+
+@@ -1691,6 +1815,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_FLOAD: asm_fload(as, ir); break;
+ case IR_XLOAD: asm_xload(as, ir); break;
+ case IR_SLOAD: asm_sload(as, ir); break;
++ case IR_ALEN: asm_alen(as, ir); break;
+
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+ case IR_FSTORE: asm_fstore(as, ir); break;
+@@ -1700,7 +1825,14 @@ static void asm_ir(ASMState *as, IRIns *ir)
+ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+ case IR_TNEW: asm_tnew(as, ir); break;
+ case IR_TDUP: asm_tdup(as, ir); break;
+- case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
++ case IR_CNEW: case IR_CNEWI:
++#if LJ_HASFFI
++ asm_cnew(as, ir);
++#else
++ lj_assertA(0, "IR %04d with unused op %d",
++ (int)(ir - as->ir) - REF_BIAS, ir->o);
++#endif
++ break;
+
+ /* Buffer operations. */
+ case IR_BUFHDR: asm_bufhdr(as, ir); break;
+@@ -1767,8 +1899,7 @@ static void asm_head_side(ASMState *as)
+
+ if (as->snapno && as->topslot > as->parent->topslot) {
+ /* Force snap #0 alloc to prevent register overwrite in stack check. */
+- as->snapno = 0;
+- asm_snap_alloc(as);
++ asm_snap_alloc(as, 0);
+ }
+ allow = asm_head_side_base(as, irp, allow);
+
+@@ -1776,8 +1907,10 @@ static void asm_head_side(ASMState *as)
+ for (i = as->stopins; i > REF_BASE; i--) {
+ IRIns *ir = IR(i);
+ RegSP rs;
+- lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+- (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
++ lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
++ (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
++ "IR %04d has bad parent op %d",
++ (int)(ir - as->ir) - REF_BIAS, ir->o);
+ rs = as->parentmap[i - REF_FIRST];
+ if (ra_hasreg(ir->r)) {
+ rset_clear(allow, ir->r);
+@@ -2005,12 +2138,16 @@ static void asm_setup_regsp(ASMState *as)
+ #endif
+
+ ra_setup(as);
++#if LJ_TARGET_ARM64
++ ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
++#endif
+
+ /* Clear reg/sp for constants. */
+ for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
+ ir->prev = REGSP_INIT;
+ if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+ #if LJ_GC64
++ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+ ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
+ #else
+ /* Make life easier for backends by putting address of constant in i. */
+@@ -2026,6 +2163,7 @@ static void asm_setup_regsp(ASMState *as)
+ as->snaprename = nins;
+ as->snapref = nins;
+ as->snapno = T->nsnap;
++ as->snapalloc = 0;
+
+ as->stopins = REF_BASE;
+ as->orignins = nins;
+@@ -2035,7 +2173,7 @@ static void asm_setup_regsp(ASMState *as)
+ ir = IR(REF_FIRST);
+ if (as->parent) {
+ uint16_t *p;
+- lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
++ lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
+ if (lastir - ir > LJ_MAX_JSLOTS)
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+ as->stopins = (IRRef)((lastir-1) - as->ir);
+@@ -2074,6 +2212,10 @@ static void asm_setup_regsp(ASMState *as)
+ ir->prev = (uint16_t)REGSP_HINT((rload & 15));
+ rload = lj_ror(rload, 4);
+ continue;
++ case IR_TMPREF:
++ if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
++ as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */
++ break;
+ #endif
+ case IR_CALLXS: {
+ CCallInfo ci;
+@@ -2083,7 +2225,17 @@ static void asm_setup_regsp(ASMState *as)
+ as->modset |= RSET_SCRATCH;
+ continue;
+ }
+- case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
++ case IR_CALLL:
++ /* lj_vm_next needs two TValues on the stack. */
++#if LJ_TARGET_X64 && LJ_ABI_WIN
++ if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
++ as->evenspill = SPS_FIRST + 4;
++#else
++ if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
++ as->evenspill = 4;
++#endif
++ /* fallthrough */
++ case IR_CALLN: case IR_CALLA: case IR_CALLS: {
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+ ir->prev = asm_setup_call_slots(as, ir, ci);
+ if (inloop)
+@@ -2091,7 +2243,6 @@ static void asm_setup_regsp(ASMState *as)
+ (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
+ continue;
+ }
+-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
+ case IR_HIOP:
+ switch ((ir-1)->o) {
+ #if LJ_SOFTFP && LJ_TARGET_ARM
+@@ -2102,15 +2253,15 @@ static void asm_setup_regsp(ASMState *as)
+ }
+ break;
+ #endif
+-#if !LJ_SOFTFP && LJ_NEED_FP64
++#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
+ case IR_CONV:
+ if (irt_isfp((ir-1)->t)) {
+ ir->prev = REGSP_HINT(RID_FPRET);
+ continue;
+ }
+- /* fallthrough */
+ #endif
+- case IR_CALLN: case IR_CALLXS:
++ /* fallthrough */
++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ #if LJ_SOFTFP
+ case IR_MIN: case IR_MAX:
+ #endif
+@@ -2121,12 +2272,11 @@ static void asm_setup_regsp(ASMState *as)
+ break;
+ }
+ break;
+-#endif
+ #if LJ_SOFTFP
+ case IR_MIN: case IR_MAX:
+ if ((ir+1)->o != IR_HIOP) break;
+- /* fallthrough */
+ #endif
++ /* fallthrough */
+ /* C calls evict all scratch regs and return results in RID_RET. */
+ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
+ if (REGARG_NUMGPR < 3 && as->evenspill < 3)
+@@ -2137,9 +2287,12 @@ static void asm_setup_regsp(ASMState *as)
+ if (ir->op2 != REF_NIL && as->evenspill < 4)
+ as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
+ }
++ /* fallthrough */
+ #else
++ /* fallthrough */
+ case IR_CNEW:
+ #endif
++ /* fallthrough */
+ case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+ case IR_BUFSTR:
+ ir->prev = REGSP_HINT(RID_RET);
+@@ -2151,35 +2304,45 @@ static void asm_setup_regsp(ASMState *as)
+ as->modset = RSET_SCRATCH;
+ break;
+ #if !LJ_SOFTFP
+- case IR_ATAN2:
+-#if LJ_TARGET_X86
+- if (as->evenspill < 4) /* Leave room to call atan2(). */
+- as->evenspill = 4;
+-#endif
+ #if !LJ_TARGET_X86ORX64
+ case IR_LDEXP:
+ #endif
+ #endif
++ /* fallthrough */
+ case IR_POW:
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
+ if (inloop)
+ as->modset |= RSET_SCRATCH;
+ #if LJ_TARGET_X86
++ if (irt_isnum(IR(ir->op2)->t)) {
++ if (as->evenspill < 4) /* Leave room to call pow(). */
++ as->evenspill = 4;
++ }
+ break;
+ #else
+ ir->prev = REGSP_HINT(RID_FPRET);
+ continue;
+ #endif
+ }
+- /* fallthrough for integer POW */
++ /* fallthrough */ /* for integer POW */
+ case IR_DIV: case IR_MOD:
+- if (!irt_isnum(ir->t)) {
++ if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
++ ir->prev = REGSP_HINT(RID_RET);
++ if (inloop)
++ as->modset |= (RSET_SCRATCH & RSET_GPR);
++ continue;
++ }
++ break;
++#if LJ_64 && LJ_SOFTFP
++ case IR_ADD: case IR_SUB: case IR_MUL:
++ if (irt_isnum(ir->t)) {
+ ir->prev = REGSP_HINT(RID_RET);
+ if (inloop)
+ as->modset |= (RSET_SCRATCH & RSET_GPR);
+ continue;
+ }
+ break;
++#endif
+ case IR_FPMATH:
+ #if LJ_TARGET_X86ORX64
+ if (ir->op2 <= IRFPM_TRUNC) {
+@@ -2190,9 +2353,6 @@ static void asm_setup_regsp(ASMState *as)
+ continue;
+ }
+ break;
+- } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
+- if (as->evenspill < 4) /* Leave room to call pow(). */
+- as->evenspill = 4;
+ }
+ #endif
+ if (inloop)
+@@ -2208,6 +2368,7 @@ static void asm_setup_regsp(ASMState *as)
+ case IR_BSHL: case IR_BSHR: case IR_BSAR:
+ if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
+ break;
++ /* fallthrough */
+ case IR_BROL: case IR_BROR:
+ if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
+ IR(ir->op2)->r = REGSP_HINT(RID_ECX);
+@@ -2252,7 +2413,6 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ {
+ ASMState as_;
+ ASMState *as = &as_;
+- MCode *origtop;
+
+ /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+ {
+@@ -2267,7 +2427,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ /* Ensure an initialized instruction beyond the last one for HIOP checks. */
+ /* This also allows one RENAME to be added without reallocating curfinal. */
+ as->orignins = lj_ir_nextins(J);
+- J->cur.ir[as->orignins].o = IR_NOP;
++ lj_ir_nop(&J->cur.ir[as->orignins]);
+
+ /* Setup initial state. Copy some fields to reduce indirections. */
+ as->J = J;
+@@ -2280,7 +2440,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ as->parent = J->parent ? traceref(J, J->parent) : NULL;
+
+ /* Reserve MCode memory. */
+- as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot);
++ as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
+ as->mcp = as->mctop;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ asm_setup_target(as);
+@@ -2338,7 +2498,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ /* Assemble a trace in linear backwards order. */
+ for (as->curins--; as->curins > as->stopins; as->curins--) {
+ IRIns *ir = IR(as->curins);
+- lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */
++ /* 64 bit types handled by SPLIT for 32 bit archs. */
++ lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
++ "IR %04d has unsplit 64 bit type",
++ (int)(ir - as->ir) - REF_BIAS);
++ asm_snap_prev(as);
+ if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
+ continue; /* Dead-code elimination can be soooo easy. */
+ if (irt_isguard(ir->t))
+@@ -2368,10 +2532,13 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ asm_phi_fixup(as);
+
+ if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
+- lua_assert(J->curfinal->nk == T->nk);
++ lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
+ memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+ (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
+ T->nins = J->curfinal->nins;
++ /* Fill mcofs of any unprocessed snapshots. */
++ as->curins = REF_FIRST;
++ asm_snap_prev(as);
+ break; /* Done. */
+ }
+
+@@ -2390,13 +2557,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
+ /* Set trace entry point before fixing up tail to allow link to self. */
+ T->mcode = as->mcp;
+ T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
+- if (!as->loopref)
++ if (as->loopref)
++ asm_loop_tail_fixup(as);
++ else
+ asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
+ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
++ asm_snap_fixup_mcofs(as);
+ #if LJ_TARGET_MCODE_FIXUP
+ asm_mcode_fixup(T->mcode, T->szmcode);
+ #endif
+- lj_mcode_sync(T->mcode, origtop);
++ lj_mcode_sync(T->mcode, as->mctoporig);
+ }
+
+ #undef IR
+diff --git a/src/lj_asm.h b/src/lj_asm.h
+index 2819481b..624da844 100644
+--- a/src/lj_asm.h
++++ b/src/lj_asm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ #ifndef _LJ_ASM_H
+diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
+index 37bfa40f..cc608c0d 100644
+--- a/src/lj_asm_arm.h
++++ b/src/lj_asm_arm.h
+@@ -1,6 +1,6 @@
+ /*
+ ** ARM IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ */
+
+ /* -- Register allocator extensions --------------------------------------- */
+@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)
+ }
+ }
+ }
+- lua_assert(rset_test(RSET_GPREVEN, r));
++ lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
+ ra_modified(as, r);
+ ra_modified(as, r+1);
+ RA_DBGX((as, "scratchpair $r $r", r, r+1));
+@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+ *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */
+ return ra_allock(as, (ofs & ~255), allow);
+ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = 0;
++ return RID_SP;
+ }
+ }
+ *ofsp = 0;
+@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
+ return;
+ }
+ } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
+- lua_assert(ofs == 0);
++ lj_assertA(ofs == 0, "bad usage");
+ ofs = (int32_t)sizeof(GCstr);
+ if (irref_isk(ir->op2)) {
+ ofs += IR(ir->op2)->i;
+@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
+ if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
+ if (irt_isnum(ir->t)) {
+- lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */
++ lj_assertA(rset_test(as->freeset, gpr+1),
++ "reg %d not free", gpr+1); /* Ditto. */
+ emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
+ gpr += 2;
+ } else {
+@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ #endif
+ {
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
+ if (ref) ra_leftov(as, gpr, ref);
+ gpr++;
+ } else {
+@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+- lua_assert(!irt_ispri(ir->t));
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
+ if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
+@@ -495,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
+ emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
+ }
+
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb)
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
++ if ((as->flags & JIT_F_ARMV6T2)) {
++ emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
++ } else {
++ emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
++ emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
++ }
++ emit_lso(as, ARMI_LDR, RID_TMP,
++ ra_allock(as, (addr & ~4095),
++ rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
++ (addr & 4095));
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
++}
++#endif
++
+ /* -- Type conversions ---------------------------------------------------- */
+
+ #if !LJ_SOFTFP
+@@ -530,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ #endif
+ IRRef lref = ir->op1;
+ /* 64 bit integer conversions are handled by SPLIT. */
+- lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));
++ lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
++ "IR %04d has unsplit 64 bit type",
++ (int)(ir - as->ir) - REF_BIAS);
+ #if LJ_SOFTFP
+ /* FP conversions are handled by SPLIT. */
+- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
++ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
++ "IR %04d has FP type",
++ (int)(ir - as->ir) - REF_BIAS);
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+ #else
+- lua_assert(irt_type(ir->t) != st);
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) { /* FP to FP conversion. */
+@@ -553,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ } else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+@@ -572,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ if ((as->flags & JIT_F_ARMV6)) {
+ ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
+ st == IRT_U8 ? ARMI_UXTB :
+@@ -658,35 +693,55 @@ static void asm_strto(ASMState *as, IRIns *ir)
+ /* -- Memory references --------------------------------------------------- */
+
+ /* Get pointer to TValue. */
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+ {
+- IRIns *ir = IR(ref);
+- if (irt_isnum(ir->t)) {
+- if (irref_isk(ref)) {
+- /* Use the number constant itself as a TValue. */
+- ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+- } else {
++ if ((mode & IRTMPREF_IN1)) {
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if ((mode & IRTMPREF_OUT1)) {
++#if LJ_SOFTFP
++ lj_assertA(irref_isk(ref), "unsplit FP op");
++ emit_dm(as, ARMI_MOV, dest, RID_SP);
++ emit_lso(as, ARMI_STR,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
++ RID_SP, 0);
++ emit_lso(as, ARMI_STR,
++ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
++ RID_SP, 4);
++#else
++ Reg src = ra_alloc1(as, ref, RSET_FPR);
++ emit_dm(as, ARMI_MOV, dest, RID_SP);
++ emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
++#endif
++ } else if (irref_isk(ref)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
++ } else {
+ #if LJ_SOFTFP
+- lua_assert(0);
++ lj_assertA(0, "unsplit FP op");
+ #else
+- /* Otherwise force a spill and use the spill slot. */
+- emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
++ /* Otherwise force a spill and use the spill slot. */
++ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+ #endif
++ }
++ } else {
++ /* Otherwise use [sp] and [sp+4] to hold the TValue.
++ ** This assumes the following call has max. 4 args.
++ */
++ Reg type;
++ emit_dm(as, ARMI_MOV, dest, RID_SP);
++ if (!irt_ispri(ir->t)) {
++ Reg src = ra_alloc1(as, ref, RSET_GPR);
++ emit_lso(as, ARMI_STR, src, RID_SP, 0);
++ }
++ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
++ type = ra_alloc1(as, ref+1, RSET_GPR);
++ else
++ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
++ emit_lso(as, ARMI_STR, type, RID_SP, 4);
+ }
+ } else {
+- /* Otherwise use [sp] and [sp+4] to hold the TValue. */
+- RegSet allow = rset_exclude(RSET_GPR, dest);
+- Reg type;
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+- if (!irt_ispri(ir->t)) {
+- Reg src = ra_alloc1(as, ref, allow);
+- emit_lso(as, ARMI_STR, src, RID_SP, 0);
+- }
+- if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+- type = ra_alloc1(as, ref+1, allow);
+- else
+- type = ra_allock(as, irt_toitype(ir->t), allow);
+- emit_lso(as, ARMI_STR, type, RID_SP, 4);
+ }
+ }
+
+@@ -811,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
+
+ /* Load main position relative to tab->node into dest. */
+- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
++ khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ } else {
+ emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
+ emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
+- if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */
++ if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */
+ emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+- emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash));
++ emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ } else if (irref_isk(refkey)) {
+ emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
+@@ -867,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg key = RID_NONE, type = RID_TMP, idx = node;
+ RegSet allow = rset_exclude(RSET_GPR, node);
+- lua_assert(ofs % sizeof(Node) == 0);
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (ofs > 4095) {
+ idx = dest;
+ rset_clear(allow, dest);
+@@ -934,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
+ static void asm_fref(ASMState *as, IRIns *ir)
+ {
+ UNUSED(as); UNUSED(ir);
+- lua_assert(!ra_used(ir));
++ lj_assertA(!ra_used(ir), "unfused FREF");
+ }
+
+ static void asm_strref(ASMState *as, IRIns *ir)
+@@ -971,39 +1026,43 @@ static void asm_strref(ASMState *as, IRIns *ir)
+
+ /* -- Loads and stores ---------------------------------------------------- */
+
+-static ARMIns asm_fxloadins(IRIns *ir)
++static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: return ARMI_LDRSB;
+ case IRT_U8: return ARMI_LDRB;
+ case IRT_I16: return ARMI_LDRSH;
+ case IRT_U16: return ARMI_LDRH;
+- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;
+- case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;
++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
+ default: return ARMI_LDR;
+ }
+ }
+
+-static ARMIns asm_fxstoreins(IRIns *ir)
++static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
+ {
++ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: case IRT_U8: return ARMI_STRB;
+ case IRT_I16: case IRT_U16: return ARMI_STRH;
+- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;
+- case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;
++ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
++ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
+ default: return ARMI_STR;
+ }
+ }
+
+ static void asm_fload(ASMState *as, IRIns *ir)
+ {
+- if (ir->op1 == REF_NIL) {
+- lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
++ Reg dest = ra_dest(as, ir, RSET_GPR);
++ ARMIns ai = asm_fxloadins(as, ir);
++ Reg idx;
++ int32_t ofs;
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
++ idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
++ ofs = 0;
+ } else {
+- Reg dest = ra_dest(as, ir, RSET_GPR);
+- Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+- ARMIns ai = asm_fxloadins(ir);
+- int32_t ofs;
++ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+@@ -1012,11 +1071,11 @@ static void asm_fload(ASMState *as, IRIns *ir)
+ }
+ }
+ ofs = field_ofs[ir->op2];
+- if ((ai & 0x04000000))
+- emit_lso(as, ai, dest, idx, ofs);
+- else
+- emit_lsox(as, ai, dest, idx, ofs);
+ }
++ if ((ai & 0x04000000))
++ emit_lso(as, ai, dest, idx, ofs);
++ else
++ emit_lsox(as, ai, dest, idx, ofs);
+ }
+
+ static void asm_fstore(ASMState *as, IRIns *ir)
+@@ -1026,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+- ARMIns ai = asm_fxstoreins(ir);
++ ARMIns ai = asm_fxstoreins(as, ir);
+ if ((ai & 0x04000000))
+ emit_lso(as, ai, src, idx, ofs);
+ else
+@@ -1038,8 +1097,8 @@ static void asm_xload(ASMState *as, IRIns *ir)
+ {
+ Reg dest = ra_dest(as, ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
+ }
+
+ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+@@ -1047,7 +1106,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+ if (ir->r != RID_SINK) {
+ Reg src = ra_alloc1(as, ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
+ rset_exclude(RSET_GPR, src), ofs);
+ }
+ }
+@@ -1066,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ rset_clear(allow, type);
+ }
+ if (ra_used(ir)) {
+- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+- irt_isint(ir->t) || irt_isaddr(ir->t));
++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ }
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
+ (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+ if (!hiop || type == RID_NONE) {
+ rset_clear(allow, idx);
+ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+@@ -1133,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+ Reg dest = RID_NONE, type = RID_NONE, base;
+ RegSet allow = RSET_GPR;
+- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
+ #if LJ_SOFTFP
+- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
++ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
++ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+@@ -1152,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ Reg tmp = RID_NONE;
+ if ((ir->op2 & IRSLOAD_CONVERT))
+ tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
+- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+- irt_isint(ir->t) || irt_isaddr(ir->t));
++ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
++ irt_isint(ir->t) || irt_isaddr(ir->t),
++ "bad SLOAD type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ base = ra_alloc1(as, REF_BASE, allow);
+@@ -1218,7 +1283,8 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ IRRef args[4];
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+ RegSet drop = RSET_SCRATCH;
+- lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
++ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ if (ra_hasreg(ir->r))
+@@ -1230,10 +1296,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ /* Initialize immutable cdata object. */
+ if (ir->o == IR_CNEWI) {
+ int32_t ofs = sizeof(GCcdata);
+- lua_assert(sz == 4 || sz == 8);
++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ if (sz == 8) {
+ ofs += 4; ir++;
+- lua_assert(ir->o == IR_HIOP);
++ lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
+ }
+ for (;;) {
+ Reg r = ra_alloc1(as, ir->op2, allow);
+@@ -1268,8 +1334,6 @@ static void asm_cnew(ASMState *as, IRIns *ir)
+ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+ ra_releasetmp(as, ASMREF_TMP1));
+ }
+-#else
+-#define asm_cnew(as, ir) ((void)0)
+ #endif
+
+ /* -- Write barriers ------------------------------------------------------ */
+@@ -1301,7 +1365,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
+ MCLabel l_end;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+- lua_assert(IR(ir->op1)->o == IR_UREFC);
++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+@@ -1364,8 +1428,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id)
+
+ static void asm_fpmath(ASMState *as, IRIns *ir)
+ {
+- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
+- return;
+ if (ir->op2 <= IRFPM_TRUNC)
+ asm_callround(as, ir, ir->op2);
+ else if (ir->op2 == IRFPM_SQRT)
+@@ -1412,14 +1474,29 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
+ emit_dn(as, ai^m, dest, left);
+ }
+
+-static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
++/* Try to drop cmp r, #0. */
++static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai)
+ {
+- if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
++ if (as->flagmcp == as->mcp) {
++ uint32_t cc = (as->mcp[1] >> 28);
+ as->flagmcp = NULL;
+- as->mcp++;
+- ai |= ARMI_S;
++ if (cc <= CC_NE) {
++ as->mcp++;
++ ai |= ARMI_S;
++ } else if (cc == CC_GE) {
++ *++as->mcp ^= ((CC_GE^CC_PL) << 28);
++ ai |= ARMI_S;
++ } else if (cc == CC_LT) {
++ *++as->mcp ^= ((CC_LT^CC_MI) << 28);
++ ai |= ARMI_S;
++ } /* else: other conds don't work in general. */
+ }
+- asm_intop(as, ir, ai);
++ return ai;
++}
++
++static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
++{
++ asm_intop(as, ir, asm_drop_cmp0(as, ai));
+ }
+
+ static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
+@@ -1492,15 +1569,10 @@ static void asm_mul(ASMState *as, IRIns *ir)
+ #define asm_mulov(as, ir) asm_mul(as, ir)
+
+ #if !LJ_SOFTFP
+-#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
+-#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
++#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
+ #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
+-#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
+-#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
+ #endif
+
+-#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
+-
+ static void asm_neg(ASMState *as, IRIns *ir)
+ {
+ #if !LJ_SOFTFP
+@@ -1514,20 +1586,7 @@ static void asm_neg(ASMState *as, IRIns *ir)
+
+ static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
+ {
+- if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */
+- uint32_t cc = (as->mcp[1] >> 28);
+- as->flagmcp = NULL;
+- if (cc <= CC_NE) {
+- as->mcp++;
+- ai |= ARMI_S;
+- } else if (cc == CC_GE) {
+- *++as->mcp ^= ((CC_GE^CC_PL) << 28);
+- ai |= ARMI_S;
+- } else if (cc == CC_LT) {
+- *++as->mcp ^= ((CC_LT^CC_MI) << 28);
+- ai |= ARMI_S;
+- } /* else: other conds don't work with bit ops. */
+- }
++ ai = asm_drop_cmp0(as, ai);
+ if (ir->op2 == 0) {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+@@ -1582,7 +1641,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
+ #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
+ #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
+ #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
+-#define asm_brol(as, ir) lua_assert(0)
++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
+ static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
+ {
+@@ -1657,8 +1716,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
+ asm_intmin_max(as, ir, cc);
+ }
+
+-#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
+-#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
++#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
++#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
+
+ /* -- Comparisons --------------------------------------------------------- */
+
+@@ -1733,7 +1792,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
+ Reg left;
+ uint32_t m;
+ int cmpprev0 = 0;
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
++ "bad comparison data type %d", irt_type(ir->t));
+ if (asm_swapops(as, lref, rref)) {
+ Reg tmp = lref; lref = rref; rref = tmp;
+ if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
+@@ -1825,15 +1885,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
+ }
+ #endif
+
+-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
++/* -- Split register ops -------------------------------------------------- */
+
+-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
++/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
+ static void asm_hiop(ASMState *as, IRIns *ir)
+ {
+-#if LJ_HASFFI || LJ_SOFTFP
+ /* HIOP is marked as a store because it needs its own DCE logic. */
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
++#if LJ_HASFFI || LJ_SOFTFP
+ if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */
+ as->curins--; /* Always skip the loword comparison. */
+ #if LJ_SOFTFP
+@@ -1850,7 +1910,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+ as->curins--; /* Always skip the loword min/max. */
+ if (uselo || usehi)
+- asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
++ asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
+ return;
+ #elif LJ_HASFFI
+ } else if ((ir-1)->o == IR_CONV) {
+@@ -1864,6 +1924,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ asm_xstore_(as, ir, 4);
+ return;
+ }
++#endif
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
+ switch ((ir-1)->o) {
+ #if LJ_HASFFI
+@@ -1882,6 +1943,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ asm_intneg(as, ir, ARMI_RSC);
+ asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
+ break;
++ case IR_CNEWI:
++ /* Nothing to do here. Handled by lo op itself. */
++ break;
+ #endif
+ #if LJ_SOFTFP
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+@@ -1889,24 +1953,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
+ if (!uselo)
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
+ break;
++ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
++ /* Nothing to do here. Handled by lo op itself. */
++ break;
+ #endif
+- case IR_CALLN:
+- case IR_CALLS:
+- case IR_CALLXS:
++ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+-#if LJ_SOFTFP
+- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+-#endif
+- case IR_CNEWI:
+- /* Nothing to do here. Handled by lo op itself. */
+- break;
+- default: lua_assert(0); break;
++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+-#else
+- UNUSED(as); UNUSED(ir); lua_assert(0);
+-#endif
+ }
+
+ /* -- Profiling ----------------------------------------------------------- */
+@@ -1930,7 +1986,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
+ if (irp) {
+ if (!ra_hasspill(irp->s)) {
+ pbase = irp->r;
+- lua_assert(ra_hasreg(pbase));
++ lj_assertA(ra_hasreg(pbase), "base reg lost");
+ } else if (allow) {
+ pbase = rset_pickbot(allow);
+ } else {
+@@ -1942,7 +1998,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
+ }
+ emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
+ k = emit_isk12(0, (int32_t)(8*topslot));
+- lua_assert(k);
++ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
+ emit_n(as, ARMI_CMP^k, RID_TMP);
+ emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
+ emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
+@@ -1979,7 +2035,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ #if LJ_SOFTFP
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+ Reg tmp;
+- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
++ /* LJ_SOFTFP: must be a number constant. */
++ lj_assertA(irref_isk(ref), "unsplit FP op");
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
+ rset_exclude(RSET_GPREVEN, RID_BASE));
+ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
+@@ -1993,7 +2050,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ } else {
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+ Reg type;
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "restore of IR type %d", irt_type(ir->t));
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
+ emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
+@@ -2006,6 +2064,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ } else if ((sn & SNAP_SOFTFPNUM)) {
+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
+ #endif
++ } else if ((sn & SNAP_KEYINDEX)) {
++ type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
+ } else {
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
+ }
+@@ -2013,11 +2073,14 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
+ }
+ checkmclim(as);
+ }
+- lua_assert(map + nent == flinks);
++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+ }
+
+ /* -- GC handling --------------------------------------------------------- */
+
++/* Marker to prevent patching the GC check exit. */
++#define ARM_NOPATCH_GC_CHECK (ARMI_BIC|ARMI_K12)
++
+ /* Check GC threshold and do one or more GC steps. */
+ static void asm_gc_check(ASMState *as)
+ {
+@@ -2029,6 +2092,7 @@ static void asm_gc_check(ASMState *as)
+ l_end = emit_label(as);
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */
++ *--as->mcp = ARM_NOPATCH_GC_CHECK;
+ emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ASMREF_TMP2; /* MSize steps */
+@@ -2063,6 +2127,12 @@ static void asm_loop_fixup(ASMState *as)
+ }
+ }
+
++/* Fixup the tail of the loop. */
++static void asm_loop_tail_fixup(ASMState *as)
++{
++ UNUSED(as); /* Nothing to do. */
++}
++
+ /* -- Head of trace ------------------------------------------------------- */
+
+ /* Reload L register from g->cur_L. */
+@@ -2099,7 +2169,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+ rset_clear(allow, ra_dest(as, ir, allow));
+ } else {
+ Reg r = irp->r;
+- lua_assert(ra_hasreg(r));
++ lj_assertA(ra_hasreg(r), "base reg lost");
+ rset_clear(allow, r);
+ if (r != ir->r && !rset_test(as->freeset, r))
+ ra_restore(as, regcost_ref(as->cost[r]));
+@@ -2121,7 +2191,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+ } else {
+ /* Patch stack adjustment. */
+ uint32_t k = emit_isk12(ARMI_ADD, spadj);
+- lua_assert(k);
++ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
+ p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ }
+ /* Patch exit branch. */
+@@ -2197,13 +2267,14 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+ /* Look for bl_cc exitstub, replace with b_cc target. */
+ uint32_t ins = *p;
+ if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u &&
+- ((ins ^ (px-p)) & 0x00ffffffu) == 0) {
++ ((ins ^ (px-p)) & 0x00ffffffu) == 0 &&
++ p[-1] != ARM_NOPATCH_GC_CHECK) {
+ *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu);
+ cend = p+1;
+ if (!cstart) cstart = p;
+ }
+ }
+- lua_assert(cstart != NULL);
++ lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
+ lj_mcode_sync(cstart, cend);
+ lj_mcode_patch(J, mcarea, 1);
+ }
+diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
+index 8fd92e76..67c53ee2 100644
+--- a/src/lj_asm_arm64.h
++++ b/src/lj_asm_arm64.h
+@@ -1,6 +1,6 @@
+ /*
+ ** ARM64 IR assembler (SSA IR -> machine code).
+-** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
++** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+ **
+ ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+ ** Sponsored by Cisco Systems, Inc.
+@@ -56,11 +56,11 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+ asm_mclimit(as);
+ /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+ for (i = nexits-1; (int32_t)i >= 0; i--)
+- *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
+- *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
++ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
++ *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
+ mxp--;
+- *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
+- *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
++ *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
++ *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
+ as->mctop = mxp;
+ }
+
+@@ -77,7 +77,7 @@ static void asm_guardcc(ASMState *as, A64CC cc)
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+- *p = A64I_B | ((target-p) & 0x03ffffffu);
++ *p = A64I_B | A64F_S26(target-p);
+ emit_cond_branch(as, cc^1, p-1);
+ return;
+ }
+@@ -91,7 +91,7 @@ static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+- *p = A64I_B | ((target-p) & 0x03ffffffu);
++ *p = A64I_B | A64F_S26(target-p);
+ emit_tnb(as, ai^0x01000000u, r, bit, p-1);
+ return;
+ }
+@@ -105,7 +105,7 @@ static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+- *p = A64I_B | ((target-p) & 0x03ffffffu);
++ *p = A64I_B | A64F_S26(target-p);
+ emit_cnb(as, ai^0x01000000u, r, p-1);
+ return;
+ }
+@@ -198,6 +198,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+ return RID_GL;
+ }
+ }
++ } else if (ir->o == IR_TMPREF) {
++ *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
++ return RID_GL;
+ }
+ }
+ *ofsp = 0;
+@@ -213,7 +216,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
+ return A64F_M(ir->r);
+ } else if (irref_isk(ref)) {
+ uint32_t m;
+- int64_t k = get_k64val(ir);
++ int64_t k = get_k64val(as, ref);
+ if ((ai & 0x1f000000) == 0x0a000000)
+ m = emit_isk13(k, irt_is64(ir->t));
+ else
+@@ -295,8 +298,10 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
+ } else if (asm_isk32(as, ir->op1, &ofs)) {
+ ref = ir->op2;
+ } else {
+- Reg rn = ra_alloc1(as, ir->op1, allow);
+- IRIns *irr = IR(ir->op2);
++ Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
++ Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
++ Reg rn = ra_alloc1(as, refv, allow);
++ IRIns *irr = IR(refk);
+ uint32_t m;
+ if (irr+1 == ir && !ra_used(irr) &&
+ irr->o == IR_ADD && irref_isk(irr->op2)) {
+@@ -307,7 +312,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
+ goto skipopm;
+ }
+ }
+- m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
++ m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
+ ofs = sizeof(GCstr);
+ skipopm:
+ emit_lso(as, ai, rd, rd, ofs);
+@@ -352,9 +357,9 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+ static int asm_fuseandshift(ASMState *as, IRIns *ir)
+ {
+ IRIns *irl = IR(ir->op1);
+- lua_assert(ir->o == IR_BAND);
++ lj_assertA(ir->o == IR_BAND, "bad usage");
+ if (canfuse(as, irl) && irref_isk(ir->op2)) {
+- uint64_t mask = get_k64val(IR(ir->op2));
++ uint64_t mask = get_k64val(as, ir->op2);
+ if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
+ int32_t shmask = irt_is64(irl->t) ? 63 : 31;
+ int32_t shift = (IR(irl->op2)->i & shmask);
+@@ -382,7 +387,7 @@ static int asm_fuseandshift(ASMState *as, IRIns *ir)
+ static int asm_fuseorshift(ASMState *as, IRIns *ir)
+ {
+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+- lua_assert(ir->o == IR_BOR);
++ lj_assertA(ir->o == IR_BOR, "bad usage");
+ if (canfuse(as, irl) && canfuse(as, irr) &&
+ ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
+ (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
+@@ -426,7 +431,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ if (ref) {
+ if (irt_isfp(ir->t)) {
+ if (fpr <= REGARG_LASTFPR) {
+- lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, fpr),
++ "reg %d not free", fpr); /* Must have been evicted. */
+ ra_leftov(as, fpr, ref);
+ fpr++;
+ } else {
+@@ -436,7 +442,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ }
+ } else {
+ if (gpr <= REGARG_LASTGPR) {
+- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
++ lj_assertA(rset_test(as->freeset, gpr),
++ "reg %d not free", gpr); /* Must have been evicted. */
+ ra_leftov(as, gpr, ref);
+ gpr++;
+ } else {
+@@ -453,11 +460,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ {
+ RegSet drop = RSET_SCRATCH;
++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
++ if (hiop && ra_hasreg((ir+1)->r))
++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+- lua_assert(!irt_ispri(ir->t));
++ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (irt_isfp(ir->t)) {
+ if (ci->flags & CCI_CASTU64) {
+ Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
+@@ -466,6 +476,8 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+ } else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
++ } else if (hiop) {
++ ra_destpair(as, ir);
+ } else {
+ ra_destreg(as, ir, RID_RET);
+ }
+@@ -515,6 +527,21 @@ static void asm_retf(ASMState *as, IRIns *ir)
+ emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
+ }
+
++/* -- Buffer operations --------------------------------------------------- */
++
++#if LJ_HASBUFFER
++static void asm_bufhdr_write(ASMState *as, Reg sb)
++{
++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
++ IRIns irgc;
++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
++ emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);
++ emit_getgl(as, RID_TMP, cur_L);
++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
++}
++#endif
++
+ /* -- Type conversions ---------------------------------------------------- */
+
+ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+@@ -544,7 +571,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+ IRRef lref = ir->op1;
+- lua_assert(irt_type(ir->t) != st);
++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) { /* FP to FP conversion. */
+@@ -564,7 +591,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
++ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ } else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+@@ -584,7 +612,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
+ st == IRT_U8 ? A64I_UXTBw :
+ st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
+- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ emit_dn(as, ai, dest, left);
+ } else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+@@ -597,7 +625,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
+ emit_dn(as, A64I_SXTW, dest, left);
+ }
+ } else {
+- if (st64) {
++ if (st64 && !(ir->op2 & IRCONV_NONE)) {
+ /* This is either a 32 bit reg/reg mov which zeroes the hiword
+ ** or a load of the loword from a 64 bit address.
+ */
+@@ -648,7 +676,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+ {
+ RegSet allow = rset_exclude(RSET_GPR, base);
+ IRIns *ir = IR(ref);
+- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
++ "store of IR type %d", irt_type(ir->t));
+ if (irref_isk(ref)) {
+ TValue k;
+ lj_ir_kvalue(as->J->L, &k, ir);
+@@ -669,22 +698,23 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+ }
+
+ /* Get pointer to TValue. */
+-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+ {
+- IRIns *ir = IR(ref);
+- if (irt_isnum(ir->t)) {
+- if (irref_isk(ref)) {
+- /* Use the number constant itself as a TValue. */
+- ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
++ if ((mode & IRTMPREF_IN1)) {
++ IRIns *ir = IR(ref);
++ if (irt_isnum(ir->t)) {
++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
++ /* Use the number constant itself as a TValue. */
++ ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
++ return;
++ }
++ emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
+ } else {
+- /* Otherwise force a spill and use the spill slot. */
+- emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
++ asm_tvstore64(as, dest, 0, ref);
+ }
+- } else {
+- /* Otherwise use g->tmptv to hold the TValue. */
+- asm_tvstore64(as, dest, 0, ref);
+- ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
+ }
++ /* g->tmptv holds the TValue(s). */
++ emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
+ }
+
+ static void asm_aref(ASMState *as, IRIns *ir)
+@@ -722,6 +752,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ Reg dest = ra_dest(as, ir, allow);
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+ Reg key = 0, tmp = RID_TMP;
++ Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
+ IRRef refkey = ir->op2;
+ IRIns *irkey = IR(refkey);
+ int isk = irref_isk(ir->op2);
+@@ -751,6 +782,28 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ }
+ }
+
++ /* Allocate constants early. */
++ if (irt_isnum(kt)) {
++ if (!isk) {
++ tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
++ ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
++ rset_clear(allow, tisnum);
++ }
++ } else if (irt_isaddr(kt)) {
++ if (isk) {
++ int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
++ scr = ra_allock(as, kk, allow);
++ } else {
++ scr = ra_scratch(as, allow);
++ }
++ rset_clear(allow, scr);
++ } else {
++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
++ type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
++ scr = ra_scratch(as, rset_clear(allow, type));
++ rset_clear(allow, scr);
++ }
++
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
+ l_end = emit_label(as);
+ as->invmcp = NULL;
+@@ -780,9 +833,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_nm(as, A64I_CMPx, key, tmp);
+ emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+ } else {
+- Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
+- Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
+- rset_clear(allow, tisnum);
+ emit_nm(as, A64I_FCMPd, key, ftmp);
+ emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
+ emit_cond_branch(as, CC_LO, l_next);
+@@ -790,36 +840,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
+ }
+ } else if (irt_isaddr(kt)) {
+- Reg scr;
+ if (isk) {
+- int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+- scr = ra_allock(as, kk, allow);
+ emit_nm(as, A64I_CMPx, scr, tmp);
+ emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+ } else {
+- scr = ra_scratch(as, allow);
+ emit_nm(as, A64I_CMPx, tmp, scr);
+ emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
+ }
+- rset_clear(allow, scr);
+ } else {
+- Reg type, scr;
+- lua_assert(irt_ispri(kt) && !irt_isnil(kt));
+- type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
+- scr = ra_scratch(as, rset_clear(allow, type));
+- rset_clear(allow, scr);
+- emit_nm(as, A64I_CMPw, scr, type);
++ emit_nm(as, A64I_CMPx, scr, type);
+ emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
+ }
+
+ *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
+ if (!isk && irt_isaddr(kt)) {
+- Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
++ type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
+ emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
+ rset_clear(allow, type);
+ }
+ /* Load main position relative to tab->node into dest. */
+- khash = isk ? ir_khash(irkey) : 1;
++ khash = isk ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
+ } else {
+@@ -831,9 +871,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+ emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
+ emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+ } else if (irt_isstr(kt)) {
+- /* Fetch of str->hash is cheaper than ra_allock. */
++ /* Fetch of str->sid is cheaper than ra_allock. */
+ emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+- emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
++ emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
+ emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+ } else { /* Must match with hash*() in lj_tab.c. */
+ emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+@@ -869,14 +909,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+ int bigofs = !emit_checkofs(A64I_LDRx, ofs);
+- RegSet allow = RSET_GPR;
+ Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+- Reg node = ra_alloc1(as, ir->op1, allow);
+- Reg key = ra_scratch(as, rset_clear(allow, node));
+- Reg idx = node;
++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
++ Reg key, idx = node;
++ RegSet allow = rset_exclude(RSET_GPR, node);
+ uint64_t k;
+- lua_assert(ofs % sizeof(Node) == 0);
+- rset_clear(allow, key);
++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (bigofs) {
+ idx = dest;
+ rset_clear(allow, dest);
+@@ -892,7 +930,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
+ } else {
+ k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
+ }
+- emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow));
++ key = ra_scratch(as, allow);
++ emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
+ emit_lso(as, A64I_LDRx, key, idx, kofs);
+ if (bigofs)
+ emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+@@ -925,7 +964,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
+ static void asm_fref(ASMState *as, IRIns *ir)
+ {
+ UNUSED(as); UNUSED(ir);
+- lua_assert(!ra_used(ir));
++ lj_assertA(!ra_used(ir), "unfused FREF");
+ }
+
+ static void asm_strref(ASMState *as, IRIns *ir)
+@@ -977,7 +1016,7 @@ static void asm_fload(ASMState *as, IRIns *ir)
+ Reg idx;
+ A64Ins ai = asm_fxloadins(ir);
+ int32_t ofs;
+- if (ir->op1 == REF_NIL) {
++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = RID_GL;
+ ofs = (ir->op2 << 2) - GG_OFS(g);
+ } else {
+@@ -1008,7 +1047,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
+ static void asm_xload(ASMState *as, IRIns *ir)
+ {
+ Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
++ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+ }
+
+@@ -1026,8 +1065,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ Reg idx, tmp, type;
+ int32_t ofs = 0;
+ RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+- lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+- irt_isint(ir->t));
++ lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
++ irt_isint(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ if (ra_used(ir)) {
+ Reg dest = ra_dest(as, ir, allow);
+ tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
+@@ -1043,10 +1083,12 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ }
+ type = ra_scratch(as, rset_clear(gpr, tmp));
+ idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+ /* Always do the type check, even if the load result is unused. */
+ asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
+ if (irt_type(ir->t) >= IRT_NUM) {
+- lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
++ lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
++ "bad load type %d", irt_type(ir->t));
+ emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+ ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
+ } else if (irt_isaddr(ir->t)) {
+@@ -1056,7 +1098,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
+ emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+ } else {
+ emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+- ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
++ ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
+ }
+ if (ofs & FUSE_REG)
+ emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+@@ -1111,8 +1153,10 @@ static void asm_sload(ASMState *as, IRIns *ir)
+ IRType1 t = ir->t;
+ Reg dest = RID_NONE, base;
+ RegSet allow = RSET_GPR;
+- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
++ "bad parent SLOAD"); /* Handled by asm_head_side(). */
++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
++ "inconsistent SLOAD variant");
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+ dest = ra_scratch(as, RSET_FPR);
+ asm_tointg(as, ir, dest);
+@@ -1121,7 +1165,8 @@ static