From 9438ab2df484790d8aceda36862b0f7403199f4f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 24 Aug 2008 13:37:56 +0000 Subject: improve lua opcode dispatch performance by using computed goto instead of switch/case - improves performance by about 10% in a simple loop test git-svn-id: svn://svn.openwrt.org/openwrt/trunk@12378 3c298f89-4303-0410-b956-a3cf2f4a3e73 --- package/lua/patches/300-opcode_performance.patch | 363 +++++++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 package/lua/patches/300-opcode_performance.patch (limited to 'package') diff --git a/package/lua/patches/300-opcode_performance.patch b/package/lua/patches/300-opcode_performance.patch new file mode 100644 index 0000000000..dbc89a8ed0 --- /dev/null +++ b/package/lua/patches/300-opcode_performance.patch @@ -0,0 +1,363 @@ +--- a/src/lvm.c ++++ b/src/lvm.c +@@ -31,6 +31,9 @@ + /* limit for table tag-method chains (to avoid loops) */ + #define MAXTAGLOOP 100 + ++#ifdef __GNUC__ ++#define COMPUTED_GOTO 1 ++#endif + + /* + * If 'obj' is a string, it is tried to be interpreted as a number. +@@ -562,12 +565,65 @@ + ARITH_OP1_END + #endif + ++#ifdef COMPUTED_GOTO ++#define OPCODE_TARGET(op) DO_OP_##op: ++#define CALL_OPCODE(op) \ ++ if ((op < sizeof(opcodes) / sizeof(opcodes[0])) && opcodes[op]) \ ++ goto *opcodes[op]; ++#define OPCODE_PTR(op) [OP_##op] = &&DO_OP_##op ++#else ++#define OPCODE_TARGET(op) case OP_##op: ++#define CALL_OPCODE(op) switch (op) ++#endif ++ + + void luaV_execute (lua_State *L, int nexeccalls) { + LClosure *cl; + StkId base; + TValue *k; + const Instruction *pc; ++#ifdef COMPUTED_GOTO ++ static const void *opcodes[] = { ++ OPCODE_PTR(MOVE), ++ OPCODE_PTR(LOADK), ++ OPCODE_PTR(LOADBOOL), ++ OPCODE_PTR(LOADNIL), ++ OPCODE_PTR(GETUPVAL), ++ OPCODE_PTR(GETGLOBAL), ++ OPCODE_PTR(GETTABLE), ++ OPCODE_PTR(SETGLOBAL), ++ OPCODE_PTR(SETUPVAL), ++ OPCODE_PTR(SETTABLE), ++ OPCODE_PTR(NEWTABLE), ++ OPCODE_PTR(SELF), ++ OPCODE_PTR(ADD), ++ OPCODE_PTR(SUB), ++ OPCODE_PTR(MUL), ++ OPCODE_PTR(DIV), ++ OPCODE_PTR(MOD), ++ OPCODE_PTR(POW), ++ OPCODE_PTR(UNM), ++ OPCODE_PTR(NOT), ++ OPCODE_PTR(LEN), ++ OPCODE_PTR(CONCAT), ++ OPCODE_PTR(JMP), ++ OPCODE_PTR(EQ), ++ OPCODE_PTR(LT), ++ OPCODE_PTR(LE), ++ OPCODE_PTR(TEST), ++ OPCODE_PTR(TESTSET), ++ OPCODE_PTR(CALL), ++ OPCODE_PTR(TAILCALL), ++ OPCODE_PTR(RETURN), ++ OPCODE_PTR(FORLOOP), ++ OPCODE_PTR(FORPREP), ++ OPCODE_PTR(TFORLOOP), ++ OPCODE_PTR(SETLIST), ++ OPCODE_PTR(CLOSE), ++ OPCODE_PTR(CLOSURE), ++ OPCODE_PTR(VARARG) ++ }; ++#endif + reentry: /* entry point */ + lua_assert(isLua(L->ci)); + pc = L->savedpc; +@@ -593,32 +649,32 @@ + lua_assert(base <= L->top && L->top <= L->stack + L->stacksize); + lua_assert(L->top == L->ci->top || luaG_checkopenop(i)); + switch (GET_OPCODE(i)) { +- case OP_MOVE: { ++ OPCODE_TARGET(MOVE) { + setobjs2s(L, ra, RB(i)); + continue; + } +- case OP_LOADK: { ++ OPCODE_TARGET(LOADK) { + setobj2s(L, ra, KBx(i)); + continue; + } +- case OP_LOADBOOL: { ++ OPCODE_TARGET(LOADBOOL) { + setbvalue(ra, GETARG_B(i)); + if (GETARG_C(i)) pc++; /* skip next instruction (if C) */ + continue; + } +- case OP_LOADNIL: { ++ OPCODE_TARGET(LOADNIL) { + TValue *rb = RB(i); + do { + setnilvalue(rb--); + } while (rb >= ra); + continue; + } +- case OP_GETUPVAL: { ++ OPCODE_TARGET(GETUPVAL) { + int b = GETARG_B(i); + setobj2s(L, ra, cl->upvals[b]->v); + continue; + } +- case OP_GETGLOBAL: { ++ OPCODE_TARGET(GETGLOBAL) { + TValue g; + TValue *rb = KBx(i); + sethvalue(L, &g, cl->env); +@@ -626,88 +682,88 @@ + Protect(luaV_gettable(L, &g, rb, ra)); + continue; + } +- case OP_GETTABLE: { ++ OPCODE_TARGET(GETTABLE) { + Protect(luaV_gettable(L, RB(i), RKC(i), ra)); + continue; + } +- case OP_SETGLOBAL: { ++ OPCODE_TARGET(SETGLOBAL) { + TValue g; + sethvalue(L, &g, cl->env); + lua_assert(ttisstring(KBx(i))); + Protect(luaV_settable(L, &g, KBx(i), ra)); + continue; + } +- case OP_SETUPVAL: { ++ OPCODE_TARGET(SETUPVAL) { + UpVal *uv = cl->upvals[GETARG_B(i)]; + setobj(L, uv->v, ra); + luaC_barrier(L, uv, ra); + continue; + } +- case OP_SETTABLE: { ++ OPCODE_TARGET(SETTABLE) { + Protect(luaV_settable(L, ra, RKB(i), RKC(i))); + continue; + } +- case OP_NEWTABLE: { ++ OPCODE_TARGET(NEWTABLE) { + int b = GETARG_B(i); + int c = GETARG_C(i); + sethvalue(L, ra, luaH_new(L, luaO_fb2int(b), luaO_fb2int(c))); + Protect(luaC_checkGC(L)); + continue; + } +- case OP_SELF: { ++ OPCODE_TARGET(SELF) { + StkId rb = RB(i); + setobjs2s(L, ra+1, rb); + Protect(luaV_gettable(L, rb, RKC(i), ra)); + continue; + } +- case OP_ADD: { ++ OPCODE_TARGET(ADD) { + TValue *rb = RKB(i), *rc= RKC(i); + arith_op_continue( luai_numadd, try_addint, luai_vectadd ); + Protect(Arith(L, ra, rb, rc, TM_ADD)); \ + continue; + } +- case OP_SUB: { ++ OPCODE_TARGET(SUB) { + TValue *rb = RKB(i), *rc= RKC(i); + arith_op_continue( luai_numsub, try_subint, luai_vectsub ); + Protect(Arith(L, ra, rb, rc, TM_SUB)); + continue; + } +- case OP_MUL: { ++ OPCODE_TARGET(MUL) { + TValue *rb = RKB(i), *rc= RKC(i); + arith_op_continue(luai_nummul, try_mulint, luai_vectmul); + Protect(Arith(L, ra, rb, rc, TM_MUL)); + continue; + } +- case OP_DIV: { ++ OPCODE_TARGET(DIV) { + TValue *rb = RKB(i), *rc= RKC(i); + arith_op_continue(luai_numdiv, try_divint, luai_vectdiv); + Protect(Arith(L, ra, rb, rc, TM_DIV)); + continue; + } +- case OP_MOD: { ++ OPCODE_TARGET(MOD) { + TValue *rb = RKB(i), *rc= RKC(i); + arith_op_continue_scalar(luai_nummod, try_modint); /* scalars only */ + Protect(Arith(L, ra, rb, rc, TM_MOD)); + continue; + } +- case OP_POW: { ++ OPCODE_TARGET(POW) { + TValue *rb = RKB(i), *rc= RKC(i); + arith_op_continue(luai_numpow, try_powint, luai_vectpow); + Protect(Arith(L, ra, rb, rc, TM_POW)); + continue; + } +- case OP_UNM: { ++ OPCODE_TARGET(UNM) { + TValue *rb = RB(i); + arith_op1_continue(luai_numunm, try_unmint, luai_vectunm); + Protect(Arith(L, ra, rb, rb, TM_UNM)); + continue; + } +- case OP_NOT: { ++ OPCODE_TARGET(NOT) { + int res = l_isfalse(RB(i)); /* next assignment may change this value */ + setbvalue(ra, res); + continue; + } +- case OP_LEN: { ++ OPCODE_TARGET(LEN) { + const TValue *rb = RB(i); + switch (ttype(rb)) { + case LUA_TTABLE: { +@@ -727,18 +783,18 @@ + } + continue; + } +- case OP_CONCAT: { ++ OPCODE_TARGET(CONCAT) { + int b = GETARG_B(i); + int c = GETARG_C(i); + Protect(luaV_concat(L, c-b+1, c); luaC_checkGC(L)); + setobjs2s(L, RA(i), base+b); + continue; + } +- case OP_JMP: { ++ OPCODE_TARGET(JMP) { + dojump(L, pc, GETARG_sBx(i)); + continue; + } +- case OP_EQ: { ++ OPCODE_TARGET(EQ) { + TValue *rb = RKB(i); + TValue *rc = RKC(i); + Protect( +@@ -748,7 +804,7 @@ + pc++; + continue; + } +- case OP_LT: { ++ OPCODE_TARGET(LT) { + Protect( + if (luaV_lessthan(L, RKB(i), RKC(i)) == GETARG_A(i)) + dojump(L, pc, GETARG_sBx(*pc)); +@@ -756,7 +812,7 @@ + pc++; + continue; + } +- case OP_LE: { ++ OPCODE_TARGET(LE) { + Protect( + if (lessequal(L, RKB(i), RKC(i)) == GETARG_A(i)) + dojump(L, pc, GETARG_sBx(*pc)); +@@ -764,13 +820,13 @@ + pc++; + continue; + } +- case OP_TEST: { ++ OPCODE_TARGET(TEST) { + if (l_isfalse(ra) != GETARG_C(i)) + dojump(L, pc, GETARG_sBx(*pc)); + pc++; + continue; + } +- case OP_TESTSET: { ++ OPCODE_TARGET(TESTSET) { + TValue *rb = RB(i); + if (l_isfalse(rb) != GETARG_C(i)) { + setobjs2s(L, ra, rb); +@@ -779,7 +835,7 @@ + pc++; + continue; + } +- case OP_CALL: { ++ OPCODE_TARGET(CALL) { + int b = GETARG_B(i); + int nresults = GETARG_C(i) - 1; + if (b != 0) L->top = ra+b; /* else previous instruction set top */ +@@ -800,7 +856,7 @@ + } + } + } +- case OP_TAILCALL: { ++ OPCODE_TARGET(TAILCALL) { + int b = GETARG_B(i); + if (b != 0) L->top = ra+b; /* else previous instruction set top */ + L->savedpc = pc; +@@ -832,7 +888,7 @@ + } + } + } +- case OP_RETURN: { ++ OPCODE_TARGET(RETURN) { + int b = GETARG_B(i); + if (b != 0) L->top = ra+b-1; + if (L->openupval) luaF_close(L, base); +@@ -847,7 +903,7 @@ + goto reentry; + } + } +- case OP_FORLOOP: { ++ OPCODE_TARGET(FORLOOP) { + /* If start,step and limit are all integers, we don't need to check + * against overflow in the looping. + */ +@@ -875,7 +931,7 @@ + } + continue; + } +- case OP_FORPREP: { ++ OPCODE_TARGET(FORPREP) { + const TValue *init = ra; + const TValue *plimit = ra+1; + const TValue *pstep = ra+2; +@@ -898,7 +954,7 @@ + dojump(L, pc, GETARG_sBx(i)); + continue; + } +- case OP_TFORLOOP: { ++ OPCODE_TARGET(TFORLOOP) { + StkId cb = ra + 3; /* call base */ + setobjs2s(L, cb+2, ra+2); + setobjs2s(L, cb+1, ra+1); +@@ -914,7 +970,7 @@ + pc++; + continue; + } +- case OP_SETLIST: { ++ OPCODE_TARGET(SETLIST) { + int n = GETARG_B(i); + int c = GETARG_C(i); + int last; +@@ -936,11 +992,11 @@ + } + continue; + } +- case OP_CLOSE: { ++ OPCODE_TARGET(CLOSE) { + luaF_close(L, ra); + continue; + } +- case OP_CLOSURE: { ++ OPCODE_TARGET(CLOSURE) { + Proto *p; + Closure *ncl; + int nup, j; +@@ -960,7 +1016,7 @@ + Protect(luaC_checkGC(L)); + continue; + } +- case OP_VARARG: { ++ OPCODE_TARGET(VARARG) { + int b = GETARG_B(i) - 1; + int j; + CallInfo *ci = L->ci; -- cgit v1.2.3