Diffstat (limited to 'tcg')
-rw-r--r--  tcg/LICENSE                    3
-rw-r--r--  tcg/README                   551
-rw-r--r--  tcg/TODO                      14
-rw-r--r--  tcg/aarch64/tcg-target.c    1882
-rw-r--r--  tcg/aarch64/tcg-target.h     108
-rw-r--r--  tcg/arm/tcg-target.c        2128
-rw-r--r--  tcg/arm/tcg-target.h         109
-rw-r--r--  tcg/i386/tcg-target.c       2451
-rw-r--r--  tcg/i386/tcg-target.h        149
-rw-r--r--  tcg/ia64/tcg-target.c       2445
-rw-r--r--  tcg/ia64/tcg-target.h        185
-rw-r--r--  tcg/mips/tcg-target.c       1828
-rw-r--r--  tcg/mips/tcg-target.h        138
-rw-r--r--  tcg/optimize.c              1330
-rw-r--r--  tcg/ppc/tcg-target.c        2722
-rw-r--r--  tcg/ppc/tcg-target.h         112
-rw-r--r--  tcg/s390/tcg-target.c       2393
-rw-r--r--  tcg/s390/tcg-target.h        123
-rw-r--r--  tcg/sparc/tcg-target.c      1650
-rw-r--r--  tcg/sparc/tcg-target.h       160
-rw-r--r--  tcg/tcg-be-ldst.h             88
-rw-r--r--  tcg/tcg-be-null.h             43
-rw-r--r--  tcg/tcg-op.c                1945
-rw-r--r--  tcg/tcg-op.h                 991
-rw-r--r--  tcg/tcg-opc.h                195
-rw-r--r--  tcg/tcg-runtime.h             16
-rw-r--r--  tcg/tcg.c                   2764
-rw-r--r--  tcg/tcg.h                   1011
-rw-r--r--  tcg/tci/README               130
-rw-r--r--  tcg/tci/tcg-target.c         875
-rw-r--r--  tcg/tci/tcg-target.h         185
31 files changed, 28724 insertions, 0 deletions
diff --git a/tcg/LICENSE b/tcg/LICENSE
new file mode 100644
index 00000000..be817fa1
--- /dev/null
+++ b/tcg/LICENSE
@@ -0,0 +1,3 @@
+All the files in this directory and subdirectories are released under
+a BSD like license (see header in each file). No other license is
+accepted.
diff --git a/tcg/README b/tcg/README
new file mode 100644
index 00000000..a550ff17
--- /dev/null
+++ b/tcg/README
@@ -0,0 +1,551 @@
+Tiny Code Generator - Fabrice Bellard.
+
+1) Introduction
+
+TCG (Tiny Code Generator) began as a generic backend for a C
+compiler. It was simplified to be used in QEMU. It also has its roots
+in the QOP code generator written by Paul Brook.
+
+2) Definitions
+
+The TCG "target" is the architecture for which we generate the
+code. It is of course not the same as the "target" of QEMU which is
+the emulated architecture. As TCG started as a generic C backend used
+for cross compiling, it is assumed that the TCG target is different
+from the host, although it is never the case for QEMU.
+
+In this document, we use "guest" to specify what architecture we are
+emulating; "target" always means the TCG target, the machine on which
+we are running QEMU.
+
+A TCG "function" corresponds to a QEMU Translated Block (TB).
+
+A TCG "temporary" is a variable only live in a basic
+block. Temporaries are allocated explicitly in each function.
+
+A TCG "local temporary" is a variable only live in a function. Local
+temporaries are allocated explicitly in each function.
+
+A TCG "global" is a variable which is live in all the functions
+(equivalent of a C global variable). They are defined before the
+functions are defined. A TCG global can be a memory location (e.g. a QEMU
+CPU register), a fixed host register (e.g. the QEMU CPU state pointer)
+or a memory location which is stored in a register outside QEMU TBs
+(not implemented yet).
+
+A TCG "basic block" corresponds to a list of instructions terminated
+by a branch instruction.
+
+An operation with "undefined behavior" may result in a crash.
+
+An operation with "unspecified behavior" shall not crash. However,
+the result may be one of several possibilities, so it may be considered
+an "undefined result".
+
+3) Intermediate representation
+
+3.1) Introduction
+
+TCG instructions operate on variables which are temporaries, local
+temporaries or globals. TCG instructions and variables are strongly
+typed. Two types are supported: 32 bit integers and 64 bit
+integers. Pointers are defined as an alias to 32 bit or 64 bit
+integers depending on the TCG target word size.
+
+Each instruction has a fixed number of output variable operands, input
+variable operands and constant operands, which are always
+compile-time constants.
+
+The notable exception is the call instruction which has a variable
+number of outputs and inputs.
+
+In the textual form, output operands usually come first, followed by
+input operands, followed by constant operands. The output type is
+included in the instruction name. Constants are prefixed with a '$'.
+
+add_i32 t0, t1, t2 (t0 <- t1 + t2)
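+
+In a frontend translator the same operation is emitted through the
+corresponding generator function (see note 2 in section 3.4):
+
+  tcg_gen_add_i32(t0, t1, t2);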
+
+3.2) Assumptions
+
+* Basic blocks
+
+- Basic blocks end after branches (e.g. brcond_i32 instruction),
+ goto_tb and exit_tb instructions.
+- Basic blocks start after the end of a previous basic block, or at a
+ set_label instruction.
+
+After the end of a basic block, the content of temporaries is
+destroyed, but local temporaries and globals are preserved.
+
+* Floating point types are not supported yet
+
+* Pointers: depending on the TCG target, pointer size is 32 bit or 64
+ bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or
+ TCG_TYPE_I64.
+
+* Helpers:
+
+Using the tcg_gen_helper_x_y functions it is possible to call any function
+taking i32, i64 or pointer types. By default, before calling a helper,
+all globals are stored at their canonical location and it is assumed
+that the function can modify them. By default, the helper is allowed to
+modify the CPU state or raise an exception.
+
+This can be overridden using the following function modifiers:
+- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
+ either directly or via an exception. They will not be saved to their
+ canonical locations before calling the helper.
+- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals.
+ They will only be saved to their canonical location before calling helpers,
+ but they won't be reloaded afterwards.
+- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if
+ the return value is not used.
+
+Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS.
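+
+As a sketch of how these modifiers are attached, a helper declared with
+QEMU's DEF_HELPER_FLAGS macros might look as follows (the helper name
+and arguments are made up for illustration):
+
+  /* A pure helper: reads no globals, removable if its result is unused. */
+  DEF_HELPER_FLAGS_2(crc_step,
+                     TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+                     i32, i32, i32)
+
+  /* The translator then calls the generated wrapper: */
+  gen_helper_crc_step(dest, src, val);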
+
+On some TCG targets (e.g. x86), several calling conventions are
+supported.
+
+* Branches:
+
+Use the instruction 'br' to jump to a label.
+
+3.3) Code Optimizations
+
+When generating instructions, you can count on at least the following
+optimizations:
+
+- Single instructions are simplified, e.g.
+
+ and_i32 t0, t0, $0xffffffff
+
+ is suppressed.
+
+- A liveness analysis is done at the basic block level. The
+ information is used to suppress moves from a dead variable to
+ another one. It is also used to remove instructions which compute
+ dead results. The latter is especially useful for condition code
+ optimization in QEMU.
+
+ In the following example:
+
+ add_i32 t0, t1, t2
+ add_i32 t0, t0, $1
+ mov_i32 t0, $1
+
+ only the last instruction is kept.
+
+3.4) Instruction Reference
+
+********* Function call
+
+* call <ret> <params> ptr
+
+call function 'ptr' (pointer type)
+
+<ret> optional 32 bit or 64 bit return value
+<params> optional 32 bit or 64 bit parameters
+
+********* Jumps/Labels
+
+* set_label $label
+
+Define label 'label' at the current program point.
+
+* br $label
+
+Jump to label.
+
+* brcond_i32/i64 t0, t1, cond, label
+
+Conditional jump if t0 cond t1 is true. cond can be:
+ TCG_COND_EQ
+ TCG_COND_NE
+ TCG_COND_LT /* signed */
+ TCG_COND_GE /* signed */
+ TCG_COND_LE /* signed */
+ TCG_COND_GT /* signed */
+ TCG_COND_LTU /* unsigned */
+ TCG_COND_GEU /* unsigned */
+ TCG_COND_LEU /* unsigned */
+ TCG_COND_GTU /* unsigned */
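+
+For example, an unsigned range check in a frontend might be emitted as
+follows ('addr', 'limit' and 'done' are hypothetical names):
+
+  TCGLabel *done = gen_new_label();
+  tcg_gen_brcond_i32(TCG_COND_LTU, addr, limit, done);
+  /* ... code for the out-of-range case ... */
+  gen_set_label(done);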
+
+********* Arithmetic
+
+* add_i32/i64 t0, t1, t2
+
+t0=t1+t2
+
+* sub_i32/i64 t0, t1, t2
+
+t0=t1-t2
+
+* neg_i32/i64 t0, t1
+
+t0=-t1 (two's complement)
+
+* mul_i32/i64 t0, t1, t2
+
+t0=t1*t2
+
+* div_i32/i64 t0, t1, t2
+
+t0=t1/t2 (signed). Undefined behavior if division by zero or overflow.
+
+* divu_i32/i64 t0, t1, t2
+
+t0=t1/t2 (unsigned). Undefined behavior if division by zero.
+
+* rem_i32/i64 t0, t1, t2
+
+t0=t1%t2 (signed). Undefined behavior if division by zero or overflow.
+
+* remu_i32/i64 t0, t1, t2
+
+t0=t1%t2 (unsigned). Undefined behavior if division by zero.
+
+********* Logical
+
+* and_i32/i64 t0, t1, t2
+
+t0=t1&t2
+
+* or_i32/i64 t0, t1, t2
+
+t0=t1|t2
+
+* xor_i32/i64 t0, t1, t2
+
+t0=t1^t2
+
+* not_i32/i64 t0, t1
+
+t0=~t1
+
+* andc_i32/i64 t0, t1, t2
+
+t0=t1&~t2
+
+* eqv_i32/i64 t0, t1, t2
+
+t0=~(t1^t2), or equivalently, t0=t1^~t2
+
+* nand_i32/i64 t0, t1, t2
+
+t0=~(t1&t2)
+
+* nor_i32/i64 t0, t1, t2
+
+t0=~(t1|t2)
+
+* orc_i32/i64 t0, t1, t2
+
+t0=t1|~t2
+
+********* Shifts/Rotates
+
+* shl_i32/i64 t0, t1, t2
+
+t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* shr_i32/i64 t0, t1, t2
+
+t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* sar_i32/i64 t0, t1, t2
+
+t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* rotl_i32/i64 t0, t1, t2
+
+Rotation of t2 bits to the left.
+Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* rotr_i32/i64 t0, t1, t2
+
+Rotation of t2 bits to the right.
+Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+********* Misc
+
+* mov_i32/i64 t0, t1
+
+t0 = t1
+
+Move t1 to t0 (both operands must have the same type).
+
+* ext8s_i32/i64 t0, t1
+ext8u_i32/i64 t0, t1
+ext16s_i32/i64 t0, t1
+ext16u_i32/i64 t0, t1
+ext32s_i64 t0, t1
+ext32u_i64 t0, t1
+
+8, 16 or 32 bit sign/zero extension (both operands must have the same type)
+
+* bswap16_i32/i64 t0, t1
+
+16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order
+bytes are set to zero.
+
+* bswap32_i32/i64 t0, t1
+
+32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that
+the four high order bytes are set to zero.
+
+* bswap64_i64 t0, t1
+
+64 bit byte swap
+
+* discard_i32/i64 t0
+
+Indicate that the value of t0 won't be used later. It is useful to
+force dead code elimination.
+
+* deposit_i32/i64 dest, t1, t2, pos, len
+
+Deposit T2 as a bitfield into T1, placing the result in DEST.
+The bitfield is described by POS/LEN, which are immediate values:
+
+ LEN - the length of the bitfield
+ POS - the position of the first bit, counting from the LSB
+
+For example, pos=8, len=4 indicates a 4-bit field at bit 8.
+This operation would be equivalent to
+
+ dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
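+
+or, emitted from a translator through the corresponding generator
+function:
+
+  tcg_gen_deposit_i32(dest, t1, t2, 8, 4);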
+
+* trunc_shr_i32 t0, t1, pos
+
+For 64-bit hosts only, right shift the 64-bit input T1 by POS and
+truncate to 32-bit output T0. Depending on the host, this may be
+a simple mov/shift, or may require additional canonicalization.
+
+********* Conditional moves
+
+* setcond_i32/i64 dest, t1, t2, cond
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+* movcond_i32/i64 dest, c1, c2, v1, v2, cond
+
+dest = (c1 cond c2 ? v1 : v2)
+
+Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
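+
+For example, an unsigned maximum of two values could be emitted as
+(names hypothetical):
+
+  tcg_gen_movcond_i32(TCG_COND_GTU, dest, a, b, a, b);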
+
+********* Type conversions
+
+* ext_i32_i64 t0, t1
+Convert t1 (32 bit) to t0 (64 bit) and does sign extension
+
+* extu_i32_i64 t0, t1
+Convert t1 (32 bit) to t0 (64 bit) and does zero extension
+
+* trunc_i64_i32 t0, t1
+Truncate t1 (64 bit) to t0 (32 bit)
+
+* concat_i32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
+from t2 (32 bit).
+
+* concat32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half
+from t2 (64 bit).
+
+********* Load/Store
+
+* ld_i32/i64 t0, t1, offset
+ld8s_i32/i64 t0, t1, offset
+ld8u_i32/i64 t0, t1, offset
+ld16s_i32/i64 t0, t1, offset
+ld16u_i32/i64 t0, t1, offset
+ld32s_i64 t0, t1, offset
+ld32u_i64 t0, t1, offset
+
+t0 = read(t1 + offset)
+Load 8, 16, 32 or 64 bits with or without sign extension from host memory.
+offset must be a constant.
+
+* st_i32/i64 t0, t1, offset
+st8_i32/i64 t0, t1, offset
+st16_i32/i64 t0, t1, offset
+st32_i64 t0, t1, offset
+
+write(t0, t1 + offset)
+Write 8, 16, 32 or 64 bits to host memory.
+
+All these opcodes assume that the host memory being accessed does not
+correspond to a global. Otherwise the behaviour is unpredictable.
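+
+A typical use is accessing a CPU state field that is not mapped to a TCG
+global, via the env pointer (the field name here is hypothetical):
+
+  tcg_gen_ld_i32(val, cpu_env, offsetof(CPUArchState, fpscr_tmp));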
+
+********* Multiword arithmetic support
+
+* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the double-word inputs T1 and T2 are
+formed from two single-word arguments, and the double-word output T0
+is returned in two single-word outputs.
+
+* mulu2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mul, except two unsigned inputs T1 and T2 yielding the full
+double-word product T0. The latter is returned in two single-word outputs.
+
+* muls2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mulu2, except the two inputs T1 and T2 are signed.
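+
+As an illustration, a frontend wanting a full-width product or a
+carry-propagating add split across two 32-bit registers could emit
+(register names hypothetical):
+
+  tcg_gen_mulu2_i32(lo, hi, a, b);           /* hi:lo = a * b, unsigned */
+  tcg_gen_add2_i32(rl, rh, al, ah, bl, bh);  /* rh:rl = ah:al + bh:bl */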
+
+********* 64-bit guest on 32-bit host support
+
+The following opcodes are internal to TCG. Thus they are to be implemented by
+32-bit host code generators, but are not to be emitted by guest translators.
+They are emitted as needed by inline functions within "tcg-op.h".
+
+* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
+* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments. The result is a 32-bit value.
+
+********* QEMU specific operations
+
+* exit_tb t0
+
+Exit the current TB and return the value t0 (word type).
+
+* goto_tb index
+
+Exit the current TB and jump to the TB index 'index' (constant) if the
+current TB was linked to this TB. Otherwise execute the next
+instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+at most once with each slot index per TB.
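+
+The usual pattern at the end of a translated block is, roughly (cpu_pc
+stands in for the guest program counter global, 'dest' for the jump
+target and 'tb' for the TranslationBlock being generated):
+
+  tcg_gen_goto_tb(0);
+  tcg_gen_movi_i32(cpu_pc, dest);
+  tcg_gen_exit_tb((uintptr_t)tb + 0);  /* low bits select the jump slot */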
+
+* qemu_ld_i32/i64 t0, t1, flags, memidx
+* qemu_st_i32/i64 t0, t1, flags, memidx
+
+Load data at the guest address t1 into t0, or store data in t0 at guest
+address t1. The _i32/_i64 size applies to the size of the input/output
+register t0 only. The address t1 is always sized according to the guest,
+and the width of the memory operation is controlled by flags.
+
+Both t0 and t1 may be split into little-endian ordered pairs of registers
+if dealing with 64-bit quantities on a 32-bit host.
+
+The memidx selects the qemu tlb index to use (e.g. user or kernel access).
+The flags are the TCGMemOp bits, selecting the sign, width, and endianness
+of the memory access.
+
+For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
+64-bit memory access specified in flags.
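+
+From a frontend these are reached through the tcg_gen_qemu_ld/st_*
+functions; for example, a little-endian 32-bit guest load using mmu
+index 'memidx' might be written as ('val' and 'addr' are hypothetical):
+
+  tcg_gen_qemu_ld_i32(val, addr, memidx, MO_LEUL);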
+
+*********
+
+Note 1: Some shortcuts are defined when the last operand is known to be
+a constant (e.g. addi for add, movi for mov).
+
+Note 2: When using TCG, the opcodes must never be generated directly
+as some of them may not be available as "real" opcodes. Always use the
+function tcg_gen_xxx(args).
+
+4) Backend
+
+tcg-target.h contains the target specific definitions. tcg-target.c
+contains the target specific code.
+
+4.1) Assumptions
+
+The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
+64 bit. It is expected that the pointer has the same size as the word.
+
+On a 32 bit target, all 64 bit operations are converted to 32 bits. A
+few specific operations must be implemented to allow it (see add2_i32,
+sub2_i32, brcond2_i32).
+
+Floating point operations are not supported in this version. A
+previous incarnation of the code generator had full support of them,
+but it is better to concentrate on integer operations first.
+
+On a 64 bit target, no assumption is made in TCG about the storage of
+the 32 bit values in 64 bit registers.
+
+4.2) Constraints
+
+GCC like constraints are used to define the constraints of every
+instruction. Memory constraints are not supported in this
+version. Aliases are specified in the input operands as for GCC.
+
+The same register may be used for both an input and an output, even when
+they are not explicitly aliased. If an op expands to multiple target
+instructions then care must be taken to avoid clobbering input values.
+GCC style "early clobber" outputs are not currently supported.
+
+A target can define specific register or constant constraints. If an
+operation uses a constant input constraint which does not allow all
+constants, it must also accept registers in order to have a fallback.
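+
+In a backend these constraints appear as strings in its TCGTargetOpDef
+table. A representative entry, in the style of the aarch64 backend in
+this patch (where 'A' accepts an arithmetic immediate), might be:
+
+  { INDEX_op_add_i32, { "r", "r", "rA" } },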
+
+The movi_i32 and movi_i64 operations must accept any constants.
+
+The mov_i32 and mov_i64 operations must accept any registers of the
+same type.
+
+The ld/st instructions must accept signed 32 bit constant offsets. This
+can be implemented by reserving a specific register in which to compute
+the address if the offset is too big.
+
+The ld/st instructions must accept any destination (ld) or source (st)
+register.
+
+4.3) Function call assumptions
+
+- The only supported types for parameters and return value are: 32 and
+ 64 bit integers and pointer.
+- The stack grows downwards.
+- The first N parameters are passed in registers.
+- The next parameters are passed on the stack by storing them as words.
+- Some registers are clobbered during the call.
+- The function can return 0 or 1 value in registers. On a 32 bit
+ target, functions must be able to return 2 values in registers for
+ 64 bit return type.
+
+5) Recommended coding rules for best performance
+
+- Use globals to represent the parts of the QEMU CPU state which are
+ often modified, e.g. the integer registers and the condition
+ codes. TCG will be able to use host registers to store them.
+
+- Avoid globals stored in fixed registers. They must be used only to
+ store the pointer to the CPU state and possibly to store a pointer
+ to a register window.
+
+- Use temporaries. Use local temporaries only when really needed,
+ e.g. when you need to use a value after a jump. Local temporaries
+ introduce a performance hit in the current TCG implementation: their
+ content is saved to memory at end of each basic block.
+
+- Free temporaries and local temporaries when they are no longer used
+ (tcg_temp_free). Since tcg_const_x() also creates a temporary, you
+ should free it after it is used. Freeing temporaries does not yield
+ better generated code, but it reduces the memory usage of TCG and
+ improves the speed of the translation (see the sketch after this list).
+
+- Don't hesitate to use helpers for complicated or seldom used guest
+ instructions. There is little performance advantage in using TCG to
+ implement guest instructions taking more than about twenty TCG
+ instructions. Note that this rule of thumb is more applicable to
+ helpers doing complex logic or arithmetic, where the C compiler has
+ scope to do a good job of optimisation; it is less relevant where
+ the instruction is mostly doing loads and stores, and in those cases
+ inline TCG may still be faster for longer sequences.
+
+- The hard limit on the number of TCG instructions you can generate
+ per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h --
+ you cannot exceed this without risking a buffer overrun.
+
+- Use the 'discard' instruction if you know that TCG won't be able to
+ prove that a given global is "dead" at a given program point. The
+ x86 guest uses it to improve the condition codes optimisation.
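+
+As a minimal sketch of the temporary handling rules above (cpu_reg is a
+hypothetical global; the shift amount is arbitrary):
+
+  TCGv_i32 t = tcg_temp_new_i32();
+  TCGv_i32 four = tcg_const_i32(4);   /* tcg_const_x also creates a temporary */
+  tcg_gen_shl_i32(t, cpu_reg, four);
+  tcg_gen_add_i32(cpu_reg, cpu_reg, t);
+  tcg_temp_free_i32(four);
+  tcg_temp_free_i32(t);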
diff --git a/tcg/TODO b/tcg/TODO
new file mode 100644
index 00000000..07478477
--- /dev/null
+++ b/tcg/TODO
@@ -0,0 +1,14 @@
+- Add new instructions such as: clz, ctz, popcnt.
+
+- See if it is worth exporting mul2, mulu2, div2, divu2.
+
+- Support of globals saved in fixed registers between TBs.
+
+Ideas:
+
+- Move the slow part of the qemu_ld/st ops after the end of the TB.
+
+- Change exception syntax to get closer to QOP system (exception
+ parameters given with a specific instruction).
+
+- Add float and vector support.
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
new file mode 100644
index 00000000..bad8b1de
--- /dev/null
+++ b/tcg/aarch64/tcg-target.c
@@ -0,0 +1,1882 @@
+/*
+ * Initial TCG Implementation for aarch64
+ *
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ * Written by Claudio Fontana
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ */
+
+#include "tcg-be-ldst.h"
+#include "qemu/bitops.h"
+
+/* We're going to re-use TCGType in setting of the SF bit, which controls
+ the size of the operation performed. If we know the values match, it
+ makes things much cleaner. */
+QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
+ "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
+ "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
+ "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
+};
+#endif /* NDEBUG */
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
+ TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
+ TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
+
+ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
+ TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
+ TCG_REG_X16, TCG_REG_X17,
+
+ TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
+ TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
+
+ /* X18 reserved by system */
+ /* X19 reserved for AREG0 */
+ /* X29 reserved as fp */
+ /* X30 reserved as temporary */
+};
+
+static const int tcg_target_call_iarg_regs[8] = {
+ TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
+ TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
+};
+static const int tcg_target_call_oarg_regs[1] = {
+ TCG_REG_X0
+};
+
+#define TCG_REG_TMP TCG_REG_X30
+
+#ifndef CONFIG_SOFTMMU
+/* Note that XZR cannot be encoded in the address base register slot,
+ as that actually encodes SP. So if we need to zero-extend the guest
+ address, via the address index register slot, we need to load even
+ a zero guest base into a register. */
+#define USE_GUEST_BASE (GUEST_BASE != 0 || TARGET_LONG_BITS == 32)
+
+# ifdef CONFIG_USE_GUEST_BASE
+# define TCG_REG_GUEST_BASE TCG_REG_X28
+# else
+# define TCG_REG_GUEST_BASE TCG_REG_XZR
+# endif
+#endif
+
+static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+{
+ ptrdiff_t offset = target - code_ptr;
+ assert(offset == sextract64(offset, 0, 26));
+ /* read instruction, mask away previous PC_REL26 parameter contents,
+ set the proper offset, then write back the instruction. */
+ *code_ptr = deposit32(*code_ptr, 0, 26, offset);
+}
+
+static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+{
+ ptrdiff_t offset = target - code_ptr;
+ assert(offset == sextract64(offset, 0, 19));
+ *code_ptr = deposit32(*code_ptr, 5, 19, offset);
+}
+
+static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ assert(addend == 0);
+ switch (type) {
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ reloc_pc26(code_ptr, (tcg_insn_unit *)value);
+ break;
+ case R_AARCH64_CONDBR19:
+ reloc_pc19(code_ptr, (tcg_insn_unit *)value);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+#define TCG_CT_CONST_AIMM 0x100
+#define TCG_CT_CONST_LIMM 0x200
+#define TCG_CT_CONST_ZERO 0x400
+#define TCG_CT_CONST_MONE 0x800
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct,
+ const char **pct_str)
+{
+ const char *ct_str = *pct_str;
+
+ switch (ct_str[0]) {
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
+ break;
+ case 'l': /* qemu_ld / qemu_st address, data_reg */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
+#ifdef CONFIG_SOFTMMU
+ /* x0 and x1 will be overwritten when reading the tlb entry,
+ and x2 and x3 for helper args; better to avoid using them. */
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
+#endif
+ break;
+ case 'A': /* Valid for arithmetic immediate (positive or negative). */
+ ct->ct |= TCG_CT_CONST_AIMM;
+ break;
+ case 'L': /* Valid for logical immediate. */
+ ct->ct |= TCG_CT_CONST_LIMM;
+ break;
+ case 'M': /* minus one */
+ ct->ct |= TCG_CT_CONST_MONE;
+ break;
+ case 'Z': /* zero */
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+ default:
+ return -1;
+ }
+
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+static inline bool is_aimm(uint64_t val)
+{
+ return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
+}
+
+static inline bool is_limm(uint64_t val)
+{
+ /* Taking a simplified view of the logical immediates for now, ignoring
+ the replication that can happen across the field. Match bit patterns
+ of the forms
+ 0....01....1
+ 0..01..10..0
+ and their inverses. */
+
+ /* Make things easier below, by testing the form with msb clear. */
+ if ((int64_t)val < 0) {
+ val = ~val;
+ }
+ if (val == 0) {
+ return false;
+ }
+ val += val & -val;
+ return (val & (val - 1)) == 0;
+}
+
+static int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+ if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_MONE) && val == -1) {
+ return 1;
+ }
+
+ return 0;
+}
+
+enum aarch64_cond_code {
+ COND_EQ = 0x0,
+ COND_NE = 0x1,
+ COND_CS = 0x2, /* Unsigned greater or equal */
+ COND_HS = COND_CS, /* ALIAS greater or equal */
+ COND_CC = 0x3, /* Unsigned less than */
+ COND_LO = COND_CC, /* ALIAS Lower */
+ COND_MI = 0x4, /* Negative */
+ COND_PL = 0x5, /* Zero or greater */
+ COND_VS = 0x6, /* Overflow */
+ COND_VC = 0x7, /* No overflow */
+ COND_HI = 0x8, /* Unsigned greater than */
+ COND_LS = 0x9, /* Unsigned less or equal */
+ COND_GE = 0xa,
+ COND_LT = 0xb,
+ COND_GT = 0xc,
+ COND_LE = 0xd,
+ COND_AL = 0xe,
+ COND_NV = 0xf, /* behaves like COND_AL here */
+};
+
+static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
+ [TCG_COND_EQ] = COND_EQ,
+ [TCG_COND_NE] = COND_NE,
+ [TCG_COND_LT] = COND_LT,
+ [TCG_COND_GE] = COND_GE,
+ [TCG_COND_LE] = COND_LE,
+ [TCG_COND_GT] = COND_GT,
+ /* unsigned */
+ [TCG_COND_LTU] = COND_LO,
+ [TCG_COND_GTU] = COND_HI,
+ [TCG_COND_GEU] = COND_HS,
+ [TCG_COND_LEU] = COND_LS,
+};
+
+typedef enum {
+ LDST_ST = 0, /* store */
+ LDST_LD = 1, /* load */
+ LDST_LD_S_X = 2, /* load and sign-extend into Xt */
+ LDST_LD_S_W = 3, /* load and sign-extend into Wt */
+} AArch64LdstType;
+
+/* We encode the format of the insn into the beginning of the name, so that
+ we can have the preprocessor help "typecheck" the insn vs the output
+ function. Arm didn't provide us with nice names for the formats, so we
+ use the section number of the architecture reference manual in which the
+ instruction group is described. */
+typedef enum {
+ /* Compare and branch (immediate). */
+ I3201_CBZ = 0x34000000,
+ I3201_CBNZ = 0x35000000,
+
+ /* Conditional branch (immediate). */
+ I3202_B_C = 0x54000000,
+
+ /* Unconditional branch (immediate). */
+ I3206_B = 0x14000000,
+ I3206_BL = 0x94000000,
+
+ /* Unconditional branch (register). */
+ I3207_BR = 0xd61f0000,
+ I3207_BLR = 0xd63f0000,
+ I3207_RET = 0xd65f0000,
+
+ /* Load/store register. Described here as 3.3.12, but the helper
+ that emits them can transform to 3.3.10 or 3.3.13. */
+ I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
+ I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
+ I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
+ I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
+
+ I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
+ I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
+ I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
+ I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
+
+ I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
+ I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
+
+ I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
+ I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
+ I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
+
+ I3312_TO_I3310 = 0x00200800,
+ I3312_TO_I3313 = 0x01000000,
+
+ /* Load/store register pair instructions. */
+ I3314_LDP = 0x28400000,
+ I3314_STP = 0x28000000,
+
+ /* Add/subtract immediate instructions. */
+ I3401_ADDI = 0x11000000,
+ I3401_ADDSI = 0x31000000,
+ I3401_SUBI = 0x51000000,
+ I3401_SUBSI = 0x71000000,
+
+ /* Bitfield instructions. */
+ I3402_BFM = 0x33000000,
+ I3402_SBFM = 0x13000000,
+ I3402_UBFM = 0x53000000,
+
+ /* Extract instruction. */
+ I3403_EXTR = 0x13800000,
+
+ /* Logical immediate instructions. */
+ I3404_ANDI = 0x12000000,
+ I3404_ORRI = 0x32000000,
+ I3404_EORI = 0x52000000,
+
+ /* Move wide immediate instructions. */
+ I3405_MOVN = 0x12800000,
+ I3405_MOVZ = 0x52800000,
+ I3405_MOVK = 0x72800000,
+
+ /* PC relative addressing instructions. */
+ I3406_ADR = 0x10000000,
+ I3406_ADRP = 0x90000000,
+
+ /* Add/subtract shifted register instructions (without a shift). */
+ I3502_ADD = 0x0b000000,
+ I3502_ADDS = 0x2b000000,
+ I3502_SUB = 0x4b000000,
+ I3502_SUBS = 0x6b000000,
+
+ /* Add/subtract shifted register instructions (with a shift). */
+ I3502S_ADD_LSL = I3502_ADD,
+
+ /* Add/subtract with carry instructions. */
+ I3503_ADC = 0x1a000000,
+ I3503_SBC = 0x5a000000,
+
+ /* Conditional select instructions. */
+ I3506_CSEL = 0x1a800000,
+ I3506_CSINC = 0x1a800400,
+
+ /* Data-processing (1 source) instructions. */
+ I3507_REV16 = 0x5ac00400,
+ I3507_REV32 = 0x5ac00800,
+ I3507_REV64 = 0x5ac00c00,
+
+ /* Data-processing (2 source) instructions. */
+ I3508_LSLV = 0x1ac02000,
+ I3508_LSRV = 0x1ac02400,
+ I3508_ASRV = 0x1ac02800,
+ I3508_RORV = 0x1ac02c00,
+ I3508_SMULH = 0x9b407c00,
+ I3508_UMULH = 0x9bc07c00,
+ I3508_UDIV = 0x1ac00800,
+ I3508_SDIV = 0x1ac00c00,
+
+ /* Data-processing (3 source) instructions. */
+ I3509_MADD = 0x1b000000,
+ I3509_MSUB = 0x1b008000,
+
+ /* Logical shifted register instructions (without a shift). */
+ I3510_AND = 0x0a000000,
+ I3510_BIC = 0x0a200000,
+ I3510_ORR = 0x2a000000,
+ I3510_ORN = 0x2a200000,
+ I3510_EOR = 0x4a000000,
+ I3510_EON = 0x4a200000,
+ I3510_ANDS = 0x6a000000,
+} AArch64Insn;
+
+static inline uint32_t tcg_in32(TCGContext *s)
+{
+ uint32_t v = *(uint32_t *)s->code_ptr;
+ return v;
+}
+
+/* Emit an opcode with "type-checking" of the format. */
+#define tcg_out_insn(S, FMT, OP, ...) \
+ glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
+
+static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rt, int imm19)
+{
+ tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
+}
+
+static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
+ TCGCond c, int imm19)
+{
+ tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
+}
+
+static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
+{
+ tcg_out32(s, insn | (imm26 & 0x03ffffff));
+}
+
+static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
+{
+ tcg_out32(s, insn | rn << 5);
+}
+
+static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
+ TCGReg r1, TCGReg r2, TCGReg rn,
+ tcg_target_long ofs, bool pre, bool w)
+{
+ insn |= 1u << 31; /* ext */
+ insn |= pre << 24;
+ insn |= w << 23;
+
+ assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
+ insn |= (ofs & (0x7f << 3)) << (15 - 3);
+
+ tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
+}
+
+static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, uint64_t aimm)
+{
+ if (aimm > 0xfff) {
+ assert((aimm & 0xfff) == 0);
+ aimm >>= 12;
+ assert(aimm <= 0xfff);
+ aimm |= 1 << 12; /* apply LSL 12 */
+ }
+ tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
+}
+
+/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
+ (Logical immediate). Both insn groups have N, IMMR and IMMS fields
+ that feed the DecodeBitMasks pseudo function. */
+static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, int n, int immr, int imms)
+{
+ tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
+ | rn << 5 | rd);
+}
+
+#define tcg_out_insn_3404 tcg_out_insn_3402
+
+static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm, int imms)
+{
+ tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
+ | rn << 5 | rd);
+}
+
+/* This function is used for the Move (wide immediate) instruction group.
+ Note that SHIFT is a full shift count, not the 2 bit HW field. */
+static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, uint16_t half, unsigned shift)
+{
+ assert((shift & ~0x30) == 0);
+ tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
+}
+
+static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, int64_t disp)
+{
+ tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
+}
+
+/* This function is for 3.5.2 (Add/subtract shifted register), for
+ the rare occasion when we actually want to supply a shift amount. */
+static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
+ TCGType ext, TCGReg rd, TCGReg rn,
+ TCGReg rm, int imm6)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
+}
+
+/* This function is for 3.5.2 (Add/subtract shifted register),
+ and 3.5.10 (Logical shifted register), for the vast majority of cases
+ when we don't want to apply a shift. Thus it can also be used for
+ 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
+static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
+}
+
+#define tcg_out_insn_3503 tcg_out_insn_3502
+#define tcg_out_insn_3508 tcg_out_insn_3502
+#define tcg_out_insn_3510 tcg_out_insn_3502
+
+static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
+ | tcg_cond_to_aarch64[c] << 12);
+}
+
+static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn)
+{
+ tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
+}
+
+static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
+}
+
+static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg base, TCGType ext,
+ TCGReg regoff)
+{
+ /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
+ tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
+ 0x4000 | ext << 13 | base << 5 | rd);
+}
+
+static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg rn, intptr_t offset)
+{
+ tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
+}
+
+static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
+{
+ /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
+ tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
+}
+
+/* Register to register move using ORR (shifted register with no shift). */
+static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
+{
+ tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
+}
+
+/* Register to register move using ADDI (move to/from SP). */
+static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
+{
+ tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
+}
+
+/* This function is used for the Logical (immediate) instruction group.
+ The value of LIMM must satisfy IS_LIMM. See the comment above about
+ only supporting simplified logical immediates. */
+static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, uint64_t limm)
+{
+ unsigned h, l, r, c;
+
+ assert(is_limm(limm));
+
+ h = clz64(limm);
+ l = ctz64(limm);
+ if (l == 0) {
+ r = 0; /* form 0....01....1 */
+ c = ctz64(~limm) - 1;
+ if (h == 0) {
+ r = clz64(~limm); /* form 1..10..01..1 */
+ c += r;
+ }
+ } else {
+ r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
+ c = r - h - 1;
+ }
+ if (ext == TCG_TYPE_I32) {
+ r &= 31;
+ c &= 31;
+ }
+
+ tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+ tcg_target_long value)
+{
+ AArch64Insn insn;
+ int i, wantinv, shift;
+ tcg_target_long svalue = value;
+ tcg_target_long ivalue = ~value;
+ tcg_target_long imask;
+
+ /* For 32-bit values, discard potential garbage in value. For 64-bit
+ values within [2**31, 2**32-1], we can create smaller sequences by
+ interpreting this as a negative 32-bit number, while ensuring that
+ the high 32 bits are cleared by setting SF=0. */
+ if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
+ svalue = (int32_t)value;
+ value = (uint32_t)value;
+ ivalue = (uint32_t)ivalue;
+ type = TCG_TYPE_I32;
+ }
+
+ /* Speed things up by handling the common case of small positive
+ and negative values specially. */
+ if ((value & ~0xffffull) == 0) {
+ tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
+ return;
+ } else if ((ivalue & ~0xffffull) == 0) {
+ tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
+ return;
+ }
+
+ /* Check for bitfield immediates. For the benefit of 32-bit quantities,
+ use the sign-extended value. That lets us match rotated values such
+ as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
+ if (is_limm(svalue)) {
+ tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
+ return;
+ }
+
+ /* Look for host pointer values within 4G of the PC. This happens
+ often when loading pointers to QEMU's own data structures. */
+ if (type == TCG_TYPE_I64) {
+ tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
+ if (disp == sextract64(disp, 0, 21)) {
+ tcg_out_insn(s, 3406, ADRP, rd, disp);
+ if (value & 0xfff) {
+ tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
+ }
+ return;
+ }
+ }
+
+ /* Would it take fewer insns to begin with MOVN? For the value and its
+ inverse, count the number of 16-bit lanes that are 0. */
+ for (i = wantinv = imask = 0; i < 64; i += 16) {
+ tcg_target_long mask = 0xffffull << i;
+ if ((value & mask) == 0) {
+ wantinv -= 1;
+ }
+ if ((ivalue & mask) == 0) {
+ wantinv += 1;
+ imask |= mask;
+ }
+ }
+
+ /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
+ insn = I3405_MOVZ;
+ if (wantinv > 0) {
+ value = ivalue;
+ insn = I3405_MOVN;
+ }
+
+ /* Find the lowest lane that is not 0x0000. */
+ shift = ctz64(value) & (63 & -16);
+ tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
+
+ if (wantinv > 0) {
+ /* Re-invert the value, so MOVK sees non-inverted bits. */
+ value = ~value;
+ /* Clear out all the 0xffff lanes. */
+ value ^= imask;
+ }
+ /* Clear out the lane that we just set. */
+ value &= ~(0xffffUL << shift);
+
+ /* Iterate until all lanes have been set, and thus cleared from VALUE. */
+ while (value) {
+ shift = ctz64(value) & (63 & -16);
+ tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
+ value &= ~(0xffffUL << shift);
+ }
+}
+
+/* Define something more legible for general use. */
+#define tcg_out_ldst_r tcg_out_insn_3310
+
+static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg rn, intptr_t offset)
+{
+ TCGMemOp size = (uint32_t)insn >> 30;
+
+ /* If the offset is naturally aligned and in range, then we can
+ use the scaled uimm12 encoding */
+ if (offset >= 0 && !(offset & ((1 << size) - 1))) {
+ uintptr_t scaled_uimm = offset >> size;
+ if (scaled_uimm <= 0xfff) {
+ tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
+ return;
+ }
+ }
+
+ /* Small signed offsets can use the unscaled encoding. */
+ if (offset >= -256 && offset < 256) {
+ tcg_out_insn_3312(s, insn, rd, rn, offset);
+ return;
+ }
+
+ /* Worst-case scenario, move offset to temp register, use reg offset. */
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
+}
+
+static inline void tcg_out_mov(TCGContext *s,
+ TCGType type, TCGReg ret, TCGReg arg)
+{
+ if (ret != arg) {
+ tcg_out_movr(s, type, ret, arg);
+ }
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
+ arg, arg1, arg2);
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
+ arg, arg1, arg2);
+}
+
+static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
+{
+ tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
+}
+
+static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
+{
+ tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
+}
+
+static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
+{
+ tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
+}
+
+static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, TCGReg rm, unsigned int a)
+{
+ tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
+}
+
+static inline void tcg_out_shl(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int bits = ext ? 64 : 32;
+ int max = bits - 1;
+ tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
+}
+
+static inline void tcg_out_shr(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int max = ext ? 63 : 31;
+ tcg_out_ubfm(s, ext, rd, rn, m & max, max);
+}
+
+static inline void tcg_out_sar(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int max = ext ? 63 : 31;
+ tcg_out_sbfm(s, ext, rd, rn, m & max, max);
+}
+
+static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int max = ext ? 63 : 31;
+ tcg_out_extr(s, ext, rd, rn, rn, m & max);
+}
+
+static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int bits = ext ? 64 : 32;
+ int max = bits - 1;
+ tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
+}
+
+static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned lsb, unsigned width)
+{
+ unsigned size = ext ? 64 : 32;
+ unsigned a = (size - lsb) & (size - 1);
+ unsigned b = width - 1;
+ tcg_out_bfm(s, ext, rd, rn, a, b);
+}
+
+static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
+ tcg_target_long b, bool const_b)
+{
+ if (const_b) {
+ /* Using CMP or CMN aliases. */
+ if (b >= 0) {
+ tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
+ } else {
+ tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
+ }
+ } else {
+ /* Using CMP alias SUBS wzr, Wn, Wm */
+ tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
+ }
+}
+
+static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
+{
+ ptrdiff_t offset = target - s->code_ptr;
+ assert(offset == sextract64(offset, 0, 26));
+ tcg_out_insn(s, 3206, B, offset);
+}
+
+static inline void tcg_out_goto_noaddr(TCGContext *s)
+{
+ /* We pay attention here to not modify the branch target by reading from
+ the buffer. This ensures that caches and memory are kept coherent during
+ retranslation. Mask away possible garbage in the high bits for the
+ first translation, while keeping the offset bits for retranslation. */
+ uint32_t old = tcg_in32(s);
+ tcg_out_insn(s, 3206, B, old);
+}
+
+static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
+{
+ /* See comments in tcg_out_goto_noaddr. */
+ uint32_t old = tcg_in32(s) >> 5;
+ tcg_out_insn(s, 3202, B_C, c, old);
+}
+
+static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
+{
+ tcg_out_insn(s, 3207, BLR, reg);
+}
+
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
+{
+ ptrdiff_t offset = target - s->code_ptr;
+ if (offset == sextract64(offset, 0, 26)) {
+ tcg_out_insn(s, 3206, BL, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
+ tcg_out_callr(s, TCG_REG_TMP);
+ }
+}
+
+void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
+{
+ tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
+ tcg_insn_unit *target = (tcg_insn_unit *)addr;
+
+ reloc_pc26(code_ptr, target);
+ flush_icache_range(jmp_addr, jmp_addr + 4);
+}
+
+static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
+{
+ if (!l->has_value) {
+ tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
+ tcg_out_goto_noaddr(s);
+ } else {
+ tcg_out_goto(s, l->u.value_ptr);
+ }
+}
+
+static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
+ TCGArg b, bool b_const, TCGLabel *l)
+{
+ intptr_t offset;
+ bool need_cmp;
+
+ if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
+ need_cmp = false;
+ } else {
+ need_cmp = true;
+ tcg_out_cmp(s, ext, a, b, b_const);
+ }
+
+ if (!l->has_value) {
+ tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
+ offset = tcg_in32(s) >> 5;
+ } else {
+ offset = l->u.value_ptr - s->code_ptr;
+ assert(offset == sextract64(offset, 0, 19));
+ }
+
+ if (need_cmp) {
+ tcg_out_insn(s, 3202, B_C, c, offset);
+ } else if (c == TCG_COND_EQ) {
+ tcg_out_insn(s, 3201, CBZ, ext, a, offset);
+ } else {
+ tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
+ }
+}
+
+static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+ tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
+}
+
+static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+ tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
+}
+
+static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+ tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
+}
+
+static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
+ TCGReg rd, TCGReg rn)
+{
+ /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
+ int bits = (8 << s_bits) - 1;
+ tcg_out_sbfm(s, ext, rd, rn, 0, bits);
+}
+
+static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
+ TCGReg rd, TCGReg rn)
+{
+ /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
+ int bits = (8 << s_bits) - 1;
+ tcg_out_ubfm(s, 0, rd, rn, 0, bits);
+}
+
+static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
+ TCGReg rn, int64_t aimm)
+{
+ if (aimm >= 0) {
+ tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
+ } else {
+ tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
+ }
+}
+
+static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
+ TCGReg rh, TCGReg al, TCGReg ah,
+ tcg_target_long bl, tcg_target_long bh,
+ bool const_bl, bool const_bh, bool sub)
+{
+ TCGReg orig_rl = rl;
+ AArch64Insn insn;
+
+ if (rl == ah || (!const_bh && rl == bh)) {
+ rl = TCG_REG_TMP;
+ }
+
+ if (const_bl) {
+ insn = I3401_ADDSI;
+ if ((bl < 0) ^ sub) {
+ insn = I3401_SUBSI;
+ bl = -bl;
+ }
+ tcg_out_insn_3401(s, insn, ext, rl, al, bl);
+ } else {
+ tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
+ }
+
+ insn = I3503_ADC;
+ if (const_bh) {
+ /* Note that the only two constants we support are 0 and -1, and
+ that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
+ if ((bh != 0) ^ sub) {
+ insn = I3503_SBC;
+ }
+ bh = TCG_REG_XZR;
+ } else if (sub) {
+ insn = I3503_SBC;
+ }
+ tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
+
+ tcg_out_mov(s, ext, orig_rl, rl);
+}
+
+#ifdef CONFIG_SOFTMMU
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * TCGMemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[16] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, TCGMemOpIdx oi,
+ * uintptr_t ra)
+ */
+static void * const qemu_st_helpers[16] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
+{
+ ptrdiff_t offset = tcg_pcrel_diff(s, target);
+ assert(offset == sextract64(offset, 0, 21));
+ tcg_out_insn(s, 3406, ADR, rd, offset);
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGMemOp size = opc & MO_SIZE;
+
+ reloc_pc19(lb->label_ptr[0], s->code_ptr);
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
+ tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
+ tcg_out_adr(s, TCG_REG_X3, lb->raddr);
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+ if (opc & MO_SIGN) {
+ tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
+ } else {
+ tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
+ }
+
+ tcg_out_goto(s, lb->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGMemOp size = opc & MO_SIZE;
+
+ reloc_pc19(lb->label_ptr[0], s->code_ptr);
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
+ tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
+ tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
+ tcg_out_adr(s, TCG_REG_X4, lb->raddr);
+ tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+ tcg_out_goto(s, lb->raddr);
+}
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+ TCGType ext, TCGReg data_reg, TCGReg addr_reg,
+ tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->type = ext;
+ label->datalo_reg = data_reg;
+ label->addrlo_reg = addr_reg;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr;
+}
+
+/* Load and compare a TLB entry, emitting the conditional jump to the
+ slow path for the failure case, which will be patched later when finalizing
+ the slow path. Generated code returns the host addend in X1,
+ clobbers X0,X2,X3,TMP. */
+static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
+ tcg_insn_unit **label_ptr, int mem_index,
+ bool is_read)
+{
+ TCGReg base = TCG_AREG0;
+ int tlb_offset = is_read ?
+ offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+
+ /* Extract the TLB index from the address into X0.
+ X0<CPU_TLB_BITS:0> =
+ addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
+ tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
+ TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
+
+ /* Store the page mask part of the address and the low s_bits into X3.
+ Later this allows checking for equality and alignment at the same time.
+ X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
+ tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
+ addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+ /* Add any "high bits" from the tlb offset to the env address into X2,
+ to take advantage of the LSL12 form of the ADDI instruction.
+ X2 = env + (tlb_offset & 0xfff000) */
+ if (tlb_offset & 0xfff000) {
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
+ tlb_offset & 0xfff000);
+ base = TCG_REG_X2;
+ }
+
+ /* Merge the tlb index contribution into X2.
+ X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
+ tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
+ TCG_REG_X0, CPU_TLB_ENTRY_BITS);
+
+ /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
+ X0 = load [X2 + (tlb_offset & 0x000fff)] */
+ tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
+ TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
+
+ /* Load the tlb addend. Do that early to avoid stalling.
+ X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
+ tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
+ (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
+ (is_read ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write)));
+
+ /* Perform the address comparison. */
+ tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
+
+ /* If not equal, we jump to the slow path. */
+ *label_ptr = s->code_ptr;
+ tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
+}
+
+#endif /* CONFIG_SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
+ TCGReg data_r, TCGReg addr_r,
+ TCGType otype, TCGReg off_r)
+{
+ const TCGMemOp bswap = memop & MO_BSWAP;
+
+ switch (memop & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
+ break;
+ case MO_SB:
+ tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
+ data_r, addr_r, otype, off_r);
+ break;
+ case MO_UW:
+ tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
+ if (bswap) {
+ tcg_out_rev16(s, data_r, data_r);
+ }
+ break;
+ case MO_SW:
+ if (bswap) {
+ tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
+ tcg_out_rev16(s, data_r, data_r);
+ tcg_out_sxt(s, ext, MO_16, data_r, data_r);
+ } else {
+ tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
+ data_r, addr_r, otype, off_r);
+ }
+ break;
+ case MO_UL:
+ tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
+ if (bswap) {
+ tcg_out_rev32(s, data_r, data_r);
+ }
+ break;
+ case MO_SL:
+ if (bswap) {
+ tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
+ tcg_out_rev32(s, data_r, data_r);
+ tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
+ } else {
+ tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
+ }
+ break;
+ case MO_Q:
+ tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
+ if (bswap) {
+ tcg_out_rev64(s, data_r, data_r);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
+ TCGReg data_r, TCGReg addr_r,
+ TCGType otype, TCGReg off_r)
+{
+ const TCGMemOp bswap = memop & MO_BSWAP;
+
+ switch (memop & MO_SIZE) {
+ case MO_8:
+ tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
+ break;
+ case MO_16:
+ if (bswap && data_r != TCG_REG_XZR) {
+ tcg_out_rev16(s, TCG_REG_TMP, data_r);
+ data_r = TCG_REG_TMP;
+ }
+ tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
+ break;
+ case MO_32:
+ if (bswap && data_r != TCG_REG_XZR) {
+ tcg_out_rev32(s, TCG_REG_TMP, data_r);
+ data_r = TCG_REG_TMP;
+ }
+ tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
+ break;
+ case MO_64:
+ if (bswap && data_r != TCG_REG_XZR) {
+ tcg_out_rev64(s, TCG_REG_TMP, data_r);
+ data_r = TCG_REG_TMP;
+ }
+ tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+ TCGMemOpIdx oi, TCGType ext)
+{
+ TCGMemOp memop = get_memop(oi);
+ const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ TCGMemOp s_bits = memop & MO_SIZE;
+ tcg_insn_unit *label_ptr;
+
+ tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
+ tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+ TCG_REG_X1, otype, addr_reg);
+ add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
+ s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (USE_GUEST_BASE) {
+ tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+ TCG_REG_GUEST_BASE, otype, addr_reg);
+ } else {
+ tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+ addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+ }
+#endif /* CONFIG_SOFTMMU */
+}
+
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+ TCGMemOpIdx oi)
+{
+ TCGMemOp memop = get_memop(oi);
+ const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ TCGMemOp s_bits = memop & MO_SIZE;
+ tcg_insn_unit *label_ptr;
+
+ tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
+ tcg_out_qemu_st_direct(s, memop, data_reg,
+ TCG_REG_X1, otype, addr_reg);
+ add_qemu_ldst_label(s, false, oi, s_bits == MO_64, data_reg, addr_reg,
+ s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (USE_GUEST_BASE) {
+ tcg_out_qemu_st_direct(s, memop, data_reg,
+ TCG_REG_GUEST_BASE, otype, addr_reg);
+ } else {
+ tcg_out_qemu_st_direct(s, memop, data_reg,
+ addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+ }
+#endif /* CONFIG_SOFTMMU */
+}
+
+static tcg_insn_unit *tb_ret_addr;
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ /* 99% of the time, we can signal the use of extension registers
+ by looking to see if the opcode handles 64-bit data. */
+ TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
+
+ /* Hoist the loads of the most common arguments. */
+ TCGArg a0 = args[0];
+ TCGArg a1 = args[1];
+ TCGArg a2 = args[2];
+ int c2 = const_args[2];
+
+ /* Some operands are defined with the "rZ" constraint, a register or
+ the zero register. When such a constraint matched a constant, that
+ constant is known to be zero, so we need not test args[I] == 0. */
+#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
+ tcg_out_goto(s, tb_ret_addr);
+ break;
+
+ case INDEX_op_goto_tb:
+#ifndef USE_DIRECT_JUMP
+#error "USE_DIRECT_JUMP required for aarch64"
+#endif
+ assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
+ s->tb_jmp_offset[a0] = tcg_current_code_size(s);
+ /* The actual branch destination will be patched by
+ aarch64_tb_set_jmp_target later; beware retranslation. */
+ tcg_out_goto_noaddr(s);
+ s->tb_next_offset[a0] = tcg_current_code_size(s);
+ break;
+
+ case INDEX_op_br:
+ tcg_out_goto_label(s, arg_label(a0));
+ break;
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
+ break;
+ case INDEX_op_ld8s_i32:
+ tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
+ break;
+ case INDEX_op_ld8s_i64:
+ tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
+ break;
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
+ break;
+ case INDEX_op_ld16s_i32:
+ tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
+ break;
+ case INDEX_op_ld16s_i64:
+ tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
+ break;
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
+ break;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
+ break;
+
+ case INDEX_op_add_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_add_i64:
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_sub_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_sub_i64:
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, -a2);
+ } else {
+ tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_neg_i64:
+ case INDEX_op_neg_i32:
+ tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
+ break;
+
+ case INDEX_op_and_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_and_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_andc_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_andc_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
+ } else {
+ tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_or_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_or_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_orc_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_orc_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
+ } else {
+ tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_xor_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_xor_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_eqv_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_eqv_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
+ } else {
+ tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_not_i64:
+ case INDEX_op_not_i32:
+ tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
+ break;
+
+ case INDEX_op_mul_i64:
+ case INDEX_op_mul_i32:
+ tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
+ break;
+
+ case INDEX_op_div_i64:
+ case INDEX_op_div_i32:
+ tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
+ break;
+ case INDEX_op_divu_i64:
+ case INDEX_op_divu_i32:
+ tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
+ break;
+
+ case INDEX_op_rem_i64:
+ case INDEX_op_rem_i32:
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
+ break;
+ case INDEX_op_remu_i64:
+ case INDEX_op_remu_i32:
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
+ break;
+
+ case INDEX_op_shl_i64:
+ case INDEX_op_shl_i32:
+ if (c2) {
+ tcg_out_shl(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_shr_i64:
+ case INDEX_op_shr_i32:
+ if (c2) {
+ tcg_out_shr(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_sar_i64:
+ case INDEX_op_sar_i32:
+ if (c2) {
+ tcg_out_sar(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_rotr_i64:
+ case INDEX_op_rotr_i32:
+ if (c2) {
+ tcg_out_rotr(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotl_i32:
+ if (c2) {
+ tcg_out_rotl(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
+ }
+ break;
+
+ case INDEX_op_brcond_i32:
+ a1 = (int32_t)a1;
+ /* FALLTHRU */
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
+ break;
+
+ case INDEX_op_setcond_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_setcond_i64:
+ tcg_out_cmp(s, ext, a1, a2, c2);
+ /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
+ tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
+ TCG_REG_XZR, tcg_invert_cond(args[3]));
+ break;
+
+ case INDEX_op_movcond_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_movcond_i64:
+ tcg_out_cmp(s, ext, a1, a2, c2);
+ tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, a0, a1, a2, ext);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, REG0(0), a1, a2);
+ break;
+
+ case INDEX_op_bswap64_i64:
+ tcg_out_rev64(s, a0, a1);
+ break;
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap32_i32:
+ tcg_out_rev32(s, a0, a1);
+ break;
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap16_i32:
+ tcg_out_rev16(s, a0, a1);
+ break;
+
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext8s_i32:
+ tcg_out_sxt(s, ext, MO_8, a0, a1);
+ break;
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext16s_i32:
+ tcg_out_sxt(s, ext, MO_16, a0, a1);
+ break;
+ case INDEX_op_ext32s_i64:
+ tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
+ break;
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext8u_i32:
+ tcg_out_uxt(s, MO_8, a0, a1);
+ break;
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext16u_i32:
+ tcg_out_uxt(s, MO_16, a0, a1);
+ break;
+ case INDEX_op_ext32u_i64:
+ tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
+ break;
+
+ case INDEX_op_deposit_i64:
+ case INDEX_op_deposit_i32:
+ tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
+ break;
+
+ case INDEX_op_add2_i32:
+ tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
+ (int32_t)args[4], args[5], const_args[4],
+ const_args[5], false);
+ break;
+ case INDEX_op_add2_i64:
+ tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
+ args[5], const_args[4], const_args[5], false);
+ break;
+ case INDEX_op_sub2_i32:
+ tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
+ (int32_t)args[4], args[5], const_args[4],
+ const_args[5], true);
+ break;
+ case INDEX_op_sub2_i64:
+ tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
+ args[5], const_args[4], const_args[5], true);
+ break;
+
+ case INDEX_op_muluh_i64:
+ tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
+ break;
+ case INDEX_op_mulsh_i64:
+ tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+
+#undef REG0
+}
+
+static const TCGTargetOpDef aarch64_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_br, { } },
+
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+
+ { INDEX_op_st8_i32, { "rZ", "r" } },
+ { INDEX_op_st16_i32, { "rZ", "r" } },
+ { INDEX_op_st_i32, { "rZ", "r" } },
+ { INDEX_op_st8_i64, { "rZ", "r" } },
+ { INDEX_op_st16_i64, { "rZ", "r" } },
+ { INDEX_op_st32_i64, { "rZ", "r" } },
+ { INDEX_op_st_i64, { "rZ", "r" } },
+
+ { INDEX_op_add_i32, { "r", "r", "rA" } },
+ { INDEX_op_add_i64, { "r", "r", "rA" } },
+ { INDEX_op_sub_i32, { "r", "r", "rA" } },
+ { INDEX_op_sub_i64, { "r", "r", "rA" } },
+ { INDEX_op_mul_i32, { "r", "r", "r" } },
+ { INDEX_op_mul_i64, { "r", "r", "r" } },
+ { INDEX_op_div_i32, { "r", "r", "r" } },
+ { INDEX_op_div_i64, { "r", "r", "r" } },
+ { INDEX_op_divu_i32, { "r", "r", "r" } },
+ { INDEX_op_divu_i64, { "r", "r", "r" } },
+ { INDEX_op_rem_i32, { "r", "r", "r" } },
+ { INDEX_op_rem_i64, { "r", "r", "r" } },
+ { INDEX_op_remu_i32, { "r", "r", "r" } },
+ { INDEX_op_remu_i64, { "r", "r", "r" } },
+ { INDEX_op_and_i32, { "r", "r", "rL" } },
+ { INDEX_op_and_i64, { "r", "r", "rL" } },
+ { INDEX_op_or_i32, { "r", "r", "rL" } },
+ { INDEX_op_or_i64, { "r", "r", "rL" } },
+ { INDEX_op_xor_i32, { "r", "r", "rL" } },
+ { INDEX_op_xor_i64, { "r", "r", "rL" } },
+ { INDEX_op_andc_i32, { "r", "r", "rL" } },
+ { INDEX_op_andc_i64, { "r", "r", "rL" } },
+ { INDEX_op_orc_i32, { "r", "r", "rL" } },
+ { INDEX_op_orc_i64, { "r", "r", "rL" } },
+ { INDEX_op_eqv_i32, { "r", "r", "rL" } },
+ { INDEX_op_eqv_i64, { "r", "r", "rL" } },
+
+ { INDEX_op_neg_i32, { "r", "r" } },
+ { INDEX_op_neg_i64, { "r", "r" } },
+ { INDEX_op_not_i32, { "r", "r" } },
+ { INDEX_op_not_i64, { "r", "r" } },
+
+ { INDEX_op_shl_i32, { "r", "r", "ri" } },
+ { INDEX_op_shr_i32, { "r", "r", "ri" } },
+ { INDEX_op_sar_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "r", "ri" } },
+ { INDEX_op_shl_i64, { "r", "r", "ri" } },
+ { INDEX_op_shr_i64, { "r", "r", "ri" } },
+ { INDEX_op_sar_i64, { "r", "r", "ri" } },
+ { INDEX_op_rotl_i64, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i64, { "r", "r", "ri" } },
+
+ { INDEX_op_brcond_i32, { "r", "rA" } },
+ { INDEX_op_brcond_i64, { "r", "rA" } },
+ { INDEX_op_setcond_i32, { "r", "r", "rA" } },
+ { INDEX_op_setcond_i64, { "r", "r", "rA" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
+ { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
+
+ { INDEX_op_qemu_ld_i32, { "r", "l" } },
+ { INDEX_op_qemu_ld_i64, { "r", "l" } },
+ { INDEX_op_qemu_st_i32, { "lZ", "l" } },
+ { INDEX_op_qemu_st_i64, { "lZ", "l" } },
+
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
+ { INDEX_op_bswap16_i64, { "r", "r" } },
+ { INDEX_op_bswap32_i64, { "r", "r" } },
+ { INDEX_op_bswap64_i64, { "r", "r" } },
+
+ { INDEX_op_ext8s_i32, { "r", "r" } },
+ { INDEX_op_ext16s_i32, { "r", "r" } },
+ { INDEX_op_ext8u_i32, { "r", "r" } },
+ { INDEX_op_ext16u_i32, { "r", "r" } },
+
+ { INDEX_op_ext8s_i64, { "r", "r" } },
+ { INDEX_op_ext16s_i64, { "r", "r" } },
+ { INDEX_op_ext32s_i64, { "r", "r" } },
+ { INDEX_op_ext8u_i64, { "r", "r" } },
+ { INDEX_op_ext16u_i64, { "r", "r" } },
+ { INDEX_op_ext32u_i64, { "r", "r" } },
+
+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+ { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
+
+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
+ { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
+ { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
+
+ { INDEX_op_muluh_i64, { "r", "r", "r" } },
+ { INDEX_op_mulsh_i64, { "r", "r", "r" } },
+
+ { -1 },
+};
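+
+/* The single-letter constraints used above are decoded by this backend's
+ target_parse_constraint, defined earlier in this file: roughly, "A" is an
+ arithmetic (add/sub) immediate, "L" a logical immediate, "M" minus one,
+ "Z" zero, and "l" the restricted register class for qemu_ld/st addresses. */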
+
+static void tcg_target_init(TCGContext *s)
+{
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+
+ tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+ (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
+ (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
+ (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
+ (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
+ (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
+ (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
+ (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
+ (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
+ (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
+ (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
+
+ tcg_add_target_add_op_defs(aarch64_op_defs);
+}
+
+/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
+#define PUSH_SIZE ((30 - 19 + 1) * 8)
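+/* That is twelve registers saved in pairs, 8 bytes each: PUSH_SIZE == 96. */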
+
+#define FRAME_SIZE \
+ ((PUSH_SIZE \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & ~(TCG_TARGET_STACK_ALIGN - 1))
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+/* We're expecting to use a single ADDI insn. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
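+
+/* Rationale for the two checks above: uleb128 carries 7 payload bits per
+ byte with the top bit as a continuation flag, so any value below 1 << 14
+ fits in the two bytes emitted in fde_def_cfa below; and the AArch64
+ ADD/SUB (immediate) encoding behind ADDI/SUBI takes an unsigned 12-bit
+ immediate, hence the 0xfff limit. */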
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ TCGReg r;
+
+ /* Push (FP, LR) and allocate space for all saved registers. */
+ tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
+ TCG_REG_SP, -PUSH_SIZE, 1, 1);
+
+ /* Set up frame pointer for canonical unwinding. */
+ tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
+
+ /* Store callee-preserved regs x19..x28. */
+ for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
+ int ofs = (r - TCG_REG_X19 + 2) * 8;
+ tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
+ }
+
+ /* Make stack space for TCG locals. */
+ tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
+ FRAME_SIZE - PUSH_SIZE);
+
+ /* Inform TCG about how to find TCG locals with register, offset, size. */
+ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+#if defined(CONFIG_USE_GUEST_BASE)
+ if (USE_GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
+ }
+#endif
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
+
+ tb_ret_addr = s->code_ptr;
+
+ /* Remove TCG locals stack space. */
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
+ FRAME_SIZE - PUSH_SIZE);
+
+ /* Restore registers x19..x28. */
+ for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
+ int ofs = (r - TCG_REG_X19 + 2) * 8;
+ tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
+ }
+
+ /* Pop (FP, LR), restore SP to previous frame. */
+ tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
+ TCG_REG_SP, PUSH_SIZE, 0, 1);
+ tcg_out_insn(s, 3207, RET, TCG_REG_LR);
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[24];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_AARCH64
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x78, /* sleb128 -8 */
+ .h.cie.return_column = TCG_REG_LR,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
+ 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
+ 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
+ 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
+ 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
+ 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
+ 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
+ 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
+ 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
+ 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
+ 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
+ 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
+ }
+};
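+
+/* Note: each fde_reg_ofs pair above is DW_CFA_offset, i.e. 0x80 | regno
+ followed by a uleb128 offset that is scaled by data_align (-8); for
+ example "0x80 + 28, 1" records x28 as saved at CFA - 8. */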
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
new file mode 100644
index 00000000..8aec04d2
--- /dev/null
+++ b/tcg/aarch64/tcg-target.h
@@ -0,0 +1,108 @@
+/*
+ * Initial TCG Implementation for aarch64
+ *
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ * Written by Claudio Fontana
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ */
+
+#ifndef TCG_TARGET_AARCH64
+#define TCG_TARGET_AARCH64 1
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
+#undef TCG_TARGET_STACK_GROWSUP
+
+typedef enum {
+ TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
+ TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
+ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
+ TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
+ TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19,
+ TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
+ TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
+ TCG_REG_X28, TCG_REG_X29, TCG_REG_X30,
+
+ /* X31 is either the stack pointer or zero, depending on context. */
+ TCG_REG_SP = 31,
+ TCG_REG_XZR = 31,
+
+ /* Aliases. */
+ TCG_REG_FP = TCG_REG_X29,
+ TCG_REG_LR = TCG_REG_X30,
+ TCG_AREG0 = TCG_REG_X19,
+} TCGReg;
+
+#define TCG_TARGET_NB_REGS 32
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_SP
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_trunc_shr_i32 0
+
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ __builtin___clear_cache((char *)start, (char *)stop);
+}
+
+#endif /* TCG_TARGET_AARCH64 */
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
new file mode 100644
index 00000000..ae2ec7a9
--- /dev/null
+++ b/tcg/arm/tcg-target.c
@@ -0,0 +1,2128 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Andrzej Zaborowski
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "elf.h"
+#include "tcg-be-ldst.h"
+
+/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
+#ifndef __ARM_ARCH
+# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH 7
+# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
+# define __ARM_ARCH 6
+# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
+ || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH 5
+# else
+# define __ARM_ARCH 4
+# endif
+#endif
+
+static int arm_arch = __ARM_ARCH;
+
+#if defined(__ARM_ARCH_5T__) \
+ || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
+# define use_armv5t_instructions 1
+#else
+# define use_armv5t_instructions use_armv6_instructions
+#endif
+
+#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6)
+#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
+
+#ifndef use_idiv_instructions
+bool use_idiv_instructions;
+#endif
+
+/* ??? Ought to think about changing CONFIG_SOFTMMU to always be defined. */
+#ifdef CONFIG_SOFTMMU
+# define USING_SOFTMMU 1
+#else
+# define USING_SOFTMMU 0
+#endif
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "%r0",
+ "%r1",
+ "%r2",
+ "%r3",
+ "%r4",
+ "%r5",
+ "%r6",
+ "%r7",
+ "%r8",
+ "%r9",
+ "%r10",
+ "%r11",
+ "%r12",
+ "%r13",
+ "%r14",
+ "%pc",
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R13,
+ TCG_REG_R0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R12,
+ TCG_REG_R14,
+};
+
+static const int tcg_target_call_iarg_regs[4] = {
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
+};
+static const int tcg_target_call_oarg_regs[2] = {
+ TCG_REG_R0, TCG_REG_R1
+};
+
+#define TCG_REG_TMP TCG_REG_R12
+
+static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
+{
+ ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
+ *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
+}
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ assert(type == R_ARM_PC24);
+ assert(addend == 0);
+ reloc_pc24(code_ptr, (tcg_insn_unit *)value);
+}
+
+#define TCG_CT_CONST_ARM 0x100
+#define TCG_CT_CONST_INV 0x200
+#define TCG_CT_CONST_NEG 0x400
+#define TCG_CT_CONST_ZERO 0x800
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch (ct_str[0]) {
+ case 'I':
+ ct->ct |= TCG_CT_CONST_ARM;
+ break;
+ case 'K':
+ ct->ct |= TCG_CT_CONST_INV;
+ break;
+ case 'N': /* The gcc constraint letter is L, already used here. */
+ ct->ct |= TCG_CT_CONST_NEG;
+ break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
+ break;
+
+ /* qemu_ld address */
+ case 'l':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
+#ifdef CONFIG_SOFTMMU
+ /* r0-r2,lr will be overwritten when reading the tlb entry,
+ so don't use these. */
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
+#endif
+ break;
+
+ /* qemu_st address & data */
+ case 's':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
+ /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
+ and r0-r1 are used for byte swapping, so don't use these. */
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
+#if defined(CONFIG_SOFTMMU)
+ /* Avoid clashes with registers being used for helper args */
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+#if TARGET_LONG_BITS == 64
+ /* Avoid clashes with registers being used for helper args */
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+#endif
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
+#endif
+ break;
+
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+
+ return 0;
+}
+
+static inline uint32_t rotl(uint32_t val, int n)
+{
+ return (val << n) | (val >> (32 - n));
+}
+
+/* ARM immediates for ALU instructions are made of an unsigned 8-bit
+ value right-rotated by an even amount between 0 and 30. */
+static inline int encode_imm(uint32_t imm)
+{
+ int shift;
+
+ /* simple case, only lower bits */
+ if ((imm & ~0xff) == 0)
+ return 0;
+ /* then try a simple even shift */
+ shift = ctz32(imm) & ~1;
+ if (((imm >> shift) & ~0xff) == 0)
+ return 32 - shift;
+ /* now try harder with rotations */
+ if ((rotl(imm, 2) & ~0xff) == 0)
+ return 2;
+ if ((rotl(imm, 4) & ~0xff) == 0)
+ return 4;
+ if ((rotl(imm, 6) & ~0xff) == 0)
+ return 6;
+ /* imm can't be encoded */
+ return -1;
+}
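+
+/* Illustrative examples: encode_imm(0x00ff0000) returns 16, and callers
+ emit the operand as rotl(0x00ff0000, 16) | (16 << 7) = 0xff | 0x800,
+ i.e. 0xff with a rotate-right-by-16 field (rot / 2 in bits [11:8]);
+ a value such as 0x00000101 cannot be encoded and yields -1. */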
+
+static inline int check_fit_imm(uint32_t imm)
+{
+ return encode_imm(imm) >= 0;
+}
+
+/* Test if a constant matches the constraint.
+ * TODO: define constraints for:
+ *
+ * ldr/str offset: between -0xfff and 0xfff
+ * ldrh/strh offset: between -0xff and 0xff
+ * mov operand2: values represented with x << (2 * y), x < 0x100
+ * add, sub, eor...: ditto
+ */
+static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct;
+ ct = arg_ct->ct;
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+#define TO_CPSR (1 << 20)
+
+typedef enum {
+ ARITH_AND = 0x0 << 21,
+ ARITH_EOR = 0x1 << 21,
+ ARITH_SUB = 0x2 << 21,
+ ARITH_RSB = 0x3 << 21,
+ ARITH_ADD = 0x4 << 21,
+ ARITH_ADC = 0x5 << 21,
+ ARITH_SBC = 0x6 << 21,
+ ARITH_RSC = 0x7 << 21,
+ ARITH_TST = 0x8 << 21 | TO_CPSR,
+ ARITH_CMP = 0xa << 21 | TO_CPSR,
+ ARITH_CMN = 0xb << 21 | TO_CPSR,
+ ARITH_ORR = 0xc << 21,
+ ARITH_MOV = 0xd << 21,
+ ARITH_BIC = 0xe << 21,
+ ARITH_MVN = 0xf << 21,
+
+ INSN_LDR_IMM = 0x04100000,
+ INSN_LDR_REG = 0x06100000,
+ INSN_STR_IMM = 0x04000000,
+ INSN_STR_REG = 0x06000000,
+
+ INSN_LDRH_IMM = 0x005000b0,
+ INSN_LDRH_REG = 0x001000b0,
+ INSN_LDRSH_IMM = 0x005000f0,
+ INSN_LDRSH_REG = 0x001000f0,
+ INSN_STRH_IMM = 0x004000b0,
+ INSN_STRH_REG = 0x000000b0,
+
+ INSN_LDRB_IMM = 0x04500000,
+ INSN_LDRB_REG = 0x06500000,
+ INSN_LDRSB_IMM = 0x005000d0,
+ INSN_LDRSB_REG = 0x001000d0,
+ INSN_STRB_IMM = 0x04400000,
+ INSN_STRB_REG = 0x06400000,
+
+ INSN_LDRD_IMM = 0x004000d0,
+ INSN_LDRD_REG = 0x000000d0,
+ INSN_STRD_IMM = 0x004000f0,
+ INSN_STRD_REG = 0x000000f0,
+} ARMInsn;
+
+#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
+#define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
+#define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
+#define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
+#define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
+#define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
+#define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
+#define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
+
+enum arm_cond_code_e {
+ COND_EQ = 0x0,
+ COND_NE = 0x1,
+ COND_CS = 0x2, /* Unsigned greater or equal */
+ COND_CC = 0x3, /* Unsigned less than */
+ COND_MI = 0x4, /* Negative */
+ COND_PL = 0x5, /* Zero or greater */
+ COND_VS = 0x6, /* Overflow */
+ COND_VC = 0x7, /* No overflow */
+ COND_HI = 0x8, /* Unsigned greater than */
+ COND_LS = 0x9, /* Unsigned less or equal */
+ COND_GE = 0xa,
+ COND_LT = 0xb,
+ COND_GT = 0xc,
+ COND_LE = 0xd,
+ COND_AL = 0xe,
+};
+
+static const uint8_t tcg_cond_to_arm_cond[] = {
+ [TCG_COND_EQ] = COND_EQ,
+ [TCG_COND_NE] = COND_NE,
+ [TCG_COND_LT] = COND_LT,
+ [TCG_COND_GE] = COND_GE,
+ [TCG_COND_LE] = COND_LE,
+ [TCG_COND_GT] = COND_GT,
+ /* unsigned */
+ [TCG_COND_LTU] = COND_CC,
+ [TCG_COND_GEU] = COND_CS,
+ [TCG_COND_LEU] = COND_LS,
+ [TCG_COND_GTU] = COND_HI,
+};
+
+static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
+{
+ tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
+}
+
+static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
+{
+ tcg_out32(s, (cond << 28) | 0x0a000000 |
+ (((offset - 8) >> 2) & 0x00ffffff));
+}
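+
+/* In the B/BL/BLX encodings above and below, the hardware applies the
+ displacement relative to PC + 8 (two instructions ahead of the branch),
+ which is why offsets computed from the branch insn itself have 8
+ subtracted before encoding. */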
+
+static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
+{
+ /* We pay attention here not to modify the branch target by masking
+ the corresponding bytes. This ensures that caches and memory are
+ kept coherent during retranslation. */
+ tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
+}
+
+static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
+{
+ /* We pay attention here not to modify the branch target by masking
+ the corresponding bytes. This ensures that caches and memory are
+ kept coherent during retranslation. */
+ tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
+}
+
+static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
+{
+ tcg_out32(s, (cond << 28) | 0x0b000000 |
+ (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
+{
+ tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
+}
+
+static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
+{
+ tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
+ (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static inline void tcg_out_dat_reg(TCGContext *s,
+ int cond, int opc, int rd, int rn, int rm, int shift)
+{
+ tcg_out32(s, (cond << 28) | (0 << 25) | opc |
+ (rn << 16) | (rd << 12) | shift | rm);
+}
+
+static inline void tcg_out_nop(TCGContext *s)
+{
+ if (use_armv7_instructions) {
+ /* Architected nop introduced in v6k. */
+ /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
+ also Just So Happened to do nothing on pre-v6k so that we
+ don't need to conditionalize it? */
+ tcg_out32(s, 0xe320f000);
+ } else {
+ /* Prior to that the assembler uses mov r0, r0. */
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
+ }
+}
+
+static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
+{
+ /* Simple reg-reg move, optimising out the 'do nothing' case */
+ if (rd != rm) {
+ tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
+ }
+}
+
+static inline void tcg_out_dat_imm(TCGContext *s,
+ int cond, int opc, int rd, int rn, int im)
+{
+ tcg_out32(s, (cond << 28) | (1 << 25) | opc |
+ (rn << 16) | (rd << 12) | im);
+}
+
+static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
+{
+ int rot, opc, rn;
+
+ /* For armv7, make sure not to use movw+movt when mov/mvn would do.
+ Speed things up by only checking when movt would be required.
+ Prior to armv7, have one go at fully rotated immediates before
+ doing the decomposition thing below. */
+ if (!use_armv7_instructions || (arg & 0xffff0000)) {
+ rot = encode_imm(arg);
+ if (rot >= 0) {
+ tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
+ rotl(arg, rot) | (rot << 7));
+ return;
+ }
+ rot = encode_imm(~arg);
+ if (rot >= 0) {
+ tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
+ rotl(~arg, rot) | (rot << 7));
+ return;
+ }
+ }
+
+ /* Use movw + movt. */
+ if (use_armv7_instructions) {
+ /* movw */
+ tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
+ | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
+ if (arg & 0xffff0000) {
+ /* movt */
+ tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
+ | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
+ }
+ return;
+ }
+
+ /* TODO: This is very suboptimal; we could easily place a constant
+ pool somewhere after all the instructions. */
+ opc = ARITH_MOV;
+ rn = 0;
+ /* If we have lots of leading 1's, we can shorten the sequence by
+ beginning with mvn and then clearing higher bits with eor. */
+ if (clz32(~arg) > clz32(arg)) {
+ opc = ARITH_MVN, arg = ~arg;
+ }
+ do {
+ int i = ctz32(arg) & ~1;
+ rot = ((32 - i) << 7) & 0xf00;
+ tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
+ arg &= ~(0xff << i);
+
+ opc = ARITH_EOR;
+ rn = rd;
+ } while (arg);
+}
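+
+/* For example, on a pre-armv7 core a constant such as 0x12345678 has no
+ single-insn encoding and is built by the loop above as one MOV of its
+ lowest byte group followed by three EORs merging in the remaining byte
+ groups. */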
+
+static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
+ TCGArg lhs, TCGArg rhs, int rhs_is_const)
+{
+ /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rI" constraint.
+ */
+ if (rhs_is_const) {
+ int rot = encode_imm(rhs);
+ assert(rot >= 0);
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
+static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
+ TCGReg dst, TCGReg lhs, TCGArg rhs,
+ bool rhs_is_const)
+{
+ /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rIK" constraint.
+ */
+ if (rhs_is_const) {
+ int rot = encode_imm(rhs);
+ if (rot < 0) {
+ rhs = ~rhs;
+ rot = encode_imm(rhs);
+ assert(rot >= 0);
+ opc = opinv;
+ }
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
+static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
+ TCGArg dst, TCGArg lhs, TCGArg rhs,
+ bool rhs_is_const)
+{
+ /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rIN" constraint.
+ */
+ if (rhs_is_const) {
+ int rot = encode_imm(rhs);
+ if (rot < 0) {
+ rhs = -rhs;
+ rot = encode_imm(rhs);
+ assert(rot >= 0);
+ opc = opneg;
+ }
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
+static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
+ TCGReg rn, TCGReg rm)
+{
+ /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
+ if (!use_armv6_instructions && rd == rn) {
+ if (rd == rm) {
+ /* rd == rn == rm; copy an input to tmp first. */
+ tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
+ rm = rn = TCG_REG_TMP;
+ } else {
+ rn = rm;
+ rm = rd;
+ }
+ }
+ /* mul */
+ tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
+}
+
+static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
+ TCGReg rd1, TCGReg rn, TCGReg rm)
+{
+ /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
+ if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
+ if (rd0 == rm || rd1 == rm) {
+ tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
+ rn = TCG_REG_TMP;
+ } else {
+ TCGReg t = rn;
+ rn = rm;
+ rm = t;
+ }
+ }
+ /* umull */
+ tcg_out32(s, (cond << 28) | 0x00800090 |
+ (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
+}
+
+static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
+ TCGReg rd1, TCGReg rn, TCGReg rm)
+{
+ /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
+ if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
+ if (rd0 == rm || rd1 == rm) {
+ tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
+ rn = TCG_REG_TMP;
+ } else {
+ TCGReg t = rn;
+ rn = rm;
+ rm = t;
+ }
+ }
+ /* smull */
+ tcg_out32(s, (cond << 28) | 0x00c00090 |
+ (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
+}
+
+static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
+{
+ tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
+}
+
+static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
+{
+ tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
+}
+
+static inline void tcg_out_ext8s(TCGContext *s, int cond,
+ int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* sxtb */
+ tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_LSL(24));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rd, SHIFT_IMM_ASR(24));
+ }
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int cond,
+ int rd, int rn)
+{
+ tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int cond,
+ int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* sxth */
+ tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_LSL(16));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rd, SHIFT_IMM_ASR(16));
+ }
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int cond,
+ int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* uxth */
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_LSL(16));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rd, SHIFT_IMM_LSR(16));
+ }
+}
+
+static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* revsh */
+ tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
+ }
+}
+
+static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* rev16 */
+ tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
+ }
+}
+
+/* Swap the two low bytes, assuming that the two high input bytes and the
+ two high output bytes can hold any value. */
+static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* rev16 */
+ tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
+ tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
+ }
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* rev */
+ tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_EOR,
+ TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
+ tcg_out_dat_imm(s, cond, ARITH_BIC,
+ TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_ROR(8));
+ tcg_out_dat_reg(s, cond, ARITH_EOR,
+ rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
+ }
+}
+
+bool tcg_target_deposit_valid(int ofs, int len)
+{
+ /* ??? Without bfi, we could improve over generic code by combining
+ the right-shift from a non-zero ofs with the orr. We do run into
+ problems when rd == rs, and the mask generated from ofs+len doesn't
+ fit into an immediate. We would have to be careful not to pessimize
+ wrt the optimizations performed on the expanded code. */
+ return use_armv7_instructions;
+}
+
+static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
+ TCGArg a1, int ofs, int len, bool const_a1)
+{
+ if (const_a1) {
+ /* bfi becomes bfc with rn == 15. */
+ a1 = 15;
+ }
+ /* bfi/bfc */
+ tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
+ | (ofs << 7) | ((ofs + len - 1) << 16));
+}
+
+/* Note that this routine is used for both LDR and LDRH formats, so we do
+ not wish to include an immediate shift at this point. */
+static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
+ TCGReg rn, TCGReg rm, bool u, bool p, bool w)
+{
+ tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
+ | (w << 21) | (rn << 16) | (rt << 12) | rm);
+}
+
+static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
+ TCGReg rn, int imm8, bool p, bool w)
+{
+ bool u = 1;
+ if (imm8 < 0) {
+ imm8 = -imm8;
+ u = 0;
+ }
+ tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
+ (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
+}
+
+static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
+ TCGReg rn, int imm12, bool p, bool w)
+{
+ bool u = 1;
+ if (imm12 < 0) {
+ imm12 = -imm12;
+ u = 0;
+ }
+ tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
+ (rn << 16) | (rt << 12) | imm12);
+}
+
+static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
+}
+
+static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
+}
+
+static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
+/* Register pre-increment with base writeback. */
+static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
+}
+
+static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
+}
+
+static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
+}
+
+static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
+}
+
+static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
+}
+
+static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static inline void tcg_out_ld32u(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_ld32_12(s, cond, rd, rn, offset);
+}
+
+static inline void tcg_out_st32(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_st32_12(s, cond, rd, rn, offset);
+}
+
+static inline void tcg_out_ld16u(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_ld16u_8(s, cond, rd, rn, offset);
+}
+
+static inline void tcg_out_ld16s(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_ld16s_8(s, cond, rd, rn, offset);
+}
+
+static inline void tcg_out_st16(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_st16_8(s, cond, rd, rn, offset);
+}
+
+static inline void tcg_out_ld8u(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_ld8_12(s, cond, rd, rn, offset);
+}
+
+static inline void tcg_out_ld8s(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_ld8s_8(s, cond, rd, rn, offset);
+}
+
+static inline void tcg_out_st8(TCGContext *s, int cond,
+ int rd, int rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
+ } else
+ tcg_out_st8_12(s, cond, rd, rn, offset);
+}
+
+/* The _goto case is normally between TBs within the same code buffer, and
+ * with the code buffer limited to 16MB we wouldn't need the long case.
+ * But we also use it for the tail-call to the qemu_ld/st helpers, which does
+ * need the long case.
+ */
+static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
+{
+ intptr_t addri = (intptr_t)addr;
+ ptrdiff_t disp = tcg_pcrel_diff(s, addr);
+
+ if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
+ tcg_out_b(s, cond, disp);
+ return;
+ }
+
+ tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
+ if (use_armv5t_instructions) {
+ tcg_out_bx(s, cond, TCG_REG_TMP);
+ } else {
+ if (addri & 1) {
+ tcg_abort();
+ }
+ tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
+ }
+}
+
+/* The call case is mostly used for helpers, so it's not unreasonable
+ * for them to be beyond branch range. */
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
+{
+ intptr_t addri = (intptr_t)addr;
+ ptrdiff_t disp = tcg_pcrel_diff(s, addr);
+
+ if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
+ if (addri & 1) {
+ /* Use BLX if the target is in Thumb mode */
+ if (!use_armv5t_instructions) {
+ tcg_abort();
+ }
+ tcg_out_blx_imm(s, disp);
+ } else {
+ tcg_out_bl(s, COND_AL, disp);
+ }
+ } else if (use_armv7_instructions) {
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
+ tcg_out_blx(s, COND_AL, TCG_REG_TMP);
+ } else {
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
+ tcg_out32(s, addri);
+ }
+}
+
+static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
+{
+ if (l->has_value) {
+ tcg_out_goto(s, cond, l->u.value_ptr);
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
+ tcg_out_b_noaddr(s, cond);
+ }
+}
+
+#ifdef CONFIG_SOFTMMU
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[16] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_LESW] = helper_le_ldsw_mmu,
+ [MO_LESL] = helper_le_ldul_mmu,
+
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+ [MO_BESW] = helper_be_ldsw_mmu,
+ [MO_BESL] = helper_be_ldul_mmu,
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_st_helpers[16] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+/* Helper routines for marshalling helper function arguments into
+ * the correct registers and stack.
+ * argreg is where we want to put this argument, arg is the argument itself.
+ * Return value is the updated argreg ready for the next call.
+ * Note that argreg values 0..3 denote real registers; 4 and up denote stack slots.
+ *
+ * We provide routines for arguments which are: immediate, 32 bit
+ * value in register, 16 and 8 bit values in register (which must be zero
+ * extended before use) and 64 bit value in a lo:hi register pair.
+ */
+#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
+static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
+{ \
+ if (argreg < 4) { \
+ MOV_ARG(s, COND_AL, argreg, arg); \
+ } else { \
+ int ofs = (argreg - 4) * 4; \
+ EXT_ARG; \
+ assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
+ tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
+ } \
+ return argreg + 1; \
+}
+
+DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
+ (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
+ (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
+ (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
+
+static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
+ TCGReg arglo, TCGReg arghi)
+{
+ /* 64 bit arguments must go in even/odd register pairs
+ * and in 8-byte-aligned stack slots.
+ */
+ if (argreg & 1) {
+ argreg++;
+ }
+ if (use_armv6_instructions && argreg >= 4
+ && (arglo & 1) == 0 && arghi == arglo + 1) {
+ tcg_out_strd_8(s, COND_AL, arglo,
+ TCG_REG_CALL_STACK, (argreg - 4) * 4);
+ return argreg + 2;
+ } else {
+ argreg = tcg_out_arg_reg32(s, argreg, arglo);
+ argreg = tcg_out_arg_reg32(s, argreg, arghi);
+ return argreg;
+ }
+}
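+
+/* Illustrative use: tcg_out_qemu_ld_slow_path below marshals
+ (env, addr, oi, return address) by calling, in order,
+ tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0), then tcg_out_arg_reg32 or
+ tcg_out_arg_reg64 for the guest address register(s), tcg_out_arg_imm32
+ for oi, and finally tcg_out_arg_reg32 for TCG_REG_R14. */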
+
+#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
+
+/* We're expecting to use an 8-bit immediate and to mask. */
+QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
+
+/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
+ Using the offset of the second entry in the last tlb table ensures
+ that we can index all of the elements of the first entry. */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+ > 0xffff);
+
+/* Load and compare a TLB entry, leaving the flags set. Returns the register
+ containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
+
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
+ TCGMemOp s_bits, int mem_index, bool is_load)
+{
+ TCGReg base = TCG_AREG0;
+ int cmp_off =
+ (is_load
+ ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+ int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+
+ /* Should generate something like the following:
+ * shr tmp, addrlo, #TARGET_PAGE_BITS (1)
+ * add r2, env, #high
+ * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
+ * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
+ * ldr r0, [r2, #cmp] (4)
+ * tst addrlo, #s_mask
+ * ldr r2, [r2, #add] (5)
+ * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
+ */
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
+ 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+
+ /* We checked that the offset is contained within 16 bits above. */
+ if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
+ (24 << 7) | (cmp_off >> 8));
+ base = TCG_REG_R2;
+ add_off -= cmp_off & 0xff00;
+ cmp_off &= 0xff;
+ }
+
+ tcg_out_dat_imm(s, COND_AL, ARITH_AND,
+ TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
+ TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
+
+ /* Load the tlb comparator. Use ldrd if needed and available,
+ but due to how the pointer needs setting up, ldm isn't useful.
+ Base arm5 doesn't have ldrd, but armv5te does. */
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
+ } else {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
+ }
+ }
+
+ /* Check alignment. */
+ if (s_bits) {
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST,
+ 0, addrlo, (1 << s_bits) - 1);
+ }
+
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+
+ tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
+ TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
+ TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
+ }
+
+ return TCG_REG_R2;
+}
+
+/* Record the context of a call to the out of line helper code for the slow
+ path for a load or store, so that we can later generate the correct
+ helper code. */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+ TCGReg datalo, TCGReg datahi, TCGReg addrlo,
+ TCGReg addrhi, tcg_insn_unit *raddr,
+ tcg_insn_unit *label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = datalo;
+ label->datahi_reg = datahi;
+ label->addrlo_reg = addrlo;
+ label->addrhi_reg = addrhi;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr;
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg argreg, datalo, datahi;
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+ void *func;
+
+ reloc_pc24(lb->label_ptr[0], s->code_ptr);
+
+ argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
+ } else {
+ argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
+ }
+ argreg = tcg_out_arg_imm32(s, argreg, oi);
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
+
+ /* For armv6 we can use the canonical unsigned helpers and minimize
+ icache usage. For pre-armv6, use the signed helpers since we do
+ not have a single insn sign-extend. */
+ if (use_armv6_instructions) {
+ func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
+ } else {
+ func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
+ if (opc & MO_SIGN) {
+ opc = MO_UL;
+ }
+ }
+ tcg_out_call(s, func);
+
+ datalo = lb->datalo_reg;
+ datahi = lb->datahi_reg;
+ switch (opc & MO_SSIZE) {
+ case MO_SB:
+ tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
+ break;
+ case MO_SW:
+ tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
+ break;
+ default:
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+ break;
+ case MO_Q:
+ if (datalo != TCG_REG_R1) {
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+ tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+ } else if (datahi != TCG_REG_R0) {
+ tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+ } else {
+ tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
+ tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
+ }
+ break;
+ }
+
+ tcg_out_goto(s, COND_AL, lb->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg argreg, datalo, datahi;
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+
+ reloc_pc24(lb->label_ptr[0], s->code_ptr);
+
+ argreg = TCG_REG_R0;
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
+ } else {
+ argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
+ }
+
+ datalo = lb->datalo_reg;
+ datahi = lb->datahi_reg;
+ switch (opc & MO_SIZE) {
+ case MO_8:
+ argreg = tcg_out_arg_reg8(s, argreg, datalo);
+ break;
+ case MO_16:
+ argreg = tcg_out_arg_reg16(s, argreg, datalo);
+ break;
+ case MO_32:
+ default:
+ argreg = tcg_out_arg_reg32(s, argreg, datalo);
+ break;
+ case MO_64:
+ argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
+ break;
+ }
+
+ argreg = tcg_out_arg_imm32(s, argreg, oi);
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
+
+ /* Tail-call to the helper, which will return to the fast path. */
+ tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+}
+#endif /* SOFTMMU */
+
+static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addend)
+{
+ TCGMemOp bswap = opc & MO_BSWAP;
+
+ switch (opc & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
+ break;
+ case MO_SB:
+ tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
+ break;
+ case MO_UW:
+ tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
+ if (bswap) {
+ tcg_out_bswap16(s, COND_AL, datalo, datalo);
+ }
+ break;
+ case MO_SW:
+ if (bswap) {
+ tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
+ tcg_out_bswap16s(s, COND_AL, datalo, datalo);
+ } else {
+ tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
+ }
+ break;
+ case MO_UL:
+ default:
+ tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
+ if (bswap) {
+ tcg_out_bswap32(s, COND_AL, datalo, datalo);
+ }
+ break;
+ case MO_Q:
+ {
+ TCGReg dl = (bswap ? datahi : datalo);
+ TCGReg dh = (bswap ? datalo : datahi);
+
+ /* Avoid ldrd for user-only emulation, to handle unaligned. */
+ if (USING_SOFTMMU && use_armv6_instructions
+ && (dl & 1) == 0 && dh == dl + 1) {
+ tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
+ } else if (dl != addend) {
+ tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
+ tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
+ } else {
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
+ addend, addrlo, SHIFT_IMM_LSL(0));
+ tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
+ tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
+ }
+ if (bswap) {
+ tcg_out_bswap32(s, COND_AL, dl, dl);
+ tcg_out_bswap32(s, COND_AL, dh, dh);
+ }
+ }
+ break;
+ }
+}
+
+static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo)
+{
+ TCGMemOp bswap = opc & MO_BSWAP;
+
+ switch (opc & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_SB:
+ tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_UW:
+ tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
+ if (bswap) {
+ tcg_out_bswap16(s, COND_AL, datalo, datalo);
+ }
+ break;
+ case MO_SW:
+ if (bswap) {
+ tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
+ tcg_out_bswap16s(s, COND_AL, datalo, datalo);
+ } else {
+ tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
+ }
+ break;
+ case MO_UL:
+ default:
+ tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
+ if (bswap) {
+ tcg_out_bswap32(s, COND_AL, datalo, datalo);
+ }
+ break;
+ case MO_Q:
+ {
+ TCGReg dl = (bswap ? datahi : datalo);
+ TCGReg dh = (bswap ? datalo : datahi);
+
+ /* Avoid ldrd for user-only emulation, to handle unaligned. */
+ if (USING_SOFTMMU && use_armv6_instructions
+ && (dl & 1) == 0 && dh == dl + 1) {
+ tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
+ } else if (dl == addrlo) {
+ tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
+ tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
+ } else {
+ tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
+ tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
+ }
+ if (bswap) {
+ tcg_out_bswap32(s, COND_AL, dl, dl);
+ tcg_out_bswap32(s, COND_AL, dh, dh);
+ }
+ }
+ break;
+ }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+ TCGMemOpIdx oi;
+ TCGMemOp opc;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ TCGReg addend;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
+
+    /* This is a conditional BL only to load a pointer within this opcode
+       into LR for the slow path.  We will not be using the value for a
+       tail call. */
+ label_ptr = s->code_ptr;
+ tcg_out_bl_noaddr(s, COND_NE);
+
+ tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+
+ add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+ tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
+ } else {
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
+ }
+#endif
+}
+
+static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addend)
+{
+ TCGMemOp bswap = opc & MO_BSWAP;
+
+ switch (opc & MO_SIZE) {
+ case MO_8:
+ tcg_out_st8_r(s, cond, datalo, addrlo, addend);
+ break;
+ case MO_16:
+ if (bswap) {
+ tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
+ tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
+ } else {
+ tcg_out_st16_r(s, cond, datalo, addrlo, addend);
+ }
+ break;
+ case MO_32:
+ default:
+ if (bswap) {
+ tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
+ tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
+ } else {
+ tcg_out_st32_r(s, cond, datalo, addrlo, addend);
+ }
+ break;
+ case MO_64:
+ /* Avoid strd for user-only emulation, to handle unaligned. */
+ if (bswap) {
+ tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
+ tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
+ tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
+ tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
+ } else if (USING_SOFTMMU && use_armv6_instructions
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
+ tcg_out_strd_r(s, cond, datalo, addrlo, addend);
+ } else {
+ tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
+ tcg_out_st32_12(s, cond, datahi, addend, 4);
+ }
+ break;
+ }
+}
+
+static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo)
+{
+ TCGMemOp bswap = opc & MO_BSWAP;
+
+ switch (opc & MO_SIZE) {
+ case MO_8:
+ tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_16:
+ if (bswap) {
+ tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
+ tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
+ } else {
+ tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
+ }
+ break;
+ case MO_32:
+ default:
+ if (bswap) {
+ tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
+ tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
+ } else {
+ tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
+ }
+ break;
+ case MO_64:
+ /* Avoid strd for user-only emulation, to handle unaligned. */
+ if (bswap) {
+ tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
+ tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
+ tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
+ tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
+ } else if (USING_SOFTMMU && use_armv6_instructions
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
+ tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
+ } else {
+ tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
+ tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
+ }
+ break;
+ }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+ TCGMemOpIdx oi;
+ TCGMemOp opc;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ TCGReg addend;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
+
+ tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
+
+ /* The conditional call must come last, as we're going to return here. */
+ label_ptr = s->code_ptr;
+ tcg_out_bl_noaddr(s, COND_NE);
+
+ add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+ tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
+ datahi, addrlo, TCG_REG_TMP);
+ } else {
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
+ }
+#endif
+}
+
+static tcg_insn_unit *tb_ret_addr;
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg *args, const int *const_args)
+{
+ TCGArg a0, a1, a2, a3, a4, a5;
+ int c;
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
+ tcg_out_goto(s, COND_AL, tb_ret_addr);
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ /* Direct jump method */
+ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+ tcg_out_b_noaddr(s, COND_AL);
+ } else {
+ /* Indirect jump method */
+ intptr_t ptr = (intptr_t)(s->tb_next + args[0]);
+ tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
+ }
+ s->tb_next_offset[args[0]] = tcg_current_code_size(s);
+ break;
+ case INDEX_op_br:
+ tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
+ break;
+
+ case INDEX_op_ld8u_i32:
+ tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld8s_i32:
+ tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld16u_i32:
+ tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld16s_i32:
+ tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i32:
+ tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st8_i32:
+ tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st16_i32:
+ tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i32:
+ tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_movcond_i32:
+ /* Constraints mean that v2 is always in the same register as dest,
+ * so we only need to do "if condition passed, move v1 to dest".
+ */
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[1], args[2], const_args[2]);
+ tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
+ ARITH_MVN, args[0], 0, args[3], const_args[3]);
+ break;
+ case INDEX_op_add_i32:
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
+ args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_sub_i32:
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
+ } else {
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
+ args[0], args[2], args[1], 1);
+ }
+ } else {
+ tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
+ args[0], args[1], args[2], const_args[2]);
+ }
+ break;
+ case INDEX_op_and_i32:
+ tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
+ args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_andc_i32:
+ tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
+ args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_or_i32:
+ c = ARITH_ORR;
+ goto gen_arith;
+ case INDEX_op_xor_i32:
+ c = ARITH_EOR;
+ /* Fall through. */
+ gen_arith:
+ tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_add2_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ a3 = args[3], a4 = args[4], a5 = args[5];
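+        /* If the low-part destination would clobber a high-part input,
+           build the low result in TMP and copy it out afterwards. */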
+ if (a0 == a3 || (a0 == a5 && !const_args[5])) {
+ a0 = TCG_REG_TMP;
+ }
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
+ a0, a2, a4, const_args[4]);
+ tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
+ a1, a3, a5, const_args[5]);
+ tcg_out_mov_reg(s, COND_AL, args[0], a0);
+ break;
+ case INDEX_op_sub2_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ a3 = args[3], a4 = args[4], a5 = args[5];
+ if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
+ a0 = TCG_REG_TMP;
+ }
+ if (const_args[2]) {
+ if (const_args[4]) {
+ tcg_out_movi32(s, COND_AL, a0, a4);
+ a4 = a0;
+ }
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
+ } else {
+ tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
+ ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
+ }
+ if (const_args[3]) {
+ if (const_args[5]) {
+ tcg_out_movi32(s, COND_AL, a1, a5);
+ a5 = a1;
+ }
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
+ } else {
+ tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
+ a1, a3, a5, const_args[5]);
+ }
+ tcg_out_mov_reg(s, COND_AL, args[0], a0);
+ break;
+ case INDEX_op_neg_i32:
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
+ break;
+ case INDEX_op_not_i32:
+ tcg_out_dat_reg(s, COND_AL,
+ ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
+ break;
+ case INDEX_op_mul_i32:
+ tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_mulu2_i32:
+ tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
+ case INDEX_op_muls2_i32:
+ tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
+ /* XXX: Perhaps args[2] & 0x1f is wrong */
+ case INDEX_op_shl_i32:
+ c = const_args[2] ?
+ SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
+ goto gen_shift32;
+ case INDEX_op_shr_i32:
+ c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
+ goto gen_shift32;
+ case INDEX_op_sar_i32:
+ c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
+ goto gen_shift32;
+ case INDEX_op_rotr_i32:
+ c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
+ /* Fall through. */
+ gen_shift32:
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
+ break;
+
+ case INDEX_op_rotl_i32:
+ if (const_args[2]) {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+ ((0x20 - args[2]) & 0x1f) ?
+ SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
+ SHIFT_IMM_LSL(0));
+ } else {
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+ SHIFT_REG_ROR(TCG_REG_TMP));
+ }
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[0], args[1], const_args[1]);
+ tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
+ arg_label(args[3]));
+ break;
+ case INDEX_op_brcond2_i32:
+ /* The resulting conditions are:
+ * TCG_COND_EQ --> a0 == a2 && a1 == a3,
+ * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
+ * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
+ * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
+ * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
+ * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
+ */
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[1], args[3], const_args[3]);
+ tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
+ args[0], args[2], const_args[2]);
+ tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
+ arg_label(args[5]));
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[1], args[2], const_args[2]);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
+ ARITH_MOV, args[0], 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
+ ARITH_MOV, args[0], 0, 0);
+ break;
+ case INDEX_op_setcond2_i32:
+ /* See brcond2_i32 comment */
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[2], args[4], const_args[4]);
+ tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
+ args[1], args[3], const_args[3]);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
+ ARITH_MOV, args[0], 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
+ ARITH_MOV, args[0], 0, 0);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, 0);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, 1);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, 0);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, 1);
+ break;
+
+ case INDEX_op_bswap16_i32:
+ tcg_out_bswap16(s, COND_AL, args[0], args[1]);
+ break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, COND_AL, args[0], args[1]);
+ break;
+
+ case INDEX_op_ext8s_i32:
+ tcg_out_ext8s(s, COND_AL, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i32:
+ tcg_out_ext16s(s, COND_AL, args[0], args[1]);
+ break;
+ case INDEX_op_ext16u_i32:
+ tcg_out_ext16u(s, COND_AL, args[0], args[1]);
+ break;
+
+ case INDEX_op_deposit_i32:
+ tcg_out_deposit(s, COND_AL, args[0], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+
+ case INDEX_op_div_i32:
+ tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_divu_i32:
+ tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static const TCGTargetOpDef arm_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_br, { } },
+
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "r", "r" } },
+ { INDEX_op_st16_i32, { "r", "r" } },
+ { INDEX_op_st_i32, { "r", "r" } },
+
+ /* TODO: "r", "r", "ri" */
+ { INDEX_op_add_i32, { "r", "r", "rIN" } },
+ { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
+ { INDEX_op_mul_i32, { "r", "r", "r" } },
+ { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
+ { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
+ { INDEX_op_and_i32, { "r", "r", "rIK" } },
+ { INDEX_op_andc_i32, { "r", "r", "rIK" } },
+ { INDEX_op_or_i32, { "r", "r", "rI" } },
+ { INDEX_op_xor_i32, { "r", "r", "rI" } },
+ { INDEX_op_neg_i32, { "r", "r" } },
+ { INDEX_op_not_i32, { "r", "r" } },
+
+ { INDEX_op_shl_i32, { "r", "r", "ri" } },
+ { INDEX_op_shr_i32, { "r", "r", "ri" } },
+ { INDEX_op_sar_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "r", "ri" } },
+
+ { INDEX_op_brcond_i32, { "r", "rIN" } },
+ { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
+
+ { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
+ { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
+ { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
+
+#if TARGET_LONG_BITS == 32
+ { INDEX_op_qemu_ld_i32, { "r", "l" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
+ { INDEX_op_qemu_st_i32, { "s", "s" } },
+ { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
+#else
+ { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
+ { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
+ { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
+#endif
+
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
+
+ { INDEX_op_ext8s_i32, { "r", "r" } },
+ { INDEX_op_ext16s_i32, { "r", "r" } },
+ { INDEX_op_ext16u_i32, { "r", "r" } },
+
+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
+ { INDEX_op_div_i32, { "r", "r", "r" } },
+ { INDEX_op_divu_i32, { "r", "r", "r" } },
+
+ { -1 },
+};
+
+static void tcg_target_init(TCGContext *s)
+{
+    /* Only probe for the platform and capabilities if we haven't already
+ determined maximum values at compile time. */
+#ifndef use_idiv_instructions
+ {
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
+ }
+#endif
+ if (__ARM_ARCH < 7) {
+ const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
+ if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
+ arm_arch = pl[1] - '0';
+ }
+ }
+
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
+ tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+ (1 << TCG_REG_R0) |
+ (1 << TCG_REG_R1) |
+ (1 << TCG_REG_R2) |
+ (1 << TCG_REG_R3) |
+ (1 << TCG_REG_R12) |
+ (1 << TCG_REG_R14));
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
+
+ tcg_add_target_add_op_defs(arm_op_defs);
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_st32(s, COND_AL, arg, arg1, arg2);
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+ TCGReg ret, TCGReg arg)
+{
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
+}
+
+static inline void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ tcg_out_movi32(s, COND_AL, ret, arg);
+}
+
+/* Compute frame size via macros, to share between tcg_target_qemu_prologue
+ and tcg_register_jit. */
+
+#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
+
+#define FRAME_SIZE \
+ ((PUSH_SIZE \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
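+
+/* For example: PUSH_SIZE is 9 registers (r4-r11 plus lr) * 4 bytes = 36;
+   FRAME_SIZE adds the static call-argument area and the temp buffer and
+   rounds the total up to the 8-byte stack alignment (the exact figure
+   depends on TCG_STATIC_CALL_ARGS_SIZE and CPU_TEMP_BUF_NLONGS). */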
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int stack_addend;
+
+ /* Calling convention requires us to save r4-r11 and lr. */
+ /* stmdb sp!, { r4 - r11, lr } */
+ tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
+
+ /* Reserve callee argument and tcg temp space. */
+ stack_addend = FRAME_SIZE - PUSH_SIZE;
+
+ tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
+ TCG_REG_CALL_STACK, stack_addend, 1);
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+
+ tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
+ tb_ret_addr = s->code_ptr;
+
+ /* Epilogue. We branch here via tb_ret_addr. */
+ tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
+ TCG_REG_CALL_STACK, stack_addend, 1);
+
+ /* ldmia sp!, { r4 - r11, pc } */
+ tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[18];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_ARM
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x7c, /* sleb128 -4 */
+ .h.cie.return_column = 14,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 13, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ /* The following must match the stmdb in the prologue. */
+ 0x8e, 1, /* DW_CFA_offset, lr, -4 */
+ 0x8b, 2, /* DW_CFA_offset, r11, -8 */
+ 0x8a, 3, /* DW_CFA_offset, r10, -12 */
+ 0x89, 4, /* DW_CFA_offset, r9, -16 */
+ 0x88, 5, /* DW_CFA_offset, r8, -20 */
+ 0x87, 6, /* DW_CFA_offset, r7, -24 */
+ 0x86, 7, /* DW_CFA_offset, r6, -28 */
+ 0x85, 8, /* DW_CFA_offset, r5, -32 */
+ 0x84, 9, /* DW_CFA_offset, r4, -36 */
+ }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
new file mode 100644
index 00000000..6559f80b
--- /dev/null
+++ b/tcg/arm/tcg-target.h
@@ -0,0 +1,109 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ * Copyright (c) 2008 Andrzej Zaborowski
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TCG_TARGET_ARM
+#define TCG_TARGET_ARM 1
+
+#undef TCG_TARGET_STACK_GROWSUP
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+
+typedef enum {
+ TCG_REG_R0 = 0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_PC,
+} TCGReg;
+
+#define TCG_TARGET_NB_REGS 16
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+#define use_idiv_instructions 1
+#else
+extern bool use_idiv_instructions;
+#endif
+
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_R13
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
+#define TCG_TARGET_HAS_rem_i32 0
+
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
+
+enum {
+ TCG_AREG0 = TCG_REG_R6,
+};
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+#if QEMU_GNUC_PREREQ(4, 1)
+ __builtin___clear_cache((char *) start, (char *) stop);
+#else
+ register uintptr_t _beg __asm("a1") = start;
+ register uintptr_t _end __asm("a2") = stop;
+ register uintptr_t _flg __asm("a3") = 0;
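+    /* 0x9f0002 is the ARM-private cacheflush syscall (__ARM_NR_cacheflush)
+       in its OABI "swi" encoding. */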
+ __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
+#endif
+}
+
+#endif
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
new file mode 100644
index 00000000..887f22f6
--- /dev/null
+++ b/tcg/i386/tcg-target.c
@@ -0,0 +1,2451 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-be-ldst.h"
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+#if TCG_TARGET_REG_BITS == 64
+ "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+#else
+ "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+#endif
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_RBP,
+ TCG_REG_RBX,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_RCX,
+ TCG_REG_RDX,
+ TCG_REG_RSI,
+ TCG_REG_RDI,
+ TCG_REG_RAX,
+#else
+ TCG_REG_EBX,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+ TCG_REG_EBP,
+ TCG_REG_ECX,
+ TCG_REG_EDX,
+ TCG_REG_EAX,
+#endif
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+#if defined(_WIN64)
+ TCG_REG_RCX,
+ TCG_REG_RDX,
+#else
+ TCG_REG_RDI,
+ TCG_REG_RSI,
+ TCG_REG_RDX,
+ TCG_REG_RCX,
+#endif
+ TCG_REG_R8,
+ TCG_REG_R9,
+#else
+ /* 32 bit mode uses stack based calling convention (GCC default). */
+#endif
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_EAX,
+#if TCG_TARGET_REG_BITS == 32
+ TCG_REG_EDX
+#endif
+};
+
+/* Constants we accept. */
+#define TCG_CT_CONST_S32 0x100
+#define TCG_CT_CONST_U32 0x200
+#define TCG_CT_CONST_I32 0x400
+
+/* Registers used with L constraint, which are the first argument
+ registers on x86_64, and two random call clobbered registers on
+ i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
+/* The host compiler should supply <cpuid.h> to enable runtime features
+ detection, as we're not going to go so far as our own inline assembly.
+ If not available, default values will be assumed. */
+#if defined(CONFIG_CPUID_H)
+#include <cpuid.h>
+#endif
+
+/* For 32-bit, we are going to attempt to determine at runtime whether cmov
+ is available. */
+#if TCG_TARGET_REG_BITS == 64
+# define have_cmov 1
+#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
+static bool have_cmov;
+#else
+# define have_cmov 0
+#endif
+
+/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
+ going to attempt to determine at runtime whether movbe is available. */
+#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
+static bool have_movbe;
+#else
+# define have_movbe 0
+#endif
+
+/* We need this symbol in tcg-target.h, and we can't properly conditionalize
+ it there. Therefore we always define the variable. */
+bool have_bmi1;
+
+#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
+static bool have_bmi2;
+#else
+# define have_bmi2 0
+#endif
+
+static tcg_insn_unit *tb_ret_addr;
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ value += addend;
+ switch(type) {
+ case R_386_PC32:
+ value -= (uintptr_t)code_ptr;
+ if (value != (int32_t)value) {
+ tcg_abort();
+ }
+ tcg_patch32(code_ptr, value);
+ break;
+ case R_386_PC8:
+ value -= (uintptr_t)code_ptr;
+ if (value != (int8_t)value) {
+ tcg_abort();
+ }
+ tcg_patch8(code_ptr, value);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch(ct_str[0]) {
+ case 'a':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
+ break;
+ case 'b':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
+ break;
+ case 'c':
+ case_c:
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
+ break;
+ case 'd':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
+ break;
+ case 'S':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
+ break;
+ case 'D':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
+ break;
+ case 'q':
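+        /* A register addressable as a byte operand: only %e[abcd]x on i386,
+           any register (with REX) on x86-64. */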
+ ct->ct |= TCG_CT_REG;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ } else {
+ tcg_regset_set32(ct->u.regs, 0, 0xf);
+ }
+ break;
+ case 'Q':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xf);
+ break;
+ case 'r':
+ case_r:
+ ct->ct |= TCG_CT_REG;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ } else {
+ tcg_regset_set32(ct->u.regs, 0, 0xff);
+ }
+ break;
+ case 'C':
+ /* With SHRX et al, we need not use ECX as shift count register. */
+ if (have_bmi2) {
+ goto case_r;
+ } else {
+ goto case_c;
+ }
+
+ /* qemu_ld/st address constraint */
+ case 'L':
+ ct->ct |= TCG_CT_REG;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ } else {
+ tcg_regset_set32(ct->u.regs, 0, 0xff);
+ }
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+ break;
+
+ case 'e':
+ ct->ct |= TCG_CT_CONST_S32;
+ break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_U32;
+ break;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_I32;
+ break;
+
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
+ return 1;
+ }
+ return 0;
+}
+
+#if TCG_TARGET_REG_BITS == 64
+# define LOWREGMASK(x) ((x) & 7)
+#else
+# define LOWREGMASK(x) (x)
+#endif
+
+#define P_EXT 0x100 /* 0x0f opcode prefix */
+#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
+#define P_DATA16 0x400 /* 0x66 opcode prefix */
+#if TCG_TARGET_REG_BITS == 64
+# define P_ADDR32 0x800 /* 0x67 opcode prefix */
+# define P_REXW 0x1000 /* Set REX.W = 1 */
+# define P_REXB_R 0x2000 /* REG field as byte register */
+# define P_REXB_RM 0x4000 /* R/M field as byte register */
+# define P_GS 0x8000 /* gs segment override */
+#else
+# define P_ADDR32 0
+# define P_REXW 0
+# define P_REXB_R 0
+# define P_REXB_RM 0
+# define P_GS 0
+#endif
+#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
+#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
+
+#define OPC_ARITH_EvIz (0x81)
+#define OPC_ARITH_EvIb (0x83)
+#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
+#define OPC_ANDN (0xf2 | P_EXT38)
+#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BSWAP (0xc8 | P_EXT)
+#define OPC_CALL_Jz (0xe8)
+#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
+#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
+#define OPC_DEC_r32 (0x48)
+#define OPC_IMUL_GvEv (0xaf | P_EXT)
+#define OPC_IMUL_GvEvIb (0x6b)
+#define OPC_IMUL_GvEvIz (0x69)
+#define OPC_INC_r32 (0x40)
+#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
+#define OPC_JCC_short (0x70) /* ... plus condition code */
+#define OPC_JMP_long (0xe9)
+#define OPC_JMP_short (0xeb)
+#define OPC_LEA (0x8d)
+#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
+#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
+#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
+#define OPC_MOVB_EvIz (0xc6)
+#define OPC_MOVL_EvIz (0xc7)
+#define OPC_MOVL_Iv (0xb8)
+#define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
+#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
+#define OPC_MOVSBL (0xbe | P_EXT)
+#define OPC_MOVSWL (0xbf | P_EXT)
+#define OPC_MOVSLQ (0x63 | P_REXW)
+#define OPC_MOVZBL (0xb6 | P_EXT)
+#define OPC_MOVZWL (0xb7 | P_EXT)
+#define OPC_POP_r32 (0x58)
+#define OPC_PUSH_r32 (0x50)
+#define OPC_PUSH_Iv (0x68)
+#define OPC_PUSH_Ib (0x6a)
+#define OPC_RET (0xc3)
+#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
+#define OPC_SHIFT_1 (0xd1)
+#define OPC_SHIFT_Ib (0xc1)
+#define OPC_SHIFT_cl (0xd3)
+#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
+#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
+#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
+#define OPC_TESTL (0x85)
+#define OPC_XCHG_ax_r32 (0x90)
+
+#define OPC_GRP3_Ev (0xf7)
+#define OPC_GRP5 (0xff)
+
+/* Group 1 opcode extensions for 0x80-0x83.
+ These are also used as modifiers for OPC_ARITH. */
+#define ARITH_ADD 0
+#define ARITH_OR 1
+#define ARITH_ADC 2
+#define ARITH_SBB 3
+#define ARITH_AND 4
+#define ARITH_SUB 5
+#define ARITH_XOR 6
+#define ARITH_CMP 7
+
+/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
+#define SHIFT_ROL 0
+#define SHIFT_ROR 1
+#define SHIFT_SHL 4
+#define SHIFT_SHR 5
+#define SHIFT_SAR 7
+
+/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
+#define EXT3_NOT 2
+#define EXT3_NEG 3
+#define EXT3_MUL 4
+#define EXT3_IMUL 5
+#define EXT3_DIV 6
+#define EXT3_IDIV 7
+
+/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
+#define EXT5_INC_Ev 0
+#define EXT5_DEC_Ev 1
+#define EXT5_CALLN_Ev 2
+#define EXT5_JMPN_Ev 4
+
+/* Condition codes to be added to OPC_JCC_{long,short}. */
+#define JCC_JMP (-1)
+#define JCC_JO 0x0
+#define JCC_JNO 0x1
+#define JCC_JB 0x2
+#define JCC_JAE 0x3
+#define JCC_JE 0x4
+#define JCC_JNE 0x5
+#define JCC_JBE 0x6
+#define JCC_JA 0x7
+#define JCC_JS 0x8
+#define JCC_JNS 0x9
+#define JCC_JP 0xa
+#define JCC_JNP 0xb
+#define JCC_JL 0xc
+#define JCC_JGE 0xd
+#define JCC_JLE 0xe
+#define JCC_JG 0xf
+
+static const uint8_t tcg_cond_to_jcc[] = {
+ [TCG_COND_EQ] = JCC_JE,
+ [TCG_COND_NE] = JCC_JNE,
+ [TCG_COND_LT] = JCC_JL,
+ [TCG_COND_GE] = JCC_JGE,
+ [TCG_COND_LE] = JCC_JLE,
+ [TCG_COND_GT] = JCC_JG,
+ [TCG_COND_LTU] = JCC_JB,
+ [TCG_COND_GEU] = JCC_JAE,
+ [TCG_COND_LEU] = JCC_JBE,
+ [TCG_COND_GTU] = JCC_JA,
+};
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
+{
+ int rex;
+
+ if (opc & P_GS) {
+ tcg_out8(s, 0x65);
+ }
+ if (opc & P_DATA16) {
+ /* We should never be asking for both 16 and 64-bit operation. */
+ assert((opc & P_REXW) == 0);
+ tcg_out8(s, 0x66);
+ }
+ if (opc & P_ADDR32) {
+ tcg_out8(s, 0x67);
+ }
+
+ rex = 0;
+ rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
+ rex |= (r & 8) >> 1; /* REX.R */
+ rex |= (x & 8) >> 2; /* REX.X */
+ rex |= (rm & 8) >> 3; /* REX.B */
+
+ /* P_REXB_{R,RM} indicates that the given register is the low byte.
+ For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
+ as otherwise the encoding indicates %[abcd]h. Note that the values
+ that are ORed in merely indicate that the REX byte must be present;
+ those bits get discarded in output. */
+ rex |= opc & (r >= 4 ? P_REXB_R : 0);
+ rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
+
+ if (rex) {
+ tcg_out8(s, (uint8_t)(rex | 0x40));
+ }
+
+ if (opc & (P_EXT | P_EXT38)) {
+ tcg_out8(s, 0x0f);
+ if (opc & P_EXT38) {
+ tcg_out8(s, 0x38);
+ }
+ }
+
+ tcg_out8(s, opc);
+}
+#else
+static void tcg_out_opc(TCGContext *s, int opc)
+{
+ if (opc & P_DATA16) {
+ tcg_out8(s, 0x66);
+ }
+ if (opc & (P_EXT | P_EXT38)) {
+ tcg_out8(s, 0x0f);
+ if (opc & P_EXT38) {
+ tcg_out8(s, 0x38);
+ }
+ }
+ tcg_out8(s, opc);
+}
+/* Discard the register arguments to tcg_out_opc early, so as not to penalize
+ the 32-bit compilation paths. This method works with all versions of gcc,
+ whereas relying on optimization may not be able to exclude them. */
+#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
+#endif
+
+static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
+{
+ tcg_out_opc(s, opc, r, rm, 0);
+ tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
+static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
+{
+ int tmp;
+
+ if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
+ /* Three byte VEX prefix. */
+ tcg_out8(s, 0xc4);
+
+ /* VEX.m-mmmm */
+ if (opc & P_EXT38) {
+ tmp = 2;
+ } else if (opc & P_EXT) {
+ tmp = 1;
+ } else {
+ tcg_abort();
+ }
+ tmp |= 0x40; /* VEX.X */
+ tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
+ tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
+ tcg_out8(s, tmp);
+
+ tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
+ } else {
+ /* Two byte VEX prefix. */
+ tcg_out8(s, 0xc5);
+
+ tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
+ }
+ /* VEX.pp */
+ if (opc & P_DATA16) {
+ tmp |= 1; /* 0x66 */
+ } else if (opc & P_SIMDF3) {
+ tmp |= 2; /* 0xf3 */
+ } else if (opc & P_SIMDF2) {
+ tmp |= 3; /* 0xf2 */
+ }
+ tmp |= (~v & 15) << 3; /* VEX.vvvv */
+ tcg_out8(s, tmp);
+ tcg_out8(s, opc);
+ tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
+/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
+   Either RM or INDEX may be omitted by passing a negative value.  In 64-bit
+ mode for absolute addresses, ~RM is the size of the immediate operand
+ that will follow the instruction. */
+
+static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
+ int index, int shift, intptr_t offset)
+{
+ int mod, len;
+
+ if (index < 0 && rm < 0) {
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* Try for a rip-relative addressing mode. This has replaced
+ the 32-bit-mode absolute addressing encoding. */
+ intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
+ intptr_t disp = offset - pc;
+ if (disp == (int32_t)disp) {
+ tcg_out_opc(s, opc, r, 0, 0);
+ tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
+ tcg_out32(s, disp);
+ return;
+ }
+
+ /* Try for an absolute address encoding. This requires the
+ use of the MODRM+SIB encoding and is therefore larger than
+ rip-relative addressing. */
+ if (offset == (int32_t)offset) {
+ tcg_out_opc(s, opc, r, 0, 0);
+ tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
+ tcg_out8(s, (4 << 3) | 5);
+ tcg_out32(s, offset);
+ return;
+ }
+
+ /* ??? The memory isn't directly addressable. */
+ tcg_abort();
+ } else {
+ /* Absolute address. */
+ tcg_out_opc(s, opc, r, 0, 0);
+ tcg_out8(s, (r << 3) | 5);
+ tcg_out32(s, offset);
+ return;
+ }
+ }
+
+ /* Find the length of the immediate addend. Note that the encoding
+ that would be used for (%ebp) indicates absolute addressing. */
+ if (rm < 0) {
+ mod = 0, len = 4, rm = 5;
+ } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
+ mod = 0, len = 0;
+ } else if (offset == (int8_t)offset) {
+ mod = 0x40, len = 1;
+ } else {
+ mod = 0x80, len = 4;
+ }
+
+ /* Use a single byte MODRM format if possible. Note that the encoding
+ that would be used for %esp is the escape to the two byte form. */
+ if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
+ /* Single byte MODRM format. */
+ tcg_out_opc(s, opc, r, rm, 0);
+ tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+ } else {
+ /* Two byte MODRM+SIB format. */
+
+ /* Note that the encoding that would place %esp into the index
+ field indicates no index register. In 64-bit mode, the REX.X
+ bit counts, so %r12 can be used as the index. */
+ if (index < 0) {
+ index = 4;
+ } else {
+ assert(index != TCG_REG_ESP);
+ }
+
+ tcg_out_opc(s, opc, r, rm, index);
+ tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
+ tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
+ }
+
+ if (len == 1) {
+ tcg_out8(s, offset);
+ } else if (len == 4) {
+ tcg_out32(s, offset);
+ }
+}
+
+/* A simplification of the above with no index or shift. */
+static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
+ int rm, intptr_t offset)
+{
+ tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
+}
+
+/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
+static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
+{
+ /* Propagate an opcode prefix, such as P_REXW. */
+ int ext = subop & ~0x7;
+ subop &= 0x7;
+
+ tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+ TCGReg ret, TCGReg arg)
+{
+ if (arg != ret) {
+ int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+ tcg_out_modrm(s, opc, ret, arg);
+ }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ tcg_target_long diff;
+
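+    /* Prefer the shortest encoding: xor for zero, a plain 32-bit move when
+       the value zero-extends, a sign-extending 32-bit move, a pc-relative
+       lea, and only then the full 10-byte movq. */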
+ if (arg == 0) {
+ tgen_arithr(s, ARITH_XOR, ret, ret);
+ return;
+ }
+ if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
+ tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
+ tcg_out32(s, arg);
+ return;
+ }
+ if (arg == (int32_t)arg) {
+ tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
+ tcg_out32(s, arg);
+ return;
+ }
+
+ /* Try a 7 byte pc-relative lea before the 10 byte movq. */
+ diff = arg - ((uintptr_t)s->code_ptr + 7);
+ if (diff == (int32_t)diff) {
+ tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
+ tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
+ tcg_out32(s, diff);
+ return;
+ }
+
+ tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
+ tcg_out64(s, arg);
+}
+
+static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
+{
+ if (val == (int8_t)val) {
+ tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
+ tcg_out8(s, val);
+ } else if (val == (int32_t)val) {
+ tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
+ tcg_out32(s, val);
+ } else {
+ tcg_abort();
+ }
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg arg1, intptr_t arg2)
+{
+ int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+ tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+ tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
+}
+
+static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
+ tcg_target_long ofs, tcg_target_long val)
+{
+ int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
+ tcg_out_modrm_offset(s, opc, 0, base, ofs);
+ tcg_out32(s, val);
+}
+
+static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
+{
+ /* Propagate an opcode prefix, such as P_DATA16. */
+ int ext = subopc & ~0x7;
+ subopc &= 0x7;
+
+ if (count == 1) {
+ tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
+ } else {
+ tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
+ tcg_out8(s, count);
+ }
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_rolw_8(TCGContext *s, int reg)
+{
+ tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
+{
+ /* movzbl */
+ assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+ tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
+}
+
+static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
+{
+ /* movsbl */
+ assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+ tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
+{
+ /* movzwl */
+ tcg_out_modrm(s, OPC_MOVZWL, dest, src);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
+{
+ /* movsw[lq] */
+ tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
+}
+
+static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
+{
+ /* 32-bit mov zero extends. */
+ tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
+}
+
+static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
+{
+ tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
+}
+
+static inline void tcg_out_bswap64(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static void tgen_arithi(TCGContext *s, int c, int r0,
+ tcg_target_long val, int cf)
+{
+ int rexw = 0;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ rexw = c & -8;
+ c &= 7;
+ }
+
+ /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
+ partial flags update stalls on Pentium4 and are not recommended
+ by current Intel optimization manuals. */
+ if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
+ int is_inc = (c == ARITH_ADD) ^ (val < 0);
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* The single-byte increment encodings are re-tasked as the
+ REX prefixes. Use the MODRM encoding. */
+ tcg_out_modrm(s, OPC_GRP5 + rexw,
+ (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
+ } else {
+ tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
+ }
+ return;
+ }
+
+ if (c == ARITH_AND) {
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (val == 0xffffffffu) {
+ tcg_out_ext32u(s, r0, r0);
+ return;
+ }
+ if (val == (uint32_t)val) {
+ /* AND with no high bits set can use a 32-bit operation. */
+ rexw = 0;
+ }
+ }
+ if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
+ tcg_out_ext8u(s, r0, r0);
+ return;
+ }
+ if (val == 0xffffu) {
+ tcg_out_ext16u(s, r0, r0);
+ return;
+ }
+ }
+
+ if (val == (int8_t)val) {
+ tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
+ tcg_out8(s, val);
+ return;
+ }
+ if (rexw == 0 || val == (int32_t)val) {
+ tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
+ tcg_out32(s, val);
+ return;
+ }
+
+ tcg_abort();
+}
+
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+ if (val != 0) {
+ tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
+ }
+}
+
+/* Use SMALL != 0 to force a short forward branch. */
+static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
+{
+ int32_t val, val1;
+
+ if (l->has_value) {
+ val = tcg_pcrel_diff(s, l->u.value_ptr);
+ val1 = val - 2;
+ if ((int8_t)val1 == val1) {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_short);
+ } else {
+ tcg_out8(s, OPC_JCC_short + opc);
+ }
+ tcg_out8(s, val1);
+ } else {
+ if (small) {
+ tcg_abort();
+ }
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_long);
+ tcg_out32(s, val - 5);
+ } else {
+ tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
+ tcg_out32(s, val - 6);
+ }
+ }
+ } else if (small) {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_short);
+ } else {
+ tcg_out8(s, OPC_JCC_short + opc);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
+ s->code_ptr += 1;
+ } else {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_long);
+ } else {
+ tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
+ s->code_ptr += 4;
+ }
+}
+
+static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
+ int const_arg2, int rexw)
+{
+ if (const_arg2) {
+ if (arg2 == 0) {
+ /* test r, r */
+ tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
+ } else {
+ tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
+ }
+ } else {
+ tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
+ }
+}
+
+static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ TCGLabel *label, int small)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ TCGLabel *label, int small)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
+}
+#else
+/* XXX: we implement it at the target level to avoid having to
+   handle cross-basic-block temporaries */
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args, int small)
+{
+ TCGLabel *label_next = gen_new_label();
+ TCGLabel *label_this = arg_label(args[5]);
+
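+    /* Branch on the high words first: for EQ/NE they decide together with
+       the low words directly; for the ordered conditions a strict high-word
+       result branches immediately, otherwise (high words equal) fall
+       through to an unsigned comparison of the low words. */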
+ switch(args[4]) {
+ case TCG_COND_EQ:
+ tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+ label_this, small);
+ break;
+ case TCG_COND_NE:
+ tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ label_this, small);
+ tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
+ label_this, small);
+ break;
+ case TCG_COND_LT:
+ tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_LE:
+ tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GT:
+ tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GE:
+ tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_LTU:
+ tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_LEU:
+ tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GEU:
+ tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ default:
+ tcg_abort();
+ }
+ tcg_out_label(s, label_next, s->code_ptr);
+}
+#endif
+
+static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+ tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+ tcg_out_ext8u(s, dest, dest);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+ tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+ tcg_out_ext8u(s, dest, dest);
+}
+#else
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGArg new_args[6];
+ TCGLabel *label_true, *label_over;
+
+ memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+ if (args[0] == args[1] || args[0] == args[2]
+ || (!const_args[3] && args[0] == args[3])
+ || (!const_args[4] && args[0] == args[4])) {
+ /* When the destination overlaps with one of the argument
+ registers, don't do anything tricky. */
+ label_true = gen_new_label();
+ label_over = gen_new_label();
+
+ new_args[5] = label_arg(label_true);
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+ tcg_out_jxx(s, JCC_JMP, label_over, 1);
+ tcg_out_label(s, label_true, s->code_ptr);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+ tcg_out_label(s, label_over, s->code_ptr);
+ } else {
+ /* When the destination does not overlap one of the arguments,
+ clear the destination first, jump if cond false, and emit an
+ increment in the true case. This results in smaller code. */
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+ label_over = gen_new_label();
+ new_args[4] = tcg_invert_cond(new_args[4]);
+ new_args[5] = label_arg(label_over);
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+ tcg_out_label(s, label_over, s->code_ptr);
+ }
+}
+#endif
+
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg c1, TCGArg c2, int const_c2,
+ TCGArg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, 0);
+ if (have_cmov) {
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+ } else {
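+        /* Without CMOV, branch over the move when the condition is false;
+           the constraints force DEST to already hold the false-case value. */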
+ TCGLabel *over = gen_new_label();
+ tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
+ tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
+ tcg_out_label(s, over, s->code_ptr);
+ }
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg c1, TCGArg c2, int const_c2,
+ TCGArg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+}
+#endif
+
+static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
+{
+ intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
+
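+    /* A direct CALL/JMP rel32 is 5 bytes long, hence the -5 bias above.
+       If the displacement fits in 32 bits, use the direct form; otherwise
+       load the absolute address into the scratch register R10 and use an
+       indirect call or jump. */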
+ if (disp == (int32_t)disp) {
+ tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
+ tcg_out32(s, disp);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
+ tcg_out_modrm(s, OPC_GRP5,
+ call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
+ }
+}
+
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
+{
+ tcg_out_branch(s, 1, dest);
+}
+
+static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
+{
+ tcg_out_branch(s, 0, dest);
+}
+
+#if defined(CONFIG_SOFTMMU)
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[16] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_st_helpers[16] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+/* Perform the TLB load and compare.
+
+ Inputs:
+ ADDRLO and ADDRHI contain the low and high part of the address.
+
+ MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+ WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+ This should be offsetof addr_read or addr_write.
+
+ Outputs:
+ LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
+ positions of the displacements of forward jumps to the TLB miss case.
+
+ Second argument register is loaded with the low part of the address.
+ In the TLB hit case, it has been adjusted as indicated by the TLB
+ and so is a host address. In the TLB miss case, it continues to
+ hold a guest address.
+
+ First argument register is clobbered. */
+
+static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
+ int mem_index, TCGMemOp s_bits,
+ tcg_insn_unit **label_ptr, int which)
+{
+ const TCGReg r0 = TCG_REG_L0;
+ const TCGReg r1 = TCG_REG_L1;
+ TCGType ttype = TCG_TYPE_I32;
+ TCGType htype = TCG_TYPE_I32;
+ int trexw = 0, hrexw = 0;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (TARGET_LONG_BITS == 64) {
+ ttype = TCG_TYPE_I64;
+ trexw = P_REXW;
+ }
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
+ htype = TCG_TYPE_I64;
+ hrexw = P_REXW;
+ }
+ }
+
+ tcg_out_mov(s, htype, r0, addrlo);
+ tcg_out_mov(s, ttype, r1, addrlo);
+
+ tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ tgen_arithi(s, ARITH_AND + trexw, r1,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+ tgen_arithi(s, ARITH_AND + hrexw, r0,
+ (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
+
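+    /* At this point r1 holds the address masked to the page, with the low
+       S_BITS bits preserved so that an unaligned access fails the compare
+       below, and r0 holds the byte offset of the candidate TLB entry.  The
+       LEA below turns r0 into a pointer to that entry's addr_read or
+       addr_write field (selected by WHICH). */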
+ tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
+ offsetof(CPUArchState, tlb_table[mem_index][0])
+ + which);
+
+ /* cmp 0(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
+
+    /* Prepare for both the fast path add of the tlb addend, and the slow
+       path function argument setup.  There are two cases worth noting:
+ For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
+ before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
+ copies the entire guest address for the slow path, while truncation
+ for the 32-bit host happens with the fastpath ADDL below. */
+ tcg_out_mov(s, ttype, r1, addrlo);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ label_ptr[0] = s->code_ptr;
+ s->code_ptr += 4;
+
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ /* cmp 4(r0), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ label_ptr[1] = s->code_ptr;
+ s->code_ptr += 4;
+ }
+
+ /* TLB Hit. */
+
+ /* add addend(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
+ offsetof(CPUTLBEntry, addend) - which);
+}
+
+/*
+ * Record the context of a call to the out-of-line helper code for the slow
+ * path of a load or store, so that the correct helper code can be generated
+ * later.
+ */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addrhi,
+ tcg_insn_unit *raddr,
+ tcg_insn_unit **label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = datalo;
+ label->datahi_reg = datahi;
+ label->addrlo_reg = addrlo;
+ label->addrhi_reg = addrhi;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr[0];
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ label->label_ptr[1] = label_ptr[1];
+ }
+}
+
+/*
+ * Generate code for the slow path of a load, emitted at the end of the block
+ */
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ TCGMemOpIdx oi = l->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGReg data_reg;
+ tcg_insn_unit **label_ptr = &l->label_ptr[0];
+
+ /* resolve label address */
+ tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
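+        /* On a 32-bit host the helper arguments are passed on the stack:
+           env, addrlo[, addrhi], oi, and the return address within the TB. */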
+ int ofs = 0;
+
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+ }
+
+ tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
+ ofs += 4;
+
+ tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
+ (uintptr_t)l->raddr);
+ }
+
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ data_reg = l->datalo_reg;
+ switch (opc & MO_SSIZE) {
+ case MO_SB:
+ tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+ case MO_SW:
+ tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case MO_SL:
+ tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+ break;
+#endif
+ case MO_UB:
+ case MO_UW:
+ /* Note that the helpers have zero-extended to tcg_target_long. */
+ case MO_UL:
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ break;
+ case MO_Q:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+ } else if (data_reg == TCG_REG_EDX) {
+ /* xchg %edx, %eax */
+ tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+
+    /* Jump back to the code following the qemu_ld that took this slow path. */
+ tcg_out_jmp(s, l->raddr);
+}
+
+/*
+ * Generate code for the slow path for a store at the end of block
+ */
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ TCGMemOpIdx oi = l->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGMemOp s_bits = opc & MO_SIZE;
+ tcg_insn_unit **label_ptr = &l->label_ptr[0];
+ TCGReg retaddr;
+
+ /* resolve label address */
+ tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ int ofs = 0;
+
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+ }
+
+ tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ if (s_bits == MO_64) {
+ tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+ }
+
+ tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi);
+ ofs += 4;
+
+ retaddr = TCG_REG_EAX;
+ tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+ tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
+ tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ tcg_target_call_iarg_regs[2], l->datalo_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
+
+ if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
+ retaddr = tcg_target_call_iarg_regs[4];
+ tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+ } else {
+ retaddr = TCG_REG_RAX;
+ tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+ tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
+ TCG_TARGET_CALL_STACK_OFFSET);
+ }
+ }
+
+ /* "Tail call" to the helper, with the return address back inline. */
+ tcg_out_push(s, retaddr);
+ tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+}
+#elif defined(__x86_64__) && defined(__linux__)
+# include <asm/prctl.h>
+# include <sys/prctl.h>
+
+int arch_prctl(int code, unsigned long addr);
+
+static int guest_base_flags;
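+/* For user-only emulation on an x86-64 Linux host, try to point the %gs
+   segment base at GUEST_BASE via arch_prctl(ARCH_SET_GS), so that guest
+   addresses can be reached with a GS segment override (P_GS) instead of
+   explicit base arithmetic. */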
+static inline void setup_guest_base_seg(void)
+{
+ if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
+ guest_base_flags = P_GS;
+ }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif /* SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+ TCGReg base, int index, intptr_t ofs,
+ int seg, TCGMemOp memop)
+{
+ const TCGMemOp real_bswap = memop & MO_BSWAP;
+ TCGMemOp bswap = real_bswap;
+ int movop = OPC_MOVL_GvEv;
+
+ if (have_movbe && real_bswap) {
+ bswap = 0;
+ movop = OPC_MOVBE_GyMy;
+ }
+
+ switch (memop & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
+ base, index, 0, ofs);
+ break;
+ case MO_SB:
+ tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
+ base, index, 0, ofs);
+ break;
+ case MO_UW:
+ tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+ base, index, 0, ofs);
+ if (real_bswap) {
+ tcg_out_rolw_8(s, datalo);
+ }
+ break;
+ case MO_SW:
+ if (real_bswap) {
+ if (have_movbe) {
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+ base, index, 0, ofs);
+ tcg_out_rolw_8(s, datalo);
+ }
+ tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
+ datalo, base, index, 0, ofs);
+ }
+ break;
+ case MO_UL:
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
+ if (bswap) {
+ tcg_out_bswap32(s, datalo);
+ }
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case MO_SL:
+ if (real_bswap) {
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+ base, index, 0, ofs);
+ if (bswap) {
+ tcg_out_bswap32(s, datalo);
+ }
+ tcg_out_ext32s(s, datalo, datalo);
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
+ base, index, 0, ofs);
+ }
+ break;
+#endif
+ case MO_Q:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
+ base, index, 0, ofs);
+ if (bswap) {
+ tcg_out_bswap64(s, datalo);
+ }
+ } else {
+ if (real_bswap) {
+ int t = datalo;
+ datalo = datahi;
+ datahi = t;
+ }
+ if (base != datalo) {
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+ base, index, 0, ofs);
+ tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+ base, index, 0, ofs + 4);
+ } else {
+ tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+ base, index, 0, ofs + 4);
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+ base, index, 0, ofs);
+ }
+ if (bswap) {
+ tcg_out_bswap32(s, datalo);
+ tcg_out_bswap32(s, datahi);
+ }
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
+   EAX.  That will be useful once fixed-register globals are less
+   common. */
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg datalo, datahi, addrlo;
+ TCGReg addrhi __attribute__((unused));
+ TCGMemOpIdx oi;
+ TCGMemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ int mem_index;
+ TCGMemOp s_bits;
+ tcg_insn_unit *label_ptr[2];
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = get_mmuidx(oi);
+ s_bits = opc & MO_SIZE;
+
+ tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+ label_ptr, offsetof(CPUTLBEntry, addr_read));
+
+ /* TLB Hit. */
+ tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
+
+ /* Record the current context of a load into ldst label */
+ add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else
+ {
+ int32_t offset = GUEST_BASE;
+ TCGReg base = addrlo;
+ int index = -1;
+ int seg = 0;
+
+        /* For a 32-bit guest, the high 32 bits of the address register may
+           contain garbage.  They can be ignored with the ADDR32 prefix when
+           we're not using a guest base, or when using segmentation.
+           Otherwise we need to zero-extend manually. */
+ if (GUEST_BASE == 0 || guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ seg |= P_ADDR32;
+ }
+ } else if (TCG_TARGET_REG_BITS == 64) {
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ }
+ if (offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+ index = TCG_REG_L1;
+ offset = 0;
+ }
+ }
+
+ tcg_out_qemu_ld_direct(s, datalo, datahi,
+ base, index, offset, seg, opc);
+ }
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+ TCGReg base, intptr_t ofs, int seg,
+ TCGMemOp memop)
+{
+ /* ??? Ideally we wouldn't need a scratch register. For user-only,
+ we could perform the bswap twice to restore the original value
+ instead of moving to the scratch. But as it is, the L constraint
+ means that TCG_REG_L0 is definitely free here. */
+ const TCGReg scratch = TCG_REG_L0;
+ const TCGMemOp real_bswap = memop & MO_BSWAP;
+ TCGMemOp bswap = real_bswap;
+ int movop = OPC_MOVL_EvGv;
+
+ if (have_movbe && real_bswap) {
+ bswap = 0;
+ movop = OPC_MOVBE_MyGy;
+ }
+
+ switch (memop & MO_SIZE) {
+ case MO_8:
+ /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
+ Use the scratch register if necessary. */
+ if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+ datalo = scratch;
+ }
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+ datalo, base, ofs);
+ break;
+ case MO_16:
+ if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+ tcg_out_rolw_8(s, scratch);
+ datalo = scratch;
+ }
+ tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
+ break;
+ case MO_32:
+ if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+ tcg_out_bswap32(s, scratch);
+ datalo = scratch;
+ }
+ tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+ break;
+ case MO_64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
+ tcg_out_bswap64(s, scratch);
+ datalo = scratch;
+ }
+ tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
+ } else if (bswap) {
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
+ tcg_out_bswap32(s, scratch);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
+ tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+ tcg_out_bswap32(s, scratch);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
+ } else {
+ if (real_bswap) {
+ int t = datalo;
+ datalo = datahi;
+ datahi = t;
+ }
+ tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
+ tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg datalo, datahi, addrlo;
+ TCGReg addrhi __attribute__((unused));
+ TCGMemOpIdx oi;
+ TCGMemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ int mem_index;
+ TCGMemOp s_bits;
+ tcg_insn_unit *label_ptr[2];
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = get_mmuidx(oi);
+ s_bits = opc & MO_SIZE;
+
+ tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+ label_ptr, offsetof(CPUTLBEntry, addr_write));
+
+ /* TLB Hit. */
+ tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+
+ /* Record the current context of a store into ldst label */
+ add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else
+ {
+ int32_t offset = GUEST_BASE;
+ TCGReg base = addrlo;
+ int seg = 0;
+
+ /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
+ if (GUEST_BASE == 0 || guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ seg |= P_ADDR32;
+ }
+ } else if (TCG_TARGET_REG_BITS == 64) {
+ /* ??? Note that we can't use the same SIB addressing scheme
+ as for loads, since we require L0 free for bswap. */
+ if (offset != GUEST_BASE) {
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, TCG_REG_L0, base);
+ base = TCG_REG_L0;
+ }
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+ base = TCG_REG_L1;
+ offset = 0;
+ } else if (TARGET_LONG_BITS == 32) {
+ tcg_out_ext32u(s, TCG_REG_L1, base);
+ base = TCG_REG_L1;
+ }
+ }
+
+ tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
+ }
+#endif
+}
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg *args, const int *const_args)
+{
+ int c, vexop, rexw = 0;
+
+#if TCG_TARGET_REG_BITS == 64
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i64): \
+ rexw = P_REXW; /* FALLTHRU */ \
+ case glue(glue(INDEX_op_, x), _i32)
+#else
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i32)
+#endif
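+/* OP_32_64(x) expands to the _i64 case label (which sets REXW and falls
+   through) followed by the _i32 case label on 64-bit hosts, and to just
+   the _i32 case label on 32-bit hosts. */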
+
+ switch(opc) {
+ case INDEX_op_exit_tb:
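+        /* Place the exit value in EAX/RAX and jump to the common epilogue
+           at tb_ret_addr. */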
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
+ tcg_out_jmp(s, tb_ret_addr);
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ /* direct jump method */
+ tcg_out8(s, OPC_JMP_long); /* jmp im */
+ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+ tcg_out32(s, 0);
+ } else {
+ /* indirect jump method */
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
+ (intptr_t)(s->tb_next + args[0]));
+ }
+ s->tb_next_offset[args[0]] = tcg_current_code_size(s);
+ break;
+ case INDEX_op_br:
+ tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
+ break;
+ OP_32_64(ld8u):
+ /* Note that we can ignore REXW for the zero-extend to 64-bit. */
+ tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
+ break;
+ OP_32_64(ld8s):
+ tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
+ break;
+ OP_32_64(ld16u):
+ /* Note that we can ignore REXW for the zero-extend to 64-bit. */
+ tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
+ break;
+ OP_32_64(ld16s):
+ tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_ld32u_i64:
+#endif
+ case INDEX_op_ld_i32:
+ tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+
+ OP_32_64(st8):
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
+ 0, args[1], args[2]);
+ tcg_out8(s, args[0]);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+ args[0], args[1], args[2]);
+ }
+ break;
+ OP_32_64(st16):
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
+ 0, args[1], args[2]);
+ tcg_out16(s, args[0]);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
+ args[0], args[1], args[2]);
+ }
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_st32_i64:
+#endif
+ case INDEX_op_st_i32:
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
+ tcg_out32(s, args[0]);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ }
+ break;
+
+ OP_32_64(add):
+ /* For 3-operand addition, use LEA. */
+ if (args[0] != args[1]) {
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
+
+ if (const_args[2]) {
+ c3 = a2, a2 = -1;
+ } else if (a0 == a2) {
+ /* Watch out for dest = src + dest, since we've removed
+ the matching constraint on the add. */
+ tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
+ break;
+ }
+
+ tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
+ break;
+ }
+ c = ARITH_ADD;
+ goto gen_arith;
+ OP_32_64(sub):
+ c = ARITH_SUB;
+ goto gen_arith;
+ OP_32_64(and):
+ c = ARITH_AND;
+ goto gen_arith;
+ OP_32_64(or):
+ c = ARITH_OR;
+ goto gen_arith;
+ OP_32_64(xor):
+ c = ARITH_XOR;
+ goto gen_arith;
+ gen_arith:
+ if (const_args[2]) {
+ tgen_arithi(s, c + rexw, args[0], args[2], 0);
+ } else {
+ tgen_arithr(s, c + rexw, args[0], args[2]);
+ }
+ break;
+
+ OP_32_64(andc):
+ if (const_args[2]) {
+ tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
+ args[0], args[1]);
+ tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
+ } else {
+ tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
+ }
+ break;
+
+ OP_32_64(mul):
+ if (const_args[2]) {
+ int32_t val;
+ val = args[2];
+ if (val == (int8_t)val) {
+ tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
+ tcg_out8(s, val);
+ } else {
+ tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
+ tcg_out32(s, val);
+ }
+ } else {
+ tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
+ }
+ break;
+
+ OP_32_64(div2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
+ break;
+ OP_32_64(divu2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
+ break;
+
+ OP_32_64(shl):
+ c = SHIFT_SHL;
+ vexop = OPC_SHLX;
+ goto gen_shift_maybe_vex;
+ OP_32_64(shr):
+ c = SHIFT_SHR;
+ vexop = OPC_SHRX;
+ goto gen_shift_maybe_vex;
+ OP_32_64(sar):
+ c = SHIFT_SAR;
+ vexop = OPC_SARX;
+ goto gen_shift_maybe_vex;
+ OP_32_64(rotl):
+ c = SHIFT_ROL;
+ goto gen_shift;
+ OP_32_64(rotr):
+ c = SHIFT_ROR;
+ goto gen_shift;
+ gen_shift_maybe_vex:
+ if (have_bmi2 && !const_args[2]) {
+ tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
+ break;
+ }
+ /* FALLTHRU */
+ gen_shift:
+ if (const_args[2]) {
+ tcg_out_shifti(s, c + rexw, args[0], args[2]);
+ } else {
+ tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
+ }
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
+ arg_label(args[3]), 0);
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond32(s, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond32(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
+
+ OP_32_64(bswap16):
+ tcg_out_rolw_8(s, args[0]);
+ break;
+ OP_32_64(bswap32):
+ tcg_out_bswap32(s, args[0]);
+ break;
+
+ OP_32_64(neg):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
+ break;
+ OP_32_64(not):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
+ break;
+
+ OP_32_64(ext8s):
+ tcg_out_ext8s(s, args[0], args[1], rexw);
+ break;
+ OP_32_64(ext16s):
+ tcg_out_ext16s(s, args[0], args[1], rexw);
+ break;
+ OP_32_64(ext8u):
+ tcg_out_ext8u(s, args[0], args[1]);
+ break;
+ OP_32_64(ext16u):
+ tcg_out_ext16u(s, args[0], args[1]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, 0);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, 1);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, 0);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, 1);
+ break;
+
+ OP_32_64(mulu2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
+ break;
+ OP_32_64(muls2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
+ break;
+ OP_32_64(add2):
+ if (const_args[4]) {
+ tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
+ } else {
+ tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
+ }
+ if (const_args[5]) {
+ tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
+ } else {
+ tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
+ }
+ break;
+ OP_32_64(sub2):
+ if (const_args[4]) {
+ tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
+ } else {
+ tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
+ }
+ if (const_args[5]) {
+ tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
+ } else {
+ tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
+ }
+ break;
+
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args, const_args, 0);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+#else /* TCG_TARGET_REG_BITS == 64 */
+ case INDEX_op_ld32s_i64:
+ tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i64:
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
+ 0, args[1], args[2]);
+ tcg_out32(s, args[0]);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ }
+ break;
+
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
+ arg_label(args[3]), 0);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond64(s, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond64(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
+
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, args[0]);
+ break;
+ case INDEX_op_ext32u_i64:
+ tcg_out_ext32u(s, args[0], args[1]);
+ break;
+ case INDEX_op_ext32s_i64:
+ tcg_out_ext32s(s, args[0], args[1]);
+ break;
+#endif
+
+ OP_32_64(deposit):
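+        /* Only the (ofs, len) pairs accepted by TCG_TARGET_deposit_i32_valid
+           reach this point: (0, 8), (8, 8) and (0, 16). */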
+ if (args[3] == 0 && args[4] == 8) {
+ /* load bits 0..7 */
+ tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
+ args[2], args[0]);
+ } else if (args[3] == 8 && args[4] == 8) {
+ /* load bits 8..15 */
+ tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
+ } else if (args[3] == 0 && args[4] == 16) {
+ /* load bits 0..15 */
+ tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
+ } else {
+ tcg_abort();
+ }
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+
+#undef OP_32_64
+}
+
+static const TCGTargetOpDef x86_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_br, { } },
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "qi", "r" } },
+ { INDEX_op_st16_i32, { "ri", "r" } },
+ { INDEX_op_st_i32, { "ri", "r" } },
+
+ { INDEX_op_add_i32, { "r", "r", "ri" } },
+ { INDEX_op_sub_i32, { "r", "0", "ri" } },
+ { INDEX_op_mul_i32, { "r", "0", "ri" } },
+ { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_and_i32, { "r", "0", "ri" } },
+ { INDEX_op_or_i32, { "r", "0", "ri" } },
+ { INDEX_op_xor_i32, { "r", "0", "ri" } },
+ { INDEX_op_andc_i32, { "r", "r", "ri" } },
+
+ { INDEX_op_shl_i32, { "r", "0", "Ci" } },
+ { INDEX_op_shr_i32, { "r", "0", "Ci" } },
+ { INDEX_op_sar_i32, { "r", "0", "Ci" } },
+ { INDEX_op_rotl_i32, { "r", "0", "ci" } },
+ { INDEX_op_rotr_i32, { "r", "0", "ci" } },
+
+ { INDEX_op_brcond_i32, { "r", "ri" } },
+
+ { INDEX_op_bswap16_i32, { "r", "0" } },
+ { INDEX_op_bswap32_i32, { "r", "0" } },
+
+ { INDEX_op_neg_i32, { "r", "0" } },
+
+ { INDEX_op_not_i32, { "r", "0" } },
+
+ { INDEX_op_ext8s_i32, { "r", "q" } },
+ { INDEX_op_ext16s_i32, { "r", "r" } },
+ { INDEX_op_ext8u_i32, { "r", "q" } },
+ { INDEX_op_ext16u_i32, { "r", "r" } },
+
+ { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+
+ { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
+
+ { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
+ { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
+ { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+ { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+
+#if TCG_TARGET_REG_BITS == 32
+ { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
+ { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+#else
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+ { INDEX_op_st8_i64, { "ri", "r" } },
+ { INDEX_op_st16_i64, { "ri", "r" } },
+ { INDEX_op_st32_i64, { "ri", "r" } },
+ { INDEX_op_st_i64, { "re", "r" } },
+
+ { INDEX_op_add_i64, { "r", "r", "re" } },
+ { INDEX_op_mul_i64, { "r", "0", "re" } },
+ { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
+ { INDEX_op_sub_i64, { "r", "0", "re" } },
+ { INDEX_op_and_i64, { "r", "0", "reZ" } },
+ { INDEX_op_or_i64, { "r", "0", "re" } },
+ { INDEX_op_xor_i64, { "r", "0", "re" } },
+ { INDEX_op_andc_i64, { "r", "r", "rI" } },
+
+ { INDEX_op_shl_i64, { "r", "0", "Ci" } },
+ { INDEX_op_shr_i64, { "r", "0", "Ci" } },
+ { INDEX_op_sar_i64, { "r", "0", "Ci" } },
+ { INDEX_op_rotl_i64, { "r", "0", "ci" } },
+ { INDEX_op_rotr_i64, { "r", "0", "ci" } },
+
+ { INDEX_op_brcond_i64, { "r", "re" } },
+ { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+ { INDEX_op_bswap16_i64, { "r", "0" } },
+ { INDEX_op_bswap32_i64, { "r", "0" } },
+ { INDEX_op_bswap64_i64, { "r", "0" } },
+ { INDEX_op_neg_i64, { "r", "0" } },
+ { INDEX_op_not_i64, { "r", "0" } },
+
+ { INDEX_op_ext8s_i64, { "r", "r" } },
+ { INDEX_op_ext16s_i64, { "r", "r" } },
+ { INDEX_op_ext32s_i64, { "r", "r" } },
+ { INDEX_op_ext8u_i64, { "r", "r" } },
+ { INDEX_op_ext16u_i64, { "r", "r" } },
+ { INDEX_op_ext32u_i64, { "r", "r" } },
+
+ { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
+ { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
+
+ { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
+ { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
+ { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
+ { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_qemu_ld_i32, { "r", "L" } },
+ { INDEX_op_qemu_st_i32, { "L", "L" } },
+ { INDEX_op_qemu_ld_i64, { "r", "L" } },
+ { INDEX_op_qemu_st_i64, { "L", "L" } },
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ { INDEX_op_qemu_ld_i32, { "r", "L" } },
+ { INDEX_op_qemu_st_i32, { "L", "L" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
+ { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
+#else
+ { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
+ { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
+ { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
+#endif
+ { -1 },
+};
+
+static int tcg_target_callee_save_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_RBP,
+ TCG_REG_RBX,
+#if defined(_WIN64)
+ TCG_REG_RDI,
+ TCG_REG_RSI,
+#endif
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14, /* Currently used for the global env. */
+ TCG_REG_R15,
+#else
+ TCG_REG_EBP, /* Currently used for the global env. */
+ TCG_REG_EBX,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+#endif
+};
+
+/* Compute frame size via macros, to share between tcg_target_qemu_prologue
+ and tcg_register_jit. */
+
+#define PUSH_SIZE \
+ ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
+ * (TCG_TARGET_REG_BITS / 8))
+
+#define FRAME_SIZE \
+ ((PUSH_SIZE \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & ~(TCG_TARGET_STACK_ALIGN - 1))
+
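+/* For example, on a non-Windows x86-64 host tcg_target_callee_save_regs has
+   6 entries, so with the return address PUSH_SIZE is 7 * 8 = 56 bytes;
+   FRAME_SIZE then rounds the total, including the static call-argument area
+   and the TCG temporary buffer, up to a multiple of TCG_TARGET_STACK_ALIGN. */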
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i, stack_addend;
+
+ /* TB prologue */
+
+ /* Reserve some stack space, also for TCG temps. */
+ stack_addend = FRAME_SIZE - PUSH_SIZE;
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+ /* Save all callee saved registers. */
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_push(s, tcg_target_callee_save_regs[i]);
+ }
+
+#if TCG_TARGET_REG_BITS == 32
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ /* jmp *tb. */
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
+ + stack_addend);
+#else
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ /* jmp *tb. */
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+#endif
+
+ /* TB epilogue */
+ tb_ret_addr = s->code_ptr;
+
+ tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
+
+ for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+ tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+ }
+ tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+ /* Try to set up a segment register to point to GUEST_BASE. */
+ if (GUEST_BASE) {
+ setup_guest_base_seg();
+ }
+#endif
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+#ifdef CONFIG_CPUID_H
+ unsigned a, b, c, d;
+ int max = __get_cpuid_max(0, 0);
+
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+#ifndef have_cmov
+        /* For 32-bit hosts it is nearly certain that the hardware supports
+           CMOV, but we still need to check.  If CMOV is not available we
+           fall back to a small forward branch. */
+ have_cmov = (d & bit_CMOV) != 0;
+#endif
+#ifndef have_movbe
+ /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
+ need to probe for it. */
+ have_movbe = (c & bit_MOVBE) != 0;
+#endif
+ }
+
+ if (max >= 7) {
+ /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
+ __cpuid_count(7, 0, a, b, c, d);
+#ifdef bit_BMI
+ have_bmi1 = (b & bit_BMI) != 0;
+#endif
+#ifndef have_bmi2
+ have_bmi2 = (b & bit_BMI2) != 0;
+#endif
+ }
+#endif
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
+ } else {
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
+ }
+
+ tcg_regset_clear(tcg_target_call_clobber_regs);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
+ if (TCG_TARGET_REG_BITS == 64) {
+#if !defined(_WIN64)
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
+#endif
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
+ }
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+
+ tcg_add_target_add_op_defs(x86_op_defs);
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[14];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
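+/* A uleb128 value stores 7 bits per byte, least significant group first, with
+   the high bit set on every byte except the last; two bytes therefore cover
+   values up to 2^14 - 1, which is what fde_def_cfa below relies on. */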
+
+#if !defined(__ELF__)
+ /* Host machine without ELF. */
+#elif TCG_TARGET_REG_BITS == 64
+#define ELF_HOST_MACHINE EM_X86_64
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x78, /* sleb128 -8 */
+ .h.cie.return_column = 16,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 7, /* DW_CFA_def_cfa %rsp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x90, 1, /* DW_CFA_offset, %rip, -8 */
+ /* The following ordering must match tcg_target_callee_save_regs. */
+ 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
+ 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
+ 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
+ 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
+ 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
+ 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
+ }
+};
+#else
+#define ELF_HOST_MACHINE EM_386
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x7c, /* sleb128 -4 */
+ .h.cie.return_column = 8,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 4, /* DW_CFA_def_cfa %esp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x88, 1, /* DW_CFA_offset, %eip, -4 */
+ /* The following ordering must match tcg_target_callee_save_regs. */
+ 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
+ 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
+ 0x86, 4, /* DW_CFA_offset, %esi, -16 */
+ 0x87, 5, /* DW_CFA_offset, %edi, -20 */
+ }
+};
+#endif
+
+#if defined(ELF_HOST_MACHINE)
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
new file mode 100644
index 00000000..25b51335
--- /dev/null
+++ b/tcg/i386/tcg-target.h
@@ -0,0 +1,149 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TCG_TARGET_I386
+#define TCG_TARGET_I386 1
+
+#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
+
+#ifdef __x86_64__
+# define TCG_TARGET_REG_BITS 64
+# define TCG_TARGET_NB_REGS 16
+#else
+# define TCG_TARGET_REG_BITS 32
+# define TCG_TARGET_NB_REGS 8
+#endif
+
+typedef enum {
+ TCG_REG_EAX = 0,
+ TCG_REG_ECX,
+ TCG_REG_EDX,
+ TCG_REG_EBX,
+ TCG_REG_ESP,
+ TCG_REG_EBP,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+
+ /* 64-bit registers; always define the symbols to avoid
+ too much if-deffing. */
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_RAX = TCG_REG_EAX,
+ TCG_REG_RCX = TCG_REG_ECX,
+ TCG_REG_RDX = TCG_REG_EDX,
+ TCG_REG_RBX = TCG_REG_EBX,
+ TCG_REG_RSP = TCG_REG_ESP,
+ TCG_REG_RBP = TCG_REG_EBP,
+ TCG_REG_RSI = TCG_REG_ESI,
+ TCG_REG_RDI = TCG_REG_EDI,
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_ESP
+#define TCG_TARGET_STACK_ALIGN 16
+#if defined(_WIN64)
+#define TCG_TARGET_CALL_STACK_OFFSET 32
+#else
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
+
+extern bool have_bmi1;
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 have_bmi1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_trunc_shr_i32 0
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 have_bmi1
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#endif
+
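+/* These (ofs, len) pairs correspond exactly to the partial-register stores the
+   backend can emit directly: the low byte, the second byte (AH-style) and the
+   low 16-bit word. */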
+#define TCG_TARGET_deposit_i32_valid(ofs, len) \
+ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
+ ((ofs) == 0 && (len) == 16))
+#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
+
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_AREG0 TCG_REG_R14
+#else
+# define TCG_AREG0 TCG_REG_EBP
+#endif
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+}
+
+#endif
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
new file mode 100644
index 00000000..81cb9f79
--- /dev/null
+++ b/tcg/ia64/tcg-target.c
@@ -0,0 +1,2445 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009-2010 Aurelien Jarno <aurelien@aurel32.net>
+ * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * Register definitions
+ */
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
+ "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
+ "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
+};
+#endif
+
+#ifdef CONFIG_USE_GUEST_BASE
+#define TCG_GUEST_BASE_REG TCG_REG_R55
+#else
+#define TCG_GUEST_BASE_REG TCG_REG_R0
+#endif
+#ifndef GUEST_BASE
+#define GUEST_BASE 0
+#endif
+
+/* Branch registers */
+enum {
+ TCG_REG_B0 = 0,
+ TCG_REG_B1,
+ TCG_REG_B2,
+ TCG_REG_B3,
+ TCG_REG_B4,
+ TCG_REG_B5,
+ TCG_REG_B6,
+ TCG_REG_B7,
+};
+
+/* Floating point registers */
+enum {
+ TCG_REG_F0 = 0,
+ TCG_REG_F1,
+ TCG_REG_F2,
+ TCG_REG_F3,
+ TCG_REG_F4,
+ TCG_REG_F5,
+ TCG_REG_F6,
+ TCG_REG_F7,
+ TCG_REG_F8,
+ TCG_REG_F9,
+ TCG_REG_F10,
+ TCG_REG_F11,
+ TCG_REG_F12,
+ TCG_REG_F13,
+ TCG_REG_F14,
+ TCG_REG_F15,
+};
+
+/* Predicate registers */
+enum {
+ TCG_REG_P0 = 0,
+ TCG_REG_P1,
+ TCG_REG_P2,
+ TCG_REG_P3,
+ TCG_REG_P4,
+ TCG_REG_P5,
+ TCG_REG_P6,
+ TCG_REG_P7,
+ TCG_REG_P8,
+ TCG_REG_P9,
+ TCG_REG_P10,
+ TCG_REG_P11,
+ TCG_REG_P12,
+ TCG_REG_P13,
+ TCG_REG_P14,
+ TCG_REG_P15,
+};
+
+/* Application registers */
+enum {
+ TCG_REG_PFS = 64,
+};
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R35,
+ TCG_REG_R36,
+ TCG_REG_R37,
+ TCG_REG_R38,
+ TCG_REG_R39,
+ TCG_REG_R40,
+ TCG_REG_R41,
+ TCG_REG_R42,
+ TCG_REG_R43,
+ TCG_REG_R44,
+ TCG_REG_R45,
+ TCG_REG_R46,
+ TCG_REG_R47,
+ TCG_REG_R48,
+ TCG_REG_R49,
+ TCG_REG_R50,
+ TCG_REG_R51,
+ TCG_REG_R52,
+ TCG_REG_R53,
+ TCG_REG_R54,
+ TCG_REG_R55,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R16,
+ TCG_REG_R17,
+ TCG_REG_R18,
+ TCG_REG_R19,
+ TCG_REG_R20,
+ TCG_REG_R21,
+ TCG_REG_R22,
+ TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27,
+ TCG_REG_R28,
+ TCG_REG_R29,
+ TCG_REG_R30,
+ TCG_REG_R31,
+ TCG_REG_R56,
+ TCG_REG_R57,
+ TCG_REG_R58,
+ TCG_REG_R59,
+ TCG_REG_R60,
+ TCG_REG_R61,
+ TCG_REG_R62,
+ TCG_REG_R63,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11
+};
+
+static const int tcg_target_call_iarg_regs[8] = {
+ TCG_REG_R56,
+ TCG_REG_R57,
+ TCG_REG_R58,
+ TCG_REG_R59,
+ TCG_REG_R60,
+ TCG_REG_R61,
+ TCG_REG_R62,
+ TCG_REG_R63,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_R8
+};
+
+/*
+ * opcode formation
+ */
+
+/* bundle templates: stops (double bar in the IA64 manual) are marked with
+ an uppercase letter. */
+enum {
+ mii = 0x00,
+ miI = 0x01,
+ mIi = 0x02,
+ mII = 0x03,
+ mlx = 0x04,
+ mLX = 0x05,
+ mmi = 0x08,
+ mmI = 0x09,
+ Mmi = 0x0a,
+ MmI = 0x0b,
+ mfi = 0x0c,
+ mfI = 0x0d,
+ mmf = 0x0e,
+ mmF = 0x0f,
+ mib = 0x10,
+ miB = 0x11,
+ mbb = 0x12,
+ mbB = 0x13,
+ bbb = 0x16,
+ bbB = 0x17,
+ mmb = 0x18,
+ mmB = 0x19,
+ mfb = 0x1c,
+ mfB = 0x1d,
+};
+
+enum {
+ OPC_ADD_A1 = 0x10000000000ull,
+ OPC_AND_A1 = 0x10060000000ull,
+ OPC_AND_A3 = 0x10160000000ull,
+ OPC_ANDCM_A1 = 0x10068000000ull,
+ OPC_ANDCM_A3 = 0x10168000000ull,
+ OPC_ADDS_A4 = 0x10800000000ull,
+ OPC_ADDL_A5 = 0x12000000000ull,
+ OPC_ALLOC_M34 = 0x02c00000000ull,
+ OPC_BR_DPTK_FEW_B1 = 0x08400000000ull,
+ OPC_BR_SPTK_MANY_B1 = 0x08000001000ull,
+ OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull,
+ OPC_BR_SPTK_MANY_B4 = 0x00100001000ull,
+ OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull,
+ OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull,
+ OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull,
+ OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull,
+ OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull,
+ OPC_CMP_LT_A6 = 0x18000000000ull,
+ OPC_CMP_LTU_A6 = 0x1a000000000ull,
+ OPC_CMP_EQ_A6 = 0x1c000000000ull,
+ OPC_CMP4_LT_A6 = 0x18400000000ull,
+ OPC_CMP4_LTU_A6 = 0x1a400000000ull,
+ OPC_CMP4_EQ_A6 = 0x1c400000000ull,
+ OPC_DEP_I14 = 0x0ae00000000ull,
+ OPC_DEP_I15 = 0x08000000000ull,
+ OPC_DEP_Z_I12 = 0x0a600000000ull,
+ OPC_EXTR_I11 = 0x0a400002000ull,
+ OPC_EXTR_U_I11 = 0x0a400000000ull,
+ OPC_FCVT_FX_TRUNC_S1_F10 = 0x004d0000000ull,
+ OPC_FCVT_FXU_TRUNC_S1_F10 = 0x004d8000000ull,
+ OPC_FCVT_XF_F11 = 0x000e0000000ull,
+ OPC_FMA_S1_F1 = 0x10400000000ull,
+ OPC_FNMA_S1_F1 = 0x18400000000ull,
+ OPC_FRCPA_S1_F6 = 0x00600000000ull,
+ OPC_GETF_SIG_M19 = 0x08708000000ull,
+ OPC_LD1_M1 = 0x08000000000ull,
+ OPC_LD1_M3 = 0x0a000000000ull,
+ OPC_LD2_M1 = 0x08040000000ull,
+ OPC_LD2_M3 = 0x0a040000000ull,
+ OPC_LD4_M1 = 0x08080000000ull,
+ OPC_LD4_M3 = 0x0a080000000ull,
+ OPC_LD8_M1 = 0x080c0000000ull,
+ OPC_LD8_M3 = 0x0a0c0000000ull,
+ OPC_MUX1_I3 = 0x0eca0000000ull,
+ OPC_NOP_B9 = 0x04008000000ull,
+ OPC_NOP_F16 = 0x00008000000ull,
+ OPC_NOP_I18 = 0x00008000000ull,
+ OPC_NOP_M48 = 0x00008000000ull,
+ OPC_MOV_I21 = 0x00e00100000ull,
+ OPC_MOV_RET_I21 = 0x00e00500000ull,
+ OPC_MOV_I22 = 0x00188000000ull,
+ OPC_MOV_I_I26 = 0x00150000000ull,
+ OPC_MOVL_X2 = 0x0c000000000ull,
+ OPC_OR_A1 = 0x10070000000ull,
+ OPC_OR_A3 = 0x10170000000ull,
+ OPC_SETF_EXP_M18 = 0x0c748000000ull,
+ OPC_SETF_SIG_M18 = 0x0c708000000ull,
+ OPC_SHL_I7 = 0x0f240000000ull,
+ OPC_SHR_I5 = 0x0f220000000ull,
+ OPC_SHR_U_I5 = 0x0f200000000ull,
+ OPC_SHRP_I10 = 0x0ac00000000ull,
+ OPC_SXT1_I29 = 0x000a0000000ull,
+ OPC_SXT2_I29 = 0x000a8000000ull,
+ OPC_SXT4_I29 = 0x000b0000000ull,
+ OPC_ST1_M4 = 0x08c00000000ull,
+ OPC_ST2_M4 = 0x08c40000000ull,
+ OPC_ST4_M4 = 0x08c80000000ull,
+ OPC_ST8_M4 = 0x08cc0000000ull,
+ OPC_SUB_A1 = 0x10028000000ull,
+ OPC_SUB_A3 = 0x10128000000ull,
+ OPC_UNPACK4_L_I2 = 0x0f860000000ull,
+ OPC_XMA_L_F2 = 0x1d000000000ull,
+ OPC_XOR_A1 = 0x10078000000ull,
+ OPC_XOR_A3 = 0x10178000000ull,
+ OPC_ZXT1_I29 = 0x00080000000ull,
+ OPC_ZXT2_I29 = 0x00088000000ull,
+ OPC_ZXT4_I29 = 0x00090000000ull,
+
+ INSN_NOP_M = OPC_NOP_M48, /* nop.m 0 */
+ INSN_NOP_I = OPC_NOP_I18, /* nop.i 0 */
+};
+
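+/* The tcg_opc_* helpers below assemble a single 41-bit instruction slot from
+   an opcode and its operand fields; the field layout of each format (A1, A3,
+   B1, ...) follows the corresponding IA-64 instruction format. */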
+static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1,
+ int r2, int r3)
+{
+ return opc
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_a3(int qp, uint64_t opc, int r1,
+ uint64_t imm, int r3)
+{
+ return opc
+ | ((imm & 0x80) << 29) /* s */
+ | ((imm & 0x7f) << 13) /* imm7b */
+ | ((r3 & 0x7f) << 20)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_a4(int qp, uint64_t opc, int r1,
+ uint64_t imm, int r3)
+{
+ return opc
+ | ((imm & 0x2000) << 23) /* s */
+ | ((imm & 0x1f80) << 20) /* imm6d */
+ | ((imm & 0x007f) << 13) /* imm7b */
+ | ((r3 & 0x7f) << 20)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_a5(int qp, uint64_t opc, int r1,
+ uint64_t imm, int r3)
+{
+ return opc
+ | ((imm & 0x200000) << 15) /* s */
+ | ((imm & 0x1f0000) << 6) /* imm5c */
+ | ((imm & 0x00ff80) << 20) /* imm9d */
+ | ((imm & 0x00007f) << 13) /* imm7b */
+ | ((r3 & 0x03) << 20)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_a6(int qp, uint64_t opc, int p1,
+ int p2, int r2, int r3)
+{
+ return opc
+ | ((p2 & 0x3f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((p1 & 0x3f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100000) << 16) /* s */
+ | ((imm & 0x0fffff) << 13) /* imm20b */
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100000) << 16) /* s */
+ | ((imm & 0x0fffff) << 13) /* imm20b */
+ | ((b1 & 0x7) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2)
+{
+ return opc
+ | ((b2 & 0x7) << 13)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_b5(int qp, uint64_t opc, int b1, int b2)
+{
+ return opc
+ | ((b2 & 0x7) << 13)
+ | ((b1 & 0x7) << 6)
+ | (qp & 0x3f);
+}
+
+
+static inline uint64_t tcg_opc_b9(int qp, uint64_t opc, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100000) << 16) /* i */
+ | ((imm & 0x0fffff) << 6) /* imm20a */
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_f1(int qp, uint64_t opc, int f1,
+ int f3, int f4, int f2)
+{
+ return opc
+ | ((f4 & 0x7f) << 27)
+ | ((f3 & 0x7f) << 20)
+ | ((f2 & 0x7f) << 13)
+ | ((f1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_f2(int qp, uint64_t opc, int f1,
+ int f3, int f4, int f2)
+{
+ return opc
+ | ((f4 & 0x7f) << 27)
+ | ((f3 & 0x7f) << 20)
+ | ((f2 & 0x7f) << 13)
+ | ((f1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_f6(int qp, uint64_t opc, int f1,
+ int p2, int f2, int f3)
+{
+ return opc
+ | ((p2 & 0x3f) << 27)
+ | ((f3 & 0x7f) << 20)
+ | ((f2 & 0x7f) << 13)
+ | ((f1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_f10(int qp, uint64_t opc, int f1, int f2)
+{
+ return opc
+ | ((f2 & 0x7f) << 13)
+ | ((f1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_f11(int qp, uint64_t opc, int f1, int f2)
+{
+ return opc
+ | ((f2 & 0x7f) << 13)
+ | ((f1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_f16(int qp, uint64_t opc, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100000) << 16) /* i */
+ | ((imm & 0x0fffff) << 6) /* imm20a */
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i2(int qp, uint64_t opc, int r1,
+ int r2, int r3)
+{
+ return opc
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i3(int qp, uint64_t opc, int r1,
+ int r2, int mbtype)
+{
+ return opc
+ | ((mbtype & 0x0f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i5(int qp, uint64_t opc, int r1,
+ int r3, int r2)
+{
+ return opc
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i7(int qp, uint64_t opc, int r1,
+ int r2, int r3)
+{
+ return opc
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i10(int qp, uint64_t opc, int r1,
+ int r2, int r3, uint64_t count)
+{
+ return opc
+ | ((count & 0x3f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i11(int qp, uint64_t opc, int r1,
+ int r3, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((len & 0x3f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((pos & 0x3f) << 14)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
+ int r2, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((len & 0x3f) << 27)
+ | ((pos & 0x3f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm,
+ int r3, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((imm & 0x01) << 36)
+ | ((len & 0x3f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((pos & 0x3f) << 14)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2,
+ int r3, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((pos & 0x3f) << 31)
+ | ((len & 0x0f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100000) << 16) /* i */
+ | ((imm & 0x0fffff) << 6) /* imm20a */
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i21(int qp, uint64_t opc, int b1,
+ int r2, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x1ff) << 24)
+ | ((r2 & 0x7f) << 13)
+ | ((b1 & 0x7) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i22(int qp, uint64_t opc, int r1, int b2)
+{
+ return opc
+ | ((b2 & 0x7) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i26(int qp, uint64_t opc, int ar3, int r2)
+{
+ return opc
+ | ((ar3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i29(int qp, uint64_t opc, int r1, int r3)
+{
+ return opc
+ | ((r3 & 0x7f) << 20)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_l2(uint64_t imm)
+{
+ return (imm & 0x7fffffffffc00000ull) >> 22;
+}
+
+static inline uint64_t tcg_opc_l3(uint64_t imm)
+{
+ return (imm & 0x07fffffffff00000ull) >> 18;
+}
+
+#define tcg_opc_l4 tcg_opc_l3
+
+static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3)
+{
+ return opc
+ | ((r3 & 0x7f) << 20)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_m3(int qp, uint64_t opc, int r1,
+ int r3, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100) << 28) /* s */
+ | ((imm & 0x080) << 20) /* i */
+ | ((imm & 0x07f) << 13) /* imm7b */
+ | ((r3 & 0x7f) << 20)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_m4(int qp, uint64_t opc, int r2, int r3)
+{
+ return opc
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_m18(int qp, uint64_t opc, int f1, int r2)
+{
+ return opc
+ | ((r2 & 0x7f) << 13)
+ | ((f1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_m19(int qp, uint64_t opc, int r1, int f2)
+{
+ return opc
+ | ((f2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_m34(int qp, uint64_t opc, int r1,
+ int sof, int sol, int sor)
+{
+ return opc
+ | ((sor & 0x0f) << 27)
+ | ((sol & 0x7f) << 20)
+ | ((sof & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_m48(int qp, uint64_t opc, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x100000) << 16) /* i */
+ | ((imm & 0x0fffff) << 6) /* imm20a */
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_x2(int qp, uint64_t opc,
+ int r1, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x8000000000000000ull) >> 27) /* i */
+ | (imm & 0x0000000000200000ull) /* ic */
+ | ((imm & 0x00000000001f0000ull) << 6) /* imm5c */
+ | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */
+ | ((imm & 0x000000000000007full) << 13) /* imm7b */
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x0800000000000000ull) >> 23) /* i */
+ | ((imm & 0x00000000000fffffull) << 13) /* imm20b */
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+ return opc
+ | ((imm & 0x0800000000000000ull) >> 23) /* i */
+ | ((imm & 0x00000000000fffffull) << 13) /* imm20b */
+ | ((b1 & 0x7) << 6)
+ | (qp & 0x3f);
+}
+
+
+/*
+ * Relocations - Note that we only ever encode branches in slot 2.
+ */
+
+static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ uint64_t imm = target - pc;
+
+ pc->hi = (pc->hi & 0xf700000fffffffffull)
+ | ((imm & 0x100000) << 39) /* s */
+ | ((imm & 0x0fffff) << 36); /* imm20b */
+}
+
+static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc)
+{
+ int64_t high = pc->hi;
+
+ return ((high >> 39) & 0x100000) + /* s */
+ ((high >> 36) & 0x0fffff); /* imm20b */
+}
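+
+/* Both helpers above operate on the s/imm20b field in slot 2 of the
+   bundle's high word.  Since tcg_insn_unit is the 16-byte bundle, the
+   pointer difference "target - pc" is already expressed in bundles,
+   which is the unit used by IA-64 IP-relative branch displacements.  */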
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ assert(addend == 0);
+ assert(type == R_IA64_PCREL21B);
+ reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value);
+}
+
+/*
+ * Constraints
+ */
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch(ct_str[0]) {
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set(ct->u.regs, 0xffffffffffffffffull);
+ break;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_S22;
+ break;
+ case 'S':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set(ct->u.regs, 0xffffffffffffffffull);
+#if defined(CONFIG_SOFTMMU)
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58);
+#endif
+ break;
+ case 'Z':
+ /* We are cheating a bit here, using the fact that the register
+ r0 is also the register number 0. Hence there is no need
+ to check for const_args in each instruction. */
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct;
+ ct = arg_ct->ct;
+ if (ct & TCG_CT_CONST)
+ return 1;
+ else if ((ct & TCG_CT_CONST_ZERO) && val == 0)
+ return 1;
+ else if ((ct & TCG_CT_CONST_S22) && val == ((int32_t)val << 10) >> 10)
+ return 1;
+ else
+ return 0;
+}
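+
+/* The S22 test accepts only values that survive sign-extension from
+   bit 21: for example 0x1fffff matches but 0x200000 does not.  This
+   mirrors the 22-bit immediate of the addl A5 form used by
+   tcg_opc_movi_a() below.  */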
+
+/*
+ * Code generation
+ */
+
+static tcg_insn_unit *tb_ret_addr;
+
+static inline void tcg_out_bundle(TCGContext *s, int template,
+ uint64_t slot0, uint64_t slot1,
+ uint64_t slot2)
+{
+ template &= 0x1f; /* 5 bits */
+ slot0 &= 0x1ffffffffffull; /* 41 bits */
+ slot1 &= 0x1ffffffffffull; /* 41 bits */
+ slot2 &= 0x1ffffffffffull; /* 41 bits */
+
+ *s->code_ptr++ = (tcg_insn_unit){
+ (slot1 << 46) | (slot0 << 5) | template,
+ (slot2 << 23) | (slot1 >> 18)
+ };
+}
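+
+/* A bundle packs the 5-bit template and three 41-bit slots into 128 bits:
+   bits 0-4 hold the template, bits 5-45 slot 0, bits 46-86 slot 1 (split
+   across the lo/hi words), and bits 87-127 slot 2.  The template names
+   used below (mmI, mII, mLX, miB, ...) name the execution unit assigned
+   to each slot: M memory, I integer, B branch, L+X long immediate.  */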
+
+static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src)
+{
+ return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src);
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+ TCGReg ret, TCGReg arg)
+{
+ tcg_out_bundle(s, mmI,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_mov_a(TCG_REG_P0, ret, arg));
+}
+
+static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src)
+{
+ assert(src == sextract64(src, 0, 22));
+ return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0);
+}
+
+static inline void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg reg, tcg_target_long arg)
+{
+ tcg_out_bundle(s, mLX,
+ INSN_NOP_M,
+ tcg_opc_l2 (arg),
+ tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg));
+}
+
+static void tcg_out_br(TCGContext *s, TCGLabel *l)
+{
+ uint64_t imm;
+
+    /* Take care not to modify the branch target: read the existing
+       displacement and reuse it.  This ensures that caches and memory
+       are kept coherent during retranslation. */
+ if (l->has_value) {
+ imm = l->u.value_ptr - s->code_ptr;
+ } else {
+ imm = get_reloc_pcrel21b_slot2(s->code_ptr);
+ tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0);
+ }
+
+ tcg_out_bundle(s, mmB,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm));
+}
+
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc)
+{
+ uintptr_t func = desc->lo, gp = desc->hi, disp;
+
+ /* Look through the function descriptor. */
+ tcg_out_bundle(s, mlx,
+ INSN_NOP_M,
+ tcg_opc_l2 (gp),
+ tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp));
+ disp = (tcg_insn_unit *)func - s->code_ptr;
+ tcg_out_bundle(s, mLX,
+ INSN_NOP_M,
+ tcg_opc_l4 (disp),
+ tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4,
+ TCG_REG_B0, disp));
+}
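+
+/* An IA-64 function pointer designates a two-word descriptor rather than
+   code: word 0 is the entry point and word 1 the callee's gp.  The call
+   sequence above therefore installs the new gp in r1 before the long
+   IP-relative brl to the entry point; tcg_target_qemu_prologue() emits a
+   matching ad hoc descriptor for the generated code itself.  */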
+
+static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg)
+{
+ uint64_t imm, opc1;
+
+ /* At least arg == 0 is a common operation. */
+ if (arg == sextract64(arg, 0, 22)) {
+ opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg);
+ opc1 = INSN_NOP_M;
+ }
+
+ imm = tb_ret_addr - s->code_ptr;
+
+ tcg_out_bundle(s, mLX,
+ opc1,
+ tcg_opc_l3 (imm),
+ tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm));
+}
+
+static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
+{
+ if (s->tb_jmp_offset) {
+ /* direct jump method */
+ tcg_abort();
+ } else {
+ /* indirect jump method */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2,
+ (tcg_target_long)(s->tb_next + arg));
+ tcg_out_bundle(s, MmI,
+ tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1,
+ TCG_REG_R2, TCG_REG_R2),
+ INSN_NOP_M,
+ tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6,
+ TCG_REG_R2, 0));
+ tcg_out_bundle(s, mmB,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4,
+ TCG_REG_B6));
+ }
+ s->tb_next_offset[arg] = tcg_current_code_size(s);
+}
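+
+/* Only the indirect jump method is implemented for this backend: the
+   chained TB address is reloaded from s->tb_next[arg] on every execution,
+   so no code patching is needed when the link changes.  */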
+
+static inline void tcg_out_jmp(TCGContext *s, TCGArg addr)
+{
+ tcg_out_bundle(s, mmI,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0));
+ tcg_out_bundle(s, mmB,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
+}
+
+static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
+ TCGArg arg1, tcg_target_long arg2)
+{
+ if (arg2 == ((int16_t)arg2 >> 2) << 2) {
+ tcg_out_bundle(s, MmI,
+ tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
+ TCG_REG_R2, arg2, arg1),
+ tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
+ INSN_NOP_I);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
+ tcg_out_bundle(s, MmI,
+ tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
+ TCG_REG_R2, TCG_REG_R2, arg1),
+ tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
+ INSN_NOP_I);
+ }
+}
+
+static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
+ TCGArg arg1, tcg_target_long arg2)
+{
+ if (arg2 == ((int16_t)arg2 >> 2) << 2) {
+ tcg_out_bundle(s, MmI,
+ tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
+ TCG_REG_R2, arg2, arg1),
+ tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
+ INSN_NOP_I);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
+ tcg_out_bundle(s, MmI,
+ tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
+ TCG_REG_R2, TCG_REG_R2, arg1),
+ tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
+ INSN_NOP_I);
+ }
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ if (type == TCG_TYPE_I32) {
+ tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2);
+ } else {
+ tcg_out_ld_rel(s, OPC_LD8_M1, arg, arg1, arg2);
+ }
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ if (type == TCG_TYPE_I32) {
+ tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2);
+ } else {
+ tcg_out_st_rel(s, OPC_ST8_M4, arg, arg1, arg2);
+ }
+}
+
+static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
+ TCGReg ret, TCGArg arg1, int const_arg1,
+ TCGArg arg2, int const_arg2)
+{
+ uint64_t opc1 = 0, opc2 = 0, opc3 = 0;
+
+ if (const_arg2 && arg2 != 0) {
+ opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2);
+ arg2 = TCG_REG_R3;
+ }
+ if (const_arg1 && arg1 != 0) {
+ if (opc_a3 && arg1 == (int8_t)arg1) {
+ opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2);
+ } else {
+ opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1);
+ arg1 = TCG_REG_R2;
+ }
+ }
+ if (opc3 == 0) {
+ opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2);
+ }
+
+ tcg_out_bundle(s, (opc1 || opc2 ? mII : miI),
+ opc1 ? opc1 : INSN_NOP_M,
+ opc2 ? opc2 : INSN_NOP_I,
+ opc3);
+}
+
+static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) {
+ tcg_out_bundle(s, mmI,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1));
+ } else {
+ tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2);
+ }
+}
+
+static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1,
+ int const_arg1, TCGArg arg2, int const_arg2)
+{
+ if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
+ tcg_out_bundle(s, mmI,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1));
+ } else {
+ tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret,
+ arg1, const_arg1, arg2, const_arg2);
+ }
+}
+
+static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
+ TCGArg arg1, int const_arg1,
+ TCGArg arg2, int const_arg2)
+{
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2),
+ tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
+}
+
+static inline void tcg_out_nand(TCGContext *s, TCGArg ret,
+ TCGArg arg1, int const_arg1,
+ TCGArg arg2, int const_arg2)
+{
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2),
+ tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
+}
+
+static inline void tcg_out_nor(TCGContext *s, TCGArg ret,
+ TCGArg arg1, int const_arg1,
+ TCGArg arg2, int const_arg2)
+{
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2),
+ tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
+}
+
+static inline void tcg_out_orc(TCGContext *s, TCGArg ret,
+ TCGArg arg1, int const_arg1,
+ TCGArg arg2, int const_arg2)
+{
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2),
+ tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2));
+}
+
+static inline void tcg_out_mul(TCGContext *s, TCGArg ret,
+ TCGArg arg1, TCGArg arg2)
+{
+ tcg_out_bundle(s, mmI,
+ tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1),
+ tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2),
+ INSN_NOP_I);
+ tcg_out_bundle(s, mmF,
+ INSN_NOP_M,
+ INSN_NOP_M,
+ tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6,
+ TCG_REG_F7, TCG_REG_F0));
+ tcg_out_bundle(s, miI,
+ tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6),
+ INSN_NOP_I,
+ INSN_NOP_I);
+}
+
+static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
+ ret, arg1, arg2, 31 - arg2));
+ } else {
+ tcg_out_bundle(s, mII,
+ tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3,
+ TCG_REG_R3, 0x1f, arg2),
+ tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1),
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret,
+ TCG_REG_R2, TCG_REG_R3));
+ }
+}
+
+static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
+ ret, arg1, arg2, 63 - arg2));
+ } else {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2));
+ }
+}
+
+static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
+ arg1, 63 - arg2, 31 - arg2));
+ } else {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2,
+ 0x1f, arg2),
+ tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
+ arg1, TCG_REG_R2));
+ }
+}
+
+static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
+ arg1, 63 - arg2, 63 - arg2));
+ } else {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
+ arg1, arg2));
+ }
+}
+
+static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
+ arg1, arg2, 31 - arg2));
+ } else {
+ tcg_out_bundle(s, mII,
+ tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3,
+ 0x1f, arg2),
+ tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1),
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
+ TCG_REG_R2, TCG_REG_R3));
+ }
+}
+
+static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
+ arg1, arg2, 63 - arg2));
+ } else {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
+ arg1, arg2));
+ }
+}
+
+static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
+ TCG_REG_R2, arg1, arg1),
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
+ TCG_REG_R2, 32 - arg2, 31));
+ } else {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
+ TCG_REG_R2, arg1, arg1),
+ tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3,
+ 0x1f, arg2));
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3,
+ 0x20, TCG_REG_R3),
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
+ TCG_REG_R2, TCG_REG_R3));
+ }
+}
+
+static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
+ arg1, 0x40 - arg2));
+ } else {
+ tcg_out_bundle(s, mII,
+ tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2,
+ 0x40, arg2),
+ tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3,
+ arg1, arg2),
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2,
+ arg1, TCG_REG_R2));
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
+ TCG_REG_R2, TCG_REG_R3));
+ }
+}
+
+static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
+ TCG_REG_R2, arg1, arg1),
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
+ TCG_REG_R2, arg2, 31));
+ } else {
+ tcg_out_bundle(s, mII,
+ tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3,
+ 0x1f, arg2),
+ tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
+ TCG_REG_R2, arg1, arg1),
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
+ TCG_REG_R2, TCG_REG_R3));
+ }
+}
+
+static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
+{
+ if (const_arg2) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
+ arg1, arg2));
+ } else {
+ tcg_out_bundle(s, mII,
+ tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2,
+ 0x40, arg2),
+ tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3,
+ arg1, arg2),
+ tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2,
+ arg1, TCG_REG_R2));
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
+ TCG_REG_R2, TCG_REG_R3));
+ }
+}
+
+static const uint64_t opc_ext_i29[8] = {
+ OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
+ OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
+};
+
+static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s)
+{
+ if ((opc & MO_SIZE) == MO_64) {
+ return tcg_opc_mov_a(qp, d, s);
+ } else {
+ return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s);
+ }
+}
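+
+/* opc_ext_i29 is indexed by MO_SSIZE: entries 0-2 are the zero-extending
+   zxt1/zxt2/zxt4 forms, entries 4-6 the sign-extending sxt forms.  MO_64
+   is handled separately with a plain register move, so the zero entries
+   at indexes 3 and 7 are never used.  */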
+
+static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
+ TCGArg ret, TCGArg arg)
+{
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg));
+}
+
+static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s)
+{
+ return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb);
+}
+
+static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
+{
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15),
+ tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
+{
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31),
+ tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
+}
+
+static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
+{
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_bswap64_i(TCG_REG_P0, ret, arg));
+}
+
+static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
+ TCGArg a2, int const_a2, int pos, int len)
+{
+ uint64_t i1 = 0, i2 = 0;
+ int cpos = 63 - pos, lm1 = len - 1;
+
+ if (const_a2) {
+ /* Truncate the value of a constant a2 to the width of the field. */
+ int mask = (1u << len) - 1;
+ a2 &= mask;
+
+ if (a2 == 0 || a2 == mask) {
+ /* 1-bit signed constant inserted into register. */
+ i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1);
+ } else {
+ /* Otherwise, load any constant into a temporary. Do this into
+ the first I slot to help out with cross-unit delays. */
+ i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2);
+ a2 = TCG_REG_R2;
+ }
+ }
+ if (i2 == 0) {
+ i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
+ }
+ tcg_out_bundle(s, (i1 ? mII : miI),
+ INSN_NOP_M,
+ i1 ? i1 : INSN_NOP_I,
+ i2);
+}
+
+static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, int cmp4)
+{
+ uint64_t opc_eq_a6, opc_lt_a6, opc_ltu_a6;
+
+ if (cmp4) {
+ opc_eq_a6 = OPC_CMP4_EQ_A6;
+ opc_lt_a6 = OPC_CMP4_LT_A6;
+ opc_ltu_a6 = OPC_CMP4_LTU_A6;
+ } else {
+ opc_eq_a6 = OPC_CMP_EQ_A6;
+ opc_lt_a6 = OPC_CMP_LT_A6;
+ opc_ltu_a6 = OPC_CMP_LTU_A6;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2);
+ case TCG_COND_NE:
+ return tcg_opc_a6 (qp, opc_eq_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2);
+ case TCG_COND_LT:
+ return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2);
+ case TCG_COND_LTU:
+ return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2);
+ case TCG_COND_GE:
+ return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2);
+ case TCG_COND_GEU:
+ return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2);
+ case TCG_COND_LE:
+ return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1);
+ case TCG_COND_LEU:
+ return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1);
+ case TCG_COND_GT:
+ return tcg_opc_a6 (qp, opc_lt_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1);
+ case TCG_COND_GTU:
+ return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1);
+ default:
+ tcg_abort();
+ break;
+ }
+}
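+
+/* Only the eq, lt and ltu compare forms are needed: the remaining TCG
+   conditions are obtained by swapping the destination predicates and/or
+   the source operands, so in every case P6 is set when the condition
+   holds and P7 when it does not.  */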
+
+static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+ TCGReg arg2, TCGLabel *l, int cmp4)
+{
+ uint64_t imm;
+
+    /* Take care not to modify the branch target: read the existing
+       displacement and reuse it.  This ensures that caches and memory
+       are kept coherent during retranslation. */
+ if (l->has_value) {
+ imm = l->u.value_ptr - s->code_ptr;
+ } else {
+ imm = get_reloc_pcrel21b_slot2(s->code_ptr);
+ tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0);
+ }
+
+ tcg_out_bundle(s, miB,
+ INSN_NOP_M,
+ tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
+ tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm));
+}
+
+static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg arg1, TCGArg arg2, int cmp4)
+{
+ tcg_out_bundle(s, MmI,
+ tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
+ tcg_opc_movi_a(TCG_REG_P6, ret, 1),
+ tcg_opc_movi_a(TCG_REG_P7, ret, 0));
+}
+
+static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg c1, TCGArg c2,
+ TCGArg v1, int const_v1,
+ TCGArg v2, int const_v2, int cmp4)
+{
+ uint64_t opc1, opc2;
+
+ if (const_v1) {
+ opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1);
+ } else if (ret == v1) {
+ opc1 = INSN_NOP_M;
+ } else {
+ opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
+ }
+ if (const_v2) {
+ opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2);
+ } else if (ret == v2) {
+ opc2 = INSN_NOP_I;
+ } else {
+ opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2);
+ }
+
+ tcg_out_bundle(s, MmI,
+ tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4),
+ opc1,
+ opc2);
+}
+
+#if defined(CONFIG_SOFTMMU)
+/* We're expecting to use a signed 22-bit immediate add. */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+ > 0x1fffff)
+
+/* Load and compare a TLB entry, and return the result in (p6, p7).
+   R2 is loaded with the addend from the TLB entry.
+   R57 is loaded with the address, zero extended on 32-bit targets.
+ R1, R3 are clobbered, leaving R56 free for...
+ BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */
+static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
+ TCGMemOp s_bits, int off_rw, int off_add,
+ uint64_t bswap1, uint64_t bswap2)
+{
+ /*
+ .mii
+ mov r2 = off_rw
+ extr.u r3 = addr_reg, ... # extract tlb page
+ zxt4 r57 = addr_reg # or mov for 64-bit guest
+ ;;
+ .mii
+ addl r2 = r2, areg0
+ shl r3 = r3, cteb # via dep.z
+ dep r1 = 0, r57, ... # zero page ofs, keep align
+ ;;
+ .mmi
+ add r2 = r2, r3
+ ;;
+ ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest
+ nop
+ ;;
+ .mmi
+ nop
+ cmp.eq p6, p7 = r3, r58
+ nop
+ ;;
+ */
+ tcg_out_bundle(s, miI,
+ tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw),
+ tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3,
+ addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
+ tcg_opc_ext_i(TCG_REG_P0,
+ TARGET_LONG_BITS == 32 ? MO_UL : MO_Q,
+ TCG_REG_R57, addr_reg));
+ tcg_out_bundle(s, miI,
+ tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+ TCG_REG_R2, TCG_AREG0),
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3,
+ TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS,
+ 63 - CPU_TLB_ENTRY_BITS),
+ tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0,
+ TCG_REG_R57, 63 - s_bits,
+ TARGET_PAGE_BITS - s_bits - 1));
+ tcg_out_bundle(s, MmI,
+ tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
+ TCG_REG_R2, TCG_REG_R2, TCG_REG_R3),
+ tcg_opc_m3 (TCG_REG_P0,
+ (TARGET_LONG_BITS == 32
+ ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3,
+ TCG_REG_R2, off_add - off_rw),
+ bswap1);
+ tcg_out_bundle(s, mmI,
+ tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2),
+ tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
+ TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
+ bswap2);
+}
+
+typedef struct TCGLabelQemuLdst {
+ bool is_ld;
+ TCGMemOp size;
+ tcg_insn_unit *label_ptr; /* label pointers to be updated */
+ struct TCGLabelQemuLdst *next;
+} TCGLabelQemuLdst;
+
+typedef struct TCGBackendData {
+ TCGLabelQemuLdst *labels;
+} TCGBackendData;
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+ s->be->labels = NULL;
+}
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
+ tcg_insn_unit *label_ptr)
+{
+ TCGBackendData *be = s->be;
+ TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
+
+ l->is_ld = is_ld;
+ l->size = opc & MO_SIZE;
+ l->label_ptr = label_ptr;
+ l->next = be->labels;
+ be->labels = l;
+}
+
+static void tcg_out_tb_finalize(TCGContext *s)
+{
+ static const void * const helpers[8] = {
+ helper_ret_stb_mmu,
+ helper_le_stw_mmu,
+ helper_le_stl_mmu,
+ helper_le_stq_mmu,
+ helper_ret_ldub_mmu,
+ helper_le_lduw_mmu,
+ helper_le_ldul_mmu,
+ helper_le_ldq_mmu,
+ };
+ tcg_insn_unit *thunks[8] = { };
+ TCGLabelQemuLdst *l;
+
+ for (l = s->be->labels; l != NULL; l = l->next) {
+ long x = l->is_ld * 4 + l->size;
+ tcg_insn_unit *dest = thunks[x];
+
+        /* The out-of-line thunks are all the same; load the return address
+           from B0, load the GP, and branch to the code.  Note that we are
+           always post-call, so the register window has rolled and we are
+           using incoming parameter register numbers, not outgoing.  */
+ if (dest == NULL) {
+ uintptr_t *desc = (uintptr_t *)helpers[x];
+ uintptr_t func = desc[0], gp = desc[1], disp;
+
+ thunks[x] = dest = s->code_ptr;
+
+ tcg_out_bundle(s, mlx,
+ INSN_NOP_M,
+ tcg_opc_l2 (gp),
+ tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
+ TCG_REG_R1, gp));
+ tcg_out_bundle(s, mii,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
+ l->is_ld ? TCG_REG_R35 : TCG_REG_R36,
+ TCG_REG_B0));
+ disp = (tcg_insn_unit *)func - s->code_ptr;
+ tcg_out_bundle(s, mLX,
+ INSN_NOP_M,
+ tcg_opc_l3 (disp),
+ tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp));
+ }
+
+ reloc_pcrel21b_slot2(l->label_ptr, dest);
+ }
+}
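+
+/* The thunk index "is_ld * 4 + size" selects from the helpers[] table:
+   entries 0-3 are the little-endian store helpers for 1/2/4/8 bytes and
+   entries 4-7 the corresponding load helpers, so at most eight thunks
+   are emitted per TB, each shared by all accesses of the same kind.  */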
+
+static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
+{
+ static const uint64_t opc_ld_m1[4] = {
+ OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
+ };
+ int addr_reg, data_reg, mem_index;
+ TCGMemOpIdx oi;
+ TCGMemOp opc, s_bits;
+ uint64_t fin1, fin2;
+ tcg_insn_unit *label_ptr;
+
+ data_reg = args[0];
+ addr_reg = args[1];
+ oi = args[2];
+ opc = get_memop(oi);
+ mem_index = get_mmuidx(oi);
+ s_bits = opc & MO_SIZE;
+
+ /* Read the TLB entry */
+ tcg_out_qemu_tlb(s, addr_reg, s_bits,
+ offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
+ offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+ INSN_NOP_I, INSN_NOP_I);
+
+ /* P6 is the fast path, and P7 the slow path */
+
+ fin2 = 0;
+ if (opc & MO_BSWAP) {
+ fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8);
+ if (s_bits < MO_64) {
+ int shift = 64 - (8 << s_bits);
+ fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11);
+ fin2 = tcg_opc_i11(TCG_REG_P0, fin2,
+ data_reg, data_reg, shift, 63 - shift);
+ }
+ } else {
+ fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8);
+ }
+
+ tcg_out_bundle(s, mmI,
+ tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
+ tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
+ TCG_REG_R2, TCG_REG_R57),
+ tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, oi));
+ label_ptr = s->code_ptr;
+ tcg_out_bundle(s, miB,
+ tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
+ TCG_REG_R8, TCG_REG_R2),
+ INSN_NOP_I,
+ tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+ get_reloc_pcrel21b_slot2(label_ptr)));
+
+ add_qemu_ldst_label(s, 1, opc, label_ptr);
+
+ /* Note that we always use LE helper functions, so the bswap insns
+ here for the fast path also apply to the slow path. */
+ tcg_out_bundle(s, (fin2 ? mII : miI),
+ INSN_NOP_M,
+ fin1,
+ fin2 ? fin2 : INSN_NOP_I);
+}
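+
+/* The access above is fully predicated: under P6 (TLB hit) the data is
+   loaded directly through the addend in R2, while under P7 the bundle
+   issues a br.call whose 21-bit displacement is filled in by
+   tcg_out_tb_finalize() once the shared slow-path thunk is emitted.  */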
+
+static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
+{
+ static const uint64_t opc_st_m4[4] = {
+ OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
+ };
+ TCGReg addr_reg, data_reg;
+ int mem_index;
+ uint64_t pre1, pre2;
+ TCGMemOpIdx oi;
+ TCGMemOp opc, s_bits;
+ tcg_insn_unit *label_ptr;
+
+ data_reg = args[0];
+ addr_reg = args[1];
+ oi = args[2];
+ opc = get_memop(oi);
+ mem_index = get_mmuidx(oi);
+ s_bits = opc & MO_SIZE;
+
+ /* Note that we always use LE helper functions, so the bswap insns
+ that are here for the fast path also apply to the slow path,
+ and move the data into the argument register. */
+ pre2 = INSN_NOP_I;
+ if (opc & MO_BSWAP) {
+ pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg);
+ if (s_bits < MO_64) {
+ int shift = 64 - (8 << s_bits);
+ pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+ TCG_REG_R58, TCG_REG_R58, shift, 63 - shift);
+ }
+ } else {
+ /* Just move the data into place for the slow path. */
+ pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg);
+ }
+
+ tcg_out_qemu_tlb(s, addr_reg, s_bits,
+ offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
+ offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+ pre1, pre2);
+
+ /* P6 is the fast path, and P7 the slow path */
+ tcg_out_bundle(s, mmI,
+ tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
+ tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
+ TCG_REG_R2, TCG_REG_R57),
+ tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, oi));
+ label_ptr = s->code_ptr;
+ tcg_out_bundle(s, miB,
+ tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
+ TCG_REG_R58, TCG_REG_R2),
+ INSN_NOP_I,
+ tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+ get_reloc_pcrel21b_slot2(label_ptr)));
+
+ add_qemu_ldst_label(s, 0, opc, label_ptr);
+}
+
+#else /* !CONFIG_SOFTMMU */
+# include "tcg-be-null.h"
+
+static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
+{
+ static uint64_t const opc_ld_m1[4] = {
+ OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
+ };
+ int addr_reg, data_reg;
+ TCGMemOp opc, s_bits, bswap;
+
+ data_reg = args[0];
+ addr_reg = args[1];
+ opc = args[2];
+ s_bits = opc & MO_SIZE;
+ bswap = opc & MO_BSWAP;
+
+#if TARGET_LONG_BITS == 32
+ if (GUEST_BASE != 0) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+ TCG_REG_R3, addr_reg),
+ tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+ TCG_GUEST_BASE_REG, TCG_REG_R3));
+ } else {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+ TCG_REG_R2, addr_reg),
+ INSN_NOP_I);
+ }
+
+ if (!bswap) {
+ if (!(opc & MO_SIGN)) {
+ tcg_out_bundle(s, miI,
+ tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+ data_reg, TCG_REG_R2),
+ INSN_NOP_I,
+ INSN_NOP_I);
+ } else {
+ tcg_out_bundle(s, mII,
+ tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+ data_reg, TCG_REG_R2),
+ INSN_NOP_I,
+ tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
+ }
+ } else if (s_bits == MO_64) {
+ tcg_out_bundle(s, mII,
+ tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+ data_reg, TCG_REG_R2),
+ INSN_NOP_I,
+ tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
+ } else {
+ if (s_bits == MO_16) {
+ tcg_out_bundle(s, mII,
+ tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+ data_reg, TCG_REG_R2),
+ INSN_NOP_I,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ data_reg, data_reg, 15, 15));
+ } else {
+ tcg_out_bundle(s, mII,
+ tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+ data_reg, TCG_REG_R2),
+ INSN_NOP_I,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ data_reg, data_reg, 31, 31));
+ }
+ if (!(opc & MO_SIGN)) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
+ } else {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg),
+ tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
+ }
+ }
+#else
+ if (GUEST_BASE != 0) {
+ tcg_out_bundle(s, MmI,
+ tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+ TCG_GUEST_BASE_REG, addr_reg),
+ tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+ data_reg, TCG_REG_R2),
+ INSN_NOP_I);
+ } else {
+ tcg_out_bundle(s, mmI,
+ INSN_NOP_M,
+ tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
+ data_reg, addr_reg),
+ INSN_NOP_I);
+ }
+
+ if (bswap && s_bits == MO_16) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ data_reg, data_reg, 15, 15),
+ tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
+ } else if (bswap && s_bits == MO_32) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ data_reg, data_reg, 31, 31),
+ tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
+ } else if (bswap && s_bits == MO_64) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
+ }
+ if (opc & MO_SIGN) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
+ }
+#endif
+}
+
+static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
+{
+ static uint64_t const opc_st_m4[4] = {
+ OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
+ };
+ int addr_reg, data_reg;
+#if TARGET_LONG_BITS == 64
+ uint64_t add_guest_base;
+#endif
+ TCGMemOp opc, s_bits, bswap;
+
+ data_reg = args[0];
+ addr_reg = args[1];
+ opc = args[2];
+ s_bits = opc & MO_SIZE;
+ bswap = opc & MO_BSWAP;
+
+#if TARGET_LONG_BITS == 32
+ if (GUEST_BASE != 0) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+ TCG_REG_R3, addr_reg),
+ tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+ TCG_GUEST_BASE_REG, TCG_REG_R3));
+ } else {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
+ TCG_REG_R2, addr_reg),
+ INSN_NOP_I);
+ }
+
+ if (bswap) {
+ if (s_bits == MO_16) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ TCG_REG_R3, data_reg, 15, 15),
+ tcg_opc_bswap64_i(TCG_REG_P0,
+ TCG_REG_R3, TCG_REG_R3));
+ data_reg = TCG_REG_R3;
+ } else if (s_bits == MO_32) {
+ tcg_out_bundle(s, mII,
+ INSN_NOP_M,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ TCG_REG_R3, data_reg, 31, 31),
+ tcg_opc_bswap64_i(TCG_REG_P0,
+ TCG_REG_R3, TCG_REG_R3));
+ data_reg = TCG_REG_R3;
+ } else if (s_bits == MO_64) {
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ INSN_NOP_I,
+ tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
+ data_reg = TCG_REG_R3;
+ }
+ }
+ tcg_out_bundle(s, mmI,
+ tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
+ data_reg, TCG_REG_R2),
+ INSN_NOP_M,
+ INSN_NOP_I);
+#else
+ if (GUEST_BASE != 0) {
+ add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
+ TCG_GUEST_BASE_REG, addr_reg);
+ addr_reg = TCG_REG_R2;
+ } else {
+ add_guest_base = INSN_NOP_M;
+ }
+
+ if (!bswap) {
+ tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI),
+ add_guest_base,
+ tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
+ data_reg, addr_reg),
+ INSN_NOP_I);
+ } else {
+ if (s_bits == MO_16) {
+ tcg_out_bundle(s, mII,
+ add_guest_base,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ TCG_REG_R3, data_reg, 15, 15),
+ tcg_opc_bswap64_i(TCG_REG_P0,
+ TCG_REG_R3, TCG_REG_R3));
+ data_reg = TCG_REG_R3;
+ } else if (s_bits == MO_32) {
+ tcg_out_bundle(s, mII,
+ add_guest_base,
+ tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
+ TCG_REG_R3, data_reg, 31, 31),
+ tcg_opc_bswap64_i(TCG_REG_P0,
+ TCG_REG_R3, TCG_REG_R3));
+ data_reg = TCG_REG_R3;
+ } else if (s_bits == MO_64) {
+ tcg_out_bundle(s, miI,
+ add_guest_base,
+ INSN_NOP_I,
+ tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
+ data_reg = TCG_REG_R3;
+ }
+ tcg_out_bundle(s, miI,
+ tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
+ data_reg, addr_reg),
+ INSN_NOP_I,
+ INSN_NOP_I);
+ }
+#endif
+}
+
+#endif
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg *args, const int *const_args)
+{
+ switch(opc) {
+ case INDEX_op_exit_tb:
+ tcg_out_exit_tb(s, args[0]);
+ break;
+ case INDEX_op_br:
+ tcg_out_br(s, arg_label(args[0]));
+ break;
+ case INDEX_op_goto_tb:
+ tcg_out_goto_tb(s, args[0]);
+ break;
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]);
+ tcg_out_ext(s, OPC_SXT1_I29, args[0], args[0]);
+ break;
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]);
+ tcg_out_ext(s, OPC_SXT2_I29, args[0], args[0]);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]);
+ tcg_out_ext(s, OPC_SXT4_I29, args[0], args[0]);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ld_rel(s, OPC_LD8_M1, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ tcg_out_st_rel(s, OPC_ST1_M4, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ tcg_out_st_rel(s, OPC_ST2_M4, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_st_rel(s, OPC_ST4_M4, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_st_rel(s, OPC_ST8_M4, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ tcg_out_add(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]);
+ break;
+
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */
+ tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0],
+ args[2], const_args[2], args[1], const_args[1]);
+ break;
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0],
+ args[1], const_args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_eqv_i32:
+ case INDEX_op_eqv_i64:
+ tcg_out_eqv(s, args[0], args[1], const_args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ tcg_out_nand(s, args[0], args[1], const_args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ tcg_out_nor(s, args[0], args[1], const_args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */
+ tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0],
+ args[2], const_args[2], args[1], const_args[1]);
+ break;
+ case INDEX_op_orc_i32:
+ case INDEX_op_orc_i64:
+ tcg_out_orc(s, args[0], args[1], const_args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ /* TCG expects arg2 constant; A3 expects arg1 constant. Swap. */
+ tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0],
+ args[2], const_args[2], args[1], const_args[1]);
+ break;
+
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ tcg_out_mul(s, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_sar_i32:
+ tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_sar_i64:
+ tcg_out_sar_i64(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_shl_i32:
+ tcg_out_shl_i32(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_shl_i64:
+ tcg_out_shl_i64(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_shr_i32:
+ tcg_out_shr_i32(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_shr_i64:
+ tcg_out_shr_i64(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_rotl_i32:
+ tcg_out_rotl_i32(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_rotl_i64:
+ tcg_out_rotl_i64(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_rotr_i32:
+ tcg_out_rotr_i32(s, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_rotr_i64:
+ tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]);
+ break;
+
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]);
+ break;
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ tcg_out_ext(s, OPC_ZXT1_I29, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ tcg_out_ext(s, OPC_SXT2_I29, args[0], args[1]);
+ break;
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext16u_i64:
+ tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]);
+ break;
+ case INDEX_op_ext32s_i64:
+ tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]);
+ break;
+ case INDEX_op_ext32u_i64:
+ tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]);
+ break;
+
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ tcg_out_bswap16(s, args[0], args[1]);
+ break;
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ tcg_out_bswap32(s, args[0], args[1]);
+ break;
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, args[0], args[1]);
+ break;
+
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ tcg_out_deposit(s, args[0], args[1], args[2], const_args[2],
+ args[3], args[4]);
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 1);
+ break;
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 0);
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ args[3], const_args[3], args[4], const_args[4], 1);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ args[3], const_args[3], args[4], const_args[4], 0);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static const TCGTargetOpDef ia64_op_defs[] = {
+ { INDEX_op_br, { } },
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "rZ", "r" } },
+ { INDEX_op_st16_i32, { "rZ", "r" } },
+ { INDEX_op_st_i32, { "rZ", "r" } },
+
+ { INDEX_op_add_i32, { "r", "rZ", "rI" } },
+ { INDEX_op_sub_i32, { "r", "rI", "rI" } },
+
+ { INDEX_op_and_i32, { "r", "rI", "rI" } },
+ { INDEX_op_andc_i32, { "r", "rI", "rI" } },
+ { INDEX_op_eqv_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_nand_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_nor_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_or_i32, { "r", "rI", "rI" } },
+ { INDEX_op_orc_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_xor_i32, { "r", "rI", "rI" } },
+
+ { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
+
+ { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+
+ { INDEX_op_ext8s_i32, { "r", "rZ"} },
+ { INDEX_op_ext8u_i32, { "r", "rZ"} },
+ { INDEX_op_ext16s_i32, { "r", "rZ"} },
+ { INDEX_op_ext16u_i32, { "r", "rZ"} },
+
+ { INDEX_op_bswap16_i32, { "r", "rZ" } },
+ { INDEX_op_bswap32_i32, { "r", "rZ" } },
+
+ { INDEX_op_brcond_i32, { "rZ", "rZ" } },
+ { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
+
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+ { INDEX_op_st8_i64, { "rZ", "r" } },
+ { INDEX_op_st16_i64, { "rZ", "r" } },
+ { INDEX_op_st32_i64, { "rZ", "r" } },
+ { INDEX_op_st_i64, { "rZ", "r" } },
+
+ { INDEX_op_add_i64, { "r", "rZ", "rI" } },
+ { INDEX_op_sub_i64, { "r", "rI", "rI" } },
+
+ { INDEX_op_and_i64, { "r", "rI", "rI" } },
+ { INDEX_op_andc_i64, { "r", "rI", "rI" } },
+ { INDEX_op_eqv_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_nand_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_nor_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_or_i64, { "r", "rI", "rI" } },
+ { INDEX_op_orc_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_xor_i64, { "r", "rI", "rI" } },
+
+ { INDEX_op_mul_i64, { "r", "rZ", "rZ" } },
+
+ { INDEX_op_sar_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_shl_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_shr_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
+ { INDEX_op_rotr_i64, { "r", "rZ", "ri" } },
+
+ { INDEX_op_ext8s_i64, { "r", "rZ"} },
+ { INDEX_op_ext8u_i64, { "r", "rZ"} },
+ { INDEX_op_ext16s_i64, { "r", "rZ"} },
+ { INDEX_op_ext16u_i64, { "r", "rZ"} },
+ { INDEX_op_ext32s_i64, { "r", "rZ"} },
+ { INDEX_op_ext32u_i64, { "r", "rZ"} },
+
+ { INDEX_op_bswap16_i64, { "r", "rZ" } },
+ { INDEX_op_bswap32_i64, { "r", "rZ" } },
+ { INDEX_op_bswap64_i64, { "r", "rZ" } },
+
+ { INDEX_op_brcond_i64, { "rZ", "rZ" } },
+ { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },
+
+ { INDEX_op_deposit_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_deposit_i64, { "r", "rZ", "ri" } },
+
+ { INDEX_op_qemu_ld_i32, { "r", "r" } },
+ { INDEX_op_qemu_ld_i64, { "r", "r" } },
+ { INDEX_op_qemu_st_i32, { "SZ", "r" } },
+ { INDEX_op_qemu_st_i64, { "SZ", "r" } },
+
+ { -1 },
+};
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int frame_size;
+
+ /* reserve some stack space */
+ frame_size = TCG_STATIC_CALL_ARGS_SIZE +
+ CPU_TEMP_BUF_NLONGS * sizeof(long);
+ frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
+ ~(TCG_TARGET_STACK_ALIGN - 1);
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+    /* First emit the ad hoc function descriptor */
+ *s->code_ptr = (tcg_insn_unit){
+ (uint64_t)(s->code_ptr + 1), /* entry point */
+ 0 /* skip gp */
+ };
+ s->code_ptr++;
+
+ /* prologue */
+ tcg_out_bundle(s, miI,
+ tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34,
+ TCG_REG_R34, 32, 24, 0),
+ INSN_NOP_I,
+ tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
+ TCG_REG_B6, TCG_REG_R33, 0));
+
+ /* ??? If GUEST_BASE < 0x200000, we could load the register via
+ an ADDL in the M slot of the next bundle. */
+ if (GUEST_BASE != 0) {
+ tcg_out_bundle(s, mlx,
+ INSN_NOP_M,
+ tcg_opc_l2 (GUEST_BASE),
+ tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
+ TCG_GUEST_BASE_REG, GUEST_BASE));
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+
+ tcg_out_bundle(s, miB,
+ tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
+ TCG_REG_R12, -frame_size, TCG_REG_R12),
+ tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
+ TCG_REG_R33, TCG_REG_B0),
+ tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
+
+ /* epilogue */
+ tb_ret_addr = s->code_ptr;
+ tcg_out_bundle(s, miI,
+ INSN_NOP_M,
+ tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
+ TCG_REG_B0, TCG_REG_R33, 0),
+ tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
+ TCG_REG_R12, frame_size, TCG_REG_R12));
+ tcg_out_bundle(s, miB,
+ INSN_NOP_M,
+ tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26,
+ TCG_REG_PFS, TCG_REG_R34),
+ tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4,
+ TCG_REG_B0));
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32],
+ 0xffffffffffffffffull);
+ tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64],
+ 0xffffffffffffffffull);
+
+ tcg_regset_clear(tcg_target_call_clobber_regs);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R15);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R16);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R17);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R18);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R19);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R27);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R28);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R29);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R30);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R31);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R56);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R57);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R58);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R59);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R60);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R61);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R62);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R63);
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* zero register */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* global pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33); /* return address */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34); /* PFS */
+
+    /* The following four registers are unused and call-saved, but *not*
+       saved by the prologue.  Therefore we cannot use them without
+       modifying the prologue.  There doesn't seem to be any good reason
+       to prefer them over the windowed registers.  */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7);
+
+ tcg_add_target_add_op_defs(ia64_op_defs);
+}
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
new file mode 100644
index 00000000..a04ed812
--- /dev/null
+++ b/tcg/ia64/tcg-target.h
@@ -0,0 +1,185 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009-2010 Aurelien Jarno <aurelien@aurel32.net>
+ * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TCG_TARGET_IA64
+#define TCG_TARGET_IA64 1
+
+#define TCG_TARGET_INSN_UNIT_SIZE 16
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+
+typedef struct {
+ uint64_t lo __attribute__((aligned(16)));
+ uint64_t hi;
+} tcg_insn_unit;
+
+/* We only map the first 64 registers */
+#define TCG_TARGET_NB_REGS 64
+typedef enum {
+ TCG_REG_R0 = 0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R16,
+ TCG_REG_R17,
+ TCG_REG_R18,
+ TCG_REG_R19,
+ TCG_REG_R20,
+ TCG_REG_R21,
+ TCG_REG_R22,
+ TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27,
+ TCG_REG_R28,
+ TCG_REG_R29,
+ TCG_REG_R30,
+ TCG_REG_R31,
+ TCG_REG_R32,
+ TCG_REG_R33,
+ TCG_REG_R34,
+ TCG_REG_R35,
+ TCG_REG_R36,
+ TCG_REG_R37,
+ TCG_REG_R38,
+ TCG_REG_R39,
+ TCG_REG_R40,
+ TCG_REG_R41,
+ TCG_REG_R42,
+ TCG_REG_R43,
+ TCG_REG_R44,
+ TCG_REG_R45,
+ TCG_REG_R46,
+ TCG_REG_R47,
+ TCG_REG_R48,
+ TCG_REG_R49,
+ TCG_REG_R50,
+ TCG_REG_R51,
+ TCG_REG_R52,
+ TCG_REG_R53,
+ TCG_REG_R54,
+ TCG_REG_R55,
+ TCG_REG_R56,
+ TCG_REG_R57,
+ TCG_REG_R58,
+ TCG_REG_R59,
+ TCG_REG_R60,
+ TCG_REG_R61,
+ TCG_REG_R62,
+ TCG_REG_R63,
+
+ TCG_AREG0 = TCG_REG_R32,
+} TCGReg;
+
+#define TCG_CT_CONST_ZERO 0x100
+#define TCG_CT_CONST_S22 0x200
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_R12
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET 16
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 0
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_div_i64 0
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#define TCG_TARGET_HAS_trunc_shr_i32 0
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
+#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_neg_i32 0 /* sub r1, r0, r3 */
+#define TCG_TARGET_HAS_neg_i64 0 /* sub r1, r0, r3 */
+#define TCG_TARGET_HAS_not_i32 0 /* xor r1, -1, r3 */
+#define TCG_TARGET_HAS_not_i64 0 /* xor r1, -1, r3 */
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ start = start & ~(32UL - 1UL);
+ stop = (stop + (32UL - 1UL)) & ~(32UL - 1UL);
+
+ for (; start < stop; start += 32UL) {
+ asm volatile ("fc.i %0" :: "r" (start));
+ }
+ asm volatile (";;sync.i;;srlz.i;;");
+}
+
+#endif
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
new file mode 100644
index 00000000..2ccd0e82
--- /dev/null
+++ b/tcg/mips/tcg-target.c
@@ -0,0 +1,1828 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-be-ldst.h"
+
+#ifdef HOST_WORDS_BIGENDIAN
+# define MIPS_BE 1
+#else
+# define MIPS_BE 0
+#endif
+
+#define LO_OFF (MIPS_BE * 4)
+#define HI_OFF (4 - LO_OFF)
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "zero",
+ "at",
+ "v0",
+ "v1",
+ "a0",
+ "a1",
+ "a2",
+ "a3",
+ "t0",
+ "t1",
+ "t2",
+ "t3",
+ "t4",
+ "t5",
+ "t6",
+ "t7",
+ "s0",
+ "s1",
+ "s2",
+ "s3",
+ "s4",
+ "s5",
+ "s6",
+ "s7",
+ "t8",
+ "t9",
+ "k0",
+ "k1",
+ "gp",
+ "sp",
+ "s8",
+ "ra",
+};
+#endif
+
+#define TCG_TMP0 TCG_REG_AT
+#define TCG_TMP1 TCG_REG_T9
+
+/* check if we really need so many registers :P */
+static const TCGReg tcg_target_reg_alloc_order[] = {
+ /* Call saved registers. */
+ TCG_REG_S0,
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_S8,
+
+ /* Call clobbered registers. */
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_T3,
+ TCG_REG_T4,
+ TCG_REG_T5,
+ TCG_REG_T6,
+ TCG_REG_T7,
+ TCG_REG_T8,
+ TCG_REG_T9,
+ TCG_REG_V1,
+ TCG_REG_V0,
+
+ /* Argument registers, opposite order of allocation. */
+ TCG_REG_A3,
+ TCG_REG_A2,
+ TCG_REG_A1,
+ TCG_REG_A0,
+};
+
+static const TCGReg tcg_target_call_iarg_regs[4] = {
+ TCG_REG_A0,
+ TCG_REG_A1,
+ TCG_REG_A2,
+ TCG_REG_A3
+};
+
+static const TCGReg tcg_target_call_oarg_regs[2] = {
+ TCG_REG_V0,
+ TCG_REG_V1
+};
+
+static tcg_insn_unit *tb_ret_addr;
+
+static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ /* Let the compiler perform the right-shift as part of the arithmetic. */
+ ptrdiff_t disp = target - (pc + 1);
+ assert(disp == (int16_t)disp);
+ return disp & 0xffff;
+}
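+/* As a worked example: MIPS branch offsets are counted in instructions
+   relative to the delay slot, so a branch whose target is five insn units
+   past the branch itself gets disp = target - (pc + 1) = 4, and the low
+   16 bits of the insn are patched to 0x0004. */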
+
+static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target));
+}
+
+static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0);
+ return ((uintptr_t)target >> 2) & 0x3ffffff;
+}
+
+static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target));
+}
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ assert(type == R_MIPS_PC16);
+ assert(addend == 0);
+ reloc_pc16(code_ptr, (tcg_insn_unit *)value);
+}
+
+#define TCG_CT_CONST_ZERO 0x100
+#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */
+#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */
+#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. */
+#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */
+
+static inline bool is_p2m1(tcg_target_long val)
+{
+ return val && ((val + 1) & val) == 0;
+}
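+/* For illustration: 0x00ff and 0xffff are powers of two minus one (their
+   successor has a single bit set), while 0x0101 is not.  Such contiguous
+   low-bit masks are exactly what the MIPS32R2 EXT instruction can apply,
+   which is how the 'K' constraint is used for and_i32 below. */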
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch(ct_str[0]) {
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set(ct->u.regs, 0xffffffff);
+ break;
+ case 'L': /* qemu_ld output arg constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set(ct->u.regs, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0);
+ break;
+ case 'l': /* qemu_ld input arg constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set(ct->u.regs, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
+#if defined(CONFIG_SOFTMMU)
+ if (TARGET_LONG_BITS == 64) {
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
+ }
+#endif
+ break;
+ case 'S': /* qemu_st constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set(ct->u.regs, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
+#if defined(CONFIG_SOFTMMU)
+ if (TARGET_LONG_BITS == 32) {
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
+ } else {
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
+ }
+#endif
+ break;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_U16;
+ break;
+ case 'J':
+ ct->ct |= TCG_CT_CONST_S16;
+ break;
+ case 'K':
+ ct->ct |= TCG_CT_CONST_P2M1;
+ break;
+ case 'N':
+ ct->ct |= TCG_CT_CONST_N16;
+ break;
+ case 'Z':
+ /* We are cheating a bit here, using the fact that the register
+ ZERO is also the register number 0. Hence there is no need
+ to check for const_args in each instruction. */
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
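+/* For reference, these letters show up in mips_op_defs below: "rJ" for
+   add_i32 accepts a register or a signed 16-bit immediate (ADDIU), "rIK"
+   for and_i32 additionally accepts an unsigned 16-bit immediate (ANDI) or
+   a power-of-two-minus-one mask (EXT), and "rN" for sub_i32 accepts a
+   constant whose negation still fits in 16 bits, so the subtraction can
+   be emitted as an ADDIU of -val. */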
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct;
+ ct = arg_ct->ct;
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_P2M1)
+ && use_mips32r2_instructions && is_p2m1(val)) {
+ return 1;
+ }
+ return 0;
+}
+
+/* instruction opcodes */
+typedef enum {
+ OPC_J = 0x02 << 26,
+ OPC_JAL = 0x03 << 26,
+ OPC_BEQ = 0x04 << 26,
+ OPC_BNE = 0x05 << 26,
+ OPC_BLEZ = 0x06 << 26,
+ OPC_BGTZ = 0x07 << 26,
+ OPC_ADDIU = 0x09 << 26,
+ OPC_SLTI = 0x0A << 26,
+ OPC_SLTIU = 0x0B << 26,
+ OPC_ANDI = 0x0C << 26,
+ OPC_ORI = 0x0D << 26,
+ OPC_XORI = 0x0E << 26,
+ OPC_LUI = 0x0F << 26,
+ OPC_LB = 0x20 << 26,
+ OPC_LH = 0x21 << 26,
+ OPC_LW = 0x23 << 26,
+ OPC_LBU = 0x24 << 26,
+ OPC_LHU = 0x25 << 26,
+ OPC_LWU = 0x27 << 26,
+ OPC_SB = 0x28 << 26,
+ OPC_SH = 0x29 << 26,
+ OPC_SW = 0x2B << 26,
+
+ OPC_SPECIAL = 0x00 << 26,
+ OPC_SLL = OPC_SPECIAL | 0x00,
+ OPC_SRL = OPC_SPECIAL | 0x02,
+ OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02,
+ OPC_SRA = OPC_SPECIAL | 0x03,
+ OPC_SLLV = OPC_SPECIAL | 0x04,
+ OPC_SRLV = OPC_SPECIAL | 0x06,
+ OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06,
+ OPC_SRAV = OPC_SPECIAL | 0x07,
+ OPC_JR = OPC_SPECIAL | 0x08,
+ OPC_JALR = OPC_SPECIAL | 0x09,
+ OPC_MOVZ = OPC_SPECIAL | 0x0A,
+ OPC_MOVN = OPC_SPECIAL | 0x0B,
+ OPC_MFHI = OPC_SPECIAL | 0x10,
+ OPC_MFLO = OPC_SPECIAL | 0x12,
+ OPC_MULT = OPC_SPECIAL | 0x18,
+ OPC_MULTU = OPC_SPECIAL | 0x19,
+ OPC_DIV = OPC_SPECIAL | 0x1A,
+ OPC_DIVU = OPC_SPECIAL | 0x1B,
+ OPC_ADDU = OPC_SPECIAL | 0x21,
+ OPC_SUBU = OPC_SPECIAL | 0x23,
+ OPC_AND = OPC_SPECIAL | 0x24,
+ OPC_OR = OPC_SPECIAL | 0x25,
+ OPC_XOR = OPC_SPECIAL | 0x26,
+ OPC_NOR = OPC_SPECIAL | 0x27,
+ OPC_SLT = OPC_SPECIAL | 0x2A,
+ OPC_SLTU = OPC_SPECIAL | 0x2B,
+
+ OPC_REGIMM = 0x01 << 26,
+ OPC_BLTZ = OPC_REGIMM | (0x00 << 16),
+ OPC_BGEZ = OPC_REGIMM | (0x01 << 16),
+
+ OPC_SPECIAL2 = 0x1c << 26,
+ OPC_MUL = OPC_SPECIAL2 | 0x002,
+
+ OPC_SPECIAL3 = 0x1f << 26,
+ OPC_EXT = OPC_SPECIAL3 | 0x000,
+ OPC_INS = OPC_SPECIAL3 | 0x004,
+ OPC_WSBH = OPC_SPECIAL3 | 0x0a0,
+ OPC_SEB = OPC_SPECIAL3 | 0x420,
+ OPC_SEH = OPC_SPECIAL3 | 0x620,
+} MIPSInsn;
+
+/*
+ * Type reg
+ */
+static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc,
+ TCGReg rd, TCGReg rs, TCGReg rt)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rs & 0x1F) << 21;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (rd & 0x1F) << 11;
+ tcg_out32(s, inst);
+}
+
+/*
+ * Type immediate
+ */
+static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
+ TCGReg rt, TCGReg rs, TCGArg imm)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rs & 0x1F) << 21;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (imm & 0xffff);
+ tcg_out32(s, inst);
+}
+
+/*
+ * Type bitfield
+ */
+static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt,
+ TCGReg rs, int msb, int lsb)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rs & 0x1F) << 21;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (msb & 0x1F) << 11;
+ inst |= (lsb & 0x1F) << 6;
+ tcg_out32(s, inst);
+}
+
+/*
+ * Type branch
+ */
+static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc,
+ TCGReg rt, TCGReg rs)
+{
+    /* We pay attention here not to modify the branch target by reading
+       the existing value and using it again.  This ensures that caches and
+       memory are kept coherent during retranslation. */
+ uint16_t offset = (uint16_t)*s->code_ptr;
+
+ tcg_out_opc_imm(s, opc, rt, rs, offset);
+}
+
+/*
+ * Type sa
+ */
+static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc,
+ TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (rd & 0x1F) << 11;
+ inst |= (sa & 0x1F) << 6;
+ tcg_out32(s, inst);
+}
+
+/*
+ * Type jump.
+ * Returns true if the branch was in range and the insn was emitted.
+ */
+static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target)
+{
+ uintptr_t dest = (uintptr_t)target;
+ uintptr_t from = (uintptr_t)s->code_ptr + 4;
+ int32_t inst;
+
+    /* A pc-region branch can only reach targets within the 256MB region
+       containing the delay slot (hence the +4). */
+ if ((from ^ dest) & -(1 << 28)) {
+ return false;
+ }
+ assert((dest & 3) == 0);
+
+ inst = opc;
+ inst |= (dest >> 2) & 0x3ffffff;
+ tcg_out32(s, inst);
+ return true;
+}
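+/* Spelling out the range check above: -(1 << 28) is 0xf0000000, so the
+   test is non-zero exactly when the delay slot and the destination lie in
+   different 256MB-aligned segments; J/JAL can only replace the low 28 bits
+   of the PC. */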
+
+static inline void tcg_out_nop(TCGContext *s)
+{
+ tcg_out32(s, 0);
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+ TCGReg ret, TCGReg arg)
+{
+ /* Simple reg-reg move, optimising out the 'do nothing' case */
+ if (ret != arg) {
+ tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO);
+ }
+}
+
+static inline void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg reg, tcg_target_long arg)
+{
+ if (arg == (int16_t)arg) {
+ tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg);
+ } else if (arg == (uint16_t)arg) {
+ tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg);
+ } else {
+ tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16);
+ if (arg & 0xffff) {
+ tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff);
+ }
+ }
+}
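+/* Worked examples of the cases above: -5 fits in 16 signed bits and is a
+   single ADDIU reg, zero, -5; 0x8000 fits in 16 unsigned bits and is
+   ORI reg, zero, 0x8000; 0x12345678 needs the two-insn sequence
+   LUI reg, 0x1234 followed by ORI reg, reg, 0x5678. */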
+
+static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ } else {
+        /* ret and arg can't be the AT register (TCG_TMP0) */
+ if (ret == TCG_TMP0 || arg == TCG_TMP0) {
+ tcg_abort();
+ }
+
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
+ tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ }
+}
+
+static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
+ } else {
+        /* ret and arg can't be the AT register (TCG_TMP0) */
+ if (ret == TCG_TMP0 || arg == TCG_TMP0) {
+ tcg_abort();
+ }
+
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
+ tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ }
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
+ } else {
+        /* ret and arg must be different and can't be the AT register
+           (TCG_TMP0) */
+ if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) {
+ tcg_abort();
+ }
+
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
+
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24);
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00);
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ }
+}
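+/* Rough sketch of the non-R2 path above, for input bytes A.B.C.D (A most
+   significant): the SLL puts D in the top byte, the SRL/OR brings A to the
+   bottom, the ANDI/SLL/OR moves C up one byte, and the final SRL/ANDI/OR
+   moves B down one byte, giving D.C.B.A. */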
+
+static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg);
+ } else {
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
+ tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24);
+ }
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg);
+ } else {
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16);
+ tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
+ }
+}
+
+static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
+ TCGReg addr, intptr_t ofs)
+{
+ int16_t lo = ofs;
+ if (ofs != lo) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo);
+ if (addr != TCG_REG_ZERO) {
+ tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr);
+ }
+ addr = TCG_TMP0;
+ }
+ tcg_out_opc_imm(s, opc, data, addr, lo);
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
+}
+
+static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val)
+{
+ if (val == (int16_t)val) {
+ tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val);
+ tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0);
+ }
+}
+
+/* Bit 0 set if inversion required; bit 1 set if swapping required. */
+#define MIPS_CMP_INV 1
+#define MIPS_CMP_SWAP 2
+
+static const uint8_t mips_cmp_map[16] = {
+ [TCG_COND_LT] = 0,
+ [TCG_COND_LTU] = 0,
+ [TCG_COND_GE] = MIPS_CMP_INV,
+ [TCG_COND_GEU] = MIPS_CMP_INV,
+ [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP,
+ [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP,
+ [TCG_COND_GT] = MIPS_CMP_SWAP,
+ [TCG_COND_GTU] = MIPS_CMP_SWAP,
+};
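+/* For example, TCG_COND_LE maps to INV|SWAP because a <= b is computed as
+   !(b < a): emit SLT/SLTU with the operands swapped, then invert the 0/1
+   result with XORI 1.  GE needs only the inversion, GT only the swap. */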
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg arg1, TCGReg arg2)
+{
+ MIPSInsn s_opc = OPC_SLTU;
+ int cmp_map;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ if (arg2 != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+ arg1 = ret;
+ }
+ tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1);
+ break;
+
+ case TCG_COND_NE:
+ if (arg2 != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+ arg1 = ret;
+ }
+ tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1);
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ s_opc = OPC_SLT;
+ /* FALLTHRU */
+
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ cmp_map = mips_cmp_map[cond];
+ if (cmp_map & MIPS_CMP_SWAP) {
+ TCGReg t = arg1;
+ arg1 = arg2;
+ arg2 = t;
+ }
+ tcg_out_opc_reg(s, s_opc, ret, arg1, arg2);
+ if (cmp_map & MIPS_CMP_INV) {
+ tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+ }
+ break;
+
+ default:
+ tcg_abort();
+ break;
+ }
+}
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+ TCGReg arg2, TCGLabel *l)
+{
+ static const MIPSInsn b_zero[16] = {
+ [TCG_COND_LT] = OPC_BLTZ,
+ [TCG_COND_GT] = OPC_BGTZ,
+ [TCG_COND_LE] = OPC_BLEZ,
+ [TCG_COND_GE] = OPC_BGEZ,
+ };
+
+ MIPSInsn s_opc = OPC_SLTU;
+ MIPSInsn b_opc;
+ int cmp_map;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ b_opc = OPC_BEQ;
+ break;
+ case TCG_COND_NE:
+ b_opc = OPC_BNE;
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_GT:
+ case TCG_COND_LE:
+ case TCG_COND_GE:
+ if (arg2 == 0) {
+ b_opc = b_zero[cond];
+ arg2 = arg1;
+ arg1 = 0;
+ break;
+ }
+ s_opc = OPC_SLT;
+ /* FALLTHRU */
+
+ case TCG_COND_LTU:
+ case TCG_COND_GTU:
+ case TCG_COND_LEU:
+ case TCG_COND_GEU:
+ cmp_map = mips_cmp_map[cond];
+ if (cmp_map & MIPS_CMP_SWAP) {
+ TCGReg t = arg1;
+ arg1 = arg2;
+ arg2 = t;
+ }
+ tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2);
+ b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE);
+ arg1 = TCG_TMP0;
+ arg2 = TCG_REG_ZERO;
+ break;
+
+ default:
+ tcg_abort();
+ break;
+ }
+
+ tcg_out_opc_br(s, b_opc, arg1, arg2);
+ if (l->has_value) {
+ reloc_pc16(s->code_ptr - 1, l->u.value_ptr);
+ } else {
+ tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0);
+ }
+ tcg_out_nop(s);
+}
+
+static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1,
+ TCGReg al, TCGReg ah,
+ TCGReg bl, TCGReg bh)
+{
+ /* Merge highpart comparison into AH. */
+ if (bh != 0) {
+ if (ah != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh);
+ ah = tmp0;
+ } else {
+ ah = bh;
+ }
+ }
+ /* Merge lowpart comparison into AL. */
+ if (bl != 0) {
+ if (al != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl);
+ al = tmp1;
+ } else {
+ al = bl;
+ }
+ }
+ /* Merge high and low part comparisons into AL. */
+ if (ah != 0) {
+ if (al != 0) {
+ tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al);
+ al = tmp0;
+ } else {
+ al = ah;
+ }
+ }
+ return al;
+}
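+/* The helper above reduces a two-word equality test to a single value:
+   (ah ^ bh) | (al ^ bl) is zero iff the pairs are equal.  XORs against the
+   zero register are skipped, since x ^ 0 == x. */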
+
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+{
+ TCGReg tmp0 = TCG_TMP0;
+ TCGReg tmp1 = ret;
+
+ assert(ret != TCG_TMP0);
+ if (ret == ah || ret == bh) {
+ assert(ret != TCG_TMP1);
+ tmp1 = TCG_TMP1;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh);
+ tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO);
+ break;
+
+ default:
+ tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh);
+ tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl);
+ tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0);
+ tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh);
+ tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0);
+ break;
+ }
+}
+
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
+ TCGReg bl, TCGReg bh, TCGLabel *l)
+{
+ TCGCond b_cond = TCG_COND_NE;
+ TCGReg tmp = TCG_TMP1;
+
+ /* With branches, we emit between 4 and 9 insns with 2 or 3 branches.
+ With setcond, we emit between 3 and 10 insns and only 1 branch,
+ which ought to get better branch prediction. */
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ b_cond = cond;
+ tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh);
+ break;
+
+ default:
+ /* Minimize code size by preferring a compare not requiring INV. */
+ if (mips_cmp_map[cond] & MIPS_CMP_INV) {
+ cond = tcg_invert_cond(cond);
+ b_cond = TCG_COND_EQ;
+ }
+ tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh);
+ break;
+ }
+
+ tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l);
+}
+
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, TCGReg c2, TCGReg v)
+{
+ MIPSInsn m_opc = OPC_MOVN;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ m_opc = OPC_MOVZ;
+ /* FALLTHRU */
+ case TCG_COND_NE:
+ if (c2 != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2);
+ c1 = TCG_TMP0;
+ }
+ break;
+
+ default:
+ /* Minimize code size by preferring a compare not requiring INV. */
+ if (mips_cmp_map[cond] & MIPS_CMP_INV) {
+ cond = tcg_invert_cond(cond);
+ m_opc = OPC_MOVZ;
+ }
+ tcg_out_setcond(s, cond, TCG_TMP0, c1, c2);
+ c1 = TCG_TMP0;
+ break;
+ }
+
+ tcg_out_opc_reg(s, m_opc, ret, v, c1);
+}
+
+static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
+{
+ /* Note that the ABI requires the called function's address to be
+ loaded into T9, even if a direct branch is in range. */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg);
+
+ /* But do try a direct branch, allowing the cpu better insn prefetch. */
+ if (tail) {
+ if (!tcg_out_opc_jmp(s, OPC_J, arg)) {
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0);
+ }
+ } else {
+ if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) {
+ tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
+ }
+ }
+}
+
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+{
+ tcg_out_call_int(s, arg, false);
+ tcg_out_nop(s);
+}
+
+#if defined(CONFIG_SOFTMMU)
+static void * const qemu_ld_helpers[16] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LESW] = helper_le_ldsw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BESW] = helper_be_ldsw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+static void * const qemu_st_helpers[16] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+/* Helper routines for marshalling helper function arguments into
+ * the correct registers and stack.
+ * I is where we want to put this argument, and is updated and returned
+ * for the next call. ARG is the argument itself.
+ *
+ * We provide routines for arguments that are: an immediate, a 32-bit
+ * value in a register, 16- and 8-bit values in a register (which must be
+ * zero-extended before use), and a 64-bit value in a lo:hi register pair.
+ */
+
+static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
+{
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
+ } else {
+ tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
+ }
+ return i + 1;
+}
+
+static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
+{
+ TCGReg tmp = TCG_TMP0;
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tmp = tcg_target_call_iarg_regs[i];
+ }
+ tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff);
+ return tcg_out_call_iarg_reg(s, i, tmp);
+}
+
+static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
+{
+ TCGReg tmp = TCG_TMP0;
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tmp = tcg_target_call_iarg_regs[i];
+ }
+ tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff);
+ return tcg_out_call_iarg_reg(s, i, tmp);
+}
+
+static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
+{
+ TCGReg tmp = TCG_TMP0;
+ if (arg == 0) {
+ tmp = TCG_REG_ZERO;
+ } else {
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tmp = tcg_target_call_iarg_regs[i];
+ }
+ tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
+ }
+ return tcg_out_call_iarg_reg(s, i, tmp);
+}
+
+static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
+{
+ i = (i + 1) & ~1;
+ i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
+ i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
+ return i;
+}
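+/* Usage sketch, assuming the o32 convention set up below: the slow paths
+   start with i == 1 because slot 0 (A0) is reserved for the CPU env
+   pointer.  A 64-bit value passed next is first aligned to an even slot
+   (i becomes 2) and then lands in A2:A3, high or low half first depending
+   on MIPS_BE. */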
+
+/* Perform the tlb comparison operation. The complete host address is
+ placed in BASE. Clobbers AT, T0, A0. */
+static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
+ TCGReg addrh, int mem_index, TCGMemOp s_bits,
+ tcg_insn_unit *label_ptr[2], bool is_load)
+{
+ int cmp_off
+ = (is_load
+ ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+ int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+
+ tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0,
+ (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+ tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
+
+ /* Compensate for very large offsets. */
+ if (add_off >= 0x8000) {
+        /* Most target envs are smaller than 32k; none are larger than 64k.
+           Simplify the logic here merely to offset by 0x7ff0, giving us a
+           range just shy of 64k.  Check this assumption at build time. */
+ QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
+ tlb_table[NB_MMU_MODES - 1][1])
+ > 0x7ff0 + 0x7fff);
+ tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0);
+ cmp_off -= 0x7ff0;
+ add_off -= 0x7ff0;
+ }
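+    /* To spell out the arithmetic: the LW/ADDIU immediates are signed
+       16-bit, so offsets from A0 must stay within [-32768, 32767].  Biasing
+       A0 by +0x7ff0 and the offsets by -0x7ff0 keeps cmp_off and add_off in
+       range for any tlb_table layout up to just under 64k, which the
+       build-time assert above verifies. */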
+
+ /* Load the (low half) tlb comparator. */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
+ cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));
+
+ /* Mask the page bits, keeping the alignment bits to compare against.
+ In between on 32-bit targets, load the tlb addend for the fast path. */
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+ }
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
+
+ label_ptr[0] = s->code_ptr;
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
+
+ /* Load and test the high half tlb comparator. */
+ if (TARGET_LONG_BITS == 64) {
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF);
+
+ /* Load the tlb addend for the fast path. We can't do it earlier with
+ 64-bit targets or we'll clobber a0 before reading the high half tlb
+ comparator. */
+ tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
+
+ label_ptr[1] = s->code_ptr;
+ tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
+ }
+
+ /* delay slot */
+ tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl);
+}
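+/* Roughly, for a 32-bit guest the fast path emitted above looks like:
+ *     srl   a0, addrl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS
+ *     andi  a0, a0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS
+ *     addu  a0, a0, env
+ *     lw    tmp0, cmp_off(a0)         # tlb comparator
+ *     <movi of page|alignment mask into tmp1>
+ *     lw    a0, add_off(a0)           # tlb addend
+ *     and   tmp1, tmp1, addrl
+ *     bne   tmp1, tmp0, slow_path
+ *      addu base, a0, addrl           # delay slot
+ */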
+
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addrhi,
+ void *raddr, tcg_insn_unit *label_ptr[2])
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = datalo;
+ label->datahi_reg = datahi;
+ label->addrlo_reg = addrlo;
+ label->addrhi_reg = addrhi;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr[0];
+ if (TARGET_LONG_BITS == 64) {
+ label->label_ptr[1] = label_ptr[1];
+ }
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ TCGMemOpIdx oi = l->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGReg v0;
+ int i;
+
+ /* resolve label address */
+ reloc_pc16(l->label_ptr[0], s->code_ptr);
+ if (TARGET_LONG_BITS == 64) {
+ reloc_pc16(l->label_ptr[1], s->code_ptr);
+ }
+
+ i = 1;
+ if (TARGET_LONG_BITS == 64) {
+ i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
+ } else {
+ i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
+ }
+ i = tcg_out_call_iarg_imm(s, i, oi);
+ i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
+ tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+
+ v0 = l->datalo_reg;
+ if ((opc & MO_SIZE) == MO_64) {
+ /* We eliminated V0 from the possible output registers, so it
+ cannot be clobbered here. So we must move V1 first. */
+ if (MIPS_BE) {
+ tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
+ v0 = l->datahi_reg;
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
+ }
+ }
+
+ reloc_pc16(s->code_ptr, l->raddr);
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ TCGMemOpIdx oi = l->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGMemOp s_bits = opc & MO_SIZE;
+ int i;
+
+ /* resolve label address */
+ reloc_pc16(l->label_ptr[0], s->code_ptr);
+ if (TARGET_LONG_BITS == 64) {
+ reloc_pc16(l->label_ptr[1], s->code_ptr);
+ }
+
+ i = 1;
+ if (TARGET_LONG_BITS == 64) {
+ i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
+ } else {
+ i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
+ }
+ switch (s_bits) {
+ case MO_8:
+ i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg);
+ break;
+ case MO_16:
+ i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
+ break;
+ case MO_32:
+ i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
+ break;
+ case MO_64:
+ i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
+ break;
+ default:
+ tcg_abort();
+ }
+ i = tcg_out_call_iarg_imm(s, i, oi);
+
+ /* Tail call to the store helper. Thus force the return address
+ computation to take place in the return address register. */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
+ i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
+ tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+}
+#endif
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+ TCGReg base, TCGMemOp opc)
+{
+ switch (opc & (MO_SSIZE | MO_BSWAP)) {
+ case MO_UB:
+ tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0);
+ break;
+ case MO_SB:
+ tcg_out_opc_imm(s, OPC_LB, datalo, base, 0);
+ break;
+ case MO_UW | MO_BSWAP:
+ tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
+ tcg_out_bswap16(s, datalo, TCG_TMP1);
+ break;
+ case MO_UW:
+ tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0);
+ break;
+ case MO_SW | MO_BSWAP:
+ tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
+ tcg_out_bswap16s(s, datalo, TCG_TMP1);
+ break;
+ case MO_SW:
+ tcg_out_opc_imm(s, OPC_LH, datalo, base, 0);
+ break;
+ case MO_UL | MO_BSWAP:
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0);
+ tcg_out_bswap32(s, datalo, TCG_TMP1);
+ break;
+ case MO_UL:
+ tcg_out_opc_imm(s, OPC_LW, datalo, base, 0);
+ break;
+ case MO_Q | MO_BSWAP:
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF);
+ tcg_out_bswap32(s, datalo, TCG_TMP1);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF);
+ tcg_out_bswap32(s, datahi, TCG_TMP1);
+ break;
+ case MO_Q:
+ tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF);
+ tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg addr_regl, addr_regh __attribute__((unused));
+ TCGReg data_regl, data_regh;
+ TCGMemOpIdx oi;
+ TCGMemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ tcg_insn_unit *label_ptr[2];
+ int mem_index;
+ TCGMemOp s_bits;
+#endif
+ /* Note that we've eliminated V0 from the output registers,
+ so we won't overwrite the base register during loading. */
+ TCGReg base = TCG_REG_V0;
+
+ data_regl = *args++;
+ data_regh = (is_64 ? *args++ : 0);
+ addr_regl = *args++;
+ addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = get_mmuidx(oi);
+ s_bits = opc & MO_SIZE;
+
+ tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index,
+ s_bits, label_ptr, 1);
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
+ add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh,
+ s->code_ptr, label_ptr);
+#else
+ if (GUEST_BASE == 0 && data_regl != addr_regl) {
+ base = addr_regl;
+ } else if (GUEST_BASE == (int16_t)GUEST_BASE) {
+ tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE);
+ tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
+ }
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+ TCGReg base, TCGMemOp opc)
+{
+ switch (opc & (MO_SIZE | MO_BSWAP)) {
+ case MO_8:
+ tcg_out_opc_imm(s, OPC_SB, datalo, base, 0);
+ break;
+
+ case MO_16 | MO_BSWAP:
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff);
+ tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1);
+ datalo = TCG_TMP1;
+ /* FALLTHRU */
+ case MO_16:
+ tcg_out_opc_imm(s, OPC_SH, datalo, base, 0);
+ break;
+
+ case MO_32 | MO_BSWAP:
+ tcg_out_bswap32(s, TCG_TMP1, datalo);
+ datalo = TCG_TMP1;
+ /* FALLTHRU */
+ case MO_32:
+ tcg_out_opc_imm(s, OPC_SW, datalo, base, 0);
+ break;
+
+ case MO_64 | MO_BSWAP:
+ tcg_out_bswap32(s, TCG_TMP1, datalo);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF);
+ tcg_out_bswap32(s, TCG_TMP1, datahi);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF);
+ break;
+ case MO_64:
+ tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF);
+ tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF);
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al,
+ TCGReg ah, TCGArg bl, TCGArg bh, bool cbl,
+ bool cbh, bool is_sub)
+{
+ TCGReg th = TCG_TMP1;
+
+ /* If we have a negative constant such that negating it would
+ make the high part zero, we can (usually) eliminate one insn. */
+ if (cbl && cbh && bh == -1 && bl != 0) {
+ bl = -bl;
+ bh = 0;
+ is_sub = !is_sub;
+ }
+
+ /* By operating on the high part first, we get to use the final
+ carry operation to move back from the temporary. */
+ if (!cbh) {
+ tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh);
+ } else if (bh != 0 || ah == rl) {
+ tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh));
+ } else {
+ th = ah;
+ }
+
+ /* Note that tcg optimization should eliminate the bl == 0 case. */
+ if (is_sub) {
+ if (cbl) {
+ tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl);
+ tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl);
+ tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl);
+ }
+ tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0);
+ } else {
+ if (cbl) {
+ tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl);
+ tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl);
+ } else if (rl == al && rl == bl) {
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31);
+ tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl);
+ } else {
+ tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl);
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl));
+ }
+ tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0);
+ }
+}
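+/* Worked example for the constant flip above: an add2 with the constant
+   pair (bh, bl) = (-1, -4), i.e. adding the 64-bit value -4, becomes a
+   sub2 of (0, 4).  The low half is handled by SLTIU (borrow) plus an
+   ADDIU of -4, and the high half then typically needs only the final
+   SUBU, saving one insn. */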
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg addr_regl, addr_regh __attribute__((unused));
+ TCGReg data_regl, data_regh, base;
+ TCGMemOpIdx oi;
+ TCGMemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ tcg_insn_unit *label_ptr[2];
+ int mem_index;
+ TCGMemOp s_bits;
+#endif
+
+ data_regl = *args++;
+ data_regh = (is_64 ? *args++ : 0);
+ addr_regl = *args++;
+ addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = get_mmuidx(oi);
+ s_bits = opc & 3;
+
+ /* Note that we eliminated the helper's address argument,
+ so we can reuse that for the base. */
+ base = (TARGET_LONG_BITS == 32 ? TCG_REG_A1 : TCG_REG_A2);
+ tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index,
+ s_bits, label_ptr, 0);
+ tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+ add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh,
+ s->code_ptr, label_ptr);
+#else
+ if (GUEST_BASE == 0) {
+ base = addr_regl;
+ } else {
+ base = TCG_REG_A0;
+ if (GUEST_BASE == (int16_t)GUEST_BASE) {
+ tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE);
+ tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
+ }
+ }
+ tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+#endif
+}
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg *args, const int *const_args)
+{
+ MIPSInsn i1, i2;
+ TCGArg a0, a1, a2;
+ int c2;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ c2 = const_args[2];
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ {
+ TCGReg b0 = TCG_REG_ZERO;
+
+ if (a0 & ~0xffff) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff);
+ b0 = TCG_REG_V0;
+ }
+ if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
+ (uintptr_t)tb_ret_addr);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
+ }
+ tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff);
+ }
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ /* direct jump method */
+ s->tb_jmp_offset[a0] = tcg_current_code_size(s);
+ /* Avoid clobbering the address during retranslation. */
+ tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff));
+ } else {
+ /* indirect jump method */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO,
+ (uintptr_t)(s->tb_next + a0));
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
+ }
+ tcg_out_nop(s);
+ s->tb_next_offset[a0] = tcg_current_code_size(s);
+ break;
+ case INDEX_op_br:
+ tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
+ arg_label(a0));
+ break;
+
+ case INDEX_op_ld8u_i32:
+ i1 = OPC_LBU;
+ goto do_ldst;
+ case INDEX_op_ld8s_i32:
+ i1 = OPC_LB;
+ goto do_ldst;
+ case INDEX_op_ld16u_i32:
+ i1 = OPC_LHU;
+ goto do_ldst;
+ case INDEX_op_ld16s_i32:
+ i1 = OPC_LH;
+ goto do_ldst;
+ case INDEX_op_ld_i32:
+ i1 = OPC_LW;
+ goto do_ldst;
+ case INDEX_op_st8_i32:
+ i1 = OPC_SB;
+ goto do_ldst;
+ case INDEX_op_st16_i32:
+ i1 = OPC_SH;
+ goto do_ldst;
+ case INDEX_op_st_i32:
+ i1 = OPC_SW;
+ do_ldst:
+ tcg_out_ldst(s, i1, a0, a1, a2);
+ break;
+
+ case INDEX_op_add_i32:
+ i1 = OPC_ADDU, i2 = OPC_ADDIU;
+ goto do_binary;
+ case INDEX_op_or_i32:
+ i1 = OPC_OR, i2 = OPC_ORI;
+ goto do_binary;
+ case INDEX_op_xor_i32:
+ i1 = OPC_XOR, i2 = OPC_XORI;
+ do_binary:
+ if (c2) {
+ tcg_out_opc_imm(s, i2, a0, a1, a2);
+ break;
+ }
+ do_binaryv:
+ tcg_out_opc_reg(s, i1, a0, a1, a2);
+ break;
+
+ case INDEX_op_sub_i32:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2);
+ break;
+ }
+ i1 = OPC_SUBU;
+ goto do_binary;
+ case INDEX_op_and_i32:
+ if (c2 && a2 != (uint16_t)a2) {
+ int msb = ctz32(~a2) - 1;
+ assert(use_mips32r2_instructions);
+ assert(is_p2m1(a2));
+ tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0);
+ break;
+ }
+ i1 = OPC_AND, i2 = OPC_ANDI;
+ goto do_binary;
+ case INDEX_op_nor_i32:
+ i1 = OPC_NOR;
+ goto do_binaryv;
+
+ case INDEX_op_mul_i32:
+ if (use_mips32_instructions) {
+ tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_MULT, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_mulsh_i32:
+ i1 = OPC_MULT, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_muluh_i32:
+ i1 = OPC_MULTU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_div_i32:
+ i1 = OPC_DIV, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_divu_i32:
+ i1 = OPC_DIVU, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_rem_i32:
+ i1 = OPC_DIV, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_remu_i32:
+ i1 = OPC_DIVU, i2 = OPC_MFHI;
+ do_hilo1:
+ tcg_out_opc_reg(s, i1, 0, a1, a2);
+ tcg_out_opc_reg(s, i2, a0, 0, 0);
+ break;
+
+ case INDEX_op_muls2_i32:
+ i1 = OPC_MULT;
+ goto do_hilo2;
+ case INDEX_op_mulu2_i32:
+ i1 = OPC_MULTU;
+ do_hilo2:
+ tcg_out_opc_reg(s, i1, 0, a2, args[3]);
+ tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0);
+ tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0);
+ break;
+
+ case INDEX_op_not_i32:
+ i1 = OPC_NOR;
+ goto do_unary;
+ case INDEX_op_bswap16_i32:
+ i1 = OPC_WSBH;
+ goto do_unary;
+ case INDEX_op_ext8s_i32:
+ i1 = OPC_SEB;
+ goto do_unary;
+ case INDEX_op_ext16s_i32:
+ i1 = OPC_SEH;
+ do_unary:
+ tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1);
+ break;
+
+ case INDEX_op_sar_i32:
+ i1 = OPC_SRAV, i2 = OPC_SRA;
+ goto do_shift;
+ case INDEX_op_shl_i32:
+ i1 = OPC_SLLV, i2 = OPC_SLL;
+ goto do_shift;
+ case INDEX_op_shr_i32:
+ i1 = OPC_SRLV, i2 = OPC_SRL;
+ goto do_shift;
+ case INDEX_op_rotr_i32:
+ i1 = OPC_ROTRV, i2 = OPC_ROTR;
+ do_shift:
+ if (c2) {
+ tcg_out_opc_sa(s, i2, a0, a1, a2);
+ } else {
+ tcg_out_opc_reg(s, i1, a0, a2, a1);
+ }
+ break;
+ case INDEX_op_rotl_i32:
+ if (c2) {
+ tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2);
+ tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1);
+ }
+ break;
+
+ case INDEX_op_bswap32_i32:
+ tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1);
+ tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16);
+ break;
+
+ case INDEX_op_deposit_i32:
+ tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]);
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
+ break;
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5]));
+ break;
+
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], a0, a1, a2, args[3]);
+ break;
+
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], a0, a1, a2);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, false);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, true);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, false);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, true);
+ break;
+
+ case INDEX_op_add2_i32:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], false);
+ break;
+ case INDEX_op_sub2_i32:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], true);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static const TCGTargetOpDef mips_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_br, { } },
+
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "rZ", "r" } },
+ { INDEX_op_st16_i32, { "rZ", "r" } },
+ { INDEX_op_st_i32, { "rZ", "r" } },
+
+ { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } },
+ { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
+ { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_div_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_divu_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_rem_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_remu_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_sub_i32, { "r", "rZ", "rN" } },
+
+ { INDEX_op_and_i32, { "r", "rZ", "rIK" } },
+ { INDEX_op_nor_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_not_i32, { "r", "rZ" } },
+ { INDEX_op_or_i32, { "r", "rZ", "rIZ" } },
+ { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } },
+
+ { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
+
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
+
+ { INDEX_op_ext8s_i32, { "r", "rZ" } },
+ { INDEX_op_ext16s_i32, { "r", "rZ" } },
+
+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
+ { INDEX_op_brcond_i32, { "rZ", "rZ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
+ { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
+
+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } },
+ { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
+
+#if TARGET_LONG_BITS == 32
+ { INDEX_op_qemu_ld_i32, { "L", "lZ" } },
+ { INDEX_op_qemu_st_i32, { "SZ", "SZ" } },
+ { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } },
+ { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } },
+#else
+ { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } },
+ { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } },
+ { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } },
+ { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } },
+#endif
+ { -1 },
+};
+
+static int tcg_target_callee_save_regs[] = {
+ TCG_REG_S0, /* used for the global env (TCG_AREG0) */
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_S8,
+ TCG_REG_RA, /* should be last for ABI compliance */
+};
+
+/* The Linux kernel doesn't provide any information about the available
+ instruction set. Probe it using a signal handler. */
+
+#include <signal.h>
+
+#ifndef use_movnz_instructions
+bool use_movnz_instructions = false;
+#endif
+
+#ifndef use_mips32_instructions
+bool use_mips32_instructions = false;
+#endif
+
+#ifndef use_mips32r2_instructions
+bool use_mips32r2_instructions = false;
+#endif
+
+static volatile sig_atomic_t got_sigill;
+
+static void sigill_handler(int signo, siginfo_t *si, void *data)
+{
+ /* Skip the faulty instruction */
+ ucontext_t *uc = (ucontext_t *)data;
+ uc->uc_mcontext.pc += 4;
+
+ got_sigill = 1;
+}
+
+static void tcg_target_detect_isa(void)
+{
+ struct sigaction sa_old, sa_new;
+
+ memset(&sa_new, 0, sizeof(sa_new));
+ sa_new.sa_flags = SA_SIGINFO;
+ sa_new.sa_sigaction = sigill_handler;
+ sigaction(SIGILL, &sa_new, &sa_old);
+
+ /* Probe for movn/movz, necessary to implement movcond. */
+#ifndef use_movnz_instructions
+ got_sigill = 0;
+ asm volatile(".set push\n"
+ ".set mips32\n"
+ "movn $zero, $zero, $zero\n"
+ "movz $zero, $zero, $zero\n"
+ ".set pop\n"
+ : : : );
+ use_movnz_instructions = !got_sigill;
+#endif
+
+ /* Probe for MIPS32 instructions. As no subsetting is allowed
+ by the specification, it is only necessary to probe for one
+ of the instructions. */
+#ifndef use_mips32_instructions
+ got_sigill = 0;
+ asm volatile(".set push\n"
+ ".set mips32\n"
+ "mul $zero, $zero\n"
+ ".set pop\n"
+ : : : );
+ use_mips32_instructions = !got_sigill;
+#endif
+
+ /* Probe for MIPS32r2 instructions if MIPS32 instructions are
+ available. As no subsetting is allowed by the specification,
+ it is only necessary to probe for one of the instructions. */
+#ifndef use_mips32r2_instructions
+ if (use_mips32_instructions) {
+ got_sigill = 0;
+ asm volatile(".set push\n"
+ ".set mips32r2\n"
+ "seb $zero, $zero\n"
+ ".set pop\n"
+ : : : );
+ use_mips32r2_instructions = !got_sigill;
+ }
+#endif
+
+ sigaction(SIGILL, &sa_old, NULL);
+}
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i, frame_size;
+
+ /* reserve some stack space, also for TCG temps. */
+ frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4
+ + TCG_STATIC_CALL_ARGS_SIZE
+ + CPU_TEMP_BUF_NLONGS * sizeof(long);
+ frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
+ ~(TCG_TARGET_STACK_ALIGN - 1);
+ tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
+ + TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+ /* TB prologue */
+ tcg_out_addi(s, TCG_REG_SP, -frame_size);
+ for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) {
+ tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4);
+ }
+
+ /* Call generated code */
+ tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tb_ret_addr = s->code_ptr;
+
+ /* TB epilogue */
+ for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) {
+ tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4);
+ }
+
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ tcg_out_addi(s, TCG_REG_SP, frame_size);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ tcg_target_detect_isa();
+ tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff);
+ tcg_regset_set(tcg_target_call_clobber_regs,
+ (1 << TCG_REG_V0) |
+ (1 << TCG_REG_V1) |
+ (1 << TCG_REG_A0) |
+ (1 << TCG_REG_A1) |
+ (1 << TCG_REG_A2) |
+ (1 << TCG_REG_A3) |
+ (1 << TCG_REG_T0) |
+ (1 << TCG_REG_T1) |
+ (1 << TCG_REG_T2) |
+ (1 << TCG_REG_T3) |
+ (1 << TCG_REG_T4) |
+ (1 << TCG_REG_T5) |
+ (1 << TCG_REG_T6) |
+ (1 << TCG_REG_T7) |
+ (1 << TCG_REG_T8) |
+ (1 << TCG_REG_T9));
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */
+
+ tcg_add_target_add_op_defs(mips_op_defs);
+}
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+ uint32_t *ptr = (uint32_t *)jmp_addr;
+ *ptr = deposit32(*ptr, 0, 26, addr >> 2);
+ flush_icache_range(jmp_addr, jmp_addr + 4);
+}
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
new file mode 100644
index 00000000..f5ba52ca
--- /dev/null
+++ b/tcg/mips/tcg-target.h
@@ -0,0 +1,138 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TCG_TARGET_MIPS
+#define TCG_TARGET_MIPS 1
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_NB_REGS 32
+
+typedef enum {
+ TCG_REG_ZERO = 0,
+ TCG_REG_AT,
+ TCG_REG_V0,
+ TCG_REG_V1,
+ TCG_REG_A0,
+ TCG_REG_A1,
+ TCG_REG_A2,
+ TCG_REG_A3,
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_T3,
+ TCG_REG_T4,
+ TCG_REG_T5,
+ TCG_REG_T6,
+ TCG_REG_T7,
+ TCG_REG_S0,
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_T8,
+ TCG_REG_T9,
+ TCG_REG_K0,
+ TCG_REG_K1,
+ TCG_REG_GP,
+ TCG_REG_SP,
+ TCG_REG_S8,
+ TCG_REG_RA,
+
+ TCG_REG_CALL_STACK = TCG_REG_SP,
+ TCG_AREG0 = TCG_REG_S0,
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_STACK_OFFSET 16
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
+
+/* MOVN/MOVZ instructions detection */
+#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
+ defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
+ defined(_MIPS_ARCH_MIPS4)
+#define use_movnz_instructions 1
+#else
+extern bool use_movnz_instructions;
+#endif
+
+/* MIPS32 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
+#define use_mips32_instructions 1
+#else
+extern bool use_mips32_instructions;
+#endif
+
+/* MIPS32R2 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+#define use_mips32r2_instructions 1
+#else
+extern bool use_mips32r2_instructions;
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+
+/* optional instructions detected at runtime */
+#define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions
+#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap32_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
+
+#ifdef __OpenBSD__
+#include <machine/sysarch.h>
+#else
+#include <sys/cachectl.h>
+#endif
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ cacheflush ((void *)start, stop-start, ICACHE);
+}
+
+#endif
diff --git a/tcg/optimize.c b/tcg/optimize.c
new file mode 100644
index 00000000..18283cfd
--- /dev/null
+++ b/tcg/optimize.c
@@ -0,0 +1,1330 @@
+/*
+ * Optimizations for Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2010 Samsung Electronics.
+ * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "qemu-common.h"
+#include "tcg-op.h"
+
+#define CASE_OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64)
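+
+/* Illustrative note (not part of the original source): CASE_OP_32_64(add)
+ expands to "case INDEX_op_add_i32: case INDEX_op_add_i64", so a single
+ case body below handles both the 32-bit and 64-bit variants of an opcode. */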
+
+typedef enum {
+ TCG_TEMP_UNDEF = 0,
+ TCG_TEMP_CONST,
+ TCG_TEMP_COPY,
+} tcg_temp_state;
+
+struct tcg_temp_info {
+ tcg_temp_state state;
+ uint16_t prev_copy;
+ uint16_t next_copy;
+ tcg_target_ulong val;
+ tcg_target_ulong mask;
+};
+
+static struct tcg_temp_info temps[TCG_MAX_TEMPS];
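+
+/* Illustrative note (not part of the original source): temps known to hold
+ the same value are linked into a circular doubly linked list through the
+ next_copy/prev_copy indices; e.g. after "mov t2, t1" both entries are in
+ state TCG_TEMP_COPY and point at each other. */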
+
+/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one other copy,
+ also remove the copy flag from that remaining temp. */
+static void reset_temp(TCGArg temp)
+{
+ if (temps[temp].state == TCG_TEMP_COPY) {
+ if (temps[temp].prev_copy == temps[temp].next_copy) {
+ temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+ } else {
+ temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+ temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
+ }
+ }
+ temps[temp].state = TCG_TEMP_UNDEF;
+ temps[temp].mask = -1;
+}
+
+static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
+ TCGOpcode opc, int nargs)
+{
+ int oi = s->gen_next_op_idx;
+ int pi = s->gen_next_parm_idx;
+ int prev = old_op->prev;
+ int next = old_op - s->gen_op_buf;
+ TCGOp *new_op;
+
+ tcg_debug_assert(oi < OPC_BUF_SIZE);
+ tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
+ s->gen_next_op_idx = oi + 1;
+ s->gen_next_parm_idx = pi + nargs;
+
+ new_op = &s->gen_op_buf[oi];
+ *new_op = (TCGOp){
+ .opc = opc,
+ .args = pi,
+ .prev = prev,
+ .next = next
+ };
+ if (prev >= 0) {
+ s->gen_op_buf[prev].next = oi;
+ } else {
+ s->gen_first_op_idx = oi;
+ }
+ old_op->prev = oi;
+
+ return new_op;
+}
+
+/* Reset all temporaries, given that there are NB_TEMPS of them. */
+static void reset_all_temps(int nb_temps)
+{
+ int i;
+ for (i = 0; i < nb_temps; i++) {
+ temps[i].state = TCG_TEMP_UNDEF;
+ temps[i].mask = -1;
+ }
+}
+
+static int op_bits(TCGOpcode op)
+{
+ const TCGOpDef *def = &tcg_op_defs[op];
+ return def->flags & TCG_OPF_64BIT ? 64 : 32;
+}
+
+static TCGOpcode op_to_mov(TCGOpcode op)
+{
+ switch (op_bits(op)) {
+ case 32:
+ return INDEX_op_mov_i32;
+ case 64:
+ return INDEX_op_mov_i64;
+ default:
+ fprintf(stderr, "op_to_mov: unexpected return value of "
+ "function op_bits.\n");
+ tcg_abort();
+ }
+}
+
+static TCGOpcode op_to_movi(TCGOpcode op)
+{
+ switch (op_bits(op)) {
+ case 32:
+ return INDEX_op_movi_i32;
+ case 64:
+ return INDEX_op_movi_i64;
+ default:
+ fprintf(stderr, "op_to_movi: unexpected return value of "
+ "function op_bits.\n");
+ tcg_abort();
+ }
+}
+
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
+{
+ TCGArg i;
+
+ /* If this is already a global, we can't do better. */
+ if (temp < s->nb_globals) {
+ return temp;
+ }
+
+ /* Search for a global first. */
+ for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+ if (i < s->nb_globals) {
+ return i;
+ }
+ }
+
+ /* If it is a temp, search for a temp local. */
+ if (!s->temps[temp].temp_local) {
+ for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+ if (s->temps[i].temp_local) {
+ return i;
+ }
+ }
+ }
+
+ /* Failing to find a better representation, return the same temp. */
+ return temp;
+}
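+
+/* Illustrative note (not part of the original source): when several temps
+ are copies of each other, the search above prefers rewriting uses to a
+ global, then to a local temp, and only then keeps the plain temp,
+ presumably because globals and locals remain live longer than
+ basic-block temporaries. */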
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+ TCGArg i;
+
+ if (arg1 == arg2) {
+ return true;
+ }
+
+ if (temps[arg1].state != TCG_TEMP_COPY
+ || temps[arg2].state != TCG_TEMP_COPY) {
+ return false;
+ }
+
+ for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+ if (i == arg2) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
+ TCGArg dst, TCGArg val)
+{
+ TCGOpcode new_op = op_to_movi(op->opc);
+ tcg_target_ulong mask;
+
+ op->opc = new_op;
+
+ reset_temp(dst);
+ temps[dst].state = TCG_TEMP_CONST;
+ temps[dst].val = val;
+ mask = val;
+ if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
+ /* High bits of the destination are now garbage. */
+ mask |= ~0xffffffffull;
+ }
+ temps[dst].mask = mask;
+
+ args[0] = dst;
+ args[1] = val;
+}
+
+static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
+ TCGArg dst, TCGArg src)
+{
+ if (temps_are_copies(dst, src)) {
+ tcg_op_remove(s, op);
+ return;
+ }
+
+ if (temps[src].state == TCG_TEMP_CONST) {
+ tcg_opt_gen_movi(s, op, args, dst, temps[src].val);
+ return;
+ }
+
+ TCGOpcode new_op = op_to_mov(op->opc);
+ tcg_target_ulong mask;
+
+ op->opc = new_op;
+
+ reset_temp(dst);
+ mask = temps[src].mask;
+ if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
+ /* High bits of the destination are now garbage. */
+ mask |= ~0xffffffffull;
+ }
+ temps[dst].mask = mask;
+
+ assert(temps[src].state != TCG_TEMP_CONST);
+
+ if (s->temps[src].type == s->temps[dst].type) {
+ if (temps[src].state != TCG_TEMP_COPY) {
+ temps[src].state = TCG_TEMP_COPY;
+ temps[src].next_copy = src;
+ temps[src].prev_copy = src;
+ }
+ temps[dst].state = TCG_TEMP_COPY;
+ temps[dst].next_copy = temps[src].next_copy;
+ temps[dst].prev_copy = src;
+ temps[temps[dst].next_copy].prev_copy = dst;
+ temps[src].next_copy = dst;
+ }
+
+ args[0] = dst;
+ args[1] = src;
+}
+
+static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
+{
+ uint64_t l64, h64;
+
+ switch (op) {
+ CASE_OP_32_64(add):
+ return x + y;
+
+ CASE_OP_32_64(sub):
+ return x - y;
+
+ CASE_OP_32_64(mul):
+ return x * y;
+
+ CASE_OP_32_64(and):
+ return x & y;
+
+ CASE_OP_32_64(or):
+ return x | y;
+
+ CASE_OP_32_64(xor):
+ return x ^ y;
+
+ case INDEX_op_shl_i32:
+ return (uint32_t)x << (y & 31);
+
+ case INDEX_op_shl_i64:
+ return (uint64_t)x << (y & 63);
+
+ case INDEX_op_shr_i32:
+ return (uint32_t)x >> (y & 31);
+
+ case INDEX_op_trunc_shr_i32:
+ case INDEX_op_shr_i64:
+ return (uint64_t)x >> (y & 63);
+
+ case INDEX_op_sar_i32:
+ return (int32_t)x >> (y & 31);
+
+ case INDEX_op_sar_i64:
+ return (int64_t)x >> (y & 63);
+
+ case INDEX_op_rotr_i32:
+ return ror32(x, y & 31);
+
+ case INDEX_op_rotr_i64:
+ return ror64(x, y & 63);
+
+ case INDEX_op_rotl_i32:
+ return rol32(x, y & 31);
+
+ case INDEX_op_rotl_i64:
+ return rol64(x, y & 63);
+
+ CASE_OP_32_64(not):
+ return ~x;
+
+ CASE_OP_32_64(neg):
+ return -x;
+
+ CASE_OP_32_64(andc):
+ return x & ~y;
+
+ CASE_OP_32_64(orc):
+ return x | ~y;
+
+ CASE_OP_32_64(eqv):
+ return ~(x ^ y);
+
+ CASE_OP_32_64(nand):
+ return ~(x & y);
+
+ CASE_OP_32_64(nor):
+ return ~(x | y);
+
+ CASE_OP_32_64(ext8s):
+ return (int8_t)x;
+
+ CASE_OP_32_64(ext16s):
+ return (int16_t)x;
+
+ CASE_OP_32_64(ext8u):
+ return (uint8_t)x;
+
+ CASE_OP_32_64(ext16u):
+ return (uint16_t)x;
+
+ case INDEX_op_ext32s_i64:
+ return (int32_t)x;
+
+ case INDEX_op_ext32u_i64:
+ return (uint32_t)x;
+
+ case INDEX_op_muluh_i32:
+ return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
+ case INDEX_op_mulsh_i32:
+ return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
+
+ case INDEX_op_muluh_i64:
+ mulu64(&l64, &h64, x, y);
+ return h64;
+ case INDEX_op_mulsh_i64:
+ muls64(&l64, &h64, x, y);
+ return h64;
+
+ case INDEX_op_div_i32:
+ /* Avoid crashing on divide by zero, otherwise undefined. */
+ return (int32_t)x / ((int32_t)y ? : 1);
+ case INDEX_op_divu_i32:
+ return (uint32_t)x / ((uint32_t)y ? : 1);
+ case INDEX_op_div_i64:
+ return (int64_t)x / ((int64_t)y ? : 1);
+ case INDEX_op_divu_i64:
+ return (uint64_t)x / ((uint64_t)y ? : 1);
+
+ case INDEX_op_rem_i32:
+ return (int32_t)x % ((int32_t)y ? : 1);
+ case INDEX_op_remu_i32:
+ return (uint32_t)x % ((uint32_t)y ? : 1);
+ case INDEX_op_rem_i64:
+ return (int64_t)x % ((int64_t)y ? : 1);
+ case INDEX_op_remu_i64:
+ return (uint64_t)x % ((uint64_t)y ? : 1);
+
+ default:
+ fprintf(stderr,
+ "Unrecognized operation %d in do_constant_folding.\n", op);
+ tcg_abort();
+ }
+}
+
+static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
+{
+ TCGArg res = do_constant_folding_2(op, x, y);
+ if (op_bits(op) == 32) {
+ res &= 0xffffffff;
+ }
+ return res;
+}
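+
+/* Illustrative note (not part of the original source): the 32-bit masking
+ above makes the folding wrap correctly; e.g. folding add_i32 with
+ x = 0xffffffff and y = 1 yields 0x100000000 in do_constant_folding_2,
+ which is then truncated to 0. */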
+
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int32_t)x < (int32_t)y;
+ case TCG_COND_GE:
+ return (int32_t)x >= (int32_t)y;
+ case TCG_COND_LE:
+ return (int32_t)x <= (int32_t)y;
+ case TCG_COND_GT:
+ return (int32_t)x > (int32_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int64_t)x < (int64_t)y;
+ case TCG_COND_GE:
+ return (int64_t)x >= (int64_t)y;
+ case TCG_COND_LE:
+ return (int64_t)x <= (int64_t)y;
+ case TCG_COND_GT:
+ return (int64_t)x > (int64_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_GT:
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ case TCG_COND_GTU:
+ case TCG_COND_NE:
+ return 0;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ case TCG_COND_EQ:
+ return 1;
+ default:
+ tcg_abort();
+ }
+}
+
+/* Return 2 if the condition can't be simplified, and the result
+ of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
+ TCGArg y, TCGCond c)
+{
+ if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+ switch (op_bits(op)) {
+ case 32:
+ return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
+ case 64:
+ return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
+ default:
+ tcg_abort();
+ }
+ } else if (temps_are_copies(x, y)) {
+ return do_constant_folding_cond_eq(c);
+ } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ return 0;
+ case TCG_COND_GEU:
+ return 1;
+ default:
+ return 2;
+ }
+ } else {
+ return 2;
+ }
+}
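+
+/* Illustrative note (not part of the original source): the 0/1/2 return
+ convention lets callers fold comparisons even when one operand is unknown,
+ e.g. "x <u 0" is always false (returns 0) and "x >=u 0" is always true
+ (returns 1), while 2 means "leave the op alone". */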
+
+/* Return 2 if the condition can't be simplified, and the result
+ of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+ TCGArg al = p1[0], ah = p1[1];
+ TCGArg bl = p2[0], bh = p2[1];
+
+ if (temps[bl].state == TCG_TEMP_CONST
+ && temps[bh].state == TCG_TEMP_CONST) {
+ uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+ if (temps[al].state == TCG_TEMP_CONST
+ && temps[ah].state == TCG_TEMP_CONST) {
+ uint64_t a;
+ a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+ return do_constant_folding_cond_64(a, b, c);
+ }
+ if (b == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ return 0;
+ case TCG_COND_GEU:
+ return 1;
+ default:
+ break;
+ }
+ }
+ }
+ if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+ return do_constant_folding_cond_eq(c);
+ }
+ return 2;
+}
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+ TCGArg a1 = *p1, a2 = *p2;
+ int sum = 0;
+ sum += temps[a1].state == TCG_TEMP_CONST;
+ sum -= temps[a2].state == TCG_TEMP_CONST;
+
+ /* Prefer the constant in second argument, and then the form
+ op a, a, b, which is better handled on non-RISC hosts. */
+ if (sum > 0 || (sum == 0 && dest == a2)) {
+ *p1 = a2;
+ *p2 = a1;
+ return true;
+ }
+ return false;
+}
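+
+/* Illustrative note (not part of the original source): for "add t0, $c, t1"
+ the operands are swapped so the constant ends up second; for
+ "and t0, t1, t0" with no constants they are swapped so the op becomes
+ "and t0, t0, t1", the form that non-RISC hosts handle better as noted
+ in the comment above. */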
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+ int sum = 0;
+ sum += temps[p1[0]].state == TCG_TEMP_CONST;
+ sum += temps[p1[1]].state == TCG_TEMP_CONST;
+ sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+ sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+ if (sum > 0) {
+ TCGArg t;
+ t = p1[0], p1[0] = p2[0], p2[0] = t;
+ t = p1[1], p1[1] = p2[1], p2[1] = t;
+ return true;
+ }
+ return false;
+}
+
+/* Propagate constants and copies, fold constant expressions. */
+void tcg_optimize(TCGContext *s)
+{
+ int oi, oi_next, nb_temps, nb_globals;
+
+ /* The TEMPS array has an element for each temp.
+ If a temp holds a constant then its value is kept in the element's val field.
+ If a temp is a copy of other ones then the other copies are
+ available through the doubly linked circular list. */
+
+ nb_temps = s->nb_temps;
+ nb_globals = s->nb_globals;
+ reset_all_temps(nb_temps);
+
+ for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+ tcg_target_ulong mask, partmask, affected;
+ int nb_oargs, nb_iargs, i;
+ TCGArg tmp;
+
+ TCGOp * const op = &s->gen_op_buf[oi];
+ TCGArg * const args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+
+ oi_next = op->next;
+ if (opc == INDEX_op_call) {
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
+ } else {
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ }
+
+ /* Do copy propagation */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ if (temps[args[i]].state == TCG_TEMP_COPY) {
+ args[i] = find_better_copy(s, args[i]);
+ }
+ }
+
+ /* For commutative operations make constant second argument */
+ switch (opc) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(eqv):
+ CASE_OP_32_64(nand):
+ CASE_OP_32_64(nor):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
+ swap_commutative(args[0], &args[1], &args[2]);
+ break;
+ CASE_OP_32_64(brcond):
+ if (swap_commutative(-1, &args[0], &args[1])) {
+ args[2] = tcg_swap_cond(args[2]);
+ }
+ break;
+ CASE_OP_32_64(setcond):
+ if (swap_commutative(args[0], &args[1], &args[2])) {
+ args[3] = tcg_swap_cond(args[3]);
+ }
+ break;
+ CASE_OP_32_64(movcond):
+ if (swap_commutative(-1, &args[1], &args[2])) {
+ args[5] = tcg_swap_cond(args[5]);
+ }
+ /* For movcond, we canonicalize the "false" input reg to match
+ the destination reg so that the tcg backend can implement
+ a "move if true" operation. */
+ if (swap_commutative(args[0], &args[4], &args[3])) {
+ args[5] = tcg_invert_cond(args[5]);
+ }
+ break;
+ CASE_OP_32_64(add2):
+ swap_commutative(args[0], &args[2], &args[4]);
+ swap_commutative(args[1], &args[3], &args[5]);
+ break;
+ CASE_OP_32_64(mulu2):
+ CASE_OP_32_64(muls2):
+ swap_commutative(args[0], &args[2], &args[3]);
+ break;
+ case INDEX_op_brcond2_i32:
+ if (swap_commutative2(&args[0], &args[2])) {
+ args[4] = tcg_swap_cond(args[4]);
+ }
+ break;
+ case INDEX_op_setcond2_i32:
+ if (swap_commutative2(&args[1], &args[3])) {
+ args[5] = tcg_swap_cond(args[5]);
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
+ and "sub r, 0, a => neg r, a" case. */
+ switch (opc) {
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0) {
+ tcg_opt_gen_movi(s, op, args, args[0], 0);
+ continue;
+ }
+ break;
+ CASE_OP_32_64(sub):
+ {
+ TCGOpcode neg_op;
+ bool have_neg;
+
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ /* Proceed with possible constant folding. */
+ break;
+ }
+ if (opc == INDEX_op_sub_i32) {
+ neg_op = INDEX_op_neg_i32;
+ have_neg = TCG_TARGET_HAS_neg_i32;
+ } else {
+ neg_op = INDEX_op_neg_i64;
+ have_neg = TCG_TARGET_HAS_neg_i64;
+ }
+ if (!have_neg) {
+ break;
+ }
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0) {
+ op->opc = neg_op;
+ reset_temp(args[0]);
+ args[1] = args[2];
+ continue;
+ }
+ }
+ break;
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(nand):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == -1) {
+ i = 1;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(nor):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0) {
+ i = 1;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(andc):
+ if (temps[args[2]].state != TCG_TEMP_CONST
+ && temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == -1) {
+ i = 2;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(orc):
+ CASE_OP_32_64(eqv):
+ if (temps[args[2]].state != TCG_TEMP_CONST
+ && temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0) {
+ i = 2;
+ goto try_not;
+ }
+ break;
+ try_not:
+ {
+ TCGOpcode not_op;
+ bool have_not;
+
+ if (def->flags & TCG_OPF_64BIT) {
+ not_op = INDEX_op_not_i64;
+ have_not = TCG_TARGET_HAS_not_i64;
+ } else {
+ not_op = INDEX_op_not_i32;
+ have_not = TCG_TARGET_HAS_not_i32;
+ }
+ if (!have_not) {
+ break;
+ }
+ op->opc = not_op;
+ reset_temp(args[0]);
+ args[1] = args[i];
+ continue;
+ }
+ default:
+ break;
+ }
+
+ /* Simplify expression for "op r, a, const => mov r, a" cases */
+ switch (opc) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(andc):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0) {
+ tcg_opt_gen_mov(s, op, args, args[0], args[1]);
+ continue;
+ }
+ break;
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(orc):
+ CASE_OP_32_64(eqv):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == -1) {
+ tcg_opt_gen_mov(s, op, args, args[0], args[1]);
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Simplify using known-zero bits. Currently only ops with a single
+ output argument are supported. */
+ mask = -1;
+ affected = -1;
+ switch (opc) {
+ CASE_OP_32_64(ext8s):
+ if ((temps[args[1]].mask & 0x80) != 0) {
+ break;
+ }
+ CASE_OP_32_64(ext8u):
+ mask = 0xff;
+ goto and_const;
+ CASE_OP_32_64(ext16s):
+ if ((temps[args[1]].mask & 0x8000) != 0) {
+ break;
+ }
+ CASE_OP_32_64(ext16u):
+ mask = 0xffff;
+ goto and_const;
+ case INDEX_op_ext32s_i64:
+ if ((temps[args[1]].mask & 0x80000000) != 0) {
+ break;
+ }
+ case INDEX_op_ext32u_i64:
+ mask = 0xffffffffU;
+ goto and_const;
+
+ CASE_OP_32_64(and):
+ mask = temps[args[2]].mask;
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ and_const:
+ affected = temps[args[1]].mask & ~mask;
+ }
+ mask = temps[args[1]].mask & mask;
+ break;
+
+ CASE_OP_32_64(andc):
+ /* Known-zeros does not imply known-ones. Therefore unless
+ args[2] is constant, we can't infer anything from it. */
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = ~temps[args[2]].mask;
+ goto and_const;
+ }
+ /* But we certainly know that no bits outside of args[1]'s mask may be set. */
+ mask = temps[args[1]].mask;
+ break;
+
+ case INDEX_op_sar_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ tmp = temps[args[2]].val & 31;
+ mask = (int32_t)temps[args[1]].mask >> tmp;
+ }
+ break;
+ case INDEX_op_sar_i64:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ tmp = temps[args[2]].val & 63;
+ mask = (int64_t)temps[args[1]].mask >> tmp;
+ }
+ break;
+
+ case INDEX_op_shr_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ tmp = temps[args[2]].val & 31;
+ mask = (uint32_t)temps[args[1]].mask >> tmp;
+ }
+ break;
+ case INDEX_op_shr_i64:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ tmp = temps[args[2]].val & 63;
+ mask = (uint64_t)temps[args[1]].mask >> tmp;
+ }
+ break;
+
+ case INDEX_op_trunc_shr_i32:
+ mask = (uint64_t)temps[args[1]].mask >> args[2];
+ break;
+
+ CASE_OP_32_64(shl):
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
+ mask = temps[args[1]].mask << tmp;
+ }
+ break;
+
+ CASE_OP_32_64(neg):
+ /* Set to 1 all bits at and above the rightmost bit that may be set. */
+ mask = -(temps[args[1]].mask & -temps[args[1]].mask);
+ break;
+
+ CASE_OP_32_64(deposit):
+ mask = deposit64(temps[args[1]].mask, args[3], args[4],
+ temps[args[2]].mask);
+ break;
+
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ mask = temps[args[1]].mask | temps[args[2]].mask;
+ break;
+
+ CASE_OP_32_64(setcond):
+ case INDEX_op_setcond2_i32:
+ mask = 1;
+ break;
+
+ CASE_OP_32_64(movcond):
+ mask = temps[args[3]].mask | temps[args[4]].mask;
+ break;
+
+ CASE_OP_32_64(ld8u):
+ mask = 0xff;
+ break;
+ CASE_OP_32_64(ld16u):
+ mask = 0xffff;
+ break;
+ case INDEX_op_ld32u_i64:
+ mask = 0xffffffffu;
+ break;
+
+ CASE_OP_32_64(qemu_ld):
+ {
+ TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
+ TCGMemOp mop = get_memop(oi);
+ if (!(mop & MO_SIGN)) {
+ mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /* 32-bit ops generate 32-bit results. For the "result is zero" test
+ below, we can ignore the high bits, but for further optimizations we
+ need to record that the high bits contain garbage. */
+ partmask = mask;
+ if (!(def->flags & TCG_OPF_64BIT)) {
+ mask |= ~(tcg_target_ulong)0xffffffffu;
+ partmask &= 0xffffffffu;
+ affected &= 0xffffffffu;
+ }
+
+ if (partmask == 0) {
+ assert(nb_oargs == 1);
+ tcg_opt_gen_movi(s, op, args, args[0], 0);
+ continue;
+ }
+ if (affected == 0) {
+ assert(nb_oargs == 1);
+ tcg_opt_gen_mov(s, op, args, args[0], args[1]);
+ continue;
+ }
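+
+ /* Illustrative note (not part of the original source): as an example of
+ the mask tracking, "and t0, t1, $0xff00" records that no low bits
+ outside 0xff00 can be set in t0, so a following "and t2, t0, $0x00ff"
+ has partmask == 0 and is rewritten to "movi t2, 0" above. */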
+
+ /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
+ switch (opc) {
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
+ if ((temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0)) {
+ tcg_opt_gen_movi(s, op, args, args[0], 0);
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Simplify expression for "op r, a, a => mov r, a" cases */
+ switch (opc) {
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(and):
+ if (temps_are_copies(args[1], args[2])) {
+ tcg_opt_gen_mov(s, op, args, args[0], args[1]);
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Simplify expression for "op r, a, a => movi r, 0" cases */
+ switch (opc) {
+ CASE_OP_32_64(andc):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(xor):
+ if (temps_are_copies(args[1], args[2])) {
+ tcg_opt_gen_movi(s, op, args, args[0], 0);
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Propagate constants through copy operations and do constant
+ folding. Constants will be substituted for arguments by the register
+ allocator where needed and possible. Also detect copies. */
+ switch (opc) {
+ CASE_OP_32_64(mov):
+ tcg_opt_gen_mov(s, op, args, args[0], args[1]);
+ break;
+ CASE_OP_32_64(movi):
+ tcg_opt_gen_movi(s, op, args, args[0], args[1]);
+ break;
+
+ CASE_OP_32_64(not):
+ CASE_OP_32_64(neg):
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16s):
+ CASE_OP_32_64(ext16u):
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ if (temps[args[1]].state == TCG_TEMP_CONST) {
+ tmp = do_constant_folding(opc, temps[args[1]].val, 0);
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_trunc_shr_i32:
+ if (temps[args[1]].state == TCG_TEMP_CONST) {
+ tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ CASE_OP_32_64(andc):
+ CASE_OP_32_64(orc):
+ CASE_OP_32_64(eqv):
+ CASE_OP_32_64(nand):
+ CASE_OP_32_64(nor):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
+ CASE_OP_32_64(div):
+ CASE_OP_32_64(divu):
+ CASE_OP_32_64(rem):
+ CASE_OP_32_64(remu):
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST) {
+ tmp = do_constant_folding(opc, temps[args[1]].val,
+ temps[args[2]].val);
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(deposit):
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST) {
+ tmp = deposit64(temps[args[1]].val, args[3], args[4],
+ temps[args[2]].val);
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(setcond):
+ tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
+ if (tmp != 2) {
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(brcond):
+ tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
+ if (tmp != 2) {
+ if (tmp) {
+ reset_all_temps(nb_temps);
+ op->opc = INDEX_op_br;
+ args[0] = args[3];
+ } else {
+ tcg_op_remove(s, op);
+ }
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(movcond):
+ tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
+ if (tmp != 2) {
+ tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[4]].state == TCG_TEMP_CONST
+ && temps[args[5]].state == TCG_TEMP_CONST) {
+ uint32_t al = temps[args[2]].val;
+ uint32_t ah = temps[args[3]].val;
+ uint32_t bl = temps[args[4]].val;
+ uint32_t bh = temps[args[5]].val;
+ uint64_t a = ((uint64_t)ah << 32) | al;
+ uint64_t b = ((uint64_t)bh << 32) | bl;
+ TCGArg rl, rh;
+ TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+ TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+ if (opc == INDEX_op_add2_i32) {
+ a += b;
+ } else {
+ a -= b;
+ }
+
+ rl = args[0];
+ rh = args[1];
+ tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a);
+ tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32));
+
+ /* We've done all we need to do with the movi. Skip it. */
+ oi_next = op2->next;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_mulu2_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST) {
+ uint32_t a = temps[args[2]].val;
+ uint32_t b = temps[args[3]].val;
+ uint64_t r = (uint64_t)a * b;
+ TCGArg rl, rh;
+ TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+ TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+ rl = args[0];
+ rh = args[1];
+ tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r);
+ tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32));
+
+ /* We've done all we need to do with the movi. Skip it. */
+ oi_next = op2->next;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_brcond2_i32:
+ tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+ if (tmp != 2) {
+ if (tmp) {
+ do_brcond_true:
+ reset_all_temps(nb_temps);
+ op->opc = INDEX_op_br;
+ args[0] = args[5];
+ } else {
+ do_brcond_false:
+ tcg_op_remove(s, op);
+ }
+ } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0
+ && temps[args[3]].val == 0) {
+ /* Simplify LT/GE comparisons vs zero to a single compare
+ vs the high word of the input. */
+ do_brcond_high:
+ reset_all_temps(nb_temps);
+ op->opc = INDEX_op_brcond_i32;
+ args[0] = args[1];
+ args[1] = args[3];
+ args[2] = args[4];
+ args[3] = args[5];
+ } else if (args[4] == TCG_COND_EQ) {
+ /* Simplify EQ comparisons where one of the pairs
+ can be simplified. */
+ tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+ args[0], args[2], TCG_COND_EQ);
+ if (tmp == 0) {
+ goto do_brcond_false;
+ } else if (tmp == 1) {
+ goto do_brcond_high;
+ }
+ tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+ args[1], args[3], TCG_COND_EQ);
+ if (tmp == 0) {
+ goto do_brcond_false;
+ } else if (tmp != 1) {
+ goto do_default;
+ }
+ do_brcond_low:
+ reset_all_temps(nb_temps);
+ op->opc = INDEX_op_brcond_i32;
+ args[1] = args[2];
+ args[2] = args[4];
+ args[3] = args[5];
+ } else if (args[4] == TCG_COND_NE) {
+ /* Simplify NE comparisons where one of the pairs
+ can be simplified. */
+ tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+ args[0], args[2], TCG_COND_NE);
+ if (tmp == 0) {
+ goto do_brcond_high;
+ } else if (tmp == 1) {
+ goto do_brcond_true;
+ }
+ tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+ args[1], args[3], TCG_COND_NE);
+ if (tmp == 0) {
+ goto do_brcond_low;
+ } else if (tmp == 1) {
+ goto do_brcond_true;
+ }
+ goto do_default;
+ } else {
+ goto do_default;
+ }
+ break;
+
+ case INDEX_op_setcond2_i32:
+ tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+ if (tmp != 2) {
+ do_setcond_const:
+ tcg_opt_gen_movi(s, op, args, args[0], tmp);
+ } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[4]].state == TCG_TEMP_CONST
+ && temps[args[3]].val == 0
+ && temps[args[4]].val == 0) {
+ /* Simplify LT/GE comparisons vs zero to a single compare
+ vs the high word of the input. */
+ do_setcond_high:
+ reset_temp(args[0]);
+ temps[args[0]].mask = 1;
+ op->opc = INDEX_op_setcond_i32;
+ args[1] = args[2];
+ args[2] = args[4];
+ args[3] = args[5];
+ } else if (args[5] == TCG_COND_EQ) {
+ /* Simplify EQ comparisons where one of the pairs
+ can be simplified. */
+ tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+ args[1], args[3], TCG_COND_EQ);
+ if (tmp == 0) {
+ goto do_setcond_const;
+ } else if (tmp == 1) {
+ goto do_setcond_high;
+ }
+ tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+ args[2], args[4], TCG_COND_EQ);
+ if (tmp == 0) {
+ goto do_setcond_high;
+ } else if (tmp != 1) {
+ goto do_default;
+ }
+ do_setcond_low:
+ reset_temp(args[0]);
+ temps[args[0]].mask = 1;
+ op->opc = INDEX_op_setcond_i32;
+ args[2] = args[3];
+ args[3] = args[5];
+ } else if (args[5] == TCG_COND_NE) {
+ /* Simplify NE comparisons where one of the pairs
+ can be simplified. */
+ tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+ args[1], args[3], TCG_COND_NE);
+ if (tmp == 0) {
+ goto do_setcond_high;
+ } else if (tmp == 1) {
+ goto do_setcond_const;
+ }
+ tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+ args[2], args[4], TCG_COND_NE);
+ if (tmp == 0) {
+ goto do_setcond_low;
+ } else if (tmp == 1) {
+ goto do_setcond_const;
+ }
+ goto do_default;
+ } else {
+ goto do_default;
+ }
+ break;
+
+ case INDEX_op_call:
+ if (!(args[nb_oargs + nb_iargs + 1]
+ & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
+ for (i = 0; i < nb_globals; i++) {
+ reset_temp(i);
+ }
+ }
+ goto do_reset_output;
+
+ default:
+ do_default:
+ /* Default case: we know nothing about the operation (or were unable
+ to compute the operation result), so no propagation is done.
+ We trash everything if the operation is the end of a basic
+ block, otherwise we only trash the output args. "mask" is
+ the non-zero bits mask for the first output arg. */
+ if (def->flags & TCG_OPF_BB_END) {
+ reset_all_temps(nb_temps);
+ } else {
+ do_reset_output:
+ for (i = 0; i < nb_oargs; i++) {
+ reset_temp(args[i]);
+ /* Save the corresponding known-zero bits mask for the
+ first output argument (only one supported so far). */
+ if (i == 0) {
+ temps[args[i]].mask = mask;
+ }
+ }
+ }
+ break;
+ }
+ }
+}
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
new file mode 100644
index 00000000..2b6eafa0
--- /dev/null
+++ b/tcg/ppc/tcg-target.c
@@ -0,0 +1,2722 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-be-ldst.h"
+
+#if defined _CALL_DARWIN || defined __APPLE__
+#define TCG_TARGET_CALL_DARWIN
+#endif
+#ifdef _CALL_SYSV
+# define TCG_TARGET_CALL_ALIGN_ARGS 1
+#endif
+
+/* For some memory operations, we need a scratch that isn't R0. For the AIX
+ calling convention, we can re-use the TOC register since we'll be reloading
+ it at every call. Otherwise R12 will do nicely as neither a call-saved
+ register nor a parameter register. */
+#ifdef _CALL_AIX
+# define TCG_REG_TMP1 TCG_REG_R2
+#else
+# define TCG_REG_TMP1 TCG_REG_R12
+#endif
+
+/* For the 64-bit target, we don't like the 5 insn sequence needed to build
+ full 64-bit addresses. Better to have a base register to which we can
+ apply a 32-bit displacement.
+
+ There are generally three items of interest:
+ (1) helper functions in the main executable,
+ (2) TranslationBlock data structures,
+ (3) the return address in the epilogue.
+
+ For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer
+ will be inside the main executable, and thus near enough to make a
+ pointer to the epilogue be within 2GB of all helper functions.
+
+ For softmmu, we'll let the kernel choose the address of code_gen_buffer,
+ and odds are it'll be somewhere close to the main malloc arena, and so
+ a pointer to the epilogue will be within 2GB of the TranslationBlocks.
+
+ For --enable-pie, everything will be kinda near everything else,
+ somewhere in high memory.
+
+ Thus we choose to keep the return address in a call-saved register. */
+#define TCG_REG_RA TCG_REG_R31
+#define USE_REG_RA (TCG_TARGET_REG_BITS == 64)
+
+/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
+#define SZP ((int)sizeof(void *))
+
+/* Shorthand for size of a register. */
+#define SZR (TCG_TARGET_REG_BITS / 8)
+
+#define TCG_CT_CONST_S16 0x100
+#define TCG_CT_CONST_U16 0x200
+#define TCG_CT_CONST_S32 0x400
+#define TCG_CT_CONST_U32 0x800
+#define TCG_CT_CONST_ZERO 0x1000
+#define TCG_CT_CONST_MONE 0x2000
+
+static tcg_insn_unit *tb_ret_addr;
+
+#ifndef GUEST_BASE
+#define GUEST_BASE 0
+#endif
+
+#include "elf.h"
+static bool have_isa_2_06;
+#define HAVE_ISA_2_06 have_isa_2_06
+#define HAVE_ISEL have_isa_2_06
+
+#ifdef CONFIG_USE_GUEST_BASE
+#define TCG_GUEST_BASE_REG 30
+#else
+#define TCG_GUEST_BASE_REG 0
+#endif
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "r0",
+ "r1",
+ "r2",
+ "r3",
+ "r4",
+ "r5",
+ "r6",
+ "r7",
+ "r8",
+ "r9",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+ "r16",
+ "r17",
+ "r18",
+ "r19",
+ "r20",
+ "r21",
+ "r22",
+ "r23",
+ "r24",
+ "r25",
+ "r26",
+ "r27",
+ "r28",
+ "r29",
+ "r30",
+ "r31"
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R14, /* call saved registers */
+ TCG_REG_R15,
+ TCG_REG_R16,
+ TCG_REG_R17,
+ TCG_REG_R18,
+ TCG_REG_R19,
+ TCG_REG_R20,
+ TCG_REG_R21,
+ TCG_REG_R22,
+ TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27,
+ TCG_REG_R28,
+ TCG_REG_R29,
+ TCG_REG_R30,
+ TCG_REG_R31,
+ TCG_REG_R12, /* call clobbered, non-arguments */
+ TCG_REG_R11,
+ TCG_REG_R2,
+ TCG_REG_R13,
+ TCG_REG_R10, /* call clobbered, arguments */
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_R7,
+ TCG_REG_R6,
+ TCG_REG_R5,
+ TCG_REG_R4,
+ TCG_REG_R3,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_R3,
+ TCG_REG_R4
+};
+
+static const int tcg_target_callee_save_regs[] = {
+#ifdef TCG_TARGET_CALL_DARWIN
+ TCG_REG_R11,
+#endif
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R16,
+ TCG_REG_R17,
+ TCG_REG_R18,
+ TCG_REG_R19,
+ TCG_REG_R20,
+ TCG_REG_R21,
+ TCG_REG_R22,
+ TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27, /* currently used for the global env */
+ TCG_REG_R28,
+ TCG_REG_R29,
+ TCG_REG_R30,
+ TCG_REG_R31
+};
+
+static inline bool in_range_b(tcg_target_long target)
+{
+ return target == sextract64(target, 0, 26);
+}
+
+static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+ assert(in_range_b(disp));
+ return disp & 0x3fffffc;
+}
+
+static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target);
+}
+
+static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+ assert(disp == (int16_t) disp);
+ return disp & 0xfffc;
+}
+
+static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+ *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target);
+}
+
+static inline void tcg_out_b_noaddr(TCGContext *s, int insn)
+{
+ unsigned retrans = *s->code_ptr & 0x3fffffc;
+ tcg_out32(s, insn | retrans);
+}
+
+static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
+{
+ unsigned retrans = *s->code_ptr & 0xfffc;
+ tcg_out32(s, insn | retrans);
+}
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ tcg_insn_unit *target = (tcg_insn_unit *)value;
+
+ assert(addend == 0);
+ switch (type) {
+ case R_PPC_REL14:
+ reloc_pc14(code_ptr, target);
+ break;
+ case R_PPC_REL24:
+ reloc_pc24(code_ptr, target);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch (ct_str[0]) {
+ case 'A': case 'B': case 'C': case 'D':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
+ break;
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ break;
+ case 'L': /* qemu_ld constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+#ifdef CONFIG_SOFTMMU
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
+#endif
+ break;
+ case 'S': /* qemu_st constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+#ifdef CONFIG_SOFTMMU
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
+#endif
+ break;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_S16;
+ break;
+ case 'J':
+ ct->ct |= TCG_CT_CONST_U16;
+ break;
+ case 'M':
+ ct->ct |= TCG_CT_CONST_MONE;
+ break;
+ case 'T':
+ ct->ct |= TCG_CT_CONST_S32;
+ break;
+ case 'U':
+ ct->ct |= TCG_CT_CONST_U32;
+ break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+/* test if a constant matches the constraint */
+static int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+
+ /* The only 32-bit constraint we use aside from
+ TCG_CT_CONST is TCG_CT_CONST_S16. */
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
+ return 1;
+ }
+ return 0;
+}
+
+#define OPCD(opc) ((opc)<<26)
+#define XO19(opc) (OPCD(19)|((opc)<<1))
+#define MD30(opc) (OPCD(30)|((opc)<<2))
+#define MDS30(opc) (OPCD(30)|((opc)<<1))
+#define XO31(opc) (OPCD(31)|((opc)<<1))
+#define XO58(opc) (OPCD(58)|(opc))
+#define XO62(opc) (OPCD(62)|(opc))
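+
+/* Illustrative note (not part of the original source): the primary opcode
+ occupies the most significant 6 bits of a PowerPC instruction, hence the
+ <<26 in OPCD. For example ADD below is XO31(266): primary opcode 31 with
+ extended opcode 266, the ISA encoding of "add". */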
+
+#define B OPCD( 18)
+#define BC OPCD( 16)
+#define LBZ OPCD( 34)
+#define LHZ OPCD( 40)
+#define LHA OPCD( 42)
+#define LWZ OPCD( 32)
+#define STB OPCD( 38)
+#define STH OPCD( 44)
+#define STW OPCD( 36)
+
+#define STD XO62( 0)
+#define STDU XO62( 1)
+#define STDX XO31(149)
+
+#define LD XO58( 0)
+#define LDX XO31( 21)
+#define LDU XO58( 1)
+#define LWA XO58( 2)
+#define LWAX XO31(341)
+
+#define ADDIC OPCD( 12)
+#define ADDI OPCD( 14)
+#define ADDIS OPCD( 15)
+#define ORI OPCD( 24)
+#define ORIS OPCD( 25)
+#define XORI OPCD( 26)
+#define XORIS OPCD( 27)
+#define ANDI OPCD( 28)
+#define ANDIS OPCD( 29)
+#define MULLI OPCD( 7)
+#define CMPLI OPCD( 10)
+#define CMPI OPCD( 11)
+#define SUBFIC OPCD( 8)
+
+#define LWZU OPCD( 33)
+#define STWU OPCD( 37)
+
+#define RLWIMI OPCD( 20)
+#define RLWINM OPCD( 21)
+#define RLWNM OPCD( 23)
+
+#define RLDICL MD30( 0)
+#define RLDICR MD30( 1)
+#define RLDIMI MD30( 3)
+#define RLDCL MDS30( 8)
+
+#define BCLR XO19( 16)
+#define BCCTR XO19(528)
+#define CRAND XO19(257)
+#define CRANDC XO19(129)
+#define CRNAND XO19(225)
+#define CROR XO19(449)
+#define CRNOR XO19( 33)
+
+#define EXTSB XO31(954)
+#define EXTSH XO31(922)
+#define EXTSW XO31(986)
+#define ADD XO31(266)
+#define ADDE XO31(138)
+#define ADDME XO31(234)
+#define ADDZE XO31(202)
+#define ADDC XO31( 10)
+#define AND XO31( 28)
+#define SUBF XO31( 40)
+#define SUBFC XO31( 8)
+#define SUBFE XO31(136)
+#define SUBFME XO31(232)
+#define SUBFZE XO31(200)
+#define OR XO31(444)
+#define XOR XO31(316)
+#define MULLW XO31(235)
+#define MULHW XO31( 75)
+#define MULHWU XO31( 11)
+#define DIVW XO31(491)
+#define DIVWU XO31(459)
+#define CMP XO31( 0)
+#define CMPL XO31( 32)
+#define LHBRX XO31(790)
+#define LWBRX XO31(534)
+#define LDBRX XO31(532)
+#define STHBRX XO31(918)
+#define STWBRX XO31(662)
+#define STDBRX XO31(660)
+#define MFSPR XO31(339)
+#define MTSPR XO31(467)
+#define SRAWI XO31(824)
+#define NEG XO31(104)
+#define MFCR XO31( 19)
+#define MFOCRF (MFCR | (1u << 20))
+#define NOR XO31(124)
+#define CNTLZW XO31( 26)
+#define CNTLZD XO31( 58)
+#define ANDC XO31( 60)
+#define ORC XO31(412)
+#define EQV XO31(284)
+#define NAND XO31(476)
+#define ISEL XO31( 15)
+
+#define MULLD XO31(233)
+#define MULHD XO31( 73)
+#define MULHDU XO31( 9)
+#define DIVD XO31(489)
+#define DIVDU XO31(457)
+
+#define LBZX XO31( 87)
+#define LHZX XO31(279)
+#define LHAX XO31(343)
+#define LWZX XO31( 23)
+#define STBX XO31(215)
+#define STHX XO31(407)
+#define STWX XO31(151)
+
+#define SPR(a, b) ((((a)<<5)|(b))<<11)
+#define LR SPR(8, 0)
+#define CTR SPR(9, 0)
+
+#define SLW XO31( 24)
+#define SRW XO31(536)
+#define SRAW XO31(792)
+
+#define SLD XO31( 27)
+#define SRD XO31(539)
+#define SRAD XO31(794)
+#define SRADI XO31(413<<1)
+
+#define TW XO31( 4)
+#define TRAP (TW | TO(31))
+
+#define NOP ORI /* ori 0,0,0 */
+
+#define RT(r) ((r)<<21)
+#define RS(r) ((r)<<21)
+#define RA(r) ((r)<<16)
+#define RB(r) ((r)<<11)
+#define TO(t) ((t)<<21)
+#define SH(s) ((s)<<11)
+#define MB(b) ((b)<<6)
+#define ME(e) ((e)<<1)
+#define BO(o) ((o)<<21)
+#define MB64(b) ((b)<<5)
+#define FXM(b) (1 << (19 - (b)))
+
+#define LK 1
+
+#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
+#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
+#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
+#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
+
+#define BF(n) ((n)<<23)
+#define BI(n, c) (((c)+((n)*4))<<16)
+#define BT(n, c) (((c)+((n)*4))<<21)
+#define BA(n, c) (((c)+((n)*4))<<16)
+#define BB(n, c) (((c)+((n)*4))<<11)
+#define BC_(n, c) (((c)+((n)*4))<<6)
+
+#define BO_COND_TRUE BO(12)
+#define BO_COND_FALSE BO( 4)
+#define BO_ALWAYS BO(20)
+
+enum {
+ CR_LT,
+ CR_GT,
+ CR_EQ,
+ CR_SO
+};
+
+static const uint32_t tcg_to_bc[] = {
+ [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE,
+ [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE,
+ [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE,
+ [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE,
+ [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE,
+ [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE,
+ [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
+ [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
+ [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
+ [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
+};
+
+/* The low bit here is set if the RA and RB fields must be inverted. */
+static const uint32_t tcg_to_isel[] = {
+ [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ),
+ [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1,
+ [TCG_COND_LT] = ISEL | BC_(7, CR_LT),
+ [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1,
+ [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1,
+ [TCG_COND_GT] = ISEL | BC_(7, CR_GT),
+ [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
+ [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
+ [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
+ [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
+};
+
+static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
+ TCGReg base, tcg_target_long offset);
+
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+ if (ret != arg) {
+ tcg_out32(s, OR | SAB(arg, ret, arg));
+ }
+}
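+
+/* Illustrative note (not part of the original source): OR with both source
+ operands equal is the canonical PowerPC register move, i.e. the
+ "mr ret,arg" mnemonic. */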
+
+static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+ int sh, int mb)
+{
+ assert(TCG_TARGET_REG_BITS == 64);
+ sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
+ mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
+ tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
+}
+
+static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+ int sh, int mb, int me)
+{
+ tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
+}
+
+static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ tcg_out_rld(s, RLDICL, dst, src, 0, 32);
+}
+
+static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
+}
+
+static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
+}
+
+static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
+}
+
+static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
+}
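+
+/* Illustrative note (not part of the original source): these helpers map to
+ the usual rotate-and-mask mnemonics; e.g. tcg_out_shri32(s, dst, src, c)
+ emits rlwinm dst,src,32-c,c,31, the standard encoding of "srwi dst,src,c". */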
+
+static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
+{
+ if (arg == (int16_t) arg) {
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ } else {
+ tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
+ if (arg & 0xffff) {
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
+ }
+ }
+}
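+
+/* Illustrative note (not part of the original source): loading 0x12345678
+ takes the second path above and emits "addis ret,0,0x1234" (i.e. lis)
+ followed by "ori ret,ret,0x5678". */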
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long arg)
+{
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+ if (type == TCG_TYPE_I32 || arg == (int32_t)arg) {
+ tcg_out_movi32(s, ret, arg);
+ } else if (arg == (uint32_t)arg && !(arg & 0x8000)) {
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ } else {
+ int32_t high;
+
+ if (USE_REG_RA) {
+ intptr_t diff = arg - (intptr_t)tb_ret_addr;
+ if (diff == (int32_t)diff) {
+ tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff);
+ return;
+ }
+ }
+
+ high = arg >> 31 >> 1;
+ tcg_out_movi32(s, ret, high);
+ if (high) {
+ tcg_out_shli64(s, ret, ret, 32);
+ }
+ if (arg & 0xffff0000) {
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ }
+ if (arg & 0xffff) {
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
+ }
+ }
+}
+
+static bool mask_operand(uint32_t c, int *mb, int *me)
+{
+ uint32_t lsb, test;
+
+ /* Accept a bit pattern like:
+ 0....01....1
+ 1....10....0
+ 0..01..10..0
+ Keep track of the transitions. */
+ if (c == 0 || c == -1) {
+ return false;
+ }
+ test = c;
+ lsb = test & -test;
+ test += lsb;
+ if (test & (test - 1)) {
+ return false;
+ }
+
+ *me = clz32(lsb);
+ *mb = test ? clz32(test & -test) + 1 : 0;
+ return true;
+}
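+
+/* Illustrative note (not part of the original source): for c = 0x00ffff00
+ the code above yields mb = 8 and me = 23, i.e. the rlwinm mask selecting
+ bits 8..23 in big-endian bit numbering, which is exactly 0x00ffff00. */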
+
+static bool mask64_operand(uint64_t c, int *mb, int *me)
+{
+ uint64_t lsb;
+
+ if (c == 0) {
+ return false;
+ }
+
+ lsb = c & -c;
+ /* Accept 1..10..0. */
+ if (c == -lsb) {
+ *mb = 0;
+ *me = clz64(lsb);
+ return true;
+ }
+ /* Accept 0..01..1. */
+ if (lsb == 1 && (c & (c + 1)) == 0) {
+ *mb = clz64(c + 1) + 1;
+ *me = 63;
+ return true;
+ }
+ return false;
+}
+
+static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ int mb, me;
+
+ if ((c & 0xffff) == c) {
+ tcg_out32(s, ANDI | SAI(src, dst, c));
+ return;
+ } else if ((c & 0xffff0000) == c) {
+ tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+ return;
+ } else if (mask_operand(c, &mb, &me)) {
+ tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
+ tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
+ }
+}
+
+static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
+{
+ int mb, me;
+
+ assert(TCG_TARGET_REG_BITS == 64);
+ if ((c & 0xffff) == c) {
+ tcg_out32(s, ANDI | SAI(src, dst, c));
+ return;
+ } else if ((c & 0xffff0000) == c) {
+ tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+ return;
+ } else if (mask64_operand(c, &mb, &me)) {
+ if (mb == 0) {
+ tcg_out_rld(s, RLDICR, dst, src, 0, me);
+ } else {
+ tcg_out_rld(s, RLDICL, dst, src, 0, mb);
+ }
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
+ tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
+ }
+}
+
+static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
+ int op_lo, int op_hi)
+{
+ if (c >> 16) {
+ tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
+ src = dst;
+ }
+ if (c & 0xffff) {
+ tcg_out32(s, op_lo | SAI(src, dst, c));
+ src = dst;
+ }
+}
+
+static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ tcg_out_zori32(s, dst, src, c, ORI, ORIS);
+}
+
+static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ tcg_out_zori32(s, dst, src, c, XORI, XORIS);
+}
+
+static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
+{
+ ptrdiff_t disp = tcg_pcrel_diff(s, target);
+ if (in_range_b(disp)) {
+ tcg_out32(s, B | (disp & 0x3fffffc) | mask);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
+ tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS | mask);
+ }
+}
+
+static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
+ TCGReg base, tcg_target_long offset)
+{
+ tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
+ bool is_store = false;
+ TCGReg rs = TCG_REG_TMP1;
+
+ switch (opi) {
+ case LD: case LWA:
+ align = 3;
+ /* FALLTHRU */
+ default:
+ if (rt != TCG_REG_R0) {
+ rs = rt;
+ break;
+ }
+ break;
+ case STD:
+ align = 3;
+ /* FALLTHRU */
+ case STB: case STH: case STW:
+ is_store = true;
+ break;
+ }
+
+ /* For unaligned, or very large offsets, use the indexed form. */
+ if (offset & align || offset != (int32_t)offset) {
+ if (rs == base) {
+ rs = TCG_REG_R0;
+ }
+ tcg_debug_assert(!is_store || rs != rt);
+ tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
+ tcg_out32(s, opx | TAB(rt, base, rs));
+ return;
+ }
+
+ l0 = (int16_t)offset;
+ offset = (offset - l0) >> 16;
+ l1 = (int16_t)offset;
+
+ if (l1 < 0 && orig >= 0) {
+ extra = 0x4000;
+ l1 = (int16_t)(offset - 0x4000);
+ }
+ if (l1) {
+ tcg_out32(s, ADDIS | TAI(rs, base, l1));
+ base = rs;
+ }
+ if (extra) {
+ tcg_out32(s, ADDIS | TAI(rs, base, extra));
+ base = rs;
+ }
+ if (opi != ADDI || base != rt || l0 != 0) {
+ tcg_out32(s, opi | TAI(rt, base, l0));
+ }
+}
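+
+/* Illustrative note (not part of the original source): for offset 0x12348000
+ the decomposition above gives l0 = -0x8000 and l1 = 0x1235, producing
+ "addis tmp,base,0x1235" followed by the D-form access with displacement
+ -0x8000, which adds up to base + 0x12348000. */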
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg arg1, intptr_t arg2)
+{
+ int opi, opx;
+
+ assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+ if (type == TCG_TYPE_I32) {
+ opi = LWZ, opx = LWZX;
+ } else {
+ opi = LD, opx = LDX;
+ }
+ tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ int opi, opx;
+
+ assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+ if (type == TCG_TYPE_I32) {
+ opi = STW, opx = STWX;
+ } else {
+ opi = STD, opx = STDX;
+ }
+ tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
+}
+
+static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
+ int const_arg2, int cr, TCGType type)
+{
+ int imm;
+ uint32_t op;
+
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+
+ /* Simplify the comparisons below wrt CMPI. */
+ if (type == TCG_TYPE_I32) {
+ arg2 = (int32_t)arg2;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ if (const_arg2) {
+ if ((int16_t) arg2 == arg2) {
+ op = CMPI;
+ imm = 1;
+ break;
+ } else if ((uint16_t) arg2 == arg2) {
+ op = CMPLI;
+ imm = 1;
+ break;
+ }
+ }
+ op = CMPL;
+ imm = 0;
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ if (const_arg2) {
+ if ((int16_t) arg2 == arg2) {
+ op = CMPI;
+ imm = 1;
+ break;
+ }
+ }
+ op = CMP;
+ imm = 0;
+ break;
+
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ if (const_arg2) {
+ if ((uint16_t) arg2 == arg2) {
+ op = CMPLI;
+ imm = 1;
+ break;
+ }
+ }
+ op = CMPL;
+ imm = 0;
+ break;
+
+ default:
+ tcg_abort();
+ }
+ op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
+
+ if (imm) {
+ tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
+ } else {
+ if (const_arg2) {
+ tcg_out_movi(s, type, TCG_REG_R0, arg2);
+ arg2 = TCG_REG_R0;
+ }
+ tcg_out32(s, op | RA(arg1) | RB(arg2));
+ }
+}
+
+static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
+ TCGReg dst, TCGReg src)
+{
+ if (type == TCG_TYPE_I32) {
+ tcg_out32(s, CNTLZW | RS(src) | RA(dst));
+ tcg_out_shri32(s, dst, dst, 5);
+ } else {
+ tcg_out32(s, CNTLZD | RS(src) | RA(dst));
+ tcg_out_shri64(s, dst, dst, 6);
+ }
+}
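+
+/* Illustrative note (not part of the original source): cntlzw returns 32
+ only when the source is zero, so shifting right by 5 maps 32 to 1 and
+ 0..31 to 0, leaving dst = (src == 0). The 64-bit variant uses cntlzd and
+ a shift by 6 in the same way. */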
+
+static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ /* X != 0 implies X + -1 generates a carry. The subfe then computes
+ R = ~(X-1) + X + CA; the sum of ~(X-1) and X wraps to zero, so
+ R = CA = (X != 0). */
+ if (dst != src) {
+ tcg_out32(s, ADDIC | TAI(dst, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, dst, src));
+ } else {
+ tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
+ }
+}
+
+static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
+ bool const_arg2)
+{
+ if (const_arg2) {
+ if ((uint32_t)arg2 == arg2) {
+ tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
+ tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
+ }
+ } else {
+ tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
+ }
+ return TCG_REG_R0;
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGArg arg0, TCGArg arg1, TCGArg arg2,
+ int const_arg2)
+{
+ int crop, sh;
+
+ assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+
+ /* Ignore high bits of a potential constant arg2. */
+ if (type == TCG_TYPE_I32) {
+ arg2 = (uint32_t)arg2;
+ }
+
+ /* Handle common and trivial cases before handling anything else. */
+ if (arg2 == 0) {
+ switch (cond) {
+ case TCG_COND_EQ:
+ tcg_out_setcond_eq0(s, type, arg0, arg1);
+ return;
+ case TCG_COND_NE:
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ tcg_out_ext32u(s, TCG_REG_R0, arg1);
+ arg1 = TCG_REG_R0;
+ }
+ tcg_out_setcond_ne0(s, arg0, arg1);
+ return;
+ case TCG_COND_GE:
+ tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
+ arg1 = arg0;
+ /* FALLTHRU */
+ case TCG_COND_LT:
+ /* Extract the sign bit. */
+ if (type == TCG_TYPE_I32) {
+ tcg_out_shri32(s, arg0, arg1, 31);
+ } else {
+ tcg_out_shri64(s, arg0, arg1, 63);
+ }
+ return;
+ default:
+ break;
+ }
+ }
+
+ /* If we have ISEL, we can implement everything with 3 or 4 insns.
+ All other cases below are also at least 3 insns, so speed up the
+ code generator by not considering them and always using ISEL. */
+ if (HAVE_ISEL) {
+ int isel, tab;
+
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+
+ isel = tcg_to_isel[cond];
+
+ tcg_out_movi(s, type, arg0, 1);
+ if (isel & 1) {
+ /* arg0 = (bc ? 0 : 1) */
+ tab = TAB(arg0, 0, arg0);
+ isel &= ~1;
+ } else {
+ /* arg0 = (bc ? 1 : 0) */
+ tcg_out_movi(s, type, TCG_REG_R0, 0);
+ tab = TAB(arg0, arg0, TCG_REG_R0);
+ }
+ tcg_out32(s, isel | tab);
+ return;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+ tcg_out_setcond_eq0(s, type, arg0, arg1);
+ return;
+
+ case TCG_COND_NE:
+ arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+ /* Discard the high bits only once, rather than both inputs. */
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ tcg_out_ext32u(s, TCG_REG_R0, arg1);
+ arg1 = TCG_REG_R0;
+ }
+ tcg_out_setcond_ne0(s, arg0, arg1);
+ return;
+
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ sh = 30;
+ crop = 0;
+ goto crtest;
+
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ sh = 29;
+ crop = 0;
+ goto crtest;
+
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ sh = 31;
+ crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
+ goto crtest;
+
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ sh = 31;
+ crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
+ crtest:
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+ if (crop) {
+ tcg_out32(s, crop);
+ }
+ tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
+ tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
+{
+ if (l->has_value) {
+ tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr));
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
+ tcg_out_bc_noaddr(s, bc);
+ }
+}
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ TCGLabel *l, TCGType type)
+{
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+ tcg_out_bc(s, tcg_to_bc[cond], l);
+}
+
+static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
+ TCGArg v2, bool const_c2)
+{
+ /* If for some reason both inputs are zero, don't produce bad code. */
+ if (v1 == 0 && v2 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ return;
+ }
+
+ tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
+
+ if (HAVE_ISEL) {
+ int isel = tcg_to_isel[cond];
+
+ /* Swap the V operands if the operation indicates inversion. */
+ if (isel & 1) {
+ int t = v1;
+ v1 = v2;
+ v2 = t;
+ isel &= ~1;
+ }
+ /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */
+ if (v2 == 0) {
+ tcg_out_movi(s, type, TCG_REG_R0, 0);
+ }
+ tcg_out32(s, isel | TAB(dest, v1, v2));
+ } else {
+ if (dest == v2) {
+ cond = tcg_invert_cond(cond);
+ v2 = v1;
+ } else if (dest != v1) {
+ if (v1 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ } else {
+ tcg_out_mov(s, type, dest, v1);
+ }
+ }
+ /* Branch forward over one insn */
+ tcg_out32(s, tcg_to_bc[cond] | 8);
+ if (v2 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ } else {
+ tcg_out_mov(s, type, dest, v2);
+ }
+ }
+}
+
+static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ static const struct { uint8_t bit1, bit2; } bits[] = {
+ [TCG_COND_LT ] = { CR_LT, CR_LT },
+ [TCG_COND_LE ] = { CR_LT, CR_GT },
+ [TCG_COND_GT ] = { CR_GT, CR_GT },
+ [TCG_COND_GE ] = { CR_GT, CR_LT },
+ [TCG_COND_LTU] = { CR_LT, CR_LT },
+ [TCG_COND_LEU] = { CR_LT, CR_GT },
+ [TCG_COND_GTU] = { CR_GT, CR_GT },
+ [TCG_COND_GEU] = { CR_GT, CR_LT },
+ };
+
+ TCGCond cond = args[4], cond2;
+ TCGArg al, ah, bl, bh;
+ int blconst, bhconst;
+ int op, bit1, bit2;
+
+ al = args[0];
+ ah = args[1];
+ bl = args[2];
+ bh = args[3];
+ blconst = const_args[2];
+ bhconst = const_args[3];
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ op = CRAND;
+ goto do_equality;
+ case TCG_COND_NE:
+ op = CRNAND;
+ do_equality:
+ tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
+ tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
+ tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ case TCG_COND_GE:
+ case TCG_COND_LTU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ case TCG_COND_GEU:
+ bit1 = bits[cond].bit1;
+ bit2 = bits[cond].bit2;
+ op = (bit1 != bit2 ? CRANDC : CRAND);
+ cond2 = tcg_unsigned_cond(cond);
+
+ tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
+ tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
+ tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
+ tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ tcg_out_cmp2(s, args + 1, const_args + 1);
+ tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
+ tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
+}
+
+static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ tcg_out_cmp2(s, args, const_args);
+ tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
+}
+
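+/* Patch the jump emitted for goto_tb at jmp_addr to branch to addr, then
+   flush the caches so the new instruction is visible.  */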
+void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
+{
+ TCGContext s;
+
+ s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
+ tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
+ flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
+}
+
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
+{
+#ifdef _CALL_AIX
+    /* Look through the descriptor.  If the branch is in range and the TOC
+       value is cheap to build, load the TOC and branch there directly.  */
+ void *tgt = ((void **)target)[0];
+ uintptr_t toc = ((uintptr_t *)target)[1];
+ intptr_t diff = tcg_pcrel_diff(s, tgt);
+
+ if (in_range_b(diff) && toc == (uint32_t)toc) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
+ tcg_out_b(s, LK, tgt);
+ } else {
+ /* Fold the low bits of the constant into the addresses below. */
+ intptr_t arg = (intptr_t)target;
+ int ofs = (int16_t)arg;
+
+ if (ofs + 8 < 0x8000) {
+ arg -= ofs;
+ } else {
+ ofs = 0;
+ }
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
+ tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
+ tcg_out32(s, BCCTR | BO_ALWAYS | LK);
+ }
+#elif defined(_CALL_ELF) && _CALL_ELF == 2
+ intptr_t diff;
+
+ /* In the ELFv2 ABI, we have to set up r12 to contain the destination
+ address, which the callee uses to compute its TOC address. */
+ /* FIXME: when the branch is in range, we could avoid r12 load if we
+ knew that the destination uses the same TOC, and what its local
+ entry point offset is. */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
+
+ diff = tcg_pcrel_diff(s, target);
+ if (in_range_b(diff)) {
+ tcg_out_b(s, LK, target);
+ } else {
+ tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS | LK);
+ }
+#else
+ tcg_out_b(s, LK, target);
+#endif
+}
+
+static const uint32_t qemu_ldx_opc[16] = {
+ [MO_UB] = LBZX,
+ [MO_UW] = LHZX,
+ [MO_UL] = LWZX,
+ [MO_Q] = LDX,
+ [MO_SW] = LHAX,
+ [MO_SL] = LWAX,
+ [MO_BSWAP | MO_UB] = LBZX,
+ [MO_BSWAP | MO_UW] = LHBRX,
+ [MO_BSWAP | MO_UL] = LWBRX,
+ [MO_BSWAP | MO_Q] = LDBRX,
+};
+
+static const uint32_t qemu_stx_opc[16] = {
+ [MO_UB] = STBX,
+ [MO_UW] = STHX,
+ [MO_UL] = STWX,
+ [MO_Q] = STDX,
+ [MO_BSWAP | MO_UB] = STBX,
+ [MO_BSWAP | MO_UW] = STHBRX,
+ [MO_BSWAP | MO_UL] = STWBRX,
+ [MO_BSWAP | MO_Q] = STDBRX,
+};
+
+static const uint32_t qemu_exts_opc[4] = {
+ EXTSB, EXTSH, EXTSW, 0
+};
+
+#if defined (CONFIG_SOFTMMU)
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[16] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_st_helpers[16] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+/* Perform the TLB load and compare. Places the result of the comparison
+ in CR7, loads the addend of the TLB into R3, and returns the register
+ containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
+
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
+ TCGReg addrlo, TCGReg addrhi,
+ int mem_index, bool is_read)
+{
+ int cmp_off
+ = (is_read
+ ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+ int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+ TCGReg base = TCG_AREG0;
+
+ /* Extract the page index, shifted into place for tlb index. */
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (TARGET_LONG_BITS == 32) {
+ /* Zero-extend the address into a place helpful for further use. */
+ tcg_out_ext32u(s, TCG_REG_R4, addrlo);
+ addrlo = TCG_REG_R4;
+ } else {
+ tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo,
+ 64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS);
+ }
+ }
+
+ /* Compensate for very large offsets. */
+ if (add_off >= 0x8000) {
+        /* Most target envs are smaller than 32k, and none are larger than
+           64k.  Simplify the logic here by merely offsetting by 0x7ff0,
+           giving us a range just shy of 64k.  Check this assumption.  */
+ QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
+ tlb_table[NB_MMU_MODES - 1][1])
+ > 0x7ff0 + 0x7fff);
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
+ base = TCG_REG_TMP1;
+ cmp_off -= 0x7ff0;
+ add_off -= 0x7ff0;
+ }
+
+ /* Extraction and shifting, part 2. */
+ if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
+ tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo,
+ 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
+ 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
+ 31 - CPU_TLB_ENTRY_BITS);
+ } else {
+ tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS);
+ }
+
+ tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base));
+
+ /* Load the tlb comparator. */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
+ }
+
+ /* Load the TLB addend for use on the fast path. Do this asap
+ to minimize any load use delay. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);
+
+ /* Clear the non-page, non-alignment bits from the address. */
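+    /* Any alignment bits left set cause the compare below to fail, sending
+       unaligned accesses to the slow path.  */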
+ if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
+ (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
+ } else if (!s_bits) {
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo,
+ 0, 63 - TARGET_PAGE_BITS);
+ } else {
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+ }
+
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
+ 0, 7, TCG_TYPE_I32);
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+ } else {
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
+ 0, 7, TCG_TYPE_TL);
+ }
+
+ return addrlo;
+}
+
+/* Record the context of a call to the out-of-line helper code for the slow
+   path of a load or store, so that we can later generate the correct
+   helper code.  */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+ TCGReg datalo_reg, TCGReg datahi_reg,
+ TCGReg addrlo_reg, TCGReg addrhi_reg,
+ tcg_insn_unit *raddr, tcg_insn_unit *lptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = datalo_reg;
+ label->datahi_reg = datahi_reg;
+ label->addrlo_reg = addrlo_reg;
+ label->addrhi_reg = addrhi_reg;
+ label->raddr = raddr;
+ label->label_ptr[0] = lptr;
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGReg hi, lo, arg = TCG_REG_R3;
+
+ reloc_pc14(lb->label_ptr[0], s->code_ptr);
+
+ tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
+
+ lo = lb->addrlo_reg;
+ hi = lb->addrhi_reg;
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ arg |= 1;
+#endif
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
+ } else {
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
+ }
+
+ tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
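+    /* The conditional branch-and-link in the fast path left its return
+       address in LR; pass it as the helper's final (retaddr) argument.  */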
+ tcg_out32(s, MFSPR | RT(arg) | LR);
+
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ lo = lb->datalo_reg;
+ hi = lb->datahi_reg;
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
+ tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
+ tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
+ } else if (opc & MO_SIGN) {
+ uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
+ tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
+ } else {
+ tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
+ }
+
+ tcg_out_b(s, 0, lb->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+ TCGMemOp s_bits = opc & MO_SIZE;
+ TCGReg hi, lo, arg = TCG_REG_R3;
+
+ reloc_pc14(lb->label_ptr[0], s->code_ptr);
+
+ tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
+
+ lo = lb->addrlo_reg;
+ hi = lb->addrhi_reg;
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ arg |= 1;
+#endif
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
+ } else {
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
+ }
+
+ lo = lb->datalo_reg;
+ hi = lb->datahi_reg;
+ if (TCG_TARGET_REG_BITS == 32) {
+ switch (s_bits) {
+ case MO_64:
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ arg |= 1;
+#endif
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
+ /* FALLTHRU */
+ case MO_32:
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
+ break;
+ default:
+ tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
+ break;
+ }
+ } else {
+ if (s_bits == MO_64) {
+ tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
+ } else {
+ tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
+ }
+ }
+
+ tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
+ tcg_out32(s, MFSPR | RT(arg) | LR);
+
+ tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ tcg_out_b(s, 0, lb->raddr);
+}
+#endif /* SOFTMMU */
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg datalo, datahi, addrlo, rbase;
+ TCGReg addrhi __attribute__((unused));
+ TCGMemOpIdx oi;
+ TCGMemOp opc, s_bits;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+ s_bits = opc & MO_SIZE;
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true);
+
+    /* Branch to the slow path with a conditional branch-and-link, so that
+       LR holds a pointer back into the fast path for the helper call.  */
+ label_ptr = s->code_ptr;
+ tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+
+ rbase = TCG_REG_R3;
+#else /* !CONFIG_SOFTMMU */
+ rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
+ addrlo = TCG_REG_TMP1;
+ }
+#endif
+
+ if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (opc & MO_BSWAP) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
+ tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
+ } else if (rbase != 0) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
+ tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
+ } else if (addrlo == datahi) {
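+            /* The address register doubles as datahi; load datalo first so
+               the address is still intact for the second load.  */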
+ tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
+ tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
+ } else {
+ tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
+ tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
+ }
+ } else {
+ uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
+ if (!HAVE_ISA_2_06 && insn == LDBRX) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
+ tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
+ tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
+ } else if (insn) {
+ tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
+ } else {
+ insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
+ tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
+ insn = qemu_exts_opc[s_bits];
+ tcg_out32(s, insn | RA(datalo) | RS(datalo));
+ }
+ }
+
+#ifdef CONFIG_SOFTMMU
+ add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg datalo, datahi, addrlo, rbase;
+ TCGReg addrhi __attribute__((unused));
+ TCGMemOpIdx oi;
+ TCGMemOp opc, s_bits;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+ s_bits = opc & MO_SIZE;
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false);
+
+    /* Branch to the slow path with a conditional branch-and-link, so that
+       LR holds a pointer back into the fast path for the helper call.  */
+ label_ptr = s->code_ptr;
+ tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+
+ rbase = TCG_REG_R3;
+#else /* !CONFIG_SOFTMMU */
+ rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
+ addrlo = TCG_REG_TMP1;
+ }
+#endif
+
+ if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (opc & MO_BSWAP) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
+ tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
+ } else if (rbase != 0) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
+ tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
+ } else {
+ tcg_out32(s, STW | TAI(datahi, addrlo, 0));
+ tcg_out32(s, STW | TAI(datalo, addrlo, 4));
+ }
+ } else {
+ uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
+ if (!HAVE_ISA_2_06 && insn == STDBRX) {
+ tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
+ tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
+ tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
+ } else {
+ tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
+ }
+ }
+
+#ifdef CONFIG_SOFTMMU
+ add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#endif
+}
+
+/* Parameters for function call generation, used in tcg.c. */
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_EXTEND_ARGS 1
+
+#ifdef _CALL_AIX
+# define LINK_AREA_SIZE (6 * SZR)
+# define LR_OFFSET (1 * SZR)
+# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR)
+#elif defined(TCG_TARGET_CALL_DARWIN)
+# define LINK_AREA_SIZE (6 * SZR)
+# define LR_OFFSET (2 * SZR)
+#elif TCG_TARGET_REG_BITS == 64
+# if defined(_CALL_ELF) && _CALL_ELF == 2
+# define LINK_AREA_SIZE (4 * SZR)
+# define LR_OFFSET (1 * SZR)
+# endif
+#else /* TCG_TARGET_REG_BITS == 32 */
+# if defined(_CALL_SYSV)
+# define LINK_AREA_SIZE (2 * SZR)
+# define LR_OFFSET (1 * SZR)
+# endif
+#endif
+#ifndef LR_OFFSET
+# error "Unhandled abi"
+#endif
+#ifndef TCG_TARGET_CALL_STACK_OFFSET
+# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE
+#endif
+
+#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
+
+#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_SIZE \
+ + REG_SAVE_SIZE \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
+
+#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
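+
+/* Within the frame, the callee-saved register area occupies the top
+   REG_SAVE_SIZE bytes, the CPU temp buffer sits immediately below it, and
+   outgoing call arguments start at TCG_TARGET_CALL_STACK_OFFSET above the
+   stack pointer.  */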
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i;
+
+#ifdef _CALL_AIX
+ void **desc = (void **)s->code_ptr;
+ desc[0] = desc + 2; /* entry point */
+ desc[1] = 0; /* environment pointer */
+ s->code_ptr = (void *)(desc + 2); /* skip over descriptor */
+#endif
+
+ tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
+ CPU_TEMP_BUF_SIZE);
+
+ /* Prologue */
+ tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
+ tcg_out32(s, (SZR == 8 ? STDU : STWU)
+ | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
+
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+ tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_R1, REG_SAVE_BOT + i * SZR);
+ }
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
+
+#ifdef CONFIG_USE_GUEST_BASE
+ if (GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
+
+ if (USE_REG_RA) {
+#ifdef _CALL_AIX
+ /* Make the caller load the value as the TOC into R2. */
+ tb_ret_addr = s->code_ptr + 2;
+ desc[1] = tb_ret_addr;
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+#elif defined(_CALL_ELF) && _CALL_ELF == 2
+ /* Compute from the incoming R12 value. */
+ tb_ret_addr = s->code_ptr + 2;
+ tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
+ tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+#else
+ /* Reserve max 5 insns for the constant load. */
+ tb_ret_addr = s->code_ptr + 6;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ while (s->code_ptr < tb_ret_addr) {
+ tcg_out32(s, NOP);
+ }
+#endif
+ } else {
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ tb_ret_addr = s->code_ptr;
+ }
+
+ /* Epilogue */
+ assert(tb_ret_addr == s->code_ptr);
+
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+ tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_R1, REG_SAVE_BOT + i * SZR);
+ }
+ tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
+ tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
+ tcg_out32(s, BCLR | BO_ALWAYS);
+}
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+ const int *const_args)
+{
+ TCGArg a0, a1, a2;
+ int c;
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ if (USE_REG_RA) {
+ ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
+
+            /* Use a direct branch if we can; otherwise use the value in RA.
+ Note that the direct branch is always forward. If it's in
+ range now, it'll still be in range after the movi. Don't
+ bother about the 20 bytes where the test here fails but it
+ would succeed below. */
+ if (!in_range_b(disp)) {
+ tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ break;
+ }
+ }
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
+ tcg_out_b(s, 0, tb_ret_addr);
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ /* Direct jump method. */
+ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
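+            /* Reserve space for ppc_tb_set_jmp_target() to patch in the
+               branch to the next TB.  */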
+ s->code_ptr += 7;
+ } else {
+ /* Indirect jump method. */
+ tcg_abort();
+ }
+ s->tb_next_offset[args[0]] = tcg_current_code_size(s);
+ break;
+ case INDEX_op_br:
+ {
+ TCGLabel *l = arg_label(args[0]);
+
+ if (l->has_value) {
+ tcg_out_b(s, 0, l->u.value_ptr);
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
+ tcg_out_b_noaddr(s, B);
+ }
+ }
+ break;
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+ tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
+ break;
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_add_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ do_addi_32:
+ tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
+ } else {
+ tcg_out32(s, ADD | TAB(a0, a1, a2));
+ }
+ break;
+ case INDEX_op_sub_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
+ }
+ } else if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_32;
+ } else {
+ tcg_out32(s, SUBF | TAB(a0, a2, a1));
+ }
+ break;
+
+ case INDEX_op_and_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, AND | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_and_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi64(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, AND | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_or_i64:
+ case INDEX_op_or_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_ori32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, OR | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_xor_i64:
+ case INDEX_op_xor_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_xori32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, XOR | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_andc_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi32(s, a0, a1, ~a2);
+ } else {
+ tcg_out32(s, ANDC | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_andc_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi64(s, a0, a1, ~a2);
+ } else {
+ tcg_out32(s, ANDC | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_orc_i32:
+ if (const_args[2]) {
+ tcg_out_ori32(s, args[0], args[1], ~args[2]);
+ break;
+ }
+ /* FALLTHRU */
+ case INDEX_op_orc_i64:
+ tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_eqv_i32:
+ if (const_args[2]) {
+ tcg_out_xori32(s, args[0], args[1], ~args[2]);
+ break;
+ }
+ /* FALLTHRU */
+ case INDEX_op_eqv_i64:
+ tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
+ break;
+
+ case INDEX_op_mul_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out32(s, MULLI | TAI(a0, a1, a2));
+ } else {
+ tcg_out32(s, MULLW | TAB(a0, a1, a2));
+ }
+ break;
+
+ case INDEX_op_div_i32:
+ tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_divu_i32:
+ tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_shl_i32:
+ if (const_args[2]) {
+ tcg_out_shli32(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_shr_i32:
+ if (const_args[2]) {
+ tcg_out_shri32(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_sar_i32:
+ if (const_args[2]) {
+ tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2]));
+ } else {
+ tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_rotl_i32:
+ if (const_args[2]) {
+ tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
+ } else {
+ tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
+ | MB(0) | ME(31));
+ }
+ break;
+ case INDEX_op_rotr_i32:
+ if (const_args[2]) {
+ tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
+ tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
+ | MB(0) | ME(31));
+ }
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ arg_label(args[3]), TCG_TYPE_I32);
+ break;
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ arg_label(args[3]), TCG_TYPE_I64);
+ break;
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args, const_args);
+ break;
+
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
+ break;
+
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
+ break;
+
+ case INDEX_op_add_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ do_addi_64:
+ tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
+ } else {
+ tcg_out32(s, ADD | TAB(a0, a1, a2));
+ }
+ break;
+ case INDEX_op_sub_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
+ }
+ } else if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_64;
+ } else {
+ tcg_out32(s, SUBF | TAB(a0, a2, a1));
+ }
+ break;
+
+ case INDEX_op_shl_i64:
+ if (const_args[2]) {
+ tcg_out_shli64(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_shr_i64:
+ if (const_args[2]) {
+ tcg_out_shri64(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_sar_i64:
+ if (const_args[2]) {
+ int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
+ tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
+ } else {
+ tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_rotl_i64:
+ if (const_args[2]) {
+ tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
+ } else {
+ tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
+ }
+ break;
+ case INDEX_op_rotr_i64:
+ if (const_args[2]) {
+ tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
+ tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
+ }
+ break;
+
+ case INDEX_op_mul_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out32(s, MULLI | TAI(a0, a1, a2));
+ } else {
+ tcg_out32(s, MULLD | TAB(a0, a1, a2));
+ }
+ break;
+ case INDEX_op_div_i64:
+ tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_divu_i64:
+ tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, false);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, true);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, false);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, true);
+ break;
+
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ c = EXTSB;
+ goto gen_ext;
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ c = EXTSH;
+ goto gen_ext;
+ case INDEX_op_ext32s_i64:
+ c = EXTSW;
+ goto gen_ext;
+ gen_ext:
+ tcg_out32(s, c | RS(args[1]) | RA(args[0]));
+ break;
+
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
+ const_args[2]);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
+ const_args[2]);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ a0 = args[0], a1 = args[1];
+ /* a1 = abcd */
+ if (a0 != a1) {
+ /* a0 = (a1 r<< 24) & 0xff # 000c */
+ tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
+ /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
+ } else {
+ /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
+ /* a0 = (a1 r<< 24) & 0xff # 000c */
+ tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
+ /* a0 = a0 | r0 # 00dc */
+ tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
+ }
+ break;
+
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ /* Stolen from gcc's builtin_bswap32 */
+ a1 = args[1];
+ a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
+
+ /* a1 = args[1] # abcd */
+ /* a0 = rotate_left (a1, 8) # bcda */
+ tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
+ /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
+ /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
+
+ if (a0 == TCG_REG_R0) {
+ tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
+ }
+ break;
+
+ case INDEX_op_bswap64_i64:
+ a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
+ if (a0 == a1) {
+ a0 = TCG_REG_R0;
+ a2 = a1;
+ }
+
+ /* a1 = # abcd efgh */
+ /* a0 = rl32(a1, 8) # 0000 fghe */
+ tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
+ /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
+ /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
+
+ /* a0 = rl64(a0, 32) # hgfe 0000 */
+ /* a2 = rl64(a1, 32) # efgh abcd */
+ tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
+ tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
+
+ /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */
+ tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
+ /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
+ tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
+ /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
+ tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
+
+        if (a0 == TCG_REG_R0) {
+ tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
+ }
+ break;
+
+ case INDEX_op_deposit_i32:
+ if (const_args[2]) {
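+            /* args[2] is constrained to constant zero, so depositing it is
+               just clearing a field of args[4] bits at offset args[3];
+               e.g. len 8 at ofs 16 gives mask 0x00ff0000.  */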
+ uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
+ tcg_out_andi32(s, args[0], args[0], ~mask);
+ } else {
+ tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
+ 32 - args[3] - args[4], 31 - args[3]);
+ }
+ break;
+ case INDEX_op_deposit_i64:
+ if (const_args[2]) {
+ uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
+ tcg_out_andi64(s, args[0], args[0], ~mask);
+ } else {
+ tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
+ 64 - args[3] - args[4]);
+ }
+ break;
+
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_add2_i64:
+#else
+ case INDEX_op_add2_i32:
+#endif
+ /* Note that the CA bit is defined based on the word size of the
+ environment. So in 64-bit mode it's always carry-out of bit 63.
+ The fallback code using deposit works just as well for 32-bit. */
+ a0 = args[0], a1 = args[1];
+ if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
+ a0 = TCG_REG_R0;
+ }
+ if (const_args[4]) {
+ tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
+ } else {
+ tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
+ }
+ if (const_args[5]) {
+ tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
+ } else {
+ tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
+ }
+ if (a0 != args[0]) {
+ tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
+ }
+ break;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_sub2_i64:
+#else
+ case INDEX_op_sub2_i32:
+#endif
+ a0 = args[0], a1 = args[1];
+ if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
+ a0 = TCG_REG_R0;
+ }
+ if (const_args[2]) {
+ tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
+ } else {
+ tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
+ }
+ if (const_args[3]) {
+ tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
+ } else {
+ tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
+ }
+ if (a0 != args[0]) {
+ tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
+ }
+ break;
+
+ case INDEX_op_muluh_i32:
+ tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_mulsh_i32:
+ tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_muluh_i64:
+ tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_mulsh_i64:
+ tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static const TCGTargetOpDef ppc_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_br, { } },
+
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+
+ { INDEX_op_st8_i32, { "r", "r" } },
+ { INDEX_op_st16_i32, { "r", "r" } },
+ { INDEX_op_st_i32, { "r", "r" } },
+
+ { INDEX_op_add_i32, { "r", "r", "ri" } },
+ { INDEX_op_mul_i32, { "r", "r", "rI" } },
+ { INDEX_op_div_i32, { "r", "r", "r" } },
+ { INDEX_op_divu_i32, { "r", "r", "r" } },
+ { INDEX_op_sub_i32, { "r", "rI", "ri" } },
+ { INDEX_op_and_i32, { "r", "r", "ri" } },
+ { INDEX_op_or_i32, { "r", "r", "ri" } },
+ { INDEX_op_xor_i32, { "r", "r", "ri" } },
+ { INDEX_op_andc_i32, { "r", "r", "ri" } },
+ { INDEX_op_orc_i32, { "r", "r", "ri" } },
+ { INDEX_op_eqv_i32, { "r", "r", "ri" } },
+ { INDEX_op_nand_i32, { "r", "r", "r" } },
+ { INDEX_op_nor_i32, { "r", "r", "r" } },
+
+ { INDEX_op_shl_i32, { "r", "r", "ri" } },
+ { INDEX_op_shr_i32, { "r", "r", "ri" } },
+ { INDEX_op_sar_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "r", "ri" } },
+
+ { INDEX_op_neg_i32, { "r", "r" } },
+ { INDEX_op_not_i32, { "r", "r" } },
+ { INDEX_op_ext8s_i32, { "r", "r" } },
+ { INDEX_op_ext16s_i32, { "r", "r" } },
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
+
+ { INDEX_op_brcond_i32, { "r", "ri" } },
+ { INDEX_op_setcond_i32, { "r", "r", "ri" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } },
+
+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
+ { INDEX_op_muluh_i32, { "r", "r", "r" } },
+ { INDEX_op_mulsh_i32, { "r", "r", "r" } },
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+
+ { INDEX_op_st8_i64, { "r", "r" } },
+ { INDEX_op_st16_i64, { "r", "r" } },
+ { INDEX_op_st32_i64, { "r", "r" } },
+ { INDEX_op_st_i64, { "r", "r" } },
+
+ { INDEX_op_add_i64, { "r", "r", "rT" } },
+ { INDEX_op_sub_i64, { "r", "rI", "rT" } },
+ { INDEX_op_and_i64, { "r", "r", "ri" } },
+ { INDEX_op_or_i64, { "r", "r", "rU" } },
+ { INDEX_op_xor_i64, { "r", "r", "rU" } },
+ { INDEX_op_andc_i64, { "r", "r", "ri" } },
+ { INDEX_op_orc_i64, { "r", "r", "r" } },
+ { INDEX_op_eqv_i64, { "r", "r", "r" } },
+ { INDEX_op_nand_i64, { "r", "r", "r" } },
+ { INDEX_op_nor_i64, { "r", "r", "r" } },
+
+ { INDEX_op_shl_i64, { "r", "r", "ri" } },
+ { INDEX_op_shr_i64, { "r", "r", "ri" } },
+ { INDEX_op_sar_i64, { "r", "r", "ri" } },
+ { INDEX_op_rotl_i64, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i64, { "r", "r", "ri" } },
+
+ { INDEX_op_mul_i64, { "r", "r", "rI" } },
+ { INDEX_op_div_i64, { "r", "r", "r" } },
+ { INDEX_op_divu_i64, { "r", "r", "r" } },
+
+ { INDEX_op_neg_i64, { "r", "r" } },
+ { INDEX_op_not_i64, { "r", "r" } },
+ { INDEX_op_ext8s_i64, { "r", "r" } },
+ { INDEX_op_ext16s_i64, { "r", "r" } },
+ { INDEX_op_ext32s_i64, { "r", "r" } },
+ { INDEX_op_bswap16_i64, { "r", "r" } },
+ { INDEX_op_bswap32_i64, { "r", "r" } },
+ { INDEX_op_bswap64_i64, { "r", "r" } },
+
+ { INDEX_op_brcond_i64, { "r", "ri" } },
+ { INDEX_op_setcond_i64, { "r", "r", "ri" } },
+ { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } },
+
+ { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
+
+ { INDEX_op_mulsh_i64, { "r", "r", "r" } },
+ { INDEX_op_muluh_i64, { "r", "r", "r" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 32
+ { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
+ { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } },
+ { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } },
+#else
+ { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_qemu_ld_i32, { "r", "L" } },
+ { INDEX_op_qemu_st_i32, { "S", "S" } },
+ { INDEX_op_qemu_ld_i64, { "r", "L" } },
+ { INDEX_op_qemu_st_i64, { "S", "S" } },
+#elif TARGET_LONG_BITS == 32
+ { INDEX_op_qemu_ld_i32, { "r", "L" } },
+ { INDEX_op_qemu_st_i32, { "S", "S" } },
+ { INDEX_op_qemu_ld_i64, { "L", "L", "L" } },
+ { INDEX_op_qemu_st_i64, { "S", "S", "S" } },
+#else
+ { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
+ { INDEX_op_qemu_st_i32, { "S", "S", "S" } },
+ { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } },
+ { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } },
+#endif
+
+ { -1 },
+};
+
+static void tcg_target_init(TCGContext *s)
+{
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ if (hwcap & PPC_FEATURE_ARCH_2_06) {
+ have_isa_2_06 = true;
+ }
+
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
+ tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+ (1 << TCG_REG_R0) |
+ (1 << TCG_REG_R2) |
+ (1 << TCG_REG_R3) |
+ (1 << TCG_REG_R4) |
+ (1 << TCG_REG_R5) |
+ (1 << TCG_REG_R6) |
+ (1 << TCG_REG_R7) |
+ (1 << TCG_REG_R8) |
+ (1 << TCG_REG_R9) |
+ (1 << TCG_REG_R10) |
+ (1 << TCG_REG_R11) |
+ (1 << TCG_REG_R12));
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
+#if defined(_CALL_SYSV)
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
+#endif
+#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
+#endif
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
+ if (USE_REG_RA) {
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */
+ }
+
+ tcg_add_target_add_op_defs(ppc_op_defs);
+}
+
+#ifdef __ELF__
+typedef struct {
+ DebugFrameCIE cie;
+ DebugFrameFDEHeader fde;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+#if TCG_TARGET_REG_BITS == 64
+# define ELF_HOST_MACHINE EM_PPC64
+#else
+# define ELF_HOST_MACHINE EM_PPC
+#endif
+
+static DebugFrame debug_frame = {
+ .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .cie.id = -1,
+ .cie.version = 1,
+ .cie.code_align = 1,
+ .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */
+ .cie.return_column = 65,
+
+ /* Total FDE size does not include the "len" member. */
+ .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
+ 0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
+ }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ uint8_t *p = &debug_frame.fde_reg_ofs[3];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
+ p[0] = 0x80 + tcg_target_callee_save_regs[i];
+ p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
+ }
+
+ debug_frame.fde.func_start = (uintptr_t)buf;
+ debug_frame.fde.func_len = buf_size;
+
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+#endif /* __ELF__ */
+
+static size_t dcache_bsize = 16;
+static size_t icache_bsize = 16;
+
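+/* Write dirty data cache lines back with dcbst, then invalidate the
+   corresponding instruction cache lines with icbi; sync orders the writes
+   and the final isync discards any prefetched instructions.  */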
+void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ uintptr_t p, start1, stop1;
+ size_t dsize = dcache_bsize;
+ size_t isize = icache_bsize;
+
+ start1 = start & ~(dsize - 1);
+ stop1 = (stop + dsize - 1) & ~(dsize - 1);
+ for (p = start1; p < stop1; p += dsize) {
+ asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
+ }
+ asm volatile ("sync" : : : "memory");
+
+    start1 = start & ~(isize - 1);
+ stop1 = (stop + isize - 1) & ~(isize - 1);
+ for (p = start1; p < stop1; p += isize) {
+ asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
+ }
+ asm volatile ("sync" : : : "memory");
+ asm volatile ("isync" : : : "memory");
+}
+
+#if defined _AIX
+#include <sys/systemcfg.h>
+
+static void __attribute__((constructor)) tcg_cache_init(void)
+{
+ icache_bsize = _system_configuration.icache_line;
+ dcache_bsize = _system_configuration.dcache_line;
+}
+
+#elif defined __linux__
+static void __attribute__((constructor)) tcg_cache_init(void)
+{
+ unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE);
+ unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE);
+
+ if (dsize == 0 || isize == 0) {
+ if (dsize == 0) {
+ fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n");
+ }
+ if (isize == 0) {
+ fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n");
+ }
+ exit(1);
+ }
+ dcache_bsize = dsize;
+ icache_bsize = isize;
+}
+
+#elif defined __APPLE__
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static void __attribute__((constructor)) tcg_cache_init(void)
+{
+ size_t len;
+ unsigned cacheline;
+ int name[2] = { CTL_HW, HW_CACHELINE };
+
+ len = sizeof(cacheline);
+ if (sysctl(name, 2, &cacheline, &len, NULL, 0)) {
+ perror("sysctl CTL_HW HW_CACHELINE failed");
+ exit(1);
+ }
+ dcache_bsize = cacheline;
+ icache_bsize = cacheline;
+}
+
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static void __attribute__((constructor)) tcg_cache_init(void)
+{
+ size_t len = 4;
+ unsigned cacheline;
+
+ if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) {
+ fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+ dcache_bsize = cacheline;
+ icache_bsize = cacheline;
+}
+#endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
new file mode 100644
index 00000000..7ce70488
--- /dev/null
+++ b/tcg/ppc/tcg-target.h
@@ -0,0 +1,112 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TCG_TARGET_PPC64
+#define TCG_TARGET_PPC64 1
+
+#ifdef _ARCH_PPC64
+# define TCG_TARGET_REG_BITS 64
+#else
+# define TCG_TARGET_REG_BITS 32
+#endif
+
+#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+
+typedef enum {
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
+ TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7,
+ TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
+ TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
+ TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19,
+ TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23,
+ TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
+ TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
+
+ TCG_REG_CALL_STACK = TCG_REG_R1,
+ TCG_AREG0 = TCG_REG_R27
+} TCGReg;
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
+#define TCG_TARGET_HAS_ext16u_i32 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_trunc_shr_i32 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#endif
+
+void flush_icache_range(uintptr_t start, uintptr_t stop);
+
+#endif
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
new file mode 100644
index 00000000..aa718eca
--- /dev/null
+++ b/tcg/s390/tcg-target.c
@@ -0,0 +1,2393 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-be-ldst.h"
+
+/* We only support generating code for 64-bit mode. */
+#if TCG_TARGET_REG_BITS != 64
+#error "unsupported code generation mode"
+#endif
+
+#include "elf.h"
+
+/* ??? The translation blocks produced by TCG are generally small enough to
+ be entirely reachable with a 16-bit displacement. Leaving the option for
+ a 32-bit displacement here Just In Case. */
+#define USE_LONG_BRANCHES 0
+
+#define TCG_CT_CONST_MULI 0x100
+#define TCG_CT_CONST_ORI 0x200
+#define TCG_CT_CONST_XORI 0x400
+#define TCG_CT_CONST_CMPI 0x800
+#define TCG_CT_CONST_ADLI 0x1000
+
+/* In several places within the instruction set, 0 means "no register"
+   rather than TCG_REG_R0.  */
+#define TCG_REG_NONE 0
+
+/* A scratch register that may be used throughout the backend.  */
+#define TCG_TMP0 TCG_REG_R14
+
+#ifdef CONFIG_USE_GUEST_BASE
+#define TCG_GUEST_BASE_REG TCG_REG_R13
+#else
+#define TCG_GUEST_BASE_REG TCG_REG_R0
+#endif
+
+#ifndef GUEST_BASE
+#define GUEST_BASE 0
+#endif
+
+
+/* All of the following instructions are prefixed with their instruction
+ format, and are defined as 8- or 16-bit quantities, even when the two
+ halves of the 16-bit quantity may appear 32 bits apart in the insn.
+ This makes it easy to copy the values from the tables in Appendix B. */
+typedef enum S390Opcode {
+ RIL_AFI = 0xc209,
+ RIL_AGFI = 0xc208,
+ RIL_ALFI = 0xc20b,
+ RIL_ALGFI = 0xc20a,
+ RIL_BRASL = 0xc005,
+ RIL_BRCL = 0xc004,
+ RIL_CFI = 0xc20d,
+ RIL_CGFI = 0xc20c,
+ RIL_CLFI = 0xc20f,
+ RIL_CLGFI = 0xc20e,
+ RIL_IIHF = 0xc008,
+ RIL_IILF = 0xc009,
+ RIL_LARL = 0xc000,
+ RIL_LGFI = 0xc001,
+ RIL_LGRL = 0xc408,
+ RIL_LLIHF = 0xc00e,
+ RIL_LLILF = 0xc00f,
+ RIL_LRL = 0xc40d,
+ RIL_MSFI = 0xc201,
+ RIL_MSGFI = 0xc200,
+ RIL_NIHF = 0xc00a,
+ RIL_NILF = 0xc00b,
+ RIL_OIHF = 0xc00c,
+ RIL_OILF = 0xc00d,
+ RIL_SLFI = 0xc205,
+ RIL_SLGFI = 0xc204,
+ RIL_XIHF = 0xc006,
+ RIL_XILF = 0xc007,
+
+ RI_AGHI = 0xa70b,
+ RI_AHI = 0xa70a,
+ RI_BRC = 0xa704,
+ RI_IIHH = 0xa500,
+ RI_IIHL = 0xa501,
+ RI_IILH = 0xa502,
+ RI_IILL = 0xa503,
+ RI_LGHI = 0xa709,
+ RI_LLIHH = 0xa50c,
+ RI_LLIHL = 0xa50d,
+ RI_LLILH = 0xa50e,
+ RI_LLILL = 0xa50f,
+ RI_MGHI = 0xa70d,
+ RI_MHI = 0xa70c,
+ RI_NIHH = 0xa504,
+ RI_NIHL = 0xa505,
+ RI_NILH = 0xa506,
+ RI_NILL = 0xa507,
+ RI_OIHH = 0xa508,
+ RI_OIHL = 0xa509,
+ RI_OILH = 0xa50a,
+ RI_OILL = 0xa50b,
+
+ RIE_CGIJ = 0xec7c,
+ RIE_CGRJ = 0xec64,
+ RIE_CIJ = 0xec7e,
+ RIE_CLGRJ = 0xec65,
+ RIE_CLIJ = 0xec7f,
+ RIE_CLGIJ = 0xec7d,
+ RIE_CLRJ = 0xec77,
+ RIE_CRJ = 0xec76,
+ RIE_RISBG = 0xec55,
+
+ RRE_AGR = 0xb908,
+ RRE_ALGR = 0xb90a,
+ RRE_ALCR = 0xb998,
+ RRE_ALCGR = 0xb988,
+ RRE_CGR = 0xb920,
+ RRE_CLGR = 0xb921,
+ RRE_DLGR = 0xb987,
+ RRE_DLR = 0xb997,
+ RRE_DSGFR = 0xb91d,
+ RRE_DSGR = 0xb90d,
+ RRE_LGBR = 0xb906,
+ RRE_LCGR = 0xb903,
+ RRE_LGFR = 0xb914,
+ RRE_LGHR = 0xb907,
+ RRE_LGR = 0xb904,
+ RRE_LLGCR = 0xb984,
+ RRE_LLGFR = 0xb916,
+ RRE_LLGHR = 0xb985,
+ RRE_LRVR = 0xb91f,
+ RRE_LRVGR = 0xb90f,
+ RRE_LTGR = 0xb902,
+ RRE_MLGR = 0xb986,
+ RRE_MSGR = 0xb90c,
+ RRE_MSR = 0xb252,
+ RRE_NGR = 0xb980,
+ RRE_OGR = 0xb981,
+ RRE_SGR = 0xb909,
+ RRE_SLGR = 0xb90b,
+ RRE_SLBR = 0xb999,
+ RRE_SLBGR = 0xb989,
+ RRE_XGR = 0xb982,
+
+ RRF_LOCR = 0xb9f2,
+ RRF_LOCGR = 0xb9e2,
+
+ RR_AR = 0x1a,
+ RR_ALR = 0x1e,
+ RR_BASR = 0x0d,
+ RR_BCR = 0x07,
+ RR_CLR = 0x15,
+ RR_CR = 0x19,
+ RR_DR = 0x1d,
+ RR_LCR = 0x13,
+ RR_LR = 0x18,
+ RR_LTR = 0x12,
+ RR_NR = 0x14,
+ RR_OR = 0x16,
+ RR_SR = 0x1b,
+ RR_SLR = 0x1f,
+ RR_XR = 0x17,
+
+ RSY_RLL = 0xeb1d,
+ RSY_RLLG = 0xeb1c,
+ RSY_SLLG = 0xeb0d,
+ RSY_SRAG = 0xeb0a,
+ RSY_SRLG = 0xeb0c,
+
+ RS_SLL = 0x89,
+ RS_SRA = 0x8a,
+ RS_SRL = 0x88,
+
+ RXY_AG = 0xe308,
+ RXY_AY = 0xe35a,
+ RXY_CG = 0xe320,
+ RXY_CY = 0xe359,
+ RXY_LAY = 0xe371,
+ RXY_LB = 0xe376,
+ RXY_LG = 0xe304,
+ RXY_LGB = 0xe377,
+ RXY_LGF = 0xe314,
+ RXY_LGH = 0xe315,
+ RXY_LHY = 0xe378,
+ RXY_LLGC = 0xe390,
+ RXY_LLGF = 0xe316,
+ RXY_LLGH = 0xe391,
+ RXY_LMG = 0xeb04,
+ RXY_LRV = 0xe31e,
+ RXY_LRVG = 0xe30f,
+ RXY_LRVH = 0xe31f,
+ RXY_LY = 0xe358,
+ RXY_STCY = 0xe372,
+ RXY_STG = 0xe324,
+ RXY_STHY = 0xe370,
+ RXY_STMG = 0xeb24,
+ RXY_STRV = 0xe33e,
+ RXY_STRVG = 0xe32f,
+ RXY_STRVH = 0xe33f,
+ RXY_STY = 0xe350,
+
+ RX_A = 0x5a,
+ RX_C = 0x59,
+ RX_L = 0x58,
+ RX_LA = 0x41,
+ RX_LH = 0x48,
+ RX_ST = 0x50,
+ RX_STC = 0x42,
+ RX_STH = 0x40,
+} S390Opcode;
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
+};
+#endif
+
+/* Since R6 is a potential argument register, choose it last of the
+ call-saved registers. Likewise prefer the call-clobbered registers
+ in reverse order to maximize the chance of avoiding the arguments. */
+static const int tcg_target_reg_alloc_order[] = {
+ /* Call saved registers. */
+ TCG_REG_R13,
+ TCG_REG_R12,
+ TCG_REG_R11,
+ TCG_REG_R10,
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_R7,
+ TCG_REG_R6,
+ /* Call clobbered registers. */
+ TCG_REG_R14,
+ TCG_REG_R0,
+ TCG_REG_R1,
+ /* Argument registers, in reverse order of allocation. */
+ TCG_REG_R5,
+ TCG_REG_R4,
+ TCG_REG_R3,
+ TCG_REG_R2,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_R2,
+};
+
+#define S390_CC_EQ 8
+#define S390_CC_LT 4
+#define S390_CC_GT 2
+#define S390_CC_OV 1
+#define S390_CC_NE (S390_CC_LT | S390_CC_GT)
+#define S390_CC_LE (S390_CC_LT | S390_CC_EQ)
+#define S390_CC_GE (S390_CC_GT | S390_CC_EQ)
+#define S390_CC_NEVER 0
+#define S390_CC_ALWAYS 15
+
+/* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
+static const uint8_t tcg_cond_to_s390_cond[] = {
+ [TCG_COND_EQ] = S390_CC_EQ,
+ [TCG_COND_NE] = S390_CC_NE,
+ [TCG_COND_LT] = S390_CC_LT,
+ [TCG_COND_LE] = S390_CC_LE,
+ [TCG_COND_GT] = S390_CC_GT,
+ [TCG_COND_GE] = S390_CC_GE,
+ [TCG_COND_LTU] = S390_CC_LT,
+ [TCG_COND_LEU] = S390_CC_LE,
+ [TCG_COND_GTU] = S390_CC_GT,
+ [TCG_COND_GEU] = S390_CC_GE,
+};
+
+/* Condition codes that result from a LOAD AND TEST. Here we have no
+ unsigned instruction variation; however, since the test is against
+ zero we can re-map the outcomes appropriately. */
+static const uint8_t tcg_cond_to_ltr_cond[] = {
+ [TCG_COND_EQ] = S390_CC_EQ,
+ [TCG_COND_NE] = S390_CC_NE,
+ [TCG_COND_LT] = S390_CC_LT,
+ [TCG_COND_LE] = S390_CC_LE,
+ [TCG_COND_GT] = S390_CC_GT,
+ [TCG_COND_GE] = S390_CC_GE,
+ [TCG_COND_LTU] = S390_CC_NEVER,
+ [TCG_COND_LEU] = S390_CC_EQ,
+ [TCG_COND_GTU] = S390_CC_NE,
+ [TCG_COND_GEU] = S390_CC_ALWAYS,
+};
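+
+/* For illustration: the unsigned entries above follow from testing
+ against zero: x <u 0 is never true and x >=u 0 is always true, while
+ x <=u 0 and x >u 0 degenerate to x == 0 and x != 0 respectively. */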
+
+#ifdef CONFIG_SOFTMMU
+static void * const qemu_ld_helpers[16] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LESW] = helper_le_ldsw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LESL] = helper_le_ldsl_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BESW] = helper_be_ldsw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BESL] = helper_be_ldsl_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+static void * const qemu_st_helpers[16] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+#endif
+
+static tcg_insn_unit *tb_ret_addr;
+
+/* A list of relevant facilities used by this translator. Some of these
+ are required for proper operation, and these are checked at startup. */
+
+#define FACILITY_ZARCH_ACTIVE (1ULL << (63 - 2))
+#define FACILITY_LONG_DISP (1ULL << (63 - 18))
+#define FACILITY_EXT_IMM (1ULL << (63 - 21))
+#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
+#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
+
+static uint64_t facilities;
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1);
+ assert(addend == -2);
+
+ switch (type) {
+ case R_390_PC16DBL:
+ assert(pcrel2 == (int16_t)pcrel2);
+ tcg_patch16(code_ptr, pcrel2);
+ break;
+ case R_390_PC32DBL:
+ assert(pcrel2 == (int32_t)pcrel2);
+ tcg_patch32(code_ptr, pcrel2);
+ break;
+ default:
+ tcg_abort();
+ break;
+ }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str = *pct_str;
+
+ switch (ct_str[0]) {
+ case 'r': /* all registers */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ break;
+ case 'R': /* not R0 */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
+ break;
+ case 'L': /* qemu_ld/st constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
+ break;
+ case 'a': /* force R2 for division */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_clear(ct->u.regs);
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_R2);
+ break;
+ case 'b': /* force R3 for division */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_clear(ct->u.regs);
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
+ break;
+ case 'A':
+ ct->ct |= TCG_CT_CONST_ADLI;
+ break;
+ case 'K':
+ ct->ct |= TCG_CT_CONST_MULI;
+ break;
+ case 'O':
+ ct->ct |= TCG_CT_CONST_ORI;
+ break;
+ case 'X':
+ ct->ct |= TCG_CT_CONST_XORI;
+ break;
+ case 'C':
+ ct->ct |= TCG_CT_CONST_CMPI;
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+
+ return 0;
+}
+
+/* Immediates to be used with logical OR. This is an optimization only,
+ since a full 64-bit immediate OR can always be performed with 4 sequential
+ OI[LH][LH] instructions. What we're looking for is immediates that we
+ can load efficiently, and the immediate load plus the reg-reg OR is
+ smaller than the sequential OI's. */
+
+static int tcg_match_ori(TCGType type, tcg_target_long val)
+{
+ if (facilities & FACILITY_EXT_IMM) {
+ if (type == TCG_TYPE_I32) {
+ /* All 32-bit ORs can be performed with 1 48-bit insn. */
+ return 1;
+ }
+ }
+
+ /* Look for negative values. These are best to load with LGHI. */
+ if (val < 0) {
+ if (val == (int16_t)val) {
+ return 0;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ if (val == (int32_t)val) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+/* Immediates to be used with logical XOR. This is almost, but not quite,
+ only an optimization. XOR with immediate is only supported with the
+ extended-immediate facility. That said, there are a few patterns for
+ which it is better to load the value into a register first. */
+
+static int tcg_match_xori(TCGType type, tcg_target_long val)
+{
+ if ((facilities & FACILITY_EXT_IMM) == 0) {
+ return 0;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ /* All 32-bit XORs can be performed with 1 48-bit insn. */
+ return 1;
+ }
+
+ /* Look for negative values. These are best to load with LGHI. */
+ if (val < 0 && val == (int32_t)val) {
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Immediates to be used with comparisons. */
+
+static int tcg_match_cmpi(TCGType type, tcg_target_long val)
+{
+ if (facilities & FACILITY_EXT_IMM) {
+ /* The COMPARE IMMEDIATE instruction is available. */
+ if (type == TCG_TYPE_I32) {
+ /* We have a 32-bit immediate and can compare against anything. */
+ return 1;
+ } else {
+ /* ??? We have no insight here into whether the comparison is
+ signed or unsigned. The COMPARE IMMEDIATE insn uses a 32-bit
+ signed immediate, and the COMPARE LOGICAL IMMEDIATE insn uses
+ a 32-bit unsigned immediate. If we were to use the (semi)
+ obvious "val == (int32_t)val" we would be enabling unsigned
+ comparisons vs very large numbers. The only solution is to
+ take the intersection of the ranges. */
+ /* ??? Another possible solution is to simply lie and allow all
+ constants here and force the out-of-range values into a temp
+ register in tgen_cmp when we have knowledge of the actual
+ comparison code in use. */
+ return val >= 0 && val <= 0x7fffffff;
+ }
+ } else {
+ /* Only the LOAD AND TEST instruction is available. */
+ return val == 0;
+ }
+}
+
+/* Immediates to be used with add2/sub2. */
+
+static int tcg_match_add2i(TCGType type, tcg_target_long val)
+{
+ if (facilities & FACILITY_EXT_IMM) {
+ if (type == TCG_TYPE_I32) {
+ return 1;
+ } else if (val >= -0xffffffffll && val <= 0xffffffffll) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Test if a constant matches the constraint. */
+static int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ /* The following are mutually exclusive. */
+ if (ct & TCG_CT_CONST_MULI) {
+ /* Immediates that may be used with multiply. If we have the
+ general-instruction-extensions, then we have MULTIPLY SINGLE
+ IMMEDIATE with a signed 32-bit, otherwise we have only
+ MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
+ if (facilities & FACILITY_GEN_INST_EXT) {
+ return val == (int32_t)val;
+ } else {
+ return val == (int16_t)val;
+ }
+ } else if (ct & TCG_CT_CONST_ADLI) {
+ return tcg_match_add2i(type, val);
+ } else if (ct & TCG_CT_CONST_ORI) {
+ return tcg_match_ori(type, val);
+ } else if (ct & TCG_CT_CONST_XORI) {
+ return tcg_match_xori(type, val);
+ } else if (ct & TCG_CT_CONST_CMPI) {
+ return tcg_match_cmpi(type, val);
+ }
+
+ return 0;
+}
+
+/* Emit instructions according to the given instruction format. */
+
+static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
+{
+ tcg_out16(s, (op << 8) | (r1 << 4) | r2);
+}
+
+static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2)
+{
+ tcg_out32(s, (op << 16) | (r1 << 4) | r2);
+}
+
+static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2, int m3)
+{
+ tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
+}
+
+static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
+{
+ tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
+}
+
+static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
+{
+ tcg_out16(s, op | (r1 << 4));
+ tcg_out32(s, i2);
+}
+
+static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
+ TCGReg b2, TCGReg r3, int disp)
+{
+ tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
+ | (disp & 0xfff));
+}
+
+static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
+ TCGReg b2, TCGReg r3, int disp)
+{
+ tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
+ tcg_out32(s, (op & 0xff) | (b2 << 28)
+ | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
+}
+
+#define tcg_out_insn_RX tcg_out_insn_RS
+#define tcg_out_insn_RXY tcg_out_insn_RSY
+
+/* Emit an opcode with "type-checking" of the format. */
+#define tcg_out_insn(S, FMT, OP, ...) \
+ glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
+
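+/* For illustration: with the helpers above, the BRASL %r14,<off> used by
+ tcg_out_call below is emitted as
+     tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
+ which expands to tcg_out_insn_RIL(s, RIL_BRASL, TCG_REG_R14, off) and
+ produces the halfword 0xc0e5 (0xc005 | 14 << 4) followed by the 32-bit
+ halfword-scaled offset. */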
+
+/* emit 64-bit shifts */
+static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
+ TCGReg src, TCGReg sh_reg, int sh_imm)
+{
+ tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
+}
+
+/* emit 32-bit shifts */
+static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
+ TCGReg sh_reg, int sh_imm)
+{
+ tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
+}
+
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
+{
+ if (src != dst) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, LR, dst, src);
+ } else {
+ tcg_out_insn(s, RRE, LGR, dst, src);
+ }
+ }
+}
+
+/* load a register with an immediate value */
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long sval)
+{
+ static const S390Opcode lli_insns[4] = {
+ RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
+ };
+
+ tcg_target_ulong uval = sval;
+ int i;
+
+ if (type == TCG_TYPE_I32) {
+ uval = (uint32_t)sval;
+ sval = (int32_t)sval;
+ }
+
+ /* Try all 32-bit insns that can load it in one go. */
+ if (sval >= -0x8000 && sval < 0x8000) {
+ tcg_out_insn(s, RI, LGHI, ret, sval);
+ return;
+ }
+
+ for (i = 0; i < 4; i++) {
+ tcg_target_long mask = 0xffffull << i*16;
+ if ((uval & mask) == uval) {
+ tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
+ return;
+ }
+ }
+
+ /* Try all 48-bit insns that can load it in one go. */
+ if (facilities & FACILITY_EXT_IMM) {
+ if (sval == (int32_t)sval) {
+ tcg_out_insn(s, RIL, LGFI, ret, sval);
+ return;
+ }
+ if (uval <= 0xffffffff) {
+ tcg_out_insn(s, RIL, LLILF, ret, uval);
+ return;
+ }
+ if ((uval & 0xffffffff) == 0) {
+ tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1);
+ return;
+ }
+ }
+
+ /* Try for PC-relative address load. */
+ if ((sval & 1) == 0) {
+ ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
+ if (off == (int32_t)off) {
+ tcg_out_insn(s, RIL, LARL, ret, off);
+ return;
+ }
+ }
+
+ /* If extended immediates are not present, then we may have to issue
+ several instructions to load the low 32 bits. */
+ if (!(facilities & FACILITY_EXT_IMM)) {
+ /* A 32-bit unsigned value can be loaded in 2 insns. And given
+ that the lli_insns loop above did not succeed, we know that
+ both insns are required. */
+ if (uval <= 0xffffffff) {
+ tcg_out_insn(s, RI, LLILL, ret, uval);
+ tcg_out_insn(s, RI, IILH, ret, uval >> 16);
+ return;
+ }
+
+ /* If all high bits are set, the value can be loaded in 2 or 3 insns.
+ We first want to make sure that all the high bits get set. With
+ luck the low 16-bits can be considered negative to perform that for
+ free, otherwise we load an explicit -1. */
+ if (sval >> 31 >> 1 == -1) {
+ if (uval & 0x8000) {
+ tcg_out_insn(s, RI, LGHI, ret, uval);
+ } else {
+ tcg_out_insn(s, RI, LGHI, ret, -1);
+ tcg_out_insn(s, RI, IILL, ret, uval);
+ }
+ tcg_out_insn(s, RI, IILH, ret, uval >> 16);
+ return;
+ }
+ }
+
+ /* If we get here, both the high and low parts have non-zero bits. */
+
+ /* Recurse to load the lower 32-bits. */
+ tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
+
+ /* Insert data into the high 32-bits. */
+ uval = uval >> 31 >> 1;
+ if (facilities & FACILITY_EXT_IMM) {
+ if (uval < 0x10000) {
+ tcg_out_insn(s, RI, IIHL, ret, uval);
+ } else if ((uval & 0xffff) == 0) {
+ tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
+ } else {
+ tcg_out_insn(s, RIL, IIHF, ret, uval);
+ }
+ } else {
+ if (uval & 0xffff) {
+ tcg_out_insn(s, RI, IIHL, ret, uval);
+ }
+ if (uval & 0xffff0000) {
+ tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
+ }
+ }
+}
+
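+/* For illustration: assuming the extended-immediate facility, a constant
+ such as 0x123456789abcdef0 takes the final path above: the recursive call
+ loads the low half with LLILF 0x9abcdef0 and the high half 0x12345678 is
+ then inserted with IIHF, two instructions in total.  Without extended
+ immediates the same constant is typically built up as LLILL, IILH, IIHL,
+ IIHH, one 16-bit chunk at a time. */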
+
+/* Emit a load/store type instruction. Inputs are:
+ DATA: The register to be loaded or stored.
+ BASE+OFS: The effective address.
+ OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0.
+ OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */
+
+static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
+ TCGReg data, TCGReg base, TCGReg index,
+ tcg_target_long ofs)
+{
+ if (ofs < -0x80000 || ofs >= 0x80000) {
+ /* Combine the low 20 bits of the offset with the actual load insn;
+ the high 44 bits must come from an immediate load. */
+ tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
+ ofs = low;
+
+ /* If we were already given an index register, add it in. */
+ if (index != TCG_REG_NONE) {
+ tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
+ }
+ index = TCG_TMP0;
+ }
+
+ if (opc_rx && ofs >= 0 && ofs < 0x1000) {
+ tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
+ } else {
+ tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
+ }
+}
+
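+/* For illustration: tcg_out_mem with ofs = 0x90000 (outside the signed
+ 20-bit long-displacement range) splits it as 0x100000 + (-0x70000): the
+ 0x100000 part is loaded into TCG_TMP0 (folding in any index register)
+ and the remaining -0x70000 becomes the RXY displacement.  Offsets in
+ [0, 0x1000) use the short RX form whenever one exists for the opcode. */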
+
+/* load data without address translation or endianness conversion */
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
+ TCGReg base, intptr_t ofs)
+{
+ if (type == TCG_TYPE_I32) {
+ tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
+ } else {
+ tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+ }
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
+ TCGReg base, intptr_t ofs)
+{
+ if (type == TCG_TYPE_I32) {
+ tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
+ } else {
+ tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+ }
+}
+
+/* load data from an absolute host address */
+static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
+{
+ intptr_t addr = (intptr_t)abs;
+
+ if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) {
+ ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
+ if (disp == (int32_t)disp) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, LRL, dest, disp);
+ } else {
+ tcg_out_insn(s, RIL, LGRL, dest, disp);
+ }
+ return;
+ }
+ }
+
+ tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
+ tcg_out_ld(s, type, dest, dest, addr & 0xffff);
+}
+
+static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
+ int msb, int lsb, int ofs, int z)
+{
+ /* Format RIE-f */
+ tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
+ tcg_out16(s, (msb << 8) | (z << 7) | lsb);
+ tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
+}
+
+static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (facilities & FACILITY_EXT_IMM) {
+ tcg_out_insn(s, RRE, LGBR, dest, src);
+ return;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ if (dest == src) {
+ tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
+ }
+ tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
+ tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
+ }
+}
+
+static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (facilities & FACILITY_EXT_IMM) {
+ tcg_out_insn(s, RRE, LLGCR, dest, src);
+ return;
+ }
+
+ if (dest == src) {
+ tcg_out_movi(s, type, TCG_TMP0, 0xff);
+ src = TCG_TMP0;
+ } else {
+ tcg_out_movi(s, type, dest, 0xff);
+ }
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, src);
+ } else {
+ tcg_out_insn(s, RRE, NGR, dest, src);
+ }
+}
+
+static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (facilities & FACILITY_EXT_IMM) {
+ tcg_out_insn(s, RRE, LGHR, dest, src);
+ return;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ if (dest == src) {
+ tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
+ }
+ tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
+ tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
+ }
+}
+
+static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (facilities & FACILITY_EXT_IMM) {
+ tcg_out_insn(s, RRE, LLGHR, dest, src);
+ return;
+ }
+
+ if (dest == src) {
+ tcg_out_movi(s, type, TCG_TMP0, 0xffff);
+ src = TCG_TMP0;
+ } else {
+ tcg_out_movi(s, type, dest, 0xffff);
+ }
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, src);
+ } else {
+ tcg_out_insn(s, RRE, NGR, dest, src);
+ }
+}
+
+static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
+{
+ tcg_out_insn(s, RRE, LGFR, dest, src);
+}
+
+static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
+{
+ tcg_out_insn(s, RRE, LLGFR, dest, src);
+}
+
+/* Accept bit patterns like these:
+ 0....01....1
+ 1....10....0
+ 1..10..01..1
+ 0..01..10..0
+ Copied from gcc sources. */
+static inline bool risbg_mask(uint64_t c)
+{
+ uint64_t lsb;
+ /* We don't change the number of transitions by inverting,
+ so make sure we start with the LSB zero. */
+ if (c & 1) {
+ c = ~c;
+ }
+ /* Reject all zeros or all ones. */
+ if (c == 0) {
+ return false;
+ }
+ /* Find the first transition. */
+ lsb = c & -c;
+ /* Invert to look for a second transition. */
+ c = ~c;
+ /* Erase the first transition. */
+ c &= -lsb;
+ /* Find the second transition, if any. */
+ lsb = c & -c;
+ /* Match if all the bits are 1's, or if c is zero. */
+ return c == -lsb;
+}
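+
+/* For illustration: risbg_mask accepts 0x00000000ffff0000 (a single block
+ of ones) and the wrap-around mask 0xff000000000000ff (ones at both ends),
+ but rejects 0x00000000f0f0f0f0, which has more than one 0-to-1 transition
+ and so cannot be described by a single begin/end bit pair. */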
+
+static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
+{
+ int msb, lsb;
+ if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+ /* Achieve wraparound by swapping msb and lsb. */
+ msb = 64 - ctz64(~val);
+ lsb = clz64(~val) - 1;
+ } else {
+ msb = clz64(val);
+ lsb = 63 - ctz64(val);
+ }
+ tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
+}
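+
+/* For illustration: for the mask 0x00000000ffff0000 the else branch above
+ applies, giving msb = clz64(val) = 32 and lsb = 63 - ctz64(val) = 47, so
+ the RISBG keeps bits 32..47 (IBM numbering, bit 0 = MSB) and, with the
+ zero-remaining flag set, clears everything else. */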
+
+static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
+{
+ static const S390Opcode ni_insns[4] = {
+ RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
+ };
+ static const S390Opcode nif_insns[2] = {
+ RIL_NILF, RIL_NIHF
+ };
+ uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
+ int i;
+
+ /* Look for the zero-extensions. */
+ if ((val & valid) == 0xffffffff) {
+ tgen_ext32u(s, dest, dest);
+ return;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ if ((val & valid) == 0xff) {
+ tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
+ return;
+ }
+ if ((val & valid) == 0xffff) {
+ tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
+ return;
+ }
+ }
+
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ return;
+ }
+ }
+
+ /* Try all 48-bit insns that can perform it in one go. */
+ if (facilities & FACILITY_EXT_IMM) {
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = ~(0xffffffffull << i*32);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ return;
+ }
+ }
+ }
+ if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
+ tgen_andi_risbg(s, dest, dest, val);
+ return;
+ }
+
+ /* Fall back to loading the constant. */
+ tcg_out_movi(s, type, TCG_TMP0, val);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
+ } else {
+ tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+ }
+}
+
+static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+ static const S390Opcode oi_insns[4] = {
+ RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
+ };
+ static const S390Opcode nif_insns[2] = {
+ RIL_OILF, RIL_OIHF
+ };
+
+ int i;
+
+ /* Look for no-op. */
+ if (val == 0) {
+ return;
+ }
+
+ if (facilities & FACILITY_EXT_IMM) {
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = (0xffffull << i*16);
+ if ((val & mask) != 0 && (val & ~mask) == 0) {
+ tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
+ return;
+ }
+ }
+
+ /* Try all 48-bit insns that can perform it in one go. */
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = (0xffffffffull << i*32);
+ if ((val & mask) != 0 && (val & ~mask) == 0) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ return;
+ }
+ }
+
+ /* Perform the OR via sequential modifications to the high and
+ low parts. Do this via recursion to handle 16-bit vs 32-bit
+ masks in each half. */
+ tgen64_ori(s, dest, val & 0x00000000ffffffffull);
+ tgen64_ori(s, dest, val & 0xffffffff00000000ull);
+ } else {
+ /* With no extended-immediate facility, we don't need to be so
+ clever. Just iterate over the insns and mask in the constant. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = (0xffffull << i*16);
+ if ((val & mask) != 0) {
+ tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
+ }
+ }
+ }
+}
+
+static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+ /* Perform the xor by parts. */
+ if (val & 0xffffffff) {
+ tcg_out_insn(s, RIL, XILF, dest, val);
+ }
+ if (val > 0xffffffff) {
+ tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1);
+ }
+}
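+
+/* For illustration: tgen64_xori with val = 0xffff00000000ffff emits
+ XILF dest,0x0000ffff followed by XIHF dest,0xffff0000; a value that fits
+ entirely in the low 32 bits needs only the XILF. */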
+
+static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
+ TCGArg c2, int c2const)
+{
+ bool is_unsigned = is_unsigned_cond(c);
+ if (c2const) {
+ if (c2 == 0) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, LTR, r1, r1);
+ } else {
+ tcg_out_insn(s, RRE, LTGR, r1, r1);
+ }
+ return tcg_cond_to_ltr_cond[c];
+ } else {
+ if (is_unsigned) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, CLFI, r1, c2);
+ } else {
+ tcg_out_insn(s, RIL, CLGFI, r1, c2);
+ }
+ } else {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, CFI, r1, c2);
+ } else {
+ tcg_out_insn(s, RIL, CGFI, r1, c2);
+ }
+ }
+ }
+ } else {
+ if (is_unsigned) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, CLR, r1, c2);
+ } else {
+ tcg_out_insn(s, RRE, CLGR, r1, c2);
+ }
+ } else {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, CR, r1, c2);
+ } else {
+ tcg_out_insn(s, RRE, CGR, r1, c2);
+ }
+ }
+ }
+ return tcg_cond_to_s390_cond[c];
+}
+
+static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
+{
+ int cc;
+
+ switch (cond) {
+ case TCG_COND_GTU:
+ case TCG_COND_GT:
+ do_greater:
+ /* The result of a compare has CC=2 for GT and CC=3 unused.
+ ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */
+ tgen_cmp(s, type, cond, c1, c2, c2const);
+ tcg_out_movi(s, type, dest, 0);
+ tcg_out_insn(s, RRE, ALCGR, dest, dest);
+ return;
+
+ case TCG_COND_GEU:
+ do_geu:
+ /* We need "real" carry semantics, so use SUBTRACT LOGICAL
+ instead of COMPARE LOGICAL. This needs an extra move. */
+ tcg_out_mov(s, type, TCG_TMP0, c1);
+ if (c2const) {
+ tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2);
+ } else {
+ tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2);
+ }
+ } else {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, SLR, TCG_TMP0, c2);
+ } else {
+ tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2);
+ }
+ tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+ }
+ tcg_out_insn(s, RRE, ALCGR, dest, dest);
+ return;
+
+ case TCG_COND_LEU:
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ /* Swap operands so that we can use GEU/GTU/GT. */
+ if (c2const) {
+ tcg_out_movi(s, type, TCG_TMP0, c2);
+ c2 = c1;
+ c2const = 0;
+ c1 = TCG_TMP0;
+ } else {
+ TCGReg t = c1;
+ c1 = c2;
+ c2 = t;
+ }
+ if (cond == TCG_COND_LEU) {
+ goto do_geu;
+ }
+ cond = tcg_swap_cond(cond);
+ goto do_greater;
+
+ case TCG_COND_NE:
+ /* X != 0 is X > 0. */
+ if (c2const && c2 == 0) {
+ cond = TCG_COND_GTU;
+ goto do_greater;
+ }
+ break;
+
+ case TCG_COND_EQ:
+ /* X == 0 is X <= 0 is 0 >= X. */
+ if (c2const && c2 == 0) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0);
+ c2 = c1;
+ c2const = 0;
+ c1 = TCG_TMP0;
+ goto do_geu;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ cc = tgen_cmp(s, type, cond, c1, c2, c2const);
+ if (facilities & FACILITY_LOAD_ON_COND) {
+ /* Emit: d = 0, t = 1, d = (cc ? t : d). */
+ tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
+ tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
+ } else {
+ /* Emit: d = 1; if (cc) goto over; d = 0; over: */
+ tcg_out_movi(s, type, dest, 1);
+ tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+ tcg_out_movi(s, type, dest, 0);
+ }
+}
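+
+/* For illustration: in the do_greater path above, the compare leaves
+ CC = 2 exactly when the condition holds; ALCGR dest,dest then computes
+ dest + dest + carry with the carry taken from (CC & 2), so with dest
+ zeroed beforehand the result is 1 for true and 0 for false. */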
+
+static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
+ TCGReg c1, TCGArg c2, int c2const, TCGReg r3)
+{
+ int cc;
+ if (facilities & FACILITY_LOAD_ON_COND) {
+ cc = tgen_cmp(s, type, c, c1, c2, c2const);
+ tcg_out_insn(s, RRF, LOCGR, dest, r3, cc);
+ } else {
+ c = tcg_invert_cond(c);
+ cc = tgen_cmp(s, type, c, c1, c2, c2const);
+
+ /* Emit: if (cc) goto over; dest = r3; over: */
+ tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+ tcg_out_insn(s, RRE, LGR, dest, r3);
+ }
+}
+
+bool tcg_target_deposit_valid(int ofs, int len)
+{
+ return (facilities & FACILITY_GEN_INST_EXT) != 0;
+}
+
+static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
+ int ofs, int len)
+{
+ int lsb = (63 - ofs);
+ int msb = lsb - (len - 1);
+ tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
+}
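+
+/* For illustration: depositing a 16-bit field at bit offset 8 gives
+ lsb = 63 - 8 = 55 and msb = 55 - 15 = 40, so the RISBG rotates the
+ source left by 8 and inserts it into bits 40..55 of dest (IBM numbering),
+ leaving the other bits of dest intact since the zero-remaining flag
+ is clear. */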
+
+static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest)
+{
+ ptrdiff_t off = dest - s->code_ptr;
+ if (off == (int16_t)off) {
+ tcg_out_insn(s, RI, BRC, cc, off);
+ } else if (off == (int32_t)off) {
+ tcg_out_insn(s, RIL, BRCL, cc, off);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
+ tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
+ }
+}
+
+static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
+{
+ if (l->has_value) {
+ tgen_gotoi(s, cc, l->u.value_ptr);
+ } else if (USE_LONG_BRANCHES) {
+ tcg_out16(s, RIL_BRCL | (cc << 4));
+ tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, -2);
+ s->code_ptr += 2;
+ } else {
+ tcg_out16(s, RI_BRC | (cc << 4));
+ tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, -2);
+ s->code_ptr += 1;
+ }
+}
+
+static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
+ TCGReg r1, TCGReg r2, TCGLabel *l)
+{
+ intptr_t off;
+
+ if (l->has_value) {
+ off = l->u.value_ptr - s->code_ptr;
+ } else {
+ /* We need to keep the offset unchanged for retranslation. */
+ off = s->code_ptr[1];
+ tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2);
+ }
+
+ tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
+ tcg_out16(s, off);
+ tcg_out16(s, cc << 12 | (opc & 0xff));
+}
+
+static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
+ TCGReg r1, int i2, TCGLabel *l)
+{
+ tcg_target_long off;
+
+ if (l->has_value) {
+ off = l->u.value_ptr - s->code_ptr;
+ } else {
+ /* We need to keep the offset unchanged for retranslation. */
+ off = s->code_ptr[1];
+ tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2);
+ }
+
+ tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
+ tcg_out16(s, off);
+ tcg_out16(s, (i2 << 8) | (opc & 0xff));
+}
+
+static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
+ TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
+{
+ int cc;
+
+ if (facilities & FACILITY_GEN_INST_EXT) {
+ bool is_unsigned = is_unsigned_cond(c);
+ bool in_range;
+ S390Opcode opc;
+
+ cc = tcg_cond_to_s390_cond[c];
+
+ if (!c2const) {
+ opc = (type == TCG_TYPE_I32
+ ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
+ : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
+ tgen_compare_branch(s, opc, cc, r1, c2, l);
+ return;
+ }
+
+ /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
+ If the immediate we've been given does not fit that range, we'll
+ fall back to separate compare and branch instructions using the
+ larger comparison range afforded by COMPARE IMMEDIATE. */
+ if (type == TCG_TYPE_I32) {
+ if (is_unsigned) {
+ opc = RIE_CLIJ;
+ in_range = (uint32_t)c2 == (uint8_t)c2;
+ } else {
+ opc = RIE_CIJ;
+ in_range = (int32_t)c2 == (int8_t)c2;
+ }
+ } else {
+ if (is_unsigned) {
+ opc = RIE_CLGIJ;
+ in_range = (uint64_t)c2 == (uint8_t)c2;
+ } else {
+ opc = RIE_CGIJ;
+ in_range = (int64_t)c2 == (int8_t)c2;
+ }
+ }
+ if (in_range) {
+ tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
+ return;
+ }
+ }
+
+ cc = tgen_cmp(s, type, c, r1, c2, c2const);
+ tgen_branch(s, cc, l);
+}
+
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
+{
+ ptrdiff_t off = dest - s->code_ptr;
+ if (off == (int32_t)off) {
+ tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
+ tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
+ }
+}
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
+ TCGReg base, TCGReg index, int disp)
+{
+ switch (opc & (MO_SSIZE | MO_BSWAP)) {
+ case MO_UB:
+ tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
+ break;
+ case MO_SB:
+ tcg_out_insn(s, RXY, LGB, data, base, index, disp);
+ break;
+
+ case MO_UW | MO_BSWAP:
+ /* swapped unsigned halfword load with upper bits zeroed */
+ tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
+ tgen_ext16u(s, TCG_TYPE_I64, data, data);
+ break;
+ case MO_UW:
+ tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
+ break;
+
+ case MO_SW | MO_BSWAP:
+ /* swapped sign-extended halfword load */
+ tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
+ tgen_ext16s(s, TCG_TYPE_I64, data, data);
+ break;
+ case MO_SW:
+ tcg_out_insn(s, RXY, LGH, data, base, index, disp);
+ break;
+
+ case MO_UL | MO_BSWAP:
+ /* swapped unsigned int load with upper bits zeroed */
+ tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+ tgen_ext32u(s, data, data);
+ break;
+ case MO_UL:
+ tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
+ break;
+
+ case MO_SL | MO_BSWAP:
+ /* swapped sign-extended int load */
+ tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+ tgen_ext32s(s, data, data);
+ break;
+ case MO_SL:
+ tcg_out_insn(s, RXY, LGF, data, base, index, disp);
+ break;
+
+ case MO_Q | MO_BSWAP:
+ tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
+ break;
+ case MO_Q:
+ tcg_out_insn(s, RXY, LG, data, base, index, disp);
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
+ TCGReg base, TCGReg index, int disp)
+{
+ switch (opc & (MO_SIZE | MO_BSWAP)) {
+ case MO_UB:
+ if (disp >= 0 && disp < 0x1000) {
+ tcg_out_insn(s, RX, STC, data, base, index, disp);
+ } else {
+ tcg_out_insn(s, RXY, STCY, data, base, index, disp);
+ }
+ break;
+
+ case MO_UW | MO_BSWAP:
+ tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
+ break;
+ case MO_UW:
+ if (disp >= 0 && disp < 0x1000) {
+ tcg_out_insn(s, RX, STH, data, base, index, disp);
+ } else {
+ tcg_out_insn(s, RXY, STHY, data, base, index, disp);
+ }
+ break;
+
+ case MO_UL | MO_BSWAP:
+ tcg_out_insn(s, RXY, STRV, data, base, index, disp);
+ break;
+ case MO_UL:
+ if (disp >= 0 && disp < 0x1000) {
+ tcg_out_insn(s, RX, ST, data, base, index, disp);
+ } else {
+ tcg_out_insn(s, RXY, STY, data, base, index, disp);
+ }
+ break;
+
+ case MO_Q | MO_BSWAP:
+ tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
+ break;
+ case MO_Q:
+ tcg_out_insn(s, RXY, STG, data, base, index, disp);
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+#if defined(CONFIG_SOFTMMU)
+/* We're expecting to use a 20-bit signed offset on the tlb memory ops.
+ Using the offset of the second entry in the last tlb table ensures
+ that we can index all of the elements of the first entry. */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+ > 0x7ffff);
+
+/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
+ addend into R2. Returns a register with the sanitized guest address. */
+static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
+ int mem_index, bool is_ld)
+{
+ TCGMemOp s_bits = opc & MO_SIZE;
+ uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1);
+ int ofs;
+
+ if (facilities & FACILITY_GEN_INST_EXT) {
+ tcg_out_risbg(s, TCG_REG_R2, addr_reg,
+ 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS,
+ 63 - CPU_TLB_ENTRY_BITS,
+ 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1);
+ tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
+ } else {
+ tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg);
+ tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
+ (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
+ }
+
+ if (is_ld) {
+ ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
+ } else {
+ ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+ }
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
+ } else {
+ tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
+ }
+
+ ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+ tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs);
+
+ if (TARGET_LONG_BITS == 32) {
+ tgen_ext32u(s, TCG_REG_R3, addr_reg);
+ return TCG_REG_R3;
+ }
+ return addr_reg;
+}
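+
+/* For illustration, with hypothetical values TARGET_PAGE_BITS = 12,
+ CPU_TLB_BITS = 8 and CPU_TLB_ENTRY_BITS = 5: the RISBG above uses start
+ bit 64 - 8 - 5 = 51, end bit 63 - 5 = 58 and rotate 64 + 5 - 12 = 57
+ (equivalent to a right shift by TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
+ leaving R2 holding
+ ((addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1)) << CPU_TLB_ENTRY_BITS,
+ ready to be used as the byte offset into the TLB table. */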
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+ TCGReg data, TCGReg addr,
+ tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = data;
+ label->addrlo_reg = addr;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr;
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg addr_reg = lb->addrlo_reg;
+ TCGReg data_reg = lb->datalo_reg;
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+
+ patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+ }
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
+
+ tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg addr_reg = lb->addrlo_reg;
+ TCGReg data_reg = lb->datalo_reg;
+ TCGMemOpIdx oi = lb->oi;
+ TCGMemOp opc = get_memop(oi);
+
+ patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+ }
+ switch (opc & MO_SIZE) {
+ case MO_UB:
+ tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+ break;
+ case MO_UW:
+ tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+ break;
+ case MO_UL:
+ tgen_ext32u(s, TCG_REG_R4, data_reg);
+ break;
+ case MO_Q:
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+ break;
+ default:
+ tcg_abort();
+ }
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
+ tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
+}
+#else
+static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
+ TCGReg *index_reg, tcg_target_long *disp)
+{
+ if (TARGET_LONG_BITS == 32) {
+ tgen_ext32u(s, TCG_TMP0, *addr_reg);
+ *addr_reg = TCG_TMP0;
+ }
+ if (GUEST_BASE < 0x80000) {
+ *index_reg = TCG_REG_NONE;
+ *disp = GUEST_BASE;
+ } else {
+ *index_reg = TCG_GUEST_BASE_REG;
+ *disp = 0;
+ }
+}
+#endif /* CONFIG_SOFTMMU */
+
+static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
+ TCGMemOpIdx oi)
+{
+ TCGMemOp opc = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ tcg_insn_unit *label_ptr;
+ TCGReg base_reg;
+
+ base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
+
+ /* We need to keep the offset unchanged for retranslation. */
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+ label_ptr = s->code_ptr;
+ s->code_ptr += 1;
+
+ tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
+
+ add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
+#else
+ TCGReg index_reg;
+ tcg_target_long disp;
+
+ tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
+ tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
+ TCGMemOpIdx oi)
+{
+ TCGMemOp opc = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ tcg_insn_unit *label_ptr;
+ TCGReg base_reg;
+
+ base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
+
+ /* We need to keep the offset unchanged for retranslation. */
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+ label_ptr = s->code_ptr;
+ s->code_ptr += 1;
+
+ tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
+
+ add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
+#else
+ TCGReg index_reg;
+ tcg_target_long disp;
+
+ tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
+ tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
+#endif
+}
+
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_,x),_i32): \
+ case glue(glue(INDEX_op_,x),_i64)
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg *args, const int *const_args)
+{
+ S390Opcode op;
+ TCGArg a0, a1, a2;
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ /* return value */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]);
+ tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
+ break;
+
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
+ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+ s->code_ptr += 2;
+ } else {
+ /* load address stored at s->tb_next + args[0] */
+ tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]);
+ /* and go there */
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0);
+ }
+ s->tb_next_offset[args[0]] = tcg_current_code_size(s);
+ break;
+
+ OP_32_64(ld8u):
+ /* ??? LLC (RXY format) is only present with the extended-immediate
+ facility, whereas LLGC is always present. */
+ tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ OP_32_64(ld8s):
+ /* ??? LB is no smaller than LGB, so no point to using it. */
+ tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ OP_32_64(ld16u):
+ /* ??? LLH (RXY format) is only present with the extended-immediate
+ facility, whereas LLGH is always present. */
+ tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ case INDEX_op_ld16s_i32:
+ tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ case INDEX_op_ld_i32:
+ tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+
+ OP_32_64(st8):
+ tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
+ TCG_REG_NONE, args[2]);
+ break;
+
+ OP_32_64(st16):
+ tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
+ TCG_REG_NONE, args[2]);
+ break;
+
+ case INDEX_op_st_i32:
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_add_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ if (const_args[2]) {
+ do_addi_32:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AHI, a0, a2);
+ break;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ tcg_out_insn(s, RIL, AFI, a0, a2);
+ break;
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, AR, a0, a2);
+ } else {
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
+ }
+ break;
+ case INDEX_op_sub_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_32;
+ }
+ tcg_out_insn(s, RR, SR, args[0], args[2]);
+ break;
+
+ case INDEX_op_and_i32:
+ if (const_args[2]) {
+ tgen_andi(s, TCG_TYPE_I32, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RR, NR, args[0], args[2]);
+ }
+ break;
+ case INDEX_op_or_i32:
+ if (const_args[2]) {
+ tgen64_ori(s, args[0], args[2] & 0xffffffff);
+ } else {
+ tcg_out_insn(s, RR, OR, args[0], args[2]);
+ }
+ break;
+ case INDEX_op_xor_i32:
+ if (const_args[2]) {
+ tgen64_xori(s, args[0], args[2] & 0xffffffff);
+ } else {
+ tcg_out_insn(s, RR, XR, args[0], args[2]);
+ }
+ break;
+
+ case INDEX_op_neg_i32:
+ tcg_out_insn(s, RR, LCR, args[0], args[1]);
+ break;
+
+ case INDEX_op_mul_i32:
+ if (const_args[2]) {
+ if ((int32_t)args[2] == (int16_t)args[2]) {
+ tcg_out_insn(s, RI, MHI, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, MSR, args[0], args[2]);
+ }
+ break;
+
+ case INDEX_op_div2_i32:
+ tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
+ break;
+ case INDEX_op_divu2_i32:
+ tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
+ break;
+
+ case INDEX_op_shl_i32:
+ op = RS_SLL;
+ do_shift32:
+ if (const_args[2]) {
+ tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]);
+ } else {
+ tcg_out_sh32(s, op, args[0], args[2], 0);
+ }
+ break;
+ case INDEX_op_shr_i32:
+ op = RS_SRL;
+ goto do_shift32;
+ case INDEX_op_sar_i32:
+ op = RS_SRA;
+ goto do_shift32;
+
+ case INDEX_op_rotl_i32:
+ /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
+ } else {
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
+ }
+ break;
+ case INDEX_op_rotr_i32:
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1],
+ TCG_REG_NONE, (32 - args[2]) & 31);
+ } else {
+ tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
+ }
+ break;
+
+ case INDEX_op_ext8s_i32:
+ tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i32:
+ tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ext8u_i32:
+ tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ext16u_i32:
+ tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+
+ OP_32_64(bswap16):
+ /* The TCG bswap definition requires bits 0-47 already be zero.
+ Thus we don't need the G-type insns to implement bswap16_i64. */
+ tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
+ tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16);
+ break;
+ OP_32_64(bswap32):
+ tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
+ break;
+
+ case INDEX_op_add2_i32:
+ if (const_args[4]) {
+ tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RR, ALR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i32:
+ if (const_args[4]) {
+ tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RR, SLR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
+ break;
+
+ case INDEX_op_br:
+ tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
+ break;
+
+ case INDEX_op_brcond_i32:
+ tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
+ args[1], const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i32:
+ tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i32:
+ tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ /* ??? Technically we can use a non-extending instruction. */
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_ld16s_i64:
+ tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+ case INDEX_op_ld32u_i64:
+ tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_st32_i64:
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_add_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ do_addi_64:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AGHI, a0, a2);
+ break;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ if (a2 == (int32_t)a2) {
+ tcg_out_insn(s, RIL, AGFI, a0, a2);
+ break;
+ } else if (a2 == (uint32_t)a2) {
+ tcg_out_insn(s, RIL, ALGFI, a0, a2);
+ break;
+ } else if (-a2 == (uint32_t)-a2) {
+ tcg_out_insn(s, RIL, SLGFI, a0, -a2);
+ break;
+ }
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, AGR, a0, a2);
+ } else {
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
+ }
+ break;
+ case INDEX_op_sub_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_64;
+ } else {
+ tcg_out_insn(s, RRE, SGR, args[0], args[2]);
+ }
+ break;
+
+ case INDEX_op_and_i64:
+ if (const_args[2]) {
+ tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+ }
+ break;
+ case INDEX_op_or_i64:
+ if (const_args[2]) {
+ tgen64_ori(s, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRE, OGR, args[0], args[2]);
+ }
+ break;
+ case INDEX_op_xor_i64:
+ if (const_args[2]) {
+ tgen64_xori(s, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRE, XGR, args[0], args[2]);
+ }
+ break;
+
+ case INDEX_op_neg_i64:
+ tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
+ break;
+ case INDEX_op_bswap64_i64:
+ tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
+ break;
+
+ case INDEX_op_mul_i64:
+ if (const_args[2]) {
+ if (args[2] == (int16_t)args[2]) {
+ tcg_out_insn(s, RI, MGHI, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
+ }
+ break;
+
+ case INDEX_op_div2_i64:
+ /* ??? We get an unnecessary sign-extension of the dividend
+ into R3 with this definition, but as we do in fact always
+ produce both quotient and remainder using INDEX_op_div_i64
+ instead requires jumping through even more hoops. */
+ tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
+ break;
+ case INDEX_op_divu2_i64:
+ tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
+ break;
+ case INDEX_op_mulu2_i64:
+ tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+ break;
+
+ case INDEX_op_shl_i64:
+ op = RSY_SLLG;
+ do_shift64:
+ if (const_args[2]) {
+ tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
+ } else {
+ tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
+ }
+ break;
+ case INDEX_op_shr_i64:
+ op = RSY_SRLG;
+ goto do_shift64;
+ case INDEX_op_sar_i64:
+ op = RSY_SRAG;
+ goto do_shift64;
+
+ case INDEX_op_rotl_i64:
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
+ TCG_REG_NONE, args[2]);
+ } else {
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
+ }
+ break;
+ case INDEX_op_rotr_i64:
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
+ TCG_REG_NONE, (64 - args[2]) & 63);
+ } else {
+ /* We can use the smaller 32-bit negate because only the
+ low 6 bits are examined for the rotate. */
+ tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
+ }
+ break;
+
+ case INDEX_op_ext8s_i64:
+ tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i64:
+ tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ext32s_i64:
+ tgen_ext32s(s, args[0], args[1]);
+ break;
+ case INDEX_op_ext8u_i64:
+ tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ext16u_i64:
+ tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ext32u_i64:
+ tgen_ext32u(s, args[0], args[1]);
+ break;
+
+ case INDEX_op_add2_i64:
+ if (const_args[4]) {
+ if ((int64_t)args[4] >= 0) {
+ tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i64:
+ if (const_args[4]) {
+ if ((int64_t)args[4] >= 0) {
+ tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
+ break;
+
+ case INDEX_op_brcond_i64:
+ tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
+ args[1], const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i64:
+ tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i64:
+ tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
+
+ OP_32_64(deposit):
+ tgen_deposit(s, args[0], args[2], args[3], args[4]);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static const TCGTargetOpDef s390_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_br, { } },
+
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "r", "r" } },
+ { INDEX_op_st16_i32, { "r", "r" } },
+ { INDEX_op_st_i32, { "r", "r" } },
+
+ { INDEX_op_add_i32, { "r", "r", "ri" } },
+ { INDEX_op_sub_i32, { "r", "0", "ri" } },
+ { INDEX_op_mul_i32, { "r", "0", "rK" } },
+
+ { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
+ { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
+
+ { INDEX_op_and_i32, { "r", "0", "ri" } },
+ { INDEX_op_or_i32, { "r", "0", "rO" } },
+ { INDEX_op_xor_i32, { "r", "0", "rX" } },
+
+ { INDEX_op_neg_i32, { "r", "r" } },
+
+ { INDEX_op_shl_i32, { "r", "0", "Ri" } },
+ { INDEX_op_shr_i32, { "r", "0", "Ri" } },
+ { INDEX_op_sar_i32, { "r", "0", "Ri" } },
+
+ { INDEX_op_rotl_i32, { "r", "r", "Ri" } },
+ { INDEX_op_rotr_i32, { "r", "r", "Ri" } },
+
+ { INDEX_op_ext8s_i32, { "r", "r" } },
+ { INDEX_op_ext8u_i32, { "r", "r" } },
+ { INDEX_op_ext16s_i32, { "r", "r" } },
+ { INDEX_op_ext16u_i32, { "r", "r" } },
+
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
+
+ { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } },
+ { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } },
+
+ { INDEX_op_brcond_i32, { "r", "rC" } },
+ { INDEX_op_setcond_i32, { "r", "r", "rC" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } },
+ { INDEX_op_deposit_i32, { "r", "0", "r" } },
+
+ { INDEX_op_qemu_ld_i32, { "r", "L" } },
+ { INDEX_op_qemu_ld_i64, { "r", "L" } },
+ { INDEX_op_qemu_st_i32, { "L", "L" } },
+ { INDEX_op_qemu_st_i64, { "L", "L" } },
+
+ { INDEX_op_ld8u_i64, { "r", "r" } },
+ { INDEX_op_ld8s_i64, { "r", "r" } },
+ { INDEX_op_ld16u_i64, { "r", "r" } },
+ { INDEX_op_ld16s_i64, { "r", "r" } },
+ { INDEX_op_ld32u_i64, { "r", "r" } },
+ { INDEX_op_ld32s_i64, { "r", "r" } },
+ { INDEX_op_ld_i64, { "r", "r" } },
+
+ { INDEX_op_st8_i64, { "r", "r" } },
+ { INDEX_op_st16_i64, { "r", "r" } },
+ { INDEX_op_st32_i64, { "r", "r" } },
+ { INDEX_op_st_i64, { "r", "r" } },
+
+ { INDEX_op_add_i64, { "r", "r", "ri" } },
+ { INDEX_op_sub_i64, { "r", "0", "ri" } },
+ { INDEX_op_mul_i64, { "r", "0", "rK" } },
+
+ { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
+ { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
+ { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } },
+
+ { INDEX_op_and_i64, { "r", "0", "ri" } },
+ { INDEX_op_or_i64, { "r", "0", "rO" } },
+ { INDEX_op_xor_i64, { "r", "0", "rX" } },
+
+ { INDEX_op_neg_i64, { "r", "r" } },
+
+ { INDEX_op_shl_i64, { "r", "r", "Ri" } },
+ { INDEX_op_shr_i64, { "r", "r", "Ri" } },
+ { INDEX_op_sar_i64, { "r", "r", "Ri" } },
+
+ { INDEX_op_rotl_i64, { "r", "r", "Ri" } },
+ { INDEX_op_rotr_i64, { "r", "r", "Ri" } },
+
+ { INDEX_op_ext8s_i64, { "r", "r" } },
+ { INDEX_op_ext8u_i64, { "r", "r" } },
+ { INDEX_op_ext16s_i64, { "r", "r" } },
+ { INDEX_op_ext16u_i64, { "r", "r" } },
+ { INDEX_op_ext32s_i64, { "r", "r" } },
+ { INDEX_op_ext32u_i64, { "r", "r" } },
+
+ { INDEX_op_bswap16_i64, { "r", "r" } },
+ { INDEX_op_bswap32_i64, { "r", "r" } },
+ { INDEX_op_bswap64_i64, { "r", "r" } },
+
+ { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } },
+ { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } },
+
+ { INDEX_op_brcond_i64, { "r", "rC" } },
+ { INDEX_op_setcond_i64, { "r", "r", "rC" } },
+ { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
+ { INDEX_op_deposit_i64, { "r", "0", "r" } },
+
+ { -1 },
+};
+
+static void query_facilities(void)
+{
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+
+ /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
+ is present on all 64-bit systems, but let's check for it anyway. */
+ if (hwcap & HWCAP_S390_STFLE) {
+ register int r0 __asm__("0");
+ register void *r1 __asm__("1");
+
+ /* stfle 0(%r1) */
+ r1 = &facilities;
+ asm volatile(".word 0xb2b0,0x1000"
+ : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc");
+ }
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ query_facilities();
+
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
+
+ tcg_regset_clear(tcg_target_call_clobber_regs);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
+ /* The r6 register is technically call-saved, but it's also a parameter
+ register, so it can get killed by setup for the qemu_st helper. */
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
+ /* The return register can be considered call-clobbered. */
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
+ /* XXX many insns can't be used with R0, so we'd better avoid it for now */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+
+ tcg_add_target_add_op_defs(s390_op_defs);
+}
+
+#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long)))
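+/* As a rough illustration (assuming the common TCG_STATIC_CALL_ARGS_SIZE of
+ 128 and CPU_TEMP_BUF_NLONGS of 128 with 8-byte longs), this comes to
+ 160 + 128 + 1024 = 1312 bytes, well under the 1 << 14 limit asserted
+ below for the uleb128 encoding. */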
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ /* stmg %r6,%r15,48(%r15) (save registers) */
+ tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
+
+ /* aghi %r15,-frame_size */
+ tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
+
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+ if (GUEST_BASE >= 0x80000) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ /* br %r3 (go to TB) */
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
+
+ tb_ret_addr = s->code_ptr;
+
+ /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
+ tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
+ FRAME_SIZE + 48);
+
+ /* br %r14 (return) */
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[18];
+} DebugFrame;
+
+/* We're expecting a 2-byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
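+
+/* (Illustration only: a uleb128 value is emitted 7 bits at a time, low bits
+ first, with bit 7 set on every byte but the last, so two bytes cover
+ values below 1 << 14. E.g. 300 = 0x12c becomes 0xac 0x02: 0x2c | 0x80,
+ then 300 >> 7 = 2.) */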
+
+#define ELF_HOST_MACHINE EM_S390
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 8, /* sleb128 8 */
+ .h.cie.return_column = TCG_REG_R14,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x86, 6, /* DW_CFA_offset, %r6, 48 */
+ 0x87, 7, /* DW_CFA_offset, %r7, 56 */
+ 0x88, 8, /* DW_CFA_offset, %r8, 64 */
+ 0x89, 9, /* DW_CFA_offset, %r9, 72 */
+ 0x8a, 10, /* DW_CFA_offset, %r10, 80 */
+ 0x8b, 11, /* DW_CFA_offset, %r11, 88 */
+ 0x8c, 12, /* DW_CFA_offset, %r12, 96 */
+ 0x8d, 13, /* DW_CFA_offset, %r13, 104 */
+ 0x8e, 14, /* DW_CFA_offset, %r14, 112 */
+ }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
new file mode 100644
index 00000000..91576d59
--- /dev/null
+++ b/tcg/s390/tcg-target.h
@@ -0,0 +1,123 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TCG_TARGET_S390
+#define TCG_TARGET_S390 1
+
+#define TCG_TARGET_INSN_UNIT_SIZE 2
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
+
+typedef enum TCGReg {
+ TCG_REG_R0 = 0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15
+} TCGReg;
+
+#define TCG_TARGET_NB_REGS 16
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 0
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_trunc_shr_i32 0
+
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 0
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
+#define TCG_TARGET_deposit_i64_valid tcg_target_deposit_valid
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_R15
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_STACK_OFFSET 160
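+/* (The 160 bytes are, presumably, the z/Architecture ELF ABI register save
+ area that a caller must leave below its stack pointer for callees.) */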
+
+#define TCG_TARGET_EXTEND_ARGS 1
+
+enum {
+ TCG_AREG0 = TCG_REG_R10,
+};
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+}
+
+#endif
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
new file mode 100644
index 00000000..1a870a81
--- /dev/null
+++ b/tcg/sparc/tcg-target.c
@@ -0,0 +1,1650 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-be-null.h"
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "%g0",
+ "%g1",
+ "%g2",
+ "%g3",
+ "%g4",
+ "%g5",
+ "%g6",
+ "%g7",
+ "%o0",
+ "%o1",
+ "%o2",
+ "%o3",
+ "%o4",
+ "%o5",
+ "%o6",
+ "%o7",
+ "%l0",
+ "%l1",
+ "%l2",
+ "%l3",
+ "%l4",
+ "%l5",
+ "%l6",
+ "%l7",
+ "%i0",
+ "%i1",
+ "%i2",
+ "%i3",
+ "%i4",
+ "%i5",
+ "%i6",
+ "%i7",
+};
+#endif
+
+#ifdef __arch64__
+# define SPARC64 1
+#else
+# define SPARC64 0
+#endif
+
+/* Note that sparcv8plus can only hold 64-bit quantities in the %g and %o
+ registers. These are saved manually by the kernel in full 64-bit
+ slots. The %i and %l registers are saved by the register window
+ mechanism, which only allocates space for 32 bits. Given that this
+ window spill/fill can happen on any signal, we must consider the
+ high bits of the %i and %l registers garbage at all times. */
+#if SPARC64
+# define ALL_64 0xffffffffu
+#else
+# define ALL_64 0xffffu
+#endif
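+
+/* With the register numbering from tcg-target.h (%g0-%g7 = 0-7,
+ %o0-%o7 = 8-15, %l0-%l7 = 16-23, %i0-%i7 = 24-31), the 32-bit mask
+ covers every register, while the 16-bit mask restricts 64-bit values
+ to the %g and %o groups. */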
+
+/* Define some temporary registers. T2 is used for constant generation. */
+#define TCG_REG_T1 TCG_REG_G1
+#define TCG_REG_T2 TCG_REG_O7
+
+#ifdef CONFIG_USE_GUEST_BASE
+# define TCG_GUEST_BASE_REG TCG_REG_I5
+#else
+# define TCG_GUEST_BASE_REG TCG_REG_G0
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_L0,
+ TCG_REG_L1,
+ TCG_REG_L2,
+ TCG_REG_L3,
+ TCG_REG_L4,
+ TCG_REG_L5,
+ TCG_REG_L6,
+ TCG_REG_L7,
+
+ TCG_REG_I0,
+ TCG_REG_I1,
+ TCG_REG_I2,
+ TCG_REG_I3,
+ TCG_REG_I4,
+ TCG_REG_I5,
+
+ TCG_REG_G2,
+ TCG_REG_G3,
+ TCG_REG_G4,
+ TCG_REG_G5,
+
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
+};
+
+static const int tcg_target_call_iarg_regs[6] = {
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+};
+
+#define INSN_OP(x) ((x) << 30)
+#define INSN_OP2(x) ((x) << 22)
+#define INSN_OP3(x) ((x) << 19)
+#define INSN_OPF(x) ((x) << 5)
+#define INSN_RD(x) ((x) << 25)
+#define INSN_RS1(x) ((x) << 14)
+#define INSN_RS2(x) (x)
+#define INSN_ASI(x) ((x) << 5)
+
+#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
+#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
+#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
+#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
+#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
+#define INSN_COND(x) ((x) << 25)
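+
+/* INSN_OFF16/INSN_OFF19 take pc-relative byte displacements, shift them
+ down to word displacements, and (for the 16-bit form) split the result
+ into the d16lo and d16hi fields expected by the BPr branch format. */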
+
+#define COND_N 0x0
+#define COND_E 0x1
+#define COND_LE 0x2
+#define COND_L 0x3
+#define COND_LEU 0x4
+#define COND_CS 0x5
+#define COND_NEG 0x6
+#define COND_VS 0x7
+#define COND_A 0x8
+#define COND_NE 0x9
+#define COND_G 0xa
+#define COND_GE 0xb
+#define COND_GU 0xc
+#define COND_CC 0xd
+#define COND_POS 0xe
+#define COND_VC 0xf
+#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
+
+#define RCOND_Z 1
+#define RCOND_LEZ 2
+#define RCOND_LZ 3
+#define RCOND_NZ 5
+#define RCOND_GZ 6
+#define RCOND_GEZ 7
+
+#define MOVCC_ICC (1 << 18)
+#define MOVCC_XCC (1 << 18 | 1 << 12)
+
+#define BPCC_ICC 0
+#define BPCC_XCC (2 << 20)
+#define BPCC_PT (1 << 19)
+#define BPCC_PN 0
+#define BPCC_A (1 << 29)
+
+#define BPR_PT BPCC_PT
+
+#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00))
+#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
+#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01))
+#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05))
+#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02))
+#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12))
+#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06))
+#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03))
+#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04))
+#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
+#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08))
+#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c))
+#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
+#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b))
+#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
+#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
+#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
+#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
+#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
+#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
+#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
+
+#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
+#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))
+
+#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25))
+#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26))
+#define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27))
+
+#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
+#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
+#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))
+
+#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
+#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
+#define JMPL (INSN_OP(2) | INSN_OP3(0x38))
+#define RETURN (INSN_OP(2) | INSN_OP3(0x39))
+#define SAVE (INSN_OP(2) | INSN_OP3(0x3c))
+#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d))
+#define SETHI (INSN_OP(0) | INSN_OP2(0x4))
+#define CALL INSN_OP(1)
+#define LDUB (INSN_OP(3) | INSN_OP3(0x01))
+#define LDSB (INSN_OP(3) | INSN_OP3(0x09))
+#define LDUH (INSN_OP(3) | INSN_OP3(0x02))
+#define LDSH (INSN_OP(3) | INSN_OP3(0x0a))
+#define LDUW (INSN_OP(3) | INSN_OP3(0x00))
+#define LDSW (INSN_OP(3) | INSN_OP3(0x08))
+#define LDX (INSN_OP(3) | INSN_OP3(0x0b))
+#define STB (INSN_OP(3) | INSN_OP3(0x05))
+#define STH (INSN_OP(3) | INSN_OP3(0x06))
+#define STW (INSN_OP(3) | INSN_OP3(0x04))
+#define STX (INSN_OP(3) | INSN_OP3(0x0e))
+#define LDUBA (INSN_OP(3) | INSN_OP3(0x11))
+#define LDSBA (INSN_OP(3) | INSN_OP3(0x19))
+#define LDUHA (INSN_OP(3) | INSN_OP3(0x12))
+#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a))
+#define LDUWA (INSN_OP(3) | INSN_OP3(0x10))
+#define LDSWA (INSN_OP(3) | INSN_OP3(0x18))
+#define LDXA (INSN_OP(3) | INSN_OP3(0x1b))
+#define STBA (INSN_OP(3) | INSN_OP3(0x15))
+#define STHA (INSN_OP(3) | INSN_OP3(0x16))
+#define STWA (INSN_OP(3) | INSN_OP3(0x14))
+#define STXA (INSN_OP(3) | INSN_OP3(0x1e))
+
+#ifndef ASI_PRIMARY_LITTLE
+#define ASI_PRIMARY_LITTLE 0x88
+#endif
+
+#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#ifndef use_vis3_instructions
+bool use_vis3_instructions;
+#endif
+
+static inline int check_fit_i64(int64_t val, unsigned int bits)
+{
+ return val == sextract64(val, 0, bits);
+}
+
+static inline int check_fit_i32(int32_t val, unsigned int bits)
+{
+ return val == sextract32(val, 0, bits);
+}
+
+#define check_fit_tl check_fit_i64
+#if SPARC64
+# define check_fit_ptr check_fit_i64
+#else
+# define check_fit_ptr check_fit_i32
+#endif
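+
+/* For example, check_fit_i32(4095, 13) holds (the signed 13-bit range is
+ -4096..4095), whereas check_fit_i32(4096, 13) does not: sign-extending
+ the low 13 bits of 0x1000 yields -4096. */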
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ uint32_t insn;
+
+ assert(addend == 0);
+ value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);
+
+ switch (type) {
+ case R_SPARC_WDISP16:
+ if (!check_fit_ptr(value >> 2, 16)) {
+ tcg_abort();
+ }
+ insn = *code_ptr;
+ insn &= ~INSN_OFF16(-1);
+ insn |= INSN_OFF16(value);
+ *code_ptr = insn;
+ break;
+ case R_SPARC_WDISP19:
+ if (!check_fit_ptr(value >> 2, 19)) {
+ tcg_abort();
+ }
+ insn = *code_ptr;
+ insn &= ~INSN_OFF19(-1);
+ insn |= INSN_OFF19(value);
+ *code_ptr = insn;
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch (ct_str[0]) {
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ break;
+ case 'R':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, ALL_64);
+ break;
+ case 'A': /* qemu_ld/st address constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0,
+ TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff);
+ reserve_helpers:
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+ break;
+ case 's': /* qemu_st data 32-bit constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ goto reserve_helpers;
+ case 'S': /* qemu_st data 64-bit constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, ALL_64);
+ goto reserve_helpers;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_S11;
+ break;
+ case 'J':
+ ct->ct |= TCG_CT_CONST_S13;
+ break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
+ TCGReg rs2, int op)
+{
+ tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
+}
+
+static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
+ int32_t offset, int op)
+{
+ tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
+}
+
+static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
+ int32_t val2, int val2const, int op)
+{
+ tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
+ | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+ TCGReg ret, TCGReg arg)
+{
+ if (ret != arg) {
+ tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+ }
+}
+
+static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
+{
+ tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
+}
+
+static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
+{
+ tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ tcg_target_long hi, lo = (int32_t)arg;
+
+ /* Make sure we test 32-bit constants for imm13 properly. */
+ if (type == TCG_TYPE_I32) {
+ arg = lo;
+ }
+
+ /* A 13-bit constant sign-extended to 64-bits. */
+ if (check_fit_tl(arg, 13)) {
+ tcg_out_movi_imm13(s, ret, arg);
+ return;
+ }
+
+ /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
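+ /* (For instance, 0x12345678 is built as a sethi of the top 22 bits,
+ leaving 0x12345400 in the register, followed by an OR with the low
+ 10 bits 0x278; illustration only.) */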
+ if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
+ tcg_out_sethi(s, ret, arg);
+ if (arg & 0x3ff) {
+ tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
+ }
+ return;
+ }
+
+ /* A 32-bit constant sign-extended to 64-bits. */
+ if (arg == lo) {
+ tcg_out_sethi(s, ret, ~arg);
+ tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
+ return;
+ }
+
+ /* A 64-bit constant decomposed into 2 32-bit pieces. */
+ if (check_fit_i32(lo, 13)) {
+ hi = (arg - lo) >> 32;
+ tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
+ tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+ tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
+ } else {
+ hi = arg >> 32;
+ tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
+ tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+ tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
+ }
+}
+
+static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
+ TCGReg a2, int op)
+{
+ tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
+}
+
+static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
+ intptr_t offset, int op)
+{
+ if (check_fit_ptr(offset, 13)) {
+ tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
+ INSN_IMM13(offset));
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
+ tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
+ }
+}
+
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
+}
+
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
+}
+
+static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg)
+{
+ tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
+ tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
+}
+
+static inline void tcg_out_sety(TCGContext *s, TCGReg rs)
+{
+ tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
+}
+
+static inline void tcg_out_rdy(TCGContext *s, TCGReg rd)
+{
+ tcg_out32(s, RDY | INSN_RD(rd));
+}
+
+static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
+ int32_t val2, int val2const, int uns)
+{
+ /* Load Y with the sign/zero extension of RS1 to 64-bits. */
+ if (uns) {
+ tcg_out_sety(s, TCG_REG_G0);
+ } else {
+ tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
+ tcg_out_sety(s, TCG_REG_T1);
+ }
+
+ tcg_out_arithc(s, rd, rs1, val2, val2const,
+ uns ? ARITH_UDIV : ARITH_SDIV);
+}
+
+static inline void tcg_out_nop(TCGContext *s)
+{
+ tcg_out_sethi(s, TCG_REG_G0, 0);
+}
+
+static const uint8_t tcg_cond_to_bcond[] = {
+ [TCG_COND_EQ] = COND_E,
+ [TCG_COND_NE] = COND_NE,
+ [TCG_COND_LT] = COND_L,
+ [TCG_COND_GE] = COND_GE,
+ [TCG_COND_LE] = COND_LE,
+ [TCG_COND_GT] = COND_G,
+ [TCG_COND_LTU] = COND_CS,
+ [TCG_COND_GEU] = COND_CC,
+ [TCG_COND_LEU] = COND_LEU,
+ [TCG_COND_GTU] = COND_GU,
+};
+
+static const uint8_t tcg_cond_to_rcond[] = {
+ [TCG_COND_EQ] = RCOND_Z,
+ [TCG_COND_NE] = RCOND_NZ,
+ [TCG_COND_LT] = RCOND_LZ,
+ [TCG_COND_GT] = RCOND_GZ,
+ [TCG_COND_LE] = RCOND_LEZ,
+ [TCG_COND_GE] = RCOND_GEZ
+};
+
+static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
+{
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
+}
+
+static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
+{
+ int off19;
+
+ if (l->has_value) {
+ off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
+ } else {
+ /* Make sure to preserve destinations during retranslation. */
+ off19 = *s->code_ptr & INSN_OFF19(-1);
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
+ }
+ tcg_out_bpcc0(s, scond, flags, off19);
+}
+
+static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
+{
+ tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
+}
+
+static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
+ int32_t arg2, int const_arg2, TCGLabel *l)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
+ tcg_out_nop(s);
+}
+
+static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
+ int32_t v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
+ | INSN_RS1(tcg_cond_to_bcond[cond])
+ | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const,
+ int32_t v1, int v1const)
+{
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
+}
+
+static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
+ int32_t arg2, int const_arg2, TCGLabel *l)
+{
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
+ if (arg2 == 0 && !is_unsigned_cond(cond)) {
+ int off16;
+
+ if (l->has_value) {
+ off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
+ } else {
+ /* Make sure to preserve destinations during retranslation. */
+ off16 = *s->code_ptr & INSN_OFF16(-1);
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
+ }
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
+ | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
+ } else {
+ tcg_out_cmp(s, arg1, arg2, const_arg2);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
+ }
+ tcg_out_nop(s);
+}
+
+static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
+ int32_t v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
+ | (tcg_cond_to_rcond[cond] << 10)
+ | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const,
+ int32_t v1, int v1const)
+{
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare.
+ Note that the immediate range is one bit smaller, so we must check
+ for that as well. */
+ if (c2 == 0 && !is_unsigned_cond(cond)
+ && (!v1const || check_fit_i32(v1, 10))) {
+ tcg_out_movr(s, cond, ret, c1, v1, v1const);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
+ }
+}
+
+static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const)
+{
+ /* For 32-bit comparisons, we can play games with ADDC/SUBC. */
+ switch (cond) {
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ /* The result of the comparison is in the carry bit. */
+ break;
+
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ /* For equality, we can transform to inequality vs zero. */
+ if (c2 != 0) {
+ tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
+ c2 = TCG_REG_T1;
+ } else {
+ c2 = c1;
+ }
+ c1 = TCG_REG_G0, c2const = 0;
+ cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
+ break;
+
+ case TCG_COND_GTU:
+ case TCG_COND_LEU:
+ /* If we don't need to load a constant into a register, we can
+ swap the operands on GTU/LEU. There's no benefit to loading
+ the constant into a temporary register. */
+ if (!c2const || c2 == 0) {
+ TCGReg t = c1;
+ c1 = c2;
+ c2 = t;
+ c2const = 0;
+ cond = tcg_swap_cond(cond);
+ break;
+ }
+ /* FALLTHRU */
+
+ default:
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
+ return;
+ }
+
+ tcg_out_cmp(s, c1, c2, c2const);
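+ /* The SUBCC above leaves the unsigned borrow in the carry flag, so C
+ is set exactly when c1 < c2. ADDC of %g0 + 0 then yields C itself
+ for LTU, while SUBC of %g0 - (-1) yields 1 - C for GEU. */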
+ if (cond == TCG_COND_LTU) {
+ tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
+ } else {
+ tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
+ }
+}
+
+static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const)
+{
+ if (use_vis3_instructions) {
+ switch (cond) {
+ case TCG_COND_NE:
+ if (c2 != 0) {
+ break;
+ }
+ c2 = c1, c2const = 0, c1 = TCG_REG_G0;
+ /* FALLTHRU */
+ case TCG_COND_LTU:
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
+ return;
+ default:
+ break;
+ }
+ }
+
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare
+ if the input does not overlap the output. */
+ if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movr(s, cond, ret, c1, 1, 1);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
+ }
+}
+
+static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
+ TCGReg al, TCGReg ah, int32_t bl, int blconst,
+ int32_t bh, int bhconst, int opl, int oph)
+{
+ TCGReg tmp = TCG_REG_T1;
+
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (rl != ah && (bhconst || rl != bh)) {
+ tmp = rl;
+ }
+
+ tcg_out_arithc(s, tmp, al, bl, blconst, opl);
+ tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
+ tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
+}
+
+static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
+ TCGReg al, TCGReg ah, int32_t bl, int blconst,
+ int32_t bh, int bhconst, bool is_sub)
+{
+ TCGReg tmp = TCG_REG_T1;
+
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (rl != ah && (bhconst || rl != bh)) {
+ tmp = rl;
+ }
+
+ tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);
+
+ if (use_vis3_instructions && !is_sub) {
+ /* Note that ADDXC doesn't accept immediates. */
+ if (bhconst && bh != 0) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
+ bh = TCG_REG_T2;
+ }
+ tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
+ } else if (bh == TCG_REG_G0) {
+ /* If we have a zero, we can perform the operation in two insns,
+ with the arithmetic first, and a conditional move into place. */
+ if (rh == ah) {
+ tcg_out_arithi(s, TCG_REG_T2, ah, 1,
+ is_sub ? ARITH_SUB : ARITH_ADD);
+ tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
+ } else {
+ tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
+ tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
+ }
+ } else {
+ /* Otherwise adjust BH as if there were a carry, placing the result in T2 ... */
+ if (bhconst) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
+ } else {
+ tcg_out_arithi(s, TCG_REG_T2, bh, 1,
+ is_sub ? ARITH_SUB : ARITH_ADD);
+ }
+ /* ... revert T2 to the original BH if the carry is clear ... */
+ tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
+ /* ... and finally perform the arithmetic with the new operand. */
+ tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
+ }
+
+ tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
+}
+
+static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest)
+{
+ ptrdiff_t disp = tcg_pcrel_diff(s, dest);
+
+ if (disp == (int32_t)disp) {
+ tcg_out32(s, CALL | (uint32_t)disp >> 2);
+ } else {
+ uintptr_t desti = (uintptr_t)dest;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff);
+ tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
+ }
+}
+
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
+{
+ tcg_out_call_nodelay(s, dest);
+ tcg_out_nop(s);
+}
+
+#ifdef CONFIG_SOFTMMU
+static tcg_insn_unit *qemu_ld_trampoline[16];
+static tcg_insn_unit *qemu_st_trampoline[16];
+
+static void build_trampolines(TCGContext *s)
+{
+ static void * const qemu_ld_helpers[16] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LESW] = helper_le_ldsw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BESW] = helper_be_ldsw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+ };
+ static void * const qemu_st_helpers[16] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+ };
+
+ int i;
+ TCGReg ra;
+
+ for (i = 0; i < 16; ++i) {
+ if (qemu_ld_helpers[i] == NULL) {
+ continue;
+ }
+
+ /* May as well align the trampoline. */
+ while ((uintptr_t)s->code_ptr & 15) {
+ tcg_out_nop(s);
+ }
+ qemu_ld_trampoline[i] = s->code_ptr;
+
+ if (SPARC64 || TARGET_LONG_BITS == 32) {
+ ra = TCG_REG_O3;
+ } else {
+ /* Install the high part of the address. */
+ tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
+ ra = TCG_REG_O4;
+ }
+
+ /* Set the retaddr operand. */
+ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
+ /* Set the env operand. */
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
+ /* Tail call. */
+ tcg_out_call_nodelay(s, qemu_ld_helpers[i]);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
+ }
+
+ for (i = 0; i < 16; ++i) {
+ if (qemu_st_helpers[i] == NULL) {
+ continue;
+ }
+
+ /* May as well align the trampoline. */
+ while ((uintptr_t)s->code_ptr & 15) {
+ tcg_out_nop(s);
+ }
+ qemu_st_trampoline[i] = s->code_ptr;
+
+ if (SPARC64) {
+ ra = TCG_REG_O4;
+ } else {
+ ra = TCG_REG_O1;
+ if (TARGET_LONG_BITS == 64) {
+ /* Install the high part of the address. */
+ tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
+ ra += 2;
+ } else {
+ ra += 1;
+ }
+ if ((i & MO_SIZE) == MO_64) {
+ /* Install the high part of the data. */
+ tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
+ ra += 2;
+ } else {
+ ra += 1;
+ }
+ /* Skip the oi argument. */
+ ra += 1;
+ }
+
+ /* Set the retaddr operand. */
+ if (ra >= TCG_REG_O6) {
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
+ TCG_TARGET_CALL_STACK_OFFSET);
+ ra = TCG_REG_G1;
+ }
+ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
+ /* Set the env operand. */
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
+ /* Tail call. */
+ tcg_out_call_nodelay(s, qemu_st_helpers[i]);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
+ }
+}
+#endif
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int tmp_buf_size, frame_size;
+
+ /* The TCG temp buffer is at the top of the frame, immediately
+ below the frame pointer. */
+ tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
+ tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
+ tmp_buf_size);
+
+ /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
+ otherwise the minimal frame usable by callees. */
+ frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
+ frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
+ frame_size += TCG_TARGET_STACK_ALIGN - 1;
+ frame_size &= -TCG_TARGET_STACK_ALIGN;
+ tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
+ INSN_IMM13(-frame_size));
+
+#ifdef CONFIG_USE_GUEST_BASE
+ if (GUEST_BASE != 0) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
+ /* delay slot */
+ tcg_out_nop(s);
+
+ /* No epilogue required. We issue ret + restore directly in the TB. */
+
+#ifdef CONFIG_SOFTMMU
+ build_trampolines(s);
+#endif
+}
+
+#if defined(CONFIG_SOFTMMU)
+/* Perform the TLB load and compare.
+
+ Inputs:
+ ADDRLO and ADDRHI contain the possible two parts of the address.
+
+ MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+ WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+ This should be offsetof addr_read or addr_write.
+
+ The result of the TLB comparison is in %[ix]cc. The sanitized address
+ is in the returned register, possibly %o0. The TLB addend is in %o1. */
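+
+/* Roughly, the generated code computes (C sketch, illustrative only):
+
+ idx = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ cmp = *(target_ulong *)((char *)&env->tlb_table[mem_index][idx] + which);
+ hit = (addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1))) == cmp;
+ addend = env->tlb_table[mem_index][idx].addend; */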
+
+static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
+ TCGMemOp s_bits, int which)
+{
+ const TCGReg r0 = TCG_REG_O0;
+ const TCGReg r1 = TCG_REG_O1;
+ const TCGReg r2 = TCG_REG_O2;
+ int tlb_ofs;
+
+ /* Shift the page number down. */
+ tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);
+
+ /* Mask out the page offset, except for the required alignment. */
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+ /* Mask the tlb index. */
+ tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND);
+
+ /* Mask page, part 2. */
+ tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND);
+
+ /* Shift the tlb index into place. */
+ tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL);
+
+ /* Relative to the current ENV. */
+ tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
+
+ /* Find a base address that can load both tlb comparator and addend. */
+ tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
+ if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
+ if (tlb_ofs & ~0x3ff) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff);
+ tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD);
+ }
+ tlb_ofs &= 0x3ff;
+ }
+
+ /* Load the tlb comparator and the addend. */
+ tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
+ tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
+
+ /* subcc arg0, arg2, %g0 */
+ tcg_out_cmp(s, r0, r2, 0);
+
+ /* If the guest address must be zero-extended, do so now. */
+ if (SPARC64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
+ return r0;
+ }
+ return addr;
+}
+#endif /* CONFIG_SOFTMMU */
+
+static const int qemu_ld_opc[16] = {
+ [MO_UB] = LDUB,
+ [MO_SB] = LDSB,
+
+ [MO_BEUW] = LDUH,
+ [MO_BESW] = LDSH,
+ [MO_BEUL] = LDUW,
+ [MO_BESL] = LDSW,
+ [MO_BEQ] = LDX,
+
+ [MO_LEUW] = LDUH_LE,
+ [MO_LESW] = LDSH_LE,
+ [MO_LEUL] = LDUW_LE,
+ [MO_LESL] = LDSW_LE,
+ [MO_LEQ] = LDX_LE,
+};
+
+static const int qemu_st_opc[16] = {
+ [MO_UB] = STB,
+
+ [MO_BEUW] = STH,
+ [MO_BEUL] = STW,
+ [MO_BEQ] = STX,
+
+ [MO_LEUW] = STH_LE,
+ [MO_LEUL] = STW_LE,
+ [MO_LEQ] = STX_LE,
+};
+
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
+ TCGMemOpIdx oi, bool is_64)
+{
+ TCGMemOp memop = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned memi = get_mmuidx(oi);
+ TCGReg addrz, param;
+ tcg_insn_unit *func;
+ tcg_insn_unit *label_ptr;
+
+ addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
+ offsetof(CPUTLBEntry, addr_read));
+
+ /* The fast path is exactly one insn. Thus we can perform the
+ entire TLB Hit in the (annulled) delay slot of the branch
+ over the TLB Miss case. */
+
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr = s->code_ptr;
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+ /* delay slot */
+ tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
+ qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
+
+ /* TLB Miss. */
+
+ param = TCG_REG_O1;
+ if (!SPARC64 && TARGET_LONG_BITS == 64) {
+ /* Skip the high-part; we'll perform the extract in the trampoline. */
+ param++;
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
+
+ /* We use the helpers to extend SB and SW data, leaving the case
+ of SL needing explicit extending below. */
+ if ((memop & MO_SSIZE) == MO_SL) {
+ func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
+ } else {
+ func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
+ }
+ assert(func != NULL);
+ tcg_out_call_nodelay(s, func);
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_I32, param, oi);
+
+ /* Recall that all of the helpers return 64-bit results,
+ which complicates things for sparcv8plus. */
+ if (SPARC64) {
+ /* We let the helper sign-extend SB and SW, but leave SL for here. */
+ if (is_64 && (memop & MO_SSIZE) == MO_SL) {
+ tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
+ }
+ } else {
+ if ((memop & MO_SIZE) == MO_64) {
+ tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
+ tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
+ tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
+ } else if (is_64) {
+ /* Re-extend from 32-bit rather than reassembling when we
+ know the high register must be an extension. */
+ tcg_out_arithi(s, data, TCG_REG_O1, 0,
+ memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
+ }
+ }
+
+ *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
+#else
+ if (SPARC64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
+ addr = TCG_REG_T1;
+ }
+ tcg_out_ldst_rr(s, data, addr,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
+#endif /* CONFIG_SOFTMMU */
+}
+
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
+ TCGMemOpIdx oi)
+{
+ TCGMemOp memop = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned memi = get_mmuidx(oi);
+ TCGReg addrz, param;
+ tcg_insn_unit *func;
+ tcg_insn_unit *label_ptr;
+
+ addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
+ offsetof(CPUTLBEntry, addr_write));
+
+ /* The fast path is exactly one insn. Thus we can perform the entire
+ TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr = s->code_ptr;
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+ /* delay slot */
+ tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
+ qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
+
+ /* TLB Miss. */
+
+ param = TCG_REG_O1;
+ if (!SPARC64 && TARGET_LONG_BITS == 64) {
+ /* Skip the high-part; we'll perform the extract in the trampoline. */
+ param++;
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
+ if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
+ /* Skip the high-part; we'll perform the extract in the trampoline. */
+ param++;
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, param++, data);
+
+ func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
+ assert(func != NULL);
+ tcg_out_call_nodelay(s, func);
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_I32, param, oi);
+
+ *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
+#else
+ if (SPARC64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
+ addr = TCG_REG_T1;
+ }
+ tcg_out_ldst_rr(s, data, addr,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
+#endif /* CONFIG_SOFTMMU */
+}
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGArg a0, a1, a2;
+ int c, c2;
+
+ /* Hoist the loads of the most common arguments. */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ c2 = const_args[2];
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ if (check_fit_ptr(a0, 13)) {
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+ tcg_out_movi_imm13(s, TCG_REG_O0, a0);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+ tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
+ }
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ /* direct jump method */
+ s->tb_jmp_offset[a0] = tcg_current_code_size(s);
+ /* Make sure to preserve links during retranslation. */
+ tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1)));
+ } else {
+ /* indirect jump method */
+ tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0));
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL);
+ }
+ tcg_out_nop(s);
+ s->tb_next_offset[a0] = tcg_current_code_size(s);
+ break;
+ case INDEX_op_br:
+ tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
+ tcg_out_nop(s);
+ break;
+
+#define OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64)
+
+ OP_32_64(ld8u):
+ tcg_out_ldst(s, a0, a1, a2, LDUB);
+ break;
+ OP_32_64(ld8s):
+ tcg_out_ldst(s, a0, a1, a2, LDSB);
+ break;
+ OP_32_64(ld16u):
+ tcg_out_ldst(s, a0, a1, a2, LDUH);
+ break;
+ OP_32_64(ld16s):
+ tcg_out_ldst(s, a0, a1, a2, LDSH);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ tcg_out_ldst(s, a0, a1, a2, LDUW);
+ break;
+ OP_32_64(st8):
+ tcg_out_ldst(s, a0, a1, a2, STB);
+ break;
+ OP_32_64(st16):
+ tcg_out_ldst(s, a0, a1, a2, STH);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_ldst(s, a0, a1, a2, STW);
+ break;
+ OP_32_64(add):
+ c = ARITH_ADD;
+ goto gen_arith;
+ OP_32_64(sub):
+ c = ARITH_SUB;
+ goto gen_arith;
+ OP_32_64(and):
+ c = ARITH_AND;
+ goto gen_arith;
+ OP_32_64(andc):
+ c = ARITH_ANDN;
+ goto gen_arith;
+ OP_32_64(or):
+ c = ARITH_OR;
+ goto gen_arith;
+ OP_32_64(orc):
+ c = ARITH_ORN;
+ goto gen_arith;
+ OP_32_64(xor):
+ c = ARITH_XOR;
+ goto gen_arith;
+ case INDEX_op_shl_i32:
+ c = SHIFT_SLL;
+ do_shift32:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
+ break;
+ case INDEX_op_shr_i32:
+ c = SHIFT_SRL;
+ goto do_shift32;
+ case INDEX_op_sar_i32:
+ c = SHIFT_SRA;
+ goto do_shift32;
+ case INDEX_op_mul_i32:
+ c = ARITH_UMUL;
+ goto gen_arith;
+
+ OP_32_64(neg):
+ c = ARITH_SUB;
+ goto gen_arith1;
+ OP_32_64(not):
+ c = ARITH_ORN;
+ goto gen_arith1;
+
+ case INDEX_op_div_i32:
+ tcg_out_div32(s, a0, a1, a2, c2, 0);
+ break;
+ case INDEX_op_divu_i32:
+ tcg_out_div32(s, a0, a1, a2, c2, 1);
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
+ break;
+
+ case INDEX_op_add2_i32:
+ tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_ADDCC, ARITH_ADDC);
+ break;
+ case INDEX_op_sub2_i32:
+ tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_SUBCC, ARITH_SUBC);
+ break;
+ case INDEX_op_mulu2_i32:
+ c = ARITH_UMUL;
+ goto do_mul2;
+ case INDEX_op_muls2_i32:
+ c = ARITH_SMUL;
+ do_mul2:
+ /* The 32-bit multiply insns produce a full 64-bit result. If the
+ destination register can hold it, we can avoid the slower RDY. */
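+ /* (On sparcv8plus only the %g and %o registers, i.e. indexes up to
+ TCG_REG_O7, hold full 64-bit values; hence the test below.) */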
+ tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
+ if (SPARC64 || a0 <= TCG_REG_O7) {
+ tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
+ } else {
+ tcg_out_rdy(s, a1);
+ }
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, a0, a1, a2, false);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, a0, a1, a2, true);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, a0, a1, a2);
+ break;
+
+ case INDEX_op_ld32s_i64:
+ tcg_out_ldst(s, a0, a1, a2, LDSW);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ldst(s, a0, a1, a2, LDX);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_ldst(s, a0, a1, a2, STX);
+ break;
+ case INDEX_op_shl_i64:
+ c = SHIFT_SLLX;
+ do_shift64:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
+ break;
+ case INDEX_op_shr_i64:
+ c = SHIFT_SRLX;
+ goto do_shift64;
+ case INDEX_op_sar_i64:
+ c = SHIFT_SRAX;
+ goto do_shift64;
+ case INDEX_op_mul_i64:
+ c = ARITH_MULX;
+ goto gen_arith;
+ case INDEX_op_div_i64:
+ c = ARITH_SDIVX;
+ goto gen_arith;
+ case INDEX_op_divu_i64:
+ c = ARITH_UDIVX;
+ goto gen_arith;
+ case INDEX_op_ext32s_i64:
+ tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
+ break;
+ case INDEX_op_ext32u_i64:
+ tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
+ break;
+ case INDEX_op_trunc_shr_i32:
+ if (a2 == 0) {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ } else {
+ tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX);
+ }
+ break;
+
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
+ break;
+ case INDEX_op_add2_i64:
+ tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
+ const_args[4], args[5], const_args[5], false);
+ break;
+ case INDEX_op_sub2_i64:
+ tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
+ const_args[4], args[5], const_args[5], true);
+ break;
+ case INDEX_op_muluh_i64:
+ tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
+ break;
+
+ gen_arith:
+ tcg_out_arithc(s, a0, a1, a2, c2, c);
+ break;
+
+ gen_arith1:
+ tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static const TCGTargetOpDef sparc_op_defs[] = {
+ { INDEX_op_exit_tb, { } },
+ { INDEX_op_goto_tb, { } },
+ { INDEX_op_br, { } },
+
+ { INDEX_op_ld8u_i32, { "r", "r" } },
+ { INDEX_op_ld8s_i32, { "r", "r" } },
+ { INDEX_op_ld16u_i32, { "r", "r" } },
+ { INDEX_op_ld16s_i32, { "r", "r" } },
+ { INDEX_op_ld_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "rZ", "r" } },
+ { INDEX_op_st16_i32, { "rZ", "r" } },
+ { INDEX_op_st_i32, { "rZ", "r" } },
+
+ { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },
+
+ { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },
+
+ { INDEX_op_neg_i32, { "r", "rJ" } },
+ { INDEX_op_not_i32, { "r", "rJ" } },
+
+ { INDEX_op_brcond_i32, { "rZ", "rJ" } },
+ { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
+
+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
+ { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } },
+
+ { INDEX_op_ld8u_i64, { "R", "r" } },
+ { INDEX_op_ld8s_i64, { "R", "r" } },
+ { INDEX_op_ld16u_i64, { "R", "r" } },
+ { INDEX_op_ld16s_i64, { "R", "r" } },
+ { INDEX_op_ld32u_i64, { "R", "r" } },
+ { INDEX_op_ld32s_i64, { "R", "r" } },
+ { INDEX_op_ld_i64, { "R", "r" } },
+ { INDEX_op_st8_i64, { "RZ", "r" } },
+ { INDEX_op_st16_i64, { "RZ", "r" } },
+ { INDEX_op_st32_i64, { "RZ", "r" } },
+ { INDEX_op_st_i64, { "RZ", "r" } },
+
+ { INDEX_op_add_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_mul_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_div_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_divu_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_sub_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_and_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_andc_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_or_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_orc_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_xor_i64, { "R", "RZ", "RJ" } },
+
+ { INDEX_op_shl_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_shr_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_sar_i64, { "R", "RZ", "RJ" } },
+
+ { INDEX_op_neg_i64, { "R", "RJ" } },
+ { INDEX_op_not_i64, { "R", "RJ" } },
+
+ { INDEX_op_ext32s_i64, { "R", "r" } },
+ { INDEX_op_ext32u_i64, { "R", "r" } },
+ { INDEX_op_trunc_shr_i32, { "r", "R" } },
+
+ { INDEX_op_brcond_i64, { "RZ", "RJ" } },
+ { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } },
+ { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } },
+
+ { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
+ { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } },
+ { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } },
+
+ { INDEX_op_qemu_ld_i32, { "r", "A" } },
+ { INDEX_op_qemu_ld_i64, { "R", "A" } },
+ { INDEX_op_qemu_st_i32, { "sZ", "A" } },
+ { INDEX_op_qemu_st_i64, { "SZ", "A" } },
+
+ { -1 },
+};
+
+static void tcg_target_init(TCGContext *s)
+{
+ /* Only probe for the platform and capabilities if we haven't already
+ determined maximum values at compile time. */
+#ifndef use_vis3_instructions
+ {
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
+ }
+#endif
+
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64);
+
+ tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+ (1 << TCG_REG_G1) |
+ (1 << TCG_REG_G2) |
+ (1 << TCG_REG_G3) |
+ (1 << TCG_REG_G4) |
+ (1 << TCG_REG_G5) |
+ (1 << TCG_REG_G6) |
+ (1 << TCG_REG_G7) |
+ (1 << TCG_REG_O0) |
+ (1 << TCG_REG_O1) |
+ (1 << TCG_REG_O2) |
+ (1 << TCG_REG_O3) |
+ (1 << TCG_REG_O4) |
+ (1 << TCG_REG_O5) |
+ (1 << TCG_REG_O7));
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
+
+ tcg_add_target_add_op_defs(sparc_op_defs);
+}
+
+#if SPARC64
+# define ELF_HOST_MACHINE EM_SPARCV9
+#else
+# define ELF_HOST_MACHINE EM_SPARC32PLUS
+# define ELF_HOST_FLAGS EF_SPARC_32PLUS
+#endif
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
+ uint8_t fde_win_save;
+ uint8_t fde_ret_save[3];
+} DebugFrame;
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = -sizeof(void *) & 0x7f,
+ .h.cie.return_column = 15, /* o7 */
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+#if SPARC64
+ 12, 30, /* DW_CFA_def_cfa i6, 2047 */
+ (2047 & 0x7f) | 0x80, (2047 >> 7)
+#else
+ 13, 30 /* DW_CFA_def_cfa_register i6 */
+#endif
+ },
+ .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */
+ .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+ uint32_t *ptr = (uint32_t *)jmp_addr;
+ uintptr_t disp = addr - jmp_addr;
+
+ /* We can reach the entire address space for 32-bit. For 64-bit
+ the code_gen_buffer can't be larger than 2GB. */
+ assert(disp == (int32_t)disp);
+
+ *ptr = CALL | (uint32_t)disp >> 2;
+ flush_icache_range(jmp_addr, jmp_addr + 4);
+}
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
new file mode 100644
index 00000000..f584de47
--- /dev/null
+++ b/tcg/sparc/tcg-target.h
@@ -0,0 +1,160 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TCG_TARGET_SPARC
+#define TCG_TARGET_SPARC 1
+
+#define TCG_TARGET_REG_BITS 64
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_NB_REGS 32
+
+typedef enum {
+ TCG_REG_G0 = 0,
+ TCG_REG_G1,
+ TCG_REG_G2,
+ TCG_REG_G3,
+ TCG_REG_G4,
+ TCG_REG_G5,
+ TCG_REG_G6,
+ TCG_REG_G7,
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
+ TCG_REG_O6,
+ TCG_REG_O7,
+ TCG_REG_L0,
+ TCG_REG_L1,
+ TCG_REG_L2,
+ TCG_REG_L3,
+ TCG_REG_L4,
+ TCG_REG_L5,
+ TCG_REG_L6,
+ TCG_REG_L7,
+ TCG_REG_I0,
+ TCG_REG_I1,
+ TCG_REG_I2,
+ TCG_REG_I3,
+ TCG_REG_I4,
+ TCG_REG_I5,
+ TCG_REG_I6,
+ TCG_REG_I7,
+} TCGReg;
+
+#define TCG_CT_CONST_S11 0x100
+#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_ZERO 0x400
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_O6
+
+#ifdef __arch64__
+#define TCG_TARGET_STACK_BIAS 2047
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
+#else
+#define TCG_TARGET_STACK_BIAS 0
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4)
+#endif
+
+#ifdef __arch64__
+#define TCG_TARGET_EXTEND_ARGS 1
+#endif
+
+#if defined(__VIS__) && __VIS__ >= 0x300
+#define use_vis3_instructions 1
+#else
+extern bool use_vis3_instructions;
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_rot_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 0
+#define TCG_TARGET_HAS_ext16s_i32 0
+#define TCG_TARGET_HAS_ext8u_i32 0
+#define TCG_TARGET_HAS_ext16u_i32 0
+#define TCG_TARGET_HAS_bswap16_i32 0
+#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+
+#define TCG_TARGET_HAS_trunc_shr_i32 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_rot_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 0
+#define TCG_TARGET_HAS_ext16s_i64 0
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 0
+#define TCG_TARGET_HAS_bswap32_i64 0
+#define TCG_TARGET_HAS_bswap64_i64 0
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+#define TCG_AREG0 TCG_REG_I0
+
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+ uintptr_t p;
+ for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
+ __asm__ __volatile__("flush\t%0" : : "r" (p));
+ }
+}
+
+#endif
diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h
new file mode 100644
index 00000000..40a2369b
--- /dev/null
+++ b/tcg/tcg-be-ldst.h
@@ -0,0 +1,88 @@
+/*
+ * TCG Backend Data: load-store optimization only.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef CONFIG_SOFTMMU
+
+typedef struct TCGLabelQemuLdst {
+ bool is_ld; /* qemu_ld: true, qemu_st: false */
+ TCGMemOpIdx oi;
+ TCGType type; /* result type of a load */
+ TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */
+ TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */
+ TCGReg datalo_reg; /* reg index for low word to be loaded or stored */
+ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */
+    tcg_insn_unit *raddr;   /* generated-code addr just past the qemu_ld/st op */
+ tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
+ struct TCGLabelQemuLdst *next;
+} TCGLabelQemuLdst;
+
+typedef struct TCGBackendData {
+ TCGLabelQemuLdst *labels;
+} TCGBackendData;
+
+
+/*
+ * Initialize TB backend data at the beginning of the TB.
+ */
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+ s->be->labels = NULL;
+}
+
+/*
+ * Generate TB finalization at the end of the block.
+ */
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+
+static void tcg_out_tb_finalize(TCGContext *s)
+{
+ TCGLabelQemuLdst *lb;
+
+ /* qemu_ld/st slow paths */
+ for (lb = s->be->labels; lb != NULL; lb = lb->next) {
+ if (lb->is_ld) {
+ tcg_out_qemu_ld_slow_path(s, lb);
+ } else {
+ tcg_out_qemu_st_slow_path(s, lb);
+ }
+ }
+}
+
+/*
+ * Allocate a new TCGLabelQemuLdst entry.
+ */
+
+static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
+{
+ TCGBackendData *be = s->be;
+ TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
+
+ l->next = be->labels;
+ be->labels = l;
+ return l;
+}
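+
+/*
+ * A minimal usage sketch for a backend's qemu_ld fast path; the variables
+ * oi, datalo, addrlo and tlb_miss_branch below are illustrative only:
+ *
+ *     TCGLabelQemuLdst *l = new_ldst_label(s);
+ *     l->is_ld = true;
+ *     l->oi = oi;
+ *     l->type = TCG_TYPE_I32;
+ *     l->datalo_reg = datalo;
+ *     l->addrlo_reg = addrlo;
+ *     l->label_ptr[0] = tlb_miss_branch;   (patched to reach the slow path)
+ *     l->raddr = s->code_ptr;              (where the slow path returns)
+ *
+ * tcg_out_tb_finalize() then walks the recorded labels and emits the
+ * out-of-line slow paths once the body of the TB has been generated.
+ */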
+#else
+#include "tcg-be-null.h"
+#endif /* CONFIG_SOFTMMU */
diff --git a/tcg/tcg-be-null.h b/tcg/tcg-be-null.h
new file mode 100644
index 00000000..74c57d5a
--- /dev/null
+++ b/tcg/tcg-be-null.h
@@ -0,0 +1,43 @@
+/*
+ * TCG Backend Data: No backend data
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+typedef struct TCGBackendData {
+ /* Empty */
+ char dummy;
+} TCGBackendData;
+
+
+/*
+ * Initialize TB backend data at the beginning of the TB.
+ */
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+}
+
+/*
+ * Generate TB finalization at the end of the block.
+ */
+
+static inline void tcg_out_tb_finalize(TCGContext *s)
+{
+}
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
new file mode 100644
index 00000000..45098c31
--- /dev/null
+++ b/tcg/tcg-op.c
@@ -0,0 +1,1945 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg.h"
+#include "tcg-op.h"
+
+/* Reduce the number of ifdefs below. This assumes that all uses of
+ TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
+ the compiler can eliminate. */
+#if TCG_TARGET_REG_BITS == 64
+extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
+extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
+#define TCGV_LOW TCGV_LOW_link_error
+#define TCGV_HIGH TCGV_HIGH_link_error
+#endif
+
+/* Note that this is optimized for sequential allocation during translation,
+   up to and including filling in the forward link immediately.  The list is
+   properly terminated only after translation of the TB is complete.  */
+
+static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
+{
+ int oi = ctx->gen_next_op_idx;
+ int ni = oi + 1;
+ int pi = oi - 1;
+
+ tcg_debug_assert(oi < OPC_BUF_SIZE);
+ ctx->gen_last_op_idx = oi;
+ ctx->gen_next_op_idx = ni;
+
+ ctx->gen_op_buf[oi] = (TCGOp){
+ .opc = opc,
+ .args = args,
+ .prev = pi,
+ .next = ni
+ };
+}
+
+void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1)
+{
+ int pi = ctx->gen_next_parm_idx;
+
+ tcg_debug_assert(pi + 1 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 1;
+ ctx->gen_opparam_buf[pi] = a1;
+
+ tcg_emit_op(ctx, opc, pi);
+}
+
+void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2)
+{
+ int pi = ctx->gen_next_parm_idx;
+
+ tcg_debug_assert(pi + 2 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 2;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+
+ tcg_emit_op(ctx, opc, pi);
+}
+
+void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
+ TCGArg a2, TCGArg a3)
+{
+ int pi = ctx->gen_next_parm_idx;
+
+ tcg_debug_assert(pi + 3 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 3;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
+
+ tcg_emit_op(ctx, opc, pi);
+}
+
+void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
+ TCGArg a2, TCGArg a3, TCGArg a4)
+{
+ int pi = ctx->gen_next_parm_idx;
+
+ tcg_debug_assert(pi + 4 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 4;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
+ ctx->gen_opparam_buf[pi + 3] = a4;
+
+ tcg_emit_op(ctx, opc, pi);
+}
+
+void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
+ TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5)
+{
+ int pi = ctx->gen_next_parm_idx;
+
+ tcg_debug_assert(pi + 5 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 5;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
+ ctx->gen_opparam_buf[pi + 3] = a4;
+ ctx->gen_opparam_buf[pi + 4] = a5;
+
+ tcg_emit_op(ctx, opc, pi);
+}
+
+void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2,
+ TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6)
+{
+ int pi = ctx->gen_next_parm_idx;
+
+ tcg_debug_assert(pi + 6 <= OPPARAM_BUF_SIZE);
+ ctx->gen_next_parm_idx = pi + 6;
+ ctx->gen_opparam_buf[pi + 0] = a1;
+ ctx->gen_opparam_buf[pi + 1] = a2;
+ ctx->gen_opparam_buf[pi + 2] = a3;
+ ctx->gen_opparam_buf[pi + 3] = a4;
+ ctx->gen_opparam_buf[pi + 4] = a5;
+ ctx->gen_opparam_buf[pi + 5] = a6;
+
+ tcg_emit_op(ctx, opc, pi);
+}
+
+/* 32 bit ops */
+
+void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_add_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
+{
+ if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
+ /* Don't recurse with tcg_gen_neg_i32. */
+ tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg1);
+ tcg_gen_sub_i32(ret, t0, arg2);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_sub_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+{
+ TCGv_i32 t0;
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
+ tcg_gen_movi_i32(ret, 0);
+ return;
+ case 0xffffffffu:
+ tcg_gen_mov_i32(ret, arg1);
+ return;
+ case 0xffu:
+ /* Don't recurse with tcg_gen_ext8u_i32. */
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffu:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
+ return;
+ }
+ break;
+ }
+ t0 = tcg_const_i32(arg2);
+ tcg_gen_and_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+}
+
+void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i32(ret, -1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_or_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* Some cases can be optimized here. */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
+ /* Don't recurse with tcg_gen_not_i32. */
+ tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_xor_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 32);
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_shl_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 32);
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_shr_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 32);
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_sar_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
+ }
+}
+
+void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_brcond_i32(cond, arg1, t0, l);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i32(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i32(ret, 0);
+ } else {
+ tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+ }
+}
+
+void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2)
+{
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_setcond_i32(cond, ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+}
+
+void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_mul_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+}
+
+void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_div_i32) {
+ tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t0, arg1, 31);
+ tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_div_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i32) {
+ tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
+ tcg_gen_mul_i32(t0, t0, arg2);
+ tcg_gen_sub_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t0, arg1, 31);
+ tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_rem_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_div_i32) {
+ tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_divu_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i32) {
+ tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
+ tcg_gen_mul_i32(t0, t0, arg2);
+ tcg_gen_sub_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_remu_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_andc_i32) {
+ tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_not_i32(t0, arg2);
+ tcg_gen_and_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_eqv_i32) {
+ tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
+ } else {
+ tcg_gen_xor_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+ }
+}
+
+void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_nand_i32) {
+ tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
+ } else {
+ tcg_gen_and_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+ }
+}
+
+void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_nor_i32) {
+ tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
+ } else {
+ tcg_gen_or_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+ }
+}
+
+void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_orc_i32) {
+ tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_not_i32(t0, arg2);
+ tcg_gen_or_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i32) {
+ tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shl_i32(t0, arg1, arg2);
+ tcg_gen_subfi_i32(t1, 32, arg2);
+ tcg_gen_shr_i32(t1, arg1, t1);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 32);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else if (TCG_TARGET_HAS_rot_i32) {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_rotl_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ } else {
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shli_i32(t0, arg1, arg2);
+ tcg_gen_shri_i32(t1, arg1, 32 - arg2);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i32) {
+ tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shr_i32(t0, arg1, arg2);
+ tcg_gen_subfi_i32(t1, 32, arg2);
+ tcg_gen_shl_i32(t1, arg1, t1);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 32);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
+ }
+}
+
+void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
+ unsigned int ofs, unsigned int len)
+{
+ uint32_t mask;
+ TCGv_i32 t1;
+
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ if (ofs == 0 && len == 32) {
+ tcg_gen_mov_i32(ret, arg2);
+ return;
+ }
+ if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
+ tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
+ return;
+ }
+
+ mask = (1u << len) - 1;
+ t1 = tcg_temp_new_i32();
+
+ if (ofs + len < 32) {
+ tcg_gen_andi_i32(t1, arg2, mask);
+ tcg_gen_shli_i32(t1, t1, ofs);
+ } else {
+ tcg_gen_shli_i32(t1, arg2, ofs);
+ }
+ tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
+ tcg_gen_or_i32(ret, ret, t1);
+
+ tcg_temp_free_i32(t1);
+}
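+
+/* A worked example of the generic fallback above: with ofs == 8 and
+   len == 8, mask is 0xff, so the result is
+   (arg1 & ~0xff00) | ((arg2 & 0xff) << 8), i.e. byte 1 of arg1 is
+   replaced by the low byte of arg2.  */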
+
+void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
+ TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_mov_i32(ret, v1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_mov_i32(ret, v2);
+ } else if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_setcond_i32(cond, t0, c1, c2);
+ tcg_gen_neg_i32(t0, t0);
+ tcg_gen_and_i32(t1, v1, t0);
+ tcg_gen_andc_i32(ret, v2, t0);
+ tcg_gen_or_i32(ret, ret, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
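+
+/* The generic fallback above relies on setcond producing 0 or 1: after the
+   negation t0 is either all-zeroes or all-ones, so the sequence computes
+   ret = (v1 & t0) | (v2 & ~t0), selecting v1 exactly when the condition
+   holds.  */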
+
+void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+ if (TCG_TARGET_HAS_add2_i32) {
+ tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_concat_i32_i64(t0, al, ah);
+ tcg_gen_concat_i32_i64(t1, bl, bh);
+ tcg_gen_add_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+ if (TCG_TARGET_HAS_sub2_i32) {
+ tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_concat_i32_i64(t0, al, ah);
+ tcg_gen_concat_i32_i64(t1, bl, bh);
+ tcg_gen_sub_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_mulu2_i32) {
+ tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_muluh_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+ tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
+ tcg_gen_mov_i32(rl, t);
+ tcg_temp_free_i32(t);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t0, arg1);
+ tcg_gen_extu_i32_i64(t1, arg2);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_muls2_i32) {
+ tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_mulsh_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+ tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
+ tcg_gen_mov_i32(rl, t);
+ tcg_temp_free_i32(t);
+ } else if (TCG_TARGET_REG_BITS == 32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+ tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
+ /* Adjust for negative inputs. */
+ tcg_gen_sari_i32(t2, arg1, 31);
+ tcg_gen_sari_i32(t3, arg2, 31);
+ tcg_gen_and_i32(t2, t2, arg2);
+ tcg_gen_and_i32(t3, t3, arg1);
+ tcg_gen_sub_i32(rh, t1, t2);
+ tcg_gen_sub_i32(rh, rh, t3);
+ tcg_gen_mov_i32(rl, t0);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t3);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(t0, arg1);
+ tcg_gen_ext_i32_i64(t1, arg2);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
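+
+/* The sign adjustment above follows from a_signed == a_unsigned - 2^32 * (a < 0):
+   the high word of the signed product equals the high word of the unsigned
+   product minus (a < 0 ? b : 0) and minus (b < 0 ? a : 0), which is exactly
+   what the sari/and/sub sequence computes.  */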
+
+void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext8s_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
+ } else {
+ tcg_gen_shli_i32(ret, arg, 24);
+ tcg_gen_sari_i32(ret, ret, 24);
+ }
+}
+
+void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext16s_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
+ } else {
+ tcg_gen_shli_i32(ret, arg, 16);
+ tcg_gen_sari_i32(ret, ret, 16);
+ }
+}
+
+void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
+ } else {
+ tcg_gen_andi_i32(ret, arg, 0xffu);
+ }
+}
+
+void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
+ } else {
+ tcg_gen_andi_i32(ret, arg, 0xffffu);
+ }
+}
+
+/* Note: we assume the two high bytes are set to zero */
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_bswap16_i32) {
+ tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+
+ tcg_gen_ext8u_i32(t0, arg);
+ tcg_gen_shli_i32(t0, t0, 8);
+ tcg_gen_shri_i32(ret, arg, 8);
+ tcg_gen_or_i32(ret, ret, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_bswap32_i32) {
+ tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
+ } else {
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+
+ tcg_gen_shli_i32(t0, arg, 24);
+
+ tcg_gen_andi_i32(t1, arg, 0x0000ff00);
+ tcg_gen_shli_i32(t1, t1, 8);
+ tcg_gen_or_i32(t0, t0, t1);
+
+ tcg_gen_shri_i32(t1, arg, 8);
+ tcg_gen_andi_i32(t1, t1, 0x0000ff00);
+ tcg_gen_or_i32(t0, t0, t1);
+
+ tcg_gen_shri_i32(t1, arg, 24);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
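+
+/* A byte-level trace of the fallback above, writing the input as the bytes
+   "abcd" from most to least significant: the shift left by 24 puts d in
+   the top byte, the two masked shifts move c and b into bytes 2 and 1,
+   and the final shift right by 24 supplies a, giving "dcba".  */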
+
+/* 64-bit ops */
+
+#if TCG_TARGET_REG_BITS == 32
+/* These are all inline for TCG_TARGET_REG_BITS == 64. */
+
+void tcg_gen_discard_i64(TCGv_i64 arg)
+{
+ tcg_gen_discard_i32(TCGV_LOW(arg));
+ tcg_gen_discard_i32(TCGV_HIGH(arg));
+}
+
+void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+}
+
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
+{
+ tcg_gen_movi_i32(TCGV_LOW(ret), arg);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
+}
+
+void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ /* Since arg2 and ret have different types,
+ they cannot be the same temporary */
+#ifdef HOST_WORDS_BIGENDIAN
+ tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
+#else
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
+#endif
+}
+
+void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+ tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
+#else
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+ tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
+#endif
+}
+
+void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ gen_helper_shl_i64(ret, arg1, arg2);
+}
+
+void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ gen_helper_shr_i64(ret, arg1, arg2);
+}
+
+void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ gen_helper_sar_i64(ret, arg1, arg2);
+}
+
+void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0;
+ TCGv_i32 t1;
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i32();
+
+ tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
+ TCGV_LOW(arg1), TCGV_LOW(arg2));
+
+ tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
+ tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
+ tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
+ tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
+
+ tcg_gen_mov_i64(ret, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i32(t1);
+}
+#endif /* TCG_TARGET_REG_BITS == 32 */
+
+void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_add_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
+{
+ if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
+ /* Don't recurse with tcg_gen_neg_i64. */
+ tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg1);
+ tcg_gen_sub_i64(ret, t0, arg2);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_sub_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+{
+ TCGv_i64 t0;
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+ return;
+ }
+
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
+ tcg_gen_movi_i64(ret, 0);
+ return;
+ case 0xffffffffffffffffull:
+ tcg_gen_mov_i64(ret, arg1);
+ return;
+ case 0xffull:
+ /* Don't recurse with tcg_gen_ext8u_i64. */
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffu:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffffffull:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
+ return;
+ }
+ break;
+ }
+ t0 = tcg_const_i64(arg2);
+ tcg_gen_and_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+ return;
+ }
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i64(ret, -1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_or_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+ return;
+ }
+ /* Some cases can be optimized here. */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
+ /* Don't recurse with tcg_gen_not_i64. */
+ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_xor_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
+ unsigned c, bool right, bool arith)
+{
+ tcg_debug_assert(c < 64);
+ if (c == 0) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ } else if (c >= 32) {
+ c -= 32;
+ if (right) {
+ if (arith) {
+ tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
+ } else {
+ tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ }
+ } else {
+ tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
+ tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+ }
+ } else {
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ if (right) {
+ tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c);
+ if (arith) {
+ tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c);
+ } else {
+ tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c);
+ }
+ tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0);
+ tcg_gen_mov_i32(TCGV_HIGH(ret), t1);
+ } else {
+ tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
+ /* Note: ret can be the same as arg1, so we use t1 */
+ tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c);
+ tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0);
+ tcg_gen_mov_i32(TCGV_LOW(ret), t1);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
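+
+/* A worked example for the 0 < c < 32 case above, taking a logical right
+   shift: the new low word is (low >> c) | (high << (32 - c)) and the new
+   high word is high >> c; the arithmetic variant differs only in using an
+   arithmetic shift to produce the new high word.  */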
+
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 64);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_shl_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 64);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_shr_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 64);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_sar_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
+ TCGV_HIGH(arg1), TCGV_LOW(arg2),
+ TCGV_HIGH(arg2), cond, label_arg(l));
+ } else {
+ tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
+ label_arg(l));
+ }
+ }
+}
+
+void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_brcond_i64(cond, arg1, t0, l);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i64(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i64(ret, 0);
+ } else {
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else {
+ tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+ }
+ }
+}
+
+void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_setcond_i64(cond, ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_mul_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+}
+
+void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_div_i64) {
+ tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t0, arg1, 63);
+ tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_div_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i64) {
+ tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
+ tcg_gen_mul_i64(t0, t0, arg2);
+ tcg_gen_sub_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t0, arg1, 63);
+ tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_rem_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_div_i64) {
+ tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, 0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_divu_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i64) {
+ tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
+ tcg_gen_mul_i64(t0, t0, arg2);
+ tcg_gen_sub_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, 0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_remu_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else if (TCG_TARGET_HAS_ext8s_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
+ } else {
+ tcg_gen_shli_i64(ret, arg, 56);
+ tcg_gen_sari_i64(ret, ret, 56);
+ }
+}
+
+void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else if (TCG_TARGET_HAS_ext16s_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
+ } else {
+ tcg_gen_shli_i64(ret, arg, 48);
+ tcg_gen_sari_i64(ret, ret, 48);
+ }
+}
+
+void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else if (TCG_TARGET_HAS_ext32s_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
+ } else {
+ tcg_gen_shli_i64(ret, arg, 32);
+ tcg_gen_sari_i64(ret, ret, 32);
+ }
+}
+
+void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
+ } else {
+ tcg_gen_andi_i64(ret, arg, 0xffu);
+ }
+}
+
+void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
+ } else {
+ tcg_gen_andi_i64(ret, arg, 0xffffu);
+ }
+}
+
+void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
+ } else {
+ tcg_gen_andi_i64(ret, arg, 0xffffffffu);
+ }
+}
+
+/* Note: we assume the six high bytes are set to zero */
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_bswap16_i64) {
+ tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+
+ tcg_gen_ext8u_i64(t0, arg);
+ tcg_gen_shli_i64(t0, t0, 8);
+ tcg_gen_shri_i64(ret, arg, 8);
+ tcg_gen_or_i64(ret, ret, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+/* Note: we assume the four high bytes are set to zero */
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_bswap32_i64) {
+ tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
+ } else {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t0, arg, 24);
+ tcg_gen_ext32u_i64(t0, t0);
+
+ tcg_gen_andi_i64(t1, arg, 0x0000ff00);
+ tcg_gen_shli_i64(t1, t1, 8);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 8);
+ tcg_gen_andi_i64(t1, t1, 0x0000ff00);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 24);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+
+ tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
+ tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
+ tcg_gen_mov_i32(TCGV_LOW(ret), t1);
+ tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ } else if (TCG_TARGET_HAS_bswap64_i64) {
+ tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t0, arg, 56);
+
+ tcg_gen_andi_i64(t1, arg, 0x0000ff00);
+ tcg_gen_shli_i64(t1, t1, 40);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_andi_i64(t1, arg, 0x00ff0000);
+ tcg_gen_shli_i64(t1, t1, 24);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_andi_i64(t1, arg, 0xff000000);
+ tcg_gen_shli_i64(t1, t1, 8);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 8);
+ tcg_gen_andi_i64(t1, t1, 0xff000000);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 24);
+ tcg_gen_andi_i64(t1, t1, 0x00ff0000);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 40);
+ tcg_gen_andi_i64(t1, t1, 0x0000ff00);
+ tcg_gen_or_i64(t0, t0, t1);
+
+ tcg_gen_shri_i64(t1, arg, 56);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+ } else if (TCG_TARGET_HAS_not_i64) {
+ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
+ } else {
+ tcg_gen_xori_i64(ret, arg, -1);
+ }
+}
+
+void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_andc_i64) {
+ tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, arg2);
+ tcg_gen_and_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_eqv_i64) {
+ tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
+ } else {
+ tcg_gen_xor_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+ }
+}
+
+void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_nand_i64) {
+ tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
+ } else {
+ tcg_gen_and_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+ }
+}
+
+void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_nor_i64) {
+ tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
+ } else {
+ tcg_gen_or_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+ }
+}
+
+void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_orc_i64) {
+ tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, arg2);
+ tcg_gen_or_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i64) {
+ tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shl_i64(t0, arg1, arg2);
+ tcg_gen_subfi_i64(t1, 64, arg2);
+ tcg_gen_shr_i64(t1, arg1, t1);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 64);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (TCG_TARGET_HAS_rot_i64) {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_rotl_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ } else {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shli_i64(t0, arg1, arg2);
+ tcg_gen_shri_i64(t1, arg1, 64 - arg2);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i64) {
+ tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shr_i64(t0, arg1, arg2);
+ tcg_gen_subfi_i64(t1, 64, arg2);
+ tcg_gen_shl_i64(t1, arg1, t1);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+ tcg_debug_assert(arg2 < 64);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
+ }
+}
+
+void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
+ unsigned int ofs, unsigned int len)
+{
+ uint64_t mask;
+ TCGv_i64 t1;
+
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ if (ofs == 0 && len == 64) {
+ tcg_gen_mov_i64(ret, arg2);
+ return;
+ }
+ if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
+ tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
+ return;
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ if (ofs >= 32) {
+ tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), ofs - 32, len);
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ return;
+ }
+ if (ofs + len <= 32) {
+ tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
+ TCGV_LOW(arg2), ofs, len);
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ return;
+ }
+ }
+
+ mask = (1ull << len) - 1;
+ t1 = tcg_temp_new_i64();
+
+ if (ofs + len < 64) {
+ tcg_gen_andi_i64(t1, arg2, mask);
+ tcg_gen_shli_i64(t1, t1, ofs);
+ } else {
+ tcg_gen_shli_i64(t1, arg2, ofs);
+ }
+ tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+ tcg_gen_or_i64(ret, ret, t1);
+
+ tcg_temp_free_i64(t1);
+}
+
+void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
+ TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_mov_i64(ret, v1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_mov_i64(ret, v2);
+ } else if (TCG_TARGET_REG_BITS == 32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+ TCGV_LOW(c1), TCGV_HIGH(c1),
+ TCGV_LOW(c2), TCGV_HIGH(c2), cond);
+
+ if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_movi_i32(t1, 0);
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
+ TCGV_LOW(v1), TCGV_LOW(v2));
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
+ TCGV_HIGH(v1), TCGV_HIGH(v2));
+ } else {
+ tcg_gen_neg_i32(t0, t0);
+
+ tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
+ tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
+
+ tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
+ tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ } else if (TCG_TARGET_HAS_movcond_i64) {
+ tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_setcond_i64(cond, t0, c1, c2);
+ tcg_gen_neg_i64(t0, t0);
+ tcg_gen_and_i64(t1, v1, t0);
+ tcg_gen_andc_i64(ret, v2, t0);
+ tcg_gen_or_i64(ret, ret, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+ if (TCG_TARGET_HAS_add2_i64) {
+ tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_add_i64(t0, al, bl);
+ tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
+ tcg_gen_add_i64(rh, ah, bh);
+ tcg_gen_add_i64(rh, rh, t1);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
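+
+/* The fallback above derives the carry from an unsigned compare: t0 is the
+   low half al + bl reduced modulo 2^64, so the addition overflowed exactly
+   when t0 < al (unsigned), and t1 is the 0/1 carry folded into the high
+   half.  */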
+
+void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+ if (TCG_TARGET_HAS_sub2_i64) {
+ tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_sub_i64(t0, al, bl);
+ tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
+ tcg_gen_sub_i64(rh, ah, bh);
+ tcg_gen_sub_i64(rh, rh, t1);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_mulu2_i64) {
+ tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_muluh_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+ tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t);
+ tcg_temp_free_i64(t);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t0, arg1, arg2);
+ gen_helper_muluh_i64(rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_muls2_i64) {
+ tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_mulsh_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+ tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t);
+ tcg_temp_free_i64(t);
+ } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+ tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
+ /* Adjust for negative inputs. */
+ tcg_gen_sari_i64(t2, arg1, 63);
+ tcg_gen_sari_i64(t3, arg2, 63);
+ tcg_gen_and_i64(t2, t2, arg2);
+ tcg_gen_and_i64(t3, t3, arg1);
+ tcg_gen_sub_i64(rh, t1, t2);
+ tcg_gen_sub_i64(rh, rh, t3);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t0, arg1, arg2);
+ gen_helper_mulsh_i64(rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+/* Size changing operations. */
+
+void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count)
+{
+ tcg_debug_assert(count < 64);
+ if (TCG_TARGET_REG_BITS == 32) {
+ if (count >= 32) {
+ tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32);
+ } else if (count == 0) {
+ tcg_gen_mov_i32(ret, TCGV_LOW(arg));
+ } else {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t, arg, count);
+ tcg_gen_mov_i32(ret, TCGV_LOW(t));
+ tcg_temp_free_i64(t);
+ }
+ } else if (TCG_TARGET_HAS_trunc_shr_i32) {
+ tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret,
+ MAKE_TCGV_I32(GET_TCGV_I64(arg)), count);
+ } else if (count == 0) {
+ tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg)));
+ } else {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t, arg, count);
+ tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t)));
+ tcg_temp_free_i64(t);
+ }
+}
+
+void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else {
+        /* Note: we assume the target supports moves between
+           32-bit and 64-bit registers.  */
+ tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+ }
+}
+
+void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else {
+        /* Note: we assume the target supports moves between
+           32-bit and 64-bit registers.  */
+ tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+ }
+}
+
+void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
+{
+ TCGv_i64 tmp;
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(dest), low);
+ tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+ return;
+ }
+
+ tmp = tcg_temp_new_i64();
+ /* These extensions are only needed for type correctness.
+ We may be able to do better given target specific information. */
+ tcg_gen_extu_i32_i64(tmp, high);
+ tcg_gen_extu_i32_i64(dest, low);
+ /* If deposit is available, use it. Otherwise use the extra
+ knowledge that we have of the zero-extensions above. */
+ if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
+ tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
+ } else {
+ tcg_gen_shli_i64(tmp, tmp, 32);
+ tcg_gen_or_i64(dest, dest, tmp);
+ }
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(lo, TCGV_LOW(arg));
+ tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
+ } else {
+ tcg_gen_trunc_shr_i64_i32(lo, arg, 0);
+ tcg_gen_trunc_shr_i64_i32(hi, arg, 32);
+ }
+}
+
+void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
+{
+ tcg_gen_ext32u_i64(lo, arg);
+ tcg_gen_shri_i64(hi, arg, 32);
+}
+
+/* QEMU specific operations. */
+
+void tcg_gen_goto_tb(unsigned idx)
+{
+ /* We only support two chained exits. */
+ tcg_debug_assert(idx <= 1);
+#ifdef CONFIG_DEBUG_TCG
+    /* Verify that we haven't seen this numbered exit before. */
+ tcg_debug_assert((tcg_ctx.goto_tb_issue_mask & (1 << idx)) == 0);
+ tcg_ctx.goto_tb_issue_mask |= 1 << idx;
+#endif
+ tcg_gen_op1i(INDEX_op_goto_tb, idx);
+}
+
+static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
+{
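+    /* Drop memop bits that are meaningless here: byte swapping for
+       byte-sized accesses, and the sign for 32-bit accesses into
+       32-bit values and for all stores.  */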
+ switch (op & MO_SIZE) {
+ case MO_8:
+ op &= ~MO_BSWAP;
+ break;
+ case MO_16:
+ break;
+ case MO_32:
+ if (!is64) {
+ op &= ~MO_SIGN;
+ }
+ break;
+ case MO_64:
+ if (!is64) {
+ tcg_abort();
+ }
+ break;
+ }
+ if (st) {
+ op &= ~MO_SIGN;
+ }
+ return op;
+}
+
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
+ TCGMemOp memop, TCGArg idx)
+{
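+    /* The memop flags and the mmu index are packed into one constant arg.  */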
+ TCGMemOpIdx oi = make_memop_idx(memop, idx);
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(opc, val, addr, oi);
+#else
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
+ } else {
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr), oi);
+ }
+#endif
+}
+
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
+ TCGMemOp memop, TCGArg idx)
+{
+ TCGMemOpIdx oi = make_memop_idx(memop, idx);
+#if TARGET_LONG_BITS == 32
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
+ } else {
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr), oi);
+ }
+#else
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
+ TCGV_LOW(addr), TCGV_HIGH(addr), oi);
+ } else {
+ tcg_gen_op3i_i64(opc, val, addr, oi);
+ }
+#endif
+}
+
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+ memop = tcg_canonicalize_memop(memop, 0, 0);
+ gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
+}
+
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+ memop = tcg_canonicalize_memop(memop, 0, 1);
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+}
+
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
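+    /* On 32-bit hosts a load narrower than 64 bits only fills the low
+       half; synthesize the high half by sign or zero extension.  */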
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
+ tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
+ if (memop & MO_SIGN) {
+ tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
+ } else {
+ tcg_gen_movi_i32(TCGV_HIGH(val), 0);
+ }
+ return;
+ }
+
+ memop = tcg_canonicalize_memop(memop, 1, 0);
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
+}
+
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
+ tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
+ return;
+ }
+
+ memop = tcg_canonicalize_memop(memop, 1, 1);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
+}
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
new file mode 100644
index 00000000..d1d763f6
--- /dev/null
+++ b/tcg/tcg-op.h
@@ -0,0 +1,991 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg.h"
+#include "exec/helper-proto.h"
+#include "exec/helper-gen.h"
+
+/* Basic output routines. Not for general consumption. */
+
+void tcg_gen_op1(TCGContext *, TCGOpcode, TCGArg);
+void tcg_gen_op2(TCGContext *, TCGOpcode, TCGArg, TCGArg);
+void tcg_gen_op3(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg);
+void tcg_gen_op4(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
+void tcg_gen_op5(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg,
+ TCGArg, TCGArg);
+void tcg_gen_op6(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg,
+ TCGArg, TCGArg, TCGArg);
+
+
+static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
+{
+ tcg_gen_op1(&tcg_ctx, opc, GET_TCGV_I32(a1));
+}
+
+static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1)
+{
+ tcg_gen_op1(&tcg_ctx, opc, GET_TCGV_I64(a1));
+}
+
+static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg a1)
+{
+ tcg_gen_op1(&tcg_ctx, opc, a1);
+}
+
+static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2)
+{
+ tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2));
+}
+
+static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2)
+{
+ tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2));
+}
+
+static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2)
+{
+ tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I32(a1), a2);
+}
+
+static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2)
+{
+ tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I64(a1), a2);
+}
+
+static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2)
+{
+ tcg_gen_op2(&tcg_ctx, opc, a1, a2);
+}
+
+static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1,
+ TCGv_i32 a2, TCGv_i32 a3)
+{
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(a1),
+ GET_TCGV_I32(a2), GET_TCGV_I32(a3));
+}
+
+static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1,
+ TCGv_i64 a2, TCGv_i64 a3)
+{
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(a1),
+ GET_TCGV_I64(a2), GET_TCGV_I64(a3));
+}
+
+static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1,
+ TCGv_i32 a2, TCGArg a3)
+{
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), a3);
+}
+
+static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1,
+ TCGv_i64 a2, TCGArg a3)
+{
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), a3);
+}
+
+static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
+ TCGv_ptr base, TCGArg offset)
+{
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_PTR(base), offset);
+}
+
+static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
+ TCGv_ptr base, TCGArg offset)
+{
+ tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_PTR(base), offset);
+}
+
+static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGv_i32 a4)
+{
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), GET_TCGV_I32(a4));
+}
+
+static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGv_i64 a4)
+{
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), GET_TCGV_I64(a4));
+}
+
+static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGArg a4)
+{
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), a4);
+}
+
+static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGArg a4)
+{
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), a4);
+}
+
+static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGArg a3, TCGArg a4)
+{
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), a3, a4);
+}
+
+static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGArg a3, TCGArg a4)
+{
+ tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), a3, a4);
+}
+
+static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5)
+{
+ tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), GET_TCGV_I32(a4), GET_TCGV_I32(a5));
+}
+
+static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5)
+{
+ tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), GET_TCGV_I64(a4), GET_TCGV_I64(a5));
+}
+
+static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGv_i32 a4, TCGArg a5)
+{
+ tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), GET_TCGV_I32(a4), a5);
+}
+
+static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGv_i64 a4, TCGArg a5)
+{
+ tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), GET_TCGV_I64(a4), a5);
+}
+
+static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGArg a4, TCGArg a5)
+{
+ tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), a4, a5);
+}
+
+static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGArg a4, TCGArg a5)
+{
+ tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), a4, a5);
+}
+
+static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGv_i32 a4,
+ TCGv_i32 a5, TCGv_i32 a6)
+{
+ tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), GET_TCGV_I32(a4), GET_TCGV_I32(a5),
+ GET_TCGV_I32(a6));
+}
+
+static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGv_i64 a4,
+ TCGv_i64 a5, TCGv_i64 a6)
+{
+ tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), GET_TCGV_I64(a4), GET_TCGV_I64(a5),
+ GET_TCGV_I64(a6));
+}
+
+static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGv_i32 a4,
+ TCGv_i32 a5, TCGArg a6)
+{
+ tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), GET_TCGV_I32(a4), GET_TCGV_I32(a5), a6);
+}
+
+static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGv_i64 a4,
+ TCGv_i64 a5, TCGArg a6)
+{
+ tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), GET_TCGV_I64(a4), GET_TCGV_I64(a5), a6);
+}
+
+static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2,
+ TCGv_i32 a3, TCGv_i32 a4,
+ TCGArg a5, TCGArg a6)
+{
+ tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2),
+ GET_TCGV_I32(a3), GET_TCGV_I32(a4), a5, a6);
+}
+
+static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2,
+ TCGv_i64 a3, TCGv_i64 a4,
+ TCGArg a5, TCGArg a6)
+{
+ tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2),
+ GET_TCGV_I64(a3), GET_TCGV_I64(a4), a5, a6);
+}
+
+
+/* Generic ops. */
+
+static inline void gen_set_label(TCGLabel *l)
+{
+ tcg_gen_op1(&tcg_ctx, INDEX_op_set_label, label_arg(l));
+}
+
+static inline void tcg_gen_br(TCGLabel *l)
+{
+ tcg_gen_op1(&tcg_ctx, INDEX_op_br, label_arg(l));
+}
+
+/* Helper calls. */
+
+/* 32 bit ops */
+
+void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2);
+void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
+void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
+ unsigned int ofs, unsigned int len);
+void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *);
+void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *);
+void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
+ TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2);
+void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh);
+void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh);
+void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
+
+static inline void tcg_gen_discard_i32(TCGv_i32 arg)
+{
+ tcg_gen_op1_i32(INDEX_op_discard, arg);
+}
+
+static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (!TCGV_EQUAL_I32(ret, arg)) {
+ tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg);
+ }
+}
+
+static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
+{
+ tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg);
+}
+
+static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset);
+}
+
+static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_neg_i32) {
+ tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg);
+ } else {
+ tcg_gen_subfi_i32(ret, 0, arg);
+ }
+}
+
+static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_not_i32) {
+ tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg);
+ } else {
+ tcg_gen_xori_i32(ret, arg, -1);
+ }
+}
+
+/* 64 bit ops */
+
+void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2);
+void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
+void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
+ unsigned int ofs, unsigned int len);
+void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *);
+void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *);
+void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
+ TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2);
+void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
+void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
+void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
+
+#if TCG_TARGET_REG_BITS == 64
+static inline void tcg_gen_discard_i64(TCGv_i64 arg)
+{
+ tcg_gen_op1_i64(INDEX_op_discard, arg);
+}
+
+static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (!TCGV_EQUAL_I64(ret, arg)) {
+ tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg);
+ }
+}
+
+static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
+{
+ tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg);
+}
+
+static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset);
+}
+
+static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset);
+}
+
+static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2);
+}
+#else /* TCG_TARGET_REG_BITS == 32 */
+static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
+}
+
+static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
+}
+
+static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2,
+ tcg_target_long offset)
+{
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+}
+
+static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
+ TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
+}
+
+static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
+ TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
+}
+
+void tcg_gen_discard_i64(TCGv_i64 arg);
+void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg);
+void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+#endif /* TCG_TARGET_REG_BITS */
+
+static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_HAS_neg_i64) {
+ tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg);
+ } else {
+ tcg_gen_subfi_i64(ret, 0, arg);
+ }
+}
+
+/* Size changing operations. */
+
+void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
+void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
+void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high);
+void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned int c);
+void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg);
+void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg);
+
+static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
+{
+ tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
+}
+
+static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
+{
+ tcg_gen_trunc_shr_i64_i32(ret, arg, 0);
+}
+
+/* QEMU specific operations. */
+
+#ifndef TARGET_LONG_BITS
+#error must include QEMU headers
+#endif
+
+/* debug info: write the PC of the corresponding QEMU CPU instruction */
+static inline void tcg_gen_debug_insn_start(uint64_t pc)
+{
+ /* XXX: must really use a 32 bit size for TCGArg in all cases */
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+ tcg_gen_op2ii(INDEX_op_debug_insn_start,
+ (uint32_t)(pc), (uint32_t)(pc >> 32));
+#else
+ tcg_gen_op1i(INDEX_op_debug_insn_start, pc);
+#endif
+}
+
+static inline void tcg_gen_exit_tb(uintptr_t val)
+{
+ tcg_gen_op1i(INDEX_op_exit_tb, val);
+}
+
+void tcg_gen_goto_tb(unsigned idx);
+
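+/* Guest-word ("tl") definitions: TCGv and the *_tl helpers alias the
+   32-bit or 64-bit variants according to TARGET_LONG_BITS.  */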
+#if TARGET_LONG_BITS == 32
+#define TCGv TCGv_i32
+#define tcg_temp_new() tcg_temp_new_i32()
+#define tcg_global_reg_new tcg_global_reg_new_i32
+#define tcg_global_mem_new tcg_global_mem_new_i32
+#define tcg_temp_local_new() tcg_temp_local_new_i32()
+#define tcg_temp_free tcg_temp_free_i32
+#define TCGV_UNUSED(x) TCGV_UNUSED_I32(x)
+#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I32(x)
+#define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b)
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
+#else
+#define TCGv TCGv_i64
+#define tcg_temp_new() tcg_temp_new_i64()
+#define tcg_global_reg_new tcg_global_reg_new_i64
+#define tcg_global_mem_new tcg_global_mem_new_i64
+#define tcg_temp_local_new() tcg_temp_local_new_i64()
+#define tcg_temp_free tcg_temp_free_i64
+#define TCGV_UNUSED(x) TCGV_UNUSED_I64(x)
+#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I64(x)
+#define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b)
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
+#endif
+
+void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp);
+void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp);
+
+static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_UB);
+}
+
+static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_SB);
+}
+
+static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUW);
+}
+
+static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESW);
+}
+
+static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUL);
+}
+
+static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESL);
+}
+
+static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_ld_i64(ret, addr, mem_index, MO_TEQ);
+}
+
+static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_UB);
+}
+
+static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUW);
+}
+
+static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUL);
+}
+
+static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
+{
+ tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ);
+}
+
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_movi_tl tcg_gen_movi_i64
+#define tcg_gen_mov_tl tcg_gen_mov_i64
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64
+#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64
+#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64
+#define tcg_gen_ld_tl tcg_gen_ld_i64
+#define tcg_gen_st8_tl tcg_gen_st8_i64
+#define tcg_gen_st16_tl tcg_gen_st16_i64
+#define tcg_gen_st32_tl tcg_gen_st32_i64
+#define tcg_gen_st_tl tcg_gen_st_i64
+#define tcg_gen_add_tl tcg_gen_add_i64
+#define tcg_gen_addi_tl tcg_gen_addi_i64
+#define tcg_gen_sub_tl tcg_gen_sub_i64
+#define tcg_gen_neg_tl tcg_gen_neg_i64
+#define tcg_gen_subfi_tl tcg_gen_subfi_i64
+#define tcg_gen_subi_tl tcg_gen_subi_i64
+#define tcg_gen_and_tl tcg_gen_and_i64
+#define tcg_gen_andi_tl tcg_gen_andi_i64
+#define tcg_gen_or_tl tcg_gen_or_i64
+#define tcg_gen_ori_tl tcg_gen_ori_i64
+#define tcg_gen_xor_tl tcg_gen_xor_i64
+#define tcg_gen_xori_tl tcg_gen_xori_i64
+#define tcg_gen_not_tl tcg_gen_not_i64
+#define tcg_gen_shl_tl tcg_gen_shl_i64
+#define tcg_gen_shli_tl tcg_gen_shli_i64
+#define tcg_gen_shr_tl tcg_gen_shr_i64
+#define tcg_gen_shri_tl tcg_gen_shri_i64
+#define tcg_gen_sar_tl tcg_gen_sar_i64
+#define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_brcond_tl tcg_gen_brcond_i64
+#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
+#define tcg_gen_mul_tl tcg_gen_mul_i64
+#define tcg_gen_muli_tl tcg_gen_muli_i64
+#define tcg_gen_div_tl tcg_gen_div_i64
+#define tcg_gen_rem_tl tcg_gen_rem_i64
+#define tcg_gen_divu_tl tcg_gen_divu_i64
+#define tcg_gen_remu_tl tcg_gen_remu_i64
+#define tcg_gen_discard_tl tcg_gen_discard_i64
+#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32
+#define tcg_gen_trunc_i64_tl tcg_gen_mov_i64
+#define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64
+#define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64
+#define tcg_gen_extu_tl_i64 tcg_gen_mov_i64
+#define tcg_gen_ext_tl_i64 tcg_gen_mov_i64
+#define tcg_gen_ext8u_tl tcg_gen_ext8u_i64
+#define tcg_gen_ext8s_tl tcg_gen_ext8s_i64
+#define tcg_gen_ext16u_tl tcg_gen_ext16u_i64
+#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64
+#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64
+#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64
+#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64
+#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
+#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
+#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
+#define tcg_gen_andc_tl tcg_gen_andc_i64
+#define tcg_gen_eqv_tl tcg_gen_eqv_i64
+#define tcg_gen_nand_tl tcg_gen_nand_i64
+#define tcg_gen_nor_tl tcg_gen_nor_i64
+#define tcg_gen_orc_tl tcg_gen_orc_i64
+#define tcg_gen_rotl_tl tcg_gen_rotl_i64
+#define tcg_gen_rotli_tl tcg_gen_rotli_i64
+#define tcg_gen_rotr_tl tcg_gen_rotr_i64
+#define tcg_gen_rotri_tl tcg_gen_rotri_i64
+#define tcg_gen_deposit_tl tcg_gen_deposit_i64
+#define tcg_const_tl tcg_const_i64
+#define tcg_const_local_tl tcg_const_local_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
+#define tcg_gen_add2_tl tcg_gen_add2_i64
+#define tcg_gen_sub2_tl tcg_gen_sub2_i64
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
+#define tcg_gen_muls2_tl tcg_gen_muls2_i64
+#else
+#define tcg_gen_movi_tl tcg_gen_movi_i32
+#define tcg_gen_mov_tl tcg_gen_mov_i32
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32
+#define tcg_gen_ld32u_tl tcg_gen_ld_i32
+#define tcg_gen_ld32s_tl tcg_gen_ld_i32
+#define tcg_gen_ld_tl tcg_gen_ld_i32
+#define tcg_gen_st8_tl tcg_gen_st8_i32
+#define tcg_gen_st16_tl tcg_gen_st16_i32
+#define tcg_gen_st32_tl tcg_gen_st_i32
+#define tcg_gen_st_tl tcg_gen_st_i32
+#define tcg_gen_add_tl tcg_gen_add_i32
+#define tcg_gen_addi_tl tcg_gen_addi_i32
+#define tcg_gen_sub_tl tcg_gen_sub_i32
+#define tcg_gen_neg_tl tcg_gen_neg_i32
+#define tcg_gen_subfi_tl tcg_gen_subfi_i32
+#define tcg_gen_subi_tl tcg_gen_subi_i32
+#define tcg_gen_and_tl tcg_gen_and_i32
+#define tcg_gen_andi_tl tcg_gen_andi_i32
+#define tcg_gen_or_tl tcg_gen_or_i32
+#define tcg_gen_ori_tl tcg_gen_ori_i32
+#define tcg_gen_xor_tl tcg_gen_xor_i32
+#define tcg_gen_xori_tl tcg_gen_xori_i32
+#define tcg_gen_not_tl tcg_gen_not_i32
+#define tcg_gen_shl_tl tcg_gen_shl_i32
+#define tcg_gen_shli_tl tcg_gen_shli_i32
+#define tcg_gen_shr_tl tcg_gen_shr_i32
+#define tcg_gen_shri_tl tcg_gen_shri_i32
+#define tcg_gen_sar_tl tcg_gen_sar_i32
+#define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_brcond_tl tcg_gen_brcond_i32
+#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
+#define tcg_gen_mul_tl tcg_gen_mul_i32
+#define tcg_gen_muli_tl tcg_gen_muli_i32
+#define tcg_gen_div_tl tcg_gen_div_i32
+#define tcg_gen_rem_tl tcg_gen_rem_i32
+#define tcg_gen_divu_tl tcg_gen_divu_i32
+#define tcg_gen_remu_tl tcg_gen_remu_i32
+#define tcg_gen_discard_tl tcg_gen_discard_i32
+#define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32
+#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32
+#define tcg_gen_extu_i32_tl tcg_gen_mov_i32
+#define tcg_gen_ext_i32_tl tcg_gen_mov_i32
+#define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64
+#define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64
+#define tcg_gen_ext8u_tl tcg_gen_ext8u_i32
+#define tcg_gen_ext8s_tl tcg_gen_ext8s_i32
+#define tcg_gen_ext16u_tl tcg_gen_ext16u_i32
+#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32
+#define tcg_gen_ext32u_tl tcg_gen_mov_i32
+#define tcg_gen_ext32s_tl tcg_gen_mov_i32
+#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
+#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
+#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
+#define tcg_gen_andc_tl tcg_gen_andc_i32
+#define tcg_gen_eqv_tl tcg_gen_eqv_i32
+#define tcg_gen_nand_tl tcg_gen_nand_i32
+#define tcg_gen_nor_tl tcg_gen_nor_i32
+#define tcg_gen_orc_tl tcg_gen_orc_i32
+#define tcg_gen_rotl_tl tcg_gen_rotl_i32
+#define tcg_gen_rotli_tl tcg_gen_rotli_i32
+#define tcg_gen_rotr_tl tcg_gen_rotr_i32
+#define tcg_gen_rotri_tl tcg_gen_rotri_i32
+#define tcg_gen_deposit_tl tcg_gen_deposit_i32
+#define tcg_const_tl tcg_const_i32
+#define tcg_const_local_tl tcg_const_local_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
+#define tcg_gen_add2_tl tcg_gen_add2_i32
+#define tcg_gen_sub2_tl tcg_gen_sub2_i32
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
+#define tcg_gen_muls2_tl tcg_gen_muls2_i32
+#endif
+
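+/* Host-pointer sized operations: TCGv_ptr maps onto the native 32-bit or
+   64-bit ops depending on the host pointer width.  */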
+#if UINTPTR_MAX == UINT32_MAX
+# define tcg_gen_ld_ptr(R, A, O) \
+ tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O))
+# define tcg_gen_discard_ptr(A) \
+ tcg_gen_discard_i32(TCGV_PTR_TO_NAT(A))
+# define tcg_gen_add_ptr(R, A, B) \
+ tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_addi_ptr(R, A, B) \
+ tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_ext_i32_ptr(R, A) \
+ tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A))
+#else
+# define tcg_gen_ld_ptr(R, A, O) \
+ tcg_gen_ld_i64(TCGV_PTR_TO_NAT(R), (A), (O))
+# define tcg_gen_discard_ptr(A) \
+ tcg_gen_discard_i64(TCGV_PTR_TO_NAT(A))
+# define tcg_gen_add_ptr(R, A, B) \
+ tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B))
+# define tcg_gen_addi_ptr(R, A, B) \
+ tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B))
+# define tcg_gen_ext_i32_ptr(R, A) \
+ tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A))
+#endif /* UINTPTR_MAX == UINT32_MAX */
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
new file mode 100644
index 00000000..13ccb60a
--- /dev/null
+++ b/tcg/tcg-opc.h
@@ -0,0 +1,195 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * DEF(name, oargs, iargs, cargs, flags)
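+ *
+ *   name: opcode name
+ *   oargs, iargs, cargs: number of output, input and constant arguments
+ *   flags: combination of TCG_OPF_* properties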
+ */
+
+/* predefined ops */
+DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
+DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
+
+/* variable number of parameters */
+DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
+
+DEF(br, 0, 0, 1, TCG_OPF_BB_END)
+
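+/* Mark an opcode as not present when the target lacks support for it.  */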
+#define IMPL(X) (__builtin_constant_p(X) && !(X) ? TCG_OPF_NOT_PRESENT : 0)
+#if TCG_TARGET_REG_BITS == 32
+# define IMPL64 TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT
+#else
+# define IMPL64 TCG_OPF_64BIT
+#endif
+
+DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
+DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
+DEF(setcond_i32, 1, 2, 1, 0)
+DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
+/* load/store */
+DEF(ld8u_i32, 1, 1, 1, 0)
+DEF(ld8s_i32, 1, 1, 1, 0)
+DEF(ld16u_i32, 1, 1, 1, 0)
+DEF(ld16s_i32, 1, 1, 1, 0)
+DEF(ld_i32, 1, 1, 1, 0)
+DEF(st8_i32, 0, 2, 1, 0)
+DEF(st16_i32, 0, 2, 1, 0)
+DEF(st_i32, 0, 2, 1, 0)
+/* arith */
+DEF(add_i32, 1, 2, 0, 0)
+DEF(sub_i32, 1, 2, 0, 0)
+DEF(mul_i32, 1, 2, 0, 0)
+DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
+DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
+DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
+DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
+DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
+DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
+DEF(and_i32, 1, 2, 0, 0)
+DEF(or_i32, 1, 2, 0, 0)
+DEF(xor_i32, 1, 2, 0, 0)
+/* shifts/rotates */
+DEF(shl_i32, 1, 2, 0, 0)
+DEF(shr_i32, 1, 2, 0, 0)
+DEF(sar_i32, 1, 2, 0, 0)
+DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
+DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
+DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
+
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
+
+DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
+DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
+DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
+DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
+DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
+DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
+
+DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
+DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))
+DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))
+DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
+DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32))
+DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32))
+DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
+DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32))
+DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
+DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
+DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
+DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
+DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
+
+DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
+DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
+DEF(setcond_i64, 1, 2, 1, IMPL64)
+DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
+/* load/store */
+DEF(ld8u_i64, 1, 1, 1, IMPL64)
+DEF(ld8s_i64, 1, 1, 1, IMPL64)
+DEF(ld16u_i64, 1, 1, 1, IMPL64)
+DEF(ld16s_i64, 1, 1, 1, IMPL64)
+DEF(ld32u_i64, 1, 1, 1, IMPL64)
+DEF(ld32s_i64, 1, 1, 1, IMPL64)
+DEF(ld_i64, 1, 1, 1, IMPL64)
+DEF(st8_i64, 0, 2, 1, IMPL64)
+DEF(st16_i64, 0, 2, 1, IMPL64)
+DEF(st32_i64, 0, 2, 1, IMPL64)
+DEF(st_i64, 0, 2, 1, IMPL64)
+/* arith */
+DEF(add_i64, 1, 2, 0, IMPL64)
+DEF(sub_i64, 1, 2, 0, IMPL64)
+DEF(mul_i64, 1, 2, 0, IMPL64)
+DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
+DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
+DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
+DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
+DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
+DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
+DEF(and_i64, 1, 2, 0, IMPL64)
+DEF(or_i64, 1, 2, 0, IMPL64)
+DEF(xor_i64, 1, 2, 0, IMPL64)
+/* shifts/rotates */
+DEF(shl_i64, 1, 2, 0, IMPL64)
+DEF(shr_i64, 1, 2, 0, IMPL64)
+DEF(sar_i64, 1, 2, 0, IMPL64)
+DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
+DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
+DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
+
+DEF(trunc_shr_i32, 1, 1, 1,
+ IMPL(TCG_TARGET_HAS_trunc_shr_i32)
+ | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
+
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64)
+DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
+DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
+DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
+DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))
+DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))
+DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))
+DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
+DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
+DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
+DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
+DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64))
+DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
+DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
+DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
+DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
+DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
+
+DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
+DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
+DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
+DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
+DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
+DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
+
+/* QEMU specific */
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+DEF(debug_insn_start, 0, 0, 2, TCG_OPF_NOT_PRESENT)
+#else
+DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT)
+#endif
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
+
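+/* Number of TCGArg slots occupied by a guest address and by a 64-bit data
+   value; either may need two slots when wider than a host register.  */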
+#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
+#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
+
+DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+
+#undef TLADDR_ARGS
+#undef DATA64_ARGS
+#undef IMPL
+#undef IMPL64
+#undef DEF
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
new file mode 100644
index 00000000..23a0c377
--- /dev/null
+++ b/tcg/tcg-runtime.h
@@ -0,0 +1,16 @@
+DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(rem_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(divu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(remu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+
+DEF_HELPER_FLAGS_2(div_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+DEF_HELPER_FLAGS_2(rem_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+DEF_HELPER_FLAGS_2(divu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(remu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(shl_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(shr_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+
+DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
+DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/tcg/tcg.c b/tcg/tcg.c
new file mode 100644
index 00000000..0892a9bb
--- /dev/null
+++ b/tcg/tcg.c
@@ -0,0 +1,2764 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* define it to use liveness analysis (better code) */
+#define USE_LIVENESS_ANALYSIS
+#define USE_TCG_OPTIMIZATIONS
+
+#include "config.h"
+
+/* Define to dump the ELF file used to communicate with GDB. */
+#undef DEBUG_JIT
+
+#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
+/* define it to suppress various consistency checks (faster) */
+#define NDEBUG
+#endif
+
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+#include "qemu/timer.h"
+
+/* Note: the long-term plan is to reduce the dependencies on the QEMU
+   CPU definitions. Currently they are used for qemu_ld/st
+   instructions. */
+#define NO_CPU_IO_DEFS
+#include "cpu.h"
+
+#include "tcg-op.h"
+
+#if UINTPTR_MAX == UINT32_MAX
+# define ELF_CLASS ELFCLASS32
+#else
+# define ELF_CLASS ELFCLASS64
+#endif
+#ifdef HOST_WORDS_BIGENDIAN
+# define ELF_DATA ELFDATA2MSB
+#else
+# define ELF_DATA ELFDATA2LSB
+#endif
+
+#include "elf.h"
+
+/* Forward declarations for functions declared in tcg-target.c and used here. */
+static void tcg_target_init(TCGContext *s);
+static void tcg_target_qemu_prologue(TCGContext *s);
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend);
+
+/* The CIE and FDE header definitions will be common to all hosts. */
+typedef struct {
+ uint32_t len __attribute__((aligned((sizeof(void *)))));
+ uint32_t id;
+ uint8_t version;
+ char augmentation[1];
+ uint8_t code_align;
+ uint8_t data_align;
+ uint8_t return_column;
+} DebugFrameCIE;
+
+typedef struct QEMU_PACKED {
+ uint32_t len __attribute__((aligned((sizeof(void *)))));
+ uint32_t cie_offset;
+ uintptr_t func_start;
+ uintptr_t func_len;
+} DebugFrameFDEHeader;
+
+typedef struct QEMU_PACKED {
+ DebugFrameCIE cie;
+ DebugFrameFDEHeader fde;
+} DebugFrameHeader;
+
+static void tcg_register_jit_int(void *buf, size_t size,
+ const void *debug_frame,
+ size_t debug_frame_size)
+ __attribute__((unused));
+
+/* Forward declarations for functions declared and used in tcg-target.c. */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
+ intptr_t arg2);
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg);
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+ const int *const_args);
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
+ intptr_t arg2);
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
+static int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct);
+static void tcg_out_tb_init(TCGContext *s);
+static void tcg_out_tb_finalize(TCGContext *s);
+
+
+TCGOpDef tcg_op_defs[] = {
+#define DEF(s, oargs, iargs, cargs, flags) \
+    { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
+#include "tcg-opc.h"
+#undef DEF
+};
+const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
+
+static TCGRegSet tcg_target_available_regs[2];
+static TCGRegSet tcg_target_call_clobber_regs;
+
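+/* Raw code emission and patching helpers: values are written in units of
+   tcg_insn_unit, with memcpy used when a value spans several units.  */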
+#if TCG_TARGET_INSN_UNIT_SIZE == 1
+static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
+{
+ *s->code_ptr++ = v;
+}
+
+static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
+ uint8_t v)
+{
+ *p = v;
+}
+#endif
+
+#if TCG_TARGET_INSN_UNIT_SIZE <= 2
+static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
+ *s->code_ptr++ = v;
+ } else {
+ tcg_insn_unit *p = s->code_ptr;
+ memcpy(p, &v, sizeof(v));
+ s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
+ }
+}
+
+static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
+ uint16_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
+ *p = v;
+ } else {
+ memcpy(p, &v, sizeof(v));
+ }
+}
+#endif
+
+#if TCG_TARGET_INSN_UNIT_SIZE <= 4
+static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
+ *s->code_ptr++ = v;
+ } else {
+ tcg_insn_unit *p = s->code_ptr;
+ memcpy(p, &v, sizeof(v));
+ s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
+ }
+}
+
+static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
+ uint32_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
+ *p = v;
+ } else {
+ memcpy(p, &v, sizeof(v));
+ }
+}
+#endif
+
+#if TCG_TARGET_INSN_UNIT_SIZE <= 8
+static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
+ *s->code_ptr++ = v;
+ } else {
+ tcg_insn_unit *p = s->code_ptr;
+ memcpy(p, &v, sizeof(v));
+ s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
+ }
+}
+
+static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
+ uint64_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
+ *p = v;
+ } else {
+ memcpy(p, &v, sizeof(v));
+ }
+}
+#endif
+
+/* label relocation processing */
+
+static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
+ TCGLabel *l, intptr_t addend)
+{
+ TCGRelocation *r;
+
+ if (l->has_value) {
+ /* FIXME: This may break relocations on RISC targets that
+ modify instruction fields in place. The caller may not have
+ written the initial value. */
+ patch_reloc(code_ptr, type, l->u.value, addend);
+ } else {
+ /* add a new relocation entry */
+ r = tcg_malloc(sizeof(TCGRelocation));
+ r->type = type;
+ r->ptr = code_ptr;
+ r->addend = addend;
+ r->next = l->u.first_reloc;
+ l->u.first_reloc = r;
+ }
+}
+
+static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
+{
+ intptr_t value = (intptr_t)ptr;
+ TCGRelocation *r;
+
+ assert(!l->has_value);
+
+ for (r = l->u.first_reloc; r != NULL; r = r->next) {
+ patch_reloc(r->ptr, r->type, value, r->addend);
+ }
+
+ l->has_value = 1;
+ l->u.value_ptr = ptr;
+}
+
+TCGLabel *gen_new_label(void)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
+
+ *l = (TCGLabel){
+ .id = s->nb_labels++
+ };
+
+ return l;
+}
+
+#include "tcg-target.c"
+
+/* pool based memory allocation */
+void *tcg_malloc_internal(TCGContext *s, int size)
+{
+ TCGPool *p;
+ int pool_size;
+
+ if (size > TCG_POOL_CHUNK_SIZE) {
+ /* big malloc: insert a new pool (XXX: could optimize) */
+ p = g_malloc(sizeof(TCGPool) + size);
+ p->size = size;
+ p->next = s->pool_first_large;
+ s->pool_first_large = p;
+ return p->data;
+ } else {
+ p = s->pool_current;
+ if (!p) {
+ p = s->pool_first;
+ if (!p)
+ goto new_pool;
+ } else {
+ if (!p->next) {
+ new_pool:
+ pool_size = TCG_POOL_CHUNK_SIZE;
+ p = g_malloc(sizeof(TCGPool) + pool_size);
+ p->size = pool_size;
+ p->next = NULL;
+ if (s->pool_current)
+ s->pool_current->next = p;
+ else
+ s->pool_first = p;
+ } else {
+ p = p->next;
+ }
+ }
+ }
+ s->pool_current = p;
+ s->pool_cur = p->data + size;
+ s->pool_end = p->data + p->size;
+ return p->data;
+}
+
+void tcg_pool_reset(TCGContext *s)
+{
+ TCGPool *p, *t;
+ for (p = s->pool_first_large; p; p = t) {
+ t = p->next;
+ g_free(p);
+ }
+ s->pool_first_large = NULL;
+ s->pool_cur = s->pool_end = NULL;
+ s->pool_current = NULL;
+}
+
+typedef struct TCGHelperInfo {
+ void *func;
+ const char *name;
+ unsigned flags;
+ unsigned sizemask;
+} TCGHelperInfo;
+
+#include "exec/helper-proto.h"
+
+static const TCGHelperInfo all_helpers[] = {
+#include "exec/helper-tcg.h"
+};
+
+void tcg_context_init(TCGContext *s)
+{
+ int op, total_args, n, i;
+ TCGOpDef *def;
+ TCGArgConstraint *args_ct;
+ int *sorted_args;
+ GHashTable *helper_table;
+
+ memset(s, 0, sizeof(*s));
+ s->nb_globals = 0;
+
+ /* Count total number of arguments and allocate the corresponding
+ space */
+ total_args = 0;
+ for(op = 0; op < NB_OPS; op++) {
+ def = &tcg_op_defs[op];
+ n = def->nb_iargs + def->nb_oargs;
+ total_args += n;
+ }
+
+ args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
+ sorted_args = g_malloc(sizeof(int) * total_args);
+
+ for(op = 0; op < NB_OPS; op++) {
+ def = &tcg_op_defs[op];
+ def->args_ct = args_ct;
+ def->sorted_args = sorted_args;
+ n = def->nb_iargs + def->nb_oargs;
+ sorted_args += n;
+ args_ct += n;
+ }
+
+ /* Register helpers. */
+ /* Use g_direct_hash/equal for direct pointer comparisons on func. */
+ s->helpers = helper_table = g_hash_table_new(NULL, NULL);
+
+ for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
+ g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
+ (gpointer)&all_helpers[i]);
+ }
+
+ tcg_target_init(s);
+}
+
+void tcg_prologue_init(TCGContext *s)
+{
+ /* init global prologue and epilogue */
+ s->code_buf = s->code_gen_prologue;
+ s->code_ptr = s->code_buf;
+ tcg_target_qemu_prologue(s);
+ flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+
+#ifdef DEBUG_DISAS
+ if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+ size_t size = tcg_current_code_size(s);
+ qemu_log("PROLOGUE: [size=%zu]\n", size);
+ log_disas(s->code_buf, size);
+ qemu_log("\n");
+ qemu_log_flush();
+ }
+#endif
+}
+
+void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size)
+{
+ s->frame_start = start;
+ s->frame_end = start + size;
+ s->frame_reg = reg;
+}
+
+void tcg_func_start(TCGContext *s)
+{
+ tcg_pool_reset(s);
+ s->nb_temps = s->nb_globals;
+
+ /* No temps have been previously allocated for size or locality. */
+ memset(s->free_temps, 0, sizeof(s->free_temps));
+
+ s->nb_labels = 0;
+ s->current_frame_offset = s->frame_start;
+
+#ifdef CONFIG_DEBUG_TCG
+ s->goto_tb_issue_mask = 0;
+#endif
+
+ s->gen_first_op_idx = 0;
+ s->gen_last_op_idx = -1;
+ s->gen_next_op_idx = 0;
+ s->gen_next_parm_idx = 0;
+
+ s->be = tcg_malloc(sizeof(TCGBackendData));
+}
+
+static inline void tcg_temp_alloc(TCGContext *s, int n)
+{
+ if (n > TCG_MAX_TEMPS)
+ tcg_abort();
+}
+
+static inline int tcg_global_reg_new_internal(TCGType type, int reg,
+ const char *name)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int idx;
+
+#if TCG_TARGET_REG_BITS == 32
+ if (type != TCG_TYPE_I32)
+ tcg_abort();
+#endif
+ if (tcg_regset_test_reg(s->reserved_regs, reg))
+ tcg_abort();
+ idx = s->nb_globals;
+ tcg_temp_alloc(s, s->nb_globals + 1);
+ ts = &s->temps[s->nb_globals];
+ ts->base_type = type;
+ ts->type = type;
+ ts->fixed_reg = 1;
+ ts->reg = reg;
+ ts->name = name;
+ s->nb_globals++;
+ tcg_regset_set_reg(s->reserved_regs, reg);
+ return idx;
+}
+
+TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name)
+{
+ int idx;
+
+ idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name);
+ return MAKE_TCGV_I32(idx);
+}
+
+TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name)
+{
+ int idx;
+
+ idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name);
+ return MAKE_TCGV_I64(idx);
+}
+
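+/* On 32-bit hosts a 64-bit memory global is registered as two adjacent
+   I32 temps named "<name>_0" and "<name>_1", with mem_offset chosen so
+   that the "_0" half is always the low word regardless of host
+   endianness. */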
+static inline int tcg_global_mem_new_internal(TCGType type, int reg,
+ intptr_t offset,
+ const char *name)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int idx;
+
+ idx = s->nb_globals;
+#if TCG_TARGET_REG_BITS == 32
+ if (type == TCG_TYPE_I64) {
+ char buf[64];
+ tcg_temp_alloc(s, s->nb_globals + 2);
+ ts = &s->temps[s->nb_globals];
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->fixed_reg = 0;
+ ts->mem_allocated = 1;
+ ts->mem_reg = reg;
+#ifdef HOST_WORDS_BIGENDIAN
+ ts->mem_offset = offset + 4;
+#else
+ ts->mem_offset = offset;
+#endif
+ pstrcpy(buf, sizeof(buf), name);
+ pstrcat(buf, sizeof(buf), "_0");
+ ts->name = strdup(buf);
+ ts++;
+
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->fixed_reg = 0;
+ ts->mem_allocated = 1;
+ ts->mem_reg = reg;
+#ifdef HOST_WORDS_BIGENDIAN
+ ts->mem_offset = offset;
+#else
+ ts->mem_offset = offset + 4;
+#endif
+ pstrcpy(buf, sizeof(buf), name);
+ pstrcat(buf, sizeof(buf), "_1");
+ ts->name = strdup(buf);
+
+ s->nb_globals += 2;
+ } else
+#endif
+ {
+ tcg_temp_alloc(s, s->nb_globals + 1);
+ ts = &s->temps[s->nb_globals];
+ ts->base_type = type;
+ ts->type = type;
+ ts->fixed_reg = 0;
+ ts->mem_allocated = 1;
+ ts->mem_reg = reg;
+ ts->mem_offset = offset;
+ ts->name = name;
+ s->nb_globals++;
+ }
+ return idx;
+}
+
+TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name)
+{
+ int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
+ return MAKE_TCGV_I32(idx);
+}
+
+TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name)
+{
+ int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
+ return MAKE_TCGV_I64(idx);
+}
+
+static inline int tcg_temp_new_internal(TCGType type, int temp_local)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int idx, k;
+
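+    /* Freed temps are recycled through per-(type, locality) bitmaps:
+       index k selects the free list for this base type, offset by
+       TCG_TYPE_COUNT when a local temp is requested. */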
+ k = type + (temp_local ? TCG_TYPE_COUNT : 0);
+ idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
+ if (idx < TCG_MAX_TEMPS) {
+ /* There is already an available temp with the right type. */
+ clear_bit(idx, s->free_temps[k].l);
+
+ ts = &s->temps[idx];
+ ts->temp_allocated = 1;
+ assert(ts->base_type == type);
+ assert(ts->temp_local == temp_local);
+ } else {
+ idx = s->nb_temps;
+#if TCG_TARGET_REG_BITS == 32
+ if (type == TCG_TYPE_I64) {
+ tcg_temp_alloc(s, s->nb_temps + 2);
+ ts = &s->temps[s->nb_temps];
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->temp_allocated = 1;
+ ts->temp_local = temp_local;
+ ts->name = NULL;
+ ts++;
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->temp_allocated = 1;
+ ts->temp_local = temp_local;
+ ts->name = NULL;
+ s->nb_temps += 2;
+ } else
+#endif
+ {
+ tcg_temp_alloc(s, s->nb_temps + 1);
+ ts = &s->temps[s->nb_temps];
+ ts->base_type = type;
+ ts->type = type;
+ ts->temp_allocated = 1;
+ ts->temp_local = temp_local;
+ ts->name = NULL;
+ s->nb_temps++;
+ }
+ }
+
+#if defined(CONFIG_DEBUG_TCG)
+ s->temps_in_use++;
+#endif
+ return idx;
+}
+
+TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
+{
+ int idx;
+
+ idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
+ return MAKE_TCGV_I32(idx);
+}
+
+TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
+{
+ int idx;
+
+ idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
+ return MAKE_TCGV_I64(idx);
+}
+
+static void tcg_temp_free_internal(int idx)
+{
+ TCGContext *s = &tcg_ctx;
+ TCGTemp *ts;
+ int k;
+
+#if defined(CONFIG_DEBUG_TCG)
+ s->temps_in_use--;
+ if (s->temps_in_use < 0) {
+ fprintf(stderr, "More temporaries freed than allocated!\n");
+ }
+#endif
+
+ assert(idx >= s->nb_globals && idx < s->nb_temps);
+ ts = &s->temps[idx];
+ assert(ts->temp_allocated != 0);
+ ts->temp_allocated = 0;
+
+ k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
+ set_bit(idx, s->free_temps[k].l);
+}
+
+void tcg_temp_free_i32(TCGv_i32 arg)
+{
+ tcg_temp_free_internal(GET_TCGV_I32(arg));
+}
+
+void tcg_temp_free_i64(TCGv_i64 arg)
+{
+ tcg_temp_free_internal(GET_TCGV_I64(arg));
+}
+
+TCGv_i32 tcg_const_i32(int32_t val)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, val);
+ return t0;
+}
+
+TCGv_i64 tcg_const_i64(int64_t val)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, val);
+ return t0;
+}
+
+TCGv_i32 tcg_const_local_i32(int32_t val)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_local_new_i32();
+ tcg_gen_movi_i32(t0, val);
+ return t0;
+}
+
+TCGv_i64 tcg_const_local_i64(int64_t val)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_local_new_i64();
+ tcg_gen_movi_i64(t0, val);
+ return t0;
+}
+
+#if defined(CONFIG_DEBUG_TCG)
+void tcg_clear_temp_count(void)
+{
+ TCGContext *s = &tcg_ctx;
+ s->temps_in_use = 0;
+}
+
+int tcg_check_temp_count(void)
+{
+ TCGContext *s = &tcg_ctx;
+ if (s->temps_in_use) {
+ /* Clear the count so that we don't give another
+ * warning immediately next time around.
+ */
+ s->temps_in_use = 0;
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+/* Note: we convert the 64 bit args to 32 bit and do some alignment
+ and endian swap. Maybe it would be better to do the alignment
+ and endian swap in tcg_reg_alloc_call(). */
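+/* The sizemask packs two bits per value: the low pair describes the
+   return value and the pair at position (i+1)*2 describes input i, with
+   the first bit of each pair flagging a 64-bit value and the second a
+   signed one; that is what the (1 << (i+1)*2) and (2 << (i+1)*2) tests
+   below decode. */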
+void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
+ int nargs, TCGArg *args)
+{
+ int i, real_args, nb_rets, pi, pi_first;
+ unsigned sizemask, flags;
+ TCGHelperInfo *info;
+
+ info = g_hash_table_lookup(s->helpers, (gpointer)func);
+ flags = info->flags;
+ sizemask = info->sizemask;
+
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ /* We have 64-bit values in one register, but need to pass as two
+ separate parameters. Split them. */
+ int orig_sizemask = sizemask;
+ int orig_nargs = nargs;
+ TCGv_i64 retl, reth;
+
+ TCGV_UNUSED_I64(retl);
+ TCGV_UNUSED_I64(reth);
+ if (sizemask != 0) {
+ TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
+ for (i = real_args = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (is_64bit) {
+ TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
+ TCGv_i32 h = tcg_temp_new_i32();
+ TCGv_i32 l = tcg_temp_new_i32();
+ tcg_gen_extr_i64_i32(l, h, orig);
+ split_args[real_args++] = GET_TCGV_I32(h);
+ split_args[real_args++] = GET_TCGV_I32(l);
+ } else {
+ split_args[real_args++] = args[i];
+ }
+ }
+ nargs = real_args;
+ args = split_args;
+ sizemask = 0;
+ }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ int is_signed = sizemask & (2 << (i+1)*2);
+ if (!is_64bit) {
+ TCGv_i64 temp = tcg_temp_new_i64();
+ TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
+ if (is_signed) {
+ tcg_gen_ext32s_i64(temp, orig);
+ } else {
+ tcg_gen_ext32u_i64(temp, orig);
+ }
+ args[i] = GET_TCGV_I64(temp);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+
+ pi_first = pi = s->gen_next_parm_idx;
+ if (ret != TCG_CALL_DUMMY_ARG) {
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ if (orig_sizemask & 1) {
+ /* The 32-bit ABI is going to return the 64-bit value in
+ the %o0/%o1 register pair. Prepare for this by using
+ two return temporaries, and reassemble below. */
+ retl = tcg_temp_new_i64();
+ reth = tcg_temp_new_i64();
+ s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
+ s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
+ nb_rets = 2;
+ } else {
+ s->gen_opparam_buf[pi++] = ret;
+ nb_rets = 1;
+ }
+#else
+ if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
+#ifdef HOST_WORDS_BIGENDIAN
+ s->gen_opparam_buf[pi++] = ret + 1;
+ s->gen_opparam_buf[pi++] = ret;
+#else
+ s->gen_opparam_buf[pi++] = ret;
+ s->gen_opparam_buf[pi++] = ret + 1;
+#endif
+ nb_rets = 2;
+ } else {
+ s->gen_opparam_buf[pi++] = ret;
+ nb_rets = 1;
+ }
+#endif
+ } else {
+ nb_rets = 0;
+ }
+ real_args = 0;
+ for (i = 0; i < nargs; i++) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ /* some targets want aligned 64 bit args */
+ if (real_args & 1) {
+ s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
+ real_args++;
+ }
+#endif
+ /* If stack grows up, then we will be placing successive
+ arguments at lower addresses, which means we need to
+ reverse the order compared to how we would normally
+ treat either big or little-endian. For those arguments
+ that will wind up in registers, this still works for
+ HPPA (the only current STACK_GROWSUP target) since the
+ argument registers are *also* allocated in decreasing
+ order. If another such target is added, this logic may
+ have to get more complicated to differentiate between
+ stack arguments and register arguments. */
+#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
+ s->gen_opparam_buf[pi++] = args[i] + 1;
+ s->gen_opparam_buf[pi++] = args[i];
+#else
+ s->gen_opparam_buf[pi++] = args[i];
+ s->gen_opparam_buf[pi++] = args[i] + 1;
+#endif
+ real_args += 2;
+ continue;
+ }
+
+ s->gen_opparam_buf[pi++] = args[i];
+ real_args++;
+ }
+ s->gen_opparam_buf[pi++] = (uintptr_t)func;
+ s->gen_opparam_buf[pi++] = flags;
+
+ i = s->gen_next_op_idx;
+ tcg_debug_assert(i < OPC_BUF_SIZE);
+ tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
+
+ /* Set links for sequential allocation during translation. */
+ s->gen_op_buf[i] = (TCGOp){
+ .opc = INDEX_op_call,
+ .callo = nb_rets,
+ .calli = real_args,
+ .args = pi_first,
+ .prev = i - 1,
+ .next = i + 1
+ };
+
+ /* Make sure the calli field didn't overflow. */
+ tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
+
+ s->gen_last_op_idx = i;
+ s->gen_next_op_idx = i + 1;
+ s->gen_next_parm_idx = pi;
+
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ /* Free all of the parts we allocated above. */
+ for (i = real_args = 0; i < orig_nargs; ++i) {
+ int is_64bit = orig_sizemask & (1 << (i+1)*2);
+ if (is_64bit) {
+ TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
+ TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
+ tcg_temp_free_i32(h);
+ tcg_temp_free_i32(l);
+ } else {
+ real_args++;
+ }
+ }
+ if (orig_sizemask & 1) {
+ /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
+ Note that describing these as TCGv_i64 eliminates an unnecessary
+ zero-extension that tcg_gen_concat_i32_i64 would create. */
+ tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
+ tcg_temp_free_i64(retl);
+ tcg_temp_free_i64(reth);
+ }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (!is_64bit) {
+ TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
+ tcg_temp_free_i64(temp);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+}
+
+static void tcg_reg_alloc_start(TCGContext *s)
+{
+ int i;
+ TCGTemp *ts;
+ for(i = 0; i < s->nb_globals; i++) {
+ ts = &s->temps[i];
+ if (ts->fixed_reg) {
+ ts->val_type = TEMP_VAL_REG;
+ } else {
+ ts->val_type = TEMP_VAL_MEM;
+ }
+ }
+ for(i = s->nb_globals; i < s->nb_temps; i++) {
+ ts = &s->temps[i];
+ if (ts->temp_local) {
+ ts->val_type = TEMP_VAL_MEM;
+ } else {
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ ts->mem_allocated = 0;
+ ts->fixed_reg = 0;
+ }
+ for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ s->reg_to_temp[i] = -1;
+ }
+}
+
+static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
+ int idx)
+{
+ TCGTemp *ts;
+
+ assert(idx >= 0 && idx < s->nb_temps);
+ ts = &s->temps[idx];
+ if (idx < s->nb_globals) {
+ pstrcpy(buf, buf_size, ts->name);
+ } else {
+ if (ts->temp_local)
+ snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
+ else
+ snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
+ }
+ return buf;
+}
+
+char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg)
+{
+ return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg));
+}
+
+char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg)
+{
+ return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg));
+}
+
+/* Find helper name. */
+static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
+{
+ const char *ret = NULL;
+ if (s->helpers) {
+ TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
+ if (info) {
+ ret = info->name;
+ }
+ }
+ return ret;
+}
+
+static const char * const cond_name[] =
+{
+ [TCG_COND_NEVER] = "never",
+ [TCG_COND_ALWAYS] = "always",
+ [TCG_COND_EQ] = "eq",
+ [TCG_COND_NE] = "ne",
+ [TCG_COND_LT] = "lt",
+ [TCG_COND_GE] = "ge",
+ [TCG_COND_LE] = "le",
+ [TCG_COND_GT] = "gt",
+ [TCG_COND_LTU] = "ltu",
+ [TCG_COND_GEU] = "geu",
+ [TCG_COND_LEU] = "leu",
+ [TCG_COND_GTU] = "gtu"
+};
+
+static const char * const ldst_name[] =
+{
+ [MO_UB] = "ub",
+ [MO_SB] = "sb",
+ [MO_LEUW] = "leuw",
+ [MO_LESW] = "lesw",
+ [MO_LEUL] = "leul",
+ [MO_LESL] = "lesl",
+ [MO_LEQ] = "leq",
+ [MO_BEUW] = "beuw",
+ [MO_BESW] = "besw",
+ [MO_BEUL] = "beul",
+ [MO_BESL] = "besl",
+ [MO_BEQ] = "beq",
+};
+
+void tcg_dump_ops(TCGContext *s)
+{
+ char buf[128];
+ TCGOp *op;
+ int oi;
+
+ for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
+ int i, k, nb_oargs, nb_iargs, nb_cargs;
+ const TCGOpDef *def;
+ const TCGArg *args;
+ TCGOpcode c;
+
+ op = &s->gen_op_buf[oi];
+ c = op->opc;
+ def = &tcg_op_defs[c];
+ args = &s->gen_opparam_buf[op->args];
+
+ if (c == INDEX_op_debug_insn_start) {
+ uint64_t pc;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+ pc = ((uint64_t)args[1] << 32) | args[0];
+#else
+ pc = args[0];
+#endif
+ if (oi != s->gen_first_op_idx) {
+ qemu_log("\n");
+ }
+ qemu_log(" ---- 0x%" PRIx64, pc);
+ } else if (c == INDEX_op_call) {
+ /* variable number of arguments */
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
+ nb_cargs = def->nb_cargs;
+
+ /* function name, flags, out args */
+ qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
+ tcg_find_helper(s, args[nb_oargs + nb_iargs]),
+ args[nb_oargs + nb_iargs + 1], nb_oargs);
+ for (i = 0; i < nb_oargs; i++) {
+ qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[i]));
+ }
+ for (i = 0; i < nb_iargs; i++) {
+ TCGArg arg = args[nb_oargs + i];
+ const char *t = "<dummy>";
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
+ }
+ qemu_log(",%s", t);
+ }
+ } else {
+ qemu_log(" %s ", def->name);
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ nb_cargs = def->nb_cargs;
+
+ k = 0;
+ for (i = 0; i < nb_oargs; i++) {
+ if (k != 0) {
+ qemu_log(",");
+ }
+ qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
+ }
+ for (i = 0; i < nb_iargs; i++) {
+ if (k != 0) {
+ qemu_log(",");
+ }
+ qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
+ }
+ switch (c) {
+ case INDEX_op_brcond_i32:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_movcond_i32:
+ case INDEX_op_brcond2_i32:
+ case INDEX_op_setcond2_i32:
+ case INDEX_op_brcond_i64:
+ case INDEX_op_setcond_i64:
+ case INDEX_op_movcond_i64:
+ if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
+ qemu_log(",%s", cond_name[args[k++]]);
+ } else {
+ qemu_log(",$0x%" TCG_PRIlx, args[k++]);
+ }
+ i = 1;
+ break;
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_ld_i64:
+ case INDEX_op_qemu_st_i64:
+ {
+ TCGMemOpIdx oi = args[k++];
+ TCGMemOp op = get_memop(oi);
+ unsigned ix = get_mmuidx(oi);
+
+ if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
+ qemu_log(",$0x%x,%u", op, ix);
+ } else {
+ const char *s_al = "", *s_op;
+ if (op & MO_AMASK) {
+ if ((op & MO_AMASK) == MO_ALIGN) {
+ s_al = "al+";
+ } else {
+ s_al = "un+";
+ }
+ }
+ s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
+ qemu_log(",%s%s,%u", s_al, s_op, ix);
+ }
+ i = 1;
+ }
+ break;
+ default:
+ i = 0;
+ break;
+ }
+ switch (c) {
+ case INDEX_op_set_label:
+ case INDEX_op_br:
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ case INDEX_op_brcond2_i32:
+ qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
+ i++, k++;
+ break;
+ default:
+ break;
+ }
+ for (; i < nb_cargs; i++, k++) {
+ qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
+ }
+ }
+ qemu_log("\n");
+ }
+}
+
+/* we give more priority to constraints with fewer registers */
+static int get_constraint_priority(const TCGOpDef *def, int k)
+{
+ const TCGArgConstraint *arg_ct;
+
+ int i, n;
+ arg_ct = &def->args_ct[k];
+ if (arg_ct->ct & TCG_CT_ALIAS) {
+ /* an alias is equivalent to a single register */
+ n = 1;
+ } else {
+ if (!(arg_ct->ct & TCG_CT_REG))
+ return 0;
+ n = 0;
+ for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ if (tcg_regset_test_reg(arg_ct->u.regs, i))
+ n++;
+ }
+ }
+ return TCG_TARGET_NB_REGS - n + 1;
+}
+
+/* sort from highest priority to lowest */
+static void sort_constraints(TCGOpDef *def, int start, int n)
+{
+ int i, j, p1, p2, tmp;
+
+ for(i = 0; i < n; i++)
+ def->sorted_args[start + i] = start + i;
+ if (n <= 1)
+ return;
+ for(i = 0; i < n - 1; i++) {
+ for(j = i + 1; j < n; j++) {
+ p1 = get_constraint_priority(def, def->sorted_args[start + i]);
+ p2 = get_constraint_priority(def, def->sorted_args[start + j]);
+ if (p1 < p2) {
+ tmp = def->sorted_args[start + i];
+ def->sorted_args[start + i] = def->sorted_args[start + j];
+ def->sorted_args[start + j] = tmp;
+ }
+ }
+ }
+}
+
+void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
+{
+ TCGOpcode op;
+ TCGOpDef *def;
+ const char *ct_str;
+ int i, nb_args;
+
+ for(;;) {
+ if (tdefs->op == (TCGOpcode)-1)
+ break;
+ op = tdefs->op;
+ assert((unsigned)op < NB_OPS);
+ def = &tcg_op_defs[op];
+#if defined(CONFIG_DEBUG_TCG)
+ /* Duplicate entry in op definitions? */
+ assert(!def->used);
+ def->used = 1;
+#endif
+ nb_args = def->nb_iargs + def->nb_oargs;
+ for(i = 0; i < nb_args; i++) {
+ ct_str = tdefs->args_ct_str[i];
+ /* Incomplete TCGTargetOpDef entry? */
+ assert(ct_str != NULL);
+ tcg_regset_clear(def->args_ct[i].u.regs);
+ def->args_ct[i].ct = 0;
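+            /* Constraint syntax: a leading digit aliases this input to
+               output argument N, 'i' accepts any constant, and every
+               other character is passed on to the backend's
+               target_parse_constraint(). */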
+ if (ct_str[0] >= '0' && ct_str[0] <= '9') {
+ int oarg;
+ oarg = ct_str[0] - '0';
+ assert(oarg < def->nb_oargs);
+ assert(def->args_ct[oarg].ct & TCG_CT_REG);
+ /* TCG_CT_ALIAS is for the output arguments. The input
+ argument is tagged with TCG_CT_IALIAS. */
+ def->args_ct[i] = def->args_ct[oarg];
+ def->args_ct[oarg].ct = TCG_CT_ALIAS;
+ def->args_ct[oarg].alias_index = i;
+ def->args_ct[i].ct |= TCG_CT_IALIAS;
+ def->args_ct[i].alias_index = oarg;
+ } else {
+ for(;;) {
+ if (*ct_str == '\0')
+ break;
+ switch(*ct_str) {
+ case 'i':
+ def->args_ct[i].ct |= TCG_CT_CONST;
+ ct_str++;
+ break;
+ default:
+ if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
+ fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
+ ct_str, i, def->name);
+ exit(1);
+ }
+ }
+ }
+ }
+ }
+
+ /* TCGTargetOpDef entry with too much information? */
+ assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
+
+        /* sort the constraints (XXX: this is just a heuristic) */
+ sort_constraints(def, 0, def->nb_oargs);
+ sort_constraints(def, def->nb_oargs, def->nb_iargs);
+
+#if 0
+ {
+ int i;
+
+ printf("%s: sorted=", def->name);
+ for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
+ printf(" %d", def->sorted_args[i]);
+ printf("\n");
+ }
+#endif
+ tdefs++;
+ }
+
+#if defined(CONFIG_DEBUG_TCG)
+ i = 0;
+ for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
+ const TCGOpDef *def = &tcg_op_defs[op];
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
+ /* Wrong entry in op definitions? */
+ if (def->used) {
+ fprintf(stderr, "Invalid op definition for %s\n", def->name);
+ i = 1;
+ }
+ } else {
+ /* Missing entry in op definitions? */
+ if (!def->used) {
+ fprintf(stderr, "Missing op definition for %s\n", def->name);
+ i = 1;
+ }
+ }
+ }
+ if (i == 1) {
+ tcg_abort();
+ }
+#endif
+}
+
+void tcg_op_remove(TCGContext *s, TCGOp *op)
+{
+ int next = op->next;
+ int prev = op->prev;
+
+ if (next >= 0) {
+ s->gen_op_buf[next].prev = prev;
+ } else {
+ s->gen_last_op_idx = prev;
+ }
+ if (prev >= 0) {
+ s->gen_op_buf[prev].next = next;
+ } else {
+ s->gen_first_op_idx = next;
+ }
+
+ memset(op, -1, sizeof(*op));
+
+#ifdef CONFIG_PROFILER
+ s->del_op_count++;
+#endif
+}
+
+#ifdef USE_LIVENESS_ANALYSIS
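+/* The analysis walks the op list backwards with two per-temp arrays:
+   dead_temps[i] is set when temp i has no later use, and mem_temps[i] is
+   set when its canonical memory slot must hold the current value.  The
+   per-op results are recorded in the op_dead_args/op_sync_args bitmasks
+   consumed later through IS_DEAD_ARG() and NEED_SYNC_ARG(). */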
+/* liveness analysis: end of function: all temps are dead, and globals
+ should be in memory. */
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
+ uint8_t *mem_temps)
+{
+ memset(dead_temps, 1, s->nb_temps);
+ memset(mem_temps, 1, s->nb_globals);
+ memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
+}
+
+/* liveness analysis: end of basic block: all temps are dead, globals
+ and local temps should be in memory. */
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
+ uint8_t *mem_temps)
+{
+ int i;
+
+ memset(dead_temps, 1, s->nb_temps);
+ memset(mem_temps, 1, s->nb_globals);
+ for(i = s->nb_globals; i < s->nb_temps; i++) {
+ mem_temps[i] = s->temps[i].temp_local;
+ }
+}
+
+/* Liveness analysis: update the op_dead_args array to tell whether a
+   given input argument is dead.  Instructions updating dead
+   temporaries are removed. */
+static void tcg_liveness_analysis(TCGContext *s)
+{
+ uint8_t *dead_temps, *mem_temps;
+ int oi, oi_prev, nb_ops;
+
+ nb_ops = s->gen_next_op_idx;
+ s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
+ s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
+
+ dead_temps = tcg_malloc(s->nb_temps);
+ mem_temps = tcg_malloc(s->nb_temps);
+ tcg_la_func_end(s, dead_temps, mem_temps);
+
+ for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
+ int i, nb_iargs, nb_oargs;
+ TCGOpcode opc_new, opc_new2;
+ bool have_opc_new2;
+ uint16_t dead_args;
+ uint8_t sync_args;
+ TCGArg arg;
+
+ TCGOp * const op = &s->gen_op_buf[oi];
+ TCGArg * const args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+
+ oi_prev = op->prev;
+
+ switch (opc) {
+ case INDEX_op_call:
+ {
+ int call_flags;
+
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
+ call_flags = args[nb_oargs + nb_iargs + 1];
+
+ /* pure functions can be removed if their result is unused */
+ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
+ for (i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (!dead_temps[arg] || mem_temps[arg]) {
+ goto do_not_remove_call;
+ }
+ }
+ goto do_remove;
+ } else {
+ do_not_remove_call:
+
+ /* output args are dead */
+ dead_args = 0;
+ sync_args = 0;
+ for (i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (dead_temps[arg]) {
+ dead_args |= (1 << i);
+ }
+ if (mem_temps[arg]) {
+ sync_args |= (1 << i);
+ }
+ dead_temps[arg] = 1;
+ mem_temps[arg] = 0;
+ }
+
+ if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+ /* globals should be synced to memory */
+ memset(mem_temps, 1, s->nb_globals);
+ }
+ if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+ TCG_CALL_NO_READ_GLOBALS))) {
+ /* globals should go back to memory */
+ memset(dead_temps, 1, s->nb_globals);
+ }
+
+ /* record arguments that die in this helper */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ arg = args[i];
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ if (dead_temps[arg]) {
+ dead_args |= (1 << i);
+ }
+ }
+ }
+                    /* input arguments are live for preceding opcodes */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ arg = args[i];
+ dead_temps[arg] = 0;
+ }
+ s->op_dead_args[oi] = dead_args;
+ s->op_sync_args[oi] = sync_args;
+ }
+ }
+ break;
+ case INDEX_op_debug_insn_start:
+ break;
+ case INDEX_op_discard:
+ /* mark the temporary as dead */
+ dead_temps[args[0]] = 1;
+ mem_temps[args[0]] = 0;
+ break;
+
+ case INDEX_op_add2_i32:
+ opc_new = INDEX_op_add_i32;
+ goto do_addsub2;
+ case INDEX_op_sub2_i32:
+ opc_new = INDEX_op_sub_i32;
+ goto do_addsub2;
+ case INDEX_op_add2_i64:
+ opc_new = INDEX_op_add_i64;
+ goto do_addsub2;
+ case INDEX_op_sub2_i64:
+ opc_new = INDEX_op_sub_i64;
+ do_addsub2:
+ nb_iargs = 4;
+ nb_oargs = 2;
+            /* Test if the high part of the operation is dead, but not
+               the low part. The result can be optimized to a simple
+               add or sub. This happens often for x86_64 guests when the
+               cpu mode is set to 32 bit. */
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ goto do_remove;
+ }
+ /* Replace the opcode and adjust the args in place,
+ leaving 3 unused args at the end. */
+ op->opc = opc = opc_new;
+ args[1] = args[2];
+ args[2] = args[4];
+ /* Fall through and mark the single-word operation live. */
+ nb_iargs = 2;
+ nb_oargs = 1;
+ }
+ goto do_not_remove;
+
+ case INDEX_op_mulu2_i32:
+ opc_new = INDEX_op_mul_i32;
+ opc_new2 = INDEX_op_muluh_i32;
+ have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
+ goto do_mul2;
+ case INDEX_op_muls2_i32:
+ opc_new = INDEX_op_mul_i32;
+ opc_new2 = INDEX_op_mulsh_i32;
+ have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
+ goto do_mul2;
+ case INDEX_op_mulu2_i64:
+ opc_new = INDEX_op_mul_i64;
+ opc_new2 = INDEX_op_muluh_i64;
+ have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
+ goto do_mul2;
+ case INDEX_op_muls2_i64:
+ opc_new = INDEX_op_mul_i64;
+ opc_new2 = INDEX_op_mulsh_i64;
+ have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
+ goto do_mul2;
+ do_mul2:
+ nb_iargs = 2;
+ nb_oargs = 2;
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ /* Both parts of the operation are dead. */
+ goto do_remove;
+ }
+ /* The high part of the operation is dead; generate the low. */
+ op->opc = opc = opc_new;
+ args[1] = args[2];
+ args[2] = args[3];
+ } else if (have_opc_new2 && dead_temps[args[0]]
+ && !mem_temps[args[0]]) {
+ /* The low part of the operation is dead; generate the high. */
+ op->opc = opc = opc_new2;
+ args[0] = args[1];
+ args[1] = args[2];
+ args[2] = args[3];
+ } else {
+ goto do_not_remove;
+ }
+ /* Mark the single-word operation live. */
+ nb_oargs = 1;
+ goto do_not_remove;
+
+ default:
+ /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+ nb_iargs = def->nb_iargs;
+ nb_oargs = def->nb_oargs;
+
+ /* Test if the operation can be removed because all
+ its outputs are dead. We assume that nb_oargs == 0
+ implies side effects */
+ if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
+ for (i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (!dead_temps[arg] || mem_temps[arg]) {
+ goto do_not_remove;
+ }
+ }
+ do_remove:
+ tcg_op_remove(s, op);
+ } else {
+ do_not_remove:
+ /* output args are dead */
+ dead_args = 0;
+ sync_args = 0;
+ for (i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (dead_temps[arg]) {
+ dead_args |= (1 << i);
+ }
+ if (mem_temps[arg]) {
+ sync_args |= (1 << i);
+ }
+ dead_temps[arg] = 1;
+ mem_temps[arg] = 0;
+ }
+
+ /* if end of basic block, update */
+ if (def->flags & TCG_OPF_BB_END) {
+ tcg_la_bb_end(s, dead_temps, mem_temps);
+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* globals should be synced to memory */
+ memset(mem_temps, 1, s->nb_globals);
+ }
+
+ /* record arguments that die in this opcode */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ arg = args[i];
+ if (dead_temps[arg]) {
+ dead_args |= (1 << i);
+ }
+ }
+                /* input arguments are live for preceding opcodes */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ arg = args[i];
+ dead_temps[arg] = 0;
+ }
+ s->op_dead_args[oi] = dead_args;
+ s->op_sync_args[oi] = sync_args;
+ }
+ break;
+ }
+ }
+}
+#else
+/* dummy liveness analysis */
+static void tcg_liveness_analysis(TCGContext *s)
+{
+ int nb_ops;
+ nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
+
+ s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
+ memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
+ s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
+ memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
+}
+#endif
+
+#ifndef NDEBUG
+static void dump_regs(TCGContext *s)
+{
+ TCGTemp *ts;
+ int i;
+ char buf[64];
+
+ for(i = 0; i < s->nb_temps; i++) {
+ ts = &s->temps[i];
+ printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
+ switch(ts->val_type) {
+ case TEMP_VAL_REG:
+ printf("%s", tcg_target_reg_names[ts->reg]);
+ break;
+ case TEMP_VAL_MEM:
+ printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]);
+ break;
+ case TEMP_VAL_CONST:
+ printf("$0x%" TCG_PRIlx, ts->val);
+ break;
+ case TEMP_VAL_DEAD:
+ printf("D");
+ break;
+ default:
+ printf("???");
+ break;
+ }
+ printf("\n");
+ }
+
+ for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ if (s->reg_to_temp[i] >= 0) {
+ printf("%s: %s\n",
+ tcg_target_reg_names[i],
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
+ }
+ }
+}
+
+static void check_regs(TCGContext *s)
+{
+ int reg, k;
+ TCGTemp *ts;
+ char buf[64];
+
+ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ k = s->reg_to_temp[reg];
+ if (k >= 0) {
+ ts = &s->temps[k];
+ if (ts->val_type != TEMP_VAL_REG ||
+ ts->reg != reg) {
+ printf("Inconsistency for register %s:\n",
+ tcg_target_reg_names[reg]);
+ goto fail;
+ }
+ }
+ }
+ for(k = 0; k < s->nb_temps; k++) {
+ ts = &s->temps[k];
+ if (ts->val_type == TEMP_VAL_REG &&
+ !ts->fixed_reg &&
+ s->reg_to_temp[ts->reg] != k) {
+ printf("Inconsistency for temp %s:\n",
+ tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
+ fail:
+ printf("reg state:\n");
+ dump_regs(s);
+ tcg_abort();
+ }
+ }
+}
+#endif
+
+static void temp_allocate_frame(TCGContext *s, int temp)
+{
+ TCGTemp *ts;
+ ts = &s->temps[temp];
+#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
+ /* Sparc64 stack is accessed with offset of 2047 */
+ s->current_frame_offset = (s->current_frame_offset +
+ (tcg_target_long)sizeof(tcg_target_long) - 1) &
+ ~(sizeof(tcg_target_long) - 1);
+#endif
+ if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
+ s->frame_end) {
+ tcg_abort();
+ }
+ ts->mem_offset = s->current_frame_offset;
+ ts->mem_reg = s->frame_reg;
+ ts->mem_allocated = 1;
+ s->current_frame_offset += sizeof(tcg_target_long);
+}
+
+/* sync register 'reg' by saving it to the corresponding temporary */
+static inline void tcg_reg_sync(TCGContext *s, int reg)
+{
+ TCGTemp *ts;
+ int temp;
+
+ temp = s->reg_to_temp[reg];
+ ts = &s->temps[temp];
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (!ts->mem_coherent && !ts->fixed_reg) {
+ if (!ts->mem_allocated) {
+ temp_allocate_frame(s, temp);
+ }
+ tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ }
+ ts->mem_coherent = 1;
+}
+
+/* free register 'reg' by spilling the corresponding temporary if necessary */
+static void tcg_reg_free(TCGContext *s, int reg)
+{
+ int temp;
+
+ temp = s->reg_to_temp[reg];
+ if (temp != -1) {
+ tcg_reg_sync(s, reg);
+ s->temps[temp].val_type = TEMP_VAL_MEM;
+ s->reg_to_temp[reg] = -1;
+ }
+}
+
+/* Allocate a register belonging to reg1 & ~reg2 */
+static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
+{
+ int i, reg;
+ TCGRegSet reg_ct;
+
+ tcg_regset_andnot(reg_ct, reg1, reg2);
+
+ /* first try free registers */
+ for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
+ reg = tcg_target_reg_alloc_order[i];
+ if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1)
+ return reg;
+ }
+
+ /* XXX: do better spill choice */
+ for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
+ reg = tcg_target_reg_alloc_order[i];
+ if (tcg_regset_test_reg(reg_ct, reg)) {
+ tcg_reg_free(s, reg);
+ return reg;
+ }
+ }
+
+ tcg_abort();
+}
+
+/* mark a temporary as dead. */
+static inline void temp_dead(TCGContext *s, int temp)
+{
+ TCGTemp *ts;
+
+ ts = &s->temps[temp];
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = -1;
+ }
+ if (temp < s->nb_globals || ts->temp_local) {
+ ts->val_type = TEMP_VAL_MEM;
+ } else {
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ }
+}
+
+/* sync a temporary to memory. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
+{
+ TCGTemp *ts;
+
+ ts = &s->temps[temp];
+ if (!ts->fixed_reg) {
+ switch(ts->val_type) {
+ case TEMP_VAL_CONST:
+ ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ allocated_regs);
+ ts->val_type = TEMP_VAL_REG;
+ s->reg_to_temp[ts->reg] = temp;
+ ts->mem_coherent = 0;
+ tcg_out_movi(s, ts->type, ts->reg, ts->val);
+ /* fallthrough*/
+ case TEMP_VAL_REG:
+ tcg_reg_sync(s, ts->reg);
+ break;
+ case TEMP_VAL_DEAD:
+ case TEMP_VAL_MEM:
+ break;
+ default:
+ tcg_abort();
+ }
+ }
+}
+
+/* save a temporary to memory. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+{
+#ifdef USE_LIVENESS_ANALYSIS
+ /* The liveness analysis already ensures that globals are back
+ in memory. Keep an assert for safety. */
+ assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
+#else
+ temp_sync(s, temp, allocated_regs);
+ temp_dead(s, temp);
+#endif
+}
+
+/* save globals to their canonical location and assume they can be
+   modified by the following code. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i;
+
+ for(i = 0; i < s->nb_globals; i++) {
+ temp_save(s, i, allocated_regs);
+ }
+}
+
+/* sync globals to their canonical location and assume they can be
+   read by the following code. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i;
+
+ for (i = 0; i < s->nb_globals; i++) {
+#ifdef USE_LIVENESS_ANALYSIS
+ assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
+ s->temps[i].mem_coherent);
+#else
+ temp_sync(s, i, allocated_regs);
+#endif
+ }
+}
+
+/* at the end of a basic block, we assume all temporaries are dead and
+ all globals are stored at their canonical location. */
+static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
+{
+ TCGTemp *ts;
+ int i;
+
+ for(i = s->nb_globals; i < s->nb_temps; i++) {
+ ts = &s->temps[i];
+ if (ts->temp_local) {
+ temp_save(s, i, allocated_regs);
+ } else {
+#ifdef USE_LIVENESS_ANALYSIS
+ /* The liveness analysis already ensures that temps are dead.
+ Keep an assert for safety. */
+ assert(ts->val_type == TEMP_VAL_DEAD);
+#else
+ temp_dead(s, i);
+#endif
+ }
+ }
+
+ save_globals(s, allocated_regs);
+}
+
+#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
+#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
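+
+/* Both masks are indexed by argument position, outputs first and then
+   inputs, matching the order in which op_dead_args/op_sync_args were
+   filled in by the liveness pass. */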
+
+static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
+ uint16_t dead_args, uint8_t sync_args)
+{
+ TCGTemp *ots;
+ tcg_target_ulong val;
+
+ ots = &s->temps[args[0]];
+ val = args[1];
+
+ if (ots->fixed_reg) {
+ /* for fixed registers, we do not do any constant
+ propagation */
+ tcg_out_movi(s, ots->type, ots->reg, val);
+ } else {
+ /* The movi is not explicitly generated here */
+ if (ots->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ots->reg] = -1;
+ ots->val_type = TEMP_VAL_CONST;
+ ots->val = val;
+ }
+ if (NEED_SYNC_ARG(0)) {
+ temp_sync(s, args[0], s->reserved_regs);
+ }
+ if (IS_DEAD_ARG(0)) {
+ temp_dead(s, args[0]);
+ }
+}
+
+static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
+ const TCGArg *args, uint16_t dead_args,
+ uint8_t sync_args)
+{
+ TCGRegSet allocated_regs;
+ TCGTemp *ts, *ots;
+ TCGType otype, itype;
+
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ ots = &s->temps[args[0]];
+ ts = &s->temps[args[1]];
+
+ /* Note that otype != itype for no-op truncation. */
+ otype = ots->type;
+ itype = ts->type;
+
+ /* If the source value is not in a register, and we're going to be
+ forced to have it in a register in order to perform the copy,
+ then copy the SOURCE value into its own register first. That way
+ we don't have to reload SOURCE the next time it is used. */
+ if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
+ || ts->val_type == TEMP_VAL_MEM) {
+ ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype],
+ allocated_regs);
+ if (ts->val_type == TEMP_VAL_MEM) {
+ tcg_out_ld(s, itype, ts->reg, ts->mem_reg, ts->mem_offset);
+ ts->mem_coherent = 1;
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ tcg_out_movi(s, itype, ts->reg, ts->val);
+ ts->mem_coherent = 0;
+ }
+ s->reg_to_temp[ts->reg] = args[1];
+ ts->val_type = TEMP_VAL_REG;
+ }
+
+ if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
+ /* mov to a non-saved dead register makes no sense (even with
+ liveness analysis disabled). */
+ assert(NEED_SYNC_ARG(0));
+ /* The code above should have moved the temp to a register. */
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (!ots->mem_allocated) {
+ temp_allocate_frame(s, args[0]);
+ }
+ tcg_out_st(s, otype, ts->reg, ots->mem_reg, ots->mem_offset);
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, args[1]);
+ }
+ temp_dead(s, args[0]);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ /* propagate constant */
+ if (ots->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ots->reg] = -1;
+ }
+ ots->val_type = TEMP_VAL_CONST;
+ ots->val = ts->val;
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, args[1]);
+ }
+ } else {
+ /* The code in the first if block should have moved the
+ temp to a register. */
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
+ /* the mov can be suppressed */
+ if (ots->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ots->reg] = -1;
+ }
+ ots->reg = ts->reg;
+ temp_dead(s, args[1]);
+ } else {
+ if (ots->val_type != TEMP_VAL_REG) {
+ /* When allocating a new register, make sure to not spill the
+ input one. */
+ tcg_regset_set_reg(allocated_regs, ts->reg);
+ ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
+ allocated_regs);
+ }
+ tcg_out_mov(s, otype, ots->reg, ts->reg);
+ }
+ ots->val_type = TEMP_VAL_REG;
+ ots->mem_coherent = 0;
+ s->reg_to_temp[ots->reg] = args[0];
+ if (NEED_SYNC_ARG(0)) {
+ tcg_reg_sync(s, ots->reg);
+ }
+ }
+}
+
+static void tcg_reg_alloc_op(TCGContext *s,
+ const TCGOpDef *def, TCGOpcode opc,
+ const TCGArg *args, uint16_t dead_args,
+ uint8_t sync_args)
+{
+ TCGRegSet allocated_regs;
+ int i, k, nb_iargs, nb_oargs, reg;
+ TCGArg arg;
+ const TCGArgConstraint *arg_ct;
+ TCGTemp *ts;
+ TCGArg new_args[TCG_MAX_OP_ARGS];
+ int const_args[TCG_MAX_OP_ARGS];
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+
+ /* copy constants */
+ memcpy(new_args + nb_oargs + nb_iargs,
+ args + nb_oargs + nb_iargs,
+ sizeof(TCGArg) * def->nb_cargs);
+
+ /* satisfy input constraints */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ for(k = 0; k < nb_iargs; k++) {
+ i = def->sorted_args[nb_oargs + k];
+ arg = args[i];
+ arg_ct = &def->args_ct[i];
+ ts = &s->temps[arg];
+ if (ts->val_type == TEMP_VAL_MEM) {
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 1;
+ s->reg_to_temp[reg] = arg;
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ if (tcg_target_const_match(ts->val, ts->type, arg_ct)) {
+ /* constant is OK for instruction */
+ const_args[i] = 1;
+ new_args[i] = ts->val;
+ goto iarg_end;
+ } else {
+ /* need to move to a register */
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ }
+ }
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (arg_ct->ct & TCG_CT_IALIAS) {
+ if (ts->fixed_reg) {
+ /* if fixed register, we must allocate a new register
+ if the alias is not the same register */
+ if (arg != args[arg_ct->alias_index])
+ goto allocate_in_reg;
+ } else {
+ /* if the input is aliased to an output and if it is
+ not dead after the instruction, we must allocate
+ a new register and move it */
+ if (!IS_DEAD_ARG(i)) {
+ goto allocate_in_reg;
+ }
+ /* check if the current register has already been allocated
+ for another input aliased to an output */
+ int k2, i2;
+ for (k2 = 0 ; k2 < k ; k2++) {
+ i2 = def->sorted_args[nb_oargs + k2];
+ if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
+ (new_args[i2] == ts->reg)) {
+ goto allocate_in_reg;
+ }
+ }
+ }
+ }
+ reg = ts->reg;
+ if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
+            /* nothing to do: the constraint is satisfied */
+ } else {
+ allocate_in_reg:
+ /* allocate a new register matching the constraint
+ and move the temporary register into it */
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ tcg_out_mov(s, ts->type, reg, ts->reg);
+ }
+ new_args[i] = reg;
+ const_args[i] = 0;
+ tcg_regset_set_reg(allocated_regs, reg);
+ iarg_end: ;
+ }
+
+ /* mark dead temporaries and free the associated registers */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, args[i]);
+ }
+ }
+
+ if (def->flags & TCG_OPF_BB_END) {
+ tcg_reg_alloc_bb_end(s, allocated_regs);
+ } else {
+ if (def->flags & TCG_OPF_CALL_CLOBBER) {
+ /* XXX: permit generic clobber register list ? */
+ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
+ tcg_reg_free(s, reg);
+ }
+ }
+ }
+ if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* sync globals if the op has side effects and might trigger
+ an exception. */
+ sync_globals(s, allocated_regs);
+ }
+
+ /* satisfy the output constraints */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ for(k = 0; k < nb_oargs; k++) {
+ i = def->sorted_args[k];
+ arg = args[i];
+ arg_ct = &def->args_ct[i];
+ ts = &s->temps[arg];
+ if (arg_ct->ct & TCG_CT_ALIAS) {
+ reg = new_args[arg_ct->alias_index];
+ } else {
+ /* if fixed register, we try to use it */
+ reg = ts->reg;
+ if (ts->fixed_reg &&
+ tcg_regset_test_reg(arg_ct->u.regs, reg)) {
+ goto oarg_end;
+ }
+ reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ }
+ tcg_regset_set_reg(allocated_regs, reg);
+ /* if a fixed register is used, then a move will be done afterwards */
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = -1;
+ }
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ /* temp value is modified, so the value kept in memory is
+ potentially not the same */
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ }
+ oarg_end:
+ new_args[i] = reg;
+ }
+ }
+
+ /* emit instruction */
+ tcg_out_op(s, opc, new_args, const_args);
+
+ /* move the outputs in the correct register if needed */
+ for(i = 0; i < nb_oargs; i++) {
+ ts = &s->temps[args[i]];
+ reg = new_args[i];
+ if (ts->fixed_reg && ts->reg != reg) {
+ tcg_out_mov(s, ts->type, ts->reg, reg);
+ }
+ if (NEED_SYNC_ARG(i)) {
+ tcg_reg_sync(s, reg);
+ }
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, args[i]);
+ }
+ }
+}
+
+#ifdef TCG_TARGET_STACK_GROWSUP
+#define STACK_DIR(x) (-(x))
+#else
+#define STACK_DIR(x) (x)
+#endif
+
+static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
+ const TCGArg * const args, uint16_t dead_args,
+ uint8_t sync_args)
+{
+ int flags, nb_regs, i, reg;
+ TCGArg arg;
+ TCGTemp *ts;
+ intptr_t stack_offset;
+ size_t call_stack_size;
+ tcg_insn_unit *func_addr;
+ int allocate_args;
+ TCGRegSet allocated_regs;
+
+ func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
+ flags = args[nb_oargs + nb_iargs + 1];
+
+ nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
+ if (nb_regs > nb_iargs) {
+ nb_regs = nb_iargs;
+ }
+
+ /* assign stack slots first */
+ call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
+ call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
+ ~(TCG_TARGET_STACK_ALIGN - 1);
+ allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
+ if (allocate_args) {
+ /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
+ preallocate call stack */
+ tcg_abort();
+ }
+
+ stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
+ for(i = nb_regs; i < nb_iargs; i++) {
+ arg = args[nb_oargs + i];
+#ifdef TCG_TARGET_STACK_GROWSUP
+ stack_offset -= sizeof(tcg_target_long);
+#endif
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ ts = &s->temps[arg];
+ if (ts->val_type == TEMP_VAL_REG) {
+ tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
+ } else if (ts->val_type == TEMP_VAL_MEM) {
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ s->reserved_regs);
+ /* XXX: not correct if reading values from the stack */
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ s->reserved_regs);
+ /* XXX: sign extend may be needed on some targets */
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
+ } else {
+ tcg_abort();
+ }
+ }
+#ifndef TCG_TARGET_STACK_GROWSUP
+ stack_offset += sizeof(tcg_target_long);
+#endif
+ }
+
+ /* assign input registers */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ for(i = 0; i < nb_regs; i++) {
+ arg = args[nb_oargs + i];
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ ts = &s->temps[arg];
+ reg = tcg_target_call_iarg_regs[i];
+ tcg_reg_free(s, reg);
+ if (ts->val_type == TEMP_VAL_REG) {
+ if (ts->reg != reg) {
+ tcg_out_mov(s, ts->type, reg, ts->reg);
+ }
+ } else if (ts->val_type == TEMP_VAL_MEM) {
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ /* XXX: sign extend ? */
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ } else {
+ tcg_abort();
+ }
+ tcg_regset_set_reg(allocated_regs, reg);
+ }
+ }
+
+ /* mark dead temporaries and free the associated registers */
+ for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, args[i]);
+ }
+ }
+
+ /* clobber call registers */
+ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
+ tcg_reg_free(s, reg);
+ }
+ }
+
+ /* Save globals if they might be written by the helper, sync them if
+ they might be read. */
+ if (flags & TCG_CALL_NO_READ_GLOBALS) {
+ /* Nothing to do */
+ } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
+ sync_globals(s, allocated_regs);
+ } else {
+ save_globals(s, allocated_regs);
+ }
+
+ tcg_out_call(s, func_addr);
+
+ /* assign output registers and emit moves if needed */
+ for(i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ ts = &s->temps[arg];
+ reg = tcg_target_call_oarg_regs[i];
+ assert(s->reg_to_temp[reg] == -1);
+
+ if (ts->fixed_reg) {
+ if (ts->reg != reg) {
+ tcg_out_mov(s, ts->type, ts->reg, reg);
+ }
+ } else {
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = -1;
+ }
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ if (NEED_SYNC_ARG(i)) {
+ tcg_reg_sync(s, reg);
+ }
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, args[i]);
+ }
+ }
+ }
+}
+
+#ifdef CONFIG_PROFILER
+
+static int64_t tcg_table_op_count[NB_OPS];
+
+void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
+{
+ int i;
+
+ for (i = 0; i < NB_OPS; i++) {
+ cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
+ tcg_table_op_count[i]);
+ }
+}
+#else
+void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
+{
+ cpu_fprintf(f, "[TCG profiler not compiled]\n");
+}
+#endif
+
+
+static inline int tcg_gen_code_common(TCGContext *s,
+ tcg_insn_unit *gen_code_buf,
+ long search_pc)
+{
+ int oi, oi_next;
+
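+    /* The pass below runs in order: optional tcg_optimize(), liveness
+       analysis, then one forward walk over the op list that allocates
+       registers and emits host code.  When search_pc >= 0, code
+       generation stops as soon as the emitted code crosses that offset
+       and the index of the current op is returned. */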
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+ qemu_log("OP:\n");
+ tcg_dump_ops(s);
+ qemu_log("\n");
+ }
+#endif
+
+#ifdef CONFIG_PROFILER
+ s->opt_time -= profile_getclock();
+#endif
+
+#ifdef USE_TCG_OPTIMIZATIONS
+ tcg_optimize(s);
+#endif
+
+#ifdef CONFIG_PROFILER
+ s->opt_time += profile_getclock();
+ s->la_time -= profile_getclock();
+#endif
+
+ tcg_liveness_analysis(s);
+
+#ifdef CONFIG_PROFILER
+ s->la_time += profile_getclock();
+#endif
+
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
+ qemu_log("OP after optimization and liveness analysis:\n");
+ tcg_dump_ops(s);
+ qemu_log("\n");
+ }
+#endif
+
+ tcg_reg_alloc_start(s);
+
+ s->code_buf = gen_code_buf;
+ s->code_ptr = gen_code_buf;
+
+ tcg_out_tb_init(s);
+
+ for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+ TCGOp * const op = &s->gen_op_buf[oi];
+ TCGArg * const args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+ uint16_t dead_args = s->op_dead_args[oi];
+ uint8_t sync_args = s->op_sync_args[oi];
+
+ oi_next = op->next;
+#ifdef CONFIG_PROFILER
+ tcg_table_op_count[opc]++;
+#endif
+
+ switch (opc) {
+ case INDEX_op_mov_i32:
+ case INDEX_op_mov_i64:
+ tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
+ break;
+ case INDEX_op_movi_i32:
+ case INDEX_op_movi_i64:
+ tcg_reg_alloc_movi(s, args, dead_args, sync_args);
+ break;
+ case INDEX_op_debug_insn_start:
+ break;
+ case INDEX_op_discard:
+ temp_dead(s, args[0]);
+ break;
+ case INDEX_op_set_label:
+ tcg_reg_alloc_bb_end(s, s->reserved_regs);
+ tcg_out_label(s, arg_label(args[0]), s->code_ptr);
+ break;
+ case INDEX_op_call:
+ tcg_reg_alloc_call(s, op->callo, op->calli, args,
+ dead_args, sync_args);
+ break;
+ default:
+ /* Sanity check that we've not introduced any unhandled opcodes. */
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
+ tcg_abort();
+ }
+            /* Note: it would be much faster to have specialized register
+               allocator functions for some common argument patterns */
+ tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
+ break;
+ }
+ if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
+ return oi;
+ }
+#ifndef NDEBUG
+ check_regs(s);
+#endif
+ }
+
+ /* Generate TB finalization at the end of block */
+ tcg_out_tb_finalize(s);
+ return -1;
+}
+
+int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
+{
+#ifdef CONFIG_PROFILER
+ {
+ int n;
+
+ n = s->gen_last_op_idx + 1;
+ s->op_count += n;
+ if (n > s->op_count_max) {
+ s->op_count_max = n;
+ }
+
+ n = s->nb_temps;
+ s->temp_count += n;
+ if (n > s->temp_count_max) {
+ s->temp_count_max = n;
+ }
+ }
+#endif
+
+ tcg_gen_code_common(s, gen_code_buf, -1);
+
+ /* flush instruction cache */
+ flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+
+ return tcg_current_code_size(s);
+}
+
+/* Return the index of the micro operation such that the pc after it is
+   less than 'offset' bytes from the start of the TB.  The contents of
+   gen_code_buf must not be changed, though writing the same values is ok.
+   Return -1 if not found. */
+int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
+ long offset)
+{
+ return tcg_gen_code_common(s, gen_code_buf, offset);
+}
+
+#ifdef CONFIG_PROFILER
+void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
+{
+ TCGContext *s = &tcg_ctx;
+ int64_t tot;
+
+ tot = s->interm_time + s->code_time;
+ cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
+ tot, tot / 2.4e9);
+ cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
+ s->tb_count,
+ s->tb_count1 - s->tb_count,
+ s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
+ cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
+ s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
+ cpu_fprintf(f, "deleted ops/TB %0.2f\n",
+ s->tb_count ?
+ (double)s->del_op_count / s->tb_count : 0);
+ cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
+ s->tb_count ?
+ (double)s->temp_count / s->tb_count : 0,
+ s->temp_count_max);
+
+ cpu_fprintf(f, "cycles/op %0.1f\n",
+ s->op_count ? (double)tot / s->op_count : 0);
+ cpu_fprintf(f, "cycles/in byte %0.1f\n",
+ s->code_in_len ? (double)tot / s->code_in_len : 0);
+ cpu_fprintf(f, "cycles/out byte %0.1f\n",
+ s->code_out_len ? (double)tot / s->code_out_len : 0);
+ if (tot == 0)
+ tot = 1;
+ cpu_fprintf(f, " gen_interm time %0.1f%%\n",
+ (double)s->interm_time / tot * 100.0);
+ cpu_fprintf(f, " gen_code time %0.1f%%\n",
+ (double)s->code_time / tot * 100.0);
+ cpu_fprintf(f, "optim./code time %0.1f%%\n",
+ (double)s->opt_time / (s->code_time ? s->code_time : 1)
+ * 100.0);
+ cpu_fprintf(f, "liveness/code time %0.1f%%\n",
+ (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
+ cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
+ s->restore_count);
+ cpu_fprintf(f, " avg cycles %0.1f\n",
+ s->restore_count ? (double)s->restore_time / s->restore_count : 0);
+}
+#else
+void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
+{
+ cpu_fprintf(f, "[TCG profiler not compiled]\n");
+}
+#endif
+
+#ifdef ELF_HOST_MACHINE
+/* In order to use this feature, the backend needs to do three things:
+
+ (1) Define ELF_HOST_MACHINE to indicate both what value to
+ put into the ELF image and to indicate support for the feature.
+
+ (2) Define tcg_register_jit. This should create a buffer containing
+ the contents of a .debug_frame section that describes the post-
+ prologue unwind info for the tcg machine.
+
+ (3) Call tcg_register_jit_int, with the constructed .debug_frame.
+*/
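+
+/* As a sketch only (the real per-backend versions construct a proper
+   unwind table), a backend's tcg_register_jit() is expected to boil down
+   to building a static .debug_frame image and handing it over:
+
+       void tcg_register_jit(void *buf, size_t buf_size)
+       {
+           static const DebugFrame frame = { ... CFI describing the prologue ... };
+           tcg_register_jit_int(buf, buf_size, &frame, sizeof(frame));
+       }
+
+   where DebugFrame is whatever layout the backend chooses for the
+   .debug_frame contents. */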
+
+/* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
+typedef enum {
+ JIT_NOACTION = 0,
+ JIT_REGISTER_FN,
+ JIT_UNREGISTER_FN
+} jit_actions_t;
+
+struct jit_code_entry {
+ struct jit_code_entry *next_entry;
+ struct jit_code_entry *prev_entry;
+ const void *symfile_addr;
+ uint64_t symfile_size;
+};
+
+struct jit_descriptor {
+ uint32_t version;
+ uint32_t action_flag;
+ struct jit_code_entry *relevant_entry;
+ struct jit_code_entry *first_entry;
+};
+
+void __jit_debug_register_code(void) __attribute__((noinline));
+void __jit_debug_register_code(void)
+{
+ asm("");
+}
+
+/* Must statically initialize the version, because GDB may check
+ the version before we can set it. */
+struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+/* End GDB interface. */
+
+static int find_string(const char *strtab, const char *str)
+{
+ const char *p = strtab + 1;
+
+ while (1) {
+ if (strcmp(p, str) == 0) {
+ return p - strtab;
+ }
+ p += strlen(p) + 1;
+ }
+}
+
+static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
+ const void *debug_frame,
+ size_t debug_frame_size)
+{
+ struct __attribute__((packed)) DebugInfo {
+ uint32_t len;
+ uint16_t version;
+ uint32_t abbrev;
+ uint8_t ptr_size;
+ uint8_t cu_die;
+ uint16_t cu_lang;
+ uintptr_t cu_low_pc;
+ uintptr_t cu_high_pc;
+ uint8_t fn_die;
+ char fn_name[16];
+ uintptr_t fn_low_pc;
+ uintptr_t fn_high_pc;
+ uint8_t cu_eoc;
+ };
+
+ struct ElfImage {
+ ElfW(Ehdr) ehdr;
+ ElfW(Phdr) phdr;
+ ElfW(Shdr) shdr[7];
+ ElfW(Sym) sym[2];
+ struct DebugInfo di;
+ uint8_t da[24];
+ char str[80];
+ };
+
+ struct ElfImage *img;
+
+ static const struct ElfImage img_template = {
+ .ehdr = {
+ .e_ident[EI_MAG0] = ELFMAG0,
+ .e_ident[EI_MAG1] = ELFMAG1,
+ .e_ident[EI_MAG2] = ELFMAG2,
+ .e_ident[EI_MAG3] = ELFMAG3,
+ .e_ident[EI_CLASS] = ELF_CLASS,
+ .e_ident[EI_DATA] = ELF_DATA,
+ .e_ident[EI_VERSION] = EV_CURRENT,
+ .e_type = ET_EXEC,
+ .e_machine = ELF_HOST_MACHINE,
+ .e_version = EV_CURRENT,
+ .e_phoff = offsetof(struct ElfImage, phdr),
+ .e_shoff = offsetof(struct ElfImage, shdr),
+            .e_ehsize = sizeof(ElfW(Ehdr)),
+ .e_phentsize = sizeof(ElfW(Phdr)),
+ .e_phnum = 1,
+ .e_shentsize = sizeof(ElfW(Shdr)),
+ .e_shnum = ARRAY_SIZE(img->shdr),
+ .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
+#ifdef ELF_HOST_FLAGS
+ .e_flags = ELF_HOST_FLAGS,
+#endif
+#ifdef ELF_OSABI
+ .e_ident[EI_OSABI] = ELF_OSABI,
+#endif
+ },
+ .phdr = {
+ .p_type = PT_LOAD,
+ .p_flags = PF_X,
+ },
+ .shdr = {
+ [0] = { .sh_type = SHT_NULL },
+ /* Trick: The contents of code_gen_buffer are not present in
+ this fake ELF file; that got allocated elsewhere. Therefore
+ we mark .text as SHT_NOBITS (similar to .bss) so that readers
+ will not look for contents. We can record any address. */
+ [1] = { /* .text */
+ .sh_type = SHT_NOBITS,
+ .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
+ },
+ [2] = { /* .debug_info */
+ .sh_type = SHT_PROGBITS,
+ .sh_offset = offsetof(struct ElfImage, di),
+ .sh_size = sizeof(struct DebugInfo),
+ },
+ [3] = { /* .debug_abbrev */
+ .sh_type = SHT_PROGBITS,
+ .sh_offset = offsetof(struct ElfImage, da),
+ .sh_size = sizeof(img->da),
+ },
+ [4] = { /* .debug_frame */
+ .sh_type = SHT_PROGBITS,
+ .sh_offset = sizeof(struct ElfImage),
+ },
+ [5] = { /* .symtab */
+ .sh_type = SHT_SYMTAB,
+ .sh_offset = offsetof(struct ElfImage, sym),
+ .sh_size = sizeof(img->sym),
+ .sh_info = 1,
+ .sh_link = ARRAY_SIZE(img->shdr) - 1,
+ .sh_entsize = sizeof(ElfW(Sym)),
+ },
+ [6] = { /* .strtab */
+ .sh_type = SHT_STRTAB,
+ .sh_offset = offsetof(struct ElfImage, str),
+ .sh_size = sizeof(img->str),
+ }
+ },
+ .sym = {
+ [1] = { /* code_gen_buffer */
+ .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
+ .st_shndx = 1,
+ }
+ },
+ .di = {
+ .len = sizeof(struct DebugInfo) - 4,
+ .version = 2,
+ .ptr_size = sizeof(void *),
+ .cu_die = 1,
+ .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
+ .fn_die = 2,
+ .fn_name = "code_gen_buffer"
+ },
+ .da = {
+ 1, /* abbrev number (the cu) */
+ 0x11, 1, /* DW_TAG_compile_unit, has children */
+ 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
+ 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
+ 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
+ 0, 0, /* end of abbrev */
+ 2, /* abbrev number (the fn) */
+ 0x2e, 0, /* DW_TAG_subprogram, no children */
+ 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
+ 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
+ 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
+ 0, 0, /* end of abbrev */
+ 0 /* no more abbrev */
+ },
+ .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
+ ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
+ };
+
+ /* We only need a single jit entry; statically allocate it. */
+ static struct jit_code_entry one_entry;
+
+ uintptr_t buf = (uintptr_t)buf_ptr;
+ size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
+ DebugFrameHeader *dfh;
+
+ img = g_malloc(img_size);
+ *img = img_template;
+
+ img->phdr.p_vaddr = buf;
+ img->phdr.p_paddr = buf;
+ img->phdr.p_memsz = buf_size;
+
+ img->shdr[1].sh_name = find_string(img->str, ".text");
+ img->shdr[1].sh_addr = buf;
+ img->shdr[1].sh_size = buf_size;
+
+ img->shdr[2].sh_name = find_string(img->str, ".debug_info");
+ img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
+
+ img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
+ img->shdr[4].sh_size = debug_frame_size;
+
+ img->shdr[5].sh_name = find_string(img->str, ".symtab");
+ img->shdr[6].sh_name = find_string(img->str, ".strtab");
+
+ img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
+ img->sym[1].st_value = buf;
+ img->sym[1].st_size = buf_size;
+
+ img->di.cu_low_pc = buf;
+ img->di.cu_high_pc = buf + buf_size;
+ img->di.fn_low_pc = buf;
+ img->di.fn_high_pc = buf + buf_size;
+
+ dfh = (DebugFrameHeader *)(img + 1);
+ memcpy(dfh, debug_frame, debug_frame_size);
+ dfh->fde.func_start = buf;
+ dfh->fde.func_len = buf_size;
+
+#ifdef DEBUG_JIT
+ /* Enable this block to be able to debug the ELF image file creation.
+ One can use readelf, objdump, or other inspection utilities. */
+ {
+ FILE *f = fopen("/tmp/qemu.jit", "w+b");
+ if (f) {
+            if (fwrite(img, img_size, 1, f) != 1) {
+ /* Avoid stupid unused return value warning for fwrite. */
+ }
+ fclose(f);
+ }
+ }
+#endif
+
+ one_entry.symfile_addr = img;
+ one_entry.symfile_size = img_size;
+
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+ __jit_debug_descriptor.relevant_entry = &one_entry;
+ __jit_debug_descriptor.first_entry = &one_entry;
+ __jit_debug_register_code();
+}
+#else
+/* No support for the feature. Provide the entry point expected by exec.c,
+ and implement the internal function we declared earlier. */
+
+static void tcg_register_jit_int(void *buf, size_t size,
+ const void *debug_frame,
+ size_t debug_frame_size)
+{
+}
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+}
+#endif /* ELF_HOST_MACHINE */
diff --git a/tcg/tcg.h b/tcg/tcg.h
new file mode 100644
index 00000000..231a7815
--- /dev/null
+++ b/tcg/tcg.h
@@ -0,0 +1,1011 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_H
+#define TCG_H
+
+#include "qemu-common.h"
+#include "qemu/bitops.h"
+#include "tcg-target.h"
+
+#define CPU_TEMP_BUF_NLONGS 128
+
+/* Default target word size to pointer size. */
+#ifndef TCG_TARGET_REG_BITS
+# if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+# elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+# else
+# error Unknown pointer size for tcg target
+# endif
+#endif
+
+#if TCG_TARGET_REG_BITS == 32
+typedef int32_t tcg_target_long;
+typedef uint32_t tcg_target_ulong;
+#define TCG_PRIlx PRIx32
+#define TCG_PRIld PRId32
+#elif TCG_TARGET_REG_BITS == 64
+typedef int64_t tcg_target_long;
+typedef uint64_t tcg_target_ulong;
+#define TCG_PRIlx PRIx64
+#define TCG_PRIld PRId64
+#else
+#error unsupported
+#endif
+
+#if TCG_TARGET_NB_REGS <= 32
+typedef uint32_t TCGRegSet;
+#elif TCG_TARGET_NB_REGS <= 64
+typedef uint64_t TCGRegSet;
+#else
+#error unsupported
+#endif
+
+#if TCG_TARGET_REG_BITS == 32
+/* Turn some undef macros into false macros. */
+#define TCG_TARGET_HAS_trunc_shr_i32 0
+#define TCG_TARGET_HAS_div_i64 0
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_div2_i64 0
+#define TCG_TARGET_HAS_rot_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 0
+#define TCG_TARGET_HAS_ext16s_i64 0
+#define TCG_TARGET_HAS_ext32s_i64 0
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 0
+#define TCG_TARGET_HAS_bswap16_i64 0
+#define TCG_TARGET_HAS_bswap32_i64 0
+#define TCG_TARGET_HAS_bswap64_i64 0
+#define TCG_TARGET_HAS_neg_i64 0
+#define TCG_TARGET_HAS_not_i64 0
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+/* Turn some undef macros into true macros. */
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#endif
+
+#ifndef TCG_TARGET_deposit_i32_valid
+#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_deposit_i64_valid
+#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
+#endif
+
+/* Only one of DIV or DIV2 should be defined. */
+#if defined(TCG_TARGET_HAS_div_i32)
+#define TCG_TARGET_HAS_div2_i32 0
+#elif defined(TCG_TARGET_HAS_div2_i32)
+#define TCG_TARGET_HAS_div_i32 0
+#define TCG_TARGET_HAS_rem_i32 0
+#endif
+#if defined(TCG_TARGET_HAS_div_i64)
+#define TCG_TARGET_HAS_div2_i64 0
+#elif defined(TCG_TARGET_HAS_div2_i64)
+#define TCG_TARGET_HAS_div_i64 0
+#define TCG_TARGET_HAS_rem_i64 0
+#endif
+
+/* For 32-bit targets, some sort of unsigned widening multiply is required. */
+#if TCG_TARGET_REG_BITS == 32 \
+ && !(defined(TCG_TARGET_HAS_mulu2_i32) \
+ || defined(TCG_TARGET_HAS_muluh_i32))
+# error "Missing unsigned widening multiply"
+#endif
+
+typedef enum TCGOpcode {
+#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
+#include "tcg-opc.h"
+#undef DEF
+ NB_OPS,
+} TCGOpcode;
+
+#define tcg_regset_clear(d) (d) = 0
+#define tcg_regset_set(d, s) (d) = (s)
+#define tcg_regset_set32(d, reg, val32) (d) |= (val32) << (reg)
+#define tcg_regset_set_reg(d, r) (d) |= 1L << (r)
+#define tcg_regset_reset_reg(d, r) (d) &= ~(1L << (r))
+#define tcg_regset_test_reg(d, r) (((d) >> (r)) & 1)
+#define tcg_regset_or(d, a, b) (d) = (a) | (b)
+#define tcg_regset_and(d, a, b) (d) = (a) & (b)
+#define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b)
+#define tcg_regset_not(d, a) (d) = ~(a)
+
+#ifndef TCG_TARGET_INSN_UNIT_SIZE
+# error "Missing TCG_TARGET_INSN_UNIT_SIZE"
+#elif TCG_TARGET_INSN_UNIT_SIZE == 1
+typedef uint8_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 2
+typedef uint16_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 4
+typedef uint32_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 8
+typedef uint64_t tcg_insn_unit;
+#else
+/* The port better have done this. */
+#endif
+
+
+typedef struct TCGRelocation {
+ struct TCGRelocation *next;
+ int type;
+ tcg_insn_unit *ptr;
+ intptr_t addend;
+} TCGRelocation;
+
+typedef struct TCGLabel {
+ unsigned has_value : 1;
+ unsigned id : 31;
+ union {
+ uintptr_t value;
+ tcg_insn_unit *value_ptr;
+ TCGRelocation *first_reloc;
+ } u;
+} TCGLabel;
+
+typedef struct TCGPool {
+ struct TCGPool *next;
+ int size;
+ uint8_t data[0] __attribute__ ((aligned));
+} TCGPool;
+
+#define TCG_POOL_CHUNK_SIZE 32768
+
+#define TCG_MAX_TEMPS 512
+
+/* when the size of the arguments of a called function is smaller than
+ this value, they are statically allocated in the TB stack frame */
+#define TCG_STATIC_CALL_ARGS_SIZE 128
+
+typedef enum TCGType {
+ TCG_TYPE_I32,
+ TCG_TYPE_I64,
+ TCG_TYPE_COUNT, /* number of different types */
+
+ /* An alias for the size of the host register. */
+#if TCG_TARGET_REG_BITS == 32
+ TCG_TYPE_REG = TCG_TYPE_I32,
+#else
+ TCG_TYPE_REG = TCG_TYPE_I64,
+#endif
+
+ /* An alias for the size of the native pointer. */
+#if UINTPTR_MAX == UINT32_MAX
+ TCG_TYPE_PTR = TCG_TYPE_I32,
+#else
+ TCG_TYPE_PTR = TCG_TYPE_I64,
+#endif
+
+ /* An alias for the size of the target "long", aka register. */
+#if TARGET_LONG_BITS == 64
+ TCG_TYPE_TL = TCG_TYPE_I64,
+#else
+ TCG_TYPE_TL = TCG_TYPE_I32,
+#endif
+} TCGType;
+
+/* Constants for qemu_ld and qemu_st for the Memory Operation field. */
+typedef enum TCGMemOp {
+ MO_8 = 0,
+ MO_16 = 1,
+ MO_32 = 2,
+ MO_64 = 3,
+ MO_SIZE = 3, /* Mask for the above. */
+
+ MO_SIGN = 4, /* Sign-extended, otherwise zero-extended. */
+
+ MO_BSWAP = 8, /* Host reverse endian. */
+#ifdef HOST_WORDS_BIGENDIAN
+ MO_LE = MO_BSWAP,
+ MO_BE = 0,
+#else
+ MO_LE = 0,
+ MO_BE = MO_BSWAP,
+#endif
+#ifdef TARGET_WORDS_BIGENDIAN
+ MO_TE = MO_BE,
+#else
+ MO_TE = MO_LE,
+#endif
+
+ /* MO_UNALN accesses are never checked for alignment.
+ MO_ALIGN accesses will result in a call to the CPU's
+ do_unaligned_access hook if the guest address is not aligned.
+ The default depends on whether the target CPU defines ALIGNED_ONLY. */
+ MO_AMASK = 16,
+#ifdef ALIGNED_ONLY
+ MO_ALIGN = 0,
+ MO_UNALN = MO_AMASK,
+#else
+ MO_ALIGN = MO_AMASK,
+ MO_UNALN = 0,
+#endif
+
+ /* Combinations of the above, for ease of use. */
+ MO_UB = MO_8,
+ MO_UW = MO_16,
+ MO_UL = MO_32,
+ MO_SB = MO_SIGN | MO_8,
+ MO_SW = MO_SIGN | MO_16,
+ MO_SL = MO_SIGN | MO_32,
+ MO_Q = MO_64,
+
+ MO_LEUW = MO_LE | MO_UW,
+ MO_LEUL = MO_LE | MO_UL,
+ MO_LESW = MO_LE | MO_SW,
+ MO_LESL = MO_LE | MO_SL,
+ MO_LEQ = MO_LE | MO_Q,
+
+ MO_BEUW = MO_BE | MO_UW,
+ MO_BEUL = MO_BE | MO_UL,
+ MO_BESW = MO_BE | MO_SW,
+ MO_BESL = MO_BE | MO_SL,
+ MO_BEQ = MO_BE | MO_Q,
+
+ MO_TEUW = MO_TE | MO_UW,
+ MO_TEUL = MO_TE | MO_UL,
+ MO_TESW = MO_TE | MO_SW,
+ MO_TESL = MO_TE | MO_SL,
+ MO_TEQ = MO_TE | MO_Q,
+
+ MO_SSIZE = MO_SIZE | MO_SIGN,
+} TCGMemOp;
+
+typedef tcg_target_ulong TCGArg;
+
+/* Define a type and accessor macros for variables. Using pointer types
+   is nice because it gives some level of type safety. Converting to and
+ from intptr_t rather than int reduces the number of sign-extension
+ instructions that get implied on 64-bit hosts. Users of tcg_gen_* don't
+ need to know about any of this, and should treat TCGv as an opaque type.
+ In addition we do typechecking for different types of variables. TCGv_i32
+ and TCGv_i64 are 32/64-bit variables respectively. TCGv and TCGv_ptr
+ are aliases for target_ulong and host pointer sized values respectively. */
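+
+/* For example (illustrative only), the distinct pointer types make the
+   compiler reject mixing variable sizes instead of silently accepting it:
+
+       TCGv_i64 v64 = tcg_temp_new_i64();
+       TCGv_i32 v32 = v64;    <-- diagnosed: incompatible pointer types
+*/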
+
+typedef struct TCGv_i32_d *TCGv_i32;
+typedef struct TCGv_i64_d *TCGv_i64;
+typedef struct TCGv_ptr_d *TCGv_ptr;
+
+static inline TCGv_i32 QEMU_ARTIFICIAL MAKE_TCGV_I32(intptr_t i)
+{
+ return (TCGv_i32)i;
+}
+
+static inline TCGv_i64 QEMU_ARTIFICIAL MAKE_TCGV_I64(intptr_t i)
+{
+ return (TCGv_i64)i;
+}
+
+static inline TCGv_ptr QEMU_ARTIFICIAL MAKE_TCGV_PTR(intptr_t i)
+{
+ return (TCGv_ptr)i;
+}
+
+static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_I32(TCGv_i32 t)
+{
+ return (intptr_t)t;
+}
+
+static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_I64(TCGv_i64 t)
+{
+ return (intptr_t)t;
+}
+
+static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_PTR(TCGv_ptr t)
+{
+ return (intptr_t)t;
+}
+
+#if TCG_TARGET_REG_BITS == 32
+#define TCGV_LOW(t) MAKE_TCGV_I32(GET_TCGV_I64(t))
+#define TCGV_HIGH(t) MAKE_TCGV_I32(GET_TCGV_I64(t) + 1)
+#endif
+
+#define TCGV_EQUAL_I32(a, b) (GET_TCGV_I32(a) == GET_TCGV_I32(b))
+#define TCGV_EQUAL_I64(a, b) (GET_TCGV_I64(a) == GET_TCGV_I64(b))
+#define TCGV_EQUAL_PTR(a, b) (GET_TCGV_PTR(a) == GET_TCGV_PTR(b))
+
+/* Dummy definition to avoid compiler warnings. */
+#define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1)
+#define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
+#define TCGV_UNUSED_PTR(x) x = MAKE_TCGV_PTR(-1)
+
+#define TCGV_IS_UNUSED_I32(x) (GET_TCGV_I32(x) == -1)
+#define TCGV_IS_UNUSED_I64(x) (GET_TCGV_I64(x) == -1)
+#define TCGV_IS_UNUSED_PTR(x) (GET_TCGV_PTR(x) == -1)
+
+/* call flags */
+/* Helper does not read globals (either directly or through an exception). It
+ implies TCG_CALL_NO_WRITE_GLOBALS. */
+#define TCG_CALL_NO_READ_GLOBALS 0x0010
+/* Helper does not write globals */
+#define TCG_CALL_NO_WRITE_GLOBALS 0x0020
+/* Helper can be safely suppressed if the return value is not used. */
+#define TCG_CALL_NO_SIDE_EFFECTS 0x0040
+
+/* convenience version of most used call flags */
+#define TCG_CALL_NO_RWG TCG_CALL_NO_READ_GLOBALS
+#define TCG_CALL_NO_WG TCG_CALL_NO_WRITE_GLOBALS
+#define TCG_CALL_NO_SE TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
+#define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
+
+/* used to align parameters */
+#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1)
+#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1))
+
+/* Conditions. Note that these are laid out for easy manipulation by
+ the functions below:
+ bit 0 is used for inverting;
+ bit 1 is signed,
+ bit 2 is unsigned,
+ bit 3 is used with bit 0 for swapping signed/unsigned. */
+typedef enum {
+ /* non-signed */
+ TCG_COND_NEVER = 0 | 0 | 0 | 0,
+ TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
+ TCG_COND_EQ = 8 | 0 | 0 | 0,
+ TCG_COND_NE = 8 | 0 | 0 | 1,
+ /* signed */
+ TCG_COND_LT = 0 | 0 | 2 | 0,
+ TCG_COND_GE = 0 | 0 | 2 | 1,
+ TCG_COND_LE = 8 | 0 | 2 | 0,
+ TCG_COND_GT = 8 | 0 | 2 | 1,
+ /* unsigned */
+ TCG_COND_LTU = 0 | 4 | 0 | 0,
+ TCG_COND_GEU = 0 | 4 | 0 | 1,
+ TCG_COND_LEU = 8 | 4 | 0 | 0,
+ TCG_COND_GTU = 8 | 4 | 0 | 1,
+} TCGCond;
+
+/* Invert the sense of the comparison. */
+static inline TCGCond tcg_invert_cond(TCGCond c)
+{
+ return (TCGCond)(c ^ 1);
+}
+
+/* Swap the operands in a comparison. */
+static inline TCGCond tcg_swap_cond(TCGCond c)
+{
+ return c & 6 ? (TCGCond)(c ^ 9) : c;
+}
+
+/* Create an "unsigned" version of a "signed" comparison. */
+static inline TCGCond tcg_unsigned_cond(TCGCond c)
+{
+ return c & 2 ? (TCGCond)(c ^ 6) : c;
+}
+
+/* Must a comparison be considered unsigned? */
+static inline bool is_unsigned_cond(TCGCond c)
+{
+ return (c & 4) != 0;
+}
+
+/* Create a "high" version of a double-word comparison.
+ This removes equality from a LTE or GTE comparison. */
+static inline TCGCond tcg_high_cond(TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_GE:
+ case TCG_COND_LE:
+ case TCG_COND_GEU:
+ case TCG_COND_LEU:
+ return (TCGCond)(c ^ 8);
+ default:
+ return c;
+ }
+}
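+
+/* A few concrete identities implied by the encoding above, useful as a
+   sanity check (illustrative only):
+
+       tcg_invert_cond(TCG_COND_LT)   == TCG_COND_GE
+       tcg_swap_cond(TCG_COND_LT)     == TCG_COND_GT
+       tcg_unsigned_cond(TCG_COND_LT) == TCG_COND_LTU
+       tcg_high_cond(TCG_COND_LE)     == TCG_COND_LT
+*/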
+
+typedef enum TCGTempVal {
+ TEMP_VAL_DEAD,
+ TEMP_VAL_REG,
+ TEMP_VAL_MEM,
+ TEMP_VAL_CONST,
+} TCGTempVal;
+
+typedef struct TCGTemp {
+ unsigned int reg:8;
+ unsigned int mem_reg:8;
+ TCGTempVal val_type:8;
+ TCGType base_type:8;
+ TCGType type:8;
+ unsigned int fixed_reg:1;
+ unsigned int mem_coherent:1;
+ unsigned int mem_allocated:1;
+ unsigned int temp_local:1; /* If true, the temp is saved across
+ basic blocks. Otherwise, it is not
+ preserved across basic blocks. */
+ unsigned int temp_allocated:1; /* never used for code gen */
+
+ tcg_target_long val;
+ intptr_t mem_offset;
+ const char *name;
+} TCGTemp;
+
+typedef struct TCGContext TCGContext;
+
+typedef struct TCGTempSet {
+ unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
+} TCGTempSet;
+
+typedef struct TCGOp {
+ TCGOpcode opc : 8;
+
+    /* The number of out and in parameters for a call.  */
+ unsigned callo : 2;
+ unsigned calli : 6;
+
+ /* Index of the arguments for this op, or -1 for zero-operand ops. */
+ signed args : 16;
+
+    /* Index of the prev/next op, or -1 for the end of the list.  */
+ signed prev : 16;
+ signed next : 16;
+} TCGOp;
+
+QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
+QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
+QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
+
+struct TCGContext {
+ uint8_t *pool_cur, *pool_end;
+ TCGPool *pool_first, *pool_current, *pool_first_large;
+ int nb_labels;
+ int nb_globals;
+ int nb_temps;
+
+ /* goto_tb support */
+ tcg_insn_unit *code_buf;
+ uintptr_t *tb_next;
+ uint16_t *tb_next_offset;
+ uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
+
+ /* liveness analysis */
+ uint16_t *op_dead_args; /* for each operation, each bit tells if the
+ corresponding argument is dead */
+    uint8_t *op_sync_args;  /* for each operation, each bit tells if the
+                               corresponding output argument needs to be
+                               synced to memory. */
+
+ TCGRegSet reserved_regs;
+ intptr_t current_frame_offset;
+ intptr_t frame_start;
+ intptr_t frame_end;
+ int frame_reg;
+
+ tcg_insn_unit *code_ptr;
+
+ GHashTable *helpers;
+
+#ifdef CONFIG_PROFILER
+ /* profiling info */
+ int64_t tb_count1;
+ int64_t tb_count;
+ int64_t op_count; /* total insn count */
+ int op_count_max; /* max insn per TB */
+ int64_t temp_count;
+ int temp_count_max;
+ int64_t del_op_count;
+ int64_t code_in_len;
+ int64_t code_out_len;
+ int64_t interm_time;
+ int64_t code_time;
+ int64_t la_time;
+ int64_t opt_time;
+ int64_t restore_count;
+ int64_t restore_time;
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+ int temps_in_use;
+ int goto_tb_issue_mask;
+#endif
+
+ int gen_first_op_idx;
+ int gen_last_op_idx;
+ int gen_next_op_idx;
+ int gen_next_parm_idx;
+
+ /* Code generation. Note that we specifically do not use tcg_insn_unit
+ here, because there's too much arithmetic throughout that relies
+ on addition and subtraction working on bytes. Rely on the GCC
+ extension that allows arithmetic on void*. */
+ int code_gen_max_blocks;
+ void *code_gen_prologue;
+ void *code_gen_buffer;
+ size_t code_gen_buffer_size;
+ /* threshold to flush the translated code buffer */
+ size_t code_gen_buffer_max_size;
+ void *code_gen_ptr;
+
+ TBContext tb_ctx;
+
+ /* The TCGBackendData structure is private to tcg-target.c. */
+ struct TCGBackendData *be;
+
+ TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
+ TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
+
+    /* Tells which temporary a given register currently holds. It does not
+       take fixed registers into account. */
+ int reg_to_temp[TCG_TARGET_NB_REGS];
+
+ TCGOp gen_op_buf[OPC_BUF_SIZE];
+ TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
+
+ target_ulong gen_opc_pc[OPC_BUF_SIZE];
+ uint16_t gen_opc_icount[OPC_BUF_SIZE];
+ uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+};
+
+extern TCGContext tcg_ctx;
+
+/* The number of opcodes emitted so far. */
+static inline int tcg_op_buf_count(void)
+{
+ return tcg_ctx.gen_next_op_idx;
+}
+
+/* Test for whether to terminate the TB for using too many opcodes. */
+static inline bool tcg_op_buf_full(void)
+{
+ return tcg_op_buf_count() >= OPC_MAX_SIZE;
+}
+
+/* pool based memory allocation */
+
+void *tcg_malloc_internal(TCGContext *s, int size);
+void tcg_pool_reset(TCGContext *s);
+void tcg_pool_delete(TCGContext *s);
+
+static inline void *tcg_malloc(int size)
+{
+ TCGContext *s = &tcg_ctx;
+ uint8_t *ptr, *ptr_end;
+ size = (size + sizeof(long) - 1) & ~(sizeof(long) - 1);
+ ptr = s->pool_cur;
+ ptr_end = ptr + size;
+ if (unlikely(ptr_end > s->pool_end)) {
+ return tcg_malloc_internal(&tcg_ctx, size);
+ } else {
+ s->pool_cur = ptr_end;
+ return ptr;
+ }
+}
+
+void tcg_context_init(TCGContext *s);
+void tcg_prologue_init(TCGContext *s);
+void tcg_func_start(TCGContext *s);
+
+int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
+int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
+ long offset);
+
+void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
+
+TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name);
+TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name);
+TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
+static inline TCGv_i32 tcg_temp_new_i32(void)
+{
+ return tcg_temp_new_internal_i32(0);
+}
+static inline TCGv_i32 tcg_temp_local_new_i32(void)
+{
+ return tcg_temp_new_internal_i32(1);
+}
+void tcg_temp_free_i32(TCGv_i32 arg);
+char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg);
+
+TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name);
+TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name);
+TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+static inline TCGv_i64 tcg_temp_new_i64(void)
+{
+ return tcg_temp_new_internal_i64(0);
+}
+static inline TCGv_i64 tcg_temp_local_new_i64(void)
+{
+ return tcg_temp_new_internal_i64(1);
+}
+void tcg_temp_free_i64(TCGv_i64 arg);
+char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
+
+#if defined(CONFIG_DEBUG_TCG)
+/* If you call tcg_clear_temp_count() at the start of a section of
+ * code which is not supposed to leak any TCG temporaries, then
+ * calling tcg_check_temp_count() at the end of the section will
+ * return 1 if the section did in fact leak a temporary.
+ */
+void tcg_clear_temp_count(void);
+int tcg_check_temp_count(void);
+#else
+#define tcg_clear_temp_count() do { } while (0)
+#define tcg_check_temp_count() 0
+#endif
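+
+/* Usage sketch (illustrative only): bracket the translation of one guest
+   instruction; when CONFIG_DEBUG_TCG is disabled the calls compile away:
+
+       tcg_clear_temp_count();
+       ... generate TCG ops for one guest instruction ...
+       if (tcg_check_temp_count()) {
+           fprintf(stderr, "TCG temporary leak detected\n");
+       }
+*/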
+
+void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf);
+void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf);
+
+#define TCG_CT_ALIAS 0x80
+#define TCG_CT_IALIAS 0x40
+#define TCG_CT_REG 0x01
+#define TCG_CT_CONST 0x02 /* any constant of register size */
+
+typedef struct TCGArgConstraint {
+ uint16_t ct;
+ uint8_t alias_index;
+ union {
+ TCGRegSet regs;
+ } u;
+} TCGArgConstraint;
+
+#define TCG_MAX_OP_ARGS 16
+
+/* Bits for TCGOpDef->flags, 8 bits available. */
+enum {
+ /* Instruction defines the end of a basic block. */
+ TCG_OPF_BB_END = 0x01,
+    /* Instruction clobbers call registers and potentially updates globals.  */
+ TCG_OPF_CALL_CLOBBER = 0x02,
+ /* Instruction has side effects: it cannot be removed if its outputs
+ are not used, and might trigger exceptions. */
+ TCG_OPF_SIDE_EFFECTS = 0x04,
+ /* Instruction operands are 64-bits (otherwise 32-bits). */
+ TCG_OPF_64BIT = 0x08,
+ /* Instruction is optional and not implemented by the host, or insn
+       is generic and should not be implemented by the host.  */
+ TCG_OPF_NOT_PRESENT = 0x10,
+};
+
+typedef struct TCGOpDef {
+ const char *name;
+ uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
+ uint8_t flags;
+ TCGArgConstraint *args_ct;
+ int *sorted_args;
+#if defined(CONFIG_DEBUG_TCG)
+ int used;
+#endif
+} TCGOpDef;
+
+extern TCGOpDef tcg_op_defs[];
+extern const size_t tcg_op_defs_max;
+
+typedef struct TCGTargetOpDef {
+ TCGOpcode op;
+ const char *args_ct_str[TCG_MAX_OP_ARGS];
+} TCGTargetOpDef;
+
+#define tcg_abort() \
+do {\
+ fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\
+ abort();\
+} while (0)
+
+#ifdef CONFIG_DEBUG_TCG
+# define tcg_debug_assert(X) do { assert(X); } while (0)
+#elif QEMU_GNUC_PREREQ(4, 5)
+# define tcg_debug_assert(X) \
+ do { if (!(X)) { __builtin_unreachable(); } } while (0)
+#else
+# define tcg_debug_assert(X) do { (void)(X); } while (0)
+#endif
+
+void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
+
+#if UINTPTR_MAX == UINT32_MAX
+#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I32(n))
+#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I32(GET_TCGV_PTR(n))
+
+#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i32((intptr_t)(V)))
+#define tcg_global_reg_new_ptr(R, N) \
+ TCGV_NAT_TO_PTR(tcg_global_reg_new_i32((R), (N)))
+#define tcg_global_mem_new_ptr(R, O, N) \
+ TCGV_NAT_TO_PTR(tcg_global_mem_new_i32((R), (O), (N)))
+#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i32())
+#define tcg_temp_free_ptr(T) tcg_temp_free_i32(TCGV_PTR_TO_NAT(T))
+#else
+#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I64(n))
+#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I64(GET_TCGV_PTR(n))
+
+#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i64((intptr_t)(V)))
+#define tcg_global_reg_new_ptr(R, N) \
+ TCGV_NAT_TO_PTR(tcg_global_reg_new_i64((R), (N)))
+#define tcg_global_mem_new_ptr(R, O, N) \
+ TCGV_NAT_TO_PTR(tcg_global_mem_new_i64((R), (O), (N)))
+#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i64())
+#define tcg_temp_free_ptr(T) tcg_temp_free_i64(TCGV_PTR_TO_NAT(T))
+#endif
+
+void tcg_gen_callN(TCGContext *s, void *func,
+ TCGArg ret, int nargs, TCGArg *args);
+
+void tcg_op_remove(TCGContext *s, TCGOp *op);
+void tcg_optimize(TCGContext *s);
+
+/* only used for debugging purposes */
+void tcg_dump_ops(TCGContext *s);
+
+void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf);
+TCGv_i32 tcg_const_i32(int32_t val);
+TCGv_i64 tcg_const_i64(int64_t val);
+TCGv_i32 tcg_const_local_i32(int32_t val);
+TCGv_i64 tcg_const_local_i64(int64_t val);
+
+TCGLabel *gen_new_label(void);
+
+/**
+ * label_arg
+ * @l: label
+ *
+ * Encode a label for storage in the TCG opcode stream.
+ */
+
+static inline TCGArg label_arg(TCGLabel *l)
+{
+ return (uintptr_t)l;
+}
+
+/**
+ * arg_label
+ * @i: value
+ *
+ * The opposite of label_arg. Retrieve a label from the
+ * encoding of the TCG opcode stream.
+ */
+
+static inline TCGLabel *arg_label(TCGArg i)
+{
+ return (TCGLabel *)(uintptr_t)i;
+}
+
+/**
+ * tcg_ptr_byte_diff
+ * @a, @b: addresses to be differenced
+ *
+ * There are many places within the TCG backends where we need a byte
+ * difference between two pointers. While this can be accomplished
+ * with local casting, it's easy to get wrong -- especially if one is
+ * concerned with the signedness of the result.
+ *
+ * This version relies on GCC's void pointer arithmetic to get the
+ * correct result.
+ */
+
+static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b)
+{
+ return a - b;
+}
+
+/**
+ * tcg_pcrel_diff
+ * @s: the tcg context
+ * @target: address of the target
+ *
+ * Produce a pc-relative difference, from the current code_ptr
+ * to the destination address.
+ */
+
+static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, void *target)
+{
+ return tcg_ptr_byte_diff(target, s->code_ptr);
+}
+
+/**
+ * tcg_current_code_size
+ * @s: the tcg context
+ *
+ * Compute the current code size within the translation block.
+ * This is used to fill in qemu's data structures for goto_tb.
+ */
+
+static inline size_t tcg_current_code_size(TCGContext *s)
+{
+ return tcg_ptr_byte_diff(s->code_ptr, s->code_buf);
+}
+
+/* Combine the TCGMemOp and mmu_idx parameters into a single value. */
+typedef uint32_t TCGMemOpIdx;
+
+/**
+ * make_memop_idx
+ * @op: memory operation
+ * @idx: mmu index
+ *
+ * Encode these values into a single parameter.
+ */
+static inline TCGMemOpIdx make_memop_idx(TCGMemOp op, unsigned idx)
+{
+ tcg_debug_assert(idx <= 15);
+ return (op << 4) | idx;
+}
+
+/**
+ * get_memop
+ * @oi: combined op/idx parameter
+ *
+ * Extract the memory operation from the combined value.
+ */
+static inline TCGMemOp get_memop(TCGMemOpIdx oi)
+{
+ return oi >> 4;
+}
+
+/**
+ * get_mmuidx
+ * @oi: combined op/idx parameter
+ *
+ * Extract the mmu index from the combined value.
+ */
+static inline unsigned get_mmuidx(TCGMemOpIdx oi)
+{
+ return oi & 15;
+}
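+
+/* Example round trip (illustrative only), e.g. a little-endian 32-bit
+   load with mmu index 1:
+
+       TCGMemOpIdx oi = make_memop_idx(MO_LEUL, 1);
+       assert(get_memop(oi) == MO_LEUL);
+       assert(get_mmuidx(oi) == 1);
+*/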
+
+/**
+ * tcg_qemu_tb_exec:
+ * @env: CPUArchState * for the CPU
+ * @tb_ptr: address of generated code for the TB to execute
+ *
+ * Start executing code from a given translation block.
+ * Where translation blocks have been linked, execution
+ * may proceed from the given TB into successive ones.
+ * Control eventually returns only when some action is needed
+ * from the top-level loop: either control must pass to a TB
+ * which has not yet been directly linked, or an asynchronous
+ * event such as an interrupt needs handling.
+ *
+ * The return value is a pointer to the next TB to execute
+ * (if known; otherwise zero). This pointer is assumed to be
+ * 4-aligned, and the bottom two bits are used to return further
+ * information:
+ * 0, 1: the link between this TB and the next is via the specified
+ * TB index (0 or 1). That is, we left the TB via (the equivalent
+ * of) "goto_tb <index>". The main loop uses this to determine
+ * how to link the TB just executed to the next.
+ * 2: we are using instruction counting code generation, and we
+ * did not start executing this TB because the instruction counter
+ * would hit zero midway through it. In this case the next-TB pointer
+ * returned is the TB we were about to execute, and the caller must
+ * arrange to execute the remaining count of instructions.
+ * 3: we stopped because the CPU's exit_request flag was set
+ * (usually meaning that there is an interrupt that needs to be
+ * handled). The next-TB pointer returned is the TB we were
+ * about to execute when we noticed the pending exit request.
+ *
+ * If the bottom two bits indicate an exit-via-index then the CPU
+ * state is correctly synchronised and ready for execution of the next
+ * TB (and in particular the guest PC is the address to execute next).
+ * Otherwise, we gave up on execution of this TB before it started, and
+ * the caller must fix up the CPU state by calling the CPU's
+ * synchronize_from_tb() method with the next-TB pointer we return (falling
+ * back to calling the CPU's set_pc method with tb->pc if no
+ * synchronize_from_tb() method exists).
+ *
+ * Note that TCG targets may use a different definition of tcg_qemu_tb_exec
+ * to this default (which just calls the prologue.code emitted by
+ * tcg_target_qemu_prologue()).
+ */
+#define TB_EXIT_MASK 3
+#define TB_EXIT_IDX0 0
+#define TB_EXIT_IDX1 1
+#define TB_EXIT_ICOUNT_EXPIRED 2
+#define TB_EXIT_REQUESTED 3
+
+#ifdef HAVE_TCG_QEMU_TB_EXEC
+uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
+#else
+# define tcg_qemu_tb_exec(env, tb_ptr) \
+ ((uintptr_t (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr)
+#endif
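+
+/* Sketch of how a caller decodes the return value (illustrative only; the
+   real dispatch lives in the cpu-exec main loop):
+
+       uintptr_t ret = tcg_qemu_tb_exec(env, tb_ptr);
+       TranslationBlock *next_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
+       switch (ret & TB_EXIT_MASK) {
+       case TB_EXIT_IDX0:
+       case TB_EXIT_IDX1:
+           ... CPU state is in sync; the TB just executed may be linked to
+               next_tb via the indicated goto_tb slot ...
+           break;
+       case TB_EXIT_ICOUNT_EXPIRED:
+       case TB_EXIT_REQUESTED:
+           ... execution did not start; fix up CPU state from next_tb via
+               synchronize_from_tb() or set_pc ...
+           break;
+       }
+*/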
+
+void tcg_register_jit(void *buf, size_t buf_size);
+
+/*
+ * Memory helpers that will be used by TCG generated code.
+ */
+#ifdef CONFIG_SOFTMMU
+/* Value zero-extended to tcg register size. */
+tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+
+/* Value sign-extended to tcg register size. */
+tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+
+void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+
+/* Temporary aliases until backends are converted. */
+#ifdef TARGET_WORDS_BIGENDIAN
+# define helper_ret_ldsw_mmu helper_be_ldsw_mmu
+# define helper_ret_lduw_mmu helper_be_lduw_mmu
+# define helper_ret_ldsl_mmu helper_be_ldsl_mmu
+# define helper_ret_ldul_mmu helper_be_ldul_mmu
+# define helper_ret_ldq_mmu helper_be_ldq_mmu
+# define helper_ret_stw_mmu helper_be_stw_mmu
+# define helper_ret_stl_mmu helper_be_stl_mmu
+# define helper_ret_stq_mmu helper_be_stq_mmu
+#else
+# define helper_ret_ldsw_mmu helper_le_ldsw_mmu
+# define helper_ret_lduw_mmu helper_le_lduw_mmu
+# define helper_ret_ldsl_mmu helper_le_ldsl_mmu
+# define helper_ret_ldul_mmu helper_le_ldul_mmu
+# define helper_ret_ldq_mmu helper_le_ldq_mmu
+# define helper_ret_stw_mmu helper_le_stw_mmu
+# define helper_ret_stl_mmu helper_le_stl_mmu
+# define helper_ret_stq_mmu helper_le_stq_mmu
+#endif
+
+#endif /* CONFIG_SOFTMMU */
+
+#endif /* TCG_H */
diff --git a/tcg/tci/README b/tcg/tci/README
new file mode 100644
index 00000000..dc57f076
--- /dev/null
+++ b/tcg/tci/README
@@ -0,0 +1,130 @@
+TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil.
+
+This file is released under the BSD license.
+
+1) Introduction
+
+TCG (Tiny Code Generator) is a code generator which translates
+code fragments ("basic blocks") from target code (any of the
+targets supported by QEMU) to a code representation which
+can be run on a host.
+
+QEMU can create native code for some hosts (arm, hppa, i386, ia64, ppc, ppc64,
+s390, sparc, x86_64). For others, unofficial host support was written.
+
+By adding a code generator for a virtual machine and using an
+interpreter for the generated bytecode, it is possible to
+support (almost) any host.
+
+This is what TCI (Tiny Code Interpreter) does.
+
+2) Implementation
+
+Like each TCG host backend, TCI implements the code generator in
+tcg-target.c, tcg-target.h. Both files are in the directory tcg/tci.
+
+The additional file tcg/tci.c adds the interpreter.
+
+The bytecode consists of opcodes (with the same numeric values as those
+used by TCG), a command length, and arguments of variable size and number.
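+
+As a sketch (inferred from tcg_out_op_t and the old_code_ptr[1] length
+patching in tcg/tci/tcg-target.c), each bytecode instruction is laid out
+as:
+
+    byte 0    TCG opcode number
+    byte 1    total length of the instruction in bytes
+    byte 2..  operands: registers as single bytes; constants as 32 or 64
+              bit immediates preceded by a TCG_CONST marker byte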
+
+3) Usage
+
+For hosts without native TCG, the interpreter TCI must be enabled by
+
+ configure --enable-tcg-interpreter
+
+If configure is called without --enable-tcg-interpreter, it will
+suggest using this option. Setting it automatically would need
+additional code in configure which must be fixed when new native TCG
+implementations are added.
+
+System emulation should work on any 32 or 64 bit host.
+User mode emulation might work. Maybe a new linker script (*.ld)
+is needed. Byte order might be wrong (on big endian hosts)
+and need fixes in configure.
+
+For hosts with native TCG, the interpreter TCI can be enabled by
+
+ configure --enable-tcg-interpreter
+
+The only difference between running QEMU with TCI and running without it
+should be speed. Especially during development of TCI, it was very
+useful to compare runs with and without TCI. Create /tmp/qemu.log by
+
+ qemu-system-i386 -d in_asm,op_opt,cpu -D /tmp/qemu.log -singlestep
+
+once with the interpreter and once without it, and compare the resulting
+qemu.log files. This is also useful to see the effects of additional
+registers or additional opcodes (it is easy to modify the virtual machine).
+It can also be used to verify native TCGs.
+
+Hosts with native TCG can also enable TCI by claiming to be unsupported:
+
+ configure --cpu=unknown --enable-tcg-interpreter
+
+configure then no longer uses the native linker script (*.ld) for
+user mode emulation.
+
+
+4) Status
+
+TCI needs special implementations for 32 and 64 bit hosts, 32 and 64 bit
+targets, and for hosts and targets with the same or different endianness.
+
+ | host (le) host (be)
+ | 32 64 32 64
+------------+------------------------------------------------------------
+target (le) | s0, u0 s1, u1 s?, u? s?, u?
+32 bit |
+ |
+target (le) | sc, uc s1, u1 s?, u? s?, u?
+64 bit |
+ |
+target (be) | sc, u0 sc, uc s?, u? s?, u?
+32 bit |
+ |
+target (be) | sc, uc sc, uc s?, u? s?, u?
+64 bit |
+ |
+
+System emulation
+s? = untested
+sc = compiles
+s0 = bios works
+s1 = grub works
+s2 = Linux boots
+
+Linux user mode emulation
+u? = untested
+uc = compiles
+u0 = static hello works
+u1 = linux-user-test works
+
+5) Todo list
+
+* TCI is not widely tested. It was written and tested on an x86_64 host
+ running i386 and x86_64 system emulation and Linux user mode.
+ A cross compiled QEMU for i386 host also works with the same basic tests.
+ A cross compiled QEMU for mipsel host works, too. It is terribly slow
+ because I run it in a mips malta emulation, so it is an interpreted
+ emulation in an emulation.
+ A cross compiled QEMU for arm host works (tested with pc bios).
+ A cross compiled QEMU for ppc host works at least partially:
+ i386-linux-user/qemu-i386 can run a simple hello-world program
+ (tested in a ppc emulation).
+
+* Some TCG opcodes are missing in the code generator and/or
+ in the interpreter. These opcodes raise a runtime exception, so it is
+ possible to see where code must be added.
+
+* The pseudo code is not optimized and still ugly. For hosts with special
+ alignment requirements, it needs some fixes (maybe aligned bytecode
+ would also improve speed for hosts which support byte alignment).
+
+* A better disassembler for the pseudo code would be nice (a very primitive
+ disassembler is included in tcg-target.c).
+
+* It might be useful to have a runtime option which selects the native TCG
+ or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
+ is a configure option, so you need two compilations of QEMU.
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
new file mode 100644
index 00000000..83472dbc
--- /dev/null
+++ b/tcg/tci/tcg-target.c
@@ -0,0 +1,875 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-be-null.h"
+
+/* TODO list:
+ * - See TODO comments in code.
+ */
+
+/* Marker for missing code. */
+#define TODO() \
+ do { \
+ fprintf(stderr, "TODO %s:%u: %s()\n", \
+ __FILE__, __LINE__, __func__); \
+ tcg_abort(); \
+ } while (0)
+
+/* Bitfield n...m (in 32 bit value). */
+#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
+
+/* Macros used in tcg_target_op_defs. */
+#define R "r"
+#define RI "ri"
+#if TCG_TARGET_REG_BITS == 32
+# define R64 "r", "r"
+#else
+# define R64 "r"
+#endif
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+# define L "L", "L"
+# define S "S", "S"
+#else
+# define L "L"
+# define S "S"
+#endif
+
+/* TODO: documentation. */
+static const TCGTargetOpDef tcg_target_op_defs[] = {
+ { INDEX_op_exit_tb, { NULL } },
+ { INDEX_op_goto_tb, { NULL } },
+ { INDEX_op_br, { NULL } },
+
+ { INDEX_op_ld8u_i32, { R, R } },
+ { INDEX_op_ld8s_i32, { R, R } },
+ { INDEX_op_ld16u_i32, { R, R } },
+ { INDEX_op_ld16s_i32, { R, R } },
+ { INDEX_op_ld_i32, { R, R } },
+ { INDEX_op_st8_i32, { R, R } },
+ { INDEX_op_st16_i32, { R, R } },
+ { INDEX_op_st_i32, { R, R } },
+
+ { INDEX_op_add_i32, { R, RI, RI } },
+ { INDEX_op_sub_i32, { R, RI, RI } },
+ { INDEX_op_mul_i32, { R, RI, RI } },
+#if TCG_TARGET_HAS_div_i32
+ { INDEX_op_div_i32, { R, R, R } },
+ { INDEX_op_divu_i32, { R, R, R } },
+ { INDEX_op_rem_i32, { R, R, R } },
+ { INDEX_op_remu_i32, { R, R, R } },
+#elif TCG_TARGET_HAS_div2_i32
+ { INDEX_op_div2_i32, { R, R, "0", "1", R } },
+ { INDEX_op_divu2_i32, { R, R, "0", "1", R } },
+#endif
+ /* TODO: Does R, RI, RI result in faster code than R, R, RI?
+ If both operands are constants, we can optimize. */
+ { INDEX_op_and_i32, { R, RI, RI } },
+#if TCG_TARGET_HAS_andc_i32
+ { INDEX_op_andc_i32, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_eqv_i32
+ { INDEX_op_eqv_i32, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_nand_i32
+ { INDEX_op_nand_i32, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_nor_i32
+ { INDEX_op_nor_i32, { R, RI, RI } },
+#endif
+ { INDEX_op_or_i32, { R, RI, RI } },
+#if TCG_TARGET_HAS_orc_i32
+ { INDEX_op_orc_i32, { R, RI, RI } },
+#endif
+ { INDEX_op_xor_i32, { R, RI, RI } },
+ { INDEX_op_shl_i32, { R, RI, RI } },
+ { INDEX_op_shr_i32, { R, RI, RI } },
+ { INDEX_op_sar_i32, { R, RI, RI } },
+#if TCG_TARGET_HAS_rot_i32
+ { INDEX_op_rotl_i32, { R, RI, RI } },
+ { INDEX_op_rotr_i32, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_deposit_i32
+ { INDEX_op_deposit_i32, { R, "0", R } },
+#endif
+
+ { INDEX_op_brcond_i32, { R, RI } },
+
+ { INDEX_op_setcond_i32, { R, R, RI } },
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_setcond_i64, { R, R, RI } },
+#endif /* TCG_TARGET_REG_BITS == 64 */
+
+#if TCG_TARGET_REG_BITS == 32
+ /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
+ { INDEX_op_add2_i32, { R, R, R, R, R, R } },
+ { INDEX_op_sub2_i32, { R, R, R, R, R, R } },
+ { INDEX_op_brcond2_i32, { R, R, RI, RI } },
+ { INDEX_op_mulu2_i32, { R, R, R, R } },
+ { INDEX_op_setcond2_i32, { R, R, R, RI, RI } },
+#endif
+
+#if TCG_TARGET_HAS_not_i32
+ { INDEX_op_not_i32, { R, R } },
+#endif
+#if TCG_TARGET_HAS_neg_i32
+ { INDEX_op_neg_i32, { R, R } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_ld8u_i64, { R, R } },
+ { INDEX_op_ld8s_i64, { R, R } },
+ { INDEX_op_ld16u_i64, { R, R } },
+ { INDEX_op_ld16s_i64, { R, R } },
+ { INDEX_op_ld32u_i64, { R, R } },
+ { INDEX_op_ld32s_i64, { R, R } },
+ { INDEX_op_ld_i64, { R, R } },
+
+ { INDEX_op_st8_i64, { R, R } },
+ { INDEX_op_st16_i64, { R, R } },
+ { INDEX_op_st32_i64, { R, R } },
+ { INDEX_op_st_i64, { R, R } },
+
+ { INDEX_op_add_i64, { R, RI, RI } },
+ { INDEX_op_sub_i64, { R, RI, RI } },
+ { INDEX_op_mul_i64, { R, RI, RI } },
+#if TCG_TARGET_HAS_div_i64
+ { INDEX_op_div_i64, { R, R, R } },
+ { INDEX_op_divu_i64, { R, R, R } },
+ { INDEX_op_rem_i64, { R, R, R } },
+ { INDEX_op_remu_i64, { R, R, R } },
+#elif TCG_TARGET_HAS_div2_i64
+ { INDEX_op_div2_i64, { R, R, "0", "1", R } },
+ { INDEX_op_divu2_i64, { R, R, "0", "1", R } },
+#endif
+ { INDEX_op_and_i64, { R, RI, RI } },
+#if TCG_TARGET_HAS_andc_i64
+ { INDEX_op_andc_i64, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_eqv_i64
+ { INDEX_op_eqv_i64, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_nand_i64
+ { INDEX_op_nand_i64, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_nor_i64
+ { INDEX_op_nor_i64, { R, RI, RI } },
+#endif
+ { INDEX_op_or_i64, { R, RI, RI } },
+#if TCG_TARGET_HAS_orc_i64
+ { INDEX_op_orc_i64, { R, RI, RI } },
+#endif
+ { INDEX_op_xor_i64, { R, RI, RI } },
+ { INDEX_op_shl_i64, { R, RI, RI } },
+ { INDEX_op_shr_i64, { R, RI, RI } },
+ { INDEX_op_sar_i64, { R, RI, RI } },
+#if TCG_TARGET_HAS_rot_i64
+ { INDEX_op_rotl_i64, { R, RI, RI } },
+ { INDEX_op_rotr_i64, { R, RI, RI } },
+#endif
+#if TCG_TARGET_HAS_deposit_i64
+ { INDEX_op_deposit_i64, { R, "0", R } },
+#endif
+ { INDEX_op_brcond_i64, { R, RI } },
+
+#if TCG_TARGET_HAS_ext8s_i64
+ { INDEX_op_ext8s_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext16s_i64
+ { INDEX_op_ext16s_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext32s_i64
+ { INDEX_op_ext32s_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext8u_i64
+ { INDEX_op_ext8u_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext16u_i64
+ { INDEX_op_ext16u_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext32u_i64
+ { INDEX_op_ext32u_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_bswap16_i64
+ { INDEX_op_bswap16_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_bswap32_i64
+ { INDEX_op_bswap32_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_bswap64_i64
+ { INDEX_op_bswap64_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_not_i64
+ { INDEX_op_not_i64, { R, R } },
+#endif
+#if TCG_TARGET_HAS_neg_i64
+ { INDEX_op_neg_i64, { R, R } },
+#endif
+#endif /* TCG_TARGET_REG_BITS == 64 */
+
+ { INDEX_op_qemu_ld_i32, { R, L } },
+ { INDEX_op_qemu_ld_i64, { R64, L } },
+
+ { INDEX_op_qemu_st_i32, { R, S } },
+ { INDEX_op_qemu_st_i64, { R64, S } },
+
+#if TCG_TARGET_HAS_ext8s_i32
+ { INDEX_op_ext8s_i32, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext16s_i32
+ { INDEX_op_ext16s_i32, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext8u_i32
+ { INDEX_op_ext8u_i32, { R, R } },
+#endif
+#if TCG_TARGET_HAS_ext16u_i32
+ { INDEX_op_ext16u_i32, { R, R } },
+#endif
+
+#if TCG_TARGET_HAS_bswap16_i32
+ { INDEX_op_bswap16_i32, { R, R } },
+#endif
+#if TCG_TARGET_HAS_bswap32_i32
+ { INDEX_op_bswap32_i32, { R, R } },
+#endif
+
+ { -1 },
+};
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+#if 0 /* used for TCG_REG_CALL_STACK */
+ TCG_REG_R4,
+#endif
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+#if TCG_TARGET_NB_REGS >= 16
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+#endif
+};
+
+#if MAX_OPC_PARAM_IARGS != 5
+# error Fix needed, number of supported input arguments changed!
+#endif
+
+static const int tcg_target_call_iarg_regs[] = {
+ TCG_REG_R0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+#if 0 /* used for TCG_REG_CALL_STACK */
+ TCG_REG_R4,
+#endif
+ TCG_REG_R5,
+#if TCG_TARGET_REG_BITS == 32
+ /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
+ TCG_REG_R6,
+ TCG_REG_R7,
+#if TCG_TARGET_NB_REGS >= 16
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+#else
+# error Too few input registers available
+#endif
+#endif
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_R0,
+#if TCG_TARGET_REG_BITS == 32
+ TCG_REG_R1
+#endif
+};
+
+#ifndef NDEBUG
+static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "r00",
+ "r01",
+ "r02",
+ "r03",
+ "r04",
+ "r05",
+ "r06",
+ "r07",
+#if TCG_TARGET_NB_REGS >= 16
+ "r08",
+ "r09",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+#if TCG_TARGET_NB_REGS >= 32
+ "r16",
+ "r17",
+ "r18",
+ "r19",
+ "r20",
+ "r21",
+ "r22",
+ "r23",
+ "r24",
+ "r25",
+ "r26",
+ "r27",
+ "r28",
+ "r29",
+ "r30",
+ "r31"
+#endif
+#endif
+};
+#endif
+
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ /* tcg_out_reloc always uses the same type, addend. */
+ assert(type == sizeof(tcg_target_long));
+ assert(addend == 0);
+ assert(value != 0);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_patch32(code_ptr, value);
+ } else {
+ tcg_patch64(code_ptr, value);
+ }
+}
+
+/* Parse target specific constraints. */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str = *pct_str;
+ switch (ct_str[0]) {
+ case 'r':
+ case 'L': /* qemu_ld constraint */
+ case 'S': /* qemu_st constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1);
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
+/* Show current bytecode. Used by tcg interpreter. */
+void tci_disas(uint8_t opc)
+{
+ const TCGOpDef *def = &tcg_op_defs[opc];
+ fprintf(stderr, "TCG %s %u, %u, %u\n",
+ def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs);
+}
+#endif
+
+/* Write value (native size). */
+static void tcg_out_i(TCGContext *s, tcg_target_ulong v)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out32(s, v);
+ } else {
+ tcg_out64(s, v);
+ }
+}
+
+/* Write opcode. */
+static void tcg_out_op_t(TCGContext *s, TCGOpcode op)
+{
+ tcg_out8(s, op);
+ tcg_out8(s, 0);
+}
+
+/* Write register. */
+static void tcg_out_r(TCGContext *s, TCGArg t0)
+{
+ assert(t0 < TCG_TARGET_NB_REGS);
+ tcg_out8(s, t0);
+}
+
+/* Write register or constant (native size). */
+static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg)
+{
+ if (const_arg) {
+ assert(const_arg == 1);
+ tcg_out8(s, TCG_CONST);
+ tcg_out_i(s, arg);
+ } else {
+ tcg_out_r(s, arg);
+ }
+}
+
+/* Write register or constant (32 bit). */
+static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg)
+{
+ if (const_arg) {
+ assert(const_arg == 1);
+ tcg_out8(s, TCG_CONST);
+ tcg_out32(s, arg);
+ } else {
+ tcg_out_r(s, arg);
+ }
+}
+
+#if TCG_TARGET_REG_BITS == 64
+/* Write register or constant (64 bit). */
+static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg)
+{
+ if (const_arg) {
+ assert(const_arg == 1);
+ tcg_out8(s, TCG_CONST);
+ tcg_out64(s, arg);
+ } else {
+ tcg_out_r(s, arg);
+ }
+}
+#endif
+
+/* Write label. */
+static void tci_out_label(TCGContext *s, TCGLabel *label)
+{
+ if (label->has_value) {
+ tcg_out_i(s, label->u.value);
+ assert(label->u.value);
+ } else {
+ tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0);
+ s->code_ptr += sizeof(tcg_target_ulong);
+ }
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
+ intptr_t arg2)
+{
+ uint8_t *old_code_ptr = s->code_ptr;
+ if (type == TCG_TYPE_I32) {
+ tcg_out_op_t(s, INDEX_op_ld_i32);
+ tcg_out_r(s, ret);
+ tcg_out_r(s, arg1);
+ tcg_out32(s, arg2);
+ } else {
+ assert(type == TCG_TYPE_I64);
+#if TCG_TARGET_REG_BITS == 64
+ tcg_out_op_t(s, INDEX_op_ld_i64);
+ tcg_out_r(s, ret);
+ tcg_out_r(s, arg1);
+ assert(arg2 == (int32_t)arg2);
+ tcg_out32(s, arg2);
+#else
+ TODO();
+#endif
+ }
+ old_code_ptr[1] = s->code_ptr - old_code_ptr;
+}
+
+static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ uint8_t *old_code_ptr = s->code_ptr;
+ assert(ret != arg);
+#if TCG_TARGET_REG_BITS == 32
+ tcg_out_op_t(s, INDEX_op_mov_i32);
+#else
+ tcg_out_op_t(s, INDEX_op_mov_i64);
+#endif
+ tcg_out_r(s, ret);
+ tcg_out_r(s, arg);
+ old_code_ptr[1] = s->code_ptr - old_code_ptr;
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg t0, tcg_target_long arg)
+{
+ uint8_t *old_code_ptr = s->code_ptr;
+ uint32_t arg32 = arg;
+ if (type == TCG_TYPE_I32 || arg == arg32) {
+ tcg_out_op_t(s, INDEX_op_movi_i32);
+ tcg_out_r(s, t0);
+ tcg_out32(s, arg32);
+ } else {
+ assert(type == TCG_TYPE_I64);
+#if TCG_TARGET_REG_BITS == 64
+ tcg_out_op_t(s, INDEX_op_movi_i64);
+ tcg_out_r(s, t0);
+ tcg_out64(s, arg);
+#else
+ TODO();
+#endif
+ }
+ old_code_ptr[1] = s->code_ptr - old_code_ptr;
+}
+
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+{
+ uint8_t *old_code_ptr = s->code_ptr;
+ tcg_out_op_t(s, INDEX_op_call);
+ tcg_out_ri(s, 1, (uintptr_t)arg);
+ old_code_ptr[1] = s->code_ptr - old_code_ptr;
+}
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+ const int *const_args)
+{
+ uint8_t *old_code_ptr = s->code_ptr;
+
+ tcg_out_op_t(s, opc);
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ tcg_out64(s, args[0]);
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_offset) {
+ /* Direct jump method. */
+ assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset));
+ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+ tcg_out32(s, 0);
+ } else {
+ /* Indirect jump method. */
+ TODO();
+ }
+ assert(args[0] < ARRAY_SIZE(s->tb_next_offset));
+ s->tb_next_offset[args[0]] = tcg_current_code_size(s);
+ break;
+ case INDEX_op_br:
+ tci_out_label(s, arg_label(args[0]));
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_ri32(s, const_args[2], args[2]);
+ tcg_out8(s, args[3]); /* condition */
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_setcond2_i32:
+ /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_r(s, args[2]);
+ tcg_out_ri32(s, const_args[3], args[3]);
+ tcg_out_ri32(s, const_args[4], args[4]);
+ tcg_out8(s, args[5]); /* condition */
+ break;
+#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_setcond_i64:
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_ri64(s, const_args[2], args[2]);
+ tcg_out8(s, args[3]); /* condition */
+ break;
+#endif
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ assert(args[2] == (int32_t)args[2]);
+ tcg_out32(s, args[2]);
+ break;
+ case INDEX_op_add_i32:
+ case INDEX_op_sub_i32:
+ case INDEX_op_mul_i32:
+ case INDEX_op_and_i32:
+ case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */
+ case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */
+ case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */
+ case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */
+ case INDEX_op_or_i32:
+ case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */
+ case INDEX_op_xor_i32:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
+ case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
+ tcg_out_r(s, args[0]);
+ tcg_out_ri32(s, const_args[1], args[1]);
+ tcg_out_ri32(s, const_args[2], args[2]);
+ break;
+ case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_r(s, args[2]);
+ assert(args[3] <= UINT8_MAX);
+ tcg_out8(s, args[3]);
+ assert(args[4] <= UINT8_MAX);
+ tcg_out8(s, args[4]);
+ break;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i64:
+ case INDEX_op_and_i64:
+ case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */
+ case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */
+ case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */
+ case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */
+ case INDEX_op_or_i64:
+ case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */
+ case INDEX_op_xor_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
+ case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
+ tcg_out_r(s, args[0]);
+ tcg_out_ri64(s, const_args[1], args[1]);
+ tcg_out_ri64(s, const_args[2], args[2]);
+ break;
+ case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). */
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_r(s, args[2]);
+ assert(args[3] <= UINT8_MAX);
+ tcg_out8(s, args[3]);
+ assert(args[4] <= UINT8_MAX);
+ tcg_out8(s, args[4]);
+ break;
+ case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+ case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+ case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+ case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+ TODO();
+ break;
+ case INDEX_op_div2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */
+ case INDEX_op_divu2_i64: /* Optional (TCG_TARGET_HAS_div2_i64). */
+ TODO();
+ break;
+ case INDEX_op_brcond_i64:
+ tcg_out_r(s, args[0]);
+ tcg_out_ri64(s, const_args[1], args[1]);
+ tcg_out8(s, args[2]); /* condition */
+ tci_out_label(s, arg_label(args[3]));
+ break;
+ case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */
+ case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */
+ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */
+ case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */
+ case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */
+ case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */
+ case INDEX_op_ext8u_i64: /* Optional (TCG_TARGET_HAS_ext8u_i64). */
+ case INDEX_op_ext16s_i64: /* Optional (TCG_TARGET_HAS_ext16s_i64). */
+ case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */
+ case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */
+ case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */
+#endif /* TCG_TARGET_REG_BITS == 64 */
+ case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */
+ case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */
+ case INDEX_op_ext8s_i32: /* Optional (TCG_TARGET_HAS_ext8s_i32). */
+ case INDEX_op_ext16s_i32: /* Optional (TCG_TARGET_HAS_ext16s_i32). */
+ case INDEX_op_ext8u_i32: /* Optional (TCG_TARGET_HAS_ext8u_i32). */
+ case INDEX_op_ext16u_i32: /* Optional (TCG_TARGET_HAS_ext16u_i32). */
+ case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */
+ case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ break;
+ case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+ case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+ case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+ case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+ tcg_out_r(s, args[0]);
+ tcg_out_ri32(s, const_args[1], args[1]);
+ tcg_out_ri32(s, const_args[2], args[2]);
+ break;
+ case INDEX_op_div2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */
+ case INDEX_op_divu2_i32: /* Optional (TCG_TARGET_HAS_div2_i32). */
+ TODO();
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_r(s, args[2]);
+ tcg_out_r(s, args[3]);
+ tcg_out_r(s, args[4]);
+ tcg_out_r(s, args[5]);
+ break;
+ case INDEX_op_brcond2_i32:
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_ri32(s, const_args[2], args[2]);
+ tcg_out_ri32(s, const_args[3], args[3]);
+ tcg_out8(s, args[4]); /* condition */
+ tci_out_label(s, arg_label(args[5]));
+ break;
+ case INDEX_op_mulu2_i32:
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_r(s, args[2]);
+ tcg_out_r(s, args[3]);
+ break;
+#endif
+ case INDEX_op_brcond_i32:
+ tcg_out_r(s, args[0]);
+ tcg_out_ri32(s, const_args[1], args[1]);
+ tcg_out8(s, args[2]); /* condition */
+ tci_out_label(s, arg_label(args[3]));
+ break;
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_r(s, *args++);
+ tcg_out_r(s, *args++);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_r(s, *args++);
+ }
+ tcg_out_i(s, *args++);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_r(s, *args++);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_r(s, *args++);
+ }
+ tcg_out_r(s, *args++);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_r(s, *args++);
+ }
+ tcg_out_i(s, *args++);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_r(s, *args++);
+ tcg_out_r(s, *args++);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_r(s, *args++);
+ }
+ tcg_out_i(s, *args++);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_r(s, *args++);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_r(s, *args++);
+ }
+ tcg_out_r(s, *args++);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_r(s, *args++);
+ }
+ tcg_out_i(s, *args++);
+ break;
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
+ case INDEX_op_movi_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+ old_code_ptr[1] = s->code_ptr - old_code_ptr;
+}
+
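+/* Write a store instruction: source register, base register and a
+   32 bit offset into host memory. */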
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
+ intptr_t arg2)
+{
+ uint8_t *old_code_ptr = s->code_ptr;
+ if (type == TCG_TYPE_I32) {
+ tcg_out_op_t(s, INDEX_op_st_i32);
+ tcg_out_r(s, arg);
+ tcg_out_r(s, arg1);
+ tcg_out32(s, arg2);
+ } else {
+ assert(type == TCG_TYPE_I64);
+#if TCG_TARGET_REG_BITS == 64
+ tcg_out_op_t(s, INDEX_op_st_i64);
+ tcg_out_r(s, arg);
+ tcg_out_r(s, arg1);
+ tcg_out32(s, arg2);
+#else
+ TODO();
+#endif
+ }
+ old_code_ptr[1] = s->code_ptr - old_code_ptr;
+}
+
+/* Test if a constant matches the constraint. */
+static int tcg_target_const_match(tcg_target_long val, TCGType type,
+ const TCGArgConstraint *arg_ct)
+{
+    /* No need to return exactly 0 or 1: callers only check for zero vs. non-zero. */
+ return arg_ct->ct & TCG_CT_CONST;
+}
+
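+/* Describe the interpreter's register set, call-clobbered registers and
+   reserved stack register to the common TCG code. */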
+static void tcg_target_init(TCGContext *s)
+{
+#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
+ const char *envval = getenv("DEBUG_TCG");
+ if (envval) {
+ qemu_set_log(strtol(envval, NULL, 0));
+ }
+#endif
+
+    /* The bytecode uses a single uint8_t per tcg opcode, so the number of
+       opcodes must fit in one byte. */
+ assert(ARRAY_SIZE(tcg_op_defs) <= UINT8_MAX);
+
+ /* Registers available for 32 bit operations. */
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0,
+ BIT(TCG_TARGET_NB_REGS) - 1);
+ /* Registers available for 64 bit operations. */
+ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0,
+ BIT(TCG_TARGET_NB_REGS) - 1);
+ /* TODO: Which registers should be set here? */
+ tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+ BIT(TCG_TARGET_NB_REGS) - 1);
+
+ tcg_regset_clear(s->reserved_regs);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+ tcg_add_target_add_op_defs(tcg_target_op_defs);
+
+ /* We use negative offsets from "sp" so that we can distinguish
+ stores that might pretend to be call arguments. */
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ -CPU_TEMP_BUF_NLONGS * sizeof(long),
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+}
+
+/* Generate global QEMU prologue and epilogue code. */
+static inline void tcg_target_qemu_prologue(TCGContext *s)
+{
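+    /* Nothing to do here: the generated bytecode is run by the TCI
+       interpreter (see HAVE_TCG_QEMU_TB_EXEC in tcg-target.h), so no host
+       prologue or epilogue code is emitted. */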
+}
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
new file mode 100644
index 00000000..cbf3f9b5
--- /dev/null
+++ b/tcg/tci/tcg-target.h
@@ -0,0 +1,185 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This code implements a TCG backend which does not generate machine code
+ * for a real target machine but instead generates virtual machine code for
+ * an interpreter. Interpreted pseudo code is slow, but it works on any host.
+ *
+ * Some remarks might help in understanding the code:
+ *
+ * "target" or "TCG target" is the machine which runs the generated code.
+ * This differs from the usual meaning in QEMU, where "target" is the
+ * emulated machine, so the QEMU host is normally identical to the TCG
+ * target. Here the TCG target is a virtual machine, and this virtual
+ * machine must use the same word size as the host machine.
+ * Therefore, we need both 32 and 64 bit virtual machines (interpreters).
+ */
+
+#if !defined(TCG_TARGET_H)
+#define TCG_TARGET_H
+
+#include "config-host.h"
+
+#define TCG_TARGET_INTERPRETER 1
+#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+
+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#else
+# error Unknown pointer size for tci target
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+/* Enable debug output. */
+#define CONFIG_DEBUG_TCG_INTERPRETER
+#endif
+
+/* Optional instructions. */
+
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_trunc_shr_i32 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_div_i64 0
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#else
+#define TCG_TARGET_HAS_mulu2_i32 1
+#endif /* TCG_TARGET_REG_BITS == 64 */
+
+/* Number of registers available.
+   On 32 bit hosts, more than 8 registers are needed to pass call arguments. */
+/* #define TCG_TARGET_NB_REGS 8 */
+#define TCG_TARGET_NB_REGS 16
+/* #define TCG_TARGET_NB_REGS 32 */
+
+/* List of registers which are used by TCG. */
+typedef enum {
+ TCG_REG_R0 = 0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+#if TCG_TARGET_NB_REGS >= 16
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+#if TCG_TARGET_NB_REGS >= 32
+ TCG_REG_R16,
+ TCG_REG_R17,
+ TCG_REG_R18,
+ TCG_REG_R19,
+ TCG_REG_R20,
+ TCG_REG_R21,
+ TCG_REG_R22,
+ TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27,
+ TCG_REG_R28,
+ TCG_REG_R29,
+ TCG_REG_R30,
+ TCG_REG_R31,
+#endif
+#endif
+ /* Special value UINT8_MAX is used by TCI to encode constant values. */
+ TCG_CONST = UINT8_MAX
+} TCGReg;
+
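+/* Register that holds the pointer to the CPU state (env). */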
+#define TCG_AREG0 (TCG_TARGET_NB_REGS - 2)
+
+/* Used for function call generation. */
+#define TCG_REG_CALL_STACK (TCG_TARGET_NB_REGS - 1)
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+#define TCG_TARGET_STACK_ALIGN 16
+
+void tci_disas(uint8_t opc);
+
+#define HAVE_TCG_QEMU_TB_EXEC
+
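+/* TCI writes bytecode into ordinary data memory and never executes it as
+   host instructions, so no instruction cache maintenance is needed. */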
+static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+{
+}
+
+#endif /* TCG_TARGET_H */