| author | fishsoupisgood <github@madingley.org> | 2019-04-29 01:17:54 +0100 | 
|---|---|---|
| committer | fishsoupisgood <github@madingley.org> | 2019-05-27 03:43:43 +0100 | 
| commit | 3f2546b2ef55b661fd8dd69682b38992225e86f6 (patch) | |
| tree | 65ca85f13617aee1dce474596800950f266a456c /tcg | |
Diffstat (limited to 'tcg')
| mode | file | insertions |
|---|---|---|
| -rw-r--r-- | tcg/LICENSE | 3 |
| -rw-r--r-- | tcg/README | 551 |
| -rw-r--r-- | tcg/TODO | 14 |
| -rw-r--r-- | tcg/aarch64/tcg-target.c | 1882 |
| -rw-r--r-- | tcg/aarch64/tcg-target.h | 108 |
| -rw-r--r-- | tcg/arm/tcg-target.c | 2128 |
| -rw-r--r-- | tcg/arm/tcg-target.h | 109 |
| -rw-r--r-- | tcg/i386/tcg-target.c | 2451 |
| -rw-r--r-- | tcg/i386/tcg-target.h | 149 |
| -rw-r--r-- | tcg/ia64/tcg-target.c | 2445 |
| -rw-r--r-- | tcg/ia64/tcg-target.h | 185 |
| -rw-r--r-- | tcg/mips/tcg-target.c | 1828 |
| -rw-r--r-- | tcg/mips/tcg-target.h | 138 |
| -rw-r--r-- | tcg/optimize.c | 1330 |
| -rw-r--r-- | tcg/ppc/tcg-target.c | 2722 |
| -rw-r--r-- | tcg/ppc/tcg-target.h | 112 |
| -rw-r--r-- | tcg/s390/tcg-target.c | 2393 |
| -rw-r--r-- | tcg/s390/tcg-target.h | 123 |
| -rw-r--r-- | tcg/sparc/tcg-target.c | 1650 |
| -rw-r--r-- | tcg/sparc/tcg-target.h | 160 |
| -rw-r--r-- | tcg/tcg-be-ldst.h | 88 |
| -rw-r--r-- | tcg/tcg-be-null.h | 43 |
| -rw-r--r-- | tcg/tcg-op.c | 1945 |
| -rw-r--r-- | tcg/tcg-op.h | 991 |
| -rw-r--r-- | tcg/tcg-opc.h | 195 |
| -rw-r--r-- | tcg/tcg-runtime.h | 16 |
| -rw-r--r-- | tcg/tcg.c | 2764 |
| -rw-r--r-- | tcg/tcg.h | 1011 |
| -rw-r--r-- | tcg/tci/README | 130 |
| -rw-r--r-- | tcg/tci/tcg-target.c | 875 |
| -rw-r--r-- | tcg/tci/tcg-target.h | 185 |
31 files changed, 28724 insertions, 0 deletions
diff --git a/tcg/LICENSE b/tcg/LICENSE new file mode 100644 index 00000000..be817fa1 --- /dev/null +++ b/tcg/LICENSE @@ -0,0 +1,3 @@ +All the files in this directory and subdirectories are released under +a BSD like license (see header in each file). No other license is +accepted. diff --git a/tcg/README b/tcg/README new file mode 100644 index 00000000..a550ff17 --- /dev/null +++ b/tcg/README @@ -0,0 +1,551 @@ +Tiny Code Generator - Fabrice Bellard. + +1) Introduction + +TCG (Tiny Code Generator) began as a generic backend for a C +compiler. It was simplified to be used in QEMU. It also has its roots +in the QOP code generator written by Paul Brook.  + +2) Definitions + +The TCG "target" is the architecture for which we generate the +code. It is of course not the same as the "target" of QEMU which is +the emulated architecture. As TCG started as a generic C backend used +for cross compiling, it is assumed that the TCG target is different +from the host, although it is never the case for QEMU. + +In this document, we use "guest" to specify what architecture we are +emulating; "target" always means the TCG target, the machine on which +we are running QEMU. + +A TCG "function" corresponds to a QEMU Translated Block (TB). + +A TCG "temporary" is a variable only live in a basic +block. Temporaries are allocated explicitly in each function. + +A TCG "local temporary" is a variable only live in a function. Local +temporaries are allocated explicitly in each function. + +A TCG "global" is a variable which is live in all the functions +(equivalent of a C global variable). They are defined before the +functions defined. A TCG global can be a memory location (e.g. a QEMU +CPU register), a fixed host register (e.g. the QEMU CPU state pointer) +or a memory location which is stored in a register outside QEMU TBs +(not implemented yet). + +A TCG "basic block" corresponds to a list of instructions terminated +by a branch instruction.  + +An operation with "undefined behavior" may result in a crash. + +An operation with "unspecified behavior" shall not crash.  However, +the result may be one of several possibilities so may be considered +an "undefined result". + +3) Intermediate representation + +3.1) Introduction + +TCG instructions operate on variables which are temporaries, local +temporaries or globals. TCG instructions and variables are strongly +typed. Two types are supported: 32 bit integers and 64 bit +integers. Pointers are defined as an alias to 32 bit or 64 bit +integers depending on the TCG target word size. + +Each instruction has a fixed number of output variable operands, input +variable operands and always constant operands. + +The notable exception is the call instruction which has a variable +number of outputs and inputs. + +In the textual form, output operands usually come first, followed by +input operands, followed by constant operands. The output type is +included in the instruction name. Constants are prefixed with a '$'. + +add_i32 t0, t1, t2  (t0 <- t1 + t2) + +3.2) Assumptions + +* Basic blocks + +- Basic blocks end after branches (e.g. brcond_i32 instruction), +  goto_tb and exit_tb instructions. +- Basic blocks start after the end of a previous basic block, or at a +  set_label instruction. + +After the end of a basic block, the content of temporaries is +destroyed, but local temporaries and globals are preserved. + +* Floating point types are not supported yet + +* Pointers: depending on the TCG target, pointer size is 32 bit or 64 +  bit. 
The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or +  TCG_TYPE_I64. + +* Helpers: + +Using the tcg_gen_helper_x_y it is possible to call any function +taking i32, i64 or pointer types. By default, before calling a helper, +all globals are stored at their canonical location and it is assumed +that the function can modify them. By default, the helper is allowed to +modify the CPU state or raise an exception. + +This can be overridden using the following function modifiers: +- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals, +  either directly or via an exception. They will not be saved to their +  canonical locations before calling the helper. +- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals. +  They will only be saved to their canonical location before calling helpers, +  but they won't be reloaded afterwise. +- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if +  the return value is not used. + +Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS. + +On some TCG targets (e.g. x86), several calling conventions are +supported. + +* Branches: + +Use the instruction 'br' to jump to a label. + +3.3) Code Optimizations + +When generating instructions, you can count on at least the following +optimizations: + +- Single instructions are simplified, e.g. + +   and_i32 t0, t0, $0xffffffff +     +  is suppressed. + +- A liveness analysis is done at the basic block level. The +  information is used to suppress moves from a dead variable to +  another one. It is also used to remove instructions which compute +  dead results. The later is especially useful for condition code +  optimization in QEMU. + +  In the following example: + +  add_i32 t0, t1, t2 +  add_i32 t0, t0, $1 +  mov_i32 t0, $1 + +  only the last instruction is kept. + +3.4) Instruction Reference + +********* Function call + +* call <ret> <params> ptr + +call function 'ptr' (pointer type) + +<ret> optional 32 bit or 64 bit return value +<params> optional 32 bit or 64 bit parameters + +********* Jumps/Labels + +* set_label $label + +Define label 'label' at the current program point. + +* br $label + +Jump to label. + +* brcond_i32/i64 t0, t1, cond, label + +Conditional jump if t0 cond t1 is true. cond can be: +    TCG_COND_EQ +    TCG_COND_NE +    TCG_COND_LT /* signed */ +    TCG_COND_GE /* signed */ +    TCG_COND_LE /* signed */ +    TCG_COND_GT /* signed */ +    TCG_COND_LTU /* unsigned */ +    TCG_COND_GEU /* unsigned */ +    TCG_COND_LEU /* unsigned */ +    TCG_COND_GTU /* unsigned */ + +********* Arithmetic + +* add_i32/i64 t0, t1, t2 + +t0=t1+t2 + +* sub_i32/i64 t0, t1, t2 + +t0=t1-t2 + +* neg_i32/i64 t0, t1 + +t0=-t1 (two's complement) + +* mul_i32/i64 t0, t1, t2 + +t0=t1*t2 + +* div_i32/i64 t0, t1, t2 + +t0=t1/t2 (signed). Undefined behavior if division by zero or overflow. + +* divu_i32/i64 t0, t1, t2 + +t0=t1/t2 (unsigned). Undefined behavior if division by zero. + +* rem_i32/i64 t0, t1, t2 + +t0=t1%t2 (signed). Undefined behavior if division by zero or overflow. + +* remu_i32/i64 t0, t1, t2 + +t0=t1%t2 (unsigned). Undefined behavior if division by zero. 
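As an illustration of how a front end emits ops such as the ones listed above, the following is a minimal sketch built on the tcg_gen_xxx() wrappers referred to in Note 2 of this reference; the surrounding function, operand names and label are hypothetical, and the exact prototypes vary between QEMU versions:

```c
/* Hypothetical front-end fragment: add two 32-bit values, compute the
 * unsigned carry-out of the addition with setcond, and branch on it.
 * t1 and t2 stand for globals or temporaries created elsewhere. */
static void gen_add_with_carry_check(TCGv_i32 t1, TCGv_i32 t2, TCGLabel *taken)
{
    TCGv_i32 t0 = tcg_temp_new_i32();   /* temporary: dead at end of the BB */

    tcg_gen_add_i32(t0, t1, t2);                     /* add_i32 t0, t1, t2 */
    tcg_gen_setcond_i32(TCG_COND_LTU, t0, t0, t2);   /* carry: (t1+t2) < t2 */
    tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, taken);  /* ends the basic block */

    tcg_temp_free_i32(t0);               /* free as soon as it is dead */
}
```

The brcondi variant is one of the constant shortcuts mentioned in Note 1; the brcond itself terminates the basic block, after which the content of the temporary t0 is no longer valid.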
+ +********* Logical + +* and_i32/i64 t0, t1, t2 + +t0=t1&t2 + +* or_i32/i64 t0, t1, t2 + +t0=t1|t2 + +* xor_i32/i64 t0, t1, t2 + +t0=t1^t2 + +* not_i32/i64 t0, t1 + +t0=~t1 + +* andc_i32/i64 t0, t1, t2 + +t0=t1&~t2 + +* eqv_i32/i64 t0, t1, t2 + +t0=~(t1^t2), or equivalently, t0=t1^~t2 + +* nand_i32/i64 t0, t1, t2 + +t0=~(t1&t2) + +* nor_i32/i64 t0, t1, t2 + +t0=~(t1|t2) + +* orc_i32/i64 t0, t1, t2 + +t0=t1|~t2 + +********* Shifts/Rotates + +* shl_i32/i64 t0, t1, t2 + +t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) + +* shr_i32/i64 t0, t1, t2 + +t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) + +* sar_i32/i64 t0, t1, t2 + +t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) + +* rotl_i32/i64 t0, t1, t2 + +Rotation of t2 bits to the left. +Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) + +* rotr_i32/i64 t0, t1, t2 + +Rotation of t2 bits to the right. +Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) + +********* Misc + +* mov_i32/i64 t0, t1 + +t0 = t1 + +Move t1 to t0 (both operands must have the same type). + +* ext8s_i32/i64 t0, t1 +ext8u_i32/i64 t0, t1 +ext16s_i32/i64 t0, t1 +ext16u_i32/i64 t0, t1 +ext32s_i64 t0, t1 +ext32u_i64 t0, t1 + +8, 16 or 32 bit sign/zero extension (both operands must have the same type) + +* bswap16_i32/i64 t0, t1 + +16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order +bytes are set to zero. + +* bswap32_i32/i64 t0, t1 + +32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that +the four high order bytes are set to zero. + +* bswap64_i64 t0, t1 + +64 bit byte swap + +* discard_i32/i64 t0 + +Indicate that the value of t0 won't be used later. It is useful to +force dead code elimination. + +* deposit_i32/i64 dest, t1, t2, pos, len + +Deposit T2 as a bitfield into T1, placing the result in DEST. +The bitfield is described by POS/LEN, which are immediate values: + +  LEN - the length of the bitfield +  POS - the position of the first bit, counting from the LSB + +For example, pos=8, len=4 indicates a 4-bit field at bit 8. +This operation would be equivalent to + +  dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) + +* trunc_shr_i32 t0, t1, pos + +For 64-bit hosts only, right shift the 64-bit input T1 by POS and +truncate to 32-bit output T0.  Depending on the host, this may be +a simple mov/shift, or may require additional canonicalization. + +********* Conditional moves + +* setcond_i32/i64 dest, t1, t2, cond + +dest = (t1 cond t2) + +Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. + +* movcond_i32/i64 dest, c1, c2, v1, v2, cond + +dest = (c1 cond c2 ? v1 : v2) + +Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2. + +********* Type conversions + +* ext_i32_i64 t0, t1 +Convert t1 (32 bit) to t0 (64 bit) and does sign extension + +* extu_i32_i64 t0, t1 +Convert t1 (32 bit) to t0 (64 bit) and does zero extension + +* trunc_i64_i32 t0, t1 +Truncate t1 (64 bit) to t0 (32 bit) + +* concat_i32_i64 t0, t1, t2 +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half +from t2 (32 bit). + +* concat32_i64 t0, t1, t2 +Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half +from t2 (64 bit). 
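To make the deposit and concat semantics above concrete, here is a small self-contained C sketch of the equivalent host-side computations (purely illustrative helper names, not code that TCG itself emits):

```c
#include <stdint.h>

/* deposit_i32 dest, t1, t2, pos, len:
 * insert the low LEN bits of t2 into t1 at bit position POS. */
static uint32_t deposit_i32(uint32_t t1, uint32_t t2, unsigned pos, unsigned len)
{
    uint32_t mask = (len == 32 ? ~0u : ((1u << len) - 1u)) << pos;
    return (t1 & ~mask) | ((t2 << pos) & mask);
}

/* concat_i32_i64 t0, t1, t2: the low half of t0 comes from t1,
 * the high half from t2. */
static uint64_t concat_i32_i64(uint32_t t1, uint32_t t2)
{
    return (uint64_t)t1 | ((uint64_t)t2 << 32);
}
```

With pos=8 and len=4 the computed mask is 0x0f00, which reproduces the example given for deposit_i32 above.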
+ +********* Load/Store + +* ld_i32/i64 t0, t1, offset +ld8s_i32/i64 t0, t1, offset +ld8u_i32/i64 t0, t1, offset +ld16s_i32/i64 t0, t1, offset +ld16u_i32/i64 t0, t1, offset +ld32s_i64 t0, t1, offset +ld32u_i64 t0, t1, offset + +t0 = read(t1 + offset) +Load 8, 16, 32 or 64 bits with or without sign extension from host memory.  +offset must be a constant. + +* st_i32/i64 t0, t1, offset +st8_i32/i64 t0, t1, offset +st16_i32/i64 t0, t1, offset +st32_i64 t0, t1, offset + +write(t0, t1 + offset) +Write 8, 16, 32 or 64 bits to host memory. + +All this opcodes assume that the pointed host memory doesn't correspond +to a global. In the latter case the behaviour is unpredictable. + +********* Multiword arithmetic support + +* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high +* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high + +Similar to add/sub, except that the double-word inputs T1 and T2 are +formed from two single-word arguments, and the double-word output T0 +is returned in two single-word outputs. + +* mulu2_i32/i64 t0_low, t0_high, t1, t2 + +Similar to mul, except two unsigned inputs T1 and T2 yielding the full +double-word product T0.  The later is returned in two single-word outputs. + +* muls2_i32/i64 t0_low, t0_high, t1, t2 + +Similar to mulu2, except the two inputs T1 and T2 are signed. + +********* 64-bit guest on 32-bit host support + +The following opcodes are internal to TCG.  Thus they are to be implemented by +32-bit host code generators, but are not to be emitted by guest translators. +They are emitted as needed by inline functions within "tcg-op.h". + +* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label + +Similar to brcond, except that the 64-bit values T0 and T1 +are formed from two 32-bit arguments. + +* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond + +Similar to setcond, except that the 64-bit values T1 and T2 are +formed from two 32-bit arguments.  The result is a 32-bit value. + +********* QEMU specific operations + +* exit_tb t0 + +Exit the current TB and return the value t0 (word type). + +* goto_tb index + +Exit the current TB and jump to the TB index 'index' (constant) if the +current TB was linked to this TB. Otherwise execute the next +instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued +at most once with each slot index per TB. + +* qemu_ld_i32/i64 t0, t1, flags, memidx +* qemu_st_i32/i64 t0, t1, flags, memidx + +Load data at the guest address t1 into t0, or store data in t0 at guest +address t1.  The _i32/_i64 size applies to the size of the input/output +register t0 only.  The address t1 is always sized according to the guest, +and the width of the memory operation is controlled by flags. + +Both t0 and t1 may be split into little-endian ordered pairs of registers +if dealing with 64-bit quantities on a 32-bit host. + +The memidx selects the qemu tlb index to use (e.g. user or kernel access). +The flags are the TCGMemOp bits, selecting the sign, width, and endianness +of the memory access. + +For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a +64-bit memory access specified in flags. + +********* + +Note 1: Some shortcuts are defined when the last operand is known to be +a constant (e.g. addi for add, movi for mov). + +Note 2: When using TCG, the opcodes must never be generated directly +as some of them may not be available as "real" opcodes. Always use the +function tcg_gen_xxx(args). + +4) Backend + +tcg-target.h contains the target specific definitions. 
tcg-target.c +contains the target specific code. + +4.1) Assumptions + +The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or +64 bit. It is expected that the pointer has the same size as the word. + +On a 32 bit target, all 64 bit operations are converted to 32 bits. A +few specific operations must be implemented to allow it (see add2_i32, +sub2_i32, brcond2_i32). + +Floating point operations are not supported in this version. A +previous incarnation of the code generator had full support of them, +but it is better to concentrate on integer operations first. + +On a 64 bit target, no assumption is made in TCG about the storage of +the 32 bit values in 64 bit registers. + +4.2) Constraints + +GCC like constraints are used to define the constraints of every +instruction. Memory constraints are not supported in this +version. Aliases are specified in the input operands as for GCC. + +The same register may be used for both an input and an output, even when +they are not explicitly aliased.  If an op expands to multiple target +instructions then care must be taken to avoid clobbering input values. +GCC style "early clobber" outputs are not currently supported. + +A target can define specific register or constant constraints. If an +operation uses a constant input constraint which does not allow all +constants, it must also accept registers in order to have a fallback. + +The movi_i32 and movi_i64 operations must accept any constants. + +The mov_i32 and mov_i64 operations must accept any registers of the +same type. + +The ld/st instructions must accept signed 32 bit constant offsets. It +can be implemented by reserving a specific register to compute the +address if the offset is too big. + +The ld/st instructions must accept any destination (ld) or source (st) +register. + +4.3) Function call assumptions + +- The only supported types for parameters and return value are: 32 and +  64 bit integers and pointer. +- The stack grows downwards. +- The first N parameters are passed in registers. +- The next parameters are passed on the stack by storing them as words. +- Some registers are clobbered during the call.  +- The function can return 0 or 1 value in registers. On a 32 bit +  target, functions must be able to return 2 values in registers for +  64 bit return type. + +5) Recommended coding rules for best performance + +- Use globals to represent the parts of the QEMU CPU state which are +  often modified, e.g. the integer registers and the condition +  codes. TCG will be able to use host registers to store them. + +- Avoid globals stored in fixed registers. They must be used only to +  store the pointer to the CPU state and possibly to store a pointer +  to a register window. + +- Use temporaries. Use local temporaries only when really needed, +  e.g. when you need to use a value after a jump. Local temporaries +  introduce a performance hit in the current TCG implementation: their +  content is saved to memory at end of each basic block. + +- Free temporaries and local temporaries when they are no longer used +  (tcg_temp_free). Since tcg_const_x() also creates a temporary, you +  should free it after it is used. Freeing temporaries does not yield +  a better generated code, but it reduces the memory usage of TCG and +  the speed of the translation. + +- Don't hesitate to use helpers for complicated or seldom used guest +  instructions. There is little performance advantage in using TCG to +  implement guest instructions taking more than about twenty TCG +  instructions. 
Note that this rule of thumb is more applicable to +  helpers doing complex logic or arithmetic, where the C compiler has +  scope to do a good job of optimisation; it is less relevant where +  the instruction is mostly doing loads and stores, and in those cases +  inline TCG may still be faster for longer sequences. + +- The hard limit on the number of TCG instructions you can generate +  per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h -- +  you cannot exceed this without risking a buffer overrun. + +- Use the 'discard' instruction if you know that TCG won't be able to +  prove that a given global is "dead" at a given program point. The +  x86 guest uses it to improve the condition codes optimisation. diff --git a/tcg/TODO b/tcg/TODO new file mode 100644 index 00000000..07478477 --- /dev/null +++ b/tcg/TODO @@ -0,0 +1,14 @@ +- Add new instructions such as: clz, ctz, popcnt. + +- See if it is worth exporting mul2, mulu2, div2, divu2.  + +- Support of globals saved in fixed registers between TBs. + +Ideas: + +- Move the slow part of the qemu_ld/st ops after the end of the TB. + +- Change exception syntax to get closer to QOP system (exception +  parameters given with a specific instruction). + +- Add float and vector support. diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c new file mode 100644 index 00000000..bad8b1de --- /dev/null +++ b/tcg/aarch64/tcg-target.c @@ -0,0 +1,1882 @@ +/* + * Initial TCG Implementation for aarch64 + * + * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH + * Written by Claudio Fontana + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + */ + +#include "tcg-be-ldst.h" +#include "qemu/bitops.h" + +/* We're going to re-use TCGType in setting of the SF bit, which controls +   the size of the operation performed.  If we know the values match, it +   makes things much cleaner.  */ +QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1); + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7", +    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15", +    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23", +    "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp", +}; +#endif /* NDEBUG */ + +static const int tcg_target_reg_alloc_order[] = { +    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, +    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, +    TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */ + +    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, +    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, +    TCG_REG_X16, TCG_REG_X17, + +    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, +    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, + +    /* X18 reserved by system */ +    /* X19 reserved for AREG0 */ +    /* X29 reserved as fp */ +    /* X30 reserved as temporary */ +}; + +static const int tcg_target_call_iarg_regs[8] = { +    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, +    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 +}; +static const int tcg_target_call_oarg_regs[1] = { +    TCG_REG_X0 +}; + +#define TCG_REG_TMP TCG_REG_X30 + +#ifndef CONFIG_SOFTMMU +/* Note that XZR cannot be encoded in the address base register slot, +   as that actaully encodes SP.  
So if we need to zero-extend the guest +   address, via the address index register slot, we need to load even +   a zero guest base into a register.  */ +#define USE_GUEST_BASE     (GUEST_BASE != 0 || TARGET_LONG_BITS == 32) + +# ifdef CONFIG_USE_GUEST_BASE +#  define TCG_REG_GUEST_BASE TCG_REG_X28 +# else +#  define TCG_REG_GUEST_BASE TCG_REG_XZR +# endif +#endif + +static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ +    ptrdiff_t offset = target - code_ptr; +    assert(offset == sextract64(offset, 0, 26)); +    /* read instruction, mask away previous PC_REL26 parameter contents, +       set the proper offset, then write back the instruction. */ +    *code_ptr = deposit32(*code_ptr, 0, 26, offset); +} + +static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ +    ptrdiff_t offset = target - code_ptr; +    assert(offset == sextract64(offset, 0, 19)); +    *code_ptr = deposit32(*code_ptr, 5, 19, offset); +} + +static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, +                               intptr_t value, intptr_t addend) +{ +    assert(addend == 0); +    switch (type) { +    case R_AARCH64_JUMP26: +    case R_AARCH64_CALL26: +        reloc_pc26(code_ptr, (tcg_insn_unit *)value); +        break; +    case R_AARCH64_CONDBR19: +        reloc_pc19(code_ptr, (tcg_insn_unit *)value); +        break; +    default: +        tcg_abort(); +    } +} + +#define TCG_CT_CONST_AIMM 0x100 +#define TCG_CT_CONST_LIMM 0x200 +#define TCG_CT_CONST_ZERO 0x400 +#define TCG_CT_CONST_MONE 0x800 + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, +                                   const char **pct_str) +{ +    const char *ct_str = *pct_str; + +    switch (ct_str[0]) { +    case 'r': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); +        break; +    case 'l': /* qemu_ld / qemu_st address, data_reg */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU +        /* x0 and x1 will be overwritten when reading the tlb entry, +           and x2, and x3 for helper args, better to avoid using them. */ +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3); +#endif +        break; +    case 'A': /* Valid for arithmetic immediate (positive or negative).  */ +        ct->ct |= TCG_CT_CONST_AIMM; +        break; +    case 'L': /* Valid for logical immediate.  */ +        ct->ct |= TCG_CT_CONST_LIMM; +        break; +    case 'M': /* minus one */ +        ct->ct |= TCG_CT_CONST_MONE; +        break; +    case 'Z': /* zero */ +        ct->ct |= TCG_CT_CONST_ZERO; +        break; +    default: +        return -1; +    } + +    ct_str++; +    *pct_str = ct_str; +    return 0; +} + +static inline bool is_aimm(uint64_t val) +{ +    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0; +} + +static inline bool is_limm(uint64_t val) +{ +    /* Taking a simplified view of the logical immediates for now, ignoring +       the replication that can happen across the field.  Match bit patterns +       of the forms +           0....01....1 +           0..01..10..0 +       and their inverses.  */ + +    /* Make things easier below, by testing the form with msb clear. 
*/ +    if ((int64_t)val < 0) { +        val = ~val; +    } +    if (val == 0) { +        return false; +    } +    val += val & -val; +    return (val & (val - 1)) == 0; +} + +static int tcg_target_const_match(tcg_target_long val, TCGType type, +                                  const TCGArgConstraint *arg_ct) +{ +    int ct = arg_ct->ct; + +    if (ct & TCG_CT_CONST) { +        return 1; +    } +    if (type == TCG_TYPE_I32) { +        val = (int32_t)val; +    } +    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) { +        return 1; +    } +    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) { +        return 1; +    } +    if ((ct & TCG_CT_CONST_ZERO) && val == 0) { +        return 1; +    } +    if ((ct & TCG_CT_CONST_MONE) && val == -1) { +        return 1; +    } + +    return 0; +} + +enum aarch64_cond_code { +    COND_EQ = 0x0, +    COND_NE = 0x1, +    COND_CS = 0x2,     /* Unsigned greater or equal */ +    COND_HS = COND_CS, /* ALIAS greater or equal */ +    COND_CC = 0x3,     /* Unsigned less than */ +    COND_LO = COND_CC, /* ALIAS Lower */ +    COND_MI = 0x4,     /* Negative */ +    COND_PL = 0x5,     /* Zero or greater */ +    COND_VS = 0x6,     /* Overflow */ +    COND_VC = 0x7,     /* No overflow */ +    COND_HI = 0x8,     /* Unsigned greater than */ +    COND_LS = 0x9,     /* Unsigned less or equal */ +    COND_GE = 0xa, +    COND_LT = 0xb, +    COND_GT = 0xc, +    COND_LE = 0xd, +    COND_AL = 0xe, +    COND_NV = 0xf, /* behaves like COND_AL here */ +}; + +static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { +    [TCG_COND_EQ] = COND_EQ, +    [TCG_COND_NE] = COND_NE, +    [TCG_COND_LT] = COND_LT, +    [TCG_COND_GE] = COND_GE, +    [TCG_COND_LE] = COND_LE, +    [TCG_COND_GT] = COND_GT, +    /* unsigned */ +    [TCG_COND_LTU] = COND_LO, +    [TCG_COND_GTU] = COND_HI, +    [TCG_COND_GEU] = COND_HS, +    [TCG_COND_LEU] = COND_LS, +}; + +typedef enum { +    LDST_ST = 0,    /* store */ +    LDST_LD = 1,    /* load */ +    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */ +    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */ +} AArch64LdstType; + +/* We encode the format of the insn into the beginning of the name, so that +   we can have the preprocessor help "typecheck" the insn vs the output +   function.  Arm didn't provide us with nice names for the formats, so we +   use the section number of the architecture reference manual in which the +   instruction group is described.  */ +typedef enum { +    /* Compare and branch (immediate).  */ +    I3201_CBZ       = 0x34000000, +    I3201_CBNZ      = 0x35000000, + +    /* Conditional branch (immediate).  */ +    I3202_B_C       = 0x54000000, + +    /* Unconditional branch (immediate).  */ +    I3206_B         = 0x14000000, +    I3206_BL        = 0x94000000, + +    /* Unconditional branch (register).  */ +    I3207_BR        = 0xd61f0000, +    I3207_BLR       = 0xd63f0000, +    I3207_RET       = 0xd65f0000, + +    /* Load/store register.  Described here as 3.3.12, but the helper +       that emits them can transform to 3.3.10 or 3.3.13.  
*/ +    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30, +    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30, +    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30, +    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30, + +    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30, +    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30, +    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30, +    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30, + +    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30, +    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30, + +    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30, +    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30, +    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30, + +    I3312_TO_I3310  = 0x00200800, +    I3312_TO_I3313  = 0x01000000, + +    /* Load/store register pair instructions.  */ +    I3314_LDP       = 0x28400000, +    I3314_STP       = 0x28000000, + +    /* Add/subtract immediate instructions.  */ +    I3401_ADDI      = 0x11000000, +    I3401_ADDSI     = 0x31000000, +    I3401_SUBI      = 0x51000000, +    I3401_SUBSI     = 0x71000000, + +    /* Bitfield instructions.  */ +    I3402_BFM       = 0x33000000, +    I3402_SBFM      = 0x13000000, +    I3402_UBFM      = 0x53000000, + +    /* Extract instruction.  */ +    I3403_EXTR      = 0x13800000, + +    /* Logical immediate instructions.  */ +    I3404_ANDI      = 0x12000000, +    I3404_ORRI      = 0x32000000, +    I3404_EORI      = 0x52000000, + +    /* Move wide immediate instructions.  */ +    I3405_MOVN      = 0x12800000, +    I3405_MOVZ      = 0x52800000, +    I3405_MOVK      = 0x72800000, + +    /* PC relative addressing instructions.  */ +    I3406_ADR       = 0x10000000, +    I3406_ADRP      = 0x90000000, + +    /* Add/subtract shifted register instructions (without a shift).  */ +    I3502_ADD       = 0x0b000000, +    I3502_ADDS      = 0x2b000000, +    I3502_SUB       = 0x4b000000, +    I3502_SUBS      = 0x6b000000, + +    /* Add/subtract shifted register instructions (with a shift).  */ +    I3502S_ADD_LSL  = I3502_ADD, + +    /* Add/subtract with carry instructions.  */ +    I3503_ADC       = 0x1a000000, +    I3503_SBC       = 0x5a000000, + +    /* Conditional select instructions.  */ +    I3506_CSEL      = 0x1a800000, +    I3506_CSINC     = 0x1a800400, + +    /* Data-processing (1 source) instructions.  */ +    I3507_REV16     = 0x5ac00400, +    I3507_REV32     = 0x5ac00800, +    I3507_REV64     = 0x5ac00c00, + +    /* Data-processing (2 source) instructions.  */ +    I3508_LSLV      = 0x1ac02000, +    I3508_LSRV      = 0x1ac02400, +    I3508_ASRV      = 0x1ac02800, +    I3508_RORV      = 0x1ac02c00, +    I3508_SMULH     = 0x9b407c00, +    I3508_UMULH     = 0x9bc07c00, +    I3508_UDIV      = 0x1ac00800, +    I3508_SDIV      = 0x1ac00c00, + +    /* Data-processing (3 source) instructions.  */ +    I3509_MADD      = 0x1b000000, +    I3509_MSUB      = 0x1b008000, + +    /* Logical shifted register instructions (without a shift).  
*/ +    I3510_AND       = 0x0a000000, +    I3510_BIC       = 0x0a200000, +    I3510_ORR       = 0x2a000000, +    I3510_ORN       = 0x2a200000, +    I3510_EOR       = 0x4a000000, +    I3510_EON       = 0x4a200000, +    I3510_ANDS      = 0x6a000000, +} AArch64Insn; + +static inline uint32_t tcg_in32(TCGContext *s) +{ +    uint32_t v = *(uint32_t *)s->code_ptr; +    return v; +} + +/* Emit an opcode with "type-checking" of the format.  */ +#define tcg_out_insn(S, FMT, OP, ...) \ +    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__) + +static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rt, int imm19) +{ +    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt); +} + +static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn, +                              TCGCond c, int imm19) +{ +    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5); +} + +static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26) +{ +    tcg_out32(s, insn | (imm26 & 0x03ffffff)); +} + +static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn) +{ +    tcg_out32(s, insn | rn << 5); +} + +static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn, +                              TCGReg r1, TCGReg r2, TCGReg rn, +                              tcg_target_long ofs, bool pre, bool w) +{ +    insn |= 1u << 31; /* ext */ +    insn |= pre << 24; +    insn |= w << 23; + +    assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0); +    insn |= (ofs & (0x7f << 3)) << (15 - 3); + +    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1); +} + +static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, TCGReg rn, uint64_t aimm) +{ +    if (aimm > 0xfff) { +        assert((aimm & 0xfff) == 0); +        aimm >>= 12; +        assert(aimm <= 0xfff); +        aimm |= 1 << 12;  /* apply LSL 12 */ +    } +    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd); +} + +/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4 +   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields +   that feed the DecodeBitMasks pseudo function.  */ +static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, TCGReg rn, int n, int immr, int imms) +{ +    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10 +              | rn << 5 | rd); +} + +#define tcg_out_insn_3404  tcg_out_insn_3402 + +static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, TCGReg rn, TCGReg rm, int imms) +{ +    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10 +              | rn << 5 | rd); +} + +/* This function is used for the Move (wide immediate) instruction group. +   Note that SHIFT is a full shift count, not the 2 bit HW field. 
*/ +static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, uint16_t half, unsigned shift) +{ +    assert((shift & ~0x30) == 0); +    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd); +} + +static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, +                              TCGReg rd, int64_t disp) +{ +    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); +} + +/* This function is for both 3.5.2 (Add/Subtract shifted register), for +   the rare occasion when we actually want to supply a shift amount.  */ +static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, +                                      TCGType ext, TCGReg rd, TCGReg rn, +                                      TCGReg rm, int imm6) +{ +    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd); +} + +/* This function is for 3.5.2 (Add/subtract shifted register), +   and 3.5.10 (Logical shifted register), for the vast majorty of cases +   when we don't want to apply a shift.  Thus it can also be used for +   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */ +static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, TCGReg rn, TCGReg rm) +{ +    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd); +} + +#define tcg_out_insn_3503  tcg_out_insn_3502 +#define tcg_out_insn_3508  tcg_out_insn_3502 +#define tcg_out_insn_3510  tcg_out_insn_3502 + +static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c) +{ +    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd +              | tcg_cond_to_aarch64[c] << 12); +} + +static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, TCGReg rn) +{ +    tcg_out32(s, insn | ext << 31 | rn << 5 | rd); +} + +static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext, +                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra) +{ +    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd); +} + +static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, +                              TCGReg rd, TCGReg base, TCGType ext, +                              TCGReg regoff) +{ +    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */ +    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | +              0x4000 | ext << 13 | base << 5 | rd); +} + +static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, +                              TCGReg rd, TCGReg rn, intptr_t offset) +{ +    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd); +} + +static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn, +                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm) +{ +    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */ +    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd); +} + +/* Register to register move using ORR (shifted register with no shift). */ +static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm) +{ +    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm); +} + +/* Register to register move using ADDI (move to/from SP).  
*/ +static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn) +{ +    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0); +} + +/* This function is used for the Logical (immediate) instruction group. +   The value of LIMM must satisfy IS_LIMM.  See the comment above about +   only supporting simplified logical immediates.  */ +static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext, +                             TCGReg rd, TCGReg rn, uint64_t limm) +{ +    unsigned h, l, r, c; + +    assert(is_limm(limm)); + +    h = clz64(limm); +    l = ctz64(limm); +    if (l == 0) { +        r = 0;                  /* form 0....01....1 */ +        c = ctz64(~limm) - 1; +        if (h == 0) { +            r = clz64(~limm);   /* form 1..10..01..1 */ +            c += r; +        } +    } else { +        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */ +        c = r - h - 1; +    } +    if (ext == TCG_TYPE_I32) { +        r &= 31; +        c &= 31; +    } + +    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, +                         tcg_target_long value) +{ +    AArch64Insn insn; +    int i, wantinv, shift; +    tcg_target_long svalue = value; +    tcg_target_long ivalue = ~value; +    tcg_target_long imask; + +    /* For 32-bit values, discard potential garbage in value.  For 64-bit +       values within [2**31, 2**32-1], we can create smaller sequences by +       interpreting this as a negative 32-bit number, while ensuring that +       the high 32 bits are cleared by setting SF=0.  */ +    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) { +        svalue = (int32_t)value; +        value = (uint32_t)value; +        ivalue = (uint32_t)ivalue; +        type = TCG_TYPE_I32; +    } + +    /* Speed things up by handling the common case of small positive +       and negative values specially.  */ +    if ((value & ~0xffffull) == 0) { +        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0); +        return; +    } else if ((ivalue & ~0xffffull) == 0) { +        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0); +        return; +    } + +    /* Check for bitfield immediates.  For the benefit of 32-bit quantities, +       use the sign-extended value.  That lets us match rotated values such +       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */ +    if (is_limm(svalue)) { +        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue); +        return; +    } + +    /* Look for host pointer values within 4G of the PC.  This happens +       often when loading pointers to QEMU's own data structures.  */ +    if (type == TCG_TYPE_I64) { +        tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12); +        if (disp == sextract64(disp, 0, 21)) { +            tcg_out_insn(s, 3406, ADRP, rd, disp); +            if (value & 0xfff) { +                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff); +            } +            return; +        } +    } + +    /* Would it take fewer insns to begin with MOVN?  For the value and its +       inverse, count the number of 16-bit lanes that are 0.  
*/ +    for (i = wantinv = imask = 0; i < 64; i += 16) { +        tcg_target_long mask = 0xffffull << i; +        if ((value & mask) == 0) { +            wantinv -= 1; +        } +        if ((ivalue & mask) == 0) { +            wantinv += 1; +            imask |= mask; +        } +    } + +    /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN.  */ +    insn = I3405_MOVZ; +    if (wantinv > 0) { +        value = ivalue; +        insn = I3405_MOVN; +    } + +    /* Find the lowest lane that is not 0x0000.  */ +    shift = ctz64(value) & (63 & -16); +    tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift); + +    if (wantinv > 0) { +        /* Re-invert the value, so MOVK sees non-inverted bits.  */ +        value = ~value; +        /* Clear out all the 0xffff lanes.  */ +        value ^= imask; +    } +    /* Clear out the lane that we just set.  */ +    value &= ~(0xffffUL << shift); + +    /* Iterate until all lanes have been set, and thus cleared from VALUE.  */ +    while (value) { +        shift = ctz64(value) & (63 & -16); +        tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift); +        value &= ~(0xffffUL << shift); +    } +} + +/* Define something more legible for general use.  */ +#define tcg_out_ldst_r  tcg_out_insn_3310 + +static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, +                         TCGReg rd, TCGReg rn, intptr_t offset) +{ +    TCGMemOp size = (uint32_t)insn >> 30; + +    /* If the offset is naturally aligned and in range, then we can +       use the scaled uimm12 encoding */ +    if (offset >= 0 && !(offset & ((1 << size) - 1))) { +        uintptr_t scaled_uimm = offset >> size; +        if (scaled_uimm <= 0xfff) { +            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm); +            return; +        } +    } + +    /* Small signed offsets can use the unscaled encoding.  */ +    if (offset >= -256 && offset < 256) { +        tcg_out_insn_3312(s, insn, rd, rn, offset); +        return; +    } + +    /* Worst-case scenario, move offset to temp register, use reg offset.  */ +    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); +    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); +} + +static inline void tcg_out_mov(TCGContext *s, +                               TCGType type, TCGReg ret, TCGReg arg) +{ +    if (ret != arg) { +        tcg_out_movr(s, type, ret, arg); +    } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX, +                 arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_ldst(s, type == TCG_TYPE_I32 ? 
I3312_STRW : I3312_STRX, +                 arg, arg1, arg2); +} + +static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd, +                               TCGReg rn, unsigned int a, unsigned int b) +{ +    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd, +                                TCGReg rn, unsigned int a, unsigned int b) +{ +    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd, +                                TCGReg rn, unsigned int a, unsigned int b) +{ +    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b); +} + +static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, +                                TCGReg rn, TCGReg rm, unsigned int a) +{ +    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); +} + +static inline void tcg_out_shl(TCGContext *s, TCGType ext, +                               TCGReg rd, TCGReg rn, unsigned int m) +{ +    int bits = ext ? 64 : 32; +    int max = bits - 1; +    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); +} + +static inline void tcg_out_shr(TCGContext *s, TCGType ext, +                               TCGReg rd, TCGReg rn, unsigned int m) +{ +    int max = ext ? 63 : 31; +    tcg_out_ubfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_sar(TCGContext *s, TCGType ext, +                               TCGReg rd, TCGReg rn, unsigned int m) +{ +    int max = ext ? 63 : 31; +    tcg_out_sbfm(s, ext, rd, rn, m & max, max); +} + +static inline void tcg_out_rotr(TCGContext *s, TCGType ext, +                                TCGReg rd, TCGReg rn, unsigned int m) +{ +    int max = ext ? 63 : 31; +    tcg_out_extr(s, ext, rd, rn, rn, m & max); +} + +static inline void tcg_out_rotl(TCGContext *s, TCGType ext, +                                TCGReg rd, TCGReg rn, unsigned int m) +{ +    int bits = ext ? 64 : 32; +    int max = bits - 1; +    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); +} + +static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, +                               TCGReg rn, unsigned lsb, unsigned width) +{ +    unsigned size = ext ? 64 : 32; +    unsigned a = (size - lsb) & (size - 1); +    unsigned b = width - 1; +    tcg_out_bfm(s, ext, rd, rn, a, b); +} + +static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a, +                        tcg_target_long b, bool const_b) +{ +    if (const_b) { +        /* Using CMP or CMN aliases.  */ +        if (b >= 0) { +            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); +        } else { +            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); +        } +    } else { +        /* Using CMP alias SUBS wzr, Wn, Wm */ +        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); +    } +} + +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) +{ +    ptrdiff_t offset = target - s->code_ptr; +    assert(offset == sextract64(offset, 0, 26)); +    tcg_out_insn(s, 3206, B, offset); +} + +static inline void tcg_out_goto_noaddr(TCGContext *s) +{ +    /* We pay attention here to not modify the branch target by reading from +       the buffer. This ensure that caches and memory are kept coherent during +       retranslation.  Mask away possible garbage in the high bits for the +       first translation, while keeping the offset bits for retranslation. 
*/ +    uint32_t old = tcg_in32(s); +    tcg_out_insn(s, 3206, B, old); +} + +static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) +{ +    /* See comments in tcg_out_goto_noaddr.  */ +    uint32_t old = tcg_in32(s) >> 5; +    tcg_out_insn(s, 3202, B_C, c, old); +} + +static inline void tcg_out_callr(TCGContext *s, TCGReg reg) +{ +    tcg_out_insn(s, 3207, BLR, reg); +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ +    ptrdiff_t offset = target - s->code_ptr; +    if (offset == sextract64(offset, 0, 26)) { +        tcg_out_insn(s, 3206, BL, offset); +    } else { +        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); +        tcg_out_callr(s, TCG_REG_TMP); +    } +} + +void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ +    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; +    tcg_insn_unit *target = (tcg_insn_unit *)addr; + +    reloc_pc26(code_ptr, target); +    flush_icache_range(jmp_addr, jmp_addr + 4); +} + +static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) +{ +    if (!l->has_value) { +        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); +        tcg_out_goto_noaddr(s); +    } else { +        tcg_out_goto(s, l->u.value_ptr); +    } +} + +static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, +                           TCGArg b, bool b_const, TCGLabel *l) +{ +    intptr_t offset; +    bool need_cmp; + +    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) { +        need_cmp = false; +    } else { +        need_cmp = true; +        tcg_out_cmp(s, ext, a, b, b_const); +    } + +    if (!l->has_value) { +        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); +        offset = tcg_in32(s) >> 5; +    } else { +        offset = l->u.value_ptr - s->code_ptr; +        assert(offset == sextract64(offset, 0, 19)); +    } + +    if (need_cmp) { +        tcg_out_insn(s, 3202, B_C, c, offset); +    } else if (c == TCG_COND_EQ) { +        tcg_out_insn(s, 3201, CBZ, ext, a, offset); +    } else { +        tcg_out_insn(s, 3201, CBNZ, ext, a, offset); +    } +} + +static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn) +{ +    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn); +} + +static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn) +{ +    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn); +} + +static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn) +{ +    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn); +} + +static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits, +                               TCGReg rd, TCGReg rn) +{ +    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */ +    int bits = (8 << s_bits) - 1; +    tcg_out_sbfm(s, ext, rd, rn, 0, bits); +} + +static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits, +                               TCGReg rd, TCGReg rn) +{ +    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */ +    int bits = (8 << s_bits) - 1; +    tcg_out_ubfm(s, 0, rd, rn, 0, bits); +} + +static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, +                            TCGReg rn, int64_t aimm) +{ +    if (aimm >= 0) { +        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); +    } else { +        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); +    } +} + +static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, +                                   TCGReg rh, TCGReg al, TCGReg ah, +                    
               tcg_target_long bl, tcg_target_long bh, +                                   bool const_bl, bool const_bh, bool sub) +{ +    TCGReg orig_rl = rl; +    AArch64Insn insn; + +    if (rl == ah || (!const_bh && rl == bh)) { +        rl = TCG_REG_TMP; +    } + +    if (const_bl) { +        insn = I3401_ADDSI; +        if ((bl < 0) ^ sub) { +            insn = I3401_SUBSI; +            bl = -bl; +        } +        tcg_out_insn_3401(s, insn, ext, rl, al, bl); +    } else { +        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); +    } + +    insn = I3503_ADC; +    if (const_bh) { +        /* Note that the only two constants we support are 0 and -1, and +           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */ +        if ((bh != 0) ^ sub) { +            insn = I3503_SBC; +        } +        bh = TCG_REG_XZR; +    } else if (sub) { +        insn = I3503_SBC; +    } +    tcg_out_insn_3503(s, insn, ext, rh, ah, bh); + +    tcg_out_mov(s, ext, orig_rl, rl); +} + +#ifdef CONFIG_SOFTMMU +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + *                                     TCGMemOpIdx oi, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { +    [MO_UB]   = helper_ret_ldub_mmu, +    [MO_LEUW] = helper_le_lduw_mmu, +    [MO_LEUL] = helper_le_ldul_mmu, +    [MO_LEQ]  = helper_le_ldq_mmu, +    [MO_BEUW] = helper_be_lduw_mmu, +    [MO_BEUL] = helper_be_ldul_mmu, +    [MO_BEQ]  = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + *                                     uintxx_t val, TCGMemOpIdx oi, + *                                     uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { +    [MO_UB]   = helper_ret_stb_mmu, +    [MO_LEUW] = helper_le_stw_mmu, +    [MO_LEUL] = helper_le_stl_mmu, +    [MO_LEQ]  = helper_le_stq_mmu, +    [MO_BEUW] = helper_be_stw_mmu, +    [MO_BEUL] = helper_be_stl_mmu, +    [MO_BEQ]  = helper_be_stq_mmu, +}; + +static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target) +{ +    ptrdiff_t offset = tcg_pcrel_diff(s, target); +    assert(offset == sextract64(offset, 0, 21)); +    tcg_out_insn(s, 3406, ADR, rd, offset); +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ +    TCGMemOpIdx oi = lb->oi; +    TCGMemOp opc = get_memop(oi); +    TCGMemOp size = opc & MO_SIZE; + +    reloc_pc19(lb->label_ptr[0], s->code_ptr); + +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); +    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); +    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); +    tcg_out_adr(s, TCG_REG_X3, lb->raddr); +    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); +    if (opc & MO_SIGN) { +        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); +    } else { +        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0); +    } + +    tcg_out_goto(s, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ +    TCGMemOpIdx oi = lb->oi; +    TCGMemOp opc = get_memop(oi); +    TCGMemOp size = opc & MO_SIZE; + +    reloc_pc19(lb->label_ptr[0], s->code_ptr); + +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); +    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); +    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); +    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); +    tcg_out_adr(s, TCG_REG_X4, lb->raddr); +    tcg_out_call(s, qemu_st_helpers[opc & 
(MO_BSWAP | MO_SIZE)]); +    tcg_out_goto(s, lb->raddr); +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, +                                TCGType ext, TCGReg data_reg, TCGReg addr_reg, +                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr) +{ +    TCGLabelQemuLdst *label = new_ldst_label(s); + +    label->is_ld = is_ld; +    label->oi = oi; +    label->type = ext; +    label->datalo_reg = data_reg; +    label->addrlo_reg = addr_reg; +    label->raddr = raddr; +    label->label_ptr[0] = label_ptr; +} + +/* Load and compare a TLB entry, emitting the conditional jump to the +   slow path for the failure case, which will be patched later when finalizing +   the slow path. Generated code returns the host addend in X1, +   clobbers X0,X2,X3,TMP. */ +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits, +                             tcg_insn_unit **label_ptr, int mem_index, +                             bool is_read) +{ +    TCGReg base = TCG_AREG0; +    int tlb_offset = is_read ? +        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) +        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + +    /* Extract the TLB index from the address into X0. +       X0<CPU_TLB_BITS:0> = +       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */ +    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, +                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); + +    /* Store the page mask part of the address and the low s_bits into X3. +       Later this allows checking for equality and alignment at the same time. +       X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */ +    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3, +                     addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + +    /* Add any "high bits" from the tlb offset to the env address into X2, +       to take advantage of the LSL12 form of the ADDI instruction. +       X2 = env + (tlb_offset & 0xfff000) */ +    if (tlb_offset & 0xfff000) { +        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base, +                     tlb_offset & 0xfff000); +        base = TCG_REG_X2; +    } + +    /* Merge the tlb index contribution into X2. +       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ +    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base, +                 TCG_REG_X0, CPU_TLB_ENTRY_BITS); + +    /* Merge "low bits" from tlb offset, load the tlb comparator into X0. +       X0 = load [X2 + (tlb_offset & 0x000fff)] */ +    tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX, +                 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff); + +    /* Load the tlb addend. Do that early to avoid stalling. +       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */ +    tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2, +                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) - +                 (is_read ? offsetof(CPUTLBEntry, addr_read) +                  : offsetof(CPUTLBEntry, addr_write))); + +    /* Perform the address comparison. */ +    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); + +    /* If not equal, we jump to the slow path. 
*/ +    *label_ptr = s->code_ptr; +    tcg_out_goto_cond_noaddr(s, TCG_COND_NE); +} + +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext, +                                   TCGReg data_r, TCGReg addr_r, +                                   TCGType otype, TCGReg off_r) +{ +    const TCGMemOp bswap = memop & MO_BSWAP; + +    switch (memop & MO_SSIZE) { +    case MO_UB: +        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); +        break; +    case MO_SB: +        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, +                       data_r, addr_r, otype, off_r); +        break; +    case MO_UW: +        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); +        if (bswap) { +            tcg_out_rev16(s, data_r, data_r); +        } +        break; +    case MO_SW: +        if (bswap) { +            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); +            tcg_out_rev16(s, data_r, data_r); +            tcg_out_sxt(s, ext, MO_16, data_r, data_r); +        } else { +            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), +                           data_r, addr_r, otype, off_r); +        } +        break; +    case MO_UL: +        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); +        if (bswap) { +            tcg_out_rev32(s, data_r, data_r); +        } +        break; +    case MO_SL: +        if (bswap) { +            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); +            tcg_out_rev32(s, data_r, data_r); +            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r); +        } else { +            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); +        } +        break; +    case MO_Q: +        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); +        if (bswap) { +            tcg_out_rev64(s, data_r, data_r); +        } +        break; +    default: +        tcg_abort(); +    } +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop, +                                   TCGReg data_r, TCGReg addr_r, +                                   TCGType otype, TCGReg off_r) +{ +    const TCGMemOp bswap = memop & MO_BSWAP; + +    switch (memop & MO_SIZE) { +    case MO_8: +        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r); +        break; +    case MO_16: +        if (bswap && data_r != TCG_REG_XZR) { +            tcg_out_rev16(s, TCG_REG_TMP, data_r); +            data_r = TCG_REG_TMP; +        } +        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r); +        break; +    case MO_32: +        if (bswap && data_r != TCG_REG_XZR) { +            tcg_out_rev32(s, TCG_REG_TMP, data_r); +            data_r = TCG_REG_TMP; +        } +        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r); +        break; +    case MO_64: +        if (bswap && data_r != TCG_REG_XZR) { +            tcg_out_rev64(s, TCG_REG_TMP, data_r); +            data_r = TCG_REG_TMP; +        } +        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r); +        break; +    default: +        tcg_abort(); +    } +} + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, +                            TCGMemOpIdx oi, TCGType ext) +{ +    TCGMemOp memop = get_memop(oi); +    const TCGType otype = TARGET_LONG_BITS == 64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32; +#ifdef CONFIG_SOFTMMU +    unsigned mem_index = get_mmuidx(oi); +    TCGMemOp s_bits = memop & MO_SIZE; +    tcg_insn_unit *label_ptr; + +    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1); +    tcg_out_qemu_ld_direct(s, memop, ext, data_reg, +                           TCG_REG_X1, otype, addr_reg); +    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, +                        s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ +    if (USE_GUEST_BASE) { +        tcg_out_qemu_ld_direct(s, memop, ext, data_reg, +                               TCG_REG_GUEST_BASE, otype, addr_reg); +    } else { +        tcg_out_qemu_ld_direct(s, memop, ext, data_reg, +                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR); +    } +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, +                            TCGMemOpIdx oi) +{ +    TCGMemOp memop = get_memop(oi); +    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; +#ifdef CONFIG_SOFTMMU +    unsigned mem_index = get_mmuidx(oi); +    TCGMemOp s_bits = memop & MO_SIZE; +    tcg_insn_unit *label_ptr; + +    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0); +    tcg_out_qemu_st_direct(s, memop, data_reg, +                           TCG_REG_X1, otype, addr_reg); +    add_qemu_ldst_label(s, false, oi, s_bits == MO_64, data_reg, addr_reg, +                        s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ +    if (USE_GUEST_BASE) { +        tcg_out_qemu_st_direct(s, memop, data_reg, +                               TCG_REG_GUEST_BASE, otype, addr_reg); +    } else { +        tcg_out_qemu_st_direct(s, memop, data_reg, +                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR); +    } +#endif /* CONFIG_SOFTMMU */ +} + +static tcg_insn_unit *tb_ret_addr; + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, +                       const TCGArg args[TCG_MAX_OP_ARGS], +                       const int const_args[TCG_MAX_OP_ARGS]) +{ +    /* 99% of the time, we can signal the use of extension registers +       by looking to see if the opcode handles 64-bit data.  */ +    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0; + +    /* Hoist the loads of the most common arguments.  */ +    TCGArg a0 = args[0]; +    TCGArg a1 = args[1]; +    TCGArg a2 = args[2]; +    int c2 = const_args[2]; + +    /* Some operands are defined with "rZ" constraint, a register or +       the zero register.  These need not actually test args[I] == 0.  */ +#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) + +    switch (opc) { +    case INDEX_op_exit_tb: +        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); +        tcg_out_goto(s, tb_ret_addr); +        break; + +    case INDEX_op_goto_tb: +#ifndef USE_DIRECT_JUMP +#error "USE_DIRECT_JUMP required for aarch64" +#endif +        assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */ +        s->tb_jmp_offset[a0] = tcg_current_code_size(s); +        /* actual branch destination will be patched by +           aarch64_tb_set_jmp_target later, beware retranslation. 
*/ +        tcg_out_goto_noaddr(s); +        s->tb_next_offset[a0] = tcg_current_code_size(s); +        break; + +    case INDEX_op_br: +        tcg_out_goto_label(s, arg_label(a0)); +        break; + +    case INDEX_op_ld8u_i32: +    case INDEX_op_ld8u_i64: +        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2); +        break; +    case INDEX_op_ld8s_i32: +        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2); +        break; +    case INDEX_op_ld8s_i64: +        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2); +        break; +    case INDEX_op_ld16u_i32: +    case INDEX_op_ld16u_i64: +        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2); +        break; +    case INDEX_op_ld16s_i32: +        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2); +        break; +    case INDEX_op_ld16s_i64: +        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2); +        break; +    case INDEX_op_ld_i32: +    case INDEX_op_ld32u_i64: +        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2); +        break; +    case INDEX_op_ld32s_i64: +        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2); +        break; +    case INDEX_op_ld_i64: +        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2); +        break; + +    case INDEX_op_st8_i32: +    case INDEX_op_st8_i64: +        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2); +        break; +    case INDEX_op_st16_i32: +    case INDEX_op_st16_i64: +        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2); +        break; +    case INDEX_op_st_i32: +    case INDEX_op_st32_i64: +        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2); +        break; +    case INDEX_op_st_i64: +        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2); +        break; + +    case INDEX_op_add_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_add_i64: +        if (c2) { +            tcg_out_addsubi(s, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_sub_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_sub_i64: +        if (c2) { +            tcg_out_addsubi(s, ext, a0, a1, -a2); +        } else { +            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_neg_i64: +    case INDEX_op_neg_i32: +        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); +        break; + +    case INDEX_op_and_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_and_i64: +        if (c2) { +            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_andc_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_andc_i64: +        if (c2) { +            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); +        } else { +            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_or_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_or_i64: +        if (c2) { +            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_orc_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_orc_i64: +        if (c2) { +            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); +        } else { +            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_xor_i32: +        
a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_xor_i64: +        if (c2) { +            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_eqv_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_eqv_i64: +        if (c2) { +            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); +        } else { +            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_not_i64: +    case INDEX_op_not_i32: +        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); +        break; + +    case INDEX_op_mul_i64: +    case INDEX_op_mul_i32: +        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); +        break; + +    case INDEX_op_div_i64: +    case INDEX_op_div_i32: +        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); +        break; +    case INDEX_op_divu_i64: +    case INDEX_op_divu_i32: +        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); +        break; + +    case INDEX_op_rem_i64: +    case INDEX_op_rem_i32: +        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); +        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); +        break; +    case INDEX_op_remu_i64: +    case INDEX_op_remu_i32: +        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); +        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); +        break; + +    case INDEX_op_shl_i64: +    case INDEX_op_shl_i32: +        if (c2) { +            tcg_out_shl(s, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_shr_i64: +    case INDEX_op_shr_i32: +        if (c2) { +            tcg_out_shr(s, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_sar_i64: +    case INDEX_op_sar_i32: +        if (c2) { +            tcg_out_sar(s, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_rotr_i64: +    case INDEX_op_rotr_i32: +        if (c2) { +            tcg_out_rotr(s, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); +        } +        break; + +    case INDEX_op_rotl_i64: +    case INDEX_op_rotl_i32: +        if (c2) { +            tcg_out_rotl(s, ext, a0, a1, a2); +        } else { +            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); +            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); +        } +        break; + +    case INDEX_op_brcond_i32: +        a1 = (int32_t)a1; +        /* FALLTHRU */ +    case INDEX_op_brcond_i64: +        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); +        break; + +    case INDEX_op_setcond_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_setcond_i64: +        tcg_out_cmp(s, ext, a1, a2, c2); +        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  
*/ +        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, +                     TCG_REG_XZR, tcg_invert_cond(args[3])); +        break; + +    case INDEX_op_movcond_i32: +        a2 = (int32_t)a2; +        /* FALLTHRU */ +    case INDEX_op_movcond_i64: +        tcg_out_cmp(s, ext, a1, a2, c2); +        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]); +        break; + +    case INDEX_op_qemu_ld_i32: +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, a0, a1, a2, ext); +        break; +    case INDEX_op_qemu_st_i32: +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, REG0(0), a1, a2); +        break; + +    case INDEX_op_bswap64_i64: +        tcg_out_rev64(s, a0, a1); +        break; +    case INDEX_op_bswap32_i64: +    case INDEX_op_bswap32_i32: +        tcg_out_rev32(s, a0, a1); +        break; +    case INDEX_op_bswap16_i64: +    case INDEX_op_bswap16_i32: +        tcg_out_rev16(s, a0, a1); +        break; + +    case INDEX_op_ext8s_i64: +    case INDEX_op_ext8s_i32: +        tcg_out_sxt(s, ext, MO_8, a0, a1); +        break; +    case INDEX_op_ext16s_i64: +    case INDEX_op_ext16s_i32: +        tcg_out_sxt(s, ext, MO_16, a0, a1); +        break; +    case INDEX_op_ext32s_i64: +        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); +        break; +    case INDEX_op_ext8u_i64: +    case INDEX_op_ext8u_i32: +        tcg_out_uxt(s, MO_8, a0, a1); +        break; +    case INDEX_op_ext16u_i64: +    case INDEX_op_ext16u_i32: +        tcg_out_uxt(s, MO_16, a0, a1); +        break; +    case INDEX_op_ext32u_i64: +        tcg_out_movr(s, TCG_TYPE_I32, a0, a1); +        break; + +    case INDEX_op_deposit_i64: +    case INDEX_op_deposit_i32: +        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]); +        break; + +    case INDEX_op_add2_i32: +        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), +                        (int32_t)args[4], args[5], const_args[4], +                        const_args[5], false); +        break; +    case INDEX_op_add2_i64: +        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], +                        args[5], const_args[4], const_args[5], false); +        break; +    case INDEX_op_sub2_i32: +        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3), +                        (int32_t)args[4], args[5], const_args[4], +                        const_args[5], true); +        break; +    case INDEX_op_sub2_i64: +        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4], +                        args[5], const_args[4], const_args[5], true); +        break; + +    case INDEX_op_muluh_i64: +        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); +        break; +    case INDEX_op_mulsh_i64: +        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); +        break; + +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_mov_i64: +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_movi_i64: +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  
*/ +    default: +        tcg_abort(); +    } + +#undef REG0 +} + +static const TCGTargetOpDef aarch64_op_defs[] = { +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, +    { INDEX_op_br, { } }, + +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, +    { INDEX_op_ld8u_i64, { "r", "r" } }, +    { INDEX_op_ld8s_i64, { "r", "r" } }, +    { INDEX_op_ld16u_i64, { "r", "r" } }, +    { INDEX_op_ld16s_i64, { "r", "r" } }, +    { INDEX_op_ld32u_i64, { "r", "r" } }, +    { INDEX_op_ld32s_i64, { "r", "r" } }, +    { INDEX_op_ld_i64, { "r", "r" } }, + +    { INDEX_op_st8_i32, { "rZ", "r" } }, +    { INDEX_op_st16_i32, { "rZ", "r" } }, +    { INDEX_op_st_i32, { "rZ", "r" } }, +    { INDEX_op_st8_i64, { "rZ", "r" } }, +    { INDEX_op_st16_i64, { "rZ", "r" } }, +    { INDEX_op_st32_i64, { "rZ", "r" } }, +    { INDEX_op_st_i64, { "rZ", "r" } }, + +    { INDEX_op_add_i32, { "r", "r", "rA" } }, +    { INDEX_op_add_i64, { "r", "r", "rA" } }, +    { INDEX_op_sub_i32, { "r", "r", "rA" } }, +    { INDEX_op_sub_i64, { "r", "r", "rA" } }, +    { INDEX_op_mul_i32, { "r", "r", "r" } }, +    { INDEX_op_mul_i64, { "r", "r", "r" } }, +    { INDEX_op_div_i32, { "r", "r", "r" } }, +    { INDEX_op_div_i64, { "r", "r", "r" } }, +    { INDEX_op_divu_i32, { "r", "r", "r" } }, +    { INDEX_op_divu_i64, { "r", "r", "r" } }, +    { INDEX_op_rem_i32, { "r", "r", "r" } }, +    { INDEX_op_rem_i64, { "r", "r", "r" } }, +    { INDEX_op_remu_i32, { "r", "r", "r" } }, +    { INDEX_op_remu_i64, { "r", "r", "r" } }, +    { INDEX_op_and_i32, { "r", "r", "rL" } }, +    { INDEX_op_and_i64, { "r", "r", "rL" } }, +    { INDEX_op_or_i32, { "r", "r", "rL" } }, +    { INDEX_op_or_i64, { "r", "r", "rL" } }, +    { INDEX_op_xor_i32, { "r", "r", "rL" } }, +    { INDEX_op_xor_i64, { "r", "r", "rL" } }, +    { INDEX_op_andc_i32, { "r", "r", "rL" } }, +    { INDEX_op_andc_i64, { "r", "r", "rL" } }, +    { INDEX_op_orc_i32, { "r", "r", "rL" } }, +    { INDEX_op_orc_i64, { "r", "r", "rL" } }, +    { INDEX_op_eqv_i32, { "r", "r", "rL" } }, +    { INDEX_op_eqv_i64, { "r", "r", "rL" } }, + +    { INDEX_op_neg_i32, { "r", "r" } }, +    { INDEX_op_neg_i64, { "r", "r" } }, +    { INDEX_op_not_i32, { "r", "r" } }, +    { INDEX_op_not_i64, { "r", "r" } }, + +    { INDEX_op_shl_i32, { "r", "r", "ri" } }, +    { INDEX_op_shr_i32, { "r", "r", "ri" } }, +    { INDEX_op_sar_i32, { "r", "r", "ri" } }, +    { INDEX_op_rotl_i32, { "r", "r", "ri" } }, +    { INDEX_op_rotr_i32, { "r", "r", "ri" } }, +    { INDEX_op_shl_i64, { "r", "r", "ri" } }, +    { INDEX_op_shr_i64, { "r", "r", "ri" } }, +    { INDEX_op_sar_i64, { "r", "r", "ri" } }, +    { INDEX_op_rotl_i64, { "r", "r", "ri" } }, +    { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + +    { INDEX_op_brcond_i32, { "r", "rA" } }, +    { INDEX_op_brcond_i64, { "r", "rA" } }, +    { INDEX_op_setcond_i32, { "r", "r", "rA" } }, +    { INDEX_op_setcond_i64, { "r", "r", "rA" } }, +    { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } }, +    { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } }, + +    { INDEX_op_qemu_ld_i32, { "r", "l" } }, +    { INDEX_op_qemu_ld_i64, { "r", "l" } }, +    { INDEX_op_qemu_st_i32, { "lZ", "l" } }, +    { INDEX_op_qemu_st_i64, { "lZ", "l" } }, + +    { INDEX_op_bswap16_i32, { "r", "r" } }, +    { INDEX_op_bswap32_i32, { "r", "r" } }, +    { INDEX_op_bswap16_i64, { "r", "r" } }, +    { INDEX_op_bswap32_i64, { "r", "r" 
} }, +    { INDEX_op_bswap64_i64, { "r", "r" } }, + +    { INDEX_op_ext8s_i32, { "r", "r" } }, +    { INDEX_op_ext16s_i32, { "r", "r" } }, +    { INDEX_op_ext8u_i32, { "r", "r" } }, +    { INDEX_op_ext16u_i32, { "r", "r" } }, + +    { INDEX_op_ext8s_i64, { "r", "r" } }, +    { INDEX_op_ext16s_i64, { "r", "r" } }, +    { INDEX_op_ext32s_i64, { "r", "r" } }, +    { INDEX_op_ext8u_i64, { "r", "r" } }, +    { INDEX_op_ext16u_i64, { "r", "r" } }, +    { INDEX_op_ext32u_i64, { "r", "r" } }, + +    { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, +    { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + +    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, +    { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, +    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, +    { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } }, + +    { INDEX_op_muluh_i64, { "r", "r", "r" } }, +    { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + +    { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); + +    tcg_regset_set32(tcg_target_call_clobber_regs, 0, +                     (1 << TCG_REG_X0) | (1 << TCG_REG_X1) | +                     (1 << TCG_REG_X2) | (1 << TCG_REG_X3) | +                     (1 << TCG_REG_X4) | (1 << TCG_REG_X5) | +                     (1 << TCG_REG_X6) | (1 << TCG_REG_X7) | +                     (1 << TCG_REG_X8) | (1 << TCG_REG_X9) | +                     (1 << TCG_REG_X10) | (1 << TCG_REG_X11) | +                     (1 << TCG_REG_X12) | (1 << TCG_REG_X13) | +                     (1 << TCG_REG_X14) | (1 << TCG_REG_X15) | +                     (1 << TCG_REG_X16) | (1 << TCG_REG_X17) | +                     (1 << TCG_REG_X18) | (1 << TCG_REG_X30)); + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ + +    tcg_add_target_add_op_defs(aarch64_op_defs); +} + +/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */ +#define PUSH_SIZE  ((30 - 19 + 1) * 8) + +#define FRAME_SIZE \ +    ((PUSH_SIZE \ +      + TCG_STATIC_CALL_ARGS_SIZE \ +      + CPU_TEMP_BUF_NLONGS * sizeof(long) \ +      + TCG_TARGET_STACK_ALIGN - 1) \ +     & ~(TCG_TARGET_STACK_ALIGN - 1)) + +/* We're expecting a 2 byte uleb128 encoded value.  */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +/* We're expecting to use a single ADDI insn.  */ +QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff); + +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    TCGReg r; + +    /* Push (FP, LR) and allocate space for all saved registers.  */ +    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR, +                 TCG_REG_SP, -PUSH_SIZE, 1, 1); + +    /* Set up frame pointer for canonical unwinding.  */ +    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP); + +    /* Store callee-preserved regs x19..x28.  */ +    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { +        int ofs = (r - TCG_REG_X19 + 2) * 8; +        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0); +    } + +    /* Make stack space for TCG locals.  
*/
+    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
+                 FRAME_SIZE - PUSH_SIZE);
+
+    /* Inform TCG about how to find TCG locals with register, offset, size.  */
+    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+#if defined(CONFIG_USE_GUEST_BASE)
+    if (USE_GUEST_BASE) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
+    }
+#endif
+
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
+
+    tb_ret_addr = s->code_ptr;
+
+    /* Remove TCG locals stack space.  */
+    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
+                 FRAME_SIZE - PUSH_SIZE);
+
+    /* Restore registers x19..x28.  */
+    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
+        int ofs = (r - TCG_REG_X19 + 2) * 8;
+        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
+    }
+
+    /* Pop (FP, LR), restore SP to previous frame.  */
+    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
+                 TCG_REG_SP, PUSH_SIZE, 0, 1);
+    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
+}
+
+typedef struct {
+    DebugFrameHeader h;
+    uint8_t fde_def_cfa[4];
+    uint8_t fde_reg_ofs[24];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_AARCH64
+
+static const DebugFrame debug_frame = {
+    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+    .h.cie.id = -1,
+    .h.cie.version = 1,
+    .h.cie.code_align = 1,
+    .h.cie.data_align = 0x78,             /* sleb128 -8 */
+    .h.cie.return_column = TCG_REG_LR,
+
+    /* Total FDE size does not include the "len" member.  */
+    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+    .fde_def_cfa = {
+        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
+        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
+        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
+        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
+        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
+        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
+        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
+        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
+        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
+        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
+        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
+        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
+        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
+    }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
new file mode 100644
index 00000000..8aec04d2
--- /dev/null
+++ b/tcg/aarch64/tcg-target.h
@@ -0,0 +1,108 @@
+/*
+ * Initial TCG Implementation for aarch64
+ *
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ * Written by Claudio Fontana
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ * + * See the COPYING file in the top-level directory for details. + */ + +#ifndef TCG_TARGET_AARCH64 +#define TCG_TARGET_AARCH64 1 + +#define TCG_TARGET_INSN_UNIT_SIZE  4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 +#undef TCG_TARGET_STACK_GROWSUP + +typedef enum { +    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, +    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, +    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, +    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, +    TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19, +    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, +    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, +    TCG_REG_X28, TCG_REG_X29, TCG_REG_X30, + +    /* X31 is either the stack pointer or zero, depending on context.  */ +    TCG_REG_SP = 31, +    TCG_REG_XZR = 31, + +    /* Aliases.  */ +    TCG_REG_FP = TCG_REG_X29, +    TCG_REG_LR = TCG_REG_X30, +    TCG_AREG0  = TCG_REG_X19, +} TCGReg; + +#define TCG_TARGET_NB_REGS 32 + +/* used for function call generation */ +#define TCG_REG_CALL_STACK              TCG_REG_SP +#define TCG_TARGET_STACK_ALIGN          16 +#define TCG_TARGET_CALL_ALIGN_ARGS      1 +#define TCG_TARGET_CALL_STACK_OFFSET    0 + +/* optional instructions */ +#define TCG_TARGET_HAS_div_i32          1 +#define TCG_TARGET_HAS_rem_i32          1 +#define TCG_TARGET_HAS_ext8s_i32        1 +#define TCG_TARGET_HAS_ext16s_i32       1 +#define TCG_TARGET_HAS_ext8u_i32        1 +#define TCG_TARGET_HAS_ext16u_i32       1 +#define TCG_TARGET_HAS_bswap16_i32      1 +#define TCG_TARGET_HAS_bswap32_i32      1 +#define TCG_TARGET_HAS_not_i32          1 +#define TCG_TARGET_HAS_neg_i32          1 +#define TCG_TARGET_HAS_rot_i32          1 +#define TCG_TARGET_HAS_andc_i32         1 +#define TCG_TARGET_HAS_orc_i32          1 +#define TCG_TARGET_HAS_eqv_i32          1 +#define TCG_TARGET_HAS_nand_i32         0 +#define TCG_TARGET_HAS_nor_i32          0 +#define TCG_TARGET_HAS_deposit_i32      1 +#define TCG_TARGET_HAS_movcond_i32      1 +#define TCG_TARGET_HAS_add2_i32         1 +#define TCG_TARGET_HAS_sub2_i32         1 +#define TCG_TARGET_HAS_mulu2_i32        0 +#define TCG_TARGET_HAS_muls2_i32        0 +#define TCG_TARGET_HAS_muluh_i32        0 +#define TCG_TARGET_HAS_mulsh_i32        0 +#define TCG_TARGET_HAS_trunc_shr_i32    0 + +#define TCG_TARGET_HAS_div_i64          1 +#define TCG_TARGET_HAS_rem_i64          1 +#define TCG_TARGET_HAS_ext8s_i64        1 +#define TCG_TARGET_HAS_ext16s_i64       1 +#define TCG_TARGET_HAS_ext32s_i64       1 +#define TCG_TARGET_HAS_ext8u_i64        1 +#define TCG_TARGET_HAS_ext16u_i64       1 +#define TCG_TARGET_HAS_ext32u_i64       1 +#define TCG_TARGET_HAS_bswap16_i64      1 +#define TCG_TARGET_HAS_bswap32_i64      1 +#define TCG_TARGET_HAS_bswap64_i64      1 +#define TCG_TARGET_HAS_not_i64          1 +#define TCG_TARGET_HAS_neg_i64          1 +#define TCG_TARGET_HAS_rot_i64          1 +#define TCG_TARGET_HAS_andc_i64         1 +#define TCG_TARGET_HAS_orc_i64          1 +#define TCG_TARGET_HAS_eqv_i64          1 +#define TCG_TARGET_HAS_nand_i64         0 +#define TCG_TARGET_HAS_nor_i64          0 +#define TCG_TARGET_HAS_deposit_i64      1 +#define TCG_TARGET_HAS_movcond_i64      1 +#define TCG_TARGET_HAS_add2_i64         1 +#define TCG_TARGET_HAS_sub2_i64         1 +#define TCG_TARGET_HAS_mulu2_i64        0 +#define TCG_TARGET_HAS_muls2_i64        0 +#define TCG_TARGET_HAS_muluh_i64        1 +#define TCG_TARGET_HAS_mulsh_i64        1 + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) 
+{ +    __builtin___clear_cache((char *)start, (char *)stop); +} + +#endif /* TCG_TARGET_AARCH64 */ diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c new file mode 100644 index 00000000..ae2ec7a9 --- /dev/null +++ b/tcg/arm/tcg-target.c @@ -0,0 +1,2128 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Andrzej Zaborowski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "elf.h" +#include "tcg-be-ldst.h" + +/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */ +#ifndef __ARM_ARCH +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ +     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ +     || defined(__ARM_ARCH_7EM__) +#  define __ARM_ARCH 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ +       || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ +       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) +#  define __ARM_ARCH 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ +       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ +       || defined(__ARM_ARCH_5TEJ__) +#  define __ARM_ARCH 5 +# else +#  define __ARM_ARCH 4 +# endif +#endif + +static int arm_arch = __ARM_ARCH; + +#if defined(__ARM_ARCH_5T__) \ +    || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) +# define use_armv5t_instructions 1 +#else +# define use_armv5t_instructions use_armv6_instructions +#endif + +#define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6) +#define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7) + +#ifndef use_idiv_instructions +bool use_idiv_instructions; +#endif + +/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  
*/ +#ifdef CONFIG_SOFTMMU +# define USING_SOFTMMU 1 +#else +# define USING_SOFTMMU 0 +#endif + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +    "%r0", +    "%r1", +    "%r2", +    "%r3", +    "%r4", +    "%r5", +    "%r6", +    "%r7", +    "%r8", +    "%r9", +    "%r10", +    "%r11", +    "%r12", +    "%r13", +    "%r14", +    "%pc", +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { +    TCG_REG_R4, +    TCG_REG_R5, +    TCG_REG_R6, +    TCG_REG_R7, +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R13, +    TCG_REG_R0, +    TCG_REG_R1, +    TCG_REG_R2, +    TCG_REG_R3, +    TCG_REG_R12, +    TCG_REG_R14, +}; + +static const int tcg_target_call_iarg_regs[4] = { +    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3 +}; +static const int tcg_target_call_oarg_regs[2] = { +    TCG_REG_R0, TCG_REG_R1 +}; + +#define TCG_REG_TMP  TCG_REG_R12 + +static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) +{ +    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; +    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    assert(type == R_ARM_PC24); +    assert(addend == 0); +    reloc_pc24(code_ptr, (tcg_insn_unit *)value); +} + +#define TCG_CT_CONST_ARM  0x100 +#define TCG_CT_CONST_INV  0x200 +#define TCG_CT_CONST_NEG  0x400 +#define TCG_CT_CONST_ZERO 0x800 + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str; + +    ct_str = *pct_str; +    switch (ct_str[0]) { +    case 'I': +        ct->ct |= TCG_CT_CONST_ARM; +        break; +    case 'K': +        ct->ct |= TCG_CT_CONST_INV; +        break; +    case 'N': /* The gcc constraint letter is L, already used here.  */ +        ct->ct |= TCG_CT_CONST_NEG; +        break; +    case 'Z': +        ct->ct |= TCG_CT_CONST_ZERO; +        break; + +    case 'r': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); +        break; + +    /* qemu_ld address */ +    case 'l': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); +#ifdef CONFIG_SOFTMMU +        /* r0-r2,lr will be overwritten when reading the tlb entry, +           so don't use these. */ +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); +#endif +        break; + +    /* qemu_st address & data */ +    case 's': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); +        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) +           and r0-r1 doing the byte swapping, so don't use these. 
*/ +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1); +#if defined(CONFIG_SOFTMMU) +        /* Avoid clashes with registers being used for helper args */ +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2); +#if TARGET_LONG_BITS == 64 +        /* Avoid clashes with registers being used for helper args */ +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#endif +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14); +#endif +        break; + +    default: +        return -1; +    } +    ct_str++; +    *pct_str = ct_str; + +    return 0; +} + +static inline uint32_t rotl(uint32_t val, int n) +{ +  return (val << n) | (val >> (32 - n)); +} + +/* ARM immediates for ALU instructions are made of an unsigned 8-bit +   right-rotated by an even amount between 0 and 30. */ +static inline int encode_imm(uint32_t imm) +{ +    int shift; + +    /* simple case, only lower bits */ +    if ((imm & ~0xff) == 0) +        return 0; +    /* then try a simple even shift */ +    shift = ctz32(imm) & ~1; +    if (((imm >> shift) & ~0xff) == 0) +        return 32 - shift; +    /* now try harder with rotations */ +    if ((rotl(imm, 2) & ~0xff) == 0) +        return 2; +    if ((rotl(imm, 4) & ~0xff) == 0) +        return 4; +    if ((rotl(imm, 6) & ~0xff) == 0) +        return 6; +    /* imm can't be encoded */ +    return -1; +} + +static inline int check_fit_imm(uint32_t imm) +{ +    return encode_imm(imm) >= 0; +} + +/* Test if a constant matches the constraint. + * TODO: define constraints for: + * + * ldr/str offset:   between -0xfff and 0xfff + * ldrh/strh offset: between -0xff and 0xff + * mov operand2:     values represented with x << (2 * y), x < 0x100 + * add, sub, eor...: ditto + */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, +                                         const TCGArgConstraint *arg_ct) +{ +    int ct; +    ct = arg_ct->ct; +    if (ct & TCG_CT_CONST) { +        return 1; +    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) { +        return 1; +    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) { +        return 1; +    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) { +        return 1; +    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { +        return 1; +    } else { +        return 0; +    } +} + +#define TO_CPSR (1 << 20) + +typedef enum { +    ARITH_AND = 0x0 << 21, +    ARITH_EOR = 0x1 << 21, +    ARITH_SUB = 0x2 << 21, +    ARITH_RSB = 0x3 << 21, +    ARITH_ADD = 0x4 << 21, +    ARITH_ADC = 0x5 << 21, +    ARITH_SBC = 0x6 << 21, +    ARITH_RSC = 0x7 << 21, +    ARITH_TST = 0x8 << 21 | TO_CPSR, +    ARITH_CMP = 0xa << 21 | TO_CPSR, +    ARITH_CMN = 0xb << 21 | TO_CPSR, +    ARITH_ORR = 0xc << 21, +    ARITH_MOV = 0xd << 21, +    ARITH_BIC = 0xe << 21, +    ARITH_MVN = 0xf << 21, + +    INSN_LDR_IMM   = 0x04100000, +    INSN_LDR_REG   = 0x06100000, +    INSN_STR_IMM   = 0x04000000, +    INSN_STR_REG   = 0x06000000, + +    INSN_LDRH_IMM  = 0x005000b0, +    INSN_LDRH_REG  = 0x001000b0, +    INSN_LDRSH_IMM = 0x005000f0, +    INSN_LDRSH_REG = 0x001000f0, +    INSN_STRH_IMM  = 0x004000b0, +    INSN_STRH_REG  = 0x000000b0, + +    INSN_LDRB_IMM  = 0x04500000, +    INSN_LDRB_REG  = 0x06500000, +    INSN_LDRSB_IMM = 0x005000d0, +    INSN_LDRSB_REG = 0x001000d0, +    INSN_STRB_IMM  = 0x04400000, +    INSN_STRB_REG  = 0x06400000, + +    INSN_LDRD_IMM  = 0x004000d0, +    INSN_LDRD_REG  = 0x000000d0, +    INSN_STRD_IMM  = 0x004000f0, +    INSN_STRD_REG  = 
0x000000f0,
+} ARMInsn;
+
+#define SHIFT_IMM_LSL(im)	(((im) << 7) | 0x00)
+#define SHIFT_IMM_LSR(im)	(((im) << 7) | 0x20)
+#define SHIFT_IMM_ASR(im)	(((im) << 7) | 0x40)
+#define SHIFT_IMM_ROR(im)	(((im) << 7) | 0x60)
+#define SHIFT_REG_LSL(rs)	(((rs) << 8) | 0x10)
+#define SHIFT_REG_LSR(rs)	(((rs) << 8) | 0x30)
+#define SHIFT_REG_ASR(rs)	(((rs) << 8) | 0x50)
+#define SHIFT_REG_ROR(rs)	(((rs) << 8) | 0x70)
+
+enum arm_cond_code_e {
+    COND_EQ = 0x0,
+    COND_NE = 0x1,
+    COND_CS = 0x2,	/* Unsigned greater or equal */
+    COND_CC = 0x3,	/* Unsigned less than */
+    COND_MI = 0x4,	/* Negative */
+    COND_PL = 0x5,	/* Zero or greater */
+    COND_VS = 0x6,	/* Overflow */
+    COND_VC = 0x7,	/* No overflow */
+    COND_HI = 0x8,	/* Unsigned greater than */
+    COND_LS = 0x9,	/* Unsigned less or equal */
+    COND_GE = 0xa,
+    COND_LT = 0xb,
+    COND_GT = 0xc,
+    COND_LE = 0xd,
+    COND_AL = 0xe,
+};
+
+static const uint8_t tcg_cond_to_arm_cond[] = {
+    [TCG_COND_EQ] = COND_EQ,
+    [TCG_COND_NE] = COND_NE,
+    [TCG_COND_LT] = COND_LT,
+    [TCG_COND_GE] = COND_GE,
+    [TCG_COND_LE] = COND_LE,
+    [TCG_COND_GT] = COND_GT,
+    /* unsigned */
+    [TCG_COND_LTU] = COND_CC,
+    [TCG_COND_GEU] = COND_CS,
+    [TCG_COND_LEU] = COND_LS,
+    [TCG_COND_GTU] = COND_HI,
+};
+
+static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
+{
+    tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
+}
+
+static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
+{
+    tcg_out32(s, (cond << 28) | 0x0a000000 |
+                    (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
+{
+    /* We pay attention here to not modify the branch target by masking
+       the corresponding bytes.  This ensures that caches and memory are
+       kept coherent during retranslation. */
+    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
+}
+
+static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
+{
+    /* We pay attention here to not modify the branch target by masking
+       the corresponding bytes.  This ensures that caches and memory are
+       kept coherent during retranslation. */
+    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
+}
+
+static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
+{
+    tcg_out32(s, (cond << 28) | 0x0b000000 |
+                    (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
+{
+    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
+}
+
+static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
+{
+    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
+                (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static inline void tcg_out_dat_reg(TCGContext *s,
+                int cond, int opc, int rd, int rn, int rm, int shift)
+{
+    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
+                    (rn << 16) | (rd << 12) | shift | rm);
+}
+
+static inline void tcg_out_nop(TCGContext *s)
+{
+    if (use_armv7_instructions) {
+        /* Architected nop introduced in v6k.  */
+        /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
+           also Just So Happened to do nothing on pre-v6k so that we
+           don't need to conditionalize it?  */
+        tcg_out32(s, 0xe320f000);
+    } else {
+        /* Prior to that the assembler uses mov r0, r0.
*/ +        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); +    } +} + +static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) +{ +    /* Simple reg-reg move, optimising out the 'do nothing' case */ +    if (rd != rm) { +        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0)); +    } +} + +static inline void tcg_out_dat_imm(TCGContext *s, +                int cond, int opc, int rd, int rn, int im) +{ +    tcg_out32(s, (cond << 28) | (1 << 25) | opc | +                    (rn << 16) | (rd << 12) | im); +} + +static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) +{ +    int rot, opc, rn; + +    /* For armv7, make sure not to use movw+movt when mov/mvn would do. +       Speed things up by only checking when movt would be required. +       Prior to armv7, have one go at fully rotated immediates before +       doing the decomposition thing below.  */ +    if (!use_armv7_instructions || (arg & 0xffff0000)) { +        rot = encode_imm(arg); +        if (rot >= 0) { +            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, +                            rotl(arg, rot) | (rot << 7)); +            return; +        } +        rot = encode_imm(~arg); +        if (rot >= 0) { +            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, +                            rotl(~arg, rot) | (rot << 7)); +            return; +        } +    } + +    /* Use movw + movt.  */ +    if (use_armv7_instructions) { +        /* movw */ +        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) +                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); +        if (arg & 0xffff0000) { +            /* movt */ +            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) +                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); +        } +        return; +    } + +    /* TODO: This is very suboptimal, we can easily have a constant +       pool somewhere after all the instructions.  */ +    opc = ARITH_MOV; +    rn = 0; +    /* If we have lots of leading 1's, we can shorten the sequence by +       beginning with mvn and then clearing higher bits with eor.  */ +    if (clz32(~arg) > clz32(arg)) { +        opc = ARITH_MVN, arg = ~arg; +    } +    do { +        int i = ctz32(arg) & ~1; +        rot = ((32 - i) << 7) & 0xf00; +        tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); +        arg &= ~(0xff << i); + +        opc = ARITH_EOR; +        rn = rd; +    } while (arg); +} + +static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, +                                  TCGArg lhs, TCGArg rhs, int rhs_is_const) +{ +    /* Emit either the reg,imm or reg,reg form of a data-processing insn. +     * rhs must satisfy the "rI" constraint. +     */ +    if (rhs_is_const) { +        int rot = encode_imm(rhs); +        assert(rot >= 0); +        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); +    } else { +        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); +    } +} + +static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv, +                            TCGReg dst, TCGReg lhs, TCGArg rhs, +                            bool rhs_is_const) +{ +    /* Emit either the reg,imm or reg,reg form of a data-processing insn. +     * rhs must satisfy the "rIK" constraint. 
+     */ +    if (rhs_is_const) { +        int rot = encode_imm(rhs); +        if (rot < 0) { +            rhs = ~rhs; +            rot = encode_imm(rhs); +            assert(rot >= 0); +            opc = opinv; +        } +        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); +    } else { +        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); +    } +} + +static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg, +                            TCGArg dst, TCGArg lhs, TCGArg rhs, +                            bool rhs_is_const) +{ +    /* Emit either the reg,imm or reg,reg form of a data-processing insn. +     * rhs must satisfy the "rIN" constraint. +     */ +    if (rhs_is_const) { +        int rot = encode_imm(rhs); +        if (rot < 0) { +            rhs = -rhs; +            rot = encode_imm(rhs); +            assert(rot >= 0); +            opc = opneg; +        } +        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7)); +    } else { +        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); +    } +} + +static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd, +                                 TCGReg rn, TCGReg rm) +{ +    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */ +    if (!use_armv6_instructions && rd == rn) { +        if (rd == rm) { +            /* rd == rn == rm; copy an input to tmp first.  */ +            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); +            rm = rn = TCG_REG_TMP; +        } else { +            rn = rm; +            rm = rd; +        } +    } +    /* mul */ +    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); +} + +static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0, +                                   TCGReg rd1, TCGReg rn, TCGReg rm) +{ +    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */ +    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { +        if (rd0 == rm || rd1 == rm) { +            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); +            rn = TCG_REG_TMP; +        } else { +            TCGReg t = rn; +            rn = rm; +            rm = t; +        } +    } +    /* umull */ +    tcg_out32(s, (cond << 28) | 0x00800090 | +              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0, +                                   TCGReg rd1, TCGReg rn, TCGReg rm) +{ +    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */ +    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) { +        if (rd0 == rm || rd1 == rm) { +            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn); +            rn = TCG_REG_TMP; +        } else { +            TCGReg t = rn; +            rn = rm; +            rm = t; +        } +    } +    /* smull */ +    tcg_out32(s, (cond << 28) | 0x00c00090 | +              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ +    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm) +{ +    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); +} + +static inline void tcg_out_ext8s(TCGContext *s, int cond, +                                 int rd, int rn) +{ +    if (use_armv6_instructions) { +        /* sxtb */ +        tcg_out32(s, 0x06af0070 | (cond << 28) | 
(rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rn, SHIFT_IMM_LSL(24));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rd, SHIFT_IMM_ASR(24));
+    }
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int cond,
+                                 int rd, int rn)
+{
+    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int cond,
+                                  int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* sxth */
+        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rn, SHIFT_IMM_LSL(16));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rd, SHIFT_IMM_ASR(16));
+    }
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int cond,
+                                  int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* uxth */
+        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rn, SHIFT_IMM_LSL(16));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rd, SHIFT_IMM_LSR(16));
+    }
+}
+
+static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* revsh */
+        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
+        tcg_out_dat_reg(s, cond, ARITH_ORR,
+                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
+    }
+}
+
+static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* rev16 */
+        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
+        tcg_out_dat_reg(s, cond, ARITH_ORR,
+                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
+    }
+}
+
+/* swap the two low bytes assuming that the two high input bytes and the
+   two high output bytes can hold any value.
*/ +static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn) +{ +    if (use_armv6_instructions) { +        /* rev16 */ +        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); +    } else { +        tcg_out_dat_reg(s, cond, ARITH_MOV, +                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8)); +        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff); +        tcg_out_dat_reg(s, cond, ARITH_ORR, +                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8)); +    } +} + +static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) +{ +    if (use_armv6_instructions) { +        /* rev */ +        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); +    } else { +        tcg_out_dat_reg(s, cond, ARITH_EOR, +                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16)); +        tcg_out_dat_imm(s, cond, ARITH_BIC, +                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800); +        tcg_out_dat_reg(s, cond, ARITH_MOV, +                        rd, 0, rn, SHIFT_IMM_ROR(8)); +        tcg_out_dat_reg(s, cond, ARITH_EOR, +                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8)); +    } +} + +bool tcg_target_deposit_valid(int ofs, int len) +{ +    /* ??? Without bfi, we could improve over generic code by combining +       the right-shift from a non-zero ofs with the orr.  We do run into +       problems when rd == rs, and the mask generated from ofs+len doesn't +       fit into an immediate.  We would have to be careful not to pessimize +       wrt the optimizations performed on the expanded code.  */ +    return use_armv7_instructions; +} + +static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd, +                                   TCGArg a1, int ofs, int len, bool const_a1) +{ +    if (const_a1) { +        /* bfi becomes bfc with rn == 15.  */ +        a1 = 15; +    } +    /* bfi/bfc */ +    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 +              | (ofs << 7) | ((ofs + len - 1) << 16)); +} + +/* Note that this routine is used for both LDR and LDRH formats, so we do +   not wish to include an immediate shift at this point.  
*/ +static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, +                            TCGReg rn, TCGReg rm, bool u, bool p, bool w) +{ +    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) +              | (w << 21) | (rn << 16) | (rt << 12) | rm); +} + +static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, +                            TCGReg rn, int imm8, bool p, bool w) +{ +    bool u = 1; +    if (imm8 < 0) { +        imm8 = -imm8; +        u = 0; +    } +    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | +              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf)); +} + +static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt, +                             TCGReg rn, int imm12, bool p, bool w) +{ +    bool u = 1; +    if (imm12 < 0) { +        imm12 = -imm12; +        u = 0; +    } +    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) | +              (rn << 16) | (rt << 12) | imm12); +} + +static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, int imm12) +{ +    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, int imm12) +{ +    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, int imm8) +{ +    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, int imm8) +{ +    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0); +} + +/* Register pre-increment with base writeback.  
*/ +static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt, +                                    TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1); +} + +static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt, +                                    TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1); +} + +static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, int imm8) +{ +    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, int imm8) +{ +    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, int imm8) +{ +    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt, +                                   TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, int imm12) +{ +    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, int imm12) +{ +    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0); +} + +static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt, +                                 TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt, +                                 TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, int imm8) +{ +    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0); +} + +static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt, +                                  TCGReg rn, TCGReg rm) +{ +    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0); +} + +static inline void tcg_out_ld32u(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xfff || offset < -0xfff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_ld32_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st32(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xfff || offset < -0xfff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_st32_12(s, cond, rd, rn, offset); +} + +static inline void 
tcg_out_ld16u(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xff || offset < -0xff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_ld16u_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld16s(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xff || offset < -0xff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_ld16s_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st16(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xff || offset < -0xff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_st16_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld8u(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xfff || offset < -0xfff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_ld8_12(s, cond, rd, rn, offset); +} + +static inline void tcg_out_ld8s(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xff || offset < -0xff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_ld8s_8(s, cond, rd, rn, offset); +} + +static inline void tcg_out_st8(TCGContext *s, int cond, +                int rd, int rn, int32_t offset) +{ +    if (offset > 0xfff || offset < -0xfff) { +        tcg_out_movi32(s, cond, TCG_REG_TMP, offset); +        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); +    } else +        tcg_out_st8_12(s, cond, rd, rn, offset); +} + +/* The _goto case is normally between TBs within the same code buffer, and + * with the code buffer limited to 16MB we wouldn't need the long case. + * But we also use it for the tail-call to the qemu_ld/st helpers, which does. 
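+ * (The helper routines normally live outside the code buffer, so their + * displacement can exceed the direct branch range and we fall back to + * loading the address into TMP.)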
+ */ +static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr) +{ +    intptr_t addri = (intptr_t)addr; +    ptrdiff_t disp = tcg_pcrel_diff(s, addr); + +    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) { +        tcg_out_b(s, cond, disp); +        return; +    } + +    tcg_out_movi32(s, cond, TCG_REG_TMP, addri); +    if (use_armv5t_instructions) { +        tcg_out_bx(s, cond, TCG_REG_TMP); +    } else { +        if (addri & 1) { +            tcg_abort(); +        } +        tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP); +    } +} + +/* The call case is mostly used for helpers - so it's not unreasonable + * for them to be beyond branch range */ +static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr) +{ +    intptr_t addri = (intptr_t)addr; +    ptrdiff_t disp = tcg_pcrel_diff(s, addr); + +    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) { +        if (addri & 1) { +            /* Use BLX if the target is in Thumb mode */ +            if (!use_armv5t_instructions) { +                tcg_abort(); +            } +            tcg_out_blx_imm(s, disp); +        } else { +            tcg_out_bl(s, COND_AL, disp); +        } +    } else if (use_armv7_instructions) { +        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri); +        tcg_out_blx(s, COND_AL, TCG_REG_TMP); +    } else { +        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); +        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); +        tcg_out32(s, addri); +    } +} + +static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) +{ +    if (l->has_value) { +        tcg_out_goto(s, cond, l->u.value_ptr); +    } else { +        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0); +        tcg_out_b_noaddr(s, cond); +    } +} + +#ifdef CONFIG_SOFTMMU +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + *                                     int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { +    [MO_UB]   = helper_ret_ldub_mmu, +    [MO_SB]   = helper_ret_ldsb_mmu, + +    [MO_LEUW] = helper_le_lduw_mmu, +    [MO_LEUL] = helper_le_ldul_mmu, +    [MO_LEQ]  = helper_le_ldq_mmu, +    [MO_LESW] = helper_le_ldsw_mmu, +    [MO_LESL] = helper_le_ldul_mmu, + +    [MO_BEUW] = helper_be_lduw_mmu, +    [MO_BEUL] = helper_be_ldul_mmu, +    [MO_BEQ]  = helper_be_ldq_mmu, +    [MO_BESW] = helper_be_ldsw_mmu, +    [MO_BESL] = helper_be_ldul_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + *                                     uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { +    [MO_UB]   = helper_ret_stb_mmu, +    [MO_LEUW] = helper_le_stw_mmu, +    [MO_LEUL] = helper_le_stl_mmu, +    [MO_LEQ]  = helper_le_stq_mmu, +    [MO_BEUW] = helper_be_stw_mmu, +    [MO_BEUL] = helper_be_stl_mmu, +    [MO_BEQ]  = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * argreg is where we want to put this argument, arg is the argument itself. + * Return value is the updated argreg ready for the next call. + * Note that argreg 0..3 is real registers, 4+ on stack. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. 
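+ * + * For example, the qemu_ld slow path below passes (env, addr, oi, retaddr): + * with a 32-bit guest address that is simply r0-r3, while a 64-bit address + * is aligned to the even/odd pair r2:r3 and oi/retaddr spill to the stack + * slots reserved by the prologue.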
+ */ +#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \ +static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \ +{                                                                          \ +    if (argreg < 4) {                                                      \ +        MOV_ARG(s, COND_AL, argreg, arg);                                  \ +    } else {                                                               \ +        int ofs = (argreg - 4) * 4;                                        \ +        EXT_ARG;                                                           \ +        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);                      \ +        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \ +    }                                                                      \ +    return argreg + 1;                                                     \ +} + +DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32, +    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u, +    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u, +    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP)) +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, ) + +static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg, +                                TCGReg arglo, TCGReg arghi) +{ +    /* 64 bit arguments must go in even/odd register pairs +     * and in 8-aligned stack slots. +     */ +    if (argreg & 1) { +        argreg++; +    } +    if (use_armv6_instructions && argreg >= 4 +        && (arglo & 1) == 0 && arghi == arglo + 1) { +        tcg_out_strd_8(s, COND_AL, arglo, +                       TCG_REG_CALL_STACK, (argreg - 4) * 4); +        return argreg + 2; +    } else { +        argreg = tcg_out_arg_reg32(s, argreg, arglo); +        argreg = tcg_out_arg_reg32(s, argreg, arghi); +        return argreg; +    } +} + +#define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) + +/* We're expecting to use an 8-bit immediate and to mask.  */ +QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8); + +/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset. +   Using the offset of the second entry in the last tlb table ensures +   that we can index all of the elements of the first entry.  */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) +                  > 0xffff); + +/* Load and compare a TLB entry, leaving the flags set.  Returns the register +   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, +                               TCGMemOp s_bits, int mem_index, bool is_load) +{ +    TCGReg base = TCG_AREG0; +    int cmp_off = +        (is_load +         ? 
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) +         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); +    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + +    /* Should generate something like the following: +     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1) +     *   add    r2, env, #high +     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2) +     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3) +     *   ldr    r0, [r2, #cmp]                                    (4) +     *   tst    addrlo, #s_mask +     *   ldr    r2, [r2, #add]                                    (5) +     *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS +     */ +    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, +                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); + +    /* We checked that the offset is contained within 16 bits above.  */ +    if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) { +        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, +                        (24 << 7) | (cmp_off >> 8)); +        base = TCG_REG_R2; +        add_off -= cmp_off & 0xff00; +        cmp_off &= 0xff; +    } + +    tcg_out_dat_imm(s, COND_AL, ARITH_AND, +                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1); +    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base, +                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); + +    /* Load the tlb comparator.  Use ldrd if needed and available, +       but due to how the pointer needs setting up, ldm isn't useful. +       Base arm5 doesn't have ldrd, but armv5te does.  */ +    if (use_armv6_instructions && TARGET_LONG_BITS == 64) { +        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); +    } else { +        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off); +        if (TARGET_LONG_BITS == 64) { +            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4); +        } +    } + +    /* Check alignment.  */ +    if (s_bits) { +        tcg_out_dat_imm(s, COND_AL, ARITH_TST, +                        0, addrlo, (1 << s_bits) - 1); +    } + +    /* Load the tlb addend.  */ +    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off); + +    tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0, +                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); + +    if (TARGET_LONG_BITS == 64) { +        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, +                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); +    } + +    return TCG_REG_R2; +} + +/* Record the context of a call to the out of line helper code for the slow +   path for a load or store, so that we can later generate the correct +   helper code.  
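+   The label_ptr recorded here is the conditional BL emitted in the fast +   path; it is patched via reloc_pc24 to point at the slow-path code once +   that code has been emitted.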
*/ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, +                                TCGReg datalo, TCGReg datahi, TCGReg addrlo, +                                TCGReg addrhi, tcg_insn_unit *raddr, +                                tcg_insn_unit *label_ptr) +{ +    TCGLabelQemuLdst *label = new_ldst_label(s); + +    label->is_ld = is_ld; +    label->oi = oi; +    label->datalo_reg = datalo; +    label->datahi_reg = datahi; +    label->addrlo_reg = addrlo; +    label->addrhi_reg = addrhi; +    label->raddr = raddr; +    label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ +    TCGReg argreg, datalo, datahi; +    TCGMemOpIdx oi = lb->oi; +    TCGMemOp opc = get_memop(oi); +    void *func; + +    reloc_pc24(lb->label_ptr[0], s->code_ptr); + +    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); +    if (TARGET_LONG_BITS == 64) { +        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); +    } else { +        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); +    } +    argreg = tcg_out_arg_imm32(s, argreg, oi); +    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); + +    /* For armv6 we can use the canonical unsigned helpers and minimize +       icache usage.  For pre-armv6, use the signed helpers since we do +       not have a single insn sign-extend.  */ +    if (use_armv6_instructions) { +        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; +    } else { +        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]; +        if (opc & MO_SIGN) { +            opc = MO_UL; +        } +    } +    tcg_out_call(s, func); + +    datalo = lb->datalo_reg; +    datahi = lb->datahi_reg; +    switch (opc & MO_SSIZE) { +    case MO_SB: +        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0); +        break; +    case MO_SW: +        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0); +        break; +    default: +        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); +        break; +    case MO_Q: +        if (datalo != TCG_REG_R1) { +            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); +            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); +        } else if (datahi != TCG_REG_R0) { +            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); +            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); +        } else { +            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0); +            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); +            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP); +        } +        break; +    } + +    tcg_out_goto(s, COND_AL, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ +    TCGReg argreg, datalo, datahi; +    TCGMemOpIdx oi = lb->oi; +    TCGMemOp opc = get_memop(oi); + +    reloc_pc24(lb->label_ptr[0], s->code_ptr); + +    argreg = TCG_REG_R0; +    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); +    if (TARGET_LONG_BITS == 64) { +        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); +    } else { +        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); +    } + +    datalo = lb->datalo_reg; +    datahi = lb->datahi_reg; +    switch (opc & MO_SIZE) { +    case MO_8: +        argreg = tcg_out_arg_reg8(s, argreg, datalo); +        break; +    case MO_16: +        argreg = tcg_out_arg_reg16(s, argreg, datalo); +        break; +    case MO_32: +    default: +        argreg = tcg_out_arg_reg32(s, argreg, datalo); +        
break; +    case MO_64: +        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi); +        break; +    } + +    argreg = tcg_out_arg_imm32(s, argreg, oi); +    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); + +    /* Tail-call to the helper, which will return to the fast path.  */ +    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); +} +#endif /* SOFTMMU */ + +static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc, +                                         TCGReg datalo, TCGReg datahi, +                                         TCGReg addrlo, TCGReg addend) +{ +    TCGMemOp bswap = opc & MO_BSWAP; + +    switch (opc & MO_SSIZE) { +    case MO_UB: +        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend); +        break; +    case MO_SB: +        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend); +        break; +    case MO_UW: +        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); +        if (bswap) { +            tcg_out_bswap16(s, COND_AL, datalo, datalo); +        } +        break; +    case MO_SW: +        if (bswap) { +            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend); +            tcg_out_bswap16s(s, COND_AL, datalo, datalo); +        } else { +            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend); +        } +        break; +    case MO_UL: +    default: +        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); +        if (bswap) { +            tcg_out_bswap32(s, COND_AL, datalo, datalo); +        } +        break; +    case MO_Q: +        { +            TCGReg dl = (bswap ? datahi : datalo); +            TCGReg dh = (bswap ? datalo : datahi); + +            /* Avoid ldrd for user-only emulation, to handle unaligned.  */ +            if (USING_SOFTMMU && use_armv6_instructions +                && (dl & 1) == 0 && dh == dl + 1) { +                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend); +            } else if (dl != addend) { +                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo); +                tcg_out_ld32_12(s, COND_AL, dh, addend, 4); +            } else { +                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP, +                                addend, addrlo, SHIFT_IMM_LSL(0)); +                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0); +                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4); +            } +            if (bswap) { +                tcg_out_bswap32(s, COND_AL, dl, dl); +                tcg_out_bswap32(s, COND_AL, dh, dh); +            } +        } +        break; +    } +} + +static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, +                                          TCGReg datalo, TCGReg datahi, +                                          TCGReg addrlo) +{ +    TCGMemOp bswap = opc & MO_BSWAP; + +    switch (opc & MO_SSIZE) { +    case MO_UB: +        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0); +        break; +    case MO_SB: +        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0); +        break; +    case MO_UW: +        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); +        if (bswap) { +            tcg_out_bswap16(s, COND_AL, datalo, datalo); +        } +        break; +    case MO_SW: +        if (bswap) { +            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0); +            tcg_out_bswap16s(s, COND_AL, datalo, datalo); +        } else { +            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0); +        } +        break; +    case MO_UL: +    default: +        tcg_out_ld32_12(s, COND_AL, 
datalo, addrlo, 0); +        if (bswap) { +            tcg_out_bswap32(s, COND_AL, datalo, datalo); +        } +        break; +    case MO_Q: +        { +            TCGReg dl = (bswap ? datahi : datalo); +            TCGReg dh = (bswap ? datalo : datahi); + +            /* Avoid ldrd for user-only emulation, to handle unaligned.  */ +            if (USING_SOFTMMU && use_armv6_instructions +                && (dl & 1) == 0 && dh == dl + 1) { +                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0); +            } else if (dl == addrlo) { +                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); +                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); +            } else { +                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0); +                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4); +            } +            if (bswap) { +                tcg_out_bswap32(s, COND_AL, dl, dl); +                tcg_out_bswap32(s, COND_AL, dh, dh); +            } +        } +        break; +    } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +{ +    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); +    TCGMemOpIdx oi; +    TCGMemOp opc; +#ifdef CONFIG_SOFTMMU +    int mem_index; +    TCGReg addend; +    tcg_insn_unit *label_ptr; +#endif + +    datalo = *args++; +    datahi = (is64 ? *args++ : 0); +    addrlo = *args++; +    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); + +#ifdef CONFIG_SOFTMMU +    mem_index = get_mmuidx(oi); +    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1); + +    /* This a conditional BL only to load a pointer within this opcode into LR +       for the slow path.  We will not be using the value for a tail call.  */ +    label_ptr = s->code_ptr; +    tcg_out_bl_noaddr(s, COND_NE); + +    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend); + +    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, +                        s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ +    if (GUEST_BASE) { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); +        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); +    } else { +        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); +    } +#endif +} + +static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc, +                                         TCGReg datalo, TCGReg datahi, +                                         TCGReg addrlo, TCGReg addend) +{ +    TCGMemOp bswap = opc & MO_BSWAP; + +    switch (opc & MO_SIZE) { +    case MO_8: +        tcg_out_st8_r(s, cond, datalo, addrlo, addend); +        break; +    case MO_16: +        if (bswap) { +            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo); +            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend); +        } else { +            tcg_out_st16_r(s, cond, datalo, addrlo, addend); +        } +        break; +    case MO_32: +    default: +        if (bswap) { +            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); +            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend); +        } else { +            tcg_out_st32_r(s, cond, datalo, addrlo, addend); +        } +        break; +    case MO_64: +        /* Avoid strd for user-only emulation, to handle unaligned.  
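+           (With the softmmu TLB the alignment check has already excluded +           unaligned addresses; in user-only mode nothing guarantees +           alignment and strd can fault on a misaligned address.)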
*/ +        if (bswap) { +            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi); +            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo); +            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo); +            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4); +        } else if (USING_SOFTMMU && use_armv6_instructions +                   && (datalo & 1) == 0 && datahi == datalo + 1) { +            tcg_out_strd_r(s, cond, datalo, addrlo, addend); +        } else { +            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo); +            tcg_out_st32_12(s, cond, datahi, addend, 4); +        } +        break; +    } +} + +static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, +                                          TCGReg datalo, TCGReg datahi, +                                          TCGReg addrlo) +{ +    TCGMemOp bswap = opc & MO_BSWAP; + +    switch (opc & MO_SIZE) { +    case MO_8: +        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0); +        break; +    case MO_16: +        if (bswap) { +            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo); +            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0); +        } else { +            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0); +        } +        break; +    case MO_32: +    default: +        if (bswap) { +            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); +            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); +        } else { +            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); +        } +        break; +    case MO_64: +        /* Avoid strd for user-only emulation, to handle unaligned.  */ +        if (bswap) { +            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi); +            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0); +            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo); +            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4); +        } else if (USING_SOFTMMU && use_armv6_instructions +                   && (datalo & 1) == 0 && datahi == datalo + 1) { +            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); +        } else { +            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); +            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4); +        } +        break; +    } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +{ +    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused)); +    TCGMemOpIdx oi; +    TCGMemOp opc; +#ifdef CONFIG_SOFTMMU +    int mem_index; +    TCGReg addend; +    tcg_insn_unit *label_ptr; +#endif + +    datalo = *args++; +    datahi = (is64 ? *args++ : 0); +    addrlo = *args++; +    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); + +#ifdef CONFIG_SOFTMMU +    mem_index = get_mmuidx(oi); +    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0); + +    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); + +    /* The conditional call must come last, as we're going to return here.  
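+       (The slow path tail-calls the store helper, which returns via the LR +       set by this BL, i.e. directly to the code that follows the fast path.)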
*/ +    label_ptr = s->code_ptr; +    tcg_out_bl_noaddr(s, COND_NE); + +    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, +                        s->code_ptr, label_ptr); +#else /* !CONFIG_SOFTMMU */ +    if (GUEST_BASE) { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); +        tcg_out_qemu_st_index(s, COND_AL, opc, datalo, +                              datahi, addrlo, TCG_REG_TMP); +    } else { +        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo); +    } +#endif +} + +static tcg_insn_unit *tb_ret_addr; + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, +                const TCGArg *args, const int *const_args) +{ +    TCGArg a0, a1, a2, a3, a4, a5; +    int c; + +    switch (opc) { +    case INDEX_op_exit_tb: +        tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); +        tcg_out_goto(s, COND_AL, tb_ret_addr); +        break; +    case INDEX_op_goto_tb: +        if (s->tb_jmp_offset) { +            /* Direct jump method */ +            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); +            tcg_out_b_noaddr(s, COND_AL); +        } else { +            /* Indirect jump method */ +            intptr_t ptr = (intptr_t)(s->tb_next + args[0]); +            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff); +            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff); +        } +        s->tb_next_offset[args[0]] = tcg_current_code_size(s); +        break; +    case INDEX_op_br: +        tcg_out_goto_label(s, COND_AL, arg_label(args[0])); +        break; + +    case INDEX_op_ld8u_i32: +        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld8s_i32: +        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld16u_i32: +        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld16s_i32: +        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld_i32: +        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_st8_i32: +        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_st16_i32: +        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_st_i32: +        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); +        break; + +    case INDEX_op_movcond_i32: +        /* Constraints mean that v2 is always in the same register as dest, +         * so we only need to do "if condition passed, move v1 to dest". 
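+         * Roughly:  cmp     c1, c2        (cmn if the negated constant encodes) +         *           mov<cc> dest, v1      (mvn<cc> if the inverted constant encodes)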
+         */ +        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, +                        args[1], args[2], const_args[2]); +        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV, +                        ARITH_MVN, args[0], 0, args[3], const_args[3]); +        break; +    case INDEX_op_add_i32: +        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, +                        args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_sub_i32: +        if (const_args[1]) { +            if (const_args[2]) { +                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); +            } else { +                tcg_out_dat_rI(s, COND_AL, ARITH_RSB, +                               args[0], args[2], args[1], 1); +            } +        } else { +            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, +                            args[0], args[1], args[2], const_args[2]); +        } +        break; +    case INDEX_op_and_i32: +        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, +                        args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_andc_i32: +        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, +                        args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_or_i32: +        c = ARITH_ORR; +        goto gen_arith; +    case INDEX_op_xor_i32: +        c = ARITH_EOR; +        /* Fall through.  */ +    gen_arith: +        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_add2_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        a3 = args[3], a4 = args[4], a5 = args[5]; +        if (a0 == a3 || (a0 == a5 && !const_args[5])) { +            a0 = TCG_REG_TMP; +        } +        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, +                        a0, a2, a4, const_args[4]); +        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, +                        a1, a3, a5, const_args[5]); +        tcg_out_mov_reg(s, COND_AL, args[0], a0); +        break; +    case INDEX_op_sub2_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        a3 = args[3], a4 = args[4], a5 = args[5]; +        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { +            a0 = TCG_REG_TMP; +        } +        if (const_args[2]) { +            if (const_args[4]) { +                tcg_out_movi32(s, COND_AL, a0, a4); +                a4 = a0; +            } +            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); +        } else { +            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, +                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); +        } +        if (const_args[3]) { +            if (const_args[5]) { +                tcg_out_movi32(s, COND_AL, a1, a5); +                a5 = a1; +            } +            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); +        } else { +            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, +                            a1, a3, a5, const_args[5]); +        } +        tcg_out_mov_reg(s, COND_AL, args[0], a0); +        break; +    case INDEX_op_neg_i32: +        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); +        break; +    case INDEX_op_not_i32: +        tcg_out_dat_reg(s, COND_AL, +                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); +        break; +    case INDEX_op_mul_i32: +        tcg_out_mul32(s, COND_AL, 
args[0], args[1], args[2]); +        break; +    case INDEX_op_mulu2_i32: +        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); +        break; +    case INDEX_op_muls2_i32: +        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); +        break; +    /* XXX: Perhaps args[2] & 0x1f is wrong */ +    case INDEX_op_shl_i32: +        c = const_args[2] ? +                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]); +        goto gen_shift32; +    case INDEX_op_shr_i32: +        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : +                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); +        goto gen_shift32; +    case INDEX_op_sar_i32: +        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : +                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); +        goto gen_shift32; +    case INDEX_op_rotr_i32: +        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) : +                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); +        /* Fall through.  */ +    gen_shift32: +        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); +        break; + +    case INDEX_op_rotl_i32: +        if (const_args[2]) { +            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], +                            ((0x20 - args[2]) & 0x1f) ? +                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : +                            SHIFT_IMM_LSL(0)); +        } else { +            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20); +            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], +                            SHIFT_REG_ROR(TCG_REG_TMP)); +        } +        break; + +    case INDEX_op_brcond_i32: +        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, +                       args[0], args[1], const_args[1]); +        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], +                           arg_label(args[3])); +        break; +    case INDEX_op_brcond2_i32: +        /* The resulting conditions are: +         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3, +         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3, +         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3, +         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3), +         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3), +         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3, +         */ +        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, +                        args[1], args[3], const_args[3]); +        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, +                        args[0], args[2], const_args[2]); +        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], +                           arg_label(args[5])); +        break; +    case INDEX_op_setcond_i32: +        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, +                        args[1], args[2], const_args[2]); +        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]], +                        ARITH_MOV, args[0], 0, 1); +        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])], +                        ARITH_MOV, args[0], 0, 0); +        break; +    case INDEX_op_setcond2_i32: +        /* See brcond2_i32 comment */ +        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, +                        args[2], args[4], const_args[4]); +        tcg_out_dat_rIN(s, COND_EQ, 
ARITH_CMP, ARITH_CMN, 0, +                        args[1], args[3], const_args[3]); +        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]], +                        ARITH_MOV, args[0], 0, 1); +        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])], +                        ARITH_MOV, args[0], 0, 0); +        break; + +    case INDEX_op_qemu_ld_i32: +        tcg_out_qemu_ld(s, args, 0); +        break; +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, args, 1); +        break; +    case INDEX_op_qemu_st_i32: +        tcg_out_qemu_st(s, args, 0); +        break; +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, args, 1); +        break; + +    case INDEX_op_bswap16_i32: +        tcg_out_bswap16(s, COND_AL, args[0], args[1]); +        break; +    case INDEX_op_bswap32_i32: +        tcg_out_bswap32(s, COND_AL, args[0], args[1]); +        break; + +    case INDEX_op_ext8s_i32: +        tcg_out_ext8s(s, COND_AL, args[0], args[1]); +        break; +    case INDEX_op_ext16s_i32: +        tcg_out_ext16s(s, COND_AL, args[0], args[1]); +        break; +    case INDEX_op_ext16u_i32: +        tcg_out_ext16u(s, COND_AL, args[0], args[1]); +        break; + +    case INDEX_op_deposit_i32: +        tcg_out_deposit(s, COND_AL, args[0], args[2], +                        args[3], args[4], const_args[2]); +        break; + +    case INDEX_op_div_i32: +        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); +        break; +    case INDEX_op_divu_i32: +        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); +        break; + +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  
*/ +    default: +        tcg_abort(); +    } +} + +static const TCGTargetOpDef arm_op_defs[] = { +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, +    { INDEX_op_br, { } }, + +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, +    { INDEX_op_st8_i32, { "r", "r" } }, +    { INDEX_op_st16_i32, { "r", "r" } }, +    { INDEX_op_st_i32, { "r", "r" } }, + +    /* TODO: "r", "r", "ri" */ +    { INDEX_op_add_i32, { "r", "r", "rIN" } }, +    { INDEX_op_sub_i32, { "r", "rI", "rIN" } }, +    { INDEX_op_mul_i32, { "r", "r", "r" } }, +    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } }, +    { INDEX_op_muls2_i32, { "r", "r", "r", "r" } }, +    { INDEX_op_and_i32, { "r", "r", "rIK" } }, +    { INDEX_op_andc_i32, { "r", "r", "rIK" } }, +    { INDEX_op_or_i32, { "r", "r", "rI" } }, +    { INDEX_op_xor_i32, { "r", "r", "rI" } }, +    { INDEX_op_neg_i32, { "r", "r" } }, +    { INDEX_op_not_i32, { "r", "r" } }, + +    { INDEX_op_shl_i32, { "r", "r", "ri" } }, +    { INDEX_op_shr_i32, { "r", "r", "ri" } }, +    { INDEX_op_sar_i32, { "r", "r", "ri" } }, +    { INDEX_op_rotl_i32, { "r", "r", "ri" } }, +    { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + +    { INDEX_op_brcond_i32, { "r", "rIN" } }, +    { INDEX_op_setcond_i32, { "r", "r", "rIN" } }, +    { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } }, + +    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } }, +    { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } }, +    { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } }, +    { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } }, + +#if TARGET_LONG_BITS == 32 +    { INDEX_op_qemu_ld_i32, { "r", "l" } }, +    { INDEX_op_qemu_ld_i64, { "r", "r", "l" } }, +    { INDEX_op_qemu_st_i32, { "s", "s" } }, +    { INDEX_op_qemu_st_i64, { "s", "s", "s" } }, +#else +    { INDEX_op_qemu_ld_i32, { "r", "l", "l" } }, +    { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } }, +    { INDEX_op_qemu_st_i32, { "s", "s", "s" } }, +    { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } }, +#endif + +    { INDEX_op_bswap16_i32, { "r", "r" } }, +    { INDEX_op_bswap32_i32, { "r", "r" } }, + +    { INDEX_op_ext8s_i32, { "r", "r" } }, +    { INDEX_op_ext16s_i32, { "r", "r" } }, +    { INDEX_op_ext16u_i32, { "r", "r" } }, + +    { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + +    { INDEX_op_div_i32, { "r", "r", "r" } }, +    { INDEX_op_divu_i32, { "r", "r", "r" } }, + +    { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ +    /* Only probe for the platform and capabilities if we haven't already +       determined maximum values at compile time.  
*/ +#ifndef use_idiv_instructions +    { +        unsigned long hwcap = qemu_getauxval(AT_HWCAP); +        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0; +    } +#endif +    if (__ARM_ARCH < 7) { +        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM); +        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { +            arm_arch = pl[1] - '0'; +        } +    } + +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); +    tcg_regset_set32(tcg_target_call_clobber_regs, 0, +                     (1 << TCG_REG_R0) | +                     (1 << TCG_REG_R1) | +                     (1 << TCG_REG_R2) | +                     (1 << TCG_REG_R3) | +                     (1 << TCG_REG_R12) | +                     (1 << TCG_REG_R14)); + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC); + +    tcg_add_target_add_op_defs(arm_op_defs); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_st32(s, COND_AL, arg, arg1, arg2); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, +                               TCGReg ret, TCGReg arg) +{ +    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0)); +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, +                                TCGReg ret, tcg_target_long arg) +{ +    tcg_out_movi32(s, COND_AL, ret, arg); +} + +/* Compute frame size via macros, to share between tcg_target_qemu_prologue +   and tcg_register_jit.  */ + +#define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long)) + +#define FRAME_SIZE \ +    ((PUSH_SIZE \ +      + TCG_STATIC_CALL_ARGS_SIZE \ +      + CPU_TEMP_BUF_NLONGS * sizeof(long) \ +      + TCG_TARGET_STACK_ALIGN - 1) \ +     & -TCG_TARGET_STACK_ALIGN) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    int stack_addend; + +    /* Calling convention requires us to save r4-r11 and lr.  */ +    /* stmdb sp!, { r4 - r11, lr } */ +    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); + +    /* Reserve callee argument and tcg temp space.  */ +    stack_addend = FRAME_SIZE - PUSH_SIZE; + +    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, +                   TCG_REG_CALL_STACK, stack_addend, 1); +    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, +                  CPU_TEMP_BUF_NLONGS * sizeof(long)); + +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); + +    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]); +    tb_ret_addr = s->code_ptr; + +    /* Epilogue.  We branch here via tb_ret_addr.  */ +    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, +                   TCG_REG_CALL_STACK, stack_addend, 1); + +    /* ldmia sp!, { r4 - r11, pc } */ +    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); +} + +typedef struct { +    DebugFrameHeader h; +    uint8_t fde_def_cfa[4]; +    uint8_t fde_reg_ofs[18]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_ARM + +/* We're expecting a 2 byte uleb128 encoded value.  
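+   E.g. a frame size of 680 (0x2a8) would encode as the two bytes 0xa8 0x05: +   the low seven bits with the continuation bit set, then the remaining bits.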
*/ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +static const DebugFrame debug_frame = { +    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ +    .h.cie.id = -1, +    .h.cie.version = 1, +    .h.cie.code_align = 1, +    .h.cie.data_align = 0x7c,             /* sleb128 -4 */ +    .h.cie.return_column = 14, + +    /* Total FDE size does not include the "len" member.  */ +    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + +    .fde_def_cfa = { +        12, 13,                         /* DW_CFA_def_cfa sp, ... */ +        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */ +        (FRAME_SIZE >> 7) +    }, +    .fde_reg_ofs = { +        /* The following must match the stmdb in the prologue.  */ +        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */ +        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */ +        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */ +        0x89, 4,                        /* DW_CFA_offset, r9, -16 */ +        0x88, 5,                        /* DW_CFA_offset, r8, -20 */ +        0x87, 6,                        /* DW_CFA_offset, r7, -24 */ +        0x86, 7,                        /* DW_CFA_offset, r6, -28 */ +        0x85, 8,                        /* DW_CFA_offset, r5, -32 */ +        0x84, 9,                        /* DW_CFA_offset, r4, -36 */ +    } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ +    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h new file mode 100644 index 00000000..6559f80b --- /dev/null +++ b/tcg/arm/tcg-target.h @@ -0,0 +1,109 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * Copyright (c) 2008 Andrzej Zaborowski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef TCG_TARGET_ARM  +#define TCG_TARGET_ARM 1 + +#undef TCG_TARGET_STACK_GROWSUP +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 + +typedef enum { +    TCG_REG_R0 = 0, +    TCG_REG_R1, +    TCG_REG_R2, +    TCG_REG_R3, +    TCG_REG_R4, +    TCG_REG_R5, +    TCG_REG_R6, +    TCG_REG_R7, +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, +    TCG_REG_PC, +} TCGReg; + +#define TCG_TARGET_NB_REGS 16 + +#ifdef __ARM_ARCH_EXT_IDIV__ +#define use_idiv_instructions  1 +#else +extern bool use_idiv_instructions; +#endif + + +/* used for function call generation */ +#define TCG_REG_CALL_STACK		TCG_REG_R13 +#define TCG_TARGET_STACK_ALIGN		8 +#define TCG_TARGET_CALL_ALIGN_ARGS	1 +#define TCG_TARGET_CALL_STACK_OFFSET	0 + +/* optional instructions */ +#define TCG_TARGET_HAS_ext8s_i32        1 +#define TCG_TARGET_HAS_ext16s_i32       1 +#define TCG_TARGET_HAS_ext8u_i32        0 /* and r0, r1, #0xff */ +#define TCG_TARGET_HAS_ext16u_i32       1 +#define TCG_TARGET_HAS_bswap16_i32      1 +#define TCG_TARGET_HAS_bswap32_i32      1 +#define TCG_TARGET_HAS_not_i32          1 +#define TCG_TARGET_HAS_neg_i32          1 +#define TCG_TARGET_HAS_rot_i32          1 +#define TCG_TARGET_HAS_andc_i32         1 +#define TCG_TARGET_HAS_orc_i32          0 +#define TCG_TARGET_HAS_eqv_i32          0 +#define TCG_TARGET_HAS_nand_i32         0 +#define TCG_TARGET_HAS_nor_i32          0 +#define TCG_TARGET_HAS_deposit_i32      1 +#define TCG_TARGET_HAS_movcond_i32      1 +#define TCG_TARGET_HAS_mulu2_i32        1 +#define TCG_TARGET_HAS_muls2_i32        1 +#define TCG_TARGET_HAS_muluh_i32        0 +#define TCG_TARGET_HAS_mulsh_i32        0 +#define TCG_TARGET_HAS_div_i32          use_idiv_instructions +#define TCG_TARGET_HAS_rem_i32          0 + +extern bool tcg_target_deposit_valid(int ofs, int len); +#define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid + +enum { +    TCG_AREG0 = TCG_REG_R6, +}; + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +#if QEMU_GNUC_PREREQ(4, 1) +    __builtin___clear_cache((char *) start, (char *) stop); +#else +    register uintptr_t _beg __asm("a1") = start; +    register uintptr_t _end __asm("a2") = stop; +    register uintptr_t _flg __asm("a3") = 0; +    __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); +#endif +} + +#endif diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c new file mode 100644 index 00000000..887f22f6 --- /dev/null +++ b/tcg/i386/tcg-target.c @@ -0,0 +1,2451 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tcg-be-ldst.h" + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +#if TCG_TARGET_REG_BITS == 64 +    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", +    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", +#else +    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", +#endif +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { +#if TCG_TARGET_REG_BITS == 64 +    TCG_REG_RBP, +    TCG_REG_RBX, +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, +    TCG_REG_R15, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R9, +    TCG_REG_R8, +    TCG_REG_RCX, +    TCG_REG_RDX, +    TCG_REG_RSI, +    TCG_REG_RDI, +    TCG_REG_RAX, +#else +    TCG_REG_EBX, +    TCG_REG_ESI, +    TCG_REG_EDI, +    TCG_REG_EBP, +    TCG_REG_ECX, +    TCG_REG_EDX, +    TCG_REG_EAX, +#endif +}; + +static const int tcg_target_call_iarg_regs[] = { +#if TCG_TARGET_REG_BITS == 64 +#if defined(_WIN64) +    TCG_REG_RCX, +    TCG_REG_RDX, +#else +    TCG_REG_RDI, +    TCG_REG_RSI, +    TCG_REG_RDX, +    TCG_REG_RCX, +#endif +    TCG_REG_R8, +    TCG_REG_R9, +#else +    /* 32 bit mode uses stack based calling convention (GCC default). */ +#endif +}; + +static const int tcg_target_call_oarg_regs[] = { +    TCG_REG_EAX, +#if TCG_TARGET_REG_BITS == 32 +    TCG_REG_EDX +#endif +}; + +/* Constants we accept.  */ +#define TCG_CT_CONST_S32 0x100 +#define TCG_CT_CONST_U32 0x200 +#define TCG_CT_CONST_I32 0x400 + +/* Registers used with L constraint, which are the first argument  +   registers on x86_64, and two random call clobbered registers on +   i386. */ +#if TCG_TARGET_REG_BITS == 64 +# define TCG_REG_L0 tcg_target_call_iarg_regs[0] +# define TCG_REG_L1 tcg_target_call_iarg_regs[1] +#else +# define TCG_REG_L0 TCG_REG_EAX +# define TCG_REG_L1 TCG_REG_EDX +#endif + +/* The host compiler should supply <cpuid.h> to enable runtime features +   detection, as we're not going to go so far as our own inline assembly. +   If not available, default values will be assumed.  */ +#if defined(CONFIG_CPUID_H) +#include <cpuid.h> +#endif + +/* For 32-bit, we are going to attempt to determine at runtime whether cmov +   is available.  */ +#if TCG_TARGET_REG_BITS == 64 +# define have_cmov 1 +#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV) +static bool have_cmov; +#else +# define have_cmov 0 +#endif + +/* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are +   going to attempt to determine at runtime whether movbe is available.  */ +#if defined(CONFIG_CPUID_H) && defined(bit_MOVBE) +static bool have_movbe; +#else +# define have_movbe 0 +#endif + +/* We need this symbol in tcg-target.h, and we can't properly conditionalize +   it there.  Therefore we always define the variable.  
*/ +bool have_bmi1; + +#if defined(CONFIG_CPUID_H) && defined(bit_BMI2) +static bool have_bmi2; +#else +# define have_bmi2 0 +#endif + +static tcg_insn_unit *tb_ret_addr; + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    value += addend; +    switch(type) { +    case R_386_PC32: +        value -= (uintptr_t)code_ptr; +        if (value != (int32_t)value) { +            tcg_abort(); +        } +        tcg_patch32(code_ptr, value); +        break; +    case R_386_PC8: +        value -= (uintptr_t)code_ptr; +        if (value != (int8_t)value) { +            tcg_abort(); +        } +        tcg_patch8(code_ptr, value); +        break; +    default: +        tcg_abort(); +    } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str; + +    ct_str = *pct_str; +    switch(ct_str[0]) { +    case 'a': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); +        break; +    case 'b': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); +        break; +    case 'c': +    case_c: +        ct->ct |= TCG_CT_REG; +        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); +        break; +    case 'd': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); +        break; +    case 'S': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); +        break; +    case 'D': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); +        break; +    case 'q': +        ct->ct |= TCG_CT_REG; +        if (TCG_TARGET_REG_BITS == 64) { +            tcg_regset_set32(ct->u.regs, 0, 0xffff); +        } else { +            tcg_regset_set32(ct->u.regs, 0, 0xf); +        } +        break; +    case 'Q': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xf); +        break; +    case 'r': +    case_r: +        ct->ct |= TCG_CT_REG; +        if (TCG_TARGET_REG_BITS == 64) { +            tcg_regset_set32(ct->u.regs, 0, 0xffff); +        } else { +            tcg_regset_set32(ct->u.regs, 0, 0xff); +        } +        break; +    case 'C': +        /* With SHRX et al, we need not use ECX as shift count register.  
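+           (The BMI2 SHLX/SHRX/SARX forms take the count in any register, so +           with BMI2 we relax this to a plain 'r'; the legacy CL-count shifts +           otherwise force the count into ECX.)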
*/ +        if (have_bmi2) { +            goto case_r; +        } else { +            goto case_c; +        } + +        /* qemu_ld/st address constraint */ +    case 'L': +        ct->ct |= TCG_CT_REG; +        if (TCG_TARGET_REG_BITS == 64) { +            tcg_regset_set32(ct->u.regs, 0, 0xffff); +        } else { +            tcg_regset_set32(ct->u.regs, 0, 0xff); +        } +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); +        break; + +    case 'e': +        ct->ct |= TCG_CT_CONST_S32; +        break; +    case 'Z': +        ct->ct |= TCG_CT_CONST_U32; +        break; +    case 'I': +        ct->ct |= TCG_CT_CONST_I32; +        break; + +    default: +        return -1; +    } +    ct_str++; +    *pct_str = ct_str; +    return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, +                                         const TCGArgConstraint *arg_ct) +{ +    int ct = arg_ct->ct; +    if (ct & TCG_CT_CONST) { +        return 1; +    } +    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { +        return 1; +    } +    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { +        return 1; +    } +    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { +        return 1; +    } +    return 0; +} + +#if TCG_TARGET_REG_BITS == 64 +# define LOWREGMASK(x)	((x) & 7) +#else +# define LOWREGMASK(x)	(x) +#endif + +#define P_EXT		0x100		/* 0x0f opcode prefix */ +#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */ +#define P_DATA16        0x400           /* 0x66 opcode prefix */ +#if TCG_TARGET_REG_BITS == 64 +# define P_ADDR32       0x800           /* 0x67 opcode prefix */ +# define P_REXW         0x1000          /* Set REX.W = 1 */ +# define P_REXB_R       0x2000          /* REG field as byte register */ +# define P_REXB_RM      0x4000          /* R/M field as byte register */ +# define P_GS           0x8000          /* gs segment override */ +#else +# define P_ADDR32	0 +# define P_REXW		0 +# define P_REXB_R	0 +# define P_REXB_RM	0 +# define P_GS           0 +#endif +#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */ +#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */ + +#define OPC_ARITH_EvIz	(0x81) +#define OPC_ARITH_EvIb	(0x83) +#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */ +#define OPC_ANDN        (0xf2 | P_EXT38) +#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_BSWAP	(0xc8 | P_EXT) +#define OPC_CALL_Jz	(0xe8) +#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */ +#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32	(0x48) +#define OPC_IMUL_GvEv	(0xaf | P_EXT) +#define OPC_IMUL_GvEvIb	(0x6b) +#define OPC_IMUL_GvEvIz	(0x69) +#define OPC_INC_r32	(0x40) +#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */ +#define OPC_JCC_short	(0x70)		/* ... 
plus condition code */ +#define OPC_JMP_long	(0xe9) +#define OPC_JMP_short	(0xeb) +#define OPC_LEA         (0x8d) +#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */ +#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */ +#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */ +#define OPC_MOVB_EvIz   (0xc6) +#define OPC_MOVL_EvIz	(0xc7) +#define OPC_MOVL_Iv     (0xb8) +#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38) +#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38) +#define OPC_MOVSBL	(0xbe | P_EXT) +#define OPC_MOVSWL	(0xbf | P_EXT) +#define OPC_MOVSLQ	(0x63 | P_REXW) +#define OPC_MOVZBL	(0xb6 | P_EXT) +#define OPC_MOVZWL	(0xb7 | P_EXT) +#define OPC_POP_r32	(0x58) +#define OPC_PUSH_r32	(0x50) +#define OPC_PUSH_Iv	(0x68) +#define OPC_PUSH_Ib	(0x6a) +#define OPC_RET		(0xc3) +#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */ +#define OPC_SHIFT_1	(0xd1) +#define OPC_SHIFT_Ib	(0xc1) +#define OPC_SHIFT_cl	(0xd3) +#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3) +#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16) +#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2) +#define OPC_TESTL	(0x85) +#define OPC_XCHG_ax_r32	(0x90) + +#define OPC_GRP3_Ev	(0xf7) +#define OPC_GRP5	(0xff) + +/* Group 1 opcode extensions for 0x80-0x83. +   These are also used as modifiers for OPC_ARITH.  */ +#define ARITH_ADD 0 +#define ARITH_OR  1 +#define ARITH_ADC 2 +#define ARITH_SBB 3 +#define ARITH_AND 4 +#define ARITH_SUB 5 +#define ARITH_XOR 6 +#define ARITH_CMP 7 + +/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */ +#define SHIFT_ROL 0 +#define SHIFT_ROR 1 +#define SHIFT_SHL 4 +#define SHIFT_SHR 5 +#define SHIFT_SAR 7 + +/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */ +#define EXT3_NOT   2 +#define EXT3_NEG   3 +#define EXT3_MUL   4 +#define EXT3_IMUL  5 +#define EXT3_DIV   6 +#define EXT3_IDIV  7 + +/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */ +#define EXT5_INC_Ev	0 +#define EXT5_DEC_Ev	1 +#define EXT5_CALLN_Ev	2 +#define EXT5_JMPN_Ev	4 + +/* Condition codes to be added to OPC_JCC_{long,short}.  */ +#define JCC_JMP (-1) +#define JCC_JO  0x0 +#define JCC_JNO 0x1 +#define JCC_JB  0x2 +#define JCC_JAE 0x3 +#define JCC_JE  0x4 +#define JCC_JNE 0x5 +#define JCC_JBE 0x6 +#define JCC_JA  0x7 +#define JCC_JS  0x8 +#define JCC_JNS 0x9 +#define JCC_JP  0xa +#define JCC_JNP 0xb +#define JCC_JL  0xc +#define JCC_JGE 0xd +#define JCC_JLE 0xe +#define JCC_JG  0xf + +static const uint8_t tcg_cond_to_jcc[] = { +    [TCG_COND_EQ] = JCC_JE, +    [TCG_COND_NE] = JCC_JNE, +    [TCG_COND_LT] = JCC_JL, +    [TCG_COND_GE] = JCC_JGE, +    [TCG_COND_LE] = JCC_JLE, +    [TCG_COND_GT] = JCC_JG, +    [TCG_COND_LTU] = JCC_JB, +    [TCG_COND_GEU] = JCC_JAE, +    [TCG_COND_LEU] = JCC_JBE, +    [TCG_COND_GTU] = JCC_JA, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) +{ +    int rex; + +    if (opc & P_GS) { +        tcg_out8(s, 0x65); +    } +    if (opc & P_DATA16) { +        /* We should never be asking for both 16 and 64-bit operation.  */ +        assert((opc & P_REXW) == 0); +        tcg_out8(s, 0x66); +    } +    if (opc & P_ADDR32) { +        tcg_out8(s, 0x67); +    } + +    rex = 0; +    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */ +    rex |= (r & 8) >> 1;                /* REX.R */ +    rex |= (x & 8) >> 2;                /* REX.X */ +    rex |= (rm & 8) >> 3;               /* REX.B */ + +    /* P_REXB_{R,RM} indicates that the given register is the low byte. 
+       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, +       as otherwise the encoding indicates %[abcd]h.  Note that the values +       that are ORed in merely indicate that the REX byte must be present; +       those bits get discarded in output.  */ +    rex |= opc & (r >= 4 ? P_REXB_R : 0); +    rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + +    if (rex) { +        tcg_out8(s, (uint8_t)(rex | 0x40)); +    } + +    if (opc & (P_EXT | P_EXT38)) { +        tcg_out8(s, 0x0f); +        if (opc & P_EXT38) { +            tcg_out8(s, 0x38); +        } +    } + +    tcg_out8(s, opc); +} +#else +static void tcg_out_opc(TCGContext *s, int opc) +{ +    if (opc & P_DATA16) { +        tcg_out8(s, 0x66); +    } +    if (opc & (P_EXT | P_EXT38)) { +        tcg_out8(s, 0x0f); +        if (opc & P_EXT38) { +            tcg_out8(s, 0x38); +        } +    } +    tcg_out8(s, opc); +} +/* Discard the register arguments to tcg_out_opc early, so as not to penalize +   the 32-bit compilation paths.  This method works with all versions of gcc, +   whereas relying on optimization may not be able to exclude them.  */ +#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc) +#endif + +static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) +{ +    tcg_out_opc(s, opc, r, rm, 0); +    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) +{ +    int tmp; + +    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) { +        /* Three byte VEX prefix.  */ +        tcg_out8(s, 0xc4); + +        /* VEX.m-mmmm */ +        if (opc & P_EXT38) { +            tmp = 2; +        } else if (opc & P_EXT) { +            tmp = 1; +        } else { +            tcg_abort(); +        } +        tmp |= 0x40;                       /* VEX.X */ +        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R */ +        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B */ +        tcg_out8(s, tmp); + +        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W */ +    } else { +        /* Two byte VEX prefix.  */ +        tcg_out8(s, 0xc5); + +        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */ +    } +    /* VEX.pp */ +    if (opc & P_DATA16) { +        tmp |= 1;                          /* 0x66 */ +    } else if (opc & P_SIMDF3) { +        tmp |= 2;                          /* 0xf3 */ +    } else if (opc & P_SIMDF2) { +        tmp |= 3;                          /* 0xf2 */ +    } +    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */ +    tcg_out8(s, tmp); +    tcg_out8(s, opc); +    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +/* Output an opcode with a full "rm + (index<<shift) + offset" address mode. +   We handle either RM and INDEX missing with a negative value.  In 64-bit +   mode for absolute addresses, ~RM is the size of the immediate operand +   that will follow the instruction.  */ + +static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, +                                     int index, int shift, intptr_t offset) +{ +    int mod, len; + +    if (index < 0 && rm < 0) { +        if (TCG_TARGET_REG_BITS == 64) { +            /* Try for a rip-relative addressing mode.  This has replaced +               the 32-bit-mode absolute addressing encoding.  
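+               (In 64-bit mode, ModRM mod=00 with rm=101 selects disp32(%rip)
+               rather than an absolute disp32.)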
*/ +            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm; +            intptr_t disp = offset - pc; +            if (disp == (int32_t)disp) { +                tcg_out_opc(s, opc, r, 0, 0); +                tcg_out8(s, (LOWREGMASK(r) << 3) | 5); +                tcg_out32(s, disp); +                return; +            } + +            /* Try for an absolute address encoding.  This requires the +               use of the MODRM+SIB encoding and is therefore larger than +               rip-relative addressing.  */ +            if (offset == (int32_t)offset) { +                tcg_out_opc(s, opc, r, 0, 0); +                tcg_out8(s, (LOWREGMASK(r) << 3) | 4); +                tcg_out8(s, (4 << 3) | 5); +                tcg_out32(s, offset); +                return; +            } + +            /* ??? The memory isn't directly addressable.  */ +            tcg_abort(); +        } else { +            /* Absolute address.  */ +            tcg_out_opc(s, opc, r, 0, 0); +            tcg_out8(s, (r << 3) | 5); +            tcg_out32(s, offset); +            return; +        } +    } + +    /* Find the length of the immediate addend.  Note that the encoding +       that would be used for (%ebp) indicates absolute addressing.  */ +    if (rm < 0) { +        mod = 0, len = 4, rm = 5; +    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { +        mod = 0, len = 0; +    } else if (offset == (int8_t)offset) { +        mod = 0x40, len = 1; +    } else { +        mod = 0x80, len = 4; +    } + +    /* Use a single byte MODRM format if possible.  Note that the encoding +       that would be used for %esp is the escape to the two byte form.  */ +    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { +        /* Single byte MODRM format.  */ +        tcg_out_opc(s, opc, r, rm, 0); +        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +    } else { +        /* Two byte MODRM+SIB format.  */ + +        /* Note that the encoding that would place %esp into the index +           field indicates no index register.  In 64-bit mode, the REX.X +           bit counts, so %r12 can be used as the index.  */ +        if (index < 0) { +            index = 4; +        } else { +            assert(index != TCG_REG_ESP); +        } + +        tcg_out_opc(s, opc, r, rm, index); +        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); +        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); +    } + +    if (len == 1) { +        tcg_out8(s, offset); +    } else if (len == 4) { +        tcg_out32(s, offset); +    } +} + +/* A simplification of the above with no index or shift.  */ +static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, +                                        int rm, intptr_t offset) +{ +    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); +} + +/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */ +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) +{ +    /* Propagate an opcode prefix, such as P_REXW.  */ +    int ext = subop & ~0x7; +    subop &= 0x7; + +    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, +                               TCGReg ret, TCGReg arg) +{ +    if (arg != ret) { +        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? 
P_REXW : 0); +        tcg_out_modrm(s, opc, ret, arg); +    } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, +                         TCGReg ret, tcg_target_long arg) +{ +    tcg_target_long diff; + +    if (arg == 0) { +        tgen_arithr(s, ARITH_XOR, ret, ret); +        return; +    } +    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { +        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); +        tcg_out32(s, arg); +        return; +    } +    if (arg == (int32_t)arg) { +        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); +        tcg_out32(s, arg); +        return; +    } + +    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */ +    diff = arg - ((uintptr_t)s->code_ptr + 7); +    if (diff == (int32_t)diff) { +        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0); +        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5); +        tcg_out32(s, diff); +        return; +    } + +    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); +    tcg_out64(s, arg); +} + +static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) +{ +    if (val == (int8_t)val) { +        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); +        tcg_out8(s, val); +    } else if (val == (int32_t)val) { +        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); +        tcg_out32(s, val); +    } else { +        tcg_abort(); +    } +} + +static inline void tcg_out_push(TCGContext *s, int reg) +{ +    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_pop(TCGContext *s, int reg) +{ +    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, +                              TCGReg arg1, intptr_t arg2) +{ +    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); +    tcg_out_modrm_offset(s, opc, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); +    tcg_out_modrm_offset(s, opc, arg, arg1, arg2); +} + +static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base, +                               tcg_target_long ofs, tcg_target_long val) +{ +    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0); +    tcg_out_modrm_offset(s, opc, 0, base, ofs); +    tcg_out32(s, val); +} + +static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) +{ +    /* Propagate an opcode prefix, such as P_DATA16.  
*/ +    int ext = subopc & ~0x7; +    subopc &= 0x7; + +    if (count == 1) { +        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); +    } else { +        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); +        tcg_out8(s, count); +    } +} + +static inline void tcg_out_bswap32(TCGContext *s, int reg) +{ +    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_rolw_8(TCGContext *s, int reg) +{ +    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); +} + +static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) +{ +    /* movzbl */ +    assert(src < 4 || TCG_TARGET_REG_BITS == 64); +    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); +} + +static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) +{ +    /* movsbl */ +    assert(src < 4 || TCG_TARGET_REG_BITS == 64); +    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); +} + +static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) +{ +    /* movzwl */ +    tcg_out_modrm(s, OPC_MOVZWL, dest, src); +} + +static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) +{ +    /* movsw[lq] */ +    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); +} + +static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) +{ +    /* 32-bit mov zero extends.  */ +    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); +} + +static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) +{ +    tcg_out_modrm(s, OPC_MOVSLQ, dest, src); +} + +static inline void tcg_out_bswap64(TCGContext *s, int reg) +{ +    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); +} + +static void tgen_arithi(TCGContext *s, int c, int r0, +                        tcg_target_long val, int cf) +{ +    int rexw = 0; + +    if (TCG_TARGET_REG_BITS == 64) { +        rexw = c & -8; +        c &= 7; +    } + +    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce +       partial flags update stalls on Pentium4 and are not recommended +       by current Intel optimization manuals.  */ +    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { +        int is_inc = (c == ARITH_ADD) ^ (val < 0); +        if (TCG_TARGET_REG_BITS == 64) { +            /* The single-byte increment encodings are re-tasked as the +               REX prefixes.  Use the MODRM encoding.  */ +            tcg_out_modrm(s, OPC_GRP5 + rexw, +                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); +        } else { +            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); +        } +        return; +    } + +    if (c == ARITH_AND) { +        if (TCG_TARGET_REG_BITS == 64) { +            if (val == 0xffffffffu) { +                tcg_out_ext32u(s, r0, r0); +                return; +            } +            if (val == (uint32_t)val) { +                /* AND with no high bits set can use a 32-bit operation.  
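+                   (A 32-bit operation zero-extends its result, which is
+                   exactly what the AND with an all-zero high half would
+                   produce.)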
*/ +                rexw = 0; +            } +        } +        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { +            tcg_out_ext8u(s, r0, r0); +            return; +        } +        if (val == 0xffffu) { +            tcg_out_ext16u(s, r0, r0); +            return; +        } +    } + +    if (val == (int8_t)val) { +        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); +        tcg_out8(s, val); +        return; +    } +    if (rexw == 0 || val == (int32_t)val) { +        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); +        tcg_out32(s, val); +        return; +    } + +    tcg_abort(); +} + +static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +{ +    if (val != 0) { +        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); +    } +} + +/* Use SMALL != 0 to force a short forward branch.  */ +static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small) +{ +    int32_t val, val1; + +    if (l->has_value) { +        val = tcg_pcrel_diff(s, l->u.value_ptr); +        val1 = val - 2; +        if ((int8_t)val1 == val1) { +            if (opc == -1) { +                tcg_out8(s, OPC_JMP_short); +            } else { +                tcg_out8(s, OPC_JCC_short + opc); +            } +            tcg_out8(s, val1); +        } else { +            if (small) { +                tcg_abort(); +            } +            if (opc == -1) { +                tcg_out8(s, OPC_JMP_long); +                tcg_out32(s, val - 5); +            } else { +                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); +                tcg_out32(s, val - 6); +            } +        } +    } else if (small) { +        if (opc == -1) { +            tcg_out8(s, OPC_JMP_short); +        } else { +            tcg_out8(s, OPC_JCC_short + opc); +        } +        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1); +        s->code_ptr += 1; +    } else { +        if (opc == -1) { +            tcg_out8(s, OPC_JMP_long); +        } else { +            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); +        } +        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4); +        s->code_ptr += 4; +    } +} + +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, +                        int const_arg2, int rexw) +{ +    if (const_arg2) { +        if (arg2 == 0) { +            /* test r, r */ +            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); +        } else { +            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); +        } +    } else { +        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); +    } +} + +static void tcg_out_brcond32(TCGContext *s, TCGCond cond, +                             TCGArg arg1, TCGArg arg2, int const_arg2, +                             TCGLabel *label, int small) +{ +    tcg_out_cmp(s, arg1, arg2, const_arg2, 0); +    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_brcond64(TCGContext *s, TCGCond cond, +                             TCGArg arg1, TCGArg arg2, int const_arg2, +                             TCGLabel *label, int small) +{ +    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); +    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small); +} +#else +/* XXX: we implement it at the target level to avoid having to +   handle cross basic blocks temporaries */ +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, +                            const int *const_args, int small) +{ +    TCGLabel *label_next = gen_new_label(); +    TCGLabel *label_this = 
arg_label(args[5]); + +    switch(args[4]) { +    case TCG_COND_EQ: +        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], +                         label_next, 1); +        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], +                         label_this, small); +        break; +    case TCG_COND_NE: +        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], +                         label_this, small); +        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], +                         label_this, small); +        break; +    case TCG_COND_LT: +        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    case TCG_COND_LE: +        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    case TCG_COND_GT: +        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    case TCG_COND_GE: +        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    case TCG_COND_LTU: +        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    case TCG_COND_LEU: +        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    case TCG_COND_GTU: +        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    case TCG_COND_GEU: +        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], +                         label_this, small); +        tcg_out_jxx(s, JCC_JNE, label_next, 1); +        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], +                         label_this, small); +        break; +    default: +        tcg_abort(); +    } +    tcg_out_label(s, label_next, s->code_ptr); +} +#endif + +static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, +                              TCGArg arg1, TCGArg arg2, int const_arg2) +{ +    tcg_out_cmp(s, arg1, arg2, 
const_arg2, 0); +    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); +    tcg_out_ext8u(s, dest, dest); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, +                              TCGArg arg1, TCGArg arg2, int const_arg2) +{ +    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); +    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); +    tcg_out_ext8u(s, dest, dest); +} +#else +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, +                             const int *const_args) +{ +    TCGArg new_args[6]; +    TCGLabel *label_true, *label_over; + +    memcpy(new_args, args+1, 5*sizeof(TCGArg)); + +    if (args[0] == args[1] || args[0] == args[2] +        || (!const_args[3] && args[0] == args[3]) +        || (!const_args[4] && args[0] == args[4])) { +        /* When the destination overlaps with one of the argument +           registers, don't do anything tricky.  */ +        label_true = gen_new_label(); +        label_over = gen_new_label(); + +        new_args[5] = label_arg(label_true); +        tcg_out_brcond2(s, new_args, const_args+1, 1); + +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); +        tcg_out_jxx(s, JCC_JMP, label_over, 1); +        tcg_out_label(s, label_true, s->code_ptr); + +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); +        tcg_out_label(s, label_over, s->code_ptr); +    } else { +        /* When the destination does not overlap one of the arguments, +           clear the destination first, jump if cond false, and emit an +           increment in the true case.  This results in smaller code.  */ + +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + +        label_over = gen_new_label(); +        new_args[4] = tcg_invert_cond(new_args[4]); +        new_args[5] = label_arg(label_over); +        tcg_out_brcond2(s, new_args, const_args+1, 1); + +        tgen_arithi(s, ARITH_ADD, args[0], 1, 0); +        tcg_out_label(s, label_over, s->code_ptr); +    } +} +#endif + +static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, +                              TCGArg c1, TCGArg c2, int const_c2, +                              TCGArg v1) +{ +    tcg_out_cmp(s, c1, c2, const_c2, 0); +    if (have_cmov) { +        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); +    } else { +        TCGLabel *over = gen_new_label(); +        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); +        tcg_out_mov(s, TCG_TYPE_I32, dest, v1); +        tcg_out_label(s, over, s->code_ptr); +    } +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, +                              TCGArg c1, TCGArg c2, int const_c2, +                              TCGArg v1) +{ +    tcg_out_cmp(s, c1, c2, const_c2, P_REXW); +    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); +} +#endif + +static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) +{ +    intptr_t disp = tcg_pcrel_diff(s, dest) - 5; + +    if (disp == (int32_t)disp) { +        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); +        tcg_out32(s, disp); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest); +        tcg_out_modrm(s, OPC_GRP5, +                      call ? 
EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); +    } +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ +    tcg_out_branch(s, 1, dest); +} + +static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest) +{ +    tcg_out_branch(s, 0, dest); +} + +#if defined(CONFIG_SOFTMMU) +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, + *                                     int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { +    [MO_UB]   = helper_ret_ldub_mmu, +    [MO_LEUW] = helper_le_lduw_mmu, +    [MO_LEUL] = helper_le_ldul_mmu, +    [MO_LEQ]  = helper_le_ldq_mmu, +    [MO_BEUW] = helper_be_lduw_mmu, +    [MO_BEUL] = helper_be_ldul_mmu, +    [MO_BEQ]  = helper_be_ldq_mmu, +}; + +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, + *                                     uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { +    [MO_UB]   = helper_ret_stb_mmu, +    [MO_LEUW] = helper_le_stw_mmu, +    [MO_LEUL] = helper_le_stl_mmu, +    [MO_LEQ]  = helper_le_stq_mmu, +    [MO_BEUW] = helper_be_stw_mmu, +    [MO_BEUL] = helper_be_stl_mmu, +    [MO_BEQ]  = helper_be_stq_mmu, +}; + +/* Perform the TLB load and compare. + +   Inputs: +   ADDRLO and ADDRHI contain the low and high part of the address. + +   MEM_INDEX and S_BITS are the memory context and log2 size of the load. + +   WHICH is the offset into the CPUTLBEntry structure of the slot to read. +   This should be offsetof addr_read or addr_write. + +   Outputs: +   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) +   positions of the displacements of forward jumps to the TLB miss case. + +   Second argument register is loaded with the low part of the address. +   In the TLB hit case, it has been adjusted as indicated by the TLB +   and so is a host address.  In the TLB miss case, it continues to +   hold a guest address. + +   First argument register is clobbered.  */ + +static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, +                                    int mem_index, TCGMemOp s_bits, +                                    tcg_insn_unit **label_ptr, int which) +{ +    const TCGReg r0 = TCG_REG_L0; +    const TCGReg r1 = TCG_REG_L1; +    TCGType ttype = TCG_TYPE_I32; +    TCGType htype = TCG_TYPE_I32; +    int trexw = 0, hrexw = 0; + +    if (TCG_TARGET_REG_BITS == 64) { +        if (TARGET_LONG_BITS == 64) { +            ttype = TCG_TYPE_I64; +            trexw = P_REXW; +        } +        if (TCG_TYPE_PTR == TCG_TYPE_I64) { +            htype = TCG_TYPE_I64; +            hrexw = P_REXW; +        } +    } + +    tcg_out_mov(s, htype, r0, addrlo); +    tcg_out_mov(s, ttype, r1, addrlo); + +    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0, +                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + +    tgen_arithi(s, ARITH_AND + trexw, r1, +                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); +    tgen_arithi(s, ARITH_AND + hrexw, r0, +                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); + +    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0, +                             offsetof(CPUArchState, tlb_table[mem_index][0]) +                             + which); + +    /* cmp 0(r0), r1 */ +    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); + +    /* Prepare for both the fast path add of the tlb addend, and the slow +       path function argument setup.  
There are two cases worth note: +       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address +       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ +       copies the entire guest address for the slow path, while truncation +       for the 32-bit host happens with the fastpath ADDL below.  */ +    tcg_out_mov(s, ttype, r1, addrlo); + +    /* jne slow_path */ +    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); +    label_ptr[0] = s->code_ptr; +    s->code_ptr += 4; + +    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +        /* cmp 4(r0), addrhi */ +        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4); + +        /* jne slow_path */ +        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); +        label_ptr[1] = s->code_ptr; +        s->code_ptr += 4; +    } + +    /* TLB Hit.  */ + +    /* add addend(r0), r1 */ +    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0, +                         offsetof(CPUTLBEntry, addend) - which); +} + +/* + * Record the context of a call to the out of line helper code for the slow path + * for a load or store, so that we can later generate the correct helper code + */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, +                                TCGReg datalo, TCGReg datahi, +                                TCGReg addrlo, TCGReg addrhi, +                                tcg_insn_unit *raddr, +                                tcg_insn_unit **label_ptr) +{ +    TCGLabelQemuLdst *label = new_ldst_label(s); + +    label->is_ld = is_ld; +    label->oi = oi; +    label->datalo_reg = datalo; +    label->datahi_reg = datahi; +    label->addrlo_reg = addrlo; +    label->addrhi_reg = addrhi; +    label->raddr = raddr; +    label->label_ptr[0] = label_ptr[0]; +    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +        label->label_ptr[1] = label_ptr[1]; +    } +} + +/* + * Generate code for the slow path for a load at the end of block + */ +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ +    TCGMemOpIdx oi = l->oi; +    TCGMemOp opc = get_memop(oi); +    TCGReg data_reg; +    tcg_insn_unit **label_ptr = &l->label_ptr[0]; + +    /* resolve label address */ +    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); +    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); +    } + +    if (TCG_TARGET_REG_BITS == 32) { +        int ofs = 0; + +        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); +        ofs += 4; + +        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); +        ofs += 4; + +        if (TARGET_LONG_BITS == 64) { +            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); +            ofs += 4; +        } + +        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); +        ofs += 4; + +        tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, ofs, (uintptr_t)l->raddr); +    } else { +        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); +        /* The second argument is already loaded with addrlo.  
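+           (tcg_out_tlb_load left the guest address in the second call
+           argument register on the slow path.)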
*/ +        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi); +        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3], +                     (uintptr_t)l->raddr); +    } + +    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + +    data_reg = l->datalo_reg; +    switch (opc & MO_SSIZE) { +    case MO_SB: +        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); +        break; +    case MO_SW: +        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); +        break; +#if TCG_TARGET_REG_BITS == 64 +    case MO_SL: +        tcg_out_ext32s(s, data_reg, TCG_REG_EAX); +        break; +#endif +    case MO_UB: +    case MO_UW: +        /* Note that the helpers have zero-extended to tcg_target_long.  */ +    case MO_UL: +        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); +        break; +    case MO_Q: +        if (TCG_TARGET_REG_BITS == 64) { +            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); +        } else if (data_reg == TCG_REG_EDX) { +            /* xchg %edx, %eax */ +            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); +            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX); +        } else { +            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); +            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX); +        } +        break; +    default: +        tcg_abort(); +    } + +    /* Jump to the code corresponding to next IR of qemu_st */ +    tcg_out_jmp(s, l->raddr); +} + +/* + * Generate code for the slow path for a store at the end of block + */ +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ +    TCGMemOpIdx oi = l->oi; +    TCGMemOp opc = get_memop(oi); +    TCGMemOp s_bits = opc & MO_SIZE; +    tcg_insn_unit **label_ptr = &l->label_ptr[0]; +    TCGReg retaddr; + +    /* resolve label address */ +    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); +    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4); +    } + +    if (TCG_TARGET_REG_BITS == 32) { +        int ofs = 0; + +        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs); +        ofs += 4; + +        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs); +        ofs += 4; + +        if (TARGET_LONG_BITS == 64) { +            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs); +            ofs += 4; +        } + +        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs); +        ofs += 4; + +        if (s_bits == MO_64) { +            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs); +            ofs += 4; +        } + +        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, oi); +        ofs += 4; + +        retaddr = TCG_REG_EAX; +        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); +        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs); +    } else { +        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); +        /* The second argument is already loaded with addrlo.  */ +        tcg_out_mov(s, (s_bits == MO_64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32), +                    tcg_target_call_iarg_regs[2], l->datalo_reg); +        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi); + +        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) { +            retaddr = tcg_target_call_iarg_regs[4]; +            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); +        } else { +            retaddr = TCG_REG_RAX; +            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr); +            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, +                       TCG_TARGET_CALL_STACK_OFFSET); +        } +    } + +    /* "Tail call" to the helper, with the return address back inline.  */ +    tcg_out_push(s, retaddr); +    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); +} +#elif defined(__x86_64__) && defined(__linux__) +# include <asm/prctl.h> +# include <sys/prctl.h> + +int arch_prctl(int code, unsigned long addr); + +static int guest_base_flags; +static inline void setup_guest_base_seg(void) +{ +    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) { +        guest_base_flags = P_GS; +    } +} +#else +# define guest_base_flags 0 +static inline void setup_guest_base_seg(void) { } +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, +                                   TCGReg base, int index, intptr_t ofs, +                                   int seg, TCGMemOp memop) +{ +    const TCGMemOp real_bswap = memop & MO_BSWAP; +    TCGMemOp bswap = real_bswap; +    int movop = OPC_MOVL_GvEv; + +    if (have_movbe && real_bswap) { +        bswap = 0; +        movop = OPC_MOVBE_GyMy; +    } + +    switch (memop & MO_SSIZE) { +    case MO_UB: +        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo, +                                 base, index, 0, ofs); +        break; +    case MO_SB: +        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, +                                 base, index, 0, ofs); +        break; +    case MO_UW: +        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, +                                 base, index, 0, ofs); +        if (real_bswap) { +            tcg_out_rolw_8(s, datalo); +        } +        break; +    case MO_SW: +        if (real_bswap) { +            if (have_movbe) { +                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, +                                         datalo, base, index, 0, ofs); +            } else { +                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, +                                         base, index, 0, ofs); +                tcg_out_rolw_8(s, datalo); +            } +            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); +        } else { +            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg, +                                     datalo, base, index, 0, ofs); +        } +        break; +    case MO_UL: +        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); +        if (bswap) { +            tcg_out_bswap32(s, datalo); +        } +        break; +#if TCG_TARGET_REG_BITS == 64 +    case MO_SL: +        if (real_bswap) { +            tcg_out_modrm_sib_offset(s, movop + seg, datalo, +                                     base, index, 0, ofs); +            if (bswap) { +                tcg_out_bswap32(s, datalo); +            } +            tcg_out_ext32s(s, datalo, datalo); +        } else { +            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, +          
                           base, index, 0, ofs); +        } +        break; +#endif +    case MO_Q: +        if (TCG_TARGET_REG_BITS == 64) { +            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, +                                     base, index, 0, ofs); +            if (bswap) { +                tcg_out_bswap64(s, datalo); +            } +        } else { +            if (real_bswap) { +                int t = datalo; +                datalo = datahi; +                datahi = t; +            } +            if (base != datalo) { +                tcg_out_modrm_sib_offset(s, movop + seg, datalo, +                                         base, index, 0, ofs); +                tcg_out_modrm_sib_offset(s, movop + seg, datahi, +                                         base, index, 0, ofs + 4); +            } else { +                tcg_out_modrm_sib_offset(s, movop + seg, datahi, +                                         base, index, 0, ofs + 4); +                tcg_out_modrm_sib_offset(s, movop + seg, datalo, +                                         base, index, 0, ofs); +            } +            if (bswap) { +                tcg_out_bswap32(s, datalo); +                tcg_out_bswap32(s, datahi); +            } +        } +        break; +    default: +        tcg_abort(); +    } +} + +/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and +   EAX. It will be useful once fixed registers globals are less +   common. */ +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +{ +    TCGReg datalo, datahi, addrlo; +    TCGReg addrhi __attribute__((unused)); +    TCGMemOpIdx oi; +    TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) +    int mem_index; +    TCGMemOp s_bits; +    tcg_insn_unit *label_ptr[2]; +#endif + +    datalo = *args++; +    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); +    addrlo = *args++; +    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) +    mem_index = get_mmuidx(oi); +    s_bits = opc & MO_SIZE; + +    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, +                     label_ptr, offsetof(CPUTLBEntry, addr_read)); + +    /* TLB Hit.  */ +    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); + +    /* Record the current context of a load into ldst label */ +    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, +                        s->code_ptr, label_ptr); +#else +    { +        int32_t offset = GUEST_BASE; +        TCGReg base = addrlo; +        int index = -1; +        int seg = 0; + +        /* For a 32-bit guest, the high 32 bits may contain garbage. +           We can do this with the ADDR32 prefix if we're not using +           a guest base, or when using segmentation.  Otherwise we +           need to zero-extend manually.  
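+           (The 0x67 ADDR32 prefix truncates the effective address to 32 bits,
+           so the stale high bits are ignored by the hardware.)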
*/ +        if (GUEST_BASE == 0 || guest_base_flags) { +            seg = guest_base_flags; +            offset = 0; +            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { +                seg |= P_ADDR32; +            } +        } else if (TCG_TARGET_REG_BITS == 64) { +            if (TARGET_LONG_BITS == 32) { +                tcg_out_ext32u(s, TCG_REG_L0, base); +                base = TCG_REG_L0; +            } +            if (offset != GUEST_BASE) { +                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); +                index = TCG_REG_L1; +                offset = 0; +            } +        } + +        tcg_out_qemu_ld_direct(s, datalo, datahi, +                               base, index, offset, seg, opc); +    } +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, +                                   TCGReg base, intptr_t ofs, int seg, +                                   TCGMemOp memop) +{ +    /* ??? Ideally we wouldn't need a scratch register.  For user-only, +       we could perform the bswap twice to restore the original value +       instead of moving to the scratch.  But as it is, the L constraint +       means that TCG_REG_L0 is definitely free here.  */ +    const TCGReg scratch = TCG_REG_L0; +    const TCGMemOp real_bswap = memop & MO_BSWAP; +    TCGMemOp bswap = real_bswap; +    int movop = OPC_MOVL_EvGv; + +    if (have_movbe && real_bswap) { +        bswap = 0; +        movop = OPC_MOVBE_MyGy; +    } + +    switch (memop & MO_SIZE) { +    case MO_8: +        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x. +           Use the scratch register if necessary.  */ +        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { +            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); +            datalo = scratch; +        } +        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, +                             datalo, base, ofs); +        break; +    case MO_16: +        if (bswap) { +            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); +            tcg_out_rolw_8(s, scratch); +            datalo = scratch; +        } +        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); +        break; +    case MO_32: +        if (bswap) { +            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); +            tcg_out_bswap32(s, scratch); +            datalo = scratch; +        } +        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); +        break; +    case MO_64: +        if (TCG_TARGET_REG_BITS == 64) { +            if (bswap) { +                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); +                tcg_out_bswap64(s, scratch); +                datalo = scratch; +            } +            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); +        } else if (bswap) { +            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); +            tcg_out_bswap32(s, scratch); +            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); +            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); +            tcg_out_bswap32(s, scratch); +            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); +        } else { +            if (real_bswap) { +                int t = datalo; +                datalo = datahi; +                datahi = t; +            } +            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); +            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); +        } +        
break; +    default: +        tcg_abort(); +    } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +{ +    TCGReg datalo, datahi, addrlo; +    TCGReg addrhi __attribute__((unused)); +    TCGMemOpIdx oi; +    TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) +    int mem_index; +    TCGMemOp s_bits; +    tcg_insn_unit *label_ptr[2]; +#endif + +    datalo = *args++; +    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); +    addrlo = *args++; +    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) +    mem_index = get_mmuidx(oi); +    s_bits = opc & MO_SIZE; + +    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, +                     label_ptr, offsetof(CPUTLBEntry, addr_write)); + +    /* TLB Hit.  */ +    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + +    /* Record the current context of a store into ldst label */ +    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, +                        s->code_ptr, label_ptr); +#else +    { +        int32_t offset = GUEST_BASE; +        TCGReg base = addrlo; +        int seg = 0; + +        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */ +        if (GUEST_BASE == 0 || guest_base_flags) { +            seg = guest_base_flags; +            offset = 0; +            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { +                seg |= P_ADDR32; +            } +        } else if (TCG_TARGET_REG_BITS == 64) { +            /* ??? Note that we can't use the same SIB addressing scheme +               as for loads, since we require L0 free for bswap.  */ +            if (offset != GUEST_BASE) { +                if (TARGET_LONG_BITS == 32) { +                    tcg_out_ext32u(s, TCG_REG_L0, base); +                    base = TCG_REG_L0; +                } +                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); +                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); +                base = TCG_REG_L1; +                offset = 0; +            } else if (TARGET_LONG_BITS == 32) { +                tcg_out_ext32u(s, TCG_REG_L1, base); +                base = TCG_REG_L1; +            } +        } + +        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); +    } +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, +                              const TCGArg *args, const int *const_args) +{ +    int c, vexop, rexw = 0; + +#if TCG_TARGET_REG_BITS == 64 +# define OP_32_64(x) \ +        case glue(glue(INDEX_op_, x), _i64): \ +            rexw = P_REXW; /* FALLTHRU */    \ +        case glue(glue(INDEX_op_, x), _i32) +#else +# define OP_32_64(x) \ +        case glue(glue(INDEX_op_, x), _i32) +#endif + +    switch(opc) { +    case INDEX_op_exit_tb: +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); +        tcg_out_jmp(s, tb_ret_addr); +        break; +    case INDEX_op_goto_tb: +        if (s->tb_jmp_offset) { +            /* direct jump method */ +            tcg_out8(s, OPC_JMP_long); /* jmp im */ +            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); +            tcg_out32(s, 0); +        } else { +            /* indirect jump method */ +            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, +                                 (intptr_t)(s->tb_next + args[0])); +        } +        s->tb_next_offset[args[0]] = tcg_current_code_size(s); +        break; +    case INDEX_op_br: +        
tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); +        break; +    OP_32_64(ld8u): +        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */ +        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); +        break; +    OP_32_64(ld8s): +        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); +        break; +    OP_32_64(ld16u): +        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */ +        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); +        break; +    OP_32_64(ld16s): +        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); +        break; +#if TCG_TARGET_REG_BITS == 64 +    case INDEX_op_ld32u_i64: +#endif +    case INDEX_op_ld_i32: +        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); +        break; + +    OP_32_64(st8): +        if (const_args[0]) { +            tcg_out_modrm_offset(s, OPC_MOVB_EvIz, +                                 0, args[1], args[2]); +            tcg_out8(s, args[0]); +        } else { +            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, +                                 args[0], args[1], args[2]); +        } +        break; +    OP_32_64(st16): +        if (const_args[0]) { +            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, +                                 0, args[1], args[2]); +            tcg_out16(s, args[0]); +        } else { +            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, +                                 args[0], args[1], args[2]); +        } +        break; +#if TCG_TARGET_REG_BITS == 64 +    case INDEX_op_st32_i64: +#endif +    case INDEX_op_st_i32: +        if (const_args[0]) { +            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]); +            tcg_out32(s, args[0]); +        } else { +            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); +        } +        break; + +    OP_32_64(add): +        /* For 3-operand addition, use LEA.  */ +        if (args[0] != args[1]) { +            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; + +            if (const_args[2]) { +                c3 = a2, a2 = -1; +            } else if (a0 == a2) { +                /* Watch out for dest = src + dest, since we've removed +                   the matching constraint on the add.  */ +                tgen_arithr(s, ARITH_ADD + rexw, a0, a1); +                break; +            } + +            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); +            break; +        } +        c = ARITH_ADD; +        goto gen_arith; +    OP_32_64(sub): +        c = ARITH_SUB; +        goto gen_arith; +    OP_32_64(and): +        c = ARITH_AND; +        goto gen_arith; +    OP_32_64(or): +        c = ARITH_OR; +        goto gen_arith; +    OP_32_64(xor): +        c = ARITH_XOR; +        goto gen_arith; +    gen_arith: +        if (const_args[2]) { +            tgen_arithi(s, c + rexw, args[0], args[2], 0); +        } else { +            tgen_arithr(s, c + rexw, args[0], args[2]); +        } +        break; + +    OP_32_64(andc): +        if (const_args[2]) { +            tcg_out_mov(s, rexw ? 
TCG_TYPE_I64 : TCG_TYPE_I32, +                        args[0], args[1]); +            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0); +        } else { +            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]); +        } +        break; + +    OP_32_64(mul): +        if (const_args[2]) { +            int32_t val; +            val = args[2]; +            if (val == (int8_t)val) { +                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); +                tcg_out8(s, val); +            } else { +                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); +                tcg_out32(s, val); +            } +        } else { +            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); +        } +        break; + +    OP_32_64(div2): +        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); +        break; +    OP_32_64(divu2): +        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); +        break; + +    OP_32_64(shl): +        c = SHIFT_SHL; +        vexop = OPC_SHLX; +        goto gen_shift_maybe_vex; +    OP_32_64(shr): +        c = SHIFT_SHR; +        vexop = OPC_SHRX; +        goto gen_shift_maybe_vex; +    OP_32_64(sar): +        c = SHIFT_SAR; +        vexop = OPC_SARX; +        goto gen_shift_maybe_vex; +    OP_32_64(rotl): +        c = SHIFT_ROL; +        goto gen_shift; +    OP_32_64(rotr): +        c = SHIFT_ROR; +        goto gen_shift; +    gen_shift_maybe_vex: +        if (have_bmi2 && !const_args[2]) { +            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]); +            break; +        } +        /* FALLTHRU */ +    gen_shift: +        if (const_args[2]) { +            tcg_out_shifti(s, c + rexw, args[0], args[2]); +        } else { +            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); +        } +        break; + +    case INDEX_op_brcond_i32: +        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], +                         arg_label(args[3]), 0); +        break; +    case INDEX_op_setcond_i32: +        tcg_out_setcond32(s, args[3], args[0], args[1], +                          args[2], const_args[2]); +        break; +    case INDEX_op_movcond_i32: +        tcg_out_movcond32(s, args[5], args[0], args[1], +                          args[2], const_args[2], args[3]); +        break; + +    OP_32_64(bswap16): +        tcg_out_rolw_8(s, args[0]); +        break; +    OP_32_64(bswap32): +        tcg_out_bswap32(s, args[0]); +        break; + +    OP_32_64(neg): +        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); +        break; +    OP_32_64(not): +        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); +        break; + +    OP_32_64(ext8s): +        tcg_out_ext8s(s, args[0], args[1], rexw); +        break; +    OP_32_64(ext16s): +        tcg_out_ext16s(s, args[0], args[1], rexw); +        break; +    OP_32_64(ext8u): +        tcg_out_ext8u(s, args[0], args[1]); +        break; +    OP_32_64(ext16u): +        tcg_out_ext16u(s, args[0], args[1]); +        break; + +    case INDEX_op_qemu_ld_i32: +        tcg_out_qemu_ld(s, args, 0); +        break; +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, args, 1); +        break; +    case INDEX_op_qemu_st_i32: +        tcg_out_qemu_st(s, args, 0); +        break; +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, args, 1); +        break; + +    OP_32_64(mulu2): +        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); +        break; +    OP_32_64(muls2): 
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); +        break; +    OP_32_64(add2): +        if (const_args[4]) { +            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1); +        } else { +            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]); +        } +        if (const_args[5]) { +            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1); +        } else { +            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]); +        } +        break; +    OP_32_64(sub2): +        if (const_args[4]) { +            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1); +        } else { +            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]); +        } +        if (const_args[5]) { +            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1); +        } else { +            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]); +        } +        break; + +#if TCG_TARGET_REG_BITS == 32 +    case INDEX_op_brcond2_i32: +        tcg_out_brcond2(s, args, const_args, 0); +        break; +    case INDEX_op_setcond2_i32: +        tcg_out_setcond2(s, args, const_args); +        break; +#else /* TCG_TARGET_REG_BITS == 64 */ +    case INDEX_op_ld32s_i64: +        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld_i64: +        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); +        break; +    case INDEX_op_st_i64: +        if (const_args[0]) { +            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, +                                 0, args[1], args[2]); +            tcg_out32(s, args[0]); +        } else { +            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); +        } +        break; + +    case INDEX_op_brcond_i64: +        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], +                         arg_label(args[3]), 0); +        break; +    case INDEX_op_setcond_i64: +        tcg_out_setcond64(s, args[3], args[0], args[1], +                          args[2], const_args[2]); +        break; +    case INDEX_op_movcond_i64: +        tcg_out_movcond64(s, args[5], args[0], args[1], +                          args[2], const_args[2], args[3]); +        break; + +    case INDEX_op_bswap64_i64: +        tcg_out_bswap64(s, args[0]); +        break; +    case INDEX_op_ext32u_i64: +        tcg_out_ext32u(s, args[0], args[1]); +        break; +    case INDEX_op_ext32s_i64: +        tcg_out_ext32s(s, args[0], args[1]); +        break; +#endif + +    OP_32_64(deposit): +        if (args[3] == 0 && args[4] == 8) { +            /* load bits 0..7 */ +            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, +                          args[2], args[0]); +        } else if (args[3] == 8 && args[4] == 8) { +            /* load bits 8..15 */ +            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4); +        } else if (args[3] == 0 && args[4] == 16) { +            /* load bits 0..15 */ +            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]); +        } else { +            tcg_abort(); +        } +        break; + +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_mov_i64: +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_movi_i64: +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  
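+       Reaching any of these cases here indicates a bug, hence the
+       fall-through to the abort below.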
*/ +    default: +        tcg_abort(); +    } + +#undef OP_32_64 +} + +static const TCGTargetOpDef x86_op_defs[] = { +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, +    { INDEX_op_br, { } }, +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, +    { INDEX_op_st8_i32, { "qi", "r" } }, +    { INDEX_op_st16_i32, { "ri", "r" } }, +    { INDEX_op_st_i32, { "ri", "r" } }, + +    { INDEX_op_add_i32, { "r", "r", "ri" } }, +    { INDEX_op_sub_i32, { "r", "0", "ri" } }, +    { INDEX_op_mul_i32, { "r", "0", "ri" } }, +    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, +    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, +    { INDEX_op_and_i32, { "r", "0", "ri" } }, +    { INDEX_op_or_i32, { "r", "0", "ri" } }, +    { INDEX_op_xor_i32, { "r", "0", "ri" } }, +    { INDEX_op_andc_i32, { "r", "r", "ri" } }, + +    { INDEX_op_shl_i32, { "r", "0", "Ci" } }, +    { INDEX_op_shr_i32, { "r", "0", "Ci" } }, +    { INDEX_op_sar_i32, { "r", "0", "Ci" } }, +    { INDEX_op_rotl_i32, { "r", "0", "ci" } }, +    { INDEX_op_rotr_i32, { "r", "0", "ci" } }, + +    { INDEX_op_brcond_i32, { "r", "ri" } }, + +    { INDEX_op_bswap16_i32, { "r", "0" } }, +    { INDEX_op_bswap32_i32, { "r", "0" } }, + +    { INDEX_op_neg_i32, { "r", "0" } }, + +    { INDEX_op_not_i32, { "r", "0" } }, + +    { INDEX_op_ext8s_i32, { "r", "q" } }, +    { INDEX_op_ext16s_i32, { "r", "r" } }, +    { INDEX_op_ext8u_i32, { "r", "q" } }, +    { INDEX_op_ext16u_i32, { "r", "r" } }, + +    { INDEX_op_setcond_i32, { "q", "r", "ri" } }, + +    { INDEX_op_deposit_i32, { "Q", "0", "Q" } }, +    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } }, + +    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, +    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } }, +    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, +    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + +#if TCG_TARGET_REG_BITS == 32 +    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, +    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#else +    { INDEX_op_ld8u_i64, { "r", "r" } }, +    { INDEX_op_ld8s_i64, { "r", "r" } }, +    { INDEX_op_ld16u_i64, { "r", "r" } }, +    { INDEX_op_ld16s_i64, { "r", "r" } }, +    { INDEX_op_ld32u_i64, { "r", "r" } }, +    { INDEX_op_ld32s_i64, { "r", "r" } }, +    { INDEX_op_ld_i64, { "r", "r" } }, +    { INDEX_op_st8_i64, { "ri", "r" } }, +    { INDEX_op_st16_i64, { "ri", "r" } }, +    { INDEX_op_st32_i64, { "ri", "r" } }, +    { INDEX_op_st_i64, { "re", "r" } }, + +    { INDEX_op_add_i64, { "r", "r", "re" } }, +    { INDEX_op_mul_i64, { "r", "0", "re" } }, +    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, +    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, +    { INDEX_op_sub_i64, { "r", "0", "re" } }, +    { INDEX_op_and_i64, { "r", "0", "reZ" } }, +    { INDEX_op_or_i64, { "r", "0", "re" } }, +    { INDEX_op_xor_i64, { "r", "0", "re" } }, +    { INDEX_op_andc_i64, { "r", "r", "rI" } }, + +    { INDEX_op_shl_i64, { "r", "0", "Ci" } }, +    { INDEX_op_shr_i64, { "r", "0", "Ci" } }, +    { INDEX_op_sar_i64, { "r", "0", "Ci" } }, +    { INDEX_op_rotl_i64, { "r", "0", "ci" } }, +    { INDEX_op_rotr_i64, { "r", "0", "ci" } }, + +    { INDEX_op_brcond_i64, { "r", "re" } }, +    { INDEX_op_setcond_i64, { "r", "r", "re" } }, + +    { INDEX_op_bswap16_i64, { "r", "0" } }, +    { INDEX_op_bswap32_i64, { "r", "0" } }, +    { 
INDEX_op_bswap64_i64, { "r", "0" } }, +    { INDEX_op_neg_i64, { "r", "0" } }, +    { INDEX_op_not_i64, { "r", "0" } }, + +    { INDEX_op_ext8s_i64, { "r", "r" } }, +    { INDEX_op_ext16s_i64, { "r", "r" } }, +    { INDEX_op_ext32s_i64, { "r", "r" } }, +    { INDEX_op_ext8u_i64, { "r", "r" } }, +    { INDEX_op_ext16u_i64, { "r", "r" } }, +    { INDEX_op_ext32u_i64, { "r", "r" } }, + +    { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, +    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, + +    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } }, +    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } }, +    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } }, +    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 +    { INDEX_op_qemu_ld_i32, { "r", "L" } }, +    { INDEX_op_qemu_st_i32, { "L", "L" } }, +    { INDEX_op_qemu_ld_i64, { "r", "L" } }, +    { INDEX_op_qemu_st_i64, { "L", "L" } }, +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS +    { INDEX_op_qemu_ld_i32, { "r", "L" } }, +    { INDEX_op_qemu_st_i32, { "L", "L" } }, +    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, +    { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, +#else +    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, +    { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, +    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, +    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, +#endif +    { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { +#if TCG_TARGET_REG_BITS == 64 +    TCG_REG_RBP, +    TCG_REG_RBX, +#if defined(_WIN64) +    TCG_REG_RDI, +    TCG_REG_RSI, +#endif +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, /* Currently used for the global env. */ +    TCG_REG_R15, +#else +    TCG_REG_EBP, /* Currently used for the global env. */ +    TCG_REG_EBX, +    TCG_REG_ESI, +    TCG_REG_EDI, +#endif +}; + +/* Compute frame size via macros, to share between tcg_target_qemu_prologue +   and tcg_register_jit.  */ + +#define PUSH_SIZE \ +    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ +     * (TCG_TARGET_REG_BITS / 8)) + +#define FRAME_SIZE \ +    ((PUSH_SIZE \ +      + TCG_STATIC_CALL_ARGS_SIZE \ +      + CPU_TEMP_BUF_NLONGS * sizeof(long) \ +      + TCG_TARGET_STACK_ALIGN - 1) \ +     & ~(TCG_TARGET_STACK_ALIGN - 1)) + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    int i, stack_addend; + +    /* TB prologue */ + +    /* Reserve some stack space, also for TCG temps.  */ +    stack_addend = FRAME_SIZE - PUSH_SIZE; +    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, +                  CPU_TEMP_BUF_NLONGS * sizeof(long)); + +    /* Save all callee saved registers.  */ +    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { +        tcg_out_push(s, tcg_target_callee_save_regs[i]); +    } + +#if TCG_TARGET_REG_BITS == 32 +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, +               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); +    tcg_out_addi(s, TCG_REG_ESP, -stack_addend); +    /* jmp *tb.  */ +    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, +		         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 +			 + stack_addend); +#else +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); +    tcg_out_addi(s, TCG_REG_ESP, -stack_addend); +    /* jmp *tb.  
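+       On 64-bit the TB pointer arrives in the second argument register,
+       so this is an indirect jump through that register.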
*/ +    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); +#endif + +    /* TB epilogue */ +    tb_ret_addr = s->code_ptr; + +    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); + +    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { +        tcg_out_pop(s, tcg_target_callee_save_regs[i]); +    } +    tcg_out_opc(s, OPC_RET, 0, 0, 0); + +#if !defined(CONFIG_SOFTMMU) +    /* Try to set up a segment register to point to GUEST_BASE.  */ +    if (GUEST_BASE) { +        setup_guest_base_seg(); +    } +#endif +} + +static void tcg_target_init(TCGContext *s) +{ +#ifdef CONFIG_CPUID_H +    unsigned a, b, c, d; +    int max = __get_cpuid_max(0, 0); + +    if (max >= 1) { +        __cpuid(1, a, b, c, d); +#ifndef have_cmov +        /* For 32-bit, 99% certainty that we're running on hardware that +           supports cmov, but we still need to check.  In case cmov is not +           available, we'll use a small forward branch.  */ +        have_cmov = (d & bit_CMOV) != 0; +#endif +#ifndef have_movbe +        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we +           need to probe for it.  */ +        have_movbe = (c & bit_MOVBE) != 0; +#endif +    } + +    if (max >= 7) { +        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */ +        __cpuid_count(7, 0, a, b, c, d); +#ifdef bit_BMI +        have_bmi1 = (b & bit_BMI) != 0; +#endif +#ifndef have_bmi2 +        have_bmi2 = (b & bit_BMI2) != 0; +#endif +    } +#endif + +    if (TCG_TARGET_REG_BITS == 64) { +        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); +        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); +    } else { +        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); +    } + +    tcg_regset_clear(tcg_target_call_clobber_regs); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); +    if (TCG_TARGET_REG_BITS == 64) { +#if !defined(_WIN64) +        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); +        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); +#endif +        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); +        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); +        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); +        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); +    } + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + +    tcg_add_target_add_op_defs(x86_op_defs); +} + +typedef struct { +    DebugFrameHeader h; +    uint8_t fde_def_cfa[4]; +    uint8_t fde_reg_ofs[14]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value.  */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#if !defined(__ELF__) +    /* Host machine without ELF. */ +#elif TCG_TARGET_REG_BITS == 64 +#define ELF_HOST_MACHINE EM_X86_64 +static const DebugFrame debug_frame = { +    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ +    .h.cie.id = -1, +    .h.cie.version = 1, +    .h.cie.code_align = 1, +    .h.cie.data_align = 0x78,             /* sleb128 -8 */ +    .h.cie.return_column = 16, + +    /* Total FDE size does not include the "len" member.  
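+       hence the offsetof() below measures from cie_offset, the field that
+       follows len.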
*/ +    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + +    .fde_def_cfa = { +        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */ +        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */ +        (FRAME_SIZE >> 7) +    }, +    .fde_reg_ofs = { +        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */ +        /* The following ordering must match tcg_target_callee_save_regs.  */ +        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */ +        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */ +        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */ +        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */ +        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */ +        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */ +    } +}; +#else +#define ELF_HOST_MACHINE EM_386 +static const DebugFrame debug_frame = { +    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ +    .h.cie.id = -1, +    .h.cie.version = 1, +    .h.cie.code_align = 1, +    .h.cie.data_align = 0x7c,             /* sleb128 -4 */ +    .h.cie.return_column = 8, + +    /* Total FDE size does not include the "len" member.  */ +    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + +    .fde_def_cfa = { +        12, 4,                          /* DW_CFA_def_cfa %esp, ... */ +        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */ +        (FRAME_SIZE >> 7) +    }, +    .fde_reg_ofs = { +        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */ +        /* The following ordering must match tcg_target_callee_save_regs.  */ +        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */ +        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */ +        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */ +        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */ +    } +}; +#endif + +#if defined(ELF_HOST_MACHINE) +void tcg_register_jit(void *buf, size_t buf_size) +{ +    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} +#endif diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h new file mode 100644 index 00000000..25b51335 --- /dev/null +++ b/tcg/i386/tcg-target.h @@ -0,0 +1,149 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef TCG_TARGET_I386  +#define TCG_TARGET_I386 1 + +#define TCG_TARGET_INSN_UNIT_SIZE  1 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31 + +#ifdef __x86_64__ +# define TCG_TARGET_REG_BITS  64 +# define TCG_TARGET_NB_REGS   16 +#else +# define TCG_TARGET_REG_BITS  32 +# define TCG_TARGET_NB_REGS    8 +#endif + +typedef enum { +    TCG_REG_EAX = 0, +    TCG_REG_ECX, +    TCG_REG_EDX, +    TCG_REG_EBX, +    TCG_REG_ESP, +    TCG_REG_EBP, +    TCG_REG_ESI, +    TCG_REG_EDI, + +    /* 64-bit registers; always define the symbols to avoid +       too much if-deffing.  */ +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, +    TCG_REG_R15, +    TCG_REG_RAX = TCG_REG_EAX, +    TCG_REG_RCX = TCG_REG_ECX, +    TCG_REG_RDX = TCG_REG_EDX, +    TCG_REG_RBX = TCG_REG_EBX, +    TCG_REG_RSP = TCG_REG_ESP, +    TCG_REG_RBP = TCG_REG_EBP, +    TCG_REG_RSI = TCG_REG_ESI, +    TCG_REG_RDI = TCG_REG_EDI, +} TCGReg; + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_ESP  +#define TCG_TARGET_STACK_ALIGN 16 +#if defined(_WIN64) +#define TCG_TARGET_CALL_STACK_OFFSET 32 +#else +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#endif + +extern bool have_bmi1; + +/* optional instructions */ +#define TCG_TARGET_HAS_div2_i32         1 +#define TCG_TARGET_HAS_rot_i32          1 +#define TCG_TARGET_HAS_ext8s_i32        1 +#define TCG_TARGET_HAS_ext16s_i32       1 +#define TCG_TARGET_HAS_ext8u_i32        1 +#define TCG_TARGET_HAS_ext16u_i32       1 +#define TCG_TARGET_HAS_bswap16_i32      1 +#define TCG_TARGET_HAS_bswap32_i32      1 +#define TCG_TARGET_HAS_neg_i32          1 +#define TCG_TARGET_HAS_not_i32          1 +#define TCG_TARGET_HAS_andc_i32         have_bmi1 +#define TCG_TARGET_HAS_orc_i32          0 +#define TCG_TARGET_HAS_eqv_i32          0 +#define TCG_TARGET_HAS_nand_i32         0 +#define TCG_TARGET_HAS_nor_i32          0 +#define TCG_TARGET_HAS_deposit_i32      1 +#define TCG_TARGET_HAS_movcond_i32      1 +#define TCG_TARGET_HAS_add2_i32         1 +#define TCG_TARGET_HAS_sub2_i32         1 +#define TCG_TARGET_HAS_mulu2_i32        1 +#define TCG_TARGET_HAS_muls2_i32        1 +#define TCG_TARGET_HAS_muluh_i32        0 +#define TCG_TARGET_HAS_mulsh_i32        0 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32    0 +#define TCG_TARGET_HAS_div2_i64         1 +#define TCG_TARGET_HAS_rot_i64          1 +#define TCG_TARGET_HAS_ext8s_i64        1 +#define TCG_TARGET_HAS_ext16s_i64       1 +#define TCG_TARGET_HAS_ext32s_i64       1 +#define TCG_TARGET_HAS_ext8u_i64        1 +#define TCG_TARGET_HAS_ext16u_i64       1 +#define TCG_TARGET_HAS_ext32u_i64       1 +#define TCG_TARGET_HAS_bswap16_i64      1 +#define TCG_TARGET_HAS_bswap32_i64      1 +#define TCG_TARGET_HAS_bswap64_i64      1 +#define TCG_TARGET_HAS_neg_i64          1 +#define TCG_TARGET_HAS_not_i64          1 +#define TCG_TARGET_HAS_andc_i64         have_bmi1 +#define TCG_TARGET_HAS_orc_i64          0 +#define TCG_TARGET_HAS_eqv_i64          0 +#define TCG_TARGET_HAS_nand_i64         0 +#define TCG_TARGET_HAS_nor_i64          0 +#define TCG_TARGET_HAS_deposit_i64      1 +#define TCG_TARGET_HAS_movcond_i64      1 +#define TCG_TARGET_HAS_add2_i64         1 +#define TCG_TARGET_HAS_sub2_i64         1 +#define TCG_TARGET_HAS_mulu2_i64        1 +#define TCG_TARGET_HAS_muls2_i64        1 +#define TCG_TARGET_HAS_muluh_i64        0 +#define TCG_TARGET_HAS_mulsh_i64        0 +#endif + +#define TCG_TARGET_deposit_i32_valid(ofs, len) \ +    (((ofs) == 
0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ +     ((ofs) == 0 && (len) == 16)) +#define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid + +#if TCG_TARGET_REG_BITS == 64 +# define TCG_AREG0 TCG_REG_R14 +#else +# define TCG_AREG0 TCG_REG_EBP +#endif + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} + +#endif diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c new file mode 100644 index 00000000..81cb9f79 --- /dev/null +++ b/tcg/ia64/tcg-target.c @@ -0,0 +1,2445 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009-2010 Aurelien Jarno <aurelien@aurel32.net> + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +/* + * Register definitions + */ + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +     "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7", +     "r8",  "r9", "r10", "r11", "r12", "r13", "r14", "r15", +    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", +    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", +    "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", +    "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", +    "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", +    "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", +}; +#endif + +#ifdef CONFIG_USE_GUEST_BASE +#define TCG_GUEST_BASE_REG TCG_REG_R55 +#else +#define TCG_GUEST_BASE_REG TCG_REG_R0 +#endif +#ifndef GUEST_BASE +#define GUEST_BASE 0 +#endif + +/* Branch registers */ +enum { +    TCG_REG_B0 = 0, +    TCG_REG_B1, +    TCG_REG_B2, +    TCG_REG_B3, +    TCG_REG_B4, +    TCG_REG_B5, +    TCG_REG_B6, +    TCG_REG_B7, +}; + +/* Floating point registers */ +enum { +    TCG_REG_F0 = 0, +    TCG_REG_F1, +    TCG_REG_F2, +    TCG_REG_F3, +    TCG_REG_F4, +    TCG_REG_F5, +    TCG_REG_F6, +    TCG_REG_F7, +    TCG_REG_F8, +    TCG_REG_F9, +    TCG_REG_F10, +    TCG_REG_F11, +    TCG_REG_F12, +    TCG_REG_F13, +    TCG_REG_F14, +    TCG_REG_F15, +}; + +/* Predicate registers */ +enum { +    TCG_REG_P0 = 0, +    TCG_REG_P1, +    TCG_REG_P2, +    TCG_REG_P3, +    TCG_REG_P4, +    TCG_REG_P5, +    TCG_REG_P6, +    TCG_REG_P7, +    TCG_REG_P8, +    TCG_REG_P9, +    TCG_REG_P10, +    TCG_REG_P11, +    TCG_REG_P12, +    TCG_REG_P13, +    TCG_REG_P14, +    TCG_REG_P15, +}; + +/* Application registers */ +enum { +    TCG_REG_PFS = 64, +}; + +static const int tcg_target_reg_alloc_order[] = { +    TCG_REG_R35, +    TCG_REG_R36, +    TCG_REG_R37, +    TCG_REG_R38, +    TCG_REG_R39, +    TCG_REG_R40, +    TCG_REG_R41, +    TCG_REG_R42, +    TCG_REG_R43, +    TCG_REG_R44, +    TCG_REG_R45, +    TCG_REG_R46, +    TCG_REG_R47, +    TCG_REG_R48, +    TCG_REG_R49, +    TCG_REG_R50, +    TCG_REG_R51, +    TCG_REG_R52, +    TCG_REG_R53, +    TCG_REG_R54, +    TCG_REG_R55, +    TCG_REG_R14, +    TCG_REG_R15, +    TCG_REG_R16, +    TCG_REG_R17, +    TCG_REG_R18, +    TCG_REG_R19, +    TCG_REG_R20, +    TCG_REG_R21, +    TCG_REG_R22, +    TCG_REG_R23, +    TCG_REG_R24, +    TCG_REG_R25, +    TCG_REG_R26, +    TCG_REG_R27, +    TCG_REG_R28, +    TCG_REG_R29, +    TCG_REG_R30, +    TCG_REG_R31, +    TCG_REG_R56, +    TCG_REG_R57, +    TCG_REG_R58, +    TCG_REG_R59, +    TCG_REG_R60, +    TCG_REG_R61, +    TCG_REG_R62, +    TCG_REG_R63, +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11 +}; + +static const int tcg_target_call_iarg_regs[8] = { +    TCG_REG_R56, +    TCG_REG_R57, +    TCG_REG_R58, +    TCG_REG_R59, +    TCG_REG_R60, +    TCG_REG_R61, +    TCG_REG_R62, +    TCG_REG_R63, +}; + +static const int tcg_target_call_oarg_regs[] = { +    TCG_REG_R8 +}; + +/* + * opcode formation + */ + +/* bundle templates: stops (double bar in the IA64 manual) are marked with +   an uppercase letter. 
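+   e.g. 'mii' has no stops, while 'mII' has a stop after each of its two
+   I slots.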
*/ +enum { +    mii = 0x00, +    miI = 0x01, +    mIi = 0x02, +    mII = 0x03, +    mlx = 0x04, +    mLX = 0x05, +    mmi = 0x08, +    mmI = 0x09, +    Mmi = 0x0a, +    MmI = 0x0b, +    mfi = 0x0c, +    mfI = 0x0d, +    mmf = 0x0e, +    mmF = 0x0f, +    mib = 0x10, +    miB = 0x11, +    mbb = 0x12, +    mbB = 0x13, +    bbb = 0x16, +    bbB = 0x17, +    mmb = 0x18, +    mmB = 0x19, +    mfb = 0x1c, +    mfB = 0x1d, +}; + +enum { +    OPC_ADD_A1                = 0x10000000000ull, +    OPC_AND_A1                = 0x10060000000ull, +    OPC_AND_A3                = 0x10160000000ull, +    OPC_ANDCM_A1              = 0x10068000000ull, +    OPC_ANDCM_A3              = 0x10168000000ull, +    OPC_ADDS_A4               = 0x10800000000ull, +    OPC_ADDL_A5               = 0x12000000000ull, +    OPC_ALLOC_M34             = 0x02c00000000ull, +    OPC_BR_DPTK_FEW_B1        = 0x08400000000ull, +    OPC_BR_SPTK_MANY_B1       = 0x08000001000ull, +    OPC_BR_CALL_SPNT_FEW_B3   = 0x0a200000000ull, +    OPC_BR_SPTK_MANY_B4       = 0x00100001000ull, +    OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull, +    OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull, +    OPC_BRL_SPTK_MANY_X3      = 0x18000001000ull, +    OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, +    OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, +    OPC_CMP_LT_A6             = 0x18000000000ull, +    OPC_CMP_LTU_A6            = 0x1a000000000ull, +    OPC_CMP_EQ_A6             = 0x1c000000000ull, +    OPC_CMP4_LT_A6            = 0x18400000000ull, +    OPC_CMP4_LTU_A6           = 0x1a400000000ull, +    OPC_CMP4_EQ_A6            = 0x1c400000000ull, +    OPC_DEP_I14               = 0x0ae00000000ull, +    OPC_DEP_I15               = 0x08000000000ull, +    OPC_DEP_Z_I12             = 0x0a600000000ull, +    OPC_EXTR_I11              = 0x0a400002000ull, +    OPC_EXTR_U_I11            = 0x0a400000000ull, +    OPC_FCVT_FX_TRUNC_S1_F10  = 0x004d0000000ull, +    OPC_FCVT_FXU_TRUNC_S1_F10 = 0x004d8000000ull, +    OPC_FCVT_XF_F11           = 0x000e0000000ull, +    OPC_FMA_S1_F1             = 0x10400000000ull, +    OPC_FNMA_S1_F1            = 0x18400000000ull, +    OPC_FRCPA_S1_F6           = 0x00600000000ull, +    OPC_GETF_SIG_M19          = 0x08708000000ull, +    OPC_LD1_M1                = 0x08000000000ull, +    OPC_LD1_M3                = 0x0a000000000ull, +    OPC_LD2_M1                = 0x08040000000ull, +    OPC_LD2_M3                = 0x0a040000000ull, +    OPC_LD4_M1                = 0x08080000000ull, +    OPC_LD4_M3                = 0x0a080000000ull, +    OPC_LD8_M1                = 0x080c0000000ull, +    OPC_LD8_M3                = 0x0a0c0000000ull, +    OPC_MUX1_I3               = 0x0eca0000000ull, +    OPC_NOP_B9                = 0x04008000000ull, +    OPC_NOP_F16               = 0x00008000000ull, +    OPC_NOP_I18               = 0x00008000000ull, +    OPC_NOP_M48               = 0x00008000000ull, +    OPC_MOV_I21               = 0x00e00100000ull, +    OPC_MOV_RET_I21           = 0x00e00500000ull, +    OPC_MOV_I22               = 0x00188000000ull, +    OPC_MOV_I_I26             = 0x00150000000ull, +    OPC_MOVL_X2               = 0x0c000000000ull, +    OPC_OR_A1                 = 0x10070000000ull, +    OPC_OR_A3                 = 0x10170000000ull, +    OPC_SETF_EXP_M18          = 0x0c748000000ull, +    OPC_SETF_SIG_M18          = 0x0c708000000ull, +    OPC_SHL_I7                = 0x0f240000000ull, +    OPC_SHR_I5                = 0x0f220000000ull, +    OPC_SHR_U_I5              = 0x0f200000000ull, +    OPC_SHRP_I10              = 0x0ac00000000ull, +    
OPC_SXT1_I29              = 0x000a0000000ull, +    OPC_SXT2_I29              = 0x000a8000000ull, +    OPC_SXT4_I29              = 0x000b0000000ull, +    OPC_ST1_M4                = 0x08c00000000ull, +    OPC_ST2_M4                = 0x08c40000000ull, +    OPC_ST4_M4                = 0x08c80000000ull, +    OPC_ST8_M4                = 0x08cc0000000ull, +    OPC_SUB_A1                = 0x10028000000ull, +    OPC_SUB_A3                = 0x10128000000ull, +    OPC_UNPACK4_L_I2          = 0x0f860000000ull, +    OPC_XMA_L_F2              = 0x1d000000000ull, +    OPC_XOR_A1                = 0x10078000000ull, +    OPC_XOR_A3                = 0x10178000000ull, +    OPC_ZXT1_I29              = 0x00080000000ull, +    OPC_ZXT2_I29              = 0x00088000000ull, +    OPC_ZXT4_I29              = 0x00090000000ull, + +    INSN_NOP_M                = OPC_NOP_M48,  /* nop.m 0 */ +    INSN_NOP_I                = OPC_NOP_I18,  /* nop.i 0 */ +}; + +static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1, +                                  int r2, int r3) +{ +    return opc +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a3(int qp, uint64_t opc, int r1, +                                  uint64_t imm, int r3) +{ +    return opc +           | ((imm & 0x80) << 29) /* s */ +           | ((imm & 0x7f) << 13) /* imm7b */ +           | ((r3 & 0x7f) << 20) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a4(int qp, uint64_t opc, int r1, +                                  uint64_t imm, int r3) +{ +    return opc +           | ((imm & 0x2000) << 23) /* s */ +           | ((imm & 0x1f80) << 20) /* imm6d */ +           | ((imm & 0x007f) << 13) /* imm7b */ +           | ((r3 & 0x7f) << 20) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a5(int qp, uint64_t opc, int r1, +                                  uint64_t imm, int r3) +{ +    return opc +           | ((imm & 0x200000) << 15) /* s */ +           | ((imm & 0x1f0000) <<  6) /* imm5c */ +           | ((imm & 0x00ff80) << 20) /* imm9d */ +           | ((imm & 0x00007f) << 13) /* imm7b */ +           | ((r3 & 0x03) << 20) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_a6(int qp, uint64_t opc, int p1, +                                  int p2, int r2, int r3) +{ +    return opc +           | ((p2 & 0x3f) << 27) +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((p1 & 0x3f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) +{ +    return opc +           | ((imm & 0x100000) << 16) /* s */ +           | ((imm & 0x0fffff) << 13) /* imm20b */ +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) +{ +    return opc +           | ((imm & 0x100000) << 16) /* s */ +           | ((imm & 0x0fffff) << 13) /* imm20b */ +           | ((b1 & 0x7) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) +{ +    return opc +           | ((b2 & 0x7) << 13) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_b5(int qp, uint64_t opc, int b1, int b2) +{ +    return opc +           | ((b2 & 0x7) << 13) +           | ((b1 & 0x7) << 6) +           | (qp & 0x3f); +} + + +static inline uint64_t tcg_opc_b9(int qp, uint64_t opc, 
uint64_t imm) +{ +    return opc +           | ((imm & 0x100000) << 16) /* i */ +           | ((imm & 0x0fffff) << 6)  /* imm20a */ +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f1(int qp, uint64_t opc, int f1, +                                  int f3, int f4, int f2) +{ +    return opc +           | ((f4 & 0x7f) << 27) +           | ((f3 & 0x7f) << 20) +           | ((f2 & 0x7f) << 13) +           | ((f1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f2(int qp, uint64_t opc, int f1, +                                  int f3, int f4, int f2) +{ +    return opc +           | ((f4 & 0x7f) << 27) +           | ((f3 & 0x7f) << 20) +           | ((f2 & 0x7f) << 13) +           | ((f1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f6(int qp, uint64_t opc, int f1, +                                  int p2, int f2, int f3) +{ +    return opc +           | ((p2 & 0x3f) << 27) +           | ((f3 & 0x7f) << 20) +           | ((f2 & 0x7f) << 13) +           | ((f1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f10(int qp, uint64_t opc, int f1, int f2) +{ +    return opc +           | ((f2 & 0x7f) << 13) +           | ((f1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f11(int qp, uint64_t opc, int f1, int f2) +{ +    return opc +           | ((f2 & 0x7f) << 13) +           | ((f1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_f16(int qp, uint64_t opc, uint64_t imm) +{ +    return opc +           | ((imm & 0x100000) << 16) /* i */ +           | ((imm & 0x0fffff) << 6)  /* imm20a */ +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i2(int qp, uint64_t opc, int r1, +                                  int r2, int r3) +{ +    return opc +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i3(int qp, uint64_t opc, int r1, +                                  int r2, int mbtype) +{ +    return opc +           | ((mbtype & 0x0f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i5(int qp, uint64_t opc, int r1, +                                  int r3, int r2) +{ +    return opc +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i7(int qp, uint64_t opc, int r1, +                                  int r2, int r3) +{ +    return opc +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i10(int qp, uint64_t opc, int r1, +                                   int r2, int r3, uint64_t count) +{ +    return opc +           | ((count & 0x3f) << 27) +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i11(int qp, uint64_t opc, int r1, +                                   int r3, uint64_t pos, uint64_t len) +{ +    return opc +           | ((len & 0x3f) << 27) +           | ((r3 & 0x7f) << 20) +           | ((pos & 0x3f) << 14) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1, +                                   int r2, uint64_t pos, 
uint64_t len) +{ +    return opc +           | ((len & 0x3f) << 27) +           | ((pos & 0x3f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm, +                                   int r3, uint64_t pos, uint64_t len) +{ +    return opc +           | ((imm & 0x01) << 36) +           | ((len & 0x3f) << 27) +           | ((r3 & 0x7f) << 20) +           | ((pos & 0x3f) << 14) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2, +                                   int r3, uint64_t pos, uint64_t len) +{ +    return opc +           | ((pos & 0x3f) << 31) +           | ((len & 0x0f) << 27) +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm) +{ +    return opc +           | ((imm & 0x100000) << 16) /* i */ +           | ((imm & 0x0fffff) << 6)  /* imm20a */ +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i21(int qp, uint64_t opc, int b1, +                                   int r2, uint64_t imm) +{ +    return opc +           | ((imm & 0x1ff) << 24) +           | ((r2 & 0x7f) << 13) +           | ((b1 & 0x7) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i22(int qp, uint64_t opc, int r1, int b2) +{ +    return opc +           | ((b2 & 0x7) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i26(int qp, uint64_t opc, int ar3, int r2) +{ +    return opc +           | ((ar3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_i29(int qp, uint64_t opc, int r1, int r3) +{ +    return opc +           | ((r3 & 0x7f) << 20) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_l2(uint64_t imm) +{ +    return (imm & 0x7fffffffffc00000ull) >> 22; +} + +static inline uint64_t tcg_opc_l3(uint64_t imm) +{ +    return (imm & 0x07fffffffff00000ull) >> 18; +} + +#define tcg_opc_l4  tcg_opc_l3 + +static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3) +{ +    return opc +           | ((r3 & 0x7f) << 20) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m3(int qp, uint64_t opc, int r1, +                                  int r3, uint64_t imm) +{ +    return opc +           | ((imm & 0x100) << 28) /* s */ +           | ((imm & 0x080) << 20) /* i */ +           | ((imm & 0x07f) << 13) /* imm7b */ +           | ((r3 & 0x7f) << 20) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m4(int qp, uint64_t opc, int r2, int r3) +{ +    return opc +           | ((r3 & 0x7f) << 20) +           | ((r2 & 0x7f) << 13) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m18(int qp, uint64_t opc, int f1, int r2) +{ +    return opc +           | ((r2 & 0x7f) << 13) +           | ((f1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m19(int qp, uint64_t opc, int r1, int f2) +{ +    return opc +           | ((f2 & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m34(int qp, uint64_t opc, int r1, +                                   int sof, int sol, int sor) +{ +    return 
opc +           | ((sor & 0x0f) << 27) +           | ((sol & 0x7f) << 20) +           | ((sof & 0x7f) << 13) +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_m48(int qp, uint64_t opc, uint64_t imm) +{ +    return opc +           | ((imm & 0x100000) << 16) /* i */ +           | ((imm & 0x0fffff) << 6)  /* imm20a */ +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x2(int qp, uint64_t opc, +                                  int r1, uint64_t imm) +{ +    return opc +           | ((imm & 0x8000000000000000ull) >> 27) /* i */ +           |  (imm & 0x0000000000200000ull)        /* ic */ +           | ((imm & 0x00000000001f0000ull) << 6)  /* imm5c */ +           | ((imm & 0x000000000000ff80ull) << 20) /* imm9d */ +           | ((imm & 0x000000000000007full) << 13) /* imm7b */ +           | ((r1 & 0x7f) << 6) +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm) +{ +    return opc +           | ((imm & 0x0800000000000000ull) >> 23) /* i */ +           | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ +           | (qp & 0x3f); +} + +static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm) +{ +    return opc +           | ((imm & 0x0800000000000000ull) >> 23) /* i */ +           | ((imm & 0x00000000000fffffull) << 13) /* imm20b */ +           | ((b1 & 0x7) << 6) +           | (qp & 0x3f); +} + + +/* + * Relocations - Note that we never encode branches elsewhere than slot 2. + */ + +static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    uint64_t imm = target - pc; + +    pc->hi = (pc->hi & 0xf700000fffffffffull) +             | ((imm & 0x100000) << 39)  /* s */ +             | ((imm & 0x0fffff) << 36); /* imm20b */ +} + +static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc) +{ +    int64_t high = pc->hi; + +    return ((high >> 39) & 0x100000) + /* s */ +           ((high >> 36) & 0x0fffff);  /* imm20b */ +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    assert(addend == 0); +    assert(type == R_IA64_PCREL21B); +    reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value); +} + +/* + * Constraints + */ + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str; + +    ct_str = *pct_str; +    switch(ct_str[0]) { +    case 'r': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); +        break; +    case 'I': +        ct->ct |= TCG_CT_CONST_S22; +        break; +    case 'S': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set(ct->u.regs, 0xffffffffffffffffull); +#if defined(CONFIG_SOFTMMU) +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); +#endif +        break; +    case 'Z': +        /* We are cheating a bit here, using the fact that the register +           r0 is also the register number 0. Hence there is no need +           to check for const_args in each instruction. 
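+           A 'Z' operand holding the constant zero can therefore be encoded
+           directly as register r0, which always reads as zero.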
*/ +        ct->ct |= TCG_CT_CONST_ZERO; +        break; +    default: +        return -1; +    } +    ct_str++; +    *pct_str = ct_str; +    return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, +                                         const TCGArgConstraint *arg_ct) +{ +    int ct; +    ct = arg_ct->ct; +    if (ct & TCG_CT_CONST) +        return 1; +    else if ((ct & TCG_CT_CONST_ZERO) && val == 0) +        return 1; +    else if ((ct & TCG_CT_CONST_S22) && val == ((int32_t)val << 10) >> 10) +        return 1; +    else +        return 0; +} + +/* + * Code generation + */ + +static tcg_insn_unit *tb_ret_addr; + +static inline void tcg_out_bundle(TCGContext *s, int template, +                                  uint64_t slot0, uint64_t slot1, +                                  uint64_t slot2) +{ +    template &= 0x1f;          /* 5 bits */ +    slot0 &= 0x1ffffffffffull; /* 41 bits */ +    slot1 &= 0x1ffffffffffull; /* 41 bits */ +    slot2 &= 0x1ffffffffffull; /* 41 bits */ + +    *s->code_ptr++ = (tcg_insn_unit){ +        (slot1 << 46) | (slot0 << 5) | template, +        (slot2 << 23) | (slot1 >> 18) +    }; +} + +static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src) +{ +    return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, +                               TCGReg ret, TCGReg arg) +{ +    tcg_out_bundle(s, mmI, +                   INSN_NOP_M, +                   INSN_NOP_M, +                   tcg_opc_mov_a(TCG_REG_P0, ret, arg)); +} + +static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src) +{ +    assert(src == sextract64(src, 0, 22)); +    return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0); +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, +                                TCGReg reg, tcg_target_long arg) +{ +    tcg_out_bundle(s, mLX, +                   INSN_NOP_M, +                   tcg_opc_l2 (arg), +                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg)); +} + +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ +    uint64_t imm; + +    /* We pay attention here to not modify the branch target by reading +       the existing value and using it again. This ensure that caches and +       memory are kept coherent during retranslation. */ +    if (l->has_value) { +        imm = l->u.value_ptr -  s->code_ptr; +    } else { +        imm = get_reloc_pcrel21b_slot2(s->code_ptr); +        tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); +    } + +    tcg_out_bundle(s, mmB, +                   INSN_NOP_M, +                   INSN_NOP_M, +                   tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm)); +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc) +{ +    uintptr_t func = desc->lo, gp = desc->hi, disp; + +    /* Look through the function descriptor.  */ +    tcg_out_bundle(s, mlx, +                   INSN_NOP_M, +                   tcg_opc_l2 (gp), +                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp)); +    disp = (tcg_insn_unit *)func - s->code_ptr; +    tcg_out_bundle(s, mLX, +                   INSN_NOP_M, +                   tcg_opc_l4 (disp), +                   tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4, +                               TCG_REG_B0, disp)); +} + +static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) +{ +    uint64_t imm, opc1; + +    /* At least arg == 0 is a common operation.  
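+       When the value fits in a signed 22-bit immediate it is loaded with
+       addl in the same bundle as the branch back to the epilogue;
+       otherwise a separate movl bundle is emitted first.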
*/ +    if (arg == sextract64(arg, 0, 22)) { +        opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); +        opc1 = INSN_NOP_M; +    } + +    imm = tb_ret_addr - s->code_ptr; + +    tcg_out_bundle(s, mLX, +                   opc1, +                   tcg_opc_l3 (imm), +                   tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); +} + +static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg) +{ +    if (s->tb_jmp_offset) { +        /* direct jump method */ +        tcg_abort(); +    } else { +        /* indirect jump method */ +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, +                     (tcg_target_long)(s->tb_next + arg)); +        tcg_out_bundle(s, MmI, +                       tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, +                                   TCG_REG_R2, TCG_REG_R2), +                       INSN_NOP_M, +                       tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, +                                   TCG_REG_R2, 0)); +        tcg_out_bundle(s, mmB, +                       INSN_NOP_M, +                       INSN_NOP_M, +                       tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, +                                   TCG_REG_B6)); +    } +    s->tb_next_offset[arg] = tcg_current_code_size(s); +} + +static inline void tcg_out_jmp(TCGContext *s, TCGArg addr) +{ +    tcg_out_bundle(s, mmI, +                   INSN_NOP_M, +                   INSN_NOP_M, +                   tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0)); +    tcg_out_bundle(s, mmB, +                   INSN_NOP_M, +                   INSN_NOP_M, +                   tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); +} + +static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, +                                  TCGArg arg1, tcg_target_long arg2) +{ +    if (arg2 == ((int16_t)arg2 >> 2) << 2) { +        tcg_out_bundle(s, MmI, +                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, +                                  TCG_REG_R2, arg2, arg1), +                       tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), +                       INSN_NOP_I); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); +        tcg_out_bundle(s, MmI, +                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, +                                   TCG_REG_R2, TCG_REG_R2, arg1), +                       tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), +                       INSN_NOP_I); +    } +} + +static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg, +                                  TCGArg arg1, tcg_target_long arg2) +{ +    if (arg2 == ((int16_t)arg2 >> 2) << 2) { +        tcg_out_bundle(s, MmI, +                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, +                                  TCG_REG_R2, arg2, arg1), +                       tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), +                       INSN_NOP_I); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2); +        tcg_out_bundle(s, MmI, +                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, +                                   TCG_REG_R2, TCG_REG_R2, arg1), +                       tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2), +                       INSN_NOP_I); +    } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    if (type == TCG_TYPE_I32) { +     
   tcg_out_ld_rel(s, OPC_LD4_M1, arg, arg1, arg2); +    } else { +        tcg_out_ld_rel(s, OPC_LD8_M1, arg, arg1, arg2); +    } +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    if (type == TCG_TYPE_I32) { +        tcg_out_st_rel(s, OPC_ST4_M4, arg, arg1, arg2); +    } else { +        tcg_out_st_rel(s, OPC_ST8_M4, arg, arg1, arg2); +    } +} + +static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3, +                               TCGReg ret, TCGArg arg1, int const_arg1, +                               TCGArg arg2, int const_arg2) +{ +    uint64_t opc1 = 0, opc2 = 0, opc3 = 0; + +    if (const_arg2 && arg2 != 0) { +        opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2); +        arg2 = TCG_REG_R3; +    } +    if (const_arg1 && arg1 != 0) { +        if (opc_a3 && arg1 == (int8_t)arg1) { +            opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2); +        } else { +            opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1); +            arg1 = TCG_REG_R2; +        } +    } +    if (opc3 == 0) { +        opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2); +    } + +    tcg_out_bundle(s, (opc1 || opc2 ? mII : miI), +                   opc1 ? opc1 : INSN_NOP_M, +                   opc2 ? opc2 : INSN_NOP_I, +                   opc3); +} + +static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1, +                               TCGArg arg2, int const_arg2) +{ +    if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) { +        tcg_out_bundle(s, mmI, +                       INSN_NOP_M, +                       INSN_NOP_M, +                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1)); +    } else { +        tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2); +    } +} + +static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1, +                               int const_arg1, TCGArg arg2, int const_arg2) +{ +    if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) { +        tcg_out_bundle(s, mmI, +                       INSN_NOP_M, +                       INSN_NOP_M, +                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1)); +    } else { +        tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret, +                    arg1, const_arg1, arg2, const_arg2); +    } +} + +static inline void tcg_out_eqv(TCGContext *s, TCGArg ret, +                               TCGArg arg1, int const_arg1, +                               TCGArg arg2, int const_arg2) +{ +    tcg_out_bundle(s, mII, +                   INSN_NOP_M, +                   tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2), +                   tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_nand(TCGContext *s, TCGArg ret, +                                TCGArg arg1, int const_arg1, +                                TCGArg arg2, int const_arg2) +{ +    tcg_out_bundle(s, mII, +                   INSN_NOP_M, +                   tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2), +                   tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_nor(TCGContext *s, TCGArg ret, +                               TCGArg arg1, int const_arg1, +                               TCGArg arg2, int const_arg2) +{ +    tcg_out_bundle(s, mII, +                   INSN_NOP_M, +                   tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2), +                   
tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret)); +} + +static inline void tcg_out_orc(TCGContext *s, TCGArg ret, +                               TCGArg arg1, int const_arg1, +                               TCGArg arg2, int const_arg2) +{ +    tcg_out_bundle(s, mII, +                   INSN_NOP_M, +                   tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2), +                   tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2)); +} + +static inline void tcg_out_mul(TCGContext *s, TCGArg ret, +                               TCGArg arg1, TCGArg arg2) +{ +    tcg_out_bundle(s, mmI, +                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1), +                   tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2), +                   INSN_NOP_I); +    tcg_out_bundle(s, mmF, +                   INSN_NOP_M, +                   INSN_NOP_M, +                   tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6, +                               TCG_REG_F7, TCG_REG_F0)); +    tcg_out_bundle(s, miI, +                   tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6), +                   INSN_NOP_I, +                   INSN_NOP_I); +} + +static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, +                                   TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, +                                   ret, arg1, arg2, 31 - arg2)); +    } else { +        tcg_out_bundle(s, mII, +                       tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, +                                   TCG_REG_R3, 0x1f, arg2), +                       tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1), +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, +                                   TCG_REG_R2, TCG_REG_R3)); +    } +} + +static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1, +                                   TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11, +                                   ret, arg1, arg2, 63 - arg2)); +    } else { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2)); +    } +} + +static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, +                                   TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, +                                   arg1, 63 - arg2, 31 - arg2)); +    } else { +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2, +                                   0x1f, arg2), +                       tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, +                                   arg1, TCG_REG_R2)); +    } +} + +static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, +                                   TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, 
miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, +                                   arg1, 63 - arg2, 63 - arg2)); +    } else { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret, +                                   arg1, arg2)); +    } +} + +static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, +                                   TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, +                                   arg1, arg2, 31 - arg2)); +    } else { +        tcg_out_bundle(s, mII, +                       tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, +                                   0x1f, arg2), +                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1), +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, +                                   TCG_REG_R2, TCG_REG_R3)); +    } +} + +static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, +                                   TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, +                                   arg1, arg2, 63 - arg2)); +    } else { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, +                                   arg1, arg2)); +    } +} + +static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1, +                                    TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, +                                   TCG_REG_R2, arg1, arg1), +                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, +                                   TCG_REG_R2, 32 - arg2, 31)); +    } else { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, +                                   TCG_REG_R2, arg1, arg1), +                       tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, +                                   0x1f, arg2)); +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3, +                                   0x20, TCG_REG_R3), +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, +                                   TCG_REG_R2, TCG_REG_R3)); +    } +} + +static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1, +                                    TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, +                                   arg1, 0x40 - arg2)); +    } else { +        tcg_out_bundle(s, mII, +                       tcg_opc_a3 
(TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, +                                   0x40, arg2), +                       tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R3, +                                   arg1, arg2), +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2, +                                   arg1, TCG_REG_R2)); +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, +                                   TCG_REG_R2, TCG_REG_R3)); +    } +} + +static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1, +                                    TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, +                                   TCG_REG_R2, arg1, arg1), +                       tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret, +                                   TCG_REG_R2, arg2, 31)); +    } else { +        tcg_out_bundle(s, mII, +                       tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3, +                                   0x1f, arg2), +                       tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2, +                                   TCG_REG_R2, arg1, arg1), +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret, +                                   TCG_REG_R2, TCG_REG_R3)); +    } +} + +static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1, +                                    TCGArg arg2, int const_arg2) +{ +    if (const_arg2) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1, +                                   arg1, arg2)); +    } else { +        tcg_out_bundle(s, mII, +                       tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R2, +                                   0x40, arg2), +                       tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R3, +                                   arg1, arg2), +                       tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2, +                                   arg1, TCG_REG_R2)); +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, +                                   TCG_REG_R2, TCG_REG_R3)); +    } +} + +static const uint64_t opc_ext_i29[8] = { +    OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0, +    OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 +}; + +static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s) +{ +    if ((opc & MO_SIZE) == MO_64) { +        return tcg_opc_mov_a(qp, d, s); +    } else { +        return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s); +    } +} + +static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29, +                               TCGArg ret, TCGArg arg) +{ +    tcg_out_bundle(s, miI, +                   INSN_NOP_M, +                   INSN_NOP_I, +                   tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg)); +} + +static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s) +{ +    return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb); +} + +static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg) +{ +    tcg_out_bundle(s, mII, +                   INSN_NOP_M, +                   
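+                   /* Deposit the 16-bit value at bits 63..48; the full
+                      byte reverse (mux1 @rev) below then leaves the
+                      swapped halfword in bits 15..0.  */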
tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15), +                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); +} + +static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg) +{ +    tcg_out_bundle(s, mII, +                   INSN_NOP_M, +                   tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31), +                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret)); +} + +static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg) +{ +    tcg_out_bundle(s, miI, +                   INSN_NOP_M, +                   INSN_NOP_I, +                   tcg_opc_bswap64_i(TCG_REG_P0, ret, arg)); +} + +static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1, +                                   TCGArg a2, int const_a2, int pos, int len) +{ +    uint64_t i1 = 0, i2 = 0; +    int cpos = 63 - pos, lm1 = len - 1; + +    if (const_a2) { +        /* Truncate the value of a constant a2 to the width of the field.  */ +        int mask = (1u << len) - 1; +        a2 &= mask; + +        if (a2 == 0 || a2 == mask) { +            /* 1-bit signed constant inserted into register.  */ +            i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1); +        } else { +            /* Otherwise, load any constant into a temporary.  Do this into +               the first I slot to help out with cross-unit delays.  */ +            i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2); +            a2 = TCG_REG_R2; +        } +    } +    if (i2 == 0) { +        i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1); +    } +    tcg_out_bundle(s, (i1 ? mII : miI), +                   INSN_NOP_M, +                   i1 ? i1 : INSN_NOP_I, +                   i2); +} + +static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1, +                                     TCGArg arg2, int cmp4) +{ +    uint64_t opc_eq_a6, opc_lt_a6, opc_ltu_a6; + +    if (cmp4) { +        opc_eq_a6 = OPC_CMP4_EQ_A6; +        opc_lt_a6 = OPC_CMP4_LT_A6; +        opc_ltu_a6 = OPC_CMP4_LTU_A6; +    } else { +        opc_eq_a6 = OPC_CMP_EQ_A6; +        opc_lt_a6 = OPC_CMP_LT_A6; +        opc_ltu_a6 = OPC_CMP_LTU_A6; +    } + +    switch (cond) { +    case TCG_COND_EQ: +        return tcg_opc_a6 (qp, opc_eq_a6,  TCG_REG_P6, TCG_REG_P7, arg1, arg2); +    case TCG_COND_NE: +        return tcg_opc_a6 (qp, opc_eq_a6,  TCG_REG_P7, TCG_REG_P6, arg1, arg2); +    case TCG_COND_LT: +        return tcg_opc_a6 (qp, opc_lt_a6,  TCG_REG_P6, TCG_REG_P7, arg1, arg2); +    case TCG_COND_LTU: +        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg1, arg2); +    case TCG_COND_GE: +        return tcg_opc_a6 (qp, opc_lt_a6,  TCG_REG_P7, TCG_REG_P6, arg1, arg2); +    case TCG_COND_GEU: +        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg1, arg2); +    case TCG_COND_LE: +        return tcg_opc_a6 (qp, opc_lt_a6,  TCG_REG_P7, TCG_REG_P6, arg2, arg1); +    case TCG_COND_LEU: +        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P7, TCG_REG_P6, arg2, arg1); +    case TCG_COND_GT: +        return tcg_opc_a6 (qp, opc_lt_a6,  TCG_REG_P6, TCG_REG_P7, arg2, arg1); +    case TCG_COND_GTU: +        return tcg_opc_a6 (qp, opc_ltu_a6, TCG_REG_P6, TCG_REG_P7, arg2, arg1); +    default: +        tcg_abort(); +        break; +    } +} + +static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, +                                  TCGReg arg2, TCGLabel *l, int cmp4) +{ +    uint64_t imm; + +    /* We pay attention here to not modify the branch 
target by reading +       the existing value and using it again. This ensure that caches and +       memory are kept coherent during retranslation. */ +    if (l->has_value) { +        imm = l->u.value_ptr - s->code_ptr; +    } else { +        imm = get_reloc_pcrel21b_slot2(s->code_ptr); +        tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, l, 0); +    } + +    tcg_out_bundle(s, miB, +                   INSN_NOP_M, +                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), +                   tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm)); +} + +static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret, +                                   TCGArg arg1, TCGArg arg2, int cmp4) +{ +    tcg_out_bundle(s, MmI, +                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4), +                   tcg_opc_movi_a(TCG_REG_P6, ret, 1), +                   tcg_opc_movi_a(TCG_REG_P7, ret, 0)); +} + +static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, +                                   TCGArg c1, TCGArg c2, +                                   TCGArg v1, int const_v1, +                                   TCGArg v2, int const_v2, int cmp4) +{ +    uint64_t opc1, opc2; + +    if (const_v1) { +        opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1); +    } else if (ret == v1) { +        opc1 = INSN_NOP_M; +    } else { +        opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1); +    } +    if (const_v2) { +        opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2); +    } else if (ret == v2) { +        opc2 = INSN_NOP_I; +    } else { +        opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2); +    } + +    tcg_out_bundle(s, MmI, +                   tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4), +                   opc1, +                   opc2); +} + +#if defined(CONFIG_SOFTMMU) +/* We're expecting to use an signed 22-bit immediate add.  */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) +                  > 0x1fffff) + +/* Load and compare a TLB entry, and return the result in (p6, p7). +   R2 is loaded with the addend TLB entry. +   R57 is loaded with the address, zero extented on 32-bit targets. +   R1, R3 are clobbered, leaving R56 free for... +   BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.  */ +static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, +                                    TCGMemOp s_bits, int off_rw, int off_add, +                                    uint64_t bswap1, uint64_t bswap2) +{ +     /* +        .mii +        mov	r2 = off_rw +        extr.u	r3 = addr_reg, ...		# extract tlb page +        zxt4	r57 = addr_reg                  # or mov for 64-bit guest +        ;; +        .mii +        addl	r2 = r2, areg0 +        shl	r3 = r3, cteb                   # via dep.z +        dep	r1 = 0, r57, ...                # zero page ofs, keep align +        ;; +        .mmi +        add	r2 = r2, r3 +        ;; +        ld4	r3 = [r2], off_add-off_rw	# or ld8 for 64-bit guest +        nop +        ;; +        .mmi +        nop +        cmp.eq	p6, p7 = r3, r58 +        nop +        ;; +      */ +    tcg_out_bundle(s, miI, +                   tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), +                   tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, +                               addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), +                   tcg_opc_ext_i(TCG_REG_P0, +                                 TARGET_LONG_BITS == 32 ? 
MO_UL : MO_Q, +                                 TCG_REG_R57, addr_reg)); +    tcg_out_bundle(s, miI, +                   tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, +                               TCG_REG_R2, TCG_AREG0), +                   tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, +                               TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, +                               63 - CPU_TLB_ENTRY_BITS), +                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, +                               TCG_REG_R57, 63 - s_bits, +                               TARGET_PAGE_BITS - s_bits - 1)); +    tcg_out_bundle(s, MmI, +                   tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, +                               TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), +                   tcg_opc_m3 (TCG_REG_P0, +                               (TARGET_LONG_BITS == 32 +                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, +                               TCG_REG_R2, off_add - off_rw), +                   bswap1); +    tcg_out_bundle(s, mmI, +                   tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), +                   tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, +                               TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), +                   bswap2); +} + +typedef struct TCGLabelQemuLdst { +    bool is_ld; +    TCGMemOp size; +    tcg_insn_unit *label_ptr;     /* label pointers to be updated */ +    struct TCGLabelQemuLdst *next; +} TCGLabelQemuLdst; + +typedef struct TCGBackendData { +    TCGLabelQemuLdst *labels; +} TCGBackendData; + +static inline void tcg_out_tb_init(TCGContext *s) +{ +    s->be->labels = NULL; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, +                                tcg_insn_unit *label_ptr) +{ +    TCGBackendData *be = s->be; +    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); + +    l->is_ld = is_ld; +    l->size = opc & MO_SIZE; +    l->label_ptr = label_ptr; +    l->next = be->labels; +    be->labels = l; +} + +static void tcg_out_tb_finalize(TCGContext *s) +{ +    static const void * const helpers[8] = { +        helper_ret_stb_mmu, +        helper_le_stw_mmu, +        helper_le_stl_mmu, +        helper_le_stq_mmu, +        helper_ret_ldub_mmu, +        helper_le_lduw_mmu, +        helper_le_ldul_mmu, +        helper_le_ldq_mmu, +    }; +    tcg_insn_unit *thunks[8] = { }; +    TCGLabelQemuLdst *l; + +    for (l = s->be->labels; l != NULL; l = l->next) { +        long x = l->is_ld * 4 + l->size; +        tcg_insn_unit *dest = thunks[x]; + +        /* The out-of-line thunks are all the same; load the return address +           from B0, load the GP, and branch to the code.  Note that we are +           always post-call, so the register window has rolled, so we're +           using incomming parameter register numbers, not outgoing.  
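+           Roughly, each thunk emitted below is three bundles (a sketch
+           only, not exact assembler syntax):
+               movl          r1 = <helper's gp>    restore the helper's GP
+               mov           r35 = b0              (r36 for stores) pass the
+                                                   return address as retaddr
+               brl.sptk.many <helper entry>        tail-call the C helper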
*/ +        if (dest == NULL) { +            uintptr_t *desc = (uintptr_t *)helpers[x]; +            uintptr_t func = desc[0], gp = desc[1], disp; + +            thunks[x] = dest = s->code_ptr; + +            tcg_out_bundle(s, mlx, +                           INSN_NOP_M, +                           tcg_opc_l2 (gp), +                           tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, +                                       TCG_REG_R1, gp)); +            tcg_out_bundle(s, mii, +                           INSN_NOP_M, +                           INSN_NOP_I, +                           tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, +                                       l->is_ld ? TCG_REG_R35 : TCG_REG_R36, +                                       TCG_REG_B0)); +            disp = (tcg_insn_unit *)func - s->code_ptr; +            tcg_out_bundle(s, mLX, +                           INSN_NOP_M, +                           tcg_opc_l3 (disp), +                           tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); +        } + +        reloc_pcrel21b_slot2(l->label_ptr, dest); +    } +} + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) +{ +    static const uint64_t opc_ld_m1[4] = { +        OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 +    }; +    int addr_reg, data_reg, mem_index; +    TCGMemOpIdx oi; +    TCGMemOp opc, s_bits; +    uint64_t fin1, fin2; +    tcg_insn_unit *label_ptr; + +    data_reg = args[0]; +    addr_reg = args[1]; +    oi = args[2]; +    opc = get_memop(oi); +    mem_index = get_mmuidx(oi); +    s_bits = opc & MO_SIZE; + +    /* Read the TLB entry */ +    tcg_out_qemu_tlb(s, addr_reg, s_bits, +                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), +                     offsetof(CPUArchState, tlb_table[mem_index][0].addend), +                     INSN_NOP_I, INSN_NOP_I); + +    /* P6 is the fast path, and P7 the slow path */ + +    fin2 = 0; +    if (opc & MO_BSWAP) { +        fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); +        if (s_bits < MO_64) { +            int shift = 64 - (8 << s_bits); +            fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); +            fin2 = tcg_opc_i11(TCG_REG_P0, fin2, +                               data_reg, data_reg, shift, 63 - shift); +        } +    } else { +        fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); +    } + +    tcg_out_bundle(s, mmI, +                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), +                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, +                               TCG_REG_R2, TCG_REG_R57), +                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, oi)); +    label_ptr = s->code_ptr; +    tcg_out_bundle(s, miB, +                   tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], +                               TCG_REG_R8, TCG_REG_R2), +                   INSN_NOP_I, +                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, +                               get_reloc_pcrel21b_slot2(label_ptr))); + +    add_qemu_ldst_label(s, 1, opc, label_ptr); + +    /* Note that we always use LE helper functions, so the bswap insns +       here for the fast path also apply to the slow path.  */ +    tcg_out_bundle(s, (fin2 ? mII : miI), +                   INSN_NOP_M, +                   fin1, +                   fin2 ? 
fin2 : INSN_NOP_I); +} + +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) +{ +    static const uint64_t opc_st_m4[4] = { +        OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 +    }; +    TCGReg addr_reg, data_reg; +    int mem_index; +    uint64_t pre1, pre2; +    TCGMemOpIdx oi; +    TCGMemOp opc, s_bits; +    tcg_insn_unit *label_ptr; + +    data_reg = args[0]; +    addr_reg = args[1]; +    oi = args[2]; +    opc = get_memop(oi); +    mem_index = get_mmuidx(oi); +    s_bits = opc & MO_SIZE; + +    /* Note that we always use LE helper functions, so the bswap insns +       that are here for the fast path also apply to the slow path, +       and move the data into the argument register.  */ +    pre2 = INSN_NOP_I; +    if (opc & MO_BSWAP) { +        pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); +        if (s_bits < MO_64) { +            int shift = 64 - (8 << s_bits); +            pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, +                               TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); +        } +    } else { +        /* Just move the data into place for the slow path.  */ +        pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); +    } + +    tcg_out_qemu_tlb(s, addr_reg, s_bits, +                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), +                     offsetof(CPUArchState, tlb_table[mem_index][0].addend), +                     pre1, pre2); + +    /* P6 is the fast path, and P7 the slow path */ +    tcg_out_bundle(s, mmI, +                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), +                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, +                               TCG_REG_R2, TCG_REG_R57), +                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, oi)); +    label_ptr = s->code_ptr; +    tcg_out_bundle(s, miB, +                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], +                               TCG_REG_R58, TCG_REG_R2), +                   INSN_NOP_I, +                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, +                               get_reloc_pcrel21b_slot2(label_ptr))); + +    add_qemu_ldst_label(s, 0, opc, label_ptr); +} + +#else /* !CONFIG_SOFTMMU */ +# include "tcg-be-null.h" + +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) +{ +    static uint64_t const opc_ld_m1[4] = { +        OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 +    }; +    int addr_reg, data_reg; +    TCGMemOp opc, s_bits, bswap; + +    data_reg = args[0]; +    addr_reg = args[1]; +    opc = args[2]; +    s_bits = opc & MO_SIZE; +    bswap = opc & MO_BSWAP; + +#if TARGET_LONG_BITS == 32 +    if (GUEST_BASE != 0) { +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, +                                   TCG_REG_R3, addr_reg), +                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, +                                   TCG_GUEST_BASE_REG, TCG_REG_R3)); +    } else { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, +                                   TCG_REG_R2, addr_reg), +                       INSN_NOP_I); +    } + +    if (!bswap) { +        if (!(opc & MO_SIGN)) { +            tcg_out_bundle(s, miI, +                           tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], +                                       data_reg, TCG_REG_R2), +                           
INSN_NOP_I, +                           INSN_NOP_I); +        } else { +            tcg_out_bundle(s, mII, +                           tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], +                                       data_reg, TCG_REG_R2), +                           INSN_NOP_I, +                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); +        } +    } else if (s_bits == MO_64) { +            tcg_out_bundle(s, mII, +                           tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], +                                       data_reg, TCG_REG_R2), +                           INSN_NOP_I, +                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); +    } else { +        if (s_bits == MO_16) { +            tcg_out_bundle(s, mII, +                           tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], +                                       data_reg, TCG_REG_R2), +                           INSN_NOP_I, +                           tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                      data_reg, data_reg, 15, 15)); +        } else { +            tcg_out_bundle(s, mII, +                           tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], +                                       data_reg, TCG_REG_R2), +                           INSN_NOP_I, +                           tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                      data_reg, data_reg, 31, 31)); +        } +        if (!(opc & MO_SIGN)) { +            tcg_out_bundle(s, miI, +                           INSN_NOP_M, +                           INSN_NOP_I, +                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); +        } else { +            tcg_out_bundle(s, mII, +                           INSN_NOP_M, +                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg), +                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); +        } +    } +#else +    if (GUEST_BASE != 0) { +        tcg_out_bundle(s, MmI, +                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, +                                   TCG_GUEST_BASE_REG, addr_reg), +                       tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], +                                   data_reg, TCG_REG_R2), +                       INSN_NOP_I); +    } else { +        tcg_out_bundle(s, mmI, +                       INSN_NOP_M, +                       tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits], +                                   data_reg, addr_reg), +                       INSN_NOP_I); +    } + +    if (bswap && s_bits == MO_16) { +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                   data_reg, data_reg, 15, 15), +                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); +    } else if (bswap && s_bits == MO_32) { +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                   data_reg, data_reg, 31, 31), +                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); +    } else if (bswap && s_bits == MO_64) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       INSN_NOP_I, +                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg)); +    } +    if (opc & MO_SIGN) { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                 
      INSN_NOP_I, +                       tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg)); +    } +#endif +} + +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) +{ +    static uint64_t const opc_st_m4[4] = { +        OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 +    }; +    int addr_reg, data_reg; +#if TARGET_LONG_BITS == 64 +    uint64_t add_guest_base; +#endif +    TCGMemOp opc, s_bits, bswap; + +    data_reg = args[0]; +    addr_reg = args[1]; +    opc = args[2]; +    s_bits = opc & MO_SIZE; +    bswap = opc & MO_BSWAP; + +#if TARGET_LONG_BITS == 32 +    if (GUEST_BASE != 0) { +        tcg_out_bundle(s, mII, +                       INSN_NOP_M, +                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, +                                   TCG_REG_R3, addr_reg), +                       tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, +                                   TCG_GUEST_BASE_REG, TCG_REG_R3)); +    } else { +        tcg_out_bundle(s, miI, +                       INSN_NOP_M, +                       tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, +                                   TCG_REG_R2, addr_reg), +                       INSN_NOP_I); +    } + +    if (bswap) { +        if (s_bits == MO_16) { +            tcg_out_bundle(s, mII, +                           INSN_NOP_M, +                           tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                       TCG_REG_R3, data_reg, 15, 15), +                           tcg_opc_bswap64_i(TCG_REG_P0, +                                             TCG_REG_R3, TCG_REG_R3)); +            data_reg = TCG_REG_R3; +        } else if (s_bits == MO_32) { +            tcg_out_bundle(s, mII, +                           INSN_NOP_M, +                           tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                       TCG_REG_R3, data_reg, 31, 31), +                           tcg_opc_bswap64_i(TCG_REG_P0, +                                             TCG_REG_R3, TCG_REG_R3)); +            data_reg = TCG_REG_R3; +        } else if (s_bits == MO_64) { +            tcg_out_bundle(s, miI, +                           INSN_NOP_M, +                           INSN_NOP_I, +                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); +            data_reg = TCG_REG_R3; +        } +    } +    tcg_out_bundle(s, mmI, +                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], +                               data_reg, TCG_REG_R2), +                   INSN_NOP_M, +                   INSN_NOP_I); +#else +    if (GUEST_BASE != 0) { +        add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, +                                     TCG_GUEST_BASE_REG, addr_reg); +        addr_reg = TCG_REG_R2; +    } else { +        add_guest_base = INSN_NOP_M; +    } + +    if (!bswap) { +        tcg_out_bundle(s, (GUEST_BASE ? 
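+                       /* With a guest base, the store below consumes the
+                          add's result in R2, so a template with a stop
+                          after the first slot (MmI) is needed.  */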
MmI : mmI), +                       add_guest_base, +                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], +                                   data_reg, addr_reg), +                       INSN_NOP_I); +    } else { +        if (s_bits == MO_16) { +            tcg_out_bundle(s, mII, +                           add_guest_base, +                           tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                       TCG_REG_R3, data_reg, 15, 15), +                           tcg_opc_bswap64_i(TCG_REG_P0, +                                             TCG_REG_R3, TCG_REG_R3)); +            data_reg = TCG_REG_R3; +        } else if (s_bits == MO_32) { +            tcg_out_bundle(s, mII, +                           add_guest_base, +                           tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, +                                       TCG_REG_R3, data_reg, 31, 31), +                           tcg_opc_bswap64_i(TCG_REG_P0, +                                             TCG_REG_R3, TCG_REG_R3)); +            data_reg = TCG_REG_R3; +        } else if (s_bits == MO_64) { +            tcg_out_bundle(s, miI, +                           add_guest_base, +                           INSN_NOP_I, +                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg)); +            data_reg = TCG_REG_R3; +        } +        tcg_out_bundle(s, miI, +                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], +                                   data_reg, addr_reg), +                       INSN_NOP_I, +                       INSN_NOP_I); +    } +#endif +} + +#endif + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, +                              const TCGArg *args, const int *const_args) +{ +    switch(opc) { +    case INDEX_op_exit_tb: +        tcg_out_exit_tb(s, args[0]); +        break; +    case INDEX_op_br: +        tcg_out_br(s, arg_label(args[0])); +        break; +    case INDEX_op_goto_tb: +        tcg_out_goto_tb(s, args[0]); +        break; + +    case INDEX_op_ld8u_i32: +    case INDEX_op_ld8u_i64: +        tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld8s_i32: +    case INDEX_op_ld8s_i64: +        tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]); +        tcg_out_ext(s, OPC_SXT1_I29, args[0], args[0]); +        break; +    case INDEX_op_ld16u_i32: +    case INDEX_op_ld16u_i64: +        tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld16s_i32: +    case INDEX_op_ld16s_i64: +        tcg_out_ld_rel(s, OPC_LD2_M1, args[0], args[1], args[2]); +        tcg_out_ext(s, OPC_SXT2_I29, args[0], args[0]); +        break; +    case INDEX_op_ld_i32: +    case INDEX_op_ld32u_i64: +        tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld32s_i64: +        tcg_out_ld_rel(s, OPC_LD4_M1, args[0], args[1], args[2]); +        tcg_out_ext(s, OPC_SXT4_I29, args[0], args[0]); +        break; +    case INDEX_op_ld_i64: +        tcg_out_ld_rel(s, OPC_LD8_M1, args[0], args[1], args[2]); +        break; +    case INDEX_op_st8_i32: +    case INDEX_op_st8_i64: +        tcg_out_st_rel(s, OPC_ST1_M4, args[0], args[1], args[2]); +        break; +    case INDEX_op_st16_i32: +    case INDEX_op_st16_i64: +        tcg_out_st_rel(s, OPC_ST2_M4, args[0], args[1], args[2]); +        break; +    case INDEX_op_st_i32: +    case INDEX_op_st32_i64: +        tcg_out_st_rel(s, OPC_ST4_M4, args[0], args[1], args[2]); +        break; +    case 
INDEX_op_st_i64: +        tcg_out_st_rel(s, OPC_ST8_M4, args[0], args[1], args[2]); +        break; + +    case INDEX_op_add_i32: +    case INDEX_op_add_i64: +        tcg_out_add(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_sub_i32: +    case INDEX_op_sub_i64: +        tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]); +        break; + +    case INDEX_op_and_i32: +    case INDEX_op_and_i64: +        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */ +        tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0], +                    args[2], const_args[2], args[1], const_args[1]); +        break; +    case INDEX_op_andc_i32: +    case INDEX_op_andc_i64: +        tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0], +                    args[1], const_args[1], args[2], const_args[2]); +        break; +    case INDEX_op_eqv_i32: +    case INDEX_op_eqv_i64: +        tcg_out_eqv(s, args[0], args[1], const_args[1], +                    args[2], const_args[2]); +        break; +    case INDEX_op_nand_i32: +    case INDEX_op_nand_i64: +        tcg_out_nand(s, args[0], args[1], const_args[1], +                     args[2], const_args[2]); +        break; +    case INDEX_op_nor_i32: +    case INDEX_op_nor_i64: +        tcg_out_nor(s, args[0], args[1], const_args[1], +                    args[2], const_args[2]); +        break; +    case INDEX_op_or_i32: +    case INDEX_op_or_i64: +        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */ +        tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0], +                    args[2], const_args[2], args[1], const_args[1]); +        break; +    case INDEX_op_orc_i32: +    case INDEX_op_orc_i64: +        tcg_out_orc(s, args[0], args[1], const_args[1], +                    args[2], const_args[2]); +        break; +    case INDEX_op_xor_i32: +    case INDEX_op_xor_i64: +        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  
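+           The A3 form only accepts an 8-bit immediate in its first operand
+           (see the (int8_t) test in tcg_out_alu), hence the constant is
+           passed as arg1 here.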
*/ +        tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0], +                    args[2], const_args[2], args[1], const_args[1]); +        break; + +    case INDEX_op_mul_i32: +    case INDEX_op_mul_i64: +        tcg_out_mul(s, args[0], args[1], args[2]); +        break; + +    case INDEX_op_sar_i32: +        tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_sar_i64: +        tcg_out_sar_i64(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_shl_i32: +        tcg_out_shl_i32(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_shl_i64: +        tcg_out_shl_i64(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_shr_i32: +        tcg_out_shr_i32(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_shr_i64: +        tcg_out_shr_i64(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_rotl_i32: +        tcg_out_rotl_i32(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_rotl_i64: +        tcg_out_rotl_i64(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_rotr_i32: +        tcg_out_rotr_i32(s, args[0], args[1], args[2], const_args[2]); +        break; +    case INDEX_op_rotr_i64: +        tcg_out_rotr_i64(s, args[0], args[1], args[2], const_args[2]); +        break; + +    case INDEX_op_ext8s_i32: +    case INDEX_op_ext8s_i64: +        tcg_out_ext(s, OPC_SXT1_I29, args[0], args[1]); +        break; +    case INDEX_op_ext8u_i32: +    case INDEX_op_ext8u_i64: +        tcg_out_ext(s, OPC_ZXT1_I29, args[0], args[1]); +        break; +    case INDEX_op_ext16s_i32: +    case INDEX_op_ext16s_i64: +        tcg_out_ext(s, OPC_SXT2_I29, args[0], args[1]); +        break; +    case INDEX_op_ext16u_i32: +    case INDEX_op_ext16u_i64: +        tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); +        break; +    case INDEX_op_ext32s_i64: +        tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); +        break; +    case INDEX_op_ext32u_i64: +        tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); +        break; + +    case INDEX_op_bswap16_i32: +    case INDEX_op_bswap16_i64: +        tcg_out_bswap16(s, args[0], args[1]); +        break; +    case INDEX_op_bswap32_i32: +    case INDEX_op_bswap32_i64: +        tcg_out_bswap32(s, args[0], args[1]); +        break; +    case INDEX_op_bswap64_i64: +        tcg_out_bswap64(s, args[0], args[1]); +        break; + +    case INDEX_op_deposit_i32: +    case INDEX_op_deposit_i64: +        tcg_out_deposit(s, args[0], args[1], args[2], const_args[2], +                        args[3], args[4]); +        break; + +    case INDEX_op_brcond_i32: +        tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 1); +        break; +    case INDEX_op_brcond_i64: +        tcg_out_brcond(s, args[2], args[0], args[1], arg_label(args[3]), 0); +        break; +    case INDEX_op_setcond_i32: +        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1); +        break; +    case INDEX_op_setcond_i64: +        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0); +        break; +    case INDEX_op_movcond_i32: +        tcg_out_movcond(s, args[5], args[0], args[1], args[2], +                        args[3], const_args[3], args[4], const_args[4], 1); +        break; +    case INDEX_op_movcond_i64: +        tcg_out_movcond(s, args[5], args[0], args[1], args[2], +                        args[3], const_args[3], args[4], 
const_args[4], 0); +        break; + +    case INDEX_op_qemu_ld_i32: +        tcg_out_qemu_ld(s, args); +        break; +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, args); +        break; +    case INDEX_op_qemu_st_i32: +        tcg_out_qemu_st(s, args); +        break; +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, args); +        break; + +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_mov_i64: +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_movi_i64: +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */ +    default: +        tcg_abort(); +    } +} + +static const TCGTargetOpDef ia64_op_defs[] = { +    { INDEX_op_br, { } }, +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, + +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, +    { INDEX_op_st8_i32, { "rZ", "r" } }, +    { INDEX_op_st16_i32, { "rZ", "r" } }, +    { INDEX_op_st_i32, { "rZ", "r" } }, + +    { INDEX_op_add_i32, { "r", "rZ", "rI" } }, +    { INDEX_op_sub_i32, { "r", "rI", "rI" } }, + +    { INDEX_op_and_i32, { "r", "rI", "rI" } }, +    { INDEX_op_andc_i32, { "r", "rI", "rI" } }, +    { INDEX_op_eqv_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_nand_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_or_i32, { "r", "rI", "rI" } }, +    { INDEX_op_orc_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_xor_i32, { "r", "rI", "rI" } }, + +    { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, + +    { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, + +    { INDEX_op_ext8s_i32, { "r", "rZ"} }, +    { INDEX_op_ext8u_i32, { "r", "rZ"} }, +    { INDEX_op_ext16s_i32, { "r", "rZ"} }, +    { INDEX_op_ext16u_i32, { "r", "rZ"} }, + +    { INDEX_op_bswap16_i32, { "r", "rZ" } }, +    { INDEX_op_bswap32_i32, { "r", "rZ" } }, + +    { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +    { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } }, + +    { INDEX_op_ld8u_i64, { "r", "r" } }, +    { INDEX_op_ld8s_i64, { "r", "r" } }, +    { INDEX_op_ld16u_i64, { "r", "r" } }, +    { INDEX_op_ld16s_i64, { "r", "r" } }, +    { INDEX_op_ld32u_i64, { "r", "r" } }, +    { INDEX_op_ld32s_i64, { "r", "r" } }, +    { INDEX_op_ld_i64, { "r", "r" } }, +    { INDEX_op_st8_i64, { "rZ", "r" } }, +    { INDEX_op_st16_i64, { "rZ", "r" } }, +    { INDEX_op_st32_i64, { "rZ", "r" } }, +    { INDEX_op_st_i64, { "rZ", "r" } }, + +    { INDEX_op_add_i64, { "r", "rZ", "rI" } }, +    { INDEX_op_sub_i64, { "r", "rI", "rI" } }, + +    { INDEX_op_and_i64, { "r", "rI", "rI" } }, +    { INDEX_op_andc_i64, { "r", "rI", "rI" } }, +    { INDEX_op_eqv_i64, { "r", "rZ", "rZ" } }, +    { INDEX_op_nand_i64, { "r", "rZ", "rZ" } }, +    { INDEX_op_nor_i64, { "r", "rZ", "rZ" } }, +    { INDEX_op_or_i64, { "r", "rI", "rI" } }, +    { INDEX_op_orc_i64, { "r", "rZ", "rZ" } }, +    { INDEX_op_xor_i64, { "r", "rI", "rI" } }, + +    { INDEX_op_mul_i64, { "r", "rZ", "rZ" } }, + +    { INDEX_op_sar_i64, { "r", "rZ", "ri" } }, +    { INDEX_op_shl_i64, { "r", "rZ", "ri" } }, +    { INDEX_op_shr_i64, { "r", "rZ", "ri" } }, +    { INDEX_op_rotl_i64, { "r", 
"rZ", "ri" } }, +    { INDEX_op_rotr_i64, { "r", "rZ", "ri" } }, + +    { INDEX_op_ext8s_i64, { "r", "rZ"} }, +    { INDEX_op_ext8u_i64, { "r", "rZ"} }, +    { INDEX_op_ext16s_i64, { "r", "rZ"} }, +    { INDEX_op_ext16u_i64, { "r", "rZ"} }, +    { INDEX_op_ext32s_i64, { "r", "rZ"} }, +    { INDEX_op_ext32u_i64, { "r", "rZ"} }, + +    { INDEX_op_bswap16_i64, { "r", "rZ" } }, +    { INDEX_op_bswap32_i64, { "r", "rZ" } }, +    { INDEX_op_bswap64_i64, { "r", "rZ" } }, + +    { INDEX_op_brcond_i64, { "rZ", "rZ" } }, +    { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } }, +    { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } }, + +    { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, + +    { INDEX_op_qemu_ld_i32, { "r", "r" } }, +    { INDEX_op_qemu_ld_i64, { "r", "r" } }, +    { INDEX_op_qemu_st_i32, { "SZ", "r" } }, +    { INDEX_op_qemu_st_i64, { "SZ", "r" } }, + +    { -1 }, +}; + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    int frame_size; + +    /* reserve some stack space */ +    frame_size = TCG_STATIC_CALL_ARGS_SIZE + +                 CPU_TEMP_BUF_NLONGS * sizeof(long); +    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & +                 ~(TCG_TARGET_STACK_ALIGN - 1); +    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, +                  CPU_TEMP_BUF_NLONGS * sizeof(long)); + +    /* First emit adhoc function descriptor */ +    *s->code_ptr = (tcg_insn_unit){ +        (uint64_t)(s->code_ptr + 1), /* entry point */ +        0                            /* skip gp */ +    }; +    s->code_ptr++; + +    /* prologue */ +    tcg_out_bundle(s, miI, +                   tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34, +                               TCG_REG_R34, 32, 24, 0), +                   INSN_NOP_I, +                   tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, +                               TCG_REG_B6, TCG_REG_R33, 0)); + +    /* ??? If GUEST_BASE < 0x200000, we could load the register via +       an ADDL in the M slot of the next bundle.  
*/ +    if (GUEST_BASE != 0) { +        tcg_out_bundle(s, mlx, +                       INSN_NOP_M, +                       tcg_opc_l2 (GUEST_BASE), +                       tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, +                                   TCG_GUEST_BASE_REG, GUEST_BASE)); +        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); +    } + +    tcg_out_bundle(s, miB, +                   tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, +                               TCG_REG_R12, -frame_size, TCG_REG_R12), +                   tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, +                               TCG_REG_R33, TCG_REG_B0), +                   tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6)); + +    /* epilogue */ +    tb_ret_addr = s->code_ptr; +    tcg_out_bundle(s, miI, +                   INSN_NOP_M, +                   tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, +                               TCG_REG_B0, TCG_REG_R33, 0), +                   tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, +                               TCG_REG_R12, frame_size, TCG_REG_R12)); +    tcg_out_bundle(s, miB, +                   INSN_NOP_M, +                   tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26, +                               TCG_REG_PFS, TCG_REG_R34), +                   tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4, +                               TCG_REG_B0)); +} + +static void tcg_target_init(TCGContext *s) +{ +    tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], +                   0xffffffffffffffffull); +    tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I64], +                   0xffffffffffffffffull); + +    tcg_regset_clear(tcg_target_call_clobber_regs); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R15); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R16); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R17); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R18); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R19); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R27); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R28); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R29); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R30); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R31); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R56); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R57); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R58); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R59); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R60); +    
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R61); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R62); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R63); + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);   /* zero register */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1);   /* global pointer */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2);   /* internal use */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3);   /* internal use */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12);  /* stack pointer */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);  /* thread pointer */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33);  /* return address */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34);  /* PFS */ + +    /* The following 4 are not in use, are call-saved, but *not* saved +       by the prologue.  Therefore we cannot use them without modifying +       the prologue.  There doesn't seem to be any good reason to use +       these as opposed to the windowed registers.  */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7); + +    tcg_add_target_add_op_defs(ia64_op_defs); +} diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h new file mode 100644 index 00000000..a04ed812 --- /dev/null +++ b/tcg/ia64/tcg-target.h @@ -0,0 +1,185 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009-2010 Aurelien Jarno <aurelien@aurel32.net> + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef TCG_TARGET_IA64  +#define TCG_TARGET_IA64 1 + +#define TCG_TARGET_INSN_UNIT_SIZE 16 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21 + +typedef struct { +    uint64_t lo __attribute__((aligned(16))); +    uint64_t hi; +} tcg_insn_unit; + +/* We only map the first 64 registers */ +#define TCG_TARGET_NB_REGS 64 +typedef enum { +    TCG_REG_R0 = 0, +    TCG_REG_R1, +    TCG_REG_R2, +    TCG_REG_R3, +    TCG_REG_R4, +    TCG_REG_R5, +    TCG_REG_R6, +    TCG_REG_R7, +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, +    TCG_REG_R15, +    TCG_REG_R16, +    TCG_REG_R17, +    TCG_REG_R18, +    TCG_REG_R19, +    TCG_REG_R20, +    TCG_REG_R21, +    TCG_REG_R22, +    TCG_REG_R23, +    TCG_REG_R24, +    TCG_REG_R25, +    TCG_REG_R26, +    TCG_REG_R27, +    TCG_REG_R28, +    TCG_REG_R29, +    TCG_REG_R30, +    TCG_REG_R31, +    TCG_REG_R32, +    TCG_REG_R33, +    TCG_REG_R34, +    TCG_REG_R35, +    TCG_REG_R36, +    TCG_REG_R37, +    TCG_REG_R38, +    TCG_REG_R39, +    TCG_REG_R40, +    TCG_REG_R41, +    TCG_REG_R42, +    TCG_REG_R43, +    TCG_REG_R44, +    TCG_REG_R45, +    TCG_REG_R46, +    TCG_REG_R47, +    TCG_REG_R48, +    TCG_REG_R49, +    TCG_REG_R50, +    TCG_REG_R51, +    TCG_REG_R52, +    TCG_REG_R53, +    TCG_REG_R54, +    TCG_REG_R55, +    TCG_REG_R56, +    TCG_REG_R57, +    TCG_REG_R58, +    TCG_REG_R59, +    TCG_REG_R60, +    TCG_REG_R61, +    TCG_REG_R62, +    TCG_REG_R63, + +    TCG_AREG0 = TCG_REG_R32, +} TCGReg; + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_S22 0x200 + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_R12 +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_STACK_OFFSET 16 + +/* optional instructions */ +#define TCG_TARGET_HAS_div_i32          0 +#define TCG_TARGET_HAS_rem_i32          0 +#define TCG_TARGET_HAS_div_i64          0 +#define TCG_TARGET_HAS_rem_i64          0 +#define TCG_TARGET_HAS_andc_i32         1 +#define TCG_TARGET_HAS_andc_i64         1 +#define TCG_TARGET_HAS_bswap16_i32      1 +#define TCG_TARGET_HAS_bswap16_i64      1 +#define TCG_TARGET_HAS_bswap32_i32      1 +#define TCG_TARGET_HAS_bswap32_i64      1 +#define TCG_TARGET_HAS_bswap64_i64      1 +#define TCG_TARGET_HAS_eqv_i32          1 +#define TCG_TARGET_HAS_eqv_i64          1 +#define TCG_TARGET_HAS_ext8s_i32        1 +#define TCG_TARGET_HAS_ext16s_i32       1 +#define TCG_TARGET_HAS_ext8s_i64        1 +#define TCG_TARGET_HAS_ext16s_i64       1 +#define TCG_TARGET_HAS_ext32s_i64       1 +#define TCG_TARGET_HAS_ext8u_i32        1 +#define TCG_TARGET_HAS_ext16u_i32       1 +#define TCG_TARGET_HAS_ext8u_i64        1 +#define TCG_TARGET_HAS_ext16u_i64       1 +#define TCG_TARGET_HAS_ext32u_i64       1 +#define TCG_TARGET_HAS_nand_i32         1 +#define TCG_TARGET_HAS_nand_i64         1 +#define TCG_TARGET_HAS_nor_i32          1 +#define TCG_TARGET_HAS_nor_i64          1 +#define TCG_TARGET_HAS_orc_i32          1 +#define TCG_TARGET_HAS_orc_i64          1 +#define TCG_TARGET_HAS_rot_i32          1 +#define TCG_TARGET_HAS_rot_i64          1 +#define TCG_TARGET_HAS_movcond_i32      1 +#define TCG_TARGET_HAS_movcond_i64      1 +#define TCG_TARGET_HAS_deposit_i32      1 +#define TCG_TARGET_HAS_deposit_i64      1 +#define TCG_TARGET_HAS_add2_i32         0 +#define TCG_TARGET_HAS_add2_i64         0 +#define TCG_TARGET_HAS_sub2_i32         0 +#define TCG_TARGET_HAS_sub2_i64         0 +#define TCG_TARGET_HAS_mulu2_i32        0 +#define TCG_TARGET_HAS_mulu2_i64        0 +#define 
TCG_TARGET_HAS_muls2_i32        0 +#define TCG_TARGET_HAS_muls2_i64        0 +#define TCG_TARGET_HAS_muluh_i32        0 +#define TCG_TARGET_HAS_muluh_i64        0 +#define TCG_TARGET_HAS_mulsh_i32        0 +#define TCG_TARGET_HAS_mulsh_i64        0 +#define TCG_TARGET_HAS_trunc_shr_i32    0 + +#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) +#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) + +/* optional instructions automatically implemented */ +#define TCG_TARGET_HAS_neg_i32          0 /* sub r1, r0, r3 */ +#define TCG_TARGET_HAS_neg_i64          0 /* sub r1, r0, r3 */ +#define TCG_TARGET_HAS_not_i32          0 /* xor r1, -1, r3 */ +#define TCG_TARGET_HAS_not_i64          0 /* xor r1, -1, r3 */ + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +    start = start & ~(32UL - 1UL); +    stop = (stop + (32UL - 1UL)) & ~(32UL - 1UL); + +    for (; start < stop; start += 32UL) { +        asm volatile ("fc.i %0" :: "r" (start)); +    } +    asm volatile (";;sync.i;;srlz.i;;"); +} + +#endif diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c new file mode 100644 index 00000000..2ccd0e82 --- /dev/null +++ b/tcg/mips/tcg-target.c @@ -0,0 +1,1828 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org> + * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net> + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tcg-be-ldst.h" + +#ifdef HOST_WORDS_BIGENDIAN +# define MIPS_BE  1 +#else +# define MIPS_BE  0 +#endif + +#define LO_OFF    (MIPS_BE * 4) +#define HI_OFF    (4 - LO_OFF) + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +    "zero", +    "at", +    "v0", +    "v1", +    "a0", +    "a1", +    "a2", +    "a3", +    "t0", +    "t1", +    "t2", +    "t3", +    "t4", +    "t5", +    "t6", +    "t7", +    "s0", +    "s1", +    "s2", +    "s3", +    "s4", +    "s5", +    "s6", +    "s7", +    "t8", +    "t9", +    "k0", +    "k1", +    "gp", +    "sp", +    "s8", +    "ra", +}; +#endif + +#define TCG_TMP0  TCG_REG_AT +#define TCG_TMP1  TCG_REG_T9 + +/* check if we really need so many registers :P */ +static const TCGReg tcg_target_reg_alloc_order[] = { +    /* Call saved registers.  
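+       Listed first so that allocated values are more likely to survive
+       calls to helper functions without being spilled.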
*/ +    TCG_REG_S0, +    TCG_REG_S1, +    TCG_REG_S2, +    TCG_REG_S3, +    TCG_REG_S4, +    TCG_REG_S5, +    TCG_REG_S6, +    TCG_REG_S7, +    TCG_REG_S8, + +    /* Call clobbered registers.  */ +    TCG_REG_T0, +    TCG_REG_T1, +    TCG_REG_T2, +    TCG_REG_T3, +    TCG_REG_T4, +    TCG_REG_T5, +    TCG_REG_T6, +    TCG_REG_T7, +    TCG_REG_T8, +    TCG_REG_T9, +    TCG_REG_V1, +    TCG_REG_V0, + +    /* Argument registers, opposite order of allocation.  */ +    TCG_REG_A3, +    TCG_REG_A2, +    TCG_REG_A1, +    TCG_REG_A0, +}; + +static const TCGReg tcg_target_call_iarg_regs[4] = { +    TCG_REG_A0, +    TCG_REG_A1, +    TCG_REG_A2, +    TCG_REG_A3 +}; + +static const TCGReg tcg_target_call_oarg_regs[2] = { +    TCG_REG_V0, +    TCG_REG_V1 +}; + +static tcg_insn_unit *tb_ret_addr; + +static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    /* Let the compiler perform the right-shift as part of the arithmetic.  */ +    ptrdiff_t disp = target - (pc + 1); +    assert(disp == (int16_t)disp); +    return disp & 0xffff; +} + +static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target)); +} + +static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0); +    return ((uintptr_t)target >> 2) & 0x3ffffff; +} + +static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    assert(type == R_MIPS_PC16); +    assert(addend == 0); +    reloc_pc16(code_ptr, (tcg_insn_unit *)value); +} + +#define TCG_CT_CONST_ZERO 0x100 +#define TCG_CT_CONST_U16  0x200    /* Unsigned 16-bit: 0 - 0xffff.  */ +#define TCG_CT_CONST_S16  0x400    /* Signed 16-bit: -32768 - 32767 */ +#define TCG_CT_CONST_P2M1 0x800    /* Power of 2 minus 1.  
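+                                      Such a mask is usable by the mips32r2
+                                      EXT insn (see the and_i32 case below).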
*/ +#define TCG_CT_CONST_N16  0x1000   /* "Negatable" 16-bit: -32767 - 32767 */ + +static inline bool is_p2m1(tcg_target_long val) +{ +    return val && ((val + 1) & val) == 0; +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str; + +    ct_str = *pct_str; +    switch(ct_str[0]) { +    case 'r': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set(ct->u.regs, 0xffffffff); +        break; +    case 'L': /* qemu_ld output arg constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set(ct->u.regs, 0xffffffff); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0); +        break; +    case 'l': /* qemu_ld input arg constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set(ct->u.regs, 0xffffffff); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); +#if defined(CONFIG_SOFTMMU) +        if (TARGET_LONG_BITS == 64) { +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); +        } +#endif +        break; +    case 'S': /* qemu_st constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set(ct->u.regs, 0xffffffff); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0); +#if defined(CONFIG_SOFTMMU) +        if (TARGET_LONG_BITS == 32) { +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1); +        } else { +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2); +            tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3); +        } +#endif +        break; +    case 'I': +        ct->ct |= TCG_CT_CONST_U16; +        break; +    case 'J': +        ct->ct |= TCG_CT_CONST_S16; +        break; +    case 'K': +        ct->ct |= TCG_CT_CONST_P2M1; +        break; +    case 'N': +        ct->ct |= TCG_CT_CONST_N16; +        break; +    case 'Z': +        /* We are cheating a bit here, using the fact that the register +           ZERO is also the register number 0. Hence there is no need +           to check for const_args in each instruction. 
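+           A constant zero argument is thus emitted simply as the $zero
+           register.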
*/ +        ct->ct |= TCG_CT_CONST_ZERO; +        break; +    default: +        return -1; +    } +    ct_str++; +    *pct_str = ct_str; +    return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, +                                         const TCGArgConstraint *arg_ct) +{ +    int ct; +    ct = arg_ct->ct; +    if (ct & TCG_CT_CONST) { +        return 1; +    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { +        return 1; +    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { +        return 1; +    } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { +        return 1; +    } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { +        return 1; +    } else if ((ct & TCG_CT_CONST_P2M1) +               && use_mips32r2_instructions && is_p2m1(val)) { +        return 1; +    } +    return 0; +} + +/* instruction opcodes */ +typedef enum { +    OPC_J        = 0x02 << 26, +    OPC_JAL      = 0x03 << 26, +    OPC_BEQ      = 0x04 << 26, +    OPC_BNE      = 0x05 << 26, +    OPC_BLEZ     = 0x06 << 26, +    OPC_BGTZ     = 0x07 << 26, +    OPC_ADDIU    = 0x09 << 26, +    OPC_SLTI     = 0x0A << 26, +    OPC_SLTIU    = 0x0B << 26, +    OPC_ANDI     = 0x0C << 26, +    OPC_ORI      = 0x0D << 26, +    OPC_XORI     = 0x0E << 26, +    OPC_LUI      = 0x0F << 26, +    OPC_LB       = 0x20 << 26, +    OPC_LH       = 0x21 << 26, +    OPC_LW       = 0x23 << 26, +    OPC_LBU      = 0x24 << 26, +    OPC_LHU      = 0x25 << 26, +    OPC_LWU      = 0x27 << 26, +    OPC_SB       = 0x28 << 26, +    OPC_SH       = 0x29 << 26, +    OPC_SW       = 0x2B << 26, + +    OPC_SPECIAL  = 0x00 << 26, +    OPC_SLL      = OPC_SPECIAL | 0x00, +    OPC_SRL      = OPC_SPECIAL | 0x02, +    OPC_ROTR     = OPC_SPECIAL | (0x01 << 21) | 0x02, +    OPC_SRA      = OPC_SPECIAL | 0x03, +    OPC_SLLV     = OPC_SPECIAL | 0x04, +    OPC_SRLV     = OPC_SPECIAL | 0x06, +    OPC_ROTRV    = OPC_SPECIAL | (0x01 <<  6) | 0x06, +    OPC_SRAV     = OPC_SPECIAL | 0x07, +    OPC_JR       = OPC_SPECIAL | 0x08, +    OPC_JALR     = OPC_SPECIAL | 0x09, +    OPC_MOVZ     = OPC_SPECIAL | 0x0A, +    OPC_MOVN     = OPC_SPECIAL | 0x0B, +    OPC_MFHI     = OPC_SPECIAL | 0x10, +    OPC_MFLO     = OPC_SPECIAL | 0x12, +    OPC_MULT     = OPC_SPECIAL | 0x18, +    OPC_MULTU    = OPC_SPECIAL | 0x19, +    OPC_DIV      = OPC_SPECIAL | 0x1A, +    OPC_DIVU     = OPC_SPECIAL | 0x1B, +    OPC_ADDU     = OPC_SPECIAL | 0x21, +    OPC_SUBU     = OPC_SPECIAL | 0x23, +    OPC_AND      = OPC_SPECIAL | 0x24, +    OPC_OR       = OPC_SPECIAL | 0x25, +    OPC_XOR      = OPC_SPECIAL | 0x26, +    OPC_NOR      = OPC_SPECIAL | 0x27, +    OPC_SLT      = OPC_SPECIAL | 0x2A, +    OPC_SLTU     = OPC_SPECIAL | 0x2B, + +    OPC_REGIMM   = 0x01 << 26, +    OPC_BLTZ     = OPC_REGIMM | (0x00 << 16), +    OPC_BGEZ     = OPC_REGIMM | (0x01 << 16), + +    OPC_SPECIAL2 = 0x1c << 26, +    OPC_MUL      = OPC_SPECIAL2 | 0x002, + +    OPC_SPECIAL3 = 0x1f << 26, +    OPC_EXT      = OPC_SPECIAL3 | 0x000, +    OPC_INS      = OPC_SPECIAL3 | 0x004, +    OPC_WSBH     = OPC_SPECIAL3 | 0x0a0, +    OPC_SEB      = OPC_SPECIAL3 | 0x420, +    OPC_SEH      = OPC_SPECIAL3 | 0x620, +} MIPSInsn; + +/* + * Type reg + */ +static inline void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc, +                                   TCGReg rd, TCGReg rs, TCGReg rt) +{ +    int32_t inst; + +    inst = opc; +    inst |= (rs & 0x1F) << 21; +    inst |= (rt & 0x1F) << 16; +    inst |= (rd & 0x1F) << 11; +    
tcg_out32(s, inst); +} + +/* + * Type immediate + */ +static inline void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc, +                                   TCGReg rt, TCGReg rs, TCGArg imm) +{ +    int32_t inst; + +    inst = opc; +    inst |= (rs & 0x1F) << 21; +    inst |= (rt & 0x1F) << 16; +    inst |= (imm & 0xffff); +    tcg_out32(s, inst); +} + +/* + * Type bitfield + */ +static inline void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt, +                                  TCGReg rs, int msb, int lsb) +{ +    int32_t inst; + +    inst = opc; +    inst |= (rs & 0x1F) << 21; +    inst |= (rt & 0x1F) << 16; +    inst |= (msb & 0x1F) << 11; +    inst |= (lsb & 0x1F) << 6; +    tcg_out32(s, inst); +} + +/* + * Type branch + */ +static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, +                                  TCGReg rt, TCGReg rs) +{ +    /* We pay attention here to not modify the branch target by reading +       the existing value and using it again. This ensure that caches and +       memory are kept coherent during retranslation. */ +    uint16_t offset = (uint16_t)*s->code_ptr; + +    tcg_out_opc_imm(s, opc, rt, rs, offset); +} + +/* + * Type sa + */ +static inline void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc, +                                  TCGReg rd, TCGReg rt, TCGArg sa) +{ +    int32_t inst; + +    inst = opc; +    inst |= (rt & 0x1F) << 16; +    inst |= (rd & 0x1F) << 11; +    inst |= (sa & 0x1F) <<  6; +    tcg_out32(s, inst); + +} + +/* + * Type jump. + * Returns true if the branch was in range and the insn was emitted. + */ +static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, void *target) +{ +    uintptr_t dest = (uintptr_t)target; +    uintptr_t from = (uintptr_t)s->code_ptr + 4; +    int32_t inst; + +    /* The pc-region branch happens within the 256MB region of +       the delay slot (thus the +4).  
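+       If the delay-slot pc and the destination differ in bits 28 and above,
+       the 26-bit J/JAL target field cannot reach it and the caller must fall
+       back to an indirect jump.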
*/ +    if ((from ^ dest) & -(1 << 28)) { +        return false; +    } +    assert((dest & 3) == 0); + +    inst = opc; +    inst |= (dest >> 2) & 0x3ffffff; +    tcg_out32(s, inst); +    return true; +} + +static inline void tcg_out_nop(TCGContext *s) +{ +    tcg_out32(s, 0); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, +                               TCGReg ret, TCGReg arg) +{ +    /* Simple reg-reg move, optimising out the 'do nothing' case */ +    if (ret != arg) { +        tcg_out_opc_reg(s, OPC_ADDU, ret, arg, TCG_REG_ZERO); +    } +} + +static inline void tcg_out_movi(TCGContext *s, TCGType type, +                                TCGReg reg, tcg_target_long arg) +{ +    if (arg == (int16_t)arg) { +        tcg_out_opc_imm(s, OPC_ADDIU, reg, TCG_REG_ZERO, arg); +    } else if (arg == (uint16_t)arg) { +        tcg_out_opc_imm(s, OPC_ORI, reg, TCG_REG_ZERO, arg); +    } else { +        tcg_out_opc_imm(s, OPC_LUI, reg, TCG_REG_ZERO, arg >> 16); +        if (arg & 0xffff) { +            tcg_out_opc_imm(s, OPC_ORI, reg, reg, arg & 0xffff); +        } +    } +} + +static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg) +{ +    if (use_mips32r2_instructions) { +        tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); +    } else { +        /* ret and arg can't be register at */ +        if (ret == TCG_TMP0 || arg == TCG_TMP0) { +            tcg_abort(); +        } + +        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); +        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); +        tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); +        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); +    } +} + +static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg) +{ +    if (use_mips32r2_instructions) { +        tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); +        tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); +    } else { +        /* ret and arg can't be register at */ +        if (ret == TCG_TMP0 || arg == TCG_TMP0) { +            tcg_abort(); +        } + +        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); +        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); +        tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); +        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); +    } +} + +static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg) +{ +    if (use_mips32r2_instructions) { +        tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); +        tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); +    } else { +        /* ret and arg must be different and can't be register at */ +        if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) { +            tcg_abort(); +        } + +        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); + +        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24); +        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + +        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00); +        tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8); +        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); + +        tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); +        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00); +        tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); +    } +} + +static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg) +{ +    if (use_mips32r2_instructions) { +        tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg); +    } else { +        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); +        tcg_out_opc_sa(s, OPC_SRA, ret, ret, 24); +    } +} + +static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, 
TCGReg arg) +{ +    if (use_mips32r2_instructions) { +        tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg); +    } else { +        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16); +        tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); +    } +} + +static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data, +                         TCGReg addr, intptr_t ofs) +{ +    int16_t lo = ofs; +    if (ofs != lo) { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo); +        if (addr != TCG_REG_ZERO) { +            tcg_out_opc_reg(s, OPC_ADDU, TCG_TMP0, TCG_TMP0, addr); +        } +        addr = TCG_TMP0; +    } +    tcg_out_opc_imm(s, opc, data, addr, lo); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_ldst(s, OPC_LW, arg, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_ldst(s, OPC_SW, arg, arg1, arg2); +} + +static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val) +{ +    if (val == (int16_t)val) { +        tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, val); +        tcg_out_opc_reg(s, OPC_ADDU, reg, reg, TCG_TMP0); +    } +} + +/* Bit 0 set if inversion required; bit 1 set if swapping required.  */ +#define MIPS_CMP_INV  1 +#define MIPS_CMP_SWAP 2 + +static const uint8_t mips_cmp_map[16] = { +    [TCG_COND_LT]  = 0, +    [TCG_COND_LTU] = 0, +    [TCG_COND_GE]  = MIPS_CMP_INV, +    [TCG_COND_GEU] = MIPS_CMP_INV, +    [TCG_COND_LE]  = MIPS_CMP_INV | MIPS_CMP_SWAP, +    [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP, +    [TCG_COND_GT]  = MIPS_CMP_SWAP, +    [TCG_COND_GTU] = MIPS_CMP_SWAP, +}; + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, +                            TCGReg arg1, TCGReg arg2) +{ +    MIPSInsn s_opc = OPC_SLTU; +    int cmp_map; + +    switch (cond) { +    case TCG_COND_EQ: +        if (arg2 != 0) { +            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); +            arg1 = ret; +        } +        tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1); +        break; + +    case TCG_COND_NE: +        if (arg2 != 0) { +            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); +            arg1 = ret; +        } +        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1); +        break; + +    case TCG_COND_LT: +    case TCG_COND_GE: +    case TCG_COND_LE: +    case TCG_COND_GT: +        s_opc = OPC_SLT; +        /* FALLTHRU */ + +    case TCG_COND_LTU: +    case TCG_COND_GEU: +    case TCG_COND_LEU: +    case TCG_COND_GTU: +        cmp_map = mips_cmp_map[cond]; +        if (cmp_map & MIPS_CMP_SWAP) { +            TCGReg t = arg1; +            arg1 = arg2; +            arg2 = t; +        } +        tcg_out_opc_reg(s, s_opc, ret, arg1, arg2); +        if (cmp_map & MIPS_CMP_INV) { +            tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); +        } +        break; + +     default: +         tcg_abort(); +         break; +     } +} + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, +                           TCGReg arg2, TCGLabel *l) +{ +    static const MIPSInsn b_zero[16] = { +        [TCG_COND_LT] = OPC_BLTZ, +        [TCG_COND_GT] = OPC_BGTZ, +        [TCG_COND_LE] = OPC_BLEZ, +        [TCG_COND_GE] = OPC_BGEZ, +    }; + +    MIPSInsn s_opc = OPC_SLTU; +    MIPSInsn b_opc; +    int cmp_map; + +    switch (cond) { +    case TCG_COND_EQ: +        
b_opc = OPC_BEQ; +        break; +    case TCG_COND_NE: +        b_opc = OPC_BNE; +        break; + +    case TCG_COND_LT: +    case TCG_COND_GT: +    case TCG_COND_LE: +    case TCG_COND_GE: +        if (arg2 == 0) { +            b_opc = b_zero[cond]; +            arg2 = arg1; +            arg1 = 0; +            break; +        } +        s_opc = OPC_SLT; +        /* FALLTHRU */ + +    case TCG_COND_LTU: +    case TCG_COND_GTU: +    case TCG_COND_LEU: +    case TCG_COND_GEU: +        cmp_map = mips_cmp_map[cond]; +        if (cmp_map & MIPS_CMP_SWAP) { +            TCGReg t = arg1; +            arg1 = arg2; +            arg2 = t; +        } +        tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2); +        b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE); +        arg1 = TCG_TMP0; +        arg2 = TCG_REG_ZERO; +        break; + +    default: +        tcg_abort(); +        break; +    } + +    tcg_out_opc_br(s, b_opc, arg1, arg2); +    if (l->has_value) { +        reloc_pc16(s->code_ptr - 1, l->u.value_ptr); +    } else { +        tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0); +    } +    tcg_out_nop(s); +} + +static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1, +                                 TCGReg al, TCGReg ah, +                                 TCGReg bl, TCGReg bh) +{ +    /* Merge highpart comparison into AH.  */ +    if (bh != 0) { +        if (ah != 0) { +            tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh); +            ah = tmp0; +        } else { +            ah = bh; +        } +    } +    /* Merge lowpart comparison into AL.  */ +    if (bl != 0) { +        if (al != 0) { +            tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl); +            al = tmp1; +        } else { +            al = bl; +        } +    } +    /* Merge high and low part comparisons into AL.  */ +    if (ah != 0) { +        if (al != 0) { +            tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al); +            al = tmp0; +        } else { +            al = ah; +        } +    } +    return al; +} + +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, +                             TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +{ +    TCGReg tmp0 = TCG_TMP0; +    TCGReg tmp1 = ret; + +    assert(ret != TCG_TMP0); +    if (ret == ah || ret == bh) { +        assert(ret != TCG_TMP1); +        tmp1 = TCG_TMP1; +    } + +    switch (cond) { +    case TCG_COND_EQ: +    case TCG_COND_NE: +        tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh); +        tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO); +        break; + +    default: +        tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh); +        tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl); +        tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0); +        tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh); +        tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0); +        break; +    } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, +                            TCGReg bl, TCGReg bh, TCGLabel *l) +{ +    TCGCond b_cond = TCG_COND_NE; +    TCGReg tmp = TCG_TMP1; + +    /* With branches, we emit between 4 and 9 insns with 2 or 3 branches. +       With setcond, we emit between 3 and 10 insns and only 1 branch, +       which ought to get better branch prediction.  
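+       So compute the condition into a register with setcond2 (or the eq2
+       reduction for EQ/NE) and finish with a single conditional branch.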
*/ +     switch (cond) { +     case TCG_COND_EQ: +     case TCG_COND_NE: +        b_cond = cond; +        tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh); +        break; + +    default: +        /* Minimize code size by preferring a compare not requiring INV.  */ +        if (mips_cmp_map[cond] & MIPS_CMP_INV) { +            cond = tcg_invert_cond(cond); +            b_cond = TCG_COND_EQ; +        } +        tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh); +        break; +    } + +    tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l); +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, +                            TCGReg c1, TCGReg c2, TCGReg v) +{ +    MIPSInsn m_opc = OPC_MOVN; + +    switch (cond) { +    case TCG_COND_EQ: +        m_opc = OPC_MOVZ; +        /* FALLTHRU */ +    case TCG_COND_NE: +        if (c2 != 0) { +            tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2); +            c1 = TCG_TMP0; +        } +        break; + +    default: +        /* Minimize code size by preferring a compare not requiring INV.  */ +        if (mips_cmp_map[cond] & MIPS_CMP_INV) { +            cond = tcg_invert_cond(cond); +            m_opc = OPC_MOVZ; +        } +        tcg_out_setcond(s, cond, TCG_TMP0, c1, c2); +        c1 = TCG_TMP0; +        break; +    } + +    tcg_out_opc_reg(s, m_opc, ret, v, c1); +} + +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ +    /* Note that the ABI requires the called function's address to be +       loaded into T9, even if a direct branch is in range.  */ +    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + +    /* But do try a direct branch, allowing the cpu better insn prefetch.  */ +    if (tail) { +        if (!tcg_out_opc_jmp(s, OPC_J, arg)) { +            tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0); +        } +    } else { +        if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) { +            tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0); +        } +    } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ +    tcg_out_call_int(s, arg, false); +    tcg_out_nop(s); +} + +#if defined(CONFIG_SOFTMMU) +static void * const qemu_ld_helpers[16] = { +    [MO_UB]   = helper_ret_ldub_mmu, +    [MO_SB]   = helper_ret_ldsb_mmu, +    [MO_LEUW] = helper_le_lduw_mmu, +    [MO_LESW] = helper_le_ldsw_mmu, +    [MO_LEUL] = helper_le_ldul_mmu, +    [MO_LEQ]  = helper_le_ldq_mmu, +    [MO_BEUW] = helper_be_lduw_mmu, +    [MO_BESW] = helper_be_ldsw_mmu, +    [MO_BEUL] = helper_be_ldul_mmu, +    [MO_BEQ]  = helper_be_ldq_mmu, +}; + +static void * const qemu_st_helpers[16] = { +    [MO_UB]   = helper_ret_stb_mmu, +    [MO_LEUW] = helper_le_stw_mmu, +    [MO_LEUL] = helper_le_stl_mmu, +    [MO_LEQ]  = helper_le_stq_mmu, +    [MO_BEUW] = helper_be_stw_mmu, +    [MO_BEUL] = helper_be_stl_mmu, +    [MO_BEQ]  = helper_be_stq_mmu, +}; + +/* Helper routines for marshalling helper function arguments into + * the correct registers and stack. + * I is where we want to put this argument, and is updated and returned + * for the next call. ARG is the argument itself. + * + * We provide routines for arguments which are: immediate, 32 bit + * value in register, 16 and 8 bit values in register (which must be zero + * extended before use) and 64 bit value in a lo:hi register pair. 
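+ * With the convention used here (o32), the first four argument words go in
+ * registers a0-a3; further words are stored to the stack at offset 4*i from
+ * sp, and 64 bit values are aligned to an even argument index.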
+ */ + +static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) +{ +    if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { +        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); +    } else { +        tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); +    } +    return i + 1; +} + +static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) +{ +    TCGReg tmp = TCG_TMP0; +    if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { +        tmp = tcg_target_call_iarg_regs[i]; +    } +    tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff); +    return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) +{ +    TCGReg tmp = TCG_TMP0; +    if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { +        tmp = tcg_target_call_iarg_regs[i]; +    } +    tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); +    return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) +{ +    TCGReg tmp = TCG_TMP0; +    if (arg == 0) { +        tmp = TCG_REG_ZERO; +    } else { +        if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { +            tmp = tcg_target_call_iarg_regs[i]; +        } +        tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); +    } +    return tcg_out_call_iarg_reg(s, i, tmp); +} + +static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) +{ +    i = (i + 1) & ~1; +    i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); +    i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); +    return i; +} + +/* Perform the tlb comparison operation.  The complete host address is +   placed in BASE.  Clobbers AT, T0, A0.  */ +static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, +                             TCGReg addrh, int mem_index, TCGMemOp s_bits, +                             tcg_insn_unit *label_ptr[2], bool is_load) +{ +    int cmp_off +        = (is_load +           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) +           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); +    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + +    tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addrl, +                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); +    tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, +                    (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); +    tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0); + +    /* Compensate for very large offsets.  */ +    if (add_off >= 0x8000) { +        /* Most target env are smaller than 32k; none are larger than 64k. +           Simplify the logic here merely to offset by 0x7ff0, giving us a +           range just shy of 64k.  Check this assumption.  */ +        QEMU_BUILD_BUG_ON(offsetof(CPUArchState, +                                   tlb_table[NB_MMU_MODES - 1][1]) +                          > 0x7ff0 + 0x7fff); +        tcg_out_opc_imm(s, OPC_ADDIU, TCG_REG_A0, TCG_REG_A0, 0x7ff0); +        cmp_off -= 0x7ff0; +        add_off -= 0x7ff0; +    } + +    /* Load the (low half) tlb comparator.  */ +    tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, +                    cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0)); + +    /* Mask the page bits, keeping the alignment bits to compare against. +       In between on 32-bit targets, load the tlb addend for the fast path.  
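+       An access that is not aligned to its own size keeps some of these low
+       bits set, fails the comparison below and is diverted to the slow path.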
*/ +    tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, +                 TARGET_PAGE_MASK | ((1 << s_bits) - 1)); +    if (TARGET_LONG_BITS == 32) { +        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); +    } +    tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); + +    label_ptr[0] = s->code_ptr; +    tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); + +    /* Load and test the high half tlb comparator.  */ +    if (TARGET_LONG_BITS == 64) { +        /* delay slot */ +        tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + HI_OFF); + +        /* Load the tlb addend for the fast path. We can't do it earlier with +           64-bit targets or we'll clobber a0 before reading the high half tlb +           comparator.  */ +        tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); + +        label_ptr[1] = s->code_ptr; +        tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0); +    } + +    /* delay slot */ +    tcg_out_opc_reg(s, OPC_ADDU, base, TCG_REG_A0, addrl); +} + +static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi, +                                TCGReg datalo, TCGReg datahi, +                                TCGReg addrlo, TCGReg addrhi, +                                void *raddr, tcg_insn_unit *label_ptr[2]) +{ +    TCGLabelQemuLdst *label = new_ldst_label(s); + +    label->is_ld = is_ld; +    label->oi = oi; +    label->datalo_reg = datalo; +    label->datahi_reg = datahi; +    label->addrlo_reg = addrlo; +    label->addrhi_reg = addrhi; +    label->raddr = raddr; +    label->label_ptr[0] = label_ptr[0]; +    if (TARGET_LONG_BITS == 64) { +        label->label_ptr[1] = label_ptr[1]; +    } +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ +    TCGMemOpIdx oi = l->oi; +    TCGMemOp opc = get_memop(oi); +    TCGReg v0; +    int i; + +    /* resolve label address */ +    reloc_pc16(l->label_ptr[0], s->code_ptr); +    if (TARGET_LONG_BITS == 64) { +        reloc_pc16(l->label_ptr[1], s->code_ptr); +    } + +    i = 1; +    if (TARGET_LONG_BITS == 64) { +        i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); +    } else { +        i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); +    } +    i = tcg_out_call_iarg_imm(s, i, oi); +    i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); +    tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); +    /* delay slot */ +    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + +    v0 = l->datalo_reg; +    if ((opc & MO_SIZE) == MO_64) { +        /* We eliminated V0 from the possible output registers, so it +           cannot be clobbered here.  So we must move V1 first.  
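+           The helper returns the 64-bit value in the V0/V1 register pair.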
*/ +        if (MIPS_BE) { +            tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); +            v0 = l->datahi_reg; +        } else { +            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); +        } +    } + +    reloc_pc16(s->code_ptr, l->raddr); +    tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); +    /* delay slot */ +    tcg_out_mov(s, TCG_TYPE_REG, v0, TCG_REG_V0); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ +    TCGMemOpIdx oi = l->oi; +    TCGMemOp opc = get_memop(oi); +    TCGMemOp s_bits = opc & MO_SIZE; +    int i; + +    /* resolve label address */ +    reloc_pc16(l->label_ptr[0], s->code_ptr); +    if (TARGET_LONG_BITS == 64) { +        reloc_pc16(l->label_ptr[1], s->code_ptr); +    } + +    i = 1; +    if (TARGET_LONG_BITS == 64) { +        i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); +    } else { +        i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); +    } +    switch (s_bits) { +    case MO_8: +        i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); +        break; +    case MO_16: +        i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); +        break; +    case MO_32: +        i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); +        break; +    case MO_64: +        i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); +        break; +    default: +        tcg_abort(); +    } +    i = tcg_out_call_iarg_imm(s, i, oi); + +    /* Tail call to the store helper.  Thus force the return address +       computation to take place in the return address register.  */ +    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); +    i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); +    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); +    /* delay slot */ +    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); +} +#endif + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, +                                   TCGReg base, TCGMemOp opc) +{ +    switch (opc & (MO_SSIZE | MO_BSWAP)) { +    case MO_UB: +        tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0); +        break; +    case MO_SB: +        tcg_out_opc_imm(s, OPC_LB, datalo, base, 0); +        break; +    case MO_UW | MO_BSWAP: +        tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); +        tcg_out_bswap16(s, datalo, TCG_TMP1); +        break; +    case MO_UW: +        tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0); +        break; +    case MO_SW | MO_BSWAP: +        tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0); +        tcg_out_bswap16s(s, datalo, TCG_TMP1); +        break; +    case MO_SW: +        tcg_out_opc_imm(s, OPC_LH, datalo, base, 0); +        break; +    case MO_UL | MO_BSWAP: +        tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0); +        tcg_out_bswap32(s, datalo, TCG_TMP1); +        break; +    case MO_UL: +        tcg_out_opc_imm(s, OPC_LW, datalo, base, 0); +        break; +    case MO_Q | MO_BSWAP: +        tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF); +        tcg_out_bswap32(s, datalo, TCG_TMP1); +        tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF); +        tcg_out_bswap32(s, datahi, TCG_TMP1); +        break; +    case MO_Q: +        tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF); +        tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF); +        break; +    default: +        tcg_abort(); +    } +} + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ +    TCGReg addr_regl, addr_regh 
__attribute__((unused)); +    TCGReg data_regl, data_regh; +    TCGMemOpIdx oi; +    TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) +    tcg_insn_unit *label_ptr[2]; +    int mem_index; +    TCGMemOp s_bits; +#endif +    /* Note that we've eliminated V0 from the output registers, +       so we won't overwrite the base register during loading.  */ +    TCGReg base = TCG_REG_V0; + +    data_regl = *args++; +    data_regh = (is_64 ? *args++ : 0); +    addr_regl = *args++; +    addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) +    mem_index = get_mmuidx(oi); +    s_bits = opc & MO_SIZE; + +    tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, +                     s_bits, label_ptr, 1); +    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); +    add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, +                        s->code_ptr, label_ptr); +#else +    if (GUEST_BASE == 0 && data_regl != addr_regl) { +        base = addr_regl; +    } else if (GUEST_BASE == (int16_t)GUEST_BASE) { +        tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); +        tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); +    } +    tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, +                                   TCGReg base, TCGMemOp opc) +{ +    switch (opc & (MO_SIZE | MO_BSWAP)) { +    case MO_8: +        tcg_out_opc_imm(s, OPC_SB, datalo, base, 0); +        break; + +    case MO_16 | MO_BSWAP: +        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff); +        tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1); +        datalo = TCG_TMP1; +        /* FALLTHRU */ +    case MO_16: +        tcg_out_opc_imm(s, OPC_SH, datalo, base, 0); +        break; + +    case MO_32 | MO_BSWAP: +        tcg_out_bswap32(s, TCG_TMP1, datalo); +        datalo = TCG_TMP1; +        /* FALLTHRU */ +    case MO_32: +        tcg_out_opc_imm(s, OPC_SW, datalo, base, 0); +        break; + +    case MO_64 | MO_BSWAP: +        tcg_out_bswap32(s, TCG_TMP1, datalo); +        tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, HI_OFF); +        tcg_out_bswap32(s, TCG_TMP1, datahi); +        tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, LO_OFF); +        break; +    case MO_64: +        tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF); +        tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF); +        break; + +    default: +        tcg_abort(); +    } +} + +static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, +                            TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, +                            bool cbh, bool is_sub) +{ +    TCGReg th = TCG_TMP1; + +    /* If we have a negative constant such that negating it would +       make the high part zero, we can (usually) eliminate one insn.  */ +    if (cbl && cbh && bh == -1 && bl != 0) { +        bl = -bl; +        bh = 0; +        is_sub = !is_sub; +    } + +    /* By operating on the high part first, we get to use the final +       carry operation to move back from the temporary.  */ +    if (!cbh) { +        tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); +    } else if (bh != 0 || ah == rl) { +        tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? 
-bh : bh)); +    } else { +        th = ah; +    } + +    /* Note that tcg optimization should eliminate the bl == 0 case.  */ +    if (is_sub) { +        if (cbl) { +            tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); +            tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); +        } else { +            tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); +            tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); +        } +        tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); +    } else { +        if (cbl) { +            tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); +            tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); +        } else if (rl == al && rl == bl) { +            tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); +            tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); +        } else { +            tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); +            tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl)); +        } +        tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0); +    } +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ +    TCGReg addr_regl, addr_regh __attribute__((unused)); +    TCGReg data_regl, data_regh, base; +    TCGMemOpIdx oi; +    TCGMemOp opc; +#if defined(CONFIG_SOFTMMU) +    tcg_insn_unit *label_ptr[2]; +    int mem_index; +    TCGMemOp s_bits; +#endif + +    data_regl = *args++; +    data_regh = (is_64 ? *args++ : 0); +    addr_regl = *args++; +    addr_regh = (TARGET_LONG_BITS == 64 ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); + +#if defined(CONFIG_SOFTMMU) +    mem_index = get_mmuidx(oi); +    s_bits = opc & 3; + +    /* Note that we eliminated the helper's address argument, +       so we can reuse that for the base.  */ +    base = (TARGET_LONG_BITS == 32 ? 
TCG_REG_A1 : TCG_REG_A2); +    tcg_out_tlb_load(s, base, addr_regl, addr_regh, mem_index, +                     s_bits, label_ptr, 0); +    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); +    add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, +                        s->code_ptr, label_ptr); +#else +    if (GUEST_BASE == 0) { +        base = addr_regl; +    } else { +        base = TCG_REG_A0; +        if (GUEST_BASE == (int16_t)GUEST_BASE) { +            tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); +        } else { +            tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); +            tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); +        } +    } +    tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, +                              const TCGArg *args, const int *const_args) +{ +    MIPSInsn i1, i2; +    TCGArg a0, a1, a2; +    int c2; + +    a0 = args[0]; +    a1 = args[1]; +    a2 = args[2]; +    c2 = const_args[2]; + +    switch (opc) { +    case INDEX_op_exit_tb: +        { +            TCGReg b0 = TCG_REG_ZERO; + +            if (a0 & ~0xffff) { +                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); +                b0 = TCG_REG_V0; +            } +            if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { +                tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, +                             (uintptr_t)tb_ret_addr); +                tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); +            } +            tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); +        } +        break; +    case INDEX_op_goto_tb: +        if (s->tb_jmp_offset) { +            /* direct jump method */ +            s->tb_jmp_offset[a0] = tcg_current_code_size(s); +            /* Avoid clobbering the address during retranslation.  
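+               Keep whatever 26-bit target is already in the buffer; the real
+               destination is patched in later by tb_set_jmp_target1.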
*/ +            tcg_out32(s, OPC_J | (*(uint32_t *)s->code_ptr & 0x3ffffff)); +        } else { +            /* indirect jump method */ +            tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, +                       (uintptr_t)(s->tb_next + a0)); +            tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); +        } +        tcg_out_nop(s); +        s->tb_next_offset[a0] = tcg_current_code_size(s); +        break; +    case INDEX_op_br: +        tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, +                       arg_label(a0)); +        break; + +    case INDEX_op_ld8u_i32: +        i1 = OPC_LBU; +        goto do_ldst; +    case INDEX_op_ld8s_i32: +        i1 = OPC_LB; +        goto do_ldst; +    case INDEX_op_ld16u_i32: +        i1 = OPC_LHU; +        goto do_ldst; +    case INDEX_op_ld16s_i32: +        i1 = OPC_LH; +        goto do_ldst; +    case INDEX_op_ld_i32: +        i1 = OPC_LW; +        goto do_ldst; +    case INDEX_op_st8_i32: +        i1 = OPC_SB; +        goto do_ldst; +    case INDEX_op_st16_i32: +        i1 = OPC_SH; +        goto do_ldst; +    case INDEX_op_st_i32: +        i1 = OPC_SW; +    do_ldst: +        tcg_out_ldst(s, i1, a0, a1, a2); +        break; + +    case INDEX_op_add_i32: +        i1 = OPC_ADDU, i2 = OPC_ADDIU; +        goto do_binary; +    case INDEX_op_or_i32: +        i1 = OPC_OR, i2 = OPC_ORI; +        goto do_binary; +    case INDEX_op_xor_i32: +        i1 = OPC_XOR, i2 = OPC_XORI; +    do_binary: +        if (c2) { +            tcg_out_opc_imm(s, i2, a0, a1, a2); +            break; +        } +    do_binaryv: +        tcg_out_opc_reg(s, i1, a0, a1, a2); +        break; + +    case INDEX_op_sub_i32: +        if (c2) { +            tcg_out_opc_imm(s, OPC_ADDIU, a0, a1, -a2); +            break; +        } +        i1 = OPC_SUBU; +        goto do_binary; +    case INDEX_op_and_i32: +        if (c2 && a2 != (uint16_t)a2) { +            int msb = ctz32(~a2) - 1; +            assert(use_mips32r2_instructions); +            assert(is_p2m1(a2)); +            tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); +            break; +        } +        i1 = OPC_AND, i2 = OPC_ANDI; +        goto do_binary; +    case INDEX_op_nor_i32: +        i1 = OPC_NOR; +        goto do_binaryv; + +    case INDEX_op_mul_i32: +        if (use_mips32_instructions) { +            tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); +            break; +        } +        i1 = OPC_MULT, i2 = OPC_MFLO; +        goto do_hilo1; +    case INDEX_op_mulsh_i32: +        i1 = OPC_MULT, i2 = OPC_MFHI; +        goto do_hilo1; +    case INDEX_op_muluh_i32: +        i1 = OPC_MULTU, i2 = OPC_MFHI; +        goto do_hilo1; +    case INDEX_op_div_i32: +        i1 = OPC_DIV, i2 = OPC_MFLO; +        goto do_hilo1; +    case INDEX_op_divu_i32: +        i1 = OPC_DIVU, i2 = OPC_MFLO; +        goto do_hilo1; +    case INDEX_op_rem_i32: +        i1 = OPC_DIV, i2 = OPC_MFHI; +        goto do_hilo1; +    case INDEX_op_remu_i32: +        i1 = OPC_DIVU, i2 = OPC_MFHI; +    do_hilo1: +        tcg_out_opc_reg(s, i1, 0, a1, a2); +        tcg_out_opc_reg(s, i2, a0, 0, 0); +        break; + +    case INDEX_op_muls2_i32: +        i1 = OPC_MULT; +        goto do_hilo2; +    case INDEX_op_mulu2_i32: +        i1 = OPC_MULTU; +    do_hilo2: +        tcg_out_opc_reg(s, i1, 0, a2, args[3]); +        tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); +        tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); +        break; + +    case INDEX_op_not_i32: +        i1 = OPC_NOR; +        goto do_unary; +    case INDEX_op_bswap16_i32: +        
i1 = OPC_WSBH; +        goto do_unary; +    case INDEX_op_ext8s_i32: +        i1 = OPC_SEB; +        goto do_unary; +    case INDEX_op_ext16s_i32: +        i1 = OPC_SEH; +    do_unary: +        tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1); +        break; + +    case INDEX_op_sar_i32: +        i1 = OPC_SRAV, i2 = OPC_SRA; +        goto do_shift; +    case INDEX_op_shl_i32: +        i1 = OPC_SLLV, i2 = OPC_SLL; +        goto do_shift; +    case INDEX_op_shr_i32: +        i1 = OPC_SRLV, i2 = OPC_SRL; +        goto do_shift; +    case INDEX_op_rotr_i32: +        i1 = OPC_ROTRV, i2 = OPC_ROTR; +    do_shift: +        if (c2) { +            tcg_out_opc_sa(s, i2, a0, a1, a2); +        } else { +            tcg_out_opc_reg(s, i1, a0, a2, a1); +        } +        break; +    case INDEX_op_rotl_i32: +        if (c2) { +            tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2); +        } else { +            tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2); +            tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1); +        } +        break; + +    case INDEX_op_bswap32_i32: +        tcg_out_opc_reg(s, OPC_WSBH, a0, 0, a1); +        tcg_out_opc_sa(s, OPC_ROTR, a0, a0, 16); +        break; + +    case INDEX_op_deposit_i32: +        tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); +        break; + +    case INDEX_op_brcond_i32: +        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); +        break; +    case INDEX_op_brcond2_i32: +        tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); +        break; + +    case INDEX_op_movcond_i32: +        tcg_out_movcond(s, args[5], a0, a1, a2, args[3]); +        break; + +    case INDEX_op_setcond_i32: +        tcg_out_setcond(s, args[3], a0, a1, a2); +        break; +    case INDEX_op_setcond2_i32: +        tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); +        break; + +    case INDEX_op_qemu_ld_i32: +        tcg_out_qemu_ld(s, args, false); +        break; +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, args, true); +        break; +    case INDEX_op_qemu_st_i32: +        tcg_out_qemu_st(s, args, false); +        break; +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, args, true); +        break; + +    case INDEX_op_add2_i32: +        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], +                        const_args[4], const_args[5], false); +        break; +    case INDEX_op_sub2_i32: +        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], +                        const_args[4], const_args[5], true); +        break; + +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  
*/ +    default: +        tcg_abort(); +    } +} + +static const TCGTargetOpDef mips_op_defs[] = { +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, +    { INDEX_op_br, { } }, + +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, +    { INDEX_op_st8_i32, { "rZ", "r" } }, +    { INDEX_op_st16_i32, { "rZ", "r" } }, +    { INDEX_op_st_i32, { "rZ", "r" } }, + +    { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, +    { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, +    { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_remu_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_sub_i32, { "r", "rZ", "rN" } }, + +    { INDEX_op_and_i32, { "r", "rZ", "rIK" } }, +    { INDEX_op_nor_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_not_i32, { "r", "rZ" } }, +    { INDEX_op_or_i32, { "r", "rZ", "rIZ" } }, +    { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } }, + +    { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_shr_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_rotr_i32, { "r", "rZ", "ri" } }, +    { INDEX_op_rotl_i32, { "r", "rZ", "ri" } }, + +    { INDEX_op_bswap16_i32, { "r", "r" } }, +    { INDEX_op_bswap32_i32, { "r", "r" } }, + +    { INDEX_op_ext8s_i32, { "r", "rZ" } }, +    { INDEX_op_ext16s_i32, { "r", "rZ" } }, + +    { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + +    { INDEX_op_brcond_i32, { "rZ", "rZ" } }, +    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } }, +    { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } }, +    { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } }, + +    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, +    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rN", "rN" } }, +    { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } }, + +#if TARGET_LONG_BITS == 32 +    { INDEX_op_qemu_ld_i32, { "L", "lZ" } }, +    { INDEX_op_qemu_st_i32, { "SZ", "SZ" } }, +    { INDEX_op_qemu_ld_i64, { "L", "L", "lZ" } }, +    { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ" } }, +#else +    { INDEX_op_qemu_ld_i32, { "L", "lZ", "lZ" } }, +    { INDEX_op_qemu_st_i32, { "SZ", "SZ", "SZ" } }, +    { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, +    { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, +#endif +    { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { +    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */ +    TCG_REG_S1, +    TCG_REG_S2, +    TCG_REG_S3, +    TCG_REG_S4, +    TCG_REG_S5, +    TCG_REG_S6, +    TCG_REG_S7, +    TCG_REG_S8, +    TCG_REG_RA,       /* should be last for ABI compliance */ +}; + +/* The Linux kernel doesn't provide any information about the available +   instruction set. Probe it using a signal handler. 
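+   Each candidate instruction is executed once: if the CPU does not implement
+   it, the SIGILL handler skips the faulting word and records the failure in
+   got_sigill.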
*/ + +#include <signal.h> + +#ifndef use_movnz_instructions +bool use_movnz_instructions = false; +#endif + +#ifndef use_mips32_instructions +bool use_mips32_instructions = false; +#endif + +#ifndef use_mips32r2_instructions +bool use_mips32r2_instructions = false; +#endif + +static volatile sig_atomic_t got_sigill; + +static void sigill_handler(int signo, siginfo_t *si, void *data) +{ +    /* Skip the faulty instruction */ +    ucontext_t *uc = (ucontext_t *)data; +    uc->uc_mcontext.pc += 4; + +    got_sigill = 1; +} + +static void tcg_target_detect_isa(void) +{ +    struct sigaction sa_old, sa_new; + +    memset(&sa_new, 0, sizeof(sa_new)); +    sa_new.sa_flags = SA_SIGINFO; +    sa_new.sa_sigaction = sigill_handler; +    sigaction(SIGILL, &sa_new, &sa_old); + +    /* Probe for movn/movz, necessary to implement movcond. */ +#ifndef use_movnz_instructions +    got_sigill = 0; +    asm volatile(".set push\n" +                 ".set mips32\n" +                 "movn $zero, $zero, $zero\n" +                 "movz $zero, $zero, $zero\n" +                 ".set pop\n" +                 : : : ); +    use_movnz_instructions = !got_sigill; +#endif + +    /* Probe for MIPS32 instructions. As no subsetting is allowed +       by the specification, it is only necessary to probe for one +       of the instructions. */ +#ifndef use_mips32_instructions +    got_sigill = 0; +    asm volatile(".set push\n" +                 ".set mips32\n" +                 "mul $zero, $zero\n" +                 ".set pop\n" +                 : : : ); +    use_mips32_instructions = !got_sigill; +#endif + +    /* Probe for MIPS32r2 instructions if MIPS32 instructions are +       available. As no subsetting is allowed by the specification, +       it is only necessary to probe for one of the instructions. */ +#ifndef use_mips32r2_instructions +    if (use_mips32_instructions) { +        got_sigill = 0; +        asm volatile(".set push\n" +                     ".set mips32r2\n" +                     "seb $zero, $zero\n" +                     ".set pop\n" +                     : : : ); +        use_mips32r2_instructions = !got_sigill; +    } +#endif + +    sigaction(SIGILL, &sa_old, NULL); +} + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    int i, frame_size; + +    /* reserve some stack space, also for TCG temps. 
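+       The frame holds the callee-saved registers, TCG_STATIC_CALL_ARGS_SIZE
+       bytes of outgoing argument space and CPU_TEMP_BUF_NLONGS longs of
+       temporary storage, rounded up to TCG_TARGET_STACK_ALIGN.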
*/ +    frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4 +                 + TCG_STATIC_CALL_ARGS_SIZE +                 + CPU_TEMP_BUF_NLONGS * sizeof(long); +    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & +                 ~(TCG_TARGET_STACK_ALIGN - 1); +    tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4 +                  + TCG_STATIC_CALL_ARGS_SIZE, +                  CPU_TEMP_BUF_NLONGS * sizeof(long)); + +    /* TB prologue */ +    tcg_out_addi(s, TCG_REG_SP, -frame_size); +    for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { +        tcg_out_st(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], +                   TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); +    } + +    /* Call generated code */ +    tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0); +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); +    tb_ret_addr = s->code_ptr; + +    /* TB epilogue */ +    for(i = 0 ; i < ARRAY_SIZE(tcg_target_callee_save_regs) ; i++) { +        tcg_out_ld(s, TCG_TYPE_I32, tcg_target_callee_save_regs[i], +                   TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE + i * 4); +    } + +    tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0); +    tcg_out_addi(s, TCG_REG_SP, frame_size); +} + +static void tcg_target_init(TCGContext *s) +{ +    tcg_target_detect_isa(); +    tcg_regset_set(tcg_target_available_regs[TCG_TYPE_I32], 0xffffffff); +    tcg_regset_set(tcg_target_call_clobber_regs, +                   (1 << TCG_REG_V0) | +                   (1 << TCG_REG_V1) | +                   (1 << TCG_REG_A0) | +                   (1 << TCG_REG_A1) | +                   (1 << TCG_REG_A2) | +                   (1 << TCG_REG_A3) | +                   (1 << TCG_REG_T0) | +                   (1 << TCG_REG_T1) | +                   (1 << TCG_REG_T2) | +                   (1 << TCG_REG_T3) | +                   (1 << TCG_REG_T4) | +                   (1 << TCG_REG_T5) | +                   (1 << TCG_REG_T6) | +                   (1 << TCG_REG_T7) | +                   (1 << TCG_REG_T8) | +                   (1 << TCG_REG_T9)); + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0);   /* kernel use only */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1);   /* kernel use only */ +    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);     /* internal use */ +    tcg_regset_set_reg(s->reserved_regs, TCG_TMP1);     /* internal use */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);   /* return address */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);   /* stack pointer */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);   /* global pointer */ + +    tcg_add_target_add_op_defs(mips_op_defs); +} + +void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ +    uint32_t *ptr = (uint32_t *)jmp_addr; +    *ptr = deposit32(*ptr, 0, 26, addr >> 2); +    flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h new file mode 100644 index 00000000..f5ba52ca --- /dev/null +++ b/tcg/mips/tcg-target.h @@ -0,0 +1,138 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org> + * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net> + * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + 
* of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef TCG_TARGET_MIPS  +#define TCG_TARGET_MIPS 1 + +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 +#define TCG_TARGET_NB_REGS 32 + +typedef enum { +    TCG_REG_ZERO = 0, +    TCG_REG_AT, +    TCG_REG_V0, +    TCG_REG_V1, +    TCG_REG_A0, +    TCG_REG_A1, +    TCG_REG_A2, +    TCG_REG_A3, +    TCG_REG_T0, +    TCG_REG_T1, +    TCG_REG_T2, +    TCG_REG_T3, +    TCG_REG_T4, +    TCG_REG_T5, +    TCG_REG_T6, +    TCG_REG_T7, +    TCG_REG_S0, +    TCG_REG_S1, +    TCG_REG_S2, +    TCG_REG_S3, +    TCG_REG_S4, +    TCG_REG_S5, +    TCG_REG_S6, +    TCG_REG_S7, +    TCG_REG_T8, +    TCG_REG_T9, +    TCG_REG_K0, +    TCG_REG_K1, +    TCG_REG_GP, +    TCG_REG_SP, +    TCG_REG_S8, +    TCG_REG_RA, + +    TCG_REG_CALL_STACK = TCG_REG_SP, +    TCG_AREG0 = TCG_REG_S0, +} TCGReg; + +/* used for function call generation */ +#define TCG_TARGET_STACK_ALIGN 8 +#define TCG_TARGET_CALL_STACK_OFFSET 16 +#define TCG_TARGET_CALL_ALIGN_ARGS 1 + +/* MOVN/MOVZ instructions detection */ +#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ +    defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \ +    defined(_MIPS_ARCH_MIPS4) +#define use_movnz_instructions  1 +#else +extern bool use_movnz_instructions; +#endif + +/* MIPS32 instruction set detection */ +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1) +#define use_mips32_instructions  1 +#else +extern bool use_mips32_instructions; +#endif + +/* MIPS32R2 instruction set detection */ +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) +#define use_mips32r2_instructions  1 +#else +extern bool use_mips32r2_instructions; +#endif + +/* optional instructions */ +#define TCG_TARGET_HAS_div_i32          1 +#define TCG_TARGET_HAS_rem_i32          1 +#define TCG_TARGET_HAS_not_i32          1 +#define TCG_TARGET_HAS_nor_i32          1 +#define TCG_TARGET_HAS_andc_i32         0 +#define TCG_TARGET_HAS_orc_i32          0 +#define TCG_TARGET_HAS_eqv_i32          0 +#define TCG_TARGET_HAS_nand_i32         0 +#define TCG_TARGET_HAS_mulu2_i32        1 +#define TCG_TARGET_HAS_muls2_i32        1 +#define TCG_TARGET_HAS_muluh_i32        1 +#define TCG_TARGET_HAS_mulsh_i32        1 + +/* optional instructions detected at runtime */ +#define TCG_TARGET_HAS_movcond_i32      use_movnz_instructions +#define TCG_TARGET_HAS_bswap16_i32      use_mips32r2_instructions +#define TCG_TARGET_HAS_bswap32_i32      use_mips32r2_instructions +#define TCG_TARGET_HAS_deposit_i32      use_mips32r2_instructions +#define TCG_TARGET_HAS_ext8s_i32    
    use_mips32r2_instructions +#define TCG_TARGET_HAS_ext16s_i32       use_mips32r2_instructions +#define TCG_TARGET_HAS_rot_i32          use_mips32r2_instructions + +/* optional instructions automatically implemented */ +#define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */ +#define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */ +#define TCG_TARGET_HAS_ext16u_i32       0 /* andi rt, rs, 0xffff */ + +#ifdef __OpenBSD__ +#include <machine/sysarch.h> +#else +#include <sys/cachectl.h> +#endif + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +    cacheflush ((void *)start, stop-start, ICACHE); +} + +#endif diff --git a/tcg/optimize.c b/tcg/optimize.c new file mode 100644 index 00000000..18283cfd --- /dev/null +++ b/tcg/optimize.c @@ -0,0 +1,1330 @@ +/* + * Optimizations for Tiny Code Generator for QEMU + * + * Copyright (c) 2010 Samsung Electronics. + * Contributed by Kirill Batuzov <batuzovk@ispras.ru> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "config.h" + +#include <stdlib.h> +#include <stdio.h> + +#include "qemu-common.h" +#include "tcg-op.h" + +#define CASE_OP_32_64(x)                        \ +        glue(glue(case INDEX_op_, x), _i32):    \ +        glue(glue(case INDEX_op_, x), _i64) + +typedef enum { +    TCG_TEMP_UNDEF = 0, +    TCG_TEMP_CONST, +    TCG_TEMP_COPY, +} tcg_temp_state; + +struct tcg_temp_info { +    tcg_temp_state state; +    uint16_t prev_copy; +    uint16_t next_copy; +    tcg_target_ulong val; +    tcg_target_ulong mask; +}; + +static struct tcg_temp_info temps[TCG_MAX_TEMPS]; + +/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove +   the copy flag from the left temp.  
*/ +static void reset_temp(TCGArg temp) +{ +    if (temps[temp].state == TCG_TEMP_COPY) { +        if (temps[temp].prev_copy == temps[temp].next_copy) { +            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; +        } else { +            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; +            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; +        } +    } +    temps[temp].state = TCG_TEMP_UNDEF; +    temps[temp].mask = -1; +} + +static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, +                                TCGOpcode opc, int nargs) +{ +    int oi = s->gen_next_op_idx; +    int pi = s->gen_next_parm_idx; +    int prev = old_op->prev; +    int next = old_op - s->gen_op_buf; +    TCGOp *new_op; + +    tcg_debug_assert(oi < OPC_BUF_SIZE); +    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); +    s->gen_next_op_idx = oi + 1; +    s->gen_next_parm_idx = pi + nargs; + +    new_op = &s->gen_op_buf[oi]; +    *new_op = (TCGOp){ +        .opc = opc, +        .args = pi, +        .prev = prev, +        .next = next +    }; +    if (prev >= 0) { +        s->gen_op_buf[prev].next = oi; +    } else { +        s->gen_first_op_idx = oi; +    } +    old_op->prev = oi; + +    return new_op; +} + +/* Reset all temporaries, given that there are NB_TEMPS of them.  */ +static void reset_all_temps(int nb_temps) +{ +    int i; +    for (i = 0; i < nb_temps; i++) { +        temps[i].state = TCG_TEMP_UNDEF; +        temps[i].mask = -1; +    } +} + +static int op_bits(TCGOpcode op) +{ +    const TCGOpDef *def = &tcg_op_defs[op]; +    return def->flags & TCG_OPF_64BIT ? 64 : 32; +} + +static TCGOpcode op_to_mov(TCGOpcode op) +{ +    switch (op_bits(op)) { +    case 32: +        return INDEX_op_mov_i32; +    case 64: +        return INDEX_op_mov_i64; +    default: +        fprintf(stderr, "op_to_mov: unexpected return value of " +                "function op_bits.\n"); +        tcg_abort(); +    } +} + +static TCGOpcode op_to_movi(TCGOpcode op) +{ +    switch (op_bits(op)) { +    case 32: +        return INDEX_op_movi_i32; +    case 64: +        return INDEX_op_movi_i64; +    default: +        fprintf(stderr, "op_to_movi: unexpected return value of " +                "function op_bits.\n"); +        tcg_abort(); +    } +} + +static TCGArg find_better_copy(TCGContext *s, TCGArg temp) +{ +    TCGArg i; + +    /* If this is already a global, we can't do better. */ +    if (temp < s->nb_globals) { +        return temp; +    } + +    /* Search for a global first. */ +    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { +        if (i < s->nb_globals) { +            return i; +        } +    } + +    /* If it is a temp, search for a temp local. */ +    if (!s->temps[temp].temp_local) { +        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { +            if (s->temps[i].temp_local) { +                return i; +            } +        } +    } + +    /* Failure to find a better representation, return the same temp. 
*/ +    return temp; +} + +static bool temps_are_copies(TCGArg arg1, TCGArg arg2) +{ +    TCGArg i; + +    if (arg1 == arg2) { +        return true; +    } + +    if (temps[arg1].state != TCG_TEMP_COPY +        || temps[arg2].state != TCG_TEMP_COPY) { +        return false; +    } + +    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { +        if (i == arg2) { +            return true; +        } +    } + +    return false; +} + +static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, +                             TCGArg dst, TCGArg val) +{ +    TCGOpcode new_op = op_to_movi(op->opc); +    tcg_target_ulong mask; + +    op->opc = new_op; + +    reset_temp(dst); +    temps[dst].state = TCG_TEMP_CONST; +    temps[dst].val = val; +    mask = val; +    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) { +        /* High bits of the destination are now garbage.  */ +        mask |= ~0xffffffffull; +    } +    temps[dst].mask = mask; + +    args[0] = dst; +    args[1] = val; +} + +static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, +                            TCGArg dst, TCGArg src) +{ +    if (temps_are_copies(dst, src)) { +        tcg_op_remove(s, op); +        return; +    } + +    if (temps[src].state == TCG_TEMP_CONST) { +        tcg_opt_gen_movi(s, op, args, dst, temps[src].val); +        return; +    } + +    TCGOpcode new_op = op_to_mov(op->opc); +    tcg_target_ulong mask; + +    op->opc = new_op; + +    reset_temp(dst); +    mask = temps[src].mask; +    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { +        /* High bits of the destination are now garbage.  */ +        mask |= ~0xffffffffull; +    } +    temps[dst].mask = mask; + +    assert(temps[src].state != TCG_TEMP_CONST); + +    if (s->temps[src].type == s->temps[dst].type) { +        if (temps[src].state != TCG_TEMP_COPY) { +            temps[src].state = TCG_TEMP_COPY; +            temps[src].next_copy = src; +            temps[src].prev_copy = src; +        } +        temps[dst].state = TCG_TEMP_COPY; +        temps[dst].next_copy = temps[src].next_copy; +        temps[dst].prev_copy = src; +        temps[temps[dst].next_copy].prev_copy = dst; +        temps[src].next_copy = dst; +    } + +    args[0] = dst; +    args[1] = src; +} + +static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) +{ +    uint64_t l64, h64; + +    switch (op) { +    CASE_OP_32_64(add): +        return x + y; + +    CASE_OP_32_64(sub): +        return x - y; + +    CASE_OP_32_64(mul): +        return x * y; + +    CASE_OP_32_64(and): +        return x & y; + +    CASE_OP_32_64(or): +        return x | y; + +    CASE_OP_32_64(xor): +        return x ^ y; + +    case INDEX_op_shl_i32: +        return (uint32_t)x << (y & 31); + +    case INDEX_op_shl_i64: +        return (uint64_t)x << (y & 63); + +    case INDEX_op_shr_i32: +        return (uint32_t)x >> (y & 31); + +    case INDEX_op_trunc_shr_i32: +    case INDEX_op_shr_i64: +        return (uint64_t)x >> (y & 63); + +    case INDEX_op_sar_i32: +        return (int32_t)x >> (y & 31); + +    case INDEX_op_sar_i64: +        return (int64_t)x >> (y & 63); + +    case INDEX_op_rotr_i32: +        return ror32(x, y & 31); + +    case INDEX_op_rotr_i64: +        return ror64(x, y & 63); + +    case INDEX_op_rotl_i32: +        return rol32(x, y & 31); + +    case INDEX_op_rotl_i64: +        return rol64(x, y & 63); + +    CASE_OP_32_64(not): +        return ~x; + +    CASE_OP_32_64(neg): +        return -x; + +    
CASE_OP_32_64(andc): +        return x & ~y; + +    CASE_OP_32_64(orc): +        return x | ~y; + +    CASE_OP_32_64(eqv): +        return ~(x ^ y); + +    CASE_OP_32_64(nand): +        return ~(x & y); + +    CASE_OP_32_64(nor): +        return ~(x | y); + +    CASE_OP_32_64(ext8s): +        return (int8_t)x; + +    CASE_OP_32_64(ext16s): +        return (int16_t)x; + +    CASE_OP_32_64(ext8u): +        return (uint8_t)x; + +    CASE_OP_32_64(ext16u): +        return (uint16_t)x; + +    case INDEX_op_ext32s_i64: +        return (int32_t)x; + +    case INDEX_op_ext32u_i64: +        return (uint32_t)x; + +    case INDEX_op_muluh_i32: +        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; +    case INDEX_op_mulsh_i32: +        return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + +    case INDEX_op_muluh_i64: +        mulu64(&l64, &h64, x, y); +        return h64; +    case INDEX_op_mulsh_i64: +        muls64(&l64, &h64, x, y); +        return h64; + +    case INDEX_op_div_i32: +        /* Avoid crashing on divide by zero, otherwise undefined.  */ +        return (int32_t)x / ((int32_t)y ? : 1); +    case INDEX_op_divu_i32: +        return (uint32_t)x / ((uint32_t)y ? : 1); +    case INDEX_op_div_i64: +        return (int64_t)x / ((int64_t)y ? : 1); +    case INDEX_op_divu_i64: +        return (uint64_t)x / ((uint64_t)y ? : 1); + +    case INDEX_op_rem_i32: +        return (int32_t)x % ((int32_t)y ? : 1); +    case INDEX_op_remu_i32: +        return (uint32_t)x % ((uint32_t)y ? : 1); +    case INDEX_op_rem_i64: +        return (int64_t)x % ((int64_t)y ? : 1); +    case INDEX_op_remu_i64: +        return (uint64_t)x % ((uint64_t)y ? : 1); + +    default: +        fprintf(stderr, +                "Unrecognized operation %d in do_constant_folding.\n", op); +        tcg_abort(); +    } +} + +static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) +{ +    TCGArg res = do_constant_folding_2(op, x, y); +    if (op_bits(op) == 32) { +        res &= 0xffffffff; +    } +    return res; +} + +static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) +{ +    switch (c) { +    case TCG_COND_EQ: +        return x == y; +    case TCG_COND_NE: +        return x != y; +    case TCG_COND_LT: +        return (int32_t)x < (int32_t)y; +    case TCG_COND_GE: +        return (int32_t)x >= (int32_t)y; +    case TCG_COND_LE: +        return (int32_t)x <= (int32_t)y; +    case TCG_COND_GT: +        return (int32_t)x > (int32_t)y; +    case TCG_COND_LTU: +        return x < y; +    case TCG_COND_GEU: +        return x >= y; +    case TCG_COND_LEU: +        return x <= y; +    case TCG_COND_GTU: +        return x > y; +    default: +        tcg_abort(); +    } +} + +static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) +{ +    switch (c) { +    case TCG_COND_EQ: +        return x == y; +    case TCG_COND_NE: +        return x != y; +    case TCG_COND_LT: +        return (int64_t)x < (int64_t)y; +    case TCG_COND_GE: +        return (int64_t)x >= (int64_t)y; +    case TCG_COND_LE: +        return (int64_t)x <= (int64_t)y; +    case TCG_COND_GT: +        return (int64_t)x > (int64_t)y; +    case TCG_COND_LTU: +        return x < y; +    case TCG_COND_GEU: +        return x >= y; +    case TCG_COND_LEU: +        return x <= y; +    case TCG_COND_GTU: +        return x > y; +    default: +        tcg_abort(); +    } +} + +static bool do_constant_folding_cond_eq(TCGCond c) +{ +    switch (c) { +    case TCG_COND_GT: +    case TCG_COND_LTU: +    case TCG_COND_LT: +    
case TCG_COND_GTU: +    case TCG_COND_NE: +        return 0; +    case TCG_COND_GE: +    case TCG_COND_GEU: +    case TCG_COND_LE: +    case TCG_COND_LEU: +    case TCG_COND_EQ: +        return 1; +    default: +        tcg_abort(); +    } +} + +/* Return 2 if the condition can't be simplified, and the result +   of the condition (0 or 1) if it can */ +static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, +                                       TCGArg y, TCGCond c) +{ +    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { +        switch (op_bits(op)) { +        case 32: +            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); +        case 64: +            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c); +        default: +            tcg_abort(); +        } +    } else if (temps_are_copies(x, y)) { +        return do_constant_folding_cond_eq(c); +    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { +        switch (c) { +        case TCG_COND_LTU: +            return 0; +        case TCG_COND_GEU: +            return 1; +        default: +            return 2; +        } +    } else { +        return 2; +    } +} + +/* Return 2 if the condition can't be simplified, and the result +   of the condition (0 or 1) if it can */ +static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) +{ +    TCGArg al = p1[0], ah = p1[1]; +    TCGArg bl = p2[0], bh = p2[1]; + +    if (temps[bl].state == TCG_TEMP_CONST +        && temps[bh].state == TCG_TEMP_CONST) { +        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; + +        if (temps[al].state == TCG_TEMP_CONST +            && temps[ah].state == TCG_TEMP_CONST) { +            uint64_t a; +            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; +            return do_constant_folding_cond_64(a, b, c); +        } +        if (b == 0) { +            switch (c) { +            case TCG_COND_LTU: +                return 0; +            case TCG_COND_GEU: +                return 1; +            default: +                break; +            } +        } +    } +    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) { +        return do_constant_folding_cond_eq(c); +    } +    return 2; +} + +static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) +{ +    TCGArg a1 = *p1, a2 = *p2; +    int sum = 0; +    sum += temps[a1].state == TCG_TEMP_CONST; +    sum -= temps[a2].state == TCG_TEMP_CONST; + +    /* Prefer the constant in second argument, and then the form +       op a, a, b, which is better handled on non-RISC hosts. */ +    if (sum > 0 || (sum == 0 && dest == a2)) { +        *p1 = a2; +        *p2 = a1; +        return true; +    } +    return false; +} + +static bool swap_commutative2(TCGArg *p1, TCGArg *p2) +{ +    int sum = 0; +    sum += temps[p1[0]].state == TCG_TEMP_CONST; +    sum += temps[p1[1]].state == TCG_TEMP_CONST; +    sum -= temps[p2[0]].state == TCG_TEMP_CONST; +    sum -= temps[p2[1]].state == TCG_TEMP_CONST; +    if (sum > 0) { +        TCGArg t; +        t = p1[0], p1[0] = p2[0], p2[0] = t; +        t = p1[1], p1[1] = p2[1], p2[1] = t; +        return true; +    } +    return false; +} + +/* Propagate constants and copies, fold constant expressions. */ +void tcg_optimize(TCGContext *s) +{ +    int oi, oi_next, nb_temps, nb_globals; + +    /* Array VALS has an element for each temp. +       If this temp holds a constant then its value is kept in VALS' element. 
+       If this temp is a copy of other ones then the other copies are +       available through the doubly linked circular list. */ + +    nb_temps = s->nb_temps; +    nb_globals = s->nb_globals; +    reset_all_temps(nb_temps); + +    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { +        tcg_target_ulong mask, partmask, affected; +        int nb_oargs, nb_iargs, i; +        TCGArg tmp; + +        TCGOp * const op = &s->gen_op_buf[oi]; +        TCGArg * const args = &s->gen_opparam_buf[op->args]; +        TCGOpcode opc = op->opc; +        const TCGOpDef *def = &tcg_op_defs[opc]; + +        oi_next = op->next; +        if (opc == INDEX_op_call) { +            nb_oargs = op->callo; +            nb_iargs = op->calli; +        } else { +            nb_oargs = def->nb_oargs; +            nb_iargs = def->nb_iargs; +        } + +        /* Do copy propagation */ +        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { +            if (temps[args[i]].state == TCG_TEMP_COPY) { +                args[i] = find_better_copy(s, args[i]); +            } +        } + +        /* For commutative operations make constant second argument */ +        switch (opc) { +        CASE_OP_32_64(add): +        CASE_OP_32_64(mul): +        CASE_OP_32_64(and): +        CASE_OP_32_64(or): +        CASE_OP_32_64(xor): +        CASE_OP_32_64(eqv): +        CASE_OP_32_64(nand): +        CASE_OP_32_64(nor): +        CASE_OP_32_64(muluh): +        CASE_OP_32_64(mulsh): +            swap_commutative(args[0], &args[1], &args[2]); +            break; +        CASE_OP_32_64(brcond): +            if (swap_commutative(-1, &args[0], &args[1])) { +                args[2] = tcg_swap_cond(args[2]); +            } +            break; +        CASE_OP_32_64(setcond): +            if (swap_commutative(args[0], &args[1], &args[2])) { +                args[3] = tcg_swap_cond(args[3]); +            } +            break; +        CASE_OP_32_64(movcond): +            if (swap_commutative(-1, &args[1], &args[2])) { +                args[5] = tcg_swap_cond(args[5]); +            } +            /* For movcond, we canonicalize the "false" input reg to match +               the destination reg so that the tcg backend can implement +               a "move if true" operation.  */ +            if (swap_commutative(args[0], &args[4], &args[3])) { +                args[5] = tcg_invert_cond(args[5]); +            } +            break; +        CASE_OP_32_64(add2): +            swap_commutative(args[0], &args[2], &args[4]); +            swap_commutative(args[1], &args[3], &args[5]); +            break; +        CASE_OP_32_64(mulu2): +        CASE_OP_32_64(muls2): +            swap_commutative(args[0], &args[2], &args[3]); +            break; +        case INDEX_op_brcond2_i32: +            if (swap_commutative2(&args[0], &args[2])) { +                args[4] = tcg_swap_cond(args[4]); +            } +            break; +        case INDEX_op_setcond2_i32: +            if (swap_commutative2(&args[1], &args[3])) { +                args[5] = tcg_swap_cond(args[5]); +            } +            break; +        default: +            break; +        } + +        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", +           and "sub r, 0, a => neg r, a" case.  
*/ +        switch (opc) { +        CASE_OP_32_64(shl): +        CASE_OP_32_64(shr): +        CASE_OP_32_64(sar): +        CASE_OP_32_64(rotl): +        CASE_OP_32_64(rotr): +            if (temps[args[1]].state == TCG_TEMP_CONST +                && temps[args[1]].val == 0) { +                tcg_opt_gen_movi(s, op, args, args[0], 0); +                continue; +            } +            break; +        CASE_OP_32_64(sub): +            { +                TCGOpcode neg_op; +                bool have_neg; + +                if (temps[args[2]].state == TCG_TEMP_CONST) { +                    /* Proceed with possible constant folding. */ +                    break; +                } +                if (opc == INDEX_op_sub_i32) { +                    neg_op = INDEX_op_neg_i32; +                    have_neg = TCG_TARGET_HAS_neg_i32; +                } else { +                    neg_op = INDEX_op_neg_i64; +                    have_neg = TCG_TARGET_HAS_neg_i64; +                } +                if (!have_neg) { +                    break; +                } +                if (temps[args[1]].state == TCG_TEMP_CONST +                    && temps[args[1]].val == 0) { +                    op->opc = neg_op; +                    reset_temp(args[0]); +                    args[1] = args[2]; +                    continue; +                } +            } +            break; +        CASE_OP_32_64(xor): +        CASE_OP_32_64(nand): +            if (temps[args[1]].state != TCG_TEMP_CONST +                && temps[args[2]].state == TCG_TEMP_CONST +                && temps[args[2]].val == -1) { +                i = 1; +                goto try_not; +            } +            break; +        CASE_OP_32_64(nor): +            if (temps[args[1]].state != TCG_TEMP_CONST +                && temps[args[2]].state == TCG_TEMP_CONST +                && temps[args[2]].val == 0) { +                i = 1; +                goto try_not; +            } +            break; +        CASE_OP_32_64(andc): +            if (temps[args[2]].state != TCG_TEMP_CONST +                && temps[args[1]].state == TCG_TEMP_CONST +                && temps[args[1]].val == -1) { +                i = 2; +                goto try_not; +            } +            break; +        CASE_OP_32_64(orc): +        CASE_OP_32_64(eqv): +            if (temps[args[2]].state != TCG_TEMP_CONST +                && temps[args[1]].state == TCG_TEMP_CONST +                && temps[args[1]].val == 0) { +                i = 2; +                goto try_not; +            } +            break; +        try_not: +            { +                TCGOpcode not_op; +                bool have_not; + +                if (def->flags & TCG_OPF_64BIT) { +                    not_op = INDEX_op_not_i64; +                    have_not = TCG_TARGET_HAS_not_i64; +                } else { +                    not_op = INDEX_op_not_i32; +                    have_not = TCG_TARGET_HAS_not_i32; +                } +                if (!have_not) { +                    break; +                } +                op->opc = not_op; +                reset_temp(args[0]); +                args[1] = args[i]; +                continue; +            } +        default: +            break; +        } + +        /* Simplify expression for "op r, a, const => mov r, a" cases */ +        switch (opc) { +        CASE_OP_32_64(add): +        CASE_OP_32_64(sub): +        CASE_OP_32_64(shl): +        CASE_OP_32_64(shr): +        CASE_OP_32_64(sar): +        CASE_OP_32_64(rotl): +        
CASE_OP_32_64(rotr): +        CASE_OP_32_64(or): +        CASE_OP_32_64(xor): +        CASE_OP_32_64(andc): +            if (temps[args[1]].state != TCG_TEMP_CONST +                && temps[args[2]].state == TCG_TEMP_CONST +                && temps[args[2]].val == 0) { +                tcg_opt_gen_mov(s, op, args, args[0], args[1]); +                continue; +            } +            break; +        CASE_OP_32_64(and): +        CASE_OP_32_64(orc): +        CASE_OP_32_64(eqv): +            if (temps[args[1]].state != TCG_TEMP_CONST +                && temps[args[2]].state == TCG_TEMP_CONST +                && temps[args[2]].val == -1) { +                tcg_opt_gen_mov(s, op, args, args[0], args[1]); +                continue; +            } +            break; +        default: +            break; +        } + +        /* Simplify using known-zero bits. Currently only ops with a single +           output argument is supported. */ +        mask = -1; +        affected = -1; +        switch (opc) { +        CASE_OP_32_64(ext8s): +            if ((temps[args[1]].mask & 0x80) != 0) { +                break; +            } +        CASE_OP_32_64(ext8u): +            mask = 0xff; +            goto and_const; +        CASE_OP_32_64(ext16s): +            if ((temps[args[1]].mask & 0x8000) != 0) { +                break; +            } +        CASE_OP_32_64(ext16u): +            mask = 0xffff; +            goto and_const; +        case INDEX_op_ext32s_i64: +            if ((temps[args[1]].mask & 0x80000000) != 0) { +                break; +            } +        case INDEX_op_ext32u_i64: +            mask = 0xffffffffU; +            goto and_const; + +        CASE_OP_32_64(and): +            mask = temps[args[2]].mask; +            if (temps[args[2]].state == TCG_TEMP_CONST) { +        and_const: +                affected = temps[args[1]].mask & ~mask; +            } +            mask = temps[args[1]].mask & mask; +            break; + +        CASE_OP_32_64(andc): +            /* Known-zeros does not imply known-ones.  Therefore unless +               args[2] is constant, we can't infer anything from it.  */ +            if (temps[args[2]].state == TCG_TEMP_CONST) { +                mask = ~temps[args[2]].mask; +                goto and_const; +            } +            /* But we certainly know nothing outside args[1] may be set. 
*/ +            mask = temps[args[1]].mask; +            break; + +        case INDEX_op_sar_i32: +            if (temps[args[2]].state == TCG_TEMP_CONST) { +                tmp = temps[args[2]].val & 31; +                mask = (int32_t)temps[args[1]].mask >> tmp; +            } +            break; +        case INDEX_op_sar_i64: +            if (temps[args[2]].state == TCG_TEMP_CONST) { +                tmp = temps[args[2]].val & 63; +                mask = (int64_t)temps[args[1]].mask >> tmp; +            } +            break; + +        case INDEX_op_shr_i32: +            if (temps[args[2]].state == TCG_TEMP_CONST) { +                tmp = temps[args[2]].val & 31; +                mask = (uint32_t)temps[args[1]].mask >> tmp; +            } +            break; +        case INDEX_op_shr_i64: +            if (temps[args[2]].state == TCG_TEMP_CONST) { +                tmp = temps[args[2]].val & 63; +                mask = (uint64_t)temps[args[1]].mask >> tmp; +            } +            break; + +        case INDEX_op_trunc_shr_i32: +            mask = (uint64_t)temps[args[1]].mask >> args[2]; +            break; + +        CASE_OP_32_64(shl): +            if (temps[args[2]].state == TCG_TEMP_CONST) { +                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); +                mask = temps[args[1]].mask << tmp; +            } +            break; + +        CASE_OP_32_64(neg): +            /* Set to 1 all bits to the left of the rightmost.  */ +            mask = -(temps[args[1]].mask & -temps[args[1]].mask); +            break; + +        CASE_OP_32_64(deposit): +            mask = deposit64(temps[args[1]].mask, args[3], args[4], +                             temps[args[2]].mask); +            break; + +        CASE_OP_32_64(or): +        CASE_OP_32_64(xor): +            mask = temps[args[1]].mask | temps[args[2]].mask; +            break; + +        CASE_OP_32_64(setcond): +        case INDEX_op_setcond2_i32: +            mask = 1; +            break; + +        CASE_OP_32_64(movcond): +            mask = temps[args[3]].mask | temps[args[4]].mask; +            break; + +        CASE_OP_32_64(ld8u): +            mask = 0xff; +            break; +        CASE_OP_32_64(ld16u): +            mask = 0xffff; +            break; +        case INDEX_op_ld32u_i64: +            mask = 0xffffffffu; +            break; + +        CASE_OP_32_64(qemu_ld): +            { +                TCGMemOpIdx oi = args[nb_oargs + nb_iargs]; +                TCGMemOp mop = get_memop(oi); +                if (!(mop & MO_SIGN)) { +                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1; +                } +            } +            break; + +        default: +            break; +        } + +        /* 32-bit ops generate 32-bit results.  For the result is zero test +           below, we can ignore high bits, but for further optimizations we +           need to record that the high bits contain garbage.  
*/ +        partmask = mask; +        if (!(def->flags & TCG_OPF_64BIT)) { +            mask |= ~(tcg_target_ulong)0xffffffffu; +            partmask &= 0xffffffffu; +            affected &= 0xffffffffu; +        } + +        if (partmask == 0) { +            assert(nb_oargs == 1); +            tcg_opt_gen_movi(s, op, args, args[0], 0); +            continue; +        } +        if (affected == 0) { +            assert(nb_oargs == 1); +            tcg_opt_gen_mov(s, op, args, args[0], args[1]); +            continue; +        } + +        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ +        switch (opc) { +        CASE_OP_32_64(and): +        CASE_OP_32_64(mul): +        CASE_OP_32_64(muluh): +        CASE_OP_32_64(mulsh): +            if ((temps[args[2]].state == TCG_TEMP_CONST +                && temps[args[2]].val == 0)) { +                tcg_opt_gen_movi(s, op, args, args[0], 0); +                continue; +            } +            break; +        default: +            break; +        } + +        /* Simplify expression for "op r, a, a => mov r, a" cases */ +        switch (opc) { +        CASE_OP_32_64(or): +        CASE_OP_32_64(and): +            if (temps_are_copies(args[1], args[2])) { +                tcg_opt_gen_mov(s, op, args, args[0], args[1]); +                continue; +            } +            break; +        default: +            break; +        } + +        /* Simplify expression for "op r, a, a => movi r, 0" cases */ +        switch (opc) { +        CASE_OP_32_64(andc): +        CASE_OP_32_64(sub): +        CASE_OP_32_64(xor): +            if (temps_are_copies(args[1], args[2])) { +                tcg_opt_gen_movi(s, op, args, args[0], 0); +                continue; +            } +            break; +        default: +            break; +        } + +        /* Propagate constants through copy operations and do constant +           folding.  Constants will be substituted to arguments by register +           allocator where needed and possible.  Also detect copies. 
*/ +        switch (opc) { +        CASE_OP_32_64(mov): +            tcg_opt_gen_mov(s, op, args, args[0], args[1]); +            break; +        CASE_OP_32_64(movi): +            tcg_opt_gen_movi(s, op, args, args[0], args[1]); +            break; + +        CASE_OP_32_64(not): +        CASE_OP_32_64(neg): +        CASE_OP_32_64(ext8s): +        CASE_OP_32_64(ext8u): +        CASE_OP_32_64(ext16s): +        CASE_OP_32_64(ext16u): +        case INDEX_op_ext32s_i64: +        case INDEX_op_ext32u_i64: +            if (temps[args[1]].state == TCG_TEMP_CONST) { +                tmp = do_constant_folding(opc, temps[args[1]].val, 0); +                tcg_opt_gen_movi(s, op, args, args[0], tmp); +                break; +            } +            goto do_default; + +        case INDEX_op_trunc_shr_i32: +            if (temps[args[1]].state == TCG_TEMP_CONST) { +                tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); +                tcg_opt_gen_movi(s, op, args, args[0], tmp); +                break; +            } +            goto do_default; + +        CASE_OP_32_64(add): +        CASE_OP_32_64(sub): +        CASE_OP_32_64(mul): +        CASE_OP_32_64(or): +        CASE_OP_32_64(and): +        CASE_OP_32_64(xor): +        CASE_OP_32_64(shl): +        CASE_OP_32_64(shr): +        CASE_OP_32_64(sar): +        CASE_OP_32_64(rotl): +        CASE_OP_32_64(rotr): +        CASE_OP_32_64(andc): +        CASE_OP_32_64(orc): +        CASE_OP_32_64(eqv): +        CASE_OP_32_64(nand): +        CASE_OP_32_64(nor): +        CASE_OP_32_64(muluh): +        CASE_OP_32_64(mulsh): +        CASE_OP_32_64(div): +        CASE_OP_32_64(divu): +        CASE_OP_32_64(rem): +        CASE_OP_32_64(remu): +            if (temps[args[1]].state == TCG_TEMP_CONST +                && temps[args[2]].state == TCG_TEMP_CONST) { +                tmp = do_constant_folding(opc, temps[args[1]].val, +                                          temps[args[2]].val); +                tcg_opt_gen_movi(s, op, args, args[0], tmp); +                break; +            } +            goto do_default; + +        CASE_OP_32_64(deposit): +            if (temps[args[1]].state == TCG_TEMP_CONST +                && temps[args[2]].state == TCG_TEMP_CONST) { +                tmp = deposit64(temps[args[1]].val, args[3], args[4], +                                temps[args[2]].val); +                tcg_opt_gen_movi(s, op, args, args[0], tmp); +                break; +            } +            goto do_default; + +        CASE_OP_32_64(setcond): +            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]); +            if (tmp != 2) { +                tcg_opt_gen_movi(s, op, args, args[0], tmp); +                break; +            } +            goto do_default; + +        CASE_OP_32_64(brcond): +            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]); +            if (tmp != 2) { +                if (tmp) { +                    reset_all_temps(nb_temps); +                    op->opc = INDEX_op_br; +                    args[0] = args[3]; +                } else { +                    tcg_op_remove(s, op); +                } +                break; +            } +            goto do_default; + +        CASE_OP_32_64(movcond): +            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]); +            if (tmp != 2) { +                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]); +                break; +            } +            goto do_default; + +        case 
INDEX_op_add2_i32: +        case INDEX_op_sub2_i32: +            if (temps[args[2]].state == TCG_TEMP_CONST +                && temps[args[3]].state == TCG_TEMP_CONST +                && temps[args[4]].state == TCG_TEMP_CONST +                && temps[args[5]].state == TCG_TEMP_CONST) { +                uint32_t al = temps[args[2]].val; +                uint32_t ah = temps[args[3]].val; +                uint32_t bl = temps[args[4]].val; +                uint32_t bh = temps[args[5]].val; +                uint64_t a = ((uint64_t)ah << 32) | al; +                uint64_t b = ((uint64_t)bh << 32) | bl; +                TCGArg rl, rh; +                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); +                TCGArg *args2 = &s->gen_opparam_buf[op2->args]; + +                if (opc == INDEX_op_add2_i32) { +                    a += b; +                } else { +                    a -= b; +                } + +                rl = args[0]; +                rh = args[1]; +                tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a); +                tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32)); + +                /* We've done all we need to do with the movi.  Skip it.  */ +                oi_next = op2->next; +                break; +            } +            goto do_default; + +        case INDEX_op_mulu2_i32: +            if (temps[args[2]].state == TCG_TEMP_CONST +                && temps[args[3]].state == TCG_TEMP_CONST) { +                uint32_t a = temps[args[2]].val; +                uint32_t b = temps[args[3]].val; +                uint64_t r = (uint64_t)a * b; +                TCGArg rl, rh; +                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); +                TCGArg *args2 = &s->gen_opparam_buf[op2->args]; + +                rl = args[0]; +                rh = args[1]; +                tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r); +                tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32)); + +                /* We've done all we need to do with the movi.  Skip it.  */ +                oi_next = op2->next; +                break; +            } +            goto do_default; + +        case INDEX_op_brcond2_i32: +            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]); +            if (tmp != 2) { +                if (tmp) { +            do_brcond_true: +                    reset_all_temps(nb_temps); +                    op->opc = INDEX_op_br; +                    args[0] = args[5]; +                } else { +            do_brcond_false: +                    tcg_op_remove(s, op); +                } +            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) +                       && temps[args[2]].state == TCG_TEMP_CONST +                       && temps[args[3]].state == TCG_TEMP_CONST +                       && temps[args[2]].val == 0 +                       && temps[args[3]].val == 0) { +                /* Simplify LT/GE comparisons vs zero to a single compare +                   vs the high word of the input.  */ +            do_brcond_high: +                reset_all_temps(nb_temps); +                op->opc = INDEX_op_brcond_i32; +                args[0] = args[1]; +                args[1] = args[3]; +                args[2] = args[4]; +                args[3] = args[5]; +            } else if (args[4] == TCG_COND_EQ) { +                /* Simplify EQ comparisons where one of the pairs +                   can be simplified.  
*/ +                tmp = do_constant_folding_cond(INDEX_op_brcond_i32, +                                               args[0], args[2], TCG_COND_EQ); +                if (tmp == 0) { +                    goto do_brcond_false; +                } else if (tmp == 1) { +                    goto do_brcond_high; +                } +                tmp = do_constant_folding_cond(INDEX_op_brcond_i32, +                                               args[1], args[3], TCG_COND_EQ); +                if (tmp == 0) { +                    goto do_brcond_false; +                } else if (tmp != 1) { +                    goto do_default; +                } +            do_brcond_low: +                reset_all_temps(nb_temps); +                op->opc = INDEX_op_brcond_i32; +                args[1] = args[2]; +                args[2] = args[4]; +                args[3] = args[5]; +            } else if (args[4] == TCG_COND_NE) { +                /* Simplify NE comparisons where one of the pairs +                   can be simplified.  */ +                tmp = do_constant_folding_cond(INDEX_op_brcond_i32, +                                               args[0], args[2], TCG_COND_NE); +                if (tmp == 0) { +                    goto do_brcond_high; +                } else if (tmp == 1) { +                    goto do_brcond_true; +                } +                tmp = do_constant_folding_cond(INDEX_op_brcond_i32, +                                               args[1], args[3], TCG_COND_NE); +                if (tmp == 0) { +                    goto do_brcond_low; +                } else if (tmp == 1) { +                    goto do_brcond_true; +                } +                goto do_default; +            } else { +                goto do_default; +            } +            break; + +        case INDEX_op_setcond2_i32: +            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); +            if (tmp != 2) { +            do_setcond_const: +                tcg_opt_gen_movi(s, op, args, args[0], tmp); +            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) +                       && temps[args[3]].state == TCG_TEMP_CONST +                       && temps[args[4]].state == TCG_TEMP_CONST +                       && temps[args[3]].val == 0 +                       && temps[args[4]].val == 0) { +                /* Simplify LT/GE comparisons vs zero to a single compare +                   vs the high word of the input.  */ +            do_setcond_high: +                reset_temp(args[0]); +                temps[args[0]].mask = 1; +                op->opc = INDEX_op_setcond_i32; +                args[1] = args[2]; +                args[2] = args[4]; +                args[3] = args[5]; +            } else if (args[5] == TCG_COND_EQ) { +                /* Simplify EQ comparisons where one of the pairs +                   can be simplified.  
*/ +                tmp = do_constant_folding_cond(INDEX_op_setcond_i32, +                                               args[1], args[3], TCG_COND_EQ); +                if (tmp == 0) { +                    goto do_setcond_const; +                } else if (tmp == 1) { +                    goto do_setcond_high; +                } +                tmp = do_constant_folding_cond(INDEX_op_setcond_i32, +                                               args[2], args[4], TCG_COND_EQ); +                if (tmp == 0) { +                    goto do_setcond_high; +                } else if (tmp != 1) { +                    goto do_default; +                } +            do_setcond_low: +                reset_temp(args[0]); +                temps[args[0]].mask = 1; +                op->opc = INDEX_op_setcond_i32; +                args[2] = args[3]; +                args[3] = args[5]; +            } else if (args[5] == TCG_COND_NE) { +                /* Simplify NE comparisons where one of the pairs +                   can be simplified.  */ +                tmp = do_constant_folding_cond(INDEX_op_setcond_i32, +                                               args[1], args[3], TCG_COND_NE); +                if (tmp == 0) { +                    goto do_setcond_high; +                } else if (tmp == 1) { +                    goto do_setcond_const; +                } +                tmp = do_constant_folding_cond(INDEX_op_setcond_i32, +                                               args[2], args[4], TCG_COND_NE); +                if (tmp == 0) { +                    goto do_setcond_low; +                } else if (tmp == 1) { +                    goto do_setcond_const; +                } +                goto do_default; +            } else { +                goto do_default; +            } +            break; + +        case INDEX_op_call: +            if (!(args[nb_oargs + nb_iargs + 1] +                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { +                for (i = 0; i < nb_globals; i++) { +                    reset_temp(i); +                } +            } +            goto do_reset_output; + +        default: +        do_default: +            /* Default case: we know nothing about operation (or were unable +               to compute the operation result) so no propagation is done. +               We trash everything if the operation is the end of a basic +               block, otherwise we only trash the output args.  "mask" is +               the non-zero bits mask for the first output arg.  */ +            if (def->flags & TCG_OPF_BB_END) { +                reset_all_temps(nb_temps); +            } else { +        do_reset_output: +                for (i = 0; i < nb_oargs; i++) { +                    reset_temp(args[i]); +                    /* Save the corresponding known-zero bits mask for the +                       first output argument (only one supported so far). 
*/ +                    if (i == 0) { +                        temps[args[i]].mask = mask; +                    } +                } +            } +            break; +        } +    } +} diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c new file mode 100644 index 00000000..2b6eafa0 --- /dev/null +++ b/tcg/ppc/tcg-target.c @@ -0,0 +1,2722 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tcg-be-ldst.h" + +#if defined _CALL_DARWIN || defined __APPLE__ +#define TCG_TARGET_CALL_DARWIN +#endif +#ifdef _CALL_SYSV +# define TCG_TARGET_CALL_ALIGN_ARGS   1 +#endif + +/* For some memory operations, we need a scratch that isn't R0.  For the AIX +   calling convention, we can re-use the TOC register since we'll be reloading +   it at every call.  Otherwise R12 will do nicely as neither a call-saved +   register nor a parameter register.  */ +#ifdef _CALL_AIX +# define TCG_REG_TMP1   TCG_REG_R2 +#else +# define TCG_REG_TMP1   TCG_REG_R12 +#endif + +/* For the 64-bit target, we don't like the 5 insn sequence needed to build +   full 64-bit addresses.  Better to have a base register to which we can +   apply a 32-bit displacement. + +   There are generally three items of interest: +   (1) helper functions in the main executable, +   (2) TranslationBlock data structures, +   (3) the return address in the epilogue. + +   For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer +   will be inside the main executable, and thus near enough to make a +   pointer to the epilogue be within 2GB of all helper functions. + +   For softmmu, we'll let the kernel choose the address of code_gen_buffer, +   and odds are it'll be somewhere close to the main malloc arena, and so +   a pointer to the epilogue will be within 2GB of the TranslationBlocks. + +   For --enable-pie, everything will be kinda near everything else, +   somewhere in high memory. + +   Thus we choose to keep the return address in a call-saved register.  */ +#define TCG_REG_RA     TCG_REG_R31 +#define USE_REG_RA     (TCG_TARGET_REG_BITS == 64) + +/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */ +#define SZP  ((int)sizeof(void *)) + +/* Shorthand for size of a register.  
*/ +#define SZR  (TCG_TARGET_REG_BITS / 8) + +#define TCG_CT_CONST_S16  0x100 +#define TCG_CT_CONST_U16  0x200 +#define TCG_CT_CONST_S32  0x400 +#define TCG_CT_CONST_U32  0x800 +#define TCG_CT_CONST_ZERO 0x1000 +#define TCG_CT_CONST_MONE 0x2000 + +static tcg_insn_unit *tb_ret_addr; + +#ifndef GUEST_BASE +#define GUEST_BASE 0 +#endif + +#include "elf.h" +static bool have_isa_2_06; +#define HAVE_ISA_2_06  have_isa_2_06 +#define HAVE_ISEL      have_isa_2_06 + +#ifdef CONFIG_USE_GUEST_BASE +#define TCG_GUEST_BASE_REG 30 +#else +#define TCG_GUEST_BASE_REG 0 +#endif + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +    "r0", +    "r1", +    "r2", +    "r3", +    "r4", +    "r5", +    "r6", +    "r7", +    "r8", +    "r9", +    "r10", +    "r11", +    "r12", +    "r13", +    "r14", +    "r15", +    "r16", +    "r17", +    "r18", +    "r19", +    "r20", +    "r21", +    "r22", +    "r23", +    "r24", +    "r25", +    "r26", +    "r27", +    "r28", +    "r29", +    "r30", +    "r31" +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { +    TCG_REG_R14,  /* call saved registers */ +    TCG_REG_R15, +    TCG_REG_R16, +    TCG_REG_R17, +    TCG_REG_R18, +    TCG_REG_R19, +    TCG_REG_R20, +    TCG_REG_R21, +    TCG_REG_R22, +    TCG_REG_R23, +    TCG_REG_R24, +    TCG_REG_R25, +    TCG_REG_R26, +    TCG_REG_R27, +    TCG_REG_R28, +    TCG_REG_R29, +    TCG_REG_R30, +    TCG_REG_R31, +    TCG_REG_R12,  /* call clobbered, non-arguments */ +    TCG_REG_R11, +    TCG_REG_R2, +    TCG_REG_R13, +    TCG_REG_R10,  /* call clobbered, arguments */ +    TCG_REG_R9, +    TCG_REG_R8, +    TCG_REG_R7, +    TCG_REG_R6, +    TCG_REG_R5, +    TCG_REG_R4, +    TCG_REG_R3, +}; + +static const int tcg_target_call_iarg_regs[] = { +    TCG_REG_R3, +    TCG_REG_R4, +    TCG_REG_R5, +    TCG_REG_R6, +    TCG_REG_R7, +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10 +}; + +static const int tcg_target_call_oarg_regs[] = { +    TCG_REG_R3, +    TCG_REG_R4 +}; + +static const int tcg_target_callee_save_regs[] = { +#ifdef TCG_TARGET_CALL_DARWIN +    TCG_REG_R11, +#endif +    TCG_REG_R14, +    TCG_REG_R15, +    TCG_REG_R16, +    TCG_REG_R17, +    TCG_REG_R18, +    TCG_REG_R19, +    TCG_REG_R20, +    TCG_REG_R21, +    TCG_REG_R22, +    TCG_REG_R23, +    TCG_REG_R24, +    TCG_REG_R25, +    TCG_REG_R26, +    TCG_REG_R27, /* currently used for the global env */ +    TCG_REG_R28, +    TCG_REG_R29, +    TCG_REG_R30, +    TCG_REG_R31 +}; + +static inline bool in_range_b(tcg_target_long target) +{ +    return target == sextract64(target, 0, 26); +} + +static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); +    assert(in_range_b(disp)); +    return disp & 0x3fffffc; +} + +static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); +} + +static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); +    assert(disp == (int16_t) disp); +    return disp & 0xfffc; +} + +static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) +{ +    *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); +} + +static inline void tcg_out_b_noaddr(TCGContext *s, int insn) +{ +    unsigned retrans = *s->code_ptr & 0x3fffffc; +    tcg_out32(s, insn | retrans); +} + +static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) +{ +    unsigned retrans = *s->code_ptr & 0xfffc; +    tcg_out32(s, insn | 
retrans); +} + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    tcg_insn_unit *target = (tcg_insn_unit *)value; + +    assert(addend == 0); +    switch (type) { +    case R_PPC_REL14: +        reloc_pc14(code_ptr, target); +        break; +    case R_PPC_REL24: +        reloc_pc24(code_ptr, target); +        break; +    default: +        tcg_abort(); +    } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str; + +    ct_str = *pct_str; +    switch (ct_str[0]) { +    case 'A': case 'B': case 'C': case 'D': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A'); +        break; +    case 'r': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffffffff); +        break; +    case 'L':                   /* qemu_ld constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffffffff); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); +#endif +        break; +    case 'S':                   /* qemu_st constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffffffff); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3); +#ifdef CONFIG_SOFTMMU +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6); +#endif +        break; +    case 'I': +        ct->ct |= TCG_CT_CONST_S16; +        break; +    case 'J': +        ct->ct |= TCG_CT_CONST_U16; +        break; +    case 'M': +        ct->ct |= TCG_CT_CONST_MONE; +        break; +    case 'T': +        ct->ct |= TCG_CT_CONST_S32; +        break; +    case 'U': +        ct->ct |= TCG_CT_CONST_U32; +        break; +    case 'Z': +        ct->ct |= TCG_CT_CONST_ZERO; +        break; +    default: +        return -1; +    } +    ct_str++; +    *pct_str = ct_str; +    return 0; +} + +/* test if a constant matches the constraint */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, +                                  const TCGArgConstraint *arg_ct) +{ +    int ct = arg_ct->ct; +    if (ct & TCG_CT_CONST) { +        return 1; +    } + +    /* The only 32-bit constraint we use aside from +       TCG_CT_CONST is TCG_CT_CONST_S16.  
*/ +    if (type == TCG_TYPE_I32) { +        val = (int32_t)val; +    } + +    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { +        return 1; +    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) { +        return 1; +    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { +        return 1; +    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { +        return 1; +    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) { +        return 1; +    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) { +        return 1; +    } +    return 0; +} + +#define OPCD(opc) ((opc)<<26) +#define XO19(opc) (OPCD(19)|((opc)<<1)) +#define MD30(opc) (OPCD(30)|((opc)<<2)) +#define MDS30(opc) (OPCD(30)|((opc)<<1)) +#define XO31(opc) (OPCD(31)|((opc)<<1)) +#define XO58(opc) (OPCD(58)|(opc)) +#define XO62(opc) (OPCD(62)|(opc)) + +#define B      OPCD( 18) +#define BC     OPCD( 16) +#define LBZ    OPCD( 34) +#define LHZ    OPCD( 40) +#define LHA    OPCD( 42) +#define LWZ    OPCD( 32) +#define STB    OPCD( 38) +#define STH    OPCD( 44) +#define STW    OPCD( 36) + +#define STD    XO62(  0) +#define STDU   XO62(  1) +#define STDX   XO31(149) + +#define LD     XO58(  0) +#define LDX    XO31( 21) +#define LDU    XO58(  1) +#define LWA    XO58(  2) +#define LWAX   XO31(341) + +#define ADDIC  OPCD( 12) +#define ADDI   OPCD( 14) +#define ADDIS  OPCD( 15) +#define ORI    OPCD( 24) +#define ORIS   OPCD( 25) +#define XORI   OPCD( 26) +#define XORIS  OPCD( 27) +#define ANDI   OPCD( 28) +#define ANDIS  OPCD( 29) +#define MULLI  OPCD(  7) +#define CMPLI  OPCD( 10) +#define CMPI   OPCD( 11) +#define SUBFIC OPCD( 8) + +#define LWZU   OPCD( 33) +#define STWU   OPCD( 37) + +#define RLWIMI OPCD( 20) +#define RLWINM OPCD( 21) +#define RLWNM  OPCD( 23) + +#define RLDICL MD30(  0) +#define RLDICR MD30(  1) +#define RLDIMI MD30(  3) +#define RLDCL  MDS30( 8) + +#define BCLR   XO19( 16) +#define BCCTR  XO19(528) +#define CRAND  XO19(257) +#define CRANDC XO19(129) +#define CRNAND XO19(225) +#define CROR   XO19(449) +#define CRNOR  XO19( 33) + +#define EXTSB  XO31(954) +#define EXTSH  XO31(922) +#define EXTSW  XO31(986) +#define ADD    XO31(266) +#define ADDE   XO31(138) +#define ADDME  XO31(234) +#define ADDZE  XO31(202) +#define ADDC   XO31( 10) +#define AND    XO31( 28) +#define SUBF   XO31( 40) +#define SUBFC  XO31(  8) +#define SUBFE  XO31(136) +#define SUBFME XO31(232) +#define SUBFZE XO31(200) +#define OR     XO31(444) +#define XOR    XO31(316) +#define MULLW  XO31(235) +#define MULHW  XO31( 75) +#define MULHWU XO31( 11) +#define DIVW   XO31(491) +#define DIVWU  XO31(459) +#define CMP    XO31(  0) +#define CMPL   XO31( 32) +#define LHBRX  XO31(790) +#define LWBRX  XO31(534) +#define LDBRX  XO31(532) +#define STHBRX XO31(918) +#define STWBRX XO31(662) +#define STDBRX XO31(660) +#define MFSPR  XO31(339) +#define MTSPR  XO31(467) +#define SRAWI  XO31(824) +#define NEG    XO31(104) +#define MFCR   XO31( 19) +#define MFOCRF (MFCR | (1u << 20)) +#define NOR    XO31(124) +#define CNTLZW XO31( 26) +#define CNTLZD XO31( 58) +#define ANDC   XO31( 60) +#define ORC    XO31(412) +#define EQV    XO31(284) +#define NAND   XO31(476) +#define ISEL   XO31( 15) + +#define MULLD  XO31(233) +#define MULHD  XO31( 73) +#define MULHDU XO31(  9) +#define DIVD   XO31(489) +#define DIVDU  XO31(457) + +#define LBZX   XO31( 87) +#define LHZX   XO31(279) +#define LHAX   XO31(343) +#define LWZX   XO31( 23) +#define STBX   XO31(215) +#define STHX   XO31(407) +#define STWX   XO31(151) + +#define SPR(a, b) ((((a)<<5)|(b))<<11) 
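A minimal standalone sketch (not part of the commit itself) of how the encoding helpers in this backend compose a 32-bit PowerPC instruction word: OPCD/XO31 above supply the primary and extended opcode bits, and the RT/RA/RB field macros defined just below shift register numbers into their fields, all OR-ed together before being emitted with tcg_out32(). The program below uses local copies of those same shifts; main(), the printf and the 0x7c642a14 comparison value are only illustration, derived from the standard "add r3,r4,r5" encoding.

#include <stdio.h>
#include <stdint.h>

/* Local copies of the backend's shifts: primary opcode in the top 6 bits,
   extended opcode for primary opcode 31 shifted left by 1, and the
   destination/source register fields at bits 21, 16 and 11.  */
#define OPCD(opc)    ((uint32_t)(opc) << 26)
#define XO31(opc)    (OPCD(31) | ((uint32_t)(opc) << 1))
#define RT(r)        ((uint32_t)(r) << 21)
#define RA(r)        ((uint32_t)(r) << 16)
#define RB(r)        ((uint32_t)(r) << 11)
#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))

#define ADD XO31(266)

int main(void)
{
    /* Compose "add r3, r4, r5"; the architectural encoding is 0x7c642a14. */
    uint32_t insn = ADD | TAB(3, 4, 5);
    printf("add r3,r4,r5 = 0x%08x\n", insn);
    return 0;
}

The same pattern is used throughout the backend: a function such as tcg_out_mov() builds the word from one of these opcode constants plus SAB()/TAB()/TAI() field groups and hands the result to tcg_out32().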
+#define LR     SPR(8, 0) +#define CTR    SPR(9, 0) + +#define SLW    XO31( 24) +#define SRW    XO31(536) +#define SRAW   XO31(792) + +#define SLD    XO31( 27) +#define SRD    XO31(539) +#define SRAD   XO31(794) +#define SRADI  XO31(413<<1) + +#define TW     XO31( 4) +#define TRAP   (TW | TO(31)) + +#define NOP    ORI  /* ori 0,0,0 */ + +#define RT(r) ((r)<<21) +#define RS(r) ((r)<<21) +#define RA(r) ((r)<<16) +#define RB(r) ((r)<<11) +#define TO(t) ((t)<<21) +#define SH(s) ((s)<<11) +#define MB(b) ((b)<<6) +#define ME(e) ((e)<<1) +#define BO(o) ((o)<<21) +#define MB64(b) ((b)<<5) +#define FXM(b) (1 << (19 - (b))) + +#define LK    1 + +#define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) +#define SAB(s, a, b) (RS(s) | RA(a) | RB(b)) +#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff)) +#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff)) + +#define BF(n)    ((n)<<23) +#define BI(n, c) (((c)+((n)*4))<<16) +#define BT(n, c) (((c)+((n)*4))<<21) +#define BA(n, c) (((c)+((n)*4))<<16) +#define BB(n, c) (((c)+((n)*4))<<11) +#define BC_(n, c) (((c)+((n)*4))<<6) + +#define BO_COND_TRUE  BO(12) +#define BO_COND_FALSE BO( 4) +#define BO_ALWAYS     BO(20) + +enum { +    CR_LT, +    CR_GT, +    CR_EQ, +    CR_SO +}; + +static const uint32_t tcg_to_bc[] = { +    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE, +    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE, +    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE, +    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE, +    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE, +    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE, +    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE, +    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE, +    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE, +    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE, +}; + +/* The low bit here is set if the RA and RB fields must be inverted.  
*/ +static const uint32_t tcg_to_isel[] = { +    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ), +    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1, +    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT), +    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1, +    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1, +    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT), +    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), +    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, +    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, +    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), +}; + +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, +                             TCGReg base, tcg_target_long offset); + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ +    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); +    if (ret != arg) { +        tcg_out32(s, OR | SAB(arg, ret, arg)); +    } +} + +static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs, +                               int sh, int mb) +{ +    assert(TCG_TARGET_REG_BITS == 64); +    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1); +    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f)); +    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb); +} + +static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs, +                               int sh, int mb, int me) +{ +    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me)); +} + +static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src) +{ +    tcg_out_rld(s, RLDICL, dst, src, 0, 32); +} + +static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ +    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c); +} + +static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ +    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c); +} + +static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ +    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31); +} + +static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c) +{ +    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c); +} + +static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg) +{ +    if (arg == (int16_t) arg) { +        tcg_out32(s, ADDI | TAI(ret, 0, arg)); +    } else { +        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16)); +        if (arg & 0xffff) { +            tcg_out32(s, ORI | SAI(ret, ret, arg)); +        } +    } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, +                         tcg_target_long arg) +{ +    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); +    if (type == TCG_TYPE_I32 || arg == (int32_t)arg) { +        tcg_out_movi32(s, ret, arg); +    } else if (arg == (uint32_t)arg && !(arg & 0x8000)) { +        tcg_out32(s, ADDI | TAI(ret, 0, arg)); +        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); +    } else { +        int32_t high; + +        if (USE_REG_RA) { +            intptr_t diff = arg - (intptr_t)tb_ret_addr; +            if (diff == (int32_t)diff) { +                tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff); +                return; +            } +        } + +        high = arg >> 31 >> 1; +        tcg_out_movi32(s, ret, high); +        if (high) { +            tcg_out_shli64(s, ret, ret, 32); +        } +        if (arg & 0xffff0000) { +            tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16)); +        } +        if (arg & 0xffff) { +            tcg_out32(s, ORI | SAI(ret, ret, arg)); +        } +    } +} + +static bool 
mask_operand(uint32_t c, int *mb, int *me) +{ +    uint32_t lsb, test; + +    /* Accept a bit pattern like: +           0....01....1 +           1....10....0 +           0..01..10..0 +       Keep track of the transitions.  */ +    if (c == 0 || c == -1) { +        return false; +    } +    test = c; +    lsb = test & -test; +    test += lsb; +    if (test & (test - 1)) { +        return false; +    } + +    *me = clz32(lsb); +    *mb = test ? clz32(test & -test) + 1 : 0; +    return true; +} + +static bool mask64_operand(uint64_t c, int *mb, int *me) +{ +    uint64_t lsb; + +    if (c == 0) { +        return false; +    } + +    lsb = c & -c; +    /* Accept 1..10..0.  */ +    if (c == -lsb) { +        *mb = 0; +        *me = clz64(lsb); +        return true; +    } +    /* Accept 0..01..1.  */ +    if (lsb == 1 && (c & (c + 1)) == 0) { +        *mb = clz64(c + 1) + 1; +        *me = 63; +        return true; +    } +    return false; +} + +static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ +    int mb, me; + +    if ((c & 0xffff) == c) { +        tcg_out32(s, ANDI | SAI(src, dst, c)); +        return; +    } else if ((c & 0xffff0000) == c) { +        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); +        return; +    } else if (mask_operand(c, &mb, &me)) { +        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me); +    } else { +        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c); +        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); +    } +} + +static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c) +{ +    int mb, me; + +    assert(TCG_TARGET_REG_BITS == 64); +    if ((c & 0xffff) == c) { +        tcg_out32(s, ANDI | SAI(src, dst, c)); +        return; +    } else if ((c & 0xffff0000) == c) { +        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16)); +        return; +    } else if (mask64_operand(c, &mb, &me)) { +        if (mb == 0) { +            tcg_out_rld(s, RLDICR, dst, src, 0, me); +        } else { +            tcg_out_rld(s, RLDICL, dst, src, 0, mb); +        } +    } else { +        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c); +        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0)); +    } +} + +static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c, +                           int op_lo, int op_hi) +{ +    if (c >> 16) { +        tcg_out32(s, op_hi | SAI(src, dst, c >> 16)); +        src = dst; +    } +    if (c & 0xffff) { +        tcg_out32(s, op_lo | SAI(src, dst, c)); +        src = dst; +    } +} + +static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ +    tcg_out_zori32(s, dst, src, c, ORI, ORIS); +} + +static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c) +{ +    tcg_out_zori32(s, dst, src, c, XORI, XORIS); +} + +static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target) +{ +    ptrdiff_t disp = tcg_pcrel_diff(s, target); +    if (in_range_b(disp)) { +        tcg_out32(s, B | (disp & 0x3fffffc) | mask); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target); +        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR); +        tcg_out32(s, BCCTR | BO_ALWAYS | mask); +    } +} + +static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, +                             TCGReg base, tcg_target_long offset) +{ +    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; +    bool is_store = false; +    TCGReg rs = TCG_REG_TMP1; + +    switch (opi) { +    case LD: case LWA: +        align = 3; +        /* 
FALLTHRU */ +    default: +        if (rt != TCG_REG_R0) { +            rs = rt; +            break; +        } +        break; +    case STD: +        align = 3; +        /* FALLTHRU */ +    case STB: case STH: case STW: +        is_store = true; +        break; +    } + +    /* For unaligned, or very large offsets, use the indexed form.  */ +    if (offset & align || offset != (int32_t)offset) { +        if (rs == base) { +            rs = TCG_REG_R0; +        } +        tcg_debug_assert(!is_store || rs != rt); +        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); +        tcg_out32(s, opx | TAB(rt, base, rs)); +        return; +    } + +    l0 = (int16_t)offset; +    offset = (offset - l0) >> 16; +    l1 = (int16_t)offset; + +    if (l1 < 0 && orig >= 0) { +        extra = 0x4000; +        l1 = (int16_t)(offset - 0x4000); +    } +    if (l1) { +        tcg_out32(s, ADDIS | TAI(rs, base, l1)); +        base = rs; +    } +    if (extra) { +        tcg_out32(s, ADDIS | TAI(rs, base, extra)); +        base = rs; +    } +    if (opi != ADDI || base != rt || l0 != 0) { +        tcg_out32(s, opi | TAI(rt, base, l0)); +    } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, +                              TCGReg arg1, intptr_t arg2) +{ +    int opi, opx; + +    assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); +    if (type == TCG_TYPE_I32) { +        opi = LWZ, opx = LWZX; +    } else { +        opi = LD, opx = LDX; +    } +    tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    int opi, opx; + +    assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); +    if (type == TCG_TYPE_I32) { +        opi = STW, opx = STWX; +    } else { +        opi = STD, opx = STDX; +    } +    tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); +} + +static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, +                        int const_arg2, int cr, TCGType type) +{ +    int imm; +    uint32_t op; + +    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + +    /* Simplify the comparisons below wrt CMPI.  
*/ +    if (type == TCG_TYPE_I32) { +        arg2 = (int32_t)arg2; +    } + +    switch (cond) { +    case TCG_COND_EQ: +    case TCG_COND_NE: +        if (const_arg2) { +            if ((int16_t) arg2 == arg2) { +                op = CMPI; +                imm = 1; +                break; +            } else if ((uint16_t) arg2 == arg2) { +                op = CMPLI; +                imm = 1; +                break; +            } +        } +        op = CMPL; +        imm = 0; +        break; + +    case TCG_COND_LT: +    case TCG_COND_GE: +    case TCG_COND_LE: +    case TCG_COND_GT: +        if (const_arg2) { +            if ((int16_t) arg2 == arg2) { +                op = CMPI; +                imm = 1; +                break; +            } +        } +        op = CMP; +        imm = 0; +        break; + +    case TCG_COND_LTU: +    case TCG_COND_GEU: +    case TCG_COND_LEU: +    case TCG_COND_GTU: +        if (const_arg2) { +            if ((uint16_t) arg2 == arg2) { +                op = CMPLI; +                imm = 1; +                break; +            } +        } +        op = CMPL; +        imm = 0; +        break; + +    default: +        tcg_abort(); +    } +    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); + +    if (imm) { +        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); +    } else { +        if (const_arg2) { +            tcg_out_movi(s, type, TCG_REG_R0, arg2); +            arg2 = TCG_REG_R0; +        } +        tcg_out32(s, op | RA(arg1) | RB(arg2)); +    } +} + +static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, +                                TCGReg dst, TCGReg src) +{ +    if (type == TCG_TYPE_I32) { +        tcg_out32(s, CNTLZW | RS(src) | RA(dst)); +        tcg_out_shri32(s, dst, dst, 5); +    } else { +        tcg_out32(s, CNTLZD | RS(src) | RA(dst)); +        tcg_out_shri64(s, dst, dst, 6); +    } +} + +static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) +{ +    /* X != 0 implies X + -1 generates a carry.  Extra addition +       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */ +    if (dst != src) { +        tcg_out32(s, ADDIC | TAI(dst, src, -1)); +        tcg_out32(s, SUBFE | TAB(dst, dst, src)); +    } else { +        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1)); +        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src)); +    } +} + +static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, +                                  bool const_arg2) +{ +    if (const_arg2) { +        if ((uint32_t)arg2 == arg2) { +            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); +        } else { +            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); +            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); +        } +    } else { +        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); +    } +    return TCG_REG_R0; +} + +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, +                            TCGArg arg0, TCGArg arg1, TCGArg arg2, +                            int const_arg2) +{ +    int crop, sh; + +    assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); + +    /* Ignore high bits of a potential constant arg2.  */ +    if (type == TCG_TYPE_I32) { +        arg2 = (uint32_t)arg2; +    } + +    /* Handle common and trivial cases before handling anything else.  
*/ +    if (arg2 == 0) { +        switch (cond) { +        case TCG_COND_EQ: +            tcg_out_setcond_eq0(s, type, arg0, arg1); +            return; +        case TCG_COND_NE: +            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { +                tcg_out_ext32u(s, TCG_REG_R0, arg1); +                arg1 = TCG_REG_R0; +            } +            tcg_out_setcond_ne0(s, arg0, arg1); +            return; +        case TCG_COND_GE: +            tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); +            arg1 = arg0; +            /* FALLTHRU */ +        case TCG_COND_LT: +            /* Extract the sign bit.  */ +            if (type == TCG_TYPE_I32) { +                tcg_out_shri32(s, arg0, arg1, 31); +            } else { +                tcg_out_shri64(s, arg0, arg1, 63); +            } +            return; +        default: +            break; +        } +    } + +    /* If we have ISEL, we can implement everything with 3 or 4 insns. +       All other cases below are also at least 3 insns, so speed up the +       code generator by not considering them and always using ISEL.  */ +    if (HAVE_ISEL) { +        int isel, tab; + +        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + +        isel = tcg_to_isel[cond]; + +        tcg_out_movi(s, type, arg0, 1); +        if (isel & 1) { +            /* arg0 = (bc ? 0 : 1) */ +            tab = TAB(arg0, 0, arg0); +            isel &= ~1; +        } else { +            /* arg0 = (bc ? 1 : 0) */ +            tcg_out_movi(s, type, TCG_REG_R0, 0); +            tab = TAB(arg0, arg0, TCG_REG_R0); +        } +        tcg_out32(s, isel | tab); +        return; +    } + +    switch (cond) { +    case TCG_COND_EQ: +        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); +        tcg_out_setcond_eq0(s, type, arg0, arg1); +        return; + +    case TCG_COND_NE: +        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); +        /* Discard the high bits only once, rather than both inputs.  
*/ +        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { +            tcg_out_ext32u(s, TCG_REG_R0, arg1); +            arg1 = TCG_REG_R0; +        } +        tcg_out_setcond_ne0(s, arg0, arg1); +        return; + +    case TCG_COND_GT: +    case TCG_COND_GTU: +        sh = 30; +        crop = 0; +        goto crtest; + +    case TCG_COND_LT: +    case TCG_COND_LTU: +        sh = 29; +        crop = 0; +        goto crtest; + +    case TCG_COND_GE: +    case TCG_COND_GEU: +        sh = 31; +        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT); +        goto crtest; + +    case TCG_COND_LE: +    case TCG_COND_LEU: +        sh = 31; +        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT); +    crtest: +        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); +        if (crop) { +            tcg_out32(s, crop); +        } +        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); +        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31); +        break; + +    default: +        tcg_abort(); +    } +} + +static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) +{ +    if (l->has_value) { +        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); +    } else { +        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); +        tcg_out_bc_noaddr(s, bc); +    } +} + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, +                           TCGArg arg1, TCGArg arg2, int const_arg2, +                           TCGLabel *l, TCGType type) +{ +    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); +    tcg_out_bc(s, tcg_to_bc[cond], l); +} + +static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, +                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, +                            TCGArg v2, bool const_c2) +{ +    /* If for some reason both inputs are zero, don't produce bad code.  */ +    if (v1 == 0 && v2 == 0) { +        tcg_out_movi(s, type, dest, 0); +        return; +    } + +    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); + +    if (HAVE_ISEL) { +        int isel = tcg_to_isel[cond]; + +        /* Swap the V operands if the operation indicates inversion.  */ +        if (isel & 1) { +            int t = v1; +            v1 = v2; +            v2 = t; +            isel &= ~1; +        } +        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  
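(Isel reads RA = 0 as the constant zero, but RB always names a real
+           register, so a zero V2 needs R0 loaded with zero first.)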
*/ +        if (v2 == 0) { +            tcg_out_movi(s, type, TCG_REG_R0, 0); +        } +        tcg_out32(s, isel | TAB(dest, v1, v2)); +    } else { +        if (dest == v2) { +            cond = tcg_invert_cond(cond); +            v2 = v1; +        } else if (dest != v1) { +            if (v1 == 0) { +                tcg_out_movi(s, type, dest, 0); +            } else { +                tcg_out_mov(s, type, dest, v1); +            } +        } +        /* Branch forward over one insn */ +        tcg_out32(s, tcg_to_bc[cond] | 8); +        if (v2 == 0) { +            tcg_out_movi(s, type, dest, 0); +        } else { +            tcg_out_mov(s, type, dest, v2); +        } +    } +} + +static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, +                         const int *const_args) +{ +    static const struct { uint8_t bit1, bit2; } bits[] = { +        [TCG_COND_LT ] = { CR_LT, CR_LT }, +        [TCG_COND_LE ] = { CR_LT, CR_GT }, +        [TCG_COND_GT ] = { CR_GT, CR_GT }, +        [TCG_COND_GE ] = { CR_GT, CR_LT }, +        [TCG_COND_LTU] = { CR_LT, CR_LT }, +        [TCG_COND_LEU] = { CR_LT, CR_GT }, +        [TCG_COND_GTU] = { CR_GT, CR_GT }, +        [TCG_COND_GEU] = { CR_GT, CR_LT }, +    }; + +    TCGCond cond = args[4], cond2; +    TCGArg al, ah, bl, bh; +    int blconst, bhconst; +    int op, bit1, bit2; + +    al = args[0]; +    ah = args[1]; +    bl = args[2]; +    bh = args[3]; +    blconst = const_args[2]; +    bhconst = const_args[3]; + +    switch (cond) { +    case TCG_COND_EQ: +        op = CRAND; +        goto do_equality; +    case TCG_COND_NE: +        op = CRNAND; +    do_equality: +        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32); +        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32); +        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); +        break; + +    case TCG_COND_LT: +    case TCG_COND_LE: +    case TCG_COND_GT: +    case TCG_COND_GE: +    case TCG_COND_LTU: +    case TCG_COND_LEU: +    case TCG_COND_GTU: +    case TCG_COND_GEU: +        bit1 = bits[cond].bit1; +        bit2 = bits[cond].bit2; +        op = (bit1 != bit2 ? CRANDC : CRAND); +        cond2 = tcg_unsigned_cond(cond); + +        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32); +        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32); +        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2)); +        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ)); +        break; + +    default: +        tcg_abort(); +    } +} + +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, +                             const int *const_args) +{ +    tcg_out_cmp2(s, args + 1, const_args + 1); +    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7)); +    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31); +} + +static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, +                             const int *const_args) +{ +    tcg_out_cmp2(s, args, const_args); +    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); +} + +void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) +{ +    TCGContext s; + +    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr; +    tcg_out_b(&s, 0, (tcg_insn_unit *)addr); +    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s)); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target) +{ +#ifdef _CALL_AIX +    /* Look through the descriptor.  
If the branch is in range, and we +       don't have to spend too much effort on building the toc.  */ +    void *tgt = ((void **)target)[0]; +    uintptr_t toc = ((uintptr_t *)target)[1]; +    intptr_t diff = tcg_pcrel_diff(s, tgt); + +    if (in_range_b(diff) && toc == (uint32_t)toc) { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc); +        tcg_out_b(s, LK, tgt); +    } else { +        /* Fold the low bits of the constant into the addresses below.  */ +        intptr_t arg = (intptr_t)target; +        int ofs = (int16_t)arg; + +        if (ofs + 8 < 0x8000) { +            arg -= ofs; +        } else { +            ofs = 0; +        } +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg); +        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs); +        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR); +        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP); +        tcg_out32(s, BCCTR | BO_ALWAYS | LK); +    } +#elif defined(_CALL_ELF) && _CALL_ELF == 2 +    intptr_t diff; + +    /* In the ELFv2 ABI, we have to set up r12 to contain the destination +       address, which the callee uses to compute its TOC address.  */ +    /* FIXME: when the branch is in range, we could avoid r12 load if we +       knew that the destination uses the same TOC, and what its local +       entry point offset is.  */ +    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target); + +    diff = tcg_pcrel_diff(s, target); +    if (in_range_b(diff)) { +        tcg_out_b(s, LK, target); +    } else { +        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR); +        tcg_out32(s, BCCTR | BO_ALWAYS | LK); +    } +#else +    tcg_out_b(s, LK, target); +#endif +} + +static const uint32_t qemu_ldx_opc[16] = { +    [MO_UB] = LBZX, +    [MO_UW] = LHZX, +    [MO_UL] = LWZX, +    [MO_Q]  = LDX, +    [MO_SW] = LHAX, +    [MO_SL] = LWAX, +    [MO_BSWAP | MO_UB] = LBZX, +    [MO_BSWAP | MO_UW] = LHBRX, +    [MO_BSWAP | MO_UL] = LWBRX, +    [MO_BSWAP | MO_Q]  = LDBRX, +}; + +static const uint32_t qemu_stx_opc[16] = { +    [MO_UB] = STBX, +    [MO_UW] = STHX, +    [MO_UL] = STWX, +    [MO_Q]  = STDX, +    [MO_BSWAP | MO_UB] = STBX, +    [MO_BSWAP | MO_UW] = STHBRX, +    [MO_BSWAP | MO_UL] = STWBRX, +    [MO_BSWAP | MO_Q]  = STDBRX, +}; + +static const uint32_t qemu_exts_opc[4] = { +    EXTSB, EXTSH, EXTSW, 0 +}; + +#if defined (CONFIG_SOFTMMU) +/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, + *                                 int mmu_idx, uintptr_t ra) + */ +static void * const qemu_ld_helpers[16] = { +    [MO_UB]   = helper_ret_ldub_mmu, +    [MO_LEUW] = helper_le_lduw_mmu, +    [MO_LEUL] = helper_le_ldul_mmu, +    [MO_LEQ]  = helper_le_ldq_mmu, +    [MO_BEUW] = helper_be_lduw_mmu, +    [MO_BEUL] = helper_be_ldul_mmu, +    [MO_BEQ]  = helper_be_ldq_mmu, +}; + +/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, + *                                 uintxx_t val, int mmu_idx, uintptr_t ra) + */ +static void * const qemu_st_helpers[16] = { +    [MO_UB]   = helper_ret_stb_mmu, +    [MO_LEUW] = helper_le_stw_mmu, +    [MO_LEUL] = helper_le_stl_mmu, +    [MO_LEQ]  = helper_le_stq_mmu, +    [MO_BEUW] = helper_be_stw_mmu, +    [MO_BEUL] = helper_be_stl_mmu, +    [MO_BEQ]  = helper_be_stq_mmu, +}; + +/* Perform the TLB load and compare.  Places the result of the comparison +   in CR7, loads the addend of the TLB into R3, and returns the register +   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. 
*/ + +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, +                               TCGReg addrlo, TCGReg addrhi, +                               int mem_index, bool is_read) +{ +    int cmp_off +        = (is_read +           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) +           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); +    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); +    TCGReg base = TCG_AREG0; + +    /* Extract the page index, shifted into place for tlb index.  */ +    if (TCG_TARGET_REG_BITS == 64) { +        if (TARGET_LONG_BITS == 32) { +            /* Zero-extend the address into a place helpful for further use. */ +            tcg_out_ext32u(s, TCG_REG_R4, addrlo); +            addrlo = TCG_REG_R4; +        } else { +            tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo, +                        64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS); +        } +    } + +    /* Compensate for very large offsets.  */ +    if (add_off >= 0x8000) { +        /* Most target env are smaller than 32k; none are larger than 64k. +           Simplify the logic here merely to offset by 0x7ff0, giving us a +           range just shy of 64k.  Check this assumption.  */ +        QEMU_BUILD_BUG_ON(offsetof(CPUArchState, +                                   tlb_table[NB_MMU_MODES - 1][1]) +                          > 0x7ff0 + 0x7fff); +        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0)); +        base = TCG_REG_TMP1; +        cmp_off -= 0x7ff0; +        add_off -= 0x7ff0; +    } + +    /* Extraction and shifting, part 2.  */ +    if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { +        tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo, +                    32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), +                    32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS), +                    31 - CPU_TLB_ENTRY_BITS); +    } else { +        tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS); +    } + +    tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base)); + +    /* Load the tlb comparator.  */ +    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off); +        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4); +    } else { +        tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off); +    } + +    /* Load the TLB addend for use on the fast path.  Do this asap +       to minimize any load use delay.  */ +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); + +    /* Clear the non-page, non-alignment bits from the address.  
*/ +    if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { +        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, +                    (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); +    } else if (!s_bits) { +        tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, +                    0, 63 - TARGET_PAGE_BITS); +    } else { +        tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, +                    64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); +        tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); +    } + +    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, +                    0, 7, TCG_TYPE_I32); +        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32); +        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ)); +    } else { +        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1, +                    0, 7, TCG_TYPE_TL); +    } + +    return addrlo; +} + +/* Record the context of a call to the out of line helper code for the slow +   path for a load or store, so that we can later generate the correct +   helper code.  */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, +                                TCGReg datalo_reg, TCGReg datahi_reg, +                                TCGReg addrlo_reg, TCGReg addrhi_reg, +                                tcg_insn_unit *raddr, tcg_insn_unit *lptr) +{ +    TCGLabelQemuLdst *label = new_ldst_label(s); + +    label->is_ld = is_ld; +    label->oi = oi; +    label->datalo_reg = datalo_reg; +    label->datahi_reg = datahi_reg; +    label->addrlo_reg = addrlo_reg; +    label->addrhi_reg = addrhi_reg; +    label->raddr = raddr; +    label->label_ptr[0] = lptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ +    TCGMemOpIdx oi = lb->oi; +    TCGMemOp opc = get_memop(oi); +    TCGReg hi, lo, arg = TCG_REG_R3; + +    reloc_pc14(lb->label_ptr[0], s->code_ptr); + +    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + +    lo = lb->addrlo_reg; +    hi = lb->addrhi_reg; +    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS +        arg |= 1; +#endif +        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); +        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); +    } else { +        /* If the address needed to be zero-extended, we'll have already +           placed it in R4.  The only remaining case is 64-bit guest.  
*/ +        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); +    } + +    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); +    tcg_out32(s, MFSPR | RT(arg) | LR); + +    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + +    lo = lb->datalo_reg; +    hi = lb->datahi_reg; +    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { +        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4); +        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3); +    } else if (opc & MO_SIGN) { +        uint32_t insn = qemu_exts_opc[opc & MO_SIZE]; +        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3)); +    } else { +        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3); +    } + +    tcg_out_b(s, 0, lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ +    TCGMemOpIdx oi = lb->oi; +    TCGMemOp opc = get_memop(oi); +    TCGMemOp s_bits = opc & MO_SIZE; +    TCGReg hi, lo, arg = TCG_REG_R3; + +    reloc_pc14(lb->label_ptr[0], s->code_ptr); + +    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); + +    lo = lb->addrlo_reg; +    hi = lb->addrhi_reg; +    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS +        arg |= 1; +#endif +        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); +        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); +    } else { +        /* If the address needed to be zero-extended, we'll have already +           placed it in R4.  The only remaining case is 64-bit guest.  */ +        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo); +    } + +    lo = lb->datalo_reg; +    hi = lb->datahi_reg; +    if (TCG_TARGET_REG_BITS == 32) { +        switch (s_bits) { +        case MO_64: +#ifdef TCG_TARGET_CALL_ALIGN_ARGS +            arg |= 1; +#endif +            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); +            /* FALLTHRU */ +        case MO_32: +            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); +            break; +        default: +            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31); +            break; +        } +    } else { +        if (s_bits == MO_64) { +            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo); +        } else { +            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits)); +        } +    } + +    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); +    tcg_out32(s, MFSPR | RT(arg) | LR); + +    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + +    tcg_out_b(s, 0, lb->raddr); +} +#endif /* SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) +{ +    TCGReg datalo, datahi, addrlo, rbase; +    TCGReg addrhi __attribute__((unused)); +    TCGMemOpIdx oi; +    TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU +    int mem_index; +    tcg_insn_unit *label_ptr; +#endif + +    datalo = *args++; +    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); +    addrlo = *args++; +    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); +    s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU +    mem_index = get_mmuidx(oi); +    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true); + +    /* Load a pointer into the current opcode w/conditional branch-link. */ +    label_ptr = s->code_ptr; +    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + +    rbase = TCG_REG_R3; +#else  /* !CONFIG_SOFTMMU */ +    rbase = GUEST_BASE ? 
TCG_GUEST_BASE_REG : 0; +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { +        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); +        addrlo = TCG_REG_TMP1; +    } +#endif + +    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { +        if (opc & MO_BSWAP) { +            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); +            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); +            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0)); +        } else if (rbase != 0) { +            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); +            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo)); +            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0)); +        } else if (addrlo == datahi) { +            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); +            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); +        } else { +            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0)); +            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4)); +        } +    } else { +        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; +        if (!HAVE_ISA_2_06 && insn == LDBRX) { +            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); +            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); +            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); +            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0); +        } else if (insn) { +            tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); +        } else { +            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)]; +            tcg_out32(s, insn | TAB(datalo, rbase, addrlo)); +            insn = qemu_exts_opc[s_bits]; +            tcg_out32(s, insn | RA(datalo) | RS(datalo)); +        } +    } + +#ifdef CONFIG_SOFTMMU +    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, +                        s->code_ptr, label_ptr); +#endif +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) +{ +    TCGReg datalo, datahi, addrlo, rbase; +    TCGReg addrhi __attribute__((unused)); +    TCGMemOpIdx oi; +    TCGMemOp opc, s_bits; +#ifdef CONFIG_SOFTMMU +    int mem_index; +    tcg_insn_unit *label_ptr; +#endif + +    datalo = *args++; +    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); +    addrlo = *args++; +    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0); +    oi = *args++; +    opc = get_memop(oi); +    s_bits = opc & MO_SIZE; + +#ifdef CONFIG_SOFTMMU +    mem_index = get_mmuidx(oi); +    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false); + +    /* Load a pointer into the current opcode w/conditional branch-link. */ +    label_ptr = s->code_ptr; +    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); + +    rbase = TCG_REG_R3; +#else  /* !CONFIG_SOFTMMU */ +    rbase = GUEST_BASE ? 
TCG_GUEST_BASE_REG : 0; +    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { +        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); +        addrlo = TCG_REG_TMP1; +    } +#endif + +    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) { +        if (opc & MO_BSWAP) { +            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); +            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); +            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0)); +        } else if (rbase != 0) { +            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); +            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo)); +            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0)); +        } else { +            tcg_out32(s, STW | TAI(datahi, addrlo, 0)); +            tcg_out32(s, STW | TAI(datalo, addrlo, 4)); +        } +    } else { +        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; +        if (!HAVE_ISA_2_06 && insn == STDBRX) { +            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); +            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); +            tcg_out_shri64(s, TCG_REG_R0, datalo, 32); +            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1)); +        } else { +            tcg_out32(s, insn | SAB(datalo, rbase, addrlo)); +        } +    } + +#ifdef CONFIG_SOFTMMU +    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, +                        s->code_ptr, label_ptr); +#endif +} + +/* Parameters for function call generation, used in tcg.c.  */ +#define TCG_TARGET_STACK_ALIGN       16 +#define TCG_TARGET_EXTEND_ARGS       1 + +#ifdef _CALL_AIX +# define LINK_AREA_SIZE                (6 * SZR) +# define LR_OFFSET                     (1 * SZR) +# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR) +#elif defined(TCG_TARGET_CALL_DARWIN) +# define LINK_AREA_SIZE                (6 * SZR) +# define LR_OFFSET                     (2 * SZR) +#elif TCG_TARGET_REG_BITS == 64 +# if defined(_CALL_ELF) && _CALL_ELF == 2 +#  define LINK_AREA_SIZE               (4 * SZR) +#  define LR_OFFSET                    (1 * SZR) +# endif +#else /* TCG_TARGET_REG_BITS == 32 */ +# if defined(_CALL_SYSV) +#  define LINK_AREA_SIZE               (2 * SZR) +#  define LR_OFFSET                    (1 * SZR) +# endif +#endif +#ifndef LR_OFFSET +# error "Unhandled abi" +#endif +#ifndef TCG_TARGET_CALL_STACK_OFFSET +# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE +#endif + +#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) +#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) + +#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \ +                     + TCG_STATIC_CALL_ARGS_SIZE    \ +                     + CPU_TEMP_BUF_SIZE            \ +                     + REG_SAVE_SIZE                \ +                     + TCG_TARGET_STACK_ALIGN - 1)  \ +                    & -TCG_TARGET_STACK_ALIGN) + +#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    int i; + +#ifdef _CALL_AIX +    void **desc = (void **)s->code_ptr; +    desc[0] = desc + 2;                   /* entry point */ +    desc[1] = 0;                          /* environment pointer */ +    s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */ +#endif + +    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE, +                  CPU_TEMP_BUF_SIZE); + +    /* Prologue */ +    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR); +    tcg_out32(s, (SZR == 8 ? 
STDU : STWU) +              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE)); + +    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { +        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], +                   TCG_REG_R1, REG_SAVE_BOT + i * SZR); +    } +    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); + +#ifdef CONFIG_USE_GUEST_BASE +    if (GUEST_BASE) { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); +    } +#endif + +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); +    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR); + +    if (USE_REG_RA) { +#ifdef _CALL_AIX +        /* Make the caller load the value as the TOC into R2.  */ +        tb_ret_addr = s->code_ptr + 2; +        desc[1] = tb_ret_addr; +        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); +        tcg_out32(s, BCCTR | BO_ALWAYS); +#elif defined(_CALL_ELF) && _CALL_ELF == 2 +        /* Compute from the incoming R12 value.  */ +        tb_ret_addr = s->code_ptr + 2; +        tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, +                                tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); +        tcg_out32(s, BCCTR | BO_ALWAYS); +#else +        /* Reserve max 5 insns for the constant load.  */ +        tb_ret_addr = s->code_ptr + 6; +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); +        tcg_out32(s, BCCTR | BO_ALWAYS); +        while (s->code_ptr < tb_ret_addr) { +            tcg_out32(s, NOP); +        } +#endif +    } else { +        tcg_out32(s, BCCTR | BO_ALWAYS); +        tb_ret_addr = s->code_ptr; +    } + +    /* Epilogue */ +    assert(tb_ret_addr == s->code_ptr); + +    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); +    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) { +        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i], +                   TCG_REG_R1, REG_SAVE_BOT + i * SZR); +    } +    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR); +    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE)); +    tcg_out32(s, BCLR | BO_ALWAYS); +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, +                       const int *const_args) +{ +    TCGArg a0, a1, a2; +    int c; + +    switch (opc) { +    case INDEX_op_exit_tb: +        if (USE_REG_RA) { +            ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr); + +            /* If we can use a direct branch, otherwise use the value in RA. +               Note that the direct branch is always forward.  If it's in +               range now, it'll still be in range after the movi.  Don't +               bother about the 20 bytes where the test here fails but it +               would succeed below.  */ +            if (!in_range_b(disp)) { +                tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR); +                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); +                tcg_out32(s, BCCTR | BO_ALWAYS); +                break; +            } +        } +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); +        tcg_out_b(s, 0, tb_ret_addr); +        break; +    case INDEX_op_goto_tb: +        if (s->tb_jmp_offset) { +            /* Direct jump method.  */ +            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); +            s->code_ptr += 7; +        } else { +            /* Indirect jump method.  
*/ +            tcg_abort(); +        } +        s->tb_next_offset[args[0]] = tcg_current_code_size(s); +        break; +    case INDEX_op_br: +        { +            TCGLabel *l = arg_label(args[0]); + +            if (l->has_value) { +                tcg_out_b(s, 0, l->u.value_ptr); +            } else { +                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); +                tcg_out_b_noaddr(s, B); +            } +        } +        break; +    case INDEX_op_ld8u_i32: +    case INDEX_op_ld8u_i64: +        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld8s_i32: +    case INDEX_op_ld8s_i64: +        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); +        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); +        break; +    case INDEX_op_ld16u_i32: +    case INDEX_op_ld16u_i64: +        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld16s_i32: +    case INDEX_op_ld16s_i64: +        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld_i32: +    case INDEX_op_ld32u_i64: +        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld32s_i64: +        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); +        break; +    case INDEX_op_ld_i64: +        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); +        break; +    case INDEX_op_st8_i32: +    case INDEX_op_st8_i64: +        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); +        break; +    case INDEX_op_st16_i32: +    case INDEX_op_st16_i64: +        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); +        break; +    case INDEX_op_st_i32: +    case INDEX_op_st32_i64: +        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); +        break; +    case INDEX_op_st_i64: +        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); +        break; + +    case INDEX_op_add_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +        do_addi_32: +            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); +        } else { +            tcg_out32(s, ADD | TAB(a0, a1, a2)); +        } +        break; +    case INDEX_op_sub_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[1]) { +            if (const_args[2]) { +                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); +            } else { +                tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); +            } +        } else if (const_args[2]) { +            a2 = -a2; +            goto do_addi_32; +        } else { +            tcg_out32(s, SUBF | TAB(a0, a2, a1)); +        } +        break; + +    case INDEX_op_and_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out_andi32(s, a0, a1, a2); +        } else { +            tcg_out32(s, AND | SAB(a1, a0, a2)); +        } +        break; +    case INDEX_op_and_i64: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out_andi64(s, a0, a1, a2); +        } else { +            tcg_out32(s, AND | SAB(a1, a0, a2)); +        } +        break; +    case INDEX_op_or_i64: +    case INDEX_op_or_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out_ori32(s, a0, a1, a2); +        } else { +            tcg_out32(s, OR | SAB(a1, a0, a2)); +        } +        break; +    case 
INDEX_op_xor_i64: +    case INDEX_op_xor_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out_xori32(s, a0, a1, a2); +        } else { +            tcg_out32(s, XOR | SAB(a1, a0, a2)); +        } +        break; +    case INDEX_op_andc_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out_andi32(s, a0, a1, ~a2); +        } else { +            tcg_out32(s, ANDC | SAB(a1, a0, a2)); +        } +        break; +    case INDEX_op_andc_i64: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out_andi64(s, a0, a1, ~a2); +        } else { +            tcg_out32(s, ANDC | SAB(a1, a0, a2)); +        } +        break; +    case INDEX_op_orc_i32: +        if (const_args[2]) { +            tcg_out_ori32(s, args[0], args[1], ~args[2]); +            break; +        } +        /* FALLTHRU */ +    case INDEX_op_orc_i64: +        tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); +        break; +    case INDEX_op_eqv_i32: +        if (const_args[2]) { +            tcg_out_xori32(s, args[0], args[1], ~args[2]); +            break; +        } +        /* FALLTHRU */ +    case INDEX_op_eqv_i64: +        tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); +        break; +    case INDEX_op_nand_i32: +    case INDEX_op_nand_i64: +        tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); +        break; +    case INDEX_op_nor_i32: +    case INDEX_op_nor_i64: +        tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); +        break; + +    case INDEX_op_mul_i32: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out32(s, MULLI | TAI(a0, a1, a2)); +        } else { +            tcg_out32(s, MULLW | TAB(a0, a1, a2)); +        } +        break; + +    case INDEX_op_div_i32: +        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); +        break; + +    case INDEX_op_divu_i32: +        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); +        break; + +    case INDEX_op_shl_i32: +        if (const_args[2]) { +            tcg_out_shli32(s, args[0], args[1], args[2]); +        } else { +            tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); +        } +        break; +    case INDEX_op_shr_i32: +        if (const_args[2]) { +            tcg_out_shri32(s, args[0], args[1], args[2]); +        } else { +            tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); +        } +        break; +    case INDEX_op_sar_i32: +        if (const_args[2]) { +            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); +        } else { +            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); +        } +        break; +    case INDEX_op_rotl_i32: +        if (const_args[2]) { +            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); +        } else { +            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) +                         | MB(0) | ME(31)); +        } +        break; +    case INDEX_op_rotr_i32: +        if (const_args[2]) { +            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); +        } else { +            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); +            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) +                         | MB(0) | ME(31)); +        } +        break; + +    case INDEX_op_brcond_i32: +        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], +                       
arg_label(args[3]), TCG_TYPE_I32); +        break; +    case INDEX_op_brcond_i64: +        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], +                       arg_label(args[3]), TCG_TYPE_I64); +        break; +    case INDEX_op_brcond2_i32: +        tcg_out_brcond2(s, args, const_args); +        break; + +    case INDEX_op_neg_i32: +    case INDEX_op_neg_i64: +        tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); +        break; + +    case INDEX_op_not_i32: +    case INDEX_op_not_i64: +        tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); +        break; + +    case INDEX_op_add_i64: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +        do_addi_64: +            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); +        } else { +            tcg_out32(s, ADD | TAB(a0, a1, a2)); +        } +        break; +    case INDEX_op_sub_i64: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[1]) { +            if (const_args[2]) { +                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); +            } else { +                tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); +            } +        } else if (const_args[2]) { +            a2 = -a2; +            goto do_addi_64; +        } else { +            tcg_out32(s, SUBF | TAB(a0, a2, a1)); +        } +        break; + +    case INDEX_op_shl_i64: +        if (const_args[2]) { +            tcg_out_shli64(s, args[0], args[1], args[2]); +        } else { +            tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); +        } +        break; +    case INDEX_op_shr_i64: +        if (const_args[2]) { +            tcg_out_shri64(s, args[0], args[1], args[2]); +        } else { +            tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); +        } +        break; +    case INDEX_op_sar_i64: +        if (const_args[2]) { +            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1); +            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh); +        } else { +            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); +        } +        break; +    case INDEX_op_rotl_i64: +        if (const_args[2]) { +            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); +        } else { +            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); +        } +        break; +    case INDEX_op_rotr_i64: +        if (const_args[2]) { +            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); +        } else { +            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); +            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); +        } +        break; + +    case INDEX_op_mul_i64: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            tcg_out32(s, MULLI | TAI(a0, a1, a2)); +        } else { +            tcg_out32(s, MULLD | TAB(a0, a1, a2)); +        } +        break; +    case INDEX_op_div_i64: +        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); +        break; +    case INDEX_op_divu_i64: +        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); +        break; + +    case INDEX_op_qemu_ld_i32: +        tcg_out_qemu_ld(s, args, false); +        break; +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, args, true); +        break; +    case INDEX_op_qemu_st_i32: +        tcg_out_qemu_st(s, args, false); +        break; +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, args, true); +        break; + +    case INDEX_op_ext8s_i32: 
+    case INDEX_op_ext8s_i64: +        c = EXTSB; +        goto gen_ext; +    case INDEX_op_ext16s_i32: +    case INDEX_op_ext16s_i64: +        c = EXTSH; +        goto gen_ext; +    case INDEX_op_ext32s_i64: +        c = EXTSW; +        goto gen_ext; +    gen_ext: +        tcg_out32(s, c | RS(args[1]) | RA(args[0])); +        break; + +    case INDEX_op_setcond_i32: +        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], +                        const_args[2]); +        break; +    case INDEX_op_setcond_i64: +        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], +                        const_args[2]); +        break; +    case INDEX_op_setcond2_i32: +        tcg_out_setcond2(s, args, const_args); +        break; + +    case INDEX_op_bswap16_i32: +    case INDEX_op_bswap16_i64: +        a0 = args[0], a1 = args[1]; +        /* a1 = abcd */ +        if (a0 != a1) { +            /* a0 = (a1 r<< 24) & 0xff # 000c */ +            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); +            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */ +            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23); +        } else { +            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */ +            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23); +            /* a0 = (a1 r<< 24) & 0xff # 000c */ +            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31); +            /* a0 = a0 | r0 # 00dc */ +            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0)); +        } +        break; + +    case INDEX_op_bswap32_i32: +    case INDEX_op_bswap32_i64: +        /* Stolen from gcc's builtin_bswap32 */ +        a1 = args[1]; +        a0 = args[0] == a1 ? TCG_REG_R0 : args[0]; + +        /* a1 = args[1] # abcd */ +        /* a0 = rotate_left (a1, 8) # bcda */ +        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); +        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */ +        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); +        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */ +        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + +        if (a0 == TCG_REG_R0) { +            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); +        } +        break; + +    case INDEX_op_bswap64_i64: +        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0; +        if (a0 == a1) { +            a0 = TCG_REG_R0; +            a2 = a1; +        } + +        /* a1 = # abcd efgh */ +        /* a0 = rl32(a1, 8) # 0000 fghe */ +        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31); +        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */ +        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7); +        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */ +        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23); + +        /* a0 = rl64(a0, 32) # hgfe 0000 */ +        /* a2 = rl64(a1, 32) # efgh abcd */ +        tcg_out_rld(s, RLDICL, a0, a0, 32, 0); +        tcg_out_rld(s, RLDICL, a2, a1, 32, 0); + +        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff)  # hgfe bcda */ +        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31); +        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */ +        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7); +        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */ +        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23); + +        if (a0 == 0) { +            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); +        } +        break; + +    case INDEX_op_deposit_i32: +        if (const_args[2]) { +            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; +            
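/* Only the constant zero can reach here: the value operand uses the
+               "rZ" constraint (see ppc_op_defs below), so depositing a
+               constant degenerates to clearing the field covered by mask,
+               e.g. ofs 8, len 16 gives mask 0x00ffff00.  */
+            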
tcg_out_andi32(s, args[0], args[0], ~mask); +        } else { +            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], +                        32 - args[3] - args[4], 31 - args[3]); +        } +        break; +    case INDEX_op_deposit_i64: +        if (const_args[2]) { +            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; +            tcg_out_andi64(s, args[0], args[0], ~mask); +        } else { +            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], +                        64 - args[3] - args[4]); +        } +        break; + +    case INDEX_op_movcond_i32: +        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], +                        args[3], args[4], const_args[2]); +        break; +    case INDEX_op_movcond_i64: +        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], +                        args[3], args[4], const_args[2]); +        break; + +#if TCG_TARGET_REG_BITS == 64 +    case INDEX_op_add2_i64: +#else +    case INDEX_op_add2_i32: +#endif +        /* Note that the CA bit is defined based on the word size of the +           environment.  So in 64-bit mode it's always carry-out of bit 63. +           The fallback code using deposit works just as well for 32-bit.  */ +        a0 = args[0], a1 = args[1]; +        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { +            a0 = TCG_REG_R0; +        } +        if (const_args[4]) { +            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); +        } else { +            tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); +        } +        if (const_args[5]) { +            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); +        } else { +            tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); +        } +        if (a0 != args[0]) { +            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); +        } +        break; + +#if TCG_TARGET_REG_BITS == 64 +    case INDEX_op_sub2_i64: +#else +    case INDEX_op_sub2_i32: +#endif +        a0 = args[0], a1 = args[1]; +        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { +            a0 = TCG_REG_R0; +        } +        if (const_args[2]) { +            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); +        } else { +            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); +        } +        if (const_args[3]) { +            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5])); +        } else { +            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); +        } +        if (a0 != args[0]) { +            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); +        } +        break; + +    case INDEX_op_muluh_i32: +        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); +        break; +    case INDEX_op_mulsh_i32: +        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); +        break; +    case INDEX_op_muluh_i64: +        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); +        break; +    case INDEX_op_mulsh_i64: +        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); +        break; + +    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_mov_i64: +    case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_movi_i64: +    case INDEX_op_call:      /* Always emitted via tcg_out_call.  
*/ +    default: +        tcg_abort(); +    } +} + +static const TCGTargetOpDef ppc_op_defs[] = { +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, +    { INDEX_op_br, { } }, + +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, + +    { INDEX_op_st8_i32, { "r", "r" } }, +    { INDEX_op_st16_i32, { "r", "r" } }, +    { INDEX_op_st_i32, { "r", "r" } }, + +    { INDEX_op_add_i32, { "r", "r", "ri" } }, +    { INDEX_op_mul_i32, { "r", "r", "rI" } }, +    { INDEX_op_div_i32, { "r", "r", "r" } }, +    { INDEX_op_divu_i32, { "r", "r", "r" } }, +    { INDEX_op_sub_i32, { "r", "rI", "ri" } }, +    { INDEX_op_and_i32, { "r", "r", "ri" } }, +    { INDEX_op_or_i32, { "r", "r", "ri" } }, +    { INDEX_op_xor_i32, { "r", "r", "ri" } }, +    { INDEX_op_andc_i32, { "r", "r", "ri" } }, +    { INDEX_op_orc_i32, { "r", "r", "ri" } }, +    { INDEX_op_eqv_i32, { "r", "r", "ri" } }, +    { INDEX_op_nand_i32, { "r", "r", "r" } }, +    { INDEX_op_nor_i32, { "r", "r", "r" } }, + +    { INDEX_op_shl_i32, { "r", "r", "ri" } }, +    { INDEX_op_shr_i32, { "r", "r", "ri" } }, +    { INDEX_op_sar_i32, { "r", "r", "ri" } }, +    { INDEX_op_rotl_i32, { "r", "r", "ri" } }, +    { INDEX_op_rotr_i32, { "r", "r", "ri" } }, + +    { INDEX_op_neg_i32, { "r", "r" } }, +    { INDEX_op_not_i32, { "r", "r" } }, +    { INDEX_op_ext8s_i32, { "r", "r" } }, +    { INDEX_op_ext16s_i32, { "r", "r" } }, +    { INDEX_op_bswap16_i32, { "r", "r" } }, +    { INDEX_op_bswap32_i32, { "r", "r" } }, + +    { INDEX_op_brcond_i32, { "r", "ri" } }, +    { INDEX_op_setcond_i32, { "r", "r", "ri" } }, +    { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } }, + +    { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, + +    { INDEX_op_muluh_i32, { "r", "r", "r" } }, +    { INDEX_op_mulsh_i32, { "r", "r", "r" } }, + +#if TCG_TARGET_REG_BITS == 64 +    { INDEX_op_ld8u_i64, { "r", "r" } }, +    { INDEX_op_ld8s_i64, { "r", "r" } }, +    { INDEX_op_ld16u_i64, { "r", "r" } }, +    { INDEX_op_ld16s_i64, { "r", "r" } }, +    { INDEX_op_ld32u_i64, { "r", "r" } }, +    { INDEX_op_ld32s_i64, { "r", "r" } }, +    { INDEX_op_ld_i64, { "r", "r" } }, + +    { INDEX_op_st8_i64, { "r", "r" } }, +    { INDEX_op_st16_i64, { "r", "r" } }, +    { INDEX_op_st32_i64, { "r", "r" } }, +    { INDEX_op_st_i64, { "r", "r" } }, + +    { INDEX_op_add_i64, { "r", "r", "rT" } }, +    { INDEX_op_sub_i64, { "r", "rI", "rT" } }, +    { INDEX_op_and_i64, { "r", "r", "ri" } }, +    { INDEX_op_or_i64, { "r", "r", "rU" } }, +    { INDEX_op_xor_i64, { "r", "r", "rU" } }, +    { INDEX_op_andc_i64, { "r", "r", "ri" } }, +    { INDEX_op_orc_i64, { "r", "r", "r" } }, +    { INDEX_op_eqv_i64, { "r", "r", "r" } }, +    { INDEX_op_nand_i64, { "r", "r", "r" } }, +    { INDEX_op_nor_i64, { "r", "r", "r" } }, + +    { INDEX_op_shl_i64, { "r", "r", "ri" } }, +    { INDEX_op_shr_i64, { "r", "r", "ri" } }, +    { INDEX_op_sar_i64, { "r", "r", "ri" } }, +    { INDEX_op_rotl_i64, { "r", "r", "ri" } }, +    { INDEX_op_rotr_i64, { "r", "r", "ri" } }, + +    { INDEX_op_mul_i64, { "r", "r", "rI" } }, +    { INDEX_op_div_i64, { "r", "r", "r" } }, +    { INDEX_op_divu_i64, { "r", "r", "r" } }, + +    { INDEX_op_neg_i64, { "r", "r" } }, +    { INDEX_op_not_i64, { "r", "r" } }, +    { INDEX_op_ext8s_i64, { "r", "r" } }, +    { INDEX_op_ext16s_i64, { "r", "r" } }, +    { INDEX_op_ext32s_i64, { "r", "r" } }, +    { INDEX_op_bswap16_i64, { "r", "r" } 
}, +    { INDEX_op_bswap32_i64, { "r", "r" } }, +    { INDEX_op_bswap64_i64, { "r", "r" } }, + +    { INDEX_op_brcond_i64, { "r", "ri" } }, +    { INDEX_op_setcond_i64, { "r", "r", "ri" } }, +    { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } }, + +    { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, + +    { INDEX_op_mulsh_i64, { "r", "r", "r" } }, +    { INDEX_op_muluh_i64, { "r", "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 32 +    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, +    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 +    { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, +    { INDEX_op_sub2_i64, { "r", "r", "rI", "rZM", "r", "r" } }, +#else +    { INDEX_op_add2_i32, { "r", "r", "r", "r", "rI", "rZM" } }, +    { INDEX_op_sub2_i32, { "r", "r", "rI", "rZM", "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 +    { INDEX_op_qemu_ld_i32, { "r", "L" } }, +    { INDEX_op_qemu_st_i32, { "S", "S" } }, +    { INDEX_op_qemu_ld_i64, { "r", "L" } }, +    { INDEX_op_qemu_st_i64, { "S", "S" } }, +#elif TARGET_LONG_BITS == 32 +    { INDEX_op_qemu_ld_i32, { "r", "L" } }, +    { INDEX_op_qemu_st_i32, { "S", "S" } }, +    { INDEX_op_qemu_ld_i64, { "L", "L", "L" } }, +    { INDEX_op_qemu_st_i64, { "S", "S", "S" } }, +#else +    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, +    { INDEX_op_qemu_st_i32, { "S", "S", "S" } }, +    { INDEX_op_qemu_ld_i64, { "L", "L", "L", "L" } }, +    { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, +#endif + +    { -1 }, +}; + +static void tcg_target_init(TCGContext *s) +{ +    unsigned long hwcap = qemu_getauxval(AT_HWCAP); +    if (hwcap & PPC_FEATURE_ARCH_2_06) { +        have_isa_2_06 = true; +    } + +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff); +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff); +    tcg_regset_set32(tcg_target_call_clobber_regs, 0, +                     (1 << TCG_REG_R0) | +                     (1 << TCG_REG_R2) | +                     (1 << TCG_REG_R3) | +                     (1 << TCG_REG_R4) | +                     (1 << TCG_REG_R5) | +                     (1 << TCG_REG_R6) | +                     (1 << TCG_REG_R7) | +                     (1 << TCG_REG_R8) | +                     (1 << TCG_REG_R9) | +                     (1 << TCG_REG_R10) | +                     (1 << TCG_REG_R11) | +                     (1 << TCG_REG_R12)); + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ +#if defined(_CALL_SYSV) +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */ +#endif +#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64 +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ +#endif +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ +    if (USE_REG_RA) { +        tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);  /* return addr */ +    } + +    tcg_add_target_add_op_defs(ppc_op_defs); +} + +#ifdef __ELF__ +typedef struct { +    DebugFrameCIE cie; +    DebugFrameFDEHeader fde; +    uint8_t fde_def_cfa[4]; +    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value.  
*/ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#if TCG_TARGET_REG_BITS == 64 +# define ELF_HOST_MACHINE EM_PPC64 +#else +# define ELF_HOST_MACHINE EM_PPC +#endif + +static DebugFrame debug_frame = { +    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ +    .cie.id = -1, +    .cie.version = 1, +    .cie.code_align = 1, +    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */ +    .cie.return_column = 65, + +    /* Total FDE size does not include the "len" member.  */ +    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + +    .fde_def_cfa = { +        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */ +        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */ +        (FRAME_SIZE >> 7) +    }, +    .fde_reg_ofs = { +        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */ +        0x11, 65, (LR_OFFSET / -SZR) & 0x7f, +    } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ +    uint8_t *p = &debug_frame.fde_reg_ofs[3]; +    int i; + +    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) { +        p[0] = 0x80 + tcg_target_callee_save_regs[i]; +        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR; +    } + +    debug_frame.fde.func_start = (uintptr_t)buf; +    debug_frame.fde.func_len = buf_size; + +    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} +#endif /* __ELF__ */ + +static size_t dcache_bsize = 16; +static size_t icache_bsize = 16; + +void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +    uintptr_t p, start1, stop1; +    size_t dsize = dcache_bsize; +    size_t isize = icache_bsize; + +    start1 = start & ~(dsize - 1); +    stop1 = (stop + dsize - 1) & ~(dsize - 1); +    for (p = start1; p < stop1; p += dsize) { +        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); +    } +    asm volatile ("sync" : : : "memory"); + +    start &= start & ~(isize - 1); +    stop1 = (stop + isize - 1) & ~(isize - 1); +    for (p = start1; p < stop1; p += isize) { +        asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); +    } +    asm volatile ("sync" : : : "memory"); +    asm volatile ("isync" : : : "memory"); +} + +#if defined _AIX +#include <sys/systemcfg.h> + +static void __attribute__((constructor)) tcg_cache_init(void) +{ +    icache_bsize = _system_configuration.icache_line; +    dcache_bsize = _system_configuration.dcache_line; +} + +#elif defined __linux__ +static void __attribute__((constructor)) tcg_cache_init(void) +{ +    unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE); +    unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE); + +    if (dsize == 0 || isize == 0) { +        if (dsize == 0) { +            fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n"); +        } +        if (isize == 0) { +            fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n"); +        } +        exit(1); +    } +    dcache_bsize = dsize; +    icache_bsize = isize; +} + +#elif defined __APPLE__ +#include <stdio.h> +#include <sys/types.h> +#include <sys/sysctl.h> + +static void __attribute__((constructor)) tcg_cache_init(void) +{ +    size_t len; +    unsigned cacheline; +    int name[2] = { CTL_HW, HW_CACHELINE }; + +    len = sizeof(cacheline); +    if (sysctl(name, 2, &cacheline, &len, NULL, 0)) { +        perror("sysctl CTL_HW HW_CACHELINE failed"); +        exit(1); +    } +    dcache_bsize = cacheline; +    icache_bsize = cacheline; +} + +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include <errno.h> +#include <stdio.h> 
+#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/sysctl.h> + +static void __attribute__((constructor)) tcg_cache_init(void) +{ +    size_t len = 4; +    unsigned cacheline; + +    if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) { +        fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n", +                strerror(errno)); +        exit(1); +    } +    dcache_bsize = cacheline; +    icache_bsize = cacheline; +} +#endif diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h new file mode 100644 index 00000000..7ce70488 --- /dev/null +++ b/tcg/ppc/tcg-target.h @@ -0,0 +1,112 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef TCG_TARGET_PPC64  +#define TCG_TARGET_PPC64 1 + +#ifdef _ARCH_PPC64 +# define TCG_TARGET_REG_BITS  64 +#else +# define TCG_TARGET_REG_BITS  32 +#endif + +#define TCG_TARGET_NB_REGS 32 +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 + +typedef enum { +    TCG_REG_R0,  TCG_REG_R1,  TCG_REG_R2,  TCG_REG_R3, +    TCG_REG_R4,  TCG_REG_R5,  TCG_REG_R6,  TCG_REG_R7, +    TCG_REG_R8,  TCG_REG_R9,  TCG_REG_R10, TCG_REG_R11, +    TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15, +    TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19, +    TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23, +    TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27, +    TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, + +    TCG_REG_CALL_STACK = TCG_REG_R1, +    TCG_AREG0 = TCG_REG_R27 +} TCGReg; + +/* optional instructions automatically implemented */ +#define TCG_TARGET_HAS_ext8u_i32        0 /* andi */ +#define TCG_TARGET_HAS_ext16u_i32       0 + +/* optional instructions */ +#define TCG_TARGET_HAS_div_i32          1 +#define TCG_TARGET_HAS_rem_i32          0 +#define TCG_TARGET_HAS_rot_i32          1 +#define TCG_TARGET_HAS_ext8s_i32        1 +#define TCG_TARGET_HAS_ext16s_i32       1 +#define TCG_TARGET_HAS_bswap16_i32      1 +#define TCG_TARGET_HAS_bswap32_i32      1 +#define TCG_TARGET_HAS_not_i32          1 +#define TCG_TARGET_HAS_neg_i32          1 +#define TCG_TARGET_HAS_andc_i32         1 +#define TCG_TARGET_HAS_orc_i32          1 +#define TCG_TARGET_HAS_eqv_i32          1 +#define TCG_TARGET_HAS_nand_i32         1 +#define TCG_TARGET_HAS_nor_i32          1 +#define TCG_TARGET_HAS_deposit_i32      1 +#define TCG_TARGET_HAS_movcond_i32      1 +#define TCG_TARGET_HAS_mulu2_i32        0 +#define TCG_TARGET_HAS_muls2_i32        0 +#define TCG_TARGET_HAS_muluh_i32        1 +#define TCG_TARGET_HAS_mulsh_i32        1 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_add2_i32         0 +#define TCG_TARGET_HAS_sub2_i32         0 +#define TCG_TARGET_HAS_trunc_shr_i32    0 +#define TCG_TARGET_HAS_div_i64          1 +#define TCG_TARGET_HAS_rem_i64          0 +#define TCG_TARGET_HAS_rot_i64          1 +#define TCG_TARGET_HAS_ext8s_i64        1 +#define TCG_TARGET_HAS_ext16s_i64       1 +#define TCG_TARGET_HAS_ext32s_i64       1 +#define TCG_TARGET_HAS_ext8u_i64        0 +#define TCG_TARGET_HAS_ext16u_i64       0 +#define TCG_TARGET_HAS_ext32u_i64       0 +#define TCG_TARGET_HAS_bswap16_i64      1 +#define TCG_TARGET_HAS_bswap32_i64      1 +#define TCG_TARGET_HAS_bswap64_i64      1 +#define TCG_TARGET_HAS_not_i64          1 +#define TCG_TARGET_HAS_neg_i64          1 +#define TCG_TARGET_HAS_andc_i64         1 +#define TCG_TARGET_HAS_orc_i64          1 +#define TCG_TARGET_HAS_eqv_i64          1 +#define TCG_TARGET_HAS_nand_i64         1 +#define TCG_TARGET_HAS_nor_i64          1 +#define TCG_TARGET_HAS_deposit_i64      1 +#define TCG_TARGET_HAS_movcond_i64      1 +#define TCG_TARGET_HAS_add2_i64         1 +#define TCG_TARGET_HAS_sub2_i64         1 +#define TCG_TARGET_HAS_mulu2_i64        0 +#define TCG_TARGET_HAS_muls2_i64        0 +#define TCG_TARGET_HAS_muluh_i64        1 +#define TCG_TARGET_HAS_mulsh_i64        1 +#endif + +void flush_icache_range(uintptr_t start, uintptr_t stop); + +#endif diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c new file mode 100644 index 00000000..aa718eca --- /dev/null +++ b/tcg/s390/tcg-target.c @@ -0,0 +1,2393 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009 Ulrich Hecht <uli@suse.de> + * Copyright (c) 
2009 Alexander Graf <agraf@suse.de>
+ * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg-be-ldst.h"
+
+/* We only support generating code for 64-bit mode.  */
+#if TCG_TARGET_REG_BITS != 64
+#error "unsupported code generation mode"
+#endif
+
+#include "elf.h"
+
+/* ??? The translation blocks produced by TCG are generally small enough to
+   be entirely reachable with a 16-bit displacement.  Leaving the option for
+   a 32-bit displacement here Just In Case.  */
+#define USE_LONG_BRANCHES 0
+
+#define TCG_CT_CONST_MULI  0x100
+#define TCG_CT_CONST_ORI   0x200
+#define TCG_CT_CONST_XORI  0x400
+#define TCG_CT_CONST_CMPI  0x800
+#define TCG_CT_CONST_ADLI  0x1000
+
+/* In several places within the instruction set, 0 means "no register"
+   rather than TCG_REG_R0.  */
+#define TCG_REG_NONE    0
+
+/* A scratch register that may be used throughout the backend.  */
+#define TCG_TMP0        TCG_REG_R14
+
+#ifdef CONFIG_USE_GUEST_BASE
+#define TCG_GUEST_BASE_REG TCG_REG_R13
+#else
+#define TCG_GUEST_BASE_REG TCG_REG_R0
+#endif
+
+#ifndef GUEST_BASE
+#define GUEST_BASE 0
+#endif
+
+
+/* All of the following instructions are prefixed with their instruction
+   format, and are defined as 8- or 16-bit quantities, even when the two
+   halves of the 16-bit quantity may appear 32 bits apart in the insn.
+   This makes it easy to copy the values from the tables in Appendix B.
*/ +typedef enum S390Opcode { +    RIL_AFI     = 0xc209, +    RIL_AGFI    = 0xc208, +    RIL_ALFI    = 0xc20b, +    RIL_ALGFI   = 0xc20a, +    RIL_BRASL   = 0xc005, +    RIL_BRCL    = 0xc004, +    RIL_CFI     = 0xc20d, +    RIL_CGFI    = 0xc20c, +    RIL_CLFI    = 0xc20f, +    RIL_CLGFI   = 0xc20e, +    RIL_IIHF    = 0xc008, +    RIL_IILF    = 0xc009, +    RIL_LARL    = 0xc000, +    RIL_LGFI    = 0xc001, +    RIL_LGRL    = 0xc408, +    RIL_LLIHF   = 0xc00e, +    RIL_LLILF   = 0xc00f, +    RIL_LRL     = 0xc40d, +    RIL_MSFI    = 0xc201, +    RIL_MSGFI   = 0xc200, +    RIL_NIHF    = 0xc00a, +    RIL_NILF    = 0xc00b, +    RIL_OIHF    = 0xc00c, +    RIL_OILF    = 0xc00d, +    RIL_SLFI    = 0xc205, +    RIL_SLGFI   = 0xc204, +    RIL_XIHF    = 0xc006, +    RIL_XILF    = 0xc007, + +    RI_AGHI     = 0xa70b, +    RI_AHI      = 0xa70a, +    RI_BRC      = 0xa704, +    RI_IIHH     = 0xa500, +    RI_IIHL     = 0xa501, +    RI_IILH     = 0xa502, +    RI_IILL     = 0xa503, +    RI_LGHI     = 0xa709, +    RI_LLIHH    = 0xa50c, +    RI_LLIHL    = 0xa50d, +    RI_LLILH    = 0xa50e, +    RI_LLILL    = 0xa50f, +    RI_MGHI     = 0xa70d, +    RI_MHI      = 0xa70c, +    RI_NIHH     = 0xa504, +    RI_NIHL     = 0xa505, +    RI_NILH     = 0xa506, +    RI_NILL     = 0xa507, +    RI_OIHH     = 0xa508, +    RI_OIHL     = 0xa509, +    RI_OILH     = 0xa50a, +    RI_OILL     = 0xa50b, + +    RIE_CGIJ    = 0xec7c, +    RIE_CGRJ    = 0xec64, +    RIE_CIJ     = 0xec7e, +    RIE_CLGRJ   = 0xec65, +    RIE_CLIJ    = 0xec7f, +    RIE_CLGIJ   = 0xec7d, +    RIE_CLRJ    = 0xec77, +    RIE_CRJ     = 0xec76, +    RIE_RISBG   = 0xec55, + +    RRE_AGR     = 0xb908, +    RRE_ALGR    = 0xb90a, +    RRE_ALCR    = 0xb998, +    RRE_ALCGR   = 0xb988, +    RRE_CGR     = 0xb920, +    RRE_CLGR    = 0xb921, +    RRE_DLGR    = 0xb987, +    RRE_DLR     = 0xb997, +    RRE_DSGFR   = 0xb91d, +    RRE_DSGR    = 0xb90d, +    RRE_LGBR    = 0xb906, +    RRE_LCGR    = 0xb903, +    RRE_LGFR    = 0xb914, +    RRE_LGHR    = 0xb907, +    RRE_LGR     = 0xb904, +    RRE_LLGCR   = 0xb984, +    RRE_LLGFR   = 0xb916, +    RRE_LLGHR   = 0xb985, +    RRE_LRVR    = 0xb91f, +    RRE_LRVGR   = 0xb90f, +    RRE_LTGR    = 0xb902, +    RRE_MLGR    = 0xb986, +    RRE_MSGR    = 0xb90c, +    RRE_MSR     = 0xb252, +    RRE_NGR     = 0xb980, +    RRE_OGR     = 0xb981, +    RRE_SGR     = 0xb909, +    RRE_SLGR    = 0xb90b, +    RRE_SLBR    = 0xb999, +    RRE_SLBGR   = 0xb989, +    RRE_XGR     = 0xb982, + +    RRF_LOCR    = 0xb9f2, +    RRF_LOCGR   = 0xb9e2, + +    RR_AR       = 0x1a, +    RR_ALR      = 0x1e, +    RR_BASR     = 0x0d, +    RR_BCR      = 0x07, +    RR_CLR      = 0x15, +    RR_CR       = 0x19, +    RR_DR       = 0x1d, +    RR_LCR      = 0x13, +    RR_LR       = 0x18, +    RR_LTR      = 0x12, +    RR_NR       = 0x14, +    RR_OR       = 0x16, +    RR_SR       = 0x1b, +    RR_SLR      = 0x1f, +    RR_XR       = 0x17, + +    RSY_RLL     = 0xeb1d, +    RSY_RLLG    = 0xeb1c, +    RSY_SLLG    = 0xeb0d, +    RSY_SRAG    = 0xeb0a, +    RSY_SRLG    = 0xeb0c, + +    RS_SLL      = 0x89, +    RS_SRA      = 0x8a, +    RS_SRL      = 0x88, + +    RXY_AG      = 0xe308, +    RXY_AY      = 0xe35a, +    RXY_CG      = 0xe320, +    RXY_CY      = 0xe359, +    RXY_LAY     = 0xe371, +    RXY_LB      = 0xe376, +    RXY_LG      = 0xe304, +    RXY_LGB     = 0xe377, +    RXY_LGF     = 0xe314, +    RXY_LGH     = 0xe315, +    RXY_LHY     = 0xe378, +    RXY_LLGC    = 0xe390, +    RXY_LLGF    = 0xe316, +    RXY_LLGH    = 0xe391, +    RXY_LMG     = 0xeb04, +    RXY_LRV     = 0xe31e, +    
RXY_LRVG    = 0xe30f,
+    RXY_LRVH    = 0xe31f,
+    RXY_LY      = 0xe358,
+    RXY_STCY    = 0xe372,
+    RXY_STG     = 0xe324,
+    RXY_STHY    = 0xe370,
+    RXY_STMG    = 0xeb24,
+    RXY_STRV    = 0xe33e,
+    RXY_STRVG   = 0xe32f,
+    RXY_STRVH   = 0xe33f,
+    RXY_STY     = 0xe350,
+
+    RX_A        = 0x5a,
+    RX_C        = 0x59,
+    RX_L        = 0x58,
+    RX_LA       = 0x41,
+    RX_LH       = 0x48,
+    RX_ST       = 0x50,
+    RX_STC      = 0x42,
+    RX_STH      = 0x40,
+} S390Opcode;
+
+#ifndef NDEBUG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+    "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
+};
+#endif
+
+/* Since R6 is a potential argument register, choose it last of the
+   call-saved registers.  Likewise prefer the call-clobbered registers
+   in reverse order to maximize the chance of avoiding the arguments.  */
+static const int tcg_target_reg_alloc_order[] = {
+    /* Call saved registers.  */
+    TCG_REG_R13,
+    TCG_REG_R12,
+    TCG_REG_R11,
+    TCG_REG_R10,
+    TCG_REG_R9,
+    TCG_REG_R8,
+    TCG_REG_R7,
+    TCG_REG_R6,
+    /* Call clobbered registers.  */
+    TCG_REG_R14,
+    TCG_REG_R0,
+    TCG_REG_R1,
+    /* Argument registers, in reverse order of allocation.  */
+    TCG_REG_R5,
+    TCG_REG_R4,
+    TCG_REG_R3,
+    TCG_REG_R2,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+    TCG_REG_R2,
+    TCG_REG_R3,
+    TCG_REG_R4,
+    TCG_REG_R5,
+    TCG_REG_R6,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+    TCG_REG_R2,
+};
+
+#define S390_CC_EQ      8
+#define S390_CC_LT      4
+#define S390_CC_GT      2
+#define S390_CC_OV      1
+#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
+#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
+#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
+#define S390_CC_NEVER   0
+#define S390_CC_ALWAYS  15
+
+/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
+static const uint8_t tcg_cond_to_s390_cond[] = {
+    [TCG_COND_EQ]  = S390_CC_EQ,
+    [TCG_COND_NE]  = S390_CC_NE,
+    [TCG_COND_LT]  = S390_CC_LT,
+    [TCG_COND_LE]  = S390_CC_LE,
+    [TCG_COND_GT]  = S390_CC_GT,
+    [TCG_COND_GE]  = S390_CC_GE,
+    [TCG_COND_LTU] = S390_CC_LT,
+    [TCG_COND_LEU] = S390_CC_LE,
+    [TCG_COND_GTU] = S390_CC_GT,
+    [TCG_COND_GEU] = S390_CC_GE,
+};
+
+/* Condition codes that result from a LOAD AND TEST.  Here, we have no
+   unsigned instruction variation, however since the test is vs zero we
+   can re-map the outcomes appropriately.
*/ +static const uint8_t tcg_cond_to_ltr_cond[] = { +    [TCG_COND_EQ]  = S390_CC_EQ, +    [TCG_COND_NE]  = S390_CC_NE, +    [TCG_COND_LT]  = S390_CC_LT, +    [TCG_COND_LE]  = S390_CC_LE, +    [TCG_COND_GT]  = S390_CC_GT, +    [TCG_COND_GE]  = S390_CC_GE, +    [TCG_COND_LTU] = S390_CC_NEVER, +    [TCG_COND_LEU] = S390_CC_EQ, +    [TCG_COND_GTU] = S390_CC_NE, +    [TCG_COND_GEU] = S390_CC_ALWAYS, +}; + +#ifdef CONFIG_SOFTMMU +static void * const qemu_ld_helpers[16] = { +    [MO_UB]   = helper_ret_ldub_mmu, +    [MO_SB]   = helper_ret_ldsb_mmu, +    [MO_LEUW] = helper_le_lduw_mmu, +    [MO_LESW] = helper_le_ldsw_mmu, +    [MO_LEUL] = helper_le_ldul_mmu, +    [MO_LESL] = helper_le_ldsl_mmu, +    [MO_LEQ]  = helper_le_ldq_mmu, +    [MO_BEUW] = helper_be_lduw_mmu, +    [MO_BESW] = helper_be_ldsw_mmu, +    [MO_BEUL] = helper_be_ldul_mmu, +    [MO_BESL] = helper_be_ldsl_mmu, +    [MO_BEQ]  = helper_be_ldq_mmu, +}; + +static void * const qemu_st_helpers[16] = { +    [MO_UB]   = helper_ret_stb_mmu, +    [MO_LEUW] = helper_le_stw_mmu, +    [MO_LEUL] = helper_le_stl_mmu, +    [MO_LEQ]  = helper_le_stq_mmu, +    [MO_BEUW] = helper_be_stw_mmu, +    [MO_BEUL] = helper_be_stl_mmu, +    [MO_BEQ]  = helper_be_stq_mmu, +}; +#endif + +static tcg_insn_unit *tb_ret_addr; + +/* A list of relevant facilities used by this translator.  Some of these +   are required for proper operation, and these are checked at startup.  */ + +#define FACILITY_ZARCH_ACTIVE	(1ULL << (63 - 2)) +#define FACILITY_LONG_DISP	(1ULL << (63 - 18)) +#define FACILITY_EXT_IMM	(1ULL << (63 - 21)) +#define FACILITY_GEN_INST_EXT	(1ULL << (63 - 34)) +#define FACILITY_LOAD_ON_COND   (1ULL << (63 - 45)) + +static uint64_t facilities; + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1); +    assert(addend == -2); + +    switch (type) { +    case R_390_PC16DBL: +        assert(pcrel2 == (int16_t)pcrel2); +        tcg_patch16(code_ptr, pcrel2); +        break; +    case R_390_PC32DBL: +        assert(pcrel2 == (int32_t)pcrel2); +        tcg_patch32(code_ptr, pcrel2); +        break; +    default: +        tcg_abort(); +        break; +    } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str = *pct_str; + +    switch (ct_str[0]) { +    case 'r':                  /* all registers */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffff); +        break; +    case 'R':                  /* not R0 */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffff); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); +        break; +    case 'L':                  /* qemu_ld/st constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffff); +        tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2); +        tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); +        tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4); +        break; +    case 'a':                  /* force R2 for division */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_clear(ct->u.regs); +        tcg_regset_set_reg(ct->u.regs, TCG_REG_R2); +        break; +    case 'b':                  /* force R3 for division */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_clear(ct->u.regs); +        tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); +        break; +    case 'A': +        ct->ct |= 
TCG_CT_CONST_ADLI;
+        break;
+    case 'K':
+        ct->ct |= TCG_CT_CONST_MULI;
+        break;
+    case 'O':
+        ct->ct |= TCG_CT_CONST_ORI;
+        break;
+    case 'X':
+        ct->ct |= TCG_CT_CONST_XORI;
+        break;
+    case 'C':
+        ct->ct |= TCG_CT_CONST_CMPI;
+        break;
+    default:
+        return -1;
+    }
+    ct_str++;
+    *pct_str = ct_str;
+
+    return 0;
+}
+
+/* Immediates to be used with logical OR.  This is an optimization only,
+   since a full 64-bit immediate OR can always be performed with 4 sequential
+   OI[LH][LH] instructions.  What we're looking for is immediates that we
+   can load efficiently, and the immediate load plus the reg-reg OR is
+   smaller than the sequential OI's.  */
+
+static int tcg_match_ori(TCGType type, tcg_target_long val)
+{
+    if (facilities & FACILITY_EXT_IMM) {
+        if (type == TCG_TYPE_I32) {
+            /* All 32-bit ORs can be performed with 1 48-bit insn.  */
+            return 1;
+        }
+    }
+
+    /* Look for negative values.  These are best to load with LGHI.  */
+    if (val < 0) {
+        if (val == (int16_t)val) {
+            return 0;
+        }
+        if (facilities & FACILITY_EXT_IMM) {
+            if (val == (int32_t)val) {
+                return 0;
+            }
+        }
+    }
+
+    return 1;
+}
+
+/* Immediates to be used with logical XOR.  This is almost, but not quite,
+   only an optimization.  XOR with immediate is only supported with the
+   extended-immediate facility.  That said, there are a few patterns for
+   which it is better to load the value into a register first.  */
+
+static int tcg_match_xori(TCGType type, tcg_target_long val)
+{
+    if ((facilities & FACILITY_EXT_IMM) == 0) {
+        return 0;
+    }
+
+    if (type == TCG_TYPE_I32) {
+        /* All 32-bit XORs can be performed with 1 48-bit insn.  */
+        return 1;
+    }
+
+    /* Look for negative values.  These are best to load with LGHI.  */
+    if (val < 0 && val == (int32_t)val) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Immediates to be used with comparisons.  */
+
+static int tcg_match_cmpi(TCGType type, tcg_target_long val)
+{
+    if (facilities & FACILITY_EXT_IMM) {
+        /* The COMPARE IMMEDIATE instruction is available.  */
+        if (type == TCG_TYPE_I32) {
+            /* We have a 32-bit immediate and can compare against anything.  */
+            return 1;
+        } else {
+            /* ??? We have no insight here into whether the comparison is
+               signed or unsigned.  The COMPARE IMMEDIATE insn uses a 32-bit
+               signed immediate, and the COMPARE LOGICAL IMMEDIATE insn uses
+               a 32-bit unsigned immediate.  If we were to use the (semi)
+               obvious "val == (int32_t)val" we would be enabling unsigned
+               comparisons vs very large numbers.  The only solution is to
+               take the intersection of the ranges.  */
+            /* ??? Another possible solution is to simply lie and allow all
+               constants here and force the out-of-range values into a temp
+               register in tgen_cmp when we have knowledge of the actual
+               comparison code in use.  */
+            return val >= 0 && val <= 0x7fffffff;
+        }
+    } else {
+        /* Only the LOAD AND TEST instruction is available.  */
+        return val == 0;
+    }
+}
+
+/* Immediates to be used with add2/sub2.
*/ + +static int tcg_match_add2i(TCGType type, tcg_target_long val) +{ +    if (facilities & FACILITY_EXT_IMM) { +        if (type == TCG_TYPE_I32) { +            return 1; +        } else if (val >= -0xffffffffll && val <= 0xffffffffll) { +            return 1; +        } +    } +    return 0; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, +                                  const TCGArgConstraint *arg_ct) +{ +    int ct = arg_ct->ct; + +    if (ct & TCG_CT_CONST) { +        return 1; +    } + +    if (type == TCG_TYPE_I32) { +        val = (int32_t)val; +    } + +    /* The following are mutually exclusive.  */ +    if (ct & TCG_CT_CONST_MULI) { +        /* Immediates that may be used with multiply.  If we have the +           general-instruction-extensions, then we have MULTIPLY SINGLE +           IMMEDIATE with a signed 32-bit, otherwise we have only +           MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit.  */ +        if (facilities & FACILITY_GEN_INST_EXT) { +            return val == (int32_t)val; +        } else { +            return val == (int16_t)val; +        } +    } else if (ct & TCG_CT_CONST_ADLI) { +        return tcg_match_add2i(type, val); +    } else if (ct & TCG_CT_CONST_ORI) { +        return tcg_match_ori(type, val); +    } else if (ct & TCG_CT_CONST_XORI) { +        return tcg_match_xori(type, val); +    } else if (ct & TCG_CT_CONST_CMPI) { +        return tcg_match_cmpi(type, val); +    } + +    return 0; +} + +/* Emit instructions according to the given instruction format.  */ + +static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2) +{ +    tcg_out16(s, (op << 8) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, +                             TCGReg r1, TCGReg r2) +{ +    tcg_out32(s, (op << 16) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op, +                             TCGReg r1, TCGReg r2, int m3) +{ +    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ +    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); +} + +static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ +    tcg_out16(s, op | (r1 << 4)); +    tcg_out32(s, i2); +} + +static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1, +                            TCGReg b2, TCGReg r3, int disp) +{ +    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12) +              | (disp & 0xfff)); +} + +static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1, +                             TCGReg b2, TCGReg r3, int disp) +{ +    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); +    tcg_out32(s, (op & 0xff) | (b2 << 28) +              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4)); +} + +#define tcg_out_insn_RX   tcg_out_insn_RS +#define tcg_out_insn_RXY  tcg_out_insn_RSY + +/* Emit an opcode with "type-checking" of the format.  */ +#define tcg_out_insn(S, FMT, OP, ...) 
\ +    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__) + + +/* emit 64-bit shifts */ +static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest, +                         TCGReg src, TCGReg sh_reg, int sh_imm) +{ +    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm); +} + +/* emit 32-bit shifts */ +static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest, +                         TCGReg sh_reg, int sh_imm) +{ +    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm); +} + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) +{ +    if (src != dst) { +        if (type == TCG_TYPE_I32) { +            tcg_out_insn(s, RR, LR, dst, src); +        } else { +            tcg_out_insn(s, RRE, LGR, dst, src); +        } +    } +} + +/* load a register with an immediate value */ +static void tcg_out_movi(TCGContext *s, TCGType type, +                         TCGReg ret, tcg_target_long sval) +{ +    static const S390Opcode lli_insns[4] = { +        RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH +    }; + +    tcg_target_ulong uval = sval; +    int i; + +    if (type == TCG_TYPE_I32) { +        uval = (uint32_t)sval; +        sval = (int32_t)sval; +    } + +    /* Try all 32-bit insns that can load it in one go.  */ +    if (sval >= -0x8000 && sval < 0x8000) { +        tcg_out_insn(s, RI, LGHI, ret, sval); +        return; +    } + +    for (i = 0; i < 4; i++) { +        tcg_target_long mask = 0xffffull << i*16; +        if ((uval & mask) == uval) { +            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); +            return; +        } +    } + +    /* Try all 48-bit insns that can load it in one go.  */ +    if (facilities & FACILITY_EXT_IMM) { +        if (sval == (int32_t)sval) { +            tcg_out_insn(s, RIL, LGFI, ret, sval); +            return; +        } +        if (uval <= 0xffffffff) { +            tcg_out_insn(s, RIL, LLILF, ret, uval); +            return; +        } +        if ((uval & 0xffffffff) == 0) { +            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1); +            return; +        } +    } + +    /* Try for PC-relative address load.  */ +    if ((sval & 1) == 0) { +        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1; +        if (off == (int32_t)off) { +            tcg_out_insn(s, RIL, LARL, ret, off); +            return; +        } +    } + +    /* If extended immediates are not present, then we may have to issue +       several instructions to load the low 32 bits.  */ +    if (!(facilities & FACILITY_EXT_IMM)) { +        /* A 32-bit unsigned value can be loaded in 2 insns.  And given +           that the lli_insns loop above did not succeed, we know that +           both insns are required.  */ +        if (uval <= 0xffffffff) { +            tcg_out_insn(s, RI, LLILL, ret, uval); +            tcg_out_insn(s, RI, IILH, ret, uval >> 16); +            return; +        } + +        /* If all high bits are set, the value can be loaded in 2 or 3 insns. +           We first want to make sure that all the high bits get set.  With +           luck the low 16-bits can be considered negative to perform that for +           free, otherwise we load an explicit -1.  
*/ +        if (sval >> 31 >> 1 == -1) { +            if (uval & 0x8000) { +                tcg_out_insn(s, RI, LGHI, ret, uval); +            } else { +                tcg_out_insn(s, RI, LGHI, ret, -1); +                tcg_out_insn(s, RI, IILL, ret, uval); +            } +            tcg_out_insn(s, RI, IILH, ret, uval >> 16); +            return; +        } +    } + +    /* If we get here, both the high and low parts have non-zero bits.  */ + +    /* Recurse to load the lower 32-bits.  */ +    tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff); + +    /* Insert data into the high 32-bits.  */ +    uval = uval >> 31 >> 1; +    if (facilities & FACILITY_EXT_IMM) { +        if (uval < 0x10000) { +            tcg_out_insn(s, RI, IIHL, ret, uval); +        } else if ((uval & 0xffff) == 0) { +            tcg_out_insn(s, RI, IIHH, ret, uval >> 16); +        } else { +            tcg_out_insn(s, RIL, IIHF, ret, uval); +        } +    } else { +        if (uval & 0xffff) { +            tcg_out_insn(s, RI, IIHL, ret, uval); +        } +        if (uval & 0xffff0000) { +            tcg_out_insn(s, RI, IIHH, ret, uval >> 16); +        } +    } +} + + +/* Emit a load/store type instruction.  Inputs are: +   DATA:     The register to be loaded or stored. +   BASE+OFS: The effective address. +   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0. +   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */ + +static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, +                        TCGReg data, TCGReg base, TCGReg index, +                        tcg_target_long ofs) +{ +    if (ofs < -0x80000 || ofs >= 0x80000) { +        /* Combine the low 20 bits of the offset with the actual load insn; +           the high 44 bits must come from an immediate load.  */ +        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000; +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low); +        ofs = low; + +        /* If we were already given an index register, add it in.  
*/ +        if (index != TCG_REG_NONE) { +            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index); +        } +        index = TCG_TMP0; +    } + +    if (opc_rx && ofs >= 0 && ofs < 0x1000) { +        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs); +    } else { +        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs); +    } +} + + +/* load data without address translation or endianness conversion */ +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, +                              TCGReg base, intptr_t ofs) +{ +    if (type == TCG_TYPE_I32) { +        tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs); +    } else { +        tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs); +    } +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, +                              TCGReg base, intptr_t ofs) +{ +    if (type == TCG_TYPE_I32) { +        tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs); +    } else { +        tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs); +    } +} + +/* load data from an absolute host address */ +static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs) +{ +    intptr_t addr = (intptr_t)abs; + +    if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) { +        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1; +        if (disp == (int32_t)disp) { +            if (type == TCG_TYPE_I32) { +                tcg_out_insn(s, RIL, LRL, dest, disp); +            } else { +                tcg_out_insn(s, RIL, LGRL, dest, disp); +            } +            return; +        } +    } + +    tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff); +    tcg_out_ld(s, type, dest, dest, addr & 0xffff); +} + +static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, +                                 int msb, int lsb, int ofs, int z) +{ +    /* Format RIE-f */ +    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src); +    tcg_out16(s, (msb << 8) | (z << 7) | lsb); +    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff)); +} + +static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ +    if (facilities & FACILITY_EXT_IMM) { +        tcg_out_insn(s, RRE, LGBR, dest, src); +        return; +    } + +    if (type == TCG_TYPE_I32) { +        if (dest == src) { +            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24); +        } else { +            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24); +        } +        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24); +    } else { +        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56); +        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56); +    } +} + +static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ +    if (facilities & FACILITY_EXT_IMM) { +        tcg_out_insn(s, RRE, LLGCR, dest, src); +        return; +    } + +    if (dest == src) { +        tcg_out_movi(s, type, TCG_TMP0, 0xff); +        src = TCG_TMP0; +    } else { +        tcg_out_movi(s, type, dest, 0xff); +    } +    if (type == TCG_TYPE_I32) { +        tcg_out_insn(s, RR, NR, dest, src); +    } else { +        tcg_out_insn(s, RRE, NGR, dest, src); +    } +} + +static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ +    if (facilities & FACILITY_EXT_IMM) { +        tcg_out_insn(s, RRE, LGHR, dest, src); +        return; +    } + +    if (type == TCG_TYPE_I32) { +        if (dest == src) { +            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16); +   
     } else { +            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16); +        } +        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16); +    } else { +        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48); +        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48); +    } +} + +static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ +    if (facilities & FACILITY_EXT_IMM) { +        tcg_out_insn(s, RRE, LLGHR, dest, src); +        return; +    } + +    if (dest == src) { +        tcg_out_movi(s, type, TCG_TMP0, 0xffff); +        src = TCG_TMP0; +    } else { +        tcg_out_movi(s, type, dest, 0xffff); +    } +    if (type == TCG_TYPE_I32) { +        tcg_out_insn(s, RR, NR, dest, src); +    } else { +        tcg_out_insn(s, RRE, NGR, dest, src); +    } +} + +static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src) +{ +    tcg_out_insn(s, RRE, LGFR, dest, src); +} + +static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) +{ +    tcg_out_insn(s, RRE, LLGFR, dest, src); +} + +/* Accept bit patterns like these: +    0....01....1 +    1....10....0 +    1..10..01..1 +    0..01..10..0 +   Copied from gcc sources.  */ +static inline bool risbg_mask(uint64_t c) +{ +    uint64_t lsb; +    /* We don't change the number of transitions by inverting, +       so make sure we start with the LSB zero.  */ +    if (c & 1) { +        c = ~c; +    } +    /* Reject all zeros or all ones.  */ +    if (c == 0) { +        return false; +    } +    /* Find the first transition.  */ +    lsb = c & -c; +    /* Invert to look for a second transition.  */ +    c = ~c; +    /* Erase the first transition.  */ +    c &= -lsb; +    /* Find the second transition, if any.  */ +    lsb = c & -c; +    /* Match if all the bits are 1's, or if c is zero.  */ +    return c == -lsb; +} + +static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) +{ +    int msb, lsb; +    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) { +        /* Achieve wraparound by swapping msb and lsb.  */ +        msb = 64 - ctz64(~val); +        lsb = clz64(~val) - 1; +    } else { +        msb = clz64(val); +        lsb = 63 - ctz64(val); +    } +    tcg_out_risbg(s, out, in, msb, lsb, 0, 1); +} + +static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) +{ +    static const S390Opcode ni_insns[4] = { +        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH +    }; +    static const S390Opcode nif_insns[2] = { +        RIL_NILF, RIL_NIHF +    }; +    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull); +    int i; + +    /* Look for the zero-extensions.  */ +    if ((val & valid) == 0xffffffff) { +        tgen_ext32u(s, dest, dest); +        return; +    } +    if (facilities & FACILITY_EXT_IMM) { +        if ((val & valid) == 0xff) { +            tgen_ext8u(s, TCG_TYPE_I64, dest, dest); +            return; +        } +        if ((val & valid) == 0xffff) { +            tgen_ext16u(s, TCG_TYPE_I64, dest, dest); +            return; +        } +    } + +    /* Try all 32-bit insns that can perform it in one go.  */ +    for (i = 0; i < 4; i++) { +        tcg_target_ulong mask = ~(0xffffull << i*16); +        if (((val | ~valid) & mask) == mask) { +            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); +            return; +        } +    } + +    /* Try all 48-bit insns that can perform it in one go.  
*/ +    if (facilities & FACILITY_EXT_IMM) { +        for (i = 0; i < 2; i++) { +            tcg_target_ulong mask = ~(0xffffffffull << i*32); +            if (((val | ~valid) & mask) == mask) { +                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); +                return; +            } +        } +    } +    if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) { +        tgen_andi_risbg(s, dest, dest, val); +        return; +    } + +    /* Fall back to loading the constant.  */ +    tcg_out_movi(s, type, TCG_TMP0, val); +    if (type == TCG_TYPE_I32) { +        tcg_out_insn(s, RR, NR, dest, TCG_TMP0); +    } else { +        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); +    } +} + +static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ +    static const S390Opcode oi_insns[4] = { +        RI_OILL, RI_OILH, RI_OIHL, RI_OIHH +    }; +    static const S390Opcode nif_insns[2] = { +        RIL_OILF, RIL_OIHF +    }; + +    int i; + +    /* Look for no-op.  */ +    if (val == 0) { +        return; +    } + +    if (facilities & FACILITY_EXT_IMM) { +        /* Try all 32-bit insns that can perform it in one go.  */ +        for (i = 0; i < 4; i++) { +            tcg_target_ulong mask = (0xffffull << i*16); +            if ((val & mask) != 0 && (val & ~mask) == 0) { +                tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); +                return; +            } +        } + +        /* Try all 48-bit insns that can perform it in one go.  */ +        for (i = 0; i < 2; i++) { +            tcg_target_ulong mask = (0xffffffffull << i*32); +            if ((val & mask) != 0 && (val & ~mask) == 0) { +                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); +                return; +            } +        } + +        /* Perform the OR via sequential modifications to the high and +           low parts.  Do this via recursion to handle 16-bit vs 32-bit +           masks in each half.  */ +        tgen64_ori(s, dest, val & 0x00000000ffffffffull); +        tgen64_ori(s, dest, val & 0xffffffff00000000ull); +    } else { +        /* With no extended-immediate facility, we don't need to be so +           clever.  Just iterate over the insns and mask in the constant.  */ +        for (i = 0; i < 4; i++) { +            tcg_target_ulong mask = (0xffffull << i*16); +            if ((val & mask) != 0) { +                tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); +            } +        } +    } +} + +static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ +    /* Perform the xor by parts.  
*/ +    if (val & 0xffffffff) { +        tcg_out_insn(s, RIL, XILF, dest, val); +    } +    if (val > 0xffffffff) { +        tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1); +    } +} + +static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, +                    TCGArg c2, int c2const) +{ +    bool is_unsigned = is_unsigned_cond(c); +    if (c2const) { +        if (c2 == 0) { +            if (type == TCG_TYPE_I32) { +                tcg_out_insn(s, RR, LTR, r1, r1); +            } else { +                tcg_out_insn(s, RRE, LTGR, r1, r1); +            } +            return tcg_cond_to_ltr_cond[c]; +        } else { +            if (is_unsigned) { +                if (type == TCG_TYPE_I32) { +                    tcg_out_insn(s, RIL, CLFI, r1, c2); +                } else { +                    tcg_out_insn(s, RIL, CLGFI, r1, c2); +                } +            } else { +                if (type == TCG_TYPE_I32) { +                    tcg_out_insn(s, RIL, CFI, r1, c2); +                } else { +                    tcg_out_insn(s, RIL, CGFI, r1, c2); +                } +            } +        } +    } else { +        if (is_unsigned) { +            if (type == TCG_TYPE_I32) { +                tcg_out_insn(s, RR, CLR, r1, c2); +            } else { +                tcg_out_insn(s, RRE, CLGR, r1, c2); +            } +        } else { +            if (type == TCG_TYPE_I32) { +                tcg_out_insn(s, RR, CR, r1, c2); +            } else { +                tcg_out_insn(s, RRE, CGR, r1, c2); +            } +        } +    } +    return tcg_cond_to_s390_cond[c]; +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, +                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const) +{ +    int cc; + +    switch (cond) { +    case TCG_COND_GTU: +    case TCG_COND_GT: +    do_greater: +        /* The result of a compare has CC=2 for GT and CC=3 unused. +           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */ +        tgen_cmp(s, type, cond, c1, c2, c2const); +        tcg_out_movi(s, type, dest, 0); +        tcg_out_insn(s, RRE, ALCGR, dest, dest); +        return; + +    case TCG_COND_GEU: +    do_geu: +        /* We need "real" carry semantics, so use SUBTRACT LOGICAL +           instead of COMPARE LOGICAL.  This needs an extra move.  */ +        tcg_out_mov(s, type, TCG_TMP0, c1); +        if (c2const) { +            tcg_out_movi(s, TCG_TYPE_I64, dest, 0); +            if (type == TCG_TYPE_I32) { +                tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2); +            } else { +                tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2); +            } +        } else { +            if (type == TCG_TYPE_I32) { +                tcg_out_insn(s, RR, SLR, TCG_TMP0, c2); +            } else { +                tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2); +            } +            tcg_out_movi(s, TCG_TYPE_I64, dest, 0); +        } +        tcg_out_insn(s, RRE, ALCGR, dest, dest); +        return; + +    case TCG_COND_LEU: +    case TCG_COND_LTU: +    case TCG_COND_LT: +        /* Swap operands so that we can use GEU/GTU/GT.  
*/ +        if (c2const) { +            tcg_out_movi(s, type, TCG_TMP0, c2); +            c2 = c1; +            c2const = 0; +            c1 = TCG_TMP0; +        } else { +            TCGReg t = c1; +            c1 = c2; +            c2 = t; +        } +        if (cond == TCG_COND_LEU) { +            goto do_geu; +        } +        cond = tcg_swap_cond(cond); +        goto do_greater; + +    case TCG_COND_NE: +        /* X != 0 is X > 0.  */ +        if (c2const && c2 == 0) { +            cond = TCG_COND_GTU; +            goto do_greater; +        } +        break; + +    case TCG_COND_EQ: +        /* X == 0 is X <= 0 is 0 >= X.  */ +        if (c2const && c2 == 0) { +            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0); +            c2 = c1; +            c2const = 0; +            c1 = TCG_TMP0; +            goto do_geu; +        } +        break; + +    default: +        break; +    } + +    cc = tgen_cmp(s, type, cond, c1, c2, c2const); +    if (facilities & FACILITY_LOAD_ON_COND) { +        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */ +        tcg_out_movi(s, TCG_TYPE_I64, dest, 0); +        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); +        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); +    } else { +        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */ +        tcg_out_movi(s, type, dest, 1); +        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); +        tcg_out_movi(s, type, dest, 0); +    } +} + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, +                         TCGReg c1, TCGArg c2, int c2const, TCGReg r3) +{ +    int cc; +    if (facilities & FACILITY_LOAD_ON_COND) { +        cc = tgen_cmp(s, type, c, c1, c2, c2const); +        tcg_out_insn(s, RRF, LOCGR, dest, r3, cc); +    } else { +        c = tcg_invert_cond(c); +        cc = tgen_cmp(s, type, c, c1, c2, c2const); + +        /* Emit: if (cc) goto over; dest = r3; over:  */ +        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); +        tcg_out_insn(s, RRE, LGR, dest, r3); +    } +} + +bool tcg_target_deposit_valid(int ofs, int len) +{ +    return (facilities & FACILITY_GEN_INST_EXT) != 0; +} + +static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, +                         int ofs, int len) +{ +    int lsb = (63 - ofs); +    int msb = lsb - (len - 1); +    tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0); +} + +static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest) +{ +    ptrdiff_t off = dest - s->code_ptr; +    if (off == (int16_t)off) { +        tcg_out_insn(s, RI, BRC, cc, off); +    } else if (off == (int32_t)off) { +        tcg_out_insn(s, RIL, BRCL, cc, off); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); +        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0); +    } +} + +static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) +{ +    if (l->has_value) { +        tgen_gotoi(s, cc, l->u.value_ptr); +    } else if (USE_LONG_BRANCHES) { +        tcg_out16(s, RIL_BRCL | (cc << 4)); +        tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, -2); +        s->code_ptr += 2; +    } else { +        tcg_out16(s, RI_BRC | (cc << 4)); +        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, -2); +        s->code_ptr += 1; +    } +} + +static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, +                                TCGReg r1, TCGReg r2, TCGLabel *l) +{ +    intptr_t off; + +    if (l->has_value) { +        off = l->u.value_ptr - s->code_ptr; +    } else { +        /* We need to keep the offset unchanged for 
retranslation.  */ +        off = s->code_ptr[1]; +        tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); +    } + +    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); +    tcg_out16(s, off); +    tcg_out16(s, cc << 12 | (opc & 0xff)); +} + +static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, +                                    TCGReg r1, int i2, TCGLabel *l) +{ +    tcg_target_long off; + +    if (l->has_value) { +        off = l->u.value_ptr - s->code_ptr; +    } else { +        /* We need to keep the offset unchanged for retranslation.  */ +        off = s->code_ptr[1]; +        tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, -2); +    } + +    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); +    tcg_out16(s, off); +    tcg_out16(s, (i2 << 8) | (opc & 0xff)); +} + +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, +                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l) +{ +    int cc; + +    if (facilities & FACILITY_GEN_INST_EXT) { +        bool is_unsigned = is_unsigned_cond(c); +        bool in_range; +        S390Opcode opc; + +        cc = tcg_cond_to_s390_cond[c]; + +        if (!c2const) { +            opc = (type == TCG_TYPE_I32 +                   ? (is_unsigned ? RIE_CLRJ : RIE_CRJ) +                   : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ)); +            tgen_compare_branch(s, opc, cc, r1, c2, l); +            return; +        } + +        /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. +           If the immediate we've been given does not fit that range, we'll +           fall back to separate compare and branch instructions using the +           larger comparison range afforded by COMPARE IMMEDIATE.  */ +        if (type == TCG_TYPE_I32) { +            if (is_unsigned) { +                opc = RIE_CLIJ; +                in_range = (uint32_t)c2 == (uint8_t)c2; +            } else { +                opc = RIE_CIJ; +                in_range = (int32_t)c2 == (int8_t)c2; +            } +        } else { +            if (is_unsigned) { +                opc = RIE_CLGIJ; +                in_range = (uint64_t)c2 == (uint8_t)c2; +            } else { +                opc = RIE_CGIJ; +                in_range = (int64_t)c2 == (int8_t)c2; +            } +        } +        if (in_range) { +            tgen_compare_imm_branch(s, opc, cc, r1, c2, l); +            return; +        } +    } + +    cc = tgen_cmp(s, type, c, r1, c2, c2const); +    tgen_branch(s, cc, l); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ +    ptrdiff_t off = dest - s->code_ptr; +    if (off == (int32_t)off) { +        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest); +        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0); +    } +} + +static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc, TCGReg data, +                                   TCGReg base, TCGReg index, int disp) +{ +    switch (opc & (MO_SSIZE | MO_BSWAP)) { +    case MO_UB: +        tcg_out_insn(s, RXY, LLGC, data, base, index, disp); +        break; +    case MO_SB: +        tcg_out_insn(s, RXY, LGB, data, base, index, disp); +        break; + +    case MO_UW | MO_BSWAP: +        /* swapped unsigned halfword load with upper bits zeroed */ +        tcg_out_insn(s, RXY, LRVH, data, base, index, disp); +        tgen_ext16u(s, TCG_TYPE_I64, data, data); +        break; +    case MO_UW: +        tcg_out_insn(s, RXY, LLGH, data, base, index, disp); +       
 break;
+
+    case MO_SW | MO_BSWAP:
+        /* swapped sign-extended halfword load */
+        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
+        tgen_ext16s(s, TCG_TYPE_I64, data, data);
+        break;
+    case MO_SW:
+        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
+        break;
+
+    case MO_UL | MO_BSWAP:
+        /* swapped unsigned int load with upper bits zeroed */
+        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+        tgen_ext32u(s, data, data);
+        break;
+    case MO_UL:
+        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
+        break;
+
+    case MO_SL | MO_BSWAP:
+        /* swapped sign-extended int load */
+        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+        tgen_ext32s(s, data, data);
+        break;
+    case MO_SL:
+        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
+        break;
+
+    case MO_Q | MO_BSWAP:
+        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
+        break;
+    case MO_Q:
+        tcg_out_insn(s, RXY, LG, data, base, index, disp);
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
+                                   TCGReg base, TCGReg index, int disp)
+{
+    switch (opc & (MO_SIZE | MO_BSWAP)) {
+    case MO_UB:
+        if (disp >= 0 && disp < 0x1000) {
+            tcg_out_insn(s, RX, STC, data, base, index, disp);
+        } else {
+            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
+        }
+        break;
+
+    case MO_UW | MO_BSWAP:
+        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
+        break;
+    case MO_UW:
+        if (disp >= 0 && disp < 0x1000) {
+            tcg_out_insn(s, RX, STH, data, base, index, disp);
+        } else {
+            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
+        }
+        break;
+
+    case MO_UL | MO_BSWAP:
+        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
+        break;
+    case MO_UL:
+        if (disp >= 0 && disp < 0x1000) {
+            tcg_out_insn(s, RX, ST, data, base, index, disp);
+        } else {
+            tcg_out_insn(s, RXY, STY, data, base, index, disp);
+        }
+        break;
+
+    case MO_Q | MO_BSWAP:
+        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
+        break;
+    case MO_Q:
+        tcg_out_insn(s, RXY, STG, data, base, index, disp);
+        break;
+
+    default:
+        tcg_abort();
+    }
+}
+
+#if defined(CONFIG_SOFTMMU)
+/* We're expecting to use a 20-bit signed offset on the tlb memory ops.
+   Using the offset of the second entry in the last tlb table ensures
+   that we can index all of the elements of the first entry.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+                  > 0x7ffff);
+
+/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
+   addend into R2.  Returns a register with the sanitized guest address.  
*/
+static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
+                               int mem_index, bool is_ld)
+{
+    TCGMemOp s_bits = opc & MO_SIZE;
+    uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1);
+    int ofs;
+
+    if (facilities & FACILITY_GEN_INST_EXT) {
+        tcg_out_risbg(s, TCG_REG_R2, addr_reg,
+                      64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS,
+                      63 - CPU_TLB_ENTRY_BITS,
+                      64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1);
+        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
+    } else {
+        tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
+                     TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+        tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg);
+        tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
+                  (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
+    }
+
+    if (is_ld) {
+        ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
+    } else {
+        ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+    }
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_mem(s, RX_C, RXY_CY, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
+    } else {
+        tcg_out_mem(s, 0, RXY_CG, TCG_REG_R3, TCG_REG_R2, TCG_AREG0, ofs);
+    }
+
+    ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    tcg_out_mem(s, 0, RXY_LG, TCG_REG_R2, TCG_REG_R2, TCG_AREG0, ofs);
+
+    if (TARGET_LONG_BITS == 32) {
+        tgen_ext32u(s, TCG_REG_R3, addr_reg);
+        return TCG_REG_R3;
+    }
+    return addr_reg;
+}
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+                                TCGReg data, TCGReg addr,
+                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
+{
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->oi = oi;
+    label->datalo_reg = data;
+    label->addrlo_reg = addr;
+    label->raddr = raddr;
+    label->label_ptr[0] = label_ptr;
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+    TCGReg addr_reg = lb->addrlo_reg;
+    TCGReg data_reg = lb->datalo_reg;
+    TCGMemOpIdx oi = lb->oi;
+    TCGMemOp opc = get_memop(oi);
+
+    patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
+
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+    }
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
+    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
+
+    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+    TCGReg addr_reg = lb->addrlo_reg;
+    TCGReg data_reg = lb->datalo_reg;
+    TCGMemOpIdx oi = lb->oi;
+    TCGMemOp opc = get_memop(oi);
+
+    patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, -2);
+
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+    }
+    switch (opc & MO_SIZE) {
+    case MO_UB:
+        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+        break;
+    case MO_UW:
+        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); 
+        break; +    case MO_UL: +        tgen_ext32u(s, TCG_REG_R4, data_reg); +        break; +    case MO_Q: +        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg); +        break; +    default: +        tcg_abort(); +    } +    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); +    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); +    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + +    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); +} +#else +static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, +                                  TCGReg *index_reg, tcg_target_long *disp) +{ +    if (TARGET_LONG_BITS == 32) { +        tgen_ext32u(s, TCG_TMP0, *addr_reg); +        *addr_reg = TCG_TMP0; +    } +    if (GUEST_BASE < 0x80000) { +        *index_reg = TCG_REG_NONE; +        *disp = GUEST_BASE; +    } else { +        *index_reg = TCG_GUEST_BASE_REG; +        *disp = 0; +    } +} +#endif /* CONFIG_SOFTMMU */ + +static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, +                            TCGMemOpIdx oi) +{ +    TCGMemOp opc = get_memop(oi); +#ifdef CONFIG_SOFTMMU +    unsigned mem_index = get_mmuidx(oi); +    tcg_insn_unit *label_ptr; +    TCGReg base_reg; + +    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); + +    /* We need to keep the offset unchanged for retranslation.  */ +    tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); +    label_ptr = s->code_ptr; +    s->code_ptr += 1; + +    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); + +    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr); +#else +    TCGReg index_reg; +    tcg_target_long disp; + +    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); +    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp); +#endif +} + +static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, +                            TCGMemOpIdx oi) +{ +    TCGMemOp opc = get_memop(oi); +#ifdef CONFIG_SOFTMMU +    unsigned mem_index = get_mmuidx(oi); +    tcg_insn_unit *label_ptr; +    TCGReg base_reg; + +    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); + +    /* We need to keep the offset unchanged for retranslation.  
*/ +    tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); +    label_ptr = s->code_ptr; +    s->code_ptr += 1; + +    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0); + +    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr); +#else +    TCGReg index_reg; +    tcg_target_long disp; + +    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp); +    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp); +#endif +} + +# define OP_32_64(x) \ +        case glue(glue(INDEX_op_,x),_i32): \ +        case glue(glue(INDEX_op_,x),_i64) + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, +                const TCGArg *args, const int *const_args) +{ +    S390Opcode op; +    TCGArg a0, a1, a2; + +    switch (opc) { +    case INDEX_op_exit_tb: +        /* return value */ +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); +        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); +        break; + +    case INDEX_op_goto_tb: +        if (s->tb_jmp_offset) { +            tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); +            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); +            s->code_ptr += 2; +        } else { +            /* load address stored at s->tb_next + args[0] */ +            tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_TMP0, s->tb_next + args[0]); +            /* and go there */ +            tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0); +        } +        s->tb_next_offset[args[0]] = tcg_current_code_size(s); +        break; + +    OP_32_64(ld8u): +        /* ??? LLC (RXY format) is only present with the extended-immediate +           facility, whereas LLGC is always present.  */ +        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); +        break; + +    OP_32_64(ld8s): +        /* ??? LB is no smaller than LGB, so no point to using it.  */ +        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); +        break; + +    OP_32_64(ld16u): +        /* ??? LLH (RXY format) is only present with the extended-immediate +           facility, whereas LLGH is always present.  
*/ +        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); +        break; + +    case INDEX_op_ld16s_i32: +        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); +        break; + +    case INDEX_op_ld_i32: +        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); +        break; + +    OP_32_64(st8): +        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], +                    TCG_REG_NONE, args[2]); +        break; + +    OP_32_64(st16): +        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], +                    TCG_REG_NONE, args[2]); +        break; + +    case INDEX_op_st_i32: +        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); +        break; + +    case INDEX_op_add_i32: +        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; +        if (const_args[2]) { +        do_addi_32: +            if (a0 == a1) { +                if (a2 == (int16_t)a2) { +                    tcg_out_insn(s, RI, AHI, a0, a2); +                    break; +                } +                if (facilities & FACILITY_EXT_IMM) { +                    tcg_out_insn(s, RIL, AFI, a0, a2); +                    break; +                } +            } +            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); +        } else if (a0 == a1) { +            tcg_out_insn(s, RR, AR, a0, a2); +        } else { +            tcg_out_insn(s, RX, LA, a0, a1, a2, 0); +        } +        break; +    case INDEX_op_sub_i32: +        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; +        if (const_args[2]) { +            a2 = -a2; +            goto do_addi_32; +        } +        tcg_out_insn(s, RR, SR, args[0], args[2]); +        break; + +    case INDEX_op_and_i32: +        if (const_args[2]) { +            tgen_andi(s, TCG_TYPE_I32, args[0], args[2]); +        } else { +            tcg_out_insn(s, RR, NR, args[0], args[2]); +        } +        break; +    case INDEX_op_or_i32: +        if (const_args[2]) { +            tgen64_ori(s, args[0], args[2] & 0xffffffff); +        } else { +            tcg_out_insn(s, RR, OR, args[0], args[2]); +        } +        break; +    case INDEX_op_xor_i32: +        if (const_args[2]) { +            tgen64_xori(s, args[0], args[2] & 0xffffffff); +        } else { +            tcg_out_insn(s, RR, XR, args[0], args[2]); +        } +        break; + +    case INDEX_op_neg_i32: +        tcg_out_insn(s, RR, LCR, args[0], args[1]); +        break; + +    case INDEX_op_mul_i32: +        if (const_args[2]) { +            if ((int32_t)args[2] == (int16_t)args[2]) { +                tcg_out_insn(s, RI, MHI, args[0], args[2]); +            } else { +                tcg_out_insn(s, RIL, MSFI, args[0], args[2]); +            } +        } else { +            tcg_out_insn(s, RRE, MSR, args[0], args[2]); +        } +        break; + +    case INDEX_op_div2_i32: +        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); +        break; +    case INDEX_op_divu2_i32: +        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); +        break; + +    case INDEX_op_shl_i32: +        op = RS_SLL; +    do_shift32: +        if (const_args[2]) { +            tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]); +        } else { +            tcg_out_sh32(s, op, args[0], args[2], 0); +        } +        break; +    case INDEX_op_shr_i32: +        op = RS_SRL; +        goto do_shift32; +    case INDEX_op_sar_i32: +        op = RS_SRA; +        goto do_shift32; + +    case INDEX_op_rotl_i32: +        /* ??? 
Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */ +        if (const_args[2]) { +            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]); +        } else { +            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0); +        } +        break; +    case INDEX_op_rotr_i32: +        if (const_args[2]) { +            tcg_out_sh64(s, RSY_RLL, args[0], args[1], +                         TCG_REG_NONE, (32 - args[2]) & 31); +        } else { +            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); +            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0); +        } +        break; + +    case INDEX_op_ext8s_i32: +        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]); +        break; +    case INDEX_op_ext16s_i32: +        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]); +        break; +    case INDEX_op_ext8u_i32: +        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]); +        break; +    case INDEX_op_ext16u_i32: +        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); +        break; + +    OP_32_64(bswap16): +        /* The TCG bswap definition requires bits 0-47 already be zero. +           Thus we don't need the G-type insns to implement bswap16_i64.  */ +        tcg_out_insn(s, RRE, LRVR, args[0], args[1]); +        tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); +        break; +    OP_32_64(bswap32): +        tcg_out_insn(s, RRE, LRVR, args[0], args[1]); +        break; + +    case INDEX_op_add2_i32: +        if (const_args[4]) { +            tcg_out_insn(s, RIL, ALFI, args[0], args[4]); +        } else { +            tcg_out_insn(s, RR, ALR, args[0], args[4]); +        } +        tcg_out_insn(s, RRE, ALCR, args[1], args[5]); +        break; +    case INDEX_op_sub2_i32: +        if (const_args[4]) { +            tcg_out_insn(s, RIL, SLFI, args[0], args[4]); +        } else { +            tcg_out_insn(s, RR, SLR, args[0], args[4]); +        } +        tcg_out_insn(s, RRE, SLBR, args[1], args[5]); +        break; + +    case INDEX_op_br: +        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); +        break; + +    case INDEX_op_brcond_i32: +        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], +                    args[1], const_args[1], arg_label(args[3])); +        break; +    case INDEX_op_setcond_i32: +        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], +                     args[2], const_args[2]); +        break; +    case INDEX_op_movcond_i32: +        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], +                     args[2], const_args[2], args[3]); +        break; + +    case INDEX_op_qemu_ld_i32: +        /* ??? Technically we can use a non-extending instruction.  
*/ +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, args[0], args[1], args[2]); +        break; +    case INDEX_op_qemu_st_i32: +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, args[0], args[1], args[2]); +        break; + +    case INDEX_op_ld16s_i64: +        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); +        break; +    case INDEX_op_ld32u_i64: +        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); +        break; +    case INDEX_op_ld32s_i64: +        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); +        break; +    case INDEX_op_ld_i64: +        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); +        break; + +    case INDEX_op_st32_i64: +        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); +        break; +    case INDEX_op_st_i64: +        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); +        break; + +    case INDEX_op_add_i64: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +        do_addi_64: +            if (a0 == a1) { +                if (a2 == (int16_t)a2) { +                    tcg_out_insn(s, RI, AGHI, a0, a2); +                    break; +                } +                if (facilities & FACILITY_EXT_IMM) { +                    if (a2 == (int32_t)a2) { +                        tcg_out_insn(s, RIL, AGFI, a0, a2); +                        break; +                    } else if (a2 == (uint32_t)a2) { +                        tcg_out_insn(s, RIL, ALGFI, a0, a2); +                        break; +                    } else if (-a2 == (uint32_t)-a2) { +                        tcg_out_insn(s, RIL, SLGFI, a0, -a2); +                        break; +                    } +                } +            } +            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); +        } else if (a0 == a1) { +            tcg_out_insn(s, RRE, AGR, a0, a2); +        } else { +            tcg_out_insn(s, RX, LA, a0, a1, a2, 0); +        } +        break; +    case INDEX_op_sub_i64: +        a0 = args[0], a1 = args[1], a2 = args[2]; +        if (const_args[2]) { +            a2 = -a2; +            goto do_addi_64; +        } else { +            tcg_out_insn(s, RRE, SGR, args[0], args[2]); +        } +        break; + +    case INDEX_op_and_i64: +        if (const_args[2]) { +            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); +        } else { +            tcg_out_insn(s, RRE, NGR, args[0], args[2]); +        } +        break; +    case INDEX_op_or_i64: +        if (const_args[2]) { +            tgen64_ori(s, args[0], args[2]); +        } else { +            tcg_out_insn(s, RRE, OGR, args[0], args[2]); +        } +        break; +    case INDEX_op_xor_i64: +        if (const_args[2]) { +            tgen64_xori(s, args[0], args[2]); +        } else { +            tcg_out_insn(s, RRE, XGR, args[0], args[2]); +        } +        break; + +    case INDEX_op_neg_i64: +        tcg_out_insn(s, RRE, LCGR, args[0], args[1]); +        break; +    case INDEX_op_bswap64_i64: +        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); +        break; + +    case INDEX_op_mul_i64: +        if (const_args[2]) { +            if (args[2] == (int16_t)args[2]) { +                tcg_out_insn(s, RI, MGHI, args[0], args[2]); +            } else { +                tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); +            } +        } else { +            tcg_out_insn(s, RRE, MSGR, args[0], args[2]); +        } +        break; + +    case 
INDEX_op_div2_i64: +        /* ??? We get an unnecessary sign-extension of the dividend +           into R3 with this definition, but as we do in fact always +           produce both quotient and remainder using INDEX_op_div_i64 +           instead requires jumping through even more hoops.  */ +        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); +        break; +    case INDEX_op_divu2_i64: +        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); +        break; +    case INDEX_op_mulu2_i64: +        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]); +        break; + +    case INDEX_op_shl_i64: +        op = RSY_SLLG; +    do_shift64: +        if (const_args[2]) { +            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); +        } else { +            tcg_out_sh64(s, op, args[0], args[1], args[2], 0); +        } +        break; +    case INDEX_op_shr_i64: +        op = RSY_SRLG; +        goto do_shift64; +    case INDEX_op_sar_i64: +        op = RSY_SRAG; +        goto do_shift64; + +    case INDEX_op_rotl_i64: +        if (const_args[2]) { +            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], +                         TCG_REG_NONE, args[2]); +        } else { +            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); +        } +        break; +    case INDEX_op_rotr_i64: +        if (const_args[2]) { +            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], +                         TCG_REG_NONE, (64 - args[2]) & 63); +        } else { +            /* We can use the smaller 32-bit negate because only the +               low 6 bits are examined for the rotate.  */ +            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); +            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); +        } +        break; + +    case INDEX_op_ext8s_i64: +        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]); +        break; +    case INDEX_op_ext16s_i64: +        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); +        break; +    case INDEX_op_ext32s_i64: +        tgen_ext32s(s, args[0], args[1]); +        break; +    case INDEX_op_ext8u_i64: +        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]); +        break; +    case INDEX_op_ext16u_i64: +        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); +        break; +    case INDEX_op_ext32u_i64: +        tgen_ext32u(s, args[0], args[1]); +        break; + +    case INDEX_op_add2_i64: +        if (const_args[4]) { +            if ((int64_t)args[4] >= 0) { +                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); +            } else { +                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); +            } +        } else { +            tcg_out_insn(s, RRE, ALGR, args[0], args[4]); +        } +        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); +        break; +    case INDEX_op_sub2_i64: +        if (const_args[4]) { +            if ((int64_t)args[4] >= 0) { +                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); +            } else { +                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); +            } +        } else { +            tcg_out_insn(s, RRE, SLGR, args[0], args[4]); +        } +        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); +        break; + +    case INDEX_op_brcond_i64: +        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], +                    args[1], const_args[1], arg_label(args[3])); +        break; +    case INDEX_op_setcond_i64: +        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], +                     args[2], const_args[2]); +     
   break; +    case INDEX_op_movcond_i64: +        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], +                     args[2], const_args[2], args[3]); +        break; + +    OP_32_64(deposit): +        tgen_deposit(s, args[0], args[2], args[3], args[4]); +        break; + +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_mov_i64: +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_movi_i64: +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */ +    default: +        tcg_abort(); +    } +} + +static const TCGTargetOpDef s390_op_defs[] = { +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, +    { INDEX_op_br, { } }, + +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, +    { INDEX_op_st8_i32, { "r", "r" } }, +    { INDEX_op_st16_i32, { "r", "r" } }, +    { INDEX_op_st_i32, { "r", "r" } }, + +    { INDEX_op_add_i32, { "r", "r", "ri" } }, +    { INDEX_op_sub_i32, { "r", "0", "ri" } }, +    { INDEX_op_mul_i32, { "r", "0", "rK" } }, + +    { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } }, +    { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, + +    { INDEX_op_and_i32, { "r", "0", "ri" } }, +    { INDEX_op_or_i32, { "r", "0", "rO" } }, +    { INDEX_op_xor_i32, { "r", "0", "rX" } }, + +    { INDEX_op_neg_i32, { "r", "r" } }, + +    { INDEX_op_shl_i32, { "r", "0", "Ri" } }, +    { INDEX_op_shr_i32, { "r", "0", "Ri" } }, +    { INDEX_op_sar_i32, { "r", "0", "Ri" } }, + +    { INDEX_op_rotl_i32, { "r", "r", "Ri" } }, +    { INDEX_op_rotr_i32, { "r", "r", "Ri" } }, + +    { INDEX_op_ext8s_i32, { "r", "r" } }, +    { INDEX_op_ext8u_i32, { "r", "r" } }, +    { INDEX_op_ext16s_i32, { "r", "r" } }, +    { INDEX_op_ext16u_i32, { "r", "r" } }, + +    { INDEX_op_bswap16_i32, { "r", "r" } }, +    { INDEX_op_bswap32_i32, { "r", "r" } }, + +    { INDEX_op_add2_i32, { "r", "r", "0", "1", "rA", "r" } }, +    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "rA", "r" } }, + +    { INDEX_op_brcond_i32, { "r", "rC" } }, +    { INDEX_op_setcond_i32, { "r", "r", "rC" } }, +    { INDEX_op_movcond_i32, { "r", "r", "rC", "r", "0" } }, +    { INDEX_op_deposit_i32, { "r", "0", "r" } }, + +    { INDEX_op_qemu_ld_i32, { "r", "L" } }, +    { INDEX_op_qemu_ld_i64, { "r", "L" } }, +    { INDEX_op_qemu_st_i32, { "L", "L" } }, +    { INDEX_op_qemu_st_i64, { "L", "L" } }, + +    { INDEX_op_ld8u_i64, { "r", "r" } }, +    { INDEX_op_ld8s_i64, { "r", "r" } }, +    { INDEX_op_ld16u_i64, { "r", "r" } }, +    { INDEX_op_ld16s_i64, { "r", "r" } }, +    { INDEX_op_ld32u_i64, { "r", "r" } }, +    { INDEX_op_ld32s_i64, { "r", "r" } }, +    { INDEX_op_ld_i64, { "r", "r" } }, + +    { INDEX_op_st8_i64, { "r", "r" } }, +    { INDEX_op_st16_i64, { "r", "r" } }, +    { INDEX_op_st32_i64, { "r", "r" } }, +    { INDEX_op_st_i64, { "r", "r" } }, + +    { INDEX_op_add_i64, { "r", "r", "ri" } }, +    { INDEX_op_sub_i64, { "r", "0", "ri" } }, +    { INDEX_op_mul_i64, { "r", "0", "rK" } }, + +    { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } }, +    { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, +    { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } }, + +    { INDEX_op_and_i64, { "r", "0", "ri" } }, +    { INDEX_op_or_i64, { "r", "0", "rO" } }, +    { INDEX_op_xor_i64, { "r", "0", "rX" } }, + +    { INDEX_op_neg_i64, { "r", "r" } }, + +    { INDEX_op_shl_i64, { "r", "r", "Ri" } 
}, +    { INDEX_op_shr_i64, { "r", "r", "Ri" } }, +    { INDEX_op_sar_i64, { "r", "r", "Ri" } }, + +    { INDEX_op_rotl_i64, { "r", "r", "Ri" } }, +    { INDEX_op_rotr_i64, { "r", "r", "Ri" } }, + +    { INDEX_op_ext8s_i64, { "r", "r" } }, +    { INDEX_op_ext8u_i64, { "r", "r" } }, +    { INDEX_op_ext16s_i64, { "r", "r" } }, +    { INDEX_op_ext16u_i64, { "r", "r" } }, +    { INDEX_op_ext32s_i64, { "r", "r" } }, +    { INDEX_op_ext32u_i64, { "r", "r" } }, + +    { INDEX_op_bswap16_i64, { "r", "r" } }, +    { INDEX_op_bswap32_i64, { "r", "r" } }, +    { INDEX_op_bswap64_i64, { "r", "r" } }, + +    { INDEX_op_add2_i64, { "r", "r", "0", "1", "rA", "r" } }, +    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "rA", "r" } }, + +    { INDEX_op_brcond_i64, { "r", "rC" } }, +    { INDEX_op_setcond_i64, { "r", "r", "rC" } }, +    { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, +    { INDEX_op_deposit_i64, { "r", "0", "r" } }, + +    { -1 }, +}; + +static void query_facilities(void) +{ +    unsigned long hwcap = qemu_getauxval(AT_HWCAP); + +    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this +       is present on all 64-bit systems, but let's check for it anyway.  */ +    if (hwcap & HWCAP_S390_STFLE) { +        register int r0 __asm__("0"); +        register void *r1 __asm__("1"); + +        /* stfle 0(%r1) */ +        r1 = &facilities; +        asm volatile(".word 0xb2b0,0x1000" +                     : "=r"(r0) : "0"(0), "r"(r1) : "memory", "cc"); +    } +} + +static void tcg_target_init(TCGContext *s) +{ +    query_facilities(); + +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + +    tcg_regset_clear(tcg_target_call_clobber_regs); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4); +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5); +    /* The r6 register is technically call-saved, but it's also a parameter +       register, so it can get killed by setup for the qemu_st helper.  */ +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6); +    /* The return register can be considered call-clobbered.  
*/ +    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14); + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); +    /* XXX many insns can't be used with R0, so we better avoid it for now */ +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + +    tcg_add_target_add_op_defs(s390_op_defs); +} + +#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \ +                           + TCG_STATIC_CALL_ARGS_SIZE           \ +                           + CPU_TEMP_BUF_NLONGS * sizeof(long))) + +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    /* stmg %r6,%r15,48(%r15) (save registers) */ +    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); + +    /* aghi %r15,-frame_size */ +    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE); + +    tcg_set_frame(s, TCG_REG_CALL_STACK, +                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, +                  CPU_TEMP_BUF_NLONGS * sizeof(long)); + +    if (GUEST_BASE >= 0x80000) { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); +    } + +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); +    /* br %r3 (go to TB) */ +    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); + +    tb_ret_addr = s->code_ptr; + +    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */ +    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, +                 FRAME_SIZE + 48); + +    /* br %r14 (return) */ +    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); +} + +typedef struct { +    DebugFrameHeader h; +    uint8_t fde_def_cfa[4]; +    uint8_t fde_reg_ofs[18]; +} DebugFrame; + +/* We're expecting a 2 byte uleb128 encoded value.  */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +#define ELF_HOST_MACHINE  EM_S390 + +static const DebugFrame debug_frame = { +    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ +    .h.cie.id = -1, +    .h.cie.version = 1, +    .h.cie.code_align = 1, +    .h.cie.data_align = 8,                /* sleb128 8 */ +    .h.cie.return_column = TCG_REG_R14, + +    /* Total FDE size does not include the "len" member.  */ +    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + +    .fde_def_cfa = { +        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */ +        (FRAME_SIZE & 0x7f) | 0x80,     /* ... 
uleb128 FRAME_SIZE */
+        (FRAME_SIZE >> 7)
+    },
+    .fde_reg_ofs = {
+        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
+        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
+        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
+        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
+        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
+        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
+        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
+        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
+        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
+    }
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
new file mode 100644
index 00000000..91576d59
--- /dev/null
+++ b/tcg/s390/tcg-target.h
@@ -0,0 +1,123 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE. 
+ */ +#ifndef TCG_TARGET_S390  +#define TCG_TARGET_S390 1 + +#define TCG_TARGET_INSN_UNIT_SIZE 2 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19 + +typedef enum TCGReg { +    TCG_REG_R0 = 0, +    TCG_REG_R1, +    TCG_REG_R2, +    TCG_REG_R3, +    TCG_REG_R4, +    TCG_REG_R5, +    TCG_REG_R6, +    TCG_REG_R7, +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, +    TCG_REG_R15 +} TCGReg; + +#define TCG_TARGET_NB_REGS 16 + +/* optional instructions */ +#define TCG_TARGET_HAS_div2_i32         1 +#define TCG_TARGET_HAS_rot_i32          1 +#define TCG_TARGET_HAS_ext8s_i32        1 +#define TCG_TARGET_HAS_ext16s_i32       1 +#define TCG_TARGET_HAS_ext8u_i32        1 +#define TCG_TARGET_HAS_ext16u_i32       1 +#define TCG_TARGET_HAS_bswap16_i32      1 +#define TCG_TARGET_HAS_bswap32_i32      1 +#define TCG_TARGET_HAS_not_i32          0 +#define TCG_TARGET_HAS_neg_i32          1 +#define TCG_TARGET_HAS_andc_i32         0 +#define TCG_TARGET_HAS_orc_i32          0 +#define TCG_TARGET_HAS_eqv_i32          0 +#define TCG_TARGET_HAS_nand_i32         0 +#define TCG_TARGET_HAS_nor_i32          0 +#define TCG_TARGET_HAS_deposit_i32      1 +#define TCG_TARGET_HAS_movcond_i32      1 +#define TCG_TARGET_HAS_add2_i32         1 +#define TCG_TARGET_HAS_sub2_i32         1 +#define TCG_TARGET_HAS_mulu2_i32        0 +#define TCG_TARGET_HAS_muls2_i32        0 +#define TCG_TARGET_HAS_muluh_i32        0 +#define TCG_TARGET_HAS_mulsh_i32        0 +#define TCG_TARGET_HAS_trunc_shr_i32    0 + +#define TCG_TARGET_HAS_div2_i64         1 +#define TCG_TARGET_HAS_rot_i64          1 +#define TCG_TARGET_HAS_ext8s_i64        1 +#define TCG_TARGET_HAS_ext16s_i64       1 +#define TCG_TARGET_HAS_ext32s_i64       1 +#define TCG_TARGET_HAS_ext8u_i64        1 +#define TCG_TARGET_HAS_ext16u_i64       1 +#define TCG_TARGET_HAS_ext32u_i64       1 +#define TCG_TARGET_HAS_bswap16_i64      1 +#define TCG_TARGET_HAS_bswap32_i64      1 +#define TCG_TARGET_HAS_bswap64_i64      1 +#define TCG_TARGET_HAS_not_i64          0 +#define TCG_TARGET_HAS_neg_i64          1 +#define TCG_TARGET_HAS_andc_i64         0 +#define TCG_TARGET_HAS_orc_i64          0 +#define TCG_TARGET_HAS_eqv_i64          0 +#define TCG_TARGET_HAS_nand_i64         0 +#define TCG_TARGET_HAS_nor_i64          0 +#define TCG_TARGET_HAS_deposit_i64      1 +#define TCG_TARGET_HAS_movcond_i64      1 +#define TCG_TARGET_HAS_add2_i64         1 +#define TCG_TARGET_HAS_sub2_i64         1 +#define TCG_TARGET_HAS_mulu2_i64        1 +#define TCG_TARGET_HAS_muls2_i64        0 +#define TCG_TARGET_HAS_muluh_i64        0 +#define TCG_TARGET_HAS_mulsh_i64        0 + +extern bool tcg_target_deposit_valid(int ofs, int len); +#define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid +#define TCG_TARGET_deposit_i64_valid  tcg_target_deposit_valid + +/* used for function call generation */ +#define TCG_REG_CALL_STACK		TCG_REG_R15 +#define TCG_TARGET_STACK_ALIGN		8 +#define TCG_TARGET_CALL_STACK_OFFSET	160 + +#define TCG_TARGET_EXTEND_ARGS 1 + +enum { +    TCG_AREG0 = TCG_REG_R10, +}; + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} + +#endif diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c new file mode 100644 index 00000000..1a870a81 --- /dev/null +++ b/tcg/sparc/tcg-target.c @@ -0,0 +1,1650 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and 
associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tcg-be-null.h" + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +    "%g0", +    "%g1", +    "%g2", +    "%g3", +    "%g4", +    "%g5", +    "%g6", +    "%g7", +    "%o0", +    "%o1", +    "%o2", +    "%o3", +    "%o4", +    "%o5", +    "%o6", +    "%o7", +    "%l0", +    "%l1", +    "%l2", +    "%l3", +    "%l4", +    "%l5", +    "%l6", +    "%l7", +    "%i0", +    "%i1", +    "%i2", +    "%i3", +    "%i4", +    "%i5", +    "%i6", +    "%i7", +}; +#endif + +#ifdef __arch64__ +# define SPARC64 1 +#else +# define SPARC64 0 +#endif + +/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o +   registers.  These are saved manually by the kernel in full 64-bit +   slots.  The %i and %l registers are saved by the register window +   mechanism, which only allocates space for 32 bits.  Given that this +   window spill/fill can happen on any signal, we must consider the +   high bits of the %i and %l registers garbage at all times.  */ +#if SPARC64 +# define ALL_64  0xffffffffu +#else +# define ALL_64  0xffffu +#endif + +/* Define some temporary registers.  T2 is used for constant generation.  
*/ +#define TCG_REG_T1  TCG_REG_G1 +#define TCG_REG_T2  TCG_REG_O7 + +#ifdef CONFIG_USE_GUEST_BASE +# define TCG_GUEST_BASE_REG TCG_REG_I5 +#else +# define TCG_GUEST_BASE_REG TCG_REG_G0 +#endif + +static const int tcg_target_reg_alloc_order[] = { +    TCG_REG_L0, +    TCG_REG_L1, +    TCG_REG_L2, +    TCG_REG_L3, +    TCG_REG_L4, +    TCG_REG_L5, +    TCG_REG_L6, +    TCG_REG_L7, + +    TCG_REG_I0, +    TCG_REG_I1, +    TCG_REG_I2, +    TCG_REG_I3, +    TCG_REG_I4, +    TCG_REG_I5, + +    TCG_REG_G2, +    TCG_REG_G3, +    TCG_REG_G4, +    TCG_REG_G5, + +    TCG_REG_O0, +    TCG_REG_O1, +    TCG_REG_O2, +    TCG_REG_O3, +    TCG_REG_O4, +    TCG_REG_O5, +}; + +static const int tcg_target_call_iarg_regs[6] = { +    TCG_REG_O0, +    TCG_REG_O1, +    TCG_REG_O2, +    TCG_REG_O3, +    TCG_REG_O4, +    TCG_REG_O5, +}; + +static const int tcg_target_call_oarg_regs[] = { +    TCG_REG_O0, +    TCG_REG_O1, +    TCG_REG_O2, +    TCG_REG_O3, +}; + +#define INSN_OP(x)  ((x) << 30) +#define INSN_OP2(x) ((x) << 22) +#define INSN_OP3(x) ((x) << 19) +#define INSN_OPF(x) ((x) << 5) +#define INSN_RD(x)  ((x) << 25) +#define INSN_RS1(x) ((x) << 14) +#define INSN_RS2(x) (x) +#define INSN_ASI(x) ((x) << 5) + +#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) +#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) +#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) +#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) +#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) +#define INSN_COND(x) ((x) << 25) + +#define COND_N     0x0 +#define COND_E     0x1 +#define COND_LE    0x2 +#define COND_L     0x3 +#define COND_LEU   0x4 +#define COND_CS    0x5 +#define COND_NEG   0x6 +#define COND_VS    0x7 +#define COND_A     0x8 +#define COND_NE    0x9 +#define COND_G     0xa +#define COND_GE    0xb +#define COND_GU    0xc +#define COND_CC    0xd +#define COND_POS   0xe +#define COND_VC    0xf +#define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) + +#define RCOND_Z    1 +#define RCOND_LEZ  2 +#define RCOND_LZ   3 +#define RCOND_NZ   5 +#define RCOND_GZ   6 +#define RCOND_GEZ  7 + +#define MOVCC_ICC  (1 << 18) +#define MOVCC_XCC  (1 << 18 | 1 << 12) + +#define BPCC_ICC   0 +#define BPCC_XCC   (2 << 20) +#define BPCC_PT    (1 << 19) +#define BPCC_PN    0 +#define BPCC_A     (1 << 29) + +#define BPR_PT     BPCC_PT + +#define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00)) +#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) +#define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01)) +#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) +#define ARITH_OR   (INSN_OP(2) | INSN_OP3(0x02)) +#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) +#define ARITH_ORN  (INSN_OP(2) | INSN_OP3(0x06)) +#define ARITH_XOR  (INSN_OP(2) | INSN_OP3(0x03)) +#define ARITH_SUB  (INSN_OP(2) | INSN_OP3(0x04)) +#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) +#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) +#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) +#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) +#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) +#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) +#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) +#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) +#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) +#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) +#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) +#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) + +#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) +#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) + +#define 
SHIFT_SLL  (INSN_OP(2) | INSN_OP3(0x25)) +#define SHIFT_SRL  (INSN_OP(2) | INSN_OP3(0x26)) +#define SHIFT_SRA  (INSN_OP(2) | INSN_OP3(0x27)) + +#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12)) +#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12)) +#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12)) + +#define RDY        (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) +#define WRY        (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) +#define JMPL       (INSN_OP(2) | INSN_OP3(0x38)) +#define RETURN     (INSN_OP(2) | INSN_OP3(0x39)) +#define SAVE       (INSN_OP(2) | INSN_OP3(0x3c)) +#define RESTORE    (INSN_OP(2) | INSN_OP3(0x3d)) +#define SETHI      (INSN_OP(0) | INSN_OP2(0x4)) +#define CALL       INSN_OP(1) +#define LDUB       (INSN_OP(3) | INSN_OP3(0x01)) +#define LDSB       (INSN_OP(3) | INSN_OP3(0x09)) +#define LDUH       (INSN_OP(3) | INSN_OP3(0x02)) +#define LDSH       (INSN_OP(3) | INSN_OP3(0x0a)) +#define LDUW       (INSN_OP(3) | INSN_OP3(0x00)) +#define LDSW       (INSN_OP(3) | INSN_OP3(0x08)) +#define LDX        (INSN_OP(3) | INSN_OP3(0x0b)) +#define STB        (INSN_OP(3) | INSN_OP3(0x05)) +#define STH        (INSN_OP(3) | INSN_OP3(0x06)) +#define STW        (INSN_OP(3) | INSN_OP3(0x04)) +#define STX        (INSN_OP(3) | INSN_OP3(0x0e)) +#define LDUBA      (INSN_OP(3) | INSN_OP3(0x11)) +#define LDSBA      (INSN_OP(3) | INSN_OP3(0x19)) +#define LDUHA      (INSN_OP(3) | INSN_OP3(0x12)) +#define LDSHA      (INSN_OP(3) | INSN_OP3(0x1a)) +#define LDUWA      (INSN_OP(3) | INSN_OP3(0x10)) +#define LDSWA      (INSN_OP(3) | INSN_OP3(0x18)) +#define LDXA       (INSN_OP(3) | INSN_OP3(0x1b)) +#define STBA       (INSN_OP(3) | INSN_OP3(0x15)) +#define STHA       (INSN_OP(3) | INSN_OP3(0x16)) +#define STWA       (INSN_OP(3) | INSN_OP3(0x14)) +#define STXA       (INSN_OP(3) | INSN_OP3(0x1e)) + +#ifndef ASI_PRIMARY_LITTLE +#define ASI_PRIMARY_LITTLE 0x88 +#endif + +#define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#ifndef use_vis3_instructions +bool use_vis3_instructions; +#endif + +static inline int check_fit_i64(int64_t val, unsigned int bits) +{ +    return val == sextract64(val, 0, bits); +} + +static inline int check_fit_i32(int32_t val, unsigned int bits) +{ +    return val == sextract32(val, 0, bits); +} + +#define check_fit_tl    check_fit_i64 +#if SPARC64 +# define check_fit_ptr  check_fit_i64 +#else +# define check_fit_ptr  check_fit_i32 +#endif + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    uint32_t insn; + +    assert(addend == 0); +    value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr); + +    switch (type) { +    case R_SPARC_WDISP16: +        if (!check_fit_ptr(value >> 2, 16)) { +            tcg_abort(); +        } +        insn = *code_ptr; +        insn &= ~INSN_OFF16(-1); +        insn |= INSN_OFF16(value); +        *code_ptr = insn; +        break; +    case R_SPARC_WDISP19: +        if (!check_fit_ptr(value >> 2, 19)) { +            tcg_abort(); +        } +        insn = *code_ptr; +        insn &= ~INSN_OFF19(-1); +        insn |= 
INSN_OFF19(value); +        *code_ptr = insn; +        break; +    default: +        tcg_abort(); +    } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str; + +    ct_str = *pct_str; +    switch (ct_str[0]) { +    case 'r': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffffffff); +        break; +    case 'R': +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, ALL_64); +        break; +    case 'A': /* qemu_ld/st address constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, +                         TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff); +    reserve_helpers: +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1); +        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2); +        break; +    case 's': /* qemu_st data 32-bit constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, 0xffffffff); +        goto reserve_helpers; +    case 'S': /* qemu_st data 64-bit constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, ALL_64); +        goto reserve_helpers; +    case 'I': +        ct->ct |= TCG_CT_CONST_S11; +        break; +    case 'J': +        ct->ct |= TCG_CT_CONST_S13; +        break; +    case 'Z': +        ct->ct |= TCG_CT_CONST_ZERO; +        break; +    default: +        return -1; +    } +    ct_str++; +    *pct_str = ct_str; +    return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, TCGType type, +                                         const TCGArgConstraint *arg_ct) +{ +    int ct = arg_ct->ct; + +    if (ct & TCG_CT_CONST) { +        return 1; +    } + +    if (type == TCG_TYPE_I32) { +        val = (int32_t)val; +    } + +    if ((ct & TCG_CT_CONST_ZERO) && val == 0) { +        return 1; +    } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { +        return 1; +    } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { +        return 1; +    } else { +        return 0; +    } +} + +static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, +                                 TCGReg rs2, int op) +{ +    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); +} + +static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, +                                  int32_t offset, int op) +{ +    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); +} + +static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, +			   int32_t val2, int val2const, int op) +{ +    tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) +              | (val2const ? 
INSN_IMM13(val2) : INSN_RS2(val2))); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, +                               TCGReg ret, TCGReg arg) +{ +    if (ret != arg) { +        tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); +    } +} + +static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) +{ +    tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); +} + +static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) +{ +    tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, +                         TCGReg ret, tcg_target_long arg) +{ +    tcg_target_long hi, lo = (int32_t)arg; + +    /* Make sure we test 32-bit constants for imm13 properly.  */ +    if (type == TCG_TYPE_I32) { +        arg = lo; +    } + +    /* A 13-bit constant sign-extended to 64-bits.  */ +    if (check_fit_tl(arg, 13)) { +        tcg_out_movi_imm13(s, ret, arg); +        return; +    } + +    /* A 32-bit constant, or 32-bit zero-extended to 64-bits.  */ +    if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { +        tcg_out_sethi(s, ret, arg); +        if (arg & 0x3ff) { +            tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); +        } +        return; +    } + +    /* A 32-bit constant sign-extended to 64-bits.  */ +    if (arg == lo) { +        tcg_out_sethi(s, ret, ~arg); +        tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); +        return; +    } + +    /* A 64-bit constant decomposed into 2 32-bit pieces.  */ +    if (check_fit_i32(lo, 13)) { +        hi = (arg - lo) >> 32; +        tcg_out_movi(s, TCG_TYPE_I32, ret, hi); +        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); +        tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); +    } else { +        hi = arg >> 32; +        tcg_out_movi(s, TCG_TYPE_I32, ret, hi); +        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo); +        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); +        tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR); +    } +} + +static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, +                                   TCGReg a2, int op) +{ +    tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); +} + +static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, +                         intptr_t offset, int op) +{ +    if (check_fit_ptr(offset, 13)) { +        tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | +                  INSN_IMM13(offset)); +    } else { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); +        tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); +    } +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, +                              TCGReg arg1, intptr_t arg2) +{ +    tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? 
STW : STX)); +} + +static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg) +{ +    tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff); +    tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff); +} + +static inline void tcg_out_sety(TCGContext *s, TCGReg rs) +{ +    tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); +} + +static inline void tcg_out_rdy(TCGContext *s, TCGReg rd) +{ +    tcg_out32(s, RDY | INSN_RD(rd)); +} + +static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, +                          int32_t val2, int val2const, int uns) +{ +    /* Load Y with the sign/zero extension of RS1 to 64-bits.  */ +    if (uns) { +        tcg_out_sety(s, TCG_REG_G0); +    } else { +        tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); +        tcg_out_sety(s, TCG_REG_T1); +    } + +    tcg_out_arithc(s, rd, rs1, val2, val2const, +                   uns ? ARITH_UDIV : ARITH_SDIV); +} + +static inline void tcg_out_nop(TCGContext *s) +{ +    tcg_out_sethi(s, TCG_REG_G0, 0); +} + +static const uint8_t tcg_cond_to_bcond[] = { +    [TCG_COND_EQ] = COND_E, +    [TCG_COND_NE] = COND_NE, +    [TCG_COND_LT] = COND_L, +    [TCG_COND_GE] = COND_GE, +    [TCG_COND_LE] = COND_LE, +    [TCG_COND_GT] = COND_G, +    [TCG_COND_LTU] = COND_CS, +    [TCG_COND_GEU] = COND_CC, +    [TCG_COND_LEU] = COND_LEU, +    [TCG_COND_GTU] = COND_GU, +}; + +static const uint8_t tcg_cond_to_rcond[] = { +    [TCG_COND_EQ] = RCOND_Z, +    [TCG_COND_NE] = RCOND_NZ, +    [TCG_COND_LT] = RCOND_LZ, +    [TCG_COND_GT] = RCOND_GZ, +    [TCG_COND_LE] = RCOND_LEZ, +    [TCG_COND_GE] = RCOND_GEZ +}; + +static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) +{ +    tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); +} + +static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) +{ +    int off19; + +    if (l->has_value) { +        off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); +    } else { +        /* Make sure to preserve destinations during retranslation.  */ +        off19 = *s->code_ptr & INSN_OFF19(-1); +        tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); +    } +    tcg_out_bpcc0(s, scond, flags, off19); +} + +static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) +{ +    tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); +} + +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, +                               int32_t arg2, int const_arg2, TCGLabel *l) +{ +    tcg_out_cmp(s, arg1, arg2, const_arg2); +    tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); +    tcg_out_nop(s); +} + +static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, +                          int32_t v1, int v1const) +{ +    tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) +              | INSN_RS1(tcg_cond_to_bcond[cond]) +              | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, +                                TCGReg c1, int32_t c2, int c2const, +                                int32_t v1, int v1const) +{ +    tcg_out_cmp(s, c1, c2, c2const); +    tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); +} + +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, +                               int32_t arg2, int const_arg2, TCGLabel *l) +{ +    /* For 64-bit signed comparisons vs zero, we can avoid the compare.  
*/ +    if (arg2 == 0 && !is_unsigned_cond(cond)) { +        int off16; + +        if (l->has_value) { +            off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); +        } else { +            /* Make sure to preserve destinations during retranslation.  */ +            off16 = *s->code_ptr & INSN_OFF16(-1); +            tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); +        } +        tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) +                  | INSN_COND(tcg_cond_to_rcond[cond]) | off16); +    } else { +        tcg_out_cmp(s, arg1, arg2, const_arg2); +        tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); +    } +    tcg_out_nop(s); +} + +static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, +                         int32_t v1, int v1const) +{ +    tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) +              | (tcg_cond_to_rcond[cond] << 10) +              | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, +                                TCGReg c1, int32_t c2, int c2const, +                                int32_t v1, int v1const) +{ +    /* For 64-bit signed comparisons vs zero, we can avoid the compare. +       Note that the immediate range is one bit smaller, so we must check +       for that as well.  */ +    if (c2 == 0 && !is_unsigned_cond(cond) +        && (!v1const || check_fit_i32(v1, 10))) { +        tcg_out_movr(s, cond, ret, c1, v1, v1const); +    } else { +        tcg_out_cmp(s, c1, c2, c2const); +        tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); +    } +} + +static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, +                                TCGReg c1, int32_t c2, int c2const) +{ +    /* For 32-bit comparisons, we can play games with ADDC/SUBC.  */ +    switch (cond) { +    case TCG_COND_LTU: +    case TCG_COND_GEU: +        /* The result of the comparison is in the carry bit.  */ +        break; + +    case TCG_COND_EQ: +    case TCG_COND_NE: +        /* For equality, we can transform to inequality vs zero.  */ +        if (c2 != 0) { +            tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR); +            c2 = TCG_REG_T1; +        } else { +            c2 = c1; +        } +        c1 = TCG_REG_G0, c2const = 0; +        cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); +	break; + +    case TCG_COND_GTU: +    case TCG_COND_LEU: +        /* If we don't need to load a constant into a register, we can +           swap the operands on GTU/LEU.  There's no benefit to loading +           the constant into a temporary register.  
*/ +        if (!c2const || c2 == 0) { +            TCGReg t = c1; +            c1 = c2; +            c2 = t; +            c2const = 0; +            cond = tcg_swap_cond(cond); +            break; +        } +        /* FALLTHRU */ + +    default: +        tcg_out_cmp(s, c1, c2, c2const); +        tcg_out_movi_imm13(s, ret, 0); +        tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); +        return; +    } + +    tcg_out_cmp(s, c1, c2, c2const); +    if (cond == TCG_COND_LTU) { +        tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); +    } else { +        tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); +    } +} + +static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, +                                TCGReg c1, int32_t c2, int c2const) +{ +    if (use_vis3_instructions) { +        switch (cond) { +        case TCG_COND_NE: +            if (c2 != 0) { +                break; +            } +            c2 = c1, c2const = 0, c1 = TCG_REG_G0; +            /* FALLTHRU */ +        case TCG_COND_LTU: +            tcg_out_cmp(s, c1, c2, c2const); +            tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); +            return; +        default: +            break; +        } +    } + +    /* For 64-bit signed comparisons vs zero, we can avoid the compare +       if the input does not overlap the output.  */ +    if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { +        tcg_out_movi_imm13(s, ret, 0); +        tcg_out_movr(s, cond, ret, c1, 1, 1); +    } else { +        tcg_out_cmp(s, c1, c2, c2const); +        tcg_out_movi_imm13(s, ret, 0); +        tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); +    } +} + +static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, +                                TCGReg al, TCGReg ah, int32_t bl, int blconst, +                                int32_t bh, int bhconst, int opl, int oph) +{ +    TCGReg tmp = TCG_REG_T1; + +    /* Note that the low parts are fully consumed before tmp is set.  */ +    if (rl != ah && (bhconst || rl != bh)) { +        tmp = rl; +    } + +    tcg_out_arithc(s, tmp, al, bl, blconst, opl); +    tcg_out_arithc(s, rh, ah, bh, bhconst, oph); +    tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); +} + +static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, +                                TCGReg al, TCGReg ah, int32_t bl, int blconst, +                                int32_t bh, int bhconst, bool is_sub) +{ +    TCGReg tmp = TCG_REG_T1; + +    /* Note that the low parts are fully consumed before tmp is set.  */ +    if (rl != ah && (bhconst || rl != bh)) { +        tmp = rl; +    } + +    tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); + +    if (use_vis3_instructions && !is_sub) { +        /* Note that ADDXC doesn't accept immediates.  */ +        if (bhconst && bh != 0) { +           tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh); +           bh = TCG_REG_T2; +        } +        tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); +    } else if (bh == TCG_REG_G0) { +	/* If we have a zero, we can perform the operation in two insns, +           with the arithmetic first, and a conditional move into place.  */ +	if (rh == ah) { +            tcg_out_arithi(s, TCG_REG_T2, ah, 1, +			   is_sub ? ARITH_SUB : ARITH_ADD); +            tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); +	} else { +            tcg_out_arithi(s, rh, ah, 1, is_sub ? 
ARITH_SUB : ARITH_ADD); +	    tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); +	} +    } else { +        /* Otherwise adjust BH as if there is carry into T2 ... */ +        if (bhconst) { +            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1)); +        } else { +            tcg_out_arithi(s, TCG_REG_T2, bh, 1, +                           is_sub ? ARITH_SUB : ARITH_ADD); +        } +        /* ... smoosh T2 back to original BH if carry is clear ... */ +        tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); +	/* ... and finally perform the arithmetic with the new operand.  */ +        tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); +    } + +    tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); +} + +static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest) +{ +    ptrdiff_t disp = tcg_pcrel_diff(s, dest); + +    if (disp == (int32_t)disp) { +        tcg_out32(s, CALL | (uint32_t)disp >> 2); +    } else { +        uintptr_t desti = (uintptr_t)dest; +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff); +        tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL); +    } +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) +{ +    tcg_out_call_nodelay(s, dest); +    tcg_out_nop(s); +} + +#ifdef CONFIG_SOFTMMU +static tcg_insn_unit *qemu_ld_trampoline[16]; +static tcg_insn_unit *qemu_st_trampoline[16]; + +static void build_trampolines(TCGContext *s) +{ +    static void * const qemu_ld_helpers[16] = { +        [MO_UB]   = helper_ret_ldub_mmu, +        [MO_SB]   = helper_ret_ldsb_mmu, +        [MO_LEUW] = helper_le_lduw_mmu, +        [MO_LESW] = helper_le_ldsw_mmu, +        [MO_LEUL] = helper_le_ldul_mmu, +        [MO_LEQ]  = helper_le_ldq_mmu, +        [MO_BEUW] = helper_be_lduw_mmu, +        [MO_BESW] = helper_be_ldsw_mmu, +        [MO_BEUL] = helper_be_ldul_mmu, +        [MO_BEQ]  = helper_be_ldq_mmu, +    }; +    static void * const qemu_st_helpers[16] = { +        [MO_UB]   = helper_ret_stb_mmu, +        [MO_LEUW] = helper_le_stw_mmu, +        [MO_LEUL] = helper_le_stl_mmu, +        [MO_LEQ]  = helper_le_stq_mmu, +        [MO_BEUW] = helper_be_stw_mmu, +        [MO_BEUL] = helper_be_stl_mmu, +        [MO_BEQ]  = helper_be_stq_mmu, +    }; + +    int i; +    TCGReg ra; + +    for (i = 0; i < 16; ++i) { +        if (qemu_ld_helpers[i] == NULL) { +            continue; +        } + +        /* May as well align the trampoline.  */ +        while ((uintptr_t)s->code_ptr & 15) { +            tcg_out_nop(s); +        } +        qemu_ld_trampoline[i] = s->code_ptr; + +        if (SPARC64 || TARGET_LONG_BITS == 32) { +            ra = TCG_REG_O3; +        } else { +            /* Install the high part of the address.  */ +            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); +            ra = TCG_REG_O4; +        } + +        /* Set the retaddr operand.  */ +        tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); +        /* Set the env operand.  */ +        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); +        /* Tail call.  */ +        tcg_out_call_nodelay(s, qemu_ld_helpers[i]); +        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); +    } + +    for (i = 0; i < 16; ++i) { +        if (qemu_st_helpers[i] == NULL) { +            continue; +        } + +        /* May as well align the trampoline.  
*/ +        while ((uintptr_t)s->code_ptr & 15) { +            tcg_out_nop(s); +        } +        qemu_st_trampoline[i] = s->code_ptr; + +        if (SPARC64) { +            ra = TCG_REG_O4; +        } else { +            ra = TCG_REG_O1; +            if (TARGET_LONG_BITS == 64) { +                /* Install the high part of the address.  */ +                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); +                ra += 2; +            } else { +                ra += 1; +            } +            if ((i & MO_SIZE) == MO_64) { +                /* Install the high part of the data.  */ +                tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); +                ra += 2; +            } else { +                ra += 1; +            } +            /* Skip the oi argument.  */ +            ra += 1; +        } +                 +        /* Set the retaddr operand.  */ +        if (ra >= TCG_REG_O6) { +            tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, +                       TCG_TARGET_CALL_STACK_OFFSET); +            ra = TCG_REG_G1; +        } +        tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); +        /* Set the env operand.  */ +        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0); +        /* Tail call.  */ +        tcg_out_call_nodelay(s, qemu_st_helpers[i]); +        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra); +    } +} +#endif + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ +    int tmp_buf_size, frame_size; + +    /* The TCG temp buffer is at the top of the frame, immediately +       below the frame pointer.  */ +    tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); +    tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, +                  tmp_buf_size); + +    /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is +       otherwise the minimal frame usable by callees.  */ +    frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; +    frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; +    frame_size += TCG_TARGET_STACK_ALIGN - 1; +    frame_size &= -TCG_TARGET_STACK_ALIGN; +    tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | +              INSN_IMM13(-frame_size)); + +#ifdef CONFIG_USE_GUEST_BASE +    if (GUEST_BASE != 0) { +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); +        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); +    } +#endif + +    tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); +    /* delay slot */ +    tcg_out_nop(s); + +    /* No epilogue required.  We issue ret + restore directly in the TB.  */ + +#ifdef CONFIG_SOFTMMU +    build_trampolines(s); +#endif +} + +#if defined(CONFIG_SOFTMMU) +/* Perform the TLB load and compare. + +   Inputs: +   ADDRLO and ADDRHI contain the possible two parts of the address. + +   MEM_INDEX and S_BITS are the memory context and log2 size of the load. + +   WHICH is the offset into the CPUTLBEntry structure of the slot to read. +   This should be offsetof addr_read or addr_write. + +   The result of the TLB comparison is in %[ix]cc.  The sanitized address +   is in the returned register, maybe %o0.  The TLB addend is in %o1.  
*/ + +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, +                               TCGMemOp s_bits, int which) +{ +    const TCGReg r0 = TCG_REG_O0; +    const TCGReg r1 = TCG_REG_O1; +    const TCGReg r2 = TCG_REG_O2; +    int tlb_ofs; + +    /* Shift the page number down.  */ +    tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); + +    /* Mask out the page offset, except for the required alignment.  */ +    tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, +                 TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + +    /* Mask the tlb index.  */ +    tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND); +     +    /* Mask page, part 2.  */ +    tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND); + +    /* Shift the tlb index into place.  */ +    tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL); + +    /* Relative to the current ENV.  */ +    tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD); + +    /* Find a base address that can load both tlb comparator and addend.  */ +    tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]); +    if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) { +        if (tlb_ofs & ~0x3ff) { +            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff); +            tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD); +        } +        tlb_ofs &= 0x3ff; +    } + +    /* Load the tlb comparator and the addend.  */ +    tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which); +    tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend)); + +    /* subcc arg0, arg2, %g0 */ +    tcg_out_cmp(s, r0, r2, 0); + +    /* If the guest address must be zero-extended, do so now.  */ +    if (SPARC64 && TARGET_LONG_BITS == 32) { +        tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); +        return r0; +    } +    return addr; +} +#endif /* CONFIG_SOFTMMU */ + +static const int qemu_ld_opc[16] = { +    [MO_UB]   = LDUB, +    [MO_SB]   = LDSB, + +    [MO_BEUW] = LDUH, +    [MO_BESW] = LDSH, +    [MO_BEUL] = LDUW, +    [MO_BESL] = LDSW, +    [MO_BEQ]  = LDX, + +    [MO_LEUW] = LDUH_LE, +    [MO_LESW] = LDSH_LE, +    [MO_LEUL] = LDUW_LE, +    [MO_LESL] = LDSW_LE, +    [MO_LEQ]  = LDX_LE, +}; + +static const int qemu_st_opc[16] = { +    [MO_UB]   = STB, + +    [MO_BEUW] = STH, +    [MO_BEUL] = STW, +    [MO_BEQ]  = STX, + +    [MO_LEUW] = STH_LE, +    [MO_LEUL] = STW_LE, +    [MO_LEQ]  = STX_LE, +}; + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, +                            TCGMemOpIdx oi, bool is_64) +{ +    TCGMemOp memop = get_memop(oi); +#ifdef CONFIG_SOFTMMU +    unsigned memi = get_mmuidx(oi); +    TCGReg addrz, param; +    tcg_insn_unit *func; +    tcg_insn_unit *label_ptr; + +    addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, +                             offsetof(CPUTLBEntry, addr_read)); + +    /* The fast path is exactly one insn.  Thus we can perform the +       entire TLB Hit in the (annulled) delay slot of the branch +       over the TLB Miss case.  */ + +    /* beq,a,pt %[xi]cc, label0 */ +    label_ptr = s->code_ptr; +    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT +                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); +    /* delay slot */ +    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, +                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + +    /* TLB Miss.  */ + +    param = TCG_REG_O1; +    if (!SPARC64 && TARGET_LONG_BITS == 64) { +        /* Skip the high-part; we'll perform the extract in the trampoline.  
*/ +        param++; +    } +    tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + +    /* We use the helpers to extend SB and SW data, leaving the case +       of SL needing explicit extending below.  */ +    if ((memop & MO_SSIZE) == MO_SL) { +        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; +    } else { +        func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; +    } +    assert(func != NULL); +    tcg_out_call_nodelay(s, func); +    /* delay slot */ +    tcg_out_movi(s, TCG_TYPE_I32, param, oi); + +    /* Recall that all of the helpers return 64-bit results. +       Which complicates things for sparcv8plus.  */ +    if (SPARC64) { +        /* We let the helper sign-extend SB and SW, but leave SL for here.  */ +        if (is_64 && (memop & MO_SSIZE) == MO_SL) { +            tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); +        } else { +            tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); +        } +    } else { +        if ((memop & MO_SIZE) == MO_64) { +            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); +            tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); +            tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); +        } else if (is_64) { +            /* Re-extend from 32-bit rather than reassembling when we +               know the high register must be an extension.  */ +            tcg_out_arithi(s, data, TCG_REG_O1, 0, +                           memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); +        } else { +            tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); +        } +    } + +    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else +    if (SPARC64 && TARGET_LONG_BITS == 32) { +        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); +        addr = TCG_REG_T1; +    } +    tcg_out_ldst_rr(s, data, addr, +                    (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), +                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, +                            TCGMemOpIdx oi) +{ +    TCGMemOp memop = get_memop(oi); +#ifdef CONFIG_SOFTMMU +    unsigned memi = get_mmuidx(oi); +    TCGReg addrz, param; +    tcg_insn_unit *func; +    tcg_insn_unit *label_ptr; + +    addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, +                             offsetof(CPUTLBEntry, addr_write)); + +    /* The fast path is exactly one insn.  Thus we can perform the entire +       TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */ +    /* beq,a,pt %[xi]cc, label0 */ +    label_ptr = s->code_ptr; +    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT +                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); +    /* delay slot */ +    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, +                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + +    /* TLB Miss.  */ + +    param = TCG_REG_O1; +    if (!SPARC64 && TARGET_LONG_BITS == 64) { +        /* Skip the high-part; we'll perform the extract in the trampoline.  */ +        param++; +    } +    tcg_out_mov(s, TCG_TYPE_REG, param++, addr); +    if (!SPARC64 && (memop & MO_SIZE) == MO_64) { +        /* Skip the high-part; we'll perform the extract in the trampoline.  
*/ +        param++; +    } +    tcg_out_mov(s, TCG_TYPE_REG, param++, data); + +    func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; +    assert(func != NULL); +    tcg_out_call_nodelay(s, func); +    /* delay slot */ +    tcg_out_movi(s, TCG_TYPE_I32, param, oi); + +    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else +    if (SPARC64 && TARGET_LONG_BITS == 32) { +        tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); +        addr = TCG_REG_T1; +    } +    tcg_out_ldst_rr(s, data, addr, +                    (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), +                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, +                       const TCGArg args[TCG_MAX_OP_ARGS], +                       const int const_args[TCG_MAX_OP_ARGS]) +{ +    TCGArg a0, a1, a2; +    int c, c2; + +    /* Hoist the loads of the most common arguments.  */ +    a0 = args[0]; +    a1 = args[1]; +    a2 = args[2]; +    c2 = const_args[2]; + +    switch (opc) { +    case INDEX_op_exit_tb: +        if (check_fit_ptr(a0, 13)) { +            tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); +            tcg_out_movi_imm13(s, TCG_REG_O0, a0); +        } else { +            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); +            tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); +            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); +        } +        break; +    case INDEX_op_goto_tb: +        if (s->tb_jmp_offset) { +            /* direct jump method */ +            s->tb_jmp_offset[a0] = tcg_current_code_size(s); +            /* Make sure to preserve links during retranslation.  */ +            tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1))); +        } else { +            /* indirect jump method */ +            tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0)); +            tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL); +        } +        tcg_out_nop(s); +        s->tb_next_offset[a0] = tcg_current_code_size(s); +        break; +    case INDEX_op_br: +        tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); +        tcg_out_nop(s); +        break; + +#define OP_32_64(x)                             \ +        glue(glue(case INDEX_op_, x), _i32):    \ +        glue(glue(case INDEX_op_, x), _i64) + +    OP_32_64(ld8u): +        tcg_out_ldst(s, a0, a1, a2, LDUB); +        break; +    OP_32_64(ld8s): +        tcg_out_ldst(s, a0, a1, a2, LDSB); +        break; +    OP_32_64(ld16u): +        tcg_out_ldst(s, a0, a1, a2, LDUH); +        break; +    OP_32_64(ld16s): +        tcg_out_ldst(s, a0, a1, a2, LDSH); +        break; +    case INDEX_op_ld_i32: +    case INDEX_op_ld32u_i64: +        tcg_out_ldst(s, a0, a1, a2, LDUW); +        break; +    OP_32_64(st8): +        tcg_out_ldst(s, a0, a1, a2, STB); +        break; +    OP_32_64(st16): +        tcg_out_ldst(s, a0, a1, a2, STH); +        break; +    case INDEX_op_st_i32: +    case INDEX_op_st32_i64: +        tcg_out_ldst(s, a0, a1, a2, STW); +        break; +    OP_32_64(add): +        c = ARITH_ADD; +        goto gen_arith; +    OP_32_64(sub): +        c = ARITH_SUB; +        goto gen_arith; +    OP_32_64(and): +        c = ARITH_AND; +        goto gen_arith; +    OP_32_64(andc): +        c = ARITH_ANDN; +        goto gen_arith; +    OP_32_64(or): +        c = ARITH_OR; +        goto gen_arith; +    OP_32_64(orc): +        c = ARITH_ORN; +        goto gen_arith; +    
OP_32_64(xor): +        c = ARITH_XOR; +        goto gen_arith; +    case INDEX_op_shl_i32: +        c = SHIFT_SLL; +    do_shift32: +        /* Limit immediate shift count lest we create an illegal insn.  */ +        tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); +        break; +    case INDEX_op_shr_i32: +        c = SHIFT_SRL; +        goto do_shift32; +    case INDEX_op_sar_i32: +        c = SHIFT_SRA; +        goto do_shift32; +    case INDEX_op_mul_i32: +        c = ARITH_UMUL; +        goto gen_arith; + +    OP_32_64(neg): +	c = ARITH_SUB; +	goto gen_arith1; +    OP_32_64(not): +	c = ARITH_ORN; +	goto gen_arith1; + +    case INDEX_op_div_i32: +        tcg_out_div32(s, a0, a1, a2, c2, 0); +        break; +    case INDEX_op_divu_i32: +        tcg_out_div32(s, a0, a1, a2, c2, 1); +        break; + +    case INDEX_op_brcond_i32: +        tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); +        break; +    case INDEX_op_setcond_i32: +        tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); +        break; +    case INDEX_op_movcond_i32: +        tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); +        break; + +    case INDEX_op_add2_i32: +        tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], +                            args[4], const_args[4], args[5], const_args[5], +                            ARITH_ADDCC, ARITH_ADDC); +        break; +    case INDEX_op_sub2_i32: +        tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], +                            args[4], const_args[4], args[5], const_args[5], +                            ARITH_SUBCC, ARITH_SUBC); +        break; +    case INDEX_op_mulu2_i32: +        c = ARITH_UMUL; +        goto do_mul2; +    case INDEX_op_muls2_i32: +        c = ARITH_SMUL; +    do_mul2: +        /* The 32-bit multiply insns produce a full 64-bit result.  If the +           destination register can hold it, we can avoid the slower RDY.  */ +        tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); +        if (SPARC64 || a0 <= TCG_REG_O7) { +            tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); +        } else { +            tcg_out_rdy(s, a1); +        } +        break; + +    case INDEX_op_qemu_ld_i32: +        tcg_out_qemu_ld(s, a0, a1, a2, false); +        break; +    case INDEX_op_qemu_ld_i64: +        tcg_out_qemu_ld(s, a0, a1, a2, true); +        break; +    case INDEX_op_qemu_st_i32: +    case INDEX_op_qemu_st_i64: +        tcg_out_qemu_st(s, a0, a1, a2); +        break; + +    case INDEX_op_ld32s_i64: +        tcg_out_ldst(s, a0, a1, a2, LDSW); +        break; +    case INDEX_op_ld_i64: +        tcg_out_ldst(s, a0, a1, a2, LDX); +        break; +    case INDEX_op_st_i64: +        tcg_out_ldst(s, a0, a1, a2, STX); +        break; +    case INDEX_op_shl_i64: +        c = SHIFT_SLLX; +    do_shift64: +        /* Limit immediate shift count lest we create an illegal insn.  
*/ +        tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); +        break; +    case INDEX_op_shr_i64: +        c = SHIFT_SRLX; +        goto do_shift64; +    case INDEX_op_sar_i64: +        c = SHIFT_SRAX; +        goto do_shift64; +    case INDEX_op_mul_i64: +        c = ARITH_MULX; +        goto gen_arith; +    case INDEX_op_div_i64: +        c = ARITH_SDIVX; +        goto gen_arith; +    case INDEX_op_divu_i64: +        c = ARITH_UDIVX; +        goto gen_arith; +    case INDEX_op_ext32s_i64: +        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); +        break; +    case INDEX_op_ext32u_i64: +        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); +        break; +    case INDEX_op_trunc_shr_i32: +        if (a2 == 0) { +            tcg_out_mov(s, TCG_TYPE_I32, a0, a1); +        } else { +            tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX); +        } +        break; + +    case INDEX_op_brcond_i64: +        tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); +        break; +    case INDEX_op_setcond_i64: +        tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); +        break; +    case INDEX_op_movcond_i64: +        tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); +        break; +    case INDEX_op_add2_i64: +        tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], +                            const_args[4], args[5], const_args[5], false); +        break; +    case INDEX_op_sub2_i64: +        tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], +                            const_args[4], args[5], const_args[5], true); +        break; +    case INDEX_op_muluh_i64: +        tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); +        break; + +    gen_arith: +        tcg_out_arithc(s, a0, a1, a2, c2, c); +        break; + +    gen_arith1: +	tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); +	break; + +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_mov_i64: +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_movi_i64: +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  
*/ +    default: +        tcg_abort(); +    } +} + +static const TCGTargetOpDef sparc_op_defs[] = { +    { INDEX_op_exit_tb, { } }, +    { INDEX_op_goto_tb, { } }, +    { INDEX_op_br, { } }, + +    { INDEX_op_ld8u_i32, { "r", "r" } }, +    { INDEX_op_ld8s_i32, { "r", "r" } }, +    { INDEX_op_ld16u_i32, { "r", "r" } }, +    { INDEX_op_ld16s_i32, { "r", "r" } }, +    { INDEX_op_ld_i32, { "r", "r" } }, +    { INDEX_op_st8_i32, { "rZ", "r" } }, +    { INDEX_op_st16_i32, { "rZ", "r" } }, +    { INDEX_op_st_i32, { "rZ", "r" } }, + +    { INDEX_op_add_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_mul_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_div_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_divu_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_sub_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_and_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_andc_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_or_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_orc_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_xor_i32, { "r", "rZ", "rJ" } }, + +    { INDEX_op_shl_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_shr_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_sar_i32, { "r", "rZ", "rJ" } }, + +    { INDEX_op_neg_i32, { "r", "rJ" } }, +    { INDEX_op_not_i32, { "r", "rJ" } }, + +    { INDEX_op_brcond_i32, { "rZ", "rJ" } }, +    { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } }, +    { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } }, + +    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, +    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } }, +    { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } }, +    { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } }, + +    { INDEX_op_ld8u_i64, { "R", "r" } }, +    { INDEX_op_ld8s_i64, { "R", "r" } }, +    { INDEX_op_ld16u_i64, { "R", "r" } }, +    { INDEX_op_ld16s_i64, { "R", "r" } }, +    { INDEX_op_ld32u_i64, { "R", "r" } }, +    { INDEX_op_ld32s_i64, { "R", "r" } }, +    { INDEX_op_ld_i64, { "R", "r" } }, +    { INDEX_op_st8_i64, { "RZ", "r" } }, +    { INDEX_op_st16_i64, { "RZ", "r" } }, +    { INDEX_op_st32_i64, { "RZ", "r" } }, +    { INDEX_op_st_i64, { "RZ", "r" } }, + +    { INDEX_op_add_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_mul_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_div_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_divu_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_sub_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_and_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_andc_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_or_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_orc_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_xor_i64, { "R", "RZ", "RJ" } }, + +    { INDEX_op_shl_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_shr_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_sar_i64, { "R", "RZ", "RJ" } }, + +    { INDEX_op_neg_i64, { "R", "RJ" } }, +    { INDEX_op_not_i64, { "R", "RJ" } }, + +    { INDEX_op_ext32s_i64, { "R", "r" } }, +    { INDEX_op_ext32u_i64, { "R", "r" } }, +    { INDEX_op_trunc_shr_i32,  { "r", "R" } }, + +    { INDEX_op_brcond_i64, { "RZ", "RJ" } }, +    { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, +    { INDEX_op_movcond_i64, { "R", "RZ", "RJ", "RI", "0" } }, + +    { INDEX_op_add2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, +    { INDEX_op_sub2_i64, { "R", "R", "RZ", "RZ", "RJ", "RI" } }, +    { INDEX_op_muluh_i64, { "R", "RZ", "RZ" } }, + +    { INDEX_op_qemu_ld_i32, { "r", "A" } }, +    { INDEX_op_qemu_ld_i64, { "R", "A" } }, +    { INDEX_op_qemu_st_i32, { "sZ", "A" } }, +    { INDEX_op_qemu_st_i64, { "SZ", "A" } }, + +    { -1 }, +}; + +static void 
tcg_target_init(TCGContext *s)
+{
+    /* Only probe for the platform and capabilities if we haven't already
+       determined maximum values at compile time.  */
+#ifndef use_vis3_instructions
+    {
+        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+        use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
+    }
+#endif
+
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, ALL_64);
+
+    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
+                     (1 << TCG_REG_G1) |
+                     (1 << TCG_REG_G2) |
+                     (1 << TCG_REG_G3) |
+                     (1 << TCG_REG_G4) |
+                     (1 << TCG_REG_G5) |
+                     (1 << TCG_REG_G6) |
+                     (1 << TCG_REG_G7) |
+                     (1 << TCG_REG_O0) |
+                     (1 << TCG_REG_O1) |
+                     (1 << TCG_REG_O2) |
+                     (1 << TCG_REG_O3) |
+                     (1 << TCG_REG_O4) |
+                     (1 << TCG_REG_O5) |
+                     (1 << TCG_REG_O7));
+
+    tcg_regset_clear(s->reserved_regs);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
+
+    tcg_add_target_add_op_defs(sparc_op_defs);
+}
+
+#if SPARC64
+# define ELF_HOST_MACHINE  EM_SPARCV9
+#else
+# define ELF_HOST_MACHINE  EM_SPARC32PLUS
+# define ELF_HOST_FLAGS    EF_SPARC_32PLUS
+#endif
+
+typedef struct {
+    DebugFrameHeader h;
+    uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
+    uint8_t fde_win_save;
+    uint8_t fde_ret_save[3];
+} DebugFrame;
+
+static const DebugFrame debug_frame = {
+    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+    .h.cie.id = -1,
+    .h.cie.version = 1,
+    .h.cie.code_align = 1,
+    .h.cie.data_align = -sizeof(void *) & 0x7f,
+    .h.cie.return_column = 15,            /* o7 */
+
+    /* Total FDE size does not include the "len" member.  */
+    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+    .fde_def_cfa = {
+#if SPARC64
+        12, 30,                         /* DW_CFA_def_cfa i6, 2047 */
+        (2047 & 0x7f) | 0x80, (2047 >> 7)
+#else
+        13, 30                          /* DW_CFA_def_cfa_register i6 */
+#endif
+    },
+    .fde_win_save = 0x2d,               /* DW_CFA_GNU_window_save */
+    .fde_ret_save = { 9, 15, 31 },      /* DW_CFA_register o7, i7 */
+};
+
+void tcg_register_jit(void *buf, size_t buf_size)
+{
+    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+    uint32_t *ptr = (uint32_t *)jmp_addr;
+    uintptr_t disp = addr - jmp_addr;
+
+    /* We can reach the entire address space for 32-bit.  For 64-bit
+       the code_gen_buffer can't be larger than 2GB.  
*/ +    assert(disp == (int32_t)disp); + +    *ptr = CALL | (uint32_t)disp >> 2; +    flush_icache_range(jmp_addr, jmp_addr + 4); +} diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h new file mode 100644 index 00000000..f584de47 --- /dev/null +++ b/tcg/sparc/tcg-target.h @@ -0,0 +1,160 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef TCG_TARGET_SPARC  +#define TCG_TARGET_SPARC 1 + +#define TCG_TARGET_REG_BITS 64 + +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 +#define TCG_TARGET_NB_REGS 32 + +typedef enum { +    TCG_REG_G0 = 0, +    TCG_REG_G1, +    TCG_REG_G2, +    TCG_REG_G3, +    TCG_REG_G4, +    TCG_REG_G5, +    TCG_REG_G6, +    TCG_REG_G7, +    TCG_REG_O0, +    TCG_REG_O1, +    TCG_REG_O2, +    TCG_REG_O3, +    TCG_REG_O4, +    TCG_REG_O5, +    TCG_REG_O6, +    TCG_REG_O7, +    TCG_REG_L0, +    TCG_REG_L1, +    TCG_REG_L2, +    TCG_REG_L3, +    TCG_REG_L4, +    TCG_REG_L5, +    TCG_REG_L6, +    TCG_REG_L7, +    TCG_REG_I0, +    TCG_REG_I1, +    TCG_REG_I2, +    TCG_REG_I3, +    TCG_REG_I4, +    TCG_REG_I5, +    TCG_REG_I6, +    TCG_REG_I7, +} TCGReg; + +#define TCG_CT_CONST_S11  0x100 +#define TCG_CT_CONST_S13  0x200 +#define TCG_CT_CONST_ZERO 0x400 + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_O6 + +#ifdef __arch64__ +#define TCG_TARGET_STACK_BIAS           2047 +#define TCG_TARGET_STACK_ALIGN          16 +#define TCG_TARGET_CALL_STACK_OFFSET    (128 + 6*8 + TCG_TARGET_STACK_BIAS) +#else +#define TCG_TARGET_STACK_BIAS           0 +#define TCG_TARGET_STACK_ALIGN          8 +#define TCG_TARGET_CALL_STACK_OFFSET    (64 + 4 + 6*4) +#endif + +#ifdef __arch64__ +#define TCG_TARGET_EXTEND_ARGS 1 +#endif + +#if defined(__VIS__) && __VIS__ >= 0x300 +#define use_vis3_instructions  1 +#else +extern bool use_vis3_instructions; +#endif + +/* optional instructions */ +#define TCG_TARGET_HAS_div_i32		1 +#define TCG_TARGET_HAS_rem_i32		0 +#define TCG_TARGET_HAS_rot_i32          0 +#define TCG_TARGET_HAS_ext8s_i32        0 +#define TCG_TARGET_HAS_ext16s_i32       0 +#define TCG_TARGET_HAS_ext8u_i32        0 +#define TCG_TARGET_HAS_ext16u_i32       0 +#define TCG_TARGET_HAS_bswap16_i32      0 +#define TCG_TARGET_HAS_bswap32_i32      0 +#define TCG_TARGET_HAS_neg_i32          1 +#define TCG_TARGET_HAS_not_i32          1 +#define TCG_TARGET_HAS_andc_i32         1 +#define TCG_TARGET_HAS_orc_i32          1 
+#define TCG_TARGET_HAS_eqv_i32          0 +#define TCG_TARGET_HAS_nand_i32         0 +#define TCG_TARGET_HAS_nor_i32          0 +#define TCG_TARGET_HAS_deposit_i32      0 +#define TCG_TARGET_HAS_movcond_i32      1 +#define TCG_TARGET_HAS_add2_i32         1 +#define TCG_TARGET_HAS_sub2_i32         1 +#define TCG_TARGET_HAS_mulu2_i32        1 +#define TCG_TARGET_HAS_muls2_i32        1 +#define TCG_TARGET_HAS_muluh_i32        0 +#define TCG_TARGET_HAS_mulsh_i32        0 + +#define TCG_TARGET_HAS_trunc_shr_i32    1 +#define TCG_TARGET_HAS_div_i64          1 +#define TCG_TARGET_HAS_rem_i64          0 +#define TCG_TARGET_HAS_rot_i64          0 +#define TCG_TARGET_HAS_ext8s_i64        0 +#define TCG_TARGET_HAS_ext16s_i64       0 +#define TCG_TARGET_HAS_ext32s_i64       1 +#define TCG_TARGET_HAS_ext8u_i64        0 +#define TCG_TARGET_HAS_ext16u_i64       0 +#define TCG_TARGET_HAS_ext32u_i64       1 +#define TCG_TARGET_HAS_bswap16_i64      0 +#define TCG_TARGET_HAS_bswap32_i64      0 +#define TCG_TARGET_HAS_bswap64_i64      0 +#define TCG_TARGET_HAS_neg_i64          1 +#define TCG_TARGET_HAS_not_i64          1 +#define TCG_TARGET_HAS_andc_i64         1 +#define TCG_TARGET_HAS_orc_i64          1 +#define TCG_TARGET_HAS_eqv_i64          0 +#define TCG_TARGET_HAS_nand_i64         0 +#define TCG_TARGET_HAS_nor_i64          0 +#define TCG_TARGET_HAS_deposit_i64      0 +#define TCG_TARGET_HAS_movcond_i64      1 +#define TCG_TARGET_HAS_add2_i64         1 +#define TCG_TARGET_HAS_sub2_i64         1 +#define TCG_TARGET_HAS_mulu2_i64        0 +#define TCG_TARGET_HAS_muls2_i64        0 +#define TCG_TARGET_HAS_muluh_i64        use_vis3_instructions +#define TCG_TARGET_HAS_mulsh_i64        0 + +#define TCG_AREG0 TCG_REG_I0 + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +    uintptr_t p; +    for (p = start & -8; p < ((stop + 7) & -8); p += 8) { +        __asm__ __volatile__("flush\t%0" : : "r" (p)); +    } +} + +#endif diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h new file mode 100644 index 00000000..40a2369b --- /dev/null +++ b/tcg/tcg-be-ldst.h @@ -0,0 +1,88 @@ +/* + * TCG Backend Data: load-store optimization only. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifdef CONFIG_SOFTMMU + +typedef struct TCGLabelQemuLdst { +    bool is_ld;             /* qemu_ld: true, qemu_st: false */ +    TCGMemOpIdx oi; +    TCGType type;           /* result type of a load */ +    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */ +    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */ +    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */ +    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */ +    tcg_insn_unit *raddr;   /* gen code addr of the next IR of qemu_ld/st IR */ +    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ +    struct TCGLabelQemuLdst *next; +} TCGLabelQemuLdst; + +typedef struct TCGBackendData { +    TCGLabelQemuLdst *labels; +} TCGBackendData; + + +/* + * Initialize TB backend data at the beginning of the TB. + */ + +static inline void tcg_out_tb_init(TCGContext *s) +{ +    s->be->labels = NULL; +} + +/* + * Generate TB finalization at the end of block + */ + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); + +static void tcg_out_tb_finalize(TCGContext *s) +{ +    TCGLabelQemuLdst *lb; + +    /* qemu_ld/st slow paths */ +    for (lb = s->be->labels; lb != NULL; lb = lb->next) { +        if (lb->is_ld) { +            tcg_out_qemu_ld_slow_path(s, lb); +        } else { +            tcg_out_qemu_st_slow_path(s, lb); +        } +    } +} + +/* + * Allocate a new TCGLabelQemuLdst entry. + */ + +static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s) +{ +    TCGBackendData *be = s->be; +    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); + +    l->next = be->labels; +    be->labels = l; +    return l; +} +#else +#include "tcg-be-null.h" +#endif /* CONFIG_SOFTMMU */ diff --git a/tcg/tcg-be-null.h b/tcg/tcg-be-null.h new file mode 100644 index 00000000..74c57d5a --- /dev/null +++ b/tcg/tcg-be-null.h @@ -0,0 +1,43 @@ +/* + * TCG Backend Data: No backend data + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +typedef struct TCGBackendData { +    /* Empty */ +    char dummy; +} TCGBackendData; + + +/* + * Initialize TB backend data at the beginning of the TB. 
+ */ + +static inline void tcg_out_tb_init(TCGContext *s) +{ +} + +/* + * Generate TB finalization at the end of block + */ + +static inline void tcg_out_tb_finalize(TCGContext *s) +{ +} diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c new file mode 100644 index 00000000..45098c31 --- /dev/null +++ b/tcg/tcg-op.c @@ -0,0 +1,1945 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tcg.h" +#include "tcg-op.h" + +/* Reduce the number of ifdefs below.  This assumes that all uses of +   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that +   the compiler can eliminate.  */ +#if TCG_TARGET_REG_BITS == 64 +extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64); +extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); +#define TCGV_LOW  TCGV_LOW_link_error +#define TCGV_HIGH TCGV_HIGH_link_error +#endif + +/* Note that this is optimized for sequential allocation during translate. +   Up to and including filling in the forward link immediately.  We'll do +   proper termination of the end of the list after we finish translation.  
*/ + +static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) +{ +    int oi = ctx->gen_next_op_idx; +    int ni = oi + 1; +    int pi = oi - 1; + +    tcg_debug_assert(oi < OPC_BUF_SIZE); +    ctx->gen_last_op_idx = oi; +    ctx->gen_next_op_idx = ni; + +    ctx->gen_op_buf[oi] = (TCGOp){ +        .opc = opc, +        .args = args, +        .prev = pi, +        .next = ni +    }; +} + +void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1) +{ +    int pi = ctx->gen_next_parm_idx; + +    tcg_debug_assert(pi + 1 <= OPPARAM_BUF_SIZE); +    ctx->gen_next_parm_idx = pi + 1; +    ctx->gen_opparam_buf[pi] = a1; + +    tcg_emit_op(ctx, opc, pi); +} + +void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2) +{ +    int pi = ctx->gen_next_parm_idx; + +    tcg_debug_assert(pi + 2 <= OPPARAM_BUF_SIZE); +    ctx->gen_next_parm_idx = pi + 2; +    ctx->gen_opparam_buf[pi + 0] = a1; +    ctx->gen_opparam_buf[pi + 1] = a2; + +    tcg_emit_op(ctx, opc, pi); +} + +void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1, +                 TCGArg a2, TCGArg a3) +{ +    int pi = ctx->gen_next_parm_idx; + +    tcg_debug_assert(pi + 3 <= OPPARAM_BUF_SIZE); +    ctx->gen_next_parm_idx = pi + 3; +    ctx->gen_opparam_buf[pi + 0] = a1; +    ctx->gen_opparam_buf[pi + 1] = a2; +    ctx->gen_opparam_buf[pi + 2] = a3; + +    tcg_emit_op(ctx, opc, pi); +} + +void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1, +                 TCGArg a2, TCGArg a3, TCGArg a4) +{ +    int pi = ctx->gen_next_parm_idx; + +    tcg_debug_assert(pi + 4 <= OPPARAM_BUF_SIZE); +    ctx->gen_next_parm_idx = pi + 4; +    ctx->gen_opparam_buf[pi + 0] = a1; +    ctx->gen_opparam_buf[pi + 1] = a2; +    ctx->gen_opparam_buf[pi + 2] = a3; +    ctx->gen_opparam_buf[pi + 3] = a4; + +    tcg_emit_op(ctx, opc, pi); +} + +void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1, +                 TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5) +{ +    int pi = ctx->gen_next_parm_idx; + +    tcg_debug_assert(pi + 5 <= OPPARAM_BUF_SIZE); +    ctx->gen_next_parm_idx = pi + 5; +    ctx->gen_opparam_buf[pi + 0] = a1; +    ctx->gen_opparam_buf[pi + 1] = a2; +    ctx->gen_opparam_buf[pi + 2] = a3; +    ctx->gen_opparam_buf[pi + 3] = a4; +    ctx->gen_opparam_buf[pi + 4] = a5; + +    tcg_emit_op(ctx, opc, pi); +} + +void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, +                 TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6) +{ +    int pi = ctx->gen_next_parm_idx; + +    tcg_debug_assert(pi + 6 <= OPPARAM_BUF_SIZE); +    ctx->gen_next_parm_idx = pi + 6; +    ctx->gen_opparam_buf[pi + 0] = a1; +    ctx->gen_opparam_buf[pi + 1] = a2; +    ctx->gen_opparam_buf[pi + 2] = a3; +    ctx->gen_opparam_buf[pi + 3] = a4; +    ctx->gen_opparam_buf[pi + 4] = a5; +    ctx->gen_opparam_buf[pi + 5] = a6; + +    tcg_emit_op(ctx, opc, pi); +} + +/* 32 bit ops */ + +void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_add_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) +{ +    if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) { +        /* Don't recurse with tcg_gen_neg_i32.  
*/ +        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg1); +        tcg_gen_sub_i32(ret, t0, arg2); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_sub_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) +{ +    TCGv_i32 t0; +    /* Some cases can be optimized here.  */ +    switch (arg2) { +    case 0: +        tcg_gen_movi_i32(ret, 0); +        return; +    case 0xffffffffu: +        tcg_gen_mov_i32(ret, arg1); +        return; +    case 0xffu: +        /* Don't recurse with tcg_gen_ext8u_i32.  */ +        if (TCG_TARGET_HAS_ext8u_i32) { +            tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1); +            return; +        } +        break; +    case 0xffffu: +        if (TCG_TARGET_HAS_ext16u_i32) { +            tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1); +            return; +        } +        break; +    } +    t0 = tcg_const_i32(arg2); +    tcg_gen_and_i32(ret, arg1, t0); +    tcg_temp_free_i32(t0); +} + +void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ +    /* Some cases can be optimized here.  */ +    if (arg2 == -1) { +        tcg_gen_movi_i32(ret, -1); +    } else if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_or_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ +    /* Some cases can be optimized here.  */ +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) { +        /* Don't recurse with tcg_gen_not_i32.  
*/ +        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_xor_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 32); +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_shl_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 32); +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_shr_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 32); +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_sar_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l) +{ +    if (cond == TCG_COND_ALWAYS) { +        tcg_gen_br(l); +    } else if (cond != TCG_COND_NEVER) { +        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); +    } +} + +void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l) +{ +    if (cond == TCG_COND_ALWAYS) { +        tcg_gen_br(l); +    } else if (cond != TCG_COND_NEVER) { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_brcond_i32(cond, arg1, t0, l); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, +                         TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (cond == TCG_COND_ALWAYS) { +        tcg_gen_movi_i32(ret, 1); +    } else if (cond == TCG_COND_NEVER) { +        tcg_gen_movi_i32(ret, 0); +    } else { +        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond); +    } +} + +void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, +                          TCGv_i32 arg1, int32_t arg2) +{ +    TCGv_i32 t0 = tcg_const_i32(arg2); +    tcg_gen_setcond_i32(cond, ret, arg1, t0); +    tcg_temp_free_i32(t0); +} + +void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ +    TCGv_i32 t0 = tcg_const_i32(arg2); +    tcg_gen_mul_i32(ret, arg1, t0); +    tcg_temp_free_i32(t0); +} + +void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_div_i32) { +        tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div2_i32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_sari_i32(t0, arg1, 31); +        tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2); +        tcg_temp_free_i32(t0); +    } else { +        gen_helper_div_i32(ret, arg1, arg2); +    } +} + +void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_rem_i32) { +        tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div_i32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2); +        tcg_gen_mul_i32(t0, t0, arg2); +        tcg_gen_sub_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } else if (TCG_TARGET_HAS_div2_i32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_sari_i32(t0, arg1, 31); +        tcg_gen_op5_i32(INDEX_op_div2_i32, 
t0, ret, arg1, t0, arg2); +        tcg_temp_free_i32(t0); +    } else { +        gen_helper_rem_i32(ret, arg1, arg2); +    } +} + +void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_div_i32) { +        tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div2_i32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_movi_i32(t0, 0); +        tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2); +        tcg_temp_free_i32(t0); +    } else { +        gen_helper_divu_i32(ret, arg1, arg2); +    } +} + +void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_rem_i32) { +        tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div_i32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2); +        tcg_gen_mul_i32(t0, t0, arg2); +        tcg_gen_sub_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } else if (TCG_TARGET_HAS_div2_i32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_movi_i32(t0, 0); +        tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2); +        tcg_temp_free_i32(t0); +    } else { +        gen_helper_remu_i32(ret, arg1, arg2); +    } +} + +void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_andc_i32) { +        tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2); +    } else { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_not_i32(t0, arg2); +        tcg_gen_and_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_eqv_i32) { +        tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); +    } else { +        tcg_gen_xor_i32(ret, arg1, arg2); +        tcg_gen_not_i32(ret, ret); +    } +} + +void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_nand_i32) { +        tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); +    } else { +        tcg_gen_and_i32(ret, arg1, arg2); +        tcg_gen_not_i32(ret, ret); +    } +} + +void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_nor_i32) { +        tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); +    } else { +        tcg_gen_or_i32(ret, arg1, arg2); +        tcg_gen_not_i32(ret, ret); +    } +} + +void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_orc_i32) { +        tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2); +    } else { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        tcg_gen_not_i32(t0, arg2); +        tcg_gen_or_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_rot_i32) { +        tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2); +    } else { +        TCGv_i32 t0, t1; + +        t0 = tcg_temp_new_i32(); +        t1 = tcg_temp_new_i32(); +        tcg_gen_shl_i32(t0, arg1, arg2); +        tcg_gen_subfi_i32(t1, 32, arg2); +        tcg_gen_shr_i32(t1, arg1, t1); +        tcg_gen_or_i32(ret, t0, t1); +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } +} + +void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 32); +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else if 
(TCG_TARGET_HAS_rot_i32) { +        TCGv_i32 t0 = tcg_const_i32(arg2); +        tcg_gen_rotl_i32(ret, arg1, t0); +        tcg_temp_free_i32(t0); +    } else { +        TCGv_i32 t0, t1; +        t0 = tcg_temp_new_i32(); +        t1 = tcg_temp_new_i32(); +        tcg_gen_shli_i32(t0, arg1, arg2); +        tcg_gen_shri_i32(t1, arg1, 32 - arg2); +        tcg_gen_or_i32(ret, t0, t1); +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } +} + +void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_rot_i32) { +        tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2); +    } else { +        TCGv_i32 t0, t1; + +        t0 = tcg_temp_new_i32(); +        t1 = tcg_temp_new_i32(); +        tcg_gen_shr_i32(t0, arg1, arg2); +        tcg_gen_subfi_i32(t1, 32, arg2); +        tcg_gen_shl_i32(t1, arg1, t1); +        tcg_gen_or_i32(ret, t0, t1); +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } +} + +void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 32); +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i32(ret, arg1); +    } else { +        tcg_gen_rotli_i32(ret, arg1, 32 - arg2); +    } +} + +void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, +                         unsigned int ofs, unsigned int len) +{ +    uint32_t mask; +    TCGv_i32 t1; + +    tcg_debug_assert(ofs < 32); +    tcg_debug_assert(len <= 32); +    tcg_debug_assert(ofs + len <= 32); + +    if (ofs == 0 && len == 32) { +        tcg_gen_mov_i32(ret, arg2); +        return; +    } +    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) { +        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); +        return; +    } + +    mask = (1u << len) - 1; +    t1 = tcg_temp_new_i32(); + +    if (ofs + len < 32) { +        tcg_gen_andi_i32(t1, arg2, mask); +        tcg_gen_shli_i32(t1, t1, ofs); +    } else { +        tcg_gen_shli_i32(t1, arg2, ofs); +    } +    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs)); +    tcg_gen_or_i32(ret, ret, t1); + +    tcg_temp_free_i32(t1); +} + +void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, +                         TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2) +{ +    if (cond == TCG_COND_ALWAYS) { +        tcg_gen_mov_i32(ret, v1); +    } else if (cond == TCG_COND_NEVER) { +        tcg_gen_mov_i32(ret, v2); +    } else if (TCG_TARGET_HAS_movcond_i32) { +        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); +    } else { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        TCGv_i32 t1 = tcg_temp_new_i32(); +        tcg_gen_setcond_i32(cond, t0, c1, c2); +        tcg_gen_neg_i32(t0, t0); +        tcg_gen_and_i32(t1, v1, t0); +        tcg_gen_andc_i32(ret, v2, t0); +        tcg_gen_or_i32(ret, ret, t1); +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } +} + +void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, +                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) +{ +    if (TCG_TARGET_HAS_add2_i32) { +        tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        tcg_gen_concat_i32_i64(t0, al, ah); +        tcg_gen_concat_i32_i64(t1, bl, bh); +        tcg_gen_add_i64(t0, t0, t1); +        tcg_gen_extr_i64_i32(rl, rh, t0); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void 
tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, +                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) +{ +    if (TCG_TARGET_HAS_sub2_i32) { +        tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        tcg_gen_concat_i32_i64(t0, al, ah); +        tcg_gen_concat_i32_i64(t1, bl, bh); +        tcg_gen_sub_i64(t0, t0, t1); +        tcg_gen_extr_i64_i32(rl, rh, t0); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_mulu2_i32) { +        tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); +    } else if (TCG_TARGET_HAS_muluh_i32) { +        TCGv_i32 t = tcg_temp_new_i32(); +        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); +        tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); +        tcg_gen_mov_i32(rl, t); +        tcg_temp_free_i32(t); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        tcg_gen_extu_i32_i64(t0, arg1); +        tcg_gen_extu_i32_i64(t1, arg2); +        tcg_gen_mul_i64(t0, t0, t1); +        tcg_gen_extr_i64_i32(rl, rh, t0); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    if (TCG_TARGET_HAS_muls2_i32) { +        tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); +    } else if (TCG_TARGET_HAS_mulsh_i32) { +        TCGv_i32 t = tcg_temp_new_i32(); +        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); +        tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); +        tcg_gen_mov_i32(rl, t); +        tcg_temp_free_i32(t); +    } else if (TCG_TARGET_REG_BITS == 32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        TCGv_i32 t1 = tcg_temp_new_i32(); +        TCGv_i32 t2 = tcg_temp_new_i32(); +        TCGv_i32 t3 = tcg_temp_new_i32(); +        tcg_gen_mulu2_i32(t0, t1, arg1, arg2); +        /* Adjust for negative inputs.  
*/ +        tcg_gen_sari_i32(t2, arg1, 31); +        tcg_gen_sari_i32(t3, arg2, 31); +        tcg_gen_and_i32(t2, t2, arg2); +        tcg_gen_and_i32(t3, t3, arg1); +        tcg_gen_sub_i32(rh, t1, t2); +        tcg_gen_sub_i32(rh, rh, t3); +        tcg_gen_mov_i32(rl, t0); +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +        tcg_temp_free_i32(t2); +        tcg_temp_free_i32(t3); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        tcg_gen_ext_i32_i64(t0, arg1); +        tcg_gen_ext_i32_i64(t1, arg2); +        tcg_gen_mul_i64(t0, t0, t1); +        tcg_gen_extr_i64_i32(rl, rh, t0); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_ext8s_i32) { +        tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg); +    } else { +        tcg_gen_shli_i32(ret, arg, 24); +        tcg_gen_sari_i32(ret, ret, 24); +    } +} + +void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_ext16s_i32) { +        tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg); +    } else { +        tcg_gen_shli_i32(ret, arg, 16); +        tcg_gen_sari_i32(ret, ret, 16); +    } +} + +void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_ext8u_i32) { +        tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg); +    } else { +        tcg_gen_andi_i32(ret, arg, 0xffu); +    } +} + +void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_ext16u_i32) { +        tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg); +    } else { +        tcg_gen_andi_i32(ret, arg, 0xffffu); +    } +} + +/* Note: we assume the two high bytes are set to zero */ +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_bswap16_i32) { +        tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg); +    } else { +        TCGv_i32 t0 = tcg_temp_new_i32(); + +        tcg_gen_ext8u_i32(t0, arg); +        tcg_gen_shli_i32(t0, t0, 8); +        tcg_gen_shri_i32(ret, arg, 8); +        tcg_gen_or_i32(ret, ret, t0); +        tcg_temp_free_i32(t0); +    } +} + +void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_bswap32_i32) { +        tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); +    } else { +        TCGv_i32 t0, t1; +        t0 = tcg_temp_new_i32(); +        t1 = tcg_temp_new_i32(); + +        tcg_gen_shli_i32(t0, arg, 24); + +        tcg_gen_andi_i32(t1, arg, 0x0000ff00); +        tcg_gen_shli_i32(t1, t1, 8); +        tcg_gen_or_i32(t0, t0, t1); + +        tcg_gen_shri_i32(t1, arg, 8); +        tcg_gen_andi_i32(t1, t1, 0x0000ff00); +        tcg_gen_or_i32(t0, t0, t1); + +        tcg_gen_shri_i32(t1, arg, 24); +        tcg_gen_or_i32(ret, t0, t1); +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } +} + +/* 64-bit ops */ + +#if TCG_TARGET_REG_BITS == 32 +/* These are all inline for TCG_TARGET_REG_BITS == 64.  
*/ + +void tcg_gen_discard_i64(TCGv_i64 arg) +{ +    tcg_gen_discard_i32(TCGV_LOW(arg)); +    tcg_gen_discard_i32(TCGV_HIGH(arg)); +} + +void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +    tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); +} + +void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) +{ +    tcg_gen_movi_i32(TCGV_LOW(ret), arg); +    tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32); +} + +void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) +{ +    tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); +    tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) +{ +    tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); +    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), 31); +} + +void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) +{ +    tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); +    tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) +{ +    tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); +    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) +{ +    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); +    tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) +{ +    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); +    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) +{ +    /* Since arg2 and ret have different types, +       they cannot be the same temporary */ +#ifdef HOST_WORDS_BIGENDIAN +    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); +    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); +#else +    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); +    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4); +#endif +} + +void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ +#ifdef HOST_WORDS_BIGENDIAN +    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); +    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); +#else +    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); +    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4); +#endif +} + +void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +    tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +    tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +    tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    gen_helper_shl_i64(ret, arg1, arg2); +} + +void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    gen_helper_shr_i64(ret, arg1, arg2); +} + +void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    gen_helper_sar_i64(ret, arg1, arg2); +} + +void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    TCGv_i64 t0; +    TCGv_i32 t1; + +    t0 = tcg_temp_new_i64(); +    t1 = tcg_temp_new_i32(); + +    tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0), +  
                    TCGV_LOW(arg1), TCGV_LOW(arg2)); + +    tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2)); +    tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); +    tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2)); +    tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); + +    tcg_gen_mov_i64(ret, t0); +    tcg_temp_free_i64(t0); +    tcg_temp_free_i32(t1); +} +#endif /* TCG_TARGET_REG_SIZE == 32 */ + +void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_add_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2) +{ +    if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) { +        /* Don't recurse with tcg_gen_neg_i64.  */ +        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg1); +        tcg_gen_sub_i64(ret, t0, arg2); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_sub_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) +{ +    TCGv_i64 t0; + +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); +        tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +        return; +    } + +    /* Some cases can be optimized here.  */ +    switch (arg2) { +    case 0: +        tcg_gen_movi_i64(ret, 0); +        return; +    case 0xffffffffffffffffull: +        tcg_gen_mov_i64(ret, arg1); +        return; +    case 0xffull: +        /* Don't recurse with tcg_gen_ext8u_i64.  */ +        if (TCG_TARGET_HAS_ext8u_i64) { +            tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1); +            return; +        } +        break; +    case 0xffffu: +        if (TCG_TARGET_HAS_ext16u_i64) { +            tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1); +            return; +        } +        break; +    case 0xffffffffull: +        if (TCG_TARGET_HAS_ext32u_i64) { +            tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1); +            return; +        } +        break; +    } +    t0 = tcg_const_i64(arg2); +    tcg_gen_and_i64(ret, arg1, t0); +    tcg_temp_free_i64(t0); +} + +void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); +        tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +        return; +    } +    /* Some cases can be optimized here.  */ +    if (arg2 == -1) { +        tcg_gen_movi_i64(ret, -1); +    } else if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_or_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); +        tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +        return; +    } +    /* Some cases can be optimized here.  
*/ +    if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) { +        /* Don't recurse with tcg_gen_not_i64.  */ +        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_xor_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, +                                      unsigned c, bool right, bool arith) +{ +    tcg_debug_assert(c < 64); +    if (c == 0) { +        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); +        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); +    } else if (c >= 32) { +        c -= 32; +        if (right) { +            if (arith) { +                tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c); +                tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31); +            } else { +                tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c); +                tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +            } +        } else { +            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c); +            tcg_gen_movi_i32(TCGV_LOW(ret), 0); +        } +    } else { +        TCGv_i32 t0, t1; + +        t0 = tcg_temp_new_i32(); +        t1 = tcg_temp_new_i32(); +        if (right) { +            tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c); +            if (arith) { +                tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c); +            } else { +                tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c); +            } +            tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); +            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0); +            tcg_gen_mov_i32(TCGV_HIGH(ret), t1); +        } else { +            tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); +            /* Note: ret can be the same as arg1, so we use t1 */ +            tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c); +            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); +            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0); +            tcg_gen_mov_i32(TCGV_LOW(ret), t1); +        } +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } +} + +void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 64); +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0); +    } else if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_shl_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 64); +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0); +    } else if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_shr_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 64); +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1); +    } else if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_sar_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l) +{ +    if (cond == 
TCG_COND_ALWAYS) { +        tcg_gen_br(l); +    } else if (cond != TCG_COND_NEVER) { +        if (TCG_TARGET_REG_BITS == 32) { +            tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1), +                              TCGV_HIGH(arg1), TCGV_LOW(arg2), +                              TCGV_HIGH(arg2), cond, label_arg(l)); +        } else { +            tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, +                              label_arg(l)); +        } +    } +} + +void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l) +{ +    if (cond == TCG_COND_ALWAYS) { +        tcg_gen_br(l); +    } else if (cond != TCG_COND_NEVER) { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_brcond_i64(cond, arg1, t0, l); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, +                         TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (cond == TCG_COND_ALWAYS) { +        tcg_gen_movi_i64(ret, 1); +    } else if (cond == TCG_COND_NEVER) { +        tcg_gen_movi_i64(ret, 0); +    } else { +        if (TCG_TARGET_REG_BITS == 32) { +            tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), +                             TCGV_LOW(arg1), TCGV_HIGH(arg1), +                             TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); +            tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +        } else { +            tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond); +        } +    } +} + +void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, +                          TCGv_i64 arg1, int64_t arg2) +{ +    TCGv_i64 t0 = tcg_const_i64(arg2); +    tcg_gen_setcond_i64(cond, ret, arg1, t0); +    tcg_temp_free_i64(t0); +} + +void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ +    TCGv_i64 t0 = tcg_const_i64(arg2); +    tcg_gen_mul_i64(ret, arg1, t0); +    tcg_temp_free_i64(t0); +} + +void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_div_i64) { +        tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div2_i64) { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_sari_i64(t0, arg1, 63); +        tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2); +        tcg_temp_free_i64(t0); +    } else { +        gen_helper_div_i64(ret, arg1, arg2); +    } +} + +void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_rem_i64) { +        tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div_i64) { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2); +        tcg_gen_mul_i64(t0, t0, arg2); +        tcg_gen_sub_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } else if (TCG_TARGET_HAS_div2_i64) { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_sari_i64(t0, arg1, 63); +        tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2); +        tcg_temp_free_i64(t0); +    } else { +        gen_helper_rem_i64(ret, arg1, arg2); +    } +} + +void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_div_i64) { +        tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div2_i64) { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_movi_i64(t0, 0); +        tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2); +        tcg_temp_free_i64(t0); +    } else { +        gen_helper_divu_i64(ret, arg1, arg2); +    } 
+} + +void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_rem_i64) { +        tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); +    } else if (TCG_TARGET_HAS_div_i64) { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2); +        tcg_gen_mul_i64(t0, t0, arg2); +        tcg_gen_sub_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } else if (TCG_TARGET_HAS_div2_i64) { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_movi_i64(t0, 0); +        tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2); +        tcg_temp_free_i64(t0); +    } else { +        gen_helper_remu_i64(ret, arg1, arg2); +    } +} + +void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +    } else if (TCG_TARGET_HAS_ext8s_i64) { +        tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg); +    } else { +        tcg_gen_shli_i64(ret, arg, 56); +        tcg_gen_sari_i64(ret, ret, 56); +    } +} + +void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +    } else if (TCG_TARGET_HAS_ext16s_i64) { +        tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg); +    } else { +        tcg_gen_shli_i64(ret, arg, 48); +        tcg_gen_sari_i64(ret, ret, 48); +    } +} + +void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +    } else if (TCG_TARGET_HAS_ext32s_i64) { +        tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg); +    } else { +        tcg_gen_shli_i64(ret, arg, 32); +        tcg_gen_sari_i64(ret, ret, 32); +    } +} + +void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +    } else if (TCG_TARGET_HAS_ext8u_i64) { +        tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg); +    } else { +        tcg_gen_andi_i64(ret, arg, 0xffu); +    } +} + +void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +    } else if (TCG_TARGET_HAS_ext16u_i64) { +        tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg); +    } else { +        tcg_gen_andi_i64(ret, arg, 0xffffu); +    } +} + +void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +    } else if (TCG_TARGET_HAS_ext32u_i64) { +        tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg); +    } else { +        tcg_gen_andi_i64(ret, arg, 0xffffffffu); +    } +} + +/* Note: we assume the six high bytes are set to zero */ +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +    } else if (TCG_TARGET_HAS_bswap16_i64) { +        tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); + +        
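+        /* Swap the two low bytes: byte 0 moves up into bits 8..15 via t0, byte 1 moves down into bits 0..7, and the halves are OR'ed; this relies on the six high bytes of arg being zero, per the note above.  */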
tcg_gen_ext8u_i64(t0, arg); +        tcg_gen_shli_i64(t0, t0, 8); +        tcg_gen_shri_i64(ret, arg, 8); +        tcg_gen_or_i64(ret, ret, t0); +        tcg_temp_free_i64(t0); +    } +} + +/* Note: we assume the four high bytes are set to zero */ +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +    } else if (TCG_TARGET_HAS_bswap32_i64) { +        tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); +    } else { +        TCGv_i64 t0, t1; +        t0 = tcg_temp_new_i64(); +        t1 = tcg_temp_new_i64(); + +        tcg_gen_shli_i64(t0, arg, 24); +        tcg_gen_ext32u_i64(t0, t0); + +        tcg_gen_andi_i64(t1, arg, 0x0000ff00); +        tcg_gen_shli_i64(t1, t1, 8); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_shri_i64(t1, arg, 8); +        tcg_gen_andi_i64(t1, t1, 0x0000ff00); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_shri_i64(t1, arg, 24); +        tcg_gen_or_i64(ret, t0, t1); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        TCGv_i32 t0, t1; +        t0 = tcg_temp_new_i32(); +        t1 = tcg_temp_new_i32(); + +        tcg_gen_bswap32_i32(t0, TCGV_LOW(arg)); +        tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg)); +        tcg_gen_mov_i32(TCGV_LOW(ret), t1); +        tcg_gen_mov_i32(TCGV_HIGH(ret), t0); +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } else if (TCG_TARGET_HAS_bswap64_i64) { +        tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); + +        tcg_gen_shli_i64(t0, arg, 56); + +        tcg_gen_andi_i64(t1, arg, 0x0000ff00); +        tcg_gen_shli_i64(t1, t1, 40); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_andi_i64(t1, arg, 0x00ff0000); +        tcg_gen_shli_i64(t1, t1, 24); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_andi_i64(t1, arg, 0xff000000); +        tcg_gen_shli_i64(t1, t1, 8); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_shri_i64(t1, arg, 8); +        tcg_gen_andi_i64(t1, t1, 0xff000000); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_shri_i64(t1, arg, 24); +        tcg_gen_andi_i64(t1, t1, 0x00ff0000); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_shri_i64(t1, arg, 40); +        tcg_gen_andi_i64(t1, t1, 0x0000ff00); +        tcg_gen_or_i64(t0, t0, t1); + +        tcg_gen_shri_i64(t1, arg, 56); +        tcg_gen_or_i64(ret, t0, t1); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +        tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); +    } else if (TCG_TARGET_HAS_not_i64) { +        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); +    } else { +        tcg_gen_xori_i64(ret, arg, -1); +    } +} + +void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +        tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +    } else if (TCG_TARGET_HAS_andc_i64) { +        tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        
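+        /* No native andc op: compute ~arg2 in a temporary and AND it with arg1.  */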
tcg_gen_not_i64(t0, arg2); +        tcg_gen_and_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +        tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +    } else if (TCG_TARGET_HAS_eqv_i64) { +        tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); +    } else { +        tcg_gen_xor_i64(ret, arg1, arg2); +        tcg_gen_not_i64(ret, ret); +    } +} + +void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +        tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +    } else if (TCG_TARGET_HAS_nand_i64) { +        tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); +    } else { +        tcg_gen_and_i64(ret, arg1, arg2); +        tcg_gen_not_i64(ret, ret); +    } +} + +void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +        tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +    } else if (TCG_TARGET_HAS_nor_i64) { +        tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); +    } else { +        tcg_gen_or_i64(ret, arg1, arg2); +        tcg_gen_not_i64(ret, ret); +    } +} + +void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); +        tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +    } else if (TCG_TARGET_HAS_orc_i64) { +        tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_not_i64(t0, arg2); +        tcg_gen_or_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_rot_i64) { +        tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2); +    } else { +        TCGv_i64 t0, t1; +        t0 = tcg_temp_new_i64(); +        t1 = tcg_temp_new_i64(); +        tcg_gen_shl_i64(t0, arg1, arg2); +        tcg_gen_subfi_i64(t1, 64, arg2); +        tcg_gen_shr_i64(t1, arg1, t1); +        tcg_gen_or_i64(ret, t0, t1); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 64); +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else if (TCG_TARGET_HAS_rot_i64) { +        TCGv_i64 t0 = tcg_const_i64(arg2); +        tcg_gen_rotl_i64(ret, arg1, t0); +        tcg_temp_free_i64(t0); +    } else { +        TCGv_i64 t0, t1; +        t0 = tcg_temp_new_i64(); +        t1 = tcg_temp_new_i64(); +        tcg_gen_shli_i64(t0, arg1, arg2); +        tcg_gen_shri_i64(t1, arg1, 64 - arg2); +        tcg_gen_or_i64(ret, t0, t1); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_rot_i64) { +        tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2); +    } else { +        TCGv_i64 t0, t1; +        t0 = tcg_temp_new_i64(); +        t1 = tcg_temp_new_i64(); +        tcg_gen_shr_i64(t0, arg1, arg2); 
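+        /* No native rotate: t0 now holds arg1 >> arg2; form arg1 << (64 - arg2) in t1 and OR the two halves to complete the rotation.  */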
+        tcg_gen_subfi_i64(t1, 64, arg2); +        tcg_gen_shl_i64(t1, arg1, t1); +        tcg_gen_or_i64(ret, t0, t1); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) +{ +    tcg_debug_assert(arg2 < 64); +    /* some cases can be optimized here */ +    if (arg2 == 0) { +        tcg_gen_mov_i64(ret, arg1); +    } else { +        tcg_gen_rotli_i64(ret, arg1, 64 - arg2); +    } +} + +void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, +                         unsigned int ofs, unsigned int len) +{ +    uint64_t mask; +    TCGv_i64 t1; + +    tcg_debug_assert(ofs < 64); +    tcg_debug_assert(len <= 64); +    tcg_debug_assert(ofs + len <= 64); + +    if (ofs == 0 && len == 64) { +        tcg_gen_mov_i64(ret, arg2); +        return; +    } +    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) { +        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); +        return; +    } + +    if (TCG_TARGET_REG_BITS == 32) { +        if (ofs >= 32) { +            tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), +                                TCGV_LOW(arg2), ofs - 32, len); +            tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); +            return; +        } +        if (ofs + len <= 32) { +            tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1), +                                TCGV_LOW(arg2), ofs, len); +            tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); +            return; +        } +    } + +    mask = (1ull << len) - 1; +    t1 = tcg_temp_new_i64(); + +    if (ofs + len < 64) { +        tcg_gen_andi_i64(t1, arg2, mask); +        tcg_gen_shli_i64(t1, t1, ofs); +    } else { +        tcg_gen_shli_i64(t1, arg2, ofs); +    } +    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs)); +    tcg_gen_or_i64(ret, ret, t1); + +    tcg_temp_free_i64(t1); +} + +void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, +                         TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2) +{ +    if (cond == TCG_COND_ALWAYS) { +        tcg_gen_mov_i64(ret, v1); +    } else if (cond == TCG_COND_NEVER) { +        tcg_gen_mov_i64(ret, v2); +    } else if (TCG_TARGET_REG_BITS == 32) { +        TCGv_i32 t0 = tcg_temp_new_i32(); +        TCGv_i32 t1 = tcg_temp_new_i32(); +        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0, +                         TCGV_LOW(c1), TCGV_HIGH(c1), +                         TCGV_LOW(c2), TCGV_HIGH(c2), cond); + +        if (TCG_TARGET_HAS_movcond_i32) { +            tcg_gen_movi_i32(t1, 0); +            tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1, +                                TCGV_LOW(v1), TCGV_LOW(v2)); +            tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1, +                                TCGV_HIGH(v1), TCGV_HIGH(v2)); +        } else { +            tcg_gen_neg_i32(t0, t0); + +            tcg_gen_and_i32(t1, TCGV_LOW(v1), t0); +            tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0); +            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1); + +            tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0); +            tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0); +            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1); +        } +        tcg_temp_free_i32(t0); +        tcg_temp_free_i32(t1); +    } else if (TCG_TARGET_HAS_movcond_i64) { +        tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond); +    } else { +        TCGv_i64 t0 = 
tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        tcg_gen_setcond_i64(cond, t0, c1, c2); +        tcg_gen_neg_i64(t0, t0); +        tcg_gen_and_i64(t1, v1, t0); +        tcg_gen_andc_i64(ret, v2, t0); +        tcg_gen_or_i64(ret, ret, t1); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, +                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ +    if (TCG_TARGET_HAS_add2_i64) { +        tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        tcg_gen_add_i64(t0, al, bl); +        tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al); +        tcg_gen_add_i64(rh, ah, bh); +        tcg_gen_add_i64(rh, rh, t1); +        tcg_gen_mov_i64(rl, t0); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, +                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ +    if (TCG_TARGET_HAS_sub2_i64) { +        tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        tcg_gen_sub_i64(t0, al, bl); +        tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl); +        tcg_gen_sub_i64(rh, ah, bh); +        tcg_gen_sub_i64(rh, rh, t1); +        tcg_gen_mov_i64(rl, t0); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +    } +} + +void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_mulu2_i64) { +        tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); +    } else if (TCG_TARGET_HAS_muluh_i64) { +        TCGv_i64 t = tcg_temp_new_i64(); +        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); +        tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); +        tcg_gen_mov_i64(rl, t); +        tcg_temp_free_i64(t); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_mul_i64(t0, arg1, arg2); +        gen_helper_muluh_i64(rh, arg1, arg2); +        tcg_gen_mov_i64(rl, t0); +        tcg_temp_free_i64(t0); +    } +} + +void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    if (TCG_TARGET_HAS_muls2_i64) { +        tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); +    } else if (TCG_TARGET_HAS_mulsh_i64) { +        TCGv_i64 t = tcg_temp_new_i64(); +        tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); +        tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); +        tcg_gen_mov_i64(rl, t); +        tcg_temp_free_i64(t); +    } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        TCGv_i64 t1 = tcg_temp_new_i64(); +        TCGv_i64 t2 = tcg_temp_new_i64(); +        TCGv_i64 t3 = tcg_temp_new_i64(); +        tcg_gen_mulu2_i64(t0, t1, arg1, arg2); +        /* Adjust for negative inputs.  
*/ +        tcg_gen_sari_i64(t2, arg1, 63); +        tcg_gen_sari_i64(t3, arg2, 63); +        tcg_gen_and_i64(t2, t2, arg2); +        tcg_gen_and_i64(t3, t3, arg1); +        tcg_gen_sub_i64(rh, t1, t2); +        tcg_gen_sub_i64(rh, rh, t3); +        tcg_gen_mov_i64(rl, t0); +        tcg_temp_free_i64(t0); +        tcg_temp_free_i64(t1); +        tcg_temp_free_i64(t2); +        tcg_temp_free_i64(t3); +    } else { +        TCGv_i64 t0 = tcg_temp_new_i64(); +        tcg_gen_mul_i64(t0, arg1, arg2); +        gen_helper_mulsh_i64(rh, arg1, arg2); +        tcg_gen_mov_i64(rl, t0); +        tcg_temp_free_i64(t0); +    } +} + +/* Size changing operations.  */ + +void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) +{ +    tcg_debug_assert(count < 64); +    if (TCG_TARGET_REG_BITS == 32) { +        if (count >= 32) { +            tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); +        } else if (count == 0) { +            tcg_gen_mov_i32(ret, TCGV_LOW(arg)); +        } else { +            TCGv_i64 t = tcg_temp_new_i64(); +            tcg_gen_shri_i64(t, arg, count); +            tcg_gen_mov_i32(ret, TCGV_LOW(t)); +            tcg_temp_free_i64(t); +        } +    } else if (TCG_TARGET_HAS_trunc_shr_i32) { +        tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, +                         MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); +    } else if (count == 0) { +        tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); +    } else { +        TCGv_i64 t = tcg_temp_new_i64(); +        tcg_gen_shri_i64(t, arg, count); +        tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); +        tcg_temp_free_i64(t); +    } +} + +void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_mov_i32(TCGV_LOW(ret), arg); +        tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +    } else { +        /* Note: we assume the target supports move between +           32 and 64 bit registers.  */ +        tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); +    } +} + +void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_mov_i32(TCGV_LOW(ret), arg); +        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +    } else { +        /* Note: we assume the target supports move between +           32 and 64 bit registers.  */ +        tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); +    } +} + +void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high) +{ +    TCGv_i64 tmp; + +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_mov_i32(TCGV_LOW(dest), low); +        tcg_gen_mov_i32(TCGV_HIGH(dest), high); +        return; +    } + +    tmp = tcg_temp_new_i64(); +    /* These extensions are only needed for type correctness. +       We may be able to do better given target specific information.  */ +    tcg_gen_extu_i32_i64(tmp, high); +    tcg_gen_extu_i32_i64(dest, low); +    /* If deposit is available, use it.  Otherwise use the extra +       knowledge that we have of the zero-extensions above.  
*/ +    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) { +        tcg_gen_deposit_i64(dest, dest, tmp, 32, 32); +    } else { +        tcg_gen_shli_i64(tmp, tmp, 32); +        tcg_gen_or_i64(dest, dest, tmp); +    } +    tcg_temp_free_i64(tmp); +} + +void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_mov_i32(lo, TCGV_LOW(arg)); +        tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); +    } else { +        tcg_gen_trunc_shr_i64_i32(lo, arg, 0); +        tcg_gen_trunc_shr_i64_i32(hi, arg, 32); +    } +} + +void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) +{ +    tcg_gen_ext32u_i64(lo, arg); +    tcg_gen_shri_i64(hi, arg, 32); +} + +/* QEMU specific operations.  */ + +void tcg_gen_goto_tb(unsigned idx) +{ +    /* We only support two chained exits.  */ +    tcg_debug_assert(idx <= 1); +#ifdef CONFIG_DEBUG_TCG +    /* Verify that we havn't seen this numbered exit before.  */ +    tcg_debug_assert((tcg_ctx.goto_tb_issue_mask & (1 << idx)) == 0); +    tcg_ctx.goto_tb_issue_mask |= 1 << idx; +#endif +    tcg_gen_op1i(INDEX_op_goto_tb, idx); +} + +static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) +{ +    switch (op & MO_SIZE) { +    case MO_8: +        op &= ~MO_BSWAP; +        break; +    case MO_16: +        break; +    case MO_32: +        if (!is64) { +            op &= ~MO_SIGN; +        } +        break; +    case MO_64: +        if (!is64) { +            tcg_abort(); +        } +        break; +    } +    if (st) { +        op &= ~MO_SIGN; +    } +    return op; +} + +static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr, +                         TCGMemOp memop, TCGArg idx) +{ +    TCGMemOpIdx oi = make_memop_idx(memop, idx); +#if TARGET_LONG_BITS == 32 +    tcg_gen_op3i_i32(opc, val, addr, oi); +#else +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi); +    } else { +        tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr), oi); +    } +#endif +} + +static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr, +                         TCGMemOp memop, TCGArg idx) +{ +    TCGMemOpIdx oi = make_memop_idx(memop, idx); +#if TARGET_LONG_BITS == 32 +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi); +    } else { +        tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr), oi); +    } +#else +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), +                         TCGV_LOW(addr), TCGV_HIGH(addr), oi); +    } else { +        tcg_gen_op3i_i64(opc, val, addr, oi); +    } +#endif +} + +void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ +    memop = tcg_canonicalize_memop(memop, 0, 0); +    gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx); +} + +void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ +    memop = tcg_canonicalize_memop(memop, 0, 1); +    gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); +} + +void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ +    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { +        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop); +        if (memop & MO_SIGN) { +            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31); +        } else { +            tcg_gen_movi_i32(TCGV_HIGH(val), 0); +        } 
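+        /* Both 32-bit halves of val are now set (load plus sign- or zero-extension), so the 64-bit load path below is not needed.  */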
+        return; +    } + +    memop = tcg_canonicalize_memop(memop, 1, 0); +    gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx); +} + +void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) +{ +    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { +        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop); +        return; +    } + +    memop = tcg_canonicalize_memop(memop, 1, 1); +    gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); +} diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h new file mode 100644 index 00000000..d1d763f6 --- /dev/null +++ b/tcg/tcg-op.h @@ -0,0 +1,991 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tcg.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" + +/* Basic output routines.  Not for general consumption.  
*/ + +void tcg_gen_op1(TCGContext *, TCGOpcode, TCGArg); +void tcg_gen_op2(TCGContext *, TCGOpcode, TCGArg, TCGArg); +void tcg_gen_op3(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg); +void tcg_gen_op4(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg); +void tcg_gen_op5(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg, +                 TCGArg, TCGArg); +void tcg_gen_op6(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg, +                 TCGArg, TCGArg, TCGArg); + + +static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1) +{ +    tcg_gen_op1(&tcg_ctx, opc, GET_TCGV_I32(a1)); +} + +static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 a1) +{ +    tcg_gen_op1(&tcg_ctx, opc, GET_TCGV_I64(a1)); +} + +static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg a1) +{ +    tcg_gen_op1(&tcg_ctx, opc, a1); +} + +static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2) +{ +    tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2)); +} + +static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2) +{ +    tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2)); +} + +static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 a1, TCGArg a2) +{ +    tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I32(a1), a2); +} + +static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 a1, TCGArg a2) +{ +    tcg_gen_op2(&tcg_ctx, opc, GET_TCGV_I64(a1), a2); +} + +static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg a1, TCGArg a2) +{ +    tcg_gen_op2(&tcg_ctx, opc, a1, a2); +} + +static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 a1, +                                   TCGv_i32 a2, TCGv_i32 a3) +{ +    tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(a1), +                GET_TCGV_I32(a2), GET_TCGV_I32(a3)); +} + +static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 a1, +                                   TCGv_i64 a2, TCGv_i64 a3) +{ +    tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(a1), +                GET_TCGV_I64(a2), GET_TCGV_I64(a3)); +} + +static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 a1, +                                    TCGv_i32 a2, TCGArg a3) +{ +    tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), a3); +} + +static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 a1, +                                    TCGv_i64 a2, TCGArg a3) +{ +    tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), a3); +} + +static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, +                                       TCGv_ptr base, TCGArg offset) +{ +    tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_PTR(base), offset); +} + +static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, +                                       TCGv_ptr base, TCGArg offset) +{ +    tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_PTR(base), offset); +} + +static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                   TCGv_i32 a3, TCGv_i32 a4) +{ +    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), GET_TCGV_I32(a4)); +} + +static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                   TCGv_i64 a3, TCGv_i64 a4) +{ +    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), GET_TCGV_I64(a4)); +} + +static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                    TCGv_i32 a3, TCGArg a4) 
+{ +    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), a4); +} + +static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                    TCGv_i64 a3, TCGArg a4) +{ +    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), a4); +} + +static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                     TCGArg a3, TCGArg a4) +{ +    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), a3, a4); +} + +static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                     TCGArg a3, TCGArg a4) +{ +    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), a3, a4); +} + +static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                   TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5) +{ +    tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), GET_TCGV_I32(a4), GET_TCGV_I32(a5)); +} + +static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                   TCGv_i64 a3, TCGv_i64 a4, TCGv_i64 a5) +{ +    tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), GET_TCGV_I64(a4), GET_TCGV_I64(a5)); +} + +static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                    TCGv_i32 a3, TCGv_i32 a4, TCGArg a5) +{ +    tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), GET_TCGV_I32(a4), a5); +} + +static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                    TCGv_i64 a3, TCGv_i64 a4, TCGArg a5) +{ +    tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), GET_TCGV_I64(a4), a5); +} + +static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                     TCGv_i32 a3, TCGArg a4, TCGArg a5) +{ +    tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), a4, a5); +} + +static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                     TCGv_i64 a3, TCGArg a4, TCGArg a5) +{ +    tcg_gen_op5(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), a4, a5); +} + +static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                   TCGv_i32 a3, TCGv_i32 a4, +                                   TCGv_i32 a5, TCGv_i32 a6) +{ +    tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), GET_TCGV_I32(a4), GET_TCGV_I32(a5), +                GET_TCGV_I32(a6)); +} + +static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                   TCGv_i64 a3, TCGv_i64 a4, +                                   TCGv_i64 a5, TCGv_i64 a6) +{ +    tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), GET_TCGV_I64(a4), GET_TCGV_I64(a5), +                GET_TCGV_I64(a6)); +} + +static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                    TCGv_i32 a3, TCGv_i32 a4, +                                    TCGv_i32 a5, TCGArg a6) +{ +    tcg_gen_op6(&tcg_ctx, opc, 
GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), GET_TCGV_I32(a4), GET_TCGV_I32(a5), a6); +} + +static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                    TCGv_i64 a3, TCGv_i64 a4, +                                    TCGv_i64 a5, TCGArg a6) +{ +    tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), GET_TCGV_I64(a4), GET_TCGV_I64(a5), a6); +} + +static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, +                                     TCGv_i32 a3, TCGv_i32 a4, +                                     TCGArg a5, TCGArg a6) +{ +    tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I32(a1), GET_TCGV_I32(a2), +                GET_TCGV_I32(a3), GET_TCGV_I32(a4), a5, a6); +} + +static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, +                                     TCGv_i64 a3, TCGv_i64 a4, +                                     TCGArg a5, TCGArg a6) +{ +    tcg_gen_op6(&tcg_ctx, opc, GET_TCGV_I64(a1), GET_TCGV_I64(a2), +                GET_TCGV_I64(a3), GET_TCGV_I64(a4), a5, a6); +} + + +/* Generic ops.  */ + +static inline void gen_set_label(TCGLabel *l) +{ +    tcg_gen_op1(&tcg_ctx, INDEX_op_set_label, label_arg(l)); +} + +static inline void tcg_gen_br(TCGLabel *l) +{ +    tcg_gen_op1(&tcg_ctx, INDEX_op_br, label_arg(l)); +} + +/* Helper calls. */ + +/* 32 bit ops */ + +void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2); +void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); +void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); +void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); +void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, +                         unsigned int ofs, unsigned int len); +void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *); +void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *); +void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, +                         TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, +                          TCGv_i32 arg1, int32_t arg2); +void tcg_gen_movcond_i32(TCGCond 
cond, TCGv_i32 ret, TCGv_i32 c1, +                         TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2); +void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, +                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); +void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, +                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); +void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); + +static inline void tcg_gen_discard_i32(TCGv_i32 arg) +{ +    tcg_gen_op1_i32(INDEX_op_discard, arg); +} + +static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (!TCGV_EQUAL_I32(ret, arg)) { +        tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); +    } +} + +static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) +{ +    tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg); +} + +static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, +                                     tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, +                                     tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, +                                  tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, +                                   tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, +                                  tcg_target_long offset) +{ +    tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); +} + 
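The inline wrappers above are the interface a guest front end uses to emit TCG opcodes into the current translation block. As a minimal usage sketch (assuming a hypothetical cpu_R[] array of TCGv_i32 globals created at init time with tcg_global_mem_new_i32(); the function and register names are illustrative and not part of this patch), a translator for a guest "add rd, rn, #imm" instruction could emit:

    static void gen_guest_addi(int rd, int rn, int32_t imm)
    {
        TCGv_i32 tmp = tcg_temp_new_i32();      /* plain temporary, dead at basic-block end */

        tcg_gen_addi_i32(tmp, cpu_R[rn], imm);  /* tmp = guest rn + imm */
        tcg_gen_mov_i32(cpu_R[rd], tmp);        /* write the result back to the global */
        tcg_temp_free_i32(tmp);                 /* return the temp to the free pool */
    }

Temporaries come from a per-context free pool (see tcg_temp_new_internal() further down in tcg.c), so allocating and freeing one per translated instruction is cheap; unlike the cpu_R[] globals, the temporary's value is not preserved past the end of the basic block.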
+static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +    tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_neg_i32) { +        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); +    } else { +        tcg_gen_subfi_i32(ret, 0, arg); +    } +} + +static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +    if (TCG_TARGET_HAS_not_i32) { +        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); +    } else { +        tcg_gen_xori_i32(ret, arg, -1); +    } +} + +/* 64 bit ops */ + +void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2); +void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); +void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); +void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); +void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, +                         unsigned int ofs, unsigned int len); +void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *); +void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *); +void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, +                         TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, +                          TCGv_i64 arg1, int64_t arg2); +void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, +                         TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2); +void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, +                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, +                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, 
TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg); + +#if TCG_TARGET_REG_BITS == 64 +static inline void tcg_gen_discard_i64(TCGv_i64 arg) +{ +    tcg_gen_op1_i64(INDEX_op_discard, arg); +} + +static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (!TCGV_EQUAL_I64(ret, arg)) { +        tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); +    } +} + +static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) +{ +    tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg); +} + +static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, +                                     tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, +                                     tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, +                                     tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, +                                     tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, +                                  tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, +                                   tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, +                                  tcg_target_long offset) +{ +    tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_sub_i64, ret, 
arg1, arg2); +} + +static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); +} +#else /* TCG_TARGET_REG_BITS == 32 */ +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, +                                   tcg_target_long offset) +{ +    tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, +                                    tcg_target_long offset) +{ +    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), +                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +    tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), +                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); +} + +void tcg_gen_discard_i64(TCGv_i64 arg); +void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg); +void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +#endif /* TCG_TARGET_REG_BITS */ + +static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +    if (TCG_TARGET_HAS_neg_i64) { +        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); +    } else { +        
tcg_gen_subfi_i64(ret, 0, arg); +    } +} + +/* Size changing operations.  */ + +void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg); +void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg); +void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high); +void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned int c); +void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); +void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); + +static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) +{ +    tcg_gen_deposit_i64(ret, lo, hi, 32, 32); +} + +static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ +    tcg_gen_trunc_shr_i64_i32(ret, arg, 0); +} + +/* QEMU specific operations.  */ + +#ifndef TARGET_LONG_BITS +#error must include QEMU headers +#endif + +/* debug info: write the PC of the corresponding QEMU CPU instruction */ +static inline void tcg_gen_debug_insn_start(uint64_t pc) +{ +    /* XXX: must really use a 32 bit size for TCGArg in all cases */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +    tcg_gen_op2ii(INDEX_op_debug_insn_start, +                  (uint32_t)(pc), (uint32_t)(pc >> 32)); +#else +    tcg_gen_op1i(INDEX_op_debug_insn_start, pc); +#endif +} + +static inline void tcg_gen_exit_tb(uintptr_t val) +{ +    tcg_gen_op1i(INDEX_op_exit_tb, val); +} + +void tcg_gen_goto_tb(unsigned idx); + +#if TARGET_LONG_BITS == 32 +#define TCGv TCGv_i32 +#define tcg_temp_new() tcg_temp_new_i32() +#define tcg_global_reg_new tcg_global_reg_new_i32 +#define tcg_global_mem_new tcg_global_mem_new_i32 +#define tcg_temp_local_new() tcg_temp_local_new_i32() +#define tcg_temp_free tcg_temp_free_i32 +#define TCGV_UNUSED(x) TCGV_UNUSED_I32(x) +#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I32(x) +#define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b) +#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32 +#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32 +#else +#define TCGv TCGv_i64 +#define tcg_temp_new() tcg_temp_new_i64() +#define tcg_global_reg_new tcg_global_reg_new_i64 +#define tcg_global_mem_new tcg_global_mem_new_i64 +#define tcg_temp_local_new() tcg_temp_local_new_i64() +#define tcg_temp_free tcg_temp_free_i64 +#define TCGV_UNUSED(x) TCGV_UNUSED_I64(x) +#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I64(x) +#define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b) +#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64 +#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64 +#endif + +void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp); +void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, TCGMemOp); +void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp); +void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, TCGMemOp); + +static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_UB); +} + +static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_SB); +} + +static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUW); +} + +static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESW); +} + +static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TEUL); +} + +static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_ld_tl(ret, addr, mem_index, MO_TESL); +} + +static inline void 
tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_ld_i64(ret, addr, mem_index, MO_TEQ); +} + +static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_UB); +} + +static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUW); +} + +static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_st_tl(arg, addr, mem_index, MO_TEUL); +} + +static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) +{ +    tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ); +} + +#if TARGET_LONG_BITS == 64 +#define tcg_gen_movi_tl tcg_gen_movi_i64 +#define tcg_gen_mov_tl tcg_gen_mov_i64 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64 +#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64 +#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64 +#define tcg_gen_ld_tl tcg_gen_ld_i64 +#define tcg_gen_st8_tl tcg_gen_st8_i64 +#define tcg_gen_st16_tl tcg_gen_st16_i64 +#define tcg_gen_st32_tl tcg_gen_st32_i64 +#define tcg_gen_st_tl tcg_gen_st_i64 +#define tcg_gen_add_tl tcg_gen_add_i64 +#define tcg_gen_addi_tl tcg_gen_addi_i64 +#define tcg_gen_sub_tl tcg_gen_sub_i64 +#define tcg_gen_neg_tl tcg_gen_neg_i64 +#define tcg_gen_subfi_tl tcg_gen_subfi_i64 +#define tcg_gen_subi_tl tcg_gen_subi_i64 +#define tcg_gen_and_tl tcg_gen_and_i64 +#define tcg_gen_andi_tl tcg_gen_andi_i64 +#define tcg_gen_or_tl tcg_gen_or_i64 +#define tcg_gen_ori_tl tcg_gen_ori_i64 +#define tcg_gen_xor_tl tcg_gen_xor_i64 +#define tcg_gen_xori_tl tcg_gen_xori_i64 +#define tcg_gen_not_tl tcg_gen_not_i64 +#define tcg_gen_shl_tl tcg_gen_shl_i64 +#define tcg_gen_shli_tl tcg_gen_shli_i64 +#define tcg_gen_shr_tl tcg_gen_shr_i64 +#define tcg_gen_shri_tl tcg_gen_shri_i64 +#define tcg_gen_sar_tl tcg_gen_sar_i64 +#define tcg_gen_sari_tl tcg_gen_sari_i64 +#define tcg_gen_brcond_tl tcg_gen_brcond_i64 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64 +#define tcg_gen_setcond_tl tcg_gen_setcond_i64 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64 +#define tcg_gen_mul_tl tcg_gen_mul_i64 +#define tcg_gen_muli_tl tcg_gen_muli_i64 +#define tcg_gen_div_tl tcg_gen_div_i64 +#define tcg_gen_rem_tl tcg_gen_rem_i64 +#define tcg_gen_divu_tl tcg_gen_divu_i64 +#define tcg_gen_remu_tl tcg_gen_remu_i64 +#define tcg_gen_discard_tl tcg_gen_discard_i64 +#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_mov_i64 +#define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64 +#define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64 +#define tcg_gen_extu_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i64 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i64 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i64 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64 +#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64 +#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 +#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 +#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 +#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64 +#define tcg_gen_andc_tl tcg_gen_andc_i64 +#define tcg_gen_eqv_tl tcg_gen_eqv_i64 +#define tcg_gen_nand_tl tcg_gen_nand_i64 +#define tcg_gen_nor_tl tcg_gen_nor_i64 +#define tcg_gen_orc_tl tcg_gen_orc_i64 +#define tcg_gen_rotl_tl tcg_gen_rotl_i64 
+#define tcg_gen_rotli_tl tcg_gen_rotli_i64 +#define tcg_gen_rotr_tl tcg_gen_rotr_i64 +#define tcg_gen_rotri_tl tcg_gen_rotri_i64 +#define tcg_gen_deposit_tl tcg_gen_deposit_i64 +#define tcg_const_tl tcg_const_i64 +#define tcg_const_local_tl tcg_const_local_i64 +#define tcg_gen_movcond_tl tcg_gen_movcond_i64 +#define tcg_gen_add2_tl tcg_gen_add2_i64 +#define tcg_gen_sub2_tl tcg_gen_sub2_i64 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 +#define tcg_gen_muls2_tl tcg_gen_muls2_i64 +#else +#define tcg_gen_movi_tl tcg_gen_movi_i32 +#define tcg_gen_mov_tl tcg_gen_mov_i32 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32 +#define tcg_gen_ld32u_tl tcg_gen_ld_i32 +#define tcg_gen_ld32s_tl tcg_gen_ld_i32 +#define tcg_gen_ld_tl tcg_gen_ld_i32 +#define tcg_gen_st8_tl tcg_gen_st8_i32 +#define tcg_gen_st16_tl tcg_gen_st16_i32 +#define tcg_gen_st32_tl tcg_gen_st_i32 +#define tcg_gen_st_tl tcg_gen_st_i32 +#define tcg_gen_add_tl tcg_gen_add_i32 +#define tcg_gen_addi_tl tcg_gen_addi_i32 +#define tcg_gen_sub_tl tcg_gen_sub_i32 +#define tcg_gen_neg_tl tcg_gen_neg_i32 +#define tcg_gen_subfi_tl tcg_gen_subfi_i32 +#define tcg_gen_subi_tl tcg_gen_subi_i32 +#define tcg_gen_and_tl tcg_gen_and_i32 +#define tcg_gen_andi_tl tcg_gen_andi_i32 +#define tcg_gen_or_tl tcg_gen_or_i32 +#define tcg_gen_ori_tl tcg_gen_ori_i32 +#define tcg_gen_xor_tl tcg_gen_xor_i32 +#define tcg_gen_xori_tl tcg_gen_xori_i32 +#define tcg_gen_not_tl tcg_gen_not_i32 +#define tcg_gen_shl_tl tcg_gen_shl_i32 +#define tcg_gen_shli_tl tcg_gen_shli_i32 +#define tcg_gen_shr_tl tcg_gen_shr_i32 +#define tcg_gen_shri_tl tcg_gen_shri_i32 +#define tcg_gen_sar_tl tcg_gen_sar_i32 +#define tcg_gen_sari_tl tcg_gen_sari_i32 +#define tcg_gen_brcond_tl tcg_gen_brcond_i32 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32 +#define tcg_gen_setcond_tl tcg_gen_setcond_i32 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32 +#define tcg_gen_mul_tl tcg_gen_mul_i32 +#define tcg_gen_muli_tl tcg_gen_muli_i32 +#define tcg_gen_div_tl tcg_gen_div_i32 +#define tcg_gen_rem_tl tcg_gen_rem_i32 +#define tcg_gen_divu_tl tcg_gen_divu_i32 +#define tcg_gen_remu_tl tcg_gen_remu_i32 +#define tcg_gen_discard_tl tcg_gen_discard_i32 +#define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32 +#define tcg_gen_extu_i32_tl tcg_gen_mov_i32 +#define tcg_gen_ext_i32_tl tcg_gen_mov_i32 +#define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i32 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i32 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i32 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32 +#define tcg_gen_ext32u_tl tcg_gen_mov_i32 +#define tcg_gen_ext32s_tl tcg_gen_mov_i32 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 +#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 +#define tcg_gen_andc_tl tcg_gen_andc_i32 +#define tcg_gen_eqv_tl tcg_gen_eqv_i32 +#define tcg_gen_nand_tl tcg_gen_nand_i32 +#define tcg_gen_nor_tl tcg_gen_nor_i32 +#define tcg_gen_orc_tl tcg_gen_orc_i32 +#define tcg_gen_rotl_tl tcg_gen_rotl_i32 +#define tcg_gen_rotli_tl tcg_gen_rotli_i32 +#define tcg_gen_rotr_tl tcg_gen_rotr_i32 +#define tcg_gen_rotri_tl tcg_gen_rotri_i32 +#define tcg_gen_deposit_tl tcg_gen_deposit_i32 +#define tcg_const_tl tcg_const_i32 +#define tcg_const_local_tl tcg_const_local_i32 +#define 
tcg_gen_movcond_tl tcg_gen_movcond_i32 +#define tcg_gen_add2_tl tcg_gen_add2_i32 +#define tcg_gen_sub2_tl tcg_gen_sub2_i32 +#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 +#define tcg_gen_muls2_tl tcg_gen_muls2_i32 +#endif + +#if UINTPTR_MAX == UINT32_MAX +# define tcg_gen_ld_ptr(R, A, O) \ +    tcg_gen_ld_i32(TCGV_PTR_TO_NAT(R), (A), (O)) +# define tcg_gen_discard_ptr(A) \ +    tcg_gen_discard_i32(TCGV_PTR_TO_NAT(A)) +# define tcg_gen_add_ptr(R, A, B) \ +    tcg_gen_add_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B)) +# define tcg_gen_addi_ptr(R, A, B) \ +    tcg_gen_addi_i32(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B)) +# define tcg_gen_ext_i32_ptr(R, A) \ +    tcg_gen_mov_i32(TCGV_PTR_TO_NAT(R), (A)) +#else +# define tcg_gen_ld_ptr(R, A, O) \ +    tcg_gen_ld_i64(TCGV_PTR_TO_NAT(R), (A), (O)) +# define tcg_gen_discard_ptr(A) \ +    tcg_gen_discard_i64(TCGV_PTR_TO_NAT(A)) +# define tcg_gen_add_ptr(R, A, B) \ +    tcg_gen_add_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), TCGV_PTR_TO_NAT(B)) +# define tcg_gen_addi_ptr(R, A, B) \ +    tcg_gen_addi_i64(TCGV_PTR_TO_NAT(R), TCGV_PTR_TO_NAT(A), (B)) +# define tcg_gen_ext_i32_ptr(R, A) \ +    tcg_gen_ext_i32_i64(TCGV_PTR_TO_NAT(R), (A)) +#endif /* UINTPTR_MAX == UINT32_MAX */ diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h new file mode 100644 index 00000000..13ccb60a --- /dev/null +++ b/tcg/tcg-opc.h @@ -0,0 +1,195 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * DEF(name, oargs, iargs, cargs, flags) + */ + +/* predefined ops */ +DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT) +DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) + +/* variable number of parameters */ +DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) + +DEF(br, 0, 0, 1, TCG_OPF_BB_END) + +#define IMPL(X) (__builtin_constant_p(X) && !(X) ? 
TCG_OPF_NOT_PRESENT : 0) +#if TCG_TARGET_REG_BITS == 32 +# define IMPL64  TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT +#else +# define IMPL64  TCG_OPF_64BIT +#endif + +DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) +DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(setcond_i32, 1, 2, 1, 0) +DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32)) +/* load/store */ +DEF(ld8u_i32, 1, 1, 1, 0) +DEF(ld8s_i32, 1, 1, 1, 0) +DEF(ld16u_i32, 1, 1, 1, 0) +DEF(ld16s_i32, 1, 1, 1, 0) +DEF(ld_i32, 1, 1, 1, 0) +DEF(st8_i32, 0, 2, 1, 0) +DEF(st16_i32, 0, 2, 1, 0) +DEF(st_i32, 0, 2, 1, 0) +/* arith */ +DEF(add_i32, 1, 2, 0, 0) +DEF(sub_i32, 1, 2, 0, 0) +DEF(mul_i32, 1, 2, 0, 0) +DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) +DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32)) +DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) +DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32)) +DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) +DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32)) +DEF(and_i32, 1, 2, 0, 0) +DEF(or_i32, 1, 2, 0, 0) +DEF(xor_i32, 1, 2, 0, 0) +/* shifts/rotates */ +DEF(shl_i32, 1, 2, 0, 0) +DEF(shr_i32, 1, 2, 0, 0) +DEF(sar_i32, 1, 2, 0, 0) +DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) +DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) +DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) + +DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) + +DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) +DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) +DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) +DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) +DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) +DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) +DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) +DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) + +DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) +DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32)) +DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32)) +DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) +DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32)) +DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32)) +DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) +DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) +DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32)) +DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32)) +DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32)) +DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32)) +DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32)) + +DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(setcond_i64, 1, 2, 1, IMPL64) +DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64)) +/* load/store */ +DEF(ld8u_i64, 1, 1, 1, IMPL64) +DEF(ld8s_i64, 1, 1, 1, IMPL64) +DEF(ld16u_i64, 1, 1, 1, IMPL64) +DEF(ld16s_i64, 1, 1, 1, IMPL64) +DEF(ld32u_i64, 1, 1, 1, IMPL64) +DEF(ld32s_i64, 1, 1, 1, IMPL64) +DEF(ld_i64, 1, 1, 1, IMPL64) +DEF(st8_i64, 0, 2, 1, IMPL64) +DEF(st16_i64, 0, 2, 1, IMPL64) +DEF(st32_i64, 0, 2, 1, IMPL64) +DEF(st_i64, 0, 2, 1, IMPL64) +/* arith */ +DEF(add_i64, 1, 2, 0, IMPL64) +DEF(sub_i64, 1, 2, 0, IMPL64) +DEF(mul_i64, 1, 2, 0, IMPL64) +DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) +DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64)) +DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) +DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64)) +DEF(div2_i64, 
2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) +DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64)) +DEF(and_i64, 1, 2, 0, IMPL64) +DEF(or_i64, 1, 2, 0, IMPL64) +DEF(xor_i64, 1, 2, 0, IMPL64) +/* shifts/rotates */ +DEF(shl_i64, 1, 2, 0, IMPL64) +DEF(shr_i64, 1, 2, 0, IMPL64) +DEF(sar_i64, 1, 2, 0, IMPL64) +DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) +DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) +DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) + +DEF(trunc_shr_i32, 1, 1, 1, +    IMPL(TCG_TARGET_HAS_trunc_shr_i32) +    | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) + +DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) +DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) +DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) +DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) +DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64)) +DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64)) +DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64)) +DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) +DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) +DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) +DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) +DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) +DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) +DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64)) +DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64)) +DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64)) +DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64)) + +DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) +DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) +DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) +DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) +DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64)) +DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64)) + +/* QEMU specific */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +DEF(debug_insn_start, 0, 0, 2, TCG_OPF_NOT_PRESENT) +#else +DEF(debug_insn_start, 0, 0, 1, TCG_OPF_NOT_PRESENT) +#endif +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) + +#define TLADDR_ARGS    (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2) +#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 
1 : 2) + +DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1, +    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1, +    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1, +    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1, +    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) + +#undef TLADDR_ARGS +#undef DATA64_ARGS +#undef IMPL +#undef IMPL64 +#undef DEF diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h new file mode 100644 index 00000000..23a0c377 --- /dev/null +++ b/tcg/tcg-runtime.h @@ -0,0 +1,16 @@ +DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32) +DEF_HELPER_FLAGS_2(rem_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32) +DEF_HELPER_FLAGS_2(divu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_2(remu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32) + +DEF_HELPER_FLAGS_2(div_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(rem_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(divu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(remu_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(shl_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(shr_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) + +DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) diff --git a/tcg/tcg.c b/tcg/tcg.c new file mode 100644 index 00000000..0892a9bb --- /dev/null +++ b/tcg/tcg.c @@ -0,0 +1,2764 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* define it to use liveness analysis (better code) */ +#define USE_LIVENESS_ANALYSIS +#define USE_TCG_OPTIMIZATIONS + +#include "config.h" + +/* Define to jump the ELF file used to communicate with GDB.  */ +#undef DEBUG_JIT + +#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG) +/* define it to suppress various consistency checks (faster) */ +#define NDEBUG +#endif + +#include "qemu-common.h" +#include "qemu/host-utils.h" +#include "qemu/timer.h" + +/* Note: the long term plan is to reduce the dependencies on the QEMU +   CPU definitions. 
Currently they are used for qemu_ld/st +   instructions */ +#define NO_CPU_IO_DEFS +#include "cpu.h" + +#include "tcg-op.h" + +#if UINTPTR_MAX == UINT32_MAX +# define ELF_CLASS  ELFCLASS32 +#else +# define ELF_CLASS  ELFCLASS64 +#endif +#ifdef HOST_WORDS_BIGENDIAN +# define ELF_DATA   ELFDATA2MSB +#else +# define ELF_DATA   ELFDATA2LSB +#endif + +#include "elf.h" + +/* Forward declarations for functions declared in tcg-target.c and used here. */ +static void tcg_target_init(TCGContext *s); +static void tcg_target_qemu_prologue(TCGContext *s); +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend); + +/* The CIE and FDE header definitions will be common to all hosts.  */ +typedef struct { +    uint32_t len __attribute__((aligned((sizeof(void *))))); +    uint32_t id; +    uint8_t version; +    char augmentation[1]; +    uint8_t code_align; +    uint8_t data_align; +    uint8_t return_column; +} DebugFrameCIE; + +typedef struct QEMU_PACKED { +    uint32_t len __attribute__((aligned((sizeof(void *))))); +    uint32_t cie_offset; +    uintptr_t func_start; +    uintptr_t func_len; +} DebugFrameFDEHeader; + +typedef struct QEMU_PACKED { +    DebugFrameCIE cie; +    DebugFrameFDEHeader fde; +} DebugFrameHeader; + +static void tcg_register_jit_int(void *buf, size_t size, +                                 const void *debug_frame, +                                 size_t debug_frame_size) +    __attribute__((unused)); + +/* Forward declarations for functions declared and used in tcg-target.c. */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str); +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, +                       intptr_t arg2); +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); +static void tcg_out_movi(TCGContext *s, TCGType type, +                         TCGReg ret, tcg_target_long arg); +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, +                       const int *const_args); +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, +                       intptr_t arg2); +static void tcg_out_call(TCGContext *s, tcg_insn_unit *target); +static int tcg_target_const_match(tcg_target_long val, TCGType type, +                                  const TCGArgConstraint *arg_ct); +static void tcg_out_tb_init(TCGContext *s); +static void tcg_out_tb_finalize(TCGContext *s); + + +TCGOpDef tcg_op_defs[] = { +#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, +#include "tcg-opc.h" +#undef DEF +}; +const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs); + +static TCGRegSet tcg_target_available_regs[2]; +static TCGRegSet tcg_target_call_clobber_regs; + +#if TCG_TARGET_INSN_UNIT_SIZE == 1 +static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v) +{ +    *s->code_ptr++ = v; +} + +static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p, +                                                      uint8_t v) +{ +    *p = v; +} +#endif + +#if TCG_TARGET_INSN_UNIT_SIZE <= 2 +static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v) +{ +    if (TCG_TARGET_INSN_UNIT_SIZE == 2) { +        *s->code_ptr++ = v; +    } else { +        tcg_insn_unit *p = s->code_ptr; +        memcpy(p, &v, sizeof(v)); +        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE); +    } +} + +static __attribute__((unused)) inline void 
tcg_patch16(tcg_insn_unit *p, +                                                       uint16_t v) +{ +    if (TCG_TARGET_INSN_UNIT_SIZE == 2) { +        *p = v; +    } else { +        memcpy(p, &v, sizeof(v)); +    } +} +#endif + +#if TCG_TARGET_INSN_UNIT_SIZE <= 4 +static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v) +{ +    if (TCG_TARGET_INSN_UNIT_SIZE == 4) { +        *s->code_ptr++ = v; +    } else { +        tcg_insn_unit *p = s->code_ptr; +        memcpy(p, &v, sizeof(v)); +        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE); +    } +} + +static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p, +                                                       uint32_t v) +{ +    if (TCG_TARGET_INSN_UNIT_SIZE == 4) { +        *p = v; +    } else { +        memcpy(p, &v, sizeof(v)); +    } +} +#endif + +#if TCG_TARGET_INSN_UNIT_SIZE <= 8 +static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v) +{ +    if (TCG_TARGET_INSN_UNIT_SIZE == 8) { +        *s->code_ptr++ = v; +    } else { +        tcg_insn_unit *p = s->code_ptr; +        memcpy(p, &v, sizeof(v)); +        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE); +    } +} + +static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p, +                                                       uint64_t v) +{ +    if (TCG_TARGET_INSN_UNIT_SIZE == 8) { +        *p = v; +    } else { +        memcpy(p, &v, sizeof(v)); +    } +} +#endif + +/* label relocation processing */ + +static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, +                          TCGLabel *l, intptr_t addend) +{ +    TCGRelocation *r; + +    if (l->has_value) { +        /* FIXME: This may break relocations on RISC targets that +           modify instruction fields in place.  The caller may not have  +           written the initial value.  
*/ +        patch_reloc(code_ptr, type, l->u.value, addend); +    } else { +        /* add a new relocation entry */ +        r = tcg_malloc(sizeof(TCGRelocation)); +        r->type = type; +        r->ptr = code_ptr; +        r->addend = addend; +        r->next = l->u.first_reloc; +        l->u.first_reloc = r; +    } +} + +static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) +{ +    intptr_t value = (intptr_t)ptr; +    TCGRelocation *r; + +    assert(!l->has_value); + +    for (r = l->u.first_reloc; r != NULL; r = r->next) { +        patch_reloc(r->ptr, r->type, value, r->addend); +    } + +    l->has_value = 1; +    l->u.value_ptr = ptr; +} + +TCGLabel *gen_new_label(void) +{ +    TCGContext *s = &tcg_ctx; +    TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); + +    *l = (TCGLabel){ +        .id = s->nb_labels++ +    }; + +    return l; +} + +#include "tcg-target.c" + +/* pool based memory allocation */ +void *tcg_malloc_internal(TCGContext *s, int size) +{ +    TCGPool *p; +    int pool_size; +     +    if (size > TCG_POOL_CHUNK_SIZE) { +        /* big malloc: insert a new pool (XXX: could optimize) */ +        p = g_malloc(sizeof(TCGPool) + size); +        p->size = size; +        p->next = s->pool_first_large; +        s->pool_first_large = p; +        return p->data; +    } else { +        p = s->pool_current; +        if (!p) { +            p = s->pool_first; +            if (!p) +                goto new_pool; +        } else { +            if (!p->next) { +            new_pool: +                pool_size = TCG_POOL_CHUNK_SIZE; +                p = g_malloc(sizeof(TCGPool) + pool_size); +                p->size = pool_size; +                p->next = NULL; +                if (s->pool_current)  +                    s->pool_current->next = p; +                else +                    s->pool_first = p; +            } else { +                p = p->next; +            } +        } +    } +    s->pool_current = p; +    s->pool_cur = p->data + size; +    s->pool_end = p->data + p->size; +    return p->data; +} + +void tcg_pool_reset(TCGContext *s) +{ +    TCGPool *p, *t; +    for (p = s->pool_first_large; p; p = t) { +        t = p->next; +        g_free(p); +    } +    s->pool_first_large = NULL; +    s->pool_cur = s->pool_end = NULL; +    s->pool_current = NULL; +} + +typedef struct TCGHelperInfo { +    void *func; +    const char *name; +    unsigned flags; +    unsigned sizemask; +} TCGHelperInfo; + +#include "exec/helper-proto.h" + +static const TCGHelperInfo all_helpers[] = { +#include "exec/helper-tcg.h" +}; + +void tcg_context_init(TCGContext *s) +{ +    int op, total_args, n, i; +    TCGOpDef *def; +    TCGArgConstraint *args_ct; +    int *sorted_args; +    GHashTable *helper_table; + +    memset(s, 0, sizeof(*s)); +    s->nb_globals = 0; +     +    /* Count total number of arguments and allocate the corresponding +       space */ +    total_args = 0; +    for(op = 0; op < NB_OPS; op++) { +        def = &tcg_op_defs[op]; +        n = def->nb_iargs + def->nb_oargs; +        total_args += n; +    } + +    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args); +    sorted_args = g_malloc(sizeof(int) * total_args); + +    for(op = 0; op < NB_OPS; op++) { +        def = &tcg_op_defs[op]; +        def->args_ct = args_ct; +        def->sorted_args = sorted_args; +        n = def->nb_iargs + def->nb_oargs; +        sorted_args += n; +        args_ct += n; +    } + +    /* Register helpers.  
*/ +    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */ +    s->helpers = helper_table = g_hash_table_new(NULL, NULL); + +    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { +        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, +                            (gpointer)&all_helpers[i]); +    } + +    tcg_target_init(s); +} + +void tcg_prologue_init(TCGContext *s) +{ +    /* init global prologue and epilogue */ +    s->code_buf = s->code_gen_prologue; +    s->code_ptr = s->code_buf; +    tcg_target_qemu_prologue(s); +    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); + +#ifdef DEBUG_DISAS +    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { +        size_t size = tcg_current_code_size(s); +        qemu_log("PROLOGUE: [size=%zu]\n", size); +        log_disas(s->code_buf, size); +        qemu_log("\n"); +        qemu_log_flush(); +    } +#endif +} + +void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) +{ +    s->frame_start = start; +    s->frame_end = start + size; +    s->frame_reg = reg; +} + +void tcg_func_start(TCGContext *s) +{ +    tcg_pool_reset(s); +    s->nb_temps = s->nb_globals; + +    /* No temps have been previously allocated for size or locality.  */ +    memset(s->free_temps, 0, sizeof(s->free_temps)); + +    s->nb_labels = 0; +    s->current_frame_offset = s->frame_start; + +#ifdef CONFIG_DEBUG_TCG +    s->goto_tb_issue_mask = 0; +#endif + +    s->gen_first_op_idx = 0; +    s->gen_last_op_idx = -1; +    s->gen_next_op_idx = 0; +    s->gen_next_parm_idx = 0; + +    s->be = tcg_malloc(sizeof(TCGBackendData)); +} + +static inline void tcg_temp_alloc(TCGContext *s, int n) +{ +    if (n > TCG_MAX_TEMPS) +        tcg_abort(); +} + +static inline int tcg_global_reg_new_internal(TCGType type, int reg, +                                              const char *name) +{ +    TCGContext *s = &tcg_ctx; +    TCGTemp *ts; +    int idx; + +#if TCG_TARGET_REG_BITS == 32 +    if (type != TCG_TYPE_I32) +        tcg_abort(); +#endif +    if (tcg_regset_test_reg(s->reserved_regs, reg)) +        tcg_abort(); +    idx = s->nb_globals; +    tcg_temp_alloc(s, s->nb_globals + 1); +    ts = &s->temps[s->nb_globals]; +    ts->base_type = type; +    ts->type = type; +    ts->fixed_reg = 1; +    ts->reg = reg; +    ts->name = name; +    s->nb_globals++; +    tcg_regset_set_reg(s->reserved_regs, reg); +    return idx; +} + +TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name) +{ +    int idx; + +    idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name); +    return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name) +{ +    int idx; + +    idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name); +    return MAKE_TCGV_I64(idx); +} + +static inline int tcg_global_mem_new_internal(TCGType type, int reg, +                                              intptr_t offset, +                                              const char *name) +{ +    TCGContext *s = &tcg_ctx; +    TCGTemp *ts; +    int idx; + +    idx = s->nb_globals; +#if TCG_TARGET_REG_BITS == 32 +    if (type == TCG_TYPE_I64) { +        char buf[64]; +        tcg_temp_alloc(s, s->nb_globals + 2); +        ts = &s->temps[s->nb_globals]; +        ts->base_type = type; +        ts->type = TCG_TYPE_I32; +        ts->fixed_reg = 0; +        ts->mem_allocated = 1; +        ts->mem_reg = reg; +#ifdef HOST_WORDS_BIGENDIAN +        ts->mem_offset = offset + 4; +#else +        ts->mem_offset = offset; +#endif +        pstrcpy(buf, 
sizeof(buf), name); +        pstrcat(buf, sizeof(buf), "_0"); +        ts->name = strdup(buf); +        ts++; + +        ts->base_type = type; +        ts->type = TCG_TYPE_I32; +        ts->fixed_reg = 0; +        ts->mem_allocated = 1; +        ts->mem_reg = reg; +#ifdef HOST_WORDS_BIGENDIAN +        ts->mem_offset = offset; +#else +        ts->mem_offset = offset + 4; +#endif +        pstrcpy(buf, sizeof(buf), name); +        pstrcat(buf, sizeof(buf), "_1"); +        ts->name = strdup(buf); + +        s->nb_globals += 2; +    } else +#endif +    { +        tcg_temp_alloc(s, s->nb_globals + 1); +        ts = &s->temps[s->nb_globals]; +        ts->base_type = type; +        ts->type = type; +        ts->fixed_reg = 0; +        ts->mem_allocated = 1; +        ts->mem_reg = reg; +        ts->mem_offset = offset; +        ts->name = name; +        s->nb_globals++; +    } +    return idx; +} + +TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name) +{ +    int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); +    return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name) +{ +    int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); +    return MAKE_TCGV_I64(idx); +} + +static inline int tcg_temp_new_internal(TCGType type, int temp_local) +{ +    TCGContext *s = &tcg_ctx; +    TCGTemp *ts; +    int idx, k; + +    k = type + (temp_local ? TCG_TYPE_COUNT : 0); +    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); +    if (idx < TCG_MAX_TEMPS) { +        /* There is already an available temp with the right type.  */ +        clear_bit(idx, s->free_temps[k].l); + +        ts = &s->temps[idx]; +        ts->temp_allocated = 1; +        assert(ts->base_type == type); +        assert(ts->temp_local == temp_local); +    } else { +        idx = s->nb_temps; +#if TCG_TARGET_REG_BITS == 32 +        if (type == TCG_TYPE_I64) { +            tcg_temp_alloc(s, s->nb_temps + 2); +            ts = &s->temps[s->nb_temps]; +            ts->base_type = type; +            ts->type = TCG_TYPE_I32; +            ts->temp_allocated = 1; +            ts->temp_local = temp_local; +            ts->name = NULL; +            ts++; +            ts->base_type = type; +            ts->type = TCG_TYPE_I32; +            ts->temp_allocated = 1; +            ts->temp_local = temp_local; +            ts->name = NULL; +            s->nb_temps += 2; +        } else +#endif +        { +            tcg_temp_alloc(s, s->nb_temps + 1); +            ts = &s->temps[s->nb_temps]; +            ts->base_type = type; +            ts->type = type; +            ts->temp_allocated = 1; +            ts->temp_local = temp_local; +            ts->name = NULL; +            s->nb_temps++; +        } +    } + +#if defined(CONFIG_DEBUG_TCG) +    s->temps_in_use++; +#endif +    return idx; +} + +TCGv_i32 tcg_temp_new_internal_i32(int temp_local) +{ +    int idx; + +    idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); +    return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_temp_new_internal_i64(int temp_local) +{ +    int idx; + +    idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); +    return MAKE_TCGV_I64(idx); +} + +static void tcg_temp_free_internal(int idx) +{ +    TCGContext *s = &tcg_ctx; +    TCGTemp *ts; +    int k; + +#if defined(CONFIG_DEBUG_TCG) +    s->temps_in_use--; +    if (s->temps_in_use < 0) { +        fprintf(stderr, "More temporaries freed than allocated!\n"); +    } +#endif + +    assert(idx >= s->nb_globals && 
idx < s->nb_temps); +    ts = &s->temps[idx]; +    assert(ts->temp_allocated != 0); +    ts->temp_allocated = 0; + +    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0); +    set_bit(idx, s->free_temps[k].l); +} + +void tcg_temp_free_i32(TCGv_i32 arg) +{ +    tcg_temp_free_internal(GET_TCGV_I32(arg)); +} + +void tcg_temp_free_i64(TCGv_i64 arg) +{ +    tcg_temp_free_internal(GET_TCGV_I64(arg)); +} + +TCGv_i32 tcg_const_i32(int32_t val) +{ +    TCGv_i32 t0; +    t0 = tcg_temp_new_i32(); +    tcg_gen_movi_i32(t0, val); +    return t0; +} + +TCGv_i64 tcg_const_i64(int64_t val) +{ +    TCGv_i64 t0; +    t0 = tcg_temp_new_i64(); +    tcg_gen_movi_i64(t0, val); +    return t0; +} + +TCGv_i32 tcg_const_local_i32(int32_t val) +{ +    TCGv_i32 t0; +    t0 = tcg_temp_local_new_i32(); +    tcg_gen_movi_i32(t0, val); +    return t0; +} + +TCGv_i64 tcg_const_local_i64(int64_t val) +{ +    TCGv_i64 t0; +    t0 = tcg_temp_local_new_i64(); +    tcg_gen_movi_i64(t0, val); +    return t0; +} + +#if defined(CONFIG_DEBUG_TCG) +void tcg_clear_temp_count(void) +{ +    TCGContext *s = &tcg_ctx; +    s->temps_in_use = 0; +} + +int tcg_check_temp_count(void) +{ +    TCGContext *s = &tcg_ctx; +    if (s->temps_in_use) { +        /* Clear the count so that we don't give another +         * warning immediately next time around. +         */ +        s->temps_in_use = 0; +        return 1; +    } +    return 0; +} +#endif + +/* Note: we convert the 64 bit args to 32 bit and do some alignment +   and endian swap. Maybe it would be better to do the alignment +   and endian swap in tcg_reg_alloc_call(). */ +void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, +                   int nargs, TCGArg *args) +{ +    int i, real_args, nb_rets, pi, pi_first; +    unsigned sizemask, flags; +    TCGHelperInfo *info; + +    info = g_hash_table_lookup(s->helpers, (gpointer)func); +    flags = info->flags; +    sizemask = info->sizemask; + +#if defined(__sparc__) && !defined(__arch64__) \ +    && !defined(CONFIG_TCG_INTERPRETER) +    /* We have 64-bit values in one register, but need to pass as two +       separate parameters.  Split them.  
*/ +    int orig_sizemask = sizemask; +    int orig_nargs = nargs; +    TCGv_i64 retl, reth; + +    TCGV_UNUSED_I64(retl); +    TCGV_UNUSED_I64(reth); +    if (sizemask != 0) { +        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2); +        for (i = real_args = 0; i < nargs; ++i) { +            int is_64bit = sizemask & (1 << (i+1)*2); +            if (is_64bit) { +                TCGv_i64 orig = MAKE_TCGV_I64(args[i]); +                TCGv_i32 h = tcg_temp_new_i32(); +                TCGv_i32 l = tcg_temp_new_i32(); +                tcg_gen_extr_i64_i32(l, h, orig); +                split_args[real_args++] = GET_TCGV_I32(h); +                split_args[real_args++] = GET_TCGV_I32(l); +            } else { +                split_args[real_args++] = args[i]; +            } +        } +        nargs = real_args; +        args = split_args; +        sizemask = 0; +    } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +    for (i = 0; i < nargs; ++i) { +        int is_64bit = sizemask & (1 << (i+1)*2); +        int is_signed = sizemask & (2 << (i+1)*2); +        if (!is_64bit) { +            TCGv_i64 temp = tcg_temp_new_i64(); +            TCGv_i64 orig = MAKE_TCGV_I64(args[i]); +            if (is_signed) { +                tcg_gen_ext32s_i64(temp, orig); +            } else { +                tcg_gen_ext32u_i64(temp, orig); +            } +            args[i] = GET_TCGV_I64(temp); +        } +    } +#endif /* TCG_TARGET_EXTEND_ARGS */ + +    pi_first = pi = s->gen_next_parm_idx; +    if (ret != TCG_CALL_DUMMY_ARG) { +#if defined(__sparc__) && !defined(__arch64__) \ +    && !defined(CONFIG_TCG_INTERPRETER) +        if (orig_sizemask & 1) { +            /* The 32-bit ABI is going to return the 64-bit value in +               the %o0/%o1 register pair.  Prepare for this by using +               two return temporaries, and reassemble below.  */ +            retl = tcg_temp_new_i64(); +            reth = tcg_temp_new_i64(); +            s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth); +            s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl); +            nb_rets = 2; +        } else { +            s->gen_opparam_buf[pi++] = ret; +            nb_rets = 1; +        } +#else +        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { +#ifdef HOST_WORDS_BIGENDIAN +            s->gen_opparam_buf[pi++] = ret + 1; +            s->gen_opparam_buf[pi++] = ret; +#else +            s->gen_opparam_buf[pi++] = ret; +            s->gen_opparam_buf[pi++] = ret + 1; +#endif +            nb_rets = 2; +        } else { +            s->gen_opparam_buf[pi++] = ret; +            nb_rets = 1; +        } +#endif +    } else { +        nb_rets = 0; +    } +    real_args = 0; +    for (i = 0; i < nargs; i++) { +        int is_64bit = sizemask & (1 << (i+1)*2); +        if (TCG_TARGET_REG_BITS < 64 && is_64bit) { +#ifdef TCG_TARGET_CALL_ALIGN_ARGS +            /* some targets want aligned 64 bit args */ +            if (real_args & 1) { +                s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG; +                real_args++; +            } +#endif +	    /* If stack grows up, then we will be placing successive +	       arguments at lower addresses, which means we need to +	       reverse the order compared to how we would normally +	       treat either big or little-endian.  
For those arguments +	       that will wind up in registers, this still works for +	       HPPA (the only current STACK_GROWSUP target) since the +	       argument registers are *also* allocated in decreasing +	       order.  If another such target is added, this logic may +	       have to get more complicated to differentiate between +	       stack arguments and register arguments.  */ +#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) +            s->gen_opparam_buf[pi++] = args[i] + 1; +            s->gen_opparam_buf[pi++] = args[i]; +#else +            s->gen_opparam_buf[pi++] = args[i]; +            s->gen_opparam_buf[pi++] = args[i] + 1; +#endif +            real_args += 2; +            continue; +        } + +        s->gen_opparam_buf[pi++] = args[i]; +        real_args++; +    } +    s->gen_opparam_buf[pi++] = (uintptr_t)func; +    s->gen_opparam_buf[pi++] = flags; + +    i = s->gen_next_op_idx; +    tcg_debug_assert(i < OPC_BUF_SIZE); +    tcg_debug_assert(pi <= OPPARAM_BUF_SIZE); + +    /* Set links for sequential allocation during translation.  */ +    s->gen_op_buf[i] = (TCGOp){ +        .opc = INDEX_op_call, +        .callo = nb_rets, +        .calli = real_args, +        .args = pi_first, +        .prev = i - 1, +        .next = i + 1 +    }; + +    /* Make sure the calli field didn't overflow.  */ +    tcg_debug_assert(s->gen_op_buf[i].calli == real_args); + +    s->gen_last_op_idx = i; +    s->gen_next_op_idx = i + 1; +    s->gen_next_parm_idx = pi; + +#if defined(__sparc__) && !defined(__arch64__) \ +    && !defined(CONFIG_TCG_INTERPRETER) +    /* Free all of the parts we allocated above.  */ +    for (i = real_args = 0; i < orig_nargs; ++i) { +        int is_64bit = orig_sizemask & (1 << (i+1)*2); +        if (is_64bit) { +            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]); +            TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]); +            tcg_temp_free_i32(h); +            tcg_temp_free_i32(l); +        } else { +            real_args++; +        } +    } +    if (orig_sizemask & 1) { +        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them. +           Note that describing these as TCGv_i64 eliminates an unnecessary +           zero-extension that tcg_gen_concat_i32_i64 would create.  
*/ +        tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth); +        tcg_temp_free_i64(retl); +        tcg_temp_free_i64(reth); +    } +#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +    for (i = 0; i < nargs; ++i) { +        int is_64bit = sizemask & (1 << (i+1)*2); +        if (!is_64bit) { +            TCGv_i64 temp = MAKE_TCGV_I64(args[i]); +            tcg_temp_free_i64(temp); +        } +    } +#endif /* TCG_TARGET_EXTEND_ARGS */ +} + +static void tcg_reg_alloc_start(TCGContext *s) +{ +    int i; +    TCGTemp *ts; +    for(i = 0; i < s->nb_globals; i++) { +        ts = &s->temps[i]; +        if (ts->fixed_reg) { +            ts->val_type = TEMP_VAL_REG; +        } else { +            ts->val_type = TEMP_VAL_MEM; +        } +    } +    for(i = s->nb_globals; i < s->nb_temps; i++) { +        ts = &s->temps[i]; +        if (ts->temp_local) { +            ts->val_type = TEMP_VAL_MEM; +        } else { +            ts->val_type = TEMP_VAL_DEAD; +        } +        ts->mem_allocated = 0; +        ts->fixed_reg = 0; +    } +    for(i = 0; i < TCG_TARGET_NB_REGS; i++) { +        s->reg_to_temp[i] = -1; +    } +} + +static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size, +                                 int idx) +{ +    TCGTemp *ts; + +    assert(idx >= 0 && idx < s->nb_temps); +    ts = &s->temps[idx]; +    if (idx < s->nb_globals) { +        pstrcpy(buf, buf_size, ts->name); +    } else { +        if (ts->temp_local)  +            snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); +        else +            snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); +    } +    return buf; +} + +char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg) +{ +    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg)); +} + +char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg) +{ +    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg)); +} + +/* Find helper name.  
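
For orientation (editorial aside): the names produced by tcg_get_arg_str_idx above are what tcg_dump_ops below prints, globals under the name they were created with, ordinary temporaries as tmpN and local temporaries as locN, so a dumped block looks roughly like:

    movi_i32 tmp0,$0x2a
    add_i32 tmp1,tmp0,tmp0
    brcond_i32 tmp1,tmp0,eq,$L1

(these lines are only a hand-written illustration of the format, not real dump output)
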
*/ +static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) +{ +    const char *ret = NULL; +    if (s->helpers) { +        TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val); +        if (info) { +            ret = info->name; +        } +    } +    return ret; +} + +static const char * const cond_name[] = +{ +    [TCG_COND_NEVER] = "never", +    [TCG_COND_ALWAYS] = "always", +    [TCG_COND_EQ] = "eq", +    [TCG_COND_NE] = "ne", +    [TCG_COND_LT] = "lt", +    [TCG_COND_GE] = "ge", +    [TCG_COND_LE] = "le", +    [TCG_COND_GT] = "gt", +    [TCG_COND_LTU] = "ltu", +    [TCG_COND_GEU] = "geu", +    [TCG_COND_LEU] = "leu", +    [TCG_COND_GTU] = "gtu" +}; + +static const char * const ldst_name[] = +{ +    [MO_UB]   = "ub", +    [MO_SB]   = "sb", +    [MO_LEUW] = "leuw", +    [MO_LESW] = "lesw", +    [MO_LEUL] = "leul", +    [MO_LESL] = "lesl", +    [MO_LEQ]  = "leq", +    [MO_BEUW] = "beuw", +    [MO_BESW] = "besw", +    [MO_BEUL] = "beul", +    [MO_BESL] = "besl", +    [MO_BEQ]  = "beq", +}; + +void tcg_dump_ops(TCGContext *s) +{ +    char buf[128]; +    TCGOp *op; +    int oi; + +    for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { +        int i, k, nb_oargs, nb_iargs, nb_cargs; +        const TCGOpDef *def; +        const TCGArg *args; +        TCGOpcode c; + +        op = &s->gen_op_buf[oi]; +        c = op->opc; +        def = &tcg_op_defs[c]; +        args = &s->gen_opparam_buf[op->args]; + +        if (c == INDEX_op_debug_insn_start) { +            uint64_t pc; +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +            pc = ((uint64_t)args[1] << 32) | args[0]; +#else +            pc = args[0]; +#endif +            if (oi != s->gen_first_op_idx) { +                qemu_log("\n"); +            } +            qemu_log(" ---- 0x%" PRIx64, pc); +        } else if (c == INDEX_op_call) { +            /* variable number of arguments */ +            nb_oargs = op->callo; +            nb_iargs = op->calli; +            nb_cargs = def->nb_cargs; + +            /* function name, flags, out args */ +            qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, +                     tcg_find_helper(s, args[nb_oargs + nb_iargs]), +                     args[nb_oargs + nb_iargs + 1], nb_oargs); +            for (i = 0; i < nb_oargs; i++) { +                qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), +                                                   args[i])); +            } +            for (i = 0; i < nb_iargs; i++) { +                TCGArg arg = args[nb_oargs + i]; +                const char *t = "<dummy>"; +                if (arg != TCG_CALL_DUMMY_ARG) { +                    t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); +                } +                qemu_log(",%s", t); +            } +        } else { +            qemu_log(" %s ", def->name); + +            nb_oargs = def->nb_oargs; +            nb_iargs = def->nb_iargs; +            nb_cargs = def->nb_cargs; + +            k = 0; +            for (i = 0; i < nb_oargs; i++) { +                if (k != 0) { +                    qemu_log(","); +                } +                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), +                                                   args[k++])); +            } +            for (i = 0; i < nb_iargs; i++) { +                if (k != 0) { +                    qemu_log(","); +                } +                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), +                                                   args[k++])); +   
         } +            switch (c) { +            case INDEX_op_brcond_i32: +            case INDEX_op_setcond_i32: +            case INDEX_op_movcond_i32: +            case INDEX_op_brcond2_i32: +            case INDEX_op_setcond2_i32: +            case INDEX_op_brcond_i64: +            case INDEX_op_setcond_i64: +            case INDEX_op_movcond_i64: +                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { +                    qemu_log(",%s", cond_name[args[k++]]); +                } else { +                    qemu_log(",$0x%" TCG_PRIlx, args[k++]); +                } +                i = 1; +                break; +            case INDEX_op_qemu_ld_i32: +            case INDEX_op_qemu_st_i32: +            case INDEX_op_qemu_ld_i64: +            case INDEX_op_qemu_st_i64: +                { +                    TCGMemOpIdx oi = args[k++]; +                    TCGMemOp op = get_memop(oi); +                    unsigned ix = get_mmuidx(oi); + +                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { +                        qemu_log(",$0x%x,%u", op, ix); +                    } else { +                        const char *s_al = "", *s_op; +                        if (op & MO_AMASK) { +                            if ((op & MO_AMASK) == MO_ALIGN) { +                                s_al = "al+"; +                            } else { +                                s_al = "un+"; +                            } +                        } +                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; +                        qemu_log(",%s%s,%u", s_al, s_op, ix); +                    } +                    i = 1; +                } +                break; +            default: +                i = 0; +                break; +            } +            switch (c) { +            case INDEX_op_set_label: +            case INDEX_op_br: +            case INDEX_op_brcond_i32: +            case INDEX_op_brcond_i64: +            case INDEX_op_brcond2_i32: +                qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id); +                i++, k++; +                break; +            default: +                break; +            } +            for (; i < nb_cargs; i++, k++) { +                qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", args[k]); +            } +        } +        qemu_log("\n"); +    } +} + +/* we give more priority to constraints with less registers */ +static int get_constraint_priority(const TCGOpDef *def, int k) +{ +    const TCGArgConstraint *arg_ct; + +    int i, n; +    arg_ct = &def->args_ct[k]; +    if (arg_ct->ct & TCG_CT_ALIAS) { +        /* an alias is equivalent to a single register */ +        n = 1; +    } else { +        if (!(arg_ct->ct & TCG_CT_REG)) +            return 0; +        n = 0; +        for(i = 0; i < TCG_TARGET_NB_REGS; i++) { +            if (tcg_regset_test_reg(arg_ct->u.regs, i)) +                n++; +        } +    } +    return TCG_TARGET_NB_REGS - n + 1; +} + +/* sort from highest priority to lowest */ +static void sort_constraints(TCGOpDef *def, int start, int n) +{ +    int i, j, p1, p2, tmp; + +    for(i = 0; i < n; i++) +        def->sorted_args[start + i] = start + i; +    if (n <= 1) +        return; +    for(i = 0; i < n - 1; i++) { +        for(j = i + 1; j < n; j++) { +            p1 = get_constraint_priority(def, def->sorted_args[start + i]); +            p2 = get_constraint_priority(def, def->sorted_args[start + j]); +            if (p1 < p2) { +                tmp = def->sorted_args[start + i]; +                def->sorted_args[start + i] = def->sorted_args[start + j]; +                def->sorted_args[start + j] = tmp; +            } +        } +    } +} + +void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) +{ +    TCGOpcode op; +    TCGOpDef *def; +    const char *ct_str; +    int i, nb_args; + +    for(;;) { +        if (tdefs->op == (TCGOpcode)-1) +            break; +        op = tdefs->op; +        assert((unsigned)op < NB_OPS); +        def = &tcg_op_defs[op]; +#if defined(CONFIG_DEBUG_TCG) +        /* Duplicate entry in op definitions? */ +        assert(!def->used); +        def->used = 1; +#endif +        nb_args = def->nb_iargs + def->nb_oargs; +        for(i = 0; i < nb_args; i++) { +            ct_str = tdefs->args_ct_str[i]; +            /* Incomplete TCGTargetOpDef entry? */ +            assert(ct_str != NULL); +            tcg_regset_clear(def->args_ct[i].u.regs); +            def->args_ct[i].ct = 0; +            if (ct_str[0] >= '0' && ct_str[0] <= '9') { +                int oarg; +                oarg = ct_str[0] - '0'; +                assert(oarg < def->nb_oargs); +                assert(def->args_ct[oarg].ct & TCG_CT_REG); +                /* TCG_CT_ALIAS is for the output arguments. The input +                   argument is tagged with TCG_CT_IALIAS. 
*/ +                def->args_ct[i] = def->args_ct[oarg]; +                def->args_ct[oarg].ct = TCG_CT_ALIAS; +                def->args_ct[oarg].alias_index = i; +                def->args_ct[i].ct |= TCG_CT_IALIAS; +                def->args_ct[i].alias_index = oarg; +            } else { +                for(;;) { +                    if (*ct_str == '\0') +                        break; +                    switch(*ct_str) { +                    case 'i': +                        def->args_ct[i].ct |= TCG_CT_CONST; +                        ct_str++; +                        break; +                    default: +                        if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) { +                            fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n", +                                    ct_str, i, def->name); +                            exit(1); +                        } +                    } +                } +            } +        } + +        /* TCGTargetOpDef entry with too much information? */ +        assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); + +        /* sort the constraints (XXX: this is just an heuristic) */ +        sort_constraints(def, 0, def->nb_oargs); +        sort_constraints(def, def->nb_oargs, def->nb_iargs); + +#if 0 +        { +            int i; + +            printf("%s: sorted=", def->name); +            for(i = 0; i < def->nb_oargs + def->nb_iargs; i++) +                printf(" %d", def->sorted_args[i]); +            printf("\n"); +        } +#endif +        tdefs++; +    } + +#if defined(CONFIG_DEBUG_TCG) +    i = 0; +    for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) { +        const TCGOpDef *def = &tcg_op_defs[op]; +        if (def->flags & TCG_OPF_NOT_PRESENT) { +            /* Wrong entry in op definitions? */ +            if (def->used) { +                fprintf(stderr, "Invalid op definition for %s\n", def->name); +                i = 1; +            } +        } else { +            /* Missing entry in op definitions? */ +            if (!def->used) { +                fprintf(stderr, "Missing op definition for %s\n", def->name); +                i = 1; +            } +        } +    } +    if (i == 1) { +        tcg_abort(); +    } +#endif +} + +void tcg_op_remove(TCGContext *s, TCGOp *op) +{ +    int next = op->next; +    int prev = op->prev; + +    if (next >= 0) { +        s->gen_op_buf[next].prev = prev; +    } else { +        s->gen_last_op_idx = prev; +    } +    if (prev >= 0) { +        s->gen_op_buf[prev].next = next; +    } else { +        s->gen_first_op_idx = next; +    } + +    memset(op, -1, sizeof(*op)); + +#ifdef CONFIG_PROFILER +    s->del_op_count++; +#endif +} + +#ifdef USE_LIVENESS_ANALYSIS +/* liveness analysis: end of function: all temps are dead, and globals +   should be in memory. */ +static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps, +                                   uint8_t *mem_temps) +{ +    memset(dead_temps, 1, s->nb_temps); +    memset(mem_temps, 1, s->nb_globals); +    memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals); +} + +/* liveness analysis: end of basic block: all temps are dead, globals +   and local temps should be in memory. 
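
A reading aid for the backward scan below (editorial aside, inferred from tcg_la_func_end above and from the per-op loops in tcg_liveness_analysis):

    // walking the ops from last to first, for every temp index i:
    //   dead_temps[i] == 1  means no later op reads temp i at this point
    //   mem_temps[i]  == 1  means temp i's value must be valid in its memory slot
    // per op, bit n of op_dead_args / op_sync_args refers to argument n,
    // counting outputs first and then inputs, so dead_args == 0x5 would mark
    // arguments 0 and 2 as dead after that op.
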
*/ +static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, +                                 uint8_t *mem_temps) +{ +    int i; + +    memset(dead_temps, 1, s->nb_temps); +    memset(mem_temps, 1, s->nb_globals); +    for(i = s->nb_globals; i < s->nb_temps; i++) { +        mem_temps[i] = s->temps[i].temp_local; +    } +} + +/* Liveness analysis : update the opc_dead_args array to tell if a +   given input arguments is dead. Instructions updating dead +   temporaries are removed. */ +static void tcg_liveness_analysis(TCGContext *s) +{ +    uint8_t *dead_temps, *mem_temps; +    int oi, oi_prev, nb_ops; + +    nb_ops = s->gen_next_op_idx; +    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); +    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t)); +     +    dead_temps = tcg_malloc(s->nb_temps); +    mem_temps = tcg_malloc(s->nb_temps); +    tcg_la_func_end(s, dead_temps, mem_temps); + +    for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) { +        int i, nb_iargs, nb_oargs; +        TCGOpcode opc_new, opc_new2; +        bool have_opc_new2; +        uint16_t dead_args; +        uint8_t sync_args; +        TCGArg arg; + +        TCGOp * const op = &s->gen_op_buf[oi]; +        TCGArg * const args = &s->gen_opparam_buf[op->args]; +        TCGOpcode opc = op->opc; +        const TCGOpDef *def = &tcg_op_defs[opc]; + +        oi_prev = op->prev; + +        switch (opc) { +        case INDEX_op_call: +            { +                int call_flags; + +                nb_oargs = op->callo; +                nb_iargs = op->calli; +                call_flags = args[nb_oargs + nb_iargs + 1]; + +                /* pure functions can be removed if their result is unused */ +                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { +                    for (i = 0; i < nb_oargs; i++) { +                        arg = args[i]; +                        if (!dead_temps[arg] || mem_temps[arg]) { +                            goto do_not_remove_call; +                        } +                    } +                    goto do_remove; +                } else { +                do_not_remove_call: + +                    /* output args are dead */ +                    dead_args = 0; +                    sync_args = 0; +                    for (i = 0; i < nb_oargs; i++) { +                        arg = args[i]; +                        if (dead_temps[arg]) { +                            dead_args |= (1 << i); +                        } +                        if (mem_temps[arg]) { +                            sync_args |= (1 << i); +                        } +                        dead_temps[arg] = 1; +                        mem_temps[arg] = 0; +                    } + +                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { +                        /* globals should be synced to memory */ +                        memset(mem_temps, 1, s->nb_globals); +                    } +                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | +                                        TCG_CALL_NO_READ_GLOBALS))) { +                        /* globals should go back to memory */ +                        memset(dead_temps, 1, s->nb_globals); +                    } + +                    /* record arguments that die in this helper */ +                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { +                        arg = args[i]; +                        if (arg != TCG_CALL_DUMMY_ARG) { +                            if (dead_temps[arg]) { +                         
       dead_args |= (1 << i); +                            } +                        } +                    } +                    /* input arguments are live for preceeding opcodes */ +                    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { +                        arg = args[i]; +                        dead_temps[arg] = 0; +                    } +                    s->op_dead_args[oi] = dead_args; +                    s->op_sync_args[oi] = sync_args; +                } +            } +            break; +        case INDEX_op_debug_insn_start: +            break; +        case INDEX_op_discard: +            /* mark the temporary as dead */ +            dead_temps[args[0]] = 1; +            mem_temps[args[0]] = 0; +            break; + +        case INDEX_op_add2_i32: +            opc_new = INDEX_op_add_i32; +            goto do_addsub2; +        case INDEX_op_sub2_i32: +            opc_new = INDEX_op_sub_i32; +            goto do_addsub2; +        case INDEX_op_add2_i64: +            opc_new = INDEX_op_add_i64; +            goto do_addsub2; +        case INDEX_op_sub2_i64: +            opc_new = INDEX_op_sub_i64; +        do_addsub2: +            nb_iargs = 4; +            nb_oargs = 2; +            /* Test if the high part of the operation is dead, but not +               the low part.  The result can be optimized to a simple +               add or sub.  This happens often for x86_64 guest when the +               cpu mode is set to 32 bit.  */ +            if (dead_temps[args[1]] && !mem_temps[args[1]]) { +                if (dead_temps[args[0]] && !mem_temps[args[0]]) { +                    goto do_remove; +                } +                /* Replace the opcode and adjust the args in place, +                   leaving 3 unused args at the end.  */ +                op->opc = opc = opc_new; +                args[1] = args[2]; +                args[2] = args[4]; +                /* Fall through and mark the single-word operation live.  */ +                nb_iargs = 2; +                nb_oargs = 1; +            } +            goto do_not_remove; + +        case INDEX_op_mulu2_i32: +            opc_new = INDEX_op_mul_i32; +            opc_new2 = INDEX_op_muluh_i32; +            have_opc_new2 = TCG_TARGET_HAS_muluh_i32; +            goto do_mul2; +        case INDEX_op_muls2_i32: +            opc_new = INDEX_op_mul_i32; +            opc_new2 = INDEX_op_mulsh_i32; +            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; +            goto do_mul2; +        case INDEX_op_mulu2_i64: +            opc_new = INDEX_op_mul_i64; +            opc_new2 = INDEX_op_muluh_i64; +            have_opc_new2 = TCG_TARGET_HAS_muluh_i64; +            goto do_mul2; +        case INDEX_op_muls2_i64: +            opc_new = INDEX_op_mul_i64; +            opc_new2 = INDEX_op_mulsh_i64; +            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; +            goto do_mul2; +        do_mul2: +            nb_iargs = 2; +            nb_oargs = 2; +            if (dead_temps[args[1]] && !mem_temps[args[1]]) { +                if (dead_temps[args[0]] && !mem_temps[args[0]]) { +                    /* Both parts of the operation are dead.  */ +                    goto do_remove; +                } +                /* The high part of the operation is dead; generate the low. 
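
Concretely (editorial illustration in the textual form tcg_dump_ops uses, with made-up temp names):

    //   mulu2_i32 lo,hi,a,b      with hi dead and not required in memory
    // is rewritten in place into
    //   mul_i32 lo,a,b
    // and the add2/sub2 case above degrades the same way into a plain
    // add_i32/sub_i32 on the low parts when the high/carry half is unused.
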
*/ +                op->opc = opc = opc_new; +                args[1] = args[2]; +                args[2] = args[3]; +            } else if (have_opc_new2 && dead_temps[args[0]] +                       && !mem_temps[args[0]]) { +                /* The low part of the operation is dead; generate the high. */ +                op->opc = opc = opc_new2; +                args[0] = args[1]; +                args[1] = args[2]; +                args[2] = args[3]; +            } else { +                goto do_not_remove; +            } +            /* Mark the single-word operation live.  */ +            nb_oargs = 1; +            goto do_not_remove; + +        default: +            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ +            nb_iargs = def->nb_iargs; +            nb_oargs = def->nb_oargs; + +            /* Test if the operation can be removed because all +               its outputs are dead. We assume that nb_oargs == 0 +               implies side effects */ +            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { +                for (i = 0; i < nb_oargs; i++) { +                    arg = args[i]; +                    if (!dead_temps[arg] || mem_temps[arg]) { +                        goto do_not_remove; +                    } +                } +            do_remove: +                tcg_op_remove(s, op); +            } else { +            do_not_remove: +                /* output args are dead */ +                dead_args = 0; +                sync_args = 0; +                for (i = 0; i < nb_oargs; i++) { +                    arg = args[i]; +                    if (dead_temps[arg]) { +                        dead_args |= (1 << i); +                    } +                    if (mem_temps[arg]) { +                        sync_args |= (1 << i); +                    } +                    dead_temps[arg] = 1; +                    mem_temps[arg] = 0; +                } + +                /* if end of basic block, update */ +                if (def->flags & TCG_OPF_BB_END) { +                    tcg_la_bb_end(s, dead_temps, mem_temps); +                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { +                    /* globals should be synced to memory */ +                    memset(mem_temps, 1, s->nb_globals); +                } + +                /* record arguments that die in this opcode */ +                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { +                    arg = args[i]; +                    if (dead_temps[arg]) { +                        dead_args |= (1 << i); +                    } +                } +                /* input arguments are live for preceeding opcodes */ +                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { +                    arg = args[i]; +                    dead_temps[arg] = 0; +                } +                s->op_dead_args[oi] = dead_args; +                s->op_sync_args[oi] = sync_args; +            } +            break; +        } +    } +} +#else +/* dummy liveness analysis */ +static void tcg_liveness_analysis(TCGContext *s) +{ +    int nb_ops; +    nb_ops = s->gen_opc_ptr - s->gen_opc_buf; + +    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); +    memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t)); +    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t)); +    memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t)); +} +#endif + +#ifndef NDEBUG +static void dump_regs(TCGContext *s) +{ +    TCGTemp *ts; +    int i; +    char buf[64]; + +    for(i = 0; 
i < s->nb_temps; i++) { +        ts = &s->temps[i]; +        printf("  %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i)); +        switch(ts->val_type) { +        case TEMP_VAL_REG: +            printf("%s", tcg_target_reg_names[ts->reg]); +            break; +        case TEMP_VAL_MEM: +            printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]); +            break; +        case TEMP_VAL_CONST: +            printf("$0x%" TCG_PRIlx, ts->val); +            break; +        case TEMP_VAL_DEAD: +            printf("D"); +            break; +        default: +            printf("???"); +            break; +        } +        printf("\n"); +    } + +    for(i = 0; i < TCG_TARGET_NB_REGS; i++) { +        if (s->reg_to_temp[i] >= 0) { +            printf("%s: %s\n",  +                   tcg_target_reg_names[i],  +                   tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i])); +        } +    } +} + +static void check_regs(TCGContext *s) +{ +    int reg, k; +    TCGTemp *ts; +    char buf[64]; + +    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { +        k = s->reg_to_temp[reg]; +        if (k >= 0) { +            ts = &s->temps[k]; +            if (ts->val_type != TEMP_VAL_REG || +                ts->reg != reg) { +                printf("Inconsistency for register %s:\n",  +                       tcg_target_reg_names[reg]); +                goto fail; +            } +        } +    } +    for(k = 0; k < s->nb_temps; k++) { +        ts = &s->temps[k]; +        if (ts->val_type == TEMP_VAL_REG && +            !ts->fixed_reg && +            s->reg_to_temp[ts->reg] != k) { +                printf("Inconsistency for temp %s:\n",  +                       tcg_get_arg_str_idx(s, buf, sizeof(buf), k)); +        fail: +                printf("reg state:\n"); +                dump_regs(s); +                tcg_abort(); +        } +    } +} +#endif + +static void temp_allocate_frame(TCGContext *s, int temp) +{ +    TCGTemp *ts; +    ts = &s->temps[temp]; +#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) +    /* Sparc64 stack is accessed with offset of 2047 */ +    s->current_frame_offset = (s->current_frame_offset + +                               (tcg_target_long)sizeof(tcg_target_long) - 1) & +        ~(sizeof(tcg_target_long) - 1); +#endif +    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > +        s->frame_end) { +        tcg_abort(); +    } +    ts->mem_offset = s->current_frame_offset; +    ts->mem_reg = s->frame_reg; +    ts->mem_allocated = 1; +    s->current_frame_offset += sizeof(tcg_target_long); +} + +/* sync register 'reg' by saving it to the corresponding temporary */ +static inline void tcg_reg_sync(TCGContext *s, int reg) +{ +    TCGTemp *ts; +    int temp; + +    temp = s->reg_to_temp[reg]; +    ts = &s->temps[temp]; +    assert(ts->val_type == TEMP_VAL_REG); +    if (!ts->mem_coherent && !ts->fixed_reg) { +        if (!ts->mem_allocated) { +            temp_allocate_frame(s, temp); +        } +        tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); +    } +    ts->mem_coherent = 1; +} + +/* free register 'reg' by spilling the corresponding temporary if necessary */ +static void tcg_reg_free(TCGContext *s, int reg) +{ +    int temp; + +    temp = s->reg_to_temp[reg]; +    if (temp != -1) { +        tcg_reg_sync(s, reg); +        s->temps[temp].val_type = TEMP_VAL_MEM; +        s->reg_to_temp[reg] = -1; +    } +} + +/* Allocate a register belonging to reg1 & ~reg2 */ +static int 
tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2) +{ +    int i, reg; +    TCGRegSet reg_ct; + +    tcg_regset_andnot(reg_ct, reg1, reg2); + +    /* first try free registers */ +    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) { +        reg = tcg_target_reg_alloc_order[i]; +        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1) +            return reg; +    } + +    /* XXX: do better spill choice */ +    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) { +        reg = tcg_target_reg_alloc_order[i]; +        if (tcg_regset_test_reg(reg_ct, reg)) { +            tcg_reg_free(s, reg); +            return reg; +        } +    } + +    tcg_abort(); +} + +/* mark a temporary as dead. */ +static inline void temp_dead(TCGContext *s, int temp) +{ +    TCGTemp *ts; + +    ts = &s->temps[temp]; +    if (!ts->fixed_reg) { +        if (ts->val_type == TEMP_VAL_REG) { +            s->reg_to_temp[ts->reg] = -1; +        } +        if (temp < s->nb_globals || ts->temp_local) { +            ts->val_type = TEMP_VAL_MEM; +        } else { +            ts->val_type = TEMP_VAL_DEAD; +        } +    } +} + +/* sync a temporary to memory. 'allocated_regs' is used in case a +   temporary registers needs to be allocated to store a constant. */ +static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs) +{ +    TCGTemp *ts; + +    ts = &s->temps[temp]; +    if (!ts->fixed_reg) { +        switch(ts->val_type) { +        case TEMP_VAL_CONST: +            ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], +                                    allocated_regs); +            ts->val_type = TEMP_VAL_REG; +            s->reg_to_temp[ts->reg] = temp; +            ts->mem_coherent = 0; +            tcg_out_movi(s, ts->type, ts->reg, ts->val); +            /* fallthrough*/ +        case TEMP_VAL_REG: +            tcg_reg_sync(s, ts->reg); +            break; +        case TEMP_VAL_DEAD: +        case TEMP_VAL_MEM: +            break; +        default: +            tcg_abort(); +        } +    } +} + +/* save a temporary to memory. 'allocated_regs' is used in case a +   temporary registers needs to be allocated to store a constant. */ +static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs) +{ +#ifdef USE_LIVENESS_ANALYSIS +    /* The liveness analysis already ensures that globals are back +       in memory. Keep an assert for safety. */ +    assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg); +#else +    temp_sync(s, temp, allocated_regs); +    temp_dead(s, temp); +#endif +} + +/* save globals to their canonical location and assume they can be +   modified be the following code. 'allocated_regs' is used in case a +   temporary registers needs to be allocated to store a constant. */ +static void save_globals(TCGContext *s, TCGRegSet allocated_regs) +{ +    int i; + +    for(i = 0; i < s->nb_globals; i++) { +        temp_save(s, i, allocated_regs); +    } +} + +/* sync globals to their canonical location and assume they can be +   read by the following code. 'allocated_regs' is used in case a +   temporary registers needs to be allocated to store a constant. 
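
The practical difference between the "sync" and "save" helpers here (editorial note, inferred from temp_sync and temp_save above and from the call handling further down):

    //   temp_sync / sync_globals: write the value back to its memory slot, but the
    //     register copy stays valid (mem_coherent is set), so later reads avoid a reload
    //   temp_save / save_globals: sync and then drop the register association
    //     (val_type becomes TEMP_VAL_MEM), forcing later reads to go via memory
    // which is why tcg_reg_alloc_call below only syncs globals for helpers flagged
    // TCG_CALL_NO_WRITE_GLOBALS, fully saves them otherwise, and skips both for
    // TCG_CALL_NO_READ_GLOBALS.
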
*/ +static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) +{ +    int i; + +    for (i = 0; i < s->nb_globals; i++) { +#ifdef USE_LIVENESS_ANALYSIS +        assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg || +               s->temps[i].mem_coherent); +#else +        temp_sync(s, i, allocated_regs); +#endif +    } +} + +/* at the end of a basic block, we assume all temporaries are dead and +   all globals are stored at their canonical location. */ +static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) +{ +    TCGTemp *ts; +    int i; + +    for(i = s->nb_globals; i < s->nb_temps; i++) { +        ts = &s->temps[i]; +        if (ts->temp_local) { +            temp_save(s, i, allocated_regs); +        } else { +#ifdef USE_LIVENESS_ANALYSIS +            /* The liveness analysis already ensures that temps are dead. +               Keep an assert for safety. */ +            assert(ts->val_type == TEMP_VAL_DEAD); +#else +            temp_dead(s, i); +#endif +        } +    } + +    save_globals(s, allocated_regs); +} + +#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1) +#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1) + +static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, +                               uint16_t dead_args, uint8_t sync_args) +{ +    TCGTemp *ots; +    tcg_target_ulong val; + +    ots = &s->temps[args[0]]; +    val = args[1]; + +    if (ots->fixed_reg) { +        /* for fixed registers, we do not do any constant +           propagation */ +        tcg_out_movi(s, ots->type, ots->reg, val); +    } else { +        /* The movi is not explicitly generated here */ +        if (ots->val_type == TEMP_VAL_REG) +            s->reg_to_temp[ots->reg] = -1; +        ots->val_type = TEMP_VAL_CONST; +        ots->val = val; +    } +    if (NEED_SYNC_ARG(0)) { +        temp_sync(s, args[0], s->reserved_regs); +    } +    if (IS_DEAD_ARG(0)) { +        temp_dead(s, args[0]); +    } +} + +static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, +                              const TCGArg *args, uint16_t dead_args, +                              uint8_t sync_args) +{ +    TCGRegSet allocated_regs; +    TCGTemp *ts, *ots; +    TCGType otype, itype; + +    tcg_regset_set(allocated_regs, s->reserved_regs); +    ots = &s->temps[args[0]]; +    ts = &s->temps[args[1]]; + +    /* Note that otype != itype for no-op truncation.  */ +    otype = ots->type; +    itype = ts->type; + +    /* If the source value is not in a register, and we're going to be +       forced to have it in a register in order to perform the copy, +       then copy the SOURCE value into its own register first.  That way +       we don't have to reload SOURCE the next time it is used. 
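
A rough map of the cases the body of this function then distinguishes (editorial aside):

    //   dest is dead but must be synced: the source, now in a register, is stored
    //     straight into dest's memory slot and no register is allocated for dest
    //   source is still a constant: the constant is simply propagated into dest
    //   source is in a register and dies here: dest takes over the source register
    //   otherwise: a register is allocated for dest and a real mov is emitted
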
*/ +    if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG) +        || ts->val_type == TEMP_VAL_MEM) { +        ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype], +                                allocated_regs); +        if (ts->val_type == TEMP_VAL_MEM) { +            tcg_out_ld(s, itype, ts->reg, ts->mem_reg, ts->mem_offset); +            ts->mem_coherent = 1; +        } else if (ts->val_type == TEMP_VAL_CONST) { +            tcg_out_movi(s, itype, ts->reg, ts->val); +            ts->mem_coherent = 0; +        } +        s->reg_to_temp[ts->reg] = args[1]; +        ts->val_type = TEMP_VAL_REG; +    } + +    if (IS_DEAD_ARG(0) && !ots->fixed_reg) { +        /* mov to a non-saved dead register makes no sense (even with +           liveness analysis disabled). */ +        assert(NEED_SYNC_ARG(0)); +        /* The code above should have moved the temp to a register. */ +        assert(ts->val_type == TEMP_VAL_REG); +        if (!ots->mem_allocated) { +            temp_allocate_frame(s, args[0]); +        } +        tcg_out_st(s, otype, ts->reg, ots->mem_reg, ots->mem_offset); +        if (IS_DEAD_ARG(1)) { +            temp_dead(s, args[1]); +        } +        temp_dead(s, args[0]); +    } else if (ts->val_type == TEMP_VAL_CONST) { +        /* propagate constant */ +        if (ots->val_type == TEMP_VAL_REG) { +            s->reg_to_temp[ots->reg] = -1; +        } +        ots->val_type = TEMP_VAL_CONST; +        ots->val = ts->val; +        if (IS_DEAD_ARG(1)) { +            temp_dead(s, args[1]); +        } +    } else { +        /* The code in the first if block should have moved the +           temp to a register. */ +        assert(ts->val_type == TEMP_VAL_REG); +        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { +            /* the mov can be suppressed */ +            if (ots->val_type == TEMP_VAL_REG) { +                s->reg_to_temp[ots->reg] = -1; +            } +            ots->reg = ts->reg; +            temp_dead(s, args[1]); +        } else { +            if (ots->val_type != TEMP_VAL_REG) { +                /* When allocating a new register, make sure to not spill the +                   input one. 
*/ +                tcg_regset_set_reg(allocated_regs, ts->reg); +                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], +                                         allocated_regs); +            } +            tcg_out_mov(s, otype, ots->reg, ts->reg); +        } +        ots->val_type = TEMP_VAL_REG; +        ots->mem_coherent = 0; +        s->reg_to_temp[ots->reg] = args[0]; +        if (NEED_SYNC_ARG(0)) { +            tcg_reg_sync(s, ots->reg); +        } +    } +} + +static void tcg_reg_alloc_op(TCGContext *s,  +                             const TCGOpDef *def, TCGOpcode opc, +                             const TCGArg *args, uint16_t dead_args, +                             uint8_t sync_args) +{ +    TCGRegSet allocated_regs; +    int i, k, nb_iargs, nb_oargs, reg; +    TCGArg arg; +    const TCGArgConstraint *arg_ct; +    TCGTemp *ts; +    TCGArg new_args[TCG_MAX_OP_ARGS]; +    int const_args[TCG_MAX_OP_ARGS]; + +    nb_oargs = def->nb_oargs; +    nb_iargs = def->nb_iargs; + +    /* copy constants */ +    memcpy(new_args + nb_oargs + nb_iargs,  +           args + nb_oargs + nb_iargs,  +           sizeof(TCGArg) * def->nb_cargs); + +    /* satisfy input constraints */  +    tcg_regset_set(allocated_regs, s->reserved_regs); +    for(k = 0; k < nb_iargs; k++) { +        i = def->sorted_args[nb_oargs + k]; +        arg = args[i]; +        arg_ct = &def->args_ct[i]; +        ts = &s->temps[arg]; +        if (ts->val_type == TEMP_VAL_MEM) { +            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); +            tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); +            ts->val_type = TEMP_VAL_REG; +            ts->reg = reg; +            ts->mem_coherent = 1; +            s->reg_to_temp[reg] = arg; +        } else if (ts->val_type == TEMP_VAL_CONST) { +            if (tcg_target_const_match(ts->val, ts->type, arg_ct)) { +                /* constant is OK for instruction */ +                const_args[i] = 1; +                new_args[i] = ts->val; +                goto iarg_end; +            } else { +                /* need to move to a register */ +                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); +                tcg_out_movi(s, ts->type, reg, ts->val); +                ts->val_type = TEMP_VAL_REG; +                ts->reg = reg; +                ts->mem_coherent = 0; +                s->reg_to_temp[reg] = arg; +            } +        } +        assert(ts->val_type == TEMP_VAL_REG); +        if (arg_ct->ct & TCG_CT_IALIAS) { +            if (ts->fixed_reg) { +                /* if fixed register, we must allocate a new register +                   if the alias is not the same register */ +                if (arg != args[arg_ct->alias_index]) +                    goto allocate_in_reg; +            } else { +                /* if the input is aliased to an output and if it is +                   not dead after the instruction, we must allocate +                   a new register and move it */ +                if (!IS_DEAD_ARG(i)) { +                    goto allocate_in_reg; +                } +                /* check if the current register has already been allocated +                   for another input aliased to an output */ +                int k2, i2; +                for (k2 = 0 ; k2 < k ; k2++) { +                    i2 = def->sorted_args[nb_oargs + k2]; +                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && +                        (new_args[i2] == ts->reg)) { +                        goto 
allocate_in_reg; +                    } +                } +            } +        } +        reg = ts->reg; +        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { +            /* nothing to do : the constraint is satisfied */ +        } else { +        allocate_in_reg: +            /* allocate a new register matching the constraint  +               and move the temporary register into it */ +            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); +            tcg_out_mov(s, ts->type, reg, ts->reg); +        } +        new_args[i] = reg; +        const_args[i] = 0; +        tcg_regset_set_reg(allocated_regs, reg); +    iarg_end: ; +    } +     +    /* mark dead temporaries and free the associated registers */ +    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { +        if (IS_DEAD_ARG(i)) { +            temp_dead(s, args[i]); +        } +    } + +    if (def->flags & TCG_OPF_BB_END) { +        tcg_reg_alloc_bb_end(s, allocated_regs); +    } else { +        if (def->flags & TCG_OPF_CALL_CLOBBER) { +            /* XXX: permit generic clobber register list ? */  +            for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { +                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) { +                    tcg_reg_free(s, reg); +                } +            } +        } +        if (def->flags & TCG_OPF_SIDE_EFFECTS) { +            /* sync globals if the op has side effects and might trigger +               an exception. */ +            sync_globals(s, allocated_regs); +        } +         +        /* satisfy the output constraints */ +        tcg_regset_set(allocated_regs, s->reserved_regs); +        for(k = 0; k < nb_oargs; k++) { +            i = def->sorted_args[k]; +            arg = args[i]; +            arg_ct = &def->args_ct[i]; +            ts = &s->temps[arg]; +            if (arg_ct->ct & TCG_CT_ALIAS) { +                reg = new_args[arg_ct->alias_index]; +            } else { +                /* if fixed register, we try to use it */ +                reg = ts->reg; +                if (ts->fixed_reg && +                    tcg_regset_test_reg(arg_ct->u.regs, reg)) { +                    goto oarg_end; +                } +                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); +            } +            tcg_regset_set_reg(allocated_regs, reg); +            /* if a fixed register is used, then a move will be done afterwards */ +            if (!ts->fixed_reg) { +                if (ts->val_type == TEMP_VAL_REG) { +                    s->reg_to_temp[ts->reg] = -1; +                } +                ts->val_type = TEMP_VAL_REG; +                ts->reg = reg; +                /* temp value is modified, so the value kept in memory is +                   potentially not the same */ +                ts->mem_coherent = 0; +                s->reg_to_temp[reg] = arg; +            } +        oarg_end: +            new_args[i] = reg; +        } +    } + +    /* emit instruction */ +    tcg_out_op(s, opc, new_args, const_args); +     +    /* move the outputs in the correct register if needed */ +    for(i = 0; i < nb_oargs; i++) { +        ts = &s->temps[args[i]]; +        reg = new_args[i]; +        if (ts->fixed_reg && ts->reg != reg) { +            tcg_out_mov(s, ts->type, ts->reg, reg); +        } +        if (NEED_SYNC_ARG(i)) { +            tcg_reg_sync(s, reg); +        } +        if (IS_DEAD_ARG(i)) { +            temp_dead(s, args[i]); +        } +    } +} + +#ifdef TCG_TARGET_STACK_GROWSUP +#define STACK_DIR(x) (-(x)) +#else 
+#define STACK_DIR(x) (x) +#endif + +static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, +                               const TCGArg * const args, uint16_t dead_args, +                               uint8_t sync_args) +{ +    int flags, nb_regs, i, reg; +    TCGArg arg; +    TCGTemp *ts; +    intptr_t stack_offset; +    size_t call_stack_size; +    tcg_insn_unit *func_addr; +    int allocate_args; +    TCGRegSet allocated_regs; + +    func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs]; +    flags = args[nb_oargs + nb_iargs + 1]; + +    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); +    if (nb_regs > nb_iargs) { +        nb_regs = nb_iargs; +    } + +    /* assign stack slots first */ +    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); +    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &  +        ~(TCG_TARGET_STACK_ALIGN - 1); +    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); +    if (allocate_args) { +        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, +           preallocate call stack */ +        tcg_abort(); +    } + +    stack_offset = TCG_TARGET_CALL_STACK_OFFSET; +    for(i = nb_regs; i < nb_iargs; i++) { +        arg = args[nb_oargs + i]; +#ifdef TCG_TARGET_STACK_GROWSUP +        stack_offset -= sizeof(tcg_target_long); +#endif +        if (arg != TCG_CALL_DUMMY_ARG) { +            ts = &s->temps[arg]; +            if (ts->val_type == TEMP_VAL_REG) { +                tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); +            } else if (ts->val_type == TEMP_VAL_MEM) { +                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],  +                                    s->reserved_regs); +                /* XXX: not correct if reading values from the stack */ +                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); +                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset); +            } else if (ts->val_type == TEMP_VAL_CONST) { +                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],  +                                    s->reserved_regs); +                /* XXX: sign extend may be needed on some targets */ +                tcg_out_movi(s, ts->type, reg, ts->val); +                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset); +            } else { +                tcg_abort(); +            } +        } +#ifndef TCG_TARGET_STACK_GROWSUP +        stack_offset += sizeof(tcg_target_long); +#endif +    } +     +    /* assign input registers */ +    tcg_regset_set(allocated_regs, s->reserved_regs); +    for(i = 0; i < nb_regs; i++) { +        arg = args[nb_oargs + i]; +        if (arg != TCG_CALL_DUMMY_ARG) { +            ts = &s->temps[arg]; +            reg = tcg_target_call_iarg_regs[i]; +            tcg_reg_free(s, reg); +            if (ts->val_type == TEMP_VAL_REG) { +                if (ts->reg != reg) { +                    tcg_out_mov(s, ts->type, reg, ts->reg); +                } +            } else if (ts->val_type == TEMP_VAL_MEM) { +                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); +            } else if (ts->val_type == TEMP_VAL_CONST) { +                /* XXX: sign extend ? 
*/ +                tcg_out_movi(s, ts->type, reg, ts->val); +            } else { +                tcg_abort(); +            } +            tcg_regset_set_reg(allocated_regs, reg); +        } +    } +     +    /* mark dead temporaries and free the associated registers */ +    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { +        if (IS_DEAD_ARG(i)) { +            temp_dead(s, args[i]); +        } +    } +     +    /* clobber call registers */ +    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { +        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) { +            tcg_reg_free(s, reg); +        } +    } + +    /* Save globals if they might be written by the helper, sync them if +       they might be read. */ +    if (flags & TCG_CALL_NO_READ_GLOBALS) { +        /* Nothing to do */ +    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { +        sync_globals(s, allocated_regs); +    } else { +        save_globals(s, allocated_regs); +    } + +    tcg_out_call(s, func_addr); + +    /* assign output registers and emit moves if needed */ +    for(i = 0; i < nb_oargs; i++) { +        arg = args[i]; +        ts = &s->temps[arg]; +        reg = tcg_target_call_oarg_regs[i]; +        assert(s->reg_to_temp[reg] == -1); + +        if (ts->fixed_reg) { +            if (ts->reg != reg) { +                tcg_out_mov(s, ts->type, ts->reg, reg); +            } +        } else { +            if (ts->val_type == TEMP_VAL_REG) { +                s->reg_to_temp[ts->reg] = -1; +            } +            ts->val_type = TEMP_VAL_REG; +            ts->reg = reg; +            ts->mem_coherent = 0; +            s->reg_to_temp[reg] = arg; +            if (NEED_SYNC_ARG(i)) { +                tcg_reg_sync(s, reg); +            } +            if (IS_DEAD_ARG(i)) { +                temp_dead(s, args[i]); +            } +        } +    } +} + +#ifdef CONFIG_PROFILER + +static int64_t tcg_table_op_count[NB_OPS]; + +void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) +{ +    int i; + +    for (i = 0; i < NB_OPS; i++) { +        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, +                    tcg_table_op_count[i]); +    } +} +#else +void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) +{ +    cpu_fprintf(f, "[TCG profiler not compiled]\n"); +} +#endif + + +static inline int tcg_gen_code_common(TCGContext *s, +                                      tcg_insn_unit *gen_code_buf, +                                      long search_pc) +{ +    int oi, oi_next; + +#ifdef DEBUG_DISAS +    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) { +        qemu_log("OP:\n"); +        tcg_dump_ops(s); +        qemu_log("\n"); +    } +#endif + +#ifdef CONFIG_PROFILER +    s->opt_time -= profile_getclock(); +#endif + +#ifdef USE_TCG_OPTIMIZATIONS +    tcg_optimize(s); +#endif + +#ifdef CONFIG_PROFILER +    s->opt_time += profile_getclock(); +    s->la_time -= profile_getclock(); +#endif + +    tcg_liveness_analysis(s); + +#ifdef CONFIG_PROFILER +    s->la_time += profile_getclock(); +#endif + +#ifdef DEBUG_DISAS +    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) { +        qemu_log("OP after optimization and liveness analysis:\n"); +        tcg_dump_ops(s); +        qemu_log("\n"); +    } +#endif + +    tcg_reg_alloc_start(s); + +    s->code_buf = gen_code_buf; +    s->code_ptr = gen_code_buf; + +    tcg_out_tb_init(s); + +    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { +        TCGOp * const op = &s->gen_op_buf[oi]; +        TCGArg * const args = 
&s->gen_opparam_buf[op->args]; +        TCGOpcode opc = op->opc; +        const TCGOpDef *def = &tcg_op_defs[opc]; +        uint16_t dead_args = s->op_dead_args[oi]; +        uint8_t sync_args = s->op_sync_args[oi]; + +        oi_next = op->next; +#ifdef CONFIG_PROFILER +        tcg_table_op_count[opc]++; +#endif + +        switch (opc) { +        case INDEX_op_mov_i32: +        case INDEX_op_mov_i64: +            tcg_reg_alloc_mov(s, def, args, dead_args, sync_args); +            break; +        case INDEX_op_movi_i32: +        case INDEX_op_movi_i64: +            tcg_reg_alloc_movi(s, args, dead_args, sync_args); +            break; +        case INDEX_op_debug_insn_start: +            break; +        case INDEX_op_discard: +            temp_dead(s, args[0]); +            break; +        case INDEX_op_set_label: +            tcg_reg_alloc_bb_end(s, s->reserved_regs); +            tcg_out_label(s, arg_label(args[0]), s->code_ptr); +            break; +        case INDEX_op_call: +            tcg_reg_alloc_call(s, op->callo, op->calli, args, +                               dead_args, sync_args); +            break; +        default: +            /* Sanity check that we've not introduced any unhandled opcodes. */ +            if (def->flags & TCG_OPF_NOT_PRESENT) { +                tcg_abort(); +            } +            /* Note: in order to speed up the code, it would be much +               faster to have specialized register allocator functions for +               some common argument patterns */ +            tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args); +            break; +        } +        if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) { +            return oi; +        } +#ifndef NDEBUG +        check_regs(s); +#endif +    } + +    /* Generate TB finalization at the end of block */ +    tcg_out_tb_finalize(s); +    return -1; +} + +int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) +{ +#ifdef CONFIG_PROFILER +    { +        int n; + +        n = s->gen_last_op_idx + 1; +        s->op_count += n; +        if (n > s->op_count_max) { +            s->op_count_max = n; +        } + +        n = s->nb_temps; +        s->temp_count += n; +        if (n > s->temp_count_max) { +            s->temp_count_max = n; +        } +    } +#endif + +    tcg_gen_code_common(s, gen_code_buf, -1); + +    /* flush instruction cache */ +    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); + +    return tcg_current_code_size(s); +} + +/* Return the index of the micro operation such as the pc after is < +   offset bytes from the start of the TB.  The contents of gen_code_buf must +   not be changed, though writing the same values is ok. +   Return -1 if not found. */ +int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf, +                           long offset) +{ +    return tcg_gen_code_common(s, gen_code_buf, offset); +} + +#ifdef CONFIG_PROFILER +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) +{ +    TCGContext *s = &tcg_ctx; +    int64_t tot; + +    tot = s->interm_time + s->code_time; +    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n", +                tot, tot / 2.4e9); +    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",  +                s->tb_count,  +                s->tb_count1 - s->tb_count, +                s->tb_count1 ? 
(double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0); +    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",  +                s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max); +    cpu_fprintf(f, "deleted ops/TB      %0.2f\n", +                s->tb_count ?  +                (double)s->del_op_count / s->tb_count : 0); +    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n", +                s->tb_count ?  +                (double)s->temp_count / s->tb_count : 0, +                s->temp_count_max); +     +    cpu_fprintf(f, "cycles/op           %0.1f\n",  +                s->op_count ? (double)tot / s->op_count : 0); +    cpu_fprintf(f, "cycles/in byte      %0.1f\n",  +                s->code_in_len ? (double)tot / s->code_in_len : 0); +    cpu_fprintf(f, "cycles/out byte     %0.1f\n",  +                s->code_out_len ? (double)tot / s->code_out_len : 0); +    if (tot == 0) +        tot = 1; +    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",  +                (double)s->interm_time / tot * 100.0); +    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",  +                (double)s->code_time / tot * 100.0); +    cpu_fprintf(f, "optim./code time    %0.1f%%\n", +                (double)s->opt_time / (s->code_time ? s->code_time : 1) +                * 100.0); +    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",  +                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); +    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n", +                s->restore_count); +    cpu_fprintf(f, "  avg cycles        %0.1f\n", +                s->restore_count ? (double)s->restore_time / s->restore_count : 0); +} +#else +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) +{ +    cpu_fprintf(f, "[TCG profiler not compiled]\n"); +} +#endif + +#ifdef ELF_HOST_MACHINE +/* In order to use this feature, the backend needs to do three things: + +   (1) Define ELF_HOST_MACHINE to indicate both what value to +       put into the ELF image and to indicate support for the feature. + +   (2) Define tcg_register_jit.  This should create a buffer containing +       the contents of a .debug_frame section that describes the post- +       prologue unwind info for the tcg machine. + +   (3) Call tcg_register_jit_int, with the constructed .debug_frame. +*/ + +/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */ +typedef enum { +    JIT_NOACTION = 0, +    JIT_REGISTER_FN, +    JIT_UNREGISTER_FN +} jit_actions_t; + +struct jit_code_entry { +    struct jit_code_entry *next_entry; +    struct jit_code_entry *prev_entry; +    const void *symfile_addr; +    uint64_t symfile_size; +}; + +struct jit_descriptor { +    uint32_t version; +    uint32_t action_flag; +    struct jit_code_entry *relevant_entry; +    struct jit_code_entry *first_entry; +}; + +void __jit_debug_register_code(void) __attribute__((noinline)); +void __jit_debug_register_code(void) +{ +    asm(""); +} + +/* Must statically initialize the version, because GDB may check +   the version before we can set it.  */ +struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; + +/* End GDB interface.  
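
For context on how the structures above are meant to be used (a minimal sketch only; the actual registration is presumably performed later in tcg_register_jit_int, and the helper and entry names below are hypothetical):

    static struct jit_code_entry hypothetical_entry;

    static void hypothetical_register(const void *symfile, uint64_t size)
    {
        // assumes an empty descriptor list, as in the single-image case here
        hypothetical_entry.symfile_addr = symfile;
        hypothetical_entry.symfile_size = size;
        hypothetical_entry.prev_entry = NULL;
        hypothetical_entry.next_entry = NULL;
        __jit_debug_descriptor.relevant_entry = &hypothetical_entry;
        __jit_debug_descriptor.first_entry = &hypothetical_entry;
        __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
        __jit_debug_register_code();   // GDB traps this call and reads the symfile
    }
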
*/ + +static int find_string(const char *strtab, const char *str) +{ +    const char *p = strtab + 1; + +    while (1) { +        if (strcmp(p, str) == 0) { +            return p - strtab; +        } +        p += strlen(p) + 1; +    } +} + +static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, +                                 const void *debug_frame, +                                 size_t debug_frame_size) +{ +    struct __attribute__((packed)) DebugInfo { +        uint32_t  len; +        uint16_t  version; +        uint32_t  abbrev; +        uint8_t   ptr_size; +        uint8_t   cu_die; +        uint16_t  cu_lang; +        uintptr_t cu_low_pc; +        uintptr_t cu_high_pc; +        uint8_t   fn_die; +        char      fn_name[16]; +        uintptr_t fn_low_pc; +        uintptr_t fn_high_pc; +        uint8_t   cu_eoc; +    }; + +    struct ElfImage { +        ElfW(Ehdr) ehdr; +        ElfW(Phdr) phdr; +        ElfW(Shdr) shdr[7]; +        ElfW(Sym)  sym[2]; +        struct DebugInfo di; +        uint8_t    da[24]; +        char       str[80]; +    }; + +    struct ElfImage *img; + +    static const struct ElfImage img_template = { +        .ehdr = { +            .e_ident[EI_MAG0] = ELFMAG0, +            .e_ident[EI_MAG1] = ELFMAG1, +            .e_ident[EI_MAG2] = ELFMAG2, +            .e_ident[EI_MAG3] = ELFMAG3, +            .e_ident[EI_CLASS] = ELF_CLASS, +            .e_ident[EI_DATA] = ELF_DATA, +            .e_ident[EI_VERSION] = EV_CURRENT, +            .e_type = ET_EXEC, +            .e_machine = ELF_HOST_MACHINE, +            .e_version = EV_CURRENT, +            .e_phoff = offsetof(struct ElfImage, phdr), +            .e_shoff = offsetof(struct ElfImage, shdr), +            .e_ehsize = sizeof(ElfW(Shdr)), +            .e_phentsize = sizeof(ElfW(Phdr)), +            .e_phnum = 1, +            .e_shentsize = sizeof(ElfW(Shdr)), +            .e_shnum = ARRAY_SIZE(img->shdr), +            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1, +#ifdef ELF_HOST_FLAGS +            .e_flags = ELF_HOST_FLAGS, +#endif +#ifdef ELF_OSABI +            .e_ident[EI_OSABI] = ELF_OSABI, +#endif +        }, +        .phdr = { +            .p_type = PT_LOAD, +            .p_flags = PF_X, +        }, +        .shdr = { +            [0] = { .sh_type = SHT_NULL }, +            /* Trick: The contents of code_gen_buffer are not present in +               this fake ELF file; that got allocated elsewhere.  Therefore +               we mark .text as SHT_NOBITS (similar to .bss) so that readers +               will not look for contents.  We can record any address.  
*/ +            [1] = { /* .text */ +                .sh_type = SHT_NOBITS, +                .sh_flags = SHF_EXECINSTR | SHF_ALLOC, +            }, +            [2] = { /* .debug_info */ +                .sh_type = SHT_PROGBITS, +                .sh_offset = offsetof(struct ElfImage, di), +                .sh_size = sizeof(struct DebugInfo), +            }, +            [3] = { /* .debug_abbrev */ +                .sh_type = SHT_PROGBITS, +                .sh_offset = offsetof(struct ElfImage, da), +                .sh_size = sizeof(img->da), +            }, +            [4] = { /* .debug_frame */ +                .sh_type = SHT_PROGBITS, +                .sh_offset = sizeof(struct ElfImage), +            }, +            [5] = { /* .symtab */ +                .sh_type = SHT_SYMTAB, +                .sh_offset = offsetof(struct ElfImage, sym), +                .sh_size = sizeof(img->sym), +                .sh_info = 1, +                .sh_link = ARRAY_SIZE(img->shdr) - 1, +                .sh_entsize = sizeof(ElfW(Sym)), +            }, +            [6] = { /* .strtab */ +                .sh_type = SHT_STRTAB, +                .sh_offset = offsetof(struct ElfImage, str), +                .sh_size = sizeof(img->str), +            } +        }, +        .sym = { +            [1] = { /* code_gen_buffer */ +                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC), +                .st_shndx = 1, +            } +        }, +        .di = { +            .len = sizeof(struct DebugInfo) - 4, +            .version = 2, +            .ptr_size = sizeof(void *), +            .cu_die = 1, +            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */ +            .fn_die = 2, +            .fn_name = "code_gen_buffer" +        }, +        .da = { +            1,          /* abbrev number (the cu) */ +            0x11, 1,    /* DW_TAG_compile_unit, has children */ +            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */ +            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */ +            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */ +            0, 0,       /* end of abbrev */ +            2,          /* abbrev number (the fn) */ +            0x2e, 0,    /* DW_TAG_subprogram, no children */ +            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */ +            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */ +            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */ +            0, 0,       /* end of abbrev */ +            0           /* no more abbrev */ +        }, +        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0" +               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer", +    }; + +    /* We only need a single jit entry; statically allocate it.  
*/ +    static struct jit_code_entry one_entry; + +    uintptr_t buf = (uintptr_t)buf_ptr; +    size_t img_size = sizeof(struct ElfImage) + debug_frame_size; +    DebugFrameHeader *dfh; + +    img = g_malloc(img_size); +    *img = img_template; + +    img->phdr.p_vaddr = buf; +    img->phdr.p_paddr = buf; +    img->phdr.p_memsz = buf_size; + +    img->shdr[1].sh_name = find_string(img->str, ".text"); +    img->shdr[1].sh_addr = buf; +    img->shdr[1].sh_size = buf_size; + +    img->shdr[2].sh_name = find_string(img->str, ".debug_info"); +    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev"); + +    img->shdr[4].sh_name = find_string(img->str, ".debug_frame"); +    img->shdr[4].sh_size = debug_frame_size; + +    img->shdr[5].sh_name = find_string(img->str, ".symtab"); +    img->shdr[6].sh_name = find_string(img->str, ".strtab"); + +    img->sym[1].st_name = find_string(img->str, "code_gen_buffer"); +    img->sym[1].st_value = buf; +    img->sym[1].st_size = buf_size; + +    img->di.cu_low_pc = buf; +    img->di.cu_high_pc = buf + buf_size; +    img->di.fn_low_pc = buf; +    img->di.fn_high_pc = buf + buf_size; + +    dfh = (DebugFrameHeader *)(img + 1); +    memcpy(dfh, debug_frame, debug_frame_size); +    dfh->fde.func_start = buf; +    dfh->fde.func_len = buf_size; + +#ifdef DEBUG_JIT +    /* Enable this block to be able to debug the ELF image file creation. +       One can use readelf, objdump, or other inspection utilities.  */ +    { +        FILE *f = fopen("/tmp/qemu.jit", "w+b"); +        if (f) { +            if (fwrite(img, img_size, 1, f) != img_size) { +                /* Avoid stupid unused return value warning for fwrite.  */ +            } +            fclose(f); +        } +    } +#endif + +    one_entry.symfile_addr = img; +    one_entry.symfile_size = img_size; + +    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; +    __jit_debug_descriptor.relevant_entry = &one_entry; +    __jit_debug_descriptor.first_entry = &one_entry; +    __jit_debug_register_code(); +} +#else +/* No support for the feature.  Provide the entry point expected by exec.c, +   and implement the internal function we declared earlier.  */ + +static void tcg_register_jit_int(void *buf, size_t size, +                                 const void *debug_frame, +                                 size_t debug_frame_size) +{ +} + +void tcg_register_jit(void *buf, size_t buf_size) +{ +} +#endif /* ELF_HOST_MACHINE */ diff --git a/tcg/tcg.h b/tcg/tcg.h new file mode 100644 index 00000000..231a7815 --- /dev/null +++ b/tcg/tcg.h @@ -0,0 +1,1011 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_H +#define TCG_H + +#include "qemu-common.h" +#include "qemu/bitops.h" +#include "tcg-target.h" + +#define CPU_TEMP_BUF_NLONGS 128 + +/* Default target word size to pointer size.  */ +#ifndef TCG_TARGET_REG_BITS +# if UINTPTR_MAX == UINT32_MAX +#  define TCG_TARGET_REG_BITS 32 +# elif UINTPTR_MAX == UINT64_MAX +#  define TCG_TARGET_REG_BITS 64 +# else +#  error Unknown pointer size for tcg target +# endif +#endif + +#if TCG_TARGET_REG_BITS == 32 +typedef int32_t tcg_target_long; +typedef uint32_t tcg_target_ulong; +#define TCG_PRIlx PRIx32 +#define TCG_PRIld PRId32 +#elif TCG_TARGET_REG_BITS == 64 +typedef int64_t tcg_target_long; +typedef uint64_t tcg_target_ulong; +#define TCG_PRIlx PRIx64 +#define TCG_PRIld PRId64 +#else +#error unsupported +#endif + +#if TCG_TARGET_NB_REGS <= 32 +typedef uint32_t TCGRegSet; +#elif TCG_TARGET_NB_REGS <= 64 +typedef uint64_t TCGRegSet; +#else +#error unsupported +#endif + +#if TCG_TARGET_REG_BITS == 32 +/* Turn some undef macros into false macros.  */ +#define TCG_TARGET_HAS_trunc_shr_i32    0 +#define TCG_TARGET_HAS_div_i64          0 +#define TCG_TARGET_HAS_rem_i64          0 +#define TCG_TARGET_HAS_div2_i64         0 +#define TCG_TARGET_HAS_rot_i64          0 +#define TCG_TARGET_HAS_ext8s_i64        0 +#define TCG_TARGET_HAS_ext16s_i64       0 +#define TCG_TARGET_HAS_ext32s_i64       0 +#define TCG_TARGET_HAS_ext8u_i64        0 +#define TCG_TARGET_HAS_ext16u_i64       0 +#define TCG_TARGET_HAS_ext32u_i64       0 +#define TCG_TARGET_HAS_bswap16_i64      0 +#define TCG_TARGET_HAS_bswap32_i64      0 +#define TCG_TARGET_HAS_bswap64_i64      0 +#define TCG_TARGET_HAS_neg_i64          0 +#define TCG_TARGET_HAS_not_i64          0 +#define TCG_TARGET_HAS_andc_i64         0 +#define TCG_TARGET_HAS_orc_i64          0 +#define TCG_TARGET_HAS_eqv_i64          0 +#define TCG_TARGET_HAS_nand_i64         0 +#define TCG_TARGET_HAS_nor_i64          0 +#define TCG_TARGET_HAS_deposit_i64      0 +#define TCG_TARGET_HAS_movcond_i64      0 +#define TCG_TARGET_HAS_add2_i64         0 +#define TCG_TARGET_HAS_sub2_i64         0 +#define TCG_TARGET_HAS_mulu2_i64        0 +#define TCG_TARGET_HAS_muls2_i64        0 +#define TCG_TARGET_HAS_muluh_i64        0 +#define TCG_TARGET_HAS_mulsh_i64        0 +/* Turn some undef macros into true macros.  */ +#define TCG_TARGET_HAS_add2_i32         1 +#define TCG_TARGET_HAS_sub2_i32         1 +#endif + +#ifndef TCG_TARGET_deposit_i32_valid +#define TCG_TARGET_deposit_i32_valid(ofs, len) 1 +#endif +#ifndef TCG_TARGET_deposit_i64_valid +#define TCG_TARGET_deposit_i64_valid(ofs, len) 1 +#endif + +/* Only one of DIV or DIV2 should be defined.  */ +#if defined(TCG_TARGET_HAS_div_i32) +#define TCG_TARGET_HAS_div2_i32         0 +#elif defined(TCG_TARGET_HAS_div2_i32) +#define TCG_TARGET_HAS_div_i32          0 +#define TCG_TARGET_HAS_rem_i32          0 +#endif +#if defined(TCG_TARGET_HAS_div_i64) +#define TCG_TARGET_HAS_div2_i64         0 +#elif defined(TCG_TARGET_HAS_div2_i64) +#define TCG_TARGET_HAS_div_i64          0 +#define TCG_TARGET_HAS_rem_i64          0 +#endif + +/* For 32-bit targets, some sort of unsigned widening multiply is required.  
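
As an aside on the TCG_TARGET_REG_BITS default earlier in this header: when a backend does not set it, the register width is inferred purely from the host pointer size. A compile-time sanity check in the same style; HOST_REG_BITS is an invented name for illustration, and 8-bit bytes are assumed.

#include <stdint.h>

#if UINTPTR_MAX == UINT32_MAX
# define HOST_REG_BITS 32
#elif UINTPTR_MAX == UINT64_MAX
# define HOST_REG_BITS 64
#else
# error "Unknown pointer size"
#endif

/* The derived width must match what a pointer actually occupies. */
_Static_assert(HOST_REG_BITS == 8 * sizeof(void *), "register width mismatch");

int main(void) { return 0; }
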
*/ +#if TCG_TARGET_REG_BITS == 32 \ +    && !(defined(TCG_TARGET_HAS_mulu2_i32) \ +         || defined(TCG_TARGET_HAS_muluh_i32)) +# error "Missing unsigned widening multiply" +#endif + +typedef enum TCGOpcode { +#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name, +#include "tcg-opc.h" +#undef DEF +    NB_OPS, +} TCGOpcode; + +#define tcg_regset_clear(d) (d) = 0 +#define tcg_regset_set(d, s) (d) = (s) +#define tcg_regset_set32(d, reg, val32) (d) |= (val32) << (reg) +#define tcg_regset_set_reg(d, r) (d) |= 1L << (r) +#define tcg_regset_reset_reg(d, r) (d) &= ~(1L << (r)) +#define tcg_regset_test_reg(d, r) (((d) >> (r)) & 1) +#define tcg_regset_or(d, a, b) (d) = (a) | (b) +#define tcg_regset_and(d, a, b) (d) = (a) & (b) +#define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b) +#define tcg_regset_not(d, a) (d) = ~(a) + +#ifndef TCG_TARGET_INSN_UNIT_SIZE +# error "Missing TCG_TARGET_INSN_UNIT_SIZE" +#elif TCG_TARGET_INSN_UNIT_SIZE == 1 +typedef uint8_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 2 +typedef uint16_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 4 +typedef uint32_t tcg_insn_unit; +#elif TCG_TARGET_INSN_UNIT_SIZE == 8 +typedef uint64_t tcg_insn_unit; +#else +/* The port better have done this.  */ +#endif + + +typedef struct TCGRelocation { +    struct TCGRelocation *next; +    int type; +    tcg_insn_unit *ptr; +    intptr_t addend; +} TCGRelocation;  + +typedef struct TCGLabel { +    unsigned has_value : 1; +    unsigned id : 31; +    union { +        uintptr_t value; +        tcg_insn_unit *value_ptr; +        TCGRelocation *first_reloc; +    } u; +} TCGLabel; + +typedef struct TCGPool { +    struct TCGPool *next; +    int size; +    uint8_t data[0] __attribute__ ((aligned)); +} TCGPool; + +#define TCG_POOL_CHUNK_SIZE 32768 + +#define TCG_MAX_TEMPS 512 + +/* when the size of the arguments of a called function is smaller than +   this value, they are statically allocated in the TB stack frame */ +#define TCG_STATIC_CALL_ARGS_SIZE 128 + +typedef enum TCGType { +    TCG_TYPE_I32, +    TCG_TYPE_I64, +    TCG_TYPE_COUNT, /* number of different types */ + +    /* An alias for the size of the host register.  */ +#if TCG_TARGET_REG_BITS == 32 +    TCG_TYPE_REG = TCG_TYPE_I32, +#else +    TCG_TYPE_REG = TCG_TYPE_I64, +#endif + +    /* An alias for the size of the native pointer.  */ +#if UINTPTR_MAX == UINT32_MAX +    TCG_TYPE_PTR = TCG_TYPE_I32, +#else +    TCG_TYPE_PTR = TCG_TYPE_I64, +#endif + +    /* An alias for the size of the target "long", aka register.  */ +#if TARGET_LONG_BITS == 64 +    TCG_TYPE_TL = TCG_TYPE_I64, +#else +    TCG_TYPE_TL = TCG_TYPE_I32, +#endif +} TCGType; + +/* Constants for qemu_ld and qemu_st for the Memory Operation field.  */ +typedef enum TCGMemOp { +    MO_8     = 0, +    MO_16    = 1, +    MO_32    = 2, +    MO_64    = 3, +    MO_SIZE  = 3,   /* Mask for the above.  */ + +    MO_SIGN  = 4,   /* Sign-extended, otherwise zero-extended.  */ + +    MO_BSWAP = 8,   /* Host reverse endian.  */ +#ifdef HOST_WORDS_BIGENDIAN +    MO_LE    = MO_BSWAP, +    MO_BE    = 0, +#else +    MO_LE    = 0, +    MO_BE    = MO_BSWAP, +#endif +#ifdef TARGET_WORDS_BIGENDIAN +    MO_TE    = MO_BE, +#else +    MO_TE    = MO_LE, +#endif + +    /* MO_UNALN accesses are never checked for alignment. +       MO_ALIGN accesses will result in a call to the CPU's +       do_unaligned_access hook if the guest address is not aligned. +       The default depends on whether the target CPU defines ALIGNED_ONLY.  
*/ +    MO_AMASK = 16, +#ifdef ALIGNED_ONLY +    MO_ALIGN = 0, +    MO_UNALN = MO_AMASK, +#else +    MO_ALIGN = MO_AMASK, +    MO_UNALN = 0, +#endif + +    /* Combinations of the above, for ease of use.  */ +    MO_UB    = MO_8, +    MO_UW    = MO_16, +    MO_UL    = MO_32, +    MO_SB    = MO_SIGN | MO_8, +    MO_SW    = MO_SIGN | MO_16, +    MO_SL    = MO_SIGN | MO_32, +    MO_Q     = MO_64, + +    MO_LEUW  = MO_LE | MO_UW, +    MO_LEUL  = MO_LE | MO_UL, +    MO_LESW  = MO_LE | MO_SW, +    MO_LESL  = MO_LE | MO_SL, +    MO_LEQ   = MO_LE | MO_Q, + +    MO_BEUW  = MO_BE | MO_UW, +    MO_BEUL  = MO_BE | MO_UL, +    MO_BESW  = MO_BE | MO_SW, +    MO_BESL  = MO_BE | MO_SL, +    MO_BEQ   = MO_BE | MO_Q, + +    MO_TEUW  = MO_TE | MO_UW, +    MO_TEUL  = MO_TE | MO_UL, +    MO_TESW  = MO_TE | MO_SW, +    MO_TESL  = MO_TE | MO_SL, +    MO_TEQ   = MO_TE | MO_Q, + +    MO_SSIZE = MO_SIZE | MO_SIGN, +} TCGMemOp; + +typedef tcg_target_ulong TCGArg; + +/* Define a type and accessor macros for variables.  Using pointer types +   is nice because it gives some level of type safely.  Converting to and +   from intptr_t rather than int reduces the number of sign-extension +   instructions that get implied on 64-bit hosts.  Users of tcg_gen_* don't +   need to know about any of this, and should treat TCGv as an opaque type. +   In addition we do typechecking for different types of variables.  TCGv_i32 +   and TCGv_i64 are 32/64-bit variables respectively.  TCGv and TCGv_ptr +   are aliases for target_ulong and host pointer sized values respectively.  */ + +typedef struct TCGv_i32_d *TCGv_i32; +typedef struct TCGv_i64_d *TCGv_i64; +typedef struct TCGv_ptr_d *TCGv_ptr; + +static inline TCGv_i32 QEMU_ARTIFICIAL MAKE_TCGV_I32(intptr_t i) +{ +    return (TCGv_i32)i; +} + +static inline TCGv_i64 QEMU_ARTIFICIAL MAKE_TCGV_I64(intptr_t i) +{ +    return (TCGv_i64)i; +} + +static inline TCGv_ptr QEMU_ARTIFICIAL MAKE_TCGV_PTR(intptr_t i) +{ +    return (TCGv_ptr)i; +} + +static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_I32(TCGv_i32 t) +{ +    return (intptr_t)t; +} + +static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_I64(TCGv_i64 t) +{ +    return (intptr_t)t; +} + +static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_PTR(TCGv_ptr t) +{ +    return (intptr_t)t; +} + +#if TCG_TARGET_REG_BITS == 32 +#define TCGV_LOW(t) MAKE_TCGV_I32(GET_TCGV_I64(t)) +#define TCGV_HIGH(t) MAKE_TCGV_I32(GET_TCGV_I64(t) + 1) +#endif + +#define TCGV_EQUAL_I32(a, b) (GET_TCGV_I32(a) == GET_TCGV_I32(b)) +#define TCGV_EQUAL_I64(a, b) (GET_TCGV_I64(a) == GET_TCGV_I64(b)) +#define TCGV_EQUAL_PTR(a, b) (GET_TCGV_PTR(a) == GET_TCGV_PTR(b)) + +/* Dummy definition to avoid compiler warnings.  */ +#define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1) +#define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1) +#define TCGV_UNUSED_PTR(x) x = MAKE_TCGV_PTR(-1) + +#define TCGV_IS_UNUSED_I32(x) (GET_TCGV_I32(x) == -1) +#define TCGV_IS_UNUSED_I64(x) (GET_TCGV_I64(x) == -1) +#define TCGV_IS_UNUSED_PTR(x) (GET_TCGV_PTR(x) == -1) + +/* call flags */ +/* Helper does not read globals (either directly or through an exception). It +   implies TCG_CALL_NO_WRITE_GLOBALS. */ +#define TCG_CALL_NO_READ_GLOBALS    0x0010 +/* Helper does not write globals */ +#define TCG_CALL_NO_WRITE_GLOBALS   0x0020 +/* Helper can be safely suppressed if the return value is not used. 
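
The TCGMemOp encoding above keeps the log2 of the access size in the low two bits, with sign-extension and byte-swapping as independent flag bits; the MO_LE/MO_BE/MO_TE aliases only decide whether MO_BSWAP is set relative to the host. A small standalone decoder over the same bit layout; the describe() helper is invented for illustration.

#include <stdio.h>

enum {
    MO_SIZE  = 3,   /* log2 of the access size, in bits 0..1 */
    MO_SIGN  = 4,   /* sign-extend the loaded value */
    MO_BSWAP = 8    /* byte-swap relative to host endianness */
};

static void describe(int op)
{
    printf("size=%d bytes, %s, %s\n",
           1 << (op & MO_SIZE),
           (op & MO_SIGN)  ? "sign-extended" : "zero-extended",
           (op & MO_BSWAP) ? "byte-swapped"  : "host-endian");
}

int main(void)
{
    describe(1);                       /* MO_16: 2-byte zero-extended load */
    describe(1 | MO_SIGN);             /* MO_SW: 2-byte sign-extended load */
    describe(2 | MO_BSWAP | MO_SIGN);  /* 4-byte, sign-extended, swapped */
    return 0;
}
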
*/ +#define TCG_CALL_NO_SIDE_EFFECTS    0x0040 + +/* convenience version of most used call flags */ +#define TCG_CALL_NO_RWG         TCG_CALL_NO_READ_GLOBALS +#define TCG_CALL_NO_WG          TCG_CALL_NO_WRITE_GLOBALS +#define TCG_CALL_NO_SE          TCG_CALL_NO_SIDE_EFFECTS +#define TCG_CALL_NO_RWG_SE      (TCG_CALL_NO_RWG | TCG_CALL_NO_SE) +#define TCG_CALL_NO_WG_SE       (TCG_CALL_NO_WG | TCG_CALL_NO_SE) + +/* used to align parameters */ +#define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1) +#define TCG_CALL_DUMMY_ARG      ((TCGArg)(-1)) + +/* Conditions.  Note that these are laid out for easy manipulation by +   the functions below: +     bit 0 is used for inverting; +     bit 1 is signed, +     bit 2 is unsigned, +     bit 3 is used with bit 0 for swapping signed/unsigned.  */ +typedef enum { +    /* non-signed */ +    TCG_COND_NEVER  = 0 | 0 | 0 | 0, +    TCG_COND_ALWAYS = 0 | 0 | 0 | 1, +    TCG_COND_EQ     = 8 | 0 | 0 | 0, +    TCG_COND_NE     = 8 | 0 | 0 | 1, +    /* signed */ +    TCG_COND_LT     = 0 | 0 | 2 | 0, +    TCG_COND_GE     = 0 | 0 | 2 | 1, +    TCG_COND_LE     = 8 | 0 | 2 | 0, +    TCG_COND_GT     = 8 | 0 | 2 | 1, +    /* unsigned */ +    TCG_COND_LTU    = 0 | 4 | 0 | 0, +    TCG_COND_GEU    = 0 | 4 | 0 | 1, +    TCG_COND_LEU    = 8 | 4 | 0 | 0, +    TCG_COND_GTU    = 8 | 4 | 0 | 1, +} TCGCond; + +/* Invert the sense of the comparison.  */ +static inline TCGCond tcg_invert_cond(TCGCond c) +{ +    return (TCGCond)(c ^ 1); +} + +/* Swap the operands in a comparison.  */ +static inline TCGCond tcg_swap_cond(TCGCond c) +{ +    return c & 6 ? (TCGCond)(c ^ 9) : c; +} + +/* Create an "unsigned" version of a "signed" comparison.  */ +static inline TCGCond tcg_unsigned_cond(TCGCond c) +{ +    return c & 2 ? (TCGCond)(c ^ 6) : c; +} + +/* Must a comparison be considered unsigned?  */ +static inline bool is_unsigned_cond(TCGCond c) +{ +    return (c & 4) != 0; +} + +/* Create a "high" version of a double-word comparison. +   This removes equality from a LTE or GTE comparison.  */ +static inline TCGCond tcg_high_cond(TCGCond c) +{ +    switch (c) { +    case TCG_COND_GE: +    case TCG_COND_LE: +    case TCG_COND_GEU: +    case TCG_COND_LEU: +        return (TCGCond)(c ^ 8); +    default: +        return c; +    } +} + +typedef enum TCGTempVal { +    TEMP_VAL_DEAD, +    TEMP_VAL_REG, +    TEMP_VAL_MEM, +    TEMP_VAL_CONST, +} TCGTempVal; + +typedef struct TCGTemp { +    unsigned int reg:8; +    unsigned int mem_reg:8; +    TCGTempVal val_type:8; +    TCGType base_type:8; +    TCGType type:8; +    unsigned int fixed_reg:1; +    unsigned int mem_coherent:1; +    unsigned int mem_allocated:1; +    unsigned int temp_local:1; /* If true, the temp is saved across +                                  basic blocks. Otherwise, it is not +                                  preserved across basic blocks. */ +    unsigned int temp_allocated:1; /* never used for code gen */ + +    tcg_target_long val; +    intptr_t mem_offset; +    const char *name; +} TCGTemp; + +typedef struct TCGContext TCGContext; + +typedef struct TCGTempSet { +    unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; +} TCGTempSet; + +typedef struct TCGOp { +    TCGOpcode opc   : 8; + +    /* The number of out and in parameter for a call.  */ +    unsigned callo  : 2; +    unsigned calli  : 6; + +    /* Index of the arguments for this op, or -1 for zero-operand ops.  */ +    signed args     : 16; + +    /* Index of the prex/next op, or -1 for the end of the list.  
*/ +    signed prev     : 16; +    signed next     : 16; +} TCGOp; + +QEMU_BUILD_BUG_ON(NB_OPS > 0xff); +QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff); +QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff); + +struct TCGContext { +    uint8_t *pool_cur, *pool_end; +    TCGPool *pool_first, *pool_current, *pool_first_large; +    int nb_labels; +    int nb_globals; +    int nb_temps; + +    /* goto_tb support */ +    tcg_insn_unit *code_buf; +    uintptr_t *tb_next; +    uint16_t *tb_next_offset; +    uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ + +    /* liveness analysis */ +    uint16_t *op_dead_args; /* for each operation, each bit tells if the +                               corresponding argument is dead */ +    uint8_t *op_sync_args;  /* for each operation, each bit tells if the +                               corresponding output argument needs to be +                               sync to memory. */ +     +    TCGRegSet reserved_regs; +    intptr_t current_frame_offset; +    intptr_t frame_start; +    intptr_t frame_end; +    int frame_reg; + +    tcg_insn_unit *code_ptr; + +    GHashTable *helpers; + +#ifdef CONFIG_PROFILER +    /* profiling info */ +    int64_t tb_count1; +    int64_t tb_count; +    int64_t op_count; /* total insn count */ +    int op_count_max; /* max insn per TB */ +    int64_t temp_count; +    int temp_count_max; +    int64_t del_op_count; +    int64_t code_in_len; +    int64_t code_out_len; +    int64_t interm_time; +    int64_t code_time; +    int64_t la_time; +    int64_t opt_time; +    int64_t restore_count; +    int64_t restore_time; +#endif + +#ifdef CONFIG_DEBUG_TCG +    int temps_in_use; +    int goto_tb_issue_mask; +#endif + +    int gen_first_op_idx; +    int gen_last_op_idx; +    int gen_next_op_idx; +    int gen_next_parm_idx; + +    /* Code generation.  Note that we specifically do not use tcg_insn_unit +       here, because there's too much arithmetic throughout that relies +       on addition and subtraction working on bytes.  Rely on the GCC +       extension that allows arithmetic on void*.  */ +    int code_gen_max_blocks; +    void *code_gen_prologue; +    void *code_gen_buffer; +    size_t code_gen_buffer_size; +    /* threshold to flush the translated code buffer */ +    size_t code_gen_buffer_max_size; +    void *code_gen_ptr; + +    TBContext tb_ctx; + +    /* The TCGBackendData structure is private to tcg-target.c.  */ +    struct TCGBackendData *be; + +    TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; +    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ + +    /* tells in which temporary a given register is. It does not take +       into account fixed registers */ +    int reg_to_temp[TCG_TARGET_NB_REGS]; + +    TCGOp gen_op_buf[OPC_BUF_SIZE]; +    TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; + +    target_ulong gen_opc_pc[OPC_BUF_SIZE]; +    uint16_t gen_opc_icount[OPC_BUF_SIZE]; +    uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; +}; + +extern TCGContext tcg_ctx; + +/* The number of opcodes emitted so far.  */ +static inline int tcg_op_buf_count(void) +{ +    return tcg_ctx.gen_next_op_idx; +} + +/* Test for whether to terminate the TB for using too many opcodes.  
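
The op stream above is not a pointer-linked list: TCGOp records live in the fixed gen_op_buf array and chain through their signed 16-bit prev/next indices, so unlinking an op (as tcg_op_remove does) is just two index updates. A reduced standalone sketch of that arrangement; the buffer size and the append()/unlink_op() helpers are invented for illustration.

#include <stdio.h>

typedef struct {
    int opc;
    signed prev : 16;   /* index of previous op, or -1 */
    signed next : 16;   /* index of next op, or -1 */
} Op;

static Op buf[64];
static int first = -1, last = -1, used;

static int append(int opc)
{
    int i = used++;
    buf[i].opc = opc;
    buf[i].prev = last;
    buf[i].next = -1;
    if (last >= 0) {
        buf[last].next = i;
    } else {
        first = i;
    }
    last = i;
    return i;
}

static void unlink_op(int i)   /* the core of something like tcg_op_remove */
{
    int p = buf[i].prev, n = buf[i].next;
    if (p >= 0) buf[p].next = n; else first = n;
    if (n >= 0) buf[n].prev = p; else last = p;
}

int main(void)
{
    append(1);
    int dead = append(2);
    append(3);
    unlink_op(dead);
    for (int i = first; i >= 0; i = buf[i].next) {
        printf("op %d\n", buf[i].opc);   /* prints 1 then 3 */
    }
    return 0;
}
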
*/ +static inline bool tcg_op_buf_full(void) +{ +    return tcg_op_buf_count() >= OPC_MAX_SIZE; +} + +/* pool based memory allocation */ + +void *tcg_malloc_internal(TCGContext *s, int size); +void tcg_pool_reset(TCGContext *s); +void tcg_pool_delete(TCGContext *s); + +static inline void *tcg_malloc(int size) +{ +    TCGContext *s = &tcg_ctx; +    uint8_t *ptr, *ptr_end; +    size = (size + sizeof(long) - 1) & ~(sizeof(long) - 1); +    ptr = s->pool_cur; +    ptr_end = ptr + size; +    if (unlikely(ptr_end > s->pool_end)) { +        return tcg_malloc_internal(&tcg_ctx, size); +    } else { +        s->pool_cur = ptr_end; +        return ptr; +    } +} + +void tcg_context_init(TCGContext *s); +void tcg_prologue_init(TCGContext *s); +void tcg_func_start(TCGContext *s); + +int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf); +int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf, +                           long offset); + +void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size); + +TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name); +TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name); +TCGv_i32 tcg_temp_new_internal_i32(int temp_local); +static inline TCGv_i32 tcg_temp_new_i32(void) +{ +    return tcg_temp_new_internal_i32(0); +} +static inline TCGv_i32 tcg_temp_local_new_i32(void) +{ +    return tcg_temp_new_internal_i32(1); +} +void tcg_temp_free_i32(TCGv_i32 arg); +char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg); + +TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name); +TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name); +TCGv_i64 tcg_temp_new_internal_i64(int temp_local); +static inline TCGv_i64 tcg_temp_new_i64(void) +{ +    return tcg_temp_new_internal_i64(0); +} +static inline TCGv_i64 tcg_temp_local_new_i64(void) +{ +    return tcg_temp_new_internal_i64(1); +} +void tcg_temp_free_i64(TCGv_i64 arg); +char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg); + +#if defined(CONFIG_DEBUG_TCG) +/* If you call tcg_clear_temp_count() at the start of a section of + * code which is not supposed to leak any TCG temporaries, then + * calling tcg_check_temp_count() at the end of the section will + * return 1 if the section did in fact leak a temporary. + */ +void tcg_clear_temp_count(void); +int tcg_check_temp_count(void); +#else +#define tcg_clear_temp_count() do { } while (0) +#define tcg_check_temp_count() 0 +#endif + +void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf); +void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf); + +#define TCG_CT_ALIAS  0x80 +#define TCG_CT_IALIAS 0x40 +#define TCG_CT_REG    0x01 +#define TCG_CT_CONST  0x02 /* any constant of register size */ + +typedef struct TCGArgConstraint { +    uint16_t ct; +    uint8_t alias_index; +    union { +        TCGRegSet regs; +    } u; +} TCGArgConstraint; + +#define TCG_MAX_OP_ARGS 16 + +/* Bits for TCGOpDef->flags, 8 bits available.  */ +enum { +    /* Instruction defines the end of a basic block.  */ +    TCG_OPF_BB_END       = 0x01, +    /* Instruction clobbers call registers and potentially update globals.  */ +    TCG_OPF_CALL_CLOBBER = 0x02, +    /* Instruction has side effects: it cannot be removed if its outputs +       are not used, and might trigger exceptions.  */ +    TCG_OPF_SIDE_EFFECTS = 0x04, +    /* Instruction operands are 64-bits (otherwise 32-bits).  
*/ +    TCG_OPF_64BIT        = 0x08, +    /* Instruction is optional and not implemented by the host, or insn +       is generic and should not be implemened by the host.  */ +    TCG_OPF_NOT_PRESENT  = 0x10, +}; + +typedef struct TCGOpDef { +    const char *name; +    uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; +    uint8_t flags; +    TCGArgConstraint *args_ct; +    int *sorted_args; +#if defined(CONFIG_DEBUG_TCG) +    int used; +#endif +} TCGOpDef; + +extern TCGOpDef tcg_op_defs[]; +extern const size_t tcg_op_defs_max; + +typedef struct TCGTargetOpDef { +    TCGOpcode op; +    const char *args_ct_str[TCG_MAX_OP_ARGS]; +} TCGTargetOpDef; + +#define tcg_abort() \ +do {\ +    fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\ +    abort();\ +} while (0) + +#ifdef CONFIG_DEBUG_TCG +# define tcg_debug_assert(X) do { assert(X); } while (0) +#elif QEMU_GNUC_PREREQ(4, 5) +# define tcg_debug_assert(X) \ +    do { if (!(X)) { __builtin_unreachable(); } } while (0) +#else +# define tcg_debug_assert(X) do { (void)(X); } while (0) +#endif + +void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); + +#if UINTPTR_MAX == UINT32_MAX +#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I32(n)) +#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I32(GET_TCGV_PTR(n)) + +#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i32((intptr_t)(V))) +#define tcg_global_reg_new_ptr(R, N) \ +    TCGV_NAT_TO_PTR(tcg_global_reg_new_i32((R), (N))) +#define tcg_global_mem_new_ptr(R, O, N) \ +    TCGV_NAT_TO_PTR(tcg_global_mem_new_i32((R), (O), (N))) +#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i32()) +#define tcg_temp_free_ptr(T) tcg_temp_free_i32(TCGV_PTR_TO_NAT(T)) +#else +#define TCGV_NAT_TO_PTR(n) MAKE_TCGV_PTR(GET_TCGV_I64(n)) +#define TCGV_PTR_TO_NAT(n) MAKE_TCGV_I64(GET_TCGV_PTR(n)) + +#define tcg_const_ptr(V) TCGV_NAT_TO_PTR(tcg_const_i64((intptr_t)(V))) +#define tcg_global_reg_new_ptr(R, N) \ +    TCGV_NAT_TO_PTR(tcg_global_reg_new_i64((R), (N))) +#define tcg_global_mem_new_ptr(R, O, N) \ +    TCGV_NAT_TO_PTR(tcg_global_mem_new_i64((R), (O), (N))) +#define tcg_temp_new_ptr() TCGV_NAT_TO_PTR(tcg_temp_new_i64()) +#define tcg_temp_free_ptr(T) tcg_temp_free_i64(TCGV_PTR_TO_NAT(T)) +#endif + +void tcg_gen_callN(TCGContext *s, void *func, +                   TCGArg ret, int nargs, TCGArg *args); + +void tcg_op_remove(TCGContext *s, TCGOp *op); +void tcg_optimize(TCGContext *s); + +/* only used for debugging purposes */ +void tcg_dump_ops(TCGContext *s); + +void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf); +TCGv_i32 tcg_const_i32(int32_t val); +TCGv_i64 tcg_const_i64(int64_t val); +TCGv_i32 tcg_const_local_i32(int32_t val); +TCGv_i64 tcg_const_local_i64(int64_t val); + +TCGLabel *gen_new_label(void); + +/** + * label_arg + * @l: label + * + * Encode a label for storage in the TCG opcode stream. + */ + +static inline TCGArg label_arg(TCGLabel *l) +{ +    return (uintptr_t)l; +} + +/** + * arg_label + * @i: value + * + * The opposite of label_arg.  Retrieve a label from the + * encoding of the TCG opcode stream. + */ + +static inline TCGLabel *arg_label(TCGArg i) +{ +    return (TCGLabel *)(uintptr_t)i; +} + +/** + * tcg_ptr_byte_diff + * @a, @b: addresses to be differenced + * + * There are many places within the TCG backends where we need a byte + * difference between two pointers.  While this can be accomplished + * with local casting, it's easy to get wrong -- especially if one is + * concerned with the signedness of the result. 
+ * + * This version relies on GCC's void pointer arithmetic to get the + * correct result. + */ + +static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) +{ +    return a - b; +} + +/** + * tcg_pcrel_diff + * @s: the tcg context + * @target: address of the target + * + * Produce a pc-relative difference, from the current code_ptr + * to the destination address. + */ + +static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, void *target) +{ +    return tcg_ptr_byte_diff(target, s->code_ptr); +} + +/** + * tcg_current_code_size + * @s: the tcg context + * + * Compute the current code size within the translation block. + * This is used to fill in qemu's data structures for goto_tb. + */ + +static inline size_t tcg_current_code_size(TCGContext *s) +{ +    return tcg_ptr_byte_diff(s->code_ptr, s->code_buf); +} + +/* Combine the TCGMemOp and mmu_idx parameters into a single value.  */ +typedef uint32_t TCGMemOpIdx; + +/** + * make_memop_idx + * @op: memory operation + * @idx: mmu index + * + * Encode these values into a single parameter. + */ +static inline TCGMemOpIdx make_memop_idx(TCGMemOp op, unsigned idx) +{ +    tcg_debug_assert(idx <= 15); +    return (op << 4) | idx; +} + +/** + * get_memop + * @oi: combined op/idx parameter + * + * Extract the memory operation from the combined value. + */ +static inline TCGMemOp get_memop(TCGMemOpIdx oi) +{ +    return oi >> 4; +} + +/** + * get_mmuidx + * @oi: combined op/idx parameter + * + * Extract the mmu index from the combined value. + */ +static inline unsigned get_mmuidx(TCGMemOpIdx oi) +{ +    return oi & 15; +} + +/** + * tcg_qemu_tb_exec: + * @env: CPUArchState * for the CPU + * @tb_ptr: address of generated code for the TB to execute + * + * Start executing code from a given translation block. + * Where translation blocks have been linked, execution + * may proceed from the given TB into successive ones. + * Control eventually returns only when some action is needed + * from the top-level loop: either control must pass to a TB + * which has not yet been directly linked, or an asynchronous + * event such as an interrupt needs handling. + * + * The return value is a pointer to the next TB to execute + * (if known; otherwise zero). This pointer is assumed to be + * 4-aligned, and the bottom two bits are used to return further + * information: + *  0, 1: the link between this TB and the next is via the specified + *        TB index (0 or 1). That is, we left the TB via (the equivalent + *        of) "goto_tb <index>". The main loop uses this to determine + *        how to link the TB just executed to the next. + *  2:    we are using instruction counting code generation, and we + *        did not start executing this TB because the instruction counter + *        would hit zero midway through it. In this case the next-TB pointer + *        returned is the TB we were about to execute, and the caller must + *        arrange to execute the remaining count of instructions. + *  3:    we stopped because the CPU's exit_request flag was set + *        (usually meaning that there is an interrupt that needs to be + *        handled). The next-TB pointer returned is the TB we were + *        about to execute when we noticed the pending exit request. + * + * If the bottom two bits indicate an exit-via-index then the CPU + * state is correctly synchronised and ready for execution of the next + * TB (and in particular the guest PC is the address to execute next). 
+ * Otherwise, we gave up on execution of this TB before it started, and + * the caller must fix up the CPU state by calling the CPU's + * synchronize_from_tb() method with the next-TB pointer we return (falling + * back to calling the CPU's set_pc method with tb->pb if no + * synchronize_from_tb() method exists). + * + * Note that TCG targets may use a different definition of tcg_qemu_tb_exec + * to this default (which just calls the prologue.code emitted by + * tcg_target_qemu_prologue()). + */ +#define TB_EXIT_MASK 3 +#define TB_EXIT_IDX0 0 +#define TB_EXIT_IDX1 1 +#define TB_EXIT_ICOUNT_EXPIRED 2 +#define TB_EXIT_REQUESTED 3 + +#ifdef HAVE_TCG_QEMU_TB_EXEC +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); +#else +# define tcg_qemu_tb_exec(env, tb_ptr) \ +    ((uintptr_t (*)(void *, void *))tcg_ctx.code_gen_prologue)(env, tb_ptr) +#endif + +void tcg_register_jit(void *buf, size_t buf_size); + +/* + * Memory helpers that will be used by TCG generated code. + */ +#ifdef CONFIG_SOFTMMU +/* Value zero-extended to tcg register size.  */ +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, +                                     TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, +                           TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, +                           TCGMemOpIdx oi, uintptr_t retaddr); + +/* Value sign-extended to tcg register size.  
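
Each of these helpers takes the combined TCGMemOpIdx oi built by make_memop_idx() above: the TCGMemOp sits in the upper bits and a 4-bit MMU index in the low bits. A standalone round-trip check of that packing; MemOpIdx here is a local stand-in for the real typedef.

#include <assert.h>

typedef unsigned MemOpIdx;

/* Same packing as make_memop_idx/get_memop/get_mmuidx above. */
static MemOpIdx make_memop_idx(unsigned op, unsigned idx)
{
    assert(idx <= 15);
    return (op << 4) | idx;
}

static unsigned get_memop(MemOpIdx oi)  { return oi >> 4; }
static unsigned get_mmuidx(MemOpIdx oi) { return oi & 15; }

int main(void)
{
    /* e.g. a 4-byte little-endian load (MO_LEUL == 2 on a LE host), mmu index 1 */
    MemOpIdx oi = make_memop_idx(2, 1);
    assert(get_memop(oi) == 2);
    assert(get_mmuidx(oi) == 1);
    return 0;
}
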
*/ +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, +                                     TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, +                                    TCGMemOpIdx oi, uintptr_t retaddr); + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, +                        TCGMemOpIdx oi, uintptr_t retaddr); +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, +                       TCGMemOpIdx oi, uintptr_t retaddr); +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, +                       TCGMemOpIdx oi, uintptr_t retaddr); +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, +                       TCGMemOpIdx oi, uintptr_t retaddr); +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, +                       TCGMemOpIdx oi, uintptr_t retaddr); +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, +                       TCGMemOpIdx oi, uintptr_t retaddr); +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, +                       TCGMemOpIdx oi, uintptr_t retaddr); + +/* Temporary aliases until backends are converted.  */ +#ifdef TARGET_WORDS_BIGENDIAN +# define helper_ret_ldsw_mmu  helper_be_ldsw_mmu +# define helper_ret_lduw_mmu  helper_be_lduw_mmu +# define helper_ret_ldsl_mmu  helper_be_ldsl_mmu +# define helper_ret_ldul_mmu  helper_be_ldul_mmu +# define helper_ret_ldq_mmu   helper_be_ldq_mmu +# define helper_ret_stw_mmu   helper_be_stw_mmu +# define helper_ret_stl_mmu   helper_be_stl_mmu +# define helper_ret_stq_mmu   helper_be_stq_mmu +#else +# define helper_ret_ldsw_mmu  helper_le_ldsw_mmu +# define helper_ret_lduw_mmu  helper_le_lduw_mmu +# define helper_ret_ldsl_mmu  helper_le_ldsl_mmu +# define helper_ret_ldul_mmu  helper_le_ldul_mmu +# define helper_ret_ldq_mmu   helper_le_ldq_mmu +# define helper_ret_stw_mmu   helper_le_stw_mmu +# define helper_ret_stl_mmu   helper_le_stl_mmu +# define helper_ret_stq_mmu   helper_le_stq_mmu +#endif + +#endif /* CONFIG_SOFTMMU */ + +#endif /* TCG_H */ diff --git a/tcg/tci/README b/tcg/tci/README new file mode 100644 index 00000000..dc57f076 --- /dev/null +++ b/tcg/tci/README @@ -0,0 +1,130 @@ +TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil. + +This file is released under the BSD license. + +1) Introduction + +TCG (Tiny Code Generator) is a code generator which translates +code fragments ("basic blocks") from target code (any of the +targets supported by QEMU) to a code representation which +can be run on a host. + +QEMU can create native code for some hosts (arm, hppa, i386, ia64, ppc, ppc64, +s390, sparc, x86_64). For others, unofficial host support was written. + +By adding a code generator for a virtual machine and using an +interpreter for the generated bytecode, it is possible to +support (almost) any host. + +This is what TCI (Tiny Code Interpreter) does. 
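
In other words, TCI replaces native code generation with a fetch/dispatch loop over generated bytecode. The toy interpreter below illustrates only that idea; the opcodes and operand layout are invented and do not match TCI's real encoding (which is produced by tcg/tci/tcg-target.c and consumed by tci.c).

#include <stdint.h>
#include <stdio.h>

/* Invented toy bytecode: [opcode, operands...]; real TCI differs. */
enum { OP_MOVI, OP_ADD, OP_END };

static void interpret(const uint8_t *pc, long *regs)
{
    for (;;) {
        switch (*pc++) {
        case OP_MOVI: { uint8_t r = *pc++; regs[r] = *pc++; break; }
        case OP_ADD:  { uint8_t d = *pc++, a = *pc++, b = *pc++;
                        regs[d] = regs[a] + regs[b]; break; }
        case OP_END:  return;
        }
    }
}

int main(void)
{
    const uint8_t prog[] = { OP_MOVI, 0, 2, OP_MOVI, 1, 3,
                             OP_ADD, 2, 0, 1, OP_END };
    long regs[4] = { 0 };
    interpret(prog, regs);
    printf("%ld\n", regs[2]);   /* 5 */
    return 0;
}
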
+ +2) Implementation + +Like each TCG host frontend, TCI implements the code generator in +tcg-target.c, tcg-target.h. Both files are in directory tcg/tci. + +The additional file tcg/tci.c adds the interpreter. + +The bytecode consists of opcodes (same numeric values as those used by +TCG), command length and arguments of variable size and number. + +3) Usage + +For hosts without native TCG, the interpreter TCI must be enabled by + +        configure --enable-tcg-interpreter + +If configure is called without --enable-tcg-interpreter, it will +suggest using this option. Setting it automatically would need +additional code in configure which must be fixed when new native TCG +implementations are added. + +System emulation should work on any 32 or 64 bit host. +User mode emulation might work. Maybe a new linker script (*.ld) +is needed. Byte order might be wrong (on big endian hosts) +and need fixes in configure. + +For hosts with native TCG, the interpreter TCI can be enabled by + +        configure --enable-tcg-interpreter + +The only difference from running QEMU with TCI to running without TCI +should be speed. Especially during development of TCI, it was very +useful to compare runs with and without TCI. Create /tmp/qemu.log by + +        qemu-system-i386 -d in_asm,op_opt,cpu -D /tmp/qemu.log -singlestep + +once with interpreter and once without interpreter and compare the resulting +qemu.log files. This is also useful to see the effects of additional +registers or additional opcodes (it is easy to modify the virtual machine). +It can also be used to verify native TCGs. + +Hosts with native TCG can also enable TCI by claiming to be unsupported: + +        configure --cpu=unknown --enable-tcg-interpreter + +configure then no longer uses the native linker script (*.ld) for +user mode emulation. + + +4) Status + +TCI needs special implementation for 32 and 64 bit host, 32 and 64 bit target, +host and target with same or different endianness. + +            | host (le)                     host (be) +            | 32             64             32             64 +------------+------------------------------------------------------------ +target (le) | s0, u0         s1, u1         s?, u?         s?, u? +32 bit      | +            | +target (le) | sc, uc         s1, u1         s?, u?         s?, u? +64 bit      | +            | +target (be) | sc, u0         sc, uc         s?, u?         s?, u? +32 bit      | +            | +target (be) | sc, uc         sc, uc         s?, u?         s?, u? +64 bit      | +            | + +System emulation +s? = untested +sc = compiles +s0 = bios works +s1 = grub works +s2 = Linux boots + +Linux user mode emulation +u? = untested +uc = compiles +u0 = static hello works +u1 = linux-user-test works + +5) Todo list + +* TCI is not widely tested. It was written and tested on a x86_64 host +  running i386 and x86_64 system emulation and Linux user mode. +  A cross compiled QEMU for i386 host also works with the same basic tests. +  A cross compiled QEMU for mipsel host works, too. It is terribly slow +  because I run it in a mips malta emulation, so it is an interpreted +  emulation in an emulation. +  A cross compiled QEMU for arm host works (tested with pc bios). +  A cross compiled QEMU for ppc host works at least partially: +  i386-linux-user/qemu-i386 can run a simple hello-world program +  (tested in a ppc emulation). + +* Some TCG opcodes are either missing in the code generator and/or +  in the interpreter. 
These opcodes raise a runtime exception, so it is +  possible to see where code must be added. + +* The pseudo code is not optimized and still ugly. For hosts with special +  alignment requirements, it needs some fixes (maybe aligned bytecode +  would also improve speed for hosts which support byte alignment). + +* A better disassembler for the pseudo code would be nice (a very primitive +  disassembler is included in tcg-target.c). + +* It might be useful to have a runtime option which selects the native TCG +  or TCI, so QEMU would have to include two TCGs. Today, selecting TCI +  is a configure option, so you need two compilations of QEMU. diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c new file mode 100644 index 00000000..83472dbc --- /dev/null +++ b/tcg/tci/tcg-target.c @@ -0,0 +1,875 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "tcg-be-null.h" + +/* TODO list: + * - See TODO comments in code. + */ + +/* Marker for missing code. */ +#define TODO() \ +    do { \ +        fprintf(stderr, "TODO %s:%u: %s()\n", \ +                __FILE__, __LINE__, __func__); \ +        tcg_abort(); \ +    } while (0) + +/* Bitfield n...m (in 32 bit value). */ +#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) + +/* Macros used in tcg_target_op_defs. */ +#define R       "r" +#define RI      "ri" +#if TCG_TARGET_REG_BITS == 32 +# define R64    "r", "r" +#else +# define R64    "r" +#endif +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +# define L      "L", "L" +# define S      "S", "S" +#else +# define L      "L" +# define S      "S" +#endif + +/* TODO: documentation. 
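
The BITS(n, m) helper defined above builds a 32-bit mask covering bits n down to m inclusive; a quick standalone check of a few values:

#include <assert.h>

#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)

int main(void)
{
    assert(BITS(7, 4) == 0xf0u);          /* bits 7..4 */
    assert(BITS(31, 0) == 0xffffffffu);   /* whole word */
    assert(BITS(0, 0) == 0x1u);           /* single bit 0 */
    return 0;
}
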
*/ +static const TCGTargetOpDef tcg_target_op_defs[] = { +    { INDEX_op_exit_tb, { NULL } }, +    { INDEX_op_goto_tb, { NULL } }, +    { INDEX_op_br, { NULL } }, + +    { INDEX_op_ld8u_i32, { R, R } }, +    { INDEX_op_ld8s_i32, { R, R } }, +    { INDEX_op_ld16u_i32, { R, R } }, +    { INDEX_op_ld16s_i32, { R, R } }, +    { INDEX_op_ld_i32, { R, R } }, +    { INDEX_op_st8_i32, { R, R } }, +    { INDEX_op_st16_i32, { R, R } }, +    { INDEX_op_st_i32, { R, R } }, + +    { INDEX_op_add_i32, { R, RI, RI } }, +    { INDEX_op_sub_i32, { R, RI, RI } }, +    { INDEX_op_mul_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i32 +    { INDEX_op_div_i32, { R, R, R } }, +    { INDEX_op_divu_i32, { R, R, R } }, +    { INDEX_op_rem_i32, { R, R, R } }, +    { INDEX_op_remu_i32, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i32 +    { INDEX_op_div2_i32, { R, R, "0", "1", R } }, +    { INDEX_op_divu2_i32, { R, R, "0", "1", R } }, +#endif +    /* TODO: Does R, RI, RI result in faster code than R, R, RI? +       If both operands are constants, we can optimize. */ +    { INDEX_op_and_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i32 +    { INDEX_op_andc_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i32 +    { INDEX_op_eqv_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i32 +    { INDEX_op_nand_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i32 +    { INDEX_op_nor_i32, { R, RI, RI } }, +#endif +    { INDEX_op_or_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i32 +    { INDEX_op_orc_i32, { R, RI, RI } }, +#endif +    { INDEX_op_xor_i32, { R, RI, RI } }, +    { INDEX_op_shl_i32, { R, RI, RI } }, +    { INDEX_op_shr_i32, { R, RI, RI } }, +    { INDEX_op_sar_i32, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i32 +    { INDEX_op_rotl_i32, { R, RI, RI } }, +    { INDEX_op_rotr_i32, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i32 +    { INDEX_op_deposit_i32, { R, "0", R } }, +#endif + +    { INDEX_op_brcond_i32, { R, RI } }, + +    { INDEX_op_setcond_i32, { R, R, RI } }, +#if TCG_TARGET_REG_BITS == 64 +    { INDEX_op_setcond_i64, { R, R, RI } }, +#endif /* TCG_TARGET_REG_BITS == 64 */ + +#if TCG_TARGET_REG_BITS == 32 +    /* TODO: Support R, R, R, R, RI, RI? Will it be faster? 
*/ +    { INDEX_op_add2_i32, { R, R, R, R, R, R } }, +    { INDEX_op_sub2_i32, { R, R, R, R, R, R } }, +    { INDEX_op_brcond2_i32, { R, R, RI, RI } }, +    { INDEX_op_mulu2_i32, { R, R, R, R } }, +    { INDEX_op_setcond2_i32, { R, R, R, RI, RI } }, +#endif + +#if TCG_TARGET_HAS_not_i32 +    { INDEX_op_not_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i32 +    { INDEX_op_neg_i32, { R, R } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 +    { INDEX_op_ld8u_i64, { R, R } }, +    { INDEX_op_ld8s_i64, { R, R } }, +    { INDEX_op_ld16u_i64, { R, R } }, +    { INDEX_op_ld16s_i64, { R, R } }, +    { INDEX_op_ld32u_i64, { R, R } }, +    { INDEX_op_ld32s_i64, { R, R } }, +    { INDEX_op_ld_i64, { R, R } }, + +    { INDEX_op_st8_i64, { R, R } }, +    { INDEX_op_st16_i64, { R, R } }, +    { INDEX_op_st32_i64, { R, R } }, +    { INDEX_op_st_i64, { R, R } }, + +    { INDEX_op_add_i64, { R, RI, RI } }, +    { INDEX_op_sub_i64, { R, RI, RI } }, +    { INDEX_op_mul_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_div_i64 +    { INDEX_op_div_i64, { R, R, R } }, +    { INDEX_op_divu_i64, { R, R, R } }, +    { INDEX_op_rem_i64, { R, R, R } }, +    { INDEX_op_remu_i64, { R, R, R } }, +#elif TCG_TARGET_HAS_div2_i64 +    { INDEX_op_div2_i64, { R, R, "0", "1", R } }, +    { INDEX_op_divu2_i64, { R, R, "0", "1", R } }, +#endif +    { INDEX_op_and_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_andc_i64 +    { INDEX_op_andc_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_eqv_i64 +    { INDEX_op_eqv_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nand_i64 +    { INDEX_op_nand_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_nor_i64 +    { INDEX_op_nor_i64, { R, RI, RI } }, +#endif +    { INDEX_op_or_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_orc_i64 +    { INDEX_op_orc_i64, { R, RI, RI } }, +#endif +    { INDEX_op_xor_i64, { R, RI, RI } }, +    { INDEX_op_shl_i64, { R, RI, RI } }, +    { INDEX_op_shr_i64, { R, RI, RI } }, +    { INDEX_op_sar_i64, { R, RI, RI } }, +#if TCG_TARGET_HAS_rot_i64 +    { INDEX_op_rotl_i64, { R, RI, RI } }, +    { INDEX_op_rotr_i64, { R, RI, RI } }, +#endif +#if TCG_TARGET_HAS_deposit_i64 +    { INDEX_op_deposit_i64, { R, "0", R } }, +#endif +    { INDEX_op_brcond_i64, { R, RI } }, + +#if TCG_TARGET_HAS_ext8s_i64 +    { INDEX_op_ext8s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i64 +    { INDEX_op_ext16s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32s_i64 +    { INDEX_op_ext32s_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i64 +    { INDEX_op_ext8u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i64 +    { INDEX_op_ext16u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext32u_i64 +    { INDEX_op_ext32u_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap16_i64 +    { INDEX_op_bswap16_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i64 +    { INDEX_op_bswap32_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap64_i64 +    { INDEX_op_bswap64_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_not_i64 +    { INDEX_op_not_i64, { R, R } }, +#endif +#if TCG_TARGET_HAS_neg_i64 +    { INDEX_op_neg_i64, { R, R } }, +#endif +#endif /* TCG_TARGET_REG_BITS == 64 */ + +    { INDEX_op_qemu_ld_i32, { R, L } }, +    { INDEX_op_qemu_ld_i64, { R64, L } }, + +    { INDEX_op_qemu_st_i32, { R, S } }, +    { INDEX_op_qemu_st_i64, { R64, S } }, + +#if TCG_TARGET_HAS_ext8s_i32 +    { INDEX_op_ext8s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16s_i32 +    { INDEX_op_ext16s_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext8u_i32 +    { INDEX_op_ext8u_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_ext16u_i32 +   
 { INDEX_op_ext16u_i32, { R, R } }, +#endif + +#if TCG_TARGET_HAS_bswap16_i32 +    { INDEX_op_bswap16_i32, { R, R } }, +#endif +#if TCG_TARGET_HAS_bswap32_i32 +    { INDEX_op_bswap32_i32, { R, R } }, +#endif + +    { -1 }, +}; + +static const int tcg_target_reg_alloc_order[] = { +    TCG_REG_R0, +    TCG_REG_R1, +    TCG_REG_R2, +    TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ +    TCG_REG_R4, +#endif +    TCG_REG_R5, +    TCG_REG_R6, +    TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, +    TCG_REG_R15, +#endif +}; + +#if MAX_OPC_PARAM_IARGS != 5 +# error Fix needed, number of supported input arguments changed! +#endif + +static const int tcg_target_call_iarg_regs[] = { +    TCG_REG_R0, +    TCG_REG_R1, +    TCG_REG_R2, +    TCG_REG_R3, +#if 0 /* used for TCG_REG_CALL_STACK */ +    TCG_REG_R4, +#endif +    TCG_REG_R5, +#if TCG_TARGET_REG_BITS == 32 +    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ +    TCG_REG_R6, +    TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +#else +# error Too few input registers available +#endif +#endif +}; + +static const int tcg_target_call_oarg_regs[] = { +    TCG_REG_R0, +#if TCG_TARGET_REG_BITS == 32 +    TCG_REG_R1 +#endif +}; + +#ifndef NDEBUG +static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +    "r00", +    "r01", +    "r02", +    "r03", +    "r04", +    "r05", +    "r06", +    "r07", +#if TCG_TARGET_NB_REGS >= 16 +    "r08", +    "r09", +    "r10", +    "r11", +    "r12", +    "r13", +    "r14", +    "r15", +#if TCG_TARGET_NB_REGS >= 32 +    "r16", +    "r17", +    "r18", +    "r19", +    "r20", +    "r21", +    "r22", +    "r23", +    "r24", +    "r25", +    "r26", +    "r27", +    "r28", +    "r29", +    "r30", +    "r31" +#endif +#endif +}; +#endif + +static void patch_reloc(tcg_insn_unit *code_ptr, int type, +                        intptr_t value, intptr_t addend) +{ +    /* tcg_out_reloc always uses the same type, addend. */ +    assert(type == sizeof(tcg_target_long)); +    assert(addend == 0); +    assert(value != 0); +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_patch32(code_ptr, value); +    } else { +        tcg_patch64(code_ptr, value); +    } +} + +/* Parse target specific constraints. */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ +    const char *ct_str = *pct_str; +    switch (ct_str[0]) { +    case 'r': +    case 'L':                   /* qemu_ld constraint */ +    case 'S':                   /* qemu_st constraint */ +        ct->ct |= TCG_CT_REG; +        tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1); +        break; +    default: +        return -1; +    } +    ct_str++; +    *pct_str = ct_str; +    return 0; +} + +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) +/* Show current bytecode. Used by tcg interpreter. */ +void tci_disas(uint8_t opc) +{ +    const TCGOpDef *def = &tcg_op_defs[opc]; +    fprintf(stderr, "TCG %s %u, %u, %u\n", +            def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs); +} +#endif + +/* Write value (native size). */ +static void tcg_out_i(TCGContext *s, tcg_target_ulong v) +{ +    if (TCG_TARGET_REG_BITS == 32) { +        tcg_out32(s, v); +    } else { +        tcg_out64(s, v); +    } +} + +/* Write opcode. */ +static void tcg_out_op_t(TCGContext *s, TCGOpcode op) +{ +    tcg_out8(s, op); +    tcg_out8(s, 0); +} + +/* Write register. 
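
tcg_out_op_t() above emits the opcode byte followed by a placeholder byte; the emit functions below fill that second byte with the total instruction length once all operands have been written (old_code_ptr[1] = s->code_ptr - old_code_ptr). A standalone sketch of that emit-then-backpatch pattern, with invented helper names and a simplified fixed buffer.

#include <stdint.h>
#include <stdio.h>

static uint8_t code[256];
static uint8_t *code_ptr = code;

static void out8(uint8_t v)  { *code_ptr++ = v; }
static void out32(uint32_t v)
{
    for (int i = 0; i < 4; i++) {
        out8(v >> (i * 8));          /* little-endian operand bytes */
    }
}

/* Emit one instruction: opcode, placeholder length, operands, then patch. */
static void emit_insn(uint8_t opc, uint8_t reg, uint32_t imm)
{
    uint8_t *old_code_ptr = code_ptr;
    out8(opc);
    out8(0);                         /* length byte, patched below */
    out8(reg);
    out32(imm);
    old_code_ptr[1] = code_ptr - old_code_ptr;
}

int main(void)
{
    emit_insn(42, 3, 0x1234);
    printf("opcode %d, length %d bytes\n", code[0], code[1]);   /* 42, 7 */
    return 0;
}
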
*/ +static void tcg_out_r(TCGContext *s, TCGArg t0) +{ +    assert(t0 < TCG_TARGET_NB_REGS); +    tcg_out8(s, t0); +} + +/* Write register or constant (native size). */ +static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg) +{ +    if (const_arg) { +        assert(const_arg == 1); +        tcg_out8(s, TCG_CONST); +        tcg_out_i(s, arg); +    } else { +        tcg_out_r(s, arg); +    } +} + +/* Write register or constant (32 bit). */ +static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg) +{ +    if (const_arg) { +        assert(const_arg == 1); +        tcg_out8(s, TCG_CONST); +        tcg_out32(s, arg); +    } else { +        tcg_out_r(s, arg); +    } +} + +#if TCG_TARGET_REG_BITS == 64 +/* Write register or constant (64 bit). */ +static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg) +{ +    if (const_arg) { +        assert(const_arg == 1); +        tcg_out8(s, TCG_CONST); +        tcg_out64(s, arg); +    } else { +        tcg_out_r(s, arg); +    } +} +#endif + +/* Write label. */ +static void tci_out_label(TCGContext *s, TCGLabel *label) +{ +    if (label->has_value) { +        tcg_out_i(s, label->u.value); +        assert(label->u.value); +    } else { +        tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); +        s->code_ptr += sizeof(tcg_target_ulong); +    } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, +                       intptr_t arg2) +{ +    uint8_t *old_code_ptr = s->code_ptr; +    if (type == TCG_TYPE_I32) { +        tcg_out_op_t(s, INDEX_op_ld_i32); +        tcg_out_r(s, ret); +        tcg_out_r(s, arg1); +        tcg_out32(s, arg2); +    } else { +        assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 +        tcg_out_op_t(s, INDEX_op_ld_i64); +        tcg_out_r(s, ret); +        tcg_out_r(s, arg1); +        assert(arg2 == (int32_t)arg2); +        tcg_out32(s, arg2); +#else +        TODO(); +#endif +    } +    old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ +    uint8_t *old_code_ptr = s->code_ptr; +    assert(ret != arg); +#if TCG_TARGET_REG_BITS == 32 +    tcg_out_op_t(s, INDEX_op_mov_i32); +#else +    tcg_out_op_t(s, INDEX_op_mov_i64); +#endif +    tcg_out_r(s, ret); +    tcg_out_r(s, arg); +    old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_movi(TCGContext *s, TCGType type, +                         TCGReg t0, tcg_target_long arg) +{ +    uint8_t *old_code_ptr = s->code_ptr; +    uint32_t arg32 = arg; +    if (type == TCG_TYPE_I32 || arg == arg32) { +        tcg_out_op_t(s, INDEX_op_movi_i32); +        tcg_out_r(s, t0); +        tcg_out32(s, arg32); +    } else { +        assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 +        tcg_out_op_t(s, INDEX_op_movi_i64); +        tcg_out_r(s, t0); +        tcg_out64(s, arg); +#else +        TODO(); +#endif +    } +    old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ +    uint8_t *old_code_ptr = s->code_ptr; +    tcg_out_op_t(s, INDEX_op_call); +    tcg_out_ri(s, 1, (uintptr_t)arg); +    old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, +                       const int *const_args) +{ +    uint8_t *old_code_ptr = s->code_ptr; + +    tcg_out_op_t(s, opc); + +    switch (opc) { +    case INDEX_op_exit_tb: +        tcg_out64(s, args[0]); +        break; 
+    case INDEX_op_goto_tb: +        if (s->tb_jmp_offset) { +            /* Direct jump method. */ +            assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset)); +            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); +            tcg_out32(s, 0); +        } else { +            /* Indirect jump method. */ +            TODO(); +        } +        assert(args[0] < ARRAY_SIZE(s->tb_next_offset)); +        s->tb_next_offset[args[0]] = tcg_current_code_size(s); +        break; +    case INDEX_op_br: +        tci_out_label(s, arg_label(args[0])); +        break; +    case INDEX_op_setcond_i32: +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_ri32(s, const_args[2], args[2]); +        tcg_out8(s, args[3]);   /* condition */ +        break; +#if TCG_TARGET_REG_BITS == 32 +    case INDEX_op_setcond2_i32: +        /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */ +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_r(s, args[2]); +        tcg_out_ri32(s, const_args[3], args[3]); +        tcg_out_ri32(s, const_args[4], args[4]); +        tcg_out8(s, args[5]);   /* condition */ +        break; +#elif TCG_TARGET_REG_BITS == 64 +    case INDEX_op_setcond_i64: +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_ri64(s, const_args[2], args[2]); +        tcg_out8(s, args[3]);   /* condition */ +        break; +#endif +    case INDEX_op_ld8u_i32: +    case INDEX_op_ld8s_i32: +    case INDEX_op_ld16u_i32: +    case INDEX_op_ld16s_i32: +    case INDEX_op_ld_i32: +    case INDEX_op_st8_i32: +    case INDEX_op_st16_i32: +    case INDEX_op_st_i32: +    case INDEX_op_ld8u_i64: +    case INDEX_op_ld8s_i64: +    case INDEX_op_ld16u_i64: +    case INDEX_op_ld16s_i64: +    case INDEX_op_ld32u_i64: +    case INDEX_op_ld32s_i64: +    case INDEX_op_ld_i64: +    case INDEX_op_st8_i64: +    case INDEX_op_st16_i64: +    case INDEX_op_st32_i64: +    case INDEX_op_st_i64: +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        assert(args[2] == (int32_t)args[2]); +        tcg_out32(s, args[2]); +        break; +    case INDEX_op_add_i32: +    case INDEX_op_sub_i32: +    case INDEX_op_mul_i32: +    case INDEX_op_and_i32: +    case INDEX_op_andc_i32:     /* Optional (TCG_TARGET_HAS_andc_i32). */ +    case INDEX_op_eqv_i32:      /* Optional (TCG_TARGET_HAS_eqv_i32). */ +    case INDEX_op_nand_i32:     /* Optional (TCG_TARGET_HAS_nand_i32). */ +    case INDEX_op_nor_i32:      /* Optional (TCG_TARGET_HAS_nor_i32). */ +    case INDEX_op_or_i32: +    case INDEX_op_orc_i32:      /* Optional (TCG_TARGET_HAS_orc_i32). */ +    case INDEX_op_xor_i32: +    case INDEX_op_shl_i32: +    case INDEX_op_shr_i32: +    case INDEX_op_sar_i32: +    case INDEX_op_rotl_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */ +    case INDEX_op_rotr_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */ +        tcg_out_r(s, args[0]); +        tcg_out_ri32(s, const_args[1], args[1]); +        tcg_out_ri32(s, const_args[2], args[2]); +        break; +    case INDEX_op_deposit_i32:  /* Optional (TCG_TARGET_HAS_deposit_i32). 
*/ +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_r(s, args[2]); +        assert(args[3] <= UINT8_MAX); +        tcg_out8(s, args[3]); +        assert(args[4] <= UINT8_MAX); +        tcg_out8(s, args[4]); +        break; + +#if TCG_TARGET_REG_BITS == 64 +    case INDEX_op_add_i64: +    case INDEX_op_sub_i64: +    case INDEX_op_mul_i64: +    case INDEX_op_and_i64: +    case INDEX_op_andc_i64:     /* Optional (TCG_TARGET_HAS_andc_i64). */ +    case INDEX_op_eqv_i64:      /* Optional (TCG_TARGET_HAS_eqv_i64). */ +    case INDEX_op_nand_i64:     /* Optional (TCG_TARGET_HAS_nand_i64). */ +    case INDEX_op_nor_i64:      /* Optional (TCG_TARGET_HAS_nor_i64). */ +    case INDEX_op_or_i64: +    case INDEX_op_orc_i64:      /* Optional (TCG_TARGET_HAS_orc_i64). */ +    case INDEX_op_xor_i64: +    case INDEX_op_shl_i64: +    case INDEX_op_shr_i64: +    case INDEX_op_sar_i64: +    case INDEX_op_rotl_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */ +    case INDEX_op_rotr_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */ +        tcg_out_r(s, args[0]); +        tcg_out_ri64(s, const_args[1], args[1]); +        tcg_out_ri64(s, const_args[2], args[2]); +        break; +    case INDEX_op_deposit_i64:  /* Optional (TCG_TARGET_HAS_deposit_i64). */ +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_r(s, args[2]); +        assert(args[3] <= UINT8_MAX); +        tcg_out8(s, args[3]); +        assert(args[4] <= UINT8_MAX); +        tcg_out8(s, args[4]); +        break; +    case INDEX_op_div_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */ +    case INDEX_op_divu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */ +    case INDEX_op_rem_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */ +    case INDEX_op_remu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */ +        TODO(); +        break; +    case INDEX_op_div2_i64:     /* Optional (TCG_TARGET_HAS_div2_i64). */ +    case INDEX_op_divu2_i64:    /* Optional (TCG_TARGET_HAS_div2_i64). */ +        TODO(); +        break; +    case INDEX_op_brcond_i64: +        tcg_out_r(s, args[0]); +        tcg_out_ri64(s, const_args[1], args[1]); +        tcg_out8(s, args[2]);           /* condition */ +        tci_out_label(s, arg_label(args[3])); +        break; +    case INDEX_op_bswap16_i64:  /* Optional (TCG_TARGET_HAS_bswap16_i64). */ +    case INDEX_op_bswap32_i64:  /* Optional (TCG_TARGET_HAS_bswap32_i64). */ +    case INDEX_op_bswap64_i64:  /* Optional (TCG_TARGET_HAS_bswap64_i64). */ +    case INDEX_op_not_i64:      /* Optional (TCG_TARGET_HAS_not_i64). */ +    case INDEX_op_neg_i64:      /* Optional (TCG_TARGET_HAS_neg_i64). */ +    case INDEX_op_ext8s_i64:    /* Optional (TCG_TARGET_HAS_ext8s_i64). */ +    case INDEX_op_ext8u_i64:    /* Optional (TCG_TARGET_HAS_ext8u_i64). */ +    case INDEX_op_ext16s_i64:   /* Optional (TCG_TARGET_HAS_ext16s_i64). */ +    case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */ +    case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */ +    case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */ +#endif /* TCG_TARGET_REG_BITS == 64 */ +    case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */ +    case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */ +    case INDEX_op_ext8s_i32:    /* Optional (TCG_TARGET_HAS_ext8s_i32). */ +    case INDEX_op_ext16s_i32:   /* Optional (TCG_TARGET_HAS_ext16s_i32). */ +    case INDEX_op_ext8u_i32:    /* Optional (TCG_TARGET_HAS_ext8u_i32). 
*/ +    case INDEX_op_ext16u_i32:   /* Optional (TCG_TARGET_HAS_ext16u_i32). */ +    case INDEX_op_bswap16_i32:  /* Optional (TCG_TARGET_HAS_bswap16_i32). */ +    case INDEX_op_bswap32_i32:  /* Optional (TCG_TARGET_HAS_bswap32_i32). */ +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        break; +    case INDEX_op_div_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */ +    case INDEX_op_divu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */ +    case INDEX_op_rem_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */ +    case INDEX_op_remu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */ +        tcg_out_r(s, args[0]); +        tcg_out_ri32(s, const_args[1], args[1]); +        tcg_out_ri32(s, const_args[2], args[2]); +        break; +    case INDEX_op_div2_i32:     /* Optional (TCG_TARGET_HAS_div2_i32). */ +    case INDEX_op_divu2_i32:    /* Optional (TCG_TARGET_HAS_div2_i32). */ +        TODO(); +        break; +#if TCG_TARGET_REG_BITS == 32 +    case INDEX_op_add2_i32: +    case INDEX_op_sub2_i32: +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_r(s, args[2]); +        tcg_out_r(s, args[3]); +        tcg_out_r(s, args[4]); +        tcg_out_r(s, args[5]); +        break; +    case INDEX_op_brcond2_i32: +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_ri32(s, const_args[2], args[2]); +        tcg_out_ri32(s, const_args[3], args[3]); +        tcg_out8(s, args[4]);           /* condition */ +        tci_out_label(s, arg_label(args[5])); +        break; +    case INDEX_op_mulu2_i32: +        tcg_out_r(s, args[0]); +        tcg_out_r(s, args[1]); +        tcg_out_r(s, args[2]); +        tcg_out_r(s, args[3]); +        break; +#endif +    case INDEX_op_brcond_i32: +        tcg_out_r(s, args[0]); +        tcg_out_ri32(s, const_args[1], args[1]); +        tcg_out8(s, args[2]);           /* condition */ +        tci_out_label(s, arg_label(args[3])); +        break; +    case INDEX_op_qemu_ld_i32: +        tcg_out_r(s, *args++); +        tcg_out_r(s, *args++); +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +            tcg_out_r(s, *args++); +        } +        tcg_out_i(s, *args++); +        break; +    case INDEX_op_qemu_ld_i64: +        tcg_out_r(s, *args++); +        if (TCG_TARGET_REG_BITS == 32) { +            tcg_out_r(s, *args++); +        } +        tcg_out_r(s, *args++); +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +            tcg_out_r(s, *args++); +        } +        tcg_out_i(s, *args++); +        break; +    case INDEX_op_qemu_st_i32: +        tcg_out_r(s, *args++); +        tcg_out_r(s, *args++); +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +            tcg_out_r(s, *args++); +        } +        tcg_out_i(s, *args++); +        break; +    case INDEX_op_qemu_st_i64: +        tcg_out_r(s, *args++); +        if (TCG_TARGET_REG_BITS == 32) { +            tcg_out_r(s, *args++); +        } +        tcg_out_r(s, *args++); +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { +            tcg_out_r(s, *args++); +        } +        tcg_out_i(s, *args++); +        break; +    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */ +    case INDEX_op_mov_i64: +    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */ +    case INDEX_op_movi_i64: +    case INDEX_op_call:     /* Always emitted via tcg_out_call.  
*/ +    default: +        tcg_abort(); +    } +    old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, +                       intptr_t arg2) +{ +    uint8_t *old_code_ptr = s->code_ptr; +    if (type == TCG_TYPE_I32) { +        tcg_out_op_t(s, INDEX_op_st_i32); +        tcg_out_r(s, arg); +        tcg_out_r(s, arg1); +        tcg_out32(s, arg2); +    } else { +        assert(type == TCG_TYPE_I64); +#if TCG_TARGET_REG_BITS == 64 +        tcg_out_op_t(s, INDEX_op_st_i64); +        tcg_out_r(s, arg); +        tcg_out_r(s, arg1); +        tcg_out32(s, arg2); +#else +        TODO(); +#endif +    } +    old_code_ptr[1] = s->code_ptr - old_code_ptr; +} + +/* Test if a constant matches the constraint. */ +static int tcg_target_const_match(tcg_target_long val, TCGType type, +                                  const TCGArgConstraint *arg_ct) +{ +    /* No need to return 0 or 1, 0 or != 0 is good enough. */ +    return arg_ct->ct & TCG_CT_CONST; +} + +static void tcg_target_init(TCGContext *s) +{ +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) +    const char *envval = getenv("DEBUG_TCG"); +    if (envval) { +        qemu_set_log(strtol(envval, NULL, 0)); +    } +#endif + +    /* The current code uses uint8_t for tcg operations. */ +    assert(ARRAY_SIZE(tcg_op_defs) <= UINT8_MAX); + +    /* Registers available for 32 bit operations. */ +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, +                     BIT(TCG_TARGET_NB_REGS) - 1); +    /* Registers available for 64 bit operations. */ +    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, +                     BIT(TCG_TARGET_NB_REGS) - 1); +    /* TODO: Which registers should be set here? */ +    tcg_regset_set32(tcg_target_call_clobber_regs, 0, +                     BIT(TCG_TARGET_NB_REGS) - 1); + +    tcg_regset_clear(s->reserved_regs); +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); +    tcg_add_target_add_op_defs(tcg_target_op_defs); + +    /* We use negative offsets from "sp" so that we can distinguish +       stores that might pretend to be call arguments.  */ +    tcg_set_frame(s, TCG_REG_CALL_STACK, +                  -CPU_TEMP_BUF_NLONGS * sizeof(long), +                  CPU_TEMP_BUF_NLONGS * sizeof(long)); +} + +/* Generate global QEMU prologue and epilogue code. */ +static inline void tcg_target_qemu_prologue(TCGContext *s) +{ +} diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h new file mode 100644 index 00000000..cbf3f9b5 --- /dev/null +++ b/tcg/tci/tcg-target.h @@ -0,0 +1,185 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * This code implements a TCG which does not generate machine code for some + * real target machine but which generates virtual machine code for an + * interpreter. Interpreted pseudo code is slow, but it works on any host. + * + * Some remarks might help in understanding the code: + * + * "target" or "TCG target" is the machine which runs the generated code. + * This is different to the usual meaning in QEMU where "target" is the + * emulated machine. So normally QEMU host is identical to TCG target. + * Here the TCG target is a virtual machine, but this virtual machine must + * use the same word size like the real machine. + * Therefore, we need both 32 and 64 bit virtual machines (interpreter). + */ + +#if !defined(TCG_TARGET_H) +#define TCG_TARGET_H + +#include "config-host.h" + +#define TCG_TARGET_INTERPRETER 1 +#define TCG_TARGET_INSN_UNIT_SIZE 1 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 + +#if UINTPTR_MAX == UINT32_MAX +# define TCG_TARGET_REG_BITS 32 +#elif UINTPTR_MAX == UINT64_MAX +# define TCG_TARGET_REG_BITS 64 +#else +# error Unknown pointer size for tci target +#endif + +#ifdef CONFIG_DEBUG_TCG +/* Enable debug output. */ +#define CONFIG_DEBUG_TCG_INTERPRETER +#endif + +/* Optional instructions. */ + +#define TCG_TARGET_HAS_bswap16_i32      1 +#define TCG_TARGET_HAS_bswap32_i32      1 +#define TCG_TARGET_HAS_div_i32          1 +#define TCG_TARGET_HAS_rem_i32          1 +#define TCG_TARGET_HAS_ext8s_i32        1 +#define TCG_TARGET_HAS_ext16s_i32       1 +#define TCG_TARGET_HAS_ext8u_i32        1 +#define TCG_TARGET_HAS_ext16u_i32       1 +#define TCG_TARGET_HAS_andc_i32         0 +#define TCG_TARGET_HAS_deposit_i32      1 +#define TCG_TARGET_HAS_eqv_i32          0 +#define TCG_TARGET_HAS_nand_i32         0 +#define TCG_TARGET_HAS_nor_i32          0 +#define TCG_TARGET_HAS_neg_i32          1 +#define TCG_TARGET_HAS_not_i32          1 +#define TCG_TARGET_HAS_orc_i32          0 +#define TCG_TARGET_HAS_rot_i32          1 +#define TCG_TARGET_HAS_movcond_i32      0 +#define TCG_TARGET_HAS_muls2_i32        0 +#define TCG_TARGET_HAS_muluh_i32        0 +#define TCG_TARGET_HAS_mulsh_i32        0 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_trunc_shr_i32    0 +#define TCG_TARGET_HAS_bswap16_i64      1 +#define TCG_TARGET_HAS_bswap32_i64      1 +#define TCG_TARGET_HAS_bswap64_i64      1 +#define TCG_TARGET_HAS_deposit_i64      1 +#define TCG_TARGET_HAS_div_i64          0 +#define TCG_TARGET_HAS_rem_i64          0 +#define TCG_TARGET_HAS_ext8s_i64        1 +#define TCG_TARGET_HAS_ext16s_i64       1 +#define TCG_TARGET_HAS_ext32s_i64       1 +#define TCG_TARGET_HAS_ext8u_i64        1 +#define TCG_TARGET_HAS_ext16u_i64       1 +#define TCG_TARGET_HAS_ext32u_i64       1 +#define TCG_TARGET_HAS_andc_i64         0 +#define TCG_TARGET_HAS_eqv_i64          0 +#define TCG_TARGET_HAS_nand_i64         0 +#define TCG_TARGET_HAS_nor_i64          0 +#define TCG_TARGET_HAS_neg_i64          1 +#define TCG_TARGET_HAS_not_i64          1 +#define TCG_TARGET_HAS_orc_i64          0 +#define TCG_TARGET_HAS_rot_i64          1 +#define TCG_TARGET_HAS_movcond_i64      0 +#define TCG_TARGET_HAS_muls2_i64        0 +#define TCG_TARGET_HAS_add2_i32         0 +#define TCG_TARGET_HAS_sub2_i32         0 +#define 
TCG_TARGET_HAS_mulu2_i32        0 +#define TCG_TARGET_HAS_add2_i64         0 +#define TCG_TARGET_HAS_sub2_i64         0 +#define TCG_TARGET_HAS_mulu2_i64        0 +#define TCG_TARGET_HAS_muluh_i64        0 +#define TCG_TARGET_HAS_mulsh_i64        0 +#else +#define TCG_TARGET_HAS_mulu2_i32        1 +#endif /* TCG_TARGET_REG_BITS == 64 */ + +/* Number of registers available. +   For 32 bit hosts, we need more than 8 registers (call arguments). */ +/* #define TCG_TARGET_NB_REGS 8 */ +#define TCG_TARGET_NB_REGS 16 +/* #define TCG_TARGET_NB_REGS 32 */ + +/* List of registers which are used by TCG. */ +typedef enum { +    TCG_REG_R0 = 0, +    TCG_REG_R1, +    TCG_REG_R2, +    TCG_REG_R3, +    TCG_REG_R4, +    TCG_REG_R5, +    TCG_REG_R6, +    TCG_REG_R7, +#if TCG_TARGET_NB_REGS >= 16 +    TCG_REG_R8, +    TCG_REG_R9, +    TCG_REG_R10, +    TCG_REG_R11, +    TCG_REG_R12, +    TCG_REG_R13, +    TCG_REG_R14, +    TCG_REG_R15, +#if TCG_TARGET_NB_REGS >= 32 +    TCG_REG_R16, +    TCG_REG_R17, +    TCG_REG_R18, +    TCG_REG_R19, +    TCG_REG_R20, +    TCG_REG_R21, +    TCG_REG_R22, +    TCG_REG_R23, +    TCG_REG_R24, +    TCG_REG_R25, +    TCG_REG_R26, +    TCG_REG_R27, +    TCG_REG_R28, +    TCG_REG_R29, +    TCG_REG_R30, +    TCG_REG_R31, +#endif +#endif +    /* Special value UINT8_MAX is used by TCI to encode constant values. */ +    TCG_CONST = UINT8_MAX +} TCGReg; + +#define TCG_AREG0                       (TCG_TARGET_NB_REGS - 2) + +/* Used for function call generation. */ +#define TCG_REG_CALL_STACK              (TCG_TARGET_NB_REGS - 1) +#define TCG_TARGET_CALL_STACK_OFFSET    0 +#define TCG_TARGET_STACK_ALIGN          16 + +void tci_disas(uint8_t opc); + +#define HAVE_TCG_QEMU_TB_EXEC + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} + +#endif /* TCG_TARGET_H */  | 
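
Note on the bytecode layout (editorial addition, not part of the patch): the header comment above describes TCI as a virtual machine, and the emitters in tcg/tci/tcg-target.c define its instruction encoding implicitly: tcg_out_op_t() writes the opcode byte followed by a zero placeholder, each emitter then patches old_code_ptr[1] with the instruction's total size, registers are written as single bytes, and TCG_CONST (UINT8_MAX) introduces an inline native-size immediate. The sketch below is a minimal illustration of how that two-byte header lets a reader step over a bytecode buffer without decoding each opcode's operands; the tci_walk helper, the opcode numbers and the buffer contents are invented for the example and do not appear in QEMU.

/*
 * Illustrative sketch only (not part of QEMU): walk a TCI bytecode
 * buffer using the two-byte header emitted by tcg_out_op_t() and the
 * length byte patched in via old_code_ptr[1].
 */
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

static void tci_walk(const uint8_t *buf, size_t size)
{
    size_t pos = 0;

    while (pos + 2 <= size) {
        uint8_t opc = buf[pos];        /* TCGOpcode as a single byte */
        uint8_t len = buf[pos + 1];    /* total length, header included */

        if (len < 2 || pos + len > size) {
            fprintf(stderr, "malformed instruction at offset %zu\n", pos);
            return;
        }
        printf("offset %3zu: opcode %3u, %2u bytes, %u operand byte(s)\n",
               pos, opc, len, len - 2);
        /* Operand bytes follow: registers are one byte each; the value
           0xff (TCG_CONST) marks an inline native-size immediate. */
        pos += len;
    }
}

int main(void)
{
    /* Two fake instructions: opcode 7 with two register operands,
       then opcode 9 with no operands.  Opcode numbers are placeholders;
       real values come from tcg-opc.h. */
    static const uint8_t buf[] = { 7, 4, 0, 1, 9, 2 };

    tci_walk(buf, sizeof(buf));
    return 0;
}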
