From 3f2546b2ef55b661fd8dd69682b38992225e86f6 Mon Sep 17 00:00:00 2001 From: fishsoupisgood Date: Mon, 29 Apr 2019 01:17:54 +0100 Subject: Initial import of qemu-2.4.1 --- target-arm/Makefile.objs | 12 + target-arm/arm-semi.c | 571 +++ target-arm/arm_ldst.h | 48 + target-arm/cpu-qom.h | 235 + target-arm/cpu.c | 1458 ++++++ target-arm/cpu.h | 1936 ++++++++ target-arm/cpu64.c | 342 ++ target-arm/crypto_helper.c | 465 ++ target-arm/gdbstub.c | 102 + target-arm/gdbstub64.c | 71 + target-arm/helper-a64.c | 546 ++ target-arm/helper-a64.h | 48 + target-arm/helper.c | 7642 ++++++++++++++++++++++++++++ target-arm/helper.h | 535 ++ target-arm/internals.h | 394 ++ target-arm/iwmmxt_helper.c | 672 +++ target-arm/kvm-consts.h | 186 + target-arm/kvm-stub.c | 23 + target-arm/kvm.c | 611 +++ target-arm/kvm32.c | 478 ++ target-arm/kvm64.c | 466 ++ target-arm/kvm_arm.h | 194 + target-arm/machine.c | 330 ++ target-arm/neon_helper.c | 2243 +++++++++ target-arm/op_addsub.h | 103 + target-arm/op_helper.c | 971 ++++ target-arm/psci.c | 257 + target-arm/translate-a64.c | 11130 +++++++++++++++++++++++++++++++++++++++++ target-arm/translate.c | 11584 +++++++++++++++++++++++++++++++++++++++++++ target-arm/translate.h | 148 + 30 files changed, 43801 insertions(+) create mode 100644 target-arm/Makefile.objs create mode 100644 target-arm/arm-semi.c create mode 100644 target-arm/arm_ldst.h create mode 100644 target-arm/cpu-qom.h create mode 100644 target-arm/cpu.c create mode 100644 target-arm/cpu.h create mode 100644 target-arm/cpu64.c create mode 100644 target-arm/crypto_helper.c create mode 100644 target-arm/gdbstub.c create mode 100644 target-arm/gdbstub64.c create mode 100644 target-arm/helper-a64.c create mode 100644 target-arm/helper-a64.h create mode 100644 target-arm/helper.c create mode 100644 target-arm/helper.h create mode 100644 target-arm/internals.h create mode 100644 target-arm/iwmmxt_helper.c create mode 100644 target-arm/kvm-consts.h create mode 100644 target-arm/kvm-stub.c create mode 100644 target-arm/kvm.c create mode 100644 target-arm/kvm32.c create mode 100644 target-arm/kvm64.c create mode 100644 target-arm/kvm_arm.h create mode 100644 target-arm/machine.c create mode 100644 target-arm/neon_helper.c create mode 100644 target-arm/op_addsub.h create mode 100644 target-arm/op_helper.c create mode 100644 target-arm/psci.c create mode 100644 target-arm/translate-a64.c create mode 100644 target-arm/translate.c create mode 100644 target-arm/translate.h (limited to 'target-arm') diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs new file mode 100644 index 00000000..9460b409 --- /dev/null +++ b/target-arm/Makefile.objs @@ -0,0 +1,12 @@ +obj-y += arm-semi.o +obj-$(CONFIG_SOFTMMU) += machine.o +obj-$(CONFIG_KVM) += kvm.o +obj-$(call land,$(CONFIG_KVM),$(call lnot,$(TARGET_AARCH64))) += kvm32.o +obj-$(call land,$(CONFIG_KVM),$(TARGET_AARCH64)) += kvm64.o +obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o +obj-y += translate.o op_helper.o helper.o cpu.o +obj-y += neon_helper.o iwmmxt_helper.o +obj-y += gdbstub.o +obj-$(CONFIG_SOFTMMU) += psci.o +obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o +obj-y += crypto_helper.o diff --git a/target-arm/arm-semi.c b/target-arm/arm-semi.c new file mode 100644 index 00000000..42522a70 --- /dev/null +++ b/target-arm/arm-semi.c @@ -0,0 +1,571 @@ +/* + * Arm "Angel" semihosting syscalls + * + * Copyright (c) 2005, 2007 CodeSourcery. + * Written by Paul Brook. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cpu.h" +#include "exec/semihost.h" +#ifdef CONFIG_USER_ONLY +#include "qemu.h" + +#define ARM_ANGEL_HEAP_SIZE (128 * 1024 * 1024) +#else +#include "qemu-common.h" +#include "exec/gdbstub.h" +#include "hw/arm/arm.h" +#endif + +#define TARGET_SYS_OPEN 0x01 +#define TARGET_SYS_CLOSE 0x02 +#define TARGET_SYS_WRITEC 0x03 +#define TARGET_SYS_WRITE0 0x04 +#define TARGET_SYS_WRITE 0x05 +#define TARGET_SYS_READ 0x06 +#define TARGET_SYS_READC 0x07 +#define TARGET_SYS_ISTTY 0x09 +#define TARGET_SYS_SEEK 0x0a +#define TARGET_SYS_FLEN 0x0c +#define TARGET_SYS_TMPNAM 0x0d +#define TARGET_SYS_REMOVE 0x0e +#define TARGET_SYS_RENAME 0x0f +#define TARGET_SYS_CLOCK 0x10 +#define TARGET_SYS_TIME 0x11 +#define TARGET_SYS_SYSTEM 0x12 +#define TARGET_SYS_ERRNO 0x13 +#define TARGET_SYS_GET_CMDLINE 0x15 +#define TARGET_SYS_HEAPINFO 0x16 +#define TARGET_SYS_EXIT 0x18 + +/* ADP_Stopped_ApplicationExit is used for exit(0), + * anything else is implemented as exit(1) */ +#define ADP_Stopped_ApplicationExit (0x20026) + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#define GDB_O_RDONLY 0x000 +#define GDB_O_WRONLY 0x001 +#define GDB_O_RDWR 0x002 +#define GDB_O_APPEND 0x008 +#define GDB_O_CREAT 0x200 +#define GDB_O_TRUNC 0x400 +#define GDB_O_BINARY 0 + +static int gdb_open_modeflags[12] = { + GDB_O_RDONLY, + GDB_O_RDONLY | GDB_O_BINARY, + GDB_O_RDWR, + GDB_O_RDWR | GDB_O_BINARY, + GDB_O_WRONLY | GDB_O_CREAT | GDB_O_TRUNC, + GDB_O_WRONLY | GDB_O_CREAT | GDB_O_TRUNC | GDB_O_BINARY, + GDB_O_RDWR | GDB_O_CREAT | GDB_O_TRUNC, + GDB_O_RDWR | GDB_O_CREAT | GDB_O_TRUNC | GDB_O_BINARY, + GDB_O_WRONLY | GDB_O_CREAT | GDB_O_APPEND, + GDB_O_WRONLY | GDB_O_CREAT | GDB_O_APPEND | GDB_O_BINARY, + GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND, + GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND | GDB_O_BINARY +}; + +static int open_modeflags[12] = { + O_RDONLY, + O_RDONLY | O_BINARY, + O_RDWR, + O_RDWR | O_BINARY, + O_WRONLY | O_CREAT | O_TRUNC, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, + O_RDWR | O_CREAT | O_TRUNC, + O_RDWR | O_CREAT | O_TRUNC | O_BINARY, + O_WRONLY | O_CREAT | O_APPEND, + O_WRONLY | O_CREAT | O_APPEND | O_BINARY, + O_RDWR | O_CREAT | O_APPEND, + O_RDWR | O_CREAT | O_APPEND | O_BINARY +}; + +#ifdef CONFIG_USER_ONLY +static inline uint32_t set_swi_errno(TaskState *ts, uint32_t code) +{ + if (code == (uint32_t)-1) + ts->swi_errno = errno; + return code; +} +#else +static inline uint32_t set_swi_errno(CPUARMState *env, uint32_t code) +{ + return code; +} + +#include "exec/softmmu-semi.h" +#endif + +static target_ulong arm_semi_syscall_len; + +#if !defined(CONFIG_USER_ONLY) +static target_ulong syscall_err; +#endif + +static void arm_semi_cb(CPUState *cs, target_ulong ret, target_ulong err) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; +#ifdef CONFIG_USER_ONLY + TaskState *ts = cs->opaque; +#endif + + if (ret == (target_ulong)-1) { +#ifdef CONFIG_USER_ONLY + ts->swi_errno = err; +#else + syscall_err = err; +#endif + env->regs[0] = ret; + } else { + /* Fixup syscalls that use nonstardard return conventions. */ + switch (env->regs[0]) { + case TARGET_SYS_WRITE: + case TARGET_SYS_READ: + env->regs[0] = arm_semi_syscall_len - ret; + break; + case TARGET_SYS_SEEK: + env->regs[0] = 0; + break; + default: + env->regs[0] = ret; + break; + } + } +} + +static void arm_semi_flen_cb(CPUState *cs, target_ulong ret, target_ulong err) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + /* The size is always stored in big-endian order, extract + the value. We assume the size always fit in 32 bits. */ + uint32_t size; + cpu_memory_rw_debug(cs, env->regs[13]-64+32, (uint8_t *)&size, 4, 0); + env->regs[0] = be32_to_cpu(size); +#ifdef CONFIG_USER_ONLY + ((TaskState *)cs->opaque)->swi_errno = err; +#else + syscall_err = err; +#endif +} + +/* Read the input value from the argument block; fail the semihosting + * call if the memory read fails. + */ +#define GET_ARG(n) do { \ + if (get_user_ual(arg ## n, args + (n) * 4)) { \ + return (uint32_t)-1; \ + } \ +} while (0) + +#define SET_ARG(n, val) put_user_ual(val, args + (n) * 4) +uint32_t do_arm_semihosting(CPUARMState *env) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); + target_ulong args; + target_ulong arg0, arg1, arg2, arg3; + char * s; + int nr; + uint32_t ret; + uint32_t len; +#ifdef CONFIG_USER_ONLY + TaskState *ts = cs->opaque; +#else + CPUARMState *ts = env; +#endif + + nr = env->regs[0]; + args = env->regs[1]; + switch (nr) { + case TARGET_SYS_OPEN: + GET_ARG(0); + GET_ARG(1); + GET_ARG(2); + s = lock_user_string(arg0); + if (!s) { + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + } + if (arg1 >= 12) { + unlock_user(s, arg0, 0); + return (uint32_t)-1; + } + if (strcmp(s, ":tt") == 0) { + int result_fileno = arg1 < 4 ? STDIN_FILENO : STDOUT_FILENO; + unlock_user(s, arg0, 0); + return result_fileno; + } + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "open,%s,%x,1a4", arg0, + (int)arg2+1, gdb_open_modeflags[arg1]); + ret = env->regs[0]; + } else { + ret = set_swi_errno(ts, open(s, open_modeflags[arg1], 0644)); + } + unlock_user(s, arg0, 0); + return ret; + case TARGET_SYS_CLOSE: + GET_ARG(0); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "close,%x", arg0); + return env->regs[0]; + } else { + return set_swi_errno(ts, close(arg0)); + } + case TARGET_SYS_WRITEC: + { + char c; + + if (get_user_u8(c, args)) + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + /* Write to debug console. stderr is near enough. */ + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "write,2,%x,1", args); + return env->regs[0]; + } else { + return write(STDERR_FILENO, &c, 1); + } + } + case TARGET_SYS_WRITE0: + if (!(s = lock_user_string(args))) + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + len = strlen(s); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "write,2,%x,%x", args, len); + ret = env->regs[0]; + } else { + ret = write(STDERR_FILENO, s, len); + } + unlock_user(s, args, 0); + return ret; + case TARGET_SYS_WRITE: + GET_ARG(0); + GET_ARG(1); + GET_ARG(2); + len = arg2; + if (use_gdb_syscalls()) { + arm_semi_syscall_len = len; + gdb_do_syscall(arm_semi_cb, "write,%x,%x,%x", arg0, arg1, len); + return env->regs[0]; + } else { + s = lock_user(VERIFY_READ, arg1, len, 1); + if (!s) { + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + } + ret = set_swi_errno(ts, write(arg0, s, len)); + unlock_user(s, arg1, 0); + if (ret == (uint32_t)-1) + return -1; + return len - ret; + } + case TARGET_SYS_READ: + GET_ARG(0); + GET_ARG(1); + GET_ARG(2); + len = arg2; + if (use_gdb_syscalls()) { + arm_semi_syscall_len = len; + gdb_do_syscall(arm_semi_cb, "read,%x,%x,%x", arg0, arg1, len); + return env->regs[0]; + } else { + s = lock_user(VERIFY_WRITE, arg1, len, 0); + if (!s) { + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + } + do { + ret = set_swi_errno(ts, read(arg0, s, len)); + } while (ret == -1 && errno == EINTR); + unlock_user(s, arg1, len); + if (ret == (uint32_t)-1) + return -1; + return len - ret; + } + case TARGET_SYS_READC: + /* XXX: Read from debug console. Not implemented. */ + return 0; + case TARGET_SYS_ISTTY: + GET_ARG(0); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "isatty,%x", arg0); + return env->regs[0]; + } else { + return isatty(arg0); + } + case TARGET_SYS_SEEK: + GET_ARG(0); + GET_ARG(1); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "lseek,%x,%x,0", arg0, arg1); + return env->regs[0]; + } else { + ret = set_swi_errno(ts, lseek(arg0, arg1, SEEK_SET)); + if (ret == (uint32_t)-1) + return -1; + return 0; + } + case TARGET_SYS_FLEN: + GET_ARG(0); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_flen_cb, "fstat,%x,%x", + arg0, env->regs[13]-64); + return env->regs[0]; + } else { + struct stat buf; + ret = set_swi_errno(ts, fstat(arg0, &buf)); + if (ret == (uint32_t)-1) + return -1; + return buf.st_size; + } + case TARGET_SYS_TMPNAM: + /* XXX: Not implemented. */ + return -1; + case TARGET_SYS_REMOVE: + GET_ARG(0); + GET_ARG(1); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "unlink,%s", arg0, (int)arg1+1); + ret = env->regs[0]; + } else { + s = lock_user_string(arg0); + if (!s) { + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + } + ret = set_swi_errno(ts, remove(s)); + unlock_user(s, arg0, 0); + } + return ret; + case TARGET_SYS_RENAME: + GET_ARG(0); + GET_ARG(1); + GET_ARG(2); + GET_ARG(3); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "rename,%s,%s", + arg0, (int)arg1+1, arg2, (int)arg3+1); + return env->regs[0]; + } else { + char *s2; + s = lock_user_string(arg0); + s2 = lock_user_string(arg2); + if (!s || !s2) + /* FIXME - should this error code be -TARGET_EFAULT ? */ + ret = (uint32_t)-1; + else + ret = set_swi_errno(ts, rename(s, s2)); + if (s2) + unlock_user(s2, arg2, 0); + if (s) + unlock_user(s, arg0, 0); + return ret; + } + case TARGET_SYS_CLOCK: + return clock() / (CLOCKS_PER_SEC / 100); + case TARGET_SYS_TIME: + return set_swi_errno(ts, time(NULL)); + case TARGET_SYS_SYSTEM: + GET_ARG(0); + GET_ARG(1); + if (use_gdb_syscalls()) { + gdb_do_syscall(arm_semi_cb, "system,%s", arg0, (int)arg1+1); + return env->regs[0]; + } else { + s = lock_user_string(arg0); + if (!s) { + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + } + ret = set_swi_errno(ts, system(s)); + unlock_user(s, arg0, 0); + return ret; + } + case TARGET_SYS_ERRNO: +#ifdef CONFIG_USER_ONLY + return ts->swi_errno; +#else + return syscall_err; +#endif + case TARGET_SYS_GET_CMDLINE: + { + /* Build a command-line from the original argv. + * + * The inputs are: + * * arg0, pointer to a buffer of at least the size + * specified in arg1. + * * arg1, size of the buffer pointed to by arg0 in + * bytes. + * + * The outputs are: + * * arg0, pointer to null-terminated string of the + * command line. + * * arg1, length of the string pointed to by arg0. + */ + + char *output_buffer; + size_t input_size; + size_t output_size; + int status = 0; +#if !defined(CONFIG_USER_ONLY) + const char *cmdline; +#endif + GET_ARG(0); + GET_ARG(1); + input_size = arg1; + /* Compute the size of the output string. */ +#if !defined(CONFIG_USER_ONLY) + cmdline = semihosting_get_cmdline(); + if (cmdline == NULL) { + cmdline = ""; /* Default to an empty line. */ + } + output_size = strlen(cmdline) + 1; /* Count terminating 0. */ +#else + unsigned int i; + + output_size = ts->info->arg_end - ts->info->arg_start; + if (!output_size) { + /* We special-case the "empty command line" case (argc==0). + Just provide the terminating 0. */ + output_size = 1; + } +#endif + + if (output_size > input_size) { + /* Not enough space to store command-line arguments. */ + return -1; + } + + /* Adjust the command-line length. */ + if (SET_ARG(1, output_size - 1)) { + /* Couldn't write back to argument block */ + return -1; + } + + /* Lock the buffer on the ARM side. */ + output_buffer = lock_user(VERIFY_WRITE, arg0, output_size, 0); + if (!output_buffer) { + return -1; + } + + /* Copy the command-line arguments. */ +#if !defined(CONFIG_USER_ONLY) + pstrcpy(output_buffer, output_size, cmdline); +#else + if (output_size == 1) { + /* Empty command-line. */ + output_buffer[0] = '\0'; + goto out; + } + + if (copy_from_user(output_buffer, ts->info->arg_start, + output_size)) { + status = -1; + goto out; + } + + /* Separate arguments by white spaces. */ + for (i = 0; i < output_size - 1; i++) { + if (output_buffer[i] == 0) { + output_buffer[i] = ' '; + } + } + out: +#endif + /* Unlock the buffer on the ARM side. */ + unlock_user(output_buffer, arg0, output_size); + + return status; + } + case TARGET_SYS_HEAPINFO: + { + uint32_t *ptr; + uint32_t limit; + GET_ARG(0); + +#ifdef CONFIG_USER_ONLY + /* Some C libraries assume the heap immediately follows .bss, so + allocate it using sbrk. */ + if (!ts->heap_limit) { + abi_ulong ret; + + ts->heap_base = do_brk(0); + limit = ts->heap_base + ARM_ANGEL_HEAP_SIZE; + /* Try a big heap, and reduce the size if that fails. */ + for (;;) { + ret = do_brk(limit); + if (ret >= limit) { + break; + } + limit = (ts->heap_base >> 1) + (limit >> 1); + } + ts->heap_limit = limit; + } + + ptr = lock_user(VERIFY_WRITE, arg0, 16, 0); + if (!ptr) { + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + } + ptr[0] = tswap32(ts->heap_base); + ptr[1] = tswap32(ts->heap_limit); + ptr[2] = tswap32(ts->stack_base); + ptr[3] = tswap32(0); /* Stack limit. */ + unlock_user(ptr, arg0, 16); +#else + limit = ram_size; + ptr = lock_user(VERIFY_WRITE, arg0, 16, 0); + if (!ptr) { + /* FIXME - should this error code be -TARGET_EFAULT ? */ + return (uint32_t)-1; + } + /* TODO: Make this use the limit of the loaded application. */ + ptr[0] = tswap32(limit / 2); + ptr[1] = tswap32(limit); + ptr[2] = tswap32(limit); /* Stack base */ + ptr[3] = tswap32(0); /* Stack limit. */ + unlock_user(ptr, arg0, 16); +#endif + return 0; + } + case TARGET_SYS_EXIT: + /* ARM specifies only Stopped_ApplicationExit as normal + * exit, everything else is considered an error */ + ret = (args == ADP_Stopped_ApplicationExit) ? 0 : 1; + gdb_exit(env, ret); + exit(ret); + default: + fprintf(stderr, "qemu: Unsupported SemiHosting SWI 0x%02x\n", nr); + cpu_dump_state(cs, stderr, fprintf, 0); + abort(); + } +} diff --git a/target-arm/arm_ldst.h b/target-arm/arm_ldst.h new file mode 100644 index 00000000..b1ece017 --- /dev/null +++ b/target-arm/arm_ldst.h @@ -0,0 +1,48 @@ +/* + * ARM load/store instructions for code (armeb-user support) + * + * Copyright (c) 2012 CodeSourcery, LLC + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef ARM_LDST_H +#define ARM_LDST_H + +#include "exec/cpu_ldst.h" +#include "qemu/bswap.h" + +/* Load an instruction and return it in the standard little-endian order */ +static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr, + bool do_swap) +{ + uint32_t insn = cpu_ldl_code(env, addr); + if (do_swap) { + return bswap32(insn); + } + return insn; +} + +/* Ditto, for a halfword (Thumb) instruction */ +static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr, + bool do_swap) +{ + uint16_t insn = cpu_lduw_code(env, addr); + if (do_swap) { + return bswap16(insn); + } + return insn; +} + +#endif diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h new file mode 100644 index 00000000..3cbc4a00 --- /dev/null +++ b/target-arm/cpu-qom.h @@ -0,0 +1,235 @@ +/* + * QEMU ARM CPU + * + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ +#ifndef QEMU_ARM_CPU_QOM_H +#define QEMU_ARM_CPU_QOM_H + +#include "qom/cpu.h" + +#define TYPE_ARM_CPU "arm-cpu" + +#define ARM_CPU_CLASS(klass) \ + OBJECT_CLASS_CHECK(ARMCPUClass, (klass), TYPE_ARM_CPU) +#define ARM_CPU(obj) \ + OBJECT_CHECK(ARMCPU, (obj), TYPE_ARM_CPU) +#define ARM_CPU_GET_CLASS(obj) \ + OBJECT_GET_CLASS(ARMCPUClass, (obj), TYPE_ARM_CPU) + +/** + * ARMCPUClass: + * @parent_realize: The parent class' realize handler. + * @parent_reset: The parent class' reset handler. + * + * An ARM CPU model. + */ +typedef struct ARMCPUClass { + /*< private >*/ + CPUClass parent_class; + /*< public >*/ + + DeviceRealize parent_realize; + void (*parent_reset)(CPUState *cpu); +} ARMCPUClass; + +/** + * ARMCPU: + * @env: #CPUARMState + * + * An ARM CPU core. + */ +typedef struct ARMCPU { + /*< private >*/ + CPUState parent_obj; + /*< public >*/ + + CPUARMState env; + + /* Coprocessor information */ + GHashTable *cp_regs; + /* For marshalling (mostly coprocessor) register state between the + * kernel and QEMU (for KVM) and between two QEMUs (for migration), + * we use these arrays. + */ + /* List of register indexes managed via these arrays; (full KVM style + * 64 bit indexes, not CPRegInfo 32 bit indexes) + */ + uint64_t *cpreg_indexes; + /* Values of the registers (cpreg_indexes[i]'s value is cpreg_values[i]) */ + uint64_t *cpreg_values; + /* Length of the indexes, values, reset_values arrays */ + int32_t cpreg_array_len; + /* These are used only for migration: incoming data arrives in + * these fields and is sanity checked in post_load before copying + * to the working data structures above. + */ + uint64_t *cpreg_vmstate_indexes; + uint64_t *cpreg_vmstate_values; + int32_t cpreg_vmstate_array_len; + + /* Timers used by the generic (architected) timer */ + QEMUTimer *gt_timer[NUM_GTIMERS]; + /* GPIO outputs for generic timer */ + qemu_irq gt_timer_outputs[NUM_GTIMERS]; + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; + + /* PSCI version for this CPU + * Bits[31:16] = Major Version + * Bits[15:0] = Minor Version + */ + uint32_t psci_version; + + /* Should CPU start in PSCI powered-off state? */ + bool start_powered_off; + /* CPU currently in PSCI powered-off state */ + bool powered_off; + /* CPU has security extension */ + bool has_el3; + + /* CPU has memory protection unit */ + bool has_mpu; + /* PMSAv7 MPU number of supported regions */ + uint32_t pmsav7_dregion; + + /* PSCI conduit used to invoke PSCI methods + * 0 - disabled, 1 - smc, 2 - hvc + */ + uint32_t psci_conduit; + + /* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or + * QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type. + */ + uint32_t kvm_target; + + /* KVM init features for this CPU */ + uint32_t kvm_init_features[7]; + + /* Uniprocessor system with MP extensions */ + bool mp_is_up; + + /* The instance init functions for implementation-specific subclasses + * set these fields to specify the implementation-dependent values of + * various constant registers and reset values of non-constant + * registers. + * Some of these might become QOM properties eventually. + * Field names match the official register names as defined in the + * ARMv7AR ARM Architecture Reference Manual. A reset_ prefix + * is used for reset values of non-constant registers; no reset_ + * prefix means a constant register. + */ + uint32_t midr; + uint32_t revidr; + uint32_t reset_fpsid; + uint32_t mvfr0; + uint32_t mvfr1; + uint32_t mvfr2; + uint32_t ctr; + uint32_t reset_sctlr; + uint32_t id_pfr0; + uint32_t id_pfr1; + uint32_t id_dfr0; + uint32_t id_afr0; + uint32_t id_mmfr0; + uint32_t id_mmfr1; + uint32_t id_mmfr2; + uint32_t id_mmfr3; + uint32_t id_isar0; + uint32_t id_isar1; + uint32_t id_isar2; + uint32_t id_isar3; + uint32_t id_isar4; + uint32_t id_isar5; + uint64_t id_aa64pfr0; + uint64_t id_aa64pfr1; + uint64_t id_aa64dfr0; + uint64_t id_aa64dfr1; + uint64_t id_aa64afr0; + uint64_t id_aa64afr1; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64mmfr0; + uint64_t id_aa64mmfr1; + uint32_t dbgdidr; + uint32_t clidr; + uint64_t mp_affinity; /* MP ID without feature bits */ + /* The elements of this array are the CCSIDR values for each cache, + * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc. + */ + uint32_t ccsidr[16]; + uint64_t reset_cbar; + uint32_t reset_auxcr; + bool reset_hivecs; + /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */ + uint32_t dcz_blocksize; + uint64_t rvbar; +} ARMCPU; + +#define TYPE_AARCH64_CPU "aarch64-cpu" +#define AARCH64_CPU_CLASS(klass) \ + OBJECT_CLASS_CHECK(AArch64CPUClass, (klass), TYPE_AARCH64_CPU) +#define AARCH64_CPU_GET_CLASS(obj) \ + OBJECT_GET_CLASS(AArch64CPUClass, (obj), TYPE_AArch64_CPU) + +typedef struct AArch64CPUClass { + /*< private >*/ + ARMCPUClass parent_class; + /*< public >*/ +} AArch64CPUClass; + +static inline ARMCPU *arm_env_get_cpu(CPUARMState *env) +{ + return container_of(env, ARMCPU, env); +} + +#define ENV_GET_CPU(e) CPU(arm_env_get_cpu(e)) + +#define ENV_OFFSET offsetof(ARMCPU, env) + +#ifndef CONFIG_USER_ONLY +extern const struct VMStateDescription vmstate_arm_cpu; +#endif + +void register_cp_regs_for_features(ARMCPU *cpu); +void init_cpreg_list(ARMCPU *cpu); + +void arm_cpu_do_interrupt(CPUState *cpu); +void arm_v7m_cpu_do_interrupt(CPUState *cpu); +bool arm_cpu_exec_interrupt(CPUState *cpu, int int_req); + +void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, + int flags); + +hwaddr arm_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); + +int arm_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); +int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); + +/* Callback functions for the generic timer's timers. */ +void arm_gt_ptimer_cb(void *opaque); +void arm_gt_vtimer_cb(void *opaque); + +#ifdef TARGET_AARCH64 +int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); +int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); + +void aarch64_cpu_do_interrupt(CPUState *cs); +#endif + +#endif diff --git a/target-arm/cpu.c b/target-arm/cpu.c new file mode 100644 index 00000000..ce0b8c09 --- /dev/null +++ b/target-arm/cpu.c @@ -0,0 +1,1458 @@ +/* + * QEMU ARM CPU + * + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ + +#include "cpu.h" +#include "internals.h" +#include "qemu-common.h" +#include "hw/qdev-properties.h" +#if !defined(CONFIG_USER_ONLY) +#include "hw/loader.h" +#endif +#include "hw/arm/arm.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "kvm_arm.h" + +static void arm_cpu_set_pc(CPUState *cs, vaddr value) +{ + ARMCPU *cpu = ARM_CPU(cs); + + cpu->env.regs[15] = value; +} + +static bool arm_cpu_has_work(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + + return !cpu->powered_off + && cs->interrupt_request & + (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD + | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ + | CPU_INTERRUPT_EXITTB); +} + +static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) +{ + /* Reset a single ARMCPRegInfo register */ + ARMCPRegInfo *ri = value; + ARMCPU *cpu = opaque; + + if (ri->type & (ARM_CP_SPECIAL | ARM_CP_ALIAS)) { + return; + } + + if (ri->resetfn) { + ri->resetfn(&cpu->env, ri); + return; + } + + /* A zero offset is never possible as it would be regs[0] + * so we use it to indicate that reset is being handled elsewhere. + * This is basically only used for fields in non-core coprocessors + * (like the pxa2xx ones). + */ + if (!ri->fieldoffset) { + return; + } + + if (cpreg_field_is_64bit(ri)) { + CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue; + } else { + CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue; + } +} + +/* CPUClass::reset() */ +static void arm_cpu_reset(CPUState *s) +{ + ARMCPU *cpu = ARM_CPU(s); + ARMCPUClass *acc = ARM_CPU_GET_CLASS(cpu); + CPUARMState *env = &cpu->env; + + acc->parent_reset(s); + + memset(env, 0, offsetof(CPUARMState, features)); + g_hash_table_foreach(cpu->cp_regs, cp_reg_reset, cpu); + env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid; + env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0; + env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1; + env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2; + + cpu->powered_off = cpu->start_powered_off; + s->halted = cpu->start_powered_off; + + if (arm_feature(env, ARM_FEATURE_IWMMXT)) { + env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; + } + + if (arm_feature(env, ARM_FEATURE_AARCH64)) { + /* 64 bit CPUs always start in 64 bit mode */ + env->aarch64 = 1; +#if defined(CONFIG_USER_ONLY) + env->pstate = PSTATE_MODE_EL0t; + /* Userspace expects access to DC ZVA, CTL_EL0 and the cache ops */ + env->cp15.sctlr_el[1] |= SCTLR_UCT | SCTLR_UCI | SCTLR_DZE; + /* and to the FP/Neon instructions */ + env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 2, 3); +#else + /* Reset into the highest available EL */ + if (arm_feature(env, ARM_FEATURE_EL3)) { + env->pstate = PSTATE_MODE_EL3h; + } else if (arm_feature(env, ARM_FEATURE_EL2)) { + env->pstate = PSTATE_MODE_EL2h; + } else { + env->pstate = PSTATE_MODE_EL1h; + } + env->pc = cpu->rvbar; +#endif + } else { +#if defined(CONFIG_USER_ONLY) + /* Userspace expects access to cp10 and cp11 for FP/Neon */ + env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 4, 0xf); +#endif + } + +#if defined(CONFIG_USER_ONLY) + env->uncached_cpsr = ARM_CPU_MODE_USR; + /* For user mode we must enable access to coprocessors */ + env->vfp.xregs[ARM_VFP_FPEXC] = 1 << 30; + if (arm_feature(env, ARM_FEATURE_IWMMXT)) { + env->cp15.c15_cpar = 3; + } else if (arm_feature(env, ARM_FEATURE_XSCALE)) { + env->cp15.c15_cpar = 1; + } +#else + /* SVC mode with interrupts disabled. */ + env->uncached_cpsr = ARM_CPU_MODE_SVC; + env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F; + /* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is + * clear at reset. Initial SP and PC are loaded from ROM. + */ + if (IS_M(env)) { + uint32_t initial_msp; /* Loaded from 0x0 */ + uint32_t initial_pc; /* Loaded from 0x4 */ + uint8_t *rom; + + env->daif &= ~PSTATE_I; + rom = rom_ptr(0); + if (rom) { + /* Address zero is covered by ROM which hasn't yet been + * copied into physical memory. + */ + initial_msp = ldl_p(rom); + initial_pc = ldl_p(rom + 4); + } else { + /* Address zero not covered by a ROM blob, or the ROM blob + * is in non-modifiable memory and this is a second reset after + * it got copied into memory. In the latter case, rom_ptr + * will return a NULL pointer and we should use ldl_phys instead. + */ + initial_msp = ldl_phys(s->as, 0); + initial_pc = ldl_phys(s->as, 4); + } + + env->regs[13] = initial_msp & 0xFFFFFFFC; + env->regs[15] = initial_pc & ~1; + env->thumb = initial_pc & 1; + } + + /* AArch32 has a hard highvec setting of 0xFFFF0000. If we are currently + * executing as AArch32 then check if highvecs are enabled and + * adjust the PC accordingly. + */ + if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) { + env->regs[15] = 0xFFFF0000; + } + + env->vfp.xregs[ARM_VFP_FPEXC] = 0; +#endif + set_flush_to_zero(1, &env->vfp.standard_fp_status); + set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); + set_default_nan_mode(1, &env->vfp.standard_fp_status); + set_float_detect_tininess(float_tininess_before_rounding, + &env->vfp.fp_status); + set_float_detect_tininess(float_tininess_before_rounding, + &env->vfp.standard_fp_status); + tlb_flush(s, 1); + +#ifndef CONFIG_USER_ONLY + if (kvm_enabled()) { + kvm_arm_reset_vcpu(cpu); + } +#endif + + hw_breakpoint_update_all(cpu); + hw_watchpoint_update_all(cpu); +} + +bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUClass *cc = CPU_GET_CLASS(cs); + CPUARMState *env = cs->env_ptr; + uint32_t cur_el = arm_current_el(env); + bool secure = arm_is_secure(env); + uint32_t target_el; + uint32_t excp_idx; + bool ret = false; + + if (interrupt_request & CPU_INTERRUPT_FIQ) { + excp_idx = EXCP_FIQ; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el)) { + cs->exception_index = excp_idx; + env->exception.target_el = target_el; + cc->do_interrupt(cs); + ret = true; + } + } + if (interrupt_request & CPU_INTERRUPT_HARD) { + excp_idx = EXCP_IRQ; + target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure); + if (arm_excp_unmasked(cs, excp_idx, target_el)) { + cs->exception_index = excp_idx; + env->exception.target_el = target_el; + cc->do_interrupt(cs); + ret = true; + } + } + if (interrupt_request & CPU_INTERRUPT_VIRQ) { + excp_idx = EXCP_VIRQ; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el)) { + cs->exception_index = excp_idx; + env->exception.target_el = target_el; + cc->do_interrupt(cs); + ret = true; + } + } + if (interrupt_request & CPU_INTERRUPT_VFIQ) { + excp_idx = EXCP_VFIQ; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el)) { + cs->exception_index = excp_idx; + env->exception.target_el = target_el; + cc->do_interrupt(cs); + ret = true; + } + } + + return ret; +} + +#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) +static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUClass *cc = CPU_GET_CLASS(cs); + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + bool ret = false; + + + if (interrupt_request & CPU_INTERRUPT_FIQ + && !(env->daif & PSTATE_F)) { + cs->exception_index = EXCP_FIQ; + cc->do_interrupt(cs); + ret = true; + } + /* ARMv7-M interrupt return works by loading a magic value + * into the PC. On real hardware the load causes the + * return to occur. The qemu implementation performs the + * jump normally, then does the exception return when the + * CPU tries to execute code at the magic address. + * This will cause the magic PC value to be pushed to + * the stack if an interrupt occurred at the wrong time. + * We avoid this by disabling interrupts when + * pc contains a magic address. + */ + if (interrupt_request & CPU_INTERRUPT_HARD + && !(env->daif & PSTATE_I) + && (env->regs[15] < 0xfffffff0)) { + cs->exception_index = EXCP_IRQ; + cc->do_interrupt(cs); + ret = true; + } + return ret; +} +#endif + +#ifndef CONFIG_USER_ONLY +static void arm_cpu_set_irq(void *opaque, int irq, int level) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + static const int mask[] = { + [ARM_CPU_IRQ] = CPU_INTERRUPT_HARD, + [ARM_CPU_FIQ] = CPU_INTERRUPT_FIQ, + [ARM_CPU_VIRQ] = CPU_INTERRUPT_VIRQ, + [ARM_CPU_VFIQ] = CPU_INTERRUPT_VFIQ + }; + + switch (irq) { + case ARM_CPU_VIRQ: + case ARM_CPU_VFIQ: + if (!arm_feature(env, ARM_FEATURE_EL2)) { + hw_error("%s: Virtual interrupt line %d with no EL2 support\n", + __func__, irq); + } + /* fall through */ + case ARM_CPU_IRQ: + case ARM_CPU_FIQ: + if (level) { + cpu_interrupt(cs, mask[irq]); + } else { + cpu_reset_interrupt(cs, mask[irq]); + } + break; + default: + hw_error("arm_cpu_set_irq: Bad interrupt line %d\n", irq); + } +} + +static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level) +{ +#ifdef CONFIG_KVM + ARMCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + int kvm_irq = KVM_ARM_IRQ_TYPE_CPU << KVM_ARM_IRQ_TYPE_SHIFT; + + switch (irq) { + case ARM_CPU_IRQ: + kvm_irq |= KVM_ARM_IRQ_CPU_IRQ; + break; + case ARM_CPU_FIQ: + kvm_irq |= KVM_ARM_IRQ_CPU_FIQ; + break; + default: + hw_error("arm_cpu_kvm_set_irq: Bad interrupt line %d\n", irq); + } + kvm_irq |= cs->cpu_index << KVM_ARM_IRQ_VCPU_SHIFT; + kvm_set_irq(kvm_state, kvm_irq, level ? 1 : 0); +#endif +} + +static bool arm_cpu_is_big_endian(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + int cur_el; + + cpu_synchronize_state(cs); + + /* In 32bit guest endianness is determined by looking at CPSR's E bit */ + if (!is_a64(env)) { + return (env->uncached_cpsr & CPSR_E) ? 1 : 0; + } + + cur_el = arm_current_el(env); + + if (cur_el == 0) { + return (env->cp15.sctlr_el[1] & SCTLR_E0E) != 0; + } + + return (env->cp15.sctlr_el[cur_el] & SCTLR_EE) != 0; +} + +#endif + +static inline void set_feature(CPUARMState *env, int feature) +{ + env->features |= 1ULL << feature; +} + +static inline void unset_feature(CPUARMState *env, int feature) +{ + env->features &= ~(1ULL << feature); +} + +static int +print_insn_thumb1(bfd_vma pc, disassemble_info *info) +{ + return print_insn_arm(pc | 1, info); +} + +static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) +{ + ARMCPU *ac = ARM_CPU(cpu); + CPUARMState *env = &ac->env; + + if (is_a64(env)) { + /* We might not be compiled with the A64 disassembler + * because it needs a C++ compiler. Leave print_insn + * unset in this case to use the caller default behaviour. + */ +#if defined(CONFIG_ARM_A64_DIS) + info->print_insn = print_insn_arm_a64; +#endif + } else if (env->thumb) { + info->print_insn = print_insn_thumb1; + } else { + info->print_insn = print_insn_arm; + } + if (env->bswap_code) { +#ifdef TARGET_WORDS_BIGENDIAN + info->endian = BFD_ENDIAN_LITTLE; +#else + info->endian = BFD_ENDIAN_BIG; +#endif + } +} + +#define ARM_CPUS_PER_CLUSTER 8 + +static void arm_cpu_initfn(Object *obj) +{ + CPUState *cs = CPU(obj); + ARMCPU *cpu = ARM_CPU(obj); + static bool inited; + uint32_t Aff1, Aff0; + + cs->env_ptr = &cpu->env; + cpu_exec_init(cs, &error_abort); + cpu->cp_regs = g_hash_table_new_full(g_int_hash, g_int_equal, + g_free, g_free); + + /* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will override it. + * We don't support setting cluster ID ([16..23]) (known as Aff2 + * in later ARM ARM versions), or any of the higher affinity level fields, + * so these bits always RAZ. + */ + Aff1 = cs->cpu_index / ARM_CPUS_PER_CLUSTER; + Aff0 = cs->cpu_index % ARM_CPUS_PER_CLUSTER; + cpu->mp_affinity = (Aff1 << 8) | Aff0; + +#ifndef CONFIG_USER_ONLY + /* Our inbound IRQ and FIQ lines */ + if (kvm_enabled()) { + /* VIRQ and VFIQ are unused with KVM but we add them to maintain + * the same interface as non-KVM CPUs. + */ + qdev_init_gpio_in(DEVICE(cpu), arm_cpu_kvm_set_irq, 4); + } else { + qdev_init_gpio_in(DEVICE(cpu), arm_cpu_set_irq, 4); + } + + cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, + arm_gt_ptimer_cb, cpu); + cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, + arm_gt_vtimer_cb, cpu); + qdev_init_gpio_out(DEVICE(cpu), cpu->gt_timer_outputs, + ARRAY_SIZE(cpu->gt_timer_outputs)); +#endif + + /* DTB consumers generally don't in fact care what the 'compatible' + * string is, so always provide some string and trust that a hypothetical + * picky DTB consumer will also provide a helpful error message. + */ + cpu->dtb_compatible = "qemu,unknown"; + cpu->psci_version = 1; /* By default assume PSCI v0.1 */ + cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; + + if (tcg_enabled()) { + cpu->psci_version = 2; /* TCG implements PSCI 0.2 */ + if (!inited) { + inited = true; + arm_translate_init(); + } + } +} + +static Property arm_cpu_reset_cbar_property = + DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0); + +static Property arm_cpu_reset_hivecs_property = + DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false); + +static Property arm_cpu_rvbar_property = + DEFINE_PROP_UINT64("rvbar", ARMCPU, rvbar, 0); + +static Property arm_cpu_has_el3_property = + DEFINE_PROP_BOOL("has_el3", ARMCPU, has_el3, true); + +static Property arm_cpu_has_mpu_property = + DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true); + +static Property arm_cpu_pmsav7_dregion_property = + DEFINE_PROP_UINT32("pmsav7-dregion", ARMCPU, pmsav7_dregion, 16); + +static void arm_cpu_post_init(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) || + arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) { + qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property, + &error_abort); + } + + if (!arm_feature(&cpu->env, ARM_FEATURE_M)) { + qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_hivecs_property, + &error_abort); + } + + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + qdev_property_add_static(DEVICE(obj), &arm_cpu_rvbar_property, + &error_abort); + } + + if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) { + /* Add the has_el3 state CPU property only if EL3 is allowed. This will + * prevent "has_el3" from existing on CPUs which cannot support EL3. + */ + qdev_property_add_static(DEVICE(obj), &arm_cpu_has_el3_property, + &error_abort); + } + + if (arm_feature(&cpu->env, ARM_FEATURE_MPU)) { + qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property, + &error_abort); + if (arm_feature(&cpu->env, ARM_FEATURE_V7)) { + qdev_property_add_static(DEVICE(obj), + &arm_cpu_pmsav7_dregion_property, + &error_abort); + } + } + +} + +static void arm_cpu_finalizefn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + g_hash_table_destroy(cpu->cp_regs); +} + +static void arm_cpu_realizefn(DeviceState *dev, Error **errp) +{ + CPUState *cs = CPU(dev); + ARMCPU *cpu = ARM_CPU(dev); + ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); + CPUARMState *env = &cpu->env; + + /* Some features automatically imply others: */ + if (arm_feature(env, ARM_FEATURE_V8)) { + set_feature(env, ARM_FEATURE_V7); + set_feature(env, ARM_FEATURE_ARM_DIV); + set_feature(env, ARM_FEATURE_LPAE); + } + if (arm_feature(env, ARM_FEATURE_V7)) { + set_feature(env, ARM_FEATURE_VAPA); + set_feature(env, ARM_FEATURE_THUMB2); + set_feature(env, ARM_FEATURE_MPIDR); + if (!arm_feature(env, ARM_FEATURE_M)) { + set_feature(env, ARM_FEATURE_V6K); + } else { + set_feature(env, ARM_FEATURE_V6); + } + } + if (arm_feature(env, ARM_FEATURE_V6K)) { + set_feature(env, ARM_FEATURE_V6); + set_feature(env, ARM_FEATURE_MVFR); + } + if (arm_feature(env, ARM_FEATURE_V6)) { + set_feature(env, ARM_FEATURE_V5); + if (!arm_feature(env, ARM_FEATURE_M)) { + set_feature(env, ARM_FEATURE_AUXCR); + } + } + if (arm_feature(env, ARM_FEATURE_V5)) { + set_feature(env, ARM_FEATURE_V4T); + } + if (arm_feature(env, ARM_FEATURE_M)) { + set_feature(env, ARM_FEATURE_THUMB_DIV); + } + if (arm_feature(env, ARM_FEATURE_ARM_DIV)) { + set_feature(env, ARM_FEATURE_THUMB_DIV); + } + if (arm_feature(env, ARM_FEATURE_VFP4)) { + set_feature(env, ARM_FEATURE_VFP3); + set_feature(env, ARM_FEATURE_VFP_FP16); + } + if (arm_feature(env, ARM_FEATURE_VFP3)) { + set_feature(env, ARM_FEATURE_VFP); + } + if (arm_feature(env, ARM_FEATURE_LPAE)) { + set_feature(env, ARM_FEATURE_V7MP); + set_feature(env, ARM_FEATURE_PXN); + } + if (arm_feature(env, ARM_FEATURE_CBAR_RO)) { + set_feature(env, ARM_FEATURE_CBAR); + } + if (arm_feature(env, ARM_FEATURE_THUMB2) && + !arm_feature(env, ARM_FEATURE_M)) { + set_feature(env, ARM_FEATURE_THUMB_DSP); + } + + if (cpu->reset_hivecs) { + cpu->reset_sctlr |= (1 << 13); + } + + if (!cpu->has_el3) { + /* If the has_el3 CPU property is disabled then we need to disable the + * feature. + */ + unset_feature(env, ARM_FEATURE_EL3); + + /* Disable the security extension feature bits in the processor feature + * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12]. + */ + cpu->id_pfr1 &= ~0xf0; + cpu->id_aa64pfr0 &= ~0xf000; + } + + if (!cpu->has_mpu) { + unset_feature(env, ARM_FEATURE_MPU); + } + + if (arm_feature(env, ARM_FEATURE_MPU) && + arm_feature(env, ARM_FEATURE_V7)) { + uint32_t nr = cpu->pmsav7_dregion; + + if (nr > 0xff) { + error_setg(errp, "PMSAv7 MPU #regions invalid %" PRIu32 "\n", nr); + return; + } + + if (nr) { + env->pmsav7.drbar = g_new0(uint32_t, nr); + env->pmsav7.drsr = g_new0(uint32_t, nr); + env->pmsav7.dracr = g_new0(uint32_t, nr); + } + } + + register_cp_regs_for_features(cpu); + arm_cpu_register_gdb_regs_for_features(cpu); + + init_cpreg_list(cpu); + + qemu_init_vcpu(cs); + cpu_reset(cs); + + acc->parent_realize(dev, errp); +} + +static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) +{ + ObjectClass *oc; + char *typename; + char **cpuname; + + if (!cpu_model) { + return NULL; + } + + cpuname = g_strsplit(cpu_model, ",", 1); + typename = g_strdup_printf("%s-" TYPE_ARM_CPU, cpuname[0]); + oc = object_class_by_name(typename); + g_strfreev(cpuname); + g_free(typename); + if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU) || + object_class_is_abstract(oc)) { + return NULL; + } + return oc; +} + +/* CPU models. These are not needed for the AArch64 linux-user build. */ +#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +static void arm926_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm926"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_VFP); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); + cpu->midr = 0x41069265; + cpu->reset_fpsid = 0x41011090; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00090078; +} + +static void arm946_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm946"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_MPU); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x41059461; + cpu->ctr = 0x0f004006; + cpu->reset_sctlr = 0x00000078; +} + +static void arm1026_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm1026"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_VFP); + set_feature(&cpu->env, ARM_FEATURE_AUXCR); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN); + cpu->midr = 0x4106a262; + cpu->reset_fpsid = 0x410110a0; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00090078; + cpu->reset_auxcr = 1; + { + /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ + ARMCPRegInfo ifar = { + .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.ifar_ns), + .resetvalue = 0 + }; + define_one_arm_cp_reg(cpu, &ifar); + } +} + +static void arm1136_r2_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + /* What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an + * older core than plain "arm1136". In particular this does not + * have the v6K features. + * These ID register values are correct for 1136 but may be wrong + * for 1136_r2 (in particular r0p2 does not actually implement most + * of the ID registers). + */ + + cpu->dtb_compatible = "arm,arm1136"; + set_feature(&cpu->env, ARM_FEATURE_V6); + set_feature(&cpu->env, ARM_FEATURE_VFP); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + cpu->midr = 0x4107b362; + cpu->reset_fpsid = 0x410120b4; + cpu->mvfr0 = 0x11111111; + cpu->mvfr1 = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; + cpu->id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; + cpu->id_mmfr0 = 0x01130003; + cpu->id_mmfr1 = 0x10030302; + cpu->id_mmfr2 = 0x01222110; + cpu->id_isar0 = 0x00140011; + cpu->id_isar1 = 0x12002111; + cpu->id_isar2 = 0x11231111; + cpu->id_isar3 = 0x01102131; + cpu->id_isar4 = 0x141; + cpu->reset_auxcr = 7; +} + +static void arm1136_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm1136"; + set_feature(&cpu->env, ARM_FEATURE_V6K); + set_feature(&cpu->env, ARM_FEATURE_V6); + set_feature(&cpu->env, ARM_FEATURE_VFP); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + cpu->midr = 0x4117b363; + cpu->reset_fpsid = 0x410120b4; + cpu->mvfr0 = 0x11111111; + cpu->mvfr1 = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; + cpu->id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; + cpu->id_mmfr0 = 0x01130003; + cpu->id_mmfr1 = 0x10030302; + cpu->id_mmfr2 = 0x01222110; + cpu->id_isar0 = 0x00140011; + cpu->id_isar1 = 0x12002111; + cpu->id_isar2 = 0x11231111; + cpu->id_isar3 = 0x01102131; + cpu->id_isar4 = 0x141; + cpu->reset_auxcr = 7; +} + +static void arm1176_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm1176"; + set_feature(&cpu->env, ARM_FEATURE_V6K); + set_feature(&cpu->env, ARM_FEATURE_VFP); + set_feature(&cpu->env, ARM_FEATURE_VAPA); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG); + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + set_feature(&cpu->env, ARM_FEATURE_EL3); + cpu->midr = 0x410fb767; + cpu->reset_fpsid = 0x410120b5; + cpu->mvfr0 = 0x11111111; + cpu->mvfr1 = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x11; + cpu->id_dfr0 = 0x33; + cpu->id_afr0 = 0; + cpu->id_mmfr0 = 0x01130003; + cpu->id_mmfr1 = 0x10030302; + cpu->id_mmfr2 = 0x01222100; + cpu->id_isar0 = 0x0140011; + cpu->id_isar1 = 0x12002111; + cpu->id_isar2 = 0x11231121; + cpu->id_isar3 = 0x01102131; + cpu->id_isar4 = 0x01141; + cpu->reset_auxcr = 7; +} + +static void arm11mpcore_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,arm11mpcore"; + set_feature(&cpu->env, ARM_FEATURE_V6K); + set_feature(&cpu->env, ARM_FEATURE_VFP); + set_feature(&cpu->env, ARM_FEATURE_VAPA); + set_feature(&cpu->env, ARM_FEATURE_MPIDR); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x410fb022; + cpu->reset_fpsid = 0x410120b4; + cpu->mvfr0 = 0x11111111; + cpu->mvfr1 = 0x00000000; + cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; + cpu->id_dfr0 = 0; + cpu->id_afr0 = 0x2; + cpu->id_mmfr0 = 0x01100103; + cpu->id_mmfr1 = 0x10020302; + cpu->id_mmfr2 = 0x01222000; + cpu->id_isar0 = 0x00100011; + cpu->id_isar1 = 0x12002111; + cpu->id_isar2 = 0x11221011; + cpu->id_isar3 = 0x01102131; + cpu->id_isar4 = 0x141; + cpu->reset_auxcr = 1; +} + +static void cortex_m3_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + cpu->midr = 0x410fc231; +} + +static void cortex_m4_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x410fc240; /* r0p0 */ +} +static void arm_v7m_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + +#ifndef CONFIG_USER_ONLY + cc->do_interrupt = arm_v7m_cpu_do_interrupt; +#endif + + cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt; +} + +static const ARMCPRegInfo cortexr5_cp_reginfo[] = { + /* Dummy the TCM region regs for the moment */ + { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST }, + { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST }, + REGINFO_SENTINEL +}; + +static void cortex_r5_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DIV); + set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); + set_feature(&cpu->env, ARM_FEATURE_V7MP); + set_feature(&cpu->env, ARM_FEATURE_MPU); + cpu->midr = 0x411fc153; /* r1p3 */ + cpu->id_pfr0 = 0x0131; + cpu->id_pfr1 = 0x001; + cpu->id_dfr0 = 0x010400; + cpu->id_afr0 = 0x0; + cpu->id_mmfr0 = 0x0210030; + cpu->id_mmfr1 = 0x00000000; + cpu->id_mmfr2 = 0x01200000; + cpu->id_mmfr3 = 0x0211; + cpu->id_isar0 = 0x2101111; + cpu->id_isar1 = 0x13112111; + cpu->id_isar2 = 0x21232141; + cpu->id_isar3 = 0x01112131; + cpu->id_isar4 = 0x0010142; + cpu->id_isar5 = 0x0; + cpu->mp_is_up = true; + define_arm_cp_regs(cpu, cortexr5_cp_reginfo); +} + +static const ARMCPRegInfo cortexa8_cp_reginfo[] = { + { .name = "L2LOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "L2AUXCR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static void cortex_a8_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a8"; + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_VFP3); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_EL3); + cpu->midr = 0x410fc080; + cpu->reset_fpsid = 0x410330c0; + cpu->mvfr0 = 0x11110222; + cpu->mvfr1 = 0x00011100; + cpu->ctr = 0x82048004; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x1031; + cpu->id_pfr1 = 0x11; + cpu->id_dfr0 = 0x400; + cpu->id_afr0 = 0; + cpu->id_mmfr0 = 0x31100003; + cpu->id_mmfr1 = 0x20000000; + cpu->id_mmfr2 = 0x01202000; + cpu->id_mmfr3 = 0x11; + cpu->id_isar0 = 0x00101111; + cpu->id_isar1 = 0x12112111; + cpu->id_isar2 = 0x21232031; + cpu->id_isar3 = 0x11112131; + cpu->id_isar4 = 0x00111142; + cpu->dbgdidr = 0x15141000; + cpu->clidr = (1 << 27) | (2 << 24) | 3; + cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ + cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ + cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. */ + cpu->reset_auxcr = 2; + define_arm_cp_regs(cpu, cortexa8_cp_reginfo); +} + +static const ARMCPRegInfo cortexa9_cp_reginfo[] = { + /* power_control should be set to maximum latency. Again, + * default to 0 and set by private hook + */ + { .name = "A9_PWRCTL", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c15_power_control) }, + { .name = "A9_DIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c15_diagnostic) }, + { .name = "A9_PWRDIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c15_power_diagnostic) }, + { .name = "NEONBUSY", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST }, + /* TLB lockdown control */ + { .name = "TLB_LOCKR", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 2, + .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP }, + { .name = "TLB_LOCKW", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 4, + .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP }, + { .name = "TLB_VA", .cp = 15, .crn = 15, .crm = 5, .opc1 = 5, .opc2 = 2, + .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST }, + { .name = "TLB_PA", .cp = 15, .crn = 15, .crm = 6, .opc1 = 5, .opc2 = 2, + .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST }, + { .name = "TLB_ATTR", .cp = 15, .crn = 15, .crm = 7, .opc1 = 5, .opc2 = 2, + .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST }, + REGINFO_SENTINEL +}; + +static void cortex_a9_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a9"; + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_VFP3); + set_feature(&cpu->env, ARM_FEATURE_VFP_FP16); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); + set_feature(&cpu->env, ARM_FEATURE_EL3); + /* Note that A9 supports the MP extensions even for + * A9UP and single-core A9MP (which are both different + * and valid configurations; we don't model A9UP). + */ + set_feature(&cpu->env, ARM_FEATURE_V7MP); + set_feature(&cpu->env, ARM_FEATURE_CBAR); + cpu->midr = 0x410fc090; + cpu->reset_fpsid = 0x41033090; + cpu->mvfr0 = 0x11110222; + cpu->mvfr1 = 0x01111111; + cpu->ctr = 0x80038003; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x1031; + cpu->id_pfr1 = 0x11; + cpu->id_dfr0 = 0x000; + cpu->id_afr0 = 0; + cpu->id_mmfr0 = 0x00100103; + cpu->id_mmfr1 = 0x20000000; + cpu->id_mmfr2 = 0x01230000; + cpu->id_mmfr3 = 0x00002111; + cpu->id_isar0 = 0x00101111; + cpu->id_isar1 = 0x13112111; + cpu->id_isar2 = 0x21232041; + cpu->id_isar3 = 0x11112131; + cpu->id_isar4 = 0x00111142; + cpu->dbgdidr = 0x35141000; + cpu->clidr = (1 << 27) | (1 << 24) | 3; + cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); +} + +#ifndef CONFIG_USER_ONLY +static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Linux wants the number of processors from here. + * Might as well set the interrupt-controller bit too. + */ + return ((smp_cpus - 1) << 24) | (1 << 23); +} +#endif + +static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +#ifndef CONFIG_USER_ONLY + { .name = "L2CTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2, + .access = PL1_RW, .resetvalue = 0, .readfn = a15_l2ctlr_read, + .writefn = arm_cp_write_ignore, }, +#endif + { .name = "L2ECTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 3, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static void cortex_a15_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a15"; + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); + set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_LPAE); + set_feature(&cpu->env, ARM_FEATURE_EL3); + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15; + cpu->midr = 0x412fc0f1; + cpu->reset_fpsid = 0x410430f0; + cpu->mvfr0 = 0x10110222; + cpu->mvfr1 = 0x11111111; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x00001131; + cpu->id_pfr1 = 0x00011011; + cpu->id_dfr0 = 0x02010555; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10201105; + cpu->id_mmfr1 = 0x20000000; + cpu->id_mmfr2 = 0x01240000; + cpu->id_mmfr3 = 0x02102211; + cpu->id_isar0 = 0x02101110; + cpu->id_isar1 = 0x13112111; + cpu->id_isar2 = 0x21232041; + cpu->id_isar3 = 0x11112131; + cpu->id_isar4 = 0x10011142; + cpu->dbgdidr = 0x3515f021; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); +} + +static void ti925t_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V4T); + set_feature(&cpu->env, ARM_FEATURE_OMAPCP); + cpu->midr = ARM_CPUID_TI925T; + cpu->ctr = 0x5109149; + cpu->reset_sctlr = 0x00000070; +} + +static void sa1100_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "intel,sa1100"; + set_feature(&cpu->env, ARM_FEATURE_STRONGARM); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x4401A11B; + cpu->reset_sctlr = 0x00000070; +} + +static void sa1110_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_STRONGARM); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x6901B119; + cpu->reset_sctlr = 0x00000070; +} + +static void pxa250_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052100; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa255_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052d00; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa260_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052903; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa261_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052d05; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa262_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + cpu->midr = 0x69052d06; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270a0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054110; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270a1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054111; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270b0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054112; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270b1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054113; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270c0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054114; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +static void pxa270c5_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "marvell,xscale"; + set_feature(&cpu->env, ARM_FEATURE_V5); + set_feature(&cpu->env, ARM_FEATURE_XSCALE); + set_feature(&cpu->env, ARM_FEATURE_IWMMXT); + cpu->midr = 0x69054117; + cpu->ctr = 0xd172172; + cpu->reset_sctlr = 0x00000078; +} + +#ifdef CONFIG_USER_ONLY +static void arm_any_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); + set_feature(&cpu->env, ARM_FEATURE_V8_AES); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); + set_feature(&cpu->env, ARM_FEATURE_CRC); + cpu->midr = 0xffffffff; +} +#endif + +#endif /* !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) */ + +typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); +} ARMCPUInfo; + +static const ARMCPUInfo arm_cpus[] = { +#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, + /* What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an + * older core than plain "arm1136". In particular this does not + * have the v6K features. + */ + { .name = "arm1136-r2", .initfn = arm1136_r2_initfn }, + { .name = "arm1136", .initfn = arm1136_initfn }, + { .name = "arm1176", .initfn = arm1176_initfn }, + { .name = "arm11mpcore", .initfn = arm11mpcore_initfn }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m4", .initfn = cortex_m4_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-r5", .initfn = cortex_r5_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, + { .name = "ti925t", .initfn = ti925t_initfn }, + { .name = "sa1100", .initfn = sa1100_initfn }, + { .name = "sa1110", .initfn = sa1110_initfn }, + { .name = "pxa250", .initfn = pxa250_initfn }, + { .name = "pxa255", .initfn = pxa255_initfn }, + { .name = "pxa260", .initfn = pxa260_initfn }, + { .name = "pxa261", .initfn = pxa261_initfn }, + { .name = "pxa262", .initfn = pxa262_initfn }, + /* "pxa270" is an alias for "pxa270-a0" */ + { .name = "pxa270", .initfn = pxa270a0_initfn }, + { .name = "pxa270-a0", .initfn = pxa270a0_initfn }, + { .name = "pxa270-a1", .initfn = pxa270a1_initfn }, + { .name = "pxa270-b0", .initfn = pxa270b0_initfn }, + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, +#ifdef CONFIG_USER_ONLY + { .name = "any", .initfn = arm_any_initfn }, +#endif +#endif + { .name = NULL } +}; + +static Property arm_cpu_properties[] = { + DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false), + DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0), + DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), + DEFINE_PROP_END_OF_LIST() +}; + +#ifdef CONFIG_USER_ONLY +static int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw, + int mmu_idx) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + env->exception.vaddress = address; + if (rw == 2) { + cs->exception_index = EXCP_PREFETCH_ABORT; + } else { + cs->exception_index = EXCP_DATA_ABORT; + } + return 1; +} +#endif + +static void arm_cpu_class_init(ObjectClass *oc, void *data) +{ + ARMCPUClass *acc = ARM_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(acc); + DeviceClass *dc = DEVICE_CLASS(oc); + + acc->parent_realize = dc->realize; + dc->realize = arm_cpu_realizefn; + dc->props = arm_cpu_properties; + + acc->parent_reset = cc->reset; + cc->reset = arm_cpu_reset; + + cc->class_by_name = arm_cpu_class_by_name; + cc->has_work = arm_cpu_has_work; + cc->cpu_exec_interrupt = arm_cpu_exec_interrupt; + cc->dump_state = arm_cpu_dump_state; + cc->set_pc = arm_cpu_set_pc; + cc->gdb_read_register = arm_cpu_gdb_read_register; + cc->gdb_write_register = arm_cpu_gdb_write_register; +#ifdef CONFIG_USER_ONLY + cc->handle_mmu_fault = arm_cpu_handle_mmu_fault; +#else + cc->do_interrupt = arm_cpu_do_interrupt; + cc->get_phys_page_debug = arm_cpu_get_phys_page_debug; + cc->vmsd = &vmstate_arm_cpu; + cc->virtio_is_big_endian = arm_cpu_is_big_endian; +#endif + cc->gdb_num_core_regs = 26; + cc->gdb_core_xml_file = "arm-core.xml"; + cc->gdb_stop_before_watchpoint = true; + cc->debug_excp_handler = arm_debug_excp_handler; + + cc->disas_set_info = arm_disas_set_info; + + /* + * Reason: arm_cpu_initfn() calls cpu_exec_init(), which saves + * the object in cpus -> dangling pointer after final + * object_unref(). + * + * Once this is fixed, the devices that create ARM CPUs should be + * updated not to set cannot_destroy_with_object_finalize_yet, + * unless they still screw up something else. + */ + dc->cannot_destroy_with_object_finalize_yet = true; +} + +static void cpu_register(const ARMCPUInfo *info) +{ + TypeInfo type_info = { + .parent = TYPE_ARM_CPU, + .instance_size = sizeof(ARMCPU), + .instance_init = info->initfn, + .class_size = sizeof(ARMCPUClass), + .class_init = info->class_init, + }; + + type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name); + type_register(&type_info); + g_free((void *)type_info.name); +} + +static const TypeInfo arm_cpu_type_info = { + .name = TYPE_ARM_CPU, + .parent = TYPE_CPU, + .instance_size = sizeof(ARMCPU), + .instance_init = arm_cpu_initfn, + .instance_post_init = arm_cpu_post_init, + .instance_finalize = arm_cpu_finalizefn, + .abstract = true, + .class_size = sizeof(ARMCPUClass), + .class_init = arm_cpu_class_init, +}; + +static void arm_cpu_register_types(void) +{ + const ARMCPUInfo *info = arm_cpus; + + type_register_static(&arm_cpu_type_info); + + while (info->name) { + cpu_register(info); + info++; + } +} + +type_init(arm_cpu_register_types) diff --git a/target-arm/cpu.h b/target-arm/cpu.h new file mode 100644 index 00000000..7e89152b --- /dev/null +++ b/target-arm/cpu.h @@ -0,0 +1,1936 @@ +/* + * ARM virtual CPU header + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef CPU_ARM_H +#define CPU_ARM_H + +#include "config.h" + +#include "kvm-consts.h" + +#if defined(TARGET_AARCH64) + /* AArch64 definitions */ +# define TARGET_LONG_BITS 64 +# define ELF_MACHINE EM_AARCH64 +#else +# define TARGET_LONG_BITS 32 +# define ELF_MACHINE EM_ARM +#endif + +#define TARGET_IS_BIENDIAN 1 + +#define CPUArchState struct CPUARMState + +#include "qemu-common.h" +#include "exec/cpu-defs.h" + +#include "fpu/softfloat.h" + +#define EXCP_UDEF 1 /* undefined instruction */ +#define EXCP_SWI 2 /* software interrupt */ +#define EXCP_PREFETCH_ABORT 3 +#define EXCP_DATA_ABORT 4 +#define EXCP_IRQ 5 +#define EXCP_FIQ 6 +#define EXCP_BKPT 7 +#define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */ +#define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. */ +#define EXCP_STREX 10 +#define EXCP_HVC 11 /* HyperVisor Call */ +#define EXCP_HYP_TRAP 12 +#define EXCP_SMC 13 /* Secure Monitor Call */ +#define EXCP_VIRQ 14 +#define EXCP_VFIQ 15 + +#define ARMV7M_EXCP_RESET 1 +#define ARMV7M_EXCP_NMI 2 +#define ARMV7M_EXCP_HARD 3 +#define ARMV7M_EXCP_MEM 4 +#define ARMV7M_EXCP_BUS 5 +#define ARMV7M_EXCP_USAGE 6 +#define ARMV7M_EXCP_SVC 11 +#define ARMV7M_EXCP_DEBUG 12 +#define ARMV7M_EXCP_PENDSV 14 +#define ARMV7M_EXCP_SYSTICK 15 + +/* ARM-specific interrupt pending bits. */ +#define CPU_INTERRUPT_FIQ CPU_INTERRUPT_TGT_EXT_1 +#define CPU_INTERRUPT_VIRQ CPU_INTERRUPT_TGT_EXT_2 +#define CPU_INTERRUPT_VFIQ CPU_INTERRUPT_TGT_EXT_3 + +/* The usual mapping for an AArch64 system register to its AArch32 + * counterpart is for the 32 bit world to have access to the lower + * half only (with writes leaving the upper half untouched). It's + * therefore useful to be able to pass TCG the offset of the least + * significant half of a uint64_t struct member. + */ +#ifdef HOST_WORDS_BIGENDIAN +#define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t)) +#define offsetofhigh32(S, M) offsetof(S, M) +#else +#define offsetoflow32(S, M) offsetof(S, M) +#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t)) +#endif + +/* Meanings of the ARMCPU object's four inbound GPIO lines */ +#define ARM_CPU_IRQ 0 +#define ARM_CPU_FIQ 1 +#define ARM_CPU_VIRQ 2 +#define ARM_CPU_VFIQ 3 + +struct arm_boot_info; + +#define NB_MMU_MODES 7 + +/* We currently assume float and double are IEEE single and double + precision respectively. + Doing runtime conversions is tricky because VFP registers may contain + integer values (eg. as the result of a FTOSI instruction). + s<2n> maps to the least significant half of d + s<2n+1> maps to the most significant half of d + */ + +/* CPU state for each instance of a generic timer (in cp15 c14) */ +typedef struct ARMGenericTimer { + uint64_t cval; /* Timer CompareValue register */ + uint64_t ctl; /* Timer Control register */ +} ARMGenericTimer; + +#define GTIMER_PHYS 0 +#define GTIMER_VIRT 1 +#define NUM_GTIMERS 2 + +typedef struct { + uint64_t raw_tcr; + uint32_t mask; + uint32_t base_mask; +} TCR; + +typedef struct CPUARMState { + /* Regs for current mode. */ + uint32_t regs[16]; + + /* 32/64 switch only happens when taking and returning from + * exceptions so the overlap semantics are taken care of then + * instead of having a complicated union. + */ + /* Regs for A64 mode. */ + uint64_t xregs[32]; + uint64_t pc; + /* PSTATE isn't an architectural register for ARMv8. However, it is + * convenient for us to assemble the underlying state into a 32 bit format + * identical to the architectural format used for the SPSR. (This is also + * what the Linux kernel's 'pstate' field in signal handlers and KVM's + * 'pstate' register are.) Of the PSTATE bits: + * NZCV are kept in the split out env->CF/VF/NF/ZF, (which have the same + * semantics as for AArch32, as described in the comments on each field) + * nRW (also known as M[4]) is kept, inverted, in env->aarch64 + * DAIF (exception masks) are kept in env->daif + * all other bits are stored in their correct places in env->pstate + */ + uint32_t pstate; + uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */ + + /* Frequently accessed CPSR bits are stored separately for efficiency. + This contains all the other bits. Use cpsr_{read,write} to access + the whole CPSR. */ + uint32_t uncached_cpsr; + uint32_t spsr; + + /* Banked registers. */ + uint64_t banked_spsr[8]; + uint32_t banked_r13[8]; + uint32_t banked_r14[8]; + + /* These hold r8-r12. */ + uint32_t usr_regs[5]; + uint32_t fiq_regs[5]; + + /* cpsr flag cache for faster execution */ + uint32_t CF; /* 0 or 1 */ + uint32_t VF; /* V is the bit 31. All other bits are undefined */ + uint32_t NF; /* N is bit 31. All other bits are undefined. */ + uint32_t ZF; /* Z set if zero. */ + uint32_t QF; /* 0 or 1 */ + uint32_t GE; /* cpsr[19:16] */ + uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */ + uint32_t condexec_bits; /* IT bits. cpsr[15:10,26:25]. */ + uint64_t daif; /* exception masks, in the bits they are in in PSTATE */ + + uint64_t elr_el[4]; /* AArch64 exception link regs */ + uint64_t sp_el[4]; /* AArch64 banked stack pointers */ + + /* System control coprocessor (cp15) */ + struct { + uint32_t c0_cpuid; + union { /* Cache size selection */ + struct { + uint64_t _unused_csselr0; + uint64_t csselr_ns; + uint64_t _unused_csselr1; + uint64_t csselr_s; + }; + uint64_t csselr_el[4]; + }; + union { /* System control register. */ + struct { + uint64_t _unused_sctlr; + uint64_t sctlr_ns; + uint64_t hsctlr; + uint64_t sctlr_s; + }; + uint64_t sctlr_el[4]; + }; + uint64_t cpacr_el1; /* Architectural feature access control register */ + uint64_t cptr_el[4]; /* ARMv8 feature trap registers */ + uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */ + uint64_t sder; /* Secure debug enable register. */ + uint32_t nsacr; /* Non-secure access control register. */ + union { /* MMU translation table base 0. */ + struct { + uint64_t _unused_ttbr0_0; + uint64_t ttbr0_ns; + uint64_t _unused_ttbr0_1; + uint64_t ttbr0_s; + }; + uint64_t ttbr0_el[4]; + }; + union { /* MMU translation table base 1. */ + struct { + uint64_t _unused_ttbr1_0; + uint64_t ttbr1_ns; + uint64_t _unused_ttbr1_1; + uint64_t ttbr1_s; + }; + uint64_t ttbr1_el[4]; + }; + /* MMU translation table base control. */ + TCR tcr_el[4]; + uint32_t c2_data; /* MPU data cachable bits. */ + uint32_t c2_insn; /* MPU instruction cachable bits. */ + union { /* MMU domain access control register + * MPU write buffer control. + */ + struct { + uint64_t dacr_ns; + uint64_t dacr_s; + }; + struct { + uint64_t dacr32_el2; + }; + }; + uint32_t pmsav5_data_ap; /* PMSAv5 MPU data access permissions */ + uint32_t pmsav5_insn_ap; /* PMSAv5 MPU insn access permissions */ + uint64_t hcr_el2; /* Hypervisor configuration register */ + uint64_t scr_el3; /* Secure configuration register. */ + union { /* Fault status registers. */ + struct { + uint64_t ifsr_ns; + uint64_t ifsr_s; + }; + struct { + uint64_t ifsr32_el2; + }; + }; + union { + struct { + uint64_t _unused_dfsr; + uint64_t dfsr_ns; + uint64_t hsr; + uint64_t dfsr_s; + }; + uint64_t esr_el[4]; + }; + uint32_t c6_region[8]; /* MPU base/size registers. */ + union { /* Fault address registers. */ + struct { + uint64_t _unused_far0; +#ifdef HOST_WORDS_BIGENDIAN + uint32_t ifar_ns; + uint32_t dfar_ns; + uint32_t ifar_s; + uint32_t dfar_s; +#else + uint32_t dfar_ns; + uint32_t ifar_ns; + uint32_t dfar_s; + uint32_t ifar_s; +#endif + uint64_t _unused_far3; + }; + uint64_t far_el[4]; + }; + union { /* Translation result. */ + struct { + uint64_t _unused_par_0; + uint64_t par_ns; + uint64_t _unused_par_1; + uint64_t par_s; + }; + uint64_t par_el[4]; + }; + + uint32_t c6_rgnr; + + uint32_t c9_insn; /* Cache lockdown registers. */ + uint32_t c9_data; + uint64_t c9_pmcr; /* performance monitor control register */ + uint64_t c9_pmcnten; /* perf monitor counter enables */ + uint32_t c9_pmovsr; /* perf monitor overflow status */ + uint32_t c9_pmxevtyper; /* perf monitor event type */ + uint32_t c9_pmuserenr; /* perf monitor user enable */ + uint32_t c9_pminten; /* perf monitor interrupt enables */ + union { /* Memory attribute redirection */ + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint64_t _unused_mair_0; + uint32_t mair1_ns; + uint32_t mair0_ns; + uint64_t _unused_mair_1; + uint32_t mair1_s; + uint32_t mair0_s; +#else + uint64_t _unused_mair_0; + uint32_t mair0_ns; + uint32_t mair1_ns; + uint64_t _unused_mair_1; + uint32_t mair0_s; + uint32_t mair1_s; +#endif + }; + uint64_t mair_el[4]; + }; + union { /* vector base address register */ + struct { + uint64_t _unused_vbar; + uint64_t vbar_ns; + uint64_t hvbar; + uint64_t vbar_s; + }; + uint64_t vbar_el[4]; + }; + uint32_t mvbar; /* (monitor) vector base address register */ + struct { /* FCSE PID. */ + uint32_t fcseidr_ns; + uint32_t fcseidr_s; + }; + union { /* Context ID. */ + struct { + uint64_t _unused_contextidr_0; + uint64_t contextidr_ns; + uint64_t _unused_contextidr_1; + uint64_t contextidr_s; + }; + uint64_t contextidr_el[4]; + }; + union { /* User RW Thread register. */ + struct { + uint64_t tpidrurw_ns; + uint64_t tpidrprw_ns; + uint64_t htpidr; + uint64_t _tpidr_el3; + }; + uint64_t tpidr_el[4]; + }; + /* The secure banks of these registers don't map anywhere */ + uint64_t tpidrurw_s; + uint64_t tpidrprw_s; + uint64_t tpidruro_s; + + union { /* User RO Thread register. */ + uint64_t tpidruro_ns; + uint64_t tpidrro_el[1]; + }; + uint64_t c14_cntfrq; /* Counter Frequency register */ + uint64_t c14_cntkctl; /* Timer Control register */ + ARMGenericTimer c14_timer[NUM_GTIMERS]; + uint32_t c15_cpar; /* XScale Coprocessor Access Register */ + uint32_t c15_ticonfig; /* TI925T configuration byte. */ + uint32_t c15_i_max; /* Maximum D-cache dirty line index. */ + uint32_t c15_i_min; /* Minimum D-cache dirty line index. */ + uint32_t c15_threadid; /* TI debugger thread-ID. */ + uint32_t c15_config_base_address; /* SCU base address. */ + uint32_t c15_diagnostic; /* diagnostic register */ + uint32_t c15_power_diagnostic; + uint32_t c15_power_control; /* power control */ + uint64_t dbgbvr[16]; /* breakpoint value registers */ + uint64_t dbgbcr[16]; /* breakpoint control registers */ + uint64_t dbgwvr[16]; /* watchpoint value registers */ + uint64_t dbgwcr[16]; /* watchpoint control registers */ + uint64_t mdscr_el1; + /* If the counter is enabled, this stores the last time the counter + * was reset. Otherwise it stores the counter value + */ + uint64_t c15_ccnt; + uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */ + } cp15; + + struct { + uint32_t other_sp; + uint32_t vecbase; + uint32_t basepri; + uint32_t control; + int current_sp; + int exception; + } v7m; + + /* Information associated with an exception about to be taken: + * code which raises an exception must set cs->exception_index and + * the relevant parts of this structure; the cpu_do_interrupt function + * will then set the guest-visible registers as part of the exception + * entry process. + */ + struct { + uint32_t syndrome; /* AArch64 format syndrome register */ + uint32_t fsr; /* AArch32 format fault status register info */ + uint64_t vaddress; /* virtual addr associated with exception, if any */ + uint32_t target_el; /* EL the exception should be targeted for */ + /* If we implement EL2 we will also need to store information + * about the intermediate physical address for stage 2 faults. + */ + } exception; + + /* Thumb-2 EE state. */ + uint32_t teecr; + uint32_t teehbr; + + /* VFP coprocessor state. */ + struct { + /* VFP/Neon register state. Note that the mapping between S, D and Q + * views of the register bank differs between AArch64 and AArch32: + * In AArch32: + * Qn = regs[2n+1]:regs[2n] + * Dn = regs[n] + * Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n + * (and regs[32] to regs[63] are inaccessible) + * In AArch64: + * Qn = regs[2n+1]:regs[2n] + * Dn = regs[2n] + * Sn = regs[2n] bits 31..0 + * This corresponds to the architecturally defined mapping between + * the two execution states, and means we do not need to explicitly + * map these registers when changing states. + */ + float64 regs[64]; + + uint32_t xregs[16]; + /* We store these fpcsr fields separately for convenience. */ + int vec_len; + int vec_stride; + + /* scratch space when Tn are not sufficient. */ + uint32_t scratch[8]; + + /* fp_status is the "normal" fp status. standard_fp_status retains + * values corresponding to the ARM "Standard FPSCR Value", ie + * default-NaN, flush-to-zero, round-to-nearest and is used by + * any operations (generally Neon) which the architecture defines + * as controlled by the standard FPSCR value rather than the FPSCR. + * + * To avoid having to transfer exception bits around, we simply + * say that the FPSCR cumulative exception flags are the logical + * OR of the flags in the two fp statuses. This relies on the + * only thing which needs to read the exception flags being + * an explicit FPSCR read. + */ + float_status fp_status; + float_status standard_fp_status; + } vfp; + uint64_t exclusive_addr; + uint64_t exclusive_val; + uint64_t exclusive_high; +#if defined(CONFIG_USER_ONLY) + uint64_t exclusive_test; + uint32_t exclusive_info; +#endif + + /* iwMMXt coprocessor state. */ + struct { + uint64_t regs[16]; + uint64_t val; + + uint32_t cregs[16]; + } iwmmxt; + + /* For mixed endian mode. */ + bool bswap_code; + +#if defined(CONFIG_USER_ONLY) + /* For usermode syscall translation. */ + int eabi; +#endif + + struct CPUBreakpoint *cpu_breakpoint[16]; + struct CPUWatchpoint *cpu_watchpoint[16]; + + CPU_COMMON + + /* These fields after the common ones so they are preserved on reset. */ + + /* Internal CPU feature flags. */ + uint64_t features; + + /* PMSAv7 MPU */ + struct { + uint32_t *drbar; + uint32_t *drsr; + uint32_t *dracr; + } pmsav7; + + void *nvic; + const struct arm_boot_info *boot_info; +} CPUARMState; + +#include "cpu-qom.h" + +ARMCPU *cpu_arm_init(const char *cpu_model); +int cpu_arm_exec(CPUState *cpu); +uint32_t do_arm_semihosting(CPUARMState *env); +void aarch64_sync_32_to_64(CPUARMState *env); +void aarch64_sync_64_to_32(CPUARMState *env); + +static inline bool is_a64(CPUARMState *env) +{ + return env->aarch64; +} + +/* you can call this signal handler from your SIGBUS and SIGSEGV + signal handlers to inform the virtual CPU of exceptions. non zero + is returned if the signal was handled by the virtual CPU. */ +int cpu_arm_signal_handler(int host_signum, void *pinfo, + void *puc); + +/** + * pmccntr_sync + * @env: CPUARMState + * + * Synchronises the counter in the PMCCNTR. This must always be called twice, + * once before any action that might affect the timer and again afterwards. + * The function is used to swap the state of the register if required. + * This only happens when not in user mode (!CONFIG_USER_ONLY) + */ +void pmccntr_sync(CPUARMState *env); + +/* SCTLR bit meanings. Several bits have been reused in newer + * versions of the architecture; in that case we define constants + * for both old and new bit meanings. Code which tests against those + * bits should probably check or otherwise arrange that the CPU + * is the architectural version it expects. + */ +#define SCTLR_M (1U << 0) +#define SCTLR_A (1U << 1) +#define SCTLR_C (1U << 2) +#define SCTLR_W (1U << 3) /* up to v6; RAO in v7 */ +#define SCTLR_SA (1U << 3) +#define SCTLR_P (1U << 4) /* up to v5; RAO in v6 and v7 */ +#define SCTLR_SA0 (1U << 4) /* v8 onward, AArch64 only */ +#define SCTLR_D (1U << 5) /* up to v5; RAO in v6 */ +#define SCTLR_CP15BEN (1U << 5) /* v7 onward */ +#define SCTLR_L (1U << 6) /* up to v5; RAO in v6 and v7; RAZ in v8 */ +#define SCTLR_B (1U << 7) /* up to v6; RAZ in v7 */ +#define SCTLR_ITD (1U << 7) /* v8 onward */ +#define SCTLR_S (1U << 8) /* up to v6; RAZ in v7 */ +#define SCTLR_SED (1U << 8) /* v8 onward */ +#define SCTLR_R (1U << 9) /* up to v6; RAZ in v7 */ +#define SCTLR_UMA (1U << 9) /* v8 onward, AArch64 only */ +#define SCTLR_F (1U << 10) /* up to v6 */ +#define SCTLR_SW (1U << 10) /* v7 onward */ +#define SCTLR_Z (1U << 11) +#define SCTLR_I (1U << 12) +#define SCTLR_V (1U << 13) +#define SCTLR_RR (1U << 14) /* up to v7 */ +#define SCTLR_DZE (1U << 14) /* v8 onward, AArch64 only */ +#define SCTLR_L4 (1U << 15) /* up to v6; RAZ in v7 */ +#define SCTLR_UCT (1U << 15) /* v8 onward, AArch64 only */ +#define SCTLR_DT (1U << 16) /* up to ??, RAO in v6 and v7 */ +#define SCTLR_nTWI (1U << 16) /* v8 onward */ +#define SCTLR_HA (1U << 17) +#define SCTLR_BR (1U << 17) /* PMSA only */ +#define SCTLR_IT (1U << 18) /* up to ??, RAO in v6 and v7 */ +#define SCTLR_nTWE (1U << 18) /* v8 onward */ +#define SCTLR_WXN (1U << 19) +#define SCTLR_ST (1U << 20) /* up to ??, RAZ in v6 */ +#define SCTLR_UWXN (1U << 20) /* v7 onward */ +#define SCTLR_FI (1U << 21) +#define SCTLR_U (1U << 22) +#define SCTLR_XP (1U << 23) /* up to v6; v7 onward RAO */ +#define SCTLR_VE (1U << 24) /* up to v7 */ +#define SCTLR_E0E (1U << 24) /* v8 onward, AArch64 only */ +#define SCTLR_EE (1U << 25) +#define SCTLR_L2 (1U << 26) /* up to v6, RAZ in v7 */ +#define SCTLR_UCI (1U << 26) /* v8 onward, AArch64 only */ +#define SCTLR_NMFI (1U << 27) +#define SCTLR_TRE (1U << 28) +#define SCTLR_AFE (1U << 29) +#define SCTLR_TE (1U << 30) + +#define CPTR_TCPAC (1U << 31) +#define CPTR_TTA (1U << 20) +#define CPTR_TFP (1U << 10) + +#define CPSR_M (0x1fU) +#define CPSR_T (1U << 5) +#define CPSR_F (1U << 6) +#define CPSR_I (1U << 7) +#define CPSR_A (1U << 8) +#define CPSR_E (1U << 9) +#define CPSR_IT_2_7 (0xfc00U) +#define CPSR_GE (0xfU << 16) +#define CPSR_IL (1U << 20) +/* Note that the RESERVED bits include bit 21, which is PSTATE_SS in + * an AArch64 SPSR but RES0 in AArch32 SPSR and CPSR. In QEMU we use + * env->uncached_cpsr bit 21 to store PSTATE.SS when executing in AArch32, + * where it is live state but not accessible to the AArch32 code. + */ +#define CPSR_RESERVED (0x7U << 21) +#define CPSR_J (1U << 24) +#define CPSR_IT_0_1 (3U << 25) +#define CPSR_Q (1U << 27) +#define CPSR_V (1U << 28) +#define CPSR_C (1U << 29) +#define CPSR_Z (1U << 30) +#define CPSR_N (1U << 31) +#define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V) +#define CPSR_AIF (CPSR_A | CPSR_I | CPSR_F) + +#define CPSR_IT (CPSR_IT_0_1 | CPSR_IT_2_7) +#define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \ + | CPSR_NZCV) +/* Bits writable in user mode. */ +#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE) +/* Execution state bits. MRS read as zero, MSR writes ignored. */ +#define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J | CPSR_IL) +/* Mask of bits which may be set by exception return copying them from SPSR */ +#define CPSR_ERET_MASK (~CPSR_RESERVED) + +#define TTBCR_N (7U << 0) /* TTBCR.EAE==0 */ +#define TTBCR_T0SZ (7U << 0) /* TTBCR.EAE==1 */ +#define TTBCR_PD0 (1U << 4) +#define TTBCR_PD1 (1U << 5) +#define TTBCR_EPD0 (1U << 7) +#define TTBCR_IRGN0 (3U << 8) +#define TTBCR_ORGN0 (3U << 10) +#define TTBCR_SH0 (3U << 12) +#define TTBCR_T1SZ (3U << 16) +#define TTBCR_A1 (1U << 22) +#define TTBCR_EPD1 (1U << 23) +#define TTBCR_IRGN1 (3U << 24) +#define TTBCR_ORGN1 (3U << 26) +#define TTBCR_SH1 (1U << 28) +#define TTBCR_EAE (1U << 31) + +/* Bit definitions for ARMv8 SPSR (PSTATE) format. + * Only these are valid when in AArch64 mode; in + * AArch32 mode SPSRs are basically CPSR-format. + */ +#define PSTATE_SP (1U) +#define PSTATE_M (0xFU) +#define PSTATE_nRW (1U << 4) +#define PSTATE_F (1U << 6) +#define PSTATE_I (1U << 7) +#define PSTATE_A (1U << 8) +#define PSTATE_D (1U << 9) +#define PSTATE_IL (1U << 20) +#define PSTATE_SS (1U << 21) +#define PSTATE_V (1U << 28) +#define PSTATE_C (1U << 29) +#define PSTATE_Z (1U << 30) +#define PSTATE_N (1U << 31) +#define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V) +#define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F) +#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF) +/* Mode values for AArch64 */ +#define PSTATE_MODE_EL3h 13 +#define PSTATE_MODE_EL3t 12 +#define PSTATE_MODE_EL2h 9 +#define PSTATE_MODE_EL2t 8 +#define PSTATE_MODE_EL1h 5 +#define PSTATE_MODE_EL1t 4 +#define PSTATE_MODE_EL0t 0 + +/* Map EL and handler into a PSTATE_MODE. */ +static inline unsigned int aarch64_pstate_mode(unsigned int el, bool handler) +{ + return (el << 2) | handler; +} + +/* Return the current PSTATE value. For the moment we don't support 32<->64 bit + * interprocessing, so we don't attempt to sync with the cpsr state used by + * the 32 bit decoder. + */ +static inline uint32_t pstate_read(CPUARMState *env) +{ + int ZF; + + ZF = (env->ZF == 0); + return (env->NF & 0x80000000) | (ZF << 30) + | (env->CF << 29) | ((env->VF & 0x80000000) >> 3) + | env->pstate | env->daif; +} + +static inline void pstate_write(CPUARMState *env, uint32_t val) +{ + env->ZF = (~val) & PSTATE_Z; + env->NF = val; + env->CF = (val >> 29) & 1; + env->VF = (val << 3) & 0x80000000; + env->daif = val & PSTATE_DAIF; + env->pstate = val & ~CACHED_PSTATE_BITS; +} + +/* Return the current CPSR value. */ +uint32_t cpsr_read(CPUARMState *env); +/* Set the CPSR. Note that some bits of mask must be all-set or all-clear. */ +void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask); + +/* Return the current xPSR value. */ +static inline uint32_t xpsr_read(CPUARMState *env) +{ + int ZF; + ZF = (env->ZF == 0); + return (env->NF & 0x80000000) | (ZF << 30) + | (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27) + | (env->thumb << 24) | ((env->condexec_bits & 3) << 25) + | ((env->condexec_bits & 0xfc) << 8) + | env->v7m.exception; +} + +/* Set the xPSR. Note that some bits of mask must be all-set or all-clear. */ +static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) +{ + if (mask & CPSR_NZCV) { + env->ZF = (~val) & CPSR_Z; + env->NF = val; + env->CF = (val >> 29) & 1; + env->VF = (val << 3) & 0x80000000; + } + if (mask & CPSR_Q) + env->QF = ((val & CPSR_Q) != 0); + if (mask & (1 << 24)) + env->thumb = ((val & (1 << 24)) != 0); + if (mask & CPSR_IT_0_1) { + env->condexec_bits &= ~3; + env->condexec_bits |= (val >> 25) & 3; + } + if (mask & CPSR_IT_2_7) { + env->condexec_bits &= 3; + env->condexec_bits |= (val >> 8) & 0xfc; + } + if (mask & 0x1ff) { + env->v7m.exception = val & 0x1ff; + } +} + +#define HCR_VM (1ULL << 0) +#define HCR_SWIO (1ULL << 1) +#define HCR_PTW (1ULL << 2) +#define HCR_FMO (1ULL << 3) +#define HCR_IMO (1ULL << 4) +#define HCR_AMO (1ULL << 5) +#define HCR_VF (1ULL << 6) +#define HCR_VI (1ULL << 7) +#define HCR_VSE (1ULL << 8) +#define HCR_FB (1ULL << 9) +#define HCR_BSU_MASK (3ULL << 10) +#define HCR_DC (1ULL << 12) +#define HCR_TWI (1ULL << 13) +#define HCR_TWE (1ULL << 14) +#define HCR_TID0 (1ULL << 15) +#define HCR_TID1 (1ULL << 16) +#define HCR_TID2 (1ULL << 17) +#define HCR_TID3 (1ULL << 18) +#define HCR_TSC (1ULL << 19) +#define HCR_TIDCP (1ULL << 20) +#define HCR_TACR (1ULL << 21) +#define HCR_TSW (1ULL << 22) +#define HCR_TPC (1ULL << 23) +#define HCR_TPU (1ULL << 24) +#define HCR_TTLB (1ULL << 25) +#define HCR_TVM (1ULL << 26) +#define HCR_TGE (1ULL << 27) +#define HCR_TDZ (1ULL << 28) +#define HCR_HCD (1ULL << 29) +#define HCR_TRVM (1ULL << 30) +#define HCR_RW (1ULL << 31) +#define HCR_CD (1ULL << 32) +#define HCR_ID (1ULL << 33) +#define HCR_MASK ((1ULL << 34) - 1) + +#define SCR_NS (1U << 0) +#define SCR_IRQ (1U << 1) +#define SCR_FIQ (1U << 2) +#define SCR_EA (1U << 3) +#define SCR_FW (1U << 4) +#define SCR_AW (1U << 5) +#define SCR_NET (1U << 6) +#define SCR_SMD (1U << 7) +#define SCR_HCE (1U << 8) +#define SCR_SIF (1U << 9) +#define SCR_RW (1U << 10) +#define SCR_ST (1U << 11) +#define SCR_TWI (1U << 12) +#define SCR_TWE (1U << 13) +#define SCR_AARCH32_MASK (0x3fff & ~(SCR_RW | SCR_ST)) +#define SCR_AARCH64_MASK (0x3fff & ~SCR_NET) + +/* Return the current FPSCR value. */ +uint32_t vfp_get_fpscr(CPUARMState *env); +void vfp_set_fpscr(CPUARMState *env, uint32_t val); + +/* For A64 the FPSCR is split into two logically distinct registers, + * FPCR and FPSR. However since they still use non-overlapping bits + * we store the underlying state in fpscr and just mask on read/write. + */ +#define FPSR_MASK 0xf800009f +#define FPCR_MASK 0x07f79f00 +static inline uint32_t vfp_get_fpsr(CPUARMState *env) +{ + return vfp_get_fpscr(env) & FPSR_MASK; +} + +static inline void vfp_set_fpsr(CPUARMState *env, uint32_t val) +{ + uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPSR_MASK) | (val & FPSR_MASK); + vfp_set_fpscr(env, new_fpscr); +} + +static inline uint32_t vfp_get_fpcr(CPUARMState *env) +{ + return vfp_get_fpscr(env) & FPCR_MASK; +} + +static inline void vfp_set_fpcr(CPUARMState *env, uint32_t val) +{ + uint32_t new_fpscr = (vfp_get_fpscr(env) & ~FPCR_MASK) | (val & FPCR_MASK); + vfp_set_fpscr(env, new_fpscr); +} + +enum arm_cpu_mode { + ARM_CPU_MODE_USR = 0x10, + ARM_CPU_MODE_FIQ = 0x11, + ARM_CPU_MODE_IRQ = 0x12, + ARM_CPU_MODE_SVC = 0x13, + ARM_CPU_MODE_MON = 0x16, + ARM_CPU_MODE_ABT = 0x17, + ARM_CPU_MODE_HYP = 0x1a, + ARM_CPU_MODE_UND = 0x1b, + ARM_CPU_MODE_SYS = 0x1f +}; + +/* VFP system registers. */ +#define ARM_VFP_FPSID 0 +#define ARM_VFP_FPSCR 1 +#define ARM_VFP_MVFR2 5 +#define ARM_VFP_MVFR1 6 +#define ARM_VFP_MVFR0 7 +#define ARM_VFP_FPEXC 8 +#define ARM_VFP_FPINST 9 +#define ARM_VFP_FPINST2 10 + +/* iwMMXt coprocessor control registers. */ +#define ARM_IWMMXT_wCID 0 +#define ARM_IWMMXT_wCon 1 +#define ARM_IWMMXT_wCSSF 2 +#define ARM_IWMMXT_wCASF 3 +#define ARM_IWMMXT_wCGR0 8 +#define ARM_IWMMXT_wCGR1 9 +#define ARM_IWMMXT_wCGR2 10 +#define ARM_IWMMXT_wCGR3 11 + +/* If adding a feature bit which corresponds to a Linux ELF + * HWCAP bit, remember to update the feature-bit-to-hwcap + * mapping in linux-user/elfload.c:get_elf_hwcap(). + */ +enum arm_features { + ARM_FEATURE_VFP, + ARM_FEATURE_AUXCR, /* ARM1026 Auxiliary control register. */ + ARM_FEATURE_XSCALE, /* Intel XScale extensions. */ + ARM_FEATURE_IWMMXT, /* Intel iwMMXt extension. */ + ARM_FEATURE_V6, + ARM_FEATURE_V6K, + ARM_FEATURE_V7, + ARM_FEATURE_THUMB2, + ARM_FEATURE_MPU, /* Only has Memory Protection Unit, not full MMU. */ + ARM_FEATURE_VFP3, + ARM_FEATURE_VFP_FP16, + ARM_FEATURE_NEON, + ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */ + ARM_FEATURE_M, /* Microcontroller profile. */ + ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */ + ARM_FEATURE_THUMB2EE, + ARM_FEATURE_V7MP, /* v7 Multiprocessing Extensions */ + ARM_FEATURE_V4T, + ARM_FEATURE_V5, + ARM_FEATURE_STRONGARM, + ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */ + ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */ + ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */ + ARM_FEATURE_GENERIC_TIMER, + ARM_FEATURE_MVFR, /* Media and VFP Feature Registers 0 and 1 */ + ARM_FEATURE_DUMMY_C15_REGS, /* RAZ/WI all of cp15 crn=15 */ + ARM_FEATURE_CACHE_TEST_CLEAN, /* 926/1026 style test-and-clean ops */ + ARM_FEATURE_CACHE_DIRTY_REG, /* 1136/1176 cache dirty status register */ + ARM_FEATURE_CACHE_BLOCK_OPS, /* v6 optional cache block operations */ + ARM_FEATURE_MPIDR, /* has cp15 MPIDR */ + ARM_FEATURE_PXN, /* has Privileged Execute Never bit */ + ARM_FEATURE_LPAE, /* has Large Physical Address Extension */ + ARM_FEATURE_V8, + ARM_FEATURE_AARCH64, /* supports 64 bit mode */ + ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */ + ARM_FEATURE_CBAR, /* has cp15 CBAR */ + ARM_FEATURE_CRC, /* ARMv8 CRC instructions */ + ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */ + ARM_FEATURE_EL2, /* has EL2 Virtualization support */ + ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ + ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ + ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */ +}; + +static inline int arm_feature(CPUARMState *env, int feature) +{ + return (env->features & (1ULL << feature)) != 0; +} + +#if !defined(CONFIG_USER_ONLY) +/* Return true if exception levels below EL3 are in secure state, + * or would be following an exception return to that level. + * Unlike arm_is_secure() (which is always a question about the + * _current_ state of the CPU) this doesn't care about the current + * EL or mode. + */ +static inline bool arm_is_secure_below_el3(CPUARMState *env) +{ + if (arm_feature(env, ARM_FEATURE_EL3)) { + return !(env->cp15.scr_el3 & SCR_NS); + } else { + /* If EL2 is not supported then the secure state is implementation + * defined, in which case QEMU defaults to non-secure. + */ + return false; + } +} + +/* Return true if the processor is in secure state */ +static inline bool arm_is_secure(CPUARMState *env) +{ + if (arm_feature(env, ARM_FEATURE_EL3)) { + if (is_a64(env) && extract32(env->pstate, 2, 2) == 3) { + /* CPU currently in AArch64 state and EL3 */ + return true; + } else if (!is_a64(env) && + (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) { + /* CPU currently in AArch32 state and monitor mode */ + return true; + } + } + return arm_is_secure_below_el3(env); +} + +#else +static inline bool arm_is_secure_below_el3(CPUARMState *env) +{ + return false; +} + +static inline bool arm_is_secure(CPUARMState *env) +{ + return false; +} +#endif + +/* Return true if the specified exception level is running in AArch64 state. */ +static inline bool arm_el_is_aa64(CPUARMState *env, int el) +{ + /* We don't currently support EL2, and this isn't valid for EL0 + * (if we're in EL0, is_a64() is what you want, and if we're not in EL0 + * then the state of EL0 isn't well defined.) + */ + assert(el == 1 || el == 3); + + /* AArch64-capable CPUs always run with EL1 in AArch64 mode. This + * is a QEMU-imposed simplification which we may wish to change later. + * If we in future support EL2 and/or EL3, then the state of lower + * exception levels is controlled by the HCR.RW and SCR.RW bits. + */ + return arm_feature(env, ARM_FEATURE_AARCH64); +} + +/* Function for determing whether guest cp register reads and writes should + * access the secure or non-secure bank of a cp register. When EL3 is + * operating in AArch32 state, the NS-bit determines whether the secure + * instance of a cp register should be used. When EL3 is AArch64 (or if + * it doesn't exist at all) then there is no register banking, and all + * accesses are to the non-secure version. + */ +static inline bool access_secure_reg(CPUARMState *env) +{ + bool ret = (arm_feature(env, ARM_FEATURE_EL3) && + !arm_el_is_aa64(env, 3) && + !(env->cp15.scr_el3 & SCR_NS)); + + return ret; +} + +/* Macros for accessing a specified CP register bank */ +#define A32_BANKED_REG_GET(_env, _regname, _secure) \ + ((_secure) ? (_env)->cp15._regname##_s : (_env)->cp15._regname##_ns) + +#define A32_BANKED_REG_SET(_env, _regname, _secure, _val) \ + do { \ + if (_secure) { \ + (_env)->cp15._regname##_s = (_val); \ + } else { \ + (_env)->cp15._regname##_ns = (_val); \ + } \ + } while (0) + +/* Macros for automatically accessing a specific CP register bank depending on + * the current secure state of the system. These macros are not intended for + * supporting instruction translation reads/writes as these are dependent + * solely on the SCR.NS bit and not the mode. + */ +#define A32_BANKED_CURRENT_REG_GET(_env, _regname) \ + A32_BANKED_REG_GET((_env), _regname, \ + ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env)))) + +#define A32_BANKED_CURRENT_REG_SET(_env, _regname, _val) \ + A32_BANKED_REG_SET((_env), _regname, \ + ((!arm_el_is_aa64((_env), 3) && arm_is_secure(_env))), \ + (_val)) + +void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf); +uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, + uint32_t cur_el, bool secure); + +/* Interface between CPU and Interrupt controller. */ +void armv7m_nvic_set_pending(void *opaque, int irq); +int armv7m_nvic_acknowledge_irq(void *opaque); +void armv7m_nvic_complete_irq(void *opaque, int irq); + +/* Interface for defining coprocessor registers. + * Registers are defined in tables of arm_cp_reginfo structs + * which are passed to define_arm_cp_regs(). + */ + +/* When looking up a coprocessor register we look for it + * via an integer which encodes all of: + * coprocessor number + * Crn, Crm, opc1, opc2 fields + * 32 or 64 bit register (ie is it accessed via MRC/MCR + * or via MRRC/MCRR?) + * non-secure/secure bank (AArch32 only) + * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field. + * (In this case crn and opc2 should be zero.) + * For AArch64, there is no 32/64 bit size distinction; + * instead all registers have a 2 bit op0, 3 bit op1 and op2, + * and 4 bit CRn and CRm. The encoding patterns are chosen + * to be easy to convert to and from the KVM encodings, and also + * so that the hashtable can contain both AArch32 and AArch64 + * registers (to allow for interprocessing where we might run + * 32 bit code on a 64 bit core). + */ +/* This bit is private to our hashtable cpreg; in KVM register + * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64 + * in the upper bits of the 64 bit ID. + */ +#define CP_REG_AA64_SHIFT 28 +#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT) + +/* To enable banking of coprocessor registers depending on ns-bit we + * add a bit to distinguish between secure and non-secure cpregs in the + * hashtable. + */ +#define CP_REG_NS_SHIFT 29 +#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT) + +#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \ + ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \ + ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2)) + +#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \ + (CP_REG_AA64_MASK | \ + ((cp) << CP_REG_ARM_COPROC_SHIFT) | \ + ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ + ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ + ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ + ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \ + ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT)) + +/* Convert a full 64 bit KVM register ID to the truncated 32 bit + * version used as a key for the coprocessor register hashtable + */ +static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid) +{ + uint32_t cpregid = kvmid; + if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) { + cpregid |= CP_REG_AA64_MASK; + } else { + if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) { + cpregid |= (1 << 15); + } + + /* KVM is always non-secure so add the NS flag on AArch32 register + * entries. + */ + cpregid |= 1 << CP_REG_NS_SHIFT; + } + return cpregid; +} + +/* Convert a truncated 32 bit hashtable key into the full + * 64 bit KVM register ID. + */ +static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) +{ + uint64_t kvmid; + + if (cpregid & CP_REG_AA64_MASK) { + kvmid = cpregid & ~CP_REG_AA64_MASK; + kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64; + } else { + kvmid = cpregid & ~(1 << 15); + if (cpregid & (1 << 15)) { + kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM; + } else { + kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM; + } + } + return kvmid; +} + +/* ARMCPRegInfo type field bits. If the SPECIAL bit is set this is a + * special-behaviour cp reg and bits [15..8] indicate what behaviour + * it has. Otherwise it is a simple cp reg, where CONST indicates that + * TCG can assume the value to be constant (ie load at translate time) + * and 64BIT indicates a 64 bit wide coprocessor register. SUPPRESS_TB_END + * indicates that the TB should not be ended after a write to this register + * (the default is that the TB ends after cp writes). OVERRIDE permits + * a register definition to override a previous definition for the + * same (cp, is64, crn, crm, opc1, opc2) tuple: either the new or the + * old must have the OVERRIDE bit set. + * ALIAS indicates that this register is an alias view of some underlying + * state which is also visible via another register, and that the other + * register is handling migration and reset; registers marked ALIAS will not be + * migrated but may have their state set by syncing of register state from KVM. + * NO_RAW indicates that this register has no underlying state and does not + * support raw access for state saving/loading; it will not be used for either + * migration or KVM state synchronization. (Typically this is for "registers" + * which are actually used as instructions for cache maintenance and so on.) + * IO indicates that this register does I/O and therefore its accesses + * need to be surrounded by gen_io_start()/gen_io_end(). In particular, + * registers which implement clocks or timers require this. + */ +#define ARM_CP_SPECIAL 1 +#define ARM_CP_CONST 2 +#define ARM_CP_64BIT 4 +#define ARM_CP_SUPPRESS_TB_END 8 +#define ARM_CP_OVERRIDE 16 +#define ARM_CP_ALIAS 32 +#define ARM_CP_IO 64 +#define ARM_CP_NO_RAW 128 +#define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8)) +#define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8)) +#define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8)) +#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8)) +#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | (5 << 8)) +#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA +/* Used only as a terminator for ARMCPRegInfo lists */ +#define ARM_CP_SENTINEL 0xffff +/* Mask of only the flag bits in a type field */ +#define ARM_CP_FLAG_MASK 0xff + +/* Valid values for ARMCPRegInfo state field, indicating which of + * the AArch32 and AArch64 execution states this register is visible in. + * If the reginfo doesn't explicitly specify then it is AArch32 only. + * If the reginfo is declared to be visible in both states then a second + * reginfo is synthesised for the AArch32 view of the AArch64 register, + * such that the AArch32 view is the lower 32 bits of the AArch64 one. + * Note that we rely on the values of these enums as we iterate through + * the various states in some places. + */ +enum { + ARM_CP_STATE_AA32 = 0, + ARM_CP_STATE_AA64 = 1, + ARM_CP_STATE_BOTH = 2, +}; + +/* ARM CP register secure state flags. These flags identify security state + * attributes for a given CP register entry. + * The existence of both or neither secure and non-secure flags indicates that + * the register has both a secure and non-secure hash entry. A single one of + * these flags causes the register to only be hashed for the specified + * security state. + * Although definitions may have any combination of the S/NS bits, each + * registered entry will only have one to identify whether the entry is secure + * or non-secure. + */ +enum { + ARM_CP_SECSTATE_S = (1 << 0), /* bit[0]: Secure state register */ + ARM_CP_SECSTATE_NS = (1 << 1), /* bit[1]: Non-secure state register */ +}; + +/* Return true if cptype is a valid type field. This is used to try to + * catch errors where the sentinel has been accidentally left off the end + * of a list of registers. + */ +static inline bool cptype_valid(int cptype) +{ + return ((cptype & ~ARM_CP_FLAG_MASK) == 0) + || ((cptype & ARM_CP_SPECIAL) && + ((cptype & ~ARM_CP_FLAG_MASK) <= ARM_LAST_SPECIAL)); +} + +/* Access rights: + * We define bits for Read and Write access for what rev C of the v7-AR ARM ARM + * defines as PL0 (user), PL1 (fiq/irq/svc/abt/und/sys, ie privileged), and + * PL2 (hyp). The other level which has Read and Write bits is Secure PL1 + * (ie any of the privileged modes in Secure state, or Monitor mode). + * If a register is accessible in one privilege level it's always accessible + * in higher privilege levels too. Since "Secure PL1" also follows this rule + * (ie anything visible in PL2 is visible in S-PL1, some things are only + * visible in S-PL1) but "Secure PL1" is a bit of a mouthful, we bend the + * terminology a little and call this PL3. + * In AArch64 things are somewhat simpler as the PLx bits line up exactly + * with the ELx exception levels. + * + * If access permissions for a register are more complex than can be + * described with these bits, then use a laxer set of restrictions, and + * do the more restrictive/complex check inside a helper function. + */ +#define PL3_R 0x80 +#define PL3_W 0x40 +#define PL2_R (0x20 | PL3_R) +#define PL2_W (0x10 | PL3_W) +#define PL1_R (0x08 | PL2_R) +#define PL1_W (0x04 | PL2_W) +#define PL0_R (0x02 | PL1_R) +#define PL0_W (0x01 | PL1_W) + +#define PL3_RW (PL3_R | PL3_W) +#define PL2_RW (PL2_R | PL2_W) +#define PL1_RW (PL1_R | PL1_W) +#define PL0_RW (PL0_R | PL0_W) + +/* Return the current Exception Level (as per ARMv8; note that this differs + * from the ARMv7 Privilege Level). + */ +static inline int arm_current_el(CPUARMState *env) +{ + if (arm_feature(env, ARM_FEATURE_M)) { + return !((env->v7m.exception == 0) && (env->v7m.control & 1)); + } + + if (is_a64(env)) { + return extract32(env->pstate, 2, 2); + } + + switch (env->uncached_cpsr & 0x1f) { + case ARM_CPU_MODE_USR: + return 0; + case ARM_CPU_MODE_HYP: + return 2; + case ARM_CPU_MODE_MON: + return 3; + default: + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { + /* If EL3 is 32-bit then all secure privileged modes run in + * EL3 + */ + return 3; + } + + return 1; + } +} + +typedef struct ARMCPRegInfo ARMCPRegInfo; + +typedef enum CPAccessResult { + /* Access is permitted */ + CP_ACCESS_OK = 0, + /* Access fails due to a configurable trap or enable which would + * result in a categorized exception syndrome giving information about + * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6, + * 0xc or 0x18). The exception is taken to the usual target EL (EL1 or + * PL1 if in EL0, otherwise to the current EL). + */ + CP_ACCESS_TRAP = 1, + /* Access fails and results in an exception syndrome 0x0 ("uncategorized"). + * Note that this is not a catch-all case -- the set of cases which may + * result in this failure is specifically defined by the architecture. + */ + CP_ACCESS_TRAP_UNCATEGORIZED = 2, + /* As CP_ACCESS_TRAP, but for traps directly to EL2 or EL3 */ + CP_ACCESS_TRAP_EL2 = 3, + CP_ACCESS_TRAP_EL3 = 4, +} CPAccessResult; + +/* Access functions for coprocessor registers. These cannot fail and + * may not raise exceptions. + */ +typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque); +typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque, + uint64_t value); +/* Access permission check functions for coprocessor registers. */ +typedef CPAccessResult CPAccessFn(CPUARMState *env, const ARMCPRegInfo *opaque); +/* Hook function for register reset */ +typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque); + +#define CP_ANY 0xff + +/* Definition of an ARM coprocessor register */ +struct ARMCPRegInfo { + /* Name of register (useful mainly for debugging, need not be unique) */ + const char *name; + /* Location of register: coprocessor number and (crn,crm,opc1,opc2) + * tuple. Any of crm, opc1 and opc2 may be CP_ANY to indicate a + * 'wildcard' field -- any value of that field in the MRC/MCR insn + * will be decoded to this register. The register read and write + * callbacks will be passed an ARMCPRegInfo with the crn/crm/opc1/opc2 + * used by the program, so it is possible to register a wildcard and + * then behave differently on read/write if necessary. + * For 64 bit registers, only crm and opc1 are relevant; crn and opc2 + * must both be zero. + * For AArch64-visible registers, opc0 is also used. + * Since there are no "coprocessors" in AArch64, cp is purely used as a + * way to distinguish (for KVM's benefit) guest-visible system registers + * from demuxed ones provided to preserve the "no side effects on + * KVM register read/write from QEMU" semantics. cp==0x13 is guest + * visible (to match KVM's encoding); cp==0 will be converted to + * cp==0x13 when the ARMCPRegInfo is registered, for convenience. + */ + uint8_t cp; + uint8_t crn; + uint8_t crm; + uint8_t opc0; + uint8_t opc1; + uint8_t opc2; + /* Execution state in which this register is visible: ARM_CP_STATE_* */ + int state; + /* Register type: ARM_CP_* bits/values */ + int type; + /* Access rights: PL*_[RW] */ + int access; + /* Security state: ARM_CP_SECSTATE_* bits/values */ + int secure; + /* The opaque pointer passed to define_arm_cp_regs_with_opaque() when + * this register was defined: can be used to hand data through to the + * register read/write functions, since they are passed the ARMCPRegInfo*. + */ + void *opaque; + /* Value of this register, if it is ARM_CP_CONST. Otherwise, if + * fieldoffset is non-zero, the reset value of the register. + */ + uint64_t resetvalue; + /* Offset of the field in CPUARMState for this register. + * + * This is not needed if either: + * 1. type is ARM_CP_CONST or one of the ARM_CP_SPECIALs + * 2. both readfn and writefn are specified + */ + ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */ + + /* Offsets of the secure and non-secure fields in CPUARMState for the + * register if it is banked. These fields are only used during the static + * registration of a register. During hashing the bank associated + * with a given security state is copied to fieldoffset which is used from + * there on out. + * + * It is expected that register definitions use either fieldoffset or + * bank_fieldoffsets in the definition but not both. It is also expected + * that both bank offsets are set when defining a banked register. This + * use indicates that a register is banked. + */ + ptrdiff_t bank_fieldoffsets[2]; + + /* Function for making any access checks for this register in addition to + * those specified by the 'access' permissions bits. If NULL, no extra + * checks required. The access check is performed at runtime, not at + * translate time. + */ + CPAccessFn *accessfn; + /* Function for handling reads of this register. If NULL, then reads + * will be done by loading from the offset into CPUARMState specified + * by fieldoffset. + */ + CPReadFn *readfn; + /* Function for handling writes of this register. If NULL, then writes + * will be done by writing to the offset into CPUARMState specified + * by fieldoffset. + */ + CPWriteFn *writefn; + /* Function for doing a "raw" read; used when we need to copy + * coprocessor state to the kernel for KVM or out for + * migration. This only needs to be provided if there is also a + * readfn and it has side effects (for instance clear-on-read bits). + */ + CPReadFn *raw_readfn; + /* Function for doing a "raw" write; used when we need to copy KVM + * kernel coprocessor state into userspace, or for inbound + * migration. This only needs to be provided if there is also a + * writefn and it masks out "unwritable" bits or has write-one-to-clear + * or similar behaviour. + */ + CPWriteFn *raw_writefn; + /* Function for resetting the register. If NULL, then reset will be done + * by writing resetvalue to the field specified in fieldoffset. If + * fieldoffset is 0 then no reset will be done. + */ + CPResetFn *resetfn; +}; + +/* Macros which are lvalues for the field in CPUARMState for the + * ARMCPRegInfo *ri. + */ +#define CPREG_FIELD32(env, ri) \ + (*(uint32_t *)((char *)(env) + (ri)->fieldoffset)) +#define CPREG_FIELD64(env, ri) \ + (*(uint64_t *)((char *)(env) + (ri)->fieldoffset)) + +#define REGINFO_SENTINEL { .type = ARM_CP_SENTINEL } + +void define_arm_cp_regs_with_opaque(ARMCPU *cpu, + const ARMCPRegInfo *regs, void *opaque); +void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, + const ARMCPRegInfo *regs, void *opaque); +static inline void define_arm_cp_regs(ARMCPU *cpu, const ARMCPRegInfo *regs) +{ + define_arm_cp_regs_with_opaque(cpu, regs, 0); +} +static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs) +{ + define_one_arm_cp_reg_with_opaque(cpu, regs, 0); +} +const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp); + +/* CPWriteFn that can be used to implement writes-ignored behaviour */ +void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value); +/* CPReadFn that can be used for read-as-zero behaviour */ +uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri); + +/* CPResetFn that does nothing, for use if no reset is required even + * if fieldoffset is non zero. + */ +void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque); + +/* Return true if this reginfo struct's field in the cpu state struct + * is 64 bits wide. + */ +static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri) +{ + return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT); +} + +static inline bool cp_access_ok(int current_el, + const ARMCPRegInfo *ri, int isread) +{ + return (ri->access >> ((current_el * 2) + isread)) & 1; +} + +/** + * write_list_to_cpustate + * @cpu: ARMCPU + * + * For each register listed in the ARMCPU cpreg_indexes list, write + * its value from the cpreg_values list into the ARMCPUState structure. + * This updates TCG's working data structures from KVM data or + * from incoming migration state. + * + * Returns: true if all register values were updated correctly, + * false if some register was unknown or could not be written. + * Note that we do not stop early on failure -- we will attempt + * writing all registers in the list. + */ +bool write_list_to_cpustate(ARMCPU *cpu); + +/** + * write_cpustate_to_list: + * @cpu: ARMCPU + * + * For each register listed in the ARMCPU cpreg_indexes list, write + * its value from the ARMCPUState structure into the cpreg_values list. + * This is used to copy info from TCG's working data structures into + * KVM or for outbound migration. + * + * Returns: true if all register values were read correctly, + * false if some register was unknown or could not be read. + * Note that we do not stop early on failure -- we will attempt + * reading all registers in the list. + */ +bool write_cpustate_to_list(ARMCPU *cpu); + +/* Does the core conform to the the "MicroController" profile. e.g. Cortex-M3. + Note the M in older cores (eg. ARM7TDMI) stands for Multiply. These are + conventional cores (ie. Application or Realtime profile). */ + +#define IS_M(env) arm_feature(env, ARM_FEATURE_M) + +#define ARM_CPUID_TI915T 0x54029152 +#define ARM_CPUID_TI925T 0x54029252 + +#if defined(CONFIG_USER_ONLY) +#define TARGET_PAGE_BITS 12 +#else +/* The ARM MMU allows 1k pages. */ +/* ??? Linux doesn't actually use these, and they're deprecated in recent + architecture revisions. Maybe a configure option to disable them. */ +#define TARGET_PAGE_BITS 10 +#endif + +#if defined(TARGET_AARCH64) +# define TARGET_PHYS_ADDR_SPACE_BITS 48 +# define TARGET_VIRT_ADDR_SPACE_BITS 64 +#else +# define TARGET_PHYS_ADDR_SPACE_BITS 40 +# define TARGET_VIRT_ADDR_SPACE_BITS 32 +#endif + +static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, + unsigned int target_el) +{ + CPUARMState *env = cs->env_ptr; + unsigned int cur_el = arm_current_el(env); + bool secure = arm_is_secure(env); + uint32_t scr; + uint32_t hcr; + bool pstate_unmasked; + int8_t unmasked = 0; + + /* Don't take exceptions if they target a lower EL. + * This check should catch any exceptions that would not be taken but left + * pending. + */ + if (cur_el > target_el) { + return false; + } + + switch (excp_idx) { + case EXCP_FIQ: + /* If FIQs are routed to EL3 or EL2 then there are cases where we + * override the CPSR.F in determining if the exception is masked or + * not. If neither of these are set then we fall back to the CPSR.F + * setting otherwise we further assess the state below. + */ + hcr = (env->cp15.hcr_el2 & HCR_FMO); + scr = (env->cp15.scr_el3 & SCR_FIQ); + + /* When EL3 is 32-bit, the SCR.FW bit controls whether the CPSR.F bit + * masks FIQ interrupts when taken in non-secure state. If SCR.FW is + * set then FIQs can be masked by CPSR.F when non-secure but only + * when FIQs are only routed to EL3. + */ + scr &= !((env->cp15.scr_el3 & SCR_FW) && !hcr); + pstate_unmasked = !(env->daif & PSTATE_F); + break; + + case EXCP_IRQ: + /* When EL3 execution state is 32-bit, if HCR.IMO is set then we may + * override the CPSR.I masking when in non-secure state. The SCR.IRQ + * setting has already been taken into consideration when setting the + * target EL, so it does not have a further affect here. + */ + hcr = (env->cp15.hcr_el2 & HCR_IMO); + scr = false; + pstate_unmasked = !(env->daif & PSTATE_I); + break; + + case EXCP_VFIQ: + if (secure || !(env->cp15.hcr_el2 & HCR_FMO)) { + /* VFIQs are only taken when hypervized and non-secure. */ + return false; + } + return !(env->daif & PSTATE_F); + case EXCP_VIRQ: + if (secure || !(env->cp15.hcr_el2 & HCR_IMO)) { + /* VIRQs are only taken when hypervized and non-secure. */ + return false; + } + return !(env->daif & PSTATE_I); + default: + g_assert_not_reached(); + } + + /* Use the target EL, current execution state and SCR/HCR settings to + * determine whether the corresponding CPSR bit is used to mask the + * interrupt. + */ + if ((target_el > cur_el) && (target_el != 1)) { + if (arm_el_is_aa64(env, 3) || ((scr || hcr) && (!secure))) { + unmasked = 1; + } + } + + /* The PSTATE bits only mask the interrupt if we have not overriden the + * ability above. + */ + return unmasked || pstate_unmasked; +} + +#define cpu_init(cpu_model) CPU(cpu_arm_init(cpu_model)) + +#define cpu_exec cpu_arm_exec +#define cpu_gen_code cpu_arm_gen_code +#define cpu_signal_handler cpu_arm_signal_handler +#define cpu_list arm_cpu_list + +/* ARM has the following "translation regimes" (as the ARM ARM calls them): + * + * If EL3 is 64-bit: + * + NonSecure EL1 & 0 stage 1 + * + NonSecure EL1 & 0 stage 2 + * + NonSecure EL2 + * + Secure EL1 & EL0 + * + Secure EL3 + * If EL3 is 32-bit: + * + NonSecure PL1 & 0 stage 1 + * + NonSecure PL1 & 0 stage 2 + * + NonSecure PL2 + * + Secure PL0 & PL1 + * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) + * + * For QEMU, an mmu_idx is not quite the same as a translation regime because: + * 1. we need to split the "EL1 & 0" regimes into two mmu_idxes, because they + * may differ in access permissions even if the VA->PA map is the same + * 2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2 + * translation, which means that we have one mmu_idx that deals with two + * concatenated translation regimes [this sort of combined s1+2 TLB is + * architecturally permitted] + * 3. we don't need to allocate an mmu_idx to translations that we won't be + * handling via the TLB. The only way to do a stage 1 translation without + * the immediate stage 2 translation is via the ATS or AT system insns, + * which can be slow-pathed and always do a page table walk. + * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" + * translation regimes, because they map reasonably well to each other + * and they can't both be active at the same time. + * This gives us the following list of mmu_idx values: + * + * NS EL0 (aka NS PL0) stage 1+2 + * NS EL1 (aka NS PL1) stage 1+2 + * NS EL2 (aka NS PL2) + * S EL3 (aka S PL1) + * S EL0 (aka S PL0) + * S EL1 (not used if EL3 is 32 bit) + * NS EL0+1 stage 2 + * + * (The last of these is an mmu_idx because we want to be able to use the TLB + * for the accesses done as part of a stage 1 page table walk, rather than + * having to walk the stage 2 page table over and over.) + * + * Our enumeration includes at the end some entries which are not "true" + * mmu_idx values in that they don't have corresponding TLBs and are only + * valid for doing slow path page table walks. + * + * The constant names here are patterned after the general style of the names + * of the AT/ATS operations. + * The values used are carefully arranged to make mmu_idx => EL lookup easy. + */ +typedef enum ARMMMUIdx { + ARMMMUIdx_S12NSE0 = 0, + ARMMMUIdx_S12NSE1 = 1, + ARMMMUIdx_S1E2 = 2, + ARMMMUIdx_S1E3 = 3, + ARMMMUIdx_S1SE0 = 4, + ARMMMUIdx_S1SE1 = 5, + ARMMMUIdx_S2NS = 6, + /* Indexes below here don't have TLBs and are used only for AT system + * instructions or for the first stage of an S12 page table walk. + */ + ARMMMUIdx_S1NSE0 = 7, + ARMMMUIdx_S1NSE1 = 8, +} ARMMMUIdx; + +#define MMU_USER_IDX 0 + +/* Return the exception level we're running at if this is our mmu_idx */ +static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) +{ + assert(mmu_idx < ARMMMUIdx_S2NS); + return mmu_idx & 3; +} + +/* Determine the current mmu_idx to use for normal loads/stores */ +static inline int cpu_mmu_index(CPUARMState *env) +{ + int el = arm_current_el(env); + + if (el < 2 && arm_is_secure_below_el3(env)) { + return ARMMMUIdx_S1SE0 + el; + } + return el; +} + +/* Return the Exception Level targeted by debug exceptions; + * currently always EL1 since we don't implement EL2 or EL3. + */ +static inline int arm_debug_target_el(CPUARMState *env) +{ + return 1; +} + +static inline bool aa64_generate_debug_exceptions(CPUARMState *env) +{ + if (arm_current_el(env) == arm_debug_target_el(env)) { + if ((extract32(env->cp15.mdscr_el1, 13, 1) == 0) + || (env->daif & PSTATE_D)) { + return false; + } + } + return true; +} + +static inline bool aa32_generate_debug_exceptions(CPUARMState *env) +{ + if (arm_current_el(env) == 0 && arm_el_is_aa64(env, 1)) { + return aa64_generate_debug_exceptions(env); + } + return arm_current_el(env) != 2; +} + +/* Return true if debugging exceptions are currently enabled. + * This corresponds to what in ARM ARM pseudocode would be + * if UsingAArch32() then + * return AArch32.GenerateDebugExceptions() + * else + * return AArch64.GenerateDebugExceptions() + * We choose to push the if() down into this function for clarity, + * since the pseudocode has it at all callsites except for the one in + * CheckSoftwareStep(), where it is elided because both branches would + * always return the same value. + * + * Parts of the pseudocode relating to EL2 and EL3 are omitted because we + * don't yet implement those exception levels or their associated trap bits. + */ +static inline bool arm_generate_debug_exceptions(CPUARMState *env) +{ + if (env->aarch64) { + return aa64_generate_debug_exceptions(env); + } else { + return aa32_generate_debug_exceptions(env); + } +} + +/* Is single-stepping active? (Note that the "is EL_D AArch64?" check + * implicitly means this always returns false in pre-v8 CPUs.) + */ +static inline bool arm_singlestep_active(CPUARMState *env) +{ + return extract32(env->cp15.mdscr_el1, 0, 1) + && arm_el_is_aa64(env, arm_debug_target_el(env)) + && arm_generate_debug_exceptions(env); +} + +#include "exec/cpu-all.h" + +/* Bit usage in the TB flags field: bit 31 indicates whether we are + * in 32 or 64 bit mode. The meaning of the other bits depends on that. + * We put flags which are shared between 32 and 64 bit mode at the top + * of the word, and flags which apply to only one mode at the bottom. + */ +#define ARM_TBFLAG_AARCH64_STATE_SHIFT 31 +#define ARM_TBFLAG_AARCH64_STATE_MASK (1U << ARM_TBFLAG_AARCH64_STATE_SHIFT) +#define ARM_TBFLAG_MMUIDX_SHIFT 28 +#define ARM_TBFLAG_MMUIDX_MASK (0x7 << ARM_TBFLAG_MMUIDX_SHIFT) +#define ARM_TBFLAG_SS_ACTIVE_SHIFT 27 +#define ARM_TBFLAG_SS_ACTIVE_MASK (1 << ARM_TBFLAG_SS_ACTIVE_SHIFT) +#define ARM_TBFLAG_PSTATE_SS_SHIFT 26 +#define ARM_TBFLAG_PSTATE_SS_MASK (1 << ARM_TBFLAG_PSTATE_SS_SHIFT) +/* Target EL if we take a floating-point-disabled exception */ +#define ARM_TBFLAG_FPEXC_EL_SHIFT 24 +#define ARM_TBFLAG_FPEXC_EL_MASK (0x3 << ARM_TBFLAG_FPEXC_EL_SHIFT) + +/* Bit usage when in AArch32 state: */ +#define ARM_TBFLAG_THUMB_SHIFT 0 +#define ARM_TBFLAG_THUMB_MASK (1 << ARM_TBFLAG_THUMB_SHIFT) +#define ARM_TBFLAG_VECLEN_SHIFT 1 +#define ARM_TBFLAG_VECLEN_MASK (0x7 << ARM_TBFLAG_VECLEN_SHIFT) +#define ARM_TBFLAG_VECSTRIDE_SHIFT 4 +#define ARM_TBFLAG_VECSTRIDE_MASK (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT) +#define ARM_TBFLAG_VFPEN_SHIFT 7 +#define ARM_TBFLAG_VFPEN_MASK (1 << ARM_TBFLAG_VFPEN_SHIFT) +#define ARM_TBFLAG_CONDEXEC_SHIFT 8 +#define ARM_TBFLAG_CONDEXEC_MASK (0xff << ARM_TBFLAG_CONDEXEC_SHIFT) +#define ARM_TBFLAG_BSWAP_CODE_SHIFT 16 +#define ARM_TBFLAG_BSWAP_CODE_MASK (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT) +/* We store the bottom two bits of the CPAR as TB flags and handle + * checks on the other bits at runtime + */ +#define ARM_TBFLAG_XSCALE_CPAR_SHIFT 17 +#define ARM_TBFLAG_XSCALE_CPAR_MASK (3 << ARM_TBFLAG_XSCALE_CPAR_SHIFT) +/* Indicates whether cp register reads and writes by guest code should access + * the secure or nonsecure bank of banked registers; note that this is not + * the same thing as the current security state of the processor! + */ +#define ARM_TBFLAG_NS_SHIFT 19 +#define ARM_TBFLAG_NS_MASK (1 << ARM_TBFLAG_NS_SHIFT) + +/* Bit usage when in AArch64 state: currently we have no A64 specific bits */ + +/* some convenience accessor macros */ +#define ARM_TBFLAG_AARCH64_STATE(F) \ + (((F) & ARM_TBFLAG_AARCH64_STATE_MASK) >> ARM_TBFLAG_AARCH64_STATE_SHIFT) +#define ARM_TBFLAG_MMUIDX(F) \ + (((F) & ARM_TBFLAG_MMUIDX_MASK) >> ARM_TBFLAG_MMUIDX_SHIFT) +#define ARM_TBFLAG_SS_ACTIVE(F) \ + (((F) & ARM_TBFLAG_SS_ACTIVE_MASK) >> ARM_TBFLAG_SS_ACTIVE_SHIFT) +#define ARM_TBFLAG_PSTATE_SS(F) \ + (((F) & ARM_TBFLAG_PSTATE_SS_MASK) >> ARM_TBFLAG_PSTATE_SS_SHIFT) +#define ARM_TBFLAG_FPEXC_EL(F) \ + (((F) & ARM_TBFLAG_FPEXC_EL_MASK) >> ARM_TBFLAG_FPEXC_EL_SHIFT) +#define ARM_TBFLAG_THUMB(F) \ + (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT) +#define ARM_TBFLAG_VECLEN(F) \ + (((F) & ARM_TBFLAG_VECLEN_MASK) >> ARM_TBFLAG_VECLEN_SHIFT) +#define ARM_TBFLAG_VECSTRIDE(F) \ + (((F) & ARM_TBFLAG_VECSTRIDE_MASK) >> ARM_TBFLAG_VECSTRIDE_SHIFT) +#define ARM_TBFLAG_VFPEN(F) \ + (((F) & ARM_TBFLAG_VFPEN_MASK) >> ARM_TBFLAG_VFPEN_SHIFT) +#define ARM_TBFLAG_CONDEXEC(F) \ + (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT) +#define ARM_TBFLAG_BSWAP_CODE(F) \ + (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT) +#define ARM_TBFLAG_XSCALE_CPAR(F) \ + (((F) & ARM_TBFLAG_XSCALE_CPAR_MASK) >> ARM_TBFLAG_XSCALE_CPAR_SHIFT) +#define ARM_TBFLAG_NS(F) \ + (((F) & ARM_TBFLAG_NS_MASK) >> ARM_TBFLAG_NS_SHIFT) + +/* Return the exception level to which FP-disabled exceptions should + * be taken, or 0 if FP is enabled. + */ +static inline int fp_exception_el(CPUARMState *env) +{ + int fpen; + int cur_el = arm_current_el(env); + + /* CPACR and the CPTR registers don't exist before v6, so FP is + * always accessible + */ + if (!arm_feature(env, ARM_FEATURE_V6)) { + return 0; + } + + /* The CPACR controls traps to EL1, or PL1 if we're 32 bit: + * 0, 2 : trap EL0 and EL1/PL1 accesses + * 1 : trap only EL0 accesses + * 3 : trap no accesses + */ + fpen = extract32(env->cp15.cpacr_el1, 20, 2); + switch (fpen) { + case 0: + case 2: + if (cur_el == 0 || cur_el == 1) { + /* Trap to PL1, which might be EL1 or EL3 */ + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { + return 3; + } + return 1; + } + if (cur_el == 3 && !is_a64(env)) { + /* Secure PL1 running at EL3 */ + return 3; + } + break; + case 1: + if (cur_el == 0) { + return 1; + } + break; + case 3: + break; + } + + /* For the CPTR registers we don't need to guard with an ARM_FEATURE + * check because zero bits in the registers mean "don't trap". + */ + + /* CPTR_EL2 : present in v7VE or v8 */ + if (cur_el <= 2 && extract32(env->cp15.cptr_el[2], 10, 1) + && !arm_is_secure_below_el3(env)) { + /* Trap FP ops at EL2, NS-EL1 or NS-EL0 to EL2 */ + return 2; + } + + /* CPTR_EL3 : present in v8 */ + if (extract32(env->cp15.cptr_el[3], 10, 1)) { + /* Trap all FP ops to EL3 */ + return 3; + } + + return 0; +} + +static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, + target_ulong *cs_base, int *flags) +{ + if (is_a64(env)) { + *pc = env->pc; + *flags = ARM_TBFLAG_AARCH64_STATE_MASK; + } else { + *pc = env->regs[15]; + *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) + | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT) + | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT) + | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT) + | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT); + if (!(access_secure_reg(env))) { + *flags |= ARM_TBFLAG_NS_MASK; + } + if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30) + || arm_el_is_aa64(env, 1)) { + *flags |= ARM_TBFLAG_VFPEN_MASK; + } + *flags |= (extract32(env->cp15.c15_cpar, 0, 2) + << ARM_TBFLAG_XSCALE_CPAR_SHIFT); + } + + *flags |= (cpu_mmu_index(env) << ARM_TBFLAG_MMUIDX_SHIFT); + /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine + * states defined in the ARM ARM for software singlestep: + * SS_ACTIVE PSTATE.SS State + * 0 x Inactive (the TB flag for SS is always 0) + * 1 0 Active-pending + * 1 1 Active-not-pending + */ + if (arm_singlestep_active(env)) { + *flags |= ARM_TBFLAG_SS_ACTIVE_MASK; + if (is_a64(env)) { + if (env->pstate & PSTATE_SS) { + *flags |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } else { + if (env->uncached_cpsr & PSTATE_SS) { + *flags |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } + } + *flags |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; + + *cs_base = 0; +} + +#include "exec/exec-all.h" + +enum { + QEMU_PSCI_CONDUIT_DISABLED = 0, + QEMU_PSCI_CONDUIT_SMC = 1, + QEMU_PSCI_CONDUIT_HVC = 2, +}; + +#endif diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c new file mode 100644 index 00000000..63c8b1cf --- /dev/null +++ b/target-arm/cpu64.c @@ -0,0 +1,342 @@ +/* + * QEMU AArch64 CPU + * + * Copyright (c) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ + +#include "cpu.h" +#include "qemu-common.h" +#if !defined(CONFIG_USER_ONLY) +#include "hw/loader.h" +#endif +#include "hw/arm/arm.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" + +static inline void set_feature(CPUARMState *env, int feature) +{ + env->features |= 1ULL << feature; +} + +static inline void unset_feature(CPUARMState *env, int feature) +{ + env->features &= ~(1ULL << feature); +} + +#ifndef CONFIG_USER_ONLY +static uint64_t a57_a53_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Number of processors is in [25:24]; otherwise we RAZ */ + return (smp_cpus - 1) << 24; +} +#endif + +static const ARMCPRegInfo cortex_a57_a53_cp_reginfo[] = { +#ifndef CONFIG_USER_ONLY + { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2, + .access = PL1_RW, .readfn = a57_a53_l2ctlr_read, + .writefn = arm_cp_write_ignore }, + { .name = "L2CTLR", + .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 2, + .access = PL1_RW, .readfn = a57_a53_l2ctlr_read, + .writefn = arm_cp_write_ignore }, +#endif + { .name = "L2ECTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 3, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "L2ECTLR", + .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 3, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "L2ACTLR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUACTLR", + .cp = 15, .opc1 = 0, .crm = 15, + .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, + { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUECTLR", + .cp = 15, .opc1 = 1, .crm = 15, + .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, + { .name = "CPUMERRSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUMERRSR", + .cp = 15, .opc1 = 2, .crm = 15, + .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, + { .name = "L2MERRSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 3, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "L2MERRSR", + .cp = 15, .opc1 = 3, .crm = 15, + .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static void aarch64_a57_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a57"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_V8_AES); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); + set_feature(&cpu->env, ARM_FEATURE_CRC); + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57; + cpu->midr = 0x411fd070; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034070; + cpu->mvfr0 = 0x10110222; + cpu->mvfr1 = 0x12111111; + cpu->mvfr2 = 0x00000043; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; + cpu->id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10101105; + cpu->id_mmfr1 = 0x40000000; + cpu->id_mmfr2 = 0x01260000; + cpu->id_mmfr3 = 0x02102211; + cpu->id_isar0 = 0x02101110; + cpu->id_isar1 = 0x13112111; + cpu->id_isar2 = 0x21232042; + cpu->id_isar3 = 0x01112131; + cpu->id_isar4 = 0x00011142; + cpu->id_isar5 = 0x00011121; + cpu->id_aa64pfr0 = 0x00002222; + cpu->id_aa64dfr0 = 0x10305106; + cpu->id_aa64isar0 = 0x00011120; + cpu->id_aa64mmfr0 = 0x00001124; + cpu->dbgdidr = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ + cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ + cpu->dcz_blocksize = 4; /* 64 bytes */ + define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo); +} + +static void aarch64_a53_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a53"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_V8_AES); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); + set_feature(&cpu->env, ARM_FEATURE_CRC); + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A53; + cpu->midr = 0x410fd034; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034070; + cpu->mvfr0 = 0x10110222; + cpu->mvfr1 = 0x12111111; + cpu->mvfr2 = 0x00000043; + cpu->ctr = 0x84448004; /* L1Ip = VIPT */ + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; + cpu->id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10101105; + cpu->id_mmfr1 = 0x40000000; + cpu->id_mmfr2 = 0x01260000; + cpu->id_mmfr3 = 0x02102211; + cpu->id_isar0 = 0x02101110; + cpu->id_isar1 = 0x13112111; + cpu->id_isar2 = 0x21232042; + cpu->id_isar3 = 0x01112131; + cpu->id_isar4 = 0x00011142; + cpu->id_isar5 = 0x00011121; + cpu->id_aa64pfr0 = 0x00002222; + cpu->id_aa64dfr0 = 0x10305106; + cpu->id_aa64isar0 = 0x00011120; + cpu->id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ + cpu->dbgdidr = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ + cpu->ccsidr[2] = 0x707fe07a; /* 1024KB L2 cache */ + cpu->dcz_blocksize = 4; /* 64 bytes */ + define_arm_cp_regs(cpu, cortex_a57_a53_cp_reginfo); +} + +#ifdef CONFIG_USER_ONLY +static void aarch64_any_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_V8_AES); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA1); + set_feature(&cpu->env, ARM_FEATURE_V8_SHA256); + set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); + set_feature(&cpu->env, ARM_FEATURE_CRC); + cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */ + cpu->dcz_blocksize = 7; /* 512 bytes */ +} +#endif + +typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); +} ARMCPUInfo; + +static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, +#ifdef CONFIG_USER_ONLY + { .name = "any", .initfn = aarch64_any_initfn }, +#endif + { .name = NULL } +}; + +static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + return arm_feature(&cpu->env, ARM_FEATURE_AARCH64); +} + +static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + + /* At this time, this property is only allowed if KVM is enabled. This + * restriction allows us to avoid fixing up functionality that assumes a + * uniform execution state like do_interrupt. + */ + if (!kvm_enabled()) { + error_setg(errp, "'aarch64' feature cannot be disabled " + "unless KVM is enabled"); + return; + } + + if (value == false) { + unset_feature(&cpu->env, ARM_FEATURE_AARCH64); + } else { + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + } +} + +static void aarch64_cpu_initfn(Object *obj) +{ + object_property_add_bool(obj, "aarch64", aarch64_cpu_get_aarch64, + aarch64_cpu_set_aarch64, NULL); + object_property_set_description(obj, "aarch64", + "Set on/off to enable/disable aarch64 " + "execution state ", + NULL); +} + +static void aarch64_cpu_finalizefn(Object *obj) +{ +} + +static void aarch64_cpu_set_pc(CPUState *cs, vaddr value) +{ + ARMCPU *cpu = ARM_CPU(cs); + /* It's OK to look at env for the current mode here, because it's + * never possible for an AArch64 TB to chain to an AArch32 TB. + * (Otherwise we would need to use synchronize_from_tb instead.) + */ + if (is_a64(&cpu->env)) { + cpu->env.pc = value; + } else { + cpu->env.regs[15] = value; + } +} + +static void aarch64_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + +#if !defined(CONFIG_USER_ONLY) + cc->do_interrupt = aarch64_cpu_do_interrupt; +#endif + cc->cpu_exec_interrupt = arm_cpu_exec_interrupt; + cc->set_pc = aarch64_cpu_set_pc; + cc->gdb_read_register = aarch64_cpu_gdb_read_register; + cc->gdb_write_register = aarch64_cpu_gdb_write_register; + cc->gdb_num_core_regs = 34; + cc->gdb_core_xml_file = "aarch64-core.xml"; +} + +static void aarch64_cpu_register(const ARMCPUInfo *info) +{ + TypeInfo type_info = { + .parent = TYPE_AARCH64_CPU, + .instance_size = sizeof(ARMCPU), + .instance_init = info->initfn, + .class_size = sizeof(ARMCPUClass), + .class_init = info->class_init, + }; + + type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name); + type_register(&type_info); + g_free((void *)type_info.name); +} + +static const TypeInfo aarch64_cpu_type_info = { + .name = TYPE_AARCH64_CPU, + .parent = TYPE_ARM_CPU, + .instance_size = sizeof(ARMCPU), + .instance_init = aarch64_cpu_initfn, + .instance_finalize = aarch64_cpu_finalizefn, + .abstract = true, + .class_size = sizeof(AArch64CPUClass), + .class_init = aarch64_cpu_class_init, +}; + +static void aarch64_cpu_register_types(void) +{ + const ARMCPUInfo *info = aarch64_cpus; + + type_register_static(&aarch64_cpu_type_info); + + while (info->name) { + aarch64_cpu_register(info); + info++; + } +} + +type_init(aarch64_cpu_register_types) diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c new file mode 100644 index 00000000..5d228380 --- /dev/null +++ b/target-arm/crypto_helper.c @@ -0,0 +1,465 @@ +/* + * crypto_helper.c - emulate v8 Crypto Extensions instructions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + */ + +#include + +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "crypto/aes.h" + +union CRYPTO_STATE { + uint8_t bytes[16]; + uint32_t words[4]; + uint64_t l[2]; +}; + +#ifdef HOST_WORDS_BIGENDIAN +#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8]) +#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2]) +#else +#define CR_ST_BYTE(state, i) (state.bytes[i]) +#define CR_ST_WORD(state, i) (state.words[i]) +#endif + +void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm, + uint32_t decrypt) +{ + static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; + static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; + + union CRYPTO_STATE rk = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + union CRYPTO_STATE st = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + int i; + + assert(decrypt < 2); + + /* xor state vector with round key */ + rk.l[0] ^= st.l[0]; + rk.l[1] ^= st.l[1]; + + /* combine ShiftRows operation and sbox substitution */ + for (i = 0; i < 16; i++) { + CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])]; + } + + env->vfp.regs[rd] = make_float64(st.l[0]); + env->vfp.regs[rd + 1] = make_float64(st.l[1]); +} + +void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm, + uint32_t decrypt) +{ + static uint32_t const mc[][256] = { { + /* MixColumns lookup table */ + 0x00000000, 0x03010102, 0x06020204, 0x05030306, + 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e, + 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16, + 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e, + 0x30101020, 0x33111122, 0x36121224, 0x35131326, + 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e, + 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36, + 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e, + 0x60202040, 0x63212142, 0x66222244, 0x65232346, + 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e, + 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56, + 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e, + 0x50303060, 0x53313162, 0x56323264, 0x55333366, + 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e, + 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76, + 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e, + 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386, + 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e, + 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96, + 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e, + 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6, + 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae, + 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6, + 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe, + 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6, + 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce, + 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6, + 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde, + 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6, + 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee, + 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6, + 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe, + 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d, + 0x97848413, 0x94858511, 0x91868617, 0x92878715, + 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d, + 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05, + 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d, + 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735, + 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d, + 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25, + 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d, + 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755, + 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d, + 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45, + 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d, + 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775, + 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d, + 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65, + 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d, + 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795, + 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d, + 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85, + 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd, + 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5, + 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad, + 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5, + 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd, + 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5, + 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd, + 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5, + 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd, + 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5, + 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed, + 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5, + }, { + /* Inverse MixColumns lookup table */ + 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, + 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a, + 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, + 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, + 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2, + 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, + 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, + 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba, + 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, + 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, + 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9, + 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, + 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, + 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411, + 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, + 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, + 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf, + 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, + 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, + 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7, + 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, + 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, + 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f, + 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, + 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, + 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c, + 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, + 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, + 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684, + 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, + 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, + 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc, + 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, + 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, + 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23, + 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, + 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, + 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b, + 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, + 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, + 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88, + 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, + 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, + 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0, + 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, + 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, + 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418, + 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, + 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, + 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6, + 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, + 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, + 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e, + 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, + 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, + 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56, + 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, + 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, + 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255, + 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, + 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, + 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd, + 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, + 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, + } }; + union CRYPTO_STATE st = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + int i; + + assert(decrypt < 2); + + for (i = 0; i < 16; i += 4) { + CR_ST_WORD(st, i >> 2) = + mc[decrypt][CR_ST_BYTE(st, i)] ^ + rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^ + rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^ + rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24); + } + + env->vfp.regs[rd] = make_float64(st.l[0]); + env->vfp.regs[rd + 1] = make_float64(st.l[1]); +} + +/* + * SHA-1 logical functions + */ + +static uint32_t cho(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & (y ^ z)) ^ z; +} + +static uint32_t par(uint32_t x, uint32_t y, uint32_t z) +{ + return x ^ y ^ z; +} + +static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | ((x | y) & z); +} + +void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm, uint32_t op) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + if (op == 3) { /* sha1su0 */ + d.l[0] ^= d.l[1] ^ m.l[0]; + d.l[1] ^= n.l[0] ^ m.l[1]; + } else { + int i; + + for (i = 0; i < 4; i++) { + uint32_t t; + + switch (op) { + case 0: /* sha1c */ + t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); + break; + case 1: /* sha1p */ + t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); + break; + case 2: /* sha1m */ + t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3)); + break; + default: + g_assert_not_reached(); + } + t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0) + + CR_ST_WORD(m, i); + + CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3); + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = t; + } + } + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2); + CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0; + + env->vfp.regs[rd] = make_float64(m.l[0]); + env->vfp.regs[rd + 1] = make_float64(m.l[1]); +} + +void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1); + CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1); + CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1); + CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1); + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +/* + * The SHA-256 logical functions, according to + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf + */ + +static uint32_t S0(uint32_t x) +{ + return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); +} + +static uint32_t S1(uint32_t x) +{ + return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); +} + +static uint32_t s0(uint32_t x) +{ + return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); +} + +static uint32_t s1(uint32_t x) +{ + return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); +} + +void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + int i; + + for (i = 0; i < 4; i++) { + uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2)) + + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0)) + + CR_ST_WORD(m, i); + + CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2); + CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1); + CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0); + CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t; + + t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) + + S0(CR_ST_WORD(d, 0)); + + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = t; + } + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + int i; + + for (i = 0; i < 4; i++) { + uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2)) + + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0)) + + CR_ST_WORD(m, i); + + CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2); + CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1); + CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0); + CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t; + } + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1)); + CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2)); + CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3)); + CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0)); + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} + +void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn, + uint32_t rm) +{ + union CRYPTO_STATE d = { .l = { + float64_val(env->vfp.regs[rd]), + float64_val(env->vfp.regs[rd + 1]) + } }; + union CRYPTO_STATE n = { .l = { + float64_val(env->vfp.regs[rn]), + float64_val(env->vfp.regs[rn + 1]) + } }; + union CRYPTO_STATE m = { .l = { + float64_val(env->vfp.regs[rm]), + float64_val(env->vfp.regs[rm + 1]) + } }; + + CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1); + CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2); + CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3); + CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0); + + env->vfp.regs[rd] = make_float64(d.l[0]); + env->vfp.regs[rd + 1] = make_float64(d.l[1]); +} diff --git a/target-arm/gdbstub.c b/target-arm/gdbstub.c new file mode 100644 index 00000000..1c343965 --- /dev/null +++ b/target-arm/gdbstub.c @@ -0,0 +1,102 @@ +/* + * ARM gdb server stub + * + * Copyright (c) 2003-2005 Fabrice Bellard + * Copyright (c) 2013 SUSE LINUX Products GmbH + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include "config.h" +#include "qemu-common.h" +#include "exec/gdbstub.h" + +/* Old gdb always expect FPA registers. Newer (xml-aware) gdb only expect + whatever the target description contains. Due to a historical mishap + the FPA registers appear in between core integer regs and the CPSR. + We hack round this by giving the FPA regs zero size when talking to a + newer gdb. */ + +int arm_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + if (n < 16) { + /* Core integer register. */ + return gdb_get_reg32(mem_buf, env->regs[n]); + } + if (n < 24) { + /* FPA registers. */ + if (gdb_has_xml) { + return 0; + } + memset(mem_buf, 0, 12); + return 12; + } + switch (n) { + case 24: + /* FPA status register. */ + if (gdb_has_xml) { + return 0; + } + return gdb_get_reg32(mem_buf, 0); + case 25: + /* CPSR */ + return gdb_get_reg32(mem_buf, cpsr_read(env)); + } + /* Unknown register. */ + return 0; +} + +int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint32_t tmp; + + tmp = ldl_p(mem_buf); + + /* Mask out low bit of PC to workaround gdb bugs. This will probably + cause problems if we ever implement the Jazelle DBX extensions. */ + if (n == 15) { + tmp &= ~1; + } + + if (n < 16) { + /* Core integer register. */ + env->regs[n] = tmp; + return 4; + } + if (n < 24) { /* 16-23 */ + /* FPA registers (ignored). */ + if (gdb_has_xml) { + return 0; + } + return 12; + } + switch (n) { + case 24: + /* FPA status register (ignored). */ + if (gdb_has_xml) { + return 0; + } + return 4; + case 25: + /* CPSR */ + cpsr_write(env, tmp, 0xffffffff); + return 4; + } + /* Unknown register. */ + return 0; +} diff --git a/target-arm/gdbstub64.c b/target-arm/gdbstub64.c new file mode 100644 index 00000000..8f3b8d17 --- /dev/null +++ b/target-arm/gdbstub64.c @@ -0,0 +1,71 @@ +/* + * ARM gdb server stub: AArch64 specific functions. + * + * Copyright (c) 2013 SUSE LINUX Products GmbH + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include "config.h" +#include "qemu-common.h" +#include "exec/gdbstub.h" + +int aarch64_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + if (n < 31) { + /* Core integer register. */ + return gdb_get_reg64(mem_buf, env->xregs[n]); + } + switch (n) { + case 31: + return gdb_get_reg64(mem_buf, env->xregs[31]); + case 32: + return gdb_get_reg64(mem_buf, env->pc); + case 33: + return gdb_get_reg32(mem_buf, pstate_read(env)); + } + /* Unknown register. */ + return 0; +} + +int aarch64_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint64_t tmp; + + tmp = ldq_p(mem_buf); + + if (n < 31) { + /* Core integer register. */ + env->xregs[n] = tmp; + return 8; + } + switch (n) { + case 31: + env->xregs[31] = tmp; + return 8; + case 32: + env->pc = tmp; + return 8; + case 33: + /* CPSR */ + pstate_write(env, tmp); + return 4; + } + /* Unknown register. */ + return 0; +} diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c new file mode 100644 index 00000000..08c95a3f --- /dev/null +++ b/target-arm/helper-a64.c @@ -0,0 +1,546 @@ +/* + * AArch64 specific helpers + * + * Copyright (c) 2013 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "cpu.h" +#include "exec/gdbstub.h" +#include "exec/helper-proto.h" +#include "qemu/host-utils.h" +#include "sysemu/sysemu.h" +#include "qemu/bitops.h" +#include "internals.h" +#include "qemu/crc32c.h" +#include /* For crc32 */ + +/* C2.4.7 Multiply and divide */ +/* special cases for 0 and LLONG_MIN are mandated by the standard */ +uint64_t HELPER(udiv64)(uint64_t num, uint64_t den) +{ + if (den == 0) { + return 0; + } + return num / den; +} + +int64_t HELPER(sdiv64)(int64_t num, int64_t den) +{ + if (den == 0) { + return 0; + } + if (num == LLONG_MIN && den == -1) { + return LLONG_MIN; + } + return num / den; +} + +uint64_t HELPER(clz64)(uint64_t x) +{ + return clz64(x); +} + +uint64_t HELPER(cls64)(uint64_t x) +{ + return clrsb64(x); +} + +uint32_t HELPER(cls32)(uint32_t x) +{ + return clrsb32(x); +} + +uint32_t HELPER(clz32)(uint32_t x) +{ + return clz32(x); +} + +uint64_t HELPER(rbit64)(uint64_t x) +{ + /* assign the correct byte position */ + x = bswap64(x); + + /* assign the correct nibble position */ + x = ((x & 0xf0f0f0f0f0f0f0f0ULL) >> 4) + | ((x & 0x0f0f0f0f0f0f0f0fULL) << 4); + + /* assign the correct bit position */ + x = ((x & 0x8888888888888888ULL) >> 3) + | ((x & 0x4444444444444444ULL) >> 1) + | ((x & 0x2222222222222222ULL) << 1) + | ((x & 0x1111111111111111ULL) << 3); + + return x; +} + +/* Convert a softfloat float_relation_ (as returned by + * the float*_compare functions) to the correct ARM + * NZCV flag state. + */ +static inline uint32_t float_rel_to_flags(int res) +{ + uint64_t flags; + switch (res) { + case float_relation_equal: + flags = PSTATE_Z | PSTATE_C; + break; + case float_relation_less: + flags = PSTATE_N; + break; + case float_relation_greater: + flags = PSTATE_C; + break; + case float_relation_unordered: + default: + flags = PSTATE_C | PSTATE_V; + break; + } + return flags; +} + +uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status) +{ + return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status) +{ + return float_rel_to_flags(float32_compare(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status) +{ + return float_rel_to_flags(float64_compare_quiet(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status) +{ + return float_rel_to_flags(float64_compare(x, y, fp_status)); +} + +float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + + if ((float32_is_zero(a) && float32_is_infinity(b)) || + (float32_is_infinity(a) && float32_is_zero(b))) { + /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ + return make_float32((1U << 30) | + ((float32_val(a) ^ float32_val(b)) & (1U << 31))); + } + return float32_mul(a, b, fpst); +} + +float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + + if ((float64_is_zero(a) && float64_is_infinity(b)) || + (float64_is_infinity(a) && float64_is_zero(b))) { + /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ + return make_float64((1ULL << 62) | + ((float64_val(a) ^ float64_val(b)) & (1ULL << 63))); + } + return float64_mul(a, b, fpst); +} + +uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, + uint32_t rn, uint32_t numregs) +{ + /* Helper function for SIMD TBL and TBX. We have to do the table + * lookup part for the 64 bits worth of indices we're passed in. + * result is the initial results vector (either zeroes for TBL + * or some guest values for TBX), rn the register number where + * the table starts, and numregs the number of registers in the table. + * We return the results of the lookups. + */ + int shift; + + for (shift = 0; shift < 64; shift += 8) { + int index = extract64(indices, shift, 8); + if (index < 16 * numregs) { + /* Convert index (a byte offset into the virtual table + * which is a series of 128-bit vectors concatenated) + * into the correct vfp.regs[] element plus a bit offset + * into that element, bearing in mind that the table + * can wrap around from V31 to V0. + */ + int elt = (rn * 2 + (index >> 3)) % 64; + int bitidx = (index & 7) * 8; + uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8); + + result = deposit64(result, shift, 8, val); + } + } + return result; +} + +/* 64bit/double versions of the neon float compare functions */ +uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_eq_quiet(a, b, fpst); +} + +uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_le(b, a, fpst); +} + +uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_lt(b, a, fpst); +} + +/* Reciprocal step and sqrt step. Note that unlike the A32/T32 + * versions, these do a fully fused multiply-add or + * multiply-add-and-halve. + */ +#define float32_two make_float32(0x40000000) +#define float32_three make_float32(0x40400000) +#define float32_one_point_five make_float32(0x3fc00000) + +#define float64_two make_float64(0x4000000000000000ULL) +#define float64_three make_float64(0x4008000000000000ULL) +#define float64_one_point_five make_float64(0x3FF8000000000000ULL) + +float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + + a = float32_chs(a); + if ((float32_is_infinity(a) && float32_is_zero(b)) || + (float32_is_infinity(b) && float32_is_zero(a))) { + return float32_two; + } + return float32_muladd(a, b, float32_two, 0, fpst); +} + +float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + + a = float64_chs(a); + if ((float64_is_infinity(a) && float64_is_zero(b)) || + (float64_is_infinity(b) && float64_is_zero(a))) { + return float64_two; + } + return float64_muladd(a, b, float64_two, 0, fpst); +} + +float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + + a = float32_chs(a); + if ((float32_is_infinity(a) && float32_is_zero(b)) || + (float32_is_infinity(b) && float32_is_zero(a))) { + return float32_one_point_five; + } + return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst); +} + +float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + + a = float64_chs(a); + if ((float64_is_infinity(a) && float64_is_zero(b)) || + (float64_is_infinity(b) && float64_is_zero(a))) { + return float64_one_point_five; + } + return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); +} + +/* Pairwise long add: add pairs of adjacent elements into + * double-width elements in the result (eg _s8 is an 8x8->16 op) + */ +uint64_t HELPER(neon_addlp_s8)(uint64_t a) +{ + uint64_t nsignmask = 0x0080008000800080ULL; + uint64_t wsignmask = 0x8000800080008000ULL; + uint64_t elementmask = 0x00ff00ff00ff00ffULL; + uint64_t tmp1, tmp2; + uint64_t res, signres; + + /* Extract odd elements, sign extend each to a 16 bit field */ + tmp1 = a & elementmask; + tmp1 ^= nsignmask; + tmp1 |= wsignmask; + tmp1 = (tmp1 - nsignmask) ^ wsignmask; + /* Ditto for the even elements */ + tmp2 = (a >> 8) & elementmask; + tmp2 ^= nsignmask; + tmp2 |= wsignmask; + tmp2 = (tmp2 - nsignmask) ^ wsignmask; + + /* calculate the result by summing bits 0..14, 16..22, etc, + * and then adjusting the sign bits 15, 23, etc manually. + * This ensures the addition can't overflow the 16 bit field. + */ + signres = (tmp1 ^ tmp2) & wsignmask; + res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask); + res ^= signres; + + return res; +} + +uint64_t HELPER(neon_addlp_u8)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x00ff00ff00ff00ffULL; + tmp += (a >> 8) & 0x00ff00ff00ff00ffULL; + return tmp; +} + +uint64_t HELPER(neon_addlp_s16)(uint64_t a) +{ + int32_t reslo, reshi; + + reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16); + reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48); + + return (uint32_t)reslo | (((uint64_t)reshi) << 32); +} + +uint64_t HELPER(neon_addlp_u16)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x0000ffff0000ffffULL; + tmp += (a >> 16) & 0x0000ffff0000ffffULL; + return tmp; +} + +/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ +float32 HELPER(frecpx_f32)(float32 a, void *fpstp) +{ + float_status *fpst = fpstp; + uint32_t val32, sbit; + int32_t exp; + + if (float32_is_any_nan(a)) { + float32 nan = a; + if (float32_is_signaling_nan(a)) { + float_raise(float_flag_invalid, fpst); + nan = float32_maybe_silence_nan(a); + } + if (fpst->default_nan_mode) { + nan = float32_default_nan; + } + return nan; + } + + val32 = float32_val(a); + sbit = 0x80000000ULL & val32; + exp = extract32(val32, 23, 8); + + if (exp == 0) { + return make_float32(sbit | (0xfe << 23)); + } else { + return make_float32(sbit | (~exp & 0xff) << 23); + } +} + +float64 HELPER(frecpx_f64)(float64 a, void *fpstp) +{ + float_status *fpst = fpstp; + uint64_t val64, sbit; + int64_t exp; + + if (float64_is_any_nan(a)) { + float64 nan = a; + if (float64_is_signaling_nan(a)) { + float_raise(float_flag_invalid, fpst); + nan = float64_maybe_silence_nan(a); + } + if (fpst->default_nan_mode) { + nan = float64_default_nan; + } + return nan; + } + + val64 = float64_val(a); + sbit = 0x8000000000000000ULL & val64; + exp = extract64(float64_val(a), 52, 11); + + if (exp == 0) { + return make_float64(sbit | (0x7feULL << 52)); + } else { + return make_float64(sbit | (~exp & 0x7ffULL) << 52); + } +} + +float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env) +{ + /* Von Neumann rounding is implemented by using round-to-zero + * and then setting the LSB of the result if Inexact was raised. + */ + float32 r; + float_status *fpst = &env->vfp.fp_status; + float_status tstat = *fpst; + int exflags; + + set_float_rounding_mode(float_round_to_zero, &tstat); + set_float_exception_flags(0, &tstat); + r = float64_to_float32(a, &tstat); + r = float32_maybe_silence_nan(r); + exflags = get_float_exception_flags(&tstat); + if (exflags & float_flag_inexact) { + r = make_float32(float32_val(r) | 1); + } + exflags |= get_float_exception_flags(fpst); + set_float_exception_flags(exflags, fpst); + return r; +} + +/* 64-bit versions of the CRC helpers. Note that although the operation + * (and the prototypes of crc32c() and crc32() mean that only the bottom + * 32 bits of the accumulator and result are used, we pass and return + * uint64_t for convenience of the generated code. Unlike the 32-bit + * instruction set versions, val may genuinely have 64 bits of data in it. + * The upper bytes of val (above the number specified by 'bytes') must have + * been zeroed out by the caller. + */ +uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes) +{ + uint8_t buf[8]; + + stq_le_p(buf, val); + + /* zlib crc32 converts the accumulator and output to one's complement. */ + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; +} + +uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) +{ + uint8_t buf[8]; + + stq_le_p(buf, val); + + /* Linux crc32c converts the output to one's complement. */ + return crc32c(acc, buf, bytes) ^ 0xffffffff; +} + +#if !defined(CONFIG_USER_ONLY) + +/* Handle a CPU exception. */ +void aarch64_cpu_do_interrupt(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + unsigned int new_el = env->exception.target_el; + target_ulong addr = env->cp15.vbar_el[new_el]; + unsigned int new_mode = aarch64_pstate_mode(new_el, true); + + if (arm_current_el(env) < new_el) { + if (env->aarch64) { + addr += 0x400; + } else { + addr += 0x600; + } + } else if (pstate_read(env) & PSTATE_SP) { + addr += 0x200; + } + + arm_log_exception(cs->exception_index); + qemu_log_mask(CPU_LOG_INT, "...from EL%d\n", arm_current_el(env)); + if (qemu_loglevel_mask(CPU_LOG_INT) + && !excp_is_internal(cs->exception_index)) { + qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%" PRIx32 "\n", + env->exception.syndrome); + } + + if (arm_is_psci_call(cpu, cs->exception_index)) { + arm_handle_psci_call(cpu); + qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n"); + return; + } + + switch (cs->exception_index) { + case EXCP_PREFETCH_ABORT: + case EXCP_DATA_ABORT: + env->cp15.far_el[new_el] = env->exception.vaddress; + qemu_log_mask(CPU_LOG_INT, "...with FAR 0x%" PRIx64 "\n", + env->cp15.far_el[new_el]); + /* fall through */ + case EXCP_BKPT: + case EXCP_UDEF: + case EXCP_SWI: + case EXCP_HVC: + case EXCP_HYP_TRAP: + case EXCP_SMC: + env->cp15.esr_el[new_el] = env->exception.syndrome; + break; + case EXCP_IRQ: + case EXCP_VIRQ: + addr += 0x80; + break; + case EXCP_FIQ: + case EXCP_VFIQ: + addr += 0x100; + break; + default: + cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); + } + + if (is_a64(env)) { + env->banked_spsr[aarch64_banked_spsr_index(new_el)] = pstate_read(env); + aarch64_save_sp(env, arm_current_el(env)); + env->elr_el[new_el] = env->pc; + } else { + env->banked_spsr[aarch64_banked_spsr_index(new_el)] = cpsr_read(env); + if (!env->thumb) { + env->cp15.esr_el[new_el] |= 1 << 25; + } + env->elr_el[new_el] = env->regs[15]; + + aarch64_sync_32_to_64(env); + + env->condexec_bits = 0; + } + qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n", + env->elr_el[new_el]); + + pstate_write(env, PSTATE_DAIF | new_mode); + env->aarch64 = 1; + aarch64_restore_sp(env, new_el); + + env->pc = addr; + cs->interrupt_request |= CPU_INTERRUPT_EXITTB; +} +#endif diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h new file mode 100644 index 00000000..1d3d10ff --- /dev/null +++ b/target-arm/helper-a64.h @@ -0,0 +1,48 @@ +/* + * AArch64 specific helper definitions + * + * Copyright (c) 2013 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +DEF_HELPER_FLAGS_2(udiv64, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_1(clz64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(cls64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(cls32, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(clz32, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr) +DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) +DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) +DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) +DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) +DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) +DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) +DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) +DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) +DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) +DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) +DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) +DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) +DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) +DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) +DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) +DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) diff --git a/target-arm/helper.c b/target-arm/helper.c new file mode 100644 index 00000000..01f0d0da --- /dev/null +++ b/target-arm/helper.c @@ -0,0 +1,7642 @@ +#include "cpu.h" +#include "internals.h" +#include "exec/gdbstub.h" +#include "exec/helper-proto.h" +#include "qemu/host-utils.h" +#include "sysemu/arch_init.h" +#include "sysemu/sysemu.h" +#include "qemu/bitops.h" +#include "qemu/crc32c.h" +#include "exec/cpu_ldst.h" +#include "arm_ldst.h" +#include /* For crc32 */ +#include "exec/semihost.h" + +#ifndef CONFIG_USER_ONLY +static inline bool get_phys_addr(CPUARMState *env, target_ulong address, + int access_type, ARMMMUIdx mmu_idx, + hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, + target_ulong *page_size, uint32_t *fsr); + +/* Definitions for the PMCCNTR and PMCR registers */ +#define PMCRD 0x8 +#define PMCRC 0x4 +#define PMCRE 0x1 +#endif + +static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) +{ + int nregs; + + /* VFP data registers are always little-endian. */ + nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16; + if (reg < nregs) { + stfq_le_p(buf, env->vfp.regs[reg]); + return 8; + } + if (arm_feature(env, ARM_FEATURE_NEON)) { + /* Aliases for Q regs. */ + nregs += 16; + if (reg < nregs) { + stfq_le_p(buf, env->vfp.regs[(reg - 32) * 2]); + stfq_le_p(buf + 8, env->vfp.regs[(reg - 32) * 2 + 1]); + return 16; + } + } + switch (reg - nregs) { + case 0: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSID]); return 4; + case 1: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSCR]); return 4; + case 2: stl_p(buf, env->vfp.xregs[ARM_VFP_FPEXC]); return 4; + } + return 0; +} + +static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) +{ + int nregs; + + nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16; + if (reg < nregs) { + env->vfp.regs[reg] = ldfq_le_p(buf); + return 8; + } + if (arm_feature(env, ARM_FEATURE_NEON)) { + nregs += 16; + if (reg < nregs) { + env->vfp.regs[(reg - 32) * 2] = ldfq_le_p(buf); + env->vfp.regs[(reg - 32) * 2 + 1] = ldfq_le_p(buf + 8); + return 16; + } + } + switch (reg - nregs) { + case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4; + case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4; + case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4; + } + return 0; +} + +static int aarch64_fpu_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) +{ + switch (reg) { + case 0 ... 31: + /* 128 bit FP register */ + stfq_le_p(buf, env->vfp.regs[reg * 2]); + stfq_le_p(buf + 8, env->vfp.regs[reg * 2 + 1]); + return 16; + case 32: + /* FPSR */ + stl_p(buf, vfp_get_fpsr(env)); + return 4; + case 33: + /* FPCR */ + stl_p(buf, vfp_get_fpcr(env)); + return 4; + default: + return 0; + } +} + +static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) +{ + switch (reg) { + case 0 ... 31: + /* 128 bit FP register */ + env->vfp.regs[reg * 2] = ldfq_le_p(buf); + env->vfp.regs[reg * 2 + 1] = ldfq_le_p(buf + 8); + return 16; + case 32: + /* FPSR */ + vfp_set_fpsr(env, ldl_p(buf)); + return 4; + case 33: + /* FPCR */ + vfp_set_fpcr(env, ldl_p(buf)); + return 4; + default: + return 0; + } +} + +static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + assert(ri->fieldoffset); + if (cpreg_field_is_64bit(ri)) { + return CPREG_FIELD64(env, ri); + } else { + return CPREG_FIELD32(env, ri); + } +} + +static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + assert(ri->fieldoffset); + if (cpreg_field_is_64bit(ri)) { + CPREG_FIELD64(env, ri) = value; + } else { + CPREG_FIELD32(env, ri) = value; + } +} + +static void *raw_ptr(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return (char *)env + ri->fieldoffset; +} + +static uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Raw read of a coprocessor register (as needed for migration, etc). */ + if (ri->type & ARM_CP_CONST) { + return ri->resetvalue; + } else if (ri->raw_readfn) { + return ri->raw_readfn(env, ri); + } else if (ri->readfn) { + return ri->readfn(env, ri); + } else { + return raw_read(env, ri); + } +} + +static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t v) +{ + /* Raw write of a coprocessor register (as needed for migration, etc). + * Note that constant registers are treated as write-ignored; the + * caller should check for success by whether a readback gives the + * value written. + */ + if (ri->type & ARM_CP_CONST) { + return; + } else if (ri->raw_writefn) { + ri->raw_writefn(env, ri, v); + } else if (ri->writefn) { + ri->writefn(env, ri, v); + } else { + raw_write(env, ri, v); + } +} + +static bool raw_accessors_invalid(const ARMCPRegInfo *ri) +{ + /* Return true if the regdef would cause an assertion if you called + * read_raw_cp_reg() or write_raw_cp_reg() on it (ie if it is a + * program bug for it not to have the NO_RAW flag). + * NB that returning false here doesn't necessarily mean that calling + * read/write_raw_cp_reg() is safe, because we can't distinguish "has + * read/write access functions which are safe for raw use" from "has + * read/write access functions which have side effects but has forgotten + * to provide raw access functions". + * The tests here line up with the conditions in read/write_raw_cp_reg() + * and assertions in raw_read()/raw_write(). + */ + if ((ri->type & ARM_CP_CONST) || + ri->fieldoffset || + ((ri->raw_writefn || ri->writefn) && (ri->raw_readfn || ri->readfn))) { + return false; + } + return true; +} + +bool write_cpustate_to_list(ARMCPU *cpu) +{ + /* Write the coprocessor state from cpu->env to the (index,value) list. */ + int i; + bool ok = true; + + for (i = 0; i < cpu->cpreg_array_len; i++) { + uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]); + const ARMCPRegInfo *ri; + + ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + if (!ri) { + ok = false; + continue; + } + if (ri->type & ARM_CP_NO_RAW) { + continue; + } + cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri); + } + return ok; +} + +bool write_list_to_cpustate(ARMCPU *cpu) +{ + int i; + bool ok = true; + + for (i = 0; i < cpu->cpreg_array_len; i++) { + uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]); + uint64_t v = cpu->cpreg_values[i]; + const ARMCPRegInfo *ri; + + ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + if (!ri) { + ok = false; + continue; + } + if (ri->type & ARM_CP_NO_RAW) { + continue; + } + /* Write value and confirm it reads back as written + * (to catch read-only registers and partially read-only + * registers where the incoming migration value doesn't match) + */ + write_raw_cp_reg(&cpu->env, ri, v); + if (read_raw_cp_reg(&cpu->env, ri) != v) { + ok = false; + } + } + return ok; +} + +static void add_cpreg_to_list(gpointer key, gpointer opaque) +{ + ARMCPU *cpu = opaque; + uint64_t regidx; + const ARMCPRegInfo *ri; + + regidx = *(uint32_t *)key; + ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + + if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { + cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx); + /* The value array need not be initialized at this point */ + cpu->cpreg_array_len++; + } +} + +static void count_cpreg(gpointer key, gpointer opaque) +{ + ARMCPU *cpu = opaque; + uint64_t regidx; + const ARMCPRegInfo *ri; + + regidx = *(uint32_t *)key; + ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + + if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { + cpu->cpreg_array_len++; + } +} + +static gint cpreg_key_compare(gconstpointer a, gconstpointer b) +{ + uint64_t aidx = cpreg_to_kvm_id(*(uint32_t *)a); + uint64_t bidx = cpreg_to_kvm_id(*(uint32_t *)b); + + if (aidx > bidx) { + return 1; + } + if (aidx < bidx) { + return -1; + } + return 0; +} + +void init_cpreg_list(ARMCPU *cpu) +{ + /* Initialise the cpreg_tuples[] array based on the cp_regs hash. + * Note that we require cpreg_tuples[] to be sorted by key ID. + */ + GList *keys; + int arraylen; + + keys = g_hash_table_get_keys(cpu->cp_regs); + keys = g_list_sort(keys, cpreg_key_compare); + + cpu->cpreg_array_len = 0; + + g_list_foreach(keys, count_cpreg, cpu); + + arraylen = cpu->cpreg_array_len; + cpu->cpreg_indexes = g_new(uint64_t, arraylen); + cpu->cpreg_values = g_new(uint64_t, arraylen); + cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen); + cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen); + cpu->cpreg_vmstate_array_len = cpu->cpreg_array_len; + cpu->cpreg_array_len = 0; + + g_list_foreach(keys, add_cpreg_to_list, cpu); + + assert(cpu->cpreg_array_len == arraylen); + + g_list_free(keys); +} + +static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + raw_write(env, ri, value); + tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */ +} + +static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (raw_read(env, ri) != value) { + /* Unlike real hardware the qemu TLB uses virtual addresses, + * not modified virtual addresses, so this causes a TLB flush. + */ + tlb_flush(CPU(cpu), 1); + raw_write(env, ri, value); + } +} + +static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_MPU) + && !extended_addresses_enabled(env)) { + /* For VMSA (when not using the LPAE long descriptor page table + * format) this register includes the ASID, so do a TLB flush. + * For PMSA it is purely a process ID and no action is needed. + */ + tlb_flush(CPU(cpu), 1); + } + raw_write(env, ri, value); +} + +static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate all (TLBIALL) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush(CPU(cpu), 1); +} + +static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); +} + +static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by ASID (TLBIASID) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush(CPU(cpu), value == 0); +} + +static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); +} + +/* IS variants of TLB operations must affect all cores */ +static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *other_cs; + + CPU_FOREACH(other_cs) { + tlb_flush(other_cs, 1); + } +} + +static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *other_cs; + + CPU_FOREACH(other_cs) { + tlb_flush(other_cs, value == 0); + } +} + +static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *other_cs; + + CPU_FOREACH(other_cs) { + tlb_flush_page(other_cs, value & TARGET_PAGE_MASK); + } +} + +static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *other_cs; + + CPU_FOREACH(other_cs) { + tlb_flush_page(other_cs, value & TARGET_PAGE_MASK); + } +} + +static const ARMCPRegInfo cp_reginfo[] = { + /* Define the secure and non-secure FCSE identifier CP registers + * separately because there is no secure bank in V8 (no _EL3). This allows + * the secure register to be properly reset and migrated. There is also no + * v8 EL1 version of the register so the non-secure instance stands alone. + */ + { .name = "FCSEIDR(NS)", + .cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 0, + .access = PL1_RW, .secure = ARM_CP_SECSTATE_NS, + .fieldoffset = offsetof(CPUARMState, cp15.fcseidr_ns), + .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, }, + { .name = "FCSEIDR(S)", + .cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 0, + .access = PL1_RW, .secure = ARM_CP_SECSTATE_S, + .fieldoffset = offsetof(CPUARMState, cp15.fcseidr_s), + .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, }, + /* Define the secure and non-secure context identifier CP registers + * separately because there is no secure bank in V8 (no _EL3). This allows + * the secure register to be properly reset and migrated. In the + * non-secure case, the 32-bit register will have reset and migration + * disabled during registration as it is handled by the 64-bit instance. + */ + { .name = "CONTEXTIDR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1, + .access = PL1_RW, .secure = ARM_CP_SECSTATE_NS, + .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]), + .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, }, + { .name = "CONTEXTIDR(S)", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1, + .access = PL1_RW, .secure = ARM_CP_SECSTATE_S, + .fieldoffset = offsetof(CPUARMState, cp15.contextidr_s), + .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo not_v8_cp_reginfo[] = { + /* NB: Some of these registers exist in v8 but with more precise + * definitions that don't use CP_ANY wildcards (mostly in v8_cp_reginfo[]). + */ + /* MMU Domain access control / MPU write buffer control */ + { .name = "DACR", + .cp = 15, .opc1 = CP_ANY, .crn = 3, .crm = CP_ANY, .opc2 = CP_ANY, + .access = PL1_RW, .resetvalue = 0, + .writefn = dacr_write, .raw_writefn = raw_write, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s), + offsetoflow32(CPUARMState, cp15.dacr_ns) } }, + /* ARMv7 allocates a range of implementation defined TLB LOCKDOWN regs. + * For v6 and v5, these mappings are overly broad. + */ + { .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = 0, + .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_NOP }, + { .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = 1, + .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_NOP }, + { .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = 4, + .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_NOP }, + { .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = 8, + .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_NOP }, + /* Cache maintenance ops; some of this space may be overridden later. */ + { .name = "CACHEMAINT", .cp = 15, .crn = 7, .crm = CP_ANY, + .opc1 = 0, .opc2 = CP_ANY, .access = PL1_W, + .type = ARM_CP_NOP | ARM_CP_OVERRIDE }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo not_v6_cp_reginfo[] = { + /* Not all pre-v6 cores implemented this WFI, so this is slightly + * over-broad. + */ + { .name = "WFI_v5", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_WFI }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo not_v7_cp_reginfo[] = { + /* Standard v6 WFI (also used in some pre-v6 cores); not in v7 (which + * is UNPREDICTABLE; we choose to NOP as most implementations do). + */ + { .name = "WFI_v6", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, + .access = PL1_W, .type = ARM_CP_WFI }, + /* L1 cache lockdown. Not architectural in v6 and earlier but in practice + * implemented in 926, 946, 1026, 1136, 1176 and 11MPCore. StrongARM and + * OMAPCP will override this space. + */ + { .name = "DLOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_data), + .resetvalue = 0 }, + { .name = "ILOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_insn), + .resetvalue = 0 }, + /* v6 doesn't have the cache ID registers but Linux reads them anyway */ + { .name = "DUMMY", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = CP_ANY, + .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, + .resetvalue = 0 }, + /* We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR; + * implementing it as RAZ means the "debug architecture version" bits + * will read as a reserved value, which should cause Linux to not try + * to use the debug hardware. + */ + { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + /* MMU TLB control. Note that the wildcarding means we cover not just + * the unified TLB ops but also the dside/iside/inner-shareable variants. + */ + { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write, + .type = ARM_CP_NO_RAW }, + { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write, + .type = ARM_CP_NO_RAW }, + { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write, + .type = ARM_CP_NO_RAW }, + { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write, + .type = ARM_CP_NO_RAW }, + { .name = "PRRR", .cp = 15, .crn = 10, .crm = 2, + .opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_NOP }, + { .name = "NMRR", .cp = 15, .crn = 10, .crm = 2, + .opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_NOP }, + REGINFO_SENTINEL +}; + +static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint32_t mask = 0; + + /* In ARMv8 most bits of CPACR_EL1 are RES0. */ + if (!arm_feature(env, ARM_FEATURE_V8)) { + /* ARMv7 defines bits for unimplemented coprocessors as RAZ/WI. + * ASEDIS [31] and D32DIS [30] are both UNK/SBZP without VFP. + * TRCDIS [28] is RAZ/WI since we do not implement a trace macrocell. + */ + if (arm_feature(env, ARM_FEATURE_VFP)) { + /* VFP coprocessor: cp10 & cp11 [23:20] */ + mask |= (1 << 31) | (1 << 30) | (0xf << 20); + + if (!arm_feature(env, ARM_FEATURE_NEON)) { + /* ASEDIS [31] bit is RAO/WI */ + value |= (1 << 31); + } + + /* VFPv3 and upwards with NEON implement 32 double precision + * registers (D0-D31). + */ + if (!arm_feature(env, ARM_FEATURE_NEON) || + !arm_feature(env, ARM_FEATURE_VFP3)) { + /* D32DIS [30] is RAO/WI if D16-31 are not implemented. */ + value |= (1 << 30); + } + } + value &= mask; + } + env->cp15.cpacr_el1 = value; +} + +static CPAccessResult cpacr_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + if (arm_feature(env, ARM_FEATURE_V8)) { + /* Check if CPACR accesses are to be trapped to EL2 */ + if (arm_current_el(env) == 1 && + (env->cp15.cptr_el[2] & CPTR_TCPAC) && !arm_is_secure(env)) { + return CP_ACCESS_TRAP_EL2; + /* Check if CPACR accesses are to be trapped to EL3 */ + } else if (arm_current_el(env) < 3 && + (env->cp15.cptr_el[3] & CPTR_TCPAC)) { + return CP_ACCESS_TRAP_EL3; + } + } + + return CP_ACCESS_OK; +} + +static CPAccessResult cptr_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Check if CPTR accesses are set to trap to EL3 */ + if (arm_current_el(env) == 2 && (env->cp15.cptr_el[3] & CPTR_TCPAC)) { + return CP_ACCESS_TRAP_EL3; + } + + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo v6_cp_reginfo[] = { + /* prefetch by MVA in v6, NOP in v7 */ + { .name = "MVA_prefetch", + .cp = 15, .crn = 7, .crm = 13, .opc1 = 0, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "ISB", .cp = 15, .crn = 7, .crm = 5, .opc1 = 0, .opc2 = 4, + .access = PL0_W, .type = ARM_CP_NOP }, + { .name = "DSB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 4, + .access = PL0_W, .type = ARM_CP_NOP }, + { .name = "DMB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 5, + .access = PL0_W, .type = ARM_CP_NOP }, + { .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ifar_s), + offsetof(CPUARMState, cp15.ifar_ns) }, + .resetvalue = 0, }, + /* Watchpoint Fault Address Register : should actually only be present + * for 1136, 1176, 11MPCore. + */ + { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, }, + { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, + .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1), + .resetvalue = 0, .writefn = cpacr_write }, + REGINFO_SENTINEL +}; + +static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Performance monitor registers user accessibility is controlled + * by PMUSERENR. + */ + if (arm_current_el(env) == 0 && !env->cp15.c9_pmuserenr) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +#ifndef CONFIG_USER_ONLY + +static inline bool arm_ccnt_enabled(CPUARMState *env) +{ + /* This does not support checking PMCCFILTR_EL0 register */ + + if (!(env->cp15.c9_pmcr & PMCRE)) { + return false; + } + + return true; +} + +void pmccntr_sync(CPUARMState *env) +{ + uint64_t temp_ticks; + + temp_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL), + get_ticks_per_sec(), 1000000); + + if (env->cp15.c9_pmcr & PMCRD) { + /* Increment once every 64 processor clock cycles */ + temp_ticks /= 64; + } + + if (arm_ccnt_enabled(env)) { + env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt; + } +} + +static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_sync(env); + + if (value & PMCRC) { + /* The counter has been reset */ + env->cp15.c15_ccnt = 0; + } + + /* only the DP, X, D and E bits are writable */ + env->cp15.c9_pmcr &= ~0x39; + env->cp15.c9_pmcr |= (value & 0x39); + + pmccntr_sync(env); +} + +static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint64_t total_ticks; + + if (!arm_ccnt_enabled(env)) { + /* Counter is disabled, do not change value */ + return env->cp15.c15_ccnt; + } + + total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL), + get_ticks_per_sec(), 1000000); + + if (env->cp15.c9_pmcr & PMCRD) { + /* Increment once every 64 processor clock cycles */ + total_ticks /= 64; + } + return total_ticks - env->cp15.c15_ccnt; +} + +static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t total_ticks; + + if (!arm_ccnt_enabled(env)) { + /* Counter is disabled, set the absolute value */ + env->cp15.c15_ccnt = value; + return; + } + + total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL), + get_ticks_per_sec(), 1000000); + + if (env->cp15.c9_pmcr & PMCRD) { + /* Increment once every 64 processor clock cycles */ + total_ticks /= 64; + } + env->cp15.c15_ccnt = total_ticks - value; +} + +static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t cur_val = pmccntr_read(env, NULL); + + pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value)); +} + +#else /* CONFIG_USER_ONLY */ + +void pmccntr_sync(CPUARMState *env) +{ +} + +#endif + +static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + pmccntr_sync(env); + env->cp15.pmccfiltr_el0 = value & 0x7E000000; + pmccntr_sync(env); +} + +static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= (1 << 31); + env->cp15.c9_pmcnten |= value; +} + +static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= (1 << 31); + env->cp15.c9_pmcnten &= ~value; +} + +static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.c9_pmovsr &= ~value; +} + +static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.c9_pmxevtyper = value & 0xff; +} + +static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.c9_pmuserenr = value & 1; +} + +static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* We have no event counters so only the C bit can be changed */ + value &= (1 << 31); + env->cp15.c9_pminten |= value; +} + +static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= (1 << 31); + env->cp15.c9_pminten &= ~value; +} + +static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Note that even though the AArch64 view of this register has bits + * [10:0] all RES0 we can only mask the bottom 5, to comply with the + * architectural requirements for bits which are RES0 only in some + * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7 + * requires the bottom five bits to be RAZ/WI because they're UNK/SBZP.) + */ + raw_write(env, ri, value & ~0x1FULL); +} + +static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + /* We only mask off bits that are RES0 both for AArch64 and AArch32. + * For bits that vary between AArch32/64, code needs to check the + * current execution mode before directly using the feature bit. + */ + uint32_t valid_mask = SCR_AARCH64_MASK | SCR_AARCH32_MASK; + + if (!arm_feature(env, ARM_FEATURE_EL2)) { + valid_mask &= ~SCR_HCE; + + /* On ARMv7, SMD (or SCD as it is called in v7) is only + * supported if EL2 exists. The bit is UNK/SBZP when + * EL2 is unavailable. In QEMU ARMv7, we force it to always zero + * when EL2 is unavailable. + * On ARMv8, this bit is always available. + */ + if (arm_feature(env, ARM_FEATURE_V7) && + !arm_feature(env, ARM_FEATURE_V8)) { + valid_mask &= ~SCR_SMD; + } + } + + /* Clear all-context RES0 bits. */ + value &= valid_mask; + raw_write(env, ri, value); +} + +static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + /* Acquire the CSSELR index from the bank corresponding to the CCSIDR + * bank + */ + uint32_t index = A32_BANKED_REG_GET(env, csselr, + ri->secure & ARM_CP_SECSTATE_S); + + return cpu->ccsidr[index]; +} + +static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + raw_write(env, ri, value & 0xf); +} + +static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + CPUState *cs = ENV_GET_CPU(env); + uint64_t ret = 0; + + if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + ret |= CPSR_I; + } + if (cs->interrupt_request & CPU_INTERRUPT_FIQ) { + ret |= CPSR_F; + } + /* External aborts are not possible in QEMU so A bit is always clear */ + return ret; +} + +static const ARMCPRegInfo v7_cp_reginfo[] = { + /* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */ + { .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, + .access = PL1_W, .type = ARM_CP_NOP }, + /* Performance monitors are implementation defined in v7, + * but with an ARM recommended set of registers, which we + * follow (although we don't actually implement any counters) + * + * Performance registers fall into three categories: + * (a) always UNDEF in PL0, RW in PL1 (PMINTENSET, PMINTENCLR) + * (b) RO in PL0 (ie UNDEF on write), RW in PL1 (PMUSERENR) + * (c) UNDEF in PL0 if PMUSERENR.EN==0, otherwise accessible (all others) + * For the cases controlled by PMUSERENR we must set .access to PL0_RW + * or PL0_RO as appropriate and then check PMUSERENR in the helper fn. + */ + { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, + .access = PL0_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), + .writefn = pmcntenset_write, + .accessfn = pmreg_access, + .raw_writefn = raw_write }, + { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1, + .access = PL0_RW, .accessfn = pmreg_access, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0, + .writefn = pmcntenset_write, .raw_writefn = raw_write }, + { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2, + .access = PL0_RW, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), + .accessfn = pmreg_access, + .writefn = pmcntenclr_write, + .type = ARM_CP_ALIAS }, + { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, + .access = PL0_RW, .accessfn = pmreg_access, + .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), + .writefn = pmcntenclr_write }, + { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, + .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), + .accessfn = pmreg_access, + .writefn = pmovsr_write, + .raw_writefn = raw_write }, + /* Unimplemented so WI. */ + { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4, + .access = PL0_W, .accessfn = pmreg_access, .type = ARM_CP_NOP }, + /* Since we don't implement any events, writing to PMSELR is UNPREDICTABLE. + * We choose to RAZ/WI. + */ + { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5, + .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = pmreg_access }, +#ifndef CONFIG_USER_ONLY + { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0, + .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO, + .readfn = pmccntr_read, .writefn = pmccntr_write32, + .accessfn = pmreg_access }, + { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access, + .type = ARM_CP_IO, + .readfn = pmccntr_read, .writefn = pmccntr_write, }, +#endif + { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7, + .writefn = pmccfiltr_write, + .access = PL0_RW, .accessfn = pmreg_access, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0), + .resetvalue = 0, }, + { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1, + .access = PL0_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper), + .accessfn = pmreg_access, .writefn = pmxevtyper_write, + .raw_writefn = raw_write }, + /* Unimplemented, RAZ/WI. */ + { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2, + .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = pmreg_access }, + { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0, + .access = PL0_R | PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmuserenr), + .resetvalue = 0, + .writefn = pmuserenr_write, .raw_writefn = raw_write }, + { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .resetvalue = 0, + .writefn = pmintenset_write, .raw_writefn = raw_write }, + { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), + .writefn = pmintenclr_write, }, + { .name = "VBAR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .writefn = vbar_write, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s), + offsetof(CPUARMState, cp15.vbar_ns) }, + .resetvalue = 0 }, + { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, + .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_RAW }, + { .name = "CSSELR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0, + .access = PL1_RW, .writefn = csselr_write, .resetvalue = 0, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.csselr_s), + offsetof(CPUARMState, cp15.csselr_ns) } }, + /* Auxiliary ID register: this actually has an IMPDEF value but for now + * just RAZ for all cores: + */ + { .name = "AIDR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 7, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + /* Auxiliary fault status registers: these also are IMPDEF, and we + * choose to RAZ/WI for all cores. + */ + { .name = "AFSR0_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + /* MAIR can just read-as-written because we don't implement caches + * and so don't need to care about memory attributes. + */ + { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]), + .resetvalue = 0 }, + /* For non-long-descriptor page tables these are PRRR and NMRR; + * regardless they still act as reads-as-written for QEMU. + */ + /* MAIR0/1 are defined separately from their 64-bit counterpart which + * allows them to assign the correct fieldoffset based on the endianness + * handled in the field definitions. + */ + { .name = "MAIR0", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.mair0_s), + offsetof(CPUARMState, cp15.mair0_ns) }, + .resetfn = arm_cp_reset_ignore }, + { .name = "MAIR1", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.mair1_s), + offsetof(CPUARMState, cp15.mair1_ns) }, + .resetfn = arm_cp_reset_ignore }, + { .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read }, + /* 32 bit ITLB invalidates */ + { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write }, + { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, + { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write }, + /* 32 bit DTLB invalidates */ + { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write }, + { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, + { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write }, + /* 32 bit TLB invalidates */ + { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write }, + { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, + { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write }, + { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_write }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo v7mp_cp_reginfo[] = { + /* 32 bit TLB invalidates, Inner Shareable */ + { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_is_write }, + { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_is_write }, + { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, + .type = ARM_CP_NO_RAW, .access = PL1_W, + .writefn = tlbiasid_is_write }, + { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, + .type = ARM_CP_NO_RAW, .access = PL1_W, + .writefn = tlbimvaa_is_write }, + REGINFO_SENTINEL +}; + +static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + value &= 1; + env->teecr = value; +} + +static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + if (arm_current_el(env) == 0 && (env->teecr & 1)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo t2ee_cp_reginfo[] = { + { .name = "TEECR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 6, .opc2 = 0, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, teecr), + .resetvalue = 0, + .writefn = teecr_write }, + { .name = "TEEHBR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 6, .opc2 = 0, + .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, teehbr), + .accessfn = teehbr_access, .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo v6k_cp_reginfo[] = { + { .name = "TPIDR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 2, .crn = 13, .crm = 0, + .access = PL0_RW, + .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[0]), .resetvalue = 0 }, + { .name = "TPIDRURW", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL0_RW, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrurw_s), + offsetoflow32(CPUARMState, cp15.tpidrurw_ns) }, + .resetfn = arm_cp_reset_ignore }, + { .name = "TPIDRRO_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 3, .crn = 13, .crm = 0, + .access = PL0_R|PL1_W, + .fieldoffset = offsetof(CPUARMState, cp15.tpidrro_el[0]), + .resetvalue = 0}, + { .name = "TPIDRURO", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 3, + .access = PL0_R|PL1_W, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidruro_s), + offsetoflow32(CPUARMState, cp15.tpidruro_ns) }, + .resetfn = arm_cp_reset_ignore }, + { .name = "TPIDR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 4, .crn = 13, .crm = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[1]), .resetvalue = 0 }, + { .name = "TPIDRPRW", .opc1 = 0, .cp = 15, .crn = 13, .crm = 0, .opc2 = 4, + .access = PL1_RW, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrprw_s), + offsetoflow32(CPUARMState, cp15.tpidrprw_ns) }, + .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +#ifndef CONFIG_USER_ONLY + +static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero */ + if (arm_current_el(env) == 0 && !extract32(env->cp15.c14_cntkctl, 0, 2)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx) +{ + /* CNT[PV]CT: not visible from PL0 if ELO[PV]CTEN is zero */ + if (arm_current_el(env) == 0 && + !extract32(env->cp15.c14_cntkctl, timeridx, 1)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx) +{ + /* CNT[PV]_CVAL, CNT[PV]_CTL, CNT[PV]_TVAL: not visible from PL0 if + * EL0[PV]TEN is zero. + */ + if (arm_current_el(env) == 0 && + !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static CPAccessResult gt_pct_access(CPUARMState *env, + const ARMCPRegInfo *ri) +{ + return gt_counter_access(env, GTIMER_PHYS); +} + +static CPAccessResult gt_vct_access(CPUARMState *env, + const ARMCPRegInfo *ri) +{ + return gt_counter_access(env, GTIMER_VIRT); +} + +static CPAccessResult gt_ptimer_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return gt_timer_access(env, GTIMER_PHYS); +} + +static CPAccessResult gt_vtimer_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return gt_timer_access(env, GTIMER_VIRT); +} + +static uint64_t gt_get_countervalue(CPUARMState *env) +{ + return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE; +} + +static void gt_recalc_timer(ARMCPU *cpu, int timeridx) +{ + ARMGenericTimer *gt = &cpu->env.cp15.c14_timer[timeridx]; + + if (gt->ctl & 1) { + /* Timer enabled: calculate and set current ISTATUS, irq, and + * reset timer to when ISTATUS next has to change + */ + uint64_t count = gt_get_countervalue(&cpu->env); + /* Note that this must be unsigned 64 bit arithmetic: */ + int istatus = count >= gt->cval; + uint64_t nexttick; + + gt->ctl = deposit32(gt->ctl, 2, 1, istatus); + qemu_set_irq(cpu->gt_timer_outputs[timeridx], + (istatus && !(gt->ctl & 2))); + if (istatus) { + /* Next transition is when count rolls back over to zero */ + nexttick = UINT64_MAX; + } else { + /* Next transition is when we hit cval */ + nexttick = gt->cval; + } + /* Note that the desired next expiry time might be beyond the + * signed-64-bit range of a QEMUTimer -- in this case we just + * set the timer for as far in the future as possible. When the + * timer expires we will reset the timer for any remaining period. + */ + if (nexttick > INT64_MAX / GTIMER_SCALE) { + nexttick = INT64_MAX / GTIMER_SCALE; + } + timer_mod(cpu->gt_timer[timeridx], nexttick); + } else { + /* Timer disabled: ISTATUS and timer output always clear */ + gt->ctl &= ~4; + qemu_set_irq(cpu->gt_timer_outputs[timeridx], 0); + timer_del(cpu->gt_timer[timeridx]); + } +} + +static void gt_cnt_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int timeridx = ri->opc1 & 1; + + timer_del(cpu->gt_timer[timeridx]); +} + +static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return gt_get_countervalue(env); +} + +static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + int timeridx = ri->opc1 & 1; + + env->cp15.c14_timer[timeridx].cval = value; + gt_recalc_timer(arm_env_get_cpu(env), timeridx); +} + +static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + int timeridx = ri->crm & 1; + + return (uint32_t)(env->cp15.c14_timer[timeridx].cval - + gt_get_countervalue(env)); +} + +static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + int timeridx = ri->crm & 1; + + env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) + + sextract64(value, 0, 32); + gt_recalc_timer(arm_env_get_cpu(env), timeridx); +} + +static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int timeridx = ri->crm & 1; + uint32_t oldval = env->cp15.c14_timer[timeridx].ctl; + + env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value); + if ((oldval ^ value) & 1) { + /* Enable toggled */ + gt_recalc_timer(cpu, timeridx); + } else if ((oldval ^ value) & 2) { + /* IMASK toggled: don't need to recalculate, + * just set the interrupt line based on ISTATUS + */ + qemu_set_irq(cpu->gt_timer_outputs[timeridx], + (oldval & 4) && !(value & 2)); + } +} + +void arm_gt_ptimer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + + gt_recalc_timer(cpu, GTIMER_PHYS); +} + +void arm_gt_vtimer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + + gt_recalc_timer(cpu, GTIMER_VIRT); +} + +static const ARMCPRegInfo generic_timer_cp_reginfo[] = { + /* Note that CNTFRQ is purely reads-as-written for the benefit + * of software; writing it doesn't actually change the timer frequency. + * Our reset value matches the fixed frequency we implement the timer at. + */ + { .name = "CNTFRQ", .cp = 15, .crn = 14, .crm = 0, .opc1 = 0, .opc2 = 0, + .type = ARM_CP_ALIAS, + .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_cntfrq), + }, + { .name = "CNTFRQ_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0, + .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access, + .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq), + .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE, + }, + /* overall control: mostly access permissions */ + { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl), + .resetvalue = 0, + }, + /* per-timer control */ + { .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1, + .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R, + .accessfn = gt_ptimer_access, + .fieldoffset = offsetoflow32(CPUARMState, + cp15.c14_timer[GTIMER_PHYS].ctl), + .writefn = gt_ctl_write, .raw_writefn = raw_write, + }, + { .name = "CNTP_CTL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1, + .type = ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_ptimer_access, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl), + .resetvalue = 0, + .writefn = gt_ctl_write, .raw_writefn = raw_write, + }, + { .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1, + .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R, + .accessfn = gt_vtimer_access, + .fieldoffset = offsetoflow32(CPUARMState, + cp15.c14_timer[GTIMER_VIRT].ctl), + .writefn = gt_ctl_write, .raw_writefn = raw_write, + }, + { .name = "CNTV_CTL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1, + .type = ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_vtimer_access, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl), + .resetvalue = 0, + .writefn = gt_ctl_write, .raw_writefn = raw_write, + }, + /* TimerValue views: a 32 bit downcounting view of the underlying state */ + { .name = "CNTP_TVAL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_ptimer_access, + .readfn = gt_tval_read, .writefn = gt_tval_write, + }, + { .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_ptimer_access, + .readfn = gt_tval_read, .writefn = gt_tval_write, + }, + { .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_vtimer_access, + .readfn = gt_tval_read, .writefn = gt_tval_write, + }, + { .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0, + .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R, + .accessfn = gt_vtimer_access, + .readfn = gt_tval_read, .writefn = gt_tval_write, + }, + /* The counter itself */ + { .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0, + .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_pct_access, + .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore, + }, + { .name = "CNTPCT_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 1, + .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_pct_access, + .readfn = gt_cnt_read, .resetfn = gt_cnt_reset, + }, + { .name = "CNTVCT", .cp = 15, .crm = 14, .opc1 = 1, + .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_vct_access, + .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore, + }, + { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2, + .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_vct_access, + .readfn = gt_cnt_read, .resetfn = gt_cnt_reset, + }, + /* Comparison value, indicating when the timer goes off */ + { .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2, + .access = PL1_RW | PL0_R, + .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval), + .accessfn = gt_ptimer_access, + .writefn = gt_cval_write, .raw_writefn = raw_write, + }, + { .name = "CNTP_CVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2, + .access = PL1_RW | PL0_R, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval), + .resetvalue = 0, .accessfn = gt_ptimer_access, + .writefn = gt_cval_write, .raw_writefn = raw_write, + }, + { .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3, + .access = PL1_RW | PL0_R, + .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval), + .accessfn = gt_vtimer_access, + .writefn = gt_cval_write, .raw_writefn = raw_write, + }, + { .name = "CNTV_CVAL_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2, + .access = PL1_RW | PL0_R, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval), + .resetvalue = 0, .accessfn = gt_vtimer_access, + .writefn = gt_cval_write, .raw_writefn = raw_write, + }, + REGINFO_SENTINEL +}; + +#else +/* In user-mode none of the generic timer registers are accessible, + * and their implementation depends on QEMU_CLOCK_VIRTUAL and qdev gpio outputs, + * so instead just don't register any of them. + */ +static const ARMCPRegInfo generic_timer_cp_reginfo[] = { + REGINFO_SENTINEL +}; + +#endif + +static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + if (arm_feature(env, ARM_FEATURE_LPAE)) { + raw_write(env, ri, value); + } else if (arm_feature(env, ARM_FEATURE_V7)) { + raw_write(env, ri, value & 0xfffff6ff); + } else { + raw_write(env, ri, value & 0xfffff1ff); + } +} + +#ifndef CONFIG_USER_ONLY +/* get_phys_addr() isn't present for user-mode-only targets */ + +static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + if (ri->opc2 & 4) { + /* Other states are only available with TrustZone; in + * a non-TZ implementation these registers don't exist + * at all, which is an Uncategorized trap. This underdecoding + * is safe because the reginfo is NO_RAW. + */ + return CP_ACCESS_TRAP_UNCATEGORIZED; + } + return CP_ACCESS_OK; +} + +static uint64_t do_ats_write(CPUARMState *env, uint64_t value, + int access_type, ARMMMUIdx mmu_idx) +{ + hwaddr phys_addr; + target_ulong page_size; + int prot; + uint32_t fsr; + bool ret; + uint64_t par64; + MemTxAttrs attrs = {}; + + ret = get_phys_addr(env, value, access_type, mmu_idx, + &phys_addr, &attrs, &prot, &page_size, &fsr); + if (extended_addresses_enabled(env)) { + /* fsr is a DFSR/IFSR value for the long descriptor + * translation table format, but with WnR always clear. + * Convert it to a 64-bit PAR. + */ + par64 = (1 << 11); /* LPAE bit always set */ + if (!ret) { + par64 |= phys_addr & ~0xfffULL; + if (!attrs.secure) { + par64 |= (1 << 9); /* NS */ + } + /* We don't set the ATTR or SH fields in the PAR. */ + } else { + par64 |= 1; /* F */ + par64 |= (fsr & 0x3f) << 1; /* FS */ + /* Note that S2WLK and FSTAGE are always zero, because we don't + * implement virtualization and therefore there can't be a stage 2 + * fault. + */ + } + } else { + /* fsr is a DFSR/IFSR value for the short descriptor + * translation table format (with WnR always clear). + * Convert it to a 32-bit PAR. + */ + if (!ret) { + /* We do not set any attribute bits in the PAR */ + if (page_size == (1 << 24) + && arm_feature(env, ARM_FEATURE_V7)) { + par64 = (phys_addr & 0xff000000) | (1 << 1); + } else { + par64 = phys_addr & 0xfffff000; + } + if (!attrs.secure) { + par64 |= (1 << 9); /* NS */ + } + } else { + par64 = ((fsr & (1 << 10)) >> 5) | ((fsr & (1 << 12)) >> 6) | + ((fsr & 0xf) << 1) | 1; + } + } + return par64; +} + +static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + int access_type = ri->opc2 & 1; + uint64_t par64; + ARMMMUIdx mmu_idx; + int el = arm_current_el(env); + bool secure = arm_is_secure_below_el3(env); + + switch (ri->opc2 & 6) { + case 0: + /* stage 1 current state PL1: ATS1CPR, ATS1CPW */ + switch (el) { + case 3: + mmu_idx = ARMMMUIdx_S1E3; + break; + case 2: + mmu_idx = ARMMMUIdx_S1NSE1; + break; + case 1: + mmu_idx = secure ? ARMMMUIdx_S1SE1 : ARMMMUIdx_S1NSE1; + break; + default: + g_assert_not_reached(); + } + break; + case 2: + /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ + switch (el) { + case 3: + mmu_idx = ARMMMUIdx_S1SE0; + break; + case 2: + mmu_idx = ARMMMUIdx_S1NSE0; + break; + case 1: + mmu_idx = secure ? ARMMMUIdx_S1SE0 : ARMMMUIdx_S1NSE0; + break; + default: + g_assert_not_reached(); + } + break; + case 4: + /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ + mmu_idx = ARMMMUIdx_S12NSE1; + break; + case 6: + /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ + mmu_idx = ARMMMUIdx_S12NSE0; + break; + default: + g_assert_not_reached(); + } + + par64 = do_ats_write(env, value, access_type, mmu_idx); + + A32_BANKED_CURRENT_REG_SET(env, par, par64); +} + +static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + int access_type = ri->opc2 & 1; + ARMMMUIdx mmu_idx; + int secure = arm_is_secure_below_el3(env); + + switch (ri->opc2 & 6) { + case 0: + switch (ri->opc1) { + case 0: /* AT S1E1R, AT S1E1W */ + mmu_idx = secure ? ARMMMUIdx_S1SE1 : ARMMMUIdx_S1NSE1; + break; + case 4: /* AT S1E2R, AT S1E2W */ + mmu_idx = ARMMMUIdx_S1E2; + break; + case 6: /* AT S1E3R, AT S1E3W */ + mmu_idx = ARMMMUIdx_S1E3; + break; + default: + g_assert_not_reached(); + } + break; + case 2: /* AT S1E0R, AT S1E0W */ + mmu_idx = secure ? ARMMMUIdx_S1SE0 : ARMMMUIdx_S1NSE0; + break; + case 4: /* AT S12E1R, AT S12E1W */ + mmu_idx = ARMMMUIdx_S12NSE1; + break; + case 6: /* AT S12E0R, AT S12E0W */ + mmu_idx = ARMMMUIdx_S12NSE0; + break; + default: + g_assert_not_reached(); + } + + env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx); +} +#endif + +static const ARMCPRegInfo vapa_cp_reginfo[] = { + { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .resetvalue = 0, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s), + offsetoflow32(CPUARMState, cp15.par_ns) }, + .writefn = par_write }, +#ifndef CONFIG_USER_ONLY + { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_W, .accessfn = ats_access, + .writefn = ats_write, .type = ARM_CP_NO_RAW }, +#endif + REGINFO_SENTINEL +}; + +/* Return basic MPU access permission bits. */ +static uint32_t simple_mpu_ap_bits(uint32_t val) +{ + uint32_t ret; + uint32_t mask; + int i; + ret = 0; + mask = 3; + for (i = 0; i < 16; i += 2) { + ret |= (val >> i) & mask; + mask <<= 2; + } + return ret; +} + +/* Pad basic MPU access permission bits to extended format. */ +static uint32_t extended_mpu_ap_bits(uint32_t val) +{ + uint32_t ret; + uint32_t mask; + int i; + ret = 0; + mask = 3; + for (i = 0; i < 16; i += 2) { + ret |= (val & mask) << i; + mask <<= 2; + } + return ret; +} + +static void pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.pmsav5_data_ap = extended_mpu_ap_bits(value); +} + +static uint64_t pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return simple_mpu_ap_bits(env->cp15.pmsav5_data_ap); +} + +static void pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.pmsav5_insn_ap = extended_mpu_ap_bits(value); +} + +static uint64_t pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return simple_mpu_ap_bits(env->cp15.pmsav5_insn_ap); +} + +static uint64_t pmsav7_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint32_t *u32p = *(uint32_t **)raw_ptr(env, ri); + + if (!u32p) { + return 0; + } + + u32p += env->cp15.c6_rgnr; + return *u32p; +} + +static void pmsav7_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + uint32_t *u32p = *(uint32_t **)raw_ptr(env, ri); + + if (!u32p) { + return; + } + + u32p += env->cp15.c6_rgnr; + tlb_flush(CPU(cpu), 1); /* Mappings may have changed - purge! */ + *u32p = value; +} + +static void pmsav7_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + uint32_t *u32p = *(uint32_t **)raw_ptr(env, ri); + + if (!u32p) { + return; + } + + memset(u32p, 0, sizeof(*u32p) * cpu->pmsav7_dregion); +} + +static void pmsav7_rgnr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + uint32_t nrgs = cpu->pmsav7_dregion; + + if (value >= nrgs) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMSAv7 RGNR write >= # supported regions, %" PRIu32 + " > %" PRIu32 "\n", (uint32_t)value, nrgs); + return; + } + + raw_write(env, ri, value); +} + +static const ARMCPRegInfo pmsav7_cp_reginfo[] = { + { .name = "DRBAR", .cp = 15, .crn = 6, .opc1 = 0, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_NO_RAW, + .fieldoffset = offsetof(CPUARMState, pmsav7.drbar), + .readfn = pmsav7_read, .writefn = pmsav7_write, .resetfn = pmsav7_reset }, + { .name = "DRSR", .cp = 15, .crn = 6, .opc1 = 0, .crm = 1, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_NO_RAW, + .fieldoffset = offsetof(CPUARMState, pmsav7.drsr), + .readfn = pmsav7_read, .writefn = pmsav7_write, .resetfn = pmsav7_reset }, + { .name = "DRACR", .cp = 15, .crn = 6, .opc1 = 0, .crm = 1, .opc2 = 4, + .access = PL1_RW, .type = ARM_CP_NO_RAW, + .fieldoffset = offsetof(CPUARMState, pmsav7.dracr), + .readfn = pmsav7_read, .writefn = pmsav7_write, .resetfn = pmsav7_reset }, + { .name = "RGNR", .cp = 15, .crn = 6, .opc1 = 0, .crm = 2, .opc2 = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c6_rgnr), + .writefn = pmsav7_rgnr_write }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo pmsav5_cp_reginfo[] = { + { .name = "DATA_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_data_ap), + .readfn = pmsav5_data_ap_read, .writefn = pmsav5_data_ap_write, }, + { .name = "INSN_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_insn_ap), + .readfn = pmsav5_insn_ap_read, .writefn = pmsav5_insn_ap_write, }, + { .name = "DATA_EXT_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_data_ap), + .resetvalue = 0, }, + { .name = "INSN_EXT_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 3, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_insn_ap), + .resetvalue = 0, }, + { .name = "DCACHE_CFG", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c2_data), .resetvalue = 0, }, + { .name = "ICACHE_CFG", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c2_insn), .resetvalue = 0, }, + /* Protection region base and size registers */ + { .name = "946_PRBS0", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[0]) }, + { .name = "946_PRBS1", .cp = 15, .crn = 6, .crm = 1, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[1]) }, + { .name = "946_PRBS2", .cp = 15, .crn = 6, .crm = 2, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[2]) }, + { .name = "946_PRBS3", .cp = 15, .crn = 6, .crm = 3, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[3]) }, + { .name = "946_PRBS4", .cp = 15, .crn = 6, .crm = 4, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[4]) }, + { .name = "946_PRBS5", .cp = 15, .crn = 6, .crm = 5, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[5]) }, + { .name = "946_PRBS6", .cp = 15, .crn = 6, .crm = 6, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[6]) }, + { .name = "946_PRBS7", .cp = 15, .crn = 6, .crm = 7, .opc1 = 0, + .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.c6_region[7]) }, + REGINFO_SENTINEL +}; + +static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + TCR *tcr = raw_ptr(env, ri); + int maskshift = extract32(value, 0, 3); + + if (!arm_feature(env, ARM_FEATURE_V8)) { + if (arm_feature(env, ARM_FEATURE_LPAE) && (value & TTBCR_EAE)) { + /* Pre ARMv8 bits [21:19], [15:14] and [6:3] are UNK/SBZP when + * using Long-desciptor translation table format */ + value &= ~((7 << 19) | (3 << 14) | (0xf << 3)); + } else if (arm_feature(env, ARM_FEATURE_EL3)) { + /* In an implementation that includes the Security Extensions + * TTBCR has additional fields PD0 [4] and PD1 [5] for + * Short-descriptor translation table format. + */ + value &= TTBCR_PD1 | TTBCR_PD0 | TTBCR_N; + } else { + value &= TTBCR_N; + } + } + + /* Update the masks corresponding to the the TCR bank being written + * Note that we always calculate mask and base_mask, but + * they are only used for short-descriptor tables (ie if EAE is 0); + * for long-descriptor tables the TCR fields are used differently + * and the mask and base_mask values are meaningless. + */ + tcr->raw_tcr = value; + tcr->mask = ~(((uint32_t)0xffffffffu) >> maskshift); + tcr->base_mask = ~((uint32_t)0x3fffu >> maskshift); +} + +static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (arm_feature(env, ARM_FEATURE_LPAE)) { + /* With LPAE the TTBCR could result in a change of ASID + * via the TTBCR.A1 bit, so do a TLB flush. + */ + tlb_flush(CPU(cpu), 1); + } + vmsa_ttbcr_raw_write(env, ri, value); +} + +static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + TCR *tcr = raw_ptr(env, ri); + + /* Reset both the TCR as well as the masks corresponding to the bank of + * the TCR being reset. + */ + tcr->raw_tcr = 0; + tcr->mask = 0; + tcr->base_mask = 0xffffc000u; +} + +static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + TCR *tcr = raw_ptr(env, ri); + + /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */ + tlb_flush(CPU(cpu), 1); + tcr->raw_tcr = value; +} + +static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* 64 bit accesses to the TTBRs can change the ASID and so we + * must flush the TLB. + */ + if (cpreg_field_is_64bit(ri)) { + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush(CPU(cpu), 1); + } + raw_write(env, ri, value); +} + +static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = { + { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_ALIAS, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dfsr_s), + offsetoflow32(CPUARMState, cp15.dfsr_ns) }, }, + { .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .resetvalue = 0, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.ifsr_s), + offsetoflow32(CPUARMState, cp15.ifsr_ns) } }, + { .name = "DFAR", .cp = 15, .opc1 = 0, .crn = 6, .crm = 0, .opc2 = 0, + .access = PL1_RW, .resetvalue = 0, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.dfar_s), + offsetof(CPUARMState, cp15.dfar_ns) } }, + { .name = "FAR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]), + .resetvalue = 0, }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo vmsa_cp_reginfo[] = { + { .name = "ESR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .crn = 5, .crm = 2, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, }, + { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0, + .access = PL1_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), + offsetof(CPUARMState, cp15.ttbr0_ns) } }, + { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1, + .access = PL1_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), + offsetof(CPUARMState, cp15.ttbr1_ns) } }, + { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .writefn = vmsa_tcr_el1_write, + .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write, + .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[1]) }, + { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_ALIAS, .writefn = vmsa_ttbcr_write, + .raw_writefn = vmsa_ttbcr_raw_write, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tcr_el[3]), + offsetoflow32(CPUARMState, cp15.tcr_el[1])} }, + REGINFO_SENTINEL +}; + +static void omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.c15_ticonfig = value & 0xe7; + /* The OS_TYPE bit in this register changes the reported CPUID! */ + env->cp15.c0_cpuid = (value & (1 << 5)) ? + ARM_CPUID_TI915T : ARM_CPUID_TI925T; +} + +static void omap_threadid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.c15_threadid = value & 0xffff; +} + +static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Wait-for-interrupt (deprecated) */ + cpu_interrupt(CPU(arm_env_get_cpu(env)), CPU_INTERRUPT_HALT); +} + +static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* On OMAP there are registers indicating the max/min index of dcache lines + * containing a dirty line; cache flush operations have to reset these. + */ + env->cp15.c15_i_max = 0x000; + env->cp15.c15_i_min = 0xff0; +} + +static const ARMCPRegInfo omap_cp_reginfo[] = { + { .name = "DFSR", .cp = 15, .crn = 5, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_OVERRIDE, + .fieldoffset = offsetoflow32(CPUARMState, cp15.esr_el[1]), + .resetvalue = 0, }, + { .name = "", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_NOP }, + { .name = "TICONFIG", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c15_ticonfig), .resetvalue = 0, + .writefn = omap_ticonfig_write }, + { .name = "IMAX", .cp = 15, .crn = 15, .crm = 2, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c15_i_max), .resetvalue = 0, }, + { .name = "IMIN", .cp = 15, .crn = 15, .crm = 3, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .resetvalue = 0xff0, + .fieldoffset = offsetof(CPUARMState, cp15.c15_i_min) }, + { .name = "THREADID", .cp = 15, .crn = 15, .crm = 4, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c15_threadid), .resetvalue = 0, + .writefn = omap_threadid_write }, + { .name = "TI925T_STATUS", .cp = 15, .crn = 15, + .crm = 8, .opc1 = 0, .opc2 = 0, .access = PL1_RW, + .type = ARM_CP_NO_RAW, + .readfn = arm_cp_read_zero, .writefn = omap_wfi_write, }, + /* TODO: Peripheral port remap register: + * On OMAP2 mcr p15, 0, rn, c15, c2, 4 sets up the interrupt controller + * base address at $rn & ~0xfff and map size of 0x200 << ($rn & 0xfff), + * when MMU is off. + */ + { .name = "OMAP_CACHEMAINT", .cp = 15, .crn = 7, .crm = CP_ANY, + .opc1 = 0, .opc2 = CP_ANY, .access = PL1_W, + .type = ARM_CP_OVERRIDE | ARM_CP_NO_RAW, + .writefn = omap_cachemaint_write }, + { .name = "C9", .cp = 15, .crn = 9, + .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, + .type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->cp15.c15_cpar = value & 0x3fff; +} + +static const ARMCPRegInfo xscale_cp_reginfo[] = { + { .name = "XSCALE_CPAR", + .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0, .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c15_cpar), .resetvalue = 0, + .writefn = xscale_cpar_write, }, + { .name = "XSCALE_AUXCR", + .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr), + .resetvalue = 0, }, + /* XScale specific cache-lockdown: since we have no cache we NOP these + * and hope the guest does not really rely on cache behaviour. + */ + { .name = "XSCALE_LOCK_ICACHE_LINE", + .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "XSCALE_UNLOCK_ICACHE", + .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "XSCALE_DCACHE_LOCK", + .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_NOP }, + { .name = "XSCALE_UNLOCK_DCACHE", + .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { + /* RAZ/WI the whole crn=15 space, when we don't have a more specific + * implementation of this implementation-defined space. + * Ideally this should eventually disappear in favour of actually + * implementing the correct behaviour for all cores. + */ + { .name = "C15_IMPDEF", .cp = 15, .crn = 15, + .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, + .access = PL1_RW, + .type = ARM_CP_CONST | ARM_CP_NO_RAW | ARM_CP_OVERRIDE, + .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo cache_dirty_status_cp_reginfo[] = { + /* Cache status: RAZ because we have no cache so it's always clean */ + { .name = "CDSR", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 6, + .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, + .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo cache_block_ops_cp_reginfo[] = { + /* We never have a a block transfer operation in progress */ + { .name = "BXSR", .cp = 15, .crn = 7, .crm = 12, .opc1 = 0, .opc2 = 4, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, + .resetvalue = 0 }, + /* The cache ops themselves: these all NOP for QEMU */ + { .name = "IICR", .cp = 15, .crm = 5, .opc1 = 0, + .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + { .name = "IDCR", .cp = 15, .crm = 6, .opc1 = 0, + .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + { .name = "CDCR", .cp = 15, .crm = 12, .opc1 = 0, + .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + { .name = "PIR", .cp = 15, .crm = 12, .opc1 = 1, + .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + { .name = "PDR", .cp = 15, .crm = 12, .opc1 = 2, + .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + { .name = "CIDCR", .cp = 15, .crm = 14, .opc1 = 0, + .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo cache_test_clean_cp_reginfo[] = { + /* The cache test-and-clean instructions always return (1 << 30) + * to indicate that there are no dirty cache lines. + */ + { .name = "TC_DCACHE", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 3, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, + .resetvalue = (1 << 30) }, + { .name = "TCI_DCACHE", .cp = 15, .crn = 7, .crm = 14, .opc1 = 0, .opc2 = 3, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, + .resetvalue = (1 << 30) }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo strongarm_cp_reginfo[] = { + /* Ignore ReadBuffer accesses */ + { .name = "C9_READBUFFER", .cp = 15, .crn = 9, + .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, + .access = PL1_RW, .resetvalue = 0, + .type = ARM_CP_CONST | ARM_CP_OVERRIDE | ARM_CP_NO_RAW }, + REGINFO_SENTINEL +}; + +static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu = ARM_CPU(arm_env_get_cpu(env)); + uint64_t mpidr = cpu->mp_affinity; + + if (arm_feature(env, ARM_FEATURE_V7MP)) { + mpidr |= (1U << 31); + /* Cores which are uniprocessor (non-coherent) + * but still implement the MP extensions set + * bit 30. (For instance, Cortex-R5). + */ + if (cpu->mp_is_up) { + mpidr |= (1u << 30); + } + } + return mpidr; +} + +static const ARMCPRegInfo mpidr_cp_reginfo[] = { + { .name = "MPIDR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, + .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo lpae_cp_reginfo[] = { + /* NOP AMAIR0/1 */ + { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, + .resetvalue = 0 }, + /* AMAIR1 is mapped to AMAIR_EL1[63:32] */ + { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, + .resetvalue = 0 }, + { .name = "PAR", .cp = 15, .crm = 7, .opc1 = 0, + .access = PL1_RW, .type = ARM_CP_64BIT, .resetvalue = 0, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.par_s), + offsetof(CPUARMState, cp15.par_ns)} }, + { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0, + .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), + offsetof(CPUARMState, cp15.ttbr0_ns) }, + .writefn = vmsa_ttbr_write, }, + { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1, + .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), + offsetof(CPUARMState, cp15.ttbr1_ns) }, + .writefn = vmsa_ttbr_write, }, + REGINFO_SENTINEL +}; + +static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return vfp_get_fpcr(env); +} + +static void aa64_fpcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + vfp_set_fpcr(env, value); +} + +static uint64_t aa64_fpsr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return vfp_get_fpsr(env); +} + +static void aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + vfp_set_fpsr(env, value); +} + +static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static void aa64_daif_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + env->daif = value & PSTATE_DAIF; +} + +static CPAccessResult aa64_cacheop_access(CPUARMState *env, + const ARMCPRegInfo *ri) +{ + /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless + * SCTLR_EL1.UCI is set. + */ + if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UCI)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +/* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions + * Page D4-1736 (DDI0487A.b) + */ + +static void tlbi_aa64_va_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA (AArch64 version) */ + ARMCPU *cpu = arm_env_get_cpu(env); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + tlb_flush_page(CPU(cpu), pageaddr); +} + +static void tlbi_aa64_vaa_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by VA, all ASIDs (AArch64 version) */ + ARMCPU *cpu = arm_env_get_cpu(env); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + tlb_flush_page(CPU(cpu), pageaddr); +} + +static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Invalidate by ASID (AArch64 version) */ + ARMCPU *cpu = arm_env_get_cpu(env); + int asid = extract64(value, 48, 16); + tlb_flush(CPU(cpu), asid == 0); +} + +static void tlbi_aa64_va_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *other_cs; + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + CPU_FOREACH(other_cs) { + tlb_flush_page(other_cs, pageaddr); + } +} + +static void tlbi_aa64_vaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *other_cs; + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + CPU_FOREACH(other_cs) { + tlb_flush_page(other_cs, pageaddr); + } +} + +static void tlbi_aa64_asid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *other_cs; + int asid = extract64(value, 48, 16); + + CPU_FOREACH(other_cs) { + tlb_flush(other_cs, asid == 0); + } +} + +static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* We don't implement EL2, so the only control on DC ZVA is the + * bit in the SCTLR which can prohibit access for EL0. + */ + if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_DZE)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int dzp_bit = 1 << 4; + + /* DZP indicates whether DC ZVA access is allowed */ + if (aa64_zva_access(env, NULL) == CP_ACCESS_OK) { + dzp_bit = 0; + } + return cpu->dcz_blocksize | dzp_bit; +} + +static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + if (!(env->pstate & PSTATE_SP)) { + /* Access to SP_EL0 is undefined if it's being used as + * the stack pointer. + */ + return CP_ACCESS_TRAP_UNCATEGORIZED; + } + return CP_ACCESS_OK; +} + +static uint64_t spsel_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pstate & PSTATE_SP; +} + +static void spsel_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val) +{ + update_spsel(env, val); +} + +static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (raw_read(env, ri) == value) { + /* Skip the TLB flush if nothing actually changed; Linux likes + * to do a lot of pointless SCTLR writes. + */ + return; + } + + raw_write(env, ri, value); + /* ??? Lots of these bits are not implemented. */ + /* This may enable/disable the MMU, so do a TLB flush. */ + tlb_flush(CPU(cpu), 1); +} + +static const ARMCPRegInfo v8_cp_reginfo[] = { + /* Minimal set of EL0-visible registers. This will need to be expanded + * significantly for system emulation of AArch64 CPUs. + */ + { .name = "NZCV", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 2, + .access = PL0_RW, .type = ARM_CP_NZCV }, + { .name = "DAIF", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 2, + .type = ARM_CP_NO_RAW, + .access = PL0_RW, .accessfn = aa64_daif_access, + .fieldoffset = offsetof(CPUARMState, daif), + .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore }, + { .name = "FPCR", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4, + .access = PL0_RW, .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write }, + { .name = "FPSR", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4, + .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write }, + { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0, + .access = PL0_R, .type = ARM_CP_NO_RAW, + .readfn = aa64_dczid_read }, + { .name = "DC_ZVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_DC_ZVA, +#ifndef CONFIG_USER_ONLY + /* Avoid overhead of an access check that always passes in user-mode */ + .accessfn = aa64_zva_access, +#endif + }, + { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2, + .access = PL1_R, .type = ARM_CP_CURRENTEL }, + /* Cache ops: all NOPs since we don't emulate caches */ + { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "IC_IALLU", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "DC_ISW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "DC_CVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_CSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NOP }, + { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1, + .access = PL0_W, .type = ARM_CP_NOP, + .accessfn = aa64_cacheop_access }, + { .name = "DC_CISW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NOP }, + /* TLBI operations */ + { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbiall_write }, + { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbiall_is_write }, + { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbiall_is_write }, + { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_va_is_write }, + { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_asid_is_write }, + { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_vaa_is_write }, + { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_va_is_write }, + { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_vaa_is_write }, + { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbiall_write }, + { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_va_write }, + { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_asid_write }, + { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_vaa_write }, + { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_va_write }, + { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, + .access = PL1_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_vaa_write }, +#ifndef CONFIG_USER_ONLY + /* 64 bit address translation operations */ + { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, + { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, + { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, + { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, + .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 }, +#endif + /* TLB invalidate last level of translation table walk */ + { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_is_write }, + { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, + .type = ARM_CP_NO_RAW, .access = PL1_W, + .writefn = tlbimvaa_is_write }, + { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write }, + { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, + .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_write }, + /* 32 bit cache operations */ + { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "BPIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "ICIALLU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "ICIMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 1, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "BPIALL", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "BPIMVA", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 7, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "DCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "DCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "DCCMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 1, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "DCCSW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "DCCMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 11, .opc2 = 1, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "DCCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 1, + .type = ARM_CP_NOP, .access = PL1_W }, + { .name = "DCCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, + .type = ARM_CP_NOP, .access = PL1_W }, + /* MMU Domain access control / MPU write buffer control */ + { .name = "DACR", .cp = 15, .opc1 = 0, .crn = 3, .crm = 0, .opc2 = 0, + .access = PL1_RW, .resetvalue = 0, + .writefn = dacr_write, .raw_writefn = raw_write, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s), + offsetoflow32(CPUARMState, cp15.dacr_ns) } }, + { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, elr_el[1]) }, + { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[1]) }, + /* We rely on the access checks not allowing the guest to write to the + * state field when SPSel indicates that it's being used as the stack + * pointer. + */ + { .name = "SP_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 1, .opc2 = 0, + .access = PL1_RW, .accessfn = sp_el0_access, + .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, sp_el[0]) }, + { .name = "SP_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 1, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, sp_el[1]) }, + { .name = "SPSel", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 0, + .type = ARM_CP_NO_RAW, + .access = PL1_RW, .readfn = spsel_read, .writefn = spsel_write }, + REGINFO_SENTINEL +}; + +/* Used to describe the behaviour of EL2 regs when EL2 does not exist. */ +static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { + { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0, + .access = PL2_RW, + .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, + { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_NO_RAW, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, + .access = PL2_RW, + .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, + { .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_CONST, + .resetvalue = 0 }, + { .name = "HMAIR1", .state = ARM_CP_STATE_AA32, + .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "TPIDR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 2, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2, + .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_CONST, + .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + uint64_t valid_mask = HCR_MASK; + + if (arm_feature(env, ARM_FEATURE_EL3)) { + valid_mask &= ~HCR_HCD; + } else { + valid_mask &= ~HCR_TSC; + } + + /* Clear RES0 bits. */ + value &= valid_mask; + + /* These bits change the MMU setup: + * HCR_VM enables stage 2 translation + * HCR_PTW forbids certain page-table setups + * HCR_DC Disables stage1 and enables stage2 translation + */ + if ((raw_read(env, ri) ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) { + tlb_flush(CPU(cpu), 1); + } + raw_write(env, ri, value); +} + +static const ARMCPRegInfo el2_cp_reginfo[] = { + { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, + .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), + .writefn = hcr_write }, + { .name = "DACR32_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 0, .opc2 = 0, + .access = PL2_RW, .resetvalue = 0, + .writefn = dacr_write, .raw_writefn = raw_write, + .fieldoffset = offsetof(CPUARMState, cp15.dacr32_el2) }, + { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, + .access = PL2_RW, + .fieldoffset = offsetof(CPUARMState, elr_el[2]) }, + { .name = "ESR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0, + .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) }, + { .name = "IFSR32_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 0, .opc2 = 1, + .access = PL2_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.ifsr32_el2) }, + { .name = "FAR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0, + .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) }, + { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0, + .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[6]) }, + { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0, + .access = PL2_RW, .writefn = vbar_write, + .fieldoffset = offsetof(CPUARMState, cp15.vbar_el[2]), + .resetvalue = 0 }, + { .name = "SP_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 1, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, sp_el[2]) }, + { .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2, + .access = PL2_RW, .accessfn = cptr_access, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]) }, + { .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0, + .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[2]), + .resetvalue = 0 }, + { .name = "HMAIR1", .state = ARM_CP_STATE_AA32, + .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_ALIAS, + .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el[2]) }, + { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2, + .access = PL2_RW, .writefn = vmsa_tcr_el1_write, + .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write, + .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) }, + { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0, + .access = PL2_RW, .raw_writefn = raw_write, .writefn = sctlr_write, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr_el[2]) }, + { .name = "TPIDR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 2, + .access = PL2_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[2]) }, + { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, + .access = PL2_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, + { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2, + .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, + .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, + { .name = "TLBI_ALLE2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiall_write }, + { .name = "TLBI_VAE2", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbi_aa64_vaa_write }, + { .name = "TLBI_VAE2IS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbi_aa64_vaa_write }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo el3_cp_reginfo[] = { + { .name = "SCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0, + .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3), + .resetvalue = 0, .writefn = scr_write }, + { .name = "SCR", .type = ARM_CP_ALIAS, + .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0, + .access = PL3_RW, .fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3), + .writefn = scr_write }, + { .name = "SDER32_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 1, + .access = PL3_RW, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.sder) }, + { .name = "SDER", + .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 1, + .access = PL3_RW, .resetvalue = 0, + .fieldoffset = offsetoflow32(CPUARMState, cp15.sder) }, + /* TODO: Implement NSACR trapping of secure EL1 accesses to EL3 */ + { .name = "NSACR", .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2, + .access = PL3_W | PL1_R, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.nsacr) }, + { .name = "MVBAR", .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, + .access = PL3_RW, .writefn = vbar_write, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.mvbar) }, + { .name = "SCTLR_EL3", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, /* reset handled by AArch32 view */ + .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 0, + .access = PL3_RW, .raw_writefn = raw_write, .writefn = sctlr_write, + .fieldoffset = offsetof(CPUARMState, cp15.sctlr_el[3]) }, + { .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0, + .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) }, + { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2, + .access = PL3_RW, .writefn = vmsa_tcr_el1_write, + .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write, + .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[3]) }, + { .name = "ELR_EL3", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 1, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, elr_el[3]) }, + { .name = "ESR_EL3", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 2, .opc2 = 0, + .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[3]) }, + { .name = "FAR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 0, .opc2 = 0, + .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[3]) }, + { .name = "SPSR_EL3", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_ALIAS, + .opc0 = 3, .opc1 = 6, .crn = 4, .crm = 0, .opc2 = 0, + .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[7]) }, + { .name = "VBAR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 0, + .access = PL3_RW, .writefn = vbar_write, + .fieldoffset = offsetof(CPUARMState, cp15.vbar_el[3]), + .resetvalue = 0 }, + { .name = "CPTR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 2, + .access = PL3_RW, .accessfn = cptr_access, .resetvalue = 0, + .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[3]) }, + REGINFO_SENTINEL +}; + +static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64, + * but the AArch32 CTR has its own reginfo struct) + */ + if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UCT)) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo debug_cp_reginfo[] = { + /* DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped + * debug components. The AArch64 version of DBGDRAR is named MDRAR_EL1; + * unlike DBGDRAR it is never accessible from EL0. + * DBGDSAR is deprecated and must RAZ from v8 anyway, so it has no AArch64 + * accessor. + */ + { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "MDRAR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + /* Monitor debug system control register; the 32-bit alias is DBGDSCRext. */ + { .name = "MDSCR_EL1", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1), + .resetvalue = 0 }, + /* MDCCSR_EL0, aka DBGDSCRint. This is a read-only mirror of MDSCR_EL1. + * We don't implement the configurable EL0 access. + */ + { .name = "MDCCSR_EL0", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0, + .type = ARM_CP_ALIAS, + .access = PL1_R, + .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1), }, + /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */ + { .name = "OSLAR_EL1", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4, + .access = PL1_W, .type = ARM_CP_NOP }, + /* Dummy OSDLR_EL1: 32-bit Linux will read this */ + { .name = "OSDLR_EL1", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 4, + .access = PL1_RW, .type = ARM_CP_NOP }, + /* Dummy DBGVCR: Linux wants to clear this on startup, but we don't + * implement vector catch debug events yet. + */ + { .name = "DBGVCR", + .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_NOP }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo debug_lpae_cp_reginfo[] = { + /* 64 bit access versions of the (dummy) debug registers */ + { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0, + .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 }, + { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0, + .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 }, + REGINFO_SENTINEL +}; + +void hw_watchpoint_update(ARMCPU *cpu, int n) +{ + CPUARMState *env = &cpu->env; + vaddr len = 0; + vaddr wvr = env->cp15.dbgwvr[n]; + uint64_t wcr = env->cp15.dbgwcr[n]; + int mask; + int flags = BP_CPU | BP_STOP_BEFORE_ACCESS; + + if (env->cpu_watchpoint[n]) { + cpu_watchpoint_remove_by_ref(CPU(cpu), env->cpu_watchpoint[n]); + env->cpu_watchpoint[n] = NULL; + } + + if (!extract64(wcr, 0, 1)) { + /* E bit clear : watchpoint disabled */ + return; + } + + switch (extract64(wcr, 3, 2)) { + case 0: + /* LSC 00 is reserved and must behave as if the wp is disabled */ + return; + case 1: + flags |= BP_MEM_READ; + break; + case 2: + flags |= BP_MEM_WRITE; + break; + case 3: + flags |= BP_MEM_ACCESS; + break; + } + + /* Attempts to use both MASK and BAS fields simultaneously are + * CONSTRAINED UNPREDICTABLE; we opt to ignore BAS in this case, + * thus generating a watchpoint for every byte in the masked region. + */ + mask = extract64(wcr, 24, 4); + if (mask == 1 || mask == 2) { + /* Reserved values of MASK; we must act as if the mask value was + * some non-reserved value, or as if the watchpoint were disabled. + * We choose the latter. + */ + return; + } else if (mask) { + /* Watchpoint covers an aligned area up to 2GB in size */ + len = 1ULL << mask; + /* If masked bits in WVR are not zero it's CONSTRAINED UNPREDICTABLE + * whether the watchpoint fires when the unmasked bits match; we opt + * to generate the exceptions. + */ + wvr &= ~(len - 1); + } else { + /* Watchpoint covers bytes defined by the byte address select bits */ + int bas = extract64(wcr, 5, 8); + int basstart; + + if (bas == 0) { + /* This must act as if the watchpoint is disabled */ + return; + } + + if (extract64(wvr, 2, 1)) { + /* Deprecated case of an only 4-aligned address. BAS[7:4] are + * ignored, and BAS[3:0] define which bytes to watch. + */ + bas &= 0xf; + } + /* The BAS bits are supposed to be programmed to indicate a contiguous + * range of bytes. Otherwise it is CONSTRAINED UNPREDICTABLE whether + * we fire for each byte in the word/doubleword addressed by the WVR. + * We choose to ignore any non-zero bits after the first range of 1s. + */ + basstart = ctz32(bas); + len = cto32(bas >> basstart); + wvr += basstart; + } + + cpu_watchpoint_insert(CPU(cpu), wvr, len, flags, + &env->cpu_watchpoint[n]); +} + +void hw_watchpoint_update_all(ARMCPU *cpu) +{ + int i; + CPUARMState *env = &cpu->env; + + /* Completely clear out existing QEMU watchpoints and our array, to + * avoid possible stale entries following migration load. + */ + cpu_watchpoint_remove_all(CPU(cpu), BP_CPU); + memset(env->cpu_watchpoint, 0, sizeof(env->cpu_watchpoint)); + + for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_watchpoint); i++) { + hw_watchpoint_update(cpu, i); + } +} + +static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int i = ri->crm; + + /* Bits [63:49] are hardwired to the value of bit [48]; that is, the + * register reads and behaves as if values written are sign extended. + * Bits [1:0] are RES0. + */ + value = sextract64(value, 0, 49) & ~3ULL; + + raw_write(env, ri, value); + hw_watchpoint_update(cpu, i); +} + +static void dbgwcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int i = ri->crm; + + raw_write(env, ri, value); + hw_watchpoint_update(cpu, i); +} + +void hw_breakpoint_update(ARMCPU *cpu, int n) +{ + CPUARMState *env = &cpu->env; + uint64_t bvr = env->cp15.dbgbvr[n]; + uint64_t bcr = env->cp15.dbgbcr[n]; + vaddr addr; + int bt; + int flags = BP_CPU; + + if (env->cpu_breakpoint[n]) { + cpu_breakpoint_remove_by_ref(CPU(cpu), env->cpu_breakpoint[n]); + env->cpu_breakpoint[n] = NULL; + } + + if (!extract64(bcr, 0, 1)) { + /* E bit clear : watchpoint disabled */ + return; + } + + bt = extract64(bcr, 20, 4); + + switch (bt) { + case 4: /* unlinked address mismatch (reserved if AArch64) */ + case 5: /* linked address mismatch (reserved if AArch64) */ + qemu_log_mask(LOG_UNIMP, + "arm: address mismatch breakpoint types not implemented"); + return; + case 0: /* unlinked address match */ + case 1: /* linked address match */ + { + /* Bits [63:49] are hardwired to the value of bit [48]; that is, + * we behave as if the register was sign extended. Bits [1:0] are + * RES0. The BAS field is used to allow setting breakpoints on 16 + * bit wide instructions; it is CONSTRAINED UNPREDICTABLE whether + * a bp will fire if the addresses covered by the bp and the addresses + * covered by the insn overlap but the insn doesn't start at the + * start of the bp address range. We choose to require the insn and + * the bp to have the same address. The constraints on writing to + * BAS enforced in dbgbcr_write mean we have only four cases: + * 0b0000 => no breakpoint + * 0b0011 => breakpoint on addr + * 0b1100 => breakpoint on addr + 2 + * 0b1111 => breakpoint on addr + * See also figure D2-3 in the v8 ARM ARM (DDI0487A.c). + */ + int bas = extract64(bcr, 5, 4); + addr = sextract64(bvr, 0, 49) & ~3ULL; + if (bas == 0) { + return; + } + if (bas == 0xc) { + addr += 2; + } + break; + } + case 2: /* unlinked context ID match */ + case 8: /* unlinked VMID match (reserved if no EL2) */ + case 10: /* unlinked context ID and VMID match (reserved if no EL2) */ + qemu_log_mask(LOG_UNIMP, + "arm: unlinked context breakpoint types not implemented"); + return; + case 9: /* linked VMID match (reserved if no EL2) */ + case 11: /* linked context ID and VMID match (reserved if no EL2) */ + case 3: /* linked context ID match */ + default: + /* We must generate no events for Linked context matches (unless + * they are linked to by some other bp/wp, which is handled in + * updates for the linking bp/wp). We choose to also generate no events + * for reserved values. + */ + return; + } + + cpu_breakpoint_insert(CPU(cpu), addr, flags, &env->cpu_breakpoint[n]); +} + +void hw_breakpoint_update_all(ARMCPU *cpu) +{ + int i; + CPUARMState *env = &cpu->env; + + /* Completely clear out existing QEMU breakpoints and our array, to + * avoid possible stale entries following migration load. + */ + cpu_breakpoint_remove_all(CPU(cpu), BP_CPU); + memset(env->cpu_breakpoint, 0, sizeof(env->cpu_breakpoint)); + + for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_breakpoint); i++) { + hw_breakpoint_update(cpu, i); + } +} + +static void dbgbvr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int i = ri->crm; + + raw_write(env, ri, value); + hw_breakpoint_update(cpu, i); +} + +static void dbgbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int i = ri->crm; + + /* BAS[3] is a read-only copy of BAS[2], and BAS[1] a read-only + * copy of BAS[0]. + */ + value = deposit64(value, 6, 1, extract64(value, 5, 1)); + value = deposit64(value, 8, 1, extract64(value, 7, 1)); + + raw_write(env, ri, value); + hw_breakpoint_update(cpu, i); +} + +static void define_debug_regs(ARMCPU *cpu) +{ + /* Define v7 and v8 architectural debug registers. + * These are just dummy implementations for now. + */ + int i; + int wrps, brps, ctx_cmps; + ARMCPRegInfo dbgdidr = { + .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr, + }; + + /* Note that all these register fields hold "number of Xs minus 1". */ + brps = extract32(cpu->dbgdidr, 24, 4); + wrps = extract32(cpu->dbgdidr, 28, 4); + ctx_cmps = extract32(cpu->dbgdidr, 20, 4); + + assert(ctx_cmps <= brps); + + /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties + * of the debug registers such as number of breakpoints; + * check that if they both exist then they agree. + */ + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps); + assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps); + assert(extract32(cpu->id_aa64dfr0, 28, 4) == ctx_cmps); + } + + define_one_arm_cp_reg(cpu, &dbgdidr); + define_arm_cp_regs(cpu, debug_cp_reginfo); + + if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) { + define_arm_cp_regs(cpu, debug_lpae_cp_reginfo); + } + + for (i = 0; i < brps + 1; i++) { + ARMCPRegInfo dbgregs[] = { + { .name = "DBGBVR", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]), + .writefn = dbgbvr_write, .raw_writefn = raw_write + }, + { .name = "DBGBCR", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]), + .writefn = dbgbcr_write, .raw_writefn = raw_write + }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, dbgregs); + } + + for (i = 0; i < wrps + 1; i++) { + ARMCPRegInfo dbgregs[] = { + { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]), + .writefn = dbgwvr_write, .raw_writefn = raw_write + }, + { .name = "DBGWCR", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7, + .access = PL1_RW, + .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]), + .writefn = dbgwcr_write, .raw_writefn = raw_write + }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, dbgregs); + } +} + +void register_cp_regs_for_features(ARMCPU *cpu) +{ + /* Register all the coprocessor registers based on feature bits */ + CPUARMState *env = &cpu->env; + if (arm_feature(env, ARM_FEATURE_M)) { + /* M profile has no coprocessor registers */ + return; + } + + define_arm_cp_regs(cpu, cp_reginfo); + if (!arm_feature(env, ARM_FEATURE_V8)) { + /* Must go early as it is full of wildcards that may be + * overridden by later definitions. + */ + define_arm_cp_regs(cpu, not_v8_cp_reginfo); + } + + if (arm_feature(env, ARM_FEATURE_V6)) { + /* The ID registers all have impdef reset values */ + ARMCPRegInfo v6_idregs[] = { + { .name = "ID_PFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_pfr0 }, + { .name = "ID_PFR1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_pfr1 }, + { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_dfr0 }, + { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_afr0 }, + { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_mmfr0 }, + { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_mmfr1 }, + { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_mmfr2 }, + { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_mmfr3 }, + { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_isar0 }, + { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_isar1 }, + { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_isar2 }, + { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_isar3 }, + { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_isar4 }, + { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_isar5 }, + /* 6..7 are as yet unallocated and must RAZ */ + { .name = "ID_ISAR6", .cp = 15, .crn = 0, .crm = 2, + .opc1 = 0, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = 0 }, + { .name = "ID_ISAR7", .cp = 15, .crn = 0, .crm = 2, + .opc1 = 0, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = 0 }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, v6_idregs); + define_arm_cp_regs(cpu, v6_cp_reginfo); + } else { + define_arm_cp_regs(cpu, not_v6_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V6K)) { + define_arm_cp_regs(cpu, v6k_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V7MP) && + !arm_feature(env, ARM_FEATURE_MPU)) { + define_arm_cp_regs(cpu, v7mp_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V7)) { + /* v7 performance monitor control register: same implementor + * field as main ID register, and we implement only the cycle + * count register. + */ +#ifndef CONFIG_USER_ONLY + ARMCPRegInfo pmcr = { + .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, + .access = PL0_RW, + .type = ARM_CP_IO | ARM_CP_ALIAS, + .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), + .accessfn = pmreg_access, .writefn = pmcr_write, + .raw_writefn = raw_write, + }; + ARMCPRegInfo pmcr64 = { + .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, + .access = PL0_RW, .accessfn = pmreg_access, + .type = ARM_CP_IO, + .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), + .resetvalue = cpu->midr & 0xff000000, + .writefn = pmcr_write, .raw_writefn = raw_write, + }; + define_one_arm_cp_reg(cpu, &pmcr); + define_one_arm_cp_reg(cpu, &pmcr64); +#endif + ARMCPRegInfo clidr = { + .name = "CLIDR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr + }; + define_one_arm_cp_reg(cpu, &clidr); + define_arm_cp_regs(cpu, v7_cp_reginfo); + define_debug_regs(cpu); + } else { + define_arm_cp_regs(cpu, not_v7_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_V8)) { + /* AArch64 ID registers, which all have impdef reset values */ + ARMCPRegInfo v8_idregs[] = { + { .name = "ID_AA64PFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64pfr0 }, + { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64pfr1}, + { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + /* We mask out the PMUVer field, because we don't currently + * implement the PMU. Not advertising it prevents the guest + * from trying to use it and getting UNDEFs on registers we + * don't implement. + */ + .resetvalue = cpu->id_aa64dfr0 & ~0xf00 }, + { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64dfr1 }, + { .name = "ID_AA64AFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64afr0 }, + { .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64afr1 }, + { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64isar0 }, + { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64isar1 }, + { .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64mmfr0 }, + { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->id_aa64mmfr1 }, + { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->mvfr0 }, + { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->mvfr1 }, + { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->mvfr2 }, + REGINFO_SENTINEL + }; + /* RVBAR_EL1 is only implemented if EL1 is the highest EL */ + if (!arm_feature(env, ARM_FEATURE_EL3) && + !arm_feature(env, ARM_FEATURE_EL2)) { + ARMCPRegInfo rvbar = { + .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, + .type = ARM_CP_CONST, .access = PL1_R, .resetvalue = cpu->rvbar + }; + define_one_arm_cp_reg(cpu, &rvbar); + } + define_arm_cp_regs(cpu, v8_idregs); + define_arm_cp_regs(cpu, v8_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_EL2)) { + define_arm_cp_regs(cpu, el2_cp_reginfo); + /* RVBAR_EL2 is only implemented if EL2 is the highest EL */ + if (!arm_feature(env, ARM_FEATURE_EL3)) { + ARMCPRegInfo rvbar = { + .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1, + .type = ARM_CP_CONST, .access = PL2_R, .resetvalue = cpu->rvbar + }; + define_one_arm_cp_reg(cpu, &rvbar); + } + } else { + /* If EL2 is missing but higher ELs are enabled, we need to + * register the no_el2 reginfos. + */ + if (arm_feature(env, ARM_FEATURE_EL3)) { + define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo); + } + } + if (arm_feature(env, ARM_FEATURE_EL3)) { + define_arm_cp_regs(cpu, el3_cp_reginfo); + ARMCPRegInfo rvbar = { + .name = "RVBAR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 1, + .type = ARM_CP_CONST, .access = PL3_R, .resetvalue = cpu->rvbar + }; + define_one_arm_cp_reg(cpu, &rvbar); + } + if (arm_feature(env, ARM_FEATURE_MPU)) { + if (arm_feature(env, ARM_FEATURE_V6)) { + /* PMSAv6 not implemented */ + assert(arm_feature(env, ARM_FEATURE_V7)); + define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo); + define_arm_cp_regs(cpu, pmsav7_cp_reginfo); + } else { + define_arm_cp_regs(cpu, pmsav5_cp_reginfo); + } + } else { + define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo); + define_arm_cp_regs(cpu, vmsa_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_THUMB2EE)) { + define_arm_cp_regs(cpu, t2ee_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { + define_arm_cp_regs(cpu, generic_timer_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_VAPA)) { + define_arm_cp_regs(cpu, vapa_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_CACHE_TEST_CLEAN)) { + define_arm_cp_regs(cpu, cache_test_clean_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_CACHE_DIRTY_REG)) { + define_arm_cp_regs(cpu, cache_dirty_status_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_CACHE_BLOCK_OPS)) { + define_arm_cp_regs(cpu, cache_block_ops_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_OMAPCP)) { + define_arm_cp_regs(cpu, omap_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_STRONGARM)) { + define_arm_cp_regs(cpu, strongarm_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_XSCALE)) { + define_arm_cp_regs(cpu, xscale_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_DUMMY_C15_REGS)) { + define_arm_cp_regs(cpu, dummy_c15_cp_reginfo); + } + if (arm_feature(env, ARM_FEATURE_LPAE)) { + define_arm_cp_regs(cpu, lpae_cp_reginfo); + } + /* Slightly awkwardly, the OMAP and StrongARM cores need all of + * cp15 crn=0 to be writes-ignored, whereas for other cores they should + * be read-only (ie write causes UNDEF exception). + */ + { + ARMCPRegInfo id_pre_v8_midr_cp_reginfo[] = { + /* Pre-v8 MIDR space. + * Note that the MIDR isn't a simple constant register because + * of the TI925 behaviour where writes to another register can + * cause the MIDR value to change. + * + * Unimplemented registers in the c15 0 0 0 space default to + * MIDR. Define MIDR first as this entire space, then CTR, TCMTR + * and friends override accordingly. + */ + { .name = "MIDR", + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_R, .resetvalue = cpu->midr, + .writefn = arm_cp_write_ignore, .raw_writefn = raw_write, + .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid), + .type = ARM_CP_OVERRIDE }, + /* crn = 0 op1 = 0 crm = 3..7 : currently unassigned; we RAZ. */ + { .name = "DUMMY", + .cp = 15, .crn = 0, .crm = 3, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DUMMY", + .cp = 15, .crn = 0, .crm = 4, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DUMMY", + .cp = 15, .crn = 0, .crm = 5, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DUMMY", + .cp = 15, .crn = 0, .crm = 6, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DUMMY", + .cp = 15, .crn = 0, .crm = 7, .opc1 = 0, .opc2 = CP_ANY, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + REGINFO_SENTINEL + }; + ARMCPRegInfo id_v8_midr_cp_reginfo[] = { + { .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr }, + /* crn = 0 op1 = 0 crm = 0 op2 = 4,7 : AArch32 aliases of MIDR */ + { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST, + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4, + .access = PL1_R, .resetvalue = cpu->midr }, + { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST, + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 7, + .access = PL1_R, .resetvalue = cpu->midr }, + { .name = "REVIDR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 6, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->revidr }, + REGINFO_SENTINEL + }; + ARMCPRegInfo id_cp_reginfo[] = { + /* These are common to v8 and pre-v8 */ + { .name = "CTR", + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr }, + { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0, + .access = PL0_R, .accessfn = ctr_el0_access, + .type = ARM_CP_CONST, .resetvalue = cpu->ctr }, + /* TCMTR and TLBTR exist in v8 but have no 64-bit versions */ + { .name = "TCMTR", + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + REGINFO_SENTINEL + }; + /* TLBTR is specific to VMSA */ + ARMCPRegInfo id_tlbtr_reginfo = { + .name = "TLBTR", + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0, + }; + /* MPUIR is specific to PMSA V6+ */ + ARMCPRegInfo id_mpuir_reginfo = { + .name = "MPUIR", + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->pmsav7_dregion << 8 + }; + ARMCPRegInfo crn0_wi_reginfo = { + .name = "CRN0_WI", .cp = 15, .crn = 0, .crm = CP_ANY, + .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W, + .type = ARM_CP_NOP | ARM_CP_OVERRIDE + }; + if (arm_feature(env, ARM_FEATURE_OMAPCP) || + arm_feature(env, ARM_FEATURE_STRONGARM)) { + ARMCPRegInfo *r; + /* Register the blanket "writes ignored" value first to cover the + * whole space. Then update the specific ID registers to allow write + * access, so that they ignore writes rather than causing them to + * UNDEF. + */ + define_one_arm_cp_reg(cpu, &crn0_wi_reginfo); + for (r = id_pre_v8_midr_cp_reginfo; + r->type != ARM_CP_SENTINEL; r++) { + r->access = PL1_RW; + } + for (r = id_cp_reginfo; r->type != ARM_CP_SENTINEL; r++) { + r->access = PL1_RW; + } + id_tlbtr_reginfo.access = PL1_RW; + id_tlbtr_reginfo.access = PL1_RW; + } + if (arm_feature(env, ARM_FEATURE_V8)) { + define_arm_cp_regs(cpu, id_v8_midr_cp_reginfo); + } else { + define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo); + } + define_arm_cp_regs(cpu, id_cp_reginfo); + if (!arm_feature(env, ARM_FEATURE_MPU)) { + define_one_arm_cp_reg(cpu, &id_tlbtr_reginfo); + } else if (arm_feature(env, ARM_FEATURE_V7)) { + define_one_arm_cp_reg(cpu, &id_mpuir_reginfo); + } + } + + if (arm_feature(env, ARM_FEATURE_MPIDR)) { + define_arm_cp_regs(cpu, mpidr_cp_reginfo); + } + + if (arm_feature(env, ARM_FEATURE_AUXCR)) { + ARMCPRegInfo auxcr = { + .name = "ACTLR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, + .resetvalue = cpu->reset_auxcr + }; + define_one_arm_cp_reg(cpu, &auxcr); + } + + if (arm_feature(env, ARM_FEATURE_CBAR)) { + if (arm_feature(env, ARM_FEATURE_AARCH64)) { + /* 32 bit view is [31:18] 0...0 [43:32]. */ + uint32_t cbar32 = (extract64(cpu->reset_cbar, 18, 14) << 18) + | extract64(cpu->reset_cbar, 32, 12); + ARMCPRegInfo cbar_reginfo[] = { + { .name = "CBAR", + .type = ARM_CP_CONST, + .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0, + .access = PL1_R, .resetvalue = cpu->reset_cbar }, + { .name = "CBAR_EL1", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_CONST, + .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 0, + .access = PL1_R, .resetvalue = cbar32 }, + REGINFO_SENTINEL + }; + /* We don't implement a r/w 64 bit CBAR currently */ + assert(arm_feature(env, ARM_FEATURE_CBAR_RO)); + define_arm_cp_regs(cpu, cbar_reginfo); + } else { + ARMCPRegInfo cbar = { + .name = "CBAR", + .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0, + .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar, + .fieldoffset = offsetof(CPUARMState, + cp15.c15_config_base_address) + }; + if (arm_feature(env, ARM_FEATURE_CBAR_RO)) { + cbar.access = PL1_R; + cbar.fieldoffset = 0; + cbar.type = ARM_CP_CONST; + } + define_one_arm_cp_reg(cpu, &cbar); + } + } + + /* Generic registers whose values depend on the implementation */ + { + ARMCPRegInfo sctlr = { + .name = "SCTLR", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0, + .access = PL1_RW, + .bank_fieldoffsets = { offsetof(CPUARMState, cp15.sctlr_s), + offsetof(CPUARMState, cp15.sctlr_ns) }, + .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr, + .raw_writefn = raw_write, + }; + if (arm_feature(env, ARM_FEATURE_XSCALE)) { + /* Normally we would always end the TB on an SCTLR write, but Linux + * arch/arm/mach-pxa/sleep.S expects two instructions following + * an MMU enable to execute from cache. Imitate this behaviour. + */ + sctlr.type |= ARM_CP_SUPPRESS_TB_END; + } + define_one_arm_cp_reg(cpu, &sctlr); + } +} + +ARMCPU *cpu_arm_init(const char *cpu_model) +{ + return ARM_CPU(cpu_generic_init(TYPE_ARM_CPU, cpu_model)); +} + +void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + + if (arm_feature(env, ARM_FEATURE_AARCH64)) { + gdb_register_coprocessor(cs, aarch64_fpu_gdb_get_reg, + aarch64_fpu_gdb_set_reg, + 34, "aarch64-fpu.xml", 0); + } else if (arm_feature(env, ARM_FEATURE_NEON)) { + gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, + 51, "arm-neon.xml", 0); + } else if (arm_feature(env, ARM_FEATURE_VFP3)) { + gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, + 35, "arm-vfp3.xml", 0); + } else if (arm_feature(env, ARM_FEATURE_VFP)) { + gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, + 19, "arm-vfp.xml", 0); + } +} + +/* Sort alphabetically by type name, except for "any". */ +static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b) +{ + ObjectClass *class_a = (ObjectClass *)a; + ObjectClass *class_b = (ObjectClass *)b; + const char *name_a, *name_b; + + name_a = object_class_get_name(class_a); + name_b = object_class_get_name(class_b); + if (strcmp(name_a, "any-" TYPE_ARM_CPU) == 0) { + return 1; + } else if (strcmp(name_b, "any-" TYPE_ARM_CPU) == 0) { + return -1; + } else { + return strcmp(name_a, name_b); + } +} + +static void arm_cpu_list_entry(gpointer data, gpointer user_data) +{ + ObjectClass *oc = data; + CPUListState *s = user_data; + const char *typename; + char *name; + + typename = object_class_get_name(oc); + name = g_strndup(typename, strlen(typename) - strlen("-" TYPE_ARM_CPU)); + (*s->cpu_fprintf)(s->file, " %s\n", + name); + g_free(name); +} + +void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf) +{ + CPUListState s = { + .file = f, + .cpu_fprintf = cpu_fprintf, + }; + GSList *list; + + list = object_class_get_list(TYPE_ARM_CPU, false); + list = g_slist_sort(list, arm_cpu_list_compare); + (*cpu_fprintf)(f, "Available CPUs:\n"); + g_slist_foreach(list, arm_cpu_list_entry, &s); + g_slist_free(list); +#ifdef CONFIG_KVM + /* The 'host' CPU type is dynamically registered only if KVM is + * enabled, so we have to special-case it here: + */ + (*cpu_fprintf)(f, " host (only available in KVM mode)\n"); +#endif +} + +static void arm_cpu_add_definition(gpointer data, gpointer user_data) +{ + ObjectClass *oc = data; + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfoList *entry; + CpuDefinitionInfo *info; + const char *typename; + + typename = object_class_get_name(oc); + info = g_malloc0(sizeof(*info)); + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + + entry = g_malloc0(sizeof(*entry)); + entry->value = info; + entry->next = *cpu_list; + *cpu_list = entry; +} + +CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) +{ + CpuDefinitionInfoList *cpu_list = NULL; + GSList *list; + + list = object_class_get_list(TYPE_ARM_CPU, false); + g_slist_foreach(list, arm_cpu_add_definition, &cpu_list); + g_slist_free(list); + + return cpu_list; +} + +static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, + void *opaque, int state, int secstate, + int crm, int opc1, int opc2) +{ + /* Private utility function for define_one_arm_cp_reg_with_opaque(): + * add a single reginfo struct to the hash table. + */ + uint32_t *key = g_new(uint32_t, 1); + ARMCPRegInfo *r2 = g_memdup(r, sizeof(ARMCPRegInfo)); + int is64 = (r->type & ARM_CP_64BIT) ? 1 : 0; + int ns = (secstate & ARM_CP_SECSTATE_NS) ? 1 : 0; + + /* Reset the secure state to the specific incoming state. This is + * necessary as the register may have been defined with both states. + */ + r2->secure = secstate; + + if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) { + /* Register is banked (using both entries in array). + * Overwriting fieldoffset as the array is only used to define + * banked registers but later only fieldoffset is used. + */ + r2->fieldoffset = r->bank_fieldoffsets[ns]; + } + + if (state == ARM_CP_STATE_AA32) { + if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) { + /* If the register is banked then we don't need to migrate or + * reset the 32-bit instance in certain cases: + * + * 1) If the register has both 32-bit and 64-bit instances then we + * can count on the 64-bit instance taking care of the + * non-secure bank. + * 2) If ARMv8 is enabled then we can count on a 64-bit version + * taking care of the secure bank. This requires that separate + * 32 and 64-bit definitions are provided. + */ + if ((r->state == ARM_CP_STATE_BOTH && ns) || + (arm_feature(&cpu->env, ARM_FEATURE_V8) && !ns)) { + r2->type |= ARM_CP_ALIAS; + } + } else if ((secstate != r->secure) && !ns) { + /* The register is not banked so we only want to allow migration of + * the non-secure instance. + */ + r2->type |= ARM_CP_ALIAS; + } + + if (r->state == ARM_CP_STATE_BOTH) { + /* We assume it is a cp15 register if the .cp field is left unset. + */ + if (r2->cp == 0) { + r2->cp = 15; + } + +#ifdef HOST_WORDS_BIGENDIAN + if (r2->fieldoffset) { + r2->fieldoffset += sizeof(uint32_t); + } +#endif + } + } + if (state == ARM_CP_STATE_AA64) { + /* To allow abbreviation of ARMCPRegInfo + * definitions, we treat cp == 0 as equivalent to + * the value for "standard guest-visible sysreg". + * STATE_BOTH definitions are also always "standard + * sysreg" in their AArch64 view (the .cp value may + * be non-zero for the benefit of the AArch32 view). + */ + if (r->cp == 0 || r->state == ARM_CP_STATE_BOTH) { + r2->cp = CP_REG_ARM64_SYSREG_CP; + } + *key = ENCODE_AA64_CP_REG(r2->cp, r2->crn, crm, + r2->opc0, opc1, opc2); + } else { + *key = ENCODE_CP_REG(r2->cp, is64, ns, r2->crn, crm, opc1, opc2); + } + if (opaque) { + r2->opaque = opaque; + } + /* reginfo passed to helpers is correct for the actual access, + * and is never ARM_CP_STATE_BOTH: + */ + r2->state = state; + /* Make sure reginfo passed to helpers for wildcarded regs + * has the correct crm/opc1/opc2 for this reg, not CP_ANY: + */ + r2->crm = crm; + r2->opc1 = opc1; + r2->opc2 = opc2; + /* By convention, for wildcarded registers only the first + * entry is used for migration; the others are marked as + * ALIAS so we don't try to transfer the register + * multiple times. Special registers (ie NOP/WFI) are + * never migratable and not even raw-accessible. + */ + if ((r->type & ARM_CP_SPECIAL)) { + r2->type |= ARM_CP_NO_RAW; + } + if (((r->crm == CP_ANY) && crm != 0) || + ((r->opc1 == CP_ANY) && opc1 != 0) || + ((r->opc2 == CP_ANY) && opc2 != 0)) { + r2->type |= ARM_CP_ALIAS; + } + + /* Check that raw accesses are either forbidden or handled. Note that + * we can't assert this earlier because the setup of fieldoffset for + * banked registers has to be done first. + */ + if (!(r2->type & ARM_CP_NO_RAW)) { + assert(!raw_accessors_invalid(r2)); + } + + /* Overriding of an existing definition must be explicitly + * requested. + */ + if (!(r->type & ARM_CP_OVERRIDE)) { + ARMCPRegInfo *oldreg; + oldreg = g_hash_table_lookup(cpu->cp_regs, key); + if (oldreg && !(oldreg->type & ARM_CP_OVERRIDE)) { + fprintf(stderr, "Register redefined: cp=%d %d bit " + "crn=%d crm=%d opc1=%d opc2=%d, " + "was %s, now %s\n", r2->cp, 32 + 32 * is64, + r2->crn, r2->crm, r2->opc1, r2->opc2, + oldreg->name, r2->name); + g_assert_not_reached(); + } + } + g_hash_table_insert(cpu->cp_regs, key, r2); +} + + +void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, + const ARMCPRegInfo *r, void *opaque) +{ + /* Define implementations of coprocessor registers. + * We store these in a hashtable because typically + * there are less than 150 registers in a space which + * is 16*16*16*8*8 = 262144 in size. + * Wildcarding is supported for the crm, opc1 and opc2 fields. + * If a register is defined twice then the second definition is + * used, so this can be used to define some generic registers and + * then override them with implementation specific variations. + * At least one of the original and the second definition should + * include ARM_CP_OVERRIDE in its type bits -- this is just a guard + * against accidental use. + * + * The state field defines whether the register is to be + * visible in the AArch32 or AArch64 execution state. If the + * state is set to ARM_CP_STATE_BOTH then we synthesise a + * reginfo structure for the AArch32 view, which sees the lower + * 32 bits of the 64 bit register. + * + * Only registers visible in AArch64 may set r->opc0; opc0 cannot + * be wildcarded. AArch64 registers are always considered to be 64 + * bits; the ARM_CP_64BIT* flag applies only to the AArch32 view of + * the register, if any. + */ + int crm, opc1, opc2, state; + int crmmin = (r->crm == CP_ANY) ? 0 : r->crm; + int crmmax = (r->crm == CP_ANY) ? 15 : r->crm; + int opc1min = (r->opc1 == CP_ANY) ? 0 : r->opc1; + int opc1max = (r->opc1 == CP_ANY) ? 7 : r->opc1; + int opc2min = (r->opc2 == CP_ANY) ? 0 : r->opc2; + int opc2max = (r->opc2 == CP_ANY) ? 7 : r->opc2; + /* 64 bit registers have only CRm and Opc1 fields */ + assert(!((r->type & ARM_CP_64BIT) && (r->opc2 || r->crn))); + /* op0 only exists in the AArch64 encodings */ + assert((r->state != ARM_CP_STATE_AA32) || (r->opc0 == 0)); + /* AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless */ + assert((r->state != ARM_CP_STATE_AA64) || !(r->type & ARM_CP_64BIT)); + /* The AArch64 pseudocode CheckSystemAccess() specifies that op1 + * encodes a minimum access level for the register. We roll this + * runtime check into our general permission check code, so check + * here that the reginfo's specified permissions are strict enough + * to encompass the generic architectural permission check. + */ + if (r->state != ARM_CP_STATE_AA32) { + int mask = 0; + switch (r->opc1) { + case 0: case 1: case 2: + /* min_EL EL1 */ + mask = PL1_RW; + break; + case 3: + /* min_EL EL0 */ + mask = PL0_RW; + break; + case 4: + /* min_EL EL2 */ + mask = PL2_RW; + break; + case 5: + /* unallocated encoding, so not possible */ + assert(false); + break; + case 6: + /* min_EL EL3 */ + mask = PL3_RW; + break; + case 7: + /* min_EL EL1, secure mode only (we don't check the latter) */ + mask = PL1_RW; + break; + default: + /* broken reginfo with out-of-range opc1 */ + assert(false); + break; + } + /* assert our permissions are not too lax (stricter is fine) */ + assert((r->access & ~mask) == 0); + } + + /* Check that the register definition has enough info to handle + * reads and writes if they are permitted. + */ + if (!(r->type & (ARM_CP_SPECIAL|ARM_CP_CONST))) { + if (r->access & PL3_R) { + assert((r->fieldoffset || + (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1])) || + r->readfn); + } + if (r->access & PL3_W) { + assert((r->fieldoffset || + (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1])) || + r->writefn); + } + } + /* Bad type field probably means missing sentinel at end of reg list */ + assert(cptype_valid(r->type)); + for (crm = crmmin; crm <= crmmax; crm++) { + for (opc1 = opc1min; opc1 <= opc1max; opc1++) { + for (opc2 = opc2min; opc2 <= opc2max; opc2++) { + for (state = ARM_CP_STATE_AA32; + state <= ARM_CP_STATE_AA64; state++) { + if (r->state != state && r->state != ARM_CP_STATE_BOTH) { + continue; + } + if (state == ARM_CP_STATE_AA32) { + /* Under AArch32 CP registers can be common + * (same for secure and non-secure world) or banked. + */ + switch (r->secure) { + case ARM_CP_SECSTATE_S: + case ARM_CP_SECSTATE_NS: + add_cpreg_to_hashtable(cpu, r, opaque, state, + r->secure, crm, opc1, opc2); + break; + default: + add_cpreg_to_hashtable(cpu, r, opaque, state, + ARM_CP_SECSTATE_S, + crm, opc1, opc2); + add_cpreg_to_hashtable(cpu, r, opaque, state, + ARM_CP_SECSTATE_NS, + crm, opc1, opc2); + break; + } + } else { + /* AArch64 registers get mapped to non-secure instance + * of AArch32 */ + add_cpreg_to_hashtable(cpu, r, opaque, state, + ARM_CP_SECSTATE_NS, + crm, opc1, opc2); + } + } + } + } + } +} + +void define_arm_cp_regs_with_opaque(ARMCPU *cpu, + const ARMCPRegInfo *regs, void *opaque) +{ + /* Define a whole list of registers */ + const ARMCPRegInfo *r; + for (r = regs; r->type != ARM_CP_SENTINEL; r++) { + define_one_arm_cp_reg_with_opaque(cpu, r, opaque); + } +} + +const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp) +{ + return g_hash_table_lookup(cpregs, &encoded_cp); +} + +void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Helper coprocessor write function for write-ignore registers */ +} + +uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Helper coprocessor write function for read-as-zero registers */ + return 0; +} + +void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque) +{ + /* Helper coprocessor reset function for do-nothing-on-reset registers */ +} + +static int bad_mode_switch(CPUARMState *env, int mode) +{ + /* Return true if it is not valid for us to switch to + * this CPU mode (ie all the UNPREDICTABLE cases in + * the ARM ARM CPSRWriteByInstr pseudocode). + */ + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + case ARM_CPU_MODE_SVC: + case ARM_CPU_MODE_ABT: + case ARM_CPU_MODE_UND: + case ARM_CPU_MODE_IRQ: + case ARM_CPU_MODE_FIQ: + return 0; + case ARM_CPU_MODE_MON: + return !arm_is_secure(env); + default: + return 1; + } +} + +uint32_t cpsr_read(CPUARMState *env) +{ + int ZF; + ZF = (env->ZF == 0); + return env->uncached_cpsr | (env->NF & 0x80000000) | (ZF << 30) | + (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27) + | (env->thumb << 5) | ((env->condexec_bits & 3) << 25) + | ((env->condexec_bits & 0xfc) << 8) + | (env->GE << 16) | (env->daif & CPSR_AIF); +} + +void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) +{ + uint32_t changed_daif; + + if (mask & CPSR_NZCV) { + env->ZF = (~val) & CPSR_Z; + env->NF = val; + env->CF = (val >> 29) & 1; + env->VF = (val << 3) & 0x80000000; + } + if (mask & CPSR_Q) + env->QF = ((val & CPSR_Q) != 0); + if (mask & CPSR_T) + env->thumb = ((val & CPSR_T) != 0); + if (mask & CPSR_IT_0_1) { + env->condexec_bits &= ~3; + env->condexec_bits |= (val >> 25) & 3; + } + if (mask & CPSR_IT_2_7) { + env->condexec_bits &= 3; + env->condexec_bits |= (val >> 8) & 0xfc; + } + if (mask & CPSR_GE) { + env->GE = (val >> 16) & 0xf; + } + + /* In a V7 implementation that includes the security extensions but does + * not include Virtualization Extensions the SCR.FW and SCR.AW bits control + * whether non-secure software is allowed to change the CPSR_F and CPSR_A + * bits respectively. + * + * In a V8 implementation, it is permitted for privileged software to + * change the CPSR A/F bits regardless of the SCR.AW/FW bits. + */ + if (!arm_feature(env, ARM_FEATURE_V8) && + arm_feature(env, ARM_FEATURE_EL3) && + !arm_feature(env, ARM_FEATURE_EL2) && + !arm_is_secure(env)) { + + changed_daif = (env->daif ^ val) & mask; + + if (changed_daif & CPSR_A) { + /* Check to see if we are allowed to change the masking of async + * abort exceptions from a non-secure state. + */ + if (!(env->cp15.scr_el3 & SCR_AW)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Ignoring attempt to switch CPSR_A flag from " + "non-secure world with SCR.AW bit clear\n"); + mask &= ~CPSR_A; + } + } + + if (changed_daif & CPSR_F) { + /* Check to see if we are allowed to change the masking of FIQ + * exceptions from a non-secure state. + */ + if (!(env->cp15.scr_el3 & SCR_FW)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Ignoring attempt to switch CPSR_F flag from " + "non-secure world with SCR.FW bit clear\n"); + mask &= ~CPSR_F; + } + + /* Check whether non-maskable FIQ (NMFI) support is enabled. + * If this bit is set software is not allowed to mask + * FIQs, but is allowed to set CPSR_F to 0. + */ + if ((A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_NMFI) && + (val & CPSR_F)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Ignoring attempt to enable CPSR_F flag " + "(non-maskable FIQ [NMFI] support enabled)\n"); + mask &= ~CPSR_F; + } + } + } + + env->daif &= ~(CPSR_AIF & mask); + env->daif |= val & CPSR_AIF & mask; + + if ((env->uncached_cpsr ^ val) & mask & CPSR_M) { + if (bad_mode_switch(env, val & CPSR_M)) { + /* Attempt to switch to an invalid mode: this is UNPREDICTABLE. + * We choose to ignore the attempt and leave the CPSR M field + * untouched. + */ + mask &= ~CPSR_M; + } else { + switch_mode(env, val & CPSR_M); + } + } + mask &= ~CACHED_CPSR_BITS; + env->uncached_cpsr = (env->uncached_cpsr & ~mask) | (val & mask); +} + +/* Sign/zero extend */ +uint32_t HELPER(sxtb16)(uint32_t x) +{ + uint32_t res; + res = (uint16_t)(int8_t)x; + res |= (uint32_t)(int8_t)(x >> 16) << 16; + return res; +} + +uint32_t HELPER(uxtb16)(uint32_t x) +{ + uint32_t res; + res = (uint16_t)(uint8_t)x; + res |= (uint32_t)(uint8_t)(x >> 16) << 16; + return res; +} + +uint32_t HELPER(clz)(uint32_t x) +{ + return clz32(x); +} + +int32_t HELPER(sdiv)(int32_t num, int32_t den) +{ + if (den == 0) + return 0; + if (num == INT_MIN && den == -1) + return INT_MIN; + return num / den; +} + +uint32_t HELPER(udiv)(uint32_t num, uint32_t den) +{ + if (den == 0) + return 0; + return num / den; +} + +uint32_t HELPER(rbit)(uint32_t x) +{ + x = ((x & 0xff000000) >> 24) + | ((x & 0x00ff0000) >> 8) + | ((x & 0x0000ff00) << 8) + | ((x & 0x000000ff) << 24); + x = ((x & 0xf0f0f0f0) >> 4) + | ((x & 0x0f0f0f0f) << 4); + x = ((x & 0x88888888) >> 3) + | ((x & 0x44444444) >> 1) + | ((x & 0x22222222) << 1) + | ((x & 0x11111111) << 3); + return x; +} + +#if defined(CONFIG_USER_ONLY) + +/* These should probably raise undefined insn exceptions. */ +void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "v7m_msr %d\n", reg); +} + +uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "v7m_mrs %d\n", reg); + return 0; +} + +void switch_mode(CPUARMState *env, int mode) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + if (mode != ARM_CPU_MODE_USR) { + cpu_abort(CPU(cpu), "Tried to switch out of user mode\n"); + } +} + +void HELPER(set_r13_banked)(CPUARMState *env, uint32_t mode, uint32_t val) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "banked r13 write\n"); +} + +uint32_t HELPER(get_r13_banked)(CPUARMState *env, uint32_t mode) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "banked r13 read\n"); + return 0; +} + +uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, + uint32_t cur_el, bool secure) +{ + return 1; +} + +void aarch64_sync_64_to_32(CPUARMState *env) +{ + g_assert_not_reached(); +} + +#else + +/* Map CPU modes onto saved register banks. */ +int bank_number(int mode) +{ + switch (mode) { + case ARM_CPU_MODE_USR: + case ARM_CPU_MODE_SYS: + return 0; + case ARM_CPU_MODE_SVC: + return 1; + case ARM_CPU_MODE_ABT: + return 2; + case ARM_CPU_MODE_UND: + return 3; + case ARM_CPU_MODE_IRQ: + return 4; + case ARM_CPU_MODE_FIQ: + return 5; + case ARM_CPU_MODE_HYP: + return 6; + case ARM_CPU_MODE_MON: + return 7; + } + hw_error("bank number requested for bad CPSR mode value 0x%x\n", mode); +} + +void switch_mode(CPUARMState *env, int mode) +{ + int old_mode; + int i; + + old_mode = env->uncached_cpsr & CPSR_M; + if (mode == old_mode) + return; + + if (old_mode == ARM_CPU_MODE_FIQ) { + memcpy (env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t)); + memcpy (env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t)); + } else if (mode == ARM_CPU_MODE_FIQ) { + memcpy (env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t)); + memcpy (env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t)); + } + + i = bank_number(old_mode); + env->banked_r13[i] = env->regs[13]; + env->banked_r14[i] = env->regs[14]; + env->banked_spsr[i] = env->spsr; + + i = bank_number(mode); + env->regs[13] = env->banked_r13[i]; + env->regs[14] = env->banked_r14[i]; + env->spsr = env->banked_spsr[i]; +} + +/* Physical Interrupt Target EL Lookup Table + * + * [ From ARM ARM section G1.13.4 (Table G1-15) ] + * + * The below multi-dimensional table is used for looking up the target + * exception level given numerous condition criteria. Specifically, the + * target EL is based on SCR and HCR routing controls as well as the + * currently executing EL and secure state. + * + * Dimensions: + * target_el_table[2][2][2][2][2][4] + * | | | | | +--- Current EL + * | | | | +------ Non-secure(0)/Secure(1) + * | | | +--------- HCR mask override + * | | +------------ SCR exec state control + * | +--------------- SCR mask override + * +------------------ 32-bit(0)/64-bit(1) EL3 + * + * The table values are as such: + * 0-3 = EL0-EL3 + * -1 = Cannot occur + * + * The ARM ARM target EL table includes entries indicating that an "exception + * is not taken". The two cases where this is applicable are: + * 1) An exception is taken from EL3 but the SCR does not have the exception + * routed to EL3. + * 2) An exception is taken from EL2 but the HCR does not have the exception + * routed to EL2. + * In these two cases, the below table contain a target of EL1. This value is + * returned as it is expected that the consumer of the table data will check + * for "target EL >= current EL" to ensure the exception is not taken. + * + * SCR HCR + * 64 EA AMO From + * BIT IRQ IMO Non-secure Secure + * EL3 FIQ RW FMO EL0 EL1 EL2 EL3 EL0 EL1 EL2 EL3 + */ +const int8_t target_el_table[2][2][2][2][2][4] = { + {{{{/* 0 0 0 0 */{ 1, 1, 2, -1 },{ 3, -1, -1, 3 },}, + {/* 0 0 0 1 */{ 2, 2, 2, -1 },{ 3, -1, -1, 3 },},}, + {{/* 0 0 1 0 */{ 1, 1, 2, -1 },{ 3, -1, -1, 3 },}, + {/* 0 0 1 1 */{ 2, 2, 2, -1 },{ 3, -1, -1, 3 },},},}, + {{{/* 0 1 0 0 */{ 3, 3, 3, -1 },{ 3, -1, -1, 3 },}, + {/* 0 1 0 1 */{ 3, 3, 3, -1 },{ 3, -1, -1, 3 },},}, + {{/* 0 1 1 0 */{ 3, 3, 3, -1 },{ 3, -1, -1, 3 },}, + {/* 0 1 1 1 */{ 3, 3, 3, -1 },{ 3, -1, -1, 3 },},},},}, + {{{{/* 1 0 0 0 */{ 1, 1, 2, -1 },{ 1, 1, -1, 1 },}, + {/* 1 0 0 1 */{ 2, 2, 2, -1 },{ 1, 1, -1, 1 },},}, + {{/* 1 0 1 0 */{ 1, 1, 1, -1 },{ 1, 1, -1, 1 },}, + {/* 1 0 1 1 */{ 2, 2, 2, -1 },{ 1, 1, -1, 1 },},},}, + {{{/* 1 1 0 0 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },}, + {/* 1 1 0 1 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },},}, + {{/* 1 1 1 0 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },}, + {/* 1 1 1 1 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },},},},}, +}; + +/* + * Determine the target EL for physical exceptions + */ +uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, + uint32_t cur_el, bool secure) +{ + CPUARMState *env = cs->env_ptr; + int rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW); + int scr; + int hcr; + int target_el; + int is64 = arm_el_is_aa64(env, 3); + + switch (excp_idx) { + case EXCP_IRQ: + scr = ((env->cp15.scr_el3 & SCR_IRQ) == SCR_IRQ); + hcr = ((env->cp15.hcr_el2 & HCR_IMO) == HCR_IMO); + break; + case EXCP_FIQ: + scr = ((env->cp15.scr_el3 & SCR_FIQ) == SCR_FIQ); + hcr = ((env->cp15.hcr_el2 & HCR_FMO) == HCR_FMO); + break; + default: + scr = ((env->cp15.scr_el3 & SCR_EA) == SCR_EA); + hcr = ((env->cp15.hcr_el2 & HCR_AMO) == HCR_AMO); + break; + }; + + /* If HCR.TGE is set then HCR is treated as being 1 */ + hcr |= ((env->cp15.hcr_el2 & HCR_TGE) == HCR_TGE); + + /* Perform a table-lookup for the target EL given the current state */ + target_el = target_el_table[is64][scr][rw][hcr][secure][cur_el]; + + assert(target_el > 0); + + return target_el; +} + +static void v7m_push(CPUARMState *env, uint32_t val) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + + env->regs[13] -= 4; + stl_phys(cs->as, env->regs[13], val); +} + +static uint32_t v7m_pop(CPUARMState *env) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + uint32_t val; + + val = ldl_phys(cs->as, env->regs[13]); + env->regs[13] += 4; + return val; +} + +/* Switch to V7M main or process stack pointer. */ +static void switch_v7m_sp(CPUARMState *env, int process) +{ + uint32_t tmp; + if (env->v7m.current_sp != process) { + tmp = env->v7m.other_sp; + env->v7m.other_sp = env->regs[13]; + env->regs[13] = tmp; + env->v7m.current_sp = process; + } +} + +static void do_v7m_exception_exit(CPUARMState *env) +{ + uint32_t type; + uint32_t xpsr; + + type = env->regs[15]; + if (env->v7m.exception != 0) + armv7m_nvic_complete_irq(env->nvic, env->v7m.exception); + + /* Switch to the target stack. */ + switch_v7m_sp(env, (type & 4) != 0); + /* Pop registers. */ + env->regs[0] = v7m_pop(env); + env->regs[1] = v7m_pop(env); + env->regs[2] = v7m_pop(env); + env->regs[3] = v7m_pop(env); + env->regs[12] = v7m_pop(env); + env->regs[14] = v7m_pop(env); + env->regs[15] = v7m_pop(env); + if (env->regs[15] & 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "M profile return from interrupt with misaligned " + "PC is UNPREDICTABLE\n"); + /* Actual hardware seems to ignore the lsbit, and there are several + * RTOSes out there which incorrectly assume the r15 in the stack + * frame should be a Thumb-style "lsbit indicates ARM/Thumb" value. + */ + env->regs[15] &= ~1U; + } + xpsr = v7m_pop(env); + xpsr_write(env, xpsr, 0xfffffdff); + /* Undo stack alignment. */ + if (xpsr & 0x200) + env->regs[13] |= 4; + /* ??? The exception return type specifies Thread/Handler mode. However + this is also implied by the xPSR value. Not sure what to do + if there is a mismatch. */ + /* ??? Likewise for mismatches between the CONTROL register and the stack + pointer. */ +} + +void arm_v7m_cpu_do_interrupt(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint32_t xpsr = xpsr_read(env); + uint32_t lr; + uint32_t addr; + + arm_log_exception(cs->exception_index); + + lr = 0xfffffff1; + if (env->v7m.current_sp) + lr |= 4; + if (env->v7m.exception == 0) + lr |= 8; + + /* For exceptions we just mark as pending on the NVIC, and let that + handle it. */ + /* TODO: Need to escalate if the current priority is higher than the + one we're raising. */ + switch (cs->exception_index) { + case EXCP_UDEF: + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); + return; + case EXCP_SWI: + /* The PC already points to the next instruction. */ + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC); + return; + case EXCP_PREFETCH_ABORT: + case EXCP_DATA_ABORT: + /* TODO: if we implemented the MPU registers, this is where we + * should set the MMFAR, etc from exception.fsr and exception.vaddress. + */ + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM); + return; + case EXCP_BKPT: + if (semihosting_enabled()) { + int nr; + nr = arm_lduw_code(env, env->regs[15], env->bswap_code) & 0xff; + if (nr == 0xab) { + env->regs[15] += 2; + env->regs[0] = do_arm_semihosting(env); + qemu_log_mask(CPU_LOG_INT, "...handled as semihosting call\n"); + return; + } + } + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG); + return; + case EXCP_IRQ: + env->v7m.exception = armv7m_nvic_acknowledge_irq(env->nvic); + break; + case EXCP_EXCEPTION_EXIT: + do_v7m_exception_exit(env); + return; + default: + cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); + return; /* Never happens. Keep compiler happy. */ + } + + /* Align stack pointer. */ + /* ??? Should only do this if Configuration Control Register + STACKALIGN bit is set. */ + if (env->regs[13] & 4) { + env->regs[13] -= 4; + xpsr |= 0x200; + } + /* Switch to the handler mode. */ + v7m_push(env, xpsr); + v7m_push(env, env->regs[15]); + v7m_push(env, env->regs[14]); + v7m_push(env, env->regs[12]); + v7m_push(env, env->regs[3]); + v7m_push(env, env->regs[2]); + v7m_push(env, env->regs[1]); + v7m_push(env, env->regs[0]); + switch_v7m_sp(env, 0); + /* Clear IT bits */ + env->condexec_bits = 0; + env->regs[14] = lr; + addr = ldl_phys(cs->as, env->v7m.vecbase + env->v7m.exception * 4); + env->regs[15] = addr & 0xfffffffe; + env->thumb = addr & 1; +} + +/* Function used to synchronize QEMU's AArch64 register set with AArch32 + * register set. This is necessary when switching between AArch32 and AArch64 + * execution state. + */ +void aarch64_sync_32_to_64(CPUARMState *env) +{ + int i; + uint32_t mode = env->uncached_cpsr & CPSR_M; + + /* We can blanket copy R[0:7] to X[0:7] */ + for (i = 0; i < 8; i++) { + env->xregs[i] = env->regs[i]; + } + + /* Unless we are in FIQ mode, x8-x12 come from the user registers r8-r12. + * Otherwise, they come from the banked user regs. + */ + if (mode == ARM_CPU_MODE_FIQ) { + for (i = 8; i < 13; i++) { + env->xregs[i] = env->usr_regs[i - 8]; + } + } else { + for (i = 8; i < 13; i++) { + env->xregs[i] = env->regs[i]; + } + } + + /* Registers x13-x23 are the various mode SP and FP registers. Registers + * r13 and r14 are only copied if we are in that mode, otherwise we copy + * from the mode banked register. + */ + if (mode == ARM_CPU_MODE_USR || mode == ARM_CPU_MODE_SYS) { + env->xregs[13] = env->regs[13]; + env->xregs[14] = env->regs[14]; + } else { + env->xregs[13] = env->banked_r13[bank_number(ARM_CPU_MODE_USR)]; + /* HYP is an exception in that it is copied from r14 */ + if (mode == ARM_CPU_MODE_HYP) { + env->xregs[14] = env->regs[14]; + } else { + env->xregs[14] = env->banked_r14[bank_number(ARM_CPU_MODE_USR)]; + } + } + + if (mode == ARM_CPU_MODE_HYP) { + env->xregs[15] = env->regs[13]; + } else { + env->xregs[15] = env->banked_r13[bank_number(ARM_CPU_MODE_HYP)]; + } + + if (mode == ARM_CPU_MODE_IRQ) { + env->xregs[16] = env->regs[13]; + env->xregs[17] = env->regs[14]; + } else { + env->xregs[16] = env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)]; + env->xregs[17] = env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)]; + } + + if (mode == ARM_CPU_MODE_SVC) { + env->xregs[18] = env->regs[13]; + env->xregs[19] = env->regs[14]; + } else { + env->xregs[18] = env->banked_r13[bank_number(ARM_CPU_MODE_SVC)]; + env->xregs[19] = env->banked_r14[bank_number(ARM_CPU_MODE_SVC)]; + } + + if (mode == ARM_CPU_MODE_ABT) { + env->xregs[20] = env->regs[13]; + env->xregs[21] = env->regs[14]; + } else { + env->xregs[20] = env->banked_r13[bank_number(ARM_CPU_MODE_ABT)]; + env->xregs[21] = env->banked_r14[bank_number(ARM_CPU_MODE_ABT)]; + } + + if (mode == ARM_CPU_MODE_UND) { + env->xregs[22] = env->regs[13]; + env->xregs[23] = env->regs[14]; + } else { + env->xregs[22] = env->banked_r13[bank_number(ARM_CPU_MODE_UND)]; + env->xregs[23] = env->banked_r14[bank_number(ARM_CPU_MODE_UND)]; + } + + /* Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ + * mode, then we can copy from r8-r14. Otherwise, we copy from the + * FIQ bank for r8-r14. + */ + if (mode == ARM_CPU_MODE_FIQ) { + for (i = 24; i < 31; i++) { + env->xregs[i] = env->regs[i - 16]; /* X[24:30] <- R[8:14] */ + } + } else { + for (i = 24; i < 29; i++) { + env->xregs[i] = env->fiq_regs[i - 24]; + } + env->xregs[29] = env->banked_r13[bank_number(ARM_CPU_MODE_FIQ)]; + env->xregs[30] = env->banked_r14[bank_number(ARM_CPU_MODE_FIQ)]; + } + + env->pc = env->regs[15]; +} + +/* Function used to synchronize QEMU's AArch32 register set with AArch64 + * register set. This is necessary when switching between AArch32 and AArch64 + * execution state. + */ +void aarch64_sync_64_to_32(CPUARMState *env) +{ + int i; + uint32_t mode = env->uncached_cpsr & CPSR_M; + + /* We can blanket copy X[0:7] to R[0:7] */ + for (i = 0; i < 8; i++) { + env->regs[i] = env->xregs[i]; + } + + /* Unless we are in FIQ mode, r8-r12 come from the user registers x8-x12. + * Otherwise, we copy x8-x12 into the banked user regs. + */ + if (mode == ARM_CPU_MODE_FIQ) { + for (i = 8; i < 13; i++) { + env->usr_regs[i - 8] = env->xregs[i]; + } + } else { + for (i = 8; i < 13; i++) { + env->regs[i] = env->xregs[i]; + } + } + + /* Registers r13 & r14 depend on the current mode. + * If we are in a given mode, we copy the corresponding x registers to r13 + * and r14. Otherwise, we copy the x register to the banked r13 and r14 + * for the mode. + */ + if (mode == ARM_CPU_MODE_USR || mode == ARM_CPU_MODE_SYS) { + env->regs[13] = env->xregs[13]; + env->regs[14] = env->xregs[14]; + } else { + env->banked_r13[bank_number(ARM_CPU_MODE_USR)] = env->xregs[13]; + + /* HYP is an exception in that it does not have its own banked r14 but + * shares the USR r14 + */ + if (mode == ARM_CPU_MODE_HYP) { + env->regs[14] = env->xregs[14]; + } else { + env->banked_r14[bank_number(ARM_CPU_MODE_USR)] = env->xregs[14]; + } + } + + if (mode == ARM_CPU_MODE_HYP) { + env->regs[13] = env->xregs[15]; + } else { + env->banked_r13[bank_number(ARM_CPU_MODE_HYP)] = env->xregs[15]; + } + + if (mode == ARM_CPU_MODE_IRQ) { + env->regs[13] = env->xregs[16]; + env->regs[14] = env->xregs[17]; + } else { + env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[16]; + env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[17]; + } + + if (mode == ARM_CPU_MODE_SVC) { + env->regs[13] = env->xregs[18]; + env->regs[14] = env->xregs[19]; + } else { + env->banked_r13[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[18]; + env->banked_r14[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[19]; + } + + if (mode == ARM_CPU_MODE_ABT) { + env->regs[13] = env->xregs[20]; + env->regs[14] = env->xregs[21]; + } else { + env->banked_r13[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[20]; + env->banked_r14[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[21]; + } + + if (mode == ARM_CPU_MODE_UND) { + env->regs[13] = env->xregs[22]; + env->regs[14] = env->xregs[23]; + } else { + env->banked_r13[bank_number(ARM_CPU_MODE_UND)] = env->xregs[22]; + env->banked_r14[bank_number(ARM_CPU_MODE_UND)] = env->xregs[23]; + } + + /* Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ + * mode, then we can copy to r8-r14. Otherwise, we copy to the + * FIQ bank for r8-r14. + */ + if (mode == ARM_CPU_MODE_FIQ) { + for (i = 24; i < 31; i++) { + env->regs[i - 16] = env->xregs[i]; /* X[24:30] -> R[8:14] */ + } + } else { + for (i = 24; i < 29; i++) { + env->fiq_regs[i - 24] = env->xregs[i]; + } + env->banked_r13[bank_number(ARM_CPU_MODE_FIQ)] = env->xregs[29]; + env->banked_r14[bank_number(ARM_CPU_MODE_FIQ)] = env->xregs[30]; + } + + env->regs[15] = env->pc; +} + +/* Handle a CPU exception. */ +void arm_cpu_do_interrupt(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint32_t addr; + uint32_t mask; + int new_mode; + uint32_t offset; + uint32_t moe; + + assert(!IS_M(env)); + + arm_log_exception(cs->exception_index); + + if (arm_is_psci_call(cpu, cs->exception_index)) { + arm_handle_psci_call(cpu); + qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n"); + return; + } + + /* If this is a debug exception we must update the DBGDSCR.MOE bits */ + switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) { + case EC_BREAKPOINT: + case EC_BREAKPOINT_SAME_EL: + moe = 1; + break; + case EC_WATCHPOINT: + case EC_WATCHPOINT_SAME_EL: + moe = 10; + break; + case EC_AA32_BKPT: + moe = 3; + break; + case EC_VECTORCATCH: + moe = 5; + break; + default: + moe = 0; + break; + } + + if (moe) { + env->cp15.mdscr_el1 = deposit64(env->cp15.mdscr_el1, 2, 4, moe); + } + + /* TODO: Vectored interrupt controller. */ + switch (cs->exception_index) { + case EXCP_UDEF: + new_mode = ARM_CPU_MODE_UND; + addr = 0x04; + mask = CPSR_I; + if (env->thumb) + offset = 2; + else + offset = 4; + break; + case EXCP_SWI: + if (semihosting_enabled()) { + /* Check for semihosting interrupt. */ + if (env->thumb) { + mask = arm_lduw_code(env, env->regs[15] - 2, env->bswap_code) + & 0xff; + } else { + mask = arm_ldl_code(env, env->regs[15] - 4, env->bswap_code) + & 0xffffff; + } + /* Only intercept calls from privileged modes, to provide some + semblance of security. */ + if (((mask == 0x123456 && !env->thumb) + || (mask == 0xab && env->thumb)) + && (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR) { + env->regs[0] = do_arm_semihosting(env); + qemu_log_mask(CPU_LOG_INT, "...handled as semihosting call\n"); + return; + } + } + new_mode = ARM_CPU_MODE_SVC; + addr = 0x08; + mask = CPSR_I; + /* The PC already points to the next instruction. */ + offset = 0; + break; + case EXCP_BKPT: + /* See if this is a semihosting syscall. */ + if (env->thumb && semihosting_enabled()) { + mask = arm_lduw_code(env, env->regs[15], env->bswap_code) & 0xff; + if (mask == 0xab + && (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR) { + env->regs[15] += 2; + env->regs[0] = do_arm_semihosting(env); + qemu_log_mask(CPU_LOG_INT, "...handled as semihosting call\n"); + return; + } + } + env->exception.fsr = 2; + /* Fall through to prefetch abort. */ + case EXCP_PREFETCH_ABORT: + A32_BANKED_CURRENT_REG_SET(env, ifsr, env->exception.fsr); + A32_BANKED_CURRENT_REG_SET(env, ifar, env->exception.vaddress); + qemu_log_mask(CPU_LOG_INT, "...with IFSR 0x%x IFAR 0x%x\n", + env->exception.fsr, (uint32_t)env->exception.vaddress); + new_mode = ARM_CPU_MODE_ABT; + addr = 0x0c; + mask = CPSR_A | CPSR_I; + offset = 4; + break; + case EXCP_DATA_ABORT: + A32_BANKED_CURRENT_REG_SET(env, dfsr, env->exception.fsr); + A32_BANKED_CURRENT_REG_SET(env, dfar, env->exception.vaddress); + qemu_log_mask(CPU_LOG_INT, "...with DFSR 0x%x DFAR 0x%x\n", + env->exception.fsr, + (uint32_t)env->exception.vaddress); + new_mode = ARM_CPU_MODE_ABT; + addr = 0x10; + mask = CPSR_A | CPSR_I; + offset = 8; + break; + case EXCP_IRQ: + new_mode = ARM_CPU_MODE_IRQ; + addr = 0x18; + /* Disable IRQ and imprecise data aborts. */ + mask = CPSR_A | CPSR_I; + offset = 4; + if (env->cp15.scr_el3 & SCR_IRQ) { + /* IRQ routed to monitor mode */ + new_mode = ARM_CPU_MODE_MON; + mask |= CPSR_F; + } + break; + case EXCP_FIQ: + new_mode = ARM_CPU_MODE_FIQ; + addr = 0x1c; + /* Disable FIQ, IRQ and imprecise data aborts. */ + mask = CPSR_A | CPSR_I | CPSR_F; + if (env->cp15.scr_el3 & SCR_FIQ) { + /* FIQ routed to monitor mode */ + new_mode = ARM_CPU_MODE_MON; + } + offset = 4; + break; + case EXCP_SMC: + new_mode = ARM_CPU_MODE_MON; + addr = 0x08; + mask = CPSR_A | CPSR_I | CPSR_F; + offset = 0; + break; + default: + cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); + return; /* Never happens. Keep compiler happy. */ + } + + if (new_mode == ARM_CPU_MODE_MON) { + addr += env->cp15.mvbar; + } else if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) { + /* High vectors. When enabled, base address cannot be remapped. */ + addr += 0xffff0000; + } else { + /* ARM v7 architectures provide a vector base address register to remap + * the interrupt vector table. + * This register is only followed in non-monitor mode, and is banked. + * Note: only bits 31:5 are valid. + */ + addr += A32_BANKED_CURRENT_REG_GET(env, vbar); + } + + if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) { + env->cp15.scr_el3 &= ~SCR_NS; + } + + switch_mode (env, new_mode); + /* For exceptions taken to AArch32 we must clear the SS bit in both + * PSTATE and in the old-state value we save to SPSR_, so zero it now. + */ + env->uncached_cpsr &= ~PSTATE_SS; + env->spsr = cpsr_read(env); + /* Clear IT bits. */ + env->condexec_bits = 0; + /* Switch to the new mode, and to the correct instruction set. */ + env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode; + env->daif |= mask; + /* this is a lie, as the was no c1_sys on V4T/V5, but who cares + * and we should just guard the thumb mode on V4 */ + if (arm_feature(env, ARM_FEATURE_V4T)) { + env->thumb = (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_TE) != 0; + } + env->regs[14] = env->regs[15] + offset; + env->regs[15] = addr; + cs->interrupt_request |= CPU_INTERRUPT_EXITTB; +} + + +/* Return the exception level which controls this address translation regime */ +static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_S2NS: + case ARMMMUIdx_S1E2: + return 2; + case ARMMMUIdx_S1E3: + return 3; + case ARMMMUIdx_S1SE0: + return arm_el_is_aa64(env, 3) ? 1 : 3; + case ARMMMUIdx_S1SE1: + case ARMMMUIdx_S1NSE0: + case ARMMMUIdx_S1NSE1: + return 1; + default: + g_assert_not_reached(); + } +} + +/* Return true if this address translation regime is secure */ +static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_S12NSE0: + case ARMMMUIdx_S12NSE1: + case ARMMMUIdx_S1NSE0: + case ARMMMUIdx_S1NSE1: + case ARMMMUIdx_S1E2: + case ARMMMUIdx_S2NS: + return false; + case ARMMMUIdx_S1E3: + case ARMMMUIdx_S1SE0: + case ARMMMUIdx_S1SE1: + return true; + default: + g_assert_not_reached(); + } +} + +/* Return the SCTLR value which controls this address translation regime */ +static inline uint32_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; +} + +/* Return true if the specified stage of address translation is disabled */ +static inline bool regime_translation_disabled(CPUARMState *env, + ARMMMUIdx mmu_idx) +{ + if (mmu_idx == ARMMMUIdx_S2NS) { + return (env->cp15.hcr_el2 & HCR_VM) == 0; + } + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; +} + +/* Return the TCR controlling this translation regime */ +static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + if (mmu_idx == ARMMMUIdx_S2NS) { + /* TODO: return VTCR_EL2 */ + g_assert_not_reached(); + } + return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; +} + +/* Return the TTBR associated with this translation regime */ +static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, + int ttbrn) +{ + if (mmu_idx == ARMMMUIdx_S2NS) { + /* TODO: return VTTBR_EL2 */ + g_assert_not_reached(); + } + if (ttbrn == 0) { + return env->cp15.ttbr0_el[regime_el(env, mmu_idx)]; + } else { + return env->cp15.ttbr1_el[regime_el(env, mmu_idx)]; + } +} + +/* Return true if the translation regime is using LPAE format page tables */ +static inline bool regime_using_lpae_format(CPUARMState *env, + ARMMMUIdx mmu_idx) +{ + int el = regime_el(env, mmu_idx); + if (el == 2 || arm_el_is_aa64(env, el)) { + return true; + } + if (arm_feature(env, ARM_FEATURE_LPAE) + && (regime_tcr(env, mmu_idx)->raw_tcr & TTBCR_EAE)) { + return true; + } + return false; +} + +static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_S1SE0: + case ARMMMUIdx_S1NSE0: + return true; + default: + return false; + case ARMMMUIdx_S12NSE0: + case ARMMMUIdx_S12NSE1: + g_assert_not_reached(); + } +} + +/* Translate section/page access permissions to page + * R/W protection flags + * + * @env: CPUARMState + * @mmu_idx: MMU index indicating required translation regime + * @ap: The 3-bit access permissions (AP[2:0]) + * @domain_prot: The 2-bit domain access permissions + */ +static inline int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, + int ap, int domain_prot) +{ + bool is_user = regime_is_user(env, mmu_idx); + + if (domain_prot == 3) { + return PAGE_READ | PAGE_WRITE; + } + + switch (ap) { + case 0: + if (arm_feature(env, ARM_FEATURE_V7)) { + return 0; + } + switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) { + case SCTLR_S: + return is_user ? 0 : PAGE_READ; + case SCTLR_R: + return PAGE_READ; + default: + return 0; + } + case 1: + return is_user ? 0 : PAGE_READ | PAGE_WRITE; + case 2: + if (is_user) { + return PAGE_READ; + } else { + return PAGE_READ | PAGE_WRITE; + } + case 3: + return PAGE_READ | PAGE_WRITE; + case 4: /* Reserved. */ + return 0; + case 5: + return is_user ? 0 : PAGE_READ; + case 6: + return PAGE_READ; + case 7: + if (!arm_feature(env, ARM_FEATURE_V6K)) { + return 0; + } + return PAGE_READ; + default: + g_assert_not_reached(); + } +} + +/* Translate section/page access permissions to page + * R/W protection flags. + * + * @ap: The 2-bit simple AP (AP[2:1]) + * @is_user: TRUE if accessing from PL0 + */ +static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user) +{ + switch (ap) { + case 0: + return is_user ? 0 : PAGE_READ | PAGE_WRITE; + case 1: + return PAGE_READ | PAGE_WRITE; + case 2: + return is_user ? 0 : PAGE_READ; + case 3: + return PAGE_READ; + default: + g_assert_not_reached(); + } +} + +static inline int +simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) +{ + return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); +} + +/* Translate section/page access permissions to protection flags + * + * @env: CPUARMState + * @mmu_idx: MMU index indicating required translation regime + * @is_aa64: TRUE if AArch64 + * @ap: The 2-bit simple AP (AP[2:1]) + * @ns: NS (non-secure) bit + * @xn: XN (execute-never) bit + * @pxn: PXN (privileged execute-never) bit + */ +static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, + int ap, int ns, int xn, int pxn) +{ + bool is_user = regime_is_user(env, mmu_idx); + int prot_rw, user_rw; + bool have_wxn; + int wxn = 0; + + assert(mmu_idx != ARMMMUIdx_S2NS); + + user_rw = simple_ap_to_rw_prot_is_user(ap, true); + if (is_user) { + prot_rw = user_rw; + } else { + prot_rw = simple_ap_to_rw_prot_is_user(ap, false); + } + + if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) { + return prot_rw; + } + + /* TODO have_wxn should be replaced with + * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2) + * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE + * compatible processors have EL2, which is required for [U]WXN. + */ + have_wxn = arm_feature(env, ARM_FEATURE_LPAE); + + if (have_wxn) { + wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN; + } + + if (is_aa64) { + switch (regime_el(env, mmu_idx)) { + case 1: + if (!is_user) { + xn = pxn || (user_rw & PAGE_WRITE); + } + break; + case 2: + case 3: + break; + } + } else if (arm_feature(env, ARM_FEATURE_V7)) { + switch (regime_el(env, mmu_idx)) { + case 1: + case 3: + if (is_user) { + xn = xn || !(user_rw & PAGE_READ); + } else { + int uwxn = 0; + if (have_wxn) { + uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN; + } + xn = xn || !(prot_rw & PAGE_READ) || pxn || + (uwxn && (user_rw & PAGE_WRITE)); + } + break; + case 2: + break; + } + } else { + xn = wxn = 0; + } + + if (xn || (wxn && (prot_rw & PAGE_WRITE))) { + return prot_rw; + } + return prot_rw | PAGE_EXEC; +} + +static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, + uint32_t *table, uint32_t address) +{ + /* Note that we can only get here for an AArch32 PL0/PL1 lookup */ + TCR *tcr = regime_tcr(env, mmu_idx); + + if (address & tcr->mask) { + if (tcr->raw_tcr & TTBCR_PD1) { + /* Translation table walk disabled for TTBR1 */ + return false; + } + *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000; + } else { + if (tcr->raw_tcr & TTBCR_PD0) { + /* Translation table walk disabled for TTBR0 */ + return false; + } + *table = regime_ttbr(env, mmu_idx, 0) & tcr->base_mask; + } + *table |= (address >> 18) & 0x3ffc; + return true; +} + +/* All loads done in the course of a page table walk go through here. + * TODO: rather than ignoring errors from physical memory reads (which + * are external aborts in ARM terminology) we should propagate this + * error out so that we can turn it into a Data Abort if this walk + * was being done for a CPU load/store or an address translation instruction + * (but not if it was for a debug access). + */ +static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure) +{ + MemTxAttrs attrs = {}; + + attrs.secure = is_secure; + return address_space_ldl(cs->as, addr, attrs, NULL); +} + +static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure) +{ + MemTxAttrs attrs = {}; + + attrs.secure = is_secure; + return address_space_ldq(cs->as, addr, attrs, NULL); +} + +static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, + int access_type, ARMMMUIdx mmu_idx, + hwaddr *phys_ptr, int *prot, + target_ulong *page_size, uint32_t *fsr) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + int code; + uint32_t table; + uint32_t desc; + int type; + int ap; + int domain = 0; + int domain_prot; + hwaddr phys_addr; + uint32_t dacr; + + /* Pagetable walk. */ + /* Lookup l1 descriptor. */ + if (!get_level1_table_address(env, mmu_idx, &table, address)) { + /* Section translation fault if page walk is disabled by PD0 or PD1 */ + code = 5; + goto do_fault; + } + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx)); + type = (desc & 3); + domain = (desc >> 5) & 0x0f; + if (regime_el(env, mmu_idx) == 1) { + dacr = env->cp15.dacr_ns; + } else { + dacr = env->cp15.dacr_s; + } + domain_prot = (dacr >> (domain * 2)) & 3; + if (type == 0) { + /* Section translation fault. */ + code = 5; + goto do_fault; + } + if (domain_prot == 0 || domain_prot == 2) { + if (type == 2) + code = 9; /* Section domain fault. */ + else + code = 11; /* Page domain fault. */ + goto do_fault; + } + if (type == 2) { + /* 1Mb section. */ + phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); + ap = (desc >> 10) & 3; + code = 13; + *page_size = 1024 * 1024; + } else { + /* Lookup l2 entry. */ + if (type == 1) { + /* Coarse pagetable. */ + table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); + } else { + /* Fine pagetable. */ + table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); + } + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx)); + switch (desc & 3) { + case 0: /* Page translation fault. */ + code = 7; + goto do_fault; + case 1: /* 64k page. */ + phys_addr = (desc & 0xffff0000) | (address & 0xffff); + ap = (desc >> (4 + ((address >> 13) & 6))) & 3; + *page_size = 0x10000; + break; + case 2: /* 4k page. */ + phys_addr = (desc & 0xfffff000) | (address & 0xfff); + ap = (desc >> (4 + ((address >> 9) & 6))) & 3; + *page_size = 0x1000; + break; + case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ + if (type == 1) { + /* ARMv6/XScale extended small page format */ + if (arm_feature(env, ARM_FEATURE_XSCALE) + || arm_feature(env, ARM_FEATURE_V6)) { + phys_addr = (desc & 0xfffff000) | (address & 0xfff); + *page_size = 0x1000; + } else { + /* UNPREDICTABLE in ARMv5; we choose to take a + * page translation fault. + */ + code = 7; + goto do_fault; + } + } else { + phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); + *page_size = 0x400; + } + ap = (desc >> 4) & 3; + break; + default: + /* Never happens, but compiler isn't smart enough to tell. */ + abort(); + } + code = 15; + } + *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); + *prot |= *prot ? PAGE_EXEC : 0; + if (!(*prot & (1 << access_type))) { + /* Access permission fault. */ + goto do_fault; + } + *phys_ptr = phys_addr; + return false; +do_fault: + *fsr = code | (domain << 4); + return true; +} + +static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, + int access_type, ARMMMUIdx mmu_idx, + hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, + target_ulong *page_size, uint32_t *fsr) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + int code; + uint32_t table; + uint32_t desc; + uint32_t xn; + uint32_t pxn = 0; + int type; + int ap; + int domain = 0; + int domain_prot; + hwaddr phys_addr; + uint32_t dacr; + bool ns; + + /* Pagetable walk. */ + /* Lookup l1 descriptor. */ + if (!get_level1_table_address(env, mmu_idx, &table, address)) { + /* Section translation fault if page walk is disabled by PD0 or PD1 */ + code = 5; + goto do_fault; + } + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx)); + type = (desc & 3); + if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) { + /* Section translation fault, or attempt to use the encoding + * which is Reserved on implementations without PXN. + */ + code = 5; + goto do_fault; + } + if ((type == 1) || !(desc & (1 << 18))) { + /* Page or Section. */ + domain = (desc >> 5) & 0x0f; + } + if (regime_el(env, mmu_idx) == 1) { + dacr = env->cp15.dacr_ns; + } else { + dacr = env->cp15.dacr_s; + } + domain_prot = (dacr >> (domain * 2)) & 3; + if (domain_prot == 0 || domain_prot == 2) { + if (type != 1) { + code = 9; /* Section domain fault. */ + } else { + code = 11; /* Page domain fault. */ + } + goto do_fault; + } + if (type != 1) { + if (desc & (1 << 18)) { + /* Supersection. */ + phys_addr = (desc & 0xff000000) | (address & 0x00ffffff); + phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32; + phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36; + *page_size = 0x1000000; + } else { + /* Section. */ + phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); + *page_size = 0x100000; + } + ap = ((desc >> 10) & 3) | ((desc >> 13) & 4); + xn = desc & (1 << 4); + pxn = desc & 1; + code = 13; + ns = extract32(desc, 19, 1); + } else { + if (arm_feature(env, ARM_FEATURE_PXN)) { + pxn = (desc >> 2) & 1; + } + ns = extract32(desc, 3, 1); + /* Lookup l2 entry. */ + table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); + desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx)); + ap = ((desc >> 4) & 3) | ((desc >> 7) & 4); + switch (desc & 3) { + case 0: /* Page translation fault. */ + code = 7; + goto do_fault; + case 1: /* 64k page. */ + phys_addr = (desc & 0xffff0000) | (address & 0xffff); + xn = desc & (1 << 15); + *page_size = 0x10000; + break; + case 2: case 3: /* 4k page. */ + phys_addr = (desc & 0xfffff000) | (address & 0xfff); + xn = desc & 1; + *page_size = 0x1000; + break; + default: + /* Never happens, but compiler isn't smart enough to tell. */ + abort(); + } + code = 15; + } + if (domain_prot == 3) { + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + } else { + if (pxn && !regime_is_user(env, mmu_idx)) { + xn = 1; + } + if (xn && access_type == 2) + goto do_fault; + + if (arm_feature(env, ARM_FEATURE_V6K) && + (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) { + /* The simplified model uses AP[0] as an access control bit. */ + if ((ap & 1) == 0) { + /* Access flag fault. */ + code = (code == 15) ? 6 : 3; + goto do_fault; + } + *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); + } else { + *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); + } + if (*prot && !xn) { + *prot |= PAGE_EXEC; + } + if (!(*prot & (1 << access_type))) { + /* Access permission fault. */ + goto do_fault; + } + } + if (ns) { + /* The NS bit will (as required by the architecture) have no effect if + * the CPU doesn't support TZ or this is a non-secure translation + * regime, because the attribute will already be non-secure. + */ + attrs->secure = false; + } + *phys_ptr = phys_addr; + return false; +do_fault: + *fsr = code | (domain << 4); + return true; +} + +/* Fault type for long-descriptor MMU fault reporting; this corresponds + * to bits [5..2] in the STATUS field in long-format DFSR/IFSR. + */ +typedef enum { + translation_fault = 1, + access_fault = 2, + permission_fault = 3, +} MMUFaultType; + +static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, + int access_type, ARMMMUIdx mmu_idx, + hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, + target_ulong *page_size_ptr, uint32_t *fsr) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + /* Read an LPAE long-descriptor translation table. */ + MMUFaultType fault_type = translation_fault; + uint32_t level = 1; + uint32_t epd; + int32_t tsz; + uint32_t tg; + uint64_t ttbr; + int ttbr_select; + hwaddr descaddr, descmask; + uint32_t tableattrs; + target_ulong page_size; + uint32_t attrs; + int32_t granule_sz = 9; + int32_t va_size = 32; + int32_t tbi = 0; + TCR *tcr = regime_tcr(env, mmu_idx); + int ap, ns, xn, pxn; + uint32_t el = regime_el(env, mmu_idx); + bool ttbr1_valid = true; + + /* TODO: + * This code does not handle the different format TCR for VTCR_EL2. + * This code also does not support shareability levels. + * Attribute and permission bit handling should also be checked when adding + * support for those page table walks. + */ + if (arm_el_is_aa64(env, el)) { + va_size = 64; + if (el > 1) { + tbi = extract64(tcr->raw_tcr, 20, 1); + } else { + if (extract64(address, 55, 1)) { + tbi = extract64(tcr->raw_tcr, 38, 1); + } else { + tbi = extract64(tcr->raw_tcr, 37, 1); + } + } + tbi *= 8; + + /* If we are in 64-bit EL2 or EL3 then there is no TTBR1, so mark it + * invalid. + */ + if (el > 1) { + ttbr1_valid = false; + } + } + + /* Determine whether this address is in the region controlled by + * TTBR0 or TTBR1 (or if it is in neither region and should fault). + * This is a Non-secure PL0/1 stage 1 translation, so controlled by + * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32: + */ + uint32_t t0sz = extract32(tcr->raw_tcr, 0, 6); + if (va_size == 64) { + t0sz = MIN(t0sz, 39); + t0sz = MAX(t0sz, 16); + } + uint32_t t1sz = extract32(tcr->raw_tcr, 16, 6); + if (va_size == 64) { + t1sz = MIN(t1sz, 39); + t1sz = MAX(t1sz, 16); + } + if (t0sz && !extract64(address, va_size - t0sz, t0sz - tbi)) { + /* there is a ttbr0 region and we are in it (high bits all zero) */ + ttbr_select = 0; + } else if (ttbr1_valid && t1sz && + !extract64(~address, va_size - t1sz, t1sz - tbi)) { + /* there is a ttbr1 region and we are in it (high bits all one) */ + ttbr_select = 1; + } else if (!t0sz) { + /* ttbr0 region is "everything not in the ttbr1 region" */ + ttbr_select = 0; + } else if (!t1sz && ttbr1_valid) { + /* ttbr1 region is "everything not in the ttbr0 region" */ + ttbr_select = 1; + } else { + /* in the gap between the two regions, this is a Translation fault */ + fault_type = translation_fault; + goto do_fault; + } + + /* Note that QEMU ignores shareability and cacheability attributes, + * so we don't need to do anything with the SH, ORGN, IRGN fields + * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the + * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently + * implement any ASID-like capability so we can ignore it (instead + * we will always flush the TLB any time the ASID is changed). + */ + if (ttbr_select == 0) { + ttbr = regime_ttbr(env, mmu_idx, 0); + epd = extract32(tcr->raw_tcr, 7, 1); + tsz = t0sz; + + tg = extract32(tcr->raw_tcr, 14, 2); + if (tg == 1) { /* 64KB pages */ + granule_sz = 13; + } + if (tg == 2) { /* 16KB pages */ + granule_sz = 11; + } + } else { + /* We should only be here if TTBR1 is valid */ + assert(ttbr1_valid); + + ttbr = regime_ttbr(env, mmu_idx, 1); + epd = extract32(tcr->raw_tcr, 23, 1); + tsz = t1sz; + + tg = extract32(tcr->raw_tcr, 30, 2); + if (tg == 3) { /* 64KB pages */ + granule_sz = 13; + } + if (tg == 1) { /* 16KB pages */ + granule_sz = 11; + } + } + + /* Here we should have set up all the parameters for the translation: + * va_size, ttbr, epd, tsz, granule_sz, tbi + */ + + if (epd) { + /* Translation table walk disabled => Translation fault on TLB miss + * Note: This is always 0 on 64-bit EL2 and EL3. + */ + goto do_fault; + } + + /* The starting level depends on the virtual address size (which can be + * up to 48 bits) and the translation granule size. It indicates the number + * of strides (granule_sz bits at a time) needed to consume the bits + * of the input address. In the pseudocode this is: + * level = 4 - RoundUp((inputsize - grainsize) / stride) + * where their 'inputsize' is our 'va_size - tsz', 'grainsize' is + * our 'granule_sz + 3' and 'stride' is our 'granule_sz'. + * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying: + * = 4 - (va_size - tsz - granule_sz - 3 + granule_sz - 1) / granule_sz + * = 4 - (va_size - tsz - 4) / granule_sz; + */ + level = 4 - (va_size - tsz - 4) / granule_sz; + + /* Clear the vaddr bits which aren't part of the within-region address, + * so that we don't have to special case things when calculating the + * first descriptor address. + */ + if (tsz) { + address &= (1ULL << (va_size - tsz)) - 1; + } + + descmask = (1ULL << (granule_sz + 3)) - 1; + + /* Now we can extract the actual base address from the TTBR */ + descaddr = extract64(ttbr, 0, 48); + descaddr &= ~((1ULL << (va_size - tsz - (granule_sz * (4 - level)))) - 1); + + /* Secure accesses start with the page table in secure memory and + * can be downgraded to non-secure at any step. Non-secure accesses + * remain non-secure. We implement this by just ORing in the NSTable/NS + * bits at each step. + */ + tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4); + for (;;) { + uint64_t descriptor; + bool nstable; + + descaddr |= (address >> (granule_sz * (4 - level))) & descmask; + descaddr &= ~7ULL; + nstable = extract32(tableattrs, 4, 1); + descriptor = arm_ldq_ptw(cs, descaddr, !nstable); + if (!(descriptor & 1) || + (!(descriptor & 2) && (level == 3))) { + /* Invalid, or the Reserved level 3 encoding */ + goto do_fault; + } + descaddr = descriptor & 0xfffffff000ULL; + + if ((descriptor & 2) && (level < 3)) { + /* Table entry. The top five bits are attributes which may + * propagate down through lower levels of the table (and + * which are all arranged so that 0 means "no effect", so + * we can gather them up by ORing in the bits at each level). + */ + tableattrs |= extract64(descriptor, 59, 5); + level++; + continue; + } + /* Block entry at level 1 or 2, or page entry at level 3. + * These are basically the same thing, although the number + * of bits we pull in from the vaddr varies. + */ + page_size = (1ULL << ((granule_sz * (4 - level)) + 3)); + descaddr |= (address & (page_size - 1)); + /* Extract attributes from the descriptor and merge with table attrs */ + attrs = extract64(descriptor, 2, 10) + | (extract64(descriptor, 52, 12) << 10); + attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ + attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */ + /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 + * means "force PL1 access only", which means forcing AP[1] to 0. + */ + if (extract32(tableattrs, 2, 1)) { + attrs &= ~(1 << 4); + } + attrs |= nstable << 3; /* NS */ + break; + } + /* Here descaddr is the final physical address, and attributes + * are all in attrs. + */ + fault_type = access_fault; + if ((attrs & (1 << 8)) == 0) { + /* Access flag */ + goto do_fault; + } + + ap = extract32(attrs, 4, 2); + ns = extract32(attrs, 3, 1); + xn = extract32(attrs, 12, 1); + pxn = extract32(attrs, 11, 1); + + *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn); + + fault_type = permission_fault; + if (!(*prot & (1 << access_type))) { + goto do_fault; + } + + if (ns) { + /* The NS bit will (as required by the architecture) have no effect if + * the CPU doesn't support TZ or this is a non-secure translation + * regime, because the attribute will already be non-secure. + */ + txattrs->secure = false; + } + *phys_ptr = descaddr; + *page_size_ptr = page_size; + return false; + +do_fault: + /* Long-descriptor format IFSR/DFSR value */ + *fsr = (1 << 9) | (fault_type << 2) | level; + return true; +} + +static inline void get_phys_addr_pmsav7_default(CPUARMState *env, + ARMMMUIdx mmu_idx, + int32_t address, int *prot) +{ + *prot = PAGE_READ | PAGE_WRITE; + switch (address) { + case 0xF0000000 ... 0xFFFFFFFF: + if (regime_sctlr(env, mmu_idx) & SCTLR_V) { /* hivecs execing is ok */ + *prot |= PAGE_EXEC; + } + break; + case 0x00000000 ... 0x7FFFFFFF: + *prot |= PAGE_EXEC; + break; + } + +} + +static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, + int access_type, ARMMMUIdx mmu_idx, + hwaddr *phys_ptr, int *prot, uint32_t *fsr) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int n; + bool is_user = regime_is_user(env, mmu_idx); + + *phys_ptr = address; + *prot = 0; + + if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */ + get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); + } else { /* MPU enabled */ + for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { + /* region search */ + uint32_t base = env->pmsav7.drbar[n]; + uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5); + uint32_t rmask; + bool srdis = false; + + if (!(env->pmsav7.drsr[n] & 0x1)) { + continue; + } + + if (!rsize) { + qemu_log_mask(LOG_GUEST_ERROR, "DRSR.Rsize field can not be 0"); + continue; + } + rsize++; + rmask = (1ull << rsize) - 1; + + if (base & rmask) { + qemu_log_mask(LOG_GUEST_ERROR, "DRBAR %" PRIx32 " misaligned " + "to DRSR region size, mask = %" PRIx32, + base, rmask); + continue; + } + + if (address < base || address > base + rmask) { + continue; + } + + /* Region matched */ + + if (rsize >= 8) { /* no subregions for regions < 256 bytes */ + int i, snd; + uint32_t srdis_mask; + + rsize -= 3; /* sub region size (power of 2) */ + snd = ((address - base) >> rsize) & 0x7; + srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1); + + srdis_mask = srdis ? 0x3 : 0x0; + for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) { + /* This will check in groups of 2, 4 and then 8, whether + * the subregion bits are consistent. rsize is incremented + * back up to give the region size, considering consistent + * adjacent subregions as one region. Stop testing if rsize + * is already big enough for an entire QEMU page. + */ + int snd_rounded = snd & ~(i - 1); + uint32_t srdis_multi = extract32(env->pmsav7.drsr[n], + snd_rounded + 8, i); + if (srdis_mask ^ srdis_multi) { + break; + } + srdis_mask = (srdis_mask << i) | srdis_mask; + rsize++; + } + } + if (rsize < TARGET_PAGE_BITS) { + qemu_log_mask(LOG_UNIMP, "No support for MPU (sub)region" + "alignment of %" PRIu32 " bits. Minimum is %d\n", + rsize, TARGET_PAGE_BITS); + continue; + } + if (srdis) { + continue; + } + break; + } + + if (n == -1) { /* no hits */ + if (cpu->pmsav7_dregion && + (is_user || !(regime_sctlr(env, mmu_idx) & SCTLR_BR))) { + /* background fault */ + *fsr = 0; + return true; + } + get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); + } else { /* a MPU hit! */ + uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3); + + if (is_user) { /* User mode AP bit decoding */ + switch (ap) { + case 0: + case 1: + case 5: + break; /* no access */ + case 3: + *prot |= PAGE_WRITE; + /* fall through */ + case 2: + case 6: + *prot |= PAGE_READ | PAGE_EXEC; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "Bad value for AP bits in DRACR %" + PRIx32 "\n", ap); + } + } else { /* Priv. mode AP bits decoding */ + switch (ap) { + case 0: + break; /* no access */ + case 1: + case 2: + case 3: + *prot |= PAGE_WRITE; + /* fall through */ + case 5: + case 6: + *prot |= PAGE_READ | PAGE_EXEC; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "Bad value for AP bits in DRACR %" + PRIx32 "\n", ap); + } + } + + /* execute never */ + if (env->pmsav7.dracr[n] & (1 << 12)) { + *prot &= ~PAGE_EXEC; + } + } + } + + *fsr = 0x00d; /* Permission fault */ + return !(*prot & (1 << access_type)); +} + +static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, + int access_type, ARMMMUIdx mmu_idx, + hwaddr *phys_ptr, int *prot, uint32_t *fsr) +{ + int n; + uint32_t mask; + uint32_t base; + bool is_user = regime_is_user(env, mmu_idx); + + *phys_ptr = address; + for (n = 7; n >= 0; n--) { + base = env->cp15.c6_region[n]; + if ((base & 1) == 0) { + continue; + } + mask = 1 << ((base >> 1) & 0x1f); + /* Keep this shift separate from the above to avoid an + (undefined) << 32. */ + mask = (mask << 1) - 1; + if (((base ^ address) & ~mask) == 0) { + break; + } + } + if (n < 0) { + *fsr = 2; + return true; + } + + if (access_type == 2) { + mask = env->cp15.pmsav5_insn_ap; + } else { + mask = env->cp15.pmsav5_data_ap; + } + mask = (mask >> (n * 4)) & 0xf; + switch (mask) { + case 0: + *fsr = 1; + return true; + case 1: + if (is_user) { + *fsr = 1; + return true; + } + *prot = PAGE_READ | PAGE_WRITE; + break; + case 2: + *prot = PAGE_READ; + if (!is_user) { + *prot |= PAGE_WRITE; + } + break; + case 3: + *prot = PAGE_READ | PAGE_WRITE; + break; + case 5: + if (is_user) { + *fsr = 1; + return true; + } + *prot = PAGE_READ; + break; + case 6: + *prot = PAGE_READ; + break; + default: + /* Bad permission. */ + *fsr = 1; + return true; + } + *prot |= PAGE_EXEC; + return false; +} + +/* get_phys_addr - get the physical address for this virtual address + * + * Find the physical address corresponding to the given virtual address, + * by doing a translation table walk on MMU based systems or using the + * MPU state on MPU based systems. + * + * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, + * prot and page_size may not be filled in, and the populated fsr value provides + * information on why the translation aborted, in the format of a + * DFSR/IFSR fault register, with the following caveats: + * * we honour the short vs long DFSR format differences. + * * the WnR bit is never set (the caller must do this). + * * for PSMAv5 based systems we don't bother to return a full FSR format + * value. + * + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: 0 for read, 1 for write, 2 for execute + * @mmu_idx: MMU index indicating required translation regime + * @phys_ptr: set to the physical address corresponding to the virtual address + * @attrs: set to the memory transaction attributes to use + * @prot: set to the permissions for the page containing phys_ptr + * @page_size: set to the size of the page containing phys_ptr + * @fsr: set to the DFSR/IFSR value on failure + */ +static inline bool get_phys_addr(CPUARMState *env, target_ulong address, + int access_type, ARMMMUIdx mmu_idx, + hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, + target_ulong *page_size, uint32_t *fsr) +{ + if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) { + /* TODO: when we support EL2 we should here call ourselves recursively + * to do the stage 1 and then stage 2 translations. The arm_ld*_ptw + * functions will also need changing to perform ARMMMUIdx_S2NS loads + * rather than direct physical memory loads when appropriate. + * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. + */ + assert(!arm_feature(env, ARM_FEATURE_EL2)); + mmu_idx += ARMMMUIdx_S1NSE0; + } + + /* The page table entries may downgrade secure to non-secure, but + * cannot upgrade an non-secure translation regime's attributes + * to secure. + */ + attrs->secure = regime_is_secure(env, mmu_idx); + attrs->user = regime_is_user(env, mmu_idx); + + /* Fast Context Switch Extension. This doesn't exist at all in v8. + * In v7 and earlier it affects all stage 1 translations. + */ + if (address < 0x02000000 && mmu_idx != ARMMMUIdx_S2NS + && !arm_feature(env, ARM_FEATURE_V8)) { + if (regime_el(env, mmu_idx) == 3) { + address += env->cp15.fcseidr_s; + } else { + address += env->cp15.fcseidr_ns; + } + } + + /* pmsav7 has special handling for when MPU is disabled so call it before + * the common MMU/MPU disabled check below. + */ + if (arm_feature(env, ARM_FEATURE_MPU) && + arm_feature(env, ARM_FEATURE_V7)) { + *page_size = TARGET_PAGE_SIZE; + return get_phys_addr_pmsav7(env, address, access_type, mmu_idx, + phys_ptr, prot, fsr); + } + + if (regime_translation_disabled(env, mmu_idx)) { + /* MMU/MPU disabled. */ + *phys_ptr = address; + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + *page_size = TARGET_PAGE_SIZE; + return 0; + } + + if (arm_feature(env, ARM_FEATURE_MPU)) { + /* Pre-v7 MPU */ + *page_size = TARGET_PAGE_SIZE; + return get_phys_addr_pmsav5(env, address, access_type, mmu_idx, + phys_ptr, prot, fsr); + } + + if (regime_using_lpae_format(env, mmu_idx)) { + return get_phys_addr_lpae(env, address, access_type, mmu_idx, phys_ptr, + attrs, prot, page_size, fsr); + } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { + return get_phys_addr_v6(env, address, access_type, mmu_idx, phys_ptr, + attrs, prot, page_size, fsr); + } else { + return get_phys_addr_v5(env, address, access_type, mmu_idx, phys_ptr, + prot, page_size, fsr); + } +} + +/* Walk the page table and (if the mapping exists) add the page + * to the TLB. Return false on success, or true on failure. Populate + * fsr with ARM DFSR/IFSR fault register format value on failure. + */ +bool arm_tlb_fill(CPUState *cs, vaddr address, + int access_type, int mmu_idx, uint32_t *fsr) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + hwaddr phys_addr; + target_ulong page_size; + int prot; + int ret; + MemTxAttrs attrs = {}; + + ret = get_phys_addr(env, address, access_type, mmu_idx, &phys_addr, + &attrs, &prot, &page_size, fsr); + if (!ret) { + /* Map a single [sub]page. */ + phys_addr &= TARGET_PAGE_MASK; + address &= TARGET_PAGE_MASK; + tlb_set_page_with_attrs(cs, address, phys_addr, attrs, + prot, mmu_idx, page_size); + return 0; + } + + return ret; +} + +hwaddr arm_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + hwaddr phys_addr; + target_ulong page_size; + int prot; + bool ret; + uint32_t fsr; + MemTxAttrs attrs = {}; + + ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env), &phys_addr, + &attrs, &prot, &page_size, &fsr); + + if (ret) { + return -1; + } + + return phys_addr; +} + +void HELPER(set_r13_banked)(CPUARMState *env, uint32_t mode, uint32_t val) +{ + if ((env->uncached_cpsr & CPSR_M) == mode) { + env->regs[13] = val; + } else { + env->banked_r13[bank_number(mode)] = val; + } +} + +uint32_t HELPER(get_r13_banked)(CPUARMState *env, uint32_t mode) +{ + if ((env->uncached_cpsr & CPSR_M) == mode) { + return env->regs[13]; + } else { + return env->banked_r13[bank_number(mode)]; + } +} + +uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + switch (reg) { + case 0: /* APSR */ + return xpsr_read(env) & 0xf8000000; + case 1: /* IAPSR */ + return xpsr_read(env) & 0xf80001ff; + case 2: /* EAPSR */ + return xpsr_read(env) & 0xff00fc00; + case 3: /* xPSR */ + return xpsr_read(env) & 0xff00fdff; + case 5: /* IPSR */ + return xpsr_read(env) & 0x000001ff; + case 6: /* EPSR */ + return xpsr_read(env) & 0x0700fc00; + case 7: /* IEPSR */ + return xpsr_read(env) & 0x0700edff; + case 8: /* MSP */ + return env->v7m.current_sp ? env->v7m.other_sp : env->regs[13]; + case 9: /* PSP */ + return env->v7m.current_sp ? env->regs[13] : env->v7m.other_sp; + case 16: /* PRIMASK */ + return (env->daif & PSTATE_I) != 0; + case 17: /* BASEPRI */ + case 18: /* BASEPRI_MAX */ + return env->v7m.basepri; + case 19: /* FAULTMASK */ + return (env->daif & PSTATE_F) != 0; + case 20: /* CONTROL */ + return env->v7m.control; + default: + /* ??? For debugging only. */ + cpu_abort(CPU(cpu), "Unimplemented system register read (%d)\n", reg); + return 0; + } +} + +void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + + switch (reg) { + case 0: /* APSR */ + xpsr_write(env, val, 0xf8000000); + break; + case 1: /* IAPSR */ + xpsr_write(env, val, 0xf8000000); + break; + case 2: /* EAPSR */ + xpsr_write(env, val, 0xfe00fc00); + break; + case 3: /* xPSR */ + xpsr_write(env, val, 0xfe00fc00); + break; + case 5: /* IPSR */ + /* IPSR bits are readonly. */ + break; + case 6: /* EPSR */ + xpsr_write(env, val, 0x0600fc00); + break; + case 7: /* IEPSR */ + xpsr_write(env, val, 0x0600fc00); + break; + case 8: /* MSP */ + if (env->v7m.current_sp) + env->v7m.other_sp = val; + else + env->regs[13] = val; + break; + case 9: /* PSP */ + if (env->v7m.current_sp) + env->regs[13] = val; + else + env->v7m.other_sp = val; + break; + case 16: /* PRIMASK */ + if (val & 1) { + env->daif |= PSTATE_I; + } else { + env->daif &= ~PSTATE_I; + } + break; + case 17: /* BASEPRI */ + env->v7m.basepri = val & 0xff; + break; + case 18: /* BASEPRI_MAX */ + val &= 0xff; + if (val != 0 && (val < env->v7m.basepri || env->v7m.basepri == 0)) + env->v7m.basepri = val; + break; + case 19: /* FAULTMASK */ + if (val & 1) { + env->daif |= PSTATE_F; + } else { + env->daif &= ~PSTATE_F; + } + break; + case 20: /* CONTROL */ + env->v7m.control = val & 3; + switch_v7m_sp(env, (val & 2) != 0); + break; + default: + /* ??? For debugging only. */ + cpu_abort(CPU(cpu), "Unimplemented system register write (%d)\n", reg); + return; + } +} + +#endif + +void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) +{ + /* Implement DC ZVA, which zeroes a fixed-length block of memory. + * Note that we do not implement the (architecturally mandated) + * alignment fault for attempts to use this on Device memory + * (which matches the usual QEMU behaviour of not implementing either + * alignment faults or any memory attribute handling). + */ + + ARMCPU *cpu = arm_env_get_cpu(env); + uint64_t blocklen = 4 << cpu->dcz_blocksize; + uint64_t vaddr = vaddr_in & ~(blocklen - 1); + +#ifndef CONFIG_USER_ONLY + { + /* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than + * the block size so we might have to do more than one TLB lookup. + * We know that in fact for any v8 CPU the page size is at least 4K + * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only + * 1K as an artefact of legacy v5 subpage support being present in the + * same QEMU executable. + */ + int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); + void *hostaddr[maxidx]; + int try, i; + unsigned mmu_idx = cpu_mmu_index(env); + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + + for (try = 0; try < 2; try++) { + + for (i = 0; i < maxidx; i++) { + hostaddr[i] = tlb_vaddr_to_host(env, + vaddr + TARGET_PAGE_SIZE * i, + 1, mmu_idx); + if (!hostaddr[i]) { + break; + } + } + if (i == maxidx) { + /* If it's all in the TLB it's fair game for just writing to; + * we know we don't need to update dirty status, etc. + */ + for (i = 0; i < maxidx - 1; i++) { + memset(hostaddr[i], 0, TARGET_PAGE_SIZE); + } + memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); + return; + } + /* OK, try a store and see if we can populate the tlb. This + * might cause an exception if the memory isn't writable, + * in which case we will longjmp out of here. We must for + * this purpose use the actual register value passed to us + * so that we get the fault address right. + */ + helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETRA()); + /* Now we can populate the other TLB entries, if any */ + for (i = 0; i < maxidx; i++) { + uint64_t va = vaddr + TARGET_PAGE_SIZE * i; + if (va != (vaddr_in & TARGET_PAGE_MASK)) { + helper_ret_stb_mmu(env, va, 0, oi, GETRA()); + } + } + } + + /* Slow path (probably attempt to do this to an I/O device or + * similar, or clearing of a block of code we have translations + * cached for). Just do a series of byte writes as the architecture + * demands. It's not worth trying to use a cpu_physical_memory_map(), + * memset(), unmap() sequence here because: + * + we'd need to account for the blocksize being larger than a page + * + the direct-RAM access case is almost always going to be dealt + * with in the fastpath code above, so there's no speed benefit + * + we would have to deal with the map returning NULL because the + * bounce buffer was in use + */ + for (i = 0; i < blocklen; i++) { + helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETRA()); + } + } +#else + memset(g2h(vaddr), 0, blocklen); +#endif +} + +/* Note that signed overflow is undefined in C. The following routines are + careful to use unsigned types where modulo arithmetic is required. + Failure to do so _will_ break on newer gcc. */ + +/* Signed saturating arithmetic. */ + +/* Perform 16-bit signed saturating addition. */ +static inline uint16_t add16_sat(uint16_t a, uint16_t b) +{ + uint16_t res; + + res = a + b; + if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { + if (a & 0x8000) + res = 0x8000; + else + res = 0x7fff; + } + return res; +} + +/* Perform 8-bit signed saturating addition. */ +static inline uint8_t add8_sat(uint8_t a, uint8_t b) +{ + uint8_t res; + + res = a + b; + if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { + if (a & 0x80) + res = 0x80; + else + res = 0x7f; + } + return res; +} + +/* Perform 16-bit signed saturating subtraction. */ +static inline uint16_t sub16_sat(uint16_t a, uint16_t b) +{ + uint16_t res; + + res = a - b; + if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { + if (a & 0x8000) + res = 0x8000; + else + res = 0x7fff; + } + return res; +} + +/* Perform 8-bit signed saturating subtraction. */ +static inline uint8_t sub8_sat(uint8_t a, uint8_t b) +{ + uint8_t res; + + res = a - b; + if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { + if (a & 0x80) + res = 0x80; + else + res = 0x7f; + } + return res; +} + +#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); +#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); +#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); +#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); +#define PFX q + +#include "op_addsub.h" + +/* Unsigned saturating arithmetic. */ +static inline uint16_t add16_usat(uint16_t a, uint16_t b) +{ + uint16_t res; + res = a + b; + if (res < a) + res = 0xffff; + return res; +} + +static inline uint16_t sub16_usat(uint16_t a, uint16_t b) +{ + if (a > b) + return a - b; + else + return 0; +} + +static inline uint8_t add8_usat(uint8_t a, uint8_t b) +{ + uint8_t res; + res = a + b; + if (res < a) + res = 0xff; + return res; +} + +static inline uint8_t sub8_usat(uint8_t a, uint8_t b) +{ + if (a > b) + return a - b; + else + return 0; +} + +#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); +#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); +#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); +#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); +#define PFX uq + +#include "op_addsub.h" + +/* Signed modulo arithmetic. */ +#define SARITH16(a, b, n, op) do { \ + int32_t sum; \ + sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \ + RESULT(sum, n, 16); \ + if (sum >= 0) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define SARITH8(a, b, n, op) do { \ + int32_t sum; \ + sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \ + RESULT(sum, n, 8); \ + if (sum >= 0) \ + ge |= 1 << n; \ + } while(0) + + +#define ADD16(a, b, n) SARITH16(a, b, n, +) +#define SUB16(a, b, n) SARITH16(a, b, n, -) +#define ADD8(a, b, n) SARITH8(a, b, n, +) +#define SUB8(a, b, n) SARITH8(a, b, n, -) +#define PFX s +#define ARITH_GE + +#include "op_addsub.h" + +/* Unsigned modulo arithmetic. */ +#define ADD16(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ + RESULT(sum, n, 16); \ + if ((sum >> 16) == 1) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define ADD8(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ + RESULT(sum, n, 8); \ + if ((sum >> 8) == 1) \ + ge |= 1 << n; \ + } while(0) + +#define SUB16(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ + RESULT(sum, n, 16); \ + if ((sum >> 16) == 0) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define SUB8(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ + RESULT(sum, n, 8); \ + if ((sum >> 8) == 0) \ + ge |= 1 << n; \ + } while(0) + +#define PFX u +#define ARITH_GE + +#include "op_addsub.h" + +/* Halved signed arithmetic. */ +#define ADD16(a, b, n) \ + RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) +#define SUB16(a, b, n) \ + RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) +#define ADD8(a, b, n) \ + RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) +#define SUB8(a, b, n) \ + RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) +#define PFX sh + +#include "op_addsub.h" + +/* Halved unsigned arithmetic. */ +#define ADD16(a, b, n) \ + RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) +#define SUB16(a, b, n) \ + RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) +#define ADD8(a, b, n) \ + RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) +#define SUB8(a, b, n) \ + RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) +#define PFX uh + +#include "op_addsub.h" + +static inline uint8_t do_usad(uint8_t a, uint8_t b) +{ + if (a > b) + return a - b; + else + return b - a; +} + +/* Unsigned sum of absolute byte differences. */ +uint32_t HELPER(usad8)(uint32_t a, uint32_t b) +{ + uint32_t sum; + sum = do_usad(a, b); + sum += do_usad(a >> 8, b >> 8); + sum += do_usad(a >> 16, b >>16); + sum += do_usad(a >> 24, b >> 24); + return sum; +} + +/* For ARMv6 SEL instruction. */ +uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) +{ + uint32_t mask; + + mask = 0; + if (flags & 1) + mask |= 0xff; + if (flags & 2) + mask |= 0xff00; + if (flags & 4) + mask |= 0xff0000; + if (flags & 8) + mask |= 0xff000000; + return (a & mask) | (b & ~mask); +} + +/* VFP support. We follow the convention used for VFP instructions: + Single precision routines have a "s" suffix, double precision a + "d" suffix. */ + +/* Convert host exception flags to vfp form. */ +static inline int vfp_exceptbits_from_host(int host_bits) +{ + int target_bits = 0; + + if (host_bits & float_flag_invalid) + target_bits |= 1; + if (host_bits & float_flag_divbyzero) + target_bits |= 2; + if (host_bits & float_flag_overflow) + target_bits |= 4; + if (host_bits & (float_flag_underflow | float_flag_output_denormal)) + target_bits |= 8; + if (host_bits & float_flag_inexact) + target_bits |= 0x10; + if (host_bits & float_flag_input_denormal) + target_bits |= 0x80; + return target_bits; +} + +uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) +{ + int i; + uint32_t fpscr; + + fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff) + | (env->vfp.vec_len << 16) + | (env->vfp.vec_stride << 20); + i = get_float_exception_flags(&env->vfp.fp_status); + i |= get_float_exception_flags(&env->vfp.standard_fp_status); + fpscr |= vfp_exceptbits_from_host(i); + return fpscr; +} + +uint32_t vfp_get_fpscr(CPUARMState *env) +{ + return HELPER(vfp_get_fpscr)(env); +} + +/* Convert vfp exception flags to target form. */ +static inline int vfp_exceptbits_to_host(int target_bits) +{ + int host_bits = 0; + + if (target_bits & 1) + host_bits |= float_flag_invalid; + if (target_bits & 2) + host_bits |= float_flag_divbyzero; + if (target_bits & 4) + host_bits |= float_flag_overflow; + if (target_bits & 8) + host_bits |= float_flag_underflow; + if (target_bits & 0x10) + host_bits |= float_flag_inexact; + if (target_bits & 0x80) + host_bits |= float_flag_input_denormal; + return host_bits; +} + +void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) +{ + int i; + uint32_t changed; + + changed = env->vfp.xregs[ARM_VFP_FPSCR]; + env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff); + env->vfp.vec_len = (val >> 16) & 7; + env->vfp.vec_stride = (val >> 20) & 3; + + changed ^= val; + if (changed & (3 << 22)) { + i = (val >> 22) & 3; + switch (i) { + case FPROUNDING_TIEEVEN: + i = float_round_nearest_even; + break; + case FPROUNDING_POSINF: + i = float_round_up; + break; + case FPROUNDING_NEGINF: + i = float_round_down; + break; + case FPROUNDING_ZERO: + i = float_round_to_zero; + break; + } + set_float_rounding_mode(i, &env->vfp.fp_status); + } + if (changed & (1 << 24)) { + set_flush_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status); + set_flush_inputs_to_zero((val & (1 << 24)) != 0, &env->vfp.fp_status); + } + if (changed & (1 << 25)) + set_default_nan_mode((val & (1 << 25)) != 0, &env->vfp.fp_status); + + i = vfp_exceptbits_to_host(val); + set_float_exception_flags(i, &env->vfp.fp_status); + set_float_exception_flags(0, &env->vfp.standard_fp_status); +} + +void vfp_set_fpscr(CPUARMState *env, uint32_t val) +{ + HELPER(vfp_set_fpscr)(env, val); +} + +#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p)) + +#define VFP_BINOP(name) \ +float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + return float32_ ## name(a, b, fpst); \ +} \ +float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + return float64_ ## name(a, b, fpst); \ +} +VFP_BINOP(add) +VFP_BINOP(sub) +VFP_BINOP(mul) +VFP_BINOP(div) +VFP_BINOP(min) +VFP_BINOP(max) +VFP_BINOP(minnum) +VFP_BINOP(maxnum) +#undef VFP_BINOP + +float32 VFP_HELPER(neg, s)(float32 a) +{ + return float32_chs(a); +} + +float64 VFP_HELPER(neg, d)(float64 a) +{ + return float64_chs(a); +} + +float32 VFP_HELPER(abs, s)(float32 a) +{ + return float32_abs(a); +} + +float64 VFP_HELPER(abs, d)(float64 a) +{ + return float64_abs(a); +} + +float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env) +{ + return float32_sqrt(a, &env->vfp.fp_status); +} + +float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) +{ + return float64_sqrt(a, &env->vfp.fp_status); +} + +/* XXX: check quiet/signaling case */ +#define DO_VFP_cmp(p, type) \ +void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ +{ \ + uint32_t flags; \ + switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \ + case 0: flags = 0x6; break; \ + case -1: flags = 0x8; break; \ + case 1: flags = 0x2; break; \ + default: case 2: flags = 0x3; break; \ + } \ + env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ + | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ +} \ +void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ +{ \ + uint32_t flags; \ + switch(type ## _compare(a, b, &env->vfp.fp_status)) { \ + case 0: flags = 0x6; break; \ + case -1: flags = 0x8; break; \ + case 1: flags = 0x2; break; \ + default: case 2: flags = 0x3; break; \ + } \ + env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ + | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ +} +DO_VFP_cmp(s, float32) +DO_VFP_cmp(d, float64) +#undef DO_VFP_cmp + +/* Integer to float and float to integer conversions */ + +#define CONV_ITOF(name, fsz, sign) \ + float##fsz HELPER(name)(uint32_t x, void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \ +} + +#define CONV_FTOI(name, fsz, sign, round) \ +uint32_t HELPER(name)(float##fsz x, void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + if (float##fsz##_is_any_nan(x)) { \ + float_raise(float_flag_invalid, fpst); \ + return 0; \ + } \ + return float##fsz##_to_##sign##int32##round(x, fpst); \ +} + +#define FLOAT_CONVS(name, p, fsz, sign) \ +CONV_ITOF(vfp_##name##to##p, fsz, sign) \ +CONV_FTOI(vfp_to##name##p, fsz, sign, ) \ +CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero) + +FLOAT_CONVS(si, s, 32, ) +FLOAT_CONVS(si, d, 64, ) +FLOAT_CONVS(ui, s, 32, u) +FLOAT_CONVS(ui, d, 64, u) + +#undef CONV_ITOF +#undef CONV_FTOI +#undef FLOAT_CONVS + +/* floating point conversion */ +float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env) +{ + float64 r = float32_to_float64(x, &env->vfp.fp_status); + /* ARM requires that S<->D conversion of any kind of NaN generates + * a quiet NaN by forcing the most significant frac bit to 1. + */ + return float64_maybe_silence_nan(r); +} + +float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env) +{ + float32 r = float64_to_float32(x, &env->vfp.fp_status); + /* ARM requires that S<->D conversion of any kind of NaN generates + * a quiet NaN by forcing the most significant frac bit to 1. + */ + return float32_maybe_silence_nan(r); +} + +/* VFP3 fixed point conversion. */ +#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \ +float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \ + void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + float##fsz tmp; \ + tmp = itype##_to_##float##fsz(x, fpst); \ + return float##fsz##_scalbn(tmp, -(int)shift, fpst); \ +} + +/* Notice that we want only input-denormal exception flags from the + * scalbn operation: the other possible flags (overflow+inexact if + * we overflow to infinity, output-denormal) aren't correct for the + * complete scale-and-convert operation. + */ +#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, round) \ +uint##isz##_t HELPER(vfp_to##name##p##round)(float##fsz x, \ + uint32_t shift, \ + void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + int old_exc_flags = get_float_exception_flags(fpst); \ + float##fsz tmp; \ + if (float##fsz##_is_any_nan(x)) { \ + float_raise(float_flag_invalid, fpst); \ + return 0; \ + } \ + tmp = float##fsz##_scalbn(x, shift, fpst); \ + old_exc_flags |= get_float_exception_flags(fpst) \ + & float_flag_input_denormal; \ + set_float_exception_flags(old_exc_flags, fpst); \ + return float##fsz##_to_##itype##round(tmp, fpst); \ +} + +#define VFP_CONV_FIX(name, p, fsz, isz, itype) \ +VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \ +VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, _round_to_zero) \ +VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ) + +#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype) \ +VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \ +VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ) + +VFP_CONV_FIX(sh, d, 64, 64, int16) +VFP_CONV_FIX(sl, d, 64, 64, int32) +VFP_CONV_FIX_A64(sq, d, 64, 64, int64) +VFP_CONV_FIX(uh, d, 64, 64, uint16) +VFP_CONV_FIX(ul, d, 64, 64, uint32) +VFP_CONV_FIX_A64(uq, d, 64, 64, uint64) +VFP_CONV_FIX(sh, s, 32, 32, int16) +VFP_CONV_FIX(sl, s, 32, 32, int32) +VFP_CONV_FIX_A64(sq, s, 32, 64, int64) +VFP_CONV_FIX(uh, s, 32, 32, uint16) +VFP_CONV_FIX(ul, s, 32, 32, uint32) +VFP_CONV_FIX_A64(uq, s, 32, 64, uint64) +#undef VFP_CONV_FIX +#undef VFP_CONV_FIX_FLOAT +#undef VFP_CONV_FLOAT_FIX_ROUND + +/* Set the current fp rounding mode and return the old one. + * The argument is a softfloat float_round_ value. + */ +uint32_t HELPER(set_rmode)(uint32_t rmode, CPUARMState *env) +{ + float_status *fp_status = &env->vfp.fp_status; + + uint32_t prev_rmode = get_float_rounding_mode(fp_status); + set_float_rounding_mode(rmode, fp_status); + + return prev_rmode; +} + +/* Set the current fp rounding mode in the standard fp status and return + * the old one. This is for NEON instructions that need to change the + * rounding mode but wish to use the standard FPSCR values for everything + * else. Always set the rounding mode back to the correct value after + * modifying it. + * The argument is a softfloat float_round_ value. + */ +uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env) +{ + float_status *fp_status = &env->vfp.standard_fp_status; + + uint32_t prev_rmode = get_float_rounding_mode(fp_status); + set_float_rounding_mode(rmode, fp_status); + + return prev_rmode; +} + +/* Half precision conversions. */ +static float32 do_fcvt_f16_to_f32(uint32_t a, CPUARMState *env, float_status *s) +{ + int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0; + float32 r = float16_to_float32(make_float16(a), ieee, s); + if (ieee) { + return float32_maybe_silence_nan(r); + } + return r; +} + +static uint32_t do_fcvt_f32_to_f16(float32 a, CPUARMState *env, float_status *s) +{ + int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0; + float16 r = float32_to_float16(a, ieee, s); + if (ieee) { + r = float16_maybe_silence_nan(r); + } + return float16_val(r); +} + +float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUARMState *env) +{ + return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status); +} + +uint32_t HELPER(neon_fcvt_f32_to_f16)(float32 a, CPUARMState *env) +{ + return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status); +} + +float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUARMState *env) +{ + return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status); +} + +uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUARMState *env) +{ + return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status); +} + +float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, CPUARMState *env) +{ + int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0; + float64 r = float16_to_float64(make_float16(a), ieee, &env->vfp.fp_status); + if (ieee) { + return float64_maybe_silence_nan(r); + } + return r; +} + +uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, CPUARMState *env) +{ + int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0; + float16 r = float64_to_float16(a, ieee, &env->vfp.fp_status); + if (ieee) { + r = float16_maybe_silence_nan(r); + } + return float16_val(r); +} + +#define float32_two make_float32(0x40000000) +#define float32_three make_float32(0x40400000) +#define float32_one_point_five make_float32(0x3fc00000) + +float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env) +{ + float_status *s = &env->vfp.standard_fp_status; + if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || + (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) { + if (!(float32_is_zero(a) || float32_is_zero(b))) { + float_raise(float_flag_input_denormal, s); + } + return float32_two; + } + return float32_sub(float32_two, float32_mul(a, b, s), s); +} + +float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env) +{ + float_status *s = &env->vfp.standard_fp_status; + float32 product; + if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) || + (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) { + if (!(float32_is_zero(a) || float32_is_zero(b))) { + float_raise(float_flag_input_denormal, s); + } + return float32_one_point_five; + } + product = float32_mul(a, b, s); + return float32_div(float32_sub(float32_three, product, s), float32_two, s); +} + +/* NEON helpers. */ + +/* Constants 256 and 512 are used in some helpers; we avoid relying on + * int->float conversions at run-time. */ +#define float64_256 make_float64(0x4070000000000000LL) +#define float64_512 make_float64(0x4080000000000000LL) +#define float32_maxnorm make_float32(0x7f7fffff) +#define float64_maxnorm make_float64(0x7fefffffffffffffLL) + +/* Reciprocal functions + * + * The algorithm that must be used to calculate the estimate + * is specified by the ARM ARM, see FPRecipEstimate() + */ + +static float64 recip_estimate(float64 a, float_status *real_fp_status) +{ + /* These calculations mustn't set any fp exception flags, + * so we use a local copy of the fp_status. + */ + float_status dummy_status = *real_fp_status; + float_status *s = &dummy_status; + /* q = (int)(a * 512.0) */ + float64 q = float64_mul(float64_512, a, s); + int64_t q_int = float64_to_int64_round_to_zero(q, s); + + /* r = 1.0 / (((double)q + 0.5) / 512.0) */ + q = int64_to_float64(q_int, s); + q = float64_add(q, float64_half, s); + q = float64_div(q, float64_512, s); + q = float64_div(float64_one, q, s); + + /* s = (int)(256.0 * r + 0.5) */ + q = float64_mul(q, float64_256, s); + q = float64_add(q, float64_half, s); + q_int = float64_to_int64_round_to_zero(q, s); + + /* return (double)s / 256.0 */ + return float64_div(int64_to_float64(q_int, s), float64_256, s); +} + +/* Common wrapper to call recip_estimate */ +static float64 call_recip_estimate(float64 num, int off, float_status *fpst) +{ + uint64_t val64 = float64_val(num); + uint64_t frac = extract64(val64, 0, 52); + int64_t exp = extract64(val64, 52, 11); + uint64_t sbit; + float64 scaled, estimate; + + /* Generate the scaled number for the estimate function */ + if (exp == 0) { + if (extract64(frac, 51, 1) == 0) { + exp = -1; + frac = extract64(frac, 0, 50) << 2; + } else { + frac = extract64(frac, 0, 51) << 1; + } + } + + /* scaled = '0' : '01111111110' : fraction<51:44> : Zeros(44); */ + scaled = make_float64((0x3feULL << 52) + | extract64(frac, 44, 8) << 44); + + estimate = recip_estimate(scaled, fpst); + + /* Build new result */ + val64 = float64_val(estimate); + sbit = 0x8000000000000000ULL & val64; + exp = off - exp; + frac = extract64(val64, 0, 52); + + if (exp == 0) { + frac = 1ULL << 51 | extract64(frac, 1, 51); + } else if (exp == -1) { + frac = 1ULL << 50 | extract64(frac, 2, 50); + exp = 0; + } + + return make_float64(sbit | (exp << 52) | frac); +} + +static bool round_to_inf(float_status *fpst, bool sign_bit) +{ + switch (fpst->float_rounding_mode) { + case float_round_nearest_even: /* Round to Nearest */ + return true; + case float_round_up: /* Round to +Inf */ + return !sign_bit; + case float_round_down: /* Round to -Inf */ + return sign_bit; + case float_round_to_zero: /* Round to Zero */ + return false; + } + + g_assert_not_reached(); +} + +float32 HELPER(recpe_f32)(float32 input, void *fpstp) +{ + float_status *fpst = fpstp; + float32 f32 = float32_squash_input_denormal(input, fpst); + uint32_t f32_val = float32_val(f32); + uint32_t f32_sbit = 0x80000000ULL & f32_val; + int32_t f32_exp = extract32(f32_val, 23, 8); + uint32_t f32_frac = extract32(f32_val, 0, 23); + float64 f64, r64; + uint64_t r64_val; + int64_t r64_exp; + uint64_t r64_frac; + + if (float32_is_any_nan(f32)) { + float32 nan = f32; + if (float32_is_signaling_nan(f32)) { + float_raise(float_flag_invalid, fpst); + nan = float32_maybe_silence_nan(f32); + } + if (fpst->default_nan_mode) { + nan = float32_default_nan; + } + return nan; + } else if (float32_is_infinity(f32)) { + return float32_set_sign(float32_zero, float32_is_neg(f32)); + } else if (float32_is_zero(f32)) { + float_raise(float_flag_divbyzero, fpst); + return float32_set_sign(float32_infinity, float32_is_neg(f32)); + } else if ((f32_val & ~(1ULL << 31)) < (1ULL << 21)) { + /* Abs(value) < 2.0^-128 */ + float_raise(float_flag_overflow | float_flag_inexact, fpst); + if (round_to_inf(fpst, f32_sbit)) { + return float32_set_sign(float32_infinity, float32_is_neg(f32)); + } else { + return float32_set_sign(float32_maxnorm, float32_is_neg(f32)); + } + } else if (f32_exp >= 253 && fpst->flush_to_zero) { + float_raise(float_flag_underflow, fpst); + return float32_set_sign(float32_zero, float32_is_neg(f32)); + } + + + f64 = make_float64(((int64_t)(f32_exp) << 52) | (int64_t)(f32_frac) << 29); + r64 = call_recip_estimate(f64, 253, fpst); + r64_val = float64_val(r64); + r64_exp = extract64(r64_val, 52, 11); + r64_frac = extract64(r64_val, 0, 52); + + /* result = sign : result_exp<7:0> : fraction<51:29>; */ + return make_float32(f32_sbit | + (r64_exp & 0xff) << 23 | + extract64(r64_frac, 29, 24)); +} + +float64 HELPER(recpe_f64)(float64 input, void *fpstp) +{ + float_status *fpst = fpstp; + float64 f64 = float64_squash_input_denormal(input, fpst); + uint64_t f64_val = float64_val(f64); + uint64_t f64_sbit = 0x8000000000000000ULL & f64_val; + int64_t f64_exp = extract64(f64_val, 52, 11); + float64 r64; + uint64_t r64_val; + int64_t r64_exp; + uint64_t r64_frac; + + /* Deal with any special cases */ + if (float64_is_any_nan(f64)) { + float64 nan = f64; + if (float64_is_signaling_nan(f64)) { + float_raise(float_flag_invalid, fpst); + nan = float64_maybe_silence_nan(f64); + } + if (fpst->default_nan_mode) { + nan = float64_default_nan; + } + return nan; + } else if (float64_is_infinity(f64)) { + return float64_set_sign(float64_zero, float64_is_neg(f64)); + } else if (float64_is_zero(f64)) { + float_raise(float_flag_divbyzero, fpst); + return float64_set_sign(float64_infinity, float64_is_neg(f64)); + } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) { + /* Abs(value) < 2.0^-1024 */ + float_raise(float_flag_overflow | float_flag_inexact, fpst); + if (round_to_inf(fpst, f64_sbit)) { + return float64_set_sign(float64_infinity, float64_is_neg(f64)); + } else { + return float64_set_sign(float64_maxnorm, float64_is_neg(f64)); + } + } else if (f64_exp >= 2045 && fpst->flush_to_zero) { + float_raise(float_flag_underflow, fpst); + return float64_set_sign(float64_zero, float64_is_neg(f64)); + } + + r64 = call_recip_estimate(f64, 2045, fpst); + r64_val = float64_val(r64); + r64_exp = extract64(r64_val, 52, 11); + r64_frac = extract64(r64_val, 0, 52); + + /* result = sign : result_exp<10:0> : fraction<51:0> */ + return make_float64(f64_sbit | + ((r64_exp & 0x7ff) << 52) | + r64_frac); +} + +/* The algorithm that must be used to calculate the estimate + * is specified by the ARM ARM. + */ +static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status) +{ + /* These calculations mustn't set any fp exception flags, + * so we use a local copy of the fp_status. + */ + float_status dummy_status = *real_fp_status; + float_status *s = &dummy_status; + float64 q; + int64_t q_int; + + if (float64_lt(a, float64_half, s)) { + /* range 0.25 <= a < 0.5 */ + + /* a in units of 1/512 rounded down */ + /* q0 = (int)(a * 512.0); */ + q = float64_mul(float64_512, a, s); + q_int = float64_to_int64_round_to_zero(q, s); + + /* reciprocal root r */ + /* r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0); */ + q = int64_to_float64(q_int, s); + q = float64_add(q, float64_half, s); + q = float64_div(q, float64_512, s); + q = float64_sqrt(q, s); + q = float64_div(float64_one, q, s); + } else { + /* range 0.5 <= a < 1.0 */ + + /* a in units of 1/256 rounded down */ + /* q1 = (int)(a * 256.0); */ + q = float64_mul(float64_256, a, s); + int64_t q_int = float64_to_int64_round_to_zero(q, s); + + /* reciprocal root r */ + /* r = 1.0 /sqrt(((double)q1 + 0.5) / 256); */ + q = int64_to_float64(q_int, s); + q = float64_add(q, float64_half, s); + q = float64_div(q, float64_256, s); + q = float64_sqrt(q, s); + q = float64_div(float64_one, q, s); + } + /* r in units of 1/256 rounded to nearest */ + /* s = (int)(256.0 * r + 0.5); */ + + q = float64_mul(q, float64_256,s ); + q = float64_add(q, float64_half, s); + q_int = float64_to_int64_round_to_zero(q, s); + + /* return (double)s / 256.0;*/ + return float64_div(int64_to_float64(q_int, s), float64_256, s); +} + +float32 HELPER(rsqrte_f32)(float32 input, void *fpstp) +{ + float_status *s = fpstp; + float32 f32 = float32_squash_input_denormal(input, s); + uint32_t val = float32_val(f32); + uint32_t f32_sbit = 0x80000000 & val; + int32_t f32_exp = extract32(val, 23, 8); + uint32_t f32_frac = extract32(val, 0, 23); + uint64_t f64_frac; + uint64_t val64; + int result_exp; + float64 f64; + + if (float32_is_any_nan(f32)) { + float32 nan = f32; + if (float32_is_signaling_nan(f32)) { + float_raise(float_flag_invalid, s); + nan = float32_maybe_silence_nan(f32); + } + if (s->default_nan_mode) { + nan = float32_default_nan; + } + return nan; + } else if (float32_is_zero(f32)) { + float_raise(float_flag_divbyzero, s); + return float32_set_sign(float32_infinity, float32_is_neg(f32)); + } else if (float32_is_neg(f32)) { + float_raise(float_flag_invalid, s); + return float32_default_nan; + } else if (float32_is_infinity(f32)) { + return float32_zero; + } + + /* Scale and normalize to a double-precision value between 0.25 and 1.0, + * preserving the parity of the exponent. */ + + f64_frac = ((uint64_t) f32_frac) << 29; + if (f32_exp == 0) { + while (extract64(f64_frac, 51, 1) == 0) { + f64_frac = f64_frac << 1; + f32_exp = f32_exp-1; + } + f64_frac = extract64(f64_frac, 0, 51) << 1; + } + + if (extract64(f32_exp, 0, 1) == 0) { + f64 = make_float64(((uint64_t) f32_sbit) << 32 + | (0x3feULL << 52) + | f64_frac); + } else { + f64 = make_float64(((uint64_t) f32_sbit) << 32 + | (0x3fdULL << 52) + | f64_frac); + } + + result_exp = (380 - f32_exp) / 2; + + f64 = recip_sqrt_estimate(f64, s); + + val64 = float64_val(f64); + + val = ((result_exp & 0xff) << 23) + | ((val64 >> 29) & 0x7fffff); + return make_float32(val); +} + +float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) +{ + float_status *s = fpstp; + float64 f64 = float64_squash_input_denormal(input, s); + uint64_t val = float64_val(f64); + uint64_t f64_sbit = 0x8000000000000000ULL & val; + int64_t f64_exp = extract64(val, 52, 11); + uint64_t f64_frac = extract64(val, 0, 52); + int64_t result_exp; + uint64_t result_frac; + + if (float64_is_any_nan(f64)) { + float64 nan = f64; + if (float64_is_signaling_nan(f64)) { + float_raise(float_flag_invalid, s); + nan = float64_maybe_silence_nan(f64); + } + if (s->default_nan_mode) { + nan = float64_default_nan; + } + return nan; + } else if (float64_is_zero(f64)) { + float_raise(float_flag_divbyzero, s); + return float64_set_sign(float64_infinity, float64_is_neg(f64)); + } else if (float64_is_neg(f64)) { + float_raise(float_flag_invalid, s); + return float64_default_nan; + } else if (float64_is_infinity(f64)) { + return float64_zero; + } + + /* Scale and normalize to a double-precision value between 0.25 and 1.0, + * preserving the parity of the exponent. */ + + if (f64_exp == 0) { + while (extract64(f64_frac, 51, 1) == 0) { + f64_frac = f64_frac << 1; + f64_exp = f64_exp - 1; + } + f64_frac = extract64(f64_frac, 0, 51) << 1; + } + + if (extract64(f64_exp, 0, 1) == 0) { + f64 = make_float64(f64_sbit + | (0x3feULL << 52) + | f64_frac); + } else { + f64 = make_float64(f64_sbit + | (0x3fdULL << 52) + | f64_frac); + } + + result_exp = (3068 - f64_exp) / 2; + + f64 = recip_sqrt_estimate(f64, s); + + result_frac = extract64(float64_val(f64), 0, 52); + + return make_float64(f64_sbit | + ((result_exp & 0x7ff) << 52) | + result_frac); +} + +uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) +{ + float_status *s = fpstp; + float64 f64; + + if ((a & 0x80000000) == 0) { + return 0xffffffff; + } + + f64 = make_float64((0x3feULL << 52) + | ((int64_t)(a & 0x7fffffff) << 21)); + + f64 = recip_estimate(f64, s); + + return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); +} + +uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp) +{ + float_status *fpst = fpstp; + float64 f64; + + if ((a & 0xc0000000) == 0) { + return 0xffffffff; + } + + if (a & 0x80000000) { + f64 = make_float64((0x3feULL << 52) + | ((uint64_t)(a & 0x7fffffff) << 21)); + } else { /* bits 31-30 == '01' */ + f64 = make_float64((0x3fdULL << 52) + | ((uint64_t)(a & 0x3fffffff) << 22)); + } + + f64 = recip_sqrt_estimate(f64, fpst); + + return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); +} + +/* VFPv4 fused multiply-accumulate */ +float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp) +{ + float_status *fpst = fpstp; + return float32_muladd(a, b, c, 0, fpst); +} + +float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp) +{ + float_status *fpst = fpstp; + return float64_muladd(a, b, c, 0, fpst); +} + +/* ARMv8 round to integral */ +float32 HELPER(rints_exact)(float32 x, void *fp_status) +{ + return float32_round_to_int(x, fp_status); +} + +float64 HELPER(rintd_exact)(float64 x, void *fp_status) +{ + return float64_round_to_int(x, fp_status); +} + +float32 HELPER(rints)(float32 x, void *fp_status) +{ + int old_flags = get_float_exception_flags(fp_status), new_flags; + float32 ret; + + ret = float32_round_to_int(x, fp_status); + + /* Suppress any inexact exceptions the conversion produced */ + if (!(old_flags & float_flag_inexact)) { + new_flags = get_float_exception_flags(fp_status); + set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); + } + + return ret; +} + +float64 HELPER(rintd)(float64 x, void *fp_status) +{ + int old_flags = get_float_exception_flags(fp_status), new_flags; + float64 ret; + + ret = float64_round_to_int(x, fp_status); + + new_flags = get_float_exception_flags(fp_status); + + /* Suppress any inexact exceptions the conversion produced */ + if (!(old_flags & float_flag_inexact)) { + new_flags = get_float_exception_flags(fp_status); + set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); + } + + return ret; +} + +/* Convert ARM rounding mode to softfloat */ +int arm_rmode_to_sf(int rmode) +{ + switch (rmode) { + case FPROUNDING_TIEAWAY: + rmode = float_round_ties_away; + break; + case FPROUNDING_ODD: + /* FIXME: add support for TIEAWAY and ODD */ + qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n", + rmode); + case FPROUNDING_TIEEVEN: + default: + rmode = float_round_nearest_even; + break; + case FPROUNDING_POSINF: + rmode = float_round_up; + break; + case FPROUNDING_NEGINF: + rmode = float_round_down; + break; + case FPROUNDING_ZERO: + rmode = float_round_to_zero; + break; + } + return rmode; +} + +/* CRC helpers. + * The upper bytes of val (above the number specified by 'bytes') must have + * been zeroed out by the caller. + */ +uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes) +{ + uint8_t buf[4]; + + stl_le_p(buf, val); + + /* zlib crc32 converts the accumulator and output to one's complement. */ + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; +} + +uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) +{ + uint8_t buf[4]; + + stl_le_p(buf, val); + + /* Linux crc32c converts the output to one's complement. */ + return crc32c(acc, buf, bytes) ^ 0xffffffff; +} diff --git a/target-arm/helper.h b/target-arm/helper.h new file mode 100644 index 00000000..827b33df --- /dev/null +++ b/target-arm/helper.h @@ -0,0 +1,535 @@ +DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32) + +DEF_HELPER_3(add_setq, i32, env, i32, i32) +DEF_HELPER_3(add_saturate, i32, env, i32, i32) +DEF_HELPER_3(sub_saturate, i32, env, i32, i32) +DEF_HELPER_3(add_usaturate, i32, env, i32, i32) +DEF_HELPER_3(sub_usaturate, i32, env, i32, i32) +DEF_HELPER_2(double_saturate, i32, env, s32) +DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_NO_RWG_SE, s32, s32, s32) +DEF_HELPER_FLAGS_2(udiv, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32) + +#define PAS_OP(pfx) \ + DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## sub16, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## add16, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## addsubx, i32, i32, i32, ptr) \ + DEF_HELPER_3(pfx ## subaddx, i32, i32, i32, ptr) + +PAS_OP(s) +PAS_OP(u) +#undef PAS_OP + +#define PAS_OP(pfx) \ + DEF_HELPER_2(pfx ## add8, i32, i32, i32) \ + DEF_HELPER_2(pfx ## sub8, i32, i32, i32) \ + DEF_HELPER_2(pfx ## sub16, i32, i32, i32) \ + DEF_HELPER_2(pfx ## add16, i32, i32, i32) \ + DEF_HELPER_2(pfx ## addsubx, i32, i32, i32) \ + DEF_HELPER_2(pfx ## subaddx, i32, i32, i32) +PAS_OP(q) +PAS_OP(sh) +PAS_OP(uq) +PAS_OP(uh) +#undef PAS_OP + +DEF_HELPER_3(ssat, i32, env, i32, i32) +DEF_HELPER_3(usat, i32, env, i32, i32) +DEF_HELPER_3(ssat16, i32, env, i32, i32) +DEF_HELPER_3(usat16, i32, env, i32, i32) + +DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32) + +DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE, + i32, i32, i32, i32) +DEF_HELPER_2(exception_internal, void, env, i32) +DEF_HELPER_4(exception_with_syndrome, void, env, i32, i32, i32) +DEF_HELPER_1(wfi, void, env) +DEF_HELPER_1(wfe, void, env) +DEF_HELPER_1(yield, void, env) +DEF_HELPER_1(pre_hvc, void, env) +DEF_HELPER_2(pre_smc, void, env, i32) + +DEF_HELPER_3(cpsr_write, void, env, i32, i32) +DEF_HELPER_1(cpsr_read, i32, env) + +DEF_HELPER_3(v7m_msr, void, env, i32, i32) +DEF_HELPER_2(v7m_mrs, i32, env, i32) + +DEF_HELPER_3(access_check_cp_reg, void, env, ptr, i32) +DEF_HELPER_3(set_cp_reg, void, env, ptr, i32) +DEF_HELPER_2(get_cp_reg, i32, env, ptr) +DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64) +DEF_HELPER_2(get_cp_reg64, i64, env, ptr) + +DEF_HELPER_3(msr_i_pstate, void, env, i32, i32) +DEF_HELPER_1(clear_pstate_ss, void, env) +DEF_HELPER_1(exception_return, void, env) + +DEF_HELPER_2(get_r13_banked, i32, env, i32) +DEF_HELPER_3(set_r13_banked, void, env, i32, i32) + +DEF_HELPER_2(get_user_reg, i32, env, i32) +DEF_HELPER_3(set_user_reg, void, env, i32, i32) + +DEF_HELPER_1(vfp_get_fpscr, i32, env) +DEF_HELPER_2(vfp_set_fpscr, void, env, i32) + +DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr) +DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr) +DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr) +DEF_HELPER_1(vfp_negs, f32, f32) +DEF_HELPER_1(vfp_negd, f64, f64) +DEF_HELPER_1(vfp_abss, f32, f32) +DEF_HELPER_1(vfp_absd, f64, f64) +DEF_HELPER_2(vfp_sqrts, f32, f32, env) +DEF_HELPER_2(vfp_sqrtd, f64, f64, env) +DEF_HELPER_3(vfp_cmps, void, f32, f32, env) +DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) +DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) +DEF_HELPER_3(vfp_cmped, void, f64, f64, env) + +DEF_HELPER_2(vfp_fcvtds, f64, f32, env) +DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) + +DEF_HELPER_2(vfp_uitos, f32, i32, ptr) +DEF_HELPER_2(vfp_uitod, f64, i32, ptr) +DEF_HELPER_2(vfp_sitos, f32, i32, ptr) +DEF_HELPER_2(vfp_sitod, f64, i32, ptr) + +DEF_HELPER_2(vfp_touis, i32, f32, ptr) +DEF_HELPER_2(vfp_touid, i32, f64, ptr) +DEF_HELPER_2(vfp_touizs, i32, f32, ptr) +DEF_HELPER_2(vfp_touizd, i32, f64, ptr) +DEF_HELPER_2(vfp_tosis, i32, f32, ptr) +DEF_HELPER_2(vfp_tosid, i32, f64, ptr) +DEF_HELPER_2(vfp_tosizs, i32, f32, ptr) +DEF_HELPER_2(vfp_tosizd, i32, f64, ptr) + +DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_touls_round_to_zero, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr) +DEF_HELPER_3(vfp_touhs, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_touls, i32, f32, i32, ptr) +DEF_HELPER_3(vfp_touqs, i64, f32, i32, ptr) +DEF_HELPER_3(vfp_toshd, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_tosld, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_tosqd, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_touhd, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_tould, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_touqd, i64, f64, i32, ptr) +DEF_HELPER_3(vfp_shtos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_sltos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_sqtos, f32, i64, i32, ptr) +DEF_HELPER_3(vfp_uhtos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_ultos, f32, i32, i32, ptr) +DEF_HELPER_3(vfp_uqtos, f32, i64, i32, ptr) +DEF_HELPER_3(vfp_shtod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_sltod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr) +DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr) + +DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, env) +DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env) + +DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env) +DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env) +DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env) +DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env) +DEF_HELPER_FLAGS_2(vfp_fcvt_f16_to_f64, TCG_CALL_NO_RWG, f64, i32, env) +DEF_HELPER_FLAGS_2(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, i32, f64, env) + +DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) +DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) + +DEF_HELPER_3(recps_f32, f32, f32, f32, env) +DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) +DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_2(recpe_u32, i32, i32, ptr) +DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) +DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32) + +DEF_HELPER_3(shl_cc, i32, env, i32, i32) +DEF_HELPER_3(shr_cc, i32, env, i32, i32) +DEF_HELPER_3(sar_cc, i32, env, i32, i32) +DEF_HELPER_3(ror_cc, i32, env, i32, i32) + +DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr) + +/* neon_helper.c */ +DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s64, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(neon_sqadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_sqadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_sqadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_sqadd_u64, TCG_CALL_NO_RWG, i64, env, i64, i64) +DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32) +DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32) +DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qsub_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qsub_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qadd_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qadd_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qsub_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qsub_s64, i64, env, i64, i64) + +DEF_HELPER_2(neon_hadd_s8, i32, i32, i32) +DEF_HELPER_2(neon_hadd_u8, i32, i32, i32) +DEF_HELPER_2(neon_hadd_s16, i32, i32, i32) +DEF_HELPER_2(neon_hadd_u16, i32, i32, i32) +DEF_HELPER_2(neon_hadd_s32, s32, s32, s32) +DEF_HELPER_2(neon_hadd_u32, i32, i32, i32) +DEF_HELPER_2(neon_rhadd_s8, i32, i32, i32) +DEF_HELPER_2(neon_rhadd_u8, i32, i32, i32) +DEF_HELPER_2(neon_rhadd_s16, i32, i32, i32) +DEF_HELPER_2(neon_rhadd_u16, i32, i32, i32) +DEF_HELPER_2(neon_rhadd_s32, s32, s32, s32) +DEF_HELPER_2(neon_rhadd_u32, i32, i32, i32) +DEF_HELPER_2(neon_hsub_s8, i32, i32, i32) +DEF_HELPER_2(neon_hsub_u8, i32, i32, i32) +DEF_HELPER_2(neon_hsub_s16, i32, i32, i32) +DEF_HELPER_2(neon_hsub_u16, i32, i32, i32) +DEF_HELPER_2(neon_hsub_s32, s32, s32, s32) +DEF_HELPER_2(neon_hsub_u32, i32, i32, i32) + +DEF_HELPER_2(neon_cgt_u8, i32, i32, i32) +DEF_HELPER_2(neon_cgt_s8, i32, i32, i32) +DEF_HELPER_2(neon_cgt_u16, i32, i32, i32) +DEF_HELPER_2(neon_cgt_s16, i32, i32, i32) +DEF_HELPER_2(neon_cgt_u32, i32, i32, i32) +DEF_HELPER_2(neon_cgt_s32, i32, i32, i32) +DEF_HELPER_2(neon_cge_u8, i32, i32, i32) +DEF_HELPER_2(neon_cge_s8, i32, i32, i32) +DEF_HELPER_2(neon_cge_u16, i32, i32, i32) +DEF_HELPER_2(neon_cge_s16, i32, i32, i32) +DEF_HELPER_2(neon_cge_u32, i32, i32, i32) +DEF_HELPER_2(neon_cge_s32, i32, i32, i32) + +DEF_HELPER_2(neon_min_u8, i32, i32, i32) +DEF_HELPER_2(neon_min_s8, i32, i32, i32) +DEF_HELPER_2(neon_min_u16, i32, i32, i32) +DEF_HELPER_2(neon_min_s16, i32, i32, i32) +DEF_HELPER_2(neon_min_u32, i32, i32, i32) +DEF_HELPER_2(neon_min_s32, i32, i32, i32) +DEF_HELPER_2(neon_max_u8, i32, i32, i32) +DEF_HELPER_2(neon_max_s8, i32, i32, i32) +DEF_HELPER_2(neon_max_u16, i32, i32, i32) +DEF_HELPER_2(neon_max_s16, i32, i32, i32) +DEF_HELPER_2(neon_max_u32, i32, i32, i32) +DEF_HELPER_2(neon_max_s32, i32, i32, i32) +DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) +DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) +DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) +DEF_HELPER_2(neon_pmin_s16, i32, i32, i32) +DEF_HELPER_2(neon_pmax_u8, i32, i32, i32) +DEF_HELPER_2(neon_pmax_s8, i32, i32, i32) +DEF_HELPER_2(neon_pmax_u16, i32, i32, i32) +DEF_HELPER_2(neon_pmax_s16, i32, i32, i32) + +DEF_HELPER_2(neon_abd_u8, i32, i32, i32) +DEF_HELPER_2(neon_abd_s8, i32, i32, i32) +DEF_HELPER_2(neon_abd_u16, i32, i32, i32) +DEF_HELPER_2(neon_abd_s16, i32, i32, i32) +DEF_HELPER_2(neon_abd_u32, i32, i32, i32) +DEF_HELPER_2(neon_abd_s32, i32, i32, i32) + +DEF_HELPER_2(neon_shl_u8, i32, i32, i32) +DEF_HELPER_2(neon_shl_s8, i32, i32, i32) +DEF_HELPER_2(neon_shl_u16, i32, i32, i32) +DEF_HELPER_2(neon_shl_s16, i32, i32, i32) +DEF_HELPER_2(neon_shl_u32, i32, i32, i32) +DEF_HELPER_2(neon_shl_s32, i32, i32, i32) +DEF_HELPER_2(neon_shl_u64, i64, i64, i64) +DEF_HELPER_2(neon_shl_s64, i64, i64, i64) +DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s8, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s16, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u32, i32, i32, i32) +DEF_HELPER_2(neon_rshl_s32, i32, i32, i32) +DEF_HELPER_2(neon_rshl_u64, i64, i64, i64) +DEF_HELPER_2(neon_rshl_s64, i64, i64, i64) +DEF_HELPER_3(neon_qshl_u8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshl_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qshl_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qshlu_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qshlu_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qrshl_u8, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s8, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) + +DEF_HELPER_2(neon_add_u8, i32, i32, i32) +DEF_HELPER_2(neon_add_u16, i32, i32, i32) +DEF_HELPER_2(neon_padd_u8, i32, i32, i32) +DEF_HELPER_2(neon_padd_u16, i32, i32, i32) +DEF_HELPER_2(neon_sub_u8, i32, i32, i32) +DEF_HELPER_2(neon_sub_u16, i32, i32, i32) +DEF_HELPER_2(neon_mul_u8, i32, i32, i32) +DEF_HELPER_2(neon_mul_u16, i32, i32, i32) +DEF_HELPER_2(neon_mul_p8, i32, i32, i32) +DEF_HELPER_2(neon_mull_p8, i64, i32, i32) + +DEF_HELPER_2(neon_tst_u8, i32, i32, i32) +DEF_HELPER_2(neon_tst_u16, i32, i32, i32) +DEF_HELPER_2(neon_tst_u32, i32, i32, i32) +DEF_HELPER_2(neon_ceq_u8, i32, i32, i32) +DEF_HELPER_2(neon_ceq_u16, i32, i32, i32) +DEF_HELPER_2(neon_ceq_u32, i32, i32, i32) + +DEF_HELPER_1(neon_abs_s8, i32, i32) +DEF_HELPER_1(neon_abs_s16, i32, i32) +DEF_HELPER_1(neon_clz_u8, i32, i32) +DEF_HELPER_1(neon_clz_u16, i32, i32) +DEF_HELPER_1(neon_cls_s8, i32, i32) +DEF_HELPER_1(neon_cls_s16, i32, i32) +DEF_HELPER_1(neon_cls_s32, i32, i32) +DEF_HELPER_1(neon_cnt_u8, i32, i32) +DEF_HELPER_FLAGS_1(neon_rbit_u8, TCG_CALL_NO_RWG_SE, i32, i32) + +DEF_HELPER_3(neon_qdmulh_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qrdmulh_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qdmulh_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qrdmulh_s32, i32, env, i32, i32) + +DEF_HELPER_1(neon_narrow_u8, i32, i64) +DEF_HELPER_1(neon_narrow_u16, i32, i64) +DEF_HELPER_2(neon_unarrow_sat8, i32, env, i64) +DEF_HELPER_2(neon_narrow_sat_u8, i32, env, i64) +DEF_HELPER_2(neon_narrow_sat_s8, i32, env, i64) +DEF_HELPER_2(neon_unarrow_sat16, i32, env, i64) +DEF_HELPER_2(neon_narrow_sat_u16, i32, env, i64) +DEF_HELPER_2(neon_narrow_sat_s16, i32, env, i64) +DEF_HELPER_2(neon_unarrow_sat32, i32, env, i64) +DEF_HELPER_2(neon_narrow_sat_u32, i32, env, i64) +DEF_HELPER_2(neon_narrow_sat_s32, i32, env, i64) +DEF_HELPER_1(neon_narrow_high_u8, i32, i64) +DEF_HELPER_1(neon_narrow_high_u16, i32, i64) +DEF_HELPER_1(neon_narrow_round_high_u8, i32, i64) +DEF_HELPER_1(neon_narrow_round_high_u16, i32, i64) +DEF_HELPER_1(neon_widen_u8, i64, i32) +DEF_HELPER_1(neon_widen_s8, i64, i32) +DEF_HELPER_1(neon_widen_u16, i64, i32) +DEF_HELPER_1(neon_widen_s16, i64, i32) + +DEF_HELPER_2(neon_addl_u16, i64, i64, i64) +DEF_HELPER_2(neon_addl_u32, i64, i64, i64) +DEF_HELPER_2(neon_paddl_u16, i64, i64, i64) +DEF_HELPER_2(neon_paddl_u32, i64, i64, i64) +DEF_HELPER_2(neon_subl_u16, i64, i64, i64) +DEF_HELPER_2(neon_subl_u32, i64, i64, i64) +DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64) +DEF_HELPER_3(neon_addl_saturate_s64, i64, env, i64, i64) +DEF_HELPER_2(neon_abdl_u16, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s16, i64, i32, i32) +DEF_HELPER_2(neon_abdl_u32, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s32, i64, i32, i32) +DEF_HELPER_2(neon_abdl_u64, i64, i32, i32) +DEF_HELPER_2(neon_abdl_s64, i64, i32, i32) +DEF_HELPER_2(neon_mull_u8, i64, i32, i32) +DEF_HELPER_2(neon_mull_s8, i64, i32, i32) +DEF_HELPER_2(neon_mull_u16, i64, i32, i32) +DEF_HELPER_2(neon_mull_s16, i64, i32, i32) + +DEF_HELPER_1(neon_negl_u16, i64, i64) +DEF_HELPER_1(neon_negl_u32, i64, i64) + +DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64) +DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32) +DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64) + +DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_acge_f64, i64, i64, i64, ptr) +DEF_HELPER_3(neon_acgt_f64, i64, i64, i64, ptr) + +/* iwmmxt_helper.c */ +DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) +DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64) +DEF_HELPER_2(iwmmxt_sadb, i64, i64, i64) +DEF_HELPER_2(iwmmxt_sadw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_mulslw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_mulshw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_mululw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_muluhw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_macsw, i64, i64, i64) +DEF_HELPER_2(iwmmxt_macuw, i64, i64, i64) +DEF_HELPER_1(iwmmxt_setpsr_nz, i32, i64) + +#define DEF_IWMMXT_HELPER_SIZE_ENV(name) \ +DEF_HELPER_3(iwmmxt_##name##b, i64, env, i64, i64) \ +DEF_HELPER_3(iwmmxt_##name##w, i64, env, i64, i64) \ +DEF_HELPER_3(iwmmxt_##name##l, i64, env, i64, i64) \ + +DEF_IWMMXT_HELPER_SIZE_ENV(unpackl) +DEF_IWMMXT_HELPER_SIZE_ENV(unpackh) + +DEF_HELPER_2(iwmmxt_unpacklub, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackluw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklul, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhub, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhuw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhul, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklsb, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklsw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpacklsl, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhsb, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhsw, i64, env, i64) +DEF_HELPER_2(iwmmxt_unpackhsl, i64, env, i64) + +DEF_IWMMXT_HELPER_SIZE_ENV(cmpeq) +DEF_IWMMXT_HELPER_SIZE_ENV(cmpgtu) +DEF_IWMMXT_HELPER_SIZE_ENV(cmpgts) + +DEF_IWMMXT_HELPER_SIZE_ENV(mins) +DEF_IWMMXT_HELPER_SIZE_ENV(minu) +DEF_IWMMXT_HELPER_SIZE_ENV(maxs) +DEF_IWMMXT_HELPER_SIZE_ENV(maxu) + +DEF_IWMMXT_HELPER_SIZE_ENV(subn) +DEF_IWMMXT_HELPER_SIZE_ENV(addn) +DEF_IWMMXT_HELPER_SIZE_ENV(subu) +DEF_IWMMXT_HELPER_SIZE_ENV(addu) +DEF_IWMMXT_HELPER_SIZE_ENV(subs) +DEF_IWMMXT_HELPER_SIZE_ENV(adds) + +DEF_HELPER_3(iwmmxt_avgb0, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64) + +DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32) +DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32) + +DEF_HELPER_1(iwmmxt_bcstb, i64, i32) +DEF_HELPER_1(iwmmxt_bcstw, i64, i32) +DEF_HELPER_1(iwmmxt_bcstl, i64, i32) + +DEF_HELPER_1(iwmmxt_addcb, i64, i64) +DEF_HELPER_1(iwmmxt_addcw, i64, i64) +DEF_HELPER_1(iwmmxt_addcl, i64, i64) + +DEF_HELPER_1(iwmmxt_msbb, i32, i64) +DEF_HELPER_1(iwmmxt_msbw, i32, i64) +DEF_HELPER_1(iwmmxt_msbl, i32, i64) + +DEF_HELPER_3(iwmmxt_srlw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_srll, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_srlq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sllw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_slll, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sllq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sraw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sral, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_sraq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_rorw, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_rorl, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_rorq, i64, env, i64, i32) +DEF_HELPER_3(iwmmxt_shufh, i64, env, i64, i32) + +DEF_HELPER_3(iwmmxt_packuw, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packul, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packuq, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packsw, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packsl, i64, env, i64, i64) +DEF_HELPER_3(iwmmxt_packsq, i64, env, i64, i64) + +DEF_HELPER_3(iwmmxt_muladdsl, i64, i64, i32, i32) +DEF_HELPER_3(iwmmxt_muladdsw, i64, i64, i32, i32) +DEF_HELPER_3(iwmmxt_muladdswl, i64, i64, i32, i32) + +DEF_HELPER_3(neon_unzip8, void, env, i32, i32) +DEF_HELPER_3(neon_unzip16, void, env, i32, i32) +DEF_HELPER_3(neon_qunzip8, void, env, i32, i32) +DEF_HELPER_3(neon_qunzip16, void, env, i32, i32) +DEF_HELPER_3(neon_qunzip32, void, env, i32, i32) +DEF_HELPER_3(neon_zip8, void, env, i32, i32) +DEF_HELPER_3(neon_zip16, void, env, i32, i32) +DEF_HELPER_3(neon_qzip8, void, env, i32, i32) +DEF_HELPER_3(neon_qzip16, void, env, i32, i32) +DEF_HELPER_3(neon_qzip32, void, env, i32, i32) + +DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32) +DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32) + +DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32) +DEF_HELPER_3(crypto_sha1h, void, env, i32, i32) +DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32) + +DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32) +DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32) +DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32) +DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32) + +DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) +DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) +DEF_HELPER_2(dc_zva, void, env, i64) + +DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) + +#ifdef TARGET_AARCH64 +#include "helper-a64.h" +#endif diff --git a/target-arm/internals.h b/target-arm/internals.h new file mode 100644 index 00000000..924aff9d --- /dev/null +++ b/target-arm/internals.h @@ -0,0 +1,394 @@ +/* + * QEMU ARM CPU -- internal functions and types + * + * Copyright (c) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + * + * This header defines functions, types, etc which need to be shared + * between different source files within target-arm/ but which are + * private to it and not required by the rest of QEMU. + */ + +#ifndef TARGET_ARM_INTERNALS_H +#define TARGET_ARM_INTERNALS_H + +static inline bool excp_is_internal(int excp) +{ + /* Return true if this exception number represents a QEMU-internal + * exception that will not be passed to the guest. + */ + return excp == EXCP_INTERRUPT + || excp == EXCP_HLT + || excp == EXCP_DEBUG + || excp == EXCP_HALTED + || excp == EXCP_EXCEPTION_EXIT + || excp == EXCP_KERNEL_TRAP + || excp == EXCP_STREX; +} + +/* Exception names for debug logging; note that not all of these + * precisely correspond to architectural exceptions. + */ +static const char * const excnames[] = { + [EXCP_UDEF] = "Undefined Instruction", + [EXCP_SWI] = "SVC", + [EXCP_PREFETCH_ABORT] = "Prefetch Abort", + [EXCP_DATA_ABORT] = "Data Abort", + [EXCP_IRQ] = "IRQ", + [EXCP_FIQ] = "FIQ", + [EXCP_BKPT] = "Breakpoint", + [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit", + [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage", + [EXCP_STREX] = "QEMU intercept of STREX", + [EXCP_HVC] = "Hypervisor Call", + [EXCP_HYP_TRAP] = "Hypervisor Trap", + [EXCP_SMC] = "Secure Monitor Call", + [EXCP_VIRQ] = "Virtual IRQ", + [EXCP_VFIQ] = "Virtual FIQ", +}; + +static inline void arm_log_exception(int idx) +{ + if (qemu_loglevel_mask(CPU_LOG_INT)) { + const char *exc = NULL; + + if (idx >= 0 && idx < ARRAY_SIZE(excnames)) { + exc = excnames[idx]; + } + if (!exc) { + exc = "unknown"; + } + qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc); + } +} + +/* Scale factor for generic timers, ie number of ns per tick. + * This gives a 62.5MHz timer. + */ +#define GTIMER_SCALE 16 + +/* + * For AArch64, map a given EL to an index in the banked_spsr array. + * Note that this mapping and the AArch32 mapping defined in bank_number() + * must agree such that the AArch64<->AArch32 SPSRs have the architecturally + * mandated mapping between each other. + */ +static inline unsigned int aarch64_banked_spsr_index(unsigned int el) +{ + static const unsigned int map[4] = { + [1] = 1, /* EL1. */ + [2] = 6, /* EL2. */ + [3] = 7, /* EL3. */ + }; + assert(el >= 1 && el <= 3); + return map[el]; +} + +int bank_number(int mode); +void switch_mode(CPUARMState *, int); +void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); +void arm_translate_init(void); + +enum arm_fprounding { + FPROUNDING_TIEEVEN, + FPROUNDING_POSINF, + FPROUNDING_NEGINF, + FPROUNDING_ZERO, + FPROUNDING_TIEAWAY, + FPROUNDING_ODD +}; + +int arm_rmode_to_sf(int rmode); + +static inline void aarch64_save_sp(CPUARMState *env, int el) +{ + if (env->pstate & PSTATE_SP) { + env->sp_el[el] = env->xregs[31]; + } else { + env->sp_el[0] = env->xregs[31]; + } +} + +static inline void aarch64_restore_sp(CPUARMState *env, int el) +{ + if (env->pstate & PSTATE_SP) { + env->xregs[31] = env->sp_el[el]; + } else { + env->xregs[31] = env->sp_el[0]; + } +} + +static inline void update_spsel(CPUARMState *env, uint32_t imm) +{ + unsigned int cur_el = arm_current_el(env); + /* Update PSTATE SPSel bit; this requires us to update the + * working stack pointer in xregs[31]. + */ + if (!((imm ^ env->pstate) & PSTATE_SP)) { + return; + } + aarch64_save_sp(env, cur_el); + env->pstate = deposit32(env->pstate, 0, 1, imm); + + /* We rely on illegal updates to SPsel from EL0 to get trapped + * at translation time. + */ + assert(cur_el >= 1 && cur_el <= 3); + aarch64_restore_sp(env, cur_el); +} + +/* Return true if extended addresses are enabled. + * This is always the case if our translation regime is 64 bit, + * but depends on TTBCR.EAE for 32 bit. + */ +static inline bool extended_addresses_enabled(CPUARMState *env) +{ + TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1]; + return arm_el_is_aa64(env, 1) || + (arm_feature(env, ARM_FEATURE_LPAE) && (tcr->raw_tcr & TTBCR_EAE)); +} + +/* Valid Syndrome Register EC field values */ +enum arm_exception_class { + EC_UNCATEGORIZED = 0x00, + EC_WFX_TRAP = 0x01, + EC_CP15RTTRAP = 0x03, + EC_CP15RRTTRAP = 0x04, + EC_CP14RTTRAP = 0x05, + EC_CP14DTTRAP = 0x06, + EC_ADVSIMDFPACCESSTRAP = 0x07, + EC_FPIDTRAP = 0x08, + EC_CP14RRTTRAP = 0x0c, + EC_ILLEGALSTATE = 0x0e, + EC_AA32_SVC = 0x11, + EC_AA32_HVC = 0x12, + EC_AA32_SMC = 0x13, + EC_AA64_SVC = 0x15, + EC_AA64_HVC = 0x16, + EC_AA64_SMC = 0x17, + EC_SYSTEMREGISTERTRAP = 0x18, + EC_INSNABORT = 0x20, + EC_INSNABORT_SAME_EL = 0x21, + EC_PCALIGNMENT = 0x22, + EC_DATAABORT = 0x24, + EC_DATAABORT_SAME_EL = 0x25, + EC_SPALIGNMENT = 0x26, + EC_AA32_FPTRAP = 0x28, + EC_AA64_FPTRAP = 0x2c, + EC_SERROR = 0x2f, + EC_BREAKPOINT = 0x30, + EC_BREAKPOINT_SAME_EL = 0x31, + EC_SOFTWARESTEP = 0x32, + EC_SOFTWARESTEP_SAME_EL = 0x33, + EC_WATCHPOINT = 0x34, + EC_WATCHPOINT_SAME_EL = 0x35, + EC_AA32_BKPT = 0x38, + EC_VECTORCATCH = 0x3a, + EC_AA64_BKPT = 0x3c, +}; + +#define ARM_EL_EC_SHIFT 26 +#define ARM_EL_IL_SHIFT 25 +#define ARM_EL_IL (1 << ARM_EL_IL_SHIFT) + +/* Utility functions for constructing various kinds of syndrome value. + * Note that in general we follow the AArch64 syndrome values; in a + * few cases the value in HSR for exceptions taken to AArch32 Hyp + * mode differs slightly, so if we ever implemented Hyp mode then the + * syndrome value would need some massaging on exception entry. + * (One example of this is that AArch64 defaults to IL bit set for + * exceptions which don't specifically indicate information about the + * trapping instruction, whereas AArch32 defaults to IL bit clear.) + */ +static inline uint32_t syn_uncategorized(void) +{ + return (EC_UNCATEGORIZED << ARM_EL_EC_SHIFT) | ARM_EL_IL; +} + +static inline uint32_t syn_aa64_svc(uint32_t imm16) +{ + return (EC_AA64_SVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff); +} + +static inline uint32_t syn_aa64_hvc(uint32_t imm16) +{ + return (EC_AA64_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff); +} + +static inline uint32_t syn_aa64_smc(uint32_t imm16) +{ + return (EC_AA64_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff); +} + +static inline uint32_t syn_aa32_svc(uint32_t imm16, bool is_thumb) +{ + return (EC_AA32_SVC << ARM_EL_EC_SHIFT) | (imm16 & 0xffff) + | (is_thumb ? 0 : ARM_EL_IL); +} + +static inline uint32_t syn_aa32_hvc(uint32_t imm16) +{ + return (EC_AA32_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff); +} + +static inline uint32_t syn_aa32_smc(void) +{ + return (EC_AA32_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL; +} + +static inline uint32_t syn_aa64_bkpt(uint32_t imm16) +{ + return (EC_AA64_BKPT << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff); +} + +static inline uint32_t syn_aa32_bkpt(uint32_t imm16, bool is_thumb) +{ + return (EC_AA32_BKPT << ARM_EL_EC_SHIFT) | (imm16 & 0xffff) + | (is_thumb ? 0 : ARM_EL_IL); +} + +static inline uint32_t syn_aa64_sysregtrap(int op0, int op1, int op2, + int crn, int crm, int rt, + int isread) +{ + return (EC_SYSTEMREGISTERTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL + | (op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (rt << 5) + | (crm << 1) | isread; +} + +static inline uint32_t syn_cp14_rt_trap(int cv, int cond, int opc1, int opc2, + int crn, int crm, int rt, int isread, + bool is_thumb) +{ + return (EC_CP14RTTRAP << ARM_EL_EC_SHIFT) + | (is_thumb ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14) + | (crn << 10) | (rt << 5) | (crm << 1) | isread; +} + +static inline uint32_t syn_cp15_rt_trap(int cv, int cond, int opc1, int opc2, + int crn, int crm, int rt, int isread, + bool is_thumb) +{ + return (EC_CP15RTTRAP << ARM_EL_EC_SHIFT) + | (is_thumb ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14) + | (crn << 10) | (rt << 5) | (crm << 1) | isread; +} + +static inline uint32_t syn_cp14_rrt_trap(int cv, int cond, int opc1, int crm, + int rt, int rt2, int isread, + bool is_thumb) +{ + return (EC_CP14RRTTRAP << ARM_EL_EC_SHIFT) + | (is_thumb ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | (opc1 << 16) + | (rt2 << 10) | (rt << 5) | (crm << 1) | isread; +} + +static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm, + int rt, int rt2, int isread, + bool is_thumb) +{ + return (EC_CP15RRTTRAP << ARM_EL_EC_SHIFT) + | (is_thumb ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20) | (opc1 << 16) + | (rt2 << 10) | (rt << 5) | (crm << 1) | isread; +} + +static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_thumb) +{ + return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) + | (is_thumb ? 0 : ARM_EL_IL) + | (cv << 24) | (cond << 20); +} + +static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc) +{ + return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) + | (ea << 9) | (s1ptw << 7) | fsc; +} + +static inline uint32_t syn_data_abort(int same_el, int ea, int cm, int s1ptw, + int wnr, int fsc) +{ + return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) + | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc; +} + +static inline uint32_t syn_swstep(int same_el, int isv, int ex) +{ + return (EC_SOFTWARESTEP << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) + | (isv << 24) | (ex << 6) | 0x22; +} + +static inline uint32_t syn_watchpoint(int same_el, int cm, int wnr) +{ + return (EC_WATCHPOINT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) + | (cm << 8) | (wnr << 6) | 0x22; +} + +static inline uint32_t syn_breakpoint(int same_el) +{ + return (EC_BREAKPOINT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) + | ARM_EL_IL | 0x22; +} + +static inline uint32_t syn_wfx(int cv, int cond, int ti) +{ + return (EC_WFX_TRAP << ARM_EL_EC_SHIFT) | + (cv << 24) | (cond << 20) | ti; +} + +/* Update a QEMU watchpoint based on the information the guest has set in the + * DBGWCR_EL1 and DBGWVR_EL1 registers. + */ +void hw_watchpoint_update(ARMCPU *cpu, int n); +/* Update the QEMU watchpoints for every guest watchpoint. This does a + * complete delete-and-reinstate of the QEMU watchpoint list and so is + * suitable for use after migration or on reset. + */ +void hw_watchpoint_update_all(ARMCPU *cpu); +/* Update a QEMU breakpoint based on the information the guest has set in the + * DBGBCR_EL1 and DBGBVR_EL1 registers. + */ +void hw_breakpoint_update(ARMCPU *cpu, int n); +/* Update the QEMU breakpoints for every guest breakpoint. This does a + * complete delete-and-reinstate of the QEMU breakpoint list and so is + * suitable for use after migration or on reset. + */ +void hw_breakpoint_update_all(ARMCPU *cpu); + +/* Callback function for when a watchpoint or breakpoint triggers. */ +void arm_debug_excp_handler(CPUState *cs); + +#ifdef CONFIG_USER_ONLY +static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) +{ + return false; +} +#else +/* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */ +bool arm_is_psci_call(ARMCPU *cpu, int excp_type); +/* Actually handle a PSCI call */ +void arm_handle_psci_call(ARMCPU *cpu); +#endif + +/* Do a page table walk and add page to TLB if possible */ +bool arm_tlb_fill(CPUState *cpu, vaddr address, int rw, int mmu_idx, + uint32_t *fsr); + +#endif diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c new file mode 100644 index 00000000..a5069144 --- /dev/null +++ b/target-arm/iwmmxt_helper.c @@ -0,0 +1,672 @@ +/* + * iwMMXt micro operations for XScale. + * + * Copyright (c) 2007 OpenedHand, Ltd. + * Written by Andrzej Zaborowski + * Copyright (c) 2008 CodeSourcery + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include +#include + +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" + +/* iwMMXt macros extracted from GNU gdb. */ + +/* Set the SIMD wCASF flags for 8, 16, 32 or 64-bit operations. */ +#define SIMD8_SET( v, n, b) ((v != 0) << ((((b) + 1) * 4) + (n))) +#define SIMD16_SET(v, n, h) ((v != 0) << ((((h) + 1) * 8) + (n))) +#define SIMD32_SET(v, n, w) ((v != 0) << ((((w) + 1) * 16) + (n))) +#define SIMD64_SET(v, n) ((v != 0) << (32 + (n))) +/* Flags to pass as "n" above. */ +#define SIMD_NBIT -1 +#define SIMD_ZBIT -2 +#define SIMD_CBIT -3 +#define SIMD_VBIT -4 +/* Various status bit macros. */ +#define NBIT8(x) ((x) & 0x80) +#define NBIT16(x) ((x) & 0x8000) +#define NBIT32(x) ((x) & 0x80000000) +#define NBIT64(x) ((x) & 0x8000000000000000ULL) +#define ZBIT8(x) (((x) & 0xff) == 0) +#define ZBIT16(x) (((x) & 0xffff) == 0) +#define ZBIT32(x) (((x) & 0xffffffff) == 0) +#define ZBIT64(x) (x == 0) +/* Sign extension macros. */ +#define EXTEND8H(a) ((uint16_t) (int8_t) (a)) +#define EXTEND8(a) ((uint32_t) (int8_t) (a)) +#define EXTEND16(a) ((uint32_t) (int16_t) (a)) +#define EXTEND16S(a) ((int32_t) (int16_t) (a)) +#define EXTEND32(a) ((uint64_t) (int32_t) (a)) + +uint64_t HELPER(iwmmxt_maddsq)(uint64_t a, uint64_t b) +{ + a = (( + EXTEND16S((a >> 0) & 0xffff) * EXTEND16S((b >> 0) & 0xffff) + + EXTEND16S((a >> 16) & 0xffff) * EXTEND16S((b >> 16) & 0xffff) + ) & 0xffffffff) | ((uint64_t) ( + EXTEND16S((a >> 32) & 0xffff) * EXTEND16S((b >> 32) & 0xffff) + + EXTEND16S((a >> 48) & 0xffff) * EXTEND16S((b >> 48) & 0xffff) + ) << 32); + return a; +} + +uint64_t HELPER(iwmmxt_madduq)(uint64_t a, uint64_t b) +{ + a = (( + ((a >> 0) & 0xffff) * ((b >> 0) & 0xffff) + + ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff) + ) & 0xffffffff) | (( + ((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) + + ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff) + ) << 32); + return a; +} + +uint64_t HELPER(iwmmxt_sadb)(uint64_t a, uint64_t b) +{ +#define abs(x) (((x) >= 0) ? x : -x) +#define SADB(SHR) abs((int) ((a >> SHR) & 0xff) - (int) ((b >> SHR) & 0xff)) + return + SADB(0) + SADB(8) + SADB(16) + SADB(24) + + SADB(32) + SADB(40) + SADB(48) + SADB(56); +#undef SADB +} + +uint64_t HELPER(iwmmxt_sadw)(uint64_t a, uint64_t b) +{ +#define SADW(SHR) \ + abs((int) ((a >> SHR) & 0xffff) - (int) ((b >> SHR) & 0xffff)) + return SADW(0) + SADW(16) + SADW(32) + SADW(48); +#undef SADW +} + +uint64_t HELPER(iwmmxt_mulslw)(uint64_t a, uint64_t b) +{ +#define MULS(SHR) ((uint64_t) ((( \ + EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \ + ) >> 0) & 0xffff) << SHR) + return MULS(0) | MULS(16) | MULS(32) | MULS(48); +#undef MULS +} + +uint64_t HELPER(iwmmxt_mulshw)(uint64_t a, uint64_t b) +{ +#define MULS(SHR) ((uint64_t) ((( \ + EXTEND16S((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff) \ + ) >> 16) & 0xffff) << SHR) + return MULS(0) | MULS(16) | MULS(32) | MULS(48); +#undef MULS +} + +uint64_t HELPER(iwmmxt_mululw)(uint64_t a, uint64_t b) +{ +#define MULU(SHR) ((uint64_t) ((( \ + ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \ + ) >> 0) & 0xffff) << SHR) + return MULU(0) | MULU(16) | MULU(32) | MULU(48); +#undef MULU +} + +uint64_t HELPER(iwmmxt_muluhw)(uint64_t a, uint64_t b) +{ +#define MULU(SHR) ((uint64_t) ((( \ + ((a >> SHR) & 0xffff) * ((b >> SHR) & 0xffff) \ + ) >> 16) & 0xffff) << SHR) + return MULU(0) | MULU(16) | MULU(32) | MULU(48); +#undef MULU +} + +uint64_t HELPER(iwmmxt_macsw)(uint64_t a, uint64_t b) +{ +#define MACS(SHR) ( \ + EXTEND16((a >> SHR) & 0xffff) * EXTEND16S((b >> SHR) & 0xffff)) + return (int64_t) (MACS(0) + MACS(16) + MACS(32) + MACS(48)); +#undef MACS +} + +uint64_t HELPER(iwmmxt_macuw)(uint64_t a, uint64_t b) +{ +#define MACU(SHR) ( \ + (uint32_t) ((a >> SHR) & 0xffff) * \ + (uint32_t) ((b >> SHR) & 0xffff)) + return MACU(0) + MACU(16) + MACU(32) + MACU(48); +#undef MACU +} + +#define NZBIT8(x, i) \ + SIMD8_SET(NBIT8((x) & 0xff), SIMD_NBIT, i) | \ + SIMD8_SET(ZBIT8((x) & 0xff), SIMD_ZBIT, i) +#define NZBIT16(x, i) \ + SIMD16_SET(NBIT16((x) & 0xffff), SIMD_NBIT, i) | \ + SIMD16_SET(ZBIT16((x) & 0xffff), SIMD_ZBIT, i) +#define NZBIT32(x, i) \ + SIMD32_SET(NBIT32((x) & 0xffffffff), SIMD_NBIT, i) | \ + SIMD32_SET(ZBIT32((x) & 0xffffffff), SIMD_ZBIT, i) +#define NZBIT64(x) \ + SIMD64_SET(NBIT64(x), SIMD_NBIT) | \ + SIMD64_SET(ZBIT64(x), SIMD_ZBIT) +#define IWMMXT_OP_UNPACK(S, SH0, SH1, SH2, SH3) \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, b)))(CPUARMState *env, \ + uint64_t a, uint64_t b) \ +{ \ + a = \ + (((a >> SH0) & 0xff) << 0) | (((b >> SH0) & 0xff) << 8) | \ + (((a >> SH1) & 0xff) << 16) | (((b >> SH1) & 0xff) << 24) | \ + (((a >> SH2) & 0xff) << 32) | (((b >> SH2) & 0xff) << 40) | \ + (((a >> SH3) & 0xff) << 48) | (((b >> SH3) & 0xff) << 56); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | \ + NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | \ + NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | \ + NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ + return a; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, w)))(CPUARMState *env, \ + uint64_t a, uint64_t b) \ +{ \ + a = \ + (((a >> SH0) & 0xffff) << 0) | \ + (((b >> SH0) & 0xffff) << 16) | \ + (((a >> SH2) & 0xffff) << 32) | \ + (((b >> SH2) & 0xffff) << 48); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT8(a >> 0, 0) | NZBIT8(a >> 16, 1) | \ + NZBIT8(a >> 32, 2) | NZBIT8(a >> 48, 3); \ + return a; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, l)))(CPUARMState *env, \ + uint64_t a, uint64_t b) \ +{ \ + a = \ + (((a >> SH0) & 0xffffffff) << 0) | \ + (((b >> SH0) & 0xffffffff) << 32); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \ + return a; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ub)))(CPUARMState *env, \ + uint64_t x) \ +{ \ + x = \ + (((x >> SH0) & 0xff) << 0) | \ + (((x >> SH1) & 0xff) << 16) | \ + (((x >> SH2) & 0xff) << 32) | \ + (((x >> SH3) & 0xff) << 48); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | \ + NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ + return x; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, uw)))(CPUARMState *env, \ + uint64_t x) \ +{ \ + x = \ + (((x >> SH0) & 0xffff) << 0) | \ + (((x >> SH2) & 0xffff) << 32); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ + return x; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, ul)))(CPUARMState *env, \ + uint64_t x) \ +{ \ + x = (((x >> SH0) & 0xffffffff) << 0); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ + return x; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sb)))(CPUARMState *env, \ + uint64_t x) \ +{ \ + x = \ + ((uint64_t) EXTEND8H((x >> SH0) & 0xff) << 0) | \ + ((uint64_t) EXTEND8H((x >> SH1) & 0xff) << 16) | \ + ((uint64_t) EXTEND8H((x >> SH2) & 0xff) << 32) | \ + ((uint64_t) EXTEND8H((x >> SH3) & 0xff) << 48); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | \ + NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); \ + return x; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sw)))(CPUARMState *env, \ + uint64_t x) \ +{ \ + x = \ + ((uint64_t) EXTEND16((x >> SH0) & 0xffff) << 0) | \ + ((uint64_t) EXTEND16((x >> SH2) & 0xffff) << 32); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); \ + return x; \ +} \ +uint64_t HELPER(glue(iwmmxt_unpack, glue(S, sl)))(CPUARMState *env, \ + uint64_t x) \ +{ \ + x = EXTEND32((x >> SH0) & 0xffffffff); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x >> 0); \ + return x; \ +} +IWMMXT_OP_UNPACK(l, 0, 8, 16, 24) +IWMMXT_OP_UNPACK(h, 32, 40, 48, 56) + +#define IWMMXT_OP_CMP(SUFF, Tb, Tw, Tl, O) \ +uint64_t HELPER(glue(iwmmxt_, glue(SUFF, b)))(CPUARMState *env, \ + uint64_t a, uint64_t b) \ +{ \ + a = \ + CMP(0, Tb, O, 0xff) | CMP(8, Tb, O, 0xff) | \ + CMP(16, Tb, O, 0xff) | CMP(24, Tb, O, 0xff) | \ + CMP(32, Tb, O, 0xff) | CMP(40, Tb, O, 0xff) | \ + CMP(48, Tb, O, 0xff) | CMP(56, Tb, O, 0xff); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | \ + NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | \ + NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | \ + NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); \ + return a; \ +} \ +uint64_t HELPER(glue(iwmmxt_, glue(SUFF, w)))(CPUARMState *env, \ + uint64_t a, uint64_t b) \ +{ \ + a = CMP(0, Tw, O, 0xffff) | CMP(16, Tw, O, 0xffff) | \ + CMP(32, Tw, O, 0xffff) | CMP(48, Tw, O, 0xffff); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | \ + NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); \ + return a; \ +} \ +uint64_t HELPER(glue(iwmmxt_, glue(SUFF, l)))(CPUARMState *env, \ + uint64_t a, uint64_t b) \ +{ \ + a = CMP(0, Tl, O, 0xffffffff) | \ + CMP(32, Tl, O, 0xffffffff); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); \ + return a; \ +} +#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \ + (TYPE) ((b >> SHR) & MASK)) ? (uint64_t) MASK : 0) << SHR) +IWMMXT_OP_CMP(cmpeq, uint8_t, uint16_t, uint32_t, ==) +IWMMXT_OP_CMP(cmpgts, int8_t, int16_t, int32_t, >) +IWMMXT_OP_CMP(cmpgtu, uint8_t, uint16_t, uint32_t, >) +#undef CMP +#define CMP(SHR, TYPE, OPER, MASK) ((((TYPE) ((a >> SHR) & MASK) OPER \ + (TYPE) ((b >> SHR) & MASK)) ? a : b) & ((uint64_t) MASK << SHR)) +IWMMXT_OP_CMP(mins, int8_t, int16_t, int32_t, <) +IWMMXT_OP_CMP(minu, uint8_t, uint16_t, uint32_t, <) +IWMMXT_OP_CMP(maxs, int8_t, int16_t, int32_t, >) +IWMMXT_OP_CMP(maxu, uint8_t, uint16_t, uint32_t, >) +#undef CMP +#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \ + OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR) +IWMMXT_OP_CMP(subn, uint8_t, uint16_t, uint32_t, -) +IWMMXT_OP_CMP(addn, uint8_t, uint16_t, uint32_t, +) +#undef CMP +/* TODO Signed- and Unsigned-Saturation */ +#define CMP(SHR, TYPE, OPER, MASK) ((uint64_t) (((TYPE) ((a >> SHR) & MASK) \ + OPER (TYPE) ((b >> SHR) & MASK)) & MASK) << SHR) +IWMMXT_OP_CMP(subu, uint8_t, uint16_t, uint32_t, -) +IWMMXT_OP_CMP(addu, uint8_t, uint16_t, uint32_t, +) +IWMMXT_OP_CMP(subs, int8_t, int16_t, int32_t, -) +IWMMXT_OP_CMP(adds, int8_t, int16_t, int32_t, +) +#undef CMP +#undef IWMMXT_OP_CMP + +#define AVGB(SHR) ((( \ + ((a >> SHR) & 0xff) + ((b >> SHR) & 0xff) + round) >> 1) << SHR) +#define IWMMXT_OP_AVGB(r) \ +uint64_t HELPER(iwmmxt_avgb##r)(CPUARMState *env, uint64_t a, uint64_t b) \ +{ \ + const int round = r; \ + a = AVGB(0) | AVGB(8) | AVGB(16) | AVGB(24) | \ + AVGB(32) | AVGB(40) | AVGB(48) | AVGB(56); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + SIMD8_SET(ZBIT8((a >> 0) & 0xff), SIMD_ZBIT, 0) | \ + SIMD8_SET(ZBIT8((a >> 8) & 0xff), SIMD_ZBIT, 1) | \ + SIMD8_SET(ZBIT8((a >> 16) & 0xff), SIMD_ZBIT, 2) | \ + SIMD8_SET(ZBIT8((a >> 24) & 0xff), SIMD_ZBIT, 3) | \ + SIMD8_SET(ZBIT8((a >> 32) & 0xff), SIMD_ZBIT, 4) | \ + SIMD8_SET(ZBIT8((a >> 40) & 0xff), SIMD_ZBIT, 5) | \ + SIMD8_SET(ZBIT8((a >> 48) & 0xff), SIMD_ZBIT, 6) | \ + SIMD8_SET(ZBIT8((a >> 56) & 0xff), SIMD_ZBIT, 7); \ + return a; \ +} +IWMMXT_OP_AVGB(0) +IWMMXT_OP_AVGB(1) +#undef IWMMXT_OP_AVGB +#undef AVGB + +#define AVGW(SHR) ((( \ + ((a >> SHR) & 0xffff) + ((b >> SHR) & 0xffff) + round) >> 1) << SHR) +#define IWMMXT_OP_AVGW(r) \ +uint64_t HELPER(iwmmxt_avgw##r)(CPUARMState *env, uint64_t a, uint64_t b) \ +{ \ + const int round = r; \ + a = AVGW(0) | AVGW(16) | AVGW(32) | AVGW(48); \ + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = \ + SIMD16_SET(ZBIT16((a >> 0) & 0xffff), SIMD_ZBIT, 0) | \ + SIMD16_SET(ZBIT16((a >> 16) & 0xffff), SIMD_ZBIT, 1) | \ + SIMD16_SET(ZBIT16((a >> 32) & 0xffff), SIMD_ZBIT, 2) | \ + SIMD16_SET(ZBIT16((a >> 48) & 0xffff), SIMD_ZBIT, 3); \ + return a; \ +} +IWMMXT_OP_AVGW(0) +IWMMXT_OP_AVGW(1) +#undef IWMMXT_OP_AVGW +#undef AVGW + +uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n) +{ + a >>= n << 3; + a |= b << (64 - (n << 3)); + return a; +} + +uint64_t HELPER(iwmmxt_insr)(uint64_t x, uint32_t a, uint32_t b, uint32_t n) +{ + x &= ~((uint64_t) b << n); + x |= (uint64_t) (a & b) << n; + return x; +} + +uint32_t HELPER(iwmmxt_setpsr_nz)(uint64_t x) +{ + return SIMD64_SET((x == 0), SIMD_ZBIT) | + SIMD64_SET((x & (1ULL << 63)), SIMD_NBIT); +} + +uint64_t HELPER(iwmmxt_bcstb)(uint32_t arg) +{ + arg &= 0xff; + return + ((uint64_t) arg << 0 ) | ((uint64_t) arg << 8 ) | + ((uint64_t) arg << 16) | ((uint64_t) arg << 24) | + ((uint64_t) arg << 32) | ((uint64_t) arg << 40) | + ((uint64_t) arg << 48) | ((uint64_t) arg << 56); +} + +uint64_t HELPER(iwmmxt_bcstw)(uint32_t arg) +{ + arg &= 0xffff; + return + ((uint64_t) arg << 0 ) | ((uint64_t) arg << 16) | + ((uint64_t) arg << 32) | ((uint64_t) arg << 48); +} + +uint64_t HELPER(iwmmxt_bcstl)(uint32_t arg) +{ + return arg | ((uint64_t) arg << 32); +} + +uint64_t HELPER(iwmmxt_addcb)(uint64_t x) +{ + return + ((x >> 0) & 0xff) + ((x >> 8) & 0xff) + + ((x >> 16) & 0xff) + ((x >> 24) & 0xff) + + ((x >> 32) & 0xff) + ((x >> 40) & 0xff) + + ((x >> 48) & 0xff) + ((x >> 56) & 0xff); +} + +uint64_t HELPER(iwmmxt_addcw)(uint64_t x) +{ + return + ((x >> 0) & 0xffff) + ((x >> 16) & 0xffff) + + ((x >> 32) & 0xffff) + ((x >> 48) & 0xffff); +} + +uint64_t HELPER(iwmmxt_addcl)(uint64_t x) +{ + return (x & 0xffffffff) + (x >> 32); +} + +uint32_t HELPER(iwmmxt_msbb)(uint64_t x) +{ + return + ((x >> 7) & 0x01) | ((x >> 14) & 0x02) | + ((x >> 21) & 0x04) | ((x >> 28) & 0x08) | + ((x >> 35) & 0x10) | ((x >> 42) & 0x20) | + ((x >> 49) & 0x40) | ((x >> 56) & 0x80); +} + +uint32_t HELPER(iwmmxt_msbw)(uint64_t x) +{ + return + ((x >> 15) & 0x01) | ((x >> 30) & 0x02) | + ((x >> 45) & 0x04) | ((x >> 52) & 0x08); +} + +uint32_t HELPER(iwmmxt_msbl)(uint64_t x) +{ + return ((x >> 31) & 0x01) | ((x >> 62) & 0x02); +} + +/* FIXME: Split wCASF setting into a separate op to avoid env use. */ +uint64_t HELPER(iwmmxt_srlw)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = (((x & (0xffffll << 0)) >> n) & (0xffffll << 0)) | + (((x & (0xffffll << 16)) >> n) & (0xffffll << 16)) | + (((x & (0xffffll << 32)) >> n) & (0xffffll << 32)) | + (((x & (0xffffll << 48)) >> n) & (0xffffll << 48)); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | + NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); + return x; +} + +uint64_t HELPER(iwmmxt_srll)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = ((x & (0xffffffffll << 0)) >> n) | + ((x >> n) & (0xffffffffll << 32)); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); + return x; +} + +uint64_t HELPER(iwmmxt_srlq)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x >>= n; + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); + return x; +} + +uint64_t HELPER(iwmmxt_sllw)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = (((x & (0xffffll << 0)) << n) & (0xffffll << 0)) | + (((x & (0xffffll << 16)) << n) & (0xffffll << 16)) | + (((x & (0xffffll << 32)) << n) & (0xffffll << 32)) | + (((x & (0xffffll << 48)) << n) & (0xffffll << 48)); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | + NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); + return x; +} + +uint64_t HELPER(iwmmxt_slll)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = ((x << n) & (0xffffffffll << 0)) | + ((x & (0xffffffffll << 32)) << n); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); + return x; +} + +uint64_t HELPER(iwmmxt_sllq)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x <<= n; + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); + return x; +} + +uint64_t HELPER(iwmmxt_sraw)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = ((uint64_t) ((EXTEND16(x >> 0) >> n) & 0xffff) << 0) | + ((uint64_t) ((EXTEND16(x >> 16) >> n) & 0xffff) << 16) | + ((uint64_t) ((EXTEND16(x >> 32) >> n) & 0xffff) << 32) | + ((uint64_t) ((EXTEND16(x >> 48) >> n) & 0xffff) << 48); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | + NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); + return x; +} + +uint64_t HELPER(iwmmxt_sral)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = (((EXTEND32(x >> 0) >> n) & 0xffffffff) << 0) | + (((EXTEND32(x >> 32) >> n) & 0xffffffff) << 32); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); + return x; +} + +uint64_t HELPER(iwmmxt_sraq)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = (int64_t) x >> n; + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); + return x; +} + +uint64_t HELPER(iwmmxt_rorw)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = ((((x & (0xffffll << 0)) >> n) | + ((x & (0xffffll << 0)) << (16 - n))) & (0xffffll << 0)) | + ((((x & (0xffffll << 16)) >> n) | + ((x & (0xffffll << 16)) << (16 - n))) & (0xffffll << 16)) | + ((((x & (0xffffll << 32)) >> n) | + ((x & (0xffffll << 32)) << (16 - n))) & (0xffffll << 32)) | + ((((x & (0xffffll << 48)) >> n) | + ((x & (0xffffll << 48)) << (16 - n))) & (0xffffll << 48)); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | + NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); + return x; +} + +uint64_t HELPER(iwmmxt_rorl)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = ((x & (0xffffffffll << 0)) >> n) | + ((x >> n) & (0xffffffffll << 32)) | + ((x << (32 - n)) & (0xffffffffll << 0)) | + ((x & (0xffffffffll << 32)) << (32 - n)); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT32(x >> 0, 0) | NZBIT32(x >> 32, 1); + return x; +} + +uint64_t HELPER(iwmmxt_rorq)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = ror64(x, n); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = NZBIT64(x); + return x; +} + +uint64_t HELPER(iwmmxt_shufh)(CPUARMState *env, uint64_t x, uint32_t n) +{ + x = (((x >> ((n << 4) & 0x30)) & 0xffff) << 0) | + (((x >> ((n << 2) & 0x30)) & 0xffff) << 16) | + (((x >> ((n << 0) & 0x30)) & 0xffff) << 32) | + (((x >> ((n >> 2) & 0x30)) & 0xffff) << 48); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT16(x >> 0, 0) | NZBIT16(x >> 16, 1) | + NZBIT16(x >> 32, 2) | NZBIT16(x >> 48, 3); + return x; +} + +/* TODO: Unsigned-Saturation */ +uint64_t HELPER(iwmmxt_packuw)(CPUARMState *env, uint64_t a, uint64_t b) +{ + a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | + (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | + (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) | + (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | + NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | + NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | + NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); + return a; +} + +uint64_t HELPER(iwmmxt_packul)(CPUARMState *env, uint64_t a, uint64_t b) +{ + a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | + (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | + NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); + return a; +} + +uint64_t HELPER(iwmmxt_packuq)(CPUARMState *env, uint64_t a, uint64_t b) +{ + a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); + return a; +} + +/* TODO: Signed-Saturation */ +uint64_t HELPER(iwmmxt_packsw)(CPUARMState *env, uint64_t a, uint64_t b) +{ + a = (((a >> 0) & 0xff) << 0) | (((a >> 16) & 0xff) << 8) | + (((a >> 32) & 0xff) << 16) | (((a >> 48) & 0xff) << 24) | + (((b >> 0) & 0xff) << 32) | (((b >> 16) & 0xff) << 40) | + (((b >> 32) & 0xff) << 48) | (((b >> 48) & 0xff) << 56); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT8(a >> 0, 0) | NZBIT8(a >> 8, 1) | + NZBIT8(a >> 16, 2) | NZBIT8(a >> 24, 3) | + NZBIT8(a >> 32, 4) | NZBIT8(a >> 40, 5) | + NZBIT8(a >> 48, 6) | NZBIT8(a >> 56, 7); + return a; +} + +uint64_t HELPER(iwmmxt_packsl)(CPUARMState *env, uint64_t a, uint64_t b) +{ + a = (((a >> 0) & 0xffff) << 0) | (((a >> 32) & 0xffff) << 16) | + (((b >> 0) & 0xffff) << 32) | (((b >> 32) & 0xffff) << 48); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT16(a >> 0, 0) | NZBIT16(a >> 16, 1) | + NZBIT16(a >> 32, 2) | NZBIT16(a >> 48, 3); + return a; +} + +uint64_t HELPER(iwmmxt_packsq)(CPUARMState *env, uint64_t a, uint64_t b) +{ + a = (a & 0xffffffff) | ((b & 0xffffffff) << 32); + env->iwmmxt.cregs[ARM_IWMMXT_wCASF] = + NZBIT32(a >> 0, 0) | NZBIT32(a >> 32, 1); + return a; +} + +uint64_t HELPER(iwmmxt_muladdsl)(uint64_t c, uint32_t a, uint32_t b) +{ + return c + ((int32_t) EXTEND32(a) * (int32_t) EXTEND32(b)); +} + +uint64_t HELPER(iwmmxt_muladdsw)(uint64_t c, uint32_t a, uint32_t b) +{ + c += EXTEND32(EXTEND16S((a >> 0) & 0xffff) * + EXTEND16S((b >> 0) & 0xffff)); + c += EXTEND32(EXTEND16S((a >> 16) & 0xffff) * + EXTEND16S((b >> 16) & 0xffff)); + return c; +} + +uint64_t HELPER(iwmmxt_muladdswl)(uint64_t c, uint32_t a, uint32_t b) +{ + return c + (EXTEND32(EXTEND16S(a & 0xffff) * + EXTEND16S(b & 0xffff))); +} diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h new file mode 100644 index 00000000..943bf898 --- /dev/null +++ b/target-arm/kvm-consts.h @@ -0,0 +1,186 @@ +/* + * KVM ARM ABI constant definitions + * + * Copyright (c) 2013 Linaro Limited + * + * Provide versions of KVM constant defines that can be used even + * when CONFIG_KVM is not set and we don't have access to the + * KVM headers. If CONFIG_KVM is set, we do a compile-time check + * that we haven't got out of sync somehow. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef ARM_KVM_CONSTS_H +#define ARM_KVM_CONSTS_H + +#ifdef CONFIG_KVM +#include "qemu/compiler.h" +#include +#include + +#define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y) + +#else +#define MISMATCH_CHECK(X, Y) +#endif + +#define CP_REG_SIZE_SHIFT 52 +#define CP_REG_SIZE_MASK 0x00f0000000000000ULL +#define CP_REG_SIZE_U32 0x0020000000000000ULL +#define CP_REG_SIZE_U64 0x0030000000000000ULL +#define CP_REG_ARM 0x4000000000000000ULL +#define CP_REG_ARCH_MASK 0xff00000000000000ULL + +MISMATCH_CHECK(CP_REG_SIZE_SHIFT, KVM_REG_SIZE_SHIFT) +MISMATCH_CHECK(CP_REG_SIZE_MASK, KVM_REG_SIZE_MASK) +MISMATCH_CHECK(CP_REG_SIZE_U32, KVM_REG_SIZE_U32) +MISMATCH_CHECK(CP_REG_SIZE_U64, KVM_REG_SIZE_U64) +MISMATCH_CHECK(CP_REG_ARM, KVM_REG_ARM) +MISMATCH_CHECK(CP_REG_ARCH_MASK, KVM_REG_ARCH_MASK) + +#define QEMU_PSCI_0_1_FN_BASE 0x95c1ba5e +#define QEMU_PSCI_0_1_FN(n) (QEMU_PSCI_0_1_FN_BASE + (n)) +#define QEMU_PSCI_0_1_FN_CPU_SUSPEND QEMU_PSCI_0_1_FN(0) +#define QEMU_PSCI_0_1_FN_CPU_OFF QEMU_PSCI_0_1_FN(1) +#define QEMU_PSCI_0_1_FN_CPU_ON QEMU_PSCI_0_1_FN(2) +#define QEMU_PSCI_0_1_FN_MIGRATE QEMU_PSCI_0_1_FN(3) + +MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_SUSPEND, KVM_PSCI_FN_CPU_SUSPEND) +MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF) +MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_ON, KVM_PSCI_FN_CPU_ON) +MISMATCH_CHECK(QEMU_PSCI_0_1_FN_MIGRATE, KVM_PSCI_FN_MIGRATE) + +#define QEMU_PSCI_0_2_FN_BASE 0x84000000 +#define QEMU_PSCI_0_2_FN(n) (QEMU_PSCI_0_2_FN_BASE + (n)) + +#define QEMU_PSCI_0_2_64BIT 0x40000000 +#define QEMU_PSCI_0_2_FN64_BASE \ + (QEMU_PSCI_0_2_FN_BASE + QEMU_PSCI_0_2_64BIT) +#define QEMU_PSCI_0_2_FN64(n) (QEMU_PSCI_0_2_FN64_BASE + (n)) + +#define QEMU_PSCI_0_2_FN_PSCI_VERSION QEMU_PSCI_0_2_FN(0) +#define QEMU_PSCI_0_2_FN_CPU_SUSPEND QEMU_PSCI_0_2_FN(1) +#define QEMU_PSCI_0_2_FN_CPU_OFF QEMU_PSCI_0_2_FN(2) +#define QEMU_PSCI_0_2_FN_CPU_ON QEMU_PSCI_0_2_FN(3) +#define QEMU_PSCI_0_2_FN_AFFINITY_INFO QEMU_PSCI_0_2_FN(4) +#define QEMU_PSCI_0_2_FN_MIGRATE QEMU_PSCI_0_2_FN(5) +#define QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE QEMU_PSCI_0_2_FN(6) +#define QEMU_PSCI_0_2_FN_MIGRATE_INFO_UP_CPU QEMU_PSCI_0_2_FN(7) +#define QEMU_PSCI_0_2_FN_SYSTEM_OFF QEMU_PSCI_0_2_FN(8) +#define QEMU_PSCI_0_2_FN_SYSTEM_RESET QEMU_PSCI_0_2_FN(9) + +#define QEMU_PSCI_0_2_FN64_CPU_SUSPEND QEMU_PSCI_0_2_FN64(1) +#define QEMU_PSCI_0_2_FN64_CPU_OFF QEMU_PSCI_0_2_FN64(2) +#define QEMU_PSCI_0_2_FN64_CPU_ON QEMU_PSCI_0_2_FN64(3) +#define QEMU_PSCI_0_2_FN64_AFFINITY_INFO QEMU_PSCI_0_2_FN64(4) +#define QEMU_PSCI_0_2_FN64_MIGRATE QEMU_PSCI_0_2_FN64(5) + +MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_SUSPEND, PSCI_0_2_FN_CPU_SUSPEND) +MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_OFF, PSCI_0_2_FN_CPU_OFF) +MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_ON, PSCI_0_2_FN_CPU_ON) +MISMATCH_CHECK(QEMU_PSCI_0_2_FN_MIGRATE, PSCI_0_2_FN_MIGRATE) +MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_SUSPEND, PSCI_0_2_FN64_CPU_SUSPEND) +MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_ON, PSCI_0_2_FN64_CPU_ON) +MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_MIGRATE, PSCI_0_2_FN64_MIGRATE) + +/* PSCI v0.2 return values used by TCG emulation of PSCI */ + +/* No Trusted OS migration to worry about when offlining CPUs */ +#define QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED 2 + +/* We implement version 0.2 only */ +#define QEMU_PSCI_0_2_RET_VERSION_0_2 2 + +MISMATCH_CHECK(QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED, PSCI_0_2_TOS_MP) +MISMATCH_CHECK(QEMU_PSCI_0_2_RET_VERSION_0_2, + (PSCI_VERSION_MAJOR(0) | PSCI_VERSION_MINOR(2))) + +/* PSCI return values (inclusive of all PSCI versions) */ +#define QEMU_PSCI_RET_SUCCESS 0 +#define QEMU_PSCI_RET_NOT_SUPPORTED -1 +#define QEMU_PSCI_RET_INVALID_PARAMS -2 +#define QEMU_PSCI_RET_DENIED -3 +#define QEMU_PSCI_RET_ALREADY_ON -4 +#define QEMU_PSCI_RET_ON_PENDING -5 +#define QEMU_PSCI_RET_INTERNAL_FAILURE -6 +#define QEMU_PSCI_RET_NOT_PRESENT -7 +#define QEMU_PSCI_RET_DISABLED -8 + +MISMATCH_CHECK(QEMU_PSCI_RET_SUCCESS, PSCI_RET_SUCCESS) +MISMATCH_CHECK(QEMU_PSCI_RET_NOT_SUPPORTED, PSCI_RET_NOT_SUPPORTED) +MISMATCH_CHECK(QEMU_PSCI_RET_INVALID_PARAMS, PSCI_RET_INVALID_PARAMS) +MISMATCH_CHECK(QEMU_PSCI_RET_DENIED, PSCI_RET_DENIED) +MISMATCH_CHECK(QEMU_PSCI_RET_ALREADY_ON, PSCI_RET_ALREADY_ON) +MISMATCH_CHECK(QEMU_PSCI_RET_ON_PENDING, PSCI_RET_ON_PENDING) +MISMATCH_CHECK(QEMU_PSCI_RET_INTERNAL_FAILURE, PSCI_RET_INTERNAL_FAILURE) +MISMATCH_CHECK(QEMU_PSCI_RET_NOT_PRESENT, PSCI_RET_NOT_PRESENT) +MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED) + +/* Note that KVM uses overlapping values for AArch32 and AArch64 + * target CPU numbers. AArch32 targets: + */ +#define QEMU_KVM_ARM_TARGET_CORTEX_A15 0 +#define QEMU_KVM_ARM_TARGET_CORTEX_A7 1 + +/* AArch64 targets: */ +#define QEMU_KVM_ARM_TARGET_AEM_V8 0 +#define QEMU_KVM_ARM_TARGET_FOUNDATION_V8 1 +#define QEMU_KVM_ARM_TARGET_CORTEX_A57 2 +#define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3 +#define QEMU_KVM_ARM_TARGET_CORTEX_A53 4 + +/* There's no kernel define for this: sentinel value which + * matches no KVM target value for either 64 or 32 bit + */ +#define QEMU_KVM_ARM_TARGET_NONE UINT_MAX + +#ifdef TARGET_AARCH64 +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_AEM_V8, KVM_ARM_TARGET_AEM_V8) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53) +#else +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15) +MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7) +#endif + +#define CP_REG_ARM64 0x6000000000000000ULL +#define CP_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define CP_REG_ARM_COPROC_SHIFT 16 +#define CP_REG_ARM64_SYSREG (0x0013 << CP_REG_ARM_COPROC_SHIFT) +#define CP_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000 +#define CP_REG_ARM64_SYSREG_OP0_SHIFT 14 +#define CP_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800 +#define CP_REG_ARM64_SYSREG_OP1_SHIFT 11 +#define CP_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780 +#define CP_REG_ARM64_SYSREG_CRN_SHIFT 7 +#define CP_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078 +#define CP_REG_ARM64_SYSREG_CRM_SHIFT 3 +#define CP_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 +#define CP_REG_ARM64_SYSREG_OP2_SHIFT 0 + +/* No kernel define but it's useful to QEMU */ +#define CP_REG_ARM64_SYSREG_CP (CP_REG_ARM64_SYSREG >> CP_REG_ARM_COPROC_SHIFT) + +#ifdef TARGET_AARCH64 +MISMATCH_CHECK(CP_REG_ARM64, KVM_REG_ARM64) +MISMATCH_CHECK(CP_REG_ARM_COPROC_MASK, KVM_REG_ARM_COPROC_MASK) +MISMATCH_CHECK(CP_REG_ARM_COPROC_SHIFT, KVM_REG_ARM_COPROC_SHIFT) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP0_MASK) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP0_SHIFT, KVM_REG_ARM64_SYSREG_OP0_SHIFT) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP1_SHIFT, KVM_REG_ARM64_SYSREG_OP1_SHIFT) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRN_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRN_SHIFT, KVM_REG_ARM64_SYSREG_CRN_SHIFT) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRM_MASK) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRM_SHIFT, KVM_REG_ARM64_SYSREG_CRM_SHIFT) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK) +MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_ARM64_SYSREG_OP2_SHIFT) +#endif + +#undef MISMATCH_CHECK + +#endif diff --git a/target-arm/kvm-stub.c b/target-arm/kvm-stub.c new file mode 100644 index 00000000..db2edc2c --- /dev/null +++ b/target-arm/kvm-stub.c @@ -0,0 +1,23 @@ +/* + * QEMU KVM ARM specific function stubs + * + * Copyright Linaro Limited 2013 + * + * Author: Peter Maydell + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#include "qemu-common.h" +#include "kvm_arm.h" + +bool write_kvmstate_to_list(ARMCPU *cpu) +{ + abort(); +} + +bool write_list_to_kvmstate(ARMCPU *cpu, int level) +{ + abort(); +} diff --git a/target-arm/kvm.c b/target-arm/kvm.c new file mode 100644 index 00000000..b2785420 --- /dev/null +++ b/target-arm/kvm.c @@ -0,0 +1,611 @@ +/* + * ARM implementation of KVM hooks + * + * Copyright Christoffer Dall 2009-2010 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include +#include +#include +#include + +#include + +#include "qemu-common.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "kvm_arm.h" +#include "cpu.h" +#include "internals.h" +#include "hw/arm/arm.h" +#include "exec/memattrs.h" + +const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO +}; + +static bool cap_has_mp_state; + +int kvm_arm_vcpu_init(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + struct kvm_vcpu_init init; + + init.target = cpu->kvm_target; + memcpy(init.features, cpu->kvm_init_features, sizeof(init.features)); + + return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); +} + +bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, + int *fdarray, + struct kvm_vcpu_init *init) +{ + int ret, kvmfd = -1, vmfd = -1, cpufd = -1; + + kvmfd = qemu_open("/dev/kvm", O_RDWR); + if (kvmfd < 0) { + goto err; + } + vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); + if (vmfd < 0) { + goto err; + } + cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); + if (cpufd < 0) { + goto err; + } + + ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init); + if (ret >= 0) { + ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); + if (ret < 0) { + goto err; + } + } else { + /* Old kernel which doesn't know about the + * PREFERRED_TARGET ioctl: we know it will only support + * creating one kind of guest CPU which is its preferred + * CPU type. + */ + while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) { + init->target = *cpus_to_try++; + memset(init->features, 0, sizeof(init->features)); + ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); + if (ret >= 0) { + break; + } + } + if (ret < 0) { + goto err; + } + } + + fdarray[0] = kvmfd; + fdarray[1] = vmfd; + fdarray[2] = cpufd; + + return true; + +err: + if (cpufd >= 0) { + close(cpufd); + } + if (vmfd >= 0) { + close(vmfd); + } + if (kvmfd >= 0) { + close(kvmfd); + } + + return false; +} + +void kvm_arm_destroy_scratch_host_vcpu(int *fdarray) +{ + int i; + + for (i = 2; i >= 0; i--) { + close(fdarray[i]); + } +} + +static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data) +{ + ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc); + + /* All we really need to set up for the 'host' CPU + * is the feature bits -- we rely on the fact that the + * various ID register values in ARMCPU are only used for + * TCG CPUs. + */ + if (!kvm_arm_get_host_cpu_features(ahcc)) { + fprintf(stderr, "Failed to retrieve host CPU features!\n"); + abort(); + } +} + +static void kvm_arm_host_cpu_initfn(Object *obj) +{ + ARMHostCPUClass *ahcc = ARM_HOST_CPU_GET_CLASS(obj); + ARMCPU *cpu = ARM_CPU(obj); + CPUARMState *env = &cpu->env; + + cpu->kvm_target = ahcc->target; + cpu->dtb_compatible = ahcc->dtb_compatible; + env->features = ahcc->features; +} + +static const TypeInfo host_arm_cpu_type_info = { + .name = TYPE_ARM_HOST_CPU, +#ifdef TARGET_AARCH64 + .parent = TYPE_AARCH64_CPU, +#else + .parent = TYPE_ARM_CPU, +#endif + .instance_init = kvm_arm_host_cpu_initfn, + .class_init = kvm_arm_host_cpu_class_init, + .class_size = sizeof(ARMHostCPUClass), +}; + +int kvm_arch_init(MachineState *ms, KVMState *s) +{ + /* For ARM interrupt delivery is always asynchronous, + * whether we are using an in-kernel VGIC or not. + */ + kvm_async_interrupts_allowed = true; + + cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); + + type_register_static(&host_arm_cpu_type_info); + + return 0; +} + +unsigned long kvm_arch_vcpu_id(CPUState *cpu) +{ + return cpu->cpu_index; +} + +/* We track all the KVM devices which need their memory addresses + * passing to the kernel in a list of these structures. + * When board init is complete we run through the list and + * tell the kernel the base addresses of the memory regions. + * We use a MemoryListener to track mapping and unmapping of + * the regions during board creation, so the board models don't + * need to do anything special for the KVM case. + */ +typedef struct KVMDevice { + struct kvm_arm_device_addr kda; + struct kvm_device_attr kdattr; + MemoryRegion *mr; + QSLIST_ENTRY(KVMDevice) entries; + int dev_fd; +} KVMDevice; + +static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head; + +static void kvm_arm_devlistener_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + KVMDevice *kd; + + QSLIST_FOREACH(kd, &kvm_devices_head, entries) { + if (section->mr == kd->mr) { + kd->kda.addr = section->offset_within_address_space; + } + } +} + +static void kvm_arm_devlistener_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + KVMDevice *kd; + + QSLIST_FOREACH(kd, &kvm_devices_head, entries) { + if (section->mr == kd->mr) { + kd->kda.addr = -1; + } + } +} + +static MemoryListener devlistener = { + .region_add = kvm_arm_devlistener_add, + .region_del = kvm_arm_devlistener_del, +}; + +static void kvm_arm_set_device_addr(KVMDevice *kd) +{ + struct kvm_device_attr *attr = &kd->kdattr; + int ret; + + /* If the device control API is available and we have a device fd on the + * KVMDevice struct, let's use the newer API + */ + if (kd->dev_fd >= 0) { + uint64_t addr = kd->kda.addr; + attr->addr = (uintptr_t)&addr; + ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); + } else { + ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda); + } + + if (ret < 0) { + fprintf(stderr, "Failed to set device address: %s\n", + strerror(-ret)); + abort(); + } +} + +static void kvm_arm_machine_init_done(Notifier *notifier, void *data) +{ + KVMDevice *kd, *tkd; + + memory_listener_unregister(&devlistener); + QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) { + if (kd->kda.addr != -1) { + kvm_arm_set_device_addr(kd); + } + memory_region_unref(kd->mr); + g_free(kd); + } +} + +static Notifier notify = { + .notify = kvm_arm_machine_init_done, +}; + +void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + uint64_t attr, int dev_fd) +{ + KVMDevice *kd; + + if (!kvm_irqchip_in_kernel()) { + return; + } + + if (QSLIST_EMPTY(&kvm_devices_head)) { + memory_listener_register(&devlistener, NULL); + qemu_add_machine_init_done_notifier(¬ify); + } + kd = g_new0(KVMDevice, 1); + kd->mr = mr; + kd->kda.id = devid; + kd->kda.addr = -1; + kd->kdattr.flags = 0; + kd->kdattr.group = group; + kd->kdattr.attr = attr; + kd->dev_fd = dev_fd; + QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries); + memory_region_ref(kd->mr); +} + +static int compare_u64(const void *a, const void *b) +{ + if (*(uint64_t *)a > *(uint64_t *)b) { + return 1; + } + if (*(uint64_t *)a < *(uint64_t *)b) { + return -1; + } + return 0; +} + +/* Initialize the CPUState's cpreg list according to the kernel's + * definition of what CPU registers it knows about (and throw away + * the previous TCG-created cpreg list). + */ +int kvm_arm_init_cpreg_list(ARMCPU *cpu) +{ + struct kvm_reg_list rl; + struct kvm_reg_list *rlp; + int i, ret, arraylen; + CPUState *cs = CPU(cpu); + + rl.n = 0; + ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl); + if (ret != -E2BIG) { + return ret; + } + rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t)); + rlp->n = rl.n; + ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp); + if (ret) { + goto out; + } + /* Sort the list we get back from the kernel, since cpreg_tuples + * must be in strictly ascending order. + */ + qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64); + + for (i = 0, arraylen = 0; i < rlp->n; i++) { + if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) { + continue; + } + switch (rlp->reg[i] & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U32: + case KVM_REG_SIZE_U64: + break; + default: + fprintf(stderr, "Can't handle size of register in kernel list\n"); + ret = -EINVAL; + goto out; + } + + arraylen++; + } + + cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen); + cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen); + cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes, + arraylen); + cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values, + arraylen); + cpu->cpreg_array_len = arraylen; + cpu->cpreg_vmstate_array_len = arraylen; + + for (i = 0, arraylen = 0; i < rlp->n; i++) { + uint64_t regidx = rlp->reg[i]; + if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) { + continue; + } + cpu->cpreg_indexes[arraylen] = regidx; + arraylen++; + } + assert(cpu->cpreg_array_len == arraylen); + + if (!write_kvmstate_to_list(cpu)) { + /* Shouldn't happen unless kernel is inconsistent about + * what registers exist. + */ + fprintf(stderr, "Initial read of kernel register state failed\n"); + ret = -EINVAL; + goto out; + } + +out: + g_free(rlp); + return ret; +} + +bool write_kvmstate_to_list(ARMCPU *cpu) +{ + CPUState *cs = CPU(cpu); + int i; + bool ok = true; + + for (i = 0; i < cpu->cpreg_array_len; i++) { + struct kvm_one_reg r; + uint64_t regidx = cpu->cpreg_indexes[i]; + uint32_t v32; + int ret; + + r.id = regidx; + + switch (regidx & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U32: + r.addr = (uintptr_t)&v32; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + if (!ret) { + cpu->cpreg_values[i] = v32; + } + break; + case KVM_REG_SIZE_U64: + r.addr = (uintptr_t)(cpu->cpreg_values + i); + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + break; + default: + abort(); + } + if (ret) { + ok = false; + } + } + return ok; +} + +bool write_list_to_kvmstate(ARMCPU *cpu, int level) +{ + CPUState *cs = CPU(cpu); + int i; + bool ok = true; + + for (i = 0; i < cpu->cpreg_array_len; i++) { + struct kvm_one_reg r; + uint64_t regidx = cpu->cpreg_indexes[i]; + uint32_t v32; + int ret; + + if (kvm_arm_cpreg_level(regidx) > level) { + continue; + } + + r.id = regidx; + switch (regidx & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U32: + v32 = cpu->cpreg_values[i]; + r.addr = (uintptr_t)&v32; + break; + case KVM_REG_SIZE_U64: + r.addr = (uintptr_t)(cpu->cpreg_values + i); + break; + default: + abort(); + } + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); + if (ret) { + /* We might fail for "unknown register" and also for + * "you tried to set a register which is constant with + * a different value from what it actually contains". + */ + ok = false; + } + } + return ok; +} + +void kvm_arm_reset_vcpu(ARMCPU *cpu) +{ + int ret; + + /* Re-init VCPU so that all registers are set to + * their respective reset values. + */ + ret = kvm_arm_vcpu_init(CPU(cpu)); + if (ret < 0) { + fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); + abort(); + } + if (!write_kvmstate_to_list(cpu)) { + fprintf(stderr, "write_kvmstate_to_list failed\n"); + abort(); + } +} + +/* + * Update KVM's MP_STATE based on what QEMU thinks it is + */ +int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) +{ + if (cap_has_mp_state) { + struct kvm_mp_state mp_state = { + .mp_state = + cpu->powered_off ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE + }; + int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); + if (ret) { + fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n", + __func__, ret, strerror(-ret)); + return -1; + } + } + + return 0; +} + +/* + * Sync the KVM MP_STATE into QEMU + */ +int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) +{ + if (cap_has_mp_state) { + struct kvm_mp_state mp_state; + int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state); + if (ret) { + fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n", + __func__, ret, strerror(-ret)); + abort(); + } + cpu->powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED); + } + + return 0; +} + +void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) +{ +} + +MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) +{ + return MEMTXATTRS_UNSPECIFIED; +} + +int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) +{ + return 0; +} + +bool kvm_arch_stop_on_emulation_error(CPUState *cs) +{ + return true; +} + +int kvm_arch_process_async_events(CPUState *cs) +{ + return 0; +} + +int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) +{ + return 1; +} + +int kvm_arch_on_sigbus(int code, void *addr) +{ + return 1; +} + +void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) +{ + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); +} + +int kvm_arch_insert_sw_breakpoint(CPUState *cs, + struct kvm_sw_breakpoint *bp) +{ + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); + return -EINVAL; +} + +int kvm_arch_insert_hw_breakpoint(target_ulong addr, + target_ulong len, int type) +{ + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); + return -EINVAL; +} + +int kvm_arch_remove_hw_breakpoint(target_ulong addr, + target_ulong len, int type) +{ + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); + return -EINVAL; +} + +int kvm_arch_remove_sw_breakpoint(CPUState *cs, + struct kvm_sw_breakpoint *bp) +{ + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); + return -EINVAL; +} + +void kvm_arch_remove_all_hw_breakpoints(void) +{ + qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__); +} + +void kvm_arch_init_irq_routing(KVMState *s) +{ +} + +int kvm_arch_irqchip_create(KVMState *s) +{ + int ret; + + /* If we can create the VGIC using the newer device control API, we + * let the device do this when it initializes itself, otherwise we + * fall back to the old API */ + + ret = kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true); + if (ret == 0) { + return 1; + } + + return 0; +} + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data) +{ + return 0; +} + +int kvm_arch_msi_data_to_gsi(uint32_t data) +{ + return (data - 32) & 0xffff; +} diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c new file mode 100644 index 00000000..421ce0ea --- /dev/null +++ b/target-arm/kvm32.c @@ -0,0 +1,478 @@ +/* + * ARM implementation of KVM hooks, 32 bit specific code. + * + * Copyright Christoffer Dall 2009-2010 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include +#include +#include +#include + +#include + +#include "qemu-common.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "kvm_arm.h" +#include "cpu.h" +#include "internals.h" +#include "hw/arm/arm.h" + +static inline void set_feature(uint64_t *features, int feature) +{ + *features |= 1ULL << feature; +} + +bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc) +{ + /* Identify the feature bits corresponding to the host CPU, and + * fill out the ARMHostCPUClass fields accordingly. To do this + * we have to create a scratch VM, create a single CPU inside it, + * and then query that CPU for the relevant ID registers. + */ + int i, ret, fdarray[3]; + uint32_t midr, id_pfr0, id_isar0, mvfr1; + uint64_t features = 0; + /* Old kernels may not know about the PREFERRED_TARGET ioctl: however + * we know these will only support creating one kind of guest CPU, + * which is its preferred CPU type. + */ + static const uint32_t cpus_to_try[] = { + QEMU_KVM_ARM_TARGET_CORTEX_A15, + QEMU_KVM_ARM_TARGET_NONE + }; + struct kvm_vcpu_init init; + struct kvm_one_reg idregs[] = { + { + .id = KVM_REG_ARM | KVM_REG_SIZE_U32 + | ENCODE_CP_REG(15, 0, 0, 0, 0, 0, 0), + .addr = (uintptr_t)&midr, + }, + { + .id = KVM_REG_ARM | KVM_REG_SIZE_U32 + | ENCODE_CP_REG(15, 0, 0, 0, 1, 0, 0), + .addr = (uintptr_t)&id_pfr0, + }, + { + .id = KVM_REG_ARM | KVM_REG_SIZE_U32 + | ENCODE_CP_REG(15, 0, 0, 0, 2, 0, 0), + .addr = (uintptr_t)&id_isar0, + }, + { + .id = KVM_REG_ARM | KVM_REG_SIZE_U32 + | KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR1, + .addr = (uintptr_t)&mvfr1, + }, + }; + + if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { + return false; + } + + ahcc->target = init.target; + + /* This is not strictly blessed by the device tree binding docs yet, + * but in practice the kernel does not care about this string so + * there is no point maintaining an KVM_ARM_TARGET_* -> string table. + */ + ahcc->dtb_compatible = "arm,arm-v7"; + + for (i = 0; i < ARRAY_SIZE(idregs); i++) { + ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &idregs[i]); + if (ret) { + break; + } + } + + kvm_arm_destroy_scratch_host_vcpu(fdarray); + + if (ret) { + return false; + } + + /* Now we've retrieved all the register information we can + * set the feature bits based on the ID register fields. + * We can assume any KVM supporting CPU is at least a v7 + * with VFPv3, LPAE and the generic timers; this in turn implies + * most of the other feature bits, but a few must be tested. + */ + set_feature(&features, ARM_FEATURE_V7); + set_feature(&features, ARM_FEATURE_VFP3); + set_feature(&features, ARM_FEATURE_LPAE); + set_feature(&features, ARM_FEATURE_GENERIC_TIMER); + + switch (extract32(id_isar0, 24, 4)) { + case 1: + set_feature(&features, ARM_FEATURE_THUMB_DIV); + break; + case 2: + set_feature(&features, ARM_FEATURE_ARM_DIV); + set_feature(&features, ARM_FEATURE_THUMB_DIV); + break; + default: + break; + } + + if (extract32(id_pfr0, 12, 4) == 1) { + set_feature(&features, ARM_FEATURE_THUMB2EE); + } + if (extract32(mvfr1, 20, 4) == 1) { + set_feature(&features, ARM_FEATURE_VFP_FP16); + } + if (extract32(mvfr1, 12, 4) == 1) { + set_feature(&features, ARM_FEATURE_NEON); + } + if (extract32(mvfr1, 28, 4) == 1) { + /* FMAC support implies VFPv4 */ + set_feature(&features, ARM_FEATURE_VFP4); + } + + ahcc->features = features; + + return true; +} + +bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx) +{ + /* Return true if the regidx is a register we should synchronize + * via the cpreg_tuples array (ie is not a core reg we sync by + * hand in kvm_arch_get/put_registers()) + */ + switch (regidx & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + case KVM_REG_ARM_VFP: + return false; + default: + return true; + } +} + +typedef struct CPRegStateLevel { + uint64_t regidx; + int level; +} CPRegStateLevel; + +/* All coprocessor registers not listed in the following table are assumed to + * be of the level KVM_PUT_RUNTIME_STATE. If a register should be written less + * often, you must add it to this table with a state of either + * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE. + */ +static const CPRegStateLevel non_runtime_cpregs[] = { + { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE }, +}; + +int kvm_arm_cpreg_level(uint64_t regidx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) { + const CPRegStateLevel *l = &non_runtime_cpregs[i]; + if (l->regidx == regidx) { + return l->level; + } + } + + return KVM_PUT_RUNTIME_STATE; +} + +#define ARM_MPIDR_HWID_BITMASK 0xFFFFFF +#define ARM_CPU_ID_MPIDR 0, 0, 0, 5 + +int kvm_arch_init_vcpu(CPUState *cs) +{ + int ret; + uint64_t v; + uint32_t mpidr; + struct kvm_one_reg r; + ARMCPU *cpu = ARM_CPU(cs); + + if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) { + fprintf(stderr, "KVM is not supported for this guest CPU type\n"); + return -EINVAL; + } + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; + } + if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { + cpu->psci_version = 2; + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; + } + + /* Do KVM_ARM_VCPU_INIT ioctl */ + ret = kvm_arm_vcpu_init(cs); + if (ret) { + return ret; + } + + /* Query the kernel to make sure it supports 32 VFP + * registers: QEMU's "cortex-a15" CPU is always a + * VFP-D32 core. The simplest way to do this is just + * to attempt to read register d31. + */ + r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP | 31; + r.addr = (uintptr_t)(&v); + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + if (ret == -ENOENT) { + return -EINVAL; + } + + /* + * When KVM is in use, PSCI is emulated in-kernel and not by qemu. + * Currently KVM has its own idea about MPIDR assignment, so we + * override our defaults with what we get from KVM. + */ + ret = kvm_get_one_reg(cs, ARM_CP15_REG32(ARM_CPU_ID_MPIDR), &mpidr); + if (ret) { + return ret; + } + cpu->mp_affinity = mpidr & ARM_MPIDR_HWID_BITMASK; + + return kvm_arm_init_cpreg_list(cpu); +} + +typedef struct Reg { + uint64_t id; + int offset; +} Reg; + +#define COREREG(KERNELNAME, QEMUFIELD) \ + { \ + KVM_REG_ARM | KVM_REG_SIZE_U32 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \ + offsetof(CPUARMState, QEMUFIELD) \ + } + +#define VFPSYSREG(R) \ + { \ + KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP | \ + KVM_REG_ARM_VFP_##R, \ + offsetof(CPUARMState, vfp.xregs[ARM_VFP_##R]) \ + } + +/* Like COREREG, but handle fields which are in a uint64_t in CPUARMState. */ +#define COREREG64(KERNELNAME, QEMUFIELD) \ + { \ + KVM_REG_ARM | KVM_REG_SIZE_U32 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \ + offsetoflow32(CPUARMState, QEMUFIELD) \ + } + +static const Reg regs[] = { + /* R0_usr .. R14_usr */ + COREREG(usr_regs.uregs[0], regs[0]), + COREREG(usr_regs.uregs[1], regs[1]), + COREREG(usr_regs.uregs[2], regs[2]), + COREREG(usr_regs.uregs[3], regs[3]), + COREREG(usr_regs.uregs[4], regs[4]), + COREREG(usr_regs.uregs[5], regs[5]), + COREREG(usr_regs.uregs[6], regs[6]), + COREREG(usr_regs.uregs[7], regs[7]), + COREREG(usr_regs.uregs[8], usr_regs[0]), + COREREG(usr_regs.uregs[9], usr_regs[1]), + COREREG(usr_regs.uregs[10], usr_regs[2]), + COREREG(usr_regs.uregs[11], usr_regs[3]), + COREREG(usr_regs.uregs[12], usr_regs[4]), + COREREG(usr_regs.uregs[13], banked_r13[0]), + COREREG(usr_regs.uregs[14], banked_r14[0]), + /* R13, R14, SPSR for SVC, ABT, UND, IRQ banks */ + COREREG(svc_regs[0], banked_r13[1]), + COREREG(svc_regs[1], banked_r14[1]), + COREREG64(svc_regs[2], banked_spsr[1]), + COREREG(abt_regs[0], banked_r13[2]), + COREREG(abt_regs[1], banked_r14[2]), + COREREG64(abt_regs[2], banked_spsr[2]), + COREREG(und_regs[0], banked_r13[3]), + COREREG(und_regs[1], banked_r14[3]), + COREREG64(und_regs[2], banked_spsr[3]), + COREREG(irq_regs[0], banked_r13[4]), + COREREG(irq_regs[1], banked_r14[4]), + COREREG64(irq_regs[2], banked_spsr[4]), + /* R8_fiq .. R14_fiq and SPSR_fiq */ + COREREG(fiq_regs[0], fiq_regs[0]), + COREREG(fiq_regs[1], fiq_regs[1]), + COREREG(fiq_regs[2], fiq_regs[2]), + COREREG(fiq_regs[3], fiq_regs[3]), + COREREG(fiq_regs[4], fiq_regs[4]), + COREREG(fiq_regs[5], banked_r13[5]), + COREREG(fiq_regs[6], banked_r14[5]), + COREREG64(fiq_regs[7], banked_spsr[5]), + /* R15 */ + COREREG(usr_regs.uregs[15], regs[15]), + /* VFP system registers */ + VFPSYSREG(FPSID), + VFPSYSREG(MVFR1), + VFPSYSREG(MVFR0), + VFPSYSREG(FPEXC), + VFPSYSREG(FPINST), + VFPSYSREG(FPINST2), +}; + +int kvm_arch_put_registers(CPUState *cs, int level) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + struct kvm_one_reg r; + int mode, bn; + int ret, i; + uint32_t cpsr, fpscr; + + /* Make sure the banked regs are properly set */ + mode = env->uncached_cpsr & CPSR_M; + bn = bank_number(mode); + if (mode == ARM_CPU_MODE_FIQ) { + memcpy(env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t)); + } else { + memcpy(env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t)); + } + env->banked_r13[bn] = env->regs[13]; + env->banked_r14[bn] = env->regs[14]; + env->banked_spsr[bn] = env->spsr; + + /* Now we can safely copy stuff down to the kernel */ + for (i = 0; i < ARRAY_SIZE(regs); i++) { + r.id = regs[i].id; + r.addr = (uintptr_t)(env) + regs[i].offset; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); + if (ret) { + return ret; + } + } + + /* Special cases which aren't a single CPUARMState field */ + cpsr = cpsr_read(env); + r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr); + r.addr = (uintptr_t)(&cpsr); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); + if (ret) { + return ret; + } + + /* VFP registers */ + r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP; + for (i = 0; i < 32; i++) { + r.addr = (uintptr_t)(&env->vfp.regs[i]); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); + if (ret) { + return ret; + } + r.id++; + } + + r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP | + KVM_REG_ARM_VFP_FPSCR; + fpscr = vfp_get_fpscr(env); + r.addr = (uintptr_t)&fpscr; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); + if (ret) { + return ret; + } + + /* Note that we do not call write_cpustate_to_list() + * here, so we are only writing the tuple list back to + * KVM. This is safe because nothing can change the + * CPUARMState cp15 fields (in particular gdb accesses cannot) + * and so there are no changes to sync. In fact syncing would + * be wrong at this point: for a constant register where TCG and + * KVM disagree about its value, the preceding write_list_to_cpustate() + * would not have had any effect on the CPUARMState value (since the + * register is read-only), and a write_cpustate_to_list() here would + * then try to write the TCG value back into KVM -- this would either + * fail or incorrectly change the value the guest sees. + * + * If we ever want to allow the user to modify cp15 registers via + * the gdb stub, we would need to be more clever here (for instance + * tracking the set of registers kvm_arch_get_registers() successfully + * managed to update the CPUARMState with, and only allowing those + * to be written back up into the kernel). + */ + if (!write_list_to_kvmstate(cpu, level)) { + return EINVAL; + } + + kvm_arm_sync_mpstate_to_kvm(cpu); + + return ret; +} + +int kvm_arch_get_registers(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + struct kvm_one_reg r; + int mode, bn; + int ret, i; + uint32_t cpsr, fpscr; + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + r.id = regs[i].id; + r.addr = (uintptr_t)(env) + regs[i].offset; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + if (ret) { + return ret; + } + } + + /* Special cases which aren't a single CPUARMState field */ + r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr); + r.addr = (uintptr_t)(&cpsr); + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + if (ret) { + return ret; + } + cpsr_write(env, cpsr, 0xffffffff); + + /* Make sure the current mode regs are properly set */ + mode = env->uncached_cpsr & CPSR_M; + bn = bank_number(mode); + if (mode == ARM_CPU_MODE_FIQ) { + memcpy(env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t)); + } else { + memcpy(env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t)); + } + env->regs[13] = env->banked_r13[bn]; + env->regs[14] = env->banked_r14[bn]; + env->spsr = env->banked_spsr[bn]; + + /* VFP registers */ + r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP; + for (i = 0; i < 32; i++) { + r.addr = (uintptr_t)(&env->vfp.regs[i]); + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + if (ret) { + return ret; + } + r.id++; + } + + r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP | + KVM_REG_ARM_VFP_FPSCR; + r.addr = (uintptr_t)&fpscr; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + if (ret) { + return ret; + } + vfp_set_fpscr(env, fpscr); + + if (!write_kvmstate_to_list(cpu)) { + return EINVAL; + } + /* Note that it's OK to have registers which aren't in CPUState, + * so we can ignore a failure return here. + */ + write_list_to_cpustate(cpu); + + kvm_arm_sync_mpstate_to_qemu(cpu); + + return 0; +} diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c new file mode 100644 index 00000000..bd60889d --- /dev/null +++ b/target-arm/kvm64.c @@ -0,0 +1,466 @@ +/* + * ARM implementation of KVM hooks, 64 bit specific code + * + * Copyright Mian-M. Hamayun 2013, Virtual Open Systems + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include +#include +#include +#include + +#include + +#include "config-host.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "kvm_arm.h" +#include "cpu.h" +#include "internals.h" +#include "hw/arm/arm.h" + +static inline void set_feature(uint64_t *features, int feature) +{ + *features |= 1ULL << feature; +} + +bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc) +{ + /* Identify the feature bits corresponding to the host CPU, and + * fill out the ARMHostCPUClass fields accordingly. To do this + * we have to create a scratch VM, create a single CPU inside it, + * and then query that CPU for the relevant ID registers. + * For AArch64 we currently don't care about ID registers at + * all; we just want to know the CPU type. + */ + int fdarray[3]; + uint64_t features = 0; + /* Old kernels may not know about the PREFERRED_TARGET ioctl: however + * we know these will only support creating one kind of guest CPU, + * which is its preferred CPU type. Fortunately these old kernels + * support only a very limited number of CPUs. + */ + static const uint32_t cpus_to_try[] = { + KVM_ARM_TARGET_AEM_V8, + KVM_ARM_TARGET_FOUNDATION_V8, + KVM_ARM_TARGET_CORTEX_A57, + QEMU_KVM_ARM_TARGET_NONE + }; + struct kvm_vcpu_init init; + + if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { + return false; + } + + ahcc->target = init.target; + ahcc->dtb_compatible = "arm,arm-v8"; + + kvm_arm_destroy_scratch_host_vcpu(fdarray); + + /* We can assume any KVM supporting CPU is at least a v8 + * with VFPv4+Neon; this in turn implies most of the other + * feature bits. + */ + set_feature(&features, ARM_FEATURE_V8); + set_feature(&features, ARM_FEATURE_VFP4); + set_feature(&features, ARM_FEATURE_NEON); + set_feature(&features, ARM_FEATURE_AARCH64); + + ahcc->features = features; + + return true; +} + +#define ARM_MPIDR_HWID_BITMASK 0xFF00FFFFFFULL +#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 + +int kvm_arch_init_vcpu(CPUState *cs) +{ + int ret; + uint64_t mpidr; + ARMCPU *cpu = ARM_CPU(cs); + + if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || + !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) { + fprintf(stderr, "KVM is not supported for this guest CPU type\n"); + return -EINVAL; + } + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; + } + if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { + cpu->psci_version = 2; + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; + } + if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT; + } + + /* Do KVM_ARM_VCPU_INIT ioctl */ + ret = kvm_arm_vcpu_init(cs); + if (ret) { + return ret; + } + + /* + * When KVM is in use, PSCI is emulated in-kernel and not by qemu. + * Currently KVM has its own idea about MPIDR assignment, so we + * override our defaults with what we get from KVM. + */ + ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); + if (ret) { + return ret; + } + cpu->mp_affinity = mpidr & ARM_MPIDR_HWID_BITMASK; + + return kvm_arm_init_cpreg_list(cpu); +} + +bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx) +{ + /* Return true if the regidx is a register we should synchronize + * via the cpreg_tuples array (ie is not a core reg we sync by + * hand in kvm_arch_get/put_registers()) + */ + switch (regidx & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + return false; + default: + return true; + } +} + +typedef struct CPRegStateLevel { + uint64_t regidx; + int level; +} CPRegStateLevel; + +/* All system registers not listed in the following table are assumed to be + * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less + * often, you must add it to this table with a state of either + * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE. + */ +static const CPRegStateLevel non_runtime_cpregs[] = { + { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE }, +}; + +int kvm_arm_cpreg_level(uint64_t regidx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) { + const CPRegStateLevel *l = &non_runtime_cpregs[i]; + if (l->regidx == regidx) { + return l->level; + } + } + + return KVM_PUT_RUNTIME_STATE; +} + +#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +#define AARCH64_SIMD_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +#define AARCH64_SIMD_CTRL_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +int kvm_arch_put_registers(CPUState *cs, int level) +{ + struct kvm_one_reg reg; + uint32_t fpr; + uint64_t val; + int i; + int ret; + unsigned int el; + + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + /* If we are in AArch32 mode then we need to copy the AArch32 regs to the + * AArch64 registers before pushing them out to 64-bit KVM. + */ + if (!is_a64(env)) { + aarch64_sync_32_to_64(env); + } + + for (i = 0; i < 31; i++) { + reg.id = AARCH64_CORE_REG(regs.regs[i]); + reg.addr = (uintptr_t) &env->xregs[i]; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + } + + /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the + * QEMU side we keep the current SP in xregs[31] as well. + */ + aarch64_save_sp(env, 1); + + reg.id = AARCH64_CORE_REG(regs.sp); + reg.addr = (uintptr_t) &env->sp_el[0]; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + + reg.id = AARCH64_CORE_REG(sp_el1); + reg.addr = (uintptr_t) &env->sp_el[1]; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + + /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */ + if (is_a64(env)) { + val = pstate_read(env); + } else { + val = cpsr_read(env); + } + reg.id = AARCH64_CORE_REG(regs.pstate); + reg.addr = (uintptr_t) &val; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + + reg.id = AARCH64_CORE_REG(regs.pc); + reg.addr = (uintptr_t) &env->pc; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + + reg.id = AARCH64_CORE_REG(elr_el1); + reg.addr = (uintptr_t) &env->elr_el[1]; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + + /* Saved Program State Registers + * + * Before we restore from the banked_spsr[] array we need to + * ensure that any modifications to env->spsr are correctly + * reflected in the banks. + */ + el = arm_current_el(env); + if (el > 0 && !is_a64(env)) { + i = bank_number(env->uncached_cpsr & CPSR_M); + env->banked_spsr[i] = env->spsr; + } + + /* KVM 0-4 map to QEMU banks 1-5 */ + for (i = 0; i < KVM_NR_SPSR; i++) { + reg.id = AARCH64_CORE_REG(spsr[i]); + reg.addr = (uintptr_t) &env->banked_spsr[i + 1]; + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + } + + /* Advanced SIMD and FP registers + * We map Qn = regs[2n+1]:regs[2n] + */ + for (i = 0; i < 32; i++) { + int rd = i << 1; + uint64_t fp_val[2]; +#ifdef HOST_WORDS_BIGENDIAN + fp_val[0] = env->vfp.regs[rd + 1]; + fp_val[1] = env->vfp.regs[rd]; +#else + fp_val[1] = env->vfp.regs[rd + 1]; + fp_val[0] = env->vfp.regs[rd]; +#endif + reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); + reg.addr = (uintptr_t)(&fp_val); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + } + + reg.addr = (uintptr_t)(&fpr); + fpr = vfp_get_fpsr(env); + reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + + fpr = vfp_get_fpcr(env); + reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); + ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (ret) { + return ret; + } + + if (!write_list_to_kvmstate(cpu, level)) { + return EINVAL; + } + + kvm_arm_sync_mpstate_to_kvm(cpu); + + return ret; +} + +int kvm_arch_get_registers(CPUState *cs) +{ + struct kvm_one_reg reg; + uint64_t val; + uint32_t fpr; + unsigned int el; + int i; + int ret; + + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + for (i = 0; i < 31; i++) { + reg.id = AARCH64_CORE_REG(regs.regs[i]); + reg.addr = (uintptr_t) &env->xregs[i]; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + } + + reg.id = AARCH64_CORE_REG(regs.sp); + reg.addr = (uintptr_t) &env->sp_el[0]; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + + reg.id = AARCH64_CORE_REG(sp_el1); + reg.addr = (uintptr_t) &env->sp_el[1]; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + + reg.id = AARCH64_CORE_REG(regs.pstate); + reg.addr = (uintptr_t) &val; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + + env->aarch64 = ((val & PSTATE_nRW) == 0); + if (is_a64(env)) { + pstate_write(env, val); + } else { + env->uncached_cpsr = val & CPSR_M; + cpsr_write(env, val, 0xffffffff); + } + + /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the + * QEMU side we keep the current SP in xregs[31] as well. + */ + aarch64_restore_sp(env, 1); + + reg.id = AARCH64_CORE_REG(regs.pc); + reg.addr = (uintptr_t) &env->pc; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + + /* If we are in AArch32 mode then we need to sync the AArch32 regs with the + * incoming AArch64 regs received from 64-bit KVM. + * We must perform this after all of the registers have been acquired from + * the kernel. + */ + if (!is_a64(env)) { + aarch64_sync_64_to_32(env); + } + + reg.id = AARCH64_CORE_REG(elr_el1); + reg.addr = (uintptr_t) &env->elr_el[1]; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + + /* Fetch the SPSR registers + * + * KVM SPSRs 0-4 map to QEMU banks 1-5 + */ + for (i = 0; i < KVM_NR_SPSR; i++) { + reg.id = AARCH64_CORE_REG(spsr[i]); + reg.addr = (uintptr_t) &env->banked_spsr[i + 1]; + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + } + + el = arm_current_el(env); + if (el > 0 && !is_a64(env)) { + i = bank_number(env->uncached_cpsr & CPSR_M); + env->spsr = env->banked_spsr[i]; + } + + /* Advanced SIMD and FP registers + * We map Qn = regs[2n+1]:regs[2n] + */ + for (i = 0; i < 32; i++) { + uint64_t fp_val[2]; + reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); + reg.addr = (uintptr_t)(&fp_val); + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } else { + int rd = i << 1; +#ifdef HOST_WORDS_BIGENDIAN + env->vfp.regs[rd + 1] = fp_val[0]; + env->vfp.regs[rd] = fp_val[1]; +#else + env->vfp.regs[rd + 1] = fp_val[1]; + env->vfp.regs[rd] = fp_val[0]; +#endif + } + } + + reg.addr = (uintptr_t)(&fpr); + reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + vfp_set_fpsr(env, fpr); + + reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); + ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (ret) { + return ret; + } + vfp_set_fpcr(env, fpr); + + if (!write_kvmstate_to_list(cpu)) { + return EINVAL; + } + /* Note that it's OK to have registers which aren't in CPUState, + * so we can ignore a failure return here. + */ + write_list_to_cpustate(cpu); + + kvm_arm_sync_mpstate_to_qemu(cpu); + + /* TODO: other registers */ + return ret; +} diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h new file mode 100644 index 00000000..7912d743 --- /dev/null +++ b/target-arm/kvm_arm.h @@ -0,0 +1,194 @@ +/* + * QEMU KVM support -- ARM specific functions. + * + * Copyright (c) 2012 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_KVM_ARM_H +#define QEMU_KVM_ARM_H + +#include "sysemu/kvm.h" +#include "exec/memory.h" + +/** + * kvm_arm_vcpu_init: + * @cs: CPUState + * + * Initialize (or reinitialize) the VCPU by invoking the + * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature + * bitmask specified in the CPUState. + * + * Returns: 0 if success else < 0 error code + */ +int kvm_arm_vcpu_init(CPUState *cs); + +/** + * kvm_arm_register_device: + * @mr: memory region for this device + * @devid: the KVM device ID + * @group: device control API group for setting addresses + * @attr: device control API address type + * @dev_fd: device control device file descriptor (or -1 if not supported) + * + * Remember the memory region @mr, and when it is mapped by the + * machine model, tell the kernel that base address using the + * KVM_ARM_SET_DEVICE_ADDRESS ioctl or the newer device control API. @devid + * should be the ID of the device as defined by KVM_ARM_SET_DEVICE_ADDRESS or + * the arm-vgic device in the device control API. + * The machine model may map + * and unmap the device multiple times; the kernel will only be told the final + * address at the point where machine init is complete. + */ +void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + uint64_t attr, int dev_fd); + +/** + * kvm_arm_init_cpreg_list: + * @cs: CPUState + * + * Initialize the CPUState's cpreg list according to the kernel's + * definition of what CPU registers it knows about (and throw away + * the previous TCG-created cpreg list). + * + * Returns: 0 if success, else < 0 error code + */ +int kvm_arm_init_cpreg_list(ARMCPU *cpu); + +/** + * kvm_arm_reg_syncs_via_cpreg_list + * regidx: KVM register index + * + * Return true if this KVM register should be synchronized via the + * cpreg list of arbitrary system registers, false if it is synchronized + * by hand using code in kvm_arch_get/put_registers(). + */ +bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); + +/** + * kvm_arm_cpreg_level + * regidx: KVM register index + * + * Return the level of this coprocessor/system register. Return value is + * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. + */ +int kvm_arm_cpreg_level(uint64_t regidx); + +/** + * write_list_to_kvmstate: + * @cpu: ARMCPU + * @level: the state level to sync + * + * For each register listed in the ARMCPU cpreg_indexes list, write + * its value from the cpreg_values list into the kernel (via ioctl). + * This updates KVM's working data structures from TCG data or + * from incoming migration state. + * + * Returns: true if all register values were updated correctly, + * false if some register was unknown to the kernel or could not + * be written (eg constant register with the wrong value). + * Note that we do not stop early on failure -- we will attempt + * writing all registers in the list. + */ +bool write_list_to_kvmstate(ARMCPU *cpu, int level); + +/** + * write_kvmstate_to_list: + * @cpu: ARMCPU + * + * For each register listed in the ARMCPU cpreg_indexes list, write + * its value from the kernel into the cpreg_values list. This is used to + * copy info from KVM's working data structures into TCG or + * for outbound migration. + * + * Returns: true if all register values were read correctly, + * false if some register was unknown or could not be read. + * Note that we do not stop early on failure -- we will attempt + * reading all registers in the list. + */ +bool write_kvmstate_to_list(ARMCPU *cpu); + +/** + * kvm_arm_reset_vcpu: + * @cpu: ARMCPU + * + * Called at reset time to kernel registers to their initial values. + */ +void kvm_arm_reset_vcpu(ARMCPU *cpu); + +#ifdef CONFIG_KVM +/** + * kvm_arm_create_scratch_host_vcpu: + * @cpus_to_try: array of QEMU_KVM_ARM_TARGET_* values (terminated with + * QEMU_KVM_ARM_TARGET_NONE) to try as fallback if the kernel does not + * know the PREFERRED_TARGET ioctl + * @fdarray: filled in with kvmfd, vmfd, cpufd file descriptors in that order + * @init: filled in with the necessary values for creating a host vcpu + * + * Create a scratch vcpu in its own VM of the type preferred by the host + * kernel (as would be used for '-cpu host'), for purposes of probing it + * for capabilities. + * + * Returns: true on success (and fdarray and init are filled in), + * false on failure (and fdarray and init are not valid). + */ +bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, + int *fdarray, + struct kvm_vcpu_init *init); + +/** + * kvm_arm_destroy_scratch_host_vcpu: + * @fdarray: array of fds as set up by kvm_arm_create_scratch_host_vcpu + * + * Tear down the scratch vcpu created by kvm_arm_create_scratch_host_vcpu. + */ +void kvm_arm_destroy_scratch_host_vcpu(int *fdarray); + +#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU +#define ARM_HOST_CPU_CLASS(klass) \ + OBJECT_CLASS_CHECK(ARMHostCPUClass, (klass), TYPE_ARM_HOST_CPU) +#define ARM_HOST_CPU_GET_CLASS(obj) \ + OBJECT_GET_CLASS(ARMHostCPUClass, (obj), TYPE_ARM_HOST_CPU) + +typedef struct ARMHostCPUClass { + /*< private >*/ + ARMCPUClass parent_class; + /*< public >*/ + + uint64_t features; + uint32_t target; + const char *dtb_compatible; +} ARMHostCPUClass; + +/** + * kvm_arm_get_host_cpu_features: + * @ahcc: ARMHostCPUClass to fill in + * + * Probe the capabilities of the host kernel's preferred CPU and fill + * in the ARMHostCPUClass struct accordingly. + */ +bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc); + + +/** + * kvm_arm_sync_mpstate_to_kvm + * @cpu: ARMCPU + * + * If supported set the KVM MP_STATE based on QEMU's model. + */ +int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + +/** + * kvm_arm_sync_mpstate_to_qemu + * @cpu: ARMCPU + * + * If supported get the MP_STATE from KVM and store in QEMU's model. + */ +int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + +#endif + +#endif diff --git a/target-arm/machine.c b/target-arm/machine.c new file mode 100644 index 00000000..32adfe79 --- /dev/null +++ b/target-arm/machine.c @@ -0,0 +1,330 @@ +#include "hw/hw.h" +#include "hw/boards.h" +#include "sysemu/kvm.h" +#include "kvm_arm.h" +#include "internals.h" + +static bool vfp_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_VFP); +} + +static int get_fpscr(QEMUFile *f, void *opaque, size_t size) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint32_t val = qemu_get_be32(f); + + vfp_set_fpscr(env, val); + return 0; +} + +static void put_fpscr(QEMUFile *f, void *opaque, size_t size) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + qemu_put_be32(f, vfp_get_fpscr(env)); +} + +static const VMStateInfo vmstate_fpscr = { + .name = "fpscr", + .get = get_fpscr, + .put = put_fpscr, +}; + +static const VMStateDescription vmstate_vfp = { + .name = "cpu/vfp", + .version_id = 3, + .minimum_version_id = 3, + .needed = vfp_needed, + .fields = (VMStateField[]) { + VMSTATE_FLOAT64_ARRAY(env.vfp.regs, ARMCPU, 64), + /* The xregs array is a little awkward because element 1 (FPSCR) + * requires a specific accessor, so we have to split it up in + * the vmstate: + */ + VMSTATE_UINT32(env.vfp.xregs[0], ARMCPU), + VMSTATE_UINT32_SUB_ARRAY(env.vfp.xregs, ARMCPU, 2, 14), + { + .name = "fpscr", + .version_id = 0, + .size = sizeof(uint32_t), + .info = &vmstate_fpscr, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_END_OF_LIST() + } +}; + +static bool iwmmxt_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_IWMMXT); +} + +static const VMStateDescription vmstate_iwmmxt = { + .name = "cpu/iwmmxt", + .version_id = 1, + .minimum_version_id = 1, + .needed = iwmmxt_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16), + VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16), + VMSTATE_END_OF_LIST() + } +}; + +static bool m_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_M); +} + +static const VMStateDescription vmstate_m = { + .name = "cpu/m", + .version_id = 1, + .minimum_version_id = 1, + .needed = m_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(env.v7m.other_sp, ARMCPU), + VMSTATE_UINT32(env.v7m.vecbase, ARMCPU), + VMSTATE_UINT32(env.v7m.basepri, ARMCPU), + VMSTATE_UINT32(env.v7m.control, ARMCPU), + VMSTATE_INT32(env.v7m.current_sp, ARMCPU), + VMSTATE_INT32(env.v7m.exception, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + +static bool thumb2ee_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_THUMB2EE); +} + +static const VMStateDescription vmstate_thumb2ee = { + .name = "cpu/thumb2ee", + .version_id = 1, + .minimum_version_id = 1, + .needed = thumb2ee_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(env.teecr, ARMCPU), + VMSTATE_UINT32(env.teehbr, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + +static bool pmsav7_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_MPU) && + arm_feature(env, ARM_FEATURE_V7); +} + +static bool pmsav7_rgnr_vmstate_validate(void *opaque, int version_id) +{ + ARMCPU *cpu = opaque; + + return cpu->env.cp15.c6_rgnr < cpu->pmsav7_dregion; +} + +static const VMStateDescription vmstate_pmsav7 = { + .name = "cpu/pmsav7", + .version_id = 1, + .minimum_version_id = 1, + .needed = pmsav7_needed, + .fields = (VMStateField[]) { + VMSTATE_VARRAY_UINT32(env.pmsav7.drbar, ARMCPU, pmsav7_dregion, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(env.pmsav7.drsr, ARMCPU, pmsav7_dregion, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(env.pmsav7.dracr, ARMCPU, pmsav7_dregion, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_VALIDATE("rgnr is valid", pmsav7_rgnr_vmstate_validate), + VMSTATE_END_OF_LIST() + } +}; + +static int get_cpsr(QEMUFile *f, void *opaque, size_t size) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint32_t val = qemu_get_be32(f); + + env->aarch64 = ((val & PSTATE_nRW) == 0); + + if (is_a64(env)) { + pstate_write(env, val); + return 0; + } + + /* Avoid mode switch when restoring CPSR */ + env->uncached_cpsr = val & CPSR_M; + cpsr_write(env, val, 0xffffffff); + return 0; +} + +static void put_cpsr(QEMUFile *f, void *opaque, size_t size) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + uint32_t val; + + if (is_a64(env)) { + val = pstate_read(env); + } else { + val = cpsr_read(env); + } + + qemu_put_be32(f, val); +} + +static const VMStateInfo vmstate_cpsr = { + .name = "cpsr", + .get = get_cpsr, + .put = put_cpsr, +}; + +static void cpu_pre_save(void *opaque) +{ + ARMCPU *cpu = opaque; + + if (kvm_enabled()) { + if (!write_kvmstate_to_list(cpu)) { + /* This should never fail */ + abort(); + } + } else { + if (!write_cpustate_to_list(cpu)) { + /* This should never fail. */ + abort(); + } + } + + cpu->cpreg_vmstate_array_len = cpu->cpreg_array_len; + memcpy(cpu->cpreg_vmstate_indexes, cpu->cpreg_indexes, + cpu->cpreg_array_len * sizeof(uint64_t)); + memcpy(cpu->cpreg_vmstate_values, cpu->cpreg_values, + cpu->cpreg_array_len * sizeof(uint64_t)); +} + +static int cpu_post_load(void *opaque, int version_id) +{ + ARMCPU *cpu = opaque; + int i, v; + + /* Update the values list from the incoming migration data. + * Anything in the incoming data which we don't know about is + * a migration failure; anything we know about but the incoming + * data doesn't specify retains its current (reset) value. + * The indexes list remains untouched -- we only inspect the + * incoming migration index list so we can match the values array + * entries with the right slots in our own values array. + */ + + for (i = 0, v = 0; i < cpu->cpreg_array_len + && v < cpu->cpreg_vmstate_array_len; i++) { + if (cpu->cpreg_vmstate_indexes[v] > cpu->cpreg_indexes[i]) { + /* register in our list but not incoming : skip it */ + continue; + } + if (cpu->cpreg_vmstate_indexes[v] < cpu->cpreg_indexes[i]) { + /* register in their list but not ours: fail migration */ + return -1; + } + /* matching register, copy the value over */ + cpu->cpreg_values[i] = cpu->cpreg_vmstate_values[v]; + v++; + } + + if (kvm_enabled()) { + if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) { + return -1; + } + /* Note that it's OK for the TCG side not to know about + * every register in the list; KVM is authoritative if + * we're using it. + */ + write_list_to_cpustate(cpu); + } else { + if (!write_list_to_cpustate(cpu)) { + return -1; + } + } + + hw_breakpoint_update_all(cpu); + hw_watchpoint_update_all(cpu); + + return 0; +} + +const VMStateDescription vmstate_arm_cpu = { + .name = "cpu", + .version_id = 22, + .minimum_version_id = 22, + .pre_save = cpu_pre_save, + .post_load = cpu_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16), + VMSTATE_UINT64_ARRAY(env.xregs, ARMCPU, 32), + VMSTATE_UINT64(env.pc, ARMCPU), + { + .name = "cpsr", + .version_id = 0, + .size = sizeof(uint32_t), + .info = &vmstate_cpsr, + .flags = VMS_SINGLE, + .offset = 0, + }, + VMSTATE_UINT32(env.spsr, ARMCPU), + VMSTATE_UINT64_ARRAY(env.banked_spsr, ARMCPU, 8), + VMSTATE_UINT32_ARRAY(env.banked_r13, ARMCPU, 8), + VMSTATE_UINT32_ARRAY(env.banked_r14, ARMCPU, 8), + VMSTATE_UINT32_ARRAY(env.usr_regs, ARMCPU, 5), + VMSTATE_UINT32_ARRAY(env.fiq_regs, ARMCPU, 5), + VMSTATE_UINT64_ARRAY(env.elr_el, ARMCPU, 4), + VMSTATE_UINT64_ARRAY(env.sp_el, ARMCPU, 4), + /* The length-check must come before the arrays to avoid + * incoming data possibly overflowing the array. + */ + VMSTATE_INT32_POSITIVE_LE(cpreg_vmstate_array_len, ARMCPU), + VMSTATE_VARRAY_INT32(cpreg_vmstate_indexes, ARMCPU, + cpreg_vmstate_array_len, + 0, vmstate_info_uint64, uint64_t), + VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU, + cpreg_vmstate_array_len, + 0, vmstate_info_uint64, uint64_t), + VMSTATE_UINT64(env.exclusive_addr, ARMCPU), + VMSTATE_UINT64(env.exclusive_val, ARMCPU), + VMSTATE_UINT64(env.exclusive_high, ARMCPU), + VMSTATE_UINT64(env.features, ARMCPU), + VMSTATE_UINT32(env.exception.syndrome, ARMCPU), + VMSTATE_UINT32(env.exception.fsr, ARMCPU), + VMSTATE_UINT64(env.exception.vaddress, ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), + VMSTATE_BOOL(powered_off, ARMCPU), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_vfp, + &vmstate_iwmmxt, + &vmstate_m, + &vmstate_thumb2ee, + &vmstate_pmsav7, + NULL + } +}; diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c new file mode 100644 index 00000000..47d13e90 --- /dev/null +++ b/target-arm/neon_helper.c @@ -0,0 +1,2243 @@ +/* + * ARM NEON vector operations. + * + * Copyright (c) 2007, 2008 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GNU GPL v2. + */ +#include +#include + +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" + +#define SIGNBIT (uint32_t)0x80000000 +#define SIGNBIT64 ((uint64_t)1 << 63) + +#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q + +#define NEON_TYPE1(name, type) \ +typedef struct \ +{ \ + type v1; \ +} neon_##name; +#ifdef HOST_WORDS_BIGENDIAN +#define NEON_TYPE2(name, type) \ +typedef struct \ +{ \ + type v2; \ + type v1; \ +} neon_##name; +#define NEON_TYPE4(name, type) \ +typedef struct \ +{ \ + type v4; \ + type v3; \ + type v2; \ + type v1; \ +} neon_##name; +#else +#define NEON_TYPE2(name, type) \ +typedef struct \ +{ \ + type v1; \ + type v2; \ +} neon_##name; +#define NEON_TYPE4(name, type) \ +typedef struct \ +{ \ + type v1; \ + type v2; \ + type v3; \ + type v4; \ +} neon_##name; +#endif + +NEON_TYPE4(s8, int8_t) +NEON_TYPE4(u8, uint8_t) +NEON_TYPE2(s16, int16_t) +NEON_TYPE2(u16, uint16_t) +NEON_TYPE1(s32, int32_t) +NEON_TYPE1(u32, uint32_t) +#undef NEON_TYPE4 +#undef NEON_TYPE2 +#undef NEON_TYPE1 + +/* Copy from a uint32_t to a vector structure type. */ +#define NEON_UNPACK(vtype, dest, val) do { \ + union { \ + vtype v; \ + uint32_t i; \ + } conv_u; \ + conv_u.i = (val); \ + dest = conv_u.v; \ + } while(0) + +/* Copy from a vector structure type to a uint32_t. */ +#define NEON_PACK(vtype, dest, val) do { \ + union { \ + vtype v; \ + uint32_t i; \ + } conv_u; \ + conv_u.v = (val); \ + dest = conv_u.i; \ + } while(0) + +#define NEON_DO1 \ + NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); +#define NEON_DO2 \ + NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ + NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); +#define NEON_DO4 \ + NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ + NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \ + NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \ + NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4); + +#define NEON_VOP_BODY(vtype, n) \ +{ \ + uint32_t res; \ + vtype vsrc1; \ + vtype vsrc2; \ + vtype vdest; \ + NEON_UNPACK(vtype, vsrc1, arg1); \ + NEON_UNPACK(vtype, vsrc2, arg2); \ + NEON_DO##n; \ + NEON_PACK(vtype, res, vdest); \ + return res; \ +} + +#define NEON_VOP(name, vtype, n) \ +uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ +NEON_VOP_BODY(vtype, n) + +#define NEON_VOP_ENV(name, vtype, n) \ +uint32_t HELPER(glue(neon_,name))(CPUARMState *env, uint32_t arg1, uint32_t arg2) \ +NEON_VOP_BODY(vtype, n) + +/* Pairwise operations. */ +/* For 32-bit elements each segment only contains a single element, so + the elementwise and pairwise operations are the same. */ +#define NEON_PDO2 \ + NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ + NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2); +#define NEON_PDO4 \ + NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ + NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \ + NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \ + NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \ + +#define NEON_POP(name, vtype, n) \ +uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ +{ \ + uint32_t res; \ + vtype vsrc1; \ + vtype vsrc2; \ + vtype vdest; \ + NEON_UNPACK(vtype, vsrc1, arg1); \ + NEON_UNPACK(vtype, vsrc2, arg2); \ + NEON_PDO##n; \ + NEON_PACK(vtype, res, vdest); \ + return res; \ +} + +/* Unary operators. */ +#define NEON_VOP1(name, vtype, n) \ +uint32_t HELPER(glue(neon_,name))(uint32_t arg) \ +{ \ + vtype vsrc1; \ + vtype vdest; \ + NEON_UNPACK(vtype, vsrc1, arg); \ + NEON_DO##n; \ + NEON_PACK(vtype, arg, vdest); \ + return arg; \ +} + + +#define NEON_USAT(dest, src1, src2, type) do { \ + uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ + if (tmp != (type)tmp) { \ + SET_QC(); \ + dest = ~0; \ + } else { \ + dest = tmp; \ + }} while(0) +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) +NEON_VOP_ENV(qadd_u8, neon_u8, 4) +#undef NEON_FN +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) +NEON_VOP_ENV(qadd_u16, neon_u16, 2) +#undef NEON_FN +#undef NEON_USAT + +uint32_t HELPER(neon_qadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + SET_QC(); + res = ~0; + } + return res; +} + +uint64_t HELPER(neon_qadd_u64)(CPUARMState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 + src2; + if (res < src1) { + SET_QC(); + res = ~(uint64_t)0; + } + return res; +} + +#define NEON_SSAT(dest, src1, src2, type) do { \ + int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ + if (tmp != (type)tmp) { \ + SET_QC(); \ + if (src2 > 0) { \ + tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ + } else { \ + tmp = 1 << (sizeof(type) * 8 - 1); \ + } \ + } \ + dest = tmp; \ + } while(0) +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) +NEON_VOP_ENV(qadd_s8, neon_s8, 4) +#undef NEON_FN +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) +NEON_VOP_ENV(qadd_s16, neon_s16, 2) +#undef NEON_FN +#undef NEON_SSAT + +uint32_t HELPER(neon_qadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) { + SET_QC(); + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 + src2; + if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { + SET_QC(); + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; + } + return res; +} + +/* Unsigned saturating accumulate of signed value + * + * Op1/Rn is treated as signed + * Op2/Rd is treated as unsigned + * + * Explicit casting is used to ensure the correct sign extension of + * inputs. The result is treated as a unsigned value and saturated as such. + * + * We use a macro for the 8/16 bit cases which expects signed integers of va, + * vb, and vr for interim calculation and an unsigned 32 bit result value r. + */ + +#define USATACC(bits, shift) \ + do { \ + va = sextract32(a, shift, bits); \ + vb = extract32(b, shift, bits); \ + vr = va + vb; \ + if (vr > UINT##bits##_MAX) { \ + SET_QC(); \ + vr = UINT##bits##_MAX; \ + } else if (vr < 0) { \ + SET_QC(); \ + vr = 0; \ + } \ + r = deposit32(r, shift, bits, vr); \ + } while (0) + +uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int16_t va, vb, vr; + uint32_t r = 0; + + USATACC(8, 0); + USATACC(8, 8); + USATACC(8, 16); + USATACC(8, 24); + return r; +} + +uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int32_t va, vb, vr; + uint64_t r = 0; + + USATACC(16, 0); + USATACC(16, 16); + return r; +} + +#undef USATACC + +uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int64_t va = (int32_t)a; + int64_t vb = (uint32_t)b; + int64_t vr = va + vb; + if (vr > UINT32_MAX) { + SET_QC(); + vr = UINT32_MAX; + } else if (vr < 0) { + SET_QC(); + vr = 0; + } + return vr; +} + +uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint64_t res; + res = a + b; + /* We only need to look at the pattern of SIGN bits to detect + * +ve/-ve saturation + */ + if (~a & b & ~res & SIGNBIT64) { + SET_QC(); + res = UINT64_MAX; + } else if (a & ~b & res & SIGNBIT64) { + SET_QC(); + res = 0; + } + return res; +} + +/* Signed saturating accumulate of unsigned value + * + * Op1/Rn is treated as unsigned + * Op2/Rd is treated as signed + * + * The result is treated as a signed value and saturated as such + * + * We use a macro for the 8/16 bit cases which expects signed integers of va, + * vb, and vr for interim calculation and an unsigned 32 bit result value r. + */ + +#define SSATACC(bits, shift) \ + do { \ + va = extract32(a, shift, bits); \ + vb = sextract32(b, shift, bits); \ + vr = va + vb; \ + if (vr > INT##bits##_MAX) { \ + SET_QC(); \ + vr = INT##bits##_MAX; \ + } else if (vr < INT##bits##_MIN) { \ + SET_QC(); \ + vr = INT##bits##_MIN; \ + } \ + r = deposit32(r, shift, bits, vr); \ + } while (0) + +uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int16_t va, vb, vr; + uint32_t r = 0; + + SSATACC(8, 0); + SSATACC(8, 8); + SSATACC(8, 16); + SSATACC(8, 24); + return r; +} + +uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int32_t va, vb, vr; + uint32_t r = 0; + + SSATACC(16, 0); + SSATACC(16, 16); + + return r; +} + +#undef SSATACC + +uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int64_t res; + int64_t op1 = (uint32_t)a; + int64_t op2 = (int32_t)b; + res = op1 + op2; + if (res > INT32_MAX) { + SET_QC(); + res = INT32_MAX; + } else if (res < INT32_MIN) { + SET_QC(); + res = INT32_MIN; + } + return res; +} + +uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint64_t res; + res = a + b; + /* We only need to look at the pattern of SIGN bits to detect an overflow */ + if (((a & res) + | (~b & res) + | (a & ~b)) & SIGNBIT64) { + SET_QC(); + res = INT64_MAX; + } + return res; +} + + +#define NEON_USAT(dest, src1, src2, type) do { \ + uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ + if (tmp != (type)tmp) { \ + SET_QC(); \ + dest = 0; \ + } else { \ + dest = tmp; \ + }} while(0) +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) +NEON_VOP_ENV(qsub_u8, neon_u8, 4) +#undef NEON_FN +#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) +NEON_VOP_ENV(qsub_u16, neon_u16, 2) +#undef NEON_FN +#undef NEON_USAT + +uint32_t HELPER(neon_qsub_u32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + SET_QC(); + res = 0; + } + return res; +} + +uint64_t HELPER(neon_qsub_u64)(CPUARMState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + if (src1 < src2) { + SET_QC(); + res = 0; + } else { + res = src1 - src2; + } + return res; +} + +#define NEON_SSAT(dest, src1, src2, type) do { \ + int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ + if (tmp != (type)tmp) { \ + SET_QC(); \ + if (src2 < 0) { \ + tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ + } else { \ + tmp = 1 << (sizeof(type) * 8 - 1); \ + } \ + } \ + dest = tmp; \ + } while(0) +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) +NEON_VOP_ENV(qsub_s8, neon_s8, 4) +#undef NEON_FN +#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) +NEON_VOP_ENV(qsub_s16, neon_s16, 2) +#undef NEON_FN +#undef NEON_SSAT + +uint32_t HELPER(neon_qsub_s32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) { + SET_QC(); + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint64_t HELPER(neon_qsub_s64)(CPUARMState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 - src2; + if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { + SET_QC(); + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; + } + return res; +} + +#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 +NEON_VOP(hadd_s8, neon_s8, 4) +NEON_VOP(hadd_u8, neon_u8, 4) +NEON_VOP(hadd_s16, neon_s16, 2) +NEON_VOP(hadd_u16, neon_u16, 2) +#undef NEON_FN + +int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) +{ + int32_t dest; + + dest = (src1 >> 1) + (src2 >> 1); + if (src1 & src2 & 1) + dest++; + return dest; +} + +uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) +{ + uint32_t dest; + + dest = (src1 >> 1) + (src2 >> 1); + if (src1 & src2 & 1) + dest++; + return dest; +} + +#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 +NEON_VOP(rhadd_s8, neon_s8, 4) +NEON_VOP(rhadd_u8, neon_u8, 4) +NEON_VOP(rhadd_s16, neon_s16, 2) +NEON_VOP(rhadd_u16, neon_u16, 2) +#undef NEON_FN + +int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) +{ + int32_t dest; + + dest = (src1 >> 1) + (src2 >> 1); + if ((src1 | src2) & 1) + dest++; + return dest; +} + +uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) +{ + uint32_t dest; + + dest = (src1 >> 1) + (src2 >> 1); + if ((src1 | src2) & 1) + dest++; + return dest; +} + +#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 +NEON_VOP(hsub_s8, neon_s8, 4) +NEON_VOP(hsub_u8, neon_u8, 4) +NEON_VOP(hsub_s16, neon_s16, 2) +NEON_VOP(hsub_u16, neon_u16, 2) +#undef NEON_FN + +int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) +{ + int32_t dest; + + dest = (src1 >> 1) - (src2 >> 1); + if ((~src1) & src2 & 1) + dest--; + return dest; +} + +uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) +{ + uint32_t dest; + + dest = (src1 >> 1) - (src2 >> 1); + if ((~src1) & src2 & 1) + dest--; + return dest; +} + +#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 +NEON_VOP(cgt_s8, neon_s8, 4) +NEON_VOP(cgt_u8, neon_u8, 4) +NEON_VOP(cgt_s16, neon_s16, 2) +NEON_VOP(cgt_u16, neon_u16, 2) +NEON_VOP(cgt_s32, neon_s32, 1) +NEON_VOP(cgt_u32, neon_u32, 1) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 +NEON_VOP(cge_s8, neon_s8, 4) +NEON_VOP(cge_u8, neon_u8, 4) +NEON_VOP(cge_s16, neon_s16, 2) +NEON_VOP(cge_u16, neon_u16, 2) +NEON_VOP(cge_s32, neon_s32, 1) +NEON_VOP(cge_u32, neon_u32, 1) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 +NEON_VOP(min_s8, neon_s8, 4) +NEON_VOP(min_u8, neon_u8, 4) +NEON_VOP(min_s16, neon_s16, 2) +NEON_VOP(min_u16, neon_u16, 2) +NEON_VOP(min_s32, neon_s32, 1) +NEON_VOP(min_u32, neon_u32, 1) +NEON_POP(pmin_s8, neon_s8, 4) +NEON_POP(pmin_u8, neon_u8, 4) +NEON_POP(pmin_s16, neon_s16, 2) +NEON_POP(pmin_u16, neon_u16, 2) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2 +NEON_VOP(max_s8, neon_s8, 4) +NEON_VOP(max_u8, neon_u8, 4) +NEON_VOP(max_s16, neon_s16, 2) +NEON_VOP(max_u16, neon_u16, 2) +NEON_VOP(max_s32, neon_s32, 1) +NEON_VOP(max_u32, neon_u32, 1) +NEON_POP(pmax_s8, neon_s8, 4) +NEON_POP(pmax_u8, neon_u8, 4) +NEON_POP(pmax_s16, neon_s16, 2) +NEON_POP(pmax_u16, neon_u16, 2) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) +NEON_VOP(abd_s8, neon_s8, 4) +NEON_VOP(abd_u8, neon_u8, 4) +NEON_VOP(abd_s16, neon_s16, 2) +NEON_VOP(abd_u16, neon_u16, 2) +NEON_VOP(abd_s32, neon_s32, 1) +NEON_VOP(abd_u32, neon_u32, 1) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8 || \ + tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp < 0) { \ + dest = src1 >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + }} while (0) +NEON_VOP(shl_u8, neon_u8, 4) +NEON_VOP(shl_u16, neon_u16, 2) +NEON_VOP(shl_u32, neon_u32, 1) +#undef NEON_FN + +uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) +{ + int8_t shift = (int8_t)shiftop; + if (shift >= 64 || shift <= -64) { + val = 0; + } else if (shift < 0) { + val >>= -shift; + } else { + val <<= shift; + } + return val; +} + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = src1 >> (sizeof(src1) * 8 - 1); \ + } else if (tmp < 0) { \ + dest = src1 >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + }} while (0) +NEON_VOP(shl_s8, neon_s8, 4) +NEON_VOP(shl_s16, neon_s16, 2) +NEON_VOP(shl_s32, neon_s32, 1) +#undef NEON_FN + +uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) +{ + int8_t shift = (int8_t)shiftop; + int64_t val = valop; + if (shift >= 64) { + val = 0; + } else if (shift <= -64) { + val >>= 63; + } else if (shift < 0) { + val >>= -shift; + } else { + val <<= shift; + } + return val; +} + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if ((tmp >= (ssize_t)sizeof(src1) * 8) \ + || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \ + dest = 0; \ + } else if (tmp < 0) { \ + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + }} while (0) +NEON_VOP(rshl_s8, neon_s8, 4) +NEON_VOP(rshl_s16, neon_s16, 2) +#undef NEON_FN + +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bit accumulator. */ +uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) +{ + int32_t dest; + int32_t val = (int32_t)valop; + int8_t shift = (int8_t)shiftop; + if ((shift >= 32) || (shift <= -32)) { + dest = 0; + } else if (shift < 0) { + int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + } + return dest; +} + +/* Handling addition overflow with 64 bit input values is more + * tricky than with 32 bit values. */ +uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) +{ + int8_t shift = (int8_t)shiftop; + int64_t val = valop; + if ((shift >= 64) || (shift <= -64)) { + val = 0; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == INT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x4000000000000000LL; + } else { + val++; + val >>= 1; + } + } else { + val <<= shift; + } + return val; +} + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8 || \ + tmp < -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ + dest = src1 >> (-tmp - 1); \ + } else if (tmp < 0) { \ + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + }} while (0) +NEON_VOP(rshl_u8, neon_u8, 4) +NEON_VOP(rshl_u16, neon_u16, 2) +#undef NEON_FN + +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bit accumulator. */ +uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) +{ + uint32_t dest; + int8_t shift = (int8_t)shiftop; + if (shift >= 32 || shift < -32) { + dest = 0; + } else if (shift == -32) { + dest = val >> 31; + } else if (shift < 0) { + uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + } + return dest; +} + +/* Handling addition overflow with 64 bit input values is more + * tricky than with 32 bit values. */ +uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) +{ + int8_t shift = (uint8_t)shiftop; + if (shift >= 64 || shift < -64) { + val = 0; + } else if (shift == -64) { + /* Rounding a 1-bit result just preserves that bit. */ + val >>= 63; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == UINT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x8000000000000000ULL; + } else { + val++; + val >>= 1; + } + } else { + val <<= shift; + } + return val; +} + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = ~0; \ + } else { \ + dest = 0; \ + } \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp < 0) { \ + dest = src1 >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + if ((dest >> tmp) != src1) { \ + SET_QC(); \ + dest = ~0; \ + } \ + }} while (0) +NEON_VOP_ENV(qshl_u8, neon_u8, 4) +NEON_VOP_ENV(qshl_u16, neon_u16, 2) +NEON_VOP_ENV(qshl_u32, neon_u32, 1) +#undef NEON_FN + +uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop) +{ + int8_t shift = (int8_t)shiftop; + if (shift >= 64) { + if (val) { + val = ~(uint64_t)0; + SET_QC(); + } + } else if (shift <= -64) { + val = 0; + } else if (shift < 0) { + val >>= -shift; + } else { + uint64_t tmp = val; + val <<= shift; + if ((val >> shift) != tmp) { + SET_QC(); + val = ~(uint64_t)0; + } + } + return val; +} + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ + } else { \ + dest = src1; \ + } \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = src1 >> 31; \ + } else if (tmp < 0) { \ + dest = src1 >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + if ((dest >> tmp) != src1) { \ + SET_QC(); \ + dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ + } \ + }} while (0) +NEON_VOP_ENV(qshl_s8, neon_s8, 4) +NEON_VOP_ENV(qshl_s16, neon_s16, 2) +NEON_VOP_ENV(qshl_s32, neon_s32, 1) +#undef NEON_FN + +uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop) +{ + int8_t shift = (uint8_t)shiftop; + int64_t val = valop; + if (shift >= 64) { + if (val) { + SET_QC(); + val = (val >> 63) ^ ~SIGNBIT64; + } + } else if (shift <= -64) { + val >>= 63; + } else if (shift < 0) { + val >>= -shift; + } else { + int64_t tmp = val; + val <<= shift; + if ((val >> shift) != tmp) { + SET_QC(); + val = (tmp >> 63) ^ ~SIGNBIT64; + } + } + return val; +} + +#define NEON_FN(dest, src1, src2) do { \ + if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \ + SET_QC(); \ + dest = 0; \ + } else { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = ~0; \ + } else { \ + dest = 0; \ + } \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp < 0) { \ + dest = src1 >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + if ((dest >> tmp) != src1) { \ + SET_QC(); \ + dest = ~0; \ + } \ + } \ + }} while (0) +NEON_VOP_ENV(qshlu_s8, neon_u8, 4) +NEON_VOP_ENV(qshlu_s16, neon_u16, 2) +#undef NEON_FN + +uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop) +{ + if ((int32_t)valop < 0) { + SET_QC(); + return 0; + } + return helper_neon_qshl_u32(env, valop, shiftop); +} + +uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop) +{ + if ((int64_t)valop < 0) { + SET_QC(); + return 0; + } + return helper_neon_qshl_u64(env, valop, shiftop); +} + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = ~0; \ + } else { \ + dest = 0; \ + } \ + } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ + dest = src1 >> (sizeof(src1) * 8 - 1); \ + } else if (tmp < 0) { \ + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + if ((dest >> tmp) != src1) { \ + SET_QC(); \ + dest = ~0; \ + } \ + }} while (0) +NEON_VOP_ENV(qrshl_u8, neon_u8, 4) +NEON_VOP_ENV(qrshl_u16, neon_u16, 2) +#undef NEON_FN + +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bit accumulator. */ +uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shiftop) +{ + uint32_t dest; + int8_t shift = (int8_t)shiftop; + if (shift >= 32) { + if (val) { + SET_QC(); + dest = ~0; + } else { + dest = 0; + } + } else if (shift < -32) { + dest = 0; + } else if (shift == -32) { + dest = val >> 31; + } else if (shift < 0) { + uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + if ((dest >> shift) != val) { + SET_QC(); + dest = ~0; + } + } + return dest; +} + +/* Handling addition overflow with 64 bit input values is more + * tricky than with 32 bit values. */ +uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop) +{ + int8_t shift = (int8_t)shiftop; + if (shift >= 64) { + if (val) { + SET_QC(); + val = ~0; + } + } else if (shift < -64) { + val = 0; + } else if (shift == -64) { + val >>= 63; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == UINT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x8000000000000000ULL; + } else { + val++; + val >>= 1; + } + } else { \ + uint64_t tmp = val; + val <<= shift; + if ((val >> shift) != tmp) { + SET_QC(); + val = ~0; + } + } + return val; +} + +#define NEON_FN(dest, src1, src2) do { \ + int8_t tmp; \ + tmp = (int8_t)src2; \ + if (tmp >= (ssize_t)sizeof(src1) * 8) { \ + if (src1) { \ + SET_QC(); \ + dest = (1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ + } else { \ + dest = 0; \ + } \ + } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ + dest = 0; \ + } else if (tmp < 0) { \ + dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + } else { \ + dest = src1 << tmp; \ + if ((dest >> tmp) != src1) { \ + SET_QC(); \ + dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ + if (src1 > 0) { \ + dest--; \ + } \ + } \ + }} while (0) +NEON_VOP_ENV(qrshl_s8, neon_s8, 4) +NEON_VOP_ENV(qrshl_s16, neon_s16, 2) +#undef NEON_FN + +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bit accumulator. */ +uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop) +{ + int32_t dest; + int32_t val = (int32_t)valop; + int8_t shift = (int8_t)shiftop; + if (shift >= 32) { + if (val) { + SET_QC(); + dest = (val >> 31) ^ ~SIGNBIT; + } else { + dest = 0; + } + } else if (shift <= -32) { + dest = 0; + } else if (shift < 0) { + int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); + dest = big_dest >> -shift; + } else { + dest = val << shift; + if ((dest >> shift) != val) { + SET_QC(); + dest = (val >> 31) ^ ~SIGNBIT; + } + } + return dest; +} + +/* Handling addition overflow with 64 bit input values is more + * tricky than with 32 bit values. */ +uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop) +{ + int8_t shift = (uint8_t)shiftop; + int64_t val = valop; + + if (shift >= 64) { + if (val) { + SET_QC(); + val = (val >> 63) ^ ~SIGNBIT64; + } + } else if (shift <= -64) { + val = 0; + } else if (shift < 0) { + val >>= (-shift - 1); + if (val == INT64_MAX) { + /* In this case, it means that the rounding constant is 1, + * and the addition would overflow. Return the actual + * result directly. */ + val = 0x4000000000000000ULL; + } else { + val++; + val >>= 1; + } + } else { + int64_t tmp = val; + val <<= shift; + if ((val >> shift) != tmp) { + SET_QC(); + val = (tmp >> 63) ^ ~SIGNBIT64; + } + } + return val; +} + +uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) +{ + uint32_t mask; + mask = (a ^ b) & 0x80808080u; + a &= ~0x80808080u; + b &= ~0x80808080u; + return (a + b) ^ mask; +} + +uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) +{ + uint32_t mask; + mask = (a ^ b) & 0x80008000u; + a &= ~0x80008000u; + b &= ~0x80008000u; + return (a + b) ^ mask; +} + +#define NEON_FN(dest, src1, src2) dest = src1 + src2 +NEON_POP(padd_u8, neon_u8, 4) +NEON_POP(padd_u16, neon_u16, 2) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) dest = src1 - src2 +NEON_VOP(sub_u8, neon_u8, 4) +NEON_VOP(sub_u16, neon_u16, 2) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) dest = src1 * src2 +NEON_VOP(mul_u8, neon_u8, 4) +NEON_VOP(mul_u16, neon_u16, 2) +#undef NEON_FN + +/* Polynomial multiplication is like integer multiplication except the + partial products are XORed, not added. */ +uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) +{ + uint32_t mask; + uint32_t result; + result = 0; + while (op1) { + mask = 0; + if (op1 & 1) + mask |= 0xff; + if (op1 & (1 << 8)) + mask |= (0xff << 8); + if (op1 & (1 << 16)) + mask |= (0xff << 16); + if (op1 & (1 << 24)) + mask |= (0xff << 24); + result ^= op2 & mask; + op1 = (op1 >> 1) & 0x7f7f7f7f; + op2 = (op2 << 1) & 0xfefefefe; + } + return result; +} + +uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) +{ + uint64_t result = 0; + uint64_t mask; + uint64_t op2ex = op2; + op2ex = (op2ex & 0xff) | + ((op2ex & 0xff00) << 8) | + ((op2ex & 0xff0000) << 16) | + ((op2ex & 0xff000000) << 24); + while (op1) { + mask = 0; + if (op1 & 1) { + mask |= 0xffff; + } + if (op1 & (1 << 8)) { + mask |= (0xffffU << 16); + } + if (op1 & (1 << 16)) { + mask |= (0xffffULL << 32); + } + if (op1 & (1 << 24)) { + mask |= (0xffffULL << 48); + } + result ^= op2ex & mask; + op1 = (op1 >> 1) & 0x7f7f7f7f; + op2ex <<= 1; + } + return result; +} + +#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 +NEON_VOP(tst_u8, neon_u8, 4) +NEON_VOP(tst_u16, neon_u16, 2) +NEON_VOP(tst_u32, neon_u32, 1) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 +NEON_VOP(ceq_u8, neon_u8, 4) +NEON_VOP(ceq_u16, neon_u16, 2) +NEON_VOP(ceq_u32, neon_u32, 1) +#undef NEON_FN + +#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src +NEON_VOP1(abs_s8, neon_s8, 4) +NEON_VOP1(abs_s16, neon_s16, 2) +#undef NEON_FN + +/* Count Leading Sign/Zero Bits. */ +static inline int do_clz8(uint8_t x) +{ + int n; + for (n = 8; x; n--) + x >>= 1; + return n; +} + +static inline int do_clz16(uint16_t x) +{ + int n; + for (n = 16; x; n--) + x >>= 1; + return n; +} + +#define NEON_FN(dest, src, dummy) dest = do_clz8(src) +NEON_VOP1(clz_u8, neon_u8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src, dummy) dest = do_clz16(src) +NEON_VOP1(clz_u16, neon_u16, 2) +#undef NEON_FN + +#define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 +NEON_VOP1(cls_s8, neon_s8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 +NEON_VOP1(cls_s16, neon_s16, 2) +#undef NEON_FN + +uint32_t HELPER(neon_cls_s32)(uint32_t x) +{ + int count; + if ((int32_t)x < 0) + x = ~x; + for (count = 32; x; count--) + x = x >> 1; + return count - 1; +} + +/* Bit count. */ +uint32_t HELPER(neon_cnt_u8)(uint32_t x) +{ + x = (x & 0x55555555) + ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); + return x; +} + +/* Reverse bits in each 8 bit word */ +uint32_t HELPER(neon_rbit_u8)(uint32_t x) +{ + x = ((x & 0xf0f0f0f0) >> 4) + | ((x & 0x0f0f0f0f) << 4); + x = ((x & 0x88888888) >> 3) + | ((x & 0x44444444) >> 1) + | ((x & 0x22222222) << 1) + | ((x & 0x11111111) << 3); + return x; +} + +#define NEON_QDMULH16(dest, src1, src2, round) do { \ + uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ + if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ + SET_QC(); \ + tmp = (tmp >> 31) ^ ~SIGNBIT; \ + } else { \ + tmp <<= 1; \ + } \ + if (round) { \ + int32_t old = tmp; \ + tmp += 1 << 15; \ + if ((int32_t)tmp < old) { \ + SET_QC(); \ + tmp = SIGNBIT - 1; \ + } \ + } \ + dest = tmp >> 16; \ + } while(0) +#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) +NEON_VOP_ENV(qdmulh_s16, neon_s16, 2) +#undef NEON_FN +#define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) +NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2) +#undef NEON_FN +#undef NEON_QDMULH16 + +#define NEON_QDMULH32(dest, src1, src2, round) do { \ + uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ + if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ + SET_QC(); \ + tmp = (tmp >> 63) ^ ~SIGNBIT64; \ + } else { \ + tmp <<= 1; \ + } \ + if (round) { \ + int64_t old = tmp; \ + tmp += (int64_t)1 << 31; \ + if ((int64_t)tmp < old) { \ + SET_QC(); \ + tmp = SIGNBIT64 - 1; \ + } \ + } \ + dest = tmp >> 32; \ + } while(0) +#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) +NEON_VOP_ENV(qdmulh_s32, neon_s32, 1) +#undef NEON_FN +#define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) +NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1) +#undef NEON_FN +#undef NEON_QDMULH32 + +uint32_t HELPER(neon_narrow_u8)(uint64_t x) +{ + return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) + | ((x >> 24) & 0xff000000u); +} + +uint32_t HELPER(neon_narrow_u16)(uint64_t x) +{ + return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); +} + +uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) +{ + return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) + | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); +} + +uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) +{ + return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); +} + +uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) +{ + x &= 0xff80ff80ff80ff80ull; + x += 0x0080008000800080ull; + return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) + | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); +} + +uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) +{ + x &= 0xffff8000ffff8000ull; + x += 0x0000800000008000ull; + return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); +} + +uint32_t HELPER(neon_unarrow_sat8)(CPUARMState *env, uint64_t x) +{ + uint16_t s; + uint8_t d; + uint32_t res = 0; +#define SAT8(n) \ + s = x >> n; \ + if (s & 0x8000) { \ + SET_QC(); \ + } else { \ + if (s > 0xff) { \ + d = 0xff; \ + SET_QC(); \ + } else { \ + d = s; \ + } \ + res |= (uint32_t)d << (n / 2); \ + } + + SAT8(0); + SAT8(16); + SAT8(32); + SAT8(48); +#undef SAT8 + return res; +} + +uint32_t HELPER(neon_narrow_sat_u8)(CPUARMState *env, uint64_t x) +{ + uint16_t s; + uint8_t d; + uint32_t res = 0; +#define SAT8(n) \ + s = x >> n; \ + if (s > 0xff) { \ + d = 0xff; \ + SET_QC(); \ + } else { \ + d = s; \ + } \ + res |= (uint32_t)d << (n / 2); + + SAT8(0); + SAT8(16); + SAT8(32); + SAT8(48); +#undef SAT8 + return res; +} + +uint32_t HELPER(neon_narrow_sat_s8)(CPUARMState *env, uint64_t x) +{ + int16_t s; + uint8_t d; + uint32_t res = 0; +#define SAT8(n) \ + s = x >> n; \ + if (s != (int8_t)s) { \ + d = (s >> 15) ^ 0x7f; \ + SET_QC(); \ + } else { \ + d = s; \ + } \ + res |= (uint32_t)d << (n / 2); + + SAT8(0); + SAT8(16); + SAT8(32); + SAT8(48); +#undef SAT8 + return res; +} + +uint32_t HELPER(neon_unarrow_sat16)(CPUARMState *env, uint64_t x) +{ + uint32_t high; + uint32_t low; + low = x; + if (low & 0x80000000) { + low = 0; + SET_QC(); + } else if (low > 0xffff) { + low = 0xffff; + SET_QC(); + } + high = x >> 32; + if (high & 0x80000000) { + high = 0; + SET_QC(); + } else if (high > 0xffff) { + high = 0xffff; + SET_QC(); + } + return low | (high << 16); +} + +uint32_t HELPER(neon_narrow_sat_u16)(CPUARMState *env, uint64_t x) +{ + uint32_t high; + uint32_t low; + low = x; + if (low > 0xffff) { + low = 0xffff; + SET_QC(); + } + high = x >> 32; + if (high > 0xffff) { + high = 0xffff; + SET_QC(); + } + return low | (high << 16); +} + +uint32_t HELPER(neon_narrow_sat_s16)(CPUARMState *env, uint64_t x) +{ + int32_t low; + int32_t high; + low = x; + if (low != (int16_t)low) { + low = (low >> 31) ^ 0x7fff; + SET_QC(); + } + high = x >> 32; + if (high != (int16_t)high) { + high = (high >> 31) ^ 0x7fff; + SET_QC(); + } + return (uint16_t)low | (high << 16); +} + +uint32_t HELPER(neon_unarrow_sat32)(CPUARMState *env, uint64_t x) +{ + if (x & 0x8000000000000000ull) { + SET_QC(); + return 0; + } + if (x > 0xffffffffu) { + SET_QC(); + return 0xffffffffu; + } + return x; +} + +uint32_t HELPER(neon_narrow_sat_u32)(CPUARMState *env, uint64_t x) +{ + if (x > 0xffffffffu) { + SET_QC(); + return 0xffffffffu; + } + return x; +} + +uint32_t HELPER(neon_narrow_sat_s32)(CPUARMState *env, uint64_t x) +{ + if ((int64_t)x != (int32_t)x) { + SET_QC(); + return ((int64_t)x >> 63) ^ 0x7fffffff; + } + return x; +} + +uint64_t HELPER(neon_widen_u8)(uint32_t x) +{ + uint64_t tmp; + uint64_t ret; + ret = (uint8_t)x; + tmp = (uint8_t)(x >> 8); + ret |= tmp << 16; + tmp = (uint8_t)(x >> 16); + ret |= tmp << 32; + tmp = (uint8_t)(x >> 24); + ret |= tmp << 48; + return ret; +} + +uint64_t HELPER(neon_widen_s8)(uint32_t x) +{ + uint64_t tmp; + uint64_t ret; + ret = (uint16_t)(int8_t)x; + tmp = (uint16_t)(int8_t)(x >> 8); + ret |= tmp << 16; + tmp = (uint16_t)(int8_t)(x >> 16); + ret |= tmp << 32; + tmp = (uint16_t)(int8_t)(x >> 24); + ret |= tmp << 48; + return ret; +} + +uint64_t HELPER(neon_widen_u16)(uint32_t x) +{ + uint64_t high = (uint16_t)(x >> 16); + return ((uint16_t)x) | (high << 32); +} + +uint64_t HELPER(neon_widen_s16)(uint32_t x) +{ + uint64_t high = (int16_t)(x >> 16); + return ((uint32_t)(int16_t)x) | (high << 32); +} + +uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) +{ + uint64_t mask; + mask = (a ^ b) & 0x8000800080008000ull; + a &= ~0x8000800080008000ull; + b &= ~0x8000800080008000ull; + return (a + b) ^ mask; +} + +uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) +{ + uint64_t mask; + mask = (a ^ b) & 0x8000000080000000ull; + a &= ~0x8000000080000000ull; + b &= ~0x8000000080000000ull; + return (a + b) ^ mask; +} + +uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) +{ + uint64_t tmp; + uint64_t tmp2; + + tmp = a & 0x0000ffff0000ffffull; + tmp += (a >> 16) & 0x0000ffff0000ffffull; + tmp2 = b & 0xffff0000ffff0000ull; + tmp2 += (b << 16) & 0xffff0000ffff0000ull; + return ( tmp & 0xffff) + | ((tmp >> 16) & 0xffff0000ull) + | ((tmp2 << 16) & 0xffff00000000ull) + | ( tmp2 & 0xffff000000000000ull); +} + +uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) +{ + uint32_t low = a + (a >> 32); + uint32_t high = b + (b >> 32); + return low + ((uint64_t)high << 32); +} + +uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) +{ + uint64_t mask; + mask = (a ^ ~b) & 0x8000800080008000ull; + a |= 0x8000800080008000ull; + b &= ~0x8000800080008000ull; + return (a - b) ^ mask; +} + +uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) +{ + uint64_t mask; + mask = (a ^ ~b) & 0x8000000080000000ull; + a |= 0x8000000080000000ull; + b &= ~0x8000000080000000ull; + return (a - b) ^ mask; +} + +uint64_t HELPER(neon_addl_saturate_s32)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint32_t x, y; + uint32_t low, high; + + x = a; + y = b; + low = x + y; + if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { + SET_QC(); + low = ((int32_t)x >> 31) ^ ~SIGNBIT; + } + x = a >> 32; + y = b >> 32; + high = x + y; + if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { + SET_QC(); + high = ((int32_t)x >> 31) ^ ~SIGNBIT; + } + return low | ((uint64_t)high << 32); +} + +uint64_t HELPER(neon_addl_saturate_s64)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint64_t result; + + result = a + b; + if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { + SET_QC(); + result = ((int64_t)a >> 63) ^ ~SIGNBIT64; + } + return result; +} + +/* We have to do the arithmetic in a larger type than + * the input type, because for example with a signed 32 bit + * op the absolute difference can overflow a signed 32 bit value. + */ +#define DO_ABD(dest, x, y, intype, arithtype) do { \ + arithtype tmp_x = (intype)(x); \ + arithtype tmp_y = (intype)(y); \ + dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \ + } while(0) + +uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + DO_ABD(result, a, b, uint8_t, uint32_t); + DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t); + result |= tmp << 16; + DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t); + result |= tmp << 32; + DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t); + result |= tmp << 48; + return result; +} + +uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + DO_ABD(result, a, b, int8_t, int32_t); + DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t); + result |= tmp << 16; + DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t); + result |= tmp << 32; + DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t); + result |= tmp << 48; + return result; +} + +uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + DO_ABD(result, a, b, uint16_t, uint32_t); + DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t); + return result | (tmp << 32); +} + +uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + DO_ABD(result, a, b, int16_t, int32_t); + DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t); + return result | (tmp << 32); +} + +uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) +{ + uint64_t result; + DO_ABD(result, a, b, uint32_t, uint64_t); + return result; +} + +uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) +{ + uint64_t result; + DO_ABD(result, a, b, int32_t, int64_t); + return result; +} +#undef DO_ABD + +/* Widening multiply. Named type is the source type. */ +#define DO_MULL(dest, x, y, type1, type2) do { \ + type1 tmp_x = x; \ + type1 tmp_y = y; \ + dest = (type2)((type2)tmp_x * (type2)tmp_y); \ + } while(0) + +uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + + DO_MULL(result, a, b, uint8_t, uint16_t); + DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); + result |= tmp << 16; + DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); + result |= tmp << 32; + DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); + result |= tmp << 48; + return result; +} + +uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + + DO_MULL(result, a, b, int8_t, uint16_t); + DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); + result |= tmp << 16; + DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); + result |= tmp << 32; + DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); + result |= tmp << 48; + return result; +} + +uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + + DO_MULL(result, a, b, uint16_t, uint32_t); + DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); + return result | (tmp << 32); +} + +uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) +{ + uint64_t tmp; + uint64_t result; + + DO_MULL(result, a, b, int16_t, uint32_t); + DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); + return result | (tmp << 32); +} + +uint64_t HELPER(neon_negl_u16)(uint64_t x) +{ + uint16_t tmp; + uint64_t result; + result = (uint16_t)-x; + tmp = -(x >> 16); + result |= (uint64_t)tmp << 16; + tmp = -(x >> 32); + result |= (uint64_t)tmp << 32; + tmp = -(x >> 48); + result |= (uint64_t)tmp << 48; + return result; +} + +uint64_t HELPER(neon_negl_u32)(uint64_t x) +{ + uint32_t low = -x; + uint32_t high = -(x >> 32); + return low | ((uint64_t)high << 32); +} + +/* Saturating sign manipulation. */ +/* ??? Make these use NEON_VOP1 */ +#define DO_QABS8(x) do { \ + if (x == (int8_t)0x80) { \ + x = 0x7f; \ + SET_QC(); \ + } else if (x < 0) { \ + x = -x; \ + }} while (0) +uint32_t HELPER(neon_qabs_s8)(CPUARMState *env, uint32_t x) +{ + neon_s8 vec; + NEON_UNPACK(neon_s8, vec, x); + DO_QABS8(vec.v1); + DO_QABS8(vec.v2); + DO_QABS8(vec.v3); + DO_QABS8(vec.v4); + NEON_PACK(neon_s8, x, vec); + return x; +} +#undef DO_QABS8 + +#define DO_QNEG8(x) do { \ + if (x == (int8_t)0x80) { \ + x = 0x7f; \ + SET_QC(); \ + } else { \ + x = -x; \ + }} while (0) +uint32_t HELPER(neon_qneg_s8)(CPUARMState *env, uint32_t x) +{ + neon_s8 vec; + NEON_UNPACK(neon_s8, vec, x); + DO_QNEG8(vec.v1); + DO_QNEG8(vec.v2); + DO_QNEG8(vec.v3); + DO_QNEG8(vec.v4); + NEON_PACK(neon_s8, x, vec); + return x; +} +#undef DO_QNEG8 + +#define DO_QABS16(x) do { \ + if (x == (int16_t)0x8000) { \ + x = 0x7fff; \ + SET_QC(); \ + } else if (x < 0) { \ + x = -x; \ + }} while (0) +uint32_t HELPER(neon_qabs_s16)(CPUARMState *env, uint32_t x) +{ + neon_s16 vec; + NEON_UNPACK(neon_s16, vec, x); + DO_QABS16(vec.v1); + DO_QABS16(vec.v2); + NEON_PACK(neon_s16, x, vec); + return x; +} +#undef DO_QABS16 + +#define DO_QNEG16(x) do { \ + if (x == (int16_t)0x8000) { \ + x = 0x7fff; \ + SET_QC(); \ + } else { \ + x = -x; \ + }} while (0) +uint32_t HELPER(neon_qneg_s16)(CPUARMState *env, uint32_t x) +{ + neon_s16 vec; + NEON_UNPACK(neon_s16, vec, x); + DO_QNEG16(vec.v1); + DO_QNEG16(vec.v2); + NEON_PACK(neon_s16, x, vec); + return x; +} +#undef DO_QNEG16 + +uint32_t HELPER(neon_qabs_s32)(CPUARMState *env, uint32_t x) +{ + if (x == SIGNBIT) { + SET_QC(); + x = ~SIGNBIT; + } else if ((int32_t)x < 0) { + x = -x; + } + return x; +} + +uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x) +{ + if (x == SIGNBIT) { + SET_QC(); + x = ~SIGNBIT; + } else { + x = -x; + } + return x; +} + +uint64_t HELPER(neon_qabs_s64)(CPUARMState *env, uint64_t x) +{ + if (x == SIGNBIT64) { + SET_QC(); + x = ~SIGNBIT64; + } else if ((int64_t)x < 0) { + x = -x; + } + return x; +} + +uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x) +{ + if (x == SIGNBIT64) { + SET_QC(); + x = ~SIGNBIT64; + } else { + x = -x; + } + return x; +} + +/* NEON Float helpers. */ +uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float32 f0 = make_float32(a); + float32 f1 = make_float32(b); + return float32_val(float32_abs(float32_sub(f0, f1, fpst))); +} + +/* Floating point comparisons produce an integer result. + * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do. + * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires. + */ +uint32_t HELPER(neon_ceq_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float32_eq_quiet(make_float32(a), make_float32(b), fpst); +} + +uint32_t HELPER(neon_cge_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float32_le(make_float32(b), make_float32(a), fpst); +} + +uint32_t HELPER(neon_cgt_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float32_lt(make_float32(b), make_float32(a), fpst); +} + +uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float32 f0 = float32_abs(make_float32(a)); + float32 f1 = float32_abs(make_float32(b)); + return -float32_le(f1, f0, fpst); +} + +uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float32 f0 = float32_abs(make_float32(a)); + float32 f1 = float32_abs(make_float32(b)); + return -float32_lt(f1, f0, fpst); +} + +uint64_t HELPER(neon_acge_f64)(uint64_t a, uint64_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float64 f0 = float64_abs(make_float64(a)); + float64 f1 = float64_abs(make_float64(b)); + return -float64_le(f1, f0, fpst); +} + +uint64_t HELPER(neon_acgt_f64)(uint64_t a, uint64_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float64 f0 = float64_abs(make_float64(a)); + float64 f1 = float64_abs(make_float64(b)); + return -float64_lt(f1, f0, fpst); +} + +#define ELEM(V, N, SIZE) (((V) >> ((N) * (SIZE))) & ((1ull << (SIZE)) - 1)) + +void HELPER(neon_qunzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zd0, 2, 8) << 8) + | (ELEM(zd0, 4, 8) << 16) | (ELEM(zd0, 6, 8) << 24) + | (ELEM(zd1, 0, 8) << 32) | (ELEM(zd1, 2, 8) << 40) + | (ELEM(zd1, 4, 8) << 48) | (ELEM(zd1, 6, 8) << 56); + uint64_t d1 = ELEM(zm0, 0, 8) | (ELEM(zm0, 2, 8) << 8) + | (ELEM(zm0, 4, 8) << 16) | (ELEM(zm0, 6, 8) << 24) + | (ELEM(zm1, 0, 8) << 32) | (ELEM(zm1, 2, 8) << 40) + | (ELEM(zm1, 4, 8) << 48) | (ELEM(zm1, 6, 8) << 56); + uint64_t m0 = ELEM(zd0, 1, 8) | (ELEM(zd0, 3, 8) << 8) + | (ELEM(zd0, 5, 8) << 16) | (ELEM(zd0, 7, 8) << 24) + | (ELEM(zd1, 1, 8) << 32) | (ELEM(zd1, 3, 8) << 40) + | (ELEM(zd1, 5, 8) << 48) | (ELEM(zd1, 7, 8) << 56); + uint64_t m1 = ELEM(zm0, 1, 8) | (ELEM(zm0, 3, 8) << 8) + | (ELEM(zm0, 5, 8) << 16) | (ELEM(zm0, 7, 8) << 24) + | (ELEM(zm1, 1, 8) << 32) | (ELEM(zm1, 3, 8) << 40) + | (ELEM(zm1, 5, 8) << 48) | (ELEM(zm1, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qunzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zd0, 2, 16) << 16) + | (ELEM(zd1, 0, 16) << 32) | (ELEM(zd1, 2, 16) << 48); + uint64_t d1 = ELEM(zm0, 0, 16) | (ELEM(zm0, 2, 16) << 16) + | (ELEM(zm1, 0, 16) << 32) | (ELEM(zm1, 2, 16) << 48); + uint64_t m0 = ELEM(zd0, 1, 16) | (ELEM(zd0, 3, 16) << 16) + | (ELEM(zd1, 1, 16) << 32) | (ELEM(zd1, 3, 16) << 48); + uint64_t m1 = ELEM(zm0, 1, 16) | (ELEM(zm0, 3, 16) << 16) + | (ELEM(zm1, 1, 16) << 32) | (ELEM(zm1, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qunzip32)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zd1, 0, 32) << 32); + uint64_t d1 = ELEM(zm0, 0, 32) | (ELEM(zm1, 0, 32) << 32); + uint64_t m0 = ELEM(zd0, 1, 32) | (ELEM(zd1, 1, 32) << 32); + uint64_t m1 = ELEM(zm0, 1, 32) | (ELEM(zm1, 1, 32) << 32); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_unzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zd, 2, 8) << 8) + | (ELEM(zd, 4, 8) << 16) | (ELEM(zd, 6, 8) << 24) + | (ELEM(zm, 0, 8) << 32) | (ELEM(zm, 2, 8) << 40) + | (ELEM(zm, 4, 8) << 48) | (ELEM(zm, 6, 8) << 56); + uint64_t m0 = ELEM(zd, 1, 8) | (ELEM(zd, 3, 8) << 8) + | (ELEM(zd, 5, 8) << 16) | (ELEM(zd, 7, 8) << 24) + | (ELEM(zm, 1, 8) << 32) | (ELEM(zm, 3, 8) << 40) + | (ELEM(zm, 5, 8) << 48) | (ELEM(zm, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); +} + +void HELPER(neon_unzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zd, 2, 16) << 16) + | (ELEM(zm, 0, 16) << 32) | (ELEM(zm, 2, 16) << 48); + uint64_t m0 = ELEM(zd, 1, 16) | (ELEM(zd, 3, 16) << 16) + | (ELEM(zm, 1, 16) << 32) | (ELEM(zm, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); +} + +void HELPER(neon_qzip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 8) | (ELEM(zm0, 0, 8) << 8) + | (ELEM(zd0, 1, 8) << 16) | (ELEM(zm0, 1, 8) << 24) + | (ELEM(zd0, 2, 8) << 32) | (ELEM(zm0, 2, 8) << 40) + | (ELEM(zd0, 3, 8) << 48) | (ELEM(zm0, 3, 8) << 56); + uint64_t d1 = ELEM(zd0, 4, 8) | (ELEM(zm0, 4, 8) << 8) + | (ELEM(zd0, 5, 8) << 16) | (ELEM(zm0, 5, 8) << 24) + | (ELEM(zd0, 6, 8) << 32) | (ELEM(zm0, 6, 8) << 40) + | (ELEM(zd0, 7, 8) << 48) | (ELEM(zm0, 7, 8) << 56); + uint64_t m0 = ELEM(zd1, 0, 8) | (ELEM(zm1, 0, 8) << 8) + | (ELEM(zd1, 1, 8) << 16) | (ELEM(zm1, 1, 8) << 24) + | (ELEM(zd1, 2, 8) << 32) | (ELEM(zm1, 2, 8) << 40) + | (ELEM(zd1, 3, 8) << 48) | (ELEM(zm1, 3, 8) << 56); + uint64_t m1 = ELEM(zd1, 4, 8) | (ELEM(zm1, 4, 8) << 8) + | (ELEM(zd1, 5, 8) << 16) | (ELEM(zm1, 5, 8) << 24) + | (ELEM(zd1, 6, 8) << 32) | (ELEM(zm1, 6, 8) << 40) + | (ELEM(zd1, 7, 8) << 48) | (ELEM(zm1, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qzip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 16) | (ELEM(zm0, 0, 16) << 16) + | (ELEM(zd0, 1, 16) << 32) | (ELEM(zm0, 1, 16) << 48); + uint64_t d1 = ELEM(zd0, 2, 16) | (ELEM(zm0, 2, 16) << 16) + | (ELEM(zd0, 3, 16) << 32) | (ELEM(zm0, 3, 16) << 48); + uint64_t m0 = ELEM(zd1, 0, 16) | (ELEM(zm1, 0, 16) << 16) + | (ELEM(zd1, 1, 16) << 32) | (ELEM(zm1, 1, 16) << 48); + uint64_t m1 = ELEM(zd1, 2, 16) | (ELEM(zm1, 2, 16) << 16) + | (ELEM(zd1, 3, 16) << 32) | (ELEM(zm1, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_qzip32)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm0 = float64_val(env->vfp.regs[rm]); + uint64_t zm1 = float64_val(env->vfp.regs[rm + 1]); + uint64_t zd0 = float64_val(env->vfp.regs[rd]); + uint64_t zd1 = float64_val(env->vfp.regs[rd + 1]); + uint64_t d0 = ELEM(zd0, 0, 32) | (ELEM(zm0, 0, 32) << 32); + uint64_t d1 = ELEM(zd0, 1, 32) | (ELEM(zm0, 1, 32) << 32); + uint64_t m0 = ELEM(zd1, 0, 32) | (ELEM(zm1, 0, 32) << 32); + uint64_t m1 = ELEM(zd1, 1, 32) | (ELEM(zm1, 1, 32) << 32); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rm + 1] = make_float64(m1); + env->vfp.regs[rd] = make_float64(d0); + env->vfp.regs[rd + 1] = make_float64(d1); +} + +void HELPER(neon_zip8)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 8) | (ELEM(zm, 0, 8) << 8) + | (ELEM(zd, 1, 8) << 16) | (ELEM(zm, 1, 8) << 24) + | (ELEM(zd, 2, 8) << 32) | (ELEM(zm, 2, 8) << 40) + | (ELEM(zd, 3, 8) << 48) | (ELEM(zm, 3, 8) << 56); + uint64_t m0 = ELEM(zd, 4, 8) | (ELEM(zm, 4, 8) << 8) + | (ELEM(zd, 5, 8) << 16) | (ELEM(zm, 5, 8) << 24) + | (ELEM(zd, 6, 8) << 32) | (ELEM(zm, 6, 8) << 40) + | (ELEM(zd, 7, 8) << 48) | (ELEM(zm, 7, 8) << 56); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); +} + +void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) +{ + uint64_t zm = float64_val(env->vfp.regs[rm]); + uint64_t zd = float64_val(env->vfp.regs[rd]); + uint64_t d0 = ELEM(zd, 0, 16) | (ELEM(zm, 0, 16) << 16) + | (ELEM(zd, 1, 16) << 32) | (ELEM(zm, 1, 16) << 48); + uint64_t m0 = ELEM(zd, 2, 16) | (ELEM(zm, 2, 16) << 16) + | (ELEM(zd, 3, 16) << 32) | (ELEM(zm, 3, 16) << 48); + env->vfp.regs[rm] = make_float64(m0); + env->vfp.regs[rd] = make_float64(d0); +} + +/* Helper function for 64 bit polynomial multiply case: + * perform PolynomialMult(op1, op2) and return either the top or + * bottom half of the 128 bit result. + */ +uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + for (bitnum = 0; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 << bitnum; + } + } + return res; +} +uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + /* bit 0 of op1 can't influence the high 64 bits at all */ + for (bitnum = 1; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 >> (64 - bitnum); + } + } + return res; +} diff --git a/target-arm/op_addsub.h b/target-arm/op_addsub.h new file mode 100644 index 00000000..ca4a1893 --- /dev/null +++ b/target-arm/op_addsub.h @@ -0,0 +1,103 @@ +/* + * ARMv6 integer SIMD operations. + * + * Copyright (c) 2007 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. + */ + +#ifdef ARITH_GE +#define GE_ARG , void *gep +#define DECLARE_GE uint32_t ge = 0 +#define SET_GE *(uint32_t *)gep = ge +#else +#define GE_ARG +#define DECLARE_GE do{}while(0) +#define SET_GE do{}while(0) +#endif + +#define RESULT(val, n, width) \ + res |= ((uint32_t)(glue(glue(uint,width),_t))(val)) << (n * width) + +uint32_t HELPER(glue(PFX,add16))(uint32_t a, uint32_t b GE_ARG) +{ + uint32_t res = 0; + DECLARE_GE; + + ADD16(a, b, 0); + ADD16(a >> 16, b >> 16, 1); + SET_GE; + return res; +} + +uint32_t HELPER(glue(PFX,add8))(uint32_t a, uint32_t b GE_ARG) +{ + uint32_t res = 0; + DECLARE_GE; + + ADD8(a, b, 0); + ADD8(a >> 8, b >> 8, 1); + ADD8(a >> 16, b >> 16, 2); + ADD8(a >> 24, b >> 24, 3); + SET_GE; + return res; +} + +uint32_t HELPER(glue(PFX,sub16))(uint32_t a, uint32_t b GE_ARG) +{ + uint32_t res = 0; + DECLARE_GE; + + SUB16(a, b, 0); + SUB16(a >> 16, b >> 16, 1); + SET_GE; + return res; +} + +uint32_t HELPER(glue(PFX,sub8))(uint32_t a, uint32_t b GE_ARG) +{ + uint32_t res = 0; + DECLARE_GE; + + SUB8(a, b, 0); + SUB8(a >> 8, b >> 8, 1); + SUB8(a >> 16, b >> 16, 2); + SUB8(a >> 24, b >> 24, 3); + SET_GE; + return res; +} + +uint32_t HELPER(glue(PFX,subaddx))(uint32_t a, uint32_t b GE_ARG) +{ + uint32_t res = 0; + DECLARE_GE; + + ADD16(a, b >> 16, 0); + SUB16(a >> 16, b, 1); + SET_GE; + return res; +} + +uint32_t HELPER(glue(PFX,addsubx))(uint32_t a, uint32_t b GE_ARG) +{ + uint32_t res = 0; + DECLARE_GE; + + SUB16(a, b >> 16, 0); + ADD16(a >> 16, b, 1); + SET_GE; + return res; +} + +#undef GE_ARG +#undef DECLARE_GE +#undef SET_GE +#undef RESULT + +#undef ARITH_GE +#undef PFX +#undef ADD16 +#undef SUB16 +#undef ADD8 +#undef SUB8 diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c new file mode 100644 index 00000000..663c05d1 --- /dev/null +++ b/target-arm/op_helper.c @@ -0,0 +1,971 @@ +/* + * ARM helper routines + * + * Copyright (c) 2005-2007 CodeSourcery, LLC + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include "cpu.h" +#include "exec/helper-proto.h" +#include "internals.h" +#include "exec/cpu_ldst.h" + +#define SIGNBIT (uint32_t)0x80000000 +#define SIGNBIT64 ((uint64_t)1 << 63) + +static void raise_exception(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + + assert(!excp_is_internal(excp)); + cs->exception_index = excp; + env->exception.syndrome = syndrome; + env->exception.target_el = target_el; + cpu_loop_exit(cs); +} + +static int exception_target_el(CPUARMState *env) +{ + int target_el = MAX(1, arm_current_el(env)); + + /* No such thing as secure EL1 if EL3 is aarch32, so update the target EL + * to EL3 in this case. + */ + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) { + target_el = 3; + } + + return target_el; +} + +uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def, + uint32_t rn, uint32_t maxindex) +{ + uint32_t val; + uint32_t tmp; + int index; + int shift; + uint64_t *table; + table = (uint64_t *)&env->vfp.regs[rn]; + val = 0; + for (shift = 0; shift < 32; shift += 8) { + index = (ireg >> shift) & 0xff; + if (index < maxindex) { + tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff; + val |= tmp << shift; + } else { + val |= def & (0xff << shift); + } + } + return val; +} + +#if !defined(CONFIG_USER_ONLY) + +/* try to fill the TLB and return an exception if error. If retaddr is + * NULL, it means that the function was called in C code (i.e. not + * from generated code or from helper.c) + */ +void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx, + uintptr_t retaddr) +{ + bool ret; + uint32_t fsr = 0; + + ret = arm_tlb_fill(cs, addr, is_write, mmu_idx, &fsr); + if (unlikely(ret)) { + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint32_t syn, exc; + bool same_el = (arm_current_el(env) != 0); + + if (retaddr) { + /* now we have a real cpu fault */ + cpu_restore_state(cs, retaddr); + } + + /* AArch64 syndrome does not have an LPAE bit */ + syn = fsr & ~(1 << 9); + + /* For insn and data aborts we assume there is no instruction syndrome + * information; this is always true for exceptions reported to EL1. + */ + if (is_write == 2) { + syn = syn_insn_abort(same_el, 0, 0, syn); + exc = EXCP_PREFETCH_ABORT; + } else { + syn = syn_data_abort(same_el, 0, 0, 0, is_write == 1, syn); + if (is_write == 1 && arm_feature(env, ARM_FEATURE_V6)) { + fsr |= (1 << 11); + } + exc = EXCP_DATA_ABORT; + } + + env->exception.vaddress = addr; + env->exception.fsr = fsr; + raise_exception(env, exc, syn, exception_target_el(env)); + } +} +#endif + +uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) + env->QF = 1; + return res; +} + +uint32_t HELPER(add_saturate)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) { + env->QF = 1; + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint32_t HELPER(sub_saturate)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) { + env->QF = 1; + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint32_t HELPER(double_saturate)(CPUARMState *env, int32_t val) +{ + uint32_t res; + if (val >= 0x40000000) { + res = ~SIGNBIT; + env->QF = 1; + } else if (val <= (int32_t)0xc0000000) { + res = SIGNBIT; + env->QF = 1; + } else { + res = val << 1; + } + return res; +} + +uint32_t HELPER(add_usaturate)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + env->QF = 1; + res = ~0; + } + return res; +} + +uint32_t HELPER(sub_usaturate)(CPUARMState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + env->QF = 1; + res = 0; + } + return res; +} + +/* Signed saturation. */ +static inline uint32_t do_ssat(CPUARMState *env, int32_t val, int shift) +{ + int32_t top; + uint32_t mask; + + top = val >> shift; + mask = (1u << shift) - 1; + if (top > 0) { + env->QF = 1; + return mask; + } else if (top < -1) { + env->QF = 1; + return ~mask; + } + return val; +} + +/* Unsigned saturation. */ +static inline uint32_t do_usat(CPUARMState *env, int32_t val, int shift) +{ + uint32_t max; + + max = (1u << shift) - 1; + if (val < 0) { + env->QF = 1; + return 0; + } else if (val > max) { + env->QF = 1; + return max; + } + return val; +} + +/* Signed saturate. */ +uint32_t HELPER(ssat)(CPUARMState *env, uint32_t x, uint32_t shift) +{ + return do_ssat(env, x, shift); +} + +/* Dual halfword signed saturate. */ +uint32_t HELPER(ssat16)(CPUARMState *env, uint32_t x, uint32_t shift) +{ + uint32_t res; + + res = (uint16_t)do_ssat(env, (int16_t)x, shift); + res |= do_ssat(env, ((int32_t)x) >> 16, shift) << 16; + return res; +} + +/* Unsigned saturate. */ +uint32_t HELPER(usat)(CPUARMState *env, uint32_t x, uint32_t shift) +{ + return do_usat(env, x, shift); +} + +/* Dual halfword unsigned saturate. */ +uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift) +{ + uint32_t res; + + res = (uint16_t)do_usat(env, (int16_t)x, shift); + res |= do_usat(env, ((int32_t)x) >> 16, shift) << 16; + return res; +} + +/* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped. + * The function returns the target EL (1-3) if the instruction is to be trapped; + * otherwise it returns 0 indicating it is not trapped. + */ +static inline int check_wfx_trap(CPUARMState *env, bool is_wfe) +{ + int cur_el = arm_current_el(env); + uint64_t mask; + + /* If we are currently in EL0 then we need to check if SCTLR is set up for + * WFx instructions being trapped to EL1. These trap bits don't exist in v7. + */ + if (cur_el < 1 && arm_feature(env, ARM_FEATURE_V8)) { + int target_el; + + mask = is_wfe ? SCTLR_nTWE : SCTLR_nTWI; + if (arm_is_secure_below_el3(env) && !arm_el_is_aa64(env, 3)) { + /* Secure EL0 and Secure PL1 is at EL3 */ + target_el = 3; + } else { + target_el = 1; + } + + if (!(env->cp15.sctlr_el[target_el] & mask)) { + return target_el; + } + } + + /* We are not trapping to EL1; trap to EL2 if HCR_EL2 requires it + * No need for ARM_FEATURE check as if HCR_EL2 doesn't exist the + * bits will be zero indicating no trap. + */ + if (cur_el < 2 && !arm_is_secure(env)) { + mask = (is_wfe) ? HCR_TWE : HCR_TWI; + if (env->cp15.hcr_el2 & mask) { + return 2; + } + } + + /* We are not trapping to EL1 or EL2; trap to EL3 if SCR_EL3 requires it */ + if (cur_el < 3) { + mask = (is_wfe) ? SCR_TWE : SCR_TWI; + if (env->cp15.scr_el3 & mask) { + return 3; + } + } + + return 0; +} + +void HELPER(wfi)(CPUARMState *env) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + int target_el = check_wfx_trap(env, false); + + if (cpu_has_work(cs)) { + /* Don't bother to go into our "low power state" if + * we would just wake up immediately. + */ + return; + } + + if (target_el) { + env->pc -= 4; + raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0), target_el); + } + + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +} + +void HELPER(wfe)(CPUARMState *env) +{ + /* This is a hint instruction that is semantically different + * from YIELD even though we currently implement it identically. + * Don't actually halt the CPU, just yield back to top + * level loop. This is not going into a "low power state" + * (ie halting until some event occurs), so we never take + * a configurable trap to a different exception level. + */ + HELPER(yield)(env); +} + +void HELPER(yield)(CPUARMState *env) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); + + /* This is a non-trappable hint instruction that generally indicates + * that the guest is currently busy-looping. Yield control back to the + * top level loop so that a more deserving VCPU has a chance to run. + */ + cs->exception_index = EXCP_YIELD; + cpu_loop_exit(cs); +} + +/* Raise an internal-to-QEMU exception. This is limited to only + * those EXCP values which are special cases for QEMU to interrupt + * execution and not to be used for exceptions which are passed to + * the guest (those must all have syndrome information and thus should + * use exception_with_syndrome). + */ +void HELPER(exception_internal)(CPUARMState *env, uint32_t excp) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + + assert(excp_is_internal(excp)); + cs->exception_index = excp; + cpu_loop_exit(cs); +} + +/* Raise an exception with the specified syndrome register value */ +void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) +{ + raise_exception(env, excp, syndrome, target_el); +} + +uint32_t HELPER(cpsr_read)(CPUARMState *env) +{ + return cpsr_read(env) & ~(CPSR_EXEC | CPSR_RESERVED); +} + +void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask) +{ + cpsr_write(env, val, mask); +} + +/* Access to user mode registers from privileged modes. */ +uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno) +{ + uint32_t val; + + if (regno == 13) { + val = env->banked_r13[0]; + } else if (regno == 14) { + val = env->banked_r14[0]; + } else if (regno >= 8 + && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) { + val = env->usr_regs[regno - 8]; + } else { + val = env->regs[regno]; + } + return val; +} + +void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val) +{ + if (regno == 13) { + env->banked_r13[0] = val; + } else if (regno == 14) { + env->banked_r14[0] = val; + } else if (regno >= 8 + && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) { + env->usr_regs[regno - 8] = val; + } else { + env->regs[regno] = val; + } +} + +void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip, uint32_t syndrome) +{ + const ARMCPRegInfo *ri = rip; + int target_el; + + if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14 + && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) { + raise_exception(env, EXCP_UDEF, syndrome, exception_target_el(env)); + } + + if (!ri->accessfn) { + return; + } + + switch (ri->accessfn(env, ri)) { + case CP_ACCESS_OK: + return; + case CP_ACCESS_TRAP: + target_el = exception_target_el(env); + break; + case CP_ACCESS_TRAP_EL2: + /* Requesting a trap to EL2 when we're in EL3 or S-EL0/1 is + * a bug in the access function. + */ + assert(!arm_is_secure(env) && arm_current_el(env) != 3); + target_el = 2; + break; + case CP_ACCESS_TRAP_EL3: + target_el = 3; + break; + case CP_ACCESS_TRAP_UNCATEGORIZED: + target_el = exception_target_el(env); + syndrome = syn_uncategorized(); + break; + default: + g_assert_not_reached(); + } + + raise_exception(env, EXCP_UDEF, syndrome, target_el); +} + +void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value) +{ + const ARMCPRegInfo *ri = rip; + + ri->writefn(env, ri, value); +} + +uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip) +{ + const ARMCPRegInfo *ri = rip; + + return ri->readfn(env, ri); +} + +void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value) +{ + const ARMCPRegInfo *ri = rip; + + ri->writefn(env, ri, value); +} + +uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip) +{ + const ARMCPRegInfo *ri = rip; + + return ri->readfn(env, ri); +} + +void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm) +{ + /* MSR_i to update PSTATE. This is OK from EL0 only if UMA is set. + * Note that SPSel is never OK from EL0; we rely on handle_msr_i() + * to catch that case at translate time. + */ + if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) { + uint32_t syndrome = syn_aa64_sysregtrap(0, extract32(op, 0, 3), + extract32(op, 3, 3), 4, + imm, 0x1f, 0); + raise_exception(env, EXCP_UDEF, syndrome, exception_target_el(env)); + } + + switch (op) { + case 0x05: /* SPSel */ + update_spsel(env, imm); + break; + case 0x1e: /* DAIFSet */ + env->daif |= (imm << 6) & PSTATE_DAIF; + break; + case 0x1f: /* DAIFClear */ + env->daif &= ~((imm << 6) & PSTATE_DAIF); + break; + default: + g_assert_not_reached(); + } +} + +void HELPER(clear_pstate_ss)(CPUARMState *env) +{ + env->pstate &= ~PSTATE_SS; +} + +void HELPER(pre_hvc)(CPUARMState *env) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int cur_el = arm_current_el(env); + /* FIXME: Use actual secure state. */ + bool secure = false; + bool undef; + + if (arm_is_psci_call(cpu, EXCP_HVC)) { + /* If PSCI is enabled and this looks like a valid PSCI call then + * that overrides the architecturally mandated HVC behaviour. + */ + return; + } + + if (!arm_feature(env, ARM_FEATURE_EL2)) { + /* If EL2 doesn't exist, HVC always UNDEFs */ + undef = true; + } else if (arm_feature(env, ARM_FEATURE_EL3)) { + /* EL3.HCE has priority over EL2.HCD. */ + undef = !(env->cp15.scr_el3 & SCR_HCE); + } else { + undef = env->cp15.hcr_el2 & HCR_HCD; + } + + /* In ARMv7 and ARMv8/AArch32, HVC is undef in secure state. + * For ARMv8/AArch64, HVC is allowed in EL3. + * Note that we've already trapped HVC from EL0 at translation + * time. + */ + if (secure && (!is_a64(env) || cur_el == 1)) { + undef = true; + } + + if (undef) { + raise_exception(env, EXCP_UDEF, syn_uncategorized(), + exception_target_el(env)); + } +} + +void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome) +{ + ARMCPU *cpu = arm_env_get_cpu(env); + int cur_el = arm_current_el(env); + bool secure = arm_is_secure(env); + bool smd = env->cp15.scr_el3 & SCR_SMD; + /* On ARMv8 AArch32, SMD only applies to NS state. + * On ARMv7 SMD only applies to NS state and only if EL2 is available. + * For ARMv7 non EL2, we force SMD to zero so we don't need to re-check + * the EL2 condition here. + */ + bool undef = is_a64(env) ? smd : (!secure && smd); + + if (arm_is_psci_call(cpu, EXCP_SMC)) { + /* If PSCI is enabled and this looks like a valid PSCI call then + * that overrides the architecturally mandated SMC behaviour. + */ + return; + } + + if (!arm_feature(env, ARM_FEATURE_EL3)) { + /* If we have no EL3 then SMC always UNDEFs */ + undef = true; + } else if (!secure && cur_el == 1 && (env->cp15.hcr_el2 & HCR_TSC)) { + /* In NS EL1, HCR controlled routing to EL2 has priority over SMD. */ + raise_exception(env, EXCP_HYP_TRAP, syndrome, 2); + } + + if (undef) { + raise_exception(env, EXCP_UDEF, syn_uncategorized(), + exception_target_el(env)); + } +} + +void HELPER(exception_return)(CPUARMState *env) +{ + int cur_el = arm_current_el(env); + unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); + uint32_t spsr = env->banked_spsr[spsr_idx]; + int new_el; + + aarch64_save_sp(env, cur_el); + + env->exclusive_addr = -1; + + /* We must squash the PSTATE.SS bit to zero unless both of the + * following hold: + * 1. debug exceptions are currently disabled + * 2. singlestep will be active in the EL we return to + * We check 1 here and 2 after we've done the pstate/cpsr write() to + * transition to the EL we're going to. + */ + if (arm_generate_debug_exceptions(env)) { + spsr &= ~PSTATE_SS; + } + + if (spsr & PSTATE_nRW) { + /* TODO: We currently assume EL1/2/3 are running in AArch64. */ + env->aarch64 = 0; + new_el = 0; + env->uncached_cpsr = 0x10; + cpsr_write(env, spsr, ~0); + if (!arm_singlestep_active(env)) { + env->uncached_cpsr &= ~PSTATE_SS; + } + aarch64_sync_64_to_32(env); + + env->regs[15] = env->elr_el[1] & ~0x1; + } else { + new_el = extract32(spsr, 2, 2); + if (new_el > cur_el + || (new_el == 2 && !arm_feature(env, ARM_FEATURE_EL2))) { + /* Disallow return to an EL which is unimplemented or higher + * than the current one. + */ + goto illegal_return; + } + if (extract32(spsr, 1, 1)) { + /* Return with reserved M[1] bit set */ + goto illegal_return; + } + if (new_el == 0 && (spsr & PSTATE_SP)) { + /* Return to EL0 with M[0] bit set */ + goto illegal_return; + } + env->aarch64 = 1; + pstate_write(env, spsr); + if (!arm_singlestep_active(env)) { + env->pstate &= ~PSTATE_SS; + } + aarch64_restore_sp(env, new_el); + env->pc = env->elr_el[cur_el]; + } + + return; + +illegal_return: + /* Illegal return events of various kinds have architecturally + * mandated behaviour: + * restore NZCV and DAIF from SPSR_ELx + * set PSTATE.IL + * restore PC from ELR_ELx + * no change to exception level, execution state or stack pointer + */ + env->pstate |= PSTATE_IL; + env->pc = env->elr_el[cur_el]; + spsr &= PSTATE_NZCV | PSTATE_DAIF; + spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF); + pstate_write(env, spsr); + if (!arm_singlestep_active(env)) { + env->pstate &= ~PSTATE_SS; + } +} + +/* Return true if the linked breakpoint entry lbn passes its checks */ +static bool linked_bp_matches(ARMCPU *cpu, int lbn) +{ + CPUARMState *env = &cpu->env; + uint64_t bcr = env->cp15.dbgbcr[lbn]; + int brps = extract32(cpu->dbgdidr, 24, 4); + int ctx_cmps = extract32(cpu->dbgdidr, 20, 4); + int bt; + uint32_t contextidr; + + /* Links to unimplemented or non-context aware breakpoints are + * CONSTRAINED UNPREDICTABLE: either behave as if disabled, or + * as if linked to an UNKNOWN context-aware breakpoint (in which + * case DBGWCR_EL1.LBN must indicate that breakpoint). + * We choose the former. + */ + if (lbn > brps || lbn < (brps - ctx_cmps)) { + return false; + } + + bcr = env->cp15.dbgbcr[lbn]; + + if (extract64(bcr, 0, 1) == 0) { + /* Linked breakpoint disabled : generate no events */ + return false; + } + + bt = extract64(bcr, 20, 4); + + /* We match the whole register even if this is AArch32 using the + * short descriptor format (in which case it holds both PROCID and ASID), + * since we don't implement the optional v7 context ID masking. + */ + contextidr = extract64(env->cp15.contextidr_el[1], 0, 32); + + switch (bt) { + case 3: /* linked context ID match */ + if (arm_current_el(env) > 1) { + /* Context matches never fire in EL2 or (AArch64) EL3 */ + return false; + } + return (contextidr == extract64(env->cp15.dbgbvr[lbn], 0, 32)); + case 5: /* linked address mismatch (reserved in AArch64) */ + case 9: /* linked VMID match (reserved if no EL2) */ + case 11: /* linked context ID and VMID match (reserved if no EL2) */ + default: + /* Links to Unlinked context breakpoints must generate no + * events; we choose to do the same for reserved values too. + */ + return false; + } + + return false; +} + +static bool bp_wp_matches(ARMCPU *cpu, int n, bool is_wp) +{ + CPUARMState *env = &cpu->env; + uint64_t cr; + int pac, hmc, ssc, wt, lbn; + /* Note that for watchpoints the check is against the CPU security + * state, not the S/NS attribute on the offending data access. + */ + bool is_secure = arm_is_secure(env); + int access_el = arm_current_el(env); + + if (is_wp) { + CPUWatchpoint *wp = env->cpu_watchpoint[n]; + + if (!wp || !(wp->flags & BP_WATCHPOINT_HIT)) { + return false; + } + cr = env->cp15.dbgwcr[n]; + if (wp->hitattrs.user) { + /* The LDRT/STRT/LDT/STT "unprivileged access" instructions should + * match watchpoints as if they were accesses done at EL0, even if + * the CPU is at EL1 or higher. + */ + access_el = 0; + } + } else { + uint64_t pc = is_a64(env) ? env->pc : env->regs[15]; + + if (!env->cpu_breakpoint[n] || env->cpu_breakpoint[n]->pc != pc) { + return false; + } + cr = env->cp15.dbgbcr[n]; + } + /* The WATCHPOINT_HIT flag guarantees us that the watchpoint is + * enabled and that the address and access type match; for breakpoints + * we know the address matched; check the remaining fields, including + * linked breakpoints. We rely on WCR and BCR having the same layout + * for the LBN, SSC, HMC, PAC/PMC and is-linked fields. + * Note that some combinations of {PAC, HMC, SSC} are reserved and + * must act either like some valid combination or as if the watchpoint + * were disabled. We choose the former, and use this together with + * the fact that EL3 must always be Secure and EL2 must always be + * Non-Secure to simplify the code slightly compared to the full + * table in the ARM ARM. + */ + pac = extract64(cr, 1, 2); + hmc = extract64(cr, 13, 1); + ssc = extract64(cr, 14, 2); + + switch (ssc) { + case 0: + break; + case 1: + case 3: + if (is_secure) { + return false; + } + break; + case 2: + if (!is_secure) { + return false; + } + break; + } + + switch (access_el) { + case 3: + case 2: + if (!hmc) { + return false; + } + break; + case 1: + if (extract32(pac, 0, 1) == 0) { + return false; + } + break; + case 0: + if (extract32(pac, 1, 1) == 0) { + return false; + } + break; + default: + g_assert_not_reached(); + } + + wt = extract64(cr, 20, 1); + lbn = extract64(cr, 16, 4); + + if (wt && !linked_bp_matches(cpu, lbn)) { + return false; + } + + return true; +} + +static bool check_watchpoints(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + int n; + + /* If watchpoints are disabled globally or we can't take debug + * exceptions here then watchpoint firings are ignored. + */ + if (extract32(env->cp15.mdscr_el1, 15, 1) == 0 + || !arm_generate_debug_exceptions(env)) { + return false; + } + + for (n = 0; n < ARRAY_SIZE(env->cpu_watchpoint); n++) { + if (bp_wp_matches(cpu, n, true)) { + return true; + } + } + return false; +} + +static bool check_breakpoints(ARMCPU *cpu) +{ + CPUARMState *env = &cpu->env; + int n; + + /* If breakpoints are disabled globally or we can't take debug + * exceptions here then breakpoint firings are ignored. + */ + if (extract32(env->cp15.mdscr_el1, 15, 1) == 0 + || !arm_generate_debug_exceptions(env)) { + return false; + } + + for (n = 0; n < ARRAY_SIZE(env->cpu_breakpoint); n++) { + if (bp_wp_matches(cpu, n, false)) { + return true; + } + } + return false; +} + +void arm_debug_excp_handler(CPUState *cs) +{ + /* Called by core code when a watchpoint or breakpoint fires; + * need to check which one and raise the appropriate exception. + */ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + CPUWatchpoint *wp_hit = cs->watchpoint_hit; + + if (wp_hit) { + if (wp_hit->flags & BP_CPU) { + cs->watchpoint_hit = NULL; + if (check_watchpoints(cpu)) { + bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0; + bool same_el = arm_debug_target_el(env) == arm_current_el(env); + + if (extended_addresses_enabled(env)) { + env->exception.fsr = (1 << 9) | 0x22; + } else { + env->exception.fsr = 0x2; + } + env->exception.vaddress = wp_hit->hitaddr; + raise_exception(env, EXCP_DATA_ABORT, + syn_watchpoint(same_el, 0, wnr), + arm_debug_target_el(env)); + } else { + cpu_resume_from_signal(cs, NULL); + } + } + } else { + if (check_breakpoints(cpu)) { + bool same_el = (arm_debug_target_el(env) == arm_current_el(env)); + if (extended_addresses_enabled(env)) { + env->exception.fsr = (1 << 9) | 0x22; + } else { + env->exception.fsr = 0x2; + } + /* FAR is UNKNOWN, so doesn't need setting */ + raise_exception(env, EXCP_PREFETCH_ABORT, + syn_breakpoint(same_el), + arm_debug_target_el(env)); + } + } +} + +/* ??? Flag setting arithmetic is awkward because we need to do comparisons. + The only way to do that in TCG is a conditional branch, which clobbers + all our temporaries. For now implement these as helper functions. */ + +/* Similarly for variable shift instructions. */ + +uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i) +{ + int shift = i & 0xff; + if (shift >= 32) { + if (shift == 32) + env->CF = x & 1; + else + env->CF = 0; + return 0; + } else if (shift != 0) { + env->CF = (x >> (32 - shift)) & 1; + return x << shift; + } + return x; +} + +uint32_t HELPER(shr_cc)(CPUARMState *env, uint32_t x, uint32_t i) +{ + int shift = i & 0xff; + if (shift >= 32) { + if (shift == 32) + env->CF = (x >> 31) & 1; + else + env->CF = 0; + return 0; + } else if (shift != 0) { + env->CF = (x >> (shift - 1)) & 1; + return x >> shift; + } + return x; +} + +uint32_t HELPER(sar_cc)(CPUARMState *env, uint32_t x, uint32_t i) +{ + int shift = i & 0xff; + if (shift >= 32) { + env->CF = (x >> 31) & 1; + return (int32_t)x >> 31; + } else if (shift != 0) { + env->CF = (x >> (shift - 1)) & 1; + return (int32_t)x >> shift; + } + return x; +} + +uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) +{ + int shift1, shift; + shift1 = i & 0xff; + shift = shift1 & 0x1f; + if (shift == 0) { + if (shift1 != 0) + env->CF = (x >> 31) & 1; + return x; + } else { + env->CF = (x >> (shift - 1)) & 1; + return ((uint32_t)x >> shift) | (x << (32 - shift)); + } +} diff --git a/target-arm/psci.c b/target-arm/psci.c new file mode 100644 index 00000000..20e4cb6f --- /dev/null +++ b/target-arm/psci.c @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2014 - Linaro + * Author: Rob Herring + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#include +#include +#include +#include +#include +#include "internals.h" + +bool arm_is_psci_call(ARMCPU *cpu, int excp_type) +{ + /* Return true if the r0/x0 value indicates a PSCI call and + * the exception type matches the configured PSCI conduit. This is + * called before the SMC/HVC instruction is executed, to decide whether + * we should treat it as a PSCI call or with the architecturally + * defined behaviour for an SMC or HVC (which might be UNDEF or trap + * to EL2 or to EL3). + */ + CPUARMState *env = &cpu->env; + uint64_t param = is_a64(env) ? env->xregs[0] : env->regs[0]; + + switch (excp_type) { + case EXCP_HVC: + if (cpu->psci_conduit != QEMU_PSCI_CONDUIT_HVC) { + return false; + } + break; + case EXCP_SMC: + if (cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) { + return false; + } + break; + default: + return false; + } + + switch (param) { + case QEMU_PSCI_0_2_FN_PSCI_VERSION: + case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case QEMU_PSCI_0_2_FN_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN64_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN_SYSTEM_RESET: + case QEMU_PSCI_0_2_FN_SYSTEM_OFF: + case QEMU_PSCI_0_1_FN_CPU_ON: + case QEMU_PSCI_0_2_FN_CPU_ON: + case QEMU_PSCI_0_2_FN64_CPU_ON: + case QEMU_PSCI_0_1_FN_CPU_OFF: + case QEMU_PSCI_0_2_FN_CPU_OFF: + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: + case QEMU_PSCI_0_1_FN_MIGRATE: + case QEMU_PSCI_0_2_FN_MIGRATE: + return true; + default: + return false; + } +} + +static CPUState *get_cpu_by_id(uint64_t id) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + ARMCPU *armcpu = ARM_CPU(cpu); + + if (armcpu->mp_affinity == id) { + return cpu; + } + } + + return NULL; +} + +void arm_handle_psci_call(ARMCPU *cpu) +{ + /* + * This function partially implements the logic for dispatching Power State + * Coordination Interface (PSCI) calls (as described in ARM DEN 0022B.b), + * to the extent required for bringing up and taking down secondary cores, + * and for handling reset and poweroff requests. + * Additional information about the calling convention used is available in + * the document 'SMC Calling Convention' (ARM DEN 0028) + */ + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + uint64_t param[4]; + uint64_t context_id, mpidr; + target_ulong entry; + int32_t ret = 0; + int i; + + for (i = 0; i < 4; i++) { + /* + * All PSCI functions take explicit 32-bit or native int sized + * arguments so we can simply zero-extend all arguments regardless + * of which exact function we are about to call. + */ + param[i] = is_a64(env) ? env->xregs[i] : env->regs[i]; + } + + if ((param[0] & QEMU_PSCI_0_2_64BIT) && !is_a64(env)) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; + goto err; + } + + switch (param[0]) { + CPUState *target_cpu_state; + ARMCPU *target_cpu; + CPUClass *target_cpu_class; + + case QEMU_PSCI_0_2_FN_PSCI_VERSION: + ret = QEMU_PSCI_0_2_RET_VERSION_0_2; + break; + case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: + ret = QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED; /* No trusted OS */ + break; + case QEMU_PSCI_0_2_FN_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN64_AFFINITY_INFO: + mpidr = param[1]; + + switch (param[2]) { + case 0: + target_cpu_state = get_cpu_by_id(mpidr); + if (!target_cpu_state) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; + break; + } + target_cpu = ARM_CPU(target_cpu_state); + ret = target_cpu->powered_off ? 1 : 0; + break; + default: + /* Everything above affinity level 0 is always on. */ + ret = 0; + } + break; + case QEMU_PSCI_0_2_FN_SYSTEM_RESET: + qemu_system_reset_request(); + /* QEMU reset and shutdown are async requests, but PSCI + * mandates that we never return from the reset/shutdown + * call, so power the CPU off now so it doesn't execute + * anything further. + */ + goto cpu_off; + case QEMU_PSCI_0_2_FN_SYSTEM_OFF: + qemu_system_shutdown_request(); + goto cpu_off; + case QEMU_PSCI_0_1_FN_CPU_ON: + case QEMU_PSCI_0_2_FN_CPU_ON: + case QEMU_PSCI_0_2_FN64_CPU_ON: + mpidr = param[1]; + entry = param[2]; + context_id = param[3]; + + /* change to the cpu we are powering up */ + target_cpu_state = get_cpu_by_id(mpidr); + if (!target_cpu_state) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; + break; + } + target_cpu = ARM_CPU(target_cpu_state); + if (!target_cpu->powered_off) { + ret = QEMU_PSCI_RET_ALREADY_ON; + break; + } + target_cpu_class = CPU_GET_CLASS(target_cpu); + + /* Initialize the cpu we are turning on */ + cpu_reset(target_cpu_state); + target_cpu->powered_off = false; + target_cpu_state->halted = 0; + + /* + * The PSCI spec mandates that newly brought up CPUs enter the + * exception level of the caller in the same execution mode as + * the caller, with context_id in x0/r0, respectively. + * + * For now, it is sufficient to assert() that CPUs come out of + * reset in the same mode as the calling CPU, since we only + * implement EL1, which means that + * (a) there is no EL2 for the calling CPU to trap into to change + * its state + * (b) the newly brought up CPU enters EL1 immediately after coming + * out of reset in the default state + */ + assert(is_a64(env) == is_a64(&target_cpu->env)); + if (is_a64(env)) { + if (entry & 1) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; + break; + } + target_cpu->env.xregs[0] = context_id; + } else { + target_cpu->env.regs[0] = context_id; + target_cpu->env.thumb = entry & 1; + } + target_cpu_class->set_pc(target_cpu_state, entry); + + ret = 0; + break; + case QEMU_PSCI_0_1_FN_CPU_OFF: + case QEMU_PSCI_0_2_FN_CPU_OFF: + goto cpu_off; + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: + /* Affinity levels are not supported in QEMU */ + if (param[1] & 0xfffe0000) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; + break; + } + /* Powerdown is not supported, we always go into WFI */ + if (is_a64(env)) { + env->xregs[0] = 0; + } else { + env->regs[0] = 0; + } + helper_wfi(env); + break; + case QEMU_PSCI_0_1_FN_MIGRATE: + case QEMU_PSCI_0_2_FN_MIGRATE: + ret = QEMU_PSCI_RET_NOT_SUPPORTED; + break; + default: + g_assert_not_reached(); + } + +err: + if (is_a64(env)) { + env->xregs[0] = ret; + } else { + env->regs[0] = ret; + } + return; + +cpu_off: + cpu->powered_off = true; + cs->halted = 1; + cs->exception_index = EXCP_HLT; + cpu_loop_exit(cs); + /* notreached */ +} diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c new file mode 100644 index 00000000..0f923d36 --- /dev/null +++ b/target-arm/translate-a64.c @@ -0,0 +1,11130 @@ +/* + * AArch64 translation + * + * Copyright (c) 2013 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include +#include +#include +#include +#include + +#include "cpu.h" +#include "tcg-op.h" +#include "qemu/log.h" +#include "arm_ldst.h" +#include "translate.h" +#include "internals.h" +#include "qemu/host-utils.h" + +#include "exec/gen-icount.h" + +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" + +#include "trace-tcg.h" + +static TCGv_i64 cpu_X[32]; +static TCGv_i64 cpu_pc; + +/* Load/store exclusive handling */ +static TCGv_i64 cpu_exclusive_high; + +static const char *regnames[] = { + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" +}; + +enum a64_shift_type { + A64_SHIFT_TYPE_LSL = 0, + A64_SHIFT_TYPE_LSR = 1, + A64_SHIFT_TYPE_ASR = 2, + A64_SHIFT_TYPE_ROR = 3 +}; + +/* Table based decoder typedefs - used when the relevant bits for decode + * are too awkwardly scattered across the instruction (eg SIMD). + */ +typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); + +typedef struct AArch64DecodeTable { + uint32_t pattern; + uint32_t mask; + AArch64DecodeFn *disas_fn; +} AArch64DecodeTable; + +/* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32); +typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); +typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); +typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); +typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); +typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); +typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); +typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); +typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); +typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32); +typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); + +/* initialize TCG globals. */ +void a64_translate_init(void) +{ + int i; + + cpu_pc = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUARMState, pc), + "pc"); + for (i = 0; i < 32; i++) { + cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUARMState, xregs[i]), + regnames[i]); + } + + cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUARMState, exclusive_high), "exclusive_high"); +} + +static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s) +{ + /* Return the mmu_idx to use for A64 "unprivileged load/store" insns: + * if EL1, access as if EL0; otherwise access at current EL + */ + switch (s->mmu_idx) { + case ARMMMUIdx_S12NSE1: + return ARMMMUIdx_S12NSE0; + case ARMMMUIdx_S1SE1: + return ARMMMUIdx_S1SE0; + case ARMMMUIdx_S2NS: + g_assert_not_reached(); + default: + return s->mmu_idx; + } +} + +void aarch64_cpu_dump_state(CPUState *cs, FILE *f, + fprintf_function cpu_fprintf, int flags) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint32_t psr = pstate_read(env); + int i; + + cpu_fprintf(f, "PC=%016"PRIx64" SP=%016"PRIx64"\n", + env->pc, env->xregs[31]); + for (i = 0; i < 31; i++) { + cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]); + if ((i % 4) == 3) { + cpu_fprintf(f, "\n"); + } else { + cpu_fprintf(f, " "); + } + } + cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n", + psr, + psr & PSTATE_N ? 'N' : '-', + psr & PSTATE_Z ? 'Z' : '-', + psr & PSTATE_C ? 'C' : '-', + psr & PSTATE_V ? 'V' : '-'); + cpu_fprintf(f, "\n"); + + if (flags & CPU_DUMP_FPU) { + int numvfpregs = 32; + for (i = 0; i < numvfpregs; i += 2) { + uint64_t vlo = float64_val(env->vfp.regs[i * 2]); + uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]); + cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ", + i, vhi, vlo); + vlo = float64_val(env->vfp.regs[(i + 1) * 2]); + vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]); + cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n", + i + 1, vhi, vlo); + } + cpu_fprintf(f, "FPCR: %08x FPSR: %08x\n", + vfp_get_fpcr(env), vfp_get_fpsr(env)); + } +} + +void gen_a64_set_pc_im(uint64_t val) +{ + tcg_gen_movi_i64(cpu_pc, val); +} + +static void gen_exception_internal(int excp) +{ + TCGv_i32 tcg_excp = tcg_const_i32(excp); + + assert(excp_is_internal(excp)); + gen_helper_exception_internal(cpu_env, tcg_excp); + tcg_temp_free_i32(tcg_excp); +} + +static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el) +{ + TCGv_i32 tcg_excp = tcg_const_i32(excp); + TCGv_i32 tcg_syn = tcg_const_i32(syndrome); + TCGv_i32 tcg_el = tcg_const_i32(target_el); + + gen_helper_exception_with_syndrome(cpu_env, tcg_excp, + tcg_syn, tcg_el); + tcg_temp_free_i32(tcg_el); + tcg_temp_free_i32(tcg_syn); + tcg_temp_free_i32(tcg_excp); +} + +static void gen_exception_internal_insn(DisasContext *s, int offset, int excp) +{ + gen_a64_set_pc_im(s->pc - offset); + gen_exception_internal(excp); + s->is_jmp = DISAS_EXC; +} + +static void gen_exception_insn(DisasContext *s, int offset, int excp, + uint32_t syndrome, uint32_t target_el) +{ + gen_a64_set_pc_im(s->pc - offset); + gen_exception(excp, syndrome, target_el); + s->is_jmp = DISAS_EXC; +} + +static void gen_ss_advance(DisasContext *s) +{ + /* If the singlestep state is Active-not-pending, advance to + * Active-pending. + */ + if (s->ss_active) { + s->pstate_ss = 0; + gen_helper_clear_pstate_ss(cpu_env); + } +} + +static void gen_step_complete_exception(DisasContext *s) +{ + /* We just completed step of an insn. Move from Active-not-pending + * to Active-pending, and then also take the swstep exception. + * This corresponds to making the (IMPDEF) choice to prioritize + * swstep exceptions over asynchronous exceptions taken to an exception + * level where debug is disabled. This choice has the advantage that + * we do not need to maintain internal state corresponding to the + * ISV/EX syndrome bits between completion of the step and generation + * of the exception, and our syndrome information is always correct. + */ + gen_ss_advance(s); + gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex), + default_exception_el(s)); + s->is_jmp = DISAS_EXC; +} + +static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest) +{ + /* No direct tb linking with singlestep (either QEMU's or the ARM + * debug architecture kind) or deterministic io + */ + if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) { + return false; + } + + /* Only link tbs from inside the same guest page */ + if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) { + return false; + } + + return true; +} + +static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) +{ + TranslationBlock *tb; + + tb = s->tb; + if (use_goto_tb(s, n, dest)) { + tcg_gen_goto_tb(n); + gen_a64_set_pc_im(dest); + tcg_gen_exit_tb((intptr_t)tb + n); + s->is_jmp = DISAS_TB_JUMP; + } else { + gen_a64_set_pc_im(dest); + if (s->ss_active) { + gen_step_complete_exception(s); + } else if (s->singlestep_enabled) { + gen_exception_internal(EXCP_DEBUG); + } else { + tcg_gen_exit_tb(0); + s->is_jmp = DISAS_TB_JUMP; + } + } +} + +static void unallocated_encoding(DisasContext *s) +{ + /* Unallocated and reserved encodings are uncategorized */ + gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), + default_exception_el(s)); +} + +#define unsupported_encoding(s, insn) \ + do { \ + qemu_log_mask(LOG_UNIMP, \ + "%s:%d: unsupported instruction encoding 0x%08x " \ + "at pc=%016" PRIx64 "\n", \ + __FILE__, __LINE__, insn, s->pc - 4); \ + unallocated_encoding(s); \ + } while (0); + +static void init_tmp_a64_array(DisasContext *s) +{ +#ifdef CONFIG_DEBUG_TCG + int i; + for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) { + TCGV_UNUSED_I64(s->tmp_a64[i]); + } +#endif + s->tmp_a64_count = 0; +} + +static void free_tmp_a64(DisasContext *s) +{ + int i; + for (i = 0; i < s->tmp_a64_count; i++) { + tcg_temp_free_i64(s->tmp_a64[i]); + } + init_tmp_a64_array(s); +} + +static TCGv_i64 new_tmp_a64(DisasContext *s) +{ + assert(s->tmp_a64_count < TMP_A64_MAX); + return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(); +} + +static TCGv_i64 new_tmp_a64_zero(DisasContext *s) +{ + TCGv_i64 t = new_tmp_a64(s); + tcg_gen_movi_i64(t, 0); + return t; +} + +/* + * Register access functions + * + * These functions are used for directly accessing a register in where + * changes to the final register value are likely to be made. If you + * need to use a register for temporary calculation (e.g. index type + * operations) use the read_* form. + * + * B1.2.1 Register mappings + * + * In instruction register encoding 31 can refer to ZR (zero register) or + * the SP (stack pointer) depending on context. In QEMU's case we map SP + * to cpu_X[31] and ZR accesses to a temporary which can be discarded. + * This is the point of the _sp forms. + */ +static TCGv_i64 cpu_reg(DisasContext *s, int reg) +{ + if (reg == 31) { + return new_tmp_a64_zero(s); + } else { + return cpu_X[reg]; + } +} + +/* register access for when 31 == SP */ +static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) +{ + return cpu_X[reg]; +} + +/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 + * representing the register contents. This TCGv is an auto-freed + * temporary so it need not be explicitly freed, and may be modified. + */ +static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) +{ + TCGv_i64 v = new_tmp_a64(s); + if (reg != 31) { + if (sf) { + tcg_gen_mov_i64(v, cpu_X[reg]); + } else { + tcg_gen_ext32u_i64(v, cpu_X[reg]); + } + } else { + tcg_gen_movi_i64(v, 0); + } + return v; +} + +static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) +{ + TCGv_i64 v = new_tmp_a64(s); + if (sf) { + tcg_gen_mov_i64(v, cpu_X[reg]); + } else { + tcg_gen_ext32u_i64(v, cpu_X[reg]); + } + return v; +} + +/* We should have at some point before trying to access an FP register + * done the necessary access check, so assert that + * (a) we did the check and + * (b) we didn't then just plough ahead anyway if it failed. + * Print the instruction pattern in the abort message so we can figure + * out what we need to fix if a user encounters this problem in the wild. + */ +static inline void assert_fp_access_checked(DisasContext *s) +{ +#ifdef CONFIG_DEBUG_TCG + if (unlikely(!s->fp_access_checked || s->fp_excp_el)) { + fprintf(stderr, "target-arm: FP access check missing for " + "instruction 0x%08x\n", s->insn); + abort(); + } +#endif +} + +/* Return the offset into CPUARMState of an element of specified + * size, 'element' places in from the least significant end of + * the FP/vector register Qn. + */ +static inline int vec_reg_offset(DisasContext *s, int regno, + int element, TCGMemOp size) +{ + int offs = offsetof(CPUARMState, vfp.regs[regno * 2]); +#ifdef HOST_WORDS_BIGENDIAN + /* This is complicated slightly because vfp.regs[2n] is + * still the low half and vfp.regs[2n+1] the high half + * of the 128 bit vector, even on big endian systems. + * Calculate the offset assuming a fully bigendian 128 bits, + * then XOR to account for the order of the two 64 bit halves. + */ + offs += (16 - ((element + 1) * (1 << size))); + offs ^= 8; +#else + offs += element * (1 << size); +#endif + assert_fp_access_checked(s); + return offs; +} + +/* Return the offset into CPUARMState of a slice (from + * the least significant end) of FP register Qn (ie + * Dn, Sn, Hn or Bn). + * (Note that this is not the same mapping as for A32; see cpu.h) + */ +static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size) +{ + int offs = offsetof(CPUARMState, vfp.regs[regno * 2]); +#ifdef HOST_WORDS_BIGENDIAN + offs += (8 - (1 << size)); +#endif + assert_fp_access_checked(s); + return offs; +} + +/* Offset of the high half of the 128 bit vector Qn */ +static inline int fp_reg_hi_offset(DisasContext *s, int regno) +{ + assert_fp_access_checked(s); + return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]); +} + +/* Convenience accessors for reading and writing single and double + * FP registers. Writing clears the upper parts of the associated + * 128 bit vector register, as required by the architecture. + * Note that unlike the GP register accessors, the values returned + * by the read functions must be manually freed. + */ +static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) +{ + TCGv_i64 v = tcg_temp_new_i64(); + + tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64)); + return v; +} + +static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) +{ + TCGv_i32 v = tcg_temp_new_i32(); + + tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32)); + return v; +} + +static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) +{ + TCGv_i64 tcg_zero = tcg_const_i64(0); + + tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64)); + tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg)); + tcg_temp_free_i64(tcg_zero); +} + +static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tmp, v); + write_fp_dreg(s, reg, tmp); + tcg_temp_free_i64(tmp); +} + +static TCGv_ptr get_fpstatus_ptr(void) +{ + TCGv_ptr statusptr = tcg_temp_new_ptr(); + int offset; + + /* In A64 all instructions (both FP and Neon) use the FPCR; + * there is no equivalent of the A32 Neon "standard FPSCR value" + * and all operations use vfp.fp_status. + */ + offset = offsetof(CPUARMState, vfp.fp_status); + tcg_gen_addi_ptr(statusptr, cpu_env, offset); + return statusptr; +} + +/* Set ZF and NF based on a 64 bit result. This is alas fiddlier + * than the 32 bit equivalent. + */ +static inline void gen_set_NZ64(TCGv_i64 result) +{ + TCGv_i64 flag = tcg_temp_new_i64(); + + tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0); + tcg_gen_trunc_i64_i32(cpu_ZF, flag); + tcg_gen_shri_i64(flag, result, 32); + tcg_gen_trunc_i64_i32(cpu_NF, flag); + tcg_temp_free_i64(flag); +} + +/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */ +static inline void gen_logic_CC(int sf, TCGv_i64 result) +{ + if (sf) { + gen_set_NZ64(result); + } else { + tcg_gen_trunc_i64_i32(cpu_ZF, result); + tcg_gen_trunc_i64_i32(cpu_NF, result); + } + tcg_gen_movi_i32(cpu_CF, 0); + tcg_gen_movi_i32(cpu_VF, 0); +} + +/* dest = T0 + T1; compute C, N, V and Z flags */ +static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + if (sf) { + TCGv_i64 result, flag, tmp; + result = tcg_temp_new_i64(); + flag = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); + + tcg_gen_movi_i64(tmp, 0); + tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); + + tcg_gen_trunc_i64_i32(cpu_CF, flag); + + gen_set_NZ64(result); + + tcg_gen_xor_i64(flag, result, t0); + tcg_gen_xor_i64(tmp, t0, t1); + tcg_gen_andc_i64(flag, flag, tmp); + tcg_temp_free_i64(tmp); + tcg_gen_shri_i64(flag, flag, 32); + tcg_gen_trunc_i64_i32(cpu_VF, flag); + + tcg_gen_mov_i64(dest, result); + tcg_temp_free_i64(result); + tcg_temp_free_i64(flag); + } else { + /* 32 bit arithmetic */ + TCGv_i32 t0_32 = tcg_temp_new_i32(); + TCGv_i32 t1_32 = tcg_temp_new_i32(); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_movi_i32(tmp, 0); + tcg_gen_trunc_i64_i32(t0_32, t0); + tcg_gen_trunc_i64_i32(t1_32, t1); + tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); + tcg_gen_xor_i32(tmp, t0_32, t1_32); + tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); + tcg_gen_extu_i32_i64(dest, cpu_NF); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(t0_32); + tcg_temp_free_i32(t1_32); + } +} + +/* dest = T0 - T1; compute C, N, V and Z flags */ +static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + if (sf) { + /* 64 bit arithmetic */ + TCGv_i64 result, flag, tmp; + + result = tcg_temp_new_i64(); + flag = tcg_temp_new_i64(); + tcg_gen_sub_i64(result, t0, t1); + + gen_set_NZ64(result); + + tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); + tcg_gen_trunc_i64_i32(cpu_CF, flag); + + tcg_gen_xor_i64(flag, result, t0); + tmp = tcg_temp_new_i64(); + tcg_gen_xor_i64(tmp, t0, t1); + tcg_gen_and_i64(flag, flag, tmp); + tcg_temp_free_i64(tmp); + tcg_gen_shri_i64(flag, flag, 32); + tcg_gen_trunc_i64_i32(cpu_VF, flag); + tcg_gen_mov_i64(dest, result); + tcg_temp_free_i64(flag); + tcg_temp_free_i64(result); + } else { + /* 32 bit arithmetic */ + TCGv_i32 t0_32 = tcg_temp_new_i32(); + TCGv_i32 t1_32 = tcg_temp_new_i32(); + TCGv_i32 tmp; + + tcg_gen_trunc_i64_i32(t0_32, t0); + tcg_gen_trunc_i64_i32(t1_32, t1); + tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, t0_32, t1_32); + tcg_temp_free_i32(t0_32); + tcg_temp_free_i32(t1_32); + tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); + tcg_temp_free_i32(tmp); + tcg_gen_extu_i32_i64(dest, cpu_NF); + } +} + +/* dest = T0 + T1 + CF; do not compute flags. */ +static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + TCGv_i64 flag = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(flag, cpu_CF); + tcg_gen_add_i64(dest, t0, t1); + tcg_gen_add_i64(dest, dest, flag); + tcg_temp_free_i64(flag); + + if (!sf) { + tcg_gen_ext32u_i64(dest, dest); + } +} + +/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ +static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + if (sf) { + TCGv_i64 result, cf_64, vf_64, tmp; + result = tcg_temp_new_i64(); + cf_64 = tcg_temp_new_i64(); + vf_64 = tcg_temp_new_i64(); + tmp = tcg_const_i64(0); + + tcg_gen_extu_i32_i64(cf_64, cpu_CF); + tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp); + tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp); + tcg_gen_trunc_i64_i32(cpu_CF, cf_64); + gen_set_NZ64(result); + + tcg_gen_xor_i64(vf_64, result, t0); + tcg_gen_xor_i64(tmp, t0, t1); + tcg_gen_andc_i64(vf_64, vf_64, tmp); + tcg_gen_shri_i64(vf_64, vf_64, 32); + tcg_gen_trunc_i64_i32(cpu_VF, vf_64); + + tcg_gen_mov_i64(dest, result); + + tcg_temp_free_i64(tmp); + tcg_temp_free_i64(vf_64); + tcg_temp_free_i64(cf_64); + tcg_temp_free_i64(result); + } else { + TCGv_i32 t0_32, t1_32, tmp; + t0_32 = tcg_temp_new_i32(); + t1_32 = tcg_temp_new_i32(); + tmp = tcg_const_i32(0); + + tcg_gen_trunc_i64_i32(t0_32, t0); + tcg_gen_trunc_i64_i32(t1_32, t1); + tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp); + tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp); + + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); + tcg_gen_xor_i32(tmp, t0_32, t1_32); + tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); + tcg_gen_extu_i32_i64(dest, cpu_NF); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(t1_32); + tcg_temp_free_i32(t0_32); + } +} + +/* + * Load/Store generators + */ + +/* + * Store from GPR register to memory. + */ +static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, + TCGv_i64 tcg_addr, int size, int memidx) +{ + g_assert(size <= 3); + tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size); +} + +static void do_gpr_st(DisasContext *s, TCGv_i64 source, + TCGv_i64 tcg_addr, int size) +{ + do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s)); +} + +/* + * Load from memory to GPR register + */ +static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend, int memidx) +{ + TCGMemOp memop = MO_TE + size; + + g_assert(size <= 3); + + if (is_signed) { + memop += MO_SIGN; + } + + tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); + + if (extend && is_signed) { + g_assert(size < 3); + tcg_gen_ext32u_i64(dest, dest); + } +} + +static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, + int size, bool is_signed, bool extend) +{ + do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend, + get_mem_index(s)); +} + +/* + * Store from FP register to memory + */ +static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) +{ + /* This writes the bottom N bits of a 128 bit wide vector to memory */ + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64)); + if (size < 4) { + tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size); + } else { + TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(); + tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ); + tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx)); + tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); + tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ); + tcg_temp_free_i64(tcg_hiaddr); + } + + tcg_temp_free_i64(tmp); +} + +/* + * Load from memory to FP register + */ +static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) +{ + /* This always zero-extends and writes to a full 128 bit wide vector */ + TCGv_i64 tmplo = tcg_temp_new_i64(); + TCGv_i64 tmphi; + + if (size < 4) { + TCGMemOp memop = MO_TE + size; + tmphi = tcg_const_i64(0); + tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop); + } else { + TCGv_i64 tcg_hiaddr; + tmphi = tcg_temp_new_i64(); + tcg_hiaddr = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ); + tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); + tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ); + tcg_temp_free_i64(tcg_hiaddr); + } + + tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64)); + tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx)); + + tcg_temp_free_i64(tmplo); + tcg_temp_free_i64(tmphi); +} + +/* + * Vector load/store helpers. + * + * The principal difference between this and a FP load is that we don't + * zero extend as we are filling a partial chunk of the vector register. + * These functions don't support 128 bit loads/stores, which would be + * normal load/store operations. + * + * The _i32 versions are useful when operating on 32 bit quantities + * (eg for floating point single or using Neon helper functions). + */ + +/* Get value of an element within a vector register */ +static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_8|MO_SIGN: + tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_16|MO_SIGN: + tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_32|MO_SIGN: + tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off); + break; + case MO_64: + case MO_64|MO_SIGN: + tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_8|MO_SIGN: + tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_16|MO_SIGN: + tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off); + break; + case MO_32: + case MO_32|MO_SIGN: + tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +/* Set value of an element within a vector register */ +static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, + int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_st8_i64(tcg_src, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_st16_i64(tcg_src, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_st32_i64(tcg_src, cpu_env, vect_off); + break; + case MO_64: + tcg_gen_st_i64(tcg_src, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, + int destidx, int element, TCGMemOp memop) +{ + int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); + switch (memop) { + case MO_8: + tcg_gen_st8_i32(tcg_src, cpu_env, vect_off); + break; + case MO_16: + tcg_gen_st16_i32(tcg_src, cpu_env, vect_off); + break; + case MO_32: + tcg_gen_st_i32(tcg_src, cpu_env, vect_off); + break; + default: + g_assert_not_reached(); + } +} + +/* Clear the high 64 bits of a 128 bit vector (in general non-quad + * vector ops all need to do this). + */ +static void clear_vec_high(DisasContext *s, int rd) +{ + TCGv_i64 tcg_zero = tcg_const_i64(0); + + write_vec_element(s, tcg_zero, rd, 1, MO_64); + tcg_temp_free_i64(tcg_zero); +} + +/* Store from vector register to memory */ +static void do_vec_st(DisasContext *s, int srcidx, int element, + TCGv_i64 tcg_addr, int size) +{ + TCGMemOp memop = MO_TE + size; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + read_vec_element(s, tcg_tmp, srcidx, element, size); + tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + + tcg_temp_free_i64(tcg_tmp); +} + +/* Load from memory to vector register */ +static void do_vec_ld(DisasContext *s, int destidx, int element, + TCGv_i64 tcg_addr, int size) +{ + TCGMemOp memop = MO_TE + size; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop); + write_vec_element(s, tcg_tmp, destidx, element, size); + + tcg_temp_free_i64(tcg_tmp); +} + +/* Check that FP/Neon access is enabled. If it is, return + * true. If not, emit code to generate an appropriate exception, + * and return false; the caller should not emit any code for + * the instruction. Note that this check must happen after all + * unallocated-encoding checks (otherwise the syndrome information + * for the resulting exception will be incorrect). + */ +static inline bool fp_access_check(DisasContext *s) +{ + assert(!s->fp_access_checked); + s->fp_access_checked = true; + + if (!s->fp_excp_el) { + return true; + } + + gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false), + s->fp_excp_el); + return false; +} + +/* + * This utility function is for doing register extension with an + * optional shift. You will likely want to pass a temporary for the + * destination register. See DecodeRegExtend() in the ARM ARM. + */ +static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, + int option, unsigned int shift) +{ + int extsize = extract32(option, 0, 2); + bool is_signed = extract32(option, 2, 1); + + if (is_signed) { + switch (extsize) { + case 0: + tcg_gen_ext8s_i64(tcg_out, tcg_in); + break; + case 1: + tcg_gen_ext16s_i64(tcg_out, tcg_in); + break; + case 2: + tcg_gen_ext32s_i64(tcg_out, tcg_in); + break; + case 3: + tcg_gen_mov_i64(tcg_out, tcg_in); + break; + } + } else { + switch (extsize) { + case 0: + tcg_gen_ext8u_i64(tcg_out, tcg_in); + break; + case 1: + tcg_gen_ext16u_i64(tcg_out, tcg_in); + break; + case 2: + tcg_gen_ext32u_i64(tcg_out, tcg_in); + break; + case 3: + tcg_gen_mov_i64(tcg_out, tcg_in); + break; + } + } + + if (shift) { + tcg_gen_shli_i64(tcg_out, tcg_out, shift); + } +} + +static inline void gen_check_sp_alignment(DisasContext *s) +{ + /* The AArch64 architecture mandates that (if enabled via PSTATE + * or SCTLR bits) there is a check that SP is 16-aligned on every + * SP-relative load or store (with an exception generated if it is not). + * In line with general QEMU practice regarding misaligned accesses, + * we omit these checks for the sake of guest program performance. + * This function is provided as a hook so we can more easily add these + * checks in future (possibly as a "favour catching guest program bugs + * over speed" user selectable option). + */ +} + +/* + * This provides a simple table based table lookup decoder. It is + * intended to be used when the relevant bits for decode are too + * awkwardly placed and switch/if based logic would be confusing and + * deeply nested. Since it's a linear search through the table, tables + * should be kept small. + * + * It returns the first handler where insn & mask == pattern, or + * NULL if there is no match. + * The table is terminated by an empty mask (i.e. 0) + */ +static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, + uint32_t insn) +{ + const AArch64DecodeTable *tptr = table; + + while (tptr->mask) { + if ((insn & tptr->mask) == tptr->pattern) { + return tptr->disas_fn; + } + tptr++; + } + return NULL; +} + +/* + * the instruction disassembly implemented here matches + * the instruction encoding classifications in chapter 3 (C3) + * of the ARM Architecture Reference Manual (DDI0487A_a) + */ + +/* C3.2.7 Unconditional branch (immediate) + * 31 30 26 25 0 + * +----+-----------+-------------------------------------+ + * | op | 0 0 1 0 1 | imm26 | + * +----+-----------+-------------------------------------+ + */ +static void disas_uncond_b_imm(DisasContext *s, uint32_t insn) +{ + uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4; + + if (insn & (1U << 31)) { + /* C5.6.26 BL Branch with link */ + tcg_gen_movi_i64(cpu_reg(s, 30), s->pc); + } + + /* C5.6.20 B Branch / C5.6.26 BL Branch with link */ + gen_goto_tb(s, 0, addr); +} + +/* C3.2.1 Compare & branch (immediate) + * 31 30 25 24 23 5 4 0 + * +----+-------------+----+---------------------+--------+ + * | sf | 0 1 1 0 1 0 | op | imm19 | Rt | + * +----+-------------+----+---------------------+--------+ + */ +static void disas_comp_b_imm(DisasContext *s, uint32_t insn) +{ + unsigned int sf, op, rt; + uint64_t addr; + TCGLabel *label_match; + TCGv_i64 tcg_cmp; + + sf = extract32(insn, 31, 1); + op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */ + rt = extract32(insn, 0, 5); + addr = s->pc + sextract32(insn, 5, 19) * 4 - 4; + + tcg_cmp = read_cpu_reg(s, rt, sf); + label_match = gen_new_label(); + + tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ, + tcg_cmp, 0, label_match); + + gen_goto_tb(s, 0, s->pc); + gen_set_label(label_match); + gen_goto_tb(s, 1, addr); +} + +/* C3.2.5 Test & branch (immediate) + * 31 30 25 24 23 19 18 5 4 0 + * +----+-------------+----+-------+-------------+------+ + * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt | + * +----+-------------+----+-------+-------------+------+ + */ +static void disas_test_b_imm(DisasContext *s, uint32_t insn) +{ + unsigned int bit_pos, op, rt; + uint64_t addr; + TCGLabel *label_match; + TCGv_i64 tcg_cmp; + + bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5); + op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */ + addr = s->pc + sextract32(insn, 5, 14) * 4 - 4; + rt = extract32(insn, 0, 5); + + tcg_cmp = tcg_temp_new_i64(); + tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos)); + label_match = gen_new_label(); + tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ, + tcg_cmp, 0, label_match); + tcg_temp_free_i64(tcg_cmp); + gen_goto_tb(s, 0, s->pc); + gen_set_label(label_match); + gen_goto_tb(s, 1, addr); +} + +/* C3.2.2 / C5.6.19 Conditional branch (immediate) + * 31 25 24 23 5 4 3 0 + * +---------------+----+---------------------+----+------+ + * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond | + * +---------------+----+---------------------+----+------+ + */ +static void disas_cond_b_imm(DisasContext *s, uint32_t insn) +{ + unsigned int cond; + uint64_t addr; + + if ((insn & (1 << 4)) || (insn & (1 << 24))) { + unallocated_encoding(s); + return; + } + addr = s->pc + sextract32(insn, 5, 19) * 4 - 4; + cond = extract32(insn, 0, 4); + + if (cond < 0x0e) { + /* genuinely conditional branches */ + TCGLabel *label_match = gen_new_label(); + arm_gen_test_cc(cond, label_match); + gen_goto_tb(s, 0, s->pc); + gen_set_label(label_match); + gen_goto_tb(s, 1, addr); + } else { + /* 0xe and 0xf are both "always" conditions */ + gen_goto_tb(s, 0, addr); + } +} + +/* C5.6.68 HINT */ +static void handle_hint(DisasContext *s, uint32_t insn, + unsigned int op1, unsigned int op2, unsigned int crm) +{ + unsigned int selector = crm << 3 | op2; + + if (op1 != 3) { + unallocated_encoding(s); + return; + } + + switch (selector) { + case 0: /* NOP */ + return; + case 3: /* WFI */ + s->is_jmp = DISAS_WFI; + return; + case 1: /* YIELD */ + s->is_jmp = DISAS_YIELD; + return; + case 2: /* WFE */ + s->is_jmp = DISAS_WFE; + return; + case 4: /* SEV */ + case 5: /* SEVL */ + /* we treat all as NOP at least for now */ + return; + default: + /* default specified as NOP equivalent */ + return; + } +} + +static void gen_clrex(DisasContext *s, uint32_t insn) +{ + tcg_gen_movi_i64(cpu_exclusive_addr, -1); +} + +/* CLREX, DSB, DMB, ISB */ +static void handle_sync(DisasContext *s, uint32_t insn, + unsigned int op1, unsigned int op2, unsigned int crm) +{ + if (op1 != 3) { + unallocated_encoding(s); + return; + } + + switch (op2) { + case 2: /* CLREX */ + gen_clrex(s, insn); + return; + case 4: /* DSB */ + case 5: /* DMB */ + case 6: /* ISB */ + /* We don't emulate caches so barriers are no-ops */ + return; + default: + unallocated_encoding(s); + return; + } +} + +/* C5.6.130 MSR (immediate) - move immediate to processor state field */ +static void handle_msr_i(DisasContext *s, uint32_t insn, + unsigned int op1, unsigned int op2, unsigned int crm) +{ + int op = op1 << 3 | op2; + switch (op) { + case 0x05: /* SPSel */ + if (s->current_el == 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x1e: /* DAIFSet */ + case 0x1f: /* DAIFClear */ + { + TCGv_i32 tcg_imm = tcg_const_i32(crm); + TCGv_i32 tcg_op = tcg_const_i32(op); + gen_a64_set_pc_im(s->pc - 4); + gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm); + tcg_temp_free_i32(tcg_imm); + tcg_temp_free_i32(tcg_op); + s->is_jmp = DISAS_UPDATE; + break; + } + default: + unallocated_encoding(s); + return; + } +} + +static void gen_get_nzcv(TCGv_i64 tcg_rt) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + TCGv_i32 nzcv = tcg_temp_new_i32(); + + /* build bit 31, N */ + tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); + /* build bit 30, Z */ + tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); + tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); + /* build bit 29, C */ + tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); + /* build bit 28, V */ + tcg_gen_shri_i32(tmp, cpu_VF, 31); + tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); + /* generate result */ + tcg_gen_extu_i32_i64(tcg_rt, nzcv); + + tcg_temp_free_i32(nzcv); + tcg_temp_free_i32(tmp); +} + +static void gen_set_nzcv(TCGv_i64 tcg_rt) + +{ + TCGv_i32 nzcv = tcg_temp_new_i32(); + + /* take NZCV from R[t] */ + tcg_gen_trunc_i64_i32(nzcv, tcg_rt); + + /* bit 31, N */ + tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); + /* bit 30, Z */ + tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); + /* bit 29, C */ + tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); + tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); + /* bit 28, V */ + tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); + tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); + tcg_temp_free_i32(nzcv); +} + +/* C5.6.129 MRS - move from system register + * C5.6.131 MSR (register) - move to system register + * C5.6.204 SYS + * C5.6.205 SYSL + * These are all essentially the same insn in 'read' and 'write' + * versions, with varying op0 fields. + */ +static void handle_sys(DisasContext *s, uint32_t insn, bool isread, + unsigned int op0, unsigned int op1, unsigned int op2, + unsigned int crn, unsigned int crm, unsigned int rt) +{ + const ARMCPRegInfo *ri; + TCGv_i64 tcg_rt; + + ri = get_arm_cp_reginfo(s->cp_regs, + ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, + crn, crm, op0, op1, op2)); + + if (!ri) { + /* Unknown register; this might be a guest error or a QEMU + * unimplemented feature. + */ + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " + "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", + isread ? "read" : "write", op0, op1, crn, crm, op2); + unallocated_encoding(s); + return; + } + + /* Check access permissions */ + if (!cp_access_ok(s->current_el, ri, isread)) { + unallocated_encoding(s); + return; + } + + if (ri->accessfn) { + /* Emit code to perform further access permissions checks at + * runtime; this may result in an exception. + */ + TCGv_ptr tmpptr; + TCGv_i32 tcg_syn; + uint32_t syndrome; + + gen_a64_set_pc_im(s->pc - 4); + tmpptr = tcg_const_ptr(ri); + syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); + tcg_syn = tcg_const_i32(syndrome); + gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn); + tcg_temp_free_ptr(tmpptr); + tcg_temp_free_i32(tcg_syn); + } + + /* Handle special cases first */ + switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) { + case ARM_CP_NOP: + return; + case ARM_CP_NZCV: + tcg_rt = cpu_reg(s, rt); + if (isread) { + gen_get_nzcv(tcg_rt); + } else { + gen_set_nzcv(tcg_rt); + } + return; + case ARM_CP_CURRENTEL: + /* Reads as current EL value from pstate, which is + * guaranteed to be constant by the tb flags. + */ + tcg_rt = cpu_reg(s, rt); + tcg_gen_movi_i64(tcg_rt, s->current_el << 2); + return; + case ARM_CP_DC_ZVA: + /* Writes clear the aligned block of memory which rt points into. */ + tcg_rt = cpu_reg(s, rt); + gen_helper_dc_zva(cpu_env, tcg_rt); + return; + default: + break; + } + + if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { + gen_io_start(); + } + + tcg_rt = cpu_reg(s, rt); + + if (isread) { + if (ri->type & ARM_CP_CONST) { + tcg_gen_movi_i64(tcg_rt, ri->resetvalue); + } else if (ri->readfn) { + TCGv_ptr tmpptr; + tmpptr = tcg_const_ptr(ri); + gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr); + tcg_temp_free_ptr(tmpptr); + } else { + tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset); + } + } else { + if (ri->type & ARM_CP_CONST) { + /* If not forbidden by access permissions, treat as WI */ + return; + } else if (ri->writefn) { + TCGv_ptr tmpptr; + tmpptr = tcg_const_ptr(ri); + gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt); + tcg_temp_free_ptr(tmpptr); + } else { + tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset); + } + } + + if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { + /* I/O operations must end the TB here (whether read or write) */ + gen_io_end(); + s->is_jmp = DISAS_UPDATE; + } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { + /* We default to ending the TB on a coprocessor register write, + * but allow this to be suppressed by the register definition + * (usually only necessary to work around guest bugs). + */ + s->is_jmp = DISAS_UPDATE; + } +} + +/* C3.2.4 System + * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 + * +---------------------+---+-----+-----+-------+-------+-----+------+ + * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | + * +---------------------+---+-----+-----+-------+-------+-----+------+ + */ +static void disas_system(DisasContext *s, uint32_t insn) +{ + unsigned int l, op0, op1, crn, crm, op2, rt; + l = extract32(insn, 21, 1); + op0 = extract32(insn, 19, 2); + op1 = extract32(insn, 16, 3); + crn = extract32(insn, 12, 4); + crm = extract32(insn, 8, 4); + op2 = extract32(insn, 5, 3); + rt = extract32(insn, 0, 5); + + if (op0 == 0) { + if (l || rt != 31) { + unallocated_encoding(s); + return; + } + switch (crn) { + case 2: /* C5.6.68 HINT */ + handle_hint(s, insn, op1, op2, crm); + break; + case 3: /* CLREX, DSB, DMB, ISB */ + handle_sync(s, insn, op1, op2, crm); + break; + case 4: /* C5.6.130 MSR (immediate) */ + handle_msr_i(s, insn, op1, op2, crm); + break; + default: + unallocated_encoding(s); + break; + } + return; + } + handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); +} + +/* C3.2.3 Exception generation + * + * 31 24 23 21 20 5 4 2 1 0 + * +-----------------+-----+------------------------+-----+----+ + * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | + * +-----------------------+------------------------+----------+ + */ +static void disas_exc(DisasContext *s, uint32_t insn) +{ + int opc = extract32(insn, 21, 3); + int op2_ll = extract32(insn, 0, 5); + int imm16 = extract32(insn, 5, 16); + TCGv_i32 tmp; + + switch (opc) { + case 0: + /* For SVC, HVC and SMC we advance the single-step state + * machine before taking the exception. This is architecturally + * mandated, to ensure that single-stepping a system call + * instruction works properly. + */ + switch (op2_ll) { + case 1: + gen_ss_advance(s); + gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16), + default_exception_el(s)); + break; + case 2: + if (s->current_el == 0) { + unallocated_encoding(s); + break; + } + /* The pre HVC helper handles cases when HVC gets trapped + * as an undefined insn by runtime configuration. + */ + gen_a64_set_pc_im(s->pc - 4); + gen_helper_pre_hvc(cpu_env); + gen_ss_advance(s); + gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2); + break; + case 3: + if (s->current_el == 0) { + unallocated_encoding(s); + break; + } + gen_a64_set_pc_im(s->pc - 4); + tmp = tcg_const_i32(syn_aa64_smc(imm16)); + gen_helper_pre_smc(cpu_env, tmp); + tcg_temp_free_i32(tmp); + gen_ss_advance(s); + gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3); + break; + default: + unallocated_encoding(s); + break; + } + break; + case 1: + if (op2_ll != 0) { + unallocated_encoding(s); + break; + } + /* BRK */ + gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16), + default_exception_el(s)); + break; + case 2: + if (op2_ll != 0) { + unallocated_encoding(s); + break; + } + /* HLT */ + unsupported_encoding(s, insn); + break; + case 5: + if (op2_ll < 1 || op2_ll > 3) { + unallocated_encoding(s); + break; + } + /* DCPS1, DCPS2, DCPS3 */ + unsupported_encoding(s, insn); + break; + default: + unallocated_encoding(s); + break; + } +} + +/* C3.2.7 Unconditional branch (register) + * 31 25 24 21 20 16 15 10 9 5 4 0 + * +---------------+-------+-------+-------+------+-------+ + * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 | + * +---------------+-------+-------+-------+------+-------+ + */ +static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) +{ + unsigned int opc, op2, op3, rn, op4; + + opc = extract32(insn, 21, 4); + op2 = extract32(insn, 16, 5); + op3 = extract32(insn, 10, 6); + rn = extract32(insn, 5, 5); + op4 = extract32(insn, 0, 5); + + if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) { + unallocated_encoding(s); + return; + } + + switch (opc) { + case 0: /* BR */ + case 2: /* RET */ + tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn)); + break; + case 1: /* BLR */ + tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn)); + tcg_gen_movi_i64(cpu_reg(s, 30), s->pc); + break; + case 4: /* ERET */ + if (s->current_el == 0) { + unallocated_encoding(s); + return; + } + gen_helper_exception_return(cpu_env); + s->is_jmp = DISAS_JUMP; + return; + case 5: /* DRPS */ + if (rn != 0x1f) { + unallocated_encoding(s); + } else { + unsupported_encoding(s, insn); + } + return; + default: + unallocated_encoding(s); + return; + } + + s->is_jmp = DISAS_JUMP; +} + +/* C3.2 Branches, exception generating and system instructions */ +static void disas_b_exc_sys(DisasContext *s, uint32_t insn) +{ + switch (extract32(insn, 25, 7)) { + case 0x0a: case 0x0b: + case 0x4a: case 0x4b: /* Unconditional branch (immediate) */ + disas_uncond_b_imm(s, insn); + break; + case 0x1a: case 0x5a: /* Compare & branch (immediate) */ + disas_comp_b_imm(s, insn); + break; + case 0x1b: case 0x5b: /* Test & branch (immediate) */ + disas_test_b_imm(s, insn); + break; + case 0x2a: /* Conditional branch (immediate) */ + disas_cond_b_imm(s, insn); + break; + case 0x6a: /* Exception generation / System */ + if (insn & (1 << 24)) { + disas_system(s, insn); + } else { + disas_exc(s, insn); + } + break; + case 0x6b: /* Unconditional branch (register) */ + disas_uncond_b_reg(s, insn); + break; + default: + unallocated_encoding(s); + break; + } +} + +/* + * Load/Store exclusive instructions are implemented by remembering + * the value/address loaded, and seeing if these are the same + * when the store is performed. This is not actually the architecturally + * mandated semantics, but it works for typical guest code sequences + * and avoids having to monitor regular stores. + * + * In system emulation mode only one CPU will be running at once, so + * this sequence is effectively atomic. In user emulation mode we + * throw an exception and handle the atomic operation elsewhere. + */ +static void gen_load_exclusive(DisasContext *s, int rt, int rt2, + TCGv_i64 addr, int size, bool is_pair) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGMemOp memop = MO_TE + size; + + g_assert(size <= 3); + tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop); + + if (is_pair) { + TCGv_i64 addr2 = tcg_temp_new_i64(); + TCGv_i64 hitmp = tcg_temp_new_i64(); + + g_assert(size >= 2); + tcg_gen_addi_i64(addr2, addr, 1 << size); + tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop); + tcg_temp_free_i64(addr2); + tcg_gen_mov_i64(cpu_exclusive_high, hitmp); + tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp); + tcg_temp_free_i64(hitmp); + } + + tcg_gen_mov_i64(cpu_exclusive_val, tmp); + tcg_gen_mov_i64(cpu_reg(s, rt), tmp); + + tcg_temp_free_i64(tmp); + tcg_gen_mov_i64(cpu_exclusive_addr, addr); +} + +#ifdef CONFIG_USER_ONLY +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, + TCGv_i64 addr, int size, int is_pair) +{ + tcg_gen_mov_i64(cpu_exclusive_test, addr); + tcg_gen_movi_i32(cpu_exclusive_info, + size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14)); + gen_exception_internal_insn(s, 4, EXCP_STREX); +} +#else +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, + TCGv_i64 inaddr, int size, int is_pair) +{ + /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] + * && (!is_pair || env->exclusive_high == [addr + datasize])) { + * [addr] = {Rt}; + * if (is_pair) { + * [addr + datasize] = {Rt2}; + * } + * {Rd} = 0; + * } else { + * {Rd} = 1; + * } + * env->exclusive_addr = -1; + */ + TCGLabel *fail_label = gen_new_label(); + TCGLabel *done_label = gen_new_label(); + TCGv_i64 addr = tcg_temp_local_new_i64(); + TCGv_i64 tmp; + + /* Copy input into a local temp so it is not trashed when the + * basic block ends at the branch insn. + */ + tcg_gen_mov_i64(addr, inaddr); + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size); + tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label); + tcg_temp_free_i64(tmp); + + if (is_pair) { + TCGv_i64 addrhi = tcg_temp_new_i64(); + TCGv_i64 tmphi = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addrhi, addr, 1 << size); + tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size); + tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label); + + tcg_temp_free_i64(tmphi); + tcg_temp_free_i64(addrhi); + } + + /* We seem to still have the exclusive monitor, so do the store */ + tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size); + if (is_pair) { + TCGv_i64 addrhi = tcg_temp_new_i64(); + + tcg_gen_addi_i64(addrhi, addr, 1 << size); + tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi, + get_mem_index(s), MO_TE + size); + tcg_temp_free_i64(addrhi); + } + + tcg_temp_free_i64(addr); + + tcg_gen_movi_i64(cpu_reg(s, rd), 0); + tcg_gen_br(done_label); + gen_set_label(fail_label); + tcg_gen_movi_i64(cpu_reg(s, rd), 1); + gen_set_label(done_label); + tcg_gen_movi_i64(cpu_exclusive_addr, -1); + +} +#endif + +/* C3.3.6 Load/store exclusive + * + * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 + * +-----+-------------+----+---+----+------+----+-------+------+------+ + * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | + * +-----+-------------+----+---+----+------+----+-------+------+------+ + * + * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit + * L: 0 -> store, 1 -> load + * o2: 0 -> exclusive, 1 -> not + * o1: 0 -> single register, 1 -> register pair + * o0: 1 -> load-acquire/store-release, 0 -> not + * + * o0 == 0 AND o2 == 1 is un-allocated + * o1 == 1 is un-allocated except for 32 and 64 bit sizes + */ +static void disas_ldst_excl(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rt2 = extract32(insn, 10, 5); + int is_lasr = extract32(insn, 15, 1); + int rs = extract32(insn, 16, 5); + int is_pair = extract32(insn, 21, 1); + int is_store = !extract32(insn, 22, 1); + int is_excl = !extract32(insn, 23, 1); + int size = extract32(insn, 30, 2); + TCGv_i64 tcg_addr; + + if ((!is_excl && !is_lasr) || + (is_pair && size < 2)) { + unallocated_encoding(s); + return; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + + /* Note that since TCG is single threaded load-acquire/store-release + * semantics require no extra if (is_lasr) { ... } handling. + */ + + if (is_excl) { + if (!is_store) { + s->is_ldex = true; + gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair); + } else { + gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair); + } + } else { + TCGv_i64 tcg_rt = cpu_reg(s, rt); + if (is_store) { + do_gpr_st(s, tcg_rt, tcg_addr, size); + } else { + do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false); + } + if (is_pair) { + TCGv_i64 tcg_rt2 = cpu_reg(s, rt); + tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size); + if (is_store) { + do_gpr_st(s, tcg_rt2, tcg_addr, size); + } else { + do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false); + } + } + } +} + +/* + * C3.3.5 Load register (literal) + * + * 31 30 29 27 26 25 24 23 5 4 0 + * +-----+-------+---+-----+-------------------+-------+ + * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | + * +-----+-------+---+-----+-------------------+-------+ + * + * V: 1 -> vector (simd/fp) + * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, + * 10-> 32 bit signed, 11 -> prefetch + * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) + */ +static void disas_ld_lit(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int64_t imm = sextract32(insn, 5, 19) << 2; + bool is_vector = extract32(insn, 26, 1); + int opc = extract32(insn, 30, 2); + bool is_signed = false; + int size = 2; + TCGv_i64 tcg_rt, tcg_addr; + + if (is_vector) { + if (opc == 3) { + unallocated_encoding(s); + return; + } + size = 2 + opc; + if (!fp_access_check(s)) { + return; + } + } else { + if (opc == 3) { + /* PRFM (literal) : prefetch */ + return; + } + size = 2 + extract32(opc, 0, 1); + is_signed = extract32(opc, 1, 1); + } + + tcg_rt = cpu_reg(s, rt); + + tcg_addr = tcg_const_i64((s->pc - 4) + imm); + if (is_vector) { + do_fp_ld(s, rt, tcg_addr, size); + } else { + do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false); + } + tcg_temp_free_i64(tcg_addr); +} + +/* + * C5.6.80 LDNP (Load Pair - non-temporal hint) + * C5.6.81 LDP (Load Pair - non vector) + * C5.6.82 LDPSW (Load Pair Signed Word - non vector) + * C5.6.176 STNP (Store Pair - non-temporal hint) + * C5.6.177 STP (Store Pair - non vector) + * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint) + * C6.3.165 LDP (Load Pair of SIMD&FP) + * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint) + * C6.3.284 STP (Store Pair of SIMD&FP) + * + * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 + * +-----+-------+---+---+-------+---+-----------------------------+ + * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | + * +-----+-------+---+---+-------+---+-------+-------+------+------+ + * + * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit + * LDPSW 01 + * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit + * V: 0 -> GPR, 1 -> Vector + * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, + * 10 -> signed offset, 11 -> pre-index + * L: 0 -> Store 1 -> Load + * + * Rt, Rt2 = GPR or SIMD registers to be stored + * Rn = general purpose register containing address + * imm7 = signed offset (multiple of 4 or 8 depending on size) + */ +static void disas_ldst_pair(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rt2 = extract32(insn, 10, 5); + uint64_t offset = sextract64(insn, 15, 7); + int index = extract32(insn, 23, 2); + bool is_vector = extract32(insn, 26, 1); + bool is_load = extract32(insn, 22, 1); + int opc = extract32(insn, 30, 2); + + bool is_signed = false; + bool postindex = false; + bool wback = false; + + TCGv_i64 tcg_addr; /* calculated address */ + int size; + + if (opc == 3) { + unallocated_encoding(s); + return; + } + + if (is_vector) { + size = 2 + opc; + } else { + size = 2 + extract32(opc, 1, 1); + is_signed = extract32(opc, 0, 1); + if (!is_load && is_signed) { + unallocated_encoding(s); + return; + } + } + + switch (index) { + case 1: /* post-index */ + postindex = true; + wback = true; + break; + case 0: + /* signed offset with "non-temporal" hint. Since we don't emulate + * caches we don't care about hints to the cache system about + * data access patterns, and handle this identically to plain + * signed offset. + */ + if (is_signed) { + /* There is no non-temporal-hint version of LDPSW */ + unallocated_encoding(s); + return; + } + postindex = false; + break; + case 2: /* signed offset, rn not updated */ + postindex = false; + break; + case 3: /* pre-index */ + postindex = false; + wback = true; + break; + } + + if (is_vector && !fp_access_check(s)) { + return; + } + + offset <<= size; + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + tcg_addr = read_cpu_reg_sp(s, rn, 1); + + if (!postindex) { + tcg_gen_addi_i64(tcg_addr, tcg_addr, offset); + } + + if (is_vector) { + if (is_load) { + do_fp_ld(s, rt, tcg_addr, size); + } else { + do_fp_st(s, rt, tcg_addr, size); + } + } else { + TCGv_i64 tcg_rt = cpu_reg(s, rt); + if (is_load) { + do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false); + } else { + do_gpr_st(s, tcg_rt, tcg_addr, size); + } + } + tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size); + if (is_vector) { + if (is_load) { + do_fp_ld(s, rt2, tcg_addr, size); + } else { + do_fp_st(s, rt2, tcg_addr, size); + } + } else { + TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); + if (is_load) { + do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false); + } else { + do_gpr_st(s, tcg_rt2, tcg_addr, size); + } + } + + if (wback) { + if (postindex) { + tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size)); + } else { + tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size); + } + tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr); + } +} + +/* + * C3.3.8 Load/store (immediate post-indexed) + * C3.3.9 Load/store (immediate pre-indexed) + * C3.3.12 Load/store (unscaled immediate) + * + * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 + * +----+-------+---+-----+-----+---+--------+-----+------+------+ + * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | + * +----+-------+---+-----+-----+---+--------+-----+------+------+ + * + * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback) + 10 -> unprivileged + * V = 0 -> non-vector + * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit + * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 + */ +static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm9 = sextract32(insn, 12, 9); + int opc = extract32(insn, 22, 2); + int size = extract32(insn, 30, 2); + int idx = extract32(insn, 10, 2); + bool is_signed = false; + bool is_store = false; + bool is_extended = false; + bool is_unpriv = (idx == 2); + bool is_vector = extract32(insn, 26, 1); + bool post_index; + bool writeback; + + TCGv_i64 tcg_addr; + + if (is_vector) { + size |= (opc & 2) << 1; + if (size > 4 || is_unpriv) { + unallocated_encoding(s); + return; + } + is_store = ((opc & 1) == 0); + if (!fp_access_check(s)) { + return; + } + } else { + if (size == 3 && opc == 2) { + /* PRFM - prefetch */ + if (is_unpriv) { + unallocated_encoding(s); + return; + } + return; + } + if (opc == 3 && size > 1) { + unallocated_encoding(s); + return; + } + is_store = (opc == 0); + is_signed = opc & (1<<1); + is_extended = (size < 3) && (opc & 1); + } + + switch (idx) { + case 0: + case 2: + post_index = false; + writeback = false; + break; + case 1: + post_index = true; + writeback = true; + break; + case 3: + post_index = false; + writeback = true; + break; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + + if (!post_index) { + tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9); + } + + if (is_vector) { + if (is_store) { + do_fp_st(s, rt, tcg_addr, size); + } else { + do_fp_ld(s, rt, tcg_addr, size); + } + } else { + TCGv_i64 tcg_rt = cpu_reg(s, rt); + int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + + if (is_store) { + do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx); + } else { + do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size, + is_signed, is_extended, memidx); + } + } + + if (writeback) { + TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); + if (post_index) { + tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9); + } + tcg_gen_mov_i64(tcg_rn, tcg_addr); + } +} + +/* + * C3.3.10 Load/store (register offset) + * + * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ + * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | + * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ + * + * For non-vector: + * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit + * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 + * For vector: + * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated + * opc<0>: 0 -> store, 1 -> load + * V: 1 -> vector/simd + * opt: extend encoding (see DecodeRegExtend) + * S: if S=1 then scale (essentially index by sizeof(size)) + * Rt: register to transfer into/out of + * Rn: address register or SP for base + * Rm: offset register or ZR for offset + */ +static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int shift = extract32(insn, 12, 1); + int rm = extract32(insn, 16, 5); + int opc = extract32(insn, 22, 2); + int opt = extract32(insn, 13, 3); + int size = extract32(insn, 30, 2); + bool is_signed = false; + bool is_store = false; + bool is_extended = false; + bool is_vector = extract32(insn, 26, 1); + + TCGv_i64 tcg_rm; + TCGv_i64 tcg_addr; + + if (extract32(opt, 1, 1) == 0) { + unallocated_encoding(s); + return; + } + + if (is_vector) { + size |= (opc & 2) << 1; + if (size > 4) { + unallocated_encoding(s); + return; + } + is_store = !extract32(opc, 0, 1); + if (!fp_access_check(s)) { + return; + } + } else { + if (size == 3 && opc == 2) { + /* PRFM - prefetch */ + return; + } + if (opc == 3 && size > 1) { + unallocated_encoding(s); + return; + } + is_store = (opc == 0); + is_signed = extract32(opc, 1, 1); + is_extended = (size < 3) && extract32(opc, 0, 1); + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + + tcg_rm = read_cpu_reg(s, rm, 1); + ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); + + tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm); + + if (is_vector) { + if (is_store) { + do_fp_st(s, rt, tcg_addr, size); + } else { + do_fp_ld(s, rt, tcg_addr, size); + } + } else { + TCGv_i64 tcg_rt = cpu_reg(s, rt); + if (is_store) { + do_gpr_st(s, tcg_rt, tcg_addr, size); + } else { + do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended); + } + } +} + +/* + * C3.3.13 Load/store (unsigned immediate) + * + * 31 30 29 27 26 25 24 23 22 21 10 9 5 + * +----+-------+---+-----+-----+------------+-------+------+ + * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | + * +----+-------+---+-----+-----+------------+-------+------+ + * + * For non-vector: + * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit + * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 + * For vector: + * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated + * opc<0>: 0 -> store, 1 -> load + * Rn: base address register (inc SP) + * Rt: target register + */ +static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + unsigned int imm12 = extract32(insn, 10, 12); + bool is_vector = extract32(insn, 26, 1); + int size = extract32(insn, 30, 2); + int opc = extract32(insn, 22, 2); + unsigned int offset; + + TCGv_i64 tcg_addr; + + bool is_store; + bool is_signed = false; + bool is_extended = false; + + if (is_vector) { + size |= (opc & 2) << 1; + if (size > 4) { + unallocated_encoding(s); + return; + } + is_store = !extract32(opc, 0, 1); + if (!fp_access_check(s)) { + return; + } + } else { + if (size == 3 && opc == 2) { + /* PRFM - prefetch */ + return; + } + if (opc == 3 && size > 1) { + unallocated_encoding(s); + return; + } + is_store = (opc == 0); + is_signed = extract32(opc, 1, 1); + is_extended = (size < 3) && extract32(opc, 0, 1); + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + offset = imm12 << size; + tcg_gen_addi_i64(tcg_addr, tcg_addr, offset); + + if (is_vector) { + if (is_store) { + do_fp_st(s, rt, tcg_addr, size); + } else { + do_fp_ld(s, rt, tcg_addr, size); + } + } else { + TCGv_i64 tcg_rt = cpu_reg(s, rt); + if (is_store) { + do_gpr_st(s, tcg_rt, tcg_addr, size); + } else { + do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended); + } + } +} + +/* Load/store register (all forms) */ +static void disas_ldst_reg(DisasContext *s, uint32_t insn) +{ + switch (extract32(insn, 24, 2)) { + case 0: + if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) { + disas_ldst_reg_roffset(s, insn); + } else { + /* Load/store register (unscaled immediate) + * Load/store immediate pre/post-indexed + * Load/store register unprivileged + */ + disas_ldst_reg_imm9(s, insn); + } + break; + case 1: + disas_ldst_reg_unsigned_imm(s, insn); + break; + default: + unallocated_encoding(s); + break; + } +} + +/* C3.3.1 AdvSIMD load/store multiple structures + * + * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 + * +---+---+---------------+---+-------------+--------+------+------+------+ + * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | + * +---+---+---------------+---+-------------+--------+------+------+------+ + * + * C3.3.2 AdvSIMD load/store multiple structures (post-indexed) + * + * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---------------+---+---+---------+--------+------+------+------+ + * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | + * +---+---+---------------+---+---+---------+--------+------+------+------+ + * + * Rt: first (or only) SIMD&FP register to be transferred + * Rn: base address or SP + * Rm (post-index only): post-index register (when !31) or size dependent #imm + */ +static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 10, 2); + int opcode = extract32(insn, 12, 4); + bool is_store = !extract32(insn, 22, 1); + bool is_postidx = extract32(insn, 23, 1); + bool is_q = extract32(insn, 30, 1); + TCGv_i64 tcg_addr, tcg_rn; + + int ebytes = 1 << size; + int elements = (is_q ? 128 : 64) / (8 << size); + int rpt; /* num iterations */ + int selem; /* structure elements */ + int r; + + if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { + unallocated_encoding(s); + return; + } + + /* From the shared decode logic */ + switch (opcode) { + case 0x0: + rpt = 1; + selem = 4; + break; + case 0x2: + rpt = 4; + selem = 1; + break; + case 0x4: + rpt = 1; + selem = 3; + break; + case 0x6: + rpt = 3; + selem = 1; + break; + case 0x7: + rpt = 1; + selem = 1; + break; + case 0x8: + rpt = 1; + selem = 2; + break; + case 0xa: + rpt = 2; + selem = 1; + break; + default: + unallocated_encoding(s); + return; + } + + if (size == 3 && !is_q && selem != 1) { + /* reserved */ + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_addr = tcg_temp_new_i64(); + tcg_gen_mov_i64(tcg_addr, tcg_rn); + + for (r = 0; r < rpt; r++) { + int e; + for (e = 0; e < elements; e++) { + int tt = (rt + r) % 32; + int xs; + for (xs = 0; xs < selem; xs++) { + if (is_store) { + do_vec_st(s, tt, e, tcg_addr, size); + } else { + do_vec_ld(s, tt, e, tcg_addr, size); + + /* For non-quad operations, setting a slice of the low + * 64 bits of the register clears the high 64 bits (in + * the ARM ARM pseudocode this is implicit in the fact + * that 'rval' is a 64 bit wide variable). We optimize + * by noticing that we only need to do this the first + * time we touch a register. + */ + if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) { + clear_vec_high(s, tt); + } + } + tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + tt = (tt + 1) % 32; + } + } + } + + if (is_postidx) { + int rm = extract32(insn, 16, 5); + if (rm == 31) { + tcg_gen_mov_i64(tcg_rn, tcg_addr); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + } + } + tcg_temp_free_i64(tcg_addr); +} + +/* C3.3.3 AdvSIMD load/store single structure + * + * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * + * C3.3.4 AdvSIMD load/store single structure (post-indexed) + * + * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | + * +---+---+---------------+-----+-----------+-----+---+------+------+------+ + * + * Rt: first (or only) SIMD&FP register to be transferred + * Rn: base address or SP + * Rm (post-index only): post-index register (when !31) or size dependent #imm + * index = encoded in Q:S:size dependent on size + * + * lane_size = encoded in R, opc + * transfer width = encoded in opc, S, size + */ +static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 10, 2); + int S = extract32(insn, 12, 1); + int opc = extract32(insn, 13, 3); + int R = extract32(insn, 21, 1); + int is_load = extract32(insn, 22, 1); + int is_postidx = extract32(insn, 23, 1); + int is_q = extract32(insn, 30, 1); + + int scale = extract32(opc, 1, 2); + int selem = (extract32(opc, 0, 1) << 1 | R) + 1; + bool replicate = false; + int index = is_q << 3 | S << 2 | size; + int ebytes, xs; + TCGv_i64 tcg_addr, tcg_rn; + + switch (scale) { + case 3: + if (!is_load || S) { + unallocated_encoding(s); + return; + } + scale = size; + replicate = true; + break; + case 0: + break; + case 1: + if (extract32(size, 0, 1)) { + unallocated_encoding(s); + return; + } + index >>= 1; + break; + case 2: + if (extract32(size, 1, 1)) { + unallocated_encoding(s); + return; + } + if (!extract32(size, 0, 1)) { + index >>= 2; + } else { + if (S) { + unallocated_encoding(s); + return; + } + index >>= 3; + scale = 3; + } + break; + default: + g_assert_not_reached(); + } + + if (!fp_access_check(s)) { + return; + } + + ebytes = 1 << scale; + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_addr = tcg_temp_new_i64(); + tcg_gen_mov_i64(tcg_addr, tcg_rn); + + for (xs = 0; xs < selem; xs++) { + if (replicate) { + /* Load and replicate to all elements */ + uint64_t mulconst; + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, + get_mem_index(s), MO_TE + scale); + switch (scale) { + case 0: + mulconst = 0x0101010101010101ULL; + break; + case 1: + mulconst = 0x0001000100010001ULL; + break; + case 2: + mulconst = 0x0000000100000001ULL; + break; + case 3: + mulconst = 0; + break; + default: + g_assert_not_reached(); + } + if (mulconst) { + tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst); + } + write_vec_element(s, tcg_tmp, rt, 0, MO_64); + if (is_q) { + write_vec_element(s, tcg_tmp, rt, 1, MO_64); + } else { + clear_vec_high(s, rt); + } + tcg_temp_free_i64(tcg_tmp); + } else { + /* Load/store one element per register */ + if (is_load) { + do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale); + } else { + do_vec_st(s, rt, index, tcg_addr, MO_TE + scale); + } + } + tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes); + rt = (rt + 1) % 32; + } + + if (is_postidx) { + int rm = extract32(insn, 16, 5); + if (rm == 31) { + tcg_gen_mov_i64(tcg_rn, tcg_addr); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + } + } + tcg_temp_free_i64(tcg_addr); +} + +/* C3.3 Loads and stores */ +static void disas_ldst(DisasContext *s, uint32_t insn) +{ + switch (extract32(insn, 24, 6)) { + case 0x08: /* Load/store exclusive */ + disas_ldst_excl(s, insn); + break; + case 0x18: case 0x1c: /* Load register (literal) */ + disas_ld_lit(s, insn); + break; + case 0x28: case 0x29: + case 0x2c: case 0x2d: /* Load/store pair (all forms) */ + disas_ldst_pair(s, insn); + break; + case 0x38: case 0x39: + case 0x3c: case 0x3d: /* Load/store register (all forms) */ + disas_ldst_reg(s, insn); + break; + case 0x0c: /* AdvSIMD load/store multiple structures */ + disas_ldst_multiple_struct(s, insn); + break; + case 0x0d: /* AdvSIMD load/store single structure */ + disas_ldst_single_struct(s, insn); + break; + default: + unallocated_encoding(s); + break; + } +} + +/* C3.4.6 PC-rel. addressing + * 31 30 29 28 24 23 5 4 0 + * +----+-------+-----------+-------------------+------+ + * | op | immlo | 1 0 0 0 0 | immhi | Rd | + * +----+-------+-----------+-------------------+------+ + */ +static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) +{ + unsigned int page, rd; + uint64_t base; + uint64_t offset; + + page = extract32(insn, 31, 1); + /* SignExtend(immhi:immlo) -> offset */ + offset = sextract64(insn, 5, 19); + offset = offset << 2 | extract32(insn, 29, 2); + rd = extract32(insn, 0, 5); + base = s->pc - 4; + + if (page) { + /* ADRP (page based) */ + base &= ~0xfff; + offset <<= 12; + } + + tcg_gen_movi_i64(cpu_reg(s, rd), base + offset); +} + +/* + * C3.4.1 Add/subtract (immediate) + * + * 31 30 29 28 24 23 22 21 10 9 5 4 0 + * +--+--+--+-----------+-----+-------------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd | + * +--+--+--+-----------+-----+-------------+-----+-----+ + * + * sf: 0 -> 32bit, 1 -> 64bit + * op: 0 -> add , 1 -> sub + * S: 1 -> set flags + * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12 + */ +static void disas_add_sub_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + uint64_t imm = extract32(insn, 10, 12); + int shift = extract32(insn, 22, 2); + bool setflags = extract32(insn, 29, 1); + bool sub_op = extract32(insn, 30, 1); + bool is_64bit = extract32(insn, 31, 1); + + TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); + TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); + TCGv_i64 tcg_result; + + switch (shift) { + case 0x0: + break; + case 0x1: + imm <<= 12; + break; + default: + unallocated_encoding(s); + return; + } + + tcg_result = tcg_temp_new_i64(); + if (!setflags) { + if (sub_op) { + tcg_gen_subi_i64(tcg_result, tcg_rn, imm); + } else { + tcg_gen_addi_i64(tcg_result, tcg_rn, imm); + } + } else { + TCGv_i64 tcg_imm = tcg_const_i64(imm); + if (sub_op) { + gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); + } else { + gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); + } + tcg_temp_free_i64(tcg_imm); + } + + if (is_64bit) { + tcg_gen_mov_i64(tcg_rd, tcg_result); + } else { + tcg_gen_ext32u_i64(tcg_rd, tcg_result); + } + + tcg_temp_free_i64(tcg_result); +} + +/* The input should be a value in the bottom e bits (with higher + * bits zero); returns that value replicated into every element + * of size e in a 64 bit integer. + */ +static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) +{ + assert(e != 0); + while (e < 64) { + mask |= mask << e; + e *= 2; + } + return mask; +} + +/* Return a value with the bottom len bits set (where 0 < len <= 64) */ +static inline uint64_t bitmask64(unsigned int length) +{ + assert(length > 0 && length <= 64); + return ~0ULL >> (64 - length); +} + +/* Simplified variant of pseudocode DecodeBitMasks() for the case where we + * only require the wmask. Returns false if the imms/immr/immn are a reserved + * value (ie should cause a guest UNDEF exception), and true if they are + * valid, in which case the decoded bit pattern is written to result. + */ +static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, + unsigned int imms, unsigned int immr) +{ + uint64_t mask; + unsigned e, levels, s, r; + int len; + + assert(immn < 2 && imms < 64 && immr < 64); + + /* The bit patterns we create here are 64 bit patterns which + * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or + * 64 bits each. Each element contains the same value: a run + * of between 1 and e-1 non-zero bits, rotated within the + * element by between 0 and e-1 bits. + * + * The element size and run length are encoded into immn (1 bit) + * and imms (6 bits) as follows: + * 64 bit elements: immn = 1, imms = + * 32 bit elements: immn = 0, imms = 0 : + * 16 bit elements: immn = 0, imms = 10 : + * 8 bit elements: immn = 0, imms = 110 : + * 4 bit elements: immn = 0, imms = 1110 : + * 2 bit elements: immn = 0, imms = 11110 : + * Notice that immn = 0, imms = 11111x is the only combination + * not covered by one of the above options; this is reserved. + * Further, all-ones is a reserved pattern. + * + * In all cases the rotation is by immr % e (and immr is 6 bits). + */ + + /* First determine the element size */ + len = 31 - clz32((immn << 6) | (~imms & 0x3f)); + if (len < 1) { + /* This is the immn == 0, imms == 0x11111x case */ + return false; + } + e = 1 << len; + + levels = e - 1; + s = imms & levels; + r = immr & levels; + + if (s == levels) { + /* mustn't be all-ones. */ + return false; + } + + /* Create the value of one element: s+1 set bits rotated + * by r within the element (which is e bits wide)... + */ + mask = bitmask64(s + 1); + if (r) { + mask = (mask >> r) | (mask << (e - r)); + mask &= bitmask64(e); + } + /* ...then replicate the element over the whole 64 bit value */ + mask = bitfield_replicate(mask, e); + *result = mask; + return true; +} + +/* C3.4.4 Logical (immediate) + * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 + * +----+-----+-------------+---+------+------+------+------+ + * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd | + * +----+-----+-------------+---+------+------+------+------+ + */ +static void disas_logic_imm(DisasContext *s, uint32_t insn) +{ + unsigned int sf, opc, is_n, immr, imms, rn, rd; + TCGv_i64 tcg_rd, tcg_rn; + uint64_t wmask; + bool is_and = false; + + sf = extract32(insn, 31, 1); + opc = extract32(insn, 29, 2); + is_n = extract32(insn, 22, 1); + immr = extract32(insn, 16, 6); + imms = extract32(insn, 10, 6); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + + if (!sf && is_n) { + unallocated_encoding(s); + return; + } + + if (opc == 0x3) { /* ANDS */ + tcg_rd = cpu_reg(s, rd); + } else { + tcg_rd = cpu_reg_sp(s, rd); + } + tcg_rn = cpu_reg(s, rn); + + if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) { + /* some immediate field values are reserved */ + unallocated_encoding(s); + return; + } + + if (!sf) { + wmask &= 0xffffffff; + } + + switch (opc) { + case 0x3: /* ANDS */ + case 0x0: /* AND */ + tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask); + is_and = true; + break; + case 0x1: /* ORR */ + tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask); + break; + case 0x2: /* EOR */ + tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask); + break; + default: + assert(FALSE); /* must handle all above */ + break; + } + + if (!sf && !is_and) { + /* zero extend final result; we know we can skip this for AND + * since the immediate had the high 32 bits clear. + */ + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + + if (opc == 3) { /* ANDS */ + gen_logic_CC(sf, tcg_rd); + } +} + +/* + * C3.4.5 Move wide (immediate) + * + * 31 30 29 28 23 22 21 20 5 4 0 + * +--+-----+-------------+-----+----------------+------+ + * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd | + * +--+-----+-------------+-----+----------------+------+ + * + * sf: 0 -> 32 bit, 1 -> 64 bit + * opc: 00 -> N, 10 -> Z, 11 -> K + * hw: shift/16 (0,16, and sf only 32, 48) + */ +static void disas_movw_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + uint64_t imm = extract32(insn, 5, 16); + int sf = extract32(insn, 31, 1); + int opc = extract32(insn, 29, 2); + int pos = extract32(insn, 21, 2) << 4; + TCGv_i64 tcg_rd = cpu_reg(s, rd); + TCGv_i64 tcg_imm; + + if (!sf && (pos >= 32)) { + unallocated_encoding(s); + return; + } + + switch (opc) { + case 0: /* MOVN */ + case 2: /* MOVZ */ + imm <<= pos; + if (opc == 0) { + imm = ~imm; + } + if (!sf) { + imm &= 0xffffffffu; + } + tcg_gen_movi_i64(tcg_rd, imm); + break; + case 3: /* MOVK */ + tcg_imm = tcg_const_i64(imm); + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16); + tcg_temp_free_i64(tcg_imm); + if (!sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + break; + default: + unallocated_encoding(s); + break; + } +} + +/* C3.4.2 Bitfield + * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 + * +----+-----+-------------+---+------+------+------+------+ + * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd | + * +----+-----+-------------+---+------+------+------+------+ + */ +static void disas_bitfield(DisasContext *s, uint32_t insn) +{ + unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len; + TCGv_i64 tcg_rd, tcg_tmp; + + sf = extract32(insn, 31, 1); + opc = extract32(insn, 29, 2); + n = extract32(insn, 22, 1); + ri = extract32(insn, 16, 6); + si = extract32(insn, 10, 6); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + bitsize = sf ? 64 : 32; + + if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) { + unallocated_encoding(s); + return; + } + + tcg_rd = cpu_reg(s, rd); + tcg_tmp = read_cpu_reg(s, rn, sf); + + /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */ + + if (opc != 1) { /* SBFM or UBFM */ + tcg_gen_movi_i64(tcg_rd, 0); + } + + /* do the bit move operation */ + if (si >= ri) { + /* Wd = Wn */ + tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); + pos = 0; + len = (si - ri) + 1; + } else { + /* Wd<32+s-r,32-r> = Wn */ + pos = bitsize - ri; + len = si + 1; + } + + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); + + if (opc == 0) { /* SBFM - sign extend the destination field */ + tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len)); + tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len)); + } + + if (!sf) { /* zero extend final result */ + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } +} + +/* C3.4.3 Extract + * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0 + * +----+------+-------------+---+----+------+--------+------+------+ + * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd | + * +----+------+-------------+---+----+------+--------+------+------+ + */ +static void disas_extract(DisasContext *s, uint32_t insn) +{ + unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0; + + sf = extract32(insn, 31, 1); + n = extract32(insn, 22, 1); + rm = extract32(insn, 16, 5); + imm = extract32(insn, 10, 6); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + op21 = extract32(insn, 29, 2); + op0 = extract32(insn, 21, 1); + bitsize = sf ? 64 : 32; + + if (sf != n || op21 || op0 || imm >= bitsize) { + unallocated_encoding(s); + } else { + TCGv_i64 tcg_rd, tcg_rm, tcg_rn; + + tcg_rd = cpu_reg(s, rd); + + if (imm) { + /* OPTME: we can special case rm==rn as a rotate */ + tcg_rm = read_cpu_reg(s, rm, sf); + tcg_rn = read_cpu_reg(s, rn, sf); + tcg_gen_shri_i64(tcg_rm, tcg_rm, imm); + tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm); + tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn); + if (!sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + } else { + /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, + * so an extract from bit 0 is a special case. + */ + if (sf) { + tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm)); + } else { + tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm)); + } + } + + } +} + +/* C3.4 Data processing - immediate */ +static void disas_data_proc_imm(DisasContext *s, uint32_t insn) +{ + switch (extract32(insn, 23, 6)) { + case 0x20: case 0x21: /* PC-rel. addressing */ + disas_pc_rel_adr(s, insn); + break; + case 0x22: case 0x23: /* Add/subtract (immediate) */ + disas_add_sub_imm(s, insn); + break; + case 0x24: /* Logical (immediate) */ + disas_logic_imm(s, insn); + break; + case 0x25: /* Move wide (immediate) */ + disas_movw_imm(s, insn); + break; + case 0x26: /* Bitfield */ + disas_bitfield(s, insn); + break; + case 0x27: /* Extract */ + disas_extract(s, insn); + break; + default: + unallocated_encoding(s); + break; + } +} + +/* Shift a TCGv src by TCGv shift_amount, put result in dst. + * Note that it is the caller's responsibility to ensure that the + * shift amount is in range (ie 0..31 or 0..63) and provide the ARM + * mandated semantics for out of range shifts. + */ +static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, + enum a64_shift_type shift_type, TCGv_i64 shift_amount) +{ + switch (shift_type) { + case A64_SHIFT_TYPE_LSL: + tcg_gen_shl_i64(dst, src, shift_amount); + break; + case A64_SHIFT_TYPE_LSR: + tcg_gen_shr_i64(dst, src, shift_amount); + break; + case A64_SHIFT_TYPE_ASR: + if (!sf) { + tcg_gen_ext32s_i64(dst, src); + } + tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); + break; + case A64_SHIFT_TYPE_ROR: + if (sf) { + tcg_gen_rotr_i64(dst, src, shift_amount); + } else { + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(t0, src); + tcg_gen_trunc_i64_i32(t1, shift_amount); + tcg_gen_rotr_i32(t0, t0, t1); + tcg_gen_extu_i32_i64(dst, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + } + break; + default: + assert(FALSE); /* all shift types should be handled */ + break; + } + + if (!sf) { /* zero extend final result */ + tcg_gen_ext32u_i64(dst, dst); + } +} + +/* Shift a TCGv src by immediate, put result in dst. + * The shift amount must be in range (this should always be true as the + * relevant instructions will UNDEF on bad shift immediates). + */ +static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, + enum a64_shift_type shift_type, unsigned int shift_i) +{ + assert(shift_i < (sf ? 64 : 32)); + + if (shift_i == 0) { + tcg_gen_mov_i64(dst, src); + } else { + TCGv_i64 shift_const; + + shift_const = tcg_const_i64(shift_i); + shift_reg(dst, src, sf, shift_type, shift_const); + tcg_temp_free_i64(shift_const); + } +} + +/* C3.5.10 Logical (shifted register) + * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 + * +----+-----+-----------+-------+---+------+--------+------+------+ + * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | + * +----+-----+-----------+-------+---+------+--------+------+------+ + */ +static void disas_logic_reg(DisasContext *s, uint32_t insn) +{ + TCGv_i64 tcg_rd, tcg_rn, tcg_rm; + unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; + + sf = extract32(insn, 31, 1); + opc = extract32(insn, 29, 2); + shift_type = extract32(insn, 22, 2); + invert = extract32(insn, 21, 1); + rm = extract32(insn, 16, 5); + shift_amount = extract32(insn, 10, 6); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + + if (!sf && (shift_amount & (1 << 5))) { + unallocated_encoding(s); + return; + } + + tcg_rd = cpu_reg(s, rd); + + if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { + /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for + * register-register MOV and MVN, so it is worth special casing. + */ + tcg_rm = cpu_reg(s, rm); + if (invert) { + tcg_gen_not_i64(tcg_rd, tcg_rm); + if (!sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + } else { + if (sf) { + tcg_gen_mov_i64(tcg_rd, tcg_rm); + } else { + tcg_gen_ext32u_i64(tcg_rd, tcg_rm); + } + } + return; + } + + tcg_rm = read_cpu_reg(s, rm, sf); + + if (shift_amount) { + shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); + } + + tcg_rn = cpu_reg(s, rn); + + switch (opc | (invert << 2)) { + case 0: /* AND */ + case 3: /* ANDS */ + tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); + break; + case 1: /* ORR */ + tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); + break; + case 2: /* EOR */ + tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); + break; + case 4: /* BIC */ + case 7: /* BICS */ + tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); + break; + case 5: /* ORN */ + tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); + break; + case 6: /* EON */ + tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); + break; + default: + assert(FALSE); + break; + } + + if (!sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + + if (opc == 3) { + gen_logic_CC(sf, tcg_rd); + } +} + +/* + * C3.5.1 Add/subtract (extended register) + * + * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| + * +--+--+--+-----------+-----+--+-------+------+------+----+----+ + * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | + * +--+--+--+-----------+-----+--+-------+------+------+----+----+ + * + * sf: 0 -> 32bit, 1 -> 64bit + * op: 0 -> add , 1 -> sub + * S: 1 -> set flags + * opt: 00 + * option: extension type (see DecodeRegExtend) + * imm3: optional shift to Rm + * + * Rd = Rn + LSL(extend(Rm), amount) + */ +static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm3 = extract32(insn, 10, 3); + int option = extract32(insn, 13, 3); + int rm = extract32(insn, 16, 5); + bool setflags = extract32(insn, 29, 1); + bool sub_op = extract32(insn, 30, 1); + bool sf = extract32(insn, 31, 1); + + TCGv_i64 tcg_rm, tcg_rn; /* temps */ + TCGv_i64 tcg_rd; + TCGv_i64 tcg_result; + + if (imm3 > 4) { + unallocated_encoding(s); + return; + } + + /* non-flag setting ops may use SP */ + if (!setflags) { + tcg_rd = cpu_reg_sp(s, rd); + } else { + tcg_rd = cpu_reg(s, rd); + } + tcg_rn = read_cpu_reg_sp(s, rn, sf); + + tcg_rm = read_cpu_reg(s, rm, sf); + ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); + + tcg_result = tcg_temp_new_i64(); + + if (!setflags) { + if (sub_op) { + tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); + } else { + tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); + } + } else { + if (sub_op) { + gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); + } else { + gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); + } + } + + if (sf) { + tcg_gen_mov_i64(tcg_rd, tcg_result); + } else { + tcg_gen_ext32u_i64(tcg_rd, tcg_result); + } + + tcg_temp_free_i64(tcg_result); +} + +/* + * C3.5.2 Add/subtract (shifted register) + * + * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 + * +--+--+--+-----------+-----+--+-------+---------+------+------+ + * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | + * +--+--+--+-----------+-----+--+-------+---------+------+------+ + * + * sf: 0 -> 32bit, 1 -> 64bit + * op: 0 -> add , 1 -> sub + * S: 1 -> set flags + * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED + * imm6: Shift amount to apply to Rm before the add/sub + */ +static void disas_add_sub_reg(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm6 = extract32(insn, 10, 6); + int rm = extract32(insn, 16, 5); + int shift_type = extract32(insn, 22, 2); + bool setflags = extract32(insn, 29, 1); + bool sub_op = extract32(insn, 30, 1); + bool sf = extract32(insn, 31, 1); + + TCGv_i64 tcg_rd = cpu_reg(s, rd); + TCGv_i64 tcg_rn, tcg_rm; + TCGv_i64 tcg_result; + + if ((shift_type == 3) || (!sf && (imm6 > 31))) { + unallocated_encoding(s); + return; + } + + tcg_rn = read_cpu_reg(s, rn, sf); + tcg_rm = read_cpu_reg(s, rm, sf); + + shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); + + tcg_result = tcg_temp_new_i64(); + + if (!setflags) { + if (sub_op) { + tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); + } else { + tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); + } + } else { + if (sub_op) { + gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); + } else { + gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); + } + } + + if (sf) { + tcg_gen_mov_i64(tcg_rd, tcg_result); + } else { + tcg_gen_ext32u_i64(tcg_rd, tcg_result); + } + + tcg_temp_free_i64(tcg_result); +} + +/* C3.5.9 Data-processing (3 source) + + 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 + +--+------+-----------+------+------+----+------+------+------+ + |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | + +--+------+-----------+------+------+----+------+------+------+ + + */ +static void disas_data_proc_3src(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int ra = extract32(insn, 10, 5); + int rm = extract32(insn, 16, 5); + int op_id = (extract32(insn, 29, 3) << 4) | + (extract32(insn, 21, 3) << 1) | + extract32(insn, 15, 1); + bool sf = extract32(insn, 31, 1); + bool is_sub = extract32(op_id, 0, 1); + bool is_high = extract32(op_id, 2, 1); + bool is_signed = false; + TCGv_i64 tcg_op1; + TCGv_i64 tcg_op2; + TCGv_i64 tcg_tmp; + + /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ + switch (op_id) { + case 0x42: /* SMADDL */ + case 0x43: /* SMSUBL */ + case 0x44: /* SMULH */ + is_signed = true; + break; + case 0x0: /* MADD (32bit) */ + case 0x1: /* MSUB (32bit) */ + case 0x40: /* MADD (64bit) */ + case 0x41: /* MSUB (64bit) */ + case 0x4a: /* UMADDL */ + case 0x4b: /* UMSUBL */ + case 0x4c: /* UMULH */ + break; + default: + unallocated_encoding(s); + return; + } + + if (is_high) { + TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ + TCGv_i64 tcg_rd = cpu_reg(s, rd); + TCGv_i64 tcg_rn = cpu_reg(s, rn); + TCGv_i64 tcg_rm = cpu_reg(s, rm); + + if (is_signed) { + tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); + } else { + tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); + } + + tcg_temp_free_i64(low_bits); + return; + } + + tcg_op1 = tcg_temp_new_i64(); + tcg_op2 = tcg_temp_new_i64(); + tcg_tmp = tcg_temp_new_i64(); + + if (op_id < 0x42) { + tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); + tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); + } else { + if (is_signed) { + tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); + tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); + } else { + tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); + tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); + } + } + + if (ra == 31 && !is_sub) { + /* Special-case MADD with rA == XZR; it is the standard MUL alias */ + tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); + } else { + tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); + if (is_sub) { + tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); + } else { + tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); + } + } + + if (!sf) { + tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_tmp); +} + +/* C3.5.3 - Add/subtract (with carry) + * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 + * +--+--+--+------------------------+------+---------+------+-----+ + * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | opcode2 | Rn | Rd | + * +--+--+--+------------------------+------+---------+------+-----+ + * [000000] + */ + +static void disas_adc_sbc(DisasContext *s, uint32_t insn) +{ + unsigned int sf, op, setflags, rm, rn, rd; + TCGv_i64 tcg_y, tcg_rn, tcg_rd; + + if (extract32(insn, 10, 6) != 0) { + unallocated_encoding(s); + return; + } + + sf = extract32(insn, 31, 1); + op = extract32(insn, 30, 1); + setflags = extract32(insn, 29, 1); + rm = extract32(insn, 16, 5); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + + tcg_rd = cpu_reg(s, rd); + tcg_rn = cpu_reg(s, rn); + + if (op) { + tcg_y = new_tmp_a64(s); + tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); + } else { + tcg_y = cpu_reg(s, rm); + } + + if (setflags) { + gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); + } else { + gen_adc(sf, tcg_rd, tcg_rn, tcg_y); + } +} + +/* C3.5.4 - C3.5.5 Conditional compare (immediate / register) + * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 + * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ + * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | + * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ + * [1] y [0] [0] + */ +static void disas_cc(DisasContext *s, uint32_t insn) +{ + unsigned int sf, op, y, cond, rn, nzcv, is_imm; + TCGLabel *label_continue = NULL; + TCGv_i64 tcg_tmp, tcg_y, tcg_rn; + + if (!extract32(insn, 29, 1)) { + unallocated_encoding(s); + return; + } + if (insn & (1 << 10 | 1 << 4)) { + unallocated_encoding(s); + return; + } + sf = extract32(insn, 31, 1); + op = extract32(insn, 30, 1); + is_imm = extract32(insn, 11, 1); + y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ + cond = extract32(insn, 12, 4); + rn = extract32(insn, 5, 5); + nzcv = extract32(insn, 0, 4); + + if (cond < 0x0e) { /* not always */ + TCGLabel *label_match = gen_new_label(); + label_continue = gen_new_label(); + arm_gen_test_cc(cond, label_match); + /* nomatch: */ + tcg_tmp = tcg_temp_new_i64(); + tcg_gen_movi_i64(tcg_tmp, nzcv << 28); + gen_set_nzcv(tcg_tmp); + tcg_temp_free_i64(tcg_tmp); + tcg_gen_br(label_continue); + gen_set_label(label_match); + } + /* match, or condition is always */ + if (is_imm) { + tcg_y = new_tmp_a64(s); + tcg_gen_movi_i64(tcg_y, y); + } else { + tcg_y = cpu_reg(s, y); + } + tcg_rn = cpu_reg(s, rn); + + tcg_tmp = tcg_temp_new_i64(); + if (op) { + gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); + } else { + gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); + } + tcg_temp_free_i64(tcg_tmp); + + if (cond < 0x0e) { /* continue */ + gen_set_label(label_continue); + } +} + +/* C3.5.6 Conditional select + * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 + * +----+----+---+-----------------+------+------+-----+------+------+ + * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | + * +----+----+---+-----------------+------+------+-----+------+------+ + */ +static void disas_cond_select(DisasContext *s, uint32_t insn) +{ + unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; + TCGv_i64 tcg_rd, tcg_src; + + if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { + /* S == 1 or op2<1> == 1 */ + unallocated_encoding(s); + return; + } + sf = extract32(insn, 31, 1); + else_inv = extract32(insn, 30, 1); + rm = extract32(insn, 16, 5); + cond = extract32(insn, 12, 4); + else_inc = extract32(insn, 10, 1); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + + if (rd == 31) { + /* silly no-op write; until we use movcond we must special-case + * this to avoid a dead temporary across basic blocks. + */ + return; + } + + tcg_rd = cpu_reg(s, rd); + + if (cond >= 0x0e) { /* condition "always" */ + tcg_src = read_cpu_reg(s, rn, sf); + tcg_gen_mov_i64(tcg_rd, tcg_src); + } else { + /* OPTME: we could use movcond here, at the cost of duplicating + * a lot of the arm_gen_test_cc() logic. + */ + TCGLabel *label_match = gen_new_label(); + TCGLabel *label_continue = gen_new_label(); + + arm_gen_test_cc(cond, label_match); + /* nomatch: */ + tcg_src = cpu_reg(s, rm); + + if (else_inv && else_inc) { + tcg_gen_neg_i64(tcg_rd, tcg_src); + } else if (else_inv) { + tcg_gen_not_i64(tcg_rd, tcg_src); + } else if (else_inc) { + tcg_gen_addi_i64(tcg_rd, tcg_src, 1); + } else { + tcg_gen_mov_i64(tcg_rd, tcg_src); + } + if (!sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + tcg_gen_br(label_continue); + /* match: */ + gen_set_label(label_match); + tcg_src = read_cpu_reg(s, rn, sf); + tcg_gen_mov_i64(tcg_rd, tcg_src); + /* continue: */ + gen_set_label(label_continue); + } +} + +static void handle_clz(DisasContext *s, unsigned int sf, + unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_rd, tcg_rn; + tcg_rd = cpu_reg(s, rd); + tcg_rn = cpu_reg(s, rn); + + if (sf) { + gen_helper_clz64(tcg_rd, tcg_rn); + } else { + TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn); + gen_helper_clz(tcg_tmp32, tcg_tmp32); + tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); + tcg_temp_free_i32(tcg_tmp32); + } +} + +static void handle_cls(DisasContext *s, unsigned int sf, + unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_rd, tcg_rn; + tcg_rd = cpu_reg(s, rd); + tcg_rn = cpu_reg(s, rn); + + if (sf) { + gen_helper_cls64(tcg_rd, tcg_rn); + } else { + TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn); + gen_helper_cls32(tcg_tmp32, tcg_tmp32); + tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); + tcg_temp_free_i32(tcg_tmp32); + } +} + +static void handle_rbit(DisasContext *s, unsigned int sf, + unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_rd, tcg_rn; + tcg_rd = cpu_reg(s, rd); + tcg_rn = cpu_reg(s, rn); + + if (sf) { + gen_helper_rbit64(tcg_rd, tcg_rn); + } else { + TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn); + gen_helper_rbit(tcg_tmp32, tcg_tmp32); + tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); + tcg_temp_free_i32(tcg_tmp32); + } +} + +/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */ +static void handle_rev64(DisasContext *s, unsigned int sf, + unsigned int rn, unsigned int rd) +{ + if (!sf) { + unallocated_encoding(s); + return; + } + tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); +} + +/* C5.6.149 REV with sf==0, opcode==2 + * C5.6.151 REV32 (sf==1, opcode==2) + */ +static void handle_rev32(DisasContext *s, unsigned int sf, + unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_rd = cpu_reg(s, rd); + + if (sf) { + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); + + /* bswap32_i64 requires zero high word */ + tcg_gen_ext32u_i64(tcg_tmp, tcg_rn); + tcg_gen_bswap32_i64(tcg_rd, tcg_tmp); + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32); + tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp); + tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp); + + tcg_temp_free_i64(tcg_tmp); + } else { + tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn)); + tcg_gen_bswap32_i64(tcg_rd, tcg_rd); + } +} + +/* C5.6.150 REV16 (opcode==1) */ +static void handle_rev16(DisasContext *s, unsigned int sf, + unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_rd = cpu_reg(s, rd); + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); + + tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff); + tcg_gen_bswap16_i64(tcg_rd, tcg_tmp); + + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16); + tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff); + tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16); + + if (sf) { + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32); + tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff); + tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16); + + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48); + tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16); + } + + tcg_temp_free_i64(tcg_tmp); +} + +/* C3.5.7 Data-processing (1 source) + * 31 30 29 28 21 20 16 15 10 9 5 4 0 + * +----+---+---+-----------------+---------+--------+------+------+ + * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | + * +----+---+---+-----------------+---------+--------+------+------+ + */ +static void disas_data_proc_1src(DisasContext *s, uint32_t insn) +{ + unsigned int sf, opcode, rn, rd; + + if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) { + unallocated_encoding(s); + return; + } + + sf = extract32(insn, 31, 1); + opcode = extract32(insn, 10, 6); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + + switch (opcode) { + case 0: /* RBIT */ + handle_rbit(s, sf, rn, rd); + break; + case 1: /* REV16 */ + handle_rev16(s, sf, rn, rd); + break; + case 2: /* REV32 */ + handle_rev32(s, sf, rn, rd); + break; + case 3: /* REV64 */ + handle_rev64(s, sf, rn, rd); + break; + case 4: /* CLZ */ + handle_clz(s, sf, rn, rd); + break; + case 5: /* CLS */ + handle_cls(s, sf, rn, rd); + break; + } +} + +static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, + unsigned int rm, unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_n, tcg_m, tcg_rd; + tcg_rd = cpu_reg(s, rd); + + if (!sf && is_signed) { + tcg_n = new_tmp_a64(s); + tcg_m = new_tmp_a64(s); + tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); + tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); + } else { + tcg_n = read_cpu_reg(s, rn, sf); + tcg_m = read_cpu_reg(s, rm, sf); + } + + if (is_signed) { + gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); + } else { + gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); + } + + if (!sf) { /* zero extend final result */ + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } +} + +/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */ +static void handle_shift_reg(DisasContext *s, + enum a64_shift_type shift_type, unsigned int sf, + unsigned int rm, unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_shift = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = cpu_reg(s, rd); + TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); + + tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); + shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); + tcg_temp_free_i64(tcg_shift); +} + +/* CRC32[BHWX], CRC32C[BHWX] */ +static void handle_crc32(DisasContext *s, + unsigned int sf, unsigned int sz, bool crc32c, + unsigned int rm, unsigned int rn, unsigned int rd) +{ + TCGv_i64 tcg_acc, tcg_val; + TCGv_i32 tcg_bytes; + + if (!arm_dc_feature(s, ARM_FEATURE_CRC) + || (sf == 1 && sz != 3) + || (sf == 0 && sz == 3)) { + unallocated_encoding(s); + return; + } + + if (sz == 3) { + tcg_val = cpu_reg(s, rm); + } else { + uint64_t mask; + switch (sz) { + case 0: + mask = 0xFF; + break; + case 1: + mask = 0xFFFF; + break; + case 2: + mask = 0xFFFFFFFF; + break; + default: + g_assert_not_reached(); + } + tcg_val = new_tmp_a64(s); + tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); + } + + tcg_acc = cpu_reg(s, rn); + tcg_bytes = tcg_const_i32(1 << sz); + + if (crc32c) { + gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + } else { + gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); + } + + tcg_temp_free_i32(tcg_bytes); +} + +/* C3.5.8 Data-processing (2 source) + * 31 30 29 28 21 20 16 15 10 9 5 4 0 + * +----+---+---+-----------------+------+--------+------+------+ + * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | + * +----+---+---+-----------------+------+--------+------+------+ + */ +static void disas_data_proc_2src(DisasContext *s, uint32_t insn) +{ + unsigned int sf, rm, opcode, rn, rd; + sf = extract32(insn, 31, 1); + rm = extract32(insn, 16, 5); + opcode = extract32(insn, 10, 6); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + + if (extract32(insn, 29, 1)) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 2: /* UDIV */ + handle_div(s, false, sf, rm, rn, rd); + break; + case 3: /* SDIV */ + handle_div(s, true, sf, rm, rn, rd); + break; + case 8: /* LSLV */ + handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); + break; + case 9: /* LSRV */ + handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); + break; + case 10: /* ASRV */ + handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); + break; + case 11: /* RORV */ + handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); + break; + case 16: + case 17: + case 18: + case 19: + case 20: + case 21: + case 22: + case 23: /* CRC32 */ + { + int sz = extract32(opcode, 0, 2); + bool crc32c = extract32(opcode, 2, 1); + handle_crc32(s, sf, sz, crc32c, rm, rn, rd); + break; + } + default: + unallocated_encoding(s); + break; + } +} + +/* C3.5 Data processing - register */ +static void disas_data_proc_reg(DisasContext *s, uint32_t insn) +{ + switch (extract32(insn, 24, 5)) { + case 0x0a: /* Logical (shifted register) */ + disas_logic_reg(s, insn); + break; + case 0x0b: /* Add/subtract */ + if (insn & (1 << 21)) { /* (extended register) */ + disas_add_sub_ext_reg(s, insn); + } else { + disas_add_sub_reg(s, insn); + } + break; + case 0x1b: /* Data-processing (3 source) */ + disas_data_proc_3src(s, insn); + break; + case 0x1a: + switch (extract32(insn, 21, 3)) { + case 0x0: /* Add/subtract (with carry) */ + disas_adc_sbc(s, insn); + break; + case 0x2: /* Conditional compare */ + disas_cc(s, insn); /* both imm and reg forms */ + break; + case 0x4: /* Conditional select */ + disas_cond_select(s, insn); + break; + case 0x6: /* Data-processing */ + if (insn & (1 << 30)) { /* (1 source) */ + disas_data_proc_1src(s, insn); + } else { /* (2 source) */ + disas_data_proc_2src(s, insn); + } + break; + default: + unallocated_encoding(s); + break; + } + break; + default: + unallocated_encoding(s); + break; + } +} + +static void handle_fp_compare(DisasContext *s, bool is_double, + unsigned int rn, unsigned int rm, + bool cmp_with_zero, bool signal_all_nans) +{ + TCGv_i64 tcg_flags = tcg_temp_new_i64(); + TCGv_ptr fpst = get_fpstatus_ptr(); + + if (is_double) { + TCGv_i64 tcg_vn, tcg_vm; + + tcg_vn = read_fp_dreg(s, rn); + if (cmp_with_zero) { + tcg_vm = tcg_const_i64(0); + } else { + tcg_vm = read_fp_dreg(s, rm); + } + if (signal_all_nans) { + gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } else { + gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } + tcg_temp_free_i64(tcg_vn); + tcg_temp_free_i64(tcg_vm); + } else { + TCGv_i32 tcg_vn, tcg_vm; + + tcg_vn = read_fp_sreg(s, rn); + if (cmp_with_zero) { + tcg_vm = tcg_const_i32(0); + } else { + tcg_vm = read_fp_sreg(s, rm); + } + if (signal_all_nans) { + gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } else { + gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); + } + tcg_temp_free_i32(tcg_vn); + tcg_temp_free_i32(tcg_vm); + } + + tcg_temp_free_ptr(fpst); + + gen_set_nzcv(tcg_flags); + + tcg_temp_free_i64(tcg_flags); +} + +/* C3.6.22 Floating point compare + * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ + * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | + * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ + */ +static void disas_fp_compare(DisasContext *s, uint32_t insn) +{ + unsigned int mos, type, rm, op, rn, opc, op2r; + + mos = extract32(insn, 29, 3); + type = extract32(insn, 22, 2); /* 0 = single, 1 = double */ + rm = extract32(insn, 16, 5); + op = extract32(insn, 14, 2); + rn = extract32(insn, 5, 5); + opc = extract32(insn, 3, 2); + op2r = extract32(insn, 0, 3); + + if (mos || op || op2r || type > 1) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2); +} + +/* C3.6.23 Floating point conditional compare + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 + * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ + * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | + * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ + */ +static void disas_fp_ccomp(DisasContext *s, uint32_t insn) +{ + unsigned int mos, type, rm, cond, rn, op, nzcv; + TCGv_i64 tcg_flags; + TCGLabel *label_continue = NULL; + + mos = extract32(insn, 29, 3); + type = extract32(insn, 22, 2); /* 0 = single, 1 = double */ + rm = extract32(insn, 16, 5); + cond = extract32(insn, 12, 4); + rn = extract32(insn, 5, 5); + op = extract32(insn, 4, 1); + nzcv = extract32(insn, 0, 4); + + if (mos || type > 1) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (cond < 0x0e) { /* not always */ + TCGLabel *label_match = gen_new_label(); + label_continue = gen_new_label(); + arm_gen_test_cc(cond, label_match); + /* nomatch: */ + tcg_flags = tcg_const_i64(nzcv << 28); + gen_set_nzcv(tcg_flags); + tcg_temp_free_i64(tcg_flags); + tcg_gen_br(label_continue); + gen_set_label(label_match); + } + + handle_fp_compare(s, type, rn, rm, false, op); + + if (cond < 0x0e) { + gen_set_label(label_continue); + } +} + +/* copy src FP register to dst FP register; type specifies single or double */ +static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src) +{ + if (type) { + TCGv_i64 v = read_fp_dreg(s, src); + write_fp_dreg(s, dst, v); + tcg_temp_free_i64(v); + } else { + TCGv_i32 v = read_fp_sreg(s, src); + write_fp_sreg(s, dst, v); + tcg_temp_free_i32(v); + } +} + +/* C3.6.24 Floating point conditional select + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+------+-----+------+------+ + * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | + * +---+---+---+-----------+------+---+------+------+-----+------+------+ + */ +static void disas_fp_csel(DisasContext *s, uint32_t insn) +{ + unsigned int mos, type, rm, cond, rn, rd; + TCGLabel *label_continue = NULL; + + mos = extract32(insn, 29, 3); + type = extract32(insn, 22, 2); /* 0 = single, 1 = double */ + rm = extract32(insn, 16, 5); + cond = extract32(insn, 12, 4); + rn = extract32(insn, 5, 5); + rd = extract32(insn, 0, 5); + + if (mos || type > 1) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (cond < 0x0e) { /* not always */ + TCGLabel *label_match = gen_new_label(); + label_continue = gen_new_label(); + arm_gen_test_cc(cond, label_match); + /* nomatch: */ + gen_mov_fp2fp(s, type, rd, rm); + tcg_gen_br(label_continue); + gen_set_label(label_match); + } + + gen_mov_fp2fp(s, type, rd, rn); + + if (cond < 0x0e) { /* continue */ + gen_set_label(label_continue); + } +} + +/* C3.6.25 Floating-point data-processing (1 source) - single precision */ +static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) +{ + TCGv_ptr fpst; + TCGv_i32 tcg_op; + TCGv_i32 tcg_res; + + fpst = get_fpstatus_ptr(); + tcg_op = read_fp_sreg(s, rn); + tcg_res = tcg_temp_new_i32(); + + switch (opcode) { + case 0x0: /* FMOV */ + tcg_gen_mov_i32(tcg_res, tcg_op); + break; + case 0x1: /* FABS */ + gen_helper_vfp_abss(tcg_res, tcg_op); + break; + case 0x2: /* FNEG */ + gen_helper_vfp_negs(tcg_res, tcg_op); + break; + case 0x3: /* FSQRT */ + gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); + break; + case 0x8: /* FRINTN */ + case 0x9: /* FRINTP */ + case 0xa: /* FRINTM */ + case 0xb: /* FRINTZ */ + case 0xc: /* FRINTA */ + { + TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7)); + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + gen_helper_rints(tcg_res, tcg_op, fpst); + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + break; + } + case 0xe: /* FRINTX */ + gen_helper_rints_exact(tcg_res, tcg_op, fpst); + break; + case 0xf: /* FRINTI */ + gen_helper_rints(tcg_res, tcg_op, fpst); + break; + default: + abort(); + } + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_op); + tcg_temp_free_i32(tcg_res); +} + +/* C3.6.25 Floating-point data-processing (1 source) - double precision */ +static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) +{ + TCGv_ptr fpst; + TCGv_i64 tcg_op; + TCGv_i64 tcg_res; + + fpst = get_fpstatus_ptr(); + tcg_op = read_fp_dreg(s, rn); + tcg_res = tcg_temp_new_i64(); + + switch (opcode) { + case 0x0: /* FMOV */ + tcg_gen_mov_i64(tcg_res, tcg_op); + break; + case 0x1: /* FABS */ + gen_helper_vfp_absd(tcg_res, tcg_op); + break; + case 0x2: /* FNEG */ + gen_helper_vfp_negd(tcg_res, tcg_op); + break; + case 0x3: /* FSQRT */ + gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env); + break; + case 0x8: /* FRINTN */ + case 0x9: /* FRINTP */ + case 0xa: /* FRINTM */ + case 0xb: /* FRINTZ */ + case 0xc: /* FRINTA */ + { + TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7)); + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + gen_helper_rintd(tcg_res, tcg_op, fpst); + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + break; + } + case 0xe: /* FRINTX */ + gen_helper_rintd_exact(tcg_res, tcg_op, fpst); + break; + case 0xf: /* FRINTI */ + gen_helper_rintd(tcg_res, tcg_op, fpst); + break; + default: + abort(); + } + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i64(tcg_op); + tcg_temp_free_i64(tcg_res); +} + +static void handle_fp_fcvt(DisasContext *s, int opcode, + int rd, int rn, int dtype, int ntype) +{ + switch (ntype) { + case 0x0: + { + TCGv_i32 tcg_rn = read_fp_sreg(s, rn); + if (dtype == 1) { + /* Single to double */ + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env); + write_fp_dreg(s, rd, tcg_rd); + tcg_temp_free_i64(tcg_rd); + } else { + /* Single to half */ + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env); + /* write_fp_sreg is OK here because top half of tcg_rd is zero */ + write_fp_sreg(s, rd, tcg_rd); + tcg_temp_free_i32(tcg_rd); + } + tcg_temp_free_i32(tcg_rn); + break; + } + case 0x1: + { + TCGv_i64 tcg_rn = read_fp_dreg(s, rn); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + if (dtype == 0) { + /* Double to single */ + gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env); + } else { + /* Double to half */ + gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env); + /* write_fp_sreg is OK here because top half of tcg_rd is zero */ + } + write_fp_sreg(s, rd, tcg_rd); + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i64(tcg_rn); + break; + } + case 0x3: + { + TCGv_i32 tcg_rn = read_fp_sreg(s, rn); + tcg_gen_ext16u_i32(tcg_rn, tcg_rn); + if (dtype == 0) { + /* Half to single */ + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env); + write_fp_sreg(s, rd, tcg_rd); + tcg_temp_free_i32(tcg_rd); + } else { + /* Half to double */ + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env); + write_fp_dreg(s, rd, tcg_rd); + tcg_temp_free_i64(tcg_rd); + } + tcg_temp_free_i32(tcg_rn); + break; + } + default: + abort(); + } +} + +/* C3.6.25 Floating point data-processing (1 source) + * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 + * +---+---+---+-----------+------+---+--------+-----------+------+------+ + * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | + * +---+---+---+-----------+------+---+--------+-----------+------+------+ + */ +static void disas_fp_1src(DisasContext *s, uint32_t insn) +{ + int type = extract32(insn, 22, 2); + int opcode = extract32(insn, 15, 6); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + switch (opcode) { + case 0x4: case 0x5: case 0x7: + { + /* FCVT between half, single and double precision */ + int dtype = extract32(opcode, 0, 2); + if (type == 2 || dtype == type) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + + handle_fp_fcvt(s, opcode, rd, rn, dtype, type); + break; + } + case 0x0 ... 0x3: + case 0x8 ... 0xc: + case 0xe ... 0xf: + /* 32-to-32 and 64-to-64 ops */ + switch (type) { + case 0: + if (!fp_access_check(s)) { + return; + } + + handle_fp_1src_single(s, opcode, rd, rn); + break; + case 1: + if (!fp_access_check(s)) { + return; + } + + handle_fp_1src_double(s, opcode, rd, rn); + break; + default: + unallocated_encoding(s); + } + break; + default: + unallocated_encoding(s); + break; + } +} + +/* C3.6.26 Floating-point data-processing (2 source) - single precision */ +static void handle_fp_2src_single(DisasContext *s, int opcode, + int rd, int rn, int rm) +{ + TCGv_i32 tcg_op1; + TCGv_i32 tcg_op2; + TCGv_i32 tcg_res; + TCGv_ptr fpst; + + tcg_res = tcg_temp_new_i32(); + fpst = get_fpstatus_ptr(); + tcg_op1 = read_fp_sreg(s, rn); + tcg_op2 = read_fp_sreg(s, rm); + + switch (opcode) { + case 0x0: /* FMUL */ + gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1: /* FDIV */ + gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2: /* FADD */ + gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3: /* FSUB */ + gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x4: /* FMAX */ + gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5: /* FMIN */ + gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x6: /* FMAXNM */ + gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7: /* FMINNM */ + gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x8: /* FNMUL */ + gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_negs(tcg_res, tcg_res); + break; + } + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_res); +} + +/* C3.6.26 Floating-point data-processing (2 source) - double precision */ +static void handle_fp_2src_double(DisasContext *s, int opcode, + int rd, int rn, int rm) +{ + TCGv_i64 tcg_op1; + TCGv_i64 tcg_op2; + TCGv_i64 tcg_res; + TCGv_ptr fpst; + + tcg_res = tcg_temp_new_i64(); + fpst = get_fpstatus_ptr(); + tcg_op1 = read_fp_dreg(s, rn); + tcg_op2 = read_fp_dreg(s, rm); + + switch (opcode) { + case 0x0: /* FMUL */ + gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1: /* FDIV */ + gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2: /* FADD */ + gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3: /* FSUB */ + gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x4: /* FMAX */ + gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5: /* FMIN */ + gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x6: /* FMAXNM */ + gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7: /* FMINNM */ + gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x8: /* FNMUL */ + gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_negd(tcg_res, tcg_res); + break; + } + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); +} + +/* C3.6.26 Floating point data-processing (2 source) + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_fp_2src(DisasContext *s, uint32_t insn) +{ + int type = extract32(insn, 22, 2); + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rm = extract32(insn, 16, 5); + int opcode = extract32(insn, 12, 4); + + if (opcode > 8) { + unallocated_encoding(s); + return; + } + + switch (type) { + case 0: + if (!fp_access_check(s)) { + return; + } + handle_fp_2src_single(s, opcode, rd, rn, rm); + break; + case 1: + if (!fp_access_check(s)) { + return; + } + handle_fp_2src_double(s, opcode, rd, rn, rm); + break; + default: + unallocated_encoding(s); + } +} + +/* C3.6.27 Floating-point data-processing (3 source) - single precision */ +static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, + int rd, int rn, int rm, int ra) +{ + TCGv_i32 tcg_op1, tcg_op2, tcg_op3; + TCGv_i32 tcg_res = tcg_temp_new_i32(); + TCGv_ptr fpst = get_fpstatus_ptr(); + + tcg_op1 = read_fp_sreg(s, rn); + tcg_op2 = read_fp_sreg(s, rm); + tcg_op3 = read_fp_sreg(s, ra); + + /* These are fused multiply-add, and must be done as one + * floating point operation with no rounding between the + * multiplication and addition steps. + * NB that doing the negations here as separate steps is + * correct : an input NaN should come out with its sign bit + * flipped if it is a negated-input. + */ + if (o1 == true) { + gen_helper_vfp_negs(tcg_op3, tcg_op3); + } + + if (o0 != o1) { + gen_helper_vfp_negs(tcg_op1, tcg_op1); + } + + gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_op3); + tcg_temp_free_i32(tcg_res); +} + +/* C3.6.27 Floating-point data-processing (3 source) - double precision */ +static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, + int rd, int rn, int rm, int ra) +{ + TCGv_i64 tcg_op1, tcg_op2, tcg_op3; + TCGv_i64 tcg_res = tcg_temp_new_i64(); + TCGv_ptr fpst = get_fpstatus_ptr(); + + tcg_op1 = read_fp_dreg(s, rn); + tcg_op2 = read_fp_dreg(s, rm); + tcg_op3 = read_fp_dreg(s, ra); + + /* These are fused multiply-add, and must be done as one + * floating point operation with no rounding between the + * multiplication and addition steps. + * NB that doing the negations here as separate steps is + * correct : an input NaN should come out with its sign bit + * flipped if it is a negated-input. + */ + if (o1 == true) { + gen_helper_vfp_negd(tcg_op3, tcg_op3); + } + + if (o0 != o1) { + gen_helper_vfp_negd(tcg_op1, tcg_op1); + } + + gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_op3); + tcg_temp_free_i64(tcg_res); +} + +/* C3.6.27 Floating point data-processing (3 source) + * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 + * +---+---+---+-----------+------+----+------+----+------+------+------+ + * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | + * +---+---+---+-----------+------+----+------+----+------+------+------+ + */ +static void disas_fp_3src(DisasContext *s, uint32_t insn) +{ + int type = extract32(insn, 22, 2); + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int ra = extract32(insn, 10, 5); + int rm = extract32(insn, 16, 5); + bool o0 = extract32(insn, 15, 1); + bool o1 = extract32(insn, 21, 1); + + switch (type) { + case 0: + if (!fp_access_check(s)) { + return; + } + handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); + break; + case 1: + if (!fp_access_check(s)) { + return; + } + handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); + break; + default: + unallocated_encoding(s); + } +} + +/* C3.6.28 Floating point immediate + * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 + * +---+---+---+-----------+------+---+------------+-------+------+------+ + * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | + * +---+---+---+-----------+------+---+------------+-------+------+------+ + */ +static void disas_fp_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int imm8 = extract32(insn, 13, 8); + int is_double = extract32(insn, 22, 2); + uint64_t imm; + TCGv_i64 tcg_res; + + if (is_double > 1) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + /* The imm8 encodes the sign bit, enough bits to represent + * an exponent in the range 01....1xx to 10....0xx, + * and the most significant 4 bits of the mantissa; see + * VFPExpandImm() in the v8 ARM ARM. + */ + if (is_double) { + imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | + (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) | + extract32(imm8, 0, 6); + imm <<= 48; + } else { + imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | + (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) | + (extract32(imm8, 0, 6) << 3); + imm <<= 16; + } + + tcg_res = tcg_const_i64(imm); + write_fp_dreg(s, rd, tcg_res); + tcg_temp_free_i64(tcg_res); +} + +/* Handle floating point <=> fixed point conversions. Note that we can + * also deal with fp <=> integer conversions as a special case (scale == 64) + * OPTME: consider handling that special case specially or at least skipping + * the call to scalbn in the helpers for zero shifts. + */ +static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, + bool itof, int rmode, int scale, int sf, int type) +{ + bool is_signed = !(opcode & 1); + bool is_double = type; + TCGv_ptr tcg_fpstatus; + TCGv_i32 tcg_shift; + + tcg_fpstatus = get_fpstatus_ptr(); + + tcg_shift = tcg_const_i32(64 - scale); + + if (itof) { + TCGv_i64 tcg_int = cpu_reg(s, rn); + if (!sf) { + TCGv_i64 tcg_extend = new_tmp_a64(s); + + if (is_signed) { + tcg_gen_ext32s_i64(tcg_extend, tcg_int); + } else { + tcg_gen_ext32u_i64(tcg_extend, tcg_int); + } + + tcg_int = tcg_extend; + } + + if (is_double) { + TCGv_i64 tcg_double = tcg_temp_new_i64(); + if (is_signed) { + gen_helper_vfp_sqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_uqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpstatus); + } + write_fp_dreg(s, rd, tcg_double); + tcg_temp_free_i64(tcg_double); + } else { + TCGv_i32 tcg_single = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_sqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_uqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpstatus); + } + write_fp_sreg(s, rd, tcg_single); + tcg_temp_free_i32(tcg_single); + } + } else { + TCGv_i64 tcg_int = cpu_reg(s, rd); + TCGv_i32 tcg_rmode; + + if (extract32(opcode, 2, 1)) { + /* There are too many rounding modes to all fit into rmode, + * so FCVTA[US] is a special case. + */ + rmode = FPROUNDING_TIEAWAY; + } + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + + if (is_double) { + TCGv_i64 tcg_double = read_fp_dreg(s, rn); + if (is_signed) { + if (!sf) { + gen_helper_vfp_tosld(tcg_int, tcg_double, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_tosqd(tcg_int, tcg_double, + tcg_shift, tcg_fpstatus); + } + } else { + if (!sf) { + gen_helper_vfp_tould(tcg_int, tcg_double, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_touqd(tcg_int, tcg_double, + tcg_shift, tcg_fpstatus); + } + } + tcg_temp_free_i64(tcg_double); + } else { + TCGv_i32 tcg_single = read_fp_sreg(s, rn); + if (sf) { + if (is_signed) { + gen_helper_vfp_tosqs(tcg_int, tcg_single, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_touqs(tcg_int, tcg_single, + tcg_shift, tcg_fpstatus); + } + } else { + TCGv_i32 tcg_dest = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_tosls(tcg_dest, tcg_single, + tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_touls(tcg_dest, tcg_single, + tcg_shift, tcg_fpstatus); + } + tcg_gen_extu_i32_i64(tcg_int, tcg_dest); + tcg_temp_free_i32(tcg_dest); + } + tcg_temp_free_i32(tcg_single); + } + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + + if (!sf) { + tcg_gen_ext32u_i64(tcg_int, tcg_int); + } + } + + tcg_temp_free_ptr(tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); +} + +/* C3.6.29 Floating point <-> fixed point conversions + * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 + * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ + * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | + * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ + */ +static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int scale = extract32(insn, 10, 6); + int opcode = extract32(insn, 16, 3); + int rmode = extract32(insn, 19, 2); + int type = extract32(insn, 22, 2); + bool sbit = extract32(insn, 29, 1); + bool sf = extract32(insn, 31, 1); + bool itof; + + if (sbit || (type > 1) + || (!sf && scale < 32)) { + unallocated_encoding(s); + return; + } + + switch ((rmode << 3) | opcode) { + case 0x2: /* SCVTF */ + case 0x3: /* UCVTF */ + itof = true; + break; + case 0x18: /* FCVTZS */ + case 0x19: /* FCVTZU */ + itof = false; + break; + default: + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); +} + +static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) +{ + /* FMOV: gpr to or from float, double, or top half of quad fp reg, + * without conversion. + */ + + if (itof) { + TCGv_i64 tcg_rn = cpu_reg(s, rn); + + switch (type) { + case 0: + { + /* 32 bit */ + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(tmp, tcg_rn); + tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64)); + tcg_gen_movi_i64(tmp, 0); + tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd)); + tcg_temp_free_i64(tmp); + break; + } + case 1: + { + /* 64 bit */ + TCGv_i64 tmp = tcg_const_i64(0); + tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64)); + tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd)); + tcg_temp_free_i64(tmp); + break; + } + case 2: + /* 64 bit to top half. */ + tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); + break; + } + } else { + TCGv_i64 tcg_rd = cpu_reg(s, rd); + + switch (type) { + case 0: + /* 32 bit */ + tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32)); + break; + case 1: + /* 64 bit */ + tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64)); + break; + case 2: + /* 64 bits from top half */ + tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn)); + break; + } + } +} + +/* C3.6.30 Floating point <-> integer conversions + * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 + * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ + * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | + * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ + */ +static void disas_fp_int_conv(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 16, 3); + int rmode = extract32(insn, 19, 2); + int type = extract32(insn, 22, 2); + bool sbit = extract32(insn, 29, 1); + bool sf = extract32(insn, 31, 1); + + if (sbit) { + unallocated_encoding(s); + return; + } + + if (opcode > 5) { + /* FMOV */ + bool itof = opcode & 1; + + if (rmode >= 2) { + unallocated_encoding(s); + return; + } + + switch (sf << 3 | type << 1 | rmode) { + case 0x0: /* 32 bit */ + case 0xa: /* 64 bit */ + case 0xd: /* 64 bit to top half of quad */ + break; + default: + /* all other sf/type/rmode combinations are invalid */ + unallocated_encoding(s); + break; + } + + if (!fp_access_check(s)) { + return; + } + handle_fmov(s, rd, rn, type, itof); + } else { + /* actual FP conversions */ + bool itof = extract32(opcode, 1, 1); + + if (type > 1 || (rmode != 0 && opcode > 1)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); + } +} + +/* FP-specific subcases of table C3-6 (SIMD and FP data processing) + * 31 30 29 28 25 24 0 + * +---+---+---+---------+-----------------------------+ + * | | 0 | | 1 1 1 1 | | + * +---+---+---+---------+-----------------------------+ + */ +static void disas_data_proc_fp(DisasContext *s, uint32_t insn) +{ + if (extract32(insn, 24, 1)) { + /* Floating point data-processing (3 source) */ + disas_fp_3src(s, insn); + } else if (extract32(insn, 21, 1) == 0) { + /* Floating point to fixed point conversions */ + disas_fp_fixed_conv(s, insn); + } else { + switch (extract32(insn, 10, 2)) { + case 1: + /* Floating point conditional compare */ + disas_fp_ccomp(s, insn); + break; + case 2: + /* Floating point data-processing (2 source) */ + disas_fp_2src(s, insn); + break; + case 3: + /* Floating point conditional select */ + disas_fp_csel(s, insn); + break; + case 0: + switch (ctz32(extract32(insn, 12, 4))) { + case 0: /* [15:12] == xxx1 */ + /* Floating point immediate */ + disas_fp_imm(s, insn); + break; + case 1: /* [15:12] == xx10 */ + /* Floating point compare */ + disas_fp_compare(s, insn); + break; + case 2: /* [15:12] == x100 */ + /* Floating point data-processing (1 source) */ + disas_fp_1src(s, insn); + break; + case 3: /* [15:12] == 1000 */ + unallocated_encoding(s); + break; + default: /* [15:12] == 0000 */ + /* Floating point <-> integer conversions */ + disas_fp_int_conv(s, insn); + break; + } + break; + } + } +} + +static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, + int pos) +{ + /* Extract 64 bits from the middle of two concatenated 64 bit + * vector register slices left:right. The extracted bits start + * at 'pos' bits into the right (least significant) side. + * We return the result in tcg_right, and guarantee not to + * trash tcg_left. + */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + assert(pos > 0 && pos < 64); + + tcg_gen_shri_i64(tcg_right, tcg_right, pos); + tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); + tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); + + tcg_temp_free_i64(tcg_tmp); +} + +/* C3.6.1 EXT + * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 + * +---+---+-------------+-----+---+------+---+------+---+------+------+ + * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | + * +---+---+-------------+-----+---+------+---+------+---+------+------+ + */ +static void disas_simd_ext(DisasContext *s, uint32_t insn) +{ + int is_q = extract32(insn, 30, 1); + int op2 = extract32(insn, 22, 2); + int imm4 = extract32(insn, 11, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int pos = imm4 << 3; + TCGv_i64 tcg_resl, tcg_resh; + + if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_resh = tcg_temp_new_i64(); + tcg_resl = tcg_temp_new_i64(); + + /* Vd gets bits starting at pos bits into Vm:Vn. This is + * either extracting 128 bits from a 128:128 concatenation, or + * extracting 64 bits from a 64:64 concatenation. + */ + if (!is_q) { + read_vec_element(s, tcg_resl, rn, 0, MO_64); + if (pos != 0) { + read_vec_element(s, tcg_resh, rm, 0, MO_64); + do_ext64(s, tcg_resh, tcg_resl, pos); + } + tcg_gen_movi_i64(tcg_resh, 0); + } else { + TCGv_i64 tcg_hh; + typedef struct { + int reg; + int elt; + } EltPosns; + EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; + EltPosns *elt = eltposns; + + if (pos >= 64) { + elt++; + pos -= 64; + } + + read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); + elt++; + read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); + elt++; + if (pos != 0) { + do_ext64(s, tcg_resh, tcg_resl, pos); + tcg_hh = tcg_temp_new_i64(); + read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); + do_ext64(s, tcg_hh, tcg_resh, pos); + tcg_temp_free_i64(tcg_hh); + } + } + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +/* C3.6.2 TBL/TBX + * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 + * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ + * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | + * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ + */ +static void disas_simd_tb(DisasContext *s, uint32_t insn) +{ + int op2 = extract32(insn, 22, 2); + int is_q = extract32(insn, 30, 1); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int is_tblx = extract32(insn, 12, 1); + int len = extract32(insn, 13, 2); + TCGv_i64 tcg_resl, tcg_resh, tcg_idx; + TCGv_i32 tcg_regno, tcg_numregs; + + if (op2 != 0) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + /* This does a table lookup: for every byte element in the input + * we index into a table formed from up to four vector registers, + * and then the output is the result of the lookups. Our helper + * function does the lookup operation for a single 64 bit part of + * the input. + */ + tcg_resl = tcg_temp_new_i64(); + tcg_resh = tcg_temp_new_i64(); + + if (is_tblx) { + read_vec_element(s, tcg_resl, rd, 0, MO_64); + } else { + tcg_gen_movi_i64(tcg_resl, 0); + } + if (is_tblx && is_q) { + read_vec_element(s, tcg_resh, rd, 1, MO_64); + } else { + tcg_gen_movi_i64(tcg_resh, 0); + } + + tcg_idx = tcg_temp_new_i64(); + tcg_regno = tcg_const_i32(rn); + tcg_numregs = tcg_const_i32(len + 1); + read_vec_element(s, tcg_idx, rm, 0, MO_64); + gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx, + tcg_regno, tcg_numregs); + if (is_q) { + read_vec_element(s, tcg_idx, rm, 1, MO_64); + gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx, + tcg_regno, tcg_numregs); + } + tcg_temp_free_i64(tcg_idx); + tcg_temp_free_i32(tcg_regno); + tcg_temp_free_i32(tcg_numregs); + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +/* C3.6.3 ZIP/UZP/TRN + * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 + * +---+---+-------------+------+---+------+---+------------------+------+ + * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | + * +---+---+-------------+------+---+------+---+------------------+------+ + */ +static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + /* opc field bits [1:0] indicate ZIP/UZP/TRN; + * bit 2 indicates 1 vs 2 variant of the insn. + */ + int opcode = extract32(insn, 12, 2); + bool part = extract32(insn, 14, 1); + bool is_q = extract32(insn, 30, 1); + int esize = 8 << size; + int i, ofs; + int datasize = is_q ? 128 : 64; + int elements = datasize / esize; + TCGv_i64 tcg_res, tcg_resl, tcg_resh; + + if (opcode == 0 || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_resl = tcg_const_i64(0); + tcg_resh = tcg_const_i64(0); + tcg_res = tcg_temp_new_i64(); + + for (i = 0; i < elements; i++) { + switch (opcode) { + case 1: /* UZP1/2 */ + { + int midpoint = elements / 2; + if (i < midpoint) { + read_vec_element(s, tcg_res, rn, 2 * i + part, size); + } else { + read_vec_element(s, tcg_res, rm, + 2 * (i - midpoint) + part, size); + } + break; + } + case 2: /* TRN1/2 */ + if (i & 1) { + read_vec_element(s, tcg_res, rm, (i & ~1) + part, size); + } else { + read_vec_element(s, tcg_res, rn, (i & ~1) + part, size); + } + break; + case 3: /* ZIP1/2 */ + { + int base = part * elements / 2; + if (i & 1) { + read_vec_element(s, tcg_res, rm, base + (i >> 1), size); + } else { + read_vec_element(s, tcg_res, rn, base + (i >> 1), size); + } + break; + } + default: + g_assert_not_reached(); + } + + ofs = i * esize; + if (ofs < 64) { + tcg_gen_shli_i64(tcg_res, tcg_res, ofs); + tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res); + } else { + tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64); + tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res); + } + } + + tcg_temp_free_i64(tcg_res); + + write_vec_element(s, tcg_resl, rd, 0, MO_64); + tcg_temp_free_i64(tcg_resl); + write_vec_element(s, tcg_resh, rd, 1, MO_64); + tcg_temp_free_i64(tcg_resh); +} + +static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2, + int opc, bool is_min, TCGv_ptr fpst) +{ + /* Helper function for disas_simd_across_lanes: do a single precision + * min/max operation on the specified two inputs, + * and return the result in tcg_elt1. + */ + if (opc == 0xc) { + if (is_min) { + gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } else { + gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } + } else { + assert(opc == 0xf); + if (is_min) { + gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } else { + gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst); + } + } +} + +/* C3.6.4 AdvSIMD across lanes + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool is_q = extract32(insn, 30, 1); + bool is_u = extract32(insn, 29, 1); + bool is_fp = false; + bool is_min = false; + int esize; + int elements; + int i; + TCGv_i64 tcg_res, tcg_elt; + + switch (opcode) { + case 0x1b: /* ADDV */ + if (is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x3: /* SADDLV, UADDLV */ + case 0xa: /* SMAXV, UMAXV */ + case 0x1a: /* SMINV, UMINV */ + if (size == 3 || (size == 2 && !is_q)) { + unallocated_encoding(s); + return; + } + break; + case 0xc: /* FMAXNMV, FMINNMV */ + case 0xf: /* FMAXV, FMINV */ + if (!is_u || !is_q || extract32(size, 0, 1)) { + unallocated_encoding(s); + return; + } + /* Bit 1 of size field encodes min vs max, and actual size is always + * 32 bits: adjust the size variable so following code can rely on it + */ + is_min = extract32(size, 1, 1); + is_fp = true; + size = 2; + break; + default: + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + esize = 8 << size; + elements = (is_q ? 128 : 64) / esize; + + tcg_res = tcg_temp_new_i64(); + tcg_elt = tcg_temp_new_i64(); + + /* These instructions operate across all lanes of a vector + * to produce a single result. We can guarantee that a 64 + * bit intermediate is sufficient: + * + for [US]ADDLV the maximum element size is 32 bits, and + * the result type is 64 bits + * + for FMAX*V, FMIN*V, ADDV the intermediate type is the + * same as the element size, which is 32 bits at most + * For the integer operations we can choose to work at 64 + * or 32 bits and truncate at the end; for simplicity + * we use 64 bits always. The floating point + * ops do require 32 bit intermediates, though. + */ + if (!is_fp) { + read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); + + for (i = 1; i < elements; i++) { + read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); + + switch (opcode) { + case 0x03: /* SADDLV / UADDLV */ + case 0x1b: /* ADDV */ + tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); + break; + case 0x0a: /* SMAXV / UMAXV */ + tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, + tcg_res, + tcg_res, tcg_elt, tcg_res, tcg_elt); + break; + case 0x1a: /* SMINV / UMINV */ + tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE, + tcg_res, + tcg_res, tcg_elt, tcg_res, tcg_elt); + break; + break; + default: + g_assert_not_reached(); + } + + } + } else { + /* Floating point ops which work on 32 bit (single) intermediates. + * Note that correct NaN propagation requires that we do these + * operations in exactly the order specified by the pseudocode. + */ + TCGv_i32 tcg_elt1 = tcg_temp_new_i32(); + TCGv_i32 tcg_elt2 = tcg_temp_new_i32(); + TCGv_i32 tcg_elt3 = tcg_temp_new_i32(); + TCGv_ptr fpst = get_fpstatus_ptr(); + + assert(esize == 32); + assert(elements == 4); + + read_vec_element(s, tcg_elt, rn, 0, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt); + read_vec_element(s, tcg_elt, rn, 1, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + + do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst); + + read_vec_element(s, tcg_elt, rn, 2, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + read_vec_element(s, tcg_elt, rn, 3, MO_32); + tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt); + + do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst); + + do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst); + + tcg_gen_extu_i32_i64(tcg_res, tcg_elt1); + tcg_temp_free_i32(tcg_elt1); + tcg_temp_free_i32(tcg_elt2); + tcg_temp_free_i32(tcg_elt3); + tcg_temp_free_ptr(fpst); + } + + tcg_temp_free_i64(tcg_elt); + + /* Now truncate the result to the width required for the final output */ + if (opcode == 0x03) { + /* SADDLV, UADDLV: result is 2*esize */ + size++; + } + + switch (size) { + case 0: + tcg_gen_ext8u_i64(tcg_res, tcg_res); + break; + case 1: + tcg_gen_ext16u_i64(tcg_res, tcg_res); + break; + case 2: + tcg_gen_ext32u_i64(tcg_res, tcg_res); + break; + case 3: + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + tcg_temp_free_i64(tcg_res); +} + +/* C6.3.31 DUP (Element, Vector) + * + * 31 30 29 21 20 16 15 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int esize = 8 << size; + int elements = (is_q ? 128 : 64) / esize; + int index, i; + TCGv_i64 tmp; + + if (size > 3 || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + index = imm5 >> (size + 1); + + tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, rn, index, size); + + for (i = 0; i < elements; i++) { + write_vec_element(s, tmp, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tmp); +} + +/* C6.3.31 DUP (element, scalar) + * 31 21 20 16 15 10 9 5 4 0 + * +-----------------------+--------+-------------+------+------+ + * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | + * +-----------------------+--------+-------------+------+------+ + */ +static void handle_simd_dupes(DisasContext *s, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int index; + TCGv_i64 tmp; + + if (size > 3) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + index = imm5 >> (size + 1); + + /* This instruction just extracts the specified element and + * zero-extends it into the bottom of the destination register. + */ + tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, rn, index, size); + write_fp_dreg(s, rd, tmp); + tcg_temp_free_i64(tmp); +} + +/* C6.3.32 DUP (General) + * + * 31 30 29 21 20 16 15 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, + int imm5) +{ + int size = ctz32(imm5); + int esize = 8 << size; + int elements = (is_q ? 128 : 64)/esize; + int i = 0; + + if (size > 3 || ((size == 3) && !is_q)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + for (i = 0; i < elements; i++) { + write_vec_element(s, cpu_reg(s, rn), rd, i, size); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C6.3.150 INS (Element) + * + * 31 21 20 16 15 14 11 10 9 5 4 0 + * +-----------------------+--------+------------+---+------+------+ + * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +-----------------------+--------+------------+---+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + * index: encoded in imm5<4:size+1> + */ +static void handle_simd_inse(DisasContext *s, int rd, int rn, + int imm4, int imm5) +{ + int size = ctz32(imm5); + int src_index, dst_index; + TCGv_i64 tmp; + + if (size > 3) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + dst_index = extract32(imm5, 1+size, 5); + src_index = extract32(imm4, size, 4); + + tmp = tcg_temp_new_i64(); + + read_vec_element(s, tmp, rn, src_index, size); + write_vec_element(s, tmp, rd, dst_index, size); + + tcg_temp_free_i64(tmp); +} + + +/* C6.3.151 INS (General) + * + * 31 21 20 16 15 10 9 5 4 0 + * +-----------------------+--------+-------------+------+------+ + * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | + * +-----------------------+--------+-------------+------+------+ + * + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + * index: encoded in imm5<4:size+1> + */ +static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) +{ + int size = ctz32(imm5); + int idx; + + if (size > 3) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + idx = extract32(imm5, 1 + size, 4 - size); + write_vec_element(s, cpu_reg(s, rn), rd, idx, size); +} + +/* + * C6.3.321 UMOV (General) + * C6.3.237 SMOV (General) + * + * 31 30 29 21 20 16 15 12 10 9 5 4 0 + * +---+---+-------------------+--------+-------------+------+------+ + * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | + * +---+---+-------------------+--------+-------------+------+------+ + * + * U: unsigned when set + * size: encoded in imm5 (see ARM ARM LowestSetBit()) + */ +static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, + int rn, int rd, int imm5) +{ + int size = ctz32(imm5); + int element; + TCGv_i64 tcg_rd; + + /* Check for UnallocatedEncodings */ + if (is_signed) { + if (size > 2 || (size == 2 && !is_q)) { + unallocated_encoding(s); + return; + } + } else { + if (size > 3 + || (size < 3 && is_q) + || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + } + + if (!fp_access_check(s)) { + return; + } + + element = extract32(imm5, 1+size, 4); + + tcg_rd = cpu_reg(s, rd); + read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0)); + if (is_signed && !is_q) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } +} + +/* C3.6.5 AdvSIMD copy + * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 + * +---+---+----+-----------------+------+---+------+---+------+------+ + * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +---+---+----+-----------------+------+---+------+---+------+------+ + */ +static void disas_simd_copy(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm4 = extract32(insn, 11, 4); + int op = extract32(insn, 29, 1); + int is_q = extract32(insn, 30, 1); + int imm5 = extract32(insn, 16, 5); + + if (op) { + if (is_q) { + /* INS (element) */ + handle_simd_inse(s, rd, rn, imm4, imm5); + } else { + unallocated_encoding(s); + } + } else { + switch (imm4) { + case 0: + /* DUP (element - vector) */ + handle_simd_dupe(s, is_q, rd, rn, imm5); + break; + case 1: + /* DUP (general) */ + handle_simd_dupg(s, is_q, rd, rn, imm5); + break; + case 3: + if (is_q) { + /* INS (general) */ + handle_simd_insg(s, rd, rn, imm5); + } else { + unallocated_encoding(s); + } + break; + case 5: + case 7: + /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ + handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); + break; + default: + unallocated_encoding(s); + break; + } + } +} + +/* C3.6.6 AdvSIMD modified immediate + * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 + * +---+---+----+---------------------+-----+-------+----+---+-------+------+ + * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | + * +---+---+----+---------------------+-----+-------+----+---+-------+------+ + * + * There are a number of operations that can be carried out here: + * MOVI - move (shifted) imm into register + * MVNI - move inverted (shifted) imm into register + * ORR - bitwise OR of (shifted) imm with register + * BIC - bitwise clear of (shifted) imm with register + */ +static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int cmode = extract32(insn, 12, 4); + int cmode_3_1 = extract32(cmode, 1, 3); + int cmode_0 = extract32(cmode, 0, 1); + int o2 = extract32(insn, 11, 1); + uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); + bool is_neg = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + uint64_t imm = 0; + TCGv_i64 tcg_rd, tcg_imm; + int i; + + if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + /* See AdvSIMDExpandImm() in ARM ARM */ + switch (cmode_3_1) { + case 0: /* Replicate(Zeros(24):imm8, 2) */ + case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */ + case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */ + case 3: /* Replicate(imm8:Zeros(24), 2) */ + { + int shift = cmode_3_1 * 8; + imm = bitfield_replicate(abcdefgh << shift, 32); + break; + } + case 4: /* Replicate(Zeros(8):imm8, 4) */ + case 5: /* Replicate(imm8:Zeros(8), 4) */ + { + int shift = (cmode_3_1 & 0x1) * 8; + imm = bitfield_replicate(abcdefgh << shift, 16); + break; + } + case 6: + if (cmode_0) { + /* Replicate(Zeros(8):imm8:Ones(16), 2) */ + imm = (abcdefgh << 16) | 0xffff; + } else { + /* Replicate(Zeros(16):imm8:Ones(8), 2) */ + imm = (abcdefgh << 8) | 0xff; + } + imm = bitfield_replicate(imm, 32); + break; + case 7: + if (!cmode_0 && !is_neg) { + imm = bitfield_replicate(abcdefgh, 8); + } else if (!cmode_0 && is_neg) { + int i; + imm = 0; + for (i = 0; i < 8; i++) { + if ((abcdefgh) & (1 << i)) { + imm |= 0xffULL << (i * 8); + } + } + } else if (cmode_0) { + if (is_neg) { + imm = (abcdefgh & 0x3f) << 48; + if (abcdefgh & 0x80) { + imm |= 0x8000000000000000ULL; + } + if (abcdefgh & 0x40) { + imm |= 0x3fc0000000000000ULL; + } else { + imm |= 0x4000000000000000ULL; + } + } else { + imm = (abcdefgh & 0x3f) << 19; + if (abcdefgh & 0x80) { + imm |= 0x80000000; + } + if (abcdefgh & 0x40) { + imm |= 0x3e000000; + } else { + imm |= 0x40000000; + } + imm |= (imm << 32); + } + } + break; + } + + if (cmode_3_1 != 7 && is_neg) { + imm = ~imm; + } + + tcg_imm = tcg_const_i64(imm); + tcg_rd = new_tmp_a64(s); + + for (i = 0; i < 2; i++) { + int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64); + + if (i == 1 && !is_q) { + /* non-quad ops clear high half of vector */ + tcg_gen_movi_i64(tcg_rd, 0); + } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) { + tcg_gen_ld_i64(tcg_rd, cpu_env, foffs); + if (is_neg) { + /* AND (BIC) */ + tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm); + } else { + /* ORR */ + tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm); + } + } else { + /* MOVI */ + tcg_gen_mov_i64(tcg_rd, tcg_imm); + } + tcg_gen_st_i64(tcg_rd, cpu_env, foffs); + } + + tcg_temp_free_i64(tcg_imm); +} + +/* C3.6.7 AdvSIMD scalar copy + * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 + * +-----+----+-----------------+------+---+------+---+------+------+ + * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | + * +-----+----+-----------------+------+---+------+---+------+------+ + */ +static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int imm4 = extract32(insn, 11, 4); + int imm5 = extract32(insn, 16, 5); + int op = extract32(insn, 29, 1); + + if (op != 0 || imm4 != 0) { + unallocated_encoding(s); + return; + } + + /* DUP (element, scalar) */ + handle_simd_dupes(s, rd, rn, imm5); +} + +/* C3.6.8 AdvSIMD scalar pairwise + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) +{ + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + TCGv_ptr fpst; + + /* For some ops (the FP ones), size[1] is part of the encoding. + * For ADDP strictly it is not but size[1] is always 1 for valid + * encodings. + */ + opcode |= (extract32(size, 1, 1) << 5); + + switch (opcode) { + case 0x3b: /* ADDP */ + if (u || size != 3) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + + TCGV_UNUSED_PTR(fpst); + break; + case 0xc: /* FMAXNMP */ + case 0xd: /* FADDP */ + case 0xf: /* FMAXP */ + case 0x2c: /* FMINNMP */ + case 0x2f: /* FMINP */ + /* FP op, size[0] is 32 or 64 bit */ + if (!u) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + + size = extract32(size, 0, 1) ? 3 : 2; + fpst = get_fpstatus_ptr(); + break; + default: + unallocated_encoding(s); + return; + } + + if (size == 3) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, 0, MO_64); + read_vec_element(s, tcg_op2, rn, 1, MO_64); + + switch (opcode) { + case 0x3b: /* ADDP */ + tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2); + break; + case 0xc: /* FMAXNMP */ + gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xd: /* FADDP */ + gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xf: /* FMAXP */ + gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2c: /* FMINNMP */ + gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2f: /* FMINP */ + gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op1, rn, 0, MO_32); + read_vec_element_i32(s, tcg_op2, rn, 1, MO_32); + + switch (opcode) { + case 0xc: /* FMAXNMP */ + gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xd: /* FADDP */ + gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0xf: /* FMAXP */ + gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2c: /* FMINNMP */ + gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2f: /* FMINP */ + gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_res); + } + + if (!TCGV_IS_UNUSED_PTR(fpst)) { + tcg_temp_free_ptr(fpst); + } +} + +/* + * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) + * + * This code is handles the common shifting code and is used by both + * the vector and scalar code. + */ +static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + TCGv_i64 tcg_rnd, bool accumulate, + bool is_u, int size, int shift) +{ + bool extended_result = false; + bool round = !TCGV_IS_UNUSED_I64(tcg_rnd); + int ext_lshift = 0; + TCGv_i64 tcg_src_hi; + + if (round && size == 3) { + extended_result = true; + ext_lshift = 64 - shift; + tcg_src_hi = tcg_temp_new_i64(); + } else if (shift == 64) { + if (!accumulate && is_u) { + /* result is zero */ + tcg_gen_movi_i64(tcg_res, 0); + return; + } + } + + /* Deal with the rounding step */ + if (round) { + if (extended_result) { + TCGv_i64 tcg_zero = tcg_const_i64(0); + if (!is_u) { + /* take care of sign extending tcg_res */ + tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); + tcg_gen_add2_i64(tcg_src, tcg_src_hi, + tcg_src, tcg_src_hi, + tcg_rnd, tcg_zero); + } else { + tcg_gen_add2_i64(tcg_src, tcg_src_hi, + tcg_src, tcg_zero, + tcg_rnd, tcg_zero); + } + tcg_temp_free_i64(tcg_zero); + } else { + tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); + } + } + + /* Now do the shift right */ + if (round && extended_result) { + /* extended case, >64 bit precision required */ + if (ext_lshift == 0) { + /* special case, only high bits matter */ + tcg_gen_mov_i64(tcg_src, tcg_src_hi); + } else { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); + tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); + } + } else { + if (is_u) { + if (shift == 64) { + /* essentially shifting in 64 zeros */ + tcg_gen_movi_i64(tcg_src, 0); + } else { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + } + } else { + if (shift == 64) { + /* effectively extending the sign-bit */ + tcg_gen_sari_i64(tcg_src, tcg_src, 63); + } else { + tcg_gen_sari_i64(tcg_src, tcg_src, shift); + } + } + } + + if (accumulate) { + tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); + } else { + tcg_gen_mov_i64(tcg_res, tcg_src); + } + + if (extended_result) { + tcg_temp_free_i64(tcg_src_hi); + } +} + +/* Common SHL/SLI - Shift left with an optional insert */ +static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + bool insert, int shift) +{ + if (insert) { /* SLI */ + tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift); + } else { /* SHL */ + tcg_gen_shli_i64(tcg_res, tcg_src, shift); + } +} + +/* SRI: shift right with insert */ +static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + int size, int shift) +{ + int esize = 8 << size; + + /* shift count same as element size is valid but does nothing; + * special case to avoid potential shift by 64. + */ + if (shift != esize) { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift); + } +} + +/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ +static void handle_scalar_simd_shri(DisasContext *s, + bool is_u, int immh, int immb, + int opcode, int rn, int rd) +{ + const int size = 3; + int immhb = immh << 3 | immb; + int shift = 2 * (8 << size) - immhb; + bool accumulate = false; + bool round = false; + bool insert = false; + TCGv_i64 tcg_rn; + TCGv_i64 tcg_rd; + TCGv_i64 tcg_round; + + if (!extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + switch (opcode) { + case 0x02: /* SSRA / USRA (accumulate) */ + accumulate = true; + break; + case 0x04: /* SRSHR / URSHR (rounding) */ + round = true; + break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + accumulate = round = true; + break; + case 0x08: /* SRI */ + insert = true; + break; + } + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + tcg_rn = read_fp_dreg(s, rn); + tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); + + if (insert) { + handle_shri_with_ins(tcg_rd, tcg_rn, size, shift); + } else { + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + } + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + if (round) { + tcg_temp_free_i64(tcg_round); + } +} + +/* SHL/SLI - Scalar shift left */ +static void handle_scalar_simd_shli(DisasContext *s, bool insert, + int immh, int immb, int opcode, + int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + + if (!extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_rn = read_fp_dreg(s, rn); + tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); + + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); +} + +/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with + * (signed/unsigned) narrowing */ +static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, + bool is_u_shift, bool is_u_narrow, + int immh, int immb, int opcode, + int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int esize = 8 << size; + int shift = (2 * esize) - immhb; + int elements = is_scalar ? 1 : (64 / esize); + bool round = extract32(opcode, 0, 1); + TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); + TCGv_i64 tcg_rn, tcg_rd, tcg_round; + TCGv_i32 tcg_rd_narrowed; + TCGv_i64 tcg_final; + + static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { + { gen_helper_neon_narrow_sat_s8, + gen_helper_neon_unarrow_sat8 }, + { gen_helper_neon_narrow_sat_s16, + gen_helper_neon_unarrow_sat16 }, + { gen_helper_neon_narrow_sat_s32, + gen_helper_neon_unarrow_sat32 }, + { NULL, NULL }, + }; + static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { + gen_helper_neon_narrow_sat_u8, + gen_helper_neon_narrow_sat_u16, + gen_helper_neon_narrow_sat_u32, + NULL + }; + NeonGenNarrowEnvFn *narrowfn; + + int i; + + assert(size < 4); + + if (extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (is_u_shift) { + narrowfn = unsigned_narrow_fns[size]; + } else { + narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0]; + } + + tcg_rn = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + tcg_rd_narrowed = tcg_temp_new_i32(); + tcg_final = tcg_const_i64(0); + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, ldop); + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + false, is_u_shift, size+1, shift); + narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); + tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } + + if (!is_q) { + clear_vec_high(s, rd); + write_vec_element(s, tcg_final, rd, 0, MO_64); + } else { + write_vec_element(s, tcg_final, rd, 1, MO_64); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i32(tcg_rd_narrowed); + tcg_temp_free_i64(tcg_final); + return; +} + +/* SQSHLU, UQSHL, SQSHL: saturating left shifts */ +static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, + bool src_unsigned, bool dst_unsigned, + int immh, int immb, int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int shift = immhb - (8 << size); + int pass; + + assert(immh != 0); + assert(!(scalar && is_q)); + + if (!scalar) { + if (!is_q && extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + /* Since we use the variable-shift helpers we must + * replicate the shift count into each element of + * the tcg_shift value. + */ + switch (size) { + case 0: + shift |= shift << 8; + /* fall through */ + case 1: + shift |= shift << 16; + break; + case 2: + case 3: + break; + default: + g_assert_not_reached(); + } + } + + if (!fp_access_check(s)) { + return; + } + + if (size == 3) { + TCGv_i64 tcg_shift = tcg_const_i64(shift); + static NeonGenTwo64OpEnvFn * const fns[2][2] = { + { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, + { NULL, gen_helper_neon_qshl_u64 }, + }; + NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; + int maxpass = is_q ? 2 : 1; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + write_vec_element(s, tcg_op, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_op); + } + tcg_temp_free_i64(tcg_shift); + + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + TCGv_i32 tcg_shift = tcg_const_i32(shift); + static NeonGenTwoOpEnvFn * const fns[2][2][3] = { + { + { gen_helper_neon_qshl_s8, + gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32 }, + { gen_helper_neon_qshlu_s8, + gen_helper_neon_qshlu_s16, + gen_helper_neon_qshlu_s32 } + }, { + { NULL, NULL, NULL }, + { gen_helper_neon_qshl_u8, + gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32 } + } + }; + NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; + TCGMemOp memop = scalar ? size : MO_32; + int maxpass = scalar ? 1 : is_q ? 4 : 2; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, memop); + genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + if (scalar) { + switch (size) { + case 0: + tcg_gen_ext8u_i32(tcg_op, tcg_op); + break; + case 1: + tcg_gen_ext16u_i32(tcg_op, tcg_op); + break; + case 2: + break; + default: + g_assert_not_reached(); + } + write_fp_sreg(s, rd, tcg_op); + } else { + write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_op); + } + tcg_temp_free_i32(tcg_shift); + + if (!is_q && !scalar) { + clear_vec_high(s, rd); + } + } +} + +/* Common vector code for handling integer to FP conversion */ +static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, + int elements, int is_signed, + int fracbits, int size) +{ + bool is_double = size == 3 ? true : false; + TCGv_ptr tcg_fpst = get_fpstatus_ptr(); + TCGv_i32 tcg_shift = tcg_const_i32(fracbits); + TCGv_i64 tcg_int = tcg_temp_new_i64(); + TCGMemOp mop = size | (is_signed ? MO_SIGN : 0); + int pass; + + for (pass = 0; pass < elements; pass++) { + read_vec_element(s, tcg_int, rn, pass, mop); + + if (is_double) { + TCGv_i64 tcg_double = tcg_temp_new_i64(); + if (is_signed) { + gen_helper_vfp_sqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpst); + } else { + gen_helper_vfp_uqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpst); + } + if (elements == 1) { + write_fp_dreg(s, rd, tcg_double); + } else { + write_vec_element(s, tcg_double, rd, pass, MO_64); + } + tcg_temp_free_i64(tcg_double); + } else { + TCGv_i32 tcg_single = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_sqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpst); + } else { + gen_helper_vfp_uqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpst); + } + if (elements == 1) { + write_fp_sreg(s, rd, tcg_single); + } else { + write_vec_element_i32(s, tcg_single, rd, pass, MO_32); + } + tcg_temp_free_i32(tcg_single); + } + } + + if (!is_double && elements == 2) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_int); + tcg_temp_free_ptr(tcg_fpst); + tcg_temp_free_i32(tcg_shift); +} + +/* UCVTF/SCVTF - Integer to FP conversion */ +static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, + bool is_q, bool is_u, + int immh, int immb, int opcode, + int rn, int rd) +{ + bool is_double = extract32(immh, 3, 1); + int size = is_double ? MO_64 : MO_32; + int elements; + int immhb = immh << 3 | immb; + int fracbits = (is_double ? 128 : 64) - immhb; + + if (!extract32(immh, 2, 2)) { + unallocated_encoding(s); + return; + } + + if (is_scalar) { + elements = 1; + } else { + elements = is_double ? 2 : is_q ? 4 : 2; + if (is_double && !is_q) { + unallocated_encoding(s); + return; + } + } + + if (!fp_access_check(s)) { + return; + } + + /* immh == 0 would be a failure of the decode logic */ + g_assert(immh); + + handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); +} + +/* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ +static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, + bool is_q, bool is_u, + int immh, int immb, int rn, int rd) +{ + bool is_double = extract32(immh, 3, 1); + int immhb = immh << 3 | immb; + int fracbits = (is_double ? 128 : 64) - immhb; + int pass; + TCGv_ptr tcg_fpstatus; + TCGv_i32 tcg_rmode, tcg_shift; + + if (!extract32(immh, 2, 2)) { + unallocated_encoding(s); + return; + } + + if (!is_scalar && !is_q && is_double) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + assert(!(is_scalar && is_q)); + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_fpstatus = get_fpstatus_ptr(); + tcg_shift = tcg_const_i32(fracbits); + + if (is_double) { + int maxpass = is_scalar ? 1 : 2; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + if (is_u) { + gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } + write_vec_element(s, tcg_op, rd, pass, MO_64); + tcg_temp_free_i64(tcg_op); + } + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + int maxpass = is_scalar ? 1 : is_q ? 4 : 2; + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + if (is_u) { + gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } + if (is_scalar) { + write_fp_sreg(s, rd, tcg_op); + } else { + write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + } + tcg_temp_free_i32(tcg_op); + } + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + + tcg_temp_free_ptr(tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); +} + +/* C3.6.9 AdvSIMD scalar shift by immediate + * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 + * +-----+---+-------------+------+------+--------+---+------+------+ + * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | + * +-----+---+-------------+------+------+--------+---+------+------+ + * + * This is the scalar version so it works on a fixed sized registers + */ +static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int immb = extract32(insn, 16, 3); + int immh = extract32(insn, 19, 4); + bool is_u = extract32(insn, 29, 1); + + if (immh == 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x08: /* SRI */ + if (!is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x00: /* SSHR / USHR */ + case 0x02: /* SSRA / USRA */ + case 0x04: /* SRSHR / URSHR */ + case 0x06: /* SRSRA / URSRA */ + handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); + break; + case 0x0a: /* SHL / SLI */ + handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); + break; + case 0x1c: /* SCVTF, UCVTF */ + handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, + opcode, rn, rd); + break; + case 0x10: /* SQSHRUN, SQSHRUN2 */ + case 0x11: /* SQRSHRUN, SQRSHRUN2 */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_vec_simd_sqshrn(s, true, false, false, true, + immh, immb, opcode, rn, rd); + break; + case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ + case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ + handle_vec_simd_sqshrn(s, true, false, is_u, is_u, + immh, immb, opcode, rn, rd); + break; + case 0xc: /* SQSHLU */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); + break; + case 0xe: /* SQSHL, UQSHL */ + handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); + break; + case 0x1f: /* FCVTZS, FCVTZU */ + handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); + break; + default: + unallocated_encoding(s); + break; + } +} + +/* C3.6.10 AdvSIMD scalar three different + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) +{ + bool is_u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + if (is_u) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x9: /* SQDMLAL, SQDMLAL2 */ + case 0xb: /* SQDMLSL, SQDMLSL2 */ + case 0xd: /* SQDMULL, SQDMULL2 */ + if (size == 0 || size == 3) { + unallocated_encoding(s); + return; + } + break; + default: + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (size == 2) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); + read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); + + tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); + + switch (opcode) { + case 0xd: /* SQDMULL, SQDMULL2 */ + break; + case 0xb: /* SQDMLSL, SQDMLSL2 */ + tcg_gen_neg_i64(tcg_res, tcg_res); + /* fall through */ + case 0x9: /* SQDMLAL, SQDMLAL2 */ + read_vec_element(s, tcg_op1, rd, 0, MO_64); + gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, + tcg_res, tcg_op1); + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element_i32(s, tcg_op1, rn, 0, MO_16); + read_vec_element_i32(s, tcg_op2, rm, 0, MO_16); + + gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); + + switch (opcode) { + case 0xd: /* SQDMULL, SQDMULL2 */ + break; + case 0xb: /* SQDMLSL, SQDMLSL2 */ + gen_helper_neon_negl_u32(tcg_res, tcg_res); + /* fall through */ + case 0x9: /* SQDMLAL, SQDMLAL2 */ + { + TCGv_i64 tcg_op3 = tcg_temp_new_i64(); + read_vec_element(s, tcg_op3, rd, 0, MO_32); + gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, + tcg_res, tcg_op3); + tcg_temp_free_i64(tcg_op3); + break; + } + default: + g_assert_not_reached(); + } + + tcg_gen_ext32u_i64(tcg_res, tcg_res); + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i64(tcg_res); + } +} + +static void handle_3same_64(DisasContext *s, int opcode, bool u, + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) +{ + /* Handle 64x64->64 opcodes which are shared between the scalar + * and vector 3-same groups. We cover every opcode where size == 3 + * is valid in either the three-reg-same (integer, not pairwise) + * or scalar-three-reg-same groups. (Some opcodes are not yet + * implemented.) + */ + TCGCond cond; + + switch (opcode) { + case 0x1: /* SQADD */ + if (u) { + gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0x5: /* SQSUB */ + if (u) { + gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0x6: /* CMGT, CMHI */ + /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0. + * We implement this using setcond (test) and then negating. + */ + cond = u ? TCG_COND_GTU : TCG_COND_GT; + do_cmop: + tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x7: /* CMGE, CMHS */ + cond = u ? TCG_COND_GEU : TCG_COND_GE; + goto do_cmop; + case 0x11: /* CMTST, CMEQ */ + if (u) { + cond = TCG_COND_EQ; + goto do_cmop; + } + /* CMTST : test is "if (X & Y != 0)". */ + tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); + tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x8: /* SSHL, USHL */ + if (u) { + gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm); + } else { + gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm); + } + break; + case 0x9: /* SQSHL, UQSHL */ + if (u) { + gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0xa: /* SRSHL, URSHL */ + if (u) { + gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); + } else { + gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); + } + break; + case 0xb: /* SQRSHL, UQRSHL */ + if (u) { + gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } + break; + case 0x10: /* ADD, SUB */ + if (u) { + tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); + } else { + tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); + } + break; + default: + g_assert_not_reached(); + } +} + +/* Handle the 3-same-operands float operations; shared by the scalar + * and vector encodings. The caller must filter out any encodings + * not allocated for the encoding it is dealing with. + */ +static void handle_3same_float(DisasContext *s, int size, int elements, + int fpopcode, int rd, int rn, int rm) +{ + int pass; + TCGv_ptr fpst = get_fpstatus_ptr(); + + for (pass = 0; pass < elements; pass++) { + if (size) { + /* Double */ + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + switch (fpopcode) { + case 0x39: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negd(tcg_op1, tcg_op1); + /* fall through */ + case 0x19: /* FMLA */ + read_vec_element(s, tcg_res, rd, pass, MO_64); + gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); + break; + case 0x18: /* FMAXNM */ + gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1a: /* FADD */ + gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1b: /* FMULX */ + gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1c: /* FCMEQ */ + gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1e: /* FMAX */ + gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1f: /* FRECPS */ + gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x38: /* FMINNM */ + gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3a: /* FSUB */ + gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3e: /* FMIN */ + gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3f: /* FRSQRTS */ + gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5b: /* FMUL */ + gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5c: /* FCMGE */ + gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5d: /* FACGE */ + gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5f: /* FDIV */ + gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7a: /* FABD */ + gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_absd(tcg_res, tcg_res); + break; + case 0x7c: /* FCMGT */ + gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7d: /* FACGT */ + gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } else { + /* Single */ + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); + read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); + + switch (fpopcode) { + case 0x39: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negs(tcg_op1, tcg_op1); + /* fall through */ + case 0x19: /* FMLA */ + read_vec_element_i32(s, tcg_res, rd, pass, MO_32); + gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, + tcg_res, fpst); + break; + case 0x1a: /* FADD */ + gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1b: /* FMULX */ + gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1c: /* FCMEQ */ + gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1e: /* FMAX */ + gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1f: /* FRECPS */ + gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x18: /* FMAXNM */ + gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x38: /* FMINNM */ + gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3a: /* FSUB */ + gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3e: /* FMIN */ + gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3f: /* FRSQRTS */ + gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5b: /* FMUL */ + gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5c: /* FCMGE */ + gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5d: /* FACGE */ + gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5f: /* FDIV */ + gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7a: /* FABD */ + gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); + gen_helper_vfp_abss(tcg_res, tcg_res); + break; + case 0x7c: /* FCMGT */ + gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7d: /* FACGT */ + gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + if (elements == 1) { + /* scalar single so clear high part */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); + write_vec_element(s, tcg_tmp, rd, pass, MO_64); + tcg_temp_free_i64(tcg_tmp); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + } + + tcg_temp_free_ptr(fpst); + + if ((elements << size) < 4) { + /* scalar, or non-quad vector op */ + clear_vec_high(s, rd); + } +} + +/* C3.6.11 AdvSIMD scalar three same + * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 + * +-----+---+-----------+------+---+------+--------+---+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | + * +-----+---+-----------+------+---+------+--------+---+------+------+ + */ +static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + bool u = extract32(insn, 29, 1); + TCGv_i64 tcg_rd; + + if (opcode >= 0x18) { + /* Floating point: U, size[1] and opcode indicate operation */ + int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); + switch (fpopcode) { + case 0x1b: /* FMULX */ + case 0x1f: /* FRECPS */ + case 0x3f: /* FRSQRTS */ + case 0x5d: /* FACGE */ + case 0x7d: /* FACGT */ + case 0x1c: /* FCMEQ */ + case 0x5c: /* FCMGE */ + case 0x7c: /* FCMGT */ + case 0x7a: /* FABD */ + break; + default: + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); + return; + } + + switch (opcode) { + case 0x1: /* SQADD, UQADD */ + case 0x5: /* SQSUB, UQSUB */ + case 0x9: /* SQSHL, UQSHL */ + case 0xb: /* SQRSHL, UQRSHL */ + break; + case 0x8: /* SSHL, USHL */ + case 0xa: /* SRSHL, URSHL */ + case 0x6: /* CMGT, CMHI */ + case 0x7: /* CMGE, CMHS */ + case 0x11: /* CMTST, CMEQ */ + case 0x10: /* ADD, SUB (vector) */ + if (size != 3) { + unallocated_encoding(s); + return; + } + break; + case 0x16: /* SQDMULH, SQRDMULH (vector) */ + if (size != 1 && size != 2) { + unallocated_encoding(s); + return; + } + break; + default: + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_rd = tcg_temp_new_i64(); + + if (size == 3) { + TCGv_i64 tcg_rn = read_fp_dreg(s, rn); + TCGv_i64 tcg_rm = read_fp_dreg(s, rm); + + handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rm); + } else { + /* Do a single operation on the lowest element in the vector. + * We use the standard Neon helpers and rely on 0 OP 0 == 0 with + * no side effects for all these operations. + * OPTME: special-purpose helpers would avoid doing some + * unnecessary work in the helper for the 8 and 16 bit cases. + */ + NeonGenTwoOpEnvFn *genenvfn; + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rm = tcg_temp_new_i32(); + TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_rn, rn, 0, size); + read_vec_element_i32(s, tcg_rm, rm, 0, size); + + switch (opcode) { + case 0x1: /* SQADD, UQADD */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, + { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, + { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x5: /* SQSUB, UQSUB */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, + { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, + { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x9: /* SQSHL, UQSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, + { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, + { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0xb: /* SQRSHL, UQRSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, + { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, + { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x16: /* SQDMULH, SQRDMULH */ + { + static NeonGenTwoOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, + { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + }; + assert(size == 1 || size == 2); + genenvfn = fns[size - 1][u]; + break; + } + default: + g_assert_not_reached(); + } + + genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm); + tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); + tcg_temp_free_i32(tcg_rd32); + tcg_temp_free_i32(tcg_rn); + tcg_temp_free_i32(tcg_rm); + } + + write_fp_dreg(s, rd, tcg_rd); + + tcg_temp_free_i64(tcg_rd); +} + +static void handle_2misc_64(DisasContext *s, int opcode, bool u, + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, + TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) +{ + /* Handle 64->64 opcodes which are shared between the scalar and + * vector 2-reg-misc groups. We cover every integer opcode where size == 3 + * is valid in either group and also the double-precision fp ops. + * The caller only need provide tcg_rmode and tcg_fpstatus if the op + * requires them. + */ + TCGCond cond; + + switch (opcode) { + case 0x4: /* CLS, CLZ */ + if (u) { + gen_helper_clz64(tcg_rd, tcg_rn); + } else { + gen_helper_cls64(tcg_rd, tcg_rn); + } + break; + case 0x5: /* NOT */ + /* This opcode is shared with CNT and RBIT but we have earlier + * enforced that size == 3 if and only if this is the NOT insn. + */ + tcg_gen_not_i64(tcg_rd, tcg_rn); + break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn); + } else { + gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn); + } + break; + case 0xa: /* CMLT */ + /* 64 bit integer comparison against zero, result is + * test ? (2^64 - 1) : 0. We implement via setcond(!test) and + * subtracting 1. + */ + cond = TCG_COND_LT; + do_cmop: + tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0); + tcg_gen_neg_i64(tcg_rd, tcg_rd); + break; + case 0x8: /* CMGT, CMGE */ + cond = u ? TCG_COND_GE : TCG_COND_GT; + goto do_cmop; + case 0x9: /* CMEQ, CMLE */ + cond = u ? TCG_COND_LE : TCG_COND_EQ; + goto do_cmop; + case 0xb: /* ABS, NEG */ + if (u) { + tcg_gen_neg_i64(tcg_rd, tcg_rn); + } else { + TCGv_i64 tcg_zero = tcg_const_i64(0); + tcg_gen_neg_i64(tcg_rd, tcg_rn); + tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero, + tcg_rn, tcg_rd); + tcg_temp_free_i64(tcg_zero); + } + break; + case 0x2f: /* FABS */ + gen_helper_vfp_absd(tcg_rd, tcg_rn); + break; + case 0x6f: /* FNEG */ + gen_helper_vfp_negd(tcg_rd, tcg_rn); + break; + case 0x7f: /* FSQRT */ + gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); + break; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + case 0x58: /* FRINTA */ + case 0x79: /* FRINTI */ + gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); + break; + case 0x59: /* FRINTX */ + gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); + break; + default: + g_assert_not_reached(); + } +} + +static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, + bool is_scalar, bool is_u, bool is_q, + int size, int rn, int rd) +{ + bool is_double = (size == 3); + TCGv_ptr fpst; + + if (!fp_access_check(s)) { + return; + } + + fpst = get_fpstatus_ptr(); + + if (is_double) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_zero = tcg_const_i64(0); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + NeonGenTwoDoubleOPFn *genfn; + bool swap = false; + int pass; + + switch (opcode) { + case 0x2e: /* FCMLT (zero) */ + swap = true; + /* fallthrough */ + case 0x2c: /* FCMGT (zero) */ + genfn = gen_helper_neon_cgt_f64; + break; + case 0x2d: /* FCMEQ (zero) */ + genfn = gen_helper_neon_ceq_f64; + break; + case 0x6d: /* FCMLE (zero) */ + swap = true; + /* fall through */ + case 0x6c: /* FCMGE (zero) */ + genfn = gen_helper_neon_cge_f64; + break; + default: + g_assert_not_reached(); + } + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_op, rn, pass, MO_64); + if (swap) { + genfn(tcg_res, tcg_zero, tcg_op, fpst); + } else { + genfn(tcg_res, tcg_op, tcg_zero, fpst); + } + write_vec_element(s, tcg_res, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_zero); + tcg_temp_free_i64(tcg_op); + } else { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_zero = tcg_const_i32(0); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + NeonGenTwoSingleOPFn *genfn; + bool swap = false; + int pass, maxpasses; + + switch (opcode) { + case 0x2e: /* FCMLT (zero) */ + swap = true; + /* fall through */ + case 0x2c: /* FCMGT (zero) */ + genfn = gen_helper_neon_cgt_f32; + break; + case 0x2d: /* FCMEQ (zero) */ + genfn = gen_helper_neon_ceq_f32; + break; + case 0x6d: /* FCMLE (zero) */ + swap = true; + /* fall through */ + case 0x6c: /* FCMGE (zero) */ + genfn = gen_helper_neon_cge_f32; + break; + default: + g_assert_not_reached(); + } + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + if (swap) { + genfn(tcg_res, tcg_zero, tcg_op, fpst); + } else { + genfn(tcg_res, tcg_op, tcg_zero, fpst); + } + if (is_scalar) { + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + } + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_zero); + tcg_temp_free_i32(tcg_op); + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + + tcg_temp_free_ptr(fpst); +} + +static void handle_2misc_reciprocal(DisasContext *s, int opcode, + bool is_scalar, bool is_u, bool is_q, + int size, int rn, int rd) +{ + bool is_double = (size == 3); + TCGv_ptr fpst = get_fpstatus_ptr(); + + if (is_double) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + int pass; + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_op, rn, pass, MO_64); + switch (opcode) { + case 0x3d: /* FRECPE */ + gen_helper_recpe_f64(tcg_res, tcg_op, fpst); + break; + case 0x3f: /* FRECPX */ + gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); + break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); + break; + default: + g_assert_not_reached(); + } + write_vec_element(s, tcg_res, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op); + } else { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + + switch (opcode) { + case 0x3c: /* URECPE */ + gen_helper_recpe_u32(tcg_res, tcg_op, fpst); + break; + case 0x3d: /* FRECPE */ + gen_helper_recpe_f32(tcg_res, tcg_op, fpst); + break; + case 0x3f: /* FRECPX */ + gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); + break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); + break; + default: + g_assert_not_reached(); + } + + if (is_scalar) { + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + } + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op); + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + tcg_temp_free_ptr(fpst); +} + +static void handle_2misc_narrow(DisasContext *s, bool scalar, + int opcode, bool u, bool is_q, + int size, int rn, int rd) +{ + /* Handle 2-reg-misc ops which are narrowing (so each 2*size element + * in the source becomes a size element in the destination). + */ + int pass; + TCGv_i32 tcg_res[2]; + int destelt = is_q ? 2 : 0; + int passes = scalar ? 1 : 2; + + if (scalar) { + tcg_res[1] = tcg_const_i32(0); + } + + for (pass = 0; pass < passes; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + NeonGenNarrowFn *genfn = NULL; + NeonGenNarrowEnvFn *genenvfn = NULL; + + if (scalar) { + read_vec_element(s, tcg_op, rn, pass, size + 1); + } else { + read_vec_element(s, tcg_op, rn, pass, MO_64); + } + tcg_res[pass] = tcg_temp_new_i32(); + + switch (opcode) { + case 0x12: /* XTN, SQXTUN */ + { + static NeonGenNarrowFn * const xtnfns[3] = { + gen_helper_neon_narrow_u8, + gen_helper_neon_narrow_u16, + tcg_gen_trunc_i64_i32, + }; + static NeonGenNarrowEnvFn * const sqxtunfns[3] = { + gen_helper_neon_unarrow_sat8, + gen_helper_neon_unarrow_sat16, + gen_helper_neon_unarrow_sat32, + }; + if (u) { + genenvfn = sqxtunfns[size]; + } else { + genfn = xtnfns[size]; + } + break; + } + case 0x14: /* SQXTN, UQXTN */ + { + static NeonGenNarrowEnvFn * const fns[3][2] = { + { gen_helper_neon_narrow_sat_s8, + gen_helper_neon_narrow_sat_u8 }, + { gen_helper_neon_narrow_sat_s16, + gen_helper_neon_narrow_sat_u16 }, + { gen_helper_neon_narrow_sat_s32, + gen_helper_neon_narrow_sat_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x16: /* FCVTN, FCVTN2 */ + /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ + if (size == 2) { + gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); + } else { + TCGv_i32 tcg_lo = tcg_temp_new_i32(); + TCGv_i32 tcg_hi = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tcg_lo, tcg_op); + gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env); + tcg_gen_shri_i64(tcg_op, tcg_op, 32); + tcg_gen_trunc_i64_i32(tcg_hi, tcg_op); + gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env); + tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); + tcg_temp_free_i32(tcg_lo); + tcg_temp_free_i32(tcg_hi); + } + break; + case 0x56: /* FCVTXN, FCVTXN2 */ + /* 64 bit to 32 bit float conversion + * with von Neumann rounding (round to odd) + */ + assert(size == 2); + gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); + break; + default: + g_assert_not_reached(); + } + + if (genfn) { + genfn(tcg_res[pass], tcg_op); + } else if (genenvfn) { + genenvfn(tcg_res[pass], cpu_env, tcg_op); + } + + tcg_temp_free_i64(tcg_op); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* Remaining saturating accumulating ops */ +static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, + bool is_q, int size, int rn, int rd) +{ + bool is_double = (size == 3); + + if (is_double) { + TCGv_i64 tcg_rn = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + int pass; + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_rn, rn, pass, MO_64); + read_vec_element(s, tcg_rd, rd, pass, MO_64); + + if (is_u) { /* USQADD */ + gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } else { /* SUQADD */ + gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } + write_vec_element(s, tcg_rd, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } else { + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + if (is_scalar) { + read_vec_element_i32(s, tcg_rn, rn, pass, size); + read_vec_element_i32(s, tcg_rd, rd, pass, size); + } else { + read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); + read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); + } + + if (is_u) { /* USQADD */ + switch (size) { + case 0: + gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } else { /* SUQADD */ + switch (size) { + case 0: + gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } + + if (is_scalar) { + TCGv_i64 tcg_zero = tcg_const_i64(0); + write_vec_element(s, tcg_zero, rd, 0, MO_64); + tcg_temp_free_i64(tcg_zero); + } + write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i32(tcg_rn); + } +} + +/* C3.6.12 AdvSIMD scalar two reg misc + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | + * +-----+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 12, 5); + int size = extract32(insn, 22, 2); + bool u = extract32(insn, 29, 1); + bool is_fcvt = false; + int rmode; + TCGv_i32 tcg_rmode; + TCGv_ptr tcg_fpstatus; + + switch (opcode) { + case 0x3: /* USQADD / SUQADD*/ + if (!fp_access_check(s)) { + return; + } + handle_2misc_satacc(s, true, u, false, size, rn, rd); + return; + case 0x7: /* SQABS / SQNEG */ + break; + case 0xa: /* CMLT */ + if (u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x8: /* CMGT, CMGE */ + case 0x9: /* CMEQ, CMLE */ + case 0xb: /* ABS, NEG */ + if (size != 3) { + unallocated_encoding(s); + return; + } + break; + case 0x12: /* SQXTUN */ + if (!u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x14: /* SQXTN, UQXTN */ + if (size == 3) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); + return; + case 0xc ... 0xf: + case 0x16 ... 0x1d: + case 0x1f: + /* Floating point: U, size[1] and opcode indicate operation; + * size[0] indicates single or double precision. + */ + opcode |= (extract32(size, 1, 1) << 5) | (u << 6); + size = extract32(size, 0, 1) ? 3 : 2; + switch (opcode) { + case 0x2c: /* FCMGT (zero) */ + case 0x2d: /* FCMEQ (zero) */ + case 0x2e: /* FCMLT (zero) */ + case 0x6c: /* FCMGE (zero) */ + case 0x6d: /* FCMLE (zero) */ + handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); + return; + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + bool is_signed = (opcode == 0x1d); + if (!fp_access_check(s)) { + return; + } + handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); + return; + } + case 0x3d: /* FRECPE */ + case 0x3f: /* FRECPX */ + case 0x7d: /* FRSQRTE */ + if (!fp_access_check(s)) { + return; + } + handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); + return; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + is_fcvt = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + break; + case 0x1c: /* FCVTAS */ + case 0x5c: /* FCVTAU */ + /* TIEAWAY doesn't fit in the usual rounding mode encoding */ + is_fcvt = true; + rmode = FPROUNDING_TIEAWAY; + break; + case 0x56: /* FCVTXN, FCVTXN2 */ + if (size == 2) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); + return; + default: + unallocated_encoding(s); + return; + } + break; + default: + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (is_fcvt) { + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_fpstatus = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_I32(tcg_rmode); + TCGV_UNUSED_PTR(tcg_fpstatus); + } + + if (size == 3) { + TCGv_i64 tcg_rn = read_fp_dreg(s, rn); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + + handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); + write_fp_dreg(s, rd, tcg_rd); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } else { + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_rn, rn, 0, size); + + switch (opcode) { + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, + }; + genfn = fns[size][u]; + genfn(tcg_rd, cpu_env, tcg_rn); + break; + } + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + default: + g_assert_not_reached(); + } + + write_fp_sreg(s, rd, tcg_rd); + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i32(tcg_rn); + } + + if (is_fcvt) { + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_ptr(tcg_fpstatus); + } +} + +/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ +static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = 2 * (8 << size) - immhb; + bool accumulate = false; + bool round = false; + bool insert = false; + int dsize = is_q ? 128 : 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGMemOp memop = size | (is_u ? 0 : MO_SIGN); + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + TCGv_i64 tcg_round; + int i; + + if (extract32(immh, 3, 1) && !is_q) { + unallocated_encoding(s); + return; + } + + if (size > 3 && !is_q) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + switch (opcode) { + case 0x02: /* SSRA / USRA (accumulate) */ + accumulate = true; + break; + case 0x04: /* SRSHR / URSHR (rounding) */ + round = true; + break; + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + accumulate = round = true; + break; + case 0x08: /* SRI */ + insert = true; + break; + } + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, memop); + if (accumulate || insert) { + read_vec_element(s, tcg_rd, rd, i, memop); + } + + if (insert) { + handle_shri_with_ins(tcg_rd, tcg_rn, size, shift); + } else { + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + } + + write_vec_element(s, tcg_rd, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } +} + +/* SHL/SLI - Vector shift left */ +static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + int dsize = is_q ? 128 : 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + int i; + + if (extract32(immh, 3, 1) && !is_q) { + unallocated_encoding(s); + return; + } + + if (size > 3 && !is_q) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, size); + if (insert) { + read_vec_element(s, tcg_rd, rd, i, size); + } + + handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift); + + write_vec_element(s, tcg_rd, rd, i, size); + } + + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* USHLL/SHLL - Vector shift left with widening */ +static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, + int immh, int immb, int opcode, int rn, int rd) +{ + int size = 32 - clz32(immh) - 1; + int immhb = immh << 3 | immb; + int shift = immhb - (8 << size); + int dsize = 64; + int esize = 8 << size; + int elements = dsize/esize; + TCGv_i64 tcg_rn = new_tmp_a64(s); + TCGv_i64 tcg_rd = new_tmp_a64(s); + int i; + + if (size >= 3) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + /* For the LL variants the store is larger than the load, + * so if rd == rn we would overwrite parts of our input. + * So load everything right now and use shifts in the main loop. + */ + read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); + + for (i = 0; i < elements; i++) { + tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); + ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); + tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); + write_vec_element(s, tcg_rd, rd, i, size + 1); + } +} + +/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ +static void handle_vec_simd_shrn(DisasContext *s, bool is_q, + int immh, int immb, int opcode, int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int dsize = 64; + int esize = 8 << size; + int elements = dsize/esize; + int shift = (2 * esize) - immhb; + bool round = extract32(opcode, 0, 1); + TCGv_i64 tcg_rn, tcg_rd, tcg_final; + TCGv_i64 tcg_round; + int i; + + if (extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + tcg_rn = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + tcg_final = tcg_temp_new_i64(); + read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64); + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, size+1); + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + false, true, size+1, shift); + + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } + + if (!is_q) { + clear_vec_high(s, rd); + write_vec_element(s, tcg_final, rd, 0, MO_64); + } else { + write_vec_element(s, tcg_final, rd, 1, MO_64); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_final); + return; +} + + +/* C3.6.14 AdvSIMD shift by immediate + * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 + * +---+---+---+-------------+------+------+--------+---+------+------+ + * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | + * +---+---+---+-------------+------+------+--------+---+------+------+ + */ +static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int immb = extract32(insn, 16, 3); + int immh = extract32(insn, 19, 4); + bool is_u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + + switch (opcode) { + case 0x08: /* SRI */ + if (!is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x00: /* SSHR / USHR */ + case 0x02: /* SSRA / USRA (accumulate) */ + case 0x04: /* SRSHR / URSHR (rounding) */ + case 0x06: /* SRSRA / URSRA (accum + rounding) */ + handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + case 0x0a: /* SHL / SLI */ + handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + case 0x10: /* SHRN */ + case 0x11: /* RSHRN / SQRSHRUN */ + if (is_u) { + handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, + opcode, rn, rd); + } else { + handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); + } + break; + case 0x12: /* SQSHRN / UQSHRN */ + case 0x13: /* SQRSHRN / UQRSHRN */ + handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, + opcode, rn, rd); + break; + case 0x14: /* SSHLL / USHLL */ + handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); + break; + case 0x1c: /* SCVTF / UCVTF */ + handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, + opcode, rn, rd); + break; + case 0xc: /* SQSHLU */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); + break; + case 0xe: /* SQSHL, UQSHL */ + handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); + break; + case 0x1f: /* FCVTZS/ FCVTZU */ + handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); + return; + default: + unallocated_encoding(s); + return; + } +} + +/* Generate code to do a "long" addition or subtraction, ie one done in + * TCGv_i64 on vector lanes twice the width specified by size. + */ +static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, + TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) +{ + static NeonGenTwo64OpFn * const fns[3][2] = { + { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, + { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, + { tcg_gen_add_i64, tcg_gen_sub_i64 }, + }; + NeonGenTwo64OpFn *genfn; + assert(size < 3); + + genfn = fns[size][is_sub]; + genfn(tcg_res, tcg_op1, tcg_op2); +} + +static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, + int opcode, int rd, int rn, int rm) +{ + /* 3-reg-different widening insns: 64 x 64 -> 128 */ + TCGv_i64 tcg_res[2]; + int pass, accop; + + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + /* Does this op do an adding accumulate, a subtracting accumulate, + * or no accumulate at all? + */ + switch (opcode) { + case 5: + case 8: + case 9: + accop = 1; + break; + case 10: + case 11: + accop = -1; + break; + default: + accop = 0; + break; + } + + if (accop != 0) { + read_vec_element(s, tcg_res[0], rd, 0, MO_64); + read_vec_element(s, tcg_res[1], rd, 1, MO_64); + } + + /* size == 2 means two 32x32->64 operations; this is worth special + * casing because we can generally handle it inline. + */ + if (size == 2) { + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_passres; + TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); + + int elt = pass + is_q * 2; + + read_vec_element(s, tcg_op1, rn, elt, memop); + read_vec_element(s, tcg_op2, rm, elt, memop); + + if (accop == 0) { + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + switch (opcode) { + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); + break; + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); + break; + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + { + TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); + TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); + + tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); + tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); + tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, + tcg_passres, + tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); + tcg_temp_free_i64(tcg_tmp1); + tcg_temp_free_i64(tcg_tmp2); + break; + } + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ + tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); + break; + case 9: /* SQDMLAL, SQDMLAL2 */ + case 11: /* SQDMLSL, SQDMLSL2 */ + case 13: /* SQDMULL, SQDMULL2 */ + tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + break; + default: + g_assert_not_reached(); + } + + if (opcode == 9 || opcode == 11) { + /* saturating accumulate ops */ + if (accop < 0) { + tcg_gen_neg_i64(tcg_passres, tcg_passres); + } + gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, + tcg_res[pass], tcg_passres); + } else if (accop > 0) { + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + } else if (accop < 0) { + tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + } + + if (accop != 0) { + tcg_temp_free_i64(tcg_passres); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + /* size 0 or 1, generally helper functions */ + for (pass = 0; pass < 2; pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_passres; + int elt = pass + is_q * 2; + + read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); + read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); + + if (accop == 0) { + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + switch (opcode) { + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + { + TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); + static NeonGenWidenFn * const widenfns[2][2] = { + { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, + { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, + }; + NeonGenWidenFn *widenfn = widenfns[size][is_u]; + + widenfn(tcg_op2_64, tcg_op2); + widenfn(tcg_passres, tcg_op1); + gen_neon_addl(size, (opcode == 2), tcg_passres, + tcg_passres, tcg_op2_64); + tcg_temp_free_i64(tcg_op2_64); + break; + } + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + if (size == 0) { + if (is_u) { + gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); + } + } else { + if (is_u) { + gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); + } + } + break; + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ + if (size == 0) { + if (is_u) { + gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); + } + } else { + if (is_u) { + gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); + } else { + gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); + } + } + break; + case 9: /* SQDMLAL, SQDMLAL2 */ + case 11: /* SQDMLSL, SQDMLSL2 */ + case 13: /* SQDMULL, SQDMULL2 */ + assert(size == 1); + gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + break; + case 14: /* PMULL */ + assert(size == 0); + gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + + if (accop != 0) { + if (opcode == 9 || opcode == 11) { + /* saturating accumulate ops */ + if (accop < 0) { + gen_helper_neon_negl_u32(tcg_passres, tcg_passres); + } + gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, + tcg_res[pass], + tcg_passres); + } else { + gen_neon_addl(size, (accop < 0), tcg_res[pass], + tcg_res[pass], tcg_passres); + } + tcg_temp_free_i64(tcg_passres); + } + } + } + + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); +} + +static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, + int opcode, int rd, int rn, int rm) +{ + TCGv_i64 tcg_res[2]; + int part = is_q ? 2 : 0; + int pass; + + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); + static NeonGenWidenFn * const widenfns[3][2] = { + { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, + { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, + { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, + }; + NeonGenWidenFn *widenfn = widenfns[size][is_u]; + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); + widenfn(tcg_op2_wide, tcg_op2); + tcg_temp_free_i32(tcg_op2); + tcg_res[pass] = tcg_temp_new_i64(); + gen_neon_addl(size, (opcode == 3), + tcg_res[pass], tcg_op1, tcg_op2_wide); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2_wide); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + +static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in) +{ + tcg_gen_shri_i64(in, in, 32); + tcg_gen_trunc_i64_i32(res, in); +} + +static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) +{ + tcg_gen_addi_i64(in, in, 1U << 31); + do_narrow_high_u32(res, in); +} + +static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, + int opcode, int rd, int rn, int rm) +{ + TCGv_i32 tcg_res[2]; + int part = is_q ? 2 : 0; + int pass; + + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_wideres = tcg_temp_new_i64(); + static NeonGenNarrowFn * const narrowfns[3][2] = { + { gen_helper_neon_narrow_high_u8, + gen_helper_neon_narrow_round_high_u8 }, + { gen_helper_neon_narrow_high_u16, + gen_helper_neon_narrow_round_high_u16 }, + { do_narrow_high_u32, do_narrow_round_high_u32 }, + }; + NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + + tcg_res[pass] = tcg_temp_new_i32(); + gennarrow(tcg_res[pass], tcg_wideres); + tcg_temp_free_i64(tcg_wideres); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm) +{ + /* PMULL of 64 x 64 -> 128 is an odd special case because it + * is the only three-reg-diff instruction which produces a + * 128-bit wide result from a single operation. However since + * it's possible to calculate the two halves more or less + * separately we just use two helper calls. + */ + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, is_q, MO_64); + read_vec_element(s, tcg_op2, rm, is_q, MO_64); + gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2); + write_vec_element(s, tcg_res, rd, 0, MO_64); + gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2); + write_vec_element(s, tcg_res, rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); +} + +/* C3.6.15 AdvSIMD three different + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | + * +---+---+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) +{ + /* Instructions in this group fall into three basic classes + * (in each case with the operation working on each element in + * the input vectors): + * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra + * 128 bit input) + * (2) wide 64 x 128 -> 128 + * (3) narrowing 128 x 128 -> 64 + * Here we do initial decode, catch unallocated cases and + * dispatch to separate functions for each class. + */ + int is_q = extract32(insn, 30, 1); + int is_u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + switch (opcode) { + case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ + case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ + /* 64 x 128 -> 128 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); + break; + case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ + case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ + /* 128 x 128 -> 64 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); + break; + case 14: /* PMULL, PMULL2 */ + if (is_u || size == 1 || size == 2) { + unallocated_encoding(s); + return; + } + if (size == 3) { + if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_pmull_64(s, is_q, rd, rn, rm); + return; + } + goto is_widening; + case 9: /* SQDMLAL, SQDMLAL2 */ + case 11: /* SQDMLSL, SQDMLSL2 */ + case 13: /* SQDMULL, SQDMULL2 */ + if (is_u || size == 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ + case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ + case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ + case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ + case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ + /* 64 x 64 -> 128 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + is_widening: + if (!fp_access_check(s)) { + return; + } + + handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); + break; + default: + /* opcode 15 not allocated */ + unallocated_encoding(s); + break; + } +} + +/* Logic op (opcode == 3) subgroup of C3.6.16. */ +static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int rm = extract32(insn, 16, 5); + int size = extract32(insn, 22, 2); + bool is_u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + TCGv_i64 tcg_op1, tcg_op2, tcg_res[2]; + int pass; + + if (!fp_access_check(s)) { + return; + } + + tcg_op1 = tcg_temp_new_i64(); + tcg_op2 = tcg_temp_new_i64(); + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = tcg_temp_new_i64(); + + for (pass = 0; pass < (is_q ? 2 : 1); pass++) { + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + if (!is_u) { + switch (size) { + case 0: /* AND */ + tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 1: /* BIC */ + tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 2: /* ORR */ + tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 3: /* ORN */ + tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + } + } else { + if (size != 0) { + /* B* ops need res loaded to operate on */ + read_vec_element(s, tcg_res[pass], rd, pass, MO_64); + } + + switch (size) { + case 0: /* EOR */ + tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 1: /* BSL bitwise select */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1); + break; + case 2: /* BIT, bitwise insert if true */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + break; + case 3: /* BIF, bitwise insert if false */ + tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]); + tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2); + tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + break; + } + } + } + + write_vec_element(s, tcg_res[0], rd, 0, MO_64); + if (!is_q) { + tcg_gen_movi_i64(tcg_res[1], 0); + } + write_vec_element(s, tcg_res[1], rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res[0]); + tcg_temp_free_i64(tcg_res[1]); +} + +/* Helper functions for 32 bit comparisons */ +static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2); +} + +static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2); +} + +static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2); +} + +static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) +{ + tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2); +} + +/* Pairwise op subgroup of C3.6.16. + * + * This is called directly or via the handle_3same_float for float pairwise + * operations where the opcode and size are calculated differently. + */ +static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, + int size, int rn, int rm, int rd) +{ + TCGv_ptr fpst; + int pass; + + /* Floating point operations need fpst */ + if (opcode >= 0x58) { + fpst = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_PTR(fpst); + } + + if (!fp_access_check(s)) { + return; + } + + /* These operations work on the concatenated rm:rn, with each pair of + * adjacent elements being operated on to produce an element in the result. + */ + if (size == 3) { + TCGv_i64 tcg_res[2]; + + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + int passreg = (pass == 0) ? rn : rm; + + read_vec_element(s, tcg_op1, passreg, 0, MO_64); + read_vec_element(s, tcg_op2, passreg, 1, MO_64); + tcg_res[pass] = tcg_temp_new_i64(); + + switch (opcode) { + case 0x17: /* ADDP */ + tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); + break; + case 0x58: /* FMAXNMP */ + gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5a: /* FADDP */ + gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5e: /* FMAXP */ + gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x78: /* FMINNMP */ + gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x7e: /* FMINP */ + gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } + } else { + int maxpass = is_q ? 4 : 2; + TCGv_i32 tcg_res[4]; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + NeonGenTwoOpFn *genfn = NULL; + int passreg = pass < (maxpass / 2) ? rn : rm; + int passelt = (is_q && (pass & 1)) ? 2 : 0; + + read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); + read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); + tcg_res[pass] = tcg_temp_new_i32(); + + switch (opcode) { + case 0x17: /* ADDP */ + { + static NeonGenTwoOpFn * const fns[3] = { + gen_helper_neon_padd_u8, + gen_helper_neon_padd_u16, + tcg_gen_add_i32, + }; + genfn = fns[size]; + break; + } + case 0x14: /* SMAXP, UMAXP */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, + { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, + { gen_max_s32, gen_max_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x15: /* SMINP, UMINP */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, + { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, + { gen_min_s32, gen_min_u32 }, + }; + genfn = fns[size][u]; + break; + } + /* The FP operations are all on single floats (32 bit) */ + case 0x58: /* FMAXNMP */ + gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5a: /* FADDP */ + gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x5e: /* FMAXP */ + gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x78: /* FMINNMP */ + gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + case 0x7e: /* FMINP */ + gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); + break; + default: + g_assert_not_reached(); + } + + /* FP ops called directly, otherwise call now */ + if (genfn) { + genfn(tcg_res[pass], tcg_op1, tcg_op2); + } + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + + for (pass = 0; pass < maxpass; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } + } + + if (!TCGV_IS_UNUSED_PTR(fpst)) { + tcg_temp_free_ptr(fpst); + } +} + +/* Floating point op subgroup of C3.6.16. */ +static void disas_simd_3same_float(DisasContext *s, uint32_t insn) +{ + /* For floating point ops, the U, size[1] and opcode bits + * together indicate the operation. size[0] indicates single + * or double. + */ + int fpopcode = extract32(insn, 11, 5) + | (extract32(insn, 23, 1) << 5) + | (extract32(insn, 29, 1) << 6); + int is_q = extract32(insn, 30, 1); + int size = extract32(insn, 22, 1); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + int datasize = is_q ? 128 : 64; + int esize = 32 << size; + int elements = datasize / esize; + + if (size == 1 && !is_q) { + unallocated_encoding(s); + return; + } + + switch (fpopcode) { + case 0x58: /* FMAXNMP */ + case 0x5a: /* FADDP */ + case 0x5e: /* FMAXP */ + case 0x78: /* FMINNMP */ + case 0x7e: /* FMINP */ + if (size && !is_q) { + unallocated_encoding(s); + return; + } + handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, + rn, rm, rd); + return; + case 0x1b: /* FMULX */ + case 0x1f: /* FRECPS */ + case 0x3f: /* FRSQRTS */ + case 0x5d: /* FACGE */ + case 0x7d: /* FACGT */ + case 0x19: /* FMLA */ + case 0x39: /* FMLS */ + case 0x18: /* FMAXNM */ + case 0x1a: /* FADD */ + case 0x1c: /* FCMEQ */ + case 0x1e: /* FMAX */ + case 0x38: /* FMINNM */ + case 0x3a: /* FSUB */ + case 0x3e: /* FMIN */ + case 0x5b: /* FMUL */ + case 0x5c: /* FCMGE */ + case 0x5f: /* FDIV */ + case 0x7a: /* FABD */ + case 0x7c: /* FCMGT */ + if (!fp_access_check(s)) { + return; + } + + handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); + return; + default: + unallocated_encoding(s); + return; + } +} + +/* Integer op subgroup of C3.6.16. */ +static void disas_simd_3same_int(DisasContext *s, uint32_t insn) +{ + int is_q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int pass; + + switch (opcode) { + case 0x13: /* MUL, PMUL */ + if (u && size != 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x0: /* SHADD, UHADD */ + case 0x2: /* SRHADD, URHADD */ + case 0x4: /* SHSUB, UHSUB */ + case 0xc: /* SMAX, UMAX */ + case 0xd: /* SMIN, UMIN */ + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ + case 0x12: /* MLA, MLS */ + if (size == 3) { + unallocated_encoding(s); + return; + } + break; + case 0x16: /* SQDMULH, SQRDMULH */ + if (size == 0 || size == 3) { + unallocated_encoding(s); + return; + } + break; + default: + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + } + + if (!fp_access_check(s)) { + return; + } + + if (size == 3) { + assert(is_q); + for (pass = 0; pass < 2; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass, MO_64); + read_vec_element(s, tcg_op2, rm, pass, MO_64); + + handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + for (pass = 0; pass < (is_q ? 4 : 2); pass++) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + NeonGenTwoOpFn *genfn = NULL; + NeonGenTwoOpEnvFn *genenvfn = NULL; + + read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); + read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); + + switch (opcode) { + case 0x0: /* SHADD, UHADD */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, + { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, + { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x1: /* SQADD, UQADD */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, + { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, + { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x2: /* SRHADD, URHADD */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, + { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, + { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x4: /* SHSUB, UHSUB */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, + { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, + { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x5: /* SQSUB, UQSUB */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, + { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, + { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x6: /* CMGT, CMHI */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 }, + { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 }, + { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x7: /* CMGE, CMHS */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 }, + { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 }, + { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x8: /* SSHL, USHL */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 }, + { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 }, + { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x9: /* SQSHL, UQSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, + { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, + { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0xa: /* SRSHL, URSHL */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, + { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, + { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0xb: /* SQRSHL, UQRSHL */ + { + static NeonGenTwoOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, + { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, + { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0xc: /* SMAX, UMAX */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_max_s8, gen_helper_neon_max_u8 }, + { gen_helper_neon_max_s16, gen_helper_neon_max_u16 }, + { gen_max_s32, gen_max_u32 }, + }; + genfn = fns[size][u]; + break; + } + + case 0xd: /* SMIN, UMIN */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_min_s8, gen_helper_neon_min_u8 }, + { gen_helper_neon_min_s16, gen_helper_neon_min_u16 }, + { gen_min_s32, gen_min_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0xe: /* SABD, UABD */ + case 0xf: /* SABA, UABA */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 }, + { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 }, + { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x10: /* ADD, SUB */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 }, + { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, + { tcg_gen_add_i32, tcg_gen_sub_i32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x11: /* CMTST, CMEQ */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 }, + { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 }, + { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 }, + }; + genfn = fns[size][u]; + break; + } + case 0x13: /* MUL, PMUL */ + if (u) { + /* PMUL */ + assert(size == 0); + genfn = gen_helper_neon_mul_p8; + break; + } + /* fall through : MUL */ + case 0x12: /* MLA, MLS */ + { + static NeonGenTwoOpFn * const fns[3] = { + gen_helper_neon_mul_u8, + gen_helper_neon_mul_u16, + tcg_gen_mul_i32, + }; + genfn = fns[size]; + break; + } + case 0x16: /* SQDMULH, SQRDMULH */ + { + static NeonGenTwoOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, + { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, + }; + assert(size == 1 || size == 2); + genenvfn = fns[size - 1][u]; + break; + } + default: + g_assert_not_reached(); + } + + if (genenvfn) { + genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2); + } else { + genfn(tcg_res, tcg_op1, tcg_op2); + } + + if (opcode == 0xf || opcode == 0x12) { + /* SABA, UABA, MLA, MLS: accumulating ops */ + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 }, + { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, + { tcg_gen_add_i32, tcg_gen_sub_i32 }, + }; + bool is_sub = (opcode == 0x12 && u); /* MLS */ + + genfn = fns[size][is_sub]; + read_vec_element_i32(s, tcg_op1, rd, pass, MO_32); + genfn(tcg_res, tcg_op1, tcg_res); + } + + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + } + } + + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* C3.6.16 AdvSIMD three same + * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+------+--------+---+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | + * +---+---+---+-----------+------+---+------+--------+---+------+------+ + */ +static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) +{ + int opcode = extract32(insn, 11, 5); + + switch (opcode) { + case 0x3: /* logic ops */ + disas_simd_3same_logic(s, insn); + break; + case 0x17: /* ADDP */ + case 0x14: /* SMAXP, UMAXP */ + case 0x15: /* SMINP, UMINP */ + { + /* Pairwise operations */ + int is_q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + if (opcode == 0x17) { + if (u || (size == 3 && !is_q)) { + unallocated_encoding(s); + return; + } + } else { + if (size == 3) { + unallocated_encoding(s); + return; + } + } + handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); + break; + } + case 0x18 ... 0x31: + /* floating point ops, sz[1] and U are part of opcode */ + disas_simd_3same_float(s, insn); + break; + default: + disas_simd_3same_int(s, insn); + break; + } +} + +static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, + int size, int rn, int rd) +{ + /* Handle 2-reg-misc ops which are widening (so each size element + * in the source becomes a 2*size element in the destination. + * The only instruction like this is FCVTL. + */ + int pass; + + if (size == 3) { + /* 32 -> 64 bit fp conversion */ + TCGv_i64 tcg_res[2]; + int srcelt = is_q ? 2 : 0; + + for (pass = 0; pass < 2; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); + gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env); + tcg_temp_free_i32(tcg_op); + } + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } + } else { + /* 16 -> 32 bit fp conversion */ + int srcelt = is_q ? 4 : 0; + TCGv_i32 tcg_res[4]; + + for (pass = 0; pass < 4; pass++) { + tcg_res[pass] = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], + cpu_env); + } + for (pass = 0; pass < 4; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + } +} + +static void handle_rev(DisasContext *s, int opcode, bool u, + bool is_q, int size, int rn, int rd) +{ + int op = (opcode << 1) | u; + int opsz = op + size; + int grp_size = 3 - opsz; + int dsize = is_q ? 128 : 64; + int i; + + if (opsz >= 3) { + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (size == 0) { + /* Special case bytes, use bswap op on each group of elements */ + int groups = dsize / (8 << grp_size); + + for (i = 0; i < groups; i++) { + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + + read_vec_element(s, tcg_tmp, rn, i, grp_size); + switch (grp_size) { + case MO_16: + tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); + break; + case MO_32: + tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp); + break; + case MO_64: + tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); + break; + default: + g_assert_not_reached(); + } + write_vec_element(s, tcg_tmp, rd, i, grp_size); + tcg_temp_free_i64(tcg_tmp); + } + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + int revmask = (1 << grp_size) - 1; + int esize = 8 << size; + int elements = dsize / esize; + TCGv_i64 tcg_rn = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = tcg_const_i64(0); + TCGv_i64 tcg_rd_hi = tcg_const_i64(0); + + for (i = 0; i < elements; i++) { + int e_rev = (i & 0xf) ^ revmask; + int off = e_rev * esize; + read_vec_element(s, tcg_rn, rn, i, size); + if (off >= 64) { + tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi, + tcg_rn, off - 64, esize); + } else { + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize); + } + } + write_vec_element(s, tcg_rd, rd, 0, MO_64); + write_vec_element(s, tcg_rd_hi, rd, 1, MO_64); + + tcg_temp_free_i64(tcg_rd_hi); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } +} + +static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, + bool is_q, int size, int rn, int rd) +{ + /* Implement the pairwise operations from 2-misc: + * SADDLP, UADDLP, SADALP, UADALP. + * These all add pairs of elements in the input to produce a + * double-width result element in the output (possibly accumulating). + */ + bool accum = (opcode == 0x6); + int maxpass = is_q ? 2 : 1; + int pass; + TCGv_i64 tcg_res[2]; + + if (size == 2) { + /* 32 + 32 -> 64 op */ + TCGMemOp memop = size + (u ? 0 : MO_SIGN); + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass * 2, memop); + read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); + tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); + if (accum) { + read_vec_element(s, tcg_op1, rd, pass, MO_64); + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + NeonGenOneOpFn *genfn; + static NeonGenOneOpFn * const fns[2][2] = { + { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, + { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, + }; + + genfn = fns[size][u]; + + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + genfn(tcg_res[pass], tcg_op); + + if (accum) { + read_vec_element(s, tcg_op, rd, pass, MO_64); + if (size == 0) { + gen_helper_neon_addl_u16(tcg_res[pass], + tcg_res[pass], tcg_op); + } else { + gen_helper_neon_addl_u32(tcg_res[pass], + tcg_res[pass], tcg_op); + } + } + tcg_temp_free_i64(tcg_op); + } + } + if (!is_q) { + tcg_res[1] = tcg_const_i64(0); + } + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + +static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) +{ + /* Implement SHLL and SHLL2 */ + int pass; + int part = is_q ? 2 : 0; + TCGv_i64 tcg_res[2]; + + for (pass = 0; pass < 2; pass++) { + static NeonGenWidenFn * const widenfns[3] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + NeonGenWidenFn *widenfn = widenfns[size]; + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); + tcg_res[pass] = tcg_temp_new_i64(); + widenfn(tcg_res[pass], tcg_op); + tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); + + tcg_temp_free_i32(tcg_op); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + +/* C3.6.17 AdvSIMD two reg misc + * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | + * +---+---+---+-----------+------+-----------+--------+-----+------+------+ + */ +static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) +{ + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + bool need_fpstatus = false; + bool need_rmode = false; + int rmode = -1; + TCGv_i32 tcg_rmode; + TCGv_ptr tcg_fpstatus; + + switch (opcode) { + case 0x0: /* REV64, REV32 */ + case 0x1: /* REV16 */ + handle_rev(s, opcode, u, is_q, size, rn, rd); + return; + case 0x5: /* CNT, NOT, RBIT */ + if (u && size == 0) { + /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */ + size = 3; + break; + } else if (u && size == 1) { + /* RBIT */ + break; + } else if (!u && size == 0) { + /* CNT */ + break; + } + unallocated_encoding(s); + return; + case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ + case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ + if (size == 3) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + + handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); + return; + case 0x4: /* CLS, CLZ */ + if (size == 3) { + unallocated_encoding(s); + return; + } + break; + case 0x2: /* SADDLP, UADDLP */ + case 0x6: /* SADALP, UADALP */ + if (size == 3) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); + return; + case 0x13: /* SHLL, SHLL2 */ + if (u == 0 || size == 3) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_shll(s, is_q, size, rn, rd); + return; + case 0xa: /* CMLT */ + if (u == 1) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x8: /* CMGT, CMGE */ + case 0x9: /* CMEQ, CMLE */ + case 0xb: /* ABS, NEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x3: /* SUQADD, USQADD */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_2misc_satacc(s, false, u, is_q, size, rn, rd); + return; + case 0x7: /* SQABS, SQNEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0xc ... 0xf: + case 0x16 ... 0x1d: + case 0x1f: + { + /* Floating point: U, size[1] and opcode indicate operation; + * size[0] indicates single or double precision. + */ + int is_double = extract32(size, 0, 1); + opcode |= (extract32(size, 1, 1) << 5) | (u << 6); + size = is_double ? 3 : 2; + switch (opcode) { + case 0x2f: /* FABS */ + case 0x6f: /* FNEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + bool is_signed = (opcode == 0x1d) ? true : false; + int elements = is_double ? 2 : is_q ? 4 : 2; + if (is_double && !is_q) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); + return; + } + case 0x2c: /* FCMGT (zero) */ + case 0x2d: /* FCMEQ (zero) */ + case 0x2e: /* FCMLT (zero) */ + case 0x6c: /* FCMGE (zero) */ + case 0x6d: /* FCMLE (zero) */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); + return; + case 0x7f: /* FSQRT */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + need_fpstatus = true; + need_rmode = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x5c: /* FCVTAU */ + case 0x1c: /* FCVTAS */ + need_fpstatus = true; + need_rmode = true; + rmode = FPROUNDING_TIEAWAY; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x3c: /* URECPE */ + if (size == 3) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x3d: /* FRECPE */ + case 0x7d: /* FRSQRTE */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); + return; + case 0x56: /* FCVTXN, FCVTXN2 */ + if (size == 2) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x16: /* FCVTN, FCVTN2 */ + /* handle_2misc_narrow does a 2*size -> size operation, but these + * instructions encode the source size rather than dest size. + */ + if (!fp_access_check(s)) { + return; + } + handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); + return; + case 0x17: /* FCVTL, FCVTL2 */ + if (!fp_access_check(s)) { + return; + } + handle_2misc_widening(s, opcode, is_q, size, rn, rd); + return; + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + need_rmode = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + /* fall through */ + case 0x59: /* FRINTX */ + case 0x79: /* FRINTI */ + need_fpstatus = true; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x58: /* FRINTA */ + need_rmode = true; + rmode = FPROUNDING_TIEAWAY; + need_fpstatus = true; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x7c: /* URSQRTE */ + if (size == 3) { + unallocated_encoding(s); + return; + } + need_fpstatus = true; + break; + default: + unallocated_encoding(s); + return; + } + break; + } + default: + unallocated_encoding(s); + return; + } + + if (!fp_access_check(s)) { + return; + } + + if (need_fpstatus) { + tcg_fpstatus = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_PTR(tcg_fpstatus); + } + if (need_rmode) { + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + } else { + TCGV_UNUSED_I32(tcg_rmode); + } + + if (size == 3) { + /* All 64-bit element operations can be shared with scalar 2misc */ + int pass; + + for (pass = 0; pass < (is_q ? 2 : 1); pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + + handle_2misc_64(s, opcode, u, tcg_res, tcg_op, + tcg_rmode, tcg_fpstatus); + + write_vec_element(s, tcg_res, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op); + } + } else { + int pass; + + for (pass = 0; pass < (is_q ? 4 : 2); pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + TCGCond cond; + + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + + if (size == 2) { + /* Special cases for 32 bit elements */ + switch (opcode) { + case 0xa: /* CMLT */ + /* 32 bit integer comparison against zero, result is + * test ? (2^32 - 1) : 0. We implement via setcond(test) + * and inverting. + */ + cond = TCG_COND_LT; + do_cmop: + tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0); + tcg_gen_neg_i32(tcg_res, tcg_res); + break; + case 0x8: /* CMGT, CMGE */ + cond = u ? TCG_COND_GE : TCG_COND_GT; + goto do_cmop; + case 0x9: /* CMEQ, CMLE */ + cond = u ? TCG_COND_LE : TCG_COND_EQ; + goto do_cmop; + case 0x4: /* CLS */ + if (u) { + gen_helper_clz32(tcg_res, tcg_op); + } else { + gen_helper_cls32(tcg_res, tcg_op); + } + break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); + } else { + gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); + } + break; + case 0xb: /* ABS, NEG */ + if (u) { + tcg_gen_neg_i32(tcg_res, tcg_op); + } else { + TCGv_i32 tcg_zero = tcg_const_i32(0); + tcg_gen_neg_i32(tcg_res, tcg_op); + tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op, + tcg_zero, tcg_op, tcg_res); + tcg_temp_free_i32(tcg_zero); + } + break; + case 0x2f: /* FABS */ + gen_helper_vfp_abss(tcg_res, tcg_op); + break; + case 0x6f: /* FNEG */ + gen_helper_vfp_negs(tcg_res, tcg_op); + break; + case 0x7f: /* FSQRT */ + gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); + break; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosls(tcg_res, tcg_op, + tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touls(tcg_res, tcg_op, + tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + case 0x58: /* FRINTA */ + case 0x79: /* FRINTI */ + gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); + break; + case 0x59: /* FRINTX */ + gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); + break; + case 0x7c: /* URSQRTE */ + gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus); + break; + default: + g_assert_not_reached(); + } + } else { + /* Use helpers for 8 and 16 bit elements */ + switch (opcode) { + case 0x5: /* CNT, RBIT */ + /* For these two insns size is part of the opcode specifier + * (handled earlier); they always operate on byte elements. + */ + if (u) { + gen_helper_neon_rbit_u8(tcg_res, tcg_op); + } else { + gen_helper_neon_cnt_u8(tcg_res, tcg_op); + } + break; + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + }; + genfn = fns[size][u]; + genfn(tcg_res, cpu_env, tcg_op); + break; + } + case 0x8: /* CMGT, CMGE */ + case 0x9: /* CMEQ, CMLE */ + case 0xa: /* CMLT */ + { + static NeonGenTwoOpFn * const fns[3][2] = { + { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 }, + { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 }, + { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 }, + }; + NeonGenTwoOpFn *genfn; + int comp; + bool reverse; + TCGv_i32 tcg_zero = tcg_const_i32(0); + + /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */ + comp = (opcode - 0x8) * 2 + u; + /* ...but LE, LT are implemented as reverse GE, GT */ + reverse = (comp > 2); + if (reverse) { + comp = 4 - comp; + } + genfn = fns[comp][size]; + if (reverse) { + genfn(tcg_res, tcg_zero, tcg_op); + } else { + genfn(tcg_res, tcg_op, tcg_zero); + } + tcg_temp_free_i32(tcg_zero); + break; + } + case 0xb: /* ABS, NEG */ + if (u) { + TCGv_i32 tcg_zero = tcg_const_i32(0); + if (size) { + gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op); + } else { + gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op); + } + tcg_temp_free_i32(tcg_zero); + } else { + if (size) { + gen_helper_neon_abs_s16(tcg_res, tcg_op); + } else { + gen_helper_neon_abs_s8(tcg_res, tcg_op); + } + } + break; + case 0x4: /* CLS, CLZ */ + if (u) { + if (size == 0) { + gen_helper_neon_clz_u8(tcg_res, tcg_op); + } else { + gen_helper_neon_clz_u16(tcg_res, tcg_op); + } + } else { + if (size == 0) { + gen_helper_neon_cls_s8(tcg_res, tcg_op); + } else { + gen_helper_neon_cls_s16(tcg_res, tcg_op); + } + } + break; + default: + g_assert_not_reached(); + } + } + + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op); + } + } + if (!is_q) { + clear_vec_high(s, rd); + } + + if (need_rmode) { + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + } + if (need_fpstatus) { + tcg_temp_free_ptr(tcg_fpstatus); + } +} + +/* C3.6.13 AdvSIMD scalar x indexed element + * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 + * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ + * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | + * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ + * C3.6.18 AdvSIMD vector x indexed element + * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 + * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ + * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | + * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ + */ +static void disas_simd_indexed(DisasContext *s, uint32_t insn) +{ + /* This encoding has two kinds of instruction: + * normal, where we perform elt x idxelt => elt for each + * element in the vector + * long, where we perform elt x idxelt and generate a result of + * double the width of the input element + * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). + */ + bool is_scalar = extract32(insn, 28, 1); + bool is_q = extract32(insn, 30, 1); + bool u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int l = extract32(insn, 21, 1); + int m = extract32(insn, 20, 1); + /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ + int rm = extract32(insn, 16, 4); + int opcode = extract32(insn, 12, 4); + int h = extract32(insn, 11, 1); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + bool is_long = false; + bool is_fp = false; + int index; + TCGv_ptr fpst; + + switch (opcode) { + case 0x0: /* MLA */ + case 0x4: /* MLS */ + if (!u || is_scalar) { + unallocated_encoding(s); + return; + } + break; + case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */ + if (is_scalar) { + unallocated_encoding(s); + return; + } + is_long = true; + break; + case 0x3: /* SQDMLAL, SQDMLAL2 */ + case 0x7: /* SQDMLSL, SQDMLSL2 */ + case 0xb: /* SQDMULL, SQDMULL2 */ + is_long = true; + /* fall through */ + case 0xc: /* SQDMULH */ + case 0xd: /* SQRDMULH */ + if (u) { + unallocated_encoding(s); + return; + } + break; + case 0x8: /* MUL */ + if (u || is_scalar) { + unallocated_encoding(s); + return; + } + break; + case 0x1: /* FMLA */ + case 0x5: /* FMLS */ + if (u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x9: /* FMUL, FMULX */ + if (!extract32(size, 1, 1)) { + unallocated_encoding(s); + return; + } + is_fp = true; + break; + default: + unallocated_encoding(s); + return; + } + + if (is_fp) { + /* low bit of size indicates single/double */ + size = extract32(size, 0, 1) ? 3 : 2; + if (size == 2) { + index = h << 1 | l; + } else { + if (l || !is_q) { + unallocated_encoding(s); + return; + } + index = h; + } + rm |= (m << 4); + } else { + switch (size) { + case 1: + index = h << 2 | l << 1 | m; + break; + case 2: + index = h << 1 | l; + rm |= (m << 4); + break; + default: + unallocated_encoding(s); + return; + } + } + + if (!fp_access_check(s)) { + return; + } + + if (is_fp) { + fpst = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_PTR(fpst); + } + + if (size == 3) { + TCGv_i64 tcg_idx = tcg_temp_new_i64(); + int pass; + + assert(is_fp && is_q && !is_long); + + read_vec_element(s, tcg_idx, rm, index, MO_64); + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + + switch (opcode) { + case 0x5: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negd(tcg_op, tcg_op); + /* fall through */ + case 0x1: /* FMLA */ + read_vec_element(s, tcg_res, rd, pass, MO_64); + gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); + break; + case 0x9: /* FMUL, FMULX */ + if (u) { + gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); + } else { + gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); + } + break; + default: + g_assert_not_reached(); + } + + write_vec_element(s, tcg_res, rd, pass, MO_64); + tcg_temp_free_i64(tcg_op); + tcg_temp_free_i64(tcg_res); + } + + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_idx); + } else if (!is_long) { + /* 32 bit floating point, or 16 or 32 bit integer. + * For the 16 bit scalar case we use the usual Neon helpers and + * rely on the fact that 0 op 0 == 0 with no side effects. + */ + TCGv_i32 tcg_idx = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + read_vec_element_i32(s, tcg_idx, rm, index, size); + + if (size == 1 && !is_scalar) { + /* The simplest way to handle the 16x16 indexed ops is to duplicate + * the index into both halves of the 32 bit tcg_idx and then use + * the usual Neon helpers. + */ + tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); + } + + for (pass = 0; pass < maxpasses; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); + + switch (opcode) { + case 0x0: /* MLA */ + case 0x4: /* MLS */ + case 0x8: /* MUL */ + { + static NeonGenTwoOpFn * const fns[2][2] = { + { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, + { tcg_gen_add_i32, tcg_gen_sub_i32 }, + }; + NeonGenTwoOpFn *genfn; + bool is_sub = opcode == 0x4; + + if (size == 1) { + gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); + } else { + tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); + } + if (opcode == 0x8) { + break; + } + read_vec_element_i32(s, tcg_op, rd, pass, MO_32); + genfn = fns[size - 1][is_sub]; + genfn(tcg_res, tcg_op, tcg_res); + break; + } + case 0x5: /* FMLS */ + /* As usual for ARM, separate negation for fused multiply-add */ + gen_helper_vfp_negs(tcg_op, tcg_op); + /* fall through */ + case 0x1: /* FMLA */ + read_vec_element_i32(s, tcg_res, rd, pass, MO_32); + gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); + break; + case 0x9: /* FMUL, FMULX */ + if (u) { + gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); + } else { + gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); + } + break; + case 0xc: /* SQDMULH */ + if (size == 1) { + gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, + tcg_op, tcg_idx); + } else { + gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, + tcg_op, tcg_idx); + } + break; + case 0xd: /* SQRDMULH */ + if (size == 1) { + gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env, + tcg_op, tcg_idx); + } else { + gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, + tcg_op, tcg_idx); + } + break; + default: + g_assert_not_reached(); + } + + if (is_scalar) { + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_op); + tcg_temp_free_i32(tcg_res); + } + + tcg_temp_free_i32(tcg_idx); + + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + /* long ops: 16x16->32 or 32x32->64 */ + TCGv_i64 tcg_res[2]; + int pass; + bool satop = extract32(opcode, 0, 1); + TCGMemOp memop = MO_32; + + if (satop || !u) { + memop |= MO_SIGN; + } + + if (size == 2) { + TCGv_i64 tcg_idx = tcg_temp_new_i64(); + + read_vec_element(s, tcg_idx, rm, index, memop); + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_passres; + int passelt; + + if (is_scalar) { + passelt = 0; + } else { + passelt = pass + (is_q * 2); + } + + read_vec_element(s, tcg_op, rn, passelt, memop); + + tcg_res[pass] = tcg_temp_new_i64(); + + if (opcode == 0xa || opcode == 0xb) { + /* Non-accumulating ops */ + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); + tcg_temp_free_i64(tcg_op); + + if (satop) { + /* saturating, doubling */ + gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + } + + if (opcode == 0xa || opcode == 0xb) { + continue; + } + + /* Accumulating op: handle accumulate step */ + read_vec_element(s, tcg_res[pass], rd, pass, MO_64); + + switch (opcode) { + case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + break; + case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); + break; + case 0x7: /* SQDMLSL, SQDMLSL2 */ + tcg_gen_neg_i64(tcg_passres, tcg_passres); + /* fall through */ + case 0x3: /* SQDMLAL, SQDMLAL2 */ + gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, + tcg_res[pass], + tcg_passres); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i64(tcg_passres); + } + tcg_temp_free_i64(tcg_idx); + + if (is_scalar) { + clear_vec_high(s, rd); + } + } else { + TCGv_i32 tcg_idx = tcg_temp_new_i32(); + + assert(size == 1); + read_vec_element_i32(s, tcg_idx, rm, index, size); + + if (!is_scalar) { + /* The simplest way to handle the 16x16 indexed ops is to + * duplicate the index into both halves of the 32 bit tcg_idx + * and then use the usual Neon helpers. + */ + tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); + } + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i64 tcg_passres; + + if (is_scalar) { + read_vec_element_i32(s, tcg_op, rn, pass, size); + } else { + read_vec_element_i32(s, tcg_op, rn, + pass + (is_q * 2), MO_32); + } + + tcg_res[pass] = tcg_temp_new_i64(); + + if (opcode == 0xa || opcode == 0xb) { + /* Non-accumulating ops */ + tcg_passres = tcg_res[pass]; + } else { + tcg_passres = tcg_temp_new_i64(); + } + + if (memop & MO_SIGN) { + gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); + } else { + gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); + } + if (satop) { + gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, + tcg_passres, tcg_passres); + } + tcg_temp_free_i32(tcg_op); + + if (opcode == 0xa || opcode == 0xb) { + continue; + } + + /* Accumulating op: handle accumulate step */ + read_vec_element(s, tcg_res[pass], rd, pass, MO_64); + + switch (opcode) { + case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ + gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], + tcg_passres); + break; + case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ + gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], + tcg_passres); + break; + case 0x7: /* SQDMLSL, SQDMLSL2 */ + gen_helper_neon_negl_u32(tcg_passres, tcg_passres); + /* fall through */ + case 0x3: /* SQDMLAL, SQDMLAL2 */ + gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, + tcg_res[pass], + tcg_passres); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i64(tcg_passres); + } + tcg_temp_free_i32(tcg_idx); + + if (is_scalar) { + tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); + } + } + + if (is_scalar) { + tcg_res[1] = tcg_const_i64(0); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } + } + + if (!TCGV_IS_UNUSED_PTR(fpst)) { + tcg_temp_free_ptr(fpst); + } +} + +/* C3.6.19 Crypto AES + * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----------------+------+-----------+--------+-----+------+------+ + * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | + * +-----------------+------+-----------+--------+-----+------+------+ + */ +static void disas_crypto_aes(DisasContext *s, uint32_t insn) +{ + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + int decrypt; + TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt; + CryptoThreeOpEnvFn *genfn; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) + || size != 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x4: /* AESE */ + decrypt = 0; + genfn = gen_helper_crypto_aese; + break; + case 0x6: /* AESMC */ + decrypt = 0; + genfn = gen_helper_crypto_aesmc; + break; + case 0x5: /* AESD */ + decrypt = 1; + genfn = gen_helper_crypto_aese; + break; + case 0x7: /* AESIMC */ + decrypt = 1; + genfn = gen_helper_crypto_aesmc; + break; + default: + unallocated_encoding(s); + return; + } + + /* Note that we convert the Vx register indexes into the + * index within the vfp.regs[] array, so we can share the + * helper with the AArch32 instructions. + */ + tcg_rd_regno = tcg_const_i32(rd << 1); + tcg_rn_regno = tcg_const_i32(rn << 1); + tcg_decrypt = tcg_const_i32(decrypt); + + genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt); + + tcg_temp_free_i32(tcg_rd_regno); + tcg_temp_free_i32(tcg_rn_regno); + tcg_temp_free_i32(tcg_decrypt); +} + +/* C3.6.20 Crypto three-reg SHA + * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 + * +-----------------+------+---+------+---+--------+-----+------+------+ + * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd | + * +-----------------+------+---+------+---+--------+-----+------+------+ + */ +static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) +{ + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 3); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + CryptoThreeOpEnvFn *genfn; + TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno; + int feature = ARM_FEATURE_V8_SHA256; + + if (size != 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0: /* SHA1C */ + case 1: /* SHA1P */ + case 2: /* SHA1M */ + case 3: /* SHA1SU0 */ + genfn = NULL; + feature = ARM_FEATURE_V8_SHA1; + break; + case 4: /* SHA256H */ + genfn = gen_helper_crypto_sha256h; + break; + case 5: /* SHA256H2 */ + genfn = gen_helper_crypto_sha256h2; + break; + case 6: /* SHA256SU1 */ + genfn = gen_helper_crypto_sha256su1; + break; + default: + unallocated_encoding(s); + return; + } + + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + tcg_rd_regno = tcg_const_i32(rd << 1); + tcg_rn_regno = tcg_const_i32(rn << 1); + tcg_rm_regno = tcg_const_i32(rm << 1); + + if (genfn) { + genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno); + } else { + TCGv_i32 tcg_opcode = tcg_const_i32(opcode); + + gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno, + tcg_rn_regno, tcg_rm_regno, tcg_opcode); + tcg_temp_free_i32(tcg_opcode); + } + + tcg_temp_free_i32(tcg_rd_regno); + tcg_temp_free_i32(tcg_rn_regno); + tcg_temp_free_i32(tcg_rm_regno); +} + +/* C3.6.21 Crypto two-reg SHA + * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 + * +-----------------+------+-----------+--------+-----+------+------+ + * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | + * +-----------------+------+-----------+--------+-----+------+------+ + */ +static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) +{ + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + CryptoTwoOpEnvFn *genfn; + int feature; + TCGv_i32 tcg_rd_regno, tcg_rn_regno; + + if (size != 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0: /* SHA1H */ + feature = ARM_FEATURE_V8_SHA1; + genfn = gen_helper_crypto_sha1h; + break; + case 1: /* SHA1SU1 */ + feature = ARM_FEATURE_V8_SHA1; + genfn = gen_helper_crypto_sha1su1; + break; + case 2: /* SHA256SU0 */ + feature = ARM_FEATURE_V8_SHA256; + genfn = gen_helper_crypto_sha256su0; + break; + default: + unallocated_encoding(s); + return; + } + + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + tcg_rd_regno = tcg_const_i32(rd << 1); + tcg_rn_regno = tcg_const_i32(rn << 1); + + genfn(cpu_env, tcg_rd_regno, tcg_rn_regno); + + tcg_temp_free_i32(tcg_rd_regno); + tcg_temp_free_i32(tcg_rn_regno); +} + +/* C3.6 Data processing - SIMD, inc Crypto + * + * As the decode gets a little complex we are using a table based + * approach for this part of the decode. + */ +static const AArch64DecodeTable data_proc_simd[] = { + /* pattern , mask , fn */ + { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, + { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, + { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, + { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, + { 0x0e000400, 0x9fe08400, disas_simd_copy }, + { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ + /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ + { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, + { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, + { 0x0e000000, 0xbf208c00, disas_simd_tb }, + { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, + { 0x2e000000, 0xbf208400, disas_simd_ext }, + { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, + { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, + { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, + { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, + { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, + { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ + { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, + { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, + { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, + { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, + { 0x00000000, 0x00000000, NULL } +}; + +static void disas_data_proc_simd(DisasContext *s, uint32_t insn) +{ + /* Note that this is called with all non-FP cases from + * table C3-6 so it must UNDEF for entries not specifically + * allocated to instructions in that table. + */ + AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); + if (fn) { + fn(s, insn); + } else { + unallocated_encoding(s); + } +} + +/* C3.6 Data processing - SIMD and floating point */ +static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) +{ + if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) { + disas_data_proc_fp(s, insn); + } else { + /* SIMD, including crypto */ + disas_data_proc_simd(s, insn); + } +} + +/* C3.1 A64 instruction index by encoding */ +static void disas_a64_insn(CPUARMState *env, DisasContext *s) +{ + uint32_t insn; + + insn = arm_ldl_code(env, s->pc, s->bswap_code); + s->insn = insn; + s->pc += 4; + + s->fp_access_checked = false; + + switch (extract32(insn, 25, 4)) { + case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */ + unallocated_encoding(s); + break; + case 0x8: case 0x9: /* Data processing - immediate */ + disas_data_proc_imm(s, insn); + break; + case 0xa: case 0xb: /* Branch, exception generation and system insns */ + disas_b_exc_sys(s, insn); + break; + case 0x4: + case 0x6: + case 0xc: + case 0xe: /* Loads and stores */ + disas_ldst(s, insn); + break; + case 0x5: + case 0xd: /* Data processing - register */ + disas_data_proc_reg(s, insn); + break; + case 0x7: + case 0xf: /* Data processing - SIMD and floating point */ + disas_data_proc_simd_fp(s, insn); + break; + default: + assert(FALSE); /* all 15 cases should be handled above */ + break; + } + + /* if we allocated any temporaries, free them here */ + free_tmp_a64(s); +} + +void gen_intermediate_code_internal_a64(ARMCPU *cpu, + TranslationBlock *tb, + bool search_pc) +{ + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + DisasContext dc1, *dc = &dc1; + CPUBreakpoint *bp; + int j, lj; + target_ulong pc_start; + target_ulong next_page_start; + int num_insns; + int max_insns; + + pc_start = tb->pc; + + dc->tb = tb; + + dc->is_jmp = DISAS_NEXT; + dc->pc = pc_start; + dc->singlestep_enabled = cs->singlestep_enabled; + dc->condjmp = 0; + + dc->aarch64 = 1; + dc->el3_is_aa64 = arm_el_is_aa64(env, 3); + dc->thumb = 0; + dc->bswap_code = 0; + dc->condexec_mask = 0; + dc->condexec_cond = 0; + dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags); + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); +#if !defined(CONFIG_USER_ONLY) + dc->user = (dc->current_el == 0); +#endif + dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags); + dc->vec_len = 0; + dc->vec_stride = 0; + dc->cp_regs = cpu->cp_regs; + dc->features = env->features; + + /* Single step state. The code-generation logic here is: + * SS_ACTIVE == 0: + * generate code with no special handling for single-stepping (except + * that anything that can make us go to SS_ACTIVE == 1 must end the TB; + * this happens anyway because those changes are all system register or + * PSTATE writes). + * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) + * emit code for one insn + * emit code to clear PSTATE.SS + * emit code to generate software step exception for completed step + * end TB (as usual for having generated an exception) + * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) + * emit code to generate a software step exception + * end the TB + */ + dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags); + dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags); + dc->is_ldex = false; + dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el); + + init_tmp_a64_array(dc); + + next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; + lj = -1; + num_insns = 0; + max_insns = tb->cflags & CF_COUNT_MASK; + if (max_insns == 0) { + max_insns = CF_COUNT_MASK; + } + + gen_tb_start(tb); + + tcg_clear_temp_count(); + + do { + if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) { + QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { + if (bp->pc == dc->pc) { + gen_exception_internal_insn(dc, 0, EXCP_DEBUG); + /* Advance PC so that clearing the breakpoint will + invalidate this TB. */ + dc->pc += 2; + goto done_generating; + } + } + } + + if (search_pc) { + j = tcg_op_buf_count(); + if (lj < j) { + lj++; + while (lj < j) { + tcg_ctx.gen_opc_instr_start[lj++] = 0; + } + } + tcg_ctx.gen_opc_pc[lj] = dc->pc; + tcg_ctx.gen_opc_instr_start[lj] = 1; + tcg_ctx.gen_opc_icount[lj] = num_insns; + } + + if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) { + gen_io_start(); + } + + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) { + tcg_gen_debug_insn_start(dc->pc); + } + + if (dc->ss_active && !dc->pstate_ss) { + /* Singlestep state is Active-pending. + * If we're in this state at the start of a TB then either + * a) we just took an exception to an EL which is being debugged + * and this is the first insn in the exception handler + * b) debug exceptions were masked and we just unmasked them + * without changing EL (eg by clearing PSTATE.D) + * In either case we're going to take a swstep exception in the + * "did not step an insn" case, and so the syndrome ISV and EX + * bits should be zero. + */ + assert(num_insns == 0); + gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0), + default_exception_el(dc)); + dc->is_jmp = DISAS_EXC; + break; + } + + disas_a64_insn(env, dc); + + if (tcg_check_temp_count()) { + fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n", + dc->pc); + } + + /* Translation stops when a conditional branch is encountered. + * Otherwise the subsequent code could get translated several times. + * Also stop translation when a page boundary is reached. This + * ensures prefetch aborts occur at the right place. + */ + num_insns++; + } while (!dc->is_jmp && !tcg_op_buf_full() && + !cs->singlestep_enabled && + !singlestep && + !dc->ss_active && + dc->pc < next_page_start && + num_insns < max_insns); + + if (tb->cflags & CF_LAST_IO) { + gen_io_end(); + } + + if (unlikely(cs->singlestep_enabled || dc->ss_active) + && dc->is_jmp != DISAS_EXC) { + /* Note that this means single stepping WFI doesn't halt the CPU. + * For conditional branch insns this is harmless unreachable code as + * gen_goto_tb() has already handled emitting the debug exception + * (and thus a tb-jump is not possible when singlestepping). + */ + assert(dc->is_jmp != DISAS_TB_JUMP); + if (dc->is_jmp != DISAS_JUMP) { + gen_a64_set_pc_im(dc->pc); + } + if (cs->singlestep_enabled) { + gen_exception_internal(EXCP_DEBUG); + } else { + gen_step_complete_exception(dc); + } + } else { + switch (dc->is_jmp) { + case DISAS_NEXT: + gen_goto_tb(dc, 1, dc->pc); + break; + default: + case DISAS_UPDATE: + gen_a64_set_pc_im(dc->pc); + /* fall through */ + case DISAS_JUMP: + /* indicate that the hash table must be used to find the next TB */ + tcg_gen_exit_tb(0); + break; + case DISAS_TB_JUMP: + case DISAS_EXC: + case DISAS_SWI: + break; + case DISAS_WFE: + gen_a64_set_pc_im(dc->pc); + gen_helper_wfe(cpu_env); + break; + case DISAS_YIELD: + gen_a64_set_pc_im(dc->pc); + gen_helper_yield(cpu_env); + break; + case DISAS_WFI: + /* This is a special case because we don't want to just halt the CPU + * if trying to debug across a WFI. + */ + gen_a64_set_pc_im(dc->pc); + gen_helper_wfi(cpu_env); + /* The helper doesn't necessarily throw an exception, but we + * must go back to the main loop to check for interrupts anyway. + */ + tcg_gen_exit_tb(0); + break; + } + } + +done_generating: + gen_tb_end(tb, num_insns); + +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + qemu_log("----------------\n"); + qemu_log("IN: %s\n", lookup_symbol(pc_start)); + log_target_disas(cs, pc_start, dc->pc - pc_start, + 4 | (dc->bswap_code << 1)); + qemu_log("\n"); + } +#endif + if (search_pc) { + j = tcg_op_buf_count(); + lj++; + while (lj <= j) { + tcg_ctx.gen_opc_instr_start[lj++] = 0; + } + } else { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } +} diff --git a/target-arm/translate.c b/target-arm/translate.c new file mode 100644 index 00000000..43853221 --- /dev/null +++ b/target-arm/translate.c @@ -0,0 +1,11584 @@ +/* + * ARM translation + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2005-2007 CodeSourcery + * Copyright (c) 2007 OpenedHand, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include +#include +#include +#include +#include + +#include "cpu.h" +#include "internals.h" +#include "disas/disas.h" +#include "tcg-op.h" +#include "qemu/log.h" +#include "qemu/bitops.h" +#include "arm_ldst.h" + +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" + +#include "trace-tcg.h" + + +#define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T) +#define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5) +/* currently all emulated v5 cores are also v5TE, so don't bother */ +#define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5) +#define ENABLE_ARCH_5J 0 +#define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6) +#define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K) +#define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2) +#define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7) +#define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8) + +#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0) + +#include "translate.h" +static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE]; + +#if defined(CONFIG_USER_ONLY) +#define IS_USER(s) 1 +#else +#define IS_USER(s) (s->user) +#endif + +TCGv_ptr cpu_env; +/* We reuse the same 64-bit temporaries for efficiency. */ +static TCGv_i64 cpu_V0, cpu_V1, cpu_M0; +static TCGv_i32 cpu_R[16]; +TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF; +TCGv_i64 cpu_exclusive_addr; +TCGv_i64 cpu_exclusive_val; +#ifdef CONFIG_USER_ONLY +TCGv_i64 cpu_exclusive_test; +TCGv_i32 cpu_exclusive_info; +#endif + +/* FIXME: These should be removed. */ +static TCGv_i32 cpu_F0s, cpu_F1s; +static TCGv_i64 cpu_F0d, cpu_F1d; + +#include "exec/gen-icount.h" + +static const char *regnames[] = + { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; + +/* initialize TCG globals. */ +void arm_translate_init(void) +{ + int i; + + cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); + + for (i = 0; i < 16; i++) { + cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUARMState, regs[i]), + regnames[i]); + } + cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF"); + cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF"); + cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF"); + cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF"); + + cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUARMState, exclusive_addr), "exclusive_addr"); + cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUARMState, exclusive_val), "exclusive_val"); +#ifdef CONFIG_USER_ONLY + cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUARMState, exclusive_test), "exclusive_test"); + cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUARMState, exclusive_info), "exclusive_info"); +#endif + + a64_translate_init(); +} + +static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s) +{ + /* Return the mmu_idx to use for A32/T32 "unprivileged load/store" + * insns: + * if PL2, UNPREDICTABLE (we choose to implement as if PL0) + * otherwise, access as if at PL0. + */ + switch (s->mmu_idx) { + case ARMMMUIdx_S1E2: /* this one is UNPREDICTABLE */ + case ARMMMUIdx_S12NSE0: + case ARMMMUIdx_S12NSE1: + return ARMMMUIdx_S12NSE0; + case ARMMMUIdx_S1E3: + case ARMMMUIdx_S1SE0: + case ARMMMUIdx_S1SE1: + return ARMMMUIdx_S1SE0; + case ARMMMUIdx_S2NS: + default: + g_assert_not_reached(); + } +} + +static inline TCGv_i32 load_cpu_offset(int offset) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_ld_i32(tmp, cpu_env, offset); + return tmp; +} + +#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name)) + +static inline void store_cpu_offset(TCGv_i32 var, int offset) +{ + tcg_gen_st_i32(var, cpu_env, offset); + tcg_temp_free_i32(var); +} + +#define store_cpu_field(var, name) \ + store_cpu_offset(var, offsetof(CPUARMState, name)) + +/* Set a variable to the value of a CPU register. */ +static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg) +{ + if (reg == 15) { + uint32_t addr; + /* normally, since we updated PC, we need only to add one insn */ + if (s->thumb) + addr = (long)s->pc + 2; + else + addr = (long)s->pc + 4; + tcg_gen_movi_i32(var, addr); + } else { + tcg_gen_mov_i32(var, cpu_R[reg]); + } +} + +/* Create a new temporary and set it to the value of a CPU register. */ +static inline TCGv_i32 load_reg(DisasContext *s, int reg) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + load_reg_var(s, tmp, reg); + return tmp; +} + +/* Set a CPU register. The source must be a temporary and will be + marked as dead. */ +static void store_reg(DisasContext *s, int reg, TCGv_i32 var) +{ + if (reg == 15) { + tcg_gen_andi_i32(var, var, ~1); + s->is_jmp = DISAS_JUMP; + } + tcg_gen_mov_i32(cpu_R[reg], var); + tcg_temp_free_i32(var); +} + +/* Value extensions. */ +#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var) +#define gen_uxth(var) tcg_gen_ext16u_i32(var, var) +#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var) +#define gen_sxth(var) tcg_gen_ext16s_i32(var, var) + +#define gen_sxtb16(var) gen_helper_sxtb16(var, var) +#define gen_uxtb16(var) gen_helper_uxtb16(var, var) + + +static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask) +{ + TCGv_i32 tmp_mask = tcg_const_i32(mask); + gen_helper_cpsr_write(cpu_env, var, tmp_mask); + tcg_temp_free_i32(tmp_mask); +} +/* Set NZCV flags from the high 4 bits of var. */ +#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV) + +static void gen_exception_internal(int excp) +{ + TCGv_i32 tcg_excp = tcg_const_i32(excp); + + assert(excp_is_internal(excp)); + gen_helper_exception_internal(cpu_env, tcg_excp); + tcg_temp_free_i32(tcg_excp); +} + +static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el) +{ + TCGv_i32 tcg_excp = tcg_const_i32(excp); + TCGv_i32 tcg_syn = tcg_const_i32(syndrome); + TCGv_i32 tcg_el = tcg_const_i32(target_el); + + gen_helper_exception_with_syndrome(cpu_env, tcg_excp, + tcg_syn, tcg_el); + + tcg_temp_free_i32(tcg_el); + tcg_temp_free_i32(tcg_syn); + tcg_temp_free_i32(tcg_excp); +} + +static void gen_ss_advance(DisasContext *s) +{ + /* If the singlestep state is Active-not-pending, advance to + * Active-pending. + */ + if (s->ss_active) { + s->pstate_ss = 0; + gen_helper_clear_pstate_ss(cpu_env); + } +} + +static void gen_step_complete_exception(DisasContext *s) +{ + /* We just completed step of an insn. Move from Active-not-pending + * to Active-pending, and then also take the swstep exception. + * This corresponds to making the (IMPDEF) choice to prioritize + * swstep exceptions over asynchronous exceptions taken to an exception + * level where debug is disabled. This choice has the advantage that + * we do not need to maintain internal state corresponding to the + * ISV/EX syndrome bits between completion of the step and generation + * of the exception, and our syndrome information is always correct. + */ + gen_ss_advance(s); + gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex), + default_exception_el(s)); + s->is_jmp = DISAS_EXC; +} + +static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 tmp1 = tcg_temp_new_i32(); + TCGv_i32 tmp2 = tcg_temp_new_i32(); + tcg_gen_ext16s_i32(tmp1, a); + tcg_gen_ext16s_i32(tmp2, b); + tcg_gen_mul_i32(tmp1, tmp1, tmp2); + tcg_temp_free_i32(tmp2); + tcg_gen_sari_i32(a, a, 16); + tcg_gen_sari_i32(b, b, 16); + tcg_gen_mul_i32(b, b, a); + tcg_gen_mov_i32(a, tmp1); + tcg_temp_free_i32(tmp1); +} + +/* Byteswap each halfword. */ +static void gen_rev16(TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_shri_i32(tmp, var, 8); + tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff); + tcg_gen_shli_i32(var, var, 8); + tcg_gen_andi_i32(var, var, 0xff00ff00); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); +} + +/* Byteswap low halfword and sign extend. */ +static void gen_revsh(TCGv_i32 var) +{ + tcg_gen_ext16u_i32(var, var); + tcg_gen_bswap16_i32(var, var); + tcg_gen_ext16s_i32(var, var); +} + +/* Unsigned bitfield extract. */ +static void gen_ubfx(TCGv_i32 var, int shift, uint32_t mask) +{ + if (shift) + tcg_gen_shri_i32(var, var, shift); + tcg_gen_andi_i32(var, var, mask); +} + +/* Signed bitfield extract. */ +static void gen_sbfx(TCGv_i32 var, int shift, int width) +{ + uint32_t signbit; + + if (shift) + tcg_gen_sari_i32(var, var, shift); + if (shift + width < 32) { + signbit = 1u << (width - 1); + tcg_gen_andi_i32(var, var, (1u << width) - 1); + tcg_gen_xori_i32(var, var, signbit); + tcg_gen_subi_i32(var, var, signbit); + } +} + +/* Return (b << 32) + a. Mark inputs as dead */ +static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b) +{ + TCGv_i64 tmp64 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tmp64, b); + tcg_temp_free_i32(b); + tcg_gen_shli_i64(tmp64, tmp64, 32); + tcg_gen_add_i64(a, tmp64, a); + + tcg_temp_free_i64(tmp64); + return a; +} + +/* Return (b << 32) - a. Mark inputs as dead. */ +static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b) +{ + TCGv_i64 tmp64 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(tmp64, b); + tcg_temp_free_i32(b); + tcg_gen_shli_i64(tmp64, tmp64, 32); + tcg_gen_sub_i64(a, tmp64, a); + + tcg_temp_free_i64(tmp64); + return a; +} + +/* 32x32->64 multiply. Marks inputs as dead. */ +static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 lo = tcg_temp_new_i32(); + TCGv_i32 hi = tcg_temp_new_i32(); + TCGv_i64 ret; + + tcg_gen_mulu2_i32(lo, hi, a, b); + tcg_temp_free_i32(a); + tcg_temp_free_i32(b); + + ret = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(ret, lo, hi); + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); + + return ret; +} + +static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 lo = tcg_temp_new_i32(); + TCGv_i32 hi = tcg_temp_new_i32(); + TCGv_i64 ret; + + tcg_gen_muls2_i32(lo, hi, a, b); + tcg_temp_free_i32(a); + tcg_temp_free_i32(b); + + ret = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(ret, lo, hi); + tcg_temp_free_i32(lo); + tcg_temp_free_i32(hi); + + return ret; +} + +/* Swap low and high halfwords. */ +static void gen_swap_half(TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_shri_i32(tmp, var, 16); + tcg_gen_shli_i32(var, var, 16); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); +} + +/* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. + tmp = (t0 ^ t1) & 0x8000; + t0 &= ~0x8000; + t1 &= ~0x8000; + t0 = (t0 + t1) ^ tmp; + */ + +static void gen_add16(TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, t0, t1); + tcg_gen_andi_i32(tmp, tmp, 0x8000); + tcg_gen_andi_i32(t0, t0, ~0x8000); + tcg_gen_andi_i32(t1, t1, ~0x8000); + tcg_gen_add_i32(t0, t0, t1); + tcg_gen_xor_i32(t0, t0, tmp); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(t1); +} + +/* Set CF to the top bit of var. */ +static void gen_set_CF_bit31(TCGv_i32 var) +{ + tcg_gen_shri_i32(cpu_CF, var, 31); +} + +/* Set N and Z flags from var. */ +static inline void gen_logic_CC(TCGv_i32 var) +{ + tcg_gen_mov_i32(cpu_NF, var); + tcg_gen_mov_i32(cpu_ZF, var); +} + +/* T0 += T1 + CF. */ +static void gen_adc(TCGv_i32 t0, TCGv_i32 t1) +{ + tcg_gen_add_i32(t0, t0, t1); + tcg_gen_add_i32(t0, t0, cpu_CF); +} + +/* dest = T0 + T1 + CF. */ +static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) +{ + tcg_gen_add_i32(dest, t0, t1); + tcg_gen_add_i32(dest, dest, cpu_CF); +} + +/* dest = T0 - T1 + CF - 1. */ +static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) +{ + tcg_gen_sub_i32(dest, t0, t1); + tcg_gen_add_i32(dest, dest, cpu_CF); + tcg_gen_subi_i32(dest, dest, 1); +} + +/* dest = T0 + T1. Compute C, N, V and Z flags */ +static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); + tcg_gen_xor_i32(tmp, t0, t1); + tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); + tcg_temp_free_i32(tmp); + tcg_gen_mov_i32(dest, cpu_NF); +} + +/* dest = T0 + T1 + CF. Compute C, N, V and Z flags */ +static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + if (TCG_TARGET_HAS_add2_i32) { + tcg_gen_movi_i32(tmp, 0); + tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp); + tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp); + } else { + TCGv_i64 q0 = tcg_temp_new_i64(); + TCGv_i64 q1 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(q0, t0); + tcg_gen_extu_i32_i64(q1, t1); + tcg_gen_add_i64(q0, q0, q1); + tcg_gen_extu_i32_i64(q1, cpu_CF); + tcg_gen_add_i64(q0, q0, q1); + tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0); + tcg_temp_free_i64(q0); + tcg_temp_free_i64(q1); + } + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); + tcg_gen_xor_i32(tmp, t0, t1); + tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); + tcg_temp_free_i32(tmp); + tcg_gen_mov_i32(dest, cpu_NF); +} + +/* dest = T0 - T1. Compute C, N, V and Z flags */ +static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 tmp; + tcg_gen_sub_i32(cpu_NF, t0, t1); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, t0, t1); + tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); + tcg_temp_free_i32(tmp); + tcg_gen_mov_i32(dest, cpu_NF); +} + +/* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */ +static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_not_i32(tmp, t1); + gen_adc_CC(dest, t0, tmp); + tcg_temp_free_i32(tmp); +} + +#define GEN_SHIFT(name) \ +static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \ +{ \ + TCGv_i32 tmp1, tmp2, tmp3; \ + tmp1 = tcg_temp_new_i32(); \ + tcg_gen_andi_i32(tmp1, t1, 0xff); \ + tmp2 = tcg_const_i32(0); \ + tmp3 = tcg_const_i32(0x1f); \ + tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \ + tcg_temp_free_i32(tmp3); \ + tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \ + tcg_gen_##name##_i32(dest, tmp2, tmp1); \ + tcg_temp_free_i32(tmp2); \ + tcg_temp_free_i32(tmp1); \ +} +GEN_SHIFT(shl) +GEN_SHIFT(shr) +#undef GEN_SHIFT + +static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 tmp1, tmp2; + tmp1 = tcg_temp_new_i32(); + tcg_gen_andi_i32(tmp1, t1, 0xff); + tmp2 = tcg_const_i32(0x1f); + tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1); + tcg_temp_free_i32(tmp2); + tcg_gen_sar_i32(dest, t0, tmp1); + tcg_temp_free_i32(tmp1); +} + +static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src) +{ + TCGv_i32 c0 = tcg_const_i32(0); + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_neg_i32(tmp, src); + tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp); + tcg_temp_free_i32(c0); + tcg_temp_free_i32(tmp); +} + +static void shifter_out_im(TCGv_i32 var, int shift) +{ + if (shift == 0) { + tcg_gen_andi_i32(cpu_CF, var, 1); + } else { + tcg_gen_shri_i32(cpu_CF, var, shift); + if (shift != 31) { + tcg_gen_andi_i32(cpu_CF, cpu_CF, 1); + } + } +} + +/* Shift by immediate. Includes special handling for shift == 0. */ +static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop, + int shift, int flags) +{ + switch (shiftop) { + case 0: /* LSL */ + if (shift != 0) { + if (flags) + shifter_out_im(var, 32 - shift); + tcg_gen_shli_i32(var, var, shift); + } + break; + case 1: /* LSR */ + if (shift == 0) { + if (flags) { + tcg_gen_shri_i32(cpu_CF, var, 31); + } + tcg_gen_movi_i32(var, 0); + } else { + if (flags) + shifter_out_im(var, shift - 1); + tcg_gen_shri_i32(var, var, shift); + } + break; + case 2: /* ASR */ + if (shift == 0) + shift = 32; + if (flags) + shifter_out_im(var, shift - 1); + if (shift == 32) + shift = 31; + tcg_gen_sari_i32(var, var, shift); + break; + case 3: /* ROR/RRX */ + if (shift != 0) { + if (flags) + shifter_out_im(var, shift - 1); + tcg_gen_rotri_i32(var, var, shift); break; + } else { + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_shli_i32(tmp, cpu_CF, 31); + if (flags) + shifter_out_im(var, 0); + tcg_gen_shri_i32(var, var, 1); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); + } + } +}; + +static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop, + TCGv_i32 shift, int flags) +{ + if (flags) { + switch (shiftop) { + case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break; + case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break; + case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break; + case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break; + } + } else { + switch (shiftop) { + case 0: + gen_shl(var, var, shift); + break; + case 1: + gen_shr(var, var, shift); + break; + case 2: + gen_sar(var, var, shift); + break; + case 3: tcg_gen_andi_i32(shift, shift, 0x1f); + tcg_gen_rotr_i32(var, var, shift); break; + } + } + tcg_temp_free_i32(shift); +} + +#define PAS_OP(pfx) \ + switch (op2) { \ + case 0: gen_pas_helper(glue(pfx,add16)); break; \ + case 1: gen_pas_helper(glue(pfx,addsubx)); break; \ + case 2: gen_pas_helper(glue(pfx,subaddx)); break; \ + case 3: gen_pas_helper(glue(pfx,sub16)); break; \ + case 4: gen_pas_helper(glue(pfx,add8)); break; \ + case 7: gen_pas_helper(glue(pfx,sub8)); break; \ + } +static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_ptr tmp; + + switch (op1) { +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp) + case 1: + tmp = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE)); + PAS_OP(s) + tcg_temp_free_ptr(tmp); + break; + case 5: + tmp = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE)); + PAS_OP(u) + tcg_temp_free_ptr(tmp); + break; +#undef gen_pas_helper +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b) + case 2: + PAS_OP(q); + break; + case 3: + PAS_OP(sh); + break; + case 6: + PAS_OP(uq); + break; + case 7: + PAS_OP(uh); + break; +#undef gen_pas_helper + } +} +#undef PAS_OP + +/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */ +#define PAS_OP(pfx) \ + switch (op1) { \ + case 0: gen_pas_helper(glue(pfx,add8)); break; \ + case 1: gen_pas_helper(glue(pfx,add16)); break; \ + case 2: gen_pas_helper(glue(pfx,addsubx)); break; \ + case 4: gen_pas_helper(glue(pfx,sub8)); break; \ + case 5: gen_pas_helper(glue(pfx,sub16)); break; \ + case 6: gen_pas_helper(glue(pfx,subaddx)); break; \ + } +static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_ptr tmp; + + switch (op2) { +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp) + case 0: + tmp = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE)); + PAS_OP(s) + tcg_temp_free_ptr(tmp); + break; + case 4: + tmp = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE)); + PAS_OP(u) + tcg_temp_free_ptr(tmp); + break; +#undef gen_pas_helper +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b) + case 1: + PAS_OP(q); + break; + case 2: + PAS_OP(sh); + break; + case 5: + PAS_OP(uq); + break; + case 6: + PAS_OP(uh); + break; +#undef gen_pas_helper + } +} +#undef PAS_OP + +/* + * generate a conditional branch based on ARM condition code cc. + * This is common between ARM and Aarch64 targets. + */ +void arm_gen_test_cc(int cc, TCGLabel *label) +{ + TCGv_i32 tmp; + TCGLabel *inv; + + switch (cc) { + case 0: /* eq: Z */ + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label); + break; + case 1: /* ne: !Z */ + tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label); + break; + case 2: /* cs: C */ + tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label); + break; + case 3: /* cc: !C */ + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label); + break; + case 4: /* mi: N */ + tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label); + break; + case 5: /* pl: !N */ + tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label); + break; + case 6: /* vs: V */ + tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label); + break; + case 7: /* vc: !V */ + tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label); + break; + case 8: /* hi: C && !Z */ + inv = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv); + tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label); + gen_set_label(inv); + break; + case 9: /* ls: !C || Z */ + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label); + break; + case 10: /* ge: N == V -> N ^ V == 0 */ + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label); + tcg_temp_free_i32(tmp); + break; + case 11: /* lt: N != V -> N ^ V != 0 */ + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label); + tcg_temp_free_i32(tmp); + break; + case 12: /* gt: !Z && N == V */ + inv = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label); + tcg_temp_free_i32(tmp); + gen_set_label(inv); + break; + case 13: /* le: Z || N != V */ + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label); + tcg_temp_free_i32(tmp); + break; + default: + fprintf(stderr, "Bad condition code 0x%x\n", cc); + abort(); + } +} + +static const uint8_t table_logic_cc[16] = { + 1, /* and */ + 1, /* xor */ + 0, /* sub */ + 0, /* rsb */ + 0, /* add */ + 0, /* adc */ + 0, /* sbc */ + 0, /* rsc */ + 1, /* andl */ + 1, /* xorl */ + 0, /* cmp */ + 0, /* cmn */ + 1, /* orr */ + 1, /* mov */ + 1, /* bic */ + 1, /* mvn */ +}; + +/* Set PC and Thumb state from an immediate address. */ +static inline void gen_bx_im(DisasContext *s, uint32_t addr) +{ + TCGv_i32 tmp; + + s->is_jmp = DISAS_UPDATE; + if (s->thumb != (addr & 1)) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, addr & 1); + tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb)); + tcg_temp_free_i32(tmp); + } + tcg_gen_movi_i32(cpu_R[15], addr & ~1); +} + +/* Set PC and Thumb state from var. var is marked as dead. */ +static inline void gen_bx(DisasContext *s, TCGv_i32 var) +{ + s->is_jmp = DISAS_UPDATE; + tcg_gen_andi_i32(cpu_R[15], var, ~1); + tcg_gen_andi_i32(var, var, 1); + store_cpu_field(var, thumb); +} + +/* Variant of store_reg which uses branch&exchange logic when storing + to r15 in ARM architecture v7 and above. The source must be a temporary + and will be marked as dead. */ +static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var) +{ + if (reg == 15 && ENABLE_ARCH_7) { + gen_bx(s, var); + } else { + store_reg(s, reg, var); + } +} + +/* Variant of store_reg which uses branch&exchange logic when storing + * to r15 in ARM architecture v5T and above. This is used for storing + * the results of a LDR/LDM/POP into r15, and corresponds to the cases + * in the ARM ARM which use the LoadWritePC() pseudocode function. */ +static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) +{ + if (reg == 15 && ENABLE_ARCH_5) { + gen_bx(s, var); + } else { + store_reg(s, reg, var); + } +} + +/* Abstractions of "generate code to do a guest load/store for + * AArch32", where a vaddr is always 32 bits (and is zero + * extended if we're a 64 bit core) and data is also + * 32 bits unless specifically doing a 64 bit access. + * These functions work like tcg_gen_qemu_{ld,st}* except + * that the address argument is TCGv_i32 rather than TCGv. + */ +#if TARGET_LONG_BITS == 32 + +#define DO_GEN_LD(SUFF, OPC) \ +static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \ +{ \ + tcg_gen_qemu_ld_i32(val, addr, index, OPC); \ +} + +#define DO_GEN_ST(SUFF, OPC) \ +static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \ +{ \ + tcg_gen_qemu_st_i32(val, addr, index, OPC); \ +} + +static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index) +{ + tcg_gen_qemu_ld_i64(val, addr, index, MO_TEQ); +} + +static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index) +{ + tcg_gen_qemu_st_i64(val, addr, index, MO_TEQ); +} + +#else + +#define DO_GEN_LD(SUFF, OPC) \ +static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \ +{ \ + TCGv addr64 = tcg_temp_new(); \ + tcg_gen_extu_i32_i64(addr64, addr); \ + tcg_gen_qemu_ld_i32(val, addr64, index, OPC); \ + tcg_temp_free(addr64); \ +} + +#define DO_GEN_ST(SUFF, OPC) \ +static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \ +{ \ + TCGv addr64 = tcg_temp_new(); \ + tcg_gen_extu_i32_i64(addr64, addr); \ + tcg_gen_qemu_st_i32(val, addr64, index, OPC); \ + tcg_temp_free(addr64); \ +} + +static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index) +{ + TCGv addr64 = tcg_temp_new(); + tcg_gen_extu_i32_i64(addr64, addr); + tcg_gen_qemu_ld_i64(val, addr64, index, MO_TEQ); + tcg_temp_free(addr64); +} + +static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index) +{ + TCGv addr64 = tcg_temp_new(); + tcg_gen_extu_i32_i64(addr64, addr); + tcg_gen_qemu_st_i64(val, addr64, index, MO_TEQ); + tcg_temp_free(addr64); +} + +#endif + +DO_GEN_LD(8s, MO_SB) +DO_GEN_LD(8u, MO_UB) +DO_GEN_LD(16s, MO_TESW) +DO_GEN_LD(16u, MO_TEUW) +DO_GEN_LD(32u, MO_TEUL) +DO_GEN_ST(8, MO_UB) +DO_GEN_ST(16, MO_TEUW) +DO_GEN_ST(32, MO_TEUL) + +static inline void gen_set_pc_im(DisasContext *s, target_ulong val) +{ + tcg_gen_movi_i32(cpu_R[15], val); +} + +static inline void gen_hvc(DisasContext *s, int imm16) +{ + /* The pre HVC helper handles cases when HVC gets trapped + * as an undefined insn by runtime configuration (ie before + * the insn really executes). + */ + gen_set_pc_im(s, s->pc - 4); + gen_helper_pre_hvc(cpu_env); + /* Otherwise we will treat this as a real exception which + * happens after execution of the insn. (The distinction matters + * for the PC value reported to the exception handler and also + * for single stepping.) + */ + s->svc_imm = imm16; + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_HVC; +} + +static inline void gen_smc(DisasContext *s) +{ + /* As with HVC, we may take an exception either before or after + * the insn executes. + */ + TCGv_i32 tmp; + + gen_set_pc_im(s, s->pc - 4); + tmp = tcg_const_i32(syn_aa32_smc()); + gen_helper_pre_smc(cpu_env, tmp); + tcg_temp_free_i32(tmp); + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_SMC; +} + +static inline void +gen_set_condexec (DisasContext *s) +{ + if (s->condexec_mask) { + uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1); + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + store_cpu_field(tmp, condexec_bits); + } +} + +static void gen_exception_internal_insn(DisasContext *s, int offset, int excp) +{ + gen_set_condexec(s); + gen_set_pc_im(s, s->pc - offset); + gen_exception_internal(excp); + s->is_jmp = DISAS_JUMP; +} + +static void gen_exception_insn(DisasContext *s, int offset, int excp, + int syn, uint32_t target_el) +{ + gen_set_condexec(s); + gen_set_pc_im(s, s->pc - offset); + gen_exception(excp, syn, target_el); + s->is_jmp = DISAS_JUMP; +} + +/* Force a TB lookup after an instruction that changes the CPU state. */ +static inline void gen_lookup_tb(DisasContext *s) +{ + tcg_gen_movi_i32(cpu_R[15], s->pc & ~1); + s->is_jmp = DISAS_UPDATE; +} + +static inline void gen_add_data_offset(DisasContext *s, unsigned int insn, + TCGv_i32 var) +{ + int val, rm, shift, shiftop; + TCGv_i32 offset; + + if (!(insn & (1 << 25))) { + /* immediate */ + val = insn & 0xfff; + if (!(insn & (1 << 23))) + val = -val; + if (val != 0) + tcg_gen_addi_i32(var, var, val); + } else { + /* shift/register */ + rm = (insn) & 0xf; + shift = (insn >> 7) & 0x1f; + shiftop = (insn >> 5) & 3; + offset = load_reg(s, rm); + gen_arm_shift_im(offset, shiftop, shift, 0); + if (!(insn & (1 << 23))) + tcg_gen_sub_i32(var, var, offset); + else + tcg_gen_add_i32(var, var, offset); + tcg_temp_free_i32(offset); + } +} + +static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn, + int extra, TCGv_i32 var) +{ + int val, rm; + TCGv_i32 offset; + + if (insn & (1 << 22)) { + /* immediate */ + val = (insn & 0xf) | ((insn >> 4) & 0xf0); + if (!(insn & (1 << 23))) + val = -val; + val += extra; + if (val != 0) + tcg_gen_addi_i32(var, var, val); + } else { + /* register */ + if (extra) + tcg_gen_addi_i32(var, var, extra); + rm = (insn) & 0xf; + offset = load_reg(s, rm); + if (!(insn & (1 << 23))) + tcg_gen_sub_i32(var, var, offset); + else + tcg_gen_add_i32(var, var, offset); + tcg_temp_free_i32(offset); + } +} + +static TCGv_ptr get_fpstatus_ptr(int neon) +{ + TCGv_ptr statusptr = tcg_temp_new_ptr(); + int offset; + if (neon) { + offset = offsetof(CPUARMState, vfp.standard_fp_status); + } else { + offset = offsetof(CPUARMState, vfp.fp_status); + } + tcg_gen_addi_ptr(statusptr, cpu_env, offset); + return statusptr; +} + +#define VFP_OP2(name) \ +static inline void gen_vfp_##name(int dp) \ +{ \ + TCGv_ptr fpst = get_fpstatus_ptr(0); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \ + } \ + tcg_temp_free_ptr(fpst); \ +} + +VFP_OP2(add) +VFP_OP2(sub) +VFP_OP2(mul) +VFP_OP2(div) + +#undef VFP_OP2 + +static inline void gen_vfp_F1_mul(int dp) +{ + /* Like gen_vfp_mul() but put result in F1 */ + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst); + } else { + gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst); + } + tcg_temp_free_ptr(fpst); +} + +static inline void gen_vfp_F1_neg(int dp) +{ + /* Like gen_vfp_neg() but put result in F1 */ + if (dp) { + gen_helper_vfp_negd(cpu_F1d, cpu_F0d); + } else { + gen_helper_vfp_negs(cpu_F1s, cpu_F0s); + } +} + +static inline void gen_vfp_abs(int dp) +{ + if (dp) + gen_helper_vfp_absd(cpu_F0d, cpu_F0d); + else + gen_helper_vfp_abss(cpu_F0s, cpu_F0s); +} + +static inline void gen_vfp_neg(int dp) +{ + if (dp) + gen_helper_vfp_negd(cpu_F0d, cpu_F0d); + else + gen_helper_vfp_negs(cpu_F0s, cpu_F0s); +} + +static inline void gen_vfp_sqrt(int dp) +{ + if (dp) + gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env); + else + gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env); +} + +static inline void gen_vfp_cmp(int dp) +{ + if (dp) + gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env); + else + gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env); +} + +static inline void gen_vfp_cmpe(int dp) +{ + if (dp) + gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env); + else + gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env); +} + +static inline void gen_vfp_F1_ld0(int dp) +{ + if (dp) + tcg_gen_movi_i64(cpu_F1d, 0); + else + tcg_gen_movi_i32(cpu_F1s, 0); +} + +#define VFP_GEN_ITOF(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ +} + +VFP_GEN_ITOF(uito) +VFP_GEN_ITOF(sito) +#undef VFP_GEN_ITOF + +#define VFP_GEN_FTOI(name) \ +static inline void gen_vfp_##name(int dp, int neon) \ +{ \ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \ + } else { \ + gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \ + } \ + tcg_temp_free_ptr(statusptr); \ +} + +VFP_GEN_FTOI(toui) +VFP_GEN_FTOI(touiz) +VFP_GEN_FTOI(tosi) +VFP_GEN_FTOI(tosiz) +#undef VFP_GEN_FTOI + +#define VFP_GEN_FIX(name, round) \ +static inline void gen_vfp_##name(int dp, int shift, int neon) \ +{ \ + TCGv_i32 tmp_shift = tcg_const_i32(shift); \ + TCGv_ptr statusptr = get_fpstatus_ptr(neon); \ + if (dp) { \ + gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \ + statusptr); \ + } else { \ + gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \ + statusptr); \ + } \ + tcg_temp_free_i32(tmp_shift); \ + tcg_temp_free_ptr(statusptr); \ +} +VFP_GEN_FIX(tosh, _round_to_zero) +VFP_GEN_FIX(tosl, _round_to_zero) +VFP_GEN_FIX(touh, _round_to_zero) +VFP_GEN_FIX(toul, _round_to_zero) +VFP_GEN_FIX(shto, ) +VFP_GEN_FIX(slto, ) +VFP_GEN_FIX(uhto, ) +VFP_GEN_FIX(ulto, ) +#undef VFP_GEN_FIX + +static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr) +{ + if (dp) { + gen_aa32_ld64(cpu_F0d, addr, get_mem_index(s)); + } else { + gen_aa32_ld32u(cpu_F0s, addr, get_mem_index(s)); + } +} + +static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr) +{ + if (dp) { + gen_aa32_st64(cpu_F0d, addr, get_mem_index(s)); + } else { + gen_aa32_st32(cpu_F0s, addr, get_mem_index(s)); + } +} + +static inline long +vfp_reg_offset (int dp, int reg) +{ + if (dp) + return offsetof(CPUARMState, vfp.regs[reg]); + else if (reg & 1) { + return offsetof(CPUARMState, vfp.regs[reg >> 1]) + + offsetof(CPU_DoubleU, l.upper); + } else { + return offsetof(CPUARMState, vfp.regs[reg >> 1]) + + offsetof(CPU_DoubleU, l.lower); + } +} + +/* Return the offset of a 32-bit piece of a NEON register. + zero is the least significant end of the register. */ +static inline long +neon_reg_offset (int reg, int n) +{ + int sreg; + sreg = reg * 2 + n; + return vfp_reg_offset(0, sreg); +} + +static TCGv_i32 neon_load_reg(int reg, int pass) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass)); + return tmp; +} + +static void neon_store_reg(int reg, int pass, TCGv_i32 var) +{ + tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass)); + tcg_temp_free_i32(var); +} + +static inline void neon_load_reg64(TCGv_i64 var, int reg) +{ + tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg)); +} + +static inline void neon_store_reg64(TCGv_i64 var, int reg) +{ + tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg)); +} + +#define tcg_gen_ld_f32 tcg_gen_ld_i32 +#define tcg_gen_ld_f64 tcg_gen_ld_i64 +#define tcg_gen_st_f32 tcg_gen_st_i32 +#define tcg_gen_st_f64 tcg_gen_st_i64 + +static inline void gen_mov_F0_vreg(int dp, int reg) +{ + if (dp) + tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg)); + else + tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg)); +} + +static inline void gen_mov_F1_vreg(int dp, int reg) +{ + if (dp) + tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg)); + else + tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg)); +} + +static inline void gen_mov_vreg_F0(int dp, int reg) +{ + if (dp) + tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg)); + else + tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg)); +} + +#define ARM_CP_RW_BIT (1 << 20) + +static inline void iwmmxt_load_reg(TCGv_i64 var, int reg) +{ + tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg])); +} + +static inline void iwmmxt_store_reg(TCGv_i64 var, int reg) +{ + tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg])); +} + +static inline TCGv_i32 iwmmxt_load_creg(int reg) +{ + TCGv_i32 var = tcg_temp_new_i32(); + tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); + return var; +} + +static inline void iwmmxt_store_creg(int reg, TCGv_i32 var) +{ + tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); + tcg_temp_free_i32(var); +} + +static inline void gen_op_iwmmxt_movq_wRn_M0(int rn) +{ + iwmmxt_store_reg(cpu_M0, rn); +} + +static inline void gen_op_iwmmxt_movq_M0_wRn(int rn) +{ + iwmmxt_load_reg(cpu_M0, rn); +} + +static inline void gen_op_iwmmxt_orq_M0_wRn(int rn) +{ + iwmmxt_load_reg(cpu_V1, rn); + tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1); +} + +static inline void gen_op_iwmmxt_andq_M0_wRn(int rn) +{ + iwmmxt_load_reg(cpu_V1, rn); + tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1); +} + +static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn) +{ + iwmmxt_load_reg(cpu_V1, rn); + tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1); +} + +#define IWMMXT_OP(name) \ +static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ +{ \ + iwmmxt_load_reg(cpu_V1, rn); \ + gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \ +} + +#define IWMMXT_OP_ENV(name) \ +static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ +{ \ + iwmmxt_load_reg(cpu_V1, rn); \ + gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \ +} + +#define IWMMXT_OP_ENV_SIZE(name) \ +IWMMXT_OP_ENV(name##b) \ +IWMMXT_OP_ENV(name##w) \ +IWMMXT_OP_ENV(name##l) + +#define IWMMXT_OP_ENV1(name) \ +static inline void gen_op_iwmmxt_##name##_M0(void) \ +{ \ + gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \ +} + +IWMMXT_OP(maddsq) +IWMMXT_OP(madduq) +IWMMXT_OP(sadb) +IWMMXT_OP(sadw) +IWMMXT_OP(mulslw) +IWMMXT_OP(mulshw) +IWMMXT_OP(mululw) +IWMMXT_OP(muluhw) +IWMMXT_OP(macsw) +IWMMXT_OP(macuw) + +IWMMXT_OP_ENV_SIZE(unpackl) +IWMMXT_OP_ENV_SIZE(unpackh) + +IWMMXT_OP_ENV1(unpacklub) +IWMMXT_OP_ENV1(unpackluw) +IWMMXT_OP_ENV1(unpacklul) +IWMMXT_OP_ENV1(unpackhub) +IWMMXT_OP_ENV1(unpackhuw) +IWMMXT_OP_ENV1(unpackhul) +IWMMXT_OP_ENV1(unpacklsb) +IWMMXT_OP_ENV1(unpacklsw) +IWMMXT_OP_ENV1(unpacklsl) +IWMMXT_OP_ENV1(unpackhsb) +IWMMXT_OP_ENV1(unpackhsw) +IWMMXT_OP_ENV1(unpackhsl) + +IWMMXT_OP_ENV_SIZE(cmpeq) +IWMMXT_OP_ENV_SIZE(cmpgtu) +IWMMXT_OP_ENV_SIZE(cmpgts) + +IWMMXT_OP_ENV_SIZE(mins) +IWMMXT_OP_ENV_SIZE(minu) +IWMMXT_OP_ENV_SIZE(maxs) +IWMMXT_OP_ENV_SIZE(maxu) + +IWMMXT_OP_ENV_SIZE(subn) +IWMMXT_OP_ENV_SIZE(addn) +IWMMXT_OP_ENV_SIZE(subu) +IWMMXT_OP_ENV_SIZE(addu) +IWMMXT_OP_ENV_SIZE(subs) +IWMMXT_OP_ENV_SIZE(adds) + +IWMMXT_OP_ENV(avgb0) +IWMMXT_OP_ENV(avgb1) +IWMMXT_OP_ENV(avgw0) +IWMMXT_OP_ENV(avgw1) + +IWMMXT_OP_ENV(packuw) +IWMMXT_OP_ENV(packul) +IWMMXT_OP_ENV(packuq) +IWMMXT_OP_ENV(packsw) +IWMMXT_OP_ENV(packsl) +IWMMXT_OP_ENV(packsq) + +static void gen_op_iwmmxt_set_mup(void) +{ + TCGv_i32 tmp; + tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]); + tcg_gen_ori_i32(tmp, tmp, 2); + store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]); +} + +static void gen_op_iwmmxt_set_cup(void) +{ + TCGv_i32 tmp; + tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]); + tcg_gen_ori_i32(tmp, tmp, 1); + store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]); +} + +static void gen_op_iwmmxt_setpsr_nz(void) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0); + store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]); +} + +static inline void gen_op_iwmmxt_addl_M0_wRn(int rn) +{ + iwmmxt_load_reg(cpu_V1, rn); + tcg_gen_ext32u_i64(cpu_V1, cpu_V1); + tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1); +} + +static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, + TCGv_i32 dest) +{ + int rd; + uint32_t offset; + TCGv_i32 tmp; + + rd = (insn >> 16) & 0xf; + tmp = load_reg(s, rd); + + offset = (insn & 0xff) << ((insn >> 7) & 2); + if (insn & (1 << 24)) { + /* Pre indexed */ + if (insn & (1 << 23)) + tcg_gen_addi_i32(tmp, tmp, offset); + else + tcg_gen_addi_i32(tmp, tmp, -offset); + tcg_gen_mov_i32(dest, tmp); + if (insn & (1 << 21)) + store_reg(s, rd, tmp); + else + tcg_temp_free_i32(tmp); + } else if (insn & (1 << 21)) { + /* Post indexed */ + tcg_gen_mov_i32(dest, tmp); + if (insn & (1 << 23)) + tcg_gen_addi_i32(tmp, tmp, offset); + else + tcg_gen_addi_i32(tmp, tmp, -offset); + store_reg(s, rd, tmp); + } else if (!(insn & (1 << 23))) + return 1; + return 0; +} + +static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest) +{ + int rd = (insn >> 0) & 0xf; + TCGv_i32 tmp; + + if (insn & (1 << 8)) { + if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) { + return 1; + } else { + tmp = iwmmxt_load_creg(rd); + } + } else { + tmp = tcg_temp_new_i32(); + iwmmxt_load_reg(cpu_V0, rd); + tcg_gen_trunc_i64_i32(tmp, cpu_V0); + } + tcg_gen_andi_i32(tmp, tmp, mask); + tcg_gen_mov_i32(dest, tmp); + tcg_temp_free_i32(tmp); + return 0; +} + +/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred + (ie. an undefined instruction). */ +static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) +{ + int rd, wrd; + int rdhi, rdlo, rd0, rd1, i; + TCGv_i32 addr; + TCGv_i32 tmp, tmp2, tmp3; + + if ((insn & 0x0e000e00) == 0x0c000000) { + if ((insn & 0x0fe00ff0) == 0x0c400000) { + wrd = insn & 0xf; + rdlo = (insn >> 12) & 0xf; + rdhi = (insn >> 16) & 0xf; + if (insn & ARM_CP_RW_BIT) { /* TMRRC */ + iwmmxt_load_reg(cpu_V0, wrd); + tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0); + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); + tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0); + } else { /* TMCRR */ + tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); + iwmmxt_store_reg(cpu_V0, wrd); + gen_op_iwmmxt_set_mup(); + } + return 0; + } + + wrd = (insn >> 12) & 0xf; + addr = tcg_temp_new_i32(); + if (gen_iwmmxt_address(s, insn, addr)) { + tcg_temp_free_i32(addr); + return 1; + } + if (insn & ARM_CP_RW_BIT) { + if ((insn >> 28) == 0xf) { /* WLDRW wCx */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + iwmmxt_store_creg(wrd, tmp); + } else { + i = 1; + if (insn & (1 << 8)) { + if (insn & (1 << 22)) { /* WLDRD */ + gen_aa32_ld64(cpu_M0, addr, get_mem_index(s)); + i = 0; + } else { /* WLDRW wRd */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + } + } else { + tmp = tcg_temp_new_i32(); + if (insn & (1 << 22)) { /* WLDRH */ + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + } else { /* WLDRB */ + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + } + } + if (i) { + tcg_gen_extu_i32_i64(cpu_M0, tmp); + tcg_temp_free_i32(tmp); + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + } + } else { + if ((insn >> 28) == 0xf) { /* WSTRW wCx */ + tmp = iwmmxt_load_creg(wrd); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + } else { + gen_op_iwmmxt_movq_M0_wRn(wrd); + tmp = tcg_temp_new_i32(); + if (insn & (1 << 8)) { + if (insn & (1 << 22)) { /* WSTRD */ + gen_aa32_st64(cpu_M0, addr, get_mem_index(s)); + } else { /* WSTRW wRd */ + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + } + } else { + if (insn & (1 << 22)) { /* WSTRH */ + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + gen_aa32_st16(tmp, addr, get_mem_index(s)); + } else { /* WSTRB */ + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + gen_aa32_st8(tmp, addr, get_mem_index(s)); + } + } + } + tcg_temp_free_i32(tmp); + } + tcg_temp_free_i32(addr); + return 0; + } + + if ((insn & 0x0f000000) != 0x0e000000) + return 1; + + switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) { + case 0x000: /* WOR */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 0) & 0xf; + rd1 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + gen_op_iwmmxt_orq_M0_wRn(rd1); + gen_op_iwmmxt_setpsr_nz(); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x011: /* TMCR */ + if (insn & 0xf) + return 1; + rd = (insn >> 12) & 0xf; + wrd = (insn >> 16) & 0xf; + switch (wrd) { + case ARM_IWMMXT_wCID: + case ARM_IWMMXT_wCASF: + break; + case ARM_IWMMXT_wCon: + gen_op_iwmmxt_set_cup(); + /* Fall through. */ + case ARM_IWMMXT_wCSSF: + tmp = iwmmxt_load_creg(wrd); + tmp2 = load_reg(s, rd); + tcg_gen_andc_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + iwmmxt_store_creg(wrd, tmp); + break; + case ARM_IWMMXT_wCGR0: + case ARM_IWMMXT_wCGR1: + case ARM_IWMMXT_wCGR2: + case ARM_IWMMXT_wCGR3: + gen_op_iwmmxt_set_cup(); + tmp = load_reg(s, rd); + iwmmxt_store_creg(wrd, tmp); + break; + default: + return 1; + } + break; + case 0x100: /* WXOR */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 0) & 0xf; + rd1 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + gen_op_iwmmxt_xorq_M0_wRn(rd1); + gen_op_iwmmxt_setpsr_nz(); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x111: /* TMRC */ + if (insn & 0xf) + return 1; + rd = (insn >> 12) & 0xf; + wrd = (insn >> 16) & 0xf; + tmp = iwmmxt_load_creg(wrd); + store_reg(s, rd, tmp); + break; + case 0x300: /* WANDN */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 0) & 0xf; + rd1 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tcg_gen_neg_i64(cpu_M0, cpu_M0); + gen_op_iwmmxt_andq_M0_wRn(rd1); + gen_op_iwmmxt_setpsr_nz(); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x200: /* WAND */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 0) & 0xf; + rd1 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + gen_op_iwmmxt_andq_M0_wRn(rd1); + gen_op_iwmmxt_setpsr_nz(); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x810: case 0xa10: /* WMADD */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 0) & 0xf; + rd1 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + if (insn & (1 << 21)) + gen_op_iwmmxt_maddsq_M0_wRn(rd1); + else + gen_op_iwmmxt_madduq_M0_wRn(rd1); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + gen_op_iwmmxt_unpacklb_M0_wRn(rd1); + break; + case 1: + gen_op_iwmmxt_unpacklw_M0_wRn(rd1); + break; + case 2: + gen_op_iwmmxt_unpackll_M0_wRn(rd1); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + gen_op_iwmmxt_unpackhb_M0_wRn(rd1); + break; + case 1: + gen_op_iwmmxt_unpackhw_M0_wRn(rd1); + break; + case 2: + gen_op_iwmmxt_unpackhl_M0_wRn(rd1); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + if (insn & (1 << 22)) + gen_op_iwmmxt_sadw_M0_wRn(rd1); + else + gen_op_iwmmxt_sadb_M0_wRn(rd1); + if (!(insn & (1 << 20))) + gen_op_iwmmxt_addl_M0_wRn(wrd); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + if (insn & (1 << 21)) { + if (insn & (1 << 20)) + gen_op_iwmmxt_mulshw_M0_wRn(rd1); + else + gen_op_iwmmxt_mulslw_M0_wRn(rd1); + } else { + if (insn & (1 << 20)) + gen_op_iwmmxt_muluhw_M0_wRn(rd1); + else + gen_op_iwmmxt_mululw_M0_wRn(rd1); + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + if (insn & (1 << 21)) + gen_op_iwmmxt_macsw_M0_wRn(rd1); + else + gen_op_iwmmxt_macuw_M0_wRn(rd1); + if (!(insn & (1 << 20))) { + iwmmxt_load_reg(cpu_V1, wrd); + tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1); + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + gen_op_iwmmxt_cmpeqb_M0_wRn(rd1); + break; + case 1: + gen_op_iwmmxt_cmpeqw_M0_wRn(rd1); + break; + case 2: + gen_op_iwmmxt_cmpeql_M0_wRn(rd1); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + if (insn & (1 << 22)) { + if (insn & (1 << 20)) + gen_op_iwmmxt_avgw1_M0_wRn(rd1); + else + gen_op_iwmmxt_avgw0_M0_wRn(rd1); + } else { + if (insn & (1 << 20)) + gen_op_iwmmxt_avgb1_M0_wRn(rd1); + else + gen_op_iwmmxt_avgb0_M0_wRn(rd1); + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3)); + tcg_gen_andi_i32(tmp, tmp, 7); + iwmmxt_load_reg(cpu_V1, rd1); + gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */ + if (((insn >> 6) & 3) == 3) + return 1; + rd = (insn >> 12) & 0xf; + wrd = (insn >> 16) & 0xf; + tmp = load_reg(s, rd); + gen_op_iwmmxt_movq_M0_wRn(wrd); + switch ((insn >> 6) & 3) { + case 0: + tmp2 = tcg_const_i32(0xff); + tmp3 = tcg_const_i32((insn & 7) << 3); + break; + case 1: + tmp2 = tcg_const_i32(0xffff); + tmp3 = tcg_const_i32((insn & 3) << 4); + break; + case 2: + tmp2 = tcg_const_i32(0xffffffff); + tmp3 = tcg_const_i32((insn & 1) << 5); + break; + default: + TCGV_UNUSED_I32(tmp2); + TCGV_UNUSED_I32(tmp3); + } + gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3); + tcg_temp_free_i32(tmp3); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */ + rd = (insn >> 12) & 0xf; + wrd = (insn >> 16) & 0xf; + if (rd == 15 || ((insn >> 22) & 3) == 3) + return 1; + gen_op_iwmmxt_movq_M0_wRn(wrd); + tmp = tcg_temp_new_i32(); + switch ((insn >> 22) & 3) { + case 0: + tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3); + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + if (insn & 8) { + tcg_gen_ext8s_i32(tmp, tmp); + } else { + tcg_gen_andi_i32(tmp, tmp, 0xff); + } + break; + case 1: + tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4); + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + if (insn & 8) { + tcg_gen_ext16s_i32(tmp, tmp); + } else { + tcg_gen_andi_i32(tmp, tmp, 0xffff); + } + break; + case 2: + tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5); + tcg_gen_trunc_i64_i32(tmp, cpu_M0); + break; + } + store_reg(s, rd, tmp); + break; + case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */ + if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3) + return 1; + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); + switch ((insn >> 22) & 3) { + case 0: + tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0); + break; + case 1: + tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4); + break; + case 2: + tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12); + break; + } + tcg_gen_shli_i32(tmp, tmp, 28); + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp); + break; + case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */ + if (((insn >> 6) & 3) == 3) + return 1; + rd = (insn >> 12) & 0xf; + wrd = (insn >> 16) & 0xf; + tmp = load_reg(s, rd); + switch ((insn >> 6) & 3) { + case 0: + gen_helper_iwmmxt_bcstb(cpu_M0, tmp); + break; + case 1: + gen_helper_iwmmxt_bcstw(cpu_M0, tmp); + break; + case 2: + gen_helper_iwmmxt_bcstl(cpu_M0, tmp); + break; + } + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */ + if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) + return 1; + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); + tmp2 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp2, tmp); + switch ((insn >> 22) & 3) { + case 0: + for (i = 0; i < 7; i ++) { + tcg_gen_shli_i32(tmp2, tmp2, 4); + tcg_gen_and_i32(tmp, tmp, tmp2); + } + break; + case 1: + for (i = 0; i < 3; i ++) { + tcg_gen_shli_i32(tmp2, tmp2, 8); + tcg_gen_and_i32(tmp, tmp, tmp2); + } + break; + case 2: + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_and_i32(tmp, tmp, tmp2); + break; + } + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + break; + case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */ + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0); + break; + case 1: + gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0); + break; + case 2: + gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */ + if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3) + return 1; + tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF); + tmp2 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp2, tmp); + switch ((insn >> 22) & 3) { + case 0: + for (i = 0; i < 7; i ++) { + tcg_gen_shli_i32(tmp2, tmp2, 4); + tcg_gen_or_i32(tmp, tmp, tmp2); + } + break; + case 1: + for (i = 0; i < 3; i ++) { + tcg_gen_shli_i32(tmp2, tmp2, 8); + tcg_gen_or_i32(tmp, tmp, tmp2); + } + break; + case 2: + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp, tmp, tmp2); + break; + } + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + break; + case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */ + rd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3) + return 1; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_temp_new_i32(); + switch ((insn >> 22) & 3) { + case 0: + gen_helper_iwmmxt_msbb(tmp, cpu_M0); + break; + case 1: + gen_helper_iwmmxt_msbw(tmp, cpu_M0); + break; + case 2: + gen_helper_iwmmxt_msbl(tmp, cpu_M0); + break; + } + store_reg(s, rd, tmp); + break; + case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */ + case 0x906: case 0xb06: case 0xd06: case 0xf06: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + if (insn & (1 << 21)) + gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1); + else + gen_op_iwmmxt_cmpgtub_M0_wRn(rd1); + break; + case 1: + if (insn & (1 << 21)) + gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1); + else + gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1); + break; + case 2: + if (insn & (1 << 21)) + gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1); + else + gen_op_iwmmxt_cmpgtul_M0_wRn(rd1); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */ + case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + if (insn & (1 << 21)) + gen_op_iwmmxt_unpacklsb_M0(); + else + gen_op_iwmmxt_unpacklub_M0(); + break; + case 1: + if (insn & (1 << 21)) + gen_op_iwmmxt_unpacklsw_M0(); + else + gen_op_iwmmxt_unpackluw_M0(); + break; + case 2: + if (insn & (1 << 21)) + gen_op_iwmmxt_unpacklsl_M0(); + else + gen_op_iwmmxt_unpacklul_M0(); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */ + case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + if (insn & (1 << 21)) + gen_op_iwmmxt_unpackhsb_M0(); + else + gen_op_iwmmxt_unpackhub_M0(); + break; + case 1: + if (insn & (1 << 21)) + gen_op_iwmmxt_unpackhsw_M0(); + else + gen_op_iwmmxt_unpackhuw_M0(); + break; + case 2: + if (insn & (1 << 21)) + gen_op_iwmmxt_unpackhsl_M0(); + else + gen_op_iwmmxt_unpackhul_M0(); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */ + case 0x214: case 0x614: case 0xa14: case 0xe14: + if (((insn >> 22) & 3) == 0) + return 1; + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_temp_new_i32(); + if (gen_iwmmxt_shift(insn, 0xff, tmp)) { + tcg_temp_free_i32(tmp); + return 1; + } + switch ((insn >> 22) & 3) { + case 1: + gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 2: + gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 3: + gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp); + break; + } + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */ + case 0x014: case 0x414: case 0x814: case 0xc14: + if (((insn >> 22) & 3) == 0) + return 1; + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_temp_new_i32(); + if (gen_iwmmxt_shift(insn, 0xff, tmp)) { + tcg_temp_free_i32(tmp); + return 1; + } + switch ((insn >> 22) & 3) { + case 1: + gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 2: + gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 3: + gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp); + break; + } + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */ + case 0x114: case 0x514: case 0x914: case 0xd14: + if (((insn >> 22) & 3) == 0) + return 1; + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_temp_new_i32(); + if (gen_iwmmxt_shift(insn, 0xff, tmp)) { + tcg_temp_free_i32(tmp); + return 1; + } + switch ((insn >> 22) & 3) { + case 1: + gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 2: + gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 3: + gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp); + break; + } + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */ + case 0x314: case 0x714: case 0xb14: case 0xf14: + if (((insn >> 22) & 3) == 0) + return 1; + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_temp_new_i32(); + switch ((insn >> 22) & 3) { + case 1: + if (gen_iwmmxt_shift(insn, 0xf, tmp)) { + tcg_temp_free_i32(tmp); + return 1; + } + gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 2: + if (gen_iwmmxt_shift(insn, 0x1f, tmp)) { + tcg_temp_free_i32(tmp); + return 1; + } + gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp); + break; + case 3: + if (gen_iwmmxt_shift(insn, 0x3f, tmp)) { + tcg_temp_free_i32(tmp); + return 1; + } + gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp); + break; + } + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */ + case 0x916: case 0xb16: case 0xd16: case 0xf16: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + if (insn & (1 << 21)) + gen_op_iwmmxt_minsb_M0_wRn(rd1); + else + gen_op_iwmmxt_minub_M0_wRn(rd1); + break; + case 1: + if (insn & (1 << 21)) + gen_op_iwmmxt_minsw_M0_wRn(rd1); + else + gen_op_iwmmxt_minuw_M0_wRn(rd1); + break; + case 2: + if (insn & (1 << 21)) + gen_op_iwmmxt_minsl_M0_wRn(rd1); + else + gen_op_iwmmxt_minul_M0_wRn(rd1); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */ + case 0x816: case 0xa16: case 0xc16: case 0xe16: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 0: + if (insn & (1 << 21)) + gen_op_iwmmxt_maxsb_M0_wRn(rd1); + else + gen_op_iwmmxt_maxub_M0_wRn(rd1); + break; + case 1: + if (insn & (1 << 21)) + gen_op_iwmmxt_maxsw_M0_wRn(rd1); + else + gen_op_iwmmxt_maxuw_M0_wRn(rd1); + break; + case 2: + if (insn & (1 << 21)) + gen_op_iwmmxt_maxsl_M0_wRn(rd1); + else + gen_op_iwmmxt_maxul_M0_wRn(rd1); + break; + case 3: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */ + case 0x402: case 0x502: case 0x602: case 0x702: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_const_i32((insn >> 20) & 3); + iwmmxt_load_reg(cpu_V1, rd1); + gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */ + case 0x41a: case 0x51a: case 0x61a: case 0x71a: + case 0x81a: case 0x91a: case 0xa1a: case 0xb1a: + case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 20) & 0xf) { + case 0x0: + gen_op_iwmmxt_subnb_M0_wRn(rd1); + break; + case 0x1: + gen_op_iwmmxt_subub_M0_wRn(rd1); + break; + case 0x3: + gen_op_iwmmxt_subsb_M0_wRn(rd1); + break; + case 0x4: + gen_op_iwmmxt_subnw_M0_wRn(rd1); + break; + case 0x5: + gen_op_iwmmxt_subuw_M0_wRn(rd1); + break; + case 0x7: + gen_op_iwmmxt_subsw_M0_wRn(rd1); + break; + case 0x8: + gen_op_iwmmxt_subnl_M0_wRn(rd1); + break; + case 0x9: + gen_op_iwmmxt_subul_M0_wRn(rd1); + break; + case 0xb: + gen_op_iwmmxt_subsl_M0_wRn(rd1); + break; + default: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */ + case 0x41e: case 0x51e: case 0x61e: case 0x71e: + case 0x81e: case 0x91e: case 0xa1e: case 0xb1e: + case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f)); + gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp); + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */ + case 0x418: case 0x518: case 0x618: case 0x718: + case 0x818: case 0x918: case 0xa18: case 0xb18: + case 0xc18: case 0xd18: case 0xe18: case 0xf18: + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 20) & 0xf) { + case 0x0: + gen_op_iwmmxt_addnb_M0_wRn(rd1); + break; + case 0x1: + gen_op_iwmmxt_addub_M0_wRn(rd1); + break; + case 0x3: + gen_op_iwmmxt_addsb_M0_wRn(rd1); + break; + case 0x4: + gen_op_iwmmxt_addnw_M0_wRn(rd1); + break; + case 0x5: + gen_op_iwmmxt_adduw_M0_wRn(rd1); + break; + case 0x7: + gen_op_iwmmxt_addsw_M0_wRn(rd1); + break; + case 0x8: + gen_op_iwmmxt_addnl_M0_wRn(rd1); + break; + case 0x9: + gen_op_iwmmxt_addul_M0_wRn(rd1); + break; + case 0xb: + gen_op_iwmmxt_addsl_M0_wRn(rd1); + break; + default: + return 1; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */ + case 0x408: case 0x508: case 0x608: case 0x708: + case 0x808: case 0x908: case 0xa08: case 0xb08: + case 0xc08: case 0xd08: case 0xe08: case 0xf08: + if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0) + return 1; + wrd = (insn >> 12) & 0xf; + rd0 = (insn >> 16) & 0xf; + rd1 = (insn >> 0) & 0xf; + gen_op_iwmmxt_movq_M0_wRn(rd0); + switch ((insn >> 22) & 3) { + case 1: + if (insn & (1 << 21)) + gen_op_iwmmxt_packsw_M0_wRn(rd1); + else + gen_op_iwmmxt_packuw_M0_wRn(rd1); + break; + case 2: + if (insn & (1 << 21)) + gen_op_iwmmxt_packsl_M0_wRn(rd1); + else + gen_op_iwmmxt_packul_M0_wRn(rd1); + break; + case 3: + if (insn & (1 << 21)) + gen_op_iwmmxt_packsq_M0_wRn(rd1); + else + gen_op_iwmmxt_packuq_M0_wRn(rd1); + break; + } + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + gen_op_iwmmxt_set_cup(); + break; + case 0x201: case 0x203: case 0x205: case 0x207: + case 0x209: case 0x20b: case 0x20d: case 0x20f: + case 0x211: case 0x213: case 0x215: case 0x217: + case 0x219: case 0x21b: case 0x21d: case 0x21f: + wrd = (insn >> 5) & 0xf; + rd0 = (insn >> 12) & 0xf; + rd1 = (insn >> 0) & 0xf; + if (rd0 == 0xf || rd1 == 0xf) + return 1; + gen_op_iwmmxt_movq_M0_wRn(wrd); + tmp = load_reg(s, rd0); + tmp2 = load_reg(s, rd1); + switch ((insn >> 16) & 0xf) { + case 0x0: /* TMIA */ + gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); + break; + case 0x8: /* TMIAPH */ + gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2); + break; + case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */ + if (insn & (1 << 16)) + tcg_gen_shri_i32(tmp, tmp, 16); + if (insn & (1 << 17)) + tcg_gen_shri_i32(tmp2, tmp2, 16); + gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); + break; + default: + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + return 1; + } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + gen_op_iwmmxt_movq_wRn_M0(wrd); + gen_op_iwmmxt_set_mup(); + break; + default: + return 1; + } + + return 0; +} + +/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred + (ie. an undefined instruction). */ +static int disas_dsp_insn(DisasContext *s, uint32_t insn) +{ + int acc, rd0, rd1, rdhi, rdlo; + TCGv_i32 tmp, tmp2; + + if ((insn & 0x0ff00f10) == 0x0e200010) { + /* Multiply with Internal Accumulate Format */ + rd0 = (insn >> 12) & 0xf; + rd1 = insn & 0xf; + acc = (insn >> 5) & 7; + + if (acc != 0) + return 1; + + tmp = load_reg(s, rd0); + tmp2 = load_reg(s, rd1); + switch ((insn >> 16) & 0xf) { + case 0x0: /* MIA */ + gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2); + break; + case 0x8: /* MIAPH */ + gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2); + break; + case 0xc: /* MIABB */ + case 0xd: /* MIABT */ + case 0xe: /* MIATB */ + case 0xf: /* MIATT */ + if (insn & (1 << 16)) + tcg_gen_shri_i32(tmp, tmp, 16); + if (insn & (1 << 17)) + tcg_gen_shri_i32(tmp2, tmp2, 16); + gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); + break; + default: + return 1; + } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + + gen_op_iwmmxt_movq_wRn_M0(acc); + return 0; + } + + if ((insn & 0x0fe00ff8) == 0x0c400000) { + /* Internal Accumulator Access Format */ + rdhi = (insn >> 16) & 0xf; + rdlo = (insn >> 12) & 0xf; + acc = insn & 7; + + if (acc != 0) + return 1; + + if (insn & ARM_CP_RW_BIT) { /* MRA */ + iwmmxt_load_reg(cpu_V0, acc); + tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0); + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); + tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0); + tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1); + } else { /* MAR */ + tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); + iwmmxt_store_reg(cpu_V0, acc); + } + return 0; + } + + return 1; +} + +#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n)) +#define VFP_SREG(insn, bigbit, smallbit) \ + ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1)) +#define VFP_DREG(reg, insn, bigbit, smallbit) do { \ + if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \ + reg = (((insn) >> (bigbit)) & 0x0f) \ + | (((insn) >> ((smallbit) - 4)) & 0x10); \ + } else { \ + if (insn & (1 << (smallbit))) \ + return 1; \ + reg = ((insn) >> (bigbit)) & 0x0f; \ + }} while (0) + +#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22) +#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22) +#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7) +#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7) +#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5) +#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5) + +/* Move between integer and VFP cores. */ +static TCGv_i32 gen_vfp_mrs(void) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp, cpu_F0s); + return tmp; +} + +static void gen_vfp_msr(TCGv_i32 tmp) +{ + tcg_gen_mov_i32(cpu_F0s, tmp); + tcg_temp_free_i32(tmp); +} + +static void gen_neon_dup_u8(TCGv_i32 var, int shift) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + if (shift) + tcg_gen_shri_i32(var, var, shift); + tcg_gen_ext8u_i32(var, var); + tcg_gen_shli_i32(tmp, var, 8); + tcg_gen_or_i32(var, var, tmp); + tcg_gen_shli_i32(tmp, var, 16); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); +} + +static void gen_neon_dup_low16(TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_ext16u_i32(var, var); + tcg_gen_shli_i32(tmp, var, 16); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); +} + +static void gen_neon_dup_high16(TCGv_i32 var) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_andi_i32(var, var, 0xffff0000); + tcg_gen_shri_i32(tmp, var, 16); + tcg_gen_or_i32(var, var, tmp); + tcg_temp_free_i32(tmp); +} + +static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size) +{ + /* Load a single Neon element and replicate into a 32 bit TCG reg */ + TCGv_i32 tmp = tcg_temp_new_i32(); + switch (size) { + case 0: + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + gen_neon_dup_u8(tmp, 0); + break; + case 1: + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + gen_neon_dup_low16(tmp); + break; + case 2: + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + break; + default: /* Avoid compiler warnings. */ + abort(); + } + return tmp; +} + +static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm, + uint32_t dp) +{ + uint32_t cc = extract32(insn, 20, 2); + + if (dp) { + TCGv_i64 frn, frm, dest; + TCGv_i64 tmp, zero, zf, nf, vf; + + zero = tcg_const_i64(0); + + frn = tcg_temp_new_i64(); + frm = tcg_temp_new_i64(); + dest = tcg_temp_new_i64(); + + zf = tcg_temp_new_i64(); + nf = tcg_temp_new_i64(); + vf = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(zf, cpu_ZF); + tcg_gen_ext_i32_i64(nf, cpu_NF); + tcg_gen_ext_i32_i64(vf, cpu_VF); + + tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn)); + tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm)); + switch (cc) { + case 0: /* eq: Z */ + tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, + frn, frm); + break; + case 1: /* vs: V */ + tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, + frn, frm); + break; + case 2: /* ge: N == V -> N ^ V == 0 */ + tmp = tcg_temp_new_i64(); + tcg_gen_xor_i64(tmp, vf, nf); + tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, + frn, frm); + tcg_temp_free_i64(tmp); + break; + case 3: /* gt: !Z && N == V */ + tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, + frn, frm); + tmp = tcg_temp_new_i64(); + tcg_gen_xor_i64(tmp, vf, nf); + tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, + dest, frm); + tcg_temp_free_i64(tmp); + break; + } + tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i64(frn); + tcg_temp_free_i64(frm); + tcg_temp_free_i64(dest); + + tcg_temp_free_i64(zf); + tcg_temp_free_i64(nf); + tcg_temp_free_i64(vf); + + tcg_temp_free_i64(zero); + } else { + TCGv_i32 frn, frm, dest; + TCGv_i32 tmp, zero; + + zero = tcg_const_i32(0); + + frn = tcg_temp_new_i32(); + frm = tcg_temp_new_i32(); + dest = tcg_temp_new_i32(); + tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn)); + tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm)); + switch (cc) { + case 0: /* eq: Z */ + tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, + frn, frm); + break; + case 1: /* vs: V */ + tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, + frn, frm); + break; + case 2: /* ge: N == V -> N ^ V == 0 */ + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, + frn, frm); + tcg_temp_free_i32(tmp); + break; + case 3: /* gt: !Z && N == V */ + tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, + frn, frm); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, + dest, frm); + tcg_temp_free_i32(tmp); + break; + } + tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i32(frn); + tcg_temp_free_i32(frm); + tcg_temp_free_i32(dest); + + tcg_temp_free_i32(zero); + } + + return 0; +} + +static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn, + uint32_t rm, uint32_t dp) +{ + uint32_t vmin = extract32(insn, 6, 1); + TCGv_ptr fpst = get_fpstatus_ptr(0); + + if (dp) { + TCGv_i64 frn, frm, dest; + + frn = tcg_temp_new_i64(); + frm = tcg_temp_new_i64(); + dest = tcg_temp_new_i64(); + + tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn)); + tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm)); + if (vmin) { + gen_helper_vfp_minnumd(dest, frn, frm, fpst); + } else { + gen_helper_vfp_maxnumd(dest, frn, frm, fpst); + } + tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i64(frn); + tcg_temp_free_i64(frm); + tcg_temp_free_i64(dest); + } else { + TCGv_i32 frn, frm, dest; + + frn = tcg_temp_new_i32(); + frm = tcg_temp_new_i32(); + dest = tcg_temp_new_i32(); + + tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn)); + tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm)); + if (vmin) { + gen_helper_vfp_minnums(dest, frn, frm, fpst); + } else { + gen_helper_vfp_maxnums(dest, frn, frm, fpst); + } + tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i32(frn); + tcg_temp_free_i32(frm); + tcg_temp_free_i32(dest); + } + + tcg_temp_free_ptr(fpst); + return 0; +} + +static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp, + int rounding) +{ + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode; + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + + if (dp) { + TCGv_i64 tcg_op; + TCGv_i64 tcg_res; + tcg_op = tcg_temp_new_i64(); + tcg_res = tcg_temp_new_i64(); + tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm)); + gen_helper_rintd(tcg_res, tcg_op, fpst); + tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i64(tcg_op); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op; + TCGv_i32 tcg_res; + tcg_op = tcg_temp_new_i32(); + tcg_res = tcg_temp_new_i32(); + tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm)); + gen_helper_rints(tcg_res, tcg_op, fpst); + tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i32(tcg_op); + tcg_temp_free_i32(tcg_res); + } + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + + tcg_temp_free_ptr(fpst); + return 0; +} + +static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp, + int rounding) +{ + bool is_signed = extract32(insn, 7, 1); + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode, tcg_shift; + + tcg_shift = tcg_const_i32(0); + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + + if (dp) { + TCGv_i64 tcg_double, tcg_res; + TCGv_i32 tcg_tmp; + /* Rd is encoded as a single precision register even when the source + * is double precision. + */ + rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1); + tcg_double = tcg_temp_new_i64(); + tcg_res = tcg_temp_new_i64(); + tcg_tmp = tcg_temp_new_i32(); + tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm)); + if (is_signed) { + gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst); + } else { + gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst); + } + tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res); + tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd)); + tcg_temp_free_i32(tcg_tmp); + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_double); + } else { + TCGv_i32 tcg_single, tcg_res; + tcg_single = tcg_temp_new_i32(); + tcg_res = tcg_temp_new_i32(); + tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm)); + if (is_signed) { + gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); + } else { + gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst); + } + tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd)); + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_single); + } + + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + + tcg_temp_free_i32(tcg_shift); + + tcg_temp_free_ptr(fpst); + + return 0; +} + +/* Table for converting the most common AArch32 encoding of + * rounding mode to arm_fprounding order (which matches the + * common AArch64 order); see ARM ARM pseudocode FPDecodeRM(). + */ +static const uint8_t fp_decode_rm[] = { + FPROUNDING_TIEAWAY, + FPROUNDING_TIEEVEN, + FPROUNDING_POSINF, + FPROUNDING_NEGINF, +}; + +static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn) +{ + uint32_t rd, rn, rm, dp = extract32(insn, 8, 1); + + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return 1; + } + + if (dp) { + VFP_DREG_D(rd, insn); + VFP_DREG_N(rn, insn); + VFP_DREG_M(rm, insn); + } else { + rd = VFP_SREG_D(insn); + rn = VFP_SREG_N(insn); + rm = VFP_SREG_M(insn); + } + + if ((insn & 0x0f800e50) == 0x0e000a00) { + return handle_vsel(insn, rd, rn, rm, dp); + } else if ((insn & 0x0fb00e10) == 0x0e800a00) { + return handle_vminmaxnm(insn, rd, rn, rm, dp); + } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) { + /* VRINTA, VRINTN, VRINTP, VRINTM */ + int rounding = fp_decode_rm[extract32(insn, 16, 2)]; + return handle_vrint(insn, rd, rm, dp, rounding); + } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) { + /* VCVTA, VCVTN, VCVTP, VCVTM */ + int rounding = fp_decode_rm[extract32(insn, 16, 2)]; + return handle_vcvt(insn, rd, rm, dp, rounding); + } + return 1; +} + +/* Disassemble a VFP instruction. Returns nonzero if an error occurred + (ie. an undefined instruction). */ +static int disas_vfp_insn(DisasContext *s, uint32_t insn) +{ + uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask; + int dp, veclen; + TCGv_i32 addr; + TCGv_i32 tmp; + TCGv_i32 tmp2; + + if (!arm_dc_feature(s, ARM_FEATURE_VFP)) { + return 1; + } + + /* FIXME: this access check should not take precedence over UNDEF + * for invalid encodings; we will generate incorrect syndrome information + * for attempts to execute invalid vfp/neon encodings with FP disabled. + */ + if (s->fp_excp_el) { + gen_exception_insn(s, 4, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el); + return 0; + } + + if (!s->vfp_enabled) { + /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */ + if ((insn & 0x0fe00fff) != 0x0ee00a10) + return 1; + rn = (insn >> 16) & 0xf; + if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2 + && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) { + return 1; + } + } + + if (extract32(insn, 28, 4) == 0xf) { + /* Encodings with T=1 (Thumb) or unconditional (ARM): + * only used in v8 and above. + */ + return disas_vfp_v8_insn(s, insn); + } + + dp = ((insn & 0xf00) == 0xb00); + switch ((insn >> 24) & 0xf) { + case 0xe: + if (insn & (1 << 4)) { + /* single register transfer */ + rd = (insn >> 12) & 0xf; + if (dp) { + int size; + int pass; + + VFP_DREG_N(rn, insn); + if (insn & 0xf) + return 1; + if (insn & 0x00c00060 + && !arm_dc_feature(s, ARM_FEATURE_NEON)) { + return 1; + } + + pass = (insn >> 21) & 1; + if (insn & (1 << 22)) { + size = 0; + offset = ((insn >> 5) & 3) * 8; + } else if (insn & (1 << 5)) { + size = 1; + offset = (insn & (1 << 6)) ? 16 : 0; + } else { + size = 2; + offset = 0; + } + if (insn & ARM_CP_RW_BIT) { + /* vfp->arm */ + tmp = neon_load_reg(rn, pass); + switch (size) { + case 0: + if (offset) + tcg_gen_shri_i32(tmp, tmp, offset); + if (insn & (1 << 23)) + gen_uxtb(tmp); + else + gen_sxtb(tmp); + break; + case 1: + if (insn & (1 << 23)) { + if (offset) { + tcg_gen_shri_i32(tmp, tmp, 16); + } else { + gen_uxth(tmp); + } + } else { + if (offset) { + tcg_gen_sari_i32(tmp, tmp, 16); + } else { + gen_sxth(tmp); + } + } + break; + case 2: + break; + } + store_reg(s, rd, tmp); + } else { + /* arm->vfp */ + tmp = load_reg(s, rd); + if (insn & (1 << 23)) { + /* VDUP */ + if (size == 0) { + gen_neon_dup_u8(tmp, 0); + } else if (size == 1) { + gen_neon_dup_low16(tmp); + } + for (n = 0; n <= pass * 2; n++) { + tmp2 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp2, tmp); + neon_store_reg(rn, n, tmp2); + } + neon_store_reg(rn, n, tmp); + } else { + /* VMOV */ + switch (size) { + case 0: + tmp2 = neon_load_reg(rn, pass); + tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8); + tcg_temp_free_i32(tmp2); + break; + case 1: + tmp2 = neon_load_reg(rn, pass); + tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16); + tcg_temp_free_i32(tmp2); + break; + case 2: + break; + } + neon_store_reg(rn, pass, tmp); + } + } + } else { /* !dp */ + if ((insn & 0x6f) != 0x00) + return 1; + rn = VFP_SREG_N(insn); + if (insn & ARM_CP_RW_BIT) { + /* vfp->arm */ + if (insn & (1 << 21)) { + /* system register */ + rn >>= 1; + + switch (rn) { + case ARM_VFP_FPSID: + /* VFP2 allows access to FSID from userspace. + VFP3 restricts all id registers to privileged + accesses. */ + if (IS_USER(s) + && arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + tmp = load_cpu_field(vfp.xregs[rn]); + break; + case ARM_VFP_FPEXC: + if (IS_USER(s)) + return 1; + tmp = load_cpu_field(vfp.xregs[rn]); + break; + case ARM_VFP_FPINST: + case ARM_VFP_FPINST2: + /* Not present in VFP3. */ + if (IS_USER(s) + || arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + tmp = load_cpu_field(vfp.xregs[rn]); + break; + case ARM_VFP_FPSCR: + if (rd == 15) { + tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); + tcg_gen_andi_i32(tmp, tmp, 0xf0000000); + } else { + tmp = tcg_temp_new_i32(); + gen_helper_vfp_get_fpscr(tmp, cpu_env); + } + break; + case ARM_VFP_MVFR2: + if (!arm_dc_feature(s, ARM_FEATURE_V8)) { + return 1; + } + /* fall through */ + case ARM_VFP_MVFR0: + case ARM_VFP_MVFR1: + if (IS_USER(s) + || !arm_dc_feature(s, ARM_FEATURE_MVFR)) { + return 1; + } + tmp = load_cpu_field(vfp.xregs[rn]); + break; + default: + return 1; + } + } else { + gen_mov_F0_vreg(0, rn); + tmp = gen_vfp_mrs(); + } + if (rd == 15) { + /* Set the 4 flag bits in the CPSR. */ + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp); + } else { + store_reg(s, rd, tmp); + } + } else { + /* arm->vfp */ + if (insn & (1 << 21)) { + rn >>= 1; + /* system register */ + switch (rn) { + case ARM_VFP_FPSID: + case ARM_VFP_MVFR0: + case ARM_VFP_MVFR1: + /* Writes are ignored. */ + break; + case ARM_VFP_FPSCR: + tmp = load_reg(s, rd); + gen_helper_vfp_set_fpscr(cpu_env, tmp); + tcg_temp_free_i32(tmp); + gen_lookup_tb(s); + break; + case ARM_VFP_FPEXC: + if (IS_USER(s)) + return 1; + /* TODO: VFP subarchitecture support. + * For now, keep the EN bit only */ + tmp = load_reg(s, rd); + tcg_gen_andi_i32(tmp, tmp, 1 << 30); + store_cpu_field(tmp, vfp.xregs[rn]); + gen_lookup_tb(s); + break; + case ARM_VFP_FPINST: + case ARM_VFP_FPINST2: + if (IS_USER(s)) { + return 1; + } + tmp = load_reg(s, rd); + store_cpu_field(tmp, vfp.xregs[rn]); + break; + default: + return 1; + } + } else { + tmp = load_reg(s, rd); + gen_vfp_msr(tmp); + gen_mov_vreg_F0(0, rn); + } + } + } + } else { + /* data processing */ + /* The opcode is in bits 23, 21, 20 and 6. */ + op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1); + if (dp) { + if (op == 15) { + /* rn is opcode */ + rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1); + } else { + /* rn is register number */ + VFP_DREG_N(rn, insn); + } + + if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) || + ((rn & 0x1e) == 0x6))) { + /* Integer or single/half precision destination. */ + rd = VFP_SREG_D(insn); + } else { + VFP_DREG_D(rd, insn); + } + if (op == 15 && + (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) || + ((rn & 0x1e) == 0x4))) { + /* VCVT from int or half precision is always from S reg + * regardless of dp bit. VCVT with immediate frac_bits + * has same format as SREG_M. + */ + rm = VFP_SREG_M(insn); + } else { + VFP_DREG_M(rm, insn); + } + } else { + rn = VFP_SREG_N(insn); + if (op == 15 && rn == 15) { + /* Double precision destination. */ + VFP_DREG_D(rd, insn); + } else { + rd = VFP_SREG_D(insn); + } + /* NB that we implicitly rely on the encoding for the frac_bits + * in VCVT of fixed to float being the same as that of an SREG_M + */ + rm = VFP_SREG_M(insn); + } + + veclen = s->vec_len; + if (op == 15 && rn > 3) + veclen = 0; + + /* Shut up compiler warnings. */ + delta_m = 0; + delta_d = 0; + bank_mask = 0; + + if (veclen > 0) { + if (dp) + bank_mask = 0xc; + else + bank_mask = 0x18; + + /* Figure out what type of vector operation this is. */ + if ((rd & bank_mask) == 0) { + /* scalar */ + veclen = 0; + } else { + if (dp) + delta_d = (s->vec_stride >> 1) + 1; + else + delta_d = s->vec_stride + 1; + + if ((rm & bank_mask) == 0) { + /* mixed scalar/vector */ + delta_m = 0; + } else { + /* vector */ + delta_m = delta_d; + } + } + } + + /* Load the initial operands. */ + if (op == 15) { + switch (rn) { + case 16: + case 17: + /* Integer source */ + gen_mov_F0_vreg(0, rm); + break; + case 8: + case 9: + /* Compare */ + gen_mov_F0_vreg(dp, rd); + gen_mov_F1_vreg(dp, rm); + break; + case 10: + case 11: + /* Compare with zero */ + gen_mov_F0_vreg(dp, rd); + gen_vfp_F1_ld0(dp); + break; + case 20: + case 21: + case 22: + case 23: + case 28: + case 29: + case 30: + case 31: + /* Source and destination the same. */ + gen_mov_F0_vreg(dp, rd); + break; + case 4: + case 5: + case 6: + case 7: + /* VCVTB, VCVTT: only present with the halfprec extension + * UNPREDICTABLE if bit 8 is set prior to ARMv8 + * (we choose to UNDEF) + */ + if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) || + !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) { + return 1; + } + if (!extract32(rn, 1, 1)) { + /* Half precision source. */ + gen_mov_F0_vreg(0, rm); + break; + } + /* Otherwise fall through */ + default: + /* One source operand. */ + gen_mov_F0_vreg(dp, rm); + break; + } + } else { + /* Two source operands. */ + gen_mov_F0_vreg(dp, rn); + gen_mov_F1_vreg(dp, rm); + } + + for (;;) { + /* Perform the calculation. */ + switch (op) { + case 0: /* VMLA: fd + (fn * fm) */ + /* Note that order of inputs to the add matters for NaNs */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); + gen_vfp_add(dp); + break; + case 1: /* VMLS: fd + -(fn * fm) */ + gen_vfp_mul(dp); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); + gen_vfp_add(dp); + break; + case 2: /* VNMLS: -fd + (fn * fm) */ + /* Note that it isn't valid to replace (-A + B) with (B - A) + * or similar plausible looking simplifications + * because this will give wrong results for NaNs. + */ + gen_vfp_F1_mul(dp); + gen_mov_F0_vreg(dp, rd); + gen_vfp_neg(dp); + gen_vfp_add(dp); + break; + case 3: /* VNMLA: -fd + -(fn * fm) */ + gen_vfp_mul(dp); + gen_vfp_F1_neg(dp); + gen_mov_F0_vreg(dp, rd); + gen_vfp_neg(dp); + gen_vfp_add(dp); + break; + case 4: /* mul: fn * fm */ + gen_vfp_mul(dp); + break; + case 5: /* nmul: -(fn * fm) */ + gen_vfp_mul(dp); + gen_vfp_neg(dp); + break; + case 6: /* add: fn + fm */ + gen_vfp_add(dp); + break; + case 7: /* sub: fn - fm */ + gen_vfp_sub(dp); + break; + case 8: /* div: fn / fm */ + gen_vfp_div(dp); + break; + case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */ + case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */ + case 12: /* VFMA : fd = muladd( fd, fn, fm) */ + case 13: /* VFMS : fd = muladd( fd, -fn, fm) */ + /* These are fused multiply-add, and must be done as one + * floating point operation with no rounding between the + * multiplication and addition steps. + * NB that doing the negations here as separate steps is + * correct : an input NaN should come out with its sign bit + * flipped if it is a negated-input. + */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) { + return 1; + } + if (dp) { + TCGv_ptr fpst; + TCGv_i64 frd; + if (op & 1) { + /* VFNMS, VFMS */ + gen_helper_vfp_negd(cpu_F0d, cpu_F0d); + } + frd = tcg_temp_new_i64(); + tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd)); + if (op & 2) { + /* VFNMA, VFNMS */ + gen_helper_vfp_negd(frd, frd); + } + fpst = get_fpstatus_ptr(0); + gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d, + cpu_F1d, frd, fpst); + tcg_temp_free_ptr(fpst); + tcg_temp_free_i64(frd); + } else { + TCGv_ptr fpst; + TCGv_i32 frd; + if (op & 1) { + /* VFNMS, VFMS */ + gen_helper_vfp_negs(cpu_F0s, cpu_F0s); + } + frd = tcg_temp_new_i32(); + tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd)); + if (op & 2) { + gen_helper_vfp_negs(frd, frd); + } + fpst = get_fpstatus_ptr(0); + gen_helper_vfp_muladds(cpu_F0s, cpu_F0s, + cpu_F1s, frd, fpst); + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(frd); + } + break; + case 14: /* fconst */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + + n = (insn << 12) & 0x80000000; + i = ((insn >> 12) & 0x70) | (insn & 0xf); + if (dp) { + if (i & 0x40) + i |= 0x3f80; + else + i |= 0x4000; + n |= i << 16; + tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32); + } else { + if (i & 0x40) + i |= 0x780; + else + i |= 0x800; + n |= i << 19; + tcg_gen_movi_i32(cpu_F0s, n); + } + break; + case 15: /* extension space */ + switch (rn) { + case 0: /* cpy */ + /* no-op */ + break; + case 1: /* abs */ + gen_vfp_abs(dp); + break; + case 2: /* neg */ + gen_vfp_neg(dp); + break; + case 3: /* sqrt */ + gen_vfp_sqrt(dp); + break; + case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */ + tmp = gen_vfp_mrs(); + tcg_gen_ext16u_i32(tmp, tmp); + if (dp) { + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, + cpu_env); + } else { + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, + cpu_env); + } + tcg_temp_free_i32(tmp); + break; + case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */ + tmp = gen_vfp_mrs(); + tcg_gen_shri_i32(tmp, tmp, 16); + if (dp) { + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, + cpu_env); + } else { + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, + cpu_env); + } + tcg_temp_free_i32(tmp); + break; + case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */ + tmp = tcg_temp_new_i32(); + if (dp) { + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, + cpu_env); + } else { + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, + cpu_env); + } + gen_mov_F0_vreg(0, rd); + tmp2 = gen_vfp_mrs(); + tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + gen_vfp_msr(tmp); + break; + case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */ + tmp = tcg_temp_new_i32(); + if (dp) { + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, + cpu_env); + } else { + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, + cpu_env); + } + tcg_gen_shli_i32(tmp, tmp, 16); + gen_mov_F0_vreg(0, rd); + tmp2 = gen_vfp_mrs(); + tcg_gen_ext16u_i32(tmp2, tmp2); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + gen_vfp_msr(tmp); + break; + case 8: /* cmp */ + gen_vfp_cmp(dp); + break; + case 9: /* cmpe */ + gen_vfp_cmpe(dp); + break; + case 10: /* cmpz */ + gen_vfp_cmp(dp); + break; + case 11: /* cmpez */ + gen_vfp_F1_ld0(dp); + gen_vfp_cmpe(dp); + break; + case 12: /* vrintr */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_rintd(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints(cpu_F0s, cpu_F0s, fpst); + } + tcg_temp_free_ptr(fpst); + break; + } + case 13: /* vrintz */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + TCGv_i32 tcg_rmode; + tcg_rmode = tcg_const_i32(float_round_to_zero); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + if (dp) { + gen_helper_rintd(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints(cpu_F0s, cpu_F0s, fpst); + } + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_ptr(fpst); + break; + } + case 14: /* vrintx */ + { + TCGv_ptr fpst = get_fpstatus_ptr(0); + if (dp) { + gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst); + } else { + gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst); + } + tcg_temp_free_ptr(fpst); + break; + } + case 15: /* single<->double conversion */ + if (dp) + gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env); + else + gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env); + break; + case 16: /* fuito */ + gen_vfp_uito(dp, 0); + break; + case 17: /* fsito */ + gen_vfp_sito(dp, 0); + break; + case 20: /* fshto */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_shto(dp, 16 - rm, 0); + break; + case 21: /* fslto */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_slto(dp, 32 - rm, 0); + break; + case 22: /* fuhto */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_uhto(dp, 16 - rm, 0); + break; + case 23: /* fulto */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_ulto(dp, 32 - rm, 0); + break; + case 24: /* ftoui */ + gen_vfp_toui(dp, 0); + break; + case 25: /* ftouiz */ + gen_vfp_touiz(dp, 0); + break; + case 26: /* ftosi */ + gen_vfp_tosi(dp, 0); + break; + case 27: /* ftosiz */ + gen_vfp_tosiz(dp, 0); + break; + case 28: /* ftosh */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_tosh(dp, 16 - rm, 0); + break; + case 29: /* ftosl */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_tosl(dp, 32 - rm, 0); + break; + case 30: /* ftouh */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_touh(dp, 16 - rm, 0); + break; + case 31: /* ftoul */ + if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) { + return 1; + } + gen_vfp_toul(dp, 32 - rm, 0); + break; + default: /* undefined */ + return 1; + } + break; + default: /* undefined */ + return 1; + } + + /* Write back the result. */ + if (op == 15 && (rn >= 8 && rn <= 11)) { + /* Comparison, do nothing. */ + } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 || + (rn & 0x1e) == 0x6)) { + /* VCVT double to int: always integer result. + * VCVT double to half precision is always a single + * precision result. + */ + gen_mov_vreg_F0(0, rd); + } else if (op == 15 && rn == 15) { + /* conversion */ + gen_mov_vreg_F0(!dp, rd); + } else { + gen_mov_vreg_F0(dp, rd); + } + + /* break out of the loop if we have finished */ + if (veclen == 0) + break; + + if (op == 15 && delta_m == 0) { + /* single source one-many */ + while (veclen--) { + rd = ((rd + delta_d) & (bank_mask - 1)) + | (rd & bank_mask); + gen_mov_vreg_F0(dp, rd); + } + break; + } + /* Setup the next operands. */ + veclen--; + rd = ((rd + delta_d) & (bank_mask - 1)) + | (rd & bank_mask); + + if (op == 15) { + /* One source operand. */ + rm = ((rm + delta_m) & (bank_mask - 1)) + | (rm & bank_mask); + gen_mov_F0_vreg(dp, rm); + } else { + /* Two source operands. */ + rn = ((rn + delta_d) & (bank_mask - 1)) + | (rn & bank_mask); + gen_mov_F0_vreg(dp, rn); + if (delta_m) { + rm = ((rm + delta_m) & (bank_mask - 1)) + | (rm & bank_mask); + gen_mov_F1_vreg(dp, rm); + } + } + } + } + break; + case 0xc: + case 0xd: + if ((insn & 0x03e00000) == 0x00400000) { + /* two-register transfer */ + rn = (insn >> 16) & 0xf; + rd = (insn >> 12) & 0xf; + if (dp) { + VFP_DREG_M(rm, insn); + } else { + rm = VFP_SREG_M(insn); + } + + if (insn & ARM_CP_RW_BIT) { + /* vfp->arm */ + if (dp) { + gen_mov_F0_vreg(0, rm * 2); + tmp = gen_vfp_mrs(); + store_reg(s, rd, tmp); + gen_mov_F0_vreg(0, rm * 2 + 1); + tmp = gen_vfp_mrs(); + store_reg(s, rn, tmp); + } else { + gen_mov_F0_vreg(0, rm); + tmp = gen_vfp_mrs(); + store_reg(s, rd, tmp); + gen_mov_F0_vreg(0, rm + 1); + tmp = gen_vfp_mrs(); + store_reg(s, rn, tmp); + } + } else { + /* arm->vfp */ + if (dp) { + tmp = load_reg(s, rd); + gen_vfp_msr(tmp); + gen_mov_vreg_F0(0, rm * 2); + tmp = load_reg(s, rn); + gen_vfp_msr(tmp); + gen_mov_vreg_F0(0, rm * 2 + 1); + } else { + tmp = load_reg(s, rd); + gen_vfp_msr(tmp); + gen_mov_vreg_F0(0, rm); + tmp = load_reg(s, rn); + gen_vfp_msr(tmp); + gen_mov_vreg_F0(0, rm + 1); + } + } + } else { + /* Load/store */ + rn = (insn >> 16) & 0xf; + if (dp) + VFP_DREG_D(rd, insn); + else + rd = VFP_SREG_D(insn); + if ((insn & 0x01200000) == 0x01000000) { + /* Single load/store */ + offset = (insn & 0xff) << 2; + if ((insn & (1 << 23)) == 0) + offset = -offset; + if (s->thumb && rn == 15) { + /* This is actually UNPREDICTABLE */ + addr = tcg_temp_new_i32(); + tcg_gen_movi_i32(addr, s->pc & ~2); + } else { + addr = load_reg(s, rn); + } + tcg_gen_addi_i32(addr, addr, offset); + if (insn & (1 << 20)) { + gen_vfp_ld(s, dp, addr); + gen_mov_vreg_F0(dp, rd); + } else { + gen_mov_F0_vreg(dp, rd); + gen_vfp_st(s, dp, addr); + } + tcg_temp_free_i32(addr); + } else { + /* load/store multiple */ + int w = insn & (1 << 21); + if (dp) + n = (insn >> 1) & 0x7f; + else + n = insn & 0xff; + + if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) { + /* P == U , W == 1 => UNDEF */ + return 1; + } + if (n == 0 || (rd + n) > 32 || (dp && n > 16)) { + /* UNPREDICTABLE cases for bad immediates: we choose to + * UNDEF to avoid generating huge numbers of TCG ops + */ + return 1; + } + if (rn == 15 && w) { + /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */ + return 1; + } + + if (s->thumb && rn == 15) { + /* This is actually UNPREDICTABLE */ + addr = tcg_temp_new_i32(); + tcg_gen_movi_i32(addr, s->pc & ~2); + } else { + addr = load_reg(s, rn); + } + if (insn & (1 << 24)) /* pre-decrement */ + tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2)); + + if (dp) + offset = 8; + else + offset = 4; + for (i = 0; i < n; i++) { + if (insn & ARM_CP_RW_BIT) { + /* load */ + gen_vfp_ld(s, dp, addr); + gen_mov_vreg_F0(dp, rd + i); + } else { + /* store */ + gen_mov_F0_vreg(dp, rd + i); + gen_vfp_st(s, dp, addr); + } + tcg_gen_addi_i32(addr, addr, offset); + } + if (w) { + /* writeback */ + if (insn & (1 << 24)) + offset = -offset * n; + else if (dp && (insn & 1)) + offset = 4; + else + offset = 0; + + if (offset != 0) + tcg_gen_addi_i32(addr, addr, offset); + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + } + } + break; + default: + /* Should never happen. */ + return 1; + } + return 0; +} + +static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +{ + TranslationBlock *tb; + + tb = s->tb; + if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) { + tcg_gen_goto_tb(n); + gen_set_pc_im(s, dest); + tcg_gen_exit_tb((uintptr_t)tb + n); + } else { + gen_set_pc_im(s, dest); + tcg_gen_exit_tb(0); + } +} + +static inline void gen_jmp (DisasContext *s, uint32_t dest) +{ + if (unlikely(s->singlestep_enabled || s->ss_active)) { + /* An indirect jump so that we still trigger the debug exception. */ + if (s->thumb) + dest |= 1; + gen_bx_im(s, dest); + } else { + gen_goto_tb(s, 0, dest); + s->is_jmp = DISAS_TB_JUMP; + } +} + +static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y) +{ + if (x) + tcg_gen_sari_i32(t0, t0, 16); + else + gen_sxth(t0); + if (y) + tcg_gen_sari_i32(t1, t1, 16); + else + gen_sxth(t1); + tcg_gen_mul_i32(t0, t0, t1); +} + +/* Return the mask of PSR bits set by a MSR instruction. */ +static uint32_t msr_mask(DisasContext *s, int flags, int spsr) +{ + uint32_t mask; + + mask = 0; + if (flags & (1 << 0)) + mask |= 0xff; + if (flags & (1 << 1)) + mask |= 0xff00; + if (flags & (1 << 2)) + mask |= 0xff0000; + if (flags & (1 << 3)) + mask |= 0xff000000; + + /* Mask out undefined bits. */ + mask &= ~CPSR_RESERVED; + if (!arm_dc_feature(s, ARM_FEATURE_V4T)) { + mask &= ~CPSR_T; + } + if (!arm_dc_feature(s, ARM_FEATURE_V5)) { + mask &= ~CPSR_Q; /* V5TE in reality*/ + } + if (!arm_dc_feature(s, ARM_FEATURE_V6)) { + mask &= ~(CPSR_E | CPSR_GE); + } + if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) { + mask &= ~CPSR_IT; + } + /* Mask out execution state and reserved bits. */ + if (!spsr) { + mask &= ~(CPSR_EXEC | CPSR_RESERVED); + } + /* Mask out privileged bits. */ + if (IS_USER(s)) + mask &= CPSR_USER; + return mask; +} + +/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */ +static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0) +{ + TCGv_i32 tmp; + if (spsr) { + /* ??? This is also undefined in system mode. */ + if (IS_USER(s)) + return 1; + + tmp = load_cpu_field(spsr); + tcg_gen_andi_i32(tmp, tmp, ~mask); + tcg_gen_andi_i32(t0, t0, mask); + tcg_gen_or_i32(tmp, tmp, t0); + store_cpu_field(tmp, spsr); + } else { + gen_set_cpsr(t0, mask); + } + tcg_temp_free_i32(t0); + gen_lookup_tb(s); + return 0; +} + +/* Returns nonzero if access to the PSR is not permitted. */ +static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val) +{ + TCGv_i32 tmp; + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + return gen_set_psr(s, mask, spsr, tmp); +} + +/* Generate an old-style exception return. Marks pc as dead. */ +static void gen_exception_return(DisasContext *s, TCGv_i32 pc) +{ + TCGv_i32 tmp; + store_reg(s, 15, pc); + tmp = load_cpu_field(spsr); + gen_set_cpsr(tmp, CPSR_ERET_MASK); + tcg_temp_free_i32(tmp); + s->is_jmp = DISAS_UPDATE; +} + +/* Generate a v6 exception return. Marks both values as dead. */ +static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr) +{ + gen_set_cpsr(cpsr, CPSR_ERET_MASK); + tcg_temp_free_i32(cpsr); + store_reg(s, 15, pc); + s->is_jmp = DISAS_UPDATE; +} + +static void gen_nop_hint(DisasContext *s, int val) +{ + switch (val) { + case 1: /* yield */ + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_YIELD; + break; + case 3: /* wfi */ + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_WFI; + break; + case 2: /* wfe */ + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_WFE; + break; + case 4: /* sev */ + case 5: /* sevl */ + /* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */ + default: /* nop */ + break; + } +} + +#define CPU_V001 cpu_V0, cpu_V0, cpu_V1 + +static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1) +{ + switch (size) { + case 0: gen_helper_neon_add_u8(t0, t0, t1); break; + case 1: gen_helper_neon_add_u16(t0, t0, t1); break; + case 2: tcg_gen_add_i32(t0, t0, t1); break; + default: abort(); + } +} + +static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1) +{ + switch (size) { + case 0: gen_helper_neon_sub_u8(t0, t1, t0); break; + case 1: gen_helper_neon_sub_u16(t0, t1, t0); break; + case 2: tcg_gen_sub_i32(t0, t1, t0); break; + default: return; + } +} + +/* 32-bit pairwise ops end up the same as the elementwise versions. */ +#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32 +#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32 +#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 +#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 + +#define GEN_NEON_INTEGER_OP_ENV(name) do { \ + switch ((size << 1) | u) { \ + case 0: \ + gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \ + break; \ + case 1: \ + gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \ + break; \ + case 2: \ + gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \ + break; \ + case 3: \ + gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \ + break; \ + case 4: \ + gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \ + break; \ + case 5: \ + gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \ + break; \ + default: return 1; \ + }} while (0) + +#define GEN_NEON_INTEGER_OP(name) do { \ + switch ((size << 1) | u) { \ + case 0: \ + gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \ + break; \ + case 1: \ + gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \ + break; \ + case 2: \ + gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \ + break; \ + case 3: \ + gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \ + break; \ + case 4: \ + gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \ + break; \ + case 5: \ + gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \ + break; \ + default: return 1; \ + }} while (0) + +static TCGv_i32 neon_load_scratch(int scratch) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); + return tmp; +} + +static void neon_store_scratch(int scratch, TCGv_i32 var) +{ + tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch])); + tcg_temp_free_i32(var); +} + +static inline TCGv_i32 neon_get_scalar(int size, int reg) +{ + TCGv_i32 tmp; + if (size == 1) { + tmp = neon_load_reg(reg & 7, reg >> 4); + if (reg & 8) { + gen_neon_dup_high16(tmp); + } else { + gen_neon_dup_low16(tmp); + } + } else { + tmp = neon_load_reg(reg & 15, reg >> 4); + } + return tmp; +} + +static int gen_neon_unzip(int rd, int rm, int size, int q) +{ + TCGv_i32 tmp, tmp2; + if (!q && size == 2) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { + switch (size) { + case 0: + gen_helper_neon_qunzip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_qunzip16(cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qunzip32(cpu_env, tmp, tmp2); + break; + default: + abort(); + } + } else { + switch (size) { + case 0: + gen_helper_neon_unzip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_unzip16(cpu_env, tmp, tmp2); + break; + default: + abort(); + } + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + return 0; +} + +static int gen_neon_zip(int rd, int rm, int size, int q) +{ + TCGv_i32 tmp, tmp2; + if (!q && size == 2) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { + switch (size) { + case 0: + gen_helper_neon_qzip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_qzip16(cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qzip32(cpu_env, tmp, tmp2); + break; + default: + abort(); + } + } else { + switch (size) { + case 0: + gen_helper_neon_zip8(cpu_env, tmp, tmp2); + break; + case 1: + gen_helper_neon_zip16(cpu_env, tmp, tmp2); + break; + default: + abort(); + } + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + return 0; +} + +static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); + + tcg_gen_shli_i32(rd, t0, 8); + tcg_gen_andi_i32(rd, rd, 0xff00ff00); + tcg_gen_andi_i32(tmp, t1, 0x00ff00ff); + tcg_gen_or_i32(rd, rd, tmp); + + tcg_gen_shri_i32(t1, t1, 8); + tcg_gen_andi_i32(t1, t1, 0x00ff00ff); + tcg_gen_andi_i32(tmp, t0, 0xff00ff00); + tcg_gen_or_i32(t1, t1, tmp); + tcg_gen_mov_i32(t0, rd); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(rd); +} + +static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) +{ + TCGv_i32 rd, tmp; + + rd = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); + + tcg_gen_shli_i32(rd, t0, 16); + tcg_gen_andi_i32(tmp, t1, 0xffff); + tcg_gen_or_i32(rd, rd, tmp); + tcg_gen_shri_i32(t1, t1, 16); + tcg_gen_andi_i32(tmp, t0, 0xffff0000); + tcg_gen_or_i32(t1, t1, tmp); + tcg_gen_mov_i32(t0, rd); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(rd); +} + + +static struct { + int nregs; + int interleave; + int spacing; +} neon_ls_element_type[11] = { + {4, 4, 1}, + {4, 4, 2}, + {4, 1, 1}, + {4, 2, 1}, + {3, 3, 1}, + {3, 3, 2}, + {3, 1, 1}, + {1, 1, 1}, + {2, 2, 1}, + {2, 2, 2}, + {2, 1, 1} +}; + +/* Translate a NEON load/store element instruction. Return nonzero if the + instruction is invalid. */ +static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) +{ + int rd, rn, rm; + int op; + int nregs; + int interleave; + int spacing; + int stride; + int size; + int reg; + int pass; + int load; + int shift; + int n; + TCGv_i32 addr; + TCGv_i32 tmp; + TCGv_i32 tmp2; + TCGv_i64 tmp64; + + /* FIXME: this access check should not take precedence over UNDEF + * for invalid encodings; we will generate incorrect syndrome information + * for attempts to execute invalid vfp/neon encodings with FP disabled. + */ + if (s->fp_excp_el) { + gen_exception_insn(s, 4, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el); + return 0; + } + + if (!s->vfp_enabled) + return 1; + VFP_DREG_D(rd, insn); + rn = (insn >> 16) & 0xf; + rm = insn & 0xf; + load = (insn & (1 << 21)) != 0; + if ((insn & (1 << 23)) == 0) { + /* Load store all elements. */ + op = (insn >> 8) & 0xf; + size = (insn >> 6) & 3; + if (op > 10) + return 1; + /* Catch UNDEF cases for bad values of align field */ + switch (op & 0xc) { + case 4: + if (((insn >> 5) & 1) == 1) { + return 1; + } + break; + case 8: + if (((insn >> 4) & 3) == 3) { + return 1; + } + break; + default: + break; + } + nregs = neon_ls_element_type[op].nregs; + interleave = neon_ls_element_type[op].interleave; + spacing = neon_ls_element_type[op].spacing; + if (size == 3 && (interleave | spacing) != 1) + return 1; + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, rn); + stride = (1 << size) * interleave; + for (reg = 0; reg < nregs; reg++) { + if (interleave > 2 || (interleave == 2 && nregs == 2)) { + load_reg_var(s, addr, rn); + tcg_gen_addi_i32(addr, addr, (1 << size) * reg); + } else if (interleave == 2 && nregs == 4 && reg == 2) { + load_reg_var(s, addr, rn); + tcg_gen_addi_i32(addr, addr, 1 << size); + } + if (size == 3) { + tmp64 = tcg_temp_new_i64(); + if (load) { + gen_aa32_ld64(tmp64, addr, get_mem_index(s)); + neon_store_reg64(tmp64, rd); + } else { + neon_load_reg64(tmp64, rd); + gen_aa32_st64(tmp64, addr, get_mem_index(s)); + } + tcg_temp_free_i64(tmp64); + tcg_gen_addi_i32(addr, addr, stride); + } else { + for (pass = 0; pass < 2; pass++) { + if (size == 2) { + if (load) { + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + neon_store_reg(rd, pass, tmp); + } else { + tmp = neon_load_reg(rd, pass); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_gen_addi_i32(addr, addr, stride); + } else if (size == 1) { + if (load) { + tmp = tcg_temp_new_i32(); + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + tcg_gen_addi_i32(addr, addr, stride); + tmp2 = tcg_temp_new_i32(); + gen_aa32_ld16u(tmp2, addr, get_mem_index(s)); + tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + neon_store_reg(rd, pass, tmp); + } else { + tmp = neon_load_reg(rd, pass); + tmp2 = tcg_temp_new_i32(); + tcg_gen_shri_i32(tmp2, tmp, 16); + gen_aa32_st16(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, stride); + gen_aa32_st16(tmp2, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp2); + tcg_gen_addi_i32(addr, addr, stride); + } + } else /* size == 0 */ { + if (load) { + TCGV_UNUSED_I32(tmp2); + for (n = 0; n < 4; n++) { + tmp = tcg_temp_new_i32(); + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + tcg_gen_addi_i32(addr, addr, stride); + if (n == 0) { + tmp2 = tmp; + } else { + tcg_gen_shli_i32(tmp, tmp, n * 8); + tcg_gen_or_i32(tmp2, tmp2, tmp); + tcg_temp_free_i32(tmp); + } + } + neon_store_reg(rd, pass, tmp2); + } else { + tmp2 = neon_load_reg(rd, pass); + for (n = 0; n < 4; n++) { + tmp = tcg_temp_new_i32(); + if (n == 0) { + tcg_gen_mov_i32(tmp, tmp2); + } else { + tcg_gen_shri_i32(tmp, tmp2, n * 8); + } + gen_aa32_st8(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, stride); + } + tcg_temp_free_i32(tmp2); + } + } + } + } + rd += spacing; + } + tcg_temp_free_i32(addr); + stride = nregs * 8; + } else { + size = (insn >> 10) & 3; + if (size == 3) { + /* Load single element to all lanes. */ + int a = (insn >> 4) & 1; + if (!load) { + return 1; + } + size = (insn >> 6) & 3; + nregs = ((insn >> 8) & 3) + 1; + + if (size == 3) { + if (nregs != 4 || a == 0) { + return 1; + } + /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */ + size = 2; + } + if (nregs == 1 && a == 1 && size == 0) { + return 1; + } + if (nregs == 3 && a == 1) { + return 1; + } + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, rn); + if (nregs == 1) { + /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */ + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + if (insn & (1 << 5)) { + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1)); + } + tcg_temp_free_i32(tmp); + } else { + /* VLD2/3/4 to all lanes: bit 5 indicates register stride */ + stride = (insn & (1 << 5)) ? 2 : 1; + for (reg = 0; reg < nregs; reg++) { + tmp = gen_load_and_replicate(s, addr, size); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, 1 << size); + rd += stride; + } + } + tcg_temp_free_i32(addr); + stride = (1 << size) * nregs; + } else { + /* Single element. */ + int idx = (insn >> 4) & 0xf; + pass = (insn >> 7) & 1; + switch (size) { + case 0: + shift = ((insn >> 5) & 3) * 8; + stride = 1; + break; + case 1: + shift = ((insn >> 6) & 1) * 16; + stride = (insn & (1 << 5)) ? 2 : 1; + break; + case 2: + shift = 0; + stride = (insn & (1 << 6)) ? 2 : 1; + break; + default: + abort(); + } + nregs = ((insn >> 8) & 3) + 1; + /* Catch the UNDEF cases. This is unavoidably a bit messy. */ + switch (nregs) { + case 1: + if (((idx & (1 << size)) != 0) || + (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) { + return 1; + } + break; + case 3: + if ((idx & 1) != 0) { + return 1; + } + /* fall through */ + case 2: + if (size == 2 && (idx & 2) != 0) { + return 1; + } + break; + case 4: + if ((size == 2) && ((idx & 3) == 3)) { + return 1; + } + break; + default: + abort(); + } + if ((rd + stride * (nregs - 1)) > 31) { + /* Attempts to write off the end of the register file + * are UNPREDICTABLE; we choose to UNDEF because otherwise + * the neon_load_reg() would write off the end of the array. + */ + return 1; + } + addr = tcg_temp_new_i32(); + load_reg_var(s, addr, rn); + for (reg = 0; reg < nregs; reg++) { + if (load) { + tmp = tcg_temp_new_i32(); + switch (size) { + case 0: + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + break; + case 1: + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + break; + case 2: + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + break; + default: /* Avoid compiler warnings. */ + abort(); + } + if (size != 2) { + tmp2 = neon_load_reg(rd, pass); + tcg_gen_deposit_i32(tmp, tmp2, tmp, + shift, size ? 16 : 8); + tcg_temp_free_i32(tmp2); + } + neon_store_reg(rd, pass, tmp); + } else { /* Store */ + tmp = neon_load_reg(rd, pass); + if (shift) + tcg_gen_shri_i32(tmp, tmp, shift); + switch (size) { + case 0: + gen_aa32_st8(tmp, addr, get_mem_index(s)); + break; + case 1: + gen_aa32_st16(tmp, addr, get_mem_index(s)); + break; + case 2: + gen_aa32_st32(tmp, addr, get_mem_index(s)); + break; + } + tcg_temp_free_i32(tmp); + } + rd += stride; + tcg_gen_addi_i32(addr, addr, 1 << size); + } + tcg_temp_free_i32(addr); + stride = nregs * (1 << size); + } + } + if (rm != 15) { + TCGv_i32 base; + + base = load_reg(s, rn); + if (rm == 13) { + tcg_gen_addi_i32(base, base, stride); + } else { + TCGv_i32 index; + index = load_reg(s, rm); + tcg_gen_add_i32(base, base, index); + tcg_temp_free_i32(index); + } + store_reg(s, rn, base); + } + return 0; +} + +/* Bitwise select. dest = c ? t : f. Clobbers T and F. */ +static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c) +{ + tcg_gen_and_i32(t, t, c); + tcg_gen_andc_i32(f, f, c); + tcg_gen_or_i32(dest, t, f); +} + +static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src) +{ + switch (size) { + case 0: gen_helper_neon_narrow_u8(dest, src); break; + case 1: gen_helper_neon_narrow_u16(dest, src); break; + case 2: tcg_gen_trunc_i64_i32(dest, src); break; + default: abort(); + } +} + +static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src) +{ + switch (size) { + case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break; + case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break; + case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break; + default: abort(); + } +} + +static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src) +{ + switch (size) { + case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break; + case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break; + case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break; + default: abort(); + } +} + +static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src) +{ + switch (size) { + case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break; + case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break; + case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break; + default: abort(); + } +} + +static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift, + int q, int u) +{ + if (q) { + if (u) { + switch (size) { + case 1: gen_helper_neon_rshl_u16(var, var, shift); break; + case 2: gen_helper_neon_rshl_u32(var, var, shift); break; + default: abort(); + } + } else { + switch (size) { + case 1: gen_helper_neon_rshl_s16(var, var, shift); break; + case 2: gen_helper_neon_rshl_s32(var, var, shift); break; + default: abort(); + } + } + } else { + if (u) { + switch (size) { + case 1: gen_helper_neon_shl_u16(var, var, shift); break; + case 2: gen_helper_neon_shl_u32(var, var, shift); break; + default: abort(); + } + } else { + switch (size) { + case 1: gen_helper_neon_shl_s16(var, var, shift); break; + case 2: gen_helper_neon_shl_s32(var, var, shift); break; + default: abort(); + } + } + } +} + +static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u) +{ + if (u) { + switch (size) { + case 0: gen_helper_neon_widen_u8(dest, src); break; + case 1: gen_helper_neon_widen_u16(dest, src); break; + case 2: tcg_gen_extu_i32_i64(dest, src); break; + default: abort(); + } + } else { + switch (size) { + case 0: gen_helper_neon_widen_s8(dest, src); break; + case 1: gen_helper_neon_widen_s16(dest, src); break; + case 2: tcg_gen_ext_i32_i64(dest, src); break; + default: abort(); + } + } + tcg_temp_free_i32(src); +} + +static inline void gen_neon_addl(int size) +{ + switch (size) { + case 0: gen_helper_neon_addl_u16(CPU_V001); break; + case 1: gen_helper_neon_addl_u32(CPU_V001); break; + case 2: tcg_gen_add_i64(CPU_V001); break; + default: abort(); + } +} + +static inline void gen_neon_subl(int size) +{ + switch (size) { + case 0: gen_helper_neon_subl_u16(CPU_V001); break; + case 1: gen_helper_neon_subl_u32(CPU_V001); break; + case 2: tcg_gen_sub_i64(CPU_V001); break; + default: abort(); + } +} + +static inline void gen_neon_negl(TCGv_i64 var, int size) +{ + switch (size) { + case 0: gen_helper_neon_negl_u16(var, var); break; + case 1: gen_helper_neon_negl_u32(var, var); break; + case 2: + tcg_gen_neg_i64(var, var); + break; + default: abort(); + } +} + +static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size) +{ + switch (size) { + case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break; + case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break; + default: abort(); + } +} + +static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b, + int size, int u) +{ + TCGv_i64 tmp; + + switch ((size << 1) | u) { + case 0: gen_helper_neon_mull_s8(dest, a, b); break; + case 1: gen_helper_neon_mull_u8(dest, a, b); break; + case 2: gen_helper_neon_mull_s16(dest, a, b); break; + case 3: gen_helper_neon_mull_u16(dest, a, b); break; + case 4: + tmp = gen_muls_i64_i32(a, b); + tcg_gen_mov_i64(dest, tmp); + tcg_temp_free_i64(tmp); + break; + case 5: + tmp = gen_mulu_i64_i32(a, b); + tcg_gen_mov_i64(dest, tmp); + tcg_temp_free_i64(tmp); + break; + default: abort(); + } + + /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. + Don't forget to clean them now. */ + if (size < 2) { + tcg_temp_free_i32(a); + tcg_temp_free_i32(b); + } +} + +static void gen_neon_narrow_op(int op, int u, int size, + TCGv_i32 dest, TCGv_i64 src) +{ + if (op) { + if (u) { + gen_neon_unarrow_sats(size, dest, src); + } else { + gen_neon_narrow(size, dest, src); + } + } else { + if (u) { + gen_neon_narrow_satu(size, dest, src); + } else { + gen_neon_narrow_sats(size, dest, src); + } + } +} + +/* Symbolic constants for op fields for Neon 3-register same-length. + * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B + * table A7-9. + */ +#define NEON_3R_VHADD 0 +#define NEON_3R_VQADD 1 +#define NEON_3R_VRHADD 2 +#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */ +#define NEON_3R_VHSUB 4 +#define NEON_3R_VQSUB 5 +#define NEON_3R_VCGT 6 +#define NEON_3R_VCGE 7 +#define NEON_3R_VSHL 8 +#define NEON_3R_VQSHL 9 +#define NEON_3R_VRSHL 10 +#define NEON_3R_VQRSHL 11 +#define NEON_3R_VMAX 12 +#define NEON_3R_VMIN 13 +#define NEON_3R_VABD 14 +#define NEON_3R_VABA 15 +#define NEON_3R_VADD_VSUB 16 +#define NEON_3R_VTST_VCEQ 17 +#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */ +#define NEON_3R_VMUL 19 +#define NEON_3R_VPMAX 20 +#define NEON_3R_VPMIN 21 +#define NEON_3R_VQDMULH_VQRDMULH 22 +#define NEON_3R_VPADD 23 +#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */ +#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */ +#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */ +#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */ +#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ +#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ +#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ +#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */ + +static const uint8_t neon_3r_sizes[] = { + [NEON_3R_VHADD] = 0x7, + [NEON_3R_VQADD] = 0xf, + [NEON_3R_VRHADD] = 0x7, + [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */ + [NEON_3R_VHSUB] = 0x7, + [NEON_3R_VQSUB] = 0xf, + [NEON_3R_VCGT] = 0x7, + [NEON_3R_VCGE] = 0x7, + [NEON_3R_VSHL] = 0xf, + [NEON_3R_VQSHL] = 0xf, + [NEON_3R_VRSHL] = 0xf, + [NEON_3R_VQRSHL] = 0xf, + [NEON_3R_VMAX] = 0x7, + [NEON_3R_VMIN] = 0x7, + [NEON_3R_VABD] = 0x7, + [NEON_3R_VABA] = 0x7, + [NEON_3R_VADD_VSUB] = 0xf, + [NEON_3R_VTST_VCEQ] = 0x7, + [NEON_3R_VML] = 0x7, + [NEON_3R_VMUL] = 0x7, + [NEON_3R_VPMAX] = 0x7, + [NEON_3R_VPMIN] = 0x7, + [NEON_3R_VQDMULH_VQRDMULH] = 0x6, + [NEON_3R_VPADD] = 0x7, + [NEON_3R_SHA] = 0xf, /* size field encodes op type */ + [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */ + [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */ +}; + +/* Symbolic constants for op fields for Neon 2-register miscellaneous. + * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B + * table A7-13. + */ +#define NEON_2RM_VREV64 0 +#define NEON_2RM_VREV32 1 +#define NEON_2RM_VREV16 2 +#define NEON_2RM_VPADDL 4 +#define NEON_2RM_VPADDL_U 5 +#define NEON_2RM_AESE 6 /* Includes AESD */ +#define NEON_2RM_AESMC 7 /* Includes AESIMC */ +#define NEON_2RM_VCLS 8 +#define NEON_2RM_VCLZ 9 +#define NEON_2RM_VCNT 10 +#define NEON_2RM_VMVN 11 +#define NEON_2RM_VPADAL 12 +#define NEON_2RM_VPADAL_U 13 +#define NEON_2RM_VQABS 14 +#define NEON_2RM_VQNEG 15 +#define NEON_2RM_VCGT0 16 +#define NEON_2RM_VCGE0 17 +#define NEON_2RM_VCEQ0 18 +#define NEON_2RM_VCLE0 19 +#define NEON_2RM_VCLT0 20 +#define NEON_2RM_SHA1H 21 +#define NEON_2RM_VABS 22 +#define NEON_2RM_VNEG 23 +#define NEON_2RM_VCGT0_F 24 +#define NEON_2RM_VCGE0_F 25 +#define NEON_2RM_VCEQ0_F 26 +#define NEON_2RM_VCLE0_F 27 +#define NEON_2RM_VCLT0_F 28 +#define NEON_2RM_VABS_F 30 +#define NEON_2RM_VNEG_F 31 +#define NEON_2RM_VSWP 32 +#define NEON_2RM_VTRN 33 +#define NEON_2RM_VUZP 34 +#define NEON_2RM_VZIP 35 +#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */ +#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */ +#define NEON_2RM_VSHLL 38 +#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */ +#define NEON_2RM_VRINTN 40 +#define NEON_2RM_VRINTX 41 +#define NEON_2RM_VRINTA 42 +#define NEON_2RM_VRINTZ 43 +#define NEON_2RM_VCVT_F16_F32 44 +#define NEON_2RM_VRINTM 45 +#define NEON_2RM_VCVT_F32_F16 46 +#define NEON_2RM_VRINTP 47 +#define NEON_2RM_VCVTAU 48 +#define NEON_2RM_VCVTAS 49 +#define NEON_2RM_VCVTNU 50 +#define NEON_2RM_VCVTNS 51 +#define NEON_2RM_VCVTPU 52 +#define NEON_2RM_VCVTPS 53 +#define NEON_2RM_VCVTMU 54 +#define NEON_2RM_VCVTMS 55 +#define NEON_2RM_VRECPE 56 +#define NEON_2RM_VRSQRTE 57 +#define NEON_2RM_VRECPE_F 58 +#define NEON_2RM_VRSQRTE_F 59 +#define NEON_2RM_VCVT_FS 60 +#define NEON_2RM_VCVT_FU 61 +#define NEON_2RM_VCVT_SF 62 +#define NEON_2RM_VCVT_UF 63 + +static int neon_2rm_is_float_op(int op) +{ + /* Return true if this neon 2reg-misc op is float-to-float */ + return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F || + (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) || + op == NEON_2RM_VRINTM || + (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) || + op >= NEON_2RM_VRECPE_F); +} + +/* Each entry in this array has bit n set if the insn allows + * size value n (otherwise it will UNDEF). Since unallocated + * op values will have no bits set they always UNDEF. + */ +static const uint8_t neon_2rm_sizes[] = { + [NEON_2RM_VREV64] = 0x7, + [NEON_2RM_VREV32] = 0x3, + [NEON_2RM_VREV16] = 0x1, + [NEON_2RM_VPADDL] = 0x7, + [NEON_2RM_VPADDL_U] = 0x7, + [NEON_2RM_AESE] = 0x1, + [NEON_2RM_AESMC] = 0x1, + [NEON_2RM_VCLS] = 0x7, + [NEON_2RM_VCLZ] = 0x7, + [NEON_2RM_VCNT] = 0x1, + [NEON_2RM_VMVN] = 0x1, + [NEON_2RM_VPADAL] = 0x7, + [NEON_2RM_VPADAL_U] = 0x7, + [NEON_2RM_VQABS] = 0x7, + [NEON_2RM_VQNEG] = 0x7, + [NEON_2RM_VCGT0] = 0x7, + [NEON_2RM_VCGE0] = 0x7, + [NEON_2RM_VCEQ0] = 0x7, + [NEON_2RM_VCLE0] = 0x7, + [NEON_2RM_VCLT0] = 0x7, + [NEON_2RM_SHA1H] = 0x4, + [NEON_2RM_VABS] = 0x7, + [NEON_2RM_VNEG] = 0x7, + [NEON_2RM_VCGT0_F] = 0x4, + [NEON_2RM_VCGE0_F] = 0x4, + [NEON_2RM_VCEQ0_F] = 0x4, + [NEON_2RM_VCLE0_F] = 0x4, + [NEON_2RM_VCLT0_F] = 0x4, + [NEON_2RM_VABS_F] = 0x4, + [NEON_2RM_VNEG_F] = 0x4, + [NEON_2RM_VSWP] = 0x1, + [NEON_2RM_VTRN] = 0x7, + [NEON_2RM_VUZP] = 0x7, + [NEON_2RM_VZIP] = 0x7, + [NEON_2RM_VMOVN] = 0x7, + [NEON_2RM_VQMOVN] = 0x7, + [NEON_2RM_VSHLL] = 0x7, + [NEON_2RM_SHA1SU1] = 0x4, + [NEON_2RM_VRINTN] = 0x4, + [NEON_2RM_VRINTX] = 0x4, + [NEON_2RM_VRINTA] = 0x4, + [NEON_2RM_VRINTZ] = 0x4, + [NEON_2RM_VCVT_F16_F32] = 0x2, + [NEON_2RM_VRINTM] = 0x4, + [NEON_2RM_VCVT_F32_F16] = 0x2, + [NEON_2RM_VRINTP] = 0x4, + [NEON_2RM_VCVTAU] = 0x4, + [NEON_2RM_VCVTAS] = 0x4, + [NEON_2RM_VCVTNU] = 0x4, + [NEON_2RM_VCVTNS] = 0x4, + [NEON_2RM_VCVTPU] = 0x4, + [NEON_2RM_VCVTPS] = 0x4, + [NEON_2RM_VCVTMU] = 0x4, + [NEON_2RM_VCVTMS] = 0x4, + [NEON_2RM_VRECPE] = 0x4, + [NEON_2RM_VRSQRTE] = 0x4, + [NEON_2RM_VRECPE_F] = 0x4, + [NEON_2RM_VRSQRTE_F] = 0x4, + [NEON_2RM_VCVT_FS] = 0x4, + [NEON_2RM_VCVT_FU] = 0x4, + [NEON_2RM_VCVT_SF] = 0x4, + [NEON_2RM_VCVT_UF] = 0x4, +}; + +/* Translate a NEON data processing instruction. Return nonzero if the + instruction is invalid. + We process data in a mixture of 32-bit and 64-bit chunks. + Mostly we use 32-bit chunks so we can use normal scalar instructions. */ + +static int disas_neon_data_insn(DisasContext *s, uint32_t insn) +{ + int op; + int q; + int rd, rn, rm; + int size; + int shift; + int pass; + int count; + int pairwise; + int u; + uint32_t imm, mask; + TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; + TCGv_i64 tmp64; + + /* FIXME: this access check should not take precedence over UNDEF + * for invalid encodings; we will generate incorrect syndrome information + * for attempts to execute invalid vfp/neon encodings with FP disabled. + */ + if (s->fp_excp_el) { + gen_exception_insn(s, 4, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el); + return 0; + } + + if (!s->vfp_enabled) + return 1; + q = (insn & (1 << 6)) != 0; + u = (insn >> 24) & 1; + VFP_DREG_D(rd, insn); + VFP_DREG_N(rn, insn); + VFP_DREG_M(rm, insn); + size = (insn >> 20) & 3; + if ((insn & (1 << 23)) == 0) { + /* Three register same length. */ + op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); + /* Catch invalid op and bad size combinations: UNDEF */ + if ((neon_3r_sizes[op] & (1 << size)) == 0) { + return 1; + } + /* All insns of this form UNDEF for either this condition or the + * superset of cases "Q==1"; we catch the latter later. + */ + if (q && ((rd | rn | rm) & 1)) { + return 1; + } + /* + * The SHA-1/SHA-256 3-register instructions require special treatment + * here, as their size field is overloaded as an op type selector, and + * they all consume their input in a single pass. + */ + if (op == NEON_3R_SHA) { + if (!q) { + return 1; + } + if (!u) { /* SHA-1 */ + if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rn); + tmp3 = tcg_const_i32(rm); + tmp4 = tcg_const_i32(size); + gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4); + tcg_temp_free_i32(tmp4); + } else { /* SHA-256 */ + if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rn); + tmp3 = tcg_const_i32(rm); + switch (size) { + case 0: + gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3); + break; + case 1: + gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3); + break; + case 2: + gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3); + break; + } + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + return 0; + } + if (size == 3 && op != NEON_3R_LOGIC) { + /* 64-bit element instructions. */ + for (pass = 0; pass < (q ? 2 : 1); pass++) { + neon_load_reg64(cpu_V0, rn + pass); + neon_load_reg64(cpu_V1, rm + pass); + switch (op) { + case NEON_3R_VQADD: + if (u) { + gen_helper_neon_qadd_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } else { + gen_helper_neon_qadd_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } + break; + case NEON_3R_VQSUB: + if (u) { + gen_helper_neon_qsub_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } else { + gen_helper_neon_qsub_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } + break; + case NEON_3R_VSHL: + if (u) { + gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); + } else { + gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0); + } + break; + case NEON_3R_VQSHL: + if (u) { + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); + } else { + gen_helper_neon_qshl_s64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); + } + break; + case NEON_3R_VRSHL: + if (u) { + gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); + } else { + gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); + } + break; + case NEON_3R_VQRSHL: + if (u) { + gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); + } else { + gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, + cpu_V1, cpu_V0); + } + break; + case NEON_3R_VADD_VSUB: + if (u) { + tcg_gen_sub_i64(CPU_V001); + } else { + tcg_gen_add_i64(CPU_V001); + } + break; + default: + abort(); + } + neon_store_reg64(cpu_V0, rd + pass); + } + return 0; + } + pairwise = 0; + switch (op) { + case NEON_3R_VSHL: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: + { + int rtmp; + /* Shift instruction operands are reversed. */ + rtmp = rn; + rn = rm; + rm = rtmp; + } + break; + case NEON_3R_VPADD: + if (u) { + return 1; + } + /* Fall through */ + case NEON_3R_VPMAX: + case NEON_3R_VPMIN: + pairwise = 1; + break; + case NEON_3R_FLOAT_ARITH: + pairwise = (u && size < 2); /* if VPADD (float) */ + break; + case NEON_3R_FLOAT_MINMAX: + pairwise = u; /* if VPMIN/VPMAX (float) */ + break; + case NEON_3R_FLOAT_CMP: + if (!u && size) { + /* no encoding for U=0 C=1x */ + return 1; + } + break; + case NEON_3R_FLOAT_ACMP: + if (!u) { + return 1; + } + break; + case NEON_3R_FLOAT_MISC: + /* VMAXNM/VMINNM in ARMv8 */ + if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) { + return 1; + } + break; + case NEON_3R_VMUL: + if (u && (size != 0)) { + /* UNDEF on invalid size for polynomial subcase */ + return 1; + } + break; + case NEON_3R_VFM: + if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) { + return 1; + } + break; + default: + break; + } + + if (pairwise && q) { + /* All the pairwise insns UNDEF if Q is set */ + return 1; + } + + for (pass = 0; pass < (q ? 4 : 2); pass++) { + + if (pairwise) { + /* Pairwise. */ + if (pass < 1) { + tmp = neon_load_reg(rn, 0); + tmp2 = neon_load_reg(rn, 1); + } else { + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); + } + } else { + /* Elementwise. */ + tmp = neon_load_reg(rn, pass); + tmp2 = neon_load_reg(rm, pass); + } + switch (op) { + case NEON_3R_VHADD: + GEN_NEON_INTEGER_OP(hadd); + break; + case NEON_3R_VQADD: + GEN_NEON_INTEGER_OP_ENV(qadd); + break; + case NEON_3R_VRHADD: + GEN_NEON_INTEGER_OP(rhadd); + break; + case NEON_3R_LOGIC: /* Logic ops. */ + switch ((u << 2) | size) { + case 0: /* VAND */ + tcg_gen_and_i32(tmp, tmp, tmp2); + break; + case 1: /* BIC */ + tcg_gen_andc_i32(tmp, tmp, tmp2); + break; + case 2: /* VORR */ + tcg_gen_or_i32(tmp, tmp, tmp2); + break; + case 3: /* VORN */ + tcg_gen_orc_i32(tmp, tmp, tmp2); + break; + case 4: /* VEOR */ + tcg_gen_xor_i32(tmp, tmp, tmp2); + break; + case 5: /* VBSL */ + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp, tmp2, tmp3); + tcg_temp_free_i32(tmp3); + break; + case 6: /* VBIT */ + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp, tmp3, tmp2); + tcg_temp_free_i32(tmp3); + break; + case 7: /* VBIF */ + tmp3 = neon_load_reg(rd, pass); + gen_neon_bsl(tmp, tmp3, tmp, tmp2); + tcg_temp_free_i32(tmp3); + break; + } + break; + case NEON_3R_VHSUB: + GEN_NEON_INTEGER_OP(hsub); + break; + case NEON_3R_VQSUB: + GEN_NEON_INTEGER_OP_ENV(qsub); + break; + case NEON_3R_VCGT: + GEN_NEON_INTEGER_OP(cgt); + break; + case NEON_3R_VCGE: + GEN_NEON_INTEGER_OP(cge); + break; + case NEON_3R_VSHL: + GEN_NEON_INTEGER_OP(shl); + break; + case NEON_3R_VQSHL: + GEN_NEON_INTEGER_OP_ENV(qshl); + break; + case NEON_3R_VRSHL: + GEN_NEON_INTEGER_OP(rshl); + break; + case NEON_3R_VQRSHL: + GEN_NEON_INTEGER_OP_ENV(qrshl); + break; + case NEON_3R_VMAX: + GEN_NEON_INTEGER_OP(max); + break; + case NEON_3R_VMIN: + GEN_NEON_INTEGER_OP(min); + break; + case NEON_3R_VABD: + GEN_NEON_INTEGER_OP(abd); + break; + case NEON_3R_VABA: + GEN_NEON_INTEGER_OP(abd); + tcg_temp_free_i32(tmp2); + tmp2 = neon_load_reg(rd, pass); + gen_neon_add(size, tmp, tmp2); + break; + case NEON_3R_VADD_VSUB: + if (!u) { /* VADD */ + gen_neon_add(size, tmp, tmp2); + } else { /* VSUB */ + switch (size) { + case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break; + default: abort(); + } + } + break; + case NEON_3R_VTST_VCEQ: + if (!u) { /* VTST */ + switch (size) { + case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break; + default: abort(); + } + } else { /* VCEQ */ + switch (size) { + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; + default: abort(); + } + } + break; + case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */ + switch (size) { + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; + default: abort(); + } + tcg_temp_free_i32(tmp2); + tmp2 = neon_load_reg(rd, pass); + if (u) { /* VMLS */ + gen_neon_rsb(size, tmp, tmp2); + } else { /* VMLA */ + gen_neon_add(size, tmp, tmp2); + } + break; + case NEON_3R_VMUL: + if (u) { /* polynomial */ + gen_helper_neon_mul_p8(tmp, tmp, tmp2); + } else { /* Integer */ + switch (size) { + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; + default: abort(); + } + } + break; + case NEON_3R_VPMAX: + GEN_NEON_INTEGER_OP(pmax); + break; + case NEON_3R_VPMIN: + GEN_NEON_INTEGER_OP(pmin); + break; + case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */ + if (!u) { /* VQDMULH */ + switch (size) { + case 1: + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; + default: abort(); + } + } else { /* VQRDMULH */ + switch (size) { + case 1: + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); + break; + case 2: + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); + break; + default: abort(); + } + } + break; + case NEON_3R_VPADD: + switch (size) { + case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break; + default: abort(); + } + break; + case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */ + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + switch ((u << 2) | size) { + case 0: /* VADD */ + case 4: /* VPADD */ + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); + break; + case 2: /* VSUB */ + gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus); + break; + case 6: /* VABD */ + gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus); + break; + default: + abort(); + } + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_3R_FLOAT_MULTIPLY: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); + if (!u) { + tcg_temp_free_i32(tmp2); + tmp2 = neon_load_reg(rd, pass); + if (size == 0) { + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); + } + } + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_3R_FLOAT_CMP: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (!u) { + gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus); + } else { + if (size == 0) { + gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus); + } + } + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_3R_FLOAT_ACMP: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) { + gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus); + } + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_3R_FLOAT_MINMAX: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) { + gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus); + } + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_3R_FLOAT_MISC: + if (u) { + /* VMAXNM/VMINNM */ + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) { + gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus); + } else { + gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus); + } + tcg_temp_free_ptr(fpstatus); + } else { + if (size == 0) { + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); + } else { + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); + } + } + break; + case NEON_3R_VFM: + { + /* VFMA, VFMS: fused multiply-add */ + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + TCGv_i32 tmp3 = neon_load_reg(rd, pass); + if (size) { + /* VFMS */ + gen_helper_vfp_negs(tmp, tmp); + } + gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus); + tcg_temp_free_i32(tmp3); + tcg_temp_free_ptr(fpstatus); + break; + } + default: + abort(); + } + tcg_temp_free_i32(tmp2); + + /* Save the result. For elementwise operations we can put it + straight into the destination register. For pairwise operations + we have to be careful to avoid clobbering the source operands. */ + if (pairwise && rd == rm) { + neon_store_scratch(pass, tmp); + } else { + neon_store_reg(rd, pass, tmp); + } + + } /* for pass */ + if (pairwise && rd == rm) { + for (pass = 0; pass < (q ? 4 : 2); pass++) { + tmp = neon_load_scratch(pass); + neon_store_reg(rd, pass, tmp); + } + } + /* End of 3 register same size operations. */ + } else if (insn & (1 << 4)) { + if ((insn & 0x00380080) != 0) { + /* Two registers and shift. */ + op = (insn >> 8) & 0xf; + if (insn & (1 << 7)) { + /* 64-bit shift. */ + if (op > 7) { + return 1; + } + size = 3; + } else { + size = 2; + while ((insn & (1 << (size + 19))) == 0) + size--; + } + shift = (insn >> 16) & ((1 << (3 + size)) - 1); + /* To avoid excessive duplication of ops we implement shift + by immediate using the variable shift operations. */ + if (op < 8) { + /* Shift by immediate: + VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */ + if (q && ((rd | rm) & 1)) { + return 1; + } + if (!u && (op == 4 || op == 6)) { + return 1; + } + /* Right shifts are encoded as N - shift, where N is the + element size in bits. */ + if (op <= 4) + shift = shift - (1 << (size + 3)); + if (size == 3) { + count = q + 1; + } else { + count = q ? 4: 2; + } + switch (size) { + case 0: + imm = (uint8_t) shift; + imm |= imm << 8; + imm |= imm << 16; + break; + case 1: + imm = (uint16_t) shift; + imm |= imm << 16; + break; + case 2: + case 3: + imm = shift; + break; + default: + abort(); + } + + for (pass = 0; pass < count; pass++) { + if (size == 3) { + neon_load_reg64(cpu_V0, rm + pass); + tcg_gen_movi_i64(cpu_V1, imm); + switch (op) { + case 0: /* VSHR */ + case 1: /* VSRA */ + if (u) + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); + else + gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1); + break; + case 2: /* VRSHR */ + case 3: /* VRSRA */ + if (u) + gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1); + else + gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1); + break; + case 4: /* VSRI */ + case 5: /* VSHL, VSLI */ + gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1); + break; + case 6: /* VQSHLU */ + gen_helper_neon_qshlu_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + break; + case 7: /* VQSHL */ + if (u) { + gen_helper_neon_qshl_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } else { + gen_helper_neon_qshl_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); + } + break; + } + if (op == 1 || op == 3) { + /* Accumulate. */ + neon_load_reg64(cpu_V1, rd + pass); + tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1); + } else if (op == 4 || (op == 5 && u)) { + /* Insert */ + neon_load_reg64(cpu_V1, rd + pass); + uint64_t mask; + if (shift < -63 || shift > 63) { + mask = 0; + } else { + if (op == 4) { + mask = 0xffffffffffffffffull >> -shift; + } else { + mask = 0xffffffffffffffffull << shift; + } + } + tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask); + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); + } + neon_store_reg64(cpu_V0, rd + pass); + } else { /* size < 3 */ + /* Operands in T0 and T1. */ + tmp = neon_load_reg(rm, pass); + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, imm); + switch (op) { + case 0: /* VSHR */ + case 1: /* VSRA */ + GEN_NEON_INTEGER_OP(shl); + break; + case 2: /* VRSHR */ + case 3: /* VRSRA */ + GEN_NEON_INTEGER_OP(rshl); + break; + case 4: /* VSRI */ + case 5: /* VSHL, VSLI */ + switch (size) { + case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break; + default: abort(); + } + break; + case 6: /* VQSHLU */ + switch (size) { + case 0: + gen_helper_neon_qshlu_s8(tmp, cpu_env, + tmp, tmp2); + break; + case 1: + gen_helper_neon_qshlu_s16(tmp, cpu_env, + tmp, tmp2); + break; + case 2: + gen_helper_neon_qshlu_s32(tmp, cpu_env, + tmp, tmp2); + break; + default: + abort(); + } + break; + case 7: /* VQSHL */ + GEN_NEON_INTEGER_OP_ENV(qshl); + break; + } + tcg_temp_free_i32(tmp2); + + if (op == 1 || op == 3) { + /* Accumulate. */ + tmp2 = neon_load_reg(rd, pass); + gen_neon_add(size, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } else if (op == 4 || (op == 5 && u)) { + /* Insert */ + switch (size) { + case 0: + if (op == 4) + mask = 0xff >> -shift; + else + mask = (uint8_t)(0xff << shift); + mask |= mask << 8; + mask |= mask << 16; + break; + case 1: + if (op == 4) + mask = 0xffff >> -shift; + else + mask = (uint16_t)(0xffff << shift); + mask |= mask << 16; + break; + case 2: + if (shift < -31 || shift > 31) { + mask = 0; + } else { + if (op == 4) + mask = 0xffffffffu >> -shift; + else + mask = 0xffffffffu << shift; + } + break; + default: + abort(); + } + tmp2 = neon_load_reg(rd, pass); + tcg_gen_andi_i32(tmp, tmp, mask); + tcg_gen_andi_i32(tmp2, tmp2, ~mask); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + neon_store_reg(rd, pass, tmp); + } + } /* for pass */ + } else if (op < 10) { + /* Shift by immediate and narrow: + VSHRN, VRSHRN, VQSHRN, VQRSHRN. */ + int input_unsigned = (op == 8) ? !u : u; + if (rm & 1) { + return 1; + } + shift = shift - (1 << (size + 3)); + size++; + if (size == 3) { + tmp64 = tcg_const_i64(shift); + neon_load_reg64(cpu_V0, rm); + neon_load_reg64(cpu_V1, rm + 1); + for (pass = 0; pass < 2; pass++) { + TCGv_i64 in; + if (pass == 0) { + in = cpu_V0; + } else { + in = cpu_V1; + } + if (q) { + if (input_unsigned) { + gen_helper_neon_rshl_u64(cpu_V0, in, tmp64); + } else { + gen_helper_neon_rshl_s64(cpu_V0, in, tmp64); + } + } else { + if (input_unsigned) { + gen_helper_neon_shl_u64(cpu_V0, in, tmp64); + } else { + gen_helper_neon_shl_s64(cpu_V0, in, tmp64); + } + } + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); + neon_store_reg(rd, pass, tmp); + } /* for pass */ + tcg_temp_free_i64(tmp64); + } else { + if (size == 1) { + imm = (uint16_t)shift; + imm |= imm << 16; + } else { + /* size == 2 */ + imm = (uint32_t)shift; + } + tmp2 = tcg_const_i32(imm); + tmp4 = neon_load_reg(rm + 1, 0); + tmp5 = neon_load_reg(rm + 1, 1); + for (pass = 0; pass < 2; pass++) { + if (pass == 0) { + tmp = neon_load_reg(rm, 0); + } else { + tmp = tmp4; + } + gen_neon_shift_narrow(size, tmp, tmp2, q, + input_unsigned); + if (pass == 0) { + tmp3 = neon_load_reg(rm, 1); + } else { + tmp3 = tmp5; + } + gen_neon_shift_narrow(size, tmp3, tmp2, q, + input_unsigned); + tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp3); + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0); + neon_store_reg(rd, pass, tmp); + } /* for pass */ + tcg_temp_free_i32(tmp2); + } + } else if (op == 10) { + /* VSHLL, VMOVL */ + if (q || (rd & 1)) { + return 1; + } + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); + for (pass = 0; pass < 2; pass++) { + if (pass == 1) + tmp = tmp2; + + gen_neon_widen(cpu_V0, tmp, size, u); + + if (shift != 0) { + /* The shift is less than the width of the source + type, so we can just shift the whole register. */ + tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); + /* Widen the result of shift: we need to clear + * the potential overflow bits resulting from + * left bits of the narrow input appearing as + * right bits of left the neighbour narrow + * input. */ + if (size < 2 || !u) { + uint64_t imm64; + if (size == 0) { + imm = (0xffu >> (8 - shift)); + imm |= imm << 16; + } else if (size == 1) { + imm = 0xffff >> (16 - shift); + } else { + /* size == 2 */ + imm = 0xffffffff >> (32 - shift); + } + if (size < 2) { + imm64 = imm | (((uint64_t)imm) << 32); + } else { + imm64 = imm; + } + tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64); + } + } + neon_store_reg64(cpu_V0, rd + pass); + } + } else if (op >= 14) { + /* VCVT fixed-point. */ + if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) { + return 1; + } + /* We have already masked out the must-be-1 top bit of imm6, + * hence this 32-shift where the ARM ARM has 64-imm6. + */ + shift = 32 - shift; + for (pass = 0; pass < (q ? 4 : 2); pass++) { + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass)); + if (!(op & 1)) { + if (u) + gen_vfp_ulto(0, shift, 1); + else + gen_vfp_slto(0, shift, 1); + } else { + if (u) + gen_vfp_toul(0, shift, 1); + else + gen_vfp_tosl(0, shift, 1); + } + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass)); + } + } else { + return 1; + } + } else { /* (insn & 0x00380080) == 0 */ + int invert; + if (q && (rd & 1)) { + return 1; + } + + op = (insn >> 8) & 0xf; + /* One register and immediate. */ + imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf); + invert = (insn & (1 << 5)) != 0; + /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE. + * We choose to not special-case this and will behave as if a + * valid constant encoding of 0 had been given. + */ + switch (op) { + case 0: case 1: + /* no-op */ + break; + case 2: case 3: + imm <<= 8; + break; + case 4: case 5: + imm <<= 16; + break; + case 6: case 7: + imm <<= 24; + break; + case 8: case 9: + imm |= imm << 16; + break; + case 10: case 11: + imm = (imm << 8) | (imm << 24); + break; + case 12: + imm = (imm << 8) | 0xff; + break; + case 13: + imm = (imm << 16) | 0xffff; + break; + case 14: + imm |= (imm << 8) | (imm << 16) | (imm << 24); + if (invert) + imm = ~imm; + break; + case 15: + if (invert) { + return 1; + } + imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19) + | ((imm & 0x40) ? (0x1f << 25) : (1 << 30)); + break; + } + if (invert) + imm = ~imm; + + for (pass = 0; pass < (q ? 4 : 2); pass++) { + if (op & 1 && op < 12) { + tmp = neon_load_reg(rd, pass); + if (invert) { + /* The immediate value has already been inverted, so + BIC becomes AND. */ + tcg_gen_andi_i32(tmp, tmp, imm); + } else { + tcg_gen_ori_i32(tmp, tmp, imm); + } + } else { + /* VMOV, VMVN. */ + tmp = tcg_temp_new_i32(); + if (op == 14 && invert) { + int n; + uint32_t val; + val = 0; + for (n = 0; n < 4; n++) { + if (imm & (1 << (n + (pass & 1) * 4))) + val |= 0xff << (n * 8); + } + tcg_gen_movi_i32(tmp, val); + } else { + tcg_gen_movi_i32(tmp, imm); + } + } + neon_store_reg(rd, pass, tmp); + } + } + } else { /* (insn & 0x00800010 == 0x00800000) */ + if (size != 3) { + op = (insn >> 8) & 0xf; + if ((insn & (1 << 6)) == 0) { + /* Three registers of different lengths. */ + int src1_wide; + int src2_wide; + int prewiden; + /* undefreq: bit 0 : UNDEF if size == 0 + * bit 1 : UNDEF if size == 1 + * bit 2 : UNDEF if size == 2 + * bit 3 : UNDEF if U == 1 + * Note that [2:0] set implies 'always UNDEF' + */ + int undefreq; + /* prewiden, src1_wide, src2_wide, undefreq */ + static const int neon_3reg_wide[16][4] = { + {1, 0, 0, 0}, /* VADDL */ + {1, 1, 0, 0}, /* VADDW */ + {1, 0, 0, 0}, /* VSUBL */ + {1, 1, 0, 0}, /* VSUBW */ + {0, 1, 1, 0}, /* VADDHN */ + {0, 0, 0, 0}, /* VABAL */ + {0, 1, 1, 0}, /* VSUBHN */ + {0, 0, 0, 0}, /* VABDL */ + {0, 0, 0, 0}, /* VMLAL */ + {0, 0, 0, 9}, /* VQDMLAL */ + {0, 0, 0, 0}, /* VMLSL */ + {0, 0, 0, 9}, /* VQDMLSL */ + {0, 0, 0, 0}, /* Integer VMULL */ + {0, 0, 0, 1}, /* VQDMULL */ + {0, 0, 0, 0xa}, /* Polynomial VMULL */ + {0, 0, 0, 7}, /* Reserved: always UNDEF */ + }; + + prewiden = neon_3reg_wide[op][0]; + src1_wide = neon_3reg_wide[op][1]; + src2_wide = neon_3reg_wide[op][2]; + undefreq = neon_3reg_wide[op][3]; + + if ((undefreq & (1 << size)) || + ((undefreq & 8) && u)) { + return 1; + } + if ((src1_wide && (rn & 1)) || + (src2_wide && (rm & 1)) || + (!src2_wide && (rd & 1))) { + return 1; + } + + /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply) + * outside the loop below as it only performs a single pass. + */ + if (op == 14 && size == 2) { + TCGv_i64 tcg_rn, tcg_rm, tcg_rd; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) { + return 1; + } + tcg_rn = tcg_temp_new_i64(); + tcg_rm = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + neon_load_reg64(tcg_rn, rn); + neon_load_reg64(tcg_rm, rm); + gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd); + gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd + 1); + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rm); + tcg_temp_free_i64(tcg_rd); + return 0; + } + + /* Avoid overlapping operands. Wide source operands are + always aligned so will never overlap with wide + destinations in problematic ways. */ + if (rd == rm && !src2_wide) { + tmp = neon_load_reg(rm, 1); + neon_store_scratch(2, tmp); + } else if (rd == rn && !src1_wide) { + tmp = neon_load_reg(rn, 1); + neon_store_scratch(2, tmp); + } + TCGV_UNUSED_I32(tmp3); + for (pass = 0; pass < 2; pass++) { + if (src1_wide) { + neon_load_reg64(cpu_V0, rn + pass); + TCGV_UNUSED_I32(tmp); + } else { + if (pass == 1 && rd == rn) { + tmp = neon_load_scratch(2); + } else { + tmp = neon_load_reg(rn, pass); + } + if (prewiden) { + gen_neon_widen(cpu_V0, tmp, size, u); + } + } + if (src2_wide) { + neon_load_reg64(cpu_V1, rm + pass); + TCGV_UNUSED_I32(tmp2); + } else { + if (pass == 1 && rd == rm) { + tmp2 = neon_load_scratch(2); + } else { + tmp2 = neon_load_reg(rm, pass); + } + if (prewiden) { + gen_neon_widen(cpu_V1, tmp2, size, u); + } + } + switch (op) { + case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */ + gen_neon_addl(size); + break; + case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */ + gen_neon_subl(size); + break; + case 5: case 7: /* VABAL, VABDL */ + switch ((size << 1) | u) { + case 0: + gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2); + break; + case 1: + gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2); + break; + case 2: + gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2); + break; + case 3: + gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2); + break; + case 4: + gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2); + break; + case 5: + gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2); + break; + default: abort(); + } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + break; + case 8: case 9: case 10: case 11: case 12: case 13: + /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ + gen_neon_mull(cpu_V0, tmp, tmp2, size, u); + break; + case 14: /* Polynomial VMULL */ + gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + break; + default: /* 15 is RESERVED: caught earlier */ + abort(); + } + if (op == 13) { + /* VQDMULL */ + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + neon_store_reg64(cpu_V0, rd + pass); + } else if (op == 5 || (op >= 8 && op <= 11)) { + /* Accumulate. */ + neon_load_reg64(cpu_V1, rd + pass); + switch (op) { + case 10: /* VMLSL */ + gen_neon_negl(cpu_V0, size); + /* Fall through */ + case 5: case 8: /* VABAL, VMLAL */ + gen_neon_addl(size); + break; + case 9: case 11: /* VQDMLAL, VQDMLSL */ + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + if (op == 11) { + gen_neon_negl(cpu_V0, size); + } + gen_neon_addl_saturate(cpu_V0, cpu_V1, size); + break; + default: + abort(); + } + neon_store_reg64(cpu_V0, rd + pass); + } else if (op == 4 || op == 6) { + /* Narrowing operation. */ + tmp = tcg_temp_new_i32(); + if (!u) { + switch (size) { + case 0: + gen_helper_neon_narrow_high_u8(tmp, cpu_V0); + break; + case 1: + gen_helper_neon_narrow_high_u16(tmp, cpu_V0); + break; + case 2: + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); + tcg_gen_trunc_i64_i32(tmp, cpu_V0); + break; + default: abort(); + } + } else { + switch (size) { + case 0: + gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0); + break; + case 1: + gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0); + break; + case 2: + tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31); + tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); + tcg_gen_trunc_i64_i32(tmp, cpu_V0); + break; + default: abort(); + } + } + if (pass == 0) { + tmp3 = tmp; + } else { + neon_store_reg(rd, 0, tmp3); + neon_store_reg(rd, 1, tmp); + } + } else { + /* Write back the result. */ + neon_store_reg64(cpu_V0, rd + pass); + } + } + } else { + /* Two registers and a scalar. NB that for ops of this form + * the ARM ARM labels bit 24 as Q, but it is in our variable + * 'u', not 'q'. + */ + if (size == 0) { + return 1; + } + switch (op) { + case 1: /* Float VMLA scalar */ + case 5: /* Floating point VMLS scalar */ + case 9: /* Floating point VMUL scalar */ + if (size == 1) { + return 1; + } + /* fall through */ + case 0: /* Integer VMLA scalar */ + case 4: /* Integer VMLS scalar */ + case 8: /* Integer VMUL scalar */ + case 12: /* VQDMULH scalar */ + case 13: /* VQRDMULH scalar */ + if (u && ((rd | rn) & 1)) { + return 1; + } + tmp = neon_get_scalar(size, rm); + neon_store_scratch(0, tmp); + for (pass = 0; pass < (u ? 4 : 2); pass++) { + tmp = neon_load_scratch(0); + tmp2 = neon_load_reg(rn, pass); + if (op == 12) { + if (size == 1) { + gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2); + } else { + gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2); + } + } else if (op == 13) { + if (size == 1) { + gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2); + } else { + gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2); + } + } else if (op & 1) { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_ptr(fpstatus); + } else { + switch (size) { + case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break; + case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break; + default: abort(); + } + } + tcg_temp_free_i32(tmp2); + if (op < 8) { + /* Accumulate. */ + tmp2 = neon_load_reg(rd, pass); + switch (op) { + case 0: + gen_neon_add(size, tmp, tmp2); + break; + case 1: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case 4: + gen_neon_rsb(size, tmp, tmp2); + break; + case 5: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + default: + abort(); + } + tcg_temp_free_i32(tmp2); + } + neon_store_reg(rd, pass, tmp); + } + break; + case 3: /* VQDMLAL scalar */ + case 7: /* VQDMLSL scalar */ + case 11: /* VQDMULL scalar */ + if (u == 1) { + return 1; + } + /* fall through */ + case 2: /* VMLAL sclar */ + case 6: /* VMLSL scalar */ + case 10: /* VMULL scalar */ + if (rd & 1) { + return 1; + } + tmp2 = neon_get_scalar(size, rm); + /* We need a copy of tmp2 because gen_neon_mull + * deletes it during pass 0. */ + tmp4 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp4, tmp2); + tmp3 = neon_load_reg(rn, 1); + + for (pass = 0; pass < 2; pass++) { + if (pass == 0) { + tmp = neon_load_reg(rn, 0); + } else { + tmp = tmp3; + tmp2 = tmp4; + } + gen_neon_mull(cpu_V0, tmp, tmp2, size, u); + if (op != 11) { + neon_load_reg64(cpu_V1, rd + pass); + } + switch (op) { + case 6: + gen_neon_negl(cpu_V0, size); + /* Fall through */ + case 2: + gen_neon_addl(size); + break; + case 3: case 7: + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + if (op == 7) { + gen_neon_negl(cpu_V0, size); + } + gen_neon_addl_saturate(cpu_V0, cpu_V1, size); + break; + case 10: + /* no-op */ + break; + case 11: + gen_neon_addl_saturate(cpu_V0, cpu_V0, size); + break; + default: + abort(); + } + neon_store_reg64(cpu_V0, rd + pass); + } + + + break; + default: /* 14 and 15 are RESERVED */ + return 1; + } + } + } else { /* size == 3 */ + if (!u) { + /* Extract. */ + imm = (insn >> 8) & 0xf; + + if (imm > 7 && !q) + return 1; + + if (q && ((rd | rn | rm) & 1)) { + return 1; + } + + if (imm == 0) { + neon_load_reg64(cpu_V0, rn); + if (q) { + neon_load_reg64(cpu_V1, rn + 1); + } + } else if (imm == 8) { + neon_load_reg64(cpu_V0, rn + 1); + if (q) { + neon_load_reg64(cpu_V1, rm); + } + } else if (q) { + tmp64 = tcg_temp_new_i64(); + if (imm < 8) { + neon_load_reg64(cpu_V0, rn); + neon_load_reg64(tmp64, rn + 1); + } else { + neon_load_reg64(cpu_V0, rn + 1); + neon_load_reg64(tmp64, rm); + } + tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8); + tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8)); + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); + if (imm < 8) { + neon_load_reg64(cpu_V1, rm); + } else { + neon_load_reg64(cpu_V1, rm + 1); + imm -= 8; + } + tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); + tcg_gen_shri_i64(tmp64, tmp64, imm * 8); + tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64); + tcg_temp_free_i64(tmp64); + } else { + /* BUGFIX */ + neon_load_reg64(cpu_V0, rn); + tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8); + neon_load_reg64(cpu_V1, rm); + tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8)); + tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); + } + neon_store_reg64(cpu_V0, rd); + if (q) { + neon_store_reg64(cpu_V1, rd + 1); + } + } else if ((insn & (1 << 11)) == 0) { + /* Two register misc. */ + op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); + size = (insn >> 18) & 3; + /* UNDEF for unknown op values and bad op-size combinations */ + if ((neon_2rm_sizes[op] & (1 << size)) == 0) { + return 1; + } + if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) && + q && ((rm | rd) & 1)) { + return 1; + } + switch (op) { + case NEON_2RM_VREV64: + for (pass = 0; pass < (q ? 2 : 1); pass++) { + tmp = neon_load_reg(rm, pass * 2); + tmp2 = neon_load_reg(rm, pass * 2 + 1); + switch (size) { + case 0: tcg_gen_bswap32_i32(tmp, tmp); break; + case 1: gen_swap_half(tmp); break; + case 2: /* no-op */ break; + default: abort(); + } + neon_store_reg(rd, pass * 2 + 1, tmp); + if (size == 2) { + neon_store_reg(rd, pass * 2, tmp2); + } else { + switch (size) { + case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break; + case 1: gen_swap_half(tmp2); break; + default: abort(); + } + neon_store_reg(rd, pass * 2, tmp2); + } + } + break; + case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U: + case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U: + for (pass = 0; pass < q + 1; pass++) { + tmp = neon_load_reg(rm, pass * 2); + gen_neon_widen(cpu_V0, tmp, size, op & 1); + tmp = neon_load_reg(rm, pass * 2 + 1); + gen_neon_widen(cpu_V1, tmp, size, op & 1); + switch (size) { + case 0: gen_helper_neon_paddl_u16(CPU_V001); break; + case 1: gen_helper_neon_paddl_u32(CPU_V001); break; + case 2: tcg_gen_add_i64(CPU_V001); break; + default: abort(); + } + if (op >= NEON_2RM_VPADAL) { + /* Accumulate. */ + neon_load_reg64(cpu_V1, rd + pass); + gen_neon_addl(size); + } + neon_store_reg64(cpu_V0, rd + pass); + } + break; + case NEON_2RM_VTRN: + if (size == 2) { + int n; + for (n = 0; n < (q ? 4 : 2); n += 2) { + tmp = neon_load_reg(rm, n); + tmp2 = neon_load_reg(rd, n + 1); + neon_store_reg(rm, n, tmp2); + neon_store_reg(rd, n + 1, tmp); + } + } else { + goto elementwise; + } + break; + case NEON_2RM_VUZP: + if (gen_neon_unzip(rd, rm, size, q)) { + return 1; + } + break; + case NEON_2RM_VZIP: + if (gen_neon_zip(rd, rm, size, q)) { + return 1; + } + break; + case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN: + /* also VQMOVUN; op field and mnemonics don't line up */ + if (rm & 1) { + return 1; + } + TCGV_UNUSED_I32(tmp2); + for (pass = 0; pass < 2; pass++) { + neon_load_reg64(cpu_V0, rm + pass); + tmp = tcg_temp_new_i32(); + gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size, + tmp, cpu_V0); + if (pass == 0) { + tmp2 = tmp; + } else { + neon_store_reg(rd, 0, tmp2); + neon_store_reg(rd, 1, tmp); + } + } + break; + case NEON_2RM_VSHLL: + if (q || (rd & 1)) { + return 1; + } + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); + for (pass = 0; pass < 2; pass++) { + if (pass == 1) + tmp = tmp2; + gen_neon_widen(cpu_V0, tmp, size, 1); + tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size); + neon_store_reg64(cpu_V0, rd + pass); + } + break; + case NEON_2RM_VCVT_F16_F32: + if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) || + q || (rm & 1)) { + return 1; + } + tmp = tcg_temp_new_i32(); + tmp2 = tcg_temp_new_i32(); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0)); + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1)); + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp2, tmp2, tmp); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2)); + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3)); + neon_store_reg(rd, 0, tmp2); + tmp2 = tcg_temp_new_i32(); + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp2, tmp2, tmp); + neon_store_reg(rd, 1, tmp2); + tcg_temp_free_i32(tmp); + break; + case NEON_2RM_VCVT_F32_F16: + if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) || + q || (rd & 1)) { + return 1; + } + tmp3 = tcg_temp_new_i32(); + tmp = neon_load_reg(rm, 0); + tmp2 = neon_load_reg(rm, 1); + tcg_gen_ext16u_i32(tmp3, tmp); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0)); + tcg_gen_shri_i32(tmp3, tmp, 16); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1)); + tcg_temp_free_i32(tmp); + tcg_gen_ext16u_i32(tmp3, tmp2); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2)); + tcg_gen_shri_i32(tmp3, tmp2, 16); + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); + tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3)); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + break; + case NEON_2RM_AESE: case NEON_2RM_AESMC: + if (!arm_dc_feature(s, ARM_FEATURE_V8_AES) + || ((rm | rd) & 1)) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + + /* Bit 6 is the lowest opcode bit; it distinguishes between + * encryption (AESE/AESMC) and decryption (AESD/AESIMC) + */ + tmp3 = tcg_const_i32(extract32(insn, 6, 1)); + + if (op == NEON_2RM_AESE) { + gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3); + } else { + gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3); + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + break; + case NEON_2RM_SHA1H: + if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1) + || ((rm | rd) & 1)) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + + gen_helper_crypto_sha1h(cpu_env, tmp, tmp2); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + break; + case NEON_2RM_SHA1SU1: + if ((rm | rd) & 1) { + return 1; + } + /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */ + if (q) { + if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) { + return 1; + } + } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) { + return 1; + } + tmp = tcg_const_i32(rd); + tmp2 = tcg_const_i32(rm); + if (q) { + gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2); + } else { + gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2); + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + break; + default: + elementwise: + for (pass = 0; pass < (q ? 4 : 2); pass++) { + if (neon_2rm_is_float_op(op)) { + tcg_gen_ld_f32(cpu_F0s, cpu_env, + neon_reg_offset(rm, pass)); + TCGV_UNUSED_I32(tmp); + } else { + tmp = neon_load_reg(rm, pass); + } + switch (op) { + case NEON_2RM_VREV32: + switch (size) { + case 0: tcg_gen_bswap32_i32(tmp, tmp); break; + case 1: gen_swap_half(tmp); break; + default: abort(); + } + break; + case NEON_2RM_VREV16: + gen_rev16(tmp); + break; + case NEON_2RM_VCLS: + switch (size) { + case 0: gen_helper_neon_cls_s8(tmp, tmp); break; + case 1: gen_helper_neon_cls_s16(tmp, tmp); break; + case 2: gen_helper_neon_cls_s32(tmp, tmp); break; + default: abort(); + } + break; + case NEON_2RM_VCLZ: + switch (size) { + case 0: gen_helper_neon_clz_u8(tmp, tmp); break; + case 1: gen_helper_neon_clz_u16(tmp, tmp); break; + case 2: gen_helper_clz(tmp, tmp); break; + default: abort(); + } + break; + case NEON_2RM_VCNT: + gen_helper_neon_cnt_u8(tmp, tmp); + break; + case NEON_2RM_VMVN: + tcg_gen_not_i32(tmp, tmp); + break; + case NEON_2RM_VQABS: + switch (size) { + case 0: + gen_helper_neon_qabs_s8(tmp, cpu_env, tmp); + break; + case 1: + gen_helper_neon_qabs_s16(tmp, cpu_env, tmp); + break; + case 2: + gen_helper_neon_qabs_s32(tmp, cpu_env, tmp); + break; + default: abort(); + } + break; + case NEON_2RM_VQNEG: + switch (size) { + case 0: + gen_helper_neon_qneg_s8(tmp, cpu_env, tmp); + break; + case 1: + gen_helper_neon_qneg_s16(tmp, cpu_env, tmp); + break; + case 2: + gen_helper_neon_qneg_s32(tmp, cpu_env, tmp); + break; + default: abort(); + } + break; + case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: + tmp2 = tcg_const_i32(0); + switch(size) { + case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break; + default: abort(); + } + tcg_temp_free_i32(tmp2); + if (op == NEON_2RM_VCLE0) { + tcg_gen_not_i32(tmp, tmp); + } + break; + case NEON_2RM_VCGE0: case NEON_2RM_VCLT0: + tmp2 = tcg_const_i32(0); + switch(size) { + case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break; + default: abort(); + } + tcg_temp_free_i32(tmp2); + if (op == NEON_2RM_VCLT0) { + tcg_gen_not_i32(tmp, tmp); + } + break; + case NEON_2RM_VCEQ0: + tmp2 = tcg_const_i32(0); + switch(size) { + case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; + case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; + case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; + default: abort(); + } + tcg_temp_free_i32(tmp2); + break; + case NEON_2RM_VABS: + switch(size) { + case 0: gen_helper_neon_abs_s8(tmp, tmp); break; + case 1: gen_helper_neon_abs_s16(tmp, tmp); break; + case 2: tcg_gen_abs_i32(tmp, tmp); break; + default: abort(); + } + break; + case NEON_2RM_VNEG: + tmp2 = tcg_const_i32(0); + gen_neon_rsb(size, tmp, tmp2); + tcg_temp_free_i32(tmp2); + break; + case NEON_2RM_VCGT0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + tmp2 = tcg_const_i32(0); + gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_i32(tmp2); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCGE0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + tmp2 = tcg_const_i32(0); + gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_i32(tmp2); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCEQ0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + tmp2 = tcg_const_i32(0); + gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_i32(tmp2); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCLE0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + tmp2 = tcg_const_i32(0); + gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus); + tcg_temp_free_i32(tmp2); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCLT0_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + tmp2 = tcg_const_i32(0); + gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus); + tcg_temp_free_i32(tmp2); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VABS_F: + gen_vfp_abs(0); + break; + case NEON_2RM_VNEG_F: + gen_vfp_neg(0); + break; + case NEON_2RM_VSWP: + tmp2 = neon_load_reg(rd, pass); + neon_store_reg(rm, pass, tmp2); + break; + case NEON_2RM_VTRN: + tmp2 = neon_load_reg(rd, pass); + switch (size) { + case 0: gen_neon_trn_u8(tmp, tmp2); break; + case 1: gen_neon_trn_u16(tmp, tmp2); break; + default: abort(); + } + neon_store_reg(rm, pass, tmp2); + break; + case NEON_2RM_VRINTN: + case NEON_2RM_VRINTA: + case NEON_2RM_VRINTM: + case NEON_2RM_VRINTP: + case NEON_2RM_VRINTZ: + { + TCGv_i32 tcg_rmode; + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + int rmode; + + if (op == NEON_2RM_VRINTZ) { + rmode = FPROUNDING_ZERO; + } else { + rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1]; + } + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + tcg_temp_free_ptr(fpstatus); + tcg_temp_free_i32(tcg_rmode); + break; + } + case NEON_2RM_VRINTX: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCVTAU: + case NEON_2RM_VCVTAS: + case NEON_2RM_VCVTNU: + case NEON_2RM_VCVTNS: + case NEON_2RM_VCVTPU: + case NEON_2RM_VCVTPS: + case NEON_2RM_VCVTMU: + case NEON_2RM_VCVTMS: + { + bool is_signed = !extract32(insn, 7, 1); + TCGv_ptr fpst = get_fpstatus_ptr(1); + TCGv_i32 tcg_rmode, tcg_shift; + int rmode = fp_decode_rm[extract32(insn, 8, 2)]; + + tcg_shift = tcg_const_i32(0); + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + + if (is_signed) { + gen_helper_vfp_tosls(cpu_F0s, cpu_F0s, + tcg_shift, fpst); + } else { + gen_helper_vfp_touls(cpu_F0s, cpu_F0s, + tcg_shift, fpst); + } + + gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, + cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_i32(tcg_shift); + tcg_temp_free_ptr(fpst); + break; + } + case NEON_2RM_VRECPE: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_recpe_u32(tmp, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VRSQRTE: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rsqrte_u32(tmp, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VRECPE_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VRSQRTE_F: + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); + break; + } + case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ + gen_vfp_sito(0, 1); + break; + case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ + gen_vfp_uito(0, 1); + break; + case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ + gen_vfp_tosiz(0, 1); + break; + case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ + gen_vfp_touiz(0, 1); + break; + default: + /* Reserved op values were caught by the + * neon_2rm_sizes[] check earlier. + */ + abort(); + } + if (neon_2rm_is_float_op(op)) { + tcg_gen_st_f32(cpu_F0s, cpu_env, + neon_reg_offset(rd, pass)); + } else { + neon_store_reg(rd, pass, tmp); + } + } + break; + } + } else if ((insn & (1 << 10)) == 0) { + /* VTBL, VTBX. */ + int n = ((insn >> 8) & 3) + 1; + if ((rn + n) > 32) { + /* This is UNPREDICTABLE; we choose to UNDEF to avoid the + * helper function running off the end of the register file. + */ + return 1; + } + n <<= 3; + if (insn & (1 << 6)) { + tmp = neon_load_reg(rd, 0); + } else { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } + tmp2 = neon_load_reg(rm, 0); + tmp4 = tcg_const_i32(rn); + tmp5 = tcg_const_i32(n); + gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5); + tcg_temp_free_i32(tmp); + if (insn & (1 << 6)) { + tmp = neon_load_reg(rd, 1); + } else { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } + tmp3 = neon_load_reg(rm, 1); + gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5); + tcg_temp_free_i32(tmp5); + tcg_temp_free_i32(tmp4); + neon_store_reg(rd, 0, tmp2); + neon_store_reg(rd, 1, tmp3); + tcg_temp_free_i32(tmp); + } else if ((insn & 0x380) == 0) { + /* VDUP */ + if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) { + return 1; + } + if (insn & (1 << 19)) { + tmp = neon_load_reg(rm, 1); + } else { + tmp = neon_load_reg(rm, 0); + } + if (insn & (1 << 16)) { + gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8); + } else if (insn & (1 << 17)) { + if ((insn >> 18) & 1) + gen_neon_dup_high16(tmp); + else + gen_neon_dup_low16(tmp); + } + for (pass = 0; pass < (q ? 4 : 2); pass++) { + tmp2 = tcg_temp_new_i32(); + tcg_gen_mov_i32(tmp2, tmp); + neon_store_reg(rd, pass, tmp2); + } + tcg_temp_free_i32(tmp); + } else { + return 1; + } + } + } + return 0; +} + +static int disas_coproc_insn(DisasContext *s, uint32_t insn) +{ + int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2; + const ARMCPRegInfo *ri; + + cpnum = (insn >> 8) & 0xf; + + /* First check for coprocessor space used for XScale/iwMMXt insns */ + if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) { + if (extract32(s->c15_cpar, cpnum, 1) == 0) { + return 1; + } + if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { + return disas_iwmmxt_insn(s, insn); + } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) { + return disas_dsp_insn(s, insn); + } + return 1; + } + + /* Otherwise treat as a generic register access */ + is64 = (insn & (1 << 25)) == 0; + if (!is64 && ((insn & (1 << 4)) == 0)) { + /* cdp */ + return 1; + } + + crm = insn & 0xf; + if (is64) { + crn = 0; + opc1 = (insn >> 4) & 0xf; + opc2 = 0; + rt2 = (insn >> 16) & 0xf; + } else { + crn = (insn >> 16) & 0xf; + opc1 = (insn >> 21) & 7; + opc2 = (insn >> 5) & 7; + rt2 = 0; + } + isread = (insn >> 20) & 1; + rt = (insn >> 12) & 0xf; + + ri = get_arm_cp_reginfo(s->cp_regs, + ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2)); + if (ri) { + /* Check access permissions */ + if (!cp_access_ok(s->current_el, ri, isread)) { + return 1; + } + + if (ri->accessfn || + (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) { + /* Emit code to perform further access permissions checks at + * runtime; this may result in an exception. + * Note that on XScale all cp0..c13 registers do an access check + * call in order to handle c15_cpar. + */ + TCGv_ptr tmpptr; + TCGv_i32 tcg_syn; + uint32_t syndrome; + + /* Note that since we are an implementation which takes an + * exception on a trapped conditional instruction only if the + * instruction passes its condition code check, we can take + * advantage of the clause in the ARM ARM that allows us to set + * the COND field in the instruction to 0xE in all cases. + * We could fish the actual condition out of the insn (ARM) + * or the condexec bits (Thumb) but it isn't necessary. + */ + switch (cpnum) { + case 14: + if (is64) { + syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2, + isread, s->thumb); + } else { + syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm, + rt, isread, s->thumb); + } + break; + case 15: + if (is64) { + syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2, + isread, s->thumb); + } else { + syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm, + rt, isread, s->thumb); + } + break; + default: + /* ARMv8 defines that only coprocessors 14 and 15 exist, + * so this can only happen if this is an ARMv7 or earlier CPU, + * in which case the syndrome information won't actually be + * guest visible. + */ + assert(!arm_dc_feature(s, ARM_FEATURE_V8)); + syndrome = syn_uncategorized(); + break; + } + + gen_set_pc_im(s, s->pc - 4); + tmpptr = tcg_const_ptr(ri); + tcg_syn = tcg_const_i32(syndrome); + gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn); + tcg_temp_free_ptr(tmpptr); + tcg_temp_free_i32(tcg_syn); + } + + /* Handle special cases first */ + switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) { + case ARM_CP_NOP: + return 0; + case ARM_CP_WFI: + if (isread) { + return 1; + } + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_WFI; + return 0; + default: + break; + } + + if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { + gen_io_start(); + } + + if (isread) { + /* Read */ + if (is64) { + TCGv_i64 tmp64; + TCGv_i32 tmp; + if (ri->type & ARM_CP_CONST) { + tmp64 = tcg_const_i64(ri->resetvalue); + } else if (ri->readfn) { + TCGv_ptr tmpptr; + tmp64 = tcg_temp_new_i64(); + tmpptr = tcg_const_ptr(ri); + gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr); + tcg_temp_free_ptr(tmpptr); + } else { + tmp64 = tcg_temp_new_i64(); + tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset); + } + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + store_reg(s, rt, tmp); + tcg_gen_shri_i64(tmp64, tmp64, 32); + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); + store_reg(s, rt2, tmp); + } else { + TCGv_i32 tmp; + if (ri->type & ARM_CP_CONST) { + tmp = tcg_const_i32(ri->resetvalue); + } else if (ri->readfn) { + TCGv_ptr tmpptr; + tmp = tcg_temp_new_i32(); + tmpptr = tcg_const_ptr(ri); + gen_helper_get_cp_reg(tmp, cpu_env, tmpptr); + tcg_temp_free_ptr(tmpptr); + } else { + tmp = load_cpu_offset(ri->fieldoffset); + } + if (rt == 15) { + /* Destination register of r15 for 32 bit loads sets + * the condition codes from the high 4 bits of the value + */ + gen_set_nzcv(tmp); + tcg_temp_free_i32(tmp); + } else { + store_reg(s, rt, tmp); + } + } + } else { + /* Write */ + if (ri->type & ARM_CP_CONST) { + /* If not forbidden by access permissions, treat as WI */ + return 0; + } + + if (is64) { + TCGv_i32 tmplo, tmphi; + TCGv_i64 tmp64 = tcg_temp_new_i64(); + tmplo = load_reg(s, rt); + tmphi = load_reg(s, rt2); + tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi); + tcg_temp_free_i32(tmplo); + tcg_temp_free_i32(tmphi); + if (ri->writefn) { + TCGv_ptr tmpptr = tcg_const_ptr(ri); + gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64); + tcg_temp_free_ptr(tmpptr); + } else { + tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset); + } + tcg_temp_free_i64(tmp64); + } else { + if (ri->writefn) { + TCGv_i32 tmp; + TCGv_ptr tmpptr; + tmp = load_reg(s, rt); + tmpptr = tcg_const_ptr(ri); + gen_helper_set_cp_reg(cpu_env, tmpptr, tmp); + tcg_temp_free_ptr(tmpptr); + tcg_temp_free_i32(tmp); + } else { + TCGv_i32 tmp = load_reg(s, rt); + store_cpu_offset(tmp, ri->fieldoffset); + } + } + } + + if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { + /* I/O operations must end the TB here (whether read or write) */ + gen_io_end(); + gen_lookup_tb(s); + } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { + /* We default to ending the TB on a coprocessor register write, + * but allow this to be suppressed by the register definition + * (usually only necessary to work around guest bugs). + */ + gen_lookup_tb(s); + } + + return 0; + } + + /* Unknown register; this might be a guest error or a QEMU + * unimplemented feature. + */ + if (is64) { + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " + "64 bit system register cp:%d opc1: %d crm:%d " + "(%s)\n", + isread ? "read" : "write", cpnum, opc1, crm, + s->ns ? "non-secure" : "secure"); + } else { + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " + "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d " + "(%s)\n", + isread ? "read" : "write", cpnum, opc1, crn, crm, opc2, + s->ns ? "non-secure" : "secure"); + } + + return 1; +} + + +/* Store a 64-bit value to a register pair. Clobbers val. */ +static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val) +{ + TCGv_i32 tmp; + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, val); + store_reg(s, rlow, tmp); + tmp = tcg_temp_new_i32(); + tcg_gen_shri_i64(val, val, 32); + tcg_gen_trunc_i64_i32(tmp, val); + store_reg(s, rhigh, tmp); +} + +/* load a 32-bit value from a register and perform a 64-bit accumulate. */ +static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow) +{ + TCGv_i64 tmp; + TCGv_i32 tmp2; + + /* Load value and extend to 64 bits. */ + tmp = tcg_temp_new_i64(); + tmp2 = load_reg(s, rlow); + tcg_gen_extu_i32_i64(tmp, tmp2); + tcg_temp_free_i32(tmp2); + tcg_gen_add_i64(val, val, tmp); + tcg_temp_free_i64(tmp); +} + +/* load and add a 64-bit value from a register pair. */ +static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh) +{ + TCGv_i64 tmp; + TCGv_i32 tmpl; + TCGv_i32 tmph; + + /* Load 64-bit value rd:rn. */ + tmpl = load_reg(s, rlow); + tmph = load_reg(s, rhigh); + tmp = tcg_temp_new_i64(); + tcg_gen_concat_i32_i64(tmp, tmpl, tmph); + tcg_temp_free_i32(tmpl); + tcg_temp_free_i32(tmph); + tcg_gen_add_i64(val, val, tmp); + tcg_temp_free_i64(tmp); +} + +/* Set N and Z flags from hi|lo. */ +static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi) +{ + tcg_gen_mov_i32(cpu_NF, hi); + tcg_gen_or_i32(cpu_ZF, lo, hi); +} + +/* Load/Store exclusive instructions are implemented by remembering + the value/address loaded, and seeing if these are the same + when the store is performed. This should be sufficient to implement + the architecturally mandated semantics, and avoids having to monitor + regular stores. + + In system emulation mode only one CPU will be running at once, so + this sequence is effectively atomic. In user emulation mode we + throw an exception and handle the atomic operation elsewhere. */ +static void gen_load_exclusive(DisasContext *s, int rt, int rt2, + TCGv_i32 addr, int size) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + + s->is_ldex = true; + + switch (size) { + case 0: + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + break; + case 1: + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + break; + case 2: + case 3: + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + break; + default: + abort(); + } + + if (size == 3) { + TCGv_i32 tmp2 = tcg_temp_new_i32(); + TCGv_i32 tmp3 = tcg_temp_new_i32(); + + tcg_gen_addi_i32(tmp2, addr, 4); + gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s)); + tcg_temp_free_i32(tmp2); + tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3); + store_reg(s, rt2, tmp3); + } else { + tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp); + } + + store_reg(s, rt, tmp); + tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr); +} + +static void gen_clrex(DisasContext *s) +{ + tcg_gen_movi_i64(cpu_exclusive_addr, -1); +} + +#ifdef CONFIG_USER_ONLY +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, + TCGv_i32 addr, int size) +{ + tcg_gen_extu_i32_i64(cpu_exclusive_test, addr); + tcg_gen_movi_i32(cpu_exclusive_info, + size | (rd << 4) | (rt << 8) | (rt2 << 12)); + gen_exception_internal_insn(s, 4, EXCP_STREX); +} +#else +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, + TCGv_i32 addr, int size) +{ + TCGv_i32 tmp; + TCGv_i64 val64, extaddr; + TCGLabel *done_label; + TCGLabel *fail_label; + + /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) { + [addr] = {Rt}; + {Rd} = 0; + } else { + {Rd} = 1; + } */ + fail_label = gen_new_label(); + done_label = gen_new_label(); + extaddr = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(extaddr, addr); + tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label); + tcg_temp_free_i64(extaddr); + + tmp = tcg_temp_new_i32(); + switch (size) { + case 0: + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + break; + case 1: + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + break; + case 2: + case 3: + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + break; + default: + abort(); + } + + val64 = tcg_temp_new_i64(); + if (size == 3) { + TCGv_i32 tmp2 = tcg_temp_new_i32(); + TCGv_i32 tmp3 = tcg_temp_new_i32(); + tcg_gen_addi_i32(tmp2, addr, 4); + gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s)); + tcg_temp_free_i32(tmp2); + tcg_gen_concat_i32_i64(val64, tmp, tmp3); + tcg_temp_free_i32(tmp3); + } else { + tcg_gen_extu_i32_i64(val64, tmp); + } + tcg_temp_free_i32(tmp); + + tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label); + tcg_temp_free_i64(val64); + + tmp = load_reg(s, rt); + switch (size) { + case 0: + gen_aa32_st8(tmp, addr, get_mem_index(s)); + break; + case 1: + gen_aa32_st16(tmp, addr, get_mem_index(s)); + break; + case 2: + case 3: + gen_aa32_st32(tmp, addr, get_mem_index(s)); + break; + default: + abort(); + } + tcg_temp_free_i32(tmp); + if (size == 3) { + tcg_gen_addi_i32(addr, addr, 4); + tmp = load_reg(s, rt2); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_gen_movi_i32(cpu_R[rd], 0); + tcg_gen_br(done_label); + gen_set_label(fail_label); + tcg_gen_movi_i32(cpu_R[rd], 1); + gen_set_label(done_label); + tcg_gen_movi_i64(cpu_exclusive_addr, -1); +} +#endif + +/* gen_srs: + * @env: CPUARMState + * @s: DisasContext + * @mode: mode field from insn (which stack to store to) + * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn + * @writeback: true if writeback bit set + * + * Generate code for the SRS (Store Return State) insn. + */ +static void gen_srs(DisasContext *s, + uint32_t mode, uint32_t amode, bool writeback) +{ + int32_t offset; + TCGv_i32 addr = tcg_temp_new_i32(); + TCGv_i32 tmp = tcg_const_i32(mode); + gen_helper_get_r13_banked(addr, cpu_env, tmp); + tcg_temp_free_i32(tmp); + switch (amode) { + case 0: /* DA */ + offset = -4; + break; + case 1: /* IA */ + offset = 0; + break; + case 2: /* DB */ + offset = -8; + break; + case 3: /* IB */ + offset = 4; + break; + default: + abort(); + } + tcg_gen_addi_i32(addr, addr, offset); + tmp = load_reg(s, 14); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + tmp = load_cpu_field(spsr); + tcg_gen_addi_i32(addr, addr, 4); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + if (writeback) { + switch (amode) { + case 0: + offset = -8; + break; + case 1: + offset = 4; + break; + case 2: + offset = -4; + break; + case 3: + offset = 0; + break; + default: + abort(); + } + tcg_gen_addi_i32(addr, addr, offset); + tmp = tcg_const_i32(mode); + gen_helper_set_r13_banked(cpu_env, tmp, addr); + tcg_temp_free_i32(tmp); + } + tcg_temp_free_i32(addr); +} + +static void disas_arm_insn(DisasContext *s, unsigned int insn) +{ + unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh; + TCGv_i32 tmp; + TCGv_i32 tmp2; + TCGv_i32 tmp3; + TCGv_i32 addr; + TCGv_i64 tmp64; + + /* M variants do not implement ARM mode. */ + if (arm_dc_feature(s, ARM_FEATURE_M)) { + goto illegal_op; + } + cond = insn >> 28; + if (cond == 0xf){ + /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we + * choose to UNDEF. In ARMv5 and above the space is used + * for miscellaneous unconditional instructions. + */ + ARCH(5); + + /* Unconditional instructions. */ + if (((insn >> 25) & 7) == 1) { + /* NEON Data processing. */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + goto illegal_op; + } + + if (disas_neon_data_insn(s, insn)) { + goto illegal_op; + } + return; + } + if ((insn & 0x0f100000) == 0x04000000) { + /* NEON load/store. */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + goto illegal_op; + } + + if (disas_neon_ls_insn(s, insn)) { + goto illegal_op; + } + return; + } + if ((insn & 0x0f000e10) == 0x0e000a00) { + /* VFP. */ + if (disas_vfp_insn(s, insn)) { + goto illegal_op; + } + return; + } + if (((insn & 0x0f30f000) == 0x0510f000) || + ((insn & 0x0f30f010) == 0x0710f000)) { + if ((insn & (1 << 22)) == 0) { + /* PLDW; v7MP */ + if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) { + goto illegal_op; + } + } + /* Otherwise PLD; v5TE+ */ + ARCH(5TE); + return; + } + if (((insn & 0x0f70f000) == 0x0450f000) || + ((insn & 0x0f70f010) == 0x0650f000)) { + ARCH(7); + return; /* PLI; V7 */ + } + if (((insn & 0x0f700000) == 0x04100000) || + ((insn & 0x0f700010) == 0x06100000)) { + if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) { + goto illegal_op; + } + return; /* v7MP: Unallocated memory hint: must NOP */ + } + + if ((insn & 0x0ffffdff) == 0x01010000) { + ARCH(6); + /* setend */ + if (((insn >> 9) & 1) != s->bswap_code) { + /* Dynamic endianness switching not implemented. */ + qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n"); + goto illegal_op; + } + return; + } else if ((insn & 0x0fffff00) == 0x057ff000) { + switch ((insn >> 4) & 0xf) { + case 1: /* clrex */ + ARCH(6K); + gen_clrex(s); + return; + case 4: /* dsb */ + case 5: /* dmb */ + case 6: /* isb */ + ARCH(7); + /* We don't emulate caches so these are a no-op. */ + return; + default: + goto illegal_op; + } + } else if ((insn & 0x0e5fffe0) == 0x084d0500) { + /* srs */ + if (IS_USER(s)) { + goto illegal_op; + } + ARCH(6); + gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21)); + return; + } else if ((insn & 0x0e50ffe0) == 0x08100a00) { + /* rfe */ + int32_t offset; + if (IS_USER(s)) + goto illegal_op; + ARCH(6); + rn = (insn >> 16) & 0xf; + addr = load_reg(s, rn); + i = (insn >> 23) & 3; + switch (i) { + case 0: offset = -4; break; /* DA */ + case 1: offset = 0; break; /* IA */ + case 2: offset = -8; break; /* DB */ + case 3: offset = 4; break; /* IB */ + default: abort(); + } + if (offset) + tcg_gen_addi_i32(addr, addr, offset); + /* Load PC into tmp and CPSR into tmp2. */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + tcg_gen_addi_i32(addr, addr, 4); + tmp2 = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp2, addr, get_mem_index(s)); + if (insn & (1 << 21)) { + /* Base writeback. */ + switch (i) { + case 0: offset = -8; break; + case 1: offset = 4; break; + case 2: offset = -4; break; + case 3: offset = 0; break; + default: abort(); + } + if (offset) + tcg_gen_addi_i32(addr, addr, offset); + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + gen_rfe(s, tmp, tmp2); + return; + } else if ((insn & 0x0e000000) == 0x0a000000) { + /* branch link and change to thumb (blx ) */ + int32_t offset; + + val = (uint32_t)s->pc; + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + store_reg(s, 14, tmp); + /* Sign-extend the 24-bit offset */ + offset = (((int32_t)insn) << 8) >> 8; + /* offset * 4 + bit24 * 2 + (thumb bit) */ + val += (offset << 2) | ((insn >> 23) & 2) | 1; + /* pipeline offset */ + val += 4; + /* protected by ARCH(5); above, near the start of uncond block */ + gen_bx_im(s, val); + return; + } else if ((insn & 0x0e000f00) == 0x0c000100) { + if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) { + /* iWMMXt register transfer. */ + if (extract32(s->c15_cpar, 1, 1)) { + if (!disas_iwmmxt_insn(s, insn)) { + return; + } + } + } + } else if ((insn & 0x0fe00000) == 0x0c400000) { + /* Coprocessor double register transfer. */ + ARCH(5TE); + } else if ((insn & 0x0f000010) == 0x0e000010) { + /* Additional coprocessor register transfer. */ + } else if ((insn & 0x0ff10020) == 0x01000000) { + uint32_t mask; + uint32_t val; + /* cps (privileged) */ + if (IS_USER(s)) + return; + mask = val = 0; + if (insn & (1 << 19)) { + if (insn & (1 << 8)) + mask |= CPSR_A; + if (insn & (1 << 7)) + mask |= CPSR_I; + if (insn & (1 << 6)) + mask |= CPSR_F; + if (insn & (1 << 18)) + val |= mask; + } + if (insn & (1 << 17)) { + mask |= CPSR_M; + val |= (insn & 0x1f); + } + if (mask) { + gen_set_psr_im(s, mask, 0, val); + } + return; + } + goto illegal_op; + } + if (cond != 0xe) { + /* if not always execute, we generate a conditional jump to + next instruction */ + s->condlabel = gen_new_label(); + arm_gen_test_cc(cond ^ 1, s->condlabel); + s->condjmp = 1; + } + if ((insn & 0x0f900000) == 0x03000000) { + if ((insn & (1 << 21)) == 0) { + ARCH(6T2); + rd = (insn >> 12) & 0xf; + val = ((insn >> 4) & 0xf000) | (insn & 0xfff); + if ((insn & (1 << 22)) == 0) { + /* MOVW */ + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + } else { + /* MOVT */ + tmp = load_reg(s, rd); + tcg_gen_ext16u_i32(tmp, tmp); + tcg_gen_ori_i32(tmp, tmp, val << 16); + } + store_reg(s, rd, tmp); + } else { + if (((insn >> 12) & 0xf) != 0xf) + goto illegal_op; + if (((insn >> 16) & 0xf) == 0) { + gen_nop_hint(s, insn & 0xff); + } else { + /* CPSR = immediate */ + val = insn & 0xff; + shift = ((insn >> 8) & 0xf) * 2; + if (shift) + val = (val >> shift) | (val << (32 - shift)); + i = ((insn & (1 << 22)) != 0); + if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i), + i, val)) { + goto illegal_op; + } + } + } + } else if ((insn & 0x0f900000) == 0x01000000 + && (insn & 0x00000090) != 0x00000090) { + /* miscellaneous instructions */ + op1 = (insn >> 21) & 3; + sh = (insn >> 4) & 0xf; + rm = insn & 0xf; + switch (sh) { + case 0x0: /* move program status register */ + if (op1 & 1) { + /* PSR = reg */ + tmp = load_reg(s, rm); + i = ((op1 & 2) != 0); + if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp)) + goto illegal_op; + } else { + /* reg = PSR */ + rd = (insn >> 12) & 0xf; + if (op1 & 2) { + if (IS_USER(s)) + goto illegal_op; + tmp = load_cpu_field(spsr); + } else { + tmp = tcg_temp_new_i32(); + gen_helper_cpsr_read(tmp, cpu_env); + } + store_reg(s, rd, tmp); + } + break; + case 0x1: + if (op1 == 1) { + /* branch/exchange thumb (bx). */ + ARCH(4T); + tmp = load_reg(s, rm); + gen_bx(s, tmp); + } else if (op1 == 3) { + /* clz */ + ARCH(5); + rd = (insn >> 12) & 0xf; + tmp = load_reg(s, rm); + gen_helper_clz(tmp, tmp); + store_reg(s, rd, tmp); + } else { + goto illegal_op; + } + break; + case 0x2: + if (op1 == 1) { + ARCH(5J); /* bxj */ + /* Trivial implementation equivalent to bx. */ + tmp = load_reg(s, rm); + gen_bx(s, tmp); + } else { + goto illegal_op; + } + break; + case 0x3: + if (op1 != 1) + goto illegal_op; + + ARCH(5); + /* branch link/exchange thumb (blx) */ + tmp = load_reg(s, rm); + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, s->pc); + store_reg(s, 14, tmp2); + gen_bx(s, tmp); + break; + case 0x4: + { + /* crc32/crc32c */ + uint32_t c = extract32(insn, 8, 4); + + /* Check this CPU supports ARMv8 CRC instructions. + * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED. + * Bits 8, 10 and 11 should be zero. + */ + if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 || + (c & 0xd) != 0) { + goto illegal_op; + } + + rn = extract32(insn, 16, 4); + rd = extract32(insn, 12, 4); + + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + if (op1 == 0) { + tcg_gen_andi_i32(tmp2, tmp2, 0xff); + } else if (op1 == 1) { + tcg_gen_andi_i32(tmp2, tmp2, 0xffff); + } + tmp3 = tcg_const_i32(1 << op1); + if (c & 0x2) { + gen_helper_crc32c(tmp, tmp, tmp2, tmp3); + } else { + gen_helper_crc32(tmp, tmp, tmp2, tmp3); + } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + store_reg(s, rd, tmp); + break; + } + case 0x5: /* saturating add/subtract */ + ARCH(5TE); + rd = (insn >> 12) & 0xf; + rn = (insn >> 16) & 0xf; + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rn); + if (op1 & 2) + gen_helper_double_saturate(tmp2, cpu_env, tmp2); + if (op1 & 1) + gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2); + else + gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + case 7: + { + int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4); + switch (op1) { + case 1: + /* bkpt */ + ARCH(5); + gen_exception_insn(s, 4, EXCP_BKPT, + syn_aa32_bkpt(imm16, false), + default_exception_el(s)); + break; + case 2: + /* Hypervisor call (v7) */ + ARCH(7); + if (IS_USER(s)) { + goto illegal_op; + } + gen_hvc(s, imm16); + break; + case 3: + /* Secure monitor call (v6+) */ + ARCH(6K); + if (IS_USER(s)) { + goto illegal_op; + } + gen_smc(s); + break; + default: + goto illegal_op; + } + break; + } + case 0x8: /* signed multiply */ + case 0xa: + case 0xc: + case 0xe: + ARCH(5TE); + rs = (insn >> 8) & 0xf; + rn = (insn >> 12) & 0xf; + rd = (insn >> 16) & 0xf; + if (op1 == 1) { + /* (32 * 16) >> 16 */ + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rs); + if (sh & 4) + tcg_gen_sari_i32(tmp2, tmp2, 16); + else + gen_sxth(tmp2); + tmp64 = gen_muls_i64_i32(tmp, tmp2); + tcg_gen_shri_i64(tmp64, tmp64, 16); + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); + if ((sh & 2) == 0) { + tmp2 = load_reg(s, rn); + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + store_reg(s, rd, tmp); + } else { + /* 16 * 16 */ + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rs); + gen_mulxy(tmp, tmp2, sh & 2, sh & 4); + tcg_temp_free_i32(tmp2); + if (op1 == 2) { + tmp64 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(tmp64, tmp); + tcg_temp_free_i32(tmp); + gen_addq(s, tmp64, rn, rd); + gen_storeq_reg(s, rn, rd, tmp64); + tcg_temp_free_i64(tmp64); + } else { + if (op1 == 0) { + tmp2 = load_reg(s, rn); + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + store_reg(s, rd, tmp); + } + } + break; + default: + goto illegal_op; + } + } else if (((insn & 0x0e000000) == 0 && + (insn & 0x00000090) != 0x90) || + ((insn & 0x0e000000) == (1 << 25))) { + int set_cc, logic_cc, shiftop; + + op1 = (insn >> 21) & 0xf; + set_cc = (insn >> 20) & 1; + logic_cc = table_logic_cc[op1] & set_cc; + + /* data processing instruction */ + if (insn & (1 << 25)) { + /* immediate operand */ + val = insn & 0xff; + shift = ((insn >> 8) & 0xf) * 2; + if (shift) { + val = (val >> shift) | (val << (32 - shift)); + } + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, val); + if (logic_cc && shift) { + gen_set_CF_bit31(tmp2); + } + } else { + /* register */ + rm = (insn) & 0xf; + tmp2 = load_reg(s, rm); + shiftop = (insn >> 5) & 3; + if (!(insn & (1 << 4))) { + shift = (insn >> 7) & 0x1f; + gen_arm_shift_im(tmp2, shiftop, shift, logic_cc); + } else { + rs = (insn >> 8) & 0xf; + tmp = load_reg(s, rs); + gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc); + } + } + if (op1 != 0x0f && op1 != 0x0d) { + rn = (insn >> 16) & 0xf; + tmp = load_reg(s, rn); + } else { + TCGV_UNUSED_I32(tmp); + } + rd = (insn >> 12) & 0xf; + switch(op1) { + case 0x00: + tcg_gen_and_i32(tmp, tmp, tmp2); + if (logic_cc) { + gen_logic_CC(tmp); + } + store_reg_bx(s, rd, tmp); + break; + case 0x01: + tcg_gen_xor_i32(tmp, tmp, tmp2); + if (logic_cc) { + gen_logic_CC(tmp); + } + store_reg_bx(s, rd, tmp); + break; + case 0x02: + if (set_cc && rd == 15) { + /* SUBS r15, ... is used for exception return. */ + if (IS_USER(s)) { + goto illegal_op; + } + gen_sub_CC(tmp, tmp, tmp2); + gen_exception_return(s, tmp); + } else { + if (set_cc) { + gen_sub_CC(tmp, tmp, tmp2); + } else { + tcg_gen_sub_i32(tmp, tmp, tmp2); + } + store_reg_bx(s, rd, tmp); + } + break; + case 0x03: + if (set_cc) { + gen_sub_CC(tmp, tmp2, tmp); + } else { + tcg_gen_sub_i32(tmp, tmp2, tmp); + } + store_reg_bx(s, rd, tmp); + break; + case 0x04: + if (set_cc) { + gen_add_CC(tmp, tmp, tmp2); + } else { + tcg_gen_add_i32(tmp, tmp, tmp2); + } + store_reg_bx(s, rd, tmp); + break; + case 0x05: + if (set_cc) { + gen_adc_CC(tmp, tmp, tmp2); + } else { + gen_add_carry(tmp, tmp, tmp2); + } + store_reg_bx(s, rd, tmp); + break; + case 0x06: + if (set_cc) { + gen_sbc_CC(tmp, tmp, tmp2); + } else { + gen_sub_carry(tmp, tmp, tmp2); + } + store_reg_bx(s, rd, tmp); + break; + case 0x07: + if (set_cc) { + gen_sbc_CC(tmp, tmp2, tmp); + } else { + gen_sub_carry(tmp, tmp2, tmp); + } + store_reg_bx(s, rd, tmp); + break; + case 0x08: + if (set_cc) { + tcg_gen_and_i32(tmp, tmp, tmp2); + gen_logic_CC(tmp); + } + tcg_temp_free_i32(tmp); + break; + case 0x09: + if (set_cc) { + tcg_gen_xor_i32(tmp, tmp, tmp2); + gen_logic_CC(tmp); + } + tcg_temp_free_i32(tmp); + break; + case 0x0a: + if (set_cc) { + gen_sub_CC(tmp, tmp, tmp2); + } + tcg_temp_free_i32(tmp); + break; + case 0x0b: + if (set_cc) { + gen_add_CC(tmp, tmp, tmp2); + } + tcg_temp_free_i32(tmp); + break; + case 0x0c: + tcg_gen_or_i32(tmp, tmp, tmp2); + if (logic_cc) { + gen_logic_CC(tmp); + } + store_reg_bx(s, rd, tmp); + break; + case 0x0d: + if (logic_cc && rd == 15) { + /* MOVS r15, ... is used for exception return. */ + if (IS_USER(s)) { + goto illegal_op; + } + gen_exception_return(s, tmp2); + } else { + if (logic_cc) { + gen_logic_CC(tmp2); + } + store_reg_bx(s, rd, tmp2); + } + break; + case 0x0e: + tcg_gen_andc_i32(tmp, tmp, tmp2); + if (logic_cc) { + gen_logic_CC(tmp); + } + store_reg_bx(s, rd, tmp); + break; + default: + case 0x0f: + tcg_gen_not_i32(tmp2, tmp2); + if (logic_cc) { + gen_logic_CC(tmp2); + } + store_reg_bx(s, rd, tmp2); + break; + } + if (op1 != 0x0f && op1 != 0x0d) { + tcg_temp_free_i32(tmp2); + } + } else { + /* other instructions */ + op1 = (insn >> 24) & 0xf; + switch(op1) { + case 0x0: + case 0x1: + /* multiplies, extra load/stores */ + sh = (insn >> 5) & 3; + if (sh == 0) { + if (op1 == 0x0) { + rd = (insn >> 16) & 0xf; + rn = (insn >> 12) & 0xf; + rs = (insn >> 8) & 0xf; + rm = (insn) & 0xf; + op1 = (insn >> 20) & 0xf; + switch (op1) { + case 0: case 1: case 2: case 3: case 6: + /* 32 bit mul */ + tmp = load_reg(s, rs); + tmp2 = load_reg(s, rm); + tcg_gen_mul_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + if (insn & (1 << 22)) { + /* Subtract (mls) */ + ARCH(6T2); + tmp2 = load_reg(s, rn); + tcg_gen_sub_i32(tmp, tmp2, tmp); + tcg_temp_free_i32(tmp2); + } else if (insn & (1 << 21)) { + /* Add */ + tmp2 = load_reg(s, rn); + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + if (insn & (1 << 20)) + gen_logic_CC(tmp); + store_reg(s, rd, tmp); + break; + case 4: + /* 64 bit mul double accumulate (UMAAL) */ + ARCH(6); + tmp = load_reg(s, rs); + tmp2 = load_reg(s, rm); + tmp64 = gen_mulu_i64_i32(tmp, tmp2); + gen_addq_lo(s, tmp64, rn); + gen_addq_lo(s, tmp64, rd); + gen_storeq_reg(s, rn, rd, tmp64); + tcg_temp_free_i64(tmp64); + break; + case 8: case 9: case 10: case 11: + case 12: case 13: case 14: case 15: + /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */ + tmp = load_reg(s, rs); + tmp2 = load_reg(s, rm); + if (insn & (1 << 22)) { + tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2); + } else { + tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2); + } + if (insn & (1 << 21)) { /* mult accumulate */ + TCGv_i32 al = load_reg(s, rn); + TCGv_i32 ah = load_reg(s, rd); + tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah); + tcg_temp_free_i32(al); + tcg_temp_free_i32(ah); + } + if (insn & (1 << 20)) { + gen_logicq_cc(tmp, tmp2); + } + store_reg(s, rn, tmp); + store_reg(s, rd, tmp2); + break; + default: + goto illegal_op; + } + } else { + rn = (insn >> 16) & 0xf; + rd = (insn >> 12) & 0xf; + if (insn & (1 << 23)) { + /* load/store exclusive */ + int op2 = (insn >> 8) & 3; + op1 = (insn >> 21) & 0x3; + + switch (op2) { + case 0: /* lda/stl */ + if (op1 == 1) { + goto illegal_op; + } + ARCH(8); + break; + case 1: /* reserved */ + goto illegal_op; + case 2: /* ldaex/stlex */ + ARCH(8); + break; + case 3: /* ldrex/strex */ + if (op1) { + ARCH(6K); + } else { + ARCH(6); + } + break; + } + + addr = tcg_temp_local_new_i32(); + load_reg_var(s, addr, rn); + + /* Since the emulation does not have barriers, + the acquire/release semantics need no special + handling */ + if (op2 == 0) { + if (insn & (1 << 20)) { + tmp = tcg_temp_new_i32(); + switch (op1) { + case 0: /* lda */ + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + break; + case 2: /* ldab */ + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + break; + case 3: /* ldah */ + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + break; + default: + abort(); + } + store_reg(s, rd, tmp); + } else { + rm = insn & 0xf; + tmp = load_reg(s, rm); + switch (op1) { + case 0: /* stl */ + gen_aa32_st32(tmp, addr, get_mem_index(s)); + break; + case 2: /* stlb */ + gen_aa32_st8(tmp, addr, get_mem_index(s)); + break; + case 3: /* stlh */ + gen_aa32_st16(tmp, addr, get_mem_index(s)); + break; + default: + abort(); + } + tcg_temp_free_i32(tmp); + } + } else if (insn & (1 << 20)) { + switch (op1) { + case 0: /* ldrex */ + gen_load_exclusive(s, rd, 15, addr, 2); + break; + case 1: /* ldrexd */ + gen_load_exclusive(s, rd, rd + 1, addr, 3); + break; + case 2: /* ldrexb */ + gen_load_exclusive(s, rd, 15, addr, 0); + break; + case 3: /* ldrexh */ + gen_load_exclusive(s, rd, 15, addr, 1); + break; + default: + abort(); + } + } else { + rm = insn & 0xf; + switch (op1) { + case 0: /* strex */ + gen_store_exclusive(s, rd, rm, 15, addr, 2); + break; + case 1: /* strexd */ + gen_store_exclusive(s, rd, rm, rm + 1, addr, 3); + break; + case 2: /* strexb */ + gen_store_exclusive(s, rd, rm, 15, addr, 0); + break; + case 3: /* strexh */ + gen_store_exclusive(s, rd, rm, 15, addr, 1); + break; + default: + abort(); + } + } + tcg_temp_free_i32(addr); + } else { + /* SWP instruction */ + rm = (insn) & 0xf; + + /* ??? This is not really atomic. However we know + we never have multiple CPUs running in parallel, + so it is good enough. */ + addr = load_reg(s, rn); + tmp = load_reg(s, rm); + tmp2 = tcg_temp_new_i32(); + if (insn & (1 << 22)) { + gen_aa32_ld8u(tmp2, addr, get_mem_index(s)); + gen_aa32_st8(tmp, addr, get_mem_index(s)); + } else { + gen_aa32_ld32u(tmp2, addr, get_mem_index(s)); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + } + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(addr); + store_reg(s, rd, tmp2); + } + } + } else { + int address_offset; + bool load = insn & (1 << 20); + bool doubleword = false; + /* Misc load/store */ + rn = (insn >> 16) & 0xf; + rd = (insn >> 12) & 0xf; + + if (!load && (sh & 2)) { + /* doubleword */ + ARCH(5TE); + if (rd & 1) { + /* UNPREDICTABLE; we choose to UNDEF */ + goto illegal_op; + } + load = (sh & 1) == 0; + doubleword = true; + } + + addr = load_reg(s, rn); + if (insn & (1 << 24)) + gen_add_datah_offset(s, insn, 0, addr); + address_offset = 0; + + if (doubleword) { + if (!load) { + /* store */ + tmp = load_reg(s, rd); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, 4); + tmp = load_reg(s, rd + 1); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } else { + /* load */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + store_reg(s, rd, tmp); + tcg_gen_addi_i32(addr, addr, 4); + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + rd++; + } + address_offset = -4; + } else if (load) { + /* load */ + tmp = tcg_temp_new_i32(); + switch (sh) { + case 1: + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + break; + case 2: + gen_aa32_ld8s(tmp, addr, get_mem_index(s)); + break; + default: + case 3: + gen_aa32_ld16s(tmp, addr, get_mem_index(s)); + break; + } + } else { + /* store */ + tmp = load_reg(s, rd); + gen_aa32_st16(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + /* Perform base writeback before the loaded value to + ensure correct behavior with overlapping index registers. + ldrd with base writeback is is undefined if the + destination and index registers overlap. */ + if (!(insn & (1 << 24))) { + gen_add_datah_offset(s, insn, address_offset, addr); + store_reg(s, rn, addr); + } else if (insn & (1 << 21)) { + if (address_offset) + tcg_gen_addi_i32(addr, addr, address_offset); + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + if (load) { + /* Complete the load. */ + store_reg(s, rd, tmp); + } + } + break; + case 0x4: + case 0x5: + goto do_ldst; + case 0x6: + case 0x7: + if (insn & (1 << 4)) { + ARCH(6); + /* Armv6 Media instructions. */ + rm = insn & 0xf; + rn = (insn >> 16) & 0xf; + rd = (insn >> 12) & 0xf; + rs = (insn >> 8) & 0xf; + switch ((insn >> 23) & 3) { + case 0: /* Parallel add/subtract. */ + op1 = (insn >> 20) & 7; + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + sh = (insn >> 5) & 7; + if ((op1 & 3) == 0 || sh == 5 || sh == 6) + goto illegal_op; + gen_arm_parallel_addsub(op1, sh, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + case 1: + if ((insn & 0x00700020) == 0) { + /* Halfword pack. */ + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + shift = (insn >> 7) & 0x1f; + if (insn & (1 << 6)) { + /* pkhtb */ + if (shift == 0) + shift = 31; + tcg_gen_sari_i32(tmp2, tmp2, shift); + tcg_gen_andi_i32(tmp, tmp, 0xffff0000); + tcg_gen_ext16u_i32(tmp2, tmp2); + } else { + /* pkhbt */ + if (shift) + tcg_gen_shli_i32(tmp2, tmp2, shift); + tcg_gen_ext16u_i32(tmp, tmp); + tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); + } + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else if ((insn & 0x00200020) == 0x00200000) { + /* [us]sat */ + tmp = load_reg(s, rm); + shift = (insn >> 7) & 0x1f; + if (insn & (1 << 6)) { + if (shift == 0) + shift = 31; + tcg_gen_sari_i32(tmp, tmp, shift); + } else { + tcg_gen_shli_i32(tmp, tmp, shift); + } + sh = (insn >> 16) & 0x1f; + tmp2 = tcg_const_i32(sh); + if (insn & (1 << 22)) + gen_helper_usat(tmp, cpu_env, tmp, tmp2); + else + gen_helper_ssat(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else if ((insn & 0x00300fe0) == 0x00200f20) { + /* [us]sat16 */ + tmp = load_reg(s, rm); + sh = (insn >> 16) & 0x1f; + tmp2 = tcg_const_i32(sh); + if (insn & (1 << 22)) + gen_helper_usat16(tmp, cpu_env, tmp, tmp2); + else + gen_helper_ssat16(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else if ((insn & 0x00700fe0) == 0x00000fa0) { + /* Select bytes. */ + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + tmp3 = tcg_temp_new_i32(); + tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE)); + gen_helper_sel_flags(tmp, tmp3, tmp, tmp2); + tcg_temp_free_i32(tmp3); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else if ((insn & 0x000003e0) == 0x00000060) { + tmp = load_reg(s, rm); + shift = (insn >> 10) & 3; + /* ??? In many cases it's not necessary to do a + rotate, a shift is sufficient. */ + if (shift != 0) + tcg_gen_rotri_i32(tmp, tmp, shift * 8); + op1 = (insn >> 20) & 7; + switch (op1) { + case 0: gen_sxtb16(tmp); break; + case 2: gen_sxtb(tmp); break; + case 3: gen_sxth(tmp); break; + case 4: gen_uxtb16(tmp); break; + case 6: gen_uxtb(tmp); break; + case 7: gen_uxth(tmp); break; + default: goto illegal_op; + } + if (rn != 15) { + tmp2 = load_reg(s, rn); + if ((op1 & 3) == 0) { + gen_add16(tmp, tmp2); + } else { + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + } + store_reg(s, rd, tmp); + } else if ((insn & 0x003f0f60) == 0x003f0f20) { + /* rev */ + tmp = load_reg(s, rm); + if (insn & (1 << 22)) { + if (insn & (1 << 7)) { + gen_revsh(tmp); + } else { + ARCH(6T2); + gen_helper_rbit(tmp, tmp); + } + } else { + if (insn & (1 << 7)) + gen_rev16(tmp); + else + tcg_gen_bswap32_i32(tmp, tmp); + } + store_reg(s, rd, tmp); + } else { + goto illegal_op; + } + break; + case 2: /* Multiplies (Type 3). */ + switch ((insn >> 20) & 0x7) { + case 5: + if (((insn >> 6) ^ (insn >> 7)) & 1) { + /* op2 not 00x or 11x : UNDEF */ + goto illegal_op; + } + /* Signed multiply most significant [accumulate]. + (SMMUL, SMMLA, SMMLS) */ + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rs); + tmp64 = gen_muls_i64_i32(tmp, tmp2); + + if (rd != 15) { + tmp = load_reg(s, rd); + if (insn & (1 << 6)) { + tmp64 = gen_subq_msw(tmp64, tmp); + } else { + tmp64 = gen_addq_msw(tmp64, tmp); + } + } + if (insn & (1 << 5)) { + tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u); + } + tcg_gen_shri_i64(tmp64, tmp64, 32); + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); + store_reg(s, rn, tmp); + break; + case 0: + case 4: + /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */ + if (insn & (1 << 7)) { + goto illegal_op; + } + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rs); + if (insn & (1 << 5)) + gen_swap_half(tmp2); + gen_smul_dual(tmp, tmp2); + if (insn & (1 << 22)) { + /* smlald, smlsld */ + TCGv_i64 tmp64_2; + + tmp64 = tcg_temp_new_i64(); + tmp64_2 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(tmp64, tmp); + tcg_gen_ext_i32_i64(tmp64_2, tmp2); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + if (insn & (1 << 6)) { + tcg_gen_sub_i64(tmp64, tmp64, tmp64_2); + } else { + tcg_gen_add_i64(tmp64, tmp64, tmp64_2); + } + tcg_temp_free_i64(tmp64_2); + gen_addq(s, tmp64, rd, rn); + gen_storeq_reg(s, rd, rn, tmp64); + tcg_temp_free_i64(tmp64); + } else { + /* smuad, smusd, smlad, smlsd */ + if (insn & (1 << 6)) { + /* This subtraction cannot overflow. */ + tcg_gen_sub_i32(tmp, tmp, tmp2); + } else { + /* This addition cannot overflow 32 bits; + * however it may overflow considered as a + * signed operation, in which case we must set + * the Q flag. + */ + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + } + tcg_temp_free_i32(tmp2); + if (rd != 15) + { + tmp2 = load_reg(s, rd); + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + store_reg(s, rn, tmp); + } + break; + case 1: + case 3: + /* SDIV, UDIV */ + if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) { + goto illegal_op; + } + if (((insn >> 5) & 7) || (rd != 15)) { + goto illegal_op; + } + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rs); + if (insn & (1 << 21)) { + gen_helper_udiv(tmp, tmp, tmp2); + } else { + gen_helper_sdiv(tmp, tmp, tmp2); + } + tcg_temp_free_i32(tmp2); + store_reg(s, rn, tmp); + break; + default: + goto illegal_op; + } + break; + case 3: + op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7); + switch (op1) { + case 0: /* Unsigned sum of absolute differences. */ + ARCH(6); + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rs); + gen_helper_usad8(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + if (rd != 15) { + tmp2 = load_reg(s, rd); + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + store_reg(s, rn, tmp); + break; + case 0x20: case 0x24: case 0x28: case 0x2c: + /* Bitfield insert/clear. */ + ARCH(6T2); + shift = (insn >> 7) & 0x1f; + i = (insn >> 16) & 0x1f; + if (i < shift) { + /* UNPREDICTABLE; we choose to UNDEF */ + goto illegal_op; + } + i = i + 1 - shift; + if (rm == 15) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else { + tmp = load_reg(s, rm); + } + if (i != 32) { + tmp2 = load_reg(s, rd); + tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i); + tcg_temp_free_i32(tmp2); + } + store_reg(s, rd, tmp); + break; + case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */ + case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */ + ARCH(6T2); + tmp = load_reg(s, rm); + shift = (insn >> 7) & 0x1f; + i = ((insn >> 16) & 0x1f) + 1; + if (shift + i > 32) + goto illegal_op; + if (i < 32) { + if (op1 & 0x20) { + gen_ubfx(tmp, shift, (1u << i) - 1); + } else { + gen_sbfx(tmp, shift, i); + } + } + store_reg(s, rd, tmp); + break; + default: + goto illegal_op; + } + break; + } + break; + } + do_ldst: + /* Check for undefined extension instructions + * per the ARM Bible IE: + * xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx + */ + sh = (0xf << 20) | (0xf << 4); + if (op1 == 0x7 && ((insn & sh) == sh)) + { + goto illegal_op; + } + /* load/store byte/word */ + rn = (insn >> 16) & 0xf; + rd = (insn >> 12) & 0xf; + tmp2 = load_reg(s, rn); + if ((insn & 0x01200000) == 0x00200000) { + /* ldrt/strt */ + i = get_a32_user_mem_index(s); + } else { + i = get_mem_index(s); + } + if (insn & (1 << 24)) + gen_add_data_offset(s, insn, tmp2); + if (insn & (1 << 20)) { + /* load */ + tmp = tcg_temp_new_i32(); + if (insn & (1 << 22)) { + gen_aa32_ld8u(tmp, tmp2, i); + } else { + gen_aa32_ld32u(tmp, tmp2, i); + } + } else { + /* store */ + tmp = load_reg(s, rd); + if (insn & (1 << 22)) { + gen_aa32_st8(tmp, tmp2, i); + } else { + gen_aa32_st32(tmp, tmp2, i); + } + tcg_temp_free_i32(tmp); + } + if (!(insn & (1 << 24))) { + gen_add_data_offset(s, insn, tmp2); + store_reg(s, rn, tmp2); + } else if (insn & (1 << 21)) { + store_reg(s, rn, tmp2); + } else { + tcg_temp_free_i32(tmp2); + } + if (insn & (1 << 20)) { + /* Complete the load. */ + store_reg_from_load(s, rd, tmp); + } + break; + case 0x08: + case 0x09: + { + int j, n, loaded_base; + bool exc_return = false; + bool is_load = extract32(insn, 20, 1); + bool user = false; + TCGv_i32 loaded_var; + /* load/store multiple words */ + /* XXX: store correct base if write back */ + if (insn & (1 << 22)) { + /* LDM (user), LDM (exception return) and STM (user) */ + if (IS_USER(s)) + goto illegal_op; /* only usable in supervisor mode */ + + if (is_load && extract32(insn, 15, 1)) { + exc_return = true; + } else { + user = true; + } + } + rn = (insn >> 16) & 0xf; + addr = load_reg(s, rn); + + /* compute total size */ + loaded_base = 0; + TCGV_UNUSED_I32(loaded_var); + n = 0; + for(i=0;i<16;i++) { + if (insn & (1 << i)) + n++; + } + /* XXX: test invalid n == 0 case ? */ + if (insn & (1 << 23)) { + if (insn & (1 << 24)) { + /* pre increment */ + tcg_gen_addi_i32(addr, addr, 4); + } else { + /* post increment */ + } + } else { + if (insn & (1 << 24)) { + /* pre decrement */ + tcg_gen_addi_i32(addr, addr, -(n * 4)); + } else { + /* post decrement */ + if (n != 1) + tcg_gen_addi_i32(addr, addr, -((n - 1) * 4)); + } + } + j = 0; + for(i=0;i<16;i++) { + if (insn & (1 << i)) { + if (is_load) { + /* load */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + if (user) { + tmp2 = tcg_const_i32(i); + gen_helper_set_user_reg(cpu_env, tmp2, tmp); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + } else if (i == rn) { + loaded_var = tmp; + loaded_base = 1; + } else { + store_reg_from_load(s, i, tmp); + } + } else { + /* store */ + if (i == 15) { + /* special case: r15 = PC + 8 */ + val = (long)s->pc + 4; + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + } else if (user) { + tmp = tcg_temp_new_i32(); + tmp2 = tcg_const_i32(i); + gen_helper_get_user_reg(tmp, cpu_env, tmp2); + tcg_temp_free_i32(tmp2); + } else { + tmp = load_reg(s, i); + } + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + j++; + /* no need to add after the last transfer */ + if (j != n) + tcg_gen_addi_i32(addr, addr, 4); + } + } + if (insn & (1 << 21)) { + /* write back */ + if (insn & (1 << 23)) { + if (insn & (1 << 24)) { + /* pre increment */ + } else { + /* post increment */ + tcg_gen_addi_i32(addr, addr, 4); + } + } else { + if (insn & (1 << 24)) { + /* pre decrement */ + if (n != 1) + tcg_gen_addi_i32(addr, addr, -((n - 1) * 4)); + } else { + /* post decrement */ + tcg_gen_addi_i32(addr, addr, -(n * 4)); + } + } + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + if (loaded_base) { + store_reg(s, rn, loaded_var); + } + if (exc_return) { + /* Restore CPSR from SPSR. */ + tmp = load_cpu_field(spsr); + gen_set_cpsr(tmp, CPSR_ERET_MASK); + tcg_temp_free_i32(tmp); + s->is_jmp = DISAS_UPDATE; + } + } + break; + case 0xa: + case 0xb: + { + int32_t offset; + + /* branch (and link) */ + val = (int32_t)s->pc; + if (insn & (1 << 24)) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + store_reg(s, 14, tmp); + } + offset = sextract32(insn << 2, 0, 26); + val += offset + 4; + gen_jmp(s, val); + } + break; + case 0xc: + case 0xd: + case 0xe: + if (((insn >> 8) & 0xe) == 10) { + /* VFP. */ + if (disas_vfp_insn(s, insn)) { + goto illegal_op; + } + } else if (disas_coproc_insn(s, insn)) { + /* Coprocessor. */ + goto illegal_op; + } + break; + case 0xf: + /* swi */ + gen_set_pc_im(s, s->pc); + s->svc_imm = extract32(insn, 0, 24); + s->is_jmp = DISAS_SWI; + break; + default: + illegal_op: + gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), + default_exception_el(s)); + break; + } + } +} + +/* Return true if this is a Thumb-2 logical op. */ +static int +thumb2_logic_op(int op) +{ + return (op < 8); +} + +/* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero + then set condition code flags based on the result of the operation. + If SHIFTER_OUT is nonzero then set the carry flag for logical operations + to the high bit of T1. + Returns zero if the opcode is valid. */ + +static int +gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out, + TCGv_i32 t0, TCGv_i32 t1) +{ + int logic_cc; + + logic_cc = 0; + switch (op) { + case 0: /* and */ + tcg_gen_and_i32(t0, t0, t1); + logic_cc = conds; + break; + case 1: /* bic */ + tcg_gen_andc_i32(t0, t0, t1); + logic_cc = conds; + break; + case 2: /* orr */ + tcg_gen_or_i32(t0, t0, t1); + logic_cc = conds; + break; + case 3: /* orn */ + tcg_gen_orc_i32(t0, t0, t1); + logic_cc = conds; + break; + case 4: /* eor */ + tcg_gen_xor_i32(t0, t0, t1); + logic_cc = conds; + break; + case 8: /* add */ + if (conds) + gen_add_CC(t0, t0, t1); + else + tcg_gen_add_i32(t0, t0, t1); + break; + case 10: /* adc */ + if (conds) + gen_adc_CC(t0, t0, t1); + else + gen_adc(t0, t1); + break; + case 11: /* sbc */ + if (conds) { + gen_sbc_CC(t0, t0, t1); + } else { + gen_sub_carry(t0, t0, t1); + } + break; + case 13: /* sub */ + if (conds) + gen_sub_CC(t0, t0, t1); + else + tcg_gen_sub_i32(t0, t0, t1); + break; + case 14: /* rsb */ + if (conds) + gen_sub_CC(t0, t1, t0); + else + tcg_gen_sub_i32(t0, t1, t0); + break; + default: /* 5, 6, 7, 9, 12, 15. */ + return 1; + } + if (logic_cc) { + gen_logic_CC(t0); + if (shifter_out) + gen_set_CF_bit31(t1); + } + return 0; +} + +/* Translate a 32-bit thumb instruction. Returns nonzero if the instruction + is not legal. */ +static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw1) +{ + uint32_t insn, imm, shift, offset; + uint32_t rd, rn, rm, rs; + TCGv_i32 tmp; + TCGv_i32 tmp2; + TCGv_i32 tmp3; + TCGv_i32 addr; + TCGv_i64 tmp64; + int op; + int shiftop; + int conds; + int logic_cc; + + if (!(arm_dc_feature(s, ARM_FEATURE_THUMB2) + || arm_dc_feature(s, ARM_FEATURE_M))) { + /* Thumb-1 cores may need to treat bl and blx as a pair of + 16-bit instructions to get correct prefetch abort behavior. */ + insn = insn_hw1; + if ((insn & (1 << 12)) == 0) { + ARCH(5); + /* Second half of blx. */ + offset = ((insn & 0x7ff) << 1); + tmp = load_reg(s, 14); + tcg_gen_addi_i32(tmp, tmp, offset); + tcg_gen_andi_i32(tmp, tmp, 0xfffffffc); + + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, s->pc | 1); + store_reg(s, 14, tmp2); + gen_bx(s, tmp); + return 0; + } + if (insn & (1 << 11)) { + /* Second half of bl. */ + offset = ((insn & 0x7ff) << 1) | 1; + tmp = load_reg(s, 14); + tcg_gen_addi_i32(tmp, tmp, offset); + + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, s->pc | 1); + store_reg(s, 14, tmp2); + gen_bx(s, tmp); + return 0; + } + if ((s->pc & ~TARGET_PAGE_MASK) == 0) { + /* Instruction spans a page boundary. Implement it as two + 16-bit instructions in case the second half causes an + prefetch abort. */ + offset = ((int32_t)insn << 21) >> 9; + tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset); + return 0; + } + /* Fall through to 32-bit decode. */ + } + + insn = arm_lduw_code(env, s->pc, s->bswap_code); + s->pc += 2; + insn |= (uint32_t)insn_hw1 << 16; + + if ((insn & 0xf800e800) != 0xf000e800) { + ARCH(6T2); + } + + rn = (insn >> 16) & 0xf; + rs = (insn >> 12) & 0xf; + rd = (insn >> 8) & 0xf; + rm = insn & 0xf; + switch ((insn >> 25) & 0xf) { + case 0: case 1: case 2: case 3: + /* 16-bit instructions. Should never happen. */ + abort(); + case 4: + if (insn & (1 << 22)) { + /* Other load/store, table branch. */ + if (insn & 0x01200000) { + /* Load/store doubleword. */ + if (rn == 15) { + addr = tcg_temp_new_i32(); + tcg_gen_movi_i32(addr, s->pc & ~3); + } else { + addr = load_reg(s, rn); + } + offset = (insn & 0xff) * 4; + if ((insn & (1 << 23)) == 0) + offset = -offset; + if (insn & (1 << 24)) { + tcg_gen_addi_i32(addr, addr, offset); + offset = 0; + } + if (insn & (1 << 20)) { + /* ldrd */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + store_reg(s, rs, tmp); + tcg_gen_addi_i32(addr, addr, 4); + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + store_reg(s, rd, tmp); + } else { + /* strd */ + tmp = load_reg(s, rs); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + tcg_gen_addi_i32(addr, addr, 4); + tmp = load_reg(s, rd); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + if (insn & (1 << 21)) { + /* Base writeback. */ + if (rn == 15) + goto illegal_op; + tcg_gen_addi_i32(addr, addr, offset - 4); + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + } else if ((insn & (1 << 23)) == 0) { + /* Load/store exclusive word. */ + addr = tcg_temp_local_new_i32(); + load_reg_var(s, addr, rn); + tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2); + if (insn & (1 << 20)) { + gen_load_exclusive(s, rs, 15, addr, 2); + } else { + gen_store_exclusive(s, rd, rs, 15, addr, 2); + } + tcg_temp_free_i32(addr); + } else if ((insn & (7 << 5)) == 0) { + /* Table Branch. */ + if (rn == 15) { + addr = tcg_temp_new_i32(); + tcg_gen_movi_i32(addr, s->pc); + } else { + addr = load_reg(s, rn); + } + tmp = load_reg(s, rm); + tcg_gen_add_i32(addr, addr, tmp); + if (insn & (1 << 4)) { + /* tbh */ + tcg_gen_add_i32(addr, addr, tmp); + tcg_temp_free_i32(tmp); + tmp = tcg_temp_new_i32(); + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + } else { /* tbb */ + tcg_temp_free_i32(tmp); + tmp = tcg_temp_new_i32(); + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + } + tcg_temp_free_i32(addr); + tcg_gen_shli_i32(tmp, tmp, 1); + tcg_gen_addi_i32(tmp, tmp, s->pc); + store_reg(s, 15, tmp); + } else { + int op2 = (insn >> 6) & 0x3; + op = (insn >> 4) & 0x3; + switch (op2) { + case 0: + goto illegal_op; + case 1: + /* Load/store exclusive byte/halfword/doubleword */ + if (op == 2) { + goto illegal_op; + } + ARCH(7); + break; + case 2: + /* Load-acquire/store-release */ + if (op == 3) { + goto illegal_op; + } + /* Fall through */ + case 3: + /* Load-acquire/store-release exclusive */ + ARCH(8); + break; + } + addr = tcg_temp_local_new_i32(); + load_reg_var(s, addr, rn); + if (!(op2 & 1)) { + if (insn & (1 << 20)) { + tmp = tcg_temp_new_i32(); + switch (op) { + case 0: /* ldab */ + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + break; + case 1: /* ldah */ + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + break; + case 2: /* lda */ + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + break; + default: + abort(); + } + store_reg(s, rs, tmp); + } else { + tmp = load_reg(s, rs); + switch (op) { + case 0: /* stlb */ + gen_aa32_st8(tmp, addr, get_mem_index(s)); + break; + case 1: /* stlh */ + gen_aa32_st16(tmp, addr, get_mem_index(s)); + break; + case 2: /* stl */ + gen_aa32_st32(tmp, addr, get_mem_index(s)); + break; + default: + abort(); + } + tcg_temp_free_i32(tmp); + } + } else if (insn & (1 << 20)) { + gen_load_exclusive(s, rs, rd, addr, op); + } else { + gen_store_exclusive(s, rm, rs, rd, addr, op); + } + tcg_temp_free_i32(addr); + } + } else { + /* Load/store multiple, RFE, SRS. */ + if (((insn >> 23) & 1) == ((insn >> 24) & 1)) { + /* RFE, SRS: not available in user mode or on M profile */ + if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) { + goto illegal_op; + } + if (insn & (1 << 20)) { + /* rfe */ + addr = load_reg(s, rn); + if ((insn & (1 << 24)) == 0) + tcg_gen_addi_i32(addr, addr, -8); + /* Load PC into tmp and CPSR into tmp2. */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + tcg_gen_addi_i32(addr, addr, 4); + tmp2 = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp2, addr, get_mem_index(s)); + if (insn & (1 << 21)) { + /* Base writeback. */ + if (insn & (1 << 24)) { + tcg_gen_addi_i32(addr, addr, 4); + } else { + tcg_gen_addi_i32(addr, addr, -4); + } + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + gen_rfe(s, tmp, tmp2); + } else { + /* srs */ + gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2, + insn & (1 << 21)); + } + } else { + int i, loaded_base = 0; + TCGv_i32 loaded_var; + /* Load/store multiple. */ + addr = load_reg(s, rn); + offset = 0; + for (i = 0; i < 16; i++) { + if (insn & (1 << i)) + offset += 4; + } + if (insn & (1 << 24)) { + tcg_gen_addi_i32(addr, addr, -offset); + } + + TCGV_UNUSED_I32(loaded_var); + for (i = 0; i < 16; i++) { + if ((insn & (1 << i)) == 0) + continue; + if (insn & (1 << 20)) { + /* Load. */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + if (i == 15) { + gen_bx(s, tmp); + } else if (i == rn) { + loaded_var = tmp; + loaded_base = 1; + } else { + store_reg(s, i, tmp); + } + } else { + /* Store. */ + tmp = load_reg(s, i); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_gen_addi_i32(addr, addr, 4); + } + if (loaded_base) { + store_reg(s, rn, loaded_var); + } + if (insn & (1 << 21)) { + /* Base register writeback. */ + if (insn & (1 << 24)) { + tcg_gen_addi_i32(addr, addr, -offset); + } + /* Fault if writeback register is in register list. */ + if (insn & (1 << rn)) + goto illegal_op; + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + } + } + break; + case 5: + + op = (insn >> 21) & 0xf; + if (op == 6) { + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + goto illegal_op; + } + /* Halfword pack. */ + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3); + if (insn & (1 << 5)) { + /* pkhtb */ + if (shift == 0) + shift = 31; + tcg_gen_sari_i32(tmp2, tmp2, shift); + tcg_gen_andi_i32(tmp, tmp, 0xffff0000); + tcg_gen_ext16u_i32(tmp2, tmp2); + } else { + /* pkhbt */ + if (shift) + tcg_gen_shli_i32(tmp2, tmp2, shift); + tcg_gen_ext16u_i32(tmp, tmp); + tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); + } + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else { + /* Data processing register constant shift. */ + if (rn == 15) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else { + tmp = load_reg(s, rn); + } + tmp2 = load_reg(s, rm); + + shiftop = (insn >> 4) & 3; + shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); + conds = (insn & (1 << 20)) != 0; + logic_cc = (conds && thumb2_logic_op(op)); + gen_arm_shift_im(tmp2, shiftop, shift, logic_cc); + if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2)) + goto illegal_op; + tcg_temp_free_i32(tmp2); + if (rd != 15) { + store_reg(s, rd, tmp); + } else { + tcg_temp_free_i32(tmp); + } + } + break; + case 13: /* Misc data processing. */ + op = ((insn >> 22) & 6) | ((insn >> 7) & 1); + if (op < 4 && (insn & 0xf000) != 0xf000) + goto illegal_op; + switch (op) { + case 0: /* Register controlled shift. */ + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + if ((insn & 0x70) != 0) + goto illegal_op; + op = (insn >> 21) & 3; + logic_cc = (insn & (1 << 20)) != 0; + gen_arm_shift_reg(tmp, op, tmp2, logic_cc); + if (logic_cc) + gen_logic_CC(tmp); + store_reg_bx(s, rd, tmp); + break; + case 1: /* Sign/zero extend. */ + op = (insn >> 20) & 7; + switch (op) { + case 0: /* SXTAH, SXTH */ + case 1: /* UXTAH, UXTH */ + case 4: /* SXTAB, SXTB */ + case 5: /* UXTAB, UXTB */ + break; + case 2: /* SXTAB16, SXTB16 */ + case 3: /* UXTAB16, UXTB16 */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + goto illegal_op; + } + break; + default: + goto illegal_op; + } + if (rn != 15) { + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + goto illegal_op; + } + } + tmp = load_reg(s, rm); + shift = (insn >> 4) & 3; + /* ??? In many cases it's not necessary to do a + rotate, a shift is sufficient. */ + if (shift != 0) + tcg_gen_rotri_i32(tmp, tmp, shift * 8); + op = (insn >> 20) & 7; + switch (op) { + case 0: gen_sxth(tmp); break; + case 1: gen_uxth(tmp); break; + case 2: gen_sxtb16(tmp); break; + case 3: gen_uxtb16(tmp); break; + case 4: gen_sxtb(tmp); break; + case 5: gen_uxtb(tmp); break; + default: + g_assert_not_reached(); + } + if (rn != 15) { + tmp2 = load_reg(s, rn); + if ((op >> 1) == 1) { + gen_add16(tmp, tmp2); + } else { + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + } + store_reg(s, rd, tmp); + break; + case 2: /* SIMD add/subtract. */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + goto illegal_op; + } + op = (insn >> 20) & 7; + shift = (insn >> 4) & 7; + if ((op & 3) == 3 || (shift & 3) == 3) + goto illegal_op; + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + gen_thumb2_parallel_addsub(op, shift, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + case 3: /* Other data processing. */ + op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7); + if (op < 4) { + /* Saturating add/subtract. */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + goto illegal_op; + } + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + if (op & 1) + gen_helper_double_saturate(tmp, cpu_env, tmp); + if (op & 2) + gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp); + else + gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } else { + switch (op) { + case 0x0a: /* rbit */ + case 0x08: /* rev */ + case 0x09: /* rev16 */ + case 0x0b: /* revsh */ + case 0x18: /* clz */ + break; + case 0x10: /* sel */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + goto illegal_op; + } + break; + case 0x20: /* crc32/crc32c */ + case 0x21: + case 0x22: + case 0x28: + case 0x29: + case 0x2a: + if (!arm_dc_feature(s, ARM_FEATURE_CRC)) { + goto illegal_op; + } + break; + default: + goto illegal_op; + } + tmp = load_reg(s, rn); + switch (op) { + case 0x0a: /* rbit */ + gen_helper_rbit(tmp, tmp); + break; + case 0x08: /* rev */ + tcg_gen_bswap32_i32(tmp, tmp); + break; + case 0x09: /* rev16 */ + gen_rev16(tmp); + break; + case 0x0b: /* revsh */ + gen_revsh(tmp); + break; + case 0x10: /* sel */ + tmp2 = load_reg(s, rm); + tmp3 = tcg_temp_new_i32(); + tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE)); + gen_helper_sel_flags(tmp, tmp3, tmp, tmp2); + tcg_temp_free_i32(tmp3); + tcg_temp_free_i32(tmp2); + break; + case 0x18: /* clz */ + gen_helper_clz(tmp, tmp); + break; + case 0x20: + case 0x21: + case 0x22: + case 0x28: + case 0x29: + case 0x2a: + { + /* crc32/crc32c */ + uint32_t sz = op & 0x3; + uint32_t c = op & 0x8; + + tmp2 = load_reg(s, rm); + if (sz == 0) { + tcg_gen_andi_i32(tmp2, tmp2, 0xff); + } else if (sz == 1) { + tcg_gen_andi_i32(tmp2, tmp2, 0xffff); + } + tmp3 = tcg_const_i32(1 << sz); + if (c) { + gen_helper_crc32c(tmp, tmp, tmp2, tmp3); + } else { + gen_helper_crc32(tmp, tmp, tmp2, tmp3); + } + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp3); + break; + } + default: + g_assert_not_reached(); + } + } + store_reg(s, rd, tmp); + break; + case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */ + switch ((insn >> 20) & 7) { + case 0: /* 32 x 32 -> 32 */ + case 7: /* Unsigned sum of absolute differences. */ + break; + case 1: /* 16 x 16 -> 32 */ + case 2: /* Dual multiply add. */ + case 3: /* 32 * 16 -> 32msb */ + case 4: /* Dual multiply subtract. */ + case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + goto illegal_op; + } + break; + } + op = (insn >> 4) & 0xf; + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + switch ((insn >> 20) & 7) { + case 0: /* 32 x 32 -> 32 */ + tcg_gen_mul_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + if (rs != 15) { + tmp2 = load_reg(s, rs); + if (op) + tcg_gen_sub_i32(tmp, tmp2, tmp); + else + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + break; + case 1: /* 16 x 16 -> 32 */ + gen_mulxy(tmp, tmp2, op & 2, op & 1); + tcg_temp_free_i32(tmp2); + if (rs != 15) { + tmp2 = load_reg(s, rs); + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + break; + case 2: /* Dual multiply add. */ + case 4: /* Dual multiply subtract. */ + if (op) + gen_swap_half(tmp2); + gen_smul_dual(tmp, tmp2); + if (insn & (1 << 22)) { + /* This subtraction cannot overflow. */ + tcg_gen_sub_i32(tmp, tmp, tmp2); + } else { + /* This addition cannot overflow 32 bits; + * however it may overflow considered as a signed + * operation, in which case we must set the Q flag. + */ + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + } + tcg_temp_free_i32(tmp2); + if (rs != 15) + { + tmp2 = load_reg(s, rs); + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + break; + case 3: /* 32 * 16 -> 32msb */ + if (op) + tcg_gen_sari_i32(tmp2, tmp2, 16); + else + gen_sxth(tmp2); + tmp64 = gen_muls_i64_i32(tmp, tmp2); + tcg_gen_shri_i64(tmp64, tmp64, 16); + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); + if (rs != 15) + { + tmp2 = load_reg(s, rs); + gen_helper_add_setq(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + break; + case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */ + tmp64 = gen_muls_i64_i32(tmp, tmp2); + if (rs != 15) { + tmp = load_reg(s, rs); + if (insn & (1 << 20)) { + tmp64 = gen_addq_msw(tmp64, tmp); + } else { + tmp64 = gen_subq_msw(tmp64, tmp); + } + } + if (insn & (1 << 4)) { + tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u); + } + tcg_gen_shri_i64(tmp64, tmp64, 32); + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_temp_free_i64(tmp64); + break; + case 7: /* Unsigned sum of absolute differences. */ + gen_helper_usad8(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + if (rs != 15) { + tmp2 = load_reg(s, rs); + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + } + break; + } + store_reg(s, rd, tmp); + break; + case 6: case 7: /* 64-bit multiply, Divide. */ + op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70); + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + if ((op & 0x50) == 0x10) { + /* sdiv, udiv */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) { + goto illegal_op; + } + if (op & 0x20) + gen_helper_udiv(tmp, tmp, tmp2); + else + gen_helper_sdiv(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else if ((op & 0xe) == 0xc) { + /* Dual multiply accumulate long. */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + goto illegal_op; + } + if (op & 1) + gen_swap_half(tmp2); + gen_smul_dual(tmp, tmp2); + if (op & 0x10) { + tcg_gen_sub_i32(tmp, tmp, tmp2); + } else { + tcg_gen_add_i32(tmp, tmp, tmp2); + } + tcg_temp_free_i32(tmp2); + /* BUGFIX */ + tmp64 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(tmp64, tmp); + tcg_temp_free_i32(tmp); + gen_addq(s, tmp64, rs, rd); + gen_storeq_reg(s, rs, rd, tmp64); + tcg_temp_free_i64(tmp64); + } else { + if (op & 0x20) { + /* Unsigned 64-bit multiply */ + tmp64 = gen_mulu_i64_i32(tmp, tmp2); + } else { + if (op & 8) { + /* smlalxy */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + goto illegal_op; + } + gen_mulxy(tmp, tmp2, op & 2, op & 1); + tcg_temp_free_i32(tmp2); + tmp64 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(tmp64, tmp); + tcg_temp_free_i32(tmp); + } else { + /* Signed 64-bit multiply */ + tmp64 = gen_muls_i64_i32(tmp, tmp2); + } + } + if (op & 4) { + /* umaal */ + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + tcg_temp_free_i64(tmp64); + goto illegal_op; + } + gen_addq_lo(s, tmp64, rs); + gen_addq_lo(s, tmp64, rd); + } else if (op & 0x40) { + /* 64-bit accumulate. */ + gen_addq(s, tmp64, rs, rd); + } + gen_storeq_reg(s, rs, rd, tmp64); + tcg_temp_free_i64(tmp64); + } + break; + } + break; + case 6: case 7: case 14: case 15: + /* Coprocessor. */ + if (((insn >> 24) & 3) == 3) { + /* Translate into the equivalent ARM encoding. */ + insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); + if (disas_neon_data_insn(s, insn)) { + goto illegal_op; + } + } else if (((insn >> 8) & 0xe) == 10) { + if (disas_vfp_insn(s, insn)) { + goto illegal_op; + } + } else { + if (insn & (1 << 28)) + goto illegal_op; + if (disas_coproc_insn(s, insn)) { + goto illegal_op; + } + } + break; + case 8: case 9: case 10: case 11: + if (insn & (1 << 15)) { + /* Branches, misc control. */ + if (insn & 0x5000) { + /* Unconditional branch. */ + /* signextend(hw1[10:0]) -> offset[:12]. */ + offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff; + /* hw1[10:0] -> offset[11:1]. */ + offset |= (insn & 0x7ff) << 1; + /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22] + offset[24:22] already have the same value because of the + sign extension above. */ + offset ^= ((~insn) & (1 << 13)) << 10; + offset ^= ((~insn) & (1 << 11)) << 11; + + if (insn & (1 << 14)) { + /* Branch and link. */ + tcg_gen_movi_i32(cpu_R[14], s->pc | 1); + } + + offset += s->pc; + if (insn & (1 << 12)) { + /* b/bl */ + gen_jmp(s, offset); + } else { + /* blx */ + offset &= ~(uint32_t)2; + /* thumb2 bx, no need to check */ + gen_bx_im(s, offset); + } + } else if (((insn >> 23) & 7) == 7) { + /* Misc control */ + if (insn & (1 << 13)) + goto illegal_op; + + if (insn & (1 << 26)) { + if (!(insn & (1 << 20))) { + /* Hypervisor call (v7) */ + int imm16 = extract32(insn, 16, 4) << 12 + | extract32(insn, 0, 12); + ARCH(7); + if (IS_USER(s)) { + goto illegal_op; + } + gen_hvc(s, imm16); + } else { + /* Secure monitor call (v6+) */ + ARCH(6K); + if (IS_USER(s)) { + goto illegal_op; + } + gen_smc(s); + } + } else { + op = (insn >> 20) & 7; + switch (op) { + case 0: /* msr cpsr. */ + if (arm_dc_feature(s, ARM_FEATURE_M)) { + tmp = load_reg(s, rn); + addr = tcg_const_i32(insn & 0xff); + gen_helper_v7m_msr(cpu_env, addr, tmp); + tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); + gen_lookup_tb(s); + break; + } + /* fall through */ + case 1: /* msr spsr. */ + if (arm_dc_feature(s, ARM_FEATURE_M)) { + goto illegal_op; + } + tmp = load_reg(s, rn); + if (gen_set_psr(s, + msr_mask(s, (insn >> 8) & 0xf, op == 1), + op == 1, tmp)) + goto illegal_op; + break; + case 2: /* cps, nop-hint. */ + if (((insn >> 8) & 7) == 0) { + gen_nop_hint(s, insn & 0xff); + } + /* Implemented as NOP in user mode. */ + if (IS_USER(s)) + break; + offset = 0; + imm = 0; + if (insn & (1 << 10)) { + if (insn & (1 << 7)) + offset |= CPSR_A; + if (insn & (1 << 6)) + offset |= CPSR_I; + if (insn & (1 << 5)) + offset |= CPSR_F; + if (insn & (1 << 9)) + imm = CPSR_A | CPSR_I | CPSR_F; + } + if (insn & (1 << 8)) { + offset |= 0x1f; + imm |= (insn & 0x1f); + } + if (offset) { + gen_set_psr_im(s, offset, 0, imm); + } + break; + case 3: /* Special control operations. */ + ARCH(7); + op = (insn >> 4) & 0xf; + switch (op) { + case 2: /* clrex */ + gen_clrex(s); + break; + case 4: /* dsb */ + case 5: /* dmb */ + case 6: /* isb */ + /* These execute as NOPs. */ + break; + default: + goto illegal_op; + } + break; + case 4: /* bxj */ + /* Trivial implementation equivalent to bx. */ + tmp = load_reg(s, rn); + gen_bx(s, tmp); + break; + case 5: /* Exception return. */ + if (IS_USER(s)) { + goto illegal_op; + } + if (rn != 14 || rd != 15) { + goto illegal_op; + } + tmp = load_reg(s, rn); + tcg_gen_subi_i32(tmp, tmp, insn & 0xff); + gen_exception_return(s, tmp); + break; + case 6: /* mrs cpsr. */ + tmp = tcg_temp_new_i32(); + if (arm_dc_feature(s, ARM_FEATURE_M)) { + addr = tcg_const_i32(insn & 0xff); + gen_helper_v7m_mrs(tmp, cpu_env, addr); + tcg_temp_free_i32(addr); + } else { + gen_helper_cpsr_read(tmp, cpu_env); + } + store_reg(s, rd, tmp); + break; + case 7: /* mrs spsr. */ + /* Not accessible in user mode. */ + if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) { + goto illegal_op; + } + tmp = load_cpu_field(spsr); + store_reg(s, rd, tmp); + break; + } + } + } else { + /* Conditional branch. */ + op = (insn >> 22) & 0xf; + /* Generate a conditional jump to next instruction. */ + s->condlabel = gen_new_label(); + arm_gen_test_cc(op ^ 1, s->condlabel); + s->condjmp = 1; + + /* offset[11:1] = insn[10:0] */ + offset = (insn & 0x7ff) << 1; + /* offset[17:12] = insn[21:16]. */ + offset |= (insn & 0x003f0000) >> 4; + /* offset[31:20] = insn[26]. */ + offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11; + /* offset[18] = insn[13]. */ + offset |= (insn & (1 << 13)) << 5; + /* offset[19] = insn[11]. */ + offset |= (insn & (1 << 11)) << 8; + + /* jump to the offset */ + gen_jmp(s, s->pc + offset); + } + } else { + /* Data processing immediate. */ + if (insn & (1 << 25)) { + if (insn & (1 << 24)) { + if (insn & (1 << 20)) + goto illegal_op; + /* Bitfield/Saturate. */ + op = (insn >> 21) & 7; + imm = insn & 0x1f; + shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); + if (rn == 15) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else { + tmp = load_reg(s, rn); + } + switch (op) { + case 2: /* Signed bitfield extract. */ + imm++; + if (shift + imm > 32) + goto illegal_op; + if (imm < 32) + gen_sbfx(tmp, shift, imm); + break; + case 6: /* Unsigned bitfield extract. */ + imm++; + if (shift + imm > 32) + goto illegal_op; + if (imm < 32) + gen_ubfx(tmp, shift, (1u << imm) - 1); + break; + case 3: /* Bitfield insert/clear. */ + if (imm < shift) + goto illegal_op; + imm = imm + 1 - shift; + if (imm != 32) { + tmp2 = load_reg(s, rd); + tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm); + tcg_temp_free_i32(tmp2); + } + break; + case 7: + goto illegal_op; + default: /* Saturate. */ + if (shift) { + if (op & 1) + tcg_gen_sari_i32(tmp, tmp, shift); + else + tcg_gen_shli_i32(tmp, tmp, shift); + } + tmp2 = tcg_const_i32(imm); + if (op & 4) { + /* Unsigned. */ + if ((op & 1) && shift == 0) { + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + goto illegal_op; + } + gen_helper_usat16(tmp, cpu_env, tmp, tmp2); + } else { + gen_helper_usat(tmp, cpu_env, tmp, tmp2); + } + } else { + /* Signed. */ + if ((op & 1) && shift == 0) { + if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) { + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + goto illegal_op; + } + gen_helper_ssat16(tmp, cpu_env, tmp, tmp2); + } else { + gen_helper_ssat(tmp, cpu_env, tmp, tmp2); + } + } + tcg_temp_free_i32(tmp2); + break; + } + store_reg(s, rd, tmp); + } else { + imm = ((insn & 0x04000000) >> 15) + | ((insn & 0x7000) >> 4) | (insn & 0xff); + if (insn & (1 << 22)) { + /* 16-bit immediate. */ + imm |= (insn >> 4) & 0xf000; + if (insn & (1 << 23)) { + /* movt */ + tmp = load_reg(s, rd); + tcg_gen_ext16u_i32(tmp, tmp); + tcg_gen_ori_i32(tmp, tmp, imm << 16); + } else { + /* movw */ + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, imm); + } + } else { + /* Add/sub 12-bit immediate. */ + if (rn == 15) { + offset = s->pc & ~(uint32_t)3; + if (insn & (1 << 23)) + offset -= imm; + else + offset += imm; + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, offset); + } else { + tmp = load_reg(s, rn); + if (insn & (1 << 23)) + tcg_gen_subi_i32(tmp, tmp, imm); + else + tcg_gen_addi_i32(tmp, tmp, imm); + } + } + store_reg(s, rd, tmp); + } + } else { + int shifter_out = 0; + /* modified 12-bit immediate. */ + shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12); + imm = (insn & 0xff); + switch (shift) { + case 0: /* XY */ + /* Nothing to do. */ + break; + case 1: /* 00XY00XY */ + imm |= imm << 16; + break; + case 2: /* XY00XY00 */ + imm |= imm << 16; + imm <<= 8; + break; + case 3: /* XYXYXYXY */ + imm |= imm << 16; + imm |= imm << 8; + break; + default: /* Rotated constant. */ + shift = (shift << 1) | (imm >> 7); + imm |= 0x80; + imm = imm << (32 - shift); + shifter_out = 1; + break; + } + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, imm); + rn = (insn >> 16) & 0xf; + if (rn == 15) { + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else { + tmp = load_reg(s, rn); + } + op = (insn >> 21) & 0xf; + if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0, + shifter_out, tmp, tmp2)) + goto illegal_op; + tcg_temp_free_i32(tmp2); + rd = (insn >> 8) & 0xf; + if (rd != 15) { + store_reg(s, rd, tmp); + } else { + tcg_temp_free_i32(tmp); + } + } + } + break; + case 12: /* Load/store single data item. */ + { + int postinc = 0; + int writeback = 0; + int memidx; + if ((insn & 0x01100000) == 0x01000000) { + if (disas_neon_ls_insn(s, insn)) { + goto illegal_op; + } + break; + } + op = ((insn >> 21) & 3) | ((insn >> 22) & 4); + if (rs == 15) { + if (!(insn & (1 << 20))) { + goto illegal_op; + } + if (op != 2) { + /* Byte or halfword load space with dest == r15 : memory hints. + * Catch them early so we don't emit pointless addressing code. + * This space is a mix of: + * PLD/PLDW/PLI, which we implement as NOPs (note that unlike + * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP + * cores) + * unallocated hints, which must be treated as NOPs + * UNPREDICTABLE space, which we NOP or UNDEF depending on + * which is easiest for the decoding logic + * Some space which must UNDEF + */ + int op1 = (insn >> 23) & 3; + int op2 = (insn >> 6) & 0x3f; + if (op & 2) { + goto illegal_op; + } + if (rn == 15) { + /* UNPREDICTABLE, unallocated hint or + * PLD/PLDW/PLI (literal) + */ + return 0; + } + if (op1 & 1) { + return 0; /* PLD/PLDW/PLI or unallocated hint */ + } + if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) { + return 0; /* PLD/PLDW/PLI or unallocated hint */ + } + /* UNDEF space, or an UNPREDICTABLE */ + return 1; + } + } + memidx = get_mem_index(s); + if (rn == 15) { + addr = tcg_temp_new_i32(); + /* PC relative. */ + /* s->pc has already been incremented by 4. */ + imm = s->pc & 0xfffffffc; + if (insn & (1 << 23)) + imm += insn & 0xfff; + else + imm -= insn & 0xfff; + tcg_gen_movi_i32(addr, imm); + } else { + addr = load_reg(s, rn); + if (insn & (1 << 23)) { + /* Positive offset. */ + imm = insn & 0xfff; + tcg_gen_addi_i32(addr, addr, imm); + } else { + imm = insn & 0xff; + switch ((insn >> 8) & 0xf) { + case 0x0: /* Shifted Register. */ + shift = (insn >> 4) & 0xf; + if (shift > 3) { + tcg_temp_free_i32(addr); + goto illegal_op; + } + tmp = load_reg(s, rm); + if (shift) + tcg_gen_shli_i32(tmp, tmp, shift); + tcg_gen_add_i32(addr, addr, tmp); + tcg_temp_free_i32(tmp); + break; + case 0xc: /* Negative offset. */ + tcg_gen_addi_i32(addr, addr, -imm); + break; + case 0xe: /* User privilege. */ + tcg_gen_addi_i32(addr, addr, imm); + memidx = get_a32_user_mem_index(s); + break; + case 0x9: /* Post-decrement. */ + imm = -imm; + /* Fall through. */ + case 0xb: /* Post-increment. */ + postinc = 1; + writeback = 1; + break; + case 0xd: /* Pre-decrement. */ + imm = -imm; + /* Fall through. */ + case 0xf: /* Pre-increment. */ + tcg_gen_addi_i32(addr, addr, imm); + writeback = 1; + break; + default: + tcg_temp_free_i32(addr); + goto illegal_op; + } + } + } + if (insn & (1 << 20)) { + /* Load. */ + tmp = tcg_temp_new_i32(); + switch (op) { + case 0: + gen_aa32_ld8u(tmp, addr, memidx); + break; + case 4: + gen_aa32_ld8s(tmp, addr, memidx); + break; + case 1: + gen_aa32_ld16u(tmp, addr, memidx); + break; + case 5: + gen_aa32_ld16s(tmp, addr, memidx); + break; + case 2: + gen_aa32_ld32u(tmp, addr, memidx); + break; + default: + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(addr); + goto illegal_op; + } + if (rs == 15) { + gen_bx(s, tmp); + } else { + store_reg(s, rs, tmp); + } + } else { + /* Store. */ + tmp = load_reg(s, rs); + switch (op) { + case 0: + gen_aa32_st8(tmp, addr, memidx); + break; + case 1: + gen_aa32_st16(tmp, addr, memidx); + break; + case 2: + gen_aa32_st32(tmp, addr, memidx); + break; + default: + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(addr); + goto illegal_op; + } + tcg_temp_free_i32(tmp); + } + if (postinc) + tcg_gen_addi_i32(addr, addr, imm); + if (writeback) { + store_reg(s, rn, addr); + } else { + tcg_temp_free_i32(addr); + } + } + break; + default: + goto illegal_op; + } + return 0; +illegal_op: + return 1; +} + +static void disas_thumb_insn(CPUARMState *env, DisasContext *s) +{ + uint32_t val, insn, op, rm, rn, rd, shift, cond; + int32_t offset; + int i; + TCGv_i32 tmp; + TCGv_i32 tmp2; + TCGv_i32 addr; + + if (s->condexec_mask) { + cond = s->condexec_cond; + if (cond != 0x0e) { /* Skip conditional when condition is AL. */ + s->condlabel = gen_new_label(); + arm_gen_test_cc(cond ^ 1, s->condlabel); + s->condjmp = 1; + } + } + + insn = arm_lduw_code(env, s->pc, s->bswap_code); + s->pc += 2; + + switch (insn >> 12) { + case 0: case 1: + + rd = insn & 7; + op = (insn >> 11) & 3; + if (op == 3) { + /* add/subtract */ + rn = (insn >> 3) & 7; + tmp = load_reg(s, rn); + if (insn & (1 << 10)) { + /* immediate */ + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, (insn >> 6) & 7); + } else { + /* reg */ + rm = (insn >> 6) & 7; + tmp2 = load_reg(s, rm); + } + if (insn & (1 << 9)) { + if (s->condexec_mask) + tcg_gen_sub_i32(tmp, tmp, tmp2); + else + gen_sub_CC(tmp, tmp, tmp2); + } else { + if (s->condexec_mask) + tcg_gen_add_i32(tmp, tmp, tmp2); + else + gen_add_CC(tmp, tmp, tmp2); + } + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + } else { + /* shift immediate */ + rm = (insn >> 3) & 7; + shift = (insn >> 6) & 0x1f; + tmp = load_reg(s, rm); + gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0); + if (!s->condexec_mask) + gen_logic_CC(tmp); + store_reg(s, rd, tmp); + } + break; + case 2: case 3: + /* arithmetic large immediate */ + op = (insn >> 11) & 3; + rd = (insn >> 8) & 0x7; + if (op == 0) { /* mov */ + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, insn & 0xff); + if (!s->condexec_mask) + gen_logic_CC(tmp); + store_reg(s, rd, tmp); + } else { + tmp = load_reg(s, rd); + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, insn & 0xff); + switch (op) { + case 1: /* cmp */ + gen_sub_CC(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + break; + case 2: /* add */ + if (s->condexec_mask) + tcg_gen_add_i32(tmp, tmp, tmp2); + else + gen_add_CC(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + case 3: /* sub */ + if (s->condexec_mask) + tcg_gen_sub_i32(tmp, tmp, tmp2); + else + gen_sub_CC(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + } + } + break; + case 4: + if (insn & (1 << 11)) { + rd = (insn >> 8) & 7; + /* load pc-relative. Bit 1 of PC is ignored. */ + val = s->pc + 2 + ((insn & 0xff) * 4); + val &= ~(uint32_t)2; + addr = tcg_temp_new_i32(); + tcg_gen_movi_i32(addr, val); + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(addr); + store_reg(s, rd, tmp); + break; + } + if (insn & (1 << 10)) { + /* data processing extended or blx */ + rd = (insn & 7) | ((insn >> 4) & 8); + rm = (insn >> 3) & 0xf; + op = (insn >> 8) & 3; + switch (op) { + case 0: /* add */ + tmp = load_reg(s, rd); + tmp2 = load_reg(s, rm); + tcg_gen_add_i32(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + store_reg(s, rd, tmp); + break; + case 1: /* cmp */ + tmp = load_reg(s, rd); + tmp2 = load_reg(s, rm); + gen_sub_CC(tmp, tmp, tmp2); + tcg_temp_free_i32(tmp2); + tcg_temp_free_i32(tmp); + break; + case 2: /* mov/cpy */ + tmp = load_reg(s, rm); + store_reg(s, rd, tmp); + break; + case 3:/* branch [and link] exchange thumb register */ + tmp = load_reg(s, rm); + if (insn & (1 << 7)) { + ARCH(5); + val = (uint32_t)s->pc | 1; + tmp2 = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp2, val); + store_reg(s, 14, tmp2); + } + /* already thumb, no need to check */ + gen_bx(s, tmp); + break; + } + break; + } + + /* data processing register */ + rd = insn & 7; + rm = (insn >> 3) & 7; + op = (insn >> 6) & 0xf; + if (op == 2 || op == 3 || op == 4 || op == 7) { + /* the shift/rotate ops want the operands backwards */ + val = rm; + rm = rd; + rd = val; + val = 1; + } else { + val = 0; + } + + if (op == 9) { /* neg */ + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + } else if (op != 0xf) { /* mvn doesn't read its first operand */ + tmp = load_reg(s, rd); + } else { + TCGV_UNUSED_I32(tmp); + } + + tmp2 = load_reg(s, rm); + switch (op) { + case 0x0: /* and */ + tcg_gen_and_i32(tmp, tmp, tmp2); + if (!s->condexec_mask) + gen_logic_CC(tmp); + break; + case 0x1: /* eor */ + tcg_gen_xor_i32(tmp, tmp, tmp2); + if (!s->condexec_mask) + gen_logic_CC(tmp); + break; + case 0x2: /* lsl */ + if (s->condexec_mask) { + gen_shl(tmp2, tmp2, tmp); + } else { + gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp); + gen_logic_CC(tmp2); + } + break; + case 0x3: /* lsr */ + if (s->condexec_mask) { + gen_shr(tmp2, tmp2, tmp); + } else { + gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp); + gen_logic_CC(tmp2); + } + break; + case 0x4: /* asr */ + if (s->condexec_mask) { + gen_sar(tmp2, tmp2, tmp); + } else { + gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp); + gen_logic_CC(tmp2); + } + break; + case 0x5: /* adc */ + if (s->condexec_mask) { + gen_adc(tmp, tmp2); + } else { + gen_adc_CC(tmp, tmp, tmp2); + } + break; + case 0x6: /* sbc */ + if (s->condexec_mask) { + gen_sub_carry(tmp, tmp, tmp2); + } else { + gen_sbc_CC(tmp, tmp, tmp2); + } + break; + case 0x7: /* ror */ + if (s->condexec_mask) { + tcg_gen_andi_i32(tmp, tmp, 0x1f); + tcg_gen_rotr_i32(tmp2, tmp2, tmp); + } else { + gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp); + gen_logic_CC(tmp2); + } + break; + case 0x8: /* tst */ + tcg_gen_and_i32(tmp, tmp, tmp2); + gen_logic_CC(tmp); + rd = 16; + break; + case 0x9: /* neg */ + if (s->condexec_mask) + tcg_gen_neg_i32(tmp, tmp2); + else + gen_sub_CC(tmp, tmp, tmp2); + break; + case 0xa: /* cmp */ + gen_sub_CC(tmp, tmp, tmp2); + rd = 16; + break; + case 0xb: /* cmn */ + gen_add_CC(tmp, tmp, tmp2); + rd = 16; + break; + case 0xc: /* orr */ + tcg_gen_or_i32(tmp, tmp, tmp2); + if (!s->condexec_mask) + gen_logic_CC(tmp); + break; + case 0xd: /* mul */ + tcg_gen_mul_i32(tmp, tmp, tmp2); + if (!s->condexec_mask) + gen_logic_CC(tmp); + break; + case 0xe: /* bic */ + tcg_gen_andc_i32(tmp, tmp, tmp2); + if (!s->condexec_mask) + gen_logic_CC(tmp); + break; + case 0xf: /* mvn */ + tcg_gen_not_i32(tmp2, tmp2); + if (!s->condexec_mask) + gen_logic_CC(tmp2); + val = 1; + rm = rd; + break; + } + if (rd != 16) { + if (val) { + store_reg(s, rm, tmp2); + if (op != 0xf) + tcg_temp_free_i32(tmp); + } else { + store_reg(s, rd, tmp); + tcg_temp_free_i32(tmp2); + } + } else { + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(tmp2); + } + break; + + case 5: + /* load/store register offset. */ + rd = insn & 7; + rn = (insn >> 3) & 7; + rm = (insn >> 6) & 7; + op = (insn >> 9) & 7; + addr = load_reg(s, rn); + tmp = load_reg(s, rm); + tcg_gen_add_i32(addr, addr, tmp); + tcg_temp_free_i32(tmp); + + if (op < 3) { /* store */ + tmp = load_reg(s, rd); + } else { + tmp = tcg_temp_new_i32(); + } + + switch (op) { + case 0: /* str */ + gen_aa32_st32(tmp, addr, get_mem_index(s)); + break; + case 1: /* strh */ + gen_aa32_st16(tmp, addr, get_mem_index(s)); + break; + case 2: /* strb */ + gen_aa32_st8(tmp, addr, get_mem_index(s)); + break; + case 3: /* ldrsb */ + gen_aa32_ld8s(tmp, addr, get_mem_index(s)); + break; + case 4: /* ldr */ + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + break; + case 5: /* ldrh */ + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + break; + case 6: /* ldrb */ + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + break; + case 7: /* ldrsh */ + gen_aa32_ld16s(tmp, addr, get_mem_index(s)); + break; + } + if (op >= 3) { /* load */ + store_reg(s, rd, tmp); + } else { + tcg_temp_free_i32(tmp); + } + tcg_temp_free_i32(addr); + break; + + case 6: + /* load/store word immediate offset */ + rd = insn & 7; + rn = (insn >> 3) & 7; + addr = load_reg(s, rn); + val = (insn >> 4) & 0x7c; + tcg_gen_addi_i32(addr, addr, val); + + if (insn & (1 << 11)) { + /* load */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + store_reg(s, rd, tmp); + } else { + /* store */ + tmp = load_reg(s, rd); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_temp_free_i32(addr); + break; + + case 7: + /* load/store byte immediate offset */ + rd = insn & 7; + rn = (insn >> 3) & 7; + addr = load_reg(s, rn); + val = (insn >> 6) & 0x1f; + tcg_gen_addi_i32(addr, addr, val); + + if (insn & (1 << 11)) { + /* load */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld8u(tmp, addr, get_mem_index(s)); + store_reg(s, rd, tmp); + } else { + /* store */ + tmp = load_reg(s, rd); + gen_aa32_st8(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_temp_free_i32(addr); + break; + + case 8: + /* load/store halfword immediate offset */ + rd = insn & 7; + rn = (insn >> 3) & 7; + addr = load_reg(s, rn); + val = (insn >> 5) & 0x3e; + tcg_gen_addi_i32(addr, addr, val); + + if (insn & (1 << 11)) { + /* load */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld16u(tmp, addr, get_mem_index(s)); + store_reg(s, rd, tmp); + } else { + /* store */ + tmp = load_reg(s, rd); + gen_aa32_st16(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_temp_free_i32(addr); + break; + + case 9: + /* load/store from stack */ + rd = (insn >> 8) & 7; + addr = load_reg(s, 13); + val = (insn & 0xff) * 4; + tcg_gen_addi_i32(addr, addr, val); + + if (insn & (1 << 11)) { + /* load */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + store_reg(s, rd, tmp); + } else { + /* store */ + tmp = load_reg(s, rd); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_temp_free_i32(addr); + break; + + case 10: + /* add to high reg */ + rd = (insn >> 8) & 7; + if (insn & (1 << 11)) { + /* SP */ + tmp = load_reg(s, 13); + } else { + /* PC. bit 1 is ignored. */ + tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2); + } + val = (insn & 0xff) * 4; + tcg_gen_addi_i32(tmp, tmp, val); + store_reg(s, rd, tmp); + break; + + case 11: + /* misc */ + op = (insn >> 8) & 0xf; + switch (op) { + case 0: + /* adjust stack pointer */ + tmp = load_reg(s, 13); + val = (insn & 0x7f) * 4; + if (insn & (1 << 7)) + val = -(int32_t)val; + tcg_gen_addi_i32(tmp, tmp, val); + store_reg(s, 13, tmp); + break; + + case 2: /* sign/zero extend. */ + ARCH(6); + rd = insn & 7; + rm = (insn >> 3) & 7; + tmp = load_reg(s, rm); + switch ((insn >> 6) & 3) { + case 0: gen_sxth(tmp); break; + case 1: gen_sxtb(tmp); break; + case 2: gen_uxth(tmp); break; + case 3: gen_uxtb(tmp); break; + } + store_reg(s, rd, tmp); + break; + case 4: case 5: case 0xc: case 0xd: + /* push/pop */ + addr = load_reg(s, 13); + if (insn & (1 << 8)) + offset = 4; + else + offset = 0; + for (i = 0; i < 8; i++) { + if (insn & (1 << i)) + offset += 4; + } + if ((insn & (1 << 11)) == 0) { + tcg_gen_addi_i32(addr, addr, -offset); + } + for (i = 0; i < 8; i++) { + if (insn & (1 << i)) { + if (insn & (1 << 11)) { + /* pop */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + store_reg(s, i, tmp); + } else { + /* push */ + tmp = load_reg(s, i); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + /* advance to the next address. */ + tcg_gen_addi_i32(addr, addr, 4); + } + } + TCGV_UNUSED_I32(tmp); + if (insn & (1 << 8)) { + if (insn & (1 << 11)) { + /* pop pc */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + /* don't set the pc until the rest of the instruction + has completed */ + } else { + /* push lr */ + tmp = load_reg(s, 14); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + tcg_gen_addi_i32(addr, addr, 4); + } + if ((insn & (1 << 11)) == 0) { + tcg_gen_addi_i32(addr, addr, -offset); + } + /* write back the new stack pointer */ + store_reg(s, 13, addr); + /* set the new PC value */ + if ((insn & 0x0900) == 0x0900) { + store_reg_from_load(s, 15, tmp); + } + break; + + case 1: case 3: case 9: case 11: /* czb */ + rm = insn & 7; + tmp = load_reg(s, rm); + s->condlabel = gen_new_label(); + s->condjmp = 1; + if (insn & (1 << 11)) + tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel); + else + tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel); + tcg_temp_free_i32(tmp); + offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3; + val = (uint32_t)s->pc + 2; + val += offset; + gen_jmp(s, val); + break; + + case 15: /* IT, nop-hint. */ + if ((insn & 0xf) == 0) { + gen_nop_hint(s, (insn >> 4) & 0xf); + break; + } + /* If Then. */ + s->condexec_cond = (insn >> 4) & 0xe; + s->condexec_mask = insn & 0x1f; + /* No actual code generated for this insn, just setup state. */ + break; + + case 0xe: /* bkpt */ + { + int imm8 = extract32(insn, 0, 8); + ARCH(5); + gen_exception_insn(s, 2, EXCP_BKPT, syn_aa32_bkpt(imm8, true), + default_exception_el(s)); + break; + } + + case 0xa: /* rev */ + ARCH(6); + rn = (insn >> 3) & 0x7; + rd = insn & 0x7; + tmp = load_reg(s, rn); + switch ((insn >> 6) & 3) { + case 0: tcg_gen_bswap32_i32(tmp, tmp); break; + case 1: gen_rev16(tmp); break; + case 3: gen_revsh(tmp); break; + default: goto illegal_op; + } + store_reg(s, rd, tmp); + break; + + case 6: + switch ((insn >> 5) & 7) { + case 2: + /* setend */ + ARCH(6); + if (((insn >> 3) & 1) != s->bswap_code) { + /* Dynamic endianness switching not implemented. */ + qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n"); + goto illegal_op; + } + break; + case 3: + /* cps */ + ARCH(6); + if (IS_USER(s)) { + break; + } + if (arm_dc_feature(s, ARM_FEATURE_M)) { + tmp = tcg_const_i32((insn & (1 << 4)) != 0); + /* FAULTMASK */ + if (insn & 1) { + addr = tcg_const_i32(19); + gen_helper_v7m_msr(cpu_env, addr, tmp); + tcg_temp_free_i32(addr); + } + /* PRIMASK */ + if (insn & 2) { + addr = tcg_const_i32(16); + gen_helper_v7m_msr(cpu_env, addr, tmp); + tcg_temp_free_i32(addr); + } + tcg_temp_free_i32(tmp); + gen_lookup_tb(s); + } else { + if (insn & (1 << 4)) { + shift = CPSR_A | CPSR_I | CPSR_F; + } else { + shift = 0; + } + gen_set_psr_im(s, ((insn & 7) << 6), 0, shift); + } + break; + default: + goto undef; + } + break; + + default: + goto undef; + } + break; + + case 12: + { + /* load/store multiple */ + TCGv_i32 loaded_var; + TCGV_UNUSED_I32(loaded_var); + rn = (insn >> 8) & 0x7; + addr = load_reg(s, rn); + for (i = 0; i < 8; i++) { + if (insn & (1 << i)) { + if (insn & (1 << 11)) { + /* load */ + tmp = tcg_temp_new_i32(); + gen_aa32_ld32u(tmp, addr, get_mem_index(s)); + if (i == rn) { + loaded_var = tmp; + } else { + store_reg(s, i, tmp); + } + } else { + /* store */ + tmp = load_reg(s, i); + gen_aa32_st32(tmp, addr, get_mem_index(s)); + tcg_temp_free_i32(tmp); + } + /* advance to the next address */ + tcg_gen_addi_i32(addr, addr, 4); + } + } + if ((insn & (1 << rn)) == 0) { + /* base reg not in list: base register writeback */ + store_reg(s, rn, addr); + } else { + /* base reg in list: if load, complete it now */ + if (insn & (1 << 11)) { + store_reg(s, rn, loaded_var); + } + tcg_temp_free_i32(addr); + } + break; + } + case 13: + /* conditional branch or swi */ + cond = (insn >> 8) & 0xf; + if (cond == 0xe) + goto undef; + + if (cond == 0xf) { + /* swi */ + gen_set_pc_im(s, s->pc); + s->svc_imm = extract32(insn, 0, 8); + s->is_jmp = DISAS_SWI; + break; + } + /* generate a conditional jump to next instruction */ + s->condlabel = gen_new_label(); + arm_gen_test_cc(cond ^ 1, s->condlabel); + s->condjmp = 1; + + /* jump to the offset */ + val = (uint32_t)s->pc + 2; + offset = ((int32_t)insn << 24) >> 24; + val += offset << 1; + gen_jmp(s, val); + break; + + case 14: + if (insn & (1 << 11)) { + if (disas_thumb2_insn(env, s, insn)) + goto undef32; + break; + } + /* unconditional branch */ + val = (uint32_t)s->pc; + offset = ((int32_t)insn << 21) >> 21; + val += (offset << 1) + 2; + gen_jmp(s, val); + break; + + case 15: + if (disas_thumb2_insn(env, s, insn)) + goto undef32; + break; + } + return; +undef32: + gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), + default_exception_el(s)); + return; +illegal_op: +undef: + gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(), + default_exception_el(s)); +} + +/* generate intermediate code in gen_opc_buf and gen_opparam_buf for + basic block 'tb'. If search_pc is TRUE, also generate PC + information for each intermediate instruction. */ +static inline void gen_intermediate_code_internal(ARMCPU *cpu, + TranslationBlock *tb, + bool search_pc) +{ + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + DisasContext dc1, *dc = &dc1; + CPUBreakpoint *bp; + int j, lj; + target_ulong pc_start; + target_ulong next_page_start; + int num_insns; + int max_insns; + + /* generate intermediate code */ + + /* The A64 decoder has its own top level loop, because it doesn't need + * the A32/T32 complexity to do with conditional execution/IT blocks/etc. + */ + if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) { + gen_intermediate_code_internal_a64(cpu, tb, search_pc); + return; + } + + pc_start = tb->pc; + + dc->tb = tb; + + dc->is_jmp = DISAS_NEXT; + dc->pc = pc_start; + dc->singlestep_enabled = cs->singlestep_enabled; + dc->condjmp = 0; + + dc->aarch64 = 0; + dc->el3_is_aa64 = arm_el_is_aa64(env, 3); + dc->thumb = ARM_TBFLAG_THUMB(tb->flags); + dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags); + dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1; + dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4; + dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags); + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); +#if !defined(CONFIG_USER_ONLY) + dc->user = (dc->current_el == 0); +#endif + dc->ns = ARM_TBFLAG_NS(tb->flags); + dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags); + dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags); + dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags); + dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags); + dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags); + dc->cp_regs = cpu->cp_regs; + dc->features = env->features; + + /* Single step state. The code-generation logic here is: + * SS_ACTIVE == 0: + * generate code with no special handling for single-stepping (except + * that anything that can make us go to SS_ACTIVE == 1 must end the TB; + * this happens anyway because those changes are all system register or + * PSTATE writes). + * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) + * emit code for one insn + * emit code to clear PSTATE.SS + * emit code to generate software step exception for completed step + * end TB (as usual for having generated an exception) + * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) + * emit code to generate a software step exception + * end the TB + */ + dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags); + dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags); + dc->is_ldex = false; + dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */ + + cpu_F0s = tcg_temp_new_i32(); + cpu_F1s = tcg_temp_new_i32(); + cpu_F0d = tcg_temp_new_i64(); + cpu_F1d = tcg_temp_new_i64(); + cpu_V0 = cpu_F0d; + cpu_V1 = cpu_F1d; + /* FIXME: cpu_M0 can probably be the same as cpu_V0. */ + cpu_M0 = tcg_temp_new_i64(); + next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; + lj = -1; + num_insns = 0; + max_insns = tb->cflags & CF_COUNT_MASK; + if (max_insns == 0) + max_insns = CF_COUNT_MASK; + + gen_tb_start(tb); + + tcg_clear_temp_count(); + + /* A note on handling of the condexec (IT) bits: + * + * We want to avoid the overhead of having to write the updated condexec + * bits back to the CPUARMState for every instruction in an IT block. So: + * (1) if the condexec bits are not already zero then we write + * zero back into the CPUARMState now. This avoids complications trying + * to do it at the end of the block. (For example if we don't do this + * it's hard to identify whether we can safely skip writing condexec + * at the end of the TB, which we definitely want to do for the case + * where a TB doesn't do anything with the IT state at all.) + * (2) if we are going to leave the TB then we call gen_set_condexec() + * which will write the correct value into CPUARMState if zero is wrong. + * This is done both for leaving the TB at the end, and for leaving + * it because of an exception we know will happen, which is done in + * gen_exception_insn(). The latter is necessary because we need to + * leave the TB with the PC/IT state just prior to execution of the + * instruction which caused the exception. + * (3) if we leave the TB unexpectedly (eg a data abort on a load) + * then the CPUARMState will be wrong and we need to reset it. + * This is handled in the same way as restoration of the + * PC in these situations: we will be called again with search_pc=1 + * and generate a mapping of the condexec bits for each PC in + * gen_opc_condexec_bits[]. restore_state_to_opc() then uses + * this to restore the condexec bits. + * + * Note that there are no instructions which can read the condexec + * bits, and none which can write non-static values to them, so + * we don't need to care about whether CPUARMState is correct in the + * middle of a TB. + */ + + /* Reset the conditional execution bits immediately. This avoids + complications trying to do it at the end of the block. */ + if (dc->condexec_mask || dc->condexec_cond) + { + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, 0); + store_cpu_field(tmp, condexec_bits); + } + do { +#ifdef CONFIG_USER_ONLY + /* Intercept jump to the magic kernel page. */ + if (dc->pc >= 0xffff0000) { + /* We always get here via a jump, so know we are not in a + conditional execution block. */ + gen_exception_internal(EXCP_KERNEL_TRAP); + dc->is_jmp = DISAS_UPDATE; + break; + } +#else + if (dc->pc >= 0xfffffff0 && arm_dc_feature(dc, ARM_FEATURE_M)) { + /* We always get here via a jump, so know we are not in a + conditional execution block. */ + gen_exception_internal(EXCP_EXCEPTION_EXIT); + dc->is_jmp = DISAS_UPDATE; + break; + } +#endif + + if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) { + QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { + if (bp->pc == dc->pc) { + gen_exception_internal_insn(dc, 0, EXCP_DEBUG); + /* Advance PC so that clearing the breakpoint will + invalidate this TB. */ + dc->pc += 2; + goto done_generating; + } + } + } + if (search_pc) { + j = tcg_op_buf_count(); + if (lj < j) { + lj++; + while (lj < j) + tcg_ctx.gen_opc_instr_start[lj++] = 0; + } + tcg_ctx.gen_opc_pc[lj] = dc->pc; + gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1); + tcg_ctx.gen_opc_instr_start[lj] = 1; + tcg_ctx.gen_opc_icount[lj] = num_insns; + } + + if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) + gen_io_start(); + + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) { + tcg_gen_debug_insn_start(dc->pc); + } + + if (dc->ss_active && !dc->pstate_ss) { + /* Singlestep state is Active-pending. + * If we're in this state at the start of a TB then either + * a) we just took an exception to an EL which is being debugged + * and this is the first insn in the exception handler + * b) debug exceptions were masked and we just unmasked them + * without changing EL (eg by clearing PSTATE.D) + * In either case we're going to take a swstep exception in the + * "did not step an insn" case, and so the syndrome ISV and EX + * bits should be zero. + */ + assert(num_insns == 0); + gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0), + default_exception_el(dc)); + goto done_generating; + } + + if (dc->thumb) { + disas_thumb_insn(env, dc); + if (dc->condexec_mask) { + dc->condexec_cond = (dc->condexec_cond & 0xe) + | ((dc->condexec_mask >> 4) & 1); + dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f; + if (dc->condexec_mask == 0) { + dc->condexec_cond = 0; + } + } + } else { + unsigned int insn = arm_ldl_code(env, dc->pc, dc->bswap_code); + dc->pc += 4; + disas_arm_insn(dc, insn); + } + + if (dc->condjmp && !dc->is_jmp) { + gen_set_label(dc->condlabel); + dc->condjmp = 0; + } + + if (tcg_check_temp_count()) { + fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n", + dc->pc); + } + + /* Translation stops when a conditional branch is encountered. + * Otherwise the subsequent code could get translated several times. + * Also stop translation when a page boundary is reached. This + * ensures prefetch aborts occur at the right place. */ + num_insns ++; + } while (!dc->is_jmp && !tcg_op_buf_full() && + !cs->singlestep_enabled && + !singlestep && + !dc->ss_active && + dc->pc < next_page_start && + num_insns < max_insns); + + if (tb->cflags & CF_LAST_IO) { + if (dc->condjmp) { + /* FIXME: This can theoretically happen with self-modifying + code. */ + cpu_abort(cs, "IO on conditional branch instruction"); + } + gen_io_end(); + } + + /* At this stage dc->condjmp will only be set when the skipped + instruction was a conditional branch or trap, and the PC has + already been written. */ + if (unlikely(cs->singlestep_enabled || dc->ss_active)) { + /* Make sure the pc is updated, and raise a debug exception. */ + if (dc->condjmp) { + gen_set_condexec(dc); + if (dc->is_jmp == DISAS_SWI) { + gen_ss_advance(dc); + gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb), + default_exception_el(dc)); + } else if (dc->is_jmp == DISAS_HVC) { + gen_ss_advance(dc); + gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2); + } else if (dc->is_jmp == DISAS_SMC) { + gen_ss_advance(dc); + gen_exception(EXCP_SMC, syn_aa32_smc(), 3); + } else if (dc->ss_active) { + gen_step_complete_exception(dc); + } else { + gen_exception_internal(EXCP_DEBUG); + } + gen_set_label(dc->condlabel); + } + if (dc->condjmp || !dc->is_jmp) { + gen_set_pc_im(dc, dc->pc); + dc->condjmp = 0; + } + gen_set_condexec(dc); + if (dc->is_jmp == DISAS_SWI && !dc->condjmp) { + gen_ss_advance(dc); + gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb), + default_exception_el(dc)); + } else if (dc->is_jmp == DISAS_HVC && !dc->condjmp) { + gen_ss_advance(dc); + gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2); + } else if (dc->is_jmp == DISAS_SMC && !dc->condjmp) { + gen_ss_advance(dc); + gen_exception(EXCP_SMC, syn_aa32_smc(), 3); + } else if (dc->ss_active) { + gen_step_complete_exception(dc); + } else { + /* FIXME: Single stepping a WFI insn will not halt + the CPU. */ + gen_exception_internal(EXCP_DEBUG); + } + } else { + /* While branches must always occur at the end of an IT block, + there are a few other things that can cause us to terminate + the TB in the middle of an IT block: + - Exception generating instructions (bkpt, swi, undefined). + - Page boundaries. + - Hardware watchpoints. + Hardware breakpoints have already been handled and skip this code. + */ + gen_set_condexec(dc); + switch(dc->is_jmp) { + case DISAS_NEXT: + gen_goto_tb(dc, 1, dc->pc); + break; + default: + case DISAS_JUMP: + case DISAS_UPDATE: + /* indicate that the hash table must be used to find the next TB */ + tcg_gen_exit_tb(0); + break; + case DISAS_TB_JUMP: + /* nothing more to generate */ + break; + case DISAS_WFI: + gen_helper_wfi(cpu_env); + /* The helper doesn't necessarily throw an exception, but we + * must go back to the main loop to check for interrupts anyway. + */ + tcg_gen_exit_tb(0); + break; + case DISAS_WFE: + gen_helper_wfe(cpu_env); + break; + case DISAS_YIELD: + gen_helper_yield(cpu_env); + break; + case DISAS_SWI: + gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb), + default_exception_el(dc)); + break; + case DISAS_HVC: + gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2); + break; + case DISAS_SMC: + gen_exception(EXCP_SMC, syn_aa32_smc(), 3); + break; + } + if (dc->condjmp) { + gen_set_label(dc->condlabel); + gen_set_condexec(dc); + gen_goto_tb(dc, 1, dc->pc); + dc->condjmp = 0; + } + } + +done_generating: + gen_tb_end(tb, num_insns); + +#ifdef DEBUG_DISAS + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + qemu_log("----------------\n"); + qemu_log("IN: %s\n", lookup_symbol(pc_start)); + log_target_disas(cs, pc_start, dc->pc - pc_start, + dc->thumb | (dc->bswap_code << 1)); + qemu_log("\n"); + } +#endif + if (search_pc) { + j = tcg_op_buf_count(); + lj++; + while (lj <= j) + tcg_ctx.gen_opc_instr_start[lj++] = 0; + } else { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } +} + +void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) +{ + gen_intermediate_code_internal(arm_env_get_cpu(env), tb, false); +} + +void gen_intermediate_code_pc(CPUARMState *env, TranslationBlock *tb) +{ + gen_intermediate_code_internal(arm_env_get_cpu(env), tb, true); +} + +static const char *cpu_mode_names[16] = { + "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt", + "???", "???", "hyp", "und", "???", "???", "???", "sys" +}; + +void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, + int flags) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + int i; + uint32_t psr; + + if (is_a64(env)) { + aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags); + return; + } + + for(i=0;i<16;i++) { + cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]); + if ((i % 4) == 3) + cpu_fprintf(f, "\n"); + else + cpu_fprintf(f, " "); + } + psr = cpsr_read(env); + cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n", + psr, + psr & (1 << 31) ? 'N' : '-', + psr & (1 << 30) ? 'Z' : '-', + psr & (1 << 29) ? 'C' : '-', + psr & (1 << 28) ? 'V' : '-', + psr & CPSR_T ? 'T' : 'A', + cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26); + + if (flags & CPU_DUMP_FPU) { + int numvfpregs = 0; + if (arm_feature(env, ARM_FEATURE_VFP)) { + numvfpregs += 16; + } + if (arm_feature(env, ARM_FEATURE_VFP3)) { + numvfpregs += 16; + } + for (i = 0; i < numvfpregs; i++) { + uint64_t v = float64_val(env->vfp.regs[i]); + cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n", + i * 2, (uint32_t)v, + i * 2 + 1, (uint32_t)(v >> 32), + i, v); + } + cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]); + } +} + +void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos) +{ + if (is_a64(env)) { + env->pc = tcg_ctx.gen_opc_pc[pc_pos]; + env->condexec_bits = 0; + } else { + env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos]; + env->condexec_bits = gen_opc_condexec_bits[pc_pos]; + } +} diff --git a/target-arm/translate.h b/target-arm/translate.h new file mode 100644 index 00000000..679bdbcb --- /dev/null +++ b/target-arm/translate.h @@ -0,0 +1,148 @@ +#ifndef TARGET_ARM_TRANSLATE_H +#define TARGET_ARM_TRANSLATE_H + +/* internal defines */ +typedef struct DisasContext { + target_ulong pc; + uint32_t insn; + int is_jmp; + /* Nonzero if this instruction has been conditionally skipped. */ + int condjmp; + /* The label that will be jumped to when the instruction is skipped. */ + TCGLabel *condlabel; + /* Thumb-2 conditional execution bits. */ + int condexec_mask; + int condexec_cond; + struct TranslationBlock *tb; + int singlestep_enabled; + int thumb; + int bswap_code; +#if !defined(CONFIG_USER_ONLY) + int user; +#endif + ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ + bool ns; /* Use non-secure CPREG bank on access */ + int fp_excp_el; /* FP exception EL or 0 if enabled */ + bool el3_is_aa64; /* Flag indicating whether EL3 is AArch64 or not */ + bool vfp_enabled; /* FP enabled via FPSCR.EN */ + int vec_len; + int vec_stride; + /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI + * so that top level loop can generate correct syndrome information. + */ + uint32_t svc_imm; + int aarch64; + int current_el; + GHashTable *cp_regs; + uint64_t features; /* CPU features bits */ + /* Because unallocated encodings generate different exception syndrome + * information from traps due to FP being disabled, we can't do a single + * "is fp access disabled" check at a high level in the decode tree. + * To help in catching bugs where the access check was forgotten in some + * code path, we set this flag when the access check is done, and assert + * that it is set at the point where we actually touch the FP regs. + */ + bool fp_access_checked; + /* ARMv8 single-step state (this is distinct from the QEMU gdbstub + * single-step support). + */ + bool ss_active; + bool pstate_ss; + /* True if the insn just emitted was a load-exclusive instruction + * (necessary for syndrome information for single step exceptions), + * ie A64 LDX*, LDAX*, A32/T32 LDREX*, LDAEX*. + */ + bool is_ldex; + /* True if a single-step exception will be taken to the current EL */ + bool ss_same_el; + /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ + int c15_cpar; +#define TMP_A64_MAX 16 + int tmp_a64_count; + TCGv_i64 tmp_a64[TMP_A64_MAX]; +} DisasContext; + +/* Share the TCG temporaries common between 32 and 64 bit modes. */ +extern TCGv_ptr cpu_env; +extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF; +extern TCGv_i64 cpu_exclusive_addr; +extern TCGv_i64 cpu_exclusive_val; +#ifdef CONFIG_USER_ONLY +extern TCGv_i64 cpu_exclusive_test; +extern TCGv_i32 cpu_exclusive_info; +#endif + +static inline int arm_dc_feature(DisasContext *dc, int feature) +{ + return (dc->features & (1ULL << feature)) != 0; +} + +static inline int get_mem_index(DisasContext *s) +{ + return s->mmu_idx; +} + +/* Function used to determine the target exception EL when otherwise not known + * or default. + */ +static inline int default_exception_el(DisasContext *s) +{ + /* If we are coming from secure EL0 in a system with a 32-bit EL3, then + * there is no secure EL1, so we route exceptions to EL3. Otherwise, + * exceptions can only be routed to ELs above 1, so we target the higher of + * 1 or the current EL. + */ + return (s->mmu_idx == ARMMMUIdx_S1SE0 && !s->el3_is_aa64) + ? 3 : MAX(1, s->current_el); +} + +/* target-specific extra values for is_jmp */ +/* These instructions trap after executing, so the A32/T32 decoder must + * defer them until after the conditional execution state has been updated. + * WFI also needs special handling when single-stepping. + */ +#define DISAS_WFI 4 +#define DISAS_SWI 5 +/* For instructions which unconditionally cause an exception we can skip + * emitting unreachable code at the end of the TB in the A64 decoder + */ +#define DISAS_EXC 6 +/* WFE */ +#define DISAS_WFE 7 +#define DISAS_HVC 8 +#define DISAS_SMC 9 +#define DISAS_YIELD 10 + +#ifdef TARGET_AARCH64 +void a64_translate_init(void); +void gen_intermediate_code_internal_a64(ARMCPU *cpu, + TranslationBlock *tb, + bool search_pc); +void gen_a64_set_pc_im(uint64_t val); +void aarch64_cpu_dump_state(CPUState *cs, FILE *f, + fprintf_function cpu_fprintf, int flags); +#else +static inline void a64_translate_init(void) +{ +} + +static inline void gen_intermediate_code_internal_a64(ARMCPU *cpu, + TranslationBlock *tb, + bool search_pc) +{ +} + +static inline void gen_a64_set_pc_im(uint64_t val) +{ +} + +static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f, + fprintf_function cpu_fprintf, + int flags) +{ +} +#endif + +void arm_gen_test_cc(int cc, TCGLabel *label); + +#endif /* TARGET_ARM_TRANSLATE_H */ -- cgit v1.2.3