--- a/gcc/addresses.h +++ b/gcc/addresses.h @@ -78,3 +78,42 @@ return ok_for_base_p_1 (regno, mode, outer_code, index_code); } + +/* Wrapper function to unify target macros MODE_INDEX_REG_CLASS and + INDEX_REG_CLASS. Arguments as for the MODE_INDEX_REG_CLASS macro. */ + +static inline enum reg_class +index_reg_class (enum machine_mode mode ATTRIBUTE_UNUSED) +{ +#ifdef MODE_INDEX_REG_CLASS + return MODE_INDEX_REG_CLASS (mode); +#else + return INDEX_REG_CLASS; +#endif +} + +/* Wrapper function to unify target macros REGNO_MODE_OK_FOR_INDEX_P + and REGNO_OK_FOR_INDEX_P. Arguments as for the + REGNO_MODE_OK_FOR_INDEX_P macro. */ + +static inline bool +ok_for_index_p_1 (unsigned regno, enum machine_mode mode ATTRIBUTE_UNUSED) +{ +#ifdef REGNO_MODE_OK_FOR_INDEX_P + return REGNO_MODE_OK_FOR_INDEX_P (regno, mode); +#else + return REGNO_OK_FOR_INDEX_P (regno); +#endif +} + +/* Wrapper around ok_for_index_p_1, for use after register allocation is + complete. Arguments as for the called function. */ + +static inline bool +regno_ok_for_index_p (unsigned regno, enum machine_mode mode) +{ + if (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0) + regno = reg_renumber[regno]; + + return ok_for_index_p_1 (regno, mode); +} --- a/gcc/calls.c +++ b/gcc/calls.c @@ -3803,7 +3803,7 @@ cse'ing of library calls could delete a call and leave the pop. */ NO_DEFER_POP; valreg = (mem_value == 0 && outmode != VOIDmode - ? hard_libcall_value (outmode) : NULL_RTX); + ? hard_libcall_value (outmode, orgfun) : NULL_RTX); /* Stack must be properly aligned now. */ gcc_assert (!(stack_pointer_delta @@ -4048,8 +4048,17 @@ /* We need to make a save area. */ unsigned int size = arg->locate.size.constant * BITS_PER_UNIT; enum machine_mode save_mode = mode_for_size (size, MODE_INT, 1); - rtx adr = memory_address (save_mode, XEXP (arg->stack_slot, 0)); - rtx stack_area = gen_rtx_MEM (save_mode, adr); + rtx adr; + rtx stack_area; + + /* We can only use save_mode if the arg is sufficiently + aligned. */ + if (STRICT_ALIGNMENT + && GET_MODE_ALIGNMENT (save_mode) > arg->locate.boundary) + save_mode = BLKmode; + + adr = memory_address (save_mode, XEXP (arg->stack_slot, 0)); + stack_area = gen_rtx_MEM (save_mode, adr); if (save_mode == BLKmode) { --- a/gcc/c-common.c +++ b/gcc/c-common.c @@ -33,7 +33,6 @@ #include "varray.h" #include "expr.h" #include "c-common.h" -#include "diagnostic.h" #include "tm_p.h" #include "obstack.h" #include "cpplib.h" @@ -42,6 +41,7 @@ #include "tree-inline.h" #include "c-tree.h" #include "toplev.h" +#include "diagnostic.h" #include "tree-iterator.h" #include "hashtab.h" #include "tree-mudflap.h" @@ -497,6 +497,10 @@ This is a count, since unevaluated expressions can nest. */ int skip_evaluation; +/* Whether lexing has been completed, so subsequent preprocessor + errors should use the compiler's input_location. */ +bool done_lexing = false; + /* Information about how a function name is generated. */ struct fname_var_t { @@ -7522,6 +7526,68 @@ #undef catenate_messages } +/* Callback from cpp_error for PFILE to print diagnostics from the + preprocessor. The diagnostic is of type LEVEL, at location + LOCATION unless this is after lexing and the compiler's location + should be used instead, with column number possibly overridden by + COLUMN_OVERRIDE if not zero; MSG is the translated message and AP + the arguments. Returns true if a diagnostic was emitted, false + otherwise. 
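+   For example, a CPP_DL_PEDWARN coming from cpplib is mapped to
+   DK_PEDWARN below, so under -pedantic-errors it is promoted to an
+   error exactly as a front-end pedwarn would be.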
*/ + +bool +c_cpp_error (cpp_reader *pfile ATTRIBUTE_UNUSED, int level, + location_t location, unsigned int column_override, + const char *msg, va_list *ap) +{ + diagnostic_info diagnostic; + diagnostic_t dlevel; + int save_warn_system_headers = warn_system_headers; + bool ret; + + switch (level) + { + case CPP_DL_WARNING_SYSHDR: + if (flag_no_output) + return false; + warn_system_headers = 1; + /* Fall through. */ + case CPP_DL_WARNING: + if (flag_no_output) + return false; + dlevel = DK_WARNING; + break; + case CPP_DL_PEDWARN: + if (flag_no_output && !flag_pedantic_errors) + return false; + dlevel = DK_PEDWARN; + break; + case CPP_DL_ERROR: + dlevel = DK_ERROR; + break; + case CPP_DL_ICE: + dlevel = DK_ICE; + break; + case CPP_DL_NOTE: + dlevel = DK_NOTE; + break; + case CPP_DL_FATAL: + dlevel = DK_FATAL; + break; + default: + gcc_unreachable (); + } + if (done_lexing) + location = input_location; + diagnostic_set_info_translated (&diagnostic, msg, ap, + location, dlevel); + if (column_override) + diagnostic_override_column (&diagnostic, column_override); + ret = report_diagnostic (&diagnostic); + if (level == CPP_DL_WARNING_SYSHDR) + warn_system_headers = save_warn_system_headers; + return ret; +} + /* Walk a gimplified function and warn for functions whose return value is ignored and attribute((warn_unused_result)) is set. This is done before inlining, so we don't have to worry about that. */ --- a/gcc/c-common.h +++ b/gcc/c-common.h @@ -658,6 +658,11 @@ extern int skip_evaluation; +/* Whether lexing has been completed, so subsequent preprocessor + errors should use the compiler's input_location. */ + +extern bool done_lexing; + /* C types are partitioned into three subsets: object, function, and incomplete types. */ #define C_TYPE_OBJECT_P(type) \ --- a/gcc/c-convert.c +++ b/gcc/c-convert.c @@ -70,6 +70,7 @@ tree e = expr; enum tree_code code = TREE_CODE (type); const char *invalid_conv_diag; + tree ret; if (type == error_mark_node || expr == error_mark_node @@ -85,6 +86,9 @@ if (type == TREE_TYPE (expr)) return expr; + ret = targetm.convert_to_type (type, expr); + if (ret) + return ret; if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (TREE_TYPE (expr))) return fold_convert (type, expr); --- a/gcc/c-decl.c +++ b/gcc/c-decl.c @@ -3994,6 +3994,7 @@ bool bitfield = width != NULL; tree element_type; struct c_arg_info *arg_info = 0; + const char *errmsg; if (decl_context == FUNCDEF) funcdef_flag = true, decl_context = NORMAL; @@ -4531,6 +4532,12 @@ error ("%qs declared as function returning an array", name); type = integer_type_node; } + errmsg = targetm.invalid_return_type (type); + if (errmsg) + { + error (errmsg); + type = integer_type_node; + } /* Construct the function type and go to the next inner layer of declarator. */ @@ -5044,6 +5051,7 @@ { tree parm, type, typelt; unsigned int parmno; + const char *errmsg; /* If there is a parameter of incomplete type in a definition, this is an error. In a declaration this is valid, and a @@ -5087,6 +5095,14 @@ } } + errmsg = targetm.invalid_parameter_type (type); + if (errmsg) + { + error (errmsg); + TREE_VALUE (typelt) = error_mark_node; + TREE_TYPE (parm) = error_mark_node; + } + if (DECL_NAME (parm) && TREE_USED (parm)) warn_if_shadowing (parm); } @@ -8071,7 +8087,7 @@ /* Don't waste time on further processing if -fsyntax-only or we've encountered errors. */ - if (flag_syntax_only || errorcount || sorrycount || cpp_errors (parse_in)) + if (flag_syntax_only || errorcount || sorrycount) return; /* Close the external scope. 
*/ --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -488,7 +488,8 @@ { unsigned int align; - align = LOCAL_DECL_ALIGNMENT (decl); + align = alignment_for_aligned_arrays (TREE_TYPE (decl), + LOCAL_DECL_ALIGNMENT (decl)); if (align > MAX_SUPPORTED_STACK_ALIGNMENT) align = MAX_SUPPORTED_STACK_ALIGNMENT; --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -475,9 +475,11 @@ if (DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL) { node->origin = cgraph_node (DECL_CONTEXT (decl)); + node->origin->ever_was_nested = 1; node->next_nested = node->origin->nested; node->origin->nested = node; node->master_clone = node; + node->ever_was_nested = 1; } if (assembler_name_hash) { --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -185,6 +185,8 @@ unsigned output : 1; /* Set for aliases once they got through assemble_alias. */ unsigned alias : 1; + /* Set if the function is a nested function or has nested functions. */ + unsigned ever_was_nested : 1; /* In non-unit-at-a-time mode the function body of inline candidates is saved into clone before compiling so the function in original form can be --- a/gcc/common.opt +++ b/gcc/common.opt @@ -153,6 +153,10 @@ Common Var(warn_padded) Warning Warn when padding is required to align structure members +Wpoison-system-directories +Common Var(flag_poison_system_directories) Init(1) +Warn for -I and -L options using system directories if cross compiling + Wshadow Common Var(warn_shadow) Warning Warn when one local variable shadows another @@ -270,6 +274,12 @@ fabi-version= Common Joined UInteger Var(flag_abi_version) Init(2) +falign-arrays +Target Report Var(flag_align_arrays) +Set the minimum alignment for array variables to be the largest power +of two less than or equal to their total storage size, or the biggest +alignment used on the machine, whichever is smaller. + falign-functions Common Report Var(align_functions,0) Optimization UInteger Align the start of functions @@ -467,6 +477,10 @@ Common Report Var(flag_early_inlining) Init(1) Optimization Perform early inlining +feglibc= +Common Report Joined Undocumented +EGLIBC configuration specifier, serves multilib purposes. + feliminate-dwarf2-dups Common Report Var(flag_eliminate_dwarf2_dups) Perform DWARF2 duplicate elimination @@ -895,6 +909,10 @@ Common Report Var(flag_profile_values) Insert code to profile values of expressions +fpromote-loop-indices +Common Report Var(flag_promote_loop_indices) Optimization +Promote loop indices to word-sized indices when safe + frandom-seed Common @@ -1227,6 +1245,15 @@ Common Report Var(flag_tree_pre) Optimization Enable SSA-PRE optimization on trees +ftree-pre-partial-partial +Common Report Var(flag_tree_pre_partial_partial) Optimization +In SSA-PRE optimization on trees, enable partial-partial redundancy elimination. + +ftree-pre-partial-partial-obliviously +Common Report Var(flag_tree_pre_partial_partial_obliviously) Optimization +In SSA-PRE optimization on trees, enable partial-partial redundancy +elimination without regard for the cost of the inserted phi nodes. + ftree-reassoc Common Report Var(flag_tree_reassoc) Init(1) Optimization Enable reassociation on tree level --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -43,6 +43,7 @@ #include "optabs.h" #include "toplev.h" #include "recog.h" +#include "cgraph.h" #include "ggc.h" #include "except.h" #include "c-pragma.h" @@ -53,6 +54,8 @@ #include "debug.h" #include "langhooks.h" #include "df.h" +#include "intl.h" +#include "params.h" /* Forward definitions of types. 
*/ typedef struct minipool_node Mnode; @@ -110,6 +113,7 @@ static unsigned long arm_isr_value (tree); static unsigned long arm_compute_func_type (void); static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); +static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); #if TARGET_DLLIMPORT_DECL_ATTRIBUTES static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); @@ -123,6 +127,10 @@ static int count_insns_for_constant (HOST_WIDE_INT, int); static int arm_get_strip_length (int); static bool arm_function_ok_for_sibcall (tree, tree); +static bool arm_return_in_memory (const_tree, const_tree); +static rtx arm_function_value (const_tree, const_tree, bool); +static rtx arm_libcall_value (enum machine_mode, rtx); + static void arm_internal_label (FILE *, const char *, unsigned long); static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); @@ -148,6 +156,9 @@ static rtx emit_set_insn (rtx, rtx); static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, tree, bool); +static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, + const_tree); +static int aapcs_select_return_coproc (const_tree, const_tree); #ifdef OBJECT_FORMAT_ELF static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; @@ -175,6 +186,7 @@ static bool arm_output_ttype (rtx); #endif static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); +static rtx arm_dwarf_register_span(rtx); static tree arm_cxx_guard_type (void); static bool arm_cxx_guard_mask_bit (void); @@ -197,6 +209,15 @@ static int arm_issue_rate (void); static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; static bool arm_allocate_stack_slots_for_args (void); +static bool arm_warn_func_result (void); +static int arm_multipass_dfa_lookahead (void); +static const char *arm_invalid_parameter_type (const_tree t); +static const char *arm_invalid_return_type (const_tree t); +static tree arm_promoted_type (const_tree t); +static tree arm_convert_to_type (tree type, tree expr); +static bool arm_scalar_mode_supported_p (enum machine_mode); +static int arm_vector_min_alignment (const_tree type); +static bool arm_vector_always_misalign(const_tree); /* Initialize the GCC target structure. 
*/ @@ -256,6 +277,12 @@ #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arm_function_value + +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE arm_libcall_value + #undef TARGET_ASM_OUTPUT_MI_THUNK #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK @@ -299,6 +326,9 @@ #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args +#undef TARGET_WARN_FUNC_RESULT +#define TARGET_WARN_FUNC_RESULT arm_warn_func_result + #undef TARGET_DEFAULT_SHORT_ENUMS #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums @@ -353,6 +383,9 @@ #undef TARGET_ASM_TTYPE #define TARGET_ASM_TTYPE arm_output_ttype +#undef TARGET_CXX_TTYPE_REF_ENCODE +#define TARGET_CXX_TTYPE_REF_ENCODE hook_cxx_ttype_ref_in_bit0 + #undef TARGET_ARM_EABI_UNWINDER #define TARGET_ARM_EABI_UNWINDER true #endif /* TARGET_UNWIND_INFO */ @@ -360,6 +393,9 @@ #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span + #undef TARGET_CANNOT_COPY_INSN_P #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p @@ -398,6 +434,30 @@ #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel #endif +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD arm_multipass_dfa_lookahead + +#undef TARGET_INVALID_PARAMETER_TYPE +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type + +#undef TARGET_INVALID_RETURN_TYPE +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type + +#undef TARGET_PROMOTED_TYPE +#define TARGET_PROMOTED_TYPE arm_promoted_type + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p + +#undef TARGET_VECTOR_MIN_ALIGNMENT +#define TARGET_VECTOR_MIN_ALIGNMENT arm_vector_min_alignment + +#undef TARGET_VECTOR_ALWAYS_MISALIGN +#define TARGET_VECTOR_ALWAYS_MISALIGN arm_vector_always_misalign + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -423,18 +483,18 @@ /* The default processor used if not overridden by commandline. */ static enum processor_type arm_default_cpu = arm_none; -/* Which floating point model to use. */ -enum arm_fp_model arm_fp_model; - -/* Which floating point hardware is available. */ -enum fputype arm_fpu_arch; - /* Which floating point hardware to schedule for. */ -enum fputype arm_fpu_tune; +int arm_fpu_attr; + +/* Which floating popint hardware to use. */ +const struct arm_fpu_desc *arm_fpu_desc; /* Whether to use floating point hardware. */ enum float_abi_type arm_float_abi; +/* Which __fp16 format to use. */ +enum arm_fp16_format_type arm_fp16_format; + /* Which ABI to use. */ enum arm_abi_type arm_abi; @@ -473,9 +533,19 @@ #define FL_DIV (1 << 18) /* Hardware divide. */ #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ #define FL_NEON (1 << 20) /* Neon instructions. */ +#define FL_MARVELL_F (1 << 21) /* Marvell Feroceon. */ +#define FL_ARCH7EM (1 << 22) /* Instructions present in ARMv7E-M. */ #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". 
*/ +/* Some flags are ignored when comparing -mcpu and -march: + FL_MARVELL_F so that -mcpu=marvell-f -march=v5te works. + FL_LDSCHED and FL_WBUF only effect tuning, + FL_CO_PROC, FL_VFPV2, FL_VFPV3 and FL_NEON because FP + coprocessors are handled separately. */ +#define FL_COMPAT (FL_MARVELL_F | FL_LDSCHED | FL_WBUF | FL_CO_PROC | \ + FL_VFPV2 | FL_VFPV3 | FL_NEON) + #define FL_FOR_ARCH2 FL_NOTM #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) @@ -497,6 +567,7 @@ #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) +#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) /* The bits in this mask specify which instructions we are allowed to generate. */ @@ -533,6 +604,9 @@ /* Nonzero if instructions not present in the 'M' profile can be used. */ int arm_arch_notm = 0; +/* Nonzero if instructions present in ARMv7E-M can be used. */ +int arm_arch7em = 0; + /* Nonzero if this chip can benefit from load scheduling. */ int arm_ld_sched = 0; @@ -551,6 +625,9 @@ /* Nonzero if tuning for XScale */ int arm_tune_xscale = 0; +/* Nonzero if tuning for Marvell Feroceon. */ +int arm_tune_marvell_f = 0; + /* Nonzero if we want to tune for stores that access the write-buffer. This typically means an ARM6 or ARM7 with MMU or MPU. */ int arm_tune_wbuf = 0; @@ -561,6 +638,9 @@ /* Nonzero if generating Thumb instructions. */ int thumb_code = 0; +/* Nonzero if generating code for Janus2. */ +int janus2_code = 0; + /* Nonzero if we should define __THUMB_INTERWORK__ in the preprocessor. XXX This is a bit of a hack, it's intended to help work around @@ -593,6 +673,8 @@ /* The maximum number of insns to be used when loading a constant. */ static int arm_constant_limit = 3; +static enum arm_pcs arm_pcs_default; + /* For an explanation of these variables, see final_prescan_insn below. */ int arm_ccfsm_state; /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ @@ -673,9 +755,11 @@ {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, + {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, + {"marvell-f", marvell_f, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE | FL_MARVELL_F, NULL}, {NULL, arm_none, NULL, 0 , NULL} }; @@ -705,49 +789,34 @@ /* The name of the preprocessor macro to define for this architecture. */ -char arm_arch_name[] = "__ARM_ARCH_0UNK__"; - -struct fpu_desc -{ - const char * name; - enum fputype fpu; -}; - +#define ARM_ARCH_NAME_SIZE 25 +char arm_arch_name[ARM_ARCH_NAME_SIZE] = "__ARM_ARCH_0UNK__"; /* Available values for -mfpu=. 
*/ -static const struct fpu_desc all_fpus[] = +static const struct arm_fpu_desc all_fpus[] = { - {"fpa", FPUTYPE_FPA}, - {"fpe2", FPUTYPE_FPA_EMU2}, - {"fpe3", FPUTYPE_FPA_EMU2}, - {"maverick", FPUTYPE_MAVERICK}, - {"vfp", FPUTYPE_VFP}, - {"vfp3", FPUTYPE_VFP3}, - {"vfpv3", FPUTYPE_VFP3}, - {"vfpv3-d16", FPUTYPE_VFP3D16}, - {"neon", FPUTYPE_NEON} + {"fpa", ARM_FP_MODEL_FPA, 0, 0, false, false}, + {"fpe2", ARM_FP_MODEL_FPA, 2, 0, false, false}, + {"fpe3", ARM_FP_MODEL_FPA, 3, 0, false, false}, + {"maverick", ARM_FP_MODEL_MAVERICK, 0, 0, false, false}, + {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false}, + {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, + {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true }, + {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false}, + {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false}, + {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true }, + {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true }, + {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false}, + {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true }, + {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true }, + {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true }, + {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true }, + {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true , true }, + /* Compatibility aliases. */ + {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, }; - -/* Floating point models used by the different hardware. - See fputype in arm.h. */ - -static const enum fputype fp_model_for_fpu[] = -{ - /* No FP hardware. */ - ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */ - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */ - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */ - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */ - ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */ - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */ - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */ - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */ - ARM_FP_MODEL_VFP /* FPUTYPE_NEON */ -}; - - struct float_abi { const char * name; @@ -765,6 +834,23 @@ }; +struct fp16_format +{ + const char *name; + enum arm_fp16_format_type fp16_format_type; +}; + + +/* Available values for -mfp16-format=. */ + +static const struct fp16_format all_fp16_formats[] = +{ + {"none", ARM_FP16_FORMAT_NONE}, + {"ieee", ARM_FP16_FORMAT_IEEE}, + {"alternative", ARM_FP16_FORMAT_ALTERNATIVE} +}; + + struct abi_name { const char *name; @@ -922,6 +1008,44 @@ set_optab_libfunc (umod_optab, DImode, NULL); set_optab_libfunc (smod_optab, SImode, NULL); set_optab_libfunc (umod_optab, SImode, NULL); + + /* Half-precision float operations. The compiler handles all operations + with NULL libfuncs by converting the SFmode. */ + switch (arm_fp16_format) + { + case ARM_FP16_FORMAT_IEEE: + case ARM_FP16_FORMAT_ALTERNATIVE: + + /* Conversions. */ + set_conv_libfunc (trunc_optab, HFmode, SFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_f2h_ieee" + : "__gnu_f2h_alternative")); + set_conv_libfunc (sext_optab, SFmode, HFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_h2f_ieee" + : "__gnu_h2f_alternative")); + + /* Arithmetic. */ + set_optab_libfunc (add_optab, HFmode, NULL); + set_optab_libfunc (sdiv_optab, HFmode, NULL); + set_optab_libfunc (smul_optab, HFmode, NULL); + set_optab_libfunc (neg_optab, HFmode, NULL); + set_optab_libfunc (sub_optab, HFmode, NULL); + + /* Comparisons. 
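+	 Leaving these NULL makes the compiler expand HFmode
+	 comparisons by first widening both operands to SFmode, in
+	 line with the conversion-based scheme described above.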
*/ + set_optab_libfunc (eq_optab, HFmode, NULL); + set_optab_libfunc (ne_optab, HFmode, NULL); + set_optab_libfunc (lt_optab, HFmode, NULL); + set_optab_libfunc (le_optab, HFmode, NULL); + set_optab_libfunc (ge_optab, HFmode, NULL); + set_optab_libfunc (gt_optab, HFmode, NULL); + set_optab_libfunc (unord_optab, HFmode, NULL); + break; + + default: + break; + } } /* On AAPCS systems, this is the "struct __va_list". */ @@ -1135,6 +1259,7 @@ arm_override_options (void) { unsigned i; + int len; enum processor_type target_arch_cpu = arm_none; enum processor_type selected_cpu = arm_none; @@ -1152,7 +1277,11 @@ { /* Set the architecture define. */ if (i != ARM_OPT_SET_TUNE) - sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); + { + len = snprintf (arm_arch_name, ARM_ARCH_NAME_SIZE, + "__ARM_ARCH_%s__", sel->arch); + gcc_assert (len < ARM_ARCH_NAME_SIZE); + } /* Determine the processor core for which we should tune code-generation. */ @@ -1178,8 +1307,8 @@ make sure that they are compatible. We only generate a warning though, and we prefer the CPU over the architecture. */ - if (insn_flags != 0 && (insn_flags ^ sel->flags)) - warning (0, "switch -mcpu=%s conflicts with -march= switch", + if (insn_flags != 0 && ((insn_flags ^ sel->flags) & ~FL_COMPAT)) + warning (0, "switch -mcpu=%s conflicts with -march= switch, assuming CPU feature set", ptr->string); insn_flags = sel->flags; @@ -1279,7 +1408,11 @@ insn_flags = sel->flags; } - sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); + + len = snprintf (arm_arch_name, ARM_ARCH_NAME_SIZE, + "__ARM_ARCH_%s__", sel->arch); + gcc_assert (len < ARM_ARCH_NAME_SIZE); + arm_default_cpu = (enum processor_type) (sel - all_cores); if (arm_tune == arm_none) arm_tune = arm_default_cpu; @@ -1289,8 +1422,35 @@ chosen. */ gcc_assert (arm_tune != arm_none); + if (arm_tune == cortexa8 && optimize >= 3) + { + /* These alignments were experimentally determined to improve SPECint + performance on SPECCPU 2000. 
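+	 They take effect only if the user has not already requested
+	 explicit values via -falign-functions / -falign-jumps
+	 (align_functions / align_jumps <= 0 means "not set").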
*/ + if (align_functions <= 0) + align_functions = 16; + if (align_jumps <= 0) + align_jumps = 16; + } + tune_flags = all_cores[(int)arm_tune].flags; + if (target_fp16_format_name) + { + for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) + { + if (streq (all_fp16_formats[i].name, target_fp16_format_name)) + { + arm_fp16_format = all_fp16_formats[i].fp16_format_type; + break; + } + } + if (i == ARRAY_SIZE (all_fp16_formats)) + error ("invalid __fp16 format option: -mfp16-format=%s", + target_fp16_format_name); + } + else + arm_fp16_format = ARM_FP16_FORMAT_NONE; + if (target_abi_name) { for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) @@ -1383,6 +1543,7 @@ arm_arch6 = (insn_flags & FL_ARCH6) != 0; arm_arch6k = (insn_flags & FL_ARCH6K) != 0; arm_arch_notm = (insn_flags & FL_NOTM) != 0; + arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; @@ -1390,12 +1551,25 @@ arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; thumb_code = (TARGET_ARM == 0); + janus2_code = (TARGET_FIX_JANUS != 0); + if (janus2_code && TARGET_THUMB2) + error ("janus2 fix is not applicable when targeting a thumb2 core"); arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_tune_marvell_f = (tune_flags & FL_MARVELL_F) != 0; arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; - arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + /* Hardware integer division is supported by some variants of the ARM + architecture in Thumb-2 mode. In addition some (but not all) Marvell + CPUs support their own hardware integer division instructions. + The assembler will pick the correct encoding. */ + if (TARGET_MARVELL_DIV && (insn_flags & FL_MARVELL_F) == 0) + error ("-mmarvell-div is only supported when targeting a Marvell core"); + + arm_arch_hwdiv = (TARGET_ARM && TARGET_MARVELL_DIV) + || (TARGET_THUMB2 && (insn_flags & FL_DIV) != 0); + /* If we are not using the default (ARM mode) section anchor offset ranges, then set the correct ranges now. */ if (TARGET_THUMB1) @@ -1434,7 +1608,6 @@ if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) error ("iwmmxt abi requires an iwmmxt capable cpu"); - arm_fp_model = ARM_FP_MODEL_UNKNOWN; if (target_fpu_name == NULL && target_fpe_name != NULL) { if (streq (target_fpe_name, "2")) @@ -1445,46 +1618,52 @@ error ("invalid floating point emulation option: -mfpe=%s", target_fpe_name); } - if (target_fpu_name != NULL) - { - /* The user specified a FPU. */ - for (i = 0; i < ARRAY_SIZE (all_fpus); i++) - { - if (streq (all_fpus[i].name, target_fpu_name)) - { - arm_fpu_arch = all_fpus[i].fpu; - arm_fpu_tune = arm_fpu_arch; - arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; - break; - } - } - if (arm_fp_model == ARM_FP_MODEL_UNKNOWN) - error ("invalid floating point option: -mfpu=%s", target_fpu_name); - } - else + + if (target_fpu_name == NULL) { #ifdef FPUTYPE_DEFAULT - /* Use the default if it is specified for this platform. */ - arm_fpu_arch = FPUTYPE_DEFAULT; - arm_fpu_tune = FPUTYPE_DEFAULT; + target_fpu_name = FPUTYPE_DEFAULT; #else - /* Pick one based on CPU type. */ - /* ??? Some targets assume FPA is the default. 
- if ((insn_flags & FL_VFP) != 0) - arm_fpu_arch = FPUTYPE_VFP; - else - */ if (arm_arch_cirrus) - arm_fpu_arch = FPUTYPE_MAVERICK; + target_fpu_name = "maverick"; else - arm_fpu_arch = FPUTYPE_FPA_EMU2; + target_fpu_name = "fpe2"; #endif - if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2) - arm_fpu_tune = FPUTYPE_FPA; + } + + arm_fpu_desc = NULL; + for (i = 0; i < ARRAY_SIZE (all_fpus); i++) + { + if (streq (all_fpus[i].name, target_fpu_name)) + { + arm_fpu_desc = &all_fpus[i]; + break; + } + } + if (!arm_fpu_desc) + error ("invalid floating point option: -mfpu=%s", target_fpu_name); + + switch (arm_fpu_desc->model) + { + case ARM_FP_MODEL_FPA: + if (arm_fpu_desc->rev == 2) + arm_fpu_attr = FPU_FPE2; + else if (arm_fpu_desc->rev == 3) + arm_fpu_attr = FPU_FPE3; else - arm_fpu_tune = arm_fpu_arch; - arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; - gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN); + arm_fpu_attr = FPU_FPA; + break; + + case ARM_FP_MODEL_MAVERICK: + arm_fpu_attr = FPU_MAVERICK; + break; + + case ARM_FP_MODEL_VFP: + arm_fpu_attr = FPU_VFP; + break; + + default: + gcc_unreachable(); } if (target_float_abi_name != NULL) @@ -1505,9 +1684,6 @@ else arm_float_abi = TARGET_DEFAULT_FLOAT_ABI; - if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) - sorry ("-mfloat-abi=hard and VFP"); - /* FPA and iWMMXt are incompatible because the insn encodings overlap. VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon will ever exist. GCC makes no attempt to support this combination. */ @@ -1518,15 +1694,40 @@ if (TARGET_THUMB2 && TARGET_IWMMXT) sorry ("Thumb-2 iWMMXt"); + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); + /* If soft-float is specified then don't use FPU. */ if (TARGET_SOFT_FLOAT) - arm_fpu_arch = FPUTYPE_NONE; + arm_fpu_attr = FPU_NONE; + + if (TARGET_AAPCS_BASED) + { + if (arm_abi == ARM_ABI_IWMMXT) + arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; + else if (arm_float_abi == ARM_FLOAT_ABI_HARD + && TARGET_HARD_FLOAT + && TARGET_VFP) + arm_pcs_default = ARM_PCS_AAPCS_VFP; + else + arm_pcs_default = ARM_PCS_AAPCS; + } + else + { + if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) + sorry ("-mfloat-abi=hard and VFP"); + + if (arm_abi == ARM_ABI_APCS) + arm_pcs_default = ARM_PCS_APCS; + else + arm_pcs_default = ARM_PCS_ATPCS; + } /* For arm2/3 there is no need to do any scheduling if there is only a floating point emulator, or we are doing software floating-point. */ if ((TARGET_SOFT_FLOAT - || arm_fpu_tune == FPUTYPE_FPA_EMU2 - || arm_fpu_tune == FPUTYPE_FPA_EMU3) + || (TARGET_FPA && arm_fpu_desc->rev)) && (tune_flags & FL_MODE32) == 0) flag_schedule_insns = flag_schedule_insns_after_reload = 0; @@ -1616,8 +1817,7 @@ fix_cm3_ldrd = 0; } - /* ??? We might want scheduling for thumb2. */ - if (TARGET_THUMB && flag_schedule_insns) + if (TARGET_THUMB1 && flag_schedule_insns) { /* Don't warn since it's on by default in -O2. */ flag_schedule_insns = 0; @@ -1653,6 +1853,36 @@ /* Register global variables with the garbage collector. */ arm_add_gc_roots (); + + if (low_irq_latency && TARGET_THUMB) + { + warning (0, + "-low-irq-latency has no effect when compiling for the Thumb"); + low_irq_latency = 0; + } + + /* CSL LOCAL */ + /* Loop unrolling can be a substantial win. At -O2, limit to 2x + unrolling by default to prevent excessive code growth; at -O3, + limit to 4x unrolling by default. 
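+     The value 2 acts as a sentinel meaning that no explicit
+     -funroll-loops setting was given on the command line.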
We know we are not optimizing + for size if this is set (see arm_optimization_options). */ + if (flag_unroll_loops == 2) + { + if (optimize == 2) + { + flag_unroll_loops = 1; + if (!PARAM_SET_P (PARAM_MAX_UNROLL_TIMES)) + set_param_value ("max-unroll-times", 2); + } + else if (optimize > 2) + { + flag_unroll_loops = 1; + if (!PARAM_SET_P (PARAM_MAX_UNROLL_TIMES)) + set_param_value ("max-unroll-times", 4); + } + else + flag_unroll_loops = 0; + } } static void @@ -1782,6 +2012,14 @@ return !IS_NAKED (arm_current_func_type ()); } +static bool +arm_warn_func_result (void) +{ + /* Naked functions are implemented entirely in assembly, including the + return sequence, so suppress warnings about this. */ + return !IS_NAKED (arm_current_func_type ()); +} + /* Return 1 if it is possible to return using a single instruction. If SIBLING is non-null, this is a test for a return before a sibling @@ -2873,14 +3111,19 @@ /* Define how to find the value returned by a function. */ -rtx -arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED) +static rtx +arm_function_value(const_tree type, const_tree func, + bool outgoing ATTRIBUTE_UNUSED) { enum machine_mode mode; int unsignedp ATTRIBUTE_UNUSED; rtx r ATTRIBUTE_UNUSED; mode = TYPE_MODE (type); + + if (TARGET_AAPCS_BASED) + return aapcs_allocate_return_reg (mode, type, func); + /* Promote integer types. */ if (INTEGRAL_TYPE_P (type)) PROMOTE_FUNCTION_MODE (mode, unsignedp, type); @@ -2897,7 +3140,36 @@ } } - return LIBCALL_VALUE(mode); + return LIBCALL_VALUE (mode); +} + +rtx +arm_libcall_value (enum machine_mode mode, rtx libcall) +{ + if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS + && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + /* The following libcalls return their result in integer registers, + even though they return a floating point value. */ + if (rtx_equal_p (libcall, + convert_optab_libfunc (sfloat_optab, mode, SImode)) + || rtx_equal_p (libcall, + convert_optab_libfunc (ufloat_optab, mode, SImode)) + || rtx_equal_p (libcall, + convert_optab_libfunc (sfloat_optab, mode, DImode)) + || rtx_equal_p (libcall, + convert_optab_libfunc (ufloat_optab, mode, DImode)) + || rtx_equal_p (libcall, + convert_optab_libfunc (trunc_optab, HFmode, SFmode)) + || rtx_equal_p (libcall, + convert_optab_libfunc (sext_optab, SFmode, HFmode))) + return gen_rtx_REG (mode, ARG_REGISTER(1)); + + /* XXX There are other libcalls that return in integer registers, + but I think they are all handled by hard insns. */ + } + + return LIBCALL_VALUE (mode); } /* Determine the amount of memory needed to store the possible return @@ -2907,10 +3179,12 @@ { int size = 16; - if (TARGET_ARM) + if (TARGET_32BIT) { if (TARGET_HARD_FLOAT_ABI) { + if (TARGET_VFP) + size += 32; if (TARGET_FPA) size += 12; if (TARGET_MAVERICK) @@ -2923,27 +3197,56 @@ return size; } -/* Decide whether a type should be returned in memory (true) - or in a register (false). This is called as the target hook - TARGET_RETURN_IN_MEMORY. */ +/* Decide whether TYPE should be returned in memory (true) + or in a register (false). FNTYPE is the type of the function making + the call. */ static bool -arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +arm_return_in_memory (const_tree type, const_tree fntype) { HOST_WIDE_INT size; - size = int_size_in_bytes (type); + size = int_size_in_bytes (type); /* Negative if not fixed size. 
*/ + + if (TARGET_AAPCS_BASED) + { + /* Simple, non-aggregate types (ie not including vectors and + complex) are always returned in a register (or registers). + We don't care about which register here, so we can short-cut + some of the detail. */ + if (!AGGREGATE_TYPE_P (type) + && TREE_CODE (type) != VECTOR_TYPE + && TREE_CODE (type) != COMPLEX_TYPE) + return false; + + /* Any return value that is no larger than one word can be + returned in r0. */ + if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) + return false; + + /* Check any available co-processors to see if they accept the + type as a register candidate (VFP, for example, can return + some aggregates in consecutive registers). These aren't + available if the call is variadic. */ + if (aapcs_select_return_coproc (type, fntype) >= 0) + return false; + + /* Vector values should be returned using ARM registers, not + memory (unless they're over 16 bytes, which will break since + we only have four call-clobbered registers to play with). */ + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + /* The rest go in memory. */ + return true; + } - /* Vector values should be returned using ARM registers, not memory (unless - they're over 16 bytes, which will break since we only have four - call-clobbered registers to play with). */ if (TREE_CODE (type) == VECTOR_TYPE) return (size < 0 || size > (4 * UNITS_PER_WORD)); if (!AGGREGATE_TYPE_P (type) && - !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE)) - /* All simple types are returned in registers. - For AAPCS, complex types are treated the same as aggregates. */ - return 0; + (TREE_CODE (type) != VECTOR_TYPE)) + /* All simple types are returned in registers. */ + return false; if (arm_abi != ARM_ABI_APCS) { @@ -2960,7 +3263,7 @@ the aggregate is either huge or of variable size, and in either case we will want to return it via memory and not in a register. */ if (size < 0 || size > UNITS_PER_WORD) - return 1; + return true; if (TREE_CODE (type) == RECORD_TYPE) { @@ -2980,18 +3283,18 @@ continue; if (field == NULL) - return 0; /* An empty structure. Allowed by an extension to ANSI C. */ + return false; /* An empty structure. Allowed by an extension to ANSI C. */ /* Check that the first field is valid for returning in a register. */ /* ... Floats are not allowed */ if (FLOAT_TYPE_P (TREE_TYPE (field))) - return 1; + return true; /* ... Aggregates that are not themselves valid for returning in a register are not allowed. */ if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) - return 1; + return true; /* Now check the remaining fields, if any. Only bitfields are allowed, since they are not addressable. */ @@ -3003,10 +3306,10 @@ continue; if (!DECL_BIT_FIELD_TYPE (field)) - return 1; + return true; } - return 0; + return false; } if (TREE_CODE (type) == UNION_TYPE) @@ -3023,18 +3326,18 @@ continue; if (FLOAT_TYPE_P (TREE_TYPE (field))) - return 1; + return true; if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) - return 1; + return true; } - return 0; + return false; } #endif /* not ARM_WINCE */ /* Return all other types in memory. */ - return 1; + return true; } /* Indicate whether or not words of a double are in big-endian order. 
*/ @@ -3059,14 +3362,780 @@ return 1; } +const struct pcs_attribute_arg +{ + const char *arg; + enum arm_pcs value; +} pcs_attribute_args[] = + { + {"aapcs", ARM_PCS_AAPCS}, + {"aapcs-vfp", ARM_PCS_AAPCS_VFP}, + {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT}, + {"atpcs", ARM_PCS_ATPCS}, + {"apcs", ARM_PCS_APCS}, + {NULL, ARM_PCS_UNKNOWN} + }; + +static enum arm_pcs +arm_pcs_from_attribute (tree attr) +{ + const struct pcs_attribute_arg *ptr; + const char *arg; + + /* Get the value of the argument. */ + if (TREE_VALUE (attr) == NULL_TREE + || TREE_CODE (TREE_VALUE (attr)) != STRING_CST) + return ARM_PCS_UNKNOWN; + + arg = TREE_STRING_POINTER (TREE_VALUE (attr)); + + /* Check it against the list of known arguments. */ + for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++) + if (streq (arg, ptr->arg)) + return ptr->value; + + /* An unrecognized interrupt type. */ + return ARM_PCS_UNKNOWN; +} + +/* Get the PCS variant to use for this call. TYPE is the function's type + specification, DECL is the specific declartion. DECL may be null if + the call could be indirect or if this is a library call. */ +static enum arm_pcs +arm_get_pcs_model (const_tree type, const_tree decl) +{ + bool user_convention = false; + enum arm_pcs user_pcs = arm_pcs_default; + tree attr; + + gcc_assert (type); + + attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type)); + if (attr) + { + user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr)); + user_convention = true; + } + + if (TARGET_AAPCS_BASED) + { + /* Detect varargs functions. These always use the base rules + (no argument is ever a candidate for a co-processor + register). */ + bool base_rules = (TYPE_ARG_TYPES (type) != 0 + && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type))) + != void_type_node)); + + if (user_convention) + { + if (user_pcs > ARM_PCS_AAPCS_LOCAL) + sorry ("Non-AAPCS derived PCS variant"); + else if (base_rules && user_pcs != ARM_PCS_AAPCS) + error ("Variadic functions must use the base AAPCS variant"); + } + + if (base_rules) + return ARM_PCS_AAPCS; + else if (user_convention) + return user_pcs; + else if (decl && flag_unit_at_a_time) + { + /* Local functions never leak outside this compilation unit, + so we are free to use whatever conventions are + appropriate. */ + /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */ + struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); + if (i && i->local) + return ARM_PCS_AAPCS_LOCAL; + } + } + else if (user_convention && user_pcs != arm_pcs_default) + sorry ("PCS variant"); + + /* For everything else we use the target's default. */ + return arm_pcs_default; +} + + +static void +aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + rtx libcall ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED) +{ + /* Record the unallocated VFP registers. */ + pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; + pcum->aapcs_vfp_reg_alloc = 0; +} + +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + type. If a non-floating point type is found, or if a floating point + type that doesn't match a non-VOIDmode *MODEP is found, then return -1, + otherwise return the count in the sub-tree. 
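+   For example, a homogeneous aggregate such as
+     struct s { double x[2]; double y; };
+   gives a count of 3 with *MODEP set to DFmode, whereas a structure
+   mixing float and double members gives -1.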
*/ +static int +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) +{ + enum machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + /* Use V2SImode and V4SImode as representatives of all 64-bit + and 128-bit vector types, whether or not those modes are + supported with the present options. */ + size = int_size_in_bytes (type); + switch (size) + { + case 8: + mode = V2SImode; + break; + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P(type)) + return -1; + + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !host_integerp (TYPE_MAX_VALUE (index), 1) + || !TYPE_MIN_VALUE (index) + || !host_integerp (TYPE_MIN_VALUE (index), 1) + || count < 0) + return -1; + + count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1) + - tree_low_cst (TYPE_MIN_VALUE (index), 1)); + + /* There must be no padding. */ + if (!host_integerp (TYPE_SIZE (type), 1) + || (tree_low_cst (TYPE_SIZE (type), 1) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P(type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (!host_integerp (TYPE_SIZE (type), 1) + || (tree_low_cst (TYPE_SIZE (type), 1) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P(type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. */ + if (!host_integerp (TYPE_SIZE (type), 1) + || (tree_low_cst (TYPE_SIZE (type), 1) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* Return true if PCS_VARIANT should use VFP registers. 
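+   The explicit aapcs-vfp variant always does; the local variant
+   (ARM_PCS_AAPCS_LOCAL) does so only on a 32-bit core using
+   hard-float VFP and, for double-precision candidates, only if the
+   FPU supports double precision.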
*/ +static bool +use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) +{ + if (pcs_variant == ARM_PCS_AAPCS_VFP) + return true; + + if (pcs_variant != ARM_PCS_AAPCS_LOCAL) + return false; + + return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && + (TARGET_VFP_DOUBLE || !is_double)); +} + +static bool +aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type, + int *base_mode, int *count) +{ + enum machine_mode new_mode = VOIDmode; + + if (GET_MODE_CLASS (mode) == MODE_FLOAT + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + *count = 1; + new_mode = mode; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + { + *count = 2; + new_mode = (mode == DCmode ? DFmode : SFmode); + } + else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE)) + { + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); + + if (ag_count > 0 && ag_count <= 4) + *count = ag_count; + else + return false; + } + else + return false; + + + if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) + return false; + + *base_mode = new_mode; + return true; +} + +static bool +aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type) +{ + int count ATTRIBUTE_UNUSED; + int ag_mode ATTRIBUTE_UNUSED; + + if (!use_vfp_abi (pcs_variant, false)) + return false; + return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); +} + +static bool +aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type) +{ + if (!use_vfp_abi (pcum->pcs_variant, false)) + return false; + + return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, + &pcum->aapcs_vfp_rmode, + &pcum->aapcs_vfp_rcount); +} + +static bool +aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); + unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; + int regno; + + for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) + if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) + { + pcum->aapcs_vfp_reg_alloc = mask << regno; + if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + { + int i; + int rcount = pcum->aapcs_vfp_rcount; + int rshift = shift; + enum machine_mode rmode = pcum->aapcs_vfp_rmode; + rtx par; + if (!TARGET_NEON) + { + /* Avoid using unsupported vector modes. 
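+		     Without NEON, a V2SImode piece is passed as a
+		     single DImode value and a V4SImode piece as two
+		     DImode values (rcount is doubled and rshift
+		     halved below).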
*/ + if (rmode == V2SImode) + rmode = DImode; + else if (rmode == V4SImode) + { + rmode = DImode; + rcount *= 2; + rshift /= 2; + } + } + par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount)); + for (i = 0; i < rcount; i++) + { + rtx tmp = gen_rtx_REG (rmode, + FIRST_VFP_REGNUM + regno + i * rshift); + tmp = gen_rtx_EXPR_LIST + (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (rmode))); + XVECEXP (par, 0, i) = tmp; + } + + pcum->aapcs_reg = par; + } + else + pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno); + return true; + } + return false; +} + +static rtx +aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + if (!use_vfp_abi (pcs_variant, false)) + return false; + + if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + { + int count; + int ag_mode; + int i; + rtx par; + int shift; + + aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); + + if (!TARGET_NEON) + { + if (ag_mode == V2SImode) + ag_mode = DImode; + else if (ag_mode == V4SImode) + { + ag_mode = DImode; + count *= 2; + } + } + shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode); + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); + for (i = 0; i < count; i++) + { + rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (ag_mode))); + XVECEXP (par, 0, i) = tmp; + } + + return par; + } + + return gen_rtx_REG (mode, FIRST_VFP_REGNUM); +} + +static void +aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED) +{ + pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc; + pcum->aapcs_vfp_reg_alloc = 0; + return; +} + +#define AAPCS_CP(X) \ + { \ + aapcs_ ## X ## _cum_init, \ + aapcs_ ## X ## _is_call_candidate, \ + aapcs_ ## X ## _allocate, \ + aapcs_ ## X ## _is_return_candidate, \ + aapcs_ ## X ## _allocate_return_reg, \ + aapcs_ ## X ## _advance \ + } + +/* Table of co-processors that can be used to pass arguments in + registers. Idealy no arugment should be a candidate for more than + one co-processor table entry, but the table is processed in order + and stops after the first match. If that entry then fails to put + the argument into a co-processor register, the argument will go on + the stack. */ +static struct +{ + /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */ + void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); + + /* Return true if an argument of mode MODE (or type TYPE if MODE is + BLKmode) is a candidate for this co-processor's registers; this + function should ignore any position-dependent state in + CUMULATIVE_ARGS and only use call-type dependent information. */ + bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if the argument does get a co-processor register; it + should set aapcs_reg to an RTX of the register allocated as is + required for a return from FUNCTION_ARG. */ + bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if a result of mode MODE (or type TYPE if MODE is + BLKmode) is can be returned in this co-processor's registers. 
*/ + bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree); + + /* Allocate and return an RTX element to hold the return type of a + call, this routine must not fail and will only be called if + is_return_candidate returned true with the same parameters. */ + rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree); + + /* Finish processing this argument and prepare to start processing + the next one. */ + void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); +} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = + { + AAPCS_CP(vfp) + }; + +#undef AAPCS_CP + +static int +aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + tree type) +{ + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) + return i; + + return -1; +} + +static int +aapcs_select_return_coproc (const_tree type, const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, + TYPE_MODE (type), + type)) + return i; + } + return -1; +} + +static rtx +aapcs_allocate_return_reg (enum machine_mode mode, const_tree type, + const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + /* Promote integer types. */ + if (type && INTEGRAL_TYPE_P (type)) + PROMOTE_FUNCTION_MODE (mode, unsignedp, type); + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, + type)) + return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, + mode, type); + } + + /* Promotes small structs returned in a register to full-word size + for big-endian AAPCS. */ + if (type && arm_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + return gen_rtx_REG (mode, R0_REGNUM); +} + +rtx +aapcs_libcall_value (enum machine_mode mode) +{ + return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); +} + +/* Lay out a function argument using the AAPCS rules. The rule + numbers referred to here are those in the AAPCS. */ +static void +aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + tree type, int named) +{ + int nregs, nregs2; + int ncrn; + + /* We only need to do this once per argument. 
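+     FUNCTION_ARG, arm_arg_partial_bytes and arm_function_arg_advance
+     can each reach this function for the same argument, so the
+     results are cached in the aapcs_* fields of PCUM.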
*/ + if (pcum->aapcs_arg_processed) + return; + + pcum->aapcs_arg_processed = true; + + /* Special case: if named is false then we are handling an incoming + anonymous argument which is on the stack. */ + if (!named) + return; + + /* Is this a potential co-processor register candidate? */ + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int slot = aapcs_select_call_coproc (pcum, mode, type); + pcum->aapcs_cprc_slot = slot; + + /* We don't have to apply any of the rules from part B of the + preparation phase, these are handled elsewhere in the + compiler. */ + + if (slot >= 0) + { + /* A Co-processor register candidate goes either in its own + class of registers or on the stack. */ + if (!pcum->aapcs_cprc_failed[slot]) + { + /* C1.cp - Try to allocate the argument to co-processor + registers. */ + if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) + return; + + /* C2.cp - Put the argument on the stack and note that we + can't assign any more candidates in this slot. We also + need to note that we have allocated stack space, so that + we won't later try to split a non-cprc candidate between + core registers and the stack. */ + pcum->aapcs_cprc_failed[slot] = true; + pcum->can_split = false; + } + + /* We didn't get a register, so this argument goes on the + stack. */ + gcc_assert (pcum->can_split == false); + return; + } + } + + /* C3 - For double-word aligned arguments, round the NCRN up to the + next even number. */ + ncrn = pcum->aapcs_ncrn; + if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) + ncrn++; + + nregs = ARM_NUM_REGS2(mode, type); + + /* Sigh, this test should really assert that nregs > 0, but a GCC + extension allows empty structs and then gives them empty size; it + then allows such a structure to be passed by value. For some of + the code below we have to pretend that such an argument has + non-zero size so that we 'locate' it correctly either in + registers or on the stack. */ + gcc_assert (nregs >= 0); + + nregs2 = nregs ? nregs : 1; + + /* C4 - Argument fits entirely in core registers. */ + if (ncrn + nregs2 <= NUM_ARG_REGS) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = ncrn + nregs; + return; + } + + /* C5 - Some core registers left and there are no arguments already + on the stack: split this argument between the remaining core + registers and the stack. */ + if (ncrn < NUM_ARG_REGS && pcum->can_split) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; + return; + } + + /* C6 - NCRN is set to 4. */ + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + + /* C7,C8 - arugment goes on the stack. We have nothing to do here. */ + return; +} + /* Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function whose data type is FNTYPE. For a library call, FNTYPE is NULL. */ void arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, - rtx libname ATTRIBUTE_UNUSED, + rtx libname, tree fndecl ATTRIBUTE_UNUSED) { + /* Long call handling. */ + if (fntype) + pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl); + else + pcum->pcs_variant = arm_pcs_default; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + /* XXX We should also detect some library calls here and handle + them using the base rules too; for example the floating point + support functions always work this way. 
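+	 The rtx_equal_p checks below handle the known cases: the DImode
+	 fix/float conversions and the HFmode <-> SFmode conversions,
+	 which always use the base AAPCS variant.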
*/ + + if (rtx_equal_p (libname, + convert_optab_libfunc (sfix_optab, DImode, DFmode)) + || rtx_equal_p (libname, + convert_optab_libfunc (ufix_optab, DImode, DFmode)) + || rtx_equal_p (libname, + convert_optab_libfunc (sfix_optab, DImode, SFmode)) + || rtx_equal_p (libname, + convert_optab_libfunc (ufix_optab, DImode, SFmode)) + || rtx_equal_p (libname, + convert_optab_libfunc (trunc_optab, HFmode, SFmode)) + || rtx_equal_p (libname, + convert_optab_libfunc (sext_optab, SFmode, HFmode))) + pcum->pcs_variant = ARM_PCS_AAPCS; + + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + pcum->aapcs_arg_processed = false; + pcum->aapcs_cprc_slot = -1; + pcum->can_split = true; + + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + { + pcum->aapcs_cprc_failed[i] = false; + aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl); + } + } + return; + } + + /* Legacy ABIs */ + /* On the ARM, the offset starts at 0. */ pcum->nregs = 0; pcum->iwmmxt_nregs = 0; @@ -3120,6 +4189,17 @@ { int nregs; + /* Handle the special case quickly. Pick an arbitrary value for op2 of + a call insn (op3 of a call_value insn). */ + if (mode == VOIDmode) + return const0_rtx; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_reg; + } + /* Varargs vectors are treated the same as long long. named_count avoids having to change the way arm handles 'named' */ if (TARGET_IWMMXT_ABI @@ -3161,10 +4241,16 @@ static int arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, - tree type, bool named ATTRIBUTE_UNUSED) + tree type, bool named) { int nregs = pcum->nregs; + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_partial; + } + if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) return 0; @@ -3176,6 +4262,39 @@ return 0; } +void +arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + tree type, bool named) +{ + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + + if (pcum->aapcs_cprc_slot >= 0) + { + aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, + type); + pcum->aapcs_cprc_slot = -1; + } + + /* Generic stuff. */ + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + } + else + { + pcum->nargs += 1; + if (arm_vector_mode_supported_p (mode) + && pcum->named_count > pcum->nargs + && TARGET_IWMMXT_ABI) + pcum->iwmmxt_nregs += 1; + else + pcum->nregs += ARM_NUM_REGS2 (mode, type); + } +} + /* Variable sized types are passed by reference. This is a GCC extension to the ARM ABI. */ @@ -3226,6 +4345,8 @@ /* Whereas these functions are always known to reside within the 26 bit addressing range. */ { "short_call", 0, 0, false, true, true, NULL }, + /* Specify the procedure call conventions for a function. */ + { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute }, /* Interrupt Service Routines have special prologue and epilogue requirements. */ { "isr", 0, 1, false, false, false, arm_handle_isr_attribute }, { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute }, @@ -3328,6 +4449,21 @@ return NULL_TREE; } +/* Handle a "pcs" attribute; arguments as in struct + attribute_spec.handler. 
*/ +static tree +arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) + { + warning (OPT_Wattributes, "%qs attribute ignored", + IDENTIFIER_POINTER (name)); + *no_add_attrs = true; + } + return NULL_TREE; +} + #if TARGET_DLLIMPORT_DECL_ATTRIBUTES /* Handle the "notshared" attribute. This attribute is another way of requesting hidden visibility. ARM's compiler supports @@ -3489,7 +4625,7 @@ /* Return nonzero if it is ok to make a tail-call to DECL. */ static bool -arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +arm_function_ok_for_sibcall (tree decl, tree exp) { unsigned long func_type; @@ -3522,6 +4658,21 @@ if (IS_INTERRUPT (func_type)) return false; + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + { + /* Check that the return value locations are the same. For + example that we aren't returning a value from the sibling in + a VFP register but then need to transfer it to a core + register. */ + rtx a, b; + + a = arm_function_value (TREE_TYPE (exp), decl, false); + b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl, false); + if (!rtx_equal_p (a, b)) + return false; + } + /* Never tailcall if function may be called with a misaligned SP. */ if (IS_STACKALIGN (func_type)) return false; @@ -4120,6 +5271,7 @@ if (GET_MODE_SIZE (mode) <= 4 && ! (arm_arch4 && (mode == HImode + || mode == HFmode || (mode == QImode && outer == SIGN_EXTEND)))) { if (code == MULT) @@ -4148,13 +5300,15 @@ load. */ if (arm_arch4) { - if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode)) + if (mode == HImode + || mode == HFmode + || (outer == SIGN_EXTEND && mode == QImode)) range = 256; else range = 4096; } else - range = (mode == HImode) ? 4095 : 4096; + range = (mode == HImode || mode == HFmode) ? 4095 : 4096; return (code == CONST_INT && INTVAL (index) < range @@ -4325,7 +5479,8 @@ return 1; /* This is PC relative data after arm_reorg runs. */ - else if (GET_MODE_SIZE (mode) >= 4 && reload_completed + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) + && reload_completed && (GET_CODE (x) == LABEL_REF || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS @@ -5024,7 +6179,7 @@ case UMOD: if (TARGET_HARD_FLOAT && mode == SFmode) *total = COSTS_N_INSNS (2); - else if (TARGET_HARD_FLOAT && mode == DFmode) + else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) *total = COSTS_N_INSNS (4); else *total = COSTS_N_INSNS (20); @@ -5063,23 +6218,6 @@ return true; case MINUS: - if (TARGET_THUMB2) - { - if (GET_MODE_CLASS (mode) == MODE_FLOAT) - { - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) - *total = COSTS_N_INSNS (1); - else - *total = COSTS_N_INSNS (20); - } - else - *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); - /* Thumb2 does not have RSB, so all arguments must be - registers (subtracting a constant is canonicalized as - addition of the negated constant). */ - return false; - } - if (mode == DImode) { *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); @@ -5102,7 +6240,9 @@ if (GET_MODE_CLASS (mode) == MODE_FLOAT) { - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) { *total = COSTS_N_INSNS (1); if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE @@ -5143,6 +6283,17 @@ return true; } + /* A shift as a part of RSB costs no more than RSB itself. 
*/ + if (GET_CODE (XEXP (x, 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 0), 1)) + & (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed); + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + if (subcode == MULT && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT && ((INTVAL (XEXP (XEXP (x, 1), 1)) & @@ -5164,6 +6315,19 @@ return true; } + /* MLS is just as expensive as its underlying multiplication. + Exclude a shift by a constant, which is expressed as a + multiplication. */ + if (TARGET_32BIT && arm_arch_thumb2 + && GET_CODE (XEXP (x, 1)) == MULT + && ! (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 1), 1)) & + (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))) + { + /* The cost comes from the cost of the multiply. */ + return false; + } + /* Fall through */ case PLUS: @@ -5192,7 +6356,9 @@ if (GET_MODE_CLASS (mode) == MODE_FLOAT) { - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) { *total = COSTS_N_INSNS (1); if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE @@ -5307,7 +6473,9 @@ case NEG: if (GET_MODE_CLASS (mode) == MODE_FLOAT) { - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) { *total = COSTS_N_INSNS (1); return false; @@ -5460,7 +6628,9 @@ case ABS: if (GET_MODE_CLASS (mode == MODE_FLOAT)) { - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) { *total = COSTS_N_INSNS (1); return false; @@ -5563,7 +6733,8 @@ return true; case CONST_DOUBLE: - if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)) + if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) + && (mode == SFmode || !TARGET_VFP_SINGLE)) *total = COSTS_N_INSNS (1); else *total = COSTS_N_INSNS (4); @@ -5638,7 +6809,8 @@ return false; case MINUS: - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) { *total = COSTS_N_INSNS (1); return false; @@ -5668,7 +6840,8 @@ return false; case PLUS: - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) { *total = COSTS_N_INSNS (1); return false; @@ -5698,7 +6871,8 @@ return false; case NEG: - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) { *total = COSTS_N_INSNS (1); return false; @@ -5722,7 +6896,8 @@ return false; case ABS: - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) *total = COSTS_N_INSNS (1); else *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); @@ -5939,7 +7114,9 @@ if (GET_MODE_CLASS (mode) == MODE_FLOAT) { - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) { *total = COSTS_N_INSNS (1); return false; @@ -6096,7 +7273,9 @@ if (GET_MODE_CLASS (mode) == MODE_FLOAT) { - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + if (TARGET_HARD_FLOAT + && (mode == SFmode 
+ || (mode == DFmode && !TARGET_VFP_SINGLE))) { *total = COSTS_N_INSNS (1); return false; @@ -6919,10 +8098,13 @@ } /* Return TRUE if OP is a memory operand which we can load or store a vector - to/from. If CORE is true, we're moving from ARM registers not Neon - registers. */ + to/from. TYPE is one of the following values: + 0 - Vector load/stor (vldr) + 1 - Core registers (ldm) + 2 - Element/structure loads (vld1) + */ int -neon_vector_mem_operand (rtx op, bool core) +neon_vector_mem_operand (rtx op, int type) { rtx ind; @@ -6955,23 +8137,16 @@ return arm_address_register_rtx_p (ind, 0); /* Allow post-increment with Neon registers. */ - if (!core && GET_CODE (ind) == POST_INC) + if ((type != 1 && GET_CODE (ind) == POST_INC) + || (type == 0 && GET_CODE (ind) == PRE_DEC)) return arm_address_register_rtx_p (XEXP (ind, 0), 0); -#if 0 - /* FIXME: We can support this too if we use VLD1/VST1. */ - if (!core - && GET_CODE (ind) == POST_MODIFY - && arm_address_register_rtx_p (XEXP (ind, 0), 0) - && GET_CODE (XEXP (ind, 1)) == PLUS - && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) - ind = XEXP (ind, 1); -#endif + /* FIXME: vld1 allows register post-modify. */ /* Match: (plus (reg) (const)). */ - if (!core + if (type == 0 && GET_CODE (ind) == PLUS && GET_CODE (XEXP (ind, 0)) == REG && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) @@ -7038,10 +8213,19 @@ enum reg_class coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) { + if (mode == HFmode) + { + if (!TARGET_NEON_FP16) + return GENERAL_REGS; + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) + return NO_REGS; + return GENERAL_REGS; + } + if (TARGET_NEON && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) - && neon_vector_mem_operand (x, FALSE)) + && neon_vector_mem_operand (x, 0)) return NO_REGS; if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) @@ -7438,6 +8622,9 @@ int base_reg = -1; int i; + if (low_irq_latency) + return 0; + /* Can only handle 2, 3, or 4 insns at present, though could be easily extended if required. */ gcc_assert (nops >= 2 && nops <= 4); @@ -7667,6 +8854,9 @@ int base_reg = -1; int i; + if (low_irq_latency) + return 0; + /* Can only handle 2, 3, or 4 insns at present, though could be easily extended if required. */ gcc_assert (nops >= 2 && nops <= 4); @@ -7874,7 +9064,7 @@ As a compromise, we use ldr for counts of 1 or 2 regs, and ldm for counts of 3 or 4 regs. */ - if (arm_tune_xscale && count <= 2 && ! optimize_size) + if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size)) { rtx seq; @@ -7937,7 +9127,7 @@ /* See arm_gen_load_multiple for discussion of the pros/cons of ldm/stm usage for XScale. */ - if (arm_tune_xscale && count <= 2 && ! optimize_size) + if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size)) { rtx seq; @@ -9555,7 +10745,10 @@ gcc_assert (GET_CODE (from) != BARRIER); /* Count the length of this insn. */ - count += get_attr_length (from); + if (LABEL_P (from) && (align_jumps > 0 || align_loops > 0)) + count += MAX (align_jumps, align_loops); + else + count += get_attr_length (from); /* If there is a jump table, add its length. 
*/ tmp = is_jump_table (from); @@ -9867,6 +11060,8 @@ insn = table; } } + else if (LABEL_P (insn) && (align_jumps > 0 || align_loops > 0)) + address += MAX (align_jumps, align_loops); } fix = minipool_fix_head; @@ -10072,6 +11267,21 @@ vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count) { int i; + int offset; + + if (low_irq_latency) + { + /* Output a sequence of FLDD instructions. */ + offset = 0; + for (i = reg; i < reg + count; ++i, offset += 8) + { + fputc ('\t', stream); + asm_fprintf (stream, "fldd\td%d, [%r,#%d]\n", i, base, offset); + } + asm_fprintf (stream, "\tadd\tsp, sp, #%d\n", count * 8); + return; + } + /* Workaround ARM10 VFPr1 bug. */ if (count == 2 && !arm_arch6) @@ -10142,6 +11352,56 @@ rtx tmp, reg; int i; + if (low_irq_latency) + { + int saved_size; + rtx sp_insn; + + if (!count) + return 0; + + saved_size = count * GET_MODE_SIZE (DFmode); + + /* Since fstd does not have postdecrement addressing mode, + we first decrement stack pointer and then use base+offset + stores for VFP registers. The ARM EABI unwind information + can't easily describe base+offset loads, so we attach + a note for the effects of the whole block in the first insn, + and avoid marking the subsequent instructions + with RTX_FRAME_RELATED_P. */ + sp_insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-saved_size)); + sp_insn = emit_insn (sp_insn); + RTX_FRAME_RELATED_P (sp_insn) = 1; + + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); + XVECEXP (dwarf, 0, 0) = + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (stack_pointer_rtx, -saved_size)); + + /* push double VFP registers to stack */ + for (i = 0; i < count; ++i ) + { + rtx reg; + rtx mem; + rtx addr; + rtx insn; + reg = gen_rtx_REG (DFmode, base_reg + 2*i); + addr = (i == 0) ? stack_pointer_rtx + : gen_rtx_PLUS (SImode, stack_pointer_rtx, + GEN_INT (i * GET_MODE_SIZE (DFmode))); + mem = gen_frame_mem (DFmode, addr); + insn = emit_move_insn (mem, reg); + XVECEXP (dwarf, 0, i+1) = + gen_rtx_SET (VOIDmode, mem, reg); + } + + REG_NOTES (sp_insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf, + REG_NOTES (sp_insn)); + + return saved_size; + } + /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two register pairs are stored by a store multiple insn. We avoid this by pushing an extra pair. */ @@ -10758,7 +12018,7 @@ } /* Output a move, load or store for quad-word vectors in ARM registers. Only - handles MEMs accepted by neon_vector_mem_operand with CORE=true. */ + handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */ const char * output_move_quad (rtx *operands) @@ -10954,6 +12214,12 @@ ops[1] = reg; break; + case PRE_DEC: + templ = "v%smdb%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + case POST_MODIFY: /* FIXME: Not currently enabled in neon_vector_mem_operand. */ gcc_unreachable (); @@ -10968,7 +12234,7 @@ { /* We're only using DImode here because it's a convenient size. */ ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); - ops[1] = adjust_address (mem, SImode, 8 * i); + ops[1] = adjust_address (mem, DImode, 8 * i); if (reg_overlap_mentioned_p (ops[0], mem)) { gcc_assert (overlap == -1); @@ -11557,7 +12823,7 @@ if (count > 0) { /* Workaround ARM10 VFPr1 bug. */ - if (count == 2 && !arm_arch6) + if (count == 2 && !arm_arch6 && !low_irq_latency) count++; saved += count * 8; } @@ -11886,6 +13152,41 @@ return_used_this_function = 0; } +/* Generate to STREAM a code sequence that pops registers identified + in REGS_MASK from SP. 
SP is incremented as a result.
+*/
+static void
+print_pop_reg_by_ldr (FILE *stream, int regs_mask, int rfe)
+{
+  int reg;
+
+  gcc_assert (! (regs_mask & (1 << SP_REGNUM)));
+
+  for (reg = 0; reg < PC_REGNUM; ++reg)
+    if (regs_mask & (1 << reg))
+      asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
+                   reg, SP_REGNUM);
+
+  if (regs_mask & (1 << PC_REGNUM))
+    {
+      if (rfe)
+        /* When returning from exception, we need to
+           copy SPSR to CPSR.  There are two ways to do
+           that: the ldm instruction with "^" suffix,
+           and movs instruction.  The latter would
+           require that we load from stack to some
+           scratch register, and then move to PC.
+           Therefore, we'd need an extra instruction and
+           have to make sure we actually have a spare
+           register.  Using ldm with a single register
+           is simpler.  */
+        asm_fprintf (stream, "\tldm\tsp!, {pc}^\n");
+      else
+        asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
+                     PC_REGNUM, SP_REGNUM);
+    }
+}
+
 const char *
 arm_output_epilogue (rtx sibling)
 {
@@ -11946,7 +13247,7 @@
       /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
       int vfp_offset = offsets->frame;
 
-      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+      if (TARGET_FPA_EMU2)
        {
          for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -12169,7 +13470,7 @@
                       SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
        }
 
-      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+      if (TARGET_FPA_EMU2)
        {
          for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -12253,22 +13554,19 @@
         to load use the LDR instruction - it is faster.  For Thumb-2
         always use pop and the assembler will pick the best instruction.*/
       if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
-          && !IS_INTERRUPT(func_type))
+          && !IS_INTERRUPT (func_type))
        {
          asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
        }
       else if (saved_regs_mask)
        {
-         if (saved_regs_mask & (1 << SP_REGNUM))
-           /* Note - write back to the stack register is not enabled
-              (i.e. "ldmfd sp!...").  We know that the stack pointer is
-              in the list of registers and if we add writeback the
-              instruction becomes UNPREDICTABLE.  */
-           print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
-                            rfe);
-         else if (TARGET_ARM)
-           print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
-                            rfe);
+         gcc_assert ( ! (saved_regs_mask & (1 << SP_REGNUM)));
+         if (TARGET_ARM)
+           if (low_irq_latency)
+             print_pop_reg_by_ldr (f, saved_regs_mask, rfe);
+           else
+             print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
+                              rfe);
          else
            print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
        }
@@ -12389,6 +13687,32 @@
 
   gcc_assert (num_regs && num_regs <= 16);
 
+  if (low_irq_latency)
+    {
+      rtx insn = 0;
+
+      /* Emit a series of str instructions rather than a single stm.  */
+      /* TODO: Use strd where possible.  */
+      gcc_assert (! (mask & (1 << SP_REGNUM)));
+
+      for (i = LAST_ARM_REGNUM; i >= 0; --i)
+        {
+          if (mask & (1 << i))
+
+            {
+              rtx reg, where, mem;
+
+              reg = gen_rtx_REG (SImode, i);
+              where = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
+              mem = gen_rtx_MEM (SImode, where);
+              insn = emit_move_insn (mem, reg);
+              RTX_FRAME_RELATED_P (insn) = 1;
+            }
+        }
+
+      return insn;
+    }
+
   /* We don't record the PC in the dwarf frame information.  */
   num_dwarf_regs = num_regs;
   if (mask & (1 << PC_REGNUM))
@@ -12737,22 +14061,23 @@
     {
       int reg = -1;
 
-      for (i = 4; i <= (TARGET_THUMB1 ? 
LAST_LO_REGNUM : 11); i++) - { - if ((offsets->saved_regs_mask & (1 << i)) == 0) - { - reg = i; - break; - } - } - - if (reg == -1 && arm_size_return_regs () <= 12 - && !crtl->tail_call_emit) + /* If it is safe to use r3, then do so. This sometimes + generates better code on Thumb-2 by avoiding the need to + use 32-bit push/pop instructions. */ + if (!crtl->tail_call_emit + && arm_size_return_regs () <= 12) { - /* Push/pop an argument register (r3) if all callee saved - registers are already being pushed. */ reg = 3; } + else + for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) + { + if ((offsets->saved_regs_mask & (1 << i)) == 0) + { + reg = i; + break; + } + } if (reg != -1) { @@ -12876,7 +14201,7 @@ /* Save any floating point call-saved registers used by this function. */ - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -13483,7 +14808,11 @@ { fprintf (stream, ", %s ", shift); if (val == -1) - arm_print_operand (stream, XEXP (x, 1), 0); + { + arm_print_operand (stream, XEXP (x, 1), 0); + if (janus2_code) + fprintf(stream, "\n\tnop"); + } else fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); } @@ -13704,6 +15033,30 @@ } return; + /* Print the high single-precision register of a VFP double-precision + register. */ + case 'p': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_DOUBLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1); + } + return; + /* Print a VFP/Neon double precision or quad precision register name. */ case 'P': case 'q': @@ -13821,6 +15174,57 @@ } return; + /* Memory operand for vld1/vst1 instruction. */ + case 'A': + { + rtx addr; + bool postinc = FALSE; + unsigned align; + + gcc_assert (GET_CODE (x) == MEM); + addr = XEXP (x, 0); + if (GET_CODE (addr) == POST_INC) + { + postinc = 1; + addr = XEXP (addr, 0); + } + align = MEM_ALIGN (x) >> 3; + asm_fprintf (stream, "[%r", REGNO (addr)); + if (align > GET_MODE_SIZE (GET_MODE (x))) + align = GET_MODE_SIZE (GET_MODE (x)); + if (align >= 8) + asm_fprintf (stream, ", :%d", align << 3); + asm_fprintf (stream, "]"); + if (postinc) + fputs("!", stream); + } + return; + + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); + } + return; + default: if (x == 0) { @@ -13854,6 +15258,12 @@ default: gcc_assert (GET_CODE (x) != NEG); fputc ('#', stream); + if (GET_CODE (x) == HIGH) + { + fputs (":lower16:", stream); + x = XEXP (x, 0); + } + output_addr_const (stream, x); break; } @@ -14245,6 +15655,10 @@ first insn after the following code_label if REVERSE is true. */ rtx start_insn = insn; + /* Don't do this if we're not considering conditional execution. 
*/
+  if (TARGET_NO_SINGLE_COND_EXEC)
+    return;
+
   /* If in state 4, check if the target branch is reached, in order
      to change back to state 0.  */
   if (arm_ccfsm_state == 4)
@@ -14618,6 +16032,11 @@
       if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);
 
+      /* VFP registers can hold HFmode values, but there is no point in
+        putting them there unless we have hardware conversion insns.  */
+      if (mode == HFmode)
+       return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
+
       if (TARGET_NEON)
         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
                || (VALID_NEON_QREG_MODE (mode)
@@ -14637,16 +16056,16 @@
        return mode == SImode;
 
       if (IS_IWMMXT_REGNUM (regno))
-       return VALID_IWMMXT_REG_MODE (mode);
+       return VALID_IWMMXT_REG_MODE (mode) && mode != SImode;
     }
 
-  /* We allow any value to be stored in the general registers.
+  /* We allow almost any value to be stored in the general registers.
      Restrict doubleword quantities to even register pairs so that we can
-     use ldrd.  Do not allow Neon structure opaque modes in general registers;
-     they would use too many.  */
+     use ldrd.  Do not allow very large Neon structure opaque modes in
+     general registers; they would use too many.  */
   if (regno <= LAST_ARM_REGNUM)
     return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
-      && !VALID_NEON_STRUCT_MODE (mode);
+      && ARM_NUM_REGS (mode) <= 4;
 
   if (regno == FRAME_POINTER_REGNUM
       || regno == ARG_POINTER_REGNUM)
@@ -16103,6 +17522,15 @@
 }
 
 static void
+arm_init_fp16_builtins (void)
+{
+  tree fp16_type = make_node (REAL_TYPE);
+  TYPE_PRECISION (fp16_type) = 16;
+  layout_type (fp16_type);
+  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+}
+
+static void
 arm_init_builtins (void)
 {
   arm_init_tls_builtins ();
@@ -16112,6 +17540,71 @@
 
   if (TARGET_NEON)
     arm_init_neon_builtins ();
+
+  if (arm_fp16_format)
+    arm_init_fp16_builtins ();
+}
+
+/* Implement TARGET_INVALID_PARAMETER_TYPE.  */
+
+static const char *
+arm_invalid_parameter_type (const_tree t)
+{
+  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+    return N_("function parameters cannot have __fp16 type");
+  return NULL;
+}
+
+/* Implement TARGET_INVALID_RETURN_TYPE.  */
+
+static const char *
+arm_invalid_return_type (const_tree t)
+{
+  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+    return N_("functions cannot return __fp16 type");
+  return NULL;
+}
+
+/* Implement TARGET_PROMOTED_TYPE.  */
+
+static tree
+arm_promoted_type (const_tree t)
+{
+  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+    return float_type_node;
+  return NULL_TREE;
+}
+
+/* Implement TARGET_CONVERT_TO_TYPE.
+   Specifically, this hook implements the peculiarity of the ARM
+   half-precision floating-point C semantics that requires conversions
+   between __fp16 and double to go through an intermediate conversion to
+   float.  */
+
+static tree
+arm_convert_to_type (tree type, tree expr)
+{
+  tree fromtype = TREE_TYPE (expr);
+  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
+    return NULL_TREE;
+  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
+      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
+    return convert (type, convert (float_type_node, expr));
+  return NULL_TREE;
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
+   This simply adds HFmode as a supported mode; even though we don't
+   implement arithmetic on this type directly, it's supported by
+   optabs conversions, much the way the double-word arithmetic is
+   special-cased in the default hook. 
*/ + +static bool +arm_scalar_mode_supported_p (enum machine_mode mode) +{ + if (mode == HFmode) + return (arm_fp16_format != ARM_FP16_FORMAT_NONE); + else + return default_scalar_mode_supported_p (mode); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -17191,6 +18684,7 @@ unsigned HOST_WIDE_INT mask = 0xff; int i; + val = val & (unsigned HOST_WIDE_INT)0xffffffffu; if (val == 0) /* XXX */ return 0; @@ -18279,40 +19773,8 @@ else { int set_float_abi_attributes = 0; - switch (arm_fpu_arch) - { - case FPUTYPE_FPA: - fpu_name = "fpa"; - break; - case FPUTYPE_FPA_EMU2: - fpu_name = "fpe2"; - break; - case FPUTYPE_FPA_EMU3: - fpu_name = "fpe3"; - break; - case FPUTYPE_MAVERICK: - fpu_name = "maverick"; - break; - case FPUTYPE_VFP: - fpu_name = "vfp"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3D16: - fpu_name = "vfpv3-d16"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3: - fpu_name = "vfpv3"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_NEON: - fpu_name = "neon"; - set_float_abi_attributes = 1; - break; - default: - abort(); - } - if (set_float_abi_attributes) + fpu_name = arm_fpu_desc->name; + if (arm_fp_model == ARM_FP_MODEL_VFP) { if (TARGET_HARD_FLOAT) asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); @@ -18362,6 +19824,11 @@ val = 6; asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + /* Tag_ABI_FP_16bit_format. */ + if (arm_fp16_format) + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", + (int)arm_fp16_format); + if (arm_lang_output_object_attributes_hook) arm_lang_output_object_attributes_hook(); } @@ -18591,6 +20058,23 @@ return 1; } +/* Emit a fp16 constant appropriately padded to occupy a 4-byte word. + HFmode constant pool entries are actually loaded with ldr. */ +void +arm_emit_fp16_const (rtx c) +{ + REAL_VALUE_TYPE r; + long bits; + + REAL_VALUE_FROM_CONST_DOUBLE (r, c); + bits = real_to_target (NULL, &r, HFmode); + if (WORDS_BIG_ENDIAN) + assemble_zeros (2); + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); + if (!WORDS_BIG_ENDIAN) + assemble_zeros (2); +} + const char * arm_output_load_gr (rtx *operands) { @@ -18628,19 +20112,24 @@ that way. */ static void -arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum, +arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, enum machine_mode mode, tree type, int *pretend_size, int second_time ATTRIBUTE_UNUSED) { - int nregs = cum->nregs; - if (nregs & 1 - && ARM_DOUBLEWORD_ALIGN - && arm_needs_doubleword_align (mode, type)) - nregs++; - + int nregs; + cfun->machine->uses_anonymous_args = 1; + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + nregs = pcum->aapcs_ncrn; + if ((nregs & 1) && arm_needs_doubleword_align (mode, type)) + nregs++; + } + else + nregs = pcum->nregs; + if (nregs < NUM_ARG_REGS) *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; } @@ -19024,9 +20513,10 @@ || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) return true; - if ((mode == V2SImode) - || (mode == V4HImode) - || (mode == V8QImode)) + if ((TARGET_NEON || TARGET_IWMMXT) + && ((mode == V2SImode) + || (mode == V4HImode) + || (mode == V8QImode))) return true; return false; @@ -19057,9 +20547,14 @@ if (IS_FPA_REGNUM (regno)) return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM; - /* FIXME: VFPv3 register numbering. */ if (IS_VFP_REGNUM (regno)) - return 64 + regno - FIRST_VFP_REGNUM; + { + /* See comment in arm_dwarf_register_span. 
*/
+      if (VFP_REGNO_OK_FOR_SINGLE (regno))
+       return 64 + regno - FIRST_VFP_REGNUM;
+      else
+       return 256 + (regno - FIRST_VFP_REGNUM) / 2;
+    }
 
   if (IS_IWMMXT_GR_REGNUM (regno))
     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
@@ -19070,6 +20565,39 @@
   gcc_unreachable ();
 }
 
+/* Dwarf models VFPv3 registers as 32 64-bit registers.
+   GCC models them as 64 32-bit registers, so we need to describe this to
+   the DWARF generation code.  Other registers can use the default.  */
+static rtx
+arm_dwarf_register_span (rtx rtl)
+{
+  unsigned regno;
+  int nregs;
+  int i;
+  rtx p;
+
+  regno = REGNO (rtl);
+  if (!IS_VFP_REGNUM (regno))
+    return NULL_RTX;
+
+  /* The EABI defines two VFP register ranges:
+       64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
+       256-287: D0-D31
+     The recommended encoding for s0-s31 is a DW_OP_bit_piece of the
+     corresponding D register.  However, GDB 6.6 does not support this, so
+     we use the legacy encodings.  We also use these encodings for D0-D15
+     for compatibility with older debuggers.  */
+  if (VFP_REGNO_OK_FOR_SINGLE (regno))
+    return NULL_RTX;
+
+  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
+  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
+  regno = (regno - FIRST_VFP_REGNUM) / 2;
+  for (i = 0; i < nregs; i++)
+    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
+
+  return p;
+}
 
 #ifdef TARGET_UNWIND_INFO
 /* Emit unwind directives for a store-multiple instruction or stack pointer
@@ -19556,6 +21084,7 @@
     case cortexr4f:
     case cortexa8:
     case cortexa9:
+    case marvell_f:
       return 2;
 
     default:
@@ -19620,6 +21149,10 @@
       return "St9__va_list";
     }
 
+  /* Half-precision float.  */
+  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+    return "Dh";
+
   if (TREE_CODE (type) != VECTOR_TYPE)
     return NULL;
 
@@ -19676,6 +21209,87 @@
      given on the command line.  */
   if (level > 0)
     flag_section_anchors = 2;
+
+  if (size)
+    {
+      /* Select optimizations that are a win for code size.
+
+        The inlining options set below have two important
+        consequences for functions not explicitly marked
+        inline:
+        - Static functions used once are inlined if
+          sufficiently small.  Static functions used twice
+          are not inlined.
+        - Non-static functions are never inlined.
+        So in effect, inlining will never cause two copies
+        of function bodies to be created.  */
+      /* Empirical results show that these options benefit code
+        size on ARM.  */
+      /* FIXME: -fsee seems to be broken for Thumb-2.  */
+      /* flag_see = 1; */
+      flag_move_loop_invariants = 0;
+      /* In Thumb mode the function call code size overhead is typically very
+        small, and narrow branch instructions have very limited range.
+        Inlining even medium sized functions tends to bloat the caller and
+        require the use of long branch instructions.  On average the long
+        branches cost more than eliminating the function call overhead saves,
+        so we use extremely restrictive automatic inlining heuristics.  In ARM
+        mode the results are fairly neutral, probably due to better constant
+        pool placement.  */
+      set_param_value ("max-inline-insns-single", 1);
+      set_param_value ("max-inline-insns-auto", 1);
+    }
+  else
+    {
+      /* CSL LOCAL */
+      /* Set flag_unroll_loops to a default value, so that we can tell
+        if it was specified on the command line; see
+        arm_override_options.  */
+      flag_unroll_loops = 2;
+      /* Promote loop indices to int where possible.  Consider moving this
+        to -Os, also.  */
+      flag_promote_loop_indices = 1;
+    }
+}
+
+/* Return how many instructions to look ahead for better insn
+   scheduling. 
*/ +static int +arm_multipass_dfa_lookahead (void) +{ + return (arm_tune == marvell_f) ? 4 : 0; +} + +/* Return the minimum alignment required to load or store a + vector of the given type, which may be less than the + natural alignment of the type. */ + +static int +arm_vector_min_alignment (const_tree type) +{ + if (TARGET_NEON) + { + /* The NEON element load and store instructions only require the + alignment of the element type. They can benefit from higher + statically reported alignment, but we do not take advantage + of that yet. */ + gcc_assert (TREE_CODE (type) == VECTOR_TYPE); + return TYPE_ALIGN_UNIT (TREE_TYPE (type)); + } + + return default_vector_min_alignment (type); +} + +static bool +arm_vector_always_misalign(const_tree type ATTRIBUTE_UNUSED) +{ + /* On big-endian targets array loads (vld1) and vector loads (vldm) + use a different format. Always use the "misaligned" array variant. + FIXME: this still doesn't work for big-endian because of constant + loads and other operations using vldm ordering. See + issue 6722. */ + return TARGET_NEON && !BYTES_BIG_ENDIAN; } #include "gt-arm.h" + --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -104,6 +104,7 @@ ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale) ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) +ARM_CORE("marvell-f", marvell_f, 5TE, FL_LDSCHED | FL_VFPV2 | FL_MARVELL_F, 9e) /* V5TEJ Architecture Processors */ ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e) @@ -117,9 +118,13 @@ ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) + +/* V7 Architecture Processors */ +ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) +ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -85,6 +85,10 @@ builtin_define ("__IWMMXT__"); \ if (TARGET_AAPCS_BASED) \ builtin_define ("__ARM_EABI__"); \ + if (arm_tune_marvell_f) \ + builtin_define ("__ARM_TUNE_MARVELL_F__"); \ + if (low_irq_latency) \ + builtin_define ("__low_irq_latency__"); \ } while (0) /* The various ARM cores. */ @@ -199,6 +203,13 @@ #define TARGET_AAPCS_BASED \ (arm_abi != ARM_ABI_APCS && arm_abi != ARM_ABI_ATPCS) +/* True if we should avoid generating conditional execution instructions. */ +#define TARGET_NO_COND_EXEC (arm_tune_marvell_f && !optimize_size) +/* Avoid most conditional instructions, but allow pairs with opposite + conditions and the same destination. */ +#define TARGET_NO_SINGLE_COND_EXEC \ + ((arm_tune_cortex_a9 || arm_tune_marvell_f) && !optimize_size) + #define TARGET_HARD_TP (target_thread_pointer == TP_CP15) #define TARGET_SOFT_TP (target_thread_pointer == TP_SOFT) @@ -211,35 +222,43 @@ /* Thumb-1 only. */ #define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) +#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2) /* The following two macros concern the ability to execute coprocessor instructions for VFPv3 or NEON. 
TARGET_VFP3/TARGET_VFPD32 are currently only ever tested when we know we are generating for VFP hardware; we need to be more careful with TARGET_NEON as noted below. */ /* FPU is has the full VFPv3/NEON register file of 32 D registers. */ -#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3 \ - || arm_fpu_arch == FPUTYPE_NEON)) +#define TARGET_VFPD32 (TARGET_VFP && arm_arch_vfp_regs == VFP_REG_D32) /* FPU supports VFPv3 instructions. */ -#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3D16 \ - || TARGET_VFPD32)) +#define TARGET_VFP3 (TARGET_VFP && arm_arch_vfp_rev >= 3) + +/* FPU only supports VFP single-precision instructions. */ +#define TARGET_VFP_SINGLE (TARGET_VFP && arm_arch_vfp_regs == VFP_REG_SINGLE) + +/* FPU supports VFP double-precision instructions. */ +#define TARGET_VFP_DOUBLE (TARGET_VFP && arm_arch_vfp_regs != VFP_REG_SINGLE) + +/* FPU supports half-precision floating-point with NEON element load/store. */ +#define TARGET_NEON_FP16 (TARGET_VFP && arm_arch_vfp_neon && arm_arch_vfp_fp16) + +/* FPU supports VFP half-precision floating-point. */ +#define TARGET_FP16 (TARGET_VFP && arm_arch_vfp_fp16) /* FPU supports Neon instructions. The setting of this macro gets revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT and TARGET_HARD_FLOAT to ensure that NEON instructions are available. */ #define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ - && arm_fp_model == ARM_FP_MODEL_VFP \ - && arm_fpu_arch == FPUTYPE_NEON) + && TARGET_VFP && arm_arch_vfp_neon) /* "DSP" multiply instructions, eg. SMULxy. */ #define TARGET_DSP_MULTIPLY \ - (TARGET_32BIT && arm_arch5e && arm_arch_notm) + (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7em)) /* Integer SIMD instructions, and extend-accumulate instructions. */ #define TARGET_INT_SIMD \ - (TARGET_32BIT && arm_arch6 && arm_arch_notm) + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) /* Should MOVW/MOVT be used in preference to a constant pool. */ #define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) @@ -289,40 +308,30 @@ ARM_FP_MODEL_VFP }; -extern enum arm_fp_model arm_fp_model; - -/* Which floating point hardware is available. Also update - fp_model_for_fpu in arm.c when adding entries to this list. */ -enum fputype -{ - /* No FP hardware. */ - FPUTYPE_NONE, - /* Full FPA support. */ - FPUTYPE_FPA, - /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */ - FPUTYPE_FPA_EMU2, - /* Emulated FPA hardware, Issue 3 emulator. */ - FPUTYPE_FPA_EMU3, - /* Cirrus Maverick floating point co-processor. */ - FPUTYPE_MAVERICK, - /* VFP. */ - FPUTYPE_VFP, - /* VFPv3-D16. */ - FPUTYPE_VFP3D16, - /* VFPv3. */ - FPUTYPE_VFP3, - /* Neon. */ - FPUTYPE_NEON +enum vfp_reg_type { + VFP_REG_D16, + VFP_REG_D32, + VFP_REG_SINGLE }; -/* Recast the floating point class to be the floating point attribute. 
*/ -#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune) - -/* What type of floating point to tune for */ -extern enum fputype arm_fpu_tune; +extern const struct arm_fpu_desc +{ + const char *name; + enum arm_fp_model model; + int rev; + enum vfp_reg_type myregs; + int neon; + int fp16; +} *arm_fpu_desc; + +#define arm_fp_model arm_fpu_desc->model +#define arm_arch_vfp_rev arm_fpu_desc->rev +#define arm_arch_vfp_regs arm_fpu_desc->myregs +#define arm_arch_vfp_neon arm_fpu_desc->neon +#define arm_arch_vfp_fp16 arm_fpu_desc->fp16 -/* What type of floating point instructions are available */ -extern enum fputype arm_fpu_arch; +/* Which floating point hardware to schedule for. */ +extern int arm_fpu_attr; enum float_abi_type { @@ -337,6 +346,21 @@ #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT #endif +/* Which __fp16 format to use. + The enumeration values correspond to the numbering for the + Tag_ABI_FP_16bit_format attribute. + */ +enum arm_fp16_format_type +{ + ARM_FP16_FORMAT_NONE = 0, + ARM_FP16_FORMAT_IEEE = 1, + ARM_FP16_FORMAT_ALTERNATIVE = 2 +}; + +extern enum arm_fp16_format_type arm_fp16_format; +#define LARGEST_EXPONENT_IS_NORMAL(bits) \ + ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + /* Which ABI to use. */ enum arm_abi_type { @@ -383,12 +407,18 @@ /* Nonzero if instructions not present in the 'M' profile can be used. */ extern int arm_arch_notm; +/* Nonzero if instructions present in ARMv7E-M can be used. */ +extern int arm_arch7em; + /* Nonzero if this chip can benefit from load scheduling. */ extern int arm_ld_sched; /* Nonzero if generating thumb code. */ extern int thumb_code; +/* Nonzero if generating Janus2 code. */ +extern int janus2_code; + /* Nonzero if this chip is a StrongARM. */ extern int arm_tune_strongarm; @@ -404,6 +434,9 @@ /* Nonzero if tuning for XScale. */ extern int arm_tune_xscale; +/* Nonzero if tuning for Marvell Feroceon. */ +extern int arm_tune_marvell_f; + /* Nonzero if tuning for stores via the write buffer. */ extern int arm_tune_wbuf; @@ -423,6 +456,10 @@ /* Nonzero if chip supports integer division instruction. */ extern int arm_arch_hwdiv; +/* Nonzero if we should minimize interrupt latency of the + generated code. */ +extern int low_irq_latency; + #ifndef TARGET_DEFAULT #define TARGET_DEFAULT (MASK_APCS_FRAME) #endif @@ -757,12 +794,11 @@ fixed_regs[regno] = call_used_regs[regno] = 1; \ } \ \ - if (TARGET_THUMB && optimize_size) \ - { \ - /* When optimizing for size, it's better not to use \ - the HI regs, because of the overhead of stacking \ - them. */ \ - /* ??? Is this still true for thumb2? */ \ + if (TARGET_THUMB1 && optimize_size) \ + { \ + /* When optimizing for size on Thumb-1, it's better not \ + to use the HI regs, because of the overhead of \ + stacking them. */ \ for (regno = FIRST_HI_REGNUM; \ regno <= LAST_HI_REGNUM; ++regno) \ fixed_regs[regno] = call_used_regs[regno] = 1; \ @@ -881,6 +917,9 @@ /* The number of (integer) argument register available. */ #define NUM_ARG_REGS 4 +/* And similarly for the VFP. */ +#define NUM_VFP_ARG_REGS 16 + /* Return the register number of the N'th (integer) argument. */ #define ARG_REGISTER(N) (N - 1) @@ -1059,7 +1098,7 @@ (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) #define VALID_IWMMXT_REG_MODE(MODE) \ - (arm_vector_mode_supported_p (MODE) || (MODE) == DImode) + (arm_vector_mode_supported_p (MODE) || (MODE) == DImode || (MODE) == SImode) /* Modes valid for Neon D registers. 
*/ #define VALID_NEON_DREG_MODE(MODE) \ @@ -1230,11 +1269,14 @@ || reg_classes_intersect_p (VFP_REGS, (CLASS)) \ : 0) -/* We need to define this for LO_REGS on thumb. Otherwise we can end up - using r0-r4 for function arguments, r7 for the stack frame and don't - have enough left over to do doubleword arithmetic. */ +/* We need to define this for LO_REGS on Thumb-1. Otherwise we can end up + using r0-r4 for function arguments, r7 for the stack frame and don't have + enough left over to do doubleword arithmetic. For Thumb-2 all the + potentially problematic instructions accept high registers so this is not + necessary. Care needs to be taken to avoid adding new Thumb-2 patterns + that require many low registers. */ #define CLASS_LIKELY_SPILLED_P(CLASS) \ - ((TARGET_THUMB && (CLASS) == LO_REGS) \ + ((TARGET_THUMB1 && (CLASS) == LO_REGS) \ || (CLASS) == CC_REG) /* The class value for index registers, and the one for base regs. */ @@ -1245,7 +1287,7 @@ when addressing quantities in QI or HI mode; if we don't know the mode, then we must be conservative. */ #define MODE_BASE_REG_CLASS(MODE) \ - (TARGET_32BIT ? CORE_REGS : \ + (TARGET_32BIT ? (TARGET_THUMB2 ? LO_REGS : CORE_REGS) : \ (((MODE) == SImode) ? BASE_REGS : LO_REGS)) /* For Thumb we can not support SP+reg addressing, so we return LO_REGS @@ -1346,6 +1388,9 @@ else if (TARGET_MAVERICK && TARGET_HARD_FLOAT) \ /* Need to be careful, -256 is not a valid offset. */ \ low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); \ + else if (TARGET_REALLY_IWMMXT && MODE == SImode) \ + /* Need to be careful, -1024 is not a valid offset. */ \ + low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff); \ else if (MODE == SImode \ || (MODE == SFmode && TARGET_SOFT_FLOAT) \ || ((MODE == HImode || MODE == QImode) && ! arm_arch4)) \ @@ -1416,13 +1461,17 @@ /* If defined, gives a class of registers that cannot be used as the operand of a SUBREG that changes the mode of the object illegally. */ -/* Moves between FPA_REGS and GENERAL_REGS are two memory insns. */ +/* Moves between FPA_REGS and GENERAL_REGS are two memory insns. + Moves between VFP_REGS and GENERAL_REGS are a single insn, but + it is typically more expensive than a single memory access. We set + the cost to less than two memory accesses so that floating + point to integer conversion does not go through memory. */ #define REGISTER_MOVE_COST(MODE, FROM, TO) \ (TARGET_32BIT ? \ ((FROM) == FPA_REGS && (TO) != FPA_REGS ? 20 : \ (FROM) != FPA_REGS && (TO) == FPA_REGS ? 20 : \ - IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 10 : \ - !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 10 : \ + IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 15 : \ + !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 15 : \ (FROM) == IWMMXT_REGS && (TO) != IWMMXT_REGS ? 4 : \ (FROM) != IWMMXT_REGS && (TO) == IWMMXT_REGS ? 4 : \ (FROM) == IWMMXT_GR_REGS || (TO) == IWMMXT_GR_REGS ? 20 : \ @@ -1491,9 +1540,10 @@ /* Define how to find the value returned by a library function assuming the value has mode MODE. */ -#define LIBCALL_VALUE(MODE) \ - (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \ - && GET_MODE_CLASS (MODE) == MODE_FLOAT \ +#define LIBCALL_VALUE(MODE) \ + (TARGET_AAPCS_BASED ? aapcs_libcall_value (MODE) \ + : (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \ + && GET_MODE_CLASS (MODE) == MODE_FLOAT) \ ? gen_rtx_REG (MODE, FIRST_FPA_REGNUM) \ : TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK \ && GET_MODE_CLASS (MODE) == MODE_FLOAT \ @@ -1502,22 +1552,16 @@ ? 
gen_rtx_REG (MODE, FIRST_IWMMXT_REGNUM) \ : gen_rtx_REG (MODE, ARG_REGISTER (1))) -/* Define how to find the value returned by a function. - VALTYPE is the data type of the value (as a tree). - If the precise function being called is known, FUNC is its FUNCTION_DECL; - otherwise, FUNC is 0. */ -#define FUNCTION_VALUE(VALTYPE, FUNC) \ - arm_function_value (VALTYPE, FUNC); - -/* 1 if N is a possible register number for a function value. - On the ARM, only r0 and f0 can return results. */ -/* On a Cirrus chip, mvf0 can return results. */ -#define FUNCTION_VALUE_REGNO_P(REGNO) \ - ((REGNO) == ARG_REGISTER (1) \ - || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \ - && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \ - || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \ - || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \ +/* 1 if REGNO is a possible register number for a function value. */ +#define FUNCTION_VALUE_REGNO_P(REGNO) \ + ((REGNO) == ARG_REGISTER (1) \ + || (TARGET_AAPCS_BASED && TARGET_32BIT \ + && TARGET_VFP && TARGET_HARD_FLOAT \ + && (REGNO) == FIRST_VFP_REGNUM) \ + || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \ + && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \ + || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \ + || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \ && TARGET_HARD_FLOAT_ABI && TARGET_FPA)) /* Amount of memory needed for an untyped call to save all possible return @@ -1617,9 +1661,27 @@ that is in text_section. */ extern GTY(()) rtx thumb_call_via_label[14]; +/* The number of potential ways of assigning to a co-processor. */ +#define ARM_NUM_COPROC_SLOTS 1 + +/* Enumeration of procedure calling standard variants. We don't really + support all of these yet. */ +enum arm_pcs +{ + ARM_PCS_AAPCS, /* Base standard AAPCS. */ + ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */ + ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */ + /* This must be the last AAPCS variant. */ + ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */ + ARM_PCS_ATPCS, /* ATPCS. */ + ARM_PCS_APCS, /* APCS (legacy Linux etc). */ + ARM_PCS_UNKNOWN +}; + +/* We can't define this inside a generator file because it needs enum + machine_mode. */ /* A C type for declaring a variable that is used as the first argument of - `FUNCTION_ARG' and other related values. For some target machines, the - type `int' suffices and can hold the number of bytes of argument so far. */ + `FUNCTION_ARG' and other related values. */ typedef struct { /* This is the number of registers of arguments scanned so far. */ @@ -1628,9 +1690,33 @@ int iwmmxt_nregs; int named_count; int nargs; - int can_split; + /* Which procedure call variant to use for this call. */ + enum arm_pcs pcs_variant; + + /* AAPCS related state tracking. */ + int aapcs_arg_processed; /* No need to lay out this argument again. */ + int aapcs_cprc_slot; /* Index of co-processor rules to handle + this argument, or -1 if using core + registers. */ + int aapcs_ncrn; + int aapcs_next_ncrn; + rtx aapcs_reg; /* Register assigned to this argument. */ + int aapcs_partial; /* How many bytes are passed in regs (if + split between core regs and stack. + Zero otherwise. */ + int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS]; + int can_split; /* Argument can be split between core regs + and the stack. 
*/ + /* Private data for tracking VFP register allocation */ + unsigned aapcs_vfp_regs_free; + unsigned aapcs_vfp_reg_alloc; + int aapcs_vfp_rcount; + /* Can't include insn-modes.h because this header is needed before we + generate it. */ + int /* enum machine_mode */ aapcs_vfp_rmode; } CUMULATIVE_ARGS; + /* Define where to put the arguments to a function. Value is zero to push the argument on the stack, or a hard register in which to store the argument. @@ -1674,13 +1760,7 @@ of mode MODE and data type TYPE. (TYPE is null for libcalls where that information may not be available.) */ #define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \ - (CUM).nargs += 1; \ - if (arm_vector_mode_supported_p (MODE) \ - && (CUM).named_count > (CUM).nargs \ - && TARGET_IWMMXT_ABI) \ - (CUM).iwmmxt_nregs += 1; \ - else \ - (CUM).nregs += ARM_NUM_REGS2 (MODE, TYPE) + arm_function_arg_advance (&(CUM), (MODE), (TYPE), (NAMED)) /* If defined, a C expression that gives the alignment boundary, in bits, of an argument with the specified mode and type. If it is not defined, @@ -1692,9 +1772,11 @@ /* 1 if N is a possible register number for function argument passing. On the ARM, r0-r3 are used to pass args. */ -#define FUNCTION_ARG_REGNO_P(REGNO) \ - (IN_RANGE ((REGNO), 0, 3) \ - || (TARGET_IWMMXT_ABI \ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + (IN_RANGE ((REGNO), 0, 3) \ + || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \ + && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \ + || (TARGET_IWMMXT_ABI \ && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9))) @@ -2324,7 +2406,8 @@ /* Try to generate sequences that don't involve branches, we can then use conditional instructions */ #define BRANCH_COST(speed_p, predictable_p) \ - (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0)) + (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \ + : (optimize > 0 ? 2 : 0)) /* Position Independent Code. */ /* We decide which register to use based on the compilation options and @@ -2392,6 +2475,7 @@ /* The arm5 clz instruction returns 32. */ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) #undef ASM_APP_OFF #define ASM_APP_OFF (TARGET_THUMB1 ? "\t.code\t16\n" : \ @@ -2404,6 +2488,19 @@ if (TARGET_ARM) \ asm_fprintf (STREAM,"\tstmfd\t%r!,{%r}\n", \ STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + /* We can't push STATIC_CHAIN_REGNUM (r12) directly with Thumb-1. + We know that ASM_OUTPUT_REG_PUSH will be matched with + ASM_OUTPUT_REG_POP, and that r7 isn't used by the function + profiler, so we can use it as a scratch reg. WARNING: This isn't + safe in the general case! It may be sensitive to future changes + in final.c:profile_function. */ \ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + asm_fprintf (STREAM, "\tmov\tr7, %r\n", REGNO);\ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + } \ else \ asm_fprintf (STREAM, "\tpush {%r}\n", REGNO); \ } while (0) @@ -2415,6 +2512,14 @@ if (TARGET_ARM) \ asm_fprintf (STREAM, "\tldmfd\t%r!,{%r}\n", \ STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + /* See comment in ASM_OUTPUT_REG_PUSH. 
*/ \
+         asm_fprintf (STREAM, "\tpop\t{r7}\n");        \
+         asm_fprintf (STREAM, "\tmov\t%r, r7\n", REGNO);\
+         asm_fprintf (STREAM, "\tpop\t{r7}\n");        \
+       } \
       else \
        asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \
     } while (0)
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -99,6 +99,7 @@
 ; correctly for PIC usage.
   (UNSPEC_GOTSYM_OFF 24) ; The offset of the start of the the GOT from a
                          ; a given symbolic address.
+  (UNSPEC_RBIT 25)       ; rbit operation.
 ]
 )
@@ -131,6 +132,8 @@
   (VUNSPEC_WCMP_EQ  12) ; Used by the iWMMXt WCMPEQ instructions
   (VUNSPEC_WCMP_GTU 13) ; Used by the iWMMXt WCMPGTU instructions
   (VUNSPEC_WCMP_GT  14) ; Used by the iwMMXT WCMPGT instructions
+  (VUNSPEC_ALIGN16 15)  ; Used to force 16-byte alignment.
+  (VUNSPEC_ALIGN32 16)  ; Used to force 32-byte alignment.
   (VUNSPEC_EH_RETURN 20); Use to override the return address for exception
                         ; handling.
 ]
@@ -144,6 +147,10 @@
 ; patterns that share the same RTL in both ARM and Thumb code.
 (define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code")))
 
+; FIX_JANUS is set to 'yes' when compiling for Janus2; it causes a nop to
+; be added after shifts, in order to work around a Janus2 bug.
+(define_attr "fix_janus" "no,yes" (const (symbol_ref "janus2_code")))
+
 ; IS_STRONGARM is set to 'yes' when compiling for StrongARM, it affects
 ; scheduling decisions for the load unit and the multiplier.
 (define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_tune_strongarm")))
@@ -158,7 +165,7 @@
 ; Floating Point Unit.  If we only have floating point emulation, then there
 ; is no point in scheduling the floating point insns.  (Well, for best
 ; performance we should try and group them together).
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon"
+(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
   (const (symbol_ref "arm_fpu_attr")))
 
 ; LENGTH of an instruction (in bytes)
@@ -185,7 +192,7 @@
 ;; scheduling information.
 
 (define_attr "insn"
-        "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
+        "mov,mvn,and,orr,eor,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
         (const_string "other"))
 
 ; TYPE attribute is used to detect floating point instructions which, if
@@ -251,8 +258,6 @@
 (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
 
 ;; Classification of NEON instructions for scheduling purposes.
-;; Do not set this attribute and the "type" attribute together in
-;; any one instruction pattern. 
(define_attr "neon_type" "neon_int_1,\ neon_int_2,\ @@ -415,7 +420,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9") + (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9,marvell_f") (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) @@ -423,7 +428,7 @@ (define_attr "generic_vfp" "yes,no" (const (if_then_else (and (eq_attr "fpu" "vfp") - (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9") + (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9,marvell_f") (eq_attr "tune_cortexr4" "no")) (const_string "yes") (const_string "no")))) @@ -437,6 +442,8 @@ (include "cortex-a9.md") (include "cortex-r4.md") (include "cortex-r4f.md") +(include "marvell-f.md") +(include "marvell-f-vfp.md") (include "vfp11.md") @@ -472,9 +479,9 @@ if (TARGET_THUMB1) { if (GET_CODE (operands[1]) != REG) - operands[1] = force_reg (SImode, operands[1]); + operands[1] = force_reg (DImode, operands[1]); if (GET_CODE (operands[2]) != REG) - operands[2] = force_reg (SImode, operands[2]); + operands[2] = force_reg (DImode, operands[2]); } " ) @@ -620,10 +627,11 @@ sub%?\\t%0, %1, #%n2 sub%?\\t%0, %1, #%n2 #" - "TARGET_32BIT && - GET_CODE (operands[2]) == CONST_INT + "TARGET_32BIT + && GET_CODE (operands[2]) == CONST_INT && !(const_ok_for_arm (INTVAL (operands[2])) - || const_ok_for_arm (-INTVAL (operands[2])))" + || const_ok_for_arm (-INTVAL (operands[2]))) + && (reload_completed || !arm_eliminable_register (operands[1]))" [(clobber (const_int 0))] " arm_split_constant (PLUS, SImode, curr_insn, @@ -639,10 +647,10 @@ ;; register. Trying to reload it will always fail catastrophically, ;; so never allow those alternatives to match if reloading is needed. 
-(define_insn "*thumb1_addsi3" - [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,!k") - (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,!k,!k") - (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,!M,!O")))] +(define_insn_and_split "*thumb1_addsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,!k,l,l") + (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,!k,!k,0,l") + (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,!M,!O,Pa,Pb")))] "TARGET_THUMB1" "* static const char * const asms[] = @@ -653,7 +661,9 @@ \"add\\t%0, %0, %2\", \"add\\t%0, %0, %2\", \"add\\t%0, %1, %2\", - \"add\\t%0, %1, %2\" + \"add\\t%0, %1, %2\", + \"#\", + \"#\" }; if ((which_alternative == 2 || which_alternative == 6) && GET_CODE (operands[2]) == CONST_INT @@ -661,7 +671,22 @@ return \"sub\\t%0, %1, #%n2\"; return asms[which_alternative]; " - [(set_attr "length" "2")] + "&& reload_completed && CONST_INT_P (operands[2]) + && operands[1] != stack_pointer_rtx + && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255)" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))] + { + HOST_WIDE_INT offset = INTVAL (operands[2]); + if (offset > 255) + offset = 255; + else if (offset < -255) + offset = -255; + + operands[3] = GEN_INT (offset); + operands[2] = GEN_INT (INTVAL (operands[2]) - offset); + } + [(set_attr "length" "2,2,2,2,2,2,2,4,4")] ) ;; Reloading and elimination of the frame pointer can @@ -854,7 +879,11 @@ [(set_attr "conds" "use") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*addsi3_carryin_alt1" @@ -938,7 +967,7 @@ [(set (match_operand:DF 0 "s_register_operand" "") (plus:DF (match_operand:DF 1 "s_register_operand" "") (match_operand:DF 2 "arm_float_add_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" " if (TARGET_MAVERICK && !cirrus_fp_register (operands[2], DFmode)) @@ -1176,7 +1205,7 @@ [(set (match_operand:DF 0 "s_register_operand" "") (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "") (match_operand:DF 2 "arm_float_rhs_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" " if (TARGET_MAVERICK) { @@ -1332,6 +1361,49 @@ (set_attr "predicable" "yes")] ) +; The combiner cannot combine the first and last insns in the +; following sequence because of the intervening insn, so help the +; combiner with this splitter. The combiner does attempt to split +; this particular combination but does not know this exact split. +; Note that the combiner puts the constant at the outermost operation +; as a part of canonicalization. 
+; +; mul r3, r2, r1 +; r3, r3, +; add r3, r3, r4 + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "plusminus_operator" + [(plus:SI (mult:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "s_register_operand" "")) + (match_operand:SI 4 "s_register_operand" "")) + (match_operand:SI 5 "arm_immediate_operand" "")]))] + "TARGET_32BIT" + [(set (match_dup 0) + (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) + (match_op_dup:SI 1 [(match_dup 0) (match_dup 5)]))] + "") + +; Likewise for MLS. MLS is available only on select architectures. + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "plusminus_operator" + [(minus:SI (match_operand:SI 2 "s_register_operand" "") + (mult:SI (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 4 "s_register_operand" ""))) + (match_operand:SI 5 "arm_immediate_operand" "")]))] + "TARGET_32BIT && arm_arch_thumb2" + [(set (match_dup 0) + (minus:SI (match_dup 2) + (mult:SI (match_dup 3) (match_dup 4)))) + (set (match_dup 0) + (match_op_dup:SI 1 [(match_dup 0) (match_dup 5)]))] + "") + (define_insn "*mulsi3addsi_compare0" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV @@ -1713,7 +1785,7 @@ [(set (match_operand:DF 0 "s_register_operand" "") (mult:DF (match_operand:DF 1 "s_register_operand" "") (match_operand:DF 2 "arm_float_rhs_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" " if (TARGET_MAVERICK && !cirrus_fp_register (operands[2], DFmode)) @@ -1733,7 +1805,7 @@ [(set (match_operand:DF 0 "s_register_operand" "") (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "") (match_operand:DF 2 "arm_float_rhs_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" "") ;; Modulo insns @@ -1960,6 +2032,7 @@ DONE; " [(set_attr "length" "4,4,16") + (set_attr "insn" "and") (set_attr "predicable" "yes")] ) @@ -1969,7 +2042,8 @@ (match_operand:SI 2 "register_operand" "l")))] "TARGET_THUMB1" "and\\t%0, %0, %2" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "insn" "and")] ) (define_insn "*andsi3_compare0" @@ -1984,7 +2058,8 @@ "@ and%.\\t%0, %1, %2 bic%.\\t%0, %1, #%B2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "insn" "and,*")] ) (define_insn "*andsi3_compare0_scratch" @@ -2280,7 +2355,7 @@ } } - target = operands[0]; + target = copy_rtx (operands[0]); /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical subreg as the final target. 
*/ if (GET_CODE (target) == SUBREG) @@ -2528,7 +2603,11 @@ (set_attr "shift" "2") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*andsi_notsi_si_compare0" @@ -2576,6 +2655,7 @@ orr%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") + (set_attr "insn" "orr") (set_attr "predicable" "yes")] ) @@ -2638,7 +2718,8 @@ (match_operand:SI 2 "register_operand" "l")))] "TARGET_THUMB1" "orr\\t%0, %0, %2" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "insn" "orr")] ) (define_peephole2 @@ -2663,7 +2744,8 @@ (ior:SI (match_dup 1) (match_dup 2)))] "TARGET_32BIT" "orr%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "insn" "orr")] ) (define_insn "*iorsi3_compare0_scratch" @@ -2674,7 +2756,8 @@ (clobber (match_scratch:SI 0 "=r"))] "TARGET_32BIT" "orr%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "insn" "orr")] ) (define_insn "xordi3" @@ -2697,7 +2780,8 @@ eor%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "insn" "eor")] ) (define_insn "*xordi_sesidi_di" @@ -2728,7 +2812,8 @@ (match_operand:SI 2 "arm_rhs_operand" "rI")))] "TARGET_32BIT" "eor%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "eor")] ) (define_insn "*thumb1_xorsi3" @@ -2737,7 +2822,8 @@ (match_operand:SI 2 "register_operand" "l")))] "TARGET_THUMB1" "eor\\t%0, %0, %2" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "insn" "eor")] ) (define_insn "*xorsi3_compare0" @@ -2749,7 +2835,8 @@ (xor:SI (match_dup 1) (match_dup 2)))] "TARGET_32BIT" "eor%.\\t%0, %1, %2" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "insn" "eor")] ) (define_insn "*xorsi3_compare0_scratch" @@ -2906,7 +2993,7 @@ (smax:SI (match_operand:SI 1 "s_register_operand" "") (match_operand:SI 2 "arm_rhs_operand" ""))) (clobber (reg:CC CC_REGNUM))])] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" " if (operands[2] == const0_rtx || operands[2] == constm1_rtx) { @@ -2933,7 +3020,8 @@ (const_int -1)))] "TARGET_32BIT" "orr%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "orr")] ) (define_insn "*arm_smax_insn" @@ -2941,7 +3029,7 @@ (smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_COND_EXEC" "@ cmp\\t%1, %2\;movlt\\t%0, %2 cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2" @@ -2955,7 +3043,7 @@ (smin:SI (match_operand:SI 1 "s_register_operand" "") (match_operand:SI 2 "arm_rhs_operand" ""))) (clobber (reg:CC CC_REGNUM))])] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" " if (operands[2] == const0_rtx) { @@ -2973,7 +3061,8 @@ (const_int 0)))] "TARGET_32BIT" "and%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "and")] ) (define_insn "*arm_smin_insn" @@ -2981,7 +3070,7 @@ (smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_COND_EXEC" "@ cmp\\t%1, %2\;movge\\t%0, %2 
cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2" @@ -2995,7 +3084,7 @@ (umax:SI (match_operand:SI 1 "s_register_operand" "") (match_operand:SI 2 "arm_rhs_operand" ""))) (clobber (reg:CC CC_REGNUM))])] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "" ) @@ -3004,7 +3093,7 @@ (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_COND_EXEC" "@ cmp\\t%1, %2\;movcc\\t%0, %2 cmp\\t%1, %2\;movcs\\t%0, %1 @@ -3019,7 +3108,7 @@ (umin:SI (match_operand:SI 1 "s_register_operand" "") (match_operand:SI 2 "arm_rhs_operand" ""))) (clobber (reg:CC CC_REGNUM))])] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "" ) @@ -3028,7 +3117,7 @@ (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_COND_EXEC" "@ cmp\\t%1, %2\;movcs\\t%0, %2 cmp\\t%1, %2\;movcc\\t%0, %1 @@ -3043,7 +3132,7 @@ [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "s_register_operand" "r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "* operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]); @@ -3163,11 +3252,23 @@ [(set (match_operand:SI 0 "register_operand" "=l,l") (ashift:SI (match_operand:SI 1 "register_operand" "l,0") (match_operand:SI 2 "nonmemory_operand" "N,l")))] - "TARGET_THUMB1" + "TARGET_THUMB1 && !janus2_code" "lsl\\t%0, %1, %2" [(set_attr "length" "2")] ) +(define_insn "*thumb1_ashlsi3_janus2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashift:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1 && janus2_code" + "@ + lsl\\t%0, %1, %2 + lsl\\t%0, %1, %2\;nop" + [(set_attr "length" "2,4")] +) + + (define_expand "ashrdi3" [(set (match_operand:DI 0 "s_register_operand" "") (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "") @@ -3200,6 +3301,7 @@ "TARGET_32BIT" "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx" [(set_attr "conds" "clob") + (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -3219,11 +3321,22 @@ [(set (match_operand:SI 0 "register_operand" "=l,l") (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0") (match_operand:SI 2 "nonmemory_operand" "N,l")))] - "TARGET_THUMB1" + "TARGET_THUMB1 && !janus2_code" "asr\\t%0, %1, %2" [(set_attr "length" "2")] ) +(define_insn "*thumb1_ashrsi3_janus2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1 && janus2_code" + "@ + asr\\t%0, %1, %2 + asr\\t%0, %1, %2\;nop" + [(set_attr "length" "2,4")] +) + (define_expand "lshrdi3" [(set (match_operand:DI 0 "s_register_operand" "") (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "") @@ -3256,6 +3369,7 @@ "TARGET_32BIT" "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx" [(set_attr "conds" "clob") + (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -3278,11 +3392,22 @@ [(set (match_operand:SI 0 "register_operand" "=l,l") (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0") (match_operand:SI 2 "nonmemory_operand" "N,l")))] - "TARGET_THUMB1" + "TARGET_THUMB1 && !janus2_code" "lsr\\t%0, %1, %2" [(set_attr "length" "2")] ) +(define_insn "*thumb1_lshrsi3_janus2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (lshiftrt:SI 
(match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1 && janus2_code" + "@ + lsr\\t%0, %1, %2 + lsr\\t%0, %1, %2; nop" + [(set_attr "length" "2,4")] +) + (define_expand "rotlsi3" [(set (match_operand:SI 0 "s_register_operand" "") (rotatert:SI (match_operand:SI 1 "s_register_operand" "") @@ -3324,11 +3449,20 @@ [(set (match_operand:SI 0 "register_operand" "=l") (rotatert:SI (match_operand:SI 1 "register_operand" "0") (match_operand:SI 2 "register_operand" "l")))] - "TARGET_THUMB1" + "TARGET_THUMB1 && !janus2_code" "ror\\t%0, %0, %2" [(set_attr "length" "2")] ) +(define_insn "*thumb1_rotrsi3_janus2" + [(set (match_operand:SI 0 "register_operand" "=l") + (rotatert:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1 && janus2_code" + "ror\\t%0, %0, %2; nop" + [(set_attr "length" "4")] +) + (define_insn "*arm_shiftsi3" [(set (match_operand:SI 0 "s_register_operand" "=r") (match_operator:SI 3 "shift_operator" @@ -3340,7 +3474,11 @@ (set_attr "shift" "1") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*shiftsi3_compare0" @@ -3357,7 +3495,11 @@ (set_attr "shift" "1") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*shiftsi3_compare0_scratch" @@ -3370,7 +3512,11 @@ "TARGET_32BIT" "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") - (set_attr "shift" "1")] + (set_attr "shift" "1") + (set (attr "length") (if_then_else (and (match_operand 2 "s_register_operand" "") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*arm_notsi_shiftsi" @@ -3382,9 +3528,14 @@ "mvn%?\\t%0, %1%S3" [(set_attr "predicable" "yes") (set_attr "shift" "1") + (set_attr "insn" "mvn") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*arm_notsi_shiftsi_compare0" @@ -3399,9 +3550,14 @@ "mvn%.\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") + (set_attr "insn" "mvn") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*arm_not_shiftsi_compare0_scratch" @@ -3415,9 +3571,14 @@ "mvn%.\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") + (set_attr "insn" "mvn") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) 
+ (const_int 4)))] ) ;; We don't really have extzv, but defining this using shifts helps @@ -3550,12 +3711,12 @@ (define_expand "negdf2" [(set (match_operand:DF 0 "s_register_operand" "") (neg:DF (match_operand:DF 1 "s_register_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" "") ;; abssi2 doesn't really clobber the condition codes if a different register ;; is being set. To keep things simple, assume during rtl manipulations that -;; it does, but tell the final scan operator the truth. Similarly for +;; it does, and the splitter will eliminate it. Similarly for ;; (neg (abs...)) (define_expand "abssi2" @@ -3567,22 +3728,28 @@ " if (TARGET_THUMB1) operands[2] = gen_rtx_SCRATCH (SImode); + else if (TARGET_NO_SINGLE_COND_EXEC) + { + emit_insn(gen_rtx_SET(VOIDmode, operands[0], + gen_rtx_ABS(SImode, operands[1]))); + DONE; + } else operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); ") (define_insn "*arm_abssi2" - [(set (match_operand:SI 0 "s_register_operand" "=r,&r") - (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) + [(set (match_operand:SI 0 "s_register_operand" "=r") + (abs:SI (match_operand:SI 1 "s_register_operand" "r"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" - "@ - cmp\\t%0, #0\;rsblt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" - [(set_attr "conds" "clob,*") - (set_attr "shift" "1") + "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "shift" "1") ;; predicable can't be set based on the variant, so left as no - (set_attr "length" "8")] + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 10) + (const_int 8)))] ) (define_insn_and_split "*thumb1_abssi2" @@ -3600,17 +3767,17 @@ ) (define_insn "*arm_neg_abssi2" - [(set (match_operand:SI 0 "s_register_operand" "=r,&r") - (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" - "@ - cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" - [(set_attr "conds" "clob,*") - (set_attr "shift" "1") + "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "shift" "1") ;; predicable can't be set based on the variant, so left as no - (set_attr "length" "8")] + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 10) + (const_int 8)))] ) (define_insn_and_split "*thumb1_neg_abssi2" @@ -3627,6 +3794,93 @@ [(set_attr "length" "6")] ) +;; Simplified version for when avoiding conditional execution +(define_insn "*arm_nocond_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (abs:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT && TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "shift" "1") + (set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_nocond_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT && TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "shift" "1") + (set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +;; Splitters for ABS patterns. 
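The splits below rely on the branch-free identity abs(x) = (x ^ (x >> 31)) - (x >> 31), and correspondingly -abs(x) = (x >> 31) - (x ^ (x >> 31)), with an arithmetic right shift, so no conditional execution is needed when it has been disabled.  A minimal C sketch of the identity (illustration only; the sign-propagating behaviour of >> on negative values is what GCC provides on ARM):

  #include <stdint.h>

  /* Mirrors the eor/sub and eor/rsb sequences the splitters emit.  */
  static int32_t abs_branchless (int32_t x)
  {
    int32_t m = x >> 31;        /* 0 for x >= 0, -1 for x < 0 */
    return (x ^ m) - m;         /* eor r0, r1, r1, asr #31 ; sub r0, r0, r1, asr #31 */
  }

  static int32_t neg_abs_branchless (int32_t x)
  {
    int32_t m = x >> 31;
    return m - (x ^ m);         /* eor r0, r1, r1, asr #31 ; rsb r0, r0, r1, asr #31 */
  }

For negative x the mask m is all ones, so x ^ m is ~x and subtracting m adds one, giving -x; for non-negative x both operations are no-ops.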
+ +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (abs:SI (match_operand:SI 1 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed && rtx_equal_p(operands[0], operands[1])" + [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0))) + (cond_exec (lt (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (neg:SI (match_dup 1))))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed && rtx_equal_p(operands[0], operands[1])" + [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0))) + (cond_exec (gt (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (neg:SI (match_dup 1))))] +) + +;; GCC does not add/remove clobbers when matching splitters, so we need +;; variants with and without the CC clobber. +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (abs:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" + [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) + (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 0) + (ashiftrt:SI (match_dup 1) (const_int 31))))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (abs:SI (match_operand:SI 1 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" + [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) + (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 0) + (ashiftrt:SI (match_dup 1) (const_int 31))))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" ""))))] + "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" + [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) + (match_dup 1))) + (set (match_dup 0) (minus:SI (ashiftrt:SI (match_dup 1) (const_int 31)) + (match_dup 0)))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" + [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) + (match_dup 1))) + (set (match_dup 0) (minus:SI (ashiftrt:SI (match_dup 1) (const_int 31)) + (match_dup 0)))] +) + (define_expand "abssf2" [(set (match_operand:SF 0 "s_register_operand" "") (abs:SF (match_operand:SF 1 "s_register_operand" "")))] @@ -3636,7 +3890,7 @@ (define_expand "absdf2" [(set (match_operand:DF 0 "s_register_operand" "") (abs:DF (match_operand:DF 1 "s_register_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" "") (define_expand "sqrtsf2" @@ -3648,7 +3902,7 @@ (define_expand "sqrtdf2" [(set (match_operand:DF 0 "s_register_operand" "") (sqrt:DF (match_operand:DF 1 "s_register_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" "") (define_insn_and_split "one_cmpldi2" @@ -3682,7 +3936,8 @@ (not:SI (match_operand:SI 1 "s_register_operand" "r")))] "TARGET_32BIT" "mvn%?\\t%0, %1" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "mvn")] 
) (define_insn "*thumb1_one_cmplsi2" @@ -3690,7 +3945,8 @@ (not:SI (match_operand:SI 1 "register_operand" "l")))] "TARGET_THUMB1" "mvn\\t%0, %1" - [(set_attr "length" "2")] + [(set_attr "length" "2") + (set_attr "insn" "mvn")] ) (define_insn "*notsi_compare0" @@ -3701,7 +3957,8 @@ (not:SI (match_dup 1)))] "TARGET_32BIT" "mvn%.\\t%0, %1" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "insn" "mvn")] ) (define_insn "*notsi_compare0_scratch" @@ -3711,11 +3968,40 @@ (clobber (match_scratch:SI 0 "=r"))] "TARGET_32BIT" "mvn%.\\t%0, %1" - [(set_attr "conds" "set")] + [(set_attr "conds" "set") + (set_attr "insn" "mvn")] ) ;; Fixed <--> Floating conversion insns +(define_expand "floatsihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:SI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatdihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:DI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + (define_expand "floatsisf2" [(set (match_operand:SF 0 "s_register_operand" "") (float:SF (match_operand:SI 1 "s_register_operand" "")))] @@ -3731,7 +4017,7 @@ (define_expand "floatsidf2" [(set (match_operand:DF 0 "s_register_operand" "") (float:DF (match_operand:SI 1 "s_register_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" " if (TARGET_MAVERICK) { @@ -3740,6 +4026,30 @@ } ") +(define_expand "fix_trunchfsi2" + [(set (match_operand:SI 0 "general_operand" "") + (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_trunchfdi2" + [(set (match_operand:DI 0 "general_operand" "") + (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + (define_expand "fix_truncsfsi2" [(set (match_operand:SI 0 "s_register_operand" "") (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))] @@ -3759,7 +4069,7 @@ (define_expand "fix_truncdfsi2" [(set (match_operand:SI 0 "s_register_operand" "") (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" ""))))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" " if (TARGET_MAVERICK) { @@ -3776,9 +4086,25 @@ [(set (match_operand:SF 0 "s_register_operand" "") (float_truncate:SF (match_operand:DF 1 "s_register_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" "" ) + +/* DFmode -> HFmode conversions have to go through SFmode. */ +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "general_operand" "") + (float_truncate:HF + (match_operand:DF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) ;; Zero and sign extension instructions. 
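All of the HFmode conversion expanders above funnel half-precision values through SFmode, since only HF<->SF conversions are available directly.  At the source level, using the __fp16 type this half-precision support provides, the effect is equivalent to inserting an intermediate float conversion, roughly as in this sketch (assumptions: __fp16 is enabled for the target; the function and variable names are illustrative):

  void half_conversions (int i, double d, __fp16 *h, int *n)
  {
    /* floatsihf2: int -> float -> __fp16.  */
    h[0] = (__fp16) (float) i;

    /* truncdfhf2: double -> float -> __fp16.  */
    h[1] = (__fp16) (float) d;

    /* fix_trunchfsi2: __fp16 -> float -> int.  */
    *n = (int) (float) h[0];
  }
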
@@ -3800,6 +4126,7 @@ return \"mov%?\\t%R0, #0\"; " [(set_attr "length" "8") + (set_attr "insn" "mov") (set_attr "predicable" "yes")] ) @@ -3843,6 +4170,7 @@ " [(set_attr "length" "8") (set_attr "shift" "1") + (set_attr "insn" "mov") (set_attr "predicable" "yes")] ) @@ -4123,6 +4451,28 @@ "" ) +(define_code_iterator ior_xor [ior xor]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ior_xor:SI (and:SI (ashift:SI + (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")) + (zero_extend:SI + (match_operator 5 "subreg_lowpart_operator" + [(match_operand:SI 4 "s_register_operand" "")]))))] + "TARGET_32BIT + && (INTVAL (operands[3]) + == (GET_MODE_MASK (GET_MODE (operands[5])) + & (GET_MODE_MASK (GET_MODE (operands[5])) + << (INTVAL (operands[2])))))" + [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (zero_extend:SI (match_dup 5)))] + "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" +) + (define_insn "*compareqi_eq0" [(set (reg:CC_Z CC_REGNUM) (compare:CC_Z (match_operand:QI 0 "s_register_operand" "r") @@ -4639,9 +4989,24 @@ (define_expand "extendsfdf2" [(set (match_operand:DF 0 "s_register_operand" "") (float_extend:DF (match_operand:SF 1 "s_register_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" "" ) + +/* HFmode -> DFmode conversions have to go through SFmode. */ +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "general_operand" "") + (float_extend:DF (match_operand:HF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (DFmode, op1, 0); + emit_insn (gen_movdf (operands[0], op1)); + DONE; + }" +) ;; Move insns (including loads and stores) @@ -4877,6 +5242,7 @@ }" [(set_attr "length" "4,4,6,2,2,6,4,4") (set_attr "type" "*,*,*,load2,store2,load2,store2,*") + (set_attr "insn" "*,mov,*,*,*,*,*,mov") (set_attr "pool_range" "*,*,*,*,*,1020,*,*")] ) @@ -4903,14 +5269,6 @@ optimize && can_create_pseudo_p ()); DONE; } - - if (TARGET_USE_MOVT && !target_word_relocations - && GET_CODE (operands[1]) == SYMBOL_REF - && !flag_pic && !arm_tls_referenced_p (operands[1])) - { - arm_emit_movpair (operands[0], operands[1]); - DONE; - } } else /* TARGET_THUMB1... */ { @@ -4984,18 +5342,9 @@ (set_attr "length" "4")] ) -(define_insn "*arm_movw" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r") - (high:SI (match_operand:SI 1 "general_operand" "i")))] - "TARGET_32BIT" - "movw%?\t%0, #:lower16:%c1" - [(set_attr "predicable" "yes") - (set_attr "length" "4")] -) - (define_insn "*arm_movsi_insn" [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") - (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk"))] + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))] "TARGET_ARM && ! 
TARGET_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_VFP) && ( register_operand (operands[0], SImode) @@ -5008,6 +5357,7 @@ ldr%?\\t%0, %1 str%?\\t%1, %0" [(set_attr "type" "*,*,*,*,load1,store1") + (set_attr "insn" "mov,mov,mvn,mov,*,*") (set_attr "predicable" "yes") (set_attr "pool_range" "*,*,*,*,4096,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*")] @@ -5027,6 +5377,19 @@ " ) +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "TARGET_32BIT + && TARGET_USE_MOVT && GET_CODE (operands[1]) == SYMBOL_REF + && !flag_pic && !target_word_relocations + && !arm_tls_referenced_p (operands[1])" + [(clobber (const_int 0))] +{ + arm_emit_movpair (operands[0], operands[1]); + DONE; +}) + (define_insn "*thumb1_movsi_insn" [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*lhk") (match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*lhk"))] @@ -5065,7 +5428,7 @@ (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))] " { - unsigned HOST_WIDE_INT val = INTVAL (operands[1]); + unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; unsigned HOST_WIDE_INT mask = 0xff; int i; @@ -5627,6 +5990,7 @@ ldr%(h%)\\t%0, %1\\t%@ movhi" [(set_attr "type" "*,*,store1,load1") (set_attr "predicable" "yes") + (set_attr "insn" "mov,mvn,*,*") (set_attr "pool_range" "*,*,*,256") (set_attr "neg_pool_range" "*,*,*,244")] ) @@ -5638,7 +6002,8 @@ "@ mov%?\\t%0, %1\\t%@ movhi mvn%?\\t%0, #%B1\\t%@ movhi" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "insn" "mov,mvn")] ) (define_expand "thumb_movhi_clobber" @@ -5769,6 +6134,7 @@ ldr%(b%)\\t%0, %1 str%(b%)\\t%1, %0" [(set_attr "type" "*,*,load1,store1") + (set_attr "insn" "mov,mvn,*,*") (set_attr "predicable" "yes")] ) @@ -5787,9 +6153,111 @@ mov\\t%0, %1" [(set_attr "length" "2") (set_attr "type" "*,load1,store1,*,*,*") + (set_attr "insn" "*,*,*,mov,mov,mov") (set_attr "pool_range" "*,32,*,*,*,*")] ) +;; HFmode moves +(define_expand "movhf" + [(set (match_operand:HF 0 "general_operand" "") + (match_operand:HF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (HFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (HFmode, operands[1]); + } + } + " +) + +(define_insn "*arm32_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") + (match_operand:HF 1 "general_operand" " m,r,r,F"))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"str%(h%)\\t%1, %0\\t%@ __fp16\"; + case 2: /* ARM register from ARM register */ + return \"mov%?\\t%0, %1\\t%@ __fp16\"; + case 3: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw%?\\t%0, %1\", ops); + else + output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" 
"unconditional") + (set_attr "type" "load1,store1,*,*") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes") + ] +) + +(define_insn "*thumb1_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") + (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 1: + { + rtx addr; + gcc_assert (GET_CODE(operands[1]) == MEM); + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == LABEL_REF + || (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)) + { + /* Constant pool entry. */ + return \"ldr\\t%0, %1\"; + } + return \"ldrh\\t%0, %1\"; + } + case 2: return \"strh\\t%1, %0\"; + default: return \"mov\\t%0, %1\"; + } + " + [(set_attr "length" "2") + (set_attr "type" "*,load1,store1,*,*") + (set_attr "pool_range" "*,1020,*,*,*")] +) + (define_expand "movsf" [(set (match_operand:SF 0 "general_operand" "") (match_operand:SF 1 "general_operand" ""))] @@ -5842,6 +6310,7 @@ [(set_attr "length" "4,4,4") (set_attr "predicable" "yes") (set_attr "type" "*,load1,store1") + (set_attr "insn" "mov,*,*") (set_attr "pool_range" "*,4096,*") (set_attr "neg_pool_range" "*,4084,*")] ) @@ -6297,7 +6766,7 @@ (match_operand:BLK 1 "general_operand" "") (match_operand:SI 2 "const_int_operand" "") (match_operand:SI 3 "const_int_operand" "")] - "TARGET_EITHER" + "TARGET_EITHER && !low_irq_latency" " if (TARGET_32BIT) { @@ -7476,7 +7945,7 @@ (define_expand "cmpdf" [(match_operand:DF 0 "s_register_operand" "") (match_operand:DF 1 "arm_float_compare_operand" "")] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" " arm_compare_op0 = operands[0]; arm_compare_op1 = operands[1]; @@ -7507,7 +7976,11 @@ (set_attr "shift" "1") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*arm_cmpsi_shiftsi_swp" @@ -7522,7 +7995,11 @@ (set_attr "shift" "1") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*arm_cmpsi_negshiftsi_si" @@ -7537,7 +8014,11 @@ [(set_attr "conds" "set") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) ;; Cirrus SF compare instruction @@ -7879,77 +8360,77 @@ (define_expand "seq" [(set (match_operand:SI 0 "s_register_operand" "") (eq:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (EQ, arm_compare_op0, arm_compare_op1);" ) (define_expand "sne" [(set (match_operand:SI 0 "s_register_operand" "") (ne:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" 
"operands[1] = arm_gen_compare_reg (NE, arm_compare_op0, arm_compare_op1);" ) (define_expand "sgt" [(set (match_operand:SI 0 "s_register_operand" "") (gt:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (GT, arm_compare_op0, arm_compare_op1);" ) (define_expand "sle" [(set (match_operand:SI 0 "s_register_operand" "") (le:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (LE, arm_compare_op0, arm_compare_op1);" ) (define_expand "sge" [(set (match_operand:SI 0 "s_register_operand" "") (ge:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (GE, arm_compare_op0, arm_compare_op1);" ) (define_expand "slt" [(set (match_operand:SI 0 "s_register_operand" "") (lt:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (LT, arm_compare_op0, arm_compare_op1);" ) (define_expand "sgtu" [(set (match_operand:SI 0 "s_register_operand" "") (gtu:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (GTU, arm_compare_op0, arm_compare_op1);" ) (define_expand "sleu" [(set (match_operand:SI 0 "s_register_operand" "") (leu:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (LEU, arm_compare_op0, arm_compare_op1);" ) (define_expand "sgeu" [(set (match_operand:SI 0 "s_register_operand" "") (geu:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (GEU, arm_compare_op0, arm_compare_op1);" ) (define_expand "sltu" [(set (match_operand:SI 0 "s_register_operand" "") (ltu:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (LTU, arm_compare_op0, arm_compare_op1);" ) (define_expand "sunordered" [(set (match_operand:SI 0 "s_register_operand" "") (unordered:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (UNORDERED, arm_compare_op0, arm_compare_op1);" ) @@ -7957,7 +8438,7 @@ (define_expand "sordered" [(set (match_operand:SI 0 "s_register_operand" "") (ordered:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (ORDERED, arm_compare_op0, arm_compare_op1);" ) @@ -7965,7 +8446,7 @@ (define_expand "sungt" [(set (match_operand:SI 0 "s_register_operand" "") (ungt:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (UNGT, arm_compare_op0, arm_compare_op1);" ) @@ -7973,7 +8454,7 @@ (define_expand "sunge" [(set (match_operand:SI 0 "s_register_operand" "") (unge:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (UNGE, arm_compare_op0, 
arm_compare_op1);" ) @@ -7981,7 +8462,7 @@ (define_expand "sunlt" [(set (match_operand:SI 0 "s_register_operand" "") (unlt:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (UNLT, arm_compare_op0, arm_compare_op1);" ) @@ -7989,7 +8470,7 @@ (define_expand "sunle" [(set (match_operand:SI 0 "s_register_operand" "") (unle:SI (match_dup 1) (const_int 0)))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" "operands[1] = arm_gen_compare_reg (UNLE, arm_compare_op0, arm_compare_op1);" ) @@ -8018,6 +8499,7 @@ "TARGET_ARM" "mov%D1\\t%0, #0\;mov%d1\\t%0, #1" [(set_attr "conds" "use") + (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -8028,6 +8510,7 @@ "TARGET_ARM" "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" [(set_attr "conds" "use") + (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -8038,6 +8521,7 @@ "TARGET_ARM" "mov%D1\\t%0, #0\;mvn%d1\\t%0, #1" [(set_attr "conds" "use") + (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -8241,7 +8725,7 @@ (if_then_else:SI (match_operand 1 "arm_comparison_operator" "") (match_operand:SI 2 "arm_not_operand" "") (match_operand:SI 3 "arm_not_operand" "")))] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_COND_EXEC" " { enum rtx_code code = GET_CODE (operands[1]); @@ -8260,7 +8744,7 @@ (if_then_else:SF (match_operand 1 "arm_comparison_operator" "") (match_operand:SF 2 "s_register_operand" "") (match_operand:SF 3 "nonmemory_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT" + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_NO_COND_EXEC" " { enum rtx_code code = GET_CODE (operands[1]); @@ -8285,7 +8769,7 @@ (if_then_else:DF (match_operand 1 "arm_comparison_operator" "") (match_operand:DF 2 "s_register_operand" "") (match_operand:DF 3 "arm_float_add_operand" "")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE) && !TARGET_NO_COND_EXEC" " { enum rtx_code code = GET_CODE (operands[1]); @@ -8317,7 +8801,8 @@ mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" [(set_attr "length" "4,4,4,4,8,8,8,8") - (set_attr "conds" "use")] + (set_attr "conds" "use") + (set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")] ) (define_insn "*movsfcc_soft_insn" @@ -8330,7 +8815,8 @@ "@ mov%D3\\t%0, %2 mov%d3\\t%0, %1" - [(set_attr "conds" "use")] + [(set_attr "conds" "use") + (set_attr "insn" "mov")] ) @@ -8733,7 +9219,7 @@ [(match_operand 1 "cc_register" "") (const_int 0)]) (return) (pc)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + "TARGET_ARM && USE_RETURN_INSN (TRUE) && !TARGET_NO_COND_EXEC" "* { if (arm_ccfsm_state == 2) @@ -8754,7 +9240,7 @@ [(match_operand 1 "cc_register" "") (const_int 0)]) (pc) (return)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + "TARGET_ARM && USE_RETURN_INSN (TRUE) && !TARGET_NO_COND_EXEC" "* { if (arm_ccfsm_state == 2) @@ -9072,7 +9558,11 @@ (set_attr "shift" "4") (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_split @@ -9110,7 +9600,11 @@ (set_attr "shift" "4") (set (attr "type") 
(if_then_else (match_operand 5 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*arith_shiftsi_compare0_scratch" @@ -9128,7 +9622,11 @@ (set_attr "shift" "4") (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*sub_shiftsi" @@ -9143,7 +9641,11 @@ (set_attr "shift" "3") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*sub_shiftsi_compare0" @@ -9163,7 +9665,11 @@ (set_attr "shift" "3") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) (define_insn "*sub_shiftsi_compare0_scratch" @@ -9181,7 +9687,11 @@ (set_attr "shift" "3") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)))] ) @@ -9194,6 +9704,7 @@ "TARGET_ARM" "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1" [(set_attr "conds" "use") + (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -9207,6 +9718,7 @@ orr%d2\\t%0, %1, #1 mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1" [(set_attr "conds" "use") + (set_attr "insn" "orr") (set_attr "length" "4,8")] ) @@ -9216,7 +9728,7 @@ [(match_operand:SI 2 "s_register_operand" "r,r") (match_operand:SI 3 "arm_add_operand" "rI,L")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_COND_EXEC" "* if (operands[3] == const0_rtx) { @@ -9271,6 +9783,7 @@ return \"\"; " [(set_attr "conds" "use") + (set_attr "insn" "mov") (set_attr "length" "4,4,8")] ) @@ -9282,7 +9795,7 @@ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) (match_operand:SI 1 "s_register_operand" "0,?r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "* if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) return \"%i5\\t%0, %1, %2, lsr #31\"; @@ -9678,7 +10191,7 @@ (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_COND_EXEC" "* if (GET_CODE (operands[5]) == LT && (operands[4] == const0_rtx)) @@ -9744,7 +10257,7 @@ (match_operand:SI 3 "arm_add_operand" "rIL,rIL")) (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -9780,7 +10293,7 @@ (match_operand:SI 2 "s_register_operand" "r,r") (match_operand:SI 3 "arm_add_operand" 
"rIL,rIL")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -9818,7 +10331,7 @@ [(match_operand:SI 3 "s_register_operand" "r") (match_operand:SI 4 "arm_rhs_operand" "rI")]))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "12")] @@ -9968,7 +10481,7 @@ (not:SI (match_operand:SI 2 "s_register_operand" "r,r")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -9987,6 +10500,7 @@ mov%d4\\t%0, %1\;mvn%D4\\t%0, %2 mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") + (set_attr "insn" "mvn") (set_attr "length" "4,8,8")] ) @@ -10000,7 +10514,7 @@ (match_operand:SI 2 "s_register_operand" "r,r")) (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -10019,6 +10533,7 @@ mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" [(set_attr "conds" "use") + (set_attr "insn" "mvn") (set_attr "length" "4,8,8")] ) @@ -10033,7 +10548,7 @@ (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]) (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -10055,10 +10570,23 @@ mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4" [(set_attr "conds" "use") (set_attr "shift" "2") - (set_attr "length" "4,8,8") + (set_attr "insn" "mov") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set_attr_alternative "length" + [(if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)) + (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 12) + (const_int 8)) + (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 12) + (const_int 8))])] ) (define_insn "*ifcompare_move_shift" @@ -10072,7 +10600,7 @@ [(match_operand:SI 2 "s_register_operand" "r,r") (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -10094,10 +10622,24 @@ mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4" [(set_attr "conds" "use") (set_attr "shift" "2") - (set_attr "length" "4,8,8") + (set_attr "insn" "mov") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set_attr_alternative "length" + [(if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 8) + (const_int 4)) + (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 12) + (const_int 8)) + (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 12) + (const_int 8))]) + (set_attr "insn" "mov")] ) (define_insn "*ifcompare_shift_shift" @@ -10113,7 +10655,7 @@ [(match_operand:SI 3 "s_register_operand" "r") (match_operand:SI 4 "arm_rhs_operand" "rM")]))) (clobber (reg:CC 
CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "12")] @@ -10134,12 +10676,16 @@ "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7" [(set_attr "conds" "use") (set_attr "shift" "1") - (set_attr "length" "8") + (set_attr "insn" "mov") (set (attr "type") (if_then_else (and (match_operand 2 "const_int_operand" "") (match_operand 4 "const_int_operand" "")) (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "alu_shift_reg"))) + (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") + (eq_attr "fix_janus" "yes")) + (const_int 16) + (const_int 8)))] ) (define_insn "*ifcompare_not_arith" @@ -10153,7 +10699,7 @@ [(match_operand:SI 2 "s_register_operand" "r") (match_operand:SI 3 "arm_rhs_operand" "rI")]))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "12")] @@ -10171,6 +10717,7 @@ "TARGET_ARM" "mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3" [(set_attr "conds" "use") + (set_attr "insn" "mvn") (set_attr "length" "8")] ) @@ -10185,7 +10732,7 @@ (match_operand:SI 3 "arm_rhs_operand" "rI")]) (not:SI (match_operand:SI 1 "s_register_operand" "r")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "12")] @@ -10203,6 +10750,7 @@ "TARGET_ARM" "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" [(set_attr "conds" "use") + (set_attr "insn" "mvn") (set_attr "length" "8")] ) @@ -10215,7 +10763,7 @@ (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")) (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -10246,7 +10794,7 @@ (match_operand:SI 1 "arm_not_operand" "0,?rIK") (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM" + "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" "#" [(set_attr "conds" "clob") (set_attr "length" "8,12")] @@ -10614,7 +11162,7 @@ (match_dup 0) (match_operand 4 "" ""))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM && reload_completed" + "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" [(set (match_dup 5) (match_dup 6)) (cond_exec (match_dup 7) (set (match_dup 0) (match_dup 4)))] @@ -10642,7 +11190,7 @@ (match_operand 4 "" "") (match_dup 0))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM && reload_completed" + "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" [(set (match_dup 5) (match_dup 6)) (cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)]) (set (match_dup 0) (match_dup 4)))] @@ -10663,7 +11211,7 @@ (match_operand 4 "" "") (match_operand 5 "" ""))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM && reload_completed" + "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" [(set (match_dup 6) (match_dup 7)) (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) (set (match_dup 0) (match_dup 4))) @@ -10695,7 +11243,7 @@ (not:SI (match_operand:SI 5 "s_register_operand" "")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARM && reload_completed" + "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" [(set (match_dup 6) (match_dup 7)) (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) (set (match_dup 0) (match_dup 4))) @@ -10730,6 +11278,7 @@ mvn%D4\\t%0, %2 mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") + (set_attr "insn" "mvn") (set_attr "length" "4,8")] ) 
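The !TARGET_NO_COND_EXEC / !TARGET_NO_SINGLE_COND_EXEC conditions added to the s<cond> (seq/sne/...), movsicc and *if_* patterns above let predication-heavy expansions be switched off; the macros are presumably defined from a tuning option added elsewhere in this series, not visible in these hunks.  A hedged illustration of the kind of source code affected:

  /* Illustration only: a conditional select that the movsicc / *if_*
     patterns above would normally expand using predicated instructions.  */
  int clamp_low (int x, int lo)
  {
    /* With conditional execution allowed (cf. the smax/movsicc patterns):
           cmp   r0, r1
           movlt r0, r1
       With the new gating in effect those expanders no longer match and
       an ordinary compare-and-branch sequence is produced instead.  */
    return x < lo ? lo : x;
  }
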
@@ -10864,6 +11413,24 @@ " ) +(define_insn "align_16" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN16)] + "TARGET_EITHER" + "* + assemble_align (128); + return \"\"; + " +) + +(define_insn "align_32" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN32)] + "TARGET_EITHER" + "* + assemble_align (256); + return \"\"; + " +) + (define_insn "consttable_end" [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)] "TARGET_EITHER" @@ -10890,6 +11457,7 @@ "TARGET_THUMB1" "* making_const_table = TRUE; + gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT); assemble_integer (operands[0], 2, BITS_PER_WORD, 1); assemble_zeros (2); return \"\"; @@ -10902,19 +11470,30 @@ "TARGET_EITHER" "* { + rtx x = operands[0]; making_const_table = TRUE; - switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + switch (GET_MODE_CLASS (GET_MODE (x))) { case MODE_FLOAT: - { - REAL_VALUE_TYPE r; - REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); - assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); - break; - } + if (GET_MODE (x) == HFmode) + arm_emit_fp16_const (x); + else + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + assemble_real (r, GET_MODE (x), BITS_PER_WORD); + } + break; default: - assemble_integer (operands[0], 4, BITS_PER_WORD, 1); - mark_symbol_refs_as_used (operands[0]); + /* XXX: Sometimes gcc does something really dumb and ends up with + a HIGH in a constant pool entry, usually because it's trying to + load into a VFP register. We know this will always be used in + combination with a LO_SUM which ignores the high bits, so just + strip off the HIGH. */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + assemble_integer (x, 4, BITS_PER_WORD, 1); + mark_symbol_refs_as_used (x); break; } return \"\"; @@ -11008,6 +11587,28 @@ [(set_attr "predicable" "yes") (set_attr "insn" "clz")]) +(define_insn "rbitsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))] + "TARGET_32BIT && arm_arch_thumb2" + "rbit%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "insn" "clz")]) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (ctz:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch_thumb2" + " + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_rbitsi2 (tmp, operands[1])); + emit_insn (gen_clzsi2 (operands[0], tmp)); + } + DONE; + " +) + ;; V5E instructions. (define_insn "prefetch" @@ -11017,13 +11618,15 @@ "TARGET_32BIT && arm_arch5e" "pld\\t%a0") -;; General predication pattern +;; General predication pattern. +;; Conditional branches are available as both arm_cond_branch and +;; predicated arm_jump, so it doesn't matter if we disable the latter. (define_cond_exec [(match_operator 0 "arm_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)])] - "TARGET_32BIT" + "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC" "" ) --- a/gcc/config/arm/arm-modes.def +++ b/gcc/config/arm/arm-modes.def @@ -25,6 +25,11 @@ FIXME What format is this? */ FLOAT_MODE (XF, 12, 0); +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + ? &arm_half_format : &ieee_half_format)); + /* CCFPEmode should be used with floating inequalities, CCFPmode should be used with floating equalities. CC_NOOVmode should be used with SImode integer equalities. @@ -62,6 +67,4 @@ INT_MODE (EI, 24); INT_MODE (OI, 32); INT_MODE (CI, 48); -/* ??? 
This should actually have 512 bits but the precision only has 9 - bits. */ -FRACTIONAL_INT_MODE (XI, 511, 64); +INT_MODE (XI, 64); --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -61,7 +61,7 @@ typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_sf float32_t; +typedef float float32_t; typedef __builtin_neon_poly8 poly8_t; typedef __builtin_neon_poly16 poly16_t; @@ -5085,7 +5085,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) { - return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5151,7 +5151,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5283,7 +5283,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vdup_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5349,7 +5349,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vdupq_n_f32 (float32_t __a) { - return (float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5415,7 +5415,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmov_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5481,7 +5481,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmovq_n_f32 (float32_t __a) { - return (float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -6591,7 +6591,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmul_n_f32 (float32x2_t __a, float32_t __b) { - return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3); + return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6621,7 +6621,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmulq_n_f32 (float32x4_t __a, float32_t __b) { - return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3); + return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6735,7 +6735,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmla_n_f32 (float32x2_t __a, float32x2_t __b, 
float32_t __c) { - return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6765,7 +6765,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6831,7 +6831,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6861,7 +6861,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -7851,7 +7851,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1v2sf (__a); + return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -7917,7 +7917,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1v4sf (__a); + return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -7977,7 +7977,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) { - return (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8043,7 +8043,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8109,7 +8109,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a); + return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8175,7 +8175,7 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a); + return 
(float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8247,7 +8247,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_f32 (float32_t * __a, float32x2_t __b) { - __builtin_neon_vst1v2sf (__a, __b); + __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8313,7 +8313,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_f32 (float32_t * __a, float32x4_t __b) { - __builtin_neon_vst1v4sf (__a, __b); + __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8373,7 +8373,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) { - __builtin_neon_vst1_lanev2sf (__a, __b, __c); + __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8439,7 +8439,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) { - __builtin_neon_vst1_lanev4sf (__a, __b, __c); + __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8512,7 +8512,7 @@ vld2_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2v2sf (__a); + __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8600,7 +8600,7 @@ vld2q_f32 (const float32_t * __a) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld2v4sf (__a); + __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8676,7 +8676,7 @@ { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8748,7 +8748,7 @@ { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8807,7 +8807,7 @@ vld2_dup_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_dupv2sf (__a); + __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8892,7 +8892,7 @@ vst2_f32 (float32_t * __a, float32x2x2_t __b) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2v2sf (__a, __bu.__o); + __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8969,7 +8969,7 @@ vst2q_f32 (float32_t * __a, float32x4x2_t __b) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2v4sf (__a, __bu.__o); + __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9032,7 +9032,7 @@ 
vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9088,7 +9088,7 @@ vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9140,7 +9140,7 @@ vld3_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3v2sf (__a); + __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9228,7 +9228,7 @@ vld3q_f32 (const float32_t * __a) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3v4sf (__a); + __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9304,7 +9304,7 @@ { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9376,7 +9376,7 @@ { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9435,7 +9435,7 @@ vld3_dup_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_dupv2sf (__a); + __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9520,7 +9520,7 @@ vst3_f32 (float32_t * __a, float32x2x3_t __b) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3v2sf (__a, __bu.__o); + __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9597,7 +9597,7 @@ vst3q_f32 (float32_t * __a, float32x4x3_t __b) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3v4sf (__a, __bu.__o); + __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9660,7 +9660,7 @@ vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9716,7 +9716,7 @@ vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9768,7 +9768,7 @@ vld4_f32 (const float32_t * __a) { union { float32x2x4_t __i; 
__builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4v2sf (__a); + __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9856,7 +9856,7 @@ vld4q_f32 (const float32_t * __a) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4v4sf (__a); + __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9932,7 +9932,7 @@ { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10004,7 +10004,7 @@ { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10063,7 +10063,7 @@ vld4_dup_f32 (const float32_t * __a) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_dupv2sf (__a); + __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10148,7 +10148,7 @@ vst4_f32 (float32_t * __a, float32x2x4_t __b) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v2sf (__a, __bu.__o); + __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10225,7 +10225,7 @@ vst4q_f32 (float32_t * __a, float32x4x4_t __b) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v4sf (__a, __bu.__o); + __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10288,7 +10288,7 @@ vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10344,7 +10344,7 @@ vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -78,6 +78,10 @@ mfp= Target RejectNegative Joined Undocumented Var(target_fpe_name) +mfp16-format= +Target RejectNegative Joined Var(target_fp16_format_name) +Specify the __fp16 floating-point format + ;; Now ignored. mfpe Target RejectNegative Mask(FPE) Undocumented @@ -93,6 +97,10 @@ Target RejectNegative Alias for -mfloat-abi=hard +mfix-janus-2cc +Target Report Mask(FIX_JANUS) +Work around hardware errata for Avalent Janus 2CC cores. + mlittle-endian Target Report RejectNegative InverseMask(BIG_END) Assume target CPU is configured as little endian @@ -101,6 +109,10 @@ Target Report Mask(LONG_CALLS) Generate call insns as indirect calls, if necessary +mmarvell-div +Target Report Mask(MARVELL_DIV) +Generate hardware integer division instructions supported by some Marvell cores. 
+ mpic-register= Target RejectNegative Joined Var(arm_pic_register_string) Specify the register to be used for PIC addressing @@ -157,6 +169,10 @@ Target Report Mask(NEON_VECTORIZE_QUAD) Use Neon quad-word (rather than double-word) registers for vectorization +mlow-irq-latency +Target Report Var(low_irq_latency) +Try to reduce interrupt latency of the generated code + mword-relocations Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) Only generate absolute relocations on word sized values. --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -88,7 +88,7 @@ extern int cirrus_memory_offset (rtx); extern int arm_coproc_mem_operand (rtx, bool); -extern int neon_vector_mem_operand (rtx, bool); +extern int neon_vector_mem_operand (rtx, int); extern int neon_struct_mem_operand (rtx); extern int arm_no_early_store_addr_dep (rtx, rtx); extern int arm_no_early_alu_shift_dep (rtx, rtx); @@ -144,6 +144,7 @@ extern int arm_debugger_arg_offset (int, rtx); extern bool arm_is_long_call_p (tree); extern int arm_emit_vector_const (FILE *, rtx); +extern void arm_emit_fp16_const (rtx c); extern const char * arm_output_load_gr (rtx *); extern const char *vfp_output_fstmd (rtx *); extern void arm_set_return_address (rtx, rtx); @@ -154,13 +155,15 @@ #if defined TREE_CODE extern rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int); +extern void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); extern bool arm_pad_arg_upward (enum machine_mode, const_tree); extern bool arm_pad_reg_upward (enum machine_mode, tree, int); extern bool arm_needs_doubleword_align (enum machine_mode, tree); -extern rtx arm_function_value(const_tree, const_tree); #endif extern int arm_apply_result_size (void); +extern rtx aapcs_libcall_value (enum machine_mode); #endif /* RTX_CODE */ --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from arm-cores.def (define_attr "tune" - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1" + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,marvell_f,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa5,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1,cortexm0" (const (symbol_ref "arm_tune"))) --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -30,7 +30,7 @@ /* Section 4.1 of the AAPCS requires the use of VFP format. 
*/ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" /* TARGET_BIG_ENDIAN_DEFAULT is set in config.gcc for big endian configurations. */ @@ -53,6 +53,8 @@ #define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}" +#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5|mcpu=cortex-a8|mcpu=cortex-a9:%{!r:--be8}}}" + /* Tell the assembler to build BPABI binaries. */ #undef SUBTARGET_EXTRA_ASM_SPEC #define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC @@ -65,7 +67,7 @@ #define BPABI_LINK_SPEC \ "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \ "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \ - "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC + "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC #undef LINK_SPEC #define LINK_SPEC BPABI_LINK_SPEC @@ -90,16 +92,22 @@ #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (muldi3, lmul) #endif #ifdef L_fixdfdi -#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz) +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz) \ + extern DWtype __fixdfdi (DFtype) __attribute__((pcs("aapcs"))); \ + extern UDWtype __fixunsdfdi (DFtype) __asm__("__aeabi_d2ulz") __attribute__((pcs("aapcs"))); #endif #ifdef L_fixunsdfdi -#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz) +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz) \ + extern UDWtype __fixunsdfdi (DFtype) __attribute__((pcs("aapcs"))); #endif #ifdef L_fixsfdi -#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz) +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz) \ + extern DWtype __fixsfdi (SFtype) __attribute__((pcs("aapcs"))); \ + extern UDWtype __fixunssfdi (SFtype) __asm__("__aeabi_f2ulz") __attribute__((pcs("aapcs"))); #endif #ifdef L_fixunssfdi -#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz) +#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz) \ + extern UDWtype __fixunssfdi (SFtype) __attribute__((pcs("aapcs"))); #endif #ifdef L_floatdidf #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdidf, l2d) --- a/gcc/config/arm/bpabi.S +++ b/gcc/config/arm/bpabi.S @@ -64,20 +64,69 @@ #endif /* L_aeabi_ulcmp */ +.macro test_div_by_zero signed +/* Tail-call to divide-by-zero handlers which may be overridden by the user, + so unwinding works properly. */ +#if defined(__thumb2__) + cbnz yyh, 1f + cbnz yyl, 1f + cmp xxh, #0 + do_it eq + cmpeq xxl, #0 + .ifc \signed, unsigned + beq 2f + mov xxh, #0xffffffff + mov xxl, xxh +2: + .else + do_it lt, t + movlt xxl, #0 + movlt xxh, #0x80000000 + do_it gt, t + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +1: +#else + /* Note: Thumb-1 code calls via an ARM shim on processors which + support ARM mode. */ + cmp yyh, #0 + cmpeq yyl, #0 + bne 2f + cmp xxh, #0 + cmpeq xxl, #0 + .ifc \signed, unsigned + movne xxh, #0xffffffff + movne xxl, #0xffffffff + .else + movlt xxh, #0x80000000 + movlt xxl, #0 + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +2: +#endif +.endm + #ifdef L_aeabi_ldivmod ARM_FUNC_START aeabi_ldivmod + test_div_by_zero signed + sub sp, sp, #8 -#if defined(__thumb2__) +/* Low latency and Thumb-2 do_push implementations can't push sp directly. 
*/ +#if defined(__thumb2__) || defined(__irq_low_latency__) mov ip, sp - push {ip, lr} + do_push (ip, lr) #else - do_push {sp, lr} + stmfd sp!, {sp, lr} #endif bl SYM(__gnu_ldivmod_helper) __PLT__ ldr lr, [sp, #4] add sp, sp, #8 - do_pop {r2, r3} + do_pop (r2, r3) RET #endif /* L_aeabi_ldivmod */ @@ -85,17 +134,20 @@ #ifdef L_aeabi_uldivmod ARM_FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + sub sp, sp, #8 -#if defined(__thumb2__) +/* Low latency and Thumb-2 do_push implementations can't push sp directly. */ +#if defined(__thumb2__) || defined(__irq_low_latency__) mov ip, sp - push {ip, lr} + do_push (ip, lr) #else - do_push {sp, lr} + stmfd sp!, {sp, lr} #endif bl SYM(__gnu_uldivmod_helper) __PLT__ ldr lr, [sp, #4] add sp, sp, #8 - do_pop {r2, r3} + do_pop (r2, r3) RET #endif /* L_aeabi_divmod */ --- a/gcc/config/arm/bpabi-v6m.S +++ b/gcc/config/arm/bpabi-v6m.S @@ -69,9 +69,52 @@ #endif /* L_aeabi_ulcmp */ +.macro test_div_by_zero signed + cmp yyh, #0 + bne 7f + cmp yyl, #0 + bne 7f + cmp xxh, #0 + bne 2f + cmp xxl, #0 +2: + .ifc \signed, unsigned + beq 3f + mov xxh, #0 + mvn xxh, xxh @ 0xffffffff + mov xxl, xxh +3: + .else + beq 5f + blt 6f + mov xxl, #0 + mvn xxl, xxl @ 0xffffffff + lsr xxh, xxl, #1 @ 0x7fffffff + b 5f +6: mov xxh, #0x80 + lsl xxh, xxh, #24 @ 0x80000000 + mov xxl, #0 +5: + .endif + @ tailcalls are tricky on v6-m. + push {r0, r1, r2} + ldr r0, 1f + adr r1, 1f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +1: + .word __aeabi_ldiv0 - 1b +7: +.endm + #ifdef L_aeabi_ldivmod FUNC_START aeabi_ldivmod + test_div_by_zero signed + push {r0, r1} mov r0, sp push {r0, lr} @@ -89,6 +132,8 @@ #ifdef L_aeabi_uldivmod FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + push {r0, r1} mov r0, sp push {r0, lr} --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -25,14 +25,15 @@ ;; In ARM state, 'l' is an alias for 'r' ;; The following normal constraints have been used: -;; in ARM/Thumb-2 state: G, H, I, J, K, L, M +;; in ARM/Thumb-2 state: G, H, I, j, J, K, L, M ;; in Thumb-1 state: I, J, K, L, M, N, O ;; The following multi-letter normal constraints have been used: -;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv +;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy +;; in Thumb-1 state: Pa, Pb ;; The following memory constraints have been used: -;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Us +;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us ;; in ARM state: Uq @@ -65,6 +66,13 @@ (define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS" "In Thumb state the core registers @code{r8}-@code{r15}.") +(define_constraint "j" + "A constant suitable for a MOVW instruction. (ARM/Thumb-2)" + (and (match_test "TARGET_32BIT && arm_arch_thumb2") + (ior (match_code "high") + (and (match_code "const_int") + (match_test "(ival & 0xffff0000) == 0"))))) + (define_register_constraint "k" "STACK_REG" "@internal The stack register.") @@ -116,11 +124,9 @@ : ((ival >= 0 && ival <= 1020) && ((ival & 3) == 0))"))) (define_constraint "N" - "In ARM/Thumb-2 state a constant suitable for a MOVW instruction. - In Thumb-1 state a constant in the range 0-31." + "Thumb-1 state a constant in the range 0-31." (and (match_code "const_int") - (match_test "TARGET_32BIT ? 
arm_arch_thumb2 && ((ival & 0xffff0000) == 0) - : (ival >= 0 && ival <= 31)"))) + (match_test "!TARGET_32BIT && (ival >= 0 && ival <= 31)"))) (define_constraint "O" "In Thumb-1 state a constant that is a multiple of 4 in the range @@ -129,6 +135,18 @@ (match_test "TARGET_THUMB1 && ival >= -508 && ival <= 508 && ((ival & 3) == 0)"))) +(define_constraint "Pa" + "@internal In Thumb-1 state a constant in the range -510 to +510" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -510 && ival <= 510 + && (ival > 255 || ival < -255)"))) + +(define_constraint "Pb" + "@internal In Thumb-1 state a constant in the range -262 to +262" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262 + && (ival > 255 || ival < -255)"))) + (define_constraint "G" "In ARM/Thumb-2 state a valid FPA immediate constant." (and (match_code "const_double") @@ -189,10 +207,17 @@ (define_constraint "Dv" "@internal In ARM/Thumb-2 state a const_double which can be used with a VFP fconsts - or fconstd instruction." + instruction." (and (match_code "const_double") (match_test "TARGET_32BIT && vfp3_const_double_rtx (op)"))) +(define_constraint "Dy" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconstd + instruction." + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) + (define_memory_constraint "Ut" "@internal In ARM/Thumb-2 state an address valid for loading/storing opaque structure @@ -214,17 +239,24 @@ (define_memory_constraint "Un" "@internal + In ARM/Thumb-2 state a valid address for Neon doubleword vector + load/store instructions." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)"))) + +(define_memory_constraint "Um" + "@internal In ARM/Thumb-2 state a valid address for Neon element and structure load/store instructions." (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, FALSE)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) (define_memory_constraint "Us" "@internal In ARM/Thumb-2 state a valid address for non-offset loads/stores of quad-word values in four ARM registers." (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, TRUE)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)"))) (define_memory_constraint "Uq" "@internal --- /dev/null +++ b/gcc/config/arm/fp16.c @@ -0,0 +1,145 @@ +/* Half-float conversion routines. + + Copyright (C) 2008, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +static inline unsigned short +__gnu_f2h_internal(unsigned int a, int ieee) +{ + unsigned short sign = (a >> 16) & 0x8000; + int aexp = (a >> 23) & 0xff; + unsigned int mantissa = a & 0x007fffff; + unsigned int mask; + unsigned int increment; + + if (aexp == 0xff) + { + if (!ieee) + return sign; + return sign | 0x7e00 | (mantissa >> 13); + } + + if (aexp == 0 && mantissa == 0) + return sign; + + aexp -= 127; + + /* Decimal point between bits 22 and 23. */ + mantissa |= 0x00800000; + if (aexp < -14) + { + mask = 0x007fffff; + if (aexp < -25) + aexp = -26; + else if (aexp != -25) + mask >>= 24 + aexp; + } + else + mask = 0x00001fff; + + /* Round. */ + if (mantissa & mask) + { + increment = (mask + 1) >> 1; + if ((mantissa & mask) == increment) + increment = mantissa & (increment << 1); + mantissa += increment; + if (mantissa >= 0x01000000) + { + mantissa >>= 1; + aexp++; + } + } + + if (ieee) + { + if (aexp > 15) + return sign | 0x7c00; + } + else + { + if (aexp > 16) + return sign | 0x7fff; + } + + if (aexp < -24) + return sign; + + if (aexp < -14) + { + mantissa >>= -14 - aexp; + aexp = -14; + } + + /* We leave the leading 1 in the mantissa, and subtract one + from the exponent bias to compensate. */ + return sign | (((aexp + 14) << 10) + (mantissa >> 13)); +} + +unsigned int +__gnu_h2f_internal(unsigned short a, int ieee) +{ + unsigned int sign = (unsigned int)(a & 0x8000) << 16; + int aexp = (a >> 10) & 0x1f; + unsigned int mantissa = a & 0x3ff; + + if (aexp == 0x1f && ieee) + return sign | 0x7f800000 | (mantissa << 13); + + if (aexp == 0) + { + int shift; + + if (mantissa == 0) + return sign; + + shift = __builtin_clz(mantissa) - 21; + mantissa <<= shift; + aexp = -shift; + } + + return sign | (((aexp + 0x70) << 23) + (mantissa << 13)); +} + +unsigned short +__gnu_f2h_ieee(unsigned int a) +{ + return __gnu_f2h_internal(a, 1); +} + +unsigned int +__gnu_h2f_ieee(unsigned short a) +{ + return __gnu_h2f_internal(a, 1); +} + +unsigned short +__gnu_f2h_alternative(unsigned int x) +{ + return __gnu_f2h_internal(x, 0); +} + +unsigned int +__gnu_h2f_alternative(unsigned short a) +{ + return __gnu_h2f_internal(a, 0); +} --- a/gcc/config/arm/fpa.md +++ b/gcc/config/arm/fpa.md @@ -599,10 +599,10 @@ { default: case 0: return \"mvf%?e\\t%0, %1\"; - case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 1: if (TARGET_FPA_EMU2) return \"ldf%?e\\t%0, %1\"; return \"lfm%?\\t%0, 1, %1\"; - case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 2: if (TARGET_FPA_EMU2) return \"stf%?e\\t%1, %0\"; return \"sfm%?\\t%1, 1, %0\"; } --- /dev/null +++ b/gcc/config/arm/hwdiv.md @@ -0,0 +1,41 @@ +;; ARM instruction patterns for hardware division +;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. 
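The divsi3/udivsi3 patterns defined just below match only when arm_arch_hwdiv is set for the selected core; the -mmarvell-div option added in arm.opt above is presumably one way of enabling that, though the wiring is not shown in this hunk. A minimal sketch of source that should use them; the instruction named in the comment assumes typical register allocation.

/* Illustrative sketch only: on a core with hardware divide
   (arm_arch_hwdiv), plain integer division can match the udivsi3
   pattern below and emit a single instruction, e.g.
       udiv  r0, r0, r1
   instead of a call to __aeabi_uidiv.  */
unsigned int
quotient (unsigned int n, unsigned int d)
{
  return n / d;
}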
+ +(define_insn "divsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (div:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "arm_arch_hwdiv" + "sdiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "insn" "sdiv")] +) + +(define_insn "udivsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (udiv:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "arm_arch_hwdiv" + "udiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "insn" "udiv")] +) + --- a/gcc/config/arm/ieee754-df.S +++ b/gcc/config/arm/ieee754-df.S @@ -83,7 +83,7 @@ ARM_FUNC_START adddf3 ARM_FUNC_ALIAS aeabi_dadd adddf3 -1: do_push {r4, r5, lr} +1: do_push (r4, r5, lr) @ Look for zeroes, equal values, INF, or NAN. shift1 lsl, r4, xh, #1 @@ -427,7 +427,7 @@ do_it eq, t moveq r1, #0 RETc(eq) - do_push {r4, r5, lr} + do_push (r4, r5, lr) mov r4, #0x400 @ initial exponent add r4, r4, #(52-1 - 1) mov r5, #0 @ sign bit is 0 @@ -447,7 +447,7 @@ do_it eq, t moveq r1, #0 RETc(eq) - do_push {r4, r5, lr} + do_push (r4, r5, lr) mov r4, #0x400 @ initial exponent add r4, r4, #(52-1 - 1) ands r5, r0, #0x80000000 @ sign bit in r5 @@ -481,7 +481,7 @@ RETc(eq) @ we are done already. @ value was denormalized. We can normalize it now. - do_push {r4, r5, lr} + do_push (r4, r5, lr) mov r4, #0x380 @ setup corresponding exponent and r5, xh, #0x80000000 @ move sign bit in r5 bic xh, xh, #0x80000000 @@ -508,9 +508,9 @@ @ compatibility. adr ip, LSYM(f0_ret) @ Push pc as well so that RETLDM works correctly. - do_push {r4, r5, ip, lr, pc} + do_push (r4, r5, ip, lr, pc) #else - do_push {r4, r5, lr} + do_push (r4, r5, lr) #endif mov r5, #0 @@ -534,9 +534,9 @@ @ compatibility. adr ip, LSYM(f0_ret) @ Push pc as well so that RETLDM works correctly. - do_push {r4, r5, ip, lr, pc} + do_push (r4, r5, ip, lr, pc) #else - do_push {r4, r5, lr} + do_push (r4, r5, lr) #endif ands r5, ah, #0x80000000 @ sign bit in r5 @@ -585,7 +585,7 @@ @ Legacy code expects the result to be returned in f0. Copy it @ there as well. LSYM(f0_ret): - do_push {r0, r1} + do_push (r0, r1) ldfd f0, [sp], #8 RETLDM @@ -602,7 +602,7 @@ ARM_FUNC_START muldf3 ARM_FUNC_ALIAS aeabi_dmul muldf3 - do_push {r4, r5, r6, lr} + do_push (r4, r5, r6, lr) @ Mask out exponents, trap any zero/denormal/INF/NAN. mov ip, #0xff @@ -910,7 +910,7 @@ ARM_FUNC_START divdf3 ARM_FUNC_ALIAS aeabi_ddiv divdf3 - do_push {r4, r5, r6, lr} + do_push (r4, r5, r6, lr) @ Mask out exponents, trap any zero/denormal/INF/NAN. mov ip, #0xff @@ -1117,7 +1117,7 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2 mov ip, #1 @ how should we specify unordered here? -1: str ip, [sp, #-4] +1: str ip, [sp, #-4]! @ Trap any INF/NAN first. mov ip, xh, lsl #1 @@ -1129,7 +1129,8 @@ @ Test for equality. @ Note that 0.0 is equal to -0.0. -2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 +2: add sp, sp, #4 + orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 do_it eq, e COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 teqne xh, yh @ or xh == yh @@ -1168,7 +1169,7 @@ bne 2b orrs ip, yl, yh, lsl #12 beq 2b @ y is not NAN -5: ldr r0, [sp, #-4] @ unordered return code +5: ldr r0, [sp], #4 @ unordered return code RET FUNC_END gedf2 @@ -1194,7 +1195,7 @@ @ The status-returning routines are required to preserve all @ registers except ip, lr, and cpsr. -6: do_push {r0, lr} +6: do_push (r0, lr) ARM_CALL cmpdf2 @ Set the Z flag correctly, and the C flag unconditionally. 
cmp r0, #0 --- a/gcc/config/arm/ieee754-sf.S +++ b/gcc/config/arm/ieee754-sf.S @@ -481,7 +481,7 @@ and r3, ip, #0x80000000 @ Well, no way to make it shorter without the umull instruction. - do_push {r3, r4, r5} + do_push (r3, r4, r5) mov r4, r0, lsr #16 mov r5, r1, lsr #16 bic r0, r0, r4, lsl #16 @@ -492,7 +492,7 @@ mla r0, r4, r1, r0 adds r3, r3, r0, lsl #16 adc r1, ip, r0, lsr #16 - do_pop {r0, r4, r5} + do_pop (r0, r4, r5) #else @@ -822,7 +822,7 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2 mov ip, #1 @ how should we specify unordered here? -1: str ip, [sp, #-4] +1: str ip, [sp, #-4]! @ Trap any INF/NAN first. mov r2, r0, lsl #1 @@ -834,7 +834,8 @@ @ Compare values. @ Note that 0.0 is equal to -0.0. -2: orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag +2: add sp, sp, #4 + orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag do_it ne teqne r0, r1 @ if not 0 compare sign do_it pl @@ -858,7 +859,7 @@ bne 2b movs ip, r1, lsl #9 beq 2b @ r1 is not NAN -5: ldr r0, [sp, #-4] @ return unordered code. +5: ldr r0, [sp], #4 @ return unordered code. RET FUNC_END gesf2 @@ -881,7 +882,7 @@ @ The status-returning routines are required to preserve all @ registers except ip, lr, and cpsr. -6: do_push {r0, r1, r2, r3, lr} +6: do_push (r0, r1, r2, r3, lr) ARM_CALL cmpsf2 @ Set the Z flag correctly, and the C flag unconditionally. cmp r0, #0 --- a/gcc/config/arm/lib1funcs.asm +++ b/gcc/config/arm/lib1funcs.asm @@ -27,8 +27,17 @@ #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits .previous -#endif +#endif /* __ELF__ and __linux__ */ +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align8_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align8_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ /* ------------------------------------------------------------------------ */ /* We need to know what prefix to add to function names. */ @@ -233,8 +242,8 @@ .macro shift1 op, arg0, arg1, arg2 \op \arg0, \arg1, \arg2 .endm -#define do_push push -#define do_pop pop +#define do_push(...) push {__VA_ARGS__} +#define do_pop(...) pop {__VA_ARGS__} #define COND(op1, op2, cond) op1 ## op2 ## cond /* Perform an arithmetic operation with a variable shift operand. This requires two instructions and a scratch register on Thumb-2. */ @@ -248,24 +257,133 @@ .macro shift1 op, arg0, arg1, arg2 mov \arg0, \arg1, \op \arg2 .endm -#define do_push stmfd sp!, -#define do_pop ldmfd sp!, +#if defined(__low_irq_latency__) +#define do_push(...) \ + _buildN1(do_push, _buildC1(__VA_ARGS__))( __VA_ARGS__) +#define _buildN1(BASE, X) _buildN2(BASE, X) +#define _buildN2(BASE, X) BASE##X +#define _buildC1(...) _buildC2(__VA_ARGS__,9,8,7,6,5,4,3,2,1) +#define _buildC2(a1,a2,a3,a4,a5,a6,a7,a8,a9,c,...) c + +#define do_push1(r1) str r1, [sp, #-4]! +#define do_push2(r1, r2) str r2, [sp, #-4]! ; str r1, [sp, #-4]! +#define do_push3(r1, r2, r3) str r3, [sp, #-4]! ; str r2, [sp, #-4]!; str r1, [sp, #-4]! +#define do_push4(r1, r2, r3, r4) \ + do_push3 (r2, r3, r4);\ + do_push1 (r1) +#define do_push5(r1, r2, r3, r4, r5) \ + do_push4 (r2, r3, r4, r5);\ + do_push1 (r1) + +#define do_pop(...) 
\ +_buildN1(do_pop, _buildC1(__VA_ARGS__))( __VA_ARGS__) + +#define do_pop1(r1) ldr r1, [sp], #4 +#define do_pop2(r1, r2) ldr r1, [sp], #4 ; ldr r2, [sp], #4 +#define do_pop3(r1, r2, r3) ldr r1, [sp], #4 ; ldr r2, [sp], #4; ldr r3, [sp], #4 +#define do_pop4(r1, r2, r3, r4) \ + do_pop1 (r1);\ + do_pop3 (r2, r3, r4) +#define do_pop5(r1, r2, r3, r4, r5) \ + do_pop1 (r1);\ + do_pop4 (r2, r3, r4, r5) +#else +#define do_push(...) stmfd sp!, { __VA_ARGS__} +#define do_pop(...) ldmfd sp!, {__VA_ARGS__} +#endif + + #define COND(op1, op2, cond) op1 ## cond ## op2 .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp \name \dest, \src1, \src2, \shiftop \shiftreg .endm #endif -.macro ARM_LDIV0 name +#ifdef __ARM_EABI__ +.macro ARM_LDIV0 name signed + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM (__aeabi_idiv0) __PLT__ +.endm +#else +.macro ARM_LDIV0 name signed str lr, [sp, #-8]! 98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 bl SYM (__div0) __PLT__ mov r0, #0 @ About as wrong as it could be. RETLDM unwind=98b .endm +#endif -.macro THUMB_LDIV0 name +#ifdef __ARM_EABI__ +.macro THUMB_LDIV0 name signed +#if defined(__ARM_ARCH_6M__) + .ifc \signed, unsigned + cmp r0, #0 + beq 1f + mov r0, #0 + mvn r0, r0 @ 0xffffffff +1: + .else + cmp r0, #0 + beq 2f + blt 3f + mov r0, #0 + mvn r0, r0 + lsr r0, r0, #1 @ 0x7fffffff + b 2f +3: mov r0, #0x80 + lsl r0, r0, #24 @ 0x80000000 +2: + .endif + push {r0, r1, r2} + ldr r0, 4f + adr r1, 4f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +4: + .word __aeabi_idiv0 - 4b +#elif defined(__thumb2__) + .syntax unified + .ifc \signed, unsigned + cbz r0, 1f + mov r0, #0xffffffff +1: + .else + cmp r0, #0 + do_it gt + movgt r0, #0x7fffffff + do_it lt + movlt r0, #0x80000000 + .endif + b.w SYM(__aeabi_idiv0) __PLT__ +#else + .align 2 + bx pc + nop + .arm + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM(__aeabi_idiv0) __PLT__ + .thumb +#endif +.endm +#else +.macro THUMB_LDIV0 name signed push { r1, lr } 98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 bl SYM (__div0) @@ -277,18 +395,19 @@ pop { r1, pc } #endif .endm +#endif .macro FUNC_END name SIZE (__\name) .endm -.macro DIV_FUNC_END name +.macro DIV_FUNC_END name signed cfi_start __\name, LSYM(Lend_div0) LSYM(Ldiv0): #ifdef __thumb__ - THUMB_LDIV0 \name + THUMB_LDIV0 \name \signed #else - ARM_LDIV0 \name + ARM_LDIV0 \name \signed #endif cfi_end LSYM(Lend_div0) FUNC_END \name @@ -413,6 +532,12 @@ #define yyl r2 #endif +#ifdef __ARM_EABI__ +.macro WEAK name + .weak SYM (__\name) +.endm +#endif + #ifdef __thumb__ /* Register aliases. */ @@ -437,6 +562,43 @@ #if __ARM_ARCH__ >= 5 && !
defined (__OPTIMIZE_SIZE__) +#if defined (__thumb2__) + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsb \curbit, \curbit, #31 + adr \result, 1f + add \curbit, \result, \curbit, lsl #4 + mov \result, #0 + mov pc, \curbit +.p2align 3 +1: + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp.w \dividend, \divisor, lsl #shift + nop.n + adc.w \result, \result, \result + it cs + subcs.w \dividend, \dividend, \divisor, lsl #shift + .endr +#elif defined(__ARM_TUNE_MARVELL_F__) + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + mov \divisor, \divisor, lsl \curbit + rsb \curbit, \curbit, #31 + mov \curbit, \curbit, lsl #2 + mov \result, #0 + add pc, pc, \curbit, lsl #2 + nop + .rept 32 + cmp \dividend, \divisor + subcs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 + adc \result, \result, \result + .endr +#else /* ! defined(__ARM_TUNE_MARVELL_F__) */ clz \curbit, \dividend clz \result, \divisor sub \curbit, \result, \curbit @@ -452,6 +614,7 @@ adc \result, \result, \result subcs \dividend, \dividend, \divisor, lsl #shift .endr +#endif /* defined(__ARM_TUNE_MARVELL_F__) */ #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ #if __ARM_ARCH__ >= 5 @@ -499,18 +662,23 @@ @ Division loop 1: cmp \dividend, \divisor + do_it hs, t subhs \dividend, \dividend, \divisor orrhs \result, \result, \curbit cmp \dividend, \divisor, lsr #1 + do_it hs, t subhs \dividend, \dividend, \divisor, lsr #1 orrhs \result, \result, \curbit, lsr #1 cmp \dividend, \divisor, lsr #2 + do_it hs, t subhs \dividend, \dividend, \divisor, lsr #2 orrhs \result, \result, \curbit, lsr #2 cmp \dividend, \divisor, lsr #3 + do_it hs, t subhs \dividend, \dividend, \divisor, lsr #3 orrhs \result, \result, \curbit, lsr #3 cmp \dividend, #0 @ Early termination? + do_it ne, t movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? movne \divisor, \divisor, lsr #4 bne 1b @@ -799,13 +967,14 @@ /* ------------------------------------------------------------------------ */ #ifdef L_udivsi3 +#if defined(__ARM_ARCH_6M__) + FUNC_START udivsi3 FUNC_ALIAS aeabi_uidiv udivsi3 -#ifdef __thumb__ - cmp divisor, #0 beq LSYM(Ldiv0) +LSYM(udivsi3_nodiv0): mov curbit, #1 mov result, #0 @@ -819,9 +988,16 @@ pop { work } RET -#else /* ARM version. */ +#else /* ARM/Thumb-2 version. */ + + ARM_FUNC_START udivsi3 + ARM_FUNC_ALIAS aeabi_uidiv udivsi3 + /* Note: if called via udivsi3_nodiv0, this will unnecessarily check + for division-by-zero a second time. */ +LSYM(udivsi3_nodiv0): subs r2, r1, #1 + do_it eq RETc(eq) bcc LSYM(Ldiv0) cmp r0, r1 @@ -834,7 +1010,8 @@ mov r0, r2 RET -11: moveq r0, #1 +11: do_it eq, e + moveq r0, #1 movne r0, #0 RET @@ -845,19 +1022,24 @@ #endif /* ARM version */ - DIV_FUNC_END udivsi3 + DIV_FUNC_END udivsi3 unsigned +#if defined(__ARM_ARCH_6M__) FUNC_START aeabi_uidivmod -#ifdef __thumb__ + cmp r1, #0 + beq LSYM(Ldiv0) push {r0, r1, lr} - bl SYM(__udivsi3) + bl LSYM(udivsi3_nodiv0) POP {r1, r2, r3} mul r2, r0 sub r1, r1, r2 bx r3 #else +ARM_FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) stmfd sp!, { r0, r1, lr } - bl SYM(__udivsi3) + bl LSYM(udivsi3_nodiv0) ldmfd sp!, { r1, r2, lr } mul r3, r2, r0 sub r1, r1, r3 @@ -904,19 +1086,20 @@ #endif /* ARM version. 
*/ - DIV_FUNC_END umodsi3 + DIV_FUNC_END umodsi3 unsigned #endif /* L_umodsi3 */ /* ------------------------------------------------------------------------ */ #ifdef L_divsi3 +#if defined(__ARM_ARCH_6M__) + FUNC_START divsi3 FUNC_ALIAS aeabi_idiv divsi3 -#ifdef __thumb__ cmp divisor, #0 beq LSYM(Ldiv0) - +LSYM(divsi3_nodiv0): push { work } mov work, dividend eor work, divisor @ Save the sign of the result. @@ -945,15 +1128,21 @@ pop { work } RET -#else /* ARM version. */ +#else /* ARM/Thumb-2 version. */ + ARM_FUNC_START divsi3 + ARM_FUNC_ALIAS aeabi_idiv divsi3 + cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. beq LSYM(Ldiv0) +LSYM(divsi3_nodiv0): + eor ip, r0, r1 @ save the sign of the result. + do_it mi rsbmi r1, r1, #0 @ loops below use unsigned. subs r2, r1, #1 @ division by 1 or -1 ? beq 10f movs r3, r0 + do_it mi rsbmi r3, r0, #0 @ positive dividend value cmp r3, r1 bls 11f @@ -963,14 +1152,18 @@ ARM_DIV_BODY r3, r1, r0, r2 cmp ip, #0 + do_it mi rsbmi r0, r0, #0 RET 10: teq ip, r0 @ same sign ? + do_it mi rsbmi r0, r0, #0 RET -11: movlo r0, #0 +11: do_it lo + movlo r0, #0 + do_it eq,t moveq r0, ip, asr #31 orreq r0, r0, #1 RET @@ -979,24 +1172,30 @@ cmp ip, #0 mov r0, r3, lsr r2 + do_it mi rsbmi r0, r0, #0 RET #endif /* ARM version */ - DIV_FUNC_END divsi3 + DIV_FUNC_END divsi3 signed +#if defined(__ARM_ARCH_6M__) FUNC_START aeabi_idivmod -#ifdef __thumb__ + cmp r1, #0 + beq LSYM(Ldiv0) push {r0, r1, lr} - bl SYM(__divsi3) + bl LSYM(divsi3_nodiv0) POP {r1, r2, r3} mul r2, r0 sub r1, r1, r2 bx r3 #else +ARM_FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) stmfd sp!, { r0, r1, lr } - bl SYM(__divsi3) + bl LSYM(divsi3_nodiv0) ldmfd sp!, { r1, r2, lr } mul r3, r2, r0 sub r1, r1, r3 @@ -1062,21 +1261,25 @@ #endif /* ARM version */ - DIV_FUNC_END modsi3 + DIV_FUNC_END modsi3 signed #endif /* L_modsi3 */ /* ------------------------------------------------------------------------ */ #ifdef L_dvmd_tls - FUNC_START div0 - FUNC_ALIAS aeabi_idiv0 div0 - FUNC_ALIAS aeabi_ldiv0 div0 - +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + FUNC_START aeabi_idiv0 + FUNC_START aeabi_ldiv0 RET - FUNC_END aeabi_ldiv0 FUNC_END aeabi_idiv0 +#else + FUNC_START div0 + RET FUNC_END div0 +#endif #endif /* L_divmodsi_tools */ /* ------------------------------------------------------------------------ */ @@ -1086,16 +1289,49 @@ /* Constant taken from . */ #define SIGFPE 8 +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + ARM_FUNC_START aeabi_idiv0 + ARM_FUNC_START aeabi_ldiv0 +#else ARM_FUNC_START div0 +#endif - do_push {r1, lr} + do_push (r1, lr) mov r0, #SIGFPE bl SYM(raise) __PLT__ RETLDM r1 +#ifdef __ARM_EABI__ + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else FUNC_END div0 +#endif #endif /* L_dvmd_lnx */ +#ifdef L_clear_cache +#if defined __ARM_EABI__ && defined __linux__ +@ EABI GNU/Linux call to cacheflush syscall. + ARM_FUNC_START clear_cache + do_push (r7) +#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) + movw r7, #2 + movt r7, #0xf +#else + mov r7, #0xf0000 + add r7, r7, #2 +#endif + mov r2, #0 + swi 0 + do_pop (r7) + RET + FUNC_END clear_cache +#else +#error "This is only for ARM EABI GNU/Linux" +#endif +#endif /* L_clear_cache */ /* ------------------------------------------------------------------------ */ /* Dword shift operations. 
*/ /* All the following Dword shift variants rely on the fact that @@ -1292,7 +1528,7 @@ push {r4, lr} # else ARM_FUNC_START clzdi2 - do_push {r4, lr} + do_push (r4, lr) # endif cmp xxh, #0 bne 1f --- a/gcc/config/arm/linux-eabi.h +++ b/gcc/config/arm/linux-eabi.h @@ -66,22 +66,14 @@ /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to use the GNU/Linux version, not the generic BPABI version. */ #undef LINK_SPEC -#define LINK_SPEC LINUX_TARGET_LINK_SPEC +#define LINK_SPEC LINUX_TARGET_LINK_SPEC BE8_LINK_SPEC /* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we do not use -lfloat. */ #undef LIBGCC_SPEC -/* Clear the instruction cache from `beg' to `end'. This makes an - inline system call to SYS_cacheflush. */ +/* Clear the instruction cache from `beg' to `end'. This is + implemented in lib1funcs.asm, so ensure an error if this definition + is used. */ #undef CLEAR_INSN_CACHE -#define CLEAR_INSN_CACHE(BEG, END) \ -{ \ - register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ - register unsigned long _end __asm ("a2") = (unsigned long) (END); \ - register unsigned long _flg __asm ("a3") = 0; \ - register unsigned long _scno __asm ("r7") = 0xf0002; \ - __asm __volatile ("swi 0 @ sys_cacheflush" \ - : "=r" (_beg) \ - : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); \ -} +#define CLEAR_INSN_CACHE(BEG, END) not used --- a/gcc/config/arm/linux-elf.h +++ b/gcc/config/arm/linux-elf.h @@ -98,7 +98,7 @@ /* NWFPE always understands FPA instructions. */ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3 +#define FPUTYPE_DEFAULT "fpe3" /* Call the function profiler with a given profile label. */ #undef ARM_FUNCTION_PROFILER --- /dev/null +++ b/gcc/config/arm/marvell-f.md @@ -0,0 +1,365 @@ +;; Marvell 2850 pipeline description +;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. +;; Written by Marvell and CodeSourcery, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 2, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +;; This automaton provides a pipeline description for the Marvell +;; 2850 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "marvell_f") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This is a dual-issue processor with three pipelines: +;; +;; 1. Arithmetic and load/store pipeline A1. +;; Issue | E1 | E2 | OF | WR | WB for load-store instructions +;; Issue | E1 | E2 | WB for arithmetic instructions +;; +;; 2. Arithmetic pipeline A2. +;; Issue | E1 | E2 | WB +;; +;; 3. Multiply and multiply-accumulate pipeline. +;; Issue | MAC1 | MAC2 | MAC3 | WB +;; +;; There are various bypasses modelled to a greater or lesser extent. 
+;; +;; Latencies in this file correspond to the number of cycles after +;; the issue stage that it takes for the result of the instruction to +;; be computed, or for its side-effects to occur. + +(define_cpu_unit "a1_e1,a1_e2,a1_of,a1_wr,a1_wb" "marvell_f") ; ALU 1 +(define_cpu_unit "a2_e1,a2_e2,a2_wb" "marvell_f") ; ALU 2 +(define_cpu_unit "m_1,m_2,m_3,m_wb" "marvell_f") ; MAC + +;; We define an SRAM cpu unit to enable us to describe conflicts +;; between loads at the E2 stage and stores at the WR stage. + +(define_cpu_unit "sram" "marvell_f") + +;; Handling of dual-issue constraints. +;; +;; Certain pairs of instructions can be issued in parallel, and certain +;; pairs cannot. We divide a subset of the instructions into groups as +;; follows. +;; +;; - data processing 1 (mov, mvn); +;; - data processing 2 (adc, add, and, bic, cmn, cmp, eor, orr, rsb, +;; rsc, sbc, sub, teq, tst); +;; - load single (ldr, ldrb, ldrbt, ldrt, ldrh, ldrsb, ldrsh); +;; - store single (str, strb, strbt, strt, strh); +;; - swap (swp, swpb); +;; - pld; +;; - count leading zeros and DSP add/sub (clz, qadd, qdadd, qsub, qdsub); +;; - multiply 2 (mul, muls, smull, umull, smulxy, smulls, umulls); +;; - multiply 3 (mla, mlas, smlal, umlal, smlaxy, smlalxy, smlawx, +;; smlawy, smlals, umlals); +;; - branches (b, bl, blx, bx). +;; +;; Ignoring conditional execution, it is a good approximation to the core +;; to model that two instructions may only be issued in parallel if the +;; following conditions are met. +;; I. The instructions both fall into one of the above groups and their +;; corresponding groups have an entry in the matrix below that is not X. +;; II. The second instruction does not read any register updated by the +;; first instruction (already enforced by the GCC scheduler). +;; III. The second instruction does not need the carry flag updated by the +;; first instruction. Currently we do not model this. +;; +;; First Second instruction group +;; insn +;; DP1 DP2 L S SWP PLD CLZ M2 M3 B +;; +;; DP1 ok ok ok ok ok ok ok ok ok ok +;; DP2(1) ok ok ok ok ok ok ok ok ok ok +;; DP2(2) ok (2) ok (4) ok ok ok ok X ok +;; L } +;; SWP } ok ok X X X X ok ok ok ok +;; PLD } +;; S(3) ok ok X X X X ok ok ok ok +;; S(4) ok (2) X X X X ok ok X ok +;; CLZ ok ok ok ok ok ok ok ok ok ok +;; M2 ok ok ok ok ok ok ok X X ok +;; M3 ok (2) ok (4) ok ok ok X X ok +;; B ok ok ok ok ok ok ok ok ok ok +;; +;; (1) without register shift +;; (2) with register shift +;; (3) with immediate offset +;; (4) with register offset +;; +;; We define a fake cpu unit "reg_shift_lock" to enforce constraints +;; between instructions in groups DP2(2) and M3. All other +;; constraints are enforced automatically by virtue of the limited +;; number of pipelines available for the various operations, with +;; the exception of constraints involving S(4) that we do not model. + +(define_cpu_unit "reg_shift_lock" "marvell_f") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; 1. Certain logic operations can be retired after the E1 stage if +;; the pipeline is not already retiring another instruction. In this +;; model we assume this behaviour always holds for mov, mvn, and, orr, eor +;; instructions. If a register shift is involved and the instruction is +;; not mov or mvn, then a dual-issue constraint must be enforced. + +;; The first two cases are separate so they can be identified for +;; bypasses below.
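A hedged example of the dual-issue restriction that reg_shift_lock models: the instruction pairing in the comment is an assumption about what the compiler would emit for this source, chosen only so that one instruction falls in group DP2(2) and the other in group M3.

/* Illustrative sketch only: "a & (b << c)" naturally becomes an ALU
   operation with a register-controlled shift (group DP2(2), e.g.
   "and r4, r1, r2, lsl r3"), and "a * b + c" a multiply-accumulate
   (group M3, e.g. "mla r5, r1, r2, r3").  Per the matrix above this
   pair must not be dual-issued, which the reservations below enforce
   by making both reserve the fake reg_shift_lock unit.  */
int
dual_issue_example (int a, int b, int c)
{
  int x = a & (b << c);
  int y = a * b + c;
  return x + y;
}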
+ +(define_insn_reservation "marvell_f_alu_early_retire" 1 + (and (eq_attr "tune" "marvell_f") + (and (eq_attr "type" "alu") + (eq_attr "insn" "mov,mvn,and,orr,eor"))) + "(a1_e1,a1_wb)|(a2_e1,a2_wb)") + +(define_insn_reservation "marvell_f_alu_early_retire_shift" 1 + (and (eq_attr "tune" "marvell_f") + (and (eq_attr "type" "alu_shift_reg") + (eq_attr "insn" "mov,mvn,and,orr,eor"))) + "(a1_e1,a1_wb)|(a2_e1,a2_wb)") + +(define_insn_reservation "marvell_f_alu_early_retire_reg_shift1" 1 + (and (eq_attr "tune" "marvell_f") + (and (eq_attr "type" "alu_shift_reg") + (eq_attr "insn" "mov,mvn"))) + "(a1_e1,a1_wb)|(a2_e1,a2_wb)") + +(define_insn_reservation "marvell_f_alu_early_retire_reg_shift2" 1 + (and (eq_attr "tune" "marvell_f") + (and (eq_attr "type" "alu_shift_reg") + (eq_attr "insn" "and,orr,eor"))) + "(reg_shift_lock+a1_e1,a1_wb)|(reg_shift_lock+a2_e1,a2_wb)") + +;; 2. ALU operations with no shifted operand. These bypass the E1 stage if +;; the E2 stage of the corresponding pipeline is clear; here, we always +;; model this scenario [*]. We give the operation a latency of 1 yet reserve +;; both E1 and E2 for it (thus preventing the GCC scheduler, in the case +;; where both E1 and E2 of one pipeline are clear, from issuing one +;; instruction to each). +;; +;; [*] The non-bypass case is a latency of two, reserving E1 on the first +;; cycle and E2 on the next. Due to the way the scheduler works we +;; have to choose between taking this as the default and taking the +;; above case (with latency one) as the default; we choose the latter. + +(define_insn_reservation "marvell_f_alu_op_bypass_e1" 1 + (and (eq_attr "tune" "marvell_f") + (and (eq_attr "type" "alu") + (not (eq_attr "insn" "mov,mvn,and,orr,eor")))) + "(a1_e1+a1_e2,a1_wb)|(a2_e1+a2_e2,a2_wb)") + +;; 3. ALU operations with a shift-by-constant operand. + +(define_insn_reservation "marvell_f_alu_shift_op" 2 + (and (eq_attr "tune" "marvell_f") + (and (eq_attr "type" "alu_shift") + (not (eq_attr "insn" "mov,mvn,and,orr,eor")))) + "(a1_e1,a1_e2,a1_wb)|(a2_e1,a2_e2,a2_wb)") + +;; 4. ALU operations with a shift-by-register operand. Since the +;; instruction is never mov or mvn, a dual-issue constraint must +;; be enforced. + +(define_insn_reservation "marvell_f_alu_shift_reg_op" 2 + (and (eq_attr "tune" "marvell_f") + (and (eq_attr "type" "alu_shift_reg") + (not (eq_attr "insn" "mov,mvn,and,orr,eor")))) + "(reg_shift_lock+a1_e1,a1_e2,a1_wb)|(reg_shift_lock+a2_e1,a2_e2,a2_wb)") + +;; Given an ALU operation with shift (I1) followed by another ALU +;; operation (I2), with I2 depending on the destination register Rd of I1 +;; and with I2 not using that value as the amount or the starting value for +;; a shift, then I1 and I2 may be issued to the same pipeline on +;; consecutive cycles. In terms of this model that corresponds to I1 +;; having a latency of one cycle. There are three cases for various +;; I1 and I2 as follows. + +;; (a) I1 has a constant or register shift and I2 doesn't have a shift at all. +(define_bypass 1 "marvell_f_alu_shift_op,\ + marvell_f_alu_shift_reg_op" + "marvell_f_alu_op_bypass_e1,marvell_f_alu_early_retire") + +;; (b) I1 has a constant or register shift and I2 has a constant shift. +;; Rd must not provide the starting value for the shift. +(define_bypass 1 "marvell_f_alu_shift_op,\ + marvell_f_alu_shift_reg_op" + "marvell_f_alu_shift_op,marvell_f_alu_early_retire_shift" + "arm_no_early_alu_shift_value_dep") + +;; (c) I1 has a constant or register shift and I2 has a register shift. 
+;; Rd must not provide the amount by which to shift. +(define_bypass 1 "marvell_f_alu_shift_op,\ + marvell_f_alu_shift_reg_op" + "marvell_f_alu_shift_reg_op,\ + marvell_f_alu_early_retire_reg_shift1,\ + marvell_f_alu_early_retire_reg_shift2" + "arm_no_early_alu_shift_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions in group "Multiply 2". + +(define_insn_reservation "marvell_f_multiply_2" 3 + (and (eq_attr "tune" "marvell_f") + (eq_attr "insn" "mul,muls,smull,umull,smulxy,smulls,umulls")) + "m_1,m_2,m_3,m_wb") + +;; Multiplication instructions in group "Multiply 3". There is a +;; dual-issue constraint with non-multiplication ALU instructions +;; to be respected here. + +(define_insn_reservation "marvell_f_multiply_3" 3 + (and (eq_attr "tune" "marvell_f") + (eq_attr "insn" "mla,mlas,smlal,umlal,smlaxy,smlalxy,smlawx,\ + smlawy,smlals,umlals")) + "reg_shift_lock+m_1,m_2,m_3,m_wb") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Conditional backward b instructions can have a zero-cycle penalty, and +;; other conditional b and bl instructions have a one-cycle penalty if +;; predicted correctly. Currently we model the zero-cycle case for all +;; branches. + +(define_insn_reservation "marvell_f_branches" 0 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "branch")) + "nothing") + +;; Call latencies are not predictable; a semi-arbitrary very large +;; number is used as "positive infinity" for such latencies. + +(define_insn_reservation "marvell_f_call" 32 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "call")) + "nothing") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback. +;; These models assume that all memory references hit in dcache. + +;; 1. Load/store for single registers. + +;; The worst case for a load is when the load result is needed in E1 +;; (for example for a register shift), giving a latency of four. Loads +;; skip E1 and access memory at the E2 stage. + +(define_insn_reservation "marvell_f_load1" 4 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "load1,load_byte")) + "a1_e2+sram,a1_of,a1_wr,a1_wb") + +;; The result for a load may be bypassed (to be available at the same +;; time as the load arrives in the WR stage, so effectively at the OF +;; stage) to the Rn operand at E2 with a latency of two. The result may +;; be bypassed to a non-Rn operand at E2 with a latency of three. For +;; instructions without shifts, detection of an Rn bypass situation is +;; difficult (because some of the instruction patterns switch their +;; operands), and so we do not model that here. For instructions with +;; shifts, the operand used at E2 will always be Rn, and so we can +;; model the latency-two bypass for these. + +(define_bypass 2 "marvell_f_load1" + "marvell_f_alu_shift_op" + "arm_no_early_alu_shift_value_dep") + +(define_bypass 2 "marvell_f_load1" + "marvell_f_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +;; Stores write at the WR stage and loads read at the E2 stage, giving +;; a store latency of three. 
+ +(define_insn_reservation "marvell_f_store1" 3 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "store1")) + "a1_e2,a1_of,a1_wr+sram,a1_wb") + +;; 2. Load/store for two consecutive registers. These may be dealt +;; with in the same number of cycles as single loads and stores. + +(define_insn_reservation "marvell_f_load2" 4 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "load2")) + "a1_e2+sram,a1_of,a1_wr,a1_wb") + +(define_insn_reservation "marvell_f_store2" 3 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "store2")) + "a1_e2,a1_of,a1_wr+sram,a1_wb") + +;; The first word of a doubleword load is eligible for the latency-two +;; bypass described above for single loads, but this is not modelled here. +;; We do however assume that either word may also be bypassed with +;; latency three for ALU operations with shifts (where the shift value and +;; amount do not depend on the loaded value) and latency four for ALU +;; operations without shifts. The latency four case is of course the default. + +(define_bypass 3 "marvell_f_load2" + "marvell_f_alu_shift_op" + "arm_no_early_alu_shift_value_dep") + +(define_bypass 3 "marvell_f_load2" + "marvell_f_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +;; 3. Load/store for more than two registers. + +;; These instructions stall for an extra cycle in the decode stage; +;; individual load/store instructions for each register are then issued. +;; The load/store multiple instruction itself is removed from the decode +;; stage at the same time as the final load/store instruction is issued. +;; To complicate matters, pairs of loads/stores referencing two +;; consecutive registers will be issued together as doubleword operations. +;; We model a 3-word load as an LDR plus an LDRD, and a 4-word load +;; as two LDRDs; thus, these are allocated the same latencies (the +;; latency for two consecutive loads plus one for the setup stall). +;; The extra stall is modelled by reserving E1. + +(define_insn_reservation "marvell_f_load3_4" 6 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "load3,load4")) + "a1_e1,a1_e1+a1_e2+sram,a1_e2+sram+a1_of,a1_of+a1_wr,a1_wr+a1_wb,a1_wb") + +;; Bypasses are possible for ldm as for single loads, but we do not +;; model them here since the order of the constituent loads is +;; difficult to predict. + +(define_insn_reservation "marvell_f_store3_4" 5 + (and (eq_attr "tune" "marvell_f") + (eq_attr "type" "store3,store4")) + "a1_e1,a1_e1+a1_e2,a1_e2+a1_of,a1_of+a1_wr+sram,a1_wr+sram+a1_wb,a1_wb") + --- /dev/null +++ b/gcc/config/arm/marvell-f-vfp.md @@ -0,0 +1,153 @@ +;; Marvell 2850 VFP pipeline description +;; Copyright (C) 2007 Free Software Foundation, Inc. +;; Written by CodeSourcery, Inc. + +;; This file is part of GCC. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +;; This automaton provides a pipeline description for the Marvell +;; 2850 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. 
+ +(define_automaton "marvell_f_vfp") + +;; This is a single-issue VFPv2 implementation with the following execution +;; units: +;; +;; 1. Addition/subtraction unit; takes three cycles, pipelined. +;; 2. Multiplication unit; takes four cycles, pipelined. +;; 3. Add buffer, used for multiply-accumulate (see below). +;; 4. Divide/square root unit, not pipelined. +;; For single-precision: takes sixteen cycles, can accept another insn +;; after fifteen cycles. +;; For double-precision: takes thirty-one cycles, can accept another insn +;; after thirty cycles. +;; 5. Single-cycle unit, pipelined. +;; This does absolute value/copy/negate/compare in one cycle and +;; conversion in two cycles. +;; +;; When all three operands of a multiply-accumulate instruction are ready, +;; one is issued to the add buffer (which can hold six operands in a FIFO) +;; and the two to be multiplied are issued to the multiply unit. After +;; four cycles in the multiply unit, one cycle is taken to issue the +;; operand from the add buffer plus the multiplication result to the +;; addition/subtraction unit. That issue takes priority over any add/sub +;; instruction waiting at the normal issue stage, but may be performed in +;; parallel with the issue of a non-add/sub instruction. The total time +;; for a multiply-accumulate instruction to pass through the execution +;; units is hence eight cycles. +;; +;; We do not need to explicitly model the add buffer because it can +;; always issue the instruction at the head of its FIFO (due to the above +;; priority rule) and there are more spaces in the add buffer (six) than +;; there are stages (four) in the multiplication unit. +;; +;; Two instructions may be retired at once from the head of an 8-entry +;; reorder buffer. Data from these first two instructions only may be +;; forwarded to the inputs of the issue unit. We assume that the +;; pressure on the reorder buffer will be sufficiently low that every +;; instruction entering it will be eligible for data forwarding. Since +;; data is forwarded to the issue unit and not the execution units (so +;; for example single-cycle instructions cannot be issued back-to-back), +;; the latencies given below are the cycle counts above plus one. + +(define_cpu_unit "mf_vfp_issue" "marvell_f_vfp") +(define_cpu_unit "mf_vfp_add" "marvell_f_vfp") +(define_cpu_unit "mf_vfp_mul" "marvell_f_vfp") +(define_cpu_unit "mf_vfp_div" "marvell_f_vfp") +(define_cpu_unit "mf_vfp_single_cycle" "marvell_f_vfp") + +;; An attribute to indicate whether our reservations are applicable. + +(define_attr "marvell_f_vfp" "yes,no" + (const (if_then_else (and (eq_attr "tune" "marvell_f") + (eq_attr "fpu" "vfp")) + (const_string "yes") (const_string "no")))) + +;; Reservations of functional units. The nothing*2 reservations at the +;; start of many of the reservation strings correspond to the decode +;; stages. We need to have these reservations so that we can correctly +;; reserve parts of the core's A1 pipeline for loads and stores. For +;; that case (since loads skip E1) the pipelines line up thus: +;; A1 pipe: Issue E2 OF WR WB ... +;; VFP pipe: Fetch Decode1 Decode2 Issue Execute1 ... +;; For a load, we need to make a reservation of E2, and thus we must +;; use Decode1 as the starting point for all VFP reservations here. +;; +;; For reservations of pipelined VFP execution units we only reserve +;; the execution unit for the first execution cycle, omitting any trailing +;; "nothing" reservations. 
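As a cross-check on the comment above, the latencies in the reservations that follow are the execution-unit cycle counts plus one cycle for forwarding through the issue stage; for the multiply-accumulate case:

\[
\underbrace{4}_{\text{multiply}}
+ \underbrace{1}_{\text{add-buffer hand-off}}
+ \underbrace{3}_{\text{add/subtract}}
+ \underbrace{1}_{\text{forward to issue}}
= 9,
\]

and likewise 3 + 1 = 4 for adds, 4 + 1 = 5 for multiplies, and 16 + 1 = 17 and 31 + 1 = 32 for single- and double-precision divides.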
+ +(define_insn_reservation "marvell_f_vfp_add" 4 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "farith")) + "nothing*2,mf_vfp_issue,mf_vfp_add") + +(define_insn_reservation "marvell_f_vfp_mul" 5 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "fmuls,fmuld")) + "nothing*2,mf_vfp_issue,mf_vfp_mul") + +(define_insn_reservation "marvell_f_vfp_divs" 17 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "fdivs")) + "nothing*2,mf_vfp_issue,mf_vfp_div*15") + +(define_insn_reservation "marvell_f_vfp_divd" 32 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "fdivd")) + "nothing*2,mf_vfp_issue,mf_vfp_div*30") + +;; The DFA lookahead is small enough that the "add" reservation here +;; will always take priority over any addition/subtraction instruction +;; issued five cycles after the multiply-accumulate instruction, as +;; required. +(define_insn_reservation "marvell_f_vfp_mac" 9 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "fmacs,fmacd")) + "nothing*2,mf_vfp_issue,mf_vfp_mul,nothing*4,mf_vfp_add") + +(define_insn_reservation "marvell_f_vfp_single" 2 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "ffarith")) + "nothing*2,mf_vfp_issue,mf_vfp_single_cycle") + +(define_insn_reservation "marvell_f_vfp_convert" 3 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "f_cvt")) + "nothing*2,mf_vfp_issue,mf_vfp_single_cycle") + +(define_insn_reservation "marvell_f_vfp_load" 2 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "f_loads,f_loadd")) + "a1_e2+sram,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle") + +(define_insn_reservation "marvell_f_vfp_from_core" 2 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "r_2_f")) + "a1_e2,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle") + +;; The interaction between the core and VFP pipelines during VFP +;; store operations and core <-> VFP moves is not clear, so we guess. +(define_insn_reservation "marvell_f_vfp_store" 3 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "f_stores,f_stored")) + "a1_e2,a1_of,mf_vfp_issue,a1_wr+sram+mf_vfp_single_cycle") + +(define_insn_reservation "marvell_f_vfp_to_core" 4 + (and (eq_attr "marvell_f_vfp" "yes") + (eq_attr "type" "f_2_r")) + "a1_e2,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle") + --- /dev/null +++ b/gcc/config/arm/montavista-linux.h @@ -0,0 +1,33 @@ +/* MontaVista GNU/Linux Configuration. + Copyright (C) 2009 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Add -tarmv6 and -tthumb2 options for convenience in generating multilibs. +*/ +#undef CC1_SPEC +#define CC1_SPEC " \ + %{tarmv6: -march=armv6 -mfloat-abi=softfp ; \ + tthumb2: -mthumb -march=armv7-a -mfloat-abi=softfp ; \ + : -march=armv5t}" + +/* The various C libraries each have their own subdirectory. 
*/ +#undef SYSROOT_SUFFIX_SPEC +#define SYSROOT_SUFFIX_SPEC \ + "%{tarmv6:/armv6 ; \ + tthumb2:/thumb2}" --- a/gcc/config/arm/neon-gen.ml +++ b/gcc/config/arm/neon-gen.ml @@ -122,6 +122,7 @@ | T_uint16 | T_int16 -> T_intHI | T_uint32 | T_int32 -> T_intSI | T_uint64 | T_int64 -> T_intDI + | T_float32 -> T_floatSF | T_poly8 -> T_intQI | T_poly16 -> T_intHI | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) @@ -320,7 +321,7 @@ typeinfo; Format.print_newline (); (* Extra types not in . *) - Format.printf "typedef __builtin_neon_sf float32_t;\n"; + Format.printf "typedef float float32_t;\n"; Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -159,7 +159,8 @@ (UNSPEC_VUZP1 201) (UNSPEC_VUZP2 202) (UNSPEC_VZIP1 203) - (UNSPEC_VZIP2 204)]) + (UNSPEC_VZIP2 204) + (UNSPEC_MISALIGNED_ACCESS 205)]) ;; Double-width vector modes. (define_mode_iterator VD [V8QI V4HI V2SI V2SF]) @@ -459,7 +460,9 @@ "=w,Uv,w, w, ?r,?w,?r,?r, ?Us") (match_operand:VD 1 "general_operand" " w,w, Dn,Uvi, w, r, r, Usi,r"))] - "TARGET_NEON" + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" { if (which_alternative == 2) { @@ -481,7 +484,7 @@ /* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp below must be changed to output_move_neon (which will use the - element/structure loads/stores), and the constraint changed to 'Un' instead + element/structure loads/stores), and the constraint changed to 'Um' instead of 'Uv'. */ switch (which_alternative) @@ -506,7 +509,9 @@ "=w,Un,w, w, ?r,?w,?r,?r, ?Us") (match_operand:VQXMOV 1 "general_operand" " w,w, Dn,Uni, w, r, r, Usi, r"))] - "TARGET_NEON" + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" { if (which_alternative == 2) { @@ -549,6 +554,11 @@ (match_operand:TI 1 "general_operand" ""))] "TARGET_NEON" { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (TImode, operands[1]); + } }) (define_expand "mov" @@ -556,12 +566,19 @@ (match_operand:VSTRUCT 1 "general_operand" ""))] "TARGET_NEON" { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (mode, operands[1]); + } }) (define_insn "*neon_mov" [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] - "TARGET_NEON" + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" { switch (which_alternative) { @@ -658,6 +675,49 @@ neon_disambiguate_copy (operands, dest, src, 4); }) +(define_expand "movmisalign" + [(set (match_operand:VDQX 0 "nonimmediate_operand" "") + (unspec:VDQX [(match_operand:VDQX 1 "general_operand" "")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + if (!s_register_operand (operands[0], mode) + && !s_register_operand (operands[1], mode)) + FAIL; +}) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VDX 0 "memory_operand" "=Um") + (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vst1.\t{%P1}, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand:VDX 1 "memory_operand" " Um")] + UNSPEC_MISALIGNED_ACCESS))] + 
"TARGET_NEON && !BYTES_BIG_ENDIAN" + "vld1.\t{%P0}, %A1" + [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VQX 0 "memory_operand" "=Um") + (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vst1.\t{%q1}, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand:VQX 1 "general_operand" " Um")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vld1.\t{%q0}, %A1" + [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + (define_insn "vec_set_internal" [(set (match_operand:VD 0 "s_register_operand" "=w") (vec_merge:VD @@ -862,6 +922,50 @@ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] ) +(define_insn "*mul3add_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")) + (match_operand:VDQ 1 "s_register_operand" "0")))] + "TARGET_NEON" + "vmla.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) + +(define_insn "*mul3negadd_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0") + (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w"))))] + "TARGET_NEON" + "vmls.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) + (define_insn "ior3" [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") @@ -3611,7 +3715,8 @@ UNSPEC_VSHLL_N))] "TARGET_NEON" { - neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + /* The boundaries are: 0 < imm <= size. */ + neon_const_bounds (operands[2], 0, neon_element_bits (mode) + 1); return "vshll.%T3%#\t%q0, %P1, %2"; } [(set_attr "neon_type" "neon_shift_1")] --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -50,7 +50,7 @@ | T_ptrto of vectype | T_const of vectype | T_void | T_intQI | T_intHI | T_intSI - | T_intDI + | T_intDI | T_floatSF (* The meanings of the following are: TImode : "Tetra", two registers (four words). 
@@ -1693,6 +1693,7 @@ | T_intHI -> "__builtin_neon_hi" | T_intSI -> "__builtin_neon_si" | T_intDI -> "__builtin_neon_di" + | T_floatSF -> "__builtin_neon_sf" | T_arrayof (num, base) -> let basename = name (fun x -> x) base in affix (Printf.sprintf "%sx%d" basename num) --- a/gcc/config/arm/neon-testgen.ml +++ b/gcc/config/arm/neon-testgen.ml @@ -51,8 +51,8 @@ Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n"; Printf.fprintf chan "/* { dg-do assemble } */\n"; Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n"; - Printf.fprintf chan - "/* { dg-options \"-save-temps -O0 -mfpu=neon -mfloat-abi=softfp\" } */\n"; + Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n"; + Printf.fprintf chan "/* { dg-add-options arm_neon } */\n"; Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"; Printf.fprintf chan "void test_%s (void)\n{\n" test_name --- a/gcc/config/arm/netbsd-elf.h +++ b/gcc/config/arm/netbsd-elf.h @@ -153,5 +153,5 @@ while (0) #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" --- /dev/null +++ b/gcc/config/arm/nocrt0.h @@ -0,0 +1,25 @@ +/* Definitions for generic libgloss based cofigs where crt0 is supplied by + the linker script. + Copyright (C) 2006 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC " crti%O%s crtbegin%O%s" + +#undef LIB_SPEC +#define LIB_SPEC "-lc" --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -73,6 +73,10 @@ || REGNO_REG_CLASS (REGNO (op)) == FPA_REGS)); }) +(define_special_predicate "subreg_lowpart_operator" + (and (match_code "subreg") + (match_test "subreg_lowpart_p (op)"))) + ;; Reg, subreg(reg) or const_int. (define_predicate "reg_or_int_operand" (ior (match_code "const_int") @@ -168,6 +172,11 @@ (and (match_code "plus,minus,ior,xor,and") (match_test "mode == GET_MODE (op)"))) +;; True for plus/minus operators +(define_special_predicate "plusminus_operator" + (and (match_code "plus,minus") + (match_test "mode == GET_MODE (op)"))) + ;; True for logical binary operators. 
(define_special_predicate "logical_binary_operator" (and (match_code "ior,xor,and") @@ -295,6 +304,9 @@ HOST_WIDE_INT i = 1, base = 0; rtx elt; + if (low_irq_latency) + return false; + if (count <= 1 || GET_CODE (XVECEXP (op, 0, 0)) != SET) return false; @@ -352,6 +364,9 @@ HOST_WIDE_INT i = 1, base = 0; rtx elt; + if (low_irq_latency) + return false; + if (count <= 1 || GET_CODE (XVECEXP (op, 0, 0)) != SET) return false; --- a/gcc/config/arm/sfp-machine.h +++ b/gcc/config/arm/sfp-machine.h @@ -14,9 +14,11 @@ #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) +#define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1) #define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) #define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 #define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_H 0 #define _FP_NANSIGN_S 0 #define _FP_NANSIGN_D 0 #define _FP_NANSIGN_Q 0 @@ -92,5 +94,7 @@ #define __fixdfdi __aeabi_d2lz #define __fixunsdfdi __aeabi_d2ulz #define __floatdidf __aeabi_l2d +#define __extendhfsf2 __gnu_h2f_ieee +#define __truncsfhf2 __gnu_f2h_ieee #endif /* __ARM_EABI__ */ --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -13,7 +13,9 @@ $(srcdir)/config/arm/iwmmxt.md \ $(srcdir)/config/arm/vfp.md \ $(srcdir)/config/arm/neon.md \ - $(srcdir)/config/arm/thumb2.md + $(srcdir)/config/arm/thumb2.md \ + $(srcdir)/config/arm/marvell-f.md \ + $(srcdir)/config/arm/hwdiv.md s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -24,10 +24,18 @@ #MULTILIB_MATCHES += march?armv7=march?armv7-a #MULTILIB_MATCHES += march?armv7=march?armv7-r #MULTILIB_MATCHES += march?armv7=march?armv7-m +#MULTILIB_MATCHES += march?armv7=march?armv7e-m #MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 #MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 #MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 +# Not quite true. We can support hard-vfp calling in Thumb2, but how do we +# express that here? Also, we really need architecture v5e or later +# (mcrr etc). +MULTILIB_OPTIONS += mfloat-abi=hard +MULTILIB_DIRNAMES += fpu +MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* + # MULTILIB_OPTIONS += mcpu=ep9312 # MULTILIB_DIRNAMES += ep9312 # MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312* --- a/gcc/config/arm/t-bpabi +++ b/gcc/config/arm/t-bpabi @@ -1,10 +1,13 @@ # Add the bpabi.S functions. -LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod +LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod \ + _aeabi_idiv0 _aeabi_ldiv0 # Add the BPABI C functions. LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \ $(srcdir)/config/arm/unaligned-funcs.c +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c + UNWIND_H = $(srcdir)/config/arm/unwind-arm.h LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \ $(srcdir)/config/arm/libunwind.S \ --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -24,6 +24,8 @@ ;; changes made in armv5t as "thumb2". These are considered part ;; the 16-bit Thumb-1 instruction set. 
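The new hwdiv.md (added to MD_INCLUDES above and included at the top of thumb2.md below) is where the divsi3/udivsi3 patterns move to. A hypothetical illustration of what they match, not taken from the patch:

/* e.g. -O2 -mthumb -mcpu=cortex-m3 (a core with arm_arch_hwdiv).  */
int
squot (int a, int b)
{
  return a / b;       /* single sdiv instead of a call to __aeabi_idiv */
}

unsigned int
uquot (unsigned int a, unsigned int b)
{
  return a / b;       /* single udiv instead of a call to __aeabi_uidiv */
}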
+(include "hwdiv.md") + (define_insn "*thumb2_incscc" [(set (match_operand:SI 0 "s_register_operand" "=r,r") (plus:SI (match_operator:SI 2 "arm_comparison_operator" @@ -172,34 +174,6 @@ (set_attr "length" "8")] ) -(define_insn "*thumb2_abssi2" - [(set (match_operand:SI 0 "s_register_operand" "=r,&r") - (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_THUMB2" - "@ - cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" - [(set_attr "conds" "clob,*") - (set_attr "shift" "1") - ;; predicable can't be set based on the variant, so left as no - (set_attr "length" "10,8")] -) - -(define_insn "*thumb2_neg_abssi2" - [(set (match_operand:SI 0 "s_register_operand" "=r,&r") - (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_THUMB2" - "@ - cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" - [(set_attr "conds" "clob,*") - (set_attr "shift" "1") - ;; predicable can't be set based on the variant, so left as no - (set_attr "length" "10,8")] -) - (define_insn "*thumb2_movdi" [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m") (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))] @@ -223,9 +197,14 @@ (set_attr "neg_pool_range" "*,*,*,0,*")] ) +;; We have two alternatives here for memory loads (and similarly for stores) +;; to reflect the fact that the permissible constant pool ranges differ +;; between ldr instructions taking low regs and ldr instructions taking high +;; regs. The high register alternatives are not taken into account when +;; choosing register preferences in order to reflect their expense. (define_insn "*thumb2_movsi_insn" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") - (match_operand:SI 1 "general_operand" "rk ,I,K,N,mi,rk"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l,*hk,m,*m") + (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))] "TARGET_THUMB2 && ! TARGET_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_VFP) && ( register_operand (operands[0], SImode) @@ -236,11 +215,13 @@ mvn%?\\t%0, #%B1 movw%?\\t%0, %1 ldr%?\\t%0, %1 + ldr%?\\t%0, %1 + str%?\\t%1, %0 str%?\\t%1, %0" - [(set_attr "type" "*,*,*,*,load1,store1") + [(set_attr "type" "*,*,*,*,load1,load1,store1,store1") (set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,*,4096,*") - (set_attr "neg_pool_range" "*,*,*,*,0,*")] + (set_attr "pool_range" "*,*,*,*,1020,4096,*,*") + (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")] ) ;; ??? We can probably do better with thumb2 @@ -1128,27 +1109,7 @@ return \"add%!\\t%0, %1, %2\"; " [(set_attr "predicable" "yes") - (set_attr "length" "2")] -) - -(define_insn "divsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (div:SI (match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "s_register_operand" "r")))] - "TARGET_THUMB2 && arm_arch_hwdiv" - "sdiv%?\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "insn" "sdiv")] -) - -(define_insn "udivsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (udiv:SI (match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "s_register_operand" "r")))] - "TARGET_THUMB2 && arm_arch_hwdiv" - "udiv%?\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "insn" "udiv")] + (set_attr "length" "4")] ) (define_insn "*thumb2_subsi_short" @@ -1162,6 +1123,71 @@ (set_attr "length" "2")] ) +;; 16-bit encodings of "muls" and "mul". 
We only use these when +;; optimizing for size since "muls" is slow on all known +;; implementations and since "mul" will be generated by +;; "*arm_mulsi3_v6" anyhow. The assembler will use a 16-bit encoding +;; for "mul" whenever possible anyhow. +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (mult:SI (match_operand:SI 1 "low_register_operand" "") + (match_dup 0)))] + "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" + [(parallel + [(set (match_dup 0) + (mult:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (mult:SI (match_dup 0) + (match_operand:SI 1 "low_register_operand" "")))] + "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" + [(parallel + [(set (match_dup 0) + (mult:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*thumb2_mulsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (mult:SI (match_operand:SI 1 "low_register_operand" "%0") + (match_operand:SI 2 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && optimize_size && reload_completed" + "mul%!\\t%0, %2, %0" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "insn" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=l") + (mult:SI (match_dup 1) (match_dup 2)))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "insn" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "insn" "muls")]) + (define_insn "*thumb2_cbz" [(set (pc) (if_then_else (eq (match_operand:SI 0 "s_register_operand" "l,?r") @@ -1171,7 +1197,7 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" "* - if (get_attr_length (insn) == 2 && which_alternative == 0) + if (get_attr_length (insn) == 2) return \"cbz\\t%0, %l1\"; else return \"cmp\\t%0, #0\;beq\\t%l1\"; @@ -1179,7 +1205,8 @@ [(set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int 2)) - (le (minus (match_dup 1) (pc)) (const_int 128))) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (eq (symbol_ref ("which_alternative")) (const_int 0))) (const_int 2) (const_int 8)))] ) @@ -1193,7 +1220,7 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" "* - if (get_attr_length (insn) == 2 && which_alternative == 0) + if (get_attr_length (insn) == 2) return \"cbnz\\t%0, %l1\"; else return \"cmp\\t%0, #0\;bne\\t%l1\"; @@ -1201,7 +1228,8 @@ [(set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int 2)) - (le (minus (match_dup 1) (pc)) (const_int 128))) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (eq (symbol_ref ("which_alternative")) (const_int 0))) (const_int 2) (const_int 8)))] ) --- a/gcc/config/arm/t-linux-eabi +++ b/gcc/config/arm/t-linux-eabi @@ -6,8 +6,8 @@ MULTILIB_OPTIONS = MULTILIB_DIRNAMES = -# Use a version of div0 which raises SIGFPE. 
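A hypothetical illustration of the thumb2.md multiply peepholes just above, not taken from the patch: when optimizing for size on a Thumb-2 core and the condition flags are dead at that point, the multiplication below may now be emitted with the 16-bit muls encoding rather than the 32-bit mul.

/* e.g. -Os -mthumb on any Thumb-2 core.  */
int
prod (int x, int y)
{
  return x * y;
}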
-LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx +# Use a version of div0 which raises SIGFPE, and a special __clear_cache. +LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache # Multilib the standard Linux files. Don't include crti.o or crtn.o, # which are provided by glibc. --- a/gcc/config/arm/t-symbian +++ b/gcc/config/arm/t-symbian @@ -17,6 +17,9 @@ LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c LIB2ADDEHDEP = $(UNWIND_H) +# Include half-float helpers. +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c + # Create a multilib for processors with VFP floating-point, and a # multilib for those without -- using the soft-float ABI in both # cases. Symbian OS object should be compiled with interworking --- a/gcc/config/arm/uclinux-eabi.h +++ b/gcc/config/arm/uclinux-eabi.h @@ -50,6 +50,10 @@ #undef ARM_DEFAULT_ABI #define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "--start-group %G %L --end-group" + /* Clear the instruction cache from `beg' to `end'. This makes an inline system call to SYS_cacheflush. */ #undef CLEAR_INSN_CACHE --- a/gcc/config/arm/unwind-arm.c +++ b/gcc/config/arm/unwind-arm.c @@ -1000,7 +1000,6 @@ while (code != _URC_END_OF_STACK && code != _URC_FAILURE); - finish: restore_non_core_regs (&saved_vrs); return code; } @@ -1168,6 +1167,9 @@ { matched = (void *)(ucbp + 1); rtti = _Unwind_decode_target2 ((_uw) &data[i + 1]); + /* There is no way to encode an exception + specification for 'class X * &', so + always pass false for is_reference. */ if (__cxa_type_match (ucbp, (type_info *) rtti, 0, &matched)) break; @@ -1197,8 +1199,6 @@ ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1]; if (data[0] & uint32_highbit) - phase2_call_unexpected_after_unwind = 1; - else { data += rtti_count + 1; /* Setup for entry to the handler. */ @@ -1208,6 +1208,8 @@ _Unwind_SetGR (context, 0, (_uw) ucbp); return _URC_INSTALL_CONTEXT; } + else + phase2_call_unexpected_after_unwind = 1; } if (data[0] & uint32_highbit) data++; --- a/gcc/config/arm/unwind-arm.h +++ b/gcc/config/arm/unwind-arm.h @@ -229,9 +229,10 @@ return 0; #if (defined(linux) && !defined(__uClinux__)) || defined(__NetBSD__) - /* Pc-relative indirect. */ + /* Pc-relative indirect. Propagate the bottom 2 bits, which can + contain referenceness information in gnu unwinding tables. */ tmp += ptr; - tmp = *(_Unwind_Word *) tmp; + tmp = *(_Unwind_Word *) (tmp & ~(_Unwind_Word)3) | (tmp & 3); #elif defined(__symbian__) || defined(__uClinux__) /* Absolute pointer. Nothing more to do. */ #else --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -38,6 +38,11 @@ "TARGET_NEON || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (mode, operands[1]); + } }) ;; Vector arithmetic. Expanders are blank, then unnamed insns implement --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -51,7 +51,7 @@ ;; problems because small constants get converted into adds. 
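The vfp.md changes that follow tighten the DFmode patterns from TARGET_VFP to TARGET_VFP_DOUBLE and split DFmode register moves into fcpys pairs under TARGET_VFP_SINGLE, so that VFP variants without double-precision hardware are handled correctly. A hypothetical illustration, not taken from the patch: on such a single-precision-only FPU with -mfloat-abi=softfp, the float addition below still uses a single fadds, while the double addition falls back to the __aeabi_dadd library routine.

float
fadd (float a, float b)
{
  return a + b;
}

double
dadd (double a, double b)
{
  return a + b;
}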
(define_insn "*arm_movsi_vfp" [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk,r,*t,*t,*Uvi,*t"))] + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))] "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT && ( s_register_operand (operands[0], SImode) || s_register_operand (operands[1], SImode))" @@ -82,13 +82,17 @@ " [(set_attr "predicable" "yes") (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") + (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] ) +;; See thumb2.md:thumb2_movsi_insn for an explanation of the split +;; high/low register alternatives for loads and stores here. (define_insn "*thumb2_movsi_vfp" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m,*t,r, *t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk,r,*t,*t,*Uvi,*t"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l,*hk,m,*m,*t,r, *t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk,r,*t,*t,*Uvi,*t"))] "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT && ( s_register_operand (operands[0], SImode) || s_register_operand (operands[1], SImode))" @@ -102,25 +106,29 @@ case 3: return \"movw%?\\t%0, %1\"; case 4: - return \"ldr%?\\t%0, %1\"; case 5: - return \"str%?\\t%1, %0\"; + return \"ldr%?\\t%0, %1\"; case 6: - return \"fmsr%?\\t%0, %1\\t%@ int\"; case 7: - return \"fmrs%?\\t%0, %1\\t%@ int\"; + return \"str%?\\t%1, %0\"; case 8: + return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 9: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 10: return \"fcpys%?\\t%0, %1\\t%@ int\"; - case 9: case 10: + case 11: case 12: return output_move_vfp (operands); default: gcc_unreachable (); } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_load,f_store") - (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") - (set_attr "neg_pool_range" "*,*,*,*, 0,*,*,*,*,1008,*")] + (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_load,f_store") + (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") + (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,1020,4096,*,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] ) @@ -145,7 +153,10 @@ case 4: return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; case 5: - return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; case 6: case 7: return output_move_vfp (operands); default: @@ -153,7 +164,14 @@ } " [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") - (set_attr "length" "8,8,8,4,4,4,4,4") + (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) + (eq_attr "alternative" "5") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) (set_attr "pool_range" "*,1020,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")] ) @@ -172,7 +190,10 @@ case 4: return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; case 5: - return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + if (TARGET_VFP_SINGLE) + return 
\"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; case 6: case 7: return output_move_vfp (operands); default: @@ -180,11 +201,123 @@ } " [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_load,f_store") - (set_attr "length" "8,8,8,4,4,4,4,4") + (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) + (eq_attr "alternative" "5") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) (set_attr "pool_range" "*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")] ) +;; HFmode moves +(define_insn "*movhf_vfp_neon" + [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* S register from memory */ + return \"vld1.16\\t{%z0}, %A1\"; + case 1: /* memory from S register */ + return \"vst1.16\\t{%z1}, %A0\"; + case 2: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 3: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 4: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 5: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 6: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 7: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 8: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*") + (set_attr "length" "4,4,4,4,4,4,4,4,8")] +) + +;; FP16 without element load/store instructions. 
+(define_insn "*movhf_vfp" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 2: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 3: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 4: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 5: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 6: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "length" "4,4,4,4,4,4,8")] +) + ;; SFmode moves ;; Disparage the w<->r cases because reloading an invalid address is @@ -222,6 +355,8 @@ [(set_attr "predicable" "yes") (set_attr "type" "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") + (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") + (set_attr "insn" "*,*,*,*,*,*,*,*,mov") (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] ) @@ -258,6 +393,8 @@ [(set_attr "predicable" "yes") (set_attr "type" "r_2_f,f_2_r,fconsts,f_load,f_store,load1,store1,fcpys,*") + (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") + (set_attr "insn" "*,*,*,*,*,*,*,*,mov") (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) @@ -267,7 +404,7 @@ (define_insn "*movdf_vfp" [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") - (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))] + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP && ( register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode))" @@ -280,13 +417,17 @@ case 1: return \"fmrrd%?\\t%Q0, %R0, %P1\"; case 2: + gcc_assert (TARGET_VFP_DOUBLE); return \"fconstd%?\\t%P0, #%G1\"; case 3: case 4: return output_move_double (operands); case 5: case 6: return output_move_vfp (operands); case 7: - return \"fcpyd%?\\t%P0, %P1\"; + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; case 8: return \"#\"; default: @@ -296,14 +437,21 @@ " [(set_attr "type" "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") - (set_attr "length" "4,4,4,8,8,4,4,4,8") + (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") + (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) (set_attr 
"pool_range" "*,*,*,1020,*,1020,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")] ) (define_insn "*thumb2_movdf_vfp" [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") - (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))] + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" "* { @@ -314,13 +462,17 @@ case 1: return \"fmrrd%?\\t%Q0, %R0, %P1\"; case 2: + gcc_assert (TARGET_VFP_DOUBLE); return \"fconstd%?\\t%P0, #%G1\"; case 3: case 4: case 8: return output_move_double (operands); case 5: case 6: return output_move_vfp (operands); case 7: - return \"fcpyd%?\\t%P0, %P1\"; + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; default: abort (); } @@ -328,7 +480,14 @@ " [(set_attr "type" "r_2_f,f_2_r,fconstd,load2,store2,f_load,f_store,ffarithd,*") - (set_attr "length" "4,4,4,8,8,4,4,4,8") + (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") + (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*") (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")] ) @@ -356,7 +515,8 @@ fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] ) (define_insn "*thumb2_movsfcc_vfp" @@ -379,7 +539,8 @@ ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] ) (define_insn "*movdfcc_vfp" @@ -389,7 +550,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "@ fcpyd%D3\\t%P0, %P2 fcpyd%d3\\t%P0, %P1 @@ -402,7 +563,8 @@ fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] ) (define_insn "*thumb2_movdfcc_vfp" @@ -412,7 +574,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "@ it\\t%D3\;fcpyd%D3\\t%P0, %P2 it\\t%d3\;fcpyd%d3\\t%P0, %P1 @@ -425,7 +587,8 @@ ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" 
[(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") + (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] ) @@ -443,7 +606,7 @@ (define_insn "*absdf2_vfp" [(set (match_operand:DF 0 "s_register_operand" "=w") (abs:DF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fabsd%?\\t%P0, %P1" [(set_attr "predicable" "yes") (set_attr "type" "ffarithd")] @@ -463,12 +626,12 @@ (define_insn_and_split "*negdf2_vfp" [(set (match_operand:DF 0 "s_register_operand" "=w,?r,?r") (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "@ fnegd%?\\t%P0, %P1 # #" - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && reload_completed + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed && arm_general_register_operand (operands[0], DFmode)" [(set (match_dup 0) (match_dup 1))] " @@ -523,7 +686,7 @@ [(set (match_operand:DF 0 "s_register_operand" "=w") (plus:DF (match_operand:DF 1 "s_register_operand" "w") (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "faddd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") (set_attr "type" "faddd")] @@ -544,7 +707,7 @@ [(set (match_operand:DF 0 "s_register_operand" "=w") (minus:DF (match_operand:DF 1 "s_register_operand" "w") (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsubd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") (set_attr "type" "faddd")] @@ -567,7 +730,7 @@ [(set (match_operand:DF 0 "s_register_operand" "+w") (div:DF (match_operand:DF 1 "s_register_operand" "w") (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fdivd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") (set_attr "type" "fdivd")] @@ -590,7 +753,7 @@ [(set (match_operand:DF 0 "s_register_operand" "+w") (mult:DF (match_operand:DF 1 "s_register_operand" "w") (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") (set_attr "type" "fmuld")] @@ -611,7 +774,7 @@ [(set (match_operand:DF 0 "s_register_operand" "+w") (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") (set_attr "type" "fmuld")] @@ -626,7 +789,8 @@ (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") (match_operand:SF 3 "s_register_operand" "t")) (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP + && (!arm_tune_marvell_f || optimize_size)" "fmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") (set_attr 
"type" "fmacs")] @@ -637,7 +801,8 @@ (plus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") (match_operand:DF 3 "s_register_operand" "w")) (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE + && (!arm_tune_marvell_f || optimize_size)" "fmacd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") (set_attr "type" "fmacd")] @@ -649,7 +814,8 @@ (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") (match_operand:SF 3 "s_register_operand" "t")) (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP + && (!arm_tune_marvell_f || optimize_size)" "fmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") (set_attr "type" "fmacs")] @@ -660,7 +826,8 @@ (minus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") (match_operand:DF 3 "s_register_operand" "w")) (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE + && (!arm_tune_marvell_f || optimize_size)" "fmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") (set_attr "type" "fmacd")] @@ -672,7 +839,8 @@ (minus:SF (match_operand:SF 1 "s_register_operand" "0") (mult:SF (match_operand:SF 2 "s_register_operand" "t") (match_operand:SF 3 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP + && (!arm_tune_marvell_f || optimize_size)" "fnmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") (set_attr "type" "fmacs")] @@ -683,7 +851,8 @@ (minus:DF (match_operand:DF 1 "s_register_operand" "0") (mult:DF (match_operand:DF 2 "s_register_operand" "w") (match_operand:DF 3 "s_register_operand" "w"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE + && (!arm_tune_marvell_f || optimize_size)" "fnmacd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") (set_attr "type" "fmacd")] @@ -697,7 +866,8 @@ (neg:SF (match_operand:SF 2 "s_register_operand" "t")) (match_operand:SF 3 "s_register_operand" "t")) (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP + && (!arm_tune_marvell_f || optimize_size)" "fnmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") (set_attr "type" "fmacs")] @@ -709,7 +879,8 @@ (neg:DF (match_operand:DF 2 "s_register_operand" "w")) (match_operand:DF 3 "s_register_operand" "w")) (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE + && (!arm_tune_marvell_f || optimize_size)" "fnmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") (set_attr "type" "fmacd")] @@ -721,7 +892,7 @@ (define_insn "*extendsfdf2_vfp" [(set (match_operand:DF 0 "s_register_operand" "=w") (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fcvtds%?\\t%P0, %1" [(set_attr "predicable" "yes") (set_attr "type" "f_cvt")] @@ -730,12 +901,30 @@ (define_insn "*truncdfsf2_vfp" [(set (match_operand:SF 0 "s_register_operand" "=t") (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && 
TARGET_VFP_DOUBLE" "fcvtsd%?\\t%0, %P1" [(set_attr "predicable" "yes") (set_attr "type" "f_cvt")] ) +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f32.f16\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f16.f32\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + (define_insn "*truncsisf2_vfp" [(set (match_operand:SI 0 "s_register_operand" "=t") (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] @@ -748,7 +937,7 @@ (define_insn "*truncsidf2_vfp" [(set (match_operand:SI 0 "s_register_operand" "=t") (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftosizd%?\\t%0, %P1" [(set_attr "predicable" "yes") (set_attr "type" "f_cvt")] @@ -767,7 +956,7 @@ (define_insn "fixuns_truncdfsi2" [(set (match_operand:SI 0 "s_register_operand" "=t") (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftouizd%?\\t%0, %P1" [(set_attr "predicable" "yes") (set_attr "type" "f_cvt")] @@ -786,7 +975,7 @@ (define_insn "*floatsidf2_vfp" [(set (match_operand:DF 0 "s_register_operand" "=w") (float:DF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsitod%?\\t%P0, %1" [(set_attr "predicable" "yes") (set_attr "type" "f_cvt")] @@ -805,7 +994,7 @@ (define_insn "floatunssidf2" [(set (match_operand:DF 0 "s_register_operand" "=w") (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fuitod%?\\t%P0, %1" [(set_attr "predicable" "yes") (set_attr "type" "f_cvt")] @@ -826,7 +1015,7 @@ (define_insn "*sqrtdf2_vfp" [(set (match_operand:DF 0 "s_register_operand" "=w") (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsqrtd%?\\t%P0, %P1" [(set_attr "predicable" "yes") (set_attr "type" "fdivd")] @@ -878,9 +1067,9 @@ [(set (reg:CCFP CC_REGNUM) (compare:CCFP (match_operand:DF 0 "s_register_operand" "w") (match_operand:DF 1 "vfp_compare_operand" "wG")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "#" - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" [(set (reg:CCFP VFPCC_REGNUM) (compare:CCFP (match_dup 0) (match_dup 1))) @@ -893,9 +1082,9 @@ [(set (reg:CCFPE CC_REGNUM) (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w") (match_operand:DF 1 "vfp_compare_operand" "wG")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "#" - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" [(set (reg:CCFPE VFPCC_REGNUM) (compare:CCFPE (match_dup 0) 
(match_dup 1))) @@ -935,7 +1124,7 @@ [(set (reg:CCFP VFPCC_REGNUM) (compare:CCFP (match_operand:DF 0 "s_register_operand" "w,w") (match_operand:DF 1 "vfp_compare_operand" "w,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "@ fcmpd%?\\t%P0, %P1 fcmpzd%?\\t%P0" @@ -947,7 +1136,7 @@ [(set (reg:CCFPE VFPCC_REGNUM) (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w,w") (match_operand:DF 1 "vfp_compare_operand" "w,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "@ fcmped%?\\t%P0, %P1 fcmpezd%?\\t%P0" --- /dev/null +++ b/gcc/config/i386/atom.md @@ -0,0 +1,795 @@ +;; Atom Scheduling +;; Copyright (C) 2009 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . +;; +;; Atom is an in-order core with two integer pipelines. + + +(define_attr "atom_unit" "sishuf,simul,jeu,complex,other" + (const_string "other")) + +(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other" + (const_string "other")) + +(define_automaton "atom") + +;; Atom has two ports: port 0 and port 1 connecting to all execution units +(define_cpu_unit "atom-port-0,atom-port-1" "atom") + +;; EU: Execution Unit +;; Atom EUs are connected by port 0 or port 1. + +(define_cpu_unit "atom-eu-0, atom-eu-1, + atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4" + "atom") + +;; Some EUs have duplicated copied and can be accessed via either +;; port 0 or port 1 +;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)") + +;;; Some instructions is dual-pipe execution, need both ports +;;; Complex multi-op macro-instructoins need both ports and all EUs +(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)") +(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 + + atom-imul-1 + atom-imul-2 + atom-imul-3 + + atom-imul-4)") + +;;; Most of simple instructions have 1 cycle latency. Some of them +;;; issue in port 0, some in port 0 and some in either port. +(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)") +(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)") +(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)") + +;;; Some insn issues in port 0 with 3 cycle latency and 1 cycle tput +(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)") + +;;; fmul insn can have 4 or 5 cycles latency +(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4") +(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3") + +;;; fadd can has 5 cycles latency depends on instruction forms +(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5") + +;;; imul insn has 5 cycles latency +(define_reservation "atom-imul-32" + "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4, + atom-port-0") +;;; imul instruction excludes other non-FP instructions. 
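[Editorial note] The preamble above fixes the shape of the model that the reservations below implement: two issue ports, 1-cycle simple ops on either port, 4-5 cycle fmul and 5-cycle fadd/imul tied to a single port, and an imul pipeline that locks out the other integer EUs (the exclusion_set immediately below encodes that last rule). As a sanity check of those numbers only -- not a substitute for the automaton GCC generates from these define_reservation/define_insn_reservation forms -- here is a minimal, self-contained C table; the class names are informal labels, not insn reservation names, and the figures are copied from the comments above.

/* Illustrative only: a condensed view of the Atom port/latency model
   described in the comments above.  These are not GCC data structures.  */
#include <stdio.h>

enum issue_port { PORT_0, PORT_1, EITHER_PORT, BOTH_PORTS };

struct atom_class
{
  const char *name;
  enum issue_port port;
  int latency;                  /* result latency in cycles */
};

static const struct atom_class atom_classes[] =
{
  { "simple ALU",          EITHER_PORT,  1 },
  { "port-0 3-cycle op",   PORT_0,       3 },
  { "fmul (single)",       PORT_0,       4 },
  { "fmul (double)",       PORT_0,       5 },
  { "fadd",                PORT_1,       5 },
  { "imul (32-bit)",       PORT_0,       5 },  /* also blocks the other EUs */
  { "complex macro-op",    BOTH_PORTS,  -1 },  /* variable latency */
};

int
main (void)
{
  unsigned int i;
  for (i = 0; i < sizeof atom_classes / sizeof atom_classes[0]; i++)
    printf ("%-18s port=%d latency=%d\n", atom_classes[i].name,
            (int) atom_classes[i].port, atom_classes[i].latency);
  return 0;
}

The only point of the table is that the latencies quoted in the comments line up with the define_insn_reservation values that follow; the actual scheduling behaviour comes from the generated DFA.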
+(exclusion_set "atom-eu-0, atom-eu-1" + "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4") + +;;; dual-execution instructions can have 1,2,4,5 cycles latency depends on +;;; instruction forms +(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)") +(define_reservation "atom-dual-2c" + "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)") +(define_reservation "atom-dual-5c" + "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)") + +;;; Complex macro-instruction has variants of latency, and uses both ports. +(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)") + +(define_insn_reservation "atom_other" 9 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "!jeu"))) + "atom-complex, atom-all-eu*8") + +;; return has type "other" with atom_unit "jeu" +(define_insn_reservation "atom_other_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "jeu"))) + "atom-dual-1c") + +(define_insn_reservation "atom_multi" 9 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "multi")) + "atom-complex, atom-all-eu*8") + +;; Normal alu insns without carry +(define_insn_reservation "atom_alu" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "0")))) + "atom-simple-either") + +;; Normal alu insns without carry +(define_insn_reservation "atom_alu_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "0")))) + "atom-simple-either") + +;; Alu insn consuming CF, such as add/sbb +(define_insn_reservation "atom_alu_carry" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "1")))) + "atom-simple-either") + +;; Alu insn consuming CF, such as add/sbb +(define_insn_reservation "atom_alu_carry_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "1")))) + "atom-simple-either") + +(define_insn_reservation "atom_alu1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_alu1_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_negnot" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_negnot_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_imov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imov") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_imov_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imov") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; 16<-16, 32<-32 +(define_insn_reservation "atom_imovx" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "atom-simple-either") + +;; 16<-16, 32<-32, mem +(define_insn_reservation "atom_imovx_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") 
+ (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "atom-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8 +(define_insn_reservation "atom_imovx_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "atom-simple-0") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem +(define_insn_reservation "atom_imovx_2_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "atom-simple-0") + +;; 16<-8 +(define_insn_reservation "atom_imovx_3" 3 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (match_operand:HI 0 "register_operand") + (match_operand:QI 1 "general_operand")))) + "atom-complex, atom-all-eu*2") + +(define_insn_reservation "atom_lea" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "lea") + (eq_attr "mode" "!HI"))) + "atom-simple-either") + +;; lea 16bit address is complex insn +(define_insn_reservation "atom_lea_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "lea") + (eq_attr "mode" "HI"))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_incdec" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_incdec_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; simple shift instruction use SHIFT eu, none memory +(define_insn_reservation "atom_ishift" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))) + "atom-simple-0") + +;; simple shift instruction use SHIFT eu, memory +(define_insn_reservation "atom_ishift_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0")))) + "atom-simple-0") + +;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles +(define_insn_reservation "atom_ishift_3" 7 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift") + (eq_attr "prefix_0f" "1"))) + "atom-complex, atom-all-eu*6") + +(define_insn_reservation "atom_ishift1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_ishift1_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "!none"))) + "atom-simple-0") + +(define_insn_reservation "atom_rotate" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_rotate_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "!none"))) + "atom-simple-0") + +(define_insn_reservation "atom_rotate1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_rotate1_mem" 1 + (and (eq_attr "cpu" "atom") + (and 
(eq_attr "type" "rotate1") + (eq_attr "memory" "!none"))) + "atom-simple-0") + +(define_insn_reservation "atom_imul" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "none") (eq_attr "mode" "SI")))) + "atom-imul-32") + +(define_insn_reservation "atom_imul_mem" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "!none") (eq_attr "mode" "SI")))) + "atom-imul-32") + +;; latency set to 10 as common 64x64 imul +(define_insn_reservation "atom_imul_3" 10 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imul") + (eq_attr "mode" "!SI"))) + "atom-complex, atom-all-eu*9") + +(define_insn_reservation "atom_idiv" 65 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "idiv")) + "atom-complex, atom-all-eu*32, nothing*32") + +(define_insn_reservation "atom_icmp" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_icmp_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_test" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "test") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_test_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "test") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_ibr" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "!load"))) + "atom-simple-1") + +;; complex if jump target is from address +(define_insn_reservation "atom_ibr_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "load"))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_setcc" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "!store"))) + "atom-simple-either") + +;; 2 cycles complex if target is in memory +(define_insn_reservation "atom_setcc_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "store"))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_icmov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_icmov_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; UCODE if segreg, ignored +(define_insn_reservation "atom_push" 2 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "push")) + "atom-dual-2c") + +;; pop r64 is 1 cycle. UCODE if segreg, ignored +(define_insn_reservation "atom_pop" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "pop") + (eq_attr "mode" "DI"))) + "atom-dual-1c") + +;; pop non-r64 is 2 cycles. 
UCODE if segreg, ignored +(define_insn_reservation "atom_pop_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "pop") + (eq_attr "mode" "!DI"))) + "atom-dual-2c") + +;; UCODE if segreg, ignored +(define_insn_reservation "atom_call" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "call")) + "atom-dual-1c") + +(define_insn_reservation "atom_callv" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "callv")) + "atom-dual-1c") + +(define_insn_reservation "atom_leave" 3 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "leave")) + "atom-complex, atom-all-eu*2") + +(define_insn_reservation "atom_str" 3 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "str")) + "atom-complex, atom-all-eu*2") + +(define_insn_reservation "atom_sselog" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_sselog_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_sselog1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_sselog1_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "!none"))) + "atom-simple-0") + +;; not pmad, not psad +(define_insn_reservation "atom_sseiadd" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "!simul") + (eq_attr "atom_unit" "!complex"))))) + "atom-simple-either") + +;; pmad, psad and 64 +(define_insn_reservation "atom_sseiadd_2" 4 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "DI"))))) + "atom-fmul-4c") + +;; pmad, psad and 128 +(define_insn_reservation "atom_sseiadd_3" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "TI"))))) + "atom-fmul-5c") + +;; if paddq(64 bit op), phadd/phsub +(define_insn_reservation "atom_sseiadd_4" 6 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseiadd") + (ior (match_operand:V2DI 0 "register_operand") + (eq_attr "atom_unit" "complex")))) + "atom-complex, atom-all-eu*5") + +;; if immediate op. +(define_insn_reservation "atom_sseishft" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "!sishuf") + (match_operand 2 "immediate_operand")))) + "atom-simple-either") + +;; if palignr or psrldq +(define_insn_reservation "atom_sseishft_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "sishuf") + (match_operand 2 "immediate_operand")))) + "atom-simple-0") + +;; if reg/mem op +(define_insn_reservation "atom_sseishft_3" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseishft") + (not (match_operand 2 "immediate_operand")))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_sseimul" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "sseimul")) + "atom-simple-0") + +;; rcpss or rsqrtss +(define_insn_reservation "atom_sse" 4 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF")))) + "atom-fmul-4c") + +;; movshdup, movsldup. 
Suggest to type sseishft +(define_insn_reservation "atom_sse_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "movdup"))) + "atom-simple-0") + +;; lfence +(define_insn_reservation "atom_sse_3" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "lfence"))) + "atom-simple-either") + +;; sfence,clflush,mfence, prefetch +(define_insn_reservation "atom_sse_4" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (ior (eq_attr "atom_sse_attr" "fence") + (eq_attr "atom_sse_attr" "prefetch")))) + "atom-simple-0") + +;; rcpps, rsqrtss, sqrt, ldmxcsr +(define_insn_reservation "atom_sse_5" 7 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (ior (ior (eq_attr "atom_sse_attr" "sqrt") + (eq_attr "atom_sse_attr" "mxcsr")) + (and (eq_attr "atom_sse_attr" "rcp") + (eq_attr "mode" "V4SF"))))) + "atom-complex, atom-all-eu*6") + +;; xmm->xmm +(define_insn_reservation "atom_ssemov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy")))) + "atom-simple-either") + +;; reg->xmm +(define_insn_reservation "atom_ssemov_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r")))) + "atom-simple-0") + +;; xmm->reg +(define_insn_reservation "atom_ssemov_3" 3 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy")))) + "atom-eu-0-3-1") + +;; mov mem +(define_insn_reservation "atom_ssemov_4" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (eq_attr "movu" "0") (eq_attr "memory" "!none")))) + "atom-simple-0") + +;; movu mem +(define_insn_reservation "atom_ssemov_5" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (ior (eq_attr "movu" "1") (eq_attr "memory" "!none")))) + "atom-complex, atom-all-eu") + +;; no memory simple +(define_insn_reservation "atom_sseadd" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "atom-fadd-5c") + +;; memory simple +(define_insn_reservation "atom_sseadd_mem" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "!none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "atom-dual-5c") + +;; maxps, minps, *pd, hadd, hsub +(define_insn_reservation "atom_sseadd_3" 8 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseadd") + (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex")))) + "atom-complex, atom-all-eu*7") + +;; Except dppd/dpps +(define_insn_reservation "atom_ssemul" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "!SF"))) + "atom-fmul-5c") + +;; Except dppd/dpps, 4 cycle if mulss +(define_insn_reservation "atom_ssemul_2" 4 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "SF"))) + "atom-fmul-4c") + +(define_insn_reservation "atom_ssecmp" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "ssecmp")) + "atom-simple-either") + +(define_insn_reservation "atom_ssecomi" 10 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "ssecomi")) + "atom-complex, atom-all-eu*9") + +;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "atom_ssecvt" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr 
"type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "register_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "register_operand"))))) + "atom-fadd-5c") + +;; memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "atom_ssecvt_2" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "memory_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand"))))) + "atom-dual-5c") + +;; otherwise. 7 cycles average for cvtss2sd +(define_insn_reservation "atom_ssecvt_3" 7 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssecvt") + (not (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "nonimmediate_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "nonimmediate_operand")))))) + "atom-complex, atom-all-eu*6") + +;; memory and cvtsi2sd +(define_insn_reservation "atom_sseicvt" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseicvt") + (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "memory_operand")))) + "atom-dual-5c") + +;; otherwise. 8 cycles average for cvtsd2si +(define_insn_reservation "atom_sseicvt_2" 8 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseicvt") + (not (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "memory_operand"))))) + "atom-complex, atom-all-eu*7") + +(define_insn_reservation "atom_ssediv" 62 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "ssediv")) + "atom-complex, atom-all-eu*12, nothing*49") + +;; simple for fmov +(define_insn_reservation "atom_fmov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none"))) + "atom-simple-either") + +;; simple for fmov +(define_insn_reservation "atom_fmov_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; Define bypass here + +;; There will be no stall from lea to non-mem EX insns +(define_bypass 0 "atom_lea" + "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec, atom_setcc, atom_icmov, atom_pop") + +(define_bypass 0 "atom_lea" + "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "!ix86_agi_dependent") + +;; There will be 3 cycles stall from EX insns to AGAN insns LEA +(define_bypass 4 "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec,atom_ishift,atom_ishift1,atom_rotate, + atom_rotate1, atom_setcc, atom_icmov, atom_pop, + atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "atom_lea") + +;; There will be 3 cycles stall from EX insns to insns need addr calculation +(define_bypass 4 "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec,atom_ishift,atom_ishift1,atom_rotate, + atom_rotate1, atom_setcc, atom_icmov, atom_pop, + atom_imovx_mem, atom_imovx_2_mem, + atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_negnot_mem, atom_imov_mem, atom_incdec_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imul_mem, atom_icmp_mem, + atom_test_mem, atom_icmov_mem, atom_sselog_mem, + atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem, + 
atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_agi_dependent") + +;; Stall from imul to lea is 8 cycles. +(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea") + +;; Stall from imul to memory address is 8 cycles. +(define_bypass 9 "atom_imul, atom_imul_mem" + "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_negnot_mem, atom_imov_mem, atom_incdec_mem, + atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem, + atom_rotate1_mem, atom_imul_mem, atom_icmp_mem, + atom_test_mem, atom_icmov_mem, atom_sselog_mem, + atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem" + "ix86_agi_dependent") + +;; There will be 0 cycle stall from cmp/test to jcc + +;; There will be 1 cycle stall from flag producer to cmov and adc/sbb +(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry, + atom_alu1, atom_negnot, atom_incdec, atom_ishift, + atom_ishift1, atom_rotate, atom_rotate1" + "atom_icmov, atom_alu_carry") + +;; lea to shift count stall is 2 cycles +(define_bypass 3 "atom_lea" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, + atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_dep_by_shift_count") + +;; lea to shift source stall is 1 cycle +(define_bypass 2 "atom_lea" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1" + "!ix86_dep_by_shift_count") + +;; non-lea to shift count stall is 1 cycle +(define_bypass 2 "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec,atom_ishift,atom_ishift1,atom_rotate, + atom_rotate1, atom_setcc, atom_icmov, atom_pop, + atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, + atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_dep_by_shift_count") --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -29,6 +29,7 @@ #define bit_CMPXCHG16B (1 << 13) #define bit_SSE4_1 (1 << 19) #define bit_SSE4_2 (1 << 20) +#define bit_MOVBE (1 << 22) #define bit_POPCNT (1 << 23) #define bit_AES (1 << 25) #define bit_XSAVE (1 << 26) --- a/gcc/config/i386/cygming.h +++ b/gcc/config/i386/cygming.h @@ -34,7 +34,7 @@ #endif #undef TARGET_64BIT_MS_ABI -#define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI) +#define TARGET_64BIT_MS_ABI (!cfun ? ix86_abi == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI) #undef DEFAULT_ABI #define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI) @@ -203,7 +203,7 @@ #define CHECK_STACK_LIMIT 4000 #undef STACK_BOUNDARY -#define STACK_BOUNDARY (DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD) +#define STACK_BOUNDARY (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD) /* By default, target has a 80387, uses IEEE compatible arithmetic, returns float values in the 387 and needs stack probes. 
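[Editorial note] One subtlety in the define_bypass forms of atom.md above: the leading number is the effective producer-to-consumer latency that replaces the producer's define_insn_reservation latency for the listed pairs (optionally guarded by a predicate such as ix86_agi_dependent). So the "3 cycles stall" behind a 1-cycle EX op shows up as define_bypass 4, while define_bypass 0 means the dependent instruction sees no extra delay at all. The sketch below is a hypothetical lookup, only to make that arithmetic concrete; the table and function are illustrative, not GCC internals.

/* Hypothetical illustration of how a bypass table is consulted: the
   scheduler asks for the latency of a true dependence, and a matching
   entry overrides the producer's default latency.  The entries mirror a
   few of the define_bypass forms in atom.md above.  */
#include <stdio.h>
#include <string.h>

struct bypass { const char *producer, *consumer; int latency; };

static const struct bypass atom_bypasses[] =
{
  { "atom_lea",  "atom_alu", 0 },  /* no stall from lea to non-mem EX ops  */
  { "atom_alu",  "atom_lea", 4 },  /* 1-cycle EX op + 3-cycle address stall */
  { "atom_imul", "atom_lea", 9 },  /* imul feeding an address computation   */
};

static int
dep_latency (const char *producer, const char *consumer, int default_latency)
{
  unsigned int i;
  for (i = 0; i < sizeof atom_bypasses / sizeof atom_bypasses[0]; i++)
    if (!strcmp (atom_bypasses[i].producer, producer)
        && !strcmp (atom_bypasses[i].consumer, consumer))
      return atom_bypasses[i].latency;
  return default_latency;      /* the define_insn_reservation latency */
}

int
main (void)
{
  printf ("alu -> lea : %d cycles\n", dep_latency ("atom_alu", "atom_lea", 1));
  printf ("alu -> alu : %d cycles\n", dep_latency ("atom_alu", "atom_alu", 1));
  return 0;
}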
--- a/gcc/config/i386/cygming.opt +++ b/gcc/config/i386/cygming.opt @@ -45,3 +45,7 @@ mwindows Target Create GUI application + +mpe-aligned-commons +Target Var(use_pe_aligned_common) Init(HAVE_GAS_ALIGNED_COMM) +Use the GNU extension to the PE format for aligned common data --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -378,7 +378,7 @@ /* Extended features */ unsigned int has_lahf_lm = 0, has_sse4a = 0; unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; - unsigned int has_sse4_1 = 0, has_sse4_2 = 0; + unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; unsigned int has_pclmul = 0; @@ -398,9 +398,22 @@ __cpuid (1, eax, ebx, ecx, edx); - /* We don't care for extended family. */ model = (eax >> 4) & 0x0f; family = (eax >> 8) & 0x0f; + if (vendor == SIG_INTEL) + { + unsigned int extended_model, extended_family; + + extended_model = (eax >> 12) & 0xf0; + extended_family = (eax >> 20) & 0xff; + if (family == 0x0f) + { + family += extended_family; + model += extended_model; + } + else if (family == 0x06) + model += extended_model; + } has_sse3 = ecx & bit_SSE3; has_ssse3 = ecx & bit_SSSE3; @@ -408,6 +421,7 @@ has_sse4_2 = ecx & bit_SSE4_2; has_avx = ecx & bit_AVX; has_cmpxchg16b = ecx & bit_CMPXCHG16B; + has_movbe = ecx & bit_MOVBE; has_popcnt = ecx & bit_POPCNT; has_aes = ecx & bit_AES; has_pclmul = ecx & bit_PCLMUL; @@ -505,8 +519,8 @@ break; case PROCESSOR_PENTIUMPRO: if (has_longmode) - /* It is Core 2 Duo. */ - cpu = "core2"; + /* It is Core 2 or Atom. */ + cpu = (model == 28) ? "atom" : "core2"; else if (arch) { if (has_sse3) @@ -597,6 +611,8 @@ options = concat (options, "-mcx16 ", NULL); if (has_lahf_lm) options = concat (options, "-msahf ", NULL); + if (has_movbe) + options = concat (options, "-mmovbe ", NULL); if (has_aes) options = concat (options, "-maes ", NULL); if (has_pclmul) --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1036,6 +1036,79 @@ 1, /* cond_not_taken_branch_cost. */ }; +static const +struct processor_costs atom_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, + {libcall, {{32, loop}, {64, rep_prefix_4_byte}, + {8192, rep_prefix_8_byte}, {-1, libcall}}}}, + {{libcall, {{8, loop}, {15, unrolled_loop}, + {2048, rep_prefix_4_byte}, {-1, libcall}}}, + {libcall, {{24, loop}, {32, unrolled_loop}, + {8192, rep_prefix_8_byte}, {-1, libcall}}}}, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* Generic64 should produce code tuned for Nocona and K8. */ static const struct processor_costs generic64_cost = { @@ -1194,6 +1267,7 @@ #define m_PENT4 (1<
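[Editorial note] For reference, the extended family/model decoding added to driver-i386.c above follows the usual CPUID convention: on Intel parts the displayed model is the base model plus the extended-model field shifted into the high nibble (the `& 0xf0` applied after `>> 12` performs that shift in one step), and the family is widened the same way only when the base family is 0x0f. The standalone sketch below (hypothetical EAX value, plain C, no GCC internals) walks that arithmetic for an Atom-class part, which is how the `model == 28` test in the driver code above ends up selecting "atom" rather than "core2".

/* Standalone walk-through of the extended family/model arithmetic used in
   driver-i386.c above.  The EAX value is an example signature for an Atom
   (family 6, base model 0xC, extended model 1, stepping 2).  */
#include <stdio.h>

int
main (void)
{
  unsigned int eax = 0x000106C2;                       /* example CPUID.1 EAX */
  unsigned int model  = (eax >> 4) & 0x0f;             /* 0xC */
  unsigned int family = (eax >> 8) & 0x0f;             /* 6   */
  unsigned int extended_model  = (eax >> 12) & 0xf0;   /* already shifted: 0x10 */
  unsigned int extended_family = (eax >> 20) & 0xff;   /* 0   */

  if (family == 0x0f)
    {
      family += extended_family;
      model += extended_model;
    }
  else if (family == 0x06)
    model += extended_model;

  printf ("family %u, model %u\n", family, model);     /* family 6, model 28 */
  return 0;
}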