diff --git a/toolchain/gcc/Config.in b/toolchain/gcc/Config.in
index 8ec6bab0a..1f58ed8a0 100644
--- a/toolchain/gcc/Config.in
+++ b/toolchain/gcc/Config.in
@@ -53,6 +53,10 @@ if !LINUX_2_4
 		bool "gcc 4.4.1 with CodeSourcery enhancements"
 		depends BROKEN
 
+	config GCC_VERSION_4_4_3_CS
+		bool "gcc 4.4.3 with CodeSourcery enhancements"
+		depends BROKEN
+
 endif
 endchoice
 
diff --git a/toolchain/gcc/Config.version b/toolchain/gcc/Config.version
index 91a9be934..2f535eabb 100644
--- a/toolchain/gcc/Config.version
+++ b/toolchain/gcc/Config.version
@@ -10,6 +10,7 @@ config GCC_VERSION
 	default "4.4.1+cs" if GCC_VERSION_4_4_1_CS
 	default "4.4.2" if GCC_VERSION_4_4_2
 	default "4.4.3" if GCC_VERSION_4_4_3
+	default "4.4.3" if GCC_VERSION_4_4_3_CS
 	default "llvm" if GCC_VERSION_LLVM
 	default "4.1.2"
 
@@ -51,6 +52,7 @@ config GCC_VERSION_4_4
 	default y if GCC_VERSION_4_4_1_CS
 	default y if GCC_VERSION_4_4_2
 	default y if GCC_VERSION_4_4_3
+	default y if GCC_VERSION_4_4_3_CS
 
 endif
 
diff --git a/toolchain/gcc/Makefile b/toolchain/gcc/Makefile
index 09ed7b80b..fe07e7d5c 100644
--- a/toolchain/gcc/Makefile
+++ b/toolchain/gcc/Makefile
@@ -98,7 +98,7 @@ GCC_CONFIGURE:= \
 		$(call qstrip,$(CONFIG_EXTRA_GCC_CONFIG_OPTIONS)) \
 		$(if $(CONFIG_mips64)$(CONFIG_mips64el),--with-arch=mips64 --with-abi=64) \
 		$(if $(CONFIG_GCC_VERSION_LLVM),--enable-llvm=$(BUILD_DIR_BASE)/host/llvm) \
-		$(if $(CONFIG_GCC_VERSION_4_3_3_CS)$(CONFIG_GCC_VERSION_4_4_1_CS),--enable-poison-system-directories)
+		$(if $(CONFIG_GCC_VERSION_4_3_3_CS)$(CONFIG_GCC_VERSION_4_4_1_CS)$(CONFIG_GCC_VERSION_4_4_3_CS),--enable-poison-system-directories)
 
 ifneq ($(CONFIG_GCC_VERSION_4_3)$(CONFIG_GCC_VERSION_4_4),)
   GCC_BUILD_TARGET_LIBGCC:=y
diff --git a/toolchain/gcc/patches/4.4.3+cs/000-codesourcery_2009q3_68.patch b/toolchain/gcc/patches/4.4.3+cs/000-codesourcery_2009q3_68.patch
new file mode 100644
index 000000000..88be9a85f
--- /dev/null
+++ b/toolchain/gcc/patches/4.4.3+cs/000-codesourcery_2009q3_68.patch
@@ -0,0 +1,69035 @@
+diff -Nur a/config/mh-mingw b/config/mh-mingw
+--- a/config/mh-mingw	2008-11-21 14:54:41.000000000 +0100
++++ b/config/mh-mingw	2010-01-25 09:50:28.945687353 +0100
+@@ -1,6 +1,8 @@
+ # Add -D__USE_MINGW_ACCESS to enable the built compiler to work on Windows
+ # Vista (see PR33281 for details).
+-BOOT_CFLAGS += -D__USE_MINGW_ACCESS -Wno-pedantic-ms-format
+-CFLAGS += -D__USE_MINGW_ACCESS
++# Because we wrap access in libiberty/cygpath.c, we do not want to use
++# the MinGW wrappers for access.
++BOOT_CFLAGS += -Wno-pedantic-ms-format
++# CFLAGS += -D__USE_MINGW_ACCESS
+ # Increase stack limit to same as Linux default.
+ LDFLAGS += -Wl,--stack,8388608
+diff -Nur a/config/stdint.m4 b/config/stdint.m4
+--- a/config/stdint.m4	2007-04-12 15:06:43.000000000 +0200
++++ b/config/stdint.m4	2010-01-25 09:50:28.945687353 +0100
+@@ -115,19 +115,19 @@
+ 
+   # Lacking an uintptr_t? Test size of void *
+   case "$acx_cv_header_stdint:$ac_cv_type_uintptr_t" in
+-    stddef.h:* | *:no) AC_CHECK_SIZEOF(void *) ;;
++    stddef.h:* | *:no) AC_CHECK_SIZEOF(void *,,/* no standard headers */) ;;
+   esac
+ 
+   # Lacking an uint64_t? Test size of long
+   case "$acx_cv_header_stdint:$ac_cv_type_uint64_t:$ac_cv_type_u_int64_t" in
+-    stddef.h:*:* | *:no:no) AC_CHECK_SIZEOF(long) ;;
++    stddef.h:*:* | *:no:no) AC_CHECK_SIZEOF(long,,/* no standard headers */) ;;
+   esac
+ 
+   if test $acx_cv_header_stdint = stddef.h; then
+     # Lacking a good header? Test size of everything and deduce all types.
+-    AC_CHECK_SIZEOF(int)
+-    AC_CHECK_SIZEOF(short)
+-    AC_CHECK_SIZEOF(char)
++    AC_CHECK_SIZEOF(int,,/* no standard headers */)
++    AC_CHECK_SIZEOF(short,,/* no standard headers */)
++    AC_CHECK_SIZEOF(char,,/* no standard headers */)
+ 
+     AC_MSG_CHECKING(for type equivalent to int8_t)
+     case "$ac_cv_sizeof_char" in
+diff -Nur a/config/tls.m4 b/config/tls.m4
+--- a/config/tls.m4	2009-01-23 05:58:03.000000000 +0100
++++ b/config/tls.m4	2010-01-25 09:50:28.945687353 +0100
+@@ -1,5 +1,6 @@
+ dnl Check whether the target supports TLS.
+ AC_DEFUN([GCC_CHECK_TLS], [
++  AC_REQUIRE([AC_CANONICAL_HOST])
+   GCC_ENABLE(tls, yes, [], [Use thread-local storage])
+   AC_CACHE_CHECK([whether the target supports thread-local storage],
+ 		 gcc_cv_have_tls, [
+@@ -66,7 +67,24 @@
+       [dnl This is the cross-compiling case. Assume libc supports TLS if the
+        dnl binutils and the compiler do.
+        AC_LINK_IFELSE([__thread int a; int b; int main() { return a = b; }],
+-		      [gcc_cv_have_tls=yes], [gcc_cv_have_tls=no])
++		      [chktls_save_LDFLAGS="$LDFLAGS"
++		       dnl Shared library options may depend on the host; this check
++		       dnl is only known to be needed for GNU/Linux.
++		       case $host in
++			 *-*-linux*)
++			   LDFLAGS="-shared -Wl,--no-undefined $LDFLAGS"
++			   ;;
++		       esac
++		       chktls_save_CFLAGS="$CFLAGS"
++		       CFLAGS="-fPIC $CFLAGS"
++		       dnl If -shared works, test if TLS works in a shared library.
++		       AC_LINK_IFELSE([int f() { return 0; }],
++			 [AC_LINK_IFELSE([__thread int a; int b; int f() { return a = b; }],
++			   [gcc_cv_have_tls=yes],
++			   [gcc_cv_have_tls=no])],
++			 [gcc_cv_have_tls=yes])
++		       CFLAGS="$chktls_save_CFLAGS"
++		       LDFLAGS="$chktls_save_LDFLAGS"], [gcc_cv_have_tls=no])
+       ]
+     )])
+   if test "$enable_tls $gcc_cv_have_tls" = "yes yes"; then
+diff -Nur a/configure b/configure
+--- a/configure	2009-04-25 06:10:29.000000000 +0200
++++ b/configure	2010-01-25 09:50:28.945687353 +0100
+@@ -2277,7 +2277,7 @@
+     noconfigdirs="$noconfigdirs target-newlib target-libgloss target-rda ${libgcj}"
+     ;;
+   *-*-vxworks*)
+-    noconfigdirs="$noconfigdirs target-newlib target-libgloss target-libiberty target-libstdc++-v3 ${libgcj}"
++    noconfigdirs="$noconfigdirs target-newlib target-libgloss target-libiberty ${libgcj}"
+     ;;
+   alpha*-dec-osf*)
+     # ld works, but does not support shared libraries.
+diff -Nur a/configure.ac b/configure.ac
+--- a/configure.ac	2009-04-25 06:10:29.000000000 +0200
++++ b/configure.ac	2010-01-25 09:50:28.945687353 +0100
+@@ -512,7 +512,7 @@
+     noconfigdirs="$noconfigdirs target-newlib target-libgloss target-rda ${libgcj}"
+     ;;
+   *-*-vxworks*)
+-    noconfigdirs="$noconfigdirs target-newlib target-libgloss target-libiberty target-libstdc++-v3 ${libgcj}"
++    noconfigdirs="$noconfigdirs target-newlib target-libgloss target-libiberty ${libgcj}"
+     ;;
+   alpha*-dec-osf*)
+     # ld works, but does not support shared libraries.
+diff -Nur a/fixincludes/fixincl.tpl b/fixincludes/fixincl.tpl +--- a/fixincludes/fixincl.tpl 2008-09-06 21:57:26.000000000 +0200 ++++ b/fixincludes/fixincl.tpl 2010-01-25 09:50:28.945687353 +0100 +@@ -38,7 +38,7 @@ + #ifndef SED_PROGRAM + #define SED_PROGRAM "/usr/bin/sed" + #endif +-static char const sed_cmd_z[] = SED_PROGRAM; ++static char const sed_cmd_z[] = "sed"; + [= + + FOR fix =] +diff -Nur a/fixincludes/fixincl.x b/fixincludes/fixincl.x +--- a/fixincludes/fixincl.x 2009-02-28 19:13:31.000000000 +0100 ++++ b/fixincludes/fixincl.x 2010-01-25 09:50:28.945687353 +0100 +@@ -2,11 +2,11 @@ + * + * DO NOT EDIT THIS FILE (fixincl.x) + * +- * It has been AutoGen-ed Saturday February 28, 2009 at 10:11:41 AM PST ++ * It has been AutoGen-ed Monday July 20, 2009 at 01:53:53 PM PDT + * From the definitions inclhack.def + * and the template file fixincl + */ +-/* DO NOT SVN-MERGE THIS FILE, EITHER Sat Feb 28 10:11:41 PST 2009 ++/* DO NOT SVN-MERGE THIS FILE, EITHER Mon Jul 20 13:53:53 PDT 2009 + * + * You must regenerate it. Use the ./genfixes script. + * +@@ -15,7 +15,7 @@ + * certain ANSI-incompatible system header files which are fixed to work + * correctly with ANSI C and placed in a directory that GNU C will search. + * +- * This file contains 180 fixup descriptions. ++ * This file contains 181 fixup descriptions. + * + * See README for more information. + * +@@ -39,7 +39,7 @@ + #ifndef SED_PROGRAM + #define SED_PROGRAM "/usr/bin/sed" + #endif +-static char const sed_cmd_z[] = SED_PROGRAM; ++static char const sed_cmd_z[] = "sed"; + + /* * * * * * * * * * * * * * * * * * * * * * * * * * + * +@@ -2300,6 +2300,42 @@ + + /* * * * * * * * * * * * * * * * * * * * * * * * * * + * ++ * Description of Glibc_String2_Memset fix ++ */ ++tSCC zGlibc_String2_MemsetName[] = ++ "glibc_string2_memset"; ++ ++/* ++ * File name selection pattern ++ */ ++tSCC zGlibc_String2_MemsetList[] = ++ "bits/string2.h\0"; ++/* ++ * Machine/OS name selection pattern ++ */ ++#define apzGlibc_String2_MemsetMachs (const char**)NULL ++ ++/* ++ * content selection pattern - do fix if pattern found ++ */ ++tSCC zGlibc_String2_MemsetSelect0[] = ++ "#ifndef _HAVE_STRING_ARCH_memset\n\ ++# if _STRING_ARCH_unaligned"; ++ ++#define GLIBC_STRING2_MEMSET_TEST_CT 1 ++static tTestDesc aGlibc_String2_MemsetTests[] = { ++ { TT_EGREP, zGlibc_String2_MemsetSelect0, (regex_t*)NULL }, }; ++ ++/* ++ * Fix Command Arguments for Glibc_String2_Memset ++ */ ++static const char* apzGlibc_String2_MemsetPatch[] = { ++ "format", ++ "%0 && 0", ++ (char*)NULL }; ++ ++/* * * * * * * * * * * * * * * * * * * * * * * * * * ++ * + * Description of Gnu_Types fix + */ + tSCC zGnu_TypesName[] = +@@ -5617,8 +5653,7 @@ + * Machine/OS name selection pattern + */ + tSCC* apzSolaris_Mutex_Init_2Machs[] = { +- "*-*-solaris2.[0-9]", +- "*-*-solaris2.[0-9][!0-9]*", ++ "*-*-solaris*", + (const char*)NULL }; + + /* +@@ -5627,8 +5662,15 @@ + tSCC zSolaris_Mutex_Init_2Select0[] = + "@\\(#\\)pthread.h[ \t]+1.[0-9]+[ \t]+[0-9/]+ SMI"; + +-#define SOLARIS_MUTEX_INIT_2_TEST_CT 1 ++/* ++ * perform the 'test' shell command - do fix on success ++ */ ++tSCC zSolaris_Mutex_Init_2Test0[] = ++ " -n \"`grep '#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)' \\`dirname $file\\`/sys/types.h`\""; ++ ++#define SOLARIS_MUTEX_INIT_2_TEST_CT 2 + static tTestDesc aSolaris_Mutex_Init_2Tests[] = { ++ { TT_TEST, zSolaris_Mutex_Init_2Test0, 0 /* unused */ }, + { TT_EGREP, zSolaris_Mutex_Init_2Select0, (regex_t*)NULL }, }; + + /* +@@ -5670,8 +5712,15 @@ + tSCC zSolaris_Rwlock_Init_1Select0[] = + 
"@\\(#\\)pthread.h[ \t]+1.[0-9]+[ \t]+[0-9/]+ SMI"; + +-#define SOLARIS_RWLOCK_INIT_1_TEST_CT 1 ++/* ++ * perform the 'test' shell command - do fix on success ++ */ ++tSCC zSolaris_Rwlock_Init_1Test0[] = ++ " -n \"`grep '#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)' \\`dirname $file\\`/sys/types.h`\""; ++ ++#define SOLARIS_RWLOCK_INIT_1_TEST_CT 2 + static tTestDesc aSolaris_Rwlock_Init_1Tests[] = { ++ { TT_TEST, zSolaris_Rwlock_Init_1Test0, 0 /* unused */ }, + { TT_EGREP, zSolaris_Rwlock_Init_1Select0, (regex_t*)NULL }, }; + + /* +@@ -5741,8 +5790,7 @@ + * Machine/OS name selection pattern + */ + tSCC* apzSolaris_Once_Init_2Machs[] = { +- "*-*-solaris2.[0-9]", +- "*-*-solaris2.[0-9][!0-9]*", ++ "*-*-solaris*", + (const char*)NULL }; + + /* +@@ -5751,8 +5799,15 @@ + tSCC zSolaris_Once_Init_2Select0[] = + "@\\(#\\)pthread.h[ \t]+1.[0-9]+[ \t]+[0-9/]+ SMI"; + +-#define SOLARIS_ONCE_INIT_2_TEST_CT 1 ++/* ++ * perform the 'test' shell command - do fix on success ++ */ ++tSCC zSolaris_Once_Init_2Test0[] = ++ " -n \"`grep '#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)' \\`dirname $file\\`/sys/types.h`\""; ++ ++#define SOLARIS_ONCE_INIT_2_TEST_CT 2 + static tTestDesc aSolaris_Once_Init_2Tests[] = { ++ { TT_TEST, zSolaris_Once_Init_2Test0, 0 /* unused */ }, + { TT_EGREP, zSolaris_Once_Init_2Select0, (regex_t*)NULL }, }; + + /* +@@ -7308,9 +7363,9 @@ + * + * List of all fixes + */ +-#define REGEX_COUNT 226 +-#define MACH_LIST_SIZE_LIMIT 181 +-#define FIX_COUNT 180 ++#define REGEX_COUNT 227 ++#define MACH_LIST_SIZE_LIMIT 169 ++#define FIX_COUNT 181 + + /* + * Enumerate the fixes +@@ -7371,6 +7426,7 @@ + GLIBC_C99_INLINE_3_FIXIDX, + GLIBC_C99_INLINE_4_FIXIDX, + GLIBC_MUTEX_INIT_FIXIDX, ++ GLIBC_STRING2_MEMSET_FIXIDX, + GNU_TYPES_FIXIDX, + HP_INLINE_FIXIDX, + HP_SYSFILE_FIXIDX, +@@ -7774,6 +7830,11 @@ + GLIBC_MUTEX_INIT_TEST_CT, FD_MACH_ONLY, + aGlibc_Mutex_InitTests, apzGlibc_Mutex_InitPatch, 0 }, + ++ { zGlibc_String2_MemsetName, zGlibc_String2_MemsetList, ++ apzGlibc_String2_MemsetMachs, ++ GLIBC_STRING2_MEMSET_TEST_CT, FD_MACH_ONLY | FD_SUBROUTINE, ++ aGlibc_String2_MemsetTests, apzGlibc_String2_MemsetPatch, 0 }, ++ + { zGnu_TypesName, zGnu_TypesList, + apzGnu_TypesMachs, + GNU_TYPES_TEST_CT, FD_MACH_IFNOT | FD_SUBROUTINE, +diff -Nur a/fixincludes/inclhack.def b/fixincludes/inclhack.def +--- a/fixincludes/inclhack.def 2009-02-28 19:13:31.000000000 +0100 ++++ b/fixincludes/inclhack.def 2010-01-25 09:50:28.955687088 +0100 +@@ -1302,6 +1302,21 @@ + }; + + ++/* glibc's bits/string2.h (before 2004-05-26) generates bogus ++ -Wstrict-aliasing warnings from calls to memset. */ ++fix = { ++ hackname = glibc_string2_memset; ++ files = "bits/string2.h"; ++ select = "#ifndef _HAVE_STRING_ARCH_memset\n# if _STRING_ARCH_unaligned"; ++ c_fix = format; ++ c_fix_arg = "%0 && 0"; ++ test_text = "#ifndef _HAVE_STRING_ARCH_memset\n" ++ "# if _STRING_ARCH_unaligned\n" ++ "# endif\n" ++ "#endif\n"; ++}; ++ ++ + /* + * Fix these files to use the types we think they should for + * ptrdiff_t, size_t, and wchar_t. +@@ -2939,24 +2954,32 @@ + }; + + /* +- * Sun Solaris defines PTHREAD_MUTEX_INITIALIZER with a trailing +- * "0" for the last field of the pthread_mutex_t structure, which is +- * of type upad64_t, which itself is typedef'd to int64_t, but with +- * __STDC__ defined (e.g. by -ansi) it is a union. So change the +- * initializer to "{0}" instead ++ * Sun Solaris defines the last field of the pthread_mutex_t structure ++ * to have type upad64_t. 
Whether upad64_t is an integer type or a ++ * union depends on whether or not the headers believe that a 64-bit ++ * integer type is available. But, PTHREAD_MUTEX_INITIALIZER is not ++ * appropriately conditionalized; it always uses "0", and never "{0}". ++ * In order to avoid warnings/errors from the compiler, we must make ++ * the initializer use braces where appropriate. ++ * ++ * Prior to Solaris 10, if __STDC__ is 1 (as when compiling with ++ * -ansi), the definition would be a union. Beginning with Solaris ++ * 10, the headers check for __GNUC__, and will never use a union with ++ * GCC. We check /usr/include/sys/types.h to see if it checks for ++ * __STDC__. ++ * ++ * A "mach" test for Solaris 10 is undesirable because we want to ++ * allow a compiler built for Solaris <10 to be used on Solaris >=10, ++ * but the installed version of fixincludes hard-wires the target ++ * machine to the configure-time $target, rather than automatically ++ * determining it at installation time. + */ + fix = { + hackname = solaris_mutex_init_2; + select = '@\(#\)pthread.h' "[ \t]+1.[0-9]+[ \t]+[0-9/]+ SMI"; + files = pthread.h; +- /* +- * On Solaris 10, this fix is unnecessary because upad64_t is +- * always defined correctly regardless of the definition of the +- * __STDC__ macro. The first "mach" pattern matches up to +- * solaris9. The second "mach" pattern will not match any two (or +- * more) digit solaris version, but it will match e.g. 2.5.1. +- */ +- mach = '*-*-solaris2.[0-9]', '*-*-solaris2.[0-9][!0-9]*'; ++ mach = '*-*-solaris*'; ++ test = " -n \"`grep '#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)' \\`dirname $file\\`/sys/types.h`\""; + c_fix = format; + c_fix_arg = "#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)\n" + "%0\n" +@@ -2967,6 +2990,7 @@ + "(|/\*.*\*/[ \t]*\\\\\n[ \t]*)\\{.*)" + ",[ \t]*0\\}" "(|[ \t].*)$"; + test_text = ++ "`mkdir -p sys; echo '#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)' >> sys/types.h`" + '#ident "@(#)pthread.h 1.26 98/04/12 SMI"'"\n" + "#define PTHREAD_MUTEX_INITIALIZER\t{{{0},0}, {{{0}}}, 0}\n" + "#define PTHREAD_COND_INITIALIZER\t{{{0}, 0}, 0}\t/* DEFAULTCV */\n" +@@ -2978,17 +3002,14 @@ + + + /* +- * Sun Solaris defines PTHREAD_RWLOCK_INITIALIZER with a "0" for some +- * fields of the pthread_rwlock_t structure, which are of type +- * upad64_t, which itself is typedef'd to int64_t, but with __STDC__ +- * defined (e.g. by -ansi) it is a union. So change the initializer +- * to "{0}" instead. ++ * See comments for solaris_mutex_init_2 re. upad64_t. + */ + fix = { + hackname = solaris_rwlock_init_1; + select = '@\(#\)pthread.h' "[ \t]+1.[0-9]+[ \t]+[0-9/]+ SMI"; + files = pthread.h; + mach = '*-*-solaris*'; ++ test = " -n \"`grep '#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)' \\`dirname $file\\`/sys/types.h`\""; + c_fix = format; + c_fix_arg = "#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)\n" + "%0\n" +@@ -3024,24 +3045,14 @@ + + + /* +- * Sun Solaris defines PTHREAD_ONCE_INIT with a "0" for some +- * fields of the pthread_once_t structure, which are of type +- * upad64_t, which itself is typedef'd to int64_t, but with __STDC__ +- * defined (e.g. by -ansi) it is a union. So change the initializer +- * to "{0}" instead. This test relies on solaris_once_init_1. ++ * See comments for solaris_mutex_init_2 re. upad64_t. 
+ */ + fix = { + hackname = solaris_once_init_2; + select = '@\(#\)pthread.h' "[ \t]+1.[0-9]+[ \t]+[0-9/]+ SMI"; + files = pthread.h; +- /* +- * On Solaris 10, this fix is unnecessary because upad64_t is +- * always defined correctly regardless of the definition of the +- * __STDC__ macro. The first "mach" pattern matches up to +- * solaris9. The second "mach" pattern will not match any two (or +- * more) digit solaris version, but it will match e.g. 2.5.1. +- */ +- mach = '*-*-solaris2.[0-9]', '*-*-solaris2.[0-9][!0-9]*'; ++ mach = '*-*-solaris*'; ++ test = " -n \"`grep '#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)' \\`dirname $file\\`/sys/types.h`\""; + c_fix = format; + c_fix_arg = "#if __STDC__ - 0 == 0 && !defined(_NO_LONGLONG)\n" + "%0\n" +diff -Nur a/fixincludes/server.c b/fixincludes/server.c +--- a/fixincludes/server.c 2005-08-15 02:50:43.000000000 +0200 ++++ b/fixincludes/server.c 2010-01-25 09:50:28.955687088 +0100 +@@ -266,7 +266,7 @@ + /* Make sure the process will pay attention to us, send the + supplied command, and then have it output a special marker that + we can find. */ +- fprintf (server_pair.pf_write, "cd %s\n%s\n\necho\necho %s\n", ++ fprintf (server_pair.pf_write, "cd '%s'\n%s\n\necho\necho %s\n", + p_cur_dir, pz_cmd, z_done); + fflush (server_pair.pf_write); + +diff -Nur a/fixincludes/tests/base/bits/string2.h b/fixincludes/tests/base/bits/string2.h +--- a/fixincludes/tests/base/bits/string2.h 2007-03-26 15:25:26.000000000 +0200 ++++ b/fixincludes/tests/base/bits/string2.h 2010-01-25 09:50:28.955687088 +0100 +@@ -16,3 +16,12 @@ + # define __STRING_INLINE extern __inline + # endif + #endif /* GLIBC_C99_INLINE_3_CHECK */ ++ ++ ++#if defined( GLIBC_STRING2_MEMSET_CHECK ) ++#ifndef _HAVE_STRING_ARCH_memset ++# if _STRING_ARCH_unaligned && 0 ++# endif ++#endif ++ ++#endif /* GLIBC_STRING2_MEMSET_CHECK */ +diff -Nur a/fixincludes/tests/base/sys/types.h b/fixincludes/tests/base/sys/types.h +--- a/fixincludes/tests/base/sys/types.h 2004-08-31 11:27:00.000000000 +0200 ++++ b/fixincludes/tests/base/sys/types.h 2010-01-25 09:50:28.955687088 +0100 +@@ -28,3 +28,4 @@ + + #endif /* ushort_t */ + #endif /* GNU_TYPES_CHECK */ ++#if !defined(__STRICT_ANSI__) && !defined(_NO_LONGLONG) +diff -Nur a/gcc/acinclude.m4 b/gcc/acinclude.m4 +--- a/gcc/acinclude.m4 2008-06-02 21:37:45.000000000 +0200 ++++ b/gcc/acinclude.m4 2010-01-25 09:50:28.955687088 +0100 +@@ -482,3 +482,53 @@ + AC_DEFUN([gcc_AC_BUILD_EXEEXT], [ + ac_executable_extensions="$build_exeext"]) + ++ ++# --with-license=PATH ++AC_DEFUN([CSL_AC_LICENSE],[ ++ AC_ARG_WITH(license, ++ AC_HELP_STRING([--with-license], ++ [the path to the installed license component]), ++ [case "$withval" in ++ (yes) AC_MSG_ERROR([license not specified]) ;; ++ (no) with_license= ;; ++ (*) ;; ++ esac], ++ [with_license=]) ++ AC_SUBST(licensedir, $with_license) ++]) ++ ++# --with-csl-license-feature=FOO ++AC_DEFUN([CSL_AC_LICENSE_FEATURE],[ ++ AC_ARG_WITH(csl-license-feature, ++ AC_HELP_STRING([--with-csl-license-feature=FEATURE], ++ [Use FEATURE to communicate with the license manager]), ++ [case "$withval" in ++ (yes) AC_MSG_ERROR([license feature not specified]) ;; ++ (no) CSL_LICENSE_FEATURE="" ;; ++ (*) CSL_LICENSE_FEATURE="$withval" ;; ++ esac], ++ CSL_LICENSE_FEATURE="" ++ ) ++ if test x"$CSL_LICENSE_FEATURE" != x; then ++ AC_DEFINE_UNQUOTED(CSL_LICENSE_FEATURE, "$CSL_LICENSE_FEATURE", ++ [Required license feature]) ++ fi ++]) ++ ++# --with-csl-license-version=VERSION ++AC_DEFUN([CSL_AC_LICENSE_VERSION],[ ++ AC_ARG_WITH(csl-license-version, 
++ AC_HELP_STRING([--with-csl-license-version=VERSION], ++ [Use VERSION to communicate with the license manager]), ++ [case "$withval" in ++ (yes) AC_MSG_ERROR([license version not specified]) ;; ++ (no) CSL_LICENSE_VERSION="" ;; ++ (*) CSL_LICENSE_VERSION="$withval" ;; ++ esac], ++ CSL_LICENSE_VERSION="" ++ ) ++ if test x"$CSL_LICENSE_VERSION" != x; then ++ AC_DEFINE_UNQUOTED(CSL_LICENSE_VERSION, "$CSL_LICENSE_VERSION", ++ [Required license version]) ++ fi ++]) +diff -Nur a/gcc/addresses.h b/gcc/addresses.h +--- a/gcc/addresses.h 2007-07-26 10:37:01.000000000 +0200 ++++ b/gcc/addresses.h 2010-01-25 09:50:28.955687088 +0100 +@@ -78,3 +78,42 @@ + + return ok_for_base_p_1 (regno, mode, outer_code, index_code); + } ++ ++/* Wrapper function to unify target macros MODE_INDEX_REG_CLASS and ++ INDEX_REG_CLASS. Arguments as for the MODE_INDEX_REG_CLASS macro. */ ++ ++static inline enum reg_class ++index_reg_class (enum machine_mode mode ATTRIBUTE_UNUSED) ++{ ++#ifdef MODE_INDEX_REG_CLASS ++ return MODE_INDEX_REG_CLASS (mode); ++#else ++ return INDEX_REG_CLASS; ++#endif ++} ++ ++/* Wrapper function to unify target macros REGNO_MODE_OK_FOR_INDEX_P ++ and REGNO_OK_FOR_INDEX_P. Arguments as for the ++ REGNO_MODE_OK_FOR_INDEX_P macro. */ ++ ++static inline bool ++ok_for_index_p_1 (unsigned regno, enum machine_mode mode ATTRIBUTE_UNUSED) ++{ ++#ifdef REGNO_MODE_OK_FOR_INDEX_P ++ return REGNO_MODE_OK_FOR_INDEX_P (regno, mode); ++#else ++ return REGNO_OK_FOR_INDEX_P (regno); ++#endif ++} ++ ++/* Wrapper around ok_for_index_p_1, for use after register allocation is ++ complete. Arguments as for the called function. */ ++ ++static inline bool ++regno_ok_for_index_p (unsigned regno, enum machine_mode mode) ++{ ++ if (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0) ++ regno = reg_renumber[regno]; ++ ++ return ok_for_index_p_1 (regno, mode); ++} +diff -Nur a/gcc/calls.c b/gcc/calls.c +--- a/gcc/calls.c 2009-02-20 12:19:34.000000000 +0100 ++++ b/gcc/calls.c 2010-01-25 09:50:28.955687088 +0100 +@@ -3803,7 +3803,7 @@ + cse'ing of library calls could delete a call and leave the pop. */ + NO_DEFER_POP; + valreg = (mem_value == 0 && outmode != VOIDmode +- ? hard_libcall_value (outmode) : NULL_RTX); ++ ? hard_libcall_value (outmode, orgfun) : NULL_RTX); + + /* Stack must be properly aligned now. */ + gcc_assert (!(stack_pointer_delta +@@ -4048,8 +4048,17 @@ + /* We need to make a save area. */ + unsigned int size = arg->locate.size.constant * BITS_PER_UNIT; + enum machine_mode save_mode = mode_for_size (size, MODE_INT, 1); +- rtx adr = memory_address (save_mode, XEXP (arg->stack_slot, 0)); +- rtx stack_area = gen_rtx_MEM (save_mode, adr); ++ rtx adr; ++ rtx stack_area; ++ ++ /* We can only use save_mode if the arg is sufficiently ++ aligned. 
*/ ++ if (STRICT_ALIGNMENT ++ && GET_MODE_ALIGNMENT (save_mode) > arg->locate.boundary) ++ save_mode = BLKmode; ++ ++ adr = memory_address (save_mode, XEXP (arg->stack_slot, 0)); ++ stack_area = gen_rtx_MEM (save_mode, adr); + + if (save_mode == BLKmode) + { +diff -Nur a/gcc/c-common.c b/gcc/c-common.c +--- a/gcc/c-common.c 2009-03-30 19:42:27.000000000 +0200 ++++ b/gcc/c-common.c 2010-01-25 09:50:28.955687088 +0100 +@@ -33,7 +33,6 @@ + #include "varray.h" + #include "expr.h" + #include "c-common.h" +-#include "diagnostic.h" + #include "tm_p.h" + #include "obstack.h" + #include "cpplib.h" +@@ -42,6 +41,7 @@ + #include "tree-inline.h" + #include "c-tree.h" + #include "toplev.h" ++#include "diagnostic.h" + #include "tree-iterator.h" + #include "hashtab.h" + #include "tree-mudflap.h" +@@ -497,6 +497,10 @@ + This is a count, since unevaluated expressions can nest. */ + int skip_evaluation; + ++/* Whether lexing has been completed, so subsequent preprocessor ++ errors should use the compiler's input_location. */ ++bool done_lexing = false; ++ + /* Information about how a function name is generated. */ + struct fname_var_t + { +@@ -7522,6 +7526,68 @@ + #undef catenate_messages + } + ++/* Callback from cpp_error for PFILE to print diagnostics from the ++ preprocessor. The diagnostic is of type LEVEL, at location ++ LOCATION unless this is after lexing and the compiler's location ++ should be used instead, with column number possibly overridden by ++ COLUMN_OVERRIDE if not zero; MSG is the translated message and AP ++ the arguments. Returns true if a diagnostic was emitted, false ++ otherwise. */ ++ ++bool ++c_cpp_error (cpp_reader *pfile ATTRIBUTE_UNUSED, int level, ++ location_t location, unsigned int column_override, ++ const char *msg, va_list *ap) ++{ ++ diagnostic_info diagnostic; ++ diagnostic_t dlevel; ++ int save_warn_system_headers = warn_system_headers; ++ bool ret; ++ ++ switch (level) ++ { ++ case CPP_DL_WARNING_SYSHDR: ++ if (flag_no_output) ++ return false; ++ warn_system_headers = 1; ++ /* Fall through. */ ++ case CPP_DL_WARNING: ++ if (flag_no_output) ++ return false; ++ dlevel = DK_WARNING; ++ break; ++ case CPP_DL_PEDWARN: ++ if (flag_no_output && !flag_pedantic_errors) ++ return false; ++ dlevel = DK_PEDWARN; ++ break; ++ case CPP_DL_ERROR: ++ dlevel = DK_ERROR; ++ break; ++ case CPP_DL_ICE: ++ dlevel = DK_ICE; ++ break; ++ case CPP_DL_NOTE: ++ dlevel = DK_NOTE; ++ break; ++ case CPP_DL_FATAL: ++ dlevel = DK_FATAL; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ if (done_lexing) ++ location = input_location; ++ diagnostic_set_info_translated (&diagnostic, msg, ap, ++ location, dlevel); ++ if (column_override) ++ diagnostic_override_column (&diagnostic, column_override); ++ ret = report_diagnostic (&diagnostic); ++ if (level == CPP_DL_WARNING_SYSHDR) ++ warn_system_headers = save_warn_system_headers; ++ return ret; ++} ++ + /* Walk a gimplified function and warn for functions whose return value is + ignored and attribute((warn_unused_result)) is set. This is done before + inlining, so we don't have to worry about that. */ +diff -Nur a/gcc/c-common.h b/gcc/c-common.h +--- a/gcc/c-common.h 2009-03-30 19:42:27.000000000 +0200 ++++ b/gcc/c-common.h 2010-01-25 09:50:28.955687088 +0100 +@@ -658,6 +658,11 @@ + + extern int skip_evaluation; + ++/* Whether lexing has been completed, so subsequent preprocessor ++ errors should use the compiler's input_location. 
*/ ++ ++extern bool done_lexing; ++ + /* C types are partitioned into three subsets: object, function, and + incomplete types. */ + #define C_TYPE_OBJECT_P(type) \ +diff -Nur a/gcc/c-convert.c b/gcc/c-convert.c +--- a/gcc/c-convert.c 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/c-convert.c 2010-01-25 09:50:28.955687088 +0100 +@@ -70,6 +70,7 @@ + tree e = expr; + enum tree_code code = TREE_CODE (type); + const char *invalid_conv_diag; ++ tree ret; + + if (type == error_mark_node + || expr == error_mark_node +@@ -85,6 +86,9 @@ + + if (type == TREE_TYPE (expr)) + return expr; ++ ret = targetm.convert_to_type (type, expr); ++ if (ret) ++ return ret; + + if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (TREE_TYPE (expr))) + return fold_convert (type, expr); +diff -Nur a/gcc/c-decl.c b/gcc/c-decl.c +--- a/gcc/c-decl.c 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/c-decl.c 2010-01-25 09:50:28.955687088 +0100 +@@ -3994,6 +3994,7 @@ + bool bitfield = width != NULL; + tree element_type; + struct c_arg_info *arg_info = 0; ++ const char *errmsg; + + if (decl_context == FUNCDEF) + funcdef_flag = true, decl_context = NORMAL; +@@ -4531,6 +4532,12 @@ + error ("%qs declared as function returning an array", name); + type = integer_type_node; + } ++ errmsg = targetm.invalid_return_type (type); ++ if (errmsg) ++ { ++ error (errmsg); ++ type = integer_type_node; ++ } + + /* Construct the function type and go to the next + inner layer of declarator. */ +@@ -5044,6 +5051,7 @@ + { + tree parm, type, typelt; + unsigned int parmno; ++ const char *errmsg; + + /* If there is a parameter of incomplete type in a definition, + this is an error. In a declaration this is valid, and a +@@ -5087,6 +5095,14 @@ + } + } + ++ errmsg = targetm.invalid_parameter_type (type); ++ if (errmsg) ++ { ++ error (errmsg); ++ TREE_VALUE (typelt) = error_mark_node; ++ TREE_TYPE (parm) = error_mark_node; ++ } ++ + if (DECL_NAME (parm) && TREE_USED (parm)) + warn_if_shadowing (parm); + } +@@ -8071,7 +8087,7 @@ + + /* Don't waste time on further processing if -fsyntax-only or we've + encountered errors. */ +- if (flag_syntax_only || errorcount || sorrycount || cpp_errors (parse_in)) ++ if (flag_syntax_only || errorcount || sorrycount) + return; + + /* Close the external scope. */ +diff -Nur a/gcc/cfgexpand.c b/gcc/cfgexpand.c +--- a/gcc/cfgexpand.c 2009-07-11 21:06:26.000000000 +0200 ++++ b/gcc/cfgexpand.c 2010-01-25 09:50:28.955687088 +0100 +@@ -488,7 +488,8 @@ + { + unsigned int align; + +- align = LOCAL_DECL_ALIGNMENT (decl); ++ align = alignment_for_aligned_arrays (TREE_TYPE (decl), ++ LOCAL_DECL_ALIGNMENT (decl)); + + if (align > MAX_SUPPORTED_STACK_ALIGNMENT) + align = MAX_SUPPORTED_STACK_ALIGNMENT; +diff -Nur a/gcc/cgraph.c b/gcc/cgraph.c +--- a/gcc/cgraph.c 2008-11-16 23:31:58.000000000 +0100 ++++ b/gcc/cgraph.c 2010-01-25 09:50:28.955687088 +0100 +@@ -475,9 +475,11 @@ + if (DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL) + { + node->origin = cgraph_node (DECL_CONTEXT (decl)); ++ node->origin->ever_was_nested = 1; + node->next_nested = node->origin->nested; + node->origin->nested = node; + node->master_clone = node; ++ node->ever_was_nested = 1; + } + if (assembler_name_hash) + { +diff -Nur a/gcc/cgraph.h b/gcc/cgraph.h +--- a/gcc/cgraph.h 2009-03-23 17:29:33.000000000 +0100 ++++ b/gcc/cgraph.h 2010-01-25 09:50:28.955687088 +0100 +@@ -185,6 +185,8 @@ + unsigned output : 1; + /* Set for aliases once they got through assemble_alias. 
*/ + unsigned alias : 1; ++ /* Set if the function is a nested function or has nested functions. */ ++ unsigned ever_was_nested : 1; + + /* In non-unit-at-a-time mode the function body of inline candidates is saved + into clone before compiling so the function in original form can be +diff -Nur a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2009-03-28 18:28:45.000000000 +0100 ++++ b/gcc/common.opt 2010-01-25 09:50:28.955687088 +0100 +@@ -153,6 +153,10 @@ + Common Var(warn_padded) Warning + Warn when padding is required to align structure members + ++Wpoison-system-directories ++Common Var(flag_poison_system_directories) Init(1) ++Warn for -I and -L options using system directories if cross compiling ++ + Wshadow + Common Var(warn_shadow) Warning + Warn when one local variable shadows another +@@ -270,6 +274,12 @@ + fabi-version= + Common Joined UInteger Var(flag_abi_version) Init(2) + ++falign-arrays ++Target Report Var(flag_align_arrays) ++Set the minimum alignment for array variables to be the largest power ++of two less than or equal to their total storage size, or the biggest ++alignment used on the machine, whichever is smaller. ++ + falign-functions + Common Report Var(align_functions,0) Optimization UInteger + Align the start of functions +@@ -467,6 +477,10 @@ + Common Report Var(flag_early_inlining) Init(1) Optimization + Perform early inlining + ++feglibc= ++Common Report Joined Undocumented ++EGLIBC configuration specifier, serves multilib purposes. ++ + feliminate-dwarf2-dups + Common Report Var(flag_eliminate_dwarf2_dups) + Perform DWARF2 duplicate elimination +@@ -895,6 +909,10 @@ + Common Report Var(flag_profile_values) + Insert code to profile values of expressions + ++fpromote-loop-indices ++Common Report Var(flag_promote_loop_indices) Optimization ++Promote loop indices to word-sized indices when safe ++ + frandom-seed + Common + +@@ -1227,6 +1245,15 @@ + Common Report Var(flag_tree_pre) Optimization + Enable SSA-PRE optimization on trees + ++ftree-pre-partial-partial ++Common Report Var(flag_tree_pre_partial_partial) Optimization ++In SSA-PRE optimization on trees, enable partial-partial redundancy elimination. ++ ++ftree-pre-partial-partial-obliviously ++Common Report Var(flag_tree_pre_partial_partial_obliviously) Optimization ++In SSA-PRE optimization on trees, enable partial-partial redundancy ++elimination without regard for the cost of the inserted phi nodes. ++ + ftree-reassoc + Common Report Var(flag_tree_reassoc) Init(1) Optimization + Enable reassociation on tree level +diff -Nur a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c +--- a/gcc/config/arm/arm.c 2009-06-02 09:18:16.000000000 +0200 ++++ b/gcc/config/arm/arm.c 2010-01-25 09:50:28.975687047 +0100 +@@ -43,6 +43,7 @@ + #include "optabs.h" + #include "toplev.h" + #include "recog.h" ++#include "cgraph.h" + #include "ggc.h" + #include "except.h" + #include "c-pragma.h" +@@ -53,6 +54,8 @@ + #include "debug.h" + #include "langhooks.h" + #include "df.h" ++#include "intl.h" ++#include "params.h" + + /* Forward definitions of types. 
*/ + typedef struct minipool_node Mnode; +@@ -110,6 +113,7 @@ + static unsigned long arm_isr_value (tree); + static unsigned long arm_compute_func_type (void); + static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); ++static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); + static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); + #if TARGET_DLLIMPORT_DECL_ATTRIBUTES + static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); +@@ -123,6 +127,10 @@ + static int count_insns_for_constant (HOST_WIDE_INT, int); + static int arm_get_strip_length (int); + static bool arm_function_ok_for_sibcall (tree, tree); ++static bool arm_return_in_memory (const_tree, const_tree); ++static rtx arm_function_value (const_tree, const_tree, bool); ++static rtx arm_libcall_value (enum machine_mode, rtx); ++ + static void arm_internal_label (FILE *, const char *, unsigned long); + static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); +@@ -148,6 +156,9 @@ + static rtx emit_set_insn (rtx, rtx); + static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); ++static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, ++ const_tree); ++static int aapcs_select_return_coproc (const_tree, const_tree); + + #ifdef OBJECT_FORMAT_ELF + static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; +@@ -175,6 +186,7 @@ + static bool arm_output_ttype (rtx); + #endif + static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); ++static rtx arm_dwarf_register_span(rtx); + + static tree arm_cxx_guard_type (void); + static bool arm_cxx_guard_mask_bit (void); +@@ -197,6 +209,15 @@ + static int arm_issue_rate (void); + static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; + static bool arm_allocate_stack_slots_for_args (void); ++static bool arm_warn_func_result (void); ++static int arm_multipass_dfa_lookahead (void); ++static const char *arm_invalid_parameter_type (const_tree t); ++static const char *arm_invalid_return_type (const_tree t); ++static tree arm_promoted_type (const_tree t); ++static tree arm_convert_to_type (tree type, tree expr); ++static bool arm_scalar_mode_supported_p (enum machine_mode); ++static int arm_vector_min_alignment (const_tree type); ++static bool arm_vector_always_misalign(const_tree); + + + /* Initialize the GCC target structure. 
*/ +@@ -256,6 +277,12 @@ + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall + ++#undef TARGET_FUNCTION_VALUE ++#define TARGET_FUNCTION_VALUE arm_function_value ++ ++#undef TARGET_LIBCALL_VALUE ++#define TARGET_LIBCALL_VALUE arm_libcall_value ++ + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk + #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +@@ -299,6 +326,9 @@ + #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS + #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args + ++#undef TARGET_WARN_FUNC_RESULT ++#define TARGET_WARN_FUNC_RESULT arm_warn_func_result ++ + #undef TARGET_DEFAULT_SHORT_ENUMS + #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums + +@@ -353,6 +383,9 @@ + #undef TARGET_ASM_TTYPE + #define TARGET_ASM_TTYPE arm_output_ttype + ++#undef TARGET_CXX_TTYPE_REF_ENCODE ++#define TARGET_CXX_TTYPE_REF_ENCODE hook_cxx_ttype_ref_in_bit0 ++ + #undef TARGET_ARM_EABI_UNWINDER + #define TARGET_ARM_EABI_UNWINDER true + #endif /* TARGET_UNWIND_INFO */ +@@ -360,6 +393,9 @@ + #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC + #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec + ++#undef TARGET_DWARF_REGISTER_SPAN ++#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span ++ + #undef TARGET_CANNOT_COPY_INSN_P + #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p + +@@ -398,6 +434,30 @@ + #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel + #endif + ++#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ++#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD arm_multipass_dfa_lookahead ++ ++#undef TARGET_INVALID_PARAMETER_TYPE ++#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type ++ ++#undef TARGET_INVALID_RETURN_TYPE ++#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type ++ ++#undef TARGET_PROMOTED_TYPE ++#define TARGET_PROMOTED_TYPE arm_promoted_type ++ ++#undef TARGET_CONVERT_TO_TYPE ++#define TARGET_CONVERT_TO_TYPE arm_convert_to_type ++ ++#undef TARGET_SCALAR_MODE_SUPPORTED_P ++#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p ++ ++#undef TARGET_VECTOR_MIN_ALIGNMENT ++#define TARGET_VECTOR_MIN_ALIGNMENT arm_vector_min_alignment ++ ++#undef TARGET_VECTOR_ALWAYS_MISALIGN ++#define TARGET_VECTOR_ALWAYS_MISALIGN arm_vector_always_misalign ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + /* Obstack for minipool constant handling. */ +@@ -423,18 +483,18 @@ + /* The default processor used if not overridden by commandline. */ + static enum processor_type arm_default_cpu = arm_none; + +-/* Which floating point model to use. */ +-enum arm_fp_model arm_fp_model; +- +-/* Which floating point hardware is available. */ +-enum fputype arm_fpu_arch; +- + /* Which floating point hardware to schedule for. */ +-enum fputype arm_fpu_tune; ++int arm_fpu_attr; ++ ++/* Which floating popint hardware to use. */ ++const struct arm_fpu_desc *arm_fpu_desc; + + /* Whether to use floating point hardware. */ + enum float_abi_type arm_float_abi; + ++/* Which __fp16 format to use. */ ++enum arm_fp16_format_type arm_fp16_format; ++ + /* Which ABI to use. */ + enum arm_abi_type arm_abi; + +@@ -473,9 +533,19 @@ + #define FL_DIV (1 << 18) /* Hardware divide. */ + #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ + #define FL_NEON (1 << 20) /* Neon instructions. */ ++#define FL_MARVELL_F (1 << 21) /* Marvell Feroceon. */ ++#define FL_ARCH7EM (1 << 22) /* Instructions present in ARMv7E-M. 
*/ + + #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ + ++/* Some flags are ignored when comparing -mcpu and -march: ++ FL_MARVELL_F so that -mcpu=marvell-f -march=v5te works. ++ FL_LDSCHED and FL_WBUF only effect tuning, ++ FL_CO_PROC, FL_VFPV2, FL_VFPV3 and FL_NEON because FP ++ coprocessors are handled separately. */ ++#define FL_COMPAT (FL_MARVELL_F | FL_LDSCHED | FL_WBUF | FL_CO_PROC | \ ++ FL_VFPV2 | FL_VFPV3 | FL_NEON) ++ + #define FL_FOR_ARCH2 FL_NOTM + #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) + #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) +@@ -497,6 +567,7 @@ + #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) + #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) + #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) ++#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) + + /* The bits in this mask specify which + instructions we are allowed to generate. */ +@@ -533,6 +604,9 @@ + /* Nonzero if instructions not present in the 'M' profile can be used. */ + int arm_arch_notm = 0; + ++/* Nonzero if instructions present in ARMv7E-M can be used. */ ++int arm_arch7em = 0; ++ + /* Nonzero if this chip can benefit from load scheduling. */ + int arm_ld_sched = 0; + +@@ -551,6 +625,9 @@ + /* Nonzero if tuning for XScale */ + int arm_tune_xscale = 0; + ++/* Nonzero if tuning for Marvell Feroceon. */ ++int arm_tune_marvell_f = 0; ++ + /* Nonzero if we want to tune for stores that access the write-buffer. + This typically means an ARM6 or ARM7 with MMU or MPU. */ + int arm_tune_wbuf = 0; +@@ -561,6 +638,9 @@ + /* Nonzero if generating Thumb instructions. */ + int thumb_code = 0; + ++/* Nonzero if generating code for Janus2. */ ++int janus2_code = 0; ++ + /* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around +@@ -593,6 +673,8 @@ + /* The maximum number of insns to be used when loading a constant. */ + static int arm_constant_limit = 3; + ++static enum arm_pcs arm_pcs_default; ++ + /* For an explanation of these variables, see final_prescan_insn below. */ + int arm_ccfsm_state; + /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ +@@ -673,9 +755,11 @@ + {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, + {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, + {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, ++ {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, + {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, + {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, + {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, ++ {"marvell-f", marvell_f, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE | FL_MARVELL_F, NULL}, + {NULL, arm_none, NULL, 0 , NULL} + }; + +@@ -705,49 +789,34 @@ + + /* The name of the preprocessor macro to define for this architecture. */ + +-char arm_arch_name[] = "__ARM_ARCH_0UNK__"; +- +-struct fpu_desc +-{ +- const char * name; +- enum fputype fpu; +-}; +- ++#define ARM_ARCH_NAME_SIZE 25 ++char arm_arch_name[ARM_ARCH_NAME_SIZE] = "__ARM_ARCH_0UNK__"; + + /* Available values for -mfpu=. 
*/ + +-static const struct fpu_desc all_fpus[] = ++static const struct arm_fpu_desc all_fpus[] = + { +- {"fpa", FPUTYPE_FPA}, +- {"fpe2", FPUTYPE_FPA_EMU2}, +- {"fpe3", FPUTYPE_FPA_EMU2}, +- {"maverick", FPUTYPE_MAVERICK}, +- {"vfp", FPUTYPE_VFP}, +- {"vfp3", FPUTYPE_VFP3}, +- {"vfpv3", FPUTYPE_VFP3}, +- {"vfpv3-d16", FPUTYPE_VFP3D16}, +- {"neon", FPUTYPE_NEON} ++ {"fpa", ARM_FP_MODEL_FPA, 0, 0, false, false}, ++ {"fpe2", ARM_FP_MODEL_FPA, 2, 0, false, false}, ++ {"fpe3", ARM_FP_MODEL_FPA, 3, 0, false, false}, ++ {"maverick", ARM_FP_MODEL_MAVERICK, 0, 0, false, false}, ++ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false}, ++ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, ++ {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true }, ++ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false}, ++ {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false}, ++ {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true }, ++ {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true }, ++ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false}, ++ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true }, ++ {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true }, ++ {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true }, ++ {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true }, ++ {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true , true }, ++ /* Compatibility aliases. */ ++ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, + }; + +- +-/* Floating point models used by the different hardware. +- See fputype in arm.h. */ +- +-static const enum fputype fp_model_for_fpu[] = +-{ +- /* No FP hardware. */ +- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */ +- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */ +- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */ +- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */ +- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */ +- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */ +- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */ +- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */ +- ARM_FP_MODEL_VFP /* FPUTYPE_NEON */ +-}; +- +- + struct float_abi + { + const char * name; +@@ -765,6 +834,23 @@ + }; + + ++struct fp16_format ++{ ++ const char *name; ++ enum arm_fp16_format_type fp16_format_type; ++}; ++ ++ ++/* Available values for -mfp16-format=. */ ++ ++static const struct fp16_format all_fp16_formats[] = ++{ ++ {"none", ARM_FP16_FORMAT_NONE}, ++ {"ieee", ARM_FP16_FORMAT_IEEE}, ++ {"alternative", ARM_FP16_FORMAT_ALTERNATIVE} ++}; ++ ++ + struct abi_name + { + const char *name; +@@ -922,6 +1008,44 @@ + set_optab_libfunc (umod_optab, DImode, NULL); + set_optab_libfunc (smod_optab, SImode, NULL); + set_optab_libfunc (umod_optab, SImode, NULL); ++ ++ /* Half-precision float operations. The compiler handles all operations ++ with NULL libfuncs by converting the SFmode. */ ++ switch (arm_fp16_format) ++ { ++ case ARM_FP16_FORMAT_IEEE: ++ case ARM_FP16_FORMAT_ALTERNATIVE: ++ ++ /* Conversions. */ ++ set_conv_libfunc (trunc_optab, HFmode, SFmode, ++ (arm_fp16_format == ARM_FP16_FORMAT_IEEE ++ ? "__gnu_f2h_ieee" ++ : "__gnu_f2h_alternative")); ++ set_conv_libfunc (sext_optab, SFmode, HFmode, ++ (arm_fp16_format == ARM_FP16_FORMAT_IEEE ++ ? "__gnu_h2f_ieee" ++ : "__gnu_h2f_alternative")); ++ ++ /* Arithmetic. 
*/ ++ set_optab_libfunc (add_optab, HFmode, NULL); ++ set_optab_libfunc (sdiv_optab, HFmode, NULL); ++ set_optab_libfunc (smul_optab, HFmode, NULL); ++ set_optab_libfunc (neg_optab, HFmode, NULL); ++ set_optab_libfunc (sub_optab, HFmode, NULL); ++ ++ /* Comparisons. */ ++ set_optab_libfunc (eq_optab, HFmode, NULL); ++ set_optab_libfunc (ne_optab, HFmode, NULL); ++ set_optab_libfunc (lt_optab, HFmode, NULL); ++ set_optab_libfunc (le_optab, HFmode, NULL); ++ set_optab_libfunc (ge_optab, HFmode, NULL); ++ set_optab_libfunc (gt_optab, HFmode, NULL); ++ set_optab_libfunc (unord_optab, HFmode, NULL); ++ break; ++ ++ default: ++ break; ++ } + } + + /* On AAPCS systems, this is the "struct __va_list". */ +@@ -1135,6 +1259,7 @@ + arm_override_options (void) + { + unsigned i; ++ int len; + enum processor_type target_arch_cpu = arm_none; + enum processor_type selected_cpu = arm_none; + +@@ -1152,7 +1277,11 @@ + { + /* Set the architecture define. */ + if (i != ARM_OPT_SET_TUNE) +- sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); ++ { ++ len = snprintf (arm_arch_name, ARM_ARCH_NAME_SIZE, ++ "__ARM_ARCH_%s__", sel->arch); ++ gcc_assert (len < ARM_ARCH_NAME_SIZE); ++ } + + /* Determine the processor core for which we should + tune code-generation. */ +@@ -1178,8 +1307,8 @@ + make sure that they are compatible. We only generate + a warning though, and we prefer the CPU over the + architecture. */ +- if (insn_flags != 0 && (insn_flags ^ sel->flags)) +- warning (0, "switch -mcpu=%s conflicts with -march= switch", ++ if (insn_flags != 0 && ((insn_flags ^ sel->flags) & ~FL_COMPAT)) ++ warning (0, "switch -mcpu=%s conflicts with -march= switch, assuming CPU feature set", + ptr->string); + + insn_flags = sel->flags; +@@ -1279,7 +1408,11 @@ + + insn_flags = sel->flags; + } +- sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); ++ ++ len = snprintf (arm_arch_name, ARM_ARCH_NAME_SIZE, ++ "__ARM_ARCH_%s__", sel->arch); ++ gcc_assert (len < ARM_ARCH_NAME_SIZE); ++ + arm_default_cpu = (enum processor_type) (sel - all_cores); + if (arm_tune == arm_none) + arm_tune = arm_default_cpu; +@@ -1289,8 +1422,35 @@ + chosen. */ + gcc_assert (arm_tune != arm_none); + ++ if (arm_tune == cortexa8 && optimize >= 3) ++ { ++ /* These alignments were experimentally determined to improve SPECint ++ performance on SPECCPU 2000. 
*/ ++ if (align_functions <= 0) ++ align_functions = 16; ++ if (align_jumps <= 0) ++ align_jumps = 16; ++ } ++ + tune_flags = all_cores[(int)arm_tune].flags; + ++ if (target_fp16_format_name) ++ { ++ for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) ++ { ++ if (streq (all_fp16_formats[i].name, target_fp16_format_name)) ++ { ++ arm_fp16_format = all_fp16_formats[i].fp16_format_type; ++ break; ++ } ++ } ++ if (i == ARRAY_SIZE (all_fp16_formats)) ++ error ("invalid __fp16 format option: -mfp16-format=%s", ++ target_fp16_format_name); ++ } ++ else ++ arm_fp16_format = ARM_FP16_FORMAT_NONE; ++ + if (target_abi_name) + { + for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) +@@ -1383,6 +1543,7 @@ + arm_arch6 = (insn_flags & FL_ARCH6) != 0; + arm_arch6k = (insn_flags & FL_ARCH6K) != 0; + arm_arch_notm = (insn_flags & FL_NOTM) != 0; ++ arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; + arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; + arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; + arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; +@@ -1390,12 +1551,25 @@ + arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; + arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; + thumb_code = (TARGET_ARM == 0); ++ janus2_code = (TARGET_FIX_JANUS != 0); ++ if (janus2_code && TARGET_THUMB2) ++ error ("janus2 fix is not applicable when targeting a thumb2 core"); + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; ++ arm_tune_marvell_f = (tune_flags & FL_MARVELL_F) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; +- arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + ++ /* Hardware integer division is supported by some variants of the ARM ++ architecture in Thumb-2 mode. In addition some (but not all) Marvell ++ CPUs support their own hardware integer division instructions. ++ The assembler will pick the correct encoding. */ ++ if (TARGET_MARVELL_DIV && (insn_flags & FL_MARVELL_F) == 0) ++ error ("-mmarvell-div is only supported when targeting a Marvell core"); ++ ++ arm_arch_hwdiv = (TARGET_ARM && TARGET_MARVELL_DIV) ++ || (TARGET_THUMB2 && (insn_flags & FL_DIV) != 0); ++ + /* If we are not using the default (ARM mode) section anchor offset + ranges, then set the correct ranges now. */ + if (TARGET_THUMB1) +@@ -1434,7 +1608,6 @@ + if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) + error ("iwmmxt abi requires an iwmmxt capable cpu"); + +- arm_fp_model = ARM_FP_MODEL_UNKNOWN; + if (target_fpu_name == NULL && target_fpe_name != NULL) + { + if (streq (target_fpe_name, "2")) +@@ -1445,46 +1618,52 @@ + error ("invalid floating point emulation option: -mfpe=%s", + target_fpe_name); + } +- if (target_fpu_name != NULL) +- { +- /* The user specified a FPU. */ +- for (i = 0; i < ARRAY_SIZE (all_fpus); i++) +- { +- if (streq (all_fpus[i].name, target_fpu_name)) +- { +- arm_fpu_arch = all_fpus[i].fpu; +- arm_fpu_tune = arm_fpu_arch; +- arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; +- break; +- } +- } +- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN) +- error ("invalid floating point option: -mfpu=%s", target_fpu_name); +- } +- else ++ ++ if (target_fpu_name == NULL) + { + #ifdef FPUTYPE_DEFAULT +- /* Use the default if it is specified for this platform. */ +- arm_fpu_arch = FPUTYPE_DEFAULT; +- arm_fpu_tune = FPUTYPE_DEFAULT; ++ target_fpu_name = FPUTYPE_DEFAULT; + #else +- /* Pick one based on CPU type. */ +- /* ??? Some targets assume FPA is the default. 
+- if ((insn_flags & FL_VFP) != 0) +- arm_fpu_arch = FPUTYPE_VFP; +- else +- */ + if (arm_arch_cirrus) +- arm_fpu_arch = FPUTYPE_MAVERICK; ++ target_fpu_name = "maverick"; + else +- arm_fpu_arch = FPUTYPE_FPA_EMU2; ++ target_fpu_name = "fpe2"; + #endif +- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2) +- arm_fpu_tune = FPUTYPE_FPA; ++ } ++ ++ arm_fpu_desc = NULL; ++ for (i = 0; i < ARRAY_SIZE (all_fpus); i++) ++ { ++ if (streq (all_fpus[i].name, target_fpu_name)) ++ { ++ arm_fpu_desc = &all_fpus[i]; ++ break; ++ } ++ } ++ if (!arm_fpu_desc) ++ error ("invalid floating point option: -mfpu=%s", target_fpu_name); ++ ++ switch (arm_fpu_desc->model) ++ { ++ case ARM_FP_MODEL_FPA: ++ if (arm_fpu_desc->rev == 2) ++ arm_fpu_attr = FPU_FPE2; ++ else if (arm_fpu_desc->rev == 3) ++ arm_fpu_attr = FPU_FPE3; + else +- arm_fpu_tune = arm_fpu_arch; +- arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; +- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN); ++ arm_fpu_attr = FPU_FPA; ++ break; ++ ++ case ARM_FP_MODEL_MAVERICK: ++ arm_fpu_attr = FPU_MAVERICK; ++ break; ++ ++ case ARM_FP_MODEL_VFP: ++ arm_fpu_attr = FPU_VFP; ++ break; ++ ++ default: ++ gcc_unreachable(); + } + + if (target_float_abi_name != NULL) +@@ -1505,9 +1684,6 @@ + else + arm_float_abi = TARGET_DEFAULT_FLOAT_ABI; + +- if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) +- sorry ("-mfloat-abi=hard and VFP"); +- + /* FPA and iWMMXt are incompatible because the insn encodings overlap. + VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon + will ever exist. GCC makes no attempt to support this combination. */ +@@ -1518,15 +1694,40 @@ + if (TARGET_THUMB2 && TARGET_IWMMXT) + sorry ("Thumb-2 iWMMXt"); + ++ /* __fp16 support currently assumes the core has ldrh. */ ++ if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) ++ sorry ("__fp16 and no ldrh"); ++ + /* If soft-float is specified then don't use FPU. */ + if (TARGET_SOFT_FLOAT) +- arm_fpu_arch = FPUTYPE_NONE; ++ arm_fpu_attr = FPU_NONE; ++ ++ if (TARGET_AAPCS_BASED) ++ { ++ if (arm_abi == ARM_ABI_IWMMXT) ++ arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; ++ else if (arm_float_abi == ARM_FLOAT_ABI_HARD ++ && TARGET_HARD_FLOAT ++ && TARGET_VFP) ++ arm_pcs_default = ARM_PCS_AAPCS_VFP; ++ else ++ arm_pcs_default = ARM_PCS_AAPCS; ++ } ++ else ++ { ++ if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) ++ sorry ("-mfloat-abi=hard and VFP"); ++ ++ if (arm_abi == ARM_ABI_APCS) ++ arm_pcs_default = ARM_PCS_APCS; ++ else ++ arm_pcs_default = ARM_PCS_ATPCS; ++ } + + /* For arm2/3 there is no need to do any scheduling if there is only + a floating point emulator, or we are doing software floating-point. */ + if ((TARGET_SOFT_FLOAT +- || arm_fpu_tune == FPUTYPE_FPA_EMU2 +- || arm_fpu_tune == FPUTYPE_FPA_EMU3) ++ || (TARGET_FPA && arm_fpu_desc->rev)) + && (tune_flags & FL_MODE32) == 0) + flag_schedule_insns = flag_schedule_insns_after_reload = 0; + +@@ -1616,8 +1817,7 @@ + fix_cm3_ldrd = 0; + } + +- /* ??? We might want scheduling for thumb2. */ +- if (TARGET_THUMB && flag_schedule_insns) ++ if (TARGET_THUMB1 && flag_schedule_insns) + { + /* Don't warn since it's on by default in -O2. */ + flag_schedule_insns = 0; +@@ -1653,6 +1853,36 @@ + + /* Register global variables with the garbage collector. */ + arm_add_gc_roots (); ++ ++ if (low_irq_latency && TARGET_THUMB) ++ { ++ warning (0, ++ "-low-irq-latency has no effect when compiling for the Thumb"); ++ low_irq_latency = 0; ++ } ++ ++ /* CSL LOCAL */ ++ /* Loop unrolling can be a substantial win. 
At -O2, limit to 2x ++ unrolling by default to prevent excessive code growth; at -O3, ++ limit to 4x unrolling by default. We know we are not optimizing ++ for size if this is set (see arm_optimization_options). */ ++ if (flag_unroll_loops == 2) ++ { ++ if (optimize == 2) ++ { ++ flag_unroll_loops = 1; ++ if (!PARAM_SET_P (PARAM_MAX_UNROLL_TIMES)) ++ set_param_value ("max-unroll-times", 2); ++ } ++ else if (optimize > 2) ++ { ++ flag_unroll_loops = 1; ++ if (!PARAM_SET_P (PARAM_MAX_UNROLL_TIMES)) ++ set_param_value ("max-unroll-times", 4); ++ } ++ else ++ flag_unroll_loops = 0; ++ } + } + + static void +@@ -1782,6 +2012,14 @@ + return !IS_NAKED (arm_current_func_type ()); + } + ++static bool ++arm_warn_func_result (void) ++{ ++ /* Naked functions are implemented entirely in assembly, including the ++ return sequence, so suppress warnings about this. */ ++ return !IS_NAKED (arm_current_func_type ()); ++} ++ + + /* Return 1 if it is possible to return using a single instruction. + If SIBLING is non-null, this is a test for a return before a sibling +@@ -2873,14 +3111,19 @@ + + /* Define how to find the value returned by a function. */ + +-rtx +-arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED) ++static rtx ++arm_function_value(const_tree type, const_tree func, ++ bool outgoing ATTRIBUTE_UNUSED) + { + enum machine_mode mode; + int unsignedp ATTRIBUTE_UNUSED; + rtx r ATTRIBUTE_UNUSED; + + mode = TYPE_MODE (type); ++ ++ if (TARGET_AAPCS_BASED) ++ return aapcs_allocate_return_reg (mode, type, func); ++ + /* Promote integer types. */ + if (INTEGRAL_TYPE_P (type)) + PROMOTE_FUNCTION_MODE (mode, unsignedp, type); +@@ -2897,7 +3140,36 @@ + } + } + +- return LIBCALL_VALUE(mode); ++ return LIBCALL_VALUE (mode); ++} ++ ++rtx ++arm_libcall_value (enum machine_mode mode, rtx libcall) ++{ ++ if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS ++ && GET_MODE_CLASS (mode) == MODE_FLOAT) ++ { ++ /* The following libcalls return their result in integer registers, ++ even though they return a floating point value. */ ++ if (rtx_equal_p (libcall, ++ convert_optab_libfunc (sfloat_optab, mode, SImode)) ++ || rtx_equal_p (libcall, ++ convert_optab_libfunc (ufloat_optab, mode, SImode)) ++ || rtx_equal_p (libcall, ++ convert_optab_libfunc (sfloat_optab, mode, DImode)) ++ || rtx_equal_p (libcall, ++ convert_optab_libfunc (ufloat_optab, mode, DImode)) ++ || rtx_equal_p (libcall, ++ convert_optab_libfunc (trunc_optab, HFmode, SFmode)) ++ || rtx_equal_p (libcall, ++ convert_optab_libfunc (sext_optab, SFmode, HFmode))) ++ return gen_rtx_REG (mode, ARG_REGISTER(1)); ++ ++ /* XXX There are other libcalls that return in integer registers, ++ but I think they are all handled by hard insns. */ ++ } ++ ++ return LIBCALL_VALUE (mode); + } + + /* Determine the amount of memory needed to store the possible return +@@ -2907,10 +3179,12 @@ + { + int size = 16; + +- if (TARGET_ARM) ++ if (TARGET_32BIT) + { + if (TARGET_HARD_FLOAT_ABI) + { ++ if (TARGET_VFP) ++ size += 32; + if (TARGET_FPA) + size += 12; + if (TARGET_MAVERICK) +@@ -2923,27 +3197,56 @@ + return size; + } + +-/* Decide whether a type should be returned in memory (true) +- or in a register (false). This is called as the target hook +- TARGET_RETURN_IN_MEMORY. */ ++/* Decide whether TYPE should be returned in memory (true) ++ or in a register (false). FNTYPE is the type of the function making ++ the call. 
*/ + static bool +-arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) ++arm_return_in_memory (const_tree type, const_tree fntype) + { + HOST_WIDE_INT size; + +- size = int_size_in_bytes (type); ++ size = int_size_in_bytes (type); /* Negative if not fixed size. */ ++ ++ if (TARGET_AAPCS_BASED) ++ { ++ /* Simple, non-aggregate types (ie not including vectors and ++ complex) are always returned in a register (or registers). ++ We don't care about which register here, so we can short-cut ++ some of the detail. */ ++ if (!AGGREGATE_TYPE_P (type) ++ && TREE_CODE (type) != VECTOR_TYPE ++ && TREE_CODE (type) != COMPLEX_TYPE) ++ return false; ++ ++ /* Any return value that is no larger than one word can be ++ returned in r0. */ ++ if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) ++ return false; ++ ++ /* Check any available co-processors to see if they accept the ++ type as a register candidate (VFP, for example, can return ++ some aggregates in consecutive registers). These aren't ++ available if the call is variadic. */ ++ if (aapcs_select_return_coproc (type, fntype) >= 0) ++ return false; ++ ++ /* Vector values should be returned using ARM registers, not ++ memory (unless they're over 16 bytes, which will break since ++ we only have four call-clobbered registers to play with). */ ++ if (TREE_CODE (type) == VECTOR_TYPE) ++ return (size < 0 || size > (4 * UNITS_PER_WORD)); ++ ++ /* The rest go in memory. */ ++ return true; ++ } + +- /* Vector values should be returned using ARM registers, not memory (unless +- they're over 16 bytes, which will break since we only have four +- call-clobbered registers to play with). */ + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + if (!AGGREGATE_TYPE_P (type) && +- !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE)) +- /* All simple types are returned in registers. +- For AAPCS, complex types are treated the same as aggregates. */ +- return 0; ++ (TREE_CODE (type) != VECTOR_TYPE)) ++ /* All simple types are returned in registers. */ ++ return false; + + if (arm_abi != ARM_ABI_APCS) + { +@@ -2960,7 +3263,7 @@ + the aggregate is either huge or of variable size, and in either case + we will want to return it via memory and not in a register. */ + if (size < 0 || size > UNITS_PER_WORD) +- return 1; ++ return true; + + if (TREE_CODE (type) == RECORD_TYPE) + { +@@ -2980,18 +3283,18 @@ + continue; + + if (field == NULL) +- return 0; /* An empty structure. Allowed by an extension to ANSI C. */ ++ return false; /* An empty structure. Allowed by an extension to ANSI C. */ + + /* Check that the first field is valid for returning in a register. */ + + /* ... Floats are not allowed */ + if (FLOAT_TYPE_P (TREE_TYPE (field))) +- return 1; ++ return true; + + /* ... Aggregates that are not themselves valid for returning in + a register are not allowed. */ + if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) +- return 1; ++ return true; + + /* Now check the remaining fields, if any. Only bitfields are allowed, + since they are not addressable. 
*/
+@@ -3003,10 +3306,10 @@
+ continue;
+ 
+ if (!DECL_BIT_FIELD_TYPE (field))
+- return 1;
++ return true;
+ }
+ 
+- return 0;
++ return false;
+ }
+ 
+ if (TREE_CODE (type) == UNION_TYPE)
+@@ -3023,18 +3326,18 @@
+ continue;
+ 
+ if (FLOAT_TYPE_P (TREE_TYPE (field)))
+- return 1;
++ return true;
+ 
+ if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
+- return 1;
++ return true;
+ }
+ 
+- return 0;
++ return false;
+ }
+ #endif /* not ARM_WINCE */
+ 
+ /* Return all other types in memory. */
+- return 1;
++ return true;
+ }
+ 
+ /* Indicate whether or not words of a double are in big-endian order. */
+@@ -3059,14 +3362,780 @@
+ return 1;
+ }
+ 
++const struct pcs_attribute_arg
++{
++ const char *arg;
++ enum arm_pcs value;
++} pcs_attribute_args[] =
++ {
++ {"aapcs", ARM_PCS_AAPCS},
++ {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
++ {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
++ {"atpcs", ARM_PCS_ATPCS},
++ {"apcs", ARM_PCS_APCS},
++ {NULL, ARM_PCS_UNKNOWN}
++ };
++
++static enum arm_pcs
++arm_pcs_from_attribute (tree attr)
++{
++ const struct pcs_attribute_arg *ptr;
++ const char *arg;
++
++ /* Get the value of the argument. */
++ if (TREE_VALUE (attr) == NULL_TREE
++ || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
++ return ARM_PCS_UNKNOWN;
++
++ arg = TREE_STRING_POINTER (TREE_VALUE (attr));
++
++ /* Check it against the list of known arguments. */
++ for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
++ if (streq (arg, ptr->arg))
++ return ptr->value;
++
++ /* An unrecognized PCS name. */
++ return ARM_PCS_UNKNOWN;
++}
++
++/* Get the PCS variant to use for this call. TYPE is the function's type
++ specification, DECL is the specific declaration. DECL may be null if
++ the call could be indirect or if this is a library call. */
++static enum arm_pcs
++arm_get_pcs_model (const_tree type, const_tree decl)
++{
++ bool user_convention = false;
++ enum arm_pcs user_pcs = arm_pcs_default;
++ tree attr;
++
++ gcc_assert (type);
++
++ attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
++ if (attr)
++ {
++ user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
++ user_convention = true;
++ }
++
++ if (TARGET_AAPCS_BASED)
++ {
++ /* Detect varargs functions. These always use the base rules
++ (no argument is ever a candidate for a co-processor
++ register). */
++ bool base_rules = (TYPE_ARG_TYPES (type) != 0
++ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
++ != void_type_node));
++
++ if (user_convention)
++ {
++ if (user_pcs > ARM_PCS_AAPCS_LOCAL)
++ sorry ("Non-AAPCS derived PCS variant");
++ else if (base_rules && user_pcs != ARM_PCS_AAPCS)
++ error ("Variadic functions must use the base AAPCS variant");
++ }
++
++ if (base_rules)
++ return ARM_PCS_AAPCS;
++ else if (user_convention)
++ return user_pcs;
++ else if (decl && flag_unit_at_a_time)
++ {
++ /* Local functions never leak outside this compilation unit,
++ so we are free to use whatever conventions are
++ appropriate. */
++ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
++ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
++ if (i && i->local)
++ return ARM_PCS_AAPCS_LOCAL;
++ }
++ }
++ else if (user_convention && user_pcs != arm_pcs_default)
++ sorry ("PCS variant");
++
++ /* For everything else we use the target's default.
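++ (arm_pcs_default is derived from the -mabi and -mfloat-abi
++ settings in arm_override_options.)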
*/ ++ return arm_pcs_default; ++} ++ ++ ++static void ++aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, ++ const_tree fntype ATTRIBUTE_UNUSED, ++ rtx libcall ATTRIBUTE_UNUSED, ++ const_tree fndecl ATTRIBUTE_UNUSED) ++{ ++ /* Record the unallocated VFP registers. */ ++ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; ++ pcum->aapcs_vfp_reg_alloc = 0; ++} ++ ++/* Walk down the type tree of TYPE counting consecutive base elements. ++ If *MODEP is VOIDmode, then set it to the first valid floating point ++ type. If a non-floating point type is found, or if a floating point ++ type that doesn't match a non-VOIDmode *MODEP is found, then return -1, ++ otherwise return the count in the sub-tree. */ ++static int ++aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) ++{ ++ enum machine_mode mode; ++ HOST_WIDE_INT size; ++ ++ switch (TREE_CODE (type)) ++ { ++ case REAL_TYPE: ++ mode = TYPE_MODE (type); ++ if (mode != DFmode && mode != SFmode) ++ return -1; ++ ++ if (*modep == VOIDmode) ++ *modep = mode; ++ ++ if (*modep == mode) ++ return 1; ++ ++ break; ++ ++ case COMPLEX_TYPE: ++ mode = TYPE_MODE (TREE_TYPE (type)); ++ if (mode != DFmode && mode != SFmode) ++ return -1; ++ ++ if (*modep == VOIDmode) ++ *modep = mode; ++ ++ if (*modep == mode) ++ return 2; ++ ++ break; ++ ++ case VECTOR_TYPE: ++ /* Use V2SImode and V4SImode as representatives of all 64-bit ++ and 128-bit vector types, whether or not those modes are ++ supported with the present options. */ ++ size = int_size_in_bytes (type); ++ switch (size) ++ { ++ case 8: ++ mode = V2SImode; ++ break; ++ case 16: ++ mode = V4SImode; ++ break; ++ default: ++ return -1; ++ } ++ ++ if (*modep == VOIDmode) ++ *modep = mode; ++ ++ /* Vector modes are considered to be opaque: two vectors are ++ equivalent for the purposes of being homogeneous aggregates ++ if they are the same size. */ ++ if (*modep == mode) ++ return 1; ++ ++ break; ++ ++ case ARRAY_TYPE: ++ { ++ int count; ++ tree index = TYPE_DOMAIN (type); ++ ++ /* Can't handle incomplete types. */ ++ if (!COMPLETE_TYPE_P(type)) ++ return -1; ++ ++ count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); ++ if (count == -1 ++ || !index ++ || !TYPE_MAX_VALUE (index) ++ || !host_integerp (TYPE_MAX_VALUE (index), 1) ++ || !TYPE_MIN_VALUE (index) ++ || !host_integerp (TYPE_MIN_VALUE (index), 1) ++ || count < 0) ++ return -1; ++ ++ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1) ++ - tree_low_cst (TYPE_MIN_VALUE (index), 1)); ++ ++ /* There must be no padding. */ ++ if (!host_integerp (TYPE_SIZE (type), 1) ++ || (tree_low_cst (TYPE_SIZE (type), 1) ++ != count * GET_MODE_BITSIZE (*modep))) ++ return -1; ++ ++ return count; ++ } ++ ++ case RECORD_TYPE: ++ { ++ int count = 0; ++ int sub_count; ++ tree field; ++ ++ /* Can't handle incomplete types. */ ++ if (!COMPLETE_TYPE_P(type)) ++ return -1; ++ ++ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) ++ { ++ if (TREE_CODE (field) != FIELD_DECL) ++ continue; ++ ++ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); ++ if (sub_count < 0) ++ return -1; ++ count += sub_count; ++ } ++ ++ /* There must be no padding. */ ++ if (!host_integerp (TYPE_SIZE (type), 1) ++ || (tree_low_cst (TYPE_SIZE (type), 1) ++ != count * GET_MODE_BITSIZE (*modep))) ++ return -1; ++ ++ return count; ++ } ++ ++ case UNION_TYPE: ++ case QUAL_UNION_TYPE: ++ { ++ /* These aren't very interesting except in a degenerate case. 
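++ The count for a union is the maximum over its members rather than
++ the sum, and a union only qualifies when every member is itself a
++ valid candidate with the same base mode.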
*/ ++ int count = 0; ++ int sub_count; ++ tree field; ++ ++ /* Can't handle incomplete types. */ ++ if (!COMPLETE_TYPE_P(type)) ++ return -1; ++ ++ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) ++ { ++ if (TREE_CODE (field) != FIELD_DECL) ++ continue; ++ ++ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); ++ if (sub_count < 0) ++ return -1; ++ count = count > sub_count ? count : sub_count; ++ } ++ ++ /* There must be no padding. */ ++ if (!host_integerp (TYPE_SIZE (type), 1) ++ || (tree_low_cst (TYPE_SIZE (type), 1) ++ != count * GET_MODE_BITSIZE (*modep))) ++ return -1; ++ ++ return count; ++ } ++ ++ default: ++ break; ++ } ++ ++ return -1; ++} ++ ++/* Return true if PCS_VARIANT should use VFP registers. */ ++static bool ++use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) ++{ ++ if (pcs_variant == ARM_PCS_AAPCS_VFP) ++ return true; ++ ++ if (pcs_variant != ARM_PCS_AAPCS_LOCAL) ++ return false; ++ ++ return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && ++ (TARGET_VFP_DOUBLE || !is_double)); ++} ++ ++static bool ++aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, ++ enum machine_mode mode, const_tree type, ++ int *base_mode, int *count) ++{ ++ enum machine_mode new_mode = VOIDmode; ++ ++ if (GET_MODE_CLASS (mode) == MODE_FLOAT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) ++ { ++ *count = 1; ++ new_mode = mode; ++ } ++ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) ++ { ++ *count = 2; ++ new_mode = (mode == DCmode ? DFmode : SFmode); ++ } ++ else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE)) ++ { ++ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); ++ ++ if (ag_count > 0 && ag_count <= 4) ++ *count = ag_count; ++ else ++ return false; ++ } ++ else ++ return false; ++ ++ ++ if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) ++ return false; ++ ++ *base_mode = new_mode; ++ return true; ++} ++ ++static bool ++aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, ++ enum machine_mode mode, const_tree type) ++{ ++ int count ATTRIBUTE_UNUSED; ++ int ag_mode ATTRIBUTE_UNUSED; ++ ++ if (!use_vfp_abi (pcs_variant, false)) ++ return false; ++ return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, ++ &ag_mode, &count); ++} ++ ++static bool ++aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, ++ const_tree type) ++{ ++ if (!use_vfp_abi (pcum->pcs_variant, false)) ++ return false; ++ ++ return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, ++ &pcum->aapcs_vfp_rmode, ++ &pcum->aapcs_vfp_rcount); ++} ++ ++static bool ++aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, ++ const_tree type ATTRIBUTE_UNUSED) ++{ ++ int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); ++ unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; ++ int regno; ++ ++ for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) ++ if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) ++ { ++ pcum->aapcs_vfp_reg_alloc = mask << regno; ++ if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) ++ { ++ int i; ++ int rcount = pcum->aapcs_vfp_rcount; ++ int rshift = shift; ++ enum machine_mode rmode = pcum->aapcs_vfp_rmode; ++ rtx par; ++ if (!TARGET_NEON) ++ { ++ /* Avoid using unsupported vector modes. 
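++ Without NEON, V2SImode is described as a single DImode register
++ and V4SImode as two DImode values (doubling the count and halving
++ the shift), so the same block of VFP registers is still covered.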
*/ ++ if (rmode == V2SImode) ++ rmode = DImode; ++ else if (rmode == V4SImode) ++ { ++ rmode = DImode; ++ rcount *= 2; ++ rshift /= 2; ++ } ++ } ++ par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount)); ++ for (i = 0; i < rcount; i++) ++ { ++ rtx tmp = gen_rtx_REG (rmode, ++ FIRST_VFP_REGNUM + regno + i * rshift); ++ tmp = gen_rtx_EXPR_LIST ++ (VOIDmode, tmp, ++ GEN_INT (i * GET_MODE_SIZE (rmode))); ++ XVECEXP (par, 0, i) = tmp; ++ } ++ ++ pcum->aapcs_reg = par; ++ } ++ else ++ pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno); ++ return true; ++ } ++ return false; ++} ++ ++static rtx ++aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED, ++ enum machine_mode mode, ++ const_tree type ATTRIBUTE_UNUSED) ++{ ++ if (!use_vfp_abi (pcs_variant, false)) ++ return false; ++ ++ if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) ++ { ++ int count; ++ int ag_mode; ++ int i; ++ rtx par; ++ int shift; ++ ++ aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, ++ &ag_mode, &count); ++ ++ if (!TARGET_NEON) ++ { ++ if (ag_mode == V2SImode) ++ ag_mode = DImode; ++ else if (ag_mode == V4SImode) ++ { ++ ag_mode = DImode; ++ count *= 2; ++ } ++ } ++ shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode); ++ par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); ++ for (i = 0; i < count; i++) ++ { ++ rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); ++ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, ++ GEN_INT (i * GET_MODE_SIZE (ag_mode))); ++ XVECEXP (par, 0, i) = tmp; ++ } ++ ++ return par; ++ } ++ ++ return gen_rtx_REG (mode, FIRST_VFP_REGNUM); ++} ++ ++static void ++aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, ++ enum machine_mode mode ATTRIBUTE_UNUSED, ++ const_tree type ATTRIBUTE_UNUSED) ++{ ++ pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc; ++ pcum->aapcs_vfp_reg_alloc = 0; ++ return; ++} ++ ++#define AAPCS_CP(X) \ ++ { \ ++ aapcs_ ## X ## _cum_init, \ ++ aapcs_ ## X ## _is_call_candidate, \ ++ aapcs_ ## X ## _allocate, \ ++ aapcs_ ## X ## _is_return_candidate, \ ++ aapcs_ ## X ## _allocate_return_reg, \ ++ aapcs_ ## X ## _advance \ ++ } ++ ++/* Table of co-processors that can be used to pass arguments in ++ registers. Idealy no arugment should be a candidate for more than ++ one co-processor table entry, but the table is processed in order ++ and stops after the first match. If that entry then fails to put ++ the argument into a co-processor register, the argument will go on ++ the stack. */ ++static struct ++{ ++ /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */ ++ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); ++ ++ /* Return true if an argument of mode MODE (or type TYPE if MODE is ++ BLKmode) is a candidate for this co-processor's registers; this ++ function should ignore any position-dependent state in ++ CUMULATIVE_ARGS and only use call-type dependent information. */ ++ bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); ++ ++ /* Return true if the argument does get a co-processor register; it ++ should set aapcs_reg to an RTX of the register allocated as is ++ required for a return from FUNCTION_ARG. */ ++ bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); ++ ++ /* Return true if a result of mode MODE (or type TYPE if MODE is ++ BLKmode) is can be returned in this co-processor's registers. 
*/ ++ bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree); ++ ++ /* Allocate and return an RTX element to hold the return type of a ++ call, this routine must not fail and will only be called if ++ is_return_candidate returned true with the same parameters. */ ++ rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree); ++ ++ /* Finish processing this argument and prepare to start processing ++ the next one. */ ++ void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); ++} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = ++ { ++ AAPCS_CP(vfp) ++ }; ++ ++#undef AAPCS_CP ++ ++static int ++aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, ++ tree type) ++{ ++ int i; ++ ++ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) ++ if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) ++ return i; ++ ++ return -1; ++} ++ ++static int ++aapcs_select_return_coproc (const_tree type, const_tree fntype) ++{ ++ /* We aren't passed a decl, so we can't check that a call is local. ++ However, it isn't clear that that would be a win anyway, since it ++ might limit some tail-calling opportunities. */ ++ enum arm_pcs pcs_variant; ++ ++ if (fntype) ++ { ++ const_tree fndecl = NULL_TREE; ++ ++ if (TREE_CODE (fntype) == FUNCTION_DECL) ++ { ++ fndecl = fntype; ++ fntype = TREE_TYPE (fntype); ++ } ++ ++ pcs_variant = arm_get_pcs_model (fntype, fndecl); ++ } ++ else ++ pcs_variant = arm_pcs_default; ++ ++ if (pcs_variant != ARM_PCS_AAPCS) ++ { ++ int i; ++ ++ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) ++ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, ++ TYPE_MODE (type), ++ type)) ++ return i; ++ } ++ return -1; ++} ++ ++static rtx ++aapcs_allocate_return_reg (enum machine_mode mode, const_tree type, ++ const_tree fntype) ++{ ++ /* We aren't passed a decl, so we can't check that a call is local. ++ However, it isn't clear that that would be a win anyway, since it ++ might limit some tail-calling opportunities. */ ++ enum arm_pcs pcs_variant; ++ ++ if (fntype) ++ { ++ const_tree fndecl = NULL_TREE; ++ ++ if (TREE_CODE (fntype) == FUNCTION_DECL) ++ { ++ fndecl = fntype; ++ fntype = TREE_TYPE (fntype); ++ } ++ ++ pcs_variant = arm_get_pcs_model (fntype, fndecl); ++ } ++ else ++ pcs_variant = arm_pcs_default; ++ ++ /* Promote integer types. */ ++ if (type && INTEGRAL_TYPE_P (type)) ++ PROMOTE_FUNCTION_MODE (mode, unsignedp, type); ++ ++ if (pcs_variant != ARM_PCS_AAPCS) ++ { ++ int i; ++ ++ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) ++ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, ++ type)) ++ return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, ++ mode, type); ++ } ++ ++ /* Promotes small structs returned in a register to full-word size ++ for big-endian AAPCS. */ ++ if (type && arm_return_in_msb (type)) ++ { ++ HOST_WIDE_INT size = int_size_in_bytes (type); ++ if (size % UNITS_PER_WORD != 0) ++ { ++ size += UNITS_PER_WORD - size % UNITS_PER_WORD; ++ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); ++ } ++ } ++ ++ return gen_rtx_REG (mode, R0_REGNUM); ++} ++ ++rtx ++aapcs_libcall_value (enum machine_mode mode) ++{ ++ return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); ++} ++ ++/* Lay out a function argument using the AAPCS rules. The rule ++ numbers referred to here are those in the AAPCS. */ ++static void ++aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, ++ tree type, int named) ++{ ++ int nregs, nregs2; ++ int ncrn; ++ ++ /* We only need to do this once per argument. 
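++ The layout code can be reached several times for a single argument
++ (from FUNCTION_ARG, arm_arg_partial_bytes and
++ arm_function_arg_advance), so the result is cached in PCUM and only
++ recomputed after the 'advance' step clears aapcs_arg_processed.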
*/ ++ if (pcum->aapcs_arg_processed) ++ return; ++ ++ pcum->aapcs_arg_processed = true; ++ ++ /* Special case: if named is false then we are handling an incoming ++ anonymous argument which is on the stack. */ ++ if (!named) ++ return; ++ ++ /* Is this a potential co-processor register candidate? */ ++ if (pcum->pcs_variant != ARM_PCS_AAPCS) ++ { ++ int slot = aapcs_select_call_coproc (pcum, mode, type); ++ pcum->aapcs_cprc_slot = slot; ++ ++ /* We don't have to apply any of the rules from part B of the ++ preparation phase, these are handled elsewhere in the ++ compiler. */ ++ ++ if (slot >= 0) ++ { ++ /* A Co-processor register candidate goes either in its own ++ class of registers or on the stack. */ ++ if (!pcum->aapcs_cprc_failed[slot]) ++ { ++ /* C1.cp - Try to allocate the argument to co-processor ++ registers. */ ++ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) ++ return; ++ ++ /* C2.cp - Put the argument on the stack and note that we ++ can't assign any more candidates in this slot. We also ++ need to note that we have allocated stack space, so that ++ we won't later try to split a non-cprc candidate between ++ core registers and the stack. */ ++ pcum->aapcs_cprc_failed[slot] = true; ++ pcum->can_split = false; ++ } ++ ++ /* We didn't get a register, so this argument goes on the ++ stack. */ ++ gcc_assert (pcum->can_split == false); ++ return; ++ } ++ } ++ ++ /* C3 - For double-word aligned arguments, round the NCRN up to the ++ next even number. */ ++ ncrn = pcum->aapcs_ncrn; ++ if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) ++ ncrn++; ++ ++ nregs = ARM_NUM_REGS2(mode, type); ++ ++ /* Sigh, this test should really assert that nregs > 0, but a GCC ++ extension allows empty structs and then gives them empty size; it ++ then allows such a structure to be passed by value. For some of ++ the code below we have to pretend that such an argument has ++ non-zero size so that we 'locate' it correctly either in ++ registers or on the stack. */ ++ gcc_assert (nregs >= 0); ++ ++ nregs2 = nregs ? nregs : 1; ++ ++ /* C4 - Argument fits entirely in core registers. */ ++ if (ncrn + nregs2 <= NUM_ARG_REGS) ++ { ++ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); ++ pcum->aapcs_next_ncrn = ncrn + nregs; ++ return; ++ } ++ ++ /* C5 - Some core registers left and there are no arguments already ++ on the stack: split this argument between the remaining core ++ registers and the stack. */ ++ if (ncrn < NUM_ARG_REGS && pcum->can_split) ++ { ++ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); ++ pcum->aapcs_next_ncrn = NUM_ARG_REGS; ++ pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; ++ return; ++ } ++ ++ /* C6 - NCRN is set to 4. */ ++ pcum->aapcs_next_ncrn = NUM_ARG_REGS; ++ ++ /* C7,C8 - arugment goes on the stack. We have nothing to do here. */ ++ return; ++} ++ + /* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is NULL. */ + void + arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, +- rtx libname ATTRIBUTE_UNUSED, ++ rtx libname, + tree fndecl ATTRIBUTE_UNUSED) + { ++ /* Long call handling. */ ++ if (fntype) ++ pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl); ++ else ++ pcum->pcs_variant = arm_pcs_default; ++ ++ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) ++ { ++ /* XXX We should also detect some library calls here and handle ++ them using the base rules too; for example the floating point ++ support functions always work this way. 
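++ The conversions matched below (fix/float between DImode and the
++ floating-point modes, plus the HFmode/SFmode truncate and extend
++ helpers) return their results in core registers, so they must use
++ the base AAPCS variant regardless of the default PCS.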
*/ ++ ++ if (rtx_equal_p (libname, ++ convert_optab_libfunc (sfix_optab, DImode, DFmode)) ++ || rtx_equal_p (libname, ++ convert_optab_libfunc (ufix_optab, DImode, DFmode)) ++ || rtx_equal_p (libname, ++ convert_optab_libfunc (sfix_optab, DImode, SFmode)) ++ || rtx_equal_p (libname, ++ convert_optab_libfunc (ufix_optab, DImode, SFmode)) ++ || rtx_equal_p (libname, ++ convert_optab_libfunc (trunc_optab, HFmode, SFmode)) ++ || rtx_equal_p (libname, ++ convert_optab_libfunc (sext_optab, SFmode, HFmode))) ++ pcum->pcs_variant = ARM_PCS_AAPCS; ++ ++ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0; ++ pcum->aapcs_reg = NULL_RTX; ++ pcum->aapcs_partial = 0; ++ pcum->aapcs_arg_processed = false; ++ pcum->aapcs_cprc_slot = -1; ++ pcum->can_split = true; ++ ++ if (pcum->pcs_variant != ARM_PCS_AAPCS) ++ { ++ int i; ++ ++ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) ++ { ++ pcum->aapcs_cprc_failed[i] = false; ++ aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl); ++ } ++ } ++ return; ++ } ++ ++ /* Legacy ABIs */ ++ + /* On the ARM, the offset starts at 0. */ + pcum->nregs = 0; + pcum->iwmmxt_nregs = 0; +@@ -3120,6 +4189,17 @@ + { + int nregs; + ++ /* Handle the special case quickly. Pick an arbitrary value for op2 of ++ a call insn (op3 of a call_value insn). */ ++ if (mode == VOIDmode) ++ return const0_rtx; ++ ++ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) ++ { ++ aapcs_layout_arg (pcum, mode, type, named); ++ return pcum->aapcs_reg; ++ } ++ + /* Varargs vectors are treated the same as long long. + named_count avoids having to change the way arm handles 'named' */ + if (TARGET_IWMMXT_ABI +@@ -3161,10 +4241,16 @@ + + static int + arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, +- tree type, bool named ATTRIBUTE_UNUSED) ++ tree type, bool named) + { + int nregs = pcum->nregs; + ++ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) ++ { ++ aapcs_layout_arg (pcum, mode, type, named); ++ return pcum->aapcs_partial; ++ } ++ + if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) + return 0; + +@@ -3173,7 +4259,40 @@ + && pcum->can_split) + return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; + +- return 0; ++ return 0; ++} ++ ++void ++arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, ++ tree type, bool named) ++{ ++ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) ++ { ++ aapcs_layout_arg (pcum, mode, type, named); ++ ++ if (pcum->aapcs_cprc_slot >= 0) ++ { ++ aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, ++ type); ++ pcum->aapcs_cprc_slot = -1; ++ } ++ ++ /* Generic stuff. */ ++ pcum->aapcs_arg_processed = false; ++ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; ++ pcum->aapcs_reg = NULL_RTX; ++ pcum->aapcs_partial = 0; ++ } ++ else ++ { ++ pcum->nargs += 1; ++ if (arm_vector_mode_supported_p (mode) ++ && pcum->named_count > pcum->nargs ++ && TARGET_IWMMXT_ABI) ++ pcum->iwmmxt_nregs += 1; ++ else ++ pcum->nregs += ARM_NUM_REGS2 (mode, type); ++ } + } + + /* Variable sized types are passed by reference. This is a GCC +@@ -3226,6 +4345,8 @@ + /* Whereas these functions are always known to reside within the 26 bit + addressing range. */ + { "short_call", 0, 0, false, true, true, NULL }, ++ /* Specify the procedure call conventions for a function. */ ++ { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute }, + /* Interrupt Service Routines have special prologue and epilogue requirements. 
*/ + { "isr", 0, 1, false, false, false, arm_handle_isr_attribute }, + { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute }, +@@ -3328,6 +4449,21 @@ + return NULL_TREE; + } + ++/* Handle a "pcs" attribute; arguments as in struct ++ attribute_spec.handler. */ ++static tree ++arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, ++ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) ++{ ++ if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) ++ { ++ warning (OPT_Wattributes, "%qs attribute ignored", ++ IDENTIFIER_POINTER (name)); ++ *no_add_attrs = true; ++ } ++ return NULL_TREE; ++} ++ + #if TARGET_DLLIMPORT_DECL_ATTRIBUTES + /* Handle the "notshared" attribute. This attribute is another way of + requesting hidden visibility. ARM's compiler supports +@@ -3489,7 +4625,7 @@ + + /* Return nonzero if it is ok to make a tail-call to DECL. */ + static bool +-arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) ++arm_function_ok_for_sibcall (tree decl, tree exp) + { + unsigned long func_type; + +@@ -3522,6 +4658,21 @@ + if (IS_INTERRUPT (func_type)) + return false; + ++ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) ++ { ++ /* Check that the return value locations are the same. For ++ example that we aren't returning a value from the sibling in ++ a VFP register but then need to transfer it to a core ++ register. */ ++ rtx a, b; ++ ++ a = arm_function_value (TREE_TYPE (exp), decl, false); ++ b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), ++ cfun->decl, false); ++ if (!rtx_equal_p (a, b)) ++ return false; ++ } ++ + /* Never tailcall if function may be called with a misaligned SP. */ + if (IS_STACKALIGN (func_type)) + return false; +@@ -4120,6 +5271,7 @@ + if (GET_MODE_SIZE (mode) <= 4 + && ! (arm_arch4 + && (mode == HImode ++ || mode == HFmode + || (mode == QImode && outer == SIGN_EXTEND)))) + { + if (code == MULT) +@@ -4148,13 +5300,15 @@ + load. */ + if (arm_arch4) + { +- if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode)) ++ if (mode == HImode ++ || mode == HFmode ++ || (outer == SIGN_EXTEND && mode == QImode)) + range = 256; + else + range = 4096; + } + else +- range = (mode == HImode) ? 4095 : 4096; ++ range = (mode == HImode || mode == HFmode) ? 4095 : 4096; + + return (code == CONST_INT + && INTVAL (index) < range +@@ -4325,7 +5479,8 @@ + return 1; + + /* This is PC relative data after arm_reorg runs. */ +- else if (GET_MODE_SIZE (mode) >= 4 && reload_completed ++ else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) ++ && reload_completed + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS +@@ -5024,7 +6179,7 @@ + case UMOD: + if (TARGET_HARD_FLOAT && mode == SFmode) + *total = COSTS_N_INSNS (2); +- else if (TARGET_HARD_FLOAT && mode == DFmode) ++ else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) + *total = COSTS_N_INSNS (4); + else + *total = COSTS_N_INSNS (20); +@@ -5063,23 +6218,6 @@ + return true; + + case MINUS: +- if (TARGET_THUMB2) +- { +- if (GET_MODE_CLASS (mode) == MODE_FLOAT) +- { +- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) +- *total = COSTS_N_INSNS (1); +- else +- *total = COSTS_N_INSNS (20); +- } +- else +- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); +- /* Thumb2 does not have RSB, so all arguments must be +- registers (subtracting a constant is canonicalized as +- addition of the negated constant). 
*/ +- return false; +- } +- + if (mode == DImode) + { + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); +@@ -5102,7 +6240,9 @@ + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { +- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ if (TARGET_HARD_FLOAT ++ && (mode == SFmode ++ || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE +@@ -5143,6 +6283,17 @@ + return true; + } + ++ /* A shift as a part of RSB costs no more than RSB itself. */ ++ if (GET_CODE (XEXP (x, 0)) == MULT ++ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT ++ && ((INTVAL (XEXP (XEXP (x, 0), 1)) ++ & (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)) ++ { ++ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed); ++ *total += rtx_cost (XEXP (x, 1), code, speed); ++ return true; ++ } ++ + if (subcode == MULT + && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT + && ((INTVAL (XEXP (XEXP (x, 1), 1)) & +@@ -5164,6 +6315,19 @@ + return true; + } + ++ /* MLS is just as expensive as its underlying multiplication. ++ Exclude a shift by a constant, which is expressed as a ++ multiplication. */ ++ if (TARGET_32BIT && arm_arch_thumb2 ++ && GET_CODE (XEXP (x, 1)) == MULT ++ && ! (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT ++ && ((INTVAL (XEXP (XEXP (x, 1), 1)) & ++ (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))) ++ { ++ /* The cost comes from the cost of the multiply. */ ++ return false; ++ } ++ + /* Fall through */ + + case PLUS: +@@ -5192,7 +6356,9 @@ + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { +- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ if (TARGET_HARD_FLOAT ++ && (mode == SFmode ++ || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE +@@ -5307,7 +6473,9 @@ + case NEG: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { +- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ if (TARGET_HARD_FLOAT ++ && (mode == SFmode ++ || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; +@@ -5460,7 +6628,9 @@ + case ABS: + if (GET_MODE_CLASS (mode == MODE_FLOAT)) + { +- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ if (TARGET_HARD_FLOAT ++ && (mode == SFmode ++ || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; +@@ -5563,7 +6733,8 @@ + return true; + + case CONST_DOUBLE: +- if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)) ++ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) ++ && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); +@@ -5638,7 +6809,8 @@ + return false; + + case MINUS: +- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) ++ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT ++ && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; +@@ -5668,7 +6840,8 @@ + return false; + + case PLUS: +- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) ++ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT ++ && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; +@@ -5698,7 +6871,8 @@ + return false; + + case NEG: +- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) ++ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT ++ && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; +@@ -5722,7 +6896,8 @@ + return false; + + case ABS: +- if 
(TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) ++ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT ++ && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); +@@ -5939,7 +7114,9 @@ + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { +- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ if (TARGET_HARD_FLOAT ++ && (mode == SFmode ++ || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; +@@ -6096,7 +7273,9 @@ + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { +- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) ++ if (TARGET_HARD_FLOAT ++ && (mode == SFmode ++ || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; +@@ -6919,10 +8098,13 @@ + } + + /* Return TRUE if OP is a memory operand which we can load or store a vector +- to/from. If CORE is true, we're moving from ARM registers not Neon +- registers. */ ++ to/from. TYPE is one of the following values: ++ 0 - Vector load/stor (vldr) ++ 1 - Core registers (ldm) ++ 2 - Element/structure loads (vld1) ++ */ + int +-neon_vector_mem_operand (rtx op, bool core) ++neon_vector_mem_operand (rtx op, int type) + { + rtx ind; + +@@ -6955,23 +8137,16 @@ + return arm_address_register_rtx_p (ind, 0); + + /* Allow post-increment with Neon registers. */ +- if (!core && GET_CODE (ind) == POST_INC) ++ if ((type != 1 && GET_CODE (ind) == POST_INC) ++ || (type == 0 && GET_CODE (ind) == PRE_DEC)) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + +-#if 0 +- /* FIXME: We can support this too if we use VLD1/VST1. */ +- if (!core +- && GET_CODE (ind) == POST_MODIFY +- && arm_address_register_rtx_p (XEXP (ind, 0), 0) +- && GET_CODE (XEXP (ind, 1)) == PLUS +- && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) +- ind = XEXP (ind, 1); +-#endif ++ /* FIXME: vld1 allows register post-modify. */ + + /* Match: + (plus (reg) + (const)). */ +- if (!core ++ if (type == 0 + && GET_CODE (ind) == PLUS + && GET_CODE (XEXP (ind, 0)) == REG + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) +@@ -7038,10 +8213,19 @@ + enum reg_class + coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) + { ++ if (mode == HFmode) ++ { ++ if (!TARGET_NEON_FP16) ++ return GENERAL_REGS; ++ if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) ++ return NO_REGS; ++ return GENERAL_REGS; ++ } ++ + if (TARGET_NEON + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) +- && neon_vector_mem_operand (x, FALSE)) ++ && neon_vector_mem_operand (x, 0)) + return NO_REGS; + + if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) +@@ -7438,6 +8622,9 @@ + int base_reg = -1; + int i; + ++ if (low_irq_latency) ++ return 0; ++ + /* Can only handle 2, 3, or 4 insns at present, + though could be easily extended if required. */ + gcc_assert (nops >= 2 && nops <= 4); +@@ -7667,6 +8854,9 @@ + int base_reg = -1; + int i; + ++ if (low_irq_latency) ++ return 0; ++ + /* Can only handle 2, 3, or 4 insns at present, though could be easily + extended if required. */ + gcc_assert (nops >= 2 && nops <= 4); +@@ -7874,7 +9064,7 @@ + + As a compromise, we use ldr for counts of 1 or 2 regs, and ldm + for counts of 3 or 4 regs. */ +- if (arm_tune_xscale && count <= 2 && ! optimize_size) ++ if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! 
optimize_size)) + { + rtx seq; + +@@ -7937,7 +9127,7 @@ + + /* See arm_gen_load_multiple for discussion of + the pros/cons of ldm/stm usage for XScale. */ +- if (arm_tune_xscale && count <= 2 && ! optimize_size) ++ if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size)) + { + rtx seq; + +@@ -9555,7 +10745,10 @@ + gcc_assert (GET_CODE (from) != BARRIER); + + /* Count the length of this insn. */ +- count += get_attr_length (from); ++ if (LABEL_P (from) && (align_jumps > 0 || align_loops > 0)) ++ count += MAX (align_jumps, align_loops); ++ else ++ count += get_attr_length (from); + + /* If there is a jump table, add its length. */ + tmp = is_jump_table (from); +@@ -9867,6 +11060,8 @@ + insn = table; + } + } ++ else if (LABEL_P (insn) && (align_jumps > 0 || align_loops > 0)) ++ address += MAX (align_jumps, align_loops); + } + + fix = minipool_fix_head; +@@ -10072,6 +11267,21 @@ + vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count) + { + int i; ++ int offset; ++ ++ if (low_irq_latency) ++ { ++ /* Output a sequence of FLDD instructions. */ ++ offset = 0; ++ for (i = reg; i < reg + count; ++i, offset += 8) ++ { ++ fputc ('\t', stream); ++ asm_fprintf (stream, "fldd\td%d, [%r,#%d]\n", i, base, offset); ++ } ++ asm_fprintf (stream, "\tadd\tsp, sp, #%d\n", count * 8); ++ return; ++ } ++ + + /* Workaround ARM10 VFPr1 bug. */ + if (count == 2 && !arm_arch6) +@@ -10142,6 +11352,56 @@ + rtx tmp, reg; + int i; + ++ if (low_irq_latency) ++ { ++ int saved_size; ++ rtx sp_insn; ++ ++ if (!count) ++ return 0; ++ ++ saved_size = count * GET_MODE_SIZE (DFmode); ++ ++ /* Since fstd does not have postdecrement addressing mode, ++ we first decrement stack pointer and then use base+offset ++ stores for VFP registers. The ARM EABI unwind information ++ can't easily describe base+offset loads, so we attach ++ a note for the effects of the whole block in the first insn, ++ and avoid marking the subsequent instructions ++ with RTX_FRAME_RELATED_P. */ ++ sp_insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-saved_size)); ++ sp_insn = emit_insn (sp_insn); ++ RTX_FRAME_RELATED_P (sp_insn) = 1; ++ ++ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); ++ XVECEXP (dwarf, 0, 0) = ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, ++ plus_constant (stack_pointer_rtx, -saved_size)); ++ ++ /* push double VFP registers to stack */ ++ for (i = 0; i < count; ++i ) ++ { ++ rtx reg; ++ rtx mem; ++ rtx addr; ++ rtx insn; ++ reg = gen_rtx_REG (DFmode, base_reg + 2*i); ++ addr = (i == 0) ? stack_pointer_rtx ++ : gen_rtx_PLUS (SImode, stack_pointer_rtx, ++ GEN_INT (i * GET_MODE_SIZE (DFmode))); ++ mem = gen_frame_mem (DFmode, addr); ++ insn = emit_move_insn (mem, reg); ++ XVECEXP (dwarf, 0, i+1) = ++ gen_rtx_SET (VOIDmode, mem, reg); ++ } ++ ++ REG_NOTES (sp_insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf, ++ REG_NOTES (sp_insn)); ++ ++ return saved_size; ++ } ++ + /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two + register pairs are stored by a store multiple insn. We avoid this + by pushing an extra pair. */ +@@ -10758,7 +12018,7 @@ + } + + /* Output a move, load or store for quad-word vectors in ARM registers. Only +- handles MEMs accepted by neon_vector_mem_operand with CORE=true. */ ++ handles MEMs accepted by neon_vector_mem_operand with TYPE=1. 
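++ (TYPE=1 is the core-register case: the quad-word value is moved
++ through ARM registers with ldm/stm rather than through the NEON
++ register file.)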
*/ + + const char * + output_move_quad (rtx *operands) +@@ -10954,6 +12214,12 @@ + ops[1] = reg; + break; + ++ case PRE_DEC: ++ templ = "v%smdb%%?\t%%0!, %%h1"; ++ ops[0] = XEXP (addr, 0); ++ ops[1] = reg; ++ break; ++ + case POST_MODIFY: + /* FIXME: Not currently enabled in neon_vector_mem_operand. */ + gcc_unreachable (); +@@ -10968,7 +12234,7 @@ + { + /* We're only using DImode here because it's a convenient size. */ + ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); +- ops[1] = adjust_address (mem, SImode, 8 * i); ++ ops[1] = adjust_address (mem, DImode, 8 * i); + if (reg_overlap_mentioned_p (ops[0], mem)) + { + gcc_assert (overlap == -1); +@@ -11557,7 +12823,7 @@ + if (count > 0) + { + /* Workaround ARM10 VFPr1 bug. */ +- if (count == 2 && !arm_arch6) ++ if (count == 2 && !arm_arch6 && !low_irq_latency) + count++; + saved += count * 8; + } +@@ -11886,6 +13152,41 @@ + return_used_this_function = 0; + } + ++/* Generate to STREAM a code sequence that pops registers identified ++ in REGS_MASK from SP. SP is incremented as the result. ++*/ ++static void ++print_pop_reg_by_ldr (FILE *stream, int regs_mask, int rfe) ++{ ++ int reg; ++ ++ gcc_assert (! (regs_mask & (1 << SP_REGNUM))); ++ ++ for (reg = 0; reg < PC_REGNUM; ++reg) ++ if (regs_mask & (1 << reg)) ++ asm_fprintf (stream, "\tldr\t%r, [%r], #4\n", ++ reg, SP_REGNUM); ++ ++ if (regs_mask & (1 << PC_REGNUM)) ++ { ++ if (rfe) ++ /* When returning from exception, we need to ++ copy SPSR to CPSR. There are two ways to do ++ that: the ldm instruction with "^" suffix, ++ and movs instruction. The latter would ++ require that we load from stack to some ++ scratch register, and then move to PC. ++ Therefore, we'd need extra instruction and ++ have to make sure we actually have a spare ++ register. Using ldm with a single register ++ is simler. */ ++ asm_fprintf (stream, "\tldm\tsp!, {pc}^\n"); ++ else ++ asm_fprintf (stream, "\tldr\t%r, [%r], #4\n", ++ PC_REGNUM, SP_REGNUM); ++ } ++} ++ + const char * + arm_output_epilogue (rtx sibling) + { +@@ -11946,7 +13247,7 @@ + /* This variable is for the Virtual Frame Pointer, not VFP regs. */ + int vfp_offset = offsets->frame; + +- if (arm_fpu_arch == FPUTYPE_FPA_EMU2) ++ if (TARGET_FPA_EMU2) + { + for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) +@@ -12169,7 +13470,7 @@ + SP_REGNUM, HARD_FRAME_POINTER_REGNUM); + } + +- if (arm_fpu_arch == FPUTYPE_FPA_EMU2) ++ if (TARGET_FPA_EMU2) + { + for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) +@@ -12253,22 +13554,19 @@ + to load use the LDR instruction - it is faster. For Thumb-2 + always use pop and the assembler will pick the best instruction.*/ + if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM) +- && !IS_INTERRUPT(func_type)) ++ && !IS_INTERRUPT (func_type)) + { + asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM); + } + else if (saved_regs_mask) + { +- if (saved_regs_mask & (1 << SP_REGNUM)) +- /* Note - write back to the stack register is not enabled +- (i.e. "ldmfd sp!..."). We know that the stack pointer is +- in the list of registers and if we add writeback the +- instruction becomes UNPREDICTABLE. */ +- print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, +- rfe); +- else if (TARGET_ARM) +- print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask, +- rfe); ++ gcc_assert ( ! 
(saved_regs_mask & (1 << SP_REGNUM))); ++ if (TARGET_ARM) ++ if (low_irq_latency) ++ print_pop_reg_by_ldr (f, saved_regs_mask, rfe); ++ else ++ print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask, ++ rfe); + else + print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0); + } +@@ -12389,6 +13687,32 @@ + + gcc_assert (num_regs && num_regs <= 16); + ++ if (low_irq_latency) ++ { ++ rtx insn = 0; ++ ++ /* Emit a series of ldr instructions rather rather than a single ldm. */ ++ /* TODO: Use ldrd where possible. */ ++ gcc_assert (! (mask & (1 << SP_REGNUM))); ++ ++ for (i = LAST_ARM_REGNUM; i >= 0; --i) ++ { ++ if (mask & (1 << i)) ++ ++ { ++ rtx reg, where, mem; ++ ++ reg = gen_rtx_REG (SImode, i); ++ where = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx); ++ mem = gen_rtx_MEM (SImode, where); ++ insn = emit_move_insn (mem, reg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ } ++ ++ return insn; ++ } ++ + /* We don't record the PC in the dwarf frame information. */ + num_dwarf_regs = num_regs; + if (mask & (1 << PC_REGNUM)) +@@ -12737,22 +14061,23 @@ + { + int reg = -1; + +- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) +- { +- if ((offsets->saved_regs_mask & (1 << i)) == 0) +- { +- reg = i; +- break; +- } +- } +- +- if (reg == -1 && arm_size_return_regs () <= 12 +- && !crtl->tail_call_emit) ++ /* If it is safe to use r3, then do so. This sometimes ++ generates better code on Thumb-2 by avoiding the need to ++ use 32-bit push/pop instructions. */ ++ if (!crtl->tail_call_emit ++ && arm_size_return_regs () <= 12) + { +- /* Push/pop an argument register (r3) if all callee saved +- registers are already being pushed. */ + reg = 3; + } ++ else ++ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) ++ { ++ if ((offsets->saved_regs_mask & (1 << i)) == 0) ++ { ++ reg = i; ++ break; ++ } ++ } + + if (reg != -1) + { +@@ -12876,7 +14201,7 @@ + + /* Save any floating point call-saved registers used by this + function. */ +- if (arm_fpu_arch == FPUTYPE_FPA_EMU2) ++ if (TARGET_FPA_EMU2) + { + for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) +@@ -13483,7 +14808,11 @@ + { + fprintf (stream, ", %s ", shift); + if (val == -1) +- arm_print_operand (stream, XEXP (x, 1), 0); ++ { ++ arm_print_operand (stream, XEXP (x, 1), 0); ++ if (janus2_code) ++ fprintf(stream, "\n\tnop"); ++ } + else + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); + } +@@ -13704,6 +15033,30 @@ + } + return; + ++ /* Print the high single-precision register of a VFP double-precision ++ register. */ ++ case 'p': ++ { ++ int mode = GET_MODE (x); ++ int regno; ++ ++ if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG) ++ { ++ output_operand_lossage ("invalid operand for code '%c'", code); ++ return; ++ } ++ ++ regno = REGNO (x); ++ if (!VFP_REGNO_OK_FOR_DOUBLE (regno)) ++ { ++ output_operand_lossage ("invalid operand for code '%c'", code); ++ return; ++ } ++ ++ fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1); ++ } ++ return; ++ + /* Print a VFP/Neon double precision or quad precision register name. */ + case 'P': + case 'q': +@@ -13821,6 +15174,57 @@ + } + return; + ++ /* Memory operand for vld1/vst1 instruction. 
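++ The operand is printed as [Rn], with an alignment hint such as
++ [Rn, :64] when the access is known to be at least doubleword
++ aligned, and a trailing '!' for post-increment addressing.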
*/ ++ case 'A': ++ { ++ rtx addr; ++ bool postinc = FALSE; ++ unsigned align; ++ ++ gcc_assert (GET_CODE (x) == MEM); ++ addr = XEXP (x, 0); ++ if (GET_CODE (addr) == POST_INC) ++ { ++ postinc = 1; ++ addr = XEXP (addr, 0); ++ } ++ align = MEM_ALIGN (x) >> 3; ++ asm_fprintf (stream, "[%r", REGNO (addr)); ++ if (align > GET_MODE_SIZE (GET_MODE (x))) ++ align = GET_MODE_SIZE (GET_MODE (x)); ++ if (align >= 8) ++ asm_fprintf (stream, ", :%d", align << 3); ++ asm_fprintf (stream, "]"); ++ if (postinc) ++ fputs("!", stream); ++ } ++ return; ++ ++ /* Register specifier for vld1.16/vst1.16. Translate the S register ++ number into a D register number and element index. */ ++ case 'z': ++ { ++ int mode = GET_MODE (x); ++ int regno; ++ ++ if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG) ++ { ++ output_operand_lossage ("invalid operand for code '%c'", code); ++ return; ++ } ++ ++ regno = REGNO (x); ++ if (!VFP_REGNO_OK_FOR_SINGLE (regno)) ++ { ++ output_operand_lossage ("invalid operand for code '%c'", code); ++ return; ++ } ++ ++ regno = regno - FIRST_VFP_REGNUM; ++ fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); ++ } ++ return; ++ + default: + if (x == 0) + { +@@ -13854,6 +15258,12 @@ + default: + gcc_assert (GET_CODE (x) != NEG); + fputc ('#', stream); ++ if (GET_CODE (x) == HIGH) ++ { ++ fputs (":lower16:", stream); ++ x = XEXP (x, 0); ++ } ++ + output_addr_const (stream, x); + break; + } +@@ -14245,6 +15655,10 @@ + first insn after the following code_label if REVERSE is true. */ + rtx start_insn = insn; + ++ /* Don't do this if we're not considering conditional execution. */ ++ if (TARGET_NO_SINGLE_COND_EXEC) ++ return; ++ + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (arm_ccfsm_state == 4) +@@ -14618,6 +16032,11 @@ + if (mode == DFmode) + return VFP_REGNO_OK_FOR_DOUBLE (regno); + ++ /* VFP registers can hold HFmode values, but there is no point in ++ putting them there unless we have hardware conversion insns. */ ++ if (mode == HFmode) ++ return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); ++ + if (TARGET_NEON) + return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) + || (VALID_NEON_QREG_MODE (mode) +@@ -14637,16 +16056,16 @@ + return mode == SImode; + + if (IS_IWMMXT_REGNUM (regno)) +- return VALID_IWMMXT_REG_MODE (mode); ++ return VALID_IWMMXT_REG_MODE (mode) && mode != SImode; + } + +- /* We allow any value to be stored in the general registers. ++ /* We allow almost any value to be stored in the general registers. + Restrict doubleword quantities to even register pairs so that we can +- use ldrd. Do not allow Neon structure opaque modes in general registers; +- they would use too many. */ ++ use ldrd. Do not allow very large Neon structure opaque modes in ++ general registers; they would use too many. 
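++ The check below admits at most four consecutive core registers
++ (16 bytes, enough for TImode) and so rejects the wider EI/OI/CI/XI
++ Neon structure modes.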
*/ + if (regno <= LAST_ARM_REGNUM) + return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) +- && !VALID_NEON_STRUCT_MODE (mode); ++ && ARM_NUM_REGS (mode) <= 4; + + if (regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) +@@ -16103,6 +17522,15 @@ + } + + static void ++arm_init_fp16_builtins (void) ++{ ++ tree fp16_type = make_node (REAL_TYPE); ++ TYPE_PRECISION (fp16_type) = 16; ++ layout_type (fp16_type); ++ (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); ++} ++ ++static void + arm_init_builtins (void) + { + arm_init_tls_builtins (); +@@ -16112,6 +17540,71 @@ + + if (TARGET_NEON) + arm_init_neon_builtins (); ++ ++ if (arm_fp16_format) ++ arm_init_fp16_builtins (); ++} ++ ++/* Implement TARGET_INVALID_PARAMETER_TYPE. */ ++ ++static const char * ++arm_invalid_parameter_type (const_tree t) ++{ ++ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) ++ return N_("function parameters cannot have __fp16 type"); ++ return NULL; ++} ++ ++/* Implement TARGET_INVALID_PARAMETER_TYPE. */ ++ ++static const char * ++arm_invalid_return_type (const_tree t) ++{ ++ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) ++ return N_("functions cannot return __fp16 type"); ++ return NULL; ++} ++ ++/* Implement TARGET_PROMOTED_TYPE. */ ++ ++static tree ++arm_promoted_type (const_tree t) ++{ ++ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) ++ return float_type_node; ++ return NULL_TREE; ++} ++ ++/* Implement TARGET_CONVERT_TO_TYPE. ++ Specifically, this hook implements the peculiarity of the ARM ++ half-precision floating-point C semantics that requires conversions between ++ __fp16 to or from double to do an intermediate conversion to float. */ ++ ++static tree ++arm_convert_to_type (tree type, tree expr) ++{ ++ tree fromtype = TREE_TYPE (expr); ++ if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) ++ return NULL_TREE; ++ if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) ++ || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) ++ return convert (type, convert (float_type_node, expr)); ++ return NULL_TREE; ++} ++ ++/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. ++ This simply adds HFmode as a supported mode; even though we don't ++ implement arithmetic on this type directly, it's supported by ++ optabs conversions, much the way the double-word arithmetic is ++ special-cased in the default hook. 
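++ In practice HFmode arithmetic is therefore expanded via SFmode:
++ operands are converted to single precision, operated on, and
++ converted back, matching the __fp16 promotion rule implemented in
++ arm_promoted_type above.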
*/ ++ ++static bool ++arm_scalar_mode_supported_p (enum machine_mode mode) ++{ ++ if (mode == HFmode) ++ return (arm_fp16_format != ARM_FP16_FORMAT_NONE); ++ else ++ return default_scalar_mode_supported_p (mode); + } + + /* Errors in the source file can cause expand_expr to return const0_rtx +@@ -17191,6 +18684,7 @@ + unsigned HOST_WIDE_INT mask = 0xff; + int i; + ++ val = val & (unsigned HOST_WIDE_INT)0xffffffffu; + if (val == 0) /* XXX */ + return 0; + +@@ -18279,40 +19773,8 @@ + else + { + int set_float_abi_attributes = 0; +- switch (arm_fpu_arch) +- { +- case FPUTYPE_FPA: +- fpu_name = "fpa"; +- break; +- case FPUTYPE_FPA_EMU2: +- fpu_name = "fpe2"; +- break; +- case FPUTYPE_FPA_EMU3: +- fpu_name = "fpe3"; +- break; +- case FPUTYPE_MAVERICK: +- fpu_name = "maverick"; +- break; +- case FPUTYPE_VFP: +- fpu_name = "vfp"; +- set_float_abi_attributes = 1; +- break; +- case FPUTYPE_VFP3D16: +- fpu_name = "vfpv3-d16"; +- set_float_abi_attributes = 1; +- break; +- case FPUTYPE_VFP3: +- fpu_name = "vfpv3"; +- set_float_abi_attributes = 1; +- break; +- case FPUTYPE_NEON: +- fpu_name = "neon"; +- set_float_abi_attributes = 1; +- break; +- default: +- abort(); +- } +- if (set_float_abi_attributes) ++ fpu_name = arm_fpu_desc->name; ++ if (arm_fp_model == ARM_FP_MODEL_VFP) + { + if (TARGET_HARD_FLOAT) + asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); +@@ -18362,6 +19824,11 @@ + val = 6; + asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + ++ /* Tag_ABI_FP_16bit_format. */ ++ if (arm_fp16_format) ++ asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", ++ (int)arm_fp16_format); ++ + if (arm_lang_output_object_attributes_hook) + arm_lang_output_object_attributes_hook(); + } +@@ -18591,6 +20058,23 @@ + return 1; + } + ++/* Emit a fp16 constant appropriately padded to occupy a 4-byte word. ++ HFmode constant pool entries are actually loaded with ldr. */ ++void ++arm_emit_fp16_const (rtx c) ++{ ++ REAL_VALUE_TYPE r; ++ long bits; ++ ++ REAL_VALUE_FROM_CONST_DOUBLE (r, c); ++ bits = real_to_target (NULL, &r, HFmode); ++ if (WORDS_BIG_ENDIAN) ++ assemble_zeros (2); ++ assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); ++ if (!WORDS_BIG_ENDIAN) ++ assemble_zeros (2); ++} ++ + const char * + arm_output_load_gr (rtx *operands) + { +@@ -18628,19 +20112,24 @@ + that way. */ + + static void +-arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum, ++arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, + enum machine_mode mode, + tree type, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) + { +- int nregs = cum->nregs; +- if (nregs & 1 +- && ARM_DOUBLEWORD_ALIGN +- && arm_needs_doubleword_align (mode, type)) +- nregs++; +- ++ int nregs; ++ + cfun->machine->uses_anonymous_args = 1; ++ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) ++ { ++ nregs = pcum->aapcs_ncrn; ++ if ((nregs & 1) && arm_needs_doubleword_align (mode, type)) ++ nregs++; ++ } ++ else ++ nregs = pcum->nregs; ++ + if (nregs < NUM_ARG_REGS) + *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; + } +@@ -19024,9 +20513,10 @@ + || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) + return true; + +- if ((mode == V2SImode) +- || (mode == V4HImode) +- || (mode == V8QImode)) ++ if ((TARGET_NEON || TARGET_IWMMXT) ++ && ((mode == V2SImode) ++ || (mode == V4HImode) ++ || (mode == V8QImode))) + return true; + + return false; +@@ -19057,9 +20547,14 @@ + if (IS_FPA_REGNUM (regno)) + return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM; + +- /* FIXME: VFPv3 register numbering. 
*/ + if (IS_VFP_REGNUM (regno)) +- return 64 + regno - FIRST_VFP_REGNUM; ++ { ++ /* See comment in arm_dwarf_register_span. */ ++ if (VFP_REGNO_OK_FOR_SINGLE (regno)) ++ return 64 + regno - FIRST_VFP_REGNUM; ++ else ++ return 256 + (regno - FIRST_VFP_REGNUM) / 2; ++ } + + if (IS_IWMMXT_GR_REGNUM (regno)) + return 104 + regno - FIRST_IWMMXT_GR_REGNUM; +@@ -19070,6 +20565,39 @@ + gcc_unreachable (); + } + ++/* Dwarf models VFPv3 registers as 32 64-bit registers. ++ GCC models tham as 64 32-bit registers, so we need to describe this to ++ the DWARF generation code. Other registers can use the default. */ ++static rtx ++arm_dwarf_register_span(rtx rtl) ++{ ++ unsigned regno; ++ int nregs; ++ int i; ++ rtx p; ++ ++ regno = REGNO (rtl); ++ if (!IS_VFP_REGNUM (regno)) ++ return NULL_RTX; ++ ++ /* The EABI defines two VFP register ranges: ++ 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent) ++ 256-287: D0-D31 ++ The recommended encodings for s0-s31 is a DW_OP_bit_piece of the ++ corresponding D register. However gdb6.6 does not support this, so ++ we use the legacy encodings. We also use these encodings for D0-D15 ++ for compatibility with older debuggers. */ ++ if (VFP_REGNO_OK_FOR_SINGLE (regno)) ++ return NULL_RTX; ++ ++ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; ++ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs)); ++ regno = (regno - FIRST_VFP_REGNUM) / 2; ++ for (i = 0; i < nregs; i++) ++ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); ++ ++ return p; ++} + + #ifdef TARGET_UNWIND_INFO + /* Emit unwind directives for a store-multiple instruction or stack pointer +@@ -19556,6 +21084,7 @@ + case cortexr4f: + case cortexa8: + case cortexa9: ++ case marvell_f: + return 2; + + default: +@@ -19620,6 +21149,10 @@ + return "St9__va_list"; + } + ++ /* Half-precision float. */ ++ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) ++ return "Dh"; ++ + if (TREE_CODE (type) != VECTOR_TYPE) + return NULL; + +@@ -19676,6 +21209,86 @@ + given on the command line. */ + if (level > 0) + flag_section_anchors = 2; ++ ++ if (size) ++ { ++ /* Select optimizations that are a win for code size. ++ ++ The inlining options set below have two important ++ consequences for functions not explicitly marked ++ inline: ++ - Static functions used once are inlined if ++ sufficiently small. Static functions used twice ++ are not inlined. ++ - Non-static functions are never inlined. ++ So in effect, inlining will never cause two copies ++ of function bodies to be created. */ ++ /* Empirical results show that these options benefit code ++ size on arm. */ ++ /* FIXME: -fsee seems to be broken for Thumb-2. */ ++ /* flag_see = 1; */ ++ flag_move_loop_invariants = 0; ++ /* In Thumb mode the function call code size overhead is typically very ++ small, and narrow branch instructions have very limited range. ++ Inlining even medium sized functions tends to bloat the caller and ++ require the use of long branch instructions. On average the long ++ branches cost more than eliminating the function call overhead saves, ++ so we use extremely restrictive automatic inlining heuristics. In ARM ++ mode the results are fairly neutral, probably due to better constant ++ pool placement. */ ++ set_param_value ("max-inline-insns-single", 1); ++ set_param_value ("max-inline-insns-auto", 1); ++ } ++ else ++ { ++ /* CSL LOCAL */ ++ /* Set flag_unroll_loops to a default value, so that we can tell ++ if it was specified on the command line; see ++ arm_override_options. 
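++ The value 2 acts as an 'unset' sentinel: arm_override_options
++ replaces it with 1 (capping max-unroll-times at 2 for -O2 and 4
++ for -O3 and above) or with 0 when unrolling is not wanted.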
*/ ++ flag_unroll_loops = 2; ++ /* Promote loop indices to int where possible. Consider moving this ++ to -Os, also. */ ++ flag_promote_loop_indices = 1; ++ } ++} ++ ++/* Return how many instructions to look ahead for better insn ++ scheduling. */ ++static int ++arm_multipass_dfa_lookahead (void) ++{ ++ return (arm_tune == marvell_f) ? 4 : 0; ++} ++ ++/* Return the minimum alignment required to load or store a ++ vector of the given type, which may be less than the ++ natural alignment of the type. */ ++ ++static int ++arm_vector_min_alignment (const_tree type) ++{ ++ if (TARGET_NEON) ++ { ++ /* The NEON element load and store instructions only require the ++ alignment of the element type. They can benefit from higher ++ statically reported alignment, but we do not take advantage ++ of that yet. */ ++ gcc_assert (TREE_CODE (type) == VECTOR_TYPE); ++ return TYPE_ALIGN_UNIT (TREE_TYPE (type)); ++ } ++ ++ return default_vector_min_alignment (type); ++} ++ ++static bool ++arm_vector_always_misalign(const_tree type ATTRIBUTE_UNUSED) ++{ ++ /* On big-endian targets array loads (vld1) and vector loads (vldm) ++ use a different format. Always use the "misaligned" array variant. ++ FIXME: this still doesn't work for big-endian because of constant ++ loads and other operations using vldm ordering. See ++ issue 6722. */ ++ return TARGET_NEON && !BYTES_BIG_ENDIAN; + } + + #include "gt-arm.h" +diff -Nur a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def +--- a/gcc/config/arm/arm-cores.def 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/arm-cores.def 2010-01-25 09:50:28.975687047 +0100 +@@ -104,6 +104,7 @@ + ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale) + ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) + ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) ++ARM_CORE("marvell-f", marvell_f, 5TE, FL_LDSCHED | FL_VFPV2 | FL_MARVELL_F, 9e) + + /* V5TEJ Architecture Processors */ + ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e) +@@ -117,9 +118,13 @@ + ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) + ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) ++ ++/* V7 Architecture Processors */ ++ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) + ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e) + ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) + ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) + ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) + ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) ++ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) +diff -Nur a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h +--- a/gcc/config/arm/arm.h 2009-04-23 02:31:13.000000000 +0200 ++++ b/gcc/config/arm/arm.h 2010-01-25 09:50:28.975687047 +0100 +@@ -85,6 +85,10 @@ + builtin_define ("__IWMMXT__"); \ + if (TARGET_AAPCS_BASED) \ + builtin_define ("__ARM_EABI__"); \ ++ if (arm_tune_marvell_f) \ ++ builtin_define ("__ARM_TUNE_MARVELL_F__"); \ ++ if (low_irq_latency) \ ++ builtin_define ("__low_irq_latency__"); \ + } while (0) + + /* The various ARM cores. */ +@@ -199,6 +203,13 @@ + #define TARGET_AAPCS_BASED \ + (arm_abi != ARM_ABI_APCS && arm_abi != ARM_ABI_ATPCS) + ++/* True if we should avoid generating conditional execution instructions. 
*/ ++#define TARGET_NO_COND_EXEC (arm_tune_marvell_f && !optimize_size) ++/* Avoid most conditional instructions, but allow pairs with opposite ++ conditions and the same destination. */ ++#define TARGET_NO_SINGLE_COND_EXEC \ ++ ((arm_tune_cortex_a9 || arm_tune_marvell_f) && !optimize_size) ++ + #define TARGET_HARD_TP (target_thread_pointer == TP_CP15) + #define TARGET_SOFT_TP (target_thread_pointer == TP_SOFT) + +@@ -211,35 +222,43 @@ + /* Thumb-1 only. */ + #define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) + ++#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2) + /* The following two macros concern the ability to execute coprocessor + instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently + only ever tested when we know we are generating for VFP hardware; we need + to be more careful with TARGET_NEON as noted below. */ + + /* FPU is has the full VFPv3/NEON register file of 32 D registers. */ +-#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \ +- && (arm_fpu_arch == FPUTYPE_VFP3 \ +- || arm_fpu_arch == FPUTYPE_NEON)) ++#define TARGET_VFPD32 (TARGET_VFP && arm_arch_vfp_regs == VFP_REG_D32) + + /* FPU supports VFPv3 instructions. */ +-#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \ +- && (arm_fpu_arch == FPUTYPE_VFP3D16 \ +- || TARGET_VFPD32)) ++#define TARGET_VFP3 (TARGET_VFP && arm_arch_vfp_rev >= 3) ++ ++/* FPU only supports VFP single-precision instructions. */ ++#define TARGET_VFP_SINGLE (TARGET_VFP && arm_arch_vfp_regs == VFP_REG_SINGLE) ++ ++/* FPU supports VFP double-precision instructions. */ ++#define TARGET_VFP_DOUBLE (TARGET_VFP && arm_arch_vfp_regs != VFP_REG_SINGLE) ++ ++/* FPU supports half-precision floating-point with NEON element load/store. */ ++#define TARGET_NEON_FP16 (TARGET_VFP && arm_arch_vfp_neon && arm_arch_vfp_fp16) ++ ++/* FPU supports VFP half-precision floating-point. */ ++#define TARGET_FP16 (TARGET_VFP && arm_arch_vfp_fp16) + + /* FPU supports Neon instructions. The setting of this macro gets + revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT + and TARGET_HARD_FLOAT to ensure that NEON instructions are + available. */ + #define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ +- && arm_fp_model == ARM_FP_MODEL_VFP \ +- && arm_fpu_arch == FPUTYPE_NEON) ++ && TARGET_VFP && arm_arch_vfp_neon) + + /* "DSP" multiply instructions, eg. SMULxy. */ + #define TARGET_DSP_MULTIPLY \ +- (TARGET_32BIT && arm_arch5e && arm_arch_notm) ++ (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7em)) + /* Integer SIMD instructions, and extend-accumulate instructions. */ + #define TARGET_INT_SIMD \ +- (TARGET_32BIT && arm_arch6 && arm_arch_notm) ++ (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) + + /* Should MOVW/MOVT be used in preference to a constant pool. */ + #define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) +@@ -289,40 +308,30 @@ + ARM_FP_MODEL_VFP + }; + +-extern enum arm_fp_model arm_fp_model; +- +-/* Which floating point hardware is available. Also update +- fp_model_for_fpu in arm.c when adding entries to this list. */ +-enum fputype +-{ +- /* No FP hardware. */ +- FPUTYPE_NONE, +- /* Full FPA support. */ +- FPUTYPE_FPA, +- /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */ +- FPUTYPE_FPA_EMU2, +- /* Emulated FPA hardware, Issue 3 emulator. */ +- FPUTYPE_FPA_EMU3, +- /* Cirrus Maverick floating point co-processor. */ +- FPUTYPE_MAVERICK, +- /* VFP. */ +- FPUTYPE_VFP, +- /* VFPv3-D16. */ +- FPUTYPE_VFP3D16, +- /* VFPv3. */ +- FPUTYPE_VFP3, +- /* Neon. 
*/ +- FPUTYPE_NEON ++enum vfp_reg_type { ++ VFP_REG_D16, ++ VFP_REG_D32, ++ VFP_REG_SINGLE + }; + +-/* Recast the floating point class to be the floating point attribute. */ +-#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune) +- +-/* What type of floating point to tune for */ +-extern enum fputype arm_fpu_tune; ++extern const struct arm_fpu_desc ++{ ++ const char *name; ++ enum arm_fp_model model; ++ int rev; ++ enum vfp_reg_type myregs; ++ int neon; ++ int fp16; ++} *arm_fpu_desc; ++ ++#define arm_fp_model arm_fpu_desc->model ++#define arm_arch_vfp_rev arm_fpu_desc->rev ++#define arm_arch_vfp_regs arm_fpu_desc->myregs ++#define arm_arch_vfp_neon arm_fpu_desc->neon ++#define arm_arch_vfp_fp16 arm_fpu_desc->fp16 + +-/* What type of floating point instructions are available */ +-extern enum fputype arm_fpu_arch; ++/* Which floating point hardware to schedule for. */ ++extern int arm_fpu_attr; + + enum float_abi_type + { +@@ -337,6 +346,21 @@ + #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + #endif + ++/* Which __fp16 format to use. ++ The enumeration values correspond to the numbering for the ++ Tag_ABI_FP_16bit_format attribute. ++ */ ++enum arm_fp16_format_type ++{ ++ ARM_FP16_FORMAT_NONE = 0, ++ ARM_FP16_FORMAT_IEEE = 1, ++ ARM_FP16_FORMAT_ALTERNATIVE = 2 ++}; ++ ++extern enum arm_fp16_format_type arm_fp16_format; ++#define LARGEST_EXPONENT_IS_NORMAL(bits) \ ++ ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) ++ + /* Which ABI to use. */ + enum arm_abi_type + { +@@ -383,12 +407,18 @@ + /* Nonzero if instructions not present in the 'M' profile can be used. */ + extern int arm_arch_notm; + ++/* Nonzero if instructions present in ARMv7E-M can be used. */ ++extern int arm_arch7em; ++ + /* Nonzero if this chip can benefit from load scheduling. */ + extern int arm_ld_sched; + + /* Nonzero if generating thumb code. */ + extern int thumb_code; + ++/* Nonzero if generating Janus2 code. */ ++extern int janus2_code; ++ + /* Nonzero if this chip is a StrongARM. */ + extern int arm_tune_strongarm; + +@@ -404,6 +434,9 @@ + /* Nonzero if tuning for XScale. */ + extern int arm_tune_xscale; + ++/* Nonzero if tuning for Marvell Feroceon. */ ++extern int arm_tune_marvell_f; ++ + /* Nonzero if tuning for stores via the write buffer. */ + extern int arm_tune_wbuf; + +@@ -423,6 +456,10 @@ + /* Nonzero if chip supports integer division instruction. */ + extern int arm_arch_hwdiv; + ++/* Nonzero if we should minimize interrupt latency of the ++ generated code. */ ++extern int low_irq_latency; ++ + #ifndef TARGET_DEFAULT + #define TARGET_DEFAULT (MASK_APCS_FRAME) + #endif +@@ -757,12 +794,11 @@ + fixed_regs[regno] = call_used_regs[regno] = 1; \ + } \ + \ +- if (TARGET_THUMB && optimize_size) \ +- { \ +- /* When optimizing for size, it's better not to use \ +- the HI regs, because of the overhead of stacking \ +- them. */ \ +- /* ??? Is this still true for thumb2? */ \ ++ if (TARGET_THUMB1 && optimize_size) \ ++ { \ ++ /* When optimizing for size on Thumb-1, it's better not \ ++ to use the HI regs, because of the overhead of \ ++ stacking them. */ \ + for (regno = FIRST_HI_REGNUM; \ + regno <= LAST_HI_REGNUM; ++regno) \ + fixed_regs[regno] = call_used_regs[regno] = 1; \ +@@ -881,6 +917,9 @@ + /* The number of (integer) argument register available. */ + #define NUM_ARG_REGS 4 + ++/* And similarly for the VFP. */ ++#define NUM_VFP_ARG_REGS 16 ++ + /* Return the register number of the N'th (integer) argument. 
*/ + #define ARG_REGISTER(N) (N - 1) + +@@ -1059,7 +1098,7 @@ + (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + + #define VALID_IWMMXT_REG_MODE(MODE) \ +- (arm_vector_mode_supported_p (MODE) || (MODE) == DImode) ++ (arm_vector_mode_supported_p (MODE) || (MODE) == DImode || (MODE) == SImode) + + /* Modes valid for Neon D registers. */ + #define VALID_NEON_DREG_MODE(MODE) \ +@@ -1230,11 +1269,14 @@ + || reg_classes_intersect_p (VFP_REGS, (CLASS)) \ + : 0) + +-/* We need to define this for LO_REGS on thumb. Otherwise we can end up +- using r0-r4 for function arguments, r7 for the stack frame and don't +- have enough left over to do doubleword arithmetic. */ ++/* We need to define this for LO_REGS on Thumb-1. Otherwise we can end up ++ using r0-r4 for function arguments, r7 for the stack frame and don't have ++ enough left over to do doubleword arithmetic. For Thumb-2 all the ++ potentially problematic instructions accept high registers so this is not ++ necessary. Care needs to be taken to avoid adding new Thumb-2 patterns ++ that require many low registers. */ + #define CLASS_LIKELY_SPILLED_P(CLASS) \ +- ((TARGET_THUMB && (CLASS) == LO_REGS) \ ++ ((TARGET_THUMB1 && (CLASS) == LO_REGS) \ + || (CLASS) == CC_REG) + + /* The class value for index registers, and the one for base regs. */ +@@ -1245,7 +1287,7 @@ + when addressing quantities in QI or HI mode; if we don't know the + mode, then we must be conservative. */ + #define MODE_BASE_REG_CLASS(MODE) \ +- (TARGET_32BIT ? CORE_REGS : \ ++ (TARGET_32BIT ? (TARGET_THUMB2 ? LO_REGS : CORE_REGS) : \ + (((MODE) == SImode) ? BASE_REGS : LO_REGS)) + + /* For Thumb we can not support SP+reg addressing, so we return LO_REGS +@@ -1346,6 +1388,9 @@ + else if (TARGET_MAVERICK && TARGET_HARD_FLOAT) \ + /* Need to be careful, -256 is not a valid offset. */ \ + low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); \ ++ else if (TARGET_REALLY_IWMMXT && MODE == SImode) \ ++ /* Need to be careful, -1024 is not a valid offset. */ \ ++ low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff); \ + else if (MODE == SImode \ + || (MODE == SFmode && TARGET_SOFT_FLOAT) \ + || ((MODE == HImode || MODE == QImode) && ! arm_arch4)) \ +@@ -1416,13 +1461,17 @@ + /* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the mode of the object illegally. */ + +-/* Moves between FPA_REGS and GENERAL_REGS are two memory insns. */ ++/* Moves between FPA_REGS and GENERAL_REGS are two memory insns. ++ Moves between VFP_REGS and GENERAL_REGS are a single insn, but ++ it is typically more expensive than a single memory access. We set ++ the cost to less than two memory accesses so that floating ++ point to integer conversion does not go through memory. */ + #define REGISTER_MOVE_COST(MODE, FROM, TO) \ + (TARGET_32BIT ? \ + ((FROM) == FPA_REGS && (TO) != FPA_REGS ? 20 : \ + (FROM) != FPA_REGS && (TO) == FPA_REGS ? 20 : \ +- IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 10 : \ +- !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 10 : \ ++ IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 15 : \ ++ !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 15 : \ + (FROM) == IWMMXT_REGS && (TO) != IWMMXT_REGS ? 4 : \ + (FROM) != IWMMXT_REGS && (TO) == IWMMXT_REGS ? 4 : \ + (FROM) == IWMMXT_GR_REGS || (TO) == IWMMXT_GR_REGS ? 20 : \ +@@ -1491,9 +1540,10 @@ + + /* Define how to find the value returned by a library function + assuming the value has mode MODE. 
*/ +-#define LIBCALL_VALUE(MODE) \ +- (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \ +- && GET_MODE_CLASS (MODE) == MODE_FLOAT \ ++#define LIBCALL_VALUE(MODE) \ ++ (TARGET_AAPCS_BASED ? aapcs_libcall_value (MODE) \ ++ : (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \ ++ && GET_MODE_CLASS (MODE) == MODE_FLOAT) \ + ? gen_rtx_REG (MODE, FIRST_FPA_REGNUM) \ + : TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK \ + && GET_MODE_CLASS (MODE) == MODE_FLOAT \ +@@ -1502,22 +1552,16 @@ + ? gen_rtx_REG (MODE, FIRST_IWMMXT_REGNUM) \ + : gen_rtx_REG (MODE, ARG_REGISTER (1))) + +-/* Define how to find the value returned by a function. +- VALTYPE is the data type of the value (as a tree). +- If the precise function being called is known, FUNC is its FUNCTION_DECL; +- otherwise, FUNC is 0. */ +-#define FUNCTION_VALUE(VALTYPE, FUNC) \ +- arm_function_value (VALTYPE, FUNC); +- +-/* 1 if N is a possible register number for a function value. +- On the ARM, only r0 and f0 can return results. */ +-/* On a Cirrus chip, mvf0 can return results. */ +-#define FUNCTION_VALUE_REGNO_P(REGNO) \ +- ((REGNO) == ARG_REGISTER (1) \ +- || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \ +- && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \ +- || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \ +- || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \ ++/* 1 if REGNO is a possible register number for a function value. */ ++#define FUNCTION_VALUE_REGNO_P(REGNO) \ ++ ((REGNO) == ARG_REGISTER (1) \ ++ || (TARGET_AAPCS_BASED && TARGET_32BIT \ ++ && TARGET_VFP && TARGET_HARD_FLOAT \ ++ && (REGNO) == FIRST_VFP_REGNUM) \ ++ || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \ ++ && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \ ++ || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \ ++ || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \ + && TARGET_HARD_FLOAT_ABI && TARGET_FPA)) + + /* Amount of memory needed for an untyped call to save all possible return +@@ -1617,9 +1661,27 @@ + that is in text_section. */ + extern GTY(()) rtx thumb_call_via_label[14]; + ++/* The number of potential ways of assigning to a co-processor. */ ++#define ARM_NUM_COPROC_SLOTS 1 ++ ++/* Enumeration of procedure calling standard variants. We don't really ++ support all of these yet. */ ++enum arm_pcs ++{ ++ ARM_PCS_AAPCS, /* Base standard AAPCS. */ ++ ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */ ++ ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */ ++ /* This must be the last AAPCS variant. */ ++ ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */ ++ ARM_PCS_ATPCS, /* ATPCS. */ ++ ARM_PCS_APCS, /* APCS (legacy Linux etc). */ ++ ARM_PCS_UNKNOWN ++}; ++ ++/* We can't define this inside a generator file because it needs enum ++ machine_mode. */ + /* A C type for declaring a variable that is used as the first argument of +- `FUNCTION_ARG' and other related values. For some target machines, the +- type `int' suffices and can hold the number of bytes of argument so far. */ ++ `FUNCTION_ARG' and other related values. */ + typedef struct + { + /* This is the number of registers of arguments scanned so far. */ +@@ -1628,9 +1690,33 @@ + int iwmmxt_nregs; + int named_count; + int nargs; +- int can_split; ++ /* Which procedure call variant to use for this call. */ ++ enum arm_pcs pcs_variant; ++ ++ /* AAPCS related state tracking. */ ++ int aapcs_arg_processed; /* No need to lay out this argument again. 
*/
++ int aapcs_cprc_slot; /* Index of co-processor rules to handle
++ this argument, or -1 if using core
++ registers. */
++ int aapcs_ncrn;
++ int aapcs_next_ncrn;
++ rtx aapcs_reg; /* Register assigned to this argument. */
++ int aapcs_partial; /* How many bytes are passed in regs (if
++ split between core regs and stack).
++ Zero otherwise. */
++ int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS];
++ int can_split; /* Argument can be split between core regs
++ and the stack. */
++ /* Private data for tracking VFP register allocation. */
++ unsigned aapcs_vfp_regs_free;
++ unsigned aapcs_vfp_reg_alloc;
++ int aapcs_vfp_rcount;
++ /* Can't include insn-modes.h because this header is needed before we
++ generate it. */
++ int /* enum machine_mode */ aapcs_vfp_rmode;
+ } CUMULATIVE_ARGS;
+
++
+ /* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+@@ -1674,13 +1760,7 @@
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+ #define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
+- (CUM).nargs += 1; \
+- if (arm_vector_mode_supported_p (MODE) \
+- && (CUM).named_count > (CUM).nargs \
+- && TARGET_IWMMXT_ABI) \
+- (CUM).iwmmxt_nregs += 1; \
+- else \
+- (CUM).nregs += ARM_NUM_REGS2 (MODE, TYPE)
++ arm_function_arg_advance (&(CUM), (MODE), (TYPE), (NAMED))
+
+ /* If defined, a C expression that gives the alignment boundary, in bits, of an
+ argument with the specified mode and type. If it is not defined,
+@@ -1692,9 +1772,11 @@
+
+ /* 1 if N is a possible register number for function argument passing.
+ On the ARM, r0-r3 are used to pass args. */
+-#define FUNCTION_ARG_REGNO_P(REGNO) \
+- (IN_RANGE ((REGNO), 0, 3) \
+- || (TARGET_IWMMXT_ABI \
++#define FUNCTION_ARG_REGNO_P(REGNO) \
++ (IN_RANGE ((REGNO), 0, 3) \
++ || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \
++ && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \
++ || (TARGET_IWMMXT_ABI \
+ && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9)))
+
+
+@@ -2324,7 +2406,8 @@
+ /* Try to generate sequences that don't involve branches, we can then use
+ conditional instructions */
+ #define BRANCH_COST(speed_p, predictable_p) \
+- (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
++ (TARGET_32BIT ? (TARGET_THUMB2 && optimize_size ? 1 : 4) \
++ : (optimize > 0 ? 2 : 0))
+
+ /* Position Independent Code. */
+ /* We decide which register to use based on the compilation options and
+@@ -2392,6 +2475,7 @@
+
+ /* The arm5 clz instruction returns 32. */
+ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+
+ #undef ASM_APP_OFF
+ #define ASM_APP_OFF (TARGET_THUMB1 ? "\t.code\t16\n" : \
+@@ -2404,6 +2488,19 @@
+ if (TARGET_ARM) \
+ asm_fprintf (STREAM,"\tstmfd\t%r!,{%r}\n", \
+ STACK_POINTER_REGNUM, REGNO); \
++ else if (TARGET_THUMB1 \
++ && (REGNO) == STATIC_CHAIN_REGNUM) \
++ { \
++ /* We can't push STATIC_CHAIN_REGNUM (r12) directly with Thumb-1.
++ We know that ASM_OUTPUT_REG_PUSH will be matched with
++ ASM_OUTPUT_REG_POP, and that r7 isn't used by the function
++ profiler, so we can use it as a scratch reg. WARNING: This isn't
++ safe in the general case! It may be sensitive to future changes
++ in final.c:profile_function. 
*/ \
++ asm_fprintf (STREAM, "\tpush\t{r7}\n"); \
++ asm_fprintf (STREAM, "\tmov\tr7, %r\n", REGNO);\
++ asm_fprintf (STREAM, "\tpush\t{r7}\n"); \
++ } \
+ else \
+ asm_fprintf (STREAM, "\tpush {%r}\n", REGNO); \
+ } while (0)
+@@ -2415,6 +2512,14 @@
+ if (TARGET_ARM) \
+ asm_fprintf (STREAM, "\tldmfd\t%r!,{%r}\n", \
+ STACK_POINTER_REGNUM, REGNO); \
++ else if (TARGET_THUMB1 \
++ && (REGNO) == STATIC_CHAIN_REGNUM) \
++ { \
++ /* See comment in ASM_OUTPUT_REG_PUSH. */ \
++ asm_fprintf (STREAM, "\tpop\t{r7}\n"); \
++ asm_fprintf (STREAM, "\tmov\t%r, r7\n", REGNO);\
++ asm_fprintf (STREAM, "\tpop\t{r7}\n"); \
++ } \
+ else \
+ asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \
+ } while (0)
+diff -Nur a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
+--- a/gcc/config/arm/arm.md 2009-05-16 15:28:27.000000000 +0200
++++ b/gcc/config/arm/arm.md 2010-01-25 09:50:28.985687200 +0100
+@@ -99,6 +99,7 @@
+ ; correctly for PIC usage.
+ (UNSPEC_GOTSYM_OFF 24) ; The offset of the start of the the GOT from a
+ ; a given symbolic address.
++ (UNSPEC_RBIT 25) ; rbit operation.
+ ]
+ )
+
+@@ -131,6 +132,8 @@
+ (VUNSPEC_WCMP_EQ 12) ; Used by the iWMMXt WCMPEQ instructions
+ (VUNSPEC_WCMP_GTU 13) ; Used by the iWMMXt WCMPGTU instructions
+ (VUNSPEC_WCMP_GT 14) ; Used by the iwMMXT WCMPGT instructions
++ (VUNSPEC_ALIGN16 15) ; Used to force 16-byte alignment.
++ (VUNSPEC_ALIGN32 16) ; Used to force 32-byte alignment.
+ (VUNSPEC_EH_RETURN 20); Use to override the return address for exception
+ ; handling.
+ ]
+@@ -144,6 +147,10 @@
+ ; patterns that share the same RTL in both ARM and Thumb code.
+ (define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code")))
+
++; FIX_JANUS is set to 'yes' when compiling for Janus2; it causes a nop
++; to be added after shifts, in order to work around a Janus2 bug.
+(define_attr "fix_janus" "no,yes" (const (symbol_ref "janus2_code")))
++
+ ; IS_STRONGARM is set to 'yes' when compiling for StrongARM, it affects
+ ; scheduling decisions for the load unit and the multiplier.
+ (define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_tune_strongarm")))
+@@ -158,7 +165,7 @@
+ ; Floating Point Unit. If we only have floating point emulation, then there
+ ; is no point in scheduling the floating point insns. (Well, for best
+ ; performance we should try and group them together).
+-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon"
++(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
+ (const (symbol_ref "arm_fpu_attr")))
+
+ ; LENGTH of an instruction (in bytes)
+@@ -185,7 +192,7 @@
+ ;; scheduling information.
+
+ (define_attr "insn"
+- "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
++ "mov,mvn,and,orr,eor,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
+ (const_string "other"))
+
+ ; TYPE attribute is used to detect floating point instructions which, if
+@@ -251,8 +258,6 @@
+ (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
+
+ ;; Classification of NEON instructions for scheduling purposes.
+-;; Do not set this attribute and the "type" attribute together in
+-;; any one instruction pattern. 
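The Thumb-1 ASM_OUTPUT_REG_PUSH/ASM_OUTPUT_REG_POP pair added above round-trips the static chain register (r12) through r7 because the 16-bit PUSH encoding only reaches r0-r7 and lr. A host-side sketch (ours, not part of the patch) of the stack effect, showing that both r7 and r12 come back intact:

/* Sketch only: models the push {r7}; mov r7, ip; push {r7} and
   pop {r7}; mov ip, r7; pop {r7} sequences with a toy stack.  */
#include <assert.h>

static int stack[8], sp;
static void push (int v) { stack[sp++] = v; }
static int pop (void) { return stack[--sp]; }

int main (void)
{
  int r7 = 7, ip = 12;               /* live values before the profiler call */
  push (r7); r7 = ip; push (r7);     /* ASM_OUTPUT_REG_PUSH */
  r7 = 0;                            /* the called code may clobber r7 */
  r7 = pop (); ip = r7; r7 = pop (); /* ASM_OUTPUT_REG_POP */
  assert (r7 == 7 && ip == 12);      /* both registers restored */
  return 0;
}
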
+ (define_attr "neon_type" + "neon_int_1,\ + neon_int_2,\ +@@ -415,7 +420,7 @@ + + (define_attr "generic_sched" "yes,no" + (const (if_then_else +- (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9") ++ (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9,marvell_f") + (eq_attr "tune_cortexr4" "yes")) + (const_string "no") + (const_string "yes")))) +@@ -423,7 +428,7 @@ + (define_attr "generic_vfp" "yes,no" + (const (if_then_else + (and (eq_attr "fpu" "vfp") +- (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9") ++ (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9,marvell_f") + (eq_attr "tune_cortexr4" "no")) + (const_string "yes") + (const_string "no")))) +@@ -437,6 +442,8 @@ + (include "cortex-a9.md") + (include "cortex-r4.md") + (include "cortex-r4f.md") ++(include "marvell-f.md") ++(include "marvell-f-vfp.md") + (include "vfp11.md") + + +@@ -472,9 +479,9 @@ + if (TARGET_THUMB1) + { + if (GET_CODE (operands[1]) != REG) +- operands[1] = force_reg (SImode, operands[1]); ++ operands[1] = force_reg (DImode, operands[1]); + if (GET_CODE (operands[2]) != REG) +- operands[2] = force_reg (SImode, operands[2]); ++ operands[2] = force_reg (DImode, operands[2]); + } + " + ) +@@ -620,10 +627,11 @@ + sub%?\\t%0, %1, #%n2 + sub%?\\t%0, %1, #%n2 + #" +- "TARGET_32BIT && +- GET_CODE (operands[2]) == CONST_INT ++ "TARGET_32BIT ++ && GET_CODE (operands[2]) == CONST_INT + && !(const_ok_for_arm (INTVAL (operands[2])) +- || const_ok_for_arm (-INTVAL (operands[2])))" ++ || const_ok_for_arm (-INTVAL (operands[2]))) ++ && (reload_completed || !arm_eliminable_register (operands[1]))" + [(clobber (const_int 0))] + " + arm_split_constant (PLUS, SImode, curr_insn, +@@ -639,10 +647,10 @@ + ;; register. Trying to reload it will always fail catastrophically, + ;; so never allow those alternatives to match if reloading is needed. 
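The *thumb1_addsi3 change that follows turns the pattern into an insn_and_split: the new Pa/Pb alternatives accept immediates beyond the 8-bit add/sub range and split them into a first step clamped to +/-255 plus the remainder. A small sketch (ours, not from the patch; it assumes the Pa/Pb constraint ranges keep the remainder itself encodable) of the clamping arithmetic:

/* Sketch only: the offset clamping performed by the Thumb-1 addsi3
   splitter below.  */
#include <assert.h>

static void split_imm (long val, long *first, long *second)
{
  long offset = val;
  if (offset > 255)
    offset = 255;
  else if (offset < -255)
    offset = -255;
  *first = offset;          /* emitted as the first add/sub */
  *second = val - offset;   /* emitted as the second add/sub */
}

int main (void)
{
  long a, b;
  split_imm (400, &a, &b);
  assert (a == 255 && b == 145 && a + b == 400);
  split_imm (-400, &a, &b);
  assert (a == -255 && b == -145 && a + b == -400);
  return 0;
}
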
+ +-(define_insn "*thumb1_addsi3" +- [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,!k") +- (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,!k,!k") +- (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,!M,!O")))] ++(define_insn_and_split "*thumb1_addsi3" ++ [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,!k,l,l") ++ (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,!k,!k,0,l") ++ (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,!M,!O,Pa,Pb")))] + "TARGET_THUMB1" + "* + static const char * const asms[] = +@@ -653,7 +661,9 @@ + \"add\\t%0, %0, %2\", + \"add\\t%0, %0, %2\", + \"add\\t%0, %1, %2\", +- \"add\\t%0, %1, %2\" ++ \"add\\t%0, %1, %2\", ++ \"#\", ++ \"#\" + }; + if ((which_alternative == 2 || which_alternative == 6) + && GET_CODE (operands[2]) == CONST_INT +@@ -661,7 +671,22 @@ + return \"sub\\t%0, %1, #%n2\"; + return asms[which_alternative]; + " +- [(set_attr "length" "2")] ++ "&& reload_completed && CONST_INT_P (operands[2]) ++ && operands[1] != stack_pointer_rtx ++ && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255)" ++ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))] ++ { ++ HOST_WIDE_INT offset = INTVAL (operands[2]); ++ if (offset > 255) ++ offset = 255; ++ else if (offset < -255) ++ offset = -255; ++ ++ operands[3] = GEN_INT (offset); ++ operands[2] = GEN_INT (INTVAL (operands[2]) - offset); ++ } ++ [(set_attr "length" "2,2,2,2,2,2,2,4,4")] + ) + + ;; Reloading and elimination of the frame pointer can +@@ -854,7 +879,11 @@ + [(set_attr "conds" "use") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*addsi3_carryin_alt1" +@@ -938,7 +967,7 @@ + [(set (match_operand:DF 0 "s_register_operand" "") + (plus:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_add_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK + && !cirrus_fp_register (operands[2], DFmode)) +@@ -1176,7 +1205,7 @@ + [(set (match_operand:DF 0 "s_register_operand" "") + (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "") + (match_operand:DF 2 "arm_float_rhs_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK) + { +@@ -1332,6 +1361,49 @@ + (set_attr "predicable" "yes")] + ) + ++; The combiner cannot combine the first and last insns in the ++; following sequence because of the intervening insn, so help the ++; combiner with this splitter. The combiner does attempt to split ++; this particular combination but does not know this exact split. ++; Note that the combiner puts the constant at the outermost operation ++; as a part of canonicalization. 
++;
++; mul r3, r2, r1
++; <op> r3, r3, <constant>
++; add r3, r3, r4
++
++(define_split
++ [(set (match_operand:SI 0 "s_register_operand" "")
++ (match_operator:SI 1 "plusminus_operator"
++ [(plus:SI (mult:SI (match_operand:SI 2 "s_register_operand" "")
++ (match_operand:SI 3 "s_register_operand" ""))
++ (match_operand:SI 4 "s_register_operand" ""))
++ (match_operand:SI 5 "arm_immediate_operand" "")]))]
++ "TARGET_32BIT"
++ [(set (match_dup 0)
++ (plus:SI (mult:SI (match_dup 2) (match_dup 3))
++ (match_dup 4)))
++ (set (match_dup 0)
++ (match_op_dup:SI 1 [(match_dup 0) (match_dup 5)]))]
++ "")
++
++; Likewise for MLS. MLS is available only on select architectures.
++
++(define_split
++ [(set (match_operand:SI 0 "s_register_operand" "")
++ (match_operator:SI 1 "plusminus_operator"
++ [(minus:SI (match_operand:SI 2 "s_register_operand" "")
++ (mult:SI (match_operand:SI 3 "s_register_operand" "")
++ (match_operand:SI 4 "s_register_operand" "")))
++ (match_operand:SI 5 "arm_immediate_operand" "")]))]
++ "TARGET_32BIT && arm_arch_thumb2"
++ [(set (match_dup 0)
++ (minus:SI (match_dup 2)
++ (mult:SI (match_dup 3) (match_dup 4))))
++ (set (match_dup 0)
++ (match_op_dup:SI 1 [(match_dup 0) (match_dup 5)]))]
++ "")
++
+ (define_insn "*mulsi3addsi_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+@@ -1713,7 +1785,7 @@
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (mult:DF (match_operand:DF 1 "s_register_operand" "")
+ (match_operand:DF 2 "arm_float_rhs_operand" "")))]
+- "TARGET_32BIT && TARGET_HARD_FLOAT"
++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "
+ if (TARGET_MAVERICK
+ && !cirrus_fp_register (operands[2], DFmode))
+@@ -1733,7 +1805,7 @@
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "")
+ (match_operand:DF 2 "arm_float_rhs_operand" "")))]
+- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
+ "")
+
+ ;; Modulo insns
+@@ -1960,6 +2032,7 @@
+ DONE;
+ "
+ [(set_attr "length" "4,4,16")
++ (set_attr "insn" "and")
+ (set_attr "predicable" "yes")]
+ )
+
+@@ -1969,7 +2042,8 @@
+ (match_operand:SI 2 "register_operand" "l")))]
+ "TARGET_THUMB1"
+ "and\\t%0, %0, %2"
+- [(set_attr "length" "2")]
++ [(set_attr "length" "2")
++ (set_attr "insn" "and")]
+ )
+
+ (define_insn "*andsi3_compare0"
+@@ -1984,7 +2058,8 @@
+ "@
+ and%.\\t%0, %1, %2
+ bic%.\\t%0, %1, #%B2"
+- [(set_attr "conds" "set")]
++ [(set_attr "conds" "set")
++ (set_attr "insn" "and,*")]
+ )
+
+ (define_insn "*andsi3_compare0_scratch"
+@@ -2280,7 +2355,7 @@
+ }
+ }
+
+- target = operands[0];
++ target = copy_rtx (operands[0]);
+ /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
+ subreg as the final target. 
*/ + if (GET_CODE (target) == SUBREG) +@@ -2528,7 +2603,11 @@ + (set_attr "shift" "2") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*andsi_notsi_si_compare0" +@@ -2576,6 +2655,7 @@ + orr%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") ++ (set_attr "insn" "orr") + (set_attr "predicable" "yes")] + ) + +@@ -2638,7 +2718,8 @@ + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "orr\\t%0, %0, %2" +- [(set_attr "length" "2")] ++ [(set_attr "length" "2") ++ (set_attr "insn" "orr")] + ) + + (define_peephole2 +@@ -2663,7 +2744,8 @@ + (ior:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" +- [(set_attr "conds" "set")] ++ [(set_attr "conds" "set") ++ (set_attr "insn" "orr")] + ) + + (define_insn "*iorsi3_compare0_scratch" +@@ -2674,7 +2756,8 @@ + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" +- [(set_attr "conds" "set")] ++ [(set_attr "conds" "set") ++ (set_attr "insn" "orr")] + ) + + (define_insn "xordi3" +@@ -2697,7 +2780,8 @@ + eor%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") +- (set_attr "predicable" "yes")] ++ (set_attr "predicable" "yes") ++ (set_attr "insn" "eor")] + ) + + (define_insn "*xordi_sesidi_di" +@@ -2728,7 +2812,8 @@ + (match_operand:SI 2 "arm_rhs_operand" "rI")))] + "TARGET_32BIT" + "eor%?\\t%0, %1, %2" +- [(set_attr "predicable" "yes")] ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "eor")] + ) + + (define_insn "*thumb1_xorsi3" +@@ -2737,7 +2822,8 @@ + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "eor\\t%0, %0, %2" +- [(set_attr "length" "2")] ++ [(set_attr "length" "2") ++ (set_attr "insn" "eor")] + ) + + (define_insn "*xorsi3_compare0" +@@ -2749,7 +2835,8 @@ + (xor:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "eor%.\\t%0, %1, %2" +- [(set_attr "conds" "set")] ++ [(set_attr "conds" "set") ++ (set_attr "insn" "eor")] + ) + + (define_insn "*xorsi3_compare0_scratch" +@@ -2906,7 +2993,7 @@ + (smax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + " + if (operands[2] == const0_rtx || operands[2] == constm1_rtx) + { +@@ -2933,7 +3020,8 @@ + (const_int -1)))] + "TARGET_32BIT" + "orr%?\\t%0, %1, %1, asr #31" +- [(set_attr "predicable" "yes")] ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "orr")] + ) + + (define_insn "*arm_smax_insn" +@@ -2941,7 +3029,7 @@ + (smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_COND_EXEC" + "@ + cmp\\t%1, %2\;movlt\\t%0, %2 + cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2" +@@ -2955,7 +3043,7 @@ + (smin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + " + if (operands[2] == const0_rtx) + { +@@ -2973,7 +3061,8 @@ + (const_int 0)))] + "TARGET_32BIT" + "and%?\\t%0, %1, %1, asr #31" +- [(set_attr "predicable" "yes")] ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "and")] + ) + + (define_insn "*arm_smin_insn" +@@ -2981,7 +3070,7 @@ 
+ (smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_COND_EXEC" + "@ + cmp\\t%1, %2\;movge\\t%0, %2 + cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2" +@@ -2995,7 +3084,7 @@ + (umax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "" + ) + +@@ -3004,7 +3093,7 @@ + (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_COND_EXEC" + "@ + cmp\\t%1, %2\;movcc\\t%0, %2 + cmp\\t%1, %2\;movcs\\t%0, %1 +@@ -3019,7 +3108,7 @@ + (umin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "" + ) + +@@ -3028,7 +3117,7 @@ + (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_COND_EXEC" + "@ + cmp\\t%1, %2\;movcs\\t%0, %2 + cmp\\t%1, %2\;movcc\\t%0, %1 +@@ -3043,7 +3132,7 @@ + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")])) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "* + operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, + operands[1], operands[2]); +@@ -3163,11 +3252,23 @@ + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashift:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] +- "TARGET_THUMB1" ++ "TARGET_THUMB1 && !janus2_code" + "lsl\\t%0, %1, %2" + [(set_attr "length" "2")] + ) + ++(define_insn "*thumb1_ashlsi3_janus2" ++ [(set (match_operand:SI 0 "register_operand" "=l,l") ++ (ashift:SI (match_operand:SI 1 "register_operand" "l,0") ++ (match_operand:SI 2 "nonmemory_operand" "N,l")))] ++ "TARGET_THUMB1 && janus2_code" ++ "@ ++ lsl\\t%0, %1, %2 ++ lsl\\t%0, %1, %2\;nop" ++ [(set_attr "length" "2,4")] ++) ++ ++ + (define_expand "ashrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "") +@@ -3200,6 +3301,7 @@ + "TARGET_32BIT" + "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") ++ (set_attr "insn" "mov") + (set_attr "length" "8")] + ) + +@@ -3219,11 +3321,22 @@ + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] +- "TARGET_THUMB1" ++ "TARGET_THUMB1 && !janus2_code" + "asr\\t%0, %1, %2" + [(set_attr "length" "2")] + ) + ++(define_insn "*thumb1_ashrsi3_janus2" ++ [(set (match_operand:SI 0 "register_operand" "=l,l") ++ (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0") ++ (match_operand:SI 2 "nonmemory_operand" "N,l")))] ++ "TARGET_THUMB1 && janus2_code" ++ "@ ++ asr\\t%0, %1, %2 ++ asr\\t%0, %1, %2\;nop" ++ [(set_attr "length" "2,4")] ++) ++ + (define_expand "lshrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "") +@@ -3256,6 +3369,7 @@ + "TARGET_32BIT" + "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") ++ (set_attr "insn" "mov") + (set_attr "length" "8")] + ) + 
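The *arm_smax_M1 and *arm_smin_0 patterns above rely on arithmetic-shift identities rather than conditional execution: smax (x, -1) is x | (x >> 31) and smin (x, 0) is x & (x >> 31), which map directly onto the single orr/and instructions with an asr #31 shifted operand. A quick check (ours; it assumes two's complement and an arithmetic right shift of negative values, as GCC itself does):

/* Sketch only: verifies the shift identities behind the orr/and forms.  */
#include <assert.h>

int main (void)
{
  int v[] = { -123456, -2, -1, 0, 1, 123456 };
  for (unsigned int i = 0; i < sizeof v / sizeof v[0]; i++)
    {
      int x = v[i];
      int m = x >> 31;                        /* 0 for x >= 0, -1 for x < 0 */
      assert ((x | m) == (x > -1 ? x : -1));  /* orr r0, r1, r1, asr #31 */
      assert ((x & m) == (x < 0 ? x : 0));    /* and r0, r1, r1, asr #31 */
    }
  return 0;
}
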
+@@ -3278,11 +3392,22 @@ + [(set (match_operand:SI 0 "register_operand" "=l,l") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] +- "TARGET_THUMB1" ++ "TARGET_THUMB1 && !janus2_code" + "lsr\\t%0, %1, %2" + [(set_attr "length" "2")] + ) + ++(define_insn "*thumb1_lshrsi3_janus2" ++ [(set (match_operand:SI 0 "register_operand" "=l,l") ++ (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0") ++ (match_operand:SI 2 "nonmemory_operand" "N,l")))] ++ "TARGET_THUMB1 && janus2_code" ++ "@ ++ lsr\\t%0, %1, %2 ++ lsr\\t%0, %1, %2; nop" ++ [(set_attr "length" "2,4")] ++) ++ + (define_expand "rotlsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (rotatert:SI (match_operand:SI 1 "s_register_operand" "") +@@ -3324,11 +3449,20 @@ + [(set (match_operand:SI 0 "register_operand" "=l") + (rotatert:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "l")))] +- "TARGET_THUMB1" ++ "TARGET_THUMB1 && !janus2_code" + "ror\\t%0, %0, %2" + [(set_attr "length" "2")] + ) + ++(define_insn "*thumb1_rotrsi3_janus2" ++ [(set (match_operand:SI 0 "register_operand" "=l") ++ (rotatert:SI (match_operand:SI 1 "register_operand" "0") ++ (match_operand:SI 2 "register_operand" "l")))] ++ "TARGET_THUMB1 && janus2_code" ++ "ror\\t%0, %0, %2; nop" ++ [(set_attr "length" "4")] ++) ++ + (define_insn "*arm_shiftsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 3 "shift_operator" +@@ -3340,7 +3474,11 @@ + (set_attr "shift" "1") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*shiftsi3_compare0" +@@ -3357,7 +3495,11 @@ + (set_attr "shift" "1") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*shiftsi3_compare0_scratch" +@@ -3370,7 +3512,11 @@ + "TARGET_32BIT" + "* return arm_output_shift(operands, 1);" + [(set_attr "conds" "set") +- (set_attr "shift" "1")] ++ (set_attr "shift" "1") ++ (set (attr "length") (if_then_else (and (match_operand 2 "s_register_operand" "") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*arm_notsi_shiftsi" +@@ -3382,9 +3528,14 @@ + "mvn%?\\t%0, %1%S3" + [(set_attr "predicable" "yes") + (set_attr "shift" "1") ++ (set_attr "insn" "mvn") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*arm_notsi_shiftsi_compare0" +@@ -3399,9 +3550,14 @@ + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") ++ (set_attr "insn" "mvn") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and 
(eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*arm_not_shiftsi_compare0_scratch" +@@ -3415,9 +3571,14 @@ + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") ++ (set_attr "insn" "mvn") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + ;; We don't really have extzv, but defining this using shifts helps +@@ -3550,12 +3711,12 @@ + (define_expand "negdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (neg:DF (match_operand:DF 1 "s_register_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + "") + + ;; abssi2 doesn't really clobber the condition codes if a different register + ;; is being set. To keep things simple, assume during rtl manipulations that +-;; it does, but tell the final scan operator the truth. Similarly for ++;; it does, and the splitter will eliminate it. Similarly for + ;; (neg (abs...)) + + (define_expand "abssi2" +@@ -3567,22 +3728,28 @@ + " + if (TARGET_THUMB1) + operands[2] = gen_rtx_SCRATCH (SImode); ++ else if (TARGET_NO_SINGLE_COND_EXEC) ++ { ++ emit_insn(gen_rtx_SET(VOIDmode, operands[0], ++ gen_rtx_ABS(SImode, operands[1]))); ++ DONE; ++ } + else + operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); + ") + + (define_insn "*arm_abssi2" +- [(set (match_operand:SI 0 "s_register_operand" "=r,&r") +- (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (abs:SI (match_operand:SI 1 "s_register_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" +- "@ +- cmp\\t%0, #0\;rsblt\\t%0, %0, #0 +- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" +- [(set_attr "conds" "clob,*") +- (set_attr "shift" "1") ++ "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC" ++ "#" ++ [(set_attr "shift" "1") + ;; predicable can't be set based on the variant, so left as no +- (set_attr "length" "8")] ++ (set (attr "length") ++ (if_then_else (eq_attr "is_thumb" "yes") ++ (const_int 10) ++ (const_int 8)))] + ) + + (define_insn_and_split "*thumb1_abssi2" +@@ -3600,17 +3767,17 @@ + ) + + (define_insn "*arm_neg_abssi2" +- [(set (match_operand:SI 0 "s_register_operand" "=r,&r") +- (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" +- "@ +- cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 +- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" +- [(set_attr "conds" "clob,*") +- (set_attr "shift" "1") ++ "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC" ++ "#" ++ [(set_attr "shift" "1") + ;; predicable can't be set based on the variant, so left as no +- (set_attr "length" "8")] ++ (set (attr "length") ++ (if_then_else (eq_attr "is_thumb" "yes") ++ (const_int 10) ++ (const_int 8)))] + ) + + (define_insn_and_split "*thumb1_neg_abssi2" +@@ -3627,6 +3794,93 @@ + [(set_attr "length" "6")] + ) + ++;; Simplified version for when avoiding conditional execution ++(define_insn "*arm_nocond_abssi2" ++ [(set (match_operand:SI 0 "s_register_operand" "=&r") ++ (abs:SI (match_operand:SI 1 
"s_register_operand" "r")))] ++ "TARGET_32BIT && TARGET_NO_SINGLE_COND_EXEC" ++ "#" ++ [(set_attr "shift" "1") ++ (set_attr "length" "8") ++ (set_attr "predicable" "yes")] ++) ++ ++(define_insn "*arm_nocond_neg_abssi2" ++ [(set (match_operand:SI 0 "s_register_operand" "=&r") ++ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r"))))] ++ "TARGET_32BIT && TARGET_NO_SINGLE_COND_EXEC" ++ "#" ++ [(set_attr "shift" "1") ++ (set_attr "length" "8") ++ (set_attr "predicable" "yes")] ++) ++ ++;; Splitters for ABS patterns. ++ ++(define_split ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (abs:SI (match_operand:SI 1 "s_register_operand" ""))) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_32BIT && reload_completed && rtx_equal_p(operands[0], operands[1])" ++ [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0))) ++ (cond_exec (lt (reg:CC CC_REGNUM) (const_int 0)) ++ (set (match_dup 0) (neg:SI (match_dup 1))))] ++) ++ ++(define_split ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "")))) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_32BIT && reload_completed && rtx_equal_p(operands[0], operands[1])" ++ [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0))) ++ (cond_exec (gt (reg:CC CC_REGNUM) (const_int 0)) ++ (set (match_dup 0) (neg:SI (match_dup 1))))] ++) ++ ++;; GCC does not add/remove clobbers when matching splitters, so we need ++;; variants with and without the CC clobber. ++(define_split ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (abs:SI (match_operand:SI 1 "s_register_operand" "")))] ++ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" ++ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) ++ (match_dup 1))) ++ (set (match_dup 0) (minus:SI (match_dup 0) ++ (ashiftrt:SI (match_dup 1) (const_int 31))))] ++) ++ ++(define_split ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (abs:SI (match_operand:SI 1 "s_register_operand" ""))) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" ++ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) ++ (match_dup 1))) ++ (set (match_dup 0) (minus:SI (match_dup 0) ++ (ashiftrt:SI (match_dup 1) (const_int 31))))] ++) ++ ++(define_split ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" ""))))] ++ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" ++ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) ++ (match_dup 1))) ++ (set (match_dup 0) (minus:SI (ashiftrt:SI (match_dup 1) (const_int 31)) ++ (match_dup 0)))] ++) ++ ++(define_split ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "")))) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_32BIT && reload_completed && !rtx_equal_p(operands[0], operands[1])" ++ [(set (match_dup 0) (xor:SI (ashiftrt:SI (match_dup 1) (const_int 31)) ++ (match_dup 1))) ++ (set (match_dup 0) (minus:SI (ashiftrt:SI (match_dup 1) (const_int 31)) ++ (match_dup 0)))] ++) ++ + (define_expand "abssf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (abs:SF (match_operand:SF 1 "s_register_operand" "")))] +@@ -3636,7 +3890,7 @@ + (define_expand "absdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (abs:DF (match_operand:DF 1 "s_register_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && 
TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "") + + (define_expand "sqrtsf2" +@@ -3648,7 +3902,7 @@ + (define_expand "sqrtdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + "") + + (define_insn_and_split "one_cmpldi2" +@@ -3682,7 +3936,8 @@ + (not:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT" + "mvn%?\\t%0, %1" +- [(set_attr "predicable" "yes")] ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "mvn")] + ) + + (define_insn "*thumb1_one_cmplsi2" +@@ -3690,7 +3945,8 @@ + (not:SI (match_operand:SI 1 "register_operand" "l")))] + "TARGET_THUMB1" + "mvn\\t%0, %1" +- [(set_attr "length" "2")] ++ [(set_attr "length" "2") ++ (set_attr "insn" "mvn")] + ) + + (define_insn "*notsi_compare0" +@@ -3701,7 +3957,8 @@ + (not:SI (match_dup 1)))] + "TARGET_32BIT" + "mvn%.\\t%0, %1" +- [(set_attr "conds" "set")] ++ [(set_attr "conds" "set") ++ (set_attr "insn" "mvn")] + ) + + (define_insn "*notsi_compare0_scratch" +@@ -3711,11 +3968,40 @@ + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "mvn%.\\t%0, %1" +- [(set_attr "conds" "set")] ++ [(set_attr "conds" "set") ++ (set_attr "insn" "mvn")] + ) + + ;; Fixed <--> Floating conversion insns + ++(define_expand "floatsihf2" ++ [(set (match_operand:HF 0 "general_operand" "") ++ (float:HF (match_operand:SI 1 "general_operand" "")))] ++ "TARGET_EITHER" ++ " ++ { ++ rtx op1 = gen_reg_rtx (SFmode); ++ expand_float (op1, operands[1], 0); ++ op1 = convert_to_mode (HFmode, op1, 0); ++ emit_move_insn (operands[0], op1); ++ DONE; ++ }" ++) ++ ++(define_expand "floatdihf2" ++ [(set (match_operand:HF 0 "general_operand" "") ++ (float:HF (match_operand:DI 1 "general_operand" "")))] ++ "TARGET_EITHER" ++ " ++ { ++ rtx op1 = gen_reg_rtx (SFmode); ++ expand_float (op1, operands[1], 0); ++ op1 = convert_to_mode (HFmode, op1, 0); ++ emit_move_insn (operands[0], op1); ++ DONE; ++ }" ++) ++ + (define_expand "floatsisf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (float:SF (match_operand:SI 1 "s_register_operand" "")))] +@@ -3731,7 +4017,7 @@ + (define_expand "floatsidf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float:DF (match_operand:SI 1 "s_register_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK) + { +@@ -3740,6 +4026,30 @@ + } + ") + ++(define_expand "fix_trunchfsi2" ++ [(set (match_operand:SI 0 "general_operand" "") ++ (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))] ++ "TARGET_EITHER" ++ " ++ { ++ rtx op1 = convert_to_mode (SFmode, operands[1], 0); ++ expand_fix (operands[0], op1, 0); ++ DONE; ++ }" ++) ++ ++(define_expand "fix_trunchfdi2" ++ [(set (match_operand:DI 0 "general_operand" "") ++ (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))] ++ "TARGET_EITHER" ++ " ++ { ++ rtx op1 = convert_to_mode (SFmode, operands[1], 0); ++ expand_fix (operands[0], op1, 0); ++ DONE; ++ }" ++) ++ + (define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))] +@@ -3759,7 +4069,7 @@ + (define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" ""))))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && 
!TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK) + { +@@ -3776,9 +4086,25 @@ + [(set (match_operand:SF 0 "s_register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "s_register_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" + ) ++ ++/* DFmode -> HFmode conversions have to go through SFmode. */ ++(define_expand "truncdfhf2" ++ [(set (match_operand:HF 0 "general_operand" "") ++ (float_truncate:HF ++ (match_operand:DF 1 "general_operand" "")))] ++ "TARGET_EITHER" ++ " ++ { ++ rtx op1; ++ op1 = convert_to_mode (SFmode, operands[1], 0); ++ op1 = convert_to_mode (HFmode, op1, 0); ++ emit_move_insn (operands[0], op1); ++ DONE; ++ }" ++) + + ;; Zero and sign extension instructions. + +@@ -3800,6 +4126,7 @@ + return \"mov%?\\t%R0, #0\"; + " + [(set_attr "length" "8") ++ (set_attr "insn" "mov") + (set_attr "predicable" "yes")] + ) + +@@ -3843,6 +4170,7 @@ + " + [(set_attr "length" "8") + (set_attr "shift" "1") ++ (set_attr "insn" "mov") + (set_attr "predicable" "yes")] + ) + +@@ -4123,6 +4451,28 @@ + "" + ) + ++(define_code_iterator ior_xor [ior xor]) ++ ++(define_split ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (ior_xor:SI (and:SI (ashift:SI ++ (match_operand:SI 1 "s_register_operand" "") ++ (match_operand:SI 2 "const_int_operand" "")) ++ (match_operand:SI 3 "const_int_operand" "")) ++ (zero_extend:SI ++ (match_operator 5 "subreg_lowpart_operator" ++ [(match_operand:SI 4 "s_register_operand" "")]))))] ++ "TARGET_32BIT ++ && (INTVAL (operands[3]) ++ == (GET_MODE_MASK (GET_MODE (operands[5])) ++ & (GET_MODE_MASK (GET_MODE (operands[5])) ++ << (INTVAL (operands[2])))))" ++ [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) ++ (match_dup 4))) ++ (set (match_dup 0) (zero_extend:SI (match_dup 5)))] ++ "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" ++) ++ + (define_insn "*compareqi_eq0" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:QI 0 "s_register_operand" "r") +@@ -4639,9 +4989,24 @@ + (define_expand "extendsfdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" + ) ++ ++/* HFmode -> DFmode conversions have to go through SFmode. */ ++(define_expand "extendhfdf2" ++ [(set (match_operand:DF 0 "general_operand" "") ++ (float_extend:DF (match_operand:HF 1 "general_operand" "")))] ++ "TARGET_EITHER" ++ " ++ { ++ rtx op1; ++ op1 = convert_to_mode (SFmode, operands[1], 0); ++ op1 = convert_to_mode (DFmode, op1, 0); ++ emit_insn (gen_movdf (operands[0], op1)); ++ DONE; ++ }" ++) + + ;; Move insns (including loads and stores) + +@@ -4877,6 +5242,7 @@ + }" + [(set_attr "length" "4,4,6,2,2,6,4,4") + (set_attr "type" "*,*,*,load2,store2,load2,store2,*") ++ (set_attr "insn" "*,mov,*,*,*,*,*,mov") + (set_attr "pool_range" "*,*,*,*,*,1020,*,*")] + ) + +@@ -4903,14 +5269,6 @@ + optimize && can_create_pseudo_p ()); + DONE; + } +- +- if (TARGET_USE_MOVT && !target_word_relocations +- && GET_CODE (operands[1]) == SYMBOL_REF +- && !flag_pic && !arm_tls_referenced_p (operands[1])) +- { +- arm_emit_movpair (operands[0], operands[1]); +- DONE; +- } + } + else /* TARGET_THUMB1... 
*/ + { +@@ -4984,18 +5342,9 @@ + (set_attr "length" "4")] + ) + +-(define_insn "*arm_movw" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=r") +- (high:SI (match_operand:SI 1 "general_operand" "i")))] +- "TARGET_32BIT" +- "movw%?\t%0, #:lower16:%c1" +- [(set_attr "predicable" "yes") +- (set_attr "length" "4")] +-) +- + (define_insn "*arm_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") +- (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk"))] ++ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))] + "TARGET_ARM && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) +@@ -5008,6 +5357,7 @@ + ldr%?\\t%0, %1 + str%?\\t%1, %0" + [(set_attr "type" "*,*,*,*,load1,store1") ++ (set_attr "insn" "mov,mov,mvn,mov,*,*") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,*,4096,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*")] +@@ -5027,6 +5377,19 @@ + " + ) + ++(define_split ++ [(set (match_operand:SI 0 "arm_general_register_operand" "") ++ (match_operand:SI 1 "general_operand" ""))] ++ "TARGET_32BIT ++ && TARGET_USE_MOVT && GET_CODE (operands[1]) == SYMBOL_REF ++ && !flag_pic && !target_word_relocations ++ && !arm_tls_referenced_p (operands[1])" ++ [(clobber (const_int 0))] ++{ ++ arm_emit_movpair (operands[0], operands[1]); ++ DONE; ++}) ++ + (define_insn "*thumb1_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*lhk") + (match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*lhk"))] +@@ -5065,7 +5428,7 @@ + (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))] + " + { +- unsigned HOST_WIDE_INT val = INTVAL (operands[1]); ++ unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; + unsigned HOST_WIDE_INT mask = 0xff; + int i; + +@@ -5627,6 +5990,7 @@ + ldr%(h%)\\t%0, %1\\t%@ movhi" + [(set_attr "type" "*,*,store1,load1") + (set_attr "predicable" "yes") ++ (set_attr "insn" "mov,mvn,*,*") + (set_attr "pool_range" "*,*,*,256") + (set_attr "neg_pool_range" "*,*,*,244")] + ) +@@ -5638,7 +6002,8 @@ + "@ + mov%?\\t%0, %1\\t%@ movhi + mvn%?\\t%0, #%B1\\t%@ movhi" +- [(set_attr "predicable" "yes")] ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "mov,mvn")] + ) + + (define_expand "thumb_movhi_clobber" +@@ -5769,6 +6134,7 @@ + ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0" + [(set_attr "type" "*,*,load1,store1") ++ (set_attr "insn" "mov,mvn,*,*") + (set_attr "predicable" "yes")] + ) + +@@ -5787,9 +6153,111 @@ + mov\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "*,load1,store1,*,*,*") ++ (set_attr "insn" "*,*,*,mov,mov,mov") + (set_attr "pool_range" "*,32,*,*,*,*")] + ) + ++;; HFmode moves ++(define_expand "movhf" ++ [(set (match_operand:HF 0 "general_operand" "") ++ (match_operand:HF 1 "general_operand" ""))] ++ "TARGET_EITHER" ++ " ++ if (TARGET_32BIT) ++ { ++ if (GET_CODE (operands[0]) == MEM) ++ operands[1] = force_reg (HFmode, operands[1]); ++ } ++ else /* TARGET_THUMB1 */ ++ { ++ if (can_create_pseudo_p ()) ++ { ++ if (GET_CODE (operands[0]) != REG) ++ operands[1] = force_reg (HFmode, operands[1]); ++ } ++ } ++ " ++) ++ ++(define_insn "*arm32_movhf" ++ [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") ++ (match_operand:HF 1 "general_operand" " m,r,r,F"))] ++ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) ++ && ( s_register_operand (operands[0], HFmode) ++ || s_register_operand (operands[1], HFmode))" ++ "* ++ switch (which_alternative) ++ { ++ case 0: /* ARM register from memory */ ++ return \"ldr%(h%)\\t%0, %1\\t%@ 
__fp16\"; ++ case 1: /* memory from ARM register */ ++ return \"str%(h%)\\t%1, %0\\t%@ __fp16\"; ++ case 2: /* ARM register from ARM register */ ++ return \"mov%?\\t%0, %1\\t%@ __fp16\"; ++ case 3: /* ARM register from constant */ ++ { ++ REAL_VALUE_TYPE r; ++ long bits; ++ rtx ops[4]; ++ ++ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); ++ bits = real_to_target (NULL, &r, HFmode); ++ ops[0] = operands[0]; ++ ops[1] = GEN_INT (bits); ++ ops[2] = GEN_INT (bits & 0xff00); ++ ops[3] = GEN_INT (bits & 0x00ff); ++ ++ if (arm_arch_thumb2) ++ output_asm_insn (\"movw%?\\t%0, %1\", ops); ++ else ++ output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops); ++ return \"\"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++ " ++ [(set_attr "conds" "unconditional") ++ (set_attr "type" "load1,store1,*,*") ++ (set_attr "length" "4,4,4,8") ++ (set_attr "predicable" "yes") ++ ] ++) ++ ++(define_insn "*thumb1_movhf" ++ [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") ++ (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] ++ "TARGET_THUMB1 ++ && ( s_register_operand (operands[0], HFmode) ++ || s_register_operand (operands[1], HFmode))" ++ "* ++ switch (which_alternative) ++ { ++ case 1: ++ { ++ rtx addr; ++ gcc_assert (GET_CODE(operands[1]) == MEM); ++ addr = XEXP (operands[1], 0); ++ if (GET_CODE (addr) == LABEL_REF ++ || (GET_CODE (addr) == CONST ++ && GET_CODE (XEXP (addr, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF ++ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)) ++ { ++ /* Constant pool entry. */ ++ return \"ldr\\t%0, %1\"; ++ } ++ return \"ldrh\\t%0, %1\"; ++ } ++ case 2: return \"strh\\t%1, %0\"; ++ default: return \"mov\\t%0, %1\"; ++ } ++ " ++ [(set_attr "length" "2") ++ (set_attr "type" "*,load1,store1,*,*") ++ (set_attr "pool_range" "*,1020,*,*,*")] ++) ++ + (define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] +@@ -5842,6 +6310,7 @@ + [(set_attr "length" "4,4,4") + (set_attr "predicable" "yes") + (set_attr "type" "*,load1,store1") ++ (set_attr "insn" "mov,*,*") + (set_attr "pool_range" "*,4096,*") + (set_attr "neg_pool_range" "*,4084,*")] + ) +@@ -6297,7 +6766,7 @@ + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] +- "TARGET_EITHER" ++ "TARGET_EITHER && !low_irq_latency" + " + if (TARGET_32BIT) + { +@@ -7476,7 +7945,7 @@ + (define_expand "cmpdf" + [(match_operand:DF 0 "s_register_operand" "") + (match_operand:DF 1 "arm_float_compare_operand" "")] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + arm_compare_op0 = operands[0]; + arm_compare_op1 = operands[1]; +@@ -7507,7 +7976,11 @@ + (set_attr "shift" "1") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*arm_cmpsi_shiftsi_swp" +@@ -7522,7 +7995,11 @@ + (set_attr "shift" "1") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + 
+ (define_insn "*arm_cmpsi_negshiftsi_si" +@@ -7537,7 +8014,11 @@ + [(set_attr "conds" "set") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + ;; Cirrus SF compare instruction +@@ -7879,77 +8360,77 @@ + (define_expand "seq" + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (EQ, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sne" + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (NE, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sgt" + [(set (match_operand:SI 0 "s_register_operand" "") + (gt:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (GT, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sle" + [(set (match_operand:SI 0 "s_register_operand" "") + (le:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (LE, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sge" + [(set (match_operand:SI 0 "s_register_operand" "") + (ge:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (GE, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "slt" + [(set (match_operand:SI 0 "s_register_operand" "") + (lt:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (LT, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sgtu" + [(set (match_operand:SI 0 "s_register_operand" "") + (gtu:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (GTU, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sleu" + [(set (match_operand:SI 0 "s_register_operand" "") + (leu:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (LEU, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sgeu" + [(set (match_operand:SI 0 "s_register_operand" "") + (geu:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (GEU, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sltu" + [(set (match_operand:SI 0 "s_register_operand" "") + (ltu:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (LTU, arm_compare_op0, arm_compare_op1);" + ) + + (define_expand "sunordered" + [(set (match_operand:SI 0 "s_register_operand" "") + (unordered:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (UNORDERED, arm_compare_op0, + arm_compare_op1);" + ) +@@ -7957,7 +8438,7 @@ + (define_expand "sordered" + [(set (match_operand:SI 
0 "s_register_operand" "") + (ordered:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (ORDERED, arm_compare_op0, + arm_compare_op1);" + ) +@@ -7965,7 +8446,7 @@ + (define_expand "sungt" + [(set (match_operand:SI 0 "s_register_operand" "") + (ungt:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (UNGT, arm_compare_op0, + arm_compare_op1);" + ) +@@ -7973,7 +8454,7 @@ + (define_expand "sunge" + [(set (match_operand:SI 0 "s_register_operand" "") + (unge:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (UNGE, arm_compare_op0, + arm_compare_op1);" + ) +@@ -7981,7 +8462,7 @@ + (define_expand "sunlt" + [(set (match_operand:SI 0 "s_register_operand" "") + (unlt:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (UNLT, arm_compare_op0, + arm_compare_op1);" + ) +@@ -7989,7 +8470,7 @@ + (define_expand "sunle" + [(set (match_operand:SI 0 "s_register_operand" "") + (unle:SI (match_dup 1) (const_int 0)))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP) && !TARGET_NO_COND_EXEC" + "operands[1] = arm_gen_compare_reg (UNLE, arm_compare_op0, + arm_compare_op1);" + ) +@@ -8018,6 +8499,7 @@ + "TARGET_ARM" + "mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + [(set_attr "conds" "use") ++ (set_attr "insn" "mov") + (set_attr "length" "8")] + ) + +@@ -8028,6 +8510,7 @@ + "TARGET_ARM" + "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + [(set_attr "conds" "use") ++ (set_attr "insn" "mov") + (set_attr "length" "8")] + ) + +@@ -8038,6 +8521,7 @@ + "TARGET_ARM" + "mov%D1\\t%0, #0\;mvn%d1\\t%0, #1" + [(set_attr "conds" "use") ++ (set_attr "insn" "mov") + (set_attr "length" "8")] + ) + +@@ -8241,7 +8725,7 @@ + (if_then_else:SI (match_operand 1 "arm_comparison_operator" "") + (match_operand:SI 2 "arm_not_operand" "") + (match_operand:SI 3 "arm_not_operand" "")))] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_COND_EXEC" + " + { + enum rtx_code code = GET_CODE (operands[1]); +@@ -8260,7 +8744,7 @@ + (if_then_else:SF (match_operand 1 "arm_comparison_operator" "") + (match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "nonmemory_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_NO_COND_EXEC" + " + { + enum rtx_code code = GET_CODE (operands[1]); +@@ -8285,7 +8769,7 @@ + (if_then_else:DF (match_operand 1 "arm_comparison_operator" "") + (match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_add_operand" "")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE) && !TARGET_NO_COND_EXEC" + " + { + enum rtx_code code = GET_CODE (operands[1]); +@@ -8317,7 +8801,8 @@ + mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + [(set_attr "length" 
"4,4,4,4,8,8,8,8") +- (set_attr "conds" "use")] ++ (set_attr "conds" "use") ++ (set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")] + ) + + (define_insn "*movsfcc_soft_insn" +@@ -8330,7 +8815,8 @@ + "@ + mov%D3\\t%0, %2 + mov%d3\\t%0, %1" +- [(set_attr "conds" "use")] ++ [(set_attr "conds" "use") ++ (set_attr "insn" "mov")] + ) + + +@@ -8733,7 +9219,7 @@ + [(match_operand 1 "cc_register" "") (const_int 0)]) + (return) + (pc)))] +- "TARGET_ARM && USE_RETURN_INSN (TRUE)" ++ "TARGET_ARM && USE_RETURN_INSN (TRUE) && !TARGET_NO_COND_EXEC" + "* + { + if (arm_ccfsm_state == 2) +@@ -8754,7 +9240,7 @@ + [(match_operand 1 "cc_register" "") (const_int 0)]) + (pc) + (return)))] +- "TARGET_ARM && USE_RETURN_INSN (TRUE)" ++ "TARGET_ARM && USE_RETURN_INSN (TRUE) && !TARGET_NO_COND_EXEC" + "* + { + if (arm_ccfsm_state == 2) +@@ -9072,7 +9558,11 @@ + (set_attr "shift" "4") + (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_split +@@ -9110,7 +9600,11 @@ + (set_attr "shift" "4") + (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*arith_shiftsi_compare0_scratch" +@@ -9128,7 +9622,11 @@ + (set_attr "shift" "4") + (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*sub_shiftsi" +@@ -9143,7 +9641,11 @@ + (set_attr "shift" "3") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*sub_shiftsi_compare0" +@@ -9163,7 +9665,11 @@ + (set_attr "shift" "3") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + (define_insn "*sub_shiftsi_compare0_scratch" +@@ -9181,7 +9687,11 @@ + (set_attr "shift" "3") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)))] + ) + + +@@ -9194,6 +9704,7 @@ + "TARGET_ARM" + "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1" + [(set_attr "conds" "use") ++ (set_attr "insn" "mov") + (set_attr "length" "8")] + ) + +@@ -9207,6 +9718,7 @@ + orr%d2\\t%0, %1, #1 + mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1" + [(set_attr "conds" "use") ++ 
(set_attr "insn" "orr") + (set_attr "length" "4,8")] + ) + +@@ -9216,7 +9728,7 @@ + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L")])) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_COND_EXEC" + "* + if (operands[3] == const0_rtx) + { +@@ -9271,6 +9783,7 @@ + return \"\"; + " + [(set_attr "conds" "use") ++ (set_attr "insn" "mov") + (set_attr "length" "4,4,8")] + ) + +@@ -9282,7 +9795,7 @@ + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "* + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + return \"%i5\\t%0, %1, %2, lsr #31\"; +@@ -9678,7 +10191,7 @@ + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_COND_EXEC" + "* + if (GET_CODE (operands[5]) == LT + && (operands[4] == const0_rtx)) +@@ -9744,7 +10257,7 @@ + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +@@ -9780,7 +10293,7 @@ + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +@@ -9818,7 +10331,7 @@ + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +@@ -9968,7 +10481,7 @@ + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +@@ -9987,6 +10500,7 @@ + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2 + mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") ++ (set_attr "insn" "mvn") + (set_attr "length" "4,8,8")] + ) + +@@ -10000,7 +10514,7 @@ + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +@@ -10019,6 +10533,7 @@ + mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 + mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" + [(set_attr "conds" "use") ++ (set_attr "insn" "mvn") + (set_attr "length" "4,8,8")] + ) + +@@ -10033,7 +10548,7 @@ + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +@@ -10055,10 +10570,23 @@ + mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") +- (set_attr "length" "4,8,8") ++ (set_attr "insn" "mov") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set_attr_alternative "length" ++ [(if_then_else (and (eq_attr 
"type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)) ++ (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 12) ++ (const_int 8)) ++ (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 12) ++ (const_int 8))])] + ) + + (define_insn "*ifcompare_move_shift" +@@ -10072,7 +10600,7 @@ + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +@@ -10094,10 +10622,24 @@ + mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") +- (set_attr "length" "4,8,8") ++ (set_attr "insn" "mov") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set_attr_alternative "length" ++ [(if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 8) ++ (const_int 4)) ++ (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 12) ++ (const_int 8)) ++ (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 12) ++ (const_int 8))]) ++ (set_attr "insn" "mov")] + ) + + (define_insn "*ifcompare_shift_shift" +@@ -10113,7 +10655,7 @@ + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rM")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +@@ -10134,12 +10676,16 @@ + "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7" + [(set_attr "conds" "use") + (set_attr "shift" "1") +- (set_attr "length" "8") ++ (set_attr "insn" "mov") + (set (attr "type") (if_then_else + (and (match_operand 2 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_string "alu_shift") +- (const_string "alu_shift_reg")))] ++ (const_string "alu_shift_reg"))) ++ (set (attr "length") (if_then_else (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "fix_janus" "yes")) ++ (const_int 16) ++ (const_int 8)))] + ) + + (define_insn "*ifcompare_not_arith" +@@ -10153,7 +10699,7 @@ + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +@@ -10171,6 +10717,7 @@ + "TARGET_ARM" + "mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3" + [(set_attr "conds" "use") ++ (set_attr "insn" "mvn") + (set_attr "length" "8")] + ) + +@@ -10185,7 +10732,7 @@ + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +@@ -10203,6 +10750,7 @@ + "TARGET_ARM" + "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" + [(set_attr "conds" "use") ++ (set_attr "insn" "mvn") + (set_attr "length" "8")] + ) + +@@ -10215,7 +10763,7 @@ + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr 
"conds" "clob") + (set_attr "length" "8,12")] +@@ -10246,7 +10794,7 @@ + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM" ++ "TARGET_ARM && !TARGET_NO_SINGLE_COND_EXEC" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +@@ -10614,7 +11162,7 @@ + (match_dup 0) + (match_operand 4 "" ""))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM && reload_completed" ++ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_dup 7) + (set (match_dup 0) (match_dup 4)))] +@@ -10642,7 +11190,7 @@ + (match_operand 4 "" "") + (match_dup 0))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM && reload_completed" ++ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)]) + (set (match_dup 0) (match_dup 4)))] +@@ -10663,7 +11211,7 @@ + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM && reload_completed" ++ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) +@@ -10695,7 +11243,7 @@ + (not:SI + (match_operand:SI 5 "s_register_operand" "")))) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_ARM && reload_completed" ++ "TARGET_ARM && reload_completed && !TARGET_NO_SINGLE_COND_EXEC" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) +@@ -10730,6 +11278,7 @@ + mvn%D4\\t%0, %2 + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") ++ (set_attr "insn" "mvn") + (set_attr "length" "4,8")] + ) + +@@ -10864,6 +11413,24 @@ + " + ) + ++(define_insn "align_16" ++ [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN16)] ++ "TARGET_EITHER" ++ "* ++ assemble_align (128); ++ return \"\"; ++ " ++) ++ ++(define_insn "align_32" ++ [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN32)] ++ "TARGET_EITHER" ++ "* ++ assemble_align (256); ++ return \"\"; ++ " ++) ++ + (define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)] + "TARGET_EITHER" +@@ -10890,6 +11457,7 @@ + "TARGET_THUMB1" + "* + making_const_table = TRUE; ++ gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT); + assemble_integer (operands[0], 2, BITS_PER_WORD, 1); + assemble_zeros (2); + return \"\"; +@@ -10902,19 +11470,30 @@ + "TARGET_EITHER" + "* + { ++ rtx x = operands[0]; + making_const_table = TRUE; +- switch (GET_MODE_CLASS (GET_MODE (operands[0]))) ++ switch (GET_MODE_CLASS (GET_MODE (x))) + { + case MODE_FLOAT: +- { +- REAL_VALUE_TYPE r; +- REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); +- assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); +- break; +- } ++ if (GET_MODE (x) == HFmode) ++ arm_emit_fp16_const (x); ++ else ++ { ++ REAL_VALUE_TYPE r; ++ REAL_VALUE_FROM_CONST_DOUBLE (r, x); ++ assemble_real (r, GET_MODE (x), BITS_PER_WORD); ++ } ++ break; + default: +- assemble_integer (operands[0], 4, BITS_PER_WORD, 1); +- mark_symbol_refs_as_used (operands[0]); ++ /* XXX: Sometimes gcc does something really dumb and ends up with ++ a HIGH in a constant pool entry, usually because it's trying to ++ load into a VFP register. We know this will always be used in ++ combination with a LO_SUM which ignores the high bits, so just ++ strip off the HIGH. 
*/ ++ if (GET_CODE (x) == HIGH) ++ x = XEXP (x, 0); ++ assemble_integer (x, 4, BITS_PER_WORD, 1); ++ mark_symbol_refs_as_used (x); + break; + } + return \"\"; +@@ -11008,6 +11587,28 @@ + [(set_attr "predicable" "yes") + (set_attr "insn" "clz")]) + ++(define_insn "rbitsi2" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))] ++ "TARGET_32BIT && arm_arch_thumb2" ++ "rbit%?\\t%0, %1" ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "clz")]) ++ ++(define_expand "ctzsi2" ++ [(set (match_operand:SI 0 "s_register_operand" "") ++ (ctz:SI (match_operand:SI 1 "s_register_operand" "")))] ++ "TARGET_32BIT && arm_arch_thumb2" ++ " ++ { ++ rtx tmp = gen_reg_rtx (SImode); ++ emit_insn (gen_rbitsi2 (tmp, operands[1])); ++ emit_insn (gen_clzsi2 (operands[0], tmp)); ++ } ++ DONE; ++ " ++) ++ + ;; V5E instructions. + + (define_insn "prefetch" +@@ -11017,13 +11618,15 @@ + "TARGET_32BIT && arm_arch5e" + "pld\\t%a0") + +-;; General predication pattern ++;; General predication pattern. ++;; Conditional branches are available as both arm_cond_branch and ++;; predicated arm_jump, so it doesn't matter if we disable the latter. + + (define_cond_exec + [(match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") + (const_int 0)])] +- "TARGET_32BIT" ++ "TARGET_32BIT && !TARGET_NO_SINGLE_COND_EXEC" + "" + ) + +diff -Nur a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def +--- a/gcc/config/arm/arm-modes.def 2007-08-02 12:49:31.000000000 +0200 ++++ b/gcc/config/arm/arm-modes.def 2010-01-25 09:50:28.985687200 +0100 +@@ -25,6 +25,11 @@ + FIXME What format is this? */ + FLOAT_MODE (XF, 12, 0); + ++/* Half-precision floating point */ ++FLOAT_MODE (HF, 2, 0); ++ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) ++ ? &arm_half_format : &ieee_half_format)); ++ + /* CCFPEmode should be used with floating inequalities, + CCFPmode should be used with floating equalities. + CC_NOOVmode should be used with SImode integer equalities. +@@ -62,6 +67,4 @@ + INT_MODE (EI, 24); + INT_MODE (OI, 32); + INT_MODE (CI, 48); +-/* ??? This should actually have 512 bits but the precision only has 9 +- bits. 
*/
+-FRACTIONAL_INT_MODE (XI, 511, 64);
++INT_MODE (XI, 64);
+diff -Nur a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
+--- a/gcc/config/arm/arm_neon.h 2009-04-10 01:23:07.000000000 +0200
++++ b/gcc/config/arm/arm_neon.h 2010-01-25 09:50:28.985687200 +0100
+@@ -36,7 +36,11 @@
+ extern "C" {
+ #endif
+
++#if defined (__vxworks) && defined (_WRS_KERNEL)
++#include
++#else
+ #include <stdint.h>
++#endif
+
+ typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8)));
+ typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8)));
+@@ -61,7 +65,7 @@
+ typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16)));
+ typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16)));
+
+-typedef __builtin_neon_sf float32_t;
++typedef float float32_t;
+ typedef __builtin_neon_poly8 poly8_t;
+ typedef __builtin_neon_poly16 poly16_t;
+
+@@ -5085,7 +5089,7 @@
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c)
+ {
+- return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c);
++ return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c);
+ }
+
+ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+@@ -5151,7 +5155,7 @@
+ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+ vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c)
+ {
+- return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c);
++ return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c);
+ }
+
+ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+@@ -5283,7 +5287,7 @@
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vdup_n_f32 (float32_t __a)
+ {
+- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
++ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
+ }
+
+ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+@@ -5349,7 +5353,7 @@
+ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+ vdupq_n_f32 (float32_t __a)
+ {
+- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
++ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
+ }
+
+ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+@@ -5415,7 +5419,7 @@
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vmov_n_f32 (float32_t __a)
+ {
+- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
++ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
+ }
+
+ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+@@ -5481,7 +5485,7 @@
+ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+ vmovq_n_f32 (float32_t __a)
+ {
+- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
++ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
+ }
+
+ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+@@ -6591,7 +6595,7 @@
+ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+ vmul_n_f32 (float32x2_t __a, float32_t __b)
+ {
+- return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3);
++ return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3);
+ }
+
+ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+@@ -6621,7 +6625,7 @@
+ __extension__ 
static __inline float32x4_t __attribute__ ((__always_inline__)) + vmulq_n_f32 (float32x4_t __a, float32_t __b) + { +- return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3); ++ return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +@@ -6735,7 +6739,7 @@ + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) + { +- return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3); ++ return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +@@ -6765,7 +6769,7 @@ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) + { +- return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3); ++ return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +@@ -6831,7 +6835,7 @@ + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) + { +- return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3); ++ return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +@@ -6861,7 +6865,7 @@ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) + { +- return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3); ++ return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +@@ -7851,7 +7855,7 @@ + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vld1_f32 (const float32_t * __a) + { +- return (float32x2_t)__builtin_neon_vld1v2sf (__a); ++ return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -7917,7 +7921,7 @@ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vld1q_f32 (const float32_t * __a) + { +- return (float32x4_t)__builtin_neon_vld1v4sf (__a); ++ return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +@@ -7977,7 +7981,7 @@ + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) + { +- return (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c); ++ return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -8043,7 +8047,7 @@ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) + { +- return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c); ++ return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); + } + + __extension__ static __inline uint8x16_t 
__attribute__ ((__always_inline__)) +@@ -8109,7 +8113,7 @@ + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vld1_dup_f32 (const float32_t * __a) + { +- return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a); ++ return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +@@ -8175,7 +8179,7 @@ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vld1q_dup_f32 (const float32_t * __a) + { +- return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a); ++ return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +@@ -8247,7 +8251,7 @@ + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1_f32 (float32_t * __a, float32x2_t __b) + { +- __builtin_neon_vst1v2sf (__a, __b); ++ __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -8313,7 +8317,7 @@ + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1q_f32 (float32_t * __a, float32x4_t __b) + { +- __builtin_neon_vst1v4sf (__a, __b); ++ __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -8373,7 +8377,7 @@ + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) + { +- __builtin_neon_vst1_lanev2sf (__a, __b, __c); ++ __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -8439,7 +8443,7 @@ + __extension__ static __inline void __attribute__ ((__always_inline__)) + vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) + { +- __builtin_neon_vst1_lanev4sf (__a, __b, __c); ++ __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -8512,7 +8516,7 @@ + vld2_f32 (const float32_t * __a) + { + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; +- __rv.__o = __builtin_neon_vld2v2sf (__a); ++ __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -8600,7 +8604,7 @@ + vld2q_f32 (const float32_t * __a) + { + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; +- __rv.__o = __builtin_neon_vld2v4sf (__a); ++ __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -8676,7 +8680,7 @@ + { + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; +- __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); ++ __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; + } + +@@ -8748,7 +8752,7 @@ + { + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; +- __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); ++ __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; + } + +@@ -8807,7 +8811,7 @@ + vld2_dup_f32 (const float32_t * __a) + { + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; +- __rv.__o = __builtin_neon_vld2_dupv2sf (__a); 
++ __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -8892,7 +8896,7 @@ + vst2_f32 (float32_t * __a, float32x2x2_t __b) + { + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; +- __builtin_neon_vst2v2sf (__a, __bu.__o); ++ __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -8969,7 +8973,7 @@ + vst2q_f32 (float32_t * __a, float32x4x2_t __b) + { + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; +- __builtin_neon_vst2v4sf (__a, __bu.__o); ++ __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -9032,7 +9036,7 @@ + vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) + { + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; +- __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); ++ __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -9088,7 +9092,7 @@ + vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) + { + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; +- __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); ++ __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -9140,7 +9144,7 @@ + vld3_f32 (const float32_t * __a) + { + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; +- __rv.__o = __builtin_neon_vld3v2sf (__a); ++ __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -9228,7 +9232,7 @@ + vld3q_f32 (const float32_t * __a) + { + union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; +- __rv.__o = __builtin_neon_vld3v4sf (__a); ++ __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -9304,7 +9308,7 @@ + { + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; +- __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); ++ __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; + } + +@@ -9376,7 +9380,7 @@ + { + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; +- __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); ++ __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; + } + +@@ -9435,7 +9439,7 @@ + vld3_dup_f32 (const float32_t * __a) + { + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; +- __rv.__o = __builtin_neon_vld3_dupv2sf (__a); ++ __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -9520,7 +9524,7 @@ + vst3_f32 (float32_t * __a, float32x2x3_t __b) + { + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; +- __builtin_neon_vst3v2sf (__a, __bu.__o); ++ __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -9597,7 +9601,7 @@ + vst3q_f32 (float32_t * __a, float32x4x3_t __b) + { + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; +- __builtin_neon_vst3v4sf (__a, __bu.__o); ++ 
__builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -9660,7 +9664,7 @@ + vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) + { + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; +- __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); ++ __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -9716,7 +9720,7 @@ + vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c) + { + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; +- __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); ++ __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -9768,7 +9772,7 @@ + vld4_f32 (const float32_t * __a) + { + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; +- __rv.__o = __builtin_neon_vld4v2sf (__a); ++ __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -9856,7 +9860,7 @@ + vld4q_f32 (const float32_t * __a) + { + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; +- __rv.__o = __builtin_neon_vld4v4sf (__a); ++ __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -9932,7 +9936,7 @@ + { + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; +- __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); ++ __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; + } + +@@ -10004,7 +10008,7 @@ + { + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; +- __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); ++ __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; + } + +@@ -10063,7 +10067,7 @@ + vld4_dup_f32 (const float32_t * __a) + { + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; +- __rv.__o = __builtin_neon_vld4_dupv2sf (__a); ++ __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; + } + +@@ -10148,7 +10152,7 @@ + vst4_f32 (float32_t * __a, float32x2x4_t __b) + { + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; +- __builtin_neon_vst4v2sf (__a, __bu.__o); ++ __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -10225,7 +10229,7 @@ + vst4q_f32 (float32_t * __a, float32x4x4_t __b) + { + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; +- __builtin_neon_vst4v4sf (__a, __bu.__o); ++ __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -10288,7 +10292,7 @@ + vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) + { + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; +- __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); ++ __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +@@ -10344,7 +10348,7 @@ + vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) + { + union { float32x4x4_t 
__i; __builtin_neon_xi __o; } __bu = { __b }; +- __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); ++ __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); + } + + __extension__ static __inline void __attribute__ ((__always_inline__)) +diff -Nur a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt +--- a/gcc/config/arm/arm.opt 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/arm.opt 2010-01-25 09:50:28.985687200 +0100 +@@ -78,6 +78,10 @@ + mfp= + Target RejectNegative Joined Undocumented Var(target_fpe_name) + ++mfp16-format= ++Target RejectNegative Joined Var(target_fp16_format_name) ++Specify the __fp16 floating-point format ++ + ;; Now ignored. + mfpe + Target RejectNegative Mask(FPE) Undocumented +@@ -93,6 +97,10 @@ + Target RejectNegative + Alias for -mfloat-abi=hard + ++mfix-janus-2cc ++Target Report Mask(FIX_JANUS) ++Work around hardware errata for Avalent Janus 2CC cores. ++ + mlittle-endian + Target Report RejectNegative InverseMask(BIG_END) + Assume target CPU is configured as little endian +@@ -101,6 +109,10 @@ + Target Report Mask(LONG_CALLS) + Generate call insns as indirect calls, if necessary + ++mmarvell-div ++Target Report Mask(MARVELL_DIV) ++Generate hardware integer division instructions supported by some Marvell cores. ++ + mpic-register= + Target RejectNegative Joined Var(arm_pic_register_string) + Specify the register to be used for PIC addressing +@@ -157,6 +169,10 @@ + Target Report Mask(NEON_VECTORIZE_QUAD) + Use Neon quad-word (rather than double-word) registers for vectorization + ++mlow-irq-latency ++Target Report Var(low_irq_latency) ++Try to reduce interrupt latency of the generated code ++ + mword-relocations + Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) + Only generate absolute relocations on word sized values. 
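The arm.opt hunk above adds the new target flags for this port: -mfp16-format= (selecting how the new __fp16 half-precision type is encoded), -mfix-janus-2cc, -mmarvell-div and -mlow-irq-latency. A minimal sketch of how -mfp16-format= is exercised from C, assuming an arm-none-eabi compiler built with this patch applied; the file name and function name are illustrative only:

    /* fp16-demo.c -- compile with: arm-none-eabi-gcc -mfp16-format=ieee -c fp16-demo.c */
    __fp16 half = 1.0;           /* stored as the 16-bit IEEE pattern 0x3c00 */

    float add_one(__fp16 x)
    {
      /* __fp16 is a storage-only type: it is promoted to float for
         arithmetic, so gcc emits calls to the __gnu_h2f_ieee()/
         __gnu_f2h_ieee() helpers from fp16.c, added later in this patch.  */
      return x + 1.0f;
    }

With -mfp16-format=alternative the same code goes through the __gnu_*_alternative helpers instead; as the ieee flag tests in fp16.c show, the alternative encoding gives up Inf/NaN in exchange for a wider exponent range.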
+diff -Nur a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h +--- a/gcc/config/arm/arm-protos.h 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/arm-protos.h 2010-01-25 09:50:28.985687200 +0100 +@@ -88,7 +88,7 @@ + + extern int cirrus_memory_offset (rtx); + extern int arm_coproc_mem_operand (rtx, bool); +-extern int neon_vector_mem_operand (rtx, bool); ++extern int neon_vector_mem_operand (rtx, int); + extern int neon_struct_mem_operand (rtx); + extern int arm_no_early_store_addr_dep (rtx, rtx); + extern int arm_no_early_alu_shift_dep (rtx, rtx); +@@ -144,6 +144,7 @@ + extern int arm_debugger_arg_offset (int, rtx); + extern bool arm_is_long_call_p (tree); + extern int arm_emit_vector_const (FILE *, rtx); ++extern void arm_emit_fp16_const (rtx c); + extern const char * arm_output_load_gr (rtx *); + extern const char *vfp_output_fstmd (rtx *); + extern void arm_set_return_address (rtx, rtx); +@@ -154,13 +155,15 @@ + + #if defined TREE_CODE + extern rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int); ++extern void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, ++ tree, bool); + extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); + extern bool arm_pad_arg_upward (enum machine_mode, const_tree); + extern bool arm_pad_reg_upward (enum machine_mode, tree, int); + extern bool arm_needs_doubleword_align (enum machine_mode, tree); +-extern rtx arm_function_value(const_tree, const_tree); + #endif + extern int arm_apply_result_size (void); ++extern rtx aapcs_libcall_value (enum machine_mode); + + #endif /* RTX_CODE */ + +diff -Nur a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md +--- a/gcc/config/arm/arm-tune.md 2009-07-22 09:43:59.000000000 +0200 ++++ b/gcc/config/arm/arm-tune.md 2010-01-25 09:50:28.985687200 +0100 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1" ++ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,marvell_f,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa5,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "arm_tune"))) +diff -Nur a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h +--- a/gcc/config/arm/bpabi.h 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/bpabi.h 2010-01-25 09:50:28.985687200 +0100 +@@ -30,7 +30,7 @@ + + /* Section 4.1 of the AAPCS requires the use of VFP format. 
*/ + #undef FPUTYPE_DEFAULT +-#define FPUTYPE_DEFAULT FPUTYPE_VFP ++#define FPUTYPE_DEFAULT "vfp" + + /* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +@@ -53,6 +53,8 @@ + + #define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}" + ++#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5|mcpu=cortex-a8|mcpu=cortex-a9:%{!r:--be8}}}" ++ + /* Tell the assembler to build BPABI binaries. */ + #undef SUBTARGET_EXTRA_ASM_SPEC + #define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC +@@ -65,7 +67,7 @@ + #define BPABI_LINK_SPEC \ + "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \ + "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \ +- "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC ++ "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC + + #undef LINK_SPEC + #define LINK_SPEC BPABI_LINK_SPEC +@@ -90,16 +92,22 @@ + #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (muldi3, lmul) + #endif + #ifdef L_fixdfdi +-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz) ++#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz) \ ++ extern DWtype __fixdfdi (DFtype) __attribute__((pcs("aapcs"))); \ ++ extern UDWtype __fixunsdfdi (DFtype) __asm__("__aeabi_d2ulz") __attribute__((pcs("aapcs"))); + #endif + #ifdef L_fixunsdfdi +-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz) ++#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz) \ ++ extern UDWtype __fixunsdfdi (DFtype) __attribute__((pcs("aapcs"))); + #endif + #ifdef L_fixsfdi +-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz) ++#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz) \ ++ extern DWtype __fixsfdi (SFtype) __attribute__((pcs("aapcs"))); \ ++ extern UDWtype __fixunssfdi (SFtype) __asm__("__aeabi_f2ulz") __attribute__((pcs("aapcs"))); + #endif + #ifdef L_fixunssfdi +-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz) ++#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz) \ ++ extern UDWtype __fixunssfdi (SFtype) __attribute__((pcs("aapcs"))); + #endif + #ifdef L_floatdidf + #define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdidf, l2d) +diff -Nur a/gcc/config/arm/bpabi.S b/gcc/config/arm/bpabi.S +--- a/gcc/config/arm/bpabi.S 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/bpabi.S 2010-01-25 09:50:28.985687200 +0100 +@@ -64,20 +64,69 @@ + + #endif /* L_aeabi_ulcmp */ + ++.macro test_div_by_zero signed ++/* Tail-call to divide-by-zero handlers which may be overridden by the user, ++ so unwinding works properly. */ ++#if defined(__thumb2__) ++ cbnz yyh, 1f ++ cbnz yyl, 1f ++ cmp xxh, #0 ++ do_it eq ++ cmpeq xxl, #0 ++ .ifc \signed, unsigned ++ beq 2f ++ mov xxh, #0xffffffff ++ mov xxl, xxh ++2: ++ .else ++ do_it lt, t ++ movlt xxl, #0 ++ movlt xxh, #0x80000000 ++ do_it gt, t ++ movgt xxh, #0x7fffffff ++ movgt xxl, #0xffffffff ++ .endif ++ b SYM (__aeabi_ldiv0) __PLT__ ++1: ++#else ++ /* Note: Thumb-1 code calls via an ARM shim on processors which ++ support ARM mode. 
*/ ++ cmp yyh, #0 ++ cmpeq yyl, #0 ++ bne 2f ++ cmp xxh, #0 ++ cmpeq xxl, #0 ++ .ifc \signed, unsigned ++ movne xxh, #0xffffffff ++ movne xxl, #0xffffffff ++ .else ++ movlt xxh, #0x80000000 ++ movlt xxl, #0 ++ movgt xxh, #0x7fffffff ++ movgt xxl, #0xffffffff ++ .endif ++ b SYM (__aeabi_ldiv0) __PLT__ ++2: ++#endif ++.endm ++ + #ifdef L_aeabi_ldivmod + + ARM_FUNC_START aeabi_ldivmod ++ test_div_by_zero signed ++ + sub sp, sp, #8 +-#if defined(__thumb2__) ++/* Low latency and Thumb-2 do_push implementations can't push sp directly. */ ++#if defined(__thumb2__) || defined(__irq_low_latency__) + mov ip, sp +- push {ip, lr} ++ do_push (ip, lr) + #else +- do_push {sp, lr} ++ stmfd sp!, {sp, lr} + #endif + bl SYM(__gnu_ldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 +- do_pop {r2, r3} ++ do_pop (r2, r3) + RET + + #endif /* L_aeabi_ldivmod */ +@@ -85,17 +134,20 @@ + #ifdef L_aeabi_uldivmod + + ARM_FUNC_START aeabi_uldivmod ++ test_div_by_zero unsigned ++ + sub sp, sp, #8 +-#if defined(__thumb2__) ++/* Low latency and Thumb-2 do_push implementations can't push sp directly. */ ++#if defined(__thumb2__) || defined(__irq_low_latency__) + mov ip, sp +- push {ip, lr} ++ do_push (ip, lr) + #else +- do_push {sp, lr} ++ stmfd sp!, {sp, lr} + #endif + bl SYM(__gnu_uldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 +- do_pop {r2, r3} ++ do_pop (r2, r3) + RET + + #endif /* L_aeabi_divmod */ +diff -Nur a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S +--- a/gcc/config/arm/bpabi-v6m.S 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/bpabi-v6m.S 2010-01-25 09:50:28.985687200 +0100 +@@ -69,9 +69,52 @@ + + #endif /* L_aeabi_ulcmp */ + ++.macro test_div_by_zero signed ++ cmp yyh, #0 ++ bne 7f ++ cmp yyl, #0 ++ bne 7f ++ cmp xxh, #0 ++ bne 2f ++ cmp xxl, #0 ++2: ++ .ifc \signed, unsigned ++ beq 3f ++ mov xxh, #0 ++ mvn xxh, xxh @ 0xffffffff ++ mov xxl, xxh ++3: ++ .else ++ beq 5f ++ blt 6f ++ mov xxl, #0 ++ mvn xxl, xxl @ 0xffffffff ++ lsr xxh, xxl, #1 @ 0x7fffffff ++ b 5f ++6: mov xxh, #0x80 ++ lsl xxh, xxh, #24 @ 0x80000000 ++ mov xxl, #0 ++5: ++ .endif ++ @ tailcalls are tricky on v6-m. ++ push {r0, r1, r2} ++ ldr r0, 1f ++ adr r1, 1f ++ add r0, r1 ++ str r0, [sp, #8] ++ @ We know we are not on armv4t, so pop pc is safe. 
++ pop {r0, r1, pc} ++ .align 2 ++1: ++ .word __aeabi_ldiv0 - 1b ++7: ++.endm ++ + #ifdef L_aeabi_ldivmod + + FUNC_START aeabi_ldivmod ++ test_div_by_zero signed ++ + push {r0, r1} + mov r0, sp + push {r0, lr} +@@ -89,6 +132,8 @@ + #ifdef L_aeabi_uldivmod + + FUNC_START aeabi_uldivmod ++ test_div_by_zero unsigned ++ + push {r0, r1} + mov r0, sp + push {r0, lr} +diff -Nur a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md +--- a/gcc/config/arm/constraints.md 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/constraints.md 2010-01-25 09:50:28.985687200 +0100 +@@ -25,14 +25,15 @@ + ;; In ARM state, 'l' is an alias for 'r' + + ;; The following normal constraints have been used: +-;; in ARM/Thumb-2 state: G, H, I, J, K, L, M ++;; in ARM/Thumb-2 state: G, H, I, j, J, K, L, M + ;; in Thumb-1 state: I, J, K, L, M, N, O + + ;; The following multi-letter normal constraints have been used: +-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv ++;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy ++;; in Thumb-1 state: Pa, Pb + + ;; The following memory constraints have been used: +-;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Us ++;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us + ;; in ARM state: Uq + + +@@ -65,6 +66,13 @@ + (define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS" + "In Thumb state the core registers @code{r8}-@code{r15}.") + ++(define_constraint "j" ++ "A constant suitable for a MOVW instruction. (ARM/Thumb-2)" ++ (and (match_test "TARGET_32BIT && arm_arch_thumb2") ++ (ior (match_code "high") ++ (and (match_code "const_int") ++ (match_test "(ival & 0xffff0000) == 0"))))) ++ + (define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + +@@ -116,11 +124,9 @@ + : ((ival >= 0 && ival <= 1020) && ((ival & 3) == 0))"))) + + (define_constraint "N" +- "In ARM/Thumb-2 state a constant suitable for a MOVW instruction. +- In Thumb-1 state a constant in the range 0-31." ++ "Thumb-1 state a constant in the range 0-31." + (and (match_code "const_int") +- (match_test "TARGET_32BIT ? arm_arch_thumb2 && ((ival & 0xffff0000) == 0) +- : (ival >= 0 && ival <= 31)"))) ++ (match_test "!TARGET_32BIT && (ival >= 0 && ival <= 31)"))) + + (define_constraint "O" + "In Thumb-1 state a constant that is a multiple of 4 in the range +@@ -129,6 +135,18 @@ + (match_test "TARGET_THUMB1 && ival >= -508 && ival <= 508 + && ((ival & 3) == 0)"))) + ++(define_constraint "Pa" ++ "@internal In Thumb-1 state a constant in the range -510 to +510" ++ (and (match_code "const_int") ++ (match_test "TARGET_THUMB1 && ival >= -510 && ival <= 510 ++ && (ival > 255 || ival < -255)"))) ++ ++(define_constraint "Pb" ++ "@internal In Thumb-1 state a constant in the range -262 to +262" ++ (and (match_code "const_int") ++ (match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262 ++ && (ival > 255 || ival < -255)"))) ++ + (define_constraint "G" + "In ARM/Thumb-2 state a valid FPA immediate constant." + (and (match_code "const_double") +@@ -189,10 +207,17 @@ + (define_constraint "Dv" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconsts +- or fconstd instruction." ++ instruction." + (and (match_code "const_double") + (match_test "TARGET_32BIT && vfp3_const_double_rtx (op)"))) + ++(define_constraint "Dy" ++ "@internal ++ In ARM/Thumb-2 state a const_double which can be used with a VFP fconstd ++ instruction." 
++ (and (match_code "const_double")
++ (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)")))
++
+ (define_memory_constraint "Ut"
+ "@internal
+ In ARM/Thumb-2 state an address valid for loading/storing opaque structure
+@@ -214,17 +239,24 @@
+
+ (define_memory_constraint "Un"
+ "@internal
++ In ARM/Thumb-2 state a valid address for Neon doubleword vector
++ load/store instructions."
++ (and (match_code "mem")
++ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)")))
++
++(define_memory_constraint "Um"
++ "@internal
+ In ARM/Thumb-2 state a valid address for Neon element and structure
+ load/store instructions."
+ (and (match_code "mem")
+- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, FALSE)")))
++ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
+
+ (define_memory_constraint "Us"
+ "@internal
+ In ARM/Thumb-2 state a valid address for non-offset loads/stores of
+ quad-word values in four ARM registers."
+ (and (match_code "mem")
+- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, TRUE)")))
++ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)")))
+
+ (define_memory_constraint "Uq"
+ "@internal
+diff -Nur a/gcc/config/arm/fp16.c b/gcc/config/arm/fp16.c
+--- a/gcc/config/arm/fp16.c 1970-01-01 01:00:00.000000000 +0100
++++ b/gcc/config/arm/fp16.c 2010-01-25 09:50:28.985687200 +0100
+@@ -0,0 +1,145 @@
++/* Half-float conversion routines.
++
++ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
++ Contributed by CodeSourcery.
++
++ This file is free software; you can redistribute it and/or modify it
++ under the terms of the GNU General Public License as published by the
++ Free Software Foundation; either version 3, or (at your option) any
++ later version.
++
++ This file is distributed in the hope that it will be useful, but
++ WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ General Public License for more details.
++
++ Under Section 7 of GPL version 3, you are granted additional
++ permissions described in the GCC Runtime Library Exception, version
++ 3.1, as published by the Free Software Foundation.
++
++ You should have received a copy of the GNU General Public License and
++ a copy of the GCC Runtime Library Exception along with this program;
++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
++ <http://www.gnu.org/licenses/>. */
++
++static inline unsigned short
++__gnu_f2h_internal(unsigned int a, int ieee)
++{
++ unsigned short sign = (a >> 16) & 0x8000;
++ int aexp = (a >> 23) & 0xff;
++ unsigned int mantissa = a & 0x007fffff;
++ unsigned int mask;
++ unsigned int increment;
++
++ if (aexp == 0xff)
++ {
++ if (!ieee)
++ return sign;
++ return sign | 0x7e00 | (mantissa >> 13);
++ }
++
++ if (aexp == 0 && mantissa == 0)
++ return sign;
++
++ aexp -= 127;
++
++ /* Decimal point between bits 22 and 23. */
++ mantissa |= 0x00800000;
++ if (aexp < -14)
++ {
++ mask = 0x007fffff;
++ if (aexp < -25)
++ aexp = -26;
++ else if (aexp != -25)
++ mask >>= 24 + aexp;
++ }
++ else
++ mask = 0x00001fff;
++
++ /* Round. 
*/ ++ if (mantissa & mask) ++ { ++ increment = (mask + 1) >> 1; ++ if ((mantissa & mask) == increment) ++ increment = mantissa & (increment << 1); ++ mantissa += increment; ++ if (mantissa >= 0x01000000) ++ { ++ mantissa >>= 1; ++ aexp++; ++ } ++ } ++ ++ if (ieee) ++ { ++ if (aexp > 15) ++ return sign | 0x7c00; ++ } ++ else ++ { ++ if (aexp > 16) ++ return sign | 0x7fff; ++ } ++ ++ if (aexp < -24) ++ return sign; ++ ++ if (aexp < -14) ++ { ++ mantissa >>= -14 - aexp; ++ aexp = -14; ++ } ++ ++ /* We leave the leading 1 in the mantissa, and subtract one ++ from the exponent bias to compensate. */ ++ return sign | (((aexp + 14) << 10) + (mantissa >> 13)); ++} ++ ++unsigned int ++__gnu_h2f_internal(unsigned short a, int ieee) ++{ ++ unsigned int sign = (unsigned int)(a & 0x8000) << 16; ++ int aexp = (a >> 10) & 0x1f; ++ unsigned int mantissa = a & 0x3ff; ++ ++ if (aexp == 0x1f && ieee) ++ return sign | 0x7f800000 | (mantissa << 13); ++ ++ if (aexp == 0) ++ { ++ int shift; ++ ++ if (mantissa == 0) ++ return sign; ++ ++ shift = __builtin_clz(mantissa) - 21; ++ mantissa <<= shift; ++ aexp = -shift; ++ } ++ ++ return sign | (((aexp + 0x70) << 23) + (mantissa << 13)); ++} ++ ++unsigned short ++__gnu_f2h_ieee(unsigned int a) ++{ ++ return __gnu_f2h_internal(a, 1); ++} ++ ++unsigned int ++__gnu_h2f_ieee(unsigned short a) ++{ ++ return __gnu_h2f_internal(a, 1); ++} ++ ++unsigned short ++__gnu_f2h_alternative(unsigned int x) ++{ ++ return __gnu_f2h_internal(x, 0); ++} ++ ++unsigned int ++__gnu_h2f_alternative(unsigned short a) ++{ ++ return __gnu_h2f_internal(a, 0); ++} +diff -Nur a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md +--- a/gcc/config/arm/fpa.md 2007-08-02 12:49:31.000000000 +0200 ++++ b/gcc/config/arm/fpa.md 2010-01-25 09:50:28.985687200 +0100 +@@ -599,10 +599,10 @@ + { + default: + case 0: return \"mvf%?e\\t%0, %1\"; +- case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) ++ case 1: if (TARGET_FPA_EMU2) + return \"ldf%?e\\t%0, %1\"; + return \"lfm%?\\t%0, 1, %1\"; +- case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) ++ case 2: if (TARGET_FPA_EMU2) + return \"stf%?e\\t%1, %0\"; + return \"sfm%?\\t%1, 1, %0\"; + } +diff -Nur a/gcc/config/arm/hwdiv.md b/gcc/config/arm/hwdiv.md +--- a/gcc/config/arm/hwdiv.md 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/hwdiv.md 2010-01-25 09:50:28.985687200 +0100 +@@ -0,0 +1,41 @@ ++;; ARM instruction patterns for hardware division ++;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. ++;; Written by CodeSourcery, LLC. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 2, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING. If not, write to ++;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, ++;; Boston, MA 02110-1301, USA. 
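++
++;; A usage sketch (illustrative only; the function name and register
++;; allocation below are hypothetical): on a core where arm_arch_hwdiv
++;; holds, e.g. a Cortex-M3 built with -mcpu=cortex-m3, a plain C
++;; division such as
++;;
++;; int quot (int a, int b) { return a / b; }
++;;
++;; is expected to match the "divsi3" pattern below and assemble to a
++;; single
++;; sdiv r0, r0, r1
++;; rather than a call to the __aeabi_idiv library routine.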
++ ++(define_insn "divsi3" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (div:SI (match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r")))] ++ "arm_arch_hwdiv" ++ "sdiv%?\t%0, %1, %2" ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "sdiv")] ++) ++ ++(define_insn "udivsi3" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (udiv:SI (match_operand:SI 1 "s_register_operand" "r") ++ (match_operand:SI 2 "s_register_operand" "r")))] ++ "arm_arch_hwdiv" ++ "udiv%?\t%0, %1, %2" ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "udiv")] ++) ++ +diff -Nur a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S +--- a/gcc/config/arm/ieee754-df.S 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/ieee754-df.S 2010-01-25 09:50:28.985687200 +0100 +@@ -83,7 +83,7 @@ + ARM_FUNC_START adddf3 + ARM_FUNC_ALIAS aeabi_dadd adddf3 + +-1: do_push {r4, r5, lr} ++1: do_push (r4, r5, lr) + + @ Look for zeroes, equal values, INF, or NAN. + shift1 lsl, r4, xh, #1 +@@ -427,7 +427,7 @@ + do_it eq, t + moveq r1, #0 + RETc(eq) +- do_push {r4, r5, lr} ++ do_push (r4, r5, lr) + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + mov r5, #0 @ sign bit is 0 +@@ -447,7 +447,7 @@ + do_it eq, t + moveq r1, #0 + RETc(eq) +- do_push {r4, r5, lr} ++ do_push (r4, r5, lr) + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + ands r5, r0, #0x80000000 @ sign bit in r5 +@@ -481,7 +481,7 @@ + RETc(eq) @ we are done already. + + @ value was denormalized. We can normalize it now. +- do_push {r4, r5, lr} ++ do_push (r4, r5, lr) + mov r4, #0x380 @ setup corresponding exponent + and r5, xh, #0x80000000 @ move sign bit in r5 + bic xh, xh, #0x80000000 +@@ -508,9 +508,9 @@ + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. +- do_push {r4, r5, ip, lr, pc} ++ do_push (r4, r5, ip, lr, pc) + #else +- do_push {r4, r5, lr} ++ do_push (r4, r5, lr) + #endif + + mov r5, #0 +@@ -534,9 +534,9 @@ + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. +- do_push {r4, r5, ip, lr, pc} ++ do_push (r4, r5, ip, lr, pc) + #else +- do_push {r4, r5, lr} ++ do_push (r4, r5, lr) + #endif + + ands r5, ah, #0x80000000 @ sign bit in r5 +@@ -585,7 +585,7 @@ + @ Legacy code expects the result to be returned in f0. Copy it + @ there as well. + LSYM(f0_ret): +- do_push {r0, r1} ++ do_push (r0, r1) + ldfd f0, [sp], #8 + RETLDM + +@@ -602,7 +602,7 @@ + + ARM_FUNC_START muldf3 + ARM_FUNC_ALIAS aeabi_dmul muldf3 +- do_push {r4, r5, r6, lr} ++ do_push (r4, r5, r6, lr) + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff +@@ -910,7 +910,7 @@ + ARM_FUNC_START divdf3 + ARM_FUNC_ALIAS aeabi_ddiv divdf3 + +- do_push {r4, r5, r6, lr} ++ do_push (r4, r5, r6, lr) + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff +@@ -1117,7 +1117,7 @@ + ARM_FUNC_ALIAS eqdf2 cmpdf2 + mov ip, #1 @ how should we specify unordered here? + +-1: str ip, [sp, #-4] ++1: str ip, [sp, #-4]! + + @ Trap any INF/NAN first. + mov ip, xh, lsl #1 +@@ -1129,7 +1129,8 @@ + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. 
+-2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 ++2: add sp, sp, #4 ++ orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 + do_it eq, e + COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 + teqne xh, yh @ or xh == yh +@@ -1168,7 +1169,7 @@ + bne 2b + orrs ip, yl, yh, lsl #12 + beq 2b @ y is not NAN +-5: ldr r0, [sp, #-4] @ unordered return code ++5: ldr r0, [sp], #4 @ unordered return code + RET + + FUNC_END gedf2 +@@ -1194,7 +1195,7 @@ + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +-6: do_push {r0, lr} ++6: do_push (r0, lr) + ARM_CALL cmpdf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 +diff -Nur a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S +--- a/gcc/config/arm/ieee754-sf.S 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/ieee754-sf.S 2010-01-25 09:50:28.985687200 +0100 +@@ -481,7 +481,7 @@ + and r3, ip, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. +- do_push {r3, r4, r5} ++ do_push (r3, r4, r5) + mov r4, r0, lsr #16 + mov r5, r1, lsr #16 + bic r0, r0, r4, lsl #16 +@@ -492,7 +492,7 @@ + mla r0, r4, r1, r0 + adds r3, r3, r0, lsl #16 + adc r1, ip, r0, lsr #16 +- do_pop {r0, r4, r5} ++ do_pop (r0, r4, r5) + + #else + +@@ -822,7 +822,7 @@ + ARM_FUNC_ALIAS eqsf2 cmpsf2 + mov ip, #1 @ how should we specify unordered here? + +-1: str ip, [sp, #-4] ++1: str ip, [sp, #-4]! + + @ Trap any INF/NAN first. + mov r2, r0, lsl #1 +@@ -834,7 +834,8 @@ + + @ Compare values. + @ Note that 0.0 is equal to -0.0. +-2: orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag ++2: add sp, sp, #4 ++ orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag + do_it ne + teqne r0, r1 @ if not 0 compare sign + do_it pl +@@ -858,7 +859,7 @@ + bne 2b + movs ip, r1, lsl #9 + beq 2b @ r1 is not NAN +-5: ldr r0, [sp, #-4] @ return unordered code. ++5: ldr r0, [sp], #4 @ return unordered code. + RET + + FUNC_END gesf2 +@@ -881,7 +882,7 @@ + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +-6: do_push {r0, r1, r2, r3, lr} ++6: do_push (r0, r1, r2, r3, lr) + ARM_CALL cmpsf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 +diff -Nur a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm +--- a/gcc/config/arm/lib1funcs.asm 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/lib1funcs.asm 2010-01-25 09:50:28.985687200 +0100 +@@ -27,8 +27,17 @@ + #if defined(__ELF__) && defined(__linux__) + .section .note.GNU-stack,"",%progbits + .previous +-#endif ++#endif /* __ELF__ and __linux__ */ + ++#ifdef __ARM_EABI__ ++/* Some attributes that are common to all routines in this file. */ ++ /* Tag_ABI_align8_needed: This code does not require 8-byte ++ alignment from the caller. */ ++ /* .eabi_attribute 24, 0 -- default setting. */ ++ /* Tag_ABI_align8_preserved: This code preserves 8-byte ++ alignment in any callee. */ ++ .eabi_attribute 25, 1 ++#endif /* __ARM_EABI__ */ + /* ------------------------------------------------------------------------ */ + + /* We need to know what prefix to add to function names. */ +@@ -233,8 +242,8 @@ + .macro shift1 op, arg0, arg1, arg2 + \op \arg0, \arg1, \arg2 + .endm +-#define do_push push +-#define do_pop pop ++#define do_push(...) push {__VA_ARGS__} ++#define do_pop(...) pop {__VA_ARGS__} + #define COND(op1, op2, cond) op1 ## op2 ## cond + /* Perform an arithmetic operation with a variable shift operand. 
This
+ requires two instructions and a scratch register on Thumb-2. */
+@@ -248,24 +257,133 @@
+ .macro shift1 op, arg0, arg1, arg2
+ mov \arg0, \arg1, \op \arg2
+ .endm
+-#define do_push stmfd sp!,
+-#define do_pop ldmfd sp!,
++#if defined(__low_irq_latency__)
++#define do_push(...) \
++ _buildN1(do_push, _buildC1(__VA_ARGS__))( __VA_ARGS__)
++#define _buildN1(BASE, X) _buildN2(BASE, X)
++#define _buildN2(BASE, X) BASE##X
++#define _buildC1(...) _buildC2(__VA_ARGS__,9,8,7,6,5,4,3,2,1)
++#define _buildC2(a1,a2,a3,a4,a5,a6,a7,a8,a9,c,...) c
++
++#define do_push1(r1) str r1, [sp, #-4]!
++#define do_push2(r1, r2) str r2, [sp, #-4]! ; str r1, [sp, #-4]!
++#define do_push3(r1, r2, r3) str r3, [sp, #-4]! ; str r2, [sp, #-4]!; str r1, [sp, #-4]!
++#define do_push4(r1, r2, r3, r4) \
++ do_push3 (r2, r3, r4);\
++ do_push1 (r1)
++#define do_push5(r1, r2, r3, r4, r5) \
++ do_push4 (r2, r3, r4, r5);\
++ do_push1 (r1)
++
++#define do_pop(...) \
++_buildN1(do_pop, _buildC1(__VA_ARGS__))( __VA_ARGS__)
++
++#define do_pop1(r1) ldr r1, [sp], #4
++#define do_pop2(r1, r2) ldr r1, [sp], #4 ; ldr r2, [sp], #4
++#define do_pop3(r1, r2, r3) ldr r1, [sp], #4 ; ldr r2, [sp], #4; ldr r3, [sp], #4
++#define do_pop4(r1, r2, r3, r4) \
++ do_pop1 (r1);\
++ do_pop3 (r2, r3, r4)
++#define do_pop5(r1, r2, r3, r4, r5) \
++ do_pop1 (r1);\
++ do_pop4 (r2, r3, r4, r5)
++#else
++#define do_push(...) stmfd sp!, { __VA_ARGS__}
++#define do_pop(...) ldmfd sp!, {__VA_ARGS__}
++#endif
++
++
+ #define COND(op1, op2, cond) op1 ## cond ## op2
+ .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
+ \name \dest, \src1, \src2, \shiftop \shiftreg
+ .endm
+ #endif
+
+-.macro ARM_LDIV0 name
++#ifdef __ARM_EABI__
++.macro ARM_LDIV0 name signed
++ cmp r0, #0
++ .ifc \signed, unsigned
++ movne r0, #0xffffffff
++ .else
++ movgt r0, #0x7fffffff
++ movlt r0, #0x80000000
++ .endif
++ b SYM (__aeabi_idiv0) __PLT__
++.endm
++#else
++.macro ARM_LDIV0 name signed
+ str lr, [sp, #-8]!
+ 98: cfi_push 98b - __\name, 0xe, -0x8, 0x8
+ bl SYM (__div0) __PLT__
+ mov r0, #0 @ About as wrong as it could be.
+ RETLDM unwind=98b
+ .endm
++#endif
+
+
+-.macro THUMB_LDIV0 name
++#ifdef __ARM_EABI__
++.macro THUMB_LDIV0 name signed
++#if defined(__ARM_ARCH_6M__)
++ .ifc \signed, unsigned
++ cmp r0, #0
++ beq 1f
++ mov r0, #0
++ mvn r0, r0 @ 0xffffffff
++1:
++ .else
++ cmp r0, #0
++ beq 2f
++ blt 3f
++ mov r0, #0
++ mvn r0, r0
++ lsr r0, r0, #1 @ 0x7fffffff
++ b 2f
++3: mov r0, #0x80
++ lsl r0, r0, #24 @ 0x80000000
++2:
++ .endif
++ push {r0, r1, r2}
++ ldr r0, 4f
++ adr r1, 4f
++ add r0, r1
++ str r0, [sp, #8]
++ @ We know we are not on armv4t, so pop pc is safe. 
++ pop {r0, r1, pc} ++ .align 2 ++4: ++ .word __aeabi_idiv0 - 4b ++#elif defined(__thumb2__) ++ .syntax unified ++ .ifc \signed, unsigned ++ cbz r0, 1f ++ mov r0, #0xffffffff ++1: ++ .else ++ cmp r0, #0 ++ do_it gt ++ movgt r0, #0x7fffffff ++ do_it lt ++ movlt r0, #0x80000000 ++ .endif ++ b.w SYM(__aeabi_idiv0) __PLT__ ++#else ++ .align 2 ++ bx pc ++ nop ++ .arm ++ cmp r0, #0 ++ .ifc \signed, unsigned ++ movne r0, #0xffffffff ++ .else ++ movgt r0, #0x7fffffff ++ movlt r0, #0x80000000 ++ .endif ++ b SYM(__aeabi_idiv0) __PLT__ ++ .thumb ++#endif ++.endm ++#else ++.macro THUMB_LDIV0 name signed + push { r1, lr } + 98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 + bl SYM (__div0) +@@ -277,18 +395,19 @@ + pop { r1, pc } + #endif + .endm ++#endif + + .macro FUNC_END name + SIZE (__\name) + .endm + +-.macro DIV_FUNC_END name ++.macro DIV_FUNC_END name signed + cfi_start __\name, LSYM(Lend_div0) + LSYM(Ldiv0): + #ifdef __thumb__ +- THUMB_LDIV0 \name ++ THUMB_LDIV0 \name \signed + #else +- ARM_LDIV0 \name ++ ARM_LDIV0 \name \signed + #endif + cfi_end LSYM(Lend_div0) + FUNC_END \name +@@ -413,6 +532,12 @@ + #define yyl r2 + #endif + ++#ifdef __ARM_EABI__ ++.macro WEAK name ++ .weak SYM (__\name) ++.endm ++#endif ++ + #ifdef __thumb__ + /* Register aliases. */ + +@@ -437,6 +562,43 @@ + + #if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + ++#if defined (__thumb2__) ++ clz \curbit, \dividend ++ clz \result, \divisor ++ sub \curbit, \result, \curbit ++ rsb \curbit, \curbit, #31 ++ adr \result, 1f ++ add \curbit, \result, \curbit, lsl #4 ++ mov \result, #0 ++ mov pc, \curbit ++.p2align 3 ++1: ++ .set shift, 32 ++ .rept 32 ++ .set shift, shift - 1 ++ cmp.w \dividend, \divisor, lsl #shift ++ nop.n ++ adc.w \result, \result, \result ++ it cs ++ subcs.w \dividend, \dividend, \divisor, lsl #shift ++ .endr ++#elif defined(__ARM_TUNE_MARVELL_F__) ++ clz \curbit, \dividend ++ clz \result, \divisor ++ sub \curbit, \result, \curbit ++ mov \divisor, \divisor, lsl \curbit ++ rsb \curbit, \curbit, #31 ++ mov \curbit, \curbit, lsl #2 ++ mov \result, #0 ++ add pc, pc, \curbit, lsl #2 ++ nop ++ .rept 32 ++ cmp \dividend, \divisor ++ subcs \dividend, \dividend, \divisor ++ mov \divisor, \divisor, lsr #1 ++ adc \result, \result, \result ++ .endr ++#else /* ! defined(__ARM_TUNE_MARVELL_F__) */ + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit +@@ -452,6 +614,7 @@ + adc \result, \result, \result + subcs \dividend, \dividend, \divisor, lsl #shift + .endr ++#endif /* defined(__ARM_TUNE_MARVELL_F__) */ + + #else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + #if __ARM_ARCH__ >= 5 +@@ -499,18 +662,23 @@ + + @ Division loop + 1: cmp \dividend, \divisor ++ do_it hs, t + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 ++ do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 ++ do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 ++ do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? ++ do_it ne, t + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? 
+ movne \divisor, \divisor, lsr #4 + bne 1b +@@ -799,13 +967,14 @@ + /* ------------------------------------------------------------------------ */ + #ifdef L_udivsi3 + ++#if defined(__ARM_ARCH_6M__) ++ + FUNC_START udivsi3 + FUNC_ALIAS aeabi_uidiv udivsi3 + +-#ifdef __thumb__ +- + cmp divisor, #0 + beq LSYM(Ldiv0) ++LSYM(udivsi3_nodiv0): + mov curbit, #1 + mov result, #0 + +@@ -819,9 +988,16 @@ + pop { work } + RET + +-#else /* ARM version. */ ++#else /* ARM/Thumb-2 version. */ ++ ++ ARM_FUNC_START udivsi3 ++ ARM_FUNC_ALIAS aeabi_uidiv udivsi3 + ++ /* Note: if called via udivsi3_nodiv0, this will unnecessarily check ++ for division-by-zero a second time. */ ++LSYM(udivsi3_nodiv0): + subs r2, r1, #1 ++ do_it eq + RETc(eq) + bcc LSYM(Ldiv0) + cmp r0, r1 +@@ -834,7 +1010,8 @@ + mov r0, r2 + RET + +-11: moveq r0, #1 ++11: do_it eq, e ++ moveq r0, #1 + movne r0, #0 + RET + +@@ -845,19 +1022,24 @@ + + #endif /* ARM version */ + +- DIV_FUNC_END udivsi3 ++ DIV_FUNC_END udivsi3 unsigned + ++#if defined(__ARM_ARCH_6M__) + FUNC_START aeabi_uidivmod +-#ifdef __thumb__ ++ cmp r1, #0 ++ beq LSYM(Ldiv0) + push {r0, r1, lr} +- bl SYM(__udivsi3) ++ bl LSYM(udivsi3_nodiv0) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 + #else ++ARM_FUNC_START aeabi_uidivmod ++ cmp r1, #0 ++ beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } +- bl SYM(__udivsi3) ++ bl LSYM(udivsi3_nodiv0) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 +@@ -904,19 +1086,20 @@ + + #endif /* ARM version. */ + +- DIV_FUNC_END umodsi3 ++ DIV_FUNC_END umodsi3 unsigned + + #endif /* L_umodsi3 */ + /* ------------------------------------------------------------------------ */ + #ifdef L_divsi3 + ++#if defined(__ARM_ARCH_6M__) ++ + FUNC_START divsi3 + FUNC_ALIAS aeabi_idiv divsi3 + +-#ifdef __thumb__ + cmp divisor, #0 + beq LSYM(Ldiv0) +- ++LSYM(divsi3_nodiv0): + push { work } + mov work, dividend + eor work, divisor @ Save the sign of the result. +@@ -945,15 +1128,21 @@ + pop { work } + RET + +-#else /* ARM version. */ ++#else /* ARM/Thumb-2 version. */ + ++ ARM_FUNC_START divsi3 ++ ARM_FUNC_ALIAS aeabi_idiv divsi3 ++ + cmp r1, #0 +- eor ip, r0, r1 @ save the sign of the result. + beq LSYM(Ldiv0) ++LSYM(divsi3_nodiv0): ++ eor ip, r0, r1 @ save the sign of the result. ++ do_it mi + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 ++ do_it mi + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f +@@ -963,14 +1152,18 @@ + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 ++ do_it mi + rsbmi r0, r0, #0 + RET + + 10: teq ip, r0 @ same sign ? 
++ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+-11: movlo r0, #0
++11: do_it lo
++ movlo r0, #0
++ do_it eq, t
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ RET
+@@ -979,24 +1172,30 @@
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
++ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+ #endif /* ARM version */
+
+- DIV_FUNC_END divsi3
++ DIV_FUNC_END divsi3 signed
+
++#if defined(__ARM_ARCH_6M__)
+ FUNC_START aeabi_idivmod
+-#ifdef __thumb__
++ cmp r1, #0
++ beq LSYM(Ldiv0)
+ push {r0, r1, lr}
+- bl SYM(__divsi3)
++ bl LSYM(divsi3_nodiv0)
+ POP {r1, r2, r3}
+ mul r2, r0
+ sub r1, r1, r2
+ bx r3
+ #else
++ARM_FUNC_START aeabi_idivmod
++ cmp r1, #0
++ beq LSYM(Ldiv0)
+ stmfd sp!, { r0, r1, lr }
+- bl SYM(__divsi3)
++ bl LSYM(divsi3_nodiv0)
+ ldmfd sp!, { r1, r2, lr }
+ mul r3, r2, r0
+ sub r1, r1, r3
+@@ -1062,21 +1261,25 @@
+
+ #endif /* ARM version */
+
+- DIV_FUNC_END modsi3
++ DIV_FUNC_END modsi3 signed
+
+ #endif /* L_modsi3 */
+ /* ------------------------------------------------------------------------ */
+ #ifdef L_dvmd_tls
+
+- FUNC_START div0
+- FUNC_ALIAS aeabi_idiv0 div0
+- FUNC_ALIAS aeabi_ldiv0 div0
+-
++#ifdef __ARM_EABI__
++ WEAK aeabi_idiv0
++ WEAK aeabi_ldiv0
++ FUNC_START aeabi_idiv0
++ FUNC_START aeabi_ldiv0
+ RET
+-
+ FUNC_END aeabi_ldiv0
+ FUNC_END aeabi_idiv0
++#else
++ FUNC_START div0
++ RET
+ FUNC_END div0
++#endif
+
+ #endif /* L_divmodsi_tools */
+ /* ------------------------------------------------------------------------ */
+@@ -1086,16 +1289,49 @@
+
+ /* Constant taken from <asm/signal.h>. */
+ #define SIGFPE 8
+
++#ifdef __ARM_EABI__
++ WEAK aeabi_idiv0
++ WEAK aeabi_ldiv0
++ ARM_FUNC_START aeabi_idiv0
++ ARM_FUNC_START aeabi_ldiv0
++#else
+ ARM_FUNC_START div0
++#endif
+
+- do_push {r1, lr}
++ do_push (r1, lr)
+ mov r0, #SIGFPE
+ bl SYM(raise) __PLT__
+ RETLDM r1
+
++#ifdef __ARM_EABI__
++ FUNC_END aeabi_ldiv0
++ FUNC_END aeabi_idiv0
++#else
+ FUNC_END div0
++#endif
+
+ #endif /* L_dvmd_lnx */
++#ifdef L_clear_cache
++#if defined __ARM_EABI__ && defined __linux__
++@ EABI GNU/Linux call to cacheflush syscall.
++ ARM_FUNC_START clear_cache
++ do_push (r7)
++#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
++ movw r7, #2
++ movt r7, #0xf
++#else
++ mov r7, #0xf0000
++ add r7, r7, #2
++#endif
++ mov r2, #0
++ swi 0
++ do_pop (r7)
++ RET
++ FUNC_END clear_cache
++#else
++#error "This is only for ARM EABI GNU/Linux"
++#endif
++#endif /* L_clear_cache */
+ /* ------------------------------------------------------------------------ */
+ /* Dword shift operations. */
+ /* All the following Dword shift variants rely on the fact that
+@@ -1292,7 +1528,7 @@
+ push {r4, lr}
+ # else
+ ARM_FUNC_START clzdi2
+- do_push {r4, lr}
++ do_push (r4, lr)
+ # endif
+ cmp xxh, #0
+ bne 1f
+diff -Nur a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h
+--- a/gcc/config/arm/linux-eabi.h 2007-11-08 14:44:09.000000000 +0100
++++ b/gcc/config/arm/linux-eabi.h 2010-01-25 09:50:28.995687913 +0100
+@@ -66,22 +66,14 @@
+ /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to
+ use the GNU/Linux version, not the generic BPABI version. */
+ #undef LINK_SPEC
+-#define LINK_SPEC LINUX_TARGET_LINK_SPEC
++#define LINK_SPEC LINUX_TARGET_LINK_SPEC BE8_LINK_SPEC
+
+ /* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we
+ do not use -lfloat. */
+ #undef LIBGCC_SPEC
+
+-/* Clear the instruction cache from `beg' to `end'. This makes an
+- inline system call to SYS_cacheflush. */
++/* Clear the instruction cache from `beg' to `end'. 
This is ++ implemented in lib1funcs.asm, so ensure an error if this definition ++ is used. */ + #undef CLEAR_INSN_CACHE +-#define CLEAR_INSN_CACHE(BEG, END) \ +-{ \ +- register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ +- register unsigned long _end __asm ("a2") = (unsigned long) (END); \ +- register unsigned long _flg __asm ("a3") = 0; \ +- register unsigned long _scno __asm ("r7") = 0xf0002; \ +- __asm __volatile ("swi 0 @ sys_cacheflush" \ +- : "=r" (_beg) \ +- : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); \ +-} ++#define CLEAR_INSN_CACHE(BEG, END) not used +diff -Nur a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h +--- a/gcc/config/arm/linux-elf.h 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/linux-elf.h 2010-01-25 09:50:28.995687913 +0100 +@@ -98,7 +98,7 @@ + + /* NWFPE always understands FPA instructions. */ + #undef FPUTYPE_DEFAULT +-#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3 ++#define FPUTYPE_DEFAULT "fpe3" + + /* Call the function profiler with a given profile label. */ + #undef ARM_FUNCTION_PROFILER +diff -Nur a/gcc/config/arm/marvell-f.md b/gcc/config/arm/marvell-f.md +--- a/gcc/config/arm/marvell-f.md 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/marvell-f.md 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,365 @@ ++;; Marvell 2850 pipeline description ++;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. ++;; Written by Marvell and CodeSourcery, Inc. ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 2, or (at your ++;; option) any later version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING. If not, write to ++;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, ++;; Boston, MA 02110-1301, USA. ++ ++;; This automaton provides a pipeline description for the Marvell ++;; 2850 core. ++;; ++;; The model given here assumes that the condition for all conditional ++;; instructions is "true", i.e., that all of the instructions are ++;; actually executed. ++ ++(define_automaton "marvell_f") ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;; Pipelines ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;; This is a dual-issue processor with three pipelines: ++;; ++;; 1. Arithmetic and load/store pipeline A1. ++;; Issue | E1 | E2 | OF | WR | WB for load-store instructions ++;; Issue | E1 | E2 | WB for arithmetic instructions ++;; ++;; 2. Arithmetic pipeline A2. ++;; Issue | E1 | E2 | WB ++;; ++;; 3. Multiply and multiply-accumulate pipeline. ++;; Issue | MAC1 | MAC2 | MAC3 | WB ++;; ++;; There are various bypasses modelled to a greater or lesser extent. ++;; ++;; Latencies in this file correspond to the number of cycles after ++;; the issue stage that it takes for the result of the instruction to ++;; be computed, or for its side-effects to occur. 
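++;;
++;; A worked example of this latency convention: an ALU operation with
++;; a shift-by-constant operand occupies E1 on the cycle after issue and
++;; E2 on the cycle after that, so its result is ready two cycles after
++;; issue; the "marvell_f_alu_shift_op" reservation below accordingly
++;; pairs a latency of 2 with "(a1_e1,a1_e2,a1_wb)|(a2_e1,a2_e2,a2_wb)".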
++
++(define_cpu_unit "a1_e1,a1_e2,a1_of,a1_wr,a1_wb" "marvell_f") ; ALU 1
++(define_cpu_unit "a2_e1,a2_e2,a2_wb" "marvell_f") ; ALU 2
++(define_cpu_unit "m_1,m_2,m_3,m_wb" "marvell_f") ; MAC
++
++;; We define an SRAM cpu unit to enable us to describe conflicts
++;; between loads at the E2 stage and stores at the WR stage.
++
++(define_cpu_unit "sram" "marvell_f")
++
++;; Handling of dual-issue constraints.
++;;
++;; Certain pairs of instructions can be issued in parallel, and certain
++;; pairs cannot. We divide a subset of the instructions into groups as
++;; follows.
++;;
++;; - data processing 1 (mov, mvn);
++;; - data processing 2 (adc, add, and, bic, cmn, cmp, eor, orr, rsb,
++;; rsc, sbc, sub, teq, tst);
++;; - load single (ldr, ldrb, ldrbt, ldrt, ldrh, ldrsb, ldrsh);
++;; - store single (str, strb, strbt, strt, strh);
++;; - swap (swp, swpb);
++;; - pld;
++;; - count leading zeros and DSP add/sub (clz, qadd, qdadd, qsub, qdsub);
++;; - multiply 2 (mul, muls, smull, umull, smulxy, smulls, umulls);
++;; - multiply 3 (mla, mlas, smlal, umlal, smlaxy, smlalxy, smlawx,
++;; smlawy, smlals, umlals);
++;; - branches (b, bl, blx, bx).
++;;
++;; Ignoring conditional execution, it is a good approximation to the core
++;; to model that two instructions may only be issued in parallel if the
++;; following conditions are met.
++;; I. The instructions both fall into one of the above groups and their
++;; corresponding groups have an entry in the matrix below that is not X.
++;; II. The second instruction does not read any register updated by the
++;; first instruction (already enforced by the GCC scheduler).
++;; III. The second instruction does not need the carry flag updated by the
++;; first instruction. Currently we do not model this.
++;;
++;; First Second instruction group
++;; insn
++;; DP1 DP2 L S SWP PLD CLZ M2 M3 B
++;;
++;; DP1 ok ok ok ok ok ok ok ok ok ok
++;; DP2(1) ok ok ok ok ok ok ok ok ok ok
++;; DP2(2) ok (2) ok (4) ok ok ok ok X ok
++;; L }
++;; SWP } ok ok X X X X ok ok ok ok
++;; PLD }
++;; S(3) ok ok X X X X ok ok ok ok
++;; S(4) ok (2) X X X X ok ok X ok
++;; CLZ ok ok ok ok ok ok ok ok ok ok
++;; M2 ok ok ok ok ok ok ok X X ok
++;; M3 ok (2) ok (4) ok ok ok X X ok
++;; B ok ok ok ok ok ok ok ok ok ok
++;;
++;; (1) without register shift
++;; (2) with register shift
++;; (3) with immediate offset
++;; (4) with register offset
++;;
++;; We define a fake cpu unit "reg_shift_lock" to enforce constraints
++;; between instructions in groups DP2(2) and M3. All other
++;; constraints are enforced automatically by virtue of the limited
++;; number of pipelines available for the various operations, with
++;; the exception of constraints involving S(4) that we do not model.
++
++(define_cpu_unit "reg_shift_lock" "marvell_f")
++
++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
++;; ALU instructions
++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
++
++;; 1. Certain logic operations can be retired after the E1 stage if
++;; the pipeline is not already retiring another instruction. In this
++;; model we assume this behaviour always holds for mov, mvn, and, orr, eor
++;; instructions. If a register shift is involved and the instruction is
++;; not mov or mvn, then a dual-issue constraint must be enforced.
++
++;; The first two cases are separate so they can be identified for
++;; bypasses below.
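++;;
++;; As a concrete illustration: "orr r0, r1, r2" may retire after E1 on
++;; either ALU pipeline, whereas the register-shifted
++;; "orr r0, r1, r2, lsl r3" must also claim the fake reg_shift_lock
++;; unit, and so can never dual-issue with a group-M3
++;; multiply-accumulate, which claims the same unit.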
++ ++(define_insn_reservation "marvell_f_alu_early_retire" 1 ++ (and (eq_attr "tune" "marvell_f") ++ (and (eq_attr "type" "alu") ++ (eq_attr "insn" "mov,mvn,and,orr,eor"))) ++ "(a1_e1,a1_wb)|(a2_e1,a2_wb)") ++ ++(define_insn_reservation "marvell_f_alu_early_retire_shift" 1 ++ (and (eq_attr "tune" "marvell_f") ++ (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "insn" "mov,mvn,and,orr,eor"))) ++ "(a1_e1,a1_wb)|(a2_e1,a2_wb)") ++ ++(define_insn_reservation "marvell_f_alu_early_retire_reg_shift1" 1 ++ (and (eq_attr "tune" "marvell_f") ++ (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "insn" "mov,mvn"))) ++ "(a1_e1,a1_wb)|(a2_e1,a2_wb)") ++ ++(define_insn_reservation "marvell_f_alu_early_retire_reg_shift2" 1 ++ (and (eq_attr "tune" "marvell_f") ++ (and (eq_attr "type" "alu_shift_reg") ++ (eq_attr "insn" "and,orr,eor"))) ++ "(reg_shift_lock+a1_e1,a1_wb)|(reg_shift_lock+a2_e1,a2_wb)") ++ ++;; 2. ALU operations with no shifted operand. These bypass the E1 stage if ++;; the E2 stage of the corresponding pipeline is clear; here, we always ++;; model this scenario [*]. We give the operation a latency of 1 yet reserve ++;; both E1 and E2 for it (thus preventing the GCC scheduler, in the case ++;; where both E1 and E2 of one pipeline are clear, from issuing one ++;; instruction to each). ++;; ++;; [*] The non-bypass case is a latency of two, reserving E1 on the first ++;; cycle and E2 on the next. Due to the way the scheduler works we ++;; have to choose between taking this as the default and taking the ++;; above case (with latency one) as the default; we choose the latter. ++ ++(define_insn_reservation "marvell_f_alu_op_bypass_e1" 1 ++ (and (eq_attr "tune" "marvell_f") ++ (and (eq_attr "type" "alu") ++ (not (eq_attr "insn" "mov,mvn,and,orr,eor")))) ++ "(a1_e1+a1_e2,a1_wb)|(a2_e1+a2_e2,a2_wb)") ++ ++;; 3. ALU operations with a shift-by-constant operand. ++ ++(define_insn_reservation "marvell_f_alu_shift_op" 2 ++ (and (eq_attr "tune" "marvell_f") ++ (and (eq_attr "type" "alu_shift") ++ (not (eq_attr "insn" "mov,mvn,and,orr,eor")))) ++ "(a1_e1,a1_e2,a1_wb)|(a2_e1,a2_e2,a2_wb)") ++ ++;; 4. ALU operations with a shift-by-register operand. Since the ++;; instruction is never mov or mvn, a dual-issue constraint must ++;; be enforced. ++ ++(define_insn_reservation "marvell_f_alu_shift_reg_op" 2 ++ (and (eq_attr "tune" "marvell_f") ++ (and (eq_attr "type" "alu_shift_reg") ++ (not (eq_attr "insn" "mov,mvn,and,orr,eor")))) ++ "(reg_shift_lock+a1_e1,a1_e2,a1_wb)|(reg_shift_lock+a2_e1,a2_e2,a2_wb)") ++ ++;; Given an ALU operation with shift (I1) followed by another ALU ++;; operation (I2), with I2 depending on the destination register Rd of I1 ++;; and with I2 not using that value as the amount or the starting value for ++;; a shift, then I1 and I2 may be issued to the same pipeline on ++;; consecutive cycles. In terms of this model that corresponds to I1 ++;; having a latency of one cycle. There are three cases for various ++;; I1 and I2 as follows. ++ ++;; (a) I1 has a constant or register shift and I2 doesn't have a shift at all. ++(define_bypass 1 "marvell_f_alu_shift_op,\ ++ marvell_f_alu_shift_reg_op" ++ "marvell_f_alu_op_bypass_e1,marvell_f_alu_early_retire") ++ ++;; (b) I1 has a constant or register shift and I2 has a constant shift. ++;; Rd must not provide the starting value for the shift. 
++(define_bypass 1 "marvell_f_alu_shift_op,\ ++ marvell_f_alu_shift_reg_op" ++ "marvell_f_alu_shift_op,marvell_f_alu_early_retire_shift" ++ "arm_no_early_alu_shift_value_dep") ++ ++;; (c) I1 has a constant or register shift and I2 has a register shift. ++;; Rd must not provide the amount by which to shift. ++(define_bypass 1 "marvell_f_alu_shift_op,\ ++ marvell_f_alu_shift_reg_op" ++ "marvell_f_alu_shift_reg_op,\ ++ marvell_f_alu_early_retire_reg_shift1,\ ++ marvell_f_alu_early_retire_reg_shift2" ++ "arm_no_early_alu_shift_dep") ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;; Multiplication instructions ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;; Multiplication instructions in group "Multiply 2". ++ ++(define_insn_reservation "marvell_f_multiply_2" 3 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "insn" "mul,muls,smull,umull,smulxy,smulls,umulls")) ++ "m_1,m_2,m_3,m_wb") ++ ++;; Multiplication instructions in group "Multiply 3". There is a ++;; dual-issue constraint with non-multiplication ALU instructions ++;; to be respected here. ++ ++(define_insn_reservation "marvell_f_multiply_3" 3 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "insn" "mla,mlas,smlal,umlal,smlaxy,smlalxy,smlawx,\ ++ smlawy,smlals,umlals")) ++ "reg_shift_lock+m_1,m_2,m_3,m_wb") ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;; Branch instructions ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;; Conditional backward b instructions can have a zero-cycle penalty, and ++;; other conditional b and bl instructions have a one-cycle penalty if ++;; predicted correctly. Currently we model the zero-cycle case for all ++;; branches. ++ ++(define_insn_reservation "marvell_f_branches" 0 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "branch")) ++ "nothing") ++ ++;; Call latencies are not predictable; a semi-arbitrary very large ++;; number is used as "positive infinity" for such latencies. ++ ++(define_insn_reservation "marvell_f_call" 32 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "call")) ++ "nothing") ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;; Load/store instructions ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;; The models for load/store instructions do not accurately describe ++;; the difference between operations with a base register writeback. ++;; These models assume that all memory references hit in dcache. ++ ++;; 1. Load/store for single registers. ++ ++;; The worst case for a load is when the load result is needed in E1 ++;; (for example for a register shift), giving a latency of four. Loads ++;; skip E1 and access memory at the E2 stage. ++ ++(define_insn_reservation "marvell_f_load1" 4 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "load1,load_byte")) ++ "a1_e2+sram,a1_of,a1_wr,a1_wb") ++ ++;; The result for a load may be bypassed (to be available at the same ++;; time as the load arrives in the WR stage, so effectively at the OF ++;; stage) to the Rn operand at E2 with a latency of two. The result may ++;; be bypassed to a non-Rn operand at E2 with a latency of three. For ++;; instructions without shifts, detection of an Rn bypass situation is ++;; difficult (because some of the instruction patterns switch their ++;; operands), and so we do not model that here. 
For instructions with ++;; shifts, the operand used at E2 will always be Rn, and so we can ++;; model the latency-two bypass for these. ++ ++(define_bypass 2 "marvell_f_load1" ++ "marvell_f_alu_shift_op" ++ "arm_no_early_alu_shift_value_dep") ++ ++(define_bypass 2 "marvell_f_load1" ++ "marvell_f_alu_shift_reg_op" ++ "arm_no_early_alu_shift_dep") ++ ++;; Stores write at the WR stage and loads read at the E2 stage, giving ++;; a store latency of three. ++ ++(define_insn_reservation "marvell_f_store1" 3 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "store1")) ++ "a1_e2,a1_of,a1_wr+sram,a1_wb") ++ ++;; 2. Load/store for two consecutive registers. These may be dealt ++;; with in the same number of cycles as single loads and stores. ++ ++(define_insn_reservation "marvell_f_load2" 4 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "load2")) ++ "a1_e2+sram,a1_of,a1_wr,a1_wb") ++ ++(define_insn_reservation "marvell_f_store2" 3 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "store2")) ++ "a1_e2,a1_of,a1_wr+sram,a1_wb") ++ ++;; The first word of a doubleword load is eligible for the latency-two ++;; bypass described above for single loads, but this is not modelled here. ++;; We do however assume that either word may also be bypassed with ++;; latency three for ALU operations with shifts (where the shift value and ++;; amount do not depend on the loaded value) and latency four for ALU ++;; operations without shifts. The latency four case is of course the default. ++ ++(define_bypass 3 "marvell_f_load2" ++ "marvell_f_alu_shift_op" ++ "arm_no_early_alu_shift_value_dep") ++ ++(define_bypass 3 "marvell_f_load2" ++ "marvell_f_alu_shift_reg_op" ++ "arm_no_early_alu_shift_dep") ++ ++;; 3. Load/store for more than two registers. ++ ++;; These instructions stall for an extra cycle in the decode stage; ++;; individual load/store instructions for each register are then issued. ++;; The load/store multiple instruction itself is removed from the decode ++;; stage at the same time as the final load/store instruction is issued. ++;; To complicate matters, pairs of loads/stores referencing two ++;; consecutive registers will be issued together as doubleword operations. ++;; We model a 3-word load as an LDR plus an LDRD, and a 4-word load ++;; as two LDRDs; thus, these are allocated the same latencies (the ++;; latency for two consecutive loads plus one for the setup stall). ++;; The extra stall is modelled by reserving E1. ++ ++(define_insn_reservation "marvell_f_load3_4" 6 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "load3,load4")) ++ "a1_e1,a1_e1+a1_e2+sram,a1_e2+sram+a1_of,a1_of+a1_wr,a1_wr+a1_wb,a1_wb") ++ ++;; Bypasses are possible for ldm as for single loads, but we do not ++;; model them here since the order of the constituent loads is ++;; difficult to predict. ++ ++(define_insn_reservation "marvell_f_store3_4" 5 ++ (and (eq_attr "tune" "marvell_f") ++ (eq_attr "type" "store3,store4")) ++ "a1_e1,a1_e1+a1_e2,a1_e2+a1_of,a1_of+a1_wr+sram,a1_wr+sram+a1_wb,a1_wb") ++ +diff -Nur a/gcc/config/arm/marvell-f-vfp.md b/gcc/config/arm/marvell-f-vfp.md +--- a/gcc/config/arm/marvell-f-vfp.md 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/marvell-f-vfp.md 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,153 @@ ++;; Marvell 2850 VFP pipeline description ++;; Copyright (C) 2007 Free Software Foundation, Inc. ++;; Written by CodeSourcery, Inc. ++ ++;; This file is part of GCC. 
++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING. If not, write to ++;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, ++;; Boston, MA 02110-1301, USA. ++ ++;; This automaton provides a pipeline description for the Marvell ++;; 2850 core. ++;; ++;; The model given here assumes that the condition for all conditional ++;; instructions is "true", i.e., that all of the instructions are ++;; actually executed. ++ ++(define_automaton "marvell_f_vfp") ++ ++;; This is a single-issue VFPv2 implementation with the following execution ++;; units: ++;; ++;; 1. Addition/subtraction unit; takes three cycles, pipelined. ++;; 2. Multiplication unit; takes four cycles, pipelined. ++;; 3. Add buffer, used for multiply-accumulate (see below). ++;; 4. Divide/square root unit, not pipelined. ++;; For single-precision: takes sixteen cycles, can accept another insn ++;; after fifteen cycles. ++;; For double-precision: takes thirty-one cycles, can accept another insn ++;; after thirty cycles. ++;; 5. Single-cycle unit, pipelined. ++;; This does absolute value/copy/negate/compare in one cycle and ++;; conversion in two cycles. ++;; ++;; When all three operands of a multiply-accumulate instruction are ready, ++;; one is issued to the add buffer (which can hold six operands in a FIFO) ++;; and the two to be multiplied are issued to the multiply unit. After ++;; four cycles in the multiply unit, one cycle is taken to issue the ++;; operand from the add buffer plus the multiplication result to the ++;; addition/subtraction unit. That issue takes priority over any add/sub ++;; instruction waiting at the normal issue stage, but may be performed in ++;; parallel with the issue of a non-add/sub instruction. The total time ++;; for a multiply-accumulate instruction to pass through the execution ++;; units is hence eight cycles. ++;; ++;; We do not need to explicitly model the add buffer because it can ++;; always issue the instruction at the head of its FIFO (due to the above ++;; priority rule) and there are more spaces in the add buffer (six) than ++;; there are stages (four) in the multiplication unit. ++;; ++;; Two instructions may be retired at once from the head of an 8-entry ++;; reorder buffer. Data from these first two instructions only may be ++;; forwarded to the inputs of the issue unit. We assume that the ++;; pressure on the reorder buffer will be sufficiently low that every ++;; instruction entering it will be eligible for data forwarding. Since ++;; data is forwarded to the issue unit and not the execution units (so ++;; for example single-cycle instructions cannot be issued back-to-back), ++;; the latencies given below are the cycle counts above plus one. ++ ++(define_cpu_unit "mf_vfp_issue" "marvell_f_vfp") ++(define_cpu_unit "mf_vfp_add" "marvell_f_vfp") ++(define_cpu_unit "mf_vfp_mul" "marvell_f_vfp") ++(define_cpu_unit "mf_vfp_div" "marvell_f_vfp") ++(define_cpu_unit "mf_vfp_single_cycle" "marvell_f_vfp") ++ ++;; An attribute to indicate whether our reservations are applicable. 
++ ++(define_attr "marvell_f_vfp" "yes,no" ++ (const (if_then_else (and (eq_attr "tune" "marvell_f") ++ (eq_attr "fpu" "vfp")) ++ (const_string "yes") (const_string "no")))) ++ ++;; Reservations of functional units. The nothing*2 reservations at the ++;; start of many of the reservation strings correspond to the decode ++;; stages. We need to have these reservations so that we can correctly ++;; reserve parts of the core's A1 pipeline for loads and stores. For ++;; that case (since loads skip E1) the pipelines line up thus: ++;; A1 pipe: Issue E2 OF WR WB ... ++;; VFP pipe: Fetch Decode1 Decode2 Issue Execute1 ... ++;; For a load, we need to make a reservation of E2, and thus we must ++;; use Decode1 as the starting point for all VFP reservations here. ++;; ++;; For reservations of pipelined VFP execution units we only reserve ++;; the execution unit for the first execution cycle, omitting any trailing ++;; "nothing" reservations. ++ ++(define_insn_reservation "marvell_f_vfp_add" 4 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "farith")) ++ "nothing*2,mf_vfp_issue,mf_vfp_add") ++ ++(define_insn_reservation "marvell_f_vfp_mul" 5 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "fmuls,fmuld")) ++ "nothing*2,mf_vfp_issue,mf_vfp_mul") ++ ++(define_insn_reservation "marvell_f_vfp_divs" 17 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "fdivs")) ++ "nothing*2,mf_vfp_issue,mf_vfp_div*15") ++ ++(define_insn_reservation "marvell_f_vfp_divd" 32 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "fdivd")) ++ "nothing*2,mf_vfp_issue,mf_vfp_div*30") ++ ++;; The DFA lookahead is small enough that the "add" reservation here ++;; will always take priority over any addition/subtraction instruction ++;; issued five cycles after the multiply-accumulate instruction, as ++;; required. ++(define_insn_reservation "marvell_f_vfp_mac" 9 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "fmacs,fmacd")) ++ "nothing*2,mf_vfp_issue,mf_vfp_mul,nothing*4,mf_vfp_add") ++ ++(define_insn_reservation "marvell_f_vfp_single" 2 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "ffarith")) ++ "nothing*2,mf_vfp_issue,mf_vfp_single_cycle") ++ ++(define_insn_reservation "marvell_f_vfp_convert" 3 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "f_cvt")) ++ "nothing*2,mf_vfp_issue,mf_vfp_single_cycle") ++ ++(define_insn_reservation "marvell_f_vfp_load" 2 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "a1_e2+sram,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle") ++ ++(define_insn_reservation "marvell_f_vfp_from_core" 2 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "r_2_f")) ++ "a1_e2,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle") ++ ++;; The interaction between the core and VFP pipelines during VFP ++;; store operations and core <-> VFP moves is not clear, so we guess. 
++(define_insn_reservation "marvell_f_vfp_store" 3 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "f_stores,f_stored")) ++ "a1_e2,a1_of,mf_vfp_issue,a1_wr+sram+mf_vfp_single_cycle") ++ ++(define_insn_reservation "marvell_f_vfp_to_core" 4 ++ (and (eq_attr "marvell_f_vfp" "yes") ++ (eq_attr "type" "f_2_r")) ++ "a1_e2,a1_of,a1_wr+mf_vfp_issue,a1_wb+mf_vfp_single_cycle") ++ +diff -Nur a/gcc/config/arm/montavista-linux.h b/gcc/config/arm/montavista-linux.h +--- a/gcc/config/arm/montavista-linux.h 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/montavista-linux.h 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,33 @@ ++/* MontaVista GNU/Linux Configuration. ++ Copyright (C) 2009 ++ Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Add -tarmv6 and -tthumb2 options for convenience in generating multilibs. ++*/ ++#undef CC1_SPEC ++#define CC1_SPEC " \ ++ %{tarmv6: -march=armv6 -mfloat-abi=softfp ; \ ++ tthumb2: -mthumb -march=armv7-a -mfloat-abi=softfp ; \ ++ : -march=armv5t}" ++ ++/* The various C libraries each have their own subdirectory. */ ++#undef SYSROOT_SUFFIX_SPEC ++#define SYSROOT_SUFFIX_SPEC \ ++ "%{tarmv6:/armv6 ; \ ++ tthumb2:/thumb2}" +diff -Nur a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml +--- a/gcc/config/arm/neon-gen.ml 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/neon-gen.ml 2010-01-25 09:50:28.995687913 +0100 +@@ -122,6 +122,7 @@ + | T_uint16 | T_int16 -> T_intHI + | T_uint32 | T_int32 -> T_intSI + | T_uint64 | T_int64 -> T_intDI ++ | T_float32 -> T_floatSF + | T_poly8 -> T_intQI + | T_poly16 -> T_intHI + | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) +@@ -320,7 +321,7 @@ + typeinfo; + Format.print_newline (); + (* Extra types not in . *) +- Format.printf "typedef __builtin_neon_sf float32_t;\n"; ++ Format.printf "typedef float float32_t;\n"; + Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; + Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" + +@@ -399,7 +400,11 @@ + "extern \"C\" {"; + "#endif"; + ""; ++"#if defined (__vxworks) && defined (_WRS_KERNEL)"; ++"#include "; ++"#else"; + "#include "; ++"#endif"; + ""]; + deftypes (); + arrtypes (); +diff -Nur a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md +--- a/gcc/config/arm/neon.md 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/neon.md 2010-01-25 09:50:28.995687913 +0100 +@@ -159,7 +159,8 @@ + (UNSPEC_VUZP1 201) + (UNSPEC_VUZP2 202) + (UNSPEC_VZIP1 203) +- (UNSPEC_VZIP2 204)]) ++ (UNSPEC_VZIP2 204) ++ (UNSPEC_MISALIGNED_ACCESS 205)]) + + ;; Double-width vector modes. 
+ (define_mode_iterator VD [V8QI V4HI V2SI V2SF])
+@@ -459,7 +460,9 @@
+ "=w,Uv,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VD 1 "general_operand"
+ " w,w, Dn,Uvi, w, r, r, Usi,r"))]
+- "TARGET_NEON"
++ "TARGET_NEON
++ && (register_operand (operands[0], <MODE>mode)
++ || register_operand (operands[1], <MODE>mode))"
+ {
+ if (which_alternative == 2)
+ {
+@@ -481,7 +484,7 @@
+
+ /* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp
+ below must be changed to output_move_neon (which will use the
+- element/structure loads/stores), and the constraint changed to 'Un' instead
++ element/structure loads/stores), and the constraint changed to 'Um' instead
+ of 'Uv'. */
+
+ switch (which_alternative)
+@@ -506,7 +509,9 @@
+ "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VQXMOV 1 "general_operand"
+ " w,w, Dn,Uni, w, r, r, Usi, r"))]
+- "TARGET_NEON"
++ "TARGET_NEON
++ && (register_operand (operands[0], <MODE>mode)
++ || register_operand (operands[1], <MODE>mode))"
+ {
+ if (which_alternative == 2)
+ {
+@@ -549,6 +554,11 @@
+ (match_operand:TI 1 "general_operand" ""))]
+ "TARGET_NEON"
+ {
++ if (can_create_pseudo_p ())
++ {
++ if (GET_CODE (operands[0]) != REG)
++ operands[1] = force_reg (TImode, operands[1]);
++ }
+ })
+
+ (define_expand "mov<mode>"
+@@ -556,12 +566,19 @@
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
+ (match_operand:VSTRUCT 1 "general_operand" ""))]
+ "TARGET_NEON"
+ {
++ if (can_create_pseudo_p ())
++ {
++ if (GET_CODE (operands[0]) != REG)
++ operands[1] = force_reg (<MODE>mode, operands[1]);
++ }
+ })
+
+ (define_insn "*neon_mov<mode>"
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
+ (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
+- "TARGET_NEON"
++ "TARGET_NEON
++ && (register_operand (operands[0], <MODE>mode)
++ || register_operand (operands[1], <MODE>mode))"
+ {
+ switch (which_alternative)
+ {
+@@ -658,6 +675,49 @@
+ neon_disambiguate_copy (operands, dest, src, 4);
+ })
+
++(define_expand "movmisalign<mode>"
++ [(set (match_operand:VDQX 0 "nonimmediate_operand" "")
++ (unspec:VDQX [(match_operand:VDQX 1 "general_operand" "")]
++ UNSPEC_MISALIGNED_ACCESS))]
++ "TARGET_NEON && !BYTES_BIG_ENDIAN"
++{
++ if (!s_register_operand (operands[0], <MODE>mode)
++ && !s_register_operand (operands[1], <MODE>mode))
++ FAIL;
++})
++
++(define_insn "*movmisalign<mode>_neon_store"
++ [(set (match_operand:VDX 0 "memory_operand" "=Um")
++ (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
++ UNSPEC_MISALIGNED_ACCESS))]
++ "TARGET_NEON && !BYTES_BIG_ENDIAN"
++ "vst1.<V_sz_elem>\t{%P1}, %A0"
++ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
++
++(define_insn "*movmisalign<mode>_neon_load"
++ [(set (match_operand:VDX 0 "s_register_operand" "=w")
++ (unspec:VDX [(match_operand:VDX 1 "memory_operand" " Um")]
++ UNSPEC_MISALIGNED_ACCESS))]
++ "TARGET_NEON && !BYTES_BIG_ENDIAN"
++ "vld1.<V_sz_elem>\t{%P0}, %A1"
++ [(set_attr "neon_type" "neon_vld1_1_2_regs")])
++
++(define_insn "*movmisalign<mode>_neon_store"
++ [(set (match_operand:VQX 0 "memory_operand" "=Um")
++ (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
++ UNSPEC_MISALIGNED_ACCESS))]
++ "TARGET_NEON && !BYTES_BIG_ENDIAN"
++ "vst1.<V_sz_elem>\t{%q1}, %A0"
++ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
++
++(define_insn "*movmisalign<mode>_neon_load"
++ [(set (match_operand:VQX 0 "s_register_operand" "=w")
++ (unspec:VQX [(match_operand:VQX 1 "general_operand" " Um")]
++ UNSPEC_MISALIGNED_ACCESS))]
++ "TARGET_NEON && !BYTES_BIG_ENDIAN"
++ "vld1.<V_sz_elem>\t{%q0}, %A1"
++ [(set_attr "neon_type" "neon_vld1_1_2_regs")])
++
+ (define_insn "vec_set<mode>_internal"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ 
(vec_merge:VD
+@@ -862,6 +922,50 @@
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
+ )
+
++(define_insn "*mul<mode>3add<mode>_neon"
++ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
++ (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
++ (match_operand:VDQ 3 "s_register_operand" "w"))
++ (match_operand:VDQ 1 "s_register_operand" "0")))]
++ "TARGET_NEON"
++ "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
++ [(set (attr "neon_type")
++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
++ (const_string "neon_fp_vmla_ddd")
++ (const_string "neon_fp_vmla_qqq"))
++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
++ (if_then_else
++ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
++ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
++ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
++ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
++ (const_string "neon_mla_qqq_8_16")
++ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
++)
++
++(define_insn "*mul<mode>3neg<mode>add<mode>_neon"
++ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
++ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0")
++ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
++ (match_operand:VDQ 3 "s_register_operand" "w"))))]
++ "TARGET_NEON"
++ "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
++ [(set (attr "neon_type")
++ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
++ (const_string "neon_fp_vmla_ddd")
++ (const_string "neon_fp_vmla_qqq"))
++ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
++ (if_then_else
++ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
++ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
++ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
++ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
++ (const_string "neon_mla_qqq_8_16")
++ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
++)
++
+ (define_insn "ior<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+@@ -3611,7 +3715,8 @@
+ UNSPEC_VSHLL_N))]
+ "TARGET_NEON"
+ {
+- neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
++ /* The boundaries are: 0 < imm <= size. */
++ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
+ return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
+ }
+ [(set_attr "neon_type" "neon_shift_1")]
+diff -Nur a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
+--- a/gcc/config/arm/neon.ml 2007-08-02 12:49:31.000000000 +0200
++++ b/gcc/config/arm/neon.ml 2010-01-25 09:50:28.995687913 +0100
+@@ -50,7 +50,7 @@
+ | T_ptrto of vectype | T_const of vectype
+ | T_void | T_intQI
+ | T_intHI | T_intSI
+- | T_intDI
++ | T_intDI | T_floatSF
+
+ (* The meanings of the following are:
+ TImode : "Tetra", two registers (four words).
+@@ -1693,6 +1693,7 @@
+ | T_intHI -> "__builtin_neon_hi"
+ | T_intSI -> "__builtin_neon_si"
+ | T_intDI -> "__builtin_neon_di"
++ | T_floatSF -> "__builtin_neon_sf"
+ | T_arrayof (num, base) ->
+ let basename = name (fun x -> x) base in
+ affix (Printf.sprintf "%sx%d" basename num)
+diff -Nur a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml
+--- a/gcc/config/arm/neon-testgen.ml 2007-08-02 12:49:31.000000000 +0200
++++ b/gcc/config/arm/neon-testgen.ml 2010-01-25 09:50:28.995687913 +0100
+@@ -51,8 +51,8 @@
+ Printf.fprintf chan "/* This file was autogenerated by neon-testgen. 
*/\n\n"; + Printf.fprintf chan "/* { dg-do assemble } */\n"; + Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n"; +- Printf.fprintf chan +- "/* { dg-options \"-save-temps -O0 -mfpu=neon -mfloat-abi=softfp\" } */\n"; ++ Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n"; ++ Printf.fprintf chan "/* { dg-add-options arm_neon } */\n"; + Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"; + Printf.fprintf chan "void test_%s (void)\n{\n" test_name + +diff -Nur a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h +--- a/gcc/config/arm/netbsd-elf.h 2007-08-02 12:49:31.000000000 +0200 ++++ b/gcc/config/arm/netbsd-elf.h 2010-01-25 09:50:28.995687913 +0100 +@@ -153,5 +153,5 @@ + while (0) + + #undef FPUTYPE_DEFAULT +-#define FPUTYPE_DEFAULT FPUTYPE_VFP ++#define FPUTYPE_DEFAULT "vfp" + +diff -Nur a/gcc/config/arm/nocrt0.h b/gcc/config/arm/nocrt0.h +--- a/gcc/config/arm/nocrt0.h 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/nocrt0.h 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,25 @@ ++/* Definitions for generic libgloss based cofigs where crt0 is supplied by ++ the linker script. ++ Copyright (C) 2006 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#undef STARTFILE_SPEC ++#define STARTFILE_SPEC " crti%O%s crtbegin%O%s" ++ ++#undef LIB_SPEC ++#define LIB_SPEC "-lc" +diff -Nur a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md +--- a/gcc/config/arm/predicates.md 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/predicates.md 2010-01-25 09:50:28.995687913 +0100 +@@ -73,6 +73,10 @@ + || REGNO_REG_CLASS (REGNO (op)) == FPA_REGS)); + }) + ++(define_special_predicate "subreg_lowpart_operator" ++ (and (match_code "subreg") ++ (match_test "subreg_lowpart_p (op)"))) ++ + ;; Reg, subreg(reg) or const_int. + (define_predicate "reg_or_int_operand" + (ior (match_code "const_int") +@@ -168,6 +172,11 @@ + (and (match_code "plus,minus,ior,xor,and") + (match_test "mode == GET_MODE (op)"))) + ++;; True for plus/minus operators ++(define_special_predicate "plusminus_operator" ++ (and (match_code "plus,minus") ++ (match_test "mode == GET_MODE (op)"))) ++ + ;; True for logical binary operators. 
+ (define_special_predicate "logical_binary_operator" + (and (match_code "ior,xor,and") +@@ -295,6 +304,9 @@ + HOST_WIDE_INT i = 1, base = 0; + rtx elt; + ++ if (low_irq_latency) ++ return false; ++ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET) + return false; +@@ -352,6 +364,9 @@ + HOST_WIDE_INT i = 1, base = 0; + rtx elt; + ++ if (low_irq_latency) ++ return false; ++ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET) + return false; +diff -Nur a/gcc/config/arm/sfp-machine.h b/gcc/config/arm/sfp-machine.h +--- a/gcc/config/arm/sfp-machine.h 2008-03-03 15:30:48.000000000 +0100 ++++ b/gcc/config/arm/sfp-machine.h 2010-01-25 09:50:28.995687913 +0100 +@@ -14,9 +14,11 @@ + #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) + #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + ++#define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1) + #define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) + #define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 + #define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 ++#define _FP_NANSIGN_H 0 + #define _FP_NANSIGN_S 0 + #define _FP_NANSIGN_D 0 + #define _FP_NANSIGN_Q 0 +@@ -92,5 +94,7 @@ + #define __fixdfdi __aeabi_d2lz + #define __fixunsdfdi __aeabi_d2ulz + #define __floatdidf __aeabi_l2d ++#define __extendhfsf2 __gnu_h2f_ieee ++#define __truncsfhf2 __gnu_f2h_ieee + + #endif /* __ARM_EABI__ */ +diff -Nur a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm +--- a/gcc/config/arm/t-arm 2008-03-27 20:20:18.000000000 +0100 ++++ b/gcc/config/arm/t-arm 2010-01-25 09:50:28.995687913 +0100 +@@ -13,7 +13,9 @@ + $(srcdir)/config/arm/iwmmxt.md \ + $(srcdir)/config/arm/vfp.md \ + $(srcdir)/config/arm/neon.md \ +- $(srcdir)/config/arm/thumb2.md ++ $(srcdir)/config/arm/thumb2.md \ ++ $(srcdir)/config/arm/marvell-f.md \ ++ $(srcdir)/config/arm/hwdiv.md + + s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ + s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) +diff -Nur a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf +--- a/gcc/config/arm/t-arm-elf 2008-06-12 19:29:47.000000000 +0200 ++++ b/gcc/config/arm/t-arm-elf 2010-01-25 09:50:28.995687913 +0100 +@@ -24,10 +24,18 @@ + #MULTILIB_MATCHES += march?armv7=march?armv7-a + #MULTILIB_MATCHES += march?armv7=march?armv7-r + #MULTILIB_MATCHES += march?armv7=march?armv7-m ++#MULTILIB_MATCHES += march?armv7=march?armv7e-m + #MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 + #MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 + #MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 + ++# Not quite true. We can support hard-vfp calling in Thumb2, but how do we ++# express that here? Also, we really need architecture v5e or later ++# (mcrr etc). ++MULTILIB_OPTIONS += mfloat-abi=hard ++MULTILIB_DIRNAMES += fpu ++MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* ++ + # MULTILIB_OPTIONS += mcpu=ep9312 + # MULTILIB_DIRNAMES += ep9312 + # MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312* +diff -Nur a/gcc/config/arm/t-asa b/gcc/config/arm/t-asa +--- a/gcc/config/arm/t-asa 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-asa 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,45 @@ ++# Overrides for ASA ++ ++# Here is the expected output from xgcc -print-multi-lib. 
++# ++# .;@fno-omit-frame-pointer@mapcs-frame ++# armv4t;@march=armv4t@fno-omit-frame-pointer@mapcs-frame ++# armv6;@march=armv6@fno-omit-frame-pointer@mapcs-frame ++# armv7a;@march=armv7-a@fno-omit-frame-pointer@mapcs-frame ++# armv6f;@march=armv6@mfloat-abi=softfp@fno-omit-frame-pointer@mapcs-frame ++# armv7af;@march=armv7-a@mfpu=neon@mfloat-abi=softfp@fno-omit-frame-pointer@mapcs-frame ++# thumb2;@mthumb@march=armv7-a@fno-omit-frame-pointer@mapcs-frame ++# thumb2f;@mthumb@march=armv7-a@mfpu=neon@mfloat-abi=softfp@fno-omit-frame-pointer@mapcs-frame ++ ++MULTILIB_OPTIONS = mthumb march=armv4t/march=armv6/march=armv7-a mfpu=neon mfloat-abi=softfp ++MULTILIB_DIRNAMES = thumb v4t v6 v7a neon softfp ++MULTILIB_MATCHES = ++ ++MULTILIB_EXTRA_OPTS = fno-omit-frame-pointer mapcs-frame ++ ++MULTILIB_EXCEPTIONS = mthumb ++MULTILIB_EXCEPTIONS += mfpu=neon* ++MULTILIB_EXCEPTIONS += mfloat-abi=softfp ++MULTILIB_EXCEPTIONS += *march=armv4t*/*mfpu=neon* ++MULTILIB_EXCEPTIONS += *march=armv4t*/*mfloat-abi=softfp* ++MULTILIB_EXCEPTIONS += march=armv6/*mfpu=neon* ++MULTILIB_EXCEPTIONS += mthumb/mfpu=neon ++MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=softfp ++MULTILIB_EXCEPTIONS += mthumb/mfpu=neon* ++MULTILIB_EXCEPTIONS += mthumb/march=armv6/mfpu=neon* ++ ++MULTILIB_OSDIRNAMES = march.armv4t=!armv4t ++MULTILIB_OSDIRNAMES += march.armv6=!armv6 ++MULTILIB_OSDIRNAMES += march.armv6/mfloat-abi.softfp=!armv6f ++MULTILIB_OSDIRNAMES += march.armv7-a=!armv7a ++MULTILIB_OSDIRNAMES += march.armv7-a/mfpu.neon/mfloat-abi.softfp=!armv7af ++MULTILIB_OSDIRNAMES += mthumb/march.armv7-a=!thumb2 ++MULTILIB_OSDIRNAMES += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=!thumb2f ++ ++MULTILIB_ALIASES = march?armv4t=mthumb/march?armv4t ++MULTILIB_ALIASES += march?armv6=mthumb/march?armv6 ++MULTILIB_ALIASES += march?armv6/mfloat-abi?softfp=mthumb/march?armv6/mfloat-abi?softfp ++MULTILIB_ALIASES += march?armv7-a/mfpu?neon/mfloat-abi?softfp=march?armv7-a/mfpu?neon ++MULTILIB_ALIASES += march?armv7-a/mfpu?neon/mfloat-abi?softfp=march?armv7-a/mfloat-abi?softfp ++MULTILIB_ALIASES += mthumb/march?armv7-a/mfpu?neon/mfloat-abi?softfp=mthumb/march?armv7-a/mfpu?neon ++MULTILIB_ALIASES += mthumb/march?armv7-a/mfpu?neon/mfloat-abi?softfp=mthumb/march?armv7-a/mfloat-abi?softfp +diff -Nur a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi +--- a/gcc/config/arm/t-bpabi 2005-11-04 15:51:20.000000000 +0100 ++++ b/gcc/config/arm/t-bpabi 2010-01-25 09:50:28.995687913 +0100 +@@ -1,10 +1,13 @@ + # Add the bpabi.S functions. +-LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod ++LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod \ ++ _aeabi_idiv0 _aeabi_ldiv0 + + # Add the BPABI C functions. 
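
The t-bpabi hunk above starts building _aeabi_idiv0 and _aeabi_ldiv0 as their own LIB1ASMFUNCS members, which makes the EABI divide-by-zero handlers independently replaceable at link time (the t-linux-eabi change later in this patch leans on the same idea when it swaps in the SIGFPE-raising _dvmd_lnx). A minimal bare-metal override, assuming the RTABI signature int __aeabi_idiv0(int), might look like:

    /* Without hardware divide, a/b becomes a call to __aeabi_idiv, and a
       zero divisor diverts into __aeabi_idiv0.  The default handler just
       returns, yielding a junk quotient; this override traps instead.  */
    int __aeabi_idiv0 (int return_value)
    {
      (void) return_value;
      __builtin_trap ();        /* never returns */
    }
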
+ LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \ + $(srcdir)/config/arm/unaligned-funcs.c + ++LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c ++ + UNWIND_H = $(srcdir)/config/arm/unwind-arm.h + LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \ + $(srcdir)/config/arm/libunwind.S \ +diff -Nur a/gcc/config/arm/t-cs-eabi b/gcc/config/arm/t-cs-eabi +--- a/gcc/config/arm/t-cs-eabi 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-cs-eabi 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,198 @@ ++# Multilibs for SourceryG++ arm-none-eabi ++ ++MULTILIB_OPTIONS = mthumb ++MULTILIB_DIRNAMES = t ++MULTILIB_EXCEPTIONS = ++MULTILIB_MATCHES = ++MULTILIB_ALIASES = ++ ++MULTILIB_OPTIONS += march=armv7/march=armv7-a/march=armv5te/march=armv6-m ++MULTILIB_DIRNAMES += v7 v7a v5te v6m ++MULTILIB_MATCHES += march?armv7-a=march?armv7a ++MULTILIB_MATCHES += march?armv7=march?armv7r ++MULTILIB_MATCHES += march?armv7=march?armv7m ++MULTILIB_MATCHES += march?armv7=march?armv7-r ++MULTILIB_MATCHES += march?armv7=march?armv7-m ++MULTILIB_MATCHES += march?armv7=march?armv7e-m ++MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9 ++MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8 ++MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4f ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 ++MULTILIB_MATCHES += march?armv6-m=mcpu?cortex-m1 ++MULTILIB_MATCHES += march?armv6-m=mcpu?cortex-m0 ++MULTILIB_MATCHES += march?armv5te=march?armv6 ++MULTILIB_MATCHES += march?armv5te=march?armv6j ++MULTILIB_MATCHES += march?armv5te=march?armv6k ++MULTILIB_MATCHES += march?armv5te=march?armv6z ++MULTILIB_MATCHES += march?armv5te=march?armv6zk ++MULTILIB_MATCHES += march?armv5te=march?armv6t2 ++MULTILIB_MATCHES += march?armv5te=march?iwmmxt ++MULTILIB_MATCHES += march?armv5te=march?iwmmxt2 ++MULTILIB_MATCHES += march?armv5te=mcpu?arm9e ++MULTILIB_MATCHES += march?armv5te=mcpu?arm946e-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm966e-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm968e-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm10e ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1020e ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1022e ++MULTILIB_MATCHES += march?armv5te=mcpu?xscale ++MULTILIB_MATCHES += march?armv5te=mcpu?iwmmxt ++MULTILIB_MATCHES += march?armv5te=mcpu?iwmmxt2 ++MULTILIB_MATCHES += march?armv5te=mcpu?marvell-f ++MULTILIB_MATCHES += march?armv5te=mcpu?arm926ej-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1026ej-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1136j-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1136jf-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1176jz-s ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1176jzf-s ++MULTILIB_MATCHES += march?armv5te=mcpu?mpcorenovfp ++MULTILIB_MATCHES += march?armv5te=mcpu?mpcore ++MULTILIB_MATCHES += march?armv5te=mcpu?arm1156t2-s ++ ++MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard ++MULTILIB_DIRNAMES += softfp hard ++MULTILIB_MATCHES += mfloat-abi?hard=mhard-float ++ ++MULTILIB_OPTIONS += mfpu=neon ++MULTILIB_DIRNAMES += neon ++MULTILIB_EXCEPTIONS += mfpu=neon ++MULTILIB_MATCHES += mfpu?neon=mfpu?neon-fp16 ++MULTILIB_MATCHES += mfpu?neon=mfpu?neon-vfpv4 ++ ++MULTILIB_ALIASES += mthumb=mthumb/mfpu?neon ++MULTILIB_ALIASES += mthumb=mthumb/march?armv5te/mfpu?neon ++MULTILIB_ALIASES += mbig-endian=mthumb/mfpu?neon/mbig-endian ++#MULTILIB_ALIASES += mfloat-abi?softfp=mthumb/mfloat-abi?softfp/mfpu?neon ++#MULTILIB_ALIASES += mfloat-abi?softfp=mfloat-abi?softfp/mfpu?neon 
++#MULTILIB_ALIASES += mfloat-abi?softfp/mbig-endian=mfloat-abi?softfp/mfpu?neon/mbig-endian ++#MULTILIB_ALIASES += mfloat-abi?softfp/mbig-endian=mthumb/mfloat-abi?softfp/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7-a/mfpu?neon ++MULTILIB_ALIASES += mthumb/march?armv7/mbig-endian=mthumb/march?armv7-a/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += march?armv7-a/mfloat-abi?softfp/mfpu?neon=mthumb/march?armv7-a/mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += march?armv7-a/mfloat-abi?hard/mfpu?neon=mthumb/march?armv7-a/mfloat-abi?hard/mfpu?neon ++ ++MULTILIB_OPTIONS += mbig-endian ++MULTILIB_DIRNAMES += be ++MULTILIB_ALIASES += mbig-endian=mfpu?neon/mbig-endian ++ ++# ARMv6-M does not have ARM mode. ++MULTILIB_EXCEPTIONS += march=armv6-m ++ ++# Some ARMv7 variants have ARM mode. Use the ARM libraries. ++MULTILIB_EXCEPTIONS += march=armv7 march=armv7/* ++MULTILIB_ALIASES += mbig-endian=march?armv7/mbig-endian ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp=march?armv7/mfloat-abi?softfp ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp=march?armv7/mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp/mbig-endian=march?armv7/mfloat-abi?softfp/mbig-endian ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp/mbig-endian=march?armv7/mfloat-abi?softfp/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mbig-endian=march?armv7/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7/mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7/mfpu?neon ++MULTILIB_ALIASES += mthumb/march?armv7/mbig-endian=mthumb/march?armv7/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mthumb/march?armv7/mbig-endian=mthumb/march?armv7/mfloat-abi?softfp/mfpu?neon/mbig-endian ++ ++# ARMv7-A is specially useful used with VFPv3 (enabled by NEON). Rest of the cases behaves as ARMv7. ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7-a ++MULTILIB_ALIASES += mbig-endian=march?armv7-a/mbig-endian ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp/mbig-endian=march?armv7-a/mfloat-abi?softfp/mbig-endian ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp/mbig-endian=march?armv7-a/mfloat-abi?softfp/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7-a/mfloat-abi?softfp ++MULTILIB_ALIASES += mthumb/march?armv7/mbig-endian=mthumb/march?armv7-a/mbig-endian ++MULTILIB_ALIASES += mthumb/march?armv7/mbig-endian=mthumb/march?armv7-a/mfloat-abi?softfp/mbig-endian ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7/mfloat-abi?softfp ++MULTILIB_ALIASES += march?armv5te=march?armv7-a ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp=march?armv7-a/mfloat-abi?softfp ++MULTILIB_ALIASES += march?armv5te=march?armv7-a/mfpu?neon ++MULTILIB_ALIASES += mbig-endian=march?armv7-a/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mthumb/march?armv7/mbig-endian=mthumb/march?armv7-a/mfloat-abi?softfp/mfpu?neon/mbig-endian ++ ++# ARMv5T thumb uses the ARMv5T ARM libraries (with or without VFP). 
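
Alias chains like the block that follows are hard to audit by eye. One sanity check (a suggestion, not part of this patch) is to ask the driver where a flag combination lands, using the stock -print-multi-lib / -print-multi-directory options while building a NEON-dependent unit such as:

    /* Try: arm-none-eabi-gcc -print-multi-directory \
             -march=armv7-a -mfloat-abi=hard -mfpu=neon
       and compare with the softfp spelling; per the MULTILIB_OSDIRNAMES
       entries near the end of this file they should resolve to distinct
       armv7-a-hard / armv7-a-neon library trees.  */
    #include <arm_neon.h>

    float32x4_t scale (float32x4_t v, float32_t s)
    {
      return vmulq_n_f32 (v, s);        /* needs a NEON multilib */
    }
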
++MULTILIB_ALIASES += mthumb=mthumb/march?armv5te ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp=mthumb/march?armv5te/mfloat-abi?softfp ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp=march?armv5te/mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp=mthumb/march?armv5te/mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += march?armv5te=march?armv5te/mfpu?neon ++MULTILIB_ALIASES += mbig-endian=march?armv5te/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp/mbig-endian=march?armv5te/mfloat-abi?softfp/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mbig-endian=mthumb/march?armv5te/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp/mbig-endian=mthumb/march?armv5te/mfloat-abi?softfp/mfpu?neon/mbig-endian ++ ++# ARMv6-M and VFP are incompatible. ++# FIXME: The compiler should probably error. ++MULTILIB_EXCEPTIONS += *march=armv6-m/mfloat-abi=softfp ++MULTILIB_ALIASES += mthumb/march?armv6-m=mthumb/march?armv6-m/mfpu?neon ++MULTILIB_EXCEPTIONS += march=armv6-m*mfpu=neon ++MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfloat-abi=softfp/mfpu=neon ++ ++# ARMv4t VFP isn't really supported, so use the soft-float libraries. ++MULTILIB_EXCEPTIONS += mfloat-abi?softfp ++MULTILIB_EXCEPTIONS += mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += mthumb=mthumb/mfloat-abi?softfp ++MULTILIB_ALIASES += mthumb=mthumb/mfloat-abi?softfp/mfpu?neon ++ ++MULTILIB_ALIASES += mbig-endian=mfloat-abi?softfp/mbig-endian ++MULTILIB_ALIASES += mbig-endian=mfloat-abi?softfp/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mbig-endian=mthumb/mfloat-abi?softfp/mbig-endian ++MULTILIB_ALIASES += mbig-endian=mthumb/mfloat-abi?softfp/mfpu?neon/mbig-endian ++ ++# We don't have a big-endian ARMv6-M compatible multilibs. ++MULTILIB_EXCEPTIONS += *march=armv6-m*mbig-endian ++ ++# Use the generic libraries for big-endian ARMv5T ++MULTILIB_ALIASES += mbig-endian=march?armv5te/mbig-endian ++MULTILIB_ALIASES += mbig-endian=mfloat-abi?softfp/mbig-endian ++MULTILIB_ALIASES += mbig-endian=mthumb/march?armv5te/mbig-endian ++MULTILIB_ALIASES += march?armv5te/mfloat-abi?softfp/mbig-endian=mthumb/march?armv5te/mfloat-abi?softfp/mbig-endian ++ ++# Use ARM libraries for big-endian Thumb. ++MULTILIB_ALIASES += mbig-endian=mthumb/mbig-endian ++ ++# Don't bother with big-endian Thumb-2 VFP. Use the soft-float libraries ++# for now. ++MULTILIB_ALIASES += mthumb/march?armv7/mbig-endian=mthumb/march?armv7/mfloat-abi?softfp/mbig-endian ++ ++# The only -mfloat-abi=hard libraries provided are for little-endian ++# v7-A NEON. ++MULTILIB_EXCEPTIONS += mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += *march=armv5te*mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += *march=armv7/*mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += *march=armv6-m*mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += *mfloat-abi=hard*mbig-endian ++MULTILIB_EXCEPTIONS += *mfloat-abi=hard ++ ++# FIXME: We need a sane way of doing this. ++# This isn't really a multilib, it's a hack to add an extra option ++# to the v7-m multilib. 
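
For context on the option this hack wires up (sketched from the general -mfix-cortex-m3-ldrd rationale, not from this patch): early Cortex-M3 revisions had an erratum where an ldrd whose first destination register overlaps the base register could load corrupt data, so the workaround forbids that overlapping form. A 64-bit load is the easiest way to produce an ldrd candidate:

    /* A natural ldrd candidate on ARMv7-M.  With -mfix-cortex-m3-ldrd the
       compiler must avoid ldrd rN, rM, [rN] and may fall back to two
       separate ldr instructions instead.  */
    unsigned long long load64 (const unsigned long long *p)
    {
      return *p;
    }
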
++MULTILIB_OPTIONS += mfix-cortex-m3-ldrd ++MULTILIB_DIRNAMES += broken_ldrd ++ ++MULTILIB_EXCEPTIONS += mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += mthumb/mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *march=armv6-m*mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *march=armv7-a*mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *mcpu=*mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *mbig-endian*mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *mfloat-abi=softfp*mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *march=armv5te*mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *mfpu=neon*mfix-cortex-m3-ldrd ++ ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7 ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7-a/mfix-cortex-m3-ldrd ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7/mfpu?neon/mfix-cortex-m3-ldrd ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7-a/mfpu?neon/mfix-cortex-m3-ldrd ++ ++# As of at least 4.2, gcc passes the wrong -L options if some multilibs are ++# omitted from MULTILIB_OSDIRNAMES ++MULTILIB_OSDIRNAMES = mthumb=!thumb ++MULTILIB_OSDIRNAMES += mbig-endian=!be ++MULTILIB_OSDIRNAMES += march.armv5te=!armv5te ++MULTILIB_OSDIRNAMES += march.armv5te/mfloat-abi.softfp=!vfp ++MULTILIB_OSDIRNAMES += march.armv5te/mfloat-abi.softfp/mbig-endian=!vfp-be ++MULTILIB_OSDIRNAMES += mthumb/march.armv7/mfix-cortex-m3-ldrd=!thumb2 ++MULTILIB_OSDIRNAMES += march.armv7-a/mfloat-abi.softfp/mfpu.neon=!armv7-a-neon ++MULTILIB_OSDIRNAMES += march.armv7-a/mfloat-abi.hard/mfpu.neon=!armv7-a-hard ++MULTILIB_OSDIRNAMES += mthumb/march.armv7/mbig-endian=!thumb2-be ++MULTILIB_OSDIRNAMES += mthumb/march.armv6-m=!armv6-m +diff -Nur a/gcc/config/arm/t-cs-eabi-lite b/gcc/config/arm/t-cs-eabi-lite +--- a/gcc/config/arm/t-cs-eabi-lite 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-cs-eabi-lite 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,47 @@ ++# We build 4 multilibs: ++# ./ (default) ++# thumb/ -mthumb ++# thumb2/ -mthumb -march=armv7 ++# armv6-m/ -mthumb -march=armv6-m ++ ++MULTILIB_OPTIONS = mthumb ++MULTILIB_DIRNAMES = thumb ++MULTILIB_EXCEPTIONS = ++MULTILIB_MATCHES = ++MULTILIB_ALIASES = ++ ++MULTILIB_OPTIONS += march=armv7/march=armv6-m ++MULTILIB_DIRNAMES += v7 v6-m ++MULTILIB_EXCEPTIONS += march=armv7* ++MULTILIB_MATCHES += march?armv7=march?armv7-a ++MULTILIB_MATCHES += march?armv7=march?armv7-r ++MULTILIB_MATCHES += march?armv7=march?armv7-m ++MULTILIB_MATCHES += march?armv7=march?armv7e-m ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a9 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a5 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4f ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 ++ ++MULTILIB_EXCEPTIONS += march=armv6-m ++MULTILIB_MATCHES += march?armv6-m=mcpu?cortex-m1 ++MULTILIB_MATCHES += march?armv6-m=mcpu?cortex-m0 ++ ++# FIXME: We need a sane way of doing this. ++# This isn't really a multilib, it's a hack to add an extra option ++# to the v7-m multilib. 
++MULTILIB_OPTIONS += mfix-cortex-m3-ldrd ++MULTILIB_DIRNAMES += broken_ldrd ++ ++MULTILIB_EXCEPTIONS += mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += mthumb/mfix-cortex-m3-ldrd ++MULTILIB_EXCEPTIONS += *march=armv6-m*mfix-cortex-m3-ldrd ++ ++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7 ++ ++# As of at least 4.2, gcc passes the wrong -L options if some multilibs are ++# omitted from MULTILIB_OSDIRNAMES ++MULTILIB_OSDIRNAMES = mthumb=!thumb ++MULTILIB_OSDIRNAMES += mthumb/march.armv7/mfix-cortex-m3-ldrd=!thumb2 ++MULTILIB_OSDIRNAMES += mthumb/march.armv6-m=!armv6-m +diff -Nur a/gcc/config/arm/t-cs-linux b/gcc/config/arm/t-cs-linux +--- a/gcc/config/arm/t-cs-linux 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-cs-linux 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,112 @@ ++# Multilibs for SourceryG++ arm-none-linux-gnueabi ++ ++MULTILIB_OPTIONS = mthumb ++MULTILIB_DIRNAMES = t ++MULTILIB_EXCEPTIONS = ++MULTILIB_MATCHES = ++MULTILIB_ALIASES = ++ ++MULTILIB_OPTIONS += march=armv4t/march=armv7-a ++MULTILIB_DIRNAMES += v4t v7a ++ ++MULTILIB_MATCHES += march?armv7-a=march?armv7a ++MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9 ++MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8 ++MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5 ++MULTILIB_MATCHES += march?armv4t=march?ep9312 ++MULTILIB_MATCHES += march?armv4t=mcpu?arm7tdmi ++MULTILIB_MATCHES += march?armv4t=mcpu?arm7tdmi-s ++MULTILIB_MATCHES += march?armv4t=mcpu?arm710t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm720t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm740t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm9 ++MULTILIB_MATCHES += march?armv4t=mcpu?arm9tdmi ++MULTILIB_MATCHES += march?armv4t=mcpu?arm920 ++MULTILIB_MATCHES += march?armv4t=mcpu?arm920t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm922t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm940t ++MULTILIB_MATCHES += march?armv4t=mcpu?ep9312 ++MULTILIB_MATCHES += march?armv4t=march?armv5 ++MULTILIB_MATCHES += march?armv4t=march?armv5t ++MULTILIB_MATCHES += march?armv4t=march?arm10tdmi ++MULTILIB_MATCHES += march?armv4t=march?arm1020t ++ ++MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard ++MULTILIB_DIRNAMES += softfp hard ++MULTILIB_MATCHES += mfloat-abi?hard=mhard-float ++ ++MULTILIB_OPTIONS += mfpu=neon ++MULTILIB_DIRNAMES += neon ++MULTILIB_EXCEPTIONS += mfpu=neon ++MULTILIB_MATCHES += mfpu?neon=mfpu?neon-fp16 ++MULTILIB_MATCHES += mfpu?neon=mfpu?neon-vfpv4 ++MULTILIB_ALIASES += mfloat-abi?softfp=mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += mfloat-abi?softfp=mthumb/mfloat-abi?softfp/mfpu?neon ++MULTILIB_ALIASES += march?armv7-a/mfloat-abi?hard/mfpu?neon=mthumb/march?armv7-a/mfloat-abi?hard/mfpu?neon ++ ++MULTILIB_OPTIONS += mbig-endian ++MULTILIB_DIRNAMES += be ++MULTILIB_ALIASES += mbig-endian=mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mfloat-abi?softfp/mbig-endian=mfloat-abi?softfp/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mbig-endian=mthumb/mfpu?neon/mbig-endian ++MULTILIB_ALIASES += mfloat-abi?softfp/mbig-endian=mthumb/mfloat-abi?softfp/mfpu?neon/mbig-endian ++ ++# Do not build Thumb libraries. ++MULTILIB_EXCEPTIONS += mthumb ++MULTILIB_EXCEPTIONS += mthumb/mfpu=neon ++ ++# Use ARM libraries for ARMv4t Thumb and VFP. 
++MULTILIB_ALIASES += march?armv4t=mthumb/march?armv4t
++MULTILIB_ALIASES += march?armv4t=march?armv4t/mfloat-abi?softfp
++MULTILIB_ALIASES += march?armv4t=mthumb/march?armv4t/mfloat-abi?softfp
++MULTILIB_ALIASES += march?armv4t=march?armv4t/mfpu?neon
++MULTILIB_ALIASES += march?armv4t=march?armv4t/mfloat-abi?softfp/mfpu?neon
++MULTILIB_ALIASES += march?armv4t=mthumb/march?armv4t/mfpu?neon
++MULTILIB_ALIASES += march?armv4t=mthumb/march?armv4t/mfloat-abi?softfp/mfpu?neon
++
++# We do not support ARMv4t big-endian.
++MULTILIB_EXCEPTIONS += *march=armv4t*mbig-endian
++
++# Treat ARMv7-A as ARMv7 in some cases.
++MULTILIB_EXCEPTIONS += march=armv7-a
++MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=neon
++MULTILIB_ALIASES += mfloat-abi?softfp=march?armv7-a/mfloat-abi?softfp
++MULTILIB_ALIASES += mbig-endian=march?armv7-a/mbig-endian
++MULTILIB_ALIASES += mbig-endian=march?armv7-a/mfpu?neon/mbig-endian
++MULTILIB_ALIASES += mfloat-abi?softfp/mbig-endian=march?armv7-a/mfloat-abi?softfp/mbig-endian
++MULTILIB_ALIASES += mfloat-abi?softfp/mbig-endian=march?armv7-a/mfloat-abi?softfp/mfpu?neon/mbig-endian
++MULTILIB_ALIASES += mthumb/march?armv7-a=mthumb/march?armv7-a/mfpu?neon
++MULTILIB_ALIASES += mthumb/march?armv7-a/mbig-endian=mthumb/march?armv7-a/mfpu?neon/mbig-endian
++MULTILIB_ALIASES += mthumb/march?armv7-a/mbig-endian=mthumb/march?armv7-a/mfloat-abi?softfp/mfpu?neon/mbig-endian
++MULTILIB_ALIASES += mthumb/march?armv7-a=mthumb/march?armv7-a/mfloat-abi?softfp
++
++# Thumb-1 VFP isn't really a meaningful combination. Use the ARM VFP.
++MULTILIB_ALIASES += mfloat-abi?softfp=mthumb/mfloat-abi?softfp
++MULTILIB_ALIASES += mfloat-abi?softfp/mbig-endian=mthumb/mfloat-abi?softfp/mbig-endian
++
++# Use ARM libraries for big-endian Thumb.
++MULTILIB_ALIASES += mbig-endian=mthumb/mbig-endian
++
++# Don't bother with big-endian Thumb-2 VFP. Use the soft-float libraries
++# for now.
++MULTILIB_ALIASES += mthumb/march?armv7-a/mbig-endian=mthumb/march?armv7-a/mfloat-abi?softfp/mbig-endian
++
++# The only -mfloat-abi=hard libraries provided are for little-endian
++# v7-A NEON.
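
The restriction above exists because -mfloat-abi=hard is an ABI change, not merely a code-generation tweak: under the VFP variant of the AAPCS, floating-point arguments and results move in VFP registers, so hard-float objects cannot link against softfp libraries. A one-liner shows the boundary:

    /* With -mfloat-abi=hard, x arrives in s0 and the result leaves in s0;
       with -mfloat-abi=softfp it crosses the call boundary in r0 even
       though the multiply itself still runs on the VFP.  Every object on
       the link line must agree, hence the separate armv7-a-hard tree.  */
    float half (float x)
    {
      return x * 0.5f;
    }
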
++MULTILIB_EXCEPTIONS += mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += *march=armv4t*mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=hard* ++MULTILIB_EXCEPTIONS += *mfloat-abi=hard*mbig-endian ++MULTILIB_EXCEPTIONS += *mfloat-abi=hard ++ ++# As of at least 4.2, gcc passes the wrong -L options if some multilibs are ++# omitted from MULTILIB_OSDIRNAMES ++MULTILIB_OSDIRNAMES = march.armv4t=!armv4t ++MULTILIB_OSDIRNAMES += mbig-endian=!be ++MULTILIB_OSDIRNAMES += mfloat-abi.softfp=!vfp ++MULTILIB_OSDIRNAMES += mfloat-abi.softfp/mbig-endian=!vfp-be ++MULTILIB_OSDIRNAMES += mthumb/march.armv7-a=!thumb2 ++MULTILIB_OSDIRNAMES += march.armv7-a/mfloat-abi.softfp/mfpu.neon=!armv7-a-neon ++MULTILIB_OSDIRNAMES += mthumb/march.armv7-a/mfloat-abi.softfp/mfpu.neon=!thumb2-neon ++MULTILIB_OSDIRNAMES += march.armv7-a/mfloat-abi.hard/mfpu.neon=!armv7-a-hard ++MULTILIB_OSDIRNAMES += mthumb/march.armv7-a/mbig-endian=!thumb2-be +diff -Nur a/gcc/config/arm/t-cs-linux-lite b/gcc/config/arm/t-cs-linux-lite +--- a/gcc/config/arm/t-cs-linux-lite 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-cs-linux-lite 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,48 @@ ++# We build 3 multilibs: ++# ./ (default) ++# armv4t/ -march=armv4t [-mthumb] ++# thumb2/ -mthumb -march=armv7 ++MULTILIB_OPTIONS = mthumb ++MULTILIB_DIRNAMES = thumb ++MULTILIB_OPTIONS += march=armv4t/march=armv7 ++MULTILIB_DIRNAMES += v4t v7 ++MULTILIB_EXCEPTIONS += march=armv7 ++MULTILIB_EXCEPTIONS += mthumb ++ ++MULTILIB_ALIASES = march?armv4t=mthumb/march?armv4t ++ ++# As of at least 4.2, gcc passes the wrong -L options if some multilibs are ++# omitted from MULTILIB_OSDIRNAMES ++MULTILIB_OSDIRNAMES = march.armv4t=!armv4t ++MULTILIB_OSDIRNAMES += mthumb/march.armv7=!thumb2 ++ ++MULTILIB_MATCHES += march?armv7=march?armv7a ++MULTILIB_MATCHES += march?armv7=march?armv7r ++MULTILIB_MATCHES += march?armv7=march?armv7m ++MULTILIB_MATCHES += march?armv7=march?armv7-a ++MULTILIB_MATCHES += march?armv7=march?armv7-r ++MULTILIB_MATCHES += march?armv7=march?armv7-m ++MULTILIB_MATCHES += march?armv7=march?armv7e-m ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a9 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a5 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4f ++MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 ++MULTILIB_MATCHES += march?armv4t=march?ep9312 ++MULTILIB_MATCHES += march?armv4t=mcpu?arm7tdmi ++MULTILIB_MATCHES += march?armv4t=mcpu?arm7tdmi-s ++MULTILIB_MATCHES += march?armv4t=mcpu?arm710t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm720t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm740t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm9 ++MULTILIB_MATCHES += march?armv4t=mcpu?arm9tdmi ++MULTILIB_MATCHES += march?armv4t=mcpu?arm920 ++MULTILIB_MATCHES += march?armv4t=mcpu?arm920t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm922t ++MULTILIB_MATCHES += march?armv4t=mcpu?arm940t ++MULTILIB_MATCHES += march?armv4t=mcpu?ep9312 ++MULTILIB_MATCHES += march?armv4t=march?armv5 ++MULTILIB_MATCHES += march?armv4t=march?armv5t ++MULTILIB_MATCHES += march?armv4t=march?arm10tdmi ++MULTILIB_MATCHES += march?armv4t=march?arm1020t +diff -Nur a/gcc/config/arm/t-cs-uclinux-eabi b/gcc/config/arm/t-cs-uclinux-eabi +--- a/gcc/config/arm/t-cs-uclinux-eabi 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-cs-uclinux-eabi 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,55 @@ ++# EABI uClinux multilib selection. 
Other settings are inherited from t-arm-elf
++
++# We build 3 multilibs:
++# . (default)
++# thumb2/ -mthumb -march=armv7 -mfix-cortex-m3-ldrd
++# armv6-m/ -mthumb -march=armv6-m
++
++MULTILIB_OPTIONS = mthumb
++MULTILIB_DIRNAMES = thumb
++MULTILIB_EXCEPTIONS =
++MULTILIB_MATCHES =
++
++MULTILIB_OPTIONS += march=armv7/march=armv6-m
++MULTILIB_DIRNAMES += armv7 armv6-m
++
++MULTILIB_EXCEPTIONS += mthumb
++
++MULTILIB_EXCEPTIONS += march=armv7
++MULTILIB_MATCHES += march?armv7=march?armv7a
++MULTILIB_MATCHES += march?armv7=march?armv7r
++MULTILIB_MATCHES += march?armv7=march?armv7m
++MULTILIB_MATCHES += march?armv7=march?armv7-a
++MULTILIB_MATCHES += march?armv7=march?armv7-r
++MULTILIB_MATCHES += march?armv7=march?armv7-m
++MULTILIB_MATCHES += march?armv7=march?armv7e-m
++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a9
++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8
++MULTILIB_MATCHES += march?armv7=mcpu?cortex-a5
++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4
++MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4f
++MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3
++
++MULTILIB_EXCEPTIONS += march=armv6-m
++MULTILIB_MATCHES += march?armv6-m=mcpu?cortex-m1
++MULTILIB_MATCHES += march?armv6-m=mcpu?cortex-m0
++
++MULTILIB_ALIASES =
++
++# FIXME: We need a sane way of doing this.
++# This isn't really a multilib, it's a hack to add an extra option
++# to the v7-m multilib.
++MULTILIB_OPTIONS += mfix-cortex-m3-ldrd
++MULTILIB_DIRNAMES += broken_ldrd
++
++MULTILIB_EXCEPTIONS += mfix-cortex-m3-ldrd
++MULTILIB_EXCEPTIONS += mthumb/mfix-cortex-m3-ldrd
++MULTILIB_EXCEPTIONS += march=armv7/mfix-cortex-m3-ldrd
++MULTILIB_EXCEPTIONS += *march=armv6-m*mfix-cortex-m3-ldrd
++
++MULTILIB_ALIASES += mthumb/march?armv7/mfix-cortex-m3-ldrd=mthumb/march?armv7
++
++
++MULTILIB_OSDIRNAMES = mthumb/march.armv7/mfix-cortex-m3-ldrd=!thumb2
++MULTILIB_OSDIRNAMES += mthumb/march.armv6-m=!armv6-m
++
+diff -Nur a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
+--- a/gcc/config/arm/thumb2.md 2008-11-21 00:00:00.000000000 +0100
++++ b/gcc/config/arm/thumb2.md 2010-01-25 09:50:28.995687913 +0100
+@@ -24,6 +24,8 @@
+ ;; changes made in armv5t as "thumb2". These are considered part
+ ;; the 16-bit Thumb-1 instruction set.
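
The (include "hwdiv.md") added just below moves the sdiv/udiv patterns out of thumb2.md into a shared file (the old Thumb-2-only divsi3/udivsi3 definitions are deleted further down in this hunk series), so any core advertising arm_arch_hwdiv can use them. In source terms the patterns govern nothing more exotic than:

    /* On a divide-capable core (e.g. -mcpu=cortex-m3) each of these should
       become a single sdiv/udiv instruction; otherwise they remain calls
       to __aeabi_idiv / __aeabi_uidiv.  */
    int qs (int a, int b)                { return a / b; }
    unsigned qu (unsigned a, unsigned b) { return a / b; }
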
+ ++(include "hwdiv.md") ++ + (define_insn "*thumb2_incscc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_operator:SI 2 "arm_comparison_operator" +@@ -172,34 +174,6 @@ + (set_attr "length" "8")] + ) + +-(define_insn "*thumb2_abssi2" +- [(set (match_operand:SI 0 "s_register_operand" "=r,&r") +- (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) +- (clobber (reg:CC CC_REGNUM))] +- "TARGET_THUMB2" +- "@ +- cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 +- eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" +- [(set_attr "conds" "clob,*") +- (set_attr "shift" "1") +- ;; predicable can't be set based on the variant, so left as no +- (set_attr "length" "10,8")] +-) +- +-(define_insn "*thumb2_neg_abssi2" +- [(set (match_operand:SI 0 "s_register_operand" "=r,&r") +- (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) +- (clobber (reg:CC CC_REGNUM))] +- "TARGET_THUMB2" +- "@ +- cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 +- eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" +- [(set_attr "conds" "clob,*") +- (set_attr "shift" "1") +- ;; predicable can't be set based on the variant, so left as no +- (set_attr "length" "10,8")] +-) +- + (define_insn "*thumb2_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))] +@@ -223,9 +197,14 @@ + (set_attr "neg_pool_range" "*,*,*,0,*")] + ) + ++;; We have two alternatives here for memory loads (and similarly for stores) ++;; to reflect the fact that the permissible constant pool ranges differ ++;; between ldr instructions taking low regs and ldr instructions taking high ++;; regs. The high register alternatives are not taken into account when ++;; choosing register preferences in order to reflect their expense. + (define_insn "*thumb2_movsi_insn" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") +- (match_operand:SI 1 "general_operand" "rk ,I,K,N,mi,rk"))] ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l,*hk,m,*m") ++ (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))] + "TARGET_THUMB2 && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) +@@ -236,11 +215,13 @@ + mvn%?\\t%0, #%B1 + movw%?\\t%0, %1 + ldr%?\\t%0, %1 ++ ldr%?\\t%0, %1 ++ str%?\\t%1, %0 + str%?\\t%1, %0" +- [(set_attr "type" "*,*,*,*,load1,store1") ++ [(set_attr "type" "*,*,*,*,load1,load1,store1,store1") + (set_attr "predicable" "yes") +- (set_attr "pool_range" "*,*,*,*,4096,*") +- (set_attr "neg_pool_range" "*,*,*,*,0,*")] ++ (set_attr "pool_range" "*,*,*,*,1020,4096,*,*") ++ (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")] + ) + + ;; ??? 
We can probably do better with thumb2 +@@ -1128,27 +1109,7 @@ + return \"add%!\\t%0, %1, %2\"; + " + [(set_attr "predicable" "yes") +- (set_attr "length" "2")] +-) +- +-(define_insn "divsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r") +- (div:SI (match_operand:SI 1 "s_register_operand" "r") +- (match_operand:SI 2 "s_register_operand" "r")))] +- "TARGET_THUMB2 && arm_arch_hwdiv" +- "sdiv%?\t%0, %1, %2" +- [(set_attr "predicable" "yes") +- (set_attr "insn" "sdiv")] +-) +- +-(define_insn "udivsi3" +- [(set (match_operand:SI 0 "s_register_operand" "=r") +- (udiv:SI (match_operand:SI 1 "s_register_operand" "r") +- (match_operand:SI 2 "s_register_operand" "r")))] +- "TARGET_THUMB2 && arm_arch_hwdiv" +- "udiv%?\t%0, %1, %2" +- [(set_attr "predicable" "yes") +- (set_attr "insn" "udiv")] ++ (set_attr "length" "4")] + ) + + (define_insn "*thumb2_subsi_short" +@@ -1162,6 +1123,71 @@ + (set_attr "length" "2")] + ) + ++;; 16-bit encodings of "muls" and "mul". We only use these when ++;; optimizing for size since "muls" is slow on all known ++;; implementations and since "mul" will be generated by ++;; "*arm_mulsi3_v6" anyhow. The assembler will use a 16-bit encoding ++;; for "mul" whenever possible anyhow. ++(define_peephole2 ++ [(set (match_operand:SI 0 "low_register_operand" "") ++ (mult:SI (match_operand:SI 1 "low_register_operand" "") ++ (match_dup 0)))] ++ "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" ++ [(parallel ++ [(set (match_dup 0) ++ (mult:SI (match_dup 0) (match_dup 1))) ++ (clobber (reg:CC CC_REGNUM))])] ++ "" ++) ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "low_register_operand" "") ++ (mult:SI (match_dup 0) ++ (match_operand:SI 1 "low_register_operand" "")))] ++ "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" ++ [(parallel ++ [(set (match_dup 0) ++ (mult:SI (match_dup 0) (match_dup 1))) ++ (clobber (reg:CC CC_REGNUM))])] ++ "" ++) ++ ++(define_insn "*thumb2_mulsi_short" ++ [(set (match_operand:SI 0 "low_register_operand" "=l") ++ (mult:SI (match_operand:SI 1 "low_register_operand" "%0") ++ (match_operand:SI 2 "low_register_operand" "l"))) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_THUMB2 && optimize_size && reload_completed" ++ "mul%!\\t%0, %2, %0" ++ [(set_attr "predicable" "yes") ++ (set_attr "length" "2") ++ (set_attr "insn" "muls")]) ++ ++(define_insn "*thumb2_mulsi_short_compare0" ++ [(set (reg:CC_NOOV CC_REGNUM) ++ (compare:CC_NOOV ++ (mult:SI (match_operand:SI 1 "register_operand" "%0") ++ (match_operand:SI 2 "register_operand" "l")) ++ (const_int 0))) ++ (set (match_operand:SI 0 "register_operand" "=l") ++ (mult:SI (match_dup 1) (match_dup 2)))] ++ "TARGET_THUMB2 && optimize_size" ++ "muls\\t%0, %2, %0" ++ [(set_attr "length" "2") ++ (set_attr "insn" "muls")]) ++ ++(define_insn "*thumb2_mulsi_short_compare0_scratch" ++ [(set (reg:CC_NOOV CC_REGNUM) ++ (compare:CC_NOOV ++ (mult:SI (match_operand:SI 1 "register_operand" "%0") ++ (match_operand:SI 2 "register_operand" "l")) ++ (const_int 0))) ++ (clobber (match_scratch:SI 0 "=r"))] ++ "TARGET_THUMB2 && optimize_size" ++ "muls\\t%0, %2, %0" ++ [(set_attr "length" "2") ++ (set_attr "insn" "muls")]) ++ + (define_insn "*thumb2_cbz" + [(set (pc) (if_then_else + (eq (match_operand:SI 0 "s_register_operand" "l,?r") +@@ -1171,7 +1197,7 @@ + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* +- if (get_attr_length (insn) == 2 && which_alternative == 0) ++ if (get_attr_length (insn) == 2) + return \"cbz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;beq\\t%l1\"; +@@ -1179,7 
+1205,8 @@ + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) +- (le (minus (match_dup 1) (pc)) (const_int 128))) ++ (le (minus (match_dup 1) (pc)) (const_int 128)) ++ (eq (symbol_ref ("which_alternative")) (const_int 0))) + (const_int 2) + (const_int 8)))] + ) +@@ -1193,7 +1220,7 @@ + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* +- if (get_attr_length (insn) == 2 && which_alternative == 0) ++ if (get_attr_length (insn) == 2) + return \"cbnz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;bne\\t%l1\"; +@@ -1201,7 +1228,8 @@ + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) +- (le (minus (match_dup 1) (pc)) (const_int 128))) ++ (le (minus (match_dup 1) (pc)) (const_int 128)) ++ (eq (symbol_ref ("which_alternative")) (const_int 0))) + (const_int 2) + (const_int 8)))] + ) +diff -Nur a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi +--- a/gcc/config/arm/t-linux-eabi 2009-01-24 22:06:08.000000000 +0100 ++++ b/gcc/config/arm/t-linux-eabi 2010-01-25 09:50:28.995687913 +0100 +@@ -6,8 +6,8 @@ + MULTILIB_OPTIONS = + MULTILIB_DIRNAMES = + +-# Use a version of div0 which raises SIGFPE. +-LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx ++# Use a version of div0 which raises SIGFPE, and a special __clear_cache. ++LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache + + # Multilib the standard Linux files. Don't include crti.o or crtn.o, + # which are provided by glibc. +diff -Nur a/gcc/config/arm/t-montavista-linux b/gcc/config/arm/t-montavista-linux +--- a/gcc/config/arm/t-montavista-linux 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-montavista-linux 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,33 @@ ++# MontaVista GNU/Linux Configuration. ++# Copyright (C) 2009 ++# Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++MULTILIB_OPTIONS = tarmv6/tthumb2 ++MULTILIB_DIRNAMES = armv6 thumb2 ++ ++MULTILIB_EXCEPTIONS = ++ ++MULTILIB_OSDIRNAMES = ++ ++MULTILIB_ALIASES = ++ ++MULTILIB_MATCHES = ++ ++# These files must be built for each multilib. ++EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o +diff -Nur a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian +--- a/gcc/config/arm/t-symbian 2008-06-12 19:29:47.000000000 +0200 ++++ b/gcc/config/arm/t-symbian 2010-01-25 09:50:28.995687913 +0100 +@@ -17,6 +17,9 @@ + LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c + LIB2ADDEHDEP = $(UNWIND_H) + ++# Include half-float helpers. ++LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c ++ + # Create a multilib for processors with VFP floating-point, and a + # multilib for those without -- using the soft-float ABI in both + # cases. 
Symbian OS object should be compiled with interworking +diff -Nur a/gcc/config/arm/t-wrs-linux b/gcc/config/arm/t-wrs-linux +--- a/gcc/config/arm/t-wrs-linux 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/t-wrs-linux 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,43 @@ ++# Wind River GNU/Linux Configuration. ++# Copyright (C) 2006, 2007, 2008 ++# Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++MULTILIB_OPTIONS = muclibc ++MULTILIB_OPTIONS += tarm926ej-s/tiwmmxt/txscale/tarm920t/tthumb2/tcortex-a8-be8 ++MULTILIB_OPTIONS += mfloat-abi=softfp ++MULTILIB_DIRNAMES = uclibc ++MULTILIB_DIRNAMES += tarm926ej-s tiwmmxt txscale tarm920t thumb2 cortex-a8-be8 ++MULTILIB_DIRNAMES += softfp ++ ++MULTILIB_EXCEPTIONS = *muclibc*/*tarm920t* ++MULTILIB_EXCEPTIONS += *muclibc*/*cortex-a8-be8* ++ ++MULTILIB_EXCEPTIONS += *tiwmmxt*/*mfloat-abi=softfp* ++MULTILIB_EXCEPTIONS += *txscale*/*mfloat-abi=softfp* ++MULTILIB_EXCEPTIONS += *tarm920t*/*mfloat-abi=softfp* ++MULTILIB_EXCEPTIONS += *thumb2*/*mfloat-abi=softfp* ++ ++MULTILIB_MATCHES = tiwmmxt=tiwmmxt2 ++ ++MULTILIB_ALIASES = tcortex-a8-be8=tcortex-a8-be8/mfloat-abi?softfp ++MULTILIB_OSDIRNAMES = ++ ++# These files must be built for each multilib. ++EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o ++ +diff -Nur a/gcc/config/arm/uclinux-eabi.h b/gcc/config/arm/uclinux-eabi.h +--- a/gcc/config/arm/uclinux-eabi.h 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/uclinux-eabi.h 2010-01-25 09:50:28.995687913 +0100 +@@ -50,6 +50,10 @@ + #undef ARM_DEFAULT_ABI + #define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX + ++#undef LINK_GCC_C_SEQUENCE_SPEC ++#define LINK_GCC_C_SEQUENCE_SPEC \ ++ "--start-group %G %L --end-group" ++ + /* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. */ + #undef CLEAR_INSN_CACHE +diff -Nur a/gcc/config/arm/unwind-arm.c b/gcc/config/arm/unwind-arm.c +--- a/gcc/config/arm/unwind-arm.c 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/unwind-arm.c 2010-01-25 09:50:28.995687913 +0100 +@@ -1000,7 +1000,6 @@ + while (code != _URC_END_OF_STACK + && code != _URC_FAILURE); + +- finish: + restore_non_core_regs (&saved_vrs); + return code; + } +@@ -1168,6 +1167,9 @@ + { + matched = (void *)(ucbp + 1); + rtti = _Unwind_decode_target2 ((_uw) &data[i + 1]); ++ /* There is no way to encode an exception ++ specification for 'class X * &', so ++ always pass false for is_reference. */ + if (__cxa_type_match (ucbp, (type_info *) rtti, 0, + &matched)) + break; +@@ -1197,8 +1199,6 @@ + ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1]; + + if (data[0] & uint32_highbit) +- phase2_call_unexpected_after_unwind = 1; +- else + { + data += rtti_count + 1; + /* Setup for entry to the handler. 
*/ +@@ -1208,6 +1208,8 @@ + _Unwind_SetGR (context, 0, (_uw) ucbp); + return _URC_INSTALL_CONTEXT; + } ++ else ++ phase2_call_unexpected_after_unwind = 1; + } + if (data[0] & uint32_highbit) + data++; +diff -Nur a/gcc/config/arm/unwind-arm.h b/gcc/config/arm/unwind-arm.h +--- a/gcc/config/arm/unwind-arm.h 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/arm/unwind-arm.h 2010-01-25 09:50:28.995687913 +0100 +@@ -229,9 +229,10 @@ + return 0; + + #if (defined(linux) && !defined(__uClinux__)) || defined(__NetBSD__) +- /* Pc-relative indirect. */ ++ /* Pc-relative indirect. Propagate the bottom 2 bits, which can ++ contain referenceness information in gnu unwinding tables. */ + tmp += ptr; +- tmp = *(_Unwind_Word *) tmp; ++ tmp = *(_Unwind_Word *) (tmp & ~(_Unwind_Word)3) | (tmp & 3); + #elif defined(__symbian__) || defined(__uClinux__) + /* Absolute pointer. Nothing more to do. */ + #else +diff -Nur a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md +--- a/gcc/config/arm/vec-common.md 2007-08-22 22:32:18.000000000 +0200 ++++ b/gcc/config/arm/vec-common.md 2010-01-25 09:50:28.995687913 +0100 +@@ -38,6 +38,11 @@ + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" + { ++ if (can_create_pseudo_p ()) ++ { ++ if (GET_CODE (operands[0]) != REG) ++ operands[1] = force_reg (mode, operands[1]); ++ } + }) + + ;; Vector arithmetic. Expanders are blank, then unnamed insns implement +diff -Nur a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md +--- a/gcc/config/arm/vfp.md 2008-09-01 15:40:49.000000000 +0200 ++++ b/gcc/config/arm/vfp.md 2010-01-25 09:50:28.995687913 +0100 +@@ -51,7 +51,7 @@ + ;; problems because small constants get converted into adds. + (define_insn "*arm_movsi_vfp" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv") +- (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk,r,*t,*t,*Uvi,*t"))] ++ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))] + "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" +@@ -82,13 +82,17 @@ + " + [(set_attr "predicable" "yes") + (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") ++ (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") ++ (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] + ) + ++;; See thumb2.md:thumb2_movsi_insn for an explanation of the split ++;; high/low register alternatives for loads and stores here. 
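
The pattern that follows, like its thumb2.md counterpart, is about literal-pool reach: the 16-bit ldr encoding (low registers) can only address a short span past the instruction, the 32-bit one much further, and the 1020/4096 pool_range pairs teach minipool placement about both. Roughly the kind of source that still needs a pool at all:

    /* Values with no immediate encoding are stored in a literal pool after
       the function and loaded pc-relative; how far away the pool may sit
       depends on the chosen encoding.  (On v6T2+ small integers usually
       become movw/movt, so pools mostly hold doubles, addresses, etc.)  */
    double pi_ish (void)
    {
      return 3.141592653589793;       /* a typical pool-resident constant */
    }
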
+ (define_insn "*thumb2_movsi_vfp" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m,*t,r, *t,*t, *Uv") +- (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk,r,*t,*t,*Uvi,*t"))] ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l,*hk,m,*m,*t,r, *t,*t, *Uv") ++ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk,r,*t,*t,*Uvi,*t"))] + "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" +@@ -102,25 +106,29 @@ + case 3: + return \"movw%?\\t%0, %1\"; + case 4: +- return \"ldr%?\\t%0, %1\"; + case 5: +- return \"str%?\\t%1, %0\"; ++ return \"ldr%?\\t%0, %1\"; + case 6: +- return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 7: +- return \"fmrs%?\\t%0, %1\\t%@ int\"; ++ return \"str%?\\t%1, %0\"; + case 8: ++ return \"fmsr%?\\t%0, %1\\t%@ int\"; ++ case 9: ++ return \"fmrs%?\\t%0, %1\\t%@ int\"; ++ case 10: + return \"fcpys%?\\t%0, %1\\t%@ int\"; +- case 9: case 10: ++ case 11: case 12: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") +- (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_load,f_store") +- (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") +- (set_attr "neg_pool_range" "*,*,*,*, 0,*,*,*,*,1008,*")] ++ (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_load,f_store") ++ (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") ++ (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") ++ (set_attr "pool_range" "*,*,*,*,1020,4096,*,*,*,*,*,1020,*") ++ (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] + ) + + +@@ -145,7 +153,10 @@ + case 4: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 5: +- return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; ++ if (TARGET_VFP_SINGLE) ++ return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; ++ else ++ return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 6: case 7: + return output_move_vfp (operands); + default: +@@ -153,7 +164,14 @@ + } + " + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") +- (set_attr "length" "8,8,8,4,4,4,4,4") ++ (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") ++ (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) ++ (eq_attr "alternative" "5") ++ (if_then_else ++ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) ++ (const_int 8) ++ (const_int 4))] ++ (const_int 4))) + (set_attr "pool_range" "*,1020,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")] + ) +@@ -172,7 +190,10 @@ + case 4: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 5: +- return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; ++ if (TARGET_VFP_SINGLE) ++ return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; ++ else ++ return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 6: case 7: + return output_move_vfp (operands); + default: +@@ -180,11 +201,123 @@ + } + " + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_load,f_store") +- (set_attr "length" "8,8,8,4,4,4,4,4") ++ (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") ++ (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) ++ (eq_attr "alternative" "5") ++ (if_then_else ++ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) ++ (const_int 8) ++ (const_int 4))] ++ (const_int 4))) + (set_attr "pool_range" "*,4096,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")] + ) + ++;; HFmode moves ++(define_insn 
"*movhf_vfp_neon" ++ [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") ++ (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 ++ && ( s_register_operand (operands[0], HFmode) ++ || s_register_operand (operands[1], HFmode))" ++ "* ++ switch (which_alternative) ++ { ++ case 0: /* S register from memory */ ++ return \"vld1.16\\t{%z0}, %A1\"; ++ case 1: /* memory from S register */ ++ return \"vst1.16\\t{%z1}, %A0\"; ++ case 2: /* ARM register from memory */ ++ return \"ldrh\\t%0, %1\\t%@ __fp16\"; ++ case 3: /* memory from ARM register */ ++ return \"strh\\t%1, %0\\t%@ __fp16\"; ++ case 4: /* S register from S register */ ++ return \"fcpys\\t%0, %1\"; ++ case 5: /* ARM register from ARM register */ ++ return \"mov\\t%0, %1\\t%@ __fp16\"; ++ case 6: /* S register from ARM register */ ++ return \"fmsr\\t%0, %1\"; ++ case 7: /* ARM register from S register */ ++ return \"fmrs\\t%0, %1\"; ++ case 8: /* ARM register from constant */ ++ { ++ REAL_VALUE_TYPE r; ++ long bits; ++ rtx ops[4]; ++ ++ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); ++ bits = real_to_target (NULL, &r, HFmode); ++ ops[0] = operands[0]; ++ ops[1] = GEN_INT (bits); ++ ops[2] = GEN_INT (bits & 0xff00); ++ ops[3] = GEN_INT (bits & 0x00ff); ++ ++ if (arm_arch_thumb2) ++ output_asm_insn (\"movw\\t%0, %1\", ops); ++ else ++ output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); ++ return \"\"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++ " ++ [(set_attr "conds" "unconditional") ++ (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*") ++ (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*") ++ (set_attr "length" "4,4,4,4,4,4,4,4,8")] ++) ++ ++;; FP16 without element load/store instructions. 
++(define_insn "*movhf_vfp" ++ [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r") ++ (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16 ++ && ( s_register_operand (operands[0], HFmode) ++ || s_register_operand (operands[1], HFmode))" ++ "* ++ switch (which_alternative) ++ { ++ case 0: /* ARM register from memory */ ++ return \"ldrh\\t%0, %1\\t%@ __fp16\"; ++ case 1: /* memory from ARM register */ ++ return \"strh\\t%1, %0\\t%@ __fp16\"; ++ case 2: /* S register from S register */ ++ return \"fcpys\\t%0, %1\"; ++ case 3: /* ARM register from ARM register */ ++ return \"mov\\t%0, %1\\t%@ __fp16\"; ++ case 4: /* S register from ARM register */ ++ return \"fmsr\\t%0, %1\"; ++ case 5: /* ARM register from S register */ ++ return \"fmrs\\t%0, %1\"; ++ case 6: /* ARM register from constant */ ++ { ++ REAL_VALUE_TYPE r; ++ long bits; ++ rtx ops[4]; ++ ++ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); ++ bits = real_to_target (NULL, &r, HFmode); ++ ops[0] = operands[0]; ++ ops[1] = GEN_INT (bits); ++ ops[2] = GEN_INT (bits & 0xff00); ++ ops[3] = GEN_INT (bits & 0x00ff); ++ ++ if (arm_arch_thumb2) ++ output_asm_insn (\"movw\\t%0, %1\", ops); ++ else ++ output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); ++ return \"\"; ++ } ++ default: ++ gcc_unreachable (); ++ } ++ " ++ [(set_attr "conds" "unconditional") ++ (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*") ++ (set_attr "length" "4,4,4,4,4,4,8")] ++) ++ + + ;; SFmode moves + ;; Disparage the w<->r cases because reloading an invalid address is +@@ -222,6 +355,8 @@ + [(set_attr "predicable" "yes") + (set_attr "type" + "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") ++ (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") ++ (set_attr "insn" "*,*,*,*,*,*,*,*,mov") + (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] + ) +@@ -258,6 +393,8 @@ + [(set_attr "predicable" "yes") + (set_attr "type" + "r_2_f,f_2_r,fconsts,f_load,f_store,load1,store1,fcpys,*") ++ (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") ++ (set_attr "insn" "*,*,*,*,*,*,*,*,mov") + (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] + ) +@@ -267,7 +404,7 @@ + + (define_insn "*movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") +- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))] ++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" +@@ -280,13 +417,17 @@ + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: ++ gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: + return output_move_double (operands); + case 5: case 6: + return output_move_vfp (operands); + case 7: +- return \"fcpyd%?\\t%P0, %P1\"; ++ if (TARGET_VFP_SINGLE) ++ return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; ++ else ++ return \"fcpyd%?\\t%P0, %P1\"; + case 8: + return \"#\"; + default: +@@ -296,14 +437,21 @@ + " + [(set_attr "type" + "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") +- (set_attr "length" "4,4,4,8,8,4,4,4,8") ++ (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") ++ (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) ++ (eq_attr "alternative" 
"7") ++ (if_then_else ++ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) ++ (const_int 8) ++ (const_int 4))] ++ (const_int 4))) + (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")] + ) + + (define_insn "*thumb2_movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") +- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))] ++ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "* + { +@@ -314,13 +462,17 @@ + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: ++ gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: case 8: + return output_move_double (operands); + case 5: case 6: + return output_move_vfp (operands); + case 7: +- return \"fcpyd%?\\t%P0, %P1\"; ++ if (TARGET_VFP_SINGLE) ++ return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; ++ else ++ return \"fcpyd%?\\t%P0, %P1\"; + default: + abort (); + } +@@ -328,7 +480,14 @@ + " + [(set_attr "type" + "r_2_f,f_2_r,fconstd,load2,store2,f_load,f_store,ffarithd,*") +- (set_attr "length" "4,4,4,8,8,4,4,4,8") ++ (set_attr "neon_type" "neon_mcr_2_mcrr,neon_mrrc,*,*,*,*,*,neon_vmov,*") ++ (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) ++ (eq_attr "alternative" "7") ++ (if_then_else ++ (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) ++ (const_int 8) ++ (const_int 4))] ++ (const_int 4))) + (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*") + (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")] + ) +@@ -356,7 +515,8 @@ + fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,4,4,8,4,4,8") +- (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ++ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") ++ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] + ) + + (define_insn "*thumb2_movsfcc_vfp" +@@ -379,7 +539,8 @@ + ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") +- (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ++ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") ++ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr,neon_mcr,neon_mcr,neon_mrc,neon_mrc,neon_mrc")] + ) + + (define_insn "*movdfcc_vfp" +@@ -389,7 +550,7 @@ + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] +- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcpyd%D3\\t%P0, %P2 + fcpyd%d3\\t%P0, %P1 +@@ -402,7 +563,8 @@ + fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,4,4,8,4,4,8") +- (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ++ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") ++ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] + ) + + (define_insn "*thumb2_movdfcc_vfp" +@@ -412,7 +574,7 @@ + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" 
"w,0,w,?r,0,?r,w,0,w")))] +- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + it\\t%D3\;fcpyd%D3\\t%P0, %P2 + it\\t%d3\;fcpyd%d3\\t%P0, %P1 +@@ -425,7 +587,8 @@ + ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") +- (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ++ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r") ++ (set_attr "neon_type" "neon_vmov,neon_vmov,neon_vmov,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mcr_2_mcrr,neon_mrrc,neon_mrrc,neon_mrrc")] + ) + + +@@ -443,7 +606,7 @@ + (define_insn "*absdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (abs:DF (match_operand:DF 1 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fabsd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "ffarithd")] +@@ -463,12 +626,12 @@ + (define_insn_and_split "*negdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,?r,?r") + (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fnegd%?\\t%P0, %P1 + # + #" +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && reload_completed ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed + && arm_general_register_operand (operands[0], DFmode)" + [(set (match_dup 0) (match_dup 1))] + " +@@ -523,7 +686,7 @@ + [(set (match_operand:DF 0 "s_register_operand" "=w") + (plus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "faddd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "faddd")] +@@ -544,7 +707,7 @@ + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsubd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "faddd")] +@@ -567,7 +730,7 @@ + [(set (match_operand:DF 0 "s_register_operand" "+w") + (div:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fdivd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "fdivd")] +@@ -590,7 +753,7 @@ + [(set (match_operand:DF 0 "s_register_operand" "+w") + (mult:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "fmuld")] +@@ -611,7 +774,7 @@ + [(set (match_operand:DF 0 "s_register_operand" "+w") + (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) + (match_operand:DF 2 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + 
(set_attr "type" "fmuld")] +@@ -626,7 +789,8 @@ + (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP ++ && (!arm_tune_marvell_f || optimize_size)" + "fmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +@@ -637,7 +801,8 @@ + (plus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE ++ && (!arm_tune_marvell_f || optimize_size)" + "fmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +@@ -649,7 +814,8 @@ + (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP ++ && (!arm_tune_marvell_f || optimize_size)" + "fmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +@@ -660,7 +826,8 @@ + (minus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE ++ && (!arm_tune_marvell_f || optimize_size)" + "fmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +@@ -672,7 +839,8 @@ + (minus:SF (match_operand:SF 1 "s_register_operand" "0") + (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t"))))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP ++ && (!arm_tune_marvell_f || optimize_size)" + "fnmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +@@ -683,7 +851,8 @@ + (minus:DF (match_operand:DF 1 "s_register_operand" "0") + (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w"))))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE ++ && (!arm_tune_marvell_f || optimize_size)" + "fnmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +@@ -697,7 +866,8 @@ + (neg:SF (match_operand:SF 2 "s_register_operand" "t")) + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP ++ && (!arm_tune_marvell_f || optimize_size)" + "fnmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +@@ -709,7 +879,8 @@ + (neg:DF (match_operand:DF 2 "s_register_operand" "w")) + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE ++ && (!arm_tune_marvell_f || optimize_size)" + "fnmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +@@ -721,7 +892,7 @@ + (define_insn "*extendsfdf2_vfp" + [(set (match_operand:DF 0 
"s_register_operand" "=w") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtds%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +@@ -730,12 +901,30 @@ + (define_insn "*truncdfsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtsd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] + ) + ++(define_insn "extendhfsf2" ++ [(set (match_operand:SF 0 "s_register_operand" "=t") ++ (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" ++ "vcvtb%?.f32.f16\\t%0, %1" ++ [(set_attr "predicable" "yes") ++ (set_attr "type" "f_cvt")] ++) ++ ++(define_insn "truncsfhf2" ++ [(set (match_operand:HF 0 "s_register_operand" "=t") ++ (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" ++ "vcvtb%?.f16.f32\\t%0, %1" ++ [(set_attr "predicable" "yes") ++ (set_attr "type" "f_cvt")] ++) ++ + (define_insn "*truncsisf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] +@@ -748,7 +937,7 @@ + (define_insn "*truncsidf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftosizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +@@ -767,7 +956,7 @@ + (define_insn "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftouizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +@@ -786,7 +975,7 @@ + (define_insn "*floatsidf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (float:DF (match_operand:SI 1 "s_register_operand" "t")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +@@ -805,7 +994,7 @@ + (define_insn "floatunssidf2" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fuitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +@@ -826,7 +1015,7 @@ + (define_insn "*sqrtdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsqrtd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "fdivd")] +@@ -878,9 +1067,9 @@ + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ 
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_dup 0) + (match_dup 1))) +@@ -893,9 +1082,9 @@ + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_dup 0) + (match_dup 1))) +@@ -935,7 +1124,7 @@ + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmpd%?\\t%P0, %P1 + fcmpzd%?\\t%P0" +@@ -947,7 +1136,7 @@ + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] +- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmped%?\\t%P0, %P1 + fcmpezd%?\\t%P0" +diff -Nur a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h +--- a/gcc/config/arm/vxworks.h 2009-02-20 16:20:38.000000000 +0100 ++++ b/gcc/config/arm/vxworks.h 2010-01-25 09:50:28.995687913 +0100 +@@ -97,7 +97,7 @@ + /* There is no default multilib. */ + #undef MULTILIB_DEFAULTS + +-#define FPUTYPE_DEFAULT FPUTYPE_VFP ++#define FPUTYPE_DEFAULT "vfp" + + #undef FUNCTION_PROFILER + #define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER +diff -Nur a/gcc/config/arm/wrs-linux.h b/gcc/config/arm/wrs-linux.h +--- a/gcc/config/arm/wrs-linux.h 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/arm/wrs-linux.h 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,76 @@ ++/* Wind River GNU/Linux Configuration. ++ Copyright (C) 2006, 2007, 2008 ++ Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Use the ARM926EJ-S by default. */ ++#undef SUBTARGET_CPU_DEFAULT ++#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm926ejs ++ ++/* Add a -tiwmmxt option for convenience in generating multilibs. ++ This option generates big-endian IWMMXT code. 
*/
++#undef CC1_SPEC
++#define CC1_SPEC " \
++ %{tarm926ej-s: -mcpu=arm926ej-s ; \
++ tiwmmxt: -mcpu=iwmmxt ; \
++ tiwmmxt2: -mcpu=iwmmxt ; \
++ txscale: -mcpu=xscale -mbig-endian ; \
++ tarm920t: -mcpu=arm920t ; \
++ tthumb2: %{!mcpu=*:%{!march=*:-march=armv6t2}} -mthumb ; \
++ tcortex-a8-be8: -mcpu=cortex-a8 -mbig-endian -mfloat-abi=softfp \
++ -mfpu=neon } \
++ %{txscale:%{mfloat-abi=softfp:%eXScale VFP multilib not provided}} \
++ %{tarm920t:%{mfloat-abi=softfp:%eARM920T VFP multilib not provided}} \
++ %{profile:-p}"
++
++/* Since the ARM926EJ-S is the default processor, we do not need to
++ provide an explicit multilib for that processor. */
++#undef MULTILIB_DEFAULTS
++#define MULTILIB_DEFAULTS \
++ { "tarm926ej-s" }
++
++/* The GLIBC headers are in /usr/include, relative to the sysroot; the
++ uClibc headers are in /uclibc/usr/include. */
++#undef SYSROOT_HEADERS_SUFFIX_SPEC
++#define SYSROOT_HEADERS_SUFFIX_SPEC \
++ "%{muclibc:/uclibc}"
++
++/* Translate -tiwmmxt appropriately for the assembler. The -meabi=5
++ option is the relevant part of SUBTARGET_EXTRA_ASM_SPEC in bpabi.h. */
++#undef SUBTARGET_EXTRA_ASM_SPEC
++#define SUBTARGET_EXTRA_ASM_SPEC \
++ "%{tiwmmxt2:-mcpu=iwmmxt2} %{tiwmmxt:-mcpu=iwmmxt} %{txscale:-mcpu=xscale -EB} %{tcortex-a8-be8:-mcpu=cortex-a8 -EB} -meabi=5"
++
++/* Translate -tiwmmxt for the linker. */
++#undef SUBTARGET_EXTRA_LINK_SPEC
++#define SUBTARGET_EXTRA_LINK_SPEC \
++ " %{tiwmmxt:-m armelf_linux_eabi ; \
++ txscale:-m armelfb_linux_eabi ; \
++ tcortex-a8-be8:-m armelfb_linux_eabi %{!r:--be8} ; \
++ : -m armelf_linux_eabi}"
++
++/* The various C libraries each have their own subdirectory. */
++#undef SYSROOT_SUFFIX_SPEC
++#define SYSROOT_SUFFIX_SPEC \
++ "%{muclibc:/uclibc}%{tiwmmxt:/tiwmmxt ; \
++ tiwmmxt2:/tiwmmxt ; \
++ txscale:/txscale ; \
++ tarm920t:/tarm920t ; \
++ tthumb2:/thumb2 ; \
++ tcortex-a8-be8:/cortex-a8-be8}%{!tthumb2:%{!tcortex-a8-be8:%{mfloat-abi=softfp:/softfp}}}"
++
+diff -Nur a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md
+--- a/gcc/config/i386/atom.md 1970-01-01 01:00:00.000000000 +0100
++++ b/gcc/config/i386/atom.md 2010-01-25 09:50:28.995687913 +0100
+@@ -0,0 +1,795 @@
++;; Atom Scheduling
++;; Copyright (C) 2009 Free Software Foundation, Inc.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify
++;; it under the terms of the GNU General Public License as published by
++;; the Free Software Foundation; either version 3, or (at your option)
++;; any later version.
++;;
++;; GCC is distributed in the hope that it will be useful,
++;; but WITHOUT ANY WARRANTY; without even the implied warranty of
++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++;; GNU General Public License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3. If not see
++;; <http://www.gnu.org/licenses/>.
++;;
++;; Atom is an in-order core with two integer pipelines.
++
++
++(define_attr "atom_unit" "sishuf,simul,jeu,complex,other"
++ (const_string "other"))
++
++(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
++ (const_string "other"))
++
++(define_automaton "atom")
++
++;; Atom has two ports: port 0 and port 1 connecting to all execution units
++(define_cpu_unit "atom-port-0,atom-port-1" "atom")
++
++;; EU: Execution Unit
++;; Atom EUs are connected by port 0 or port 1. 
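++;;
++;; As a reading aid, a sketch (not itself part of the model): a
++;; define_insn_reservation ties an insn class to these units, giving
++;; its name, latency in cycles, a predicate over insn attributes, and
++;; the units it occupies.  A hypothetical 1-cycle class pinned to
++;; port 0 would read:
++;;
++;;   (define_insn_reservation "example_port0" 1
++;;     (and (eq_attr "cpu" "atom") (eq_attr "type" "alu"))
++;;     "atom-port-0")
++;;
++;; The execution units declared next combine with the two ports in
++;; the real reservations below.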
++
++(define_cpu_unit "atom-eu-0, atom-eu-1,
++ atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
++ "atom")
++
++;; Some EUs have duplicate copies and can be accessed via either
++;; port 0 or port 1
++;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
++
++;;; Some instructions are dual-pipe execution and need both ports
++;;; Complex multi-op macro-instructions need both ports and all EUs
++(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
++(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 +
++ atom-imul-1 + atom-imul-2 + atom-imul-3 +
++ atom-imul-4)")
++
++;;; Most simple instructions have 1 cycle latency. Some of them
++;;; issue in port 0, some in port 1 and some in either port.
++(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
++(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
++(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
++
++;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle tput
++(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
++
++;;; fmul insn can have 4 or 5 cycles latency
++(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
++(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
++
++;;; fadd can have 5 cycles latency depending on instruction forms
++(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
++
++;;; imul insn has 5 cycles latency
++(define_reservation "atom-imul-32"
++ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4,
++ atom-port-0")
++;;; imul instruction excludes other non-FP instructions.
++(exclusion_set "atom-eu-0, atom-eu-1"
++ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
++
++;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on
++;;; instruction forms
++(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
++(define_reservation "atom-dual-2c"
++ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
++(define_reservation "atom-dual-5c"
++ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
++
++;;; Complex macro-instructions have varying latency, and use both ports. 
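++;; (Looking back at the multiplier model above, a worked sketch, for
++;; illustration only: two independent SImode multiplies issued
++;; back-to-back reserve
++;;
++;;   cycle:    1   2   3   4   5   6
++;;   imul #1:  m1  m2  m3  m4  p0
++;;   imul #2:      m1  m2  m3  m4  p0
++;;
++;; where mN is atom-imul-N and p0 is atom-port-0, giving the declared
++;; 5-cycle latency with single-cycle throughput; the exclusion_set
++;; keeps atom-eu-0/atom-eu-1 from being reserved in those same cycles,
++;; matching the note that imul excludes other non-FP work.  The fully
++;; complex forms follow.)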
++(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)") ++ ++(define_insn_reservation "atom_other" 9 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "other") ++ (eq_attr "atom_unit" "!jeu"))) ++ "atom-complex, atom-all-eu*8") ++ ++;; return has type "other" with atom_unit "jeu" ++(define_insn_reservation "atom_other_2" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "other") ++ (eq_attr "atom_unit" "jeu"))) ++ "atom-dual-1c") ++ ++(define_insn_reservation "atom_multi" 9 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "multi")) ++ "atom-complex, atom-all-eu*8") ++ ++;; Normal alu insns without carry ++(define_insn_reservation "atom_alu" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "alu") ++ (and (eq_attr "memory" "none") ++ (eq_attr "use_carry" "0")))) ++ "atom-simple-either") ++ ++;; Normal alu insns without carry ++(define_insn_reservation "atom_alu_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "alu") ++ (and (eq_attr "memory" "!none") ++ (eq_attr "use_carry" "0")))) ++ "atom-simple-either") ++ ++;; Alu insn consuming CF, such as add/sbb ++(define_insn_reservation "atom_alu_carry" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "alu") ++ (and (eq_attr "memory" "none") ++ (eq_attr "use_carry" "1")))) ++ "atom-simple-either") ++ ++;; Alu insn consuming CF, such as add/sbb ++(define_insn_reservation "atom_alu_carry_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "alu") ++ (and (eq_attr "memory" "!none") ++ (eq_attr "use_carry" "1")))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_alu1" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_alu1_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_negnot" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "negnot") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_negnot_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "negnot") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_imov" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imov") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_imov_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imov") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++;; 16<-16, 32<-32 ++(define_insn_reservation "atom_imovx" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imovx") ++ (and (eq_attr "memory" "none") ++ (ior (and (match_operand:HI 0 "register_operand") ++ (match_operand:HI 1 "general_operand")) ++ (and (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "general_operand")))))) ++ "atom-simple-either") ++ ++;; 16<-16, 32<-32, mem ++(define_insn_reservation "atom_imovx_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imovx") ++ (and (eq_attr "memory" "!none") ++ (ior (and (match_operand:HI 0 "register_operand") ++ (match_operand:HI 1 "general_operand")) ++ (and (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "general_operand")))))) ++ "atom-simple-either") ++ ++;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8 ++(define_insn_reservation "atom_imovx_2" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imovx") ++ (and (eq_attr "memory" "none") ++ (ior (match_operand:QI 
0 "register_operand") ++ (ior (and (match_operand:SI 0 "register_operand") ++ (not (match_operand:SI 1 "general_operand"))) ++ (match_operand:DI 0 "register_operand")))))) ++ "atom-simple-0") ++ ++;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem ++(define_insn_reservation "atom_imovx_2_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imovx") ++ (and (eq_attr "memory" "!none") ++ (ior (match_operand:QI 0 "register_operand") ++ (ior (and (match_operand:SI 0 "register_operand") ++ (not (match_operand:SI 1 "general_operand"))) ++ (match_operand:DI 0 "register_operand")))))) ++ "atom-simple-0") ++ ++;; 16<-8 ++(define_insn_reservation "atom_imovx_3" 3 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imovx") ++ (and (match_operand:HI 0 "register_operand") ++ (match_operand:QI 1 "general_operand")))) ++ "atom-complex, atom-all-eu*2") ++ ++(define_insn_reservation "atom_lea" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "lea") ++ (eq_attr "mode" "!HI"))) ++ "atom-simple-either") ++ ++;; lea 16bit address is complex insn ++(define_insn_reservation "atom_lea_2" 2 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "lea") ++ (eq_attr "mode" "HI"))) ++ "atom-complex, atom-all-eu") ++ ++(define_insn_reservation "atom_incdec" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "incdec") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_incdec_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "incdec") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++;; simple shift instruction use SHIFT eu, none memory ++(define_insn_reservation "atom_ishift" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ishift") ++ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))) ++ "atom-simple-0") ++ ++;; simple shift instruction use SHIFT eu, memory ++(define_insn_reservation "atom_ishift_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ishift") ++ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0")))) ++ "atom-simple-0") ++ ++;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles ++(define_insn_reservation "atom_ishift_3" 7 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ishift") ++ (eq_attr "prefix_0f" "1"))) ++ "atom-complex, atom-all-eu*6") ++ ++(define_insn_reservation "atom_ishift1" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ishift1") ++ (eq_attr "memory" "none"))) ++ "atom-simple-0") ++ ++(define_insn_reservation "atom_ishift1_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ishift1") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-0") ++ ++(define_insn_reservation "atom_rotate" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "rotate") ++ (eq_attr "memory" "none"))) ++ "atom-simple-0") ++ ++(define_insn_reservation "atom_rotate_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "rotate") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-0") ++ ++(define_insn_reservation "atom_rotate1" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "rotate1") ++ (eq_attr "memory" "none"))) ++ "atom-simple-0") ++ ++(define_insn_reservation "atom_rotate1_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "rotate1") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-0") ++ ++(define_insn_reservation "atom_imul" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imul") ++ (and (eq_attr "memory" "none") (eq_attr "mode" "SI")))) ++ "atom-imul-32") ++ ++(define_insn_reservation "atom_imul_mem" 5 ++ (and 
(eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imul") ++ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI")))) ++ "atom-imul-32") ++ ++;; latency set to 10 as common 64x64 imul ++(define_insn_reservation "atom_imul_3" 10 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "imul") ++ (eq_attr "mode" "!SI"))) ++ "atom-complex, atom-all-eu*9") ++ ++(define_insn_reservation "atom_idiv" 65 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "idiv")) ++ "atom-complex, atom-all-eu*32, nothing*32") ++ ++(define_insn_reservation "atom_icmp" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "icmp") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_icmp_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "icmp") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_test" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "test") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_test_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "test") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_ibr" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "!load"))) ++ "atom-simple-1") ++ ++;; complex if jump target is from address ++(define_insn_reservation "atom_ibr_2" 2 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "atom-complex, atom-all-eu") ++ ++(define_insn_reservation "atom_setcc" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "setcc") ++ (eq_attr "memory" "!store"))) ++ "atom-simple-either") ++ ++;; 2 cycles complex if target is in memory ++(define_insn_reservation "atom_setcc_2" 2 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "setcc") ++ (eq_attr "memory" "store"))) ++ "atom-complex, atom-all-eu") ++ ++(define_insn_reservation "atom_icmov" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "icmov") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_icmov_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "icmov") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++;; UCODE if segreg, ignored ++(define_insn_reservation "atom_push" 2 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "push")) ++ "atom-dual-2c") ++ ++;; pop r64 is 1 cycle. UCODE if segreg, ignored ++(define_insn_reservation "atom_pop" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "pop") ++ (eq_attr "mode" "DI"))) ++ "atom-dual-1c") ++ ++;; pop non-r64 is 2 cycles. 
UCODE if segreg, ignored ++(define_insn_reservation "atom_pop_2" 2 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "pop") ++ (eq_attr "mode" "!DI"))) ++ "atom-dual-2c") ++ ++;; UCODE if segreg, ignored ++(define_insn_reservation "atom_call" 1 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "call")) ++ "atom-dual-1c") ++ ++(define_insn_reservation "atom_callv" 1 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "callv")) ++ "atom-dual-1c") ++ ++(define_insn_reservation "atom_leave" 3 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "leave")) ++ "atom-complex, atom-all-eu*2") ++ ++(define_insn_reservation "atom_str" 3 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "str")) ++ "atom-complex, atom-all-eu*2") ++ ++(define_insn_reservation "atom_sselog" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sselog") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_sselog_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sselog") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_sselog1" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "none"))) ++ "atom-simple-0") ++ ++(define_insn_reservation "atom_sselog1_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-0") ++ ++;; not pmad, not psad ++(define_insn_reservation "atom_sseiadd" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseiadd") ++ (and (not (match_operand:V2DI 0 "register_operand")) ++ (and (eq_attr "atom_unit" "!simul") ++ (eq_attr "atom_unit" "!complex"))))) ++ "atom-simple-either") ++ ++;; pmad, psad and 64 ++(define_insn_reservation "atom_sseiadd_2" 4 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseiadd") ++ (and (not (match_operand:V2DI 0 "register_operand")) ++ (and (eq_attr "atom_unit" "simul" ) ++ (eq_attr "mode" "DI"))))) ++ "atom-fmul-4c") ++ ++;; pmad, psad and 128 ++(define_insn_reservation "atom_sseiadd_3" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseiadd") ++ (and (not (match_operand:V2DI 0 "register_operand")) ++ (and (eq_attr "atom_unit" "simul" ) ++ (eq_attr "mode" "TI"))))) ++ "atom-fmul-5c") ++ ++;; if paddq(64 bit op), phadd/phsub ++(define_insn_reservation "atom_sseiadd_4" 6 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseiadd") ++ (ior (match_operand:V2DI 0 "register_operand") ++ (eq_attr "atom_unit" "complex")))) ++ "atom-complex, atom-all-eu*5") ++ ++;; if immediate op. 
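++;; (The sseishft classes below also test operands, not just
++;; attributes: (match_operand 2 "immediate_operand") splits the same
++;; insn type into an immediate-count form and a reg/mem-count form.
++;; As a sketch with a hypothetical class name, such a condition
++;; looks like:
++;;
++;;   (define_insn_reservation "example_ishft_imm" 1
++;;     (and (eq_attr "type" "sseishft")
++;;          (match_operand 2 "immediate_operand"))
++;;     "atom-simple-either")
++;;
++;; The real immediate-count reservation follows.)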
++(define_insn_reservation "atom_sseishft" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseishft") ++ (and (eq_attr "atom_unit" "!sishuf") ++ (match_operand 2 "immediate_operand")))) ++ "atom-simple-either") ++ ++;; if palignr or psrldq ++(define_insn_reservation "atom_sseishft_2" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseishft") ++ (and (eq_attr "atom_unit" "sishuf") ++ (match_operand 2 "immediate_operand")))) ++ "atom-simple-0") ++ ++;; if reg/mem op ++(define_insn_reservation "atom_sseishft_3" 2 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseishft") ++ (not (match_operand 2 "immediate_operand")))) ++ "atom-complex, atom-all-eu") ++ ++(define_insn_reservation "atom_sseimul" 1 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "sseimul")) ++ "atom-simple-0") ++ ++;; rcpss or rsqrtss ++(define_insn_reservation "atom_sse" 4 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF")))) ++ "atom-fmul-4c") ++ ++;; movshdup, movsldup. Suggest to type sseishft ++(define_insn_reservation "atom_sse_2" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sse") ++ (eq_attr "atom_sse_attr" "movdup"))) ++ "atom-simple-0") ++ ++;; lfence ++(define_insn_reservation "atom_sse_3" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sse") ++ (eq_attr "atom_sse_attr" "lfence"))) ++ "atom-simple-either") ++ ++;; sfence,clflush,mfence, prefetch ++(define_insn_reservation "atom_sse_4" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sse") ++ (ior (eq_attr "atom_sse_attr" "fence") ++ (eq_attr "atom_sse_attr" "prefetch")))) ++ "atom-simple-0") ++ ++;; rcpps, rsqrtss, sqrt, ldmxcsr ++(define_insn_reservation "atom_sse_5" 7 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sse") ++ (ior (ior (eq_attr "atom_sse_attr" "sqrt") ++ (eq_attr "atom_sse_attr" "mxcsr")) ++ (and (eq_attr "atom_sse_attr" "rcp") ++ (eq_attr "mode" "V4SF"))))) ++ "atom-complex, atom-all-eu*6") ++ ++;; xmm->xmm ++(define_insn_reservation "atom_ssemov" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssemov") ++ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy")))) ++ "atom-simple-either") ++ ++;; reg->xmm ++(define_insn_reservation "atom_ssemov_2" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssemov") ++ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r")))) ++ "atom-simple-0") ++ ++;; xmm->reg ++(define_insn_reservation "atom_ssemov_3" 3 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssemov") ++ (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy")))) ++ "atom-eu-0-3-1") ++ ++;; mov mem ++(define_insn_reservation "atom_ssemov_4" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "movu" "0") (eq_attr "memory" "!none")))) ++ "atom-simple-0") ++ ++;; movu mem ++(define_insn_reservation "atom_ssemov_5" 2 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssemov") ++ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none")))) ++ "atom-complex, atom-all-eu") ++ ++;; no memory simple ++(define_insn_reservation "atom_sseadd" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "!V2DF") ++ (eq_attr "atom_unit" "!complex"))))) ++ "atom-fadd-5c") ++ ++;; memory simple ++(define_insn_reservation "atom_sseadd_mem" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseadd") ++ 
(and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "!V2DF") ++ (eq_attr "atom_unit" "!complex"))))) ++ "atom-dual-5c") ++ ++;; maxps, minps, *pd, hadd, hsub ++(define_insn_reservation "atom_sseadd_3" 8 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseadd") ++ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex")))) ++ "atom-complex, atom-all-eu*7") ++ ++;; Except dppd/dpps ++(define_insn_reservation "atom_ssemul" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "mode" "!SF"))) ++ "atom-fmul-5c") ++ ++;; Except dppd/dpps, 4 cycle if mulss ++(define_insn_reservation "atom_ssemul_2" 4 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "mode" "SF"))) ++ "atom-fmul-4c") ++ ++(define_insn_reservation "atom_ssecmp" 1 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "ssecmp")) ++ "atom-simple-either") ++ ++(define_insn_reservation "atom_ssecomi" 10 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "ssecomi")) ++ "atom-complex, atom-all-eu*9") ++ ++;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi ++(define_insn_reservation "atom_ssecvt" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssecvt") ++ (ior (and (match_operand:V2SI 0 "register_operand") ++ (match_operand:V4SF 1 "register_operand")) ++ (and (match_operand:V4SF 0 "register_operand") ++ (match_operand:V2SI 1 "register_operand"))))) ++ "atom-fadd-5c") ++ ++;; memory and cvtpi2ps, cvtps2pi, cvttps2pi ++(define_insn_reservation "atom_ssecvt_2" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssecvt") ++ (ior (and (match_operand:V2SI 0 "register_operand") ++ (match_operand:V4SF 1 "memory_operand")) ++ (and (match_operand:V4SF 0 "register_operand") ++ (match_operand:V2SI 1 "memory_operand"))))) ++ "atom-dual-5c") ++ ++;; otherwise. 7 cycles average for cvtss2sd ++(define_insn_reservation "atom_ssecvt_3" 7 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "ssecvt") ++ (not (ior (and (match_operand:V2SI 0 "register_operand") ++ (match_operand:V4SF 1 "nonimmediate_operand")) ++ (and (match_operand:V4SF 0 "register_operand") ++ (match_operand:V2SI 1 "nonimmediate_operand")))))) ++ "atom-complex, atom-all-eu*6") ++ ++;; memory and cvtsi2sd ++(define_insn_reservation "atom_sseicvt" 5 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseicvt") ++ (and (match_operand:V2DF 0 "register_operand") ++ (match_operand:SI 1 "memory_operand")))) ++ "atom-dual-5c") ++ ++;; otherwise. 
8 cycles average for cvtsd2si ++(define_insn_reservation "atom_sseicvt_2" 8 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "sseicvt") ++ (not (and (match_operand:V2DF 0 "register_operand") ++ (match_operand:SI 1 "memory_operand"))))) ++ "atom-complex, atom-all-eu*7") ++ ++(define_insn_reservation "atom_ssediv" 62 ++ (and (eq_attr "cpu" "atom") ++ (eq_attr "type" "ssediv")) ++ "atom-complex, atom-all-eu*12, nothing*49") ++ ++;; simple for fmov ++(define_insn_reservation "atom_fmov" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "none"))) ++ "atom-simple-either") ++ ++;; simple for fmov ++(define_insn_reservation "atom_fmov_mem" 1 ++ (and (eq_attr "cpu" "atom") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "!none"))) ++ "atom-simple-either") ++ ++;; Define bypass here ++ ++;; There will be no stall from lea to non-mem EX insns ++(define_bypass 0 "atom_lea" ++ "atom_alu_carry, ++ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, ++ atom_incdec, atom_setcc, atom_icmov, atom_pop") ++ ++(define_bypass 0 "atom_lea" ++ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, ++ atom_imovx_mem, atom_imovx_2_mem, ++ atom_imov_mem, atom_icmov_mem, atom_fmov_mem" ++ "!ix86_agi_dependent") ++ ++;; There will be 3 cycles stall from EX insns to AGAN insns LEA ++(define_bypass 4 "atom_alu_carry, ++ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, ++ atom_incdec,atom_ishift,atom_ishift1,atom_rotate, ++ atom_rotate1, atom_setcc, atom_icmov, atom_pop, ++ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, ++ atom_imovx_mem, atom_imovx_2_mem, ++ atom_imov_mem, atom_icmov_mem, atom_fmov_mem" ++ "atom_lea") ++ ++;; There will be 3 cycles stall from EX insns to insns need addr calculation ++(define_bypass 4 "atom_alu_carry, ++ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, ++ atom_incdec,atom_ishift,atom_ishift1,atom_rotate, ++ atom_rotate1, atom_setcc, atom_icmov, atom_pop, ++ atom_imovx_mem, atom_imovx_2_mem, ++ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, ++ atom_imov_mem, atom_icmov_mem, atom_fmov_mem" ++ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, ++ atom_negnot_mem, atom_imov_mem, atom_incdec_mem, ++ atom_imovx_mem, atom_imovx_2_mem, ++ atom_imul_mem, atom_icmp_mem, ++ atom_test_mem, atom_icmov_mem, atom_sselog_mem, ++ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem, ++ atom_ishift_mem, atom_ishift1_mem, ++ atom_rotate_mem, atom_rotate1_mem" ++ "ix86_agi_dependent") ++ ++;; Stall from imul to lea is 8 cycles. ++(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea") ++ ++;; Stall from imul to memory address is 8 cycles. 
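++;; (define_bypass overrides the producer's declared latency for the
++;; listed consumer classes; an optional final string names a guard
++;; function, here ix86_agi_dependent, that must also accept the insn
++;; pair.  As a sketch with hypothetical class names,
++;;
++;;   (define_bypass 2 "example_producer" "example_consumer")
++;;
++;; says an example_consumer may use an example_producer result two
++;; cycles after it issues; the 8-cycle stalls noted above are thus
++;; written as bypass values of 9, one more than the stall.)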
++(define_bypass 9 "atom_imul, atom_imul_mem" ++ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, ++ atom_negnot_mem, atom_imov_mem, atom_incdec_mem, ++ atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem, ++ atom_rotate1_mem, atom_imul_mem, atom_icmp_mem, ++ atom_test_mem, atom_icmov_mem, atom_sselog_mem, ++ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem" ++ "ix86_agi_dependent") ++ ++;; There will be 0 cycle stall from cmp/test to jcc ++ ++;; There will be 1 cycle stall from flag producer to cmov and adc/sbb ++(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry, ++ atom_alu1, atom_negnot, atom_incdec, atom_ishift, ++ atom_ishift1, atom_rotate, atom_rotate1" ++ "atom_icmov, atom_alu_carry") ++ ++;; lea to shift count stall is 2 cycles ++(define_bypass 3 "atom_lea" ++ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, ++ atom_ishift_mem, atom_ishift1_mem, ++ atom_rotate_mem, atom_rotate1_mem" ++ "ix86_dep_by_shift_count") ++ ++;; lea to shift source stall is 1 cycle ++(define_bypass 2 "atom_lea" ++ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1" ++ "!ix86_dep_by_shift_count") ++ ++;; non-lea to shift count stall is 1 cycle ++(define_bypass 2 "atom_alu_carry, ++ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, ++ atom_incdec,atom_ishift,atom_ishift1,atom_rotate, ++ atom_rotate1, atom_setcc, atom_icmov, atom_pop, ++ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, ++ atom_imovx_mem, atom_imovx_2_mem, ++ atom_imov_mem, atom_icmov_mem, atom_fmov_mem" ++ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, ++ atom_ishift_mem, atom_ishift1_mem, ++ atom_rotate_mem, atom_rotate1_mem" ++ "ix86_dep_by_shift_count") +diff -Nur a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h +--- a/gcc/config/i386/cpuid.h 2009-04-10 01:23:07.000000000 +0200 ++++ b/gcc/config/i386/cpuid.h 2010-01-25 09:50:28.995687913 +0100 +@@ -29,6 +29,7 @@ + #define bit_CMPXCHG16B (1 << 13) + #define bit_SSE4_1 (1 << 19) + #define bit_SSE4_2 (1 << 20) ++#define bit_MOVBE (1 << 22) + #define bit_POPCNT (1 << 23) + #define bit_AES (1 << 25) + #define bit_XSAVE (1 << 26) +diff -Nur a/gcc/config/i386/cs-linux.h b/gcc/config/i386/cs-linux.h +--- a/gcc/config/i386/cs-linux.h 1970-01-01 01:00:00.000000000 +0100 ++++ b/gcc/config/i386/cs-linux.h 2010-01-25 09:50:28.995687913 +0100 +@@ -0,0 +1,41 @@ ++/* Sourcery G++ IA32 GNU/Linux Configuration. ++ Copyright (C) 2007 ++ Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* This configuration may be used either with the system glibc (in ++ system32 and system64 subdirectories) or with the included glibc ++ (in the sgxx-glibc subdirectory). 
*/
++
++#undef SYSROOT_SUFFIX_SPEC
++#define SYSROOT_SUFFIX_SPEC \
++ "%{msgxx-glibc:/sgxx-glibc ; \
++ m64:/system64 ; \
++ mrhel3:/system64 ; \
++ mrh73:/system32-old ; \
++ :/system32}"
++
++#undef SYSROOT_HEADERS_SUFFIX_SPEC
++#define SYSROOT_HEADERS_SUFFIX_SPEC SYSROOT_SUFFIX_SPEC
++
++/* See mips/wrs-linux.h for details on this use of
++ STARTFILE_PREFIX_SPEC. */
++#undef STARTFILE_PREFIX_SPEC
++#define STARTFILE_PREFIX_SPEC \
++ "%{m64: /usr/local/lib64/ /lib64/ /usr/lib64/} \
++ %{!m64: /usr/local/lib/ /lib/ /usr/lib/}"
+diff -Nur a/gcc/config/i386/cs-linux-lite.h b/gcc/config/i386/cs-linux-lite.h
+--- a/gcc/config/i386/cs-linux-lite.h 1970-01-01 01:00:00.000000000 +0100
++++ b/gcc/config/i386/cs-linux-lite.h 2010-01-25 09:50:28.995687913 +0100
+@@ -0,0 +1,31 @@
++/* Sourcery G++ Lite IA32 GNU/Linux Configuration.
++ Copyright (C) 2009
++ Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 3, or (at your option)
++any later version.
++
++GCC is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3. If not see
++<http://www.gnu.org/licenses/>. */
++
++#undef SYSROOT_SUFFIX_SPEC
++#define SYSROOT_SUFFIX_SPEC \
++ "%{march=atom:%{!m64:/atom} ; \
++ march=core2:%{m64:/core2}}"
++
++/* See mips/wrs-linux.h for details on this use of
++ STARTFILE_PREFIX_SPEC. */
++#undef STARTFILE_PREFIX_SPEC
++#define STARTFILE_PREFIX_SPEC \
++ "%{m64: /usr/local/lib64/ /lib64/ /usr/lib64/} \
++ %{!m64: /usr/local/lib/ /lib/ /usr/lib/}"
+diff -Nur a/gcc/config/i386/cs-linux.opt b/gcc/config/i386/cs-linux.opt
+--- a/gcc/config/i386/cs-linux.opt 1970-01-01 01:00:00.000000000 +0100
++++ b/gcc/config/i386/cs-linux.opt 2010-01-25 09:50:28.995687913 +0100
+@@ -0,0 +1,11 @@
++; Additional options for Sourcery G++.
++
++mrh73
++Target Undocumented
++
++mrhel3
++Target Undocumented
++
++msgxx-glibc
++Target
++Use included version of GLIBC
+diff -Nur a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
+--- a/gcc/config/i386/cygming.h 2009-07-12 17:56:41.000000000 +0200
++++ b/gcc/config/i386/cygming.h 2010-01-25 09:50:28.995687913 +0100
+@@ -34,7 +34,7 @@
+ #endif
+ 
+ #undef TARGET_64BIT_MS_ABI
+-#define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
++#define TARGET_64BIT_MS_ABI (!cfun ? ix86_abi == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
+ 
+ #undef DEFAULT_ABI
+ #define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI)
+@@ -203,7 +203,7 @@
+ #define CHECK_STACK_LIMIT 4000
+ 
+ #undef STACK_BOUNDARY
+-#define STACK_BOUNDARY (DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
++#define STACK_BOUNDARY (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
+ 
+ /* By default, target has a 80387, uses IEEE compatible arithmetic,
+ returns float values in the 387 and needs stack probes. 
+diff -Nur a/gcc/config/i386/cygming.opt b/gcc/config/i386/cygming.opt +--- a/gcc/config/i386/cygming.opt 2007-08-02 12:49:31.000000000 +0200 ++++ b/gcc/config/i386/cygming.opt 2010-01-25 09:50:28.995687913 +0100 +@@ -45,3 +45,7 @@ + mwindows + Target + Create GUI application ++ ++mpe-aligned-commons ++Target Var(use_pe_aligned_common) Init(HAVE_GAS_ALIGNED_COMM) ++Use the GNU extension to the PE format for aligned common data +diff -Nur a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c +--- a/gcc/config/i386/driver-i386.c 2009-05-27 16:54:00.000000000 +0200 ++++ b/gcc/config/i386/driver-i386.c 2010-01-25 09:50:29.005686600 +0100 +@@ -378,7 +378,7 @@ + /* Extended features */ + unsigned int has_lahf_lm = 0, has_sse4a = 0; + unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; +- unsigned int has_sse4_1 = 0, has_sse4_2 = 0; ++ unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; + unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; + unsigned int has_pclmul = 0; + +@@ -398,9 +398,22 @@ + + __cpuid (1, eax, ebx, ecx, edx); + +- /* We don't care for extended family. */ + model = (eax >> 4) & 0x0f; + family = (eax >> 8) & 0x0f; ++ if (vendor == SIG_INTEL) ++ { ++ unsigned int extended_model, extended_family; ++ ++ extended_model = (eax >> 12) & 0xf0; ++ extended_family = (eax >> 20) & 0xff; ++ if (family == 0x0f) ++ { ++ family += extended_family; ++ model += extended_model; ++ } ++ else if (family == 0x06) ++ model += extended_model; ++ } + + has_sse3 = ecx & bit_SSE3; + has_ssse3 = ecx & bit_SSSE3; +@@ -408,6 +421,7 @@ + has_sse4_2 = ecx & bit_SSE4_2; + has_avx = ecx & bit_AVX; + has_cmpxchg16b = ecx & bit_CMPXCHG16B; ++ has_movbe = ecx & bit_MOVBE; + has_popcnt = ecx & bit_POPCNT; + has_aes = ecx & bit_AES; + has_pclmul = ecx & bit_PCLMUL; +@@ -505,8 +519,8 @@ + break; + case PROCESSOR_PENTIUMPRO: + if (has_longmode) +- /* It is Core 2 Duo. */ +- cpu = "core2"; ++ /* It is Core 2 or Atom. */ ++ cpu = (model == 28) ? "atom" : "core2"; + else if (arch) + { + if (has_sse3) +@@ -597,6 +611,8 @@ + options = concat (options, "-mcx16 ", NULL); + if (has_lahf_lm) + options = concat (options, "-msahf ", NULL); ++ if (has_movbe) ++ options = concat (options, "-mmovbe ", NULL); + if (has_aes) + options = concat (options, "-maes ", NULL); + if (has_pclmul) +diff -Nur a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +--- a/gcc/config/i386/i386.c 2009-07-21 09:22:51.000000000 +0200 ++++ b/gcc/config/i386/i386.c 2010-01-25 09:50:29.005686600 +0100 +@@ -1036,6 +1036,79 @@ + 1, /* cond_not_taken_branch_cost. */ + }; + ++static const ++struct processor_costs atom_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (4), /* DI */ ++ COSTS_N_INSNS (2)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (26), /* HI */ ++ COSTS_N_INSNS (42), /* SI */ ++ COSTS_N_INSNS (74), /* DI */ ++ COSTS_N_INSNS (74)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 17, /* MOVE_RATIO */ ++ 2, /* cost for loading QImode using movzbl */ ++ {4, 4, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. 
++ Relative to reg-reg move (2). */ ++ {4, 4, 4}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {12, 12, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {8, 8}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {8, 8, 8}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {8, 8, 8}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 5, /* MMX or SSE register to integer */ ++ 32, /* size of l1 cache. */ ++ 256, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ 6, /* number of parallel prefetches */ ++ 3, /* Branch cost */ ++ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (8), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (20), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (8), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ ++ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, ++ {libcall, {{32, loop}, {64, rep_prefix_4_byte}, ++ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ {{libcall, {{8, loop}, {15, unrolled_loop}, ++ {2048, rep_prefix_4_byte}, {-1, libcall}}}, ++ {libcall, {{24, loop}, {32, unrolled_loop}, ++ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. */ ++}; ++ + /* Generic64 should produce code tuned for Nocona and K8. */ + static const + struct processor_costs generic64_cost = { +@@ -1194,6 +1267,7 @@ + #define m_PENT4 (1<