wifipineapple-openwrt/target/linux/generic-2.6/patches-2.6.27/023-mips_delay_gcc4.4.0.patch

From: Wu Zhangjin <wuzj@lemote.com>

the gcc 4.4 support for MIPS mostly refer to this PATCH:
http://www.nabble.com/-PATCH--MIPS:-Handle-removal-of-%27h%27-constraint-in-GCC-4.4-td22192768.html
but have been tuned a little.

because only gcc 4.4 have loongson-specific support, so, we need to
choose the suitable -march argument for gcc <= 4.3 and gcc >= 4.4, and
we also need to consider use -march=loongson2e and -march=loongson2f for
loongson2e and loongson2f respectively. this is handled by adding two
new kernel options: CPU_LOONGSON2E and CPU_LOONGSON2F(thanks for the
solutin provided by ZhangLe).

I have tested it on FuLoong(2f) in 32bit and 64bit with gcc-4.4 and
gcc-4.3. so, basically, it works.

Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
---
 arch/mips/Makefile               |    9 +++++-
 arch/mips/include/asm/compiler.h |   10 ++++++
 arch/mips/include/asm/delay.h    |   58 +++++++++++++++++++++++++------------
 3 files changed, 57 insertions(+), 20 deletions(-)

--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -119,7 +119,14 @@ cflags-$(CONFIG_CPU_R4300)	+= -march=r43
 cflags-$(CONFIG_CPU_VR41XX)	+= -march=r4100 -Wa,--trap
 cflags-$(CONFIG_CPU_R4X00)	+= -march=r4600 -Wa,--trap
 cflags-$(CONFIG_CPU_TX49XX)	+= -march=r4600 -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2)	+= -march=r4600 -Wa,--trap
+
+# only gcc >= 4.4 have the loongson-specific support
+cflags-$(CONFIG_CPU_LOONGSON2)	+= -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E)	+= $(shell if [ $(call cc-version) -lt 0440 ] ; then \
+	echo $(call cc-option,-march=r4600); else echo $(call cc-option,-march=loongson2e); fi ;)
+cflags-$(CONFIG_CPU_LOONGSON2F)	+= $(shell if [ $(call cc-version) -lt 0440 ] ; then \
+	echo $(call cc-option,-march=r4600); else echo $(call cc-option,-march=loongson2f); fi ;)
+
 cflags-$(CONFIG_CPU_MIPS32_R1)	+= $(call cc-option,-march=mips32,-mips32 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS32) \
 			-Wa,-mips32 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS32_R2)	+= $(call cc-option,-march=mips32r2,-mips32r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS32) \
--- a/include/asm-mips/compiler.h
+++ b/include/asm-mips/compiler.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2004, 2007  Maciej W. Rozycki
+ * Copyright (C) 2009  Wu Zhangjin, wuzj@lemote.com
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -16,4 +17,13 @@
 #define GCC_REG_ACCUM "accum"
 #endif

+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+#define GCC_NO_H_CONSTRAINT
+#ifdef CONFIG_64BIT
+typedef unsigned int uintx_t __attribute__((mode(TI)));
+#else
+typedef u64 uintx_t;
+#endif
+#endif
+
 #endif /* _ASM_COMPILER_H */
--- a/include/asm-mips/delay.h
+++ b/include/asm-mips/delay.h
@@ -7,6 +7,7 @@
  * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2009  Wu Zhangjin, wuzj@lemote.com
  */
 #ifndef _ASM_DELAY_H
 #define _ASM_DELAY_H
@@ -48,6 +49,43 @@ static inline void __delay(unsigned long
 		: "0" (loops), "r" (1));
 }

+/*
+ * convert usecs to loops
+ *
+ * handle removal of 'h' constraint in GCC 4.4
+ */
+
+#ifndef GCC_NO_H_CONSTRAINT	/* gcc <= 4.3 */
+static inline unsigned long __usecs_to_loops(unsigned long usecs,
+		unsigned long lpj)
+{
+	unsigned long hi, lo;
+
+	if (sizeof(long) == 4)
+		__asm__("multu\t%2, %3"
+		: "=h" (usecs), "=l" (lo)
+		: "r" (usecs), "r" (lpj)
+		: GCC_REG_ACCUM);
+	else if (sizeof(long) == 8 && !R4000_WAR)
+		__asm__("dmultu\t%2, %3"
+		: "=h" (usecs), "=l" (lo)
+		: "r" (usecs), "r" (lpj)
+		: GCC_REG_ACCUM);
+	else if (sizeof(long) == 8 && R4000_WAR)
+		__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
+		: "=r" (usecs), "=h" (hi), "=l" (lo)
+		: "r" (usecs), "r" (lpj)
+		: GCC_REG_ACCUM);
+
+	return usecs;
+}
+#else	/* GCC_NO_H_CONSTRAINT, gcc >= 4.4 */
+static inline unsigned long __usecs_to_loops(unsigned long usecs,
+		unsigned long lpj)
+{
+	return ((uintx_t)usecs * lpj) >> BITS_PER_LONG;
+}
+#endif

 /*
  * Division by multiplication: you don't have to worry about
@@ -62,8 +100,6 @@ static inline void __delay(unsigned long

 static inline void __udelay(unsigned long usecs, unsigned long lpj)
 {
-	unsigned long hi, lo;
-
 	/*
 	 * The rates of 128 is rounded wrongly by the catchall case
 	 * for 64-bit.  Excessive precission?  Probably ...
@@ -77,23 +113,7 @@ static inline void __udelay(unsigned lon
 	                           0x80000000ULL) >> 32);
 #endif

-	if (sizeof(long) == 4)
-		__asm__("multu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && !R4000_WAR)
-		__asm__("dmultu\t%2, %3"
-		: "=h" (usecs), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-	else if (sizeof(long) == 8 && R4000_WAR)
-		__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
-		: "=r" (usecs), "=h" (hi), "=l" (lo)
-		: "r" (usecs), "r" (lpj)
-		: GCC_REG_ACCUM);
-
-	__delay(usecs);
+	__delay(__usecs_to_loops(usecs, lpj));
 }

 #define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val