diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c 2007-06-03 13:59:39.000000000 +0200 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c 2007-06-03 14:11:43.000000000 +0200 @@ -110,45 +110,28 @@ If you want to check that the allocation was right; then check the equalities in the first comment. It should say "r13=r13, r12=r12, r11=r11" */ - __asm__ volatile (" - ;; Check that the following is true (same register names on - ;; both sides of equal sign, as in r8=r8): - ;; %0=r13, %1=r12, %4=r11 - ;; - ;; Save the registers we'll clobber in the movem process - ;; on the stack. Don't mention them to gcc, it will only be - ;; upset. - subq 11*4,$sp - movem $r10,[$sp] - - move.d $r11,$r0 - move.d $r11,$r1 - move.d $r11,$r2 - move.d $r11,$r3 - move.d $r11,$r4 - move.d $r11,$r5 - move.d $r11,$r6 - move.d $r11,$r7 - move.d $r11,$r8 - move.d $r11,$r9 - move.d $r11,$r10 - - ;; Now we've got this: - ;; r13 - dst - ;; r12 - n + __asm__ volatile ( + "subq 11*4,$sp\n\t" + "movem $r10,[$sp]\n\t" + "move.d $r11,$r0\n\t" + "move.d $r11,$r1\n\t" + "move.d $r11,$r2\n\t" + "move.d $r11,$r3\n\t" + "move.d $r11,$r4\n\t" + "move.d $r11,$r5\n\t" + "move.d $r11,$r6\n\t" + "move.d $r11,$r7\n\t" + "move.d $r11,$r8\n\t" + "move.d $r11,$r9\n\t" + "move.d $r11,$r10\n\t" + "subq 12*4,$r12\n\t" +"0:\n\t" + "subq 12*4,$r12\n\t" + "bge 0b\n\t" + "movem $r11,[$r13+]\n\t" + "addq 12*4,$r12\n\t" + "movem [$sp+],$r10" - ;; Update n for the first loop - subq 12*4,$r12 -0: - subq 12*4,$r12 - bge 0b - movem $r11,[$r13+] - - addq 12*4,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10" - /* Outputs */ : "=r" (dst), "=r" (n) /* Inputs */ : "0" (dst), "1" (n), "r" (lc)); @@ -161,10 +144,14 @@ while ( n >= 16 ) { - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; n -= 16; } @@ -182,67 +169,95 @@ *(short*)dst = (short) lc; break; case 3: - *((short*)dst)++ = (short) lc; + *((short*)dst) = (short) lc; + dst+=2; *(char*)dst = (char) lc; break; case 4: - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; break; case 5: - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; *(char*)dst = (char) lc; break; case 6: - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; *(short*)dst = (short) lc; break; case 7: - *((long*)dst)++ = lc; - *((short*)dst)++ = (short) lc; + *((long*)dst) = lc; + dst+=4; + *((short*)dst) = (short) lc; + dst+=2; *(char*)dst = (char) lc; break; case 8: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; break; case 9: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; *(char*)dst = (char) lc; break; case 10: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; *(short*)dst = (short) lc; break; case 11: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((short*)dst)++ = (short) lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((short*)dst) = (short) lc; + dst+=2; *(char*)dst = (char) lc; break; case 12: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; break; case 13: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; *(char*)dst = (char) lc; break; case 14: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; *(short*)dst = (short) lc; break; case 15: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((short*)dst)++ = (short) lc; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((long*)dst) = lc; + dst+=4; + *((short*)dst) = (short) lc; + dst+=2; *(char*)dst = (char) lc; break; } diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c linux-2.6.19.2/arch/cris/arch-v10/lib/string.c --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c 2007-06-03 13:59:39.000000000 +0200 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/string.c 2007-06-03 14:21:02.000000000 +0200 @@ -95,37 +95,19 @@ If you want to check that the allocation was right; then check the equalities in the first comment. It should say "r13=r13, r11=r11, r12=r12" */ - __asm__ volatile (" - ;; Check that the following is true (same register names on - ;; both sides of equal sign, as in r8=r8): - ;; %0=r13, %1=r11, %2=r12 - ;; - ;; Save the registers we'll use in the movem process - ;; on the stack. - subq 11*4,$sp - movem $r10,[$sp] - - ;; Now we've got this: - ;; r11 - src - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 44,$r12 -0: - movem [$r11+],$r10 - subq 44,$r12 - bge 0b - movem $r10,[$r13+] - - addq 44,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10" - + __asm__ volatile ( + "subq 11*4,$sp\n\t" + "movem $r10,[$sp]\n\t" + "subq 44,$r12\n\t" +"0:\n\t" + "movem [$r11+],$r10\n\t" + "subq 44,$r12\n\t" + "bge 0b\n\t" + "movem $r10,[$r13+]\n\t" + "addq 44,$r12\n\t" + "movem [$sp+],$r10\n\t" /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) /* Inputs */ : "0" (dst), "1" (src), "2" (n)); - } /* Either we directly starts copying, using dword copying @@ -135,10 +117,14 @@ while ( n >= 16 ) { - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; n -= 16; } @@ -156,67 +142,95 @@ *(short*)dst = *(short*)src; break; case 3: - *((short*)dst)++ = *((short*)src)++; + *((short*)dst) = *((short*)src); + src+=2;dst+=2; *(char*)dst = *(char*)src; break; case 4: - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; break; case 5: - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; *(char*)dst = *(char*)src; break; case 6: - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; *(short*)dst = *(short*)src; break; case 7: - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((short*)dst) = *((short*)src); + src+=2;dst+=2; *(char*)dst = *(char*)src; break; case 8: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; break; case 9: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; *(char*)dst = *(char*)src; break; case 10: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; *(short*)dst = *(short*)src; break; case 11: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((short*)dst) = *((short*)src); + src+=2;dst+=2; *(char*)dst = *(char*)src; break; case 12: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; break; case 13: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; *(char*)dst = *(char*)src; break; case 14: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; *(short*)dst = *(short*)src; break; case 15: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((long*)dst) = *((long*)src); + src+=4;dst+=4; + *((short*)dst) = *((short*)src); + src+=2;dst+=2; *(char*)dst = *(char*)src; break; } diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c --- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c 2007-06-03 13:59:39.000000000 +0200 +++ linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c 2007-06-03 14:25:55.000000000 +0200 @@ -88,63 +88,38 @@ If you want to check that the allocation was right; then check the equalities in the first comment. It should say "r13=r13, r11=r11, r12=r12". */ - __asm__ volatile ("\ - .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ - .err \n\ - .endif \n\ - - ;; Save the registers we'll use in the movem process - ;; on the stack. - subq 11*4,$sp - movem $r10,[$sp] - - ;; Now we've got this: - ;; r11 - src - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 44,$r12 - -; Since the noted PC of a faulting instruction in a delay-slot of a taken -; branch, is that of the branch target, we actually point at the from-movem -; for this case. There is no ambiguity here; if there was a fault in that -; instruction (meaning a kernel oops), the faulted PC would be the address -; after *that* movem. - -0: - movem [$r11+],$r10 - subq 44,$r12 - bge 0b - movem $r10,[$r13+] -1: - addq 44,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10 -2: - .section .fixup,\"ax\" - -; To provide a correct count in r10 of bytes that failed to be copied, -; we jump back into the loop if the loop-branch was taken. There is no -; performance penalty for sany use; the program will segfault soon enough. - -3: - move.d [$sp],$r10 - addq 44,$r10 - move.d $r10,[$sp] - jump 0b -4: - movem [$sp+],$r10 - addq 44,$r10 - addq 44,$r12 - jump 2b - - .previous - .section __ex_table,\"a\" - .dword 0b,3b - .dword 1b,4b - .previous" + __asm__ volatile ( + ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t" + ".err \n\t" + ".endif \n\t" + "subq 11*4,$sp\n\t" + "movem $r10,[$sp]\n\t" + "subq 44,$r12\n\t" + "0:\n\t" + "movem [$r11+],$r10\n\t" + "subq 44,$r12\n\t" + "bge 0b\n\t" + "movem $r10,[$r13+]\n\t" + "1:\n\t" + "addq 44,$r12 \n\t" + "movem [$sp+],$r10\n\t" + "2:\n\t" + ".section .fixup,\"ax\"\n\t" + "3:\n\t" + "move.d [$sp],$r10\n\t" + "addq 44,$r10\n\t" + "move.d $r10,[$sp]\n\t" + "jump 0b\n\t" + "4:\n\t" + "movem [$sp+],$r10\n\t" + "addq 44,$r10\n\t" + "addq 44,$r12\n\t" + "jump 2b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n\t" + ".dword 0b,3b\n\t" + ".dword 1b,4b\n\t" + ".previous\n\t" /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); @@ -253,60 +228,32 @@ If you want to check that the allocation was right; then check the equalities in the first comment. It should say "r13=r13, r11=r11, r12=r12" */ - __asm__ volatile (" - .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ - .err \n\ - .endif \n\ - - ;; Save the registers we'll use in the movem process - ;; on the stack. - subq 11*4,$sp - movem $r10,[$sp] - - ;; Now we've got this: - ;; r11 - src - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 44,$r12 -0: - movem [$r11+],$r10 -1: - subq 44,$r12 - bge 0b - movem $r10,[$r13+] - - addq 44,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10 -4: - .section .fixup,\"ax\" - -;; Do not jump back into the loop if we fail. For some uses, we get a -;; page fault somewhere on the line. Without checking for page limits, -;; we don't know where, but we need to copy accurately and keep an -;; accurate count; not just clear the whole line. To do that, we fall -;; down in the code below, proceeding with smaller amounts. It should -;; be kept in mind that we have to cater to code like what at one time -;; was in fs/super.c: -;; i = size - copy_from_user((void *)page, data, size); -;; which would cause repeated faults while clearing the remainder of -;; the SIZE bytes at PAGE after the first fault. -;; A caveat here is that we must not fall through from a failing page -;; to a valid page. - -3: - movem [$sp+],$r10 - addq 44,$r12 ;; Get back count before faulting point. - subq 44,$r11 ;; Get back pointer to faulting movem-line. - jump 4b ;; Fall through, pretending the fault didn't happen. - - .previous - .section __ex_table,\"a\" - .dword 1b,3b - .previous" + __asm__ volatile ( + ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t" + ".err \n\t" + ".endif \n\t" + "subq 11*4,$sp\n\t" + "movem $r10,[$sp]\n\t" + "subq 44,$r12\n\t" + "0:\n\t" + "movem [$r11+],$r10\n\t" + "1:\n\t" + "subq 44,$r12\n\t" + "bge 0b\n\t" + "movem $r10,[$r13+]\n\t" + "addq 44,$r12 \n\t" + "movem [$sp+],$r10\n\t" + "4:\n\t" + ".section .fixup,\"ax\"\n\t" + "3:\n\t" + "movem [$sp+],$r10\n\t" + "addq 44,$r12\n\t" + "subq 44,$r11\n\t" + "jump 4b \n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n\t" + ".dword 1b,3b\n\t" + ".previous\n\t" /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); @@ -425,66 +372,50 @@ If you want to check that the allocation was right; then check the equalities in the first comment. It should say something like "r13=r13, r11=r11, r12=r12". */ - __asm__ volatile (" - .ifnc %0%1%2,$r13$r12$r10 \n\ - .err \n\ - .endif \n\ - - ;; Save the registers we'll clobber in the movem process - ;; on the stack. Don't mention them to gcc, it will only be - ;; upset. - subq 11*4,$sp - movem $r10,[$sp] - - clear.d $r0 - clear.d $r1 - clear.d $r2 - clear.d $r3 - clear.d $r4 - clear.d $r5 - clear.d $r6 - clear.d $r7 - clear.d $r8 - clear.d $r9 - clear.d $r10 - clear.d $r11 - - ;; Now we've got this: - ;; r13 - dst - ;; r12 - n - - ;; Update n for the first loop - subq 12*4,$r12 -0: - subq 12*4,$r12 - bge 0b - movem $r11,[$r13+] -1: - addq 12*4,$r12 ;; compensate for last loop underflowing n - - ;; Restore registers from stack - movem [$sp+],$r10 -2: - .section .fixup,\"ax\" -3: - move.d [$sp],$r10 - addq 12*4,$r10 - move.d $r10,[$sp] - clear.d $r10 - jump 0b - -4: - movem [$sp+],$r10 - addq 12*4,$r10 - addq 12*4,$r12 - jump 2b - - .previous - .section __ex_table,\"a\" - .dword 0b,3b - .dword 1b,4b - .previous" - + __asm__ volatile ( + ".ifnc %0%1%2,$r13$r12$r10\n\t" + ".err \n\t" + ".endif\n\t" + "subq 11*4,$sp\n\t" + "movem $r10,[$sp]\n\t" + "clear.d $r0\n\t" + "clear.d $r1\n\t" + "clear.d $r2\n\t" + "clear.d $r3\n\t" + "clear.d $r4\n\t" + "clear.d $r5\n\t" + "clear.d $r6\n\t" + "clear.d $r7\n\t" + "clear.d $r8\n\t" + "clear.d $r9\n\t" + "clear.d $r10\n\t" + "clear.d $r11\n\t" + "subq 12*4,$r12\n\t" + "0:\n\t" + "subq 12*4,$r12\n\t" + "bge 0b\n\t" + "movem $r11,[$r13+]\n\t" + "1: \n\t" + "addq 12*4,$r12 \n\t" + "movem [$sp+],$r10\n\t" + "2:\n\t" + ".section .fixup,\"ax\"\n\t" + "3:\n\t" + "move.d [$sp],$r10\n\t" + "addq 12*4,$r10\n\t" + "move.d $r10,[$sp]\n\t" + "clear.d $r10\n\t" + "jump 0b\n\t" + "4:\n\t" + "movem [$sp+],$r10\n\t" + "addq 12*4,$r10\n\t" + "addq 12*4,$r12\n\t" + "jump 2b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n\t" + ".dword 0b,3b\n\t" + ".dword 1b,4b\n\t" + ".previous\n\t" /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn) /* Inputs */ : "0" (dst), "1" (n), "2" (retn) /* Clobber */ : "r11");