patch-2.2.19 linux/arch/arm/lib/memset.S

Next file: linux/arch/arm/vmlinux-armo.lds
Previous file: linux/arch/arm/lib/delay.S
Back to the patch index
Back to the overall index

diff -u --new-file --recursive --exclude-from /usr/src/exclude v2.2.18/arch/arm/lib/memset.S linux/arch/arm/lib/memset.S
@@ -1,88 +1,80 @@
 /*
- * linux/arch/arm/lib/memset.S
+ *  linux/arch/arm/lib/memset.S
  *
- * Copyright (C) 1995-1999 Russell King
+ *  Copyright (C) 1995-2000 Russell King
  *
- * ASM optimised string functions
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
  *
+ *  ASM optimised string functions
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
-#include "constants.h"
 
-		.text
-		.align	5
-ENTRY(memset)
-		mov	r3, r0
-		cmp	r2, #16
-		blt	6f
-		ands	ip, r3, #3
-		beq	1f
-		cmp	ip, #2
-		strltb	r1, [r3], #1			@ Align destination
-		strleb	r1, [r3], #1
-		strb	r1, [r3], #1
-		rsb	ip, ip, #4
-		sub	r2, r2, ip
-1:		orr	r1, r1, r1, lsl #8
-		orr	r1, r1, r1, lsl #16
-		cmp	r2, #256
-		blt	4f
-		stmfd	sp!, {r4, r5, lr}
-		mov	r4, r1
-		mov	r5, r1
-		mov	lr, r1
-		mov	ip, r2, lsr #6
-		sub	r2, r2, ip, lsl #6
-2:		stmia	r3!, {r1, r4, r5, lr}		@ 64 bytes at a time.
-		stmia	r3!, {r1, r4, r5, lr}
-		stmia	r3!, {r1, r4, r5, lr}
-		stmia	r3!, {r1, r4, r5, lr}
-		subs	ip, ip, #1
-		bne	2b
-		teq	r2, #0
-		LOADREGS(eqfd, sp!, {r4, r5, pc})	@ Now <64 bytes to go.
-		tst	r2, #32
-		stmneia	r3!, {r1, r4, r5, lr}
-		stmneia	r3!, {r1, r4, r5, lr}
-		tst	r2, #16
-		stmneia	r3!, {r1, r4, r5, lr}
-		ldmia	sp!, {r4, r5}
-3:		tst	r2, #8
-		stmneia	r3!, {r1, lr}
-		tst	r2, #4
-		strne	r1, [r3], #4
-		tst	r2, #2
-		strneb	r1, [r3], #1
-		strneb	r1, [r3], #1
-		tst	r2, #1
-		strneb	r1, [r3], #1
-		LOADREGS(fd, sp!, {pc})
+	.text
+	.align	5
 
-4:		movs	ip, r2, lsr #3
-		beq	3b
-		sub	r2, r2, ip, lsl #3
-		stmfd	sp!, {lr}
-		mov	lr, r1
-		subs	ip, ip, #4
-5:		stmgeia	r3!, {r1, lr}
-		stmgeia	r3!, {r1, lr}
-		stmgeia	r3!, {r1, lr}
-		stmgeia	r3!, {r1, lr}
-		subges	ip, ip, #4
-		bge	5b
-		tst	ip, #2
-		stmneia	r3!, {r1, lr}
-		stmneia	r3!, {r1, lr}
-		tst	ip, #1
-		stmneia	r3!, {r1, lr}
-		teq	r2, #0
-		LOADREGS(eqfd, sp!, {pc})
-		b	3b
+/*
+ * void *memset(void *s, int c, size_t n)
+ * In: r0 = s, r1 = c, r2 = n.  Out: r0 = s, which is never written here.
+ * ip is the running store pointer; r4 and lr are saved when borrowed.
+ */
+ENTRY(memset)
+	and	r1, r1, #255		@ c is converted to unsigned char
+	ands	r3, r0, #3		@ 1 unaligned?
+	mov	ip, r0			@ preserve r0 as return value
+	bne	6f			@ 1
+/* From here on the store pointer in ip is word aligned. */
+1:	orr	r1, r1, r1, lsl #8	@ replicate the fill byte into
+	orr	r1, r1, r1, lsl #16	@ all four bytes of the word
+	mov	r3, r1
+	cmp	r2, #16
+	blt	4f
 
-6:		subs	r2, r2, #1
-		strgeb	r1, [r3], #1
-		bgt	6b
-		RETINSTR(mov, pc, lr)
+/* Borrow r4 and lr as extra fill registers; both are restored below. */
+	stmfd	sp!, {r4, lr}
+	mov	r4, r1
+	mov	lr, r1
 
+2:	subs	r2, r2, #64
+	stmgeia	ip!, {r1, r3, r4, lr}	@ 64 bytes at a time.
+	stmgeia	ip!, {r1, r3, r4, lr}
+	stmgeia	ip!, {r1, r3, r4, lr}
+	stmgeia	ip!, {r1, r3, r4, lr}
+	bgt	2b
+	LOADREGS(eqfd, sp!, {r4, pc})	@ Now <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+	tst	r2, #32
+	stmneia	ip!, {r1, r3, r4, lr}
+	stmneia	ip!, {r1, r3, r4, lr}
+	tst	r2, #16
+	stmneia	ip!, {r1, r3, r4, lr}
+	ldmfd	sp!, {r4, lr}
 
+4:	tst	r2, #8
+	stmneia	ip!, {r1, r3}
+	tst	r2, #4
+	strne	r1, [ip], #4
+/*
+ * When we get here, we've got less than 4 bytes to set.  We
+ * may have an unaligned pointer as well.
+ */
+5:	tst	r2, #2
+	strneb	r1, [ip], #1
+	strneb	r1, [ip], #1
+	tst	r2, #1
+	strneb	r1, [ip], #1
+	RETINSTR(mov,pc,lr)
+
+/* Unaligned start: store 4 - r3 bytes to reach a word boundary. */
+6:	subs	r2, r2, #4		@ 1 do we have enough
+	blt	5b			@ 1 bytes to align with?
+	cmp	r3, #2			@ 1
+	strltb	r1, [ip], #1		@ 1
+	strleb	r1, [ip], #1		@ 1
+	strb	r1, [ip], #1		@ 1
+	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
+	b	1b			@ pointer aligned, length adjusted

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)