[multiple changes]

2003-08-27  Richard Earnshaw  <rearnsha@arm.com>

	* lib1funcs.asm (L_ieee754_sp): New.  Include ieee754-sf.S.
	(L_ieee754_dp): New.  Include ieee754-df.S.
	* arm/ieee754-sf.S: Rework to allow interworking, calling from Thumb,
	and compilation in apcs-26 mode.
	* arm/ieee754-df.S: Likewise.
	* t-arm-elf (DPBIT, FPBIT, fp-bit.c dp-bit.c): Delete rules
	(LIB1ASMFUNCS): Add _ieee754_sp and _ieee754_dp targets.

2003-08-27  Nicolas Pitre  <nico@cam.org>

	* arm/ieee754-sf.S: New.
	* arm/ieee754-df.S: New.

From-SVN: r70845
commit 4202ce8201 (parent b7bc76e321)
5 changed files with 2177 additions and 23 deletions
gcc/ChangeLog
@@ -1,3 +1,18 @@
2003-08-27  Richard Earnshaw  <rearnsha@arm.com>

	* lib1funcs.asm (L_ieee754_sp): New.  Include ieee754-sf.S.
	(L_ieee754_dp): New.  Include ieee754-df.S.
	* arm/ieee754-sf.S: Rework to allow interworking, calling from Thumb,
	and compilation in apcs-26 mode.
	* arm/ieee754-df.S: Likewise.
	* t-arm-elf (DPBIT, FPBIT, fp-bit.c dp-bit.c): Delete rules
	(LIB1ASMFUNCS): Add _ieee754_sp and _ieee754_dp targets.

2003-08-27  Nicolas Pitre  <nico@cam.org>

	* arm/ieee754-sf.S: New.
	* arm/ieee754-df.S: New.

2003-08-27  Jakub Jelinek  <jakub@redhat.com>

	* builtins.c (expand_builtin_expect_jump): Save pending_stack_adjust
gcc/config/arm/ieee754-df.S (new file, 1331 lines; diff suppressed because it is too large)

gcc/config/arm/ieee754-sf.S (new file, 813 lines)
@@ -0,0 +1,813 @@
/* ieee754-sf.S single-precision floating point support for ARM

   Copyright (C) 2003  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combine
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 *
 * Only the default rounding mode is supported, in the interest of
 * performance.  Exceptions aren't supported yet, but they can be added
 * quite easily if necessary without hurting performance.
 */

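For context, a minimal sketch of how these routines are reached (the double-underscore symbol names below follow the usual libgcc naming added by FUNC_START and are an assumption of this sketch, not part of the patch): on a soft-float ARM target, GCC lowers each single-precision operation to a call into the corresponding routine defined in this file.

	/* Built with a soft-float ARM toolchain, each operation below becomes
	   a library call rather than an FPU instruction.  */
	float f_add(float a, float b) { return a + b; }   /* -> __addsf3  */
	float f_sub(float a, float b) { return a - b; }   /* -> __subsf3  */
	float f_mul(float a, float b) { return a * b; }   /* -> __mulsf3  */
	float f_div(float a, float b) { return a / b; }   /* -> __divsf3  */
	int   f_fix(float a)          { return (int)a; }  /* -> __fixsfsi */
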
@ This selects the minimum architecture level required.
#undef __ARM_ARCH__
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
#undef __ARM_ARCH__
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
#define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5TE__)
#undef __ARM_ARCH__
#define __ARM_ARCH__ 5
#endif

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
#undef RET
#undef RETc
#define RET	bx lr
#define RETc(x)	bx##x lr
#if (__ARM_ARCH__ == 4) && (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#define __FP_INTERWORKING__
#endif
#endif

#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
.endm
#else
.macro	ARM_FUNC_START name
	FUNC_START \name
.endm
#endif

ARM_FUNC_START negsf2
	eor	r0, r0, #0x80000000	@ flip sign bit
	RET

ARM_FUNC_START subsf3
	eor	r1, r1, #0x80000000	@ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START addsf3

1:	@ Compare both args, return zero if equal except for the sign.
	eor	r2, r0, r1
	teq	r2, #0x80000000
	beq	LSYM(Lad_z)

	@ If first arg is 0 or -0, return second arg.
	@ If second arg is 0 or -0, return first arg.
	bics	r2, r0, #0x80000000
	moveq	r0, r1
	bicnes	r2, r1, #0x80000000
	RETc(eq)

	@ Mask out exponents.
	mov	ip, #0xff000000
	and	r2, r0, ip, lsr #1
	and	r3, r1, ip, lsr #1

	@ If either of them is 255, result will be INF or NAN
	teq	r2, ip, lsr #1
	teqne	r3, ip, lsr #1
	beq	LSYM(Lad_i)

	@ Compute exponent difference.  Make largest exponent in r2,
	@ corresponding arg in r0, and positive exponent difference in r3.
	subs	r3, r3, r2
	addgt	r2, r2, r3
	eorgt	r1, r0, r1
	eorgt	r0, r1, r0
	eorgt	r1, r0, r1
	rsblt	r3, r3, #0

	@ If exponent difference is too large, return largest argument
	@ already in r0.  We need up to 25 bits to handle proper rounding
	@ of 0x1p25 - 1.1.
	cmp	r3, #(25 << 23)
	RETc(hi)

	@ Convert mantissa to signed integer.
	tst	r0, #0x80000000
	orr	r0, r0, #0x00800000
	bic	r0, r0, #0xff000000
	rsbne	r0, r0, #0
	tst	r1, #0x80000000
	orr	r1, r1, #0x00800000
	bic	r1, r1, #0xff000000
	rsbne	r1, r1, #0

	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not a common case, rescale them off line.
	teq	r2, r3
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Scale down second arg with exponent difference.
	@ Apply shift one bit left to first arg and the rest to second arg
	@ to simplify things later, but only if exponent does not become 0.
	movs	r3, r3, lsr #23
	teqne	r2, #(1 << 23)
	movne	r0, r0, lsl #1
	subne	r2, r2, #(1 << 23)
	subne	r3, r3, #1

	@ Shift second arg into ip, keep leftover bits into r1.
	mov	ip, r1, asr r3
	rsb	r3, r3, #32
	mov	r1, r1, lsl r3

	add	r0, r0, ip		@ the actual addition

	@ We now have a 64 bit result in r0-r1.
	@ Keep absolute value in r0-r1, sign in r3.
	ands	r3, r0, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	r1, r1, #0
	rsc	r0, r0, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	r0, #0x00800000
	bcc	LSYM(Lad_l)
	cmp	r0, #0x01000000
	bcc	LSYM(Lad_r0)
	cmp	r0, #0x02000000
	bcc	LSYM(Lad_r1)

	@ Result needs to be shifted right.
	movs	r0, r0, lsr #1
	mov	r1, r1, rrx
	add	r2, r2, #(1 << 23)
LSYM(Lad_r1):
	movs	r0, r0, lsr #1
	mov	r1, r1, rrx
	add	r2, r2, #(1 << 23)

	@ Our result is now properly aligned into r0, remaining bits in r1.
	@ Round with MSB of r1.  If halfway between two numbers, round towards
	@ LSB of r0 = 0.
LSYM(Lad_r0):
	add	r0, r0, r1, lsr #31
	teq	r1, #0x80000000
	biceq	r0, r0, #1

	@ Rounding may have added a new MSB.  Adjust exponent.
	@ That MSB will be cleared when exponent is merged below.
	tst	r0, #0x01000000
	addne	r2, r2, #(1 << 23)

	@ Make sure we did not bust our exponent.
	cmp	r2, #(254 << 23)
	bhi	LSYM(Lad_o)

	@ Pack final result together.
LSYM(Lad_e):
	bic	r0, r0, #0x01800000
	orr	r0, r0, r2
	orr	r0, r0, r3
	RET

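A minimal C sketch of the rounding step performed at LSYM(Lad_r0) above (round to nearest, ties to even), assuming the discarded bits are held in a 32-bit word as they are in r1: the mantissa is bumped by the MSB of the remainder, and an exact halfway case forces the LSB back to 0.  The helper name is invented for the sketch.

	#include <stdint.h>

	static uint32_t round_nearest_even (uint32_t mant, uint32_t rest)
	{
	  mant += rest >> 31;		/* add MSB of the discarded bits */
	  if (rest == 0x80000000u)	/* exactly halfway: round to even */
	    mant &= ~1u;
	  return mant;
	}
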
	@ Result must be shifted left.
	@ No rounding necessary since r1 will always be 0.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	movs	ip, r0, lsr #12
	moveq	r0, r0, lsl #12
	subeq	r2, r2, #(12 << 23)
	tst	r0, #0x00ff0000
	moveq	r0, r0, lsl #8
	subeq	r2, r2, #(8 << 23)
	tst	r0, #0x00f00000
	moveq	r0, r0, lsl #4
	subeq	r2, r2, #(4 << 23)
	tst	r0, #0x00c00000
	moveq	r0, r0, lsl #2
	subeq	r2, r2, #(2 << 23)
	tst	r0, #0x00800000
	moveq	r0, r0, lsl #1
	subeq	r2, r2, #(1 << 23)
	cmp	r2, #0
	bgt	LSYM(Lad_e)

#else

	clz	ip, r0
	sub	ip, ip, #8
	mov	r0, r0, lsl ip
	subs	r2, r2, ip, lsl #23
	bgt	LSYM(Lad_e)

#endif

	@ Exponent too small, denormalize result.
	mvn	r2, r2, asr #23
	add	r2, r2, #2
	orr	r0, r3, r0, lsr r2
	RET

	@ Fixup and adjust bit position for denormalized arguments.
	@ Note that r2 must not remain equal to 0.
LSYM(Lad_d):
	teq	r2, #0
	eoreq	r0, r0, #0x00800000
	addeq	r2, r2, #(1 << 23)
	eor	r1, r1, #0x00800000
	subne	r3, r3, #(1 << 23)
	b	LSYM(Lad_x)

	@ Result is x - x = 0, unless x is INF or NAN.
LSYM(Lad_z):
	mov	ip, #0xff000000
	and	r2, r0, ip, lsr #1
	teq	r2, ip, lsr #1
	moveq	r0, ip, asr #2
	movne	r0, #0
	RET

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	r0, r3, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	@ At least one of r0/r1 is INF/NAN.
	@ if r0 != INF/NAN: return r1 (which is INF/NAN)
	@ if r1 != INF/NAN: return r0 (which is INF/NAN)
	@ if r0 or r1 is NAN: return NAN
	@ if opposite sign: return NAN
	@ return r0 (which is INF or -INF)
LSYM(Lad_i):
	teq	r2, ip, lsr #1
	movne	r0, r1
	teqeq	r3, ip, lsr #1
	RETc(ne)
	movs	r2, r0, lsl #9
	moveqs	r2, r1, lsl #9
	teqeq	r0, r1
	orrne	r0, r3, #0x00400000	@ NAN
	RET


ARM_FUNC_START floatunsisf
	mov	r3, #0
	b	1f

ARM_FUNC_START floatsisf
	ands	r3, r0, #0x80000000
	rsbmi	r0, r0, #0

1:	teq	r0, #0
	RETc(eq)

	mov	r1, #0
	mov	r2, #((127 + 23) << 23)
	tst	r0, #0xfc000000
	beq	LSYM(Lad_p)

	@ We need to scale the value a little before branching to code above.
	tst	r0, #0xf0000000
	movne	r1, r0, lsl #28
	movne	r0, r0, lsr #4
	addne	r2, r2, #(4 << 23)
	tst	r0, #0x0c000000
	beq	LSYM(Lad_p)
	mov	r1, r1, lsr #2
	orr	r1, r1, r0, lsl #30
	mov	r0, r0, lsr #2
	add	r2, r2, #(2 << 23)
	b	LSYM(Lad_p)


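The two int-to-float entry points above reuse the normalisation and rounding code of addsf3 by presenting the integer as an unnormalised mantissa with a biased exponent of 127 + 23 (i.e. scaled by 2^23), pre-shifting it right first if the top bits are occupied.  A rough C sketch of the idea for the easy case, assuming |n| < 2^24 so the value is exactly representable and no rounding is needed (the helper name and signature are invented for the sketch):

	#include <stdint.h>

	static uint32_t small_int_to_float_bits (uint32_t mag, uint32_t sign)
	{
	  uint32_t exp = (127 + 23) << 23;	/* mag is taken as mantissa * 2^0 */

	  if (mag == 0)
	    return sign;			/* zero encodes as all-zero magnitude */
	  while (mag < 0x00800000u)		/* normalise to 1.xxx * 2^e */
	    {
	      mag <<= 1;
	      exp -= 1u << 23;
	    }
	  return sign | exp | (mag & 0x007fffffu);
	}
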
ARM_FUNC_START mulsf3

	@ Mask out exponents.
	mov	ip, #0xff000000
	and	r2, r0, ip, lsr #1
	and	r3, r1, ip, lsr #1

	@ Trap any INF/NAN.
	teq	r2, ip, lsr #1
	teqne	r3, ip, lsr #1
	beq	LSYM(Lml_s)

	@ Trap any multiplication by 0.
	bics	ip, r0, #0x80000000
	bicnes	ip, r1, #0x80000000
	beq	LSYM(Lml_z)

	@ Shift exponents right one bit to make room for overflow bit.
	@ If either of them is 0, scale denormalized arguments off line.
	@ Then add both exponents together.
	movs	r2, r2, lsr #1
	teqne	r3, #0
	beq	LSYM(Lml_d)
LSYM(Lml_x):
	add	r2, r2, r3, asr #1

	@ Preserve final sign in r2 along with exponent for now.
	teq	r0, r1
	orrmi	r2, r2, #0x8000

	@ Convert mantissa to unsigned integer.
	bic	r0, r0, #0xff000000
	bic	r1, r1, #0xff000000
	orr	r0, r0, #0x00800000
	orr	r1, r1, #0x00800000

#if __ARM_ARCH__ < 4

	@ Well, no way to make it shorter without the umull instruction.
	@ We must perform that 24 x 24 -> 48 bit multiplication by hand.
	stmfd	sp!, {r4, r5}
	mov	r4, r0, lsr #16
	mov	r5, r1, lsr #16
	bic	r0, r0, #0x00ff0000
	bic	r1, r1, #0x00ff0000
	mul	ip, r4, r5
	mul	r3, r0, r1
	mul	r0, r5, r0
	mla	r0, r4, r1, r0
	adds	r3, r3, r0, lsl #16
	adc	ip, ip, r0, lsr #16
	ldmfd	sp!, {r4, r5}

#else

	umull	r3, ip, r0, r1		@ The actual multiplication.

#endif

	@ Put final sign in r0.
	mov	r0, r2, lsl #16
	bic	r2, r2, #0x8000

	@ Adjust result if one extra MSB appeared.
	@ The LSB may be lost but this never changes the result in this case.
	tst	ip, #(1 << 15)
	addne	r2, r2, #(1 << 22)
	movnes	ip, ip, lsr #1
	movne	r3, r3, rrx

	@ Apply exponent bias, check range for underflow.
	subs	r2, r2, #(127 << 22)
	ble	LSYM(Lml_u)

	@ Scale back to 24 bits with rounding.
	@ r0 contains sign bit already.
	orrs	r0, r0, r3, lsr #23
	adc	r0, r0, ip, lsl #9

	@ If halfway between two numbers, rounding should be towards LSB = 0.
	mov	r3, r3, lsl #9
	teq	r3, #0x80000000
	biceq	r0, r0, #1

	@ Note: rounding may have produced an extra MSB here.
	@ The extra bit is cleared before merging the exponent below.
	tst	r0, #0x01000000
	addne	r2, r2, #(1 << 22)

	@ Check for exponent overflow
	cmp	r2, #(255 << 22)
	bge	LSYM(Lml_o)

	@ Add final exponent.
	bic	r0, r0, #0x01800000
	orr	r0, r0, r2, lsl #1
	RET

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):	eor	r0, r0, r1
	bic	r0, r0, #0x7fffffff
	RET

	@ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
	cmn	r2, #(24 << 22)
	RETc(le)

	@ Find out proper shift value.
	mvn	r1, r2, asr #22
	subs	r1, r1, #7
	bgt	LSYM(Lml_ur)

	@ Shift value left, round, etc.
	add	r1, r1, #32
	orrs	r0, r0, r3, lsr r1
	rsb	r1, r1, #32
	adc	r0, r0, ip, lsl r1
	mov	ip, r3, lsl r1
	teq	ip, #0x80000000
	biceq	r0, r0, #1
	RET

	@ Shift value right, round, etc.
	@ Note: r1 must not be 0 otherwise carry does not get set.
LSYM(Lml_ur):
	orrs	r0, r0, ip, lsr r1
	adc	r0, r0, #0
	rsb	r1, r1, #32
	mov	ip, ip, lsl r1
	teq	r3, #0
	teqeq	ip, #0x80000000
	biceq	r0, r0, #1
	RET

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
	teq	r2, #0
	and	ip, r0, #0x80000000
1:	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000
	subeq	r2, r2, #(1 << 22)
	beq	1b
	orr	r0, r0, ip
	teq	r3, #0
	and	ip, r1, #0x80000000
2:	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
	subeq	r3, r3, #(1 << 23)
	beq	2b
	orr	r1, r1, ip
	b	LSYM(Lml_x)

	@ One or both args are INF or NAN.
LSYM(Lml_s):
	teq	r0, #0x0
	teqne	r1, #0x0
	teqne	r0, #0x80000000
	teqne	r1, #0x80000000
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r2, ip, lsr #1
	bne	1f
	movs	r2, r0, lsl #9
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r3, ip, lsr #1
	bne	LSYM(Lml_i)
	movs	r3, r1, lsl #9
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	r0, r0, r1

	@ Overflow: return INF (sign already in r0).
LSYM(Lml_o):
	and	r0, r0, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	@ Return NAN.
LSYM(Lml_n):
	mov	r0, #0x7f000000
	orr	r0, r0, #0x00c00000
	RET


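A C sketch of the by-hand 24 x 24 -> 48 bit multiplication used in the __ARM_ARCH__ < 4 path of mulsf3 above (no umull available): each operand is split at bit 16 and the four partial products are recombined with a carry into the high word.  The helper below assumes both inputs fit in 24 bits, as the mantissas do; the function name is invented for the sketch.

	#include <stdint.h>

	static void mul24x24 (uint32_t a, uint32_t b, uint32_t *hi, uint32_t *lo)
	{
	  uint32_t ah = a >> 16, al = a & 0xffffu;
	  uint32_t bh = b >> 16, bl = b & 0xffffu;
	  uint32_t mid = ah * bl + al * bh;	/* cannot overflow: ah, bh are 8 bits */
	  uint32_t low = al * bl + (mid << 16);

	  *lo = low;
	  *hi = ah * bh + (mid >> 16) + (low < (mid << 16));	/* carry from low */
	}
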
ARM_FUNC_START divsf3

	@ Mask out exponents.
	mov	ip, #0xff000000
	and	r2, r0, ip, lsr #1
	and	r3, r1, ip, lsr #1

	@ Trap any INF/NAN or zeroes.
	teq	r2, ip, lsr #1
	teqne	r3, ip, lsr #1
	bicnes	ip, r0, #0x80000000
	bicnes	ip, r1, #0x80000000
	beq	LSYM(Ldv_s)

	@ Shift exponents right one bit to make room for overflow bit.
	@ If either of them is 0, scale denormalized arguments off line.
	@ Then subtract the divisor exponent from the dividend's.
	movs	r2, r2, lsr #1
	teqne	r3, #0
	beq	LSYM(Ldv_d)
LSYM(Ldv_x):
	sub	r2, r2, r3, asr #1

	@ Preserve final sign into ip.
	eor	ip, r0, r1

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r3, divisor -> r1.
	mov	r3, #0x10000000
	movs	r1, r1, lsl #9
	mov	r0, r0, lsl #9
	beq	LSYM(Ldv_1)
	orr	r1, r3, r1, lsr #4
	orr	r3, r3, r0, lsr #4

	@ Initialize r0 (result) with final sign bit.
	and	r0, ip, #0x80000000

	@ Ensure result will land at a known bit position.
	cmp	r3, r1
	subcc	r2, r2, #(1 << 22)
	movcc	r3, r3, lsl #1

	@ Apply exponent bias, check range for over/underflow.
	add	r2, r2, #(127 << 22)
	cmn	r2, #(24 << 22)
	RETc(le)
	cmp	r2, #(255 << 22)
	bge	LSYM(Lml_o)

	@ The actual division loop.
	mov	ip, #0x00800000
1:	cmp	r3, r1
	subcs	r3, r3, r1
	orrcs	r0, r0, ip
	cmp	r3, r1, lsr #1
	subcs	r3, r3, r1, lsr #1
	orrcs	r0, r0, ip, lsr #1
	cmp	r3, r1, lsr #2
	subcs	r3, r3, r1, lsr #2
	orrcs	r0, r0, ip, lsr #2
	cmp	r3, r1, lsr #3
	subcs	r3, r3, r1, lsr #3
	orrcs	r0, r0, ip, lsr #3
	movs	r3, r3, lsl #4
	movnes	ip, ip, lsr #4
	bne	1b

	@ Check if denormalized result is needed.
	cmp	r2, #0
	ble	LSYM(Ldv_u)

	@ Apply proper rounding.
	cmp	r3, r1
	addcs	r0, r0, #1
	biceq	r0, r0, #1

	@ Add exponent to result.
	bic	r0, r0, #0x00800000
	orr	r0, r0, r2, lsl #1
	RET

	@ Division by 0x1p*: let's shortcut a lot of code.
LSYM(Ldv_1):
	and	ip, ip, #0x80000000
	orr	r0, ip, r0, lsr #9
	add	r2, r2, #(127 << 22)
	cmp	r2, #(255 << 22)
	bge	LSYM(Lml_o)
	cmp	r2, #0
	orrgt	r0, r0, r2, lsl #1
	RETc(gt)
	cmn	r2, #(24 << 22)
	movle	r0, ip
	RETc(le)
	orr	r0, r0, #0x00800000
	mov	r3, #0

	@ Result must be denormalized: prepare parameters to use code above.
	@ r3 already contains remainder for rounding considerations.
LSYM(Ldv_u):
	bic	ip, r0, #0x80000000
	and	r0, r0, #0x80000000
	mvn	r1, r2, asr #22
	add	r1, r1, #2
	b	LSYM(Lml_ur)

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Ldv_d):
	teq	r2, #0
	and	ip, r0, #0x80000000
1:	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000
	subeq	r2, r2, #(1 << 22)
	beq	1b
	orr	r0, r0, ip
	teq	r3, #0
	and	ip, r1, #0x80000000
2:	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
	subeq	r3, r3, #(1 << 23)
	beq	2b
	orr	r1, r1, ip
	b	LSYM(Ldv_x)

	@ One or both arguments are either INF, NAN or zero.
LSYM(Ldv_s):
	mov	ip, #0xff000000
	teq	r2, ip, lsr #1
	teqeq	r3, ip, lsr #1
	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
	teq	r2, ip, lsr #1
	bne	1f
	movs	r2, r0, lsl #9
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	b	LSYM(Lml_i)		@ INF / <anything> -> INF
1:	teq	r3, ip, lsr #1
	bne	2f
	movs	r3, r1, lsl #9
	bne	LSYM(Lml_n)		@ <anything> / NAN -> NAN
	b	LSYM(Lml_z)		@ <anything> / INF -> 0
2:	@ One or both arguments are 0.
	bics	r2, r0, #0x80000000
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	bics	r3, r1, #0x80000000
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN


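A C sketch of the quotient loop in divsf3 above: classic restoring division, producing one quotient bit per compare-and-subtract, whereas the assembly unrolls it four bits per loop pass by comparing against the divisor shifted right by 0..3.  It assumes the operands are pre-aligned so that num >= den and num < 2 * den, as the code guarantees before entering the loop; the helper name is invented for the sketch.

	#include <stdint.h>

	static uint32_t divide_mantissa (uint32_t num, uint32_t den, uint32_t *rem)
	{
	  uint32_t quo = 0;

	  for (uint32_t bit = 0x00800000u; bit != 0; bit >>= 1)
	    {
	      if (num >= den)
		{
		  num -= den;
		  quo |= bit;
		}
	      num <<= 1;	/* same effect as moving the divisor right */
	    }
	  *rem = num;		/* remainder, kept for the rounding decision */
	  return quo;
	}
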
FUNC_START gesf2
ARM_FUNC_START gtsf2
	mov	r3, #-1
	b	1f

FUNC_START lesf2
ARM_FUNC_START ltsf2
	mov	r3, #1
	b	1f

FUNC_START nesf2
FUNC_START eqsf2
ARM_FUNC_START cmpsf2
	mov	r3, #1			@ how should we specify unordered here?

1:	@ Trap any INF/NAN first.
	mov	ip, #0xff000000
	and	r2, r1, ip, lsr #1
	teq	r2, ip, lsr #1
	and	r2, r0, ip, lsr #1
	teqne	r2, ip, lsr #1
	beq	3f

	@ Test for equality.
	@ Note that 0.0 is equal to -0.0.
2:	orr	r3, r0, r1
	bics	r3, r3, #0x80000000	@ either 0.0 or -0.0
	teqne	r0, r1			@ or both the same
	moveq	r0, #0
	RETc(eq)

	@ Check for sign difference.  The N flag is set if it is the case.
	@ If so, return sign of r0.
	movmi	r0, r0, asr #31
	orrmi	r0, r0, #1
	RETc(mi)

	@ Compare exponents.
	and	r3, r1, ip, lsr #1
	cmp	r2, r3

	@ Compare mantissa if exponents are equal
	moveq	r0, r0, lsl #9
	cmpeq	r0, r1, lsl #9
	movcs	r0, r1, asr #31
	mvncc	r0, r1, asr #31
	orr	r0, r0, #1
	RET

	@ Look for a NAN.
3:	and	r2, r1, ip, lsr #1
	teq	r2, ip, lsr #1
	bne	4f
	movs	r2, r1, lsl #9
	bne	5f			@ r1 is NAN
4:	and	r2, r0, ip, lsr #1
	teq	r2, ip, lsr #1
	bne	2b
	movs	ip, r0, lsl #9
	beq	2b			@ r0 is not NAN
5:	mov	r0, r3			@ return unordered code from r3.
	RET


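For reference, the way these comparison helpers are consumed follows the usual libgcc convention (an assumption of this sketch, not something introduced by the patch): each routine returns a three-way code in r0 and the compiler tests it against zero, which is why the unordered value loaded into r3 above (-1 for ge/gt, +1 for lt/le) makes any comparison involving a NAN come out false.

	int f_gt (float a, float b) { return a > b;  }	/* tests __gtsf2 (a, b) >  0 */
	int f_lt (float a, float b) { return a < b;  }	/* tests __ltsf2 (a, b) <  0 */
	int f_ge (float a, float b) { return a >= b; }	/* tests __gesf2 (a, b) >= 0 */
	int f_eq (float a, float b) { return a == b; }	/* tests __eqsf2 (a, b) == 0 */
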
ARM_FUNC_START unordsf2
	mov	ip, #0xff000000
	and	r2, r1, ip, lsr #1
	teq	r2, ip, lsr #1
	bne	1f
	movs	r2, r1, lsl #9
	bne	3f			@ r1 is NAN
1:	and	r2, r0, ip, lsr #1
	teq	r2, ip, lsr #1
	bne	2f
	movs	r2, r0, lsl #9
	bne	3f			@ r0 is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET
3:	mov	r0, #1			@ arguments are unordered.
	RET


ARM_FUNC_START fixsfsi
	movs	r0, r0, lsl #1
	RETc(eq)			@ value is 0.
	@ preserve C flag (the actual sign)
#ifdef __APCS_26__
	mov	r1, pc
#else
	mrs	r1, cpsr
#endif

	@ check exponent range.
	and	r2, r0, #0xff000000
	cmp	r2, #(127 << 24)
	movcc	r0, #0			@ value is too small
	RETc(cc)
	cmp	r2, #((127 + 31) << 24)
	bcs	1f			@ value is too large

	mov	r0, r0, lsl #7
	orr	r0, r0, #0x80000000
	mov	r2, r2, lsr #24
	rsb	r2, r2, #(127 + 31)
	mov	r0, r0, lsr r2
	tst	r1, #0x20000000		@ the sign bit
	rsbne	r0, r0, #0
	RET

1:	teq	r2, #0xff000000
	bne	2f
	movs	r0, r0, lsl #8
	bne	3f			@ r0 is NAN.
2:	tst	r1, #0x20000000		@ the sign bit
	moveq	r0, #0x7fffffff		@ the maximum signed positive si
	movne	r0, #0x80000000		@ the maximum signed negative si
	RET

3:	mov	r0, #0			@ What should we convert NAN to?
	RET


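A C sketch of the conversion performed by fixsfsi above, working on the raw bit image of the float (assumptions: IEEE single precision, truncation toward zero; the NAN and overflow branches of the assembly are omitted here, and the helper name is invented for the sketch).

	#include <stdint.h>

	static int32_t float_bits_to_si (uint32_t bits)
	{
	  uint32_t sign = bits & 0x80000000u;
	  uint32_t exp  = (bits >> 23) & 0xffu;
	  uint32_t frac, mag;

	  if (exp < 127)			/* magnitude below 1.0 */
	    return 0;
	  /* Caller must guarantee exp < 127 + 31 (no overflow, not NAN).  */
	  frac = (bits << 8) | 0x80000000u;	/* 1.mmm aligned at bit 31 */
	  mag  = frac >> ((127 + 31) - exp);	/* truncate toward zero */
	  return sign ? -(int32_t) mag : (int32_t) mag;
	}
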
ARM_FUNC_START fixunssfsi
	movs	r0, r0, lsl #1
	RETc(eq)			@ value is 0.
	movcs	r0, #0
	RETc(cs)			@ value is negative.

	@ check exponent range.
	and	r2, r0, #0xff000000
	cmp	r2, #(127 << 24)
	movcc	r0, #0			@ value is too small
	RETc(cc)
	cmp	r2, #((127 + 32) << 24)
	bcs	1f			@ value is too large

	mov	r0, r0, lsl #7
	orr	r0, r0, #0x80000000
	mov	r2, r2, lsr #24
	rsb	r2, r2, #(127 + 31)
	mov	r0, r0, lsr r2
	RET

1:	teq	r2, #0xff000000
	bne	2f
	movs	r0, r0, lsl #8
	bne	3b			@ r0 is NAN.
2:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

gcc/config/arm/lib1funcs.asm
@@ -782,3 +782,17 @@ _arm_return:
	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */

#ifdef L_ieee754_dp
	/* These functions are coded in ARM state, even when called from
	   Thumb.  */
	.arm
#include "ieee754-df.S"
#endif

#ifdef L_ieee754_sp
	/* These functions are coded in ARM state, even when called from
	   Thumb.  */
	.arm
#include "ieee754-sf.S"
#endif

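For orientation, a sketch of how the two new LIB1ASMFUNCS members reach these blocks, assuming the usual lib1funcs build scheme rather than anything shown in this diff: each member is compiled from lib1funcs.asm into its own object with a matching -DL_<member> define, so only the corresponding #ifdef section, and hence only one of the two #include lines, is assembled into that object.

	/* Hypothetical command lines for illustration only; the real rules
	   live in the libgcc makefile machinery, not in this patch.  */
	/*   arm-elf-gcc -c -x assembler-with-cpp -DL_ieee754_sp lib1funcs.asm -o _ieee754_sp.o  */
	/*   arm-elf-gcc -c -x assembler-with-cpp -DL_ieee754_dp lib1funcs.asm -o _ieee754_dp.o  */
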
gcc/config/arm/t-arm-elf
@@ -1,29 +1,10 @@
LIB1ASMSRC = arm/lib1funcs.asm
LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX
LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _ieee754_dp _ieee754_sp

# We want fine grained libraries, so use the new code to build the
# floating point emulation libraries.
FPBIT = fp-bit.c
DPBIT = dp-bit.c

fp-bit.c: $(srcdir)/config/fp-bit.c
	echo '#define FLOAT' > fp-bit.c
	echo '#ifndef __ARMEB__' >> fp-bit.c
	echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
	echo '#endif' >> fp-bit.c
	cat $(srcdir)/config/fp-bit.c >> fp-bit.c

dp-bit.c: $(srcdir)/config/fp-bit.c
	echo '#ifndef __ARMEB__' > dp-bit.c
	echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
	echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c
	echo '#endif' >> dp-bit.c
	cat $(srcdir)/config/fp-bit.c >> dp-bit.c


MULTILIB_OPTIONS = marm/mthumb
MULTILIB_DIRNAMES = arm thumb
MULTILIB_EXCEPTIONS =
MULTILIB_MATCHES =

# MULTILIB_OPTIONS += mcpu=ep9312
# MULTILIB_DIRNAMES += ep9312

@@ -31,8 +12,7 @@ MULTILIB_EXCEPTIONS =

# MULTILIB_OPTIONS += mlittle-endian/mbig-endian
# MULTILIB_DIRNAMES += le be
# MULTILIB_EXCEPTIONS =
# MULTILIB_MATCHES = mbig-endian=mbe mlittle-endian=mle
# MULTILIB_MATCHES += mbig-endian=mbe mlittle-endian=mle
#
# MULTILIB_OPTIONS += mhard-float/msoft-float
# MULTILIB_DIRNAMES += fpu soft

@@ -97,3 +77,4 @@ $(T)crti.o: $(srcdir)/config/arm/crti.asm $(GCC_PASSES)
$(T)crtn.o: $(srcdir)/config/arm/crtn.asm $(GCC_PASSES)
	$(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
	-c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/arm/crtn.asm
