From e24c3bec3e8e254a3784b3e4c97bd3a76fbcc807 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Thu, 13 Aug 2015 09:56:31 +0200 Subject: MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction MIPS R6 introduced the following instruction: Floating Point Fused Multiply Add: MADDF.fmt To perform a fused multiply-add of FP values. MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10956/ Signed-off-by: Ralf Baechle --- arch/mips/math-emu/sp_maddf.c | 255 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 arch/mips/math-emu/sp_maddf.c (limited to 'arch/mips/math-emu/sp_maddf.c') diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c new file mode 100644 index 000000000000..dd1dd83e34eb --- /dev/null +++ b/arch/mips/math-emu/sp_maddf.c @@ -0,0 +1,255 @@ +/* + * IEEE754 floating point arithmetic + * single precision: MADDF.f (Fused Multiply Add) + * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) + * + * MIPS floating point support + * Copyright (C) 2015 Imagination Technologies, Ltd. + * Author: Markos Chandras + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + int re; + int rs; + unsigned rm; + unsigned short lxm; + unsigned short hxm; + unsigned short lym; + unsigned short hym; + unsigned lrm; + unsigned hrm; + unsigned t; + unsigned at; + int s; + + COMPXSP; + COMPYSP; + u32 zm; int ze; int zs __maybe_unused; int zc; + + EXPLODEXSP; + EXPLODEYSP; + EXPLODESP(z, zc, zs, ze, zm) + + FLUSHXSP; + FLUSHYSP; + FLUSHSP(z, zc, zs, ze, zm); + + ieee754_clearcx(); + + switch (zc) { + case IEEE754_CLASS_SNAN: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_nanxcpt(z); + case IEEE754_CLASS_DNORM: + SPDNORMx(zm, ze); + /* QNAN is handled separately below */ + } + + switch (CLPAIR(xc, yc)) { + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + return ieee754sp_nanxcpt(y); + + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + return ieee754sp_nanxcpt(x); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + return y; + + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): + return x; + + /* + * Infinity handling + */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): + if (zc == IEEE754_CLASS_QNAN) + return z; + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + if (zc == IEEE754_CLASS_QNAN) + return z; + return ieee754sp_inf(xs ^ ys); + + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): + case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): + if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + /* Multiplication is 0 so just return z */ + return z; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): + SPDNORMX; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): + if (zc == IEEE754_CLASS_QNAN) + return z; + else if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + SPDNORMY; + break; + + case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_QNAN) + return z; + else if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + SPDNORMX; + break; + + case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): + if (zc == IEEE754_CLASS_QNAN) + return z; + else if (zc == IEEE754_CLASS_INF) + return ieee754sp_inf(zs); + /* fall through to real computations */ + } + + /* Finally get to do some computation */ + + /* + * Do the multiplication bit first + * + * rm = xm * ym, re = xe + ye basically + * + * At this point xm and ym should have been normalized. + */ + + /* rm = xm * ym, re = xe+ye basically */ + assert(xm & SP_HIDDEN_BIT); + assert(ym & SP_HIDDEN_BIT); + + re = xe + ye; + rs = xs ^ ys; + + /* shunt to top of word */ + xm <<= 32 - (SP_FBITS + 1); + ym <<= 32 - (SP_FBITS + 1); + + /* + * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + */ + lxm = xm & 0xffff; + hxm = xm >> 16; + lym = ym & 0xffff; + hym = ym >> 16; + + lrm = lxm * lym; /* 16 * 16 => 32 */ + hrm = hxm * hym; /* 16 * 16 => 32 */ + + t = lxm * hym; /* 16 * 16 => 32 */ + at = lrm + (t << 16); + hrm += at < lrm; + lrm = at; + hrm = hrm + (t >> 16); + + t = hxm * lym; /* 16 * 16 => 32 */ + at = lrm + (t << 16); + hrm += at < lrm; + lrm = at; + hrm = hrm + (t >> 16); + + rm = hrm | (lrm != 0); + + /* + * Sticky shift down to normal rounding precision. + */ + if ((int) rm < 0) { + rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | + ((rm << (SP_FBITS + 1 + 3)) != 0); + re++; + } else { + rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | + ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); + } + assert(rm & (SP_HIDDEN_BIT << 3)); + + /* And now the addition */ + + assert(zm & SP_HIDDEN_BIT); + + /* + * Provide guard,round and stick bit space. + */ + zm <<= 3; + + if (ze > re) { + /* + * Have to shift y fraction right to align. + */ + s = ze - re; + SPXSRSYn(s); + } else if (re > ze) { + /* + * Have to shift x fraction right to align. + */ + s = re - ze; + SPXSRSYn(s); + } + assert(ze == re); + assert(ze <= SP_EMAX); + + if (zs == rs) { + /* + * Generate 28 bit result of adding two 27 bit numbers + * leaving result in zm, zs and ze. + */ + zm = zm + rm; + + if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ + SPXSRSX1(); + } + } else { + if (zm >= rm) { + zm = zm - rm; + } else { + zm = rm - zm; + zs = rs; + } + if (zm == 0) + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + + /* + * Normalize in extended single precision + */ + while ((zm >> (SP_MBITS + 3)) == 0) { + zm <<= 1; + ze--; + } + + } + return ieee754sp_format(zs, ze, zm); +} -- cgit