summaryrefslogtreecommitdiff
path: root/arch/mips/math-emu/sp_maddf.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-15 20:43:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-15 20:43:33 -0700
commit7318413077a5141a50a753b1fab687b7907eef16 (patch)
tree21a59cf856f4bb762f6d3d0635c898ca6b24cff6 /arch/mips/math-emu/sp_maddf.c
parent8d93c7a4315711ea0f7a95ca353a89c4ed0763fb (diff)
parent35eed7cb2cf1c58a225a0140729ba787fbb06c88 (diff)
Merge branch '4.14-features' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
Pull MIPS updates from Ralf Baechle: "This is the main pull request for 4.14 for MIPS; below a summary of the non-merge commits: CM: - Rename mips_cm_base to mips_gcr_base - Specify register size when generating accessors - Use BIT/GENMASK for register fields, order & drop shifts - Add cluster & block args to mips_cm_lock_other() CPC: - Use common CPS accessor generation macros - Use BIT/GENMASK for register fields, order & drop shifts - Introduce register modify (set/clear/change) accessors - Use change_*, set_* & clear_* where appropriate - Add CM/CPC 3.5 register definitions - Use GlobalNumber macros rather than magic numbers - Have asm/mips-cps.h include CM & CPC headers - Cluster support for topology functions - Detect CPUs in secondary clusters CPS: - Read GIC_VL_IDENT directly, not via irqchip driver DMA: - Consolidate coherent and non-coherent dma_alloc code - Don't use dma_cache_sync to implement fd_cacheflush FPU emulation / FP assist code: - Another series of 14 commits fixing corner cases such as NaN propgagation and other special input values. - Zero bits 32-63 of the result for a CLASS.D instruction. - Enhanced statics via debugfs - Do not use bools for arithmetic. GCC 7.1 moans about this. - Correct user fault_addr type Generic MIPS: - Enhancement of stack backtraces - Cleanup from non-existing options - Handle non word sized instructions when examining frame - Fix detection and decoding of ADDIUSP instruction - Fix decoding of SWSP16 instruction - Refactor handling of stack pointer in get_frame_info - Remove unreachable code from force_fcr31_sig() - Convert to using %pOF instead of full_name - Remove the R6000 support. - Move FP code from *_switch.S to *_fpu.S - Remove unused ST_OFF from r2300_switch.S - Allow platform to specify multiple its.S files - Add #includes to various files to ensure code builds reliable and without warning.. - Remove __invalidate_kernel_vmap_range - Remove plat_timer_setup - Declare various variables & functions static - Abstract CPU core & VP(E) ID access through accessor functions - Store core & VP IDs in GlobalNumber-style variable - Unify checks for sibling CPUs - Add CPU cluster number accessors - Prevent direct use of generic_defconfig - Make CONFIG_MIPS_MT_SMP default y - Add __ioread64_copy - Remove unnecessary inclusions of linux/irqchip/mips-gic.h GIC: - Introduce asm/mips-gic.h with accessor functions - Use new GIC accessor functions in mips-gic-timer - Remove counter access functions from irq-mips-gic.c - Remove gic_read_local_vp_id() from irq-mips-gic.c - Simplify shared interrupt pending/mask reads in irq-mips-gic.c - Simplify gic_local_irq_domain_map() in irq-mips-gic.c - Drop gic_(re)set_mask() functions in irq-mips-gic.c - Remove gic_set_polarity(), gic_set_trigger(), gic_set_dual_edge(), gic_map_to_pin() and gic_map_to_vpe() from irq-mips-gic.c. - Convert remaining shared reg access, local int mask access and remaining local reg access to new accessors - Move GIC_LOCAL_INT_* to asm/mips-gic.h - Remove GIC_CPU_INT* macros from irq-mips-gic.c - Move various definitions to the driver - Remove gic_get_usm_range() - Remove __gic_irq_dispatch() forward declaration - Remove gic_init() - Use mips_gic_present() in place of gic_present and remove gic_present - Move gic_get_c0_*_int() to asm/mips-gic.h - Remove linux/irqchip/mips-gic.h - Inline __gic_init() - Inline gic_basic_init() - Make pcpu_masks a per-cpu variable - Use pcpu_masks to avoid reading GIC_SH_MASK* - Clean up mti, reserved-cpu-vectors handling - Use cpumask_first_and() in gic_set_affinity() - Let the core set struct irq_common_data affinity microMIPS: - Fix microMIPS stack unwinding on big endian systems MIPS-GIC: - SYNC after enabling GIC region NUMA: - Remove the unused parent_node() macro R6: - Constify r2_decoder_tables - Add accessor & bit definitions for GlobalNumber SMP: - Constify smp ops - Allow boot_secondary SMP op to return errors VDSO: - Drop gic_get_usm_range() usage - Avoid use of linux/irqchip/mips-gic.h Platform changes: Alchemy: - Add devboard machine type to cpuinfo - update cpu feature overrides - Threaded carddetect irqs for devboards AR7: - allow NULL clock for clk_get_rate BCM63xx: - Fix ENETDMA_6345_MAXBURST_REG offset - Allow NULL clock for clk_get_rate CI20: - Enable GPIO and RTC drivers in defconfig - Add ethernet and fixed-regulator nodes to DTS Generic platform: - Move Boston and NI 169445 FIT image source to their own files - Include asm/bootinfo.h for plat_fdt_relocated() - Include asm/time.h for get_c0_*_int() - Include asm/bootinfo.h for plat_fdt_relocated() - Include asm/time.h for get_c0_*_int() - Allow filtering enabled boards by requirements - Don't explicitly disable CONFIG_USB_SUPPORT - Bump default NR_CPUS to 16 JZ4700: - Probe the jz4740-rtc driver from devicetree Lantiq: - Drop check of boot select from the spi-falcon driver. - Drop check of boot select from the lantiq-flash MTD driver. - Access boot cause register in the watchdog driver through regmap - Add device tree binding documentation for the watchdog driver - Add docs for the RCU DT bindings. - Convert the fpi bus driver to a platform_driver - Remove ltq_reset_cause() and ltq_boot_select( - Switch to a proper reset driver - Switch to a new drivers/soc GPHY driver - Add an USB PHY driver for the Lantiq SoCs using the RCU module - Use of_platform_default_populate instead of __dt_register_buses - Enable MFD_SYSCON to be able to use it for the RCU MFD - Replace ltq_boot_select() with dummy implementation. Loongson 2F: - Allow NULL clock for clk_get_rate Malta: - Use new GIC accessor functions NI 169445: - Add support for NI 169445 board. - Only include in 32r2el kernels Octeon: - Add support for watchdog of 78XX SOCs. - Add support for watchdog of CN68XX SOCs. - Expose support for mips32r1, mips32r2 and mips64r1 - Enable more drivers in config file - Add support for accessing the boot vector. - Remove old boot vector code from watchdog driver - Define watchdog registers for 70xx, 73xx, 78xx, F75xx. - Make CSR functions node aware. - Allow access to CIU3 IRQ domains. - Misc cleanups in the watchdog driver Omega2+: - New board, add support and defconfig Pistachio: - Enable Root FS on NFS in defconfig Ralink: - Add Mediatek MT7628A SoC - Allow NULL clock for clk_get_rate - Explicitly request exclusive reset control in the pci-mt7620 PCI driver. SEAD3: - Only include in 32 bit kernels by default VoCore: - Add VoCore as a vendor t0 dt-bindings - Add defconfig file" * '4.14-features' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus: (167 commits) MIPS: Refactor handling of stack pointer in get_frame_info MIPS: Stacktrace: Fix microMIPS stack unwinding on big endian systems MIPS: microMIPS: Fix decoding of swsp16 instruction MIPS: microMIPS: Fix decoding of addiusp instruction MIPS: microMIPS: Fix detection of addiusp instruction MIPS: Handle non word sized instructions when examining frame MIPS: ralink: allow NULL clock for clk_get_rate MIPS: Loongson 2F: allow NULL clock for clk_get_rate MIPS: BCM63XX: allow NULL clock for clk_get_rate MIPS: AR7: allow NULL clock for clk_get_rate MIPS: BCM63XX: fix ENETDMA_6345_MAXBURST_REG offset mips: Save all registers when saving the frame MIPS: Add DWARF unwinding to assembly MIPS: Make SAVE_SOME more standard MIPS: Fix issues in backtraces MIPS: jz4780: DTS: Probe the jz4740-rtc driver from devicetree MIPS: Ci20: Enable RTC driver watchdog: octeon-wdt: Add support for 78XX SOCs. watchdog: octeon-wdt: Add support for cn68XX SOCs. watchdog: octeon-wdt: File cleaning. ...
Diffstat (limited to 'arch/mips/math-emu/sp_maddf.c')
-rw-r--r--arch/mips/math-emu/sp_maddf.c229
1 files changed, 107 insertions, 122 deletions
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index c91d5e5d9b5f..7195fe785d81 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -14,9 +14,6 @@
#include "ieee754sp.h"
-enum maddf_flags {
- maddf_negate_product = 1 << 0,
-};
static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp y, enum maddf_flags flags)
@@ -24,14 +21,8 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
int re;
int rs;
unsigned rm;
- unsigned short lxm;
- unsigned short hxm;
- unsigned short lym;
- unsigned short hym;
- unsigned lrm;
- unsigned hrm;
- unsigned t;
- unsigned at;
+ uint64_t rm64;
+ uint64_t zm64;
int s;
COMPXSP;
@@ -48,51 +39,35 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
ieee754_clearcx();
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
+ /*
+ * Handle the cases when at least one of x, y or z is a NaN.
+ * Order of precedence is sNaN, qNaN and z, x, y.
+ */
+ if (zc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- SPDNORMZ;
- /* QNAN and ZERO cases are handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754sp_nanxcpt(x);
+ if (yc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(y);
+ if (zc == IEEE754_CLASS_QNAN)
+ return z;
+ if (xc == IEEE754_CLASS_QNAN)
+ return x;
+ if (yc == IEEE754_CLASS_QNAN)
+ return y;
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
- return ieee754sp_nanxcpt(x);
+ if (zc == IEEE754_CLASS_DNORM)
+ SPDNORMZ;
+ /* ZERO z cases are handled separately below */
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
- return y;
+ switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
/*
* Infinity handling
*/
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754sp_indef();
@@ -101,9 +76,27 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754sp_inf(xs ^ ys);
+ if ((zc == IEEE754_CLASS_INF) &&
+ ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ /*
+ * Cases of addition of infinities with opposite signs
+ * or subtraction of infinities with same signs.
+ */
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754sp_indef();
+ }
+ /*
+ * z is here either not an infinity, or an infinity having the
+ * same sign as product (x*y) (in case of MADDF.D instruction)
+ * or product -(x*y) (in MSUBF.D case). The result must be an
+ * infinity, and its sign is determined only by the value of
+ * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ */
+ if (flags & MADDF_NEGATE_PRODUCT)
+ return ieee754sp_inf(1 ^ (xs ^ ys));
+ else
+ return ieee754sp_inf(xs ^ ys);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -112,32 +105,42 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
- /* Multiplication is 0 so just return z */
+ if (zc == IEEE754_CLASS_ZERO) {
+ /* Handle cases +0 + (-0) and similar ones. */
+ if ((!(flags & MADDF_NEGATE_PRODUCT)
+ && (zs == (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT)
+ && (zs != (xs ^ ys))))
+ /*
+ * Cases of addition of zeros of equal signs
+ * or subtraction of zeroes of opposite signs.
+ * The sign of the resulting zero is in any
+ * such case determined only by the sign of z.
+ */
+ return z;
+
+ return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
+ }
+ /* x*y is here 0, and z is not 0, so just return z */
return z;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMY;
break;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMX;
break;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
/* fall through to real computations */
}
@@ -158,111 +161,93 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
re = xe + ye;
rs = xs ^ ys;
- if (flags & maddf_negate_product)
+ if (flags & MADDF_NEGATE_PRODUCT)
rs ^= 1;
- /* shunt to top of word */
- xm <<= 32 - (SP_FBITS + 1);
- ym <<= 32 - (SP_FBITS + 1);
-
- /*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
- */
- lxm = xm & 0xffff;
- hxm = xm >> 16;
- lym = ym & 0xffff;
- hym = ym >> 16;
-
- lrm = lxm * lym; /* 16 * 16 => 32 */
- hrm = hxm * hym; /* 16 * 16 => 32 */
-
- t = lxm * hym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
+ /* Multiple 24 bit xm and ym to give 48 bit results */
+ rm64 = (uint64_t)xm * ym;
- t = hxm * lym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
+ /* Shunt to top of word */
+ rm64 = rm64 << 16;
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((int) rm < 0) {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
- ((rm << (SP_FBITS + 1 + 3)) != 0);
+ /* Put explicit bit at bit 62 if necessary */
+ if ((int64_t) rm64 < 0) {
+ rm64 = rm64 >> 1;
re++;
- } else {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
- ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
}
- assert(rm & (SP_HIDDEN_BIT << 3));
-
- if (zc == IEEE754_CLASS_ZERO)
- return ieee754sp_format(rs, re, rm);
- /* And now the addition */
+ assert(rm64 & (1 << 62));
- assert(zm & SP_HIDDEN_BIT);
+ if (zc == IEEE754_CLASS_ZERO) {
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ rm = XSPSRS64(rm64, (62 - 26));
+ return ieee754sp_format(rs, re, rm);
+ }
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
+ /* Move explicit bit from bit 23 to bit 62 */
+ zm64 = (uint64_t)zm << (62 - 23);
+ assert(zm64 & (1 << 62));
+ /* Make the exponents the same */
if (ze > re) {
/*
* Have to shift r fraction right to align.
*/
s = ze - re;
- rm = XSPSRS(rm, s);
+ rm64 = XSPSRS64(rm64, s);
re += s;
} else if (re > ze) {
/*
* Have to shift z fraction right to align.
*/
s = re - ze;
- zm = XSPSRS(zm, s);
+ zm64 = XSPSRS64(zm64, s);
ze += s;
}
assert(ze == re);
assert(ze <= SP_EMAX);
+ /* Do the addition */
if (zs == rs) {
/*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in zm, zs and ze.
+ * Generate 64 bit result by adding two 63 bit numbers
+ * leaving result in zm64, zs and ze.
*/
- zm = zm + rm;
-
- if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
- zm = XSPSRS1(zm);
+ zm64 = zm64 + rm64;
+ if ((int64_t)zm64 < 0) { /* carry out */
+ zm64 = XSPSRS1(zm64);
ze++;
}
} else {
- if (zm >= rm) {
- zm = zm - rm;
+ if (zm64 >= rm64) {
+ zm64 = zm64 - rm64;
} else {
- zm = rm - zm;
+ zm64 = rm64 - zm64;
zs = rs;
}
- if (zm == 0)
+ if (zm64 == 0)
return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
/*
- * Normalize in extended single precision
+ * Put explicit bit at bit 62 if necessary.
*/
- while ((zm >> (SP_MBITS + 3)) == 0) {
- zm <<= 1;
+ while ((zm64 >> 62) == 0) {
+ zm64 <<= 1;
ze--;
}
-
}
+
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ zm = XSPSRS64(zm64, (62 - 26));
+
return ieee754sp_format(zs, ze, zm);
}
@@ -275,5 +260,5 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
union ieee754sp y)
{
- return _sp_maddf(z, x, y, maddf_negate_product);
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}