Re: [PATCH 1/1] Implement math.trunc and math.fast2sum
[Thread Prev] | [Thread Next]
- Subject: Re: [PATCH 1/1] Implement math.trunc and math.fast2sum
- From: Ori Bernstein <ori@xxxxxxxxxxxxxx>
- Reply-to: myrddin-dev@xxxxxxxxxxxxxx
- Date: Mon, 5 Mar 2018 14:41:58 -0800
- To: myrddin-dev@xxxxxxxxxxxxxx
- Cc: "S. Gilles" <sgilles@xxxxxxxxxxxx>
Awesome start! Now, to find my grumpy hat and complain a bit -- I'm sure it's around here somewhere. Also, it might be good to include patches to docs as we go. (I'd be happy to move the docs into the mc/ repo if that makes things easier to maintain). On Mon, 5 Mar 2018 03:08:56 -0500 "S. Gilles" <sgilles@xxxxxxxxxxxx> wrote: > In the event that Kahan's summation, or Priest's doubly-compensated > summation, are desired, Fast2Sum will be useful. Trunc/floor/ceil > are probably the simplest FP functions that require bit-poking. > --- > lib/bld.sub | 1 + > lib/math/bld.sub | 5 ++ > lib/math/fpmath.myr | 199 +++++++++++++++++++++++++++++++++++++++++++++++ > lib/math/test/fpmath.myr | 163 ++++++++++++++++++++++++++++++++++++++ > 4 files changed, 368 insertions(+) > create mode 100644 lib/math/bld.sub > create mode 100644 lib/math/fpmath.myr > create mode 100644 lib/math/test/fpmath.myr > > diff --git a/lib/bld.sub b/lib/bld.sub > index 97effae9..8bcceb83 100644 > --- a/lib/bld.sub > +++ b/lib/bld.sub > @@ -8,6 +8,7 @@ sub = > inifile > iter > json > + math > regex > std > sys > diff --git a/lib/math/bld.sub b/lib/math/bld.sub > new file mode 100644 > index 00000000..da2e69ea > --- /dev/null > +++ b/lib/math/bld.sub > @@ -0,0 +1,5 @@ > +lib math = > + fpmath.myr We should split this out so that we don't end up with a giant file. With the amount of code in this commit it's manageable, but it's going to be more important as libmath grows more interesting. We don't support splitting up impls across files, so you'll have to just forward the calls: // fpmath.myr: impl fpmath flt64 = ... trunc = {f; -> trunc64(f)} ... ;; and, then split out the heavy lifting: // trunc.myr: const trunc32 = { ... } const trunc64 = { ... } Language note: It's not hard to allow splitting impls across files, but we'd lose the ability to check for completeness. It's not clear that this is a good idea. I should also look at what it would take to make aliases work. 
There's no reason that 'trunc = trunc64' should be invalid. > + lib ../std:std > +;; > diff --git a/lib/math/fpmath.myr b/lib/math/fpmath.myr > new file mode 100644 > index 00000000..8c86be98 > --- /dev/null > +++ b/lib/math/fpmath.myr > @@ -0,0 +1,199 @@ > +use std > + > +pkg math = > + trait fpmath @f = @f :: floating @f will mean that this trait requires a float for the impl to work. > + > + /* round floating point numbers to near, integer values */ > + trunc : (f : @f -> @f) > + ceil : (f : @f -> @f) > + floor : (f : @f -> @f) > + > + /* compute (s, t) with s = round-nearest(a+b), s + t = a + b */ > + fast2sum : (a : @f, b : @f -> (@f, @f)) > + ;; > + > + impl fpmath flt32 > + impl fpmath flt64 > +;; > + > +impl fpmath flt32 = > + trunc = {f : flt32 > + var u : uint32 = std.flt32bits(f) > + var e : uint32 = (((u >> 23) & 0xff) : uint32) - 127 No need to parenthesize the thing you're casting: ((x >> 123) : foo) => (x >> 123 : foo) > + var e_lt_zero : uint32 = ((e >> 31) : uint32) & 0x1 > + var e_ge_zero : uint32 = 1 - e_lt_zero > + var e_ge_23 : uint32 = 1 - ((e - 23) >> 31) > + /* Here, it looks like you're manually reimplementing std.flt32explode() here, which splits up like this: (isneg, exp, m) = std.flt32explode(f) I don't particularly feel strongly about this, but if there's something fixable that made the explode() variants inconvenient, we should break compatibility and fix it. > + The significand 1 . m1 m2 ... m23 needs to be > + truncated, which corresponds to zeroing all mi > + bits where i is beyond the exponent e (they are > + the actual sub-integer portion). > + */ > + var m : uint32 = ~(((1 << 23) - 1) >> e) > + m |= (-1 : uint32) * e_ge_23 the casts shouldn't be necessary here; constants are generically typed, so if it's used with uint32s, -1 will be a uint32. 
To make it clear that it's unsigned, you can write '~0' > + var v_ge_zero : uint32 = (u & m) * e_ge_zero > + > + /* > + On the other hand, if the exponent is < 0, "23 > + - e" is garbage, and we should just return +/- > + zero > + */ > + var v_lt_zero : uint32 = (u & (1 << 31)) * e_lt_zero > + > + /* Try to save a branch */ Nothing wrong with saving a branch, but it's not too important -- On modern CPU pipelines, branches are pretty cheap :) > + var v : uint32 = v_ge_zero + v_lt_zero > + -> std.flt32frombits(v) > + } > + > + floor = {f : flt32 > + var u : uint32 = std.flt32bits(f) > + var e : int32 = (((u >> 23) & 0xff) : int32) - 127 > + var shift_e : uint32 = (e : uint32) > + > + /* Many special cases */ > + if e >= 23 || u == 0x80000000 > + -> f > + elif (e < 0) && (u & (1 << 31) != 0) I'm noticing a lot of reuses of '(1 << 31)' and '(1 << 23) - 1'. It's probably worth defining 'const F32Sign = 1<<31' and friends for common constants. > + -> -1.0 > + elif e < 0 > + -> 0.0 > + ;; > + > + if u & (1 << 31) != 0 > + var fractional_mask : uint32 = (((1 << 23) - 1) >> shift_e) > + var v : uint32 = u & ~fractional_mask > + if (u & fractional_mask) != 0 > + v += ((1 << 23) >> shift_e) > + ;; > + -> std.flt32frombits(v) > + ;; > + > + var m : uint32 = ~(((1 << 23) - 1) >> shift_e) > + var v : uint32 = u & m > + -> std.flt32frombits(v) > + } > + > + ceil = {f; > + var u : uint32 = std.flt32bits(f) > + var e : int32 = (((u >> 23) & 0xff) : int32) - 127 > + var shift_e : uint32 = (e : uint32) > + if e >= 23 || u == 0x0 > + -> f > + elif (e < 0) && (u & (1 << 31) == 0) > + -> 1.0 > + elif e < 0 > + -> -0.0 > + ;; > + > + if u & (1 << 31) == 0 > + var fractional_mask : uint32 = (((1 << 23) - 1) >> shift_e) > + var v : uint32 = u & ~fractional_mask > + if (u & fractional_mask) != 0 > + v += ((1 << 23) >> shift_e) > + ;; > + -> std.flt32frombits(v) > + ;; > + > + var m : uint32 = ~(((1 << 23) - 1) >> shift_e) > + var v : uint32 = u & m > + -> std.flt32frombits(v) > + } > + > 
+ fast2sum = {a, b; > + var u = std.flt32bits(a) > + var v = std.flt32bits(b) > + if (u & (0xff << 23)) < (v & (0xff << 23)) > + var t = a > + a = b > + b = t > + ;; > + var s = a + b > + var z = s - a > + var t = b - z > + -> (s, t) > + } > +;; > + > +impl fpmath flt64 = Noticing a lot of overlap in the code here. I'm wondering if we can do some dedup here. I feel like we'll probably want to do a second pass to try cleaning that up, once we have a bit better understanding. We may be able to parameterize, and work in 64 bit precision while passing in appropriate rounding. Not a big deal, and I'm happy with it as is for now, but it's something to keep in mind. > + trunc = {f : flt64 > + var u : uint64 = std.flt64bits(f) > + var e : uint64 = (((u >> 52) & 0x7ff) : uint64) - 1023 > + var e_lt_zero : uint64 = ((e >> 63) : uint64) & 0x1 > + var e_ge_zero : uint64 = 1 - e_lt_zero > + var e_ge_52 : uint64 = 1 - ((e - 52) >> 63) > + var m : uint64 = ~(((1 << 52) - 1) >> e) > + m |= (-1 : uint64) * e_ge_52 > + var v_ge_zero : uint64 = (u & m) * e_ge_zero > + var v_lt_zero : uint64 = (u & (1 << 63)) * e_lt_zero > + var v : uint64 = v_ge_zero + v_lt_zero > + -> std.flt64frombits(v) > + } > + > + floor = {f : flt64 > + var u : uint64 = std.flt64bits(f) > + var e : int64 = (((u >> 52) & 0x7ff) : int64) - 1023 > + var shift_e : uint64 = (e : uint64) > + > + if e >= 52 || u == 0x8000000000000000ul > + -> f > + elif (e < 0) && (u & (1 << 63) != 0) > + -> -1.0 > + elif e < 0 > + -> 0.0 > + ;; > + > + if u & (1 << 63) != 0 > + var fractional_mask : uint64 = (((1 << 52) - 1) >> shift_e) > + var v : uint64 = u & ~fractional_mask > + if (u & fractional_mask) != 0 > + v += ((1 << 52) >> shift_e) > + ;; > + -> std.flt64frombits(v) > + ;; > + > + var m : uint64 = ~(((1 << 52) - 1) >> shift_e) > + var v : uint64 = u & m > + -> std.flt64frombits(v) > + } > + > + ceil = {f; > + var u : uint64 = std.flt64bits(f) > + var e : int64 = (((u >> 52) & 0x7ff) : int64) - 1023 > + var shift_e : uint64 
= (e : uint64) > + > + if e >= 52 || u == 0x0ul > + -> f > + elif (e < 0) && (u & (1 << 63) == 0) > + -> 1.0 > + elif e < 0 > + -> -0.0 > + ;; > + > + if u & (1 << 63) == 0 > + var fractional_mask : uint64 = (((1 << 52) - 1) >> shift_e) > + var v : uint64 = u & ~fractional_mask > + if (u & fractional_mask) != 0 > + v += ((1 << 52) >> shift_e) > + ;; > + -> std.flt64frombits(v) > + ;; > + > + var m : uint64 = ~(((1 << 52) - 1) >> shift_e) > + var v : uint64 = u & m > + -> std.flt64frombits(v) > + } > + > + fast2sum = {a, b; > + var u = std.flt64bits(a) > + var v = std.flt64bits(b) > + if (u & (0x7ff << 52)) < (v & (0x7ff << 52)) > + var t = a > + a = b > + b = t > + ;; > + var s = a + b > + var z = s - a > + var t = b - z > + -> (s, t) > + } > +;; > diff --git a/lib/math/test/fpmath.myr b/lib/math/test/fpmath.myr > new file mode 100644 > index 00000000..64de5160 > --- /dev/null > +++ b/lib/math/test/fpmath.myr > @@ -0,0 +1,163 @@ > +use std > +use math > +use testr > + > +const main = { > + testr.run([ > + [.name = "trunc-01", .fn = trunc01], > + [.name = "trunc-02", .fn = trunc02], > + [.name = "floor-01", .fn = floor01], > + [.name = "floor-02", .fn = floor02], > + [.name = "ceil-01", .fn = ceil01], > + [.name = "ceil-02", .fn = ceil02], > + [.name = "fast2sum-01", .fn = fast2sum01], > + ][:]) > +} > + > +impl std.equatable flt32 = > + eq = {a : flt32, b : flt32; -> std.flt32bits(a) == std.flt32bits(b)} > +;; > + > +impl std.equatable flt64 = > + eq = {a : flt64, b : flt64; -> std.flt64bits(a) == std.flt64bits(b)} > +;; > + Probably worth making this into public API, since I doubt that this is the only place we'll want to use floats. 
> +const trunc01 = {c > + var flt32s : (flt32, flt32)[:] = [ > + (0.0, 0.0), > + (-0.0, -0.0), > + (1.0, 1.0), > + (1.1, 1.0), > + (0.9, 0.0), > + (10664524000000000000.0, 10664524000000000000.0), > + (-3.5, -3.0), > + (101.999, 101.0), > + (std.flt32nan(), std.flt32nan()), > + ][:] > + > + for (f, g) : flt32s > + testr.eq(c, math.trunc(f), g) > + ;; > +} > + > +const trunc02 = {c > + var flt64s : (flt64, flt64)[:] = [ > + (0.0, 0.0), > + (-0.0, -0.0), > + (1.0, 1.0), > + (1.1, 1.0), > + (0.9, 0.0), > + (13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0), > + (-3.5, -3.0), > + (101.999, 101.0), > + (std.flt64nan(), std.flt64nan()), > + ][:] > + > + for (f, g) : flt64s > + testr.eq(c, math.trunc(f), g) > + ;; > +} > + > +const floor01 = {c > + var flt32s : (flt32, flt32)[:] = [ > + (0.0, 0.0), > + (-0.0, -0.0), > + (0.5, 0.0), > + (1.1, 1.0), > + (10664524000000000000.0, 10664524000000000000.0), > + (-3.5, -4.0), > + (-101.999, -102.0), > + (std.flt32nan(), std.flt32nan()), > + ][:] > + > + for (f, g) : flt32s > + testr.eq(c, math.floor(f), g) > + ;; > +} > + > +const floor02 = {c > + var flt64s : (flt64, flt64)[:] = [ > + (0.0, 0.0), > + (-0.0, -0.0), > + (0.5, 0.0), > + (1.1, 1.0), > + (13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0), > + (-3.5, -4.0), > + (-101.999, -102.0), > + (std.flt64nan(), std.flt64nan()), > + ][:] > + > + for (f, g) : flt64s > + testr.eq(c, math.floor(f), g) > + ;; > +} > + > +const ceil01 = {c > + var flt32s : 
(flt32, flt32)[:] = [ > + (0.0, 0.0), > + (-0.0, -0.0), > + (0.5, 1.0), > + (-0.1, -0.0), > + (1.1, 2.0), > + (10664524000000000000.0, 10664524000000000000.0), > + (-3.5, -3.0), > + (-101.999, -101.0), > + (std.flt32nan(), std.flt32nan()), > + ][:] > + > + for (f, g) : flt32s > + testr.eq(c, math.ceil(f), g) > + ;; > +} > + > +const ceil02 = {c > + var flt64s : (flt64, flt64)[:] = [ > + (0.0, 0.0), > + (-0.0, -0.0), > + (0.5, 1.0), > + (-0.1, -0.0), > + (1.1, 2.0), > + (13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0, 13809453812721350000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0), > + (-3.5, -3.0), > + (-101.999, -101.0), > + (std.flt64nan(), std.flt64nan()), > + ][:] > + > + for (f, g) : flt64s > + testr.eq(c, math.ceil(f), g) > + ;; > +} > + > +const fast2sum01 = {c > + var flt32s : (flt32, flt32, flt32, flt32)[:] = [ > + (1.0, 1.0, 2.0, 0.0), > + (10664524000000000000.0, 1.11842, 10664524000000000000.0, 1.11842), > + (1.11843, 10664524000000000000.0, 10664524000000000000.0, 1.11843), > + (-21897.1324, 17323.22, -4573.912, 0.0), > + ][:] > + > + for (a, b, s1, t1) : flt32s > + var s2, t2 > + (s2, t2) = math.fast2sum(a, b) > + testr.eq(c, s1, s2) > + testr.eq(c, t1, t2) > + ;; > + > + var flt64s : (flt64, flt64, flt64, flt64)[:] = [ > + (1.0, 1.0, 2.0, 0.0), > + (-21897.1324, 17323.22, -4573.912399999997, 0.0), > + (std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x6a8c3190e27a1884), > + std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x6a8c3190e27a1884)), > + (std.flt64frombits(0x6a8c3190e27a1884), std.flt64frombits(0x78591b0672a81284), > + std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x6a8c3190e27a1884)), > + (std.flt64frombits(0x78591b0672a81284), std.flt64frombits(0x7858273672ca19a0), > + std.flt64frombits(0x7868a11e72b91612), 
0.0), > + ][:] > + > + for (a, b, s1, t1) : flt64s > + var s2, t2 > + (s2, t2) = math.fast2sum(a, b) > + testr.eq(c, s1, s2) > + testr.eq(c, std.flt64bits(t1), std.flt64bits(t2)) > + ;; > +} > -- > 2.16.2 > > -- Ori Bernstein <ori@xxxxxxxxxxxxxx>
[PATCH 0/1] Initial work on lib/math | "S. Gilles" <sgilles@xxxxxxxxxxxx> |
[PATCH 1/1] Implement math.trunc and math.fast2sum | "S. Gilles" <sgilles@xxxxxxxxxxxx> |
- Prev by Date: [PATCH 1/1] Implement math.trunc and math.fast2sum
- Next by Date: [PATCH] Implement logical XOR: ^^
- Previous by thread: [PATCH 1/1] Implement math.trunc and math.fast2sum
- Next by thread: [PATCH] Implement logical XOR: ^^
- Index(es):