Merge pull request #1 from mcabbott/iterate

cscherrer · web-flow · commit eb1b524213d1 · 2022-05-07T12:20:19.000-07:00
Fiddling with `sumlog`
diff --git a/src/sumlog.jl b/src/sumlog.jl
@@ -1,42 +1,94 @@
 """
-$(SIGNATURES)
+    sumlog(X::AbstractArray{T}; dims)
 
-Compute `sum(log.(X))` with a single `log` evaluation.
+Compute `sum(log.(X))` with a single `log` evaluation (one per output
+element when `dims` is given), provided `float(T) <: AbstractFloat`.
 
-This is faster than computing `sum(log.(X))` or even `sum(log, X)`, in
-particular as the size of `X` increases.
-
-This works by representing the `j`th element of `X` as ``x_j = a_j  2^{b_j}``,
+This is faster than computing `sum(log, X)`, especially for large `X`.
+It works by representing the `j`th element of `X` as ``x_j = a_j  2^{b_j}``,
 allowing us to write
 ```math
 \\sum_j \\log{x_j} = \\log(\\prod_j a_j) + \\log{2} \\sum_j b_j
 ```
-Since ``\\log{2}`` is constant, `sumlog` only requires a single `log`
-evaluation.
 """
-function sumlog(x)
-    T = float(eltype(x))
-    _sumlog(T, values(x))
+sumlog(x::AbstractArray{T}; dims=:) where T = _sumlog(float(T), dims, x)
+
+function _sumlog(::Type{T}, ::Colon, x) where {T<:AbstractFloat}
+    sig, ex = mapreduce(_sumlog_op, x; init=(one(T), 0)) do xj
+        xj < 0 && Base.Math.throw_complex_domainerror(:log, xj)
+        float_xj = float(xj)
+        significand(float_xj), _exponent(float_xj) 
+    end
+    return log(sig) + IrrationalConstants.logtwo * T(ex)
 end
 
-@inline function _sumlog(::Type{T}, x) where {T<:AbstractFloat}
-    sig, ex = mapreduce(_sumlog_op, x; init=(one(T), zero(exponent(one(T))))) do xj
+function _sumlog(::Type{T}, dims, x) where {T<:AbstractFloat}
+    sig_ex = mapreduce(_sumlog_op, x; dims=dims, init=(one(T), 0)) do xj
+        xj < 0 && Base.Math.throw_complex_domainerror(:log, xj)
         float_xj = float(xj)
-        significand(float_xj), exponent(float_xj) 
+        significand(float_xj), _exponent(float_xj) 
+    end
+    map(sig_ex) do (sig, ex)
+        log(sig) + IrrationalConstants.logtwo * T(ex)
     end
-    return log(sig) + IrrationalConstants.logtwo * ex
 end
 
+# Fallback: `float(T)` is not always `<: AbstractFloat`, e.g. complex, dual numbers or symbolics
+_sumlog(::Type, dims, x) = sum(log, x; dims)
+
 @inline function _sumlog_op((sig1, ex1), (sig2, ex2))
     sig = sig1 * sig2
+    # sig = ifelse(sig2<0, sig2, sig1 * sig2)
     ex = ex1 + ex2
     # Significands are in the range [1,2), so multiplication will eventually overflow
     if sig > floatmax(typeof(sig)) / 2
-        ex += exponent(sig)
+        ex += _exponent(sig)
         sig = significand(sig)
     end
     return sig, ex
 end
 
-# `float(T)` is not always `isa AbstractFloat`, e.g. dual numbers or symbolics
-@inline _sumlog(::Type{T}, x) where {T} = sum(log, x)
+# The exported `exponent(x)` throws for zero, `NaN` and `Inf`; `_exponent` must not, which is fine as `sig` keeps track of those values.
+_exponent(x::Base.IEEEFloat) = Base.Math._exponent_finite_nonzero(x)  # skips those checks
+_exponent(x::AbstractFloat) = (iszero(x) || !isfinite(x)) ? 0 : Int(exponent(x))  # e.g. for BigFloat; 0 instead of a DomainError
+
+"""
+    sumlog(x)
+    sumlog(f, x, ys...)
+
+For any iterator which produces `AbstractFloat` elements,
+this can use `sumlog`'s fast reduction strategy.
+
+Signature with `f` is equivalent to `sum(log, map(f, x, ys...))`
+or `mapreduce(log∘f, +, x, ys...)`, without intermediate allocations.
+
+Does not accept a `dims` keyword.
+"""
+sumlog(f, x) = sumlog(Iterators.map(f, x))
+sumlog(f, x, ys...) = sumlog(f(xy...) for xy in zip(x, ys...))
+
+# Iterator version, uses the same `_sumlog_op`, should be the same speed.
+function sumlog(x)
+    iter = iterate(x)
+    if isnothing(iter)
+        T = Base._return_type(first, Tuple{typeof(x)})
+        return T <: Number ? zero(float(T)) : 0.0
+    end
+    x1 = float(iter[1])
+    x1 isa AbstractFloat || return sum(log, x)
+    x1 < 0 && Base.Math.throw_complex_domainerror(:log, x1)
+    sig, ex = significand(x1), _exponent(x1)
+    nonfloat = zero(x1)
+    iter = iterate(x, iter[2])
+    while iter !== nothing
+        xj = float(iter[1])
+        if xj isa AbstractFloat
+            xj < 0 && Base.Math.throw_complex_domainerror(:log, xj)
+            sig, ex = _sumlog_op((sig, ex), (significand(xj), _exponent(xj)))
+        else
+            nonfloat += log(xj)
+        end
+        iter = iterate(x, iter[2])
+    end
+    return log(sig) + IrrationalConstants.logtwo * oftype(sig, ex) + nonfloat
+end
diff --git a/test/sumlog.jl b/test/sumlog.jl
@@ -1,25 +1,63 @@
 @testset "sumlog" begin
-    for T in [Int, Float16, Float32, Float64, BigFloat]
-        for x in [10 .* rand(1000), repeat([nextfloat(1.0)], 1000), repeat([prevfloat(2.0)], 1000)]
+    @testset for T in [Float16, Float32, Float64, BigFloat]
+        for x in (
+                T[1,2,3], 
+                10 .* rand(T, 1000),
+                fill(nextfloat(T(1.0)), 1000),
+                fill(prevfloat(T(2.0)), 1000),
+            )
+            @test sumlog(x) isa T
+            
             @test (@inferred sumlog(x)) ≈ sum(log, x)
 
-            y = view(x, 1:100)
-            @test (@inferred sumlog(y)) ≈ sum(log, y)
+            y = @view x[1:min(end, 100)]
+            @test (@inferred sumlog(y')) ≈ sum(log, y)
 
             tup = tuple(y...)
             @test (@inferred sumlog(tup)) ≈ sum(log, tup)
+            #
+            # gen = (sqrt(a) for a in y)
+            # # `eltype` of a `Base.Generator` returns `Any`
+            # @test_broken (@inferred sumlog(gen)) ≈ sum(log, gen)
 
-            gen = (sqrt(a) for a in y)
-            # `eltype` of a `Base.Generator` returns `Any`
-            @test_broken (@inferred sumlog(gen)) ≈ sum(log, gen)
+            # nt = NamedTuple{tuple(Symbol.(1:100)...)}(tup)
+            # @test (@inferred sumlog(y)) ≈ sum(log, y)
 
-            nt = NamedTuple{tuple(Symbol.(1:100)...)}(tup)
-            @test (@inferred sumlog(y)) ≈ sum(log, y)
-
-            i = Random.shuffle(x)
-            z = x .+ i * im
+            z = x .+ im .* Random.shuffle(x)
             @test (@inferred sumlog(z)) ≈ sum(log, z)
         end
 
+        # With dims: each reduced slice must agree with `sum(log, m; dims)`
+        m = 1 .+ rand(T, 10, 10)
+        @test sumlog(m; dims=1) ≈ sum(log, m; dims=1)
+        @test sumlog(m; dims=2) ≈ sum(log, m; dims=2)
+        
+        # Iterator
+        @test sumlog(x^2 for x in m) ≈ sumlog(abs2, m) ≈ sumlog(*, m, m) ≈ sum(log.(m.^2))
+        @test sumlog(x for x in Any[1, 2, 3+im, 4]) ≈ sum(log, Any[1, 2, 3+im, 4])
+        
+        # NaN, Inf
+        if T != BigFloat  # exponent fails here
+            @test isnan(sumlog(T[1, 2, NaN]))
+            @test isinf(sumlog(T[1, 2, Inf]))
+            @test sumlog(T[1, 2, 0.0]) == -Inf
+            @test sumlog(T[1, 2, -0.0]) == -Inf
+        end
+        
+        # Empty
+        @test sumlog(T[]) isa T
+        @test eltype(sumlog(T[]; dims=1)) == T
+        @test sumlog(x for x in T[]) isa T
+
+        # Negative
+        @test_throws DomainError sumlog(T[1, -2, 3])  # easy
+        @test_throws DomainError sumlog(T[1, -2, -3]) # harder
+
+    end
+    @testset "Int" begin
+        @test sumlog([1,2,3]) isa Float64
+        @test sumlog([1,2,3]) ≈ sum(log, [1,2,3])
+        @test sumlog([1 2; 3 4]; dims=1) ≈ sum(log, [1 2; 3 4]; dims=1)
+        @test sumlog(Int(x) for x in Float64[1,2,3]) ≈ sum(log, [1,2,3])
     end
 end