From 1fd4cbd0cfa34f3ca424cf89c1667de8930d8726 Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Thu, 21 May 2015 20:27:51 +0200 Subject: [PATCH 01/15] starting on ROSW --- src/rosw.jl | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/rosw.jl diff --git a/src/rosw.jl b/src/rosw.jl new file mode 100644 index 000000000..70fe9dfaf --- /dev/null +++ b/src/rosw.jl @@ -0,0 +1,95 @@ +# Rosenbrock-Wanner methods +########################### + +# Rosenbrock-W methods are typically specified for autonomous DAE: +# Mẋ = f(x) +# +# by the stage equations +# M kᵢ = hf(x₀ + + Σⱼ aᵢⱼkⱼ) + h J Σⱼ γᵢⱼkⱼ +# +# and step completion formula +# x₁ = x₀ + \Sigmaⱼ bⱼ kⱼ +# +# The method used here uses transformed equations as done in PETSc. + +immutable TableauRosW{Name, S, T} <: Tableau{Name, S, T} + order::(@compat(Tuple{Vararg{Int}})) # the order of the methods + a::Matrix{T} + γ::Matrix{T} + # one or several row vectors. First row is used for the step, + # second for error calc. + b::Matrix{T} + function TableauRosW(order,a,γ,b) + @assert isa(S,Integer) + @assert isa(Name,Symbol) + @assert S==size(γ,1)==size(a,2)==size(γ,1)==size(a,2)==size(b,2) + @assert size(b,1)==length(order) + new(order,a,b,c) + end +end +function TableauRosW{T}(name::Symbol, order::(@compat(Tuple{Vararg{Int}})), + a::Matrix{T}, γ::Matrix{T}, b::Matrix{T}) + TableauRosW{name,size(b,2),T}(order, a, γ, c) +end +function TableauRosW(name::Symbol, order::(@compat(Tuple{Vararg{Int}})), T::Type, + a::Matrix, γ::Matrix, b::Matrix) + TableauRosW{name,size(b,2),T}(order, convert(Matrix{T},a), + convert(Matrix{T},γ), convert(Matrix{T},c) ) +end +conv_field{T,N}(D,a::Array{T,N}) = convert(Array{D,N}, a) +function Base.convert{Tnew<:Real,Name,S,T}(::Type{Tnew}, tab::TableauRKExplicit{Name,S,T}) + # Converts the tableau coefficients to the new type Tnew + newflds = () + @compat for n in fieldnames(tab) + fld = getfield(tab,n) + if eltype(fld)==T + newflds = tuple(newflds..., conv_field(Tnew, fld)) + else + newflds = tuple(newflds..., fld) + end + end + TableauRKExplicit{Name,S,Tnew}(newflds...) # TODO: could this be done more generically in a type-stable way? +end + + + +# Transformed Tableau, used only internally +immutable TableauRosW_T{Name, S, T} <: Tableau{Name, S, T} + order::(@compat(Tuple{Vararg{Int}})) # the order of the methods + a::Matrix{T} # this is TableauRosW.a transformed + γii::Vector{T} + γinv::Matrix{T} + b::Matrix{T} # this is TableauRosW.b transformed +end +function tabletransform{Name,S,T}(rt::TableauRosW{Name,S,T}) + γii = diag(rt.γ) γ + γinv = inv(rt.γ) + ahat = rt.A * γinv + bhat = similar(rt.b) + bhat[:,1] = squeeze(rt.b[:,1]'*γinv,1) + bhat[:,2] = squeeze(rt.b[:,2]'*γinv,1) + return RosWTableTrans{Name,S,T}(rt.order, ahat, γii, γinv, bhat) +end + + +## tableau for ros34pw2 +const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), + [0 0 0 0 + 8.7173304301691801e-01 0 0 0 + 8.4457060015369423e-01 -1.1299064236484185e-01 0 0 + 0 0 1. 0], + [4.3586652150845900e-01 0 0 0 + -8.7173304301691801e-01 4.3586652150845900e-01 0 0 + -9.0338057013044082e-01 5.4180672388095326e-02 4.3586652150845900e-01 0 + 2.4212380706095346e-01 -1.2232505839045147e+00 5.4526025533510214e-01 4.3586652150845900e-01], + [2.4212380706095346e-01 -1.2232505839045147e+00 1.5452602553351020e+00 4.3586652150845900e-01 + 3.7810903145819369e-01 -9.6042292212423178e-02 5.0000000000000000e-01 2.1793326075422950e-01]' + ) +function oderosw_fixed() + +end + +function oderosw_adapt() + +end + From b4b3b7bc58493b3a5a5e69541a2e663267163784 Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Fri, 22 May 2015 16:46:43 +0200 Subject: [PATCH 02/15] fixed step ROSW works --- examples/van_der_pol_rosw.jl | 75 +++++++++++++++++++++ src/ODE.jl | 3 + src/rosw.jl | 123 +++++++++++++++++++++++++++++++---- test/runtests.jl | 1 + 4 files changed, 189 insertions(+), 13 deletions(-) create mode 100644 examples/van_der_pol_rosw.jl diff --git a/examples/van_der_pol_rosw.jl b/examples/van_der_pol_rosw.jl new file mode 100644 index 000000000..746ec40cf --- /dev/null +++ b/examples/van_der_pol_rosw.jl @@ -0,0 +1,75 @@ +# Van der Pol using DASSL.jl +using ODE +abstol = 1e-6 +reltol = 1e-6 + +const mu = 100. + +# Explicit equation +function vdp(t, y) + # vdp(t, y) + # + # ydot of van der Pol equation. (consider using rescaled eq. so + # period stays the same for different mu. Cf. Wanner & Hairer 1991, + # p.5) + return [y[2], mu^2*((1-y[1]^2) * y[2] - y[1])] +end +function Jvdp(t, y) + # Jvdp(t, y, mu) + # + # Jacobian of vdp + return [0 1; + -mu^2*(y[2]*2*y[1] + 1) mu^2*(1-y[1]^2)] +end + +# implicit equation +function vdp_impl(y, ydot) + # vdp_impl(t, y) + # + # Implicit van der Pol equation. (consider using rescaled eq. so + # period stays the same for different mu. Cf. Wanner & Hairer 1991, + # p.5) + return vdp(0,y) - ydot +end +function Jvdp_impl(y, ydot, α) + # d vdp_impl /dy + α d vdp/dy + # + # Jacobian + return Jvdp(0,y) - α * eye(2) +end + +### +# reference as t=2 +## +refsol = [0.1706167732170483e1, -0.8928097010247975e0] + +### +# Fixed step +### + +nt = 50_000; +tspan = linspace(0.,2.0,nt) +y0 = [2.,0.]; + +t,yout1 = ode4s(vdp, y0, tspan; jacobian=Jvdp) +@time t,yout1 = ode4s(vdp, y0, tspan; jacobian=Jvdp) + +t,yout2 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) +@time t,yout2 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) + +println("Fixed step: abs error of ode4s vs ref:") +println(yout1[end]-refsol) +println("Fixed step: abs error of ode_rosw vs ref:") +println(yout2[end]-refsol) + +# #### adaptive rosw +# @time yout3, ts, steps, dts, xerrs = rosw_runner_adaptive( +# vdp_impl, Jvdp_impl, [0., 2.], y0; +# reltol=reltol, abstol=abstol, dt0=1e-5, mindt=1e-8) + +# check + +# using Winston +# plot(ts, yout3[1,:]) + +# oplot(tspan, yout2[1,:],"r") diff --git a/src/ODE.jl b/src/ODE.jl index a364988d8..2c8287ad9 100644 --- a/src/ODE.jl +++ b/src/ODE.jl @@ -423,4 +423,7 @@ const ms_coefficients4 = [ 1 0 0 0 -9/24 37/24 -59/24 55/24] +## Rosenbrock-Wanner methods +include("rosw.jl") + end # module ODE diff --git a/src/rosw.jl b/src/rosw.jl index 70fe9dfaf..33e2d1cac 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -1,6 +1,8 @@ # Rosenbrock-Wanner methods ########################### +export ode_rosw + # Rosenbrock-W methods are typically specified for autonomous DAE: # Mẋ = f(x) # @@ -12,6 +14,10 @@ # # The method used here uses transformed equations as done in PETSc. + +# Tableaus +########## + immutable TableauRosW{Name, S, T} <: Tableau{Name, S, T} order::(@compat(Tuple{Vararg{Int}})) # the order of the methods a::Matrix{T} @@ -24,20 +30,20 @@ immutable TableauRosW{Name, S, T} <: Tableau{Name, S, T} @assert isa(Name,Symbol) @assert S==size(γ,1)==size(a,2)==size(γ,1)==size(a,2)==size(b,2) @assert size(b,1)==length(order) - new(order,a,b,c) + new(order,a,γ,b) end end function TableauRosW{T}(name::Symbol, order::(@compat(Tuple{Vararg{Int}})), a::Matrix{T}, γ::Matrix{T}, b::Matrix{T}) - TableauRosW{name,size(b,2),T}(order, a, γ, c) + TableauRosW{name,size(b,2),T}(order, a, γ, b) end function TableauRosW(name::Symbol, order::(@compat(Tuple{Vararg{Int}})), T::Type, a::Matrix, γ::Matrix, b::Matrix) TableauRosW{name,size(b,2),T}(order, convert(Matrix{T},a), - convert(Matrix{T},γ), convert(Matrix{T},c) ) + convert(Matrix{T},γ), convert(Matrix{T},b) ) end conv_field{T,N}(D,a::Array{T,N}) = convert(Array{D,N}, a) -function Base.convert{Tnew<:Real,Name,S,T}(::Type{Tnew}, tab::TableauRKExplicit{Name,S,T}) +function Base.convert{Tnew<:Real,Name,S,T}(::Type{Tnew}, tab::TableauRosW{Name,S,T}) # Converts the tableau coefficients to the new type Tnew newflds = () @compat for n in fieldnames(tab) @@ -48,7 +54,7 @@ function Base.convert{Tnew<:Real,Name,S,T}(::Type{Tnew}, tab::TableauRKExplicit{ newflds = tuple(newflds..., fld) end end - TableauRKExplicit{Name,S,Tnew}(newflds...) # TODO: could this be done more generically in a type-stable way? + TableauRosW{Name,S,Tnew}(newflds...) # TODO: could this be done more generically in a type-stable way? end @@ -62,18 +68,18 @@ immutable TableauRosW_T{Name, S, T} <: Tableau{Name, S, T} b::Matrix{T} # this is TableauRosW.b transformed end function tabletransform{Name,S,T}(rt::TableauRosW{Name,S,T}) - γii = diag(rt.γ) γ + γii = diag(rt.γ) γinv = inv(rt.γ) - ahat = rt.A * γinv + ahat = rt.a * γinv bhat = similar(rt.b) - bhat[:,1] = squeeze(rt.b[:,1]'*γinv,1) - bhat[:,2] = squeeze(rt.b[:,2]'*γinv,1) - return RosWTableTrans{Name,S,T}(rt.order, ahat, γii, γinv, bhat) + bhat[1,:] = squeeze(rt.b[1,:]*γinv,1) + bhat[2,:] = squeeze(rt.b[2,:]*γinv,1) + return TableauRosW_T{Name,S,T}(rt.order, ahat, γii, γinv, bhat) end ## tableau for ros34pw2 -const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), +const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), Float64, [0 0 0 0 8.7173304301691801e-01 0 0 0 8.4457060015369423e-01 -1.1299064236484185e-01 0 0 @@ -83,13 +89,104 @@ const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), -9.0338057013044082e-01 5.4180672388095326e-02 4.3586652150845900e-01 0 2.4212380706095346e-01 -1.2232505839045147e+00 5.4526025533510214e-01 4.3586652150845900e-01], [2.4212380706095346e-01 -1.2232505839045147e+00 1.5452602553351020e+00 4.3586652150845900e-01 - 3.7810903145819369e-01 -9.6042292212423178e-02 5.0000000000000000e-01 2.1793326075422950e-01]' + 3.7810903145819369e-01 -9.6042292212423178e-02 5.0000000000000000e-01 2.1793326075422950e-01] ) -function oderosw_fixed() +################### +# Fixed step solver +################### +ode_rosw(fn, Jfn, y0, tspan) = oderosw_fixed(fn, Jfn, y0::AbstractVector, tspan, bt_ros34pw2) +function oderosw_fixed{N,S}(fn, Jfn, y0::AbstractVector, tspan, + btab::TableauRosW{N,S}) + # TODO: refactor with oderk_fixed + Et, Eyf, Ty, btab = make_consistent_types(fn, y0, tspan, btab) + btab = tabletransform(btab) + dof = length(y0) + + ys = Array(Ty, length(tspan)) + allocate!(ys, y0, dof) + ys[1] = deepcopy(y0) + + tspan = convert(Vector{Et}, tspan) + # work arrays: + ks = Array(Ty, S) + # allocate!(ks, y0, dof) # no need to allocate as fn is not in-place + ytmp = similar(y0, Eyf, dof) + for i=1:length(tspan)-1 + dt = tspan[i+1]-tspan[i] + ys[i+1] = rosw_step(fn, Jfn, ys[i], dt, btab, 2) + end + return tspan, ys +end + +function linsolve(h, hprime, y0) + # Does one Newton step, i.e. a linear solve. + return y0 - hprime(y0)\h(y0) end + function oderosw_adapt() end +function rosw_step(g, gprime, x0, dt, + btab::TableauRosW_T, bt_ind=1) + # This takes one step for a ode/dae system defined by + # g(x,xdot)=0 + # gprime(x, xdot, α) = dg/dx + α dg/dxdot + + stages = size(btab.a,1) + dof = length(x0) + ys = zeros(eltype(x0), stages, dof) + + # stage solutions + jacobian_stale = true + hprime_store = zeros(dof,dof) + + cij = (tril(btab.γinv)-diagm(diag(btab.γinv)))/dt + for i=1:stages + u = zeros(dof) + udot = zeros(dof) + function h(yi) # length(yi)==dof + # this function is Eq 5 + u[:] = x0 + yi + for j=1:i-1 + for d=1:dof + u[d] += btab.a[i,j]*ys[j,d] + end + end + udot[:] = 1./(dt*btab.γii[i]).*yi # jed: is this index with the stage? + for j=1:i + # this is Eq.5-3 + for d=1:dof + udot[d] += cij[i,j]*ys[j,d] + end + end + g(u, udot) + end + function hprime(yi) + # here we only update the jacobian once per time step + if jacobian_stale + hprime_store[:,:] = gprime(u, udot, 1./(dt*btab.γii[i])) # jed: is this index with the stage? + end + hprime_store + end + # this is just a linear solve, usually +# ys[i,:] = nlsolve(h, zeros(dof), hprime; opts=one_step_only) + ys[i,:] = linsolve(h, hprime, zeros(dof)) +# ys[i,:] = newtonsolve(h, hprime, zeros(dof), maxsteps=1, warn=false) + if i==1 + # calculate jacobian + jacobian_stale = false + end + end + + # completion: (done twice for error control) + x1 = zeros(eltype(x0), length(x0)) + for i=1:dof + for j=1:stages + x1[i] += btab.b[bt_ind,j]*ys[j,i] + end + end + return x0 + x1 +end diff --git a/test/runtests.jl b/test/runtests.jl index 64f57e0b7..86fc105f0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -23,6 +23,7 @@ solvers = [ # fixed-step ODE.ode4s_s, ODE.ode4s_kr, +# ODE.ode_rosw, # adaptive ODE.ode23s] From 136f4d9ef08b4bdee298d483375324823c4248a5 Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Fri, 22 May 2015 17:07:10 +0200 Subject: [PATCH 03/15] adaptive solver working --- examples/van_der_pol_rosw.jl | 41 +++++++---- src/rosw.jl | 129 ++++++++++++++++++++++++++++++++++- 2 files changed, 155 insertions(+), 15 deletions(-) diff --git a/examples/van_der_pol_rosw.jl b/examples/van_der_pol_rosw.jl index 746ec40cf..0a2751a01 100644 --- a/examples/van_der_pol_rosw.jl +++ b/examples/van_der_pol_rosw.jl @@ -42,27 +42,44 @@ end # reference as t=2 ## refsol = [0.1706167732170483e1, -0.8928097010247975e0] +y0 = [2.,0.]; +tstart = 0. +tend = 2. + + ### # Fixed step ### +# nt = 50_000; +# tspan = linspace(tstart, tend, nt) -nt = 50_000; -tspan = linspace(0.,2.0,nt) -y0 = [2.,0.]; +# t,yout1 = ode4s(vdp, y0, tspan; jacobian=Jvdp) +# @time t,yout1 = ode4s(vdp, y0, tspan; jacobian=Jvdp) + +# t,yout2 = ode_rosw_fixed(vdp_impl, Jvdp_impl, y0, tspan) +# @time t,yout2 = ode_rosw_fixed(vdp_impl, Jvdp_impl, y0, tspan) + +# println("Fixed step: abs error of ode4s vs ref:") +# println(yout1[end]-refsol) +# println("Fixed step: abs error of ode_rosw vs ref:") +# println(yout2[end]-refsol) + +# #### adaptive +tspan = linspace(tstart, tend, 2) -t,yout1 = ode4s(vdp, y0, tspan; jacobian=Jvdp) -@time t,yout1 = ode4s(vdp, y0, tspan; jacobian=Jvdp) +t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) +@time t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) -t,yout2 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) -@time t,yout2 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) +t,yout4 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) +@time t,yout4 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) -println("Fixed step: abs error of ode4s vs ref:") -println(yout1[end]-refsol) -println("Fixed step: abs error of ode_rosw vs ref:") -println(yout2[end]-refsol) +println("Adaptive step: abs error of ode23s vs ref:") +println(yout3[end]-refsol) +println("Adaptive step: abs error of ode_rosw vs ref:") +println(yout4[end]-refsol) -# #### adaptive rosw +# rosw # @time yout3, ts, steps, dts, xerrs = rosw_runner_adaptive( # vdp_impl, Jvdp_impl, [0., 2.], y0; # reltol=reltol, abstol=abstol, dt0=1e-5, mindt=1e-8) diff --git a/src/rosw.jl b/src/rosw.jl index 33e2d1cac..517262364 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -1,7 +1,7 @@ # Rosenbrock-Wanner methods ########################### -export ode_rosw +export ode_rosw, ode_rosw_fixed # Rosenbrock-W methods are typically specified for autonomous DAE: # Mẋ = f(x) @@ -95,7 +95,7 @@ const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), Float64, ################### # Fixed step solver ################### -ode_rosw(fn, Jfn, y0, tspan) = oderosw_fixed(fn, Jfn, y0::AbstractVector, tspan, bt_ros34pw2) +ode_rosw_fixed(fn, Jfn, y0, tspan) = oderosw_fixed(fn, Jfn, y0, tspan, bt_ros34pw2) function oderosw_fixed{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRosW{N,S}) # TODO: refactor with oderk_fixed @@ -124,8 +124,131 @@ function linsolve(h, hprime, y0) return y0 - hprime(y0)\h(y0) end +ode_rosw(fn, Jfn, y0, tspan;kwargs...) = oderosw_adapt(fn, Jfn, y0, tspan, bt_ros34pw2; kwargs...) +function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRosW{N,S}; + reltol = 1.0e-5, abstol = 1.0e-8, + norm=Base.norm, + minstep=abs(tspan[end] - tspan[1])/1e9, + maxstep=abs(tspan[end] - tspan[1])/2.5, + initstep=0., + points=:all + ) + fn_expl = (t,y)->fn(y, y*0) + Et, Eyf, Ty, btab = make_consistent_types(fn, y0, tspan, btab) + btab = tabletransform(btab) + # parameters + order = minimum(btab.order) + timeout_const = 5 # after step reduction do not increase step for + # timeout_const steps + + ## Initialization + dof = length(y0) + tspan = convert(Vector{Et}, tspan) + t = tspan[1] + tstart = tspan[1] + tend = tspan[end] + + # work arrays: + y = similar(y0, Eyf, dof) # y at time t + y[:] = y0 + ytrial = similar(y0, Eyf, dof) # trial solution at time t+dt + yerr = similar(y0, Eyf, dof) # error of trial solution + ks = Array(Ty, S) + # allocate!(ks, y0, dof) # no need to allocate as fn is not in-place + ytmp = similar(y0, Eyf, dof) + + # output ys + nsteps_fixed = length(tspan) # these are always output + ys = Array(Ty, nsteps_fixed) + allocate!(ys, y0, dof) + ys[1] = y0 + + # Option points determines where solution is returned: + if points==:all + tspan_fixed = tspan + tspan = Et[tstart] + iter_fixed = 2 # index into tspan_fixed + sizehint!(tspan, nsteps_fixed) + elseif points!=:specified + error("Unrecognized option points==$points") + end + # Time + dt, tdir, ks[1] = hinit(fn_expl, y, tstart, tend, order, reltol, abstol) # sets ks[1]=f0 + if initstep!=0 + dt = sign(initstep)==tdir ? initstep : error("initstep has wrong sign.") + end + # Diagnostics + dts = Et[] + errs = Float64[] + steps = [0,0] # [accepted, rejected] -function oderosw_adapt() + ## Integration loop + laststep = false + timeout = 0 # for step-control + iter = 2 # the index into tspan and ys + while true + ytrial[:] = rosw_step(fn, Jfn, y, dt, btab, 1) + yerr[:] = ytrial - rosw_step(fn, Jfn, y, dt, btab, 2) + err, newdt, timeout = stepsize_hw92!(dt, tdir, y, ytrial, yerr, order, timeout, + dof, abstol, reltol, maxstep, norm) + + if err<=1.0 # accept step + # diagnostics + steps[1] +=1 + push!(dts, dt) + push!(errs, err) + + # Output: + f0 = ks[1] + f1 = fn_expl(t+dt, ytrial) + if points==:specified + # interpolate onto given output points + while iter-1= tdir*tend + dt = tend-t + laststep = true # next step is the last, if it succeeds + end + elseif abs(newdt) Date: Mon, 25 May 2015 17:23:39 +0200 Subject: [PATCH 04/15] first performance updates --- examples/van_der_pol_rosw.jl | 2 + src/rosw.jl | 98 ++++++++++++++++++++++++++++++++---- 2 files changed, 89 insertions(+), 11 deletions(-) diff --git a/examples/van_der_pol_rosw.jl b/examples/van_der_pol_rosw.jl index 0a2751a01..d24f4444a 100644 --- a/examples/van_der_pol_rosw.jl +++ b/examples/van_der_pol_rosw.jl @@ -69,9 +69,11 @@ tend = 2. tspan = linspace(tstart, tend, 2) t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) +gc() @time t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) t,yout4 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) +gc() @time t,yout4 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) println("Adaptive step: abs error of ode23s vs ref:") diff --git a/src/rosw.jl b/src/rosw.jl index 517262364..64252f3f9 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -63,9 +63,11 @@ end immutable TableauRosW_T{Name, S, T} <: Tableau{Name, S, T} order::(@compat(Tuple{Vararg{Int}})) # the order of the methods a::Matrix{T} # this is TableauRosW.a transformed - γii::Vector{T} γinv::Matrix{T} b::Matrix{T} # this is TableauRosW.b transformed + # derived quantities: + γii::Vector{T} + c::Matrix{T} # = (tril(btab.γinv)-diagm(diag(btab.γinv))) end function tabletransform{Name,S,T}(rt::TableauRosW{Name,S,T}) γii = diag(rt.γ) @@ -74,7 +76,8 @@ function tabletransform{Name,S,T}(rt::TableauRosW{Name,S,T}) bhat = similar(rt.b) bhat[1,:] = squeeze(rt.b[1,:]*γinv,1) bhat[2,:] = squeeze(rt.b[2,:]*γinv,1) - return TableauRosW_T{Name,S,T}(rt.order, ahat, γii, γinv, bhat) + c = (tril(γinv)-diagm(diag(γinv))) + return TableauRosW_T{Name,S,T}(rt.order, ahat, γinv, bhat, γii, c) end @@ -114,7 +117,7 @@ function oderosw_fixed{N,S}(fn, Jfn, y0::AbstractVector, tspan, ytmp = similar(y0, Eyf, dof) for i=1:length(tspan)-1 dt = tspan[i+1]-tspan[i] - ys[i+1] = rosw_step(fn, Jfn, ys[i], dt, btab, 2) + ys[i+1] = rosw_step!(fn, Jfn, ys[i], dt, btab, 2) end return tspan, ys end @@ -133,6 +136,9 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos initstep=0., points=:all ) + # TODO: refactor with oderk_adapt + + ## Figure types fn_expl = (t,y)->fn(y, y*0) Et, Eyf, Ty, btab = make_consistent_types(fn, y0, tspan, btab) btab = tabletransform(btab) @@ -141,7 +147,7 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos timeout_const = 5 # after step reduction do not increase step for # timeout_const steps - ## Initialization + ## Setup dof = length(y0) tspan = convert(Vector{Et}, tspan) t = tspan[1] @@ -154,8 +160,10 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos ytrial = similar(y0, Eyf, dof) # trial solution at time t+dt yerr = similar(y0, Eyf, dof) # error of trial solution ks = Array(Ty, S) - # allocate!(ks, y0, dof) # no need to allocate as fn is not in-place - ytmp = similar(y0, Eyf, dof) + allocate!(ks, y0, dof) # no need to allocate as fn is not in-place + hprime_store = zeros(Eyf,dof,dof) + u = zeros(Eyf,dof) + udot = zeros(Eyf,dof) # output ys nsteps_fixed = length(tspan) # these are always output @@ -187,11 +195,21 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos timeout = 0 # for step-control iter = 2 # the index into tspan and ys while true - ytrial[:] = rosw_step(fn, Jfn, y, dt, btab, 1) - yerr[:] = ytrial - rosw_step(fn, Jfn, y, dt, btab, 2) + ytrial[:] = rosw_step!(fn, Jfn, y, dt, btab, ks, hprime_store, u, udot, 1) + # completion again, see line 927 of + # http://www.mcs.anl.gov/petsc/petsc-current/src/ts/impls/rosw/rosw.c.html#TSROSW + yerr[:] = 0.0 + for j=1:S + for i=1:dof + yerr[i] += (btab.b[2,j]-btab.b[1,j])*ks[j][i] + end + end + + # ytrial[:] = rosw_step(fn, Jfn, y, dt, btab, 1) + # yerr[:] = ytrial - rosw_step(fn, Jfn, y, dt, btab, 2) + err, newdt, timeout = stepsize_hw92!(dt, tdir, y, ytrial, yerr, order, timeout, dof, abstol, reltol, maxstep, norm) - if err<=1.0 # accept step # diagnostics steps[1] +=1 @@ -266,7 +284,6 @@ function rosw_step(g, gprime, x0, dt, jacobian_stale = true hprime_store = zeros(dof,dof) - cij = (tril(btab.γinv)-diagm(diag(btab.γinv)))/dt for i=1:stages u = zeros(dof) udot = zeros(dof) @@ -282,7 +299,7 @@ function rosw_step(g, gprime, x0, dt, for j=1:i # this is Eq.5-3 for d=1:dof - udot[d] += cij[i,j]*ys[j,d] + udot[d] += btab.c[i,j]/dt*ys[j,d] end end g(u, udot) @@ -313,3 +330,62 @@ function rosw_step(g, gprime, x0, dt, end return x0 + x1 end + +function rosw_step!(g, gprime, x0, dt, + btab::TableauRosW_T, ks, hprime_store, u, udot, bt_ind=1) + # This takes one step for a ode/dae system defined by + # g(x,xdot)=0 + # gprime(x, xdot, α) = dg/dx + α dg/dxdot + + stages = size(btab.a,1) + dof = length(x0) + + # stage solutions + jacobian_stale = true + + # calculate kappa + for i=1:stages + function h(yi) # length(yi)==dof + # this function is Eq 5 + u[:] = x0 + u[:] += yi + for j=1:i-1 + for d=1:dof + u[d] += btab.a[i,j]*ks[j][d] + end + end + udot[:] = 1./(dt*btab.γii[i]).*yi # jed: is this index with the stage? + for j=1:i + # this is Eq.5-3 + for d=1:dof + udot[d] += btab.c[i,j]/dt*ks[j][d] + end + end + g(u, udot) + end + function hprime(yi) + # here we only update the jacobian once per time step + if jacobian_stale + hprime_store[:,:] = gprime(u, udot, 1./(dt*btab.γii[i])) # jed: is this index with the stage? + end + hprime_store + end + # this is just a linear solve, usually +# ks[i,:] = nlsolve(h, zeros(dof), hprime; opts=one_step_only) + ks[i][:] = linsolve(h, hprime, zeros(dof)) +# ks[i,:] = newtonsolve(h, hprime, zeros(dof), maxsteps=1, warn=false) + if i==1 + # calculate jacobian + jacobian_stale = false + end + end + + # completion: (done twice for error control) + x1 = zeros(eltype(x0), length(x0)) + for j=1:stages + for i=1:dof + x1[i] += btab.b[bt_ind,j]*ks[j][i] + end + end + return x0 + x1 +end From 79291063d60b95920b737a700b8c18d1f43d1ca2 Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Tue, 26 May 2015 19:16:41 +0200 Subject: [PATCH 05/15] Tidier and running faster. Now onto in-place functions --- examples/van_der_pol_rosw.jl | 26 ++++ src/rosw.jl | 294 +++++++++++++++-------------------- 2 files changed, 151 insertions(+), 169 deletions(-) diff --git a/examples/van_der_pol_rosw.jl b/examples/van_der_pol_rosw.jl index d24f4444a..3663805d0 100644 --- a/examples/van_der_pol_rosw.jl +++ b/examples/van_der_pol_rosw.jl @@ -38,6 +38,32 @@ function Jvdp_impl(y, ydot, α) return Jvdp(0,y) - α * eye(2) end +# implicit inplace equation +function vdp_impl!(res, y, ydot) + # vdp_impl(t, y) + # + # Implicit van der Pol equation. (consider using rescaled eq. so + # period stays the same for different mu. Cf. Wanner & Hairer 1991, + # p.5) + # + # Note, that ydot can be used as res here. + res[1] = y[2] - ydot[1] + res[2] = mu^2*((1-y[1]^2) * y[2] - y[1]) - ydot[2] + return nothing +end +function Jvdp_impl!(res, y, ydot, α) + # d vdp_impl /dy + α d vdp/dy + # + # Jacobian + + res[1,1] = 0 - α + res[1,2] = 1 + res[2,1] = -mu^2*(y[2]*2*y[1] + 1) + res[2,2] = mu^2*(1-y[1]^2) - α + return nothing +end + + ### # reference as t=2 ## diff --git a/src/rosw.jl b/src/rosw.jl index 64252f3f9..20e983ea5 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -1,6 +1,8 @@ # Rosenbrock-Wanner methods ########################### - +# +# Main references: Wanner & Hairrere 1996, PETSc http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/TS/TSROSW.html +@show "NEW" export ode_rosw, ode_rosw_fixed # Rosenbrock-W methods are typically specified for autonomous DAE: @@ -66,17 +68,21 @@ immutable TableauRosW_T{Name, S, T} <: Tableau{Name, S, T} γinv::Matrix{T} b::Matrix{T} # this is TableauRosW.b transformed # derived quantities: - γii::Vector{T} + γii::T c::Matrix{T} # = (tril(btab.γinv)-diagm(diag(btab.γinv))) end function tabletransform{Name,S,T}(rt::TableauRosW{Name,S,T}) - γii = diag(rt.γ) + # the code only works if + if !all(x->x==rt.γ[1],diag(rt.γ)) + error("This Rosenbrock implementation only works for tableaus with γ_ii==γ_jj for all i,j.") + end + γii = rt.γ[1,1] γinv = inv(rt.γ) ahat = rt.a * γinv bhat = similar(rt.b) bhat[1,:] = squeeze(rt.b[1,:]*γinv,1) bhat[2,:] = squeeze(rt.b[2,:]*γinv,1) - c = (tril(γinv)-diagm(diag(γinv))) + c = (tril(γinv)-diagm(diag(γinv))) # negative of W&H definition return TableauRosW_T{Name,S,T}(rt.order, ahat, γinv, bhat, γii, c) end @@ -98,37 +104,33 @@ const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), Float64, ################### # Fixed step solver ################### -ode_rosw_fixed(fn, Jfn, y0, tspan) = oderosw_fixed(fn, Jfn, y0, tspan, bt_ros34pw2) -function oderosw_fixed{N,S}(fn, Jfn, y0::AbstractVector, tspan, +ode_rosw_fixed(fn, Jfn, x0, tspan) = oderosw_fixed(fn, Jfn, x0, tspan, bt_ros34pw2) +function oderosw_fixed{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRosW{N,S}) # TODO: refactor with oderk_fixed - Et, Eyf, Ty, btab = make_consistent_types(fn, y0, tspan, btab) + Et, Exf, Tx, btab = make_consistent_types(fn, x0, tspan, btab) btab = tabletransform(btab) - dof = length(y0) + dof = length(x0) - ys = Array(Ty, length(tspan)) - allocate!(ys, y0, dof) - ys[1] = deepcopy(y0) + xs = Array(Tx, length(tspan)) + allocate!(xs, x0, dof) + xs[1] = deepcopy(x0) tspan = convert(Vector{Et}, tspan) # work arrays: - ks = Array(Ty, S) - # allocate!(ks, y0, dof) # no need to allocate as fn is not in-place - ytmp = similar(y0, Eyf, dof) + ks = Array(Tx, S) + # allocate!(ks, x0, dof) # no need to allocate as fn is not in-place + xtmp = similar(x0, Exf, dof) for i=1:length(tspan)-1 dt = tspan[i+1]-tspan[i] - ys[i+1] = rosw_step!(fn, Jfn, ys[i], dt, btab, 2) + xs[i+1] = rosw_step!(fn, Jfn, xs[i], dt, btab, 2) end - return tspan, ys + return tspan, xs end -function linsolve(h, hprime, y0) - # Does one Newton step, i.e. a linear solve. - return y0 - hprime(y0)\h(y0) -end -ode_rosw(fn, Jfn, y0, tspan;kwargs...) = oderosw_adapt(fn, Jfn, y0, tspan, bt_ros34pw2; kwargs...) -function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRosW{N,S}; +ode_rosw(fn, Jfn, x0, tspan;kwargs...) = oderosw_adapt(fn, Jfn, x0, tspan, bt_ros34pw2; kwargs...) +function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; reltol = 1.0e-5, abstol = 1.0e-8, norm=Base.norm, minstep=abs(tspan[end] - tspan[1])/1e9, @@ -139,8 +141,8 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos # TODO: refactor with oderk_adapt ## Figure types - fn_expl = (t,y)->fn(y, y*0) - Et, Eyf, Ty, btab = make_consistent_types(fn, y0, tspan, btab) + fn_expl = (t,x)->fn(x, x*0) + Et, Exf, Tx, btab = make_consistent_types(fn, x0, tspan, btab) btab = tabletransform(btab) # parameters order = minimum(btab.order) @@ -148,28 +150,30 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos # timeout_const steps ## Setup - dof = length(y0) + dof = length(x0) tspan = convert(Vector{Et}, tspan) t = tspan[1] tstart = tspan[1] tend = tspan[end] # work arrays: - y = similar(y0, Eyf, dof) # y at time t - y[:] = y0 - ytrial = similar(y0, Eyf, dof) # trial solution at time t+dt - yerr = similar(y0, Eyf, dof) # error of trial solution - ks = Array(Ty, S) - allocate!(ks, y0, dof) # no need to allocate as fn is not in-place - hprime_store = zeros(Eyf,dof,dof) - u = zeros(Eyf,dof) - udot = zeros(Eyf,dof) - - # output ys + x = similar(x0, Exf, dof) # x at time t (time at beginning of step) + x[:] = x0 # fill with IC + xtrial = similar(x0, Exf, dof) # trial solution at time t+dt + xerr = similar(x0, Exf, dof) # error of trial solution + k = Array(Tx, S) # stage variables + allocate!(k, x0, dof) + ks = zeros(Exf,dof) # work vector for one k + h_store = zeros(Exf,dof) # work vector + jac_store = zeros(Exf,dof,dof) # Jacobian storage + u = zeros(Exf,dof) # work vector + udot = zeros(Exf,dof) # work vector + + # output xs nsteps_fixed = length(tspan) # these are always output - ys = Array(Ty, nsteps_fixed) - allocate!(ys, y0, dof) - ys[1] = y0 + xs = Array(Tx, nsteps_fixed) + allocate!(xs, x0, dof) + xs[1] = x0 # Option points determines where solution is returned: if points==:all @@ -181,7 +185,7 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos error("Unrecognized option points==$points") end # Time - dt, tdir, ks[1] = hinit(fn_expl, y, tstart, tend, order, reltol, abstol) # sets ks[1]=f0 + dt, tdir, k[1] = hinit(fn_expl, x, tstart, tend, order, reltol, abstol) # sets k[1]=f0 if initstep!=0 dt = sign(initstep)==tdir ? initstep : error("initstep has wrong sign.") end @@ -193,22 +197,19 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos ## Integration loop laststep = false timeout = 0 # for step-control - iter = 2 # the index into tspan and ys + iter = 2 # the index into tspan and xs while true - ytrial[:] = rosw_step!(fn, Jfn, y, dt, btab, ks, hprime_store, u, udot, 1) - # completion again, see line 927 of + rosw_step!(xtrial, fn, Jfn, x, dt, dof, btab, + k, h_store, jac_store, ks, u, udot, 1) + # Completion again for embedded method, see line 927 of # http://www.mcs.anl.gov/petsc/petsc-current/src/ts/impls/rosw/rosw.c.html#TSROSW - yerr[:] = 0.0 - for j=1:S - for i=1:dof - yerr[i] += (btab.b[2,j]-btab.b[1,j])*ks[j][i] + xerr[:] = 0.0 + for s=1:S + for d=1:dof + xerr[d] += (btab.b[2,s]-btab.b[1,s])*k[s][d] end end - - # ytrial[:] = rosw_step(fn, Jfn, y, dt, btab, 1) - # yerr[:] = ytrial - rosw_step(fn, Jfn, y, dt, btab, 2) - - err, newdt, timeout = stepsize_hw92!(dt, tdir, y, ytrial, yerr, order, timeout, + err, newdt, timeout = stepsize_hw92!(dt, tdir, x, xtrial, xerr, order, timeout, dof, abstol, reltol, maxstep, norm) if err<=1.0 # accept step # diagnostics @@ -217,35 +218,35 @@ function oderosw_adapt{N,S}(fn, Jfn, y0::AbstractVector, tspan, btab::TableauRos push!(errs, err) # Output: - f0 = ks[1] - f1 = fn_expl(t+dt, ytrial) + f0 = k[1] + f1 = fn_expl(t+dt, xtrial) if points==:specified # interpolate onto given output points while iter-1 Date: Tue, 26 May 2015 21:58:19 +0200 Subject: [PATCH 06/15] updated to work with inplace functions --- examples/van_der_pol_rosw.jl | 20 +++++++++++-- src/rosw.jl | 55 ++++++++++++++++-------------------- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/examples/van_der_pol_rosw.jl b/examples/van_der_pol_rosw.jl index 3663805d0..75bc32c1d 100644 --- a/examples/van_der_pol_rosw.jl +++ b/examples/van_der_pol_rosw.jl @@ -38,6 +38,15 @@ function Jvdp_impl(y, ydot, α) return Jvdp(0,y) - α * eye(2) end +# Results 24dfdc9cacfd6 +# elapsed time: 0.025767095 seconds (25194952 bytes allocated) +# elapsed time: 0.00997568 seconds (6674864 bytes allocated) +# Adaptive step: abs error of ode23s vs ref: +# [0.012513592025336528,0.013225223452835055] +# Adaptive step: abs error of ode_rosw vs ref: +# [0.012416936355840624,0.013124961519338618] + + # implicit inplace equation function vdp_impl!(res, y, ydot) # vdp_impl(t, y) @@ -63,6 +72,13 @@ function Jvdp_impl!(res, y, ydot, α) return nothing end +# elapsed time: 0.02611999 seconds (25194952 bytes allocated) +# elapsed time: 0.006630583 seconds (4025352 bytes allocated) +# Adaptive step: abs error of ode23s vs ref: +# [0.012513592025336528,0.013225223452835055] +# Adaptive step: abs error of ode_rosw vs ref: +# [0.012416936355840624,0.013124961519338618] + ### # reference as t=2 @@ -98,9 +114,9 @@ t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) gc() @time t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) -t,yout4 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) +t,yout4 = ode_rosw(vdp_impl!, Jvdp_impl!, y0, tspan) gc() -@time t,yout4 = ode_rosw(vdp_impl, Jvdp_impl, y0, tspan) +@time t,yout4 = ode_rosw(vdp_impl!, Jvdp_impl!, y0, tspan) println("Adaptive step: abs error of ode23s vs ref:") println(yout3[end]-refsol) diff --git a/src/rosw.jl b/src/rosw.jl index 20e983ea5..03c3a1e0e 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -141,7 +141,7 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos # TODO: refactor with oderk_adapt ## Figure types - fn_expl = (t,x)->fn(x, x*0) + fn_expl = (t,x)->(out=similar(x); fn(out, x, x*0); out) Et, Exf, Tx, btab = make_consistent_types(fn, x0, tspan, btab) btab = tabletransform(btab) # parameters @@ -164,7 +164,6 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos k = Array(Tx, S) # stage variables allocate!(k, x0, dof) ks = zeros(Exf,dof) # work vector for one k - h_store = zeros(Exf,dof) # work vector jac_store = zeros(Exf,dof,dof) # Jacobian storage u = zeros(Exf,dof) # work vector udot = zeros(Exf,dof) # work vector @@ -200,7 +199,7 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos iter = 2 # the index into tspan and xs while true rosw_step!(xtrial, fn, Jfn, x, dt, dof, btab, - k, h_store, jac_store, ks, u, udot, 1) + k, jac_store, ks, u, udot, 1) # Completion again for embedded method, see line 927 of # http://www.mcs.anl.gov/petsc/petsc-current/src/ts/impls/rosw/rosw.c.html#TSROSW xerr[:] = 0.0 @@ -270,29 +269,27 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos return tspan, xs end -function rosw_step!{N,S}(xtrial, g, gprime, x, dt, dof, btab::TableauRosW_T{N,S}, - k, h_store, jac_store, ks, u, udot, bt_ind=1) +function rosw_step!{N,S}(xtrial, g!, gprime!, x, dt, dof, btab::TableauRosW_T{N,S}, + k, jac_store, ks, u, udot, bt_ind=1) # This takes one step for a ode/dae system defined by # g(x,xdot)=0 # gprime(x, xdot, α) = dg/dx + α dg/dxdot # first step - # ks[:] = 0 assumes ks==0 s = 1 - h!(h_store, ks, u, udot, g, x, btab, dt, k, s, dof) + h!(k[s], ks, u, udot, g!, x, btab, dt, k, s, dof) # k[s] now holds residual # It's sufficient to only update the Jacobian once per time step: # (Note that this uses u & udot calculated in h! above.) - jac, tmp = hprime!(jac_store, x, gprime, u, udot, btab, dt) + jac = hprime!(jac_store, x, gprime!, u, udot, btab, dt) # calculate k for s=1:S-1 # first step of Newton iteration with guess ks==0 - k[s][:] = ks - jac\h_store # TODO use A_ldiv_B! - - h!(h_store, ks, u, udot, g, x, btab, dt, k, s+1, dof) + k[s][:] = ks - jac\k[s] # in-place A_ldiv_B!(jac, k[s]) + h!(k[s+1], ks, u, udot, g!, x, btab, dt, k, s+1, dof) end # last step - k[S][:] = ks - jac\h_store + k[S][:] = ks - jac\k[S] # completion: xtrial[:] = x @@ -303,12 +300,13 @@ function rosw_step!{N,S}(xtrial, g, gprime, x, dt, dof, btab::TableauRosW_T{N,S} end return nothing end -function h!(res, ks, u, udot, g, x, btab, dt, k, s, dof) +function h!(res, ks, u, udot, g!, x, btab, dt, k, s, dof) # h(ks)=0 to be solved for ks. # # Calculates h(ks) and h'(ks) (if s==1). # Modifies its first 3 arguments. # + # res -- result: can use k[s] for this # ks -- guess for k[s] (usually==0) AND result g(u,udot) # u, udot -- work arrays for first and second argument to g # g -- obj function @@ -317,31 +315,28 @@ function h!(res, ks, u, udot, g, x, btab, dt, k, s, dof) # k -- stage vars (jed's yi) # s -- current stage to calculate # dof - # - # TODO: could re-use ks for res - # stage independent and first stage: - ss = 1 + # stage independent for d=1:dof - u[d] = (ks[d] + x[d] - + btab.a[s,ss]*k[ss][d]) - udot[d] = (1/(dt*btab.γii).*ks[d] # usually ==0 as ks==0 - + btab.c[s,ss]/dt*k[ss][d]) + u[d] = ks[d] + x[d] + udot[d] = 1/(dt*btab.γii).*ks[d] # usually ==0 as ks==0 end - # later stages: - for ss=2:s-1 + # stages + for ss=1:s-1 for d=1:dof u[d] += btab.a[s,ss]*k[ss][d] udot[d] += btab.c[s,ss]/dt*k[ss][d] end end - res[:] = g(u, udot) + g!(res, u, udot) return nothing end -function hprime!(res, xi, gprime, u, udot, btab, dt) - # The Jacobian of h! Note that u and udot should be calculated by - # running h! first. - res[:,:] = gprime(u, udot, 1./(dt*btab.γii)) - tmp = copy(res) - return lufact!(res), tmp +function hprime!(res, xi, gprime!, u, udot, btab, dt) + # The Jacobian of h! Note that u and udot need to be calculated + # by running h! first. + # + # The res will hold part of the LU factorization. However use the + # returned LU-type. + gprime!(res, u, udot, 1./(dt*btab.γii)) + return lufact!(res) end From b3f4697b20dd19c1ac520e977383a0c6b72305a2 Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Wed, 27 May 2015 14:55:22 +0200 Subject: [PATCH 07/15] DAE example working. ROSW faster than DASSL --- examples/hb1dae.jl | 62 +++++++++++++++++++++++++++++ examples/van_der_pol_rosw.jl | 75 ++++++++++++++++++++++++++++++------ src/rosw.jl | 69 +++++++++++++++++++++++++-------- src/runge_kutta.jl | 2 +- 4 files changed, 179 insertions(+), 29 deletions(-) create mode 100644 examples/hb1dae.jl diff --git a/examples/hb1dae.jl b/examples/hb1dae.jl new file mode 100644 index 000000000..13b0799ae --- /dev/null +++ b/examples/hb1dae.jl @@ -0,0 +1,62 @@ +# hb1dae reformulates the hb1ode example as a differential-algebraic equation (DAE) problem. +# See Matlab help and http://radio.feld.cvut.cz/matlab/techdoc/math_anal/ch_8_od8.html#670396 + +using ODE, DASSL +M = diagm([1.,1,1]) +M[3,3] = 0. + +function hb1dae(t, y,ydot) + res = zeros(length(y)) + hb1dae!(res, y,ydot) + return res +end + +function hb1dae!(res, y,ydot) + res[1] = -0.04*y[1] + 1e4*y[2]*y[3] - ydot[1] + res[2] = 0.04*y[1] - 1e4*y[2]*y[3] - 3e7*y[2]^2 - ydot[2] + res[3] = y[1] + y[2] + y[3] - 1 + return nothing +end +function Jhb1dae!(res, y, ydot, a) + res[1,1] = -0.04 - a + res[1,2] = 1e4*y[3] + res[1,3] = 1e4*y[2] + + res[2,1] = 0.04 + res[2,2] = -1e4*y[3] - 3e7*2*y[2] -a; + res[2,3] = -1e4*y[2] + + res[3,1] = 1. + res[3,2] = 1. + res[3,3] = 1. + + return nothing +end +function Jhb1dae(t, y, ydot, a) + res = zeros(length(y),length(y)) + Jhb1dae!(res, y, ydot, a) + return res +end + + +tspan = [0, 4e6] +y0 = [1., 0, 0] + +t,ydassl = dasslSolve(hb1dae, y0, tspan, jacobian=Jhb1dae) +@time t,ydassl = dasslSolve(hb1dae, y0, tspan, jacobian=Jhb1dae) +y = hcat(ydassl...); + +## ROSW +t,yrosw = ode_rosw(hb1dae!, Jhb1dae!, y0, tspan) +@time t,yrosw = ode_rosw(hb1dae!, Jhb1dae!, y0, tspan) +yr = hcat(yrosw...); + +println("Relative difference between DASSL vs ROSW:") +println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) + + +# using Winston +# plot(t, y[1,:], xlog=true) +# oplot(t, y[2,:]*1e4, xlog=true) +# oplot(t, y[3,:], xlog=true) +# oplot(t, y[1,:], xlog=true) diff --git a/examples/van_der_pol_rosw.jl b/examples/van_der_pol_rosw.jl index 75bc32c1d..ca77e8fa1 100644 --- a/examples/van_der_pol_rosw.jl +++ b/examples/van_der_pol_rosw.jl @@ -1,5 +1,6 @@ # Van der Pol using DASSL.jl using ODE +using DASSL abstol = 1e-6 reltol = 1e-6 @@ -32,7 +33,7 @@ function vdp_impl(y, ydot) return vdp(0,y) - ydot end function Jvdp_impl(y, ydot, α) - # d vdp_impl /dy + α d vdp/dy + # d vdp_impl /dy + α d vdp_impl/dydot # # Jacobian return Jvdp(0,y) - α * eye(2) @@ -61,7 +62,7 @@ function vdp_impl!(res, y, ydot) return nothing end function Jvdp_impl!(res, y, ydot, α) - # d vdp_impl /dy + α d vdp/dy + # d vdp_impl /dy + α d vdp/dydot # # Jacobian @@ -74,6 +75,7 @@ end # elapsed time: 0.02611999 seconds (25194952 bytes allocated) # elapsed time: 0.006630583 seconds (4025352 bytes allocated) +# elapsed time: 0.005866466 seconds (2670408 bytes allocated) (newer version) # Adaptive step: abs error of ode23s vs ref: # [0.012513592025336528,0.013225223452835055] # Adaptive step: abs error of ode_rosw vs ref: @@ -110,11 +112,11 @@ tend = 2. # #### adaptive tspan = linspace(tstart, tend, 2) -t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) +t,yout3 = ode23s(vdp, y0, [0,0.1]; jacobian=Jvdp) gc() @time t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) -t,yout4 = ode_rosw(vdp_impl!, Jvdp_impl!, y0, tspan) +t,yout4 = ode_rosw(vdp_impl!, Jvdp_impl!, y0, [0, 0.1]) gc() @time t,yout4 = ode_rosw(vdp_impl!, Jvdp_impl!, y0, tspan) @@ -123,14 +125,63 @@ println(yout3[end]-refsol) println("Adaptive step: abs error of ode_rosw vs ref:") println(yout4[end]-refsol) -# rosw -# @time yout3, ts, steps, dts, xerrs = rosw_runner_adaptive( -# vdp_impl, Jvdp_impl, [0., 2.], y0; -# reltol=reltol, abstol=abstol, dt0=1e-5, mindt=1e-8) -# check +#################### +# DAE: reduced Van der Pol + + + +# implicit inplace equation +function dae_vdp_impl!(res, y, ydot) + # dae_vdp_impl(t, y) + # + # Implicit, reduced van der Pol equation. mu->\infinity + + res[1] = y[2] - ydot[1] + res[2] = y[1] - (y[2]^3/3 - y[2]) + return nothing +end +function Jdae_vdp_impl!(res, y, ydot, α) + # d dae_vdp_impl /dy + α d dae_vdp_impl/dydot + # + # Jacobian + + res[1,1] = 0 - α + res[1,2] = 1 + res[2,1] = 1 + res[2,2] = -(y[2]^2 - 1) + return nothing +end + +t,yout5 = ode_rosw(dae_vdp_impl!, Jdae_vdp_impl!, y0, [0, 0.1]) +gc() +@time t,yout5 = ode_rosw(dae_vdp_impl!, Jdae_vdp_impl!, y0, tspan) + + + +# implicit inplace equation +function dae_vdp(t, y, ydot) + # dae_vdp_impl(t, y) + # + # Implicit, reduced van der Pol equation. mu->\infinity + + [y[2] - ydot[1], y[1] - (y[2]^3/3 - y[2])] +end +function Jdae_vdp(t, y, ydot, α) + # d dae_vdp_impl /dy + α d dae_vdp_impl/dydot + # + # Jacobian + res = zeros(2,2) + res[1,1] = 0 - α + res[1,2] = 1 + res[2,1] = 1 + res[2,2] = -(y[2]^2 - 1) + return res +end +tspan = [0, 4e6] +y0 = [1., 0, 0] + +t,ydassl = dasslSolve(dae_vdp, y0, tspan) + -# using Winston -# plot(ts, yout3[1,:]) -# oplot(tspan, yout2[1,:],"r") diff --git a/src/rosw.jl b/src/rosw.jl index 03c3a1e0e..7545d02c7 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -5,6 +5,10 @@ @show "NEW" export ode_rosw, ode_rosw_fixed +# TODO: +# - finite diff Jacobian +# - AD Jacobian + # Rosenbrock-W methods are typically specified for autonomous DAE: # Mẋ = f(x) # @@ -101,6 +105,19 @@ const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), Float64, 3.7810903145819369e-01 -9.6042292212423178e-02 5.0000000000000000e-01 2.1793326075422950e-01] ) +# binterpt[0][0]=1.0564298455794094; +# binterpt[1][0]=2.296429974281067; +# binterpt[2][0]=-1.307599564525376; +# binterpt[3][0]=-1.045260255335102; +# binterpt[0][1]=-1.3864882699759573; +# binterpt[1][1]=-8.262611700275677; +# binterpt[2][1]=7.250979895056055; +# binterpt[3][1]=2.398120075195581; +# binterpt[0][2]=0.5721822314575016; +# binterpt[1][2]=4.742931142090097; +# binterpt[2][2]=-4.398120075195578; +# binterpt[3][2]=-0.9169932983520199; + ################### # Fixed step solver ################### @@ -133,13 +150,19 @@ ode_rosw(fn, Jfn, x0, tspan;kwargs...) = oderosw_adapt(fn, Jfn, x0, tspan, bt_ro function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; reltol = 1.0e-5, abstol = 1.0e-8, norm=Base.norm, - minstep=abs(tspan[end] - tspan[1])/1e9, + minstep=abs(tspan[end] - tspan[1])/1e18, maxstep=abs(tspan[end] - tspan[1])/2.5, initstep=0., - points=:all +# points=:all ) # TODO: refactor with oderk_adapt + # FIXME: add interpolation + if length(tspan)>2 + error("specified output times not supported yet") + else + points=:all + end ## Figure types fn_expl = (t,x)->(out=similar(x); fn(out, x, x*0); out) Et, Exf, Tx, btab = make_consistent_types(fn, x0, tspan, btab) @@ -183,8 +206,9 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos elseif points!=:specified error("Unrecognized option points==$points") end + # Time - dt, tdir, k[1] = hinit(fn_expl, x, tstart, tend, order, reltol, abstol) # sets k[1]=f0 + dt, tdir, f0 = hinit(fn_expl, x, tstart, tend, order, reltol, abstol) if initstep!=0 dt = sign(initstep)==tdir ? initstep : error("initstep has wrong sign.") end @@ -199,7 +223,7 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos iter = 2 # the index into tspan and xs while true rosw_step!(xtrial, fn, Jfn, x, dt, dof, btab, - k, jac_store, ks, u, udot, 1) + k, jac_store, ks, u, udot) # Completion again for embedded method, see line 927 of # http://www.mcs.anl.gov/petsc/petsc-current/src/ts/impls/rosw/rosw.c.html#TSROSW xerr[:] = 0.0 @@ -217,17 +241,19 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos push!(errs, err) # Output: - f0 = k[1] - f1 = fn_expl(t+dt, xtrial) + # f0 = k[1] + # f1 = fn_expl(t+dt, xtrial) if points==:specified # interpolate onto given output points while iter-1 Date: Wed, 27 May 2015 15:37:32 +0200 Subject: [PATCH 08/15] Numerical Jacobian working. (Copied from DASSL) --- examples/hb1dae.jl | 11 ++- examples/van_der_pol_rosw.jl | 126 ++++++++++++++++++----------------- src/rosw.jl | 52 +++++++++------ 3 files changed, 108 insertions(+), 81 deletions(-) diff --git a/examples/hb1dae.jl b/examples/hb1dae.jl index 13b0799ae..d0f99a525 100644 --- a/examples/hb1dae.jl +++ b/examples/hb1dae.jl @@ -47,13 +47,20 @@ t,ydassl = dasslSolve(hb1dae, y0, tspan, jacobian=Jhb1dae) y = hcat(ydassl...); ## ROSW -t,yrosw = ode_rosw(hb1dae!, Jhb1dae!, y0, tspan) -@time t,yrosw = ode_rosw(hb1dae!, Jhb1dae!, y0, tspan) +t,yrosw = ode_rosw(hb1dae!, y0, tspan) #, jacobian=Jhb1dae!) +@time t,yrosw = ode_rosw(hb1dae!, y0, tspan)#, jacobian=Jhb1dae!) yr = hcat(yrosw...); println("Relative difference between DASSL vs ROSW:") println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) +# with Jacobian +t,yrosw = ode_rosw(hb1dae!, y0, tspan, jacobian=Jhb1dae!) +@time t,yrosw = ode_rosw(hb1dae!, y0, tspan, jacobian=Jhb1dae!) +yr = hcat(yrosw...); + +println("Relative difference between DASSL vs ROSW with Jac:") +println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) # using Winston # plot(t, y[1,:], xlog=true) diff --git a/examples/van_der_pol_rosw.jl b/examples/van_der_pol_rosw.jl index ca77e8fa1..2bdb32c28 100644 --- a/examples/van_der_pol_rosw.jl +++ b/examples/van_der_pol_rosw.jl @@ -116,72 +116,78 @@ t,yout3 = ode23s(vdp, y0, [0,0.1]; jacobian=Jvdp) gc() @time t,yout3 = ode23s(vdp, y0, tspan; jacobian=Jvdp) -t,yout4 = ode_rosw(vdp_impl!, Jvdp_impl!, y0, [0, 0.1]) +t,yout4 = ode_rosw(vdp_impl!, y0, [0, 0.1], jacobian=Jvdp_impl!) gc() -@time t,yout4 = ode_rosw(vdp_impl!, Jvdp_impl!, y0, tspan) +@time t,yout4 = ode_rosw(vdp_impl!, y0, tspan, jacobian=Jvdp_impl!) + +# numerical jacobian +t,yout5 = ode_rosw(vdp_impl!, y0, [0, 0.1]) +gc() +@time t,yout5 = ode_rosw(vdp_impl!, y0, tspan) + println("Adaptive step: abs error of ode23s vs ref:") println(yout3[end]-refsol) println("Adaptive step: abs error of ode_rosw vs ref:") println(yout4[end]-refsol) - - -#################### -# DAE: reduced Van der Pol - - - -# implicit inplace equation -function dae_vdp_impl!(res, y, ydot) - # dae_vdp_impl(t, y) - # - # Implicit, reduced van der Pol equation. mu->\infinity - - res[1] = y[2] - ydot[1] - res[2] = y[1] - (y[2]^3/3 - y[2]) - return nothing -end -function Jdae_vdp_impl!(res, y, ydot, α) - # d dae_vdp_impl /dy + α d dae_vdp_impl/dydot - # - # Jacobian - - res[1,1] = 0 - α - res[1,2] = 1 - res[2,1] = 1 - res[2,2] = -(y[2]^2 - 1) - return nothing -end - -t,yout5 = ode_rosw(dae_vdp_impl!, Jdae_vdp_impl!, y0, [0, 0.1]) -gc() -@time t,yout5 = ode_rosw(dae_vdp_impl!, Jdae_vdp_impl!, y0, tspan) - - - -# implicit inplace equation -function dae_vdp(t, y, ydot) - # dae_vdp_impl(t, y) - # - # Implicit, reduced van der Pol equation. mu->\infinity - - [y[2] - ydot[1], y[1] - (y[2]^3/3 - y[2])] -end -function Jdae_vdp(t, y, ydot, α) - # d dae_vdp_impl /dy + α d dae_vdp_impl/dydot - # - # Jacobian - res = zeros(2,2) - res[1,1] = 0 - α - res[1,2] = 1 - res[2,1] = 1 - res[2,2] = -(y[2]^2 - 1) - return res -end -tspan = [0, 4e6] -y0 = [1., 0, 0] - -t,ydassl = dasslSolve(dae_vdp, y0, tspan) +println("Adaptive step: ROSW abs error with numerical Jacobian:") +println(yout5[end]-refsol) + + +# #################### +# # DAE: reduced Van der Pol + + + +# # implicit inplace equation +# function dae_vdp_impl!(res, y, ydot) +# # dae_vdp_impl(t, y) +# # +# # Implicit, reduced van der Pol equation. mu->\infinity + +# res[1] = y[2] - ydot[1] +# res[2] = y[1] - (y[2]^3/3 - y[2]) +# return nothing +# end +# function Jdae_vdp_impl!(res, y, ydot, α) +# # d dae_vdp_impl /dy + α d dae_vdp_impl/dydot +# # +# # Jacobian + +# res[1,1] = 0 - α +# res[1,2] = 1 +# res[2,1] = 1 +# res[2,2] = -(y[2]^2 - 1) +# return nothing +# end + +# t,yout5 = ode_rosw(dae_vdp_impl!, Jdae_vdp_impl!, y0, [0, 0.1]) +# gc() +# @time t,yout5 = ode_rosw(dae_vdp_impl!, Jdae_vdp_impl!, y0, tspan) + + + +# # implicit inplace equation +# function dae_vdp(t, y, ydot) +# # dae_vdp_impl(t, y) +# # +# # Implicit, reduced van der Pol equation. mu->\infinity + +# [y[2] - ydot[1], y[1] - (y[2]^3/3 - y[2])] +# end +# function Jdae_vdp(t, y, ydot, α) +# # d dae_vdp_impl /dy + α d dae_vdp_impl/dydot +# # +# # Jacobian +# res = zeros(2,2) +# res[1,1] = 0 - α +# res[1,2] = 1 +# res[2,1] = 1 +# res[2,2] = -(y[2]^2 - 1) +# return res +# end + +# t,ydassl = dasslSolve(dae_vdp, y0, tspan) diff --git a/src/rosw.jl b/src/rosw.jl index 7545d02c7..6d51c58f7 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -1,12 +1,12 @@ # Rosenbrock-Wanner methods ########################### # -# Main references: Wanner & Hairrere 1996, PETSc http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/TS/TSROSW.html -@show "NEW" +# Main references: +# - Wanner & Hairer 1996 +# - PETSc http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/TS/TSROSW.html export ode_rosw, ode_rosw_fixed # TODO: -# - finite diff Jacobian # - AD Jacobian # Rosenbrock-W methods are typically specified for autonomous DAE: @@ -105,19 +105,6 @@ const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), Float64, 3.7810903145819369e-01 -9.6042292212423178e-02 5.0000000000000000e-01 2.1793326075422950e-01] ) -# binterpt[0][0]=1.0564298455794094; -# binterpt[1][0]=2.296429974281067; -# binterpt[2][0]=-1.307599564525376; -# binterpt[3][0]=-1.045260255335102; -# binterpt[0][1]=-1.3864882699759573; -# binterpt[1][1]=-8.262611700275677; -# binterpt[2][1]=7.250979895056055; -# binterpt[3][1]=2.398120075195581; -# binterpt[0][2]=0.5721822314575016; -# binterpt[1][2]=4.742931142090097; -# binterpt[2][2]=-4.398120075195578; -# binterpt[3][2]=-0.9169932983520199; - ################### # Fixed step solver ################### @@ -146,13 +133,14 @@ function oderosw_fixed{N,S}(fn, Jfn, x0::AbstractVector, tspan, end -ode_rosw(fn, Jfn, x0, tspan;kwargs...) = oderosw_adapt(fn, Jfn, x0, tspan, bt_ros34pw2; kwargs...) -function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; +ode_rosw(fn, x0, tspan;kwargs...) = oderosw_adapt(fn, x0, tspan, bt_ros34pw2; kwargs...) +function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; reltol = 1.0e-5, abstol = 1.0e-8, norm=Base.norm, minstep=abs(tspan[end] - tspan[1])/1e18, maxstep=abs(tspan[end] - tspan[1])/2.5, initstep=0., + jacobian=numerical_jacobian(fn, reltol, abstol) # points=:all ) # TODO: refactor with oderk_adapt @@ -222,7 +210,7 @@ function oderosw_adapt{N,S}(fn, Jfn, x0::AbstractVector, tspan, btab::TableauRos timeout = 0 # for step-control iter = 2 # the index into tspan and xs while true - rosw_step!(xtrial, fn, Jfn, x, dt, dof, btab, + rosw_step!(xtrial, fn, jacobian, x, dt, dof, btab, k, jac_store, ks, u, udot) # Completion again for embedded method, see line 927 of # http://www.mcs.anl.gov/petsc/petsc-current/src/ts/impls/rosw/rosw.c.html#TSROSW @@ -377,3 +365,29 @@ function hprime!(res, xi, gprime!, u, udot, btab, dt) gprime!(res, u, udot, 1./(dt*btab.γii)) return lufact!(res) end + +# Copied & adapted from DASSL: +# generate a function that computes approximate jacobian using forward +# finite differences +function numerical_jacobian(fn!, reltol, abstol) + function numjac!(jac, y, dy, a) + # TODO use coloring + + ep = eps(1e6) # this is the machine epsilon # TODO: better value? + h = 1/a # h ~ 1/a + wt = reltol*abs(y).+abstol + # delta for approximation of jacobian. I removed the + # sign(h_next*dy0) from the definition of delta because it was + # causing trouble when dy0==0 (which happens for ord==1) + edelta = spdiagm(max(abs(y),abs(h*dy),wt)*sqrt(ep)) + + tmp = similar(y) + f0 = similar(y) + fn!(f0, y, dy) + for i=1:length(y) + fn!(tmp, y+edelta[:,i], a*edelta[:,i]+dy) + jac[:,i] = (tmp-f0)/edelta[i,i] + end + return nothing + end +end From e7196d260e26a3570a677037595014bad3bc163a Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Wed, 27 May 2015 16:10:19 +0200 Subject: [PATCH 09/15] Fixed fixed step solver added rodas3 solver --- examples/hb1dae.jl | 20 +++++++++++++++++++- src/rosw.jl | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/examples/hb1dae.jl b/examples/hb1dae.jl index d0f99a525..0d1be29ae 100644 --- a/examples/hb1dae.jl +++ b/examples/hb1dae.jl @@ -51,10 +51,18 @@ t,yrosw = ode_rosw(hb1dae!, y0, tspan) #, jacobian=Jhb1dae!) @time t,yrosw = ode_rosw(hb1dae!, y0, tspan)#, jacobian=Jhb1dae!) yr = hcat(yrosw...); -println("Relative difference between DASSL vs ROSW:") +println("Relative difference between DASSL vs ROSW, no Jac:") +println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) + +println("Using fixed step ROSW") +tspan = t +t,yrosw = ode_rosw_fixed(hb1dae!, y0, tspan) +@time t,yrosw = ode_rosw_fixed(hb1dae!, y0, tspan) +println("Relative difference between DASSL vs fixed step, no Jac:") println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) # with Jacobian +tspan = [0, 4e6] t,yrosw = ode_rosw(hb1dae!, y0, tspan, jacobian=Jhb1dae!) @time t,yrosw = ode_rosw(hb1dae!, y0, tspan, jacobian=Jhb1dae!) yr = hcat(yrosw...); @@ -62,6 +70,16 @@ yr = hcat(yrosw...); println("Relative difference between DASSL vs ROSW with Jac:") println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) + +println("Using ros_rodas3") +t,yrosw = ODE.oderosw_adapt(hb1dae!, y0, tspan, ODE.bt_ros_rodas3) +@time t,yrosw = ODE.oderosw_adapt(hb1dae!, y0, tspan, ODE.bt_ros_rodas3) +println("Relative difference between DASSL vs Robas2, no Jac:") +println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) + + + + # using Winston # plot(t, y[1,:], xlog=true) # oplot(t, y[2,:]*1e4, xlog=true) diff --git a/src/rosw.jl b/src/rosw.jl index 6d51c58f7..793186658 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -8,6 +8,7 @@ export ode_rosw, ode_rosw_fixed # TODO: # - AD Jacobian +# - fix fixed step solver # Rosenbrock-W methods are typically specified for autonomous DAE: # Mẋ = f(x) @@ -91,7 +92,7 @@ function tabletransform{Name,S,T}(rt::TableauRosW{Name,S,T}) end -## tableau for ros34pw2 +## tableau for ros34pw2 http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/TS/TSROSWRA34PW2.html const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), Float64, [0 0 0 0 8.7173304301691801e-01 0 0 0 @@ -105,12 +106,28 @@ const bt_ros34pw2 = TableauRosW(:ros34pw2, (3,4), Float64, 3.7810903145819369e-01 -9.6042292212423178e-02 5.0000000000000000e-01 2.1793326075422950e-01] ) +## tableau for RODAS3 http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/TS/TSROSWRODAS3.html +const bt_ros_rodas3 = TableauRosW(:ros_rodas3, (3,4), Rational{Int}, + [0 0 0 0 + 0 0 0 0 + 1 0 0 0 + 3//4 -1//4 1//2 0], + [1//2 0 0 0 + 1 1//2 0 0 + -1//4 -1//4 1//2 0 + 1//12 1//12 -2//3 1//2], + [5//6 -1//6 -1//6 1//2 + 3//4 -1//4 1//2 0] + ) + ################### # Fixed step solver ################### -ode_rosw_fixed(fn, Jfn, x0, tspan) = oderosw_fixed(fn, Jfn, x0, tspan, bt_ros34pw2) -function oderosw_fixed{N,S}(fn, Jfn, x0::AbstractVector, tspan, - btab::TableauRosW{N,S}) +ode_rosw_fixed(fn, x0, tspan; kwargs...) = oderosw_fixed(fn, x0, tspan, bt_ros34pw2, kwargs...) +function oderosw_fixed{N,S}(fn, x0::AbstractVector, tspan, + btab::TableauRosW{N,S}; + jacobian=numerical_jacobian(fn, 1e-6, 1e-6) + ) # TODO: refactor with oderk_fixed Et, Exf, Tx, btab = make_consistent_types(fn, x0, tspan, btab) btab = tabletransform(btab) @@ -122,18 +139,26 @@ function oderosw_fixed{N,S}(fn, Jfn, x0::AbstractVector, tspan, tspan = convert(Vector{Et}, tspan) # work arrays: - ks = Array(Tx, S) + k = Array(Tx, S) # stage variables + allocate!(k, x0, dof) + ks = zeros(Exf,dof) # work vector for one k + jac_store = zeros(Exf,dof,dof) # Jacobian storage + u = zeros(Exf,dof) # work vector + udot = zeros(Exf,dof) # work vector + # allocate!(ks, x0, dof) # no need to allocate as fn is not in-place xtmp = similar(x0, Exf, dof) for i=1:length(tspan)-1 dt = tspan[i+1]-tspan[i] - xs[i+1] = rosw_step!(fn, Jfn, xs[i], dt, btab, 2) + rosw_step!(xs[i+1], fn, jacobian, xs[i], dt, dof, btab, + k, jac_store, ks, u, udot) end return tspan, xs end ode_rosw(fn, x0, tspan;kwargs...) = oderosw_adapt(fn, x0, tspan, bt_ros34pw2; kwargs...) +ode_rodas3(fn, x0, tspan;kwargs...) = oderosw_adapt(fn, x0, tspan, bt_ros_rodas3; kwargs...) function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; reltol = 1.0e-5, abstol = 1.0e-8, norm=Base.norm, From 24346ed596d391dc90da6406fc3975854f871e8c Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Wed, 27 May 2015 20:33:11 +0200 Subject: [PATCH 10/15] Added tests --- src/rosw.jl | 11 +++++-- src/runge_kutta.jl | 1 - test/implicit-eqs.jl | 78 ++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 test/implicit-eqs.jl diff --git a/src/rosw.jl b/src/rosw.jl index 793186658..5df473709 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -7,7 +7,9 @@ export ode_rosw, ode_rosw_fixed # TODO: -# - AD Jacobian +# - Jacobian coloring: https://github.com/mlubin/ReverseDiffSparse.jl +# http://wiki.cs.purdue.edu:9835/coloringpage/abstracts/acyclic-SISC.pdf +# - AD Jacobian: https://github.com/mlubin/ReverseDiffSparse.jl # - fix fixed step solver # Rosenbrock-W methods are typically specified for autonomous DAE: @@ -172,7 +174,9 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S # FIXME: add interpolation if length(tspan)>2 - error("specified output times not supported yet") + warn("specified output times not supported yet") + tspan = [tspan[1], tspan[end]] + points=:all else points=:all end @@ -326,7 +330,8 @@ function rosw_step!{N,S}(xtrial, g!, gprime!, x, dt, dof, btab::TableauRosW_T{N, # first step of Newton iteration with guess ks==0 # k[s][:] = ks - jac\k[s] # in-place A_ldiv_B!(jac, k[s]) # TODO: add option to do more Newton iterations - A_ldiv_B!(jac, k[s]) + A_ldiv_B!(jac, k[s]) # TODO: see whether this is impacted by https://github.com/JuliaLang/julia/issues/10787 + # and https://github.com/JuliaLang/julia/issues/11325 for d=1:dof k[s][d] = ks[d]-k[s][d] end diff --git a/src/runge_kutta.jl b/src/runge_kutta.jl index 018197939..aa6d0d260 100644 --- a/src/runge_kutta.jl +++ b/src/runge_kutta.jl @@ -464,7 +464,6 @@ function hermite_interp!(y, tquery,t,dt,y0,y1,f0,f1) # # f_0 = f(x_0 , y_0) , f_1 = f(x_0 + h, y_1 ) # this is O(3). TODO for higher order. -error("") theta = (tquery-t)/dt for i=1:length(y0) y[i] = ((1-theta)*y0[i] + theta*y1[i] + theta*(theta-1) * diff --git a/test/implicit-eqs.jl b/test/implicit-eqs.jl new file mode 100644 index 000000000..ada0eb754 --- /dev/null +++ b/test/implicit-eqs.jl @@ -0,0 +1,78 @@ +using ODE +using Base.Test + +tol = 1e-2 + +solvers = [ + ## Stiff + ODE.ode_rosw + ] + +for solver in solvers + println("using $solver") + # dy + # -- = 6 ==> y = 6t + # dt + f = (res,y,ydot)->(res[:]=6.0-ydot; nothing) + t,y=solver(f, [0.], [0.,1]) + y = vcat(y...) + @test maximum(abs(y-6t)) < tol + + # dy + # -- = 2t ==> y = t.^2 + # dt + # t,y=solver((t,y)->2t, 0., [0:.001:1;]) + # @test maximum(abs(y-t.^2)) < tol + + + # dy + # -- = y ==> y = y0*e.^t + # dt + f = (res,y,ydot)->(res[:]=y-ydot;nothing) + t,y=solver(f, [1.], [0,1]) + y = vcat(y...) + @test maximum(abs(y-e.^t)) < tol + + t,y=solver((res,y,ydot)->(res[:]=y-ydot), [1.], [1, 0]) + y = vcat(y...) + @test maximum(abs(y-e.^(t-1))) < tol + + # dv dw + # -- = -w, -- = v ==> v = v0*cos(t) - w0*sin(t), w = w0*cos(t) + v0*sin(t) + # dt dt + # + # y = [v, w] + + function ff(res,y,ydot) + res[1] = -y[2]-ydot[1] + res[2] = y[1] -ydot[2] + end + + t,y=solver(ff, [1., 2.], [0, 2*pi]) + ys = hcat(y...).' # convert Vector{Vector{Float}} to Matrix{Float} + @test maximum(abs(ys-[cos(t)-2*sin(t) 2*cos(t)+sin(t)])) < tol +end + +# rober testcase from http://www.unige.ch/~hairer/testset/testset.html +let + println("ROBER test case") + function f(t, y) + ydot = similar(y) + ydot[1] = -0.04*y[1] + 1.0e4*y[2]*y[3] + ydot[3] = 3.0e7*y[2]*y[2] + ydot[2] = -ydot[1] - ydot[3] + ydot + end + f!(res, y, ydot) = (res[:] = f(0, y) - ydot; nothing) + + t = [0., 1e11] + t,y = ode_rosw(f!, [1.0, 0.0, 0.0], t; abstol=1e-8, reltol=1e-8, + maxstep=1e11/10, minstep=1e11/1e18) + + refsol = [0.2083340149701255e-07, + 0.8333360770334713e-13, + 0.9999999791665050] # reference solution at tspan[2] + @test norm(refsol-y[end], Inf) < 3e-10 +end + +println("All looks OK") diff --git a/test/runtests.jl b/test/runtests.jl index 86fc105f0..9376f8d8d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -81,5 +81,6 @@ let @test norm(refsol-y[end], Inf) < 2e-10 end include("interface-tests.jl") +include("implicit-eqs.jl") println("All looks OK") From 3bf2ce0d5b62324ae769ecec218f8998a8c6d828 Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Sun, 31 May 2015 11:17:51 +0200 Subject: [PATCH 11/15] Small fix in numeric jac --- src/rosw.jl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/rosw.jl b/src/rosw.jl index 5df473709..e95792cc1 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -4,7 +4,7 @@ # Main references: # - Wanner & Hairer 1996 # - PETSc http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/TS/TSROSW.html -export ode_rosw, ode_rosw_fixed +export ode_rosw, ode_rosw_fixed, ode_rodas3 # TODO: # - Jacobian coloring: https://github.com/mlubin/ReverseDiffSparse.jl @@ -409,14 +409,20 @@ function numerical_jacobian(fn!, reltol, abstol) # delta for approximation of jacobian. I removed the # sign(h_next*dy0) from the definition of delta because it was # causing trouble when dy0==0 (which happens for ord==1) - edelta = spdiagm(max(abs(y),abs(h*dy),wt)*sqrt(ep)) + edelta = max(abs(y),abs(h*dy),wt)*sqrt(ep) tmp = similar(y) + tmp1 = similar(y) + tmp2 = similar(y) f0 = similar(y) fn!(f0, y, dy) for i=1:length(y) - fn!(tmp, y+edelta[:,i], a*edelta[:,i]+dy) - jac[:,i] = (tmp-f0)/edelta[i,i] + copy!(tmp1, y) + copy!(tmp2, dy) + tmp1[i] += edelta[i] + tmp2[i] += a*edelta[i] + fn!(tmp, tmp1, tmp2) + jac[:,i] = (tmp-f0)/edelta[i] end return nothing end From 037d19cb6893e7bb20ad32968d8565e5ee571afe Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Tue, 9 Jun 2015 17:47:48 +0200 Subject: [PATCH 12/15] ... --- REQUIRE | 1 + examples/hb1dae.jl | 6 ++-- src/rosw.jl | 74 ++++++++++++++++++++++++++++++++++++++++++++-- src/runge_kutta.jl | 3 +- 4 files changed, 77 insertions(+), 7 deletions(-) diff --git a/REQUIRE b/REQUIRE index 8e8352dde..2f9450386 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,4 @@ julia 0.3 Polynomials Compat 0.4.1 +MatrixColorings diff --git a/examples/hb1dae.jl b/examples/hb1dae.jl index 0d1be29ae..461b7e40d 100644 --- a/examples/hb1dae.jl +++ b/examples/hb1dae.jl @@ -33,9 +33,9 @@ function Jhb1dae!(res, y, ydot, a) return nothing end function Jhb1dae(t, y, ydot, a) - res = zeros(length(y),length(y)) - Jhb1dae!(res, y, ydot, a) - return res + jac = zeros(length(y),length(y)) + Jhb1dae!(jac, y, ydot, a) + return jac end diff --git a/src/rosw.jl b/src/rosw.jl index e95792cc1..04680c420 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -4,6 +4,8 @@ # Main references: # - Wanner & Hairer 1996 # - PETSc http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/TS/TSROSW.html + +using MatrixColorings export ode_rosw, ode_rosw_fixed, ode_rodas3 # TODO: @@ -24,6 +26,11 @@ export ode_rosw, ode_rosw_fixed, ode_rodas3 # The method used here uses transformed equations as done in PETSc. +rosw_poststep(xtrial, err, t, dt, steps) = (showcompact(t), print(", ") , + showcompact(dt), print(", "), + showcompact(err), print(", "), + println(steps)) + # Tableaus ########## @@ -204,7 +211,12 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S k = Array(Tx, S) # stage variables allocate!(k, x0, dof) ks = zeros(Exf,dof) # work vector for one k - jac_store = zeros(Exf,dof,dof) # Jacobian storage + if isa(jacobian, Tuple) + jac_store = jacobian[2] + jacobian = jacobian[1] + else + jac_store = zeros(Exf,dof,dof) # Jacobian storage + end u = zeros(Exf,dof) # work vector udot = zeros(Exf,dof) # work vector @@ -250,7 +262,9 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S end end err, newdt, timeout = stepsize_hw92!(dt, tdir, x, xtrial, xerr, order, timeout, - dof, abstol, reltol, maxstep, norm) + dof, abstol, reltol, maxstep, norm) + + rosw_poststep(xtrial, err, t, dt, steps) if err<=1.0 # accept step # diagnostics steps[1] +=1 @@ -300,6 +314,7 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S laststep = true # next step is the last, if it succeeds end elseif abs(newdt)0 no step size increase is allowed, timeout is # decremented in here. # - # Returns the error, newdt and the number of timeout-steps + # Returns the error, newdt and the number of timeout-steps. + # Updates xerr in-place with the relative error. # # TODO: # - allow component-wise reltol and abstol? From 4c5683a71d7d6cb89917f9c42f2974313748aaba Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Wed, 10 Jun 2015 13:32:01 +0200 Subject: [PATCH 13/15] Fixed bug in runge-kutta When intital time step overstepped tspan[end] then it didn't work correctly. --- src/ODE.jl | 3 ++- src/runge_kutta.jl | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ODE.jl b/src/ODE.jl index 2c8287ad9..6738f12ed 100644 --- a/src/ODE.jl +++ b/src/ODE.jl @@ -62,6 +62,7 @@ Base.convert{Tnew<:Real}(::Type{Tnew}, tab::Tableau) = error("Define convert met # estimator for initial step based on book # "Solving Ordinary Differential Equations I" by Hairer et al., p.169 function hinit(F, x0, t0, tend, p, reltol, abstol) + # Returns first step, direction of integration and F evaluated at t0 tdir = sign(tend-t0) tdir==0 && error("Zero time span") tau = max(reltol*norm(x0, Inf), abstol) @@ -84,7 +85,7 @@ function hinit(F, x0, t0, tend, p, reltol, abstol) pow = -(2. + log10(max(d1, d2)))/(p + 1.) h1 = 10.^pow end - return tdir*min(100*h0, h1), tdir, f0 + return tdir*min(100*h0, h1, tdir*(tend-t0)), tdir, f0 end # isoutofdomain takes the state and returns true if state is outside diff --git a/src/runge_kutta.jl b/src/runge_kutta.jl index 043ee9064..b0dbb7399 100644 --- a/src/runge_kutta.jl +++ b/src/runge_kutta.jl @@ -286,7 +286,7 @@ function oderk_adapt{N,S}(fn, y0::AbstractVector, tspan, btab_::TableauRKExplici steps = [0,0] # [accepted, rejected] ## Integration loop - laststep = false + islaststep = abs(t+dt-tend)<=eps(tend) ? true : false timeout = 0 # for step-control iter = 2 # the index into tspan and ys while true @@ -307,7 +307,7 @@ function oderk_adapt{N,S}(fn, y0::AbstractVector, tspan, btab_::TableauRKExplici f1 = isFSAL(btab) ? ks[S] : fn(t+dt, ytrial) if points==:specified # interpolate onto given output points - while iter-1= tdir*tend dt = tend-t - laststep = true # next step is the last, if it succeeds + islaststep = true # next step is the last, if it succeeds end elseif abs(newdt) Date: Wed, 10 Jun 2015 13:34:26 +0200 Subject: [PATCH 14/15] Having a sparse Jacobian now works. The problem was that A_ldiv_B! does not update B in case A is a sparse lufact: https://github.com/JuliaLang/julia/issues/10787 Sparse coloring does not work yet. --- examples/bruss1d.jl | 202 ++++++++++++++++++++++++++++++++++++++++++++ examples/hb1dae.jl | 12 +-- src/rosw.jl | 84 ++++++++++-------- 3 files changed, 255 insertions(+), 43 deletions(-) create mode 100644 examples/bruss1d.jl diff --git a/examples/bruss1d.jl b/examples/bruss1d.jl new file mode 100644 index 000000000..4df7035eb --- /dev/null +++ b/examples/bruss1d.jl @@ -0,0 +1,202 @@ +using ODE,DASSL + +const T = Float64 # the datatype used, probably Float64 +Tarr = SparseMatrixCSC +const N = 500 # active gridpoints +const dof = 2N # degrees of freedom (boundary points are not free) +dx = 1/(N+1) +x = dx:dx:1-dx +dae = 0 # index of DAE, ==0 for ODE + +# parameters +alpha = 1/50 +const gamma = alpha/dx^2 + +# BC +const one_ = one(T) +const three = 3*one_ +ubc1() = one_ +ubc2() = one_ +vbc1() = three +vbc2() = three +#IC +u0(x) = 1 + 0.5*sin(2π*x) +v0(x) = 3 * ones(length(x)) + +inds(i) = (2i-1, 2i) +function fn!(res, y, dydt) + # the ode function res = f(y,dydt) + # + # Note the u and v are staggered: + # u = y[1:2:end], v=y[2:2:end] + + # setup for first step + i = 1 + iu, iv = inds(i) + ui0 = ubc1(); vi0 = vbc1() # BC + ui1 = y[iu]; vi1 = y[iv] + ui2 = y[iu+2]; vi2 = y[iv+2] + while i<=N + res[iu] = 1 + ui1^2*vi1 - 4*ui1 + gamma*(ui0 - 2*ui1 + ui2) - dydt[iu] + res[iv] = 3*ui1 - ui1^2*vi1 + gamma*(vi0 - 2*vi1 + vi2) - dydt[iv] + # setup for next step: + i += 1 + iu, iv = inds(i) + ui0 = ui1; vi0 = vi1 + ui1 = ui2; vi1 = vi2 + if i=1 + # du'(i-1)/du(i) + dfdy.nzval[ii] = gamma + ii +=1 + end + if iv-2>=1 + # dv'(i-1)/du(i)==0 + dfdy.nzval[ii] = 0 + ii +=1 + end + # du'(i)/du(i) + dfdy.nzval[ii] = 2*ui*vi - 4 -2*gamma - a + ii += 1 + # dv'(i)/du(i) + dfdy.nzval[ii] = 3 - 2*ui*vi + ii +=1 + if iu+2<=2N + # du'(i+1)/du(i) + dfdy.nzval[ii] = gamma + ii +=1 + end + + # iv: column belonging to d/dv(i) + if iv-2>=1 + # dv'(i-1)/dv(i) + dfdy.nzval[ii] = gamma + ii +=1 + end + # du'(i)/dv(i) + dfdy.nzval[ii] = ui^2 + ii +=1 + # dv'(i)/dv(i) + dfdy.nzval[ii] = -ui^2 -2*gamma - a + ii +=1 + if iu+2<=2N + # du'(i+1)/dv(i)==0 + dfdy.nzval[ii] = 0 + ii +=1 + end + if iv+2<=2N + # dv'(i+1)/dv(i) + dfdy.nzval[ii] = gamma + ii +=1 + end + end + return nothing +end +function jac!(;T_::Type=T, dof_=dof) + B = (ones(T_,dof_-2), ones(T_,dof_-1), ones(T_,dof_), ones(T_,dof_-1), ones(T_,dof_-2)) + spdiagm(B, (-2,-1,0,1,2)) +end +function jac(t, y, ydot, a) + J = jac!() + jac!(J, y, ydot, a) + return J +end +function jac_ex(t, y) + return jac(t, y, y, 0) +end + +refsol = T[0.9949197002317599, 3.0213845767604077, 0.9594350193986054, 3.0585989778165419, 0.9243010095428502, 3.0952478919989637, 0.8897959106772672, + 3.1310118289054731, 0.8561653620284367, 3.1656101198770159, 0.8236197147449046, 3.1988043370624344, 0.7923328094811884, 3.2303999530641514, + 0.7624421042573115, 3.2602463873623941, 0.7340499750795348, 3.2882356529108807, 0.7072259700779899, 3.3142998590079271, 0.6820097782458483, + 3.3384078449410937, 0.6584146743834650, 3.3605612157873943, 0.6364312187752559, 3.3807900316323134, 0.6160310186921587, 3.3991483695914764, + 0.5971703941198909, 3.4157099395342736, 0.5797938277687891, 3.4305638938070224, 0.5638371159206763, 3.4438109320334580, 0.5492301695479158, + 3.4555597666485198, 0.5358994429426996, 3.4659239846027008, 0.5237699892215797, 3.4750193162238476, 0.5127671585747183, 3.4829613034792271, + 0.5028179665048467, 3.4898633463634923, 0.4938521662914935, 3.4958350971335204, 0.4858030633656755, 3.5009811668111510, 0.4786081100251151, + 3.5054001059792705, 0.4722093177200750, 3.5091836216744015, 0.4665535216425440, 3.5124159935026285, 0.4615925290790646, 3.5151736544621075, + 0.4572831793403656, 3.5175249049438184, 0.4535873393501199, 3.5195297317024448, 0.4504718553589467, 3.5212397070273984, 0.4479084778719241, + 3.5226979467564341, 0.4458737738041973, 3.5239391090719634, 0.4443490371324889, 3.5249894191569453, 0.4433202068820853, 3.5258667077466495, + 0.4427777991494095, 3.5265804544017270, 0.4427168579654424, 3.5271318289682063, 0.4431369281018266, 3.5275137272135266, 0.4440420513508381, + 3.5277107990730161, 0.4454407863109616, 3.5276994703501980, 0.4473462502188303, 3.5274479611304068, 0.4497761798232572, 3.5269163066324394, + 0.4527530066369863, 3.5260563887768472, 0.4563039400688689, 3.5248119894251024, 0.4604610498812091, 3.5231188790654930, 0.4652613370907894, + 3.5209049576992761, 0.4707467798082714, 3.5180904678044698, 0.4769643375804777, 3.5145883024867057, 0.4839658945842979, 3.5103044351908528, + 0.4918081185812277, 3.5051385005173827, 0.5005522089940899, 3.4989845585737802, 0.5102635039989190, 3.4917320776245013, 0.5210109134090777, + 3.4832671712209993, 0.5328661417420937, 3.4734741260299615, 0.5459026646938675, 3.4622372546582585, 0.5601944229089820, 3.4494431032230182, + 0.5758142001453760, 3.4349830354873361, 0.5928316594749734, 3.4187562033108012, 0.6113110218368440, 3.4006728962523969, 0.6313083867734524, + 3.3806582409098729, 0.6528687160104193, 3.3586561928427350, 0.6760225267555723, 3.3346337311179157, 0.7007823726569721, 3.3085851288057930, + 0.7271392249346637, 3.2805361342349380, 0.7550589020044152, 3.2505478606008622, 0.7844787296769868, 3.2187201496972175, 0.8153046416214843, + 3.1851941538893653, 0.8474089465959840, 3.1501538739882800, 0.8806289904192589, 3.1138264039027113, 0.9147669230929857, 3.0764806689389470, + 0.9495907429372025, 3.0384245041548366, 0.9848367306701233] # reference sol from Hairer has 1.36 scd +refsolinds = collect(1:7:dof) # refsol is only at components 1:7:2N + + +ic = zeros(T,2N) +ic[1:2:2N] = u0(x) +ic[2:2:2N] = v0(x) +tspan = T[0.0, 10.0] # integration interval +tspan0 = T[0.0, 0.001] # integration interval + +#t,ydassl = dasslSolve(fn, ic, tspan, jacobian=jac) + +## ode23s +# tout, yout = ode23s(fn_ex, ic, tspan0, jacobian=jac_ex) +# @time tout, yout = ode23s(fn_ex, ic, tspan, jacobian=jac_ex) +# @show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) + +# ## DASSL +# t,ydassl = dasslSolve(fn, ic, tspan0, jacobian=jac) +# @time t,ydassl = dasslSolve(fn, ic, tspan, jacobian=jac) +# @show norm(abs(ydassl[end][refsolinds]-refsol)./refsol, Inf) + +## ROSW + +# # No jac works but is slower and gives much higher precision than +# # other methods. +# tout, yout = ode_rosw(fn!, ic, tspan0) +# @time tout, yout = ode_rosw(fn!, ic, tspan) +# @show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) + +# Analytic Jacobian +tout, yout = ode_rosw(fn!, ic, tspan0, jacobian=(jac!, jac!())) +@time tout, yout = ode_rosw(fn!, ic, tspan, jacobian=(jac!, jac!())) +@show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) + +# Numerical colored Jacobian +tout, yout = ode_rosw(fn!, ic, tspan0, jacobian=jac!()) +@time tout, yout = ode_rosw(fn!, ic, tspan, jacobian=jac!()) +@show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) + diff --git a/examples/hb1dae.jl b/examples/hb1dae.jl index 461b7e40d..e6902685f 100644 --- a/examples/hb1dae.jl +++ b/examples/hb1dae.jl @@ -54,12 +54,12 @@ yr = hcat(yrosw...); println("Relative difference between DASSL vs ROSW, no Jac:") println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) -println("Using fixed step ROSW") -tspan = t -t,yrosw = ode_rosw_fixed(hb1dae!, y0, tspan) -@time t,yrosw = ode_rosw_fixed(hb1dae!, y0, tspan) -println("Relative difference between DASSL vs fixed step, no Jac:") -println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) +# println("Using fixed step ROSW") +# tspan = t +# t,yrosw = ode_rosw_fixed(hb1dae!, y0, tspan) +# @time t,yrosw = ode_rosw_fixed(hb1dae!, y0, tspan) +# println("Relative difference between DASSL vs fixed step, no Jac:") +# println(abs(ydassl[end]-yrosw[end])./abs(ydassl[end])) # with Jacobian tspan = [0, 4e6] diff --git a/src/rosw.jl b/src/rosw.jl index 04680c420..bd36d3c34 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -26,10 +26,12 @@ export ode_rosw, ode_rosw_fixed, ode_rodas3 # The method used here uses transformed equations as done in PETSc. -rosw_poststep(xtrial, err, t, dt, steps) = (showcompact(t), print(", ") , - showcompact(dt), print(", "), - showcompact(err), print(", "), - println(steps)) +# rosw_poststep(xtrial, err, t, dt, steps) = (showcompact(t), print(", ") , +# showcompact(dt), print(", "), +# showcompact(err), print(", "), +# println(steps)) + +rosw_poststep(xtrial, err, t, dt, steps) = nothing # Tableaus ########## @@ -132,13 +134,13 @@ const bt_ros_rodas3 = TableauRosW(:ros_rodas3, (3,4), Rational{Int}, ################### # Fixed step solver ################### -ode_rosw_fixed(fn, x0, tspan; kwargs...) = oderosw_fixed(fn, x0, tspan, bt_ros34pw2, kwargs...) -function oderosw_fixed{N,S}(fn, x0::AbstractVector, tspan, +ode_rosw_fixed(fn!, x0, tspan; kwargs...) = oderosw_fixed(fn!, x0, tspan, bt_ros34pw2, kwargs...) +function oderosw_fixed{N,S}(fn!, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; - jacobian=numerical_jacobian(fn, 1e-6, 1e-6) + jacobian=numerical_jacobian(fn!, 1e-6, 1e-6) ) # TODO: refactor with oderk_fixed - Et, Exf, Tx, btab = make_consistent_types(fn, x0, tspan, btab) + Et, Exf, Tx, btab = make_consistent_types(fn!, x0, tspan, btab) btab = tabletransform(btab) dof = length(x0) @@ -155,26 +157,26 @@ function oderosw_fixed{N,S}(fn, x0::AbstractVector, tspan, u = zeros(Exf,dof) # work vector udot = zeros(Exf,dof) # work vector - # allocate!(ks, x0, dof) # no need to allocate as fn is not in-place + # allocate!(ks, x0, dof) # no need to allocate as fn! is not in-place xtmp = similar(x0, Exf, dof) for i=1:length(tspan)-1 dt = tspan[i+1]-tspan[i] - rosw_step!(xs[i+1], fn, jacobian, xs[i], dt, dof, btab, + rosw_step!(xs[i+1], fn!, jacobian, xs[i], dt, dof, btab, k, jac_store, ks, u, udot) end return tspan, xs end -ode_rosw(fn, x0, tspan;kwargs...) = oderosw_adapt(fn, x0, tspan, bt_ros34pw2; kwargs...) -ode_rodas3(fn, x0, tspan;kwargs...) = oderosw_adapt(fn, x0, tspan, bt_ros_rodas3; kwargs...) -function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; +ode_rosw(fn!, x0, tspan;kwargs...) = oderosw_adapt(fn!, x0, tspan, bt_ros34pw2; kwargs...) +ode_rodas3(fn!, x0, tspan;kwargs...) = oderosw_adapt(fn!, x0, tspan, bt_ros_rodas3; kwargs...) +function oderosw_adapt{N,S}(fn!, x0::AbstractVector, tspan, btab::TableauRosW{N,S}; reltol = 1.0e-5, abstol = 1.0e-8, norm=Base.norm, minstep=abs(tspan[end] - tspan[1])/1e18, maxstep=abs(tspan[end] - tspan[1])/2.5, initstep=0., - jacobian=numerical_jacobian(fn, reltol, abstol) + jacobian=numerical_jacobian(fn!, reltol, abstol) # points=:all ) # TODO: refactor with oderk_adapt @@ -188,8 +190,8 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S points=:all end ## Figure types - fn_expl = (t,x)->(out=similar(x); fn(out, x, x*0); out) - Et, Exf, Tx, btab = make_consistent_types(fn, x0, tspan, btab) + fn_expl = (t,x)->(out=similar(x); fn!(out, x, x*0); out) + Et, Exf, Tx, btab = make_consistent_types(fn!, x0, tspan, btab) btab = tabletransform(btab) # parameters order = minimum(btab.order) @@ -211,10 +213,13 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S k = Array(Tx, S) # stage variables allocate!(k, x0, dof) ks = zeros(Exf,dof) # work vector for one k - if isa(jacobian, Tuple) + if isa(jacobian, Tuple) # Jacobian function and sparse storage provided jac_store = jacobian[2] jacobian = jacobian[1] - else + elseif isa(jacobian, SparseMatrixCSC) # Sparse storage provided, make colored Jacobian (TODO) + jac_store = jacobian + jacobian = numerical_jacobian(fn!, reltol, abstol) #, jac_store, jac_store) + else # nothing provided, make dense numerical jacobian jac_store = zeros(Exf,dof,dof) # Jacobian storage end u = zeros(Exf,dof) # work vector @@ -247,11 +252,11 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S steps = [0,0] # [accepted, rejected] ## Integration loop - laststep = false + islaststep = abs(t+dt-tend)<=eps(tend) ? true : false timeout = 0 # for step-control iter = 2 # the index into tspan and xs while true - rosw_step!(xtrial, fn, jacobian, x, dt, dof, btab, + rosw_step!(xtrial, fn!, jacobian, x, dt, dof, btab, k, jac_store, ks, u, udot) # Completion again for embedded method, see line 927 of # http://www.mcs.anl.gov/petsc/petsc-current/src/ts/impls/rosw/rosw.c.html#TSROSW @@ -276,13 +281,13 @@ function oderosw_adapt{N,S}(fn, x0::AbstractVector, tspan, btab::TableauRosW{N,S # f1 = fn_expl(t+dt, xtrial) if points==:specified # interpolate onto given output points - while iter-1= tdir*tend dt = tend-t - laststep = true # next step is the last, if it succeeds + islaststep = true # next step is the last, if it succeeds end elseif abs(newdt) Date: Wed, 10 Jun 2015 17:10:16 +0200 Subject: [PATCH 15/15] finally got it working [ci skip] --- examples/bruss1d.jl | 33 +++++++------- src/rosw.jl | 104 ++++++++++++++++++++++---------------------- 2 files changed, 71 insertions(+), 66 deletions(-) diff --git a/examples/bruss1d.jl b/examples/bruss1d.jl index 4df7035eb..582005496 100644 --- a/examples/bruss1d.jl +++ b/examples/bruss1d.jl @@ -169,34 +169,37 @@ ic[1:2:2N] = u0(x) ic[2:2:2N] = v0(x) tspan = T[0.0, 10.0] # integration interval tspan0 = T[0.0, 0.001] # integration interval +tspan1 = T[0.0, 0.3] # integration interval #t,ydassl = dasslSolve(fn, ic, tspan, jacobian=jac) ## ode23s -# tout, yout = ode23s(fn_ex, ic, tspan0, jacobian=jac_ex) -# @time tout, yout = ode23s(fn_ex, ic, tspan, jacobian=jac_ex) -# @show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) +tout, yout = ode23s(fn_ex, ic, tspan0, jacobian=jac_ex) +@time tout, yout = ode23s(fn_ex, ic, tspan, jacobian=jac_ex) +@show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) # ## DASSL -# t,ydassl = dasslSolve(fn, ic, tspan0, jacobian=jac) -# @time t,ydassl = dasslSolve(fn, ic, tspan, jacobian=jac) -# @show norm(abs(ydassl[end][refsolinds]-refsol)./refsol, Inf) +t,ydassl = dasslSolve(fn, ic, tspan0, jacobian=jac) +@time t,ydassl = dasslSolve(fn, ic, tspan, jacobian=jac) +@show norm(abs(ydassl[end][refsolinds]-refsol)./refsol, Inf) ## ROSW +# for some reason the abstol and reltol need to be way lower to reach +# the same precision. Presumably an error in the step control. +abstol, reltol = 1e-2, 1e-3 +# No jac +tout, yout = ode_rosw(fn!, ic, tspan0) +@time tout, yout = ode_rosw(fn!, ic, tspan, abstol=abstol, reltol=reltol); +@show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) -# # No jac works but is slower and gives much higher precision than -# # other methods. -# tout, yout = ode_rosw(fn!, ic, tspan0) -# @time tout, yout = ode_rosw(fn!, ic, tspan) -# @show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) - -# Analytic Jacobian +# Analytic Jacobian, same as previous tout, yout = ode_rosw(fn!, ic, tspan0, jacobian=(jac!, jac!())) -@time tout, yout = ode_rosw(fn!, ic, tspan, jacobian=(jac!, jac!())) +@time tout, yout = ode_rosw(fn!, ic, tspan, jacobian=(jac!, jac!()), abstol=abstol, reltol=reltol); @show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) # Numerical colored Jacobian tout, yout = ode_rosw(fn!, ic, tspan0, jacobian=jac!()) -@time tout, yout = ode_rosw(fn!, ic, tspan, jacobian=jac!()) +@time tout, yout = ode_rosw(fn!, ic, tspan, jacobian=jac!(), abstol=abstol, reltol=reltol); @show norm(abs(yout[end][refsolinds]-refsol)./refsol, Inf) +nothing diff --git a/src/rosw.jl b/src/rosw.jl index bd36d3c34..fa4bd1b7b 100644 --- a/src/rosw.jl +++ b/src/rosw.jl @@ -213,12 +213,12 @@ function oderosw_adapt{N,S}(fn!, x0::AbstractVector, tspan, btab::TableauRosW{N, k = Array(Tx, S) # stage variables allocate!(k, x0, dof) ks = zeros(Exf,dof) # work vector for one k - if isa(jacobian, Tuple) # Jacobian function and sparse storage provided + if isa(jacobian, Tuple) # Jacobian function and sparse pattern/storage provided jac_store = jacobian[2] jacobian = jacobian[1] elseif isa(jacobian, SparseMatrixCSC) # Sparse storage provided, make colored Jacobian (TODO) jac_store = jacobian - jacobian = numerical_jacobian(fn!, reltol, abstol) #, jac_store, jac_store) + jacobian = numerical_jacobian(fn!, reltol, abstol, jac_store) else # nothing provided, make dense numerical jacobian jac_store = zeros(Exf,dof,dof) # Jacobian storage end @@ -352,7 +352,6 @@ function rosw_step!{N,S}(xtrial, g!, gprime!, x, dt, dof, btab::TableauRosW_T{N, # TODO: add option to do more Newton iterations k[s] = A_ldiv_B!(jac, k[s]) # TODO: see whether this is impacted by https://github.com/JuliaLang/julia/issues/10787 # and https://github.com/JuliaLang/julia/issues/11325 -# k[s][:] = jac_store\k[s] for d=1:dof k[s][d] = ks[d]-k[s][d] end @@ -363,7 +362,6 @@ function rosw_step!{N,S}(xtrial, g!, gprime!, x, dt, dof, btab::TableauRosW_T{N, # k[S][:] = ks - jac\k[S] # TODO: add option to do more Newton iterations k[S] = A_ldiv_B!(jac, k[S]) -# k[S][:] = jac_store\k[S] for d=1:dof k[S][d] = ks[d]-k[S][d] end @@ -415,7 +413,7 @@ function hprime!(res, xi, gprime!, u, udot, btab, dt) # The res will hold part of the LU factorization. However use the # returned LU-type. gprime!(res, u, udot, 1./(dt*btab.γii)) - if true # issparse(res) + if issparse(res) # For issues with sparse https://github.com/JuliaLang/julia/issues/7488 # Basically lufact! destroys res and it cannot be used anymore later. return lufact(res) @@ -428,79 +426,83 @@ end # generate a function that computes approximate jacobian using forward # finite differences function numerical_jacobian(fn!, reltol, abstol) - function numjac!(jac, y, dy, a) + function numjac!(jac, y, ydot, a) # Numerical Jacobian, finite differences, no coloring. - ep = eps(1e6) # this is the machine epsilon # TODO: better value? + ep = eps(1.0) # this is the machine epsilon # TODO: better value? h = 1/a # h ~ 1/a wt = reltol*abs(y).+abstol # Delta for approximation of Jacobian. I removed the - # sign(h_next*dy0) from the definition of delta because it was - # causing trouble when dy0==0 (which happens for ord==1) - edelta = max(abs(y),abs(h*dy),wt)*sqrt(ep) - - tmp = similar(y) - tmp1 = similar(y) - tmp2 = similar(y) + # sign(h_next*ydot0) from the definition of delta because it was + # causing trouble when ydot0==0 (which happens for ord==1) + edelta = max(abs(y),abs(h*ydot),wt)*sqrt(ep) + f0 = similar(y) - fn!(f0, y, dy) + fn!(f0, y, ydot) + + dy = deepcopy(y) + dydot = deepcopy(ydot) + f1 = similar(y) for i=1:length(y) - copy!(tmp1, y) - copy!(tmp2, dy) - tmp1[i] += edelta[i] - tmp2[i] += a*edelta[i] - fn!(tmp, tmp1, tmp2) - if false #isa(jac, SparseMatrixCSC) - for nz in nzrange(jac,i) - nonzeros(jac)[nz] = tmpy[rowvals(jac)[nz]] - end - else - for j=1:length(y) - jac[j,i] = (tmp[j]-f0[j])/edelta[i] - end + dy[i] += edelta[i] + dydot[i] += a*edelta[i] + fn!(f1, dy, dydot) + for j=1:length(y) + jac[j,i] = (f1[j]-f0[j])/edelta[i] end + # remove delta again + dy[i] -= edelta[i] + dydot[i] -= a*edelta[i] end return nothing end end -function numerical_jacobian(fn!, reltol, abstol, JPattern1::SparseMatrixCSC, JPattern2::SparseMatrixCSC) - # JPattern1: sparsity pattern of dfn/dy - # JPattern2: sparsity pattern of dfn/dydot - - # S1 = color_jac(JPattern1) # seed matrix - # S2 = color_jac(JPattern2) # seed matrix - # nc1 = size(S1,2) # number of colors - # nc2 = size(S2,2) # number of colors +if VERSION < v"0.4.0-dev" + # TODO: remove these two for 0.4 + rowvals(S::SparseMatrixCSC) = S.rowval + nzrange(S::SparseMatrixCSC, col::Integer) = S.colptr[col]:(S.colptr[col+1]-1) +end +function numerical_jacobian(fn!, reltol, abstol, JPattern::SparseMatrixCSC) + # JPattern: sparsity pattern of dfn/dy + dfn/dydot + # Typically this matrix will be used for storage of the + # Jacobian as well. # TODO: there are probably more clever ways to do this: - S = color_jac(JPattern1+JPattern2) + S = color_jac(JPattern) nc = size(S,2) function numjac_c!(jac, y, ydot, a) # updates jac in-place - ep = eps(1e6) # this is the machine epsilon # TODO: better value? + ep = eps(1.0) # this is the machine epsilon # TODO: better value? h = 1/a # h ~ 1/a wt = reltol*abs(y).+abstol edelta = max(abs(y),abs(h*ydot),wt)*sqrt(ep) + a_edelta = a*edelta + + f0 = similar(y) + fn!(f0, y, ydot) - dof = length(y) - res0 = similar(y) - fn!(res0, y, ydot) dy = deepcopy(y) # y+dy - dydot = deepcopy(y) # y+dydot - res1 = similar(y) # fn(y+dy, ydot) - res2 = similar(y) # fn(y, ydot+dydot) + dydot = deepcopy(ydot) # y+dydot + f1 = similar(y) # fn(y+dy, ydot) for c in 1:nc add_delta!(dy, S, c, edelta) # adds a delta to all indices of color c - add_delta!(dydot, S, c, edelta) # adds a delta to all indices of color c - fn!(res1, dy, ydot) - fn!(res2, y, dydot) - for j=1:dof - res1[j] = ( res1[j]-res0[j] + a*(res2[j]-res0[j]) )/edelta[j] + add_delta!(dydot, S, c, a_edelta) # adds a delta to all indices of color c + fn!(f1, dy, dydot) + for j=1:length(y) + f1[j] = f1[j]-f0[j] end - recover_jac!(jac, res1, S, c) # recovers the columns of jac which correspond c + recover_jac!(jac, f1, S, c) # recovers the columns of jac which correspond c remove_delta!(dy, S, c, edelta) # remove delta again - remove_delta!(dydot, S, c, edelta) # remove delta again + remove_delta!(dydot, S, c, a_edelta) # remove delta again + end + # do the division by edelta + nz = nonzeros(jac) + for j =1:size(jac,2) + ed = edelta[j] + for i in nzrange(jac,j) + nz[i] /= ed + end end return nothing end