diff --git a/Project.toml b/Project.toml index 8977396..5882abd 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +Scratch = "6c6a2e73-6563-6170-7368-637461726353" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" @@ -20,9 +21,24 @@ RasterDataSourcesProjExt = "Proj" [compat] ASCIIrasters = "0.1" +Dates = "1" HTTP = "0.8, 0.9, 1" JSON = "0.21" Proj = "1" +Pkg = "1" +SafeTestsets = "0.0.1" +Scratch = "1" URIs = "1" ZipFile = "0.9, 0.10" -julia = "1.9" \ No newline at end of file +julia = "1.9" + +[extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Proj = "c94c279d-25a6-4763-9509-64d165bea63e" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Aqua", "Dates", "Pkg", "Proj", "SafeTestsets", "Test"] diff --git a/README.md b/README.md index 29a246e..ef09fc0 100644 --- a/README.md +++ b/README.md @@ -57,14 +57,24 @@ Install as usual with: ] add RasterDataSources ``` -To download data you will need to specify a folder to put it in. You can do this -by assigning the environment variable `RASTERDATASOURCES_PATH`: +### Storage Configuration + +RasterDataSources.jl handles data storage for you. By default, it will create a +persistent scratch directory to store downloaded raster data. +However, raster datasets can be hundreds of GB or more, so make sure there is enough free space in your Julia depot (by default `~/.julia/scratchspaces`).
+ + #### Custom Storage Location + +To put data in a custom location, point `RASTERDATASOURCES_PATH` at an existing +directory (it is not created for you), usually in your `~/.julia/config/startup.jl` file: ```julia -ENV["RASTERDATASOURCES_PATH"] = "/home/user/Data/" +ENV["RASTERDATASOURCES_PATH"] = "/path/to/your/data" ``` -This can be put in your `startup.jl` file or the system environment. +#### Finding Your Storage Location + +`RasterDataSources.rasterpath()` will return the current data storage path. -RasterDataSources was based on code from the `SimpleSDMDataSoures.jl` package by Timothée Poisot. +_RasterDataSources was originally based on code from the `SimpleSDMDataSources.jl` package by Timothée Poisot._ diff --git a/src/RasterDataSources.jl b/src/RasterDataSources.jl index 667148b..656e521 100644 --- a/src/RasterDataSources.jl +++ b/src/RasterDataSources.jl @@ -10,7 +10,8 @@ using Dates, URIs, ZipFile, ASCIIrasters, - DelimitedFiles + DelimitedFiles, + Scratch import JSON.Parser as JP diff --git a/src/alwb/alwb.jl b/src/alwb/alwb.jl index f99274f..31e225c 100644 --- a/src/alwb/alwb.jl +++ b/src/alwb/alwb.jl @@ -67,7 +67,7 @@ This will return the file containing annual averages, including your date: ```julia julia> getraster(ALWB{Values,Year}, :ss_pct; date=Date(2001, 2)) -"/your/RASTERDATASOURCES_PATH/ALWB/values/month/ss_pct.nc" +"/path/to/storage/ALWB/values/month/ss_pct.nc" ``` Returns the filepath/s of the downloaded or pre-existing files. diff --git a/src/modis/products.jl b/src/modis/products.jl index 162be68..dd5907e 100644 --- a/src/modis/products.jl +++ b/src/modis/products.jl @@ -17,7 +17,7 @@ end """ Lists available layers for a given MODIS Product -Looks in `joinpath(ENV["RASTERDATASOURCES_PATH"]/MODIS/layers` for +Looks in the storage directory under `MODIS/layers` for a file with the right name. If not found, sends a request to the server to get the list.
diff --git a/src/shared.jl b/src/shared.jl index 450ca0b..423889e 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -1,3 +1,6 @@ +const RDS_PATH_STRING = "ENV[\"RASTERDATASOURCES_PATH\"] = \"/path/to/your/data\"" +const STARTUP_PATH_STRING = "`[julia folder]/config/startup.jl`" + # Vector layers are allowed, but converted to `Tuple` immediatedly. function getraster(T::Type, layers::AbstractArray; kw...) getraster(T, (layers...,); kw...) @@ -62,6 +65,7 @@ function _maybe_download(uri::URI, filepath, headers = []) if !isfile(filepath) mkpath(dirname(filepath)) @info "Starting download for $uri" + try HTTP.download(string(uri), filepath, headers) catch e @@ -73,11 +77,58 @@ function _maybe_download(uri::URI, filepath, headers = []) filepath end +""" + rasterpath() + +Returns the path to the directory where raster data is stored. + +The storage location is determined using the following priority order: +1. If the `RASTERDATASOURCES_PATH` environment variable is set, its value is returned; an error is thrown if it is not an existing directory +2. If no environment variable is set, a persistent scratch directory is automatically created using Scratch.jl + +The scratch directory persists across Julia sessions and package updates, ensuring downloaded data is not lost. +If scratch directory creation fails, an error is thrown with instructions to manually set the environment variable.
+ + # Examples +```julia +# With environment variable set +$RDS_PATH_STRING +rasterpath() # Returns "/path/to/your/data" + +# Without environment variable (automatic scratch directory) +rasterpath() # Returns something like "/Users/username/.julia/scratchspaces/12345.../raster_data" +``` +""" function rasterpath() - if haskey(ENV, "RASTERDATASOURCES_PATH") && isdir(ENV["RASTERDATASOURCES_PATH"]) - ENV["RASTERDATASOURCES_PATH"] - else - error("You must set `ENV[\"RASTERDATASOURCES_PATH\"]` to a path in your system") + # Priority 1: Use environment variable if set; it must be an existing directory + if haskey(ENV, "RASTERDATASOURCES_PATH") + path = ENV["RASTERDATASOURCES_PATH"] + isdir(path) || error("Your RASTERDATASOURCES_PATH is not a directory: $path") + return path + end + + # Priority 2: Use scratch directory + try + scratch_dir = @get_scratch!("raster_data") + if isempty(readdir(scratch_dir)) # If this is the first use, print info in the REPL + @info """ + Created scratch directory for raster data storage: + $scratch_dir. + Make sure there is adequate space, some datasets are 100GB+. + For a custom location set $RDS_PATH_STRING + in your $STARTUP_PATH_STRING file. + """ maxlog=1 + end + return scratch_dir + catch e + error( + """ + Failed to create scratch directory for raster data storage. + Please set $RDS_PATH_STRING manually + in your $STARTUP_PATH_STRING.
+ Error: $(sprint(showerror, e)) + """ + ) end end diff --git a/test/Project.toml b/test/Project.toml deleted file mode 100644 index 7621b6a..0000000 --- a/test/Project.toml +++ /dev/null @@ -1,8 +0,0 @@ -[deps] -Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Proj = "c94c279d-25a6-4763-9509-64d165bea63e" -SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" diff --git a/test/awap.jl b/test/awap.jl index 851b090..45d3094 100644 --- a/test/awap.jl +++ b/test/awap.jl @@ -4,14 +4,14 @@ using RasterDataSources: rastername, rasterpath, zipurl, zipname, zippath @testset "AWAP" begin using RasterDataSources: rastername, zipurl, zipname, zippath - raster_file = joinpath(ENV["RASTERDATASOURCES_PATH"], "AWAP", "vprp", "vprph09", "20010101.grid") + raster_file = joinpath(rasterpath(), "AWAP", "vprp", "vprph09", "20010101.grid") @test rasterpath(AWAP, :vprpress09; date=Date(2001, 1)) == raster_file @test rastername(AWAP, :vprpress09; date=Date(2001, 1)) == "20010101.grid" @test zipurl(AWAP, :vprpress09; date=Date(2001, 1)) == URI(scheme="http", host="www.bom.gov.au", path="/web03/ncc/www/awap/vprp/vprph09/daily/grid/0.05/history/nat/2001010120010101.grid.Z") @test zippath(AWAP, :vprpress09; date=Date(2001, 1)) == - joinpath(ENV["RASTERDATASOURCES_PATH"], "AWAP", "vprp", "vprph09", "20010101.grid.Z") + joinpath(rasterpath(), "AWAP", "vprp", "vprph09", "20010101.grid.Z") @test zipname(AWAP, :vprpress09; date=Date(2001, 1)) == "20010101.grid.Z" if Sys.islinux() diff --git a/test/runtests.jl b/test/runtests.jl index 88e197c..bb5f9cf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,3 +30,4 @@ end # @time @safetestset "modis utilities" begin include("modis-utilities.jl") end @time @safetestset "modis product info" begin include("modis-products.jl") end # @time @safetestset "modis interface" begin
include("modis-interface.jl") end +@time @safetestset "storage path resolution" begin include("storage-path.jl") end diff --git a/test/storage-path.jl b/test/storage-path.jl new file mode 100644 index 0000000..ad50ca1 --- /dev/null +++ b/test/storage-path.jl @@ -0,0 +1,200 @@ +using Test, RasterDataSources, Scratch +using RasterDataSources: rasterpath +import Scratch: @get_scratch! + +@testset "Storage Path Resolution" begin + + @testset "rasterpath() with RASTERDATASOURCES_PATH set to valid directory" begin + # Create a temporary directory for testing + temp_dir = mktempdir() + + try + # withenv sets the variable for this block only and restores any + # pre-existing value afterwards, so later tests see the caller's setting + withenv("RASTERDATASOURCES_PATH" => temp_dir) do + # Test that rasterpath() returns the environment variable path + @test rasterpath() == temp_dir + @test isdir(rasterpath()) + end + + finally + # Clean up + rm(temp_dir, recursive=true, force=true) + end + end + + @testset "rasterpath() with RASTERDATASOURCES_PATH unset (scratch directory creation)" begin + # Ensure environment variable is not set + if haskey(ENV, "RASTERDATASOURCES_PATH") + old_path = ENV["RASTERDATASOURCES_PATH"] + delete!(ENV, "RASTERDATASOURCES_PATH") + else + old_path = nothing + end + + try + # Test that rasterpath() creates and returns scratch directory + scratch_path = rasterpath() + + @test isa(scratch_path, String) + @test isdir(scratch_path) + @test isabspath(scratch_path) + + # Verify it's actually a scratch directory by checking it contains expected patterns + # Scratch directories typically contain package UUID or similar identifiers + @test occursin("raster_data", scratch_path) || occursin("RasterDataSources", scratch_path) + + # Test that subsequent calls return the same path + @test rasterpath() == scratch_path + + finally + # Restore environment variable if it existed + if old_path !== nothing + ENV["RASTERDATASOURCES_PATH"] = old_path + end + end + end + + @testset "rasterpath() with RASTERDATASOURCES_PATH set to invalid
directory" begin + # Point the environment variable at a non-existent directory + invalid_path = "/this/path/does/not/exist/$(rand(UInt32))" + + # An explicitly configured path that is not a directory is a user error: + # rasterpath() raises instead of silently falling back to the scratch + # directory. withenv restores any pre-existing value afterwards. + withenv("RASTERDATASOURCES_PATH" => invalid_path) do + @test_throws ErrorException rasterpath() + @test_throws "is not a directory" rasterpath() + end + end + + @testset "get_raster_storage_path() function consistency" begin + # This test assumes get_raster_storage_path() will be implemented + # Skip if function doesn't exist yet + if isdefined(RasterDataSources, :get_raster_storage_path) + # Test with environment variable set + temp_dir = mktempdir() + try + ENV["RASTERDATASOURCES_PATH"] = temp_dir + + @test RasterDataSources.get_raster_storage_path() == rasterpath() + @test RasterDataSources.get_raster_storage_path() == temp_dir + + finally + delete!(ENV, "RASTERDATASOURCES_PATH") + rm(temp_dir, recursive=true, force=true) + end + + # Test with scratch directory + if haskey(ENV, "RASTERDATASOURCES_PATH") + old_path = ENV["RASTERDATASOURCES_PATH"] + delete!(ENV, "RASTERDATASOURCES_PATH") + else + old_path = nothing + end + + try + @test RasterDataSources.get_raster_storage_path() == rasterpath() + @test isdir(RasterDataSources.get_raster_storage_path()) + + finally + if old_path !== nothing + ENV["RASTERDATASOURCES_PATH"] = old_path + end + end + else + @test_skip "get_raster_storage_path() function not yet implemented" + end + end + + @testset "Mock Scratch.jl functions to test failure scenarios" begin + # Save original environment state + original_env = get(ENV, "RASTERDATASOURCES_PATH", nothing) + if haskey(ENV, "RASTERDATASOURCES_PATH") + delete!(ENV, "RASTERDATASOURCES_PATH") + end + + try + # Test scratch directory creation failure by mocking @get_scratch! + # This is tricky to test directly since @get_scratch!
is a macro + # so no failure is actually injected here; only the success path is + # exercised below + + # First, get a valid scratch directory + scratch_path = rasterpath() + @test isdir(scratch_path) + + # The error scenario is difficult to mock without modifying the source + # So we'll test that the function handles the success case properly + # and document that failure testing would require dependency injection + # Note: rasterpath() may print info messages, so we just test it doesn't error + result = rasterpath() + @test isa(result, String) + + finally + # Restore original environment + if original_env !== nothing + ENV["RASTERDATASOURCES_PATH"] = original_env + end + end + end + + @testset "Storage path switching behavior" begin + # Test switching between environment variable and scratch directory + temp_dir = mktempdir() + + # Remember the caller's setting outside `try` so that `finally` can see it + old_path = get(ENV, "RASTERDATASOURCES_PATH", nothing) + + try + # Start with scratch directory + delete!(ENV, "RASTERDATASOURCES_PATH") + scratch_path = rasterpath() + @test isdir(scratch_path) + + # Switch to environment variable + ENV["RASTERDATASOURCES_PATH"] = temp_dir + env_path = rasterpath() + @test env_path == temp_dir + @test env_path != scratch_path + + # Switch back to scratch directory + delete!(ENV, "RASTERDATASOURCES_PATH") + back_to_scratch = rasterpath() + @test back_to_scratch == scratch_path + @test isdir(back_to_scratch) + + finally + # Restore original state here rather than inside `try`, so it also + # happens when rasterpath() throws part-way through the test + if old_path === nothing + delete!(ENV, "RASTERDATASOURCES_PATH") + else + ENV["RASTERDATASOURCES_PATH"] = old_path + end + + rm(temp_dir, recursive=true, force=true) + end + end + + @testset "Path properties and validation" begin + # Test various properties of returned paths + path = rasterpath() + + @test isa(path, String) + @test !isempty(path) + @test isdir(path) + @test isabspath(path) + + # Test that the path is writable + test_file = joinpath(path, "test_write_$(rand(UInt32)).txt") + try + write(test_file, "test")
+ @test isfile(test_file) + @test read(test_file, String) == "test" + finally + rm(test_file, force=true) + end + end + +end