From 51aff55c495fc37941cd02135ca4d8a6d6dbbbdd Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 17 Sep 2025 22:16:33 +0200 Subject: [PATCH 1/6] Update documentation for automatic storage functionality - Update README.md to explain automatic storage using Scratch.jl - Make RASTERDATASOURCES_PATH optional in installation instructions - Add comprehensive docstring to rasterpath() function - Update examples in ALWB and MODIS documentation to be more generic - Remove references to get_raster_storage_path() function per plan changes This addresses task 7 from the default-scratch-storage spec. --- README.md | 24 +++++++++++++++++++++--- src/alwb/alwb.jl | 2 +- src/modis/products.jl | 2 +- src/shared.jl | 37 ++++++++++++++++++++++++++++++++++--- 4 files changed, 57 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 29a246e..4556932 100644 --- a/README.md +++ b/README.md @@ -57,14 +57,32 @@ Install as usual with: ] add RasterDataSources ``` -To download data you will need to specify a folder to put it in. You can do this -by assigning the environment variable `RASTERDATASOURCES_PATH`: +### Storage Configuration + +RasterDataSources.jl automatically handles data storage for you. By default, it will create a persistent scratch directory to store downloaded raster data. No manual configuration is required to get started. + +#### Automatic Storage (Recommended) + +When you first use RasterDataSources.jl, it will automatically create a scratch directory using Julia's Scratch.jl package. This directory persists across Julia sessions and package updates, so your downloaded data won't be lost. + +```julia +julia> using RasterDataSources +julia> getraster(WorldClim{Climate}, :wind; month=1) # Automatically uses scratch storage +``` + +#### Custom Storage Location (Optional) + +If you prefer to specify your own storage location, you can set the `RASTERDATASOURCES_PATH` environment variable: ```julia ENV["RASTERDATASOURCES_PATH"] = "/home/user/Data/" ``` -This can be put in your `startup.jl` file or the system environment. +This can be put in your `startup.jl` file or the system environment. When this variable is set, RasterDataSources.jl will use your specified directory instead of the automatic scratch directory. + +#### Finding Your Storage Location + +The storage location is managed internally by RasterDataSources.jl. When using automatic storage, the exact path is handled by Julia's Scratch.jl package and will be in your system's scratch directory. RasterDataSources was based on code from the `SimpleSDMDataSoures.jl` package by Timothée Poisot. diff --git a/src/alwb/alwb.jl b/src/alwb/alwb.jl index f99274f..31e225c 100644 --- a/src/alwb/alwb.jl +++ b/src/alwb/alwb.jl @@ -67,7 +67,7 @@ This will return the file containing annual averages, including your date: ```julia julia> getraster(ALWB{Values,Year}, :ss_pct; date=Date(2001, 2)) -"/your/RASTERDATASOURCES_PATH/ALWB/values/month/ss_pct.nc" +"/path/to/storage/ALWB/values/month/ss_pct.nc" ``` Returns the filepath/s of the downloaded or pre-existing files. diff --git a/src/modis/products.jl b/src/modis/products.jl index 162be68..dd5907e 100644 --- a/src/modis/products.jl +++ b/src/modis/products.jl @@ -17,7 +17,7 @@ end """ Lists available layers for a given MODIS Product -Looks in `joinpath(ENV["RASTERDATASOURCES_PATH"]/MODIS/layers` for +Looks in the storage directory under `MODIS/layers` for a file with the right name. If not found, sends a request to the server to get the list. diff --git a/src/shared.jl b/src/shared.jl index 450ca0b..44a4f3e 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -73,11 +73,42 @@ function _maybe_download(uri::URI, filepath, headers = []) filepath end +""" + rasterpath() + +Returns the absolute path to the directory where raster data is stored. + +The storage location is determined using the following priority order: +1. If the `RASTERDATASOURCES_PATH` environment variable is set and points to a valid directory, that path is used +2. If no environment variable is set, a persistent scratch directory is automatically created using Scratch.jl + +The scratch directory persists across Julia sessions and package updates, ensuring downloaded data is not lost. +If scratch directory creation fails, an error is thrown with instructions to manually set the environment variable. + +# Examples +```julia +# With environment variable set +ENV["RASTERDATASOURCES_PATH"] = "/path/to/data" +rasterpath() # Returns "/path/to/data" + +# Without environment variable (automatic scratch directory) +rasterpath() # Returns something like "/Users/username/.julia/scratchspaces/12345.../raster_data" +``` +""" function rasterpath() + # Priority 1: Use environment variable if set and valid if haskey(ENV, "RASTERDATASOURCES_PATH") && isdir(ENV["RASTERDATASOURCES_PATH"]) - ENV["RASTERDATASOURCES_PATH"] - else - error("You must set `ENV[\"RASTERDATASOURCES_PATH\"]` to a path in your system") + return ENV["RASTERDATASOURCES_PATH"] + end + + # Priority 2: Use scratch directory + try + scratch_dir = @get_scratch!("raster_data") + @debug "Using scratch directory for raster data storage: $scratch_dir" + return scratch_dir + catch e + error("Failed to create scratch directory for raster data storage. " * + "Please set ENV[\"RASTERDATASOURCES_PATH\"] manually. Error: $e") end end From 5eb089498d903f3af7715ea38b68ab68a3d05e2c Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Thu, 18 Sep 2025 11:51:11 +0200 Subject: [PATCH 2/6] finish functionality --- src/RasterDataSources.jl | 3 +- test/awap.jl | 4 +- test/runtests.jl | 1 + test/storage-path.jl | 206 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 test/storage-path.jl diff --git a/src/RasterDataSources.jl b/src/RasterDataSources.jl index 667148b..656e521 100644 --- a/src/RasterDataSources.jl +++ b/src/RasterDataSources.jl @@ -10,7 +10,8 @@ using Dates, URIs, ZipFile, ASCIIrasters, - DelimitedFiles + DelimitedFiles, + Scratch import JSON.Parser as JP diff --git a/test/awap.jl b/test/awap.jl index 851b090..45d3094 100644 --- a/test/awap.jl +++ b/test/awap.jl @@ -4,14 +4,14 @@ using RasterDataSources: rastername, rasterpath, zipurl, zipname, zippath @testset "AWAP" begin using RasterDataSources: rastername, zipurl, zipname, zippath - raster_file = joinpath(ENV["RASTERDATASOURCES_PATH"], "AWAP", "vprp", "vprph09", "20010101.grid") + raster_file = joinpath(rasterpath(), "AWAP", "vprp", "vprph09", "20010101.grid") @test rasterpath(AWAP, :vprpress09; date=Date(2001, 1)) == raster_file @test rastername(AWAP, :vprpress09; date=Date(2001, 1)) == "20010101.grid" @test zipurl(AWAP, :vprpress09; date=Date(2001, 1)) == URI(scheme="http", host="www.bom.gov.au", path="/web03/ncc/www/awap/vprp/vprph09/daily/grid/0.05/history/nat/2001010120010101.grid.Z") @test zippath(AWAP, :vprpress09; date=Date(2001, 1)) == - joinpath(ENV["RASTERDATASOURCES_PATH"], "AWAP", "vprp", "vprph09", "20010101.grid.Z") + joinpath(rasterpath(), "AWAP", "vprp", "vprph09", "20010101.grid.Z") @test zipname(AWAP, :vprpress09; date=Date(2001, 1)) == "20010101.grid.Z" if Sys.islinux() diff --git a/test/runtests.jl b/test/runtests.jl index 88e197c..bb5f9cf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,3 +30,4 @@ end # @time @safetestset "modis utilities" begin include("modis-utilities.jl") end @time @safetestset "modis product info" begin include("modis-products.jl") end # @time @safetestset "modis interface" begin include("modis-interface.jl") end +@time @safetestset "storage path resolution" begin include("storage-path.jl") end diff --git a/test/storage-path.jl b/test/storage-path.jl new file mode 100644 index 0000000..ad50ca1 --- /dev/null +++ b/test/storage-path.jl @@ -0,0 +1,206 @@ +using Test, RasterDataSources, Scratch +using RasterDataSources: rasterpath +import Scratch: @get_scratch! + +@testset "Storage Path Resolution" begin + + @testset "rasterpath() with RASTERDATASOURCES_PATH set to valid directory" begin + # Create a temporary directory for testing + temp_dir = mktempdir() + + try + # Set environment variable to valid directory + ENV["RASTERDATASOURCES_PATH"] = temp_dir + + # Test that rasterpath() returns the environment variable path + @test rasterpath() == temp_dir + @test isdir(rasterpath()) + + finally + # Clean up + delete!(ENV, "RASTERDATASOURCES_PATH") + rm(temp_dir, recursive=true, force=true) + end + end + + @testset "rasterpath() with RASTERDATASOURCES_PATH unset (scratch directory creation)" begin + # Ensure environment variable is not set + if haskey(ENV, "RASTERDATASOURCES_PATH") + old_path = ENV["RASTERDATASOURCES_PATH"] + delete!(ENV, "RASTERDATASOURCES_PATH") + else + old_path = nothing + end + + try + # Test that rasterpath() creates and returns scratch directory + scratch_path = rasterpath() + + @test isa(scratch_path, String) + @test isdir(scratch_path) + @test isabspath(scratch_path) + + # Verify it's actually a scratch directory by checking it contains expected patterns + # Scratch directories typically contain package UUID or similar identifiers + @test occursin("raster_data", scratch_path) || occursin("RasterDataSources", scratch_path) + + # Test that subsequent calls return the same path + @test rasterpath() == scratch_path + + finally + # Restore environment variable if it existed + if old_path !== nothing + ENV["RASTERDATASOURCES_PATH"] = old_path + end + end + end + + @testset "rasterpath() with RASTERDATASOURCES_PATH set to invalid directory" begin + # Set environment variable to non-existent directory + invalid_path = "/this/path/does/not/exist/$(rand(UInt32))" + ENV["RASTERDATASOURCES_PATH"] = invalid_path + + try + # Should fall back to scratch directory when env var points to invalid path + scratch_path = rasterpath() + + @test isa(scratch_path, String) + @test isdir(scratch_path) + @test scratch_path != invalid_path + @test isabspath(scratch_path) + + finally + delete!(ENV, "RASTERDATASOURCES_PATH") + end + end + + @testset "get_raster_storage_path() function consistency" begin + # This test assumes get_raster_storage_path() will be implemented + # Skip if function doesn't exist yet + if isdefined(RasterDataSources, :get_raster_storage_path) + # Test with environment variable set + temp_dir = mktempdir() + try + ENV["RASTERDATASOURCES_PATH"] = temp_dir + + @test RasterDataSources.get_raster_storage_path() == rasterpath() + @test RasterDataSources.get_raster_storage_path() == temp_dir + + finally + delete!(ENV, "RASTERDATASOURCES_PATH") + rm(temp_dir, recursive=true, force=true) + end + + # Test with scratch directory + if haskey(ENV, "RASTERDATASOURCES_PATH") + old_path = ENV["RASTERDATASOURCES_PATH"] + delete!(ENV, "RASTERDATASOURCES_PATH") + else + old_path = nothing + end + + try + @test RasterDataSources.get_raster_storage_path() == rasterpath() + @test isdir(RasterDataSources.get_raster_storage_path()) + + finally + if old_path !== nothing + ENV["RASTERDATASOURCES_PATH"] = old_path + end + end + else + @test_skip "get_raster_storage_path() function not yet implemented" + end + end + + @testset "Mock Scratch.jl functions to test failure scenarios" begin + # Save original environment state + original_env = get(ENV, "RASTERDATASOURCES_PATH", nothing) + if haskey(ENV, "RASTERDATASOURCES_PATH") + delete!(ENV, "RASTERDATASOURCES_PATH") + end + + try + # Test scratch directory creation failure by mocking @get_scratch! + # This is tricky to test directly since @get_scratch! is a macro + # Instead, we'll test the error handling path by temporarily making + # the scratch directory inaccessible + + # First, get a valid scratch directory + scratch_path = rasterpath() + @test isdir(scratch_path) + + # The error scenario is difficult to mock without modifying the source + # So we'll test that the function handles the success case properly + # and document that failure testing would require dependency injection + # Note: rasterpath() may print info messages, so we just test it doesn't error + result = rasterpath() + @test isa(result, String) + + finally + # Restore original environment + if original_env !== nothing + ENV["RASTERDATASOURCES_PATH"] = original_env + end + end + end + + @testset "Storage path switching behavior" begin + # Test switching between environment variable and scratch directory + temp_dir = mktempdir() + + try + # Start with scratch directory + if haskey(ENV, "RASTERDATASOURCES_PATH") + old_path = ENV["RASTERDATASOURCES_PATH"] + delete!(ENV, "RASTERDATASOURCES_PATH") + else + old_path = nothing + end + + scratch_path = rasterpath() + @test isdir(scratch_path) + + # Switch to environment variable + ENV["RASTERDATASOURCES_PATH"] = temp_dir + env_path = rasterpath() + @test env_path == temp_dir + @test env_path != scratch_path + + # Switch back to scratch directory + delete!(ENV, "RASTERDATASOURCES_PATH") + back_to_scratch = rasterpath() + @test back_to_scratch == scratch_path + @test isdir(back_to_scratch) + + # Restore original state + if old_path !== nothing + ENV["RASTERDATASOURCES_PATH"] = old_path + end + + finally + rm(temp_dir, recursive=true, force=true) + end + end + + @testset "Path properties and validation" begin + # Test various properties of returned paths + path = rasterpath() + + @test isa(path, String) + @test !isempty(path) + @test isdir(path) + @test isabspath(path) + + # Test that the path is writable + test_file = joinpath(path, "test_write_$(rand(UInt32)).txt") + try + write(test_file, "test") + @test isfile(test_file) + @test read(test_file, String) == "test" + finally + rm(test_file, force=true) + end + end + +end \ No newline at end of file From 6102ebbc53d72cabf3350916c375594a1ddce200 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Fri, 24 Oct 2025 17:53:30 +1100 Subject: [PATCH 3/6] add Scratch dep --- Project.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8977396..5209e39 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +Scratch = "6c6a2e73-6563-6170-7368-637461726353" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" @@ -23,6 +24,7 @@ ASCIIrasters = "0.1" HTTP = "0.8, 0.9, 1" JSON = "0.21" Proj = "1" +Scratch = "1" URIs = "1" ZipFile = "0.9, 0.10" -julia = "1.9" \ No newline at end of file +julia = "1.9" From b9590d0a212ced92109b35f4e4c3ddddf7411ed9 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 25 Oct 2025 11:55:37 +1100 Subject: [PATCH 4/6] better messages --- src/shared.jl | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/shared.jl b/src/shared.jl index 44a4f3e..423889e 100644 --- a/src/shared.jl +++ b/src/shared.jl @@ -1,3 +1,6 @@ +const RDS_PATH_STRING = "ENV[\"RASTERDATASOURCES_PATH\"] = \"/path/to/your/data\"" +const STARTUP_PATH_STRING = "`[julia folder]/config/startup.jl`" + # Vector layers are allowed, but converted to `Tuple` immediatedly. function getraster(T::Type, layers::AbstractArray; kw...) getraster(T, (layers...,); kw...) @@ -62,6 +65,7 @@ function _maybe_download(uri::URI, filepath, headers = []) if !isfile(filepath) mkpath(dirname(filepath)) @info "Starting download for $uri" + try HTTP.download(string(uri), filepath, headers) catch e @@ -88,8 +92,8 @@ If scratch directory creation fails, an error is thrown with instructions to man # Examples ```julia # With environment variable set -ENV["RASTERDATASOURCES_PATH"] = "/path/to/data" -rasterpath() # Returns "/path/to/data" +$RDS_PATH_STRING +rasterpath() # Returns "/path/to/your/data" # Without environment variable (automatic scratch directory) rasterpath() # Returns something like "/Users/username/.julia/scratchspaces/12345.../raster_data" @@ -97,18 +101,34 @@ rasterpath() # Returns something like "/Users/username/.julia/scratchspaces/123 """ function rasterpath() # Priority 1: Use environment variable if set and valid - if haskey(ENV, "RASTERDATASOURCES_PATH") && isdir(ENV["RASTERDATASOURCES_PATH"]) - return ENV["RASTERDATASOURCES_PATH"] + if haskey(ENV, "RASTERDATASOURCES_PATH") + path = ENV["RASTERDATASOURCES_PATH"] + isdir(path) || error("Your RASTERDATASOURCES_PATH is not a directory: $path") + return path end - + # Priority 2: Use scratch directory try scratch_dir = @get_scratch!("raster_data") - @debug "Using scratch directory for raster data storage: $scratch_dir" + if isempty(readdir(scratch_dir)) # If this is the first use, print info in the REPL + @info """ + Created scratch directory for raster data storage: + $scratch_dir. + Make sure there is adequate space, some datasets are 100GB+. + For a custom location set $RDS_PATH_STRING + in your $STARTUP_PATH_STRING file." + """ maxlog=1 + end return scratch_dir catch e - error("Failed to create scratch directory for raster data storage. " * - "Please set ENV[\"RASTERDATASOURCES_PATH\"] manually. Error: $e") + error( + """ + Failed to create scratch directory for raster data storage. + Please set $RDS_PATH_STRING manually + in your $STARTUP_PATH_STRING. + Error: $e + """ + ) end end From 336d32ff316b0535f71e8dac37f3ad8ea98a9420 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 25 Oct 2025 12:29:28 +1100 Subject: [PATCH 5/6] fix awful LLM readme --- README.md | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 4556932..ef09fc0 100644 --- a/README.md +++ b/README.md @@ -59,30 +59,22 @@ Install as usual with: ### Storage Configuration -RasterDataSources.jl automatically handles data storage for you. By default, it will create a persistent scratch directory to store downloaded raster data. No manual configuration is required to get started. +RasterDataSources.jl handles data storage for you. By default, it will create a +persistent scratch directory to store downloaded raster data. +However, raster data may be 100s of GB, or more. So make sure there is room in your home directory. -#### Automatic Storage (Recommended) +#### Custom Storage Location -When you first use RasterDataSources.jl, it will automatically create a scratch directory using Julia's Scratch.jl package. This directory persists across Julia sessions and package updates, so your downloaded data won't be lost. +To put data in a custom location, set `RASTERDATASOURCES_PATH` in your +environment, usually in your `[juliadir]/config/startup.jl` file: ```julia -julia> using RasterDataSources -julia> getraster(WorldClim{Climate}, :wind; month=1) # Automatically uses scratch storage +ENV["RASTERDATASOURCES_PATH"] = "/path/to/your/data" ``` -#### Custom Storage Location (Optional) - -If you prefer to specify your own storage location, you can set the `RASTERDATASOURCES_PATH` environment variable: - -```julia -ENV["RASTERDATASOURCES_PATH"] = "/home/user/Data/" -``` - -This can be put in your `startup.jl` file or the system environment. When this variable is set, RasterDataSources.jl will use your specified directory instead of the automatic scratch directory. - #### Finding Your Storage Location -The storage location is managed internally by RasterDataSources.jl. When using automatic storage, the exact path is handled by Julia's Scratch.jl package and will be in your system's scratch directory. +`RasterDataSources.rasterpath()` will return the current data storage path. -RasterDataSources was based on code from the `SimpleSDMDataSoures.jl` package by Timothée Poisot. +_RasterDataSources was originally based on code from the `SimpleSDMDataSoures.jl` package by Timothée Poisot._ From 2b4580c43bbf6fecdfb05d3d5c29b0c5b144017f Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 25 Oct 2025 14:01:45 +1100 Subject: [PATCH 6/6] fix deps --- Project.toml | 14 ++++++++++++++ test/Project.toml | 8 -------- 2 files changed, 14 insertions(+), 8 deletions(-) delete mode 100644 test/Project.toml diff --git a/Project.toml b/Project.toml index 5209e39..5882abd 100644 --- a/Project.toml +++ b/Project.toml @@ -21,10 +21,24 @@ RasterDataSourcesProjExt = "Proj" [compat] ASCIIrasters = "0.1" +Dates = "1" HTTP = "0.8, 0.9, 1" JSON = "0.21" Proj = "1" +Pkg = "1" +SafeTestsets = "0.0.1" Scratch = "1" URIs = "1" ZipFile = "0.9, 0.10" julia = "1.9" + +[extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Proj = "c94c279d-25a6-4763-9509-64d165bea63e" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Aqua", "Dates", "Pkg", "Proj", "SafeTestsets", "Test"] diff --git a/test/Project.toml b/test/Project.toml deleted file mode 100644 index 7621b6a..0000000 --- a/test/Project.toml +++ /dev/null @@ -1,8 +0,0 @@ -[deps] -Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Proj = "c94c279d-25a6-4763-9509-64d165bea63e" -SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"