Conversation
| # TODO: The load_labels doesn't actually allow the root to be | ||
| # modified. We should probably do this at a package level, not | ||
| # at a class level | ||
| self._labels = Engineer.load_labels(root=root) |
There was a problem hiding this comment.
@ivanzvonkov, do you think the idea of a root is still relevant? Or is the data small enough that we can just have the DATAFOLDER_PATH act as the root and remove that as an option for the user?
There was a problem hiding this comment.
per discussion today I think removing is fine
| FEATURES_DIR = "features" | ||
| TEST_FEATURES_DIR = "test_features" | ||
|
|
||
| # These values describe the structure of the data folder |
There was a problem hiding this comment.
@ivanzvonkov , this locks in the folder structure but I think that's fine.
We could potentially have a way of overriding this data folder path at a package level, but otherwise I'd be in favor of removing folder manipulation for the user entirely and controlling it here.
| array = np.asarray(array) | ||
| idx = (np.abs(array - value)).argmin() | ||
| return array[idx] | ||
| def load_labels(root=DATAFOLDER_PATH) -> geopandas.GeoDataFrame: |
There was a problem hiding this comment.
Most of the updates to the geojson here are from crop-mask
|
|
||
| labelled_np = da.sel(x=closest_lon).sel(y=closest_lat).values | ||
| else: | ||
| min_distance_from_point = np.inf |
There was a problem hiding this comment.
From crop-mask
| Mapping them to 3d space allows us to do that | ||
| """ | ||
| lat, lon = self.get_centre(in_radians=True) | ||
| return [cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat)] |
| from pathlib import Path | ||
|
|
||
| from typing import Optional | ||
| from cropharvest.boundingbox import BBox |
| ) | ||
| return tif_paths | ||
|
|
||
| def create_h5_dataset(self) -> None: |
There was a problem hiding this comment.
What do you think about the type of comments in https://github.com/nasaharvest/crop-mask/blob/69da6cef8258b3171c6a02771bfc2219d8eadf5b/src/ETL/dataset.py#L320
| hf = h5py.File(arrays_dir / file_name, "w") | ||
| filename = ( | ||
| f"lat={instance.label_lat}_lon={instance.label_lon}_year={instance.year}.h5" | ||
| ) |
There was a problem hiding this comment.
what about start and end date? How do we know the months will match up?
| pass | ||
|
|
||
|
|
||
| def filter_geojson(gpdf: geopandas.GeoDataFrame, bounding_box: BBox) -> geopandas.GeoDataFrame: |
| @@ -0,0 +1,66 @@ | |||
| """ | |||
| After 20220418_renaming.py was run, | |||
There was a problem hiding this comment.
maybe some underscores in the date would be cleaner in the file names?
|
@gabrieltseng what's the latest status on this? |
Related issue: #83
`CropHarvest` dataset to handle the new naming format
Related issue: #83
In addition, some smaller updates and bugfixes:
- `"index"` column in the `labels.geojson` conflicts with the index used by pandas. Rename it to `"dataset_index"` instead.
- `dataset_identifier` was being incorrectly constructed - this is now fixed