-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Description
When trying to download data from IHME using the "risk" context, I receive an error:
from ddf_utils.factory.ihme import IHMELoader
# get loader and metadata
GBD = IHMELoader()
metadata = GBD.load_metadata()
# choose version: latest
version = metadata['version']['id'].max()
# choose locations: all but 'custom'
locations_md = metadata['location']
locations = locations_md[locations_md['id'] != 'custom']['id'].tolist()
# download risk data
zippath = GBD.bulk_download(
version = version, # which version of the data to use
out_dir = outdir, # where to save the data
context = 'risk', # GBD nomenclature for 'diseases'
age = [2,3,4,5], # early neonatal, late neonatal, postneonatal, 1-4 years
location = locations, # all locations
sex = 3, # both male and female
year = 2011, # year
metric = 1, # number of cases
measure = [2,3,4], # daly, yld, yll (no prevalence available)
cause = [302, 322], # diarrhea, lower respiratory infections
rei_id = [136, 137], # non-exclusive breastfeeding, discontinued breastfeeding
idsOrNames = 'both', # include plain text descriptions of the codes in the results file
)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\programs\miniconda\envs\my_notebook\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'rei_id'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-6-71661b7cf014> in <module>
1 # download risk data
----> 2 zippath = GBD.bulk_download(
3 version = version, # which version of the data to use
4 out_dir = outdir, # where to save the data
5 context = 'risk', # GBD nomenclature for 'diseases'
~\programs\miniconda\envs\my_notebook\lib\site-packages\ddf_utils\factory\ihme.py in bulk_download(self, out_dir, version, context, **kwargs)
111 query = [self._make_query(c, version, **kwargs) for c in context]
112 else:
--> 113 query = [self._make_query(context, version, **kwargs)]
114
115 taskIDs = set()
~\programs\miniconda\envs\my_notebook\lib\site-packages\ddf_utils\factory\ihme.py in _make_query(self, context, version, **kwargs)
225 measure = read_opt(kwargs, 'measure', default=measures)
226 metric = read_opt(kwargs, 'metric', default=[1, 2, 3])
--> 227 context_values = rei[rei['type'] == context]['rei_id'].tolist()
228 cause = read_opt(kwargs, 'cause', default=causes)
229 queries.update({
~\programs\miniconda\envs\my_notebook\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~\programs\miniconda\envs\my_notebook\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 'rei_id'

The error occurs because self.metadata['rei'] does not contain a column rei_id; it only contains a column id. Another problem: the current code does not permit users to filter risks, so one can only download all risk data at once. My suggestion for a fix: modify line 227 of ihme.py and add an additional line as follows:
context_values = rei[rei['type'] == context]['id'].tolist() #id instead of rei_id
context_values = read_opt(kwargs, 'rei_id', default=context_values) # use user-provided ids if given

All the best.
Ben
Metadata
Metadata
Assignees
Labels
No labels