IHME risk data download fails

When trying to download data from IHME using the "risk" context, I receive an error:

```python
from ddf_utils.factory.ihme import IHMELoader

# get loader and metadata
GBD = IHMELoader()
metadata = GBD.load_metadata()

# choose version: latest
version = metadata['version']['id'].max()

# choose locations: all but 'custom'
locations_md = metadata['location']
locations = locations_md[locations_md['id'] != 'custom']['id'].tolist()

# download risk data
zippath = GBD.bulk_download(
    version = version, # which version of the data to use
    out_dir = outdir, # where to save the data
    context = 'risk', # GBD nomenclature for 'diseases'
    age = [2,3,4,5], # early neonatal, late neonatal, postneonatal, 1-4 years
    location = locations, # all locations
    sex = 3, # both male and female
    year = 2011, # year
    metric = 1, # number of cases
    measure = [2,3,4], # daly, yld, yll (no prevalence available)
    cause = [302, 322], # diarrhea, lower respiratory infections
    rei_id = [136, 137], # non-exclusive breastfeeding, discontinued breastfeeding
    idsOrNames = 'both', # include plain text descriptions of the codes in the results file
)
```

```text
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\programs\miniconda\envs\my_notebook\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3079             try:
-> 3080                 return self._engine.get_loc(casted_key)
   3081             except KeyError as err:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'rei_id'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
<ipython-input-6-71661b7cf014> in <module>
      1 # download risk data
----> 2 zippath = GBD.bulk_download(
      3     version = version, # which version of the data to use
      4     out_dir = outdir, # where to save the data
      5     context = 'risk', # GBD nomenclature for 'diseases'

~\programs\miniconda\envs\my_notebook\lib\site-packages\ddf_utils\factory\ihme.py in bulk_download(self, out_dir, version, context, **kwargs)
    111             query = [self._make_query(c, version, **kwargs) for c in context]
    112         else:
--> 113             query = [self._make_query(context, version, **kwargs)]
    114 
    115         taskIDs = set()

~\programs\miniconda\envs\my_notebook\lib\site-packages\ddf_utils\factory\ihme.py in _make_query(self, context, version, **kwargs)
    225             measure = read_opt(kwargs, 'measure', default=measures)
    226             metric = read_opt(kwargs, 'metric', default=[1, 2, 3])
--> 227             context_values = rei[rei['type'] == context]['rei_id'].tolist()
    228             cause = read_opt(kwargs, 'cause', default=causes)
    229             queries.update({

~\programs\miniconda\envs\my_notebook\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   3022             if self.columns.nlevels > 1:
   3023                 return self._getitem_multilevel(key)
-> 3024             indexer = self.columns.get_loc(key)
   3025             if is_integer(indexer):
   3026                 indexer = [indexer]

~\programs\miniconda\envs\my_notebook\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   3080                 return self._engine.get_loc(casted_key)
   3081             except KeyError as err:
-> 3082                 raise KeyError(key) from err
   3083 
   3084         if tolerance is not None:

KeyError: 'rei_id'
```

The error occurs because `self.metadata['rei']` does not contain a column `rei_id`; it only contains a column `id`. Another problem: the current code does not permit users to filter risks, so one can only download all risk data at once. My suggested fix: modify line 227 of ihme.py and add an additional line, as follows:

```python
            context_values = rei[rei['type'] == context]['id'].tolist() #id instead of rei_id
            context_values = read_opt(kwargs, 'rei_id', default=context_values) # use user-provided ids if given
```

All the best.
Ben


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

IHME risk data download fails #132

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

IHME risk data download fails #132

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions