Skip to content

Update sources.py to reflect accurate datetime settings #31

@VKeff

Description

@VKeff

Example of error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
[<ipython-input-2-5d8bc327a36c>](https://localhost:8080/#) in <cell line: 0>()
----> 1 bdf = BolideDataFrame(source='glm')

11 frames
[/usr/local/lib/python3.11/dist-packages/bolides/bdf.py](https://localhost:8080/#) in __init__(self, *args, **kwargs)
     99         if source in ['website', 'glm']:
    100             source = 'glm'
--> 101             init_gdf = glm_website()
    102 
    103         elif source == 'usg':

[/usr/local/lib/python3.11/dist-packages/bolides/sources.py](https://localhost:8080/#) in glm_website()
     18     # create DataFrame using JSON data
     19     df = pd.DataFrame(json['data'])
---> 20     df["datetime"] = df["datetime"].astype("datetime64")
     21 
     22     # add bolide energy data

[/usr/local/lib/python3.11/dist-packages/pandas/core/generic.py](https://localhost:8080/#) in astype(self, dtype, copy, errors)
   6641         else:
   6642             # else, only a single dtype is given
-> 6643             new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   6644             res = self._constructor_from_mgr(new_data, axes=new_data.axes)
   6645             return res.__finalize__(self, method="astype")

[/usr/local/lib/python3.11/dist-packages/pandas/core/internals/managers.py](https://localhost:8080/#) in astype(self, dtype, copy, errors)
    428             copy = False
    429 
--> 430         return self.apply(
    431             "astype",
    432             dtype=dtype,

[/usr/local/lib/python3.11/dist-packages/pandas/core/internals/managers.py](https://localhost:8080/#) in apply(self, f, align_keys, **kwargs)
    361                 applied = b.apply(f, **kwargs)
    362             else:
--> 363                 applied = getattr(b, f)(**kwargs)
    364             result_blocks = extend_blocks(applied, result_blocks)
    365 

[/usr/local/lib/python3.11/dist-packages/pandas/core/internals/blocks.py](https://localhost:8080/#) in astype(self, dtype, copy, errors, using_cow, squeeze)
    756             values = values[0, :]  # type: ignore[call-overload]
    757 
--> 758         new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    759 
    760         new_values = maybe_coerce_values(new_values)

[/usr/local/lib/python3.11/dist-packages/pandas/core/dtypes/astype.py](https://localhost:8080/#) in astype_array_safe(values, dtype, copy, errors)
    235 
    236     try:
--> 237         new_values = astype_array(values, dtype, copy=copy)
    238     except (ValueError, TypeError):
    239         # e.g. _astype_nansafe can fail on object-dtype of strings

[/usr/local/lib/python3.11/dist-packages/pandas/core/dtypes/astype.py](https://localhost:8080/#) in astype_array(values, dtype, copy)
    180 
    181     else:
--> 182         values = _astype_nansafe(values, dtype, copy=copy)
    183 
    184     # in pandas we don't store numpy str dtypes, so convert to object

[/usr/local/lib/python3.11/dist-packages/pandas/core/dtypes/astype.py](https://localhost:8080/#) in _astype_nansafe(arr, dtype, copy, skipna)
    108             from pandas.core.arrays import DatetimeArray
    109 
--> 110             dta = DatetimeArray._from_sequence(arr, dtype=dtype)
    111             return dta._ndarray
    112 

[/usr/local/lib/python3.11/dist-packages/pandas/core/arrays/datetimes.py](https://localhost:8080/#) in _from_sequence(cls, scalars, dtype, copy)
    325     @classmethod
    326     def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
--> 327         return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
    328 
    329     @classmethod

[/usr/local/lib/python3.11/dist-packages/pandas/core/arrays/datetimes.py](https://localhost:8080/#) in _from_sequence_not_strict(cls, data, dtype, copy, tz, freq, dayfirst, yearfirst, ambiguous)
    352             tz = timezones.maybe_get_tz(tz)
    353 
--> 354         dtype = _validate_dt64_dtype(dtype)
    355         # if dtype has an embedded tz, capture it
    356         tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

[/usr/local/lib/python3.11/dist-packages/pandas/core/arrays/datetimes.py](https://localhost:8080/#) in _validate_dt64_dtype(dtype)
   2542                 "Please pass in 'datetime64[ns]' instead."
   2543             )
-> 2544             raise ValueError(msg)
   2545 
   2546         if (

ValueError: Passing in 'datetime64' dtype with no precision is not allowed. Please pass in 'datetime64[ns]' instead.

Proposed solution:

Error: df["datetime"] = df["datetime"].astype("datetime64")
Solution: df["datetime"] = pd.to_datetime(df["datetime"], utc=True, unit='ns').dt.tz_localize(None)

The solution specifies nanoseconds as the pandas unit (datetime64[ns]) and sets the time to Coordinated Universal Time (UTC), then removes the timezone information with `.dt.tz_localize(None)`.
NOTE: This isn't a perfect fix and may contribute to timezone-specific errors. It works well for timezone-naive data collection; otherwise, a more detailed investigation of timezone-specific datetimes is recommended for future use.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions