From f98b27c232b0bf6587f88cf90109239f69236b2a Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Wed, 5 Jul 2017 11:33:28 +0200 Subject: [PATCH 1/9] First try at TNG writing Adds a `write` method to the `TNGFile` class. At that point, this method successfully writes the positions and the box shape. It tries to write the time, but fails. It does not try to write velocities and forces yet, nor does it ties to write the topology. The written file can be read by pytng. It is also read by `gmx check`, but yields the following message: TNG library: Cannot uncompress data block. /home/jon/src/gromacs-2016.3/src/external/tng_io/src/lib/tng_io.c:5298 --- pytng/pytng.pyx | 136 +++++++++++++++++++++++++++++++++++++++++++++- tests/test_tng.py | 6 -- 2 files changed, 133 insertions(+), 9 deletions(-) diff --git a/pytng/pytng.pyx b/pytng/pytng.pyx index b937c26f..4d8169de 100644 --- a/pytng/pytng.pyx +++ b/pytng/pytng.pyx @@ -28,6 +28,7 @@ cdef extern from "tng/tng_io.h": const char *filename, const char mode, tng_trajectory_t *tng_data_p) + tng_function_status tng_util_trajectory_close( tng_trajectory_t *tng_data_p) @@ -66,6 +67,70 @@ cdef extern from "tng/tng_io.h": int64_t *n_values_per_frame, char *type) + tng_function_status tng_util_box_shape_write_interval_set( + const tng_trajectory_t tng_data, + const int64_t interval) + + tng_function_status tng_util_pos_write_interval_set( + const tng_trajectory_t tng_data, + const int64_t interval) + + tng_function_status tng_util_vel_write_interval_set( + const tng_trajectory_t tng_data, + const int64_t interval) + + tng_function_status tng_util_force_write_interval_set( + const tng_trajectory_t tng_data, + const int64_t interval) + + tng_function_status tng_util_box_shape_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const float *box_shape) + + tng_function_status tng_util_box_shape_with_time_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const double time, + const float *box_shape) + + tng_function_status tng_util_pos_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const float *positions) + + tng_function_status tng_util_pos_with_time_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const double time, + const float *positions) + + tng_function_status tng_util_vel_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const float *velocities) + + tng_function_status tng_util_vel_with_time_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const double time, + const float *velocities) + + tng_function_status tng_util_force_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const float *forces) + + tng_function_status tng_util_force_with_time_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const double time, + const float *forces) + + tng_function_status tng_implicit_num_particles_set( + const tng_trajectory_t tng_data, + const int64_t n) + TNGFrame = namedtuple("TNGFrame", "positions time step box") cdef class TNGFile: @@ -108,7 +173,6 @@ cdef class TNGFile: _mode = 'r' elif self.mode == 'w': _mode = 'w' - raise NotImplementedError('Writing is not implemented yet.') else: raise IOError('mode must be one of "r" or "w", you ' 'supplied {}'.format(mode)) @@ -138,6 +202,9 @@ cdef class TNGFile: if ok != TNG_SUCCESS: raise IOError("An error ocurred reading distance unit exponent. {}".format(status_error_message[ok])) self.distance_scale = 10.0**(exponent+9) + elif self.mode == 'w': + self._n_frames = 0 # No frame were written yet + # self._n_atoms ? self.is_open = True self.step = 0 @@ -203,8 +270,8 @@ cdef class TNGFile: if not self.is_open: raise IOError('No file opened') if self.mode != 'r': - raise IOError('File opened in mode: {}. Reading only allow ' - 'in mode "r"'.format('self.mode')) + raise IOError('File opened in mode: {}. Reading only allowed ' + 'in mode "r"'.format(self.mode)) if self.step >= self.n_frames: self.reached_eof = True raise StopIteration("Reached EOF in read") @@ -264,6 +331,64 @@ cdef class TNGFile: self.step += 1 return TNGFrame(xyz, time, self.step - 1, box) + def _init_first_frame_write(self, int64_t n_atoms, + box_shape_interval=1, pos_interval=1, + time=None): + cdef int64_t ok + + ok = tng_implicit_num_particles_set(self._traj, n_atoms) + if_not_ok(ok, 'Could not set the number of particles') + + ok = tng_util_pos_write_interval_set(self._traj, 1) + if_not_ok(ok, 'Could not set position write interval') + ok = tng_util_box_shape_write_interval_set(self._traj, 1) + if_not_ok(ok, 'Could not set box shape write interval') + + def write(self, + np.ndarray[np.float32_t, ndim=2, mode='c'] positions, + np.ndarray[np.float32_t, ndim=2, mode='c'] box, + time=None): + if self.mode != 'w': + raise IOError('File opened in mode: {}. Writing only allowed ' + 'in mode "w"'.format(self.mode)) + if not self.is_open: + raise IOError('No file currently opened') + + cdef int64_t ok + cdef np.ndarray[float, ndim=2, mode='c'] xyz + cdef np.ndarray[float, ndim=2, mode='c'] box_contiguous + + if self._n_frames == 0: + self._init_first_frame_write(n_atoms=positions.shape[0]) + + if time is not None: + try: + time = float(time) + except ValueError: + raise ValueError('time must be a real number or None') + + box_contiguous = np.ascontiguousarray(box, dtype=np.float32) + if time is None: + ok = tng_util_box_shape_write(self._traj, self.step, + &box_contiguous[0, 0]) + else: + ok = tng_util_box_shape_with_time_write(self._traj, + self.step, + time, + &box_contiguous[0, 0]) + if_not_ok(ok, 'Could not write box shape') + + xyz = np.ascontiguousarray(positions, dtype=np.float32) + if time is None: + ok = tng_util_pos_write(self._traj, self.step, &xyz[0, 0]) + else: + ok = tng_util_pos_with_time_write(self._traj, self.step, + time, &xyz[0, 0]) + if_not_ok(ok, 'Could not write positions') + + self.step += 1 + self._n_frames += 1 + def seek(self, step): """Move the file handle to a particular frame number @@ -319,3 +444,8 @@ cdef class TNGFile: else: raise TypeError("Trajectories must be an indexed using an integer," " slice or list of indices") + + +def if_not_ok(ok, message, exception=IOError): + if ok != TNG_SUCCESS: + raise exception(message) diff --git a/tests/test_tng.py b/tests/test_tng.py index 84f50fe0..567a642c 100644 --- a/tests/test_tng.py +++ b/tests/test_tng.py @@ -19,11 +19,6 @@ def test_open_missing_file_mode_r(MISSING_FILEPATH): assert 'does not exist' in str(excinfo.value) -def test_open_mode_w(MISSING_FILEPATH): - with pytest.raises(NotImplementedError): - pytng.TNGFile(MISSING_FILEPATH, mode='w') - - def test_open_invalide_mode(GMX_REF_FILEPATH): with pytest.raises(IOError) as excinfo: pytng.TNGFile(GMX_REF_FILEPATH, mode='invalid') @@ -150,7 +145,6 @@ def test_seek_IndexError(idx, GMX_REF_FILEPATH): tng[idx] -@pytest.mark.skip(reason="Write mode not implemented yet.") def test_seek_write(MISSING_FILEPATH): with pytng.TNGFile(MISSING_FILEPATH, mode='w') as tng: with pytest.raises(IOError) as excinfo: From 444374601f18d00f0f470bd15adefa0a263f4c07 Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Wed, 5 Jul 2017 12:10:18 +0200 Subject: [PATCH 2/9] Remove useless writing initialization --- pytng/pytng.pyx | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/pytng/pytng.pyx b/pytng/pytng.pyx index 4d8169de..78007199 100644 --- a/pytng/pytng.pyx +++ b/pytng/pytng.pyx @@ -331,19 +331,6 @@ cdef class TNGFile: self.step += 1 return TNGFrame(xyz, time, self.step - 1, box) - def _init_first_frame_write(self, int64_t n_atoms, - box_shape_interval=1, pos_interval=1, - time=None): - cdef int64_t ok - - ok = tng_implicit_num_particles_set(self._traj, n_atoms) - if_not_ok(ok, 'Could not set the number of particles') - - ok = tng_util_pos_write_interval_set(self._traj, 1) - if_not_ok(ok, 'Could not set position write interval') - ok = tng_util_box_shape_write_interval_set(self._traj, 1) - if_not_ok(ok, 'Could not set box shape write interval') - def write(self, np.ndarray[np.float32_t, ndim=2, mode='c'] positions, np.ndarray[np.float32_t, ndim=2, mode='c'] box, @@ -359,7 +346,14 @@ cdef class TNGFile: cdef np.ndarray[float, ndim=2, mode='c'] box_contiguous if self._n_frames == 0: - self._init_first_frame_write(n_atoms=positions.shape[0]) + self._n_atoms = positions.shape[0] + ok = tng_implicit_num_particles_set(self._traj, self.n_atoms) + if_not_ok(ok, 'Could not set the number of particles') + elif self.n_atoms != positions.shape[0]: + message = ('Only fixed number of particles is supported. ' + 'Cannot write {} particles instead of {}.' + .format(positions.shape[0], self.n_atoms)) + raise NotImplementedError(message) if time is not None: try: From 220992d9a7bb47e336daba79f5345c0b0c9a14a9 Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Thu, 6 Jul 2017 13:14:08 +0200 Subject: [PATCH 3/9] Some tweaking here and there on writing --- pytng/pytng.pyx | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/pytng/pytng.pyx b/pytng/pytng.pyx index 78007199..b3dddd82 100644 --- a/pytng/pytng.pyx +++ b/pytng/pytng.pyx @@ -131,6 +131,10 @@ cdef extern from "tng/tng_io.h": const tng_trajectory_t tng_data, const int64_t n) + tng_function_status tng_num_frames_per_frame_set_set( + const tng_trajectory_t tng_data, + const int64_t n) + TNGFrame = namedtuple("TNGFrame", "positions time step box") cdef class TNGFile: @@ -346,6 +350,7 @@ cdef class TNGFile: cdef np.ndarray[float, ndim=2, mode='c'] box_contiguous if self._n_frames == 0: + ok = tng_num_frames_per_frame_set_set(self._traj, 1) self._n_atoms = positions.shape[0] ok = tng_implicit_num_particles_set(self._traj, self.n_atoms) if_not_ok(ok, 'Could not set the number of particles') @@ -357,10 +362,21 @@ cdef class TNGFile: if time is not None: try: - time = float(time) + time = float(time) # Make sure time is a real + # Time is provided to this function in picoseconds, + # but functions from tng_io expect seconds. + time *= 1e-12 except ValueError: raise ValueError('time must be a real number or None') + xyz = np.ascontiguousarray(positions, dtype=np.float32) + if time is None: + ok = tng_util_pos_write(self._traj, self.step, &xyz[0, 0]) + else: + ok = tng_util_pos_with_time_write(self._traj, self.step, + time, &xyz[0, 0]) + if_not_ok(ok, 'Could not write positions') + box_contiguous = np.ascontiguousarray(box, dtype=np.float32) if time is None: ok = tng_util_box_shape_write(self._traj, self.step, @@ -372,14 +388,6 @@ cdef class TNGFile: &box_contiguous[0, 0]) if_not_ok(ok, 'Could not write box shape') - xyz = np.ascontiguousarray(positions, dtype=np.float32) - if time is None: - ok = tng_util_pos_write(self._traj, self.step, &xyz[0, 0]) - else: - ok = tng_util_pos_with_time_write(self._traj, self.step, - time, &xyz[0, 0]) - if_not_ok(ok, 'Could not write positions') - self.step += 1 self._n_frames += 1 From bbb40fd1be78595e09a727db1f78d082d2e988d2 Mon Sep 17 00:00:00 2001 From: Jonathan Barnoud Date: Sat, 8 Jul 2017 10:29:15 +0200 Subject: [PATCH 4/9] Fix box and time writing --- pytng/pytng.pyx | 103 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 91 insertions(+), 12 deletions(-) diff --git a/pytng/pytng.pyx b/pytng/pytng.pyx index b3dddd82..139a2e85 100644 --- a/pytng/pytng.pyx +++ b/pytng/pytng.pyx @@ -15,8 +15,12 @@ np.import_array() ctypedef enum tng_function_status: TNG_SUCCESS, TNG_FAILURE, TNG_CRITICAL ctypedef enum tng_data_type: TNG_CHAR_DATA, TNG_INT_DATA, TNG_FLOAT_DATA, TNG_DOUBLE_DATA ctypedef enum tng_hash_mode: TNG_SKIP_HASH, TNG_USE_HASH +ctypedef enum tng_block_type: TNG_NON_TRAJECTORY_BLOCK, TNG_TRAJECTORY_BLOCK +ctypedef enum tng_compression: TNG_UNCOMPRESSED, TNG_XTC_COMPRESSION, TNG_TNG_COMPRESSION, TNG_GZIP_COMPRESSION +ctypedef enum tng_particle_dependency: TNG_NON_PARTICLE_BLOCK_DATA, TNG_PARTICLE_BLOCK_DATA cdef long long TNG_TRAJ_BOX_SHAPE = 0x0000000010000000LL +cdef long long TNG_TRAJ_POSITIONS = 0x0000000010000001LL status_error_message = ['OK', 'Failure', 'Critical'] @@ -135,6 +139,42 @@ cdef extern from "tng/tng_io.h": const tng_trajectory_t tng_data, const int64_t n) + tng_function_status tng_util_generic_with_time_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const double time, + const float *values, + const int64_t n_values_per_frame, + const int64_t block_id, + const char *block_name, + const char particle_dependency, + const char compression) + + tng_function_status tng_time_per_frame_set( + const tng_trajectory_t tng_data, + const double time) + + tng_function_status tng_util_generic_with_time_write( + const tng_trajectory_t tng_data, + const int64_t frame_nr, + const double time, + const float *values, + const int64_t n_values_per_frame, + const int64_t block_id, + const char *block_name, + const char particle_dependency, + const char compression) + + tng_function_status tng_util_generic_write_interval_set( + const tng_trajectory_t tng_data, + const int64_t i, + const int64_t n_values_per_frame, + const int64_t block_id, + const char *block_name, + const char particle_dependency, + const char compression) + + TNGFrame = namedtuple("TNGFrame", "positions time step box") cdef class TNGFile: @@ -348,14 +388,37 @@ cdef class TNGFile: cdef int64_t ok cdef np.ndarray[float, ndim=2, mode='c'] xyz cdef np.ndarray[float, ndim=2, mode='c'] box_contiguous + cdef double dt if self._n_frames == 0: + # TODO: The number of frames per frame set should be tunable. ok = tng_num_frames_per_frame_set_set(self._traj, 1) + if_not_ok(ok, 'Could not set the number of frames per frame set') + # The number of atoms must be set either with a full description + # of the system content (topology), or with just the number of + # particles. We should fall back on the latter, but being + # able to write the topology would be a nice addition + # in the future. self._n_atoms = positions.shape[0] ok = tng_implicit_num_particles_set(self._traj, self.n_atoms) if_not_ok(ok, 'Could not set the number of particles') + # Set the writing interval to 1 for all blocks. + ok = tng_util_pos_write_interval_set(self._traj, 1) + if_not_ok(ok, 'Could not set the writing interval for positions') + # When we use the "tn_util_box_shape_*" functions to write the box + # shape, gromacs tools fail to uncompress the data block. Instead of + # using the default gzip to compress the box, we do not compress it. + # ok = tng_util_box_shape_write_interval_set(self._traj, 1) + ok = tng_util_generic_write_interval_set( + self._traj, 1, 9, + TNG_TRAJ_BOX_SHAPE, + "BOX SHAPE", + TNG_NON_PARTICLE_BLOCK_DATA, + TNG_UNCOMPRESSED + ) + if_not_ok(ok, 'Could not set the writing interval for the box shape') elif self.n_atoms != positions.shape[0]: - message = ('Only fixed number of particles is supported. ' + message = ('Only fixed number of particles is currently supported. ' 'Cannot write {} particles instead of {}.' .format(positions.shape[0], self.n_atoms)) raise NotImplementedError(message) @@ -368,6 +431,33 @@ cdef class TNGFile: time *= 1e-12 except ValueError: raise ValueError('time must be a real number or None') + # The time per frame has to be set for the time to be written in + # the frames. + # To be able to set an arbitrary time, we need to set the time per + # frame to 0 and to use one frame per frame set. Using the actual + # time difference between consecutive frames can cause issues if + # the difference is negative, or if the difference is 0 and the + # frame is not the first of the frame set. + ok = tng_time_per_frame_set(self._traj, 0) + if_not_ok(ok, 'Could not set the time per frame') + + box_contiguous = np.ascontiguousarray(box, dtype=np.float32) + if time is None: + ok = tng_util_box_shape_write(self._traj, self.step, + &box_contiguous[0, 0]) + else: + #ok = tng_util_box_shape_with_time_write(self._traj, + # self.step, + # time, + # &box_contiguous[0, 0]) + ok = tng_util_generic_with_time_write( + self._traj, self.step, time, + &box[0, 0], + 9, TNG_TRAJ_BOX_SHAPE, "BOX SHAPE", + TNG_NON_PARTICLE_BLOCK_DATA, + TNG_UNCOMPRESSED + ) + if_not_ok(ok, 'Could not write box shape') xyz = np.ascontiguousarray(positions, dtype=np.float32) if time is None: @@ -377,17 +467,6 @@ cdef class TNGFile: time, &xyz[0, 0]) if_not_ok(ok, 'Could not write positions') - box_contiguous = np.ascontiguousarray(box, dtype=np.float32) - if time is None: - ok = tng_util_box_shape_write(self._traj, self.step, - &box_contiguous[0, 0]) - else: - ok = tng_util_box_shape_with_time_write(self._traj, - self.step, - time, - &box_contiguous[0, 0]) - if_not_ok(ok, 'Could not write box shape') - self.step += 1 self._n_frames += 1 From f593b77c3c448adfd09bbfad0711f87df007cdf5 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 22 Sep 2017 18:30:40 +0200 Subject: [PATCH 5/9] fix unit test --- tests/test_tng.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_tng.py b/tests/test_tng.py index 567a642c..fad11b8a 100644 --- a/tests/test_tng.py +++ b/tests/test_tng.py @@ -204,9 +204,7 @@ def test_read_not_open(GMX_REF_FILEPATH): assert 'No file opened' in str(excinfo.value) -@pytest.mark.skip(reason="Write mode not implemented yet.") def test_read_not_mode_r(MISSING_FILEPATH): - with pytest.raises(IOError) as excinfo: + with pytest.raises(IOError, match='Reading only allow'): with pytng.TNGFile(MISSING_FILEPATH, mode='w') as tng: tng.read() - assert 'Reading only allow in mode "r"' in str(excinfo.value) From 8cd43541e99dba2435096947407e4776423beaf5 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 22 Sep 2017 18:53:59 +0200 Subject: [PATCH 6/9] add writing test currently results in a segfault --- tests/test_tng.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_tng.py b/tests/test_tng.py index fad11b8a..cf88c20a 100644 --- a/tests/test_tng.py +++ b/tests/test_tng.py @@ -208,3 +208,15 @@ def test_read_not_mode_r(MISSING_FILEPATH): with pytest.raises(IOError, match='Reading only allow'): with pytng.TNGFile(MISSING_FILEPATH, mode='w') as tng: tng.read() + + +def test_writting(GMX_REF_FILEPATH, tmpdir): + outfile = str(tmpdir.join('foo.tng')) + with pytng.TNGFile(GMX_REF_FILEPATH) as ref, pytng.TNGFile(outfile, + 'w') as out: + for ts in ref: + out.write(ts.positions, ts.box, ts.time) + + with pytng.TNGFile(GMX_REF_FILEPATH) as ref, pytng.TNGFile(outfile) as out: + for r, o in zip(ref, out): + np.testing.assert_almost_equal(r.positions, o.positions, decimal=4) From b676b7adedb0e2491866ed888253c4ed2c3a7b64 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 22 Sep 2017 18:54:11 +0200 Subject: [PATCH 7/9] finish frame set of one frame for writing --- pytng/pytng.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pytng/pytng.pyx b/pytng/pytng.pyx index 139a2e85..79b99346 100644 --- a/pytng/pytng.pyx +++ b/pytng/pytng.pyx @@ -175,6 +175,10 @@ cdef extern from "tng/tng_io.h": const char compression) + tng_function_status tng_frame_set_write( + const tng_trajectory_t tng_data, + const char hash_mode) + TNGFrame = namedtuple("TNGFrame", "positions time step box") cdef class TNGFile: @@ -467,6 +471,9 @@ cdef class TNGFile: time, &xyz[0, 0]) if_not_ok(ok, 'Could not write positions') + # finish frame set to write step, hashing should be configurable + tng_frame_set_write(self._traj, TNG_USE_HASH) + self.step += 1 self._n_frames += 1 From 3d776bbd98b6a11111c46821c13c7828fe3b527e Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 22 Sep 2017 19:35:53 +0200 Subject: [PATCH 8/9] this isn't good since it all points to the same memory With this change I'm reminding myself of that --- pytng/pytng.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytng/pytng.pyx b/pytng/pytng.pyx index 79b99346..ea8bef05 100644 --- a/pytng/pytng.pyx +++ b/pytng/pytng.pyx @@ -375,6 +375,8 @@ cdef class TNGFile: finally: if box_shape != NULL: free(box_shape) + # DO NOT FREE float_box or double_box here. They point to the same + # memory as box_shape self.step += 1 return TNGFrame(xyz, time, self.step - 1, box) From 5607bc38b266c12e01a38f6a2d238ab514160ce5 Mon Sep 17 00:00:00 2001 From: Max Linke Date: Fri, 22 Sep 2017 19:54:33 +0200 Subject: [PATCH 9/9] check if close file worked --- pytng/pytng.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytng/pytng.pyx b/pytng/pytng.pyx index ea8bef05..51222286 100644 --- a/pytng/pytng.pyx +++ b/pytng/pytng.pyx @@ -261,9 +261,11 @@ cdef class TNGFile: def close(self): """Make sure the file handle is closed""" if self.is_open: - tng_util_trajectory_close(&self._traj) + ok = tng_util_trajectory_close(&self._traj) + if_not_ok(ok, "couldn't close") self.is_open = False self._n_frames = -1 + print("closed file") def __enter__(self): # Support context manager