From 2ff5ce5b8a7b40515f49027557cb1c780ed3e0fb Mon Sep 17 00:00:00 2001 From: aplowman Date: Tue, 9 Nov 2021 19:26:25 +0000 Subject: [PATCH 1/5] Initial work on supporting phonon results --- castep_parse/readers.py | 33 ++++++++++++++++++++++++++------- castep_parse/utils.py | 8 ++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/castep_parse/readers.py b/castep_parse/readers.py index a8bd718..48001ab 100644 --- a/castep_parse/readers.py +++ b/castep_parse/readers.py @@ -6,7 +6,12 @@ import numpy as np -from castep_parse.utils import find_files_in_dir, flexible_open, map_species_to_castep +from castep_parse.utils import ( + find_files_in_dir, + flexible_open, + map_species_to_castep, + merge_str_list, +) __all__ = [ 'read_castep_file', @@ -46,6 +51,17 @@ def read_castep_file(path_or_file): header_split = re.split(pat_header, file_str)[1:] runs_list = [''.join(header_split[i:i + 6]) for i in range(0, len(header_split), 6)] + # Merge DFTP headers if present: + dftp_header_idx = [] + for i in range(0, len(header_split), 6): + if '| D D D D F F F F F P P P P T T T T T |' in header_split[i + 1]: + dftp_header_idx.append(i) + + for i in dftp_header_idx: + header_split = merge_str_list(header_split, i - 1, i + 6) + + runs_list = [''.join(header_split[i:i + 6]) for i in range(0, len(header_split), 6)] + runs = [] total_time = 0 geom_iters = [] @@ -84,7 +100,7 @@ def read_castep_file(path_or_file): t = run['final_info']['statistics']['total_time_s'] total_time += t - else: + elif 'geom' in run: # Add on the last-recorded SCF time (of completed geom iterations): all_iters = run['geom']['iterations'] if all_iters: @@ -93,8 +109,6 @@ def read_castep_file(path_or_file): t = final_step['scf'][-1, -1] total_time += t - if 'geom' in run: - run_geom_iters = run['geom'].pop('iterations') # Extract out SCF cycles and energies from geom iteration steps @@ -494,7 +508,7 @@ def merge_geom_data(castep_dat, geom_dat): iterations_idx = -1 for idx, iter_num in enumerate(geom_dat['iter_num']): - + iterations_idx += 1 while (iter_num != castep_dat['geom']['iterations'][iterations_idx]['iter_num']): iterations_idx += 1 @@ -700,7 +714,7 @@ def parse_castep_run(run_str, run_idx): header_split = re.split(pat_header, run_str) header_str = ''.join([header_split[i] for i in [1, 2, 3, 4, 5]]) - remainder_str = header_split[6] + remainder_str = header_split[len(header_split) - 1] # accounts for DFPT header header = parse_castep_file_header(header_str) # TODO: parse pseudopotential section @@ -827,6 +841,10 @@ def parse_castep_run(run_str, run_idx): } }) + elif parameters['general_parameters']['type_of_calculation'] == 'Phonon followed by E-field': + print('parsign phonon shizz') + run_info_str = remainder_str + run_info = parse_castep_file_run_info(run_info_str, parameters) run.update({'system_info': {k: v for k, v in run_info.items() @@ -1310,7 +1328,8 @@ def parse_castep_file_geom_iter(geom_iter_str, parameters): iter_steps_str_list = [i + j for i, j in zip(iter_steps_split[::2], iter_steps_split[1::2])] - iter_num_str = re.search(r'Starting [L]?BFGS iteration\s+([0-9]+)', iter_begin_str).groups()[0] + iter_num_str = re.search( + r'Starting [L]?BFGS iteration\s+([0-9]+)', iter_begin_str).groups()[0] iter_num = int(iter_num_str) # Remove iteration ending bit: diff --git a/castep_parse/utils.py b/castep_parse/utils.py index 52c5d6e..6c0188e 100644 --- a/castep_parse/utils.py +++ b/castep_parse/utils.py @@ -401,3 +401,11 @@ def array_nan_equal(a, b): return False else: return np.allclose(a[nonan_a], b[nonan_a]) + + +def merge_str_list(lst, merge_idx_start, merge_idx_end): + return ( + lst[:merge_idx_start] + + [''.join(lst[merge_idx_start: merge_idx_end])] + + lst[merge_idx_end:] + ) From 3aeb1452a991fad67aa1e3883ce40eb6039dac88 Mon Sep 17 00:00:00 2001 From: aplowman Date: Mon, 15 Nov 2021 18:16:19 +0000 Subject: [PATCH 2/5] fix: parse_castep_file_kpoint_info for old version --- castep_parse/readers.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/castep_parse/readers.py b/castep_parse/readers.py index 48001ab..dd48cfc 100644 --- a/castep_parse/readers.py +++ b/castep_parse/readers.py @@ -3,6 +3,7 @@ import re from pathlib import Path from warnings import warn +import warnings import numpy as np @@ -991,16 +992,22 @@ def parse_castep_file_run_info(run_info_str, parameters): def parse_castep_file_kpoint_info(kpoint_info_str): - body_str = re.split(r'[^\S\n]+-{31}\n', kpoint_info_str)[2] + body_str = re.split(r'[^\S\n]+-{31}|\+{55}\n', kpoint_info_str)[2] + # Splitting on "+" is for where there is a k-point table (only earlier versions?) + body_lns = body_str.strip().split('\n')[:3] lns_s = [ln.strip().split() for ln in body_lns] - out = { - 'kpoint_MP_grid': [int(lns_s[0][i]) for i in [-3, -2, -1]], - 'kpoint_MP_offset': [float(lns_s[1][i]) for i in [-3, -2, -1]], - 'kpoint_num': int(lns_s[2][-1]) - } + out = {'kpoint_MP_grid': [int(lns_s[0][i]) for i in [-3, -2, -1]]} + if len(lns_s) == 3: + out.update({ + 'kpoint_MP_offset': [float(lns_s[1][i]) for i in [-3, -2, -1]], + 'kpoint_num': int(lns_s[2][-1]) + }) + else: + out.update({'kpoint_num': int(lns_s[1][-1])}) + warnings.warn('Could not parse kpoint MP grid offset.') return out From ee212f5020745d4fc0277151663f2cb6397bcc61 Mon Sep 17 00:00:00 2001 From: aplowman Date: Mon, 15 Nov 2021 18:16:44 +0000 Subject: [PATCH 3/5] fix: parse_castep_file_unit_cell for old version --- castep_parse/readers.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/castep_parse/readers.py b/castep_parse/readers.py index dd48cfc..ea65ea1 100644 --- a/castep_parse/readers.py +++ b/castep_parse/readers.py @@ -1049,12 +1049,15 @@ def parse_castep_file_unit_cell(unit_cell_str, is_initial): } if is_initial: - cell_dens_amu_ang = float(lns_s[11][2]) - cell_dens_g_cm = float(lns_s[12][1]) - out.update({ - 'cell_density_AMU/Ang**3': cell_dens_amu_ang, - 'cell_density_g/cm**3': cell_dens_g_cm, - }) + try: + cell_dens_amu_ang = float(lns_s[11][2]) + cell_dens_g_cm = float(lns_s[12][1]) + out.update({ + 'cell_density_AMU/Ang**3': cell_dens_amu_ang, + 'cell_density_g/cm**3': cell_dens_g_cm, + }) + except: + warnings.warn('Could not parse unit cell densities.') return out From c46d501b789e6333c094a682ce37543aee0b59b0 Mon Sep 17 00:00:00 2001 From: aplowman Date: Mon, 15 Nov 2021 18:17:30 +0000 Subject: [PATCH 4/5] feat: parse species in parse_castep_file_cell_contents --- castep_parse/readers.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/castep_parse/readers.py b/castep_parse/readers.py index ea65ea1..59f847e 100644 --- a/castep_parse/readers.py +++ b/castep_parse/readers.py @@ -1084,7 +1084,7 @@ def parse_castep_file_resource_estimates(estimates_str): return out -def parse_castep_file_cell_contents(cell_contents_str, is_initial, parse_species=False): +def parse_castep_file_cell_contents(cell_contents_str, is_initial): if is_initial: patt = r'x-{58}x|x{60}' @@ -1094,13 +1094,21 @@ def parse_castep_file_cell_contents(cell_contents_str, is_initial, parse_species single_cell_split = re.split(patt, cell_contents_str) cell_lines = single_cell_split[2].strip().split('\n') atom_frac_coords = [] + elements = [] + atom_numbers = [] for ln in cell_lines: ln_s = ln.strip().split() + elements.append(ln_s[1]) + atom_numbers.append(int(ln_s[2])) atom_frac_coords.append([float(ln_s[i]) for i in [3, 4, 5]]) - # TODO atom species + out = { + 'element': np.array(elements), + 'atom_number': np.array(atom_numbers), + 'fractional_atom_coords': np.array(atom_frac_coords), + } - return np.array(atom_frac_coords) + return out def parse_castep_file_scf(scf_str, is_metallic): From 8a62d8b5857cbe605bca98665f39d7316fc459e9 Mon Sep 17 00:00:00 2001 From: aplowman Date: Mon, 15 Nov 2021 19:02:50 +0000 Subject: [PATCH 5/5] feat: parse point group --- castep_parse/readers.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/castep_parse/readers.py b/castep_parse/readers.py index 59f847e..46cdd97 100644 --- a/castep_parse/readers.py +++ b/castep_parse/readers.py @@ -943,6 +943,7 @@ def parse_castep_file_run_info(run_info_str, parameters): 'cell_contents': parse_castep_file_cell_contents(cell_contents_str, is_initial=True), 'unit_cell': parse_castep_file_unit_cell(unit_cell_str, is_initial=True), 'kpoints': parse_castep_file_kpoint_info(kpoints_str), + 'symmetry': parse_castep_file_symmetry(symm_str) } if len(info_list) == 6: @@ -1012,6 +1013,14 @@ def parse_castep_file_kpoint_info(kpoint_info_str): return out +def parse_castep_file_symmetry(sym_str): + pnt_group = re.search(r'Point\ group\ of\ crystal\ = (.*)', sym_str).group(1).strip() + out = { + 'point_group': pnt_group, + } + return out + + def parse_castep_file_unit_cell(unit_cell_str, is_initial): unit_cell_body_str = re.split(r'[^\S\n]+-{31}\n', unit_cell_str)[2]