Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 67 additions & 21 deletions castep_parse/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@
import re
from pathlib import Path
from warnings import warn
import warnings

import numpy as np

from castep_parse.utils import find_files_in_dir, flexible_open, map_species_to_castep
from castep_parse.utils import (
find_files_in_dir,
flexible_open,
map_species_to_castep,
merge_str_list,
)

__all__ = [
'read_castep_file',
Expand Down Expand Up @@ -46,6 +52,17 @@ def read_castep_file(path_or_file):
header_split = re.split(pat_header, file_str)[1:]
runs_list = [''.join(header_split[i:i + 6]) for i in range(0, len(header_split), 6)]

# Merge DFTP headers if present:
dftp_header_idx = []
for i in range(0, len(header_split), 6):
if '| D D D D F F F F F P P P P T T T T T |' in header_split[i + 1]:
dftp_header_idx.append(i)

for i in dftp_header_idx:
header_split = merge_str_list(header_split, i - 1, i + 6)

runs_list = [''.join(header_split[i:i + 6]) for i in range(0, len(header_split), 6)]

runs = []
total_time = 0
geom_iters = []
Expand Down Expand Up @@ -84,7 +101,7 @@ def read_castep_file(path_or_file):
t = run['final_info']['statistics']['total_time_s']
total_time += t

else:
elif 'geom' in run:
# Add on the last-recorded SCF time (of completed geom iterations):
all_iters = run['geom']['iterations']
if all_iters:
Expand All @@ -93,8 +110,6 @@ def read_castep_file(path_or_file):
t = final_step['scf'][-1, -1]
total_time += t

if 'geom' in run:

run_geom_iters = run['geom'].pop('iterations')

# Extract out SCF cycles and energies from geom iteration steps
Expand Down Expand Up @@ -494,7 +509,7 @@ def merge_geom_data(castep_dat, geom_dat):

iterations_idx = -1
for idx, iter_num in enumerate(geom_dat['iter_num']):

iterations_idx += 1
while (iter_num != castep_dat['geom']['iterations'][iterations_idx]['iter_num']):
iterations_idx += 1
Expand Down Expand Up @@ -700,7 +715,7 @@ def parse_castep_run(run_str, run_idx):

header_split = re.split(pat_header, run_str)
header_str = ''.join([header_split[i] for i in [1, 2, 3, 4, 5]])
remainder_str = header_split[6]
remainder_str = header_split[len(header_split) - 1] # accounts for DFPT header
header = parse_castep_file_header(header_str)

# TODO: parse pseudopotential section
Expand Down Expand Up @@ -827,6 +842,10 @@ def parse_castep_run(run_str, run_idx):
}
})

elif parameters['general_parameters']['type_of_calculation'] == 'Phonon followed by E-field':
print('parsign phonon shizz')
run_info_str = remainder_str

run_info = parse_castep_file_run_info(run_info_str, parameters)

run.update({'system_info': {k: v for k, v in run_info.items()
Expand Down Expand Up @@ -924,6 +943,7 @@ def parse_castep_file_run_info(run_info_str, parameters):
'cell_contents': parse_castep_file_cell_contents(cell_contents_str, is_initial=True),
'unit_cell': parse_castep_file_unit_cell(unit_cell_str, is_initial=True),
'kpoints': parse_castep_file_kpoint_info(kpoints_str),
'symmetry': parse_castep_file_symmetry(symm_str)
}

if len(info_list) == 6:
Expand Down Expand Up @@ -973,17 +993,31 @@ def parse_castep_file_run_info(run_info_str, parameters):

def parse_castep_file_kpoint_info(kpoint_info_str):

body_str = re.split(r'[^\S\n]+-{31}\n', kpoint_info_str)[2]
body_str = re.split(r'[^\S\n]+-{31}|\+{55}\n', kpoint_info_str)[2]
# Splitting on "+" is for where there is a k-point table (only earlier versions?)

body_lns = body_str.strip().split('\n')[:3]

lns_s = [ln.strip().split() for ln in body_lns]

out = {'kpoint_MP_grid': [int(lns_s[0][i]) for i in [-3, -2, -1]]}
if len(lns_s) == 3:
out.update({
'kpoint_MP_offset': [float(lns_s[1][i]) for i in [-3, -2, -1]],
'kpoint_num': int(lns_s[2][-1])
})
else:
out.update({'kpoint_num': int(lns_s[1][-1])})
warnings.warn('Could not parse kpoint MP grid offset.')

return out


def parse_castep_file_symmetry(sym_str):
pnt_group = re.search(r'Point\ group\ of\ crystal\ = (.*)', sym_str).group(1).strip()
out = {
'kpoint_MP_grid': [int(lns_s[0][i]) for i in [-3, -2, -1]],
'kpoint_MP_offset': [float(lns_s[1][i]) for i in [-3, -2, -1]],
'kpoint_num': int(lns_s[2][-1])
'point_group': pnt_group,
}

return out


Expand Down Expand Up @@ -1024,12 +1058,15 @@ def parse_castep_file_unit_cell(unit_cell_str, is_initial):
}

if is_initial:
cell_dens_amu_ang = float(lns_s[11][2])
cell_dens_g_cm = float(lns_s[12][1])
out.update({
'cell_density_AMU/Ang**3': cell_dens_amu_ang,
'cell_density_g/cm**3': cell_dens_g_cm,
})
try:
cell_dens_amu_ang = float(lns_s[11][2])
cell_dens_g_cm = float(lns_s[12][1])
out.update({
'cell_density_AMU/Ang**3': cell_dens_amu_ang,
'cell_density_g/cm**3': cell_dens_g_cm,
})
except:
warnings.warn('Could not parse unit cell densities.')

return out

Expand All @@ -1056,7 +1093,7 @@ def parse_castep_file_resource_estimates(estimates_str):
return out


def parse_castep_file_cell_contents(cell_contents_str, is_initial, parse_species=False):
def parse_castep_file_cell_contents(cell_contents_str, is_initial):

if is_initial:
patt = r'x-{58}x|x{60}'
Expand All @@ -1066,13 +1103,21 @@ def parse_castep_file_cell_contents(cell_contents_str, is_initial, parse_species
single_cell_split = re.split(patt, cell_contents_str)
cell_lines = single_cell_split[2].strip().split('\n')
atom_frac_coords = []
elements = []
atom_numbers = []
for ln in cell_lines:
ln_s = ln.strip().split()
elements.append(ln_s[1])
atom_numbers.append(int(ln_s[2]))
atom_frac_coords.append([float(ln_s[i]) for i in [3, 4, 5]])

# TODO atom species
out = {
'element': np.array(elements),
'atom_number': np.array(atom_numbers),
'fractional_atom_coords': np.array(atom_frac_coords),
}

return np.array(atom_frac_coords)
return out


def parse_castep_file_scf(scf_str, is_metallic):
Expand Down Expand Up @@ -1310,7 +1355,8 @@ def parse_castep_file_geom_iter(geom_iter_str, parameters):
iter_steps_str_list = [i + j for i,
j in zip(iter_steps_split[::2], iter_steps_split[1::2])]

iter_num_str = re.search(r'Starting [L]?BFGS iteration\s+([0-9]+)', iter_begin_str).groups()[0]
iter_num_str = re.search(
r'Starting [L]?BFGS iteration\s+([0-9]+)', iter_begin_str).groups()[0]
iter_num = int(iter_num_str)

# Remove iteration ending bit:
Expand Down
8 changes: 8 additions & 0 deletions castep_parse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,3 +401,11 @@ def array_nan_equal(a, b):
return False
else:
return np.allclose(a[nonan_a], b[nonan_a])


def merge_str_list(lst, merge_idx_start, merge_idx_end):
return (
lst[:merge_idx_start] +
[''.join(lst[merge_idx_start: merge_idx_end])] +
lst[merge_idx_end:]
)