Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions compare-models/compare-ligands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import sys
import os
import json
import gemmi
import glob

def get_ligand(ligand_tlc: str, pdb_file_name: str):
st = gemmi.read_pdb(pdb_file_name)
for model in st:
for chain in model:
for residue in chain:
if residue.name == ligand_tlc:
return residue
return False

def get_reso(pdb_file_name):
st = gemmi.read_pdb(pdb_file_name)
return st.resolution

def compare_ligands(pdb_file_name: str, refmac_pdb_file_name: str, accession_code:str, ligand_tlc: str, acedrg_cif_file_name: str):

if not os.path.exists(pdb_file_name):
print("Bad PDB file name", pdb_file_name)
if not os.path.exists(refmac_pdb_file_name):
print("Bad refmac file name", refmac_pdb_file_name)
if not os.path.exists(acedrg_cif_file_name):
print("bad acedrg cif file name", acedrg_cif_file_name)
if len(ligand_tlc) > 3:
print("bad ligand tlc", ligand_tlc)
if len(ligand_tlc) == 0:
print("bad tlc")

lig_1 = get_ligand(ligand_tlc, pdb_file_name)
lig_2 = get_ligand(ligand_tlc, refmac_pdb_file_name)
reso = get_reso(pdb_file_name)

for atom_1, atom_2 in zip(lig_1, lig_2):
pos_1 = atom_1.pos
pos_2 = atom_2.pos
d = pos_1.dist(pos_2)
print("delta:", ligand_tlc, accession_code, str(reso), atom_1.name, d)
if d > 1.0:
print(refmac_pdb_file_name)

return False

def multi_compare_ligands(refmac_dir):
for d in glob.glob(os.path.join(refmac_dir, "*")):
for info_file in glob.glob(os.path.join(d, "*.info.json")):
print(info_file)
with open(info_file, "r") as info_file_data:
info = json.load(info_file_data)
print(info)
refmac_out_fn = info['refmac_pdb_file_name']
# refmac_out_fn may not be there because refmac didn't properly complete
if os.path.exists(refmac_out_fn):
compare_ligands(info['pdb_file_name'], info['refmac_pdb_file_name'], info['accession_code'], info['ligand-code'], info['acedrg_cif_file_name'])
else:
print("missing refmac-out", refmac_out_fn, "for ligand-code", info['ligand-code'])

def compare_ligands_in_table_file(this_weeks_ligands_table_file_name):
with open(this_weeks_ligands_table_file_name) as f:
lines = f.readlines()
for line in lines:
parts = line.split()
if parts:
shell_script_file_name = parts[0]
refmac_log_file_name = parts[1]
fn = os.path.basename(refmac_log_file_name)
dir_name = os.path.dirname(refmac_log_file_name)
parts = fn.split("-")
p = parts[1]
parts_2 = p.split(".")
code = parts_2[0]
json_file_name = code.upper() + ".info.json"
info_file = os.path.join(dir_name, json_file_name)
print(info_file)
with open(info_file, "r") as info_file_data:
info = json.load(info_file_data)
print(info)
refmac_out_fn = info['refmac_pdb_file_name']
# refmac_out_fn may not be there because refmac didn't properly complete
if os.path.exists(refmac_out_fn):
compare_ligands(info['pdb_file_name'], info['refmac_pdb_file_name'], info['accession_code'], info['ligand-code'], info['acedrg_cif_file_name'])
else:
print("missing refmac-out", refmac_out_fn, "for ligand-code", info['ligand-code'])

# compare_ligands(info['pdb_file_name'], info['refmac_pdb_file_name'], info['accession_code'], info['ligand-code'], info['acedrg_cif_file_name'])



if __name__ == "__main__":
refmac_dir = "refmac"
dated_refmac_logs_table_list = []
for arg in sys.argv[1:]:
print(arg)
table_file = arg
compare_ligands_in_table_file(table_file)

# multi_compare_ligands(refmac_dir)
205 changes: 205 additions & 0 deletions compare-models/compare_models_for_ligands_of_the_week.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@

import argparse
import datetime
import gemmi
import os
import sys
import math
import numpy
import json
from scipy.stats import pearsonr

def print_to_file(text,fout,mode='a'):
stdout = sys.stdout
with open(fout,mode) as sys.stdout:
print(text)
sys.stdout.flush()
sys.stdout = stdout
return

def get_rscc(map_o,map_c,pos,mask_radius):
print("---- get_rsccs -start ---")
mask = gemmi.FloatGrid(numpy.array(map_o, copy=False),map_o.unit_cell,map_o.spacegroup)
mask.fill(0)
mask.set_points_around(pos, radius=mask_radius, value=1)
#mask.symmetrize_max()
maskarray = numpy.array(mask, copy=False)
points = numpy.argwhere(maskarray == 1)
print("points.size", points.size)
if points.size < 4:
return None
list_obs = []
list_calc = []
list_diff = []
for point in points:
obs = map_o.get_point(point[0],point[1],point[2]).value
calc = map_c.get_point(point[0],point[1],point[2]).value
list_obs.append(obs)
list_calc.append(calc)
list_diff.append(obs-calc)
n_points = len(list_obs)
sum_obs = round(sum(list_obs), 2)
sum_calc = round(sum(list_calc),2)
sum_diff = round(sum(list_diff),2)
sum_absdiff = round(sum(abs(el) for el in list_diff),2)
rmsd_obs_calc = round(math.sqrt(sum(el**2 for el in list_diff)/n_points),2)
cor_obs_calc,cor_obs_calc_p = pearsonr(list_obs,list_calc)
return cor_obs_calc

def modelComparison(model1, model2, mtzin, ligand_comp_id, opt):

if model1 == None:
print("modelComparison: model1 is None")
return

if model2 == None:
print("modelComparison: model2 is None")
return

print("debug:: in modelComparison() model1", model1, "model2", model2)

try:
if not 'sample_rate' in opt:
opt['sample_rate'] = 1.0
if not 'mask_radius' in opt:
opt['mask_radius'] = 1.5

print("Reading model file:",model1)
st1 = gemmi.read_structure(model1)
print("Reading model file:",model2)
st2 = gemmi.read_structure(model2)
print("Reading MTZ file:",mtzin)
mtz = gemmi.read_mtz_file(mtzin)

print("Converting FWT/PHWT to map")
map_obs = mtz.transform_f_phi_to_map('FWT','PHWT', sample_rate=opt['sample_rate'])
print("Converting FC_ALL/PHIC_ALL to map")
map_calc = mtz.transform_f_phi_to_map('FC_ALL','PHIC_ALL', sample_rate=opt['sample_rate'])

print("Comparing equivalent atoms in all polymeric entities")
result = []
for model in st1:
for chain in model:
for residue in chain:
if residue.name != ligand_comp_id:
if ligand_comp_id:
continue
print(" Found", residue, residue.entity_type)
if residue.entity_type == gemmi.EntityType.NonPolymer:
for atom in residue:
if atom.element.name == "H":
continue
addr = gemmi.make_address(chain, residue, atom)
cra1 = model.find_cra(addr)
cra2 = st2[0].find_cra(addr) # This always uses the first model in st2 - probably worth checking that both st1 and st2 only have one model, otherwise fail.
print("---\ncra1: ", cra1, "cra2: ", cra2)

if cra1 and cra2:
print("cra1 pos:", cra1.atom.pos)
print("cra2 pos:", cra2.atom.pos)
peak1 = map_obs.interpolate_value(cra1.atom.pos)
peak2 = map_obs.interpolate_value(cra2.atom.pos)
rscc1 = get_rscc(map_obs,map_calc,cra1.atom.pos,opt['mask_radius'])
rscc2 = get_rscc(map_obs,map_calc,cra2.atom.pos,opt['mask_radius'])

if rscc1 == None: continue
if rscc2 == None: continue

result.append({'chain':chain.name,
'residue':str(residue.seqid.num) + (residue.seqid.icode if residue.seqid.icode != ' ' else ''),
'tlc':residue.name,
'atom':atom.name,
'dist':cra1.atom.pos.dist(cra2.atom.pos),
'b1':cra1.atom.b_iso,
'b2':cra2.atom.b_iso,
'peak1':peak1,
'peak2':peak2,
'rscc1':rscc1,
'rscc2':rscc2})

print("Processed",len(result),"atoms")
return result

# except Exception as e:
except FloatingPointError as e: # I want to see the backtrace
print("keyError: %s" % e)
print("Cannot continue - program terminated.")
sys.exit(1)

def printModelComparison(analysis,output_file):
try:
output = '\t'.join(["chain","residue","TLC","atom","dist","b1","b2","peak1","peak2","rscc1","rscc2"])
for atom in analysis:
output = output + '\n' + '\t'.join([atom['chain'],
atom['residue'],
atom['tlc'],
atom['atom'],
str(round(atom['dist'],2)),
str(round(atom['b1'],2)),
str(round(atom['b2'],2)),
str(round(atom['peak1'],2)),
str(round(atom['peak2'],2)),
str(round(atom['rscc1'],2)),
str(round(atom['rscc2'],2))])

print("Writing output to file:",output_file)
print_to_file(output,output_file,'w')

except Exception as e:
print("Error: %s" % e)
print("Cannot continue - program terminated.")
sys.exit(1)

def proc_using_code(accession_code, ligand_analysis_base):

two_code = accession_code[1:3]
upcase_code = accession_code.upper()
refmac_dir = os.path.join(ligand_analysis_base, 'refmac')
sub_dir = os.path.join(refmac_dir, two_code)
info_file = os.path.join(sub_dir, upcase_code + ".info.json")
print("debug:: in proc_using_code() accession_code", accession_code, "info_file", info_file)
if os.path.exists(info_file):
print("INFO file", info_file)
with open(info_file, "r") as info_file_data:
info = json.load(info_file_data)
refmac_out_fn = info['refmac_pdb_file_name']
ligand_comp_id = info['ligand-code']
if os.path.exists(refmac_out_fn):
pdb_file_name = info['pdb_file_name']
mtz = info['refmac_mtz_file_name']
dict = info['acedrg_cif_file_name']
opt = {}
result = modelComparison(pdb_file_name, refmac_out_fn, mtz, ligand_comp_id, opt)
results_output_file_name = "results.out"
printModelComparison(result, results_output_file_name)
else:
print('Missing refmac pdb output file "{}"'.format(refmac_out_fn))


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--f1', dest='model1', help="First model (PDB or mmCIF format)")
parser.add_argument('--f2', dest='model2', help="Second model (PDB or mmCIF format)")
parser.add_argument('--mtz', dest='mtzin', help="MTZ file containing phases (columns required: FWT, PHWT, FC_ALL, PHIC_ALL). At the minute just one MTZ is used for model-map analysis... in future this should be extended to a second.")
parser.add_argument('-o','--output', dest='output', required=False, help="Output file containing results of comparative analysis", default='analysis.txt')
parser.add_argument('-s','--samplerate', dest='samplerate', required=False, help="Map sampling rate", default=1.0, type=float)
parser.add_argument('-r','--maskradius', dest='maskradius', required=False, help="Mask radius around atomic position", default=1.5, type=float)
parser.add_argument("--code", dest='accession_code')
parser.add_argument('--ligand-analysis-base', dest='ligand_analysis_base')
args = parser.parse_args()

opt = {}
if args.samplerate:
opt['sample_rate'] = args.samplerate
if args.maskradius:
opt['mask_radius'] = args.maskradius
if args.accession_code:
print("INFO:: Compare models using this accession code", args.accession_code, "with base", args.ligand_analysis_base)
proc_using_code(args.accession_code, args.ligand_analysis_base)
else:
time_start = datetime.datetime.now()
ligand_code = "" # special value, means "all residue types"
result = modelComparison(args.model1,args.model2,args.mtzin,ligand_code,opt)
printModelComparison(result,args.output)
time_end= datetime.datetime.now()
print("Job completed ("+str(round((time_end-time_start).total_seconds(),3))+" seconds)")
37 changes: 37 additions & 0 deletions compare-models/ligand-hist.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

st=format(Sys.time(), '%d-%m-%Y')
pdf(paste("acedrg-ligands_",st, ".pdf", sep = ""))
x=rnorm(100,40,3)
y=rnorm(100,100,5)

par(mfrow=c(3,3), cex=0.5)

a = read.table("15-11-refmacs-deltas.table")

# hist(a$V6, breaks=30, col="lightblue", main="Overall Displacement Histogram",
# xlab="Displacement (A)")

factors = factor(a$V3)
levels = levels(factors)
print(levels)

for (l in levels) {
ligand_code = a$V2[a$V3==l][1]
reso = a$V4[a$V3==l][2]
mt = l
mt = paste(mt, ligand_code)
mt = paste(mt, reso)
mt = paste(mt,'\uc5')

hist(a$V6[a$V3==l], col="lightblue", main=mt,
xlim=c(0, 1.2), xlab="Displacement (A)")
correl_table_fn <- paste("correlation-tables/results-", tolower(l), sep="")
correl_table_fn <- paste(correl_table_fn, ".table", sep="")
print(correl_table_fn)
b <- read.table(correl_table_fn, header=TRUE)
plot(b$rscc1, b$rscc2, xlim=c(0.3, 1.0), ylim=c(0.3, 1.0), xlab="correl", ylab="correl", main=mt)
abline(a=0, b=1)
plot(b$dist, b$b2, xlab="dist", ylab="b2", main=mt)
}

dev.off()
39 changes: 39 additions & 0 deletions compare-models/multi-robs-analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
import subprocess

todays_refmacs_table = "15-11-refmacs.table"

def log_name_to_code(log):
parts_1 = log.split(".")
pre = parts_1[0]
parts_2 = pre.split("-")
code = parts_2[1]
return code

if os.path.exists("rob-proc"):
pass
else:
os.mkdir("rob-proc")

if os.path.exists("correlation-tables"):
pass
else:
os.mkdir("correlation-tables")

with open(todays_refmacs_table) as f:
lines = f.readlines()
for line in lines:
parts = line.split()
log = parts[1]
accession_code = log_name_to_code(log)
print(log, accession_code)

rob_proc_log_file_name = os.path.join("rob-proc", accession_code + ".log")
args = ["python3", "robsrepo/Ligand-Tools/compare-models/compare_models_for_ligands_of_the_week.py",
'--code', accession_code, '--ligand-analysis-base', "."]
comp_proc = subprocess.run(args, capture_output=True)
with open(rob_proc_log_file_name, "bw") as f:
f.write(comp_proc.stdout)
if os.path.exists("results.out"):
new_results_out_name = os.path.join("correlation-tables", "results-" + accession_code + ".table")
os.rename("results.out", new_results_out_name)
Loading