From d0e449d122b18e64a5c14ec3b7f93cc6c6984f8e Mon Sep 17 00:00:00 2001
From: Paul Emsley
Date: Tue, 15 Nov 2022 03:38:16 +0000
Subject: [PATCH 1/2] Make a copy and rework compare_models so that it works
 with the ligands-of-the-week infrastructure

---
 .../compare_models_for_ligands_of_the_week.py | 205 ++++++++++++++++++
 1 file changed, 205 insertions(+)
 create mode 100644 compare-models/compare_models_for_ligands_of_the_week.py

diff --git a/compare-models/compare_models_for_ligands_of_the_week.py b/compare-models/compare_models_for_ligands_of_the_week.py
new file mode 100644
index 0000000..58628ce
--- /dev/null
+++ b/compare-models/compare_models_for_ligands_of_the_week.py
@@ -0,0 +1,205 @@
+
+import argparse
+import datetime
+import gemmi
+import os
+import sys
+import math
+import numpy
+import json
+from scipy.stats import pearsonr
+
+def print_to_file(text,fout,mode='a'):
+    stdout = sys.stdout
+    with open(fout,mode) as sys.stdout:
+        print(text)
+        sys.stdout.flush()
+    sys.stdout = stdout
+    return
+
+def get_rscc(map_o,map_c,pos,mask_radius):
+    print("---- get_rsccs -start ---")
+    mask = gemmi.FloatGrid(numpy.array(map_o, copy=False),map_o.unit_cell,map_o.spacegroup)
+    mask.fill(0)
+    mask.set_points_around(pos, radius=mask_radius, value=1)
+    #mask.symmetrize_max()
+    maskarray = numpy.array(mask, copy=False)
+    points = numpy.argwhere(maskarray == 1)
+    print("points.size", points.size)
+    if points.size < 4:
+        return None
+    list_obs = []
+    list_calc = []
+    list_diff = []
+    for point in points:
+        obs = map_o.get_point(point[0],point[1],point[2]).value
+        calc = map_c.get_point(point[0],point[1],point[2]).value
+        list_obs.append(obs)
+        list_calc.append(calc)
+        list_diff.append(obs-calc)
+    n_points = len(list_obs)
+    sum_obs = round(sum(list_obs), 2)
+    sum_calc = round(sum(list_calc),2)
+    sum_diff = round(sum(list_diff),2)
+    sum_absdiff = round(sum(abs(el) for el in list_diff),2)
+    rmsd_obs_calc = round(math.sqrt(sum(el**2 for el in list_diff)/n_points),2)
+    cor_obs_calc,cor_obs_calc_p = pearsonr(list_obs,list_calc)
+    return cor_obs_calc
+
+def modelComparison(model1, model2, mtzin, ligand_comp_id, opt):
+
+    if model1 is None:
+        print("modelComparison: model1 is None")
+        return
+
+    if model2 is None:
+        print("modelComparison: model2 is None")
+        return
+
+    print("debug:: in modelComparison() model1", model1, "model2", model2)
+
+    try:
+        if 'sample_rate' not in opt:
+            opt['sample_rate'] = 1.0
+        if 'mask_radius' not in opt:
+            opt['mask_radius'] = 1.5
+
+        print("Reading model file:",model1)
+        st1 = gemmi.read_structure(model1)
+        print("Reading model file:",model2)
+        st2 = gemmi.read_structure(model2)
+        print("Reading MTZ file:",mtzin)
+        mtz = gemmi.read_mtz_file(mtzin)
+
+        print("Converting FWT/PHWT to map")
+        map_obs = mtz.transform_f_phi_to_map('FWT','PHWT', sample_rate=opt['sample_rate'])
+        print("Converting FC_ALL/PHIC_ALL to map")
+        map_calc = mtz.transform_f_phi_to_map('FC_ALL','PHIC_ALL', sample_rate=opt['sample_rate'])
+
+        print("Comparing equivalent atoms in all non-polymer entities")
+        result = []
+        for model in st1:
+            for chain in model:
+                for residue in chain:
+                    if residue.name != ligand_comp_id:
+                        if ligand_comp_id:
+                            continue
+                    print(" Found", residue, residue.entity_type)
+                    if residue.entity_type == gemmi.EntityType.NonPolymer:
+                        for atom in residue:
+                            if atom.element.name == "H":
+                                continue
+                            addr = gemmi.make_address(chain, residue, atom)
+                            cra1 = model.find_cra(addr)
+                            cra2 = st2[0].find_cra(addr) # This always uses the first model in st2 - probably worth checking that both st1 and st2 only have one model, otherwise fail.
+                            print("---\ncra1: ", cra1, "cra2: ", cra2)
+
+                            if cra1 and cra2:
+                                print("cra1 pos:", cra1.atom.pos)
+                                print("cra2 pos:", cra2.atom.pos)
+                                peak1 = map_obs.interpolate_value(cra1.atom.pos)
+                                peak2 = map_obs.interpolate_value(cra2.atom.pos)
+                                rscc1 = get_rscc(map_obs,map_calc,cra1.atom.pos,opt['mask_radius'])
+                                rscc2 = get_rscc(map_obs,map_calc,cra2.atom.pos,opt['mask_radius'])
+
+                                if rscc1 is None: continue
+                                if rscc2 is None: continue
+
+                                result.append({'chain':chain.name,
+                                               'residue':str(residue.seqid.num) + (residue.seqid.icode if residue.seqid.icode != ' ' else ''),
+                                               'tlc':residue.name,
+                                               'atom':atom.name,
+                                               'dist':cra1.atom.pos.dist(cra2.atom.pos),
+                                               'b1':cra1.atom.b_iso,
+                                               'b2':cra2.atom.b_iso,
+                                               'peak1':peak1,
+                                               'peak2':peak2,
+                                               'rscc1':rscc1,
+                                               'rscc2':rscc2})
+
+        print("Processed",len(result),"atoms")
+        return result
+
+    # except Exception as e:
+    except FloatingPointError as e: # I want to see the backtrace
+        print("Error: %s" % e)
+        print("Cannot continue - program terminated.")
+        sys.exit(1)
+
+def printModelComparison(analysis,output_file):
+    try:
+        output = '\t'.join(["chain","residue","TLC","atom","dist","b1","b2","peak1","peak2","rscc1","rscc2"])
+        for atom in analysis:
+            output = output + '\n' + '\t'.join([atom['chain'],
+                                                atom['residue'],
+                                                atom['tlc'],
+                                                atom['atom'],
+                                                str(round(atom['dist'],2)),
+                                                str(round(atom['b1'],2)),
+                                                str(round(atom['b2'],2)),
+                                                str(round(atom['peak1'],2)),
+                                                str(round(atom['peak2'],2)),
+                                                str(round(atom['rscc1'],2)),
+                                                str(round(atom['rscc2'],2))])
+
+        print("Writing output to file:",output_file)
+        print_to_file(output,output_file,'w')
+
+    except Exception as e:
+        print("Error: %s" % e)
+        print("Cannot continue - program terminated.")
+        sys.exit(1)
+
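+# Note: proc_using_code() below assumes the directory layout produced by
+# proc-lig.py (second patch in this series): for an accession code such as
+# 7s3x it looks for
+#
+#     <ligand-analysis-base>/refmac/s3/7S3X.info.json
+#
+# where "s3" is the middle two characters of the code, and that JSON file
+# supplies the model, refmac output, MTZ and dictionary paths used below.
+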
+def proc_using_code(accession_code, ligand_analysis_base):
+
+    two_code = accession_code[1:3]
+    upcase_code = accession_code.upper()
+    refmac_dir = os.path.join(ligand_analysis_base, 'refmac')
+    sub_dir = os.path.join(refmac_dir, two_code)
+    info_file = os.path.join(sub_dir, upcase_code + ".info.json")
+    print("debug:: in proc_using_code() accession_code", accession_code, "info_file", info_file)
+    if os.path.exists(info_file):
+        print("INFO file", info_file)
+        with open(info_file, "r") as info_file_data:
+            info = json.load(info_file_data)
+            refmac_out_fn = info['refmac_pdb_file_name']
+            ligand_comp_id = info['ligand-code']
+            if os.path.exists(refmac_out_fn):
+                pdb_file_name = info['pdb_file_name']
+                mtz = info['refmac_mtz_file_name']
+                dict = info['acedrg_cif_file_name']
+                opt = {}
+                result = modelComparison(pdb_file_name, refmac_out_fn, mtz, ligand_comp_id, opt)
+                results_output_file_name = "results.out"
+                printModelComparison(result, results_output_file_name)
+            else:
+                print('Missing refmac pdb output file "{}"'.format(refmac_out_fn))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--f1', dest='model1', help="First model (PDB or mmCIF format)")
+    parser.add_argument('--f2', dest='model2', help="Second model (PDB or mmCIF format)")
+    parser.add_argument('--mtz', dest='mtzin', help="MTZ file containing phases (columns required: FWT, PHWT, FC_ALL, PHIC_ALL). At the moment just one MTZ is used for model-map analysis... in future this should be extended to a second.")
+    parser.add_argument('-o','--output', dest='output', required=False, help="Output file containing results of comparative analysis", default='analysis.txt')
+    parser.add_argument('-s','--samplerate', dest='samplerate', required=False, help="Map sampling rate", default=1.0, type=float)
+    parser.add_argument('-r','--maskradius', dest='maskradius', required=False, help="Mask radius around atomic position", default=1.5, type=float)
+    parser.add_argument("--code", dest='accession_code')
+    parser.add_argument('--ligand-analysis-base', dest='ligand_analysis_base')
+    args = parser.parse_args()
+
+    opt = {}
+    if args.samplerate:
+        opt['sample_rate'] = args.samplerate
+    if args.maskradius:
+        opt['mask_radius'] = args.maskradius
+    if args.accession_code:
+        print("INFO:: Compare models using this accession code", args.accession_code, "with base", args.ligand_analysis_base)
+        proc_using_code(args.accession_code, args.ligand_analysis_base)
+    else:
+        time_start = datetime.datetime.now()
+        ligand_code = "" # special value, means "all residue types"
+        result = modelComparison(args.model1,args.model2,args.mtzin,ligand_code,opt)
+        printModelComparison(result,args.output)
+        time_end = datetime.datetime.now()
+        print("Job completed ("+str(round((time_end-time_start).total_seconds(),3))+" seconds)")
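
Typical invocations (illustrative only - the model and MTZ file names below are placeholders, the flags and column names come from the script above; the second command is the one used in the notes of the next patch):

    # compare two models of the same entry against one set of phases
    python3 compare_models_for_ligands_of_the_week.py --f1 input.pdb --f2 refined.pdb --mtz refined.mtz -o analysis.txt

    # drive the comparison from a ligands-of-the-week info.json
    python3 compare_models_for_ligands_of_the_week.py --code 7s3x --ligand-analysis-base .
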
From 037cc122a13a76ae80c361ed4d6b67525f9be822 Mon Sep 17 00:00:00 2001
From: Paul Emsley
Date: Wed, 16 Nov 2022 03:04:06 +0000
Subject: [PATCH 2/2] Add scripts for ligands-of-the-week analysis

---
 compare-models/compare-ligands.py     | 100 +++++++
 compare-models/ligand-hist.r          |  37 +++
 compare-models/multi-robs-analysis.py |  39 +++
 compare-models/notes                  |  40 +++
 compare-models/pool-sub-1.py          |  70 +++++
 compare-models/proc-lig.py            | 363 ++++++++++++++++++++++++++
 compare-models/r-correl.r             |  15 ++
 compare-models/refmac-log-analysis.py |  27 ++
 8 files changed, 691 insertions(+)
 create mode 100644 compare-models/compare-ligands.py
 create mode 100644 compare-models/ligand-hist.r
 create mode 100644 compare-models/multi-robs-analysis.py
 create mode 100644 compare-models/notes
 create mode 100644 compare-models/pool-sub-1.py
 create mode 100644 compare-models/proc-lig.py
 create mode 100644 compare-models/r-correl.r
 create mode 100644 compare-models/refmac-log-analysis.py

diff --git a/compare-models/compare-ligands.py b/compare-models/compare-ligands.py
new file mode 100644
index 0000000..2177249
--- /dev/null
+++ b/compare-models/compare-ligands.py
@@ -0,0 +1,100 @@
+import sys
+import os
+import json
+import gemmi
+import glob
+
+def get_ligand(ligand_tlc: str, pdb_file_name: str):
+    st = gemmi.read_pdb(pdb_file_name)
+    for model in st:
+        for chain in model:
+            for residue in chain:
+                if residue.name == ligand_tlc:
+                    return residue
+    return False
+
+def get_reso(pdb_file_name):
+    st = gemmi.read_pdb(pdb_file_name)
+    return st.resolution
+
+def compare_ligands(pdb_file_name: str, refmac_pdb_file_name: str, accession_code: str, ligand_tlc: str, acedrg_cif_file_name: str):
+
+    if not os.path.exists(pdb_file_name):
+        print("Bad PDB file name", pdb_file_name)
+    if not os.path.exists(refmac_pdb_file_name):
+        print("Bad refmac file name", refmac_pdb_file_name)
+    if not os.path.exists(acedrg_cif_file_name):
+        print("bad acedrg cif file name", acedrg_cif_file_name)
+    if len(ligand_tlc) > 3:
+        print("bad ligand tlc", ligand_tlc)
+    if len(ligand_tlc) == 0:
+        print("bad tlc")
+
+    lig_1 = get_ligand(ligand_tlc, pdb_file_name)
+    lig_2 = get_ligand(ligand_tlc, refmac_pdb_file_name)
+    reso = get_reso(pdb_file_name)
+
+    for atom_1, atom_2 in zip(lig_1, lig_2):
+        pos_1 = atom_1.pos
+        pos_2 = atom_2.pos
+        d = pos_1.dist(pos_2)
+        print("delta:", ligand_tlc, accession_code, str(reso), atom_1.name, d)
+        if d > 1.0:
+            print(refmac_pdb_file_name)
+
+    return False
+
+def multi_compare_ligands(refmac_dir):
+    for d in glob.glob(os.path.join(refmac_dir, "*")):
+        for info_file in glob.glob(os.path.join(d, "*.info.json")):
+            print(info_file)
+            with open(info_file, "r") as info_file_data:
+                info = json.load(info_file_data)
+                print(info)
+                refmac_out_fn = info['refmac_pdb_file_name']
+                # refmac_out_fn may not be there because refmac didn't properly complete
+                if os.path.exists(refmac_out_fn):
+                    compare_ligands(info['pdb_file_name'], info['refmac_pdb_file_name'], info['accession_code'], info['ligand-code'], info['acedrg_cif_file_name'])
+                else:
+                    print("missing refmac-out", refmac_out_fn, "for ligand-code", info['ligand-code'])
+
+def compare_ligands_in_table_file(this_weeks_ligands_table_file_name):
+    with open(this_weeks_ligands_table_file_name) as f:
+        lines = f.readlines()
+        for line in lines:
+            parts = line.split()
+            if parts:
+                shell_script_file_name = parts[0]
+                refmac_log_file_name = parts[1]
+                fn = os.path.basename(refmac_log_file_name)
+                dir_name = os.path.dirname(refmac_log_file_name)
+                parts = fn.split("-")
+                p = parts[1]
+                parts_2 = p.split(".")
+                code = parts_2[0]
+                json_file_name = code.upper() + ".info.json"
+                info_file = os.path.join(dir_name, json_file_name)
+                print(info_file)
+                with open(info_file, "r") as info_file_data:
+                    info = json.load(info_file_data)
+                    print(info)
+                    refmac_out_fn = info['refmac_pdb_file_name']
+                    # refmac_out_fn may not be there because refmac didn't properly complete
+                    if os.path.exists(refmac_out_fn):
+                        compare_ligands(info['pdb_file_name'], info['refmac_pdb_file_name'], info['accession_code'], info['ligand-code'], info['acedrg_cif_file_name'])
+                    else:
+                        print("missing refmac-out", refmac_out_fn, "for ligand-code", info['ligand-code'])
+
+                # compare_ligands(info['pdb_file_name'], info['refmac_pdb_file_name'], info['accession_code'], info['ligand-code'], info['acedrg_cif_file_name'])
+
+
+
+if __name__ == "__main__":
+    refmac_dir = "refmac"
+    dated_refmac_logs_table_list = []
+    for arg in sys.argv[1:]:
+        print(arg)
+        table_file = arg
+        compare_ligands_in_table_file(table_file)
+
+    # multi_compare_ligands(refmac_dir)
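
One thing to keep in mind about compare_ligands(): it pairs atoms with zip(), which assumes that the input model and the refmac output list the ligand atoms in exactly the same order. If that ever stops being true, pairing by atom name is safer. A minimal sketch of that idea (compare_ligands_by_name is a hypothetical helper, not part of this patch; it uses only gemmi calls already used above):

    import gemmi

    def compare_ligands_by_name(ligand_tlc, pdb_1, pdb_2):
        # Collect the first matching residue's atoms, keyed by atom name.
        def atoms_by_name(pdb_file_name):
            st = gemmi.read_pdb(pdb_file_name)
            for model in st:
                for chain in model:
                    for residue in chain:
                        if residue.name == ligand_tlc:
                            return {atom.name: atom for atom in residue}
            return {}

        atoms_1 = atoms_by_name(pdb_1)
        atoms_2 = atoms_by_name(pdb_2)
        # Pair atoms by name rather than by file order.
        for name, atom_1 in atoms_1.items():
            atom_2 = atoms_2.get(name)
            if atom_2 is not None:
                print("delta:", ligand_tlc, name, atom_1.pos.dist(atom_2.pos))
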
diff --git a/compare-models/ligand-hist.r b/compare-models/ligand-hist.r
new file mode 100644
index 0000000..071ab66
--- /dev/null
+++ b/compare-models/ligand-hist.r
@@ -0,0 +1,37 @@
+
+st=format(Sys.time(), '%d-%m-%Y')
+pdf(paste("acedrg-ligands_",st, ".pdf", sep = ""))
+x=rnorm(100,40,3)
+y=rnorm(100,100,5)
+
+par(mfrow=c(3,3), cex=0.5)
+
+a = read.table("15-11-refmacs-deltas.table")
+
+# hist(a$V6, breaks=30, col="lightblue", main="Overall Displacement Histogram",
+#      xlab="Displacement (A)")
+
+factors = factor(a$V3)
+levels = levels(factors)
+print(levels)
+
+for (l in levels) {
+    ligand_code = a$V2[a$V3==l][1]
+    reso = a$V4[a$V3==l][2]
+    mt = l
+    mt = paste(mt, ligand_code)
+    mt = paste(mt, reso)
+    mt = paste(mt,'\uc5')
+
+    hist(a$V6[a$V3==l], col="lightblue", main=mt,
+         xlim=c(0, 1.2), xlab="Displacement (A)")
+    correl_table_fn <- paste("correlation-tables/results-", tolower(l), sep="")
+    correl_table_fn <- paste(correl_table_fn, ".table", sep="")
+    print(correl_table_fn)
+    b <- read.table(correl_table_fn, header=TRUE)
+    plot(b$rscc1, b$rscc2, xlim=c(0.3, 1.0), ylim=c(0.3, 1.0), xlab="correl", ylab="correl", main=mt)
+    abline(a=0, b=1)
+    plot(b$dist, b$b2, xlab="dist", ylab="b2", main=mt)
+}
+
+dev.off()
diff --git a/compare-models/multi-robs-analysis.py b/compare-models/multi-robs-analysis.py
new file mode 100644
index 0000000..7bdd0aa
--- /dev/null
+++ b/compare-models/multi-robs-analysis.py
@@ -0,0 +1,39 @@
+import os
+import subprocess
+
+todays_refmacs_table = "15-11-refmacs.table"
+
+def log_name_to_code(log):
+    parts_1 = log.split(".")
+    pre = parts_1[0]
+    parts_2 = pre.split("-")
+    code = parts_2[1]
+    return code
+
+if os.path.exists("rob-proc"):
+    pass
+else:
+    os.mkdir("rob-proc")
+
+if os.path.exists("correlation-tables"):
+    pass
+else:
+    os.mkdir("correlation-tables")
+
+with open(todays_refmacs_table) as f:
+    lines = f.readlines()
+    for line in lines:
+        parts = line.split()
+        if not parts:
+            continue
+        log = parts[1]
+        accession_code = log_name_to_code(log)
+        print(log, accession_code)
+
+        rob_proc_log_file_name = os.path.join("rob-proc", accession_code + ".log")
+        args = ["python3", "robsrepo/Ligand-Tools/compare-models/compare_models_for_ligands_of_the_week.py",
+                '--code', accession_code, '--ligand-analysis-base', "."]
+        comp_proc = subprocess.run(args, capture_output=True)
+        with open(rob_proc_log_file_name, "bw") as f:
+            f.write(comp_proc.stdout)
+        if os.path.exists("results.out"):
+            new_results_out_name = os.path.join("correlation-tables", "results-" + accession_code + ".table")
+            os.rename("results.out", new_results_out_name)
diff --git a/compare-models/notes b/compare-models/notes
new file mode 100644
index 0000000..f28aec0
--- /dev/null
+++ b/compare-models/notes
@@ -0,0 +1,40 @@
+
+Notes:
+
+- python3 proc-lig.py  # download the new ligands, run acedrg, download the PDB entries, make MTZs and refmac scripts
+##- bash multisub-refmac.sh # run refmac for every script that doesn't have a log, creates "this_weeks_refmacs.table"
+##- [wait]
+- python3 pool-sub-1.py
+  the above doesn't make the refmacs table, so
+- edit recover-refmac-table.sh and put in today's date
+- run recover-refmac-table.sh to make the refmacs table for this week
+- edit refmac-log-analysis.py to set the refmac table for the analysis
+- python3 refmac-log-analysis.py  # find the log files that had an error in the refmacs table generated above
+- python3 compare-ligands.py 22_09_28_refmacs.table > 22_09_28_refmacs.log
+- grep ^delta: 22_09_28_refmacs.log > deltas.table  # finds how much the ligands have moved, makes deltas.table (an example line is shown after these notes)
+  # note: in the above, 22_09_28_refmacs.table is just an example - you can use any number of table files
+
+- Rscript ligand-hist.r  # make plots using deltas.table
+
+
+Further extensions with Coot or gemmi
+
+Coot:
+  - learn the Coot API
+  - depends on a Coot installation
+  - many functions available - most of the work is done
+
+Gemmi:
+  - learn the gemmi API
+  - gemmi is easily installed using pip3
+  - more portable
+  - fewer functions - most of the functions will need to be written from scratch
+
+
+----
+Mon 14 Nov 18:47:43 GMT 2022
+
+  Using Rob's script:
+
+python3 robsrepo/Ligand-Tools/compare-models/compare_models_for_ligands_of_the_week.py --code 7s3x --ligand-analysis-base .
+
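
For reference, each line that compare-ligands.py prints (and that the grep step above collects into deltas.table) has the form shown below; the values here are made up, only the column layout comes from the scripts:

    delta: XYZ 7s3x 1.8 C12 0.3421

i.e. V1 is the "delta:" tag, V2 the ligand three-letter code, V3 the accession code, V4 the resolution, V5 the atom name and V6 the displacement in Angstroms - V2, V3, V4 and V6 are the columns that ligand-hist.r reads.
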
diff --git a/compare-models/pool-sub-1.py b/compare-models/pool-sub-1.py
new file mode 100644
index 0000000..a0b3458
--- /dev/null
+++ b/compare-models/pool-sub-1.py
@@ -0,0 +1,70 @@
+
+from multiprocessing import Pool
+import subprocess
+import os
+import glob
+
+pool_size = 6
+
+def bash_the_script(script_file_name: str):
+    log_file_name = script_name_to_log_name(script_file_name)
+    print("### bash", script_file_name, log_file_name)
+    process = subprocess.run(["bash", script_file_name], check=True, stdout=subprocess.PIPE, text=True)
+    output = process.stdout
+    with open(log_file_name, "w") as f_out:
+        f_out.write(output)
+
+def script_name_to_log_name(script_file_name):
+    s = script_file_name[:-3] + ".log"
+    return s
+
+if __name__ == '__main__':
+
+    today = "22_10_24" # generate this
+    # per line we have the script file name and the log file name
+    this_weeks_refmacs_file_name = today + "_refmacs.table"
+
+    ls = []
+    # run refmac if there was not a log file for that script
+    refmac_subdirs = glob.glob("refmac/*")
+    for subdir in refmac_subdirs:
+        subdir_scripts = glob.glob(subdir + "/refmac-*.sh")
+        for script_file_name in subdir_scripts:
+            log_file_name = script_name_to_log_name(script_file_name)
+            if os.path.exists(log_file_name):
+                pass
+            else:
+                # print("run that bad boy", script_file_name)
+                ls.append(script_file_name)
+
+
+    with Pool(pool_size) as pool:
+        m = pool.map(bash_the_script, ls)
+
+    with open(this_weeks_refmacs_file_name, "w") as f_refmacs_table:
+        for scr in ls:
+            f_refmacs_table.write(scr)
+            f_refmacs_table.write(" ")
+            f_refmacs_table.write(script_name_to_log_name(scr))
+            f_refmacs_table.write("\n")
+
+# this is the shell script:
+#
+# today=$(date +%y_%m_%d)
+# this_weeks_refmacs=$today"_refmacs.table"
+# echo "" > $this_weeks_refmacs
+# for refmac_script in refmac/*/refmac*.sh ;
+# do
+#     stub="${refmac_script%.*}"
+#     log=$stub.log
+#     # force a rerun
+#     log=$stub.logx
+#     # echo $log
+#     if [ -e $log ] ; then
+#         :
+#     else
+#         echo run refmac script $refmac_script $log
+#         # bash $refmac_script > $log 2>&1 &
+#         # echo $refmac_script $log >> $this_weeks_refmacs
+#     fi
+# done
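
A note on pool-sub-1.py: bash_the_script() runs each refmac script with check=True, so one failing script raises CalledProcessError inside the worker and the exception is re-raised by pool.map(), stopping the whole batch. If that is not wanted, a variant along these lines (only a sketch, keeping the same script-to-log naming) records the failure and carries on:

    def bash_the_script(script_file_name: str):
        # Run the refmac script but do not raise on failure; record the
        # return code so one bad entry does not abort the whole pool.map().
        log_file_name = script_name_to_log_name(script_file_name)
        process = subprocess.run(["bash", script_file_name],
                                 stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
        with open(log_file_name, "w") as f_out:
            f_out.write(process.stdout)
        if process.returncode != 0:
            print("refmac script failed:", script_file_name, "status", process.returncode)
        return process.returncode
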
diff --git a/compare-models/proc-lig.py b/compare-models/proc-lig.py
new file mode 100644
index 0000000..49aa9ea
--- /dev/null
+++ b/compare-models/proc-lig.py
@@ -0,0 +1,363 @@
+
+import os
+import json
+import urllib.request
+import time
+import subprocess
+
+def get_latest_ligands_json(json_file_name: str) -> bool:
+    # Thank you Sameer
+    s = "select?q=(q_document_type%3Alatest_chemistry)&fl=new_revised_ligand&rows=10000&omitHeader=true&wt=json"
+    s = "pdb/select?group=true&group.field=pdb_id&group.ngroups=true&facet.field=new_revised_ligand&facet=true&rows=0&facet.limit=100000&facet.mincount=1&json.nl=map&group.facet=true&q=(q_document_type%3Alatest_chemistry)&omitHeader=true&wt=json"
+    url = os.path.join("https://www.ebi.ac.uk/pdbe/search/", s)
+    urllib.request.urlretrieve(url, json_file_name)
+    # add a test that that overwrote the old one (if there was an old one)
+    success = True
+    return success
+
+def mkdir_maybe(dir: str, sub_dir: str) -> None:
+    dir_name = os.path.join(dir, sub_dir)
+    if os.path.exists(dir_name):
+        pass
+    else:
+        if os.path.exists(dir):
+            pass
+        else:
+            os.mkdir(dir)
+        os.mkdir(dir_name)
+
+def tlc_to_ccd_cif_file_name(cif_dir: str, tlc: str) -> str:
+    letter = tlc[0].upper()
+    tlc_local_file_name = tlc + ".cif"
+    fn = os.path.join(cif_dir, letter, tlc_local_file_name)
+    return fn
+
+def accession_code_to_pdb_file_name(code: str, pdb_dir: str) -> str:
+    sub_dir_local = code[1:3].lower()
+    pdb_fn_local = code.lower() + ".pdb"
+    sub_dir = os.path.join(pdb_dir, sub_dir_local)
+    fn = os.path.join(pdb_dir, sub_dir_local, pdb_fn_local)
+    if os.path.exists(pdb_dir):
+        pass
+    else:
+        os.mkdir(pdb_dir)
+    if os.path.exists(sub_dir):
+        pass
+    else:
+        os.mkdir(sub_dir)
+    return fn
+
+def fetch_ccd_cif_for(tlc: str, cif_dir: str) -> str:
+    fn = tlc_to_ccd_cif_file_name(cif_dir, tlc)
+    if os.path.exists(fn):
+        return fn
+    else:
+        url = os.path.join("https://www.ebi.ac.uk/pdbe/static/files/pdbechem_v2/", tlc + ".cif")
+        letter = tlc[0].upper()
+        mkdir_maybe(cif_dir, letter)
+        urllib.request.urlretrieve(url, fn)
+        return fn
+
+def get_model_db_code(tlc_cif: str) -> str:
+    if os.path.exists(tlc_cif):
+        with open(tlc_cif, 'r') as f:
+            lines = f.readlines()
+            for line in lines:
+                if "_chem_comp.pdbx_model_coordinates_db_code" in line:
+                    parts = line.split()
+                    if len(parts) == 2:
+                        model_db_code = parts[1]
+                        if model_db_code == "?":
+                            return False
+                        return model_db_code
+        return False
+    else:
+        print("No file", tlc_cif)
+        return False
+
+def fetch_pdb_for_model_db_code(accession_code: str, pdb_dir: str) -> str:
+    # print("fetch_pdb_for_model_db_code", accession_code)
+    fn = accession_code_to_pdb_file_name(accession_code, pdb_dir)
+    if os.path.exists(fn):
+        return fn
+    else:
+        s = "https://www.ebi.ac.uk/pdbe/entry-files/download"
+        fn_server = "pdb" + accession_code.lower() + ".ent"
+        url = os.path.join(s, fn_server)
+        print("Fetching pdb", url, fn)
+        try:
+            # the PDB file might not (yet) exist on the server
+            fn_tmp = fn + ".partial_download"
+            urllib.request.urlretrieve(url, fn_tmp)
+            os.rename(fn_tmp, fn)
+            return fn
+        except urllib.error.HTTPError as e:
+            print(e, url)
+            with open("missing-models.table", "w") as f:
+                f.write(url + " " + fn)
+                f.write("\n")
+            return False
+    return False
+
+def ligand_has_only_organic_set_atoms(ccd_cif_file_name):
+
+    # the Right Way to do this is with GEMMI, but this simple
+    # hack will do now - so that I don't have to mess about
+    # with python and installing the gemmi module
+
+    def is_acceptable_element(ele):
+        goodies = ["H", "C", "N", "I", "CL", "BR", "F", "O", "P", "S", "B"]
+        return ele in goodies
+
+    with open(ccd_cif_file_name, 'r') as f:
+        lines = f.readlines()
+        atoms_block = False
+        for line in lines:
+            if "_chem_comp_atom.pdbx_ordinal" in line:
+                atoms_block = True
+            if "loop_" in line:
+                atoms_block = False
+            if atoms_block:
+                parts = line.split()
+                if len(parts) > 10:
+                    element = parts[3]
+                    if not is_acceptable_element(element):
+                        print("non-organic-set element: %s in %s" % (element, ccd_cif_file_name))
+                        return False
+    return True
+
+def run_acedrg(ccd_cif_file_name: str, ligand_tlc: str, acedrg_cif_root: str) -> bool:
+    args = ["acedrg", "-c", ccd_cif_file_name, "--coords", "-z", "-o", acedrg_cif_root]
+    print(" ", args)
+    comp_proc = subprocess.run(args, capture_output=True)
+    log_dir = "acedrg-logs"
+    if not os.path.exists(log_dir):
+        os.mkdir(log_dir)
+    log_file_name = os.path.join(log_dir, "acedrg-" + ligand_tlc + ".log")
+    with open(log_file_name, "bw") as f:
+        f.write(comp_proc.stdout)
+    return False
+
+def convert_to_mtz(accession_code: str, sub_dir: str, data_cif_file_name: str,
+                   data_mtz_file_name: str) -> bool:
+    args = ["cif2mtz", "HKLIN", data_cif_file_name, "HKLOUT", data_mtz_file_name]
+    s = b"END\n"
+    comp_proc = subprocess.run(args, capture_output=True, input=s)
+    log_file_name = os.path.join(sub_dir, "cif2mtz-" + accession_code + ".log")
+    with open(log_file_name, "bw") as f:
+        # print("writing cif2mtz log file: ", log_file_name)
+        f.write(comp_proc.stdout)
+    return comp_proc.returncode
+
+def fetch_and_make_mtz_data(accession_code: str, pdb_data_dir: str) -> str:
+    sub_dir_local = accession_code[1:3].lower()
+    data_cif_fn_local = accession_code.lower() + ".cif"
+    data_mtz_fn_local = accession_code.lower() + ".mtz"
+    data_cif_file_name = os.path.join(pdb_data_dir, sub_dir_local, data_cif_fn_local)
+    data_mtz_file_name = os.path.join(pdb_data_dir, sub_dir_local, data_mtz_fn_local)
+    data_cif_file_name_tmp = data_cif_file_name + ".partial_download"
+
+    # note to self: check here for EXPDTA in the PDB file. Don't try to download sfs
+    # for "ELECTRON MICROSCOPY" structures
+
+    if not os.path.exists(pdb_data_dir):
+        os.mkdir(pdb_data_dir)
+    if not os.path.exists(os.path.join(pdb_data_dir, sub_dir_local)):
+        os.mkdir(os.path.join(pdb_data_dir, sub_dir_local))
+    if os.path.exists(data_mtz_file_name):
+        return data_mtz_file_name
+    else:
+        sub_dir = os.path.join(pdb_data_dir, sub_dir_local)
+        server_dir = "https://www.ebi.ac.uk/pdbe/entry-files/download/"
+        sf_file_name_local = "r" + accession_code.lower() + "sf.ent"
+        url = os.path.join(server_dir, sf_file_name_local)
+        try:
+            result,headers = urllib.request.urlretrieve(url, data_cif_file_name_tmp)
+            if os.path.exists(result):
+                os.rename(data_cif_file_name_tmp, data_cif_file_name)
+                print("Download", data_cif_file_name, "created")
+                r = convert_to_mtz(accession_code, sub_dir, data_cif_file_name, data_mtz_file_name)
+                if r != 0:
+                    print("debug:: in fetch_and_make_mtz_data() with r:", r)
+                if r == 0: # shell program completed successfully
+                    return data_mtz_file_name
+        except urllib.error.HTTPError as e:
+            print("Error: in fetch_and_make_mtz_data() ", e, "for", url, data_mtz_file_name)
+    return False
+
+def write_refmac_shell_script(pdb_file_name: str, data_mtz_file_name: str, acedrg_cif_file_name: str,
+                              refmac_sub_dir: str, accession_code: str, ligand_tlc: str) -> bool:
+
+    print("making refmac script for:", pdb_file_name, data_mtz_file_name, acedrg_cif_file_name)
+    if data_mtz_file_name == False:
+        print("No data_mtz_file - nothing doing")
+        return False
+    if len(pdb_file_name) == 0:
+        return False
+    if len(data_mtz_file_name) == 0:
+        return False
+    if len(acedrg_cif_file_name) == 0:
+        return False
+    pdb_out = os.path.join(refmac_sub_dir, accession_code + "-refmac.pdb")
+    hkl_out = os.path.join(refmac_sub_dir, accession_code + "-refmac.mtz")
+    args = ["refmac5", "XYZIN", pdb_file_name, "HKLIN", data_mtz_file_name,
+            "XYZOUT", pdb_out, "HKLOUT", hkl_out, "LIBIN", acedrg_cif_file_name]
+    script_file_name_local = "refmac-" + accession_code.lower() + ".sh"
+    shell_script_file_name = os.path.join(refmac_sub_dir, script_file_name_local)
+    with open(shell_script_file_name, 'w') as f:
+        f.write("\n")
+        for arg in args:
+            f.write(arg)
+            f.write(" ")
+        f.write("<< !\n")
+        f.write("NCYCLES 5\n")
+        f.write("END\n")
+        f.write("!\n")
+
+    # now the info file: it's used for validation of the ligand
+    info_file = os.path.join(refmac_sub_dir, accession_code + ".infox")
+    with open(info_file, 'w') as f:
+        f.write("accession_code ")
+        f.write(accession_code)
+        f.write("\n")
+        f.write("ligand-code ")
+        f.write(ligand_tlc)
+        f.write("\n")
+        f.write("pdb_file_name ")
+        f.write(pdb_file_name)
+        f.write("\n")
+        f.write("data_mtz_file_name ")
+        f.write(data_mtz_file_name)
+        f.write("\n")
+        f.write("ligand-cif ")
+        f.write(acedrg_cif_file_name)
+        f.write("\n")
+
+    info_file = os.path.join(refmac_sub_dir, accession_code + ".info.json")
+    dict = {}
+    dict['accession_code'] = accession_code
+    dict['ligand-code'] = ligand_tlc
+    dict['pdb_file_name'] = pdb_file_name
+    dict['refmac_pdb_file_name'] = pdb_out
+    dict['data_mtz_file_name'] = data_mtz_file_name
+    dict['refmac_mtz_file_name'] = hkl_out
+    dict['acedrg_cif_file_name'] = acedrg_cif_file_name
+    j = json.dumps(dict, indent=4)
+    with open(info_file, 'w') as f:
+        f.write(j)
+        f.write("\n")
+
+def is_em(pdb_file_name: str) -> bool:
+    if os.path.exists(pdb_file_name):
+        f = open(pdb_file_name)
+        lines = f.readlines()
+        for line in lines:
+            if "EXPDTA" in line:
+                return "ELECTRON MICROSCOPY" in line
+    return False
+
+def proc_model_and_ligand(accession_code: str, pdb_file_name: str, ligand_tlc: str,
+                          cif_dir: str, acedrg_dir: str, refmac_dir: str) -> bool:
+
+    # Don't continue if the pdb_file_name is False or empty
+    try:
+        if len(pdb_file_name) == 0:
+            return False
+    except Exception as e:
+        print("in proc_model_and_ligand()", e)
+        return False
+
+    if os.path.exists(pdb_file_name):
+        letter = ligand_tlc[0].lower()
+        acedrg_cif_local = ligand_tlc + "-acedrg.cif"
+        acedrg_root_local = ligand_tlc + "-acedrg"
+        acedrg_cif_root = os.path.join(acedrg_dir, letter, acedrg_root_local)
+        acedrg_cif_file_name = os.path.join(acedrg_dir, letter, acedrg_cif_local)
+        if os.path.exists(acedrg_cif_file_name):
+            pass
+        else:
+            ccd_cif_file_name = tlc_to_ccd_cif_file_name(cif_dir, ligand_tlc)
+            if ligand_has_only_organic_set_atoms(ccd_cif_file_name):
+                run_acedrg(ccd_cif_file_name, ligand_tlc, acedrg_cif_root)
+        is_em_flag = is_em(pdb_file_name)
+        if is_em_flag:
+            print("EM data for", accession_code)
+        else:
+            data_mtz_file_name = fetch_and_make_mtz_data(accession_code, pdb_data_dir)
+            # print("data_mtz_file_name", data_mtz_file_name)
+            sub_dir = os.path.join(refmac_dir, accession_code[1:3].lower())
+            if not os.path.exists(refmac_dir):
+                os.mkdir(refmac_dir)
+            if not os.path.exists(sub_dir):
+                os.mkdir(sub_dir)
+
+            try:
+                # print("data_mtz_file_name: ", data_mtz_file_name, len(data_mtz_file_name))
+                if os.path.exists(acedrg_cif_file_name):
+                    if os.path.exists(pdb_file_name):
+                        if os.path.exists(data_mtz_file_name):
+                            write_refmac_shell_script(pdb_file_name, data_mtz_file_name,
+                                                      acedrg_cif_file_name,
+                                                      sub_dir, accession_code, ligand_tlc)
+                        else:
+                            print("missing mtz", data_mtz_file_name)
+            except TypeError as e:
+                print(e, "in write_refmac_shell_script")
+                return False
+    else:
+        print("pdb file does not exist: " + pdb_file_name)
+        return False
+
+def parse_latest_ligands_json(j: json, cif_dir: str, pdb_dir: str, acedrg_dir: str, refmac_dir: str):
+
+    grouped = j["grouped"]
+    fc = j['facet_counts']
+    ff = fc["facet_fields"]
+    nrl = ff["new_revised_ligand"]
+    for lig_descr in nrl:
+        s = lig_descr
+        tlc = s[:3]
+        print("Processing: -----------", tlc, "--------------")
+        tlc_cif = tlc_to_ccd_cif_file_name(cif_dir, tlc)
+        fetch_ccd_cif_for(tlc, cif_dir)
+        try:
+            model_db_code = get_model_db_code(tlc_cif)
+            pdb_fn = fetch_pdb_for_model_db_code(model_db_code, pdb_dir)
+            if len(pdb_fn) > 0:
+                if os.path.exists(pdb_fn):
+                    proc_model_and_ligand(model_db_code, pdb_fn, tlc, cif_dir, acedrg_dir, refmac_dir)
+        except TypeError as e:
+            print(e, "for", tlc)
+
+def new_ligands_from_pdbe_json(json_file_name, cif_dir, pdb_dir: str, acedrg_dir: str, refmac_dir: str):
+    try:
+        with open(json_file_name, 'r') as f:
+            j = json.load(f)
+            parse_latest_ligands_json(j, cif_dir, pdb_dir, acedrg_dir, refmac_dir)
+    except TypeError as e:
+        print(e)
+
+def need_new_json_for_ligands(json_file_name: str, cif_dir: str) -> bool:
+    if os.path.exists(json_file_name):
+        # if it's more than a day old, get a new one
+        st = os.stat(json_file_name)
+        mtime = st.st_mtime
+        t = time.time()
+        delta_time = t - mtime
+        return delta_time > 24 * 60 * 60
+    else:
+        status = get_latest_ligands_json(json_file_name)
+        return False
+
+
+if __name__ == '__main__':
+
+    cif_dir = "CCD-cifs"
+    pdb_dir = "pdb"
+    pdb_data_dir = "pdb-data"
+    acedrg_dir = "acedrg"
+    refmac_dir = "refmac"
+    json_file_name = "new_revised_ligands.json"
+
+    if need_new_json_for_ligands(json_file_name, cif_dir):
+        get_latest_ligands_json(json_file_name) # download the json file
+
+    new_ligands_from_pdbe_json(json_file_name, cif_dir, pdb_dir, acedrg_dir, refmac_dir)
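
For orientation, the <CODE>.info.json file written by write_refmac_shell_script() above (and read back by compare-ligands.py and by compare_models_for_ligands_of_the_week.py --code) looks roughly like this - the entry 7S3X and the ligand code XYZ are only placeholders, the keys and path conventions come from the script:

    {
        "accession_code": "7S3X",
        "ligand-code": "XYZ",
        "pdb_file_name": "pdb/s3/7s3x.pdb",
        "refmac_pdb_file_name": "refmac/s3/7S3X-refmac.pdb",
        "data_mtz_file_name": "pdb-data/s3/7s3x.mtz",
        "refmac_mtz_file_name": "refmac/s3/7S3X-refmac.mtz",
        "acedrg_cif_file_name": "acedrg/x/XYZ-acedrg.cif"
    }
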
diff --git a/compare-models/r-correl.r b/compare-models/r-correl.r
new file mode 100644
index 0000000..93aa5b2
--- /dev/null
+++ b/compare-models/r-correl.r
@@ -0,0 +1,15 @@
+
+table_fn = "correlation-tables/results-8ers.table"
+table_fn = "correlation-tables/results-8ewa.table"
+table_fn = "correlation-tables/results-8dus.table"
+table_fn = "correlation-tables/results-8du9.table"
+table_fn = "correlation-tables/results-8bb5.table"
+table_fn = "correlation-tables/results-8aqn.table"
+table_fn = "correlation-tables/results-8duc.table"
+table_fn = "correlation-tables/results-8a92.table"
+table_fn = "correlation-tables/results-7fr6.table"
+
+a = read.table(table_fn, header=TRUE)
+
+plot(a$rscc1, a$rscc2, xlim=c(0.3, 1.0), ylim=c(0.3, 1.0), xlab="correl", ylab="correl")
+abline(a=0, b=1)
diff --git a/compare-models/refmac-log-analysis.py b/compare-models/refmac-log-analysis.py
new file mode 100644
index 0000000..91a9e31
--- /dev/null
+++ b/compare-models/refmac-log-analysis.py
@@ -0,0 +1,27 @@
+import glob
+import os
+
+this_weeks_refmacs_table = "this_weeks_refmacs.table"
+this_weeks_refmacs_table = "15-11-refmacs.table"
+
+
+def check_for_error(refmac_log_file_name_in):
+    refmac_log_file_name = str(refmac_log_file_name_in)
+    print('checking "{}"'.format(refmac_log_file_name)) # may not be needed
+    with open(refmac_log_file_name, "r") as refmac_log_file:
+        # print("analysing log", refmac_log_file_name)
+        for line in refmac_log_file.readlines():
+            if "Error termination." in line:
+                print("This log had an error termination:", refmac_log_file_name)
+            if "Error: Problem with ligand name" in line:
+                print("This log had an error due to the ligand name:", refmac_log_file_name)
+
+
+with open(this_weeks_refmacs_table) as f:
+    lines = f.readlines()
+    for line in lines:
+        parts = line.split()
+        if parts:
+            shell_script_file_name = parts[0]
+            refmac_log_file_name = parts[1]
+            check_for_error(refmac_log_file_name)
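
Once the per-entry correlation tables exist (correlation-tables/results-<code>.table, written by multi-robs-analysis.py), a small follow-up script can flag atoms whose local map correlation around the refined position (rscc2) is noticeably lower than around the original position (rscc1). A minimal sketch - the file-name convention and column names come from the scripts above, the 0.05 threshold is arbitrary:

    import csv
    import glob

    # Flag atoms whose RSCC dropped noticeably after the refmac re-refinement.
    for table_file_name in glob.glob("correlation-tables/results-*.table"):
        with open(table_file_name) as f:
            for row in csv.DictReader(f, delimiter="\t"):
                drop = float(row["rscc1"]) - float(row["rscc2"])
                if drop > 0.05:
                    print(table_file_name, row["chain"], row["residue"], row["atom"],
                          "rscc1", row["rscc1"], "rscc2", row["rscc2"])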