From 9919fb18b9874f7ae5a297bfceb25d572fc5c54e Mon Sep 17 00:00:00 2001 From: Me Date: Fri, 7 Apr 2023 10:55:47 +0100 Subject: [PATCH 1/4] Add SyntheticAccessibilityScore.yaml. --- python/local/SyntheticAccessibilityScore.yaml | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 python/local/SyntheticAccessibilityScore.yaml diff --git a/python/local/SyntheticAccessibilityScore.yaml b/python/local/SyntheticAccessibilityScore.yaml new file mode 100644 index 0000000..6df5619 --- /dev/null +++ b/python/local/SyntheticAccessibilityScore.yaml @@ -0,0 +1,69 @@ +id: 5ef62e8f-e15f-4675-9a2c-3b13c3265e43 +name: Synthetic Accessibility Score +description: Calculates the Synthetic Accessibility Score using the RDKit implementation. +category: Chemistry +version: 1.0.0 +serviceName: Script +serviceUri: glysade.python +executorId: Glysade.CPythonDataFxn +inputFields: +- control: + id: structureColumn + label: Select structure column + type: columnselect + filters: + - dataType: string + contentType: + - chemical/x-mdl-molfile + - chemical/x-mdl-molfile-v3000 + - chemical/x-smiles + - chemical/x-daylight-smiles + - dataType: binary + contentType: + - chemical/x-mdl-molfile + - chemical/x-mdl-molfile-v3000 + validationRules: + - type: required + message: Must select column of 2D structures + request: + id: structureColumn + dataType: string + selectorType: column +tags: +- color: '#50AF28' + text: chemistry +- color: '#c6fc00' + text: calcprop +updateBehavior: automatic +maximumOutputColumns: !!int 1 +maximumOutputTables: !!int 0 +chemistryFunction: !!bool false +script: | + # sascorer has been copied here from the RDKit Contrib area. The + # license is in the file. + from df.sascorer import calculateScore + from df.chem_helper import column_to_molecules + from df.data_transfer import (DataFunctionRequest, DataFunctionResponse, + DataType, ColumnData, string_input_field) + + + def execute(request: DataFunctionRequest) -> DataFunctionResponse: + column_id = string_input_field(request, 'structureColumn') + input_column = request.inputColumns[column_id] + mols = column_to_molecules(input_column) + scores = [None if m is None else calculateScore(m) for m in mols] + output_column = ColumnData(name=f'{input_column.name} SA Score', + dataType=DataType.DOUBLE, values=scores) + response = DataFunctionResponse(outputColumns=[output_column]) + return response +outputFields: +- id: column1 + source: column + type: filter + name: Output QED score column +allowedClients: +- Analyst +- WebPlayer +demoUrl: +limitBy: none +minimumChartsVersion: From 3d2bd2c7976f66b59a112a3e114825069630fa12 Mon Sep 17 00:00:00 2001 From: Me Date: Fri, 7 Apr 2023 11:03:32 +0100 Subject: [PATCH 2/4] Correct output column name. --- python/local/SyntheticAccessibilityScore.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/local/SyntheticAccessibilityScore.yaml b/python/local/SyntheticAccessibilityScore.yaml index 6df5619..b45a8be 100644 --- a/python/local/SyntheticAccessibilityScore.yaml +++ b/python/local/SyntheticAccessibilityScore.yaml @@ -60,7 +60,7 @@ outputFields: - id: column1 source: column type: filter - name: Output QED score column + name: Output SA Score column allowedClients: - Analyst - WebPlayer From 4b8897aa73d3c1b692f09dda0a20bc779c0e4c2a Mon Sep 17 00:00:00 2001 From: Me Date: Fri, 7 Apr 2023 11:11:51 +0100 Subject: [PATCH 3/4] Change output column name. --- python/local/SyntheticAccessibilityScore.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/local/SyntheticAccessibilityScore.yaml b/python/local/SyntheticAccessibilityScore.yaml index b45a8be..491e273 100644 --- a/python/local/SyntheticAccessibilityScore.yaml +++ b/python/local/SyntheticAccessibilityScore.yaml @@ -52,7 +52,7 @@ script: | input_column = request.inputColumns[column_id] mols = column_to_molecules(input_column) scores = [None if m is None else calculateScore(m) for m in mols] - output_column = ColumnData(name=f'{input_column.name} SA Score', + output_column = ColumnData(name=f'SA Score {input_column.name}', dataType=DataType.DOUBLE, values=scores) response = DataFunctionResponse(outputColumns=[output_column]) return response From a1ef9f47294851ad75d786ed1465ef977137ba6f Mon Sep 17 00:00:00 2001 From: Me Date: Mon, 24 Apr 2023 11:44:09 +0100 Subject: [PATCH 4/4] Add normalization. --- python/local/SyntheticAccessibilityScore.yaml | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/python/local/SyntheticAccessibilityScore.yaml b/python/local/SyntheticAccessibilityScore.yaml index 491e273..5ecf2cb 100644 --- a/python/local/SyntheticAccessibilityScore.yaml +++ b/python/local/SyntheticAccessibilityScore.yaml @@ -39,20 +39,38 @@ maximumOutputColumns: !!int 1 maximumOutputTables: !!int 0 chemistryFunction: !!bool false script: | - # sascorer has been copied here from the RDKit Contrib area. The - # license is in the file. from df.sascorer import calculateScore from df.chem_helper import column_to_molecules from df.data_transfer import (DataFunctionRequest, DataFunctionResponse, DataType, ColumnData, string_input_field) + from rdkit.Chem import AllChem + from rdkit.Chem.MolStandardize import rdMolStandardize + + from ruse.rdkit.rdkit_utils import standardize_mol def execute(request: DataFunctionRequest) -> DataFunctionResponse: column_id = string_input_field(request, 'structureColumn') input_column = request.inputColumns[column_id] mols = column_to_molecules(input_column) - scores = [None if m is None else calculateScore(m) for m in mols] - output_column = ColumnData(name=f'SA Score {input_column.name}', + + # The objects used for standardizing the molecules when creating + # the scores database. + # SureChEMBL has some odd azides that the normalizer doesn't touch. + fix_azide = AllChem.ReactionFromSmarts('[N-:1]=[N:2]#[N+:3]>>[N+0:1]#[N+:2][N-:3]') + normer = rdMolStandardize.Normalizer() + uncharger = rdMolStandardize.Uncharger() + metal_disconnector = rdMolStandardize.MetalDisconnector() + + scores = [] + for m in mols: + if m is None: + scores.append(None) + else: + sm = standardize_mol(m, normer, uncharger, metal_disconnector, + fix_azide) + scores.append(calculateScore(sm)) + output_column = ColumnData(name=f'{input_column.name} SA Score', dataType=DataType.DOUBLE, values=scores) response = DataFunctionResponse(outputColumns=[output_column]) return response