Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
120 changes: 88 additions & 32 deletions PyBioMed/PyGetMol/Getmol.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@

try:
# Python 3
from urllib.request import urlopen
from urllib.request import urlopen, Request
except ImportError:
# Python 2
from urllib2 import urlopen
from urllib2 import urlopen, Request
# Core Library modules
import os
import re
import string
import pybel

# Third party modules
from rdkit import Chem
Expand Down Expand Up @@ -134,31 +135,45 @@ def ReadMolFromMol(filename=""):

#############################################################################


def GetMolFromCAS(casid=""):
    """
    Download a molecule from http://www.chemnet.com/cas/ by CAS ID (casid)
    and return it as a SMILES string.

    Requires Open Babel's pybel module.

    Usage:

        result = GetMolFromCAS(casid)

        Input: casid is a CAS registry number given as a string;
        surrounding whitespace is ignored.

        Output: result is the SMILES string converted from the InChI
        found on the chemnet page.

    Raises:

        ValueError if no InChI string can be located in the page.
    """
    try:
        # Open Babel 3.x ships pybel inside the openbabel package.
        from openbabel import pybel
    except ImportError:
        # Open Babel 2.x exposes pybel as a top-level module.
        import pybel

    casid = casid.strip()
    link = (
        "http://www.chemnet.com/cas/supplier.cgi?terms=" + casid + "&l=&exact=dict"
    )

    # Always release the connection, even if reading or decoding fails.
    localfile = urlopen(link)
    try:
        page = localfile.read().decode("utf-8", "replace")
    finally:
        localfile.close()

    # The InChI sits in a left-aligned table cell; take the first one found.
    res = None
    for line in page.splitlines():
        if "InChI=" not in line:
            continue
        cells = line.split('<td align="left">')
        if len(cells) < 2:
            # "InChI=" appeared outside the expected table cell; keep looking.
            continue
        candidate = cells[1].split("</td>")[0].strip()
        if candidate.startswith("InChI"):
            res = candidate
            break

    if res is None:
        raise ValueError("InChI string not found for CAS ID {0}.".format(casid))

    # pybel.Molecule() wraps an existing OBMol; parsing text needs readstring().
    mol = pybel.readstring("inchi", res)
    return mol.write("smi").strip()


def GetMolFromEBI():
Expand Down Expand Up @@ -193,30 +208,71 @@ def GetMolFromDrugbank(dbid=""):
Downloading the molecules from http://www.drugbank.ca/ by dbid (dbid).
"""
dbid = dbid.strip()

localfile = urlopen("http://www.drugbank.ca/drugs/" + dbid + ".sdf")
temp = localfile.readlines()
f = file("temp.sdf", "w")
f.writelines(temp)
link = "http://www.drugbank.ca/drugs/" + dbid + ".sdf"

# Create a request with headers
req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
localfile = urlopen(req)
lines = localfile.readlines()
#print(lines)
# Read and decode the contents of the file
temp = [line.decode('utf-8') for line in lines] # Decode each line
#print(temp)
with open("temp.sdf", "w") as f:
f.write("".join(temp)) # Join the list into a single string and write it to the file

# Close the files
f.close()
localfile.close()

# Check if the file was written successfully
if os.path.getsize("temp.sdf") == 0:
raise ValueError("The downloaded SDF file is empty or corrupted.")

# Load the molecule using RDKit
m = Chem.MolFromMolFile("temp.sdf")
os.remove("temp.sdf")

#print(f"extracted data: {m}")

# Check if the molecule was successfully loaded
if m is None:
raise ValueError("RDKit could not load the molecule from the SDF file.")

# Convert the molecule to SMILES
temp = Chem.MolToSmiles(m, isomericSmiles=True)

# Remove the temporary SDF file
os.remove("temp.sdf")

return temp


def GetMolFromKegg(kid=""):
"""
Downloading the molecules from http://www.genome.jp/ by kegg id (kid).
"""
ID = str(kid)
localfile = urlopen("http://www.genome.jp/dbget-bin/www_bget?-f+m+drug+" + ID)
temp = localfile.readlines()
f = file("temp.mol", "w")
f.writelines(temp)
link = urlopen("http://www.genome.jp/dbget-bin/www_bget?-f+m+drug+" + ID)
#temp = localfile.readlines()
#f = open("temp.mol", "w")
#f.writelines(temp)
#f.close()
#localfile.close()
# Create a request with headers

req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
localfile = urlopen(req)
lines = localfile.readlines()
#print(lines)
# Read and decode the contents of the file
temp = [line.decode('utf-8') for line in lines] # Decode each line
#print(temp)
with open("temp.sdf", "w") as f:
f.write("".join(temp)) # Join the list into a single string and write it to the file

# Close the files
f.close()
localfile.close()

m = Chem.MolFromMolFile("temp.mol")
os.remove("temp.mol")
temp = Chem.MolToSmiles(m, isomericSmiles=True)
Expand Down
96 changes: 91 additions & 5 deletions PyBioMed/PyMolecule/connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
##############################################################################
"""
# Third party modules
import numpy as np
# -*- coding: utf-8 -*-
import numpy
from rdkit import Chem
from rdkit.Chem import rdchem
Expand Down Expand Up @@ -303,7 +305,7 @@ def CalculateChi10p(mol):
"""
return _CalculateChinp(mol, NumPath=10)


'''
def CalculateChi3c(mol):
"""
#################################################################
Expand Down Expand Up @@ -333,8 +335,39 @@ def CalculateChi3c(mol):
deltas1 = numpy.array(deltas, numpy.float)
accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
return accum
'''
def CalculateChi3c(mol):
    """
    #################################################################
    Calculation of molecular connectivity chi index for cluster

    ---->Chi3c

    Usage:

        result=CalculateChi3c(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value
    #################################################################
    """
    # One central atom bonded to three neighbours (any atom, any bond type).
    patt = Chem.MolFromSmarts("*~*(~*)~*")

    accum = 0.0
    for cluster in mol.GetSubstructMatches(patt):
        degrees = [mol.GetAtomWithIdx(idx).GetDegree() for idx in cluster]
        # Zero-degree atoms are left out of the product.
        nonzero = [d for d in degrees if d != 0]
        if nonzero:
            # Builtin float replaces the numpy.float alias removed from NumPy.
            accum = accum + 1.0 / numpy.sqrt(numpy.array(nonzero, float).prod())
    return accum

''''''
def CalculateChi4c(mol):
"""
#################################################################
Expand Down Expand Up @@ -365,7 +398,18 @@ def CalculateChi4c(mol):
accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
return accum

def CalculateChi4c(mol):
    """
    #################################################################
    Calculation of molecular connectivity chi index for cluster

    ---->Chi4c

    Usage:

        result=CalculateChi4c(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value
    #################################################################
    """
    # One central atom bonded to four neighbours (any atom, any bond type).
    patt = Chem.MolFromSmarts("*~*(~*)(~*)~*")

    accum = 0.0
    # Sum one term per matched cluster; a single product over every atom
    # in the molecule is not a cluster index.
    for cluster in mol.GetSubstructMatches(patt):
        degrees = [mol.GetAtomWithIdx(idx).GetDegree() for idx in cluster]
        # Zero-degree atoms are left out of the product.
        nonzero = [d for d in degrees if d != 0]
        if nonzero:
            # Builtin float replaces the numpy.float alias removed from NumPy.
            accum = accum + 1.0 / numpy.sqrt(numpy.array(nonzero, float).prod())
    return accum

'''
def CalculateChi4pc(mol):
"""
#################################################################
Expand Down Expand Up @@ -396,7 +440,16 @@ def CalculateChi4pc(mol):
deltas1 = numpy.array(deltas, numpy.float)
accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
return accum

'''
def CalculateChi4pc(mol):
    """
    #################################################################
    Calculation of molecular connectivity chi index for path/cluster

    ---->Chi4pc

    Usage:

        result=CalculateChi4pc(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value
    #################################################################
    """
    # A branched atom (three neighbours) with one branch extended by a
    # further bond (any atom, any bond type).
    patt = Chem.MolFromSmarts("*~*(~*)~*~*")

    accum = 0.0
    # Sum one term per matched subgraph; a single product over every atom
    # in the molecule is not a path/cluster index.
    for cluster in mol.GetSubstructMatches(patt):
        degrees = [mol.GetAtomWithIdx(idx).GetDegree() for idx in cluster]
        # Zero-degree atoms are left out of the product.
        nonzero = [d for d in degrees if d != 0]
        if nonzero:
            # Builtin float replaces the numpy.float alias removed from NumPy.
            accum = accum + 1.0 / numpy.sqrt(numpy.array(nonzero, float).prod())
    return accum

def CalculateDeltaChi3c4pc(mol):
"""
Expand Down Expand Up @@ -924,7 +977,39 @@ def _AtomHKDeltas(atom, skipHs=0):
res.append(0.0)
return res

def CalculateChiv3c(mol):
    """
    #################################################################
    Calculation of valence molecular connectivity chi index for cluster

    ---->Chiv3c

    Usage:

        result=CalculateChiv3c(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value
    #################################################################
    """
    # One central atom bonded to three neighbours (any atom, any bond type).
    patt = Chem.MolFromSmarts("*~*(~*)~*")

    accum = 0.0
    for cluster in mol.GetSubstructMatches(patt):
        deltas = [_AtomHKDeltas(mol.GetAtomWithIdx(x)) for x in cluster]
        # NOTE(review): _AtomHKDeltas ends with `return res` after
        # `res.append(...)`, so it appears to return a list per atom; if so
        # this membership test never matches a zero delta. Behaviour is kept
        # as-is -- TODO confirm whether the value should be unwrapped first.
        while 0 in deltas:
            deltas.remove(0)
        if deltas:
            # Builtin float replaces the numpy.float alias removed from NumPy.
            deltas1 = numpy.array(deltas, float)
            # Use the `numpy` name like the rest of this module, not `np`.
            accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
    return accum
'''
def CalculateChiv3c(mol):
"""
#################################################################
Expand Down Expand Up @@ -952,9 +1037,10 @@ def CalculateChiv3c(mol):
deltas.remove(0)
if deltas != []:
deltas1 = numpy.array(deltas, numpy.float)
deltas1 = numpy.array(deltas, float)
accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
return accum

'''

def CalculateChiv4c(mol):
"""
Expand Down Expand Up @@ -982,7 +1068,7 @@ def CalculateChiv4c(mol):
while 0 in deltas:
deltas.remove(0)
if deltas != []:
deltas1 = numpy.array(deltas, numpy.float)
deltas1 = numpy.array(deltas, numpy.float64)
accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
return accum

Expand Down Expand Up @@ -1015,7 +1101,7 @@ def CalculateChiv4pc(mol):
while 0 in deltas:
deltas.remove(0)
if deltas != []:
deltas1 = numpy.array(deltas, numpy.float)
deltas1 = numpy.array(deltas, float)
accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
return accum

Expand Down
Loading