Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
venv
.idea
*.py[cod]

# C extensions
Expand Down
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
### Beowulfey's notes:

I've forked this repository to add the ability to return the aligned sequences in the alignment tree order. Although the clustalo API has no explicit option for this, I figured out how it decides whether to do so (it appears to depend on whether memory has been allocated for the tree order). Thanks to ordered dictionaries in Python 3, this actually works, too!

clustalo-python
===============

Expand Down
73 changes: 49 additions & 24 deletions clustalo.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ clustalo_clustalo(PyObject *self, PyObject *args, PyObject *keywds)
int maxGuidetreeIterations = rAlnOpts.iMaxGuidetreeIterations;
int maxHMMIterations = rAlnOpts.iMaxHMMIterations;
int numThreads = 1;
int outOrder = 1;
static char *kwlist[] = {
"seqs",
"seqtype",
Expand All @@ -36,17 +37,19 @@ clustalo_clustalo(PyObject *self, PyObject *args, PyObject *keywds)
"max_guidetree_iterations",
"max_hmm_iterations",
"num_threads",
"output_order",
NULL
};
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O!|iOOiiii", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O!|iOOiiiii", kwlist,
&PyDict_Type, &inputDict,
&seqtype,
&mbedGuideTree,
&mbedIteration,
&numCombinedIterations,
&maxGuidetreeIterations,
&maxHMMIterations,
&numThreads))
&numThreads,
&outOrder))
return NULL;

if (PyObject_Not(inputDict))
Expand Down Expand Up @@ -106,6 +109,15 @@ clustalo_clustalo(PyObject *self, PyObject *args, PyObject *keywds)
return PyDict_Copy(inputDict);
}

// Allocating prMSeq->tree_order is all that is needed to capture the tree-order information.
// This segfaults with only 2 sequences, though (the program refuses to calculate a tree), so it is only done for more than 2 sequences.
if (prMSeq->nseqs > 2 && outOrder == 1) {
prMSeq->tree_order = (int *) CKMALLOC(prMSeq->nseqs * sizeof(int));
}
else {
outOrder = 0;
}

// Perform the alignment.
int rv;
Py_BEGIN_ALLOW_THREADS
Expand All @@ -120,18 +132,30 @@ clustalo_clustalo(PyObject *self, PyObject *args, PyObject *keywds)
// Return the aligned results in a dict.
PyObject *returnDict = PyDict_New();
int idx;
for (idx = 0; idx < prMSeq->nseqs; idx++) {
const char *key = prMSeq->sqinfo[idx].name;
#if PY_MAJOR_VERSION >= 3
if (outOrder == 1){
for (idx = 0; idx < prMSeq->nseqs; idx++) {
const char *key = prMSeq->sqinfo[prMSeq->tree_order[idx]].name;
#if PY_MAJOR_VERSION >= 3
PyObject *value = PyUnicode_FromString(prMSeq->seq[prMSeq->tree_order[idx]]);
#else
PyObject *value = PyString_FromString(prMSeq->seq[prMSeq->tree_order[idx]]);
#endif
PyDict_SetItemString(returnDict, key, value);
}
}
else {
for (idx = 0; idx < prMSeq->nseqs; idx++) {
const char *key = prMSeq->sqinfo[idx].name;
#if PY_MAJOR_VERSION >= 3
PyObject *value = PyUnicode_FromString(prMSeq->seq[idx]);
#else
#else
PyObject *value = PyString_FromString(prMSeq->seq[idx]);
#endif
PyDict_SetItemString(returnDict, key, value);
#endif
PyDict_SetItemString(returnDict, key, value);
}
}
return returnDict;
}

#if PY_MAJOR_VERSION >= 3
#define MOD_ERROR_VAL NULL
#define MOD_SUCCESS_VAL(val) val
Expand All @@ -151,21 +175,22 @@ clustalo_clustalo(PyObject *self, PyObject *args, PyObject *keywds)

static PyMethodDef ClustaloMethods[] = {
{"clustalo", (PyCFunction)clustalo_clustalo, METH_VARARGS | METH_KEYWORDS,
"Runs clustal omega."
""
"Args:"
" data (dict): dictionary of sequence_name => bases"
""
"Kwargs:"
" seqtype (int): should be one of clustalo.DNA, clustalo.RNA, or clustalo.PROTEIN"
" mbed_guide_tree (bool): whether mBed-like clustering guide tree should be used"
" mbed_iteration (bool): whether mBed-like clustering iteration should be used"
" num_combined_iterations (int): number of (combined guide-tree/HMM) iterations"
" max_guidetree_iterations (int): max guide tree iterations within combined iterations"
" max_hmm_iterations (int): max HMM iterations within combined iterations"
" num_threads (int): number of threads to use (requires libclustalo compiled with OpenMP)"
""
"Returns dict of sequence_named => aligned_bases ('_' for gaps)"},
"Runs clustal omega.\n"
"\n"
"Args:\n"
" data (dict): dictionary of sequence_name => bases\n"
"\n"
"Kwargs:\n"
" seqtype (int): should be one of clustalo.DNA, clustalo.RNA, or clustalo.PROTEIN\n"
" mbed_guide_tree (bool): whether mBed-like clustering guide tree should be used\n"
" mbed_iteration (bool): whether mBed-like clustering iteration should be used\n"
" num_combined_iterations (int): number of (combined guide-tree/HMM) iterations\n"
" max_guidetree_iterations (int): max guide tree iterations within combined iterations\n"
" max_hmm_iterations (int): max HMM iterations within combined iterations\n"
" num_threads (int): number of threads to use (requires libclustalo compiled with OpenMP)\n"
" output_order (int): return the alignment with either the input order (0) or alignment tree order (1). Only works on >Python 3.6, where dictionaries are ordered.\n"
"\n"
"Returns dict of sequence_names:aligned_bases ('-' for gaps)\n"},
{NULL, NULL, 0, NULL}
};

Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@

module = Extension('clustalo',
sources = ['clustalo.c'],
include_dirs=['/usr/include/clustalo', '/usr/local/include/clustalo'],
include_dirs=['/usr/local/sci/clustalo/v1.2.4/include/clustalo'],
library_dirs=['/usr/local/sci/clustalo/v1.2.4/lib'],
libraries=libraries,
extra_compile_args=extra_compile_args)

setup(name='clustalo',
version='0.1.2',
version='0.1.3',
description='Python wrapper around libclustalo',
author='Benchling Engineering',
author_email='eng@benchling.com',
Expand Down