diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dcb9349..ad3f607 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,12 +53,6 @@ jobs: conda env update --file environment.yml --prune pdm sync - - name: Test with pytest - shell: bash -l {0} - run: | - pdm run pytest --cov --cov-report html - rm htmlcov/.gitignore - - name: Publish coverage report if: ${{ github.ref == 'refs/heads/main' && matrix.python-version == '3.10' }} uses: JamesIves/github-pages-deploy-action@v4 diff --git a/.gitignore b/.gitignore index 1621198..67deb6d 100644 --- a/.gitignore +++ b/.gitignore @@ -123,3 +123,5 @@ ENV/ *.cym *.simularium **/analysis_outputs/** +*.h5 +*.pdm-python diff --git a/docs/conf.py b/docs/conf.py index 9177489..651c26c 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -58,7 +58,7 @@ # List of modules to be mocked up. Useful when some external dependencies are # not met at build time and break the building process. -autodoc_mock_imports = [] +autodoc_mock_imports = ["readdy"] # Controls how to represent typehints. autodoc_typehints = "signature" diff --git a/docs/index.rst b/docs/index.rst index 4e90c35..d1fbdd2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,6 +16,7 @@ Simulations Analysis + Visualization .. autosummary:: :toctree: _summary @@ -25,6 +26,7 @@ subcell_pipeline.simulation subcell_pipeline.analysis + subcell_pipeline.visualization .. toctree:: :hidden: diff --git a/docs/visualization.rst b/docs/visualization.rst new file mode 100644 index 0000000..acc9f03 --- /dev/null +++ b/docs/visualization.rst @@ -0,0 +1,5 @@ +Visualization workflow notebooks +================================ + +.. include:: ../subcell_pipeline/visualization/README.md + :parser: myst_parser.sphinx_ diff --git a/pdm.lock b/pdm.lock index 891a1b0..fc9299d 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. 
[metadata] -groups = ["default", "lint", "dev", "test", "docs"] +groups = ["default", "lint", "dev", "test", "docs", "debug"] strategy = ["cross_platform", "inherit_metadata"] -lock_version = "4.4.1" -content_hash = "sha256:a44304e4a75c0069b62e7aa6b0184948abfc853e8067ad4e274c1ce12737d12c" +lock_version = "4.4.2" +content_hash = "sha256:5bb53b76f4a8f343d2e5d21113e972b10b13d0e830cc7857ee1629af1333c7f4" [[package]] name = "aiobotocore" @@ -78,6 +78,9 @@ version = "0.11.0" requires_python = ">=3.6" summary = "itertools and builtins for AsyncIO and mixed iterables" groups = ["default"] +dependencies = [ + "typing-extensions>=4.0; python_version < \"3.10\"", +] files = [ {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, @@ -131,6 +134,8 @@ groups = ["default"] dependencies = [ "Mako", "SQLAlchemy>=1.3.0", + "importlib-metadata; python_version < \"3.9\"", + "importlib-resources; python_version < \"3.9\"", "typing-extensions>=4", ] files = [ @@ -144,6 +149,9 @@ version = "0.7.0" requires_python = ">=3.8" summary = "Reusable constraint types to use with typing.Annotated" groups = ["default"] +dependencies = [ + "typing-extensions>=4.0.0; python_version < \"3.9\"", +] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -169,6 +177,7 @@ dependencies = [ "exceptiongroup; python_version < \"3.11\"", "idna>=2.8", "sniffio>=1.1", + "typing-extensions; python_version < \"3.8\"", ] files = [ {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, @@ -197,6 +206,7 @@ dependencies = [ "PyYAML", "certifi", 
"click>=5.0", + "dataclasses; python_version < \"3.7\"", "markdown", "requests", "requests-oauthlib", @@ -224,9 +234,10 @@ files = [ name = "asttokens" version = "2.4.1" summary = "Annotate AST trees with source code positions" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "six>=1.12.0", + "typing; python_version < \"3.5\"", ] files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, @@ -239,7 +250,9 @@ version = "4.0.3" requires_python = ">=3.7" summary = "Timeout context manager for asyncio programs" groups = ["default"] -marker = "python_version < \"3.12.0\"" +dependencies = [ + "typing-extensions>=3.6.5; python_version < \"3.8\"", +] files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -280,6 +293,9 @@ version = "23.2.0" requires_python = ">=3.7" summary = "Classes Without Boilerplate" groups = ["default", "dev"] +dependencies = [ + "importlib-metadata; python_version < \"3.8\"", +] files = [ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, @@ -304,6 +320,9 @@ version = "2.15.0" requires_python = ">=3.8" summary = "Internationalization utilities" groups = ["docs"] +dependencies = [ + "pytz>=2015.7; python_version < \"3.9\"", +] files = [ {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, @@ -377,6 +396,7 @@ dependencies = [ "jmespath<2.0.0,>=0.7.1", "python-dateutil<3.0.0,>=2.1", 
"urllib3!=2.2.0,<3,>=1.25.4; python_version >= \"3.10\"", + "urllib3<1.27,>=1.25.4; python_version < \"3.10\"", ] files = [ {file = "botocore-1.34.106-py3-none-any.whl", hash = "sha256:4baf0e27c2dfc4f4d0dee7c217c716e0782f9b30e8e1fff983fce237d88f73ae"}, @@ -535,6 +555,7 @@ summary = "Composable command line interface toolkit" groups = ["default", "lint"] dependencies = [ "colorama; platform_system == \"Windows\"", + "importlib-metadata; python_version < \"3.8\"", ] files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, @@ -557,7 +578,7 @@ name = "colorama" version = "0.4.6" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" summary = "Cross-platform colored terminal text." -groups = ["default", "dev", "docs", "lint", "test"] +groups = ["debug", "default", "dev", "docs", "lint", "test"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -821,7 +842,7 @@ name = "decorator" version = "5.1.1" requires_python = ">=3.5" summary = "Decorators for Humans" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, @@ -911,7 +932,7 @@ name = "exceptiongroup" version = "1.2.1" requires_python = ">=3.7" summary = "Backport of PEP 654 (exception groups)" -groups = ["default", "test"] +groups = ["debug", "default", "test"] marker = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, @@ -923,7 +944,7 @@ name = 
"executing" version = "2.0.1" requires_python = ">=3.5" summary = "Get the currently executing AST node of a frame, and other information" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, {file = "executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"}, @@ -1108,6 +1129,7 @@ requires_python = ">=3.8" summary = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." groups = ["default"] dependencies = [ + "astunparse>=1.6; python_version < \"3.9\"", "colorama>=0.4", ] files = [ @@ -1121,6 +1143,9 @@ version = "0.14.0" requires_python = ">=3.7" summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" groups = ["default"] +dependencies = [ + "typing-extensions; python_version < \"3.8\"", +] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -1141,6 +1166,27 @@ files = [ {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, ] +[[package]] +name = "h5py" +version = "3.11.0" +requires_python = ">=3.8" +summary = "Read and write HDF5 files from Python" +groups = ["default"] +dependencies = [ + "numpy>=1.17.3", +] +files = [ + {file = "h5py-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1625fd24ad6cfc9c1ccd44a66dac2396e7ee74940776792772819fc69f3a3731"}, + {file = "h5py-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c072655ad1d5fe9ef462445d3e77a8166cbfa5e599045f8aa3c19b75315f10e5"}, + {file = "h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:77b19a40788e3e362b54af4dcf9e6fde59ca016db2c61360aa30b47c7b7cef00"}, + {file = "h5py-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:ef4e2f338fc763f50a8113890f455e1a70acd42a4d083370ceb80c463d803972"}, + {file = "h5py-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd732a08187a9e2a6ecf9e8af713f1d68256ee0f7c8b652a32795670fb481ba"}, + {file = "h5py-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75bd7b3d93fbeee40860fd70cdc88df4464e06b70a5ad9ce1446f5f32eb84007"}, + {file = "h5py-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52c416f8eb0daae39dabe71415cb531f95dce2d81e1f61a74537a50c63b28ab3"}, + {file = "h5py-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:083e0329ae534a264940d6513f47f5ada617da536d8dccbafc3026aefc33c90e"}, + {file = "h5py-3.11.0.tar.gz", hash = "sha256:7b7e8f78072a2edec87c9836f25f34203fd492a4475709a18b417a33cfb21fa9"}, +] + [[package]] name = "hpack" version = "4.0.0" @@ -1245,6 +1291,21 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] +[[package]] +name = "imageio" +version = "2.34.2" +requires_python = ">=3.8" +summary = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." 
+groups = ["default"] +dependencies = [ + "numpy", + "pillow>=8.3.2", +] +files = [ + {file = "imageio-2.34.2-py3-none-any.whl", hash = "sha256:a0bb27ec9d5bab36a9f4835e51b21d2cb099e1f78451441f94687ff3404b79f8"}, + {file = "imageio-2.34.2.tar.gz", hash = "sha256:5c0c0ee8faa018a1c42f649b90395dd4d3bb6187c09053a0cd6f1fdd51bbff5e"}, +] + [[package]] name = "imagesize" version = "1.4.1" @@ -1264,6 +1325,7 @@ summary = "Read metadata from Python packages" groups = ["dev"] marker = "python_full_version < \"3.10.2\"" dependencies = [ + "typing-extensions>=3.6.4; python_version < \"3.8\"", "zipp>=0.5", ] files = [ @@ -1277,6 +1339,9 @@ version = "6.1.3" requires_python = ">=3.8" summary = "Read resources from Python packages" groups = ["default"] +dependencies = [ + "zipp>=3.1.0; python_version < \"3.10\"", +] files = [ {file = "importlib_resources-6.1.3-py3-none-any.whl", hash = "sha256:4c0269e3580fe2634d364b39b38b961540a7738c02cb984e98add8b4221d793d"}, {file = "importlib_resources-6.1.3.tar.gz", hash = "sha256:56fb4525197b78544a3354ea27793952ab93f935bb4bf746b846bb1015020f2b"}, @@ -1314,6 +1379,24 @@ files = [ {file = "io_collection-0.10.2.tar.gz", hash = "sha256:40faa2fe94e8049dd900c42c09fbb4b1b5da2a226a2cd1618a1ffb89a636ea18"}, ] +[[package]] +name = "ipdb" +version = "0.13.13" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +summary = "IPython-enabled pdb" +groups = ["debug"] +dependencies = [ + "decorator; python_version > \"3.6\" and python_version < \"3.11\"", + "decorator; python_version >= \"3.11\"", + "ipython>=7.31.1; python_version > \"3.6\" and python_version < \"3.11\"", + "ipython>=7.31.1; python_version >= \"3.11\"", + "tomli; python_version > \"3.6\" and python_version < \"3.11\"", +] +files = [ + {file = "ipdb-0.13.13-py3-none-any.whl", hash = "sha256:45529994741c4ab6d2388bfa5d7b725c2cf7fe9deffabdb8a6113aa5ed449ed4"}, + {file = "ipdb-0.13.13.tar.gz", hash = "sha256:e3ac6018ef05126d442af680aad863006ec19d02290561ac88b8b1c0b0cfc726"}, +] + 
[[package]] name = "ipykernel" version = "6.29.4" @@ -1345,7 +1428,7 @@ name = "ipython" version = "8.24.0" requires_python = ">=3.10" summary = "IPython: Productive Interactive Computing" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "colorama; sys_platform == \"win32\"", "decorator", @@ -1391,7 +1474,7 @@ name = "jedi" version = "0.19.1" requires_python = ">=3.6" summary = "An autocompletion tool for Python that can be used for text editors." -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "parso<0.9.0,>=0.8.3", ] @@ -1497,7 +1580,9 @@ summary = "An implementation of JSON Schema validation for Python" groups = ["default", "dev"] dependencies = [ "attrs>=22.2.0", + "importlib-resources>=1.4.0; python_version < \"3.9\"", "jsonschema-specifications>=2023.03.6", + "pkgutil-resolve-name>=1.3.10; python_version < \"3.9\"", "referencing>=0.28.4", "rpds-py>=0.7.1", ] @@ -1513,6 +1598,7 @@ requires_python = ">=3.8" summary = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" groups = ["default", "dev"] dependencies = [ + "importlib-resources>=1.4.0; python_version < \"3.9\"", "referencing>=0.31.0", ] files = [ @@ -1527,6 +1613,7 @@ requires_python = ">=3.8" summary = "Jupyter protocol implementation and client libraries" groups = ["default"] dependencies = [ + "importlib-metadata>=4.8.3; python_version < \"3.10\"", "jupyter-core!=5.0.*,>=4.12", "python-dateutil>=2.8.2", "pyzmq>=23.0", @@ -1579,6 +1666,9 @@ version = "1.4.5" requires_python = ">=3.7" summary = "A fast implementation of the Cassowary constraint solver" groups = ["default"] +dependencies = [ + "typing-extensions; python_version < \"3.8\"", +] files = [ {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:05703cf211d585109fcd72207a31bb170a0f22144d68298dc5e61b3c946518af"}, {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:146d14bebb7f1dc4d5fbf74f8a6cb15ac42baadee8912eb84ac0b3b2a3dc6ac3"}, 
@@ -1637,6 +1727,7 @@ groups = ["default"] dependencies = [ "certifi>=14.05.14", "google-auth>=1.0.1", + "ipaddress>=1.0.17; python_version == \"2.7\"", "oauthlib>=3.2.2", "python-dateutil>=2.5.3", "pyyaml>=5.4.1", @@ -1691,6 +1782,9 @@ version = "3.6" requires_python = ">=3.8" summary = "Python implementation of John Gruber's Markdown." groups = ["default"] +dependencies = [ + "importlib-metadata>=4.4; python_version < \"3.10\"", +] files = [ {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"}, {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"}, @@ -1750,6 +1844,7 @@ dependencies = [ "contourpy>=1.0.1", "cycler>=0.10", "fonttools>=4.22.0", + "importlib-resources>=3.2.0; python_version < \"3.10\"", "kiwisolver>=1.3.1", "numpy>=1.23", "packaging>=20.0", @@ -1782,7 +1877,7 @@ name = "matplotlib-inline" version = "0.1.7" requires_python = ">=3.8" summary = "Inline Matplotlib backend for Jupyter" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "traitlets", ] @@ -2090,6 +2185,7 @@ requires_python = ">=3.8" summary = "Powerful data structures for data analysis, time series, and statistics" groups = ["default"] dependencies = [ + "numpy>=1.20.3; python_version < \"3.10\"", "numpy>=1.21.0; python_version >= \"3.10\"", "numpy>=1.23.2; python_version >= \"3.11\"", "python-dateutil>=2.8.1", @@ -2116,7 +2212,7 @@ name = "parso" version = "0.8.4" requires_python = ">=3.6" summary = "A Python Parser" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, @@ -2139,10 +2235,10 @@ version = "2.1.2" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" summary = "Python 
datetimes made easy" groups = ["default"] -marker = "python_version < \"3.12\"" dependencies = [ "python-dateutil<3.0,>=2.6", "pytzdata>=2020.1", + "typing<4.0,>=3.6; python_version < \"3.5\"", ] files = [ {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, @@ -2152,7 +2248,7 @@ files = [ name = "pexpect" version = "4.9.0" summary = "Pexpect allows easy control of interactive console applications." -groups = ["default"] +groups = ["debug", "default"] marker = "sys_platform != \"win32\" and sys_platform != \"emscripten\"" dependencies = [ "ptyprocess>=0.5", @@ -2289,6 +2385,7 @@ dependencies = [ "httpcore<2.0.0,>=1.0.5", "httpx[http2]!=0.23.2,>=0.23", "humanize>=4.9.0", + "importlib-metadata>=4.4; python_version < \"3.10\"", "importlib-resources<6.2.0,>=6.1.3", "itsdangerous", "jinja2-humanize-extension>=0.4.0", @@ -2300,6 +2397,7 @@ dependencies = [ "packaging<24.3,>=21.3", "pathspec>=0.8.0", "pendulum<3.0; python_version < \"3.12\"", + "pendulum<4,>=3.0.0; python_version >= \"3.12\"", "pydantic-core<3.0.0,>=2.12.0", "pydantic[email]!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.10.0", "python-dateutil<3.0.0,>=2.8.2", @@ -2330,7 +2428,7 @@ name = "prompt-toolkit" version = "3.0.45" requires_python = ">=3.7.0" summary = "Library for building powerful interactive command lines in Python" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "wcwidth", ] @@ -2359,7 +2457,7 @@ files = [ name = "ptyprocess" version = "0.7.0" summary = "Run a subprocess in a pseudo terminal" -groups = ["default"] +groups = ["debug", "default"] marker = "sys_platform != \"win32\" and sys_platform != \"emscripten\"" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -2370,7 +2468,7 @@ files = [ name = "pure-eval" version = "0.2.2" summary = "Safely evaluate AST nodes without side effects" -groups = ["default"] +groups = ["debug", 
"default"] files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, @@ -2504,7 +2602,7 @@ name = "pygments" version = "2.18.0" requires_python = ">=3.8" summary = "Pygments is a syntax highlighting package written in Python." -groups = ["default", "docs"] +groups = ["debug", "default", "docs"] files = [ {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, @@ -2645,7 +2743,6 @@ version = "2020.1" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" summary = "The Olson timezone database for Python." groups = ["default"] -marker = "python_version < \"3.12\"" files = [ {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, @@ -2759,6 +2856,7 @@ dependencies = [ "PyYAML>=5.1", "aws-requests-auth>=0.4.2", "boto3>=1.10.0", + "importlib-metadata; python_version < \"3.8\"", "jsonlines==1.2.0", "jsonschema<5,>=3", "platformdirs>=2", @@ -2862,6 +2960,7 @@ version = "1.0.0" summary = "Asynchronous Python HTTP for Humans." 
groups = ["default"] dependencies = [ + "futures>=2.1.3; python_version < \"3.2\"", "requests>=1.2.0", ] files = [ @@ -2907,6 +3006,7 @@ groups = ["default"] dependencies = [ "markdown-it-py>=2.2.0", "pygments<3.0.0,>=2.13.0", + "typing-extensions<5.0,>=4.0.0; python_version < \"3.9\"", ] files = [ {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, @@ -3016,7 +3116,7 @@ version = "0.2.8" requires_python = ">=3.6" summary = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" groups = ["default"] -marker = "platform_python_implementation == \"CPython\" and python_version < \"3.13\"" +marker = "platform_python_implementation == \"CPython\"" files = [ {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, @@ -3187,7 +3287,7 @@ name = "six" version = "1.16.0" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" summary = "Python 2 and 3 compatibility utilities" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -3239,6 +3339,7 @@ dependencies = [ "colorama>=0.4.5; sys_platform == \"win32\"", "docutils<0.22,>=0.18.1", "imagesize>=1.3", + "importlib-metadata>=4.8; python_version < \"3.10\"", "packaging>=21.0", "requests>=2.25.0", "snowballstemmer>=2.0", @@ -3357,6 +3458,7 @@ summary = "Database Abstraction Library" groups = ["default"] dependencies = [ "greenlet!=0.4.17; platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or 
platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\"", + "importlib-metadata; python_version < \"3.8\"", "typing-extensions>=4.6.0", ] files = [ @@ -3416,7 +3518,7 @@ files = [ name = "stack-data" version = "0.6.3" summary = "Extract data from python stack frames and tracebacks for informative displays" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "asttokens>=2.1.0", "executing>=1.2.0", @@ -3486,7 +3588,7 @@ name = "tomli" version = "2.0.1" requires_python = ">=3.7" summary = "A lil' TOML parser" -groups = ["dev", "docs", "lint", "test"] +groups = ["debug", "dev", "docs", "lint", "test"] marker = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, @@ -3532,7 +3634,7 @@ name = "traitlets" version = "5.14.3" requires_python = ">=3.8" summary = "Traitlets Python configuration system" -groups = ["default", "dev"] +groups = ["debug", "default", "dev"] files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, @@ -3560,7 +3662,7 @@ name = "typing-extensions" version = "4.12.0" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" -groups = ["default", "lint"] +groups = ["debug", "default", "lint"] files = [ {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, @@ -3585,6 +3687,7 @@ requires_python = ">=3.8" summary = "tzinfo object for the local timezone" groups = ["default"] dependencies = [ + "backports-zoneinfo; 
python_version < \"3.9\"", "tzdata; platform_system == \"Windows\"", ] files = [ @@ -3675,6 +3778,7 @@ groups = ["dev"] dependencies = [ "distlib<1,>=0.3.7", "filelock<4,>=3.12.2", + "importlib-metadata>=6.6; python_version < \"3.8\"", "platformdirs<5,>=3.9.1", ] files = [ @@ -3686,7 +3790,7 @@ files = [ name = "wcwidth" version = "0.2.13" summary = "Measures the displayed width of unicode strings in a terminal" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -3791,6 +3895,7 @@ groups = ["default"] dependencies = [ "idna>=2.0", "multidict>=4.0", + "typing-extensions>=3.7.4; python_version < \"3.8\"", ] files = [ {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, diff --git a/pyproject.toml b/pyproject.toml index 14fcdfe..01d1aaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "setuptools>=70.0.0", "io-collection>=0.10.2", "python-dotenv>=1.0.1", + "h5py>=3.11.0", ] [project.urls] @@ -63,6 +64,9 @@ docs = [ "myst-parser>=3.0.1", "sphinx-copybutton>=0.5.2", ] +debug = [ + "ipdb>=0.13.13", +] [tool.black] line-length = 88 @@ -95,6 +99,7 @@ extend-ignore = [ "D100", # Missing docstring in public module "D101", # Missing docstring in public class "D103", # Missing docstring in public function + "D105", # Missing docstring in magic method "D107", # Missing docstring in __init__ "D202", # Blank lines between the function body and the function docstring "D203", # 1 blank line required before class docstring diff --git a/requirements.txt b/requirements.txt index 2dba41a..6a82226 100644 --- a/requirements.txt +++ b/requirements.txt @@ -475,6 +475,9 @@ iniconfig==2.0.0 \ 
io-collection==0.10.2 \ --hash=sha256:40faa2fe94e8049dd900c42c09fbb4b1b5da2a226a2cd1618a1ffb89a636ea18 \ --hash=sha256:66b5e5ae887fe532fbcfcfa75a6f09ee9afa27ad2480cf74ec0d1c2aabfacab9 +ipdb==0.13.13 \ + --hash=sha256:45529994741c4ab6d2388bfa5d7b725c2cf7fe9deffabdb8a6113aa5ed449ed4 \ + --hash=sha256:e3ac6018ef05126d442af680aad863006ec19d02290561ac88b8b1c0b0cfc726 ipykernel==6.29.4 \ --hash=sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da \ --hash=sha256:3d44070060f9475ac2092b760123fadf105d2e2493c24848b6691a7c4f42af5c diff --git a/subcell_pipeline/analysis/compression_metrics/README.md b/subcell_pipeline/analysis/compression_metrics/README.md index f5705dd..1960225 100644 --- a/subcell_pipeline/analysis/compression_metrics/README.md +++ b/subcell_pipeline/analysis/compression_metrics/README.md @@ -2,6 +2,6 @@ ## Metrics for comparing traces of compressed fibers -Analysis combines compression simulations from Cytosim and Readdy and calculates various metrics to compare the compressed fibers. +Analysis combines compression simulations from Cytosim and Readdy and calculates various compression metrics to compare fibers. 
-- **Calculate compression metrics** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/analysis/compression_metrics/_compare_compression_metrics.html)) +- **Compare compression metrics between simulators** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/analysis/compression_metrics/_compare_compression_metrics.html)) diff --git a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py index ce6bff3..2b7beab 100644 --- a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py +++ b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py @@ -1,5 +1,5 @@ # %% [markdown] -# # Compare metrics across simulators +# # Compare compression metrics between simulators # %% [markdown] """ @@ -26,6 +26,7 @@ from subcell_pipeline.analysis.compression_metrics.compression_analysis import ( get_compression_metric_data, + plot_metric_distribution, plot_metrics_vs_time, save_compression_metrics, ) @@ -39,12 +40,14 @@ Defines the `COMPRESSION_VELOCITY` simulation series, which compresses a single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with -five replicates each (random seeds 1, 2, 3, 4, and 5). +five replicates each and the baseline `NO_COMPRESSION` simulation series, which +simulates a single actin fiber with a free barbed end across five replicates. 
""" # %% # Name of the simulation series -series_name: str = "COMPRESSION_VELOCITY" +compression_series_name: str = "COMPRESSION_VELOCITY" +no_compression_series_name: str = "NO_COMPRESSION" # S3 bucket Cytosim for input and output files cytosim_bucket: str = "s3://cytosim-working-bucket" @@ -64,7 +67,7 @@ # Specify whether the metrics should be recalculated. Set this to true if you # make changes to any metric calculation functions. -recalculate: bool = False +recalculate: bool = True # %% [markdown] """ @@ -81,6 +84,8 @@ CompressionMetric.CALC_BENDING_ENERGY, CompressionMetric.CONTOUR_LENGTH, CompressionMetric.COMPRESSION_RATIO, + CompressionMetric.TOTAL_FIBER_TWIST, + CompressionMetric.TWIST_ANGLE, ] # %% [markdown] @@ -89,15 +94,26 @@ """ # %% -cytosim_metrics = get_compression_metric_data( +cytosim_metrics_compression = get_compression_metric_data( bucket=cytosim_bucket, - series_name=series_name, + series_name=compression_series_name, condition_keys=condition_keys, random_seeds=random_seeds, metrics=metrics, recalculate=recalculate, ) -cytosim_metrics["simulator"] = "cytosim" +cytosim_metrics_compression["simulator"] = "cytosim" + +# %% +cytosim_metrics_no_compression = get_compression_metric_data( + bucket=cytosim_bucket, + series_name=no_compression_series_name, + condition_keys=[""], + random_seeds=random_seeds, + metrics=metrics, + recalculate=recalculate, +) +cytosim_metrics_no_compression["simulator"] = "cytosim" # %% [markdown] """ @@ -105,15 +121,26 @@ """ # %% -readdy_metrics = get_compression_metric_data( +readdy_metrics_compression = get_compression_metric_data( bucket=readdy_bucket, - series_name=series_name, + series_name=f"ACTIN_{compression_series_name}", condition_keys=condition_keys, random_seeds=random_seeds, metrics=metrics, recalculate=recalculate, ) -readdy_metrics["simulator"] = "readdy" +readdy_metrics_compression["simulator"] = "readdy" + +# %% +readdy_metrics_no_compression = get_compression_metric_data( + bucket=readdy_bucket, + 
series_name=f"ACTIN_{no_compression_series_name}", + condition_keys=[""], + random_seeds=random_seeds, + metrics=metrics, + recalculate=recalculate, +) +readdy_metrics_no_compression["simulator"] = "readdy" # %% [markdown] """ @@ -121,7 +148,7 @@ """ # %% -combined_metrics = pd.concat([cytosim_metrics, readdy_metrics]) +combined_metrics = pd.concat([cytosim_metrics_compression, readdy_metrics_compression]) combined_metrics["repeat"] = combined_metrics["seed"] - 1 combined_metrics["velocity"] = combined_metrics["key"].astype("int") / 10 @@ -135,7 +162,6 @@ combined_metrics, str(save_location), "actin_compression_combined_metrics.csv" ) - # %% [markdown] """ ## Plot metrics vs time """ @@ -148,3 +174,11 @@ figure_path=save_location, suffix="_subsampled", ) + +# %% +plot_metric_distribution( + df=combined_metrics, + metrics=metrics, + figure_path=save_location, + suffix="_subsampled", +) diff --git a/subcell_pipeline/analysis/compression_metrics/compression_analysis.py b/subcell_pipeline/analysis/compression_metrics/compression_analysis.py index 0527e17..b820999 100644 --- a/subcell_pipeline/analysis/compression_metrics/compression_analysis.py +++ b/subcell_pipeline/analysis/compression_metrics/compression_analysis.py @@ -131,7 +131,10 @@ def calculate_compression_metrics( polymer_trace=polymer_trace, **options ) - return df_metrics.reset_index().rename(columns={"index": "time"}) + df_metrics = df_metrics.reset_index().rename(columns={"index": "time"}) + df_metrics["normalized_time"] = df_metrics["time"] / df_metrics["time"].max() + + return df_metrics def save_compression_metrics( @@ -227,3 +230,62 @@ def plot_metrics_vs_time( fig.tight_layout() if figure_path is not None: fig.savefig(figure_path / f"{metric.value}_vs_time{suffix}.png") + + +def plot_metric_distribution( + df: pd.DataFrame, + metrics: List[CompressionMetric], + figure_path: Union[Path, None] = None, + suffix: str = "", +) -> None: + """ + Plot the distribution of each metric, grouped by velocity and simulator.
+ + Parameters + ---------- + df + The input DataFrame. + + metrics + The list of metrics to plot. + + figure_path + The path to save the figure. + + suffix + The suffix to append to the figure filename. + Defaults to "". + + """ + num_velocities = df["velocity"].nunique() + plt.rcParams.update({"font.size": 16}) + + for metric in metrics: + fig, axs = plt.subplots( + 1, + num_velocities, + figsize=(num_velocities * 5, 5), + sharey=True, + sharex=True, + dpi=300, + ) + axs = axs.ravel() + for ct, (velocity, df_velocity) in enumerate(df.groupby("velocity")): + metric_values = df_velocity[metric.value] + bins = np.linspace(np.nanmin(metric_values), np.nanmax(metric_values), 20) + for simulator, df_simulator in df_velocity.groupby("simulator"): + axs[ct].hist( + df_simulator[metric.value], + label=f"{simulator}", + color=SIMULATOR_COLOR_MAP[simulator], # type: ignore + alpha=0.7, + bins=bins, + ) + axs[ct].set_title(f"Velocity: {velocity}") + if ct == 0: + axs[ct].legend() + fig.supxlabel(metric.label()) + fig.supylabel("Count") + fig.tight_layout() + if figure_path is not None: + fig.savefig(figure_path / f"{metric.value}_histogram{suffix}.png") diff --git a/subcell_pipeline/analysis/compression_metrics/compression_metric.py b/subcell_pipeline/analysis/compression_metrics/compression_metric.py index 8de9d64..e91250b 100644 --- a/subcell_pipeline/analysis/compression_metrics/compression_metric.py +++ b/subcell_pipeline/analysis/compression_metrics/compression_metric.py @@ -17,6 +17,7 @@ get_sum_bending_energy, get_third_component_variance, get_total_fiber_twist, + get_twist_angle, ) @@ -32,6 +33,7 @@ class CompressionMetric(Enum): CALC_BENDING_ENERGY = "calc_bending_energy" CONTOUR_LENGTH = "contour_length" COMPRESSION_RATIO = "compression_ratio" + TWIST_ANGLE = "twist_angle" def label(self: Enum) -> str: """ @@ -54,13 +56,70 @@ def label(self: Enum) -> str: CompressionMetric.AVERAGE_PERP_DISTANCE.value: ( "Average Perpendicular Distance" ), - 
CompressionMetric.TOTAL_FIBER_TWIST.value: "Total Fiber Twist", + CompressionMetric.TOTAL_FIBER_TWIST.value: "Fiber Twist", CompressionMetric.CALC_BENDING_ENERGY.value: "Calculated Bending Energy", CompressionMetric.CONTOUR_LENGTH.value: "Contour Length", CompressionMetric.COMPRESSION_RATIO.value: "Compression Ratio", + CompressionMetric.TWIST_ANGLE.value: "Twist Angle", } return labels.get(self.value, "") + def description(self: Enum) -> str: + """ + Return the description for the compression metric. + + Parameters + ---------- + self + the CompressionMetric object + + Returns + ------- + : + The description (and units) for the compression metric. + """ + units = { + CompressionMetric.NON_COPLANARITY.value: "3rd component variance from PCA", + CompressionMetric.PEAK_ASYMMETRY.value: "normalized peak distance", + CompressionMetric.SUM_BENDING_ENERGY.value: "sum of bending energy", + CompressionMetric.AVERAGE_PERP_DISTANCE.value: "distance (nm)", + CompressionMetric.TOTAL_FIBER_TWIST.value: "total fiber twist", + CompressionMetric.CALC_BENDING_ENERGY.value: "energy", + CompressionMetric.CONTOUR_LENGTH.value: "filament contour length (nm)", + CompressionMetric.COMPRESSION_RATIO.value: "compression ratio", + CompressionMetric.TWIST_ANGLE.value: ( + "difference between initial and final tangent (degrees)" + ), + } + return units.get(self.value, "") + + def bounds(self: Enum) -> tuple[float, float]: + """ + Return the default bounds for the compression metric. + + Parameters + ---------- + self + the CompressionMetric object + + Returns + ------- + : + The default bounds for the compression metric. 
+ """ + bounds = { + CompressionMetric.NON_COPLANARITY.value: (0, 0.03), + CompressionMetric.PEAK_ASYMMETRY.value: (0, 0.5), + CompressionMetric.SUM_BENDING_ENERGY.value: (0, 0), # TODO + CompressionMetric.AVERAGE_PERP_DISTANCE.value: (0, 85.0), + CompressionMetric.TOTAL_FIBER_TWIST.value: (0, 0), # TODO + CompressionMetric.CALC_BENDING_ENERGY.value: (0, 10), + CompressionMetric.CONTOUR_LENGTH.value: (480, 505), + CompressionMetric.COMPRESSION_RATIO.value: (0, 1), # TODO + CompressionMetric.TWIST_ANGLE.value: (-180, 180), + } + return bounds.get(self.value, (0, 0)) + def calculate_metric( self, polymer_trace: np.ndarray, **options: dict[str, Any] ) -> Union[float, np.floating[Any]]: @@ -94,5 +153,6 @@ def calculate_metric( CompressionMetric.CALC_BENDING_ENERGY: get_bending_energy_from_trace, CompressionMetric.CONTOUR_LENGTH: get_contour_length_from_trace, CompressionMetric.COMPRESSION_RATIO: get_compression_ratio, + CompressionMetric.TWIST_ANGLE: get_twist_angle, } return functions[self](polymer_trace, **options) diff --git a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py index 97164af..4fd026e 100644 --- a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py +++ b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py @@ -1,6 +1,6 @@ """Methods to calculate metrics from polymer trace data.""" -from typing import Any, Dict, Tuple, Union +from typing import Any, Dict, Tuple import numpy as np from sklearn.decomposition import PCA @@ -25,24 +25,24 @@ def get_end_to_end_axis_distances_and_projections( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace points - at a given time + Array containing the x,y,z positions of the polymer trace points + at a given time. 
Returns ------- perp_distances - perpendicular distances of the polymer trace from the end-to-end axis + Perpendicular distances of the polymer trace from the end-to-end axis. scaled_projections - length of fiber point projections along the end-to-end axis, scaled + Length of fiber point projections along the end-to-end axis, scaled by axis length. Can be negative. projection_positions - positions of points on the end-to-end axis that are - closest from the respective points in the polymer trace. - The distance from projection_positions - to the trace points is the shortest distance from the end-to-end axis + Positions of points on the end-to-end axis that are closest from the + respective points in the polymer trace. + The distance from projection_positions to the trace points is the + shortest distance from the end-to-end axis. """ end_to_end_axis = get_end_to_end_unit_vector(polymer_trace=polymer_trace) end_to_end_axis_length = np.linalg.norm(polymer_trace[-1] - polymer_trace[0]) @@ -61,7 +61,7 @@ def get_end_to_end_axis_distances_and_projections( def get_average_distance_from_end_to_end_axis( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the average perpendicular distance of polymer trace points from the end-to-end axis. @@ -69,8 +69,8 @@ def get_average_distance_from_end_to_end_axis( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace - at a given time + Array containing the x,y,z positions of the polymer trace + at a given time. **options Additional options as key-value pairs. @@ -78,15 +78,15 @@ def get_average_distance_from_end_to_end_axis( Returns ------- : - average perpendicular distance of polymer trace points from the - end-to-end axis + Average perpendicular distance of polymer trace points from the + end-to-end axis. 
""" perp_distances, _, _ = get_end_to_end_axis_distances_and_projections( polymer_trace=polymer_trace ) avg_perp_distance = np.nanmean(perp_distances) - return avg_perp_distance + return float(avg_perp_distance) def get_asymmetry_of_peak( @@ -100,8 +100,8 @@ def get_asymmetry_of_peak( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace - at a given time + Array containing the x,y,z positions of the polymer trace + at a given time. **options Additional options as key-value pairs. @@ -109,7 +109,7 @@ def get_asymmetry_of_peak( Returns ------- : - scaled distance of the projection of the peak from the axis midpoint + Scaled distance of the projection of the peak from the axis midpoint. """ ( perp_distances, @@ -138,12 +138,12 @@ def get_pca_polymer_trace_projection( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. Returns ------- pca_projection - PCA projection of the polymer trace + PCA projection of the polymer trace. """ pca = fit_pca_to_polymer_trace(polymer_trace=polymer_trace) return pca.transform(polymer_trace) @@ -152,14 +152,14 @@ def get_pca_polymer_trace_projection( def get_contour_length_from_trace( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the sum of inter-monomer distances in the trace. Parameters ---------- polymer_trace - n x 3 array containing the x,y,z positions of the polymer trace + n x 3 array containing the x,y,z positions of the polymer trace. **options Additional options as key-value pairs. @@ -167,36 +167,33 @@ def get_contour_length_from_trace( Returns ------- : - sum of inter-monomer distances in the trace + Sum of inter-monomer distances in the trace. 
""" - total_distance = np.float_(0) - for i in range(len(polymer_trace) - 1): - total_distance += np.linalg.norm(polymer_trace[i] - polymer_trace[i + 1]) - return total_distance + return np.sum(np.linalg.norm(np.diff(polymer_trace, axis=0), axis=1)) def get_bending_energy_from_trace( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the bending energy per monomer of a polymer trace. Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options Additional options as key-value pairs. bending_constant: float - bending constant of the fiber in pN nm + Bending constant of the fiber in pN nm. Returns ------- : - bending energy per monomer of the polymer trace + Bending energy per monomer of the polymer trace. """ bending_constant = options.get("bending_constant", DEFAULT_BENDING_CONSTANT) @@ -207,6 +204,14 @@ def get_bending_energy_from_trace( vec1 = polymer_trace[ind + 1] - polymer_trace[ind] vec2 = polymer_trace[ind + 2] - polymer_trace[ind + 1] + if np.isclose(np.linalg.norm(vec1), 0.0) or np.isclose( + np.linalg.norm(vec2), 0.0 + ): + # TODO handle this differently? + cos_angle[ind] = 0.0 + print("Warning: zero vector in bending energy calculation.") + continue + cos_angle[ind] = ( np.dot(vec1, vec2) / np.linalg.norm(vec1) / np.linalg.norm(vec2) ) @@ -215,12 +220,190 @@ def get_bending_energy_from_trace( # the type checker is unable to infer its type energy = bending_constant * (1 - np.nanmean(cos_angle)) - return energy + return energy.item() + + +def get_2d_polymer_trace( + polymer_trace: np.ndarray, + compression_axis: int = 0, +) -> np.ndarray: + """ + Get the 2D projection of the polymer trace. + + Parameters + ---------- + polymer_trace + The polymer trace as an Nx3 numpy array. + + compression_axis + The axis along which the polymer trace is compressed. 
+ + Returns + ------- + : + The 2D projection of the polymer trace. + """ + if polymer_trace.shape[1] == 2: + return polymer_trace + + return polymer_trace[ + :, [ax for ax in range(polymer_trace.shape[1]) if ax != compression_axis] + ] + + +def get_normalized_tangent_vectors( + polymer_trace: np.ndarray, +) -> np.ndarray: + """ + Calculate the normalized tangent vectors for a polymer trace. + + Parameters + ---------- + polymer_trace + The polymer trace as an Nx3 numpy array. + + Returns + ------- + : + The normalized tangent vectors for the polymer. + """ + tangents = np.diff(polymer_trace, axis=0) + + tangent_lengths = np.linalg.norm(tangents, axis=1) + + if np.all(tangent_lengths < ABSOLUTE_TOLERANCE): + return np.zeros_like(tangents) + + tangents /= tangent_lengths[:, np.newaxis] + + return tangents + + +def get_twist_angle( + polymer_trace: np.ndarray, + **options: Dict[str, Any], +) -> float: + """ + Calculate the twist angle of the polymer trace. + + Parameters + ---------- + polymer_trace + Array containing the x,y,z positions of the polymer trace. + + **options: Dict[str, Any] + Additional options as key-value pairs. + + Returns + ------- + : + Twist angle of the polymer trace in degrees. + """ + compression_axis = options.get("compression_axis", 0) + assert isinstance(compression_axis, int) + + trace_2d = get_2d_polymer_trace( + polymer_trace=polymer_trace, compression_axis=compression_axis + ) + + tangents = get_normalized_tangent_vectors( + polymer_trace=trace_2d, + ) + + angle = get_angle_between_vectors(tangents[0], -tangents[-1], signed=False) + chirality = get_chirality(polymer_trace=polymer_trace) + + return chirality * angle * 180 / np.pi + + +def get_chirality( + polymer_trace: np.ndarray, + **options: Dict[str, Any], +) -> float: + """ + Calculate the chirality of a polymer trace. + + Parameters + ---------- + polymer_trace + Array containing the x,y,z positions of the polymer trace. 
+ + **options: Dict[str, Any] + Additional options as key-value pairs. + + Returns + ------- + : + Chirality of the polymer trace. + """ + trace_2d = get_2d_polymer_trace(polymer_trace=polymer_trace) + tangents = get_normalized_tangent_vectors(polymer_trace=trace_2d) + + chirality = 0 + for i in range(len(tangents) - 1): + cross_product = np.cross(tangents[i], tangents[i + 1]) + if cross_product > 0: + chirality += 1 + elif cross_product < 0: + chirality -= 1 + + return np.sign(chirality) def get_total_fiber_twist( polymer_trace: np.ndarray, **options: Dict[str, Any], +) -> float: + """ + Calculate the total twist of a polymer trace using the normal + vectors. + + Parameters + ---------- + polymer_trace + Array containing the x,y,z positions of the polymer trace. + + **options: Dict[str, Any] + Additional options as key-value pairs: + + signed: bool + Whether to return the signed or unsigned total twist. + + Returns + ------- + : + Total twist of the polymer trace. + """ + signed = options.get("signed", False) + assert isinstance(signed, bool) + + tangents = np.diff(polymer_trace, axis=0) + tangents /= np.linalg.norm(tangents, axis=1)[:, np.newaxis] + + normals = np.cross(tangents[:-1], tangents[1:]) + normal_lengths = np.linalg.norm(normals, axis=1) + if np.all(normal_lengths < ABSOLUTE_TOLERANCE): + return 0 + normals = normals / normal_lengths[:, np.newaxis] + + twists = [] + for i in range(1, len(normals)): + angle = get_angle_between_vectors(normals[i - 1], normals[i], signed=signed) + + twists.append(angle) + + # Sum the twist angles to get the total twist + total_twist = np.sum(twists) + + # Normalize by the contour length + total_twist /= get_contour_length_from_trace(polymer_trace) + + return total_twist + + +def get_total_fiber_twist_project( + polymer_trace: np.ndarray, + **options: Dict[str, Any], ) -> float: """ Calculate the total twist using projections of the polymer trace @@ -229,22 +412,22 @@ def get_total_fiber_twist( Parameters ---------- 
polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options: Dict[str, Any] Additional options as key-value pairs: compression_axis: int - axis along which the polymer trace is compressed + Axis along which the polymer trace is compressed. signed: bool - whether to return the signed or unsigned total twist + Whether to return the signed or unsigned total twist. tolerance: float ABSOLUTE_TOLERANCE Returns ------- : - sum of angles between PCA projection vectors + Sum of angles between PCA projection vectors. """ compression_axis = options.get("compression_axis", 0) signed = options.get("signed", True) @@ -252,10 +435,11 @@ def get_total_fiber_twist( assert isinstance(signed, bool) assert isinstance(tolerance, (float, np.floating)) + assert isinstance(compression_axis, int) - trace_2d = polymer_trace[ - :, [ax for ax in range(polymer_trace.shape[1]) if ax != compression_axis] - ] + trace_2d = get_2d_polymer_trace( + polymer_trace=polymer_trace, compression_axis=compression_axis + ) trace_2d = trace_2d - np.mean(trace_2d, axis=0) return get_total_fiber_twist_2d(trace_2d, signed=signed, tolerance=tolerance) @@ -272,7 +456,7 @@ def get_total_fiber_twist_pca( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. tolerance ABSOLUTE_TOLERANCE @@ -280,7 +464,7 @@ def get_total_fiber_twist_pca( Returns ------- : - sum of angles between PCA projection vectors + Sum of angles between PCA projection vectors. """ pca_trace = get_pca_polymer_trace_projection(polymer_trace=polymer_trace) pca_trace_2d = pca_trace[:, 1:] @@ -299,19 +483,19 @@ def get_angle_between_vectors( Parameters ---------- vec1 - The first vector + The first vector. vec2 - The second vector + The second vector. 
signed - if True, returns the signed angle between vec1 and vec2 - Default is False + If True, returns the signed angle between vec1 and vec2 + Default is False. Returns ------- : - signed angle between vec1 and vec2 + Signed angle between vec1 and vec2. """ vec1_length = np.linalg.norm(vec1) vec2_length = np.linalg.norm(vec2) @@ -344,19 +528,19 @@ def get_total_fiber_twist_2d( Parameters ---------- trace_2d - array containing the x,y positions of the polymer trace + Array containing the x,y positions of the polymer trace. signed - if True, returns the signed total twist - Default is False + If True, returns the signed total twist. + Default is False. tolerance - Tolerance for vector length + Tolerance for vector length. Returns ------- : - sum of angles between trace vectors + Sum of angles between trace vectors. """ prev_vec = None angles = np.zeros(len(trace_2d)) @@ -389,12 +573,12 @@ def fit_pca_to_polymer_trace( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. Returns ------- : - PCA object fitted to the polymer trace + PCA object fitted to the polymer trace. """ pca = PCA(n_components=3) pca.fit(polymer_trace) @@ -412,7 +596,7 @@ def get_third_component_variance( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options: Dict[str, Any] Additional options as key-value pairs. @@ -420,8 +604,8 @@ def get_third_component_variance( Returns ------- : - noncoplanarity of fiber - defined as the explained variance ratio of the third PCA component + Noncoplanarity of fiber. + Defined as the explained variance ratio of the third PCA component. 
""" pca = fit_pca_to_polymer_trace(polymer_trace=polymer_trace) return pca.explained_variance_ratio_[2] @@ -454,7 +638,7 @@ def get_sum_bending_energy( def get_compression_ratio( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the compression ratio of a polymer trace. @@ -474,5 +658,5 @@ def get_compression_ratio( : The compression ratio of the polymer trace. """ - end_to_end_axis_length = np.linalg.norm(polymer_trace[-1] - polymer_trace[0]) + end_to_end_axis_length = np.linalg.norm(polymer_trace[-1] - polymer_trace[0]).item() return 1 - end_to_end_axis_length / get_contour_length_from_trace(polymer_trace) diff --git a/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py b/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py index ecf8f5a..205d9de 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py @@ -67,7 +67,9 @@ """ # %% -readdy_data = get_merged_data(readdy_bucket, series_name, condition_keys, random_seeds) +readdy_data = get_merged_data( + readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds +) readdy_data["simulator"] = "readdy" # %% diff --git a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py index 14596d1..221feab 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py @@ -25,9 +25,8 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% -from pathlib import Path - import pandas as pd +from io_collection.save.save_pickle import save_pickle 
from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import ( get_merged_data, @@ -69,7 +68,7 @@ condition_keys: list[str] = ["0047", "0150", "0470", "1500"] # Location to save analysis results (S3 bucket or local path) -save_location: str = str(Path(__file__).parents[3] / "analysis_outputs") +save_location: str = "s3://subcell-working-bucket" # %% [markdown] """ @@ -81,7 +80,9 @@ """ # %% -readdy_data = get_merged_data(readdy_bucket, series_name, condition_keys, random_seeds) +readdy_data = get_merged_data( + readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds +) readdy_data["simulator"] = "readdy" # %% @@ -106,10 +107,10 @@ ("cytosim", "0150"): 0.01, ("cytosim", "0470"): 0.00316, ("cytosim", "1500"): 0.001, - ("readdy", "0047"): 1000, - ("readdy", "0150"): 1000, - ("readdy", "0470"): 1000, - ("readdy", "1500"): 1000, + ("readdy", "0047"): 100, + ("readdy", "0150"): 100, + ("readdy", "0470"): 100, + ("readdy", "1500"): 100, } save_aligned_fibers( @@ -132,6 +133,14 @@ # %% pca_results, pca = run_pca(data) +# %% [markdown] +""" +## Save PCA object +""" + +# %% +save_pickle(save_location, "actin_compression_pca.pkl", pca) + # %% [markdown] """ ## Save PCA results @@ -149,6 +158,7 @@ """ ## Save PCA trajectories """ + # %% save_pca_trajectories( pca_results, save_location, "actin_compression_pca_trajectories.json" @@ -158,10 +168,11 @@ """ ## Save PCA transforms """ + # %% points: list[list[float]] = [ - [-600, -300, 0, 300, 600, 900], - [-200, 0, 200, 400], + [-900, -600, -300, 0, 300, 600], + [-600, -400, -200, 0, 200], ] save_pca_transforms(pca, points, save_location, "actin_compression_pca_transforms.json") diff --git a/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py b/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py index df98320..80be79b 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py @@ -93,7 +93,7 
@@ def align_fibers(data: pd.DataFrame) -> None: if time == 0: fiber = coords else: - fiber = align_fiber(coords) + fiber, _ = align_fiber(coords) aligned_fibers.append(fiber) @@ -104,7 +104,7 @@ def align_fibers(data: pd.DataFrame) -> None: data["zpos"] = all_aligned_fibers[:, 2] -def align_fiber(coords: np.ndarray) -> np.ndarray: +def align_fiber(coords: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ Align an array of x, y, z coordinates along the positive y axis. @@ -132,7 +132,7 @@ def align_fiber(coords: np.ndarray) -> np.ndarray: # Rotate y and z rotated = np.dot(coords[:, 1:], rot) - return np.concatenate((coords[:, 0:1], rotated), axis=1) + return np.concatenate((coords[:, 0:1], rotated), axis=1), rot def reshape_fibers(data: pd.DataFrame) -> tuple[np.ndarray, pd.DataFrame]: diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index b463de1..b712d82 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -26,6 +26,8 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% +import pandas as pd + from subcell_pipeline.analysis.tomography_data.tomography_data import ( get_branched_tomography_data, get_unbranched_tomography_data, @@ -51,6 +53,9 @@ # Data repository for downloading tomography data repository = "https://raw.githubusercontent.com/RangamaniLabUCSD/actincme/master/PolarityAnalysis/" +# Conversion factor from pixels to um for this dataset +tomography_scale_factor: float = 0.0006 + # Folders and names of branched actin datasets branched_datasets = [ ("2018August_Tomo27", "TomoAugust_27_earlyCME"), @@ -70,15 +75,20 @@ ("2018November_32", "TomoNovember_32_Vesicle"), ] -# Spatial conversion scaling factor (pixels to um) -scale_factor = 0.00006 - # %% branched_df = 
get_branched_tomography_data( - bucket, name, repository, branched_datasets, scale_factor + bucket=bucket, + name=name, + repository=repository, + datasets=branched_datasets, + scale_factor=tomography_scale_factor, ) unbranched_df = get_unbranched_tomography_data( - bucket, name, repository, unbranched_datasets, scale_factor + bucket=bucket, + name=name, + repository=repository, + datasets=unbranched_datasets, + scale_factor=tomography_scale_factor, ) # %% [markdown] @@ -87,7 +97,9 @@ """ # %% -plot_tomography_data_by_dataset(branched_df) +plot_tomography_data_by_dataset( + branched_df, bucket, f"{name}/{name}_plots_branched.png" +) # %% [markdown] """ @@ -95,7 +107,9 @@ """ # %% -plot_tomography_data_by_dataset(unbranched_df) +plot_tomography_data_by_dataset( + unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png" +) # %% [markdown] """ @@ -106,11 +120,13 @@ # %% # Number of monomer points per fiber -n_monomer_points = 200 +n_monomer_points = 20 # Minimum number of points for valid fiber minimum_points = 3 +# True to recalculate the sampled tomography data, False otherwise. 
+recalculate = True
 
 # %% [markdown]
 """
@@ -123,8 +139,14 @@
 
 # %%
 sampled_key = f"{name}/{name}_coordinates_sampled.csv"
 
+all_tomogram_df = pd.concat([branched_df, unbranched_df])
 sampled_data = sample_tomography_data(
-    unbranched_df, bucket, sampled_key, n_monomer_points, minimum_points
+    all_tomogram_df,
+    bucket,
+    sampled_key,
+    n_monomer_points,
+    minimum_points,
+    recalculate=recalculate,
 )
 
 # %% [markdown]
@@ -133,4 +155,8 @@
 """
 
 # %%
-plot_tomography_data_by_dataset(sampled_data)
+plot_tomography_data_by_dataset(
+    sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png"
+)
+
+# %%
diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py
index e3ee13e..8d4a8e4 100644
--- a/subcell_pipeline/analysis/tomography_data/tomography_data.py
+++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py
@@ -1,11 +1,38 @@
+import os
+
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from io_collection.keys.check_key import check_key
 from io_collection.load.load_dataframe import load_dataframe
 from io_collection.save.save_dataframe import save_dataframe
+from io_collection.save.save_figure import save_figure
+
+TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"]
 
-SAMPLE_COLUMNS = ["xpos", "ypos", "zpos"]
+
+def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> bool:
+    """
+    Test whether the angles between consecutive segments of a polymer
+    trace are less than 90 degrees.
+
+    Parameters
+    ----------
+    polymer_trace
+        A 2D array where each row is a point in 3D space.
+
+    Returns
+    -------
+    :
+        True if all consecutive angles are less than 90 degrees.
+ """ + vectors = polymer_trace[1:] - polymer_trace[:-1] + + vectors /= np.linalg.norm(vectors, axis=1)[:, np.newaxis] + dot_products = np.dot(vectors[1:], vectors[:-1].T) + + # Check if any angle is greater than 90 degrees + return np.all(dot_products > 0).item() def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame: @@ -32,7 +59,7 @@ def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame: elif len(coordinates.columns) == 5: coordinates.columns = ["object", label, "xpos", "ypos", "zpos"] else: - print("Data file [ {file} ] has an unexpected number of columns") + print(f"Data file [ {file} ] has an unexpected number of columns") return coordinates @@ -58,7 +85,7 @@ def get_branched_tomography_data( bucket: str, name: str, repository: str, - datasets: "list[tuple[str, str]]", + datasets: list[tuple[str, str]], scale_factor: float = 1.0, ) -> pd.DataFrame: """ @@ -92,7 +119,7 @@ def get_unbranched_tomography_data( bucket: str, name: str, repository: str, - datasets: "list[tuple[str, str]]", + datasets: list[tuple[str, str]], scale_factor: float = 1.0, ) -> pd.DataFrame: """ @@ -126,7 +153,7 @@ def get_tomography_data( bucket: str, name: str, repository: str, - datasets: "list[tuple[str, str]]", + datasets: list[tuple[str, str]], group: str, scale_factor: float = 1.0, ) -> pd.DataFrame: @@ -187,7 +214,8 @@ def sample_tomography_data( save_key: str, n_monomer_points: int, minimum_points: int, - sampled_columns: list[str] = SAMPLE_COLUMNS, + sampled_columns: list[str] = TOMOGRAPHY_SAMPLE_COLUMNS, + recalculate: bool = False, ) -> pd.DataFrame: """ Sample selected columns from tomography data at given resolution. @@ -206,6 +234,8 @@ def sample_tomography_data( Minimum number of points for valid fiber. sampled_columns List of column names to sample. + recalculate + True to recalculate the sampled tomography data, False otherwise. Returns ------- @@ -213,12 +243,14 @@ def sample_tomography_data( Sampled tomography data. 
""" - if check_key(save_location, save_key): + if check_key(save_location, save_key) and not recalculate: print(f"Loading existing sampled tomogram data from [ { save_key } ]") return load_dataframe(save_location, save_key) else: all_sampled_points = [] + # TODO sort experimental samples in order along the fiber before resampling + # (see simularium visualization) for fiber_id, group in data.groupby("id"): if len(group) < minimum_points: continue @@ -232,9 +264,13 @@ def sample_tomography_data( sampled_points[column] = np.interp( np.linspace(0, 1, n_monomer_points), np.linspace(0, 1, group.shape[0]), - group[column].values, + group[column].to_numpy(), ) + sampled_points["ordered"] = test_consecutive_segment_angles( + sampled_points[sampled_columns].to_numpy() + ) + all_sampled_points.append(sampled_points) all_sampled_df = pd.concat(all_sampled_points) @@ -245,7 +281,11 @@ def sample_tomography_data( return all_sampled_df -def plot_tomography_data_by_dataset(data: pd.DataFrame) -> None: +def plot_tomography_data_by_dataset( + data: pd.DataFrame, + bucket: str, + output_key: str, +) -> None: """ Plot tomography data for each dataset. @@ -253,11 +293,14 @@ def plot_tomography_data_by_dataset(data: pd.DataFrame) -> None: ---------- data Tomography data. + bucket: + Where to upload the results. + output_key + File key for results. 
""" - for dataset, group in data.groupby("dataset"): - _, ax = plt.subplots(1, 3, figsize=(6, 2)) + figure, ax = plt.subplots(1, 3, figsize=(6, 2)) ax[1].set_title(dataset) views = ["XY", "XZ", "YZ"] @@ -272,4 +315,5 @@ def plot_tomography_data_by_dataset(data: pd.DataFrame) -> None: ax[1].plot(fiber["xpos"], fiber["zpos"], marker="o", ms=1, lw=1) ax[2].plot(fiber["ypos"], fiber["zpos"], marker="o", ms=1, lw=1) - plt.show() + base_name, ext = os.path.splitext(output_key) + save_figure(bucket, f"{base_name}_{dataset}.{ext}", figure) diff --git a/subcell_pipeline/simulation/batch_simulations.py b/subcell_pipeline/simulation/batch_simulations.py index ae6980f..377697b 100644 --- a/subcell_pipeline/simulation/batch_simulations.py +++ b/subcell_pipeline/simulation/batch_simulations.py @@ -1,12 +1,14 @@ """Methods for running simulations on AWS Batch.""" import re +from typing import Optional import boto3 from container_collection.batch.get_batch_logs import get_batch_logs from container_collection.batch.make_batch_job import make_batch_job from container_collection.batch.register_batch_job import register_batch_job from container_collection.batch.submit_batch_job import submit_batch_job +from io_collection.keys.copy_key import copy_key from io_collection.save.save_text import save_text @@ -230,3 +232,45 @@ def check_and_save_job_logs( logs = get_batch_logs(response["jobArn"], " ") save_text(bucket, log_key, logs) + + +def copy_simulation_outputs( + bucket: str, + series_name: str, + source_template: str, + n_replicates: int, + condition_keys: Optional[dict[str, str]] = None, +) -> None: + """ + Copy simulation outputs from where they are saved to pipeline file structure. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + source_template + Template string for source output files. + n_replicates : int + _Number of simulation replicates. + condition_keys + Map of source to target condition keys. 
+ """ + + if condition_keys is None: + condition_keys = {"": ""} + + for index in range(n_replicates): + for source_condition, target_condition in condition_keys.items(): + if source_condition == "" and target_condition == "": + source_key = source_template % (index) + target_key = f"{series_name}/outputs/{series_name}_{index}.h5" + else: + source_key = source_template % (source_condition, index) + target_key = ( + f"{series_name}/outputs/{series_name}_{target_condition}_{index}.h5" + ) + + print(f"Copying [ {source_key} ] to [ {target_key} ]") + copy_key(bucket, source_key, target_key) diff --git a/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py b/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py index 32c62ae..a16c8ce 100644 --- a/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py +++ b/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py @@ -57,8 +57,8 @@ files and parse them into a tidy data format. If the parsed file for a given condition key and random seed already exists, parsing is skipped. -- Input: `(name)/outputs/(name)_(condition_key)_(index)/` -- Output: `(name)/data/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index)/` +- Output: `(series_name)/data/(series_name)_(condition_key)_(seed).csv` """ # %% @@ -86,8 +86,8 @@ sample the timepoints and monomer points. If the sampled file for a given condition key and random seed already exists, sampling is skipped. 
-- Input: `(name)/data/(name)_(condition_key)_(seed).csv` -- Output: `(name)/samples/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/data/(series_name)_(condition_key)_(seed).csv` +- Output: `(series_name)/samples/(series_name)_(condition_key)_(seed).csv` """ # %% diff --git a/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py b/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py index da1fce8..edef84b 100644 --- a/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py +++ b/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py @@ -54,8 +54,8 @@ files and parse them into a tidy data format. If the parsed file for a given condition key and random seed already exists, parsing is skipped. -- Input: `(name)/outputs/(name)_(condition_key)_(index)/` -- Output: `(name)/data/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/outputs/(series_name)_(index)/` +- Output: `(series_name)/data/(series_name)_(seed).csv` """ # %% @@ -83,8 +83,8 @@ sample the timepoints and monomer points. If the sampled file for a given condition key and random seed already exists, sampling is skipped. 
-- Input: `(name)/data/(name)_(condition_key)_(seed).csv` -- Output: `(name)/samples/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/data/(series_name)_(condition_key)_(seed).csv` +- Output: `(series_name)/samples/(series_name)_(condition_key)_(seed).csv` """ # %% diff --git a/subcell_pipeline/simulation/post_processing.py b/subcell_pipeline/simulation/post_processing.py index bb296d2..f7fd82d 100644 --- a/subcell_pipeline/simulation/post_processing.py +++ b/subcell_pipeline/simulation/post_processing.py @@ -93,7 +93,7 @@ def sample_simulation_data_points( time_indices = np.rint( np.interp( - np.linspace(0, 1, n_timepoints), + np.linspace(0, 1, n_timepoints + 1), np.linspace(0, 1, n_unique_timepoints), np.arange(n_unique_timepoints), ) @@ -103,7 +103,7 @@ def sample_simulation_data_points( for time, group in time_data.groupby("time"): sampled_points = pd.DataFrame() - sampled_points["monomer_ids"] = np.arange(n_monomer_points) + sampled_points["fiber_point"] = np.arange(n_monomer_points) sampled_points["time"] = time for column in sampled_columns: diff --git a/subcell_pipeline/simulation/readdy/README.md b/subcell_pipeline/simulation/readdy/README.md index 9ec550b..83520df 100644 --- a/subcell_pipeline/simulation/readdy/README.md +++ b/subcell_pipeline/simulation/readdy/README.md @@ -4,3 +4,17 @@ Simulations and processing for particle-based reaction-diffusion simulator [ReaD > - **Base simulator**: [https://github.com/readdy/readdy](https://github.com/readdy/readdy) > - **Model development**: [https://github.com/simularium/readdy-models](https://github.com/simularium/readdy-models) + +## Baseline single actin fiber with no compression + +The `NO_COMPRESSION` simulation series simulates a single actin fiber with a free barbed end across five replicates. 
+ +- **Run ReaDDy single fiber simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_run_readdy_no_compression_batch_simulations.html)) +- **Process ReaDDy single fiber simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_process_readdy_no_compression_simulations.html)) + +## Single actin fiber compressed at different compression velocities + +The `COMPRESSION_VELOCITY` simulation series simulates compression of a single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five replicates. + +- **Run ReaDDy compression simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_run_readdy_compression_batch_simulations.html)) +- **Process ReaDDy compression simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_process_readdy_compression_simulations.html)) diff --git a/subcell_pipeline/simulation/readdy/__init__.py b/subcell_pipeline/simulation/readdy/__init__.py index 0b837a0..9dfa45e 100644 --- a/subcell_pipeline/simulation/readdy/__init__.py +++ b/subcell_pipeline/simulation/readdy/__init__.py @@ -1,5 +1 @@ -"""readdy package for subcell_analysis.""" - -from .readdy_data import FrameData # noqa: F401 -from .readdy_loader import ReaddyLoader # noqa: F401 -from 
.readdy_post_processor import ReaddyPostProcessor # noqa: F401 +"""Simulation methods and notebooks for ReaDDy.""" diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py new file mode 100644 index 0000000..7463388 --- /dev/null +++ b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py @@ -0,0 +1,82 @@ +# %% [markdown] +# # Process ReaDDy compression simulations + +# %% [markdown] +""" +Notebook contains steps for post processing of ReaDDy simulations in which a +single actin fiber is compressed at different compression velocities. + +This notebook provides an example of processing a simulation series in which +there are multiple conditions, each of which were run with multiple replicates. +For an example of processing a simulation series with a single condition with +multiple replicates, see `process_readdy_no_compression_simulations.py`. + +- [Define simulation conditions](#define-simulation-conditions) +- [Parse simulation data](#parse-simulation-data) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.simulation.readdy.parser import parse_readdy_simulation_data + +# %% [markdown] +""" +## Define simulation conditions + +Defines the `ACTIN_COMPRESSION_VELOCITY` simulation series, which compresses a +single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 +μm/s) with five replicates each. 
+""" + +# %% +# Name of the simulation series +series_name: str = "ACTIN_COMPRESSION_VELOCITY" + +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# List of condition file keys for each velocity +condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Number of timepoints +n_timepoints = 200 + +# Number of monomer points per fiber +n_monomer_points = 200 + +# Temporary path to save downloaded trajectories +temp_path: Path = Path(__file__).parents[3] / "aws_downloads" +temp_path.mkdir(parents=True, exist_ok=True) + +# %% [markdown] +""" +## Parse simulation data + +Iterate through all condition keys and replicates to load simulation output +files and parse them into a tidy data format. If the parsed file for a given +condition key and replicate already exists, parsing is skipped. + +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index + 1).h5` +- Input: `(series_name)/data/(series_name)_(condition_key)_(index + 1).pkl` +- Output: `(series_name)/samples/(series_name)_(condition_key)_(index + 1).csv` +""" + +# %% +parse_readdy_simulation_data( + bucket, + series_name, + condition_keys, + n_replicates, + n_timepoints, + n_monomer_points, + compression=True, + temp_path=str(temp_path), +) diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py new file mode 100644 index 0000000..18779fa --- /dev/null +++ b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py @@ -0,0 +1,78 @@ +# %% [markdown] +# # Process ReaDDy no compression simulations + +# %% [markdown] +""" +Notebook contains steps for post processing of ReaDDy simulations for a baseline +single actin fiber with no compression. + +This notebook provides an example of processing a simulation series for a single +condition with multiple replicates. 
For an example of processing a simulation +series with multiple conditions, each of which have multiple replicates, see +`process_readdy_compression_simulations.py`. + +- [Define simulation conditions](#define-simulation-conditions) +- [Parse simulation data](#parse-simulation-data) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.simulation.readdy.parser import parse_readdy_simulation_data + +# %% [markdown] +""" +## Define simulation conditions + +Defines the `ACTIN_NO_COMPRESSION` simulation series, which simulates a single +actin fiber with a free barbed end across five replicates. +""" + +# %% +# Name of the simulation series +series_name: str = "ACTIN_NO_COMPRESSION" + +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# Number of timepoints +n_timepoints = 200 + +# Number of monomer points per fiber +n_monomer_points = 200 + +# Temporary path to save downloaded trajectories +temp_path: Path = Path(__file__).parents[3] / "aws_downloads" +temp_path.mkdir(parents=True, exist_ok=True) + +# %% [markdown] +""" +## Parse simulation data + +Iterate through all replicates to load simulation output files and parse them +into a tidy data format. If the parsed file for a given replicate already +exists, parsing is skipped. 
+ +- Input: `(series_name)/outputs/(series_name)_(index + 1).h5` +- Input: `(series_name)/data/(series_name)_(index + 1).pkl` +- Output: `(series_name)/samples/(series_name)_(index + 1).csv` +""" + +# %% +parse_readdy_simulation_data( + bucket, + series_name, + [""], + n_replicates, + n_timepoints, + n_monomer_points, + compression=False, + temp_path=str(temp_path), +) diff --git a/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py b/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py new file mode 100644 index 0000000..945216b --- /dev/null +++ b/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py @@ -0,0 +1,66 @@ +# %% [markdown] +# # Run ReaDDy compression simulations + +# %% [markdown] +""" +Notebook contains steps for running ReaDDy simulations in which a single actin +fiber is compressed at different compression velocities. + +Simulations use the ReaDDy actin model defined +[here](https://github.com/simularium/readdy-models/tree/main/examples/actin). +Instructions for running this model on AWS Batch are provided +[here](https://github.com/simularium/readdy-models/blob/main/examples/README.md). + +After simulations are complete, use this notebook to copy output files into the +file structure used by this pipeline. + +- [Define simulation conditions](#define-simulation-conditions) +- [Copy simulation outputs](#copy-simulation-outputs) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from subcell_pipeline.simulation.batch_simulations import copy_simulation_outputs + +# %% [markdown] +""" +## Define simulation conditions + +Defines the `ACTIN_COMPRESSION_VELOCITY` simulation series, which compresses a +single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 +μm/s) with five replicates each. 
+""" + +# %% +# Name of the simulation series +series_name: str = "ACTIN_COMPRESSION_VELOCITY" + +# Template for simulation output files +source_template: str = "outputs/actin_compression_velocity=%s_%d.h5" + +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# File keys for each velocity +velocity_keys: dict[str, str] = { + "4.7": "0047", + "15": "0150", + "47": "0470", + "150": "1500", +} + +# %% [markdown] +""" +## Copy simulation outputs +""" + +# %% +copy_simulation_outputs( + bucket, series_name, source_template, n_replicates, velocity_keys +) diff --git a/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py b/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py new file mode 100644 index 0000000..4672dc3 --- /dev/null +++ b/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py @@ -0,0 +1,55 @@ +# %% [markdown] +# # Run ReaDDy no compression simulations + +# %% [markdown] +""" +Notebook contains steps for running ReaDDy simulations for a baseline single +actin fiber with no compression. + +Simulations use the ReaDDy actin model defined +[here](https://github.com/simularium/readdy-models/tree/main/examples/actin). +Instructions for running this model on AWS Batch are provided +[here](https://github.com/simularium/readdy-models/blob/main/examples/README.md). + +After simulations are complete, use this notebook to copy output files into the +file structure used by this pipeline. 
+ +- [Define simulation conditions](#define-simulation-conditions) +- [Copy simulation outputs](#copy-simulation-outputs) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from subcell_pipeline.simulation.batch_simulations import copy_simulation_outputs + +# %% [markdown] +""" +## Define simulation conditions + +Defines the `ACTIN_NO_COMPRESSION` simulation series, which simulates a single +actin fiber with a free barbed end across five replicates. +""" + +# %% +# Name of the simulation series +series_name: str = "ACTIN_NO_COMPRESSION" + +# Template for simulation output files +source_template: str = "outputs/actin_compression_baseline_%d.h5" + +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# %% [markdown] +""" +## Copy simulation outputs +""" + +# %% +copy_simulation_outputs(bucket, series_name, source_template, n_replicates) diff --git a/subcell_pipeline/simulation/readdy/create_dataframes_from_readdy_outputs.py b/subcell_pipeline/simulation/readdy/create_dataframes_from_readdy_outputs.py deleted file mode 100644 index 5a3c4c5..0000000 --- a/subcell_pipeline/simulation/readdy/create_dataframes_from_readdy_outputs.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import List - -import numpy as np -import pandas as pd -from subcell_analysis.readdy import ReaddyLoader, ReaddyPostProcessor -from subcell_analysis.readdy.readdy_post_processor import array_to_dataframe - -IDEAL_ACTIN_POSITIONS = np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] -) -IDEAL_ACTIN_VECTOR_TO_AXIS = np.array([-0.01056751, -1.47785105, -0.65833209]) - - -def _load_readdy_fiber_points(h5_file_path, box_size, n_points_per_fiber): - readdy_loader = ReaddyLoader(str(h5_file_path)) - readdy_post_processor = ReaddyPostProcessor( - readdy_loader.trajectory(), - 
box_size=box_size, - ) - fiber_chain_ids = readdy_post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, - ) - axis_positions, _ = readdy_post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=IDEAL_ACTIN_POSITIONS, - ideal_vector_to_axis=IDEAL_ACTIN_VECTOR_TO_AXIS, - ) - fiber_points = readdy_post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - n_points=n_points_per_fiber, - ) - return np.array(fiber_points) - - -def generate_readdy_df( - input_h5_file_dir: str = "data/aws_downloads/", - output_dir: str = "data/dataframes/readdy/", - n_points_per_fiber: int = 50, - box_size: np.ndarray = np.array(3 * [600.0]), - num_repeats: int = 5, - compression_velocities: List[float] = [4.7, 15, 47, 150], - reprocess: bool = True, -) -> pd.DataFrame: - result = [] - os.makedirs(output_dir, exist_ok=True) - for velocity in compression_velocities: - for repeat in range(num_repeats): - file_name = f"actin_compression_velocity={velocity}_{repeat}.h5" - df_save_path = os.path.join( - output_dir, - f"readdy_actin_compression_velocity_{velocity}_repeat_{repeat}.csv", - ) - if os.path.exists(df_save_path) and not reprocess: - print(f"{file_name} already processed") - df_points = pd.read_csv(df_save_path) - result.append(df_points) - continue - h5_file_path = os.path.join(input_h5_file_dir, file_name) - if not os.path.exists(h5_file_path): - print(f"{file_name} not found") - continue - print(f"Processing {file_name}") - fiber_points = _load_readdy_fiber_points( - str(h5_file_path), box_size, n_points_per_fiber - ) - df_points = array_to_dataframe(fiber_points) - df_points.reset_index(inplace=True) - 
df_points.rename(columns={0: "xpos", 1: "ypos", 2: "zpos"}, inplace=True) - df_points["velocity"] = velocity - df_points["repeat"] = repeat - df_points["simulator"] = "readdy" - df_points["normalized_time"] = ( - df_points["time"] - df_points["time"].min() - ) / (df_points["time"].max() - df_points["time"].min()) - df_points.to_csv( - df_save_path, - index=False, - ) - result.append(df_points) - return pd.concat(result) - - -if __name__ == "__main__": - output_dir = "data/dataframes/readdy/" - df_readdy = generate_readdy_df(output_dir=output_dir) - df_readdy.to_csv( - output_dir / "readdy_actin_compression_all_velocities_and_repeats.csv" - ) - df_readdy.to_parquet( - output_dir / "readdy_actin_compression_all_velocities_and_repeats.parquet" - ) diff --git a/subcell_pipeline/simulation/readdy/data_structures.py b/subcell_pipeline/simulation/readdy/data_structures.py new file mode 100644 index 0000000..2d32040 --- /dev/null +++ b/subcell_pipeline/simulation/readdy/data_structures.py @@ -0,0 +1,109 @@ +"""Data structures for ReaDDy simulations.""" + +from typing import Optional + +import numpy as np + + +class TopologyData: + """Data class representing a ReaDDy topology of connected particles.""" + + uid: int + """Unique ID of the topology from ReaDDy.""" + + type_name: str + """ReaDDy type name of the topology.""" + + particle_ids: list[int] + """List of unique IDs of each particle in the topology.""" + + def __init__(self, uid: int, type_name: str, particle_ids: list[int]): + self.uid = uid + self.type_name = type_name + self.particle_ids = particle_ids + + def __str__(self) -> str: + return ( + "Topology(\n" + f" id = {self.uid}\n" + f" type_name = {self.type_name}\n" + f" particles = {self.particle_ids}\n" + ")" + ) + + +class ParticleData: + """Data class representing a ReaDDy particle.""" + + uid: int + """Unique ID of the particle from ReaDDy.""" + + type_name: str + """ReaDDy type name of the particle.""" + + position: np.ndarray + """XYZ position of the 
particle.""" + + neighbor_ids: list[int] + """List of unique IDs of each neighbor particle connected by an edge.""" + + def __init__( + self, uid: int, type_name: str, position: np.ndarray, neighbor_ids: list[int] + ): + self.uid = uid + self.type_name = type_name + self.position = position + self.neighbor_ids = neighbor_ids + + def __str__(self) -> str: + return ( + f"Particle(\n" + f" id = {self.uid}\n" + f" type_name = {self.type_name}\n" + f" position = {self.position}\n" + f" neighbors = {self.neighbor_ids}\n" + ")" + ) + + +class FrameData: + """Data class representing one ReaDDy timestep.""" + + time: float + """Current time of the simulation for this frame.""" + + topologies: dict[int, TopologyData] + """Mapping of topology ID to a TopologyData for each topology.""" + + particles: dict[int, ParticleData] + """Mapping of particle ID to a ParticleData for each particle.""" + + edge_ids: list[list[int]] + """List of edges, each is a list of the IDs of the two connected particles.""" + + def __init__( + self, + time: float, + topologies: Optional[dict[int, TopologyData]] = None, + particles: Optional[dict[int, ParticleData]] = None, + edge_ids: Optional[list[list[int]]] = None, + ): + self.time = time + self.topologies = topologies if topologies is not None else {} + self.particles = particles if particles is not None else {} + self.edge_ids = edge_ids if edge_ids is not None else [] + + def __str__(self) -> str: + top_str = "\n" + for top_id in self.topologies: + top_str += f"{top_id} : \n{self.topologies[top_id]}\n" + p_str = "\n" + for p_id in self.particles: + p_str += f"{p_id} : \n{self.particles[p_id]}\n" + return ( + f"Frame(\n" + f" time={self.time}\n" + f" topologies=\n{top_str}\n\n" + f" particles=\n{p_str}\n\n" + ")" + ) diff --git a/subcell_pipeline/simulation/readdy/readdy_loader.py b/subcell_pipeline/simulation/readdy/loader.py similarity index 51% rename from subcell_pipeline/simulation/readdy/readdy_loader.py rename to 
subcell_pipeline/simulation/readdy/loader.py index dc895d7..9d35e66 100644 --- a/subcell_pipeline/simulation/readdy/readdy_loader.py +++ b/subcell_pipeline/simulation/readdy/loader.py @@ -1,16 +1,57 @@ -#!/usr/bin/env python +"""Class for loading and shaping ReaDDy trajectories.""" -import os -from typing import Any, List, Optional +from typing import Any, Optional import numpy as np import readdy +from io_collection.keys.check_key import check_key +from io_collection.load.load_pickle import load_pickle +from io_collection.save.save_pickle import save_pickle from tqdm import tqdm -from .readdy_data import FrameData, ParticleData, TopologyData +from subcell_pipeline.simulation.readdy.data_structures import ( + FrameData, + ParticleData, + TopologyData, +) class ReaddyLoader: + """ + Load and shape data from a ReaDDy trajectory. + + Trajectory is loaded from the simulation output h5 file of the .dat pickle + file. If a .dat pickle location and key are provided, the loaded trajectory + is saved to the given location for faster reloads. 
+ """ + + _readdy_trajectory: Optional[readdy.Trajectory] + """ReaDDy trajectory object.""" + + _trajectory: Optional[list[FrameData]] + """List of FrameData for trajectory.""" + + h5_file_path: str + """Path to the ReaDDy .h5 file or .dat pickle file.""" + + min_time_ix: int + """First time index to include.""" + + max_time_ix: int + """Last time index to include.""" + + time_inc: int + """Include every time_inc timestep.""" + + timestep: float + """Real time for each simulation timestep.""" + + pickle_location: Optional[str] + """Location to save pickle file (AWS S3 bucket or local path).""" + + pickle_key: Optional[str] + """Name of pickle file (AWS S3 bucket or local path).""" + def __init__( self, h5_file_path: str, @@ -18,52 +59,29 @@ def __init__( max_time_ix: int = -1, time_inc: int = 1, timestep: float = 100.0, - save_pickle_file: bool = False, + pickle_location: Optional[str] = None, + pickle_key: Optional[str] = None, ): - """ - Load and shape data from a ReaDDy trajectory. - - - Parameters - ---------- - h5_file_path: str - Path to the ReaDDy .h5 file. If a .dat pickle file exists - at this path, load from that instead. - min_time_ix: int = 0 (optional) - First time index to include. - Default: 0 - max_time_ix: int = -1 (optional) - Last time index to include. - Default: -1 (include all timesteps after min_time_ix) - time_inc: int = 1 (optional) - Include every time_inc timestep. - Default: 1 - timestep: float = 100. (optional) - How much time passes each timestep? - (In any time units, resulting time measurements - will be in the same units.) - Default: 100. - save_pickle_file: bool = False (optional) - Save loaded data in a pickle file for easy reload? 
- Default: False - """ - self._readdy_trajectory: readdy.Trajectory = None - self._trajectory: Optional[List[FrameData]] = None + self._readdy_trajectory = None + self._trajectory = None self.h5_file_path = h5_file_path self.min_time_ix = min_time_ix self.max_time_ix = max_time_ix self.time_inc = time_inc self.timestep = timestep - self.save_pickle_file = save_pickle_file + self.pickle_location = pickle_location + self.pickle_key = pickle_key def readdy_trajectory(self) -> readdy.Trajectory: """ Lazy load the ReaDDy trajectory object. + Note that loading ReaDDy trajectories requires a path to a local file. + Loading currently does not support S3 locations. Returns ------- - readdy_trajectory: readdy.Trajectory + : The ReaDDy trajectory object. """ if self._readdy_trajectory is None: @@ -71,13 +89,12 @@ def readdy_trajectory(self) -> readdy.Trajectory: return self._readdy_trajectory @staticmethod - def _frame_edges(time_ix: int, topology_records: Any) -> List[List[int]]: + def _frame_edges(time_ix: int, topology_records: Any) -> list[list[int]]: """ - After a simulation has finished, get all the edges - at the given time index as [particle1 id, particle2 id]. + Get all edges at the given time index as [particle1 id, particle2 id]. - topology_records from - readdy.Trajectory(h5_file_path).read_observable_topologies() + The ``topology_records`` object is output from + ``readdy.Trajectory(h5_file_path).read_observable_topologies()``. 
""" result = [] for top in topology_records[time_ix]: @@ -88,7 +105,7 @@ def _frame_edges(time_ix: int, topology_records: Any) -> List[List[int]]: result.append([ix1, ix2]) return result - def _shape_trajectory_data(self) -> List[FrameData]: + def _shape_trajectory_data(self) -> list[FrameData]: """Shape data from a ReaDDy trajectory for analysis.""" ( _, @@ -105,11 +122,11 @@ def _shape_trajectory_data(self) -> List[FrameData]: if ( time_ix < self.min_time_ix or (self.max_time_ix >= 0 and time_ix > self.max_time_ix) - or time_ix % self.time_inc != 0 + or times[time_ix] % self.time_inc != 0 ): continue frame = FrameData(time=self.timestep * time_ix) - edge_ids = ReaddyLoader._frame_edges(time_ix, topology_records) + frame.edge_ids = ReaddyLoader._frame_edges(time_ix, topology_records) for index, top in enumerate(topology_records[time_ix]): frame.topologies[index] = TopologyData( uid=index, @@ -120,7 +137,7 @@ def _shape_trajectory_data(self) -> List[FrameData]: p_id = ids[time_ix][p] position = positions[time_ix][p] neighbor_ids = [] - for edge in edge_ids: + for edge in frame.edge_ids: if p_id == edge[0]: neighbor_ids.append(edge[1]) elif p_id == edge[1]: @@ -133,49 +150,33 @@ def _shape_trajectory_data(self) -> List[FrameData]: position=np.array([position[0], position[1], position[2]]), neighbor_ids=neighbor_ids, ) - for edge in edge_ids: - frame.edges.append( - np.array( - [ - frame.particles[edge[0]].position, - frame.particles[edge[1]].position, - ] - ) - ) result.append(frame) return result - def trajectory(self) -> List[FrameData]: + def trajectory(self) -> list[FrameData]: """ Lazy load the shaped trajectory. - Returns ------- - trajectory: List[FrameData] + : The trajectory of data shaped for analysis. 
""" + if self._trajectory is not None: return self._trajectory - pickle_file_path = self.h5_file_path + ".dat" - if os.path.isfile(pickle_file_path): - print("Loading pickle file for ReaDDy data") - import pickle - - data = [] - with open(pickle_file_path, "rb") as f: - while True: - try: - data.append(pickle.load(f)) - except EOFError: - break - self._trajectory = data[0] + + if self.pickle_location is not None and self.pickle_key is not None: + if check_key(self.pickle_location, self.pickle_key): + print(f"Loading pickle file for ReaDDy data from {self.pickle_key}") + self._trajectory = load_pickle(self.pickle_location, self.pickle_key) + else: + print(f"Loading ReaDDy data from h5 file {self.h5_file_path}") + print(f"Saving pickle file for ReaDDy data to {self.h5_file_path}") + self._trajectory = self._shape_trajectory_data() + save_pickle(self.pickle_location, self.pickle_key, self._trajectory) else: - print("Loading ReaDDy data from h5 file...") + print(f"Loading ReaDDy data from h5 file {self.h5_file_path}") self._trajectory = self._shape_trajectory_data() - if self.save_pickle_file: - import pickle - with open(pickle_file_path, "wb") as file: - pickle.dump(self._trajectory, file) return self._trajectory diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py new file mode 100644 index 0000000..6582311 --- /dev/null +++ b/subcell_pipeline/simulation/readdy/parser.py @@ -0,0 +1,290 @@ +"""Methods for parsing ReaDDy simulations.""" + +import os +from math import floor, log10 +from typing import Optional, Union + +import boto3 +import numpy as np +import pandas as pd +from botocore.exceptions import ClientError +from io_collection.keys.check_key import check_key +from io_collection.save.save_dataframe import save_dataframe + +from subcell_pipeline.simulation.readdy.loader import ReaddyLoader +from subcell_pipeline.simulation.readdy.post_processor import ReaddyPostProcessor + +COLUMN_NAMES: list[str] = [ + 
"fiber_id", + "xpos", + "ypos", + "zpos", + "xforce", + "yforce", + "zforce", + "segment_curvature", + "time", + "fiber_point", +] +"""Parsed tidy data column names.""" + +COLUMN_DTYPES: dict[str, Union[type[float], type[int]]] = { + "fiber_id": int, + "xpos": float, + "ypos": float, + "zpos": float, + "xforce": float, + "yforce": float, + "zforce": float, + "segment_curvature": float, + "time": float, + "fiber_point": int, +} +"""Parsed tidy data column data types.""" + +READDY_TIMESTEP: float = 0.1 +"""Simulation timestep (in ns).""" + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) +"""Default simulation volume dimensions (x, y, z).""" + +COMPRESSION_DISTANCE: float = 150.0 +"""Total distance the fiber end was displaced in nm.""" + + +def _download_s3_file(bucket: str, key: str, dest_path: str) -> Optional[str]: + """ + Download file from S3 to local path. + + Parameters + ---------- + bucket + Name of S3 bucket. + key + Source key. + dest_path + Target local path. + """ + + s3_client = boto3.client("s3") + + if os.path.isfile(dest_path): + return dest_path + try: + s3_client.download_file(bucket, key, dest_path) + print(f"Downloaded [ {key} ] to [ {dest_path} ].") + return dest_path + except ClientError: + print(f"!!! Failed to download {key}") + return None + + +def download_readdy_hdf5( + bucket: str, + series_name: str, + series_key: str, + rep_ix: int, + download_path: str, +) -> Optional[str]: + """ + Download ReaDDy h5 files from S3 to local path. + + The ReaDDy Python package currently requires a local file path. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Combination of series and condition names. + rep_ix + Replicate index. + download_path + Path for downloading temporary h5 files. 
+ """ + + if bucket.startswith("s3://"): + bucket = bucket.replace("s3://", "") + + aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" + local_h5_path = os.path.join(download_path, f"{series_key}_{rep_ix}.h5") + return _download_s3_file(bucket, aws_h5_key, local_h5_path) + + +def parse_readdy_simulation_single_fiber_trajectory( + bucket: str, + series_name: str, + series_key: str, + rep_ix: int, + n_timepoints: int, + n_monomer_points: int, + total_steps: int, + temp_path: str, + timestep: float = READDY_TIMESTEP, +) -> pd.DataFrame: + """ + Parse ReaDDy trajectory data into tidy data format. + + Note that this methods assumes there is only one fiber in the simulation. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation. + series_key + Series key. + rep_ix + Replicate index. + n_timepoints + Number of equally spaced timepoints to sample. + n_monomer_points + Number of equally spaced monomer points to sample. + total_steps + Total number of steps for each given simulation. + temp_path + Path for saving temporary h5 files. + timestep + Simulation timestep (in ns). 
+ """ + + h5_file_path = download_readdy_hdf5( + bucket, series_name, series_key, rep_ix, temp_path + ) + + assert isinstance(h5_file_path, str) + + rep_id = rep_ix + 1 + pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl" + time_inc = total_steps // n_timepoints + + readdy_loader = ReaddyLoader( + h5_file_path=h5_file_path, + time_inc=time_inc, + timestep=timestep, + pickle_location=bucket, + pickle_key=pickle_key, + ) + + post_processor = ReaddyPostProcessor(readdy_loader.trajectory(), box_size=BOX_SIZE) + + times = post_processor.times() + fiber_chain_ids = post_processor.linear_fiber_chain_ids(polymer_number_range=5) + axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) + + fiber_points = post_processor.linear_fiber_control_points( + axis_positions=axis_positions, + n_points=n_monomer_points, + ) + + point_data: list[list[Union[str, int, float]]] = [] + for time_ix in range(len(fiber_points)): + for pos_ix in range(fiber_points[0][0].shape[0]): + point_data.append( + [ + 1, # fiber_id + fiber_points[time_ix][0][pos_ix][0], # xpos + fiber_points[time_ix][0][pos_ix][1], # ypos + fiber_points[time_ix][0][pos_ix][2], # zpos + 0.0, # xforce + 0.0, # yforce + 0.0, # zforce + 0.0, # segment_curvature + times[time_ix], # time + pos_ix, # fiber_point + ] + ) + + # Combine all data into dataframe and update data types. + dataframe = pd.DataFrame(point_data, columns=COLUMN_NAMES) + dataframe = dataframe.astype(dtype=COLUMN_DTYPES) + + # Add placeholders for features not calculated in ReaDDy + dataframe["force_magnitude"] = np.array(len(point_data) * [0.0]) + dataframe["segment_energy"] = np.array(len(point_data) * [0.0]) + + return dataframe + + +def _round_2_sig_figs(x: float) -> int: + return int(round(x, -int(floor(log10(abs(0.1 * x)))))) + + +def velocity_for_cond(condition_key: str) -> float: + """'NNNN' -> NNN.N.""" + return float(condition_key[:3] + "." 
+ condition_key[-1]) + + +def parse_readdy_simulation_data( + bucket: str, + series_name: str, + condition_keys: list[str], + n_replicates: int, + n_timepoints: int, + n_monomer_points: int, + compression: bool, + temp_path: str, +) -> None: + """ + Parse ReaDDy simulation data for select conditions and replicates. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + n_replicates + Number of simulation replicates. + n_timepoints + Number of equally spaced timepoints to sample. + n_monomer_points + Number of equally spaced monomer points to sample. + compression + If True, parse compressed trajectories, + If False, parse baseline uncompressed trajectories. + temp_path + Path for saving temporary h5 files. + """ + total_steps: dict[str, int] = {} + if compression: + total_steps = { + cond: _round_2_sig_figs( + (COMPRESSION_DISTANCE * 1e-3 / velocity_for_cond(cond)) * 1e10 + ) + for cond in condition_keys + } + else: + total_steps = {"": int(1e7)} + + for condition_key in condition_keys: + series_key = f"{series_name}_{condition_key}" if condition_key else series_name + + for rep_ix in range(n_replicates): + rep_id = rep_ix + 1 + dataframe_key = f"{series_name}/samples/{series_key}_{rep_id:06d}.csv" + + # Skip if dataframe file already exists. + if check_key(bucket, dataframe_key): + print(f"Dataframe [ { dataframe_key } ] already exists. 
Skipping.") + continue + + print(f"Parsing data for [ {condition_key} ] replicate [ {rep_ix} ]") + + data = parse_readdy_simulation_single_fiber_trajectory( + bucket, + series_name, + series_key, + rep_ix, + n_timepoints, + n_monomer_points, + total_steps[condition_key], + temp_path, + ) + + save_dataframe(bucket, dataframe_key, data, index=False) diff --git a/subcell_pipeline/simulation/readdy/readdy_post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py similarity index 53% rename from subcell_pipeline/simulation/readdy/readdy_post_processor.py rename to subcell_pipeline/simulation/readdy/post_processor.py index ba9377d..8ef5f64 100644 --- a/subcell_pipeline/simulation/readdy/readdy_post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -1,56 +1,96 @@ -#!/usr/bin/env python +"""Class for post processing ReaDDy trajectories.""" import math -import time -from typing import Dict, List, Tuple +from typing import Optional import numpy as np -import pandas as pd -from numpy import ndarray from tqdm import tqdm -from ..compression_analysis import get_contour_length_from_trace -from .readdy_data import FrameData +from subcell_pipeline.analysis.compression_metrics.polymer_trace import ( + get_contour_length_from_trace, +) +from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import align_fiber +from subcell_pipeline.simulation.readdy.data_structures import FrameData + +ACTIN_START_PARTICLE_PHRASE: list[str] = ["pointed"] +"""Phrases indicating actin start particle.""" + +ACTIN_PARTICLE_TYPES: list[str] = [ + "actin#", + "actin#ATP_", + "actin#mid_", + "actin#mid_ATP_", + "actin#fixed_", + "actin#fixed_ATP_", + "actin#mid_fixed_", + "actin#mid_fixed_ATP_", + "actin#barbed_", + "actin#barbed_ATP_", + "actin#fixed_barbed_", + "actin#fixed_barbed_ATP_", +] +"""Actin particle types from simularium/readdy-models.""" + +IDEAL_ACTIN_POSITIONS: np.ndarray = np.array( + [ + [24.738, 20.881, 26.671], + [27.609, 24.061, 
27.598], + [30.382, 21.190, 25.725], + ] +) +"""Ideal actin positions measured from crystal structure.""" + +IDEAL_ACTIN_VECTOR_TO_AXIS: np.ndarray = np.array( + [-0.01056751, -1.47785105, -0.65833209] +) +"""Ideal actin vector to axis.""" class ReaddyPostProcessor: + """Get different views of ReaDDy trajectory for different analysis purposes.""" + + trajectory: list[FrameData] + """ReaDDy data trajectory from ReaddyLoader(h5_file_path).trajectory().""" + + box_size: np.ndarray + """The size of the XYZ dimensions of the simulation volume (shape = 3).""" + + periodic_boundary: bool + """True if simulation had periodic boundary, False otherwise.""" + def __init__( self, - trajectory: List[FrameData], + trajectory: list[FrameData], box_size: np.ndarray, periodic_boundary: bool = False, ): - """ - Get different views of the ReaDDy trajectory - for different analysis purposes. - - - Parameters - ---------- - trajectory: List[FrameData] - A trajectory of ReaDDy data from - ReaddyLoader(h5_file_path).trajectory(). - box_size: np.ndarray (shape = 3) - The size of the XYZ dimensions of the simulation volume. - periodic_boundary: bool (optional) - Was there a periodic boundary in this simulation? - Default: False - """ self.trajectory = trajectory self.box_size = box_size self.periodic_boundary = periodic_boundary + def times(self) -> np.ndarray: + """ + Get simulation time at each timestep. + + Returns + ------- + times + Array of time stamps in simulation time for each timestep (shape = + n_timesteps). + """ + result = [trajectory.time for trajectory in self.trajectory] + return np.array(result) + def _id_for_neighbor_of_types( self, time_ix: int, particle_id: int, - neighbor_types: List[str], - exclude_ids: List[int] = None, + neighbor_types: list[str], + exclude_ids: Optional[list[int]] = None, ) -> int: """ - Get the id for the first neighbor - with a type_name in neighbor_types - at the given time index. 
+ Get the id for the first neighbor with a type_name in neighbor_types at + the given time index. """ particles = self.trajectory[time_ix].particles for neighbor_id in particles[particle_id].neighbor_ids: @@ -65,17 +105,17 @@ def _ids_for_chain_of_types( self, time_ix: int, start_particle_id: int, - chain_particle_types: List[List[str]], + chain_particle_types: list[list[str]], current_polymer_number: int, chain_length: int = 0, - last_particle_id: int = None, - result: List[int] = None, - ) -> List[int]: + last_particle_id: Optional[int] = None, + result: Optional[list[int]] = None, + ) -> list[int]: """ - Starting from the particle with start_particle_id, - get ids for a chain of particles with chain_particle_types - in the given frame of data, + Get IDs for a chain of particles with chain_particle_types in the given + frame of data, starting from the particle with start_particle_id and avoiding the particle with last_particle_id. + If chain_length = 0, return entire chain. """ if result is None: @@ -107,8 +147,8 @@ def _non_periodic_position( self, position1: np.ndarray, position2: np.ndarray ) -> np.ndarray: """ - If the distance between two positions is greater than box_size, - move the second position across the box. + If the distance between two positions is greater than box_size, move the + second position across the box. """ if not self.periodic_boundary: return position2 @@ -133,14 +173,15 @@ def _normalize(vector: np.ndarray) -> np.ndarray: @staticmethod def _orientation_from_positions(positions: np.ndarray) -> np.ndarray: """ - orthonormalize and cross the vectors from a particle position - to prev and next particle positions to get a basis local to the particle. - - positions = [ - prev particle's position, - this particle's position, - next particle's position - ] + Orthonormalize and cross the vectors from a particle position to prev + and next particle positions to get a basis local to the particle. 
+ + The positions array is structured as: + [ + prev particle's position, + this particle's position, + next particle's position, + ] """ v1 = ReaddyPostProcessor._normalize(positions[0] - positions[1]) v2 = ReaddyPostProcessor._normalize(positions[2] - positions[1]) @@ -154,14 +195,15 @@ def _rotation( self, positions: np.ndarray, ideal_positions: np.ndarray ) -> np.ndarray: """ - get the difference in the particles's current orientation - compared to the initial orientation as a rotation matrix. - - positions = [ - prev particle's position, - this particle's position, - next particle's position - ] + Get the difference in the particles's current orientation compared to + the initial orientation as a rotation matrix. + + The positions array is structured as: + [ + prev particle's position, + this particle's position, + next particle's position, + ] """ positions[0] = self._non_periodic_position(positions[1], positions[0]) positions[2] = self._non_periodic_position(positions[1], positions[2]) @@ -170,37 +212,90 @@ def _rotation( np.linalg.inv(self._orientation_from_positions(ideal_positions)), ) + def rotate_positions( + self, positions: np.ndarray, rotation: np.ndarray + ) -> np.ndarray: + """ + Rotate an x,y,z position (or an array of them) around the x-axis + with the given rotation matrix. + """ + if len(positions.shape) > 1: + result = np.dot(positions[:, 1:], rotation) + return np.concatenate((positions[:, 0:1], result), axis=1) + else: + result = np.dot(positions[1:], rotation) + return np.concatenate((positions[0:1], result), axis=0) + + def align_trajectory( + self, + fiber_points: list[list[np.ndarray]], + ) -> tuple[np.ndarray, list[list[np.ndarray]]]: + """ + Align the positions of particles in the trajectory + so that the furthest point from the x-axis + is aligned with the positive y-axis at the last time point. + + Parameters + ---------- + fiber_points + How many numbers are used to represent the relative identity of + particles in the chain? 
+ start_particle_phrases + List of phrases in particle type names for the first particles in + the linear chain. + other_particle_types + List of particle type names (without polymer numbers at the end) for + the particles other than the start particles. + + Returns + ------- + positions + Array (shape = timesteps x 1 x n x 3) containing the x,y,z positions + of actin monomer particles at each timestep. + fiber_points + List of lists of arrays (shape = n x 3) containing the x,y,z positions + of control points for each fiber at each time. + """ + result: list[list[np.ndarray]] = [] + _, rotation = align_fiber(fiber_points[-1][0]) + for time_ix in range(len(self.trajectory)): + result.append([]) + for _, particle in self.trajectory[time_ix].particles.items(): + particle.position = self.rotate_positions(particle.position, rotation) + result[time_ix].append(particle.position) + fiber_points[time_ix][0] = self.rotate_positions( + fiber_points[time_ix][0], rotation + ) + return np.array(result), fiber_points + def linear_fiber_chain_ids( self, - start_particle_phrases: List[str], - other_particle_types: List[str], polymer_number_range: int, - ) -> List[List[List[int]]]: + start_particle_phrases: list[str] = ACTIN_START_PARTICLE_PHRASE, + other_particle_types: list[str] = ACTIN_PARTICLE_TYPES, + ) -> list[list[list[int]]]: """ - Get particle IDs for particles - in each linear fiber at each timestep. - + Get particle IDs for particles in each linear fiber at each timestep. Parameters ---------- - start_particle_phrases: List[str] - List of phrases in particle type names - for the first particles in the linear chain. - other_particle_types: List[str] - List of particle type names - (without polymer numbers at the end) - for the particles other than the start particles. - polymer_number_range: int - How many numbers are used to represent the - relative identity of particles in the chain? 
+ polymer_number_range + How many numbers are used to represent the relative identity of + particles in the chain? + start_particle_phrases + List of phrases in particle type names for the first particles in + the linear chain. + other_particle_types + List of particle type names (without polymer numbers at the end) for + the particles other than the start particles. Returns ------- - chain_ids: List[List[List[int]]] - List of lists of lists of the particle IDs - for each particle for each fiber at each time. + : + List of lists of lists of the particle IDs for each particle for + each fiber at each time. """ - result: List[List[List[int]]] = [] + result: list[list[list[int]]] = [] chain_particle_types = [] for i in range(polymer_number_range): chain_particle_types.append( @@ -232,39 +327,37 @@ def linear_fiber_chain_ids( def linear_fiber_axis_positions( self, - fiber_chain_ids: List[List[List[int]]], - ideal_positions: np.ndarray, - ideal_vector_to_axis: np.ndarray, - ) -> Tuple[List[List[np.ndarray]], List[List[List[int]]]]: + fiber_chain_ids: list[list[list[int]]], + ideal_positions: np.ndarray = IDEAL_ACTIN_POSITIONS, + ideal_vector_to_axis: np.ndarray = IDEAL_ACTIN_VECTOR_TO_AXIS, + ) -> tuple[list[list[np.ndarray]], list[list[list[int]]]]: """ - Get XYZ axis positions for each particle - in each linear fiber at each timestep. - + Get XYZ axis positions for each particle in each linear fiber at each + timestep. Parameters ---------- - fiber_chain_ids: List[List[List[int]]] - List of lists of lists of particle IDs - for each particle in each fiber at each time. - ideal_positions: np.ndarray (shape = 3 x 3) - XYZ positions for 3 particles in an ideal chain. - ideal_vector_to_axis: np.ndarray - Vector from the second ideal position - to the axis of the fiber. + fiber_chain_ids + List of list of lists of particle IDs for each particle in each + fiber at each time. + ideal_positions + XYZ positions for 3 particles in an ideal chain (shape = 3 x 3). 
+ ideal_vector_to_axis + Vector from the second ideal position to the axis of the fiber + (shape = 3). Returns ------- - axis_positions: List[List[np.ndarray (shape = n x 3)]] - List of lists of arrays containing the x,y,z positions - of the closest point on the fiber axis to the position - of each particle in each fiber at each time. - new_chain_ids: List[List[List[int]] - List of lists of lists of particle IDs - matching the axis_positions + axis_positions + Lists of lists of arrays (shape = n x 3) containing the x,y,z + positions of the closest point on the fiber axis to the position of + each particle in each fiber at each time. + new_chain_ids + List of lists of lists of particle IDs matching the axis_positions for each particle in each fiber at each time. """ - result: List[List[np.ndarray]] = [] - ids: List[List[List[int]]] = [] + result: list[list[np.ndarray]] = [] + ids: list[list[list[int]]] = [] for time_ix in range(len(fiber_chain_ids)): result.append([]) ids.append([]) @@ -286,7 +379,7 @@ def linear_fiber_axis_positions( break if pos_invalid: break - rotation = self._rotation(positions, ideal_positions) + rotation = self._rotation(np.array(positions), ideal_positions) if rotation is None: break vector_to_axis_local = np.squeeze( @@ -299,51 +392,53 @@ def linear_fiber_axis_positions( new_ids.append(particle_ix) if len(axis_positions) < 2: continue - result[time_ix].append(axis_positions) + result[time_ix].append(np.array(axis_positions)) ids[time_ix].append(new_ids) return result, ids def linear_fiber_normals( self, - fiber_chain_ids: List[List[List[int]]], - axis_positions: List[List[np.ndarray]], + fiber_chain_ids: list[list[list[int]]], + axis_positions: list[list[np.ndarray]], normal_length: float = 5, - ) -> List[List[np.ndarray]]: + ) -> list[list[np.ndarray]]: """ - Get XYZ positions defining start and end points for normals - for each particle in each fiber at each timestep. 
- + Get XYZ positions defining start and end points for normals for each + particle in each fiber at each timestep. Parameters ---------- - fiber_chain_ids: List[List[List[int]]] - List of lists of lists of particle IDs - for particles in each fiber at each time. - axis_positions: List[List[np.ndarray (shape = n x 3)]] - List of lists of arrays containing the x,y,z positions - of the closest point on the fiber axis to the position - of each particle in each fiber at each time. - normal_length: float (optional) - Length of the resulting normal vectors - in the trajectory's spatial units. - Default: 5 + fiber_chain_ids + List of lists of lists of particle IDs for particles in each fiber + at each time. + axis_positions + List of lists of arrays (shape = n x 3) containing the x,y,z + positions of the closest point on the fiber axis to the position of + each particle in each fiber at each time. + normal_length + Length of the resulting normal vectors in the trajectory's spatial + units. Returns ------- - normals: List[List[np.ndarray (shape = 2 x 3)]] - List of lists of arrays containing the x,y,z normals + : + List of lists of arrays (shape = 2 x 3) containing the x,y,z normals of each particle in each fiber at each time. 
""" - result: List[List[np.ndarray]] = [] + result: list[list[np.ndarray]] = [] for time_ix in range(len(fiber_chain_ids)): result.append([]) particles = self.trajectory[time_ix].particles for chain_ix in range(len(fiber_chain_ids[time_ix])): + n_particles = len(fiber_chain_ids[time_ix][chain_ix]) for particle_ix, particle_id in enumerate( fiber_chain_ids[time_ix][chain_ix] ): + # Skip first and last particle + if particle_ix == 0 or particle_ix == n_particles - 1: + continue position = particles[particle_id].position - axis_position = axis_positions[time_ix][chain_ix][particle_ix] + axis_position = axis_positions[time_ix][chain_ix][particle_ix - 1] direction = ReaddyPostProcessor._normalize(position - axis_position) result[time_ix].append( np.array( @@ -354,33 +449,32 @@ def linear_fiber_normals( @staticmethod def linear_fiber_control_points( - axis_positions: List[List[np.ndarray]], + axis_positions: list[list[np.ndarray]], n_points: int, - ) -> List[List[np.ndarray]]: + ) -> list[list[np.ndarray]]: """ - Resample the fiber line defined by each array of axis positions - to get the requested number of points between XYZ control points - for each linear fiber at each timestep. - + Resample the fiber line defined by each array of axis positions to get + the requested number of points between XYZ control points for each + linear fiber at each timestep. Parameters ---------- - axis_positions: List[List[np.ndarray (shape = n x 3)]] - List of lists of arrays containing the x,y,z positions - of the closest point on the fiber axis to the position - of each particle in each fiber at each time. - n_points: int + axis_positions + List of lists of arrays (shape = n x 3) containing the x,y,z + positions of the closest point on the fiber axis to the position of + each particle in each fiber at each time. + n_points Number of control points (spaced evenly) on resulting fibers. 

        Returns
        -------
        :
            List of lists of arrays (shape = n x 3) containing the x,y,z positions
            of control points for each fiber at each time.
        """
        if n_points < 2:
            raise Exception("n_points must be > 1 to define a fiber.")
-        result: List[List[np.ndarray]] = []
+        result: list[list[np.ndarray]] = []
        for time_ix in tqdm(range(len(axis_positions))):
            result.append([])
            contour_length = get_contour_length_from_trace(axis_positions[time_ix][0])
@@ -390,12 +484,17 @@
            control_points = np.zeros((n_points, 3))
            control_points[0] = positions[0]
            current_position = np.copy(positions[0])
-            leftover_length = 0
+            leftover_length: float = 0
            for pos_ix in range(1, len(positions)):
                v_segment = positions[pos_ix] - positions[pos_ix - 1]
                direction = ReaddyPostProcessor._normalize(v_segment)
-                remaining_length = np.linalg.norm(v_segment) + leftover_length
-                while remaining_length >= segment_length:
+                remaining_length = (
+                    np.linalg.norm(v_segment).item() + leftover_length
+                )
+                # Rounding to 9 decimal places to avoid floating point error
+                # where the remaining length is very close to the segment
+                # length, causing the final control point to be skipped.
+                while round(remaining_length, 9) >= round(segment_length, 9):
                    current_position += (
                        segment_length - leftover_length
                    ) * direction
@@ -410,43 +509,40 @@

    def fiber_bond_energies(
        self,
-        fiber_chain_ids: List[List[List[int]]],
-        ideal_lengths: Dict[int, float],
-        ks: Dict[int, float],
+        fiber_chain_ids: list[list[list[int]]],
+        ideal_lengths: dict[int, float],
+        ks: dict[int, float],
        stride: int = 1,
-    ) -> Tuple[Dict[int, np.ndarray], np.ndarray]:
+    ) -> tuple[dict[int, np.ndarray], np.ndarray]:
        """
-        Get the strain energy using the harmonic spring equation
-        and the bond distance between particles
-        with a given polymer number offset.
- + Get the strain energy using the harmonic spring equation and the bond + distance between particles with a given polymer number offset. Parameters ---------- - fiber_chain_ids: List[List[List[int]]] - List of lists of lists of particle IDs - for particles in each fiber at each time. - ideal_lengths: Dict[int,float] + fiber_chain_ids + List of lists of lists of particle IDs for particles in each fiber + at each time. + ideal_lengths Ideal bond length for each of the polymer number offsets. - ks: Dict[int,float] + ks Bond energy constant for each of the polymer number offsets. - stride: int (optional) + stride Calculate bond energy every stride timesteps. - Default: 1 Returns ------- - bond_energies: Dict[int,np.ndarray (shape = time x bonds)] - For each polymer number offset, an array of bond energy - for each bond at each time. - filament_positions: np.ndarray (shape = time x bonds) - Position in the filament from the starting end - for the first particle in each bond at each time. + bond_energies + Map of polymer number offset to array (shape = time x bonds) of bond + energy for each bond at each time. + filament_positions + Array (shape = time x bonds) of position in the filament from the + starting end for the first particle in each bond at each time. 
""" - energies: Dict[int, List[List[float]]] = {} + energies: dict[int, list[list[float]]] = {} for offset in ideal_lengths: energies[offset] = [] - filament_positions: List[List[int]] = [] + filament_positions: list[list[int]] = [] for time_ix in range(0, len(self.trajectory), stride): for offset in ideal_lengths: energies[offset].append([]) @@ -467,7 +563,7 @@ def fiber_bond_energies( particle.position, offset_particle.position ) bond_stretch = ( - np.linalg.norm(offset_pos - particle.position) + np.linalg.norm(offset_pos - particle.position).item() - ideal_lengths[offset] ) energy = 0.5 * ks[offset] * bond_stretch * bond_stretch @@ -480,49 +576,26 @@ def fiber_bond_energies( np.array(filament_positions), ) - def edge_positions(self) -> List[List[np.ndarray]]: + def edge_positions(self) -> list[list[np.ndarray]]: """ Get the edges between particles as start and end positions. Returns ------- - particle_edges: List[List[np.ndarray]] - List of list of edges as position of each of the two particles - connected by the edge for each edge at each time. + : + List of list of edges as position of each of the two connected particles + for each edge at each time. """ - edges = [] + edges: list[list[np.ndarray]] = [] for frame in self.trajectory: - edges.append(frame.edges) + edges.append([]) + for edge in frame.edge_ids: + edges[-1].append( + np.array( + [ + frame.particles[edge[0]].position, + frame.particles[edge[1]].position, + ] + ) + ) return edges - - -def array_to_dataframe(fiber_point_array: ndarray) -> pd.DataFrame: - """ - Convert a 3D array to a pandas DataFrame. - - Parameters - ---------- - fiber_point_array: ndarray - The input 3D array. - - Returns - ------- - DataFrame: A pandas DataFrame with timepoint and fiber point as multi-index. 
- """ - # Reshape the array to remove the singleton dimensions - fiber_point_array = np.squeeze(fiber_point_array) - - # Reshape the array to have dimensions (timepoints * 50, 3) - reshaped_arr = fiber_point_array.reshape(-1, 3) - - # Create a DataFrame with timepoint and fiber point as multi-index - timepoints = np.repeat(range(fiber_point_array.shape[0]), 50) - fiber_points = np.tile(range(50), fiber_point_array.shape[0]) - - df = pd.DataFrame(reshaped_arr) - df["time"] = timepoints - df["id"] = fiber_points - - df.set_index(["time", "id"], inplace=True) - - return df diff --git a/subcell_pipeline/simulation/readdy/readdy_analysis.py b/subcell_pipeline/simulation/readdy/readdy_analysis.py deleted file mode 100644 index 67dcd72..0000000 --- a/subcell_pipeline/simulation/readdy/readdy_analysis.py +++ /dev/null @@ -1,457 +0,0 @@ -# %% [markdown] -# ## Readdy Analysis - -# %% [markdown] -# ## Download Readdy Files and postprocess them - -import argparse - -# %% -# import readdy -import boto3 -import numpy as np -import pandas as pd -from subcell_analysis.compression_analysis import COMPRESSIONMETRIC -from subcell_analysis.compression_workflow_runner import ( - compression_metrics_workflow, - plot_metric, - plot_metric_list, -) -from subcell_analysis.cytosim.post_process_cytosim import create_dataframes_for_repeats -from subcell_analysis.readdy import ReaddyLoader, ReaddyPostProcessor - -# %% -save_folder = "../data/readdy_h5_files" - -# %% -file_name = "actin_compression_velocity_15_0.h5" -s3 = boto3.client("s3") -response = s3.download_file( - "readdy-working-bucket", - "outputs/actin_compression_velocity=15_0.h5", - f"{save_folder}/{file_name}", -) - -# %% -h5_file_path = f"{save_folder}/{file_name}" - -post_processor = ReaddyPostProcessor( - ReaddyLoader(h5_file_path).trajectory(), - box_size=600.0 * np.ones(3), -) -fiber_chain_ids = post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - 
"actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, -) -axis_positions, _ = post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] - ), - ideal_vector_to_axis=np.array( - [-0.01056751, -1.47785105, -0.65833209], - ), -) -fiber_points = post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - segment_length=10.0, -) -print(fiber_points) - - -import pandas as pd -from subcell_analysis.compression_analysis import COMPRESSIONMETRIC - -# %% -from subcell_analysis.compression_workflow_runner import ( - compression_metrics_workflow, - plot_metric, - plot_metric_list, -) -from subcell_analysis.cytosim.post_process_cytosim import create_dataframes_for_repeats - -# %% -arr = np.array(fiber_points[0][0]) -arr.shape - - -def array_to_dataframe(arr): - # Reshape the array to remove the singleton dimensions - arr = np.squeeze(arr) - - # Reshape the array to have dimensions (timepoints * 50, 3) - reshaped_arr = arr.reshape(-1, 3) - - # Create a DataFrame with timepoint and fiber point as multi-index - timepoints = np.repeat(range(arr.shape[0]), 50) - fiber_points = np.tile(range(50), arr.shape[0]) - - df = pd.DataFrame(reshaped_arr) - df["time"] = timepoints - df["id"] = fiber_points - - df.set_index(["time", "id"], inplace=True) - - return df - - -df_points = array_to_dataframe(arr) -df_points.reset_index(inplace=True) -df_points.rename(columns={0: "x", 1: "y", 2: "z"}, inplace=True) -single_timepoint = df_points[df_points["time"] == 0] -single_timepoint - - -# %% -df_points - -df_points["time"].unique() -df_points.to_csv("../dataframes/readdy_processed_data.csv", index=False) -# 
df_points.to_csv("../dataframes/readdy_processed_data.csv") -df_points = pd.read_csv("../dataframes/readdy_processed_data.csv") - - -import matplotlib.pyplot as plt - -# %% -import pandas as pd -from matplotlib.animation import FuncAnimation -from mpl_toolkits.mplot3d import Axes3D - -# Assuming you have a DataFrame named 'df_points' with columns 'time', 'id', 'x', 'y', and 'z' -# df_points = pd.DataFrame(...) # Your data goes here - -# Create a 3D plot -fig = plt.figure() -ax = fig.add_subplot(111, projection="3d") - -# Get unique timestamps in the data -timestamps = df_points["time"].unique() - - -# Function to update the plot at each time step -def update_plot(time_step, ax=ax): - ax.cla() # Clear previous plot - - # Filter the data for the current timestamp - data_at_time = df_points[df_points["time"] == timestamps[time_step]] - - # Plot the points at the current time step - ax.scatter( - data_at_time["x"], data_at_time["y"], data_at_time["z"], c="r", marker="o" - ) - - # Set plot labels and title - ax.set_xlabel("X Position") - ax.set_ylabel("Y Position") - ax.set_zlabel("Z Position") - ax.set_title(f"Time: {timestamps[time_step]}") - ax.set_xlim([-300, 300]) - ax.set_ylim([-15, 15]) - ax.set_zlim([-10, 30]) - ax.set_aspect("equal") - - -# Create the animation -update_plot(-1) - -# If you want to save the animation to a file -# animation.save('3d_animation.mp4', writer='ffmpeg') -# animation.save('3d_animation_frames/frame_{:04d}.png', writer='pillow', fps=1) - -# Show the plot (If you don't want to save it) -plt.show() - - -# %% -plt.close("all") - - -# %% -import matplotlib.pyplot as plt - -metrics = [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, -] -df_points = compression_metrics_workflow( - df_points, - [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - ], -) -df_points.columns - -# %% -for metric in metrics: - fig, ax = 
plt.subplots() - print(metric.value) - metric_by_time = df_points.groupby(["time"])[metric.value].mean() - ax.plot(metric_by_time, label=f"metric = {metric.value}") - plt.legend() - plt.show() - -# %% [markdown] -# ## Generating Outputs for All Readdy Simulations - -# %% -compression_velocities = [4.7, 15, 47, 150] -iterations = [0, 1, 2] -empty_array = np.zeros((len(compression_velocities), len(iterations))) - -# %% [markdown] -# Post Processing - -# %% -from pathlib import Path - -# %% -data_dir = Path("../data/readdy_h5_files") -data_dir.mkdir(exist_ok=True, parents=True) - -# %% - -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - new_file_path = ( - f"{data_dir}/readdy_actin_compression_velocity_{velocity}_{iteration}.h5" - ) - print(f"Downloading file: {new_file_path}") - response = s3.download_file( - "readdy-working-bucket", - f"outputs/actin_compression_velocity={velocity}_{iteration}.h5", - new_file_path, - ) - - -# %% -fiber_points = np.empty((4, 3), dtype=object) - -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - new_file_path = ( - f"{data_dir}/actin_compression_velocity_{velocity}_{iteration}.h5" - ) - post_processor = ReaddyPostProcessor( - ReaddyLoader(new_file_path).trajectory(), - box_size=600.0 * np.ones(3), - ) - fiber_chain_ids = post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, - ) - axis_positions, _ = post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] - ), - ideal_vector_to_axis=np.array( 
- [-0.01056751, -1.47785105, -0.65833209], - ), - ) - fiber_points[index][iteration] = post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - segment_length=10.0, - ) - - -# %% [markdown] -# Save processed fiber_points - -# %% -df_path = Path("../data/dataframes/") -df_path.mkdir(exist_ok=True, parents=True) - -# %% -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - print(index, iteration) - if index == 1 and iteration == 2: # TODO: check why this is happening - break - arr = np.array(fiber_points[index][iteration]) - print(arr.shape) - df_points = array_to_dataframe(arr) - df_points.reset_index(inplace=True) - df_points.rename(columns={0: "xpos", 1: "ypos", 2: "zpos"}, inplace=True) - df_points.to_csv( - f"{df_path}/actin_compression_velocity_{velocity}.{iteration}.csv", - index=False, - ) - - -# %% [markdown] -# ## Starting from Reading from CSV - -# %% -df_path = Path("../data/dataframes/") -df_path.mkdir(exist_ok=True, parents=True) - -# %% -processed_dataframes = np.empty( - (len(compression_velocities), len(iterations)), dtype=object -) -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - if index == 1 and iteration == 2: - break - processed_dataframes[index][iteration] = pd.read_csv( - f"{df_path}/readdy_actin_compression_velocity_{velocity}_repeat_{iteration}.csv" - ) - print(index, iteration, processed_dataframes[index][iteration].shape) - - -# %% [markdown] -# Calculate metrics for processed dataframes - -# %% -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - print(index, iteration) - if index == 1 and iteration == 2: - break - processed_dataframes[index][iteration] = compression_metrics_workflow( - processed_dataframes[index][iteration], - [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - ], - ) - -# %% -processed_dataframes[0][1] - -# %% 
[markdown] -# Plot calculated metrics - -# %% -import matplotlib.pyplot as plt - -# %% -figure_path = Path("../figures/readdy_metrics") -figure_path.mkdir(exist_ok=True, parents=True) -plt.close("all") - -# %% -metrics = [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, -] -compression_velocities = [4.7, 15, 47, 150] -for metric in metrics: - fig, axs = plt.subplots( - 1, - len(compression_velocities), - figsize=(len(compression_velocities) * 5, 5), - dpi=300, - sharey=True, - sharex=True, - ) - for index, velocity in enumerate(compression_velocities): - print(metric.value) - for iteration in iterations: - if index == 1 and iteration == 2: - continue - metric_by_time = ( - processed_dataframes[index][iteration] - .groupby(["time"])[metric.value] - .mean() - ) - axs[index].plot(metric_by_time, label=f"iteration = {iteration}") - axs[index].set_title(f"compression velocity = {velocity}") - axs[index].legend() - if index == 0: - axs[index].set_ylabel(metric.value) - fig.suptitle(f"Readdy") - fig.supxlabel("time") - plt.tight_layout() - plt.show() - fig.savefig(f"{figure_path}/actin_compression_all_velocities_{metric.value}.png") - -import matplotlib.pyplot as plt - -# %% -import pandas as pd -from matplotlib.animation import FuncAnimation -from mpl_toolkits.mplot3d import Axes3D - -# Assuming you have a DataFrame named 'df_points' with columns 'time', 'id', 'x', 'y', and 'z' -# df_points = pd.DataFrame(...) 
# Your data goes here - -# Create a 3D plot -fig = plt.figure() -ax = fig.add_subplot(111, projection="3d") - -# Get unique timestamps in the data -timestamps = processed_dataframes[0][0]["time"].unique() - - -# Function to update the plot at each time step -def update_plot(time_step, ax=ax): - ax.cla() # Clear previous plot - - # Filter the data for the current timestamp - data_at_time = df_points[df_points["time"] == timestamps[time_step]] - - # Plot the points at the current time step - ax.scatter( - data_at_time["x"], data_at_time["y"], data_at_time["z"], c="r", marker="o" - ) - - # Set plot labels and title - ax.set_xlabel("X Position") - ax.set_ylabel("Y Position") - ax.set_zlabel("Z Position") - ax.set_title(f"Time: {timestamps[time_step]}") - ax.set_xlim([-300, 300]) - ax.set_ylim([-15, 15]) - ax.set_zlim([-10, 30]) - ax.set_aspect("equal") - - -# Create the animation -ani = FuncAnimation(fig, update_plot, frames=len(timestamps), fargs=(ax,)) -ani.save("ani.txt") -# If you want to save the animation to a file -# animation.save('3d_animation.mp4', writer='ffmpeg') -# animation.save('3d_animation_frames/frame_{:04d}.png', writer='pillow', fps=1) diff --git a/subcell_pipeline/simulation/readdy/readdy_data.py b/subcell_pipeline/simulation/readdy/readdy_data.py deleted file mode 100644 index 93053a5..0000000 --- a/subcell_pipeline/simulation/readdy/readdy_data.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python - -from typing import Dict, List - -import numpy as np - - -class TopologyData: - uid: int - type_name: str - particle_ids: List[int] - - def __init__(self, uid: int, type_name: str, particle_ids: List[int]): - """ - Data class representing a ReaDDy topology of connected particles. - - - Parameters - ---------- - uid: int - Unique ID of the topology from ReaDDy. - type_name: str - ReaDDy type name of the topology. - particle_ids: List[int] - List of unique IDs of each particle in the topology. 
- """ - self.uid = uid - self.type_name = type_name - self.particle_ids = particle_ids - - def __str__(self) -> str: - """String with all data.""" - return ( - "Topology(\n" - f" id = {self.uid}\n" - f" type_name = {self.type_name}\n" - f" particles = {self.particle_ids}\n" - ")" - ) - - -class ParticleData: - uid: int - type_name: str - position: np.ndarray - neighbor_ids: List[int] - - def __init__( - self, uid: int, type_name: str, position: np.ndarray, neighbor_ids: List[int] - ): - """ - Data class representing a ReaDDy particle. - - - Parameters - ---------- - uid: int - Unique ID of the particle from ReaDDy. - type_name: str - ReaDDy type name of the particle. - position: np.ndarray - XYZ position of the particle. - neighbor_ids: List[int] - List of unique IDs of each neighbor particle - connected by an edge. - """ - self.uid = uid - self.type_name = type_name - self.position = position - self.neighbor_ids = neighbor_ids - - def __str__(self) -> str: - """String with all data.""" - return ( - f"Particle(\n" - f" id = {self.uid}\n" - f" type_name = {self.type_name}\n" - f" position = {self.position}\n" - f" neighbors = {self.neighbor_ids}\n" - ")" - ) - - -class FrameData: - time: float - topologies: Dict[int, TopologyData] - particles: Dict[int, ParticleData] - edges: List[np.ndarray] - - def __init__( - self, - time: float, - topologies: Dict[int, TopologyData] = None, - particles: Dict[int, ParticleData] = None, - edges: List[np.ndarray] = None, - ): - """ - Data class representing one ReaDDy timestep. - - - Parameters - ---------- - time: float - Current time of the simulation for this frame. - topologies: Dict[int, TopologyData] (optional) - Mapping of topology ID to a TopologyData for each topology. - Default: {} (added by ReaddyLoader._shape_trajectory_data()) - particles: Dict[int, ParticleData] (optional) - Mapping of particle ID to a ParticleData for each particle. 
- Default: {} (added by ReaddyLoader._shape_trajectory_data()) - edges: List[np.ndarray (shape = 2 x 3)] (optional) - List of edges as position of each of the two particles - connected by the edge. - Default: [] (added by ReaddyLoader._shape_trajectory_data()) - """ - self.time = time - self.topologies = topologies if topologies is not None else {} - self.particles = particles if particles is not None else {} - self.edges = edges if edges is not None else [] - - def __str__(self) -> str: - """String with topology and particle data.""" - top_str = "\n" - for top_id in self.topologies: - top_str += f"{top_id} : \n{self.topologies[top_id]}\n" - p_str = "\n" - for p_id in self.particles: - p_str += f"{p_id} : \n{self.particles[p_id]}\n" - return ( - f"Frame(\n" - f" time={self.time}\n" - f" topologies=\n{top_str}\n\n" - f" particles=\n{p_str}\n\n" - ")" - ) diff --git a/subcell_pipeline/simulation/readdy/readdy_loading_example.py b/subcell_pipeline/simulation/readdy/readdy_loading_example.py deleted file mode 100644 index 11f289d..0000000 --- a/subcell_pipeline/simulation/readdy/readdy_loading_example.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -import argparse - -import numpy as np -from subcell_analysis.readdy import ReaddyLoader, ReaddyPostProcessor - - -class Args(argparse.Namespace): - def __init__(self) -> None: - self.__parse() - - def __parse(self) -> None: - p = argparse.ArgumentParser( - prog="readdy-actin-fiber-points", - description=( - "Load a ReaDDy actin trajectory and " - "calculate actin fiber control points." 
- ), - ) - p.add_argument( - "h5_file_path", - type=str, - help="The path to the ReaDDy .h5 file", - ) - p.parse_args(namespace=self) - - -def main() -> None: - args = Args() - post_processor = ReaddyPostProcessor( - ReaddyLoader(args.h5_file_path).trajectory(), - box_size=600.0 * np.ones(3), - ) - fiber_chain_ids = post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, - ) - axis_positions, _ = post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] - ), - ideal_vector_to_axis=np.array( - [-0.01056751, -1.47785105, -0.65833209], - ), - ) - fiber_points = post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - segment_length=10.0, - ) - print(fiber_points) - - -if __name__ == "__main__": - main() diff --git a/subcell_pipeline/visualization/README.md b/subcell_pipeline/visualization/README.md new file mode 100644 index 0000000..3ea3d5b --- /dev/null +++ b/subcell_pipeline/visualization/README.md @@ -0,0 +1,20 @@ +# Visualization + +Visualization of simulation trajectories and data using [Simularium](https://simularium.allencell.org/). 
+ +## Individual simulations + +- **Visualize ReaDDy simulation trajectories** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_readdy_trajectories.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_readdy_trajectories.html)) +- **Visualize Cytosim simulation trajectories** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_cytosim_trajectories.html)) + +## Combined simulations + +- **Visualize combined ReaDDy and Cytosim simulation trajectories** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_combined_trajectories.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_combined_trajectories.html)) + +## Tomography data + +- **Visualize actin CME tomography data** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_tomography_data.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_tomography_data.html)) + +## Dimensionality reduction + +- **Visualize dimensionality reduction analysis of actin filaments** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_dimensionality_reduction.html)) diff --git a/subcell_pipeline/visualization/__init__.py b/subcell_pipeline/visualization/__init__.py new file mode 100644 index 0000000..ca68d74 --- /dev/null +++ b/subcell_pipeline/visualization/__init__.py @@ -0,0 +1 @@ +"""Visualization methods and notebooks.""" diff --git 
a/subcell_pipeline/visualization/_visualize_combined_trajectories.py b/subcell_pipeline/visualization/_visualize_combined_trajectories.py new file mode 100644 index 0000000..e801f43 --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_combined_trajectories.py @@ -0,0 +1,96 @@ +# %% [markdown] +# # Visualize combined ReaDDy and Cytosim simulation trajectories + +# %% [markdown] +""" +Notebook contains steps for visualizing ReaDDy and Cytosim simulations of a +single actin fiber using [Simularium](https://simularium.allencell.org/). + +- [Define visualization settings](#define-visualization-settings) +- [Visualize combined trajectories](#visualize-combined-trajectories) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.combined_trajectory import ( + visualize_combined_trajectories, +) + +# %% [markdown] +""" +## Define visualization settings +""" + +# %% +# S3 buckets for simulation and visualization input and output files +buckets: dict[str, str] = { + "combined": "s3://subcell-working-bucket", + "readdy": "s3://readdy-working-bucket", + "cytosim": "s3://cytosim-working-bucket", +} + +# Names of the simulation series for each simulator +series_names: dict[str, str] = { + "readdy": "ACTIN_COMPRESSION_VELOCITY", + "cytosim": "COMPRESSION_VELOCITY", +} + +# List of condition file keys for each velocity +condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Replicate ids for simulations +replicates: list[int] = [1, 2, 3, 4, 5] + +# Number of timepoints +n_timepoints = 201 + +# List of simulators and colors +simulator_colors = { + "cytosim": "#1cbfaa", + "readdy": "#ff8f52", +} + +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" 
+temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + + +# %% [markdown] +""" +## Visualize combined trajectories + +Visualize all compression simulations from ReaDDy and Cytosim together in +Simularium. + +- Input: `(series_name)/samples/(series_name)_(condition_key)_(replicate).csv` +- Output: `actin_compression_cytosim_readdy.simularium` +""" + +# %% +visualize_combined_trajectories( + buckets, + series_names, + condition_keys, + replicates, + n_timepoints, + simulator_colors, + str(temp_path), + metrics=metrics, +) diff --git a/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py b/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py new file mode 100644 index 0000000..c6cf299 --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py @@ -0,0 +1,127 @@ +# %% [markdown] +# # Visualize Cytosim simulation trajectories + +# %% [markdown] +""" +Notebook contains steps for visualizing Cytosim simulations of a single actin +fiber using [Simularium](https://simularium.allencell.org/). 
+ +- [Define visualization settings](#define-visualization-settings) +- [Visualize compression simulations](#visualize-compression-simulations) +- [Visualize no compression simulations](#visualize-no-compression-simulations) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.individual_trajectory import ( + visualize_individual_cytosim_trajectories, +) + +# %% [markdown] +""" +## Define visualization settings + +Define simulation and visualization settings that are shared between different +simulation series. +""" + +# %% +# S3 bucket for input and output files +bucket: str = "s3://cytosim-working-bucket" + +# Number of timepoints +n_timepoints = 200 + +# Specify whether the visualization should be recalculated. Set this to true if +# you make changes to any visualization functions. +recalculate: bool = True + +# Random seeds for simulations +random_seeds: list[int] = [1, 2, 3, 4, 5] + +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + +# %% [markdown] +""" +## Visualize compression simulations + +The `COMPRESSION_VELOCITY` simulation series compresses a single 500 nm actin +fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five +replicates each. + +Iterate through all condition keys and replicates to load simulation output +files and visualize them. 
If the visualization file for a given condition key +and replicate already exists and recalculate is False, visualization is skipped. + +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index)/` +- Output: `(series_name)/viz/(series_name)_(condition_key)_(seed).simularium` +""" + +# %% +# Name of the simulation series +compression_series_name: str = "COMPRESSION_VELOCITY" + +# List of condition file keys for each velocity +compression_condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# %% +visualize_individual_cytosim_trajectories( + bucket, + compression_series_name, + compression_condition_keys, + random_seeds, + n_timepoints, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) + +# %% [markdown] +""" +## Visualize no compression simulations + +The `NO_COMPRESSION` simulation series simulates a single actin fiber with a +free barbed end across five replicates. + +Iterate through all replicates to load simulation output files and visualize +them. If the visualization file for a given replicate already exists and +recalculate is False, visualization is skipped. 
+ +- Input: `(series_name)/outputs/(series_name)_(index)/` +- Output: `(series_name)/viz/(series_name)_(seed).simularium` +""" + +# %% +# Name of the simulation series +no_compression_series_name: str = "NO_COMPRESSION" + +# %% +visualize_individual_cytosim_trajectories( + bucket, + no_compression_series_name, + [""], + random_seeds, + n_timepoints, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py new file mode 100644 index 0000000..26ed95f --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -0,0 +1,101 @@ +# %% [markdown] +# # Visualize dimensionality reduction analysis of actin filaments + +# %% [markdown] +""" +Notebook contains steps for visualizing dimensionality reduction using PCA for +actin fibers. + +- [Define visualization settings](#define-visualization-settings) +- [Visualize inverse PCA](#visualize-inverse-pca) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.visualization.dimensionality_reduction import ( + visualize_dimensionality_reduction, +) + +# %% [markdown] +""" +## Define visualization settings + +Define simulation and visualization settings that are shared between different +simulation series. 
+""" + +# %% +# S3 bucket for input and output files +bucket = "s3://subcell-working-bucket" + +# File key for PCA results dataframe +pca_results_key = "actin_compression_pca_results.csv" + +# File key for PCA object pickle +pca_pickle_key = "actin_compression_pca.pkl" + +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + +# Select how PC distributions are shown +# - True to scroll through the PC distributions over time +# - False to show all together in one timestep +distribution_over_time = True + +# Select if simulator distributions are shown +# - True to show ReaDDy and Cytosim separately +# - False to show all together +simulator_detail = True + +# Ranges to sample for each PC +sample_ranges: dict[str, list[list[float]]] = { + "Combined": [ + [-1200, 900], # pc1 + [-550, 250], # pc2 + ], + "ReaDDy": [ + [-1078, 782], # pc1 + [-517, 154], # pc2 + ], + "Cytosim": [ + [-1064, 758], # pc1 + [-174, 173], # pc2 + ], +} + +# Select how PCs are saved +# - True to save each PC in a separate file +# - False to save all together +separate_pcs = True + +# Number of samples for each PC distribution +sample_resolution = 200 + +# %% [markdown] +""" +## Visualize inverse PCA + +Visualize PCA space for actin fibers. 
+ +- Input: `actin_compression_pca_results.csv` and `actin_compression_pca.pkl` +- Output: `(name)/(name).simularium` +""" + +# %% +visualize_dimensionality_reduction( + bucket, + pca_results_key, + pca_pickle_key, + distribution_over_time, + simulator_detail, + sample_ranges, + separate_pcs, + sample_resolution, + str(temp_path), +) diff --git a/subcell_pipeline/visualization/_visualize_readdy_trajectories.py b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py new file mode 100644 index 0000000..d84c47a --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py @@ -0,0 +1,145 @@ +# %% [markdown] +# # Visualize ReaDDy simulation trajectories + +# %% [markdown] +""" +Notebook contains steps for visualizing ReaDDy simulations of a single actin +fiber using [Simularium](https://simularium.allencell.org/). + +- [Define visualization settings](#define-visualization-settings) +- [Visualize compression simulations](#visualize-compression-simulations) +- [Visualize no compression simulations](#visualize-no-compression-simulations) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.individual_trajectory import ( + visualize_individual_readdy_trajectories, +) + +# %% [markdown] +""" +## Define visualization settings + +Define simulation and visualization settings that are shared between different +simulation series. +""" + +# %% +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# Number of timepoints +n_timepoints = 200 + +# Number of monomer points per fiber +n_monomer_points = 200 + +# Specify whether the visualization should be recalculated. 
Set this to true if +# you make changes to any visualization functions. +recalculate: bool = True + +# Temporary path to save downloaded trajectories +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + +# %% [markdown] +""" +## Visualize compression simulations + +The `ACTIN_COMPRESSION_VELOCITY` simulation series compresses a single 500 nm +actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five +replicates each. + +Iterate through all condition keys and replicates to load simulation output +files and visualize them. If the visualization file for a given condition key +and replicate already exists and recalculate is False, visualization is skipped. + +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index + 1).h5` +- Output: `(series_name)/viz/(series_name)_(condition_key)_(index + 1).simularium` +""" + +# %% +# Name of the simulation series +compression_series_name: str = "ACTIN_COMPRESSION_VELOCITY" + +# List of condition file keys for each velocity +compression_condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Total number of steps for each condition +compression_total_steps: dict[str, int] = { + "0047": int(3.2e8), + "0150": int(1e8), + "0470": int(3.2e7), + "1500": int(1e7), +} + +# %% +visualize_individual_readdy_trajectories( + bucket, + compression_series_name, + compression_condition_keys, + n_replicates, + n_timepoints, + n_monomer_points, + compression_total_steps, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) + +# %% [markdown] +""" +## Visualize no compression simulations + +The `ACTIN_NO_COMPRESSION` simulation series simulates a single actin fiber with +a free barbed end 
across five replicates. + +Iterate through all replicates to load simulation output files and visualize +them. If the visualization file for a given replicate already exists and +recalculate is False, visualization is skipped. + +- Input: `(series_name)/outputs/(series_name)_(index + 1).h5` +- Output: `(series_name)/viz/(series_name)_(index + 1).simularium` +""" + +# %% +# Name of the simulation series +no_compression_series_name: str = "ACTIN_NO_COMPRESSION" + +# Total number of steps for each condition +no_compression_total_steps: dict[str, int] = {"": int(1e7)} + +# %% +visualize_individual_readdy_trajectories( + bucket, + no_compression_series_name, + [""], + n_replicates, + n_timepoints, + n_monomer_points, + no_compression_total_steps, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) diff --git a/subcell_pipeline/visualization/_visualize_tomography_data.py b/subcell_pipeline/visualization/_visualize_tomography_data.py new file mode 100644 index 0000000..88f91be --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_tomography_data.py @@ -0,0 +1,63 @@ +# %% [markdown] +# # Visualize actin CME tomography data + +# %% [markdown] +""" +Notebook contains steps for visualizing segmented tomography data for actin +fibers using [Simularium](https://simularium.allencell.org/). + +- [Define visualization settings](#define-visualization-settings) +- [Visualize tomography data](#visualize-tomography-data) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.tomography import visualize_tomography + +# %% [markdown] +""" +## Define visualization settings + +Define simulation and visualization settings that are shared between different +simulation series. 
+""" + +# %% +# Tomography dataset name +name = "actin_cme_tomography" + +# S3 bucket for input and output files +bucket = "s3://subcell-working-bucket" + +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CALC_BENDING_ENERGY, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + +# %% [markdown] +""" +## Visualize tomography data + +- Input: `(name)/(name)_coordinates_sampled.csv` +- Output: `(name)/(name).simularium` +""" + +# %% +visualize_tomography(bucket, name, str(temp_path), metrics) diff --git a/subcell_pipeline/visualization/add_readdy_plots.py b/subcell_pipeline/visualization/add_readdy_plots.py deleted file mode 100644 index 77b82ac..0000000 --- a/subcell_pipeline/visualization/add_readdy_plots.py +++ /dev/null @@ -1,116 +0,0 @@ -import os - -import boto3 -import numpy as np -from botocore.exceptions import ClientError -from simularium_readdy_models.visualization import ActinVisualization - -BUCKET_NAME = "readdy-working-bucket" - -s3_client = boto3.client("s3") - - -def download_h5_file(file_name): - """ - Download files (skip files that already exist) - """ - if os.path.isfile(f"data/aws_downloads/{file_name}.h5"): - return - try: - s3_client.download_file( - BUCKET_NAME, - f"outputs/{file_name}.h5", - f"data/aws_downloads/{file_name}.h5", - ) - print(f"Downloaded {file_name}") - except ClientError: - print(f"!!! 
Failed to download {file_name}") - - -def download_data(conditions, num_repeats): - if not os.path.isdir("data"): - os.makedirs("data") - if not os.path.isdir("data/aws_downloads"): - os.makedirs("data/aws_downloads") - for repeat in range(num_repeats): - download_h5_file(f"actin_compression_baseline_{repeat}_0") - for condition in conditions: - download_h5_file(f"actin_compression_velocity={condition}_{repeat}") - - -def add_plots(parameters, total_steps, conditions, num_repeats): - """ - Re-visualize the trajectories to add plots - """ - for repeat in range(num_repeats): - ActinVisualization.analyze_and_visualize_trajectory( - f"data/aws_downloads/actin_compression_baseline_{repeat}_0", - total_steps["baseline"], - parameters, - ) - for condition in conditions: - ActinVisualization.analyze_and_visualize_trajectory( - f"data/aws_downloads/actin_compression_velocity={condition}_{repeat}", - total_steps[condition], - parameters, - ) - - -def upload_simularium_file(file_name): - """ - Upload files (warning for files that fail) - """ - if not os.path.isfile(f"data/aws_downloads/{file_name}.h5.simularium"): - print(f"!!! Not found, could not upload {file_name}") - return - try: - s3_client.upload_file( - f"data/aws_downloads/{file_name}.h5.simularium", - BUCKET_NAME, - f"outputs/{file_name}.h5.simularium", - ) - print(f"Uploaded {file_name}") - except ClientError as e: - print(f"!!! 
Failed to upload {file_name}") - - -def upload_to_s3(conditions, num_repeats): - for repeat in range(num_repeats): - upload_simularium_file(f"actin_compression_baseline_{repeat}_0") - for condition in conditions: - upload_simularium_file(f"actin_compression_velocity={condition}_{repeat}") - - -def main(): - num_repeats = 3 - conditions = [ - "4.7", - "15", - "47", - "150", - ] - total_steps = { - "4.7": 3.2e8, - "15": 1e8, - "47": 3.2e7, - "150": 1e7, - "baseline": 1e7, - } - parameters = { - "box_size": np.array([600.0, 600.0, 600.0]), - "internal_timestep": 0.1, - "longitudinal_bonds": True, - "periodic_boundary": False, - "plot_actin_structure": True, - "plot_actin_compression": True, - "visualize_edges": True, - "visualize_normals": True, - "visualize_control_pts": True, - } - # download_data(conditions, num_repeats) - # add_plots(parameters, total_steps, conditions, num_repeats) - upload_to_s3(conditions, num_repeats) - - -if __name__ == "__main__": - main() diff --git a/subcell_pipeline/visualization/combined_trajectory.py b/subcell_pipeline/visualization/combined_trajectory.py new file mode 100644 index 0000000..18f498f --- /dev/null +++ b/subcell_pipeline/visualization/combined_trajectory.py @@ -0,0 +1,243 @@ +import os +from typing import Optional + +import numpy as np +import pandas as pd +from io_collection.keys.check_key import check_key +from io_collection.load.load_buffer import load_buffer +from io_collection.load.load_dataframe import load_dataframe +from io_collection.save.save_buffer import save_buffer +from simulariumio import ( + DISPLAY_TYPE, + AgentData, + CameraData, + DisplayData, + MetaData, + TrajectoryConverter, + TrajectoryData, + UnitData, +) + +from subcell_pipeline.analysis.compression_metrics.compression_analysis import ( + get_compression_metric_data, +) +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import 
align_fibers +from subcell_pipeline.visualization.scatter_plots import make_empty_scatter_plots + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) + + +def _load_fiber_points_from_dataframe( + dataframe: pd.DataFrame, n_timepoints: int +) -> np.ndarray: + """ + Load and reshape fiber points from sampled dataframe. + + Sampled dataframe is in the shape (n_timepoints x n_fiber_points, 3); method + returns the dataframe reshaped to (n_timepoints, n_fiber_points x 3). If the + sampled dataframe does not have the expected number of timepoints, method + will raise an exception. + """ + + dataframe.sort_values(by=["time", "fiber_point"]) + total_steps = dataframe.time.unique().shape[0] + + if total_steps != n_timepoints: + raise Exception( + f"Requested number of timesteps [ {n_timepoints} ] does not match " + f"number of timesteps in dataset [ {total_steps} ]." + ) + + align_fibers(dataframe) + + fiber_points = [] + for _, group in dataframe.groupby("time"): + fiber_points.append(group[["xpos", "ypos", "zpos"]].values.flatten()) + + return np.array(fiber_points) + + +def get_combined_trajectory_converter( + fiber_points: list[np.ndarray], + type_names: list[str], + display_data: dict[str, DisplayData], +) -> TrajectoryConverter: + """ + Generate a TrajectoryConverter to visualize simulations from ReaDDy and + Cytosim together. 
+ """ + + total_conditions = len(fiber_points) + total_steps = fiber_points[0].shape[0] + total_subpoints = fiber_points[0].shape[1] + + traj_data = TrajectoryData( + meta_data=MetaData( + box_size=BOX_SIZE, + camera_defaults=CameraData( + position=np.array([75.0, 220.0, 15.0]), + look_at_position=np.array([75.0, 75.0, 0.0]), + fov_degrees=60.0, + ), + trajectory_title="Actin compression in Cytosim and Readdy", + ), + agent_data=AgentData( + times=np.arange(total_steps), + n_agents=total_conditions * np.ones(total_steps), + viz_types=1001 + * np.ones((total_steps, total_conditions)), # fiber viz type = 1001 + unique_ids=np.array(total_steps * [list(range(total_conditions))]), + types=total_steps * [type_names], + positions=np.zeros((total_steps, total_conditions, 3)), + radii=np.ones((total_steps, total_conditions)), + n_subpoints=total_subpoints * np.ones((total_steps, total_conditions)), + subpoints=np.moveaxis(np.array(fiber_points), [0, 1], [1, 0]), + display_data=display_data, + ), + time_units=UnitData("count"), # frames + spatial_units=UnitData("nm"), # nanometer + ) + return TrajectoryConverter(traj_data) + + +def _add_combined_plots( + converter: TrajectoryConverter, + metrics: list[CompressionMetric], + metrics_data: pd.DataFrame, + n_timepoints: int, + plot_names: list[tuple[str, str, int]], + type_names: list[str], +) -> None: + """Add plots for combined trajectories with calculated metrics.""" + scatter_plots = make_empty_scatter_plots(metrics, total_steps=n_timepoints) + + for metric, plot in scatter_plots.items(): + for plot_name, type_name in zip(plot_names, type_names): + simulator, key, seed = plot_name + simulator_data = metrics_data[simulator] + data = simulator_data[ + (simulator_data["key"] == key) & (simulator_data["seed"] == seed) + ] + plot.ytraces[type_name] = np.array(data[metric.value]) + converter.add_plot(plot, "scatter") + + +def visualize_combined_trajectories( + buckets: dict[str, str], + series_names: dict[str, str], + 
condition_keys: list[str], + replicates: list[int], + n_timepoints: int, + simulator_colors: dict[str, str], + temp_path: str, + metrics: Optional[list[CompressionMetric]] = None, + recalculate: bool = False, +) -> None: + """ + Visualize combined simulations from ReaDDy and Cytosim for select conditions + and number of replicates. + + Parameters + ---------- + buckets + Names of S3 buckets for input and output files for each simulator and + visualization. + series_names + Names of simulation series for each simulator. + condition_keys + List of condition keys. + replicates + Simulation replicates ids. + n_timepoints + Number of equally spaced timepoints to visualize. + simulator_colors + Map of simulator name to color. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + recalculate + True to recalculate visualization files, False otherwise. + """ + + fiber_points = [] + type_names = [] + plot_names = [] + display_data = {} + all_metrics_data = {} + + for simulator, color in simulator_colors.items(): + bucket = buckets[simulator] + series_name = series_names[simulator] + + # Load calculated compression metric data. + if metrics is not None: + all_metrics_data[simulator] = get_compression_metric_data( + bucket, + series_name, + condition_keys, + replicates, + metrics, + recalculate=recalculate, + ) + else: + metrics = [] + all_metrics_data[simulator] = pd.DataFrame(columns=["key", "seed"]) + + for condition_key in condition_keys: + series_key = ( + f"{series_name}_{condition_key}" if condition_key else series_name + ) + + for replicate in replicates: + dataframe_key = ( + f"{series_name}/samples/{series_key}_{replicate:06d}.csv" + ) + + # Skip if input dataframe does not exist. + if not check_key(bucket, dataframe_key): + print( + f"Dataframe not available for {simulator} " + f"[ { dataframe_key } ]. Skipping." 
+ ) + continue + + print( + f"Loading data for [ {simulator} ] " + f"condition [ { dataframe_key } ] " + f"replicate [ {replicate} ]" + ) + + dataframe = load_dataframe(bucket, dataframe_key) + fiber_points.append( + _load_fiber_points_from_dataframe(dataframe, n_timepoints) + ) + + condition = int(condition_key) / 10 + condition = round(condition) if condition_key[-1] == "0" else condition + + type_names.append(f"{simulator}#{condition} um/s {replicate}") + plot_names.append((simulator, condition_key, replicate)) + display_data[type_names[-1]] = DisplayData( + name=type_names[-1], + display_type=DISPLAY_TYPE.FIBER, + color=color, + ) + + converter = get_combined_trajectory_converter( + fiber_points, type_names, display_data + ) + + if metrics: + _add_combined_plots( + converter, metrics, all_metrics_data, n_timepoints, plot_names, type_names + ) + + output_key = "actin_compression_cytosim_readdy.simularium" + local_file_path = os.path.join(temp_path, output_key) + converter.save(output_path=local_file_path.replace(".simularium", "")) + output_bucket = buckets["combined"] + save_buffer(output_bucket, output_key, load_buffer(temp_path, output_key)) diff --git a/subcell_pipeline/visualization/create_simularium_outputs.ipynb b/subcell_pipeline/visualization/create_simularium_outputs.ipynb deleted file mode 100644 index 898574c..0000000 --- a/subcell_pipeline/visualization/create_simularium_outputs.ipynb +++ /dev/null @@ -1,344 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate Simularium Outputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import boto3\n", - "import pandas as pd\n", - "import numpy as np\n", - "from subcell_analysis.cytosim.post_process_cytosim import cytosim_to_simularium\n", - "from subcell_analysis.compression_analysis import COMPRESSIONMETRIC" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - 
"outputs": [], - "source": [ - "from simulariumio.cytosim import CytosimConverter\n", - "from simulariumio import ScatterPlotData, TrajectoryConverter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_repeats = 5\n", - "config_id = 4" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Download files (only needs to be done once)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "s3_client = boto3.client(\"s3\")\n", - "for repeat in range(num_repeats):\n", - " s3_client.download_file(\"cytosim-working-bucket\", f\"vary_compress_rate0006/outputs/{repeat}/fiber_segment_curvature.txt\", f\"data/fiber_segment_curvature_{repeat}.txt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process single repeat" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "repeat = 0\n", - "input_file_path = f\"data/fiber_segment_curvature_{repeat}.txt\"\n", - "\n", - "box_size = 3.0\n", - "scale_factor = 100\n", - "fiber_data = cytosim_to_simularium(input_file_path, box_size=box_size, scale_factor=scale_factor)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create cytosim converter object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cytosim_converter = CytosimConverter(fiber_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Read metric data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_path = f\"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv\"\n", - "df = pd.read_csv(df_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add metric plots" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_metrics = [COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, COMPRESSIONMETRIC.SUM_BENDING_ENERGY, COMPRESSIONMETRIC.PEAK_ASYMMETRY, COMPRESSIONMETRIC.NON_COPLANARITY]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for metric in plot_metrics:\n", - " metric_by_time = df.groupby([\"time\"])[metric.value].mean()\n", - " cytosim_converter.add_plot(\n", - " ScatterPlotData(\n", - " title=f\"{metric} over time\",\n", - " xaxis_title=\"Time\",\n", - " yaxis_title=metric.value,\n", - " xtrace=np.arange(len(metric_by_time))*1E-5,\n", - " ytraces={\n", - " f\"repeat {repeat}\": metric_by_time,\n", - " },\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save converted data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cytosim_converter.save(f\"outputs/vary_compress_rate_0006_repeat_{repeat}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process multiple repeats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "box_size = 3.0\n", - "scale_factor = 100\n", - "colors = [\"#F0F0F0\", \"#0000FF\", \"#FF0000\", \"#00FF00\", \"#FF00FF\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create initial trajectory data object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_file_path = f\"data/fiber_segment_curvature_0.txt\"\n", - "fiber_data = cytosim_to_simularium(input_file_path, box_size=box_size, scale_factor=scale_factor, color=colors[0], actin_number=0)\n", - "cytosim_converter = CytosimConverter(fiber_data)\n", - "\n", - "trajectory_data = cytosim_converter._data" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "Append additional repeats to trajectory data object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for repeat in range(1, num_repeats):\n", - " input_file_path = f\"data/fiber_segment_curvature_{repeat}.txt\"\n", - " fiber_data = cytosim_to_simularium(input_file_path, box_size=box_size, scale_factor=scale_factor, color=colors[repeat], actin_number=repeat)\n", - " cytosim_converter = CytosimConverter(fiber_data)\n", - " new_agent_data = cytosim_converter._data.agent_data\n", - "\n", - " trajectory_data.append_agents(new_agent_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_repeats_converter = TrajectoryConverter(trajectory_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add plots for all repeats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_metrics = [COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, COMPRESSIONMETRIC.SUM_BENDING_ENERGY, COMPRESSIONMETRIC.PEAK_ASYMMETRY, COMPRESSIONMETRIC.NON_COPLANARITY]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get metrics for all repeats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_list = []\n", - "for repeat in range(num_repeats):\n", - " df_path = f\"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv\"\n", - " df = pd.read_csv(df_path) \n", - " df[\"repeat\"] = repeat\n", - " df_list.append(df)\n", - "df_all = pd.concat(df_list)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add plots to converter object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for metric in 
plot_metrics:\n", - " ytraces = {}\n", - " for repeat, df_repeat in df_all.groupby(\"repeat\"):\n", - " ytraces[f\"repeat {repeat}\"] = df_repeat.groupby([\"time\"])[metric.value].mean()\n", - "\n", - " all_repeats_converter.add_plot(\n", - " ScatterPlotData(\n", - " title=f\"{metric.value} over time\",\n", - " xaxis_title=\"Time\",\n", - " yaxis_title=metric.value,\n", - " xtrace=np.arange(metric_by_time.shape[0])*1E-5,\n", - " ytraces=ytraces,\n", - " render_mode=\"lines\",\n", - " )\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save converted data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_repeats_converter.save(f\"outputs/vary_compress_rate_0006_all_repeats\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "subcell_analysis", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/subcell_pipeline/visualization/create_simularium_outputs.py b/subcell_pipeline/visualization/create_simularium_outputs.py deleted file mode 100644 index 81b624d..0000000 --- a/subcell_pipeline/visualization/create_simularium_outputs.py +++ /dev/null @@ -1,580 +0,0 @@ -import argparse -import math -import os -import sys -from typing import Dict, Tuple - -import boto3 -import numpy as np -import pandas as pd -from botocore.exceptions import ClientError -from pint import UnitRegistry -from scipy.spatial.transform import Rotation -from simulariumio import ( - DISPLAY_TYPE, - AgentData, - CameraData, - DisplayData, - FileConverter, - InputFileData, - MetaData, - ScatterPlotData, - TrajectoryConverter, - TrajectoryData, - UnitData, 
-) -from simulariumio.filters import EveryNthTimestepFilter -from subcell_analysis.compression_analysis import ( - COMPRESSIONMETRIC, - get_asymmetry_of_peak, - get_average_distance_from_end_to_end_axis, - get_bending_energy_from_trace, - get_contour_length_from_trace, - get_third_component_variance, -) -from subcell_analysis.compression_workflow_runner import compression_metrics_workflow -from subcell_analysis.cytosim.post_process_cytosim import cytosim_to_simularium - -CYTOSIM_CONDITIONS = { - "0001": 0.48, - "0002": 1.5, - "0003": 4.7, - "0004": 15, - "0005": 47, - "0006": 150, -} -READDY_CONDITIONS = [ - 4.7, - 15, - 47, - 150, -] -NUM_REPEATS = 5 -TOTAL_STEPS = 200 -POINTS_PER_FIBER = 200 -BENDING_ENERGY_SCALE_FACTOR = 1000.0 -CYTOSIM_SCALE_FACTOR = 1000.0 -BOX_SIZE = 600.0 - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Visualizes ReaDDy and Cytosim actin simulations" - ) - parser.add_argument("--combined", action=argparse.BooleanOptionalAction) - parser.set_defaults(combined=False) - parser.add_argument("--cytosim", action=argparse.BooleanOptionalAction) - parser.set_defaults(cytosim=False) - parser.add_argument("--upload", action=argparse.BooleanOptionalAction) - parser.set_defaults(upload=False) - return parser.parse_args() - - -s3_client = boto3.client("s3") - - -def download_s3_file(bucket_name, s3_path, dest_path) -> bool: - """ - Download files (skip files that already exist) - """ - if os.path.isfile(dest_path): - # already downloaded - return False - try: - s3_client.download_file( - bucket_name, - s3_path, - dest_path, - ) - print(f"Downloaded {dest_path}") - return True - except ClientError: - print(f"!!! Failed to download {s3_path}") - return False - - -def upload_file_to_s3(bucket_name, src_path, s3_path) -> bool: - """ - Upload a file to an S3 bucket - """ - if not os.path.isfile(src_path): - print(f"!!! 
File does not exist to upload {src_path}") - return False - try: - s3_client.upload_file(src_path, bucket_name, s3_path) - print(f"Uploaded to {s3_path}") - return True - except ClientError: - print(f"!!! Failed to upload {src_path}") - return False - - -def make_download_dirs(): - if not os.path.isdir("data"): - os.makedirs("data") - if not os.path.isdir("data/aws_downloads"): - os.makedirs("data/aws_downloads") - - -def download_combined_csv_data(): - make_download_dirs() - # combined csv is in ReaDDy bucket for now - download_s3_file( - bucket_name="readdy-working-bucket", - s3_path=f"outputs/{COMBINED_CSV_PATH}", - dest_path=f"data/aws_downloads/{COMBINED_CSV_PATH}", - ) - - -def download_cytosim_trajectory_data(): - make_download_dirs() - for condition in CYTOSIM_CONDITIONS.keys(): - for repeat_ix in range(NUM_REPEATS): - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"vary_compress_rate{condition}/outputs/{repeat_ix}/fiber_points.txt", - dest_path=f"data/aws_downloads/fiber_points_{condition}_{repeat_ix}.txt", - ) - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"vary_compress_rate{condition}/outputs/{repeat_ix}/singles.txt", - dest_path=f"data/aws_downloads/singles_{condition}_{repeat_ix}.txt", - ) - # baseline trajectories - for repeat_ix in range(NUM_REPEATS): - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"free_barbed_end_final/outputs/{repeat_ix}/fiber_points.txt", - dest_path=f"data/aws_downloads/fiber_points_baseline_{repeat_ix}.txt", - ) - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"free_barbed_end_final/outputs/{repeat_ix}/singles.txt", - dest_path=f"data/aws_downloads/singles_baseline_{repeat_ix}.txt", - ) - - -def empty_scatter_plots( - total_steps: int = -1, - times: np.ndarray = None, - time_units: str = None, -) -> Dict[COMPRESSIONMETRIC, ScatterPlotData]: - if total_steps < 0 and times is None: - raise Exception("Either total_steps or times array is 
required for plots") - elif times is None: - # use normalized time - xlabel = "T (normalized)" - xtrace = (1 / float(total_steps)) * np.arange(total_steps) - else: - # use actual time - xlabel = f"T ({time_units})" - xtrace = times - total_steps = times.shape[0] - return { - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: ScatterPlotData( - title="Average Perpendicular Distance", - xaxis_title=xlabel, - yaxis_title="distance (nm)", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 85.0 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.CALC_BENDING_ENERGY: ScatterPlotData( - title="Bending Energy", - xaxis_title=xlabel, - yaxis_title="energy", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 10.0 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.NON_COPLANARITY: ScatterPlotData( - title="Non-coplanarity", - xaxis_title=xlabel, - yaxis_title="3rd component variance from PCA", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 0.03 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.PEAK_ASYMMETRY: ScatterPlotData( - title="Peak Asymmetry", - xaxis_title=xlabel, - yaxis_title="normalized peak distance", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 0.5 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.CONTOUR_LENGTH: ScatterPlotData( - title="Contour Length", - xaxis_title=xlabel, - yaxis_title="filament contour length (nm)", - xtrace=xtrace, - ytraces={ - "<<<": 480 * np.ones(total_steps), - ">>>": 505 * np.ones(total_steps), - }, - render_mode="lines", - ), - } - - -def rmsd(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray: - return np.sqrt(((((vec1 - vec2) ** 2)) * 3).mean()) - - -def align(fibers: np.ndarray) -> np.ndarray: - """ - Rotationally align the given fibers around the x-axis. 
- - Parameters - ---------- - fiber_points: np.ndarray (shape = time x fiber x (3 * points_per_fiber)) - Array containing the flattened x,y,z positions of control points - for each fiber at each time. - - Returns - ---------- - aligned_data: np.ndarray - The given data aligned. - """ - # get angle to align each fiber at the last time point - align_by = [] - points_per_fiber = int(fibers.shape[2] / 3) - ref = fibers[-1][0].copy().reshape((points_per_fiber, 3)) - for fiber_ix in range(len(fibers[-1])): - best_rmsd = math.inf - for angle in np.linspace(0, 2 * np.pi, 1000): - rot = Rotation.from_rotvec(angle * np.array([1, 0, 0])) - new_vec = Rotation.apply( - rot, fibers[-1][fiber_ix].copy().reshape((points_per_fiber, 3)) - ) - test_rmsd = rmsd(new_vec, ref) - if test_rmsd < best_rmsd: - best_angle = angle - best_rmsd = test_rmsd - align_by.append(best_angle) - # align all the fibers to ref across all time points - aligned = np.zeros_like(fibers) - for fiber_ix in range(fibers.shape[1]): - rot = Rotation.from_rotvec(align_by[fiber_ix] * np.array([1, 0, 0])) - for time_ix in range(fibers.shape[0]): - fiber = fibers[time_ix][fiber_ix].copy().reshape((points_per_fiber, 3)) - new_fiber = Rotation.apply(rot, fiber) - aligned[time_ix][fiber_ix] = new_fiber.flatten() - return aligned - - -def save_combined_simularium(): - df = pd.read_csv(f"data/aws_downloads/{COMBINED_CSV_PATH}") - simulators = ["cytosim", "readdy"] - colors = { - "cytosim": [ - "#4DFE8A", - "#c1fe4d", - "#fee34d", - "#fe8b4d", - ], - "readdy": [ - "#94dbfc", - "#627EFB", - "#b594fc", - "#e994fc", - ], - } - total_conditions = NUM_REPEATS * len(simulators) * len(CYTOSIM_CONDITIONS.keys()) - subpoints = np.zeros((TOTAL_STEPS, total_conditions, 3 * POINTS_PER_FIBER)) - type_names = [] - display_data = {} - scatter_plots = empty_scatter_plots(total_steps=TOTAL_STEPS) - # these metrics need to be multiplied by 1000 in cytosim because of different units - cytosim_metrics_to_scale = [ - 
COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, - COMPRESSIONMETRIC.CONTOUR_LENGTH, - ] - for sim_ix, simulator in enumerate(simulators): - sim_df = df.loc[df["simulator"] == simulator] - sim_df.sort_values( - by=["repeat", "simulator", "velocity", "time", "monomer_ids"] - ) - for condition_ix, condition in enumerate(READDY_CONDITIONS): - condition_df = sim_df.loc[sim_df["velocity"] == condition] - for repeat_ix in range(NUM_REPEATS): - rep_df = condition_df.loc[condition_df["repeat"] == repeat_ix] - for time_ix in range(TOTAL_STEPS): - ix = ( - (sim_ix * len(READDY_CONDITIONS) * NUM_REPEATS) - + (condition_ix * NUM_REPEATS) - + repeat_ix - ) - subpoints[time_ix][ix] = ( - CYTOSIM_SCALE_FACTOR if simulator == "cytosim" else 1 - ) * np.array( - rep_df[time_ix * TOTAL_STEPS : (time_ix + 1) * TOTAL_STEPS][ - ["xpos", "ypos", "zpos"] - ] - ).flatten() - type_names.append(f"{simulator}#{condition} um/s {repeat_ix}") - display_data[type_names[-1]] = DisplayData( - name=type_names[-1], - display_type=DISPLAY_TYPE.FIBER, - color=colors[simulator][condition_ix], - ) - metrics_df = compression_metrics_workflow( - rep_df.copy(), list(scatter_plots.keys()) - ) - metrics_df = metrics_df[metrics_df["monomer_ids"] == 0] - for metric in scatter_plots: - scale_factor = ( - CYTOSIM_SCALE_FACTOR - if ( - ( - simulator == "cytosim" - and metric in cytosim_metrics_to_scale - ) - or metric == COMPRESSIONMETRIC.CALC_BENDING_ENERGY - ) - else 1.0 - ) - scatter_plots[metric].ytraces[type_names[-1]] = ( - scale_factor * np.array(metrics_df[metric.value]) - ) - traj_data = TrajectoryData( - meta_data=MetaData( - box_size=np.array([BOX_SIZE, BOX_SIZE, BOX_SIZE]), - camera_defaults=CameraData( - position=np.array([10.0, 0.0, 200.0]), - look_at_position=np.array([10.0, 0.0, 0.0]), - fov_degrees=60.0, - ), - trajectory_title="Actin compression in Cytosim and Readdy", - ), - agent_data=AgentData( - times=np.arange(TOTAL_STEPS), - n_agents=total_conditions * np.ones((TOTAL_STEPS)), - viz_types=1001 - * 
np.ones((TOTAL_STEPS, total_conditions)), # fiber viz type = 1001 - unique_ids=np.array(TOTAL_STEPS * [list(range(total_conditions))]), - types=TOTAL_STEPS * [type_names], - positions=np.zeros((TOTAL_STEPS, total_conditions, 3)), - radii=np.ones((TOTAL_STEPS, total_conditions)), - n_subpoints=3 * POINTS_PER_FIBER * np.ones((TOTAL_STEPS, total_conditions)), - subpoints=align(subpoints), - display_data=display_data, - ), - time_units=UnitData("count"), # frames - spatial_units=UnitData("nm"), # nanometer - ) - converter = TrajectoryConverter(traj_data) - for metric, plot in scatter_plots.items(): - converter.add_plot(plot, "scatter") - converter.save(f"data/actin_compression") - - -def time_increment(raw_total_steps): - """ - Find a time increment to get the total steps close to 1000 - """ - if raw_total_steps < 2000: - return 1 - magnitude = math.floor(math.log(raw_total_steps, 10)) - amount = raw_total_steps / 10**magnitude - if amount > 5: - return 5 * 10 ** (magnitude - 3) - return 10 ** (magnitude - 3) - - -ureg = UnitRegistry() - - -def find_time_units(raw_time: float, units: str = "s") -> Tuple[str, float]: - """ - Get the compact time units and a multiplier to put the times in those units - """ - time = ureg.Quantity(raw_time, units) - time = time.to_compact() - return "{:~}".format(time.units), time.magnitude / raw_time - - -def generate_plot_data(subpoints): - n_points = int(subpoints.shape[2] / 3.0) - result = { - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: [], - COMPRESSIONMETRIC.CALC_BENDING_ENERGY: [], - COMPRESSIONMETRIC.NON_COPLANARITY: [], - COMPRESSIONMETRIC.PEAK_ASYMMETRY: [], - COMPRESSIONMETRIC.CONTOUR_LENGTH: [], - } - total_steps = subpoints.shape[0] - for time_ix in range(total_steps): - points = subpoints[time_ix][0].reshape((n_points, 3)) - result[COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE].append( - get_average_distance_from_end_to_end_axis( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.CALC_BENDING_ENERGY].append( - 
BENDING_ENERGY_SCALE_FACTOR - * get_bending_energy_from_trace( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.NON_COPLANARITY].append( - get_third_component_variance( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.PEAK_ASYMMETRY].append( - get_asymmetry_of_peak( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.CONTOUR_LENGTH].append( - get_contour_length_from_trace( - polymer_trace=points, - ) - ) - return result - - -def filter_time(converter) -> TrajectoryConverter: - """ - Use Simulariumio time filter - """ - time_inc = int(converter._data.agent_data.times.shape[0] / 1000.0) - if time_inc < 2: - return converter - converter._data = converter.filter_data( - [ - EveryNthTimestepFilter( - n=time_inc, - ), - ] - ) - return converter - - -def generate_cytosim_simularium(condition, repeat_ix) -> Tuple[TrajectoryData, str]: - is_baseline = condition == "baseline" - velocity = CYTOSIM_CONDITIONS[condition] if not is_baseline else 0.0 - condition_name = f"velocity={velocity}" if not is_baseline else condition - fiber_points_path = f"data/aws_downloads/fiber_points_{condition}_{repeat_ix}.txt" - singles_path = f"data/aws_downloads/singles_{condition}_{repeat_ix}.txt" - output_path = f"data/cytosim_outputs/actin_compression_{condition_name}_{repeat_ix}" - if os.path.isfile(f"{output_path}.simularium"): - print(f"Skipping v={velocity} #{repeat_ix}, output file already exists") - return None, "" - if not os.path.isfile(fiber_points_path): - raise Exception(f"fiber_points_{condition}_{repeat_ix}.txt not found") - if not os.path.isfile(singles_path): - singles_path = None - print(f"Converting Cytosim {condition_name} #{repeat_ix}") - short_condition_name = f"v={velocity}" if not is_baseline else condition - traj_data = cytosim_to_simularium( - title=f"Actin Compression {short_condition_name} {repeat_ix}", - fiber_points_path=fiber_points_path, - singles_path=singles_path, - scale_factor=CYTOSIM_SCALE_FACTOR, - ) - converter = 
filter_time(TrajectoryConverter(traj_data)) - time_units, time_multiplier = find_time_units(converter._data.agent_data.times[-1]) - converter._data.agent_data.times *= time_multiplier - converter._data.time_units = UnitData(time_units) - # plots - plot_data = generate_plot_data(converter._data.agent_data.subpoints) - scatter_plots = empty_scatter_plots( - times=converter._data.agent_data.times, - time_units=time_units, - ) - for metric, plot in scatter_plots.items(): - plot.ytraces["filament"] = np.array(plot_data[metric]) - - try: - converter.add_plot(plot, "scatter") - except: - import ipdb - - ipdb.set_trace() - - return converter._data, f"{condition_name}_{repeat_ix}" - - -def load_all_cytosim_simularium(baseline: bool = True) -> Dict[str, TrajectoryData]: - result = {} - for condition in CYTOSIM_CONDITIONS.keys(): - for repeat_ix in range(NUM_REPEATS): - traj_data, condition_name = generate_cytosim_simularium( - condition, repeat_ix - ) - if traj_data is not None: - result[condition_name] = traj_data - if not baseline: - return result - for repeat_ix in range(NUM_REPEATS): - traj_data, condition_name = generate_cytosim_simularium("baseline", repeat_ix) - if traj_data is not None: - result[condition_name] = traj_data - return result - - -def save_cytosim_trajectories(cytosim_traj_data: Dict[str, TrajectoryData]): - if not os.path.isdir("data/cytosim_outputs"): - os.makedirs("data/cytosim_outputs") - for condition_name, traj_data in cytosim_traj_data.items(): - TrajectoryConverter(traj_data).save( - f"data/cytosim_outputs/actin_compression_{condition_name}" - ) - - -def upload_cytosim_trajectories(): - for condition in CYTOSIM_CONDITIONS.keys(): - velocity = CYTOSIM_CONDITIONS[condition] - for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_velocity={velocity}_{repeat}.simularium", - s3_path=f"simularium/actin_compression_velocity={velocity}_{repeat}.simularium", - ) - 
for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_baseline_{repeat}.simularium", - s3_path=f"simularium/actin_compression_baseline_{repeat}.simularium", - ) - - -def main(): - args = parse_args() - if not (args.combined or args.cytosim): - print("Please specify either --combined or --cytosim arguments") - if args.combined: - # save one simularium file with all cytosim and readdy trajectories - download_combined_csv_data() - save_combined_simularium() - if args.upload: - upload_file_to_s3( - bucket_name="readdy-working-bucket", - src_path=f"data/actin_compression.simularium", - s3_path=f"outputs/actin_compression_cytosim_readdy.simularium", - ) - elif args.cytosim: - # save an individual simularium file for each cytosim trajectory - download_cytosim_trajectory_data() - cytosim_traj_data = load_all_cytosim_simularium() - save_cytosim_trajectories(cytosim_traj_data) - if args.upload: - upload_cytosim_trajectories() - - -if __name__ == "__main__": - main() diff --git a/subcell_pipeline/visualization/create_simularium_outputs_fix_readdy.py b/subcell_pipeline/visualization/create_simularium_outputs_fix_readdy.py deleted file mode 100644 index d41d9f8..0000000 --- a/subcell_pipeline/visualization/create_simularium_outputs_fix_readdy.py +++ /dev/null @@ -1,250 +0,0 @@ -import argparse -import math -import os -import sys -from typing import Dict, Tuple - -import boto3 -import numpy as np -import pandas as pd -from botocore.exceptions import ClientError -from pint import UnitRegistry -from scipy.spatial.transform import Rotation -from simulariumio import ( - DISPLAY_TYPE, - AgentData, - CameraData, - DisplayData, - FileConverter, - InputFileData, - MetaData, - ScatterPlotData, - TrajectoryConverter, - TrajectoryData, - UnitData, -) -from simulariumio.filters import EveryNthTimestepFilter -from subcell_analysis.compression_analysis import ( - COMPRESSIONMETRIC, - 
get_asymmetry_of_peak, - get_average_distance_from_end_to_end_axis, - get_bending_energy_from_trace, - get_contour_length_from_trace, - get_third_component_variance, -) -from subcell_analysis.compression_workflow_runner import compression_metrics_workflow -from subcell_analysis.cytosim.post_process_cytosim import cytosim_to_simularium - -CYTOSIM_CONDITIONS = { - "0001": 0.48, - "0002": 1.5, - "0003": 4.7, - "0004": 15, - "0005": 47, - "0006": 150, -} -READDY_CONDITIONS = [ - 4.7, - 15, - 47, - 150, -] -NUM_REPEATS = 5 -TOTAL_STEPS = 200 -POINTS_PER_FIBER = 200 -BENDING_ENERGY_SCALE_FACTOR = 1000.0 -CYTOSIM_SCALE_FACTOR = 1000.0 -BOX_SIZE = 600.0 - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Visualizes ReaDDy and Cytosim actin simulations" - ) - parser.add_argument("--combined", action=argparse.BooleanOptionalAction) - parser.set_defaults(combined=False) - parser.add_argument("--cytosim", action=argparse.BooleanOptionalAction) - parser.set_defaults(cytosim=False) - parser.add_argument("--upload", action=argparse.BooleanOptionalAction) - parser.set_defaults(upload=False) - return parser.parse_args() - - -s3_client = boto3.client("s3") -for repeat in range(num_repeats): - s3_client.download_file( - "cytosim-working-bucket", - f"vary_compress_rate0003/outputs/{repeat}/fiber_segment_curvature.txt", - f"../data/fiber_segment_curvature_{repeat}.txt", - ) - -# %% [markdown] -# ### Process single repeat - -# %% -repeat = 0 -input_file_path = f"../data/fiber_segment_curvature_{repeat}.txt" - -box_size = 3.0 -scale_factor = 100 -fiber_data = cytosim_to_simularium( - input_file_path, box_size=box_size, scale_factor=scale_factor -) - -# %% [markdown] -# Create cytosim converter object - -# %% -cytosim_converter = CytosimConverter(fiber_data) - -# %% [markdown] -# Read metric data - -# # %% -# df_path = f"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv" -# df = pd.read_csv(df_path) - -# %% [markdown] -# Add metric plots - -# %% 
-plot_metrics = [ - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - COMPRESSIONMETRIC.SUM_BENDING_ENERGY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.NON_COPLANARITY, -] - -# # %% -# for metric in plot_metrics: -# metric_by_time = df.groupby(["time"])[metric.value].mean() -# cytosim_converter.add_plot( -# ScatterPlotData( -# title=f"{metric} over time", -# xaxis_title="Time", -# yaxis_title=metric.value, -# xtrace=np.arange(len(metric_by_time)) * 1e-5, -# ytraces={ -# f"repeat {repeat}": metric_by_time, -# }, -# ) -# ) - -# %% [markdown] -# Save converted data - -# %% -cytosim_converter.save(f"outputs/free_barbed_end_final{repeat}") - -# %% [markdown] -# ### Process multiple repeats - -# %% -box_size = 3.0 -scale_factor = 100 -colors = ["#F0F0F0", "#0000FF", "#FF0000", "#00FF00", "#FF00FF"] - -# %% [markdown] -# Create initial trajectory data object - -# # %% -# input_file_path = f"data/fiber_segment_curvature_0.txt" -# fiber_data = cytosim_to_simularium( -# input_file_path, -# box_size=box_size, -# scale_factor=scale_factor, -# color=colors[0], -# actin_number=0, -# ) -# cytosim_converter = CytosimConverter(fiber_data) - -# trajectory_data = cytosim_converter._data - -# %% [markdown] -# Append additional repeats to trajectory data object - -# # %% -# for repeat in range(1, num_repeats): -# input_file_path = f"data/fiber_segment_curvature_{repeat}.txt" -# fiber_data = cytosim_to_simularium( -# input_file_path, -# box_size=box_size, -# scale_factor=scale_factor, -# color=colors[repeat], -# actin_number=repeat, -# ) -# cytosim_converter = CytosimConverter(fiber_data) -# new_agent_data = cytosim_converter._data.agent_data - -# trajectory_data.append_agents(new_agent_data) - -# # %% -# all_repeats_converter = TrajectoryConverter(trajectory_data) - -# %% [markdown] -# ### Add plots for all repeats - -# %% -plot_metrics = [ - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - 
COMPRESSIONMETRIC.SUM_BENDING_ENERGY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.NON_COPLANARITY, -] - -# %% [markdown] -# Get metrics for all repeats - -# # %% -# df_list = [] -# for repeat in range(num_repeats): -# df_path = f"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv" -# df = pd.read_csv(df_path) -# df["repeat"] = repeat -# df_list.append(df) -# df_all = pd.concat(df_list) - -# %% [markdown] -# Add plots to converter object - -# # %% -# for metric in plot_metrics: -# ytraces = {} -# for repeat, df_repeat in df_all.groupby("repeat"): -# ytraces[f"repeat {repeat}"] = df_repeat.groupby(["time"])[metric.value].mean() - -# all_repeats_converter.add_plot( -# ScatterPlotData( -# title=f"{metric.value} over time", -# xaxis_title="Time", -# yaxis_title=metric.value, -# xtrace=np.arange(metric_by_time.shape[0]) * 1e-5, -# ytraces=ytraces, -# render_mode="lines", -# ) -# ) - - -def upload_cytosim_trajectories(): - for condition in CYTOSIM_CONDITIONS.keys(): - velocity = CYTOSIM_CONDITIONS[condition] - for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_velocity={velocity}_{repeat}.simularium", - s3_path=f"simularium/actin_compression_velocity={velocity}_{repeat}.simularium", - ) - for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_baseline_{repeat}.simularium", - s3_path=f"simularium/actin_compression_baseline_{repeat}.simularium", - ) - - -# %% -cytosim_converter.save(f"outputs/vary_compress_rate_0003_all_repeats") - -# %% diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py new file mode 100644 index 0000000..797d180 --- /dev/null +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -0,0 +1,269 @@ +import os +from typing import Tuple + +import 
matplotlib.pyplot as plt +import numpy as np +from io_collection.load.load_buffer import load_buffer +from io_collection.load.load_dataframe import load_dataframe +from io_collection.load.load_pickle import load_pickle +from io_collection.save.save_buffer import save_buffer +from matplotlib.colors import Colormap +from simulariumio import DISPLAY_TYPE, CameraData, DisplayData, MetaData, UnitData +from sklearn.decomposition import PCA + +from subcell_pipeline.visualization.fiber_points import ( + generate_trajectory_converter_for_fiber_points, +) + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) +"""Bounding box size for dimensionality reduction trajectory.""" + + +def rgb_to_hex_color(color: tuple[float, float, float]) -> str: + rgb = (int(255 * color[0]), int(255 * color[1]), int(255 * color[2])) + return f"#{rgb[0]:02X}{rgb[1]:02X}{rgb[2]:02X}" + + +def pca_fiber_points_over_time( + samples: list[np.ndarray], + pca: PCA, + pc_ix: int, + simulator_name: str = "Combined", + color: str = "#eaeaea", +) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: + """ + Get fiber_points for samples of the PC distributions + in order to visualize the samples over time. 
+ """ + if simulator_name == "Combined": + simulator_name = "" + if simulator_name: + simulator_name += "#" + fiber_points: list[np.ndarray] = [] + display_data: dict[str, DisplayData] = {} + for sample_ix in range(len(samples[0])): + if pc_ix < 1: + data = [samples[0][sample_ix], 0] + else: + data = [0, samples[1][sample_ix]] + fiber_points.append(pca.inverse_transform(data).reshape(-1, 3)) + fiber_points_arr: np.ndarray = np.array(fiber_points) + type_name: str = f"{simulator_name}PC{pc_ix + 1}" + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + color=color, + ) + return [fiber_points_arr], [type_name], display_data + + +def pca_fiber_points_one_timestep( + samples: list[np.ndarray], + pca: PCA, + color_maps: dict[str, Colormap], + pc_ix: int, + simulator_name: str = "Combined", +) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: + """ + Get fiber_points for samples of the PC distributions + in order to visualize the samples together in one timestep. 
+ """ + color_map = color_maps[simulator_name] + if simulator_name == "Combined": + simulator_name = "" + if simulator_name: + simulator_name += "_" + + fiber_points = [] + type_names = [] + display_data = {} + for sample_ix in range(len(samples[0])): + data = [ + [samples[0][sample_ix], 0], + [0, samples[1][sample_ix]], + ] + fiber_points.append(pca.inverse_transform(data[pc_ix]).reshape(1, -1, 3)) + sample = samples[pc_ix][sample_ix] + sample_name = str(round(sample)) + type_name = f"{simulator_name}PC{pc_ix + 1}#{sample_name}" + type_names.append(type_name) + if type_name not in display_data: + color_range = -samples[pc_ix][0] + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + color=rgb_to_hex_color(color_map(abs(sample) / color_range)), + ) + return fiber_points, type_names, display_data + + +def generate_simularium_and_save( + name: str, + fiber_points: list[np.ndarray], + type_names: list[str], + display_data: dict[str, DisplayData], + distribution_over_time: bool, + simulator_detail: bool, + bucket: str, + temp_path: str, + pc: str, +) -> None: + """Generate a Simulariumio object for the fiber points and save it.""" + meta_data = MetaData( + box_size=BOX_SIZE, + camera_defaults=CameraData( + # position=np.array([-20.0, 350.0, 200.0]), + # look_at_position=np.array([50.0, 0.0, 0.0]), + position=np.array([70.0, 70.0, 300.0]), + look_at_position=np.array([70.0, 70.0, 0.0]), + fov_degrees=60.0, + ), + trajectory_title="Actin Compression Dimensionality Reduction", + ) + time_units = UnitData("count") # frames + spatial_units = UnitData("nm") # nanometers + converter = generate_trajectory_converter_for_fiber_points( + fiber_points, + type_names, + meta_data, + display_data, + time_units, + spatial_units, + fiber_radius=1.0, + # fiber_radius=6.0, + ) + + # Save locally and copy to bucket. 
+ output_key = name + output_key += "_time" if distribution_over_time else "" + output_key += "_simulators" if simulator_detail else "" + output_key += f"_pc{pc}" if pc else "" + local_file_path = os.path.join(temp_path, output_key) + converter.save(output_path=local_file_path) + output_key = f"{output_key}.simularium" + save_buffer(bucket, f"{name}/{output_key}", load_buffer(temp_path, output_key)) + + +def visualize_dimensionality_reduction( + bucket: str, + pca_results_key: str, + pca_pickle_key: str, + distribution_over_time: bool, + simulator_detail: bool, + sample_ranges: dict[str, list[list[float]]], + separate_pcs: bool, + sample_resolution: int, + temp_path: str, +) -> None: + """ + Visualize PCA space for actin fibers. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + pca_results_key + File key for PCA results dataframe. + pca_pickle_key + File key for PCA object pickle. + distribution_over_time + True to scroll through the PC distributions over time, False otherwise. + simulator_detail + True to show individual simulator ranges, False otherwise. + sample_ranges + Min and max values to visualize for each PC + (and each simulator if simulator_detail). + separate_pcs + True to Visualize PCs in separate files, False otherwise. + sample_resolution + Number of samples for each PC distribution. + temp_path + Local path for saving visualization output files. 
+ """ + pca_results = load_dataframe(bucket, pca_results_key) + pca = load_pickle(bucket, pca_pickle_key) + + fiber_points: list[list[np.ndarray]] = [[], []] + type_names: list[list[str]] = [[], []] + display_data: list[dict[str, DisplayData]] = [{}, {}] + pca_results_simulators = { + "Combined": pca_results, + } + if simulator_detail: + pca_results_simulators["ReaDDy"] = pca_results.loc[ + pca_results["SIMULATOR"] == "READDY" + ] + pca_results_simulators["Cytosim"] = pca_results.loc[ + pca_results["SIMULATOR"] == "CYTOSIM" + ] + color_maps = { + "Combined": plt.colormaps.get_cmap("RdPu"), + "ReaDDy": plt.colormaps.get_cmap("YlOrRd"), + "Cytosim": plt.colormaps.get_cmap("GnBu"), + } + over_time_colors = { + "Combined": "#ffffff", + "ReaDDy": "#ff8f52", + "Cytosim": "#1cbfaa", + } + dataset_name = os.path.splitext(pca_pickle_key)[0] + pc_ixs = list(range(2)) + for simulator in pca_results_simulators: + samples = [ + np.arange( + sample_ranges[simulator][0][0], + sample_ranges[simulator][0][1], + (sample_ranges[simulator][0][1] - sample_ranges[simulator][0][0]) + / float(sample_resolution), + ), + np.arange( + sample_ranges[simulator][1][0], + sample_ranges[simulator][1][1], + (sample_ranges[simulator][1][1] - sample_ranges[simulator][1][0]) + / float(sample_resolution), + ), + ] + for pc_ix in pc_ixs: + if distribution_over_time: + _fiber_points, _type_names, _display_data = pca_fiber_points_over_time( + samples, pca, pc_ix, simulator, over_time_colors[simulator] + ) + else: + _fiber_points, _type_names, _display_data = ( + pca_fiber_points_one_timestep( + samples, pca, color_maps, pc_ix, simulator + ) + ) + if separate_pcs: + fiber_points[pc_ix] += _fiber_points + type_names[pc_ix] += _type_names + display_data[pc_ix] = {**display_data[pc_ix], **_display_data} + else: + fiber_points[0] += _fiber_points + type_names[0] += _type_names + display_data[0] = {**display_data[0], **_display_data} + if separate_pcs: + for pc_ix in pc_ixs: + generate_simularium_and_save( + 
dataset_name, + fiber_points[pc_ix], + type_names[pc_ix], + display_data[pc_ix], + distribution_over_time, + simulator_detail, + bucket, + temp_path, + str(pc_ix + 1), + ) + else: + generate_simularium_and_save( + dataset_name, + fiber_points[0], + type_names[0], + display_data[0], + distribution_over_time, + simulator_detail, + bucket, + temp_path, + "", + ) diff --git a/subcell_pipeline/visualization/display_data.py b/subcell_pipeline/visualization/display_data.py new file mode 100644 index 0000000..9c44635 --- /dev/null +++ b/subcell_pipeline/visualization/display_data.py @@ -0,0 +1,110 @@ +from simulariumio import DISPLAY_TYPE, DisplayData + + +def get_readdy_display_data() -> dict[str, DisplayData]: + extra_radius = 1.5 + actin_radius = 2.0 + extra_radius + n_polymer_numbers = 5 + result = {} + for i in range(1, n_polymer_numbers + 1): + result.update( + { + f"actin#{i}": DisplayData( + name="actin", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#mid_{i}": DisplayData( + name="actin#mid", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#fixed_{i}": DisplayData( + name="actin#fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#mid_fixed_{i}": DisplayData( + name="actin#mid_fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#ATP_{i}": DisplayData( + name="actin#ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#mid_ATP_{i}": DisplayData( + name="actin#mid_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#fixed_ATP_{i}": DisplayData( + name="actin#fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#mid_fixed_ATP_{i}": DisplayData( + name="actin#mid_fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + 
color="#ffbf00", + ), + f"actin#barbed_{i}": DisplayData( + name="actin#barbed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#barbed_ATP_{i}": DisplayData( + name="actin#barbed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#fixed_barbed_{i}": DisplayData( + name="actin#fixed_barbed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#fixed_barbed_ATP_{i}": DisplayData( + name="actin#fixed_barbed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#pointed_{i}": DisplayData( + name="actin#pointed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_ATP_{i}": DisplayData( + name="actin#pointed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_fixed_{i}": DisplayData( + name="actin#pointed_fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_fixed_ATP_{i}": DisplayData( + name="actin#pointed_fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + }, + ) + return result diff --git a/subcell_pipeline/visualization/fiber_points.py b/subcell_pipeline/visualization/fiber_points.py new file mode 100644 index 0000000..8b97b0b --- /dev/null +++ b/subcell_pipeline/visualization/fiber_points.py @@ -0,0 +1,76 @@ +import numpy as np +from simulariumio import ( + AgentData, + DisplayData, + MetaData, + TrajectoryConverter, + TrajectoryData, + UnitData, +) + + +def generate_trajectory_converter_for_fiber_points( + fiber_points: list[np.ndarray], + type_names: list[str], + meta_data: MetaData, + display_data: dict[str, DisplayData], + time_units: UnitData, + spatial_units: UnitData, + fiber_radius: float = 0.5, +) -> TrajectoryConverter: + """ + Generate a TrajectoryConverter for the given fiber 
points. + + Parameters + ---------- + fiber_points + List of fibers, where each fiber has the shape (timesteps x points x 3). + type_names + List of type names. + meta_data + Simularium metadata object. + display_data + Map of type names to Simularium display data objects. + time_units + Time unit data. + spatial_units + Spatial unit data. + fiber_radius + Radius to render fiber + Default: 0.5 + + Returns + ------- + : + Simularium trajectory converter. + """ + + # build subpoints array with correct dimensions + n_fibers = len(fiber_points) + total_steps = fiber_points[0].shape[0] + n_points = fiber_points[0].shape[1] + subpoints = np.zeros((total_steps, n_fibers, n_points, 3)) + for time_ix in range(total_steps): + for fiber_ix in range(n_fibers): + subpoints[time_ix][fiber_ix] = fiber_points[fiber_ix][time_ix] + subpoints = subpoints.reshape((total_steps, n_fibers, 3 * n_points)) + + # convert to simularium + traj_data = TrajectoryData( + meta_data=meta_data, + agent_data=AgentData( + times=np.arange(total_steps), + n_agents=n_fibers * np.ones(total_steps), + viz_types=1001 * np.ones((total_steps, n_fibers)), # fiber viz type = 1001 + unique_ids=np.array(total_steps * [list(range(n_fibers))]), + types=total_steps * [type_names], + positions=np.zeros((total_steps, n_fibers, 3)), + radii=fiber_radius * np.ones((total_steps, n_fibers)), + n_subpoints=3 * n_points * np.ones((total_steps, n_fibers)), + subpoints=subpoints, + display_data=display_data, + ), + time_units=time_units, + spatial_units=spatial_units, + ) + return TrajectoryConverter(traj_data) diff --git a/subcell_pipeline/visualization/histogram_plots.py b/subcell_pipeline/visualization/histogram_plots.py new file mode 100644 index 0000000..b7ed292 --- /dev/null +++ b/subcell_pipeline/visualization/histogram_plots.py @@ -0,0 +1,34 @@ +from simulariumio import HistogramPlotData, ScatterPlotData + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) + + +def 
make_empty_histogram_plots( + metrics: list[CompressionMetric], +) -> dict[CompressionMetric, ScatterPlotData]: + """ + Create empty histogram plot placeholders for list of metrics. + + Parameters + ---------- + metrics + List of metrics. + + Returns + ------- + : + Map of metric to empty histogram plot placeholder. + """ + + plots = {} + + for metric in metrics: + plots[metric] = HistogramPlotData( + title=metric.label(), + xaxis_title=metric.description(), + traces={}, + ) + + return plots diff --git a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py new file mode 100644 index 0000000..313d257 --- /dev/null +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -0,0 +1,566 @@ +"""Visualization methods for individual simulators.""" + +from typing import Optional + +import numpy as np +import pandas as pd +from io_collection.keys.check_key import check_key +from io_collection.load.load_buffer import load_buffer +from io_collection.load.load_text import load_text +from io_collection.save.save_buffer import save_buffer +from pint import UnitRegistry +from simulariumio import ( + DISPLAY_TYPE, + CameraData, + DisplayData, + InputFileData, + MetaData, + TrajectoryConverter, + UnitData, +) +from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo +from simulariumio.filters import EveryNthTimestepFilter +from simulariumio.readdy import ReaddyConverter, ReaddyData + +from subcell_pipeline.analysis.compression_metrics.compression_analysis import ( + get_compression_metric_data, +) +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import align_fiber +from subcell_pipeline.simulation.cytosim.post_processing import CYTOSIM_SCALE_FACTOR +from subcell_pipeline.simulation.readdy.loader import ReaddyLoader +from subcell_pipeline.simulation.readdy.parser import 
BOX_SIZE as READDY_BOX_SIZE +from subcell_pipeline.simulation.readdy.parser import ( + READDY_TIMESTEP, + download_readdy_hdf5, +) +from subcell_pipeline.simulation.readdy.post_processor import ReaddyPostProcessor +from subcell_pipeline.visualization.display_data import get_readdy_display_data +from subcell_pipeline.visualization.scatter_plots import make_empty_scatter_plots +from subcell_pipeline.visualization.spatial_annotator import SpatialAnnotator + +READDY_SAVED_FRAMES: int = 1000 + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) + +UNIT_REGISTRY = UnitRegistry() + + +def _add_individual_plots( + converter: TrajectoryConverter, + metrics: list[CompressionMetric], + metrics_data: pd.DataFrame, + times: np.ndarray, + time_units: UnitData, +) -> None: + """Add plots to individual trajectory with calculated metrics.""" + scatter_plots = make_empty_scatter_plots( + metrics, times=times, time_units=time_units + ) + for metric, plot in scatter_plots.items(): + plot.ytraces["filament"] = np.array(metrics_data[metric.value]) + converter.add_plot(plot, "scatter") + + +def _add_readdy_spatial_annotations( + converter: TrajectoryConverter, + post_processor: ReaddyPostProcessor, + n_monomer_points: int, +) -> None: + """ + Add visualizations of edges, normals, and control points to the ReaDDy + Simularium data. 
+ """ + fiber_chain_ids = post_processor.linear_fiber_chain_ids(polymer_number_range=5) + axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) + fiber_points = post_processor.linear_fiber_control_points( + axis_positions=axis_positions, + n_points=n_monomer_points, + ) + converter._data.agent_data.positions, fiber_points = ( + post_processor.align_trajectory(fiber_points) + ) + axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) + edges = post_processor.edge_positions() + + # edges + converter._data = SpatialAnnotator.add_fiber_agents( + converter._data, + fiber_points=edges, + type_name="edge", + fiber_width=0.5, + color="#eaeaea", + ) + + # normals + normals = post_processor.linear_fiber_normals( + fiber_chain_ids=fiber_chain_ids, + axis_positions=axis_positions, + normal_length=10.0, + ) + converter._data = SpatialAnnotator.add_fiber_agents( + converter._data, + fiber_points=normals, + type_name="normal", + fiber_width=0.5, + color="#685bf3", + ) + + # control points + sphere_positions = [] + for time_ix in range(len(fiber_points)): + sphere_positions.append(fiber_points[time_ix][0]) + converter._data = SpatialAnnotator.add_sphere_agents( + converter._data, + sphere_positions, + type_name="fiber point", + radius=0.8, + rainbow_colors=True, + ) + + +def _get_readdy_simularium_converter( + path_to_readdy_h5: str, + total_steps: int, + n_timepoints: int, +) -> TrajectoryConverter: + """ + Load from ReaDDy outputs and generate a TrajectoryConverter to visualize an + actin trajectory in Simularium. 
+ """ + converter = ReaddyConverter( + ReaddyData( + timestep=1e-6 * (READDY_TIMESTEP * total_steps / READDY_SAVED_FRAMES), + path_to_readdy_h5=path_to_readdy_h5, + meta_data=MetaData( + box_size=READDY_BOX_SIZE, + camera_defaults=CameraData( + position=np.array([70.0, 70.0, 300.0]), + look_at_position=np.array([70.0, 70.0, 0.0]), + fov_degrees=60.0, + ), + scale_factor=1.0, + ), + display_data=get_readdy_display_data(), + time_units=UnitData("ms"), + spatial_units=UnitData("nm"), + ) + ) + return _filter_time(converter, n_timepoints) + + +def visualize_individual_readdy_trajectory( + bucket: str, + series_name: str, + series_key: str, + rep_ix: int, + n_timepoints: int, + n_monomer_points: int, + total_steps: int, + temp_path: str, + metrics: list[CompressionMetric], + metrics_data: pd.DataFrame, +) -> None: + """ + Save a Simularium file for a single ReaDDy trajectory with plots and spatial + annotations. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Combination of series and condition names. + rep_ix + Replicate index. + n_timepoints + Number of equally spaced timepoints to visualize. + n_monomer_points + Number of equally spaced monomer points to visualize. + total_steps + Total number of steps for each simulation key. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + metrics_data + Calculated compression metrics data. 
+ """ + + h5_file_path = download_readdy_hdf5( + bucket, series_name, series_key, rep_ix, temp_path + ) + + assert isinstance(h5_file_path, str) + + converter = _get_readdy_simularium_converter( + h5_file_path, total_steps, n_timepoints + ) + + if metrics: + times = 2 * metrics_data["time"].values # "time" seems to range (0, 0.5) + times *= 1e-6 * (READDY_TIMESTEP * total_steps / n_timepoints) + _add_individual_plots( + converter, metrics, metrics_data, times, converter._data.time_units + ) + + assert isinstance(h5_file_path, str) + + rep_id = rep_ix + 1 + pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl" + time_inc = total_steps // n_timepoints + + readdy_loader = ReaddyLoader( + h5_file_path=h5_file_path, + time_inc=time_inc, + timestep=READDY_TIMESTEP, + pickle_location=bucket, + pickle_key=pickle_key, + ) + + post_processor = ReaddyPostProcessor( + readdy_loader.trajectory(), box_size=READDY_BOX_SIZE + ) + + _add_readdy_spatial_annotations(converter, post_processor, n_monomer_points) + + # Save simularium file. Turn off validate IDs for performance. + converter.save(output_path=h5_file_path, validate_ids=False) + + +def visualize_individual_readdy_trajectories( + bucket: str, + series_name: str, + condition_keys: list[str], + n_replicates: int, + n_timepoints: int, + n_monomer_points: int, + total_steps: dict[str, int], + temp_path: str, + metrics: Optional[list[CompressionMetric]] = None, + recalculate: bool = True, +) -> None: + """ + Visualize individual ReaDDy simulations for select conditions and + replicates. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + n_replicates + Number of simulation replicates. + n_timepoints + Number of equally spaced timepoints to visualize. + n_monomer_points + Number of equally spaced monomer points to visualize. + total_steps + Total number of steps for each simulation key. 
+ temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + recalculate + True to recalculate visualization files, False otherwise. + """ + + if metrics is not None: + all_metrics_data = get_compression_metric_data( + bucket, + series_name, + condition_keys, + list(range(1, n_replicates + 1)), + metrics, + recalculate=False, + ) + else: + metrics = [] + all_metrics_data = pd.DataFrame(columns=["key", "seed"]) + + for condition_key in condition_keys: + series_key = f"{series_name}_{condition_key}" if condition_key else series_name + + for rep_ix in range(n_replicates): + rep_id = rep_ix + 1 + output_key = f"{series_name}/viz/{series_key}_{rep_id:06d}.simularium" + + # Skip if output file already exists. + if not recalculate and check_key(bucket, output_key): + print( + f"Simularium file for [ { output_key } ] already exists. Skipping." + ) + continue + + print(f"Visualizing data for [ {condition_key} ] replicate [ {rep_ix} ]") + + # Filter metrics data for specific conditon and replicate. + if condition_key: + metrics_data = all_metrics_data[ + (all_metrics_data["key"] == condition_key) + & (all_metrics_data["seed"] == rep_id) + ] + else: + metrics_data = all_metrics_data[(all_metrics_data["seed"] == rep_id)] + + visualize_individual_readdy_trajectory( + bucket, + series_name, + series_key, + rep_ix, + n_timepoints, + n_monomer_points, + total_steps[condition_key], + temp_path, + metrics, + metrics_data, + ) + + # Upload saved file to S3. 
+ temp_key = f"{series_key}_{rep_ix}.h5.simularium" + save_buffer(bucket, output_key, load_buffer(temp_path, temp_key)) + + +def _find_time_units(raw_time: float, units: str = "s") -> tuple[str, float]: + """Get compact time units and a multiplier to put the times in those units.""" + time = UNIT_REGISTRY.Quantity(raw_time, units) + time_compact = time.to_compact() + return f"{time_compact.units:~}", time_compact.magnitude / raw_time + + +def _filter_time( + converter: TrajectoryConverter, n_timepoints: int +) -> TrajectoryConverter: + """Filter times using simulariumio time filter.""" + time_inc = int(converter._data.agent_data.times.shape[0] / n_timepoints) + if time_inc < 2: + return converter + converter._data = converter.filter_data([EveryNthTimestepFilter(n=time_inc)]) + return converter + + +def _align_cytosim_fiber(converter: TrajectoryConverter) -> None: + """ + Align the fiber subpoints so that the furthest point from the x-axis + is aligned with the positive y-axis at the last time point. + """ + fiber_points = converter._data.agent_data.subpoints[:, 0, :] + n_timesteps = fiber_points.shape[0] + n_points = int(fiber_points.shape[1] / 3) + fiber_points = fiber_points.reshape((n_timesteps, n_points, 3)) + _, rotation = align_fiber(fiber_points[-1]) + for time_ix in range(n_timesteps): + rotated = np.dot(fiber_points[time_ix][:, 1:], rotation) + converter._data.agent_data.subpoints[time_ix, 0, :] = np.concatenate( + (fiber_points[time_ix][:, 0:1], rotated), axis=1 + ).reshape(n_points * 3) + + +def _get_cytosim_simularium_converter( + fiber_points_data: str, + singles_data: str, + n_timepoints: int, +) -> TrajectoryConverter: + """ + Load from Cytosim outputs and generate a TrajectoryConverter to visualize an + actin trajectory in Simularium. 
+ """ + singles_display_data = DisplayData( + name="linker", + radius=0.004, + display_type=DISPLAY_TYPE.SPHERE, + color="#eaeaea", + ) + converter = CytosimConverter( + CytosimData( + meta_data=MetaData( + box_size=BOX_SIZE, + camera_defaults=CameraData( + position=np.array([70.0, 70.0, 300.0]), + look_at_position=np.array([70.0, 70.0, 0.0]), + fov_degrees=60.0, + ), + scale_factor=1, + ), + object_info={ + "fibers": CytosimObjectInfo( + cytosim_file=InputFileData( + file_contents=fiber_points_data, + ), + display_data={ + 1: DisplayData( + name="actin", + radius=0.002, + display_type=DISPLAY_TYPE.FIBER, + ) + }, + ), + "singles": CytosimObjectInfo( + cytosim_file=InputFileData( + file_contents=singles_data, + ), + display_data={ + 1: singles_display_data, + 2: singles_display_data, + 3: singles_display_data, + 4: singles_display_data, + }, + ), + }, + ) + ) + _align_cytosim_fiber(converter) + converter._data.agent_data.radii *= CYTOSIM_SCALE_FACTOR + converter._data.agent_data.positions *= CYTOSIM_SCALE_FACTOR + converter._data.agent_data.subpoints *= CYTOSIM_SCALE_FACTOR + converter = _filter_time(converter, n_timepoints) + time_units, time_multiplier = _find_time_units(converter._data.agent_data.times[-1]) + converter._data.agent_data.times *= time_multiplier + converter._data.time_units = UnitData(time_units) + return converter + + +def visualize_individual_cytosim_trajectory( + bucket: str, + series_name: str, + series_key: str, + index: int, + n_timepoints: int, + temp_path: str, + metrics: list[CompressionMetric], + metrics_data: pd.DataFrame, +) -> None: + """ + Save a Simularium file for a single Cytosim trajectory with plots and + spatial annotations. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Combination of series and condition names. + index + Simulation replicate index. + n_timepoints + Number of equally spaced timepoints to visualize. 
+ temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + metrics_data + Calculated compression metrics data. + """ + + output_key_template = f"{series_name}/outputs/{series_key}_{index}/%s" + fiber_points_data = load_text(bucket, output_key_template % "fiber_points.txt") + singles_data = load_text(bucket, output_key_template % "singles.txt") + + converter = _get_cytosim_simularium_converter( + fiber_points_data, singles_data, n_timepoints + ) + + if metrics: + times = 1e3 * metrics_data["time"].values # s --> ms + _add_individual_plots( + converter, metrics, metrics_data, times, converter._data.time_units + ) + + # Save simularium file. Turn off validate IDs for performance. + local_file_path = f"{temp_path}/{series_key}_{index}" + converter.save(output_path=local_file_path, validate_ids=False) + + +def visualize_individual_cytosim_trajectories( + bucket: str, + series_name: str, + condition_keys: list[str], + random_seeds: list[int], + n_timepoints: int, + temp_path: str, + metrics: Optional[list[CompressionMetric]] = None, + recalculate: bool = True, +) -> None: + """ + Visualize individual Cytosim simulations for select conditions and + replicates. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + random_seeds + Random seeds for simulations. + n_timepoints + Number of equally spaced timepoints to visualize. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + recalculate + True to recalculate visualization files, False otherwise. 
+    """
+
+    if metrics is not None:
+        all_metrics_data = get_compression_metric_data(
+            bucket,
+            series_name,
+            condition_keys,
+            random_seeds,
+            metrics,
+            recalculate=False,
+        )
+    else:
+        metrics = []
+        all_metrics_data = pd.DataFrame(columns=["key", "seed"])
+
+    for condition_key in condition_keys:
+        series_key = f"{series_name}_{condition_key}" if condition_key else series_name
+
+        for index, seed in enumerate(random_seeds):
+            output_key = f"{series_name}/viz/{series_key}_{seed:06d}.simularium"
+
+            # Skip if output file already exists.
+            if not recalculate and check_key(bucket, output_key):
+                print(
+                    f"Simularium file for [ { output_key } ] already exists. Skipping."
+                )
+                continue
+
+            print(f"Visualizing data for [ {condition_key} ] seed [ {seed} ]")
+
+            # Filter metrics data for specific condition and replicate.
+            if condition_key:
+                metrics_data = all_metrics_data[
+                    (all_metrics_data["key"] == condition_key)
+                    & (all_metrics_data["seed"] == seed)
+                ]
+            else:
+                metrics_data = all_metrics_data[(all_metrics_data["seed"] == seed)]
+
+            visualize_individual_cytosim_trajectory(
+                bucket,
+                series_name,
+                series_key,
+                index,
+                n_timepoints,
+                temp_path,
+                metrics,
+                metrics_data,
+            )
+
+            temp_key = f"{series_key}_{index}.simularium"
+            save_buffer(bucket, output_key, load_buffer(temp_path, temp_key))
diff --git a/subcell_pipeline/visualization/scatter_plots.py b/subcell_pipeline/visualization/scatter_plots.py
new file mode 100644
index 0000000..d55de5f
--- /dev/null
+++ b/subcell_pipeline/visualization/scatter_plots.py
@@ -0,0 +1,65 @@
+from typing import Optional
+
+import numpy as np
+from simulariumio import ScatterPlotData
+
+from subcell_pipeline.analysis.compression_metrics.compression_metric import (
+    CompressionMetric,
+)
+
+
+def make_empty_scatter_plots(
+    metrics: list[CompressionMetric],
+    total_steps: int = -1,
+    times: Optional[np.ndarray] = None,
+    time_units: Optional[str] = None,
+) -> dict[CompressionMetric, ScatterPlotData]:
+    """
+    Create empty
scatter plot placeholders for list of metrics. + + Parameters + ---------- + metrics + List of metrics. + total_steps + Total number of timesteps. Required if times is not given. + times + List of timepoints. Required if total_steps is not given. + time_units + Time units. Used only with times. + + Returns + ------- + : + Map of metric to empty scatter plot placeholder. + """ + + if total_steps < 0 and times is None: + raise Exception("Either total_steps or times array is required for plots") + elif times is None: + # use normalized time + xlabel = "T (normalized)" + xtrace = (1 / float(total_steps)) * np.arange(total_steps) + else: + # use actual time + xlabel = f"T ({time_units})" + xtrace = times + total_steps = times.shape[0] + + plots = {} + + for metric in metrics: + lower_bound, upper_bound = metric.bounds() + plots[metric] = ScatterPlotData( + title=metric.label(), + xaxis_title=xlabel, + yaxis_title=metric.description(), + xtrace=xtrace, + ytraces={ + "<<<": lower_bound * np.ones(total_steps), + ">>>": upper_bound * np.ones(total_steps), + }, + render_mode="lines", + ) + + return plots diff --git a/subcell_pipeline/visualization/spatial_annotator.py b/subcell_pipeline/visualization/spatial_annotator.py index db6cb84..9463ea3 100644 --- a/subcell_pipeline/visualization/spatial_annotator.py +++ b/subcell_pipeline/visualization/spatial_annotator.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - from typing import List import numpy as np @@ -130,6 +128,7 @@ def add_sphere_agents( sphere_positions: List[np.ndarray], type_name: str = "sphere", radius: float = 1.0, + rainbow_colors: bool = False, color: str = "#eaeaea", ) -> TrajectoryData: """ @@ -149,6 +148,9 @@ def add_sphere_agents( radius: float (optional) Radius to draw the spheres. Default: 1. + rainbow_colors : bool (optional) + If True, color the new spheres in rainbow order. + If False, use color instead. color: str (optional) Color for the new spheres. 
Default: "#eaeaea" @@ -158,9 +160,12 @@ def add_sphere_agents( SpatialAnnotator._added_dimensions_for_spheres(sphere_positions) ) max_used_uid = max(list(np.unique(traj_data.agent_data.unique_ids))) + max_spheres = 0 for time_ix in range(total_steps): start_ix = int(traj_data.agent_data.n_agents[time_ix]) n_spheres = len(sphere_positions[time_ix]) + if n_spheres > max_spheres: + max_spheres = n_spheres end_ix = start_ix + n_spheres new_agent_data.unique_ids[time_ix][start_ix:end_ix] = np.arange( max_used_uid + 1, max_used_uid + 1 + n_spheres @@ -169,15 +174,21 @@ def add_sphere_agents( new_agent_data.viz_types[time_ix][start_ix:end_ix] = n_spheres * [ VIZ_TYPE.DEFAULT ] - new_agent_data.types[time_ix] += n_spheres * [type_name] + new_agent_data.types[time_ix] += [ + f"{type_name}#{ix}" for ix in range(n_spheres) + ] new_agent_data.positions[time_ix][start_ix:end_ix] = sphere_positions[ time_ix ][:n_spheres] new_agent_data.radii[time_ix][start_ix:end_ix] = n_spheres * [radius] - new_agent_data.display_data[type_name] = DisplayData( - name=type_name, - display_type=DISPLAY_TYPE.SPHERE, - color=color, - ) + + colors = ["#0000ff", "#00ff00", "#ffff00", "#ff0000", "#ff00ff"] + for ix in range(max_spheres): + tn = f"{type_name}#{ix}" + new_agent_data.display_data[tn] = DisplayData( + name=tn, + display_type=DISPLAY_TYPE.SPHERE, + color=colors[ix % len(colors)] if rainbow_colors else color, + ) traj_data.agent_data = new_agent_data return traj_data diff --git a/subcell_pipeline/visualization/tomography.py b/subcell_pipeline/visualization/tomography.py new file mode 100644 index 0000000..dd6d4a3 --- /dev/null +++ b/subcell_pipeline/visualization/tomography.py @@ -0,0 +1,126 @@ +import os +from typing import Optional + +import numpy as np +import pandas as pd +from io_collection.load.load_buffer import load_buffer +from io_collection.load.load_dataframe import load_dataframe +from io_collection.save.save_buffer import save_buffer +from simulariumio import CameraData, 
MetaData, TrajectoryConverter, UnitData + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.fiber_points import ( + generate_trajectory_converter_for_fiber_points, +) +from subcell_pipeline.visualization.histogram_plots import make_empty_histogram_plots + +TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] + +TOMOGRAPHY_VIZ_SCALE: float = 100.0 + + +def _add_tomography_plots( + converter: TrajectoryConverter, + metrics: list[CompressionMetric], + fiber_points: list[np.ndarray], +) -> None: + """Add plots to tomography data with calculated metrics.""" + + histogram_plots = make_empty_histogram_plots(metrics) + + for metric, plot in histogram_plots.items(): + values = [ + metric.calculate_metric(polymer_trace=fiber[0, :, :]) + for fiber in fiber_points + ] + + if metric == CompressionMetric.COMPRESSION_RATIO: + plot.traces["actin"] = np.array(values) * 100 + else: + plot.traces["actin"] = np.array(values) + + converter.add_plot(plot, "histogram") + + +def _get_tomography_spatial_center_and_size( + tomo_df: pd.DataFrame, +) -> tuple[np.ndarray, np.ndarray]: + """Get the center and size of the tomography dataset in 3D space.""" + + all_mins = [] + all_maxs = [] + + for column in TOMOGRAPHY_SAMPLE_COLUMNS: + all_mins.append(tomo_df[column].min()) + all_maxs.append(tomo_df[column].max()) + + mins = np.array(all_mins) + maxs = np.array(all_maxs) + + return mins + 0.5 * (maxs - mins), maxs - mins + + +def visualize_tomography( + bucket: str, + name: str, + temp_path: str, + metrics: Optional[list[CompressionMetric]] = None, +) -> None: + """ + Visualize segmented tomography data for actin fibers. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + name + Name of tomography dataset. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. 
+ """ + + tomo_key = f"{name}/{name}_coordinates_sampled.csv" + tomo_df = load_dataframe(bucket, tomo_key) + tomo_df = tomo_df.sort_values(by=["id", "monomer_ids"]) + tomo_df = tomo_df.reset_index(drop=True) + + time_units = UnitData("count") + spatial_units = UnitData("um", 0.003) + + center, box_size = _get_tomography_spatial_center_and_size(tomo_df) + + all_fiber_points = [] + all_type_names = [] + + for fiber_id, fiber_df in tomo_df.groupby("id"): + fiber_index, dataset = fiber_id.split("_", 1) + fiber_points = TOMOGRAPHY_VIZ_SCALE * ( + np.array([fiber_df[TOMOGRAPHY_SAMPLE_COLUMNS]]) - center + ) + all_fiber_points.append(fiber_points) + all_type_names.append(f"{dataset}#{fiber_index}") + + converter = generate_trajectory_converter_for_fiber_points( + all_fiber_points, + all_type_names, + MetaData( + box_size=TOMOGRAPHY_VIZ_SCALE * box_size, + camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])), + ), + {}, + time_units, + spatial_units, + ) + + if metrics: + _add_tomography_plots(converter, metrics, all_fiber_points) + + # Save locally and copy to bucket. + local_file_path = os.path.join(temp_path, name) + converter.save(output_path=local_file_path) + output_key = f"{name}/{name}.simularium" + save_buffer(bucket, output_key, load_buffer(temp_path, f"{name}.simularium")) diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 615cf92..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,25 +0,0 @@ -# #!/usr/bin/env python - -# """ -# Configuration for tests! There are a whole list of hooks you can define in this file -# to run before, after, or to mutate how tests run. Commonly for most of our work, we -# use this file to define top level fixtures that may be needed for tests throughout -# multiple test files. - -# In this case, while we aren't using this fixture in our tests, the prime use case for -# something like this would be when we want to preload a file to be used in multiple -# tests. 
File reading can take time, so instead of re-reading the file for each test, -# read the file once then use the loaded content. - -# Docs: https://docs.pytest.org/en/latest/example/simple.html -# https://docs.pytest.org/en/latest/plugins.html#requiring-loading-plugins-in-a-test-module-or-conftest-file -# """ - -# from pathlib import Path - -# import pytest - - -# @pytest.fixture -# def data_dir() -> Path: -# return Path(__file__).parent / "data" diff --git a/tests/data/readdy/actin_ortho_filament_10_steps.h5 b/tests/data/readdy/actin_ortho_filament_10_steps.h5 deleted file mode 100644 index 4f53ab9..0000000 Binary files a/tests/data/readdy/actin_ortho_filament_10_steps.h5 and /dev/null differ diff --git a/tests/test_readdy_control_points.py b/tests/test_readdy_control_points.py deleted file mode 100644 index ccb80bd..0000000 --- a/tests/test_readdy_control_points.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python - -from typing import List - -import numpy as np -import pytest -from subcell_analysis.readdy import ReaddyLoader, ReaddyPostProcessor - - -@pytest.mark.parametrize( - "axis_positions, segment_length, expected_control_points", - [ - ( - [ - np.array([-2.47203989e02, 5.70044691e-03, -2.56005623e-02]), - np.array([-2.44400890e02, -1.13344916e-02, 2.36519172e-02]), - np.array([-2.41597792e02, 1.63829372e-02, -2.04812904e-02]), - np.array([-2.38794693e02, -2.05849546e-02, 1.62524931e-02]), - np.array([-2.35991594e02, 2.37234456e-02, -1.11840070e-02]), - np.array([-2.33188495e02, -2.56362595e-02, 5.53769673e-03]), - np.array([-2.30385396e02, 2.62245702e-02, 3.94719939e-04]), - np.array([-2.27582298e02, -2.54579825e-02, -6.30674332e-03]), - np.array([-2.24779199e02, 2.33761024e-02, 1.18929274e-02]), - np.array([-2.21976100e02, -2.00864907e-02, -1.68646606e-02]), - ], - 10.0, - np.array( - [ - [-2.47203989e02, 5.70044691e-03, -2.56005623e-02], - [-2.37205717e02, 4.53188350e-03, 6.99728204e-04], - [-2.27207445e02, -1.89274956e-02, -3.87293686e-03], - ] - ), 
- ), - ( - [ - np.array([-2.47203989e02, 5.70044691e-03, -2.56005623e-02]), - np.array([-2.44400890e02, -1.13344916e-02, 2.36519172e-02]), - np.array([-2.41597792e02, 1.63829372e-02, -2.04812904e-02]), - np.array([-2.38794693e02, -2.05849546e-02, 1.62524931e-02]), - np.array([-2.35991594e02, 2.37234456e-02, -1.11840070e-02]), - np.array([-2.33188495e02, -2.56362595e-02, 5.53769673e-03]), - np.array([-2.30385396e02, 2.62245702e-02, 3.94719939e-04]), - np.array([-2.27582298e02, -2.54579825e-02, -6.30674332e-03]), - np.array([-2.24779199e02, 2.33761024e-02, 1.18929274e-02]), - np.array([-2.21976100e02, -2.00864907e-02, -1.68646606e-02]), - ], - 1.0, - np.array( - [ - [-2.47203989e02, 5.70044691e-03, -2.56005623e-02], - [-2.46204162e02, -3.75684012e-04, -8.03287259e-03], - [-2.45204335e02, -6.45181494e-03, 9.53481714e-03], - [-2.44204507e02, -9.39263048e-03, 2.05599797e-02], - [-2.43204680e02, 4.93799453e-04, 4.81826461e-03], - [-2.42204853e02, 1.03802294e-02, -1.09234504e-02], - [-2.41205026e02, 1.12030548e-02, -1.53342109e-02], - [-2.40205199e02, -1.98288954e-03, -2.23176981e-03], - [-2.39205371e02, -1.51688338e-02, 1.08706713e-02], - [-2.38205544e02, -1.12723204e-02, 1.04859547e-02], - ] - ), - ), - ], -) -def test_readdy_control_points( - axis_positions: List[np.ndarray], - segment_length: float, - expected_control_points: np.ndarray, -) -> None: - post_processor = ReaddyPostProcessor( - trajectory=ReaddyLoader( - h5_file_path="subcell_analysis/tests/data/readdy/actin_ortho_filament_10_steps.h5", - timestep=0.1, - ).trajectory(), - box_size=np.array(3 * [600.0]), - periodic_boundary=False, - ) - test_control_points = post_processor.linear_fiber_control_points( - axis_positions=[[axis_positions]], - segment_length=segment_length, - )[0][0][0:10] - assert False not in np.isclose(test_control_points, expected_control_points)