diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 81f56dd..7ddd490 100755
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,87 +1,23 @@
-name: CI
-on:
- push:
- branches: [main]
- pull_request:
- branches: [main]
+name: Pytest
-permissions:
- contents: write # needed for gh release upload
+on: [push]
jobs:
build:
- name: Build and Test Package
- runs-on: ${{ matrix.os }}
+ runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.14"]
- os: [ubuntu-latest]
-
+ python-version: ["3.13", "3.14"]
steps:
- - name: Checkout Code
- uses: actions/checkout@v4
-
+ - uses: actions/checkout@v5
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
-
- - name: Cache Python Dependencies
- uses: actions/cache@v4
- with:
- path: ~/.cache/pip
- key: ${{ runner.os }}-pip-${{ matrix.python-version }}
- restore-keys: |
- ${{ runner.os }}-pip-
-
- - name: Install Build Tools
+ - name: Install dependencies
run: |
- python -m pip install --upgrade pip build
- python -m pip install pybind11
-
- - name: Lint Code
- uses: wearerequired/lint-action@v2.3.0
- with:
- linters: |
- pylint
-
- - name: Build Wheel
- run: |
- python -m build --wheel
- ls -al dist
-
- - name: Upload Built Wheels (artifact)
- uses: actions/upload-artifact@v4
- with:
- name: binaryparser-wheels
- path: dist/*.whl
- if-no-files-found: error
-
- - name: Ensure release exists
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- gh release view v1.2.0 >/dev/null 2>&1 || gh release create v1.2.0 -t "v1.2.0" -n ""
-
- - name: Upload Release Asset(s)
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
- ls -al dist
- gh release upload v1.2.0 dist/*cp314*.whl --clobber
-
- - name: Install Built Package
- run: pip install dist/*.whl
-
- - name: Run Tests
- # Remove PYTHONPATH so tests import the installed wheel, not files from the repo root
- run: |
- python -m pip install pytest
- pytest tests/
-
- - name: Show Python Environment (Debug)
- if: failure()
+ pip install --upgrade pip
+ pip install .[dev]
+ - name: Test the code with pytest
run: |
- python --version
- pip list
- ls -R
+ pytest .
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
new file mode 100644
index 0000000..f85a2e3
--- /dev/null
+++ b/.github/workflows/pylint.yml
@@ -0,0 +1,24 @@
+name: Pylint
+
+on: [push]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.14"]
+ steps:
+ - uses: actions/checkout@v5
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v6
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ pip install --upgrade pip
+ pip install .[dev]
+ pip install pylint
+ - name: Analysing the code with pylint
+ run: |
+ pylint binary_parser
diff --git a/.gitignore b/.gitignore
index e59227a..915a744 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+.idea/
bin/TestDaten
.development
tests/__pycache__
+/BinaryParser.egg-info/
+/build/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..ab1f416
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,10 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Ignored default folder with query files
+/queries/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/BinaryParser.iml b/.idea/BinaryParser.iml
new file mode 100644
index 0000000..435b8ab
--- /dev/null
+++ b/.idea/BinaryParser.iml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..491ef7d
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..04db826
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..d1a7504
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/BinaryParser.iml" filepath="$PROJECT_DIR$/.idea/BinaryParser.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..f9d79c4
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,638 @@
+[MAIN]
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+# Clear in-memory caches upon conclusion of linting. Useful if running pylint
+# in a server-like mode.
+clear-cache-post-run=no
+
+# Load and enable all available extensions. Use --list-extensions to see a list
+# all available extensions.
+#enable-all-extensions=
+
+# In error mode, messages with a category besides ERROR or FATAL are
+# suppressed, and no reports are done by default. Error mode is compatible with
+# disabling specific errors.
+#errors-only=
+
+# Always return a 0 (non-error) status code, even if lint errors are found.
+# This is primarily useful in continuous integration scripts.
+#exit-zero=
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-allow-list=
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may
+# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
+# for backward compatibility.)
+extension-pkg-whitelist=
+
+# Return non-zero exit code if any of these messages/categories are detected,
+# even if score is above --fail-under value. Syntax same as enable. Messages
+# specified are enabled, while categories only check already-enabled messages.
+fail-on=
+
+# Specify a score threshold under which the program will exit with error.
+fail-under=5
+
+# Interpret the stdin as a python script, whose filename needs to be passed as
+# the module_or_package argument.
+#from-stdin=
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=CVS,
+ venv
+
+# Add files or directories matching the regular expressions patterns to the
+# ignore-list. The regex matches against paths and can be in Posix or Windows
+# format. Because '\\' represents the directory delimiter on Windows systems,
+# it can't be used as an escape character.
+ignore-paths=
+
+# Files or directories matching the regular expression patterns are skipped.
+# The regex matches against base names, not paths. The default value ignores
+# Emacs file locks
+ignore-patterns=^\.#
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis). It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use, and will cap the count on Windows to
+# avoid hangs.
+jobs=1
+
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+
+# List of plugins (as comma separated values of python module names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Minimum Python version to use for version dependent checks. Will default to
+# the version used to run pylint.
+py-version=3.10
+
+# Discover python modules and packages in the file system subtree.
+recursive=no
+
+# Add paths to the list of the source roots. Supports globbing patterns. The
+# source root is an absolute path or a path relative to the current working
+# directory used to determine a package namespace for modules located under the
+# source root.
+source-roots=
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode=yes
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+# In verbose mode, extra non-checker-related info will be displayed.
+#verbose=
+
+
+[BASIC]
+
+# Naming style matching correct argument names.
+argument-naming-style=snake_case
+
+# Regular expression matching correct argument names. Overrides argument-
+# naming-style. If left empty, argument names will be checked with the set
+# naming style.
+#argument-rgx=
+
+# Naming style matching correct attribute names.
+attr-naming-style=snake_case
+
+# Regular expression matching correct attribute names. Overrides attr-naming-
+# style. If left empty, attribute names will be checked with the set naming
+# style.
+#attr-rgx=
+
+# Bad variable names which should always be refused, separated by a comma.
+bad-names=foo,
+ bar,
+ baz,
+ toto,
+ tutu,
+ tata
+
+# Bad variable names regexes, separated by a comma. If names match any regex,
+# they will always be refused
+bad-names-rgxs=
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style=any
+
+# Regular expression matching correct class attribute names. Overrides class-
+# attribute-naming-style. If left empty, class attribute names will be checked
+# with the set naming style.
+#class-attribute-rgx=
+
+# Naming style matching correct class constant names.
+class-const-naming-style=UPPER_CASE
+
+# Regular expression matching correct class constant names. Overrides class-
+# const-naming-style. If left empty, class constant names will be checked with
+# the set naming style.
+#class-const-rgx=
+
+# Naming style matching correct class names.
+class-naming-style=PascalCase
+
+# Regular expression matching correct class names. Overrides class-naming-
+# style. If left empty, class names will be checked with the set naming style.
+#class-rgx=
+
+# Naming style matching correct constant names.
+const-naming-style=UPPER_CASE
+
+# Regular expression matching correct constant names. Overrides const-naming-
+# style. If left empty, constant names will be checked with the set naming
+# style.
+#const-rgx=
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Naming style matching correct function names.
+function-naming-style=snake_case
+
+# Regular expression matching correct function names. Overrides function-
+# naming-style. If left empty, function names will be checked with the set
+# naming style.
+#function-rgx=
+
+# Good variable names which should always be accepted, separated by a comma.
+good-names=i,
+ j,
+ k,
+ ex,
+ Run,
+ _
+
+# Good variable names regexes, separated by a comma. If names match any regex,
+# they will always be accepted
+good-names-rgxs=
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint=no
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style=any
+
+# Regular expression matching correct inline iteration names. Overrides
+# inlinevar-naming-style. If left empty, inline iteration names will be checked
+# with the set naming style.
+#inlinevar-rgx=
+
+# Naming style matching correct method names.
+method-naming-style=snake_case
+
+# Regular expression matching correct method names. Overrides method-naming-
+# style. If left empty, method names will be checked with the set naming style.
+#method-rgx=
+
+# Naming style matching correct module names.
+module-naming-style=snake_case
+
+# Regular expression matching correct module names. Overrides module-naming-
+# style. If left empty, module names will be checked with the set naming style.
+#module-rgx=
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=^_
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+# These decorators are taken in consideration only for invalid-name.
+property-classes=abc.abstractproperty
+
+# Regular expression matching correct type alias names. If left empty, type
+# alias names will be checked with the set naming style.
+#typealias-rgx=
+
+# Regular expression matching correct type variable names. If left empty, type
+# variable names will be checked with the set naming style.
+#typevar-rgx=
+
+# Naming style matching correct variable names.
+variable-naming-style=snake_case
+
+# Regular expression matching correct variable names. Overrides variable-
+# naming-style. If left empty, variable names will be checked with the set
+# naming style.
+#variable-rgx=
+
+
+[CLASSES]
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+ __new__,
+ setUp,
+ asyncSetUp,
+ __post_init__
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+
+
+[DESIGN]
+
+# List of regular expressions of class ancestor names to ignore when counting
+# public methods (see R0903)
+exclude-too-few-public-methods=
+
+# List of qualified class names to ignore when counting class parents (see
+# R0901)
+ignored-parents=
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=8
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branch for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=0
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string=' '
+
+# Maximum number of characters on a single line.
+max-line-length=150
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow explicit reexports by alias from a package __init__.
+allow-reexport-from-package=no
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=
+
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
+ext-import-graph=
+
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
+import-graph=
+
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
+# UNDEFINED.
+confidence=HIGH,
+ CONTROL_FLOW,
+ INFERENCE,
+ INFERENCE_FAILURE,
+ UNDEFINED
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=raw-checker-failed,
+ bad-inline-option,
+ locally-disabled,
+ file-ignored,
+ suppressed-message,
+ useless-suppression,
+ deprecated-pragma,
+ use-symbolic-message-instead,
+ use-implicit-booleaness-not-comparison-to-string,
+ use-implicit-booleaness-not-comparison-to-zero,
+ missing-module-docstring
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+enable=
+
+
+[METHOD_ARGS]
+
+# List of qualified names (i.e., library.method) which require a timeout
+# parameter e.g. 'requests.api.get,requests.api.post'
+timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,
+ XXX,
+ TODO
+
+# Regular expression of note tags to take in consideration.
+notes-rgx=
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+
+
+[REPORTS]
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'fatal', 'error', 'warning', 'refactor',
+# 'convention', and 'info' which contain the number of messages in each
+# category, as well as 'statement' which is the total number of statements
+# analyzed. This score is used by the global evaluation report (RP0004).
+evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details.
+msg-template=
+
+# Set the output format. Available formats are: text, parseable, colorized,
+# json2 (improved json format), json (old json format) and msvs (visual
+# studio). You can also give a reporter class, e.g.
+# mypackage.mymodule.MyReporterClass.
+#output-format=
+
+# Tells whether to display a full report or only the messages.
+reports=no
+
+# Activate the evaluation score.
+score=yes
+
+
+[SIMILARITIES]
+
+# Comments are removed from the similarity computation
+ignore-comments=yes
+
+# Docstrings are removed from the similarity computation
+ignore-docstrings=yes
+
+# Imports are removed from the similarity computation
+ignore-imports=yes
+
+# Signatures are removed from the similarity computation
+ignore-signatures=yes
+
+# Minimum lines number of a similarity.
+min-similarity-lines=6
+
+
+[SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# Spelling dictionary name. No available dictionaries : You need to install
+# both the python package and the system dependency for enchant to work.
+spelling-dict=
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains the private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no
+
+# This flag controls whether the implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps=no
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# List of symbolic message names to ignore for Mixin members.
+ignored-checks-for-mixins=no-member,
+ not-async-context-manager,
+ not-context-manager,
+ attribute-defined-outside-init
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+# Regex pattern to define which classes are considered mixins.
+mixin-class-rgx=.*[Mm]ixin
+
+# List of decorators that change the signature of a decorated function.
+signature-mutators=
+
+
+[VARIABLES]
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+
+# List of names allowed to shadow builtins
+allowed-redefined-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,
+ _cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
diff --git a/BinaryParser.egg-info/PKG-INFO b/BinaryParser.egg-info/PKG-INFO
deleted file mode 100644
index cd2084c..0000000
--- a/BinaryParser.egg-info/PKG-INFO
+++ /dev/null
@@ -1,24 +0,0 @@
-Metadata-Version: 2.2
-Name: BinaryParser
-Version: 0.0.1
-Summary: Parsing binary files
-Author: Konrad Krämer
-Author-email: konrad.kraemer@kit.edu
-Requires-Python: >=3.7
-License-File: LICENSE
-Requires-Dist: pybind11
-Requires-Dist: pandas
-Requires-Dist: numpy
-Requires-Dist: typeguard
-Requires-Dist: plotly
-Requires-Dist: matplotlib
-Requires-Dist: seaborn
-Requires-Dist: netCDF4
-Provides-Extra: test
-Requires-Dist: pytest; extra == "test"
-Dynamic: author
-Dynamic: author-email
-Dynamic: provides-extra
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
diff --git a/BinaryParser.egg-info/SOURCES.txt b/BinaryParser.egg-info/SOURCES.txt
deleted file mode 100644
index fe16ff3..0000000
--- a/BinaryParser.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-LICENSE
-__init__.py
-pyproject.toml
-setup.py
-./__init__.py
-./setup.py
-BinaryParser.egg-info/PKG-INFO
-BinaryParser.egg-info/SOURCES.txt
-BinaryParser.egg-info/dependency_links.txt
-BinaryParser.egg-info/not-zip-safe
-BinaryParser.egg-info/requires.txt
-BinaryParser.egg-info/top_level.txt
-chemstation/__init__.py
-chemstation/read_ms_file.py
-hplc/__init__.py
-hplc/read_files.py
-openlab/__init__.py
-openlab/openlab.py
-src/parser_hplc.cpp
-src/parser_ms.cpp
-src/parser_xray.cpp
-xray/__init__.py
-xray/bruker_xray.py
\ No newline at end of file
diff --git a/BinaryParser.egg-info/dependency_links.txt b/BinaryParser.egg-info/dependency_links.txt
deleted file mode 100644
index 8b13789..0000000
--- a/BinaryParser.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/BinaryParser.egg-info/not-zip-safe b/BinaryParser.egg-info/not-zip-safe
deleted file mode 100644
index 8b13789..0000000
--- a/BinaryParser.egg-info/not-zip-safe
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/BinaryParser.egg-info/requires.txt b/BinaryParser.egg-info/requires.txt
deleted file mode 100644
index 583a78d..0000000
--- a/BinaryParser.egg-info/requires.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-pybind11
-pandas
-numpy
-typeguard
-plotly
-matplotlib
-seaborn
-netCDF4
-
-[test]
-pytest
diff --git a/BinaryParser.egg-info/top_level.txt b/BinaryParser.egg-info/top_level.txt
deleted file mode 100644
index 5dea438..0000000
--- a/BinaryParser.egg-info/top_level.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-BinaryParser
-chemstation
-hplc
-openlab
-parser_hplc
-parser_ms
-parser_xray
-xray
diff --git a/__init__.py b/__init__.py
deleted file mode 100644
index cbe70c7..0000000
--- a/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from hplc import read_chromatograms, plot_chromatograms
-from chemstation import read_chemstation_file
-from openlab import read_attr, read_lc, read_ms
-
-__all__ = ["read_chromatograms", "plot_chromatograms", "read_chemstation_file"]
diff --git a/bin/bin.R b/bin/bin.R
deleted file mode 100644
index 452fa27..0000000
--- a/bin/bin.R
+++ /dev/null
@@ -1,247 +0,0 @@
-# path <- "./bin/TestDaten/CV_new/EL_2Elec_4p79mg_Et4NBF4inACN_EDLC_Pl2_211024_05_CV_C11/data/table_01.jdx"
-# [10] "##VOLTAGE_START=0.0"
-# [11] "##SCAN_RATE=0.05"
-# [12] "##VOLTAGE_LIMIT_ONE=2.700000047683716"
-# [13] "##VOLTAGE_LIMIT_TWO=0.0"
-# [14] "##RESOLUTION=Auto"
-# [15] "##CYCLES=9"
-# [16] "##DATE=45586"
-# [17] "##TIME=.56551085648"
-# [18] "##VOLTAGE_LIMIT_END=0.0"
-# [19] "##SOFTWARE_VERSION=11.43"
-# [20] "##CONCENTRATION_SALT=1"
-# [21] "##FIRSTX=0.013841687701642513"
-# [22] "##LASTX=-0.0004615047946572304"
-# Voltage
-# [23] "##MINX=-0.0009872515220195055" --> Set in device
-# [24] "##MAXX=2.699223756790161" --> Set in device
-# Current
-# [25] "##MINY=-0.006186341957729539"
-# [26] "##MAXY=0.007231874746488927"
-# [27] "##NPOINTS=27030"
-# [28] "##FIRSTY=3.6861687898635866e-05"
-# [31] "##XYPOINTS=(XY..XY)"
-# [32] "0.013841687701642513, 3.6861687898635866e-05"
-# [33] "0.015812207013368607, 0.0002835869200211467"
-# [34] "0.018651776015758514, 0.0006201966994459773"
-# [35] "0.021376779302954674, 0.0009041020984360451"
-# [36] "0.02406417950987816, 0.001166420330971846"
-# [37] "0.026055805385112762, 0.0013446788548112476"
-# [38] "0.02804425358772278, 0.001510192135544381"
-library(ggplot2)
-library(reshape2)
-Rcpp::sourceCpp("./bin/bin.cpp")
-
-# NOTE: print raw buffer pasted
-pr <- function(v, sep) {
- v <- as.character(v)
- v <- paste(v, collapse = "")
- cat(v, sep)
-}
-
-vec_pad <- function(vec, size) {
- if (length(vec) < size) {
- stop("Vector is too small")
- }
- if (length(vec) / size == 0) {
- return(vec)
- } else {
- times <- length(vec) %/% size
- return(vec[1:(times * size)])
- }
-}
-
-bin <- R6::R6Class(
- "bin",
- public = list(
- path = NULL,
- raw = NULL,
- raw_subset = NULL,
- endian = "big", # NOTE: most devices are constructed for windows
- signed = FALSE,
- initialize = function(path) {
- self$path <- path
- con <- file(path, "rb")
- size <- file.info(path)$size
- self$raw <- readBin(con, what = "raw", n = size)
- close(con)
- },
- to_char = function(elem) {
- rawToChar(elem)
- },
- to_int8 = function(elem) {
- res <- NULL
- if (self$signed) {
- res <- rawToChar(elem) |> as.integer()
- } else {
- res <- rawToChar(elem) |> CastToUint8()
- }
- return(res)
- },
- to_int16 = function(vec) {
- op <- NULL
- if (self$signed) {
- op <- CastToInt16
- } else {
- op <- CastToUint16
- }
- vec <- as.character(vec)
- vec <- vec_pad(vec, 2)
- vec <- split(vec, rep(seq_len(length(vec) / 2), each = 2))
- res <- lapply(vec, function(x) {
- op(x)
- })
- names <- sapply(vec, function(i) {
- paste(i, collapse = "")
- })
- names(res) <- names
- res
- },
- to_int32 = function(vec) {
- op <- NULL
- if (self$signed) {
- op <- CastToInt32
- } else {
- op <- CastToUint32
- }
- vec <- as.character(vec)
- vec <- vec_pad(vec, 4)
- vec <- split(vec, rep(seq_len(length(vec) / 4), each = 4))
- res <- lapply(vec, op)
- names <- sapply(vec, function(i) {
- paste(i, collapse = "")
- })
- names(res) <- names
- res
- },
- print_char = function(idx) {
- cat(" ")
- for (i in idx:(idx + 7)) {
- temp <- self$to_char(self$raw_subset[i])
- if (temp == "") temp <- "."
- cat(temp, "\t")
- }
- cat("\n")
- },
- print_uint8 = function(idx) {
- cat(" ")
- for (i in idx:(idx + 7)) {
- temp <- self$to_int8(self$raw_subset[i])
- if (temp == "") temp <- "."
- cat(temp, "\t")
- }
- cat("\n")
- },
- print_uint16 = function(idx) {
- cat(" ")
- temp <- self$to_int16(self$raw_subset[idx:(idx + 7)])
- for (i in seq_along(temp)) {
- pr(names(temp)[i], "\t")
- cat(temp[[i]], "\t")
- }
- cat("\n")
- },
- print_uint32 = function(idx) {
- cat(" ")
- temp <- self$to_int32(self$raw_subset[idx:(idx + 7)])
- for (i in seq_along(temp)) {
- pr(names(temp)[i], "\t\t")
- cat(temp[[i]], "\t")
- }
- cat("\n")
- },
- print = function(range) {
- self$raw_subset <- self$raw[range]
- for (i in seq_along(self$raw_subset)) {
- cat(self$raw_subset[i], "\t")
- if (i %% 8 == 0) {
- cat("\n")
- self$print_char(i - 7)
- self$print_uint8(i - 7)
- self$print_uint16(i - 7)
- self$print_uint32(i - 7)
- cat("\n")
- cat("Elems: ", i + 1, " - ", i + 8, "\n")
- }
- }
- },
- plot = function(range, type, op = NULL) {
- self$raw_subset <- self$raw[range]
- x <- NULL
- y <- NULL
- if (type == "int8") {
- y <- sapply(self$raw_subset, self$to_int8)
- x <- as.character(self$raw_subset)
- } else if (type == "int16") {
- res <- self$to_int16(self$raw_subset)
- x <- names(res)
- y <- unlist(res)
- attributes(y) <- NULL
- } else if (type == "int32") {
- res <- self$to_int32(self$raw_subset)
- x <- names(res)
- y <- unlist(res)
- attributes(y) <- NULL
- } else {
- stop("found unknown type")
- }
- if (!is.null(op)) {
- stopifnot("op has to be a function" = is.function(op))
- y <- op(y)
- }
- colors <- rep(c("black", "darkred"), length.out = length(y))
- bp <- barplot(y, names.arg = NULL, col = colors, border = "black")
- text(
- x = bp,
- y = par("usr")[3] - 1,
- labels = x, srt = 90,
- cex = 0.75,
- adj = 1, xpd = TRUE
- )
- }
- )
-)
-
-path <- "./bin/TestDaten/CV_new/EL_2Elec_4p79mg_Et4NBF4inACN_EDLC_Pl2_211024_05_CV_C11.mpr"
-b <- bin$new(path)
-b$print(1:100)
-
-b$signed <- FALSE
-range <- 7200:8001
-range <- 7247:(7247 + 400 * 4 - 1)
-b$plot(range, "int16")
-
-int_values <- b$to_int16(b$raw) |> unlist()
-summary(int_values)
-length(int_values)
-
-# Determine cycle length
-acf(int_values, lag.max = 10000, main = "Autocorrelation of Data")
-acf_values <- acf(int_values, lag.max = 10000, plot = FALSE)$acf[-1]
-lag_indices <- which(acf_values > 0.5)[1]
-print(lag_indices)
-
-# Plot raw data
-indices <- seq(1, length(int_values), by = 80) # 100
-data_start <- 0x1c40 # From hexdump
-values <- int_values[data_start:(data_start + 10000)]
-plot(values, type = "l", ylim = c(0, 10000))
-abline(v = seq(1, length(values), by = 47), col = "red", lty = 2)
-points(values, pch = 19)
-
-
-# Print bits
-bit_matrix <- uint16_to_bit_matrix(int_values)
-df <- melt(bit_matrix)
-colnames(bit_matrix) <- paste0("B_", 15:0)
-colnames(df) <- c("Idx", "Bit_Pos", "Value")
-df$Bit_Position <- as.numeric(gsub("B_", "", df$Bit_Pos))
-ggplot(df, aes(x = Bit_Pos, y = Idx, fill = Value)) +
- geom_tile(width = 1, height = 5.5) +
- scale_fill_gradient(low = "white", high = "black") +
- scale_x_reverse(breaks = 15:0) +
- labs(
- x = "Bit Position", y = "Number Index",
- title = "Bit Pattern of int Values"
- ) +
- theme_minimal()
diff --git a/bin/bin.cpp b/bin/bin.cpp
deleted file mode 100644
index 8dd16a9..0000000
--- a/bin/bin.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <Rcpp.h>
-#include <cstdint>
-#include <string>
-#include <iostream>
-#include <bitset>
-#include <vector>
-
-// [[Rcpp::export]]
-std::uint8_t CastToUint8(std::string &buffer) {
- const char *b = buffer.c_str();
- std::uint8_t res = *reinterpret_cast<const std::uint8_t *>(b);
- return res;
-}
-
-// [[Rcpp::export]]
-int CastToInt8(std::string &buffer) {
- const char *b = buffer.c_str();
- std::int8_t res = *reinterpret_cast<const std::int8_t *>(b);
- return static_cast<int>(res);
-}
-
-uint8_t hexToByte(const std::string &hex) {
- return static_cast<uint8_t>(std::stoul(hex, nullptr, 16));
-}
-
-// [[Rcpp::export]]
-std::uint16_t CastToUint16(Rcpp::CharacterVector buffer) {
- if (buffer.size() != 2) {
- std::cout << "size = " << buffer.size() << std::endl;
- Rcpp::stop("Expected exactly 2 hex strings representing bytes.");
- }
- uint8_t byte1 = hexToByte(Rcpp::as<std::string>(buffer[0]));
- uint8_t byte2 = hexToByte(Rcpp::as<std::string>(buffer[1]));
- uint16_t res = (byte1 << 8) | byte2;
- return res;
-}
-
-// [[Rcpp::export]]
-int16_t CastToInt16(Rcpp::CharacterVector buffer) {
- if (buffer.size() != 2) {
- std::cout << "size = " << buffer.size() << std::endl;
- Rcpp::stop("Expected exactly 2 hex strings representing bytes.");
- }
- uint8_t byte1 = hexToByte(Rcpp::as<std::string>(buffer[0]));
- uint8_t byte2 = hexToByte(Rcpp::as<std::string>(buffer[1]));
- int16_t res = (static_cast<int16_t>(byte1) << 8) | byte2;
- return res;
-}
-
-// [[Rcpp::export]]
-std::uint32_t CastToUint32(Rcpp::CharacterVector buffer) {
- if (buffer.size() != 4) {
- Rcpp::stop("Expected exactly 4 hex strings representing bytes.");
- }
- uint8_t byte1 = hexToByte(Rcpp::as<std::string>(buffer[0]));
- uint8_t byte2 = hexToByte(Rcpp::as<std::string>(buffer[1]));
- uint8_t byte3 = hexToByte(Rcpp::as<std::string>(buffer[2]));
- uint8_t byte4 = hexToByte(Rcpp::as<std::string>(buffer[3]));
- uint32_t res = (byte1 << 24) | (byte2 << 16) | (byte3 << 8) | byte4;
- return res;
-}
-
-// [[Rcpp::export]]
-std::int32_t CastToInt32(Rcpp::CharacterVector buffer) {
- if (buffer.size() != 4) {
- Rcpp::stop("Expected exactly 4 hex strings representing bytes.");
- }
- uint8_t byte1 = hexToByte(Rcpp::as<std::string>(buffer[0])); // unsigned
- uint8_t byte2 = hexToByte(Rcpp::as<std::string>(buffer[1]));
- uint8_t byte3 = hexToByte(Rcpp::as<std::string>(buffer[2]));
- uint8_t byte4 = hexToByte(Rcpp::as<std::string>(buffer[3]));
- uint32_t ures = (static_cast<uint32_t>(byte1) << 24) |
- (static_cast<uint32_t>(byte2) << 16) |
- (static_cast<uint32_t>(byte3) << 8) | byte4;
- return static_cast(ures);
-}
-
-// [[Rcpp::export]]
-Rcpp::IntegerMatrix uint16_to_bit_matrix(Rcpp::IntegerVector values) {
- int n = values.size();
- Rcpp::IntegerMatrix bit_matrix(n, 16);
- for (int i = 0; i < n; i++) {
- std::bitset<16> bits(values[i]);
- for (int j = 0; j < 16; j++) {
- bit_matrix(i, j) = bits[15 - j];
- }
- }
- return bit_matrix;
-}
diff --git a/binary_parser/__init__.py b/binary_parser/__init__.py
new file mode 100644
index 0000000..651b0f0
--- /dev/null
+++ b/binary_parser/__init__.py
@@ -0,0 +1,5 @@
+from binary_parser.hplc import read_chromatograms, plot_chromatograms
+from binary_parser.chemstation import read_chemstation_file
+from binary_parser.openlab import read_attr, read_lc, read_ms
+
+__all__ = ["read_chromatograms", "plot_chromatograms", "read_chemstation_file"]
diff --git a/binary_parser/chemstation/__init__.py b/binary_parser/chemstation/__init__.py
new file mode 100644
index 0000000..7928a24
--- /dev/null
+++ b/binary_parser/chemstation/__init__.py
@@ -0,0 +1,6 @@
+
+from binary_parser.chemstation.read_ms_file import read_chemstation_file
+
+__all__ = [
+ 'read_chemstation_file',
+]
diff --git a/build/lib.linux-x86_64-cpython-312/chemstation/read_ms_file.py b/binary_parser/chemstation/read_ms_file.py
similarity index 88%
rename from build/lib.linux-x86_64-cpython-312/chemstation/read_ms_file.py
rename to binary_parser/chemstation/read_ms_file.py
index f037b2e..84bda27 100644
--- a/build/lib.linux-x86_64-cpython-312/chemstation/read_ms_file.py
+++ b/binary_parser/chemstation/read_ms_file.py
@@ -1,7 +1,5 @@
-import parser_ms as pm
+import binary_parser.helper.parser_ms as pm
import pandas as pd
-import plotly.graph_objs as go
-import plotly.express as px
from typeguard import typechecked
from typing import List
@@ -32,5 +30,4 @@ def merge_cycles_into_df(cycles: List[dict]) -> pd.DataFrame:
@typechecked
def read_chemstation_file(file_path: str) -> pd.DataFrame:
cycles = pm.read_cycles(file_path)
- cycle_dfs = convert_cycles_to_dfs(cycles)
return merge_cycles_into_df(cycles)
diff --git a/bin/bin.py b/binary_parser/helper/__init__.py
similarity index 100%
rename from bin/bin.py
rename to binary_parser/helper/__init__.py
diff --git a/binary_parser/helper/parser_hplc.py b/binary_parser/helper/parser_hplc.py
new file mode 100644
index 0000000..4fbcf69
--- /dev/null
+++ b/binary_parser/helper/parser_hplc.py
@@ -0,0 +1,179 @@
+import struct
+import numpy as np
+
+
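+# The two *_to_host helpers pack and unpack with the same big-endian
+# format, so they return their input unchanged; they read as direct
+# ports of the C++ byte-order conversions on an already
+# big-endian-normalized value.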
+def _u16_be_to_host(v):
+ return struct.unpack(">H", struct.pack(">H", v))[0]
+
+
+def _u32_be_to_host(v):
+ return struct.unpack(">I", struct.pack(">I", v))[0]
+
+
+def _i16_be(stream):
+ data = stream.read(2)
+ if not data:
+ raise EOFError
+ return struct.unpack(">h", data)[0]
+
+
+def _i32_be(stream):
+ data = stream.read(4)
+ if not data:
+ raise EOFError
+ return struct.unpack(">i", data)[0]
+
+
+def _u32_be(stream):
+ data = stream.read(4)
+ if not data:
+ raise EOFError
+ return struct.unpack(">I", data)[0]
+
+
+def readInt(filepath, offset):
+    with open(filepath, "rb") as f:
+        f.seek(offset)
+        data = f.read(4)
+        return struct.unpack("<i", data)[0]
+
+
+def readDouble(filepath, offset):
+    with open(filepath, "rb") as f:
+        f.seek(offset)
+        data = f.read(8)
+        val = struct.unpack("<d", data)[0]  # inverted endian
+        return val
+
+
+def _read_int32_be(filepath, offset):
+    # big-endian counterpart used by UVClass below
+    with open(filepath, "rb") as f:
+        f.seek(offset)
+        return struct.unpack(">i", f.read(4))[0]
+
+
+def readUint8(filepath, offset):
+ with open(filepath, "rb") as f:
+ f.seek(offset)
+ out = f.read(40)
+ return [chr(b) for b in out]
+
+
+def readTime(filepath, offset):
+ out = np.zeros(2, dtype=float)
+ with open(filepath, "rb") as f:
+ f.seek(offset)
+ for i in range(2):
+ raw = _i32_be(f)
+ out[i] = raw / 60000.0
+ return out
+
+
+def DeltaCompression(filepath, offset, n=None):
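+    """
+    Decode the delta-compressed data stream: each block starts with a
+    16-bit header whose low 12 bits give the number of values; each
+    value is a 16-bit delta added to the running total, except the
+    sentinel -32768, which is followed by an absolute 32-bit value
+    that resets the accumulator.
+    """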
+ with open(filepath, "rb") as f:
+ f.seek(offset)
+ res = []
+ prev = 0
+
+ while True:
+ try:
+ header = _i16_be(f)
+ except EOFError:
+ break
+
+ # C++: if (buffer1 << 12 == 0)
+ if (header << 12) == 0:
+ break
+
+ count = header & 4095
+
+ for _ in range(count):
+ try:
+ delta = _i16_be(f)
+ except EOFError:
+ break
+
+ if delta != -32768:
+ prev += delta
+ res.append(prev)
+ else:
+ prev = _i32_be(f)
+ res.append(prev)
+
+ return np.array(res, dtype=np.int32)
+
+
+class UVClass:
+    """
+    Python port of the UVClass from the pybind11 module.
+    """
+
+    def __init__(self, filepath):
+        self.filepath = filepath
+
+        # read nscans
+        nscans = _read_int32_be(filepath, 0x116)
+
+        self.time = np.zeros(nscans, dtype=float)
+        self.wavelengths = []
+        self.ndata = []
+
+        with open(filepath, "rb") as f:
+            offset = 0x1002
+            prev_buffer7 = 0  # running value carried across delta-decoded records
+
+            for scan in range(nscans):
+                f.seek(offset)
+
+                # buffer1: size of this scan record in bytes; the next
+                # record starts size bytes further on
+                size = _i16_be(f)
+
+                # scan time in ms (same 1/60000 scaling as readTime above;
+                # field layout assumed)
+                self.time[scan] = _u32_be(f) / 60000.0
+
+                # the wavelength grid and the delta-compressed absorbance
+                # values accumulated via prev_buffer7 follow in the record
+
+                offset += size
diff --git a/binary_parser/helper/parser_ms.py b/binary_parser/helper/parser_ms.py
new file mode 100644
index 0000000..02e3b5e
--- /dev/null
+++ b/binary_parser/helper/parser_ms.py
@@ -0,0 +1,85 @@
+import struct
+import numpy as np
+
+
+def _read_file(path):
+ with open(path, "rb") as f:
+ return f.read()
+
+
+def _u16_be(buf, offset):
+ return struct.unpack(">H", buf[offset:offset + 2])[0]
+
+
+def _u32_be(buf, offset):
+ return struct.unpack(">I", buf[offset:offset + 4])[0]
+
+
+def _find_number_of_cycles(buf):
+ return _u32_be(buf, 0x116)
+
+
+def _find_data_start(buf):
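+    # The header stores the data start at 0x10A as a count of 16-bit
+    # words; * 2 converts it to a byte offset and - 2 steps back one word.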
+ offset_correction = _u16_be(buf, 0x10A)
+ return offset_correction * 2 - 2
+
+
+def _convert_mz_intensity(data_u16):
+ n = len(data_u16)
+ n -= n % 2
+ mz = np.zeros(n // 2, dtype=float)
+ intensity = np.zeros(n // 2, dtype=float)
+
+ for i in range(n):
+ if (i & 1) == 0:
+ # MZ
+ mz[i >> 1] = data_u16[i] / 20.0
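+            # raw value is m/z in fixed-point units of 0.05 (hence / 20.0)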
+ else:
+ # Intensity encoding: head = bits 14-15, tail = bits 0-13
+ head = data_u16[i] >> 14
+ tail = data_u16[i] & 0x3FFF
+ intensity[i >> 1] = (8 ** head) * tail
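+            # e.g. raw 0xC005: head = 3, tail = 5 -> 8**3 * 5 = 2560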
+
+ return mz, intensity
+
+
+def _read_cycle(buf, start, cycle_size):
+ data_u16 = []
+ for i in range(cycle_size*2):
+ data_u16.append(_u16_be(buf, start + i * 2))
+ return _convert_mz_intensity(data_u16)
+
+
+def read_cycles(path):
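+    """
+    Walk the per-cycle records: each record carries a u32 scan time in
+    milliseconds, a u16 cycle size (the number of m/z-intensity pairs),
+    the data block itself, and padding that the counter skips over.
+    """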
+ buf = _read_file(path)
+ data_start = _find_data_start(buf)
+ num_cycles = _find_number_of_cycles(buf)
+
+ cycles = []
+ counter = data_start
+
+ for _ in range(num_cycles):
+ if counter >= len(buf):
+ raise ValueError("Error extracting data")
+
+ counter += 2 # skip?
+ time = _u32_be(buf, counter)
+ counter += 10
+
+ cycle_size = _u16_be(buf, counter)
+ counter += 6
+
+ mz, intensity = _read_cycle(buf, counter, cycle_size)
+
+ rt = time / 60000.0
+
+ counter += cycle_size * 4
+ counter += 10
+
+ cycles.append({
+ "mz": mz,
+ "intensity": intensity,
+ "retention_time": rt,
+ })
+
+ return cycles
diff --git a/binary_parser/helper/parser_xray.py b/binary_parser/helper/parser_xray.py
new file mode 100644
index 0000000..6c09ff1
--- /dev/null
+++ b/binary_parser/helper/parser_xray.py
@@ -0,0 +1,65 @@
+import numpy as np
+import struct
+
+
+def read_doubles(filepath, offset=0):
+ """
+ Reads the entire file (from offset) as doubles (little-endian assumed).
+ """
+ with open(filepath, "rb") as f:
+ f.seek(0, 2)
+ size_bytes = f.tell() - offset
+ f.seek(offset)
+ n = size_bytes // 8
+        data = np.fromfile(f, dtype="<f8", count=n)
+    return data
+
+
+def read_ints(filepath, offset=0):  # name assumed; byte-slicing loop survives from the original
+    with open(filepath, "rb") as f:
+        f.seek(offset)
+        raw = f.read()
+    n = len(raw) // 4
+    out = np.zeros(n, dtype=np.int64)
+    for i in range(n):
+        v = struct.unpack(">i", raw[i*4:i*4+4])[0]  # big-endian -> int32
+        out[i] = v
+    return out
+
+
+def read_chars(filepath):
+ """
+ Print file content in a human-readable char + hex format.
+ Matching original behavior for debugging.
+ """
+ with open(filepath, "rb") as f:
+ buf = f.read()
+
+ width = 8
+ addr = 0
+
+ for i in range(0, len(buf), width):
+ chunk = buf[i:i+width]
+
+ # address line (hex)
+ print(" ".join(f"{addr+j:03x}" for j in range(len(chunk))))
+ addr += len(chunk)
+
+ # character view
+ print(" ".join(f"'{chr(c)}'" for c in chunk))
diff --git a/binary_parser/helper/utils.py b/binary_parser/helper/utils.py
new file mode 100644
index 0000000..bf352bc
--- /dev/null
+++ b/binary_parser/helper/utils.py
@@ -0,0 +1,4 @@
+from typing import Union, List
+import numpy as np
+
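+# Alias used in @typechecked signatures: accept either plain Python
+# float lists or numpy arrays.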
+NumList = Union[List[float], np.ndarray]
\ No newline at end of file
diff --git a/binary_parser/hplc/__init__.py b/binary_parser/hplc/__init__.py
new file mode 100644
index 0000000..4fcd0b8
--- /dev/null
+++ b/binary_parser/hplc/__init__.py
@@ -0,0 +1,8 @@
+from binary_parser.hplc.read_files import read_chromatograms, plot_chromatograms, read_uv, plot_uv
+
+__all__ = [
+ 'read_chromatograms',
+ 'plot_chromatograms',
+ 'read_uv',
+ 'plot_uv'
+]
\ No newline at end of file
diff --git a/hplc/read_files.py b/binary_parser/hplc/read_files.py
similarity index 89%
rename from hplc/read_files.py
rename to binary_parser/hplc/read_files.py
index ce15916..a2079f1 100644
--- a/hplc/read_files.py
+++ b/binary_parser/hplc/read_files.py
@@ -1,13 +1,12 @@
-import parser_hplc as ph
+import binary_parser.helper.parser_hplc as ph
import pandas as pd
-import numpy as np
import plotly.graph_objs as go
import plotly.express as px
import re
-import sys
from os import listdir
from os.path import isfile, join
from typeguard import typechecked
+from binary_parser.helper.utils import NumList
from typing import List
@@ -23,9 +22,9 @@ def check_identical_lists(lst: List[List[float]]) -> bool:
@typechecked
-def read_time(file_path: str, length: int) -> List[float]:
+def read_time(file_path: str, length: int) -> NumList:
offsetTime = int("0000011a", 16)
- time: List[float] = ph.readTime(file_path, offsetTime)
+    time: NumList = ph.readTime(file_path, offsetTime)
step_size: float = (time[1] - time[0]) / (length - 1)
res: List[float] = [time[0] + i * step_size for i in range(length)]
return res
@@ -42,7 +41,7 @@ def read_file_info(file_path: str) -> int:
@typechecked
-def scale_data(file_path: str, l: List[int]) -> List[float]:
+def scale_data(file_path: str, l: NumList) -> NumList:
intercept: float = ph.readDouble(file_path, 4724)
slope: float = ph.readDouble(file_path, 4732)
res: List[float] = [float(i) * slope + intercept for i in l]
@@ -58,8 +57,8 @@ def read_chromatograms(path: str) -> pd.DataFrame:
]
wavelengths: List[str] = ["Wavelength_" + str(read_file_info(i)) for i in files]
offset: int = int("00001800", 16)
- result: List[List[int]] = [ph.DeltaCompresion(i, offset, 12) for i in files]
- result_scaled: List[List[float]] = [
+ result: List[NumList] = [ph.DeltaCompression(i, offset, 12) for i in files]
+ result_scaled: List[NumList] = [
scale_data(files[i], result[i]) for i in range(0, len(result))
]
times: List[List[float]] = [read_time(i, len(result[0])) for i in files]
diff --git a/binary_parser/openlab/__init__.py b/binary_parser/openlab/__init__.py
new file mode 100644
index 0000000..85652de
--- /dev/null
+++ b/binary_parser/openlab/__init__.py
@@ -0,0 +1,3 @@
+from binary_parser.openlab.openlab import read_attr, read_lc, read_ms
+
+__all__ = ["read_attr", "read_lc", "read_ms"]
diff --git a/openlab/openlab.py b/binary_parser/openlab/openlab.py
similarity index 70%
rename from openlab/openlab.py
rename to binary_parser/openlab/openlab.py
index b4d27bb..e4a6da4 100755
--- a/openlab/openlab.py
+++ b/binary_parser/openlab/openlab.py
@@ -1,13 +1,18 @@
import os
+import re
+from typing import List
+
import netCDF4 as nc
-import pandas as pd
import numpy as np
-import re
+import pandas as pd
from typeguard import typechecked
-from typing import List
+
@typechecked
def get_files(path: str) -> List[str]:
+ """
+ Return list of .cdf files in the given directory, sorted naturally.
+ """
fs = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".cdf")]
assert fs, "No files found"
@@ -23,7 +28,8 @@ def natkey(p: str):
# Attributes
@typechecked
-def get_attr(path: str):
+def _get_attr(path: str):
+ """Read global NetCDF attributes from a file."""
with nc.Dataset(path, "r") as dataset:
attr = {key: dataset.getncattr(key) for key in dataset.ncattrs()}
return attr
@@ -31,14 +37,23 @@ def get_attr(path: str):
@typechecked
def read_attr(path: str) -> pd.DataFrame:
+ """
+ Read all NetCDF global attributes across all .cdf files in the directory.
+ Returns a normalized DataFrame.
+ """
fs = get_files(path)
- attrs_lc = [pd.DataFrame([get_attr(fs[x])]) for x in range(len(fs))]
+ attrs_lc = [pd.DataFrame([_get_attr(fs[x])]) for x in range(len(fs))]
attrs_lc = pd.concat(attrs_lc, ignore_index=True)
return attrs_lc
+
+# ---------------------------------------------------------------------------
# LC Data
+# ---------------------------------------------------------------------------
+
@typechecked
def get_lc_data(path: str) -> pd.DataFrame:
+ """Read LC detector signals from NetCDF."""
with nc.Dataset(path, "r") as dataset:
detector_signals = dataset.variables["ordinate_values"][:]
global_atts = {key: dataset.getncattr(key) for key in dataset.ncattrs()}
@@ -57,6 +72,7 @@ def get_lc_data(path: str) -> pd.DataFrame:
@typechecked
def process_detector_info(df_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
+ """Extract wavelength from LC detector metadata."""
for df in df_list:
detector_name = df.attrs.get("detector", "")
wl_match = (
@@ -71,6 +87,7 @@ def process_detector_info(df_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
@typechecked
def read_lc(path: str) -> pd.DataFrame:
+ """Read all LC files containing 'DAD' in filename and concatenate."""
fs = get_files(path)
# Filter fs --> Files which contain DAD within their name
fs = [f for f in fs if "DAD" in os.path.basename(f)]
@@ -80,16 +97,19 @@ def read_lc(path: str) -> pd.DataFrame:
return df
+# ---------------------------------------------------------------------------
# MS Data
+# ---------------------------------------------------------------------------
+
@typechecked
-def get_point_counts(path: str) -> np.ma.MaskedArray:
+def _get_point_counts(path: str) -> np.ma.MaskedArray:
with nc.Dataset(path, "r") as dataset:
res = dataset.variables["point_count"][:]
return res
@typechecked
-def get_ms_data(path: str) -> pd.DataFrame:
+def _get_ms_data(path: str) -> pd.DataFrame:
with nc.Dataset(path, "r") as dataset:
mz_values = dataset.variables["mass_values"][:]
intensities = dataset.variables["intensity_values"][:]
@@ -97,14 +117,14 @@ def get_ms_data(path: str) -> pd.DataFrame:
@typechecked
-def get_scan_time(path: str) -> np.ma.MaskedArray:
+def _get_scan_time(path: str) -> np.ma.MaskedArray:
with nc.Dataset(path, "r") as dataset:
time = dataset.variables["scan_acquisition_time"][:]
return time / 60
@typechecked
-def split_data(
+def _split_data(
data: pd.DataFrame, point_counts: np.ma.MaskedArray
) -> List[pd.DataFrame]:
end_indices = np.cumsum(point_counts)
@@ -114,7 +134,7 @@ def split_data(
@typechecked
-def normalise(data_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
+def _normalise(data_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
return [
df.assign(intensities=df["intensities"] * (100 / df["intensities"].max()))
for df in data_list
@@ -125,15 +145,15 @@ def normalise(data_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
def read_ms(path: str) -> List[pd.DataFrame]:
fs = get_files(path)
fs_ms = [f for f in fs if "spectra" in os.path.basename(f)]
- data_minus = get_ms_data(fs_ms[0])
- point_counts_minus = get_point_counts(fs_ms[0])
- time_minus = get_scan_time(fs_ms[0])
- df_minus = normalise(split_data(data_minus, point_counts_minus))
-
- data_plus = get_ms_data(fs_ms[1])
- point_counts_plus = get_point_counts(fs_ms[1])
- time_plus = get_scan_time(fs_ms[1])
- df_plus = normalise(split_data(data_plus, point_counts_plus))
+ data_minus = _get_ms_data(fs_ms[0])
+ point_counts_minus = _get_point_counts(fs_ms[0])
+ time_minus = _get_scan_time(fs_ms[0])
+ df_minus = _normalise(_split_data(data_minus, point_counts_minus))
+
+ data_plus = _get_ms_data(fs_ms[1])
+ point_counts_plus = _get_point_counts(fs_ms[1])
+ time_plus = _get_scan_time(fs_ms[1])
+ df_plus = _normalise(_split_data(data_plus, point_counts_plus))
df_minus = pd.concat([df.assign(time=t) for df, t in zip(df_minus, time_minus)])
df_plus = pd.concat([df.assign(time=t) for df, t in zip(df_plus, time_plus)])
diff --git a/binary_parser/xray/__init__.py b/binary_parser/xray/__init__.py
new file mode 100644
index 0000000..40c9115
--- /dev/null
+++ b/binary_parser/xray/__init__.py
@@ -0,0 +1,4 @@
+from binary_parser.xray.bruker_xray import read_raw
+
+__all__ = ["read_raw"]
+
diff --git a/build/lib.linux-x86_64-cpython-312/xray/bruker_xray.py b/binary_parser/xray/bruker_xray.py
similarity index 96%
rename from build/lib.linux-x86_64-cpython-312/xray/bruker_xray.py
rename to binary_parser/xray/bruker_xray.py
index 009540d..d164664 100644
--- a/build/lib.linux-x86_64-cpython-312/xray/bruker_xray.py
+++ b/binary_parser/xray/bruker_xray.py
@@ -1,7 +1,5 @@
-import parser_xray as px
+import binary_parser.helper.parser_xray as px
import pandas as pd
-from typeguard import typechecked
-from typing import List
search_for = {
"GONIOMETER_RADIUS": 217.5,
diff --git a/build/lib.linux-x86_64-cpython-312/BinaryParser/__init__.py b/build/lib.linux-x86_64-cpython-312/BinaryParser/__init__.py
deleted file mode 100644
index cbe70c7..0000000
--- a/build/lib.linux-x86_64-cpython-312/BinaryParser/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from hplc import read_chromatograms, plot_chromatograms
-from chemstation import read_chemstation_file
-from openlab import read_attr, read_lc, read_ms
-
-__all__ = ["read_chromatograms", "plot_chromatograms", "read_chemstation_file"]
diff --git a/build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/__init__.py b/build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/__init__.py
deleted file mode 100644
index 41e3eaf..0000000
--- a/build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__all__ = ["read_attr", "read_lc", "read_ms"]
diff --git a/build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/openlab.py b/build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/openlab.py
deleted file mode 100644
index aeedbce..0000000
--- a/build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/openlab.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import os
-import netCDF4 as nc
-import pandas as pd
-import numpy as np
-import re
-from typeguard import typechecked
-from typing import List
-
-
-@typechecked
-def get_files(path: str) -> List[str]:
- fs = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".cdf")]
- assert len(fs) > 0, "No files found"
- return fs
-
-
-# Attributes
-@typechecked
-def get_attr(path: str):
- with nc.Dataset(path, "r") as dataset:
- attr = {key: dataset.getncattr(key) for key in dataset.ncattrs()}
- return attr
-
-
-@typechecked
-def read_attr(path: str) -> pd.DataFrame:
- fs = get_files(path)
- attrs_lc = [pd.DataFrame([get_attr(fs[x])]) for x in range(len(fs))]
- attrs_lc = pd.concat(attrs_lc, ignore_index=True)
- return attrs_lc
-
-
-# LC Data
-@typechecked
-def get_lc_data(path: str) -> pd.DataFrame:
- with nc.Dataset(path, "r") as dataset:
- detector_signals = dataset.variables["ordinate_values"][:]
- global_atts = {key: dataset.getncattr(key) for key in dataset.ncattrs()}
- detector = global_atts.get("detector_name", "")
- run_time_length = dataset.variables["actual_run_time_length"][...]
-
- data = pd.DataFrame(
- {
- "RetentionTime": np.linspace(0, run_time_length, num=len(detector_signals)),
- "DetectorSignal": detector_signals,
- }
- )
- data.attrs["detector"] = detector
- return data
-
-
-@typechecked
-def process_detector_info(df_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
- for df in df_list:
- detector_name = df.attrs.get("detector", "")
- wl_match = (
- re.search(r"\d+", detector_name.split(",")[1])
- if "," in detector_name
- else None
- )
- wl = float(wl_match.group()) if wl_match else None
- df["wavelength"] = wl
- return df_list
-
-
-@typechecked
-def read_lc(path: str) -> pd.DataFrame:
- fs = get_files(path)
- # Filter fs --> Files which contain DAD within their name
- fs = [f for f in fs if "DAD" in os.path.basename(f)]
- df = [get_lc_data(fs[x]) for x in range(len(fs))]
- df = process_detector_info(df)
- df = pd.concat(df, ignore_index=True)
- return df
-
-
-# MS Data
-@typechecked
-def get_point_counts(path: str) -> List[int]:
- with nc.Dataset(path, "r") as dataset:
- return dataset.variables["point_count"][:]
-
-
-@typechecked
-def get_ms_data(path: str) -> pd.DataFrame:
- with nc.Dataset(path, "r") as dataset:
- mz_values = dataset.variables["mass_values"][:]
- intensities = dataset.variables["intensity_values"][:]
- return pd.DataFrame({"mz": mz_values, "intensities": intensities})
-
-
-@typechecked
-def get_scan_time(path: str) -> List[float]:
- with nc.Dataset(path, "r") as dataset:
- time = dataset.variables["scan_acquisition_time"][:]
- return time / 60
-
-
-@typechecked
-def split_data(data: pd.DataFrame, point_counts: List[int]) -> List[pd.DataFrame]:
- end_indices = np.cumsum(point_counts)
- start_indices = np.insert(end_indices[:-1], 0, 0)
- return [data.iloc[start:end] for start, end in zip(start_indices, end_indices)]
-
-
-@typechecked
-def normalise(data_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
- return [
- df.assign(intensities=df["intensities"] * (100 / df["intensities"].max()))
- for df in data_list
- ]
-
-
-@typechecked
-def read_ms(path: str) -> List[pd.DataFrame]:
- fs = get_files(path)
- fs_ms = [f for f in fs if "spectra" in os.path.basename(f)]
- data_minus = get_ms_data(fs_ms[0])
- point_counts_minus = get_point_counts(fs_ms[0])
- time_minus = get_scan_time(fs_ms[0])
- df_minus = normalise(split_data(data_minus, point_counts_minus))
-
- data_plus = get_ms_data(fs_ms[1])
- point_counts_plus = get_point_counts(fs_ms[1])
- time_plus = get_scan_time(fs_ms[1])
- df_plus = normalise(split_data(data_plus, point_counts_plus))
-
- df_minus = pd.concat([df.assign(time=t) for df, t in zip(df_minus, time_minus)])
- df_plus = pd.concat([df.assign(time=t) for df, t in zip(df_plus, time_plus)])
- return [df_minus, df_plus]
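
The core trick in this removed module is worth spelling out: `mass_values` and `intensity_values` are stored as one flat array covering all scans, and `point_count` holds the per-scan lengths, so cumulative sums give the slice boundaries. A self-contained sketch with synthetic data standing in for a `.cdf` file:

```python
# Self-contained sketch of the point_count splitting used above,
# with synthetic data instead of a real .cdf file.
import numpy as np
import pandas as pd

data = pd.DataFrame({"mz": np.arange(6.0), "intensities": [1.0, 2, 4, 8, 16, 32]})
point_counts = [2, 1, 3]  # points per scan

end_indices = np.cumsum(point_counts)
start_indices = np.insert(end_indices[:-1], 0, 0)
scans = [data.iloc[s:e] for s, e in zip(start_indices, end_indices)]
assert [len(s) for s in scans] == point_counts
```
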
diff --git a/build/lib.linux-x86_64-cpython-312/BinaryParser/setup.py b/build/lib.linux-x86_64-cpython-312/BinaryParser/setup.py
deleted file mode 100644
index 63173cb..0000000
--- a/build/lib.linux-x86_64-cpython-312/BinaryParser/setup.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from pybind11.setup_helpers import Pybind11Extension, build_ext
-from setuptools import setup, find_packages
-
-__version__ = "0.0.1"
-
-ext_modules = [
- Pybind11Extension(
- "parser_hplc",
- ["src/parser_hplc.cpp"],
- define_macros=[("VERSION_INFO", __version__)],
- extra_compile_args=["-std=c++17"],
- ),
- Pybind11Extension(
- "parser_ms",
- ["src/parser_ms.cpp"],
- define_macros=[("VERSION_INFO", __version__)],
- extra_compile_args=["-std=c++17"],
- ),
- Pybind11Extension(
- "parser_xray",
- ["src/parser_xray.cpp"],
- define_macros=[("VERSION_INFO", __version__)],
- extra_compile_args=["-std=c++17"],
- ),
-]
-
-setup(
- name="BinaryParser",
- version=__version__,
- author="Konrad Krämer",
- author_email="konrad.kraemer@kit.edu",
- description="Parsing binary files",
- long_description="",
- ext_modules=ext_modules,
- extras_require={"test": "pytest"},
- cmdclass={"build_ext": build_ext},
- zip_safe=False,
- python_requires=">=3.7",
- # packages=find_packages(),
- packages=(["BinaryParser"] + ["openlab"] + find_packages()),
- # package_dir={"BinaryParser": "."},
- package_dir={
- "BinaryParser": ".",
- "BinaryParser.openlab": "./openlab",
- },
- setup_requires=["pybind11"],
- install_requires=[
- "pybind11",
- "pandas",
- "numpy",
- "typeguard",
- "plotly",
- "matplotlib",
- "seaborn",
- "netCDF4",
- ],
-)
diff --git a/build/lib.linux-x86_64-cpython-312/BinaryParser/test_file.py b/build/lib.linux-x86_64-cpython-312/BinaryParser/test_file.py
deleted file mode 100644
index 4a66672..0000000
--- a/build/lib.linux-x86_64-cpython-312/BinaryParser/test_file.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import BinaryParser as bp
-import pandas as pd
-import plotly.express as px
-
-file_path = "/home/konrad/Documents/BinaryParser/Chemstation/ChemStationData/LCMS_DatenAgilent_SVS/SVS_1025F1.D/MSD1.MS"
-df = bp.read_chemstation_file(file_path)
-df.to_csv("output.csv", index=False)
-tic_df = df.groupby("retention_time", as_index=False)["intensity"].sum()
-fig = px.line(tic_df, x="retention_time", y="intensity",
- title="Total Ion Chromatogram (TIC)",
- labels={"retention_time": "Retention Time (min)", "intensity": "Total Ion Intensity"})
-fig.show()
diff --git a/build/lib.linux-x86_64-cpython-312/chemstation/__init__.py b/build/lib.linux-x86_64-cpython-312/chemstation/__init__.py
deleted file mode 100644
index fbadbd5..0000000
--- a/build/lib.linux-x86_64-cpython-312/chemstation/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-
-from .read_ms_file import read_chemstation_file
-
-__all__ = [
- 'read_chemstation_file',
-]
diff --git a/build/lib.linux-x86_64-cpython-312/hplc/__init__.py b/build/lib.linux-x86_64-cpython-312/hplc/__init__.py
deleted file mode 100644
index c95f498..0000000
--- a/build/lib.linux-x86_64-cpython-312/hplc/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from .read_files import read_chromatograms, plot_chromatograms, read_uv, plot_uv
-
-__all__ = [
- 'read_chromatograms',
- 'plot_chromatograms',
- 'read_uv',
- 'plot_uv'
-]
\ No newline at end of file
diff --git a/build/lib.linux-x86_64-cpython-312/hplc/read_files.py b/build/lib.linux-x86_64-cpython-312/hplc/read_files.py
deleted file mode 100644
index ce15916..0000000
--- a/build/lib.linux-x86_64-cpython-312/hplc/read_files.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import parser_hplc as ph
-import pandas as pd
-import numpy as np
-import plotly.graph_objs as go
-import plotly.express as px
-import re
-import sys
-from os import listdir
-from os.path import isfile, join
-from typeguard import typechecked
-from typing import List
-
-
-@typechecked
-def check_identical_lists(lst: List[List[float]]) -> bool:
- if not lst:
- return False
- first_sublist: List[float] = lst[0]
- for sublist in lst[1:]:
- if sublist != first_sublist:
- return False
- return True
-
-
-@typechecked
-def read_time(file_path: str, length: int) -> List[float]:
- offsetTime = int("0000011a", 16)
- time: List[float] = ph.readTime(file_path, offsetTime)
- step_size: float = (time[1] - time[0]) / (length - 1)
- res: List[float] = [time[0] + i * step_size for i in range(length)]
- return res
-
-
-@typechecked
-def read_file_info(file_path: str) -> int:
- offsetFileInfo = int("00001080", 16)
- res = ph.readUint8(file_path, offsetFileInfo)
- res = [x if x != "\x00" else "" for x in res]
- res = "".join(res)
- matches = int(re.findall(r"Sig=(\d+),", res)[0])
- return matches
-
-
-@typechecked
-def scale_data(file_path: str, l: List[int]) -> List[float]:
- intercept: float = ph.readDouble(file_path, 4724)
- slope: float = ph.readDouble(file_path, 4732)
- res: List[float] = [float(i) * slope + intercept for i in l]
- return res
-
-
-@typechecked
-def read_chromatograms(path: str) -> pd.DataFrame:
- files: List[str] = [
- path + "/" + f
- for f in listdir(path)
- if isfile(join(path, f)) and f.endswith(".ch")
- ]
- wavelengths: List[str] = ["Wavelength_" + str(read_file_info(i)) for i in files]
- offset: int = int("00001800", 16)
- result: List[List[int]] = [ph.DeltaCompresion(i, offset, 12) for i in files]
- result_scaled: List[List[float]] = [
- scale_data(files[i], result[i]) for i in range(0, len(result))
- ]
- times: List[List[float]] = [read_time(i, len(result[0])) for i in files]
- if not check_identical_lists(times):
- raise ValueError("File Error")
- time: List[float] = times[0]
- df: pd.DataFrame = pd.DataFrame(result_scaled).transpose()
- df.columns = wavelengths
- df["time"] = time
- return df
-
-
-@typechecked
-def plot_chromatograms(path: str):
- df = read_chromatograms(path)
- time = df["time"]
- data = df.drop(columns=["time"])
- wavelengths = df.columns[:-1]
- df_melted = df.melt(id_vars="time", var_name="Wavelengths", value_name="Data")
- fig = px.line_3d(
- df_melted, x="time", y="Wavelengths", z="Data", color="Wavelengths"
- )
- fig.update_traces(marker=dict(size=5))
- fig.show()
-
-
-@typechecked
-def read_uv(path: str) -> pd.DataFrame:
- uv = ph.UVClass(path)
- time: pd.DataFrame = pd.DataFrame(uv.getTime())
- wavelengths: List[int] = uv.getWavelengths().astype("int").tolist()
- data: pd.DataFrame = pd.DataFrame(uv.getData())
- data.columns = ["Wavelength_" + str(i) for i in wavelengths]
- data["time"] = time
- df_melted = data.melt(id_vars="time", var_name="Wavelengths", value_name="Data")
- max_data = df_melted["Data"].max()
- df_melted["Normalized_Data"] = df_melted["Data"] / max_data
- df_unmelted = df_melted.pivot_table(
- index="time", columns="Wavelengths", values="Normalized_Data"
- ).reset_index()
- return df_unmelted
-
-
-@typechecked
-def plot_uv(path: str):
- df = read_uv(path)
- time = df["time"]
- data = df.drop(columns=["time"])
- wavelengths = df.columns[:-1]
- trace = go.Surface(x=wavelengths, y=time, z=data.values)
- fig = go.Figure(data=[trace])
- fig.show()
-
-
-# path = "/home/konrad/Documents/GitHub/chromatogramsR/X-Vials/X3346.D/dad1.uv"
-# plot_uv(path)
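
`scale_data` and `read_time` pull big-endian scalars from fixed offsets in the `.ch` file via the C++ helpers; in pure Python the same read is a one-liner with `struct`. A sketch using the offsets from `scale_data` (the file name is hypothetical):

```python
# Pure-Python equivalent of the fixed-offset, big-endian reads
# that scale_data performs via parser_hplc.
import struct

def read_be_double(path: str, offset: int) -> float:
    with open(path, "rb") as f:
        f.seek(offset)
        (value,) = struct.unpack(">d", f.read(8))  # big-endian double
    return value

# intercept = read_be_double("signal.ch", 4724)  # hypothetical file name
# slope = read_be_double("signal.ch", 4732)
```
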
diff --git a/build/lib.linux-x86_64-cpython-312/openlab/__init__.py b/build/lib.linux-x86_64-cpython-312/openlab/__init__.py
deleted file mode 100644
index c1b4c44..0000000
--- a/build/lib.linux-x86_64-cpython-312/openlab/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .openlab import read_attr
-from .openlab import read_lc
-from .openlab import read_ms
-
-__all__ = ["read_attr", "read_lc", "read_ms"]
diff --git a/build/lib.linux-x86_64-cpython-312/openlab/openlab.py b/build/lib.linux-x86_64-cpython-312/openlab/openlab.py
deleted file mode 100644
index 709c82b..0000000
--- a/build/lib.linux-x86_64-cpython-312/openlab/openlab.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import os
-import netCDF4 as nc
-import pandas as pd
-import numpy as np
-import re
-from typeguard import typechecked
-from typing import List
-
-
-@typechecked
-def get_files(path: str) -> List[str]:
- fs = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".cdf")]
- assert len(fs) > 0, "No files found"
- return fs
-
-
-# Attributes
-@typechecked
-def get_attr(path: str):
- with nc.Dataset(path, "r") as dataset:
- attr = {key: dataset.getncattr(key) for key in dataset.ncattrs()}
- return attr
-
-
-@typechecked
-def read_attr(path: str) -> pd.DataFrame:
- fs = get_files(path)
- attrs_lc = [pd.DataFrame([get_attr(fs[x])]) for x in range(len(fs))]
- attrs_lc = pd.concat(attrs_lc, ignore_index=True)
- return attrs_lc
-
-
-# LC Data
-@typechecked
-def get_lc_data(path: str) -> pd.DataFrame:
- with nc.Dataset(path, "r") as dataset:
- detector_signals = dataset.variables["ordinate_values"][:]
- global_atts = {key: dataset.getncattr(key) for key in dataset.ncattrs()}
- detector = global_atts.get("detector_name", "")
- run_time_length = dataset.variables["actual_run_time_length"][...]
-
- data = pd.DataFrame(
- {
- "RetentionTime": np.linspace(0, run_time_length, num=len(detector_signals)),
- "DetectorSignal": detector_signals,
- }
- )
- data.attrs["detector"] = detector
- return data
-
-
-@typechecked
-def process_detector_info(df_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
- for df in df_list:
- detector_name = df.attrs.get("detector", "")
- wl_match = (
- re.search(r"\d+", detector_name.split(",")[1])
- if "," in detector_name
- else None
- )
- wl = float(wl_match.group()) if wl_match else None
- df["wavelength"] = wl
- return df_list
-
-
-@typechecked
-def read_lc(path: str) -> pd.DataFrame:
- fs = get_files(path)
- # Filter fs --> Files which contain DAD within their name
- fs = [f for f in fs if "DAD" in os.path.basename(f)]
- df = [get_lc_data(fs[x]) for x in range(len(fs))]
- df = process_detector_info(df)
- df = pd.concat(df, ignore_index=True)
- return df
-
-
-# MS Data
-@typechecked
-def get_point_counts(path: str) -> np.ma.MaskedArray:
- with nc.Dataset(path, "r") as dataset:
- res = dataset.variables["point_count"][:]
- return res
-
-
-@typechecked
-def get_ms_data(path: str) -> pd.DataFrame:
- with nc.Dataset(path, "r") as dataset:
- mz_values = dataset.variables["mass_values"][:]
- intensities = dataset.variables["intensity_values"][:]
- return pd.DataFrame({"mz": mz_values, "intensities": intensities})
-
-
-@typechecked
-def get_scan_time(path: str) -> np.ma.MaskedArray:
- with nc.Dataset(path, "r") as dataset:
- time = dataset.variables["scan_acquisition_time"][:]
- return time / 60
-
-
-@typechecked
-def split_data(
- data: pd.DataFrame, point_counts: np.ma.MaskedArray
-) -> List[pd.DataFrame]:
- end_indices = np.cumsum(point_counts)
- start_indices = np.insert(end_indices[:-1], 0, 0)
- res = [data.iloc[start:end] for start, end in zip(start_indices, end_indices)]
- return res
-
-
-@typechecked
-def normalise(data_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
- return [
- df.assign(intensities=df["intensities"] * (100 / df["intensities"].max()))
- for df in data_list
- ]
-
-
-@typechecked
-def read_ms(path: str) -> List[pd.DataFrame]:
- fs = get_files(path)
- fs_ms = [f for f in fs if "spectra" in os.path.basename(f)]
- data_minus = get_ms_data(fs_ms[0])
- point_counts_minus = get_point_counts(fs_ms[0])
- time_minus = get_scan_time(fs_ms[0])
- df_minus = normalise(split_data(data_minus, point_counts_minus))
-
- data_plus = get_ms_data(fs_ms[1])
- point_counts_plus = get_point_counts(fs_ms[1])
- time_plus = get_scan_time(fs_ms[1])
- df_plus = normalise(split_data(data_plus, point_counts_plus))
-
- df_minus = pd.concat([df.assign(time=t) for df, t in zip(df_minus, time_minus)])
- df_plus = pd.concat([df.assign(time=t) for df, t in zip(df_plus, time_plus)])
- return [df_minus, df_plus]
diff --git a/build/lib.linux-x86_64-cpython-312/parser_hplc.cpython-312-x86_64-linux-gnu.so b/build/lib.linux-x86_64-cpython-312/parser_hplc.cpython-312-x86_64-linux-gnu.so
deleted file mode 100755
index a9a9368..0000000
Binary files a/build/lib.linux-x86_64-cpython-312/parser_hplc.cpython-312-x86_64-linux-gnu.so and /dev/null differ
diff --git a/build/lib.linux-x86_64-cpython-312/parser_ms.cpython-312-x86_64-linux-gnu.so b/build/lib.linux-x86_64-cpython-312/parser_ms.cpython-312-x86_64-linux-gnu.so
deleted file mode 100755
index ff2ea23..0000000
Binary files a/build/lib.linux-x86_64-cpython-312/parser_ms.cpython-312-x86_64-linux-gnu.so and /dev/null differ
diff --git a/build/lib.linux-x86_64-cpython-312/parser_xray.cpython-312-x86_64-linux-gnu.so b/build/lib.linux-x86_64-cpython-312/parser_xray.cpython-312-x86_64-linux-gnu.so
deleted file mode 100755
index 90efcf5..0000000
Binary files a/build/lib.linux-x86_64-cpython-312/parser_xray.cpython-312-x86_64-linux-gnu.so and /dev/null differ
diff --git a/build/lib.linux-x86_64-cpython-312/xray/__init__.py b/build/lib.linux-x86_64-cpython-312/xray/__init__.py
deleted file mode 100644
index 1a21900..0000000
--- a/build/lib.linux-x86_64-cpython-312/xray/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .bruker_xray import read_raw
-
-__all__ = ["read_raw"]
-
diff --git a/build/temp.linux-x86_64-cpython-312/src/parser_hplc.o b/build/temp.linux-x86_64-cpython-312/src/parser_hplc.o
deleted file mode 100644
index 61ae8c8..0000000
Binary files a/build/temp.linux-x86_64-cpython-312/src/parser_hplc.o and /dev/null differ
diff --git a/build/temp.linux-x86_64-cpython-312/src/parser_ms.o b/build/temp.linux-x86_64-cpython-312/src/parser_ms.o
deleted file mode 100644
index a66ce62..0000000
Binary files a/build/temp.linux-x86_64-cpython-312/src/parser_ms.o and /dev/null differ
diff --git a/build/temp.linux-x86_64-cpython-312/src/parser_xray.o b/build/temp.linux-x86_64-cpython-312/src/parser_xray.o
deleted file mode 100644
index 9337a11..0000000
Binary files a/build/temp.linux-x86_64-cpython-312/src/parser_xray.o and /dev/null differ
diff --git a/chemstation/__init__.py b/chemstation/__init__.py
deleted file mode 100644
index fbadbd5..0000000
--- a/chemstation/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-
-from .read_ms_file import read_chemstation_file
-
-__all__ = [
- 'read_chemstation_file',
-]
diff --git a/chemstation/read_ms_file.py b/chemstation/read_ms_file.py
deleted file mode 100644
index f037b2e..0000000
--- a/chemstation/read_ms_file.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import parser_ms as pm
-import pandas as pd
-import plotly.graph_objs as go
-import plotly.express as px
-from typeguard import typechecked
-from typing import List
-
-
-@typechecked
-def convert_cycles_to_dfs(cycles: List[dict]) -> List[pd.DataFrame]:
- """Convert Chemstation LC-MS cycles into a list of Pandas DataFrames."""
- cycle_dfs = []
- for i, cycle in enumerate(cycles):
- df = pd.DataFrame({
- "mz": cycle["mz"],
- "intensity": cycle["intensity"],
- # Repeat for each row
- "retention_time": [cycle["retention_time"]] * len(cycle["mz"])
- })
- df["cycle_id"] = i
- cycle_dfs.append(df)
- return cycle_dfs
-
-
-@typechecked
-def merge_cycles_into_df(cycles: List[dict]) -> pd.DataFrame:
- """Convert all cycles into a single Pandas DataFrame with cycle_id."""
- cycle_dfs = convert_cycles_to_dfs(cycles)
- return pd.concat(cycle_dfs, ignore_index=True)
-
-
-@typechecked
-def read_chemstation_file(file_path: str) -> pd.DataFrame:
- cycles = pm.read_cycles(file_path)
- cycle_dfs = convert_cycles_to_dfs(cycles)
- return merge_cycles_into_df(cycles)
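
Downstream, the long-format frame is typically collapsed into a total ion chromatogram, as the tests below do; a minimal sketch using the test fixture path:

```python
# Sketch: total ion chromatogram from the long-format output
# (columns mz / intensity / retention_time / cycle_id).
import binary_parser as bp

df = bp.read_chemstation_file("./tests/Chemstation/SVS_1025F1.D/MSD1.MS")
tic = df.groupby("retention_time", as_index=False)["intensity"].sum()
print(tic.head())
```
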
diff --git a/hplc/__init__.py b/hplc/__init__.py
deleted file mode 100644
index c95f498..0000000
--- a/hplc/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from .read_files import read_chromatograms, plot_chromatograms, read_uv, plot_uv
-
-__all__ = [
- 'read_chromatograms',
- 'plot_chromatograms',
- 'read_uv',
- 'plot_uv'
-]
\ No newline at end of file
diff --git a/openlab/__init__.py b/openlab/__init__.py
deleted file mode 100644
index c1b4c44..0000000
--- a/openlab/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .openlab import read_attr
-from .openlab import read_lc
-from .openlab import read_ms
-
-__all__ = ["read_attr", "read_lc", "read_ms"]
diff --git a/parser_hplc.cpython-312-x86_64-linux-gnu.so b/parser_hplc.cpython-312-x86_64-linux-gnu.so
deleted file mode 100755
index a9a9368..0000000
Binary files a/parser_hplc.cpython-312-x86_64-linux-gnu.so and /dev/null differ
diff --git a/parser_ms.cpython-312-x86_64-linux-gnu.so b/parser_ms.cpython-312-x86_64-linux-gnu.so
deleted file mode 100755
index ff2ea23..0000000
Binary files a/parser_ms.cpython-312-x86_64-linux-gnu.so and /dev/null differ
diff --git a/parser_xray.cpython-312-x86_64-linux-gnu.so b/parser_xray.cpython-312-x86_64-linux-gnu.so
deleted file mode 100755
index 90efcf5..0000000
Binary files a/parser_xray.cpython-312-x86_64-linux-gnu.so and /dev/null differ
diff --git a/pyproject.toml b/pyproject.toml
index 2b482a3..b423483 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,4 +1,41 @@
-
[build-system]
-requires = ["setuptools", "wheel", "pybind11"]
+requires = ["setuptools"]
build-backend = "setuptools.build_meta"
+
+[project]
+name = "BinaryParser"
+version = "0.1.2"
+authors = [{ name = "Konrad Krämer", email = "konrad.kraemer@kit.edu" }, { name = "Martin Starman", email = "martin.starman@kit.edu" }, { name = "Nicole Jung", email = "nicole.jung@kit.edu" }]
+requires-python = ">= 3.12"
+description = "Parsing binary files"
+license = "AGPL-3.0-or-later"
+license-files = ["LICENSE"]
+readme = "README.md"
+keywords = ["chemistry", "convert", "file", "format"]
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "Operating System :: OS Independent",
+]
+
+dependencies = [
+ "pandas==2.3.3",
+ "numpy==2.4.1",
+ "typeguard==4.4.4",
+ "plotly==6.5.2",
+ "matplotlib==3.10.8",
+ "seaborn==0.13.2",
+ "netCDF4==1.7.4",
+]
+
+[project.optional-dependencies]
+dev = [
+ "pytest",
+]
+
+[project.urls]
+homepage = "https://github.com/ComPlat/BinaryParser"
+repository = "https://github.com/ComPlat/BinaryParser"
+
+[tool.setuptools.packages.find]
+include = ["binary_parser*"]
+
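
With the metadata now in `pyproject.toml`, an installed copy can be sanity-checked against it with the standard-library `importlib.metadata`:

```python
# Sanity-check an installed build against pyproject.toml.
from importlib.metadata import metadata, version

print(version("BinaryParser"))                      # expect 0.1.2
print(metadata("BinaryParser")["Requires-Python"])  # expect >= 3.12
```
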
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 63173cb..0000000
--- a/setup.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from pybind11.setup_helpers import Pybind11Extension, build_ext
-from setuptools import setup, find_packages
-
-__version__ = "0.0.1"
-
-ext_modules = [
- Pybind11Extension(
- "parser_hplc",
- ["src/parser_hplc.cpp"],
- define_macros=[("VERSION_INFO", __version__)],
- extra_compile_args=["-std=c++17"],
- ),
- Pybind11Extension(
- "parser_ms",
- ["src/parser_ms.cpp"],
- define_macros=[("VERSION_INFO", __version__)],
- extra_compile_args=["-std=c++17"],
- ),
- Pybind11Extension(
- "parser_xray",
- ["src/parser_xray.cpp"],
- define_macros=[("VERSION_INFO", __version__)],
- extra_compile_args=["-std=c++17"],
- ),
-]
-
-setup(
- name="BinaryParser",
- version=__version__,
- author="Konrad Krämer",
- author_email="konrad.kraemer@kit.edu",
- description="Parsing binary files",
- long_description="",
- ext_modules=ext_modules,
- extras_require={"test": "pytest"},
- cmdclass={"build_ext": build_ext},
- zip_safe=False,
- python_requires=">=3.7",
- # packages=find_packages(),
- packages=(["BinaryParser"] + ["openlab"] + find_packages()),
- # package_dir={"BinaryParser": "."},
- package_dir={
- "BinaryParser": ".",
- "BinaryParser.openlab": "./openlab",
- },
- setup_requires=["pybind11"],
- install_requires=[
- "pybind11",
- "pandas",
- "numpy",
- "typeguard",
- "plotly",
- "matplotlib",
- "seaborn",
- "netCDF4",
- ],
-)
diff --git a/src/parser_hplc.cpp b/src/parser_hplc.cpp
deleted file mode 100644
index 517eaad..0000000
--- a/src/parser_hplc.cpp
+++ /dev/null
@@ -1,297 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <pybind11/numpy.h>
-#include <pybind11/stl.h>
-#include <algorithm>
-#include <cstdint>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace py = pybind11;
-
-#define STRINGIFY(x) #x
-#define MACRO_STRINGIFY(x) STRINGIFY(x)
-
-template <typename T> void endianSwap16(T &x) {
- x = (((x) >> 8) & 0xFF) | (((x) & 0xFF) << 8);
-}
-
-template <typename T> void endianSwap32(T &x) {
- x = ((x >> 24) & 0xFF) | ((x << 8) & 0xFF0000) | ((x >> 8) & 0xFF00) |
- (x << 24);
-}
-
-void endianSwapU32(uint32_t &x) {
- x = ((x << 24) & 0xFF000000) | ((x << 8) & 0x00FF0000) |
- ((x >> 8) & 0x0000FF00) | ((x >> 24) & 0x000000FF);
-}
-
-uint16_t endianSwapU16(uint16_t value) {
- return ((value & 0xFF) << 8) | ((value >> 8) & 0xFF);
-}
-
-pybind11::list DeltaCompresion(std::string filepath, int offset, int n) {
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t sizeFile = file.tellg();
- std::vector<int32_t> res(sizeFile / 2);
- size_t currentPos = 0;
- file.seekg(currentPos + offset, std::ios::beg);
-
- int16_t buffer1 = 0;
- int32_t buffer2 = 0;
- int16_t buffer3 = 0;
- int32_t buffer4 = 0;
-
- int iter = 0;
- while (currentPos < sizeFile) {
- file.read(reinterpret_cast<char *>(&buffer1), sizeof(int16_t));
- endianSwap16(buffer1);
- buffer2 = buffer4;
-
- if (buffer1 << 12 == 0) {
- res.resize(iter);
- break;
- }
-
- for (int i = 0; i < (buffer1 & 4095); i++) {
- file.read(reinterpret_cast<char *>(&buffer3), sizeof(int16_t));
- endianSwap16(buffer3);
- if (buffer3 != -32768) {
- buffer2 = buffer2 + (int32_t)buffer3;
- res[iter] = buffer2;
- iter++;
- } else {
- file.read(reinterpret_cast<char *>(&buffer2), sizeof(int32_t));
- endianSwap32(buffer2);
- res[iter] = buffer2;
- iter++;
- }
- }
- buffer4 = buffer2;
- currentPos = file.tellg();
- file.seekg(currentPos, std::ios::beg);
- }
- return pybind11::cast(res);
-}
-
-double readDouble(std::string &filepath, int offset) {
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- file.seekg(currentPos + offset, std::ios::beg);
- double buffer = 0;
-
- for (int i = 0; i < 1; i++) {
- auto pos = file.tellg();
- file.read(reinterpret_cast<char *>(&buffer), sizeof(double));
- uint64_t *ptr = reinterpret_cast<uint64_t *>(&buffer);
- *ptr = __builtin_bswap64(*ptr);
- }
- file.close();
- return buffer;
-}
-
-double readInt(std::string &filepath, int offset) {
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- file.seekg(currentPos + offset, std::ios::beg);
- int32_t buffer = 0;
-
- for (int i = 0; i < 1; i++) {
- auto pos = file.tellg();
- file.read(reinterpret_cast<char *>(&buffer), sizeof(int32_t));
- }
- file.close();
- return buffer;
-}
-
-pybind11::list readUint8(std::string &filepath, int offset) {
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- file.seekg(currentPos + offset, std::ios::beg);
- uint8_t buffer = 0;
- std::vector<std::string> res;
-
- for (int i = 0; i < 40; i++) {
- auto pos = file.tellg();
- file.read(reinterpret_cast<char *>(&buffer), sizeof(uint8_t));
- res.push_back(std::string(1, static_cast<char>(buffer)));
- }
- file.close();
- return pybind11::cast(res);
-}
-
-pybind11::list readTime(std::string &filepath, int offset) {
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- file.seekg(currentPos + offset, std::ios::beg);
- int32_t buffer = 0;
- std::vector<double> res(2);
- for (int i = 0; i < 2; i++) {
- file.read(reinterpret_cast<char *>(&buffer), sizeof(int32_t));
- endianSwap32(buffer);
- res[i] = static_cast<double>(buffer) / 60000.0;
- }
- file.close();
- return pybind11::cast(res);
-}
-
-size_t updatePos(std::ifstream &file, int offset) {
- size_t currentPos = file.tellg();
- file.seekg(currentPos + offset, std::ios::beg);
- currentPos = file.tellg();
- return currentPos;
-}
-
-double readInt32(std::string &filepath, int offset) {
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- file.seekg(currentPos + offset, std::ios::beg);
- int32_t buffer = 0;
-
- for (int i = 0; i < 1; i++) {
- auto pos = file.tellg();
- file.read(reinterpret_cast<char *>(&buffer), sizeof(int32_t));
- }
- file.close();
- endianSwap32(buffer);
- return buffer;
-}
-
-struct UVClass {
- UVClass(std::string filepath_) : filepath(filepath_) {
- int offset = 0x1002;
- int nscansOffset = 0x116;
- int nscans = readInt32(filepath, nscansOffset);
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t sizeFile = file.tellg();
- size_t currentPos = 0;
- uint16_t buffer1 = 0;
- uint32_t buffer2 = 0;
- uint16_t buffer3 = 0;
- uint16_t buffer4 = 0;
- uint16_t buffer5 = 0;
- int16_t buffer6 = 0;
- int32_t buffer7 = 0;
- time.resize(nscans);
- ndata.resize(nscans);
-
- for (int i = 0; i < nscans; i++) {
- file.seekg(currentPos + offset, std::ios::beg);
- file.read(reinterpret_cast<char *>(&buffer1), sizeof(uint16_t)); // 2
- offset += buffer1;
- file.read(reinterpret_cast<char *>(&buffer2), sizeof(uint32_t)); // 4
- time[i] = static_cast<double>(buffer2) / 60000.0;
- file.read(reinterpret_cast<char *>(&buffer3), sizeof(uint16_t)); // 2
- file.read(reinterpret_cast<char *>(&buffer4), sizeof(uint16_t)); // 2
- file.read(reinterpret_cast<char *>(&buffer5), sizeof(uint16_t)); // 2
- for (int wv = buffer3; wv < buffer4; wv += buffer5) {
- double current_w = static_cast<double>(wv) / 20.0;
- auto it = std::find(wavelengths.begin(), wavelengths.end(), current_w);
- if (it == wavelengths.end()) {
- wavelengths.push_back(current_w);
- }
- }
- auto max_wavelength_it =
- std::max_element(wavelengths.begin(), wavelengths.end());
- int max_index = std::distance(wavelengths.begin(), max_wavelength_it);
- std::vector<int> wv_index_map;
- for (int val = max_index + 1; val < wavelengths.size(); ++val) {
- wv_index_map.push_back(val);
- }
- for (int val = 0; val <= max_index; ++val) {
- wv_index_map.push_back(val);
- }
- ndata[i].resize(wavelengths.size());
- for (int j = 0; j < wv_index_map.size(); j++) {
- file.read(reinterpret_cast<char *>(&buffer6), sizeof(int16_t)); // 2
- if (buffer6 == -32768) {
- file.read(reinterpret_cast<char *>(&buffer7), sizeof(int32_t)); // 4
- } else {
- buffer7 += buffer6;
- }
- ndata[i][j] = buffer7; // / 2000.0; // correct?
- }
- }
- file.close();
- }
-
- py::array_t<double> getTime() const { return py::cast(time); }
-
- py::array_t<double> getWavelengths() const { return py::cast(wavelengths); }
-
- py::array_t<int32_t> getData() const {
- std::size_t nRows = ndata.size();
- std::size_t nCols = ndata.empty() ? 0 : ndata[0].size();
- pybind11::array_t<int32_t> npArray({nRows, nCols});
- auto ptr = npArray.mutable_data();
- for (std::size_t i = 0; i < nRows; ++i) {
- for (std::size_t j = 0; j < nCols; ++j) {
- ptr[i * nCols + j] = ndata[i][j];
- }
- }
- return npArray;
- }
-
- std::string filepath;
- std::vector<double> time;
- std::vector<double> wavelengths;
- std::vector<std::vector<int32_t>> ndata;
-};
-
-PYBIND11_MODULE(parser_hplc, m) {
- py::class_(m, "UVClass")
- .def(py::init())
- .def("getTime", &UVClass::getTime)
- .def("getWavelengths", &UVClass::getWavelengths)
- .def("getData", &UVClass::getData);
-
- m.doc() = R"pbdoc(
- Pybind11 example plugin
- -----------------------
-
- .. currentmodule:: parser_hplc
-
- .. autosummary::
- :toctree: _generate
-
- readInt
- readDouble
- DeltaCompresionCpp
- )pbdoc";
- m.def("DeltaCompresion", &DeltaCompresion, R"pbdoc(
- read content of ch file and conduct delta compression on data
- )pbdoc");
- m.def("readDouble", &readInt, R"pbdoc(
- Reads a double at a specific location of a ch file
- )pbdoc");
- m.def("readInt", &readInt, R"pbdoc(
- Reads an int32_t at a specific location of a ch file
- )pbdoc");
- m.def("readUint8", &readUint8, R"pbdoc(
- Reads an uint8_t at a specific location of a ch file
- )pbdoc");
- m.def("readTime", &readTime, R"pbdoc(
- Reads the time of a ch file
- )pbdoc");
-
-#ifdef VERSION_INFO
- m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
-#else
- m.attr("__version__") = "dev";
-#endif
-}
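
The removed `DeltaCompresion` routine decodes Agilent's delta-encoded signal: each 16-bit header's low 12 bits give a segment length, each 16-bit delta is accumulated into a running 32-bit value, and the sentinel -32768 means the next four bytes hold an absolute value instead. A pure-Python sketch of the same loop (big-endian data; the file name is hypothetical):

```python
# Pure-Python sketch of the removed DeltaCompresion routine.
import struct

def delta_decode(path: str, offset: int = 0x1800) -> list[int]:
    out: list[int] = []
    value = 0
    with open(path, "rb") as f:
        f.seek(offset)
        while len(head_bytes := f.read(2)) == 2:
            (head,) = struct.unpack(">h", head_bytes)
            if head == 0:  # a zero header word ends the stream
                break
            for _ in range(head & 0x0FFF):  # low 12 bits: segment length
                (delta,) = struct.unpack(">h", f.read(2))
                if delta == -32768:  # sentinel: next 4 bytes are absolute
                    (value,) = struct.unpack(">i", f.read(4))
                else:
                    value += delta
                out.append(value)
    return out

# counts = delta_decode("signal.ch")  # then scale by slope/intercept as above
```
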
diff --git a/src/parser_ms.cpp b/src/parser_ms.cpp
deleted file mode 100644
index 48b29da..0000000
--- a/src/parser_ms.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-// TODO: find min mz and max mz in meta data
-// TODO: read meta data
-// NOTE: Reading data of MSD1.MS file from
-// Agilent ChemStation
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <cmath>
-#include <cstdint>
-#include <fstream>
-#include <map>
-#include <string>
-#include <vector>
-
-uint16_t endianSwapU16(uint16_t value) {
- return ((value & 0xFF) << 8) | ((value >> 8) & 0xFF);
-}
-
-void endianSwapU32(uint32_t &x) {
- x = ((x << 24) & 0xFF000000) | ((x << 8) & 0x00FF0000) |
- ((x >> 8) & 0x0000FF00) | ((x >> 24) & 0x000000FF);
-}
-
-std::vector<char> ReadFile(const std::string &file_path) {
- std::ifstream file(file_path, std::ios::binary | std::ios::ate);
- if (!file.is_open()) {
- throw std::runtime_error("Error opening file");
- }
- std::size_t size = file.tellg();
- if (size == 0) {
- throw std::runtime_error("File is empty");
- }
- std::vector<char> buffer(size);
- file.seekg(0, std::ios::beg);
- file.read(buffer.data(), size);
- if (!file) {
- throw std::runtime_error("Error reading file");
- }
- file.close();
- return buffer;
-}
-
-std::uint16_t CastToUint16(std::vector<char> &buffer, std::size_t offset) {
- std::uint16_t res = *reinterpret_cast<std::uint16_t *>(&buffer[offset]);
- return endianSwapU16(res);
-}
-
-std::uint32_t CastToUint32(std::vector<char> &buffer, std::size_t offset) {
- std::uint32_t res = *reinterpret_cast<std::uint32_t *>(&buffer[offset]);
- endianSwapU32(res);
- return res;
-}
-
-std::size_t NumberOfCycles(std::vector<char> &buffer) {
- int data_start = 0x116;
- return CastToUint32(buffer, data_start);
-}
-
-std::size_t FindDataStart(std::vector<char> &buffer) {
- int data_start = 0x10A;
- int offset_correction = CastToUint16(buffer, data_start);
- int where = offset_correction * 2 - 2;
- return where;
-}
-
-std::vector<double> ConvertMZIntensity(std::vector<uint16_t> &data) {
- std::vector<double> mz_intensity;
- mz_intensity.resize(data.size());
- for (std::size_t i = 0; i < data.size(); i++) {
- if (i % 2 != 0) { // Intensity
- uint16_t head_bits = data[i] >> 14; // Shift right by 14 bits
- uint16_t tail_bits =
- data[i] & 0x3FFF; // Extract tail: 0x3FFF = 0011111111111111 (14 bits)
- mz_intensity[i] = std::pow(8, head_bits) * tail_bits;
- } else { // MZ
- mz_intensity[i] = static_cast<double>(data[i]) / 20;
- }
- }
- return mz_intensity;
-}
-
-struct Cycle {
- std::vector<double> mz;
- std::vector<double> intensity;
- double retention_time;
-
- // Convert Cycle to a Python dictionary
- std::map<std::string, pybind11::object> to_dict() const {
- return {{"mz", pybind11::cast(mz)},
- {"intensity", pybind11::cast(intensity)},
- {"retention_time", pybind11::cast(retention_time)}};
- }
-};
-
-void ReadCycleData(Cycle &cycle, std::vector &buffer,
- std::size_t data_start, std::size_t cycle_size) {
- std::vector data;
- data.resize(cycle_size);
- for (std::size_t i = 0; i < cycle_size; i++) {
- data[i] = CastToUint16(buffer, data_start);
- data_start += 2;
- }
- std::vector<double> mz_intensity = ConvertMZIntensity(data);
- cycle.mz.resize(mz_intensity.size() / 2);
- cycle.intensity.resize(mz_intensity.size() / 2);
- for (std::size_t i = 0; i < mz_intensity.size(); i++) {
- if (i % 2 == 0) {
- cycle.mz[i / 2] = mz_intensity[i];
- } else {
- cycle.intensity[i / 2] = mz_intensity[i];
- }
- }
-}
-
-std::vector<Cycle> readCycles(const std::string &file_path) {
- std::vector<char> buffer = ReadFile(file_path);
- std::size_t data_start = FindDataStart(buffer);
- std::size_t number_of_cycles = NumberOfCycles(buffer);
- std::vector<Cycle> cycles;
- cycles.resize(number_of_cycles);
- std::size_t counter = data_start;
- for (std::size_t i = 0; i < number_of_cycles; i++) {
- if (counter >= buffer.size()) {
- throw std::runtime_error("Error extracting data");
- }
- counter += 2;
- std::size_t time = CastToUint32(buffer, counter);
- counter += 10;
- std::size_t temp = counter;
- std::size_t cycle_size = CastToUint16(buffer, counter);
- counter += 6;
- ReadCycleData(cycles[i], buffer, counter, cycle_size * 2);
- cycles[i].retention_time = static_cast<double>(time) / 60000;
- counter += cycle_size * 4;
- counter += 10;
- }
- return cycles;
-}
-
-namespace py = pybind11;
-
-std::vector>
-py_readCycles(const std::string &file_path) {
- std::vector cycles = readCycles(file_path);
- std::vector> result;
- for (const auto &cycle : cycles) {
- result.push_back(cycle.to_dict());
- }
- return result;
-}
-
-PYBIND11_MODULE(parser_ms, m) {
- m.doc() = "Chemstation MS data extraction module";
- m.def("read_cycles", &py_readCycles,
- "Extract cycles from an Chemstation MS file");
-}
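
The bit layout `ConvertMZIntensity` decodes is compact enough to state directly: each 16-bit intensity word is a 2-bit base-8 exponent over a 14-bit mantissa, and m/z words are stored in 1/20 m/z units. In Python:

```python
# Decoding of the packed 16-bit words handled by ConvertMZIntensity above.
def decode_intensity(raw: int) -> float:
    return (8 ** (raw >> 14)) * (raw & 0x3FFF)  # 2-bit exponent, 14-bit mantissa

def decode_mz(raw: int) -> float:
    return raw / 20.0  # m/z stored in 1/20 units

assert decode_intensity(0x3FFF) == 16383      # exponent 0
assert decode_intensity(0x7FFF) == 8 * 16383  # exponent 1
assert decode_mz(2005) == 100.25
```
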
diff --git a/src/parser_xray.cpp b/src/parser_xray.cpp
deleted file mode 100644
index 5206e27..0000000
--- a/src/parser_xray.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-#include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-#include <algorithm>
-#include <cstdint>
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-namespace py = pybind11;
-
-#define STRINGIFY(x) #x
-#define MACRO_STRINGIFY(x) STRINGIFY(x)
-
-template <typename T>
-void endianSwap16(T &x)
-{
- x = (((x) >> 8) & 0xFF) | (((x) & 0xFF) << 8);
-}
-
-template <typename T>
-void endianSwap32(T &x)
-{
- x = ((x >> 24) & 0xFF) | ((x << 8) & 0xFF0000) | ((x >> 8) & 0xFF00) | (x << 24);
-}
-
-void endianSwapU32(uint32_t &x)
-{
- x = ((x << 24) & 0xFF000000) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | ((x >> 24) & 0x000000FF);
-}
-
-uint16_t endianSwapU16(uint16_t value)
-{
- return ((value & 0xFF) << 8) | ((value >> 8) & 0xFF);
-}
-
-std::vector<int32_t> readInt32(std::string &filepath, int offset)
-{
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open())
- throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- file.seekg(currentPos + offset, std::ios::beg);
- int32_t buffer = 0;
- int n = 1000;
- std::vector<int32_t> res(n);
- for (int i = 0; i < n; i++)
- {
- auto pos = file.tellg();
- buffer = 0;
- file.read(reinterpret_cast(&buffer), sizeof(int32_t));
- endianSwap32(buffer);
- res[i] = buffer;
- }
- file.close();
- return res;
-}
-
-pybind11::list readDoubles(std::string &filepath, int offset)
-{
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open()) throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- size_t fileSize = file.tellg();
- size_t size = fileSize / sizeof(double);
- file.seekg(currentPos + offset, std::ios::beg);
- double buffer = 0;
- std::vector<double> res(size);
-
- for (int i = 0; i < size; i++)
- {
- auto pos = file.tellg();
- file.read(reinterpret_cast(&buffer), sizeof(double));
- res[i] = buffer;
- }
- file.close();
- return pybind11::cast(res);
-}
-
-pybind11::list readFloates(std::string &filepath, int offset)
-{
- std::ifstream file(filepath, std::ios::binary | std::ios::ate);
- if (!file.is_open()) throw std::runtime_error("Error opening file");
- size_t currentPos = 0;
- size_t fileSize = file.tellg();
- size_t size = fileSize / sizeof(double);
- file.seekg(currentPos + offset, std::ios::beg);
- float buffer = 0;
- std::vector<float> res(size);
-
- for (int i = 0; i < size; i++)
- {
- auto pos = file.tellg();
- file.read(reinterpret_cast(&buffer), sizeof(float));
- res[i] = buffer;
- }
- file.close();
- return pybind11::cast(res);
-}
-
-
-void readChars(std::string &filepath) {
- std::ifstream file(filepath, std::ios::binary);
- file.seekg(0, std::ios::end);
- size_t fileSize = file.tellg();
- file.seekg(0, std::ios::beg);
- std::vector<char> buffer(fileSize);
- file.read(buffer.data(), fileSize);
- file.close();
-
- int address = 0x00000000;
- size_t n = 8;
- size_t size = buffer.size();
- for(size_t i = 0; i < (size / n); i++) {
- for(size_t j = 0; j < n; j++) {
- std::cout << std::hex << std::setfill('0') << std::setw(3) << address << " ";
- address++;
- }
- std::cout << std::endl;
- for(size_t j = 0; j < n; j++) {
- std::cout << std::dec << "'" << buffer[i*n + j] << "'" << " ";
- address++;
- }
- std::cout << std::endl;
- }
-}
-
-PYBIND11_MODULE(parser_xray, m)
-{
-
- m.doc() = R"pbdoc(
- Pybind11 example plugin
- -----------------------
-
- .. currentmodule:: parser_xray
-
- .. autosummary::
- :toctree: _generate
-
- test
- )pbdoc";
- m.def("readChars", &readChars, R"pbdoc(
- Read content of file as chars
- )pbdoc");
- m.def("readDoubles", &readDoubles, R"pbdoc(
- Read content of file as doubles
- )pbdoc");
- m.def("readFloates", &readFloates, R"pbdoc(
- Read content of file as floats
- )pbdoc");
-
-#ifdef VERSION_INFO
- m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
-#else
- m.attr("__version__") = "dev";
-#endif
-}
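
`readDoubles` and `readFloates` reinterpret the file from a byte offset onward as a flat numeric array; `numpy.fromfile` does the same in one call. Note the C++ reads these values without byte swapping, i.e. little-endian on x86 (the file name below is hypothetical):

```python
# numpy equivalent of the removed readDoubles / readFloates helpers.
import numpy as np

def read_values(path: str, offset: int = 0, dtype: str = "<f4") -> np.ndarray:
    return np.fromfile(path, dtype=dtype, offset=offset)

# floats = read_values("PD.raw")                # as readFloates
# doubles = read_values("PD.raw", dtype="<f8")  # as readDoubles
```
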
diff --git a/tests/chemstation_test.py b/tests/chemstation_test.py
index 5bb72a4..bbe601e 100644
--- a/tests/chemstation_test.py
+++ b/tests/chemstation_test.py
@@ -1,4 +1,4 @@
-import BinaryParser as bp
+import binary_parser as bp
import pandas as pd
import numpy as np
@@ -57,7 +57,6 @@ def compare_spectras(df, spectra_true, time):
def test_svs1025f1():
file_path = "./tests/Chemstation/SVS_1025F1.D/MSD1.MS"
df = bp.read_chemstation_file(file_path)
- # Compute TIC
tic_df = df.groupby("retention_time", as_index=False)["intensity"].sum()
tic_true = pd.read_csv(
"./tests/Chemstation/TIC_SVS1025F1.CSV",
@@ -77,8 +76,6 @@ def test_svs1025f1():
compare_spectras(df, spectra_true, time)
-test_svs1025f1()
-
def test_scs776roh():
file_path = "./tests/Chemstation/SVS-776ROH.D/MSD1.MS"
@@ -100,6 +97,3 @@ def test_scs776roh():
delimiter=",", encoding="utf-16", header=None
)
compare_spectras(df, spectra_true, time)
-
-
-test_scs776roh()
diff --git a/tests/chemstation_visualisation.py b/tests/chemstation_visualisation.py
index 8980d5e..81947fc 100644
--- a/tests/chemstation_visualisation.py
+++ b/tests/chemstation_visualisation.py
@@ -1,6 +1,5 @@
import matplotlib.pyplot as plt
-import BinaryParser as bp
-import pandas as pd
+import binary_parser as bp
file_path = "./tests/Chemstation/SVS_1025F1.D/MSD1.MS"
df = bp.read_chemstation_file(file_path)
diff --git a/tests/hplc_test.py b/tests/hplc_test.py
index 0a55177..c1be20b 100644
--- a/tests/hplc_test.py
+++ b/tests/hplc_test.py
@@ -1,4 +1,4 @@
-import BinaryParser as bp
+import binary_parser as bp
def test_read_chromatograms():
diff --git a/tests/openlab_test.py b/tests/openlab_test.py
index 445897a..c331025 100755
--- a/tests/openlab_test.py
+++ b/tests/openlab_test.py
@@ -1,4 +1,4 @@
-import BinaryParser as bp
+import binary_parser.openlab as bp
path = "./tests/OpenLab/"
@@ -8,7 +8,6 @@ def test_read_attr():
assert attr.shape == (12, 49)
assert attr["detector_unit"][1] == "mAU"
-test_read_attr()
def test_read_ls():
@@ -17,7 +16,6 @@ def test_read_ls():
assert data.columns.tolist() == ["RetentionTime", "DetectorSignal", "wavelength"]
assert all(data["wavelength"].unique() == [210, 230, 254, 280, 366, 450, 550, 580])
-test_read_ls()
def test_read_ms():
@@ -26,4 +24,3 @@ def test_read_ms():
assert ms[0].shape == (1358778, 3)
assert ms[1].shape == (1324471, 3)
-test_read_ms()
diff --git a/xray/__init__.py b/xray/__init__.py
deleted file mode 100644
index 1a21900..0000000
--- a/xray/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .bruker_xray import read_raw
-
-__all__ = ["read_raw"]
-
diff --git a/xray/bruker_xray.py b/xray/bruker_xray.py
deleted file mode 100644
index 009540d..0000000
--- a/xray/bruker_xray.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import parser_xray as px
-import pandas as pd
-from typeguard import typechecked
-from typing import List
-
-search_for = {
- "GONIOMETER_RADIUS": 217.5,
- "FIXED_DIVSLIT": 0.6,
- "SAMPLESLIT": 0.0,
- "DETSLIT": 10.39,
- "ANTISLIT": 6.17,
- "START": 20.0,
- "THETA": 10.0,
- "THETA2": 20.0,
- "TIMESTARTED": 14,
- "TEMP_RATE": -1,
- "TEMP_DELAY": -1,
- "KV": 35,
- "MA": 45,
- "WL1": 1.540600,
- "WL2": 1.544390,
- "WL3": 1.392220,
-}
-
-res = px.readDoubles(
- "/home/konrad/Documents/GitHub/RProjects/chromatogramsR/Bruker/PD.raw", 0
-)
-df = pd.DataFrame(res)
-df.columns = ["data"]
-
-for key, value in search_for.items():
- result_df = df[df["data"].eq(value)]
- if not result_df.empty:
- print(f"Match found for {key} at index {result_df.index[0]}")
-
-print()
-print()
-print()
-
-res = px.readFloates(
- "/home/konrad/Documents/GitHub/RProjects/chromatogramsR/Bruker/PD.raw", 0
-)
-df = pd.DataFrame(res)
-df.columns = ["data"]
-
-for key, value in search_for.items():
- result_df = df[df["data"].eq(value)]
- if not result_df.empty:
- print(f"Match found for {key} at index {result_df.index[0]}")
-
-
-# data starts at 0x420 --> 1056 --> in float index 264 --> 263 in python
-# data is read as float until end of file
-print()
-print()
-print()
-
-
-path = "/home/konrad/Documents/GitHub/RProjects/chromatogramsR/Bruker/WeitereDaten/XRD/7_80_3_001651_Cu_SSZ13_05_7.raw"
-# _WL1=1.540600
-# _WL2=1.544390
-# _WL3=0.00000
-# _WLRATIO=0.500000
-# _START=7.000000
-# _THETA=3.500000
-# _2THETA=7.000000
-
-# xxd
-# 000003c8: 0000 5c00 0000 0200 ..\..... --> 968
-# 000003d0: 0000 3254 6865 7461 ..2Theta --> 976
-# 000003d8: 0000 0000 0000 0000 ........
-# 000003e0: 0000 0000 0000 0000 ........
-# 000003e8: 0000 0000 0000 0000 ........
-# 000003f0: 0000 0000 0000 0000 ........
-# 000003f8: 0000 0100 0000 0000 ........
-# 00000400: 0000 0000 1c40 0000 .....@..
-# 00000408: 0000 0000 0000 0000 ........
-# 00000410: 0000 0000 0000 0000 ........
-# 00000418: 0000 0000 0000 0000 ........
-# 00000420: 0000 3200 0000 5c00 ..2...\.
-
-
-res = px.readDoubles(path, 0)
-df = pd.DataFrame(res)
-df.columns = ["data"]
-print(df.iloc[100:130])
-df = df[(df.round() != 0.0) & (df < 1000.0) & (df > 0.0)].dropna()
-df = df.iloc[0:60]
-
-
-res = px.readFloates(path, 0)
-
-df = pd.DataFrame(res)
-print(df.iloc[240:250])
-df.columns = ["data"]
-df = df[(df.round() != 0.0) & (df < 1000.0) & (df > 0.0)].dropna()
-df = df.iloc[0:60]