Skip to content

Commit d3e7c72

Browse files
committed
🗓 Jul 3, 2024 10:46:05 PM
🔥 to/from_base64 updated to align to cyberchef 🧪 tests added/updated 🤖 types added/updated
1 parent aae8022 commit d3e7c72

8 files changed

Lines changed: 240 additions & 94 deletions

File tree

.github/workflows/tests_multi_os.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,10 @@ jobs:
5959
pip install sphinx recommonmark pytest==8.1.1 pytest-cov==5.0.0 pyperclip
6060
6161
- name: Test with pytest
62+
env:
63+
COVERAGE_CORE: sysmon
6264
run: |
63-
COVERAGE_CORE=sysmon pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/
65+
pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/
6466
coverage report -m
6567
6668
- name: Test plugins osx

chepy/modules/dataformat.py

Lines changed: 46 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import sqlite3
1616
import collections
1717
from random import randint
18+
import regex as re
1819
from .internal.constants import Encoding
1920
from .internal.helpers import (
2021
detect_delimiter,
@@ -23,6 +24,8 @@
2324
UUEncoderDecoder,
2425
Base92,
2526
Base45,
27+
_Base64,
28+
expand_alpha_range,
2629
)
2730

2831
yaml = lazy_import.lazy_module("yaml")
@@ -494,7 +497,7 @@ def from_bytes(self) -> DataFormatT:
494497
return self
495498

496499
@ChepyDecorators.call_stack
497-
def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
500+
def to_base64(self, alphabet: str = "standard") -> DataFormatT:
498501
"""Encode as Base64
499502
500503
Base64 is a notation for encoding arbitrary byte data using a
@@ -503,8 +506,7 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
503506
into an ASCII Base64 string.
504507
505508
Args:
506-
custom (str, optional): Provide a custom charset to base64 with
507-
url_safe (bool, optional): Encode with url safe charset.
509+
alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64
508510
509511
Returns:
510512
Chepy: The Chepy object.
@@ -515,27 +517,23 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
515517
>>> Chepy("Some data").to_base64(alphabet=custom).o
516518
b'IqxhNG/YMLFV'
517519
"""
518-
if url_safe:
519-
self.state = base64.urlsafe_b64encode(self._convert_to_bytes()).replace(
520-
b"=", b""
521-
)
522-
return self
523-
if custom is not None:
524-
x = base64.b64encode(self._convert_to_bytes())
525-
std_base64chars = (
526-
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
527-
)
528-
self.state = bytes(
529-
str(x)[2:-1].translate(str(x)[2:-1].maketrans(std_base64chars, custom)),
530-
"utf-8",
520+
data = self._convert_to_bytes()
521+
alphabet = alphabet.strip()
522+
523+
char_set = expand_alpha_range(
524+
_Base64.base_64_chars.get(alphabet, alphabet), join_by=""
525+
)
526+
if len(char_set) < 63 or len(char_set) > 66: # pragma: no cover
527+
raise ValueError(
528+
"Invalid base64 chars. Should be 63-66 chars. " + str(len(char_set))
531529
)
532-
else:
533-
self.state = base64.b64encode(self._convert_to_bytes())
530+
531+
self.state = _Base64.encode_base64(data, alphabet=char_set)
534532
return self
535533

536534
@ChepyDecorators.call_stack
537535
def from_base64(
538-
self, custom: str = None, url_safe: bool = False, remove_whitespace: bool = True
536+
self, alphabet: str = "standard", remove_non_alpha: bool = True
539537
) -> DataFormatT:
540538
"""Decode as Base64
541539
@@ -545,33 +543,46 @@ def from_base64(
545543
into an ASCII Base64 string.
546544
547545
Args:
548-
custom (str, optional): Provide a custom charset to base64 with
549-
url_safe (bool, optional): If true, decode url safe. Defaults to False
550-
remove_whitespace(bool, optional): If true, all whitespaces are removed
546+
alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64
547+
remove_whitespace(bool, optional): If true, all whitespaces are removed (Defaults to True)
548+
remove_non_alpha(bool, optional): If true, all characters that are not part of the selected alphabet are removed before decoding. (Defaults to True)
551549
552550
Returns:
553551
Chepy: The Chepy object.
554552
555553
Examples:
556554
Base64 decode using a custom string
557555
>>> c = Chepy("QqxhNG/mMKtYPqoz64FVR42=")
558-
>>> c.from_base64(custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
556+
>>> c.from_base64(alphabet="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
559557
>>> c.out
560558
b"some random? data"
561559
"""
562-
if remove_whitespace:
563-
data = self.remove_whitespace().o
564-
data = self._convert_to_str()
565-
if custom is not None:
566-
std_base64chars = (
567-
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
560+
alphabet = alphabet.strip()
561+
char_set = expand_alpha_range(
562+
_Base64.base_64_chars.get(alphabet, alphabet), join_by=""
563+
)
564+
if len(char_set) < 63 or len(char_set) > 65: # pragma: no cover
565+
raise ValueError(
566+
"Invalid base64 chars. Should be 63-65 chars. " + str(len(char_set))
568567
)
569-
data = data.translate(str.maketrans(custom, std_base64chars))
570-
data += "=="
571-
if url_safe:
572-
self.state = base64.urlsafe_b64decode(data)
573-
else:
574-
self.state = base64.b64decode(data)
568+
569+
data = self._convert_to_str()
570+
571+
if remove_non_alpha:
572+
data = re.sub("[^" + char_set + "]", "", data)
573+
574+
# if is_standard or alphabet == 'url_safe':
575+
# data += "=="
576+
padding_needed = len(data) % 4
577+
if padding_needed and alphabet != "url_safe":
578+
data += "=" * (4 - padding_needed)
579+
580+
# if is_standard:
581+
# self.state = base64.b64decode(data)
582+
# if alphabet == 'url_safe':
583+
# self.state = base64.urlsafe_b64decode(data)
584+
# else:
585+
self.state = _Base64.decode_base64(data, char_set)
575586
return self
576587

577588
@ChepyDecorators.call_stack

chepy/modules/dataformat.pyi

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ class DataFormat(ChepyCore):
2828
def to_int(self: DataFormatT) -> DataFormatT: ...
2929
def to_bytes(self: DataFormatT) -> DataFormatT: ...
3030
def from_bytes(self: DataFormatT) -> DataFormatT: ...
31-
def to_base64(self: DataFormatT, custom: str=...) -> DataFormatT: ...
32-
def from_base64(self: DataFormatT, custom: str=..., url_safe: bool=..., remove_whitespace: bool=True) -> DataFormatT: ...
31+
# `alphabet` accepts one of the named alphabets or any custom alphabet string.
# Literal[] may only contain literal values, so the free-form `str` case is
# expressed through Union instead of being listed inside Literal.
def to_base64(self: DataFormatT, alphabet: Union[str, Literal['standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']]='standard') -> DataFormatT: ...
32+
# `alphabet` accepts one of the named alphabets or any custom alphabet string.
# Literal[] may only contain literal values, so the free-form `str` case is
# expressed through Union instead of being listed inside Literal.
def from_base64(self: DataFormatT, alphabet: Union[str, Literal['standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']]='standard', remove_non_alpha: bool=True) -> DataFormatT: ...
3333
def decode_bytes(self: DataFormatT, errors: Literal['ignore', 'backslashreplace', 'replace']=...) -> DataFormatT: ...
3434
def to_hex(self: DataFormatT, delimiter: str=..., join_by: str=...) -> DataFormatT: ...
3535
def from_hex(self: DataFormatT, delimiter: Union[str, None]=None, join_by: str='', replace: Union[bytes, None]=b'%|0x') -> DataFormatT: ...
@@ -71,7 +71,7 @@ class DataFormat(ChepyCore):
7171
def swap_strings(self: DataFormatT, by:int) -> DataFormatT: ...
7272
def to_string(self: DataFormatT) -> DataFormatT: ...
7373
def stringify(self: DataFormatT, compact:bool=...) -> DataFormatT: ...
74-
def select(self: DataFormatT, start: int, end: int=None) -> DataFormatT: ...
74+
def select(self: DataFormatT, start: int, end: Union[None, int]=None) -> DataFormatT: ...
7575
def length(self: DataFormatT) -> DataFormatT: ...
7676
def to_leetcode(self: DataFormatT, replace_space: str=...) -> DataFormatT: ...
7777
def substitute(self: DataFormatT, x: str=..., y: str=...) -> DataFormatT: ...

chepy/modules/internal/helpers.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import List, Union
22
import binascii
3+
import regex as re
34

45

56
class Base45:
@@ -377,3 +378,158 @@ def rotate_left_carry(self):
377378
result[-1] |= carryBits
378379

379380
return b"".join([chr(x).encode() for x in result])
381+
382+
383+
class _Base64:
    """Base64 encoder/decoder that works with arbitrary alphabets.

    The alphabet passed to ``encode_base64``/``decode_base64`` is the fully
    expanded character sequence: the character at index ``n`` represents the
    6-bit value ``n``. An optional trailing ``"="`` marks the alphabet as one
    that uses padding.
    """

    # Named alphabets written in compact range notation ("A-Z" style ranges,
    # "\\-" for a literal hyphen). They must be expanded (for example with
    # ``expand_alpha_range(..., join_by="")``) before being handed to the
    # encode/decode helpers below.
    base_64_chars = {
        "standard": "A-Za-z0-9+/=",
        "url_safe": "A-Za-z0-9-_",
        "filename_safe": "A-Za-z0-9+\\-=",
        "itoa64": "./0-9A-Za-z=",
        "xml": "A-Za-z0-9_.",
        # "y64": "A-Za-z0-9._-",
        "z64": "0-9a-zA-Z+/=",
        "radix_64": "0-9A-Za-z+/=",
        # "uuencoding": " -_",
        "xxencoding": "+\\-0-9A-Za-z",
        # "binHex": "!-,-0-689@A-NP-VX-Z[`a-fh-mp-r",
        "rot13": "N-ZA-Mn-za-m0-9+/=",
        "unix_crypt": "./0-9A-Za-z",
        # "atom128": "/128GhIoPQROSTeUbADfgHijKLM+n0pFWXY456xyzB7=39VaqrstJklmNuZvwcdEC",
        # "megan35": "3GHIJKLMNOPQRSTUb=cdefghijklmnopWXYZ/12+406789VaqrstuvwxyzABCDEF5",
        # "zong22": "ZKj9n+yf0wDVX1s/5YbdxSo=ILaUpPBCHg8uvNO4klm6iJGhQ7eFrWczAMEq3RTt2",
        # "hazz15": "HNO4klm6ij9n+J2hyf0gzA8uvwDEq3X1Q7ZKeFrWcVTts/MRGYbdxSo=ILaUpPBC5",
    }

    @staticmethod
    def decode_base64(data, alphabet):
        """Decode ``data`` using the expanded ``alphabet``.

        Args:
            data (str): Encoded text. ``"="`` is always treated as padding,
                and missing padding is added automatically.
            alphabet (str): Expanded alphabet; the index of a character is
                the 6-bit value it represents.

        Returns:
            bytes: The decoded bytes.

        Raises:
            ValueError: If ``data`` contains a character (other than ``"="``)
                that is not present in ``alphabet``.
        """
        # Pad to a whole number of 4-character groups.
        data += "=" * (-len(data) % 4)

        output = bytearray()
        for pos in range(0, len(data), 4):
            quad = data[pos : pos + 4]
            # Padding contributes zero bits; everything else is looked up.
            enc1, enc2, enc3, enc4 = (
                0 if char == "=" else alphabet.index(char) for char in quad
            )

            chr1 = (enc1 << 2) | (enc2 >> 4)
            chr2 = ((enc2 & 15) << 4) | (enc3 >> 2)
            chr3 = ((enc3 & 3) << 6) | enc4

            # The range guards drop values produced by alphabet indexes >= 64
            # (possible when the alphabet carries extra trailing characters).
            if 0 <= chr1 < 256:
                output.append(chr1)
            # A padded third/fourth position means the byte was never encoded.
            if 0 <= chr2 < 256 and quad[2] != "=":
                output.append(chr2)
            if 0 <= chr3 < 256 and quad[3] != "=":
                output.append(chr3)

        return bytes(output)

    @staticmethod
    def encode_base64(data: bytes, alphabet: str):
        """Encode ``data`` using the expanded ``alphabet``.

        Padding (``"="``) is emitted only when the last character of
        ``alphabet`` is ``"="``; otherwise the output is left unpadded.

        Args:
            data (bytes): Raw bytes to encode.
            alphabet (str): Expanded alphabet; index ``n`` encodes value ``n``.

        Returns:
            str: The encoded text.
        """
        padding = "=" if alphabet[-1] == "=" else ""
        pieces = []

        for pos in range(0, len(data), 3):
            chunk = data[pos : pos + 3]
            present = len(chunk)
            chr1 = chunk[0]
            chr2 = chunk[1] if present > 1 else 0
            chr3 = chunk[2] if present > 2 else 0

            pieces.append(alphabet[chr1 >> 2])
            pieces.append(alphabet[((chr1 & 3) << 4) | (chr2 >> 4)])
            # Positions that carry no input bits become padding (or nothing).
            pieces.append(
                alphabet[((chr2 & 15) << 2) | (chr3 >> 6)] if present > 1 else padding
            )
            pieces.append(alphabet[chr3 & 63] if present > 2 else padding)

        # NOTE: no trailing strip for unpadded alphabets. The last alphabet
        # character is a real data symbol there (e.g. "_" == 63 in url_safe),
        # so stripping it would silently truncate valid output.
        return "".join(pieces)
483+
484+
485+
def expand_alpha_range(alph_str: str, join_by: Union[str, None] = None):
    """Expand range notation such as ``a-z`` or ``10-20`` into its members.

    Recognised forms, scanned left to right:

    * ``<digits>-<digits>``: inclusive integer range, each number emitted as
      its decimal string (``"10-12"`` -> ``"10", "11", "12"``).
    * ``<letter>-<letter>``: inclusive range of code points
      (``"a-c"`` -> ``"a", "b", "c"``).
    * ``\\-``: a literal hyphen.
    * anything else is emitted unchanged, one character at a time.

    Args:
        alph_str (str): The text containing ranges to expand.
        join_by (Union[str, None], optional): When given, the expanded items
            are joined with this separator and a string is returned.

    Returns:
        Union[list, str]: List of expanded items, or the joined string when
        ``join_by`` is not None.
    """
    hold = []
    i = 0
    length = len(alph_str)

    while i < length:
        char = alph_str[i]

        if char.isdigit():
            # Consume the whole run of digits so multi-digit range starts
            # ("10-20") are recognised, not just single-digit ones.
            start_stop = i
            while start_stop < length and alph_str[start_stop].isdigit():
                start_stop += 1
            if (
                start_stop + 1 < length
                and alph_str[start_stop] == "-"
                and alph_str[start_stop + 1].isdigit()
            ):
                end_stop = start_stop + 1
                while end_stop < length and alph_str[end_stop].isdigit():
                    end_stop += 1
                first = int(alph_str[i:start_stop])
                last = int(alph_str[start_stop + 1 : end_stop])
                hold.extend(str(number) for number in range(first, last + 1))
                i = end_stop
                continue
        elif (
            char.isalpha()
            and i + 2 < length
            and alph_str[i + 1] == "-"
            and alph_str[i + 2].isalpha()
        ):
            hold.extend(
                chr(code) for code in range(ord(char), ord(alph_str[i + 2]) + 1)
            )
            i += 3
            continue
        elif char == "\\" and i + 1 < length and alph_str[i + 1] == "-":
            # Escaped hyphen; only the "-" itself has to follow the backslash,
            # so this also works when "\-" is the last thing in the string.
            hold.append("-")
            i += 2
            continue

        # Not part of any range or escape: keep the character as-is.
        hold.append(char)
        i += 1

    if join_by is not None:
        return join_by.join(hold)
    return hold

chepy/modules/utils.py

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import difflib
44
from collections import OrderedDict
55
from typing import TypeVar, Union, Any
6+
from .internal.helpers import expand_alpha_range as _ex_al_range
67

78
import chepy.modules.internal.colors as _int_colors
89

@@ -831,41 +832,5 @@ def expand_alpha_range(self, join_by: Union[str, None] = None):
831832
Chepy: The Chepy object.
832833
"""
833834
alph_str = self._convert_to_str()
834-
hold = []
835-
836-
def expand_range(start, end):
837-
return [str(x) for x in range(int(start), int(end) + 1)]
838-
839-
def expand_char_range(start, end):
840-
return [chr(x) for x in range(ord(start), ord(end) + 1)]
841-
842-
hold = []
843-
i = 0
844-
length = len(alph_str)
845-
846-
while i < length:
847-
# Match numerical ranges like 10-20
848-
num_match = re.match(r"(\d+)-(\d+)", alph_str[i:])
849-
if num_match:
850-
start, end = num_match.groups()
851-
hold.extend(expand_range(start, end))
852-
i += len(start) + len(end) + 1 # move past the number range
853-
elif i < length - 2 and alph_str[i + 1] == "-" and alph_str[i] != "\\":
854-
# Handle character ranges like a-z
855-
start = alph_str[i]
856-
end = alph_str[i + 2]
857-
hold.extend(expand_char_range(start, end))
858-
i += 2
859-
elif (
860-
i < length - 2 and alph_str[i] == "\\" and alph_str[i + 1] == "-"
861-
): # pragma: no cover
862-
hold.append("-")
863-
i += 1
864-
else:
865-
hold.append(alph_str[i])
866-
i += 1
867-
868-
if join_by is not None:
869-
hold = join_by.join(hold)
870-
self.state = hold
835+
self.state = _ex_al_range(alph_str=alph_str, join_by=join_by)
871836
return self

chepy/modules/utils.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,4 @@ class Utils(ChepyCore):
4141
def drop_bytes(self: UtilsT, start: int, length: int) -> UtilsT: ...
4242
def without(self: UtilsT, *values: Any) -> UtilsT: ...
4343
def pick(self: UtilsT, *values: Any) -> UtilsT: ...
44-
def expand_alpha_range(self, join_by: Union[str, None]=None) -> UtilsT: ...
44+
def expand_alpha_range(self: UtilsT, join_by: Union[str, None]=None) -> UtilsT: ...

0 commit comments

Comments
 (0)