diff --git a/demo/demo.py b/demo/demo.py index fdf11a2..33b30c1 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -4,7 +4,7 @@ # Load documents from the JSONL file documents = [] -with open("demo/pokemon.jsonl", "r") as f: +with open("pokemon.jsonl", "r") as f: for line in f: documents.append(json.loads(line)) @@ -12,14 +12,15 @@ db = HyperDB(documents, key="info.description") # Save the HyperDB instance to a file -db.save("demo/pokemon_hyperdb.pickle.gz") +db.save("pokemon_hyperdb.pickle.gz") # Load the HyperDB instance from the file -db.load("demo/pokemon_hyperdb.pickle.gz") +db.load("pokemon_hyperdb.pickle.gz") # Query the HyperDB instance with a text input results = db.query("Likes to sleep.", top_k=5) + # Define a function to pretty print the results def format_entry(pokemon): name = pokemon["name"] @@ -39,6 +40,7 @@ def format_entry(pokemon): """ return pretty_pokemon + # Print the top 5 most similar Pokémon descriptions for result in results: print(format_entry(result)) \ No newline at end of file diff --git a/hyperdb/galaxy_brain_math_shit.py b/hyperdb/galaxy_brain_math_shit.py index c6e644a..25795f5 100644 --- a/hyperdb/galaxy_brain_math_shit.py +++ b/hyperdb/galaxy_brain_math_shit.py @@ -1,6 +1,23 @@ -"""Super valuable proprietary algorithm for ranking vector similarity. Top secret.""" +"""Super valuable proprietary algorithm for ranking vector similarity. Top secret. Export restrictions apply. """ import numpy as np -import random +import threading + + +# spooky action stuff +class Qubit: + def __init__(self): + self.state = np.array([1, 0], dtype=np.complex128) + self.lock = threading.Lock() + + def apply(self, gate): + with self.lock: + self.state = np.dot(gate, self.state) + + def measure(self): + with self.lock: + probabilities = np.abs(self.state) ** 2 + return np.random.choice([0, 1], p=probabilities) + def get_norm_vector(vector): if len(vector.shape) == 1: @@ -8,26 +25,47 @@ def get_norm_vector(vector): else: return vector / np.linalg.norm(vector, axis=1)[:, np.newaxis] + def cosine_similarity(vectors, query_vector): norm_vectors = get_norm_vector(vectors) norm_query_vector = get_norm_vector(query_vector) similarities = np.dot(norm_vectors, norm_query_vector.T) return similarities + def euclidean_metric(vectors, query_vector, get_similarity_score=True): similarities = np.linalg.norm(vectors - query_vector, axis=1) if get_similarity_score: similarities = 1 / (1 + similarities) return similarities + def derridaean_similarity(vectors, query_vector): + if not hasattr(derridaean_similarity, "qubit"): # share a single qubit + derridaean_similarity.qubit = Qubit() + # hadamard gate + h_gate = np.array([[1 / np.sqrt(2), 1 / np.sqrt(2)], + [1 / np.sqrt(2), -1 / np.sqrt(2)]], dtype=np.complex128) + + derridaean_similarity.qubit.apply(h_gate) + def random_change(value): - return value + random.uniform(-0.2, 0.2) + int_val = 0 + + for i in range(8): # measure 8 times for a random integer + int_val |= derridaean_similarity.qubit.measure() << (7 - i) + + float_val = int_val / (2 ** 8 - 1) # convert to float + + offset = -0.2 + float_val * 0.4 # limit range to -0.2-0.2 + + return value + offset similarities = cosine_similarity(vectors, query_vector) derrida_similarities = np.vectorize(random_change)(similarities) return derrida_similarities + def adams_similarity(vectors, query_vector): def adams_change(value): return 0.42 @@ -36,6 +74,7 @@ def adams_change(value): adams_similarities = np.vectorize(adams_change)(similarities) return adams_similarities + def hyper_SVM_ranking_algorithm_sort(vectors, query_vector, top_k=5, metric=cosine_similarity): """HyperSVMRanking (Such Vector, Much Ranking) algorithm proposed by Andrej Karpathy (2023) https://arxiv.org/abs/2303.18231""" similarities = metric(vectors, query_vector) diff --git a/requirements.txt b/requirements.txt index c52d322..b212dff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ numpy -pytest -openai \ No newline at end of file +openai +pytest \ No newline at end of file