diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..2574780 Binary files /dev/null and b/.DS_Store differ diff --git a/Regit/FHE_Regression.ipynb b/Regit/FHE_Regression.ipynb new file mode 100644 index 0000000..d008274 --- /dev/null +++ b/Regit/FHE_Regression.ipynb @@ -0,0 +1,833 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "FHE_Regression.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "g10qn096oe1b" + }, + "source": [ + "#Team: Regit\n", + "#Project: Privacy Preserving Credit Card Fraud Analysis\n", + "#Members: Jatan Loya, Tejas Bana, Siddhant Kulkarni" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "TImP-23w4sqx" + }, + "source": [ + "#Install TenSeal (Wrapper for SEAL)\n", + "!pip3 install tenseal" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zVC8AU9R4bQH" + }, + "source": [ + "#Importing dependencies: PyTorch, pandas, numpy, matplotlib\n", + "import torch\n", + "import tenseal as ts\n", + "import pandas as pd\n", + "import random\n", + "from time import time\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "q2ISbd-f5ZWH", + "outputId": "7825be0f-bacc-4690-9b38-ba563dc3bca7" + }, + "source": [ + "#Downloading dataset from GDrive\n", + "!gdown --id 18Z6ZrdDtZJU-YFTLJILLkwi_-4OneoJm" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + " 8%|▊ | 5.00M/66.0M [00:00<00:02, 29.4MB/s]" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "Downloading 
creditcardfraud.zip to ./creditcardfraud\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "100%|██████████| 66.0M/66.0M [00:00<00:00, 71.1MB/s]\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vm4-5zWW519w", + "outputId": "60b377a4-cf65-45ce-b41a-e2b80c7e92ac" + }, + "source": [ + "#Reading the dataset\n", + "dataset = pd.read_csv(\"./creditcard.csv\")\n", + "print(\"Few Entries: \")\n", + "print(dataset.head())\n", + "print(\"Dataset Shape: \", dataset.shape)\n", + "print(\"Maximum Transaction Value: \", np.max(dataset.Amount))\n", + "print(\"Minimum Transaction Value: \", np.min(dataset.Amount))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Few Entries: \n", + " Time V1 V2 V3 ... V27 V28 Amount Class\n", + "0 0.0 -1.359807 -0.072781 2.536347 ... 0.133558 -0.021053 149.62 0\n", + "1 0.0 1.191857 0.266151 0.166480 ... -0.008983 0.014724 2.69 0\n", + "2 1.0 -1.358354 -1.340163 1.773209 ... -0.055353 -0.059752 378.66 0\n", + "3 1.0 -0.966272 -0.185226 1.792993 ... 0.062723 0.061458 123.50 0\n", + "4 2.0 -1.158233 0.877737 1.548718 ... 
0.219422 0.215153 69.99 0\n", + "\n", + "[5 rows x 31 columns]\n", + "Dataset Shape: (284807, 31)\n", + "Maximum Transaction Value: 25691.16\n", + "Minimum Transaction Value: 0.0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 265 + }, + "id": "X51IG5Me7bF8", + "outputId": "6654edab-897c-4afa-aa7f-4801d1b7cdbe" + }, + "source": [ + "#Plotting a scatterplot for the dataset\n", + "color = {1:'blue',0:'yellow'}\n", + "fraudlist = dataset[dataset.Class == 1]\n", + "notfraudlist = dataset[dataset.Class == 0]\n", + "fig,axes = plt.subplots(1,2)\n", + "axes[0].scatter(list(range(1,fraudlist.shape[0]+1)),fraudlist.Amount,color='blue')\n", + "axes[1].scatter(list(range(1,notfraudlist.shape[0]+1)),notfraudlist.Amount,color='yellow')\n", + "plt.show()\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9EE1mi8v6Ixz", + "outputId": "afa4cf33-e314-4333-903f-fce33fa53719" + }, + "source": [ + "#Returns the number of unique class values\n", + "values = np.array(df[\"Class\"].values)\n", + "np.unique(values, return_counts=True)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([0, 1]), array([284315, 492]))" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TQmcswww5N7j", + "outputId": "62607bff-0597-4302-f1fb-25b1e00a4600" + }, + "source": [ + "torch.random.manual_seed(73)\n", + "random.seed(73)\n", + "\n", + "\n", + "def split_train_test(x, y, test_ratio=0.3):\n", + " idxs = [i for i in range(len(x))]\n", + " random.shuffle(idxs)\n", + " # delimiter between test and train data\n", + " delim = int(len(x) * test_ratio)\n", + " test_idxs, train_idxs = idxs[:delim], idxs[delim:]\n", + " return x[train_idxs], y[train_idxs], x[test_idxs], y[test_idxs]\n", + "\n", + "\n", + "def credit_card_data():\n", + " data = pd.read_csv(\"./creditcard.csv\")\n", + " # drop rows with missing values\n", + " data = data.dropna()\n", + " # drop some features\n", + " data = data.drop(columns=[\"Time\"])\n", + " # balance data\n", + " grouped = data.groupby('Class')\n", + " data = grouped.apply(lambda x: x.sample(grouped.size().min(), random_state=73).reset_index(drop=True))\n", + " # extract labels\n", + " y = torch.tensor(data[\"Class\"].values).float().unsqueeze(1)\n", + " data = data.drop(\"Class\", 'columns')\n", + " # standardize data\n", + " data = (data - data.mean()) / data.std()\n", + " x = torch.tensor(data.values).float()\n", + " return split_train_test(x, y)\n", + "\n", 
+ "\n", + "def random_data(m=1024, n=2):\n", + " # data separable by the line `y = x`\n", + " x_train = torch.randn(m, n)\n", + " x_test = torch.randn(m // 2, n)\n", + " y_train = (x_train[:, 0] >= x_train[:, 1]).float().unsqueeze(0).t()\n", + " y_test = (x_test[:, 0] >= x_test[:, 1]).float().unsqueeze(0).t()\n", + " return x_train, y_train, x_test, y_test\n", + "\n", + "x_train, y_train, x_test, y_test = credit_card_data()\n", + "\n", + "print(\"############# Data summary #############\")\n", + "print(f\"x_train has shape: {x_train.shape}\")\n", + "print(f\"y_train has shape: {y_train.shape}\")\n", + "print(f\"x_test has shape: {x_test.shape}\")\n", + "print(f\"y_test has shape: {y_test.shape}\")\n", + "print(\"#######################################\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "############# Data summary #############\n", + "x_train has shape: torch.Size([689, 29])\n", + "y_train has shape: torch.Size([689, 1])\n", + "x_test has shape: torch.Size([295, 29])\n", + "y_test has shape: torch.Size([295, 1])\n", + "#######################################\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YwlEsLRF9J8h" + }, + "source": [ + "#Defining the class of linear regression model\n", + "class LR(torch.nn.Module):\n", + "\n", + " def __init__(self, n_features):\n", + " super(LR, self).__init__()\n", + " self.lr = torch.nn.Linear(n_features, 1)\n", + " \n", + " def forward(self, x):\n", + " out = torch.sigmoid(self.lr(x))\n", + " return out" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "wQKWae7x9Ur2" + }, + "source": [ + "#We have used SGD optimiser and Binary Cross entropy as Loss\n", + "n_features = x_train.shape[1]\n", + "model = LR(n_features)\n", + "optim = torch.optim.SGD(model.parameters(), lr=1)\n", + "criterion = torch.nn.BCELoss()\n" + ], + "execution_count": null, + "outputs": [] + }, + { 
+ "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "v4LEUvqn9XJq", + "outputId": "3a708903-08e9-4efc-bf70-ad11cfb2d95c" + }, + "source": [ + "#Number of epochs for training and the training function\n", + "EPOCHS = 15\n", + "\n", + "def train(model, optim, criterion, x, y, epochs=EPOCHS):\n", + " for e in range(1, epochs + 1):\n", + " optim.zero_grad()\n", + " out = model(x)\n", + " loss = criterion(out, y)\n", + " loss.backward()\n", + " optim.step()\n", + " print(f\"Loss at epoch {e}: {loss.data}\")\n", + " return model\n", + "\n", + "model = train(model, optim, criterion, x_train, y_train)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Loss at epoch 1: 0.26793885231018066\n", + "Loss at epoch 2: 0.2589355409145355\n", + "Loss at epoch 3: 0.25133609771728516\n", + "Loss at epoch 4: 0.2448013871908188\n", + "Loss at epoch 5: 0.2390972077846527\n", + "Loss at epoch 6: 0.2340555638074875\n", + "Loss at epoch 7: 0.2295524626970291\n", + "Loss at epoch 8: 0.22549419105052948\n", + "Loss at epoch 9: 0.22180868685245514\n", + "Loss at epoch 10: 0.21843957901000977\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UxhjnEkS9X0R", + "outputId": "aa27441e-437f-4ad4-c7c6-27e59a599d77" + }, + "source": [ + "#Accuracy of model on plaintext test_set\n", + "def accuracy(model, x, y):\n", + " out = model(x)\n", + " correct = torch.abs(y - out) < 0.5\n", + " return correct.float().mean()\n", + "\n", + "plain_accuracy = accuracy(model, x_test, y_test)\n", + "print(f\"Accuracy on plain test_set: {plain_accuracy}\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Accuracy on plain test_set: 0.9152542352676392\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "CIBldnpZ9eU6" + }, + "source": [ + 
"#Defining class of encrypted Linear Regression Model\n", + "class EncryptedLR:\n", + " \n", + " def __init__(self, torch_lr):\n", + " self.weight = torch_lr.lr.weight.data.tolist()[0]\n", + " self.bias = torch_lr.lr.bias.data.tolist()\n", + " \n", + " def forward(self, enc_x):\n", + "\n", + " enc_out = enc_x.dot(self.weight) + self.bias\n", + " return enc_out\n", + " \n", + " def __call__(self, *args, **kwargs):\n", + " return self.forward(*args, **kwargs)\n", + " \n", + "\n", + "#Functions for encrypting and decrypting data\n", + " \n", + " def encrypt(self, context):\n", + " self.weight = ts.ckks_vector(context, self.weight)\n", + " self.bias = ts.ckks_vector(context, self.bias)\n", + " \n", + " def decrypt(self, context):\n", + " self.weight = self.weight.decrypt()\n", + " self.bias = self.bias.decrypt()\n", + " \n", + "\n", + "eelr = EncryptedLR(model)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WXHKbc5M9rSE" + }, + "source": [ + "#Setting the parameters for FHE is very important since it has direct impact on\n", + "#Speed and accuracy of the model\n", + "\n", + "poly_mod_degree = 4096\n", + "coeff_mod_bit_sizes = [40, 20, 40]\n", + "#Creating TenSEALContext\n", + "ctx_eval = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)\n", + "#Setting Global scale for ciphertext\n", + "ctx_eval.global_scale = 2 ** 20\n", + "#Generating Galois keys for dot product operations\n", + "ctx_eval.generate_galois_keys()\n", + "\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "t1WDGHFx9uU_", + "outputId": "3b3f778e-b714-4c09-ea09-a253f21ca510" + }, + "source": [ + "#Encrypting the Test dataset\n", + "t_start = time()\n", + "enc_x_test = [ts.ckks_vector(ctx_eval, x.tolist()) for x in x_test]\n", + "t_end = time()\n", + "print(f\"Encryption of the test-set took {int(t_end - 
t_start)} seconds\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Encryption of the test-set took 0 seconds\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DlzRLONo9wQ0", + "outputId": "188d917b-ec7e-4d2d-a728-f6de7b553048" + }, + "source": [ + "#Function for encrypted evaluation of the model\n", + "def encrypted_evaluation(model, enc_x_test, y_test):\n", + " t_start = time()\n", + " \n", + " correct = 0\n", + " for enc_x, y in zip(enc_x_test, y_test):\n", + " # encrypted evaluation\n", + " enc_out = model(enc_x)\n", + " # plain comparison\n", + " out = enc_out.decrypt()\n", + " out = torch.tensor(out)\n", + " out = torch.sigmoid(out)\n", + " if torch.abs(out - y) < 0.5:\n", + " correct += 1\n", + " \n", + " t_end = time()\n", + " print(f\"Evaluated test_set of {len(x_test)} entries in {int(t_end - t_start)} seconds\")\n", + " print(f\"Accuracy: {correct}/{len(x_test)} = {correct / len(x_test)}\")\n", + " return correct / len(x_test)\n", + " \n", + "\n", + "encrypted_accuracy = encrypted_evaluation(eelr, enc_x_test, y_test)\n", + "diff_accuracy = plain_accuracy - encrypted_accuracy\n", + "print(f\"Difference between plain and encrypted accuracies: {diff_accuracy}\")\n", + "if diff_accuracy < 0:\n", + " print(\"Oh! We got a better accuracy on the encrypted test-set! The noise was on our side...\")\n", + "\n", + "#Accuracy on the encrypted test set is slightly better than in the plaintext evaluation" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Evaluated test_set of 295 entries in 2 seconds\n", + "Accuracy: 273/295 = 0.9254237288135593\n", + "Difference between plain and encrypted accuracies: -0.010169506072998047\n", + "Oh! We got a better accuracy on the encrypted test-set! 
The noise was on our side...\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lXp2M8Nn90D_" + }, + "source": [ + "class EncryptedLR:\n", + " \n", + " def __init__(self, torch_lr):\n", + " self.weight = torch_lr.lr.weight.data.tolist()[0]\n", + " self.bias = torch_lr.lr.bias.data.tolist()\n", + " # we accumulate gradients and counts the number of iterations\n", + " self._delta_w = 0\n", + " self._delta_b = 0\n", + " self._count = 0\n", + " \n", + " def forward(self, enc_x):\n", + " enc_out = enc_x.dot(self.weight) + self.bias\n", + " enc_out = EncryptedLR.sigmoid(enc_out)\n", + " return enc_out\n", + " \n", + " def backward(self, enc_x, enc_out, enc_y):\n", + " out_minus_y = (enc_out - enc_y)\n", + " self._delta_w += enc_x * out_minus_y\n", + " self._delta_b += out_minus_y\n", + " self._count += 1\n", + " \n", + " def update_parameters(self):\n", + " if self._count == 0:\n", + " raise RuntimeError(\"You should at least run one forward iteration\")\n", + " \n", + " self.weight -= self._delta_w * (1 / 2*self._count) + self.weight * 0.05\n", + " self.bias -= self._delta_b * (1 / 2*self._count)\n", + " self._delta_w = 0\n", + " self._delta_b = 0\n", + " self._count = 0\n", + " \n", + " @staticmethod\n", + " def sigmoid(enc_x):\n", + " #Using approximation of sigmoid function\n", + " return enc_x.polyval([0.5, 0.197, 0, -0.004])\n", + " \n", + " def plain_accuracy(self, x_test, y_test):\n", + "\n", + " w = torch.tensor(self.weight)\n", + " b = torch.tensor(self.bias)\n", + " out = torch.sigmoid(x_test.matmul(w) + b).reshape(-1, 1)\n", + " correct = torch.abs(y_test - out) < 0.5\n", + " return correct.float().mean() \n", + " \n", + " def encrypt(self, context):\n", + " self.weight = ts.ckks_vector(context, self.weight)\n", + " self.bias = ts.ckks_vector(context, self.bias)\n", + " \n", + " def decrypt(self):\n", + " self.weight = self.weight.decrypt()\n", + " self.bias = self.bias.decrypt()\n", + " \n", + " def __call__(self, 
*args, **kwargs):\n", + " return self.forward(*args, **kwargs)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "cwfAqRol99ug" + }, + "source": [ + "#Again setting the parameters for FHE operations\n", + "poly_mod_degree = 8192\n", + "coeff_mod_bit_sizes = [40, 21, 21, 21, 21, 21, 21, 40]\n", + "# Creating TenSEALContext\n", + "ctx_training = ts.context(ts.SCHEME_TYPE.CKKS, poly_mod_degree, -1, coeff_mod_bit_sizes)\n", + "ctx_training.global_scale = 2 ** 21\n", + "ctx_training.generate_galois_keys()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "fr6GKbG6LNEg" + }, + "source": [ + "import tenseal as ts\n", + "\n", + "bits_scale = 40\n", + "\n", + "# Create TenSEAL context\n", + "context = ts.context(\n", + " ts.SCHEME_TYPE.CKKS,\n", + " poly_modulus_degree=16384+16384,\n", + " coeff_mod_bit_sizes=[40, bits_scale, bits_scale, bits_scale, bits_scale, bits_scale, bits_scale, 40]\n", + ")\n", + "\n", + "#Setting Global scale\n", + "context.global_scale = pow(2, bits_scale)\n", + "\n", + "#Galois keys for ciphertext rotations\n", + "context.generate_galois_keys()\n", + "context.generate_relin_keys()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BAwjtT9b9_eJ", + "outputId": "830ca517-609d-47f3-c493-31ca2c9408fe" + }, + "source": [ + "#Encrypting training dataset\n", + "t_start = time()\n", + "enc_x_train = [ts.ckks_vector(ctx_training, x.tolist()) for x in x_train]\n", + "enc_y_train = [ts.ckks_vector(ctx_training, y.tolist()) for y in y_train]\n", + "t_end = time()\n", + "print(f\"Encryption of the training_set took {int(t_end - t_start)} seconds\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Encryption of the training_set took 17 seconds\n" + ], + "name": "stdout" + } + ] + }, + { + 
"cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 550 + }, + "id": "IRh0vHSN-Bb9", + "outputId": "fca8339a-e2e2-4892-db5b-a79cb0f97c9c" + }, + "source": [ + "#Plotting distribution of plaintext and encrypted data\n", + "normal_dist = lambda x, mean, var: np.exp(- np.square(x - mean) / (2 * var)) / np.sqrt(2 * np.pi * var)\n", + "\n", + "def plot_normal_dist(mean, var, rmin=-10, rmax=10):\n", + " x = np.arange(rmin, rmax, 0.01)\n", + " y = normal_dist(x, mean, var)\n", + " fig = plt.plot(x, y)\n", + " \n", + "# Plaintext distribution\n", + "lr = LR(n_features)\n", + "data = lr.lr(x_test)\n", + "mean, var = map(float, [data.mean(), data.std() ** 2])\n", + "plot_normal_dist(mean, var)\n", + "print(\"Distribution on plain data:\")\n", + "plt.show()\n", + "\n", + "# Encrypted distribution\n", + "def encrypted_out_distribution(eelr, enc_x_test):\n", + " w = eelr.weight\n", + " b = eelr.bias\n", + " data = []\n", + " for enc_x in enc_x_test:\n", + " enc_out = enc_x.dot(w) + b\n", + " data.append(enc_out.decrypt())\n", + " data = torch.tensor(data)\n", + " mean, var = map(float, [data.mean(), data.std() ** 2])\n", + " plot_normal_dist(mean, var)\n", + " print(\"Distribution on encrypted data:\")\n", + " plt.show()\n", + "\n", + "eelr = EncryptedLR(lr)\n", + "eelr.encrypt(ctx_training)\n", + "encrypted_out_distribution(eelr, enc_x_train)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Distribution on plain data:\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + }, + { + "output_type": "stream", + "text": [ + "Distribution on encrypted data:\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mKxoA2Js-HEY", + "outputId": "724d8db2-5241-4863-f84f-147a4cedd092" + }, + "source": [ + "#Training the model on encrypted data\n", + "#Training for fewer epochs since FHE is very compute intensive\n", + "eelr = EncryptedLR(LR(n_features))\n", + "accuracy = eelr.plain_accuracy(x_test, y_test)\n", + "print(f\"Accuracy at epoch #0 is {accuracy}\")\n", + "\n", + "times = []\n", + "for epoch in range(1):\n", + " eelr.encrypt(ctx_training)\n", + " \n", + " t_start = time()\n", + " for enc_x, enc_y in zip(enc_x_train, enc_y_train):\n", + " enc_out = eelr.forward(enc_x)\n", + " eelr.backward(enc_x, enc_out, enc_y)\n", + " eelr.update_parameters()\n", + " t_end = time()\n", + " times.append(t_end - t_start)\n", + " \n", + " eelr.decrypt()\n", + " accuracy = eelr.plain_accuracy(x_test, y_test)\n", + " print(f\"Accuracy at epoch #{epoch + 1} is {accuracy}\")\n", + "\n", + "\n", + "print(f\"\\nAverage time per epoch: {int(sum(times) / len(times))} seconds\")\n", + "print(f\"Final accuracy is {accuracy}\")\n", + "\n", + "diff_accuracy = plain_accuracy - accuracy\n", + "print(f\"Difference between plain and encrypted accuracies: {diff_accuracy}\")\n", + "if diff_accuracy < 0:\n", + " print(\"Oh! We got a better accuracy when training on encrypted data! 
The noise was on our side...\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Accuracy at epoch #0 is 0.7423728704452515\n", + "Accuracy at epoch #1 is 0.8711864352226257\n", + "\n", + "Average time per epoch: 148 seconds\n", + "Final accuracy is 0.8711864352226257\n", + "Difference between plain and encrypted accuracies: 0.04406780004501343\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "01TXwJaKCPP1" + }, + "source": [ + "#There is a difference of about 4% in accuracy between the plaintext model and the model\n", + "#trained on encrypted data, which is close given the limited time spent tuning parameters" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/Regit/Regit.md b/Regit/Regit.md new file mode 100644 index 0000000..d4dbf4b --- /dev/null +++ b/Regit/Regit.md @@ -0,0 +1,30 @@ +# Privacy Preserving Credit Card Fraud Detection +## Team Regit + +We aim to develop a privacy-preserving credit card fraud detection system using Fully Homomorphic Encryption (FHE) and Differential Privacy (DP). + +## Proposed Features + +- Training and evaluating a regression model on encrypted data. +- Training a neural network on differentially private data and evaluating it on encrypted data. + + +## Tech + +We rely on a number of open source projects: + +- [TenSEAL] - A Python wrapper for SEAL +- [Opacus] - Framework for differential privacy +- [PyTorch] - Open source ML framework + +## Results + +We have implemented an encrypted regression model that performs well: when trained on encrypted data, it loses only about 4% accuracy compared with the plaintext model. + +Colab Link: https://colab.research.google.com/drive/1DzqSIne3s0p5mXZha8wLoqVE7FchCdyN?usp=sharing +## Team + +- Jatan Loya +- Tejas Bana +- Siddhant Kulkarni + diff --git a/Regit/Regit.pptx b/Regit/Regit.pptx new file mode 100644 index 0000000..04d662b Binary files /dev/null and b/Regit/Regit.pptx differ