diff --git a/kliver/B/task1.py b/kliver/B/task1.py
new file mode 100644
index 0000000..8d7cb0c
--- /dev/null
+++ b/kliver/B/task1.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+in_file = "allvectors.in"
+out_file = "allvectors.out"
+
+in_fd = open(in_file, "r")
+length = int(in_fd.readline().strip())
+in_fd.close()
+
+out_fd = open(out_file, "w")
+for i in range(0, 2**length):
+    out_fd.write(format(i, 'b').zfill(length) + "\n")
+out_fd.close()
\ No newline at end of file
diff --git a/kliver/B/task2.py b/kliver/B/task2.py
new file mode 100644
index 0000000..a3104f1
--- /dev/null
+++ b/kliver/B/task2.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+
+def fibbonachi(n):
+    if n < 3:
+        return 1
+    return fibbonachi(n-1) + fibbonachi(n-2)
+
+
+def generate_binary_vectors_no_neighbor_ones(length, prefix=[]):
+    if len(prefix) == length:
+        yield prefix
+    else:
+        for vector in generate_binary_vectors_no_neighbor_ones(length, prefix + ["0"]):
+            yield vector
+        if not prefix or prefix[-1] != "1":
+            for vector in generate_binary_vectors_no_neighbor_ones(length, prefix + ["1"]):
+                yield vector
+
+in_file = "vectors.in"
+out_file = "vectors.out"
+
+with open(in_file, "r") as in_fd:
+    length = int(in_fd.readline().strip())
+
+with open(out_file, "w") as out_fd:
+    out_fd.write(str(fibbonachi(length + 2)) + "\n")
+    for vector in generate_binary_vectors_no_neighbor_ones(length):
+        out_fd.write("".join(vector) + "\n")
diff --git a/kliver/B/task3.py b/kliver/B/task3.py
new file mode 100644
index 0000000..36db7ee
--- /dev/null
+++ b/kliver/B/task3.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+
+def permutations_generator(objects_list, prefix=[]):
+    if len(objects_list) == 1:
+        yield prefix + objects_list
+    else:
+        for i in range(0, len(objects_list)):
+            for permutation in permutations_generator(objects_list[:i] + objects_list[i+1:], prefix+[objects_list[i]]):
+                yield permutation
+
+in_file = "permutations.in"
+out_file = "permutations.out"
+
+with open(in_file, "r") as in_fd:
+    length = int(in_fd.readline().strip())
+
+out_fd = open(out_file, "w")
+for permutation in permutations_generator([str(i) for i in range(1, length + 1)]):
+    out_fd.write(" ".join(permutation) + "\n")
+out_fd.close()
\ No newline at end of file
diff --git a/kliver/B/task4.py b/kliver/B/task4.py
new file mode 100644
index 0000000..5b9c801
--- /dev/null
+++ b/kliver/B/task4.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+
+def combination_generator(size, objects_list, prefix=[]):
+    if len(prefix) == size:
+        yield prefix
+    else:
+        for i in range(0, len(objects_list)):
+            for combination in combination_generator(size, objects_list[i+1:], prefix=prefix + [objects_list[i]]):
+                yield combination
+
+in_file = "choose.in"
+out_file = "choose.out"
+
+with open(in_file, "r") as in_fd:
+    length, size = list(map(lambda x: int(x), in_fd.readline().strip().split()))
+
+with open(out_file, "w") as out_fd:
+    for combination in combination_generator(size, [str(i) for i in range(1, length + 1)]):
+        #print(permutation)
+        out_fd.write(" ".join(combination) + "\n")
diff --git a/kliver/B/task5.py b/kliver/B/task5.py
new file mode 100644
index 0000000..c1e7e9b
--- /dev/null
+++ b/kliver/B/task5.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+
+def subsets_generator(objects_list, prefix=[]):
+    yield prefix
+    for i in range(0, len(objects_list)):
+        for subset in subsets_generator(objects_list[i+1:], prefix=prefix + [objects_list[i]]):
+            yield subset
+
+in_file = "subsets.in"
+out_file = "subsets.out"
+
+with open(in_file, "r") as in_fd:
+    length = int(in_fd.readline().strip())
+
+with open(out_file, "w") as out_fd:
+    for subset in subsets_generator([str(i) for i in range(1, length + 1)]):
+        #print(permutation)
+        out_fd.write(" ".join(subset) + "\n")
\ No newline at end of file
diff --git a/kliver/B/task6.py b/kliver/B/task6.py
new file mode 100644
index 0000000..ea84f6d
--- /dev/null
+++ b/kliver/B/task6.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+
+def get_prev_and_next_vector(vector):
+    length = len(vector)
+    vector_value = int(vector, 2)
+    max_value = 2**length - 1
+    if not vector_value:
+        return "-", format(1, 'b').zfill(length)
+    prev_vector = format(vector_value - 1, 'b').zfill(length)
+    if vector_value == max_value:
+        return prev_vector, "-"
+    next_vector = format(vector_value + 1, 'b').zfill(length)
+    return prev_vector, next_vector
+
+in_file = "nextvector.in"
+out_file = "nextvector.out"
+
+with open(in_file, "r") as in_fd:
+    input_vector = in_fd.readline().strip()
+
+prev_vector, next_vector = get_prev_and_next_vector(input_vector)
+
+out_fd = open(out_file, "w")
+out_fd.write(prev_vector + "\n")
+out_fd.write(next_vector + "\n")
+out_fd.close()
\ No newline at end of file
diff --git a/kliver/B/task7.py b/kliver/B/task7.py
new file mode 100644
index 0000000..ca56f99
--- /dev/null
+++ b/kliver/B/task7.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+
+def get_previous_permutation(current_permutation, length):
+    for i in range(1, length):
+        if current_permutation[-i] < current_permutation[-(i+1)]:
+            break
+    else:
+        return [0 for i in range(0, length)]
+    i += 1
+    prefix = current_permutation[:-i]
+    suffix = sorted(current_permutation[-i:], reverse=True)
+    for j in range(0, i):
+        if suffix[j] < current_permutation[-i]:
+            insert = suffix.pop(j)
+            break
+    return prefix + [insert] + suffix
+
+
+def get_next_permutation(current_permutation, length):
+    for i in range(1, length):
+        if current_permutation[-i] > current_permutation[-(i+1)]:
+            break
+    else:
+        return [0 for i in range(0, length)]
+    i += 1
+    prefix = current_permutation[:-i]
+    suffix = sorted(current_permutation[-i:])
+    for j in range(0, i):
+        if suffix[j] > current_permutation[-i]:
+            insert = suffix.pop(j)
+            break
+    return prefix + [insert] + suffix
+
+in_file = "nextperm.in"
+out_file = "nextperm.out"
+
+with open(in_file, "r") as in_fd:
+    length = int(in_fd.readline().strip())
+    current_permutation = list(map(lambda x: int(x), in_fd.readline().strip().split()))
+
+previous_permutation = get_previous_permutation(current_permutation, length)
+next_permutation = get_next_permutation(current_permutation, length)
+
+with open(out_file, "w") as out_fd:
+    out_fd.write(" ".join(list(map(lambda x: str(x), previous_permutation))) + "\n")
+    out_fd.write(" ".join(list(map(lambda x: str(x), next_permutation))) + "\n")
\ No newline at end of file
diff --git a/kliver/B/task8.py b/kliver/B/task8.py
new file mode 100644
index 0000000..555ef92
--- /dev/null
+++ b/kliver/B/task8.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+
+def get_next_combination(current_combination, length, size):
+    for i in range(1, size + 1):
+        if current_combination[-i] != length - i + 1:
+            break
+    else:
+        return [-1]
+    prefix = current_combination[:-i]
+    for j in range(1, i + 1):
+        prefix.append(current_combination[-i] + j)
+    return prefix
+
+in_file = "nextchoose.in"
+out_file = "nextchoose.out"
+
+with open(in_file, "r") as in_fd:
+    length, size = list(map(lambda x: int(x), in_fd.readline().strip().split()))
+    current_combination = list(map(lambda x: int(x), in_fd.readline().strip().split()))
+
+next_combination = get_next_combination(current_combination, length, size)
+
+with open(out_file, "w") as out_fd:
+    out_fd.write(" ".join(list(map(lambda x: str(x), next_combination))) + "\n")
\ No newline at end of file
diff --git a/kliver/regression/regression.ipynb b/kliver/regression/regression.ipynb
new file mode 100644
index 0000000..001067f
--- /dev/null
+++ b/kliver/regression/regression.ipynb
@@ -0,0 +1,186 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:b231ce8a4d535a5f2e35c003bbd1ee3620c354b1e34ca2c93f816eff12af1168"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "#!/usr/bin/env python\n",
+      "import numpy as np\n",
+      "import scipy as sp\n",
+      "import scipy.stats as stat\n",
+      "import matplotlib.pyplot as pl\n",
+      "\n",
+      "def generate_linear_dataset_with_deviations(size, min_x, max_x, alpha, beta, sigma, mu):\n",
+      "    x = (max_x - min_x) * np.random.random_sample((size)) + min_x\n",
+      "    y = alpha * x + beta\n",
+      "    deviation = sigma*sp.randn(size) + mu\n",
+      "    return x, y, y + deviation\n",
+      "\n",
+      "def construct_linear_regression(x_learn, y_dev_learn, x_test, y_dev_test):\n",
+      "\n",
+      "    regression = sp.polyfit(x_learn, y_dev_learn, 1)\n",
+      "\n",
+      "    y_exp_learn = sp.polyval(regression, x_learn)\n",
+      "    y_exp_test = sp.polyval(regression, x_test)\n",
+      "\n",
+      "    print(\"For train dataset:\\n\\ty = a*x + b\\n\\ta = %f\\n\\tb = %f\" % (regression[0], regression[1]))\n",
+      "\n",
+      "    mse_learn = np.sqrt(np.mean((y_exp_learn - y_dev_learn) ** 2))\n",
+      "    mse_test = np.sqrt(np.mean((y_exp_test - y_dev_test) ** 2))\n",
+      "    mse_total = np.sqrt((((mse_learn**2) * learn_set_size)\n",
+      "                         + ((mse_test**2) * (dataset_size - learn_set_size)))\n",
+      "                        / dataset_size)\n",
+      "    print(\"Train MSE = %f\\nTest MSE = %f\\nTotal MSE = %f\" % (mse_learn, mse_test,mse_total ))\n",
+      "    return regression, mse_learn, mse_test, mse_total, y_exp_learn, y_exp_test\n",
+      "\n",
+      "\n",
+      "def draw_plots(x_learn, y_dev_learn, y_exp_learn, x_test, y_dev_test, y_exp_test, x, y):\n",
+      "    learn_plot, = pl.plot(x_learn, y_dev_learn, 'b.')\n",
+      "    test_plot, = pl.plot(x_test, y_dev_test, 'g.')\n",
+      "    theoretical_line_plot, = pl.plot(x, y, 'r-')\n",
+      "    empirical_line_plot, = pl.plot(x, np.hstack((y_exp_learn,y_exp_test)), 'c-')\n",
+      "    pl.xlabel('X')\n",
+      "    pl.ylabel('Y')\n",
+      "    pl.legend([learn_plot, test_plot, theoretical_line_plot, empirical_line_plot],\n",
+      "              [\"train data\", \"test data\", \"theoretical regression\", \"empirical regression\"], loc=\"upper left\")\n",
+      "    #pl.savefig('regression.png', format='png')\n",
+      "    pl.show()\n",
+      "\n",
+      "def draw_deviations(x, y_dev, y_emp, error):\n",
+      "    y_subtract = np.abs(y_dev - y_emp)\n",
+      "    in_interval = []\n",
+      "    out_of_interval = []\n",
+      "    for i in range(0, len(y_subtract)):\n",
+      "        if y_subtract[i] < error:\n",
+      "            in_interval.append([x[i], y_dev[i]])\n",
+      "        else:\n",
+      "            out_of_interval.append([x[i], y_dev[i]])\n",
+      "    in_interval = np.array(in_interval)\n",
+      "    out_of_interval = np.array(out_of_interval)\n",
+      "\n",
+      "    in_interval_plot, = pl.plot(in_interval[:, 0], in_interval[:, 1], 'b.')\n",
+      "    out_of_interval_plot, = pl.plot(out_of_interval[:, 0], out_of_interval[:, 1], 'g.')\n",
+      "    regeression_line_plot, = pl.plot(x, y_emp, 'r-')\n",
+      "    pl.xlabel('X')\n",
+      "    pl.ylabel('Y')\n",
+      "    pl.legend([in_interval_plot, out_of_interval_plot, regeression_line_plot],\n",
+      "              [\"points within MSE\", \"point out of MSE\", \"regresion\"], loc=\"upper left\")\n",
+      "    #pl.savefig('interval.png', format='png')\n",
+      "    pl.show()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 20
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "dataset_size = 1000\n",
+      "min_x = -111\n",
+      "max_x = 111\n",
+      "alpha = 4.8\n",
+      "beta = 9.5\n",
+      "sigma = 70\n",
+      "mu = 0\n",
+      "learn_set_size = int(dataset_size/2)\n",
+      "sig_level = 0.05\n",
+      "\n",
+      "print(\"\\nGenerating dataset...\")\n",
+      "x, y, y_dev = generate_linear_dataset_with_deviations(dataset_size, min_x, max_x, alpha, beta, sigma, mu)\n",
+      "\n",
+      "print(\"\\nConstructing regression...\")\n",
+      "regression, mse_learn, mse_test, mse_total, \\\n",
+      "    y_exp_learn, y_exp_test = construct_linear_regression(x[:learn_set_size],\n",
+      "                                                          y_dev[:learn_set_size],\n",
+      "                                                          x[learn_set_size:],\n",
+      "                                                          y_dev[learn_set_size:])\n",
+      "\n",
+      "print(\"Calculating coefficients for total dataset...\")\n",
+      "exp_alpha, exp_beta, r_value, p_value, std_err = stat.linregress(x, y_dev)\n",
+      "t_student = r_value * np.sqrt(dataset_size-2) / (1 - r_value**2)\n",
+      "print(\"For total dataset:\\n\\ty = a*x + b\\n\\ta = %f\\n\\tb = %f\\n\\tr = %f\\n\\tR2 = %f\"\n",
+      "      % (exp_alpha, exp_beta, r_value, r_value**2))\n",
+      "print(\"\\tStudent's t = %f\\n\\tp-value = %f\\n\\tsig_level = %f\" % (t_student, p_value, sig_level))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "\n",
+        "Generating dataset...\n",
+        "\n",
+        "Constructing regression...\n",
+        "For train dataset:\n",
+        "\ty = a*x + b\n",
+        "\ta = 4.805950\n",
+        "\tb = 11.383505\n",
+        "Train MSE = 71.640602\n",
+        "Test MSE = 71.327319\n",
+        "Total MSE = 71.484132\n",
+        "Calculating coefficients for total dataset...\n",
+        "For total dataset:\n",
+        "\ty = a*x + b\n",
+        "\ta = 4.824075\n",
+        "\tb = 10.959370\n",
+        "\tr = 0.974042\n",
+        "\tR2 = 0.948757\n",
+        "\tStudent's t = 600.498318\n",
+        "\tp-value = 0.000000\n",
+        "\tsig_level = 0.050000\n"
+       ]
+      }
+     ],
+     "prompt_number": 21
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "draw_plots(x[:learn_set_size], y_dev[:learn_set_size], y_exp_learn,\n",
+      "           x[learn_set_size:], y_dev[learn_set_size:], y_exp_test,\n",
+      "           x, y)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 22
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "draw_deviations(x, y_dev,\n",
+      "                np.hstack((y_exp_learn, y_exp_test)),\n",
+      "                mse_total)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 23
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
diff --git a/kliver/regression/regression.py b/kliver/regression/regression.py
new file mode 100755
index 0000000..255493c
--- /dev/null
+++ b/kliver/regression/regression.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+
+import argparse
+import numpy as np
+import scipy as sp
+import scipy.stats as stat
+import matplotlib.pyplot as pl
+
+
+def parse_script_arg():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-s',
+                        action='store',
+                        type=int,
+                        dest='dataset_size',
+                        default=1000,
+                        help='Size of simulated dataset')
+    parser.add_argument('--min_x',
+                        action='store',
+                        type=float,
+                        dest='min_x',
+                        default=-100,
+                        help='Minimal value of independent variable')
+    parser.add_argument('--max_x',
+                        action='store',
+                        type=float,
+                        dest='max_x',
+                        default=100,
+                        help='Maximal value of independent variable')
+    parser.add_argument('-a',
+                        action='store',
+                        type=float,
+                        dest='alpha',
+                        default=4.5,
+                        help='Alpha value for dataset. y = ax + b')
+    parser.add_argument('-b',
+                        action='store',
+                        type=float,
+                        dest='beta',
+                        default=-2.8,
+                        help='Beta value for dataset. y = ax + b')
+    parser.add_argument('--sigma',
+                        action='store',
+                        type=float,
+                        dest='sigma',
+                        default=80,
+                        help='Sigma value for noise')
+    parser.add_argument('-mu',
+                        action='store',
+                        type=float,
+                        dest='mu',
+                        default=0,
+                        help='Mean value for noise')
+    return parser
+
+
+def generate_linear_dataset_with_deviations(size, min_x, max_x, alpha, beta, sigma, mu):
+    x = (max_x - min_x) * np.random.random_sample((size)) + min_x
+    y = alpha * x + beta
+    deviation = sigma*sp.randn(size) + mu
+    return x, y, y + deviation
+
+
+def save_data(x, y, y_dev, learn_set_size, x_prefix="x", y_prefix="y", y_dev_prefix="ydev"):
+    for data, data_prefix in (x, x_prefix), (y, y_prefix), (y_dev, y_dev_prefix):
+        with open("%s_learn.t" % data_prefix, "wb") as fd:
+            np.savetxt(fd, data[:learn_set_size])
+        with open("%s_test.t" % data_prefix, "wb") as fd:
+            np.savetxt(fd, data[learn_set_size:])
+
+
+def construct_linear_regression(x_learn, y_dev_learn, x_test, y_dev_test):
+
+    regression = sp.polyfit(x_learn, y_dev_learn, 1)
+
+    y_exp_learn = sp.polyval(regression, x_learn)
+    y_exp_test = sp.polyval(regression, x_test)
+
+    print("For train dataset:\n\ty = a*x + b\n\ta = %f\n\tb = %f" % (regression[0], regression[1]))
+
+    mse_learn = np.sqrt(np.mean((y_exp_learn - y_dev_learn) ** 2))
+    mse_test = np.sqrt(np.mean((y_exp_test - y_dev_test) ** 2))
+    mse_total = np.sqrt((((mse_learn**2) * learn_set_size)
+                         + ((mse_test**2) * (dataset_size - learn_set_size)))
+                        / dataset_size)
+    print("Train MSE = %f\nTest MSE = %f\nTotal MSE = %f" % (mse_learn, mse_test,mse_total ))
+    return regression, mse_learn, mse_test, mse_total, y_exp_learn, y_exp_test
+
+
+def draw_plots(x_learn, y_dev_learn, y_exp_learn, x_test, y_dev_test, y_exp_test, x, y):
+    learn_plot, = pl.plot(x_learn, y_dev_learn, 'b.')
+    test_plot, = pl.plot(x_test, y_dev_test, 'g.')
+    theoretical_line_plot, = pl.plot(x, y, 'r-')
+    empirical_line_plot, = pl.plot(x, np.hstack((y_exp_learn,y_exp_test)), 'c-')
+    pl.xlabel('X')
+    pl.ylabel('Y')
+    pl.legend([learn_plot, test_plot, theoretical_line_plot, empirical_line_plot],
+              ["train data", "test data", "theoretical regression", "empirical regression"], loc="upper left")
+    pl.savefig('regression.png', format='png')
+
+
+def draw_deviations(x, y_dev, y_emp, error):
+    y_subtract = np.abs(y_dev - y_emp)
+    in_interval = []
+    out_of_interval = []
+    for i in range(0, len(y_subtract)):
+        if y_subtract[i] < error:
+            in_interval.append([x[i], y_dev[i]])
+        else:
+            out_of_interval.append([x[i], y_dev[i]])
+    in_interval = np.array(in_interval)
+    out_of_interval = np.array(out_of_interval)
+
+    in_interval_plot, = pl.plot(in_interval[:, 0], in_interval[:, 1], 'b.')
+    out_of_interval_plot, = pl.plot(out_of_interval[:, 0], out_of_interval[:, 1], 'g.')
+    regeression_line_plot, = pl.plot(x, y_emp, 'r-')
+    pl.xlabel('X')
+    pl.ylabel('Y')
+    pl.legend([in_interval_plot, out_of_interval_plot, regeression_line_plot],
+              ["points within MSE", "point out of MSE", "regresion"], loc="upper left")
+    pl.savefig('interval.png', format='png')
+
+
+if __name__ == "__main__":
+    parser = parse_script_arg()
+    arguments = parser.parse_args()
+
+    dataset_size = arguments.dataset_size
+    min_x = arguments.min_x
+    max_x = arguments.max_x
+    alpha = arguments.alpha
+    beta = arguments.beta
+    sigma = arguments.sigma
+    mu = arguments.mu
+    learn_set_size = int(dataset_size/2)
+    sig_level = 0.05
+
+    parameters_string = "dataset_size\t%i\nmin_x\t%f\nmax_x\t%f\nalpha\t%f\nbeta\t%f\nlearn_set_size\t%i\n" %\
+                        (dataset_size, min_x, max_x, alpha, beta, learn_set_size)
+    parameters_string += "noise_mu\t%f\nnoise_sigma\t%f\n" % (mu, sigma)
+    with open("parameters.t", "w") as fd:
+        fd.write(parameters_string)
+    print("\nInput parameters:\n%s" % parameters_string)
+
+    print("\nGenerating dataset...")
+    x, y, y_dev = generate_linear_dataset_with_deviations(dataset_size, min_x, max_x, alpha, beta, sigma, mu)
+    save_data(x, y, y_dev, learn_set_size)
+
+    print("\nConstructing regression...")
+    regression, mse_learn, mse_test, mse_total, \
+        y_exp_learn, y_exp_test = construct_linear_regression(x[:learn_set_size],
+                                                              y_dev[:learn_set_size],
+                                                              x[learn_set_size:],
+                                                              y_dev[learn_set_size:])
+
+    print("Calculating coefficients for total dataset...")
+    exp_alpha, exp_beta, r_value, p_value, std_err = stat.linregress(x, y_dev)
+    t_student = r_value * np.sqrt(dataset_size-2) / (1 - r_value**2)
+    print("For total dataset:\n\ty = a*x + b\n\ta = %f\n\tb = %f\n\tr = %f\n\tR2 = %f"
+          % (exp_alpha, exp_beta, r_value, r_value**2))
+    print("\tStudent's t = %f\n\tp-value = %f\n\tsig_level = %f" % (t_student, p_value, sig_level))

+    draw_plots(x[:learn_set_size], y_dev[:learn_set_size], y_exp_learn,
+               x[learn_set_size:], y_dev[learn_set_size:], y_exp_test,
+               x, y)
+
+    draw_deviations(x, y_dev,
+                    np.hstack((y_exp_learn, y_exp_test)),
+                    mse_total)
\ No newline at end of file