diff --git a/project/data_exploration/score_analysis.ipynb b/project/data_exploration/score_analysis.ipynb new file mode 100644 index 0000000..3d8e4ba --- /dev/null +++ b/project/data_exploration/score_analysis.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 172, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import operator\n", + "from statistic_helper import *\n", + "from graph_helper import *\n", + "po.offline.init_notebook_mode()" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "students = load_data_from_file(\"studentBehaviorInfo_all.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "lower_indegrees_outdegrees = [12667, 13300, 13574, 11695, 12318, 12134, 4336, 11700, 14208, 14710, 12669, 4561, 3940, 12605, 13703, 14293, 14712, 13506, 10301, 4220, 14709, 15055, 14707, 14708, 14292, 14807]" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "low_rention_score = []\n", + "high_rention_score = []\n", + "for student in students:\n", + " videos = student['chosenVideo']\n", + " scores = student['listenScore']\n", + " for item in lower_indegrees_outdegrees:\n", + " if item in videos:\n", + " if videos[len(videos) - 1] == item :\n", + " gather_videos = []\n", + " for key, items in itertools.groupby(scores, operator.itemgetter('postId')):\n", + " gather_videos.append(list(items))\n", + "\n", + " for video in gather_videos:\n", + " videoID = video[0]['postId']\n", + " size = len(video)\n", + " sum = 0\n", + " times = 0\n", + " for i in range(size):\n", + " if int(video[i]['score']) != -1:\n", + " sum += int(video[i]['score'])\n", + " times += 1\n", + "\n", + " if times != 0:\n", + " average = sum/float(times)\n", + " low_rention_score.append(round(average, 0))\n", + "\n", + " else:\n", + " gather_videos = []\n", + " for key, items in itertools.groupby(scores, operator.itemgetter('postId')):\n", + " gather_videos.append(list(items))\n", + "\n", + " for video in gather_videos:\n", + " videoID = video[0]['postId']\n", + " size = len(video)\n", + " sum = 0\n", + " times = 0\n", + " for i in range(size):\n", + " if int(video[i]['score']) != -1:\n", + " sum += int(video[i]['score'])\n", + " times += 1\n", + "\n", + " if times != 0:\n", + " average = sum/float(times)\n", + " high_rention_score.append(round(average, 0))" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "sorted_low_rention_score = sorted(low_rention_score, reverse=True)\n", + "sorted_high_rention_score = sorted(high_rention_score, reverse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "generate_histograms(\"Distribution of score: low rention vs high rention\", sorted_low_rention_score, \"Low Rention\", sorted_high_rention_score, \"High Rention\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}