diff --git a/project/data_exploration/score_analysis.ipynb b/project/data_exploration/score_analysis.ipynb
new file mode 100644
index 0000000..3d8e4ba
--- /dev/null
+++ b/project/data_exploration/score_analysis.ipynb
@@ -0,0 +1,235 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 172,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import operator\n",
+ "from statistic_helper import *\n",
+ "from graph_helper import *\n",
+ "po.offline.init_notebook_mode()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 173,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+    "# `students` is iterated record by record in the scoring cell further down.\n",
+    "data_file = \"studentBehaviorInfo_all.json\"\n",
+    "students = load_data_from_file(data_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 174,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+    "# Ids that the scoring cell looks up in each student's 'chosenVideo' list.\n",
+    "# NOTE(review): presumably the videos with low in-/out-degree - confirm.\n",
+    "lower_indegrees_outdegrees = [\n",
+    "    12667, 13300, 13574, 11695, 12318, 12134, 4336, 11700, 14208,\n",
+    "    14710, 12669, 4561, 3940, 12605, 13703, 14293, 14712, 13506,\n",
+    "    10301, 4220, 14709, 15055, 14707, 14708, 14292, 14807,\n",
+    "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 178,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+    "from collections import OrderedDict\n",
+    "\n",
+    "\n",
+    "def _average_scores_per_video(scores):\n",
+    "    \"\"\"Return one rounded mean score per video found in ``scores``.\n",
+    "\n",
+    "    ``scores`` is a sequence of mappings with 'postId' and 'score' entries.\n",
+    "    Entries are bucketed by 'postId' wherever they occur, so a video that\n",
+    "    shows up in several separate runs still yields a single mean (calling\n",
+    "    ``itertools.groupby`` on unsorted input would split it into several).\n",
+    "    Scores equal to -1 are left out of the mean, and a video with no other\n",
+    "    score yields no value. Results follow first-appearance order.\n",
+    "    \"\"\"\n",
+    "    buckets = OrderedDict()  # postId -> [running total, counted scores]\n",
+    "    for entry in scores:\n",
+    "        bucket = buckets.setdefault(entry['postId'], [0, 0])\n",
+    "        value = int(entry['score'])\n",
+    "        if value != -1:\n",
+    "            bucket[0] += value\n",
+    "            bucket[1] += 1\n",
+    "    # float() keeps true division under the Python 2 kernel.\n",
+    "    return [round(total / float(counted), 0)\n",
+    "            for total, counted in buckets.values() if counted]\n",
+    "\n",
+    "\n",
+    "low_rention_score = []\n",
+    "high_rention_score = []\n",
+    "for student in students:\n",
+    "    videos = student['chosenVideo']\n",
+    "    watched = [item for item in lower_indegrees_outdegrees if item in videos]\n",
+    "    if not watched:\n",
+    "        continue\n",
+    "    # The means depend only on the student, so compute them once.\n",
+    "    averages = _average_scores_per_video(student['listenScore'])\n",
+    "    # NOTE(review): the means are added once per matching id, so a student\n",
+    "    # who chose several listed videos is counted several times - confirm\n",
+    "    # that this weighting is intended.\n",
+    "    for item in watched:\n",
+    "        if videos[-1] == item:\n",
+    "            # The listed video is the last entry of 'chosenVideo'.\n",
+    "            low_rention_score.extend(averages)\n",
+    "        else:\n",
+    "            high_rention_score.extend(averages)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 179,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+    "# Highest score first; the unsorted lists are copied, not modified.\n",
+    "sorted_low_rention_score = list(low_rention_score)\n",
+    "sorted_low_rention_score.sort(reverse=True)\n",
+    "sorted_high_rention_score = list(high_rention_score)\n",
+    "sorted_high_rention_score.sort(reverse=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 180,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "# generate_histograms is not defined in this notebook; it presumably comes\n",
+    "# from one of the star-imported helper modules - confirm its signature.\n",
+    "# The title and series labels now spell \"retention\" correctly.\n",
+    "generate_histograms(\n",
+    "    \"Distribution of score: low retention vs high retention\",\n",
+    "    sorted_low_rention_score, \"Low Retention\",\n",
+    "    sorted_high_rention_score, \"High Retention\"\n",
+    ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}