diff --git a/data_exploration/path_evaluation.ipynb b/data_exploration/path_evaluation.ipynb new file mode 100644 index 0000000..c3571ab --- /dev/null +++ b/data_exploration/path_evaluation.ipynb @@ -0,0 +1,588 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9.71\n", + "9.84\n", + "0.71\n", + "[135.42739443872296, 205.48780487804876, 15.492957746478876, 431]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "import csv\n", + "behaviorInfo = file(\"../data/studentBehaviorInfo_all.json\");\n", + "behaviorInfoJ = json.load(behaviorInfo)\n", + "behaviorScore = {}\n", + "i=0\n", + "for row in behaviorInfoJ:\n", + " j=1\n", + " videoN=0\n", + " tempScore = 0\n", + " tempId = 3913\n", + " length_of_listenS = len(row.get('listenScore'))\n", + " current_index = 0\n", + " behaviorScore[i] = {}\n", + " for onePScore in row.get('listenScore'):\n", + " if onePScore.get('postId') == tempId:\n", + " tempScore = tempScore + onePScore.get('score')\n", + " j = j + 1\n", + " \n", + " if onePScore.get('score') == -1:\n", + " tempScore = 0\n", + " \n", + " if current_index == length_of_listenS - 1:\n", + " if videoN == 0:\n", + " j = j -1\n", + " if j == 0:\n", + " j = 1\n", + " behaviorScore[i].update({videoN : float (tempScore/j)})\n", + " tempScore = onePScore.get('score')\n", + " j = 1\n", + " videoN = videoN + 1\n", + " tempId = onePScore.get('postId') \n", + " else:\n", + " if videoN == 0:\n", + " j = j -1\n", + " if j == 0:\n", + " j = 1\n", + " behaviorScore[i].update({videoN : float (tempScore/j)})\n", + " tempScore = onePScore.get('score')\n", + " j = 1\n", + " videoN = videoN + 1\n", + " tempId = onePScore.get('postId')\n", + " \n", + " current_index = current_index + 1 \n", + " i=i+1 \n", + "behaviorInfo.close\n", + "#----------------------------------\n", + " \n", + "videoDataInfo = open(\"../data/videoDataInfo.csv\",'r'); \n", + "videoDataInfo2 = open(\"../data/videoDataInfo.csv\",'r'); \n", + "videoSpeedData = {}\n", + "\n", + "\n", + "MaxSpeed = -1\n", + "MaxWordLevel = -1\n", + "MaxsubtitleLengthRatio = -1\n", + " \n", + " \n", + " \n", + "for rows in csv.DictReader(videoDataInfo):\n", + " if MaxSpeed < rows.get('videoSpeed'):\n", + " MaxSpeed = rows.get('videoSpeed')\n", + " if MaxWordLevel < rows.get('wordLevel'):\n", + " MaxWordLevel = rows.get('wordLevel')\n", + " if MaxsubtitleLengthRatio < rows.get('subtitleLengthRatio'):\n", + " MaxsubtitleLengthRatio = rows.get('subtitleLengthRatio')\n", + "\n", + " \n", + "for row in csv.DictReader(videoDataInfo2):\n", + " difficulty = float(row.get('videoSpeed')) * float(row.get('wordLevel')) * float(row.get('subtitleLengthRatio'))/float(MaxSpeed)/float(MaxWordLevel)/float(MaxsubtitleLengthRatio)\n", + " videoSpeedData[int(row.get('postId'))]= [float(row.get('videoSpeed'))/float(MaxSpeed)*100 ,\n", + " float(row.get('wordLevel'))/float(MaxWordLevel)*100\n", + " , float(row.get('subtitleLengthRatio'))/float(MaxsubtitleLengthRatio)*100 ,\n", + " int(difficulty*1000)]\n", + "\n", + " \n", + " \n", + "print MaxSpeed\n", + "print MaxWordLevel\n", + "print MaxsubtitleLengthRatio\n", + "print videoSpeedData[3585]\n", + "videoDataInfo.close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "userImprovement = {}\n", + "\n", + "now = 0\n", + "for row in behaviorInfoJ:\n", + " tempScoreNow = 0\n", + " tempDifficultyNow = 0\n", + " ImprovementScore = 0\n", + " innerLen = len(behaviorScore[now])\n", + " for innerNow in range(0,innerLen-1):\n", + " temp = row.get('chosenVideo')[innerNow]\n", + " thisDifficulty = videoSpeedData[temp][3]\n", + " thisScore = behaviorScore[now][innerNow]\n", + " \n", + " if thisScore > tempScoreNow and thisDifficulty > tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore + 0\n", + " elif thisScore < tempScoreNow and thisDifficulty > tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore + 1\n", + " elif thisScore < tempScoreNow and thisDifficulty < tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore - 1\n", + " else:\n", + " ImprovementScore = ImprovementScore + 0\n", + " \n", + " tempScoreNow = thisScore\n", + " tempDifficultyNow = thisDifficulty\n", + " \n", + " \n", + " userImprovement[now] = ImprovementScore\n", + " \n", + " #print row.get('chosenVideo')\n", + " #print userImprovement[now]\n", + " now = now + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1757\n" + ] + } + ], + "source": [ + "behaviorInfoX = {}\n", + "intt = 0\n", + "for row in behaviorInfoJ:\n", + " behaviorInfoX[intt] = {}\n", + " check = 0\n", + " for row2 in row.get('chosenVideo'):\n", + " if (row2 == 4336 or row2 == 14026 or row2 == 13388 or row2 == 14712 or row2 == 14293 or row2 == 13300\n", + " or row2 == 14193 or row2 == 4561 or row2 == 12605 or row2 == 10301 or row2 == 3940\n", + " or row2 == 13506 or row2 == 14709 or row2 == 13729 or row2 == 4220 or row2 == 14707 or row2 == 14708\n", + " or row2 == 14292 or row2 == 15055 or row2 == 14807) and check == 0:\n", + " check = 1\n", + " \n", + " if check == 0:\n", + " behaviorInfoX[intt].update(row)\n", + " intt = intt + 1\n", + " \n", + "print intt\n", + "\n", + "userImprovementX = {}\n", + "behaviorScoreX = {}\n", + "i=0\n", + "for row in behaviorInfoX:\n", + " j=1\n", + " videoN=0\n", + " tempScore = 0\n", + " tempId = 3913\n", + " length_of_listenS = len(behaviorInfoX[row].get('listenScore'))\n", + " current_index = 0\n", + " behaviorScoreX[i] = {}\n", + " for onePScore in behaviorInfoX[row].get('listenScore'):\n", + " if onePScore.get('postId') == tempId:\n", + " tempScore = tempScore + onePScore.get('score')\n", + " j = j + 1\n", + " \n", + " if onePScore.get('score') == -1:\n", + " tempScore = 0\n", + " \n", + " if current_index == length_of_listenS - 1:\n", + " if videoN == 0:\n", + " j = j -1\n", + " if j == 0:\n", + " j = 1\n", + " behaviorScoreX[i].update({videoN : float (tempScore/j)})\n", + " tempScore = onePScore.get('score')\n", + " j = 1\n", + " videoN = videoN + 1\n", + " tempId = onePScore.get('postId') \n", + " else:\n", + " if videoN == 0:\n", + " j = j -1\n", + " if j == 0:\n", + " j = 1\n", + " behaviorScoreX[i].update({videoN : float (tempScore/j)})\n", + " tempScore = onePScore.get('score')\n", + " j = 1\n", + " videoN = videoN + 1\n", + " tempId = onePScore.get('postId')\n", + " \n", + " current_index = current_index + 1 \n", + " i=i+1 \n", + "\n", + "\n", + "now = 0\n", + "for row in behaviorInfoX:\n", + " tempScoreNow = 0\n", + " tempDifficultyNow = 0\n", + " ImprovementScore = 0\n", + " innerLen = len(behaviorScoreX[now])\n", + " for innerNow in range(0,innerLen-1):\n", + " temp = behaviorInfoX[row].get('chosenVideo')[innerNow]\n", + " thisDifficulty = videoSpeedData[temp][3]\n", + " thisScore = behaviorScoreX[now][innerNow]\n", + " \n", + " if thisScore > tempScoreNow and thisDifficulty > tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore + 0\n", + " elif thisScore < tempScoreNow and thisDifficulty > tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore + 1\n", + " elif thisScore < tempScoreNow and thisDifficulty < tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore - 1\n", + " else:\n", + " ImprovementScore = ImprovementScore + 0\n", + " \n", + " tempScoreNow = thisScore\n", + " tempDifficultyNow = thisDifficulty\n", + " \n", + " \n", + " userImprovementX[now] = ImprovementScore\n", + " \n", + " #print behaviorInfoX[row].get('chosenVideo')\n", + " #print userImprovementX[now]\n", + " now = now + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1341\n" + ] + } + ], + "source": [ + "behaviorInfoY = {}\n", + "intt = 0\n", + "for row in behaviorInfoJ:\n", + " check = 0\n", + " for row2 in row.get('chosenVideo'):\n", + " if (row2 == 4336 or row2 == 14026 or row2 == 13388 or row2 == 14712 or row2 == 14293 or row2 == 13300\n", + " or row2 == 14193 or row2 == 4561 or row2 == 12605 or row2 == 10301 or row2 == 3940\n", + " or row2 == 13506 or row2 == 14709 or row2 == 13729 or row2 == 4220 or row2 == 14707 or row2 == 14708\n", + " or row2 == 14292 or row2 == 15055 or row2 == 14807) and check == 0:\n", + " check = 1\n", + " \n", + " if check == 1:\n", + " behaviorInfoY[intt] = row\n", + " intt = intt + 1\n", + "print intt\n", + "\n", + "\n", + "userImprovementY = {}\n", + "behaviorScoreY = {}\n", + "i=0\n", + "for row in behaviorInfoY:\n", + " j=1\n", + " videoN=0\n", + " tempScore = 0\n", + " tempId = 3913\n", + "\n", + " length_of_listenS = len( behaviorInfoY[row].get('listenScore') )\n", + " current_index = 0\n", + " behaviorScoreY[i] = {}\n", + " for onePScore in behaviorInfoY[row].get('listenScore'):\n", + " if onePScore.get('postId') == tempId:\n", + " tempScore = tempScore + onePScore.get('score')\n", + " j = j + 1\n", + " \n", + " if onePScore.get('score') == -1:\n", + " tempScore = 0\n", + " \n", + " if current_index == length_of_listenS - 1:\n", + " if videoN == 0:\n", + " j = j -1\n", + " if j == 0:\n", + " j = 1\n", + " behaviorScoreY[i].update({videoN : float (tempScore/j)})\n", + " tempScore = onePScore.get('score')\n", + " j = 1\n", + " videoN = videoN + 1\n", + " tempId = onePScore.get('postId') \n", + " else:\n", + " if videoN == 0:\n", + " j = j -1\n", + " if j == 0:\n", + " j = 1\n", + " behaviorScoreY[i].update({videoN : float (tempScore/j)})\n", + " tempScore = onePScore.get('score')\n", + " j = 1\n", + " videoN = videoN + 1\n", + " tempId = onePScore.get('postId')\n", + " \n", + " current_index = current_index + 1 \n", + " i=i+1 \n", + "\n", + "\n", + "now = 0\n", + "for row in behaviorInfoY:\n", + " tempScoreNow = 0\n", + " tempDifficultyNow = 0\n", + " ImprovementScore = 0\n", + " innerLen = len(behaviorScoreY[now])\n", + " for innerNow in range(0,innerLen-1):\n", + " temp = behaviorInfoY[row].get('chosenVideo')[innerNow]\n", + " thisDifficulty = videoSpeedData[temp][3]\n", + " thisScore = behaviorScoreY[now][innerNow]\n", + " \n", + " if thisScore > tempScoreNow and thisDifficulty > tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore + 0\n", + " elif thisScore < tempScoreNow and thisDifficulty > tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore + 1\n", + " elif thisScore < tempScoreNow and thisDifficulty < tempDifficultyNow:\n", + " ImprovementScore = ImprovementScore - 1\n", + " else:\n", + " ImprovementScore = ImprovementScore + 0\n", + " \n", + " tempScoreNow = thisScore\n", + " tempDifficultyNow = thisDifficulty\n", + " \n", + " \n", + " userImprovementY[now] = ImprovementScore\n", + " \n", + " #print behaviorInfoY[row].get('chosenVideo')\n", + " #print userImprovementY[now]\n", + " now = now + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from statistic_helper import *\n", + "po.offline.init_notebook_mode()\n", + "\n", + "generate_histogram([value for key, value in userImprovement.iteritems()], \"test\", \"student\")\n", + "generate_histograms(\"Compare\",[value for key, value in userImprovementX.iteritems()],\"Right\",[value for key, value in userImprovementY.iteritems()],\"LEFT\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'weight' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mbehaviorInfoJ\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mweight\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mbehaviorScore\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'chosenVideo'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mbehaviorScore\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'weight' is not defined" + ] + } + ], + "source": [ + "i = 0\n", + "for row in behaviorInfoJ: \n", + " weight[1] = weight[1] + behaviorScore[i]\n", + " print row.get('chosenVideo')\n", + " print behaviorScore[i]\n", + " print userImprovement[i]\n", + " \n", + " i = i + 1\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}