potacho · javiergispert · Jun 30, 2022 · Jun 30, 2022
diff --git a/data/labs_status.csv b/data/labs_status.csv
@@ -0,0 +1,21 @@
+Student Name,PR Number,Lab Name,PR Status,Lab Status,PR Created at,PR Updated at,PR Closed at,PR URL,base repository,base,head repository,compare,Pushed at
+No student name provided,109,[dataframe-calculations-1y2],open,lab-started,2022-06-26 12:18:09,2022-06-26 15:25:53,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/109,ih-datapt-mad/dataptmad0522_labs,main,Fvperez1/dataptmad0522_labs,dataframe-calculations,2022-06-26 15:22:18
+No student name provided,105,[dataframe-calculations-1],open,lab-started,2022-06-21 18:39:10,2022-06-21 18:39:10,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/105,ih-datapt-mad/dataptmad0522_labs,main,angel-barruz/dataptmad0522_labs,dataframe-calculations-1,2022-06-27 16:39:28
+No student name provided,108,Lab format name is incorrect,open,lab-started,2022-06-25 09:38:22,2022-06-25 09:38:22,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/108,ih-datapt-mad/dataptmad0522_labs,main,guiston04/dataptmad0522_labs,mysql_select,2022-06-25 09:36:33
+No student name provided,107,Lab format name is incorrect,open,lab-started,2022-06-23 15:35:16,2022-06-23 15:35:16,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/107,ih-datapt-mad/dataptmad0522_labs,main,angel-barruz/dataptmad0522_labs,dataframe-calculations-2,2022-06-27 16:39:28
+No student name provided,102,Lab format name is incorrect,open,lab-started,2022-06-20 22:16:19,2022-06-22 18:38:44,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/102,ih-datapt-mad/dataptmad0522_labs,main,guiston04/dataptmad0522_labs,dataframe_calculation_transformation,2022-06-25 09:36:33
+No student name provided,114,[mysql-select],open,lab-finished,2022-06-27 16:09:37,2022-06-27 16:09:37,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/114,ih-datapt-mad/dataptmad0522_labs,main,pedroConsuegraMateo/dataptmad0522_labs,mysql-select,2022-06-26 16:12:31
+No student name provided,111,[mysql-select],open,lab-finished,2022-06-27 09:32:44,2022-06-27 11:43:07,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/111,ih-datapt-mad/dataptmad0522_labs,main,elvestevez/dataptmad0522_labs,mysql-select,2022-06-27 11:43:05
+No student name provided,110,[mysql-select],open,lab-finished,2022-06-26 19:47:32,2022-06-27 14:02:09,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/110,ih-datapt-mad/dataptmad0522_labs,main,HEBA-7/dataptmad0522_labs,mysql-select,2022-06-27 14:02:58
+No student name provided,99,[dataframe-calculations],open,lab-finished,2022-06-19 17:17:36,2022-06-21 15:53:33,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/99,ih-datapt-mad/dataptmad0522_labs,main,jdediegoAb/dataptmad0522_labs,dataframe-calculations,2022-06-21 15:47:15
+No student name provided,104,[dataframe-calculations-2],open,lab-finished,2022-06-21 17:05:53,2022-06-26 19:42:07,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/104,ih-datapt-mad/dataptmad0522_labs,main,HEBA-7/dataptmad0522_labs,dataframe-calculations_2,2022-06-27 14:02:58
+No student name provided,103,[dataframe-calculations-2],open,lab-finished,2022-06-21 08:43:11,2022-06-21 08:43:11,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/103,ih-datapt-mad/dataptmad0522_labs,main,elvestevez/dataptmad0522_labs,dataframe-calculations-2,2022-06-27 11:43:05
+No student name provided,98,[dataframe-calculations-2],open,lab-finished,2022-06-19 16:17:59,2022-06-19 16:17:59,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/98,ih-datapt-mad/dataptmad0522_labs,main,javiergispert/dataptmad0522_labs,dataframe-calculations-2,2022-06-27 10:01:42
+No student name provided,97,[dataframe-calculations-2],open,lab-finished,2022-06-19 12:07:44,2022-06-23 15:51:08,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/97,ih-datapt-mad/dataptmad0522_labs,main,pedroConsuegraMateo/dataptmad0522_labs,dataframe-calculations-2,2022-06-26 16:12:31
+No student name provided,101,[dataframe-calculations-1],open,lab-finished,2022-06-20 10:27:13,2022-06-21 16:59:20,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/101,ih-datapt-mad/dataptmad0522_labs,main,HEBA-7/dataptmad0522_labs,dataframe-calculations,2022-06-27 14:02:58
+No student name provided,100,[dataframe-calculations-1],open,lab-finished,2022-06-19 22:06:07,2022-06-19 22:06:07,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/100,ih-datapt-mad/dataptmad0522_labs,main,elvestevez/dataptmad0522_labs,dataframe-calculations-1,2022-06-27 11:43:05
+No student name provided,96,[dataframe-calculations-1],open,lab-finished,2022-06-19 10:12:04,2022-06-19 10:12:04,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/96,ih-datapt-mad/dataptmad0522_labs,main,pedroConsuegraMateo/dataptmad0522_labs,dataframe-calculations-1,2022-06-26 16:12:31
+No student name provided,95,[dataframe-calculations-1],open,lab-finished,2022-06-18 18:26:06,2022-06-18 18:26:06,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/95,ih-datapt-mad/dataptmad0522_labs,main,javiergispert/dataptmad0522_labs,dataframe-calculations,2022-06-27 10:01:42
+No student name provided,113,[advanced-mysql],open,lab-finished,2022-06-27 14:04:49,2022-06-27 14:04:49,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/113,ih-datapt-mad/dataptmad0522_labs,main,HEBA-7/dataptmad0522_labs,advanced-mysql,2022-06-27 14:02:58
+No student name provided,112,[advanced-mysql],open,lab-finished,2022-06-27 11:39:58,2022-06-27 11:39:58,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/112,ih-datapt-mad/dataptmad0522_labs,main,elvestevez/dataptmad0522_labs,advanced-mysql,2022-06-27 11:43:05
+No student name provided,106,Lab format name is incorrect,open,lab-finished,2022-06-21 19:22:59,2022-06-25 09:19:31,,https://github.com/ih-datapt-mad/dataptmad0522_labs/pull/106,ih-datapt-mad/dataptmad0522_labs,main,anadeondarza/dataptmad0522_labs,dataframe-calculations,2022-06-25 09:16:25
diff --git a/main.py b/main.py
diff --git a/main_damned.py b/main_damned.py
@@ -0,0 +1,68 @@
+#imports
+import os
+
+from modules import module_aux as ma
+from modules import module_pipe as mp
+
+# Credentials come from the environment so that no secret is committed.
+# NOTE(review): a personal access token used to be hard-coded on this line and
+# was published with this change; it should be revoked on GitHub.
+API_TOKEN = os.environ.get('GITHUB_API_TOKEN', '') #API TOKEN (REMEMBER: do not push these to your repo)
+USERNAME = os.environ.get('GITHUB_USERNAME', 'javiergispert') #USERNAME
+
+# Pieces that are concatenated to build the request URLs.
+BASE_URL = 'https://api.github.com/'
+KEY = 'repos/'
+OWNER = 'ih-datapt-mad/'
+REPO = 'dataptmad0522_labs' #LAB_REPOSITORY
+# Search query template; {} is filled with the pull request state.
+SEARCH = 'search/issues?q=repo:'+OWNER+REPO+'+type:pr+state:{}'
+# Listing template; the placeholders are the page number and the state.
+PULLS = 'pulls?page={}&per_page=100&state={}'
+# Commits template; {} is filled with the pull request number.
+COMMITS = 'pulls/{}/commits'
+STATE = 'open'
+
+# Columns kept from the flattened pull-request payload.
+field_list1 = [
+    'number',
+    'title',
+    'state',
+    'created_at',
+    'updated_at',
+    'closed_at',
+    'html_url',
+    'base.repo.full_name',
+    'base.ref',
+    'head.repo.full_name',
+    'head.ref',
+    'head.repo.pushed_at',
+]
+
+# Columns of the status table, in output order.
+field_list2 = [
+    'student_name',
+    'number',
+    'lab_name',
+    'state',
+    'lab_status',
+    'created_at',
+    'updated_at',
+    'closed_at',
+    'html_url',
+    'base.repo.full_name',
+    'base.ref',
+    'head.repo.full_name',
+    'head.ref',
+    'head.repo.pushed_at',
+]
+
+# Sort keys for the report, most significant first.
+field_sort1 = [
+    'lab_status',
+    'lab_name',
+    'student_name',
+]
+
+# Header names for the CSV; they replace the field_list2 columns by position.
+field_name1 = [
+    'Student Name',
+    'PR Number',
+    'Lab Name',
+    'PR Status',
+    'Lab Status',
+    'PR Created at',
+    'PR Updated at',
+    'PR Closed at',
+    'PR URL',
+    'base repository',
+    'base',
+    'head repository',
+    'compare',
+    'Pushed at',
+]
+
+
+if __name__ == '__main__':
+    # Pipeline: list the pull requests, derive each lab's status, then write
+    # the sorted report to disk.
+    DF_PULLS = mp.get_pulls(BASE_URL, KEY, OWNER, REPO, PULLS, SEARCH, STATE, USERNAME, API_TOKEN, field_list1)
+    DF_STATUS = mp.df_status(DF_PULLS, BASE_URL, KEY, OWNER, REPO, COMMITS, USERNAME, API_TOKEN, field_list2)
+    # The trailing bare ``DF_CSV`` expression was dropped: outside a notebook
+    # it evaluates the name and discards it.
+    DF_CSV = mp.create_csv(DF_STATUS, field_sort1, field_name1)
diff --git a/modules/module.py b/modules/module.py
diff --git a/modules/module_aux.py b/modules/module_aux.py
@@ -0,0 +1,76 @@
+#imports
+import requests
+import pandas as pd
+import re
+import json
+import math
+
+# Aux Function 1: You can get only 100 results per page so it is important to know the number of pages you'll need.
+def pages(base_url, search, state, username, api_token):
+    """Return how many 100-item pages are needed to list the matching pull requests.
+
+    Args:
+        base_url: Root URL of the API; ``search`` is appended to it.
+        search: Search query template with one ``{}`` placeholder for ``state``.
+        state: Pull request state to count (e.g. ``'open'`` or ``'closed'``).
+        username: User name for HTTP basic authentication.
+        api_token: Token used as the basic-auth password.
+
+    Returns:
+        The response's ``total_count`` divided by 100, rounded up.
+
+    Raises:
+        KeyError: If the JSON response has no ``total_count`` key.
+    """
+    response = requests.get(base_url + search.format(state), auth=(username, api_token))
+    total_count = response.json()['total_count']
+    # The page count does not depend on the state. The previous version
+    # branched on an undefined global ``STATE`` (NameError) and returned None
+    # for any state other than 'open' or 'closed'.
+    return math.ceil(total_count / 100)
+
+# Aux Function 2: Check the commits in order to know which labs are ready to be reviewed.
+
+def get_commits(base_url, key, owner, repo, commits, pull, username, api_token):
+    """Return 'lab-finished' or 'lab-started' for one pull request.
+
+    The commit list of pull request ``pull`` is fetched from
+    ``base_url + key + owner + repo + commits.format(pull)``. The lab counts
+    as finished when at least one commit message is exactly 'lab-finished'.
+    """
+    url = base_url + key + owner + repo + commits.format(pull)
+    payload = requests.get(url, auth=(username, api_token)).json()
+    messages = pd.json_normalize(payload)['commit.message']
+    if any(message == 'lab-finished' for message in messages):
+        return 'lab-finished'
+    return 'lab-started'
+
+# Aux Function 3: But the students aren't careful with the naming...
+
+def student_name(x):
+    """Extract the student's name from a pull request title.
+
+    The name is read from the text between the first and second ``]`` of the
+    title (everything after the first ``]`` when there is only one).
+    Underscores count as spaces, and the name must have at least two
+    space-separated words.
+
+    Args:
+        x: Pull request title.
+
+    Returns:
+        The name, or 'No student name provided' when no usable name follows
+        the bracket, or 'Pull request is not properly named' when the title
+        contains no ``]``.
+    """
+    if ']' not in x:
+        return 'Pull request is not properly named'
+    candidate = x.split(']')[1].replace('_', ' ').strip()
+    if len(candidate.split(' ')) <= 1:
+        return 'No student name provided'
+    # Raw string: '\w' in a plain literal is an invalid escape sequence.
+    matches = re.findall(r'\w[a-zA-Z áéíóúÁÉÍÓÚñÑ-]+', candidate)
+    # Text with no word character (e.g. '?? !!') yields no match; indexing the
+    # empty list used to raise IndexError.
+    if not matches:
+        return 'No student name provided'
+    return matches[0].strip()
+
+#Aux Function 4
+
+def lab_name(x):
+    """Extract the ``[lab-name]`` tag from a pull request title.
+
+    The tag is everything up to and including the first ``]``, stripped of
+    surrounding whitespace. It is accepted only when it starts with ``[``,
+    contains no space and none of the characters rejected by the pattern below.
+
+    Returns the tag, 'Lab format name is incorrect' when the tag is rejected,
+    or 'Pull request is not properly named' when the title has no ``]``.
+    """
+    head, bracket, _ = x.partition(']')
+    if not bracket:
+        return 'Pull request is not properly named'
+    tag = (head + ']').strip()
+    # NOTE(review): the class is meant to catch upper-case letters but also
+    # lists the lower-case 'ñ'; kept as is, confirm whether that is intended.
+    has_rejected_char = re.search('[A-ZÁÉÍÓÚñÑ]+', tag) is not None
+    well_formed = tag.startswith('[') and tag.endswith(']') and ' ' not in tag
+    if well_formed and not has_rejected_char:
+        return tag
+    return 'Lab format name is incorrect'
+
+# Aux Function 5: ...or forget to push their work!!!
+
+def time_parser(x):
+    """Parse a timestamp string into a pandas datetime.
+
+    Only the digits of ``x`` are kept, then read as ``%Y%m%d%H%M%S``.
+
+    Args:
+        x: Timestamp text, or a non-string value when nothing is available.
+
+    Returns:
+        The parsed timestamp, ``NaT`` when the digits do not form a valid
+        timestamp (``errors='coerce'``), or 'Nothing pushed yet' when ``x`` is
+        not a string.
+    """
+    # An explicit type check replaces the former bare ``except:``, which also
+    # swallowed KeyboardInterrupt and any unrelated error.
+    if not isinstance(x, str):
+        return 'Nothing pushed yet'
+    digits = ''.join(re.findall('[0-9]+', x.strip()))
+    return pd.to_datetime(digits, format='%Y%m%d%H%M%S', errors='coerce')
diff --git a/modules/module_pipe.py b/modules/module_pipe.py
@@ -0,0 +1,48 @@
+import requests
+import pandas as pd
+import re
+import json
+import math
+
+# Pipeline Function 1: And finally get the 'pull requests'.
+
+def get_pulls(base_url, key, owner, repo, pulls, search, state, username, api_token, field_list):
+    """Download every page of pull requests and return the selected columns.
+
+    Args:
+        base_url, key, owner, repo: URL pieces concatenated in that order.
+        pulls: Listing template with placeholders for page number and state.
+        search: Search query template handed to ``pages`` to count results.
+        state: Pull request state to list.
+        username: User name for HTTP basic authentication.
+        api_token: Token used as the basic-auth password.
+        field_list: Columns to keep from the flattened JSON.
+
+    Returns:
+        A DataFrame with the ``field_list`` columns, one row per pull request;
+        empty (same columns) when there is nothing to list.
+    """
+    # ``pages`` lives in the sibling module and was never imported here, so the
+    # call used to raise NameError. NOTE(review): the relative import assumes
+    # this module is loaded as part of the ``modules`` package, as the entry
+    # script does.
+    from .module_aux import pages
+
+    max_pages = pages(base_url, search, state, username, api_token)
+    pulls_list = []
+    for page_number in range(1, max_pages + 1):
+        r_pulls = requests.get(base_url + key + owner + repo + pulls.format(page_number, state),
+                               auth=(username, api_token)).json()
+        pulls_list.append(pd.json_normalize(r_pulls))
+    # pd.concat raises ValueError on an empty list, so handle zero pages here.
+    if not pulls_list:
+        return pd.DataFrame(columns=field_list)
+    # ignore_index gives each row its own label instead of restarting at 0 on
+    # every page.
+    df_pulls = pd.concat(pulls_list, ignore_index=True)
+    return df_pulls[field_list]
+
+
+# Pipeline Function 2: Apply!!!!!!
+
+def df_status(df_pulls, base_url, key, owner, repo, commits, username, api_token, field_list):
+    """Add the derived columns to the pull requests and return the status table.
+
+    ``df_pulls`` is modified in place: ``student_name`` and ``lab_name`` are
+    derived from ``title``, the ``created_at``, ``updated_at`` and
+    ``head.repo.pushed_at`` columns are parsed, and ``lab_status`` is filled by
+    one commits request per row.
+
+    Args:
+        df_pulls: Pull requests with at least ``title``, ``number`` and the
+            three timestamp columns above.
+        base_url, key, owner, repo, commits: URL pieces for the commits request.
+        username: User name for HTTP basic authentication.
+        api_token: Token used as the basic-auth password.
+        field_list: Columns to return, in order.
+
+    Returns:
+        ``df_pulls`` restricted to ``field_list``.
+    """
+    # These helpers live in the sibling module and were never imported here, so
+    # every call used to raise NameError. NOTE(review): the relative import
+    # assumes this module is loaded as part of the ``modules`` package.
+    from .module_aux import get_commits, lab_name, student_name, time_parser
+
+    df_pulls['student_name'] = df_pulls['title'].apply(student_name)
+    df_pulls['lab_name'] = df_pulls['title'].apply(lab_name)
+    for column in ('created_at', 'updated_at', 'head.repo.pushed_at'):
+        df_pulls[column] = df_pulls[column].apply(time_parser)
+    df_pulls['lab_status'] = df_pulls.apply(lambda row: get_commits(base_url,
+                                                                    key,
+                                                                    owner,
+                                                                    repo,
+                                                                    commits,
+                                                                    row['number'],
+                                                                    username,
+                                                                    api_token), axis=1)
+    # Local name changed so it no longer shadows this function.
+    status = df_pulls[field_list]
+    return status
+
+
+# Pipeline function 3: And there you have it!!!
+
+def create_csv(df_status, field_sort, field_name, path='./data/labs_status.csv'):
+    """Sort the status table, rename its columns and write it to a CSV file.
+
+    Args:
+        df_status: Status table; it is not modified.
+        field_sort: Columns to sort by, in descending order.
+        field_name: New column names, matched to the columns by position.
+        path: Output file; defaults to the location that was hard-coded before.
+
+    Returns:
+        The sorted, renamed DataFrame that was written.
+    """
+    from pathlib import Path
+
+    df_csv = df_status.sort_values(by=field_sort, ascending=False)
+    df_csv.columns = field_name
+    # to_csv does not create directories, so make sure the target one exists.
+    Path(path).parent.mkdir(parents=True, exist_ok=True)
+    df_csv.to_csv(path, index=False)
+    return df_csv