-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathytAPI.py
More file actions
119 lines (92 loc) · 4.42 KB
/
ytAPI.py
File metadata and controls
119 lines (92 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import re
import string

import pandas as pd
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
# from Home import form
class youtubeAPI:
    """Download the comments of a YouTube video and expose them as a DataFrame.

    Attributes:
        all_comments: Results of the most recent fetch. ``get_comments``
            stores ``[comment_text, [reply_text, ...]]`` pairs here, while
            ``get_replies`` appends plain reply strings.
        video_id: ID of the video last passed to ``get_yt_comments``.
        api_key: Developer key handed to ``build`` when creating the client.
        yt_object: API client created by ``get_yt_comments`` (``None`` until
            then); ``get_replies`` issues its requests through it.
        df: DataFrame produced by the last ``get_yt_comments`` call. It is
            only set once that method has run.
    """

    # SECURITY(review): this key is committed to source control and should be
    # rotated and removed. It is kept only as a last-resort fallback so that
    # existing callers of ``youtubeAPI()`` keep working; prefer the ``api_key``
    # argument or the YOUTUBE_API_KEY environment variable.
    _LEGACY_API_KEY = 'AIzaSyCeWL7FWHsYoFgrCEIjOug1qnhTEZzprIE'

    # Page size requested from both list endpoints.
    _PAGE_SIZE = 100

    # Patterns used by clean_text, compiled once instead of on every call.
    _BRACKETED_RE = re.compile(r'\[.*?\]')
    _PUNCTUATION_RE = re.compile('[%s]' % re.escape(string.punctuation))
    _WORD_WITH_DIGIT_RE = re.compile(r'\w*\d\w*')

    def __init__(self, api_key=None):
        """Create an empty collector.

        Args:
            api_key: Developer key to use. When omitted, the YOUTUBE_API_KEY
                environment variable is used, then the legacy embedded key.
        """
        self.all_comments = []
        self.video_id = ''
        self.yt_object = None
        self.api_key = (
            api_key
            or os.environ.get('YOUTUBE_API_KEY')
            or self._LEGACY_API_KEY
        )

    @staticmethod
    def _iter_reply_texts(youtube, comment_id, token=None):
        """Yield the ``textDisplay`` of every reply under ``comment_id``.

        Follows ``nextPageToken`` until the response no longer carries one.
        ``token`` optionally resumes from a specific page.
        """
        while True:
            request_args = {
                'part': 'snippet',
                'maxResults': youtubeAPI._PAGE_SIZE,
                'parentId': comment_id,
            }
            if token:
                request_args['pageToken'] = token
            page = youtube.comments().list(**request_args).execute()
            for reply in page['items']:
                yield reply['snippet']['textDisplay']
            token = page.get('nextPageToken')
            if not token:
                return

    def get_replies(self, comment_id, token):
        """Append every reply of one comment thread to ``all_comments``.

        Args:
            comment_id: ID of the parent (top-level) comment.
            token: Page token to start from, or ``None``/``''`` for the
                first page.

        Returns:
            An empty list (kept for compatibility); the reply texts are
            appended to ``self.all_comments`` as plain strings.

        Raises:
            RuntimeError: If no API client has been created yet.
        """
        if self.yt_object is None:
            raise RuntimeError(
                'No YouTube client available; call get_yt_comments() first '
                'or assign yt_object.'
            )
        self.all_comments.extend(
            self._iter_reply_texts(self.yt_object, comment_id, token)
        )
        return []

    def get_comments(self, youtube, video_id, next_view_token):
        """Fetch all top-level comments of a video together with their replies.

        Pages are walked iteratively so that very long comment sections
        cannot exhaust the recursion limit.

        Args:
            youtube: API client exposing ``commentThreads()`` and
                ``comments()``.
            video_id: ID of the video whose comments are requested.
            next_view_token: Page token to resume from. An empty (or
                whitespace-only) token starts from the first page and clears
                ``self.all_comments`` beforehand.

        Returns:
            An empty list (kept for compatibility); results are stored in
            ``self.all_comments`` as ``[comment_text, [reply_text, ...]]``.
        """
        token = (next_view_token or '').strip()
        if not token:
            # A fresh fetch must not mix with results of a previous video.
            self.all_comments = []

        while True:
            request_args = {
                'part': 'snippet',
                'maxResults': self._PAGE_SIZE,
                'videoId': video_id,
                'order': 'relevance',
            }
            if token:
                request_args['pageToken'] = token
            page = youtube.commentThreads().list(**request_args).execute()

            for thread in page['items']:
                thread_info = thread['snippet']
                text = thread_info['topLevelComment']['snippet']['textDisplay']
                replies = []
                # Skip the extra request for threads without replies.
                if thread_info['totalReplyCount'] > 0:
                    replies = list(
                        self._iter_reply_texts(youtube, thread['id'])
                    )
                # Always store a two-element entry so that consumers can
                # unpack (comment, replies) unconditionally.
                self.all_comments.append([text, replies])

            token = page.get('nextPageToken')
            if not token:
                break
        return []

    def clean_text(self, text):
        """Normalise a comment for text analysis.

        Lower-cases the text, then removes bracketed spans, punctuation
        characters, words containing a digit, and newline characters, in
        that order.

        Args:
            text: Raw comment text.

        Returns:
            The cleaned string.
        """
        text = text.lower()
        text = self._BRACKETED_RE.sub('', text)
        text = self._PUNCTUATION_RE.sub('', text)
        text = self._WORD_WITH_DIGIT_RE.sub('', text)
        return text.replace('\n', '')

    def get_yt_comments(self, vID):
        """Fetch and clean all top-level comments of a video.

        Args:
            vID: ID of the YouTube video.

        Returns:
            A DataFrame with a single ``comment`` column holding the cleaned
            text of each top-level comment. Replies are fetched but not
            included. The same frame is stored in ``self.df``. A video
            without comments yields an empty frame that still has the
            ``comment`` column.
        """
        self.video_id = vID
        # Keep the client on the instance so get_replies() can reuse it.
        self.yt_object = build('youtube', 'v3', developerKey=self.api_key)
        self.get_comments(self.yt_object, self.video_id, '')

        cleaned = [
            self.clean_text(comment) for comment, _replies in self.all_comments
        ]
        self.df = pd.DataFrame({'comment': cleaned})
        return self.df
# obj = youtubeAPI()
# # obj.set_video_id('bB7xkRsEq-g')
# dff = obj.get_yt_comments('bB7xkRsEq-g')
# print(dff)
# # obj.main()