-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathyoutube_utils.py
More file actions
52 lines (38 loc) · 1.3 KB
/
youtube_utils.py
File metadata and controls
52 lines (38 loc) · 1.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from typing import Optional
from urllib.parse import urlparse, parse_qs

from youtube_transcript_api import YouTubeTranscriptApi
def get_video_id_from_url(url: str) -> str:
    """
    Extract the YouTube video id from a given YouTube link.

    The id is taken from the ``v`` query parameter when it is present and
    non-empty (``https://www.youtube.com/watch?v=<id>``). Otherwise the last
    non-empty segment of the URL path is used (``https://youtu.be/<id>``,
    ``https://www.youtube.com/embed/<id>``).

    @params
        url: YouTube video link
    @return
        video id, or an empty string when the link has neither a ``v``
        parameter nor any path segment
    """
    url_chunks = urlparse(url)

    # parse_qs drops blank values by default, so both a missing "v" and an
    # empty "?v=" end up as "" here and fall through to the path lookup.
    video_id = parse_qs(url_chunks.query).get("v", [""])[0]
    if video_id:
        return video_id

    # Strip trailing slashes before splitting so that "https://youtu.be/<id>/"
    # yields the id rather than the empty segment after the final "/".
    return url_chunks.path.rstrip("/").split("/")[-1]
def get_youtube_transcript(youtube_link: str) -> Optional[str]:
    """
    Fetch the transcript of a YouTube video and return it as one string.

    The video id is extracted from the link with get_video_id_from_url, the
    transcript entries are requested through
    YouTubeTranscriptApi.get_transcript, and the "text" field of every entry
    is joined with single spaces.

    @params
        youtube_link: YouTube link
    @return
        full transcript text, or None when the transcript could not be
        retrieved (the error is printed, not raised)
    """
    # get the video id from the link
    video_id = get_video_id_from_url(youtube_link)

    try:
        sub_transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(sub_content["text"] for sub_content in sub_transcript)
    except Exception as error:
        # Best-effort by design: any failure while fetching or joining the
        # transcript is reported on stdout and turned into a None result
        # instead of propagating to the caller.
        message = "cannot extract the transcript. Error: {}".format(str(error))
        print(message)
        return None