-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest_subtitle_generation.py
More file actions
182 lines (149 loc) · 6.75 KB
/
test_subtitle_generation.py
File metadata and controls
182 lines (149 loc) · 6.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
Test subtitle generation with working video URLs
"""
import requests
import time
def test_subtitle_generation():
    """Run an end-to-end subtitle generation check against the local API.

    Checks ``/health``, submits a job to ``/add-subtitles`` for a fixed
    sample video, then polls ``/job-status/<job_id>`` every 10 seconds for
    at most 30 attempts (about five minutes).

    Returns:
        bool: True when the job reports ``completed``. False when the
        health check fails, the job is rejected or reports ``failed``,
        polling runs out of attempts, or a request raises.
    """
    api_base = "http://localhost:5000"
    poll_interval = 10  # seconds between status checks
    max_attempts = 30  # 30 checks x 10 s = 5 minutes max

    # Test with a working video URL that has speech
    test_video = {
        "name": "W3Schools Video with Audio",
        "url": "https://www.w3schools.com/html/mov_bbb.mp4",
        "language": "en",
    }

    # NOTE(review): the non-ASCII glyphs in the messages below look like
    # UTF-8 emoji decoded with the wrong code page. They are left as found,
    # except where a literal had been split across two lines - confirm
    # against the original file.
    print("π€ Testing Subtitle Generation")
    print("=" * 50)
    print(f"Video: {test_video['name']}")
    print(f"URL: {test_video['url']}")
    print(f"Language: {test_video['language']}")

    try:
        # Test API health
        health_response = requests.get(f"{api_base}/health", timeout=10)
        if health_response.status_code != 200:
            print(f"β API Health: FAILED ({health_response.status_code})")
            # Explicit False so every failure path returns the same type.
            return False
        print("✅ API Health: OK")

        # Create subtitle generation job
        payload = {
            "url": test_video['url'],
            "language": test_video['language'],
            "return_subtitles_file": True,
            "settings": {
                "font-size": 100,
                "line-color": "#FFFFFF",
                "position": "bottom-center",
            },
        }

        print("\nπ Creating subtitle job...")
        response = requests.post(
            f"{api_base}/add-subtitles", json=payload, timeout=30
        )

        if response.status_code != 200:
            print(f"β Job creation failed: {response.status_code}")
            try:
                error_data = response.json()
                print(f"Error details: {error_data}")
            except ValueError:
                # The error body was not JSON; show it raw instead.
                print(f"Response: {response.text}")
            return False

        job_data = response.json()
        job_id = job_data['job_id']
        print(f"✅ Job Created: {job_id}")

        # Monitor job progress
        print("\nπ Monitoring subtitle generation...")
        for attempt in range(1, max_attempts + 1):
            time.sleep(poll_interval)

            try:
                status_response = requests.get(
                    f"{api_base}/job-status/{job_id}", timeout=10
                )
                if status_response.status_code != 200:
                    print(f"β Status check error: {status_response.status_code}")
                    continue

                status_data = status_response.json()
                status = status_data.get('status')
                print(f"π Attempt {attempt}: Status = {status}")

                if status == 'completed':
                    print("✅ SUCCESS! Subtitle generation completed!")
                    print(f"π Video Output: {status_data.get('output_path', 'N/A')}")
                    print(f"π Subtitle File: {status_data.get('subtitle_path', 'N/A')}")
                    return True

                if status == 'failed':
                    error_msg = status_data.get('error', 'Unknown error')
                    print(f"β FAILED: {error_msg}")
                    return False

                # 'pending' and 'processing' simply wait for the next poll.
                if status not in ('pending', 'processing'):
                    print(f"β οΈ Unknown status: {status}")
            except Exception as e:
                # Best effort: one failed poll must not abort the whole run.
                print(f"β Error during status check: {e}")

        print("β° Subtitle generation timed out")
        return False

    except Exception as e:
        # Top-level boundary of the check: report the problem and fail.
        print(f"β Test failed: {e}")
        return False
def test_audio_extraction_diagnosis():
    """Check that the API can fetch the sample video via ``/split-video``.

    Submits a short split job (``start_time`` 1.0, ``end_time`` 3.0) for
    the sample video, waits five seconds, then reads the job status once.

    Returns:
        bool: False when the job is rejected, the job already reports
        ``failed``, or a request raises. True otherwise, including when
        the single status check answers with a non-200 response.
    """
    api_base = "http://localhost:5000"

    # NOTE(review): the non-ASCII glyphs in the messages below look like
    # UTF-8 emoji decoded with the wrong code page. They are left as found,
    # except where a literal had been split across two lines - confirm
    # against the original file.
    print("\nπ§ Testing Audio Extraction Diagnosis")
    print("=" * 50)

    # Test the video that worked for splitting
    working_video = "https://www.w3schools.com/html/mov_bbb.mp4"

    # Create a simple job to see if the issue is in download or audio processing
    payload = {
        "url": working_video,
        "start_time": 1.0,
        "end_time": 3.0,
    }

    print("π Testing video download first...")
    try:
        response = requests.post(
            f"{api_base}/split-video", json=payload, timeout=30
        )
        if response.status_code != 200:
            print(f"β Download test failed: {response.status_code}")
            return False

        job_data = response.json()
        job_id = job_data['job_id']
        print(f"✅ Download test job: {job_id}")

        # Quick status check
        time.sleep(5)
        # A timeout keeps an unresponsive server from hanging the script.
        status_response = requests.get(
            f"{api_base}/job-status/{job_id}", timeout=10
        )
        if status_response.status_code != 200:
            # The job was accepted; an unreadable status is not treated as
            # a download failure.
            return True

        status_data = status_response.json()
        status = status_data.get('status')
        print(f"π Download status: {status}")

        if status == 'failed':
            print(f"β Download failed: {status_data.get('error')}")
            # A failed job must not be reported as a working download.
            return False

        print("✅ Download appears successful")
        return True
    except (requests.RequestException, KeyError, ValueError) as exc:
        # Unreachable server, malformed JSON, or a response without
        # 'job_id': report it instead of crashing with a traceback.
        print(f"β Download test failed: {exc}")
        return False
if __name__ == "__main__":
    # Script entry point: run the download check first and only attempt
    # subtitle generation when it passes, then print a summary.
    #
    # NOTE(review): the non-ASCII glyphs in the messages below look like
    # UTF-8 emoji decoded with the wrong code page. They are left as found,
    # except where a literal had been split across two lines - confirm
    # against the original file.
    print("π§ͺ Audio/Subtitle System Diagnosis")
    print("=" * 60)

    # First test basic download
    download_ok = test_audio_extraction_diagnosis()

    if download_ok:
        print("\n" + "=" * 60)
        # Then test subtitle generation
        subtitle_ok = test_subtitle_generation()

        print("\n" + "=" * 60)
        print("π DIAGNOSIS RESULTS:")
        print(f" β’ Video Download: {'✅ WORKING' if download_ok else 'β FAILED'}")
        print(f" β’ Subtitle Generation: {'✅ WORKING' if subtitle_ok else 'β FAILED'}")

        if not subtitle_ok:
            # NOTE(review): this advice is printed for any subtitle failure,
            # although the checks above use the w3schools sample URL rather
            # than Google Drive - confirm it still applies.
            print("\nπ‘ RECOMMENDATION:")
            print(" β’ The issue is confirmed: Google Drive large files return HTML")
            print(" β’ Audio extraction fails because the file is HTML, not MP4")
            print(" β’ Solution: Use alternative video hosting for subtitle generation")
            print(" β’ Test command that should work:")
            # The trailing "\\" prints one backslash: a shell line continuation.
            print(" curl -X POST http://localhost:5000/add-subtitles \\")
            print(" -H 'Content-Type: application/json' \\")
            print(" -d '{")
            print(" \"url\": \"https://www.w3schools.com/html/mov_bbb.mp4\",")
            print(" \"language\": \"en\"")
            print(" }'")
    else:
        print("\nβ Basic download failed - investigating further...")