VideoEditorAPI/test_subtitle_generation.py at main · jdportugal/VideoEditorAPI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
Test subtitle generation with working video URLs
"""

import requests
import time

def test_subtitle_generation():
    """Run an end-to-end subtitle-generation check against the local API.

    The check performs three steps against ``http://localhost:5000``:

    1. ``GET /health`` must answer with HTTP 200.
    2. ``POST /add-subtitles`` submits a job for a sample video.
    3. ``GET /job-status/<job_id>`` is polled every 10 seconds, for at most
       30 attempts (about 5 minutes).

    Returns:
        bool: True when the job reports ``completed``. False when the health
        check fails, the job is rejected, the job reports ``failed``, polling
        times out, or an unexpected exception is raised.
    """
    api_base = "http://localhost:5000"
    max_attempts = 30        # 30 polls x 10 s = 5 minutes max
    poll_interval = 10       # seconds between status checks

    # Sample video submitted for subtitle generation
    test_video = {
        "name": "W3Schools Video with Audio",
        "url": "https://www.w3schools.com/html/mov_bbb.mp4",
        "language": "en"
    }

    print("🎤 Testing Subtitle Generation")
    print("=" * 50)
    print(f"Video: {test_video['name']}")
    print(f"URL: {test_video['url']}")
    print(f"Language: {test_video['language']}")

    try:
        # Test API health
        health_response = requests.get(f"{api_base}/health", timeout=10)
        if health_response.status_code != 200:
            print(f"❌ API Health: FAILED ({health_response.status_code})")
            # Return False (not None) so every failure path has the same type.
            return False
        print("✅ API Health: OK")

        # Create subtitle generation job
        payload = {
            "url": test_video['url'],
            "language": test_video['language'],
            "return_subtitles_file": True,
            "settings": {
                "font-size": 100,
                "line-color": "#FFFFFF",
                "position": "bottom-center"
            }
        }

        print("\n📋 Creating subtitle job...")
        response = requests.post(f"{api_base}/add-subtitles", json=payload, timeout=30)

        if response.status_code != 200:
            print(f"❌ Job creation failed: {response.status_code}")
            try:
                error_data = response.json()
                print(f"Error details: {error_data}")
            except ValueError:
                # Body was not valid JSON; show the raw text instead.
                print(f"Response: {response.text}")
            return False

        job_data = response.json()
        job_id = job_data['job_id']
        print(f"✅ Job Created: {job_id}")

        print("\n📋 Monitoring subtitle generation...")

        # Monitor job progress
        for attempt in range(1, max_attempts + 1):
            time.sleep(poll_interval)

            try:
                status_response = requests.get(f"{api_base}/job-status/{job_id}", timeout=10)
                if status_response.status_code != 200:
                    print(f"❌ Status check error: {status_response.status_code}")
                    continue

                status_data = status_response.json()
                status = status_data.get('status')

                print(f"🔄 Attempt {attempt}: Status = {status}")

                if status == 'completed':
                    print("✅ SUCCESS! Subtitle generation completed!")
                    print(f"📁 Video Output: {status_data.get('output_path', 'N/A')}")
                    print(f"📄 Subtitle File: {status_data.get('subtitle_path', 'N/A')}")
                    return True
                if status == 'failed':
                    error_msg = status_data.get('error', 'Unknown error')
                    print(f"❌ FAILED: {error_msg}")
                    return False
                if status not in ('pending', 'processing'):
                    print(f"⚠️  Unknown status: {status}")

            except Exception as e:
                # Best effort: a single failed poll should not abort the run;
                # report it and try again on the next attempt.
                print(f"❌ Error during status check: {e}")

        print("⏰ Subtitle generation timed out")
        return False

    except Exception as e:
        # Top-level boundary of the check: report anything unexpected
        # (connection refused, malformed response, ...) as a failed run.
        print(f"❌ Test failed: {e}")
        return False

def test_audio_extraction_diagnosis():
    """Check that the API can download and process a video at all.

    Submits a ``/split-video`` job (seconds 1.0 to 3.0 of a sample video) to
    ``http://localhost:5000``, waits 5 seconds, then reads the job status
    once. This separates download problems from audio/subtitle problems.

    Returns:
        bool: False when the job cannot be created, a request raises, or the
        job already reports ``failed``. True otherwise, including when the
        single status check returns a non-200 response.
    """
    api_base = "http://localhost:5000"

    print("\n🔧 Testing Audio Extraction Diagnosis")
    print("=" * 50)

    # Test the video that worked for splitting
    working_video = "https://www.w3schools.com/html/mov_bbb.mp4"

    # Create a simple job to see if the issue is in download or audio processing
    payload = {
        "url": working_video,
        "start_time": 1.0,
        "end_time": 3.0
    }

    print("📋 Testing video download first...")
    try:
        response = requests.post(f"{api_base}/split-video", json=payload, timeout=30)

        if response.status_code != 200:
            print(f"❌ Download test failed: {response.status_code}")
            return False

        job_data = response.json()
        job_id = job_data['job_id']
        print(f"✅ Download test job: {job_id}")

        # Quick status check; the timeout keeps an unresponsive server from
        # hanging the diagnosis indefinitely.
        time.sleep(5)
        status_response = requests.get(f"{api_base}/job-status/{job_id}", timeout=10)
        if status_response.status_code == 200:
            status_data = status_response.json()
            status = status_data.get('status')
            print(f"📊 Download status: {status}")
            if status == 'failed':
                print(f"❌ Download failed: {status_data.get('error')}")
                # A failed job must not be reported as a working download.
                return False
            print("✅ Download appears successful")

        return True

    except (requests.RequestException, ValueError, KeyError) as e:
        # Connection problems, non-JSON bodies, or a response without
        # 'job_id' all mean the download path could not be verified.
        print(f"❌ Download test failed: {e}")
        return False

if __name__ == "__main__":
    # Script entry point: run the download diagnosis first and only attempt
    # subtitle generation when the basic download path works.
    print("🧪 Audio/Subtitle System Diagnosis")
    print("=" * 60)

    # First test basic download
    download_ok = test_audio_extraction_diagnosis()

    if download_ok:
        # "\n" (not "\\n") so a blank line is printed rather than the
        # literal characters backslash + n.
        print("\n" + "=" * 60)
        # Then test subtitle generation
        subtitle_ok = test_subtitle_generation()

        print("\n" + "=" * 60)
        print("📊 DIAGNOSIS RESULTS:")
        print(f"   • Video Download: {'✅ WORKING' if download_ok else '❌ FAILED'}")
        print(f"   • Subtitle Generation: {'✅ WORKING' if subtitle_ok else '❌ FAILED'}")

        if not subtitle_ok:
            print("\n💡 RECOMMENDATION:")
            print("   • The issue is confirmed: Google Drive large files return HTML")
            print("   • Audio extraction fails because the file is HTML, not MP4")
            print("   • Solution: Use alternative video hosting for subtitle generation")
            print("   • Test command that should work:")
            # The trailing "\\" prints a single backslash: the shell
            # line-continuation character of the suggested curl command.
            print("     curl -X POST http://localhost:5000/add-subtitles \\")
            print("       -H 'Content-Type: application/json' \\")
            print("       -d '{")
            print("         \"url\": \"https://www.w3schools.com/html/mov_bbb.mp4\",")
            print("         \"language\": \"en\"")
            print("       }'")
    else:
        print("\n❌ Basic download failed - investigating further...")