forked from Sukanya41455/AccentChanger
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
182 lines (160 loc) · 6.4 KB
/
app.py
File metadata and controls
182 lines (160 loc) · 6.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import streamlit as st
import boto3
import pyaudio
from io import BytesIO
import os
from tempfile import NamedTemporaryFile
import speech_recognition as sr
# AWS settings are read from the process environment; nothing is hard-coded
# here. The region falls back to "us-west-2", and each credential value is
# None when its environment variable is not set.
AWS_DEFAULT_REGION = os.environ.get("AWS_DEFAULT_REGION", "us-west-2")
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN")
# Build the AWS Polly client
def initialize_polly_client():
    """Create a boto3 Polly client from the module-level AWS settings.

    Returns:
        The client object, or None if creating it raised. In the failure
        case the error is reported in the Streamlit UI via ``st.error``.
    """
    try:
        client_options = {
            'region_name': AWS_DEFAULT_REGION,
            'aws_access_key_id': AWS_ACCESS_KEY_ID,
            'aws_secret_access_key': AWS_SECRET_ACCESS_KEY,
            'aws_session_token': AWS_SESSION_TOKEN,
        }
        polly_client = boto3.client('polly', **client_options)
    except Exception as exc:
        st.error(f"Error initializing AWS Polly client: {exc}")
        return None
    return polly_client
# Session state defaults: each key is seeded only when it is not present yet,
# so values already stored in st.session_state are left untouched.
_SESSION_DEFAULTS = (
    ('text', ''),  # Transcribed text
    ('run', False),  # Recording status
    ('status', "Ready to start recording."),  # Status message
)
for _state_key, _initial_value in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Begin a new recording session
def start_transcription():
    """Mark recording as active, clear the stored transcript, and set the
    "recording started" status message in the session state."""
    state = st.session_state
    state['run'] = True
    state['text'] = ''  # Drop whatever was transcribed before
    state['status'] = "Recording started. Please speak clearly."
def stop_transcription():
    """Mark recording as inactive and set the "recording stopped" status
    message in the session state. The stored transcript is not modified."""
    state = st.session_state
    state['run'] = False
    state['status'] = "Recording stopped. Processing final text..."
# Function to convert text to speech using AWS Polly with an Indian accent
def text_to_speech(text, voice_id='Aditi'):
    """Synthesize *text* with AWS Polly and save the MP3 to a temporary file.

    Args:
        text: The text to synthesize.
        voice_id: Polly voice to use; defaults to 'Aditi'.

    Returns:
        The path of the temporary ``.mp3`` file, or None when the Polly
        client could not be created or synthesis/writing raised (the error is
        shown via ``st.error``). The file is created with ``delete=False``,
        so it stays on disk until the caller removes it.
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    polly = initialize_polly_client()
    if polly is None:
        return None
    try:
        response = polly.synthesize_speech(Text=text, OutputFormat='mp3', VoiceId=voice_id)
        # Close the streaming response body once it has been drained, even if
        # read() raises, so the underlying connection is released.
        with closing(response['AudioStream']) as audio_stream:
            audio_bytes = audio_stream.read()
        # The context manager closes the file handle even if write() raises;
        # delete=False keeps the file on disk for the caller to play.
        with NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
            audio_file.write(audio_bytes)
        return audio_file.name
    except Exception as e:
        st.error(f"Error converting text to speech: {e}")
        return None
# Function to listen to audio and transcribe it synchronously
def listen_and_transcribe():
    """Record one phrase from the microphone and append its transcription.

    Blocks while listening (``timeout=5``, ``phrase_time_limit=5``) and while
    the Google Web Speech API request runs. On success the recognized text is
    appended to ``st.session_state['text']`` after a single space. Every
    outcome, success or failure, is written to ``st.session_state['status']``;
    failures other than a listen timeout are also shown with ``st.warning`` /
    ``st.error``. Nothing is returned and no exception is propagated for
    errors raised while opening the microphone, listening, or recognizing.
    """
    recognizer = sr.Recognizer()
    try:
        # Opening the microphone is inside the try block so that a missing
        # input device or audio backend is reported through the handlers
        # below instead of escaping as an unhandled exception.
        with sr.Microphone() as source:
            # Adjust for ambient noise
            recognizer.adjust_for_ambient_noise(source)
            st.session_state['status'] = "Listening... Please speak."
            audio = recognizer.listen(source, timeout=5, phrase_time_limit=5)
        # The microphone is released at this point; recognition only needs
        # the captured audio, not the open device.
        st.session_state['status'] = "Processing speech..."
        # Recognize the speech using Google Web Speech API
        transcribed_text = recognizer.recognize_google(audio)
        print(transcribed_text)
        st.session_state['text'] += " " + transcribed_text
        st.session_state['status'] = "Transcription successful!"
    except sr.UnknownValueError:
        st.warning("Could not understand the audio.")
        st.session_state['status'] = "Could not understand the audio."
    except sr.WaitTimeoutError:
        st.session_state['status'] = "Listening timed out, waiting for speech..."
    except sr.RequestError as e:
        st.error(f"Could not request results from Google Web Speech API; {e}")
        st.session_state['status'] = f"API request error: {e}"
    except Exception as e:
        st.error(f"Error occurred during transcription: {e}")
        st.session_state['status'] = f"Error occurred: {e}"
def main():
    """Render the AccentFlow page and handle its three buttons.

    Layout, top to bottom: page config and CSS, title/subtitle, a status box,
    Start/Stop buttons, the transcript, and (only when recording is not
    running and a transcript exists) a button that converts the transcript to
    speech with the 'Aditi' voice and plays it.

    Text taken from the session state is HTML-escaped before being placed in
    ``unsafe_allow_html`` markup. The status box lives in an ``st.empty()``
    placeholder so it can be redrawn after the button handlers change the
    status during the same script run.
    """
    # Local import: only this function needs it, for escaping interpolated text.
    import html

    # Set page configuration
    st.set_page_config(page_title="AccentFlow", page_icon="🎙️")
    # Custom CSS for centering and styling
    st.markdown("""
<style>
.center-content {
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
}
.title {
font-size: 2.5em;
color: #FFA500;
text-align: center;
font-weight: bold;
}
.subtitle {
font-size: 1.5em;
color: #666;
text-align: center;
margin-bottom: 1.5em;
}
.status-box {
border: 2px solid #FFA500;
padding: 10px;
margin-top: 15px;
border-radius: 8px;
color: #FFA500;
font-weight: bold;
text-align: center;
}
.transcription-box {
font-size: 1.2em;
color: #333;
border: 2px solid #ccc;
border-radius: 8px;
padding: 15px;
margin-top: 10px;
max-width: 600px;
}
.button-container {
display: flex;
justify-content: center;
gap: 15px;
margin-top: 20px;
}
</style>
""", unsafe_allow_html=True)
    # Header Section
    st.markdown("<div class='center-content'><div class='title'>AccentFlow 🎙️</div>", unsafe_allow_html=True)
    st.markdown("<div class='subtitle'>Real-Time Accent Changer App</div></div>", unsafe_allow_html=True)

    # Status Message: drawn into a placeholder so it can be refreshed below.
    status_slot = st.empty()

    def render_status():
        # The status may embed exception text, so escape it before it is
        # inserted into raw HTML.
        safe_status = html.escape(str(st.session_state['status']))
        status_slot.markdown(f"<div class='status-box'>{safe_status}</div>", unsafe_allow_html=True)

    render_status()

    # Start/Stop buttons for live transcription
    # NOTE(review): each st.markdown call is a separate element, so this
    # opening/closing div pair presumably does not wrap the columns - confirm.
    st.markdown("<div class='button-container'>", unsafe_allow_html=True)
    col1, col2 = st.columns([1, 1])
    with col1:
        if st.button("🎤 Start Listening"):
            start_transcription()
            render_status()  # Show "Recording started..." before the blocking listen call
            listen_and_transcribe()  # Call the function synchronously after clicking start
    with col2:
        if st.button("⏹️ Stop Listening"):
            stop_transcription()
    st.markdown("</div>", unsafe_allow_html=True)

    # Redraw the status box so it reflects what the handlers above just set,
    # rather than the value left over from the previous run.
    render_status()

    # Display the live transcription with some styling
    st.subheader("Live Transcription:")
    safe_transcript = html.escape(str(st.session_state['text']))
    st.markdown(f"<div class='transcription-box'>{safe_transcript}</div>", unsafe_allow_html=True)

    # Show conversion button only after recording is stopped and text is available
    if not st.session_state['run'] and st.session_state['text']:
        st.subheader("Convert to Speech with Indian Accent")
        if st.button("🗣️ Convert to Indian Accent"):
            accent_audio = text_to_speech(st.session_state['text'], voice_id='Aditi')
            if accent_audio:
                # text_to_speech leaves a temp file on disk; load it into
                # memory and remove it so conversions do not pile up files.
                try:
                    with open(accent_audio, "rb") as audio_fh:
                        audio_bytes = audio_fh.read()
                finally:
                    try:
                        os.remove(accent_audio)
                    except OSError:
                        # Best-effort cleanup; a leftover temp file must not
                        # prevent playback or mask a read error.
                        pass
                st.audio(audio_bytes, format='audio/mp3', autoplay=True)
# Script entry point: build the page only when this file is the running script
if __name__ == "__main__":
    main()