-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
130 lines (107 loc) · 4.38 KB
/
main.py
File metadata and controls
130 lines (107 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# My goal is to gain a better understanding of the frontier models, so I'll use Gemini
import speech_recognition as sr
import datetime
import os
import logging
import google.generativeai as gai
import streamlit as st
from gtts import gTTS
def greet():
    """Greet the user on the Streamlit page and show the current local time.

    The salutation depends on the local hour:
      * 00:00-11:59 -> "Good Morning!"
      * 12:00-18:59 -> "Good Afternoon!"
      * 19:00-23:59 -> "Good Evening!"

    The time is shown on a 12-hour clock with an AM/PM suffix. The greeting
    is written with ``st.write`` and the event is logged once at INFO level.
    """
    now = datetime.datetime.now()
    hour24 = now.hour

    # Decide the salutation on the 24-hour value. Comparing the 12-hour value
    # (as before) misfiled 12:xx PM as evening, because 12 is not <= 6.
    if hour24 < 12:
        salutation = "Good Morning!"
    elif hour24 < 19:
        salutation = "Good Afternoon!"
    else:
        salutation = "Good Evening!"

    period = "AM" if hour24 < 12 else "PM"
    hour12 = hour24 % 12 or 12  # 0 and 12 o'clock both display as 12

    # Zero-pad minutes/seconds so 3:05:07 is not rendered as 3:5:7.
    clock = f"{hour12}:{now.minute:02d}:{now.second:02d} {period}"

    st.write(f"{salutation} \n It is {clock}")
    st.write("I am your assistant. How may I help you?")
    logging.info("Greeted the user")
# Log file location: <LOG_DIR>/<LOG_FILE_NAME>, relative to the working directory.
LOG_DIR = 'logs'  # optional log directory
LOG_FILE_NAME = 'app.log'

# os.makedirs (unlike os.mkdir) also creates any missing parent directories;
# exist_ok=True keeps it from raising when the directory is already there.
os.makedirs(LOG_DIR, exist_ok=True)

# Full path of the log file inside the log directory.
log_path = os.path.join(LOG_DIR, LOG_FILE_NAME)

# Root logger: write INFO-and-above records to the file, one record per line
# formatted as "<timestamp> - <message>".
logging.basicConfig(
    filename=log_path,
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
)
def take_command():
    """Record one phrase from the microphone and transcribe it to text.

    Audio is captured from ``sr.Microphone()`` and transcribed with
    ``Recognizer.recognize_google`` using ``language='en'``.

    Returns:
        str: The recognized text. If recognition fails for any reason, the
        literal string ``"None"`` (not the ``None`` object) is returned so
        existing callers keep working; the failure is logged with its
        traceback.
    """
    recognizer = sr.Recognizer()
    # NOTE(review): per the SpeechRecognition docs this is the length of
    # silence (in seconds) that ends a phrase - confirm against the version
    # in use.
    recognizer.pause_threshold = 1

    # Hold the microphone only while recording; transcription happens after.
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)

    try:
        print("Recognizing...")
        query = recognizer.recognize_google(audio, language='en')
    except Exception:
        # Deliberately broad: this is a best-effort step and must not crash
        # the app. Unlike before, the cause is recorded instead of discarded.
        logging.exception("Speech recognition failed")
        print("Say that again please...")
        return "None"

    print(f"User said: {query}\n")
    return query
def text_to_speech(text, filename="test_speech.mp3"):
    """Convert *text* to spoken English with gTTS and save it as an MP3.

    Args:
        text: The text to speak.
        filename: Path of the MP3 file to write. Defaults to
            ``"test_speech.mp3"``, the name the rest of this file reads.

    After saving, the file is opened with the operating system's default
    handler where ``os.startfile`` is available (Windows). On other
    platforms no playback is attempted.
    """
    speech = gTTS(text=text, lang='en')
    speech.save(filename)
    logging.info("Converted text to speech")

    # The previous version ran the shell command "start speech.mp3", a file
    # that is never written. Open the file that was actually saved, and do it
    # without going through a shell.
    if hasattr(os, "startfile"):
        os.startfile(filename)
# greet()
# text_to_speech(take_command())
def gemini_model(input):
    """Send a prompt to the Gemini model and return the text of its reply.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable
    rather than being hard-coded in the source file.

    Args:
        input: The prompt passed to ``generate_content``. (The name shadows
            the ``input`` builtin; it is kept so existing callers that pass
            it by keyword continue to work.)

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        RuntimeError: If ``GOOGLE_API_KEY`` is unset or blank.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
    if not api_key:
        # Fail early with an actionable message instead of sending a request
        # with an empty key.
        raise RuntimeError(
            "Set the GOOGLE_API_KEY environment variable to your Gemini API key."
        )
    gai.configure(api_key=api_key)

    # 'gemini-1.5-flash' was chosen by the author as the cheap model.
    model = gai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(input)
    return response.text
def main():
    """Render the Streamlit page and run one listen -> answer -> speak cycle.

    Shows the title and greeting, then waits for the "Speak" button. When it
    is pressed: records a spoken command, sends it to Gemini, converts the
    reply to an MP3, and shows the reply text together with an audio player
    and a download button.
    """
    st.title("AI Voice Assistant with Gemini")
    greet()

    if not st.button("Speak"):
        return

    with st.spinner("Listening..."):
        text = take_command()
        if text == "None":
            # take_command() reports failure with the literal string "None";
            # do not forward that placeholder to the model as a prompt.
            st.write("Sorry, I could not understand that. Please try again.")
            return

        response = gemini_model(text)
        # Writes the spoken reply to test_speech.mp3.
        text_to_speech(response)
        # Context manager closes the file handle (previously it was leaked).
        with open("test_speech.mp3", "rb") as audio_file:
            audio_bytes = audio_file.read()

    st.text_area(label="AI Response", value=response, height=200)
    st.audio(audio_bytes, format="audio/mp3", start_time=0, autoplay=True)

    # Timestamped download name. Uses '-' rather than ':' because colons are
    # not allowed in Windows file names, and drops the stray space that used
    # to precede ".mp3".
    timestamp = datetime.datetime.now().strftime("%H-%M-%S")
    st.download_button(
        label="Download Audio",
        data=audio_bytes,
        file_name=f"{timestamp}.mp3",
        mime="audio/mp3",
    )
# Launch with: streamlit run main.py
# (Streamlit is used here as an alternative to Gradio.)
# The guard keeps the app from starting when this module is merely imported;
# `streamlit run` executes the script as "__main__", so it still starts there.
if __name__ == "__main__":
    main()
'''In Summary:
I created a voice assistant using the speech_recognition library to recognize the user's voice input.
1. I used the Google Text-to-Speech (gTTS) library to convert text to speech.
2. I used the GenerativeAI library to generate responses to the user's queries.
3. I used the Streamlit library to create a web application for the voice assistant.
4. I used the logging library to log information about the voice assistant.
5. I used the datetime library to get the current time and date.'''