-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnew_app.py
More file actions
144 lines (120 loc) · 4.61 KB
/
new_app.py
File metadata and controls
144 lines (120 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import functools
import os

import easyocr
import pyttsx3
import requests
import streamlit as st
from PIL import Image
from streamlit_drawable_canvas import st_canvas
# Google Custom Search credentials.
# SECURITY: a hard-coded API key committed to source control is leaked the
# moment the repo is shared — prefer supplying GOOGLE_API_KEY / GOOGLE_CSE_CX
# via the environment. The literals remain only as a backward-compatible
# fallback and this key should be rotated.
api_key = os.environ.get("GOOGLE_API_KEY", "AIzaSyDVxTej4dwhhFTaGRFHYHeuFwwxYBWaWE8")
cx = os.environ.get("GOOGLE_CSE_CX", "c5bc3c5d3db9e4084")
def fetch_image_urls(query):
    """Query the Google Custom Search API for images matching *query*.

    Parameters:
        query: Free-text search string (here, the OCR'd canvas text).

    Returns:
        A list of direct image URLs; empty when the API returns no
        "items" key (no matches, or quota exhausted).

    Raises:
        requests.HTTPError: if the API responds with a non-2xx status.
        requests.Timeout: if the API does not answer within 10 seconds.
    """
    base_url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "q": query,
        "cx": cx,
        "key": api_key,
        "searchType": "image",
    }
    # timeout= prevents the Streamlit worker hanging indefinitely on a
    # stalled connection (the original call had no timeout at all).
    response = requests.get(base_url, params=params, timeout=10)
    # Fail loudly on HTTP errors instead of silently returning [] from
    # an error-body JSON payload.
    response.raise_for_status()
    data = response.json()
    # Extract image URLs from the response; "items" is absent on empty results.
    image_urls = [item["link"] for item in data.get("items", [])]
    return image_urls
@functools.lru_cache(maxsize=1)
def _get_reader():
    """Build the EasyOCR English reader once and reuse it.

    Reader construction loads the detection/recognition models, which is
    expensive; the original code rebuilt it on every button click.
    """
    return easyocr.Reader(['en'], gpu=False, verbose=False)


# Function to recognize text from image
def recognize_text(image):
    """Run OCR on *image* (a file path, or any input easyocr accepts).

    Returns:
        A ``(result_words, result_chars)`` tuple: the first is easyocr's
        detailed output (bounding box, text, confidence per detection),
        the second the plain text strings only (``detail=0``).
    """
    reader = _get_reader()
    result_words = reader.readtext(image)            # detailed word-level results
    result_chars = reader.readtext(image, detail=0)  # text-only output
    return result_words, result_chars
# Function to generate speech from recognized text
def generate_speech(text, gender='female'):
    """Speak *text* aloud through the local pyttsx3 TTS engine (blocking).

    Parameters:
        text: The string to vocalize.
        gender: ``'male'`` selects the first installed voice, anything
            else the second. NOTE(review): voice ordering is
            platform-dependent, so index 0/1 is only a heuristic.
    """
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    # BUG FIX: the original indexed voices[1] unconditionally, which raises
    # IndexError on systems with fewer than two installed voices. Fall back
    # to the engine's default voice when the requested index is missing.
    index = 0 if gender == 'male' else 1
    if index < len(voices):
        engine.setProperty('voice', voices[index].id)
    engine.say(text)
    engine.runAndWait()  # blocks until playback finishes
def save_image_to_file(image_data, filename):
    """Persist a raw canvas pixel array to *filename* as an RGB image."""
    # The canvas delivers an RGBA float/int array; coerce to uint8 and
    # drop the alpha channel before writing.
    pil_image = Image.fromarray(image_data.astype("uint8"))
    pil_image.convert("RGB").save(filename)
def main():
    """Render the HandScribeVoice Streamlit app.

    Flow: the user draws text on a canvas; on button press the drawing is
    saved to disk, OCR'd with easyocr, a related web image is fetched via
    Google Custom Search, and the recognized text is spoken aloud.
    """
    st.set_page_config(page_title="HandScribeVoice", page_icon=":pencil2:")
    # Custom CSS styles for the sidebar container and buttons.
    st.markdown(
        """
        <style>
        .sidebar .sidebar-content {
            background-color: #f0f2f6;
            padding: 20px;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        }
        .sidebar .sidebar-content .block-container {
            margin-bottom: 20px;
        }
        .sidebar .sidebar-content .block-container .stButton>button {
            width: 100%;
            border-radius: 5px;
            background-color: #4CAF50;
            color: white;
            padding: 10px;
            font-size: 16px;
            cursor: pointer;
            border: none;
            margin-top: 10px;
        }
        .sidebar .sidebar-content .block-container .stButton>button:hover {
            background-color: #45a049;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
    st.title("Canvas Text Recognizer")
    # Draw on canvas
    st.sidebar.title("HandScribeVoice")
    st.sidebar.markdown("Draw a text on the canvas below and convert it to voice")
    canvas_result = st_canvas(
        fill_color="rgb(255, 255, 255)",  # Fixed fill color with white
        stroke_width=10,
        stroke_color="rgb(0, 0, 0)",
        background_color="rgb(255, 255, 255)",
        height=300,
        drawing_mode="freedraw",
        key="canvas",
    )
    # Select voice gender
    voice_gender = st.sidebar.radio("Select Voice Gender", ("Male", "Female"))
    # Button to recognize text
    if st.sidebar.button("Recognize Text", key="recognize_button"):
        if canvas_result.image_data is not None:
            st.image(canvas_result.image_data, caption='Drawing', use_column_width=True)
            st.write("Recognizing text...")
            # Save the drawn image to a temporary file so easyocr can read it.
            tmp_file_path = "drawn_image.png"
            save_image_to_file(canvas_result.image_data, tmp_file_path)
            # Read the image using easyocr
            result_words, result_chars = recognize_text(tmp_file_path)
            # Process recognized words
            recognized_words = " ".join([text[1] for text in result_words])
            st.write("Recognized Words:", recognized_words)
            # Process recognized characters
            recognized_chars = "".join(result_chars)
            st.write("Recognized Characters:", recognized_chars)
            # Prefer the word-level result; fall back to the raw characters.
            recognized_text = recognized_words if recognized_words else recognized_chars
            st.write("Recognized Text:", recognized_text)
            # BUG FIX: the original indexed image_urls[0] unconditionally,
            # raising IndexError whenever the search returned no items. It
            # also left a debug print() in place; removed.
            image_urls = fetch_image_urls(recognized_text)
            if image_urls:
                st.image(
                    image_urls[0],
                    width=400,  # Manually adjust the width of the image as per requirement
                )
            else:
                st.warning("No related image found for the recognized text.")
            # Speak out the recognized text with selected voice gender
            gender = 'male' if voice_gender == "Male" else 'female'
            generate_speech(recognized_text, gender=gender)
# Script entry point: launch the Streamlit app only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()