-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp2.py
More file actions
137 lines (112 loc) · 4.5 KB
/
app2.py
File metadata and controls
137 lines (112 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Libraries
import pandas as pd
import numpy as np
import os
import pickle
from tqdm.notebook import tqdm
from PIL import Image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import Model
import streamlit as st
import unicodedata
from tensorflow.keras.models import load_model
# Libraries
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mping
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input,Dense, LSTM, Embedding,Dropout,add
# --- Paths and pre-computed artifacts ---------------------------------------
# NOTE(review): hard-coded Windows paths; assumes features.pkl and captions.txt
# exist at these locations — confirm before running on another machine.
BASE_DIR = "D:\\Data_science\\Mini Project\\image caption generater"
WORKING_DIR = 'C:\\Users\\mades\\Documents\\Image Caption Generater'

# Pre-extracted VGG16 image features, keyed by image id (no file extension).
# SECURITY: pickle.load executes arbitrary code — only load trusted files.
with open(os.path.join(WORKING_DIR, 'features.pkl'), 'rb') as f:
    features = pickle.load(f)

# captions.txt: first line is a header row, skipped via next(f).
with open(os.path.join(BASE_DIR, 'captions.txt'), 'r') as f:
    next(f)
    captions_doc = f.read()

# Build mapping: image_id -> list of caption strings for that image.
mapping = {}
for line in tqdm(captions_doc.split('\n')):
    # split the line by comma(,)
    tokens = line.split(',')
    # BUG FIX: the original checked len(line) < 2 (character count), so a
    # comma-free line such as "xyz" slipped through and produced a bogus
    # key with an empty caption. Guard on the token count instead.
    if len(tokens) < 2:
        continue
    image_id, caption = tokens[0], tokens[1:]
    # Remove the file extension so ids match the keys used in features.pkl.
    image_id = image_id.split('.')[0]
    # Captions may themselves contain commas; rejoin the remaining tokens.
    caption = " ".join(caption)
    # Store the caption, creating the per-image list on first sight.
    mapping.setdefault(image_id, []).append(caption)
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word mapped to *integer*, or None if unknown.

    Index 0 (reserved by Keras for padding) and any out-of-vocabulary
    index yield None, which the caller uses as a stop signal.
    """
    # Keras Tokenizer maintains index_word, the exact inverse of word_index,
    # so an O(1) dict lookup replaces the original O(vocab) linear scan that
    # ran once per generated word.
    return tokenizer.index_word.get(integer)
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
    """Greedily decode a caption for a pre-extracted image feature vector.

    Starting from the 'startseq' tag, repeatedly asks *model* for the most
    probable next word until 'endseq' is produced, an index cannot be mapped
    back to a word, or *max_length* steps have elapsed. Returns the full
    text including the start/end tags.
    """
    caption = 'startseq'
    for _ in range(max_length):
        # Encode the partial caption and pad it to the model's input length.
        padded = pad_sequences(tokenizer.texts_to_sequences([caption]), max_length)
        # Greedy search: take the single most probable next-word index.
        probs = model.predict([image, padded], verbose=0)
        next_word = idx_to_word(np.argmax(probs), tokenizer)
        # An unmappable index means we cannot continue decoding.
        if next_word is None:
            break
        caption = caption + " " + next_word
        # The end-of-sequence tag terminates generation.
        if next_word == 'endseq':
            break
    return caption
# --- Trained captioning model and its tokenizer ------------------------------
# Keras captioning model trained offline; loaded once at import time.
model = load_model(os.path.join(WORKING_DIR, "best200_model.keras"))
# NOTE(review): tokenizer.pkl is read from the current working directory,
# unlike the other artifacts which use WORKING_DIR — confirm this is intended.
# SECURITY: pickle.load executes arbitrary code; only load trusted files.
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)
def main():
    """Streamlit entry point: upload an image, extract VGG16 fc2 features,
    and display the generated caption."""
    st.title("Image Caption Generator")

    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Display the uploaded image
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image', use_column_width=True)

        # Generate and display caption on button click
        if st.button('Generate Caption'):
            # BUG FIX: PNG uploads may be RGBA or grayscale; img_to_array
            # would then yield 4 or 1 channels and crash VGG16, which needs
            # exactly 3. Normalize to RGB first.
            image = image.convert('RGB')
            # Resize to VGG16's expected 224x224 input.
            image = image.resize((224, 224))
            # Convert to array and add the batch dimension Keras expects.
            image_array = img_to_array(image)
            image_array = image_array.reshape((1,) + image_array.shape)
            image_array = preprocess_input(image_array)
            # Use VGG16's penultimate (fc2) layer output as the feature vector.
            # NOTE(review): the VGG16 model is rebuilt on every click — consider
            # caching it (e.g. st.cache_resource) to avoid repeated setup cost.
            vgg_model = VGG16()
            vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
            feature = vgg_model.predict(image_array, verbose=0)
            # max_length=35 must match the value used when training the model.
            caption = predict_caption(model, feature, tokenizer, max_length=35)
            # Strip the startseq/endseq tags before showing the caption.
            st.markdown(f"<div style='border: 2px solid red; border-radius: 5px; padding: 10px;'><b>Generated Caption:</b> {' '.join(caption.split()[1:-1])}</div>", unsafe_allow_html=True)


# Run the app
if __name__ == "__main__":
    main()