diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..3ffc2af --- /dev/null +++ b/.dockerignore @@ -0,0 +1,18 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +*.mp3 +*.wav +.env +.venv/ +.idea/ +.git/ +.gitignore +*.log +node_modules/ +*.pt +*.ckpt +*.zip +*.tar +*.onnx diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..df41045 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,29 @@ +name: Docker Build & Push + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main", "dev" ] + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . + push: true + tags: | + ${{ secrets.DOCKER_USERNAME }}/fastapi-app:latest + ${{ secrets.DOCKER_USERNAME }}/fastapi-app:${{ github.sha }} diff --git a/.gitignore b/.gitignore index 6be00c6..d80845d 100644 --- a/.gitignore +++ b/.gitignore @@ -28,11 +28,16 @@ replay_pid* .venv env/ venv/ -ENV/ env.bak/ venv.bak/ *.idea .DS_Store -*.h5 \ No newline at end of file + +__pycache__/ +*.pyc +audio/ +#emotion_diary/ +emotion_png/ +pyvenv.cfg \ No newline at end of file diff --git a/253cfe2a-382f-4bbe-8586-5e5fcbbe85ef.wav b/253cfe2a-382f-4bbe-8586-5e5fcbbe85ef.wav new file mode 100644 index 0000000..ee18674 Binary files /dev/null and b/253cfe2a-382f-4bbe-8586-5e5fcbbe85ef.wav differ diff --git a/4d37625a-4026-494e-912f-fcf2744cbdf0.wav b/4d37625a-4026-494e-912f-fcf2744cbdf0.wav new file mode 100644 index 0000000..86d332e Binary files /dev/null and b/4d37625a-4026-494e-912f-fcf2744cbdf0.wav differ diff --git a/791fcb5a-f100-4e73-a5f7-05306f930005.wav b/791fcb5a-f100-4e73-a5f7-05306f930005.wav new file mode 100644 index 0000000..d385133 Binary files /dev/null and b/791fcb5a-f100-4e73-a5f7-05306f930005.wav differ diff --git a/Adafruit_Python_DHT b/Adafruit_Python_DHT new file mode 160000 index 0000000..8f5e2c4 --- /dev/null +++ b/Adafruit_Python_DHT @@ -0,0 +1 @@ +Subproject commit 8f5e2c4d6ebba8836f6d31ec9a0c171948e3237d diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..acd2f1f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.9-slim +# ffmpeg 설치 추가 +RUN apt-get update && apt-get install -y ffmpeg + +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY . . 
+CMD ["uvicorn", "app.service.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/app/ML/ModelService.py b/app/ML/ModelService.py new file mode 100644 index 0000000..a89245c --- /dev/null +++ b/app/ML/ModelService.py @@ -0,0 +1,97 @@ +# import numpy as np +# from dotenv import load_dotenv +# from fastapi import Request, UploadFile, File, APIRouter +# from typing import List +# from tensorflow.keras.models import load_model +# from sentence_transformers import SentenceTransformer +# import io +# import requests +# +# from app.ML.audio_extractor_utils import get_features +# from app.ML.loss import boundary_enhanced_focal_loss +# from app.ML.plot_utils import save_plot, get_s3_png_url +# from app.ML.speech_to_text import speech_to_text +# +# import os +# +# from app.service.gpt import EmotionReportGPT +# from app.utils.convertFileExtension import convert_to_wav +# +# router = APIRouter( +# prefix="/api/fastapi", +# ) +# load_dotenv() +# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +# +# +# +# +# @router.post("/predict") +# async def predict(request: Request, files: List[UploadFile] = File(...)): +# # token = request.headers.get("Authorization").split(" ")[1] +# print(files) +# # 1) 임시 파일 저장 or 메모리 내 처리 +# wav_data_list = [] +# for file in files: +# raw = await file.read() +# ext = file.filename.split('.')[-1] # 'm4a', 'mp3' 등 +# wav_bytes = convert_to_wav(raw, ext) # BytesIO 변환 +# wav_data_list.append(wav_bytes) +# +# # 2) 오디오 특징 추출 +# all_feats = [] +# for wav_bytes in wav_data_list: +# # get_features 함수가 경로 입력이면, 아래처럼 메모리 파일 처리 필요 +# # 임시파일로 저장 후 경로 전달 or get_features 수정 필요 +# +# temp_path = f"temp_{file.filename}" +# with open(temp_path, "wb") as f: +# f.write(wav_bytes) +# feats = get_features(temp_path) +# os.remove(temp_path) +# all_feats.append(feats) +# +# all_feats = np.stack(all_feats, axis=0) +# pooled_feats = all_feats.mean(axis=0) +# audio_input = pooled_feats[np.newaxis, :, np.newaxis] +# +# # 3) STT & 텍스트 임베딩 +# texts = [] +# for wav_bytes in wav_data_list: +# temp_path = f"temp_stt.wav" +# with open(temp_path, "wb") as f: +# f.write(wav_bytes) +# text = speech_to_text(temp_path) +# os.remove(temp_path) +# texts.append(text) +# +# full_text = " . 
".join(texts) +# text_vec = embedding_model.encode([full_text])[0] +# text_input = text_vec[np.newaxis, :] +# +# # 4) 예측 +# prediction = model.predict([audio_input, text_input]) +# pred_percent = (prediction[0] * 100).tolist() +# +# # 5) JSON 응답 +# result = {label: round(p, 2) for label, p in zip(emotion_labels, pred_percent)} +# top_idx = np.argmax(pred_percent) +# result['predicted_emotion'] = emotion_labels[top_idx] +# +# local_path = save_plot(pred_percent) +# s3_path = get_s3_png_url(local_path) +# reporter = EmotionReportGPT(full_text, pred_percent) +# report_text = reporter.get_report_text() +# +# print(s3_path) +# +# # send_emotion_report_to_spring(s3_path, report_text) +# +# data = { +# "imageUrl": s3_path, +# "report_text": report_text +# } +# return data +# +# +# diff --git a/app/ML/audio_extractor_utils.py b/app/ML/audio_extractor_utils.py new file mode 100644 index 0000000..dcace4c --- /dev/null +++ b/app/ML/audio_extractor_utils.py @@ -0,0 +1,69 @@ +import librosa +import librosa.display +import numpy as np + + +def noise(data): + noise_amp = 0.035 * np.random.uniform() * np.amax(data) + data = data + noise_amp * np.random.normal(size=data.shape[0]) + return data + + +def stretch(data, rate=0.8): + return librosa.effects.time_stretch(y=data, rate=rate) + + +def shift(data): + shift_range = int(np.random.uniform(low=-5, high=5) * 1000) + return np.roll(data, shift_range) + + +def pitch(data, sampling_rate, pitch_factor=0.7): + return librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=pitch_factor) + + +def extract_features(data, sample_rate): + # ZCR + result = np.array([]) + zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0) + result = np.hstack((result, zcr)) # stacking horizontally + + # Chroma_stft + stft = np.abs(librosa.stft(data)) + chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0) + result = np.hstack((result, chroma_stft)) # stacking horizontally + + # MFCC + mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0) + result = np.hstack((result, mfcc)) # stacking horizontally + + # Root Mean Square Value + rms = np.mean(librosa.feature.rms(y=data).T, axis=0) + result = np.hstack((result, rms)) # stacking horizontally + + # MelSpectogram + mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0) + result = np.hstack((result, mel)) # stacking horizontally + + return result + + +def get_features(path): + data, sample_rate = librosa.load(path, duration=2.5, offset=0.0) + + # without augmentation + res1 = extract_features(data, sample_rate) + result = np.array(res1) + + # data with noise + noise_data = noise(data) + res2 = extract_features(noise_data, sample_rate) + result = np.concatenate((result, res2), axis=0) + + # data with stretching and pitching + new_data = stretch(data) + data_stretch_pitch = pitch(new_data, sample_rate) + res3 = extract_features(data_stretch_pitch, sample_rate) + result = np.concatenate((result, res3), axis=0) + + return result diff --git a/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5 b/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5 new file mode 100644 index 0000000..b4f22d5 Binary files /dev/null and b/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5 differ diff --git a/app/ML/loss.py b/app/ML/loss.py new file mode 100644 index 0000000..c41199b --- /dev/null +++ b/app/ML/loss.py @@ -0,0 +1,29 @@ +import tensorflow as tf + + +# 1. 
Boundary-Enhanced Focal Loss 구현 (소수 클래스 식별 강화) +def boundary_enhanced_focal_loss(y_true, y_pred, gamma=2.0, margin=0.3): + y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7) + + # 하드 샘플 마이닝 (낮은 확률로 예측된 샘플 식별) + correct_prob = tf.reduce_sum(y_true * y_pred, axis=-1) + hard_mask = tf.cast(tf.less(correct_prob, margin), tf.float32) + + # 클래스별 가중치 계산 (소수 클래스에 더 높은 가중치) + effective_counts = tf.reduce_sum(y_true, axis=0) + alpha = 1.0 / (effective_counts + 1e-7) + alpha = alpha / tf.reduce_sum(alpha) + + # 소수 클래스 추가 가중치 부여 (surprise, neutral) + class_boost = tf.constant([1.0, 0.5, 1.0, 1.0, 1.0, 2.5, 5.0], dtype=tf.float32) + alpha = alpha * class_boost + + # Focal Loss 계산 + cross_entropy = -y_true * tf.math.log(y_pred) + focal_weight = tf.pow(1.0 - y_pred, gamma) + + # 하드 샘플에 추가 가중치 부여 + sample_weight = 1.0 + hard_mask * 2.0 + loss = sample_weight[:, tf.newaxis] * alpha * focal_weight * cross_entropy + + return tf.reduce_sum(loss) diff --git a/app/ML/plot_utils.py b/app/ML/plot_utils.py new file mode 100644 index 0000000..b7744d0 --- /dev/null +++ b/app/ML/plot_utils.py @@ -0,0 +1,40 @@ +# 그래프 그리기 +import os +from datetime import datetime + +from matplotlib import pyplot as plt +from app.service.s3Service import upload_to_s3_png + +colors = ['#e74c3c', '#3498db', '#f1c40f', '#e67e22', '#9b59b6', '#1abc9c', '#95a5a6'] +emotion_labels = ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral'] + + +def save_plot(predictions_percent): + plt.figure(figsize=(10, 6)) + bars = plt.barh(emotion_labels, predictions_percent, color=colors, alpha=0.85) + + plt.title('Emotion Probability Distribution', fontsize=20, weight='bold', pad=15) + plt.xlabel('Probability (%)', fontsize=14) + plt.xlim(0, max(predictions_percent) + 10) + plt.grid(axis='x', linestyle='--', alpha=0.6) + + for bar, percent in zip(bars, predictions_percent): + width = bar.get_width() + plt.text(width + 0.8, bar.get_y() + bar.get_height() / 2, f'{percent:.1f}%', va='center', fontsize=13, + weight='bold', color='#333') + + plt.yticks(fontsize=14, weight='bold') + plt.tight_layout() + + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}" + local_path = os.getcwd() + f"/app/emotion_png/{filename}_emotion_distribution.png" + # 이미지 파일로 저장 + plt.savefig(local_path, dpi=300, bbox_inches='tight') + plt.show() + + return local_path + + +def get_s3_png_url(local_path): + return upload_to_s3_png(local_path) diff --git a/app/ML/predict_colab.py b/app/ML/predict_colab.py new file mode 100644 index 0000000..a95d54a --- /dev/null +++ b/app/ML/predict_colab.py @@ -0,0 +1,95 @@ +import numpy as np +import matplotlib.pyplot as plt +from tensorflow.keras.models import load_model +from sentence_transformers import SentenceTransformer +import glob +import os + +from app.ML.audio_extractor_utils import get_features +from app.ML.loss import boundary_enhanced_focal_loss +from app.ML.speech_to_text import speech_to_text +import os +os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' + + +BASE_DIR_resp = "/home/team4/Desktop/capstone/AI/app/emotion_diary" +BASE_DIR_win = "C:/Users/YJG/Desktop/2025_1_capstone_2/AI/app/emotion_diary" +emotion_labels = ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral'] +model_path_resp = "/home/team4/Desktop/capstone/AI/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5" +model_path_win = "C:/Users/YJG/Desktop/2025_1_capstone_2/AI/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5" + + +def predict(): + BASE_DIR = BASE_DIR_win + model_path = model_path_win + # (가정) 미리 정의된 
함수/변수 + # get_features(path): (486,) 벡터 반환 + # speech_to_text(path): STT → 문자열 반환 + # boundary_enhanced_focal_loss: 커스텀 손실 + # emotion_labels: ['angry','sadness','happiness','fear','disgust','surprise','neutral'] + # model_path, sample_path: 경로 문자열 + + # 1) WAV 파일 리스트 + # sample_wav_list = [ + # sample_path + "/jg_sadness_1.wav", + # sample_path + "/jg_sadness_2.wav", + # sample_path + "/jg_sadness_3.wav", + # sample_path + "/jg_sadness_4.wav", + # sample_path + "/jg_sadness_5.wav" + # ] + sample_wav_list = glob.glob(os.path.join(BASE_DIR, "**", "*.wav"), recursive=True) + + # 2) 오디오 특징 평균 풀링 + all_feats = np.stack([get_features(p) for p in sample_wav_list], axis=0) # (5,486) + pooled_feats = all_feats.mean(axis=0) # (486,) + + # 3) 모델 입력 형태 맞추기 + audio_input = pooled_feats[np.newaxis, :, np.newaxis] # (1,486,1) + + # 4) 전체 텍스트 STT → 하나의 문장으로 결합 + texts = [speech_to_text(p) for p in sample_wav_list] + full_text = " . ".join(texts) + + # 5) 텍스트 임베딩 + embedding_model = SentenceTransformer('jhgan/ko-sbert-multitask') + text_vec = embedding_model.encode([full_text])[0] # (768,) + text_input = text_vec[np.newaxis, :] # (1,768) + + # 6) 모델 로드 및 예측 + model = load_model(model_path, custom_objects={ + 'boundary_enhanced_focal_loss': boundary_enhanced_focal_loss + }) + prediction = model.predict([audio_input, text_input]) # (1,7) + pred_percent = prediction[0] * 100 # (7,) + + # 7) 콘솔에 출력 + for lbl, p in zip(emotion_labels, pred_percent): + print(f"{lbl}: {p:.2f}%") + top_idx = np.argmax(pred_percent) + print(f"\n최종 예측 감정: {emotion_labels[top_idx]}") + + # 8) 가로 막대그래프 시각화 + colors = ['#e74c3c', '#3498db', '#f1c40f', '#e67e22', '#9b59b6', '#1abc9c', '#95a5a6'] + + plt.figure(figsize=(10, 6)) + bars = plt.barh(emotion_labels, pred_percent, color=colors, alpha=0.85) + + plt.title('Emotion Probability Distribution', fontsize=18, weight='bold', pad=15) + plt.xlabel('Probability (%)', fontsize=14) + plt.xlim(0, pred_percent.max() + 10) + plt.grid(axis='x', linestyle='--', alpha=0.6) + + for bar, p in zip(bars, pred_percent): + plt.text(p + 1, bar.get_y() + bar.get_height() / 2, + f'{p:.1f}%', va='center', fontsize=12, weight='bold', color='#333') + + plt.yticks(fontsize=13, weight='bold') + plt.tight_layout() + + # 이미지 파일로 저장 + plt.savefig('emotion_distribution.png', dpi=300, bbox_inches='tight') + plt.show() + + +if __name__ == "__main__": + predict() diff --git a/app/ML/speech_to_text.py b/app/ML/speech_to_text.py new file mode 100644 index 0000000..8cd19c5 --- /dev/null +++ b/app/ML/speech_to_text.py @@ -0,0 +1,26 @@ +import speech_recognition as sr + +# sample_wav_path = sample_path + "/sh_sadness_2.wav" + + +# STT 변환 함수 +def speech_to_text(audio_path): + recognizer = sr.Recognizer() + + # 음성 파일 로드 + with sr.AudioFile(audio_path) as source: + audio_data = recognizer.record(source) # 음성 데이터 읽기 + + try: + # 구글 STT API 사용 (무료) + text = recognizer.recognize_google(audio_data, language="ko-KR") + return text + except sr.UnknownValueError: + return "음성을 인식할 수 없습니다." 
+ except sr.RequestError: + return "STT 요청 실패" + +# +# # MP3에서 변환한 WAV 파일 입력 +# sample_text = speech_to_text(sample_wav_path) +# print("변환된 텍스트:", sample_text) diff --git a/app/controller/RecordController.py b/app/controller/RecordController.py new file mode 100644 index 0000000..3b83cd9 --- /dev/null +++ b/app/controller/RecordController.py @@ -0,0 +1,211 @@ +import json +import os +import subprocess +from typing import List + +import numpy as np +import requests +from boto3 import client +from fastapi import APIRouter, Request, UploadFile, File, Form +# from sentence_transformers import SentenceTransformer + +from app.ML.audio_extractor_utils import get_features +from app.ML.loss import boundary_enhanced_focal_loss +from app.ML.plot_utils import save_plot, get_s3_png_url +from app.ML.speech_to_text import speech_to_text +from app.dto.ScheduleSpeakRequestDto import ScheduleSpeakRequestDto +from app.dto.ScheduleTTSRequestDto import ScheduleTTSRequestDto +from app.service.elevenLabs import text_to_speech_file_save_AWS, text_to_speech_file +from app.service.gpt import ChatgptAPI, EmotionReportGPT +from app.service.s3Service import download_from_s3, save_local_file +from app.utils import play_file +from tensorflow.keras.models import load_model + +from app.utils.convertFileExtension import convert_to_wav + +router = APIRouter( + prefix="/api/fastapi", +) + +access_key = os.getenv("S3_ACCESS_KEY") +secret_key = os.getenv("S3_SECRET_KEY") +bucket_name = os.getenv("S3_BUCKET_NAME") +url_base = os.getenv("S3_URL") +yjg_voice_id = os.getenv("YJG_VOICE_ID") + +s3_client = client( + "s3", + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name="ap-northeast-2", +) + +os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' + +# app = FastAPI() + +BASE_DIR_win = os.getcwd() + "/app/emotion_diary" +model_path_win = os.getcwd() + "/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5" +emotion_labels = ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral'] + +embedding_model = SentenceTransformer('jhgan/ko-sbert-multitask') +model = load_model(model_path_win, custom_objects={'boundary_enhanced_focal_loss': boundary_enhanced_focal_loss}) + + +async def save_local_files(files: List[UploadFile]) -> list: + """업로드된 파일을 로컬에 저장하고 파일 경로를 반환합니다.""" + audio_dir = "./audio" + local_file_path_list = [] + if not os.path.exists(audio_dir): + os.makedirs(audio_dir) + for file in files: + local_file_path = os.path.join(audio_dir, file.filename) # 파일 경로 생성 + with open(local_file_path, "wb") as f: + f.write(await file.read()) # 파일 내용을 저장 + local_file_path_list.append(local_file_path) + return local_file_path_list + + +# 첫 로그인 시 목소리 녹음 api +@router.post("/voices") +async def getVoice(request: Request, file: UploadFile = File(...)): + token = request.headers.get("Authorization").split(" ")[1] + # local_file_path = await save_local_file(file) + # voice_id = add_voice(name=name, local_file_paths=[local_file_path]) + # voice_url = s3Service.upload_to_s3(local_file_path) + # os.remove(local_file_path) + + send_user_voice_file_to_spring(token=token, voice_url=yjg_voice_id) + + +# 만약 voice_id와 요구하는 분야가 오면 맞춰서 return +@router.post("/schedules") +async def schedule_tts(request: Request, schedules: ScheduleTTSRequestDto): + # token = request.headers.get("Authorization").split(" ")[1] + voice_id = yjg_voice_id + + prompt = ChatgptAPI(schedules.schedule_text, schedules.alias) + + # schedule_dict: {"저녁": "엄마~ 저녁 잘 챙겨 먹었어?", "운동": "오늘 운동했어? 
건강 챙겨~!"} + schedule_dict = prompt.get_schedule_json() + + # TTS 처리 (MP3 파일 생성 후 s3 저장) + response = { + schedules.schedule_id[i]: text_to_speech_file_save_AWS( + schedule_dict.get(schedules.schedule_text[i], ""), + yjg_voice_id + ) + # schedules.schedule_id[i]: str(schedules.schedule_id[i]) + for i in range(len(schedules.schedule_id)) + } + return response + + +@router.post("/predict") +async def predict(request: Request, files: List[UploadFile] = File(...)): + # token = request.headers.get("Authorization").split(" ")[1] + print(files) + # 1) 임시 파일 저장 or 메모리 내 처리 + wav_data_list = [] + for file in files: + raw = await file.read() + ext = file.filename.split('.')[-1] # 'm4a', 'mp3' 등 + wav_bytes = convert_to_wav(raw, ext) # BytesIO 변환 + wav_data_list.append(wav_bytes) + + # 2) 오디오 특징 추출 + all_feats = [] + for wav_bytes in wav_data_list: + # get_features 함수가 경로 입력이면, 아래처럼 메모리 파일 처리 필요 + # 임시파일로 저장 후 경로 전달 or get_features 수정 필요 + + temp_path = f"temp_{file.filename}" + with open(temp_path, "wb") as f: + f.write(wav_bytes) + feats = get_features(temp_path) + os.remove(temp_path) + all_feats.append(feats) + + all_feats = np.stack(all_feats, axis=0) + pooled_feats = all_feats.mean(axis=0) + audio_input = pooled_feats[np.newaxis, :, np.newaxis] + + # 3) STT & 텍스트 임베딩 + texts = [] + for wav_bytes in wav_data_list: + temp_path = f"temp_stt.wav" + with open(temp_path, "wb") as f: + f.write(wav_bytes) + text = speech_to_text(temp_path) + os.remove(temp_path) + texts.append(text) + + full_text = " . ".join(texts) + text_vec = embedding_model.encode([full_text])[0] + text_input = text_vec[np.newaxis, :] + + # 4) 예측 + prediction = model.predict([audio_input, text_input]) + pred_percent = (prediction[0] * 100).tolist() + + # 5) JSON 응답 + result = {label: round(p, 2) for label, p in zip(emotion_labels, pred_percent)} + top_idx = np.argmax(pred_percent) + result['predicted_emotion'] = emotion_labels[top_idx] + + local_path = save_plot(pred_percent) + s3_path = get_s3_png_url(local_path) + reporter = EmotionReportGPT(full_text, pred_percent) + report_text = reporter.get_report_text() + + print(s3_path) + + # send_emotion_report_to_spring(s3_path, report_text) + + data = { + "imageUrl": s3_path, + "report_text": report_text + } + return data + + +def send_user_voice_file_to_spring(token: str, voice_url: str): + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "text/plain" + } + # requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data) + # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data) + + requests.post( + "http://springboot:8080/api/spring/records/voices", + headers=headers, + data=voice_url # 주의: 'data='를 써야 함 + ) + + +def send_user_voice_id_to_spring(token: str, voice_id: str): + headers = { + "Authorization": f"Bearer {token}" + } + data = { + "voiceId": voice_id + } + requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data) + # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data) + + +def send_emotion_report_to_spring(image_url: str, analysis_text): + headers = { + # "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + } + data = { + "imageUrl": image_url, + "report_text": analysis_text + } + requests.post( + "http://springboot:8080/api/spring/report", + headers=headers, + json=data + ) diff --git a/app/dto/ExtraTTSRequestDto.py b/app/dto/ExtraTTSRequestDto.py new file mode 100644 index 0000000..0a1b600 
--- /dev/null +++ b/app/dto/ExtraTTSRequestDto.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + + +class ExtraTTSRequestDto(BaseModel): + schedule_id: int + is_basic_schedule: bool + schedule_text: str + target_time: str # "10:00:00" 형식 diff --git a/app/dto/ScheduleSpeakRequestDto.py b/app/dto/ScheduleSpeakRequestDto.py new file mode 100644 index 0000000..cef86db --- /dev/null +++ b/app/dto/ScheduleSpeakRequestDto.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class ScheduleSpeakRequestDto(BaseModel): + schedule_id: int + schedule_voice_Url: str + target_time: str # "10:00:00" 형식 diff --git a/app/dto/ScheduleTTSRequestDto.py b/app/dto/ScheduleTTSRequestDto.py new file mode 100644 index 0000000..41feeaf --- /dev/null +++ b/app/dto/ScheduleTTSRequestDto.py @@ -0,0 +1,10 @@ +from typing import List + +from pydantic import BaseModel + + +class ScheduleTTSRequestDto(BaseModel): + voice_id: str + alias: str + schedule_id: List[int] + schedule_text: List[str] diff --git a/app/emotion_diary/20250608_0.wav b/app/emotion_diary/20250608_0.wav new file mode 100644 index 0000000..d01f445 Binary files /dev/null and b/app/emotion_diary/20250608_0.wav differ diff --git a/app/emotion_diary/20250608_1.wav b/app/emotion_diary/20250608_1.wav new file mode 100644 index 0000000..609d7a6 Binary files /dev/null and b/app/emotion_diary/20250608_1.wav differ diff --git a/app/emotion_diary/20250608_2.wav b/app/emotion_diary/20250608_2.wav new file mode 100644 index 0000000..ef2c161 Binary files /dev/null and b/app/emotion_diary/20250608_2.wav differ diff --git a/app/emotion_diary/20250608_3.wav b/app/emotion_diary/20250608_3.wav new file mode 100644 index 0000000..98e0ba5 Binary files /dev/null and b/app/emotion_diary/20250608_3.wav differ diff --git a/app/emotion_diary/jg_sadness_1.m4a b/app/emotion_diary/jg_sadness_1.m4a new file mode 100644 index 0000000..7a0015c Binary files /dev/null and b/app/emotion_diary/jg_sadness_1.m4a differ diff --git a/app/emotion_diary/jg_sadness_2.m4a b/app/emotion_diary/jg_sadness_2.m4a new file mode 100644 index 0000000..9375565 Binary files /dev/null and b/app/emotion_diary/jg_sadness_2.m4a differ diff --git a/app/emotion_diary/jg_sadness_3.m4a b/app/emotion_diary/jg_sadness_3.m4a new file mode 100644 index 0000000..ee1a08a Binary files /dev/null and b/app/emotion_diary/jg_sadness_3.m4a differ diff --git a/app/emotion_diary/jg_sadness_4.m4a b/app/emotion_diary/jg_sadness_4.m4a new file mode 100644 index 0000000..26b0d4d Binary files /dev/null and b/app/emotion_diary/jg_sadness_4.m4a differ diff --git a/app/emotion_diary/jg_sadness_5.m4a b/app/emotion_diary/jg_sadness_5.m4a new file mode 100644 index 0000000..235860c Binary files /dev/null and b/app/emotion_diary/jg_sadness_5.m4a differ diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py new file mode 100644 index 0000000..a9318c6 --- /dev/null +++ b/app/service/elevenLabs.py @@ -0,0 +1,97 @@ +import os +import uuid + +from dotenv import load_dotenv +from elevenlabs import ElevenLabs, VoiceSettings + +from app.service.s3Service import upload_to_s3 + +load_dotenv() +client = ElevenLabs( + api_key=os.getenv("ELEVENLABS_KEY"), +) +yjg_voice_id = os.getenv("YJG_VOICE_ID") + + +def get_voice(): + response = client.voices.get_all() + voice_ids = [voice.voice_id for voice in response.voices] # Voice 객체의 voice_id 속성 사용 + return voice_ids + + +def delete_voice(voice: str): + try: + response = client.voices.delete(voice_id=voice) + print(f"Deleted voice_id: {voice}") + except Exception as e: + print(f"Error 
deleting voice_id {voice}: {e}") + + +def delete_all_voice(voices: list): + for voice in voices: + delete_voice(voice) + + +def add_voice(name: str, local_file_paths: list): + # 파일 경로를 통해 파일 객체 생성 + files = [] + for path in local_file_paths: + with open(path, 'rb') as audio_file: + files.append(audio_file.read()) # 파일 내용을 리스트에 저장 + + response = client.voices.add(name=name, files=files) + return response.voice_id + + +def text_to_speech_file_save_AWS(text: str, voice_id=yjg_voice_id) -> str: + response = client.text_to_speech.convert( + voice_id=voice_id, + output_format="mp3_22050_32", + text=text, + model_id="eleven_multilingual_v2", + # voice_settings=VoiceSettings( + # stability=0.3, + # similarity_boost=1.0, + # style=0.0, + # use_speaker_boost=True, + # ), + ) + + save_file_path = f"{uuid.uuid4()}.mp3" + with open(save_file_path, "wb") as f: + for chunk in response: + if chunk: + f.write(chunk) + aws_file_url = upload_to_s3(local_file_path=save_file_path) + os.remove(save_file_path) + + # delete_voice(voice_id) + + return aws_file_url + + +def text_to_speech_file(text: str, voice_id=yjg_voice_id) -> str: + response = client.text_to_speech.convert( + voice_id=voice_id, + # output_format="mp3_22050_32", + text=text, + model_id="eleven_multilingual_v2", + # voice_settings=VoiceSettings( + # stability=0.3, + # similarity_boost=1.0, + # style=0.0, + # use_speaker_boost=True, + # ), + ) + + save_file_path = f"{uuid.uuid4()}.wav" + with open(save_file_path, "wb") as f: + for chunk in response: + if chunk: + f.write(chunk) + # aws_file_url = upload_to_s3(local_file_path=save_file_path) + # os.remove(save_file_path) + + # delete_voice(voice_id) + + return save_file_path diff --git a/app/service/gpt.py b/app/service/gpt.py new file mode 100644 index 0000000..eb06023 --- /dev/null +++ b/app/service/gpt.py @@ -0,0 +1,139 @@ +import os + +from dotenv import load_dotenv +from openai import OpenAI + +from app.utils import parsing_json + +load_dotenv() +client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + + +class ChatgptAPI: + def __init__(self, schedules, alias): + self.schedules = schedules + self.alias = alias + + def create_schedule_prompt(self): + system_message = f""" + 너는 지금부터 혼자 사시는 부모님을 걱정하는 보호자야. + 네 역할은 키워드를 보고, 키워드와 관련한 문제에 대해서 부모님을 걱정하고, 생활은 챙겨주는거야. + 키워드는 다음과 같아: {str(self.schedules)} + + 너의 목표는 두 가지야: + 1. 키워드에 대한 질문 혹은 문장을 한 줄의 텍스트로 만들어. + ex) 키워드가 '저녁' 이라면, "{self.alias}~~ 하루 잘 보냈어?? 저녁도 맛있는거 챙겨먹어!! 사랑해~~ " + 2. 만든 텍스트는 ?? !! ~~ ,, .. 등의 다양한 특수문자가 많이 들어갈 수 있어. 감정이 강하게 느껴지게 작성해줘. + 2-a. 특수문자를 붙일 때는 꼭 2개씩 붙여줘 + 3. 부모님을 지칭하는 별명은 {self.alias} 로 해줘. + 4. 문장과 문장 사이의 띄어쓰기를 2개씩 넣어줘 + + 결과는 {{"키워드": "문장"}} 형태의 JSON 문자열로 반환해줘. 꼭 큰따옴표(")만 사용해. + + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_schedule_json(self): + prompt = self.create_schedule_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + schedule_dict = parsing_json.extract_json_from_content(content) + + return schedule_dict + + +class GenerateQuestionGPT: + def __init__(self, text, alias): + self.text = text + self.alias = alias + + def create_schedule_prompt(self): + system_message = f""" + 너는 지금부터 혼자 사시는 부모님을 걱정하는 보호자야. + + 네 역할은 키워드를 보고, 키워드와 관련한 문제에 대해서 부모님을 걱정하고, 생활은 챙겨주는거야. + 키워드는 다음과 같아: {str(self.schedules)} + + 너의 목표는 두 가지야: + 1. 키워드에 대한 질문 혹은 문장을 한 줄의 텍스트로 만들어. + ex) 키워드가 '저녁' 이라면, "{self.alias}~~ 하루 잘 보냈어?? 저녁도 맛있는거 챙겨먹어!! 
사랑해~~ " + 2. 만든 텍스트는 ?? !! ~~ ,, .. 등의 다양한 특수문자가 많이 들어갈 수 있어. 감정이 강하게 느껴지게 작성해줘. + 2-a. 특수문자를 붙일 때는 꼭 2개씩 붙여줘 + 3. 부모님을 지칭하는 별명은 {self.alias} 로 해줘. + 4. 문장과 문장 사이의 띄어쓰기를 2개씩 넣어줘 + + 결과는 {{"키워드": "문장"}} 형태의 JSON 문자열로 반환해줘. 꼭 큰따옴표(")만 사용해. + + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_schedule_json(self): + prompt = self.create_schedule_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + schedule_dict = parsing_json.extract_json_from_content(content) + + return schedule_dict + + +class EmotionReportGPT: + def __init__(self, text, percent_list): + self.text = text + self.percent_list = percent_list + + def create_report_prompt(self): + system_message = f""" + 너는 지금부터 감정을 분석 하는 심리 상담사야. + + 네 역할은 텍스트와 수치를 보고, 해당 발화의 인물이 하루 동안 어떤 감정 상태를 가졌는지 체크해주는 거야. + 텍스트는 다음과 같아: {str(self.text)} + 수치는 다음과 같아 : {self.percent_list} + + 너의 목표는 두 가지야: + 1. 텍스트와 수치를 보고 발화의 인물의 하루 감정을 종합적으로 분석해줘. + 1-a) 분석을 할 때는 텍스트나 문맥에서 근거를 들어서 논리적으로 서술해줘. + 1-b) 분석 말투는 보호자에게 피보호자의 상태를 설명하는 존댓말 말투로 해줘. + 1-c) '발화자'를 지칭하는 말은 '피보호자'로 해줘 + 2. 분석 문장은 4-5 줄이어야 해. + + 결과는 꼭 큰따옴표(")만 사용해서 str로 반환해줘. 만약 여러 문장이라면 "\n"를 문장 끝에 넣어줘. + + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_report_text(self): + prompt = self.create_report_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + + return content diff --git a/app/service/interaction.py b/app/service/interaction.py new file mode 100644 index 0000000..c48da29 --- /dev/null +++ b/app/service/interaction.py @@ -0,0 +1,98 @@ +import os +import subprocess +from datetime import datetime + +from faster_whisper import WhisperModel +from openai import OpenAI +from elevenlabs import ElevenLabs +from dotenv import load_dotenv + +from app.service.elevenLabs import text_to_speech_file +# 녹음 함수 (arecord 사용) - 수정된 record_respberry.py 참고 +from record_respberry import emotion_record + +# ==== 공통 설정 ==== +load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +ELEVENLABS_KEY = os.getenv("ELEVENLABS_KEY") + +if not OPENAI_API_KEY or not ELEVENLABS_KEY: + raise RuntimeError(".env 에 OPENAI_API_KEY/ELEVENLABS_KEY 를 설정하세요") + +# OpenAI / ElevenLabs 클라이언트 +gpt_client = OpenAI(api_key=OPENAI_API_KEY) +tts_client = ElevenLabs(api_key=ELEVENLABS_KEY) + +# Whisper 모델 (tiny, CPU, int8) +whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8") + + +def interaction(alias: str): + """ + alias: 사용자 이름 또는 AI가 부르는 별칭 (ex: "홍길동") + 1) alias 인사 → TTS → 재생 + 2) 이후 반복: emotion_record → Whisper STT → GPT 질문 생성 → TTS → 재생 + """ + # 1) alias 인사 + greet_text = f"{alias}~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" + print("👋 인사:", greet_text) + greet_audio = text_to_speech_file(greet_text) + subprocess.run(["mpg321", greet_audio], check=True) + + # 대화 이력 초기화 + messages = [ + {"role": "system", + "content": "너는 대화를 자연스럽게 이어가는 AI야. 
사용자와 계속 이어지는 대화를 만들어야 해."}, + {"role": "assistant", "content": greet_text} + ] + + record_idx = 0 + try: + while True: + # 2-1) 감정 녹음 (침묵 기준으로 자동 종료) + wav_path = emotion_record(record_idx) + print(f"[녹음 완료] {wav_path}") + record_idx += 1 + + # 2-2) Whisper STT (한국어) + segments, _ = whisper_model.transcribe(wav_path, + beam_size=1, + language="ko") + user_text = " ".join(seg.text for seg in segments).strip() + print("▶ 사용자 음성(텍스트):", user_text or "(인식 안됨)") + + if not user_text: + print("(음성 인식 실패 → 다시 녹음)") + continue + + # 2-3) GPT-4o 에 질문 생성 요청 + messages.append({"role": "user", "content": user_text}) + resp = gpt_client.chat.completions.create( + model="gpt-4o", + messages=messages + ) + question = resp.choices[0].message.content.strip() + print("생성된 질문:", question) + + # 2-4) 대화 이력에 어시스턴트 질문 추가 + messages.append({"role": "assistant", "content": question}) + + # 2-5) 질문 → ElevenLabs TTS → 파일 + tts_path = text_to_speech_file(question) + print(" (TTS 파일 생성:", tts_path, ")") + + # 2-6) 재생 + subprocess.run(["mpg321", tts_path], check=True) + + except KeyboardInterrupt: + print("\n[사용자 종료 요청] interaction을 종료합니다.") + except Exception as e: + print("예외 발생:", e) + + print("=== interaction 종료 ===") + + +if __name__ == "__main__": + # 스크립트를 직접 실행할 때만 동작 + # alias를 원하는 이름으로 바꿔주세요 + interaction("아빠") diff --git a/app/service/main.py b/app/service/main.py new file mode 100644 index 0000000..18296c5 --- /dev/null +++ b/app/service/main.py @@ -0,0 +1,92 @@ +import asyncio + +from fastapi import FastAPI, Depends, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from fastapi.openapi.utils import get_openapi +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from contextlib import asynccontextmanager + +from app.controller.RecordController import router + +# from app.controller.RecordController import router + + +# from app.service.subscribe import subscribe_schedule + +# @asynccontextmanager +# async def lifespan(app: FastAPI): +# task = asyncio.create_task(subscribe_schedule()) +# yield +# task.cancel() +# try: +# await task +# except asyncio.CancelledError: +# print("Redis task cancelled") + + +app = FastAPI() + +auth_scheme = HTTPBearer() + + +def get_current_token(credentials: HTTPAuthorizationCredentials = Depends(auth_scheme)): + token = credentials.credentials + if not token: + raise HTTPException(status_code=403, detail="Invalid or missing token") + return token + + +def custom_openapi(): + if app.openapi_schema: + return app.openapi_schema + openapi_schema = get_openapi( + title="HumaniCare API Documentation", + version="1.0", + description="HumaniCare API documentation for the application", + routes=app.routes, + ) + # Add the security scheme for Bearer token + openapi_schema["components"]["securitySchemes"] = { + "bearerAuth": { + "type": "http", + "scheme": "bearer", + "bearerFormat": "JWT" + } + } + openapi_schema["security"] = [{"bearerAuth": []}] + app.openapi_schema = openapi_schema + return app.openapi_schema + + +app.openapi = custom_openapi + +# # Swagger UI 경로 설정 +# @app.get("/docs", include_in_schema=False) +# async def custom_swagger_ui_html(req: Request): +# root_path = req.scope.get("root_path", "").rstrip("/") +# openapi_url = root_path + "/openapi.json" # OpenAPI 경로 설정 +# return get_swagger_ui_html( +# openapi_url=openapi_url, +# title="Peach API Documentation", +# ) +# +# # OpenAPI JSON 경로 설정 +# @app.get("/openapi.json", include_in_schema=False) +# async def custom_openapi_json(): +# return app.openapi() + +origins = [ + 
"http://localhost:8080", + "http://localhost:3000", +] + +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(router) + diff --git a/app/service/predict_resp.py b/app/service/predict_resp.py new file mode 100644 index 0000000..1d49a67 --- /dev/null +++ b/app/service/predict_resp.py @@ -0,0 +1,36 @@ +import requests +import glob +import os +import mimetypes + + +def predict(): + # ip = "192.168.1.243" + ip = "15.165.21.152" + # FastAPI 라우터 경로에 맞춘 URL + url = f"http://{ip}:8000/api/fastapi/predict" + + # 전송할 오디오 파일 경로 (wav, m4a, mp3 등 모두 포함) + BASE_DIR = "/home/team4/Desktop/capstone/AI/app/emotion_diary" + audio_paths = glob.glob(os.path.join(BASE_DIR, "**", "*.*"), recursive=True) + + files = [] + for path in audio_paths: + filename = os.path.basename(path) + # 확장자에 맞는 MIME 타입 추출 (fallback: application/octet-stream) + content_type = mimetypes.guess_type(path)[0] or "application/octet-stream" + files.append( + ("files", (filename, open(path, "rb"), content_type)) + ) + + response = requests.post(url, files=files) + if response.status_code == 200: + print("감정 예측 결과:") + for label, score in response.json().items(): + print(f"{label}: {score}") + else: + print(f"Error: {response.status_code} - {response.text}") + + +if __name__ == "__main__": + predict() diff --git a/app/service/record.py b/app/service/record.py new file mode 100644 index 0000000..a5d61f1 --- /dev/null +++ b/app/service/record.py @@ -0,0 +1,78 @@ +import os +import time +from datetime import datetime + +import numpy as np +import sounddevice as sd +import torch +from scipy.io.wavfile import write + +# 사일로 VAD 모델 불러오기 +model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad', force_reload=False) +(get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks) = utils + +FRAME_SIZE = 512 +SILENCE_LIMIT = 2.0 # 2초 이상 침묵하면 종료 +FILENAME = "output.wav" # 녹음된 오디오 파일 이름 + +audio_queue = [] +recorded_audio = [] + + +def callback(indata, frames, time_info, status): + # 받은 오디오 데이터를 audio_queue에 추가 + audio_queue.append(indata[:, 0].copy()) + + +print("Start talking... (녹음 중, 침묵 시 자동 종료)") + +with sd.InputStream(callback=callback, channels=1, samplerate=SAMPLE_RATE, blocksize=FRAME_SIZE): + silence_counter = 0 + while True: + if len(audio_queue) == 0: + time.sleep(0.01) + continue + + chunk = audio_queue.pop(0) + if len(chunk) < 512: + continue + + audio_tensor = torch.from_numpy(chunk[:512]).float() + audio_tensor = audio_tensor / (torch.max(torch.abs(audio_tensor)) + 1e-9) + + speech_prob = model(audio_tensor, SAMPLE_RATE).item() + print(f"Speech prob: {speech_prob:.3f}") + + # 음성이 인식되었을 때만 녹음 + if speech_prob > 0.5: + recorded_audio.append(chunk) + silence_counter = 0 # 음성이 인식되면 침묵 카운터 리셋 + else: + silence_counter += FRAME_SIZE / SAMPLE_RATE + print(f"Silence counter: {silence_counter:.2f}") + + # 침묵이 2초 이상 지속되면 녹음 종료 + if silence_counter >= SILENCE_LIMIT: + print("Silence detected for 2 seconds! 
Stopping.") + break + +# 녹음된 오디오가 있을 경우에만 파일로 저장 + +# 저장할 디렉토리 설정 +print(os.getcwd()) +save_dir = os.path.join(os.getcwd(), "first_audio") +os.makedirs(save_dir, exist_ok=True) # 디렉토리가 없으면 생성 + +# 오늘 날짜 문자열 +today_str = datetime.now().strftime("%Y%m%d") +# 파일 이름 설정 +FILENAME = "output.wav" +file_path = os.path.join(save_dir, FILENAME) +if recorded_audio: + recorded_audio = np.concatenate(recorded_audio) + + # 오디오 데이터를 .wav 파일로 저장 + write(file_path, SAMPLE_RATE, recorded_audio.astype(np.float32)) # 저장 형식: .wav + print(f"녹음된 파일을 {FILENAME}로 저장했습니다.") +else: + print("녹음된 음성이 없습니다.") diff --git a/app/service/record_respberry.py b/app/service/record_respberry.py new file mode 100644 index 0000000..3e0460f --- /dev/null +++ b/app/service/record_respberry.py @@ -0,0 +1,72 @@ +import os +import wave +from datetime import datetime +import numpy as np +import sounddevice as sd +from scipy.io.wavfile import write + +# === 녹음 설정 === +CHANNELS = 1 +RATE = 44100 +CHUNK_DURATION = 0.1 # 초 단위, 약 100ms +CHUNK = int(RATE * CHUNK_DURATION) +SILENCE_LIMIT = 5 # 5초 연속 침묵이면 녹음 종료 +THRESHOLD = 1000.0 # 침묵 판별 기준 (RMS) + +BASE_DIR = "/home/team4/Desktop/capstone/AI/app/emotion_diary" + + +# 날짜 기반 하위 디렉터리(매일 한 번만 생성) +def _ensure_dir(): + os.makedirs(BASE_DIR, exist_ok=True) + + +def is_silent(data: np.ndarray, threshold: float = THRESHOLD) -> bool: + """ + float32 numpy 배열을 받아 RMS 기준으로 침묵 여부를 판단 + """ + rms = np.sqrt(np.mean(data ** 2)) + return rms < threshold + + +def emotion_record(index: int) -> str: + """ + index: 녹음 파일 구분을 위한 정수 인덱스 + return: 저장된 .wav 파일의 전체 경로 + """ + _ensure_dir() + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}_{index}.wav" + filepath = os.path.join(BASE_DIR, filename) + + print(f"[녹음 시작] {filename}") + + frames = [] + silent_secs = 0.0 + + try: + with sd.InputStream(samplerate=RATE, channels=CHANNELS, dtype='float32') as stream: + while True: + data, _ = stream.read(CHUNK) + audio_chunk = data[:, 0] # mono + frames.append(audio_chunk.copy()) + + if is_silent(audio_chunk): + silent_secs += CHUNK_DURATION + else: + silent_secs = 0.0 + + if silent_secs >= SILENCE_LIMIT: + print(f"[침묵 {SILENCE_LIMIT}초 감지 → 녹음 종료]") + break + + except Exception as e: + print("녹음 중 예외:", e) + + # float32 → int16 변환 후 저장 + all_audio = np.concatenate(frames) + int_audio = np.int16(np.clip(all_audio * 32767, -32768, 32767)) + + write(filepath, RATE, int_audio) + print(f"[저장 완료] {filepath}\n") + return filepath diff --git a/app/service/s3Service.py b/app/service/s3Service.py new file mode 100644 index 0000000..aa2ca35 --- /dev/null +++ b/app/service/s3Service.py @@ -0,0 +1,147 @@ +import os +import time +import uuid +from datetime import datetime +from typing import List + +import requests +from boto3 import client +from botocore.exceptions import ClientError +from dotenv import load_dotenv +from fastapi import UploadFile + +from app.utils.convertFileExtension import convert_to_mp3 + +load_dotenv() + +access_key = os.getenv("S3_ACCESSKEY") +secret_key = os.getenv("S3_SECRETKEY") +bucket_name = os.getenv("S3_BUCKET") +url_base = os.getenv("S3_URL") + +s3_client = client( + "s3", + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name="ap-northeast-2", +) + + +async def save_local_file(file: UploadFile) -> str: + """업로드된 파일을 로컬에 저장하고 파일 경로를 반환합니다.""" + audio_dir = "./audio" + if not os.path.exists(audio_dir): + os.makedirs(audio_dir) + local_file_path = os.path.join(audio_dir, file.filename) # 파일 경로 생성 + with open(local_file_path, "wb") as f: + f.write(await 
file.read()) # 파일 내용을 저장 + return local_file_path + + +def upload_to_s3(local_file_path: str) -> str: + """로컬 파일을 S3에 업로드하고 S3 URL을 반환합니다.""" + try: + if not os.path.isfile(local_file_path): + print(f"Local file does not exist: {local_file_path}") + return None + + timestamp = int(time.time()) + unique_id = str(uuid.uuid4()) + s3_file_name = f"record/audio_{timestamp}_{unique_id}.wav" + + # S3에 파일 업로드 + with open(local_file_path, "rb") as data: + s3_client.upload_fileobj(data, bucket_name, s3_file_name) + + # S3 URL 생성 + aws_file_url = f"{url_base}/{s3_file_name}" + return aws_file_url + + except ClientError as e: + print(f'Credential error => {e}') + except Exception as e: + print(f"Another error => {e}") + + +def upload_to_s3_png(local_file_path: str) -> str: + """로컬 파일을 S3에 업로드하고 S3 URL을 반환합니다.""" + try: + if not os.path.isfile(local_file_path): + print(f"Local file does not exist: {local_file_path}") + return None + + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}" + + timestamp = int(time.time()) + unique_id = str(uuid.uuid4()) + s3_file_name = f"image/{filename}_{timestamp}_{unique_id}.png" + + # S3에 파일 업로드 + with open(local_file_path, "rb") as data: + s3_client.upload_fileobj(data, bucket_name, s3_file_name) + + # S3 URL 생성 + aws_file_url = f"{url_base}/{s3_file_name}" + return aws_file_url + + except ClientError as e: + print(f'Credential error => {e}') + except Exception as e: + print(f"Another error => {e}") + + +# AWS S3에서 녹음 파일 다운로드 +def download_from_s3(file_s3_url: str) -> str: + """S3에서 파일을 다운로드하고 로컬에 저장합니다.""" + audio_dir = "./audio" + if not os.path.exists(audio_dir): + os.makedirs(audio_dir) # 디렉토리가 없으면 생성 + + try: + response = requests.get(file_s3_url) + response.raise_for_status() # 요청이 실패하면 예외를 발생시킴 + + unique_filename = f"{uuid.uuid4()}.wav" + local_save_path = os.path.join(audio_dir, unique_filename) # 저장할 파일 경로 + + with open(local_save_path, 'wb') as f: + f.write(response.content) # 파일 내용을 로컬에 저장 + + mp3_file_path = convert_to_mp3(local_save_path) + return mp3_file_path + + except requests.exceptions.HTTPError as e: + print(f"HTTP error occurred: {e}") + except Exception as e: + print(f"An error occurred: {e}") + + +def download_from_s3_links(urls: List[str]) -> List[str]: + file_s3_urls = [] + for url in urls: + file_s3_url = download_from_s3(url) + file_s3_urls.append(file_s3_url) + return file_s3_urls + + +def download_from_s3_model(file_s3_url: str) -> str: + model_dir = "./model" + if not os.path.exists(model_dir): + os.makedirs(model_dir) # 디렉토리가 없으면 생성 + + try: + response = requests.get(file_s3_url) + response.raise_for_status() # 요청이 실패하면 예외를 발생시킴 + + unique_filename = str(uuid.uuid4()) + local_save_path = os.path.join(model_dir, unique_filename) # 저장할 파일 경로 + + with open(local_save_path, 'wb') as f: + f.write(response.content) # 파일 내용을 로컬에 저장 + return local_save_path + + except requests.exceptions.HTTPError as e: + print(f"HTTP error occurred: {e}") + except Exception as e: + print(f"An error occurred: {e}") diff --git a/app/service/subscribe.py b/app/service/subscribe.py new file mode 100644 index 0000000..7cde317 --- /dev/null +++ b/app/service/subscribe.py @@ -0,0 +1,29 @@ +# import os +# import redis.asyncio as redis +# import json +# import subprocess +# from app.service.s3Service import download_from_s3 +# REDIS_HOST = os.getenv("REDIS_HOST", "15.165.21.152") +# REDIS_PORT = int(os.getenv("REDIS_PORT", "6380")) +# REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "babyy1023@") +# CHANNEL_NAME = "spring-scheduler-channel" +# +# async 
def subscribe_schedule(): +# r = redis.Redis( +# host=REDIS_HOST, +# port=REDIS_PORT, +# password=REDIS_PASSWORD, +# decode_responses=True +# ) +# +# pubsub = r.pubsub() +# await pubsub.subscribe(CHANNEL_NAME) +# +# print(f"Subscribed to Redis '{CHANNEL_NAME}") +# +# async for message in pubsub.listen(): +# if message["type"] == "message": +# local_path = download_from_s3("https://humanicare-bucket.s3.ap-northeast-2.amazonaws.com/record/audio_1743069498_081a9673-aebe-4b86-a4ba-c32f4424e8b9.wav") +# subprocess.run(["mpg321", local_path]) +# print("speaker out") +# \ No newline at end of file diff --git a/app/utils/convertFileExtension.py b/app/utils/convertFileExtension.py new file mode 100644 index 0000000..d58eee0 --- /dev/null +++ b/app/utils/convertFileExtension.py @@ -0,0 +1,98 @@ +import io +import os +import tempfile +from datetime import datetime + +from pydub import AudioSegment +from pydub.exceptions import CouldntDecodeError + + +def merge_all_wavs_to_mp3(audio_dir="audio", silence_duration_ms=500): + wav_files = sorted([ + os.path.join(audio_dir, f) for f in os.listdir(audio_dir) + if f.endswith(".wav") + ]) + + if not wav_files: + print("병합할 .wav 파일이 없습니다.") + return None + + print(f"{len(wav_files)}개의 wav 파일을 병합 중...") + + combined = AudioSegment.empty() + silence = AudioSegment.silent(duration=silence_duration_ms) + + for i, wav in enumerate(wav_files): + audio = AudioSegment.from_wav(wav) + combined += audio + if i != len(wav_files) - 1: + combined += silence # 마지막 파일 뒤에는 무음 안 넣음 + + today_str = datetime.now().strftime("%Y%m%d") + mp3_path = os.path.join(audio_dir, f"{today_str}_final.mp3") + + combined.export(mp3_path, format="mp3") + + for wav in wav_files: + os.remove(wav) + + print(f"최종 mp3 저장 완료: {mp3_path}") + return mp3_path + + +def convert_to_mp3(file_path): + audio = AudioSegment.from_file(file_path) + output_path = file_path.replace(".wav", ".mp3") + os.remove(file_path) + audio.export(output_path, format="mp3") + return output_path + + +def convert_to_wav(raw_bytes: bytes, ext: str) -> bytes: + ext = ext.lower() + # 이미 WAV라면 바로 반환 + if ext == "wav": + return raw_bytes + + # 임시 입력 파일 생성 + with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as fin: + fin.write(raw_bytes) + fin.flush() + fin_path = fin.name + + try: + # 1) format 인자 없이 자동 감지 시도 + audio = AudioSegment.from_file(fin_path) + except CouldntDecodeError: + try: + # 2) 자동 감지도 실패하면, 프로브 크기 늘려서 재시도 + audio = AudioSegment.from_file( + fin_path, + parameters=["-probesize", "50M", "-analyzeduration", "100M"] + ) + except CouldntDecodeError as e: + os.unlink(fin_path) + raise RuntimeError(f"FFmpeg 디코딩 실패({ext}): {e}") from e + + # WAV(PCM) 사양으로 맞춰주기 + audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2) + + # 메모리로 WAV 내보내기 + out = io.BytesIO() + audio.export(out, format="wav") + wav_bytes = out.getvalue() + + os.unlink(fin_path) + return wav_bytes + + # 3) 원하는 파라메터로 변환 (16kHz, mono, 16-bit) + audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2) + + # 4) 메모리로 WAV 내보내기 + out = io.BytesIO() + audio.export(out, format="wav") + wav_bytes = out.getvalue() + + # 5) 임시 입력 파일 삭제 + os.unlink(fin_path) + return wav_bytes diff --git a/app/utils/parsing_json.py b/app/utils/parsing_json.py new file mode 100644 index 0000000..e0eb1be --- /dev/null +++ b/app/utils/parsing_json.py @@ -0,0 +1,15 @@ +import json +import re + + +def extract_json_from_content(content): + match = re.search(r"\{[\s\S]*\}", content) + if match: + try: + return json.loads(match.group()) + except 
json.JSONDecodeError as e: + print("JSON 파싱 실패:", e) + return {} + else: + print("JSON 형태가 아님") + return {} diff --git a/app/utils/play_file.py b/app/utils/play_file.py new file mode 100644 index 0000000..ce1032a --- /dev/null +++ b/app/utils/play_file.py @@ -0,0 +1,31 @@ +import time +from datetime import datetime +import os +import subprocess + + +def play_at_target_time(target_time: str, local_file_path: str): + # 현재 시간과 target_time 비교 + current_time = datetime.now().strftime("%H:%M:%S") + + # target_time이 현재 시간보다 크면 대기 (target_time까지 대기) + while current_time < target_time: + time.sleep(1) # 1초마다 시간 확인 + current_time = datetime.now().strftime("%H:%M:%S") + + #블루투스 헤드셋 또는 기본 스피커로 출력 + os.system("pactl list sinks | grep 'bluez_sink'") # 블루투스 출력 장치 확인 + os.system("pactl set-default-sink `pactl list sinks short | grep bluez_sink | awk '{print $2}'`") # 기본 출력 변경 + + # 스피커를 기본 출력 장치로 설정 + os.system("pactl list sinks | grep 'analog-output'") # 스피커 장치 확인 + os.system("pactl set-default-sink `pactl list sinks short | grep analog-output | awk '{print $2}'`") # 기본 출력 변경 + + #로컬 파일을 직접 재생 + subprocess.run(["mpg321", local_file_path]) + + # window 테스트 용 + # from playsound import playsound + # from pathlib import Path + # safe_path = Path(local_file_path).resolve().as_posix() + # playsound(safe_path) diff --git a/audio/99ef70a1-0fe0-4b82-af32-9e32cbea7800.mp3 b/audio/99ef70a1-0fe0-4b82-af32-9e32cbea7800.mp3 new file mode 100644 index 0000000..c21adb1 Binary files /dev/null and b/audio/99ef70a1-0fe0-4b82-af32-9e32cbea7800.mp3 differ diff --git a/audio/d9da92b8-6a16-4886-bee1-2222a98a8cf3.mp3 b/audio/d9da92b8-6a16-4886-bee1-2222a98a8cf3.mp3 new file mode 100644 index 0000000..c21adb1 Binary files /dev/null and b/audio/d9da92b8-6a16-4886-bee1-2222a98a8cf3.mp3 differ diff --git a/audio/test8.mp3 b/audio/test8.mp3 new file mode 100644 index 0000000..93287b8 Binary files /dev/null and b/audio/test8.mp3 differ diff --git a/d9fd6461-6b0b-4c16-b1fc-f63c92048627.wav b/d9fd6461-6b0b-4c16-b1fc-f63c92048627.wav new file mode 100644 index 0000000..391f582 Binary files /dev/null and b/d9fd6461-6b0b-4c16-b1fc-f63c92048627.wav differ diff --git a/f8fa5cff-fbba-467c-8855-b44e71ebf9e5.wav b/f8fa5cff-fbba-467c-8855-b44e71ebf9e5.wav new file mode 100644 index 0000000..1bc00fa Binary files /dev/null and b/f8fa5cff-fbba-467c-8855-b44e71ebf9e5.wav differ diff --git a/main.py b/main.py new file mode 100644 index 0000000..2e48eca --- /dev/null +++ b/main.py @@ -0,0 +1,79 @@ +# import RPi.GPIO as GPIO +# import time +# from datetime import datetime + +# # ─────────────────────────────── +# # PIR 센서 관련 +# # ─────────────────────────────── +# PIR_PIN = 17 # GPIO17 + +# def detect_motion(): +# GPIO.setmode(GPIO.BCM) +# GPIO.setup(PIR_PIN, GPIO.IN) + +# print("PIR 센서 디버깅 시작 (Ctrl+C 종료)") +# prev_state = None + +# try: +# while True: +# signal = GPIO.input(PIR_PIN) + +# if signal != prev_state: +# timestamp = datetime.now().strftime("%H:%M:%S") +# state_str = "감지됨 (HIGH)" if signal else " 없음 (LOW)" +# print(f"[{timestamp}] 상태 변경 ▶ {state_str}") +# prev_state = signal + +# time.sleep(0.1) +# except KeyboardInterrupt: +# print("⛔ 종료 중...") +# GPIO.cleanup() + + +# # ─────────────────────────────── +# # DHT11 센서 관련 (5회 재시도 버전) +# # ─────────────────────────────── +# import adafruit_dht +# import board + +# def read_dht11(): +# print("🌡️ DHT11 센서 측정 시작...") +# dhtDevice = adafruit_dht.DHT11(board.D4) # GPIO4 (멀티보드 IO4) + +# for i in range(5): # 최대 5번 재시도 +# try: +# print(f"📡 시도 {i + 1} ...") +# temperature = dhtDevice.temperature +# 
humidity = dhtDevice.humidity + +# if temperature is not None and humidity is not None: +# print(f"✅ 온도: {temperature}°C") +# print(f"✅ 습도: {humidity}%") +# break +# else: +# print("⚠️ 센서로부터 데이터를 읽을 수 없습니다.") +# except RuntimeError as error: +# print(f"⚠️ 에러 발생: {error.args[0]}") +# except Exception as error: +# print(f"❌ 심각한 오류: {error}") +# break +# time.sleep(2) # 재시도 간 간격 + +# # 종료 함수는 비활성화 (라이브러리 오류 방지) +# # dhtDevice.exit() + +import uvicorn + +# ─────────────────────────────── +# 메인 함수 +# ─────────────────────────────── +if __name__ == "__main__": + uvicorn.run( + app="app.service.main:app", + # host="localhost", + host="0.0.0.0", + port=8000, + ) + # detect_motion() # PIR 센서 테스트 시 주석 해제 +# read_dht11() # 현재는 DHT11만 테스트 + diff --git a/requirements.txt b/requirements.txt index 8d58cce..376bdd9 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/requirements_server.txt b/requirements_server.txt new file mode 100644 index 0000000..45b9215 --- /dev/null +++ b/requirements_server.txt @@ -0,0 +1,170 @@ +absl-py==2.3.0 +annotated-types==0.7.0 +anyio==4.9.0 +asttokens==3.0.0 +astunparse==1.6.3 +async-timeout==5.0.1 +attrs==25.3.0 +audioread==3.0.1 +av==14.4.0 +backcall==0.2.0 +beautifulsoup4==4.13.4 +bleach==6.2.0 +boto3==1.37.16 +botocore==1.37.16 +cachetools==5.5.2 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +colorama==0.4.6 +coloredlogs==15.0.1 +contourpy==1.3.0 +ctranslate2==4.6.0 +cycler==0.12.1 +decorator==5.2.1 +defusedxml==0.7.1 +distro==1.9.0 +docopt==0.6.2 +elevenlabs==1.54.0 +exceptiongroup==1.2.2 +executing==2.2.0 +fastapi==0.115.11 +faster-whisper==1.1.1 +fastjsonschema==2.21.1 +filelock==3.18.0 +flatbuffers==25.2.10 +fonttools==4.58.1 +fsspec==2025.3.2 +gast==0.4.0 +google-auth==2.40.2 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +grpcio==1.71.0 +h11==0.14.0 +h5py==3.13.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.31.2 +humanfriendly==10.0 +idna==3.10 +importlib_metadata==8.7.0 +importlib_resources==6.5.2 +ipython==8.12.3 +jedi==0.19.2 +Jinja2==3.1.6 +jiter==0.10.0 +jmespath==1.0.1 +joblib==1.5.1 +jsonschema==4.24.0 +jsonschema-specifications==2025.4.1 +jupyter_client==8.6.3 +jupyter_core==5.8.1 +jupyterlab_pygments==0.3.0 +keras==3.10.0 +kiwisolver==1.4.7 +lazy_loader==0.4 +libclang==18.1.1 +librosa==0.11.0 +llvmlite==0.43.0 +Markdown==3.8 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.9.4 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +mistune==3.1.3 +ml_dtypes==0.5.1 +mpmath==1.3.0 +msgpack==1.1.0 +namex==0.1.0 +nbclient==0.10.2 +nbconvert==7.16.6 +nbformat==5.10.4 +networkx==3.2.1 +numba==0.60.0 +numpy==1.26.4 +oauthlib==3.2.2 +onnxruntime==1.19.2 +openai==1.68.2 +opt_einsum==3.4.0 +optree==0.16.0 +packaging==25.0 +pandocfilters==1.5.1 +parso==0.8.4 +pickleshare==0.7.5 +pillow==11.2.1 +pipreqs==0.5.0 +platformdirs==4.3.8 +playsound==1.3.0 +pooch==1.8.2 +prompt_toolkit==3.0.51 +protobuf==5.29.5 +pure_eval==0.2.3 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.10.6 +pydantic_core==2.27.2 +pydub==0.25.1 +Pygments==2.19.1 +pyparsing==3.2.3 +pyreadline3==3.5.4 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-multipart==0.0.20 +pywin32==310 +PyYAML==6.0.2 +pyzmq==26.4.0 +redis==6.2.0 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +rich==14.0.0 +rpds-py==0.25.1 +rsa==4.9.1 +s3transfer==0.11.4 +safetensors==0.5.3 +scikit-learn==1.6.1 +scipy==1.13.1 +sentence-transformers==4.1.0 +six==1.17.0 +sniffio==1.3.1 +sounddevice==0.5.1 
+soundfile==0.13.1 +soupsieve==2.7 +soxr==0.5.0.post1 +SpeechRecognition==3.14.3 +stack-data==0.6.3 +starlette==0.46.1 +sympy==1.14.0 +tensorboard==2.19.0 +tensorboard-data-server==0.7.2 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.19.0 +tensorflow-estimator==2.11.0 +tensorflow-io-gcs-filesystem==0.31.0 +tensorflow_intel==2.18.0 +termcolor==3.1.0 +tf_keras==2.19.0 +threadpoolctl==3.6.0 +tinycss2==1.4.0 +tokenizers==0.21.1 +torch==2.7.0 +torchaudio==2.7.0 +torchvision==0.22.0 +tornado==6.5.1 +tqdm==4.67.1 +traitlets==5.14.3 +transformers==4.52.4 +typing_extensions==4.12.2 +urllib3==1.26.20 +uvicorn==0.34.0 +wcwidth==0.2.13 +webencodings==0.5.1 +websockets==15.0.1 +Werkzeug==3.1.3 +wrapt==1.17.2 +yarg==0.1.9 +zipp==3.22.0 diff --git a/test_audio/test8.mp3 b/test_audio/test8.mp3 new file mode 100644 index 0000000..93287b8 Binary files /dev/null and b/test_audio/test8.mp3 differ
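Note on the custom loss introduced in app/ML/loss.py: within this patch it is only exercised indirectly, through load_model(..., custom_objects={'boundary_enhanced_focal_loss': boundary_enhanced_focal_loss}) in app/controller/RecordController.py and app/ML/predict_colab.py. The short sketch below is a hedged sanity check of the function's call signature over the seven emotion classes used throughout the diff; the dummy tensors and the printed value are illustrative assumptions, not project data.

import tensorflow as tf

from app.ML.loss import boundary_enhanced_focal_loss

# Class order used throughout this patch:
# ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral']
# One-hot targets for two dummy utterances (illustrative values only).
y_true = tf.constant([[0., 0., 1., 0., 0., 0., 0.],
                      [0., 1., 0., 0., 0., 0., 0.]], dtype=tf.float32)

# Each prediction row sums to 1. The first row is an easy hit (probability of
# the true class is 0.70); the second row's true-class probability is 0.25,
# below margin=0.3, so the loss treats it as a hard sample and up-weights it.
y_pred = tf.constant([[0.05, 0.05, 0.70, 0.05, 0.05, 0.05, 0.05],
                      [0.20, 0.25, 0.10, 0.15, 0.10, 0.10, 0.10]], dtype=tf.float32)

loss = boundary_enhanced_focal_loss(y_true, y_pred)
print(float(loss))  # scalar; the function returns tf.reduce_sum over all weighted terms

The same function object has to be supplied via custom_objects whenever the bundled ko-sbert_multimodal_*.h5 model is reloaded, as RecordController.py and predict_colab.py already do.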