diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..3ffc2af --- /dev/null +++ b/.dockerignore @@ -0,0 +1,18 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +*.mp3 +*.wav +.env +.venv/ +.idea/ +.git/ +.gitignore +*.log +node_modules/ +*.pt +*.ckpt +*.zip +*.tar +*.onnx diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..df41045 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,29 @@ +name: Docker Build & Push + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main", "dev" ] + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . + push: true + tags: | + ${{ secrets.DOCKER_USERNAME }}/fastapi-app:latest + ${{ secrets.DOCKER_USERNAME }}/fastapi-app:${{ github.sha }} diff --git a/.gitignore b/.gitignore index 6be00c6..d80845d 100644 --- a/.gitignore +++ b/.gitignore @@ -28,11 +28,16 @@ replay_pid* .venv env/ venv/ -ENV/ env.bak/ venv.bak/ *.idea .DS_Store -*.h5 \ No newline at end of file + +__pycache__/ +*.pyc +audio/ +#emotion_diary/ +emotion_png/ +pyvenv.cfg \ No newline at end of file diff --git a/253cfe2a-382f-4bbe-8586-5e5fcbbe85ef.wav b/253cfe2a-382f-4bbe-8586-5e5fcbbe85ef.wav new file mode 100644 index 0000000..ee18674 Binary files /dev/null and b/253cfe2a-382f-4bbe-8586-5e5fcbbe85ef.wav differ diff --git a/4d37625a-4026-494e-912f-fcf2744cbdf0.wav b/4d37625a-4026-494e-912f-fcf2744cbdf0.wav new file mode 100644 index 0000000..86d332e Binary files /dev/null and b/4d37625a-4026-494e-912f-fcf2744cbdf0.wav differ diff --git a/791fcb5a-f100-4e73-a5f7-05306f930005.wav b/791fcb5a-f100-4e73-a5f7-05306f930005.wav new file mode 100644 index 0000000..d385133 Binary files /dev/null and b/791fcb5a-f100-4e73-a5f7-05306f930005.wav differ diff --git a/Adafruit_Python_DHT b/Adafruit_Python_DHT new file mode 160000 index 0000000..8f5e2c4 --- /dev/null +++ b/Adafruit_Python_DHT @@ -0,0 +1 @@ +Subproject commit 8f5e2c4d6ebba8836f6d31ec9a0c171948e3237d diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..acd2f1f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM python:3.9-slim +# ffmpeg 설치 추가 +RUN apt-get update && apt-get install -y ffmpeg + +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY . . 
+CMD ["uvicorn", "app.service.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/app/ML/ModelService.py b/app/ML/ModelService.py new file mode 100644 index 0000000..a89245c --- /dev/null +++ b/app/ML/ModelService.py @@ -0,0 +1,97 @@ +# import numpy as np +# from dotenv import load_dotenv +# from fastapi import Request, UploadFile, File, APIRouter +# from typing import List +# from tensorflow.keras.models import load_model +# from sentence_transformers import SentenceTransformer +# import io +# import requests +# +# from app.ML.audio_extractor_utils import get_features +# from app.ML.loss import boundary_enhanced_focal_loss +# from app.ML.plot_utils import save_plot, get_s3_png_url +# from app.ML.speech_to_text import speech_to_text +# +# import os +# +# from app.service.gpt import EmotionReportGPT +# from app.utils.convertFileExtension import convert_to_wav +# +# router = APIRouter( +# prefix="/api/fastapi", +# ) +# load_dotenv() +# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +# +# +# +# +# @router.post("/predict") +# async def predict(request: Request, files: List[UploadFile] = File(...)): +# # token = request.headers.get("Authorization").split(" ")[1] +# print(files) +# # 1) 임시 파일 저장 or 메모리 내 처리 +# wav_data_list = [] +# for file in files: +# raw = await file.read() +# ext = file.filename.split('.')[-1] # 'm4a', 'mp3' 등 +# wav_bytes = convert_to_wav(raw, ext) # BytesIO 변환 +# wav_data_list.append(wav_bytes) +# +# # 2) 오디오 특징 추출 +# all_feats = [] +# for wav_bytes in wav_data_list: +# # get_features 함수가 경로 입력이면, 아래처럼 메모리 파일 처리 필요 +# # 임시파일로 저장 후 경로 전달 or get_features 수정 필요 +# +# temp_path = f"temp_{file.filename}" +# with open(temp_path, "wb") as f: +# f.write(wav_bytes) +# feats = get_features(temp_path) +# os.remove(temp_path) +# all_feats.append(feats) +# +# all_feats = np.stack(all_feats, axis=0) +# pooled_feats = all_feats.mean(axis=0) +# audio_input = pooled_feats[np.newaxis, :, np.newaxis] +# +# # 3) STT & 텍스트 임베딩 +# texts = [] +# for wav_bytes in wav_data_list: +# temp_path = f"temp_stt.wav" +# with open(temp_path, "wb") as f: +# f.write(wav_bytes) +# text = speech_to_text(temp_path) +# os.remove(temp_path) +# texts.append(text) +# +# full_text = " . 
".join(texts) +# text_vec = embedding_model.encode([full_text])[0] +# text_input = text_vec[np.newaxis, :] +# +# # 4) 예측 +# prediction = model.predict([audio_input, text_input]) +# pred_percent = (prediction[0] * 100).tolist() +# +# # 5) JSON 응답 +# result = {label: round(p, 2) for label, p in zip(emotion_labels, pred_percent)} +# top_idx = np.argmax(pred_percent) +# result['predicted_emotion'] = emotion_labels[top_idx] +# +# local_path = save_plot(pred_percent) +# s3_path = get_s3_png_url(local_path) +# reporter = EmotionReportGPT(full_text, pred_percent) +# report_text = reporter.get_report_text() +# +# print(s3_path) +# +# # send_emotion_report_to_spring(s3_path, report_text) +# +# data = { +# "imageUrl": s3_path, +# "report_text": report_text +# } +# return data +# +# +# diff --git a/app/ML/audio_extractor_utils.py b/app/ML/audio_extractor_utils.py new file mode 100644 index 0000000..dcace4c --- /dev/null +++ b/app/ML/audio_extractor_utils.py @@ -0,0 +1,69 @@ +import librosa +import librosa.display +import numpy as np + + +def noise(data): + noise_amp = 0.035 * np.random.uniform() * np.amax(data) + data = data + noise_amp * np.random.normal(size=data.shape[0]) + return data + + +def stretch(data, rate=0.8): + return librosa.effects.time_stretch(y=data, rate=rate) + + +def shift(data): + shift_range = int(np.random.uniform(low=-5, high=5) * 1000) + return np.roll(data, shift_range) + + +def pitch(data, sampling_rate, pitch_factor=0.7): + return librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=pitch_factor) + + +def extract_features(data, sample_rate): + # ZCR + result = np.array([]) + zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0) + result = np.hstack((result, zcr)) # stacking horizontally + + # Chroma_stft + stft = np.abs(librosa.stft(data)) + chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0) + result = np.hstack((result, chroma_stft)) # stacking horizontally + + # MFCC + mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0) + result = np.hstack((result, mfcc)) # stacking horizontally + + # Root Mean Square Value + rms = np.mean(librosa.feature.rms(y=data).T, axis=0) + result = np.hstack((result, rms)) # stacking horizontally + + # MelSpectogram + mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0) + result = np.hstack((result, mel)) # stacking horizontally + + return result + + +def get_features(path): + data, sample_rate = librosa.load(path, duration=2.5, offset=0.0) + + # without augmentation + res1 = extract_features(data, sample_rate) + result = np.array(res1) + + # data with noise + noise_data = noise(data) + res2 = extract_features(noise_data, sample_rate) + result = np.concatenate((result, res2), axis=0) + + # data with stretching and pitching + new_data = stretch(data) + data_stretch_pitch = pitch(new_data, sample_rate) + res3 = extract_features(data_stretch_pitch, sample_rate) + result = np.concatenate((result, res3), axis=0) + + return result diff --git a/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5 b/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5 new file mode 100644 index 0000000..b4f22d5 Binary files /dev/null and b/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5 differ diff --git a/app/ML/loss.py b/app/ML/loss.py new file mode 100644 index 0000000..c41199b --- /dev/null +++ b/app/ML/loss.py @@ -0,0 +1,29 @@ +import tensorflow as tf + + +# 1. 
Boundary-Enhanced Focal Loss 구현 (소수 클래스 식별 강화) +def boundary_enhanced_focal_loss(y_true, y_pred, gamma=2.0, margin=0.3): + y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7) + + # 하드 샘플 마이닝 (낮은 확률로 예측된 샘플 식별) + correct_prob = tf.reduce_sum(y_true * y_pred, axis=-1) + hard_mask = tf.cast(tf.less(correct_prob, margin), tf.float32) + + # 클래스별 가중치 계산 (소수 클래스에 더 높은 가중치) + effective_counts = tf.reduce_sum(y_true, axis=0) + alpha = 1.0 / (effective_counts + 1e-7) + alpha = alpha / tf.reduce_sum(alpha) + + # 소수 클래스 추가 가중치 부여 (surprise, neutral) + class_boost = tf.constant([1.0, 0.5, 1.0, 1.0, 1.0, 2.5, 5.0], dtype=tf.float32) + alpha = alpha * class_boost + + # Focal Loss 계산 + cross_entropy = -y_true * tf.math.log(y_pred) + focal_weight = tf.pow(1.0 - y_pred, gamma) + + # 하드 샘플에 추가 가중치 부여 + sample_weight = 1.0 + hard_mask * 2.0 + loss = sample_weight[:, tf.newaxis] * alpha * focal_weight * cross_entropy + + return tf.reduce_sum(loss) diff --git a/app/ML/plot_utils.py b/app/ML/plot_utils.py new file mode 100644 index 0000000..b7744d0 --- /dev/null +++ b/app/ML/plot_utils.py @@ -0,0 +1,40 @@ +# 그래프 그리기 +import os +from datetime import datetime + +from matplotlib import pyplot as plt +from app.service.s3Service import upload_to_s3_png + +colors = ['#e74c3c', '#3498db', '#f1c40f', '#e67e22', '#9b59b6', '#1abc9c', '#95a5a6'] +emotion_labels = ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral'] + + +def save_plot(predictions_percent): + plt.figure(figsize=(10, 6)) + bars = plt.barh(emotion_labels, predictions_percent, color=colors, alpha=0.85) + + plt.title('Emotion Probability Distribution', fontsize=20, weight='bold', pad=15) + plt.xlabel('Probability (%)', fontsize=14) + plt.xlim(0, max(predictions_percent) + 10) + plt.grid(axis='x', linestyle='--', alpha=0.6) + + for bar, percent in zip(bars, predictions_percent): + width = bar.get_width() + plt.text(width + 0.8, bar.get_y() + bar.get_height() / 2, f'{percent:.1f}%', va='center', fontsize=13, + weight='bold', color='#333') + + plt.yticks(fontsize=14, weight='bold') + plt.tight_layout() + + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}" + local_path = os.getcwd() + f"/app/emotion_png/{filename}_emotion_distribution.png" + # 이미지 파일로 저장 + plt.savefig(local_path, dpi=300, bbox_inches='tight') + plt.show() + + return local_path + + +def get_s3_png_url(local_path): + return upload_to_s3_png(local_path) diff --git a/app/ML/predict_colab.py b/app/ML/predict_colab.py new file mode 100644 index 0000000..a95d54a --- /dev/null +++ b/app/ML/predict_colab.py @@ -0,0 +1,95 @@ +import numpy as np +import matplotlib.pyplot as plt +from tensorflow.keras.models import load_model +from sentence_transformers import SentenceTransformer +import glob +import os + +from app.ML.audio_extractor_utils import get_features +from app.ML.loss import boundary_enhanced_focal_loss +from app.ML.speech_to_text import speech_to_text +import os +os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' + + +BASE_DIR_resp = "/home/team4/Desktop/capstone/AI/app/emotion_diary" +BASE_DIR_win = "C:/Users/YJG/Desktop/2025_1_capstone_2/AI/app/emotion_diary" +emotion_labels = ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral'] +model_path_resp = "/home/team4/Desktop/capstone/AI/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5" +model_path_win = "C:/Users/YJG/Desktop/2025_1_capstone_2/AI/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5" + + +def predict(): + BASE_DIR = BASE_DIR_win + model_path = model_path_win + # (가정) 미리 정의된 
함수/변수 + # get_features(path): (486,) 벡터 반환 + # speech_to_text(path): STT → 문자열 반환 + # boundary_enhanced_focal_loss: 커스텀 손실 + # emotion_labels: ['angry','sadness','happiness','fear','disgust','surprise','neutral'] + # model_path, sample_path: 경로 문자열 + + # 1) WAV 파일 리스트 + # sample_wav_list = [ + # sample_path + "/jg_sadness_1.wav", + # sample_path + "/jg_sadness_2.wav", + # sample_path + "/jg_sadness_3.wav", + # sample_path + "/jg_sadness_4.wav", + # sample_path + "/jg_sadness_5.wav" + # ] + sample_wav_list = glob.glob(os.path.join(BASE_DIR, "**", "*.wav"), recursive=True) + + # 2) 오디오 특징 평균 풀링 + all_feats = np.stack([get_features(p) for p in sample_wav_list], axis=0) # (5,486) + pooled_feats = all_feats.mean(axis=0) # (486,) + + # 3) 모델 입력 형태 맞추기 + audio_input = pooled_feats[np.newaxis, :, np.newaxis] # (1,486,1) + + # 4) 전체 텍스트 STT → 하나의 문장으로 결합 + texts = [speech_to_text(p) for p in sample_wav_list] + full_text = " . ".join(texts) + + # 5) 텍스트 임베딩 + embedding_model = SentenceTransformer('jhgan/ko-sbert-multitask') + text_vec = embedding_model.encode([full_text])[0] # (768,) + text_input = text_vec[np.newaxis, :] # (1,768) + + # 6) 모델 로드 및 예측 + model = load_model(model_path, custom_objects={ + 'boundary_enhanced_focal_loss': boundary_enhanced_focal_loss + }) + prediction = model.predict([audio_input, text_input]) # (1,7) + pred_percent = prediction[0] * 100 # (7,) + + # 7) 콘솔에 출력 + for lbl, p in zip(emotion_labels, pred_percent): + print(f"{lbl}: {p:.2f}%") + top_idx = np.argmax(pred_percent) + print(f"\n최종 예측 감정: {emotion_labels[top_idx]}") + + # 8) 가로 막대그래프 시각화 + colors = ['#e74c3c', '#3498db', '#f1c40f', '#e67e22', '#9b59b6', '#1abc9c', '#95a5a6'] + + plt.figure(figsize=(10, 6)) + bars = plt.barh(emotion_labels, pred_percent, color=colors, alpha=0.85) + + plt.title('Emotion Probability Distribution', fontsize=18, weight='bold', pad=15) + plt.xlabel('Probability (%)', fontsize=14) + plt.xlim(0, pred_percent.max() + 10) + plt.grid(axis='x', linestyle='--', alpha=0.6) + + for bar, p in zip(bars, pred_percent): + plt.text(p + 1, bar.get_y() + bar.get_height() / 2, + f'{p:.1f}%', va='center', fontsize=12, weight='bold', color='#333') + + plt.yticks(fontsize=13, weight='bold') + plt.tight_layout() + + # 이미지 파일로 저장 + plt.savefig('emotion_distribution.png', dpi=300, bbox_inches='tight') + plt.show() + + +if __name__ == "__main__": + predict() diff --git a/app/ML/speech_to_text.py b/app/ML/speech_to_text.py new file mode 100644 index 0000000..8cd19c5 --- /dev/null +++ b/app/ML/speech_to_text.py @@ -0,0 +1,26 @@ +import speech_recognition as sr + +# sample_wav_path = sample_path + "/sh_sadness_2.wav" + + +# STT 변환 함수 +def speech_to_text(audio_path): + recognizer = sr.Recognizer() + + # 음성 파일 로드 + with sr.AudioFile(audio_path) as source: + audio_data = recognizer.record(source) # 음성 데이터 읽기 + + try: + # 구글 STT API 사용 (무료) + text = recognizer.recognize_google(audio_data, language="ko-KR") + return text + except sr.UnknownValueError: + return "음성을 인식할 수 없습니다." 
+ except sr.RequestError: + return "STT 요청 실패" + +# +# # MP3에서 변환한 WAV 파일 입력 +# sample_text = speech_to_text(sample_wav_path) +# print("변환된 텍스트:", sample_text) diff --git a/app/controller/RecordController.py b/app/controller/RecordController.py new file mode 100644 index 0000000..3b83cd9 --- /dev/null +++ b/app/controller/RecordController.py @@ -0,0 +1,211 @@ +import json +import os +import subprocess +from typing import List + +import numpy as np +import requests +from boto3 import client +from fastapi import APIRouter, Request, UploadFile, File, Form +# from sentence_transformers import SentenceTransformer + +from app.ML.audio_extractor_utils import get_features +from app.ML.loss import boundary_enhanced_focal_loss +from app.ML.plot_utils import save_plot, get_s3_png_url +from app.ML.speech_to_text import speech_to_text +from app.dto.ScheduleSpeakRequestDto import ScheduleSpeakRequestDto +from app.dto.ScheduleTTSRequestDto import ScheduleTTSRequestDto +from app.service.elevenLabs import text_to_speech_file_save_AWS, text_to_speech_file +from app.service.gpt import ChatgptAPI, EmotionReportGPT +from app.service.s3Service import download_from_s3, save_local_file +from app.utils import play_file +from tensorflow.keras.models import load_model + +from app.utils.convertFileExtension import convert_to_wav + +router = APIRouter( + prefix="/api/fastapi", +) + +access_key = os.getenv("S3_ACCESS_KEY") +secret_key = os.getenv("S3_SECRET_KEY") +bucket_name = os.getenv("S3_BUCKET_NAME") +url_base = os.getenv("S3_URL") +yjg_voice_id = os.getenv("YJG_VOICE_ID") + +s3_client = client( + "s3", + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name="ap-northeast-2", +) + +os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' + +# app = FastAPI() + +BASE_DIR_win = os.getcwd() + "/app/emotion_diary" +model_path_win = os.getcwd() + "/app/ML/ko-sbert_multimodal_0501_3_resnet_augment_h.h5" +emotion_labels = ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral'] + +embedding_model = SentenceTransformer('jhgan/ko-sbert-multitask') +model = load_model(model_path_win, custom_objects={'boundary_enhanced_focal_loss': boundary_enhanced_focal_loss}) + + +async def save_local_files(files: List[UploadFile]) -> list: + """업로드된 파일을 로컬에 저장하고 파일 경로를 반환합니다.""" + audio_dir = "./audio" + local_file_path_list = [] + if not os.path.exists(audio_dir): + os.makedirs(audio_dir) + for file in files: + local_file_path = os.path.join(audio_dir, file.filename) # 파일 경로 생성 + with open(local_file_path, "wb") as f: + f.write(await file.read()) # 파일 내용을 저장 + local_file_path_list.append(local_file_path) + return local_file_path_list + + +# 첫 로그인 시 목소리 녹음 api +@router.post("/voices") +async def getVoice(request: Request, file: UploadFile = File(...)): + token = request.headers.get("Authorization").split(" ")[1] + # local_file_path = await save_local_file(file) + # voice_id = add_voice(name=name, local_file_paths=[local_file_path]) + # voice_url = s3Service.upload_to_s3(local_file_path) + # os.remove(local_file_path) + + send_user_voice_file_to_spring(token=token, voice_url=yjg_voice_id) + + +# 만약 voice_id와 요구하는 분야가 오면 맞춰서 return +@router.post("/schedules") +async def schedule_tts(request: Request, schedules: ScheduleTTSRequestDto): + # token = request.headers.get("Authorization").split(" ")[1] + voice_id = yjg_voice_id + + prompt = ChatgptAPI(schedules.schedule_text, schedules.alias) + + # schedule_dict: {"저녁": "엄마~ 저녁 잘 챙겨 먹었어?", "운동": "오늘 운동했어? 
건강 챙겨~!"} + schedule_dict = prompt.get_schedule_json() + + # TTS 처리 (MP3 파일 생성 후 s3 저장) + response = { + schedules.schedule_id[i]: text_to_speech_file_save_AWS( + schedule_dict.get(schedules.schedule_text[i], ""), + yjg_voice_id + ) + # schedules.schedule_id[i]: str(schedules.schedule_id[i]) + for i in range(len(schedules.schedule_id)) + } + return response + + +@router.post("/predict") +async def predict(request: Request, files: List[UploadFile] = File(...)): + # token = request.headers.get("Authorization").split(" ")[1] + print(files) + # 1) 임시 파일 저장 or 메모리 내 처리 + wav_data_list = [] + for file in files: + raw = await file.read() + ext = file.filename.split('.')[-1] # 'm4a', 'mp3' 등 + wav_bytes = convert_to_wav(raw, ext) # BytesIO 변환 + wav_data_list.append(wav_bytes) + + # 2) 오디오 특징 추출 + all_feats = [] + for wav_bytes in wav_data_list: + # get_features 함수가 경로 입력이면, 아래처럼 메모리 파일 처리 필요 + # 임시파일로 저장 후 경로 전달 or get_features 수정 필요 + + temp_path = f"temp_{file.filename}" + with open(temp_path, "wb") as f: + f.write(wav_bytes) + feats = get_features(temp_path) + os.remove(temp_path) + all_feats.append(feats) + + all_feats = np.stack(all_feats, axis=0) + pooled_feats = all_feats.mean(axis=0) + audio_input = pooled_feats[np.newaxis, :, np.newaxis] + + # 3) STT & 텍스트 임베딩 + texts = [] + for wav_bytes in wav_data_list: + temp_path = f"temp_stt.wav" + with open(temp_path, "wb") as f: + f.write(wav_bytes) + text = speech_to_text(temp_path) + os.remove(temp_path) + texts.append(text) + + full_text = " . ".join(texts) + text_vec = embedding_model.encode([full_text])[0] + text_input = text_vec[np.newaxis, :] + + # 4) 예측 + prediction = model.predict([audio_input, text_input]) + pred_percent = (prediction[0] * 100).tolist() + + # 5) JSON 응답 + result = {label: round(p, 2) for label, p in zip(emotion_labels, pred_percent)} + top_idx = np.argmax(pred_percent) + result['predicted_emotion'] = emotion_labels[top_idx] + + local_path = save_plot(pred_percent) + s3_path = get_s3_png_url(local_path) + reporter = EmotionReportGPT(full_text, pred_percent) + report_text = reporter.get_report_text() + + print(s3_path) + + # send_emotion_report_to_spring(s3_path, report_text) + + data = { + "imageUrl": s3_path, + "report_text": report_text + } + return data + + +def send_user_voice_file_to_spring(token: str, voice_url: str): + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "text/plain" + } + # requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data) + # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data) + + requests.post( + "http://springboot:8080/api/spring/records/voices", + headers=headers, + data=voice_url # 주의: 'data='를 써야 함 + ) + + +def send_user_voice_id_to_spring(token: str, voice_id: str): + headers = { + "Authorization": f"Bearer {token}" + } + data = { + "voiceId": voice_id + } + requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data) + # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data) + + +def send_emotion_report_to_spring(image_url: str, analysis_text): + headers = { + # "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + } + data = { + "imageUrl": image_url, + "report_text": analysis_text + } + requests.post( + "http://springboot:8080/api/spring/report", + headers=headers, + json=data + ) diff --git a/app/dto/ExtraTTSRequestDto.py b/app/dto/ExtraTTSRequestDto.py new file mode 100644 index 0000000..0a1b600 
--- /dev/null +++ b/app/dto/ExtraTTSRequestDto.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + + +class ExtraTTSRequestDto(BaseModel): + schedule_id: int + is_basic_schedule: bool + schedule_text: str + target_time: str # "10:00:00" 형식 diff --git a/app/dto/ScheduleSpeakRequestDto.py b/app/dto/ScheduleSpeakRequestDto.py new file mode 100644 index 0000000..cef86db --- /dev/null +++ b/app/dto/ScheduleSpeakRequestDto.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel + + +class ScheduleSpeakRequestDto(BaseModel): + schedule_id: int + schedule_voice_Url: str + target_time: str # "10:00:00" 형식 diff --git a/app/dto/ScheduleTTSRequestDto.py b/app/dto/ScheduleTTSRequestDto.py new file mode 100644 index 0000000..41feeaf --- /dev/null +++ b/app/dto/ScheduleTTSRequestDto.py @@ -0,0 +1,10 @@ +from typing import List + +from pydantic import BaseModel + + +class ScheduleTTSRequestDto(BaseModel): + voice_id: str + alias: str + schedule_id: List[int] + schedule_text: List[str] diff --git a/app/emotion_diary/20250608_0.wav b/app/emotion_diary/20250608_0.wav new file mode 100644 index 0000000..d01f445 Binary files /dev/null and b/app/emotion_diary/20250608_0.wav differ diff --git a/app/emotion_diary/20250608_1.wav b/app/emotion_diary/20250608_1.wav new file mode 100644 index 0000000..609d7a6 Binary files /dev/null and b/app/emotion_diary/20250608_1.wav differ diff --git a/app/emotion_diary/20250608_2.wav b/app/emotion_diary/20250608_2.wav new file mode 100644 index 0000000..ef2c161 Binary files /dev/null and b/app/emotion_diary/20250608_2.wav differ diff --git a/app/emotion_diary/20250608_3.wav b/app/emotion_diary/20250608_3.wav new file mode 100644 index 0000000..98e0ba5 Binary files /dev/null and b/app/emotion_diary/20250608_3.wav differ diff --git a/app/emotion_diary/jg_sadness_1.m4a b/app/emotion_diary/jg_sadness_1.m4a new file mode 100644 index 0000000..7a0015c Binary files /dev/null and b/app/emotion_diary/jg_sadness_1.m4a differ diff --git a/app/emotion_diary/jg_sadness_2.m4a b/app/emotion_diary/jg_sadness_2.m4a new file mode 100644 index 0000000..9375565 Binary files /dev/null and b/app/emotion_diary/jg_sadness_2.m4a differ diff --git a/app/emotion_diary/jg_sadness_3.m4a b/app/emotion_diary/jg_sadness_3.m4a new file mode 100644 index 0000000..ee1a08a Binary files /dev/null and b/app/emotion_diary/jg_sadness_3.m4a differ diff --git a/app/emotion_diary/jg_sadness_4.m4a b/app/emotion_diary/jg_sadness_4.m4a new file mode 100644 index 0000000..26b0d4d Binary files /dev/null and b/app/emotion_diary/jg_sadness_4.m4a differ diff --git a/app/emotion_diary/jg_sadness_5.m4a b/app/emotion_diary/jg_sadness_5.m4a new file mode 100644 index 0000000..235860c Binary files /dev/null and b/app/emotion_diary/jg_sadness_5.m4a differ diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py new file mode 100644 index 0000000..a9318c6 --- /dev/null +++ b/app/service/elevenLabs.py @@ -0,0 +1,97 @@ +import os +import uuid + +from dotenv import load_dotenv +from elevenlabs import ElevenLabs, VoiceSettings + +from app.service.s3Service import upload_to_s3 + +load_dotenv() +client = ElevenLabs( + api_key=os.getenv("ELEVENLABS_KEY"), +) +yjg_voice_id = os.getenv("YJG_VOICE_ID") + + +def get_voice(): + response = client.voices.get_all() + voice_ids = [voice.voice_id for voice in response.voices] # Voice 객체의 voice_id 속성 사용 + return voice_ids + + +def delete_voice(voice: str): + try: + response = client.voices.delete(voice_id=voice) + print(f"Deleted voice_id: {voice}") + except Exception as e: + print(f"Error 
deleting voice_id {voice}: {e}") + + +def delete_all_voice(voices: list): + for voice in voices: + delete_voice(voice) + + +def add_voice(name: str, local_file_paths: list): + # 파일 경로를 통해 파일 객체 생성 + files = [] + for path in local_file_paths: + with open(path, 'rb') as audio_file: + files.append(audio_file.read()) # 파일 내용을 리스트에 저장 + + response = client.voices.add(name=name, files=files) + return response.voice_id + + +def text_to_speech_file_save_AWS(text: str, voice_id=yjg_voice_id) -> str: + response = client.text_to_speech.convert( + voice_id=voice_id, + output_format="mp3_22050_32", + text=text, + model_id="eleven_multilingual_v2", + # voice_settings=VoiceSettings( + # stability=0.3, + # similarity_boost=1.0, + # style=0.0, + # use_speaker_boost=True, + # ), + ) + + save_file_path = f"{uuid.uuid4()}.mp3" + with open(save_file_path, "wb") as f: + for chunk in response: + if chunk: + f.write(chunk) + aws_file_url = upload_to_s3(local_file_path=save_file_path) + os.remove(save_file_path) + + # delete_voice(voice_id) + + return aws_file_url + + +def text_to_speech_file(text: str, voice_id=yjg_voice_id) -> str: + response = client.text_to_speech.convert( + voice_id=voice_id, + # output_format="mp3_22050_32", + text=text, + model_id="eleven_multilingual_v2", + # voice_settings=VoiceSettings( + # stability=0.3, + # similarity_boost=1.0, + # style=0.0, + # use_speaker_boost=True, + # ), + ) + + save_file_path = f"{uuid.uuid4()}.wav" + with open(save_file_path, "wb") as f: + for chunk in response: + if chunk: + f.write(chunk) + # aws_file_url = upload_to_s3(local_file_path=save_file_path) + # os.remove(save_file_path) + + # delete_voice(voice_id) + + return save_file_path diff --git a/app/service/gpt.py b/app/service/gpt.py new file mode 100644 index 0000000..eb06023 --- /dev/null +++ b/app/service/gpt.py @@ -0,0 +1,139 @@ +import os + +from dotenv import load_dotenv +from openai import OpenAI + +from app.utils import parsing_json + +load_dotenv() +client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + + +class ChatgptAPI: + def __init__(self, schedules, alias): + self.schedules = schedules + self.alias = alias + + def create_schedule_prompt(self): + system_message = f""" + 너는 지금부터 혼자 사시는 부모님을 걱정하는 보호자야. + 네 역할은 키워드를 보고, 키워드와 관련한 문제에 대해서 부모님을 걱정하고, 생활은 챙겨주는거야. + 키워드는 다음과 같아: {str(self.schedules)} + + 너의 목표는 두 가지야: + 1. 키워드에 대한 질문 혹은 문장을 한 줄의 텍스트로 만들어. + ex) 키워드가 '저녁' 이라면, "{self.alias}~~ 하루 잘 보냈어?? 저녁도 맛있는거 챙겨먹어!! 사랑해~~ " + 2. 만든 텍스트는 ?? !! ~~ ,, .. 등의 다양한 특수문자가 많이 들어갈 수 있어. 감정이 강하게 느껴지게 작성해줘. + 2-a. 특수문자를 붙일 때는 꼭 2개씩 붙여줘 + 3. 부모님을 지칭하는 별명은 {self.alias} 로 해줘. + 4. 문장과 문장 사이의 띄어쓰기를 2개씩 넣어줘 + + 결과는 {{"키워드": "문장"}} 형태의 JSON 문자열로 반환해줘. 꼭 큰따옴표(")만 사용해. + + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_schedule_json(self): + prompt = self.create_schedule_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + schedule_dict = parsing_json.extract_json_from_content(content) + + return schedule_dict + + +class GenerateQuestionGPT: + def __init__(self, text, alias): + self.text = text + self.alias = alias + + def create_schedule_prompt(self): + system_message = f""" + 너는 지금부터 혼자 사시는 부모님을 걱정하는 보호자야. + + 네 역할은 키워드를 보고, 키워드와 관련한 문제에 대해서 부모님을 걱정하고, 생활은 챙겨주는거야. + 키워드는 다음과 같아: {str(self.schedules)} + + 너의 목표는 두 가지야: + 1. 키워드에 대한 질문 혹은 문장을 한 줄의 텍스트로 만들어. + ex) 키워드가 '저녁' 이라면, "{self.alias}~~ 하루 잘 보냈어?? 저녁도 맛있는거 챙겨먹어!! 
사랑해~~ " + 2. 만든 텍스트는 ?? !! ~~ ,, .. 등의 다양한 특수문자가 많이 들어갈 수 있어. 감정이 강하게 느껴지게 작성해줘. + 2-a. 특수문자를 붙일 때는 꼭 2개씩 붙여줘 + 3. 부모님을 지칭하는 별명은 {self.alias} 로 해줘. + 4. 문장과 문장 사이의 띄어쓰기를 2개씩 넣어줘 + + 결과는 {{"키워드": "문장"}} 형태의 JSON 문자열로 반환해줘. 꼭 큰따옴표(")만 사용해. + + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_schedule_json(self): + prompt = self.create_schedule_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + schedule_dict = parsing_json.extract_json_from_content(content) + + return schedule_dict + + +class EmotionReportGPT: + def __init__(self, text, percent_list): + self.text = text + self.percent_list = percent_list + + def create_report_prompt(self): + system_message = f""" + 너는 지금부터 감정을 분석 하는 심리 상담사야. + + 네 역할은 텍스트와 수치를 보고, 해당 발화의 인물이 하루 동안 어떤 감정 상태를 가졌는지 체크해주는 거야. + 텍스트는 다음과 같아: {str(self.text)} + 수치는 다음과 같아 : {self.percent_list} + + 너의 목표는 두 가지야: + 1. 텍스트와 수치를 보고 발화의 인물의 하루 감정을 종합적으로 분석해줘. + 1-a) 분석을 할 때는 텍스트나 문맥에서 근거를 들어서 논리적으로 서술해줘. + 1-b) 분석 말투는 보호자에게 피보호자의 상태를 설명하는 존댓말 말투로 해줘. + 1-c) '발화자'를 지칭하는 말은 '피보호자'로 해줘 + 2. 분석 문장은 4-5 줄이어야 해. + + 결과는 꼭 큰따옴표(")만 사용해서 str로 반환해줘. 만약 여러 문장이라면 "\n"를 문장 끝에 넣어줘. + + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_report_text(self): + prompt = self.create_report_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + + return content diff --git a/app/service/interaction.py b/app/service/interaction.py new file mode 100644 index 0000000..c48da29 --- /dev/null +++ b/app/service/interaction.py @@ -0,0 +1,98 @@ +import os +import subprocess +from datetime import datetime + +from faster_whisper import WhisperModel +from openai import OpenAI +from elevenlabs import ElevenLabs +from dotenv import load_dotenv + +from app.service.elevenLabs import text_to_speech_file +# 녹음 함수 (arecord 사용) - 수정된 record_respberry.py 참고 +from record_respberry import emotion_record + +# ==== 공통 설정 ==== +load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +ELEVENLABS_KEY = os.getenv("ELEVENLABS_KEY") + +if not OPENAI_API_KEY or not ELEVENLABS_KEY: + raise RuntimeError(".env 에 OPENAI_API_KEY/ELEVENLABS_KEY 를 설정하세요") + +# OpenAI / ElevenLabs 클라이언트 +gpt_client = OpenAI(api_key=OPENAI_API_KEY) +tts_client = ElevenLabs(api_key=ELEVENLABS_KEY) + +# Whisper 모델 (tiny, CPU, int8) +whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8") + + +def interaction(alias: str): + """ + alias: 사용자 이름 또는 AI가 부르는 별칭 (ex: "홍길동") + 1) alias 인사 → TTS → 재생 + 2) 이후 반복: emotion_record → Whisper STT → GPT 질문 생성 → TTS → 재생 + """ + # 1) alias 인사 + greet_text = f"{alias}~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" + print("👋 인사:", greet_text) + greet_audio = text_to_speech_file(greet_text) + subprocess.run(["mpg321", greet_audio], check=True) + + # 대화 이력 초기화 + messages = [ + {"role": "system", + "content": "너는 대화를 자연스럽게 이어가는 AI야. 
사용자와 계속 이어지는 대화를 만들어야 해."}, + {"role": "assistant", "content": greet_text} + ] + + record_idx = 0 + try: + while True: + # 2-1) 감정 녹음 (침묵 기준으로 자동 종료) + wav_path = emotion_record(record_idx) + print(f"[녹음 완료] {wav_path}") + record_idx += 1 + + # 2-2) Whisper STT (한국어) + segments, _ = whisper_model.transcribe(wav_path, + beam_size=1, + language="ko") + user_text = " ".join(seg.text for seg in segments).strip() + print("▶ 사용자 음성(텍스트):", user_text or "(인식 안됨)") + + if not user_text: + print("(음성 인식 실패 → 다시 녹음)") + continue + + # 2-3) GPT-4o 에 질문 생성 요청 + messages.append({"role": "user", "content": user_text}) + resp = gpt_client.chat.completions.create( + model="gpt-4o", + messages=messages + ) + question = resp.choices[0].message.content.strip() + print("생성된 질문:", question) + + # 2-4) 대화 이력에 어시스턴트 질문 추가 + messages.append({"role": "assistant", "content": question}) + + # 2-5) 질문 → ElevenLabs TTS → 파일 + tts_path = text_to_speech_file(question) + print(" (TTS 파일 생성:", tts_path, ")") + + # 2-6) 재생 + subprocess.run(["mpg321", tts_path], check=True) + + except KeyboardInterrupt: + print("\n[사용자 종료 요청] interaction을 종료합니다.") + except Exception as e: + print("예외 발생:", e) + + print("=== interaction 종료 ===") + + +if __name__ == "__main__": + # 스크립트를 직접 실행할 때만 동작 + # alias를 원하는 이름으로 바꿔주세요 + interaction("아빠") diff --git a/app/service/main.py b/app/service/main.py new file mode 100644 index 0000000..18296c5 --- /dev/null +++ b/app/service/main.py @@ -0,0 +1,92 @@ +import asyncio + +from fastapi import FastAPI, Depends, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from fastapi.openapi.utils import get_openapi +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from contextlib import asynccontextmanager + +from app.controller.RecordController import router + +# from app.controller.RecordController import router + + +# from app.service.subscribe import subscribe_schedule + +# @asynccontextmanager +# async def lifespan(app: FastAPI): +# task = asyncio.create_task(subscribe_schedule()) +# yield +# task.cancel() +# try: +# await task +# except asyncio.CancelledError: +# print("Redis task cancelled") + + +app = FastAPI() + +auth_scheme = HTTPBearer() + + +def get_current_token(credentials: HTTPAuthorizationCredentials = Depends(auth_scheme)): + token = credentials.credentials + if not token: + raise HTTPException(status_code=403, detail="Invalid or missing token") + return token + + +def custom_openapi(): + if app.openapi_schema: + return app.openapi_schema + openapi_schema = get_openapi( + title="HumaniCare API Documentation", + version="1.0", + description="HumaniCare API documentation for the application", + routes=app.routes, + ) + # Add the security scheme for Bearer token + openapi_schema["components"]["securitySchemes"] = { + "bearerAuth": { + "type": "http", + "scheme": "bearer", + "bearerFormat": "JWT" + } + } + openapi_schema["security"] = [{"bearerAuth": []}] + app.openapi_schema = openapi_schema + return app.openapi_schema + + +app.openapi = custom_openapi + +# # Swagger UI 경로 설정 +# @app.get("/docs", include_in_schema=False) +# async def custom_swagger_ui_html(req: Request): +# root_path = req.scope.get("root_path", "").rstrip("/") +# openapi_url = root_path + "/openapi.json" # OpenAPI 경로 설정 +# return get_swagger_ui_html( +# openapi_url=openapi_url, +# title="Peach API Documentation", +# ) +# +# # OpenAPI JSON 경로 설정 +# @app.get("/openapi.json", include_in_schema=False) +# async def custom_openapi_json(): +# return app.openapi() + +origins = [ + 
"http://localhost:8080", + "http://localhost:3000", +] + +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(router) + diff --git a/app/service/predict_resp.py b/app/service/predict_resp.py new file mode 100644 index 0000000..1d49a67 --- /dev/null +++ b/app/service/predict_resp.py @@ -0,0 +1,36 @@ +import requests +import glob +import os +import mimetypes + + +def predict(): + # ip = "192.168.1.243" + ip = "15.165.21.152" + # FastAPI 라우터 경로에 맞춘 URL + url = f"http://{ip}:8000/api/fastapi/predict" + + # 전송할 오디오 파일 경로 (wav, m4a, mp3 등 모두 포함) + BASE_DIR = "/home/team4/Desktop/capstone/AI/app/emotion_diary" + audio_paths = glob.glob(os.path.join(BASE_DIR, "**", "*.*"), recursive=True) + + files = [] + for path in audio_paths: + filename = os.path.basename(path) + # 확장자에 맞는 MIME 타입 추출 (fallback: application/octet-stream) + content_type = mimetypes.guess_type(path)[0] or "application/octet-stream" + files.append( + ("files", (filename, open(path, "rb"), content_type)) + ) + + response = requests.post(url, files=files) + if response.status_code == 200: + print("감정 예측 결과:") + for label, score in response.json().items(): + print(f"{label}: {score}") + else: + print(f"Error: {response.status_code} - {response.text}") + + +if __name__ == "__main__": + predict() diff --git a/app/service/record.py b/app/service/record.py new file mode 100644 index 0000000..a5d61f1 --- /dev/null +++ b/app/service/record.py @@ -0,0 +1,78 @@ +import os +import time +from datetime import datetime + +import numpy as np +import sounddevice as sd +import torch +from scipy.io.wavfile import write + +# 사일로 VAD 모델 불러오기 +model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad', force_reload=False) +(get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks) = utils + +FRAME_SIZE = 512 +SILENCE_LIMIT = 2.0 # 2초 이상 침묵하면 종료 +FILENAME = "output.wav" # 녹음된 오디오 파일 이름 + +audio_queue = [] +recorded_audio = [] + + +def callback(indata, frames, time_info, status): + # 받은 오디오 데이터를 audio_queue에 추가 + audio_queue.append(indata[:, 0].copy()) + + +print("Start talking... (녹음 중, 침묵 시 자동 종료)") + +with sd.InputStream(callback=callback, channels=1, samplerate=SAMPLE_RATE, blocksize=FRAME_SIZE): + silence_counter = 0 + while True: + if len(audio_queue) == 0: + time.sleep(0.01) + continue + + chunk = audio_queue.pop(0) + if len(chunk) < 512: + continue + + audio_tensor = torch.from_numpy(chunk[:512]).float() + audio_tensor = audio_tensor / (torch.max(torch.abs(audio_tensor)) + 1e-9) + + speech_prob = model(audio_tensor, SAMPLE_RATE).item() + print(f"Speech prob: {speech_prob:.3f}") + + # 음성이 인식되었을 때만 녹음 + if speech_prob > 0.5: + recorded_audio.append(chunk) + silence_counter = 0 # 음성이 인식되면 침묵 카운터 리셋 + else: + silence_counter += FRAME_SIZE / SAMPLE_RATE + print(f"Silence counter: {silence_counter:.2f}") + + # 침묵이 2초 이상 지속되면 녹음 종료 + if silence_counter >= SILENCE_LIMIT: + print("Silence detected for 2 seconds! 
Stopping.") + break + +# 녹음된 오디오가 있을 경우에만 파일로 저장 + +# 저장할 디렉토리 설정 +print(os.getcwd()) +save_dir = os.path.join(os.getcwd(), "first_audio") +os.makedirs(save_dir, exist_ok=True) # 디렉토리가 없으면 생성 + +# 오늘 날짜 문자열 +today_str = datetime.now().strftime("%Y%m%d") +# 파일 이름 설정 +FILENAME = "output.wav" +file_path = os.path.join(save_dir, FILENAME) +if recorded_audio: + recorded_audio = np.concatenate(recorded_audio) + + # 오디오 데이터를 .wav 파일로 저장 + write(file_path, SAMPLE_RATE, recorded_audio.astype(np.float32)) # 저장 형식: .wav + print(f"녹음된 파일을 {FILENAME}로 저장했습니다.") +else: + print("녹음된 음성이 없습니다.") diff --git a/app/service/record_respberry.py b/app/service/record_respberry.py new file mode 100644 index 0000000..3e0460f --- /dev/null +++ b/app/service/record_respberry.py @@ -0,0 +1,72 @@ +import os +import wave +from datetime import datetime +import numpy as np +import sounddevice as sd +from scipy.io.wavfile import write + +# === 녹음 설정 === +CHANNELS = 1 +RATE = 44100 +CHUNK_DURATION = 0.1 # 초 단위, 약 100ms +CHUNK = int(RATE * CHUNK_DURATION) +SILENCE_LIMIT = 5 # 5초 연속 침묵이면 녹음 종료 +THRESHOLD = 1000.0 # 침묵 판별 기준 (RMS) + +BASE_DIR = "/home/team4/Desktop/capstone/AI/app/emotion_diary" + + +# 날짜 기반 하위 디렉터리(매일 한 번만 생성) +def _ensure_dir(): + os.makedirs(BASE_DIR, exist_ok=True) + + +def is_silent(data: np.ndarray, threshold: float = THRESHOLD) -> bool: + """ + float32 numpy 배열을 받아 RMS 기준으로 침묵 여부를 판단 + """ + rms = np.sqrt(np.mean(data ** 2)) + return rms < threshold + + +def emotion_record(index: int) -> str: + """ + index: 녹음 파일 구분을 위한 정수 인덱스 + return: 저장된 .wav 파일의 전체 경로 + """ + _ensure_dir() + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}_{index}.wav" + filepath = os.path.join(BASE_DIR, filename) + + print(f"[녹음 시작] {filename}") + + frames = [] + silent_secs = 0.0 + + try: + with sd.InputStream(samplerate=RATE, channels=CHANNELS, dtype='float32') as stream: + while True: + data, _ = stream.read(CHUNK) + audio_chunk = data[:, 0] # mono + frames.append(audio_chunk.copy()) + + if is_silent(audio_chunk): + silent_secs += CHUNK_DURATION + else: + silent_secs = 0.0 + + if silent_secs >= SILENCE_LIMIT: + print(f"[침묵 {SILENCE_LIMIT}초 감지 → 녹음 종료]") + break + + except Exception as e: + print("녹음 중 예외:", e) + + # float32 → int16 변환 후 저장 + all_audio = np.concatenate(frames) + int_audio = np.int16(np.clip(all_audio * 32767, -32768, 32767)) + + write(filepath, RATE, int_audio) + print(f"[저장 완료] {filepath}\n") + return filepath diff --git a/app/service/s3Service.py b/app/service/s3Service.py new file mode 100644 index 0000000..aa2ca35 --- /dev/null +++ b/app/service/s3Service.py @@ -0,0 +1,147 @@ +import os +import time +import uuid +from datetime import datetime +from typing import List + +import requests +from boto3 import client +from botocore.exceptions import ClientError +from dotenv import load_dotenv +from fastapi import UploadFile + +from app.utils.convertFileExtension import convert_to_mp3 + +load_dotenv() + +access_key = os.getenv("S3_ACCESSKEY") +secret_key = os.getenv("S3_SECRETKEY") +bucket_name = os.getenv("S3_BUCKET") +url_base = os.getenv("S3_URL") + +s3_client = client( + "s3", + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + region_name="ap-northeast-2", +) + + +async def save_local_file(file: UploadFile) -> str: + """업로드된 파일을 로컬에 저장하고 파일 경로를 반환합니다.""" + audio_dir = "./audio" + if not os.path.exists(audio_dir): + os.makedirs(audio_dir) + local_file_path = os.path.join(audio_dir, file.filename) # 파일 경로 생성 + with open(local_file_path, "wb") as f: + f.write(await 
file.read()) # 파일 내용을 저장 + return local_file_path + + +def upload_to_s3(local_file_path: str) -> str: + """로컬 파일을 S3에 업로드하고 S3 URL을 반환합니다.""" + try: + if not os.path.isfile(local_file_path): + print(f"Local file does not exist: {local_file_path}") + return None + + timestamp = int(time.time()) + unique_id = str(uuid.uuid4()) + s3_file_name = f"record/audio_{timestamp}_{unique_id}.wav" + + # S3에 파일 업로드 + with open(local_file_path, "rb") as data: + s3_client.upload_fileobj(data, bucket_name, s3_file_name) + + # S3 URL 생성 + aws_file_url = f"{url_base}/{s3_file_name}" + return aws_file_url + + except ClientError as e: + print(f'Credential error => {e}') + except Exception as e: + print(f"Another error => {e}") + + +def upload_to_s3_png(local_file_path: str) -> str: + """로컬 파일을 S3에 업로드하고 S3 URL을 반환합니다.""" + try: + if not os.path.isfile(local_file_path): + print(f"Local file does not exist: {local_file_path}") + return None + + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}" + + timestamp = int(time.time()) + unique_id = str(uuid.uuid4()) + s3_file_name = f"image/{filename}_{timestamp}_{unique_id}.png" + + # S3에 파일 업로드 + with open(local_file_path, "rb") as data: + s3_client.upload_fileobj(data, bucket_name, s3_file_name) + + # S3 URL 생성 + aws_file_url = f"{url_base}/{s3_file_name}" + return aws_file_url + + except ClientError as e: + print(f'Credential error => {e}') + except Exception as e: + print(f"Another error => {e}") + + +# AWS S3에서 녹음 파일 다운로드 +def download_from_s3(file_s3_url: str) -> str: + """S3에서 파일을 다운로드하고 로컬에 저장합니다.""" + audio_dir = "./audio" + if not os.path.exists(audio_dir): + os.makedirs(audio_dir) # 디렉토리가 없으면 생성 + + try: + response = requests.get(file_s3_url) + response.raise_for_status() # 요청이 실패하면 예외를 발생시킴 + + unique_filename = f"{uuid.uuid4()}.wav" + local_save_path = os.path.join(audio_dir, unique_filename) # 저장할 파일 경로 + + with open(local_save_path, 'wb') as f: + f.write(response.content) # 파일 내용을 로컬에 저장 + + mp3_file_path = convert_to_mp3(local_save_path) + return mp3_file_path + + except requests.exceptions.HTTPError as e: + print(f"HTTP error occurred: {e}") + except Exception as e: + print(f"An error occurred: {e}") + + +def download_from_s3_links(urls: List[str]) -> List[str]: + file_s3_urls = [] + for url in urls: + file_s3_url = download_from_s3(url) + file_s3_urls.append(file_s3_url) + return file_s3_urls + + +def download_from_s3_model(file_s3_url: str) -> str: + model_dir = "./model" + if not os.path.exists(model_dir): + os.makedirs(model_dir) # 디렉토리가 없으면 생성 + + try: + response = requests.get(file_s3_url) + response.raise_for_status() # 요청이 실패하면 예외를 발생시킴 + + unique_filename = str(uuid.uuid4()) + local_save_path = os.path.join(model_dir, unique_filename) # 저장할 파일 경로 + + with open(local_save_path, 'wb') as f: + f.write(response.content) # 파일 내용을 로컬에 저장 + return local_save_path + + except requests.exceptions.HTTPError as e: + print(f"HTTP error occurred: {e}") + except Exception as e: + print(f"An error occurred: {e}") diff --git a/app/service/subscribe.py b/app/service/subscribe.py new file mode 100644 index 0000000..7cde317 --- /dev/null +++ b/app/service/subscribe.py @@ -0,0 +1,29 @@ +# import os +# import redis.asyncio as redis +# import json +# import subprocess +# from app.service.s3Service import download_from_s3 +# REDIS_HOST = os.getenv("REDIS_HOST", "15.165.21.152") +# REDIS_PORT = int(os.getenv("REDIS_PORT", "6380")) +# REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "babyy1023@") +# CHANNEL_NAME = "spring-scheduler-channel" +# +# async 
def subscribe_schedule(): +# r = redis.Redis( +# host=REDIS_HOST, +# port=REDIS_PORT, +# password=REDIS_PASSWORD, +# decode_responses=True +# ) +# +# pubsub = r.pubsub() +# await pubsub.subscribe(CHANNEL_NAME) +# +# print(f"Subscribed to Redis '{CHANNEL_NAME}") +# +# async for message in pubsub.listen(): +# if message["type"] == "message": +# local_path = download_from_s3("https://humanicare-bucket.s3.ap-northeast-2.amazonaws.com/record/audio_1743069498_081a9673-aebe-4b86-a4ba-c32f4424e8b9.wav") +# subprocess.run(["mpg321", local_path]) +# print("speaker out") +# \ No newline at end of file diff --git a/app/utils/convertFileExtension.py b/app/utils/convertFileExtension.py new file mode 100644 index 0000000..d58eee0 --- /dev/null +++ b/app/utils/convertFileExtension.py @@ -0,0 +1,98 @@ +import io +import os +import tempfile +from datetime import datetime + +from pydub import AudioSegment +from pydub.exceptions import CouldntDecodeError + + +def merge_all_wavs_to_mp3(audio_dir="audio", silence_duration_ms=500): + wav_files = sorted([ + os.path.join(audio_dir, f) for f in os.listdir(audio_dir) + if f.endswith(".wav") + ]) + + if not wav_files: + print("병합할 .wav 파일이 없습니다.") + return None + + print(f"{len(wav_files)}개의 wav 파일을 병합 중...") + + combined = AudioSegment.empty() + silence = AudioSegment.silent(duration=silence_duration_ms) + + for i, wav in enumerate(wav_files): + audio = AudioSegment.from_wav(wav) + combined += audio + if i != len(wav_files) - 1: + combined += silence # 마지막 파일 뒤에는 무음 안 넣음 + + today_str = datetime.now().strftime("%Y%m%d") + mp3_path = os.path.join(audio_dir, f"{today_str}_final.mp3") + + combined.export(mp3_path, format="mp3") + + for wav in wav_files: + os.remove(wav) + + print(f"최종 mp3 저장 완료: {mp3_path}") + return mp3_path + + +def convert_to_mp3(file_path): + audio = AudioSegment.from_file(file_path) + output_path = file_path.replace(".wav", ".mp3") + os.remove(file_path) + audio.export(output_path, format="mp3") + return output_path + + +def convert_to_wav(raw_bytes: bytes, ext: str) -> bytes: + ext = ext.lower() + # 이미 WAV라면 바로 반환 + if ext == "wav": + return raw_bytes + + # 임시 입력 파일 생성 + with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as fin: + fin.write(raw_bytes) + fin.flush() + fin_path = fin.name + + try: + # 1) format 인자 없이 자동 감지 시도 + audio = AudioSegment.from_file(fin_path) + except CouldntDecodeError: + try: + # 2) 자동 감지도 실패하면, 프로브 크기 늘려서 재시도 + audio = AudioSegment.from_file( + fin_path, + parameters=["-probesize", "50M", "-analyzeduration", "100M"] + ) + except CouldntDecodeError as e: + os.unlink(fin_path) + raise RuntimeError(f"FFmpeg 디코딩 실패({ext}): {e}") from e + + # WAV(PCM) 사양으로 맞춰주기 + audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2) + + # 메모리로 WAV 내보내기 + out = io.BytesIO() + audio.export(out, format="wav") + wav_bytes = out.getvalue() + + os.unlink(fin_path) + return wav_bytes + + # 3) 원하는 파라메터로 변환 (16kHz, mono, 16-bit) + audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2) + + # 4) 메모리로 WAV 내보내기 + out = io.BytesIO() + audio.export(out, format="wav") + wav_bytes = out.getvalue() + + # 5) 임시 입력 파일 삭제 + os.unlink(fin_path) + return wav_bytes diff --git a/app/utils/parsing_json.py b/app/utils/parsing_json.py new file mode 100644 index 0000000..e0eb1be --- /dev/null +++ b/app/utils/parsing_json.py @@ -0,0 +1,15 @@ +import json +import re + + +def extract_json_from_content(content): + match = re.search(r"\{[\s\S]*\}", content) + if match: + try: + return json.loads(match.group()) + except 
json.JSONDecodeError as e: + print("JSON 파싱 실패:", e) + return {} + else: + print("JSON 형태가 아님") + return {} diff --git a/app/utils/play_file.py b/app/utils/play_file.py new file mode 100644 index 0000000..ce1032a --- /dev/null +++ b/app/utils/play_file.py @@ -0,0 +1,31 @@ +import time +from datetime import datetime +import os +import subprocess + + +def play_at_target_time(target_time: str, local_file_path: str): + # 현재 시간과 target_time 비교 + current_time = datetime.now().strftime("%H:%M:%S") + + # target_time이 현재 시간보다 크면 대기 (target_time까지 대기) + while current_time < target_time: + time.sleep(1) # 1초마다 시간 확인 + current_time = datetime.now().strftime("%H:%M:%S") + + #블루투스 헤드셋 또는 기본 스피커로 출력 + os.system("pactl list sinks | grep 'bluez_sink'") # 블루투스 출력 장치 확인 + os.system("pactl set-default-sink `pactl list sinks short | grep bluez_sink | awk '{print $2}'`") # 기본 출력 변경 + + # 스피커를 기본 출력 장치로 설정 + os.system("pactl list sinks | grep 'analog-output'") # 스피커 장치 확인 + os.system("pactl set-default-sink `pactl list sinks short | grep analog-output | awk '{print $2}'`") # 기본 출력 변경 + + #로컬 파일을 직접 재생 + subprocess.run(["mpg321", local_file_path]) + + # window 테스트 용 + # from playsound import playsound + # from pathlib import Path + # safe_path = Path(local_file_path).resolve().as_posix() + # playsound(safe_path) diff --git a/audio/99ef70a1-0fe0-4b82-af32-9e32cbea7800.mp3 b/audio/99ef70a1-0fe0-4b82-af32-9e32cbea7800.mp3 new file mode 100644 index 0000000..c21adb1 Binary files /dev/null and b/audio/99ef70a1-0fe0-4b82-af32-9e32cbea7800.mp3 differ diff --git a/audio/d9da92b8-6a16-4886-bee1-2222a98a8cf3.mp3 b/audio/d9da92b8-6a16-4886-bee1-2222a98a8cf3.mp3 new file mode 100644 index 0000000..c21adb1 Binary files /dev/null and b/audio/d9da92b8-6a16-4886-bee1-2222a98a8cf3.mp3 differ diff --git a/audio/test8.mp3 b/audio/test8.mp3 new file mode 100644 index 0000000..93287b8 Binary files /dev/null and b/audio/test8.mp3 differ diff --git a/d9fd6461-6b0b-4c16-b1fc-f63c92048627.wav b/d9fd6461-6b0b-4c16-b1fc-f63c92048627.wav new file mode 100644 index 0000000..391f582 Binary files /dev/null and b/d9fd6461-6b0b-4c16-b1fc-f63c92048627.wav differ diff --git a/f8fa5cff-fbba-467c-8855-b44e71ebf9e5.wav b/f8fa5cff-fbba-467c-8855-b44e71ebf9e5.wav new file mode 100644 index 0000000..1bc00fa Binary files /dev/null and b/f8fa5cff-fbba-467c-8855-b44e71ebf9e5.wav differ diff --git a/main.py b/main.py new file mode 100644 index 0000000..2e48eca --- /dev/null +++ b/main.py @@ -0,0 +1,79 @@ +# import RPi.GPIO as GPIO +# import time +# from datetime import datetime + +# # ─────────────────────────────── +# # PIR 센서 관련 +# # ─────────────────────────────── +# PIR_PIN = 17 # GPIO17 + +# def detect_motion(): +# GPIO.setmode(GPIO.BCM) +# GPIO.setup(PIR_PIN, GPIO.IN) + +# print("PIR 센서 디버깅 시작 (Ctrl+C 종료)") +# prev_state = None + +# try: +# while True: +# signal = GPIO.input(PIR_PIN) + +# if signal != prev_state: +# timestamp = datetime.now().strftime("%H:%M:%S") +# state_str = "감지됨 (HIGH)" if signal else " 없음 (LOW)" +# print(f"[{timestamp}] 상태 변경 ▶ {state_str}") +# prev_state = signal + +# time.sleep(0.1) +# except KeyboardInterrupt: +# print("⛔ 종료 중...") +# GPIO.cleanup() + + +# # ─────────────────────────────── +# # DHT11 센서 관련 (5회 재시도 버전) +# # ─────────────────────────────── +# import adafruit_dht +# import board + +# def read_dht11(): +# print("🌡️ DHT11 센서 측정 시작...") +# dhtDevice = adafruit_dht.DHT11(board.D4) # GPIO4 (멀티보드 IO4) + +# for i in range(5): # 최대 5번 재시도 +# try: +# print(f"📡 시도 {i + 1} ...") +# temperature = dhtDevice.temperature +# 
humidity = dhtDevice.humidity + +# if temperature is not None and humidity is not None: +# print(f"✅ 온도: {temperature}°C") +# print(f"✅ 습도: {humidity}%") +# break +# else: +# print("⚠️ 센서로부터 데이터를 읽을 수 없습니다.") +# except RuntimeError as error: +# print(f"⚠️ 에러 발생: {error.args[0]}") +# except Exception as error: +# print(f"❌ 심각한 오류: {error}") +# break +# time.sleep(2) # 재시도 간 간격 + +# # 종료 함수는 비활성화 (라이브러리 오류 방지) +# # dhtDevice.exit() + +import uvicorn + +# ─────────────────────────────── +# 메인 함수 +# ─────────────────────────────── +if __name__ == "__main__": + uvicorn.run( + app="app.service.main:app", + # host="localhost", + host="0.0.0.0", + port=8000, + ) + # detect_motion() # PIR 센서 테스트 시 주석 해제 +# read_dht11() # 현재는 DHT11만 테스트 + diff --git a/requirements.txt b/requirements.txt index 8d58cce..376bdd9 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/requirements_server.txt b/requirements_server.txt new file mode 100644 index 0000000..45b9215 --- /dev/null +++ b/requirements_server.txt @@ -0,0 +1,170 @@ +absl-py==2.3.0 +annotated-types==0.7.0 +anyio==4.9.0 +asttokens==3.0.0 +astunparse==1.6.3 +async-timeout==5.0.1 +attrs==25.3.0 +audioread==3.0.1 +av==14.4.0 +backcall==0.2.0 +beautifulsoup4==4.13.4 +bleach==6.2.0 +boto3==1.37.16 +botocore==1.37.16 +cachetools==5.5.2 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +colorama==0.4.6 +coloredlogs==15.0.1 +contourpy==1.3.0 +ctranslate2==4.6.0 +cycler==0.12.1 +decorator==5.2.1 +defusedxml==0.7.1 +distro==1.9.0 +docopt==0.6.2 +elevenlabs==1.54.0 +exceptiongroup==1.2.2 +executing==2.2.0 +fastapi==0.115.11 +faster-whisper==1.1.1 +fastjsonschema==2.21.1 +filelock==3.18.0 +flatbuffers==25.2.10 +fonttools==4.58.1 +fsspec==2025.3.2 +gast==0.4.0 +google-auth==2.40.2 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +grpcio==1.71.0 +h11==0.14.0 +h5py==3.13.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.31.2 +humanfriendly==10.0 +idna==3.10 +importlib_metadata==8.7.0 +importlib_resources==6.5.2 +ipython==8.12.3 +jedi==0.19.2 +Jinja2==3.1.6 +jiter==0.10.0 +jmespath==1.0.1 +joblib==1.5.1 +jsonschema==4.24.0 +jsonschema-specifications==2025.4.1 +jupyter_client==8.6.3 +jupyter_core==5.8.1 +jupyterlab_pygments==0.3.0 +keras==3.10.0 +kiwisolver==1.4.7 +lazy_loader==0.4 +libclang==18.1.1 +librosa==0.11.0 +llvmlite==0.43.0 +Markdown==3.8 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.9.4 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +mistune==3.1.3 +ml_dtypes==0.5.1 +mpmath==1.3.0 +msgpack==1.1.0 +namex==0.1.0 +nbclient==0.10.2 +nbconvert==7.16.6 +nbformat==5.10.4 +networkx==3.2.1 +numba==0.60.0 +numpy==1.26.4 +oauthlib==3.2.2 +onnxruntime==1.19.2 +openai==1.68.2 +opt_einsum==3.4.0 +optree==0.16.0 +packaging==25.0 +pandocfilters==1.5.1 +parso==0.8.4 +pickleshare==0.7.5 +pillow==11.2.1 +pipreqs==0.5.0 +platformdirs==4.3.8 +playsound==1.3.0 +pooch==1.8.2 +prompt_toolkit==3.0.51 +protobuf==5.29.5 +pure_eval==0.2.3 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.10.6 +pydantic_core==2.27.2 +pydub==0.25.1 +Pygments==2.19.1 +pyparsing==3.2.3 +pyreadline3==3.5.4 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-multipart==0.0.20 +pywin32==310 +PyYAML==6.0.2 +pyzmq==26.4.0 +redis==6.2.0 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +rich==14.0.0 +rpds-py==0.25.1 +rsa==4.9.1 +s3transfer==0.11.4 +safetensors==0.5.3 +scikit-learn==1.6.1 +scipy==1.13.1 +sentence-transformers==4.1.0 +six==1.17.0 +sniffio==1.3.1 +sounddevice==0.5.1 
+soundfile==0.13.1 +soupsieve==2.7 +soxr==0.5.0.post1 +SpeechRecognition==3.14.3 +stack-data==0.6.3 +starlette==0.46.1 +sympy==1.14.0 +tensorboard==2.19.0 +tensorboard-data-server==0.7.2 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.19.0 +tensorflow-estimator==2.11.0 +tensorflow-io-gcs-filesystem==0.31.0 +tensorflow_intel==2.18.0 +termcolor==3.1.0 +tf_keras==2.19.0 +threadpoolctl==3.6.0 +tinycss2==1.4.0 +tokenizers==0.21.1 +torch==2.7.0 +torchaudio==2.7.0 +torchvision==0.22.0 +tornado==6.5.1 +tqdm==4.67.1 +traitlets==5.14.3 +transformers==4.52.4 +typing_extensions==4.12.2 +urllib3==1.26.20 +uvicorn==0.34.0 +wcwidth==0.2.13 +webencodings==0.5.1 +websockets==15.0.1 +Werkzeug==3.1.3 +wrapt==1.17.2 +yarg==0.1.9 +zipp==3.22.0 diff --git a/test_audio/test8.mp3 b/test_audio/test8.mp3 new file mode 100644 index 0000000..93287b8 Binary files /dev/null and b/test_audio/test8.mp3 differ
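Note on the custom loss introduced in app/ML/loss.py: within this patch it is only exercised indirectly, through load_model(..., custom_objects={'boundary_enhanced_focal_loss': boundary_enhanced_focal_loss}) in app/controller/RecordController.py and app/ML/predict_colab.py. The short sketch below is a hedged sanity check of the function's call signature over the seven emotion classes used throughout the diff; the dummy tensors and the printed value are illustrative assumptions, not project data.

import tensorflow as tf

from app.ML.loss import boundary_enhanced_focal_loss

# Class order used throughout this patch:
# ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral']
# One-hot targets for two dummy utterances (illustrative values only).
y_true = tf.constant([[0., 0., 1., 0., 0., 0., 0.],
                      [0., 1., 0., 0., 0., 0., 0.]], dtype=tf.float32)

# Each prediction row sums to 1. The first row is an easy hit (probability of
# the true class is 0.70); the second row's true-class probability is 0.25,
# below margin=0.3, so the loss treats it as a hard sample and up-weights it.
y_pred = tf.constant([[0.05, 0.05, 0.70, 0.05, 0.05, 0.05, 0.05],
                      [0.20, 0.25, 0.10, 0.15, 0.10, 0.10, 0.10]], dtype=tf.float32)

loss = boundary_enhanced_focal_loss(y_true, y_pred)
print(float(loss))  # scalar; the function returns tf.reduce_sum over all weighted terms

The same function object has to be supplied via custom_objects whenever the bundled ko-sbert_multimodal_*.h5 model is reloaded, as RecordController.py and predict_colab.py already do.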