Merged
43 commits
2c13cd3
add: upload previously used code
yjg0815 Mar 20, 2025
62beca1
add
yjg0815 Mar 20, 2025
b2f29e8
fix: add S3 key + optimize
yjg0815 Mar 20, 2025
e4db6e5
fix: main.py
yjg0815 Mar 26, 2025
7a462fe
feat: tts api call + verify output on Windows (#1)
yjg0815 Mar 27, 2025
5e2cbd2
test
yjg0815 Mar 27, 2025
50fb1d0
chore: test audio sample
yjg0815 Mar 27, 2025
22753cc
fix: test Raspberry Pi audio output code
yjg0815 Mar 27, 2025
78aab49
fix: logic
yjg0815 Mar 27, 2025
e969989
feat: auto-update docker_hub via github_action (#2)
seokjin1023 May 5, 2025
e27b7bf
fix: Dockerfile CMD 수정
seokjin1023 May 8, 2025
4595e07
Feat/record (#3)
yjg0815 May 8, 2025
180473b
fix: raspberry version record
yjg0815 May 16, 2025
e5347a6
Feat/sensor (#4)
seokjin1023 May 25, 2025
7c7bdb0
Update Dockerfile
seokjin1023 May 25, 2025
a4c1857
fix: modify to connect with Spring
seokjin1023 May 25, 2025
3cf838e
fix: install ffmpeg inside the container via the Dockerfile
seokjin1023 May 25, 2025
e569765
feat: use ignore file because the docker image is too large
seokjin1023 May 25, 2025
7b0bf49
fix: test schedules-to-url flow
seokjin1023 May 26, 2025
bd5fa0c
feat: test voiceId sending front to fastapi to spring
seokjin1023 May 27, 2025
6fbd343
feat: send url with text/plain
seokjin1023 May 27, 2025
b8e9111
feat: send url with text/plain
seokjin1023 May 27, 2025
a2264eb
Feat/diary interaction (#5)
yjg0815 Jun 1, 2025
303b551
feat: ML
yjg0815 Jun 1, 2025
cd662d3
merge
yjg0815 Jun 1, 2025
e2e0e95
feat: predict
yjg0815 Jun 1, 2025
8946e6a
fix: server-client separation
yjg0815 Jun 1, 2025
3b9ee53
fix: server requirements
yjg0815 Jun 1, 2025
174dbb5
feat: model
yjg0815 Jun 1, 2025
d8dee2b
fix: server-client interaction
yjg0815 Jun 2, 2025
510cd52
feat: send png to spring
yjg0815 Jun 2, 2025
2b1c42b
fix: emotion analysis report - the resp return can be dropped
yjg0815 Jun 2, 2025
d0b7d31
fix: experiment with actually using gpt and elevenlabs for voice and schedule
seokjin1023 Jun 7, 2025
e34cf86
fix: remove AI from import statements
seokjin1023 Jun 8, 2025
c1b05db
fix: stop the fastapi on ec2 from subscribing
seokjin1023 Jun 8, 2025
c35d227
fix: fix 'not defined' error
seokjin1023 Jun 8, 2025
93fb233
fix: fix 'not defined' error
seokjin1023 Jun 8, 2025
d83128d
fix: fix 'not defined' error
seokjin1023 Jun 8, 2025
9ee712c
merge
yjg0815 Jun 8, 2025
0aa47a6
fix: controller flow
yjg0815 Jun 8, 2025
14bfc52
fix: docker error
yjg0815 Jun 8, 2025
cc23030
fix: docker error
yjg0815 Jun 8, 2025
d7e6872
fix
yjg0815 Jun 8, 2025
18 changes: 18 additions & 0 deletions .dockerignore
@@ -0,0 +1,18 @@
__pycache__/
*.pyc
*.pyo
*.pyd
*.mp3
*.wav
.env
.venv/
.idea/
.git/
.gitignore
*.log
node_modules/
*.pt
*.ckpt
*.zip
*.tar
*.onnx
29 changes: 29 additions & 0 deletions .github/workflows/deploy.yml
@@ -0,0 +1,29 @@
name: Docker Build & Push

on:
  push:
    branches: [ "main", "dev" ]
  pull_request:
    branches: [ "main", "dev" ]

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: |
            ${{ secrets.DOCKER_USERNAME }}/fastapi-app:latest
            ${{ secrets.DOCKER_USERNAME }}/fastapi-app:${{ github.sha }}
9 changes: 7 additions & 2 deletions .gitignore
@@ -28,11 +28,16 @@ replay_pid*
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

*.idea
.DS_Store

*.h5

__pycache__/
*.pyc
audio/
#emotion_diary/
emotion_png/
pyvenv.cfg
Binary file added 253cfe2a-382f-4bbe-8586-5e5fcbbe85ef.wav
Binary file not shown.
Binary file added 4d37625a-4026-494e-912f-fcf2744cbdf0.wav
Binary file not shown.
Binary file added 791fcb5a-f100-4e73-a5f7-05306f930005.wav
Binary file not shown.
1 change: 1 addition & 0 deletions Adafruit_Python_DHT
Submodule Adafruit_Python_DHT added at 8f5e2c
9 changes: 9 additions & 0 deletions Dockerfile
@@ -0,0 +1,9 @@
FROM python:3.9-slim
# install ffmpeg
RUN apt-get update && apt-get install -y ffmpeg

WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
CMD ["uvicorn", "app.service.main:app", "--host", "0.0.0.0", "--port", "8000"]
97 changes: 97 additions & 0 deletions app/ML/ModelService.py
@@ -0,0 +1,97 @@
# import numpy as np
# from dotenv import load_dotenv
# from fastapi import Request, UploadFile, File, APIRouter
# from typing import List
# from tensorflow.keras.models import load_model
# from sentence_transformers import SentenceTransformer
# import io
# import requests
#
# from app.ML.audio_extractor_utils import get_features
# from app.ML.loss import boundary_enhanced_focal_loss
# from app.ML.plot_utils import save_plot, get_s3_png_url
# from app.ML.speech_to_text import speech_to_text
#
# import os
#
# from app.service.gpt import EmotionReportGPT
# from app.utils.convertFileExtension import convert_to_wav
#
# router = APIRouter(
#     prefix="/api/fastapi",
# )
# load_dotenv()
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
#
# # NOTE: `model`, `embedding_model`, and `emotion_labels` are used below but
# # never defined in this file; they would need module-level initialization
# # before this router could be re-enabled.
#
#
# @router.post("/predict")
# async def predict(request: Request, files: List[UploadFile] = File(...)):
#     # token = request.headers.get("Authorization").split(" ")[1]
#     print(files)
#     # 1) save to a temp file or process in memory
#     wav_data_list = []
#     for file in files:
#         raw = await file.read()
#         ext = file.filename.split('.')[-1]  # 'm4a', 'mp3', etc.
#         wav_bytes = convert_to_wav(raw, ext)  # convert to BytesIO
#         wav_data_list.append(wav_bytes)
#
#     # 2) extract audio features
#     all_feats = []
#     for idx, wav_bytes in enumerate(wav_data_list):
#         # get_features takes a file path, so write the in-memory bytes to a
#         # temp file first (or modify get_features to accept file-like objects)
#         temp_path = f"temp_{idx}.wav"  # indexed so each clip gets its own file
#         with open(temp_path, "wb") as f:
#             f.write(wav_bytes)
#         feats = get_features(temp_path)
#         os.remove(temp_path)
#         all_feats.append(feats)
#
#     all_feats = np.stack(all_feats, axis=0)
#     pooled_feats = all_feats.mean(axis=0)
#     audio_input = pooled_feats[np.newaxis, :, np.newaxis]
#
#     # 3) STT & text embedding
#     texts = []
#     for wav_bytes in wav_data_list:
#         temp_path = "temp_stt.wav"
#         with open(temp_path, "wb") as f:
#             f.write(wav_bytes)
#         text = speech_to_text(temp_path)
#         os.remove(temp_path)
#         texts.append(text)
#
#     full_text = " . ".join(texts)
#     text_vec = embedding_model.encode([full_text])[0]
#     text_input = text_vec[np.newaxis, :]
#
#     # 4) predict
#     prediction = model.predict([audio_input, text_input])
#     pred_percent = (prediction[0] * 100).tolist()
#
#     # 5) JSON response
#     result = {label: round(p, 2) for label, p in zip(emotion_labels, pred_percent)}
#     top_idx = np.argmax(pred_percent)
#     result['predicted_emotion'] = emotion_labels[top_idx]
#
#     local_path = save_plot(pred_percent)
#     s3_path = get_s3_png_url(local_path)
#     reporter = EmotionReportGPT(full_text, pred_percent)
#     report_text = reporter.get_report_text()
#
#     print(s3_path)
#
#     # send_emotion_report_to_spring(s3_path, report_text)
#
#     data = {
#         "imageUrl": s3_path,
#         "report_text": report_text
#     }
#     return data
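
As noted above, the commented-out route references model, embedding_model, and emotion_labels without defining them. If the file were re-enabled, the module-level setup would look roughly like the sketch below; the EMOTION_MODEL_PATH variable, the default model file name, and the embedding checkpoint are assumptions for illustration, not values from this PR.

import os

from sentence_transformers import SentenceTransformer
from tensorflow.keras.models import load_model

from app.ML.loss import boundary_enhanced_focal_loss
from app.ML.plot_utils import emotion_labels  # reuse the 7-label order used for plotting

# env var and default file name are assumed, not taken from this diff
MODEL_PATH = os.getenv("EMOTION_MODEL_PATH", "app/ML/emotion_model.h5")

# custom_objects lets Keras resolve the custom loss while deserializing the model
model = load_model(MODEL_PATH, custom_objects={
    "boundary_enhanced_focal_loss": boundary_enhanced_focal_loss,
})

# assumed sentence-embedding checkpoint; any SentenceTransformer model fits here
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")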
69 changes: 69 additions & 0 deletions app/ML/audio_extractor_utils.py
@@ -0,0 +1,69 @@
import librosa
import librosa.display
import numpy as np


def noise(data):
    noise_amp = 0.035 * np.random.uniform() * np.amax(data)
    data = data + noise_amp * np.random.normal(size=data.shape[0])
    return data


def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(y=data, rate=rate)


def shift(data):
    shift_range = int(np.random.uniform(low=-5, high=5) * 1000)
    return np.roll(data, shift_range)


def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=pitch_factor)


def extract_features(data, sample_rate):
    # ZCR
    result = np.array([])
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
    result = np.hstack((result, zcr))  # stacking horizontally

    # Chroma_stft
    stft = np.abs(librosa.stft(data))
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    result = np.hstack((result, chroma_stft))  # stacking horizontally

    # MFCC
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mfcc))  # stacking horizontally

    # Root Mean Square Value
    rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
    result = np.hstack((result, rms))  # stacking horizontally

    # MelSpectrogram
    mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)
    result = np.hstack((result, mel))  # stacking horizontally

    return result


def get_features(path):
    data, sample_rate = librosa.load(path, duration=2.5, offset=0.0)

    # without augmentation
    res1 = extract_features(data, sample_rate)
    result = np.array(res1)

    # data with noise
    noise_data = noise(data)
    res2 = extract_features(noise_data, sample_rate)
    result = np.concatenate((result, res2), axis=0)

    # data with stretching and pitching
    new_data = stretch(data)
    data_stretch_pitch = pitch(new_data, sample_rate)
    res3 = extract_features(data_stretch_pitch, sample_rate)
    result = np.concatenate((result, res3), axis=0)

    return result
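
get_features concatenates features from three variants of the clip (raw, noised, stretched+pitched). With librosa's defaults, each extract_features call returns 1 (ZCR) + 12 (chroma) + 20 (MFCC) + 1 (RMS) + 128 (mel) = 162 values, so the final vector has 486 entries. A usage sketch, with sample.wav as a hypothetical input file:

from app.ML.audio_extractor_utils import get_features

feats = get_features("sample.wav")  # hypothetical 2.5-second clip
# 3 variants x (1 ZCR + 12 chroma + 20 MFCC + 1 RMS + 128 mel) = 486 values
print(feats.shape)  # (486,) under librosa's default feature sizes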
29 changes: 29 additions & 0 deletions app/ML/loss.py
@@ -0,0 +1,29 @@
import tensorflow as tf


# 1. Boundary-Enhanced Focal Loss (strengthens identification of minority classes)
def boundary_enhanced_focal_loss(y_true, y_pred, gamma=2.0, margin=0.3):
    y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)

    # hard-sample mining (flag samples predicted with low probability)
    correct_prob = tf.reduce_sum(y_true * y_pred, axis=-1)
    hard_mask = tf.cast(tf.less(correct_prob, margin), tf.float32)

    # per-class weights (higher weight for minority classes)
    effective_counts = tf.reduce_sum(y_true, axis=0)
    alpha = 1.0 / (effective_counts + 1e-7)
    alpha = alpha / tf.reduce_sum(alpha)

    # extra boost for the minority classes (surprise, neutral)
    class_boost = tf.constant([1.0, 0.5, 1.0, 1.0, 1.0, 2.5, 5.0], dtype=tf.float32)
    alpha = alpha * class_boost

    # focal loss
    cross_entropy = -y_true * tf.math.log(y_pred)
    focal_weight = tf.pow(1.0 - y_pred, gamma)

    # extra weight on hard samples
    sample_weight = 1.0 + hard_mask * 2.0
    loss = sample_weight[:, tf.newaxis] * alpha * focal_weight * cross_entropy

    return tf.reduce_sum(loss)
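
Because the loss is a plain (y_true, y_pred) function, it can be passed straight to model.compile; the seven-entry class_boost vector matches the seven emotion labels used elsewhere in this PR. A minimal sketch with a stand-in architecture, not the project's real network:

import tensorflow as tf

from app.ML.loss import boundary_enhanced_focal_loss

# stand-in two-layer classifier over the 486-dim audio feature vector
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation="relu", input_shape=(486,)),
    tf.keras.layers.Dense(7, activation="softmax"),  # one output per emotion
])
model.compile(optimizer="adam", loss=boundary_enhanced_focal_loss)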
40 changes: 40 additions & 0 deletions app/ML/plot_utils.py
@@ -0,0 +1,40 @@
# plotting utilities
import os
from datetime import datetime

from matplotlib import pyplot as plt
from app.service.s3Service import upload_to_s3_png

colors = ['#e74c3c', '#3498db', '#f1c40f', '#e67e22', '#9b59b6', '#1abc9c', '#95a5a6']
emotion_labels = ['angry', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'neutral']


def save_plot(predictions_percent):
    plt.figure(figsize=(10, 6))
    bars = plt.barh(emotion_labels, predictions_percent, color=colors, alpha=0.85)

    plt.title('Emotion Probability Distribution', fontsize=20, weight='bold', pad=15)
    plt.xlabel('Probability (%)', fontsize=14)
    plt.xlim(0, max(predictions_percent) + 10)
    plt.grid(axis='x', linestyle='--', alpha=0.6)

    for bar, percent in zip(bars, predictions_percent):
        width = bar.get_width()
        plt.text(width + 0.8, bar.get_y() + bar.get_height() / 2, f'{percent:.1f}%', va='center', fontsize=13,
                 weight='bold', color='#333')

    plt.yticks(fontsize=14, weight='bold')
    plt.tight_layout()

    date_str = datetime.now().strftime("%Y%m%d")
    filename = date_str
    local_path = os.getcwd() + f"/app/emotion_png/{filename}_emotion_distribution.png"
    # save as an image file
    plt.savefig(local_path, dpi=300, bbox_inches='tight')
    plt.show()

    return local_path


def get_s3_png_url(local_path):
    return upload_to_s3_png(local_path)
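
Typical usage pairs save_plot with get_s3_png_url, mirroring the commented-out flow in ModelService.py; the probabilities below are made-up example values:

from app.ML.plot_utils import save_plot, get_s3_png_url

pred_percent = [5.0, 10.0, 55.0, 5.0, 5.0, 10.0, 10.0]  # one value per emotion label
local_path = save_plot(pred_percent)  # writes the PNG under app/emotion_png/
s3_url = get_s3_png_url(local_path)   # uploads the PNG and returns its S3 URL
print(s3_url)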