# Source: gen_music/sleep_original.py (dev branch of bestKUFO/gen_music on GitHub)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle

import tensorflow as tf
import numpy as np

import sys

sys.path.append('')

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM


# Loader for one user's sleep data
def read_user_data(file_path):
    """Read a per-user sleep CSV and return it without the 'Unnamed: 0' column.

    Args:
        file_path: Path (or file-like object) handed straight to ``pd.read_csv``.

    Returns:
        A DataFrame holding every column of the CSV except 'Unnamed: 0'
        (presumably the index column left behind by an earlier export --
        confirm against how the CSVs were produced).

    Raises:
        KeyError: If the CSV has no 'Unnamed: 0' column.
    """
    return pd.read_csv(file_path).drop(columns=['Unnamed: 0'])

# Column layout of the input CSV files, with one example row.
# (English glosses; the real headers are Korean and the exact strings the code
#  relies on are the ones indexed inside feature_importance_model.)
#   ID      date        sleep time  sleep latency  sleep efficiency  awakenings during sleep  REM sleep
#   user01  2020-08-31  479         33             93                1                        125
#   stress  emotional state  sleep problems  condition  caffeine intake  alcohol intake  satisfaction  step count
#   3       3                6               3          250              0               3             369148
# Sleep data for user1, user6 and user23, loaded in that order.
user1_data_all, user6_data_all, user23_data_all = map(
    read_user_data,
    ("user1_data.csv", "user6_data.csv", "user23_data.csv"),
)

# print(user1_data_all)


## Extracting the factors that affect each individual's sleep quality
# Model persistence helpers (save / load)
def save_model(model, file_path):
    """Pickle ``model`` into ``file_path``.

    The file is opened in 'wb' mode, so an existing file at that path is
    overwritten.
    """
    with open(file_path, 'wb') as sink:
        pickle.Pickler(sink).dump(model)


def load_model(file_path):
    """Unpickle and return the object stored in ``file_path``.

    NOTE: unpickling can execute arbitrary code, so only pass paths to
    pickle files that this project wrote itself.
    """
    with open(file_path, 'rb') as source:
        return pickle.Unpickler(source).load()


# Feature-extraction function
def feature_importance_model(user_data_all, model_name, threshold=0.1):
    """Train a random forest on one user's data and return its most important features.

    A ``RandomForestClassifier`` is fitted to predict the '만족도' column from
    the twelve sleep/lifestyle columns listed below, using a 70 % training
    split. The fitted model is pickled to ``f'{model_name}.pkl'``
    (overwriting any existing file) and read back before the importances
    are extracted.

    Args:
        user_data_all: DataFrame containing the twelve predictor columns and
            the '만족도' target column.
        model_name: Base name (without extension) of the pickle file to write.
        threshold: Minimum feature importance a column needs in order to be
            returned. Defaults to 0.1, the value previously hard-coded.

    Returns:
        A list of column names whose importance is >= ``threshold``, ordered
        from most to least important.

    Raises:
        KeyError: If any expected column is missing from ``user_data_all``.
    """
    predictor_columns = ['수면 시간', '수면 잠복기', '수면의 효율', '취침 중 자다 깬 횟수', '렘 수면',
                         '스트레스', '감정 상태', '수면 문제', '컨디션', '카페인 섭취량', '알코올 섭취량', '보행수']
    x = user_data_all[predictor_columns]
    y = user_data_all['만족도']

    # Only the training portion is used; the held-out 30 % is not evaluated here.
    x_train, _x_test, y_train, _y_test = train_test_split(x, y, test_size=0.3, random_state=42)

    # Fit the forest (fixed seed, so repeated calls on the same data agree).
    forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
    forest_model.fit(x_train, y_train)

    # Persist the model, then round-trip it through disk as the original code did.
    save_model(forest_model, f'{model_name}.pkl')
    loaded_model = load_model(f'{model_name}.pkl')

    # Importances labelled by column name, highest first.
    ftr_importances = pd.Series(loaded_model.feature_importances_, index=x_train.columns)
    ftr_value = ftr_importances.sort_values(ascending=False)

    # Select with a boolean mask instead of ``ftr_value[i]``: integer keys on a
    # string-labelled Series rely on a positional fallback that pandas has
    # deprecated.
    return ftr_value[ftr_value >= threshold].index.tolist()


# List of the per-user datasets (user1, user6, user23 -- in that order)
data_list = [user1_data_all, user6_data_all, user23_data_all]

# Train and pickle one model per dataset: model_1.pkl, model_2.pkl, model_3.pkl.
# NOTE(review): `model_name` and `feature_importance` are module-level names,
# so after the loop they keep the values from the last iteration ('model_3' and
# the third dataset's features). extract_features reads this leaked
# `model_name` global, so later calls write to model_3.pkl unless it is rebound.
for i, data in enumerate(data_list):
    model_name = f'model_{i + 1}'
    feature_importance = feature_importance_model(data, model_name)


# Build a new DataFrame with only the date, the satisfaction score and the
# user's individually selected features
def extract_features(dataframe, pickle_name=None):
    """Return ``dataframe`` reduced to '만족도' plus its important features, indexed by '날짜'.

    The important features are obtained by calling
    ``feature_importance_model`` on ``dataframe``, which retrains a random
    forest and rewrites ``f'{pickle_name}.pkl'`` on every call.

    Args:
        dataframe: One user's full dataset; must contain '날짜', '만족도' and
            the predictor columns required by ``feature_importance_model``.
        pickle_name: Base name of the pickle file the retrained forest is
            written to. When omitted, the module-level ``model_name`` is used,
            which is what this function always did; pass an explicit name to
            avoid overwriting another user's pickle.

    Returns:
        A new DataFrame indexed by '날짜' whose columns are '만족도' followed by
        the selected features, most important first.

    Raises:
        NameError: If ``pickle_name`` is omitted and no module-level
            ``model_name`` exists.
    """
    if pickle_name is None:
        # Backward-compatible fallback to the global left behind by the
        # module-level training loop.
        pickle_name = model_name

    features = feature_importance_model(dataframe, pickle_name)

    # Non-inplace set_index: mutating a column selection in place can trigger
    # pandas' SettingWithCopyWarning.
    return dataframe[['날짜', '만족도'] + features].set_index('날짜')


## LSTM prediction model used for computing the score
def score_model(dataframe, *, epochs=200, batch_size=16):
    """Train an LSTM regressor that predicts '만족도' from the user's key features.

    The feature columns come from ``extract_features(dataframe)``. They are
    min-max scaled, and every row is presented to the network as a sequence
    of ``n_features`` timesteps with one value each, matching the
    ``input_shape=(timesteps, 1)`` of the first LSTM layer.

    The ``MinMaxScaler`` fitted here is not returned. It is fitted on all
    rows before the train/test split, so a caller that fits its own scaler on
    the same feature matrix gets identical scaling for new inputs.

    Args:
        dataframe: One user's full dataset (see ``extract_features``).
        epochs: Number of training epochs. Defaults to the previous
            hard-coded value of 200.
        batch_size: Training batch size. Defaults to the previous hard-coded
            value of 16.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Feature matrix without the target column (non-inplace drop).
    new_data = extract_features(dataframe).drop('만족도', axis=1)

    # Split into inputs and target.
    X = new_data.values
    y = dataframe['만족도'].values

    # One timestep per selected feature.
    timesteps = X.shape[1]

    # Normalise the features to [0, 1].
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    # Give the data the 3-D (samples, timesteps, 1) layout the LSTM is declared
    # with. The previous reshape to (samples, timesteps) was a no-op and left
    # the rank mismatch for Keras to paper over.
    X_scaled = np.reshape(X_scaled, (X_scaled.shape[0], timesteps, 1))

    # Train / test split (80 / 20, fixed seed).
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # Define the stacked LSTM.
    lstm_model = Sequential()
    lstm_model.add(LSTM(64, input_shape=(timesteps, 1), return_sequences=True))
    lstm_model.add(BatchNormalization())
    lstm_model.add(LSTM(32, return_sequences=True))
    lstm_model.add(BatchNormalization())
    lstm_model.add(LSTM(16))
    lstm_model.add(BatchNormalization())
    lstm_model.add(Dense(1))

    # Compile as a regression model.
    lstm_model.compile(loss='mean_squared_error', optimizer='adam')

    # Train.
    lstm_model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2)

    # Evaluate on the held-out rows. The loss is a mean squared error, so it is
    # reported as such; "1 - loss" is not an accuracy and can even be negative.
    loss = lstm_model.evaluate(X_test, y_test)
    print("Test MSE:", loss)

    return lstm_model


# Score derivation function
def predict_new_score_with_model(new_input, lstm_model, scaler):
    """Predict an integer satisfaction score for one new observation.

    Args:
        new_input: A single row of raw feature values with shape
            ``(1, n_features)`` (ndarray or nested list), in the column order
            the scaler was fitted on.
        lstm_model: Object with a ``predict`` method that accepts a
            ``(1, n_features, 1)`` array and returns a 2-D array-like.
        scaler: Fitted object with a ``transform`` method.

    Returns:
        The model's prediction rounded to the nearest integer. Plain
        ``int()`` truncation was used before, which turned e.g. 3.9 into 3.

    Raises:
        ValueError: If ``new_input`` does not contain exactly one row
            (raised by the reshape).
    """
    # Normalise with the caller-supplied scaler.
    new_input_scaled = np.asarray(scaler.transform(new_input))

    # One sample; every feature becomes a timestep carrying a single value.
    new_input_reshaped = np.reshape(new_input_scaled, (1, new_input_scaled.shape[1], 1))

    # Model prediction.
    prediction = lstm_model.predict(new_input_reshaped)

    return int(round(float(prediction[0][0])))


## Predict the score for user1
# Use user1's own pickle name. The training loop above leaves `model_name` at
# 'model_3', so reusing it would overwrite the third dataset's pickle with a
# model trained on user1. extract_features also reads this global.
model_name = 'model_1'

# Extract the factors that affect user1's sleep quality.
# Expected: ['수면 잠복기', '수면 시간', '컨디션', '스트레스', '수면의 효율']
user1_features = feature_importance_model(user1_data_all, model_name)

# Sleep data to predict for, keyed by feature name so the input vector follows
# the extracted feature order rather than a hand-maintained one.
user1_new_values = {'수면 잠복기': 30, '수면 시간': 400, '컨디션': 4, '스트레스': 2, '수면의 효율': 92}
missing = [name for name in user1_features if name not in user1_new_values]
if missing:
    raise ValueError(f"No input value supplied for selected feature(s): {missing}")
new_input1 = np.array([[user1_new_values[name] for name in user1_features]])

# Train the model.
lstm_model = score_model(user1_data_all)

# Rebuild the scaler used in training: score_model does not return its scaler,
# so fit an identical one on the same feature matrix.
scaler = MinMaxScaler()
new_data = extract_features(user1_data_all).drop('만족도', axis=1)
scaler.fit(new_data.values)

# Predict for the new input.
prediction = predict_new_score_with_model(new_input1, lstm_model, scaler)
print("만족도 점수:", prediction)