TouristForge/plot_importance.py at main · HPC-ULL/TouristForge · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import pandas as pd
import os
import argparse

import plotly.graph_objects as go


# Columns that identify a row or act as a reference value; they are never
# themselves compared against a reference when computing relative errors.
_NON_ESTIMATE_COLUMNS = ('Feature', 'Theoretical_Importance', 'Correlation')


def _relative_error_percent(importance, reference_col):
    """Return the relative error (%) of every estimate column against a reference.

    Args:
        importance: DataFrame with a 'Feature' column, the reference column
            and any number of estimate columns.
        reference_col: Name of the column used as the reference value.

    Returns:
        A new DataFrame whose first column is 'Feature' and which has one
        column per estimate column of ``importance`` (same names) holding
        ``|estimate - reference| / |reference| * 100``.

    Raises:
        KeyError: If there is at least one estimate column and
            ``reference_col`` is not present in ``importance``.
    """
    errors = pd.DataFrame()
    errors['Feature'] = importance['Feature']

    for col in importance.columns:
        if col in _NON_ESTIMATE_COLUMNS:
            continue
        reference = importance[reference_col]
        # Divide by the magnitude of the reference: dividing by the signed
        # value would turn the error negative whenever the reference is
        # negative. A reference of exactly 0 still yields inf (or NaN for
        # 0/0), as pandas does for float division by zero.
        errors[col] = (
            (importance[col] - reference).abs() / reference.abs()
        ) * 100  # Scale the ratio to a percentage

    return errors


def _grouped_bar_figure(frame, title, yaxis_title):
    """Build a grouped bar chart with one trace per non-'Feature' column.

    Args:
        frame: DataFrame with a 'Feature' column (used for the x axis) and
            one column per bar series.
        title: Figure title.
        yaxis_title: Label of the y axis.

    Returns:
        The plotly figure; the caller decides when to show it.
    """
    fig = go.Figure()
    for col in frame.columns:
        if col == 'Feature':
            continue
        fig.add_trace(go.Bar(
            x=frame['Feature'],
            y=frame[col],
            name=col.replace('_', ' '),  # Underscores read poorly in the legend
        ))

    fig.update_layout(
        title=title,
        xaxis_title='Features',
        yaxis_title=yaxis_title,
        barmode='group',  # Bars of the same feature are placed side by side
    )
    return fig


def main(args):
    """Plot feature importances and their relative errors for one results file.

    Reads ``results/importance_<index>.csv`` (index zero-padded to four
    digits) and shows three grouped bar charts, in this order:

    1. Every column except 'Feature' (and 'P_value', which is dropped).
    2. Relative error (%) of each estimate column against
       'Theoretical_Importance'.
    3. Relative error (%) of each estimate column against 'Correlation'.

    Args:
        args: Object with an integer ``index`` attribute, as produced by the
            script's argument parser.
    """
    importance = pd.read_csv(os.path.join('results', f"importance_{args.index:04d}.csv"))

    # The 'P_value' column is not plotted; drop it when present.
    if 'P_value' in importance.columns:
        importance = importance.drop(columns=['P_value'])

    # Raw comparison of all remaining columns.
    _grouped_bar_figure(
        importance,
        'Comparación de Importancias y Correlación',
        'Importancia',
    ).show()

    # One relative-error chart per reference column, shown in sequence.
    error_charts = (
        ('Theoretical_Importance',
         'Error Relativo (%) de Importancias con Respecto a Theorical Importance'),
        ('Correlation',
         'Error Relativo (%) de Importancias con Respecto a Correlation'),
    )
    for reference_col, title in error_charts:
        errors = _relative_error_percent(importance, reference_col)
        _grouped_bar_figure(errors, title, 'Error Relativo (%)').show()


if __name__ == "__main__":
    # Command-line entry point: parse the results index and plot that file.
    parser = argparse.ArgumentParser()

    # The help text describes the file main() actually reads: a CSV in the
    # 'results' folder, not a Zip file in 'data'.
    parser.add_argument(
        "--index",
        type=int,
        default=1,
        help=(
            "Specifies the index of the importance results file to be used for plotting. "
            "The file 'importance_<index>.csv' (index zero-padded to 4 digits) should be "
            "located in the 'results' folder. Defaults to index 1 if not provided."
        ),
    )
    args = parser.parse_args()

    main(args)