Skip to content

NameError: name 'np' is not defined #45

@Daizwpa

Description

@Daizwpa

Here is my code:

import pandas as pd
from DataSynthesizer.DataDescriber import DataDescriber
from DataSynthesizer.DataGenerator import DataGenerator
from DataSynthesizer.lib.utils import display_bayesian_network

# Path of the training CSV; the same path is later handed to the describer.
input_data_file = 'C:\\Users\\DAIZO\\Documents\\Python\\thyroid-cancer-dataset-2\\dataset\\data_train.csv'
# Read the CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=input_data_file)

# Keep an independent (deep) copy of the freshly loaded frame as a backup.
data_backup = data.copy(deep=True)
# Columns to be treated as categorical: every name listed below is mapped to
# True. Insertion order follows the binary / ordinal / nominal grouping.
categorical_attributes = dict.fromkeys(
    (
        # Binary
        "binary__SEXE",
        "binary__NIVEAU_INSTRUC",
        "binary__SECURITE_SOCIALE",
        "binary__ACTIVITE_VIGOUREUSE",
        "binary__ATCD_PER_KC",
        "binary__MCV_FAM",
        "binary__M",
        "binary__INVASION_VASCULAIRE",
        "binary__MULTIFOCALITE",
        "binary__META",
        "binary__MALADIE_CV",
        "binary__Tabagisme",
        "binary__Alcoolisme",
        "binary__Papillaire",
        "binary__Medullaire",
        "binary__Vésiculaire",
        "binary__b_ETE",
        # Ordinal
        "ordinal__REVENU_ANNUEL",
        "ordinal__RISQUE_RECIDIVE_ATA",
        "ordinal__YEAR_CHIRURGIE",
        "ordinal__T",
        "ordinal__DOSE_CUMULEE_IODE",
        "ordinal__ETE",
        "ordinal__RISK_AJCC8",
        # Nominal
        "nominal__STATUT_MATRIMONIAL Marié",
        "nominal__STATUT_MATRIMONIAL Célibataire",
        "nominal__STATUT_MATRIMONIAL Divorcé",
        "nominal__STATUT_MATRIMONIAL Veuf",
        "nominal__ACTIVITE_POFESSIONNELLE Employé",
        "nominal__ACTIVITE_POFESSIONNELLE Indépendant",
        "nominal__ACTIVITE_POFESSIONNELLE Maître (sse) de maison",
        "nominal__ACTIVITE_POFESSIONNELLE Retraité(e)",
        "nominal__ACTIVITE_POFESSIONNELLE Étudiant",
        "nominal__ACTIVITE_POFESSIONNELLE Chômeur (se)",
        "nominal__TYPE_HISTOLOGIQUE NIFT",
        "nominal__TYPE_HISTOLOGIQUE Tumeur vesiculaire à potentiel de malignité incertain",
        "nominal__TYPE_HISTOLOGIQUE Papillaire",
        "nominal__TYPE_HISTOLOGIQUE Vésiculaire",
        "nominal__TYPE_HISTOLOGIQUE Peu différencié",
        "nominal__TYPE_HISTOLOGIQUE Anaplasique",
        "nominal__TYPE_HISTOLOGIQUE Medullaire",
        "nominal__MALADIE_CV_CONNUE Non",
        "nominal__MALADIE_CV_CONNUE cardiopathie ischémique",
        "nominal__MALADIE_CV_CONNUE Insuffisance cardiaque",
        "nominal__MALADIE_CV_CONNUE Maladie rythmique",
        "nominal__MALADIE_CV_CONNUE AOMI",
        "nominal__MALADIE_CV_CONNUE Maladie rythmique+ IC",
        "nominal__MALADIE_CV_CONNUE TVP",
        "nominal__MALADIE_CV_CONNUE AVC",
        "nominal__TABAC_STAT Jamais",
        "nominal__TABAC_STAT Actif",
        "nominal__TABAC_STAT Ancien",
        "nominal__ALCOOL_STATUS Jamais",
        "nominal__ALCOOL_STATUS Actif",
        "nominal__ALCOOL_STATUS Ancien",
        "nominal__RISK_DYNAMIQ Excellente réponse",
        "nominal__RISK_DYNAMIQ Excellente Réponse indeterminée",
        "nominal__RISK_DYNAMIQ Excellente Réponse biologique incomplète",
        "nominal__RISK_DYNAMIQ Excellente Réponse radiologique incomplète",
        "N",
    ),
    True,
)
# Define privacy settings
epsilon = 0.1  # forwarded to the describer as `epsilon`
degree_of_bayesian_network = 2  # forwarded to the describer as `k`
num_tuples_to_generate = 1000  # row count requested from the generator

# Initialize DataDescriber with category threshold
describer = DataDescriber(category_threshold=5)
# Describe the dataset to create a Bayesian network
describer.describe_dataset_in_correlated_attribute_mode(
    dataset_file=input_data_file,
    epsilon=epsilon,
    k=degree_of_bayesian_network,
    attribute_to_is_categorical=categorical_attributes,
)

# Persist the description; the generator reads it back from this file.
description_file = 'C:\\Users\\DAIZO\\Documents\\Python\\thyroid-cancer-dataset-2\\Notebooks\\models\\out.json'
describer.save_dataset_description_to_file(description_file)
# Display the Bayesian network held by the describer. This must be a complete
# statement on its own line (no trailing line-continuation character).
display_bayesian_network(describer.bayesian_network)

generator = DataGenerator()
# NOTE(review): this is the call that raises the reported
# `NameError: name 'np' is not defined`. Per the traceback, DataGenerator
# eval()s the keys of the conditional distributions loaded from the
# description file. Presumably those keys contain `np.`-prefixed scalar reprs
# (as newer NumPy versions print scalars) -- TODO confirm by inspecting
# out.json and the installed NumPy version.
generator.generate_dataset_in_correlated_attribute_mode(num_tuples_to_generate, description_file)
# Save synthetic data to a CSV file
synthetic_data_file = 'synthetic__data.csv'
generator.save_synthetic_data(synthetic_data_file)

Here is the error I got:

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], [line 2](vscode-notebook-cell:?execution_count=7&line=2)
      1 generator = DataGenerator()
----> [2](vscode-notebook-cell:?execution_count=7&line=2) generator.generate_dataset_in_correlated_attribute_mode(num_tuples_to_generate, description_file)
      3 # Save synthetic data to a CSV file
      4 synthetic_data_file = 'synthetic_retail_data.csv'

File c:\Users\DAIZO\miniconda3\envs\BRAF_lab\Lib\site-packages\DataSynthesizer\DataGenerator.py:65, in DataGenerator.generate_dataset_in_correlated_attribute_mode(self, n, description_file, seed)
     63 all_attributes = self.description['meta']['all_attributes']
     64 candidate_keys = set(self.description['meta']['candidate_keys'])
---> [65](file:///C:/Users/DAIZO/miniconda3/envs/BRAF_lab/Lib/site-packages/DataSynthesizer/DataGenerator.py:65) self.encoded_dataset = DataGenerator.generate_encoded_dataset(self.n, self.description)
     66 self.synthetic_dataset = DataFrame(columns=all_attributes)
     67 for attr in all_attributes:

File c:\Users\DAIZO\miniconda3\envs\BRAF_lab\Lib\site-packages\DataSynthesizer\DataGenerator.py:99, in DataGenerator.generate_encoded_dataset(n, description)
     97 for parents_instance in child_conditional_distributions.keys():
     98     dist = child_conditional_distributions[parents_instance]
---> [99](file:///C:/Users/DAIZO/miniconda3/envs/BRAF_lab/Lib/site-packages/DataSynthesizer/DataGenerator.py:99)     parents_instance = list(eval(parents_instance))
    101     filter_condition = ''
    102     for parent, value in zip(parents, parents_instance):

File <string>:1

NameError: name 'np' is not defined

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions