# GemmaExploration/intersection_traces.py (main branch of gboxo/GemmaExploration on GitHub)

# (Line-number gutter 1-150 from the web-page capture removed; it is not part of the script.)


import torch
from collections import defaultdict, Counter
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict

# %%
from  rich.console import Console
from rich.table import Table

def display_table(dataframe):
    """Print a pandas DataFrame to the terminal as a rich table.

    One table column is created per DataFrame column and one table row per
    DataFrame row; the DataFrame index is not shown.

    Args:
        dataframe: The DataFrame to render. Every column label and every cell
            is converted with ``str`` before being handed to rich.
    """
    table = Table(show_header=True, header_style="bold magenta")
    for column in dataframe.columns:
        # Column labels may be non-strings (e.g. integer labels); rich needs
        # something renderable, so stringify them like the cell values.
        table.add_column(str(column))

    # itertuples keeps each column's own dtype, whereas iterrows first packs
    # the row into a single Series and may upcast (an int 3 printed as "3.0").
    for row in dataframe.itertuples(index=False, name=None):
        table.add_row(*map(str, row))

    Console().print(table)
# %%


def display_heatmap(x, comp, top_k=300, rank_start=20, rank_stop=300):
    """Plot a feature-by-topic heatmap of summed activations for one component.

    ``x`` is walked as ``topic -> example -> {component name: tensor}``. For
    every example that contains ``comp``, the ``top_k`` largest entries of
    ``tensor.values()`` are selected and added, per feature index
    (``tensor.indices()[1]``), to that topic's running total. Features are
    then ranked by their total over all topics and the ranks
    ``rank_start:rank_stop`` are drawn, features on the y-axis and topics on
    the x-axis.

    Args:
        x: Nested mapping as described above. The tensors must support
            ``.indices()`` and ``.values()`` (sparse COO, coalesced).
        comp: Component key to look up in each example's dict.
        top_k: Number of largest values kept per example.
        rank_start: First feature rank (0-based, by total) that is plotted.
            NOTE(review): the default of 20 drops the 20 highest-total
            features - presumably deliberate; confirm.
        rank_stop: Feature rank at which plotting stops (exclusive).

    Raises:
        ValueError: If no topic/feature is left to plot, e.g. ``comp`` never
            occurs in ``x`` or fewer than ``rank_start + 1`` features exist.
    """
    topic_feat_dict = defaultdict(lambda: defaultdict(float))
    for topic, topic_dict in x.items():
        for features_dict in topic_dict.values():
            if comp not in features_dict:
                continue
            trace = features_dict[comp]
            values = trace.values()
            order = values.argsort(descending=True)[:top_k]
            feats = trace.indices()[1][order].tolist()
            # Accumulate plain Python floats instead of 0-dim tensors.
            for feat, value in zip(feats, values[order].tolist()):
                topic_feat_dict[topic][feat] += value

    unique_features = sorted({feat for feats in topic_feat_dict.values() for feat in feats})
    heatmap_data = pd.DataFrame(0, index=list(topic_feat_dict), columns=unique_features, dtype=float)

    for topic, features in topic_feat_dict.items():
        for feat, total in features.items():
            # Non-positive totals are shown as 0.
            heatmap_data.at[topic, feat] = total if total > 0 else 0

    # Order the feature columns by their total across topics, largest first.
    ranked = heatmap_data.sum(axis=0).sort_values(ascending=False).index
    heatmap_data = heatmap_data[ranked].iloc[:, rank_start:rank_stop]
    if heatmap_data.empty:
        raise ValueError(
            f"No data to plot for component {comp!r} "
            f"(feature ranks {rank_start}:{rank_stop})."
        )

    plt.figure(figsize=(12, 8))
    # Transposed: rows (y-axis) are features, columns (x-axis) are topics.
    sns.heatmap(heatmap_data.T, annot=False, cmap="YlGnBu", cbar=False, linewidths=0.5, linecolor='white')

    plt.title("Feature Presence Heatmap")
    plt.xlabel("Topics")
    plt.ylabel("Unique Features")
    plt.xticks(rotation=45, ha='right')
    plt.show()

# %%

# The loops below walk this object as
# topic -> example id -> component name -> tensor with .indices()/.values().
# NOTE: torch.load unpickles the file; only load trace files from a trusted source.
x = torch.load("traces/all_max_traces_dict.pt", map_location="cpu")


# %%
topic_features = defaultdict(lambda: defaultdict(list))
topic_act = defaultdict(lambda: defaultdict(list))
# count_features[topic][component][feature] = number of examples of that topic
# whose tensor for that component contains the feature index.
count_features = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

for key, val in x.items():
    for eg_id, eg_dict in val.items():
        for comp, tensors in eg_dict.items():
            indices = tensors.indices()[1].tolist()
            values = tensors.values().tolist()
            # NOTE(review): these two are overwritten per example, so only the
            # last example of each topic/component survives. The list defaults
            # suggest accumulation may have been intended - confirm.
            topic_features[key][comp] = indices
            topic_act[key][comp] = values
            # Count inside the component loop so each feature is tallied under
            # the component it came from (previously everything was tallied
            # under whichever component happened to be iterated last).
            for feat in indices:
                count_features[key][comp][feat] += 1

# Build one summary row per (topic, component) from the per-feature counts.
# NOTE(review): the slice [5:10] selects the features ranked 6th-10th by count,
# while the column labels say "Top-5" - confirm which one is intended.
all_tuples = []
for topic, per_component in count_features.items():
    for comp, feat_counts in per_component.items():
        feat_ids = torch.tensor(list(feat_counts))
        counts = torch.tensor(list(feat_counts.values()))
        picked = counts.argsort(descending=True)[5:10]
        all_tuples.append(
            {
                "Topic": topic,
                "Component": comp,
                "Top-5 feats": feat_ids[picked].tolist(),
                "Top-5 counts": counts[picked].tolist(),
            }
        )


# Persist the summary as HTML and echo it to the terminal.
all_tuples_df = pd.DataFrame(all_tuples)
all_tuples_df.to_html("tables/all_tuples_df.html")


display_table(all_tuples_df)


# Heatmap of the residual-stream output hook for each listed layer.
for layer in [0]:
    display_heatmap(x, f"blocks.{layer}.hook_resid_post")


def generate_expression_matrix(x, top_k=300, scale=50):
    """Build a features-by-traces matrix of top activations.

    ``x`` is walked as ``topic -> example -> component -> tensor``. Every
    tensor found becomes one column (numbered in iteration order). For each
    tensor the ``top_k`` largest entries of ``tensor.values()`` are kept and
    written to the rows of their feature indices (``tensor.indices()[1]``);
    all other cells are 0. The matrix is divided by ``scale`` before being
    returned.

    Args:
        x: Nested mapping as described above. The tensors must support
            ``.indices()`` and ``.values()`` (sparse COO, coalesced).
        top_k: Number of largest values kept per tensor.
        scale: Divisor applied to the whole matrix.

    Returns:
        pandas.DataFrame indexed by feature id (ascending) with one integer
        column per tensor. NaN and +inf cells are replaced by 0.
    """
    # Single pass over the data: one (feature ids, activations) pair per column.
    traces = []
    for topic_dict in x.values():
        for eg_dict in topic_dict.values():
            for tensor in eg_dict.values():
                values = tensor.values()
                order = values.argsort(descending=True)[:top_k]
                traces.append((tensor.indices()[1][order].tolist(), values[order].tolist()))

    total_examples = len(traces)
    unique_features = sorted({feat for feats, _ in traces for feat in feats})
    # Constant-time row lookup instead of list.index() for every cell.
    row_of = {feat: row for row, feat in enumerate(unique_features)}

    count_matrix = torch.zeros((len(unique_features), total_examples))
    # Each trace fills its own column; using the topic index as the column
    # made traces overwrite each other and left most columns empty.
    for col, (feats, acts) in enumerate(traces):
        for feat, act in zip(feats, acts):
            count_matrix[row_of[feat], col] = act

    # Hand pandas a NumPy array rather than a torch tensor.
    df = pd.DataFrame((count_matrix / scale).numpy(), columns=range(total_examples), index=unique_features)
    df.fillna(0, inplace=True)
    df.replace([float('inf')], 0, inplace=True)

    return df

df = generate_expression_matrix(x)

# sns.clustermap is figure-level: it creates its own figure (sized by the
# figsize argument), so calling plt.figure() first only leaves an extra empty
# figure behind when plt.show() runs.
cluster_grid = sns.clustermap(df.T, cmap='RdYlGn', standard_scale=1, figsize=(12, 12), cbar_kws={'label': 'Expression Level'})
# Put the title on the clustermap's figure; plt.title() would attach it to
# whichever axes happens to be current.
# NOTE(review): title and colorbar label say "gene expression" although the
# matrix comes from generate_expression_matrix(x) - confirm the wording.
cluster_grid.ax_heatmap.figure.suptitle('Hierarchical Clustering of Gene Expression Data')
plt.show()