service_community_evlution_analysis/community_operations.py at master · icecity96/service_community_evlution_analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import pickle

from cdlib import algorithms

from community_features import *


def static_community_detection(snapshots, pkl=None) -> list:
    """
    Detect community structure in each snapshot
    :param snapshots: generated snapshots
    :param pkl: pickle file path to store the community detection results
    :return list of community structure in each snapshot.
            And by default, detection results will be written to data/communities.pkl
    """
    if pkl is not None:
        print(f"loading communities from {pkl}")
        return pickle.load(open(pkl, 'rb'))
    snapshots = snapshots["snapshots"]
    communities = []
    for snapshot in snapshots:
        communities.append(algorithms.louvain(snapshot, 'weight'))
    # only stable communities will be included
    for index, community_struct in enumerate(communities):
        community_struct.communities = [c for c in community_struct.communities if len(c) >= 3]
        communities[index] = community_struct
    pickle.dump(communities, open("data/communities.pkl", "wb"))
    return communities


def social_position_score(snapshots, pkl=None) -> list:
    """
    Calculate the social position score of each node in each snapshot (In this paper we use PageRank score)
    :param snapshots: generated snapshots
    :param pkl: a pickle file storing the social position score
    :return: list of social position scores in each snapshot.[{node: score, ...},...]
             And by default, the results will be stored to data/social_position.pkl
    """
    if pkl is not None:
        print(f"loading social position score from {pkl}")
        return pickle.load(open(pkl, 'rb'))
    snapshots, social_positions = snapshots["snapshots"], []
    for snapshot in snapshots:
        page_rank_score = nx.pagerank(snapshot, alpha=0.85, weight="weight")
        social_positions.append(dict(page_rank_score))
    pickle.dump(social_positions, open("data/social_positions.pkl", "wb"))
    return social_positions


def _inclusion(C1: list, C2: list, SP1: dict) -> float:
    """
    *inclusion* allows to evaluate the inclusion of one community in another.
    $I(C1, C2) = \frac{|C_1 \cap C_2|}{|G_1|}$ \frac{\sum_{x \in (G_1 \cap C_2)}SP_{G_1}(x)}{\sum_{x \in G_1}SP_{G_1}(x)}
    :param C1: community 1
    :param C2: community 2
    :param SP1: social_position of nodes in the C1
    :return: inclusion socre
    """
    quantity = len(set(C1) & set(C2)) / len(C1)
    quality = sum([SP1[node] for node in list(set(C1) & set(C2))])
    quality /= sum([SP1[node] for node in C1])
    return quantity * quality


def _event_identifier(C1, C2, SP1, SP2, alpha=0.5, beta=0.6):
    """
    based on both inclusion I(C1,C2) and I(C2,C1)
    :param C1:
    :param C2:
    :param SP1:
    :param SP2:
    :return:
    """
    I1, I2 = _inclusion(C1, C2, SP1), _inclusion(C2, C1, SP2)
    # Continuing
    if I1 >= alpha and I2 >= beta and len(C1) == len(C2):
        return "continuing"

    # shrinking
    if (I1 >= alpha and I2 >= beta and len(C1) > len(C2)) or (I1 < alpha and I2 >= beta and len(C1) >= len(C2)):
        return "shrinking"

    # growing
    if (I1 > alpha and I2 > beta and len(C1) < len(C2)) or (I1 >= alpha and I2 < beta and len(C1) <= len(C2)):
        return "growing"

    # splitting
    if I1 < alpha and I2 >= beta and len(C1) >= len(C2):
        return "splitting"

    # Merging
    if I1 >= alpha and I2 <= beta and len(C1) <= len(C2):
        return "merging"

    return None


def GED(communities1: list, communities2: list, SP1: dict, SP2: dict, alpha: float, beta: float):
    """
    Group Evolution Discovery method
    :param beta:
    :param alpha:
    :param communities1:
    :param communities2:
    :param SP1:
    :param SP2:
    :return:
    """
    pre_window_event, next_window_event = {}, {}
    possible_events = []
    for i, community1 in enumerate(communities1):
        if "A-{:d}".format(i) not in next_window_event:
            next_window_event["A-{:d}".format(i)] = []
        for j, community2 in enumerate(communities2):
            if "B-{:d}".format(j) not in pre_window_event:
                pre_window_event["B-{:d}".format(j)] = []
            event = _event_identifier(community1, community2, SP1, SP2, alpha, beta)
            if event is None:
                continue
            next_window_event["A-{:d}".format(i)].append(event)
            pre_window_event["B-{:d}".format(j)].append(event)
            possible_events.append(("A-{:d}".format(i), "B-{:d}".format(j), event))
    events = []
    for key, value in next_window_event.items():
        if len(value) == 0:
            events.append((key, "dissolving"))
        if len(value) == 1 and value[0] == "shrinking":
            events.append((key, "shrinking"))
        if len(value) == 1 and value[0] == "continuing":
            events.append((key, "continuing"))
        if len(value) > 1:
            events.append((key, "splitting"))

    for key, value in pre_window_event.items():
        if len(value) == 0:
            events.append((key, "forming"))
        if len(value) == 1 and value[0] == "growing":
            events.append((key, "growing"))
        if len(value) > 1:
            events.append((key, "merging"))
    return possible_events, events


def meta_community_network_generation(communities, social_positions, alpha=None, beta=None, pkl=None) -> nx.DiGraph:
    """
    construct a meta community network.
    :param beta:
    :param alpha:
    :param communities:
    :param social_positions:
    :param pkl:
    :return:
    """
    if pkl is not None:
        print(f"loading meta community network from {pkl}")
        return pickle.load(open(pkl, 'rb'))
    meta_community_network = nx.DiGraph()

    for index, community_struct in enumerate(communities):
        for index_j, community in enumerate(community_struct.communities):
            meta_community_network.add_node(f"T{index}C{index_j}", pre="None", nex="None")

    for index in range(len(communities) - 1):
        C1, C2 = communities[index].communities, communities[index + 1].communities
        SP1, SP2 = social_positions[index], social_positions[index + 1]
        possible_events, events = GED(C1, C2, SP1, SP2, alpha, beta)
        for possible_event in possible_events:
            source, target = possible_event[0], possible_event[1]
            source = "T{:d}C".format(index) + source[2:]
            target = "T{:d}C".format(index + 1) + target[2:]
            meta_community_network.add_edge(source, target)
        for event in events:
            node, event_type = event[0], event[1]
            if node[0] == 'A':
                node = "T{:d}C".format(index) + node[2:]
                meta_community_network.nodes[node]["nex"] = event_type
            else:
                node = "T{:d}C".format(index + 1) + node[2:]
                meta_community_network.nodes[node]["pre"] = event_type

    pickle.dump(meta_community_network, open("data/meta_community_network.pkl", "wb"))
    return meta_community_network


FEATURE_NAMES = [
    "size", "density", "clustering", "avg_closeness_centrality", "degree",
    # "eigenvectors_centrality",
    "leadership", "cohesion", "#Keynodes", "max_activity", "mean_activity", "sum_activity", "%Stakeholder",
    "%Service", "Kdegree", "Kavg_closeness_centrality"
    # "Keigenvectors_centrality"
]


def feature_extraction(snapshots, communities, social_positions, pkl=None):
    """
    extract features for each community
    :param pkl:
    :param snapshots:
    :param communities:
    :param social_positions:
    :return:
    """
    if pkl is not None:
        print(f"loading features from {pkl}")
        return pickle.load(open(pkl, 'rb'))
    features, snapshots = [], snapshots["snapshots"]
    for snapshot, community_struct, social_position in zip(snapshots, communities, social_positions):
        communities_features = []
        for community in community_struct.communities:
            tpratio = community_tpratio(snapshot, community)
            keynodes = community_keynodes(snapshot, community, [social_position[node] for node in community])
            activity = community_activity(snapshot, community)
            community_features = [
                len(community),  # community size
                community_density(snapshot, community),  # community density
                community_clustering(snapshot, community),  # community clustering
                community_average_closeness_centrality(snapshot, community),  # average closeness centrality
                community_degree(snapshot, community),  # community degree
                # community_eigenvector_centrality(snapshot, community),  # eigenvector centrality
                community_leadership(snapshot, community),  # community leadership
                community_cohesion(snapshot, community),  # community cohesion
                len(keynodes),  # number of keynodes
                activity[0],  # max activity
                activity[1],  # mean activity
                activity[2],  # sum activity
                tpratio.get("Stakeholder", 0),  # number of stakeholders in community
                tpratio.get("Service", 0),  # number of services in community
                community_degree(snapshot, keynodes),  # key nodes degree
                community_average_closeness_centrality(snapshot, keynodes),  # key nodes average closeness
                # community_eigenvector_centrality(snapshot, keynodes),  # key nodes eigenvectors centrality
            ]
            communities_features.append(community_features.copy())
        features.append(communities_features.copy())
    pickle.dump(features, open("data/features.pkl", "wb"))
    return features