Ver código fonte

Small parameter fix

Josep Argelich 4 semanas atrás
pai
commit
dbbdca76b0

BIN
data/2024-Algorithms/ds1.tgz Ver arquivo


BIN
data/2024-Algorithms/ds2.tgz Ver arquivo


+ 1
- 1
data/2024-Algorithms/src/gen-rUDebG.sh Ver arquivo

@@ -15,7 +15,7 @@ NUM_INSTANCES=50
15 15
 for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
16 16
     for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
17 17
         for i in `seq $NUM_INSTANCES`; do
18
-            ./reddit_at.py -a $ALPHA -lb 10 -al g200 -p $NODES,0 -s $i -so;
18
+            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0,3.0 -s $i -so;
19 19
         done
20 20
         mkdir dataset-rUDebG-$NODES-$ALPHA;
21 21
         mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;

+ 1
- 1
data/2024-Algorithms/src/gen-rUDebG_around01.sh Ver arquivo

@@ -15,7 +15,7 @@ NUM_INSTANCES=50
15 15
 for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
16 16
     for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
17 17
         for i in `seq $NUM_INSTANCES`; do
18
-            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0 -s $i -so;
18
+            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0,3.0 -s $i -so;
19 19
         done
20 20
         mkdir dataset-rUDebG-$NODES-$ALPHA;
21 21
         mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;

+ 913
- 0
data/2024-Algorithms/src/udebg.py Ver arquivo

@@ -0,0 +1,913 @@
1
+#!/usr/bin/python3 -ttOO
2
+'''
3
+Artificial Intelligence Research Group
4
+University of Lleida
5
+'''
6
+
7
+# Libraries
8
+
9
+import sys
10
+import math
11
+import ast
12
+import xml.etree.ElementTree as ET
13
+import networkx
14
+import random
15
+import time
16
+import psutil
17
+import pandas
18
+import numpy
19
+import scipy
20
+import reddit_at
21
+
22
+# Global functions
23
+
24
def sentiment(sentiment_distribution):
    '''
    Collapses a sentiment distribution of 5 values into a single score.
    Each value is multiplied by its relevance (-2, -1, 0, 1, 2) and the
    products are added up. Extra values beyond the fifth are ignored.
    '''
    relevance = (-2, -1, 0, 1, 2)
    total = 0
    for weight, share in zip(relevance, sentiment_distribution):
        total = total + weight * share
    return total
31
+
32
def scale_weight(weight, args):
    '''
    Scales the weight using a log function in base args.log_base.
    Weights that are not >= 1 scale to 0, any other weight scales to
    floor(log(weight)) + 1.
    '''
    if not weight >= 1:
        return 0
    exponent = math.floor(math.log(weight, args.log_base))
    return int(exponent + 1)
40
+
41
def get_weighted_color(base_color, min_weight, max_weight, w, hw = 0xCF):
    '''
    Lightens base_color according to how far w is below max_weight.
      - base_color: list of integer channel values (the first 3 are used)
      - hw: highest light level, applied when w == min_weight
    A weight at or above max_weight keeps the base color; when max_weight is
    not greater than min_weight, hw is applied unchanged.
    Returns (contrast, color): a font color ('#FFFFFF' or '#000000') readable
    on top of the result, and the list of lightened channels.
    '''
    if w >= max_weight:
        light = 0
    elif max_weight > min_weight:
        span = float(max_weight - min_weight)
        light = int(hw * (float(max_weight - w) / span))
    else:
        light = hw
    color = [channel | level for channel, level in zip(base_color, (light, light, light))]
    if light < 0x80:
        contrast = '#FFFFFF'
    else:
        contrast = '#000000'
    return contrast, color
50
+
51
def ecai2023_bounded_normal_distribution(mean, minv, maxv ):
    '''
    Draws one sample from a normal distribution centred at mean and truncated
    to [minv, maxv]. The standard deviation shrinks as |mean| grows:
    sigma = 2 / (3 + 10 * |mean|).
    '''
    magnitude = mean if (mean > 0.0) else -mean
    sigma = 2.0 / (3.0 + (magnitude * 10.0))
    # truncnorm takes its bounds in standard deviations from the mean
    lower = (minv - mean) / sigma
    upper = (maxv - mean) / sigma
    return scipy.stats.truncnorm(lower, upper, loc = mean, scale = sigma).rvs()
60
+
61
def ecai2023_getTNormal(minv, maxv, sigma, mu):
    '''
    Prints the parameters received and returns a frozen normal distribution
    with mean mu and deviation sigma, truncated to [minv, maxv].
    '''
    print("  Params for one-sided tnormal: ", minv, maxv, sigma, mu)
    # truncnorm takes its bounds in standard deviations from the mean
    lower = (minv - mu) / sigma
    upper = (maxv - mu) / sigma
    return scipy.stats.truncnorm(lower, upper, loc = mu, scale = sigma)
64
+
65
+# Classes
66
+
67
+class UDebG():
68
+    '''
69
+    User-based Debate Graph
70
+    '''
71
+    def __init__(self, DebT, root_id, args):
72
+        self.DebT = DebT
73
+        self.DebT_root_id = root_id
74
+        self.UDebG = networkx.DiGraph()
75
+        self.max_weight = None
76
+        self.min_weight = None
77
+        self.max_pos_edge_weight = None
78
+        self.min_pos_edge_weight = None
79
+        self.max_neg_edge_weight = None
80
+        self.min_neg_edge_weight = None
81
+        self.VAF_accepted = None
82
+        if args.user == 'wia2021' or not args.input_file:
83
+            random.seed(args.seed)
84
+            if args.seed is not None:
85
+                numpy.random.seed(int(args.seed)) # For scipy
86
+            if not args.input_file:
87
+                self.ecai2023_gen_UDebG(args)
88
+            else:
89
+                self.wia2021_set_nodes()
90
+                self.wia2021_set_edges()
91
+                self.wia2021_UDebG(args)
92
+            if args.algorithm.startswith('g'):
93
+                if len(args.algorithm) == 2:
94
+                    self.wia2021_polarized_partition_greedy(args)
95
+                elif len(args.algorithm) == 4:
96
+                    self.ccia2022_polarized_partition_greedy(args)
97
+                else:
98
+                    sys.exit('ERROR: Number of algorithm parameters (%s) not recognized.' % args.algorithm)
99
+            else:
100
+                sys.exit('ERROR: Algorithm (%s) not recognized.' % args.algorithm)
101
+        elif args.user == 'mdai2020':
102
+            self.mdai2020_set_nodes()
103
+            self.mdai2020_set_edges()
104
+            self.mdai2020_draw_DebT(args)
105
+            print('MDAI 2020 UDebG...')
106
+            self.mdai2020_UDebG()
107
+            self.UDebG.remove_node('0') # No root node for the solution
108
+            self.mdai2020_skeptical()
109
+            self.mdai2020_VAF_valuation(args)
110
+            if args.draw_graphs:
111
+                self.mdai2020_draw_UDebG(args)
112
+            self.mdai2020_UDebG_to_xml(args)
113
+            self.VAF_accepted = reddit_at.VAF_solver(args, "%s.udebg.xml" % args.input_file)
114
+            if args.draw_graphs:
115
+                self.mdai2020_draw_UDebG(args)
116
+
117
+    def ecai2023_gen_UDebG(self, args):
118
+        '''
119
+        Generates a random UDebG with specific properties
120
+        arg.params comma separated format: N,B
121
+          - N: number of nodes (int)
122
+          - B: beta parameter for p value (float)
123
+        '''
124
+        print('ecai2023 Generating random UDebG...')
125
+        try:
126
+            N, B, DEGCTE = args.params.split(',')
127
+            N = int(N)
128
+            B = float(B)
129
+            DEGCTE = float(DEGCTE)
130
+        except:
131
+            sys.exit('ERROR: Incorrect parameters for UDebG random generation (%s)' % (args.params))
132
+        
133
+        A = args.alpha # alpha parameter for nodes        
134
+        if N < 0 or A > 1 or A < 0 or B > 1 or B < 0 or args.log_base < 2 or DEGCTE < 1.0:
135
+            sys.exit('ERROR: Incorrect parameter ({}-{}-{}-{}-{})'.format(N, A, B, DEGCTE, args.log_base))        
136
+        E = int(math.ceil(math.log(N, args.log_base))*DEGCTE) # maximum number of edges
137
+        print('  Parameters: nodes = {}, max out edges per node = {}, alpha = {}, beta = {}, DegMultConstant = {}, seed = {}'.format(N, E, A, B, DEGCTE, args.seed))
138
+        args.input_file = 'rnd-UDebG-{}-{}-{}-{}-{}-{}'.format(N, E, A, B, args.log_base, args.seed)
139
+        self.ecai2023_set_nodes(N, A)
140
+        self.ecai2023_set_edges(E, B)
141
+        if args.draw_graphs:
142
+            self.wia2021_draw_UDebG(args)
143
+        if args.scip_output:
144
+            self.ccia2022_scip_output(args)
145
+        
146
+    def ecai2023_set_nodes(self, N, A):
147
+        '''
148
+        Generate N nodes for the random UDebG with opinion weighting scheme in [-A, A]
149
+        '''
150
+               
151
+        fl = ecai2023_getTNormal(-A, 0, 1.0 / (1.0 + (A * 20.0)), -A)        
152
+        fr = ecai2023_getTNormal(0, A, 1.0 / (1.0 + (A * 20.0)), A)
153
+        for node_id in range(1, N + 1): # No root node with Id = 0
154
+            if (A == 0.0):
155
+              ows = random.uniform(-A, A)
156
+            else:
157
+              ows = fr.rvs() if (random.randint(0, 1)) else fl.rvs() 
158
+            self.UDebG.add_node('user%i' % node_id, opinion_ws = ows, node_id = node_id)
159
+
160
+    def ecai2023_set_edges(self, E, B ):
161
+        '''
162
+        Generate a maximum of E out edges for each node for the random UDebG
163
+          Parameter B is ignored
164
+        '''
165
+        for n1 in self.UDebG.nodes():
166
+            list_nodes = list(self.UDebG.nodes())
167
+            list_nodes.remove(n1) # No self-answers
168
+            for e in range(random.randint(1, E)):
169
+                n2 = list_nodes.pop(random.randint(0, len(list_nodes) - 1))
170
+                n1_ows = self.UDebG.nodes[n1]['opinion_ws']
171
+                n2_ows = self.UDebG.nodes[n2]['opinion_ws']
172
+                # when they are in different sides:
173
+                if ( (n1_ows * n2_ows < 0.0) or (n1_ows == 0.0 and n2_ows > 0.0) or
174
+                     (n1_ows > 0.0 and n2_ows == 0.0) ):
175
+                     dist = -abs(n1_ows - n2_ows) * abs(n1_ows)
176
+                else:
177
+                   # in the same side
178
+                   dist = 2 * abs(n1_ows) - abs(n1_ows - n2_ows)
179
+                #dist = abs(n1_ows - n2_ows) * abs(n1_ows) # Maximum range of [0, 2] for alpha 1
180
+                #if n2_ows == 0: # dist must be sign of n1_ows
181
+                #    dist = dist if n1_ows > 0 else -dist
182
+                #else:
183
+                #    if n1_ows * n2_ows < 0: # Different sign, opposite opinions 
184
+                #        dist = -dist
185
+                #if dist > 0: # Max range of dist in [0, 1]
186
+                #    dist = dist * 2
187
+                p = 1 # NOTE: set to 1 (or 0) in order to w = iws, but intended to be random.uniform(0, B)
188
+                w = ecai2023_bounded_normal_distribution(dist, -2, 2)
189
+                iws = (p,  w) # ([0, 1], [-2, 2])
190
+                self.UDebG.add_edge(n1, n2, interaction_ws = iws)
191
+
192
+    def wia2021_set_nodes(self):
193
+        '''
194
+        UDebG has a node for each user and stores each comment of the user in a list as node data
195
+        '''
196
+        node_id = 1
197
+        for n, nd in self.DebT.nodes(data = True):
198
+            if 'title' in nd['data'].attrib: # Root comment
199
+                self.UDebG.add_node('0', data = nd, node_id = 0) # Id = 0 for root node (special node)
200
+            else: # Regular comment
201
+                user = nd['data'].get('author')
202
+                if user not in self.UDebG:
203
+                    self.UDebG.add_node(user, data = [nd], node_id = node_id)
204
+                    node_id = node_id + 1
205
+                else:
206
+                    self.UDebG.nodes[user]['data'].append(nd)
207
+
208
+    def wia2021_set_edges(self):
209
+        '''
210
+        UDebG has an edge between u_a and u_b if there is a reply form u_a to u_b
211
+        '''
212
+        for e1, e2, ed in self.DebT.edges(data = True):
213
+            # u_e1 replies to u_e2
214
+            u_e1 = self.DebT.nodes[e1]['data'].get('author')
215
+            if 'title' in self.DebT.nodes[e2]['data'].attrib: # Reply to root comment
216
+                u_e2 = '0'
217
+            else:
218
+                u_e2 = self.DebT.nodes[e2]['data'].get('author')
219
+            if not self.UDebG.has_edge(u_e1, u_e2):
220
+                self.UDebG.add_edge(u_e1, u_e2, data = [ed])
221
+            else:
222
+                self.UDebG[u_e1][u_e2]['data'].append(ed)
223
+
224
+    def wia2021_UDebG(self, args):
225
+        '''
226
+        WIA2021 UDebG. Name of variables following paper notation
227
+        '''
228
+        print('Generating UDebG for wia2021...')
229
+        # Discard auto-replies
230
+        for user in self.UDebG.nodes():
231
+            if self.UDebG.has_edge(user, user):
232
+                self.UDebG.remove_edge(user, user)
233
+        # No root node for the solution
234
+        self.UDebG.remove_node('0')
235
+        # Opinion weighting scheme for nodes (users)
236
+        print('  Number of nodes UDebG = {}'.format(self.UDebG.number_of_nodes()))
237
+        for n, nd in self.UDebG.nodes(data = True):
238
+            s = 0
239
+            for cs in nd['data']:
240
+                s = s + cs['side']
241
+            s = s / len(nd['data'])
242
+            nd['opinion_ws'] = s
243
+            if s < -1 or s > 1:
244
+                sys.exit('ERROR: S value (%f) out of range [-1, 1].' % s)
245
+        # Interaction weighting scheme for edges
246
+        for e1, e2, ed in self.UDebG.edges(data = True):
247
+            p = 0
248
+            w = 0 
249
+            for d in ed['data']:
250
+                cu1 = d['data'].find('t').get('id')
251
+                W = self.DebT.nodes[cu1]['sentiment_not_normalized']
252
+                if W > 0:
253
+                    p = p + 1 # Counts edges with positive W
254
+                w = w + W # Sums all W
255
+            p = p / len(ed['data'])
256
+            w = w / len(ed['data']) # Aggregation operator (mean)
257
+            ed['interaction_ws'] = (p, w)
258
+            if p < 0 or p > 1 or w < -2 or w > 2:
259
+                sys.exit('ERROR: pair (p, w) (%f, %f) out of range ([0, 1], [-2, 2]).' % (p, w))
260
+
261
+        if args.draw_graphs:
262
+            self.wia2021_draw_UDebG(args)
263
+        if args.scip_output:
264
+            self.ccia2022_scip_output(args)
265
+            sys.exit()
266
+
267
+    def wia2021_sideness_consistency(self, L, R):
268
+        '''
269
+        WIA2021 We define the sideness consistency of two sides L and R as:
270
+            SC(L, R, G) = LC(L, G) * RC (R, G)
271
+        '''
272
+        LC = 0
273
+        RC = 0
274
+        for n in L:
275
+            S = self.UDebG.nodes[n]['opinion_ws'] 
276
+            if S <= 0:
277
+                LC = LC - S
278
+        for n in R:
279
+            S = self.UDebG.nodes[n]['opinion_ws'] 
280
+            if S > 0:
281
+                RC = RC + S
282
+        cardC = len(L) + len(R)
283
+        LC = LC / cardC
284
+        RC = RC / cardC
285
+        SC = LC * RC
286
+        if SC < 0 or SC > 0.25:
287
+            sys.exit('ERROR: SC (%f) out of range [0, 0.25].' % SC)
288
+        return LC, RC, SC
289
+
290
+    def wia2021_interactions_sentiment(self, L, R):
291
+        '''
292
+        WIA2021 We define the sentiment of the interactions between users of different sides as follows...
293
+        '''
294
+        sum_inter = 0
295
+        for e1, e2, ed in self.UDebG.edges(data = True):
296
+            p = ed['interaction_ws'][0]
297
+            w = ed['interaction_ws'][1]
298
+            if (e1 in L and e2 in R) or (e1 in R and e2 in L):
299
+                sum_inter = sum_inter + (-(2 * ((p - 0.5) ** 2) + 0.5)) * w
300
+        SWeight = sum_inter / self.UDebG.number_of_edges() + 2
301
+        if SWeight < 0 or SWeight > 4:
302
+            sys.exit('ERROR: SWeight (%f) out of range [0, 4].' % SWeight)
303
+        return SWeight
304
+
305
+    def wia2021_BipPol(self, L, R, all_stats = False):
306
+        '''
307
+        WIA2021 Combine both measures SC and SWeight to define the Bipartition Polarization level of a given partition (L, R)
308
+        '''
309
+        LC, RC, SC = self.wia2021_sideness_consistency(L, R)
310
+        SWeight = self.wia2021_interactions_sentiment(L, R)
311
+        BipPol = SC * SWeight
312
+        if all_stats:
313
+            return LC, RC, SC, SWeight, BipPol
314
+        else:
315
+            return BipPol
316
+
317
+    def wia2021_initial_partition(self, args):
318
+        '''
319
+        WIA2021 Initial partition for polarized partition algorithm
320
+        See docstring of wia2021_polarized_partition_greedy for more information
321
+        '''
322
+        L = []
323
+        R = []
324
+        if len(args.algorithm) > 1 and args.algorithm[1] == '0': # Random initialization
325
+            for n, nd in self.UDebG.nodes(data = True):
326
+                if random.random() < 0.5:
327
+                    L.append(n)
328
+                else:
329
+                    R.append(n)
330
+        elif len(args.algorithm) > 1 and args.algorithm[1] == '1': # Negatives and neutral to L and positives to R
331
+            for n, nd in self.UDebG.nodes(data = True):
332
+                if nd['opinion_ws'] <= 0:
333
+                    L.append(n)
334
+                else:
335
+                    R.append(n)
336
+        elif len(args.algorithm) > 1 and args.algorithm[1] == '2': # L with P = (1 − S(c)) / 2 and in R with 1 − P
337
+            for n, nd in self.UDebG.nodes(data = True):
338
+                P = (1 - nd['opinion_ws']) / 2
339
+                if random.random() < P:
340
+                    L.append(n)
341
+                else:
342
+                    R.append(n)
343
+        else:
344
+            sys.exit('ERROR: Algorithm settings (%s) with no partition initialization.' % args.algorithm)
345
+        return L, R
346
+
347
+    def wia2021_find_better_v(self, L, R, LtoR):
348
+        '''
349
+        WIA2021 Find a node in L such that removing it from L and adding it to R increases BipPol value. If LtoR is false, finds a node in R such that removing it from R and adding it to L increases BipPol value.
350
+        '''
351
+        L1 = list(L)
352
+        R1 = list(R)
353
+        BipPol = self.wia2021_BipPol(L, R)
354
+        if LtoR: # Search for a node from L to move to R
355
+            for n in L:
356
+                L1.remove(n)
357
+                R1.append(n)
358
+                if self.wia2021_BipPol(L1, R1) > BipPol:
359
+                    return n
360
+                L1.append(n)
361
+                R1.pop()
362
+        else: # Search for a node from R to move to L
363
+            for n in R:
364
+                R1.remove(n)
365
+                L1.append(n)
366
+                if self.wia2021_BipPol(L1, R1) > BipPol:
367
+                    return n
368
+                R1.append(n)
369
+                L1.pop()
370
+        return None
371
+
372
+    def wia2021_polarized_partition_greedy(self, args):
373
+        '''
374
+        WIA2021 Polarized Partition greedy algorithm.
375
+        Set L contains mainly users in the negative side of the debate and the set R contains mainly user in the positive side of the debate.
376
+        Algorithm parameters args.algorithm:
377
+          - First char of the string, 'g': Chooses this Greedy Polarized Partition algorithm
378
+          - Second char of the string, sets initial partition:
379
+            - '0': Distributes uniformly at random in either L or R
380
+            - '1': Order vertices by polarity assigning the negatives and neutral to L and the positives to R
381
+            - '2': Place user's opinion c randomly in L with probability P = (1 − S(c)) / 2 and in R with probability 1 − P
382
+        '''
383
+        print('WIA2021 Greedy Bipartite Polarization algorithm...')
384
+        L, R = self.wia2021_initial_partition(args)
385
+        improving = True
386
+        steps = 0
387
+        init_BipPol = self.wia2021_BipPol(L, R)
388
+        print("  Initial BipPol = %f" % init_BipPol)
389
+
390
+        while improving and steps < len(self.UDebG):
391
+            print('\r  Step %i/%i...' % (steps + 1, len(self.UDebG)), end = '')
392
+            improving = False
393
+            v = self.wia2021_find_better_v(L, R, LtoR = True)
394
+            if v:
395
+                L.remove(v)
396
+                R.append(v)
397
+                improving = True
398
+            v = self.wia2021_find_better_v(L, R, LtoR = False)
399
+            if v:
400
+                R.remove(v)
401
+                L.append(v)
402
+                improving = True
403
+            steps = steps + 1
404
+        if not improving:
405
+            print(' not improving...', end = '')
406
+
407
+        LC, RC, SC, SWeight, BipPol = self.wia2021_BipPol(L, R, all_stats = True)
408
+        print("\n  Final BipPol = %f" % BipPol)
409
+        if args.draw_graphs:
410
+            self.wia2021_draw_UDebG(args, L)
411
+        self.wia2021_stats_to_file(args, init_BipPol, steps, LC, RC, SC, SWeight, BipPol, L, R)
412
+
413
+    def ccia2022_select_neighbor_HC(self, L, R):
414
+        '''
415
+        CCIA2022 Select Neighbor with Hill Climbing strategy and applies changes: Find a node in L such that removing it from L and adding it to R increases BipPol value, then do the same from R to L.
416
+        '''
417
+        BipPol = self.wia2021_BipPol(L, R)
418
+        changes = 0
419
+        i = 0
420
+        while i < len(L):
421
+            n = L.pop(0)
422
+            R.append(n)
423
+            bp = self.wia2021_BipPol(L, R)
424
+            if bp > BipPol:
425
+                BipPol = bp
426
+                changes = changes + 1
427
+                break
428
+            L.append(n)
429
+            R.pop()
430
+            i = i + 1
431
+        i = 0
432
+        while i < len(R):
433
+            n = R.pop(0)
434
+            L.append(n)
435
+            bp = self.wia2021_BipPol(L, R)
436
+            if bp > BipPol:
437
+                changes = changes + 1
438
+                break
439
+            R.append(n)
440
+            L.pop()
441
+            i = i + 1
442
+        return changes
443
+
444
+    def ccia2022_select_neighbor_SAHC(self, L, R):
445
+        '''
446
+        CCIA2022 Select Neighbor with Steepest Ascent Hill Climbing strategy and applies changes: Find a node that changing it from L to R or from R to L increases more the BipPol value. Ties broken randomly. 
447
+        '''
448
+        best_bp = self.wia2021_BipPol(L, R)
449
+        best_init_bp = best_bp
450
+        imp_nbs = []
451
+        i = 0
452
+        while i < len(L):
453
+            n = L.pop(i)
454
+            R.append(n)
455
+            bp = self.wia2021_BipPol(L, R)
456
+            if bp >= best_bp and bp > best_init_bp:
457
+                if bp > best_bp:
458
+                    best_bp = bp
459
+                    imp_nbs = []
460
+                imp_nbs.append((i, L, R))
461
+            L.insert(i, n)
462
+            R.pop()
463
+            i = i + 1
464
+        i = 0
465
+        while i < len(R):
466
+            n = R.pop(i)
467
+            L.append(n)
468
+            bp = self.wia2021_BipPol(L, R)
469
+            if bp >= best_bp and bp > best_init_bp:
470
+                if bp > best_bp:
471
+                    best_bp = bp
472
+                    imp_nbs = []
473
+                imp_nbs.append((i, R, L))
474
+            R.insert(i, n)
475
+            L.pop()
476
+            i = i + 1
477
+        if len(imp_nbs) == 0:
478
+            return 0
479
+        elif len(imp_nbs) > 0:
480
+            i, from_set, to_set = random.choice(imp_nbs)
481
+            n = from_set.pop(i)
482
+            to_set.append(n)
483
+            return 1            
484
+
485
+    def ccia2022_select_neighbor_funtion(self, args):
486
+        '''
487
+        CCIA2022 Select "select_neighbor" function according to third char of algorithm parameters
488
+        '''
489
+        if args.algorithm[2] == '0':
490
+            return self.ccia2022_select_neighbor_HC
491
+        elif args.algorithm[2] == '1':
492
+            return self.ccia2022_select_neighbor_SAHC
493
+        else:
494
+            sys.exit('ERROR: Algorithm parameter for select_neighbor function (%s) not recognized.' % args.algorithm)
495
+
496
+    def ccia2022_select_restarts(self, args):
497
+        '''
498
+        CCIA2022 Select number of restarts according to fourth char of algorithm parameters
499
+        '''
500
+        if args.algorithm[3] == '0':
501
+            return 1
502
+        elif args.algorithm[3] == '1':
503
+            return 10
504
+        else:
505
+            sys.exit('ERROR: Algorithm parameter for select_restarts function (%s) not recognized.' % args.algorithm)
506
+
507
+    def ccia2022_randomize_sets(self, L, R, noise):
508
+        '''
509
+        CCIA2022 Randomize sets by noise percentage (probability to switch set)
510
+        '''
511
+        R_init_size = len(R)
512
+        i = len(L) - 1
513
+        while i >= 0:
514
+            if random.random() < noise:
515
+                n = L.pop(i)
516
+                R.append(n)
517
+            i = i - 1
518
+        i = R_init_size - 1
519
+        while i >= 0:
520
+            if random.random() < noise:
521
+                n = R.pop(i)
522
+                L.append(n)
523
+            i = i - 1
524
+
525
+    def ccia2022_polarized_partition_greedy(self, args):
526
+        '''
527
+        CCIA2022 Polarized Partition greedy algorithm.
528
+        Set L contains mainly users in the negative side of the debate and the set R contains mainly user in the positive side of the debate.
529
+        Algorithm parameters args.algorithm:
530
+          - First char of the string, 'g': Chooses this Greedy Polarized Partition algorithm
531
+          - Second char of the string sets initial partition:
532
+            - '0': Distributes uniformly at random in either L or R
533
+            - '1': Order vertices by polarity assigning the negatives and neutral to L and the positives to R
534
+            - '2': Place user's opinion c randomly in L with probability P = (1 − S(c)) / 2 and in R with probability 1 − P
535
+          - Third char of the string sets better solution strategy:
536
+            - '0': Hill Climbing strategy, pick the first neighbor that improve
537
+            - '1': Steepest Ascent Hill Climbing strategy, pick the neighbor that improves more
538
+          - Fourth char of the string sets the restarts
539
+            - '0': No restarts
540
+            - '1': 10 restarts
541
+        '''
542
+        print('CCIA2022 Greedy Bipartite Polarization algorithm...')
543
+        p = psutil.Process()
544
+        init_cpu_time = p.cpu_times()[0]
545
+        select_neighbor = self.ccia2022_select_neighbor_funtion(args)
546
+        L, R = self.wia2021_initial_partition(args)
547
+        restarts = self.ccia2022_select_restarts(args)
548
+        init_BipPol = self.wia2021_BipPol(L, R)
549
+        best_BipPol = init_BipPol
550
+        best_L = L[:]
551
+        best_R = R[:]
552
+        print("  Initial BipPol = %f" % init_BipPol)
553
+
554
+        while restarts:
555
+            changes = 1
556
+            steps = 0
557
+            while changes and steps < len(self.UDebG):
558
+                print('\r  Step %i/%i...' % (steps + 1, len(self.UDebG)), end = '')
559
+                changes = select_neighbor(L, R)
560
+                steps = steps + changes
561
+            if not changes:
562
+                print(' not improving...')
563
+            restarts = restarts - 1
564
+            BipPol = self.wia2021_BipPol(L, R)
565
+            print("  Try final BipPol = %f" % BipPol)
566
+            if best_BipPol < BipPol:
567
+                best_BipPol = BipPol
568
+                best_L = L[:]
569
+                best_R = R[:]
570
+            if restarts:
571
+                self.ccia2022_randomize_sets(L, R, 0.1)
572
+                BipPol = self.wia2021_BipPol(L, R)
573
+                print("  Try init BipPol = %f" % BipPol)
574
+
575
+        LC, RC, SC, SWeight, BipPol = self.wia2021_BipPol(best_L, best_R, all_stats = True)
576
+        print("  Final best BipPol = %f" % BipPol)
577
+        final_cpu_time = p.cpu_times()[0]
578
+        print("  CPU time = %0.3f" % (final_cpu_time - init_cpu_time))
579
+        if args.draw_graphs:
580
+            self.wia2021_draw_UDebG(args, best_L)
581
+        self.wia2021_stats_to_file(args, init_BipPol, steps, LC, RC, SC, SWeight, BipPol, best_L, best_R)
582
+
583
+    def ccia2022_scip_output(self, args):
584
+        '''
585
+        UDebG Bipartition problem output format for SCIP solver
586
+        '''
587
+        print('Writing UDebG Bipartition problem in SCIP format...')
588
+        file_vertices = '%s.udebg-vertices.scp' % args.input_file
589
+        file_edges = '%s.udebg-edges.scp' % args.input_file
590
+        file_stats = '%s.stats' % args.input_file
591
+
592
+        # Vertices
593
+        values = []
594
+        with open(file_vertices, 'w') as f:
595
+            for n, nd in self.UDebG.nodes(data = True):
596
+                f.write('v{} {}\n'.format(self.UDebG.nodes[n]['node_id'], nd['opinion_ws']))
597
+                values.append(nd['opinion_ws'])
598
+        df = pandas.DataFrame(data = values, columns = ['Node weights'])
599
+        stats = str(df.describe()) + '\n'
600
+
601
+        # Edges
602
+        values = []
603
+        with open(file_edges, 'w') as f:
604
+            for e1, e2, ed in self.UDebG.edges(data = True):
605
+                p = ed['interaction_ws'][0]
606
+                w = ed['interaction_ws'][1]
607
+                cost = (-(2 * ((p - 0.5) ** 2) + 0.5)) * w
608
+                f.write('v{} v{} {}\n'.format(self.UDebG.nodes[e1]['node_id'], self.UDebG.nodes[e2]['node_id'], cost))
609
+                values.append(cost)
610
+        df = pandas.DataFrame(data = values, columns = ['Edge weights'])
611
+        stats += str(df.describe()) + '\n'
612
+
613
+        # Compute stats for the out degree of each node
614
+        od = [t[1] for t in self.UDebG.out_degree(self.UDebG.nodes())]
615
+        stats += f'Min out degree: {min(od)}\n'
616
+        stats += f'Max out degree: {max(od)}\n'
617
+        stats += f'Mean out degree: {numpy.mean(od)}\n'
618
+
619
+        # Write stats to file
620
+        with open(file_stats, 'w') as f:
621
+            f.write(stats)
622
+
623
+    def wia2021_stats_to_file(self, args, init_BipPol, steps, LC, RC, SC, SWeight, BipPol, L, R):
624
+        '''
625
+        Write Greedy polarized partition algorithm statistics and information to file
626
+        '''
627
+        print('Writing statistics to file...')
628
+
629
+        out_str = 'WIA2021 stats\n------------\n'
630
+        out_str += 'Timestamp = %s\n' % time.ctime()
631
+        out_str += 'Input file = %s\n' % args.input_file
632
+        out_str += 'UDebG #nodes = %i\n' % self.UDebG.number_of_nodes()
633
+        out_str += 'UDebG #edges = %i\n' % self.UDebG.number_of_edges()
634
+        out_str += 'UDebG ratio #edges/#nodes = %f\n' % (self.UDebG.number_of_edges() / self.UDebG.number_of_nodes())
635
+        out_str += 'Algorithm = %s\n' % args.algorithm
636
+        out_str += 'Initial BipPol = %f\n' % init_BipPol
637
+        out_str += 'Algorithm steps = %i\n' % steps
638
+        out_str += 'Final BipPol = %f\n' % BipPol
639
+        out_str += 'LC = %f\n' % LC
640
+        out_str += 'RC = %f\n' % RC
641
+        out_str += 'SC = %f\n' % SC
642
+        out_str += 'SWeight = %f\n' % SWeight
643
+        out_str += '#users in L = %i\n' % len(L)
644
+        out_str += '#users in R = %i\n' % len(R)
645
+        out_str += 'Set L = %s\n' % str([self.UDebG.nodes[n]['node_id'] for n in L])
646
+        out_str += 'Set R = %s\n' % str([self.UDebG.nodes[n]['node_id'] for n in R])
647
+
648
+        # Write to file
649
+        output_file_name = '%s.%s.info' % (args.input_file, args.algorithm)
650
+        output_file = open(output_file_name, 'w')
651
+        output_file.write(out_str)
652
+        output_file.close()
653
+
654
+    def wia2021_draw_UDebG(self, args, L = None):
655
+        '''
656
+        Drawing WIA2021 UDebG
657
+        '''
658
+        if L:
659
+            print('Drawing wia2021 Bipartite UDebG...')
660
+            output_file_name = '%s.udebg.bip.png' % args.input_file
661
+        else:
662
+            print('Drawing wia2021 UDebG...')
663
+            output_file_name = '%s.udebg.png' % args.input_file
664
+
665
+        gv = networkx.nx_agraph.to_agraph(self.UDebG)
666
+
667
+        gv.node_attr['style'] = 'filled'
668
+        gv.node_attr['fixedsize'] = 'true'
669
+        gv.node_attr['width'] = '0.4'
670
+        gv.node_attr['height'] = '0.4'
671
+        for n in gv.nodes():
672
+            node_id = self.UDebG.nodes[n]['node_id']
673
+            n.attr['label'] = str(node_id)
674
+            bordercolor = '#000000'
675
+            n.attr['penwidth'] = 1
676
+            if L: # UDebG Bipartite
677
+                if n in L:
678
+                    n.attr['fillcolor'] = '#FF0000'
679
+                    n.attr['fontcolor'] = '#FFFFFF'
680
+                else:
681
+                    n.attr['fillcolor'] = '#0000FF'
682
+                    n.attr['fontcolor'] = '#FFFFFF'
683
+            else: # UDebG
684
+                s = self.UDebG.nodes[n]['opinion_ws']
685
+                if s > 0: # cyan = '#4FCFFF'
686
+                    contrast, color = get_weighted_color([0x4F, 0xCF, 0xFF], 0, 1, s, hw = 0xAF)
687
+                    contrast = '#000000'
688
+                elif s < 0: # dark blue = '#00007F'
689
+                    contrast, color = get_weighted_color([0x00, 0x00, 0x7F], 0, 1, -s, hw = 0xAF)
690
+                else:
691
+                    color = [0xFF, 0xFF, 0xFF]
692
+                    contrast = '#000000'
693
+                n.attr['fillcolor'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
694
+                n.attr['fontcolor'] = contrast
695
+
696
+        gv.edge_attr['color'] = '#000000'
697
+        for e in gv.edges():
698
+            p = self.UDebG[e[0]][e[1]]['interaction_ws'][0] 
699
+            w = self.UDebG[e[0]][e[1]]['interaction_ws'][1] 
700
+            if w > 0:
701
+                contrast, color = get_weighted_color([0x00, 0xFF, 0x00], 0, 1, p)
702
+            elif w < 0:
703
+                contrast, color = get_weighted_color([0xFF, 0x00, 0x00], 0, 1, 1 - p)
704
+            else:
705
+                color = [0x00, 0x00, 0x00]
706
+            e.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
707
+
708
+        gv.layout(prog = 'dot', args='-Goverlap=false -Gnodesep=0.2 -Granksep=0.2  -Grankdir=BT -GK=800 -Gstart=17 -Gmaxiter=600')
709
+        gv.draw(output_file_name, format = 'png')
710
+
711
+    def mdai2020_set_nodes(self):
712
+        '''
713
+        UDebG has a node for each user and stores each comment of the user in a list as node data
714
+        '''
715
+        # Set chronological id to comments in DebT
716
+        self.DebT.nodes[self.DebT_root_id]['chrono_id'] = 0
717
+        id_list = sorted([n for n, nd in self.DebT.nodes(data = True) if 'title' not in nd['data'].attrib])
718
+        for i, c_id in enumerate(id_list):
719
+            self.DebT.nodes[c_id]['chrono_id'] = i + 1
720
+
721
+        node_id = 1
722
+        for n, nd in self.DebT.nodes(data = True):
723
+            if 'title' in nd['data'].attrib: # Root comment
724
+                self.UDebG.add_node('0', data = nd['data'], node_id = 0) # Id = 0 for root node (special node)
725
+            else: # Regular comment
726
+                user = nd['data'].get('author')
727
+                if user not in self.UDebG:
728
+                    self.UDebG.add_node(user, data = [nd['data']], node_id = node_id)
729
+                    node_id = node_id + 1
730
+                else:
731
+                    self.UDebG.nodes[user]['data'].append(nd['data'])
732
+
733
+        # Print number of comments per user
734
+        # print('User\tId\t#comments')
735
+        # for n, nd in self.UDebG.nodes(data = True):
736
+        #     print('%s\t%i\t%i' % (n, nd['node_id'], len(nd['data'])))
737
+
738
+    def mdai2020_set_edges(self):
739
+        '''
740
+        UDebG has an edge between u_a and u_b if there is a reply form u_a to u_b
741
+        '''
742
+        for e1, e2, ed in self.DebT.edges(data = True):
743
+            # u_e1 replies to u_e2
744
+            u_e1 = self.DebT.nodes[e1]['data'].get('author')
745
+            if 'title' in self.DebT.nodes[e2]['data'].attrib: # Reply to root comment
746
+                u_e2 = '0'
747
+            else:
748
+                u_e2 = self.DebT.nodes[e2]['data'].get('author')
749
+            if not self.UDebG.has_edge(u_e1, u_e2):
750
+                self.UDebG.add_edge(u_e1, u_e2, data = [ed['data']])
751
+            else:
752
+                self.UDebG[u_e1][u_e2]['data'].append(ed['data'])
753
+
754
+    def mdai2020_UDebG(self):
755
+        '''
756
+        MDAI 2020 requirements: Auto-replies are discarded
757
+        '''
758
+        for user in self.UDebG.nodes():
759
+            if self.UDebG.has_edge(user, user):
760
+                self.UDebG.remove_edge(user, user)
761
+
762
+    def mdai2020_skeptical(self):
763
+        '''
764
+        Skeptical sentiment weighting scheme L:E --> [-2, 2]
765
+        min(sentiment) of replies from u_a to u_b if (all comments of u_a agree with u_b) or (all comments of u_a disagree with u_b)
766
+        0 otherwise
767
+        '''
768
+        for u1, u2, ed in self.UDebG.edges(data = True):
769
+            ed['skeptical'] = None
770
+            for answer in ed['data']:
771
+                cu1 = answer.find('t').get('id')
772
+                s = sentiment(ast.literal_eval(self.DebT.nodes[cu1]['data'].get('sentiment_distribution')))
773
+                if ed['skeptical'] == None:
774
+                    ed['skeptical'] = s
775
+                else:
776
+                    if (s > 0 and ed['skeptical'] > 0) or (s < 0 and ed['skeptical'] < 0):
777
+                        ed['skeptical'] = min(s, ed['skeptical'])
778
+                    else:
779
+                        ed['skeptical'] = 0
780
+                        break
781
+
782
+    def mdai2020_VAF_valuation(self, args):
783
+        '''
784
+        Valuation function for a Valued Argumentation Framework (VAF) over UDebG
785
+        '''
786
+        if not args.user_valuation:
787
+            sys.exit('ERROR: no valuation function selected for the VAF over UDebG.')
788
+
789
+        for n, nd in self.UDebG.nodes(data = True):
790
+            if args.user_valuation == 'comment_karma':
791
+                nd['valuation'] = scale_weight(max([int(c.get(args.user_valuation)) for c in nd['data']]), args)
792
+            elif args.user_valuation == 'sum_scores':
793
+                nd['valuation'] = scale_weight(sum([int(c.get('score')) for c in nd['data']]), args)
794
+        vals = [nd['valuation'] for n_id, nd in self.UDebG.nodes(data = True)]
795
+        self.max_weight = max(vals)
796
+        self.min_weight = min(vals)
797
+        
798
+        pos_vals = [ed['skeptical'] for u1, u2, ed in self.UDebG.edges(data = True) if ed['skeptical'] > 0]
799
+        neg_vals = [ed['skeptical'] for u1, u2, ed in self.UDebG.edges(data = True) if ed['skeptical'] < 0]
800
+        self.max_pos_edge_weight = max(pos_vals)
801
+        self.min_pos_edge_weight = min(pos_vals)
802
+        self.max_neg_edge_weight = max(neg_vals)
803
+        self.min_neg_edge_weight = min(neg_vals)
804
+
805
+    def mdai2020_UDebG_to_xml(self, args):
806
+        '''
807
+        Saves self.UDebG graph to xml file
808
+        '''
809
+        xml = ET.Element('entailment-corpus')
810
+        xml.append(ET.Comment(reddit_at.args2str(args)))
811
+        xml.set('num_nodes', str(len(self.UDebG)))
812
+        xml.set('num_edges', str(self.UDebG.number_of_edges()))
813
+
814
+        al_xml = ET.SubElement(xml, 'argument-list')
815
+        al_xml.set('minweight', str(self.min_weight))
816
+        al_xml.set('maxweight', str(self.max_weight))
817
+        for n_id, nd in self.UDebG.nodes(data = True):
818
+            a = ET.SubElement(al_xml, 'arg')
819
+            a.set('weight', str(nd['valuation']))
820
+            a.set('user', str(n_id))
821
+            a.set('id', str(nd['node_id']))
822
+
823
+        ap_xml = ET.SubElement(xml, 'argument-pairs')
824
+        for u1, u2, ed in self.UDebG.edges(data = True):
825
+            if ed['skeptical'] < 0 and abs(ed['skeptical']) > args.alpha:
826
+                p = ET.SubElement(ap_xml, 'pair')
827
+                p.set('entailment', 'ATTACKS')
828
+                t = ET.SubElement(p, 't')
829
+                t.set('id', str(self.UDebG.nodes[u1]['node_id']))
830
+                h = ET.SubElement(p, 'h')
831
+                h.set('id', str(self.UDebG.nodes[u2]['node_id']))
832
+        
833
+        ET.ElementTree(xml).write("%s.udebg.xml" % args.input_file)
834
+
835
+    def mdai2020_draw_DebT(self, args):
836
+        '''
837
+        Drawing Debate Tree
838
+        '''
839
+        print('Drawing DebT...')
840
+
841
+        gv = networkx.nx_agraph.to_agraph(self.DebT)
842
+
843
+        gv.node_attr['style'] = 'filled'
844
+        gv.node_attr['fixedsize'] = 'true'
845
+        gv.node_attr['width'] = '0.4'
846
+        gv.node_attr['height'] = '0.4'
847
+        gv.node_attr['fillcolor'] = '#0000FF'
848
+        gv.node_attr['fontcolor'] = '#FFFFFF'
849
+        for n in gv.nodes():
850
+            n.attr['label'] = str(self.DebT.nodes[n]['chrono_id'])
851
+
852
+        gv.edge_attr['color'] = '#000000'
853
+        for e in gv.edges():
854
+            s = sentiment(ast.literal_eval(self.DebT.nodes[e[0]]['data'].get('sentiment_distribution')))
855
+            if s > 0:
856
+                e.attr['color'] = '#00FF00'
857
+            elif s < 0:
858
+                e.attr['color'] = '#FF0000'
859
+
860
+        gv.layout(prog = 'dot', args='-Goverlap=false -Gnodesep=0.2 -Granksep=0.2  -Grankdir=BT -GK=800 -Gstart=17 -Gmaxiter=600')
861
+        gv.draw("%s.debt.png" % args.input_file, format = 'png')
862
+
863
+    def mdai2020_draw_UDebG(self, args):
864
+        '''
865
+        Drawing UDebG
866
+        '''
867
+        if self.VAF_accepted:
868
+            print('Drawing UDebG solution...')
869
+            output_file_name = '%s.udebg-sol.png' % args.input_file
870
+        else:
871
+            print('Drawing UDebG...')
872
+            output_file_name = '%s.udebg.png' % args.input_file
873
+
874
+        gv = networkx.nx_agraph.to_agraph(self.UDebG)
875
+
876
+        gv.node_attr['style'] = 'filled'
877
+        gv.node_attr['fixedsize'] = 'true'
878
+        gv.node_attr['width'] = '0.4'
879
+        gv.node_attr['height'] = '0.4'
880
+        for n in gv.nodes():
881
+            node_id = self.UDebG.nodes[n]['node_id']
882
+            n.attr['label'] = str(node_id)
883
+            bordercolor = [0x00, 0x00, 0x00]
884
+            penwidth = 1
885
+            fontcolor = '#FFFFFF'
886
+            fillcolor = [0x00, 0x00, 0xFF]
887
+            if self.VAF_accepted:
888
+                fontcolor, fillcolor = get_weighted_color([0x00, 0x00, 0xFF], self.min_weight, self.max_weight, self.UDebG.nodes[n]['valuation'])
889
+                if node_id not in self.VAF_accepted:
890
+                    bordercolor = fillcolor
891
+                    penwidth = 3
892
+                    fontcolor, fillcolor = get_weighted_color([0x00, 0x00, 0x00], self.min_weight, self.max_weight, self.UDebG.nodes[n]['valuation'])
893
+            n.attr['fontcolor'] = fontcolor
894
+            n.attr['fillcolor'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, fillcolor)])
895
+            n.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, bordercolor)])
896
+            n.attr['penwidth'] = penwidth
897
+
898
+        gv.edge_attr['color'] = '#000000'
899
+        for e in gv.edges():
900
+            if self.UDebG[e[0]][e[1]]['skeptical'] > 0:
901
+                contrast, color = get_weighted_color([0x00, 0xFF, 0x00], self.min_pos_edge_weight, self.max_pos_edge_weight, self.UDebG[e[0]][e[1]]['skeptical'])
902
+                e.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
903
+            elif self.UDebG[e[0]][e[1]]['skeptical'] < 0:
904
+                contrast, color = get_weighted_color([0xFF, 0x00, 0x00], -self.max_neg_edge_weight, -self.min_neg_edge_weight, -self.UDebG[e[0]][e[1]]['skeptical'])
905
+                e.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
906
+                if self.VAF_accepted:
907
+                    if abs(self.UDebG[e[0]][e[1]]['skeptical']) > args.alpha:
908
+                        e.attr['color'] = '#FF0000'
909
+                    else:
910
+                        e.attr['color'] = 'transparent' # Like do not draw edge
911
+
912
+        gv.layout(prog = 'dot', args='-Goverlap=false -Gnodesep=0.2 -Granksep=0.2  -Grankdir=BT -GK=800 -Gstart=17 -Gmaxiter=600')
913
+        gv.draw(output_file_name, format = 'png')

Powered by TurnKey Linux.