Browse source code

2024-Algorithms data

Josep Argelich 3 months ago
parent commit 314e3d8431

BIN
data/2024-Algorithms/ds1.tgz View File


BIN
data/2024-Algorithms/ds2.tgz View File


+ 23
- 0
data/2024-Algorithms/src/gen-rUDebG.sh View File

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+export LC_NUMERIC="en_US.UTF-8" # For the alpha decimal dot
+
+INIT_ALPHA=0.1
+STEP_ALPHA=0.3
+FINAL_ALPHA=1
+
+INIT_NODES=20
+STEP_NODES=10
+FINAL_NODES=50
+
+NUM_INSTANCES=50
+
+for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
+    for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
+        for i in `seq $NUM_INSTANCES`; do
+            ./reddit_at.py -a $ALPHA -lb 10 -al g200 -p $NODES,0 -s $i -so;
+        done
+        mkdir dataset-rUDebG-$NODES-$ALPHA;
+        mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;
+    done
+done
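
The six gen-rUDebG*.sh scripts in this commit share the skeleton above and differ only in the ALPHA range, the NODES range, and the -al/-p options passed to reddit_at.py (see the summary after the last script). As a quick sanity check of what one sweep enumerates, a minimal Python sketch of the (ALPHA, NODES, seed) grid produced by gen-rUDebG.sh (illustrative only, not part of the commit):

# seq 0.1 0.3 1 -> 0.1, 0.4, 0.7, 1.0; seq 20 10 50 -> 20, 30, 40, 50
grid = [(round(0.1 + 0.3 * a, 2), 20 + 10 * n, s)
        for a in range(4) for n in range(4) for s in range(1, 51)]
print(len(grid)) # 800 calls to reddit_at.py, 50 instances per dataset directory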

+ 23
- 0
data/2024-Algorithms/src/gen-rUDebG_around01.sh View File

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+export LC_NUMERIC="en_US.UTF-8" # For the alpha decimal dot
+
+INIT_ALPHA=0.05
+STEP_ALPHA=0.03
+FINAL_ALPHA=0.14
+
+INIT_NODES=25
+STEP_NODES=5
+FINAL_NODES=40
+
+NUM_INSTANCES=50
+
+for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
+    for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
+        for i in `seq $NUM_INSTANCES`; do
+            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0 -s $i -so;
+        done
+        mkdir dataset-rUDebG-$NODES-$ALPHA;
+        mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;
+    done
+done

+ 23
- 0
data/2024-Algorithms/src/gen-rUDebG_around04.sh View File

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+export LC_NUMERIC="en_US.UTF-8" # For the alpha decimal dot
+
+INIT_ALPHA=0.4
+STEP_ALPHA=0.3
+FINAL_ALPHA=1
+
+INIT_NODES=40
+STEP_NODES=5
+FINAL_NODES=60
+
+NUM_INSTANCES=50
+
+for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
+    for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
+        for i in `seq $NUM_INSTANCES`; do
+            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0,3.0 -s $i -so;
+        done
+        mkdir dataset-rUDebG-$NODES-$ALPHA;
+        mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;
+    done
+done

+ 23
- 0
data/2024-Algorithms/src/gen-rUDebG_around04_m65_m90.sh View File

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+export LC_NUMERIC="en_US.UTF-8" # For the alpha decimal dot
+
+INIT_ALPHA=0.4
+STEP_ALPHA=0.3
+FINAL_ALPHA=1
+
+INIT_NODES=65
+STEP_NODES=5
+FINAL_NODES=90
+
+NUM_INSTANCES=50
+
+for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
+    for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
+        for i in `seq $NUM_INSTANCES`; do
+            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0,3.0 -s $i -so;
+        done
+        mkdir dataset-rUDebG-$NODES-$ALPHA;
+        mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;
+    done
+done

+ 23
- 0
data/2024-Algorithms/src/gen-rUDebG_around05.sh View File

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+export LC_NUMERIC="en_US.UTF-8" # For the alpha decimal dot
+
+INIT_ALPHA=0.5
+STEP_ALPHA=0.1
+FINAL_ALPHA=0.6
+
+INIT_NODES=40
+STEP_NODES=5
+FINAL_NODES=90
+
+NUM_INSTANCES=50
+
+for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
+    for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
+        for i in `seq $NUM_INSTANCES`; do
+            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0,3.0 -s $i -so;
+        done
+        mkdir dataset-rUDebG-$NODES-$ALPHA;
+        mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;
+    done
+done

+ 23
- 0
data/2024-Algorithms/src/gen-rUDebG_around08.sh View File

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+export LC_NUMERIC="en_US.UTF-8" # For the alpha decimal dot
+
+INIT_ALPHA=0.8
+STEP_ALPHA=0.1
+FINAL_ALPHA=0.9
+
+INIT_NODES=40
+STEP_NODES=5
+FINAL_NODES=90
+
+NUM_INSTANCES=50
+
+for ALPHA in `seq $INIT_ALPHA $STEP_ALPHA $FINAL_ALPHA`; do
+    for NODES in `seq $INIT_NODES $STEP_NODES $FINAL_NODES`; do
+        for i in `seq $NUM_INSTANCES`; do
+            ./reddit_at.py -a $ALPHA -lb 10 -al g211 -p $NODES,0,3.0 -s $i -so;
+        done
+        mkdir dataset-rUDebG-$NODES-$ALPHA;
+        mv rnd-UDebG-* dataset-rUDebG-$NODES-$ALPHA;
+    done
+done
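
For reference, the six generator scripts differ only as follows; all of them pass -a $ALPHA -lb 10 -s $i -so, run NUM_INSTANCES=50 seeds per (ALPHA, NODES) cell, and collect the rnd-UDebG-* outputs into dataset-rUDebG-$NODES-$ALPHA directories:

script                           ALPHA (init/step/final)   NODES (init/step/final)   other options
gen-rUDebG.sh                    0.1 / 0.3 / 1             20 / 10 / 50              -al g200 -p $NODES,0
gen-rUDebG_around01.sh           0.05 / 0.03 / 0.14        25 / 5 / 40               -al g211 -p $NODES,0
gen-rUDebG_around04.sh           0.4 / 0.3 / 1             40 / 5 / 60               -al g211 -p $NODES,0,3.0
gen-rUDebG_around04_m65_m90.sh   0.4 / 0.3 / 1             65 / 5 / 90               -al g211 -p $NODES,0,3.0
gen-rUDebG_around05.sh           0.5 / 0.1 / 0.6           40 / 5 / 90               -al g211 -p $NODES,0,3.0
gen-rUDebG_around08.sh           0.8 / 0.1 / 0.9           40 / 5 / 90               -al g211 -p $NODES,0,3.0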

+ 817
- 0
data/2024-Algorithms/src/reddit_at.py View File

@@ -0,0 +1,817 @@
+#!/usr/bin/python3 -ttOO
+'''
+Artificial Intelligence Research Group
+University of Lleida
+'''
+
+# Libraries
+
+import os
+import sys
+import argparse
+import xml.etree.ElementTree as ET
+import math
+import networkx
+import ast
+import udebg
+
+# Classes
+
+class RedditG():
+    '''
+    Reddit post in graph structure for its analysis
+    '''
+    def __init__(self, input_file):
+        self.xml = None
+        self.root_id = None
+        self.DebT = networkx.DiGraph() # Debate Tree
+        self.PDebT = networkx.DiGraph() # Pruned Debate Tree
+        self.SDebT = networkx.DiGraph() # Two-Sided Debate Tree
+        self.WBDebG = None # Weighted Bipartite Debate Graph
+        self.comment_id = {}
+        self.min_weight = None
+        self.max_weight = None
+        self.VAF_accepted = None
+        self.read_xml(input_file)
+
+    def remove_deleted_comments(self):
+        rg = self.DebT.reverse() # Reverse graph (direction of edges reversed)
+        while True: # To avoid "RuntimeError: dictionary changed size during iteration" in inner loop
+            changed = False
+            for n, nd in self.DebT.nodes(data = True):
+                if nd['data'].get('author') == 'None':
+                    if n in self.DebT: # Not already removed (appears in a previous subtree)
+                        # Get subtree of node n and remove it
+                        st = networkx.algorithms.traversal.depth_first_search.dfs_tree(rg, n)
+                        self.DebT.remove_nodes_from(st.nodes())
+                        changed = True
+                        break
+            if not changed:
+                break
+
+    def read_xml(self, input_file):
+        '''
+        Read XML file with the conversation
+        '''
+        print('Reading xml input file...')
+        self.xml = ET.parse(input_file)
+        al = self.xml.find('argument-list')
+        for arg in al.iter('arg'):
+            if 'title' in arg.attrib:
+                self.root_id = arg.attrib['id']
+                if arg.attrib['author'] == 'None': # So the full tree is not deleted when the root node author is deleted
+                    arg.attrib['author'] = 'root_node_author'
+            self.DebT.add_node(arg.attrib['id'], data = arg)
+        ap = self.xml.find('argument-pairs')
+        for pair in ap.iter('pair'): # Argument pair (relation): t replies to h
+            self.DebT.add_edge(pair.find('t').get('id'), pair.find('h').get('id'), data = pair)
+        self.remove_deleted_comments()
+
+    def wia2021_DebT(self, args):
+        '''
+        DebT for wia2021
+        '''
+        print('Generating DebT for wia2021...')
+        # Set chronological id to comments in DebT
+        id_list = sorted([n for n, nd in self.DebT.nodes(data = True) if 'title' not in nd['data'].attrib])
+        for i, c_id in enumerate(id_list):
+            self.DebT.nodes[c_id]['chrono_id'] = i + 1 # chrono_id for root node set below
+        print('  Number of nodes DebT = {}'.format(self.DebT.number_of_nodes()))
+        # Initializations
+        self.DebT.nodes[self.root_id]['sentiment_not_normalized'] = 0
+        self.DebT.nodes[self.root_id]['chrono_id'] = 0
+        # BFS on DebT to compute sentiment not normalized [-2, 2]
+        list_edges = [e for e in self.DebT.in_edges(self.root_id)]
+        while list_edges:
+            current_edge = list_edges.pop(0)
+            node_id = current_edge[0]
+            sentiment = self.ccia18_sentiment(False, ast.literal_eval(self.DebT.nodes[node_id]['data'].get('sentiment_distribution')), args)
+            self.DebT.nodes[node_id]['sentiment_not_normalized'] = sentiment
+            list_edges.extend([e for e in self.DebT.in_edges(node_id)])
+
+        if args.draw_graphs:
+            self.wia2021_draw_DebT(args)
+
+    def wia2021_SDebT(self, args):
+        '''
+        SDebT for wia2021
+        '''
+        print('Generating SDebT for wia2021...')
+        # Copy DebT to SDebT
+        self.SDebT.add_nodes_from(self.DebT.nodes(data = True))
+        for e1, e2, ed in self.DebT.edges(data = True):
+            self.SDebT.add_edge(e1, e2, data = self.DebT[e1][e2]['data'])
+        # Initializations
+        self.SDebT.nodes[self.root_id]['side'] = 1
+        # BFS to compute the side of each node
+        list_edges = [e for e in self.SDebT.in_edges(self.root_id)]
+        while list_edges:
+            current_edge = list_edges.pop(0)
+            node_id = current_edge[0]
+            parent_node_id = current_edge[1]
+            if (self.SDebT.nodes[parent_node_id]['side'] == 1 and self.DebT.nodes[node_id]['sentiment_not_normalized'] > 0) or (self.SDebT.nodes[parent_node_id]['side'] == -1 and self.DebT.nodes[node_id]['sentiment_not_normalized'] <= 0):
+                self.SDebT.nodes[node_id]['side'] = 1
+            else:
+                self.SDebT.nodes[node_id]['side'] = -1
+            list_edges.extend([e for e in self.SDebT.in_edges(node_id)])
+
+        if args.draw_graphs:
+            self.wia2021_draw_SDebT(args)
+
+    def wia2021_draw_DebT(self, args):
+        '''
+        Drawing wia2021 DebT
+        '''
+        print('Drawing wia2021 DebT...')
+        gv = networkx.nx_agraph.to_agraph(self.DebT)
+        gv.node_attr['style'] = 'filled'
+        gv.node_attr['fixedsize'] = 'true'
+        gv.node_attr['width'] = '0.4'
+        gv.node_attr['height'] = '0.4'
+        gv.node_attr['fillcolor'] = '#0000FF'
+        gv.node_attr['fontcolor'] = '#FFFFFF'
+
+        for n in gv.nodes():
+            n.attr['label'] = str(self.DebT.nodes[n]['chrono_id'])
+
+        gv.edge_attr['color'] = '#000000'
+        for e in gv.edges():
+            s = self.DebT.nodes[e[0]]['sentiment_not_normalized']
+            if s > 0:
+                contrast, color = udebg.get_weighted_color([0x00, 0xFF, 0x00], 0, 2, s)
+                e.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
+            elif s < 0:
+                contrast, color = udebg.get_weighted_color([0xFF, 0x00, 0x00], 0, 2, -s)
+                e.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
+
+        gv.layout(prog = 'dot', args='-Goverlap=false -Gnodesep=0.2 -Granksep=0.2  -Grankdir=BT -GK=800 -Gstart=17 -Gmaxiter=600')
+        gv.draw("%s.debt.png" % args.input_file, format = 'png')
+
+    def wia2021_draw_SDebT(self, args):
+        '''
+        Drawing wia2021 SDebT
+        '''
+        print('Drawing wia2021 SDebT...')
+        gv = networkx.nx_agraph.to_agraph(self.SDebT)
+        gv.node_attr['style'] = 'filled'
+        gv.node_attr['fixedsize'] = 'true'
+        gv.node_attr['width'] = '0.4'
+        gv.node_attr['height'] = '0.4'
+        gv.node_attr['fillcolor'] = '#0000FF'
+        gv.node_attr['fontcolor'] = '#FFFFFF'
+
+        for n in gv.nodes():
+            n.attr['label'] = str(self.SDebT.nodes[n]['chrono_id'])
+            side = self.SDebT.nodes[n]['side']
+            if side == 1:
+                n.attr['fontcolor'] = '#000000'
+                n.attr['fillcolor'] = '#4FCFFF' # light green = '#6FFF6F', cyan = '#4FCFFF'
+            else:
+                n.attr['fontcolor'] = '#FFFFFF'
+                n.attr['fillcolor'] = '#00007F' # light red = '#FF6F6F', dark blue = '#00007F'
+
+        gv.edge_attr['color'] = '#000000'
+        for e in gv.edges():
+            s = self.SDebT.nodes[e[0]]['sentiment_not_normalized']
+            if s > 0:
+                contrast, color = udebg.get_weighted_color([0x00, 0xFF, 0x00], 0, 2, s)
+                e.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
+            elif s < 0:
+                contrast, color = udebg.get_weighted_color([0xFF, 0x00, 0x00], 0, 2, -s)
+                e.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, color)])
+
+        gv.layout(prog = 'dot', args='-Goverlap=false -Gnodesep=0.2 -Granksep=0.2  -Grankdir=BT -GK=800 -Gstart=17 -Gmaxiter=600')
+        gv.draw("%s.sdebt.png" % args.input_file, format = 'png')
+
+    def ccia18_sentiment(self, normalized, sentiment_distribution, args):
+        '''
+        Computes the sentiment from a sentiment distribution of 5 values
+        Normalized: [very neg, neg, neutral, pos, very pos] --> [neg = -1, neutral = 0, pos = 1]
+        Not normalized: --> [-2, 2]
+        '''
+        sentiment_relevance = [-2, -1, 0, 1, 2]
+        res = [a * b for a, b in zip(sentiment_relevance, sentiment_distribution)]
+        res = sum(res)
+        if not normalized:
+            return res
+        if abs(res) > args.alpha:
+            if res > 0:
+                return 1 # Positive
+            else:
+                return -1 # Negative
+        else:
+            return 0 # Neutral
+
+    def get_node_color(self, base_color, w):
+        hw = 0xCF
+        if w >= self.max_weight:
+            hw = 0
+        elif self.max_weight > self.min_weight:
+            hw = int(hw * (float(self.max_weight - w) / float(self.max_weight - self.min_weight)))
+        color = [a | b for a, b in zip(base_color, [hw, hw, hw])]
+        return color
+
+    def ccia18_analysis(self, args):
+        '''
+        Weighted Bipartite Graph analysis
+        '''
+        print('Generating PDebT and WBDebG...')
+        # Copy DebT to PDebT
+        self.PDebT.add_nodes_from(self.DebT.nodes(data = True))
+        for e1, e2, ed in self.DebT.edges(data = True):
+            self.PDebT.add_edge(e1, e2, data = self.DebT[e1][e2]['data'])
+        # Initializations
+        self.PDebT.nodes[self.root_id]['bipartite_set'] = 1 # 1 in favor of root, -1 not in favor
+        self.PDebT.nodes[self.root_id]['sentiment'] = 1
+        self.DebT.nodes[self.root_id]['sentiment_not_normalized'] = 0
+        rg = self.PDebT.reverse() # Reverse graph (direction of edges reversed)
+        # DFS on PDebT before removing nodes to save DebT sentiment not normalized
+        list_edges = [e for e in self.PDebT.in_edges(self.root_id)]
+        self.PDebT.nodes[self.root_id]['depth'] = 1
+        max_depth = 1
+        while list_edges:
+            current_edge = list_edges.pop()
+            node_id = current_edge[0]
+            self.PDebT.nodes[node_id]['depth'] = self.PDebT.nodes[current_edge[1]]['depth'] + 1
+            if self.PDebT.nodes[node_id]['depth'] > max_depth:
+                max_depth = self.PDebT.nodes[node_id]['depth']
+            sentiment = self.ccia18_sentiment(False, ast.literal_eval(self.PDebT.nodes[node_id]['data'].get('sentiment_distribution')), args)
+            self.DebT.nodes[node_id]['sentiment_not_normalized'] = sentiment
+            list_edges.extend([e for e in self.PDebT.in_edges(node_id)])
+        self.PDebT.nodes[self.root_id]['max_depth'] = max_depth
+        # DFS and prune PDebT
+        list_edges = [e for e in self.PDebT.in_edges(self.root_id)]
+        while list_edges:
+            current_edge = list_edges.pop()
+            node_id = current_edge[0]
+            father_id = current_edge[1]
+            sentiment = self.ccia18_sentiment(True, ast.literal_eval(self.PDebT.nodes[node_id]['data'].get('sentiment_distribution')), args)
+            if sentiment == 1: # Positive
+                self.PDebT.nodes[node_id]['bipartite_set'] = self.PDebT.nodes[father_id]['bipartite_set']
+            elif sentiment == -1: # Negative
+                self.PDebT.nodes[node_id]['bipartite_set'] = -self.PDebT.nodes[father_id]['bipartite_set']
+            if sentiment == 0: # Neutral: remove subtree
+                st = networkx.algorithms.traversal.depth_first_search.dfs_tree(rg, node_id)
+                self.PDebT.remove_nodes_from(st.nodes())
+            else: # Not Neutral
+                self.PDebT.nodes[node_id]['sentiment'] = sentiment
+                list_edges.extend([e for e in self.PDebT.in_edges(node_id)])
+        # Create the WBDebG
+        self.WBDebG = self.PDebG_to_WBDebG(self.PDebT)
+
+    def PDebG_to_WBDebG(self, PDebT):
+        '''
+        Create the WBDebG from the PDebT
+        '''
+        WBDebG = networkx.DiGraph()
+        WBDebG.add_nodes_from(PDebT.nodes(data = True))
+        for e1, e2, ed in PDebT.edges(data = True):
+            if WBDebG.nodes[e1]['bipartite_set'] != WBDebG.nodes[e2]['bipartite_set']:
+                WBDebG.add_edge(e1, e2, data = PDebT[e1][e2]['data'])
+        return WBDebG
+
+    def WBDebG2xml(self, args):
+        '''
+        Saves self.WBDebG graph to xml file
+        '''
+        xml = ET.Element('entailment-corpus')
+        xml.append(ET.Comment(args2str(args)))
+        al_xml = ET.SubElement(xml, 'argument-list')
+        i = 1 # 0 for root
+        maxw = minw = scale_weight(int(self.WBDebG.nodes[self.root_id]['data'].get('score')), args)
+        for n_id, nd in self.WBDebG.nodes(data = True):
+            a = nd['data']
+            w = scale_weight(int(a.get('score')), args)
+            self.WBDebG.nodes[n_id]['weight'] = w
+            if w < minw:
+                minw = w
+            elif w > maxw:
+                maxw = w
+            a.set('weight', str(w))
+            a.set('bipartite_set', str(self.WBDebG.nodes[n_id]['bipartite_set']))
+            a.set('comment_id', a.get('id'))
+            if a.get('id') == self.root_id:
+                # Id 0 for root node
+                self.comment_id[a.get('id')] = '0'
+                a.set('id', '0')
+            else:
+                self.comment_id[a.get('id')] = str(i)
+                a.set('id', str(i))
+                i = i + 1
+            al_xml.append(a)
+        al_xml.set('minweight', str(minw))
+        al_xml.set('maxweight', str(maxw))
+        self.min_weight = minw
+        self.max_weight = maxw
+        xml.set('num_nodes', str(i))
+        ap_xml = ET.SubElement(xml, 'argument-pairs')
+        i = 0
+        for e1, e2, ed in self.WBDebG.edges(data = True):
+            p = ed['data']
+            p.set('entailment', 'ATTACKS')
+            t = p.find('t')
+            t.set('comment_id', t.get('id'))
+            t.set('id', self.WBDebG.nodes[t.get('id')]['data'].get('id'))
+            h = p.find('h')
+            h.set('comment_id', h.get('id'))
+            h.set('id', self.WBDebG.nodes[h.get('id')]['data'].get('id'))
+            ap_xml.append(p)
+            i = i + 1
+        xml.set('num_edges', str(i))
+        ET.ElementTree(xml).write("%s.wbg.xml" % args.input_file)
+
+    def WBDebG_to_xml(self, args, WBDebG, tmp_file_name):
+        '''
+        Saves a WBDebG graph to an xml file; it uses the information generated in WBDebG2xml
+        '''
+        xml = ET.Element('entailment-corpus')
+        xml.append(ET.Comment(args2str(args)))
+        al_xml = ET.SubElement(xml, 'argument-list')
+        maxw = minw = scale_weight(int(WBDebG.nodes[self.root_id]['data'].get('score')), args)
+        for n_id, nd in WBDebG.nodes(data = True):
+            a = nd['data']
+            w = scale_weight(int(a.get('score')), args)
+            WBDebG.nodes[n_id]['weight'] = w
+            if w < minw:
+                minw = w
+            elif w > maxw:
+                maxw = w
+            al_xml.append(a)
+        al_xml.set('minweight', str(minw))
+        al_xml.set('maxweight', str(maxw))
+        xml.set('num_nodes', str(WBDebG.number_of_nodes()))
+        ap_xml = ET.SubElement(xml, 'argument-pairs')
+        for e1, e2, ed in WBDebG.edges(data = True):
+            p = ed['data']
+            ap_xml.append(p)
+        xml.set('num_edges', str(WBDebG.number_of_edges()))
+        ET.ElementTree(xml).write('%s' % tmp_file_name)
+
+    def draw_ccia18_PCT(self, args):
+        '''
+        Drawing Polarized Comment Tree
+        '''
+        print('Drawing Polarized Comment Tree...')
+        gv = networkx.nx_agraph.to_agraph(self.PDebT)
+        gv.node_attr['style'] = 'filled'
+        gv.node_attr['fixedsize'] = 'true'
+        gv.node_attr['width'] = '0.4'
+        gv.node_attr['height'] = '0.4'
+        for n in gv.nodes():
+            node_id = int(self.PDebT.nodes[n]['data'].get('id'))
+            n.attr['label'] = str(node_id)
+            if self.PDebT.nodes[n]['sentiment'] == 1:
+                fillcolor = self.get_node_color([0x80, 0xFF, 0x40], self.WBDebG.nodes[n]['weight'])
+            else:
+                fillcolor = self.get_node_color([0x7F, 0x00, 0x7F], self.WBDebG.nodes[n]['weight'])
+            n.attr['fillcolor'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, fillcolor)])
+        gv.layout(prog = 'dot', args='-Goverlap=false -Gnodesep=0.2 -Granksep=0.2  -Grankdir=BT -GK=800 -Gstart=17 -Gmaxiter=600')
+        gv.draw("%s.pct.png" % args.input_file, format = 'png')
+
+    def draw_ccia18_WBG(self, args):
+        '''
+        Drawing Weighted Bipartite Graph
+        '''
+        # TODO: Grouping nodes
+        # https://stackoverflow.com/questions/19674316/grouping-nodes-with-the-same-color-near-each-other-in-graphviz
+        if self.VAF_accepted:
+            print('Drawing Weighted Bipartite Graph solution...')
+            output_file_name = '%s.wbg-sol.png' % args.input_file
+        else:
+            print('Drawing Weighted Bipartite Graph...')
+            output_file_name = '%s.wbg.png' % args.input_file
+        gv = networkx.nx_agraph.to_agraph(self.WBDebG)
+        gv.node_attr['style'] = 'filled'
+        gv.node_attr['fixedsize'] = 'true'
+        gv.node_attr['width'] = '0.4'
+        gv.node_attr['height'] = '0.4'
+        #gv.edge_attr['color'] = '#FF8080'
+        for n in gv.nodes():
+            node_id = int(self.WBDebG.nodes[n]['data'].get('id'))
+            n.attr['label'] = str(node_id)
+            bordercolor = [0x00, 0x00, 0x00]
+            penwidth = 1
+            if self.WBDebG.nodes[n]['bipartite_set'] == 1:
+                fillcolor = self.get_node_color([0x00, 0x00, 0xFF], self.WBDebG.nodes[n]['weight'])
+            else:
+                fillcolor = self.get_node_color([0xFF, 0x00, 0x00], self.WBDebG.nodes[n]['weight'])
+            if self.VAF_accepted:
+                if node_id not in self.VAF_accepted:
+                    bordercolor = fillcolor
+                    penwidth = 3
+                    fillcolor = self.get_node_color([0x00, 0x00, 0x00], self.WBDebG.nodes[n]['weight'])
+            # fillcolor format '#RRGGBB', for example: fillcolor = '#FF8080'
+            n.attr['fillcolor'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, fillcolor)])
+            n.attr['color'] = '#%s' % ''.join([c[2:].zfill(2) for c in map(hex, bordercolor)])
+            n.attr['penwidth'] = penwidth
+        #for e in gv.edges():
+        #    e.attr['color'] = '#FF8080'
+        gv.layout(prog = 'dot', args='-Goverlap=false -Gnodesep=0.2 -Granksep=0.2  -Grankdir=BT -GK=800 -Gstart=17 -Gmaxiter=600')
+        gv.draw(output_file_name, format = 'png')
+
+    def ccia2018_stats_to_file(self, args):
+        '''
+        Write WBDebG statistics and information to file
+        '''
+        print('Writing statistics to file...')
+        num_nodes = self.WBDebG.number_of_nodes()
+        out_str = 'CCIA 2018 stats\n---------------\nInput file = %s\n' % args.input_file
+        out_str += 'Number of nodes = %i\n' % num_nodes
+        out_str += 'Number of edges = %i\n' % self.WBDebG.number_of_edges()
+        out_str += 'PDebT maximum depth = %i\n\n' % self.PDebT.nodes[self.root_id]['max_depth']
+        yes_fav = not_fav = 0 # Number of comments in favor and not in favor
+        yes_fav_sol = not_fav_sol = 0 # Number of comments in favor and not in favor in the solution
+        yes_fav_solw = not_fav_solw = 0 # Weight of comments in favor and not in favor in the solution
+        for n, nd in self.WBDebG.nodes(data = True):
+            node_id = int(self.WBDebG.nodes[n]['data'].get('id'))
+            if nd['bipartite_set'] == 1:
+                yes_fav += 1
+                if node_id in self.VAF_accepted:
+                    yes_fav_sol += 1
+                    yes_fav_solw += nd['weight']
+            else:
+                not_fav += 1
+                if node_id in self.VAF_accepted:
+                    not_fav_sol += 1
+                    not_fav_solw += nd['weight']
+        out_str += '#Nodes in favor = %i (%.2f%% of total)\n' % (yes_fav, 100.0 * yes_fav / num_nodes)
+        out_str += '#Nodes not in favor = %i (%.2f%% of total)\n\n' % (not_fav, 100.0 * not_fav / num_nodes)
+
+        out_str += '#Nodes in favor in solution = %i (%.2f%% of in favor)\n' % (yes_fav_sol, 100.0 * yes_fav_sol / yes_fav)
+        out_str += '#Nodes not in favor in solution = %i (%.2f%% of not in favor)\n' % (not_fav_sol, 100.0 * not_fav_sol / not_fav)
+        out_str += 'Percentage difference = %0.2f%%\n\n' % abs(100.0 * yes_fav_sol / yes_fav - 100.0 * not_fav_sol / not_fav)
+
+        out_str += 'Weight of solution, in favor - not in favor = %i - %i = %i\n' % (yes_fav_solw, not_fav_solw, yes_fav_solw - not_fav_solw)
+        out_str += 'Weight of solution normalized [-1, 1] = %0.2f\n' % (float(yes_fav_solw - not_fav_solw) / float(yes_fav_solw + not_fav_solw))
+
+        # More author-based stats for CCIA 2019
+        if args.ccia2019_order:
+            authors = {}
+            for n, nd in self.WBDebG.nodes(data = True):
+                yes_fav_in_sol = 0
+                not_fav_in_sol = 0
+                a = self.WBDebG.nodes[n]['data'].get('author')
+                if a not in authors:
+                    authors[a] = {}
+                    authors[a]['num_nodes'] = 0 # Number of nodes
+                    authors[a]['num_nodes_yes_fav'] = 0 # Number of nodes in favor of root
+                    authors[a]['num_nodes_not_fav'] = 0 # Number of nodes not in favor of root
+                    authors[a]['num_nodes_in_sol'] = 0 # Number of nodes in solution
+                    authors[a]['num_nodes_not_in_sol'] = 0 # Number of nodes not in solution
+                    authors[a]['num_nodes_yes_fav_in_sol'] = 0 # Number of nodes in favor of root and in solution
+                    authors[a]['num_nodes_not_fav_in_sol'] = 0 # Number of nodes not in favor of root and in solution
+                    authors[a]['num_in_edges'] = 0 # Replies to this node
+                    authors[a]['num_in_edges_pos'] = 0 # Replies to this node that support it
+                    authors[a]['num_in_edges_neg'] = 0 # Replies to this node that do not support it
+                    authors[a]['num_init_nodes'] = 0 # Number of nodes before alpha pruning
+                    authors[a]['sum_sentiment_radicality'] = 0 # sum_{c \in Comments} abs(sentiment_rel(c))
+                    authors[a]['scores'] = [] # List of scores of the author's comments
+
+                authors[a]['num_nodes'] += 1
+                authors[a]['scores'].append(int(self.WBDebG.nodes[n]['data'].get('score')))
+                if self.WBDebG.nodes[n]['bipartite_set'] == 1:
+                    authors[a]['num_nodes_yes_fav'] += 1
+                    yes_fav_in_sol += 1
+                else:
+                    authors[a]['num_nodes_not_fav'] += 1
+                    not_fav_in_sol += 1
+                node_id = int(self.WBDebG.nodes[n]['data'].get('id'))
+                if node_id in self.VAF_accepted:
+                    authors[a]['num_nodes_in_sol'] += 1
+                    yes_fav_in_sol += 1
+                    not_fav_in_sol += 1
+                else:
+                    authors[a]['num_nodes_not_in_sol'] += 1
+                if yes_fav_in_sol == 2:
+                    authors[a]['num_nodes_yes_fav_in_sol'] += 1
+                elif not_fav_in_sol == 2:
+                    authors[a]['num_nodes_not_fav_in_sol'] += 1
+
+                authors[a]['num_in_edges'] += self.PDebT.in_degree(n)
+                for e in self.PDebT.in_edges(n):
+                    if self.PDebT.nodes[e[0]]['bipartite_set'] == self.PDebT.nodes[e[1]]['bipartite_set']:
+                        authors[a]['num_in_edges_pos'] += 1
+                    else:
+                        authors[a]['num_in_edges_neg'] += 1
+
+            for n, nd in self.DebT.nodes(data = True):
+                a = self.DebT.nodes[n]['data'].get('author')
+                if a in authors:
+                    authors[a]['num_init_nodes'] += 1 # Also counts nodes removed by the alpha pruning
+                    authors[a]['sum_sentiment_radicality'] += abs(self.DebT.nodes[n]['sentiment_not_normalized'])
+
+            out_str += self.get_stats_ccia2019(authors, args)
+
+        # Write to file
+        output_file_name = '%s.wbg-sol.info' % args.input_file
+        output_file = open(output_file_name, 'w')
+        output_file.write(out_str)
+        output_file.close()
+
+    def get_stats_ccia2019(self, authors, args):
+        '''
+        Get statistics based on the CCIA 2019 paper
+        '''
+        for a in authors:
+            # Radicality
+            authors[a]['radicality'] = float(authors[a]['sum_sentiment_radicality']) / authors[a]['num_init_nodes']
+            # Attention generator
+            authors[a]['attention_generator_pos'] = float(authors[a]['num_in_edges_pos']) / authors[a]['num_nodes']
+            authors[a]['attention_generator_neg'] = float(authors[a]['num_in_edges_neg']) / authors[a]['num_nodes']
+            # Author polarization --> [-1, 1]
+            if (authors[a]['num_nodes_yes_fav_in_sol'] + authors[a]['num_nodes_not_fav_in_sol']) > 0:
+                authors[a]['pol_sol'] = float(authors[a]['num_nodes_yes_fav_in_sol'] - authors[a]['num_nodes_not_fav_in_sol']) / (authors[a]['num_nodes_yes_fav_in_sol'] + authors[a]['num_nodes_not_fav_in_sol'])
+            else:
+                authors[a]['pol_sol'] = None
+            authors[a]['pol'] = float(authors[a]['num_nodes_yes_fav'] - authors[a]['num_nodes_not_fav']) / (authors[a]['num_nodes_yes_fav'] + authors[a]['num_nodes_not_fav'])
+            # max(|score|) for all author comments
+            authors[a]['max_abs_score'] = max(map(abs, authors[a]['scores']))
+            # sum(|score|) for all author comments
+            authors[a]['sum_abs_score'] = sum(map(abs, authors[a]['scores']))
+            # sum(score) for all author comments
+            authors[a]['sum_score'] = sum(authors[a]['scores'])
+            # number of author comments
+            authors[a]['num_comments'] = len(authors[a]['scores'])
+
+        out_str = 'CCIA 2019 stats\n---------------\n'
+        for ordering in args.ccia2019_order:
+            # Sort by author relevance choice (ccia2019_order parameter)
+            sorted_authors = sorted(authors.items(), key = lambda a: a[1][ordering], reverse = True)
+
+            out_str += 'Number of authors: %i\n' % len(authors)
+            out_str += 'Sorted by: %s\n' % ordering
+
+            # Output top X authors data
+            data = ['author', 'max_abs_score', 'sum_abs_score', 'sum_score', 'num_comments', 'radicality', 'att_gen_pos', 'att_gen_neg', 'polarization']
+            out_str += format_data(data)
+            for a in sorted_authors[:20]:
+                data = [a[0], a[1]['max_abs_score'], a[1]['sum_abs_score'], a[1]['sum_score'], a[1]['num_comments'], a[1]['radicality'], a[1]['attention_generator_pos'], a[1]['attention_generator_neg'], a[1]['pol']]
+                out_str += format_data(data)
+
+        return out_str
+
+    def prca2019_authors_relevance(self, args):
+        '''
+        Compute relevance(u) = sum_{c in Gamma | user(c) = u} W(score(c))
+        '''
+        print('  Computing authors relevance...')
+        authors = {}
+        for n, nd in self.PDebT.nodes(data = True):
+            a = self.PDebT.nodes[n]['data'].get('author')
+            if a not in authors:
+                authors[a] = {}
+                authors[a]['wscores'] = [] # List of scores of the author's comments
+            authors[a]['wscores'].append(scale_weight(int(self.PDebT.nodes[n]['data'].get('score')), args))
+        for a in authors:
+            authors[a]['sum_wscore'] = sum(authors[a]['wscores'])
+        return sorted(authors.items(), key = lambda a: a[1]['sum_wscore'], reverse = True)
+
+    def prca2019_remove_author(self, author, G):
+        '''
+        Return a copy of WBDebG with author removed (RDebT, Restricted DebT)
+        '''
+        # Copy graph
+        res_G = networkx.DiGraph()
+        res_G.add_nodes_from(G.nodes(data = True))
+        for e1, e2, ed in G.edges(data = True):
+            res_G.add_edge(e1, e2, data = G[e1][e2]['data'])
+        # Remove author
+        rg = res_G.reverse() # Reverse graph (direction of edges reversed)
+        for n, nd in list(res_G.nodes(data = True)): # Iterate over a snapshot: nodes are removed from res_G inside the loop
+            if nd['data'].get('author') == author:
+                if n in res_G and nd['data'].get('comment_id') != self.root_id: # Not already removed (appears in a previous subtree) and not root node
+                    # Get subtree of node n and remove it
+                    st = networkx.algorithms.traversal.depth_first_search.dfs_tree(rg, n)
+                    res_G.remove_nodes_from(st.nodes())
+        return res_G
+
+    def prca2019_get_stats(self, G):
+        '''
+        Get the stats needed for the authors' list from G
+        '''
+        G_stats = {}
+        G_stats['num_comments'] = G.number_of_nodes() # Number of comments
+        G_stats['list_comments_id'] = list(G.nodes()) # List of comments id (Reddit id)
+        G_stats['list_nodes_id'] = map(lambda x: int(self.comment_id[x]), G_stats['list_comments_id']) # List of nodes id [0, ...] (0 = root node)
+        G_stats['list_nodes_id_Cplus'] = [int(self.comment_id[n]) for n, nd in G.nodes(data = True) if nd['bipartite_set'] == 1] # List of nodes id in favor of root node
+        G_stats['list_nodes_id_Cminus'] = [int(self.comment_id[n]) for n, nd in G.nodes(data = True) if nd['bipartite_set'] == -1] # List of nodes id NOT in favor of root node
+        return G_stats
+
+    def prca2019_analysis(self, args):
+        '''
+        Perform PRL VSI PR&CA 2019 analysis
+        '''
+        print('Performing VSI PR&CA 2019 analysis...')
+        tmp_file_name = '/tmp/tmp-reddit-at-WBDebG.tmp.xml'
+        sorted_authors = self.prca2019_authors_relevance(args) # Most relevant authors
+        PDebT_stats = self.prca2019_get_stats(self.PDebT)
+        res_PDebT_stats = {}
+        res_VAF_accepted = {}
+        ai = 0
+        for a, ad in sorted_authors[:args.prca2019]:
+            print('  Analysing author "%s" (%i/%i)...' % (a, ai + 1, min(args.prca2019, len(sorted_authors))))
+            res_PDebT = self.prca2019_remove_author(a, self.PDebT)
+            res_PDebT_stats[a] = self.prca2019_get_stats(res_PDebT)
+            res_WBDebG = self.PDebG_to_WBDebG(res_PDebT)
+            self.WBDebG_to_xml(args, res_WBDebG, tmp_file_name)
+            res_VAF_accepted[a] = VAF_solver(args, tmp_file_name)
+            ai = ai + 1
+        self.prca2019_stats_to_file(sorted_authors, PDebT_stats, res_PDebT_stats, res_VAF_accepted, args)
+
+    def prca2019_stats_to_file(self, sorted_authors, PDebT_stats, res_PDebT_stats, res_VAF_accepted, args):
+        '''
+        Compute PRL VSI PR&CA 2019 stats and output them to file
+        '''
+        output_file_name = '%s.prca2019.info' % args.input_file
+        print('Writing statistics to file "%s"...' % output_file_name)
+        out_str = 'Input file: %s\n' % args.input_file
+        out_str += 'Number of authors: %i\n' % len(sorted_authors)
+        out_str += 'Number of comments: %i\n' % PDebT_stats['num_comments']
+        data = ['author', '#comments', 'relevance', 'engaging', 'influence', 'rebalancing', 'rebalancing2']
+        out_str += format_data(data)
+        # Data from initial graphs and solution with all users
+        Ca = frozenset(PDebT_stats['list_nodes_id'])
+        Cplus = frozenset(PDebT_stats['list_nodes_id_Cplus'])
+        Cminus = frozenset(PDebT_stats['list_nodes_id_Cminus'])
+        S = frozenset(self.VAF_accepted)
+        polS = (len(S & Cplus) - len(S & Cminus)) / float(len(S))
+        lengaging = []
+        linfluence = []
+        lrebalancing = []
+        lrebalancing2 = []
+        for a, ad in sorted_authors[:args.prca2019]:
+            # Data of the restricted (without user) graphs and solution
+            Cau = frozenset(res_PDebT_stats[a]['list_nodes_id'])
+            Cplusu = frozenset(res_PDebT_stats[a]['list_nodes_id_Cplus'])
+            Cminusu = frozenset(res_PDebT_stats[a]['list_nodes_id_Cminus'])
+            Su = frozenset(res_VAF_accepted[a])
+            polSu = (len(Su & Cplusu) - len(Su & Cminusu)) / float(len(Su))
+            polSCau = (len(S & Cplus & Cau) - len(S & Cminus & Cau)) / float(len(S & Cau))
+            # engaging(u) = #(Ca \ Cau) / #Ca, share of the conversation lost after removing user
+            engaging = len(Ca - Cau) / float(len(Ca))
+            lengaging.append(engaging)
+            # influence(u) = (#((Cau \ Su) \cap S) + #(Su \cap (Ca \ S))) / #Cau, (u moved to S + u moved outside S) / # u comments
+            influence = (len((Cau - Su) & S) + len(Su & (Ca - S))) / float(len(Cau))
+            linfluence.append(influence)
+            # rebalancing(u) = |polarization(S) - polarization(Su)|, polarization(S) = (#(S \cap C+) - #(S \cap C-)) / #S, absolute change in polarization after removing user
+            rebalancing = abs(polS - polSu)
+            lrebalancing.append(rebalancing)
+            # rebalancing2(u) = |polarization(S \cap Cu) - polarization(Su)|, polarization(S) = (#(S \cap C+) - #(S \cap C-)) / #S, absolute change in polarization after removing user
+            rebalancing2 = abs(polSCau - polSu)
+            lrebalancing2.append(rebalancing2)
+            # Add row to output string
+            data = [a, len(ad['wscores']), ad['sum_wscore'], engaging, influence, rebalancing, rebalancing2]
+            out_str += format_data(data)
+        data = ['Mean', '', '', sum(lengaging) / len(lengaging), sum(linfluence) / len(linfluence), sum(lrebalancing) / len(lrebalancing), sum(lrebalancing2) / len(lrebalancing2)]
+        out_str += format_data(data)
+        # Write to file
+        output_file = open(output_file_name, 'w')
+        output_file.write(out_str)
+        output_file.close()
+
+# Functions
+
+def args2str(args):
+    argstr = '=== Begin arguments ===\n'
+    argstr += 'Input file: %s\n' % args.input_file
+    argstr += 'Alpha parameter: %f\n' % args.alpha
+    argstr += 'Algorithm: %s\n' % args.algorithm
+    argstr += 'Log scale base: %i\n' % args.log_base
+    argstr += 'Socialarg git path: %s\n' % args.socialarg_path
+    argstr += 'Spark path: %s\n' % args.spark_path
+    argstr += 'Draw graphs: %s\n' % args.draw_graphs
+    argstr += 'Neutral comments: %s\n' % args.neutral_comments
+    argstr += 'CCIA2019 author ordering: %s\n' % args.ccia2019_order
+    argstr += 'VSI PR&CA 2019 analysis: %i\n' % args.prca2019
+    argstr += 'User-based analysis: %s\n' % args.user
+    argstr += 'User valuation: %s\n' % args.user_valuation
+    argstr += 'SCIP output: %s\n' % args.scip_output
+    argstr += 'Parameters: %s\n' % args.params
+    argstr += 'Random seed: %s\n' % args.seed
+    argstr += '=== End arguments ==='
+    return argstr
+
+def scale_weight(weight, args):
+    '''Scales the weight using a log function'''
+    if weight >= 1:
+        return int(math.floor(math.log(weight, args.log_base)) + 1)
+    else:
+        return 0
+
+def VAF_solver(args, input_file_name = None):
+    '''
+    Solves the discussion using the VAF solver and returns the accepted nodes
+    '''
+    if input_file_name:
+        print('    Solving graph with VAF solver...')
+        input_file_name = input_file_name[:-4]
+    else:
+        print('Solving graph with VAF solver...')
+        input_file_name = '%s.wbg' % args.input_file
+    output_file_name = '/tmp/tmp-reddit-at.out'
+    # Check files
+    if os.path.isdir(os.path.expanduser(args.spark_path)):
+        args.spark_path = os.path.abspath(os.path.expanduser(args.spark_path))
+        spark_submit = '%s/bin/spark-submit' % args.spark_path
+        if not os.path.exists(spark_submit):
+            sys.exit('ERROR: spark-submit not found at "%s".' % spark_submit)
+    else:
+        sys.exit('ERROR: Spark folder not found "%s".' % args.spark_path)
+    if os.path.isdir(os.path.expanduser(args.socialarg_path)):
+        args.socialarg_path = os.path.abspath(os.path.expanduser(args.socialarg_path))
+        #analyzer_jar = '%s/distributed_social_network_analyzer/target/scala-2.11/social-network-analyzer_2.11-1.0.jar' % args.socialarg_path
+        analyzer_jar = '%s/distributed_social_network_analyzer/target/scala-2.12/social-network-analyzer_2.12-1.0.jar' % args.socialarg_path
+        if not os.path.exists(analyzer_jar):
+            sys.exit('ERROR: analyzer jar file not found at "%s".' % analyzer_jar)
+    else:
+        sys.exit('ERROR: socialarg git repo folder not found "%s".' % args.socialarg_path)
+    # Run solver
+    cmd = '(time %s --master local[4] --class "MainAppFromXML" %s %s) &> %s' % (spark_submit, analyzer_jar, input_file_name, output_file_name)
+    os.system(cmd)
+    # Parse output
+    accepted = []
+    try:
+        xml = ET.parse('%s-xml.sol' % input_file_name)
+        answer = xml.find('answer')
+        for a in answer.iter('arg'):
+            accepted.append(int(a.attrib['id']))
+    except:
+        sys.exit('ERROR: something happened while parsing solver output "%s-xml.sol".' % input_file_name)
+    return accepted
+
+def format_data(data):
+    '''
+    Format data list for an output of fixed column width
+    '''
+    width = [20, 9, 9, 9, 9, 11, 12] + [12] * len(data)
+    sep = '|'
+    eol = '\n'
+    out_str = ''
+    while data:
+        w = width.pop(0)
+        d = data.pop(0)
+        if isinstance(d, float) and d != 0:
+            data_str = '{:0.10f}'.format(d)
+        else:
+            data_str = str(d)
+        out_str += data_str[:w].ljust(w)
+        if data:
+            out_str += sep
+    out_str += eol
+    return out_str
+
+# Main
+
+if __name__ == '__main__':
+    # Parse arguments
+    parser = argparse.ArgumentParser(description = 'Reddit Analysis Tool.')
+    # Optional arguments
+    parser.add_argument('-a', '--alpha', default = 0.5, type = float, help = 'Alpha parameter used as threshold for several functions (default: 0.5)', dest = 'alpha')
+    parser.add_argument('-al', '--algorithm', type = str, default = 'g0', help = 'Algorithm and parameters in case available, see docstring for more information (default: g0)', dest = 'algorithm')
+    parser.add_argument('--ccia2019_order', nargs = '+', type = str, choices = ['max_abs_score', 'sum_abs_score', 'sum_score', 'num_comments'], help = 'Author ordering for CCIA 2019 stats (default: max_abs_score)', dest = 'ccia2019_order')
+    parser.add_argument('-d', '--draw_graphs', action = 'store_true', default = False, help = 'Draws the graphs of all the steps of the analysis (default: False)', dest = 'draw_graphs')
+    parser.add_argument('-if', '--input_file', default = None, type = str, help = 'Input file name of the xml with the Reddit post information', dest = 'input_file')
+    parser.add_argument('-lb', '--log_base', default = 10, type = int, help = 'Logarithmic scale base for weighting (default: 10)', dest = 'log_base')
+    parser.add_argument('-nc', '--neutral_comments', nargs = '?', type = str, default = 'do_nothing', choices = ['do_nothing', 'remove_subtree', 'to_positive'], help = 'Neutral comments treatment (default: do_nothing)', dest = 'neutral_comments')
+    parser.add_argument('-p', '--params', default = None, type = str, help = 'Argument used to specify parameters for some functionalities', dest = 'params')
+    parser.add_argument('--prca2019', default = 0, type = int, help = 'PRL VSI in PR&CA 2019 analysis (default: 0)', dest = 'prca2019')
+    parser.add_argument('-s', '--seed', default = None, type = str, help = 'Seed to initialize random numbers (default: None)', dest = 'seed')
+    parser.add_argument('-so', '--scip_output', action = 'store_true', default = False, help = 'Outputs UDebG files in SCIP format to solve bipartition problem (default: False)', dest = 'scip_output')
+    parser.add_argument('--socialarg_path', default = '~/git/socialarg', type = str, help = 'Path to the socialarg git repo (default: ~/git/socialarg)', dest = 'socialarg_path')
+    parser.add_argument('-sp', '--spark_path', default = '~/.local/spark-2.2.1-bin-hadoop2.7', type = str, help = 'Spark path (default: ~/.local/spark-2.2.1-bin-hadoop2.7)', dest = 'spark_path')
+    parser.add_argument('-u', '--user', default = None, type = str, choices = ['mdai2020', 'wia2021'], help = 'User-based analysis (default: None)', dest = 'user')
+    parser.add_argument('-uv', '--user_valuation', default = None, type = str, choices = ['comment_karma', 'sum_scores'], help = 'User valuation for a VAF over UDebG (default: None)', dest = 'user_valuation')
+    args = parser.parse_args()
+    print(args2str(args))
+
+    # No input file defined
+    if not args.input_file:
+        # Generate random UDebG
+        udebg.UDebG(None, None, args)
+        exit()
+    # Read debate and create initial Graph
+    rg = RedditG(args.input_file)
+    # User-oriented analysis
+    if args.user:
+        if args.user == 'wia2021':
+            rg.wia2021_DebT(args)
+            rg.wia2021_SDebT(args)
+        udebg.UDebG(rg.SDebT, rg.root_id, args)
+        exit()
+    # Perform analysis (WBDebG)
+    rg.ccia18_analysis(args)
+    # Output results
+    rg.WBDebG2xml(args)
+    if args.draw_graphs:
+        rg.draw_ccia18_PCT(args)
+        rg.draw_ccia18_WBG(args)
+    # Compute solution using VAF solver
+    rg.VAF_accepted = VAF_solver(args)
+    # Output results with solution
+    if args.draw_graphs:
+        rg.draw_ccia18_WBG(args)
+    # Compute stats
+    if args.prca2019:
+        rg.prca2019_analysis(args)
+    else:
+        rg.ccia2018_stats_to_file(args)
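
Two numeric conventions in reddit_at.py are easy to check in isolation. ccia18_sentiment takes the expected relevance of the 5-bin distribution [very neg, neg, neutral, pos, very pos] under the weights (-2, -1, 0, 1, 2) and, in normalized mode, thresholds its absolute value at alpha; scale_weight maps a score s >= 1 to floor(log_base(s)) + 1 and anything else to 0. A minimal self-contained sketch (the distribution below is made up for illustration):

import math

def sentiment(dist, alpha, normalized = True):
    # Expected relevance of [very neg, neg, neutral, pos, very pos]
    res = sum(r * d for r, d in zip([-2, -1, 0, 1, 2], dist))
    if not normalized:
        return res # in [-2, 2]
    if abs(res) > alpha:
        return 1 if res > 0 else -1 # positive / negative
    return 0 # neutral

def scale_weight(score, base = 10):
    # Log-scaled weight: 1-9 -> 1, 10-99 -> 2, ..., non-positive -> 0
    return int(math.floor(math.log(score, base)) + 1) if score >= 1 else 0

dist = [0.05, 0.10, 0.20, 0.40, 0.25] # hypothetical sentiment_distribution
print(sentiment(dist, 0.5, normalized = False)) # ~0.70
print(sentiment(dist, 0.5)) # 1, since |0.70| > alpha = 0.5
print(scale_weight(1), scale_weight(42), scale_weight(1234)) # 1 2 4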

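Likewise, the per-user metrics in prca2019_stats_to_file follow the set formulas in its inline comments: engaging(u) = #(Ca \ Cau) / #Ca, influence(u) = (#((Cau \ Su) ∩ S) + #(Su ∩ (Ca \ S))) / #Cau, and rebalancing(u) = |pol(S) - pol(Su)| with pol(X) = (#(X ∩ C+) - #(X ∩ C-)) / #X. A toy recomputation on made-up sets (every value below is hypothetical, not taken from the datasets above):

# Hypothetical comment ids (0 = root)
Ca  = frozenset(range(10))             # all comments
Cau = frozenset([0, 1, 2, 3, 4, 5, 6]) # comments left after removing user u
Cp  = frozenset([0, 2, 4, 6, 8])       # C+, in favor of the root
Cm  = Ca - Cp                          # C-, not in favor
S   = frozenset([0, 1, 2, 8])          # VAF-accepted set, full debate
Su  = frozenset([0, 3, 4])             # VAF-accepted set with u removed

pol_S  = (len(S & Cp) - len(S & Cm)) / float(len(S))    # (3 - 1) / 4 = 0.5
pol_Su = (len(Su & Cp) - len(Su & Cm)) / float(len(Su)) # (2 - 1) / 3 = 0.33
engaging    = len(Ca - Cau) / float(len(Ca))            # 3 / 10 = 0.3
influence   = (len((Cau - Su) & S) + len(Su & (Ca - S))) / float(len(Cau)) # 4 / 7
rebalancing = abs(pol_S - pol_Su)                       # ~0.17
print(engaging, influence, rebalancing)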