#!/usr/bin/env python
# reducer.py
import sys
# Names of the integer count columns that follow the URL on every input
# line, in input order.  The code only relies on how many there are; the
# names are kept to document the record layout.
COUNT_COLUMNS = (
    'iphone', 'samsunggalaxy', 'sonyxperia', 'nokialumina', 'htcphone',
    'ios', 'googleandroid',
    'iphonecampos', 'samsungcampos', 'sonycampos', 'nokiacampos', 'htccampos',
    'iphonecamneg', 'samsungcamneg', 'sonycamneg', 'nokiacamneg', 'htccamneg',
    'iphonecamunc', 'samsungcamunc', 'sonycamunc', 'nokiacamunc', 'htccamunc',
    'iphonedispos', 'samsungdispos', 'sonydispos', 'nokiadispos', 'htcdispos',
    'iphonedisneg', 'samsungdisneg', 'sonydisneg', 'nokiadisneg', 'htcdisneg',
    'iphonedisunc', 'samsungdisunc', 'sonydisunc', 'nokiadisunc', 'htcdisunc',
    'iphoneperpos', 'samsungperpos', 'sonyperpos', 'nokiaperpos', 'htcperpos',
    'iphoneperneg', 'samsungperneg', 'sonyperneg', 'nokiaperneg', 'htcperneg',
    'iphoneperunc', 'samsungperunc', 'sonyperunc', 'nokiaperunc', 'htcperunc',
    'iosperpos', 'googleperpos', 'iosperneg', 'googleperneg',
    'iosperunc', 'googleperunc',
)


def reduce_counts(lines, num_counts=len(COUNT_COLUMNS)):
    """Sum the per-URL count columns found in ``lines``.

    Each line is expected to look like ``url,c1,c2,...,cN`` where ``N`` is
    ``num_counts`` and every ``c`` parses as an integer.  Leading and
    trailing whitespace on the line is ignored.  Lines that have a
    different number of comma-separated fields, or a count that is not an
    integer, are skipped without affecting the totals.

    Args:
        lines: iterable of text lines (for example ``sys.stdin``).
        num_counts: number of integer columns expected after the URL.

    Returns:
        dict mapping each URL to a tuple of ``num_counts`` column sums.
    """
    totals = {}
    expected_fields = num_counts + 1
    for raw_line in lines:
        fields = raw_line.strip().split(',')
        if len(fields) != expected_fields:
            # Wrong shape (blank line, header fragment, URL containing a
            # comma, ...): skip it, as the record cannot be aligned.
            continue
        try:
            counts = tuple(int(field) for field in fields[1:])
        except ValueError:
            # A non-numeric column; drop the whole record so that no
            # partial sums leak into the totals.
            continue
        url = fields[0]
        previous = totals.get(url)
        if previous is None:
            totals[url] = counts
        else:
            totals[url] = tuple(a + b for a, b in zip(previous, counts))
    return totals


def format_record(url, counts):
    """Return ``url`` followed by its counts, all separated by ``', '``."""
    # str() on each value (rather than on the tuple) keeps the output free
    # of parentheses and of the ``L`` suffix Python 2 adds to long reprs.
    return '%s, %s' % (url, ', '.join(str(value) for value in counts))


def main():
    """Read records from stdin and write one summed record per URL."""
    totals = reduce_counts(sys.stdin)
    # Note: records are written in dict iteration order, i.e. unsorted.
    for url in totals:
        sys.stdout.write(format_record(url, totals[url]) + '\n')


if __name__ == '__main__':
    main()