1+ # -*- coding: utf-8 -*-
2+ """
3+ Created on Wed Dec 20 09:38:02 2017
4+
5+ @author: gualandi
6+ """
7+
# Parse a file where each row has the form:
#   user id | age | gender | occupation | zip code
# Example row:
#   1|24|M|technician|85711
def ParseUsers(filename):
    """Read a pipe-separated user file into a dictionary.

    Args:
        filename: Path of the UTF-8 text file to read.

    Returns:
        A dict mapping the integer user id to the tuple
        (age, gender, occupation, zip code). The age is converted to int,
        the other three fields are kept as strings.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the id or age of a non-blank row is not an integer.
        IndexError: If a non-blank row has fewer than five fields.
    """
    Rs = {}
    # The context manager closes the file even when a row fails to parse.
    with open(filename, 'r', encoding="utf-8") as fh:
        for line in fh:
            if not line.strip():
                # Tolerate blank lines (e.g. an empty line at end of file).
                continue
            # Drop the line terminator so that the last field (zip code)
            # does not carry a trailing newline.
            row = line.rstrip('\n').split('|')
            Rs[int(row[0])] = (int(row[1]), row[2], row[3], row[4])

    return Rs
19+
# Each row has the form: user id | item id | rating | timestamp
# (the fields are separated by a tab character in the file).
def ParseRatings(filename):
    """Read a tab-separated ratings file into a dictionary.

    Args:
        filename: Path of the text file to read (opened with the platform
            default encoding).

    Returns:
        A dict mapping the pair (user_id, item_id) to the integer rating.
        Any field after the rating (the timestamp) is ignored. If the same
        pair appears more than once, the last row wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a non-blank row has fewer than three fields.
    """
    Rs = {}
    # The context manager closes the file even when a row fails to parse.
    with open(filename, 'r') as fh:
        for line in fh:
            if not line.strip():
                # Tolerate blank lines (e.g. an empty line at end of file).
                continue
            row = line.rstrip('\n').split('\t')
            user_id, item_id = int(row[0]), int(row[1])
            Rs[(user_id, item_id)] = int(row[2])
    return Rs
29+
def PrintTop(Ds, top=5):
    """Print the `top` entries of Ds that have the largest values.

    One line is printed per entry, showing the key followed by its value,
    starting from the largest value. Entries with equal values keep the
    order in which they appear in Ds.
    """
    ranked = sorted(Ds.items(), key=lambda entry: entry[1], reverse=True)
    for name, value in ranked[:top]:
        print(name, value)
33+
# Support: compute the average of a collection of values
def Mean(Ls):
    """Return the arithmetic mean of the values in Ls.

    Args:
        Ls: Any iterable of numbers. Sized collections (lists, dict views)
            are used as they are; one-shot iterables such as generators or
            `map` objects are copied into a list first, because they do
            not support len().

    Returns:
        The sum of the values divided by their count.

    Raises:
        ZeroDivisionError: If Ls contains no values.
    """
    if not hasattr(Ls, '__len__'):
        Ls = list(Ls)
    return sum(Ls) / len(Ls)

# Alternatively, the standard-library "statistics" module can be used.
# Link: https://docs.python.org/3/library/statistics.html
# ===> from statistics import mean
41+
# Exercise 8.2: compute the average of all ratings
def ComputeAverage(Ls):
    """Return the mean of all the values stored in the dictionary Ls.

    Raises ZeroDivisionError when Ls is empty.
    """
    ratings = Ls.values()
    return sum(ratings) / len(ratings)
45+
# Exercise 8.3
def ComputeItemAverage(Ls):
    """Compute the average rating received by each item.

    Args:
        Ls: Dict mapping (user_id, item_id) pairs to a numeric rating.

    Returns:
        A dict mapping every item_id that occurs in Ls to the mean of the
        ratings recorded for it. An empty Ls gives an empty dict.
    """
    # Keep a running [sum, count] per item rather than growing a list of
    # ratings with `+`, which copies the whole list on every row.
    totals = {}
    for (_, item_id), rating in Ls.items():
        acc = totals.setdefault(item_id, [0, 0])
        acc[0] += rating
        acc[1] += 1

    return {item_id: total / count
            for item_id, (total, count) in totals.items()}
57+
# Exercise 8.4
def ComputeUserAverage(Ls):
    """Compute the average rating given by each user.

    Args:
        Ls: Dict mapping (user_id, item_id) pairs to a numeric rating.

    Returns:
        A dict mapping every user_id that occurs in Ls to the mean of the
        ratings recorded for that user. An empty Ls gives an empty dict.
    """
    # Keep a running [sum, count] per user rather than growing a list of
    # ratings with `+`, which copies the whole list on every row.
    totals = {}
    for (user_id, _), rating in Ls.items():
        acc = totals.setdefault(user_id, [0, 0])
        acc[0] += rating
        acc[1] += 1

    return {user_id: total / count
            for user_id, (total, count) in totals.items()}
69+
# Exercise 8.5
def ComputeUserTypeAverage(Ls, Us):
    """Compute the average rating given by each category of users.

    Args:
        Ls: Dict mapping (user_id, item_id) pairs to a numeric rating.
        Us: Dict mapping user_id to a user record; the category is the
            field at index 2 of the record (the occupation, for records
            built by ParseUsers).

    Returns:
        A dict mapping every category to the mean of the ratings given by
        its users. Ratings of users that are missing from Us are grouped
        under the category 'none'.
    """
    # Keep a running [sum, count] per category rather than growing a list
    # of ratings with `+`, which copies the whole list on every row.
    totals = {}
    for (user_id, _), rating in Ls.items():
        record = Us.get(user_id)
        # Indexing a 'none' default string with [2] would file unknown
        # users under 'n'; pick the category explicitly instead.
        type_id = record[2] if record is not None else 'none'
        acc = totals.setdefault(type_id, [0, 0])
        acc[0] += rating
        acc[1] += 1

    return {type_id: total / count
            for type_id, (total, count) in totals.items()}
82+
# Exercise 8.6
def Round(x):
    """Round x to zero decimal places with the built-in round().

    The number of digits is passed explicitly, so a float input gives a
    float result (3.7 -> 4.0). Exact halves follow the built-in's
    round-half-to-even rule (2.5 -> 2.0, 3.5 -> 4.0).
    """
    return round(x, ndigits=0)
86+
def PredictAvg(TrainingSet, TestSet):
    """Predict the same rating for every key of TestSet.

    The prediction is the average of all the ratings in TrainingSet,
    rounded with Round(). Returns a dict with the keys of TestSet.
    """
    baseline = Round(ComputeAverage(TrainingSet))
    return {key: baseline for key in TestSet}
93+
# Exercise 8.7
from math import sqrt


def RMSE(Yb, Y):
    """Root mean squared error between the predictions Yb and the values Y.

    The error is accumulated over the keys of Yb, so every key of Yb must
    also be a key of Y (KeyError otherwise); extra keys of Y are ignored.
    Raises ZeroDivisionError when Yb is empty.
    """
    squared_errors = [(Yb[key] - Y[key]) ** 2 for key in Yb]
    return sqrt(sum(squared_errors) / len(squared_errors))
98+
def nRMSE(Yb, Y, rating_range=4):
    """Return the RMSE of Yb against Y divided by the width of the scale.

    Args:
        Yb: Dict of predicted values.
        Y: Dict of reference values.
        rating_range: Divisor used for the normalisation. The default of 4
            keeps the value that used to be hard-coded here (presumably
            max - min of a 1-to-5 rating scale; confirm for other data).

    Returns:
        RMSE(Yb, Y) / rating_range.
    """
    return RMSE(Yb, Y) / rating_range
101+
def RMSE2(Yb, Y):
    """Root mean squared error computed through scikit-learn.

    The values are paired by walking the keys of Y in sorted order, so
    every key of Y must also be a key of Yb (KeyError otherwise).
    """
    # Reuse a method from a machine-learning library
    # Link: http://scikit-learn.org/stable/
    from sklearn.metrics import mean_squared_error
    ordered_keys = sorted(Y)
    actual = [Y[key] for key in ordered_keys]
    predicted = [Yb[key] for key in ordered_keys]
    return sqrt(mean_squared_error(actual, predicted))
109+
def R2_Score(Yb, Y):
    """R^2 score of the predictions Yb computed through scikit-learn.

    The values are paired by walking the keys of Y in sorted order, so
    every key of Y must also be a key of Yb (KeyError otherwise). The
    values of Y are passed to r2_score first, those of Yb second.
    """
    # Reuse a method from a machine-learning library
    # Link: http://scikit-learn.org/stable/
    from sklearn.metrics import r2_score
    ordered_keys = sorted(Y)
    actual = [Y[key] for key in ordered_keys]
    predicted = [Yb[key] for key in ordered_keys]
    return r2_score(actual, predicted)
117+
118+
def MAE(Yb, Y):
    """Mean absolute error of the predictions Yb computed via scikit-learn.

    The values are paired by walking the keys of Y in sorted order, so
    every key of Y must also be a key of Yb (KeyError otherwise).
    """
    # Reuse a method from a machine-learning library
    # Link: http://scikit-learn.org/stable/
    from sklearn.metrics import mean_absolute_error
    ordered_keys = sorted(Y)
    actual = [Y[key] for key in ordered_keys]
    predicted = [Yb[key] for key in ordered_keys]
    return mean_absolute_error(actual, predicted)
126+
# Exercise 8.9
def PredictAvgItem(TrainingSet, TestSet):
    """Predict each test rating with the training average of its item.

    Items that have no rating in TrainingSet fall back to the global
    training average. Every prediction is rounded with Round(). Returns a
    dict with the keys of TestSet.
    """
    global_avg = ComputeAverage(TrainingSet)
    item_avgs = ComputeItemAverage(TrainingSet)

    def estimate(key):
        _, item_id = key  # key is a (user_id, item_id) pair
        return Round(item_avgs.get(item_id, global_avg))

    return {key: estimate(key) for key in TestSet}
136+
def PredictAvgUser(TrainingSet, TestSet):
    """Predict each test rating with the training average of its user.

    Users that have no rating in TrainingSet fall back to the global
    training average. Every prediction is rounded with Round(). Returns a
    dict with the keys of TestSet.
    """
    global_avg = ComputeAverage(TrainingSet)
    user_avgs = ComputeUserAverage(TrainingSet)

    def estimate(key):
        user_id, _ = key  # key is a (user_id, item_id) pair
        return Round(user_avgs.get(user_id, global_avg))

    return {key: estimate(key) for key in TestSet}
145+
def PredictAvgCategory(TrainingSet, TestSet, Users):
    """Predict each test rating with the average of the user's category.

    Args:
        TrainingSet: Dict mapping (user_id, item_id) to a rating, used to
            compute the averages.
        TestSet: Dict whose (user_id, item_id) keys need a prediction.
        Users: Dict mapping user_id to a user record; the category is the
            field at index 2 of the record.

    Returns:
        A dict with the keys of TestSet; each value is the rounded average
        of the user's category. Users missing from Users are looked up
        under the category 'none'. When no average exists for a category,
        the global training average is used.
    """
    global_avg = ComputeAverage(TrainingSet)
    type_avgs = ComputeUserTypeAverage(TrainingSet, Users)
    Ps = {}
    for key in TestSet:
        user_id, _ = key  # key is a (user_id, item_id) pair
        record = Users.get(user_id)
        # Indexing a 'none' default string with [2] would turn the
        # category of unknown users into 'n'; choose it explicitly.
        type_id = record[2] if record is not None else 'none'
        Ps[key] = Round(type_avgs.get(type_id, global_avg))
    return Ps
155+
def SingleTest(Users, n, Metric=RMSE, data_dir='../data'):
    """Score every baseline predictor on one training/test split.

    Reads u{n}.base and u{n}.test from data_dir, runs each predictor and
    prints one line per predictor with the value returned by Metric.

    Args:
        Users: Dict of user records, forwarded to PredictAvgCategory.
        n: Index of the split, inserted into the two file names.
        Metric: Callable taking (predictions, reference) and returning
            the score to print. Defaults to RMSE.
        data_dir: Directory that contains the split files. Defaults to
            '../data'.
    """
    TrainingSet = ParseRatings('{}/u{}.base'.format(data_dir, n))
    TestSet = ParseRatings('{}/u{}.test'.format(data_dir, n))

    print('Avg globale: ', Metric(PredictAvg(TrainingSet, TestSet), TestSet))
    print('Avg film: ', Metric(PredictAvgItem(TrainingSet, TestSet), TestSet))
    print('Avg utente: ', Metric(PredictAvgUser(TrainingSet, TestSet), TestSet))
    print('Avg cat user: ', Metric(PredictAvgCategory(TrainingSet, TestSet, Users), TestSet))
    # This predictor blends the item (film) and user averages.
    print('Avg film/user:', Metric(PredictAvgAvg(TrainingSet, TestSet), TestSet))
165+
166+
def PredictAvgAvg(TrainingSet, TestSet):
    """Predict each test rating by blending item and user averages.

    For every (user_id, item_id) key of TestSet the prediction is the
    rounded midpoint between the item's training average and the user's
    training average. Whichever of the two is missing is replaced by the
    global training average. Returns a dict with the keys of TestSet.
    """
    global_avg = ComputeAverage(TrainingSet)
    item_avgs = ComputeItemAverage(TrainingSet)
    user_avgs = ComputeUserAverage(TrainingSet)

    def blend(key):
        user_id, item_id = key
        item_part = item_avgs.get(item_id, global_avg)
        user_part = user_avgs.get(user_id, global_avg)
        return Round((item_part + user_part) / 2)

    return {key: blend(key) for key in TestSet}
178+
179+
#-----------------------------------------------
# MAIN function
#-----------------------------------------------
if __name__ == "__main__":
    Users = ParseUsers('../data/u.user')

    # Leave this False to evaluate all the predictors on the five splits.
    # Set it to True to walk through the single exercises on split u1.
    step_by_step = False

    if not step_by_step:
        for n in range(1, 6):
            print('Test set: training=u{}.base, test=u{}.test:'.format(n, n))
            SingleTest(Users, n, RMSE)
    else:
        TrainingSet = ParseRatings('../data/u1.base')
        TestSet = ParseRatings('../data/u1.test')
        print(len(TrainingSet), len(TestSet))

        # Global, per-item, per-user and per-category averages
        print('Compute Global Average {}'.format(ComputeAverage(TrainingSet)))
        Is = ComputeItemAverage(TrainingSet)
        PrintTop(Is)

        Us = ComputeUserAverage(TrainingSet)
        PrintTop(Us)

        Ts = ComputeUserTypeAverage(TrainingSet, Users)
        PrintTop(Ts, top=100)

        # Naive prediction
        Pbar = PredictAvg(TrainingSet, TestSet)

        # Exercise 8.8
        print(RMSE(Pbar, TestSet))
        # Exercise 8.9
        print(RMSE(PredictAvgItem(TrainingSet, TestSet), TestSet))
        print(RMSE(PredictAvgUser(TrainingSet, TestSet), TestSet))
        print(RMSE(PredictAvgCategory(TrainingSet, TestSet, Users), TestSet))
0 commit comments