|
1 | 1 | import csv |
2 | 2 | from collections import defaultdict, namedtuple, Counter |
| 3 | +from statistics import mean |
3 | 4 |
|
4 | | -MOVIE_DATA = 'movie_metadata.csv' |
| 5 | +MOVIE_DATA = 'days/04-06 collections/movie_metadata.csv' |
5 | 6 | NUM_TOP_DIRECTORS = 20 |
6 | 7 | MIN_MOVIES = 4 |
7 | 8 | MIN_YEAR = 1960 |
8 | 9 |
|
9 | | -Movie = namedtuple('Movie', 'director title year score') |
| 10 | +Movie = namedtuple('Movie', 'title year score') |
10 | 11 |
|
def get_movies_by_director(data):
    '''Extract movies from the csv file and group them by director.

    data: path of the csv file. It is read with csv.DictReader and must
    provide the columns director_name, movie_title, title_year and
    imdb_score.

    Rows whose title_year is blank, not an integer, or earlier than
    MIN_YEAR are skipped.

    Returns a defaultdict(list) mapping each director name to a list of
    Movie namedtuples (title, year, score). year and score are kept as the
    raw csv strings.
    '''
    directors = defaultdict(list)
    with open(data, newline='', encoding='utf8') as csvfile:
        for row in csv.DictReader(csvfile):
            # Parse the year directly: str.replace('', '0') would insert a
            # '0' between every character and corrupt the value.
            try:
                year = int(row['title_year'])
            except ValueError:
                continue  # blank or malformed year
            if year < MIN_YEAR:
                continue
            directors[row['director_name']].append(
                # strip() keeps the fixed-width title column aligned when
                # the csv value carries surrounding whitespace.
                Movie(row['movie_title'].strip(), row['title_year'], row['imdb_score'])
            )
    return directors
22 | 24 |
|
23 | 25 |
|
24 | 26 |
|
def get_average_scores(directors):
    '''Drop directors with fewer than MIN_MOVIES movies and attach averages.

    directors: mapping of director name -> list of Movie namedtuples.

    Returns a dict keyed by (director, average_score) tuples whose values
    are that director's list of movies. print_results unpacks the keys in
    exactly this shape.
    '''
    return {
        (director, _calc_mean(movies)): movies
        for director, movies in directors.items()
        # >= so that a director with exactly MIN_MOVIES movies is kept.
        if len(movies) >= MIN_MOVIES
    }
28 | 32 |
|
29 | | - |
def _calc_mean(movies):
    '''Return the mean score of a list of Movie namedtuples, rounded to 2 places.

    movies: non-empty iterable of objects with a ``score`` attribute holding
    a numeric string (or number). A blank score counts as 0.

    Raises statistics.StatisticsError when movies is empty.
    '''
    # Convert the score directly: str.replace('', '0') would turn '7.5'
    # into '070.050' and inflate every average.
    return round(mean(float(movie.score or 0) for movie in movies), 2)
33 | 36 |
|
34 | 37 |
|
def print_results(directors):
    '''Print the top NUM_TOP_DIRECTORS directors by highest average rating.

    directors: dict keyed by (director, average_score) tuples with a list
    of Movie namedtuples as values, as returned by get_average_scores.

    For each director a numbered header line is printed, followed by the
    director's movies ordered by highest score and a separator line.
    See http://pybit.es/codechallenge13.html for example output.
    '''
    sep_line = '-' * 60
    ranked = sorted(directors.items(), key=lambda entry: entry[0][1], reverse=True)
    # Slice with the module constant instead of a hard-coded cutoff.
    for counter, ((director, avg), movies) in enumerate(ranked[:NUM_TOP_DIRECTORS], start=1):
        print(f'{counter}. {director:<52} {avg}')
        # Scores are stored as strings, so compare them numerically; a blank
        # score sorts as 0.
        for movie in sorted(movies, key=lambda m: float(m.score or 0), reverse=True):
            print(f'{movie.year}] {movie.title:<50} {movie.score}')
        print(sep_line)
43 | 55 |
|
44 | 56 |
|
def main():
    '''Load the movie csv, average each director's scores and print the ranking.'''
    movies_by_director = get_movies_by_director(MOVIE_DATA)
    scored_directors = get_average_scores(movies_by_director)
    print_results(scored_directors)
48 | 59 |
|
49 | 60 |
|
50 | 61 | if __name__ == '__main__': |
|
0 commit comments