|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [ |
| 8 | + { |
| 9 | + "ename": "FileNotFoundError", |
| 10 | + "evalue": "[Errno 2] No such file or directory: './data/movietweetings/movies.dat'", |
| 11 | + "output_type": "error", |
| 12 | + "traceback": [ |
| 13 | + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", |
| 14 | + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", |
| 15 | + "\u001b[1;32m<ipython-input-1-7efaf53b7077>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m movies = pd.read_csv('./data/movietweetings/movies.dat', delimiter='::',\n\u001b[1;32m----> 5\u001b[1;33m engine='python', header=None, names=['Movie ID', 'Movie Title', 'Genre'])\n\u001b[0m\u001b[0;32m 6\u001b[0m users = pd.read_csv('./data/movietweetings/users.dat', delimiter='::',\n\u001b[0;32m 7\u001b[0m engine='python', header=None, names=['User ID', 'Twitter ID'])\n", |
| 16 | + "\u001b[1;32mD:\\ProgramData\\conda3.05\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[0;32m 653\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[0;32m 654\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 655\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 656\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 657\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", |
| 17 | + "\u001b[1;32mD:\\ProgramData\\conda3.05\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 403\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 405\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 406\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", |
| 18 | + "\u001b[1;32mD:\\ProgramData\\conda3.05\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 762\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 763\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 764\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 765\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 766\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", |
| 19 | + "\u001b[1;32mD:\\ProgramData\\conda3.05\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 993\u001b[0m ' \"c\", \"python\", or' ' \"python-fwf\")'.format(\n\u001b[0;32m 994\u001b[0m engine=engine))\n\u001b[1;32m--> 995\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mklass\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 996\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 997\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_failover_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", |
| 20 | + "\u001b[1;32mD:\\ProgramData\\conda3.05\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, **kwds)\u001b[0m\n\u001b[0;32m 1983\u001b[0m f, handles = _get_handle(f, mode, encoding=self.encoding,\n\u001b[0;32m 1984\u001b[0m \u001b[0mcompression\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompression\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1985\u001b[1;33m memory_map=self.memory_map)\n\u001b[0m\u001b[0;32m 1986\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhandles\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mhandles\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1987\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", |
| 21 | + "\u001b[1;32mD:\\ProgramData\\conda3.05\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in \u001b[0;36m_get_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text)\u001b[0m\n\u001b[0;32m 383\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mis_text\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 384\u001b[0m \u001b[1;31m# Python 3 and no explicit encoding\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 385\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath_or_buf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'replace'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 386\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 387\u001b[0m \u001b[1;31m# Python 3 and binary mode\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", |
| 22 | + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './data/movietweetings/movies.dat'" |
| 23 | + ] |
| 24 | + } |
| 25 | + ], |
| 26 | + "source": [ |
| 27 | + "import pandas as pd\n", |
| 28 | + "import numpy as np\n", |
| 29 | + "# Load the '::'-delimited MovieTweetings tables; header=None because column names are supplied here.\n", |
| 30 | + "movies = pd.read_csv('./data/movietweetings/movies.dat', delimiter='::',\n", |
| 31 | + " engine='python', header=None, names=['Movie ID', 'Movie Title', 'Genre'])\n", |
| 32 | + "users = pd.read_csv('./data/movietweetings/users.dat', delimiter='::',\n", |
| 33 | + " engine='python', header=None, names=['User ID', 'Twitter ID'])\n", |
| 34 | + "ratings = pd.read_csv('./data/movietweetings/ratings.dat', delimiter='::', engine='python',\n", |
| 35 | + " header=None, names=['User ID', 'Movie ID', 'Rating', 'Rating Timestamp'])\n", |
| 36 | + "# Preview the movies, then keep those whose Genre contains 'comedy' (case-insensitive; missing Genre -> no match).\n", |
| 37 | + "print(movies.head(10))\n", |
| 38 | + "mask = movies.Genre.str.contains('comedy', case=False, na=False)\n", |
| 39 | + "print(mask.head(10))\n", |
| 40 | + "comedy = movies[mask]\n", |
| 41 | + "comedy_ids = comedy['Movie ID']\n", |
| 42 | + "print(comedy_ids.head(10))\n", |
| 43 | + "# join() matches `on` against the OTHER frame's index, so index comedy by 'Movie ID' (drop=False keeps 'Movie IDright').\n", |
| 44 | + "combine = ratings.join(comedy.set_index('Movie ID', drop=False), on='Movie ID', rsuffix='right')\n", |
| 45 | + "print(combine.head(50))\n", |
| 46 | + "result = combine[combine['Movie IDright'].notna()]  # `!= np.NaN` is True for every row; notna() keeps only matched ratings\n", |
| 47 | + "print(result)" |
| 48 | + ] |
| 49 | + }, |
| 50 | + { |
| 51 | + "cell_type": "code", |
| 52 | + "execution_count": null, |
| 53 | + "metadata": { |
| 54 | + "collapsed": true |
| 55 | + }, |
| 56 | + "outputs": [], |
| 57 | + "source": [] |
| 58 | + } |
| 59 | + ], |
| 60 | + "metadata": { |
| 61 | + "kernelspec": { |
| 62 | + "display_name": "Python 3", |
| 63 | + "language": "python", |
| 64 | + "name": "python3" |
| 65 | + }, |
| 66 | + "language_info": { |
| 67 | + "codemirror_mode": { |
| 68 | + "name": "ipython", |
| 69 | + "version": 3 |
| 70 | + }, |
| 71 | + "file_extension": ".py", |
| 72 | + "mimetype": "text/x-python", |
| 73 | + "name": "python", |
| 74 | + "nbconvert_exporter": "python", |
| 75 | + "pygments_lexer": "ipython3", |
| 76 | + "version": "3.6.3" |
| 77 | + } |
| 78 | + }, |
| 79 | + "nbformat": 4, |
| 80 | + "nbformat_minor": 2 |
| 81 | +} |
0 commit comments