{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'multiple lines of comments are being shown here'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#comments in Python\n", "'''multiple lines of comments are being shown here'''" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Important\n", "this is a markdown and not a code window\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "10" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "2+3+5" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "67" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "66-3-(-4)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "96" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "32*3" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "8" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "2**3" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "2^3" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "14.333333333333334" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "43/3" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { 
"text/plain": [ "14" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "43//3" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "43%3" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import math as mt" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "7.38905609893065" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mt.exp(2)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2.302585092994046" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mt.log(10)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2.718281828459045" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mt.exp(1)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "3.0" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mt.log(8,2)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "31.622776601683793" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mt.sqrt(1000)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { 
"text/plain": [ "21.123150806638673" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.std([23,45,67,78])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['__doc__',\n", " '__loader__',\n", " '__name__',\n", " '__package__',\n", " '__spec__',\n", " 'acos',\n", " 'acosh',\n", " 'asin',\n", " 'asinh',\n", " 'atan',\n", " 'atan2',\n", " 'atanh',\n", " 'ceil',\n", " 'copysign',\n", " 'cos',\n", " 'cosh',\n", " 'degrees',\n", " 'e',\n", " 'erf',\n", " 'erfc',\n", " 'exp',\n", " 'expm1',\n", " 'fabs',\n", " 'factorial',\n", " 'floor',\n", " 'fmod',\n", " 'frexp',\n", " 'fsum',\n", " 'gamma',\n", " 'gcd',\n", " 'hypot',\n", " 'inf',\n", " 'isclose',\n", " 'isfinite',\n", " 'isinf',\n", " 'isnan',\n", " 'ldexp',\n", " 'lgamma',\n", " 'log',\n", " 'log10',\n", " 'log1p',\n", " 'log2',\n", " 'modf',\n", " 'nan',\n", " 'pi',\n", " 'pow',\n", " 'radians',\n", " 'sin',\n", " 'sinh',\n", " 'sqrt',\n", " 'tan',\n", " 'tanh',\n", " 'trunc']" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dir(mt)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "int" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(1)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "str" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(\"Ajay\")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "list" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type([23,45,67])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": true }, "outputs": [], 
"source": [ "a=[23,45,67]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(a)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "17.962924780409974" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.std(a)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "322.66666666666669" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.var(a)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1234567891234567766543210876543211" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "123456789123456789*9999999999999999" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [], "source": [ "np.random??" 
] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from random import randrange,randint" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "78\n" ] } ], "source": [ "print(randint(0,90))" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "286" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "randrange(1000)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2472965195555081\n", "6352816454724336\n", "4809973335770632\n", "5246909950815852\n", "6348106781629098\n", "2586909203145681\n", "2509370301745813\n", "4082241628288070\n", "7691514263873286\n", "8069700113941950\n" ] } ], "source": [ "for x in range(0,10):\n", " print(randrange(10000000000000000))" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def mynewfunction(x,y):\n", " taxes=((x-1000000)*0.35+100000-min(y,100000))\n", " print(taxes)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "420000.0\n" ] } ], "source": [ "mynewfunction(2200000,300000)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import os as os" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [], "source": [ "os??" 
] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "6\n", "12\n", "18\n", "24\n" ] } ], "source": [ "for x in range(0,30,6):\n", " print(x)" ] },
{ "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def mynewfunction(x,y):\n", "    \"\"\"Print x**3 + 3*x*y + 20*y.\n", "\n", "    NOTE: this reuses the name of the tax helper defined in an earlier cell\n", "    and replaces it from this cell onward.\n", "    \"\"\"\n", "    z=x**3+3*x*y+20*y\n", "    print(z)" ] },
{ "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "200\n", "596\n", "2288\n", "6572\n", "14744\n" ] } ], "source": [ "for x in range(0,30,6):\n", " mynewfunction(x,10)" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import os" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'C:\\\\Users\\\\Dell'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.getcwd()" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Stored output cleared: the saved listing exposed every file name in the\n", "# author's home directory. Re-run locally to see your own listing.\n", "os.listdir()" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Resolve the home directory at run time instead of hard-coding\n", "# 'C:\\\\Users\\\\Dell', which exists only on the original machine.\n", "os.chdir(os.path.expanduser('~'))" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mystring='Hello World'" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'Hello World'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mystring" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'e'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mystring[1]" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": {
"text/plain": [ "'H'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mystring[0]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hello World\n" ] } ], "source": [ "print(mystring)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "str" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(mystring)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "11" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(mystring)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "newstring2='Aye aye me heartie\\'s'" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "newstring3=\"Aye aye me heartie's\"" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\"Aye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie'sAye aye me heartie's\"" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "10*newstring3" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ne1= \"'Ajay','Vijay','Anita','Ankit'\"" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "str" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(ne1)" ] }, { "cell_type": "code", 
"execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\"'Ajay','Vijay','Anita','Ankit'\"" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "str(ne1)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'A'" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ne1[1]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ne2= ['Ajay','Vijay','Anita','Ankit']" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\"['Ajay', 'Vijay', 'Anita', 'Ankit']\"" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "str(ne2)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'Vijay'" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ne2[1]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [], "source": [ "myname1='Ajay'\n", "myname2='John'" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": true }, "outputs": [], "source": [ "message= \"Hi I am %s howdy\"" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'Hi I am Ajay howdy'" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "message %myname1\n" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'Hi I am John howdy'" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "message %myname2\n" ] }, { "cell_type": "code", 
"execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['Ajay', 'Vijay', 'Anita', 'Ankit']" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ne2" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ne2.append('Anna')" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['Ajay', 'Vijay', 'Anita', 'Ankit', 'Anna']" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ne2" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": true }, "outputs": [], "source": [ "del ne2[0]" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['Vijay', 'Anita', 'Ankit', 'Anna']" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ne2" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ne3=('Sachin','Dhoni','Gavaskar','Kapil')" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['__add__',\n", " '__class__',\n", " '__contains__',\n", " '__delattr__',\n", " '__dir__',\n", " '__doc__',\n", " '__eq__',\n", " '__format__',\n", " '__ge__',\n", " '__getattribute__',\n", " '__getitem__',\n", " '__getnewargs__',\n", " '__gt__',\n", " '__hash__',\n", " '__init__',\n", " '__iter__',\n", " '__le__',\n", " '__len__',\n", " '__lt__',\n", " '__mul__',\n", " '__ne__',\n", " '__new__',\n", " '__reduce__',\n", " '__reduce_ex__',\n", " '__repr__',\n", " '__rmul__',\n", " '__setattr__',\n", " '__sizeof__',\n", " '__str__',\n", " '__subclasshook__',\n", " 'count',\n", " 'index']" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "dir(ne3)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": true }, "outputs": [], "source": [ "favourite_movie=['micky mouse,steamboat willie', 'vijay,slumdog millionaire', 'john,passion of christ', 'donald,arthur']\n", "\n" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "list" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(favourite_movie)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [], "source": [ "favourite_movie2={'micky mouse:steamboat willie', 'vijay:slumdog millionaire', 'john:passion of christ', 'donald:arthur'}\n" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "set" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(favourite_movie2)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": true }, "outputs": [], "source": [ "favourite_movie3={'micky mouse':'steamboat willie', 'vijay':'slumdog millionaire', 'john':'passion of christ', 'donald':'arthur'}\n" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dict" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(favourite_movie3)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'steamboat willie'" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "favourite_movie3['micky mouse']\n", "\n" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import re" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": 
true }, "outputs": [], "source": [ "names =[\"Anna\", \"Anne\", \"Annaporna\",\"Shubham\",\"Aruna\"]" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<_sre.SRE_Match object; span=(0, 2), match='An'>\n", "<_sre.SRE_Match object; span=(0, 2), match='An'>\n", "<_sre.SRE_Match object; span=(0, 2), match='An'>\n", "None\n", "None\n" ] } ], "source": [ "for name in names:\n", " print(re.search(r'(An)',name))" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<_sre.SRE_Match object; span=(0, 1), match='A'>\n", "<_sre.SRE_Match object; span=(0, 1), match='A'>\n", "<_sre.SRE_Match object; span=(0, 1), match='A'>\n", "None\n", "<_sre.SRE_Match object; span=(0, 1), match='A'>\n" ] } ], "source": [ "for name in names:\n", " print(re.search(r'(A)',name))" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<_sre.SRE_Match object; span=(3, 4), match='a'>\n", "None\n", "<_sre.SRE_Match object; span=(3, 4), match='a'>\n", "<_sre.SRE_Match object; span=(5, 6), match='a'>\n", "<_sre.SRE_Match object; span=(4, 5), match='a'>\n" ] } ], "source": [ "for name in names:\n", " print(re.search(r'(a)',name))" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "True\n", "False\n", "True\n", "True\n", "True\n" ] } ], "source": [ "for name in names:\n", " print(bool(re.search(r'(a)',name)))" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": true }, "outputs": [], "source": [ 
"numlist=[\"$10000\",\"$20,000\",\"30,000\",40000,\"50000 \"] \n" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "$10000\n", "1\n", "$20,000\n", "2\n", "30,000\n", "3\n", "40000\n", "4\n", "50000 \n" ] } ], "source": [ "for i,value in enumerate(numlist):\n", " print(i) \n", " print(value)" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for i,value in enumerate(numlist):\n", " \n", " numlist[i]=re.sub(r\"([$,])\",\"\",str(value))\n", " numlist[i]=int(numlist[i])" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[10000, 20000, 30000, 40000, 50000]" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "numlist" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "30000.0" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(numlist)" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from datetime import datetime" ] }, { "cell_type": "code", "execution_count": 82, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "datetime.datetime(2017, 4, 15, 14, 35, 5, 932765)" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "datetime.now()" ] }, { "cell_type": "code", "execution_count": 80, "metadata": { "collapsed": true }, "outputs": [], "source": [ "date_obj=datetime.strptime(\"15/August/2007\",\"%d/%B/%Y\")" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "datetime.datetime(2007, 8, 15, 0, 0)" ] }, "execution_count": 81, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "date_obj" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false }, "outputs": [], "source": [ "a=date_obj-datetime.now()" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-3532" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.days" ] }, { "cell_type": "code", "execution_count": 88, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "33861" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.seconds" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'C:\\\\Users\\\\Dell'" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.getcwd()" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'commit_hash': '5c9c918',\n", " 'commit_source': 'installation',\n", " 'default_encoding': 'cp1252',\n", " 'ipython_path': 'C:\\\\Users\\\\Dell\\\\Anaconda3\\\\lib\\\\site-packages\\\\IPython',\n", " 'ipython_version': '5.1.0',\n", " 'os_name': 'nt',\n", " 'platform': 'Windows-7-6.1.7600-SP0',\n", " 'sys_executable': 'C:\\\\Users\\\\Dell\\\\Anaconda3\\\\python.exe',\n", " 'sys_platform': 'win32',\n", " 'sys_version': '3.5.2 |Anaconda custom (64-bit)| (default, Jul 5 2016, '\n", " '11:41:13) [MSC v.1900 64 bit (AMD64)]'}\n" ] } ], "source": [ "import IPython \n", "print (IPython.sys_info())\n" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "collapsed": false }, "outputs": [ { "data": { "application/json": { "Software versions": [ { "module": "Python", "version": "3.5.2 64bit [MSC v.1900 64 bit (AMD64)]" }, { "module": "IPython", "version": "5.1.0" }, { "module": "OS", 
"version": "Windows 7 6.1.7600 SP0" } ] }, "text/html": [ "
SoftwareVersion
Python3.5.2 64bit [MSC v.1900 64 bit (AMD64)]
IPython5.1.0
OSWindows 7 6.1.7600 SP0
Sat Apr 15 14:49:54 2017 India Standard Time
" ], "text/latex": [ "\\begin{tabular}{|l|l|}\\hline\n", "{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n", "Python & 3.5.2 64bit [MSC v.1900 64 bit (AMD64)] \\\\ \\hline\n", "IPython & 5.1.0 \\\\ \\hline\n", "OS & Windows 7 6.1.7600 SP0 \\\\ \\hline\n", "\\hline \\multicolumn{2}{|l|}{Sat Apr 15 14:49:54 2017 India Standard Time} \\\\ \\hline\n", "\\end{tabular}\n" ], "text/plain": [ "Software versions\n", "Python 3.5.2 64bit [MSC v.1900 64 bit (AMD64)]\n", "IPython 5.1.0\n", "OS Windows 7 6.1.7600 SP0\n", "Sat Apr 15 14:49:54 2017 India Standard Time" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%load_ext version_information\n", "%version_information " ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "collapsed": true }, "outputs": [], "source": [ "os.chdir('C:\\\\Users\\\\Dell\\\\Downloads')" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['140749_2017.pdf',\n", " '2011-F01-0700-Rev4-MDDS.XLSX',\n", " '20150817143155.pdf',\n", " '20160111060911.pdf',\n", " '20170214052225.pdf',\n", " '7z1604-x64.exe',\n", " '7z1604.exe',\n", " '861415_10151432783238421_2124270505_o (1).jpg',\n", " '861415_10151432783238421_2124270505_o.jpg',\n", " 'AirPassengers.csv',\n", " 'ajayo.jpg',\n", " 'Alison Python Invoice - Sheet1.pdf',\n", " 'Alison SAS Invoice - Sheet1.pdf',\n", " 'All+CSV+Files+in+a+Folder.ipynb',\n", " 'Allison Interview Jones Invoice - Sheet1.pdf',\n", " 'Anaconda3-4.2.0-Windows-x86_64.exe',\n", " 'apachehttpd.exe',\n", " 'April invoice adaptive analytics - Sheet1.pdf',\n", " 'Assignment14_BusinessAnalytics (1).docx',\n", " 'Assignment14_BusinessAnalytics.docx',\n", " 'Assignment15_BusinessAnalytics.docx',\n", " 'Assignment16_BusinessAnalytics (1).docx',\n", " 'Assignment16_BusinessAnalytics (2).docx',\n", " 'Assignment16_BusinessAnalytics.docx',\n", " 'aug ust 2008.JPG',\n", " 
'avast_free_antivirus_setup_online.exe',\n", " 'avinash_ltv.zip',\n", " 'BigDiamonds.csv',\n", " 'BigDiamonds.csv (1).zip',\n", " 'BigDiamonds.csv (2)',\n", " 'BigDiamonds.csv (2).zip',\n", " 'BigDiamonds.csv (3).zip',\n", " 'BigDiamonds.csv.zip',\n", " 'Boston (1).csv',\n", " 'Boston.csv',\n", " 'CAM- Ajay Ohri (1).pdf',\n", " 'CAM- Ajay Ohri.pdf',\n", " 'camtasia.exe',\n", " 'ccFraud.csv',\n", " 'Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf',\n", " 'CHAP1-6PythonforRUsersAnapproachforDataScience.docx',\n", " 'chapter+3+_+spark.html',\n", " 'chi+square+test.ipynb',\n", " 'chromeinstall-8u111.exe',\n", " 'Cisco_WebEx_Add-On.exe',\n", " 'class2.csv',\n", " 'Collabera Invoice (1).pdf',\n", " 'Collabera Invoice.pdf',\n", " 'Collectcent Invoice.pdf',\n", " 'college degrees.pdf',\n", " 'DAP 1.pdf',\n", " 'DAP 1.pptx',\n", " 'DAP 6 RDBMS and SQL.pdf',\n", " 'DAP 6 RDBMS and SQL.pptx',\n", " 'Data Analysis (1).7z',\n", " 'Data Analysis (1).rar',\n", " 'Data Analysis.rar',\n", " 'Data Viz.pptx',\n", " 'data+exploration.ipynb',\n", " 'data+manipulation.ipynb',\n", " 'data+munging+again.ipynb',\n", " 'data+wrangling+titanic+dataset.ipynb',\n", " 'data1.csv',\n", " 'datasets.csv',\n", " 'Decision Trees.pdf',\n", " 'DecisionStatsOfferLetter.docx',\n", " 'DecisionStatsRelievingLetter.docx',\n", " 'descriptive+stats+in+Python.ipynb',\n", " 'desktop.ini',\n", " 'Diamond (1).csv',\n", " 'Diamond (2).csv',\n", " 'Diamond (3).csv',\n", " 'Diamond (4).csv',\n", " 'Diamond (5).csv',\n", " 'Diamond (6).csv',\n", " 'Diamond (7).csv',\n", " 'Diamond (8).csv',\n", " 'Diamond.csv',\n", " 'DropboxInstaller.exe',\n", " 'edb_npgsql.exe',\n", " 'edb_pgjdbc.exe',\n", " 'edb_psqlodbc.exe',\n", " 'edb_psqlodbc.exe-20170203172812',\n", " 'edb_psqlodbc.exe-20170307203617',\n", " 'final invoice edureka - Sheet1.pdf',\n", " 'FinalPythonforRUsersAnapproachforDataScience (1).docx',\n", " 'FinalPythonforRUsersAnapproachforDataScience (2).docx',\n", " 
'FinalPythonforRUsersAnapproachforDataScience (3).docx',\n", " 'FinalPythonforRUsersAnapproachforDataScience (4).docx',\n", " 'FinalPythonforRUsersAnapproachforDataScience.docx',\n", " 'final_webinar (1).pdf',\n", " 'final_webinar.pdf',\n", " 'Git-2.11.0-64-bit.exe',\n", " 'Git-2.12.0-64-bit.exe',\n", " 'GitHubSetup (1).exe',\n", " 'GitHubSetup (2).exe',\n", " 'GitHubSetup.exe',\n", " 'GOMAUDIOGLOBALSETUP.EXE',\n", " 'Hdma.csv',\n", " 'Hedonic.csv',\n", " 'HP Downloads',\n", " 'HPSupportSolutionsFramework-12.5.32.203.exe',\n", " 'image.png',\n", " 'IMS PROSCHOOL Workshop.pptx.pdf',\n", " 'IMS PROSCHOOL Workshop.pptx.pptx',\n", " 'internship.docx',\n", " 'Introduction to SAS (1).pdf',\n", " 'Introduction to SAS Part 1 (1).pdf',\n", " 'Introduction to SAS Part 1.pdf',\n", " 'Introduction to SAS.pdf',\n", " 'Invoice for Digital Vidya.pdf',\n", " 'Invoice for Weekendr.pdf',\n", " 'Invoice format - Ajay Ohri CONTATA (1).xls',\n", " 'Invoice format - Ajay Ohri CONTATA.xls',\n", " 'invoice rapid miner.pdf',\n", " 'Invoice trafla format.docx',\n", " 'iris2 (1).ipynb',\n", " 'iris2 (2).ipynb',\n", " 'iris2.ipynb',\n", " 'January invoice Indicus .pdf',\n", " 'June AV Invoice - Sheet1.pdf',\n", " 'Lecture 6 - KNN & Naive Bayes.ppt',\n", " 'Local Disk (C) - Shortcut.lnk',\n", " 'logistic regression - script for ppt.R',\n", " 'logistic_regression_-_script_for_ppt.html',\n", " 'March invoice Indicus - Sheet1.pdf',\n", " 'mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi',\n", " 'mongodb-win32-x86_64-3.4.2-signed.msi',\n", " 'mortDefault',\n", " 'mortDefault.zip',\n", " 'mtcarslm.R',\n", " 'multiple+file+concat+in+pandas (1).ipynb',\n", " 'multiple+file+concat+in+pandas.ipynb',\n", " 'my+first+class+in+python.ipynb',\n", " 'nltk.ipynb',\n", " 'notebook-Copy1.html',\n", " 'Offer Letter - Ajay Ohri (1).pdf',\n", " 'Offer Letter - Ajay Ohri.pdf',\n", " 'Other Data Mining Methods (1).pdf',\n", " 'Other Data Mining Methods.pdf',\n", " 'output1 (1).xls',\n", " 'output1 (2).xls',\n", " 
'output1.xls',\n", " 'pandas+11.ipynb',\n", " 'pandas+analysis+1.ipynb',\n", " 'pandas+data+manipulation.ipynb',\n", " 'passport image.pdf',\n", " 'Pawconinvoice2016.pdf',\n", " 'Pawconinvoice2017 (1).pdf',\n", " 'Pawconinvoice2017 (2).pdf',\n", " 'Pawconinvoice2017 (3).pdf',\n", " 'Pawconinvoice2017.pdf',\n", " 'Payslip Feb 2016 - Sheet1.pdf',\n", " 'Payslip Feb 2016.pdf',\n", " 'Payslip Format Decisionstats - Sheet1.pdf',\n", " 'Payslip Jan 2016 - Sheet1.pdf',\n", " 'Payslip Jan 2016.pdf',\n", " 'Payslip March 2016 - Sheet1.pdf',\n", " 'Payslip March 2016.pdf',\n", " 'pgd.csv',\n", " 'postgresql-9.6.1-1-windows-x64.exe',\n", " 'Program 1-results.rtf',\n", " 'protein.csv',\n", " 'python+with+postgres (1).ipynb',\n", " 'python+with+postgres.ipynb',\n", " 'Python.docx',\n", " 'R-3.3.2-win.exe',\n", " 'R-3.3.3-win.exe',\n", " 'RCertificationExam.pdf',\n", " 'reg+model.ipynb',\n", " 'Revision - Business Analytics (1).pdf',\n", " 'Revision - Business Analytics.pdf',\n", " 'RidingMowers.csv',\n", " 'rsconnect',\n", " 'RStudio-1.0.136.exe',\n", " 'Salary Slip, Feb 2016.pdf',\n", " 'Salary Slip, Jan 2016.pdf',\n", " 'Salary Slip, March 2016 (1).pdf',\n", " 'Salary Slip, March 2016 (2).pdf',\n", " 'Salary Slip, March 2016.pdf',\n", " 'sales-of-shampoo-over-a-three-ye.csv',\n", " 'SAS part 2.pdf',\n", " 'SAS Part 3.pdf',\n", " 'sas-university-edition-107140.pdf',\n", " 'Scan0095.pdf',\n", " 'Scanned Invoice for Collabera.pdf',\n", " 'Screenshot 2017-01-23 12.36.55.png',\n", " 'September invoice adaptive analytics - Sheet1.pdf',\n", " 'Sollers January.pdf',\n", " 'sqlalchemy.ipynb',\n", " 'stackoverflow-dump-analysis.html',\n", " 'Sunstone.pdf',\n", " 'Tableau.pdf',\n", " 'TableauPublicDesktop-64bit-10-1-3.exe',\n", " 'TableauPublicDesktop-64bit-10-1-4.exe',\n", " 'telecom.csv',\n", " 'TelecomServiceProviderCaseStudy.pdf',\n", " 'test+web+scraping.ipynb',\n", " 'Text Mining (1).pdf',\n", " 'Text Mining.pdf',\n", " 'third.sas7bdat',\n", " 'Time Series Forecasting 
(1).pdf',\n", " 'Time Series Forecasting.pdf',\n", " 'ts.html',\n", " 'ts.R',\n", " 'Unconfirmed 373974.crdownload',\n", " 'Unconfirmed 376991.crdownload',\n", " 'Unconfirmed 950045.crdownload',\n", " 'VirtualBox-5.1.8-111374-Win (1).exe',\n", " 'VirtualBox-5.1.8-111374-Win.exe',\n", " 'Web+Scraping+Yelp+with+Beautiful+Soup.ipynb',\n", " 'Webinar for Business Analytics.pdf',\n", " 'WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg',\n", " 'WhatsApp Image 2017-02-18 at 08.42.55.jpeg']" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.listdir()" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import glob" ] }, { "cell_type": "code", "execution_count": 100, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond (7).csv', 'Diamond (8).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']\n" ] } ], "source": [ "# List every CSV file in the current working directory.\n", "path = os.getcwd()\n", "extension = 'csv'\n", "# glob.glob() already returns a list and resolves a relative pattern against\n", "# the working directory, so no chdir() or list re-wrapping is needed.\n", "result = glob.glob('*.{}'.format(extension))\n", "print(result)\n" ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 97, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fraud=pd.read_csv('ccFraud.csv')" ] }, { "cell_type": "code", "execution_count": 104, "metadata": { "collapsed": true }, "outputs": [], "source": [ "mtcars=pd.read_csv(\"https://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv\")" ] }, { "cell_type": 
"code", "execution_count": 101, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Build the Desktop path from the current user's home directory instead of\n", "# hard-coding one machine's user name into the notebook.\n", "smalldiamonds=pd.read_csv(os.path.join(os.path.expanduser(\"~\"), \"Desktop\", \"Diamond (8).csv\"))" ] }, { "cell_type": "code", "execution_count": 111, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index(['custID', 'gender', 'state', 'cardholder', 'balance', 'numTrans',\n", " 'numIntlTrans', 'creditLine', 'fraudRisk'],\n", " dtype='object')" ] }, "execution_count": 111, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.columns" ] }, { "cell_type": "code", "execution_count": 110, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(10000000, 9)" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.shape" ] }, { "cell_type": "code", "execution_count": 120, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "10000000" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(fraud)" ] }, { "cell_type": "code", "execution_count": 121, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(fraud.columns)" ] }, { "cell_type": "code", "execution_count": 109, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "custID int64\n", "gender int64\n", "state int64\n", "cardholder int64\n", "balance int64\n", "numTrans int64\n", "numIntlTrans int64\n", "creditLine int64\n", "fraudRisk int64\n", "dtype: object" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.dtypes" ] }, { "cell_type": "code", "execution_count": 102, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 10000000 entries, 0 to 9999999\n", "Data columns (total 9 columns):\n", 
"custID int64\n", "gender int64\n", "state int64\n", "cardholder int64\n", "balance int64\n", "numTrans int64\n", "numIntlTrans int64\n", "creditLine int64\n", "fraudRisk int64\n", "dtypes: int64(9)\n", "memory usage: 686.6 MB\n" ] } ], "source": [ "fraud.info()" ] }, { "cell_type": "code", "execution_count": 105, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 32 entries, 0 to 31\n", "Data columns (total 12 columns):\n", "Unnamed: 0 32 non-null object\n", "mpg 32 non-null float64\n", "cyl 32 non-null int64\n", "disp 32 non-null float64\n", "hp 32 non-null int64\n", "drat 32 non-null float64\n", "wt 32 non-null float64\n", "qsec 32 non-null float64\n", "vs 32 non-null int64\n", "am 32 non-null int64\n", "gear 32 non-null int64\n", "carb 32 non-null int64\n", "dtypes: float64(5), int64(6), object(1)\n", "memory usage: 3.1+ KB\n" ] } ], "source": [ "mtcars.info()" ] }, { "cell_type": "code", "execution_count": 106, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 308 entries, 0 to 307\n", "Data columns (total 6 columns):\n", "Unnamed: 0 308 non-null int64\n", "carat 308 non-null float64\n", "colour 308 non-null object\n", "clarity 308 non-null object\n", "certification 308 non-null object\n", "price 308 non-null int64\n", "dtypes: float64(1), int64(2), object(3)\n", "memory usage: 14.5+ KB\n" ] } ], "source": [ "smalldiamonds.info()" ] }, { "cell_type": "code", "execution_count": 108, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
011351300041420
12221090180
232210279160
341151012050
4514610111670
\n", "
" ], "text/plain": [ " custID gender state cardholder balance numTrans numIntlTrans \\\n", "0 1 1 35 1 3000 4 14 \n", "1 2 2 2 1 0 9 0 \n", "2 3 2 2 1 0 27 9 \n", "3 4 1 15 1 0 12 0 \n", "4 5 1 46 1 0 11 16 \n", "\n", " creditLine fraudRisk \n", "0 2 0 \n", "1 18 0 \n", "2 16 0 \n", "3 5 0 \n", "4 7 0 " ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.head()" ] }, { "cell_type": "code", "execution_count": 112, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
999999599999961371010090
999999699999971161033240
999999799999981241900038080
9999998999999912817000201960
9999999100000001231013070
\n", "
" ], "text/plain": [ " custID gender state cardholder balance numTrans numIntlTrans \\\n", "9999995 9999996 1 37 1 0 10 0 \n", "9999996 9999997 1 16 1 0 33 2 \n", "9999997 9999998 1 24 1 9000 38 0 \n", "9999998 9999999 1 28 1 7000 20 19 \n", "9999999 10000000 1 23 1 0 13 0 \n", "\n", " creditLine fraudRisk \n", "9999995 9 0 \n", "9999996 4 0 \n", "9999997 8 0 \n", "9999998 6 0 \n", "9999999 7 0 " ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.tail()" ] }, { "cell_type": "code", "execution_count": 113, "metadata": { "collapsed": true }, "outputs": [], "source": [ "fraud2=fraud.copy()" ] }, { "cell_type": "code", "execution_count": 115, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
count1.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+07
mean5.000000e+061.382177e+002.466127e+011.030004e+004.109920e+032.893519e+014.047190e+009.134469e+005.960140e-02
std2.886751e+064.859195e-011.497012e+011.705991e-013.996847e+032.655378e+018.602970e+009.641974e+002.367469e-01
min1.000000e+001.000000e+001.000000e+001.000000e+000.000000e+000.000000e+000.000000e+001.000000e+000.000000e+00
25%2.500001e+061.000000e+001.000000e+011.000000e+000.000000e+001.000000e+010.000000e+004.000000e+000.000000e+00
50%5.000000e+061.000000e+002.400000e+011.000000e+003.706000e+031.900000e+010.000000e+006.000000e+000.000000e+00
75%7.500000e+062.000000e+003.800000e+011.000000e+006.000000e+033.900000e+014.000000e+001.100000e+010.000000e+00
max1.000000e+072.000000e+005.100000e+012.000000e+004.148500e+041.000000e+026.000000e+017.500000e+011.000000e+00
\n", "
" ], "text/plain": [ " custID gender state cardholder balance \\\n", "count 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 \n", "mean 5.000000e+06 1.382177e+00 2.466127e+01 1.030004e+00 4.109920e+03 \n", "std 2.886751e+06 4.859195e-01 1.497012e+01 1.705991e-01 3.996847e+03 \n", "min 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 \n", "25% 2.500001e+06 1.000000e+00 1.000000e+01 1.000000e+00 0.000000e+00 \n", "50% 5.000000e+06 1.000000e+00 2.400000e+01 1.000000e+00 3.706000e+03 \n", "75% 7.500000e+06 2.000000e+00 3.800000e+01 1.000000e+00 6.000000e+03 \n", "max 1.000000e+07 2.000000e+00 5.100000e+01 2.000000e+00 4.148500e+04 \n", "\n", " numTrans numIntlTrans creditLine fraudRisk \n", "count 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 \n", "mean 2.893519e+01 4.047190e+00 9.134469e+00 5.960140e-02 \n", "std 2.655378e+01 8.602970e+00 9.641974e+00 2.367469e-01 \n", "min 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 \n", "25% 1.000000e+01 0.000000e+00 4.000000e+00 0.000000e+00 \n", "50% 1.900000e+01 0.000000e+00 6.000000e+00 0.000000e+00 \n", "75% 3.900000e+01 4.000000e+00 1.100000e+01 0.000000e+00 \n", "max 1.000000e+02 6.000000e+01 7.500000e+01 1.000000e+00 " ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.describe()" ] }, { "cell_type": "code", "execution_count": 116, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 1.000000e+07\n", "mean 1.382177e+00\n", "std 4.859195e-01\n", "min 1.000000e+00\n", "25% 1.000000e+00\n", "50% 1.000000e+00\n", "75% 2.000000e+00\n", "max 2.000000e+00\n", "Name: gender, dtype: float64" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.gender.describe()" ] }, { "cell_type": "code", "execution_count": 117, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0mpgcyldisphpdratwtqsecvsamgearcarb
0Mazda RX421.06160.01103.902.62016.460144
1Mazda RX4 Wag21.06160.01103.902.87517.020144
2Datsun 71022.84108.0933.852.32018.611141
3Hornet 4 Drive21.46258.01103.083.21519.441031
4Hornet Sportabout18.78360.01753.153.44017.020032
\n", "
" ], "text/plain": [ " Unnamed: 0 mpg cyl disp hp drat wt qsec vs am gear \\\n", "0 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 \n", "1 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 \n", "2 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 \n", "3 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 \n", "4 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 \n", "\n", " carb \n", "0 4 \n", "1 4 \n", "2 1 \n", "3 1 \n", "4 2 " ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars.head()" ] }, { "cell_type": "code", "execution_count": 118, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Drop the unnamed first column, which holds the car names.\n", "# axis is passed by keyword: pandas 2.0 removed positional axis for drop().\n", "mtcars=mtcars.drop(\"Unnamed: 0\", axis=1)" ] }, { "cell_type": "code", "execution_count": 119, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mpgcyldisphpdratwtqsecvsamgearcarb
021.06160.01103.902.62016.460144
121.06160.01103.902.87517.020144
222.84108.0933.852.32018.611141
321.46258.01103.083.21519.441031
418.78360.01753.153.44017.020032
\n", "
" ], "text/plain": [ " mpg cyl disp hp drat wt qsec vs am gear carb\n", "0 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4\n", "1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4\n", "2 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1\n", "3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1\n", "4 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars.head()" ] }, { "cell_type": "code", "execution_count": 124, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'commit_hash': '5c9c918',\n", " 'commit_source': 'installation',\n", " 'default_encoding': 'cp1252',\n", " 'ipython_path': 'C:\\\\Users\\\\Dell\\\\Anaconda3\\\\lib\\\\site-packages\\\\IPython',\n", " 'ipython_version': '5.1.0',\n", " 'os_name': 'nt',\n", " 'platform': 'Windows-7-6.1.7600-SP0',\n", " 'sys_executable': 'C:\\\\Users\\\\Dell\\\\Anaconda3\\\\python.exe',\n", " 'sys_platform': 'win32',\n", " 'sys_version': '3.5.2 |Anaconda custom (64-bit)| (default, Jul 5 2016, '\n", " '11:41:13) [MSC v.1900 64 bit (AMD64)]'}\n" ] } ], "source": [ "import IPython\n", "print (IPython.sys_info())\n" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: version_information in c:\\users\\dell\\anaconda3\\lib\\site-packages\n", "The version_information extension is already loaded. 
To reload it, use:\n", " %reload_ext version_information\n", "alabaster==0.7.9\n", "anaconda-clean==1.0\n", "anaconda-client==1.5.1\n", "anaconda-navigator==1.3.1\n", "argcomplete==1.0.0\n", "astroid==1.4.7\n", "astropy==1.2.1\n", "Babel==2.3.4\n", "backports.shutil-get-terminal-size==1.0.0\n", "beautifulsoup4==4.5.1\n", "bitarray==0.8.1\n", "blaze==0.10.1\n", "bokeh==0.12.2\n", "boto==2.42.0\n", "Bottleneck==1.1.0\n", "brewer2mpl==1.4.1\n", "cffi==1.7.0\n", "chest==0.2.3\n", "click==6.6\n", "cloudpickle==0.2.1\n", "clyent==1.2.2\n", "colorama==0.3.7\n", "comtypes==1.1.2\n", "conda==4.3.9\n", "conda-build==2.0.2\n", "configobj==5.0.6\n", "contextlib2==0.5.3\n", "cryptography==1.5\n", "cycler==0.10.0\n", "Cython==0.24.1\n", "cytoolz==0.8.0\n", "dask==0.11.0\n", "datashape==0.5.2\n", "decorator==4.0.10\n", "dill==0.2.5\n", "docutils==0.12\n", "dynd===c328ab7\n", "et-xmlfile==1.0.1\n", "fastcache==1.0.2\n", "filelock==2.0.6\n", "Flask==0.11.1\n", "Flask-Cors==2.1.2\n", "gevent==1.1.2\n", "ggplot==0.11.5\n", "greenlet==0.4.10\n", "h5py==2.6.0\n", "HeapDict==1.0.0\n", "idna==2.1\n", "imagesize==0.7.1\n", "ipykernel==4.5.0\n", "ipython==5.1.0\n", "ipython-genutils==0.1.0\n", "ipywidgets==5.2.2\n", "itsdangerous==0.24\n", "jdcal==1.2\n", "jedi==0.9.0\n", "Jinja2==2.8\n", "jsonschema==2.5.1\n", "jupyter==1.0.0\n", "jupyter-client==4.4.0\n", "jupyter-console==5.0.0\n", "jupyter-core==4.2.0\n", "lazy-object-proxy==1.2.1\n", "llvmlite==0.13.0\n", "locket==0.2.0\n", "lxml==3.6.4\n", "MarkupSafe==0.23\n", "matplotlib==1.5.3\n", "menuinst==1.4.1\n", "mistune==0.7.3\n", "mpmath==0.19\n", "multipledispatch==0.4.8\n", "nb-anacondacloud==1.2.0\n", "nb-conda==2.0.0\n", "nb-conda-kernels==2.0.0\n", "nbconvert==4.2.0\n", "nbformat==4.1.0\n", "nbpresent==3.0.2\n", "networkx==1.11\n", "nltk==3.2.1\n", "nose==1.3.7\n", "notebook==4.2.3\n", "numba==0.28.1\n", "numexpr==2.6.1\n", "numpy==1.11.1\n", "odo==0.5.0\n", "openpyxl==2.3.2\n", "pandas==0.18.1\n", "pandasql==0.7.3\n", 
"partd==0.3.6\n", "path.py==0.0.0\n", "pathlib2==2.1.0\n", "patsy==0.4.1\n", "pep8==1.7.0\n", "pickleshare==0.7.4\n", "Pillow==3.3.1\n", "pkginfo==1.3.2\n", "ply==3.9\n", "prompt-toolkit==1.0.3\n", "psutil==4.3.1\n", "psycopg2==2.6.2\n", "py==1.4.31\n", "pyasn1==0.1.9\n", "pycosat==0.6.1\n", "pycparser==2.14\n", "pycrypto==2.6.1\n", "pycurl==7.43.0\n", "pyflakes==1.3.0\n", "Pygments==2.1.3\n", "pylint==1.5.4\n", "pyodbc==3.0.10\n", "pyOpenSSL==16.2.0\n", "pyparsing==2.1.4\n", "pytest==2.9.2\n", "python-dateutil==2.5.3\n", "pytz==2016.6.1\n", "pywin32==220\n", "PyYAML==3.12\n", "pyzmq==15.4.0\n", "QtAwesome==0.3.3\n", "qtconsole==4.2.1\n", "QtPy==1.1.2\n", "requests==2.12.4\n", "rope-py3k==0.9.4.post1\n", "ruamel-yaml===-VERSION\n", "scikit-image==0.12.3\n", "scikit-learn==0.17.1\n", "scipy==0.18.1\n", "seaborn==0.7.1\n", "simplegeneric==0.8.1\n", "singledispatch==3.4.0.3\n", "six==1.10.0\n", "snowballstemmer==1.2.1\n", "sockjs-tornado==1.0.3\n", "sphinx==1.4.6\n", "spyder==3.0.0\n", "SQLAlchemy==1.0.13\n", "statsmodels==0.6.1\n", "sympy==1.0\n", "tables==3.2.2\n", "toolz==0.8.0\n", "tornado==4.4.1\n", "traitlets==4.3.0\n", "unicodecsv==0.14.1\n", "urllib3==1.20\n", "version-information==1.0.3\n", "wcwidth==0.1.7\n", "Werkzeug==0.11.11\n", "widgetsnbextension==1.2.6\n", "win-unicode-console==0.5\n", "wrapt==1.10.6\n", "xlrd==1.0.0\n", "XlsxWriter==0.9.3\n", "xlwings==0.10.0\n", "xlwt==1.1.2\n" ] } ], "source": [ "\n", "\n", "!pip install version_information\n", "%load_ext version_information\n", "%version_information\n", "\n", "\n", "!pip freeze" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting guppy\n", " Downloading guppy-0.1.10.tar.gz (484kB)\n", "Building wheels for collected packages: guppy\n", " Running setup.py bdist_wheel for guppy: started\n", " Running setup.py bdist_wheel for guppy: finished with status 'error'\n", " Complete output from 
command c:\\users\\dell\\anaconda3\\python.exe -u -c \"import setuptools, tokenize;__file__='C:\\\\Users\\\\Dell\\\\AppData\\\\Local\\\\Temp\\\\pip-build-d3t4jj4u\\\\guppy\\\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\\r\\n', '\\n');f.close();exec(compile(code, __file__, 'exec'))\" bdist_wheel -d C:\\Users\\Dell\\AppData\\Local\\Temp\\tmppr8koym2pip-wheel- --python-tag cp35:\n", " running bdist_wheel\n", " running build\n", " running build_py\n", " creating build\n", " creating build\\lib.win-amd64-3.5\n", " creating build\\lib.win-amd64-3.5\\guppy\n", " copying guppy\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\n", " creating build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " creating build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Cat.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\cmd.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Code.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Compat.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\etc.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\ExecfileWithModuleInfo.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\FSA.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Glue.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Help.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\IterPermute.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\KanExtension.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\KnuthBendix.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\OutputHandling.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\RE.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\RE_Rect.py -> 
build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\textView.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\tkcursors.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Unpack.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\xterm.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " creating build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Document.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\DottedTree.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Exceptions.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\FileIO.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Filer.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Gsml.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Help.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Html.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Latex.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Main.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\SpecNodes.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Tester.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Text.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\XHTML.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " creating build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\AbstractAlgebra.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Classifiers.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Console.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Doc.py -> 
build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\ImpSet.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Monitor.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\OutputHandling.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Part.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Path.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\pbhelp.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Prof.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\RefPat.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Remote.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\RemoteConstants.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\RM.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Spec.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Target.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\UniSet.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Use.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\View.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " creating build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\support.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_all.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Classifiers.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_dependencies.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_ER.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_heapyc.py -> 
build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_menuleak.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_OutputHandling.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Part.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Path.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_RefPat.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_RetaGraph.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_sf.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Spec.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_UniSet.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_View.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " creating build\\lib.win-amd64-3.5\\guppy\\sets\n", " copying guppy\\sets\\test.py -> build\\lib.win-amd64-3.5\\guppy\\sets\n", " copying guppy\\sets\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\sets\n", " copying guppy\\doc\\docexample.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\gsl.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\gslexample.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\guppy.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapyc.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapy_RootState.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapy_tutorial.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapy_UniSet.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying 
guppy\\doc\\heapy_Use.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\index.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\ProfileBrowser.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\sets.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\pbscreen.jpg -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " running build_ext\n", " building 'guppy.sets.setsc' extension\n", " error: Microsoft Visual C++ 14.0 is required. Get it with \"Microsoft Visual C++ Build Tools\": http://landinghub.visualstudio.com/visual-cpp-build-tools\n", " \n", " ----------------------------------------\n", " Running setup.py clean for guppy\n", "Failed to build guppy\n", "Installing collected packages: guppy\n", " Running setup.py install for guppy: started\n", " Running setup.py install for guppy: finished with status 'error'\n", " Complete output from command c:\\users\\dell\\anaconda3\\python.exe -u -c \"import setuptools, tokenize;__file__='C:\\\\Users\\\\Dell\\\\AppData\\\\Local\\\\Temp\\\\pip-build-d3t4jj4u\\\\guppy\\\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\\r\\n', '\\n');f.close();exec(compile(code, __file__, 'exec'))\" install --record C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-_nlam_7o-record\\install-record.txt --single-version-externally-managed --compile:\n", " running install\n", " running build\n", " running build_py\n", " creating build\n", " creating build\\lib.win-amd64-3.5\n", " creating build\\lib.win-amd64-3.5\\guppy\n", " copying guppy\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\n", " creating build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " creating build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Cat.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\cmd.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Code.py -> 
build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Compat.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\etc.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\ExecfileWithModuleInfo.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\FSA.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Glue.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Help.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\IterPermute.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\KanExtension.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\KnuthBendix.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\OutputHandling.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\RE.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\RE_Rect.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\textView.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\tkcursors.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\Unpack.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\xterm.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " copying guppy\\etc\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\etc\n", " creating build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Document.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\DottedTree.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Exceptions.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\FileIO.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Filer.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Gsml.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Help.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Html.py -> 
build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Latex.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Main.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\SpecNodes.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Tester.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\Text.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\XHTML.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " copying guppy\\gsl\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\gsl\n", " creating build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\AbstractAlgebra.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Classifiers.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Console.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Doc.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\ImpSet.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Monitor.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\OutputHandling.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Part.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Path.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\pbhelp.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Prof.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\RefPat.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Remote.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\RemoteConstants.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\RM.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Spec.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Target.py -> 
build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\UniSet.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\Use.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\View.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " copying guppy\\heapy\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\n", " creating build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\support.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_all.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Classifiers.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_dependencies.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_ER.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_heapyc.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_menuleak.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_OutputHandling.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Part.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Path.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_RefPat.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_RetaGraph.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_sf.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_Spec.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_UniSet.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying guppy\\heapy\\test\\test_View.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " copying 
guppy\\heapy\\test\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\heapy\\test\n", " creating build\\lib.win-amd64-3.5\\guppy\\sets\n", " copying guppy\\sets\\test.py -> build\\lib.win-amd64-3.5\\guppy\\sets\n", " copying guppy\\sets\\__init__.py -> build\\lib.win-amd64-3.5\\guppy\\sets\n", " copying guppy\\doc\\docexample.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\gsl.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\gslexample.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\guppy.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapyc.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapy_RootState.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapy_tutorial.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapy_UniSet.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\heapy_Use.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\index.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\ProfileBrowser.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\sets.html -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " copying guppy\\doc\\pbscreen.jpg -> build\\lib.win-amd64-3.5\\guppy\\doc\n", " running build_ext\n", " building 'guppy.sets.setsc' extension\n", " error: Microsoft Visual C++ 14.0 is required. 
Get it with \"Microsoft Visual C++ Build Tools\": http://landinghub.visualstudio.com/visual-cpp-build-tools\n", " \n", " ----------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " Failed building wheel for guppy\n", "Command \"c:\\users\\dell\\anaconda3\\python.exe -u -c \"import setuptools, tokenize;__file__='C:\\\\Users\\\\Dell\\\\AppData\\\\Local\\\\Temp\\\\pip-build-d3t4jj4u\\\\guppy\\\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\\r\\n', '\\n');f.close();exec(compile(code, __file__, 'exec'))\" install --record C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-_nlam_7o-record\\install-record.txt --single-version-externally-managed --compile\" failed with error code 1 in C:\\Users\\Dell\\AppData\\Local\\Temp\\pip-build-d3t4jj4u\\guppy\\\n" ] } ], "source": [ "!pip install guppy" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
011351300041420
12221090180
232210279160
341151012050
4514610111670
\n", "
" ], "text/plain": [ " custID gender state cardholder balance numTrans numIntlTrans \\\n", "0 1 1 35 1 3000 4 14 \n", "1 2 2 2 1 0 9 0 \n", "2 3 2 2 1 0 27 9 \n", "3 4 1 15 1 0 12 0 \n", "4 5 1 46 1 0 11 16 \n", "\n", " creditLine fraudRisk \n", "0 2 0 \n", "1 18 0 \n", "2 16 0 \n", "3 5 0 \n", "4 7 0 " ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.head()" ] }, { "cell_type": "code", "execution_count": 135, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 2\n", "2 2\n", "3 1\n", "4 1\n", "Name: gender, dtype: int64" ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.head().gender" ] }, { "cell_type": "code", "execution_count": 133, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 2\n", "2 2\n", "3 1\n", "4 1\n", "Name: gender, dtype: int64" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.gender.head()" ] }, { "cell_type": "code", "execution_count": 132, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 2\n", "2 2\n", "3 1\n", "4 1\n", "Name: gender, dtype: int64" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud['gender'].head()" ] }, { "cell_type": "code", "execution_count": 131, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderstatebalance
01353000
1220
2220
31150
41460
\n", "
" ], "text/plain": [ " gender state balance\n", "0 1 35 3000\n", "1 2 2 0\n", "2 2 2 0\n", "3 1 15 0\n", "4 1 46 0" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud[['gender','state','balance']].head()" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
10111461460154040
11121101300020020
1213161045240
13142381900041380
141512715227600170
15161441022050
1617218113970200130
17181351311313680
1819151900020280
192023111860211080
20211391400024030
\n", "
" ], "text/plain": [ " custID gender state cardholder balance numTrans numIntlTrans \\\n", "10 11 1 46 1 4601 54 0 \n", "11 12 1 10 1 3000 20 0 \n", "12 13 1 6 1 0 45 2 \n", "13 14 2 38 1 9000 41 3 \n", "14 15 1 27 1 5227 60 0 \n", "15 16 1 44 1 0 22 0 \n", "16 17 2 18 1 13970 20 0 \n", "17 18 1 35 1 3113 13 6 \n", "18 19 1 5 1 9000 20 2 \n", "19 20 2 31 1 1860 21 10 \n", "20 21 1 39 1 4000 24 0 \n", "\n", " creditLine fraudRisk \n", "10 4 0 \n", "11 2 0 \n", "12 4 0 \n", "13 8 0 \n", "14 17 0 \n", "15 5 0 \n", "16 13 0 \n", "17 8 0 \n", "18 8 0 \n", "19 8 0 \n", "20 3 0 " ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.ix[10:20]" ] }, { "cell_type": "code", "execution_count": 137, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
011351300041420
12221090180
232210279160
341151012050
4514610111670
5624425546210130
67131200041010
781101601620360
8923212428410220
91012310185650
10111461460154040
11121101300020020
1213161045240
13142381900041380
141512715227600170
15161441022050
1617218113970200130
17181351311313680
1819151900020280
192023111860211080
20211391400024030
21221341022030
2223151070110
23242211015030
242512510120650
2526229150004940
26271381400021530
272819112000200110
28292201019020
293024915192840131
..............................
9999970999997111010120110
999997199999722401368331370
999997299999731461500069040
99999739999974110203631140
999997499999751251018030
999997599999762481421023080
99999769999977241014070
999997799999781351800024070
999997899999791441500012040
9999979999998016176695370
99999809999981110113889540130
99999819999982133102326180
999998299999831441300014020
99999839999984213102050
999998499999851391700030060
99999859999986245230004020
999998699999871231700059060
999998799999882241600046050
99999889999989118115000720141
9999989999999024112000177110
99999909999991116215618050
99999919999992236152176050
99999929999993138107020
99999939999994243126076050
9999994999999511621737630200
999999599999961371010090
999999699999971161033240
999999799999981241900038080
9999998999999912817000201960
9999999100000001231013070
\n", "

10000000 rows × 9 columns

\n", "
" ], "text/plain": [ " custID gender state cardholder balance numTrans numIntlTrans \\\n", "0 1 1 35 1 3000 4 14 \n", "1 2 2 2 1 0 9 0 \n", "2 3 2 2 1 0 27 9 \n", "3 4 1 15 1 0 12 0 \n", "4 5 1 46 1 0 11 16 \n", "5 6 2 44 2 5546 21 0 \n", "6 7 1 3 1 2000 41 0 \n", "7 8 1 10 1 6016 20 3 \n", "8 9 2 32 1 2428 4 10 \n", "9 10 1 23 1 0 18 56 \n", "10 11 1 46 1 4601 54 0 \n", "11 12 1 10 1 3000 20 0 \n", "12 13 1 6 1 0 45 2 \n", "13 14 2 38 1 9000 41 3 \n", "14 15 1 27 1 5227 60 0 \n", "15 16 1 44 1 0 22 0 \n", "16 17 2 18 1 13970 20 0 \n", "17 18 1 35 1 3113 13 6 \n", "18 19 1 5 1 9000 20 2 \n", "19 20 2 31 1 1860 21 10 \n", "20 21 1 39 1 4000 24 0 \n", "21 22 1 34 1 0 22 0 \n", "22 23 1 5 1 0 7 0 \n", "23 24 2 21 1 0 15 0 \n", "24 25 1 25 1 0 12 0 \n", "25 26 2 29 1 5000 4 9 \n", "26 27 1 38 1 4000 21 5 \n", "27 28 1 9 1 12000 20 0 \n", "28 29 2 20 1 0 19 0 \n", "29 30 2 49 1 5192 84 0 \n", "... ... ... ... ... ... ... ... \n", "9999970 9999971 1 10 1 0 12 0 \n", "9999971 9999972 2 40 1 3683 31 3 \n", "9999972 9999973 1 46 1 5000 69 0 \n", "9999973 9999974 1 10 2 0 36 31 \n", "9999974 9999975 1 25 1 0 18 0 \n", "9999975 9999976 2 48 1 4210 23 0 \n", "9999976 9999977 2 4 1 0 14 0 \n", "9999977 9999978 1 35 1 8000 24 0 \n", "9999978 9999979 1 44 1 5000 12 0 \n", "9999979 9999980 1 6 1 7669 5 3 \n", "9999980 9999981 1 10 1 13889 54 0 \n", "9999981 9999982 1 33 1 0 23 26 \n", "9999982 9999983 1 44 1 3000 14 0 \n", "9999983 9999984 2 13 1 0 2 0 \n", "9999984 9999985 1 39 1 7000 30 0 \n", "9999985 9999986 2 45 2 3000 4 0 \n", "9999986 9999987 1 23 1 7000 59 0 \n", "9999987 9999988 2 24 1 6000 46 0 \n", "9999988 9999989 1 18 1 15000 72 0 \n", "9999989 9999990 2 4 1 12000 17 7 \n", "9999990 9999991 1 16 2 1561 8 0 \n", "9999991 9999992 2 36 1 5217 6 0 \n", "9999992 9999993 1 38 1 0 7 0 \n", "9999993 9999994 2 43 1 2607 6 0 \n", "9999994 9999995 1 16 2 17376 3 0 \n", "9999995 9999996 1 37 1 0 10 0 \n", "9999996 9999997 1 16 1 0 33 2 \n", "9999997 9999998 1 24 1 9000 38 0 \n", 
"9999998 9999999 1 28 1 7000 20 19 \n", "9999999 10000000 1 23 1 0 13 0 \n", "\n", " creditLine fraudRisk \n", "0 2 0 \n", "1 18 0 \n", "2 16 0 \n", "3 5 0 \n", "4 7 0 \n", "5 13 0 \n", "6 1 0 \n", "7 6 0 \n", "8 22 0 \n", "9 5 0 \n", "10 4 0 \n", "11 2 0 \n", "12 4 0 \n", "13 8 0 \n", "14 17 0 \n", "15 5 0 \n", "16 13 0 \n", "17 8 0 \n", "18 8 0 \n", "19 8 0 \n", "20 3 0 \n", "21 3 0 \n", "22 11 0 \n", "23 3 0 \n", "24 65 0 \n", "25 4 0 \n", "26 3 0 \n", "27 11 0 \n", "28 2 0 \n", "29 13 1 \n", "... ... ... \n", "9999970 11 0 \n", "9999971 7 0 \n", "9999972 4 0 \n", "9999973 14 0 \n", "9999974 3 0 \n", "9999975 8 0 \n", "9999976 7 0 \n", "9999977 7 0 \n", "9999978 4 0 \n", "9999979 7 0 \n", "9999980 13 0 \n", "9999981 18 0 \n", "9999982 2 0 \n", "9999983 5 0 \n", "9999984 6 0 \n", "9999985 2 0 \n", "9999986 6 0 \n", "9999987 5 0 \n", "9999988 14 1 \n", "9999989 11 0 \n", "9999990 5 0 \n", "9999991 5 0 \n", "9999992 2 0 \n", "9999993 5 0 \n", "9999994 20 0 \n", "9999995 9 0 \n", "9999996 4 0 \n", "9999997 8 0 \n", "9999998 6 0 \n", "9999999 7 0 \n", "\n", "[10000000 rows x 9 columns]" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.iloc[:,:]" ] }, { "cell_type": "code", "execution_count": 139, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderstatecardholder
101461
111101
12161
132381
141271
151441
162181
171351
18151
192311
\n", "
" ], "text/plain": [ " gender state cardholder\n", "10 1 46 1\n", "11 1 10 1\n", "12 1 6 1\n", "13 2 38 1\n", "14 1 27 1\n", "15 1 44 1\n", "16 2 18 1\n", "17 1 35 1\n", "18 1 5 1\n", "19 2 31 1" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.iloc[10:20,1:4]" ] }, { "cell_type": "code", "execution_count": 140, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
count1.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+071.000000e+07
mean5.000000e+061.382177e+002.466127e+011.030004e+004.109920e+032.893519e+014.047190e+009.134469e+005.960140e-02
std2.886751e+064.859195e-011.497012e+011.705991e-013.996847e+032.655378e+018.602970e+009.641974e+002.367469e-01
min1.000000e+001.000000e+001.000000e+001.000000e+000.000000e+000.000000e+000.000000e+001.000000e+000.000000e+00
25%2.500001e+061.000000e+001.000000e+011.000000e+000.000000e+001.000000e+010.000000e+004.000000e+000.000000e+00
50%5.000000e+061.000000e+002.400000e+011.000000e+003.706000e+031.900000e+010.000000e+006.000000e+000.000000e+00
75%7.500000e+062.000000e+003.800000e+011.000000e+006.000000e+033.900000e+014.000000e+001.100000e+010.000000e+00
max1.000000e+072.000000e+005.100000e+012.000000e+004.148500e+041.000000e+026.000000e+017.500000e+011.000000e+00
\n", "
" ], "text/plain": [ " custID gender state cardholder balance \\\n", "count 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 \n", "mean 5.000000e+06 1.382177e+00 2.466127e+01 1.030004e+00 4.109920e+03 \n", "std 2.886751e+06 4.859195e-01 1.497012e+01 1.705991e-01 3.996847e+03 \n", "min 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 \n", "25% 2.500001e+06 1.000000e+00 1.000000e+01 1.000000e+00 0.000000e+00 \n", "50% 5.000000e+06 1.000000e+00 2.400000e+01 1.000000e+00 3.706000e+03 \n", "75% 7.500000e+06 2.000000e+00 3.800000e+01 1.000000e+00 6.000000e+03 \n", "max 1.000000e+07 2.000000e+00 5.100000e+01 2.000000e+00 4.148500e+04 \n", "\n", " numTrans numIntlTrans creditLine fraudRisk \n", "count 1.000000e+07 1.000000e+07 1.000000e+07 1.000000e+07 \n", "mean 2.893519e+01 4.047190e+00 9.134469e+00 5.960140e-02 \n", "std 2.655378e+01 8.602970e+00 9.641974e+00 2.367469e-01 \n", "min 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 \n", "25% 1.000000e+01 0.000000e+00 4.000000e+00 0.000000e+00 \n", "50% 1.900000e+01 0.000000e+00 6.000000e+00 0.000000e+00 \n", "75% 3.900000e+01 4.000000e+00 1.100000e+01 0.000000e+00 \n", "max 1.000000e+02 6.000000e+01 7.500000e+01 1.000000e+00 " ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.describe()" ] }, { "cell_type": "code", "execution_count": 141, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 6178231\n", "2 3821769\n", "Name: gender, dtype: int64" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.gender.value_counts()" ] }, { "cell_type": "code", "execution_count": 142, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "5 1216069\n", "44 812638\n", "10 608630\n", "35 608575\n", "39 405892\n", "15 404720\n", "36 364531\n", "23 304553\n", "11 303984\n", "29 303833\n", "32 284428\n", "46 252812\n", "43 203827\n", "16 203143\n", "25 
203045\n", "48 202972\n", "4 202776\n", "21 202444\n", "20 201918\n", "49 182557\n", "24 182201\n", "6 171774\n", "2 162574\n", "41 152253\n", "19 151715\n", "18 142170\n", "37 122191\n", "38 121846\n", "7 121802\n", "13 111775\n", "26 101829\n", "3 101740\n", "45 91375\n", "34 91326\n", "17 91127\n", "33 81332\n", "50 61385\n", "14 60992\n", "28 60617\n", "12 50438\n", "22 40819\n", "31 40563\n", "9 30333\n", "40 30233\n", "27 30131\n", "51 20691\n", "8 20603\n", "42 20449\n", "30 20215\n", "1 20137\n", "47 20017\n", "Name: state, dtype: int64" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.state.value_counts()" ] }, { "cell_type": "code", "execution_count": 143, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 9403986\n", "1 596014\n", "Name: fraudRisk, dtype: int64" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.fraudRisk.value_counts()" ] }, { "cell_type": "code", "execution_count": 144, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gender12
fraudRisk
058530533550933
1325178270836
\n", "
" ], "text/plain": [ "gender 1 2\n", "fraudRisk \n", "0 5853053 3550933\n", "1 325178 270836" ] }, "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(fraud.fraudRisk,fraud.gender)" ] }, { "cell_type": "code", "execution_count": 145, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gender12All
fraudRisk
0585305335509339403986
1325178270836596014
All6178231382176910000000
\n", "
" ], "text/plain": [ "gender 1 2 All\n", "fraudRisk \n", "0 5853053 3550933 9403986\n", "1 325178 270836 596014\n", "All 6178231 3821769 10000000" ] }, "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(fraud.fraudRisk,fraud.gender,margins=True)" ] }, { "cell_type": "code", "execution_count": 148, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([36, 51, 88, 10, 86, 2, 92, 45, 22, 33])" ] }, "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.random.choice(100,10)" ] }, { "cell_type": "code", "execution_count": 150, "metadata": { "collapsed": false }, "outputs": [], "source": [ "a=len(fraud)" ] }, { "cell_type": "code", "execution_count": 151, "metadata": { "collapsed": true }, "outputs": [], "source": [ "b=0.0001" ] }, { "cell_type": "code", "execution_count": 152, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "10.0" ] }, "execution_count": 152, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a*b" ] }, { "cell_type": "code", "execution_count": 154, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Dell\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:1: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n", " if __name__ == '__main__':\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
121370412137051411600034050
9188257918825814106060
7040966704096713210150150
910698691069871361200028010
244177624417771511000039590
641512664151272391010260
630065963006601111800041770
952480952481161600010050
44078534407854141103030
30297283029729191400032030
\n", "
" ], "text/plain": [ " custID gender state cardholder balance numTrans numIntlTrans \\\n", "1213704 1213705 1 41 1 6000 34 0 \n", "9188257 9188258 1 4 1 0 6 0 \n", "7040966 7040967 1 32 1 0 15 0 \n", "9106986 9106987 1 36 1 2000 28 0 \n", "2441776 2441777 1 5 1 10000 39 5 \n", "6415126 6415127 2 39 1 0 10 2 \n", "6300659 6300660 1 11 1 8000 4 17 \n", "952480 952481 1 6 1 6000 10 0 \n", "4407853 4407854 1 41 1 0 3 0 \n", "3029728 3029729 1 9 1 4000 32 0 \n", "\n", " creditLine fraudRisk \n", "1213704 5 0 \n", "9188257 6 0 \n", "7040966 15 0 \n", "9106986 1 0 \n", "2441776 9 0 \n", "6415126 6 0 \n", "6300659 7 0 \n", "952480 5 0 \n", "4407853 3 0 \n", "3029728 3 0 " ] }, "execution_count": 154, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Draw int(round(a*b)) random row positions (with replacement) and select them.\n", "# .ix was deprecated and later removed from pandas, and numpy rejects a float\n", "# sample size, so index by position with .iloc and pass an explicit integer.\n", "fraud.iloc[np.random.choice(len(fraud),int(round(a*b)))]" ] }, { "cell_type": "code", "execution_count": 156, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pandasql in c:\\users\\dell\\anaconda3\\lib\\site-packages\n", "Requirement already satisfied: pandas in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n", "Requirement already satisfied: numpy in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n", "Requirement already satisfied: sqlalchemy in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n", "Requirement already satisfied: python-dateutil>=2 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas->pandasql)\n", "Requirement already satisfied: pytz>=2011k in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas->pandasql)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from python-dateutil>=2->pandas->pandasql)\n" ] } ], "source": [ "! 
pip install pandasql" ] }, { "cell_type": "code", "execution_count": 163, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from pandasql import sqldf\n", "\n", "\n", "def pysqldf(q):\n", "    \"\"\"Run the SQL string `q` through pandasql.sqldf, passing this notebook's globals() as the environment.\"\"\"\n", "    return sqldf(q, globals())" ] }, { "cell_type": "code", "execution_count": 157, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mpgcyldisphpdratwtqsecvsamgearcarb
021.06160.01103.902.62016.460144
121.06160.01103.902.87517.020144
222.84108.0933.852.32018.611141
321.46258.01103.083.21519.441031
418.78360.01753.153.44017.020032
\n", "
" ], "text/plain": [ " mpg cyl disp hp drat wt qsec vs am gear carb\n", "0 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4\n", "1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4\n", "2 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1\n", "3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1\n", "4 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2" ] }, "execution_count": 157, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars.head()" ] }, { "cell_type": "code", "execution_count": 164, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mpgcyldisphpdratwtqsecvsamgearcarb
021.06160.01103.902.62016.460144
121.06160.01103.902.87517.020144
222.84108.0933.852.32018.611141
321.46258.01103.083.21519.441031
418.78360.01753.153.44017.020032
518.16225.01052.763.46020.221031
614.38360.02453.213.57015.840034
724.44146.7623.693.19020.001042
822.84140.8953.923.15022.901042
919.26167.61233.923.44018.301044
\n", "
" ], "text/plain": [ " mpg cyl disp hp drat wt qsec vs am gear carb\n", "0 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4\n", "1 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4\n", "2 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1\n", "3 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1\n", "4 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2\n", "5 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1\n", "6 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4\n", "7 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2\n", "8 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2\n", "9 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4" ] }, "execution_count": 164, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pysqldf(\"SELECT * FROM mtcars LIMIT 10;\")\n" ] }, { "cell_type": "code", "execution_count": 165, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mpgcyldisphpdratwtqsecvsamgearcarb
026.04120.3914.432.14016.70152
130.4495.11133.771.51316.91152
215.88351.02644.223.17014.50154
319.76145.01753.622.77015.50156
415.08301.03353.543.57014.60158
\n", "
" ], "text/plain": [ " mpg cyl disp hp drat wt qsec vs am gear carb\n", "0 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2\n", "1 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2\n", "2 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4\n", "3 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6\n", "4 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8" ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pysqldf(\"SELECT * FROM mtcars WHERE gear > 4;\")\n" ] }, { "cell_type": "code", "execution_count": 166, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AVG(mpg)gear
016.1066673
124.5333334
221.3800005
\n", "
" ], "text/plain": [ " AVG(mpg) gear\n", "0 16.106667 3\n", "1 24.533333 4\n", "2 21.380000 5" ] }, "execution_count": 166, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pysqldf(\"SELECT AVG(mpg),gear FROM mtcars group by gear ;\")\n" ] }, { "cell_type": "code", "execution_count": 167, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "20.090624999999996" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars.mpg.mean()" ] }, { "cell_type": "code", "execution_count": 169, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Group the cars by gear count. The top-level pd.groupby() helper was deprecated\n", "# and later removed from pandas, so use the DataFrame method instead.\n", "g1=mtcars.groupby('gear')" ] }, { "cell_type": "code", "execution_count": 170, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mpgcyldisphpdratwtqsecvsamcarb
gear
316.1066677.466667326.300000176.1333333.1326673.89260017.6920.2000000.0000002.666667
424.5333334.666667123.01666789.5000004.0433332.61666718.9650.8333330.6666672.333333
521.3800006.000000202.480000195.6000003.9160002.63260015.6400.2000001.0000004.400000
\n", "
" ], "text/plain": [ " mpg cyl disp hp drat wt qsec \\\n", "gear \n", "3 16.106667 7.466667 326.300000 176.133333 3.132667 3.892600 17.692 \n", "4 24.533333 4.666667 123.016667 89.500000 4.043333 2.616667 18.965 \n", "5 21.380000 6.000000 202.480000 195.600000 3.916000 2.632600 15.640 \n", "\n", " vs am carb \n", "gear \n", "3 0.200000 0.000000 2.666667 \n", "4 0.833333 0.666667 2.333333 \n", "5 0.200000 1.000000 4.400000 " ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g1.mean()" ] }, { "cell_type": "code", "execution_count": 171, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "3 15\n", "4 12\n", "5 5\n", "Name: gear, dtype: int64" ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars.gear.value_counts()" ] }, { "cell_type": "code", "execution_count": 173, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([6, 4, 8], dtype=int64)" ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars.cyl.unique()" ] }, { "cell_type": "code", "execution_count": 174, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cyl468
gear
31212
4840
5212
\n", "
" ], "text/plain": [ "cyl 4 6 8\n", "gear \n", "3 1 2 12\n", "4 8 4 0\n", "5 2 1 2" ] }, "execution_count": 174, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(mtcars.gear,mtcars.cyl)" ] }, { "cell_type": "code", "execution_count": 175, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cyl468
gear
321.50019.7515.05
426.92519.750.00
528.20019.7015.40
\n", "
" ], "text/plain": [ "cyl 4 6 8\n", "gear \n", "3 21.500 19.75 15.05\n", "4 26.925 19.75 0.00\n", "5 28.200 19.70 15.40" ] }, "execution_count": 175, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mtcars.pivot_table(index='gear', columns='cyl', values='mpg', fill_value=0)" ] }, { "cell_type": "code", "execution_count": 176, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
custIDgenderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
011351300041420
12221090180
232210279160
341151012050
4514610111670
\n", "
" ], "text/plain": [ " custID gender state cardholder balance numTrans numIntlTrans \\\n", "0 1 1 35 1 3000 4 14 \n", "1 2 2 2 1 0 9 0 \n", "2 3 2 2 1 0 27 9 \n", "3 4 1 15 1 0 12 0 \n", "4 5 1 46 1 0 11 16 \n", "\n", " creditLine fraudRisk \n", "0 2 0 \n", "1 18 0 \n", "2 16 0 \n", "3 5 0 \n", "4 7 0 " ] }, "execution_count": 176, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.head()" ] }, { "cell_type": "code", "execution_count": 181, "metadata": { "collapsed": false }, "outputs": [], "source": [ "del fraud['custID']" ] }, { "cell_type": "code", "execution_count": 182, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderstatecardholderbalancenumTransnumIntlTranscreditLinefraudRisk
01351300041420
1221090180
22210279160
31151012050
414610111670
\n", "
" ], "text/plain": [ " gender state cardholder balance numTrans numIntlTrans creditLine \\\n", "0 1 35 1 3000 4 14 2 \n", "1 2 2 1 0 9 0 18 \n", "2 2 2 1 0 27 9 16 \n", "3 1 15 1 0 12 0 5 \n", "4 1 46 1 0 11 16 7 \n", "\n", " fraudRisk \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 " ] }, "execution_count": 182, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud.head()" ] }, { "cell_type": "code", "execution_count": 183, "metadata": { "collapsed": true }, "outputs": [], "source": [ "fraud3=fraud" ] }, { "cell_type": "code", "execution_count": 186, "metadata": { "collapsed": false }, "outputs": [], "source": [ "del fraud['state']" ] }, { "cell_type": "code", "execution_count": 187, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gendercardholderbalancenumTransnumIntlTranscreditLinefraudRisk
011300041420
121090180
2210279160
311012050
4110111670
\n", "
" ], "text/plain": [ " gender cardholder balance numTrans numIntlTrans creditLine fraudRisk\n", "0 1 1 3000 4 14 2 0\n", "1 2 1 0 9 0 18 0\n", "2 2 1 0 27 9 16 0\n", "3 1 1 0 12 0 5 0\n", "4 1 1 0 11 16 7 0" ] }, "execution_count": 187, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fraud3.head()" ] }, { "cell_type": "code", "execution_count": 190, "metadata": { "collapsed": true }, "outputs": [], "source": [ "wine=pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\",header=None)" ] }, { "cell_type": "code", "execution_count": 191, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 178 entries, 0 to 177\n", "Data columns (total 14 columns):\n", "0 178 non-null int64\n", "1 178 non-null float64\n", "2 178 non-null float64\n", "3 178 non-null float64\n", "4 178 non-null float64\n", "5 178 non-null int64\n", "6 178 non-null float64\n", "7 178 non-null float64\n", "8 178 non-null float64\n", "9 178 non-null float64\n", "10 178 non-null float64\n", "11 178 non-null float64\n", "12 178 non-null float64\n", "13 178 non-null int64\n", "dtypes: float64(11), int64(3)\n", "memory usage: 19.5 KB\n" ] } ], "source": [ "wine.info()" ] }, { "cell_type": "code", "execution_count": 200, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "wine.columns=['WineClass','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315 of diluted wines','Proline'] " ] }, { "cell_type": "code", "execution_count": 201, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 178 entries, 0 to 177\n", "Data columns (total 14 columns):\n", "WineClass 178 non-null int64\n", "Alcohol 178 non-null float64\n", "Malic acid 178 non-null float64\n", "Ash 178 non-null float64\n", 
"Alcalinity of ash 178 non-null float64\n", "Magnesium 178 non-null int64\n", "Total phenols 178 non-null float64\n", "Flavanoids 178 non-null float64\n", "Nonflavanoid phenols 178 non-null float64\n", "Proanthocyanins 178 non-null float64\n", "Color intensity 178 non-null float64\n", "Hue 178 non-null float64\n", "OD280/OD315 of diluted wines 178 non-null float64\n", "Proline 178 non-null int64\n", "dtypes: float64(11), int64(3)\n", "memory usage: 19.5 KB\n" ] } ], "source": [ "wine.info()" ] }, { "cell_type": "code", "execution_count": 202, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WineClassAlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
0114.231.712.4315.61272.803.060.282.295.641.043.921065
1113.201.782.1411.21002.652.760.261.284.381.053.401050
2113.162.362.6718.61012.803.240.302.815.681.033.171185
3114.371.952.5016.81133.853.490.242.187.800.863.451480
4113.242.592.8721.01182.802.690.391.824.321.042.93735
\n", "
" ], "text/plain": [ " WineClass Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n", "0 1 14.23 1.71 2.43 15.6 127 \n", "1 1 13.20 1.78 2.14 11.2 100 \n", "2 1 13.16 2.36 2.67 18.6 101 \n", "3 1 14.37 1.95 2.50 16.8 113 \n", "4 1 13.24 2.59 2.87 21.0 118 \n", "\n", " Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins \\\n", "0 2.80 3.06 0.28 2.29 \n", "1 2.65 2.76 0.26 1.28 \n", "2 2.80 3.24 0.30 2.81 \n", "3 3.85 3.49 0.24 2.18 \n", "4 2.80 2.69 0.39 1.82 \n", "\n", " Color intensity Hue OD280/OD315 of diluted wines Proline \n", "0 5.64 1.04 3.92 1065 \n", "1 4.38 1.05 3.40 1050 \n", "2 5.68 1.03 3.17 1185 \n", "3 7.80 0.86 3.45 1480 \n", "4 4.32 1.04 2.93 735 " ] }, "execution_count": 202, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine.head()" ] }, { "cell_type": "code", "execution_count": 204, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2 71\n", "1 59\n", "3 48\n", "Name: WineClass, dtype: int64" ] }, "execution_count": 204, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine.WineClass.value_counts()" ] }, { "cell_type": "code", "execution_count": 205, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Group the wines by class label. The top-level pd.groupby() helper was deprecated\n", "# and later removed from pandas, so use the DataFrame method instead.\n", "classby=wine.groupby('WineClass')" ] }, { "cell_type": "code", "execution_count": 206, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
WineClass
113.7447462.0106782.45559317.037288106.3389832.8401692.9823730.2900001.8993225.5283051.0620343.1577971115.711864
212.2787321.9326762.24478920.23802894.5492962.2588732.0808450.3636621.6302823.0866201.0562822.785352519.507042
313.1537503.3337502.43708321.41666799.3125001.6787500.7814580.4475001.1535427.3962500.6827081.683542629.895833
\n", "
" ], "text/plain": [ " Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n", "WineClass \n", "1 13.744746 2.010678 2.455593 17.037288 106.338983 \n", "2 12.278732 1.932676 2.244789 20.238028 94.549296 \n", "3 13.153750 3.333750 2.437083 21.416667 99.312500 \n", "\n", " Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins \\\n", "WineClass \n", "1 2.840169 2.982373 0.290000 1.899322 \n", "2 2.258873 2.080845 0.363662 1.630282 \n", "3 1.678750 0.781458 0.447500 1.153542 \n", "\n", " Color intensity Hue OD280/OD315 of diluted wines \\\n", "WineClass \n", "1 5.528305 1.062034 3.157797 \n", "2 3.086620 1.056282 2.785352 \n", "3 7.396250 0.682708 1.683542 \n", "\n", " Proline \n", "WineClass \n", "1 1115.711864 \n", "2 519.507042 \n", "3 629.895833 " ] }, "execution_count": 206, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classby.mean()" ] }, { "cell_type": "code", "execution_count": 207, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WineClassAlcoholMalic acidAshAlcalinity of ashMagnesiumTotal phenolsFlavanoidsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
count178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000
mean1.93820213.0006182.3363482.36651719.49494499.7415732.2951122.0292700.3618541.5908995.0580900.9574492.611685746.893258
std0.7750350.8118271.1171460.2743443.33956414.2824840.6258510.9988590.1244530.5723592.3182860.2285720.709990314.907474
min1.00000011.0300000.7400001.36000010.60000070.0000000.9800000.3400000.1300000.4100001.2800000.4800001.270000278.000000
25%1.00000012.3625001.6025002.21000017.20000088.0000001.7425001.2050000.2700001.2500003.2200000.7825001.937500500.500000
50%2.00000013.0500001.8650002.36000019.50000098.0000002.3550002.1350000.3400001.5550004.6900000.9650002.780000673.500000
75%3.00000013.6775003.0825002.55750021.500000107.0000002.8000002.8750000.4375001.9500006.2000001.1200003.170000985.000000
max3.00000014.8300005.8000003.23000030.000000162.0000003.8800005.0800000.6600003.58000013.0000001.7100004.0000001680.000000
\n", "
" ], "text/plain": [ " WineClass Alcohol Malic acid Ash Alcalinity of ash \\\n", "count 178.000000 178.000000 178.000000 178.000000 178.000000 \n", "mean 1.938202 13.000618 2.336348 2.366517 19.494944 \n", "std 0.775035 0.811827 1.117146 0.274344 3.339564 \n", "min 1.000000 11.030000 0.740000 1.360000 10.600000 \n", "25% 1.000000 12.362500 1.602500 2.210000 17.200000 \n", "50% 2.000000 13.050000 1.865000 2.360000 19.500000 \n", "75% 3.000000 13.677500 3.082500 2.557500 21.500000 \n", "max 3.000000 14.830000 5.800000 3.230000 30.000000 \n", "\n", " Magnesium Total phenols Flavanoids Nonflavanoid phenols \\\n", "count 178.000000 178.000000 178.000000 178.000000 \n", "mean 99.741573 2.295112 2.029270 0.361854 \n", "std 14.282484 0.625851 0.998859 0.124453 \n", "min 70.000000 0.980000 0.340000 0.130000 \n", "25% 88.000000 1.742500 1.205000 0.270000 \n", "50% 98.000000 2.355000 2.135000 0.340000 \n", "75% 107.000000 2.800000 2.875000 0.437500 \n", "max 162.000000 3.880000 5.080000 0.660000 \n", "\n", " Proanthocyanins Color intensity Hue \\\n", "count 178.000000 178.000000 178.000000 \n", "mean 1.590899 5.058090 0.957449 \n", "std 0.572359 2.318286 0.228572 \n", "min 0.410000 1.280000 0.480000 \n", "25% 1.250000 3.220000 0.782500 \n", "50% 1.555000 4.690000 0.965000 \n", "75% 1.950000 6.200000 1.120000 \n", "max 3.580000 13.000000 1.710000 \n", "\n", " OD280/OD315 of diluted wines Proline \n", "count 178.000000 178.000000 \n", "mean 2.611685 746.893258 \n", "std 0.709990 314.907474 \n", "min 1.270000 278.000000 \n", "25% 1.937500 500.500000 \n", "50% 2.780000 673.500000 \n", "75% 3.170000 985.000000 \n", "max 4.000000 1680.000000 " ] }, "execution_count": 207, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine.describe()" ] }, { "cell_type": "code", "execution_count": 212, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 178.000000\n", "mean 2.366517\n", "std 0.274344\n", "min 1.360000\n", "25% 2.210000\n", "50% 
2.360000\n", "75% 2.557500\n", "max 3.230000\n", "Name: Ash, dtype: float64" ] }, "execution_count": 212, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine.Ash.describe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }