{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from random import randint,randrange" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4\n", "7\n", "3\n", "0\n", "9\n", "13\n", "5\n", "3\n", "6\n", "4\n", "14\n", "6\n", "7\n", "0\n", "9\n", "4\n", "8\n", "10\n", "14\n", "1\n", "14\n", "2\n", "7\n", "13\n", "14\n", "10\n", "14\n", "7\n", "0\n", "13\n", "3\n", "6\n", "14\n", "5\n", "13\n", "5\n", "11\n", "2\n", "4\n", "5\n" ] } ], "source": [ "for x in range(10,50):\n", " print(randrange(15))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def mynewfunction(x,y):\n", " z=x**2+3*x**2*y+20*y**2\n", " print(z)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "190\n" ] } ], "source": [ "mynewfunction(1,3)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "27100\n" ] } ], "source": [ "mynewfunction(10,30)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def mybadfunction(x,y):\n", " z=x**2+3*x**2*y+20*y**2\n", " return(z)\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "24" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mybadfunction(1,1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "scores=(23,46,69,7,5)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tuple" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(scores)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "sc=(46,45)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tuple" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(sc)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(23, 46, 69, 7, 5, 46, 45)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scores+sc" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": true }, "outputs": [], "source": [ "favourite_movie2={'micky mouse':'steamboat willie','vijay':'slumdog millionaire','john':'passion of christ','donald':'arthur'}\n", " \n" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(favourite_movie2)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'slumdog millionaire'" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "favourite_movie2['vijay']" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import re" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": true }, "outputs": [], "source": [ "names=[\"Agung\",\"Deja\", \"Brahm\",\"Nathan\",\"Ratna\",\"Naufal\",\"Scholly\",\"Siska\",\"Bintang\",\"Sandra\"]\n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n", "None\n", "None\n", "<_sre.SRE_Match object; span=(4, 6), match='an'>\n", "None\n", "None\n", "None\n", "None\n", "<_sre.SRE_Match object; span=(4, 6), match='an'>\n", "<_sre.SRE_Match object; span=(1, 3), match='an'>\n" ] } ], "source": [ "for name in names:\n", " print (re.search(r'(an)',name))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "False\n", "False\n", "False\n", "True\n", "False\n", "False\n", "False\n", "False\n", "True\n", "True\n" ] } ], "source": [ "for name in names:\n", " print (bool(re.search(r'(an)',name)))" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import re\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": true }, "outputs": [], "source": [ "numlist=[\"$60000\",\"$80,000\",\"30,000\",70000,\"55000 \"]\n" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "enumerate?" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'60000'" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ " re.sub(r\"([$,])\",\"\",str(\"$60000\"))\n" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60000" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "int('60000')" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[60000, '$80,000', '30,000', 70000, '55000 ']\n", "[60000, 80000, '30,000', 70000, '55000 ']\n", "[60000, 80000, 30000, 70000, '55000 ']\n", "[60000, 80000, 30000, 70000, '55000 ']\n", "[60000, 80000, 30000, 70000, 55000]\n" ] } ], "source": [ "for i,value in enumerate(numlist):\n", " numlist[i]=re.sub(r\"([$,])\",\"\",str(value))\n", " numlist[i]=int(numlist[i])\n", " print(numlist)\n", " " ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[60000, 80000, 30000, 70000, 55000]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "numlist" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "59000.0" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(numlist)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from datetime import datetime" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "datetime_object = datetime.strptime('June/17/2016 1:33PM', '%B/%d/%Y %I:%M%p')" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "datetime.datetime(2016, 6, 17, 13, 33)" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "datetime_object" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": true }, "outputs": [], "source": [ "date_object2=datetime.strptime(\"12dec-2007\",\"%d%b-%Y\")" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "datetime.datetime(2007, 12, 12, 0, 0)" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "date_object2" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "a=date_object2-datetime_object" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "datetime.timedelta(-3111, 37620)" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-3111" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.days" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-103.7" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.days/30" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from dateutil import relativedelta\n" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": true }, "outputs": [], "source": [ "r =- relativedelta.relativedelta(date_object2, datetime_object)\n" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r.months" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r.years" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def f(x):return x**3+3*x**2\n" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f(1)" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "function" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(f)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": true }, "outputs": [], "source": [ "g=lambda x:x**3+3*x**2\n" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1300" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g(10)" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "function" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(g)" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import os as os" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "diamonds=pd.read_csv(\"C:\\\\Users\\\\KOGENTIX\\\\Desktop\\\\training\\\\BigDiamonds.csv\\\\BigDiamonds.csv\")" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 598024 entries, 0 to 598023\n", "Data columns (total 13 columns):\n", "Unnamed: 0 598024 non-null int64\n", "carat 598024 non-null float64\n", "cut 598024 non-null object\n", "color 598024 non-null object\n", "clarity 598024 non-null object\n", "table 598024 non-null float64\n", "depth 598024 non-null float64\n", "cert 598024 non-null object\n", "measurements 597978 non-null object\n", "price 597311 non-null float64\n", "x 596209 non-null float64\n", "y 596172 non-null float64\n", "z 595480 non-null float64\n", "dtypes: float64(7), int64(1), object(5)\n", "memory usage: 59.3+ MB\n" ] } ], "source": [ "diamonds.info()" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "adult=pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",header=None)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'this is \\na multiple\\nline comment\\n'" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#pd.read_csv?\n", "'''this is \n", "a multiple\n", "line comment\n", "'''" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234567891011121314
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 \\\n", "0 39 State-gov 77516 Bachelors 13 Never-married \n", "1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse \n", "2 38 Private 215646 HS-grad 9 Divorced \n", "3 53 Private 234721 11th 7 Married-civ-spouse \n", "4 28 Private 338409 Bachelors 13 Married-civ-spouse \n", "\n", " 6 7 8 9 10 11 12 \\\n", "0 Adm-clerical Not-in-family White Male 2174 0 40 \n", "1 Exec-managerial Husband White Male 0 0 13 \n", "2 Handlers-cleaners Not-in-family White Male 0 0 40 \n", "3 Handlers-cleaners Husband Black Male 0 0 40 \n", "4 Prof-specialty Wife Black Female 0 0 40 \n", "\n", " 13 14 \n", "0 United-States <=50K \n", "1 United-States <=50K \n", "2 United-States <=50K \n", "3 United-States <=50K \n", "4 Cuba <=50K " ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult.head()" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], dtype='int64')" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult.columns" ] }, { "cell_type": "code", "execution_count": 102, "metadata": { "collapsed": true }, "outputs": [], "source": [ "adult.columns=[\"age \",\n", "\"workclass \",\n", "\"fnlwgt\",\n", "\"education \",\n", "\"education-num\",\n", "\"marital-status\",\n", "\"occupation\",\n", "\"relationship\",\n", "\"race\",\n", "\"sex\",\n", "\"capital-gain\",\n", "\"capital-loss\",\n", "\"hours-per-week\",\n", "\"native-country\",\n", "\"income\",\n", "]" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countryincome
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", "
" ], "text/plain": [ " age workclass fnlwgt education education-num \\\n", "0 39 State-gov 77516 Bachelors 13 \n", "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", "2 38 Private 215646 HS-grad 9 \n", "3 53 Private 234721 11th 7 \n", "4 28 Private 338409 Bachelors 13 \n", "\n", " marital-status occupation relationship race sex \\\n", "0 Never-married Adm-clerical Not-in-family White Male \n", "1 Married-civ-spouse Exec-managerial Husband White Male \n", "2 Divorced Handlers-cleaners Not-in-family White Male \n", "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", "4 Married-civ-spouse Prof-specialty Wife Black Female \n", "\n", " capital-gain capital-loss hours-per-week native-country income \n", "0 2174 0 40 United-States <=50K \n", "1 0 0 13 United-States <=50K \n", "2 0 0 40 United-States <=50K \n", "3 0 0 40 United-States <=50K \n", "4 0 0 40 Cuba <=50K " ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult.head()" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [], "source": [ "wb=pd.read_json(\"C:\\\\Users\\\\KOGENTIX\\\\Desktop\\\\training\\\\world_bank.json\",lines=True)" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idapprovalfyboard_approval_monthboardapprovaldateborrowerclosingdatecountry_namecodecountrycodecountrynamecountryshortname...sectorcodesourcestatussupplementprojectflgtheme1theme_namecodethemecodetotalamttotalcommamturl
0{'$oid': '52b213b38594d8a2be17c780'}1999November2013-11-12T00:00:00ZFEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA2018-07-07T00:00:00ZFederal Democratic Republic of Ethiopia!$!ETETFederal Democratic Republic of EthiopiaEthiopia...ET,BS,ES,EPIBRDActiveN{'Name': 'Education for all', 'Percent': 100}[{'name': 'Education for all', 'code': '65'}]65130000000130000000http://www.worldbank.org/projects/P129828/ethi...
1{'$oid': '52b213b38594d8a2be17c781'}2015November2013-11-04T00:00:00ZGOVERNMENT OF TUNISIANaNRepublic of Tunisia!$!TNTNRepublic of TunisiaTunisia...BZ,BSIBRDActiveN{'Name': 'Other economic management', 'Percent...[{'name': 'Other economic management', 'code':...54,2404700000http://www.worldbank.org/projects/P144674?lang=en
2{'$oid': '52b213b38594d8a2be17c782'}2014November2013-11-01T00:00:00ZMINISTRY OF FINANCE AND ECONOMIC DEVELNaNTuvalu!$!TVTVTuvaluTuvalu...TIIBRDActiveY{'Name': 'Regional integration', 'Percent': 46}[{'name': 'Regional integration', 'code': '47'...52,81,25,4760600006060000http://www.worldbank.org/projects/P145310?lang=en
3{'$oid': '52b213b38594d8a2be17c783'}2014October2013-10-31T00:00:00ZMIN. OF PLANNING AND INT'L COOPERATIONNaNRepublic of Yemen!$!RYRYRepublic of YemenYemen, Republic of...JBIBRDActiveN{'Name': 'Participation and civic engagement',...[{'name': 'Participation and civic engagement'...59,5701500000http://www.worldbank.org/projects/P144665?lang=en
4{'$oid': '52b213b38594d8a2be17c784'}2014October2013-10-31T00:00:00ZMINISTRY OF FINANCE2019-04-30T00:00:00ZKingdom of Lesotho!$!LSLSKingdom of LesothoLesotho...FH,YW,YZIBRDActiveN{'Name': 'Export development and competitivene...[{'name': 'Export development and competitiven...41,451310000013100000http://www.worldbank.org/projects/P144933/seco...
\n", "

5 rows × 50 columns

\n", "
" ], "text/plain": [ " _id approvalfy board_approval_month \\\n", "0 {'$oid': '52b213b38594d8a2be17c780'} 1999 November \n", "1 {'$oid': '52b213b38594d8a2be17c781'} 2015 November \n", "2 {'$oid': '52b213b38594d8a2be17c782'} 2014 November \n", "3 {'$oid': '52b213b38594d8a2be17c783'} 2014 October \n", "4 {'$oid': '52b213b38594d8a2be17c784'} 2014 October \n", "\n", " boardapprovaldate borrower \\\n", "0 2013-11-12T00:00:00Z FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA \n", "1 2013-11-04T00:00:00Z GOVERNMENT OF TUNISIA \n", "2 2013-11-01T00:00:00Z MINISTRY OF FINANCE AND ECONOMIC DEVEL \n", "3 2013-10-31T00:00:00Z MIN. OF PLANNING AND INT'L COOPERATION \n", "4 2013-10-31T00:00:00Z MINISTRY OF FINANCE \n", "\n", " closingdate country_namecode \\\n", "0 2018-07-07T00:00:00Z Federal Democratic Republic of Ethiopia!$!ET \n", "1 NaN Republic of Tunisia!$!TN \n", "2 NaN Tuvalu!$!TV \n", "3 NaN Republic of Yemen!$!RY \n", "4 2019-04-30T00:00:00Z Kingdom of Lesotho!$!LS \n", "\n", " countrycode countryname countryshortname \\\n", "0 ET Federal Democratic Republic of Ethiopia Ethiopia \n", "1 TN Republic of Tunisia Tunisia \n", "2 TV Tuvalu Tuvalu \n", "3 RY Republic of Yemen Yemen, Republic of \n", "4 LS Kingdom of Lesotho Lesotho \n", "\n", " ... sectorcode source \\\n", "0 ... ET,BS,ES,EP IBRD \n", "1 ... BZ,BS IBRD \n", "2 ... TI IBRD \n", "3 ... JB IBRD \n", "4 ... FH,YW,YZ IBRD \n", "\n", " status supplementprojectflg \\\n", "0 Active N \n", "1 Active N \n", "2 Active Y \n", "3 Active N \n", "4 Active N \n", "\n", " theme1 \\\n", "0 {'Name': 'Education for all', 'Percent': 100} \n", "1 {'Name': 'Other economic management', 'Percent... \n", "2 {'Name': 'Regional integration', 'Percent': 46} \n", "3 {'Name': 'Participation and civic engagement',... \n", "4 {'Name': 'Export development and competitivene... \n", "\n", " theme_namecode themecode totalamt \\\n", "0 [{'name': 'Education for all', 'code': '65'}] 65 130000000 \n", "1 [{'name': 'Other economic management', 'code':... 54,24 0 \n", "2 [{'name': 'Regional integration', 'code': '47'... 52,81,25,47 6060000 \n", "3 [{'name': 'Participation and civic engagement'... 59,57 0 \n", "4 [{'name': 'Export development and competitiven... 41,45 13100000 \n", "\n", " totalcommamt url \n", "0 130000000 http://www.worldbank.org/projects/P129828/ethi... \n", "1 4700000 http://www.worldbank.org/projects/P144674?lang=en \n", "2 6060000 http://www.worldbank.org/projects/P145310?lang=en \n", "3 1500000 http://www.worldbank.org/projects/P144665?lang=en \n", "4 13100000 http://www.worldbank.org/projects/P144933/seco... \n", "\n", "[5 rows x 50 columns]" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wb.head()" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['_id', 'approvalfy', 'board_approval_month', 'boardapprovaldate',\n", " 'borrower', 'closingdate', 'country_namecode', 'countrycode',\n", " 'countryname', 'countryshortname', 'docty', 'envassesmentcategorycode',\n", " 'grantamt', 'ibrdcommamt', 'id', 'idacommamt', 'impagency',\n", " 'lendinginstr', 'lendinginstrtype', 'lendprojectcost',\n", " 'majorsector_percent', 'mjsector_namecode', 'mjtheme',\n", " 'mjtheme_namecode', 'mjthemecode', 'prodline', 'prodlinetext',\n", " 'productlinetype', 'project_abstract', 'project_name', 'projectdocs',\n", " 'projectfinancialtype', 'projectstatusdisplay', 'regionname', 'sector',\n", " 'sector1', 'sector2', 'sector3', 'sector4', 'sector_namecode',\n", " 'sectorcode', 'source', 'status', 'supplementprojectflg', 'theme1',\n", " 'theme_namecode', 'themecode', 'totalamt', 'totalcommamt', 'url'],\n", " dtype='object')" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wb.columns" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(adult)" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[39, ' State-gov', 77516, ..., 40, ' United-States', ' <=50K'],\n", " [50, ' Self-emp-not-inc', 83311, ..., 13, ' United-States', ' <=50K'],\n", " [38, ' Private', 215646, ..., 40, ' United-States', ' <=50K'],\n", " ..., \n", " [58, ' Private', 151910, ..., 40, ' United-States', ' <=50K'],\n", " [22, ' Private', 201490, ..., 20, ' United-States', ' <=50K'],\n", " [52, ' Self-emp-inc', 287927, ..., 40, ' United-States', ' >50K']], dtype=object)" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult.values" ] }, { "cell_type": "code", "execution_count": 110, "metadata": { "collapsed": true }, "outputs": [], "source": [ "b=adult.values" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "numpy.ndarray" ] }, "execution_count": 111, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(b)" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "32561" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(b)" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0, 1, 2, ..., 32558, 32559, 32560])" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.arange(len(b))" ] }, { "cell_type": "code", "execution_count": 116, "metadata": { "collapsed": true }, "outputs": [], "source": [ "c=[\"age \",\n", "\"workclass \",\n", "\"fnlwgt\",\n", "\"education \",\n", "\"education-num\",\n", "\"marital-status\",\n", "\"occupation\",\n", "\"relationship\",\n", "\"race\",\n", "\"sex\",\n", "\"capital-gain\",\n", "\"capital-loss\",\n", "\"hours-per-week\",\n", "\"native-country\",\n", "\"income\",\n", "]" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[39, ' State-gov', 77516, ..., 40, ' United-States', ' <=50K'],\n", " [50, ' Self-emp-not-inc', 83311, ..., 13, ' United-States', ' <=50K'],\n", " [38, ' Private', 215646, ..., 40, ' United-States', ' <=50K'],\n", " ..., \n", " [58, ' Private', 151910, ..., 40, ' United-States', ' <=50K'],\n", " [22, ' Private', 201490, ..., 20, ' United-States', ' <=50K'],\n", " [52, ' Self-emp-inc', 287927, ..., 40, ' United-States', ' >50K']], dtype=object)" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['age ',\n", " 'workclass ',\n", " 'fnlwgt',\n", " 'education ',\n", " 'education-num',\n", " 'marital-status',\n", " 'occupation',\n", " 'relationship',\n", " 'race',\n", " 'sex',\n", " 'capital-gain',\n", " 'capital-loss',\n", " 'hours-per-week',\n", " 'native-country',\n", " 'income']" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "d=np.arange(len(b))" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0, 1, 2, ..., 32558, 32559, 32560])" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "collapsed": true }, "outputs": [], "source": [ "adult3=pd.DataFrame(data=b, # values\n", " index=d, # 1st column as index\n", " columns=c) # 1st row as the column names" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducation-nummarital-statusoccupationrelationshipracesexcapital-gaincapital-losshours-per-weeknative-countryincome
039State-gov77516Bachelors13Never-marriedAdm-clericalNot-in-familyWhiteMale2174040United-States<=50K
150Self-emp-not-inc83311Bachelors13Married-civ-spouseExec-managerialHusbandWhiteMale0013United-States<=50K
238Private215646HS-grad9DivorcedHandlers-cleanersNot-in-familyWhiteMale0040United-States<=50K
353Private23472111th7Married-civ-spouseHandlers-cleanersHusbandBlackMale0040United-States<=50K
428Private338409Bachelors13Married-civ-spouseProf-specialtyWifeBlackFemale0040Cuba<=50K
\n", "
" ], "text/plain": [ " age workclass fnlwgt education education-num \\\n", "0 39 State-gov 77516 Bachelors 13 \n", "1 50 Self-emp-not-inc 83311 Bachelors 13 \n", "2 38 Private 215646 HS-grad 9 \n", "3 53 Private 234721 11th 7 \n", "4 28 Private 338409 Bachelors 13 \n", "\n", " marital-status occupation relationship race sex \\\n", "0 Never-married Adm-clerical Not-in-family White Male \n", "1 Married-civ-spouse Exec-managerial Husband White Male \n", "2 Divorced Handlers-cleaners Not-in-family White Male \n", "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n", "4 Married-civ-spouse Prof-specialty Wife Black Female \n", "\n", " capital-gain capital-loss hours-per-week native-country income \n", "0 2174 0 40 United-States <=50K \n", "1 0 0 13 United-States <=50K \n", "2 0 0 40 United-States <=50K \n", "3 0 0 40 United-States <=50K \n", "4 0 0 40 Cuba <=50K " ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adult3.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }