{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from collections import OrderedDict\n",
"import pandas as pd\n",
"\n",
"data = OrderedDict([('data(xxx_a1)_first_type', 0.12),\n",
" ('data(xxx_a2)_first_type', 0.14),\n",
" ('test(xx_b15)_second_type', 0.15)])"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OrderedDict([('data(xxx_a1)_first_type', 0.12),\n",
" ('data(xxx_a2)_first_type', 0.14),\n",
" ('test(xx_b15)_second_type', 0.15)])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"odict_keys(['data(xxx_a1)_first_type', 'data(xxx_a2)_first_type', 'test(xx_b15)_second_type'])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.keys()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"odict_values([0.12, 0.14, 0.15])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.values()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"fields = ['Date', 'Name', 'SoldItem']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"df = pd.read_csv(r'/Users/msaha/Work/Projects/Flock/fiveobjects/github/reference/data/sales.csv', sep=' ')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Date | \n",
" Name | \n",
" SoldItem | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 15-Jul | \n",
" Joe | \n",
" TV | \n",
"
\n",
" \n",
" | 1 | \n",
" 15-Jul | \n",
" Joe | \n",
" Fridge | \n",
"
\n",
" \n",
" | 2 | \n",
" 15-Jul | \n",
" Joe | \n",
" Washing Machine | \n",
"
\n",
" \n",
" | 3 | \n",
" 15-Jul | \n",
" Joe | \n",
" TV | \n",
"
\n",
" \n",
" | 4 | \n",
" 15-Jul | \n",
" Joe | \n",
" Fridge | \n",
"
\n",
" \n",
" | 5 | \n",
" 15-Jul | \n",
" Mary | \n",
" Chair | \n",
"
\n",
" \n",
" | 6 | \n",
" 15-Jul | \n",
" Mary | \n",
" Fridge | \n",
"
\n",
" \n",
" | 7 | \n",
" 16-Jul | \n",
" Joe | \n",
" Fridge | \n",
"
\n",
" \n",
" | 8 | \n",
" 16-Jul | \n",
" Joe | \n",
" Fridge | \n",
"
\n",
" \n",
" | 9 | \n",
" 16-Jul | \n",
" Tim | \n",
" Washing Machine | \n",
"
\n",
" \n",
" | 10 | \n",
" 17-Jul | \n",
" Joe | \n",
" Washing Machine | \n",
"
\n",
" \n",
" | 11 | \n",
" 17-Jul | \n",
" Jimmy Washing Machine | \n",
" None | \n",
"
\n",
" \n",
" | 12 | \n",
" 17-Jul | \n",
" Joe | \n",
" Washing Machine | \n",
"
\n",
" \n",
" | 13 | \n",
" 17-Jul | \n",
" Joe | \n",
" Washing Machine | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Date Name SoldItem\n",
"0 15-Jul Joe TV\n",
"1 15-Jul Joe Fridge\n",
"2 15-Jul Joe Washing Machine\n",
"3 15-Jul Joe TV\n",
"4 15-Jul Joe Fridge\n",
"5 15-Jul Mary Chair\n",
"6 15-Jul Mary Fridge\n",
"7 16-Jul Joe Fridge\n",
"8 16-Jul Joe Fridge\n",
"9 16-Jul Tim Washing Machine\n",
"10 17-Jul Joe Washing Machine\n",
"11 17-Jul Jimmy Washing Machine None\n",
"12 17-Jul Joe Washing Machine\n",
"13 17-Jul Joe Washing Machine"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df_fridge = df[df['SoldItem'] == 'Fridge']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Date | \n",
" Name | \n",
" SoldItem | \n",
"
\n",
" \n",
" \n",
" \n",
" | 6 | \n",
" 15-Jul | \n",
" Mary | \n",
" Fridge | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Date Name SoldItem\n",
"6 15-Jul Mary Fridge"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_fridge"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"a = np.array([2,3,6])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 3, 6])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 3, 6])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[2, 3, 6]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.reshape(1,3)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"df=pd.DataFrame([[1,2],[3,4],[5,6],[7,8],[9,10]])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"a = df[0:3]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"b = df[3:]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 3 | \n",
" 7 | \n",
" 8 | \n",
"
\n",
" \n",
" | 4 | \n",
" 9 | \n",
" 10 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1\n",
"3 7 8\n",
"4 9 10"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"a = [['t1', 10, 12], ['t2', 21, 45], ['t3', 12, 23]]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"a = np.array(a)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([['t1', '10', '12'],\n",
" ['t2', '21', '45'],\n",
" ['t3', '12', '23']], dtype=' pd.to_datetime(df[1]), \"var\"]=1\n",
"df.loc[pd.to_datetime(df[0]) <= pd.to_datetime(df[1]), \"var\"]=0"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" var | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2019-07-05 | \n",
" 2019-08-01 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2019-08-18 | \n",
" 2019-09-03 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2019-10-02 | \n",
" 2019-05-03 | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 var\n",
"0 2019-07-05 2019-08-01 0.0\n",
"1 2019-08-18 2019-09-03 0.0\n",
"2 2019-10-02 2019-05-03 1.0"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"df['var1']=np.where(pd.to_datetime(df[0])>pd.to_datetime('2019-07-31'), 1, 0)\n",
"df['var2']=np.where(pd.to_datetime(df[1]) > pd.to_datetime(df[0]), 1, 0)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" var | \n",
" var1 | \n",
" var2 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2019-07-05 | \n",
" 2019-08-01 | \n",
" 0.0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2019-08-18 | \n",
" 2019-09-03 | \n",
" 0.0 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2019-10-02 | \n",
" 2019-05-03 | \n",
" 1.0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 var var1 var2\n",
"0 2019-07-05 2019-08-01 0.0 0 1\n",
"1 2019-08-18 2019-09-03 0.0 1 1\n",
"2 2019-10-02 2019-05-03 1.0 1 0"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}