-
-
Notifications
You must be signed in to change notification settings - Fork 220
Expand file tree
/
Copy pathindex.html
More file actions
163 lines (153 loc) · 182 KB
/
index.html
File metadata and controls
163 lines (153 loc) · 182 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
<!DOCTYPE html><html lang="en" class="" style="scroll-padding:60px"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1"/><title>Masked Arrays - Numpy Tutorials</title><meta property="og:title" content="Masked Arrays - Numpy Tutorials"/><meta name="generator" content="mystmd"/><meta name="keywords" content=""/><meta name="image" content="/numpy-tutorials/build/b77199e99a54e59b2e3c037c2cc90f21.svg"/><meta property="og:image" content="/numpy-tutorials/build/b77199e99a54e59b2e3c037c2cc90f21.svg"/><link rel="stylesheet" href="/numpy-tutorials/build/_assets/app-MOQGDXHO.css"/><link rel="stylesheet" href="/numpy-tutorials/build/_assets/thebe-core-VKVHG5VY.css"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/jupyter-matplotlib@0.11.3/css/mpl_widget.css"/><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.css"/><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.15.2/dist/katex.min.css" integrity="sha384-MlJdn/WNKDGXveldHDdyRP1R4CTHr3FeuDNfhsLPYrq2t0UBkUdK2jyTnXPEK1NQ" crossorigin="anonymous"/><link rel="icon" href="/numpy-tutorials/favicon.ico"/><link rel="stylesheet" href="/numpy-tutorials/myst-theme.css"/><script>
// Runs inline in <head> to pick the initial colour theme class for the root
// element. A 'light' or 'dark' class that is already present is left alone;
// otherwise the reader's saved choice is used, falling back to the
// prefers-color-scheme media query.
let savedTheme = null;
try {
  // Accessing localStorage throws a SecurityError when storage is blocked
  // (privacy settings, sandboxed frames); treat that as "nothing saved".
  savedTheme = localStorage.getItem("myst:theme");
} catch {
  savedTheme = null;
}
// Only 'light' and 'dark' are recognised below. An empty or whitespace-bearing
// stored value would make classList.add() throw, so anything else is ignored.
if (savedTheme !== 'light' && savedTheme !== 'dark') savedTheme = null;
const theme = window.matchMedia("(prefers-color-scheme: light)").matches ? 'light' : 'dark';
const classes = document.documentElement.classList;
const hasAnyTheme = classes.contains('light') || classes.contains('dark');
if (!hasAnyTheme) classes.add(savedTheme ?? theme);
</script></head><body class="m-0 transition-colors duration-500 bg-white dark:bg-stone-900"><div class="myst-skip-to-article fixed top-1 left-1 h-[0px] w-[0px] focus-within:z-40 focus-within:h-auto focus-within:w-auto bg-white overflow-hidden focus-within:p-2 focus-within:ring-1" aria-label="skip to content options"><a href="#skip-to-frontmatter" class="myst-skip-to-link block px-2 py-1 text-black underline">Skip to article frontmatter</a><a href="#skip-to-article" class="myst-skip-to-link block px-2 py-1 text-black underline">Skip to article content</a></div><dialog id="myst-no-css" style="position:fixed;left:0px;top:0px;width:100vw;height:100vh;font-size:4rem;padding:1rem;color:black;background:white"><strong>Site not loading correctly?</strong><p>This may be due to an incorrect <code>BASE_URL</code> configuration. See<!-- --> <a href="https://mystmd.org/guide/deployment#deploy-base-url">the MyST Documentation</a> <!-- -->for reference.</p><script>
(() => {
  // The MyST stylesheet sets a --has-styling custom property. When it resolves
  // to an empty string on the enclosing <dialog>, the stylesheet did not
  // apply, so the dialog is opened as a modal to show its message.
  const dialog = document.currentScript.parentNode;
  const marker = window.getComputedStyle(dialog).getPropertyValue("--has-styling");
  if (marker !== "") return;
  dialog.showModal();
})()
</script></dialog><div class="myst-top-nav bg-white/80 backdrop-blur dark:bg-stone-900/80 shadow dark:shadow-stone-700 p-3 md:px-8 sticky w-screen top-0 z-30 h-[60px]"><nav class="myst-top-nav-bar flex items-center justify-between flex-nowrap max-w-[1440px] mx-auto"><div class="flex flex-row xl:min-w-[19.5rem] mr-2 sm:mr-7 justify-start items-center shrink-0"><div class="block xl:hidden"><button class="myst-top-nav-menu-button flex items-center justify-center border-stone-400 text-stone-800 hover:text-stone-900 dark:text-stone-200 hover:dark:text-stone-100 w-10 h-10"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" data-slot="icon" width="1.5rem" height="1.5rem"><path fill-rule="evenodd" d="M3 6.75A.75.75 0 0 1 3.75 6h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 6.75ZM3 12a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75A.75.75 0 0 1 3 12Zm0 5.25a.75.75 0 0 1 .75-.75h16.5a.75.75 0 0 1 0 1.5H3.75a.75.75 0 0 1-.75-.75Z" clip-rule="evenodd"></path></svg><span class="sr-only">Open Menu</span></button></div><a class="myst-home-link flex items-center ml-3 dark:text-white w-fit md:ml-5 xl:ml-7" href="/numpy-tutorials/"><div class="myst-home-link-logo mr-3 flex items-center dark:bg-white dark:rounded px-1"><img src="/numpy-tutorials/build/numpylogo-28b47a276e91aa58e8056b86c12e6958.svg" class="h-9" height="2.25rem"/></div><span class="text-md sm:text-xl tracking-tight sm:mr-5 sr-only">Made with MyST</span></a></div><div class="flex items-center flex-grow w-auto"><div class="flex-grow hidden text-md lg:block"></div><div class="flex-grow block"></div><button type="button" aria-haspopup="dialog" aria-expanded="false" aria-controls="radix-:R75cp:" data-state="closed" class="myst-search-bar flex items-center h-10 aspect-square sm:w-64 text-left text-gray-600 dark:text-gray-300 border border-gray-300 dark:border-gray-600 rounded-lg bg-gray-50 dark:bg-gray-700 myst-search-bar-disabled hover:ring-blue-500 dark:hover:ring-blue-500 
hover:border-blue-500 dark:hover:border-blue-500"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" data-slot="icon" class="p-2.5 h-10 w-10 aspect-square"><path fill-rule="evenodd" d="M10.5 3.75a6.75 6.75 0 1 0 0 13.5 6.75 6.75 0 0 0 0-13.5ZM2.25 10.5a8.25 8.25 0 1 1 14.59 5.28l4.69 4.69a.75.75 0 1 1-1.06 1.06l-4.69-4.69A8.25 8.25 0 0 1 2.25 10.5Z" clip-rule="evenodd"></path></svg><span class="myst-search-text-placeholder hidden sm:block grow">Search</span><div aria-hidden="true" class="myst-search-shortcut items-center hidden mx-1 font-mono text-sm text-gray-600 dark:text-gray-300 sm:flex gap-x-1"><kbd class="px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-[0px_2px_0px_0px_rgba(0,0,0,0.08)] dark:shadow-none hide-mac">CTRL</kbd><kbd class="px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-[0px_2px_0px_0px_rgba(0,0,0,0.08)] dark:shadow-none show-mac">⌘</kbd><kbd class="px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-[0px_2px_0px_0px_rgba(0,0,0,0.08)] dark:shadow-none ">K</kbd><script>
;(() => {
// Tag the keyboard-shortcut hints next to this script according to platform.
// When the platform string matches /mac/i, ".hide-mac" elements receive the
// "hidden" class and ".show-mac" elements receive "block"; otherwise the
// two classes are swapped.
const shortcutBox = document.currentScript.parentElement;
const platformHint = window.navigator.userAgentData?.platform ?? window.navigator.userAgent;
const onMac = /mac/i.test(platformHint);
const tagVisibility = (selector, visible) => {
  for (const keyCap of shortcutBox.querySelectorAll(selector)) {
    keyCap.classList.add(visible ? "block" : "hidden");
  }
};
tagVisibility(".hide-mac", !onMac);
tagVisibility(".show-mac", onMac);
})()</script></div></button><button class="myst-theme-button theme rounded-full aspect-square border border-stone-700 dark:border-white hover:bg-neutral-100 border-solid overflow-hidden text-stone-700 dark:text-white hover:text-stone-500 dark:hover:text-neutral-800 w-10 h-10 mx-3" title="Toggle theme between light and dark mode" aria-label="Toggle theme between light and dark mode"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" data-slot="icon" class="myst-theme-moon-icon h-full w-full p-0.5 hidden dark:block"><path fill-rule="evenodd" d="M9.528 1.718a.75.75 0 0 1 .162.819A8.97 8.97 0 0 0 9 6a9 9 0 0 0 9 9 8.97 8.97 0 0 0 3.463-.69.75.75 0 0 1 .981.98 10.503 10.503 0 0 1-9.694 6.46c-5.799 0-10.5-4.7-10.5-10.5 0-4.368 2.667-8.112 6.46-9.694a.75.75 0 0 1 .818.162Z" clip-rule="evenodd"></path></svg><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="myst-theme-sun-icon h-full w-full p-0.5 dark:hidden"><path stroke-linecap="round" stroke-linejoin="round" d="M12 3v2.25m6.364.386-1.591 1.591M21 12h-2.25m-.386 6.364-1.591-1.591M12 18.75V21m-4.773-4.227-1.591 1.591M5.25 12H3m4.227-4.773L5.636 5.636M15.75 12a3.75 3.75 0 1 1-7.5 0 3.75 3.75 0 0 1 7.5 0Z"></path></svg></button><div class="block sm:hidden"></div><div class="hidden sm:block"></div></div></nav></div><div class="myst-primary-sidebar fixed xl:article-grid grid-gap xl:w-screen xl:pointer-events-none overflow-auto max-xl:min-w-[300px] hidden z-10" style="top:60px"><div class="myst-primary-sidebar-pointer pointer-events-auto xl:col-margin-left flex-col overflow-hidden hidden xl:flex"><div class="myst-primary-sidebar-nav flex-grow py-6 overflow-y-auto primary-scrollbar"><nav aria-label="Navigation" class="myst-primary-sidebar-topnav overflow-y-hidden transition-opacity ml-3 xl:ml-0 mr-3 max-w-[350px] lg:hidden"><div class="w-full px-1 dark:text-white 
font-medium"></div></nav><div class="my-3 border-b-2 lg:hidden"></div><nav aria-label="Table of Contents" class="myst-primary-sidebar-toc flex-grow overflow-y-hidden transition-opacity ml-3 xl:ml-0 mr-3 max-w-[350px]"><div class="myst-toc w-full px-1 dark:text-white"><a title="Numpy Tutorials" class="block break-words focus:outline outline-blue-200 outline-2 rounded myst-toc-item p-2 my-1 rounded-lg hover:bg-slate-300/30 font-bold" href="/numpy-tutorials/">Numpy Tutorials</a><div data-state="closed" class="w-full"><div class="myst-toc-item flex flex-row w-full gap-2 pl-2 my-1 text-left rounded-lg outline-none hover:bg-slate-300/30"><div title="Applications" class="block break-words rounded py-2 grow cursor-pointer">Applications</div><button class="self-stretch flex items-center flex-none px-1 rounded-l-md group hover:bg-slate-300/30 focus-visible:outline outline-blue-200 outline-2" aria-label="Open Folder" type="button" aria-controls="radix-:Rmpsp:" aria-expanded="false" data-state="closed"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" data-slot="icon" class="transition-transform duration-300 group-data-[state=open]:rotate-90 text-text-slate-700 dark:text-slate-100" height="1.5rem" width="1.5rem"><path fill-rule="evenodd" d="M16.28 11.47a.75.75 0 0 1 0 1.06l-7.5 7.5a.75.75 0 0 1-1.06-1.06L14.69 12 7.72 5.03a.75.75 0 0 1 1.06-1.06l7.5 7.5Z" clip-rule="evenodd"></path></svg></button></div><div data-state="closed" id="radix-:Rmpsp:" hidden="" class="pl-3 pr-[2px] collapsible-content"></div></div><div data-state="closed" class="w-full"><div class="myst-toc-item flex flex-row w-full gap-2 pl-2 my-1 text-left rounded-lg outline-none hover:bg-slate-300/30"><div title="Features" class="block break-words rounded py-2 grow cursor-pointer">Features</div><button class="self-stretch flex items-center flex-none px-1 rounded-l-md group hover:bg-slate-300/30 focus-visible:outline outline-blue-200 outline-2" aria-label="Open 
Folder" type="button" aria-controls="radix-:Rupsp:" aria-expanded="false" data-state="closed"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" data-slot="icon" class="transition-transform duration-300 group-data-[state=open]:rotate-90 text-text-slate-700 dark:text-slate-100" height="1.5rem" width="1.5rem"><path fill-rule="evenodd" d="M16.28 11.47a.75.75 0 0 1 0 1.06l-7.5 7.5a.75.75 0 0 1-1.06-1.06L14.69 12 7.72 5.03a.75.75 0 0 1 1.06-1.06l7.5 7.5Z" clip-rule="evenodd"></path></svg></button></div><div data-state="closed" id="radix-:Rupsp:" hidden="" class="pl-3 pr-[2px] collapsible-content"></div></div><div data-state="closed" class="w-full"><div class="myst-toc-item flex flex-row w-full gap-2 pl-2 my-1 text-left rounded-lg outline-none hover:bg-slate-300/30"><a title="Contributing" class="block break-words focus:outline outline-blue-200 outline-2 rounded py-2 grow" href="/numpy-tutorials/contributing">Contributing</a><button class="self-stretch flex items-center flex-none px-1 rounded-l-md group hover:bg-slate-300/30 focus-visible:outline outline-blue-200 outline-2" aria-label="Open Folder" type="button" aria-controls="radix-:R16psp:" aria-expanded="false" data-state="closed"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" data-slot="icon" class="transition-transform duration-300 group-data-[state=open]:rotate-90 text-text-slate-700 dark:text-slate-100" height="1.5rem" width="1.5rem"><path fill-rule="evenodd" d="M16.28 11.47a.75.75 0 0 1 0 1.06l-7.5 7.5a.75.75 0 0 1-1.06-1.06L14.69 12 7.72 5.03a.75.75 0 0 1 1.06-1.06l7.5 7.5Z" clip-rule="evenodd"></path></svg></button></div><div data-state="closed" id="radix-:R16psp:" hidden="" class="pl-3 pr-[2px] collapsible-content"></div></div></div></nav></div><div class="myst-primary-sidebar-footer flex-none py-6 transition-all duration-700 translate-y-6 opacity-0"><a class="myst-made-with-myst flex mx-auto text-gray-700 w-fit 
hover:text-blue-700 dark:text-gray-200 dark:hover:text-blue-400" href="https://mystmd.org/made-with-myst" target="_blank" rel="noreferrer"><svg style="width:24px;height:24px" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" stroke="none"><g id="icon"><path fill="currentColor" d="M23.8,54.8v-3.6l4.7-0.8V17.5l-4.7-0.8V13H36l13.4,31.7h0.2l13-31.7h12.6v3.6l-4.7,0.8v32.9l4.7,0.8v3.6h-15
v-3.6l4.9-0.8V20.8H65L51.4,53.3h-3.8l-14-32.5h-0.1l0.2,17.4v12.1l5,0.8v3.6H23.8z"></path><path fill="#F37726" d="M47,86.9c0-5.9-3.4-8.8-10.1-8.8h-8.4c-5.2,0-9.4-1.3-12.5-3.8c-3.1-2.5-5.4-6.2-6.8-11l4.8-1.6
c1.8,5.6,6.4,8.6,13.8,8.8h9.2c6.4,0,10.8,2.5,13.1,7.5c2.3-5,6.7-7.5,13.1-7.5h8.4c7.8,0,12.7-2.9,14.6-8.7l4.8,1.6
c-1.4,4.9-3.6,8.6-6.8,11.1c-3.1,2.5-7.3,3.7-12.4,3.8H63c-6.7,0-10,2.9-10,8.8"></path></g></svg><span class="self-center ml-2 text-sm">Made with MyST</span></a></div></div></div><main class="article-grid grid-gap"><article class="article-grid subgrid-gap col-screen article content"><div class="hidden"></div><div id="skip-to-frontmatter" aria-label="article frontmatter" class="myst-fm-block mb-8 pt-9"><div class="myst-fm-block-header flex items-center mb-5 h-6 text-sm font-light"><div class="flex-grow"></div><div class="myst-fm-block-badges"><a href="https://github.com/numpy/numpy-tutorials" title="GitHub Repository: numpy/numpy-tutorials" target="_blank" rel="noopener noreferrer" class="myst-fm-github-link text-inherit hover:text-inherit"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" width="1.25rem" height="1.25rem" class="myst-fm-github-icon inline-block mr-1 opacity-60 hover:opacity-100"><path d="M12 2.5c-5.4 0-9.8 4.4-9.8 9.7 0 4.3 2.8 8 6.7 9.2.5.1.7-.2.7-.5v-1.8c-2.4.5-3.1-.6-3.3-1.1-.1-.3-.6-1.1-1-1.4-.3-.2-.8-.6 0-.6s1.3.7 1.5 1c.9 1.5 2.3 1.1 2.8.8.1-.6.3-1.1.6-1.3-2.2-.2-4.4-1.1-4.4-4.8 0-1.1.4-1.9 1-2.6-.1-.2-.4-1.2.1-2.6 0 0 .8-.3 2.7 1 .8-.2 1.6-.3 2.4-.3.8 0 1.7.1 2.4.3 1.9-1.3 2.7-1 2.7-1 .5 1.3.2 2.3.1 2.6.6.7 1 1.5 1 2.6 0 3.7-2.3 4.6-4.4 4.8.4.3.7.9.7 1.8V21c0 .3.2.6.7.5 3.9-1.3 6.6-4.9 6.6-9.2 0-5.4-4.4-9.8-9.8-9.8z"></path></svg></a></div><a href="https://github.com/numpy/numpy-tutorials/edit/main/content/tutorial-ma.md" title="Edit This Page" target="_blank" rel="noopener noreferrer" class="myst-fm-edit-link text-inherit hover:text-inherit"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="1.25rem" height="1.25rem" class="myst-fm-edit-icon inline-block mr-1 opacity-60 hover:opacity-100"><path stroke-linecap="round" stroke-linejoin="round" d="m16.862 4.487 1.687-1.688a1.875 1.875 0 1 1 2.652 
2.652L10.582 16.07a4.5 4.5 0 0 1-1.897 1.13L6 18l.8-2.685a4.5 4.5 0 0 1 1.13-1.897l8.932-8.931Zm0 0L19.5 7.125M18 14v4.75A2.25 2.25 0 0 1 15.75 21H5.25A2.25 2.25 0 0 1 3 18.75V8.25A2.25 2.25 0 0 1 5.25 6H10"></path></svg></a><div class="myst-fm-downloads-dropdown relative flex inline-block mx-1 grow-0" data-headlessui-state=""><button class="myst-fm-downloads-button relative ml-2 -mr-1" id="headlessui-menu-button-:Rs8ucp:" type="button" aria-haspopup="menu" aria-expanded="false" data-headlessui-state=""><span class="sr-only">Downloads</span><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="1.25rem" height="1.25rem" class="myst-fm-downloads-icon"><title>Download</title><path stroke-linecap="round" stroke-linejoin="round" d="M3 16.5v2.25A2.25 2.25 0 0 0 5.25 21h13.5A2.25 2.25 0 0 0 21 18.75V16.5M16.5 12 12 16.5m0 0L7.5 12m4.5 4.5V3"></path></svg></button></div></div><h1 class="myst-fm-block-title mb-0">Masked Arrays</h1><header class="myst-fm-authors-affiliations mt-4 not-prose"><div class="myst-fm-authors-list"><span class="myst-fm-author font-semibold text-sm myst-fm-author-item inline-block"><button class="myst-fm-author-popover focus:shadow-[0_0_0_2px] focus:shadow-black outline-none hover:underline" aria-label="Author Details" type="button" aria-haspopup="dialog" aria-expanded="false" aria-controls="radix-:R78ucp:" data-state="closed"><span class="myst-fm-author-name">Numpy Community</span></button></span></div></header></div><div class="block my-10 lg:sticky lg:z-10 lg:h-0 lg:pt-0 lg:my-0 lg:ml-10 lg:col-margin-right" style="top:60px"><nav></nav></div><div id="skip-to-article"></div><h2 id="what-youll-do" class="relative group"><span class="heading-text">What you’ll do</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 
group-hover:opacity-70" href="#what-youll-do" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>Use the masked arrays module from NumPy to analyze COVID-19 data and deal with missing values.</p><h2 id="what-youll-learn" class="relative group"><span class="heading-text">What you’ll learn</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#what-youll-learn" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><ul><li><p>You’ll understand what masked arrays are and how they can be created</p></li><li><p>You’ll see how to access and modify data for masked arrays</p></li><li><p>You’ll be able to decide when the use of masked arrays is appropriate in some of your applications</p></li></ul><h2 id="what-youll-need" class="relative group"><span class="heading-text">What you’ll need</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#what-youll-need" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><ul><li><p>Basic familiarity with Python. 
If you would like to refresh your memory, take a look at the <a target="_blank" rel="noreferrer" href="https://docs.python.org/dev/tutorial/index.html" class="link">Python tutorial<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a>.</p></li><li><p>Basic familiarity with NumPy</p></li><li><p>To run the plots on your computer, you need <a target="_blank" rel="noreferrer" href="https://matplotlib.org" class="link">matplotlib<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a>.</p></li></ul><hr class="py-2 my-5 translate-y-2"/><h2 id="what-are-masked-arrays" class="relative group"><span class="heading-text">What are masked arrays?</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#what-are-masked-arrays" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>Consider the following problem. You have a dataset with missing or invalid entries. If you’re doing any kind of processing on this data, and want to <em>skip</em> or flag these unwanted entries without just deleting them, you may have to use conditionals or filter your data somehow. 
The <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/maskedarray.generic.html#module-numpy.ma" class="link">numpy.ma<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> module provides some of the same functionality of <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.ndarray.html#numpy.ndarray" class="link">NumPy ndarrays<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> with added structure to ensure invalid entries are not used in computation.</p><p>From the <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/maskedarray.generic.html#module-numpy.ma" class="link">Reference Guide<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a>:</p><blockquote><p>A masked array is the combination of a standard <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.ndarray.html#numpy.ndarray" class="link">numpy.ndarray<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" 
stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> and a <strong>mask</strong>. A mask is either <code>nomask</code>, indicating that no value of the associated array is invalid, or an array of booleans that determines for each element of the associated array whether the value is valid or not. When an element of the mask is <code>False</code>, the corresponding element of the associated array is valid and is said to be unmasked. When an element of the mask is <code>True</code>, the corresponding element of the associated array is said to be masked (invalid).</p></blockquote><p>We can think of a <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray" class="link">MaskedArray<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> as a combination of:</p><ul><li><p>Data, as a regular <code>numpy.ndarray</code> of any shape or datatype;</p></li><li><p>A boolean mask with the same shape as the data;</p></li><li><p>A <code>fill_value</code>, a value that may be used to replace the invalid entries in order to return a standard <code>numpy.ndarray</code>.</p></li></ul><h2 id="when-can-they-be-useful" class="relative group"><span class="heading-text">When can they be useful?</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 
group-hover:opacity-70" href="#when-can-they-be-useful" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>There are a few situations where masked arrays can be more useful than just eliminating the invalid entries of an array:</p><ul><li><p>When you want to preserve the values you masked for later processing, without copying the array;</p></li><li><p>When you have to handle many arrays, each with their own mask. If the mask is part of the array, you avoid bugs and the code is possibly more compact;</p></li><li><p>When you have different flags for missing or invalid values, and wish to preserve these flags without replacing them in the original dataset, but exclude them from computations;</p></li><li><p>If you can’t avoid or eliminate missing values, but don’t want to deal with <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/constants.html#numpy.nan" class="link">NaN (Not a Number)<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> values in your operations.</p></li></ul><p>Masked arrays are also a good idea since the <code>numpy.ma</code> module also comes with a specific implementation of most <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/glossary.html#term-ufunc" class="link">NumPy universal functions (ufuncs)<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 
3v5.25"></path></svg></a>, which means that you can still apply fast vectorized functions and operations on masked data. The output is then a masked array. We’ll see some examples of how this works in practice below.</p><h2 id="using-masked-arrays-to-see-covid-19-data" class="relative group"><span class="heading-text">Using masked arrays to see COVID-19 data</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#using-masked-arrays-to-see-covid-19-data" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>From <a target="_blank" rel="noreferrer" href="https://www.kaggle.com/atilamadai/covid19" class="link">Kaggle<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> it is possible to download a dataset with initial data about the COVID-19 outbreak in the beginning of 2020. We are going to look at a small subset of this data, contained in the file <code>who_covid_19_sit_rep_time_series.csv</code>. 
<em>(Note that this file has been replaced with a version without missing data sometime in late 2020.)</em></p><div id="dUNOyUDIEh" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">import numpy as np
import os
# The os.getcwd() function returns the current folder; you can change
# the filepath variable to point to the folder where you saved the .csv file
filepath = os.getcwd()
filename = os.path.join(filepath, "who_covid_19_sit_rep_time_series.csv")</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="iyRudBsnADbwfv-er3k0C" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left"></div></div><p>The data file contains data of different types and is organized as follows:</p><ul><li><p>The first row is a header line that (mostly) describes the data in each column that follows in the rows below, and beginning in the fourth column, the header is the date of the observation.</p></li><li><p>The second through seventh rows contain summary data that is of a different type than that which we are going to examine, so we will need to exclude that from the data with which we will work.</p></li><li><p>The numerical data we wish to work with begins at column 4, row 8, and extends from there to the rightmost column and the lowermost 
row.</p></li></ul><p>Let’s explore the data inside this file for the first 14 days of records. To gather data from the <code>.csv</code> file, we will use the <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.genfromtxt.html#numpy.genfromtxt" class="link">numpy.genfromtxt<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> function, making sure we select only the columns with actual numbers instead of the first four columns which contain location data. We also skip the first 6
rows of this file, since they contain other data we are not interested in. Separately, we will extract the information about dates and location for this data.</p><div id="moRMRRzq9n" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre"># Note we are using skip_header and usecols to read only portions of the
# data file into each variable.
# Read just the dates for columns 4-17 (the first 14 days) from the first row
dates = np.genfromtxt(
filename,
dtype=np.str_,
delimiter=",",
max_rows=1,
usecols=range(4, 18),
encoding="utf-8-sig",
)
# Read the names of the geographic locations from the first two
# columns, skipping the first six rows
locations = np.genfromtxt(
filename,
dtype=np.str_,
delimiter=",",
skip_header=6,
usecols=(0, 1),
encoding="utf-8-sig",
)
# Read the numeric data from just the first 14 days
nbcases = np.genfromtxt(
filename,
dtype=np.int_,
delimiter=",",
skip_header=6,
usecols=range(4, 18),
encoding="utf-8-sig",
)</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="3Bdc8V_mb_3bBgTaw1k0S" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left"></div></div><p>Included in the <code>numpy.genfromtxt</code> function call, we have selected the <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.dtype.html#numpy.dtype" class="link">numpy.dtype<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> for each subset of the data (either an integer - <code>numpy.int_</code> - or a string of characters - 
<code>numpy.str_</code>). We have also used the <code>encoding</code> argument to select <code>utf-8-sig</code> as the encoding for the file (read more about encoding in the <a target="_blank" rel="noreferrer" href="https://docs.python.org/3/library/codecs.html#encodings-and-unicode" class="link">official Python documentation<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a>). You can read more about the <code>numpy.genfromtxt</code> function from the <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.genfromtxt.html#numpy.genfromtxt" class="link">Reference Documentation<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> or from the <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/user/basics.io.genfromtxt.html" class="link">Basic IO tutorial<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a>.</p><h2 id="exploring-the-data" class="relative group"><span class="heading-text">Exploring the data</span><a class="no-underline text-inherit hover:text-inherit 
inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#exploring-the-data" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>First of all, we can plot the whole set of data we have and see what it looks like. In order to get a readable plot, we select only a few of the dates to show in our <a target="_blank" rel="noreferrer" href="https://matplotlib.org/api/_as_gen/matplotlib.pyplot.xticks.html#matplotlib.pyplot.xticks" class="link">x-axis ticks<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a>. Note also that in our plot command, we use <code>nbcases.T</code> (the transpose of the <code>nbcases</code> array) since this means we will plot each row of the file as a separate line. We choose to plot a dashed line (using the <code>'--'</code> line style). 
See the <a target="_blank" rel="noreferrer" href="https://matplotlib.org/" class="link">matplotlib<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> documentation for more info on this.</p><div id="OWkQSiFO3N" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">import matplotlib.pyplot as plt
selected_dates = [0, 3, 11, 13]
fig, ax = plt.subplots()
ax.plot(dates, nbcases.T, "--")
ax.set_xticks(selected_dates, dates[selected_dates])
ax.set_title("COVID-19 cumulative cases from Jan 21 to Feb 3 2020")</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="hXeOSFyl8mGCrgjXif6fS" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-image"><img src="/numpy-tutorials/build/d08c79eba17c823037f90ad9c9ac2cc1.png" alt="<Figure size 640x480 with 1 Axes>"/></div></div></div><p>The graph has a strange shape from January 24th to February 1st. It would be interesting to know where this data comes from. If we look at the <code>locations</code> array we extracted from the <code>.csv</code> file, we can see that we have two columns, where the first would contain regions and the second would contain the name of the country. However, only the first few rows contain data for the first column (province names in China). Following that, we only have country names. 
So it would make sense to group all the data from China into a single row. For this, we’ll select from the <code>nbcases</code> array only the rows for which the second entry of the <code>locations</code> array corresponds to China. Next, we’ll use the <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.sum.html#numpy.sum" class="link">numpy.sum<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> function to sum all the selected rows (<code>axis=0</code>). Note also that row 35 corresponds to the total counts for the whole country for each date. Since we want to calculate the sum ourselves from the provinces data, we have to remove that row first from both <code>locations</code> and <code>nbcases</code>:</p><div id="YNFThK7AiZ" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">totals_row = 35
locations = np.delete(locations, (totals_row), axis=0)
nbcases = np.delete(nbcases, (totals_row), axis=0)
china_total = nbcases[locations[:, 1] == "China"].sum(axis=0)
china_total</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="4ESkLRgsIuLgD1mbJ9f1A" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>array([ 247, 288, 556, 817, -22, -22, -15, -10, -9,
-7, -4, 11820, 14410, 17237])</span></code></div></div></div><p>Something’s wrong with this data - we are not supposed to have negative values in a cumulative data set. What’s going on?</p><h2 id="missing-data" class="relative group"><span class="heading-text">Missing data</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#missing-data" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>Looking at the data, here’s what we find: there is a period with <strong>missing data</strong>:</p><div id="OSgxDmKH0V" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">nbcases</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" 
aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="bApvnRcnM4E6tekpy1Sgp" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>array([[ 258, 270, 375, ..., 7153, 9074, 11177],
[ 14, 17, 26, ..., 520, 604, 683],
[ -1, 1, 1, ..., 422, 493, 566],
...,
[ -1, -1, -1, ..., -1, -1, -1],
[ -1, -1, -1, ..., -1, -1, -1],
[ -1, -1, -1, ..., -1, -1, -1]], shape=(263, 14))</span></code></div></div></div><p>All the <code>-1</code> values we are seeing come from <code>numpy.genfromtxt</code> attempting to read missing data from the original <code>.csv</code> file. Obviously, we
don’t want to compute missing data as <code>-1</code> - we just want to skip this value so it doesn’t interfere in our analysis. After importing the <code>numpy.ma</code> module, we’ll create a new array, this time masking the invalid values:</p><div id="u3UhS9HwHX" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">from numpy import ma
nbcases_ma = ma.masked_values(nbcases, -1)</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="uUWn9L_lYnaQfxfqe6-zZ" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left"></div></div><p>If we look at the <code>nbcases_ma</code> masked array, this is what we have:</p><div id="k2tHiqiIhr" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 
border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">nbcases_ma</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="E0gHsBcVUX2WrBwUax0zi" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>masked_array(
data=[[258, 270, 375, ..., 7153, 9074, 11177],
[14, 17, 26, ..., 520, 604, 683],
[--, 1, 1, ..., 422, 493, 566],
...,
[--, --, --, ..., --, --, --],
[--, --, --, ..., --, --, --],
[--, --, --, ..., --, --, --]],
mask=[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[ True, False, False, ..., False, False, False],
...,
[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True]],
fill_value=-1)</span></code></div></div></div><p>We can see that this is a different kind of array. As mentioned in the introduction, it has three attributes (<code>data</code>, <code>mask</code> and <code>fill_value</code>).
Keep in mind that the <code>mask</code> attribute has a <code>True</code> value for elements corresponding to <strong>invalid</strong> data (represented by two dashes in the <code>data</code> attribute).</p><p>Let’s try and see what the data looks like excluding the first row (data from the Hubei province in China) so we can look at the missing data more
closely:</p><div id="RpADBWcKLw" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">fig, ax = plt.subplots()
ax.plot(dates, nbcases_ma[1:].T, "--")
ax.set_xticks(selected_dates, dates[selected_dates])
ax.set_title("COVID-19 cumulative cases from Jan 21 to Feb 3 2020")</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="OnmFem7UI9BNWiST7L07E" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-image"><img src="/numpy-tutorials/build/613f065b167d8d181b5dc5e1173b74db.png" alt="<Figure size 640x480 with 1 Axes>"/></div></div></div><p>Now that our data has been masked, let’s try summing up all the cases in China:</p><div id="w4PiJASbRC" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 
flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">china_masked = nbcases_ma[locations[:, 1] == "China"].sum(axis=0)
china_masked</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="w41J1ltChiRKSDNChsDCh" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>masked_array(data=[278, 309, 574, 835, 10, 10, 17, 22, 23, 25, 28, 11821,
14411, 17238],
mask=[False, False, False, False, False, False, False, False,
False, False, False, False, False, False],
fill_value=999999)</span></code></div></div></div><p>Note that <code>china_masked</code> is a masked array, so it has a different data structure than a regular NumPy array. Now, we can access its data directly by using the <code>.data</code> attribute:</p><div id="eqxoky7z0N" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">china_total = china_masked.data
china_total</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="E0hc4TSfgZt-7-LY89XoH" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>array([ 278, 309, 574, 835, 10, 10, 17, 22, 23,
25, 28, 11821, 14411, 17238])</span></code></div></div></div><p>That is better: no more negative values. However, we can still see that for some days, the cumulative number of cases seems to go down (from 835 to 10, for example), which does not agree with the definition of “cumulative data”. If we look more closely at the data, we can see that in the period where there was missing data in mainland China, there was valid data for Hong Kong, Taiwan, Macau and “Unspecified” regions of China. Maybe we can remove those from the total sum of cases in China, to get a better understanding of the data.</p><p>First, we’ll identify the indices of locations in mainland China:</p><div id="c2kHlxA4A7" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">china_mask = (
(locations[:, 1] == "China")
& (locations[:, 0] != "Hong Kong")
& (locations[:, 0] != "Taiwan")
& (locations[:, 0] != "Macau")
& (locations[:, 0] != "Unspecified*")
)</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="EgxOi8i9Xccvlt65bECD2" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left"></div></div><p>Now, <code>china_mask</code> is an array of boolean values (<code>True</code> or <code>False</code>); we can check that the indices are what we wanted with the <a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.ma.nonzero.html#numpy.ma.nonzero" class="link">ma.nonzero<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> method for masked arrays:</p><div 
id="c3NDITxXI4" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">china_mask.nonzero()</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" 
data-mdast-node-id="SSjehsND5lCRjpj-9wLDe" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 31, 33]),)</span></code></div></div></div><p>Now we can correctly sum entries for mainland China:</p><div id="AIhB1xRM2l" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">china_total = nbcases_ma[china_mask].sum(axis=0)
china_total</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="kFN7Qkvl5KVrVXE8t1zBm" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>masked_array(data=[278, 308, 440, 446, --, --, --, --, --, --, --, 11791,
14380, 17205],
mask=[False, False, False, False, True, True, True, True,
True, True, True, False, False, False],
fill_value=999999)</span></code></div></div></div><p>We can replace the data with this information and plot a new graph, focusing on Mainland China:</p><div id="y0xqdPHyMh" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">fig, ax = plt.subplots()
ax.plot(dates, china_total.T, "--")
ax.set_xticks(selected_dates, dates[selected_dates])
ax.set_title("COVID-19 cumulative cases from Jan 21 to Feb 3 2020 - Mainland China")</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="z9ZPh7E5c47tVJsvRRLo7" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-image"><img src="/numpy-tutorials/build/9c07c4f86dd02b243a8eec9094bc14fc.png" alt="<Figure size 640x480 with 1 Axes>"/></div></div></div><p>It’s clear that masked arrays are the right solution here. 
We cannot represent the missing data without mischaracterizing the evolution of the curve.</p><h2 id="fitting-data" class="relative group"><span class="heading-text">Fitting Data</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#fitting-data" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>One possibility we can think of is to interpolate the missing data to estimate the number of cases in late January. Observe that we can select the masked elements using the <code>.mask</code> attribute:</p><div id="CS6dNqtsTj" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">china_total.mask
invalid = china_total[china_total.mask]
invalid</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="yWbzoRPOrBNHW67Vwvrgd" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>masked_array(data=[--, --, --, --, --, --, --],
mask=[ True, True, True, True, True, True, True],
fill_value=999999,
dtype=int64)</span></code></div></div></div><p>We can also access the valid entries by using the logical negation for this mask:</p><div id="aqeFAZoO88" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">valid = china_total[~china_total.mask]
valid</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="Qa5CvxdOWp_hU8_5zxgxL" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>masked_array(data=[278, 308, 440, 446, 11791, 14380, 17205],
mask=[False, False, False, False, False, False, False],
fill_value=999999)</span></code></div></div></div><p>Now, if we want to create a very simple approximation for this data, we should take into account the valid entries around the invalid ones. So first let’s select the dates for which the data is valid. Note that we can use the mask from the <code>china_total</code> masked array to index the dates array:</p><div id="u7m5NfksI8" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">dates[~china_total.mask]</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 
1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="-vx6pxVp3pApdw97aUKnW" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-text" class="font-mono text-sm whitespace-pre-wrap myst-jp-safe-output-text"><code><span>array(['1/21/20', '1/22/20', '1/23/20', '1/24/20', '2/1/20', '2/2/20',
'2/3/20'], dtype='&lt;U7')</span></code></div></div></div><p>Finally, we can use the
<a target="_blank" rel="noreferrer" href="https://numpy.org/doc/stable/reference/generated/numpy.polynomial.polynomial.Polynomial.fit.html" class="link">fitting functionality of the numpy.polynomial<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a>
package to create a cubic polynomial model that fits the data as well as possible:</p><div id="kDJhHiZU2L" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">t = np.arange(len(china_total))
model = np.polynomial.Polynomial.fit(t[~china_total.mask], valid, deg=3)
fig, ax = plt.subplots()
ax.plot(t, china_total)
ax.plot(t, model(t), "--")</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="dsmc8-2mly_fUjdf09IPy" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-image"><img src="/numpy-tutorials/build/705b0ce4ba67115fce959db22ca0f373.png" alt="<Figure size 640x480 with 1 Axes>"/></div></div></div><p>This plot is not so readable since the lines seem to be over each other, so let’s summarize in a more elaborate plot. We’ll plot the real data when
available, and show the cubic fit for unavailable data, using this fit to compute an estimate of the observed number of cases on January 28th, 2020, 7 days after the beginning of the records:</p><div id="oL6tgNuoGb" class="myst-jp-nb-block relative group/block"><div class="myst-jp-nb-block-spinner flex sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:hidden"><div class="flex absolute top-0 right-0"></div></div><div class="myst-jp-nb-block sticky top-[115px] z-10 opacity-90 group-hover/block:opacity-100 group-hover/block:flex"><div class="absolute -top-[12px] right-0 flex flex-row rounded bg-white dark:bg-slate-800"></div></div><div class="relative myst-code group not-prose my-5 text-sm shadow hover:shadow-md dark:shadow-2xl dark:shadow-neutral-900 border border-l-4 border-gray-200 border-l-blue-400 dark:border-l-blue-400 dark:border-gray-800"><pre class="block overflow-auto p-3 myst-code-body hljs" style="background-color:unset"><code class="language-python" style="white-space:pre">fig, ax = plt.subplots()
ax.plot(t, china_total)
ax.plot(t[china_total.mask], model(t)[china_total.mask], "--", color="orange")
ax.plot(7, model(7), "r*")
ax.set_xticks([0, 7, 13], dates[[0, 7, 13]])
ax.set_yticks([0, model(7), 10000, 17500])
ax.legend(["Mainland China", "Cubic estimate", "7 days after start"])
ax.set_title(
"COVID-19 cumulative cases from Jan 21 to Feb 3 2020 - Mainland China\n"
"Cubic estimate for 7 days after start"
)</code></pre><button title="Copy to Clipboard" class="inline-flex items-center opacity-0 group-hover:opacity-100 hover:opacity-100 focus:opacity-100 active:opacity-100 cursor-pointer ml-2 transition-color duration-200 ease-in-out text-blue-400 hover:text-blue-500 absolute right-1 myst-code-copy-icon top-1" aria-pressed="false" aria-label="Copy code to clipboard"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="24" height="24"><path stroke-linecap="round" stroke-linejoin="round" d="M15.75 17.25v3.375c0 .621-.504 1.125-1.125 1.125h-9.75a1.125 1.125 0 0 1-1.125-1.125V7.875c0-.621.504-1.125 1.125-1.125H6.75a9.06 9.06 0 0 1 1.5.124m7.5 10.376h3.375c.621 0 1.125-.504 1.125-1.125V11.25c0-4.46-3.243-8.161-7.5-8.876a9.06 9.06 0 0 0-1.5-.124H9.375c-.621 0-1.125.504-1.125 1.125v3.5m7.5 10.375H9.375a1.125 1.125 0 0 1-1.125-1.125v-9.25m12 6.625v-1.875a3.375 3.375 0 0 0-3.375-3.375h-1.5a1.125 1.125 0 0 1-1.125-1.125v-1.5a3.375 3.375 0 0 0-3.375-3.375H9.75"></path></svg></button></div><div data-name="outputs-container" data-mdast-node-id="c32FLmZQLONK4_obxxawj" class="max-w-full overflow-y-visible overflow-x-auto m-0 group not-prose relative text-left mb-5"><div data-name="safe-output-image"><img src="/numpy-tutorials/build/0af5964dc0b4e30f177f4bb98384bc83.png" alt="<Figure size 640x480 with 1 Axes>"/></div></div></div><h2 id="in-practice" class="relative group"><span class="heading-text">In practice</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#in-practice" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><ul><li><p>Adding <code>-1</code> to missing data is not a problem with <code>numpy.genfromtxt</code>; in this particular case, substituting the missing value with <code>0</code> might have 
been fine, but we’ll see later that this is far from a general solution. Also, it is possible to call the <code>numpy.genfromtxt</code> function using the <code>usemask</code> parameter. If <code>usemask=True</code>, <code>numpy.genfromtxt</code> automatically returns a masked array.</p></li></ul><h2 id="further-reading" class="relative group"><span class="heading-text">Further reading</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#further-reading" title="Link to this Section" aria-label="Link to this Section">¶</a></h2><p>Topics not covered in this tutorial can be found in the documentation:</p><ul><li><p><a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.ma.harden_mask.html#numpy.ma.harden_mask" class="link">Hardmasks<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a> vs. 
<a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/generated/numpy.ma.soften_mask.html#numpy.ma.soften_mask" class="link">softmasks<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a></p></li><li><p><a target="_blank" rel="noreferrer" href="https://numpy.org/devdocs/reference/maskedarray.generic.html#maskedarray-generic" class="link">The numpy.ma module<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" class="link-icon"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 6H5.25A2.25 2.25 0 0 0 3 8.25v10.5A2.25 2.25 0 0 0 5.25 21h10.5A2.25 2.25 0 0 0 18 18.75V10.5m-10.5 6L21 3m0 0h-5.25M21 3v5.25"></path></svg></a></p></li></ul><h3 id="reference" class="relative group"><span class="heading-text">Reference</span><a class="no-underline text-inherit hover:text-inherit inline-block w-0 px-0 translate-x-[10px] font-normal select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#reference" title="Link to this Section" aria-label="Link to this Section">¶</a></h3><ul><li><p>Ensheng Dong, Hongru Du, Lauren Gardner, <em>An interactive web-based dashboard to track COVID-19 in real time</em>, The Lancet Infectious Diseases, Volume 20, Issue 5, 2020, Pages 533-534, ISSN 1473-3099, <cite class="" data-state="closed"><a href="https://doi.org/10.1016/s1473-3099(20)30120-1" target="_blank" rel="noreferrer" class="hover-link">Dong <em>et al.</em> (2020)</a></cite>.</p></li></ul><div class="myst-backmatter-parts"></div><section id="references" class="myst-bibliography article-grid subgrid-gap 
col-screen"><div><header class="myst-bibliography-header text-lg font-semibold text-stone-900 dark:text-white group">References<a class="no-underline text-inherit hover:text-inherit ml-2 select-none transition-opacity opacity-0 focus:opacity-100 group-hover:opacity-70" href="#references" title="Link to References" aria-label="Link to References">¶</a></header></div><div class="myst-bibliography-list pl-3 mb-8 text-xs text-stone-500 dark:text-stone-300"><ol><li class="myst-bibliography-item break-words" id="cite-Dong_2020">Dong, E., Du, H., & Gardner, L. (2020). An interactive web-based dashboard to track COVID-19 in real time. <i>The Lancet Infectious Diseases</i>, <i>20</i>(5), 533–534. <a target="_blank" rel="noreferrer" href="https://doi.org/10.1016/s1473-3099(20)30120-1">10.1016/s1473-3099(20)30120-1</a></li></ol></div></section><div class="myst-footer-links flex pt-10 mb-10 space-x-4"><a class="myst-footer-link flex-1 block p-4 font-normal text-gray-600 no-underline border border-gray-200 rounded shadow-sm group hover:border-blue-600 dark:hover:border-blue-400 hover:text-blue-600 dark:hover:text-blue-400 dark:text-gray-100 dark:border-gray-500 hover:shadow-lg dark:shadow-neutral-700 myst-footer-link-prev" href="/numpy-tutorials/save-load-arrays"><div class="flex h-full align-middle"><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="1.5rem" height="1.5rem" class="myst-footer-link-icon self-center transition-transform group-hover:-translate-x-1 shrink-0"><path stroke-linecap="round" stroke-linejoin="round" d="M10.5 19.5 3 12m0 0 7.5-7.5M3 12h18"></path></svg><div class="flex-grow text-right"><div class="myst-footer-link-group text-xs text-gray-500 dark:text-gray-400">Features</div>Sharing Array Data</div></div></a><a class="myst-footer-link flex-1 block p-4 font-normal text-gray-600 no-underline border border-gray-200 rounded shadow-sm group 
hover:border-blue-600 dark:hover:border-blue-400 hover:text-blue-600 dark:hover:text-blue-400 dark:text-gray-100 dark:border-gray-500 hover:shadow-lg dark:shadow-neutral-700 myst-footer-link-next" href="/numpy-tutorials/contributing"><div class="flex h-full align-middle"><div class="flex-grow"><div class="myst-footer-link-group text-xs text-gray-500 dark:text-gray-400">Features</div>Contributing</div><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="1.5rem" height="1.5rem" class="myst-footer-link-icon self-center transition-transform group-hover:translate-x-1 shrink-0"><path stroke-linecap="round" stroke-linejoin="round" d="M13.5 4.5 21 12m0 0-7.5 7.5M21 12H3"></path></svg></div></a></div></article></main><script>((a,l)=>{if(!window.history.state||!window.history.state.key){let u=Math.random().toString(32).slice(2);window.history.replaceState({key:u},"")}try{let d=JSON.parse(sessionStorage.getItem(a)||"{}")[l||window.history.state.key];typeof d=="number"&&window.scrollTo(0,d)}catch(u){console.error(u),sessionStorage.removeItem(a)}})("positions", null)</script><link rel="modulepreload" href="/numpy-tutorials/build/entry.client-PCJPW7TK.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-AQ2CODAG.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-JJXTQVMA.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-OZE3FFNP.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-CH4FVTDV.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-C4DFGG5C.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-J7TUH54J.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-FZ2S7OYD.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-JEM6JXYA.js"/><link rel="modulepreload" 
href="/numpy-tutorials/build/_shared/chunk-34XIY2DH.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-KQM5FBHR.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-OCWQY3HK.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-7HNKBP4B.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-CUKUDK3R.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-3EBOCCHJ.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-O4VQNZ62.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-4OEDG4JQ.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-GUCIBHGO.js"/><link rel="modulepreload" href="/numpy-tutorials/build/root-SIO6LUTY.js"/><link rel="modulepreload" href="/numpy-tutorials/build/_shared/chunk-FAHZZXAC.js"/><link rel="modulepreload" href="/numpy-tutorials/build/routes/$-PRP77N34.js"/><script>window.__remixContext = {"url":"/tutorial-ma","state":{"loaderData":{"root":{"config":{"version":3,"myst":"1.8.0","options":{"favicon":"/numpy-tutorials/build/favicon-5dd127103743f9fd9d1ab5b34dfbe735.png","logo":"/numpy-tutorials/build/numpylogo-28b47a276e91aa58e8056b86c12e6958.svg"},"nav":[],"actions":[],"projects":[{"title":"Numpy Tutorials","authors":[{"id":"Numpy Community","name":"Numpy 
Community"}],"github":"https://github.com/numpy/numpy-tutorials","toc":[{"file":"content/index.md"},{"children":[{"file":"content/mooreslaw-tutorial.md"},{"file":"content/tutorial-deep-learning-on-mnist.md"},{"file":"content/tutorial-x-ray-image-processing.md"},{"file":"content/tutorial-static_equilibrium.md"},{"file":"content/tutorial-plotting-fractals.md"},{"file":"content/tutorial-air-quality-analysis.md"}],"title":"Applications"},{"children":[{"file":"content/tutorial-svd.md"},{"file":"content/save-load-arrays.md"},{"file":"content/tutorial-ma.md"}],"title":"Features"},{"children":[{"file":"content/tutorial-style-guide.md"}],"file":"content/contributing.md","title":"Contributing"}],"thumbnail":"/numpy-tutorials/build/b77199e99a54e59b2e3c037c2cc90f21.svg","exports":[],"bibliography":[],"index":"index","pages":[{"level":1,"title":"Applications"},{"slug":"mooreslaw-tutorial","title":"Determining Moore’s Law with real data in NumPy","short_title":"Moore's Law","description":"","date":"","thumbnail":"/numpy-tutorials/build/01-mooreslaw-tutoria-68d0ad466c300d347c517c09cd29d0d9.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-deep-learning-on-mnist","title":"Deep learning on MNIST","description":"","date":"","thumbnail":"/numpy-tutorials/build/tutorial-deep-learni-1a6932c38d13641ad9a1eda7d431b1f5.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-x-ray-image-processing","title":"X-ray image processing","description":"","date":"","thumbnail":"/numpy-tutorials/build/tutorial-x-ray-image-cb14ad1cb4fb37f341c5954e91e694b8.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-static-equilibrium","title":"Determining Static Equilibrium in NumPy","short_title":"Static 
Equilibrium","description":"","date":"","thumbnail":"/numpy-tutorials/build/static_eqbm-fig01-ee029c471a69f4e98c09d962ac7d60dd.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-plotting-fractals","title":"Plotting Fractals","description":"","date":"","thumbnail":"/numpy-tutorials/build/fractal-4ebbc2569665376d939ff6f78ae5e5ab.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-air-quality-analysis","title":"Analyzing the impact of the lockdown on air quality in Delhi, India","short_title":"Analyzing Air Quality","description":"","date":"","thumbnail":"/numpy-tutorials/build/11-delhi-aqi-5fa295dd14ed05daaf4cd0193122e2f7.jpg","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"level":1,"title":"Features"},{"slug":"tutorial-svd","title":"Linear algebra on n-dimensional arrays","short_title":"Linear Algebra on n-D arrays","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"save-load-arrays","title":"Saving and sharing your NumPy arrays","short_title":"Sharing Array Data","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-ma","title":"Masked Arrays","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"contributing","title":"Contributing","short_title":"Contributing","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":1},{"slug":"tutorial-style-guide","title":"Learn to write a NumPy tutorial","short_title":"Style 
Guide","description":"","date":"","thumbnail":"/numpy-tutorials/build/56554e3d11983df8f484e8d7b2c2bdae.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2}]}]},"CONTENT_CDN_PORT":"3100","MODE":"static","BASE_URL":"/numpy-tutorials"},"routes/$":{"config":{"version":3,"myst":"1.8.0","options":{"favicon":"/numpy-tutorials/build/favicon-5dd127103743f9fd9d1ab5b34dfbe735.png","logo":"/numpy-tutorials/build/numpylogo-28b47a276e91aa58e8056b86c12e6958.svg"},"nav":[],"actions":[],"projects":[{"title":"Numpy Tutorials","authors":[{"id":"Numpy Community","name":"Numpy Community"}],"github":"https://github.com/numpy/numpy-tutorials","toc":[{"file":"content/index.md"},{"children":[{"file":"content/mooreslaw-tutorial.md"},{"file":"content/tutorial-deep-learning-on-mnist.md"},{"file":"content/tutorial-x-ray-image-processing.md"},{"file":"content/tutorial-static_equilibrium.md"},{"file":"content/tutorial-plotting-fractals.md"},{"file":"content/tutorial-air-quality-analysis.md"}],"title":"Applications"},{"children":[{"file":"content/tutorial-svd.md"},{"file":"content/save-load-arrays.md"},{"file":"content/tutorial-ma.md"}],"title":"Features"},{"children":[{"file":"content/tutorial-style-guide.md"}],"file":"content/contributing.md","title":"Contributing"}],"thumbnail":"/numpy-tutorials/build/b77199e99a54e59b2e3c037c2cc90f21.svg","exports":[],"bibliography":[],"index":"index","pages":[{"level":1,"title":"Applications"},{"slug":"mooreslaw-tutorial","title":"Determining Moore’s Law with real data in NumPy","short_title":"Moore's Law","description":"","date":"","thumbnail":"/numpy-tutorials/build/01-mooreslaw-tutoria-68d0ad466c300d347c517c09cd29d0d9.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-deep-learning-on-mnist","title":"Deep learning on 
MNIST","description":"","date":"","thumbnail":"/numpy-tutorials/build/tutorial-deep-learni-1a6932c38d13641ad9a1eda7d431b1f5.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-x-ray-image-processing","title":"X-ray image processing","description":"","date":"","thumbnail":"/numpy-tutorials/build/tutorial-x-ray-image-cb14ad1cb4fb37f341c5954e91e694b8.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-static-equilibrium","title":"Determining Static Equilibrium in NumPy","short_title":"Static Equilibrium","description":"","date":"","thumbnail":"/numpy-tutorials/build/static_eqbm-fig01-ee029c471a69f4e98c09d962ac7d60dd.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-plotting-fractals","title":"Plotting Fractals","description":"","date":"","thumbnail":"/numpy-tutorials/build/fractal-4ebbc2569665376d939ff6f78ae5e5ab.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-air-quality-analysis","title":"Analyzing the impact of the lockdown on air quality in Delhi, India","short_title":"Analyzing Air Quality","description":"","date":"","thumbnail":"/numpy-tutorials/build/11-delhi-aqi-5fa295dd14ed05daaf4cd0193122e2f7.jpg","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"level":1,"title":"Features"},{"slug":"tutorial-svd","title":"Linear algebra on n-dimensional arrays","short_title":"Linear Algebra on n-D arrays","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"save-load-arrays","title":"Saving and sharing your NumPy arrays","short_title":"Sharing Array Data","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-ma","title":"Masked 
Arrays","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"contributing","title":"Contributing","short_title":"Contributing","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":1},{"slug":"tutorial-style-guide","title":"Learn to write a NumPy tutorial","short_title":"Style Guide","description":"","date":"","thumbnail":"/numpy-tutorials/build/56554e3d11983df8f484e8d7b2c2bdae.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2}]}]},"page":{"version":3,"kind":"Notebook","sha256":"f91c6b2c1c8f4a5106175c4eadbe8a01bbbd9760d5a4101fa9139e79a6b8b4d9","slug":"tutorial-ma","location":"/content/tutorial-ma.md","dependencies":[],"frontmatter":{"title":"Masked Arrays","kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"jupytext":{"formats":"ipynb,md:myst","text_representation":{"extension":".md","format_name":"myst","format_version":"0.13","jupytext_version":"1.11.1"}},"content_includes_title":false,"authors":[{"id":"Numpy Community","name":"Numpy Community"}],"github":"https://github.com/numpy/numpy-tutorials","numbering":{"title":{"offset":1}},"source_url":"https://github.com/numpy/numpy-tutorials/blob/main/content/tutorial-ma.md","edit_url":"https://github.com/numpy/numpy-tutorials/edit/main/content/tutorial-ma.md","exports":[{"format":"md","filename":"tutorial-ma.md","url":"/numpy-tutorials/build/tutorial-ma-7716231bf204ceb0a9fe85db76453ca5.md"}]},"mdast":{"type":"root","children":[{"type":"block","children":[{"type":"heading","depth":2,"position":{"start":{"line":17,"column":1},"end":{"line":17,"column":1}},"children":[{"type":"text","value":"What you’ll do","position":{"start":{"line":17,"column":1},"end":{"line":17,"column":1}},"key":"jOcqirfT0V"}],"identifier":"what-youll-do","label":"What you’ll 
do","html_id":"what-youll-do","implicit":true,"key":"F6PGkdNnou"},{"type":"paragraph","position":{"start":{"line":19,"column":1},"end":{"line":19,"column":1}},"children":[{"type":"text","value":"Use the masked arrays module from NumPy to analyze COVID-19 data and deal with missing values.","position":{"start":{"line":19,"column":1},"end":{"line":19,"column":1}},"key":"s1UZ1twtdb"}],"key":"dWtX2fw7HU"},{"type":"heading","depth":2,"position":{"start":{"line":21,"column":1},"end":{"line":21,"column":1}},"children":[{"type":"text","value":"What you’ll learn","position":{"start":{"line":21,"column":1},"end":{"line":21,"column":1}},"key":"d8d4foi8E1"}],"identifier":"what-youll-learn","label":"What you’ll learn","html_id":"what-youll-learn","implicit":true,"key":"gPCJUce6xr"},{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":23,"column":1},"end":{"line":26,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":23,"column":1},"end":{"line":23,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"You’ll understand what are masked arrays and how they can be created","position":{"start":{"line":23,"column":1},"end":{"line":23,"column":1}},"key":"q32hqMuVHQ"}],"key":"kA1dofTHrR"}],"key":"hw77sO9f7p"},{"type":"listItem","spread":true,"position":{"start":{"line":24,"column":1},"end":{"line":24,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"You’ll see how to access and modify data for masked arrays","position":{"start":{"line":24,"column":1},"end":{"line":24,"column":1}},"key":"dLU1NvLypG"}],"key":"CQAb3DKKKR"}],"key":"t3HM4r1Ayt"},{"type":"listItem","spread":true,"position":{"start":{"line":25,"column":1},"end":{"line":26,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"You’ll be able to decide when the use of masked arrays is appropriate in some of your 
applications","position":{"start":{"line":25,"column":1},"end":{"line":25,"column":1}},"key":"gYWhSFD9uk"}],"key":"O4NjiKjU9e"}],"key":"hUMgHMo53C"}],"key":"p04vzilGxy"},{"type":"heading","depth":2,"position":{"start":{"line":27,"column":1},"end":{"line":27,"column":1}},"children":[{"type":"text","value":"What you’ll need","position":{"start":{"line":27,"column":1},"end":{"line":27,"column":1}},"key":"FNdvf7EHpy"}],"identifier":"what-youll-need","label":"What you’ll need","html_id":"what-youll-need","implicit":true,"key":"aHKNvKtNnQ"},{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":29,"column":1},"end":{"line":32,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":29,"column":1},"end":{"line":29,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"Basic familiarity with Python. If you would like to refresh your memory, take a look at the ","position":{"start":{"line":29,"column":1},"end":{"line":29,"column":1}},"key":"hhBOiikiMH"},{"type":"link","url":"https://docs.python.org/dev/tutorial/index.html","position":{"start":{"line":29,"column":1},"end":{"line":29,"column":1}},"children":[{"type":"text","value":"Python tutorial","position":{"start":{"line":29,"column":1},"end":{"line":29,"column":1}},"key":"ygph4uu49H"}],"urlSource":"https://docs.python.org/dev/tutorial/index.html","key":"fFXiOqoPOL"},{"type":"text","value":".","position":{"start":{"line":29,"column":1},"end":{"line":29,"column":1}},"key":"S6ZXeadC9d"}],"key":"MGxfkYU4sD"}],"key":"QkaJJLNxEE"},{"type":"listItem","spread":true,"position":{"start":{"line":30,"column":1},"end":{"line":30,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"Basic familiarity with 
NumPy","position":{"start":{"line":30,"column":1},"end":{"line":30,"column":1}},"key":"biDpPZpHvT"}],"key":"OhhNTAWwly"}],"key":"GIyUoLKrJX"},{"type":"listItem","spread":true,"position":{"start":{"line":31,"column":1},"end":{"line":32,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"To run the plots on your computer, you need ","position":{"start":{"line":31,"column":1},"end":{"line":31,"column":1}},"key":"blsIjS4fub"},{"type":"link","url":"https://matplotlib.org","position":{"start":{"line":31,"column":1},"end":{"line":31,"column":1}},"children":[{"type":"text","value":"matplotlib","position":{"start":{"line":31,"column":1},"end":{"line":31,"column":1}},"key":"ExFhPPutv9"}],"urlSource":"https://matplotlib.org","key":"NhERufJpIo"},{"type":"text","value":".","position":{"start":{"line":31,"column":1},"end":{"line":31,"column":1}},"key":"icfylvKsMk"}],"key":"CTWSyPwS0v"}],"key":"dvy2Q4mXt5"}],"key":"ZvJXHa2J5i"}],"key":"ibSfA8kSZh"},{"type":"block","position":{"start":{"line":33,"column":1},"end":{"line":33,"column":1}},"children":[{"type":"thematicBreak","position":{"start":{"line":35,"column":1},"end":{"line":35,"column":1}},"key":"OvZ10MF917"}],"key":"Se1jnMAH9x"},{"type":"block","position":{"start":{"line":37,"column":1},"end":{"line":37,"column":1}},"children":[{"type":"heading","depth":2,"position":{"start":{"line":39,"column":1},"end":{"line":39,"column":1}},"children":[{"type":"text","value":"What are masked arrays?","position":{"start":{"line":39,"column":1},"end":{"line":39,"column":1}},"key":"HxYuJ9Btyd"}],"identifier":"what-are-masked-arrays","label":"What are masked arrays?","html_id":"what-are-masked-arrays","implicit":true,"key":"ACv2UIV8Pb"},{"type":"paragraph","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"children":[{"type":"text","value":"Consider the following problem. You have a dataset with missing or invalid entries. 
If you’re doing any kind of processing on this data, and want to ","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"key":"tg0IAti4NW"},{"type":"emphasis","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"children":[{"type":"text","value":"skip","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"key":"oriiiiZuRu"}],"key":"TUqjpb9wBB"},{"type":"text","value":" or flag these unwanted entries without just deleting them, you may have to use conditionals or filter your data somehow. The ","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"key":"YxLOJTwJdk"},{"type":"link","url":"https://numpy.org/devdocs/reference/maskedarray.generic.html#module-numpy.ma","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"children":[{"type":"text","value":"numpy.ma","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"key":"hxtNl9Ldrm"}],"urlSource":"https://numpy.org/devdocs/reference/maskedarray.generic.html#module-numpy.ma","key":"edUMWvsI0M"},{"type":"text","value":" module provides some of the same functionality of ","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"key":"rnsqJhwX3x"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.ndarray.html#numpy.ndarray","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"children":[{"type":"text","value":"NumPy ndarrays","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"key":"Q6vAh3Mm7k"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.ndarray.html#numpy.ndarray","key":"vyGvVLduOF"},{"type":"text","value":" with added structure to ensure invalid entries are not used in 
computation.","position":{"start":{"line":41,"column":1},"end":{"line":41,"column":1}},"key":"tQumAzvW6t"}],"key":"eX0mjwHtLO"},{"type":"paragraph","position":{"start":{"line":43,"column":1},"end":{"line":43,"column":1}},"children":[{"type":"text","value":"From the ","position":{"start":{"line":43,"column":1},"end":{"line":43,"column":1}},"key":"nVYRBeLqcQ"},{"type":"link","url":"https://numpy.org/devdocs/reference/maskedarray.generic.html#module-numpy.ma","position":{"start":{"line":43,"column":1},"end":{"line":43,"column":1}},"children":[{"type":"text","value":"Reference Guide","position":{"start":{"line":43,"column":1},"end":{"line":43,"column":1}},"key":"pAfmgwqNGt"}],"urlSource":"https://numpy.org/devdocs/reference/maskedarray.generic.html#module-numpy.ma","key":"R5ZBbENp7P"},{"type":"text","value":":","position":{"start":{"line":43,"column":1},"end":{"line":43,"column":1}},"key":"Qgh9gc67Bb"}],"key":"RC8G2vDSIP"},{"type":"blockquote","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"children":[{"type":"paragraph","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"children":[{"type":"text","value":"A masked array is the combination of a standard ","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"f0BAC6KZoM"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.ndarray.html#numpy.ndarray","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"children":[{"type":"text","value":"numpy.ndarray","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"ZbvLQy2LUR"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.ndarray.html#numpy.ndarray","key":"mgJaellbaf"},{"type":"text","value":" and a 
","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"UXibO3frM2"},{"type":"strong","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"children":[{"type":"text","value":"mask","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"csZJYj0oxr"}],"key":"coDvlhfGWI"},{"type":"text","value":". A mask is either ","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"U9RxdYaTQh"},{"type":"inlineCode","value":"nomask","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"WvG5DVpGiI"},{"type":"text","value":", indicating that no value of the associated array is invalid, or an array of booleans that determines for each element of the associated array whether the value is valid or not. When an element of the mask is ","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"jWnJC1QERt"},{"type":"inlineCode","value":"False","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"xP1M0Vtaag"},{"type":"text","value":", the corresponding element of the associated array is valid and is said to be unmasked. 
When an element of the mask is ","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"li2GnJsahG"},{"type":"inlineCode","value":"True","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"rx3X1CIh4b"},{"type":"text","value":", the corresponding element of the associated array is said to be masked (invalid).","position":{"start":{"line":45,"column":1},"end":{"line":45,"column":1}},"key":"EHkfk2OWnq"}],"key":"h3BVyZZV4I"}],"key":"tYMpEfKyRa"},{"type":"paragraph","position":{"start":{"line":47,"column":1},"end":{"line":47,"column":1}},"children":[{"type":"text","value":"We can think of a ","position":{"start":{"line":47,"column":1},"end":{"line":47,"column":1}},"key":"Ob2FulVfMI"},{"type":"link","url":"https://numpy.org/devdocs/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray","position":{"start":{"line":47,"column":1},"end":{"line":47,"column":1}},"children":[{"type":"text","value":"MaskedArray","position":{"start":{"line":47,"column":1},"end":{"line":47,"column":1}},"key":"Kf1ahfW6My"}],"urlSource":"https://numpy.org/devdocs/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray","key":"xXVVOfUSVG"},{"type":"text","value":" as a combination of:","position":{"start":{"line":47,"column":1},"end":{"line":47,"column":1}},"key":"emFkGOLQub"}],"key":"kwWKqfSEvL"},{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":49,"column":1},"end":{"line":52,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":49,"column":1},"end":{"line":49,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"Data, as a regular ","position":{"start":{"line":49,"column":1},"end":{"line":49,"column":1}},"key":"DfdPnMwLKa"},{"type":"inlineCode","value":"numpy.ndarray","position":{"start":{"line":49,"column":1},"end":{"line":49,"column":1}},"key":"zWZ5KH8cZF"},{"type":"text","value":" of any shape or 
datatype;","position":{"start":{"line":49,"column":1},"end":{"line":49,"column":1}},"key":"ttmVHAKIWc"}],"key":"CGZ5CZXJAN"}],"key":"vke0F397e2"},{"type":"listItem","spread":true,"position":{"start":{"line":50,"column":1},"end":{"line":50,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"A boolean mask with the same shape as the data;","position":{"start":{"line":50,"column":1},"end":{"line":50,"column":1}},"key":"TWnYBAiZHz"}],"key":"VzXfY7guVL"}],"key":"zTpj8kvt7Q"},{"type":"listItem","spread":true,"position":{"start":{"line":51,"column":1},"end":{"line":52,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"A ","position":{"start":{"line":51,"column":1},"end":{"line":51,"column":1}},"key":"hUIJNGkKXC"},{"type":"inlineCode","value":"fill_value","position":{"start":{"line":51,"column":1},"end":{"line":51,"column":1}},"key":"w7EjUjXKNo"},{"type":"text","value":", a value that may be used to replace the invalid entries in order to return a standard ","position":{"start":{"line":51,"column":1},"end":{"line":51,"column":1}},"key":"oG0lQauStz"},{"type":"inlineCode","value":"numpy.ndarray","position":{"start":{"line":51,"column":1},"end":{"line":51,"column":1}},"key":"GmIKwcD8bS"},{"type":"text","value":".","position":{"start":{"line":51,"column":1},"end":{"line":51,"column":1}},"key":"AzZUlGzWtf"}],"key":"PyMbrPM26q"}],"key":"uDqR8O4fmV"}],"key":"cB3e4FVvfp"},{"type":"heading","depth":2,"position":{"start":{"line":53,"column":1},"end":{"line":53,"column":1}},"children":[{"type":"text","value":"When can they be useful?","position":{"start":{"line":53,"column":1},"end":{"line":53,"column":1}},"key":"PpC1XkkC4B"}],"identifier":"when-can-they-be-useful","label":"When can they be useful?","html_id":"when-can-they-be-useful","implicit":true,"key":"TAvqcOnuKb"},{"type":"paragraph","position":{"start":{"line":55,"column":1},"end":{"line":55,"column":1}},"children":[{"type":"text","value":"There are a few situations 
where masked arrays can be more useful than just eliminating the invalid entries of an array:","position":{"start":{"line":55,"column":1},"end":{"line":55,"column":1}},"key":"VO0iNl2FcM"}],"key":"xEzQn5jYEL"},{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":57,"column":1},"end":{"line":61,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":57,"column":1},"end":{"line":57,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"When you want to preserve the values you masked for later processing, without copying the array;","position":{"start":{"line":57,"column":1},"end":{"line":57,"column":1}},"key":"z7tp9MCqGz"}],"key":"ga4LsvaJlK"}],"key":"pbMGVxa0rF"},{"type":"listItem","spread":true,"position":{"start":{"line":58,"column":1},"end":{"line":58,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"When you have to handle many arrays, each with their own mask. If the mask is part of the array, you avoid bugs and the code is possibly more compact;","position":{"start":{"line":58,"column":1},"end":{"line":58,"column":1}},"key":"cAbnD3WNSb"}],"key":"yrILKKgxba"}],"key":"tyqYom3xqV"},{"type":"listItem","spread":true,"position":{"start":{"line":59,"column":1},"end":{"line":59,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"When you have different flags for missing or invalid values, and wish to preserve these flags without replacing them in the original dataset, but exclude them from computations;","position":{"start":{"line":59,"column":1},"end":{"line":59,"column":1}},"key":"oGDu5tGdZj"}],"key":"YNDJObbhjh"}],"key":"zdF4vcEm1X"},{"type":"listItem","spread":true,"position":{"start":{"line":60,"column":1},"end":{"line":61,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"If you can’t avoid or eliminate missing values, but don’t want to deal with 
","position":{"start":{"line":60,"column":1},"end":{"line":60,"column":1}},"key":"DMaRMxuYZR"},{"type":"link","url":"https://numpy.org/devdocs/reference/constants.html#numpy.nan","position":{"start":{"line":60,"column":1},"end":{"line":60,"column":1}},"children":[{"type":"text","value":"NaN (Not a Number)","position":{"start":{"line":60,"column":1},"end":{"line":60,"column":1}},"key":"FlTvRSseha"}],"urlSource":"https://numpy.org/devdocs/reference/constants.html#numpy.nan","key":"MYIzrvrzQ6"},{"type":"text","value":" values in your operations.","position":{"start":{"line":60,"column":1},"end":{"line":60,"column":1}},"key":"K8Hgdd7cnQ"}],"key":"LJW8rxclfG"}],"key":"FfdyGq3eIC"}],"key":"vHs29QgXFy"},{"type":"paragraph","position":{"start":{"line":62,"column":1},"end":{"line":62,"column":1}},"children":[{"type":"text","value":"Masked arrays are also a good idea since the ","position":{"start":{"line":62,"column":1},"end":{"line":62,"column":1}},"key":"JOIfsC3pSk"},{"type":"inlineCode","value":"numpy.ma","position":{"start":{"line":62,"column":1},"end":{"line":62,"column":1}},"key":"bzdzyatnzZ"},{"type":"text","value":" module also comes with a specific implementation of most ","position":{"start":{"line":62,"column":1},"end":{"line":62,"column":1}},"key":"o16kfH6ce9"},{"type":"link","url":"https://numpy.org/devdocs/glossary.html#term-ufunc","position":{"start":{"line":62,"column":1},"end":{"line":62,"column":1}},"children":[{"type":"text","value":"NumPy universal functions (ufuncs)","position":{"start":{"line":62,"column":1},"end":{"line":62,"column":1}},"key":"ioB8hU5sNT"}],"urlSource":"https://numpy.org/devdocs/glossary.html#term-ufunc","key":"UilDBaCF89"},{"type":"text","value":", which means that you can still apply fast vectorized functions and operations on masked data. The output is then a masked array. 
We’ll see some examples of how this works in practice below.","position":{"start":{"line":62,"column":1},"end":{"line":62,"column":1}},"key":"sRrCevU3IP"}],"key":"WBviHL5fM2"}],"key":"ZIfDXfhmSY"},{"type":"block","position":{"start":{"line":64,"column":1},"end":{"line":64,"column":1}},"children":[{"type":"heading","depth":2,"position":{"start":{"line":66,"column":1},"end":{"line":66,"column":1}},"children":[{"type":"text","value":"Using masked arrays to see COVID-19 data","position":{"start":{"line":66,"column":1},"end":{"line":66,"column":1}},"key":"VnBJRysFtI"}],"identifier":"using-masked-arrays-to-see-covid-19-data","label":"Using masked arrays to see COVID-19 data","html_id":"using-masked-arrays-to-see-covid-19-data","implicit":true,"key":"HktiutrgGI"},{"type":"paragraph","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"children":[{"type":"text","value":"From ","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"key":"MvxZ2JWNWv"},{"type":"link","url":"https://www.kaggle.com/atilamadai/covid19","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"children":[{"type":"text","value":"Kaggle","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"key":"LpgVh8FytA"}],"urlSource":"https://www.kaggle.com/atilamadai/covid19","key":"KBSo882Y1w"},{"type":"text","value":" it is possible to download a dataset with initial data about the COVID-19 outbreak in the beginning of 2020. We are going to look at a small subset of this data, contained in the file ","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"key":"ZmeR0nkbLs"},{"type":"inlineCode","value":"who_covid_19_sit_rep_time_series.csv","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"key":"OBry7epFQH"},{"type":"text","value":". 
","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"key":"GGcd97eBt2"},{"type":"emphasis","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"children":[{"type":"text","value":"(Note that this file has been replaced with a version without missing data sometime in late 2020.)","position":{"start":{"line":68,"column":1},"end":{"line":68,"column":1}},"key":"geQQUNLIie"}],"key":"fb5hjLMp3K"}],"key":"hT7yM8bwee"}],"key":"JodPmaQD2P"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"import numpy as np\nimport os\n\n# The os.getcwd() function returns the current folder; you can change\n# the filepath variable to point to the folder where you saved the .csv file\nfilepath = os.getcwd()\nfilename = os.path.join(filepath, \"who_covid_19_sit_rep_time_series.csv\")","key":"WQiDsertZ8"},{"type":"outputs","id":"iyRudBsnADbwfv-er3k0C","children":[],"key":"wvkX4b3F51"}],"key":"dUNOyUDIEh"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":80,"column":1},"end":{"line":80,"column":1}},"children":[{"type":"text","value":"The data file contains data of different types and is organized as follows:","position":{"start":{"line":80,"column":1},"end":{"line":80,"column":1}},"key":"tDwG7oNYX2"}],"key":"V9yMSyZrTk"},{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":82,"column":1},"end":{"line":85,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":82,"column":1},"end":{"line":82,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"The first row is a header line that (mostly) describes the data in each column that follow in the rows below, and beginning in the fourth column, the header is the date of the 
observation.","position":{"start":{"line":82,"column":1},"end":{"line":82,"column":1}},"key":"wfdp0OTjVM"}],"key":"CPhEz0r3hy"}],"key":"nMlHdsBwAc"},{"type":"listItem","spread":true,"position":{"start":{"line":83,"column":1},"end":{"line":83,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"The second through seventh row contain summary data that is of a different type than that which we are going to examine, so we will need to exclude that from the data with which we will work.","position":{"start":{"line":83,"column":1},"end":{"line":83,"column":1}},"key":"UmvBUyBFER"}],"key":"l2cAe5YeL2"}],"key":"RVHLEBl6z3"},{"type":"listItem","spread":true,"position":{"start":{"line":84,"column":1},"end":{"line":85,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"The numerical data we wish to work with begins at column 4, row 8, and extends from there to the rightmost column and the lowermost row.","position":{"start":{"line":84,"column":1},"end":{"line":84,"column":1}},"key":"rBHPXi0kKm"}],"key":"a7bLEBpxi6"}],"key":"pwohLGhAAg"}],"key":"LGoGyo7kKV"},{"type":"paragraph","position":{"start":{"line":86,"column":1},"end":{"line":87,"column":1}},"children":[{"type":"text","value":"Let’s explore the data inside this file for the first 14 days of records. 
To gather data from the ","position":{"start":{"line":86,"column":1},"end":{"line":86,"column":1}},"key":"JnRLbV0mA9"},{"type":"inlineCode","value":".csv","position":{"start":{"line":86,"column":1},"end":{"line":86,"column":1}},"key":"EWKnqekL6h"},{"type":"text","value":" file, we will use the ","position":{"start":{"line":86,"column":1},"end":{"line":86,"column":1}},"key":"RwM1hIgkX5"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.genfromtxt.html#numpy.genfromtxt","position":{"start":{"line":86,"column":1},"end":{"line":86,"column":1}},"children":[{"type":"text","value":"numpy.genfromtxt","position":{"start":{"line":86,"column":1},"end":{"line":86,"column":1}},"key":"lP3bEm8UQz"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.genfromtxt.html#numpy.genfromtxt","key":"noFJLqxjye"},{"type":"text","value":" function, making sure we select only the columns with actual numbers instead of the first four columns which contain location data. We also skip the first 6\nrows of this file, since they contain other data we are not interested in. 
Separately, we will extract the information about dates and location for this data.","position":{"start":{"line":86,"column":1},"end":{"line":86,"column":1}},"key":"nnEMJehP9G"}],"key":"m3Tz9TAKWw"}],"key":"Vo2SHdOezX"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"# Note we are using skip_header and usecols to read only portions of the\n# data file into each variable.\n# Read just the dates for columns 4-18 from the first row\ndates = np.genfromtxt(\n filename,\n dtype=np.str_,\n delimiter=\",\",\n max_rows=1,\n usecols=range(4, 18),\n encoding=\"utf-8-sig\",\n)\n# Read the names of the geographic locations from the first two\n# columns, skipping the first six rows\nlocations = np.genfromtxt(\n filename,\n dtype=np.str_,\n delimiter=\",\",\n skip_header=6,\n usecols=(0, 1),\n encoding=\"utf-8-sig\",\n)\n# Read the numeric data from just the first 14 days\nnbcases = np.genfromtxt(\n filename,\n dtype=np.int_,\n delimiter=\",\",\n skip_header=6,\n usecols=range(4, 18),\n encoding=\"utf-8-sig\",\n)","key":"aneBQRPgnm"},{"type":"outputs","id":"3Bdc8V_mb_3bBgTaw1k0S","children":[],"key":"HEb9aMwTpU"}],"key":"moRMRRzq9n"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"children":[{"type":"text","value":"Included in the ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"yJjdj5sgRK"},{"type":"inlineCode","value":"numpy.genfromtxt","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"RAGtGdZ6PR"},{"type":"text","value":" function call, we have selected the 
","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"FWfCHE6Xge"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.dtype.html#numpy.dtype","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"children":[{"type":"text","value":"numpy.dtype","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"cnqlr4yVos"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.dtype.html#numpy.dtype","key":"OBVNoSVLg6"},{"type":"text","value":" for each subset of the data (either an integer - ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"ZjExSoKw4E"},{"type":"inlineCode","value":"numpy.int_","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"amPpUBKjIi"},{"type":"text","value":" - or a string of characters - ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"WGqlx9JoQx"},{"type":"inlineCode","value":"numpy.str_","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"AEaDpgVxDH"},{"type":"text","value":"). 
We have also used the ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"pnhPJbGA56"},{"type":"inlineCode","value":"encoding","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"VTjnWATozT"},{"type":"text","value":" argument to select ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"zjIOi3LK1t"},{"type":"inlineCode","value":"utf-8-sig","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"eNDXsKyQOC"},{"type":"text","value":" as the encoding for the file (read more about encoding in the ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"pTJ2EPdUjH"},{"type":"link","url":"https://docs.python.org/3/library/codecs.html#encodings-and-unicode","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"children":[{"type":"text","value":"official Python documentation","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"FMePpA7dv0"}],"urlSource":"https://docs.python.org/3/library/codecs.html#encodings-and-unicode","key":"bgTAwo10xx"},{"type":"text","value":"). 
You can read more about the ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"FrCROp5jJQ"},{"type":"inlineCode","value":"numpy.genfromtxt","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"tIn7mITnGn"},{"type":"text","value":" function from the ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"EiahKDTIOU"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.genfromtxt.html#numpy.genfromtxt","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"children":[{"type":"text","value":"Reference Documentation","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"SPrd6C3m7B"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.genfromtxt.html#numpy.genfromtxt","key":"O9LD6Mg9eB"},{"type":"text","value":" or from the ","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"acMYwINQD3"},{"type":"link","url":"https://numpy.org/devdocs/user/basics.io.genfromtxt.html","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"children":[{"type":"text","value":"Basic IO tutorial","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"ZStjzJTVzk"}],"urlSource":"https://numpy.org/devdocs/user/basics.io.genfromtxt.html","key":"wil8ZU2E6G"},{"type":"text","value":".","position":{"start":{"line":122,"column":1},"end":{"line":122,"column":1}},"key":"Hl8wsgC2yP"}],"key":"R36uTuMUMk"}],"key":"KYfb51qHnq"},{"type":"block","position":{"start":{"line":124,"column":1},"end":{"line":124,"column":1}},"children":[{"type":"heading","depth":2,"position":{"start":{"line":126,"column":1},"end":{"line":126,"column":1}},"children":[{"type":"text","value":"Exploring the data","position":{"start":{"line":126,"column":1},"end":{"line":126,"column":1}},"key":"OR6FQpqtWd"}],"identifier":"exploring-the-data","label":"Exploring the 
data","html_id":"exploring-the-data","implicit":true,"key":"eZKY2cuhsz"},{"type":"paragraph","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"children":[{"type":"text","value":"First of all, we can plot the whole set of data we have and see what it looks like. In order to get a readable plot, we select only a few of the dates to show in our ","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"O4Eft4xqYJ"},{"type":"link","url":"https://matplotlib.org/api/_as_gen/matplotlib.pyplot.xticks.html#matplotlib.pyplot.xticks","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"children":[{"type":"text","value":"x-axis ticks","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"U6SWBAcHQr"}],"urlSource":"https://matplotlib.org/api/_as_gen/matplotlib.pyplot.xticks.html#matplotlib.pyplot.xticks","key":"yWeYbPH2Db"},{"type":"text","value":". Note also that in our plot command, we use ","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"Gd0xvlwCHs"},{"type":"inlineCode","value":"nbcases.T","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"RCs4rZ5eg5"},{"type":"text","value":" (the transpose of the ","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"wUyMhG0RAw"},{"type":"inlineCode","value":"nbcases","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"DYDhAGDIFz"},{"type":"text","value":" array) since this means we will plot each row of the file as a separate line. We choose to plot a dashed line (using the ","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"TvIzDXo3x7"},{"type":"inlineCode","value":"'--'","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"OBQZffqhDd"},{"type":"text","value":" line style). 
See the ","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"levyKeL7Yu"},{"type":"link","url":"https://matplotlib.org/","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"children":[{"type":"text","value":"matplotlib","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"ugdzARTPZs"}],"urlSource":"https://matplotlib.org/","key":"xO6Dz1dbok"},{"type":"text","value":" documentation for more info on this.","position":{"start":{"line":128,"column":1},"end":{"line":128,"column":1}},"key":"GWFMjvjxTq"}],"key":"uVtWLnFuXX"}],"key":"rzRl1TSNJi"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"import matplotlib.pyplot as plt\n\nselected_dates = [0, 3, 11, 13]\n\nfig, ax = plt.subplots()\nax.plot(dates, nbcases.T, \"--\")\nax.set_xticks(selected_dates, dates[selected_dates])\nax.set_title(\"COVID-19 cumulative cases from Jan 21 to Feb 3 2020\")","key":"L4KPu8zr4p"},{"type":"outputs","id":"hXeOSFyl8mGCrgjXif6fS","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"display_data","metadata":{},"data":{"text/plain":{"content":"\u003cFigure size 640x480 with 1 Axes\u003e","content_type":"text/plain"},"image/png":{"content_type":"image/png","hash":"d08c79eba17c823037f90ad9c9ac2cc1","path":"/numpy-tutorials/build/d08c79eba17c823037f90ad9c9ac2cc1.png"}}},"key":"D5cOi0Ami8"}],"key":"VeGY0bkFj9"}],"key":"OWkQSiFO3N"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"children":[{"type":"text","value":"The graph has a strange shape from January 24th to February 1st. It would be interesting to know where this data comes from. 
If we look at the ","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"MqttbOSCg7"},{"type":"inlineCode","value":"locations","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"FzrYvxhe42"},{"type":"text","value":" array we extracted from the ","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"n0j6xMB43G"},{"type":"inlineCode","value":".csv","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"KGwLp54Ix4"},{"type":"text","value":" file, we can see that we have two columns, where the first would contain regions and the second would contain the name of the country. However, only the first few rows contain data for the first column (province names in China). Following that, we only have country names. So it would make sense to group all the data from China into a single row. For this, we’ll select from the ","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"yLKA8zlXCe"},{"type":"inlineCode","value":"nbcases","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"ItwfjS2z91"},{"type":"text","value":" array only the rows for which the second entry of the ","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"H5V8aK2akH"},{"type":"inlineCode","value":"locations","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"MuPXL9BWsz"},{"type":"text","value":" array corresponds to China. 
Next, we’ll use the ","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"zg3Zi0Fypz"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.sum.html#numpy.sum","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"children":[{"type":"text","value":"numpy.sum","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"rlRsUfKmZD"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.sum.html#numpy.sum","key":"qXRMHWHGNF"},{"type":"text","value":" function to sum all the selected rows (","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"iZuABks7V0"},{"type":"inlineCode","value":"axis=0","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"QfFqnw7uat"},{"type":"text","value":"). Note also that row 35 corresponds to the total counts for the whole country for each date. Since we want to calculate the sum ourselves from the provinces data, we have to remove that row first from both ","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"XpV42vbbuh"},{"type":"inlineCode","value":"locations","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"WtMrZwG25b"},{"type":"text","value":" and ","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"ziGcLysejE"},{"type":"inlineCode","value":"nbcases","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"gDnyiMvV4b"},{"type":"text","value":":","position":{"start":{"line":141,"column":1},"end":{"line":141,"column":1}},"key":"hx13NDQKp2"}],"key":"uIXPOODGvC"}],"key":"ADLnZolnQy"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"totals_row = 35\nlocations = np.delete(locations, (totals_row), axis=0)\nnbcases = np.delete(nbcases, (totals_row), axis=0)\n\nchina_total = nbcases[locations[:, 1] == 
\"China\"].sum(axis=0)\nchina_total","key":"sNWerJc6W5"},{"type":"outputs","id":"4ESkLRgsIuLgD1mbJ9f1A","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":4,"metadata":{},"data":{"text/plain":{"content":"array([ 247, 288, 556, 817, -22, -22, -15, -10, -9,\n -7, -4, 11820, 14410, 17237])","content_type":"text/plain"}}},"key":"wPxNI3GGG3"}],"key":"LlKBppux4R"}],"key":"YNFThK7AiZ"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":152,"column":1},"end":{"line":152,"column":1}},"children":[{"type":"text","value":"Something’s wrong with this data - we are not supposed to have negative values in a cumulative data set. What’s going on?","position":{"start":{"line":152,"column":1},"end":{"line":152,"column":1}},"key":"zIa6s3ZDiq"}],"key":"k7FueggLDm"}],"key":"BNZWol5H5A"},{"type":"block","position":{"start":{"line":154,"column":1},"end":{"line":154,"column":1}},"children":[{"type":"heading","depth":2,"position":{"start":{"line":156,"column":1},"end":{"line":156,"column":1}},"children":[{"type":"text","value":"Missing data","position":{"start":{"line":156,"column":1},"end":{"line":156,"column":1}},"key":"ASgnsTPZwC"}],"identifier":"missing-data","label":"Missing data","html_id":"missing-data","implicit":true,"key":"JM4tbYfMiH"},{"type":"paragraph","position":{"start":{"line":158,"column":1},"end":{"line":158,"column":1}},"children":[{"type":"text","value":"Looking at the data, here’s what we find: there is a period with ","position":{"start":{"line":158,"column":1},"end":{"line":158,"column":1}},"key":"Q4y5TewLz6"},{"type":"strong","position":{"start":{"line":158,"column":1},"end":{"line":158,"column":1}},"children":[{"type":"text","value":"missing 
data","position":{"start":{"line":158,"column":1},"end":{"line":158,"column":1}},"key":"vxNidvC1Ut"}],"key":"fLDkAqb2kX"},{"type":"text","value":":","position":{"start":{"line":158,"column":1},"end":{"line":158,"column":1}},"key":"Ybw9THRhSC"}],"key":"B7LJuuws2M"}],"key":"NDgtz8pufe"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"nbcases","key":"m0eas7vUkK"},{"type":"outputs","id":"bApvnRcnM4E6tekpy1Sgp","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":5,"metadata":{},"data":{"text/plain":{"content":"array([[ 258, 270, 375, ..., 7153, 9074, 11177],\n [ 14, 17, 26, ..., 520, 604, 683],\n [ -1, 1, 1, ..., 422, 493, 566],\n ...,\n [ -1, -1, -1, ..., -1, -1, -1],\n [ -1, -1, -1, ..., -1, -1, -1],\n [ -1, -1, -1, ..., -1, -1, -1]], shape=(263, 14))","content_type":"text/plain"}}},"key":"EG1GTvCAE7"}],"key":"S6ueULbCC3"}],"key":"OSgxDmKH0V"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":164,"column":1},"end":{"line":165,"column":1}},"children":[{"type":"text","value":"All the ","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"DSLVmtuOJH"},{"type":"inlineCode","value":"-1","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"Jq3M7ECh0y"},{"type":"text","value":" values we are seeing come from ","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"paGGRKZpr4"},{"type":"inlineCode","value":"numpy.genfromtxt","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"eV6xKW9xF7"},{"type":"text","value":" attempting to read missing data from the original ","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"TtE7XfjA4B"},{"type":"inlineCode","value":".csv","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"Bn607uPULk"},{"type":"text","value":" file. 
Obviously, we\ndon’t want to compute missing data as ","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"mYOj5o7953"},{"type":"inlineCode","value":"-1","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"SiNyXQDXTN"},{"type":"text","value":" - we just want to skip this value so it doesn’t interfere in our analysis. After importing the ","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"Nj25HHWxbA"},{"type":"inlineCode","value":"numpy.ma","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"D29vyUM9rl"},{"type":"text","value":" module, we’ll create a new array, this time masking the invalid values:","position":{"start":{"line":164,"column":1},"end":{"line":164,"column":1}},"key":"BEeP2qp4ec"}],"key":"jQMXAmHM6k"}],"key":"khXPca6Hfp"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"from numpy import ma\n\nnbcases_ma = ma.masked_values(nbcases, -1)","key":"n6P8LKLTJc"},{"type":"outputs","id":"uUWn9L_lYnaQfxfqe6-zZ","children":[],"key":"QvejjfzNau"}],"key":"u3UhS9HwHX"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":173,"column":1},"end":{"line":173,"column":1}},"children":[{"type":"text","value":"If we look at the ","position":{"start":{"line":173,"column":1},"end":{"line":173,"column":1}},"key":"PkrBwauYvZ"},{"type":"inlineCode","value":"nbcases_ma","position":{"start":{"line":173,"column":1},"end":{"line":173,"column":1}},"key":"CosoHcOIrA"},{"type":"text","value":" masked array, this is what we 
have:","position":{"start":{"line":173,"column":1},"end":{"line":173,"column":1}},"key":"kGoSmMsV8t"}],"key":"PGFWKLRzZf"}],"key":"VmzxnYPbGZ"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"nbcases_ma","key":"IIQErKuxRD"},{"type":"outputs","id":"E0gHsBcVUX2WrBwUax0zi","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":7,"metadata":{},"data":{"text/plain":{"content":"masked_array(\n data=[[258, 270, 375, ..., 7153, 9074, 11177],\n [14, 17, 26, ..., 520, 604, 683],\n [--, 1, 1, ..., 422, 493, 566],\n ...,\n [--, --, --, ..., --, --, --],\n [--, --, --, ..., --, --, --],\n [--, --, --, ..., --, --, --]],\n mask=[[False, False, False, ..., False, False, False],\n [False, False, False, ..., False, False, False],\n [ True, False, False, ..., False, False, False],\n ...,\n [ True, True, True, ..., True, True, True],\n [ True, True, True, ..., True, True, True],\n [ True, True, True, ..., True, True, True]],\n fill_value=-1)","content_type":"text/plain"}}},"key":"DaGFRF6jDT"}],"key":"okbqCAK1Fw"}],"key":"k2tHiqiIhr"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":179,"column":1},"end":{"line":180,"column":1}},"children":[{"type":"text","value":"We can see that this is a different kind of array. 
As mentioned in the introduction, it has three attributes (","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"odl5WTmIAu"},{"type":"inlineCode","value":"data","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"sCJxi0z6LU"},{"type":"text","value":", ","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"dlrbXvdSlc"},{"type":"inlineCode","value":"mask","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"qDumE7EwWz"},{"type":"text","value":" and ","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"R6JFowdZfC"},{"type":"inlineCode","value":"fill_value","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"HVB9D7zvT4"},{"type":"text","value":").\nKeep in mind that the ","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"uCdyFZanFZ"},{"type":"inlineCode","value":"mask","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"IlpxCRhhbD"},{"type":"text","value":" attribute has a ","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"g11eSVSz2h"},{"type":"inlineCode","value":"True","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"CJGpJSCBTy"},{"type":"text","value":" value for elements corresponding to ","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"rjwjueOYtZ"},{"type":"strong","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"children":[{"type":"text","value":"invalid","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"tdqCIOFoqE"}],"key":"R9BIsJ8Rdu"},{"type":"text","value":" data (represented by two dashes in the 
","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"skOpEr5JrO"},{"type":"inlineCode","value":"data","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"y2qysTff3E"},{"type":"text","value":" attribute).","position":{"start":{"line":179,"column":1},"end":{"line":179,"column":1}},"key":"Gn1uk3ePhR"}],"key":"DNoYe57B3S"}],"key":"MYH2UqXLyO"},{"type":"block","position":{"start":{"line":182,"column":1},"end":{"line":182,"column":1}},"children":[{"type":"paragraph","position":{"start":{"line":184,"column":1},"end":{"line":185,"column":1}},"children":[{"type":"text","value":"Let’s try and see what the data looks like excluding the first row (data from the Hubei province in China) so we can look at the missing data more\nclosely:","position":{"start":{"line":184,"column":1},"end":{"line":184,"column":1}},"key":"eBgRFcWEzS"}],"key":"f8em4NaBVJ"}],"key":"gv4YWgxtKX"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"fig, ax = plt.subplots()\nax.plot(dates, nbcases_ma[1:].T, \"--\")\nax.set_xticks(selected_dates, dates[selected_dates])\nax.set_title(\"COVID-19 cumulative cases from Jan 21 to Feb 3 2020\")","key":"Oyu4lA0szZ"},{"type":"outputs","id":"OnmFem7UI9BNWiST7L07E","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"display_data","metadata":{},"data":{"text/plain":{"content":"\u003cFigure size 640x480 with 1 Axes\u003e","content_type":"text/plain"},"image/png":{"content_type":"image/png","hash":"613f065b167d8d181b5dc5e1173b74db","path":"/numpy-tutorials/build/613f065b167d8d181b5dc5e1173b74db.png"}}},"key":"b5XnyYy8fS"}],"key":"njbB3aWx3v"}],"key":"RpADBWcKLw"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":194,"column":1},"end":{"line":194,"column":1}},"children":[{"type":"text","value":"Now that our data has been masked, let’s try summing up all the cases in 
China:","position":{"start":{"line":194,"column":1},"end":{"line":194,"column":1}},"key":"N2hKGYcCqS"}],"key":"ynl68m9uCU"}],"key":"LiuUGQeeOs"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"china_masked = nbcases_ma[locations[:, 1] == \"China\"].sum(axis=0)\nchina_masked","key":"JUeQ9XLIk6"},{"type":"outputs","id":"w41J1ltChiRKSDNChsDCh","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":9,"metadata":{},"data":{"text/plain":{"content":"masked_array(data=[278, 309, 574, 835, 10, 10, 17, 22, 23, 25, 28, 11821,\n 14411, 17238],\n mask=[False, False, False, False, False, False, False, False,\n False, False, False, False, False, False],\n fill_value=999999)","content_type":"text/plain"}}},"key":"efmkSqJISE"}],"key":"AbFFhgBhV3"}],"key":"w4PiJASbRC"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":201,"column":1},"end":{"line":201,"column":1}},"children":[{"type":"text","value":"Note that ","position":{"start":{"line":201,"column":1},"end":{"line":201,"column":1}},"key":"SfN35vUo3i"},{"type":"inlineCode","value":"china_masked","position":{"start":{"line":201,"column":1},"end":{"line":201,"column":1}},"key":"c33aX5blqL"},{"type":"text","value":" is a masked array, so it has a different data structure than a regular NumPy array. 
Now, we can access its data directly by using the ","position":{"start":{"line":201,"column":1},"end":{"line":201,"column":1}},"key":"SaKzUOo9Ei"},{"type":"inlineCode","value":".data","position":{"start":{"line":201,"column":1},"end":{"line":201,"column":1}},"key":"BVr0B0gHul"},{"type":"text","value":" attribute:","position":{"start":{"line":201,"column":1},"end":{"line":201,"column":1}},"key":"ScODMdXQBc"}],"key":"Ig2uklqRyb"}],"key":"ExPzY71jhm"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"china_total = china_masked.data\nchina_total","key":"JIJcEKa4Hh"},{"type":"outputs","id":"E0hc4TSfgZt-7-LY89XoH","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":10,"metadata":{},"data":{"text/plain":{"content":"array([ 278, 309, 574, 835, 10, 10, 17, 22, 23,\n 25, 28, 11821, 14411, 17238])","content_type":"text/plain"}}},"key":"L6tdXrpdnK"}],"key":"ugCHIb56fB"}],"key":"eqxoky7z0N"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":208,"column":1},"end":{"line":208,"column":1}},"children":[{"type":"text","value":"That is better: no more negative values. However, we can still see that for some days, the cumulative number of cases seems to go down (from 835 to 10, for example), which does not agree with the definition of “cumulative data”. If we look more closely at the data, we can see that in the period where there was missing data in mainland China, there was valid data for Hong Kong, Taiwan, Macau and “Unspecified” regions of China. 
Maybe we can remove those from the total sum of cases in China, to get a better understanding of the data.","position":{"start":{"line":208,"column":1},"end":{"line":208,"column":1}},"key":"PAXfFxAWqW"}],"key":"COa4PNTgzn"},{"type":"paragraph","position":{"start":{"line":210,"column":1},"end":{"line":210,"column":1}},"children":[{"type":"text","value":"First, we’ll identify the indices of locations in mainland China:","position":{"start":{"line":210,"column":1},"end":{"line":210,"column":1}},"key":"JAUnGdv3FD"}],"key":"ohNTAj8vn2"}],"key":"AA7xDQMTuX"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"china_mask = (\n (locations[:, 1] == \"China\")\n \u0026 (locations[:, 0] != \"Hong Kong\")\n \u0026 (locations[:, 0] != \"Taiwan\")\n \u0026 (locations[:, 0] != \"Macau\")\n \u0026 (locations[:, 0] != \"Unspecified*\")\n)","key":"GUgpToD48w"},{"type":"outputs","id":"EgxOi8i9Xccvlt65bECD2","children":[],"key":"FkM8DFl2NT"}],"key":"c2kHlxA4A7"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"children":[{"type":"text","value":"Now, ","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"CGl3voummA"},{"type":"inlineCode","value":"china_mask","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"QsvmhpkzGG"},{"type":"text","value":" is an array of boolean values (","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"gQkOfbeEdx"},{"type":"inlineCode","value":"True","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"udLFk5DLuO"},{"type":"text","value":" or ","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"AoiHrGbFkv"},{"type":"inlineCode","value":"False","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"KrMwUawGSq"},{"type":"text","value":"); we can check that 
the indices are what we wanted with the ","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"vMn68iVnoE"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.ma.nonzero.html#numpy.ma.nonzero","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"children":[{"type":"text","value":"ma.nonzero","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"FNvum31amv"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.ma.nonzero.html#numpy.ma.nonzero","key":"hXDuw7tkvE"},{"type":"text","value":" method for masked arrays:","position":{"start":{"line":222,"column":1},"end":{"line":222,"column":1}},"key":"ACYK67pIGT"}],"key":"neNgrU9GSc"}],"key":"zezNfAsccc"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"china_mask.nonzero()","key":"Z1wI1QhYVK"},{"type":"outputs","id":"SSjehsND5lCRjpj-9wLDe","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":12,"metadata":{},"data":{"text/plain":{"content":"(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 31, 33]),)","content_type":"text/plain"}}},"key":"gcW7vHx15Y"}],"key":"NcUOilAwmW"}],"key":"c3NDITxXI4"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":228,"column":1},"end":{"line":228,"column":1}},"children":[{"type":"text","value":"Now we can correctly sum entries for mainland China:","position":{"start":{"line":228,"column":1},"end":{"line":228,"column":1}},"key":"cS39mQG1pS"}],"key":"rYmQE06UQ7"}],"key":"n5rYq8RNma"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"china_total = 
nbcases_ma[china_mask].sum(axis=0)\nchina_total","key":"SNCyRYbfXs"},{"type":"outputs","id":"kFN7Qkvl5KVrVXE8t1zBm","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":13,"metadata":{},"data":{"text/plain":{"content":"masked_array(data=[278, 308, 440, 446, --, --, --, --, --, --, --, 11791,\n 14380, 17205],\n mask=[False, False, False, False, True, True, True, True,\n True, True, True, False, False, False],\n fill_value=999999)","content_type":"text/plain"}}},"key":"B7KLbSft3H"}],"key":"yXf9A07gjs"}],"key":"AIhB1xRM2l"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":235,"column":1},"end":{"line":235,"column":1}},"children":[{"type":"text","value":"We can replace the data with this information and plot a new graph, focusing on Mainland China:","position":{"start":{"line":235,"column":1},"end":{"line":235,"column":1}},"key":"MOTFpf79wo"}],"key":"CqPPDuCLEh"}],"key":"bJkrnkRhtf"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"fig, ax = plt.subplots()\nax.plot(dates, china_total.T, \"--\")\nax.set_xticks(selected_dates, dates[selected_dates])\nax.set_title(\"COVID-19 cumulative cases from Jan 21 to Feb 3 2020 - Mainland China\")","key":"fLgNQPw0hW"},{"type":"outputs","id":"z9ZPh7E5c47tVJsvRRLo7","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"display_data","metadata":{},"data":{"text/plain":{"content":"\u003cFigure size 640x480 with 1 Axes\u003e","content_type":"text/plain"},"image/png":{"content_type":"image/png","hash":"9c07c4f86dd02b243a8eec9094bc14fc","path":"/numpy-tutorials/build/9c07c4f86dd02b243a8eec9094bc14fc.png"}}},"key":"W6r91ZuUdn"}],"key":"AIP9kGVKgn"}],"key":"y0xqdPHyMh"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":244,"column":1},"end":{"line":244,"column":1}},"children":[{"type":"text","value":"It’s clear that masked arrays are the right solution 
here. We cannot represent the missing data without mischaracterizing the evolution of the curve.","position":{"start":{"line":244,"column":1},"end":{"line":244,"column":1}},"key":"uHjSzxhekF"}],"key":"GeyQGEx9iG"}],"key":"UWFu8ANxuW"},{"type":"block","position":{"start":{"line":246,"column":1},"end":{"line":246,"column":1}},"children":[{"type":"heading","depth":2,"position":{"start":{"line":248,"column":1},"end":{"line":248,"column":1}},"children":[{"type":"text","value":"Fitting Data","position":{"start":{"line":248,"column":1},"end":{"line":248,"column":1}},"key":"jv15l6T0Ci"}],"identifier":"fitting-data","label":"Fitting Data","html_id":"fitting-data","implicit":true,"key":"iCmv6L0AaW"},{"type":"paragraph","position":{"start":{"line":250,"column":1},"end":{"line":250,"column":1}},"children":[{"type":"text","value":"One possibility we can think of is to interpolate the missing data to estimate the number of cases in late January. Observe that we can select the masked elements using the ","position":{"start":{"line":250,"column":1},"end":{"line":250,"column":1}},"key":"ZyQBi2j9rc"},{"type":"inlineCode","value":".mask","position":{"start":{"line":250,"column":1},"end":{"line":250,"column":1}},"key":"ZNQ3bNeYQp"},{"type":"text","value":" attribute:","position":{"start":{"line":250,"column":1},"end":{"line":250,"column":1}},"key":"weVJd9S886"}],"key":"R6GN3yH3ik"}],"key":"klnDrdmBRX"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"china_total.mask\ninvalid = china_total[china_total.mask]\ninvalid","key":"bybo083EP6"},{"type":"outputs","id":"yWbzoRPOrBNHW67Vwvrgd","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":15,"metadata":{},"data":{"text/plain":{"content":"masked_array(data=[--, --, --, --, --, --, --],\n mask=[ True, True, True, True, True, True, True],\n fill_value=999999,\n 
dtype=int64)","content_type":"text/plain"}}},"key":"Ju3kISdBRC"}],"key":"J1rLVhDdyi"}],"key":"CS6dNqtsTj"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":258,"column":1},"end":{"line":258,"column":1}},"children":[{"type":"text","value":"We can also access the valid entries by using the logical negation for this mask:","position":{"start":{"line":258,"column":1},"end":{"line":258,"column":1}},"key":"BwHRfoRVla"}],"key":"Hx5jNWROC5"}],"key":"woUeqc7i6P"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"valid = china_total[~china_total.mask]\nvalid","key":"tB5TIxU9F5"},{"type":"outputs","id":"Qa5CvxdOWp_hU8_5zxgxL","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":16,"metadata":{},"data":{"text/plain":{"content":"masked_array(data=[278, 308, 440, 446, 11791, 14380, 17205],\n mask=[False, False, False, False, False, False, False],\n fill_value=999999)","content_type":"text/plain"}}},"key":"TYBBGAcr5O"}],"key":"wfUn6LXQVz"}],"key":"aqeFAZoO88"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":265,"column":1},"end":{"line":265,"column":1}},"children":[{"type":"text","value":"Now, if we want to create a very simple approximation for this data, we should take into account the valid entries around the invalid ones. So first let’s select the dates for which the data is valid. 
Note that we can use the mask from the ","position":{"start":{"line":265,"column":1},"end":{"line":265,"column":1}},"key":"dR8aKhLTPZ"},{"type":"inlineCode","value":"china_total","position":{"start":{"line":265,"column":1},"end":{"line":265,"column":1}},"key":"yc640qoNcu"},{"type":"text","value":" masked array to index the dates array:","position":{"start":{"line":265,"column":1},"end":{"line":265,"column":1}},"key":"r4TrlIIPhv"}],"key":"mpRCf3jMGO"}],"key":"areRZN0iGC"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"dates[~china_total.mask]","key":"km7SIYd8a9"},{"type":"outputs","id":"-vx6pxVp3pApdw97aUKnW","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"execute_result","execution_count":17,"metadata":{},"data":{"text/plain":{"content":"array(['1/21/20', '1/22/20', '1/23/20', '1/24/20', '2/1/20', '2/2/20',\n '2/3/20'], dtype='\u003cU7')","content_type":"text/plain"}}},"key":"cEU1aGpF9K"}],"key":"p4PoSF4a0q"}],"key":"u7m5NfksI8"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":271,"column":1},"end":{"line":273,"column":1}},"children":[{"type":"text","value":"Finally, we can use the\n","position":{"start":{"line":271,"column":1},"end":{"line":271,"column":1}},"key":"M32nqZvy6C"},{"type":"link","url":"https://numpy.org/doc/stable/reference/generated/numpy.polynomial.polynomial.Polynomial.fit.html","position":{"start":{"line":271,"column":1},"end":{"line":271,"column":1}},"children":[{"type":"text","value":"fitting functionality of the numpy.polynomial","position":{"start":{"line":271,"column":1},"end":{"line":271,"column":1}},"key":"udyDWlPju9"}],"urlSource":"https://numpy.org/doc/stable/reference/generated/numpy.polynomial.polynomial.Polynomial.fit.html","key":"dDzsrcRNyG"},{"type":"text","value":"\npackage to create a cubic polynomial model that fits the data as best as 
possible:","position":{"start":{"line":271,"column":1},"end":{"line":271,"column":1}},"key":"iqCA45YQfF"}],"key":"QIB76FTGjA"}],"key":"IgIKBlXeh2"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"t = np.arange(len(china_total))\nmodel = np.polynomial.Polynomial.fit(t[~china_total.mask], valid, deg=3)\n\nfig, ax = plt.subplots()\nax.plot(t, china_total)\nax.plot(t, model(t), \"--\")","key":"zd66r4noQH"},{"type":"outputs","id":"dsmc8-2mly_fUjdf09IPy","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"display_data","metadata":{},"data":{"text/plain":{"content":"\u003cFigure size 640x480 with 1 Axes\u003e","content_type":"text/plain"},"image/png":{"content_type":"image/png","hash":"705b0ce4ba67115fce959db22ca0f373","path":"/numpy-tutorials/build/705b0ce4ba67115fce959db22ca0f373.png"}}},"key":"zfwqe1Sczm"}],"key":"Bd5wUUU1Mt"}],"key":"kDJhHiZU2L"},{"type":"block","children":[{"type":"paragraph","position":{"start":{"line":284,"column":1},"end":{"line":285,"column":1}},"children":[{"type":"text","value":"This plot is not so readable since the lines seem to be over each other, so let’s summarize in a more elaborate plot. 
We’ll plot the real data when\navailable, and show the cubic fit for unavailable data, using this fit to compute an estimate to the observed number of cases on January 28th 2020, 7 days after the beginning of the records:","position":{"start":{"line":284,"column":1},"end":{"line":284,"column":1}},"key":"J1jtzy5E1e"}],"key":"OBmCE4JTSU"}],"key":"SiAMD4IHeb"},{"type":"block","kind":"notebook-code","children":[{"type":"code","lang":"python","executable":true,"value":"fig, ax = plt.subplots()\nax.plot(t, china_total)\nax.plot(t[china_total.mask], model(t)[china_total.mask], \"--\", color=\"orange\")\nax.plot(7, model(7), \"r*\")\n\nax.set_xticks([0, 7, 13], dates[[0, 7, 13]])\nax.set_yticks([0, model(7), 10000, 17500])\nax.legend([\"Mainland China\", \"Cubic estimate\", \"7 days after start\"])\nax.set_title(\n \"COVID-19 cumulative cases from Jan 21 to Feb 3 2020 - Mainland China\\n\"\n \"Cubic estimate for 7 days after start\"\n)","key":"ifR1iP4w5I"},{"type":"outputs","id":"c32FLmZQLONK4_obxxawj","children":[{"type":"output","children":[],"jupyter_data":{"output_type":"display_data","metadata":{},"data":{"text/plain":{"content":"\u003cFigure size 640x480 with 1 Axes\u003e","content_type":"text/plain"},"image/png":{"content_type":"image/png","hash":"0af5964dc0b4e30f177f4bb98384bc83","path":"/numpy-tutorials/build/0af5964dc0b4e30f177f4bb98384bc83.png"}}},"key":"zpJMuVKMNi"}],"key":"EKYZeW71mt"}],"key":"oL6tgNuoGb"},{"type":"block","children":[{"type":"heading","depth":2,"position":{"start":{"line":302,"column":1},"end":{"line":302,"column":1}},"children":[{"type":"text","value":"In practice","position":{"start":{"line":302,"column":1},"end":{"line":302,"column":1}},"key":"yc0CamnNdf"}],"identifier":"in-practice","label":"In 
practice","html_id":"in-practice","implicit":true,"key":"P2VUxS1ien"}],"key":"jWp3QsZKwh"},{"type":"block","position":{"start":{"line":304,"column":1},"end":{"line":304,"column":1}},"children":[{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":306,"column":1},"end":{"line":307,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":306,"column":1},"end":{"line":307,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"Adding ","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"JoT61nVmTO"},{"type":"inlineCode","value":"-1","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"azYeIpDLVU"},{"type":"text","value":" to missing data is not a problem with ","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"S10poNjURQ"},{"type":"inlineCode","value":"numpy.genfromtxt","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"pNta44wyzg"},{"type":"text","value":"; in this particular case, substituting the missing value with ","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"h1rLFKrMRi"},{"type":"inlineCode","value":"0","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"rlGVUCvBh0"},{"type":"text","value":" might have been fine, but we’ll see later that this is far from a general solution. 
Also, it is possible to call the ","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"bC8JAv9Glj"},{"type":"inlineCode","value":"numpy.genfromtxt","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"EPyAC39GFG"},{"type":"text","value":" function using the ","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"P2HQvAuFI1"},{"type":"inlineCode","value":"usemask","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"ghwvUnq8E5"},{"type":"text","value":" parameter. If ","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"wyf8RxBGMC"},{"type":"inlineCode","value":"usemask=True","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"ay1JbaDTSO"},{"type":"text","value":", ","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"Wgd2ffRYgh"},{"type":"inlineCode","value":"numpy.genfromtxt","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"WLW7z7rIcy"},{"type":"text","value":" automatically returns a masked array.","position":{"start":{"line":306,"column":1},"end":{"line":306,"column":1}},"key":"m7gkzfEq6p"}],"key":"FE01cRPZM2"}],"key":"adUREB81nZ"}],"key":"JkMrzBjJsi"}],"key":"INMhghWfbD"},{"type":"block","position":{"start":{"line":308,"column":1},"end":{"line":308,"column":1}},"children":[{"type":"heading","depth":2,"position":{"start":{"line":310,"column":1},"end":{"line":310,"column":1}},"children":[{"type":"text","value":"Further reading","position":{"start":{"line":310,"column":1},"end":{"line":310,"column":1}},"key":"Lo2ISzW4Uk"}],"identifier":"further-reading","label":"Further reading","html_id":"further-reading","implicit":true,"key":"Npq96sQ3KR"},{"type":"paragraph","position":{"start":{"line":312,"column":1},"end":{"line":312,"column":1}},"children":[{"type":"text","value":"Topics not covered in this tutorial can be found in the 
documentation:","position":{"start":{"line":312,"column":1},"end":{"line":312,"column":1}},"key":"oWML18WpcP"}],"key":"zt5SMnv7KH"},{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":314,"column":1},"end":{"line":316,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":314,"column":1},"end":{"line":314,"column":1}},"children":[{"type":"paragraph","children":[{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.ma.harden_mask.html#numpy.ma.harden_mask","position":{"start":{"line":314,"column":1},"end":{"line":314,"column":1}},"children":[{"type":"text","value":"Hardmasks","position":{"start":{"line":314,"column":1},"end":{"line":314,"column":1}},"key":"awmoclNVPZ"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.ma.harden_mask.html#numpy.ma.harden_mask","key":"vnZXyOCKEu"},{"type":"text","value":" vs. ","position":{"start":{"line":314,"column":1},"end":{"line":314,"column":1}},"key":"efI9EQIjK1"},{"type":"link","url":"https://numpy.org/devdocs/reference/generated/numpy.ma.soften_mask.html#numpy.ma.soften_mask","position":{"start":{"line":314,"column":1},"end":{"line":314,"column":1}},"children":[{"type":"text","value":"softmasks","position":{"start":{"line":314,"column":1},"end":{"line":314,"column":1}},"key":"ygXO04ZCSv"}],"urlSource":"https://numpy.org/devdocs/reference/generated/numpy.ma.soften_mask.html#numpy.ma.soften_mask","key":"tOUfOU9lvc"}],"key":"FdETPcFdUe"}],"key":"MpyO9w4S8I"},{"type":"listItem","spread":true,"position":{"start":{"line":315,"column":1},"end":{"line":316,"column":1}},"children":[{"type":"paragraph","children":[{"type":"link","url":"https://numpy.org/devdocs/reference/maskedarray.generic.html#maskedarray-generic","position":{"start":{"line":315,"column":1},"end":{"line":315,"column":1}},"children":[{"type":"text","value":"The numpy.ma 
module","position":{"start":{"line":315,"column":1},"end":{"line":315,"column":1}},"key":"b1FHiTHBxw"}],"urlSource":"https://numpy.org/devdocs/reference/maskedarray.generic.html#maskedarray-generic","key":"XJ7lGdmyyb"}],"key":"kxqse6j26x"}],"key":"JC4K8tW9iU"}],"key":"owiTjLlA1b"},{"type":"heading","depth":3,"position":{"start":{"line":317,"column":1},"end":{"line":317,"column":1}},"children":[{"type":"text","value":"Reference","position":{"start":{"line":317,"column":1},"end":{"line":317,"column":1}},"key":"uoGsaFsEN9"}],"identifier":"reference","label":"Reference","html_id":"reference","implicit":true,"key":"M6DKQPb3Fr"},{"type":"list","ordered":false,"spread":false,"position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"children":[{"type":"listItem","spread":true,"position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"children":[{"type":"paragraph","children":[{"type":"text","value":"Ensheng Dong, Hongru Du, Lauren Gardner, ","position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"key":"Qy3tAc4ZT5"},{"type":"emphasis","position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"children":[{"type":"text","value":"An interactive web-based dashboard to track COVID-19 in real time","position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"key":"yGCh2xyHEo"}],"key":"K2rpFOUwJv"},{"type":"text","value":", The Lancet Infectious Diseases, Volume 20, Issue 5, 2020, Pages 533-534, ISSN 1473-3099, ","position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"key":"lFcCBQbVQ3"},{"type":"cite","url":"https://doi.org/10.1016/S1473-3099(20)30120-1","position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"children":[{"type":"text","value":"Dong ","key":"mwt90OaHYd"},{"type":"emphasis","children":[{"type":"text","value":"et al.","key":"VPAEKBlcBO"}],"key":"GMwyP50Ixx"},{"type":"text","value":" 
(2020)","key":"Hg79gq70ee"}],"kind":"narrative","label":"Dong_2020","identifier":"https://doi.org/10.1016/S1473-3099(20)30120-1","enumerator":"1","key":"Ec4y81pE88"},{"type":"text","value":".","position":{"start":{"line":319,"column":1},"end":{"line":319,"column":1}},"key":"AY6GNeSTIQ"}],"key":"N35dU7adMZ"}],"key":"j0SJE4pPBj"}],"key":"Nn8jo617e9"}],"key":"YbYxUOxo2q"}],"key":"Z77uW159MJ"},"references":{"cite":{"order":["Dong_2020"],"data":{"Dong_2020":{"label":"Dong_2020","enumerator":"1","doi":"10.1016/s1473-3099(20)30120-1","html":"Dong, E., Du, H., \u0026 Gardner, L. (2020). An interactive web-based dashboard to track COVID-19 in real time. \u003ci\u003eThe Lancet Infectious Diseases\u003c/i\u003e, \u003ci\u003e20\u003c/i\u003e(5), 533–534. \u003ca target=\"_blank\" rel=\"noreferrer\" href=\"https://doi.org/10.1016/s1473-3099(20)30120-1\"\u003e10.1016/s1473-3099(20)30120-1\u003c/a\u003e","url":"https://doi.org/10.1016/s1473-3099(20)30120-1"}}}},"footer":{"navigation":{"prev":{"title":"Saving and sharing your NumPy arrays","short_title":"Sharing Array Data","url":"/save-load-arrays","group":"Features"},"next":{"title":"Contributing","short_title":"Contributing","url":"/contributing","group":"Features"}}},"domain":"http://localhost:3000"},"project":{"title":"Numpy Tutorials","authors":[{"id":"Numpy Community","name":"Numpy 
Community"}],"github":"https://github.com/numpy/numpy-tutorials","toc":[{"file":"content/index.md"},{"children":[{"file":"content/mooreslaw-tutorial.md"},{"file":"content/tutorial-deep-learning-on-mnist.md"},{"file":"content/tutorial-x-ray-image-processing.md"},{"file":"content/tutorial-static_equilibrium.md"},{"file":"content/tutorial-plotting-fractals.md"},{"file":"content/tutorial-air-quality-analysis.md"}],"title":"Applications"},{"children":[{"file":"content/tutorial-svd.md"},{"file":"content/save-load-arrays.md"},{"file":"content/tutorial-ma.md"}],"title":"Features"},{"children":[{"file":"content/tutorial-style-guide.md"}],"file":"content/contributing.md","title":"Contributing"}],"thumbnail":"/numpy-tutorials/build/b77199e99a54e59b2e3c037c2cc90f21.svg","exports":[],"bibliography":[],"index":"index","pages":[{"level":1,"title":"Applications"},{"slug":"mooreslaw-tutorial","title":"Determining Moore’s Law with real data in NumPy","short_title":"Moore's Law","description":"","date":"","thumbnail":"/numpy-tutorials/build/01-mooreslaw-tutoria-68d0ad466c300d347c517c09cd29d0d9.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-deep-learning-on-mnist","title":"Deep learning on MNIST","description":"","date":"","thumbnail":"/numpy-tutorials/build/tutorial-deep-learni-1a6932c38d13641ad9a1eda7d431b1f5.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-x-ray-image-processing","title":"X-ray image processing","description":"","date":"","thumbnail":"/numpy-tutorials/build/tutorial-x-ray-image-cb14ad1cb4fb37f341c5954e91e694b8.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-static-equilibrium","title":"Determining Static Equilibrium in NumPy","short_title":"Static 
Equilibrium","description":"","date":"","thumbnail":"/numpy-tutorials/build/static_eqbm-fig01-ee029c471a69f4e98c09d962ac7d60dd.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-plotting-fractals","title":"Plotting Fractals","description":"","date":"","thumbnail":"/numpy-tutorials/build/fractal-4ebbc2569665376d939ff6f78ae5e5ab.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-air-quality-analysis","title":"Analyzing the impact of the lockdown on air quality in Delhi, India","short_title":"Analyzing Air Quality","description":"","date":"","thumbnail":"/numpy-tutorials/build/11-delhi-aqi-5fa295dd14ed05daaf4cd0193122e2f7.jpg","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"level":1,"title":"Features"},{"slug":"tutorial-svd","title":"Linear algebra on n-dimensional arrays","short_title":"Linear Algebra on n-D arrays","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"save-load-arrays","title":"Saving and sharing your NumPy arrays","short_title":"Sharing Array Data","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"tutorial-ma","title":"Masked Arrays","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2},{"slug":"contributing","title":"Contributing","short_title":"Contributing","description":"","date":"","thumbnail":"","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":1},{"slug":"tutorial-style-guide","title":"Learn to write a NumPy tutorial","short_title":"Style 
Guide","description":"","date":"","thumbnail":"/numpy-tutorials/build/56554e3d11983df8f484e8d7b2c2bdae.png","thumbnailOptimized":"","banner":"","bannerOptimized":"","tags":[],"level":2}]}}},"actionData":null,"errors":null},"future":{"unstable_dev":false,"unstable_postcss":false,"unstable_tailwind":false,"v2_errorBoundary":true,"v2_headers":true,"v2_meta":true,"v2_normalizeFormMethod":true,"v2_routeConvention":true}};</script><script type="module" async="">import "/numpy-tutorials/build/manifest-3595279D.js";
import * as route0 from "/numpy-tutorials/build/root-SIO6LUTY.js";
import * as route1 from "/numpy-tutorials/build/routes/$-PRP77N34.js";
window.__remixRouteModules = {"root":route0,"routes/$":route1};
import("/numpy-tutorials/build/entry.client-PCJPW7TK.js");</script></body></html>