-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.html
More file actions
2309 lines (966 loc) · 122 KB
/
index.html
File metadata and controls
2309 lines (966 loc) · 122 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html class="theme-next pisces use-motion" lang="zh-Hans">
<head>
  <!-- Document metadata for the blog index page: encoding, viewport, stylesheets,
       feed link, social-card tags, inline theme configuration and the page title. -->
  <meta charset="UTF-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <!-- No maximum-scale here: capping the scale prevents users from zooming the page. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="theme-color" content="#222">
  <!-- NOTE(review): presumably opt-outs from proxy / mobile transcoding; confirm they are still needed. -->
  <meta http-equiv="Cache-Control" content="no-transform">
  <meta http-equiv="Cache-Control" content="no-siteapp">

  <!-- Stylesheets: vendored libraries first, the theme stylesheet last. -->
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet">
  <!-- Explicit https scheme and an escaped ampersand instead of a protocol-relative URL with a raw "&". -->
  <link href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic&amp;subset=latin,latin-ext" rel="stylesheet">
  <link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet">
  <link href="/css/main.css?v=5.1.2" rel="stylesheet">

  <meta name="keywords" content="Hexo, Albert, NLP, ML">
  <link rel="alternate" href="/atom.xml" title="学无止境" type="application/atom+xml">
  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico?v=5.1.2">
  <meta name="description" content="Stay Hungry, Stay Foolish">

  <!-- Open Graph and Twitter card metadata. -->
  <meta property="og:type" content="website">
  <meta property="og:title" content="学无止境">
  <meta property="og:url" content="http://linanblog.cn/index.html">
  <meta property="og:site_name" content="学无止境">
  <meta property="og:description" content="Stay Hungry, Stay Foolish">
  <meta property="og:locale" content="zh-Hans">
  <meta name="twitter:card" content="summary">
  <meta name="twitter:title" content="学无止境">
  <meta name="twitter:description" content="Stay Hungry, Stay Foolish">

  <script id="hexo.configurations">
    // Reuse an existing window.NexT object if one is already defined, otherwise start empty.
    var NexT = window.NexT || {};
    // Global configuration literal; presumably read by theme scripts loaded later in the
    // page (not visible in this part of the file).
    var CONFIG = {
      root: '/',
      scheme: 'Pisces',
      sidebar: {"position":"left","display":"post","offset":12,"offset_float":12,"b2t":false,"scrollpercent":false,"onmobile":false},
      fancybox: true,
      tabs: true,
      motion: true,
      duoshuo: {
        userId: '0',
        author: '博主'
      },
      algolia: {
        applicationID: '',
        apiKey: '',
        indexName: '',
        hits: {"per_page":10},
        labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
      }
    };
  </script>

  <link rel="canonical" href="http://linanblog.cn/">
  <title>学无止境</title>
</head>
<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">
<div class="container sidebar-position-left
page-home
">
<div class="headband"></div>
<header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
  <!-- Site header: brand/title block, the navigation toggle button and the primary menu. -->
  <div class="header-inner">
    <div class="site-brand-wrapper">
      <div class="site-meta">
        <div class="custom-logo-site-title">
          <a href="/" class="brand" rel="start">
            <span class="logo-line-before"><i></i></span>
            <span class="site-title">学无止境</span>
            <span class="logo-line-after"><i></i></span>
          </a>
        </div>
        <p class="site-subtitle">黑发不知勤学早,白首方悔读书迟。</p>
      </div>
      <div class="site-nav-toggle">
        <!-- The button holds only three empty bars, so it needs an explicit accessible name;
             type="button" keeps it from acting as a submit button if it ever sits inside a form. -->
        <button type="button" aria-label="切换导航菜单">
          <span class="btn-bar"></span>
          <span class="btn-bar"></span>
          <span class="btn-bar"></span>
        </button>
      </div>
    </div>
    <nav class="site-nav" aria-label="主导航">
      <!-- Icons are decorative (each link already has visible text), so they are hidden from assistive tech. -->
      <ul id="menu" class="menu">
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-home" aria-hidden="true"></i> <br>
            首页
          </a>
        </li>
        <li class="menu-item menu-item-categories">
          <a href="/categories/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-th" aria-hidden="true"></i> <br>
            分类
          </a>
        </li>
        <li class="menu-item menu-item-archives">
          <a href="/archives/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-archive" aria-hidden="true"></i> <br>
            归档
          </a>
        </li>
        <li class="menu-item menu-item-tags">
          <a href="/tags/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-tags" aria-hidden="true"></i> <br>
            标签
          </a>
        </li>
        <li class="menu-item menu-item-about">
          <a href="/about/" rel="section">
            <i class="menu-item-icon fa fa-fw fa-user" aria-hidden="true"></i> <br>
            关于
          </a>
        </li>
        <li class="menu-item menu-item-commonweal">
          <a href="/404.html" rel="section">
            <i class="menu-item-icon fa fa-fw fa-heartbeat" aria-hidden="true"></i> <br>
            公益404
          </a>
        </li>
      </ul>
    </nav>
  </div>
</header>
<main id="main" class="main">
<div class="main-inner">
<div class="content-wrap">
<div id="content" class="content">
<section id="posts" class="posts-expand">
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://linanblog.cn/2018/06/14/Trie树/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Albert">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/WechatIMG7.jpeg">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="学无止境">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/2018/06/14/Trie树/" itemprop="url">Trie树</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-06-14T15:15:38+08:00">
2018-06-14
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/常用算法/" itemprop="url" rel="index">
<span itemprop="name">常用算法</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h4 id="一、知识简介"><a href="#一、知识简介" class="headerlink" title="一、知识简介"></a>一、知识简介</h4><p> Trie树,又称单词查找树或键树,是一种树形结构,是一种哈希树的变种。典型应用是用于统计和排序大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是:最大限度地减少无谓的字符串比较,查询效率比哈希表高。</p>
<h5 id="Trie树优点:"><a href="#Trie树优点:" class="headerlink" title="Trie树优点:"></a>Trie树优点:</h5><p>Trie 的强大之处就在于它的时间复杂度。<font color="red">它的插入和查询时间复杂度都为 O(k)</font> ,其中 k 为 key 的长度,与 Trie 中保存了多少个元素无关。Hash 表号称是 O(1) 的,但在计算 hash 的时候就肯定会是 O(k) ,而且还有碰撞之类的问题。</p>
<h5 id="Trie树缺点:"><a href="#Trie树缺点:" class="headerlink" title="Trie树缺点:"></a>Trie树缺点:</h5><p>Trie 的缺点是空间消耗很高。</p>
<p><font color="blue">Trie的核心思想是空间换时间。利用字符串的公共前缀来降低查询时间的开销以达到提高效率的目的。</font></p>
<h5 id="Trie树有一些特性:"><a href="#Trie树有一些特性:" class="headerlink" title="Trie树有一些特性:"></a>Trie树有一些特性:</h5><p>1)根节点不包含字符,除根节点外每一个节点都只包含一个字符。<br>2)从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。<br>3)每个节点的所有子节点包含的字符都不相同。<br>4)如果字符的种数为n,则每个结点的出度为n,这也是空间换时间的体现,浪费了很多的空间。<br>5)插入查找的复杂度为O(n),n为字符串长度。</p>
<h5 id="基本思想(以字母树为例):"><a href="#基本思想(以字母树为例):" class="headerlink" title="基本思想(以字母树为例):"></a>基本思想(以字母树为例):</h5><p>1、插入过程<br>对于一个单词,从根开始,沿着单词的各个字母所对应的树中的节点分支向下走,直到单词遍历完,将最后的节点标记为红色,表示该单词已插入Trie树。<br>2、查询过程<br>同样的,从根开始按照单词的字母顺序向下遍历trie树,一旦发现某个节点标记不存在或者单词遍历完成而最后的节点未标记为红色,则表示该单词不存在,若最后的节点标记为红色,表示该单词存在。</p>
<h5 id="应用"><a href="#应用" class="headerlink" title="应用"></a>应用</h5><p>词频统计<br>比直接用hash节省空间<br>搜索提示<br>输入前缀的时候提示可以构成的词<br>作为辅助结构<br>如后缀树,AC自动机等的辅助结构<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span 
class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br><span class="line">95</span><br><span class="line">96</span><br><span class="line">97</span><br></pre></td><td class="code"><pre><span class="line">coding:utf-8</span><br><span class="line">"""</span><br><span class="line">Implement a trie with insert, search, and startsWith methods.</span><br><span class="line"></span><br><span class="line">Note:</span><br><span class="line">You may assume that all inputs are consist of lowercase letters a-z.</span><br><span class="line"></span><br><span class="line">Subscribe to see which companies asked this question</span><br><span class="line">"""</span><br><span class="line">class TrieNode(object):</span><br><span class="line"> def __init__(self):</span><br><span class="line"> """</span><br><span 
class="line"> Initialize your data structure here.</span><br><span class="line"> """</span><br><span class="line"> self.data = {}</span><br><span class="line"> self.is_word = False</span><br><span class="line"></span><br><span class="line"></span><br><span class="line">class Trie(object):</span><br><span class="line"> def __init__(self):</span><br><span class="line"> self.root = TrieNode()</span><br><span class="line"></span><br><span class="line"> def insert(self, word):</span><br><span class="line"> """</span><br><span class="line"> Inserts a word into the trie.</span><br><span class="line"> :type word: str</span><br><span class="line"> :rtype: void</span><br><span class="line"> """</span><br><span class="line"> node = self.root</span><br><span class="line"> for letter in word:</span><br><span class="line"> child = node.data.get(letter)</span><br><span class="line"> if not child:</span><br><span class="line"> node.data[letter] = TrieNode()</span><br><span class="line"> node = node.data[letter]</span><br><span class="line"> node.is_word = True</span><br><span class="line"></span><br><span class="line"> def search(self, word):</span><br><span class="line"> """</span><br><span class="line"> Returns if the word is in the trie.</span><br><span class="line"> :type word: str</span><br><span class="line"> :rtype: bool</span><br><span class="line"> """</span><br><span class="line"> node = self.root</span><br><span class="line"> for letter in word:</span><br><span class="line"> node = node.data.get(letter)</span><br><span class="line"> if not node:</span><br><span class="line"> return False</span><br><span class="line"> return node.is_word # 判断单词是否是完整的存在在trie树中</span><br><span class="line"></span><br><span class="line"> def starts_with(self, prefix):</span><br><span class="line"> """</span><br><span class="line"> Returns if there is any word in the trie</span><br><span class="line"> that starts with the given prefix.</span><br><span class="line"> :type prefix: 
str</span><br><span class="line"> :rtype: bool</span><br><span class="line"> """</span><br><span class="line"> node = self.root</span><br><span class="line"> for letter in prefix:</span><br><span class="line"> node = node.data.get(letter)</span><br><span class="line"> if not node:</span><br><span class="line"> return False</span><br><span class="line"> return True</span><br><span class="line"></span><br><span class="line"> def get_start(self, prefix):</span><br><span class="line"> """</span><br><span class="line"> Returns words started with prefix</span><br><span class="line"> :param prefix:</span><br><span class="line"> :return: words (list)</span><br><span class="line"> """</span><br><span class="line"> def _get_key(pre, pre_node):</span><br><span class="line"> words_list = []</span><br><span class="line"> if pre_node.is_word:</span><br><span class="line"> words_list.append(pre)</span><br><span class="line"> for x in pre_node.data.keys():</span><br><span class="line"> words_list.extend(_get_key(pre + str(x), pre_node.data.get(x)))</span><br><span class="line"> return words_list</span><br><span class="line"></span><br><span class="line"> words = []</span><br><span class="line"> if not self.starts_with(prefix):</span><br><span class="line"> return words</span><br><span class="line"> if self.search(prefix):</span><br><span class="line"> words.append(prefix)</span><br><span class="line"> return words</span><br><span class="line"> node = self.root</span><br><span class="line"> for letter in prefix:</span><br><span class="line"> node = node.data.get(letter)</span><br><span class="line"> return _get_key(prefix, node)</span><br><span class="line"> </span><br><span class="line"># Your Trie object will be instantiated and called as such:</span><br><span class="line">trie = Trie()</span><br><span class="line">trie.insert("somestring")</span><br><span class="line">trie.insert("somebody")</span><br><span class="line">trie.insert("somebody1")</span><br><span 
class="line">trie.insert("somebody3")</span><br><span class="line">print trie.search("key")</span><br><span class="line">print trie.search("somebody3")</span><br><span class="line">print trie.get_start('some')</span><br></pre></td></tr></table></figure></p>
<h5 id="海量数据情况下,trie树的应用"><a href="#海量数据情况下,trie树的应用" class="headerlink" title="海量数据情况下,trie树的应用"></a>海量数据情况下,trie树的应用</h5><p>适用范围:数据量大,重复多,但是数据种类小可以放入内存<br>基本原理及要点:实现方式,节点孩子的表示方式<br> 1、一个文本文件,大约有一万行,每行一个词,要求统计出其中最频繁出现的前10个词,请给出思想,给出时间复杂度分析。<br>答:先用trie树统计每个词出现的次数,时间复杂度是O(n*le)(le表示单词的平均长度);<br>然后是用小顶堆找出出现最频繁的前10个词,时间复杂度是O(n*lg10)。</p>
<p>2、寻找热门查询<br>原题:搜索引擎会通过日志文件把用户每次检索使用的所有检索串都记录下来,每个查询串的长度为1-255字节。假设目前有一千万个记录,这些查询串的重复度比较高,虽然总数是1千万,但是如果去除重复后,不超过3百万个。一个查询串的重复度越高,说明查询它的用户越多,也就越热门。请你统计最热门的10个查询串,要求使用的内存不能超过1G。<br>答:利用trie树,关键字域存该查询串出现的次数,没有出现为0。最后用10个元素的最小堆来对出现频率进行排序。</p>
<p>3、1000万个字符串,其中有些是相同的(重复),需要把重复的全部去掉,保留没有重复的字符串。<br>答:使用hash_map或者trie树。<br> 比如trie树,在构建trie树的过程中,如果某个字符串已经存在于trie中则不输出,否则输出到文本中,这样就可以得到不重复的字符串。<br>hash_map的速度会要快一些,因为在添加一个字符串的时候,hashmap直接用哈希函数就能定位,然后选择是否写入文件,但是trie树需要在子节点中比较。<br>trie树对hashmap的优势是,在大量重复的单词中,trie树需要的内存会低一些。</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://linanblog.cn/2018/02/06/Pandas手记(2)/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Albert">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/WechatIMG7.jpeg">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="学无止境">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/2018/02/06/Pandas手记(2)/" itemprop="url">Pandas手记(2)</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-02-06T10:39:35+08:00">
2018-02-06
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/常用机器学习库/" itemprop="url" rel="index">
<span itemprop="name">常用机器学习库</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>继上一篇文章简要介绍了Pandas的Series和DataFrame结构之后,这篇文章重点介绍一下Pandas的核心DataFrame结构的常用操作。</p>
<p>DataFrame的删除操作:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><span class="line">>>> data = DataFrame(np.arange(16).reshape((4, 4)),</span><br><span class="line">... index=['Ohio', 'Colorado', 'Utah', 'New York'],</span><br><span class="line">... 
columns=['one', 'two', 'three', 'four'])</span><br><span class="line">>>> data</span><br><span class="line"> one two three four</span><br><span class="line">Ohio 0 1 2 3</span><br><span class="line">Colorado 4 5 6 7</span><br><span class="line">Utah 8 9 10 11</span><br><span class="line">New York 12 13 14 15</span><br><span class="line">>>> print(data.drop(['Colorado', 'Ohio']))</span><br><span class="line"> one two three four</span><br><span class="line">Utah 8 9 10 11</span><br><span class="line">New York 12 13 14 15</span><br><span class="line">>>> print(data.drop('two', axis=1))</span><br><span class="line"> one three four</span><br><span class="line">Ohio 0 2 3</span><br><span class="line">Colorado 4 6 7</span><br><span class="line">Utah 8 10 11</span><br><span class="line">New York 12 14 15</span><br><span class="line">>>> print(data.drop(['two', 'four'], axis=1))</span><br><span class="line"> one three</span><br><span class="line">Ohio 0 2</span><br><span class="line">Colorado 4 6</span><br><span class="line">Utah 8 10</span><br><span class="line">New York 12 14</span><br></pre></td></tr></table></figure></p>
<p><strong><em> 数据过滤 </em></strong><br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br></pre></td><td class="code"><pre><span class="line">>>> print data[data.three < 10] # three列上值大于等于10的行扔掉,小于的保留。</span><br><span class="line"> one two three four</span><br><span class="line">Ohio 0 1 2 3</span><br><span class="line">Colorado 4 5 6 7</span><br><span class="line">>>> data</span><br><span class="line"> one two three 
four</span><br><span class="line">Ohio 0 1 2 3</span><br><span class="line">Colorado 4 5 6 7</span><br><span class="line">Utah 8 9 10 11</span><br><span class="line">New York 12 13 14 15</span><br><span class="line">>>> print data.loc[data.three < 5, ]</span><br><span class="line"> one two three four</span><br><span class="line">Ohio 0 1 2 3</span><br><span class="line">>>> data[data > 10] = 0</span><br><span class="line">>>> data</span><br><span class="line"> one two three four</span><br><span class="line">Ohio 0 1 2 3</span><br><span class="line">Colorado 4 5 6 7</span><br><span class="line">Utah 8 9 10 0</span><br><span class="line">New York 0 0 0 0</span><br><span class="line">>>> data + 100</span><br><span class="line"> one two three four</span><br><span class="line">Ohio 100 101 102 103</span><br><span class="line">Colorado 104 105 106 107</span><br><span class="line">Utah 108 109 110 100</span><br><span class="line">New York 100 100 100 100</span><br><span class="line"></span><br><span class="line">>>> # DataFrame和Series的计算</span><br><span class="line">... frame = DataFrame(np.arange(12.).reshape((4, 3)),</span><br><span class="line">... columns=list('bde'),</span><br><span class="line">... 
index=['Utah', 'Ohio', 'Texas', 'Oregon'])</span><br><span class="line">>>> s = frame.iloc[0]</span><br><span class="line">>>> print(frame)</span><br><span class="line"> b d e</span><br><span class="line">Utah 0.0 1.0 2.0</span><br><span class="line">Ohio 3.0 4.0 5.0</span><br><span class="line">Texas 6.0 7.0 8.0</span><br><span class="line">Oregon 9.0 10.0 11.0</span><br><span class="line">>>> print(s)</span><br><span class="line">b 0.0</span><br><span class="line">d 1.0</span><br><span class="line">e 2.0</span><br><span class="line">Name: Utah, dtype: float64</span><br><span class="line">>>> print(frame - s) # 每一行减去对应的s,本质上每一行在对应的索引位置上相减。</span><br><span class="line"> b d e</span><br><span class="line">Utah 0.0 0.0 0.0</span><br><span class="line">Ohio 3.0 3.0 3.0</span><br><span class="line">Texas 6.0 6.0 6.0</span><br><span class="line">Oregon 9.0 9.0 9.0</span><br></pre></td></tr></table></figure></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://linanblog.cn/2018/02/05/Pandas手记(1)/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Albert">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/WechatIMG7.jpeg">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="学无止境">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/2018/02/05/Pandas手记(1)/" itemprop="url">Pandas手记(1)</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-02-05T09:31:01+08:00">
2018-02-05
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/常用机器学习库/" itemprop="url" rel="index">
<span itemprop="name">常用机器学习库</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>概述:pandas含有使数据分析工作变得更快更简单的高级数据结构和操作工具,pandas是基于Numpy构建的。pandas在过去的几年中逐渐成长为一个非常强大的库。</p>
<h2 id="Pandas的数据结构介绍"><a href="#Pandas的数据结构介绍" class="headerlink" title="Pandas的数据结构介绍"></a>Pandas的数据结构介绍</h2><p><strong>Pandas的数据结构主要有两个:Series和DataFrame.</strong><br> <font color="red">引入pandas</font><br> <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">>>> from pandas import Series,DataFrame</span><br><span class="line">>>> import pandas as pd</span><br></pre></td></tr></table></figure></p>
<h3 id="Series"><a href="#Series" class="headerlink" title="Series"></a>Series</h3><p> Series是一种类似于一维数组的对象,它由一组数据(各种Numpy数据类型)以及一组与之相关的数据标签(即索引)组成。仅由一组数据即可产生最简单的Series:<br> <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">>>> obj = Series([4,7,-5,3])</span><br><span class="line">>>> obj</span><br><span class="line">0 4</span><br><span class="line">1 7</span><br><span class="line">2 -5</span><br><span class="line">3 3</span><br><span class="line">dtype: int64</span><br></pre></td></tr></table></figure></p>
<p>Series的字符串表现形式为:索引在左边,值在右边。你可以通过Series的values和index属性获取其数组表现形式和索引对象:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">>>> obj.values</span><br><span class="line">array([ 4, 7, -5, 3], dtype=int64)</span><br><span class="line">>>> obj.index</span><br><span class="line">RangeIndex(start=0, stop=4, step=1)</span><br></pre></td></tr></table></figure></p>
<p>Series带有一个可以对各个数据点进行标记的索引:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">>>> obj2 = Series([4,7,-5,3],index=['d','b','a','c'])</span><br><span class="line">>>> obj2</span><br><span class="line">d 4</span><br><span class="line">b 7</span><br><span class="line">a -5</span><br><span class="line">c 3</span><br><span class="line">dtype: int64</span><br></pre></td></tr></table></figure></p>
<p>与Numpy相比,Series既可以通过下标也可以通过索引方式选取Series的单个或者一组值:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">>>> obj2[1]</span><br><span class="line">7</span><br><span class="line">>>> obj2['b']</span><br><span class="line">7</span><br><span class="line">>>> obj2[['a','b']] #注意这里是双层[]</span><br><span class="line">a -5</span><br><span class="line">b 7</span><br><span class="line">dtype: int64</span><br></pre></td></tr></table></figure></p>
<p>Numpy数组运算都会保留索引和值之间的链接:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">>>> obj2 * 2</span><br><span class="line">d 8</span><br><span class="line">b 14</span><br><span class="line">a -10</span><br><span class="line">c 6</span><br></pre></td></tr></table></figure></p>
<p>还可以将Series看成是定长的有序字典,因为它是索引值到数据值的一个映射:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">>>> 'b' in obj2</span><br><span class="line">True</span><br></pre></td></tr></table></figure></p>
<p>如果数据存放在Python字典中,可以直接转成Series:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">>>> sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}</span><br><span class="line">>>> obj3 = Series(sdata) # 使用字典初始化Series,但是顺序没有保证。</span><br><span class="line">>>> obj3</span><br><span class="line">Ohio 35000</span><br><span class="line">Oregon 16000</span><br><span class="line">Texas 71000</span><br><span class="line">Utah 5000</span><br><span class="line">dtype: int64</span><br></pre></td></tr></table></figure></p>
<p>如果使用字典和索引数组结合可以保证顺序:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line">>>> states = ['California', 'Ohio', 'Oregon', 'Texas', 'Utah', 'New Jersy']</span><br><span class="line">>>> obj4 = Series(sdata, index=states) # 字典和索引数组结合保证顺序</span><br><span class="line">>>> obj4</span><br><span class="line">California NaN</span><br><span class="line">Ohio 35000.0</span><br><span class="line">Oregon 16000.0</span><br><span class="line">Texas 71000.0</span><br><span class="line">Utah 5000.0</span><br><span class="line">New Jersy NaN</span><br><span class="line">dtype: float64</span><br></pre></td></tr></table></figure></p>
<p>NaN(非数字),在pandas中,它代表缺失或NA值。使用缺失(missing)或NA表示缺失数据。pandas的isnull和notnull函数可以用于检测缺失数据。<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><span class="line">>>> pd.isnull(obj4) # 检测缺失值</span><br><span class="line">California True</span><br><span class="line">Ohio False</span><br><span class="line">Oregon False</span><br><span class="line">Texas False</span><br><span class="line">Utah False</span><br><span class="line">New Jersy True</span><br><span class="line">dtype: bool</span><br><span class="line">>>> obj4.isnull()</span><br><span class="line">California True</span><br><span class="line">Ohio False</span><br><span class="line">Oregon False</span><br><span class="line">Texas False</span><br><span class="line">Utah False</span><br><span class="line">New Jersy True</span><br><span class="line">dtype: bool</span><br></pre></td></tr></table></figure></p>
<p>Series的一个重要功能是:它在算术运算中会自动对齐不同索引的数据。<br>Series按对应索引位置相加,对不上的位置结果为NaN:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">>>> obj4</span><br><span class="line">California NaN</span><br><span class="line">Ohio 35000.0</span><br><span class="line">Oregon 16000.0</span><br><span class="line">Texas 71000.0</span><br><span class="line">Utah 5000.0</span><br><span class="line">New Jersy NaN</span><br><span class="line">dtype: float64</span><br></pre></td></tr></table></figure></p>
<p>Series对象本身及索引都有一个name的属性,该属性跟pandas其他的关键功能关系非常密切:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">>>> obj4.name = 'population' # 给Series起名</span><br><span class="line">>>> obj4.index.name = 'state' # 给Series的index起名</span><br><span class="line">>>> obj4</span><br><span class="line">state</span><br><span class="line">California NaN</span><br><span class="line">Ohio 35000.0</span><br><span class="line">Oregon 16000.0</span><br><span class="line">Texas 71000.0</span><br><span class="line">Utah 5000.0</span><br><span class="line">New Jersy NaN</span><br><span class="line">Name: population, dtype: float64</span><br></pre></td></tr></table></figure></p>
<p> <font color="red">Series指定索引顺序和指定填充值:</font><br> <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><span class="line"> >>> obj = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])</span><br><span class="line">>>> obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e']) # 指定索引顺序</span><br><span class="line">>>> obj2</span><br><span class="line">a -5.3</span><br><span class="line">b 7.2</span><br><span class="line">c 3.6</span><br><span class="line">d 4.5</span><br><span class="line">e NaN</span><br><span class="line">dtype: float64</span><br><span class="line">>>> obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=obj.mean()) # 指定填充值</span><br><span class="line">a -5.3</span><br><span class="line">b 7.2</span><br><span class="line">c 3.6</span><br><span class="line">d 4.5</span><br><span class="line">e 2.5</span><br><span class="line">dtype: float64</span><br></pre></td></tr></table></figure></p>
<p> <font color="red">Series指定填充方式:</font><br> <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line">>>> obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])</span><br><span class="line">>>> obj3.reindex(range(6), method='ffill') # 指定填充方式为ffill</span><br><span class="line">0 blue</span><br><span class="line">1 blue</span><br><span class="line">2 purple</span><br><span class="line">3 purple</span><br><span class="line">4 yellow</span><br><span class="line">5 yellow</span><br><span class="line">dtype: object</span><br></pre></td></tr></table></figure></p>
<p> Series根据值排序:<br> <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">>>> obj = Series([4, 7, -3, 2])</span><br><span class="line">>>> obj.sort_values() # 根据值排序</span><br><span class="line">2 -3</span><br><span class="line">3 2</span><br><span class="line">0 4</span><br><span class="line">1 7</span><br><span class="line">dtype: int64</span><br></pre></td></tr></table></figure></p>
<p> Series根据索引排序:<br> <figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line">>>> index = ['d', 'c', 'a', 'b']</span><br><span class="line">>>> obj = Series([4, 7, -3, 2], index=index)</span><br><span class="line">>>> index = sorted(index)</span><br><span class="line">>>> obj = obj.reindex(index)</span><br><span class="line">>>> obj</span><br><span class="line">a -3</span><br><span class="line">b 2</span><br><span class="line">c 7</span><br><span class="line">d 4</span><br><span class="line">dtype: int64</span><br></pre></td></tr></table></figure></p>
<p>Series的索引也可以通过赋值的方式就地修改,也有切片操作:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br></pre></td><td class="code"><pre><span class="line">>>> index = ['d', 'c', 'a', 'b', 'f', 'g']</span><br><span class="line">>>> obj = Series([4, 7, -3, 2, 9, 3], index=index)</span><br><span class="line">>>> x = obj[1:5] # 左闭右开区间,并且x没有复制生成一个新的Series。</span><br><span class="line">>>> x</span><br><span class="line">c 7</span><br><span class="line">a -3</span><br><span class="line">b 2</span><br><span class="line">f 9</span><br><span class="line">dtype: int64</span><br><span class="line">>>> x[2] = 10</span><br><span class="line">>>> obj</span><br><span class="line">d 4</span><br><span class="line">c 7</span><br><span class="line">a -3</span><br><span class="line">b 10</span><br><span class="line">f 9</span><br><span class="line">g 3</span><br><span class="line">dtype: int64</span><br><span class="line">>>> obj['c':'f']</span><br><span class="line">c 7</span><br><span class="line">a -3</span><br><span class="line">b 
10</span><br><span class="line">f 9</span><br><span class="line">dtype: int64</span><br><span class="line">>>> obj['b':'d':-1]</span><br><span class="line">b 10</span><br><span class="line">a -3</span><br><span class="line">c 7</span><br><span class="line">d 4</span><br><span class="line">dtype: int64</span><br></pre></td></tr></table></figure></p>
<h3 id="DataFrame"><a href="#DataFrame" class="headerlink" title="DataFrame"></a>DataFrame</h3><p> DataFrame是一个表格型的数据结构,它含有一组有序的列,每列可以是不同的值类型(数值、字符串、布尔型值)。DataFrame既有行索引也有列索引,它可以被看做由Series组成的字典(共同用一个索引)。和其他类似的数据结构相比,DataFrame中面向行和面向列的操作基本上是平衡的。其实,DataFrame中的数据是以一个或多个二维块存放的(而不是列表、字典或别的数据结构)。</p>
<pre><code>## DataFrame
本质上就是一个二维数组,用index定位行,用columns定位列。
col_1 col_2 ... col_n
index_1 x11 x12 x1n
index_2 x21 x22 x2n
...
index_m xm1 xm2 xmn
房价 人口 GDP
上海 ...
北京
天津
</code></pre><p>构建DataFrame的方法有很多,最常用的方法是直接传入一个由等长列表或Numpy数组组成的字典:<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">>>> data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],</span><br><span class="line"> 'year': [2000, 2001, 2002, 2001, 2002],</span><br><span class="line"> 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]} # key代表列的名字,对应的数组就是这个列的值。</span><br><span class="line">>>> frame = DataFrame(data)</span><br><span class="line">>>> frame</span><br><span class="line"> pop state year</span><br><span class="line">0 1.5 Ohio 2000</span><br><span class="line">1 1.7 Ohio 2001</span><br><span class="line">2 3.6 Ohio 2002</span><br><span class="line">3 2.4 Nevada 2001</span><br><span class="line">4 2.9 Nevada 2002</span><br></pre></td></tr></table></figure></p>
<p>DataFrame可以指定列的顺序<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">>>> DataFrame(data, columns=['year', 'state', 'pop']) # 可以指定列的顺序</span><br><span class="line"> year state pop</span><br><span class="line">0 2000 Ohio 1.5</span><br><span class="line">1 2001 Ohio 1.7</span><br><span class="line">2 2002 Ohio 3.6</span><br><span class="line">3 2001 Nevada 2.4</span><br><span class="line">4 2002 Nevada 2.9</span><br></pre></td></tr></table></figure></p>
<p>DataFrame匹配不到的行和列设置为NaN<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line">>>> frame2 = DataFrame(data, </span><br><span class="line"> columns=['year', 'state', 'pop', 'debt'], # 匹配不到的行和列设置为NaN</span><br><span class="line"> index=['one', 'two', 'three', 'four', 'five']) # 使用index指定行索引</span><br><span class="line">>>> frame2</span><br><span class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 NaN</span><br><span class="line">two 2001 Ohio 1.7 NaN</span><br><span class="line">three 2002 Ohio 3.6 NaN</span><br><span class="line">four 2001 Nevada 2.4 NaN</span><br><span class="line">five 2002 Nevada 2.9 NaN</span><br></pre></td></tr></table></figure></p>
<p>DataFrame把列变成索引<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><span class="line">>>> frame = DataFrame({'year': [2000, 2001, 2002, 2003, 2004],</span><br><span class="line">... 'sales': [1500, 3700, 2400, 2900, 4900]})</span><br><span class="line">>>> frame</span><br><span class="line"> sales year</span><br><span class="line">0 1500 2000</span><br><span class="line">1 3700 2001</span><br><span class="line">2 2400 2002</span><br><span class="line">3 2900 2003</span><br><span class="line">4 4900 2004</span><br><span class="line">>>> frame.set_index('year') # 列变成索引</span><br><span class="line"> sales</span><br><span class="line">year</span><br><span class="line">2000 1500</span><br><span class="line">2001 3700</span><br><span class="line">2002 2400</span><br><span class="line">2003 2900</span><br><span class="line">2004 4900</span><br></pre></td></tr></table></figure></p>
<p>DataFrame快速访问列<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line">>>> print(frame2.year) # 快速访问列,不管是一行还是一列,返回的是一个Series。</span><br><span class="line">>>> print(frame2['pop']) # 和普通二维数组非常不一样!!!</span><br><span class="line">one 2000</span><br><span class="line">two 2001</span><br><span class="line">three 2002</span><br><span class="line">four 2001</span><br><span class="line">five 2002</span><br><span class="line">Name: year, dtype: int64</span><br><span class="line">one 1.5</span><br><span class="line">two 1.7</span><br><span class="line">three 3.6</span><br><span class="line">four 2.4</span><br><span class="line">five 2.9</span><br><span class="line">Name: pop, dtype: float64</span><br></pre></td></tr></table></figure></p>
<p><strong><em> DataFrame通过iloc用数字索引访问行和列(左闭右开区间),ix方法已经被淘汰。</em></strong><br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br></pre></td><td class="code"><pre><span class="line">>>> frame2</span><br><span class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 NaN</span><br><span class="line">two 2001 Ohio 1.7 NaN</span><br><span class="line">three 2002 Ohio 3.6 NaN</span><br><span class="line">four 2001 Nevada 2.4 NaN</span><br><span class="line">five 2002 Nevada 2.9 NaN</span><br><span class="line"></span><br><span class="line">>>> print(frame2.iloc[1,]) # 访问第1行</span><br><span class="line">year 2001</span><br><span class="line">state Ohio</span><br><span class="line">pop 1.7</span><br><span class="line">debt NaN</span><br><span class="line">Name: two, dtype: object</span><br><span class="line"></span><br><span class="line">>>> print(frame2.iloc[:,2]) # 访问第2列pop</span><br><span class="line">one 1.5</span><br><span class="line">two 1.7</span><br><span class="line">three 3.6</span><br><span class="line">four 2.4</span><br><span class="line">five 2.9</span><br><span class="line">Name: 
pop, dtype: float64</span><br><span class="line"></span><br><span class="line">>>> print(frame2.iloc[1:3, 0:3]) #第1/2行,第0/1/2列切片</span><br><span class="line"> year state pop</span><br><span class="line">two 2001 Ohio 1.7</span><br><span class="line">three 2002 Ohio 3.6</span><br></pre></td></tr></table></figure></p>
<p><strong><em> DataFrame通过loc用名字索引访问行和列(闭区间)</em></strong><br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br></pre></td><td class="code"><pre><span class="line">>>> frame2</span><br><span class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 NaN</span><br><span class="line">two 2001 Ohio 1.7 NaN</span><br><span class="line">three 2002 Ohio 3.6 NaN</span><br><span class="line">four 2001 Nevada 2.4 NaN</span><br><span class="line">five 2002 Nevada 2.9 NaN</span><br><span class="line"></span><br><span class="line">>>> print(frame2.loc['one',])</span><br><span class="line">year 2000</span><br><span class="line">state Ohio</span><br><span class="line">pop 1.5</span><br><span class="line">debt NaN</span><br><span class="line">Name: one, dtype: object</span><br><span class="line"></span><br><span class="line">>>> print(frame2.loc['one':'three',])</span><br><span 
class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 NaN</span><br><span class="line">two 2001 Ohio 1.7 NaN</span><br><span class="line">three 2002 Ohio 3.6 NaN</span><br><span class="line"></span><br><span class="line">>>> print(frame2.loc[:,'state'])</span><br><span class="line">one Ohio</span><br><span class="line">two Ohio</span><br><span class="line">three Ohio</span><br><span class="line">four Nevada</span><br><span class="line">five Nevada</span><br><span class="line">Name: state, dtype: object</span><br><span class="line"></span><br><span class="line">>>> print(frame2.loc['two':'four','year':'debt'])</span><br><span class="line"> year state pop debt</span><br><span class="line">two 2001 Ohio 1.7 NaN</span><br><span class="line">three 2002 Ohio 3.6 NaN</span><br><span class="line">four 2001 Nevada 2.4 NaN</span><br></pre></td></tr></table></figure></p>
<p><strong><em> DataFrame使用iloc或loc选择指定的行列组合 </em></strong><br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br></pre></td><td class="code"><pre><span class="line">>>> frame2</span><br><span class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 NaN</span><br><span class="line">two 2001 Ohio 1.7 NaN</span><br><span class="line">three 2002 Ohio 3.6 NaN</span><br><span class="line">four 2001 Nevada 2.4 NaN</span><br><span class="line">five 2002 Nevada 2.9 NaN</span><br><span class="line"></span><br><span class="line">>>> frame2.iloc[[0, 1, 3], [0, 2]]</span><br><span class="line"> year pop</span><br><span class="line">one 2000 1.5</span><br><span class="line">two 2001 1.7</span><br><span class="line">four 2001 2.4</span><br><span class="line"></span><br><span class="line">>>> frame2.loc[['one', 'three', 'four'], ['state', 'pop']]</span><br><span class="line"> state pop</span><br><span class="line">one Ohio 1.5</span><br><span class="line">three Ohio 3.6</span><br><span class="line">four Nevada 2.4</span><br></pre></td></tr></table></figure></p>
<p>DataFrame使用 <font color="red">二维ndarray</font>可以传入行标和列标<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">>>> ARR=np.array([[1,2,3,4],[5,6,7,8]])</span><br><span class="line">>>> ff=pd.DataFrame(ARR,columns=list('abcd'),index=['first','second'])</span><br><span class="line">>>> ff</span><br><span class="line"> a b c d</span><br><span class="line">first 1 2 3 4</span><br><span class="line">second 5 6 7 8</span><br></pre></td></tr></table></figure></p>
<p>DataFrame修改列值<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br></pre></td><td class="code"><pre><span class="line">>>> frame2['debt'] = 100 # 作用在所有元素上</span><br><span class="line">>>> frame2</span><br><span class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 100</span><br><span class="line">two 2001 Ohio 1.7 100</span><br><span class="line">three 2002 Ohio 3.6 100</span><br><span class="line">four 2001 Nevada 2.4 100</span><br><span class="line">five 2002 Nevada 2.9 100</span><br><span class="line"></span><br><span class="line">>>> frame2['debt'] = np.arange(5) # 长度必须一致</span><br><span class="line">>>> frame2</span><br><span class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 0</span><br><span class="line">two 2001 Ohio 1.7 1</span><br><span class="line">three 2002 Ohio 3.6 2</span><br><span class="line">four 2001 Nevada 2.4 3</span><br><span class="line">five 2002 Nevada 2.9 4</span><br><span class="line"></span><br><span class="line">>>> frame2['debt'] = Series([-1.2, -1.5, -1.7, 2], index=['two', 'four', 'five', 'six']) # 匹配不到的索引自动扔掉</span><br><span class="line">>>> 
frame2</span><br><span class="line"> year state pop debt</span><br><span class="line">one 2000 Ohio 1.5 NaN</span><br><span class="line">two 2001 Ohio 1.7 -1.2</span><br><span class="line">three 2002 Ohio 3.6 NaN</span><br><span class="line">four 2001 Nevada 2.4 -1.5</span><br><span class="line">five 2002 Nevada 2.9 -1.7</span><br></pre></td></tr></table></figure></p>
<p>DataFrame按条件添加列值<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><span class="line">>>> frame2['eastern'] = (frame2.state == 'Ohio')</span><br><span class="line">>>> frame2</span><br><span class="line"> year state pop debt eastern</span><br><span class="line">one 2000 Ohio 1.5 NaN True</span><br><span class="line">two 2001 Ohio 1.7 -1.2 True</span><br><span class="line">three 2002 Ohio 3.6 NaN True</span><br><span class="line">four 2001 Nevada 2.4 -1.5 False</span><br><span class="line">five 2002 Nevada 2.9 -1.7 False</span><br><span class="line"></span><br><span class="line">>>> frame2['big'] = (frame2['pop'] > 2.5)</span><br><span class="line">>>> frame2</span><br><span class="line"> year state pop debt eastern big</span><br><span class="line">one 2000 Ohio 1.5 NaN True False</span><br><span class="line">two 2001 Ohio 1.7 -1.2 True False</span><br><span class="line">three 2002 Ohio 3.6 NaN True True</span><br><span class="line">four 2001 Nevada 2.4 -1.5 False False</span><br><span class="line">five 2002 Nevada 2.9 -1.7 False True</span><br></pre></td></tr></table></figure></p>
<p>DataFrame重新指定行/列索引顺序<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><span class="line">>>> frame = DataFrame(np.arange(9).reshape((3, 3)),</span><br><span class="line">... index=['a', 'c', 'd'],</span><br><span class="line">... 
columns=['Ohio', 'Texas', 'California'])</span><br><span class="line">>>> frame</span><br><span class="line"> Ohio Texas California</span><br><span class="line">a 0 1 2</span><br><span class="line">c 3 4 5</span><br><span class="line">d 6 7 8</span><br><span class="line"></span><br><span class="line">>>> frame2 = frame.reindex(['a', 'b', 'c', 'd']) # 数量不用匹配,没有的自动填充NaN。</span><br><span class="line">>>> frame2</span><br><span class="line"> Ohio Texas California</span><br><span class="line">a 0.0 1.0 2.0</span><br><span class="line">b NaN NaN NaN</span><br><span class="line">c 3.0 4.0 5.0</span><br><span class="line">d 6.0 7.0 8.0</span><br><span class="line"></span><br><span class="line">>>> states = ['Texas', 'Utah', 'California']</span><br><span class="line">>>> frame3 = frame2.reindex(columns=states) # 重新指定行/列索引顺序</span><br><span class="line">>>> frame3</span><br><span class="line"> Texas Utah California</span><br><span class="line">a 1.0 NaN 2.0</span><br><span class="line">b NaN NaN NaN</span><br><span class="line">c 4.0 NaN 5.0</span><br><span class="line">d 7.0 NaN 8.0</span><br></pre></td></tr></table></figure></p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</div>
</article>
<!-- Post entry in the index listing, marked up as a schema.org Article. -->
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  <div class="post-block">
    <!-- Canonical URL of the full post for microdata consumers. -->
    <link itemprop="mainEntityOfPage" href="http://linanblog.cn/2018/01/03/贝叶斯公式的概率问题/">
    <!-- Hidden microdata: author (schema.org Person). -->
    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Albert">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/images/WechatIMG7.jpeg">
    </span>
    <!-- Hidden microdata: publisher (schema.org Organization). -->
    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="学无止境">
    </span>
    <header class="post-header">
      <h1 class="post-title" itemprop="name headline">
        <a class="post-title-link" href="/2018/01/03/贝叶斯公式的概率问题/" itemprop="url">贝叶斯公式的概率问题</a></h1>
      <div class="post-meta">
        <!-- Publication date; the visible text mirrors the machine-readable datetime. -->
        <span class="post-time">
          <span class="post-meta-item-icon">
            <!-- Decorative icon: the adjacent text label carries the meaning, so hide it from assistive tech. -->
            <i class="fa fa-calendar-o" aria-hidden="true"></i>
          </span>
          <span class="post-meta-item-text">发表于</span>
          <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-01-03T14:35:51+08:00">
            2018-01-03
          </time>
        </span>
        <!-- Category link, exposed as the Article's "about" Thing. -->
        <span class="post-category">
          <span class="post-meta-divider">|</span>
          <span class="post-meta-item-icon">
            <!-- Decorative icon: the adjacent text label carries the meaning, so hide it from assistive tech. -->
            <i class="fa fa-folder-o" aria-hidden="true"></i>
          </span>
          <span class="post-meta-item-text">分类于</span>
          <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
            <a href="/categories/机器学习/" itemprop="url" rel="index">
              <span itemprop="name">机器学习</span>
            </a>
          </span>
        </span>
      </div>
    </header>
    <!-- Post content. The heading anchors have no text content, so aria-label gives them an accessible name instead of relying on title alone. -->
    <div class="post-body" itemprop="articleBody">
      <h3 id="疾病问题"><a href="#疾病问题" class="headerlink" title="疾病问题" aria-label="疾病问题"></a>疾病问题</h3>
      <p>假设你去医院检查某项疾病,检查完以后,医生有个好消息和坏消息,坏消息是你的病情呈阳性,而这个测试的准确度是99%,好消息是这是一种罕见的疾病,每10000人中只有1人患病。请问,你患病的几率有多大?<br>补充问题:如果你第二次检查还是阳性,你患病的几率有多大?<br><img src="/2018/01/03/贝叶斯公式的概率问题/答案.png" alt="答案"><br><img src="/2018/01/03/贝叶斯公式的概率问题/编程.png" alt="编程"><br>可以得到结果:<br><img src="/2018/01/03/贝叶斯公式的概率问题/结果.png" alt="结果"></p>
      <h3 id="Monty-Hall-问题"><a href="#Monty-Hall-问题" class="headerlink" title="Monty Hall 问题" aria-label="Monty Hall 问题"></a>Monty Hall 问题</h3>
      <p>游戏参赛者会看见三扇关闭了的门,其中一扇的后面有一辆汽车,选中后面有车的那扇门就可以赢得该汽车,而另外两扇门后面则各藏有一只山羊。当参赛者选定了一扇门,但未去开启它的时候,节目主持人会开启剩下两扇门的其中一扇,露出其中一只山羊。主持人其后会问参赛者要不要换另一扇仍然关上的门。问题是:换另一扇门会否增加参赛者赢得汽车的机会率?<br><img src="/2018/01/03/贝叶斯公式的概率问题/图1.png" alt="图1"><br><img src="/2018/01/03/贝叶斯公式的概率问题/图2.png" alt="图2"></p>
    </div>
    <footer class="post-footer">
      <div class="post-eof"></div>
    </footer>
  </div>
</article>
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://linanblog.cn/2018/01/03/nginx实现Neo4j的转发服务/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Albert">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/WechatIMG7.jpeg">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="学无止境">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
<a class="post-title-link" href="/2018/01/03/nginx实现Neo4j的转发服务/" itemprop="url">nginx实现Neo4j的转发服务</a></h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2018-01-03T11:37:21+08:00">
2018-01-03
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/知识图谱/" itemprop="url" rel="index">
<span itemprop="name">知识图谱</span>
</a>
</span>
</span>
</div>
</header>