forked from hankcs/HanLP
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTerm.java
More file actions
92 lines (82 loc) · 1.99 KB
/
Term.java
File metadata and controls
92 lines (82 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/*
* <summary></summary>
* <author>He Han</author>
* <email>hankcs.cn@gmail.com</email>
* <create-date>2014/05/2014/5/17 13:25</create-date>
*
* <copyright file="WordResult.java" company="上海林原信息科技有限公司">
* Copyright (c) 2003-2014, 上海林原信息科技有限公司. All Right Reserved, http://www.linrunsoft.com/
* This source is subject to the LinrunSpace License. Please contact 上海林原信息科技有限公司 to get more information.
* </copyright>
*/
package com.hankcs.hanlp.seg.common;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.utility.LexiconUtility;
/**
 * A single word (term) produced by segmentation. All of its attributes are
 * public fields that callers may read and modify directly.
 *
 * <p>Because the fields are mutable and take part in {@link #equals(Object)}
 * and {@link #hashCode()}, do not modify {@code word} or {@code nature} while
 * the instance is stored in a hash-based collection.
 *
 * @author hankcs
 */
public class Term
{
    /**
     * The word text.
     */
    public String word;

    /**
     * The part-of-speech tag of the word.
     */
    public Nature nature;

    /**
     * Start position of the word in the text (requires the segmenter's
     * offset option to be enabled). Not set by the constructor, so it keeps
     * the default value 0 until assigned.
     */
    public int offset;

    /**
     * Constructs a term.
     *
     * @param word   the word text
     * @param nature the part-of-speech tag
     */
    public Term(String word, Nature nature)
    {
        this.word = word;
        this.nature = nature;
    }

    /**
     * Returns {@code word/nature} when {@code HanLP.Config.ShowTermNature}
     * is enabled, otherwise just the word.
     */
    @Override
    public String toString()
    {
        if (HanLP.Config.ShowTermNature)
        {
            return word + "/" + nature;
        }
        return word;
    }

    /**
     * Length of the word.
     *
     * @return the number of {@code char}s in {@link #word}
     */
    public int length()
    {
        return word.length();
    }

    /**
     * Gets the frequency of this word in the HanLP lexicon.
     *
     * @return the frequency; 0 means the word is out-of-vocabulary (OOV)
     */
    public int getFrequency()
    {
        return LexiconUtility.getFrequency(word);
    }

    /**
     * Tests whether two terms are equal. Two terms are equal when they have
     * the same {@code nature} instance and equal {@code word} text; the
     * {@code offset} is deliberately not part of the comparison.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code Term} with the same
     *         nature and word, {@code false} otherwise
     */
    @Override
    public boolean equals(Object obj)
    {
        if (this == obj)
        {
            return true;
        }
        if (!(obj instanceof Term))
        {
            return false;
        }
        Term term = (Term) obj;
        if (this.nature != term.nature)
        {
            return false;
        }
        // Null-safe comparison so a term without word text does not throw.
        return this.word == null ? term.word == null : this.word.equals(term.word);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: it is derived from
     * exactly the fields that equality compares ({@code word} and
     * {@code nature}), so equal terms always share a hash code.
     *
     * @return the hash code of this term
     */
    @Override
    public int hashCode()
    {
        int result = word == null ? 0 : word.hashCode();
        result = 31 * result + (nature == null ? 0 : nature.hashCode());
        return result;
    }
}