forked from feature23/StringSimilarity.NET
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStringSet.cs
More file actions
61 lines (54 loc) · 2.08 KB
/
StringSet.cs
File metadata and controls
61 lines (54 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/*
* The MIT License
*
* Copyright 2016 feature[23]
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
using System;
using F23.StringSimilarity.Utils;
namespace F23.StringSimilarity
{
/// <summary>
/// Pairs a <see cref="SparseBooleanVector"/> with the <see cref="KShingling"/>
/// object it is associated with, and exposes similarity measures between two
/// such sets.
/// </summary>
public class StringSet
{
    private readonly SparseBooleanVector _vector;
    private readonly KShingling _ks;

    /// <summary>
    /// Creates a set that wraps <paramref name="vector"/> and remembers the
    /// <paramref name="ks"/> object it belongs to.
    /// </summary>
    /// <param name="vector">The vector the similarity computations are delegated to.</param>
    /// <param name="ks">
    /// The shingling object; two sets can only be compared when they hold the
    /// same one.
    /// </param>
    public StringSet(SparseBooleanVector vector, KShingling ks)
    {
        _vector = vector;
        _ks = ks;
    }

    /// <summary>
    /// Computes the Jaccard similarity between this set and
    /// <paramref name="other"/> by delegating to
    /// <c>SparseBooleanVector.Jaccard</c>.
    /// </summary>
    /// <param name="other">The set to compare against.</param>
    /// <returns>The value returned by the underlying vector's <c>Jaccard</c> method.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="other"/> was not created using the same
    /// <see cref="KShingling"/> object as this set.
    /// </exception>
    public double JaccardSimilarity(StringSet other)
    {
        EnsureComparable(other);

        return _vector.Jaccard(other._vector);
    }

    /// <summary>
    /// Computes the Sorensen-Dice similarity between this set and
    /// <paramref name="other"/>: twice the intersection of the two vectors
    /// divided by the sum of their sizes.
    /// </summary>
    /// <param name="other">The set to compare against.</param>
    /// <returns><c>2 * intersection / (size of this + size of other)</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="other"/> was not created using the same
    /// <see cref="KShingling"/> object as this set.
    /// </exception>
    public double SorensenDiceSimilarity(StringSet other)
    {
        EnsureComparable(other);

        // The 2.0 literal forces floating-point division.
        // NOTE(review): when both Size values are 0 the denominator is zero and
        // the result is presumably NaN -- confirm against SparseBooleanVector
        // and decide whether that case deserves a defined value.
        return 2.0 * _vector.Intersection(other._vector) / (_vector.Size + other._vector.Size);
    }

    /// <summary>
    /// Shared guard for the similarity methods: rejects a null argument and a
    /// set that holds a different <see cref="KShingling"/> object.
    /// </summary>
    private void EnsureComparable(StringSet other)
    {
        if (other == null)
        {
            throw new ArgumentNullException(nameof(other));
        }

        // Uses KShingling's != operator; presumably reference inequality unless
        // KShingling overloads it -- verify against that type.
        if (_ks != other._ks)
        {
            // ArgumentException derives from Exception, so callers that catch
            // Exception keep working.
            throw new ArgumentException("Profiles were not created using the same kshingling object!", nameof(other));
        }
    }
}
}