forked from amrali-eg/EncodingChecker
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTextEncoding.cs
More file actions
96 lines (93 loc) · 3.64 KB
/
TextEncoding.cs
File metadata and controls
96 lines (93 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
using System;
using System.IO;
using System.Text;
using UtfUnknown;
namespace EncodingChecker
{
/// <summary>
/// Helpers for checking whether a byte buffer decodes cleanly under a given encoding
/// and for detecting the text encoding of a file.
/// </summary>
public static class TextEncoding
{
    /// <summary>
    /// Shared fallback installed on decoders so that undecodable bytes raise
    /// <see cref="DecoderFallbackException"/> instead of being replaced.
    /// </summary>
    private static readonly DecoderExceptionFallback DecoderExceptionFallback = new DecoderExceptionFallback();

    /// <summary>
    /// Checks whether a range of <paramref name="bytes"/> can be decoded with
    /// <paramref name="encoding"/> without triggering the decoder fallback.
    /// Technique from https://netvignettes.wordpress.com/2011/07/03/how-to-detect-encoding/
    /// </summary>
    /// <param name="encoding">Encoding to validate the bytes against.</param>
    /// <param name="bytes">Buffer holding the bytes to check.</param>
    /// <param name="offset">Index of the first byte to check.</param>
    /// <param name="length">
    /// Number of bytes to check. If <c>null</c>, everything from <paramref name="offset"/>
    /// to the end of <paramref name="bytes"/> is checked.
    /// </param>
    /// <returns><c>true</c> if no <see cref="DecoderFallbackException"/> was raised; otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> or <paramref name="bytes"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="offset"/> or <paramref name="length"/> is negative, larger than the buffer,
    /// or together they describe a range that extends past the end of the buffer.
    /// </exception>
    public static bool Validate(this Encoding encoding, byte[] bytes, int offset = 0, int? length = null)
    {
        if (encoding == null)
        {
            throw new ArgumentNullException(nameof(encoding));
        }
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }
        if (offset < 0 || offset > bytes.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(offset), @"Offset is out of range.");
        }

        // An omitted length means "from offset to the end of the buffer". Defaulting to
        // bytes.Length would make every call with a non-zero offset fail the range check below.
        int count = length ?? (bytes.Length - offset);
        if (count < 0 || count > bytes.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(length), @"Length is out of range.");
        }
        // Compare by subtraction: offset + count could overflow int for very large buffers.
        if (count > bytes.Length - offset)
        {
            throw new ArgumentOutOfRangeException(nameof(offset), @"The specified range is outside of the specified buffer.");
        }

        var decoder = encoding.GetDecoder();
        decoder.Fallback = DecoderExceptionFallback;
        try
        {
            // Only the exception matters; the returned character count is discarded.
            // NOTE(review): this overload is called without an explicit flush, so an incomplete
            // trailing multi-byte sequence may not be reported - confirm that is intended.
            decoder.GetCharCount(bytes, offset, count);
        }
        catch (DecoderFallbackException)
        {
            return false;
        }
        return true;
    }

    /// <summary>
    /// Get the System.Text.Encoding of this file, with no limit on the number of bytes read.
    /// </summary>
    /// <param name="filePath">Path to file</param>
    /// <param name="hasBOM">
    /// Reset to <c>false</c> on entry; set from the detection result when the charset detector
    /// identifies the encoding.
    /// </param>
    /// <returns>System.Text.Encoding (can be null if not available or not supported by .NET).</returns>
    public static Encoding GetFileEncoding(string filePath, ref bool hasBOM)
    {
        return GetFileEncoding(filePath, null, ref hasBOM);
    }

    /// <summary>
    /// Get the System.Text.Encoding of this file.
    /// </summary>
    /// <param name="filePath">Path to file</param>
    /// <param name="maxBytesToRead">max bytes to read from <paramref name="filePath"/>. If <c>null</c>, then no max</param>
    /// <param name="hasBOM">
    /// Reset to <c>false</c> on entry. Set to the detector's <c>HasBOM</c> value when
    /// <c>CharsetDetector</c> identifies the encoding; left <c>false</c> when
    /// <c>Utf16Detector</c> identifies it or when nothing is detected.
    /// </param>
    /// <returns>
    /// System.Text.Encoding (can be null if not available or not supported by .NET).
    /// Also <c>null</c> when any exception occurs while opening or reading the file.
    /// </returns>
    public static Encoding GetFileEncoding(string filePath, int? maxBytesToRead, ref bool hasBOM)
    {
        hasBOM = false;
        try
        {
            // FileShare.ReadWrite lets the file be opened even while another handle has it open for writing.
            using (FileStream stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                // Check for possible UTF-16 encoding (LE or BE).
                Encoding encoding = Utf16Detector.DetectFromStream(stream, maxBytesToRead);
                if (encoding != null)
                {
                    return encoding;
                }

                // https://github.com/CharsetDetector/UTF-unknown
                // The first detector has consumed the stream, so rewind before the second pass.
                stream.Position = 0L;
                var result = CharsetDetector.DetectFromStream(stream, maxBytesToRead);
                if (result.Detected != null)
                {
                    hasBOM = result.Detected.HasBOM;
                    return result.Detected.Encoding;
                }
                return null;
            }
        }
        catch
        {
            // Deliberate best-effort: any failure is reported to the caller as "encoding unknown".
            return null;
        }
    }
}
}