-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUTF8Utils.cpp
More file actions
126 lines (118 loc) · 2.74 KB
/
UTF8Utils.cpp
File metadata and controls
126 lines (118 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include "UTF8Utils.h"
#include <wchar.h>
#if defined _WIN32 || defined _WIN64
#include <windows.h>
#else
#include <string.h>
#include <stdlib.h>
#endif
// Decodes one double-byte character starting at gbBuffer into a single wide
// character stored in *pOut.
//
// pOut      receives the decoded character; must not be null.
// gbBuffer  points at the lead byte; two bytes are read from it.
//
// On Windows the bytes are interpreted in the active ANSI code page (CP_ACP);
// elsewhere mbtowc() is used, which decodes according to the current C locale.
// If the conversion fails, *pOut is set to U+FFFD (REPLACEMENT CHARACTER) so
// the caller never observes an unset value.
void Gb2312ToUnicode(wchar_t *pOut, const char *gbBuffer)
{
    if (!pOut || !gbBuffer)
        return;

    const wchar_t replacement = static_cast<wchar_t>(0xFFFD);
#if defined _WIN32 || defined _WIN64
    // MultiByteToWideChar returns 0 on failure and leaves the output untouched.
    if (MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, 2, pOut, 1) == 0)
        *pOut = replacement;
#else
    if (mbtowc(pOut, gbBuffer, 2) < 0)
    {
        // Return mbtowc's internal conversion state to its initial value so a
        // bad or incomplete sequence cannot affect the next call.
        mbtowc(NULL, NULL, 0);
        *pOut = replacement;
    }
#endif
}
// Encodes the low 16 bits of *pText as a three-byte UTF-8 sequence.
//
// pOut   receives exactly three bytes; no terminator is written.
// pText  points at the wide character to encode.
//
// The three-byte form is always used, so values below U+0800 come out in a
// non-shortest (overlong) form and bits above the low 16 are ignored. The
// value is taken numerically rather than by aliasing its bytes, so the result
// does not depend on host byte order.
void UnicodeToUTF_8(char *pOut, wchar_t *pText)
{
    const unsigned int cp = static_cast<unsigned int>(*pText) & 0xFFFFu;
    pOut[0] = static_cast<char>(0xE0 | (cp >> 12));          // 1110xxxx
    pOut[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));  // 10xxxxxx
    pOut[2] = static_cast<char>(0x80 | (cp & 0x3F));         // 10xxxxxx
}
// Converts pLen bytes of double-byte (GB2312-style) text at pText to UTF-8.
//
// pText  input bytes; may be null, in which case an empty string is returned.
// pLen   number of bytes to read; values <= 0 yield an empty string.
//
// Bytes below 0x80 are copied through unchanged, except NUL bytes, which are
// skipped. Any other byte is treated as the lead byte of a two-byte character,
// decoded with Gb2312ToUnicode and re-encoded with UnicodeToUTF_8 (always
// three output bytes). A lone lead byte at the very end of the input is
// dropped instead of reading past pLen.
std::string GB2312ToUTF_8(const char *pText, int pLen)
{
    std::string out;
    if (!pText || pLen <= 0)
        return out;

    int i = 0;
    while (i < pLen)
    {
        // Classify through unsigned char so the test does not depend on
        // whether plain char is signed on this platform.
        const unsigned char lead = static_cast<unsigned char>(pText[i]);
        if (lead < 0x80)
        {
            if (lead != 0)
                out.push_back(pText[i]);
            ++i;
            continue;
        }

        if (pLen - i < 2)
            break; // truncated double-byte character

        // Pre-set to U+FFFD so a failed decode still yields a defined value.
        wchar_t wide = static_cast<wchar_t>(0xFFFD);
        Gb2312ToUnicode(&wide, pText + i);

        char utf8[4] = {0};
        UnicodeToUTF_8(utf8, &wide);
        out.append(utf8, 3);
        i += 2;
    }
    return out;
}
// Converts pLen bytes of double-byte (GB2312-style) text at pText to a wide
// string.
//
// pText  input bytes; may be null, in which case an empty string is returned.
// pLen   number of bytes to read; values <= 0 yield an empty string.
//
// Bytes below 0x80 are widened one-to-one, except NUL bytes, which are
// skipped. Any other byte is treated as the lead byte of a two-byte character
// and decoded with Gb2312ToUnicode. Characters are appended individually:
// neither the ASCII byte nor the decoded wide character is followed by a
// terminator, so they must not be appended as C strings. A lone lead byte at
// the very end of the input is dropped instead of reading past pLen.
std::wstring GB2312StrToUnicode(const char *pText, int pLen)
{
    std::wstring out;
    if (!pText || pLen <= 0)
        return out;

    int i = 0;
    while (i < pLen)
    {
        // Classify through unsigned char so the test does not depend on
        // whether plain char is signed on this platform.
        const unsigned char lead = static_cast<unsigned char>(pText[i]);
        if (lead < 0x80)
        {
            if (lead != 0)
                out.push_back(static_cast<wchar_t>(lead));
            ++i;
            continue;
        }

        if (pLen - i < 2)
            break; // truncated double-byte character

        // Pre-set to U+FFFD so a failed decode still yields a defined value.
        wchar_t wide = static_cast<wchar_t>(0xFFFD);
        Gb2312ToUnicode(&wide, pText + i);
        out.push_back(wide);
        i += 2;
    }
    return out;
}
// Decodes one three-byte UTF-8 sequence at pText into *pOut.
//
// pOut   receives the decoded code unit (U+0000..U+FFFF).
// pText  points at the lead byte; exactly three bytes are read.
//
// The bytes are not validated: the payload bits are extracted as if the input
// were a well-formed 1110xxxx 10xxxxxx 10xxxxxx sequence. The whole wchar_t is
// assigned, so no stale upper bytes remain when wchar_t is wider than 16 bits,
// and the result does not depend on host byte order.
void UTF_8ToUnicode(wchar_t *pOut, const char *pText)
{
    const unsigned int b0 = static_cast<unsigned char>(pText[0]);
    const unsigned int b1 = static_cast<unsigned char>(pText[1]);
    const unsigned int b2 = static_cast<unsigned char>(pText[2]);
    *pOut = static_cast<wchar_t>(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
}
// Encodes one wide character into multibyte form at pOut.
//
// pOut   destination buffer; must have room for at least two bytes. On
//        non-Windows platforms wctomb() may write more, depending on the
//        current C locale. No terminator is written.
// uData  the wide character to encode.
//
// On Windows the active ANSI code page (CP_ACP) is used; elsewhere wctomb()
// encodes according to the current C locale. If the conversion fails, "??" is
// stored so the caller does not read stale bytes from a previous call.
void UnicodeToGB2312(char *pOut, wchar_t uData)
{
    if (!pOut)
        return;

#if defined _WIN32 || defined _WIN64
    // The flags argument is an integer, so pass 0 rather than NULL.
    const int written = WideCharToMultiByte(CP_ACP, 0, &uData, 1, pOut, sizeof(wchar_t), NULL, NULL);
#else
    const int written = wctomb(pOut, uData);
#endif
    if (written <= 0)
    {
        pOut[0] = '?';
        pOut[1] = '?';
    }
}
// Converts pLen bytes of UTF-8 text at pText to double-byte (GB2312-style)
// text.
//
// pText  input bytes; may be null, in which case an empty string is returned.
// pLen   number of bytes to read; values <= 0 yield an empty string.
//
// Behaviour per input unit:
//   - ASCII bytes are copied through; conversion stops at the first NUL byte.
//   - Well-formed two- and three-byte sequences are decoded to a wide
//     character and encoded with UnicodeToGB2312; at most two output bytes
//     are kept per character.
//   - Four-byte sequences (outside the 16-bit range handled here) produce a
//     single '?'.
//   - A stray, malformed or truncated byte produces '?' and the scan resumes
//     at the following byte, so nothing past pLen is ever read.
std::string UTF_8ToGB2312(const char *pText, int pLen)
{
    std::string out;
    if (!pText || pLen <= 0)
        return out;
    out.reserve(static_cast<std::string::size_type>(pLen)); // output never exceeds input

    int i = 0;
    while (i < pLen)
    {
        // Classify through unsigned char so the test does not depend on
        // whether plain char is signed on this platform.
        const unsigned char lead = static_cast<unsigned char>(pText[i]);
        if (lead == 0)
            break;
        if (lead < 0x80)
        {
            out.push_back(pText[i]);
            ++i;
            continue;
        }

        // Sequence length from the lead byte; 0 marks a byte that cannot
        // start a sequence (continuation byte or 0xF8 and above).
        int seqLen = 0;
        if ((lead & 0xE0) == 0xC0)
            seqLen = 2;
        else if ((lead & 0xF0) == 0xE0)
            seqLen = 3;
        else if ((lead & 0xF8) == 0xF0)
            seqLen = 4;

        bool wellFormed = (seqLen != 0) && (pLen - i >= seqLen);
        for (int k = 1; wellFormed && k < seqLen; ++k)
            wellFormed = (static_cast<unsigned char>(pText[i + k]) & 0xC0) == 0x80;

        if (!wellFormed)
        {
            out.push_back('?');
            ++i;
            continue;
        }
        if (seqLen == 4)
        {
            out.push_back('?');
            i += 4;
            continue;
        }

        // Zero-initialised so every byte of the wide character is defined
        // regardless of how many bytes the decoder writes.
        wchar_t wide = 0;
        if (seqLen == 3)
            UTF_8ToUnicode(&wide, pText + i);
        else
            wide = static_cast<wchar_t>(((lead & 0x1F) << 6) |
                                        (static_cast<unsigned char>(pText[i + 1]) & 0x3F));

        // Scratch buffer is cleared per character and larger than two bytes:
        // UnicodeToGB2312 takes no capacity argument and its wctomb() path may
        // write more than two bytes in some locales.
        char gb[8] = {0};
        UnicodeToGB2312(gb, wide);
        if (gb[0] == 0)
        {
            out.push_back('?'); // nothing was produced for this character
        }
        else
        {
            out.push_back(gb[0]);
            if (gb[1] != 0)
                out.push_back(gb[1]);
        }
        i += seqLen;
    }
    return out;
}