Skip to content

Commit fab30a0

Browse files
committed
Start to ANSI Support
- Basic support for ANSI encoding with traits class - BoostRegexMatch::expand doesn't compile yet, needs help for conversion to char* - Needs refactoring as types are repeated in Traits class
1 parent 472f190 commit fab30a0

9 files changed

Lines changed: 307 additions & 143 deletions

File tree

PythonScript.Tests/PythonScript.Tests.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
</PropertyGroup>
4747
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
4848
<LinkIncremental>false</LinkIncremental>
49+
<IncludePath>$(BoostBase);$(PythonBase)\Include;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSdkDir)include;$(FrameworkSDKDir)\include;</IncludePath>
50+
<LibraryPath>$(BoostPythonLibPath);$(PythonLibPath);$(VCInstallDir)lib;$(VCInstallDir)atlmfc\lib;$(WindowsSdkDir)lib;$(FrameworkSDKDir)\lib</LibraryPath>
4951
</PropertyGroup>
5052
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
5153
<ClCompile>

PythonScript.Tests/TestRunner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ void deleteEntry(NppPythonScript::ReplaceEntry* entry)
1414
void runReplace()
1515
{
1616

17-
NppPythonScript::Replacer replacer;
17+
NppPythonScript::Replacer<NppPythonScript::Utf8CharTraits> replacer;
1818
std::list<NppPythonScript::ReplaceEntry* > entries;
1919
bool moreEntries = replacer.startReplace("aaabbbaaabb", 12, "(b+)", "x$1x", entries);
2020
//ASSERT_EQ(2, entries.size());
@@ -37,6 +37,7 @@ _CrtSetReportFile( _CRT_ASSERT, _CRTDBG_FILE_STDOUT );
3737
_CrtMemCheckpoint(&state);
3838
#endif
3939

40+
4041
::testing::InitGoogleTest(&argc, argv);
4142
RUN_ALL_TESTS();
4243

PythonScript.Tests/tests/TestReplacer.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ void deleteEntry(ReplaceEntry* entry)
3030
}
3131

3232
TEST_F(ReplacerTest, SimpleReplace) {
33-
NppPythonScript::Replacer replacer;
33+
NppPythonScript::Replacer<NppPythonScript::Utf8CharTraits> replacer;
3434
std::list<NppPythonScript::ReplaceEntry* > entries;
3535
bool moreEntries = replacer.startReplace("aaabbbaaabb", 12, "(b+)", "x$1x", entries);
3636
ASSERT_EQ(2, entries.size());
@@ -54,7 +54,7 @@ TEST_F(ReplacerTest, SimpleReplace) {
5454
/** Test ensures that the replacer is treating the text as UTF8, and not simply a byte sequence
5555
*/
5656
TEST_F(ReplacerTest, ReplaceUtf8) {
57-
NppPythonScript::Replacer replacer;
57+
NppPythonScript::Replacer<NppPythonScript::Utf8CharTraits> replacer;
5858
std::list<NppPythonScript::ReplaceEntry* > entries;
5959
bool moreEntries = replacer.startReplace("aaa\xc3\xb4" "bbbaaa\xc3\xbc" "bb", 15, "aaa([\xc3\xbc])", "x$1x", entries);
6060
ASSERT_EQ(1, entries.size());
@@ -71,7 +71,7 @@ TEST_F(ReplacerTest, ReplaceUtf8) {
7171
/** Test ensures characters outside of the BMP are matched correctly
7272
*/
7373
TEST_F(ReplacerTest, ReplaceExtendedUtf8) {
74-
NppPythonScript::Replacer replacer;
74+
NppPythonScript::Replacer<NppPythonScript::Utf8CharTraits> replacer;
7575
std::list<NppPythonScript::ReplaceEntry* > entries;
7676
bool moreEntries = replacer.startReplace("aaa\xF0\x9F\x82\xB7" "ZZZ" "bbbaaa\xF0\x9F\x82\xB8" "ZZZ", 23, "aaa([\xF0\x9F\x82\xB8])", "x$1x", entries);
7777
ASSERT_EQ(1, entries.size());
@@ -84,6 +84,27 @@ TEST_F(ReplacerTest, ReplaceExtendedUtf8) {
8484
for_each(entries.begin(), entries.end(), deleteEntry);
8585
}
8686

87+
/** Tests simple ANSI (8 bit ASCII) replacement
88+
*/
89+
TEST_F(ReplacerTest, ReplaceSimpleAnsi) {
90+
91+
NppPythonScript::Replacer<NppPythonScript::AnsiCharTraits> replacer;
92+
std::list<NppPythonScript::ReplaceEntry* > entries;
93+
bool moreEntries = replacer.startReplace("aaa\xF0\x9F" "ZZZ" "aaa\x9F\xB8" "ZZZ", 16, "aaa([\xF0\x9F])", "x$1x", entries);
94+
ASSERT_EQ(2, entries.size());
95+
std::list<NppPythonScript::ReplaceEntry*>::const_iterator it = entries.begin();
96+
ASSERT_EQ(0, (*it)->getStart());
97+
ASSERT_STREQ("x\xF0x", (*it)->getReplacement());
98+
ASSERT_EQ(3, (*it)->getReplacementLength());
99+
100+
++it;
101+
102+
ASSERT_EQ(8, (*it)->getStart());
103+
ASSERT_STREQ("x\x9Fx", (*it)->getReplacement());
104+
ASSERT_EQ(3, (*it)->getReplacementLength());
105+
106+
for_each(entries.begin(), entries.end(), deleteEntry);
87107
}
88108

89109

110+
}

PythonScript/project/PythonScript2010.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@
261261
<ItemGroup>
262262
<ClInclude Include="..\include\PythonScript\NppPythonScript.h" />
263263
<ClInclude Include="..\src\AboutDialog.h" />
264+
<ClInclude Include="..\src\ANSIIterator.h" />
264265
<ClInclude Include="..\src\ConfigFile.h" />
265266
<ClInclude Include="..\src\ConsoleDialog.h" />
266267
<ClInclude Include="..\src\ConsoleInterface.h" />

PythonScript/project/PythonScript2010.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@
239239
<ClInclude Include="..\src\ReplacementContainer.h">
240240
<Filter>Header Files</Filter>
241241
</ClInclude>
242+
<!-- ANSIIterator.h is a header, so it is filed under the "Header Files" filter. -->
<ClInclude Include="..\src\ANSIIterator.h">
  <Filter>Header Files</Filter>
</ClInclude>
242245
</ItemGroup>
243246
<ItemGroup>
244247
<ResourceCompile Include="..\res\PythonScript.rc">

PythonScript/src/ANSIIterator.h

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#ifndef ANSIITERATOR_H_1245813891234891238
2+
#define ANSIITERATOR_H_1245813891234891238
3+
4+
5+
6+
7+
8+
/**
 * Bidirectional iterator over a buffer of single-byte (ANSI) characters.
 *
 * The iterator keeps a pointer to the buffer, the current offset and the end
 * offset. It neither copies nor frees the buffer. Copy construction and copy
 * assignment are the compiler-generated member-wise versions.
 *
 * NOTE(review): std::iterator is deprecated as of C++17. It is kept so the
 * nested iterator typedefs stay exactly as they were.
 */
class ANSIIterator : public std::iterator<std::bidirectional_iterator_tag, char>
{
public:
    /**
     * @param doc  Buffer to iterate over (not owned).
     * @param pos  Starting offset into doc.
     * @param end  Offset at which iteration stops.
     *
     * A position past the end asserts in debug builds and is clamped to
     * the end in release builds.
     */
    ANSIIterator(const char* doc = NULL, int pos = 0, int end = 0) :
        _doc(doc),
        _pos(pos),
        _end(end)
    {
        // Check for debug builds
        assert(_pos <= _end);
        // Ensure for release.
        if (_pos > _end)
            _pos = _end;
    }

    /** Equal when both iterators agree on ended state, buffer and offset. */
    bool operator == (const ANSIIterator& other) const
    {
        return (ended() == other.ended()) && (_doc == other._doc) && (_pos == other._pos);
    }

    bool operator != (const ANSIIterator& other) const
    {
        return !(*this == other);
    }

    /** Returns the character at the current offset (by value; no bounds check). */
    char operator * () const
    {
        return _doc[_pos];
    }

    /** Moves the iterator to the given offset. The offset is not validated. */
    ANSIIterator& operator = (int pos)
    {
        _pos = pos;
        return *this;
    }

    /** Prefix increment: advances one character, never past the end offset. */
    ANSIIterator& operator ++ ()
    {
        assert(_pos < _end);
        ++_pos;
        if (ended())
            _pos = _end;
        return *this;
    }

    /**
     * Postfix increment, required of a bidirectional iterator.
     * @return A copy of the iterator as it was before advancing.
     */
    ANSIIterator operator ++ (int)
    {
        ANSIIterator previous(*this);
        ++(*this);
        return previous;
    }

    /** Prefix decrement: steps back one character, never before offset 0. */
    ANSIIterator& operator -- ()
    {
        assert(_pos > 0);
        --_pos;
        if (_pos < 0) {
            _pos = 0;
        }
        return *this;
    }

    /**
     * Postfix decrement, required of a bidirectional iterator.
     * @return A copy of the iterator as it was before stepping back.
     */
    ANSIIterator operator -- (int)
    {
        ANSIIterator previous(*this);
        --(*this);
        return previous;
    }

    /** Current offset into the buffer. */
    int pos() const
    {
        return _pos;
    }

private:

    /** True once no characters remain between the current and end offsets. */
    bool ended() const
    {
        return bytesLeft() <= 0;
    }

    int bytesLeft() const
    {
        return _end - _pos;
    }

    const char* _doc;   // buffer being iterated (not owned)
    int _pos;           // current offset
    int _end;           // end offset
};
92+
93+
#endif // ANSIITERATOR_H_1245813891234891238

PythonScript/src/Replacer.cpp

Lines changed: 0 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -19,145 +19,7 @@ using UtfConversion::toStringType;
1919
typedef boost::basic_regex<U32, u32_regex_traits> u32_regex;
2020
typedef boost::regex_iterator<UTF8Iterator, U32, u32_regex_traits> u32_regex_iterator;
2121

22-
class BoostRegexGroupDetail : public GroupDetail
23-
{
24-
public:
25-
BoostRegexGroupDetail(const boost::sub_match<UTF8Iterator>& subMatch)
26-
: m_subMatch(subMatch)
27-
{}
28-
int start() const { return m_subMatch.first.pos(); }
29-
int end() const { return m_subMatch.second.pos(); }
30-
31-
private:
32-
boost::sub_match<UTF8Iterator> m_subMatch;
33-
};
34-
35-
36-
class BoostRegexMatch : public Match
37-
{
38-
public:
39-
BoostRegexMatch(const char *text, boost::match_results<UTF8Iterator>* match)
40-
: m_text(text),
41-
m_match(match)
42-
{}
43-
44-
BoostRegexMatch(const char *text)
45-
: m_text(text),
46-
m_match(NULL)
47-
{}
48-
49-
virtual ~BoostRegexMatch();
50-
BoostRegexMatch& operator= (BoostRegexMatch& rhs) {
51-
m_text = rhs.m_text;
52-
m_match = rhs.m_match;
53-
54-
/* We explicitely don't copy the list, as the allocatedGroupDetails will simply be destructed when this object gets destroyed.
55-
* In theory, this would be bad, as we would delete the allocated GroupDetail objects when this object is deleted,
56-
* even though the various groups may still be in use.
57-
* In practice however, these GroupDetails don't actually live as long as this object, as we've created a ReplaceEntry
58-
* by the time this object gets destroyed, and have no need for the allocated GroupDetails any more.
59-
*/
60-
}
61-
62-
void setMatchResults(boost::match_results<UTF8Iterator>* match) { m_match = match; }
63-
64-
virtual int groupCount() { return m_match->size(); }
65-
66-
virtual GroupDetail* group(int groupNo);
67-
virtual GroupDetail* groupName(const char *groupName);
68-
virtual void expand(const char* format, char **result, int *resultLength);
69-
70-
private:
71-
const char *m_text;
72-
boost::match_results<UTF8Iterator>* m_match;
73-
std::list<BoostRegexGroupDetail*> m_allocatedGroupDetails;
74-
75-
static void deleteEntry(BoostRegexGroupDetail*);
76-
};
77-
78-
/** Deletes a single group detail; used as the for_each callback in the destructor. */
void BoostRegexMatch::deleteEntry(BoostRegexGroupDetail* entry)
{
    delete entry;
}
82-
83-
/** Deletes every group detail that group()/groupName() allocated for this match. */
BoostRegexMatch::~BoostRegexMatch()
{
    typedef std::list<BoostRegexGroupDetail*>::iterator DetailIterator;

    for (DetailIterator detail = m_allocatedGroupDetails.begin();
         detail != m_allocatedGroupDetails.end();
         ++detail)
    {
        deleteEntry(*detail);
    }
}
87-
88-
/**
 * Returns the detail for the group with the given index.
 * The returned object is recorded in m_allocatedGroupDetails and remains
 * owned by this match.
 */
GroupDetail* BoostRegexMatch::group(int groupNo)
{
    const boost::sub_match<UTF8Iterator>& subMatch = (*m_match)[groupNo];

    BoostRegexGroupDetail* detail = new BoostRegexGroupDetail(subMatch);
    m_allocatedGroupDetails.push_back(detail);
    return detail;
}
94-
95-
/**
 * Returns the detail for a named group.
 * The name is converted to a u32string before it is used to index the match
 * results. The returned object is recorded in m_allocatedGroupDetails and
 * remains owned by this match.
 */
GroupDetail* BoostRegexMatch::groupName(const char *groupName)
{
    u32string wideName = toStringType<u32string>(ConstString<char>(groupName));

    BoostRegexGroupDetail* detail = new BoostRegexGroupDetail((*m_match)[wideName.c_str()]);
    m_allocatedGroupDetails.push_back(detail);
    return detail;
}
102-
103-
void BoostRegexMatch::expand(const char *format, char **result, int *resultLength)
104-
{
105-
u32string resultString = m_match->format(format);
106-
107-
// TODO: There's probably more copying, allocing and deleting going on here than there actually needs to be
108-
// We just want a u32string to utf8 char*
109-
u8string utf8result(UtfConversion::toUtf8(ConstString<U32>(resultString)));
110-
111-
*resultLength = utf8result.size();
112-
*result = new char[(*resultLength) + 1];
113-
memcpy(*result, utf8result.c_str(), *resultLength);
114-
(*result)[*resultLength] = '\0';
115-
}
116-
11722

118-
/**
 * Match converter that builds a ReplaceEntry from the replacer's format string.
 *
 * @param match  Match to expand and take the bounds of group 0 from.
 * @param state  The Replacer instance, passed as an opaque pointer.
 * @return A newly allocated ReplaceEntry covering the full match.
 */
ReplaceEntry* NppPythonScript::Replacer::matchToReplaceEntry(const char * /* text */, Match *match, void *state)
{
    // TODO: state is replacer instance, and contains the replacement string
    // need to add format call in here,
    Replacer *self = reinterpret_cast<Replacer*>(state);

    char *expandedText;
    int expandedLength;
    match->expand(self->m_replaceFormat, &expandedText, &expandedLength);

    GroupDetail *wholeMatch = match->group(0);
    ReplaceEntry* entry = new ReplaceEntry(wholeMatch->start(), wholeMatch->end(), expandedText, expandedLength);

    // expand() allocated the buffer with new[]; it is released here once the entry has been constructed.
    delete [] expandedText;
    return entry;
}
132-
133-
bool NppPythonScript::Replacer::startReplace(const char *text, const int textLength, const char *search,
134-
const char *replace,
135-
std::list<ReplaceEntry*>& replacements)
136-
{
137-
m_replaceFormat = replace;
138-
return startReplace(text, textLength, search, matchToReplaceEntry, this, replacements);
139-
}
140-
141-
bool NppPythonScript::Replacer::startReplace(const char *text, const int textLength, const char *search,
142-
matchConverter converter,
143-
void *converterState,
144-
std::list<ReplaceEntry*> &replacements) {
145-
146-
u32_regex r = u32_regex(toStringType<u32string>(ConstString<char>(search)));
147-
UTF8Iterator start(text, 0, textLength);
148-
UTF8Iterator end(text, textLength, textLength);
149-
u32_regex_iterator iteratorEnd;
150-
BoostRegexMatch match(text);
151-
for(u32_regex_iterator it(start, end, r); it != iteratorEnd; ++it) {
152-
boost::match_results<UTF8Iterator> boost_match_results(*it);
153-
154-
match.setMatchResults(&boost_match_results);
155-
ReplaceEntry* entry = converter(text, &match, converterState);
156-
replacements.push_back(entry);
157-
}
158-
159-
return false;
160-
}
16123

16224
}
16325
#endif

0 commit comments

Comments
 (0)