Skip to content

Commit 4391f08

Browse files
committed
Tokenizer: Add special tokenize method for the Preprocessor with only basic simplifications
1 parent 0a88a13 commit 4391f08

4 files changed

Lines changed: 175 additions & 28 deletions

File tree

lib/preprocessor.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,8 +1335,7 @@ std::list<std::string> Preprocessor::getcfgs(const std::string &filedata, const
13351335

13361336
if (s.find("&&") != std::string::npos) {
13371337
Tokenizer tokenizer(_settings, _errorLogger);
1338-
std::istringstream tempIstr(s);
1339-
if (!tokenizer.tokenize(tempIstr, filename.c_str(), "", true)) {
1338+
if (!tokenizer.tokenizeCondition(s)) {
13401339
std::ostringstream lineStream;
13411340
lineStream << __LINE__;
13421341

@@ -1444,8 +1443,7 @@ void Preprocessor::simplifyCondition(const std::map<std::string, std::string> &c
14441443
{
14451444
const Settings settings;
14461445
Tokenizer tokenizer(&settings, _errorLogger);
1447-
std::istringstream istr("(" + condition + ")");
1448-
if (!tokenizer.tokenize(istr, "", "", true)) {
1446+
if (!tokenizer.tokenizeCondition("(" + condition + ")")) {
14491447
// If tokenizeCondition returns false, then there is a syntax error in the
14501448
// code which we can't handle. So stop here.
14511449
return;
@@ -1505,8 +1503,7 @@ void Preprocessor::simplifyCondition(const std::map<std::string, std::string> &c
15051503
if (!it->second.empty()) {
15061504
// Tokenize the value
15071505
Tokenizer tokenizer2(&settings,NULL);
1508-
std::istringstream istr2(it->second);
1509-
tokenizer2.tokenize(istr2,"","",true);
1506+
tokenizer2.tokenizeCondition(it->second);
15101507

15111508
// Copy the value tokens
15121509
std::stack<Token *> link;

lib/tokenize.cpp

Lines changed: 162 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,8 +1554,7 @@ void Tokenizer::simplifyMulAndParens()
15541554

15551555
bool Tokenizer::tokenize(std::istream &code,
15561556
const char FileName[],
1557-
const std::string &configuration,
1558-
const bool preprocessorCondition)
1557+
const std::string &configuration)
15591558
{
15601559
// make sure settings specified
15611560
assert(_settings);
@@ -1743,11 +1742,9 @@ bool Tokenizer::tokenize(std::istream &code,
17431742
// ";a+=b;" => ";a=a+b;"
17441743
simplifyCompoundAssignment();
17451744

1746-
if (!preprocessorCondition) {
1747-
if (hasComplicatedSyntaxErrorsInTemplates()) {
1748-
list.deallocateTokens();
1749-
return false;
1750-
}
1745+
if (hasComplicatedSyntaxErrorsInTemplates()) {
1746+
list.deallocateTokens();
1747+
return false;
17511748
}
17521749

17531750
simplifyDefaultAndDeleteInsideClass();
@@ -2002,19 +1999,17 @@ bool Tokenizer::tokenize(std::istream &code,
20021999
// Split up variable declarations.
20032000
simplifyVarDecl(false);
20042001

2005-
if (!preprocessorCondition) {
2006-
if (m_timerResults) {
2007-
Timer t("Tokenizer::tokenize::setVarId", _settings->_showtime, m_timerResults);
2008-
setVarId();
2009-
} else {
2010-
setVarId();
2011-
}
2002+
if (m_timerResults) {
2003+
Timer t("Tokenizer::tokenize::setVarId", _settings->_showtime, m_timerResults);
2004+
setVarId();
2005+
} else {
2006+
setVarId();
2007+
}
20122008

2013-
createLinks2();
2009+
createLinks2();
20142010

2015-
// Change initialisation of variable to assignment
2016-
simplifyInitVar();
2017-
}
2011+
// Change initialisation of variable to assignment
2012+
simplifyInitVar();
20182013

20192014
// Convert e.g. atol("0") into 0
20202015
simplifyMathFunctions();
@@ -2040,6 +2035,155 @@ bool Tokenizer::tokenize(std::istream &code,
20402035
}
20412036
//---------------------------------------------------------------------------
20422037

2038+
bool Tokenizer::tokenizeCondition(const std::string &code) {
2039+
assert(_settings);
2040+
2041+
// Fill the map _typeSize..
2042+
_typeSize.clear();
2043+
_typeSize["char"] = 1;
2044+
_typeSize["bool"] = _settings->sizeof_bool;
2045+
_typeSize["short"] = _settings->sizeof_short;
2046+
_typeSize["int"] = _settings->sizeof_int;
2047+
_typeSize["long"] = _settings->sizeof_long;
2048+
_typeSize["float"] = _settings->sizeof_float;
2049+
_typeSize["double"] = _settings->sizeof_double;
2050+
_typeSize["wchar_t"] = _settings->sizeof_wchar_t;
2051+
_typeSize["size_t"] = _settings->sizeof_size_t;
2052+
_typeSize["*"] = _settings->sizeof_pointer;
2053+
2054+
{
2055+
std::istringstream istr(code);
2056+
if (!list.createTokens(istr, "")) {
2057+
cppcheckError(0);
2058+
return false;
2059+
}
2060+
}
2061+
2062+
// Combine wide strings
2063+
for (Token *tok = list.front();
2064+
tok;
2065+
tok = tok->next()) {
2066+
while (tok->str() == "L" && tok->next() && tok->next()->type() == Token::eString) {
2067+
// Combine 'L "string"'
2068+
tok->str(tok->next()->str());
2069+
tok->deleteNext();
2070+
tok->isLong(true);
2071+
}
2072+
}
2073+
2074+
// Combine strings
2075+
for (Token *tok = list.front();
2076+
tok;
2077+
tok = tok->next()) {
2078+
if (tok->str()[0] != '"')
2079+
continue;
2080+
2081+
tok->str(simplifyString(tok->str()));
2082+
while (tok->next() && tok->next()->type() == Token::eString) {
2083+
tok->next()->str(simplifyString(tok->next()->str()));
2084+
2085+
// Two strings after each other, combine them
2086+
tok->concatStr(tok->next()->str());
2087+
tok->deleteNext();
2088+
}
2089+
}
2090+
2091+
// Remove "volatile", "inline", "register", and "restrict"
2092+
simplifyKeyword();
2093+
2094+
// convert platform dependent types to standard types
2095+
// 32 bits: size_t -> unsigned long
2096+
// 64 bits: size_t -> unsigned long long
2097+
simplifyPlatformTypes();
2098+
2099+
// collapse compound standard types into a single token
2100+
// unsigned long long int => long _isUnsigned=true,_isLong=true
2101+
simplifyStdType();
2102+
2103+
// Concatenate double sharp: 'a ## b' -> 'ab'
2104+
concatenateDoubleSharp();
2105+
2106+
if (!createLinks()) {
2107+
// Source has syntax errors, can't proceed
2108+
return false;
2109+
}
2110+
2111+
// replace 'NULL' and similar '0'-defined macros with '0'
2112+
simplifyNull();
2113+
2114+
// replace 'sin(0)' with '0', and likewise for other similar math expressions
2115+
simplifyMathExpressions();
2116+
2117+
// combine "- %num%"
2118+
concatenateNegativeNumberAndAnyPositive();
2119+
2120+
// simplify simple calculations
2121+
for (Token *tok = list.front() ? list.front()->next() : NULL;
2122+
tok;
2123+
tok = tok->next()) {
2124+
if (tok->isNumber())
2125+
TemplateSimplifier::simplifyNumericCalculations(tok->previous());
2126+
}
2127+
2128+
// Combine tokens..
2129+
for (Token *tok = list.front();
2130+
tok && tok->next();
2131+
tok = tok->next()) {
2132+
const char c1 = tok->str()[0];
2133+
2134+
if (tok->str().length() == 1 && tok->next()->str().length() == 1) {
2135+
const char c2 = tok->next()->str()[0];
2136+
2137+
// combine an operator character (one of + - * / % & | ^ = ! < >) with a following '=' into a single token
2138+
if (c2 == '=' && (std::strchr("+-*/%&|^=!<>", c1))) {
2139+
tok->str(tok->str() + c2);
2140+
tok->deleteNext();
2141+
continue;
2142+
}
2143+
2144+
// replace "->" with "."
2145+
else if (c1 == '-' && c2 == '>') {
2146+
tok->str(".");
2147+
tok->deleteNext();
2148+
continue;
2149+
}
2150+
}
2151+
2152+
else if (tok->str() == ">>" && tok->next()->str() == "=") {
2153+
tok->str(">>=");
2154+
tok->deleteNext();
2155+
}
2156+
2157+
else if (tok->str() == "<<" && tok->next()->str() == "=") {
2158+
tok->str("<<=");
2159+
tok->deleteNext();
2160+
}
2161+
2162+
else if ((c1 == 'p' || c1 == '_') && tok->next()->str() == ":" && tok->strAt(2) != ":") {
2163+
if (tok->str() == "private" || tok->str() == "protected" || tok->str() == "public" || tok->str() == "__published") {
2164+
tok->str(tok->str() + ":");
2165+
tok->deleteNext();
2166+
continue;
2167+
}
2168+
}
2169+
}
2170+
2171+
simplifyRedundantParentheses();
2172+
for (Token *tok = list.front();
2173+
tok;
2174+
tok = tok->next())
2175+
while (TemplateSimplifier::simplifyNumericCalculations(tok));
2176+
2177+
while (simplifyLogicalOperators()) { }
2178+
2179+
// Convert e.g. atol("0") into 0
2180+
simplifyMathFunctions();
2181+
2182+
simplifyDoublePlusAndDoubleMinus();
2183+
2184+
return true;
2185+
}
2186+
20432187
bool Tokenizer::hasComplicatedSyntaxErrorsInTemplates()
20442188
{
20452189
const Token *tok = TemplateSimplifier::hasComplicatedSyntaxErrorsInTemplates(list.front());

lib/tokenize.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,14 @@ class CPPCHECKLIB Tokenizer {
8989
*/
9090
bool tokenize(std::istream &code,
9191
const char FileName[],
92-
const std::string &configuration = "",
93-
const bool preprocessorCondition = false);
92+
const std::string &configuration = "");
93+
94+
/**
95+
* Tokenize a preprocessor condition and run only basic simplifications on it.
96+
* @param code text of the condition (or macro value) to tokenize
97+
* @return true on success, false if the tokens or their links could not be created.
98+
*/
99+
bool tokenizeCondition(const std::string &code);
94100

95101
/** Set variable id */
96102
void setVarId();

test/testtokenize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5285,7 +5285,7 @@ class TestTokenizer : public TestFixture {
52855285
Settings settings;
52865286
Tokenizer tokenizer(&settings, this);
52875287
std::istringstream istr(code);
5288-
tokenizer.tokenize(istr, "test.cpp", "", false);
5288+
tokenizer.tokenize(istr, "test.cpp", "");
52895289
ASSERT_EQUALS(true, tokenizer.validate());
52905290
}
52915291

@@ -5296,7 +5296,7 @@ class TestTokenizer : public TestFixture {
52965296
Settings settings;
52975297
Tokenizer tokenizer(&settings, this);
52985298
std::istringstream istr(code);
5299-
tokenizer.tokenize(istr, "test.cpp", "", false);
5299+
tokenizer.tokenize(istr, "test.cpp", "");
53005300
ASSERT_EQUALS(true, tokenizer.validate());
53015301
}
53025302

0 commit comments

Comments
 (0)