// // docopt.cpp // docopt // // Created by Jared Grubb on 2013-11-03. // Copyright (c) 2013 Jared Grubb. All rights reserved. // #include "docopt.h" #include "docopt_util.h" #include "docopt_private.h" #include "docopt_value.h" #include #include #include #include #include #include #include #include using namespace docopt; DocoptExitHelp::DocoptExitHelp() : std::runtime_error("Docopt --help argument encountered") {} DocoptExitVersion::DocoptExitVersion() : std::runtime_error("Docopt --version argument encountered") {} const char* value::kindAsString(Kind kind) { switch (kind) { case Kind::Empty: return "empty"; case Kind::Bool: return "bool"; case Kind::Long: return "long"; case Kind::String: return "string"; case Kind::StringList: return "string-list"; } return "unknown"; } void value::throwIfNotKind(Kind expected) const { if (kind == expected) return; std::string error = "Illegal cast to "; error += kindAsString(expected); error += "; type is actually "; error += kindAsString(kind); throw std::runtime_error(std::move(error)); } std::ostream& docopt::operator<<(std::ostream& os, value const& val) { if (val.isBool()) { bool b = val.asBool(); std::cout << (b ? "true" : "false"); } else if (val.isLong()) { long v = val.asLong(); std::cout << v; } else if (val.isString()) { std::string const& str = val.asString(); std::cout << '"' << str << '"'; } else if (val.isStringList()) { auto const& list = val.asStringList(); std::cout << "["; bool first = true; for(auto const& el : list) { if (first) { first = false; } else { std::cout << ", "; } std::cout << '"' << el << '"'; } std::cout << "]"; } else { std::cout << "null"; } return os; } #pragma mark - #pragma mark Pattern types std::vector Pattern::leaves() { std::vector ret; collect_leaves(ret); return ret; } bool Required::match(PatternList& left, std::vector>& collected) const { auto l = left; auto c = collected; for(auto const& pattern : fChildren) { bool ret = pattern->match(l, c); if (!ret) { // leave (left, collected) untouched return false; } } left = std::move(l); collected = std::move(c); return true; } bool LeafPattern::match(PatternList& left, std::vector>& collected) const { auto match = single_match(left); if (!match.second) { return false; } left.erase(left.begin()+match.first); auto same_name = std::find_if(collected.begin(), collected.end(), [&](std::shared_ptr const& p) { return p->name()==name(); }); if (getValue().isLong()) { long val = 1; if (same_name == collected.end()) { collected.push_back(match.second); match.second->setValue(value{val}); } else if ((**same_name).getValue().isLong()) { val += (**same_name).getValue().asLong(); (**same_name).setValue(value{val}); } else { (**same_name).setValue(value{val}); } } else if (getValue().isStringList()) { std::vector val; if (match.second->getValue().isString()) { val.push_back(match.second->getValue().asString()); } else if (match.second->getValue().isStringList()) { val = match.second->getValue().asStringList(); } else { /// cant be!? } if (same_name == collected.end()) { collected.push_back(match.second); match.second->setValue(value{val}); } else if ((**same_name).getValue().isStringList()) { std::vector const& list = (**same_name).getValue().asStringList(); val.insert(val.begin(), list.begin(), list.end()); (**same_name).setValue(value{val}); } else { (**same_name).setValue(value{val}); } } else { collected.push_back(match.second); } return true; } Option Option::parse(std::string const& option_description) { std::string shortOption, longOption; int argcount = 0; value val { false }; auto double_space = option_description.find(" "); auto options_end = option_description.end(); if (double_space != std::string::npos) { options_end = option_description.begin() + double_space; } static const std::regex pattern {"(--|-)?(.*?)([,= ]|$)"}; for(std::sregex_iterator i {option_description.begin(), options_end, pattern, std::regex_constants::match_not_null}, e{}; i != e; ++i) { std::smatch const& match = *i; if (match[1].matched) { // [1] is optional. if (match[1].length()==1) { shortOption = "-" + match[2].str(); } else { longOption = "--" + match[2].str(); } } else if (match[2].length() > 0) { // [2] always matches. std::string m = match[2]; argcount = 1; } else { // delimeter } if (match[3].length() == 0) { // [3] always matches. // Hit end of string. For some reason 'match_not_null' will let us match empty // at the end, and then we'll spin in an infinite loop. So, if we hit an empty // match, we know we must be at the end. break; } } if (argcount) { std::smatch match; if (std::regex_search(options_end, option_description.end(), match, std::regex{"\\[default: (.*)\\]", std::regex::icase})) { val = match[1].str(); } } return {std::move(shortOption), std::move(longOption), argcount, std::move(val)}; } bool OneOrMore::match(PatternList& left, std::vector>& collected) const { assert(fChildren.size() == 1); auto l = left; auto c = collected; bool matched = true; size_t times = 0; decltype(l) l_; bool firstLoop = true; while (matched) { // could it be that something didn't match but changed l or c? matched = fChildren[0]->match(l, c); if (matched) ++times; if (firstLoop) { firstLoop = false; } else if (l == l_) { break; } l_ = l; } if (times == 0) { return false; } left = std::move(l); collected = std::move(c); return true; } bool Either::match(PatternList& left, std::vector>& collected) const { using Outcome = std::pair>>; std::vector outcomes; for(auto const& pattern : fChildren) { // need a copy so we apply the same one for every iteration auto l = left; auto c = collected; bool matched = pattern->match(l, c); if (matched) { outcomes.emplace_back(std::move(l), std::move(c)); } } auto min = std::min_element(outcomes.begin(), outcomes.end(), [](Outcome const& o1, Outcome const& o2) { return o1.first.size() < o2.first.size(); }); if (min == outcomes.end()) { // (left, collected) unchanged return false; } std::tie(left, collected) = std::move(*min); return true; } std::pair> Argument::single_match(PatternList const& left) const { std::pair> ret {}; for(size_t i = 0, size = left.size(); i < size; ++i) { auto arg = dynamic_cast(left[i].get()); if (arg) { ret.first = i; ret.second = std::make_shared(name(), arg->getValue()); break; } } return ret; } std::pair> Command::single_match(PatternList const& left) const { std::pair> ret {}; for(size_t i = 0, size = left.size(); i < size; ++i) { auto arg = dynamic_cast(left[i].get()); if (arg) { if (name() == arg->getValue()) { ret.first = i; ret.second = std::make_shared(name(), value{true}); } break; } } return ret; } std::pair> Option::single_match(PatternList const& left) const { std::pair> ret {}; for(size_t i = 0, size = left.size(); i < size; ++i) { auto leaf = std::dynamic_pointer_cast(left[i]); if (leaf && name() == leaf->name()) { ret.first = i; ret.second = leaf; break; } } return ret; } #pragma mark - #pragma mark Parsing stuff std::vector transform(PatternList pattern); void BranchPattern::fix_repeating_arguments() { std::vector either = transform(children()); for(auto const& group : either) { // use multiset to help identify duplicate entries std::unordered_multiset, PatternHasher> group_set {group.begin(), group.end()}; for(auto const& e : group_set) { if (group_set.count(e) == 1) continue; LeafPattern* leaf = dynamic_cast(e.get()); if (!leaf) continue; bool ensureList = false; bool ensureInt = false; if (dynamic_cast(leaf)) { ensureInt = true; } else if (dynamic_cast(leaf)) { ensureList = true; } else if (Option* o = dynamic_cast(leaf)) { if (o->argCount()) { ensureList = true; } else { ensureInt = true; } } if (ensureList) { std::vector newValue; if (leaf->getValue().isString()) { newValue = split(leaf->getValue().asString()); } if (!leaf->getValue().isStringList()) { leaf->setValue(value{newValue}); } } else if (ensureInt) { leaf->setValue(value{0}); } } } } std::vector transform(PatternList pattern) { std::vector result; std::vector groups; groups.emplace_back(std::move(pattern)); while(!groups.empty()) { // pop off the first element auto children = std::move(groups[0]); groups.erase(groups.begin()); // find the first branch node in the list auto child_iter = std::find_if(children.begin(), children.end(), [](std::shared_ptr const& p) { return dynamic_cast(p.get()); }); // no branch nodes left : expansion is complete for this grouping if (child_iter == children.end()) { result.emplace_back(std::move(children)); continue; } // pop the child from the list auto child = std::move(*child_iter); children.erase(child_iter); // expand the branch in the appropriate way if (Either* either = dynamic_cast(child.get())) { // "[e] + children" for each child 'e' in Either for(auto const& eitherChild : either->children()) { PatternList group = { eitherChild }; group.insert(group.end(), children.begin(), children.end()); groups.emplace_back(std::move(group)); } } else if (OneOrMore* oneOrMore = dynamic_cast(child.get())) { // child.children * 2 + children auto const& subchildren = oneOrMore->children(); PatternList group = subchildren; group.insert(group.end(), subchildren.begin(), subchildren.end()); group.insert(group.end(), children.begin(), children.end()); groups.emplace_back(std::move(group)); } else { // Required, Optional, OptionsShortcut BranchPattern* branch = dynamic_cast(child.get()); // child.children + children PatternList group = branch->children(); group.insert(group.end(), children.begin(), children.end()); groups.emplace_back(std::move(group)); } } return result; } class Tokens { public: Tokens(std::vector tokens, bool isParsingArgv = true) : fTokens(std::move(tokens)), fIsParsingArgv(isParsingArgv) {} explicit operator bool() const { return fIndex < fTokens.size(); } static Tokens from_pattern(std::string const& source) { static const std::regex re_separators { "(?:\\s*)" // any spaces (non-matching subgroup) "(" "[\\[\\]\\(\\)\\|]" // one character of brackets or parens or pipe character "|" "\\.\\.\\." // elipsis ")" }; static const std::regex re_strings { "(?:\\s*)" // any spaces (non-matching subgroup) "(" "\\S*<.*?>" // strings, but make sure to keep "< >" strings together "|" "\\S+" // string without <> ")" }; // We do two stages of regex matching. The '[]()' and '...' are strong delimeters // and need to be split out anywhere they occur (even at the end of a token). We // first split on those, and then parse the stuff between them to find the string // tokens. This is a little harder than the python version, since they have regex.split // and we dont have anything like that. std::vector tokens; std::for_each(std::sregex_iterator{ source.begin(), source.end(), re_separators }, std::sregex_iterator{}, [&](std::smatch const& match) { // handle anything before the separator (this is the "stuff" between the delimeters) if (match.prefix().matched) { std::for_each(std::sregex_iterator{match.prefix().first, match.prefix().second, re_strings}, std::sregex_iterator{}, [&](std::smatch const& m) { tokens.push_back(m[1].str()); }); } // handle the delimter token itself if (match[1].matched) { tokens.push_back(match[1].str()); } }); return Tokens(tokens, false); } std::string const& current() const { if (*this) return fTokens[fIndex]; static std::string const empty; return empty; } std::string the_rest() const { if (!*this) return {}; return join(fTokens.begin()+fIndex, fTokens.end(), " "); } std::string pop() { return std::move(fTokens.at(fIndex++)); } bool isParsingArgv() const { return fIsParsingArgv; } struct OptionError : std::runtime_error { using runtime_error::runtime_error; }; private: std::vector fTokens; size_t fIndex = 0; bool fIsParsingArgv; }; // Get all instances of 'T' from the pattern template std::vector flat_filter(Pattern& pattern) { std::vector flattened = pattern.flat([](Pattern const* p) -> bool { return dynamic_cast(p); }); // now, we're guaranteed to have T*'s, so just use static_cast std::vector ret; std::transform(flattened.begin(), flattened.end(), std::back_inserter(ret), [](Pattern* p) { return static_cast(p); }); return ret; } std::vector parse_section(std::string const& name, std::string const& source) { // ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have // a newline to anchor our matching, we have to avoid matching the final newline of each grouping. // Therefore, our regex is adjusted from the docopt Python one to use ?= to match the newlines before // the following lines, rather than after. std::regex const re_section_pattern { "(?:^|\\n)" // anchored at a linebreak (or start of string) "(" "[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the name "(?:\\n[ \\t].*?(?=\\n|$))*" // followed by any number of lines that are indented ")", std::regex::icase }; std::vector ret; std::for_each(std::sregex_iterator(source.begin(), source.end(), re_section_pattern), std::sregex_iterator(), [&](std::smatch const& match) { ret.push_back(trim(match[1].str())); }); return ret; } bool is_argument_spec(std::string const& token) { if (token.empty()) return false; if (token[0]=='<' && token[token.size()-1]=='>') return true; if (std::all_of(token.begin(), token.end(), &::isupper)) return true; return false; } template std::vector longOptions(I iter, I end) { std::vector ret; std::transform(iter, end, std::back_inserter(ret), [](typename I::reference opt) { return opt->longOption(); }); return ret; } PatternList parse_long(Tokens& tokens, std::vector