// string.cpp
#include <string>
#include <sstream>
#include <vector>
#include "string.h"
#include <iostream>
#include <regex>

// Converts a value to its textual form by streaming it into a
// std::ostringstream with default formatting.
//
// NOTE: Character types (char, signed char, unsigned char) are streamed as
//       characters, not as their numeric codes.
//
// Example:
//   numberToString(69);    // "69"
template <typename T>
std::string numberToString(T pNumber)
{
    std::ostringstream oOStrStream;
    oOStrStream << pNumber;
    return oOStrStream.str();
}

// Returns all occurrences of the regex within the string.
//
// Each element of the result is the full text of one match, in the order the
// matches appear in s. The result is empty when nothing matches.
// Throws std::regex_error if `regex` is not a valid regular expression.
//
// Example:
//   std::string regex = "([A-Z]+)([\\d]+)";
//   std::string ss = "aaaMAY14bbbJUNE4";
//
// Returns:
//   [0]=MAY14
//   [1]=JUNE4
std::vector<std::string> string_find(const std::string& s, const std::string& regex)
{
    const std::regex reg(regex);

    // The 4th param of the token iterator selects what is yielded:
    //   -1 would return all the non-matching parts (the text between matches).
    //    0 returns every whole match found.
    //    1 would return all the 1st sub-expression matches.
    //    2 would return all the 2nd sub-expression matches.
    //    3...
    const std::sregex_token_iterator reg_end;

    std::vector<std::string> result;
    for (std::sregex_token_iterator it(s.begin(), s.end(), reg, 0); it != reg_end; ++it)
        result.push_back(it->str());

    return result;
}

// Replaces all occurrences of the regex within the string by the replacement string.
//
// Parameters:
//
//   replacement:
//     The replacement string may contain references of the form $n. Every such reference will be replaced by the
//     text captured by the n'th parenthesized pattern.
//     n can be from 0 to 99, and $0 refers to the text matched by the whole pattern.
//
//     This may include format specifiers and escape sequences that are replaced by the characters they represent.
//
//     For format_default, the possible specifiers are:
//       $n    n-th backreference (i.e., a copy of the n-th matched group specified with parentheses in the regex pattern).
//             n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
//       $&    A copy of the entire match.
//       $`    The prefix (i.e., the part of the target sequence that precedes the match).
//       $'    The suffix (i.e., the part of the target sequence that follows the match).
//       $$    A single $ character.
//
//   flags:
//     One or more of these constants can be combined (using the bitwise OR operator, |) to
//     form a valid bitmask value of type regex_constants::match_flag_type:
//
//     flag               effects                  notes
//     -----------------  -----------------------  -----
//     match_default      Default                  Default matching behavior. This constant has a value of zero**.
//     match_not_bol      Not Beginning-Of-Line    The first character is not considered a beginning of line ("^" does not match).
//     match_not_eol      Not End-Of-Line          The last character is not considered an end of line ("$" does not match).
//     match_not_bow      Not Beginning-Of-Word    The escape sequence "\b" does not match as a beginning-of-word.
//     match_not_eow      Not End-Of-Word          The escape sequence "\b" does not match as an end-of-word.
//     match_any          Any match                Any match is acceptable if more than one match is possible.
//     match_not_null     Not null                 Empty sequences do not match.
//     match_continuous   Continuous               The expression must match a sub-sequence that begins at the first character.
//                                                 Sub-sequences must begin at the first character to match.
//     match_prev_avail   Previous Available       One or more characters exist before the first one. (match_not_bol and match_not_bow are ignored).
//     format_default     Default formatting       Uses the standard formatting rules to replace matches (those used by ECMAScript's replace method).
//                                                 This constant has a value of zero**.
//     format_sed         sed formatting           Uses the same rules as the sed utility in POSIX to replace matches.
//     format_no_copy     No copy                  The sections in the target sequence that do not match the regular expression are not copied when replacing matches.
//     format_first_only  First only               Only the first occurrence of a regular expression is replaced.
//
// NOTE: ** Constants with a value of zero are ignored if some other flag is set.
//
// Example:
//   std::string s("This is a catfish");
//   std::string regex("(cat)");
//   std::string replacement("dog");
//
//   result = string_replace(s, regex, replacement);
//
// Returns:
//   This is a dogfish
//
// Example2:
//   std::string regex("([A-Za-z]+)&([A-Za-z]+)");   // Find word&word
//   std::string replacement = "$2&$1";              // Switch order.
//
//   result = string_replace(s, regex, replacement);
//
// Example3:
//   std::string s = "April 15, 2003";
//   std::string regex = "(\\w+) (\\d+), (\\d+)";
//   std::string result = string_replace(s, regex, "$011,$3");
//
// Returns:
//   April1,2003
//
//   NOTE: Isolated $1 backreferences.
//         The $011 says to use $01, or the 1st regex match, followed by a literal 1.
//         If $11 was used, the system would try to use the 11th regex match.
//         This only works because the limit is set to 99 maximum matches.
//
// Example4:
//   result = string_replace(s, regex, "dog", std::regex_constants::format_first_only);
//
// Returns a new string; s itself is not modified.
// Throws std::regex_error if `regex` is not a valid regular expression.
std::string string_replace(const std::string& s, const std::string& regex, const std::string& replacement, std::regex_constants::match_flag_type flags)
{
    const std::regex reg(regex);

    // The string/string overload of std::regex_replace already returns a fresh
    // string, so no working copy of s is needed.
    return std::regex_replace(s, reg, replacement, flags);
}

// Replaces all occurrences of the regex within the string by the replacement string.
//
// Parameters:
//
//   replacement:
//     The replacement string may contain references of the form $n.
//     Every such reference will be replaced by the
//     text captured by the n'th parenthesized pattern.
//     n can be from 0 to 99, and $0 refers to the text matched by the whole pattern.
//
//     This may include format specifiers and escape sequences that are replaced by the characters they represent.
//
//     For format_default, the possible specifiers are:
//       $n    n-th backreference (i.e., a copy of the n-th matched group specified with parentheses in the regex pattern).
//             n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
//       $&    A copy of the entire match.
//       $`    The prefix (i.e., the part of the target sequence that precedes the match).
//       $'    The suffix (i.e., the part of the target sequence that follows the match).
//       $$    A single $ character.
//
//   retain:
//     If true, the parts of s that do not match the regex are kept in the result.
//     If false, only the replacement text of each match is kept; the parts of s that do not
//     match are dropped (std::regex_constants::format_no_copy).
//
// Example:
//   std::string s = " 14MAY 15JUNE ";
//   std::string regex = "(\\d+)([A-Z]+)";
//   result = string_replace(s, regex, "$1 $2");
//
// Returns:
//   " 14 MAY 15 JUNE "
//
// Example2:
//   result = string_replace(s, regex, "$1 $2", std::regex_constants::format_no_copy);
//
// Returns:
//   "14 MAY15 JUNE"
//
// Example3:
//   result = string_replace(s, regex, "$1 $2", false);
//
// Returns:
//   "14 MAY15 JUNE"
std::string string_replace(const std::string& s, const std::string& regex, const std::string& replacement, bool retain)
{
    // Both constants have type match_flag_type, so this call selects the flags
    // overload defined earlier in this file instead of recursing into this one.
    // NOTE(review): match_default is passed explicitly for the retain case; this assumes the
    // default `flags` argument declared in string.h is match_default -- confirm.
    return string_replace(s, regex, replacement,
                          retain ? std::regex_constants::match_default
                                 : std::regex_constants::format_no_copy);
}

// Returns true if the regex matches anywhere within the string.
//
// This is a search (std::regex_search): the whole string does not have to match,
// a matching sub-sequence is enough.
// Throws std::regex_error if `regex` is not a valid regular expression.
//
// Example:
//   string_match("abc123", "[0-9]+", std::regex_constants::match_default);      // true
//   string_match("abc",    "[0-9]+", std::regex_constants::match_default);      // false
//   string_match("abc123", "[0-9]+", std::regex_constants::match_continuous);   // false, match must start at 'a'
bool string_match(const std::string& s, const std::string& regex, std::regex_constants::match_flag_type flags)
{
    return std::regex_search(s, std::regex(regex), flags);
}

// Shows the first match of the regex within the string, together with its sub-matches.
//
// Writes one line to std::cout: the input, the regex, and either "NO MATCH" or the
// prefix, every sub-match m[0]..m[size-1] and the suffix of the first match found.
//
// Example:
//   show_matches("abcdef", "abc|def");
//   show_matches("abc", "ab|abc");                  // left Alternative matched first
//
//   show_matches("abc", "((a)|(ab))((c)|(bc))");
//     Match of the input against the left Alternative (a) followed by the remainder of the
//     regex (c|bc) succeeds, with results:
//        m[1]="a" and m[4]="bc".
//     The skipped Alternatives (ab) and (c) leave their submatches
//        m[3] and m[5] empty.
void show_matches(const std::string& s, const std::string& regex)
{
    std::smatch m;
    if (!std::regex_search(s, m, std::regex(regex)))
    {
        std::cout << "input=[" << s << "], regex=[" << regex << "]: NO MATCH\n";
        return;
    }

    std::cout << "input=[" << s << "], regex=[" << regex << "]: ";
    std::cout << "prefix=[" << m.prefix() << "] ";
    for (std::size_t n = 0; n < m.size(); ++n)
        std::cout << " m[" << n << "]=[" << m[n] << "] ";
    std::cout << "suffix=[" << m.suffix() << "]\n";
}

// Splits a string into separate tokens using the capture groups of the regex.
//
// Only the first match of the regex within s is used. The result holds the whole
// match at index 0, followed by one entry per capture group (an empty string for
// a group that did not take part in the match). The result is empty when the
// regex does not match at all.
// Throws std::regex_error if `regex` is not a valid regular expression.
//
// To split on a delimiter regex instead, std::sregex_token_iterator with a
// sub-match index of -1 yields the text between matches.
//
// Example:
//   s = "0 HEAD";
//   regex = "([\\d]+)[\\s]+([A-Z]*)";
//
// Returns:
//   [0]=0 HEAD
//   [1]=0
//   [2]=HEAD
std::vector<std::string> string_tokenize(const std::string& s, const std::string& regex)
{
    std::vector<std::string> result;

    std::smatch m;
    if (!std::regex_search(s, m, std::regex(regex)))
        return result;

    result.reserve(m.size());
    for (std::size_t n = 0; n < m.size(); ++n)
        result.push_back(m[n].str());

    return result;
}