// string.cpp

#include <string>
#include <sstream>
 
#include "string.h"
 
 
// Converts a numeric value to its text form using stream formatting.
//
// Character-sized integer types (char, signed char, unsigned char, and so
// std::int8_t / std::uint8_t) are promoted to int before being streamed, so
// they are written as digits rather than as a single character.
//
// Floating-point values use the stream's default formatting.
//
// NOTE(review): this template is defined in a .cpp file; confirm that every
//               caller can see this definition.
//
// Example:
//   numberToString(69);    // "69"
//   numberToString(2.5);   // "2.5"
template <typename T>
std::string numberToString(T pNumber)
{
	std::ostringstream oOStrStream;
	// Unary plus applies the integral promotions: a char-sized integer becomes
	// an int, every other arithmetic type keeps its type and value.
	oOStrStream << +pNumber;
	return oOStrStream.str();
}
 
 
#include <iostream>
#include <regex>
 
 
// Returns all occurrences of the regex within the string, in the order found.
//
// Each element is the text of one whole match; sub-expressions are not
// reported separately. The result is empty when nothing matches.
//
// Parameters:
//   s:     The text to search.
//   regex: The pattern to search for. It is compiled with std::regex, which
//          throws std::regex_error if the pattern is not valid.
//
// Example:
//   std::string regex = "([A-Z]+)([\\d]+)";
//   std::string s = "aaaMAY14bbbJUNE4";
//   result = string_find(s, regex);
//
// Returns:
//   [0]=MAY14
//   [1]=JUNE4
std::vector<std::string> string_find(const std::string& s, const std::string& regex)
{
  const std::regex reg(regex);

  // The 4th argument selects what each step of the iterator yields:
  //   -1 the text between matches (the non-matching parts).
  //    0 the whole match.
  //    1 the 1st sub-expression of each match.
  //    2 the 2nd sub-expression of each match.
  //    3...
  const std::sregex_token_iterator first(s.begin(), s.end(), reg, 0);
  const std::sregex_token_iterator last;  // default-constructed == end of sequence

  // Every sub_match converts to std::string, so the vector can be filled
  // directly from the iterator range.
  return std::vector<std::string>(first, last);
}
 
 
// Replaces all occurrences of the regex within the string with the replacement string.
//
// Parameters:
//
//   replacement:
//     The replacement string may contain references of the form $n. Every such reference will be replaced by the 
//     text captured by the n'th parenthesized pattern. 
//     n can be from 0 to 99, and $0 refers to the text matched by the whole pattern.
//            
//     This may include format specifiers and escape sequences that are replaced by the characters they represent.
//               
//     For format_default, the possible specifiers are:
//       $n n-th backreference(i.e., a copy of the n-th matched group specified with parentheses in the regex pattern).
//          n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
//       $&	A copy of the entire match
//       $`	The prefix(i.e., the part of the target sequence that precedes the match).
//       $'	The suffix(i.e., the part of the target sequence that follows the match).
//       $$ A single $ character.
//      
//   flags:
//     One or more of these constants can be combined (using the bitwise OR operator, |) to 
//     form a valid bitmask value of type regex_constants::match_flag_type:
//
//     flag	effects	notes
//     ------------------
//     match_default	Default	Default matching behavior. This constant has a value of zero**.
//     match_not_bol	Not Beginning-Of-Line	The first character is not considered a beginning of line("^" does not match).
//     match_not_eol	Not End-Of-Line	The last character is not considered an end of line("$" does not match).
//     match_not_bow	Not Beginning-Of - Word	The escape sequence "\b" does not match as a beginning-of-word.
//     match_not_eow	Not End-Of-Word	The escape sequence "\b" does not match as an end-of-word.
//     match_any	Any match	Any match is acceptable if more than one match is possible.
//     match_not_null	Not null	Empty sequences do not match.
//     match_continuous	Continuous	The expression must match a sub-sequence that begins at the first character.
//                                  Sub-sequences must begin at the first character to match.
//     match_prev_avail	Previous Available	One or more characters exist before the first one. (match_not_bol and match_not_bow are ignored).
//     format_default	Default formatting	Uses the standard formatting rules to replace matches(those used by ECMAScript's replace method).
//                                        This constant has a value of zero**.
//     format_sed	sed formatting	Uses the same rules as the sed utility in POSIX to replace matches.
//     format_no_copy	No copy	The sections in the target sequence that do not match the regular expression are not copied when replacing matches.
//     format_first_only	First only	Only the first occurrence of a regular expression is replaced.
//
//     NOTE:  ** Constants with a value of zero are ignored if some other flag is set.
//
// Example:
//   std::string s("This is a catfish");
//   std::string regex("(cat)");
//   std::string replacement("(dog)");
//
//   result = string_replace(s, regex, "dog");
//
// Returns:
//   This is a dogfish.
//
// Example2:
//   std::string regex("([A-Za-z]+)&([A-Za-z]+)");  // Find word&word
//   std::string replacement = "$2&$1";             // Switch order.
//
//   result = string_replace(s, regex, replacement);
//
// Example3:
//   std::string s = "April 15, 2003";
//   std::string regex = "(\\w+) (\\d+), (\\d+)";
//   std::string result = string_replace(s, regex, "$011,$3");
//
// Returns:
//   April1,2003.
//
//  NOTE:  Isolated $1 backreferences.
//         The $011 says to use $01, or the 1st regex match.
//         If $11 was used, the system would try to use the 11th regex match.
//         This only works because the limit is set to 99 maximum matches.
//
// Example4:
//   result = string_replace(s, regex, "dog", std::regex_constants::format_first_only);
std::string string_replace(const std::string& s, const std::string& regex, const std::string& replacement,
  std::regex_constants::match_flag_type flags)
{
  // Compile the pattern; std::regex throws std::regex_error if it is not valid.
  const std::regex reg(regex);

  // regex_replace (string/string overload) builds and returns a new string,
  // so the input is passed as-is instead of being copied into the result first.
  return std::regex_replace(s, reg, replacement, flags);
}
 
 
// Replaces all occurrences of the regex within the string with the replacement string.
//
// Parameters:
//
//   replacement:
//     The replacement string may contain references of the form $n. Every such reference will be replaced by the 
//     text captured by the n'th parenthesized pattern. 
//     n can be from 0 to 99, and $0 refers to the text matched by the whole pattern.
//            
//     This may include format specifiers and escape sequences that are replaced by the characters they represent.
//               
//     For format_default, the possible specifiers are:
//       $n n-th backreference(i.e., a copy of the n-th matched group specified with parentheses in the regex pattern).
//          n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
//       $&	A copy of the entire match
//       $`	The prefix(i.e., the part of the target sequence that precedes the match).
//       $'	The suffix(i.e., the part of the target sequence that follows the match).
//       $$ A single $ character.
//
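//    retain:
//      If true, the parts of the string that do not match the regex are kept in the result.
//      If false, only the replacement text produced for each match is kept; the non-matching
//      parts are dropped (the same as passing std::regex_constants::format_no_copy).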
//
// Example:
//   std::string s = "  14MAY  15JUNE ";
//   result = string_replace(s, regex, "$1 $2");
// 
// Returns:
//   std::string s = "  14 MAY  15 JUNE ";
//
// Example2:
//   result = string_replace(s, regex, "$1 $2", std::regex_constants::format_no_copy);
//
// Returns:
//   std::string s = "14 MAY15 JUNE ";
//
// Example3:
//   result = string_replace(s, regex, "$1 $2", false);
//
// Returns:
//   std::string s = "14 MAY15 JUNE ";
std::string string_replace(const std::string& s, const std::string& regex, const std::string& replacement,
  bool retain)
{
  // Not retaining the unmatched text is what format_no_copy does, so that
  // case forwards to the flag-taking overload with that flag set.
  if (!retain)
    return string_replace(s, regex, replacement, std::regex_constants::format_no_copy);

  // Retaining case: plain three-argument call (relies on a default argument
  // declared elsewhere - presumably in string.h).
  return string_replace(s, regex, replacement);
}
 
 
// Reports whether the regex is found within the string.
//
// The check is done with std::regex_search, so a match on any part of the
// string is enough; the regex does not have to cover the whole string.
//
// Parameters:
//   s:     The text to search.
//   regex: The pattern to look for.
//   flags: Match flags passed on to std::regex_search.
//
// Example:
//   string_match("aaaMAY14bbb", "[A-Z]+[\\d]+", std::regex_constants::match_default);  // true
//   string_match("aaa", "[\\d]+", std::regex_constants::match_default);                // false
bool string_match(const std::string& s, const std::string& regex, std::regex_constants::match_flag_type flags)
{
  // Only the yes/no answer is needed, so the match details are not collected.
  return std::regex_search(s, std::regex(regex), flags);
}
 
 
// Prints the result of searching the string for the regex to std::cout.
//
// When the regex is found, one line is written with the input, the regex,
// the text before the match (prefix), every sub-match m[0]..m[n] (m[0] is
// the whole match) and the text after the match (suffix). Only the first
// match is shown. When nothing matches, a "NO MATCH" line is written instead.
//
// Example:
//   show_matches("abcdef", "abc|def");
//   show_matches("abc", "ab|abc"); // left Alternative matched first
//
//   show_matches("abc", "((a)|(ab))((c)|(bc))");
//
//   Match of the input against the left Alternative (a) followed by the remainder of the
//   regex (c|bc) succeeds, with results:
//     m[1]="a" and m[4]="bc".
//   The skipped Alternatives (ab) and (c) leave their submatches
//     m[3] and m[5] empty.
void show_matches(const std::string& s, const std::string& regex)
{
  std::smatch found;
  const bool matched = std::regex_search(s, found, std::regex(regex));

  // Both outcomes start with the same header.
  std::cout << "input=[" << s << "], regex=[" << regex << "]: ";

  if (!matched) {
    std::cout << "NO MATCH\n";
    return;
  }

  std::cout << "prefix=[" << found.prefix() << "] ";

  std::size_t index = 0;
  for (const auto& group : found) {
    std::cout << " m[" << index << "]=[" << group << "] ";
    ++index;
  }

  std::cout << "suffix=[" << found.suffix() << "]\n";
}
 
 
 
// Splits the first regex match within the string into separate tokens.
//
// The result holds the whole matched text at index 0, followed by the text of
// each parenthesized sub-expression in order. Only the first match is used;
// the text before and after it is not included. An empty vector is returned
// when the regex does not match.
//
// Example:
//   s = "0 HEAD";
//   regex = "([\\d]+)[\\s]+([A-Z]*)";
//
// Returns:
//   [0]=0 HEAD
//   [1]=0
//   [2]=HEAD
std::vector<std::string> string_tokenize(const std::string& s, const std::string& regex)
{
  std::smatch found;
  if (!std::regex_search(s, found, std::regex(regex)))
    return std::vector<std::string>();

  // Iterating the match results visits m[0], m[1], ... in order.
  std::vector<std::string> tokens;
  for (const auto& group : found)
    tokens.push_back(group.str());

  return tokens;
}