/**
 * Lecture code 18.0
 *
 * A regular expression matcher built using template metaprogramming.  This matcher
 * is composed of several smaller template classes, each of which are designed to match
 * specific patterns.  These are then synthesized together into more complex matchers
 * that ultimately can match very complicated regular expressions.
 *
 * As a recap, regular expressions are defined recursively:
 *
 *   Any letter is a regexp matching itself: a matches "a"
 *
 *   Any concatenations of regexps is a regexp matching strings that can be split into
 *   two pieces, one matching the first regexp and one matching the second: ab matches "ab"
 *                                             
 *   The "either-or" (disjunction) of two regexps is a regexp that matches a string if the
 *   string matches either of the regexps: a|b matches "a" or "b"
 *
 *   The Kleene closure of a regexp matches zero or more copies of that regexp: a* matches
 *   "", "a", "aa", "aaa", ..., and (a|b)* matches "", "a", "b", "aa", "ab", "ba", "bb", ...
 *
 * Additionally, we introduce a few new regexps which make it easier to encode many simple
 * regular expressions.
 *
 *   The + operator means "one or more copies" and is defined as
 *
 *       R+  ==   RR*
 *
 *   The notation R{n} means "exactly n copies of R" and is defined as
 *
 *       R{n} == RRR...R  (n times)
 *     
 */
#include <iostream>
#include <string>
#include <ctype.h>
using namespace std;

/* A simple class representing a matcher that matches a single character.  For example:
 *
 * Ch<'a'>::matches("a")  returns true.
 * Ch<'a'>::matches("aa") returns false.
 * Ch<'a'>::matches("b")  returns false.
 */
template <char ch> struct Ch {
	static bool matches(const string& str) {
		/* The string must have length one and its sole character must be the character
		 * we want to match.
		 */
		return str.size() == 1 && str[0] == ch;
	}
};

/* A more complex regular expression class that matches any character for which a given
 * predicate function returns true.  This function is designed to work on the functions
 * exported by the <ctype.h> header, which all take in ints and return ints (though the
 * parameter is actually a char and the return type is really a boolean value).  This
 * odd template signature means that the class is parameterized over a function to use.
 * For example, to make a matcher that matches any upper-case letter, we could write
 * ChClass<isupper>.
 */
template <int (*function)(int)> struct ChClass {
	static bool matches(const string& str) {
		/* Check that the string has length one and that the predicate accepts the
		 * unique character in the string.
		 */
		return str.size() == 1 && function(str[0]);
	}
};

/* A regexp matcher that matches the disjunction (or) of two regular expressions.  The
 * class is parameterized over the two regular expressions to consider, and matches a
 * string if either of the two match that string.
 */
template <typename Regexp1, typename Regexp2> struct Or {
	static bool matches(const string& str) {
		/* Check both regexps.  Note that this will fail to compile unless both template
		 * arguments export a matches() function.
		 */
		return Regexp1::matches(str) || Regexp2::matches(str);
	}
};

/* A regexp matcher that matches the concatenation (and) of two regular expressions.  The
 * matcher works by constructing all partitions of the string into two pieces and checking
 * whether some partition exists such that the first matcher accepts the first string and
 * the second matcher accepts the second string.
 */
template <typename Regexp1, typename Regexp2> struct And {
	static bool matches(const string& str) {
		/* Generating all partitions means that we consider all possible numbers of
		 * characters that we could feed into the first matcher.
		 */
		for (size_t i = 0; i <= str.size(); ++i)
			if (Regexp1::matches(str.substr(0, i)) && Regexp2::matches(str.substr(i)))
				return true;
		return false;
	}
};

/* A matcher class that matches the Kleene closure (star) of a regular expression.  It
 * works by first checking if the string is empty (since the empty string always matches
 * star).  If not, the matcher tries to break the string up into two groups - a nonempty
 * group of characters that are fed into the regexp and a remainder of characters that are
 * in turn fed back into this matcher.  The logic here is that A* matches either the empty
 * string, or AA*.  We also only consider nonempty splits here, because while it's
 * possible that pulling off the empty string will get something back that matches, we
 * only "make progress" on parsing the string if we take characters out of it.
 */
template <typename Regexp> struct Star {
	static bool matches(const string& str) {
		if (str.empty()) return true;

		/* Consider all nonempty partitions of the string. */
		for (size_t i = 1; i <= str.size(); ++i)
			if (Regexp::matches(str.substr(0, i)) && matches(str.substr(i)))
				return true;
		return false;
	}
};

/* Plus is defined as
 *
 *    R+ == RR*
 *
 * That's exactly what we do here.
 */
template <typename Regexp> struct Plus {
	static bool matches(const string& str) {
		return And< Regexp, Star<Regexp> >::matches(str);
	}
};

/* Recursive definition of a regular expression to match some number of copies of some
 * regular expression.  This works as follows:
 *
 *   R{0} matches the empty string.
 *   R{n+1} matches RR{n}
 *
 * Note that we use template specialization to determine when to stop the recursion.
 */
template <typename Regexp, size_t NumCopies> struct Repeat {
	static bool matches(const string& str) {
		return And< Regexp, Repeat<Regexp, NumCopies - 1> >::matches(str);
	}
};
template <typename Regexp> struct Repeat<Regexp, 0> {
	static bool matches(const string& str) {
		return str.empty();
	}
};

/**
 * Regular expression for email addresses:
 *
 *      [alnum]+(.[alnum]+)*@[alnum]+.[alnum]+(.[alnum]+)*
 */
typedef Plus< ChClass<isalnum> > String;
typedef And< Ch<'.'>, String> DotString;
typedef And< String,
			 And< Star<DotString>,
			      And< Ch<'@'>,
					   And< String,
					        And< Ch<'.'>,
								 And< String, 
								      Star<DotString> > > > > > > EmailMatcher;

/**
 * Regular expression for phone numbers:
 *
 *                 ([digit]{3}) [digit]{3}(-| )[digit]{4}
 */
typedef And< Ch<'('>,
			And< Repeat<ChClass<isdigit>, 3>,
			    And< Ch<')'>,
			        And< Ch<' '>,
			            And< Repeat<ChClass<isdigit>, 3>,
						    And< Or<Ch<' '>, Ch<'-'> >,
							     Repeat< ChClass<isdigit>, 4> > > > > > > PhoneMatcher;

int main() {
	/* Sit in a loop asking for strings and matching them. */
	while (true) {
		cout << "Enter string to match: ";
		string line;
		getline(cin, line);

		cout << "Is email address? " << boolalpha << EmailMatcher::matches(line) << endl;
		cout << "Is phone number?  " << boolalpha << PhoneMatcher::matches(line) << endl;
	}
}