X-Git-Url: https://git.teslayout.com/public/public/public/?a=blobdiff_plain;f=include%2Fanna%2Fcore%2Futil%2FTokenizer.hpp;h=829077577342ad817d4842f264fe3b4152fd3816;hb=4c3f0a4d7e4db76996404d80c6f939548fca656f;hp=693b2cfb917b8a96352c9427416859a485f43cbd;hpb=c82a3818b279727e943a76343f3cf1a278ac9e19;p=anna.git diff --git a/include/anna/core/util/Tokenizer.hpp b/include/anna/core/util/Tokenizer.hpp index 693b2cf..8290775 100644 --- a/include/anna/core/util/Tokenizer.hpp +++ b/include/anna/core/util/Tokenizer.hpp @@ -18,9 +18,12 @@ namespace anna { class RuntimeException; /** - Separa la cadena recibida en distintos elementos. + Tokenize the input string into several elements */ class Tokenizer { + + int _apply(const char* str, const char* separator) throw(RuntimeException); + public: typedef char* const* const_iterator; @@ -93,30 +96,28 @@ public: const char* operator [](const int i) const throw(RuntimeException) { return at(i); } /** - Aplica la separacion sobre la cadena str con el separador recibido como parametro. + Process the separation over the string str with the separator provided. - @param str Cadena sobre la que aplicar la separacion. - @param separator Caracteres que van a actuar como separador de las subcadenas contenidas en el - primer parametro. + Internally used strtok_r has these limitations: sequence of two or more contiguous delimiter + bytes in the parsed string is considered to be a single delimiter. Delimiter bytes at the start + or end of the string are ignored. Put another way: the tokens returned by strtok() are always + nonempty strings. To override these limitations, the string provided can be internally modified + inserting an artificial token in order to cheat on strtok_r. For this feature, you may provide + '' or whatever string (non-empty) you prefer. - @return Numero de elementos obtenidos al aplicar la separacion. + @param str String to apply the separation. 
+ @param separator Characters used as separator within the string tokenized + @param tokenizeContiguous If provided, it will be the artificial token used internally. The + resulting tokens shall store this string in case of contiguous separators. NULL by + default (original strtok_r behaviour). + @return Number of tokens */ - int apply(const std::string& str, const char* separator) throw(RuntimeException) { - return apply(str.c_str(), separator); + int apply(const char* str, const char* separator, const char *tokenizeContiguous = NULL) throw(RuntimeException); + int apply(const std::string& str, const char* separator, const char *tokenizeContiguous = NULL) throw(RuntimeException) { + return apply(str.c_str(), separator, tokenizeContiguous); } - /** - Aplica la separacion sobre la cadena str con el separador recibido como parametro. - - @param str Cadena sobre la que aplicar la separacion. - @param separator Caracteres que van a actuar como separador de las subcadenas contenidas en el - primer parametro. - - @return Numero de elementos obtenidos al aplicar la separacion. - - */ - int apply(const char* str, const char* separator) throw(RuntimeException); // Metodos /**