X-Git-Url: https://git.teslayout.com/public/public/public/?a=blobdiff_plain;f=source%2Fcore%2Futil%2FTokenizer.cpp;h=d49f6edd2069a5798e6ecefcc387d1e1299113b7;hb=af9c86ffb0e28d35ad94d99c5f77e41578c972b4;hp=b4f6d121a1aed251988179591bf0031e4577025e;hpb=93366a0bda79e6fd6e7dad6316bfcf8cc82f5731;p=anna.git

diff --git a/source/core/util/Tokenizer.cpp b/source/core/util/Tokenizer.cpp
index b4f6d12..d49f6ed 100644
--- a/source/core/util/Tokenizer.cpp
+++ b/source/core/util/Tokenizer.cpp
@@ -10,12 +10,16 @@
 #include 
 #include 
 
+// temporary
+#include 
+
 using namespace std;
 using namespace anna;
 
 //static const int Tokenizer::MaxItem = 64;
 
+
 Tokenizer::Tokenizer() :
   a_dataBlock(true),
   a_activateStrip(false) {
@@ -53,12 +57,13 @@ Tokenizer::~Tokenizer() {
   delete [] a_items;
 }
 
-int Tokenizer::apply(const char* str, const char* separator)
+int Tokenizer::_apply(const char* str, const char* separator)
 throw(RuntimeException) {
+  a_maxItem = 0;
 
-  if(str == NULL)
-    return 0;
+  //if(str == NULL)
+  //  return 0;
 
   DataBlock mb(str, anna_strlen(str) + 1, false);
   a_dataBlock = mb;
@@ -86,6 +91,32 @@ throw(RuntimeException) {
   return a_maxItem;
 }
 
+int Tokenizer::apply(const char *str, const char* separator, const char *tokenizeContiguous) throw(RuntimeException) {
+
+  if(str == NULL)
+    return 0;
+
+  if (!separator)
+    throw RuntimeException("Cannot tokenize with a NULL separator", ANNA_FILE_LOCATION);
+
+  if (!tokenizeContiguous) return _apply(str, separator);
+
+  std::string _str = str;
+  std::string _sep = separator;
+  std::string _tok = tokenizeContiguous;
+  if (_sep == _tok)
+    throw RuntimeException("Using the separator as artifial token is a nonsense (original behaviour)", ANNA_FILE_LOCATION);
+  if (_tok == "")
+    throw RuntimeException("Use another artifial token. Empty is a nonsense (original behaviour)", ANNA_FILE_LOCATION);
+
+  std::string seps = _sep + _sep;
+  std::size_t pos, sepsL = seps.size();
+  std::string artificialToken = _sep + _tok + _sep;
+
+  while ((pos = _str.find(seps)) != std::string::npos) _str.replace(pos, sepsL, artificialToken);
+  return _apply(_str.c_str(), separator);
+}
+
 const char* Tokenizer::at(const int i)
 throw(RuntimeException) {
   if(i >= a_maxItem)