You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
223 lines
5.9 KiB
C++
223 lines
5.9 KiB
C++
/************************************************************************
|
|
The zlib/libpng License
|
|
|
|
Copyright (c) 2006 Joerg Wiedenmann
|
|
|
|
This software is provided 'as-is', without any express or implied warranty.
|
|
In no event will the authors be held liable for any damages arising from
|
|
the use of this software.
|
|
|
|
Permission is granted to anyone to use this software for any purpose,
|
|
including commercial applications, and to alter it and redistribute it
|
|
freely, subject to the following restrictions:
|
|
|
|
1. The origin of this software must not be misrepresented;
|
|
you must not claim that you wrote the original software.
|
|
If you use this software in a product, an acknowledgment
|
|
in the product documentation would be appreciated but is
|
|
not required.
|
|
|
|
2. Altered source versions must be plainly marked as such,
|
|
and must not be misrepresented as being the original software.
|
|
|
|
3. This notice may not be removed or altered from any source distribution.
|
|
|
|
***********************************************************************/
|
|
|
|
/********************************************************************
|
|
created: 2006-01-28
|
|
filename: tokenizer.cpp
|
|
author: Jörg Wiedenmann
|
|
|
|
purpose: A tokenizer function which provides a very
|
|
customizable way of breaking up strings.
|
|
|
|
history: 2006-01-28, Original version
|
|
2006-03-04, Fixed a small parsing bug, thanks Elias.
|
|
*********************************************************************/
|
|
|
|
#include "tokenizer.h"
|
|
|
|
using namespace std;
|
|
|
|
/**
 * Breaks @p str up into tokens and stores them in @p result.
 *
 * The input is scanned once, character by character:
 *  - A character found in @p esc is dropped and the character following it
 *    is appended to the current token verbatim; it is not examined as a
 *    quote or delimiter. An escape character at the very end of the input
 *    is dropped.
 *  - A character found in @p quote opens a quoted section, which is closed
 *    by the next unescaped occurrence of the same character. The opening
 *    and closing quote characters are not added to the token. Inside a
 *    quoted section delimiters are ordinary characters, and so are quote
 *    characters other than the one that opened the section.
 *  - A character found in @p delimiters ends the current token and is
 *    discarded.
 *  - A character found in @p delimiters_preserve ends the current token and
 *    is then appended to @p result as a one-character token of its own.
 *
 * Empty tokens are never emitted: consecutive delimiters do not produce
 * empty strings, and an empty quoted section produces nothing.
 *
 * @param str                  the string to split
 * @param result               receives the tokens; previous contents are discarded
 * @param delimiters           characters that separate tokens and are dropped
 * @param delimiters_preserve  characters that separate tokens and are kept
 *                             as single-character tokens
 * @param quote                characters that open/close a quoted section
 * @param esc                  characters that escape the following character
 */
void tokenize ( const std::string& str, std::vector<std::string>& result,
                const std::string& delimiters, const std::string& delimiters_preserve,
                const std::string& quote, const std::string& esc )
{
    // start from an empty result; clear() is a no-op on an empty vector
    result.clear();

    std::string token;          // the token currently being assembled
    bool quoted = false;        // true while a quoted section is open
    char current_quote = 0;     // the character that opened the quoted section

    const std::string::size_type len = str.length();

    for ( std::string::size_type pos = 0; pos < len; ++pos )
    {
        char ch = str[pos];     // pos < len is guaranteed by the loop condition
        bool add_char = true;   // assume ch belongs to the current token

        // ... escape character: take the following character literally.
        // Searching an empty set yields npos, so no separate empty() check
        // is needed here or for the other character sets below.
        const bool escaped = ( std::string::npos != esc.find( ch ) );
        if ( escaped )
        {
            ++pos;
            if ( pos < len )
            {
                ch = str[pos];
            }
            else
            {
                // nothing follows the escape character - drop it; the loop
                // condition terminates the scan after this iteration
                add_char = false;
            }
        }

        // ... quote character: open or close a quoted section
        if ( !escaped && std::string::npos != quote.find( ch ) )
        {
            if ( !quoted )
            {
                quoted = true;
                current_quote = ch;
                add_char = false;   // the quote char is not part of the token
            }
            else if ( current_quote == ch )
            {
                quoted = false;
                current_quote = 0;
                add_char = false;   // the quote char is not part of the token
            }
            // any other quote char inside an open quote is ordinary text
        }

        // Delimiters only count outside of quotes and when not escaped.
        // This is evaluated after the quote handling on purpose: a char
        // that has just closed a quote is still checked as a delimiter,
        // whereas one that has just opened a quote is not.
        const bool unprotected = !escaped && !quoted;
        const bool dropped_delimiter =
            unprotected && std::string::npos != delimiters.find( ch );
        const bool preserved_delimiter =
            unprotected && std::string::npos != delimiters_preserve.find( ch );

        if ( dropped_delimiter || preserved_delimiter )
        {
            // a delimiter completes the pending token, but only a
            // non-empty one is emitted (BUGFIX: 2006-03-04)
            if ( !token.empty() )
            {
                result.push_back( token );
                token.clear();
            }

            // a preserved delimiter becomes a token of its own
            if ( preserved_delimiter )
            {
                result.push_back( std::string( 1, ch ) );
            }
        }
        else if ( add_char )
        {
            token.push_back( ch );
        }
    }

    // add the final token
    if ( !token.empty() )
    {
        result.push_back( token );
    }
}
|