Home
CodeBlog
Articles
Downloads
Links
Books
About
|
Websites |
// Simple Tokenizer class
//
// Copyright (C) 2009, Thiago Adams (thiago.adams@gmail.com)
// Permission to copy, use, modify, sell and distribute this software
// is granted provided this copyright notice appears in all copies.
// This software is provided "as is" without express or implied
// warranty, and with no claim as to its suitability for any purpose.
//
#include <cwctype>
#include <iostream>
#include <sstream>
#include <string>

// Base class for character-stream tokenizers.
//
// It owns the bookkeeping shared by every tokenizer: reading characters from
// the stream, accumulating the text of the token being scanned, and storing
// the id of that token. A derived class implements the scanning rules on top
// of the protected helpers Get(), PutBack(), SetToken() and ResetToken().
//
// The tokenizer keeps a reference to the stream, so the stream must outlive
// the tokenizer.
template<class CharType>
class Tokenizer
{
public:
    typedef CharType Char;
    typedef Char Token;
    typedef std::basic_istream<Char> Istream;
    typedef std::basic_string<Char> String;

    // Token id stored by ResetToken(), i.e. "no token scanned".
    static const Token TokenEof = 0;

private:
    Istream & m_istream;

    // Position counters maintained by Get()/PutBack(). They are only
    // written inside this class; nothing here exposes them.
    int m_linepos;
    int m_colpos;
    int m_charpos;
    int m_prevcolpos; // column before the last Get(), restored by PutBack('\n')

    String m_tokenstr;
    Token m_token;

    Tokenizer(); // not implemented

protected:
    // Forgets the current token: id becomes TokenEof and the text is emptied.
    void ResetToken()
    {
        m_token = TokenEof;
        m_tokenstr.clear();
    }

    // Binds the tokenizer to the stream `i`; no character is read here.
    Tokenizer(Istream & i)
        : m_istream(i)
        , m_linepos(0)
        , m_colpos(0)
        , m_charpos(0)
        , m_prevcolpos(0)
        , m_tokenstr()
        , m_token(TokenEof)
    {
    }

    // Returns `ch` (expected to be the character most recently obtained from
    // Get()) to the stream and drops it from the current token text.
    void PutBack(Char ch)
    {
        m_istream.putback(ch);

        // Guard against an empty text: size() - 1 would wrap around.
        if (!m_tokenstr.empty())
            m_tokenstr.resize(m_tokenstr.size() - 1);

        // Undo the position bookkeeping done by the matching Get().
        if (m_charpos > 0)
            --m_charpos;
        if (ch == Char('\n'))
        {
            if (m_linepos > 0)
                --m_linepos;
            m_colpos = m_prevcolpos;
        }
        else if (m_colpos > 0)
        {
            --m_colpos;
        }
    }

    // Reads one character, appends it to the current token text and returns
    // its code, or -1 when no character could be read.
    //
    // The outcome of the read itself is tested instead of eof() beforehand:
    // eof() only becomes true after a read has already failed, and putback()
    // clears eofbit again, so testing eof() first both yields a bogus
    // character and may never report the end of input.
    int Get()
    {
        Char ch = 0;
        if (!m_istream.get(ch))
            return -1;

        ++m_charpos;
        m_prevcolpos = m_colpos;
        if (ch == Char('\n'))
        {
            ++m_linepos;
            m_colpos = 0;
        }
        else
        {
            ++m_colpos;
        }

        m_tokenstr += ch;

        // to_int_type keeps a narrow char such as 0xFF from being returned
        // as a negative value that would collide with the -1 sentinel.
        return static_cast<int>(std::char_traits<Char>::to_int_type(ch));
    }

    // Sets the id of the current token.
    void SetToken(Token t)
    {
        m_token = t;
    }

public:
    // Id of the current token.
    Token GetToken() const
    {
        return m_token;
    }

    // Text of the current token.
    const String & GetTokenStr() const
    {
        return m_tokenstr;
    }
};

// Wide-character tokenizer that recognises runs of digits, runs of letters
// and runs of white space. Any other character becomes a one-character
// TokenOther token.
class MyTokenizer : public Tokenizer<wchar_t>
{
public:
    static const Token TokenNumber = 1;
    static const Token TokenBlank = 2;
    static const Token TokenIdentifier = 3;
    static const Token TokenOther = 4;

private:
    // Wide-character classification. The narrow <cctype> functions must not
    // be used here: they are only defined for unsigned char values and EOF.
    static bool IsDigit(Char ch)
    {
        return std::iswdigit(static_cast<std::wint_t>(ch)) != 0;
    }

    static bool IsAlpha(Char ch)
    {
        return std::iswalpha(static_cast<std::wint_t>(ch)) != 0;
    }

    static bool IsSpace(Char ch)
    {
        return std::iswspace(static_cast<std::wint_t>(ch)) != 0;
    }

    // Extends the current token for as long as `accepts` holds. The first
    // rejected character is handed back to the stream; running out of input
    // simply ends the token.
    void ConsumeWhile(bool (*accepts)(Char))
    {
        for (;;)
        {
            const int code = Get();
            if (code == -1)
                return;

            const Char ch = static_cast<Char>(code);
            if (!accepts(ch))
            {
                PutBack(ch);
                return;
            }
        }
    }

public:
    // Binds to `is` and scans the first token immediately.
    MyTokenizer(Istream & is) : Tokenizer<wchar_t>(is)
    {
        Next();
    }

    // Scans the next token. Returns false when the input is exhausted; in
    // that case GetToken() is TokenEof and GetTokenStr() is empty.
    bool Next()
    {
        ResetToken();

        const int code = Get();
        if (code == -1)
            return false;

        const Char ch = static_cast<Char>(code);
        if (IsDigit(ch))
        {
            SetToken(TokenNumber);
            ConsumeWhile(&MyTokenizer::IsDigit);
        }
        else if (IsAlpha(ch))
        {
            SetToken(TokenIdentifier);
            ConsumeWhile(&MyTokenizer::IsAlpha);
        }
        else if (IsSpace(ch))
        {
            // The same predicate starts and continues a blank, so tabs and
            // newlines are grouped together with spaces.
            SetToken(TokenBlank);
            ConsumeWhile(&MyTokenizer::IsSpace);
        }
        else
        {
            // Without this the token id would stay TokenEof although a
            // character was consumed.
            SetToken(TokenOther);
        }
        return true;
    }
};

// Sample

// Returns the label printed in front of a token of kind `tk`, or an empty
// string for an id this sample does not know.
const wchar_t * TokenStr(Tokenizer<wchar_t>::Token tk)
{
    switch (tk)
    {
    case MyTokenizer::TokenNumber:
        return L"number :";
    case MyTokenizer::TokenEof:
        return L"eof :";
    case MyTokenizer::TokenBlank:
        return L"blank :";
    case MyTokenizer::TokenIdentifier:
        return L"identifier :";
    case MyTokenizer::TokenOther:
        return L"other :";
    }
    return L"";
}

// Define TOKENIZER_NO_SAMPLE_MAIN to compile the tokenizer without the demo
// entry point, e.g. when this file is included from a program that already
// has its own main().
#ifndef TOKENIZER_NO_SAMPLE_MAIN
// Tokenizes a fixed string and prints one line per token, followed by the
// final (end of input) state of the tokenizer.
int main()
{
    std::wstringstream ss;
    ss << L"identifier1 012334identifier2 identifier3";

    MyTokenizer tk(ss);
    do
    {
        std::wcout << TokenStr(tk.GetToken()) << "'" << tk.GetTokenStr() << "'" << std::endl;
    }
    while (tk.Next());

    std::wcout << TokenStr(tk.GetToken()) << "'" << tk.GetTokenStr() << "'" << std::endl;
    return 0;
}
#endif // TOKENIZER_NO_SAMPLE_MAIN
|