Home
CodeBlog
Articles
Downloads
Links
Books
About
|
Websites |
// Home made parser
// See also : http://www.thradams.com/codeblog/tkgen.htm
//
// Copyright (C) 2009, Thiago R. Adams
// http://www.thradams.com/
// Permission to copy, use, modify, sell and distribute this software
// is granted provided this copyright notice appears in all copies.
// This software is provided "as is" without express or implied
// warranty, and with no claim as to its suitability for any purpose.
//
#ifndef __PARSER_H__
#define __PARSER_H__

#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>

#ifndef ASSERT
#include <cassert>
#define ASSERT assert
#endif

namespace Tra
{
namespace Detail
{
    // Returns true when begin <= v <= end (both bounds inclusive).
    template<class T>
    bool CheckRange(const T &v, const T &begin, const T &end)
    {
        return v >= begin && v <= end;
    }

    // Returns true for the decimal digits '0'..'9'.
    template<class Char>
    bool IsDigit(Char c)
    {
        return CheckRange(c, Char('0'), Char('9'));
    }

    // Returns true for the characters allowed in a symbol:
    // 'A'..'Z', 'a'..'z', '_', '&' and ';'.
    template<class Char>
    bool IsLetter(Char c)
    {
        return CheckRange(c, Char('A'), Char('Z')) ||
               CheckRange(c, Char('a'), Char('z')) ||
               c == Char('_') ||
               c == Char('&') ||
               c == Char(';');
    }

    // Returns true for character codes 0..32. Negative values (non-ASCII
    // bytes when Char is a signed type) are deliberately NOT blank.
    template<class Char>
    bool IsBlankChar(Char c)
    {
        return CheckRange(c, Char(0), Char(32));
    }

    // Tries to consume a symbol (a letter followed by letters or digits,
    // e.g. a2, ab, a_2) starting at `it`.
    // Returns true and leaves `it` one past the symbol on success;
    // returns false and leaves `it` untouched otherwise.
    // The input must be terminated by a character that is neither a
    // letter nor a digit (the parser uses a null character).
    template<class Iterator>
    bool IsTokenSymbol(Iterator &it)
    {
        if (!IsLetter(*it))
        {
            return false;
        }

        ++it;
        while (IsLetter(*it) || IsDigit(*it))
        {
            ++it;
        }
        return true;
    }

    // Tries to consume a number starting at `it`.
    //
    // Accepted form:  ['-'] digit+ ['.' digit*] [('e'|'E') ['+'|'-'] digit+]
    //
    // Returns 0 when there is no number at `it` (and `it` is untouched),
    // 1 for an integer, 2 for a double (a fraction or an exponent was
    // consumed). On success `it` is left one past the number.
    //
    // A '-' that is not followed by a digit is not a number, and an 'e'
    // that is not followed by an (optionally signed) digit is left for
    // the next token. The input must be terminated by a non-digit
    // character (the parser uses a null character), which bounds the
    // look-ahead performed here.
    template<class Iterator>
    int IsTokenNumber(Iterator &it)
    {
        Iterator cur = it;

        if (*cur == '-')
        {
            ++cur;
        }

        if (!IsDigit(*cur))
        {
            return 0;
        }

        do
        {
            ++cur;
        }
        while (IsDigit(*cur));

        int result = 1; //integer

        if (*cur == '.')
        {
            ++cur;
            while (IsDigit(*cur))
            {
                ++cur;
            }
            result = 2; //double
        }

        if (*cur == 'e' || *cur == 'E')
        {
            // Look ahead on a copy: commit only if digits really follow.
            Iterator exponent = cur;
            ++exponent;
            if (*exponent == '+' || *exponent == '-')
            {
                ++exponent;
            }
            if (IsDigit(*exponent))
            {
                do
                {
                    ++exponent;
                }
                while (IsDigit(*exponent));
                cur = exponent;
                result = 2; //double
            }
        }

        it = cur;
        return result;
    }
} // namespace Detail

// Customizable class.
// Supplies the token identifiers and the token finder used by BasicParser.
// A replacement class must provide TokenEof, FindToken and IsBlank.
template <class CharType>
struct Tokens
{
    typedef CharType Token;

    //Token list
    static const Token TokenEof = 0;     //Reserved for eof
    static const Token TokenBlank = 1;   //Reserved for blanks

    //yours
    static const Token TokenSymbol = 2;
    static const Token TokenInteger = 3;
    static const Token TokenDouble = 4;

    // Token finder function, must be implemented.
    // Consumes one token starting at `it`, leaves `it` one past it and
    // returns the token identifier. Characters that do not start a number,
    // a symbol or a blank run are returned as themselves (one character).
    // Must not be called with `it` pointing at the terminating null.
    template<class Iterator>
    static Token FindToken(Iterator &it)
    {
        const int number = Detail::IsTokenNumber(it);
        if (number == 1)
        {
            return TokenInteger;
        }
        if (number != 0)
        {
            return TokenDouble;
        }

        if (Detail::IsTokenSymbol(it))
        {
            return TokenSymbol;
        }

        if (Detail::IsBlankChar(*it))
        {
            // One blank token covers the whole run, but never the
            // terminating null.
            while (*it != TokenEof && Detail::IsBlankChar(*it))
            {
                ++it;
            }
            return TokenBlank;
        }

        const Token single = *it;
        ++it;
        return single;
    }

    // Returns true when `tk` is the blank token.
    static bool IsBlank(Token tk)
    {
        return tk == TokenBlank;
    }
};

// Out-of-class definitions so the constants can also be bound to
// references (ODR-used) without link errors.
template <class CharType>
const typename Tokens<CharType>::Token Tokens<CharType>::TokenEof;
template <class CharType>
const typename Tokens<CharType>::Token Tokens<CharType>::TokenBlank;
template <class CharType>
const typename Tokens<CharType>::Token Tokens<CharType>::TokenSymbol;
template <class CharType>
const typename Tokens<CharType>::Token Tokens<CharType>::TokenInteger;
template <class CharType>
const typename Tokens<CharType>::Token Tokens<CharType>::TokenDouble;

// Buffered tokenizer over a std::basic_istream.
//
// The stream is read in chunks into an internal buffer. When the buffer is
// completely filled, the scanned region is cut back to the last '\n' in it,
// so a token is never split between two chunks as long as it sits on one
// line. The cut point is temporarily overwritten with a null character,
// which is what the token finder stops on.
//
// Limitations visible in the code: a line that does not fit in the buffer
// raises std::overflow_error, and a null character inside the input is
// treated as end of input.
template <class CharType,
          class TokensClass = Tokens<CharType>,
          int BufferSize = 4024>
class BasicParser : public TokensClass
{
public:
    typedef CharType Char;
    typedef Char Token;
    typedef std::basic_string<Char> String;
    typedef unsigned int Position;

private:
    typedef std::vector<Char> Buffer;
    typedef std::basic_istream<Char> Stream;
    typedef typename Buffer::iterator Iterator;
    typedef typename Buffer::const_iterator ConstIterator;

    static const Char NullChar = '\0';

    Stream &m_Stream;
    Buffer m_Buffer;
    Iterator m_BufferEndIt;      // last slot of the buffer, never filled by read()
    Iterator m_BufferIt;         // one past the last character read
    Iterator m_SourceIt;         // cursor
    Iterator m_SourceLineEndIt;  // end of the scanned region (holds NullChar)
    Iterator m_SourceTokenIt;    // begin of the current token
    Char m_SourceEndChar;        // character overwritten at m_SourceLineEndIt
    Token m_Token;
    std::size_t m_CharPos;       // stream offset of m_Buffer.begin()
    bool m_SkipWs;
    Position m_LinePos;          // 1-based line of the current token

    // Walks backwards from pbufferPosition and returns the position of the
    // nearest '\n', or pbuffer when none is found (pbuffer itself is not
    // inspected).
    template<class iter>
    static iter FindLineStart(iter pbuffer, iter pbufferPosition)
    {
        ASSERT(pbufferPosition >= pbuffer);
        while (pbufferPosition > pbuffer && !(*pbufferPosition == '\n'))
        {
            --pbufferPosition;
        }
        return pbufferPosition;
    }

    // Discards the consumed part of the buffer, moves what is left to the
    // front and refills the rest from the stream.
    // Throws std::runtime_error when the stream goes bad and
    // std::overflow_error when a full buffer holds no usable line break.
    void ReadBuffer()
    {
        // update current Position
        m_CharPos += static_cast<std::size_t>(
            std::distance(m_Buffer.begin(), m_SourceIt));

        // restore the character hidden by the end marker
        *m_SourceLineEndIt = m_SourceEndChar;

        // move the remaining chars to the buffer begin; skipped when
        // nothing was consumed, because std::copy must not be given a
        // destination equal to the source start
        const typename Buffer::difference_type remaining =
            std::distance(m_SourceIt, m_BufferIt);
        if (remaining != 0 && m_SourceIt != m_Buffer.begin())
        {
            std::copy(m_SourceIt, m_BufferIt, m_Buffer.begin());
        }
        m_BufferIt = m_Buffer.begin() + remaining;

        // fill the buffer until the end
        const std::streamsize room = std::distance(m_BufferIt, m_BufferEndIt);
        m_Stream.read(&*m_BufferIt, room);
        if (m_Stream.bad())
        {
            throw std::runtime_error("parser: bad stream");
        }
        m_BufferIt += m_Stream.gcount();

        // source pointer returns to the begin
        m_SourceIt = m_Buffer.begin();

        // source end is the last char read
        m_SourceLineEndIt = m_BufferIt;
        if (m_SourceLineEndIt == m_BufferEndIt)
        {
            // buffer is full: more input may follow, so stop at the last
            // line break to avoid cutting a token in two
            m_SourceLineEndIt = FindLineStart(m_Buffer.begin(), m_SourceLineEndIt);
            if (m_SourceLineEndIt == m_Buffer.begin())
            {
                throw std::overflow_error("parser: line too long");
            }
        }
        // else: last chunk, everything read can be scanned

        ASSERT(m_BufferIt >= m_SourceLineEndIt);

        // save the end point char and mark the end point with null
        m_SourceEndChar = *m_SourceLineEndIt;
        *m_SourceLineEndIt = NullChar;
    }

    // Refills the buffer when the cursor sits on the end marker. If the
    // cursor still points at a null character afterwards, there is no
    // more input.
    void UpdateBuffer()
    {
        if (*m_SourceIt == NullChar)
        {
            ReadBuffer();
        }
    }

    const BasicParser & operator =(const BasicParser &); //not imp
    BasicParser(const BasicParser &); //not imp

public:
    // Binds the parser to `stream` and reads the first token.
    //   Skipws      - when true, blank tokens are skipped by NextToken.
    //   buffer_size - size of the internal buffer (at least 2 is used).
    // Throws std::runtime_error when `stream` is in a failed state, plus
    // whatever the first NextToken() throws.
    BasicParser(Stream &stream, bool Skipws = true, size_t buffer_size = BufferSize)
        : m_Stream(stream)
        , m_Buffer(buffer_size > 2 ? buffer_size : 2, Char())
        , m_BufferEndIt(m_Buffer.end() - 1)      //points always to the buffer's end
        , m_BufferIt(m_Buffer.begin())
        , m_SourceIt(m_Buffer.begin())           //cursor
        , m_SourceLineEndIt(m_Buffer.begin())    //represents the end point used
        , m_SourceTokenIt(m_Buffer.begin())      //points to the Tokens begin
        , m_SourceEndChar(NullChar)
        , m_Token(TokensClass::TokenEof)
        , m_CharPos(0)
        , m_SkipWs(Skipws)
        , m_LinePos(1)
    {
        if (!m_Stream)
        {
            throw std::runtime_error("parser: invalid istream");
        }
        NextToken();
    }

    // Advances to the next token and returns its identifier
    // (TokenEof at end of input). Blank tokens are skipped when
    // Skipws() is true.
    Token NextToken()
    {
        for (;;)
        {
            // Every newline inside the token being left behind moves the
            // next token one line down. Counted before the buffer can be
            // shifted by UpdateBuffer().
            m_LinePos += static_cast<Position>(
                std::count(m_SourceTokenIt, m_SourceIt, Char('\n')));

            UpdateBuffer();
            m_SourceTokenIt = m_SourceIt;

            if (*m_SourceIt == TokensClass::TokenEof)
            {
                m_Token = TokensClass::TokenEof; //reserved
            }
            else
            {
                m_Token = TokensClass::FindToken(m_SourceIt);
            }

            if (!(Skipws() && TokensClass::IsBlank(m_Token)))
            {
                break;
            }
        }
        return m_Token;
    }

    // Returns true when the current token is the end of input.
    bool Eof() const
    {
        return GetToken() == TokensClass::TokenEof;
    }

    // Returns the identifier of the current token.
    Token GetToken() const
    {
        return m_Token;
    }

    // Returns whether blank tokens are skipped.
    bool Skipws() const
    {
        return m_SkipWs;
    }

    // Sets whether blank tokens are skipped; returns the previous setting.
    bool Skipws(bool sws)
    {
        const bool previous = m_SkipWs;
        m_SkipWs = sws;
        return previous;
    }

    // Returns the 0-based offset of the current token in the stream.
    Position GetCharPos() const
    {
        return static_cast<Position>(m_CharPos + (m_SourceTokenIt - m_Buffer.begin()));
    }

    // Returns the 1-based line on which the current token starts.
    Position GetLinePos() const
    {
        return m_LinePos;
    }

    // Returns the 1-based column of the current token, measured from the
    // nearest preceding '\n' still held in the buffer, or from the buffer
    // begin when there is none.
    Position GetColPos() const
    {
        const ConstIterator first = m_Buffer.begin();
        const ConstIterator token = m_SourceTokenIt;

        for (ConstIterator it = token; it != first; )
        {
            --it;
            if (*it == '\n')
            {
                return static_cast<Position>(token - it);
            }
        }
        return static_cast<Position>(token - first) + 1;
    }

    // Returns the text of the current token (empty at end of input).
    String GetTokenStr() const
    {
        return String(m_SourceTokenIt, m_SourceIt);
    }
};

// Out-of-class definition so NullChar may be ODR-used without link errors.
template <class CharType, class TokensClass, int BufferSize>
const typename BasicParser<CharType, TokensClass, BufferSize>::Char
    BasicParser<CharType, TokensClass, BufferSize>::NullChar;

typedef Tra::BasicParser<char> AParser;
typedef Tra::BasicParser<wchar_t> WParser;
typedef WParser Parser;

} //namespace Tra

#endif
|