Add an action `testing::ReturnRoundRobin` which, on each call, returns the next element in the sequence, restarting at the beginning once it has reached the end.

PiperOrigin-RevId: 276312136
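`ReturnRoundRobin` itself is a C++ gMock action and does not appear in the Python file below; as a minimal, hypothetical sketch, the round-robin semantics described above can be modeled in Python (the language of this file) like this:

    import itertools

    def make_round_robin(values):
        # Illustrative helper, not part of gMock: each call to the returned
        # function yields the next element, restarting at the beginning once
        # it has reached the end.
        cycle = itertools.cycle(values)
        return lambda: next(cycle)

    get_next = make_round_robin([1, 2, 3])
    assert [get_next() for _ in range(5)] == [1, 2, 3, 1, 2]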
#!/usr/bin/env python
#
# Copyright 2007, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Tokenize C++ source code."""
 | 
						|
 | 
						|
try:
 | 
						|
    # Python 3.x
 | 
						|
    import builtins
 | 
						|
except ImportError:
 | 
						|
    # Python 2.x
 | 
						|
    import __builtin__ as builtins
 | 
						|
 | 
						|
 | 
						|
import sys
 | 
						|
 | 
						|
from cpp import utils
 | 
						|
 | 
						|
 | 
						|
if not hasattr(builtins, 'set'):
 | 
						|
    # Nominal support for Python 2.3.
 | 
						|
    from sets import Set as set
 | 
						|
 | 
						|
 | 
						|
# Add $ as a valid identifier char since so much code uses it.
_letters = 'abcdefghijklmnopqrstuvwxyz'
VALID_IDENTIFIER_CHARS = set(_letters + _letters.upper() + '_0123456789$')
HEX_DIGITS = set('0123456789abcdefABCDEF')
INT_OR_FLOAT_DIGITS = set('0123456789eE-+')


# C++0x string prefixes.
_STR_PREFIXES = set(('R', 'u8', 'u8R', 'u', 'uR', 'U', 'UR', 'L', 'LR'))

# Token types.
UNKNOWN = 'UNKNOWN'
SYNTAX = 'SYNTAX'
CONSTANT = 'CONSTANT'
NAME = 'NAME'
PREPROCESSOR = 'PREPROCESSOR'

# Where the token originated from.  This can be used for backtracking.
# It is always set to WHENCE_STREAM in this code.
WHENCE_STREAM, WHENCE_QUEUE = range(2)

class Token(object):

    """Data container to represent a C++ token.

    Tokens can be identifiers, syntax char(s), constants, or
    pre-processor directives.

    start contains the index of the first char of the token in the source.
    end contains the index just past the last char of the token, so
    source[start:end] is the token's text.
    """

    def __init__(self, token_type, name, start, end):
        self.token_type = token_type
        self.name = name
        self.start = start
        self.end = end
        self.whence = WHENCE_STREAM

    def __str__(self):
        if not utils.DEBUG:
            return 'Token(%r)' % self.name
        return 'Token(%r, %s, %s)' % (self.name, self.start, self.end)

    __repr__ = __str__
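
# Illustrative example (not part of the original script): for the C++ source
# 'int x;', GetTokens() below yields Token(NAME, 'int', 0, 3),
# Token(NAME, 'x', 4, 5), and Token(SYNTAX, ';', 5, 6).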


def _GetString(source, start, i):
    i = source.find('"', i+1)
    while source[i-1] == '\\':
        # Count the trailing backslashes.
        backslash_count = 1
        j = i - 2
        while source[j] == '\\':
            backslash_count += 1
            j -= 1
        # When trailing backslashes are even, they escape each other.
        if (backslash_count % 2) == 0:
            break
        i = source.find('"', i+1)
    return i + 1
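
# Worked example (illustrative, not in the original): in the C++ source text
# "abc\\" the closing quote is preceded by two backslashes, which escape each
# other, so the string really ends there; in "abc\"... the single backslash
# escapes the quote and the scan continues to the next '"' in the source.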


def _GetChar(source, start, i):
    # NOTE(nnorwitz): may not be quite correct, should be good enough.
    i = source.find("'", i+1)
    while source[i-1] == '\\':
        # Need to special case '\\'.
        if (i - 2) > start and source[i-2] == '\\':
            break
        i = source.find("'", i+1)
    # Try to handle unterminated single quotes (in a #if 0 block).
    if i < 0:
        i = start
    return i + 1
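
# Illustrative cases (not in the original): for the char literal '\'' the
# backslash check keeps scanning past the escaped quote, while for '\\' the
# special case above stops at the real closing quote.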


def GetTokens(source):
    """Returns a sequence of Tokens.

    Args:
      source: string of C++ source code.

    Yields:
      Token that represents the next token in the source.
    """
    # Cache various valid character sets for speed.
    valid_identifier_chars = VALID_IDENTIFIER_CHARS
    hex_digits = HEX_DIGITS
    int_or_float_digits = INT_OR_FLOAT_DIGITS
    int_or_float_digits2 = int_or_float_digits | set('.')

    # Only ignore errors while in a #if 0 block.
    ignore_errors = False
    count_ifs = 0

    i = 0
    end = len(source)
    while i < end:
        # Skip whitespace.
        while i < end and source[i].isspace():
            i += 1
        if i >= end:
            return

        token_type = UNKNOWN
        start = i
        c = source[i]
        if c.isalpha() or c == '_':              # Find an identifier token.
            token_type = NAME
            while source[i] in valid_identifier_chars:
                i += 1
            # String and character constants can look like a name if
            # they are something like L"".
            if (source[i] == "'" and (i - start) == 1 and
                source[start:i] in 'uUL'):
                # u, U, and L are valid C++0x character prefixes.
                token_type = CONSTANT
                i = _GetChar(source, start, i)
            elif source[i] == "'" and source[start:i] in _STR_PREFIXES:
                token_type = CONSTANT
                i = _GetString(source, start, i)
        elif c == '/' and source[i+1] == '/':    # Find // comments.
            i = source.find('\n', i)
            if i == -1:  # Handle EOF.
                i = end
            continue
        elif c == '/' and source[i+1] == '*':    # Find /* comments. */
            i = source.find('*/', i) + 2
            continue
        elif c in ':+-<>&|*=':                   # : or :: (plus other chars).
            token_type = SYNTAX
            i += 1
            new_ch = source[i]
            if new_ch == c and c != '>':         # Treat ">>" as two tokens.
                i += 1
            elif c == '-' and new_ch == '>':
                i += 1
            elif new_ch == '=':
                i += 1
        elif c in '()[]{}~!?^%;/.,':             # Handle single char tokens.
            token_type = SYNTAX
            i += 1
            if c == '.' and source[i].isdigit():
                token_type = CONSTANT
                i += 1
                while source[i] in int_or_float_digits:
                    i += 1
                # Handle float suffixes.
                for suffix in ('l', 'f'):
                    if suffix == source[i:i+1].lower():
                        i += 1
                        break
        elif c.isdigit():                        # Find integer.
            token_type = CONSTANT
            if c == '0' and source[i+1] in 'xX':
                # Handle hex digits.
                i += 2
                while source[i] in hex_digits:
                    i += 1
            else:
                while source[i] in int_or_float_digits2:
                    i += 1
            # Handle integer (and float) suffixes.
            for suffix in ('ull', 'll', 'ul', 'l', 'f', 'u'):
                size = len(suffix)
                if suffix == source[i:i+size].lower():
                    i += size
                    break
        elif c == '"':                           # Find string.
            token_type = CONSTANT
            i = _GetString(source, start, i)
        elif c == "'":                           # Find char.
            token_type = CONSTANT
            i = _GetChar(source, start, i)
        elif c == '#':                           # Find pre-processor command.
            token_type = PREPROCESSOR
            got_if = source[i:i+3] == '#if' and source[i+3:i+4].isspace()
            if got_if:
                count_ifs += 1
            elif source[i:i+6] == '#endif':
                count_ifs -= 1
                if count_ifs == 0:
                    ignore_errors = False

            # TODO(nnorwitz): handle preprocessor statements (\ continuations).
            while 1:
                i1 = source.find('\n', i)
                i2 = source.find('//', i)
                i3 = source.find('/*', i)
                i4 = source.find('"', i)
                # NOTE(nnorwitz): doesn't handle comments in #define macros.
                # Get the first important symbol (newline, comment, EOF/end).
                i = min([x for x in (i1, i2, i3, i4, end) if x != -1])

                # Handle #include "dir//foo.h" properly.
                if source[i] == '"':
                    i = source.find('"', i+1) + 1
                    assert i > 0
                    continue
                # Keep going if end of the line and the line ends with \.
                if not (i == i1 and source[i-1] == '\\'):
                    if got_if:
                        condition = source[start+4:i].lstrip()
                        if (condition.startswith('0') or
                            condition.startswith('(0)')):
                            ignore_errors = True
                    break
                i += 1
        elif c == '\\':                          # Handle \ in code.
            # This is different from the pre-processor \ handling.
            i += 1
            continue
        elif ignore_errors:
            # The tokenizer seems to be in pretty good shape.  This
            # raise is conditionally disabled so that bogus code
            # in an #if 0 block can be handled.  Since we will ignore
            # it anyway, this is probably fine.  So disable the
            # exception and return the bogus char.
            i += 1
        else:
            sys.stderr.write('Got invalid token in %s @ %d token:%s: %r\n' %
                             ('?', i, c, source[i-10:i+10]))
            raise RuntimeError('unexpected token')

        if i <= 0:
            print('Invalid index, exiting now.')
            return
        yield Token(token_type, source[start:i], start, i)


if __name__ == '__main__':
    def main(argv):
        """Driver mostly for testing purposes."""
        for filename in argv[1:]:
            source = utils.ReadFile(filename)
            if source is None:
                continue

            for token in GetTokens(source):
                print('%-12s: %s' % (token.token_type, token.name))
                # print('\r%6.2f%%' % (100.0 * index / token.end),)
            sys.stdout.write('\n')

    main(sys.argv)
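
# Example invocation (illustrative; assumes the 'cpp' package is on the
# Python path).  For a file foo.h containing 'int x;' this prints:
#
#   $ python tokenize.py foo.h
#   NAME        : int
#   NAME        : x
#   SYNTAX      : ;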