Sequence Types

There are seven sequence types: strings, Unicode strings, lists, tuples, bytearrays, buffers, and xrange objects. For other containers, see the built-in dict and set classes. These types can be used in RPython just as in Python.

def sequence():
    s = "this is a string"
    u = u"this is a unicode string"
    l = ["this", "is", "a", "list", "of", "strings"]
    t = ("first", "second")
    ba = bytearray(b"\x44\x45\x41\x44\x42\x45\x45\x46")
    buf = buffer(s, 10, 6)
    r = xrange(0, 5, 2)

    print s; print u; print l; print t; print ba; print buf
    for i in r: print i

    # x in s: True if an item of s is equal to x, else False
    if 't' in s:
        print "t is in string", s

    # x not in s:  False if an item of s is equal to x, else True
    if 'b' not in u:
        print "b is not in unicode string", u

    # s + t: the concatenation of s and t
    print l + [":)"]

    # s * n, n * s: equivalent to adding s to itself n times
    print t * 2

    # s[i]: ith item of s, origin 0
    print "3rd item of l is:", l[2]

    # s[i:j]: slice of s from i to j
    print "slice of s:", s[2:-1]

    # len(s): length of s
    print "length of ba is", len(ba)

    # s.index(x): index of the first occurrence of x in s
    print l.index("of")

    # s.count(x): total number of occurrences of x in s
    print s.count("is")

def entry_point(argv):
    """Run the sequence demo and return 0; ``argv`` is not used."""
    sequence()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    # Run directly as a script: invoke the demo with the command line.
    import sys

    entry_point(sys.argv)

Most operations for sequence types are supported in RPython, but some have restrictions, such as slicing with negative indexes and the min/max built-in functions.

################### UNSUPPORTED IN RPYTHON ######################

def sequence_unsupported():
    l = ["this", "is", "a", "list", "of", "strings"]

    print "slice of l from 3rd to end:", l[2:-1]
    # s[i:j:k]: slice of s from i to j with step k
    print "slice of l from 1st to 4th with 2 steps:", l[0:5:2]

    # min(s): smallest item of s
    list_number = [1, 2, 3]
    print "smallest item of s", min(list_number)

    # max(s): largest item of s
    print "largest item of s", max(list_number)

def entry_point(argv):
    """Run the unsupported-sequence demo and return 0; ``argv`` is not used."""
    sequence_unsupported()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    # Run directly as a script: invoke the demo with the command line.
    import sys

    entry_point(sys.argv)

string

Only a few string methods are supported in RPython. Besides the limited set of methods, there are also some restrictions on indexing, slicing, and string formatting. Also, note that all items in a list must have the same type; you cannot mix types in an RPython list. The following examples show some supported methods and usages in RPython.

# -*- coding: utf-8 -*-
def strings():
    s = "this is a string"

    print s.count("this", 0, len(s))

    if s.endswith("ing"): print "s ends with string"

    if s.find("is"): print "found is in s"

    s2 = "thisisastring2"
    if s2.isalnum(): print "s2 is alnum"

    s3 = "thisisastringthree"
    if s3.isalpha(): print "s3 is alpha"

    s4 = "12345"
    if s4.isdigit(): print "s4 is digit"

    l = ["this", "is", "a", "string"]
    print " ".join(l)

    print "THI IS A STRING".lower()
    print '   spacious   '.lstrip()
    print s.rfind("is")
    print s.rsplit(" ")
    print s.split(" ")

    s_lines = "This is a string.\nAnd another string."
    print s_lines.splitlines()

    if s.startswith("this"): print "s starts with this"

    print '   spacious   '.strip()
    print s.upper()

    print "%s, %d, %x, %o, %f" % ("string", 1, 16, 9, 3.14)

def entry_point(argv):
    """Run the string demo and return 0; ``argv`` is not used."""
    strings()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    # Run directly as a script: invoke the demo with the command line.
    import sys

    entry_point(sys.argv)

Attention

  • Not all string methods are supported, and those that are supported do not necessarily accept all arguments.

  • Indexes can be negative. In case they are not, then you get slightly more efficient code if the translator can prove that they are non-negative. When slicing a string it is necessary to prove that the slice start and stop indexes are non-negative.

  • No implicit str-to-unicode cast.

  • Simple string formatting using the % operator works, as long as the format string is known at translation time; the only supported formatting specifiers are %s, %d, %x, %o, %f, plus %r but only for user-defined instances

  • Modifiers such as conversion flags, precision, length etc. are not supported.

  • It is forbidden to mix unicode and strings when formatting.

The following examples show some methods and usages that are unsupported in RPython but work in a normal Python implementation.

# -*- coding: utf-8 -*-

################### UNSUPPORTED IN RPYTHON ######################
def strings_unsupported():
    s = "this is a string"

    print s.capitalize()
    print s.center(30)

    s_utf8 = "你好".decode("utf-8")
    print s_utf8

    print '01\t012\t0123\t01234'.expandtabs()

    print "The sum of 1 + 2 is {0}".format(1+2)
    print s.index("is")

    s5 = "this is lowercased string"
    if s5.islower(): print "s5 is lowercased"

    s6 = "         "
    if s6.isspace(): print "s6 contains only whitespaces"

    s7 = "This Is A Title String"
    if s7.istitle(): print "s7 is a title cased string"

    s8 = "THIS IS A SUPPER STRING"
    if s8.isupper(): print "s8 is all cased characters"

    print s.ljust(10)
    print s.partition(" ")
    print s.replace("this", "that")
    print s.rindex("is")
    print s.rjust(10)
    print s.rpartition(" ")
    print s.swapcase()
    print "they're bill's friends from the UK".title()
    print s.zfill(20)
    print "Hello, %(name)s" % {"name": "world"}

def entry_point(argv):
    """Run the unsupported-string demo and return 0; ``argv`` is not used."""
    strings_unsupported()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    import sys

    # entry_point() already runs strings_unsupported(); calling the demo a
    # second time here would only print the same output twice.
    entry_point(sys.argv)

list

Most list operations and methods are supported in RPython. However, methods may have some restrictions. The following examples illustrate usages of the list in RPython and some unsupported operations as well.

def lists():
    l = [0, 1, 2, 3, 4]
    print l[3]
    print l[-1]

    # Indexes are checked when requested by an IndexError exception clause.
    try:
        print l[100]
    except IndexError:
        print "IndexError"

    print l[3:5]
    print l[0:-1]

    l[0:2] = [100, 101, 102]
    print l

    # other operators: +, +=, in, *, *=, ==, != work as expected.

    l = [0] + [1]    # [0, 1]
    l *= 2           # [0, 1, 0, 1]

    # append, index, insert, extend, reverse, pop. The index used in pop()
    # follows the same rules as for indexing above. The index used in insert()
    # must be within bounds and not negative.
    l = []
    l.append(0)          # [0]
    print l.index(0)
    l.insert(1, 1)       # [0, 1]
    l.extend([2, 3, 4])  # [0, 1, 2, 3, 4]
    l.reverse()          # [4, 3, 2, 1, 0]
    l.pop()              # [4, 3, 2, 1]
    del l[0:1]           # [3, 2, 1]
    l.remove(1)          # [3, 2]
    print l

def entry_point(argv):
    """Run the list demo and return 0; ``argv`` is not used."""
    lists()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    # Run directly as a script: invoke the demo with the command line.
    import sys

    entry_point(sys.argv)

Attention

Lists are implemented as an allocated array. Lists are over-allocated, so list.append() is reasonably fast. However, if you use a fixed-size list, the code is more efficient. The annotator can figure out most of the time that your list is fixed-size, even when you use a list comprehension. Negative or out-of-bound indexes are only allowed for the most common operations, as follows:

  • indexing: positive and negative indexes are allowed. Indexes are checked when requested by an IndexError exception clause.

  • slicing: the slice start must be within bounds. The stop doesn’t need to, but it must not be smaller than the start. All negative indexes are disallowed, except for the [:-1] special case. No step. Slice deletion follows the same rules.

  • slice assignment: only supports lst[x:y] = sublist, if len(sublist) == y - x. In other words, slice assignment cannot change the total length of the list, but just replace items.

  • other operators: +, +=, in, *, *=, ==, != work as expected.

  • methods: append, index, insert, extend, reverse, pop. The index used in pop() follows the same rules as for indexing above. The index used in insert() must be within bounds and not negative.

################### UNSUPPORTED IN RPYTHON ######################

def lists_unsupported():
    l = [0, 1, 2, 3, 4]

    # In RPython, stop doesn't need to within bounds, but it must not be
    # smaller than the start
    print l[3:2]

    # In RPython, all negative indexes are disallowed, except for the [:-1]
    # special case.
    print l[0:-2]

    # No step in RPython
    print l[0:-1:2]

    # Slice assignment cannot change the total length of the list, but just
    # replace items
    l[0:2] = [100, 101, 102, 103]
    print l

    # The sort() and count() methods are not supported
    l.sort()
    print l.count(103)

    # Mixing types in a list is not supported in RPython
    l.append("mix integer with string types in a list")
    print l

def entry_point(argv):
    """Run the unsupported-list demo and return 0; ``argv`` is not used."""
    lists_unsupported()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    import sys

    # entry_point() already runs lists_unsupported(); calling the demo a
    # second time here would only print the same output twice.
    entry_point(sys.argv)

tuple

Tuples in RPython are very different from tuples in Python, and there are many restrictions on their use.

def tuples():
    t = (1, 2)
    i, j = t    # "unpack" a tuple
    print t, i, j
    t = (1, "123") # mixing types in a tuple
    print t

    l = [1, 2]
    t = (l[0], l[1])    # manually convert from a list to a tuple
    print t


def tuples_unsupported():
    l = [1, "123"]   # mixing types in lists is not supported in RPython
    t = tuple(l)     # converting from list to tuple is unsupported
    print t

def entry_point(argv):
    """Run the supported tuple demo and return 0; ``argv`` is not used."""
    tuples()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    import sys

    entry_point(sys.argv)
    # entry_point() only covers tuples(); the unsupported variant is run
    # separately when the file is executed as a script.
    tuples_unsupported()

The restrictions can be summarized as follows.

Attention

  • no variable-length tuples; use them to store or return pairs or n-tuples of values. Each combination of types for elements and length constitute a separate and not mixable type.

  • There is no general way to convert a list into a tuple, because the length of the result would not be known statically. (You can of course do t = (lst[0], lst[1], lst[2]) if you know that lst has got 3 items.)

xrange

The following examples illustrate the usage of range and xrange in RPython. range and xrange are identical. range does not necessarily create an array, only if the result is modified.

def rrange():
    # range and xrange are same in RPython
    for i in range(0, 10, 2):
        print i
    for i in xrange(0, 10, 3):
        print i

def entry_point(argv):
    """Run the range/xrange demo and return 0; ``argv`` is not used."""
    rrange()
    return 0


def target(*args):
    """Return the ``entry_point`` function, whatever arguments are passed."""
    return entry_point


if __name__ == "__main__":
    # Run directly as a script: invoke the demo with the command line.
    import sys

    entry_point(sys.argv)