braceexpand.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. """
  2. Bash-style brace expansion
  3. Copied from: https://github.com/trendels/braceexpand/blob/main/src/braceexpand/__init__.py
  4. License: MIT
  5. """
  6. import re
  7. import string
  8. from itertools import chain, product
  9. from typing import Iterable, Iterator, Optional
# Names exported by a star-import of this module.
__all__ = ["braceexpand", "alphabet", "UnbalancedBracesError"]
  11. class UnbalancedBracesError(ValueError):
  12. pass
  13. alphabet = string.ascii_uppercase + string.ascii_lowercase
  14. int_range_re = re.compile(r"^(-?\d+)\.\.(-?\d+)(?:\.\.-?(\d+))?$")
  15. char_range_re = re.compile(r"^([A-Za-z])\.\.([A-Za-z])(?:\.\.-?(\d+))?$")
  16. escape_re = re.compile(r"\\(.)")
  17. def braceexpand(pattern: str, escape: bool = True) -> Iterator[str]:
  18. """braceexpand(pattern) -> iterator over generated strings
  19. Returns an iterator over the strings resulting from brace expansion
  20. of pattern. This function implements Brace Expansion as described in
  21. bash(1), with the following limitations:
  22. * A pattern containing unbalanced braces will raise an
  23. UnbalancedBracesError exception. In bash, unbalanced braces will either
  24. be partly expanded or ignored.
  25. * A mixed-case character range like '{Z..a}' or '{a..Z}' will not
  26. include the characters '[]^_`' between 'Z' and 'a'.
  27. When escape is True (the default), characters in pattern can be
  28. prefixed with a backslash to cause them not to be interpreted as
  29. special characters for brace expansion (such as '{', '}', ',').
  30. To pass through a a literal backslash, double it ('\\\\').
  31. When escape is False, backslashes in pattern have no special
  32. meaning and will be preserved in the output.
  33. Examples:
  34. >>> from braceexpand import braceexpand
  35. # Integer range
  36. >>> list(braceexpand('item{1..3}'))
  37. ['item1', 'item2', 'item3']
  38. # Character range
  39. >>> list(braceexpand('{a..c}'))
  40. ['a', 'b', 'c']
  41. # Sequence
  42. >>> list(braceexpand('index.html{,.backup}'))
  43. ['index.html', 'index.html.backup']
  44. # Nested patterns
  45. >>> list(braceexpand('python{2.{5..7},3.{2,3}}'))
  46. ['python2.5', 'python2.6', 'python2.7', 'python3.2', 'python3.3']
  47. # Prefixing an integer with zero causes all numbers to be padded to
  48. # the same width.
  49. >>> list(braceexpand('{07..10}'))
  50. ['07', '08', '09', '10']
  51. # An optional increment can be specified for ranges.
  52. >>> list(braceexpand('{a..g..2}'))
  53. ['a', 'c', 'e', 'g']
  54. # Ranges can go in both directions.
  55. >>> list(braceexpand('{4..1}'))
  56. ['4', '3', '2', '1']
  57. # Numbers can be negative
  58. >>> list(braceexpand('{2..-1}'))
  59. ['2', '1', '0', '-1']
  60. # Unbalanced braces raise an exception.
  61. >>> list(braceexpand('{1{2,3}'))
  62. Traceback (most recent call last):
  63. ...
  64. UnbalancedBracesError: Unbalanced braces: '{1{2,3}'
  65. # By default, the backslash is the escape character.
  66. >>> list(braceexpand(r'{1\\{2,3}'))
  67. ['1{2', '3']
  68. # Setting 'escape' to False disables backslash escaping.
  69. >>> list(braceexpand(r'\\{1,2}', escape=False))
  70. ['\\\\1', '\\\\2']
  71. """
  72. return (
  73. escape_re.sub(r"\1", s) if escape else s for s in parse_pattern(pattern, escape)
  74. )
  75. def parse_pattern(pattern: str, escape: bool) -> Iterator[str]:
  76. start = 0
  77. pos = 0
  78. bracketdepth = 0
  79. items: list[Iterable[str]] = []
  80. # print 'pattern:', pattern
  81. while pos < len(pattern):
  82. if escape and pattern[pos] == "\\":
  83. pos += 2
  84. continue
  85. elif pattern[pos] == "{":
  86. if bracketdepth == 0 and pos > start:
  87. # print 'literal:', pattern[start:pos]
  88. items.append([pattern[start:pos]])
  89. start = pos
  90. bracketdepth += 1
  91. elif pattern[pos] == "}":
  92. bracketdepth -= 1
  93. if bracketdepth == 0:
  94. # print 'expression:', pattern[start+1:pos]
  95. expr = pattern[start + 1 : pos]
  96. item = parse_expression(expr, escape)
  97. if item is None: # not a range or sequence
  98. items.extend([["{"], parse_pattern(expr, escape), ["}"]])
  99. else:
  100. items.append(item)
  101. start = pos + 1 # skip the closing brace
  102. pos += 1
  103. if bracketdepth != 0: # unbalanced braces
  104. raise UnbalancedBracesError("Unbalanced braces: '%s'" % pattern)
  105. if start < pos:
  106. items.append([pattern[start:]])
  107. return ("".join(item) for item in product(*items))
  108. def parse_expression(expr: str, escape: bool) -> Optional[Iterable[str]]:
  109. int_range_match = int_range_re.match(expr)
  110. if int_range_match:
  111. return make_int_range(*int_range_match.groups())
  112. char_range_match = char_range_re.match(expr)
  113. if char_range_match:
  114. return make_char_range(*char_range_match.groups())
  115. return parse_sequence(expr, escape)
  116. def parse_sequence(seq: str, escape: bool) -> Optional[Iterator[str]]:
  117. # sequence -> chain(*sequence_items)
  118. start = 0
  119. pos = 0
  120. bracketdepth = 0
  121. items: list[Iterable[str]] = []
  122. # print 'sequence:', seq
  123. while pos < len(seq):
  124. if escape and seq[pos] == "\\":
  125. pos += 2
  126. continue
  127. elif seq[pos] == "{":
  128. bracketdepth += 1
  129. elif seq[pos] == "}":
  130. bracketdepth -= 1
  131. elif seq[pos] == "," and bracketdepth == 0:
  132. items.append(parse_pattern(seq[start:pos], escape))
  133. start = pos + 1 # skip the comma
  134. pos += 1
  135. if bracketdepth != 0:
  136. raise UnbalancedBracesError
  137. if not items:
  138. return None
  139. # part after the last comma (may be the empty string)
  140. items.append(parse_pattern(seq[start:], escape))
  141. return chain(*items)
  142. def make_int_range(left: str, right: str, incr: Optional[str] = None) -> Iterator[str]:
  143. if any([s.startswith(("0", "-0")) for s in (left, right) if s not in ("0", "-0")]):
  144. padding = max(len(left), len(right))
  145. else:
  146. padding = 0
  147. step = (int(incr) or 1) if incr else 1
  148. start = int(left)
  149. end = int(right)
  150. r = range(start, end + 1, step) if start < end else range(start, end - 1, -step)
  151. fmt = "%0{}d".format(padding)
  152. return (fmt % i for i in r)
  153. def make_char_range(left: str, right: str, incr: Optional[str] = None) -> str:
  154. step = (int(incr) or 1) if incr else 1
  155. start = alphabet.index(left)
  156. end = alphabet.index(right)
  157. if start < end:
  158. return alphabet[start : end + 1 : step]
  159. else:
  160. end = end or -len(alphabet)
  161. return alphabet[start : end - 1 : -step]
  162. if __name__ == "__main__":
  163. import doctest
  164. import sys
  165. failed, _ = doctest.testmod(optionflags=doctest.IGNORE_EXCEPTION_DETAIL)
  166. if failed:
  167. sys.exit(1)