| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- """
- Bash-style brace expansion
- Copied from: https://github.com/trendels/braceexpand/blob/main/src/braceexpand/__init__.py
- License: MIT
- """
- import re
- import string
- from itertools import chain, product
- from typing import Iterable, Iterator, Optional
- __all__ = ["braceexpand", "alphabet", "UnbalancedBracesError"]
class UnbalancedBracesError(ValueError):
    """Raised when a pattern's opening and closing braces do not pair up."""
# Characters usable in a character range: upper-case letters first, then
# lower-case, so a mixed-case range never passes through punctuation.
alphabet = string.ascii_uppercase + string.ascii_lowercase

# "<int>..<int>" with an optional "..<step>"; the sign of the step is matched
# but not captured, so only its magnitude reaches the range builder.
int_range_re = re.compile(r"^(-?\d+)\.\.(-?\d+)(?:\.\.-?(\d+))?$")

# "<letter>..<letter>" with the same optional "..<step>" suffix.
char_range_re = re.compile(r"^([A-Za-z])\.\.([A-Za-z])(?:\.\.-?(\d+))?$")

# A backslash followed by any single character (used to strip escapes).
escape_re = re.compile(r"\\(.)")
def braceexpand(pattern: str, escape: bool = True) -> Iterator[str]:
    """Return an iterator over the strings produced by brace-expanding pattern.

    Brace Expansion follows the description in bash(1), with these
    limitations:

    * A pattern containing unbalanced braces raises an
      UnbalancedBracesError exception. In bash, unbalanced braces are
      either partly expanded or ignored.

    * A mixed-case character range like '{Z..a}' or '{a..Z}' does not
      include the characters '[]^_`' between 'Z' and 'a'.

    When escape is True (the default), a character in pattern can be
    prefixed with a backslash so that it is not interpreted as a special
    character for brace expansion (such as '{', '}', ',').
    To pass through a literal backslash, double it ('\\\\').

    When escape is False, backslashes in pattern have no special
    meaning and are preserved in the output.

    Examples:

    >>> from braceexpand import braceexpand

    # Integer range
    >>> list(braceexpand('item{1..3}'))
    ['item1', 'item2', 'item3']

    # Character range
    >>> list(braceexpand('{a..c}'))
    ['a', 'b', 'c']

    # Sequence
    >>> list(braceexpand('index.html{,.backup}'))
    ['index.html', 'index.html.backup']

    # Nested patterns
    >>> list(braceexpand('python{2.{5..7},3.{2,3}}'))
    ['python2.5', 'python2.6', 'python2.7', 'python3.2', 'python3.3']

    # Prefixing an integer with zero causes all numbers to be padded to
    # the same width.
    >>> list(braceexpand('{07..10}'))
    ['07', '08', '09', '10']

    # An optional increment can be specified for ranges.
    >>> list(braceexpand('{a..g..2}'))
    ['a', 'c', 'e', 'g']

    # Ranges can go in both directions.
    >>> list(braceexpand('{4..1}'))
    ['4', '3', '2', '1']

    # Numbers can be negative
    >>> list(braceexpand('{2..-1}'))
    ['2', '1', '0', '-1']

    # Unbalanced braces raise an exception.
    >>> list(braceexpand('{1{2,3}'))
    Traceback (most recent call last):
        ...
    UnbalancedBracesError: Unbalanced braces: '{1{2,3}'

    # By default, the backslash is the escape character.
    >>> list(braceexpand(r'{1\\{2,3}'))
    ['1{2', '3']

    # Setting 'escape' to False disables backslash escaping.
    >>> list(braceexpand(r'\\{1,2}', escape=False))
    ['\\\\1', '\\\\2']

    """
    # The pattern is parsed right away, so unbalanced braces are reported
    # by this call rather than on first iteration of the result.
    expanded = parse_pattern(pattern, escape)
    if not escape:
        return expanded
    # Escapes were kept intact during parsing; strip the backslashes now.
    return (escape_re.sub(r"\1", text) for text in expanded)
def parse_pattern(pattern: str, escape: bool) -> Iterator[str]:
    """Expand the top-level brace groups of *pattern*.

    The pattern is cut into literal runs and top-level ``{...}`` groups.
    Each group's contents go to ``parse_expression``; when that returns
    ``None`` (neither a range nor a sequence) the braces are kept literally
    around the recursively expanded contents.  The result yields the
    concatenation of one alternative per piece, for every combination.

    When *escape* is true, a backslash and the character after it are
    skipped while scanning, so an escaped brace never changes the depth.

    Raises UnbalancedBracesError if the brace depth is not zero once the
    whole pattern has been scanned.
    """
    pieces: list[Iterable[str]] = []
    anchor = 0  # where the pending literal run or top-level group begins
    cursor = 0
    depth = 0

    while cursor < len(pattern):
        char = pattern[cursor]

        if escape and char == "\\":
            # Step over the backslash and the character it protects.
            cursor += 2
            continue

        if char == "{":
            if depth == 0 and cursor > anchor:
                # Flush the literal text preceding this top-level group.
                pieces.append([pattern[anchor:cursor]])
                anchor = cursor
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                inner = pattern[anchor + 1 : cursor]
                expansion = parse_expression(inner, escape)
                if expansion is not None:
                    pieces.append(expansion)
                else:
                    # Not a range or sequence: keep the braces as text.
                    nested = parse_pattern(inner, escape)
                    pieces += [["{"], nested, ["}"]]
                anchor = cursor + 1  # resume after the closing brace

        cursor += 1

    if depth != 0:
        raise UnbalancedBracesError("Unbalanced braces: '%s'" % pattern)

    if anchor < cursor:
        # Trailing literal text after the last group.
        pieces.append([pattern[anchor:]])

    return ("".join(combo) for combo in product(*pieces))
def parse_expression(expr: str, escape: bool) -> Optional[Iterable[str]]:
    """Expand the contents of one brace group.

    *expr* is tried as an integer range first, then as a character range;
    anything else is handed to ``parse_sequence``, whose result (possibly
    ``None``) is returned unchanged.
    """
    range_builders = (
        (int_range_re, make_int_range),
        (char_range_re, make_char_range),
    )
    for matcher, build in range_builders:
        found = matcher.match(expr)
        if found:
            return build(*found.groups())
    return parse_sequence(expr, escape)
def parse_sequence(seq: str, escape: bool) -> Optional[Iterator[str]]:
    """Expand a comma-separated sequence such as ``a,b{1,2},c``.

    *seq* is split on commas that sit outside any nested braces, and each
    piece is expanded with ``parse_pattern``.  The expansions are chained
    in order.

    When *escape* is true, a backslash and the character after it are
    skipped while scanning, so an escaped comma or brace is not special.

    Returns ``None`` when *seq* contains no top-level comma, i.e. it is not
    a sequence at all.

    Raises UnbalancedBracesError if the braces inside *seq* do not balance.
    """
    start = 0
    pos = 0
    bracketdepth = 0
    items: list[Iterable[str]] = []

    while pos < len(seq):
        char = seq[pos]
        if escape and char == "\\":
            # Step over the backslash and the character it protects.
            pos += 2
            continue
        if char == "{":
            bracketdepth += 1
        elif char == "}":
            bracketdepth -= 1
        elif char == "," and bracketdepth == 0:
            items.append(parse_pattern(seq[start:pos], escape))
            start = pos + 1  # skip the comma
        pos += 1

    if bracketdepth != 0:
        # Report the offending text, matching parse_pattern's message format.
        raise UnbalancedBracesError("Unbalanced braces: '%s'" % seq)

    if not items:
        return None

    # Part after the last comma (may be the empty string).
    items.append(parse_pattern(seq[start:], escape))
    return chain(*items)
def make_int_range(left: str, right: str, incr: Optional[str] = None) -> Iterator[str]:
    """Yield the formatted integers from *left* to *right*, inclusive.

    The bounds and the optional increment arrive as strings.  A missing,
    empty or zero increment means a step of 1.  The range counts upward
    when ``left < right`` and downward otherwise.  If either bound has a
    leading zero (and is not just "0" or "-0"), every number is zero-padded
    to the width of the longer bound string.
    """
    # Decide the zero-padding width from how the bounds were written.
    width = 0
    for bound in (left, right):
        if bound not in ("0", "-0") and bound.startswith(("0", "-0")):
            width = max(len(left), len(right))
            break

    stride = 1
    if incr:
        stride = int(incr) or 1  # a step of 0 falls back to 1

    first = int(left)
    last = int(right)
    if first < last:
        numbers = range(first, last + 1, stride)
    else:
        numbers = range(first, last - 1, -stride)

    template = "%0{}d".format(width)
    return (template % number for number in numbers)
def make_char_range(left: str, right: str, incr: Optional[str] = None) -> str:
    """Return the letters from *left* to *right*, inclusive, as one string.

    Positions are looked up in the module-level ``alphabet``, so a
    character that is not in it makes ``str.index`` raise ValueError.  A
    missing, empty or zero increment means a step of 1.  The range runs
    forward when *left* comes before *right* in ``alphabet`` and backward
    otherwise.
    """
    stride = 1
    if incr:
        stride = int(incr) or 1  # a step of 0 falls back to 1

    first = alphabet.index(left)
    last = alphabet.index(right)

    if first < last:
        return alphabet[first : last + 1 : stride]

    # Descending slice: a stop of ``0 - 1`` would mean "last element", so
    # when the range must reach index 0 use a stop that lies before the
    # start of the string instead.
    stop = last if last else -len(alphabet)
    return alphabet[first : stop - 1 : -stride]
if __name__ == "__main__":
    # Run the doctests embedded in this module; exit non-zero on failure.
    import doctest
    import sys

    # IGNORE_EXCEPTION_DETAIL lets the traceback example match regardless
    # of how the exception's module path and message are rendered.
    outcome = doctest.testmod(optionflags=doctest.IGNORE_EXCEPTION_DETAIL)
    if outcome.failed:
        sys.exit(1)
|