kwutil.util_pattern module¶
An encapsulation of regex and glob (and maybe other) patterns.
Note
This implementation is maintained in kwutil and xdev. These versions should be kept in sync.
- See:
~/code/kwutil/kwutil/util_pattern.py ~/code/xdev/xdev/patterns.py
Todo
rectify with xdev / whatever package this goes in
- class kwutil.util_pattern.PatternBase[source]¶
Bases:
object
Abstract class that defines the Pattern API
- kwutil.util_pattern._maybe_expandable_glob(pat)[source]¶
Determine if a string might be an expandable glob pattern by looking for special glob characters: *, ? and [].
Note
! is also special, but always inside of a [] bracket, so we don't need to check it.
- Returns:
- If False, then the input is 100% not an expandable glob pattern
(although it could still be a glob pattern, but it is equivalent to strict matching). If True, then there are special glob characters in the string, but it is not guaranteed to be a valid glob pattern.
- Return type:
bool
- class kwutil.util_pattern.Pattern(pattern, backend)[source]¶
Bases:
PatternBase, NiceRepr
Provides a common API to several common pattern matching syntaxes.
A general patterns class, which can use a backend from BACKENDS
- Parameters:
pattern (str | object) – The pattern text or a precompiled backend pattern object
backend (str) – Code indicating what backend the pattern text should be interpreted with. See BACKENDS for available choices.
Notes
# BACKENDS
The glob backend uses the fnmatch module [fnmatch_docs]. The regex backend uses the Python re module. The strict backend uses “==” string equality testing. The parse backend uses the parse module.
References
Example
>>> # The most flexible way to define a pattern is using the
>>> # coerce method with a prefixed pattern string.
>>> import kwutil

>>> # Glob pattern: matches filenames ending in .jpg
>>> pat = kwutil.Pattern.coerce('glob:*.jpg')
>>> assert pat.match('image.jpg')
>>> assert not pat.match('image.png')

>>> # Regex pattern: similar logic using regular expressions
>>> pat = kwutil.Pattern.coerce(r'regex:.*\.jpg')
>>> assert pat.match('photo.jpg')
>>> assert not pat.match('photo.jpeg')

>>> # Strict pattern: exact string match
>>> pat = kwutil.Pattern.coerce('strict:hello.jpg')
>>> assert pat.match('hello.jpg')
>>> assert not pat.match('hello2.jpg')

>>> # Parse pattern: extract named groups from string
>>> # xdoctest: +REQUIRES(module:parse)
>>> pat = kwutil.Pattern.coerce('parse:{name}.jpg')
>>> assert pat.match('cat.jpg').named == {'name': 'cat'}
>>> assert pat.match('cat.png') is None
Example
>>> # But you can also explicitly define the backend with a hint.
>>> # Test Regex backend
>>> repat = Pattern.coerce('foo.*', 'regex')
>>> assert repat.match('foobar')
>>> assert not repat.match('barfoo')
>>> match = repat.search('baz-biz-foobar')
>>> match = repat.match('baz-biz-foobar')
>>> # Test Glob backend
>>> globpat = Pattern.coerce('foo*', 'glob')
>>> assert globpat.match('foobar')
>>> assert not globpat.match('barfoo')
>>> globpat = Pattern.coerce('[foo|bar]', 'glob')
>>> assert not globpat.match('foo')
Example
>>> # xdoctest: +REQUIRES(module:parse)
>>> # Test parse backend
>>> pattern1 = Pattern.coerce('A {adjective} pattern', 'parse')
>>> result1 = pattern1.match('A cool pattern')
>>> print(f'result1.named = {ub.urepr(result1.named, nl=1)}')
>>> pattern2 = pattern1.to_regex()
>>> result2 = pattern2.match('A cool pattern')
- to_regex()[source]¶
Returns an equivalent pattern with the regular expression backend
- Returns:
Pattern
Example
>>> globpat = Pattern.coerce('foo*', 'glob')
>>> strictpat = Pattern.coerce('foo*', 'strict')
>>> repat1 = strictpat.to_regex()
>>> repat2 = globpat.to_regex()
>>> print(f'repat1={repat1}')
>>> print(f'repat2={repat2}')
- classmethod from_regex(data, flags=0, multiline=False, dotall=False, ignorecase=False)[source]¶
Create a Pattern object with a regex backend.
- _prefix_mappings = {'exact:': 'strict', 'glob:': 'glob', 'parse:': 'parse', 'regex:': 'regex', 'strict:': 'strict'}¶
- classmethod coerce_backend(data, hint='auto')[source]¶
Example
>>> assert Pattern.coerce_backend('foo', hint='auto')[1] == 'strict'
>>> assert Pattern.coerce_backend('foo*', hint='auto')[1] == 'glob'
>>> assert Pattern.coerce_backend(re.compile('foo*'), hint='auto')[1] == 'regex'
- classmethod coerce(data, hint='auto')[source]¶
Attempt to automatically interpret the input data with the appropriate pattern backend. If it cannot be determined, then fall back to the hint.
- Parameters:
data (str | Pattern | PathLike) – an input string or existing object
hint (str) – can be ‘glob’, ‘regex’, ‘strict’ or ‘auto’. In ‘auto’ we will use ‘glob’ if the input is a string and ‘*’ is in the pattern, otherwise we will use strict. Pattern inputs keep their existing interpretation.
Example
>>> import kwutil
>>> # Coerce assumes glob if there is a star
>>> pat = kwutil.Pattern.coerce('foo*')
>>> bool(pat.match('foobar'))
True
>>> # Otherwise it is a strict match
>>> pat = kwutil.Pattern.coerce('foo')
>>> bool(pat.match('foobar'))
False
Example
>>> # xdoctest: +REQUIRES(module:parse)
>>> import kwutil
>>> # The hint can explicitly specify the backend to use
>>> pat1 = kwutil.Pattern.coerce('foo.*', 'glob')
>>> pat2 = kwutil.Pattern.coerce('foo.*', 'regex')
>>> pat3 = kwutil.Pattern.coerce('foo.{}*', 'parse')
>>> inputs = ['spam', 'foobar', 'foo.bar', 'foo.bar*']
>>> print([bool(pat1.match(x)) for x in inputs])
>>> print([bool(pat2.match(x)) for x in inputs])
>>> print([bool(pat3.match(x)) for x in inputs])
[False, False, True, True]
[False, True, True, True]
[False, False, False, True]
Example
>>> # The hint can explicitly specify the backend to use
>>> import kwutil
>>> pat = kwutil.Pattern.coerce('foo*', 'glob')
>>> # A hint is ignored if the input data is not a string
>>> pat2 = kwutil.Pattern.coerce(pat, 'regex')
>>> assert pat2.backend == 'glob'
Example
>>> from kwutil.util_pattern import * # NOQA
>>> assert Pattern.coerce('glob:*.jpg').backend == 'glob'
>>> assert Pattern.coerce('regex:.*\.jpg').backend == 'regex'
>>> assert Pattern.coerce('exact:hello.jpg').backend == 'strict'
>>> assert Pattern.coerce('strict:hello.jpg').backend == 'strict'
>>> assert Pattern.coerce('hello*.jpg').backend == 'glob'
>>> assert Pattern.coerce('hello.jpg').backend == 'strict'
>>> assert Pattern.coerce('nopat:data').backend == 'strict'
>>> assert Pattern.coerce('foo').backend == 'strict'
>>> assert Pattern.coerce('foo*').backend == 'glob'
>>> assert Pattern.coerce(re.compile('foo*')) .backend == 'regex'
>>> # xdoctest: +REQUIRES(module:parse)
>>> assert Pattern.coerce('parse:hello.jpg').backend == 'parse'
- class kwutil.util_pattern.MultiPattern(patterns, predicate)[source]¶
Bases:
PatternBase, NiceRepr
Groups multiple patterns together with an “any” or “all” predicate.
Note
We may remove the idea of a predicate in the future and just use behavior that currently corresponds to the “any” predicate.
Example
>>> import kwutil
>>> pat = kwutil.MultiPattern.coerce(['aaa*', 'bbb'])
>>> assert not pat.match('aabb')
>>> assert pat.match('aaabb')
>>> assert pat.match('bbb')
>>> assert not pat.match('bbbaaa')
Example
>>> dpath = ub.Path.appdir('xdev/tests/multipattern_paths').ensuredir().delete().ensuredir()
>>> (dpath / 'file0.txt').touch()
>>> (dpath / 'data0.dat').touch()
>>> (dpath / 'other0.txt').touch()
>>> ((dpath / 'dir1').ensuredir() / 'file1.txt').touch()
>>> ((dpath / 'dir2').ensuredir() / 'file2.txt').touch()
>>> ((dpath / 'dir2').ensuredir() / 'file3.txt').touch()
>>> ((dpath / 'dir1').ensuredir() / 'data.dat').touch()
>>> ((dpath / 'dir2').ensuredir() / 'data.dat').touch()
>>> ((dpath / 'dir2').ensuredir() / 'data.dat').touch()
>>> pat = MultiPattern.coerce(['*.txt'], 'glob')
>>> print(list(pat.paths(cwd=dpath)))
>>> pat = MultiPattern.coerce(['*0*', '**/*.txt'], 'glob')
>>> print(list(pat.paths(cwd=dpath, recursive=1)))
>>> pat = MultiPattern.coerce(['*.txt', '**/*.txt', '**/*.dat'], 'glob')
>>> print(list(pat.paths(cwd=dpath)))
- match(text)[source]¶
Check if a string matches this multipattern
- Parameters:
text (str) – text to check matches against
Example
>>> # xdoctest: +REQUIRES(module:parse)
>>> import kwutil
>>> self = kwutil.MultiPattern.coerce([
>>>     kwutil.Pattern.coerce('{key1}={val1},{key2}={val2}', hint='parse')
>>> ])
>>> text = 'aaa=bbb,ccc=ddd'
>>> result = self.match(text)
>>> assert result
>>> assert result.named['val1'] == 'bbb'
- classmethod coerce(data, hint='auto', predicate='any')[source]¶
- Parameters:
data (str | List | Pattern | PathLike | MultiPattern)
hint (str) – can be ‘glob’, ‘regex’, ‘strict’ or ‘auto’. In ‘auto’ we will use ‘glob’ if the input is a string and ‘*’ is in the pattern, otherwise we will use strict. Pattern inputs keep their existing interpretation.
- Returns:
MultiPattern
Example
>>> from kwutil.util_pattern import * # NOQA
>>> pat = MultiPattern.coerce('foo*', 'glob')
>>> pat2 = MultiPattern.coerce(pat, 'regex')
>>> pat3 = MultiPattern.coerce([pat, pat], 'regex')
>>> pat4 = MultiPattern.coerce([ub.Path('bar*'), pat], 'regex')
>>> print('pat = {}'.format(ub.urepr(pat, nl=1)))
>>> print('pat2 = {}'.format(ub.urepr(pat2, nl=1)))
>>> print('pat3 = {!r}'.format(pat3))
>>> print('pat4 = {!r}'.format(pat4))

>>> pat00 = MultiPattern.coerce('foo', 'glob')
>>> pat01 = MultiPattern.coerce('foo*', 'glob')
>>> pat02 = MultiPattern.coerce('foo*', 'regex')
>>> pat5 = MultiPattern.coerce(['foo', 'foo*', pat, pat00, pat01, pat02])
>>> print(f'pat5={pat5}')
Example
>>> # Test all acceptable input types
>>> from kwutil.util_pattern import * # NOQA
>>> import itertools as it
>>> str_pat = 'pattern*'
>>> scalar_inputs = {
>>>     'str': str_pat,
>>>     'path': ub.Path(str_pat),
>>>     'pat': Pattern.coerce(str_pat),
>>>     'mpat': MultiPattern.coerce(str_pat)
>>> }
>>> # Test scalar input types
>>> scalar_outputs = {}
>>> for k, v in scalar_inputs.items():
>>>     scalar_outputs[k] = MultiPattern.coerce(v)
>>> print('scalar_outputs = {}'.format(ub.urepr(scalar_outputs, nl=1)))
>>> #
>>> # Test iterable input types
>>> multi_outputs = []
>>> for v in it.combinations(scalar_inputs.values(), 2):
>>>     multi_outputs.append(MultiPattern.coerce(v))
>>> for v in it.combinations(scalar_inputs.values(), 3):
>>>     multi_outputs.append(MultiPattern.coerce(v))
>>> # Higher order nesting test
>>> higher_order_output = MultiPattern.coerce(multi_outputs)
>>> print('higher_order_output = {}'.format(ub.urepr(higher_order_output, nl=1)))