"""
Wrappers around :mod:`pyyaml` or :mod:`ruamel.yaml`.
The important functions to know are:
* :func:`Yaml.loads`
* :func:`Yaml.dumps`
* :func:`Yaml.coerce`
Loads and Dumps are straightforward. Loads takes a block of text and passes it
through the ruamel.yaml or pyyaml to parse the string. Dumps takes a data
structure and turns it into a YAML string. Roundtripping is supported with the
ruamel.yaml backend.
Coerce accepts a non-string data structure (which it simply returns), a path
to a file (which it loads), or a string which it assumes is YAML text (note:
there is a small ambiguity introduced here). If coerce encounters a string that
looks like an existing path it reads it. This does not happen by default inside
longer YAML text inputs, but the parser does respect a !include constructor,
which lets you make nested configs by pointing to other configs.
"""
import io
import os
import ubelt as ub
NEW_RUAMEL = 1
[docs]
class _YamlRepresenter:
[docs]
@staticmethod
def str_presenter(dumper, data):
# https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data
if len(data.splitlines()) > 1 or '\n' in data:
text_list = [line.rstrip() for line in data.splitlines()]
fixed_data = '\n'.join(text_list)
return dumper.represent_scalar('tag:yaml.org,2002:str', fixed_data, style='|')
return dumper.represent_scalar('tag:yaml.org,2002:str', data)
[docs]
@ub.memoize
def _custom_ruaml_loader():
    """
    Build the loader class used by the old (pre ``YAML()`` object) ruamel API.

    The returned class is a private subclass of
    :class:`ruamel.yaml.RoundTripLoader` that understands the ``!include``
    tag. A subclass is used so that the constructor is registered on our own
    class rather than on the loader class shared with every other user of
    ruamel.yaml in the process.

    Returns:
        type: a loader class suitable for ``ruamel.yaml.load(..., Loader=...)``

    References:
        https://stackoverflow.com/questions/59635900/ruamel-yaml-custom-commentedmapping-for-custom-tags
        https://stackoverflow.com/questions/528281/how-can-i-include-a-yaml-file-inside-another
        https://stackoverflow.com/questions/76870413/using-a-custom-loader-with-ruamel-yaml-0-15-0
    """
    import ruamel.yaml

    class Loader(ruamel.yaml.RoundTripLoader):
        """
        Private loader; keeps ``!include`` off the shared ruamel class.
        """

    def _construct_include_tag(self, node):
        # A sequence node coerces each item; a scalar node is treated as a
        # path to an external YAML file.
        if isinstance(node.value, list):
            return [Yaml.coerce(v.value) for v in node.value]
        external_fpath = ub.Path(node.value)
        if not external_fpath.exists():
            raise IOError(f'Included external yaml file {external_fpath} '
                          'does not exist')
        return Yaml.load(node.value)

    Loader.add_constructor("!include", _construct_include_tag)
    return Loader
[docs]
@ub.memoize
def _custom_ruaml_dumper():
    """
    Build the dumper class used by the old (pre ``YAML()`` object) ruamel API.

    The returned class is a private subclass of
    :class:`ruamel.yaml.RoundTripDumper`, so the custom representers are
    registered on our own class instead of changing how the shared ruamel
    dumper represents strings for everyone else in the process.

    Returns:
        type: a dumper class suitable for
            ``ruamel.yaml.round_trip_dump(..., Dumper=...)``

    References:
        https://stackoverflow.com/questions/59635900/ruamel-yaml-custom-commentedmapping-for-custom-tags
    """
    import ruamel.yaml

    class Dumper(ruamel.yaml.RoundTripDumper):
        """
        Private dumper; keeps our representers off the shared ruamel class.
        """

    Dumper.add_representer(str, _YamlRepresenter.str_presenter)
    Dumper.add_representer(ub.udict, Dumper.represent_dict)
    # TODO: should other dictionary types (e.g. omegaconf.DictConfig) also be
    # registered with ``Dumper.represent_dict``?
    return Dumper
[docs]
@ub.memoize
def _custom_pyaml_dumper():
    """
    Build the dumper class used with the pyyaml backend.

    Returns:
        type: a subclass of :class:`yaml.Dumper` with the string presenter
            and a dict representer for :class:`ubelt.udict` registered.
    """
    import yaml

    class Dumper(yaml.Dumper):
        pass

    registrations = [
        (str, _YamlRepresenter.str_presenter),
        (ub.udict, Dumper.represent_dict),
    ]
    for pytype, representer in registrations:
        Dumper.add_representer(pytype, representer)
    # TODO: should other dictionary types (e.g. omegaconf.DictConfig) also be
    # registered with ``Dumper.represent_dict``?
    return Dumper
# @ub.memoize
[docs]
def _custom_new_ruaml_yaml_obj(version=None):
    """
    Build a configured :class:`ruamel.yaml.YAML` object (the "new" ruamel API).

    The object uses freshly created constructor / representer subclasses, so
    the ``!include`` tag and the extra representers are registered on classes
    local to this call. Quotes are preserved and line wrapping is disabled.

    Args:
        version (None | str | Tuple[int, int]):
            if specified, force a specific YAML version. A string such as
            ``'1.1'`` is split on ``'.'`` and converted to a tuple of ints
            (e.g. ``(1, 1)``).

    Returns:
        ruamel.yaml.YAML: the configured yaml object

    References:
        https://stackoverflow.com/questions/59635900/ruamel-yaml-custom-commentedmapping-for-custom-tags
        https://stackoverflow.com/questions/528281/how-can-i-include-a-yaml-file-inside-another
        https://stackoverflow.com/questions/76870413/using-a-custom-loader-with-ruamel-yaml-0-15-0

    CommandLine:
        xdoctest -m kwutil.util_yaml _custom_new_ruaml_yaml_obj

    Example:
        >>> # xdoctest: +REQUIRES(module:ruamel.yaml)
        >>> from kwutil.util_yaml import *  # NOQA
        >>> from kwutil.util_yaml import _custom_ruaml_loader, _custom_new_ruaml_yaml_obj, _custom_ruaml_dumper, _custom_pyaml_dumper
        >>> from kwutil.util_yaml import _YamlRepresenter
        >>> # Test new load
        >>> import io
        >>> file = io.StringIO('[a, b, c]')
        >>> yaml_obj = _custom_new_ruaml_yaml_obj()
        >>> data = yaml_obj.load(file)
        >>> print(data)
        >>> # Test round trip dump
        >>> file = io.StringIO()
        >>> yaml_obj.dump(data, file)
        >>> print(file.getvalue())
        >>> #
        >>> # Test new dump
        >>> data2 = ub.udict(a=1, b=2)
        >>> file = io.StringIO()
        >>> yaml_obj.dump(data2, file)
        >>> print(file.getvalue())

    Example:
        >>> # xdoctest: +REQUIRES(module:ruamel.yaml)
        >>> from kwutil.util_yaml import *  # NOQA
        >>> from kwutil.util_yaml import _custom_ruaml_loader, _custom_new_ruaml_yaml_obj, _custom_ruaml_dumper, _custom_pyaml_dumper
        >>> from kwutil.util_yaml import _YamlRepresenter
        >>> # Test handling of different YAML versions
        >>> yaml_obj = _custom_new_ruaml_yaml_obj(version='1.1')
        >>> print(yaml_obj.load(io.StringIO('on')))
        True
        >>> file = io.StringIO()
        >>> yaml_obj.dump('on', file)
        >>> print(file.getvalue())
        %YAML 1.1
        --- 'on'
        >>> yaml_obj = _custom_new_ruaml_yaml_obj(version='1.2')
        >>> print(yaml_obj.load(io.StringIO('on')))
        on
        >>> file = io.StringIO()
        >>> yaml_obj.dump('on', file)
        >>> print(file.getvalue())
        %YAML 1.2
        --- on
    """
    import ruamel.yaml
    from collections import Counter, OrderedDict, defaultdict

    # Subclasses are created per call so registrations below stay local to
    # the yaml object returned from this call.
    class CustomConstructor(ruamel.yaml.constructor.RoundTripConstructor):
        ...

    class CustomRepresenter(ruamel.yaml.representer.RoundTripRepresenter):
        ...

    CustomRepresenter.add_representer(str, _YamlRepresenter.str_presenter)
    # Dump these dict subclasses as plain mappings.
    CustomRepresenter.add_representer(ub.udict, CustomRepresenter.represent_dict)
    CustomRepresenter.add_representer(Counter, CustomRepresenter.represent_dict)
    CustomRepresenter.add_representer(OrderedDict, CustomRepresenter.represent_dict)
    CustomRepresenter.add_representer(defaultdict, CustomRepresenter.represent_dict)

    def _construct_include_tag(self, node):
        # A sequence node coerces each item; a scalar node is treated as a
        # path to an external YAML file.
        value = node.value
        if isinstance(value, list):
            return [Yaml.coerce(v.value) for v in value]
        external_fpath = ub.Path(value)
        if not external_fpath.exists():
            raise IOError(f'Included external yaml file {external_fpath} '
                          'does not exist')
        # NOTE: the included file is loaded through Yaml.load (default
        # backend / version) rather than by recursing into this yaml object.
        return Yaml.load(value)

    CustomConstructor.add_constructor('!include', _construct_include_tag)

    yaml_obj = ruamel.yaml.YAML()
    if version is not None:
        if isinstance(version, str):
            version = tuple(map(int, version.split('.')))
        yaml_obj.version = version
    yaml_obj.Constructor = CustomConstructor
    yaml_obj.Representer = CustomRepresenter
    yaml_obj.preserve_quotes = True
    # Effectively disables line wrapping on dump.
    yaml_obj.width = float('inf')
    return yaml_obj
[docs]
class Yaml:
"""
Namespace for yaml functions
Example:
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> from kwutil.util_yaml import Yaml
>>> import ubelt as ub
>>> data = {
>>> 'a': 'hello world',
>>> 'b': ub.udict({'a': 3})
>>> }
>>> text1 = Yaml.dumps(data, backend='ruamel')
>>> # Coerce is idempotent and resolves the input to nested Python
>>> # structures.
>>> resolved1 = Yaml.coerce(data)
>>> resolved2 = Yaml.coerce(text1)
>>> resolved3 = Yaml.coerce(resolved2)
>>> assert resolved1 == resolved2 == resolved3 == data
>>> # with ruamel
>>> data2 = Yaml.loads(text1)
>>> assert data2 == data
>>> # with pyyaml
>>> data2 = Yaml.loads(text1, backend='pyyaml')
>>> assert data2 == data
"""
[docs]
@staticmethod
def dumps(data, backend='ruamel', version=None):
"""
Dump yaml to a string representation
(and account for some of our use-cases)
Args:
data (Any): yaml representable data
backend (str): either ruamel or pyyaml
version (str): version of YAML spec to use. (e.g. '1.1')
Returns:
str: yaml text
Example:
>>> # xdoctest: +REQUIRES(module:pyyaml)
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> import ubelt as ub
>>> data = {
>>> 'a': 'hello world',
>>> 'b': ub.udict({'a': 3})
>>> }
>>> text2 = Yaml.dumps(data, backend='pyyaml')
>>> print(text2)
>>> text1 = Yaml.dumps(data, backend='ruamel')
>>> print(text1)
>>> assert text1 == text2
>>> print(Yaml.dumps({'key': 'on'}, backend='ruamel', version='1.1'))
%YAML 1.1
---
key: 'on'
"""
file = io.StringIO()
if backend == 'ruamel':
if NEW_RUAMEL:
yaml_obj = _custom_new_ruaml_yaml_obj(version=version)
yaml_obj.dump(data, file)
else:
import ruamel.yaml
Dumper = _custom_ruaml_dumper()
ruamel.yaml.round_trip_dump(data, file, Dumper=Dumper, width=float("inf"))
elif backend == 'pyyaml':
if version is not None:
raise NotImplementedError('pyyaml does not support version yet, use ruamel backend')
import yaml
Dumper = _custom_pyaml_dumper()
yaml.dump(data, file, Dumper=Dumper, sort_keys=False, width=float("inf"))
else:
raise KeyError(backend)
text = file.getvalue()
return text
[docs]
@staticmethod
def load(file, backend='ruamel', version=None):
"""
Load yaml from a file
Args:
file (io.TextIOBase | PathLike | str): yaml file path or file object
backend (str): either ruamel or pyyaml
Returns:
object
Example:
>>> # xdoctest: +REQUIRES(module:pyyaml)
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> from kwutil.util_yaml import Yaml
>>> import ubelt as ub
>>> data = {
>>> 'a': 'hello world',
>>> 'b': ub.udict({'a': 3})
>>> }
>>> text1 = Yaml.dumps(data, backend='ruamel')
>>> import io
>>> # with ruamel
>>> file = io.StringIO(text1)
>>> data2 = Yaml.load(file)
>>> assert data2 == data
>>> # with pyyaml
>>> file = io.StringIO(text1)
>>> data2 = Yaml.load(file, backend='pyyaml')
>>> assert data2 == data
"""
if isinstance(file, (str, os.PathLike)):
fpath = file
with open(fpath, 'r') as fp:
return Yaml.load(fp, backend=backend)
else:
if backend == 'ruamel':
import ruamel.yaml # NOQA
# TODO: seems like there will be a deprecation
# from ruamel.yaml import YAML
if NEW_RUAMEL:
yaml_obj = _custom_new_ruaml_yaml_obj(version=version)
data = yaml_obj.load(file)
else:
# yaml = YAML(typ='unsafe', pure=True)
# data = yaml.load(file, Loader=Loader, preserve_quotes=True)
Loader = _custom_ruaml_loader()
data = ruamel.yaml.load(file, Loader=Loader, preserve_quotes=True)
# data = ruamel.yaml.load(file, Loader=ruamel.yaml.RoundTripLoader, preserve_quotes=True)
elif backend == 'pyyaml':
if version is not None:
raise NotImplementedError('pyyaml does not support version yet, use ruamel backend')
import yaml
# data = yaml.load(file, Loader=yaml.SafeLoader)
data = yaml.load(file, Loader=yaml.Loader)
else:
raise KeyError(backend)
return data
[docs]
@staticmethod
def loads(text, backend='ruamel', version=None):
"""
Load yaml from a text
Args:
text (str): yaml text
backend (str): either ruamel or pyyaml
Returns:
object
Example:
>>> # xdoctest: +REQUIRES(module:pyyaml)
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> import ubelt as ub
>>> data = {
>>> 'a': 'hello world',
>>> 'b': ub.udict({'a': 3})
>>> }
>>> print('data = {}'.format(ub.urepr(data, nl=1)))
>>> print('---')
>>> text = Yaml.dumps(data)
>>> print(ub.highlight_code(text, 'yaml'))
>>> print('---')
>>> data2 = Yaml.loads(text)
>>> assert data == data2
>>> data3 = Yaml.loads(text, backend='pyyaml')
>>> print('data2 = {}'.format(ub.urepr(data2, nl=1)))
>>> print('data3 = {}'.format(ub.urepr(data3, nl=1)))
>>> assert data == data3
"""
# TODO: add debugging helpers when a loads fails
file = io.StringIO(text)
if backend == 'ruamel':
import ruamel.yaml # NOQA
try:
data = Yaml.load(file, backend=backend, version=version)
except ruamel.yaml.parser.ParserError as ex_:
ex = ex_
print(f'YAML ERROR: {ex!r}')
try:
from xdoctest.utils import add_line_numbers, highlight_code
lines = text.split('\n')
error_line = ex.context_mark.line
context_before = 3
context_after = 3
start_line = error_line - context_before
stop_line = error_line + context_after
show_lines = lines[start_line:stop_line]
show_lines = highlight_code('\n'.join(show_lines), 'YAML').split('\n')
lines = add_line_numbers(show_lines, start=start_line + 1)
print(f'ex.context_mark.line={ex.context_mark.line + 1}')
print(f'ex.context_mark.column={ex.context_mark.column}')
print('\n'.join(lines))
except Exception:
...
raise
else:
if version is not None:
raise NotImplementedError('pyyaml does not support version yet, use ruamel backend')
data = Yaml.load(file, backend=backend)
return data
[docs]
@staticmethod
def coerce(data, backend='ruamel', path_policy='existing_file_with_extension'):
"""
Attempt to convert input into a parsed yaml / json data structure.
If the data looks like a path, it tries to load and parse file contents.
If the data looks like a yaml/json string it tries to parse it.
If the data looks like parsed data, then it returns it as-is.
Args:
data (str | PathLike | dict | list):
backend (str): either ruamel or pyyaml
path_policy (str):
Determines how we determine if something looks like a path.
Pre 0.3.2 behavior is from path_policy='existing_file'.
Default is 'existing_file_with_extension'.
Can also be 'never' to disable the path feature and decrease
ambiguity.
Returns:
object: parsed yaml data
Note:
The input to the function cannot distinguish a string that should be
loaded and a string that should be parsed. If it looks like a file that
exists it will read it. To avoid this coerner case use this only for
data where you expect the output is a List or Dict.
References:
https://stackoverflow.com/questions/528281/how-can-i-include-a-yaml-file-inside-another
Example:
>>> # xdoctest: +REQUIRES(module:pyyaml)
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> from kwutil.util_yaml import Yaml
>>> text = ub.codeblock(
'''
- !!float nan
- !!float inf
- nan
- inf
# Seems to break older ruamel.yaml 0.17.21
# - .nan
# - .inf
- null
''')
>>> Yaml.coerce(text, backend='pyyaml')
>>> Yaml.coerce(text, backend='ruamel')
Example:
>>> # xdoctest: +REQUIRES(module:pyyaml)
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> Yaml.coerce('"[1, 2, 3]"')
[1, 2, 3]
>>> fpath = ub.Path.appdir('cmd_queue/tests/util_yaml').ensuredir() / 'file.yaml'
>>> fpath.write_text(Yaml.dumps([4, 5, 6]))
>>> Yaml.coerce(fpath)
[4, 5, 6]
>>> Yaml.coerce(str(fpath))
[4, 5, 6]
>>> dict(Yaml.coerce('{a: b, c: d}'))
{'a': 'b', 'c': 'd'}
>>> Yaml.coerce(None)
None
Example:
>>> # xdoctest: +REQUIRES(module:pyyaml)
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> assert Yaml.coerce('') is None
Example:
>>> # xdoctest: +REQUIRES(module:pyyaml)
>>> # xdoctest: +REQUIRES(module:ruamel.yaml)
>>> dpath = ub.Path.appdir('cmd_queue/tests/util_yaml').ensuredir()
>>> fpath = dpath / 'external.yaml'
>>> fpath.write_text(Yaml.dumps({'foo': 'bar'}))
>>> text = ub.codeblock(
>>> f'''
>>> items:
>>> - !include {dpath}/external.yaml
>>> ''')
>>> data = Yaml.coerce(text, backend='ruamel')
>>> print(Yaml.dumps(data, backend='ruamel'))
items:
- foo: bar
>>> text = ub.codeblock(
>>> f'''
>>> items:
>>> !include [{dpath}/external.yaml, blah, 1, 2, 3]
>>> ''')
>>> data = Yaml.coerce(text, backend='ruamel')
>>> print('data = {}'.format(ub.urepr(data, nl=1)))
>>> print(Yaml.dumps(data, backend='ruamel'))
"""
if isinstance(data, os.PathLike):
result = Yaml.load(data, backend=backend)
elif isinstance(data, str):
maybe_path = None
if path_policy == 'never':
...
else:
if path_policy == 'existing_file':
path_requires_extension = False
elif path_policy == 'existing_file_with_extension':
path_requires_extension = True
else:
raise KeyError(path_policy)
if '\n' not in data and len(data.strip()) > 0:
# Ambiguous case: might this be path-like?
maybe_path = ub.Path(data)
try:
if not maybe_path.is_file():
maybe_path = None
except OSError:
maybe_path = None
if maybe_path and path_requires_extension:
# If the input looks like a path, try to load it. This was
# added because I tried to coerce "auto" as a string, but
# for some reason there was a file "auto" in my cwd and
# that was confusing.
if '.' not in maybe_path.name:
maybe_path = None
if maybe_path is not None:
result = Yaml.coerce(maybe_path, backend=backend)
else:
result = Yaml.loads(data, backend=backend)
elif hasattr(data, 'read'):
# assume file
result = Yaml.load(data, backend=backend)
else:
# Probably already parsed. Return the input
result = data
return result
[docs]
@staticmethod
def InlineList(items):
    """
    Wrap ``items`` in a ruamel sequence that is marked as flow style.

    Args:
        items (Iterable): the items of the sequence

    Returns:
        ruamel.yaml.comments.CommentedSeq: sequence with flow style set

    References:
        .. [SO56937691] https://stackoverflow.com/questions/56937691/making-yaml-ruamel-yaml-always-dump-lists-inline
    """
    from ruamel.yaml.comments import CommentedSeq
    inline_seq = CommentedSeq(items)
    inline_seq.fa.set_flow_style()
    return inline_seq
[docs]
@staticmethod
def Dict(data):
    """
    Get a ruamel-enhanced dictionary

    Args:
        data (dict): the mapping to wrap

    Returns:
        ruamel.yaml.comments.CommentedMap: the wrapped mapping

    Example:
        >>> # xdoctest: +REQUIRES(module:pyyaml)
        >>> # xdoctest: +REQUIRES(module:ruamel.yaml)
        >>> data = {'a': 'avalue', 'b': 'bvalue'}
        >>> data = Yaml.Dict(data)
        >>> data.yaml_set_start_comment('hello')
        >>> # Note: not working https://sourceforge.net/p/ruamel-yaml/tickets/400/
        >>> data.yaml_set_comment_before_after_key('a', before='a comment', indent=2)
        >>> data.yaml_set_comment_before_after_key('b', 'b comment')
        >>> print(Yaml.dumps(data))
    """
    from ruamel.yaml.comments import CommentedMap
    return CommentedMap(data)
[docs]
@staticmethod
def CodeBlock(text):
    """
    Pass ``text`` through ``ub.codeblock`` and wrap it as a ruamel literal
    scalar string.

    Args:
        text (str): the block of text

    Returns:
        ruamel.yaml.scalarstring.LiteralScalarString: the wrapped text
    """
    from ruamel.yaml.scalarstring import LiteralScalarString
    cleaned_text = ub.codeblock(text)
    return LiteralScalarString(cleaned_text)