Skip to content

Commit

Permalink
remove dependency from ansible.utils.unicode.to_str
Browse files Browse the repository at this point in the history
  • Loading branch information
fabriziopandini committed Nov 2, 2017
1 parent f6320bd commit c8e3d19
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='0.7.3',
version='0.7.5',

description='Declarative cluster definition for vagrant, to be used with vagrant-compose plugin.',
long_description=long_description,
Expand Down
4 changes: 2 additions & 2 deletions vagrantplaybook/ansible.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

from vagrantplaybook.compat import compat_text_type, to_str

#TODO: remove dependencies from ansible (use default json library + jinja2)
from ansible.utils.unicode import to_str
from ansible.parsing.dataloader import DataLoader
from vagrantplaybook.compat import compat_text_type
from ansible.template import Templar

ansible_tempar = Templar
Expand Down
126 changes: 125 additions & 1 deletion vagrantplaybook/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,133 @@

# this module provides utilities for smoothing over the differences
# between Python 2 (currently supported) and 3 (to be supported in future)
# see https://github.com/benjaminp/six
# see
# - https://github.com/benjaminp/six
# - https://github.com/ansible/ansible/blob/devel/lib/ansible/utils/unicode.py
# - https://github.com/ansible/ansible/blob/devel/lib/ansible/module_utils/_text.py

# Python 2 type aliases used across the package in place of the raw builtins
# (presumably modelled on six's string_types / text_type / integer_types /
# binary_type -- see the links above).
# NOTE(review): basestring, unicode and long exist only on Python 2, so
# importing this module on Python 3 raises NameError at these lines.
compat_string_types = basestring  # any string: byte or text
compat_text_type = unicode  # text (unicode) strings
compat_integer_types = (int, long)  # tuple form, suitable for isinstance()
compat_binary_type = str  # byte strings

def to_str(*args, **kwargs):
    """Call :func:`to_bytes` with ``errors`` defaulting to ``'replace'``.

    Accepts exactly the arguments of ``to_bytes``
    (``obj, encoding='utf-8', errors=None, nonstring='simplerepr'``), given
    positionally or by keyword, and returns whatever ``to_bytes`` returns.
    The only difference is the default error handler: when the caller does
    not choose one, ``'replace'`` is used.
    """
    # ``errors`` is the third positional parameter of to_bytes.  Inject the
    # default only when the caller supplied it neither positionally nor by
    # keyword; otherwise to_bytes would receive ``errors`` twice and raise
    # "TypeError: got multiple values for keyword argument 'errors'".
    if len(args) < 3 and 'errors' not in kwargs:
        kwargs['errors'] = 'replace'
    return to_bytes(*args, **kwargs)

import codecs

# Probe once, at import time, whether the interpreter knows the
# ``surrogateescape`` codec error handler.
try:
    codecs.lookup_error('surrogateescape')
except LookupError:
    HAS_SURROGATEESCAPE = False
else:
    HAS_SURROGATEESCAPE = True


# Values of ``errors`` that are resolved to a real codec error handler inside
# to_bytes (``None`` selects the default strategy).
_COMPOSED_HANDLER_NAMES = (
    None,
    'surrogate_or_replace',
    'surrogate_or_strict',
    'surrogate_then_replace',
)
_COMPOSED_ERROR_HANDLERS = frozenset(_COMPOSED_HANDLER_NAMES)

def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
    """Return ``obj`` as a byte string.

    :arg obj: Usually a text string or a byte string.  Byte strings are
        returned as-is (their encoding is *not* validated); text strings are
        encoded; anything else is handled according to ``nonstring``.
    :kwarg encoding: Codec used to encode text strings.  Default 'utf-8'.
    :kwarg errors: Any codec error handler name, or one of the composed
        strategies below, which are resolved when the function runs based on
        whether ``surrogateescape`` was available at import time:

        :surrogate_or_strict: ``surrogateescape`` if available, else
            ``strict``.
        :surrogate_or_replace: ``surrogateescape`` if available, else
            ``replace``.
        :surrogate_then_replace: ``surrogateescape`` if available; if that
            encode fails, surrogates are swapped for replacement characters
            and the text is re-encoded with ``replace``.  Without
            ``surrogateescape`` it is plain ``replace``.  This is also what
            ``errors=None`` (the default) selects.
    :kwarg nonstring: What to do when ``obj`` is neither text nor bytes:

        :simplerepr: (default) convert ``str(obj)`` -- or ``repr(obj)`` if
            that raises :exc:`UnicodeError` -- to bytes; an empty byte string
            is returned if both fail.
        :empty: return an empty byte string.
        :passthru: return ``obj`` unchanged.
        :strict: raise :exc:`TypeError`.
    :returns: A byte string, except with ``nonstring='passthru'`` where a
        non-string ``obj`` is handed back untouched.
    :raises TypeError: for ``nonstring='strict'`` with a non-string ``obj``,
        or for an unrecognised ``nonstring`` value.
    """
    # Already bytes: nothing to do.
    if isinstance(obj, compat_binary_type):
        return obj

    # Remember what the caller asked for before mapping the composed strategy
    # names onto a handler the codec machinery actually understands.
    requested_errors = errors
    if errors in _COMPOSED_ERROR_HANDLERS:
        if HAS_SURROGATEESCAPE:
            errors = 'surrogateescape'
        else:
            errors = 'strict' if errors == 'surrogate_or_strict' else 'replace'

    if isinstance(obj, compat_text_type):
        try:
            # Fast path: a single encode call covers the common case.
            return obj.encode(encoding, errors)
        except UnicodeEncodeError:
            if requested_errors not in (None, 'surrogate_then_replace'):
                raise
            # Slow path for surrogate_then_replace: round-trip through utf-8
            # so the surrogates become replacement characters, then encode
            # with 'replace' so the remaining characters cannot fail either.
            scrubbed = obj.encode('utf-8', 'surrogateescape').decode('utf-8', 'replace')
            return scrubbed.encode(encoding, 'replace')

    # Non-string objects are handled last to keep the common string cases
    # cheap, at the price of one extra to_bytes call here.
    if nonstring == 'passthru':
        return obj
    if nonstring == 'empty':
        # python2.4 doesn't have b''
        return to_bytes('')
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')
    if nonstring != 'simplerepr':
        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)

    try:
        rendered = str(obj)
    except UnicodeError:
        try:
            rendered = repr(obj)
        except UnicodeError:
            # Neither representation is usable; give up.
            return to_bytes('')

    return to_bytes(rendered, encoding, errors)

0 comments on commit c8e3d19

Please sign in to comment.