Rework is/to ASCII and implement printable ASCII is/to functions.

Add Utils tests.
This commit is contained in:
Andris Raugulis 2016-11-02 18:23:55 +02:00
parent 11b6155c64
commit 5bb0ae0ceb
2 changed files with 269 additions and 13 deletions

View File

@ -1914,28 +1914,58 @@ class Utils(object):
if isinstance(v, str): if isinstance(v, str):
return v return v
elif isinstance(v, text_type): elif isinstance(v, text_type):
return v.encode(enc) return v.encode(enc) # PY2 only
elif isinstance(v, binary_type): elif isinstance(v, binary_type):
return v.decode(enc) return v.decode(enc) # PY3 only
raise cls._type_err(v, 'native text') raise cls._type_err(v, 'native text')
@classmethod
def _is_ascii(cls, v, char_filter=lambda x: x <= 127):
# type: (Union[text_type, str], Callable[[int], bool]) -> bool
r = False
if isinstance(v, (text_type, str)):
for c in v:
i = cls.ctoi(c)
if not char_filter(i):
return r
r = True
return r
@classmethod
def _to_ascii(cls, v, char_filter=lambda x: x <= 127, errors='replace'):
# type: (Union[text_type, str], Callable[[int], bool], str) -> str
if isinstance(v, (text_type, str)):
r = bytearray()
for c in v:
i = cls.ctoi(c)
if char_filter(i):
r.append(i)
else:
if errors == 'ignore':
continue
r.append(63)
return cls.to_ntext(r.decode('ascii'))
raise cls._type_err(v, 'ascii')
@classmethod @classmethod
def is_ascii(cls, v): def is_ascii(cls, v):
# type: (Union[text_type, str]) -> bool # type: (Union[text_type, str]) -> bool
try: return cls._is_ascii(v)
if isinstance(v, (text_type, str)):
v.encode('ascii')
return True
except UnicodeEncodeError:
pass
return False
@classmethod @classmethod
def to_ascii(cls, v, errors='replace'): def to_ascii(cls, v, errors='replace'):
# type: (Union[text_type, str], str) -> str # type: (Union[text_type, str], str) -> str
if isinstance(v, (text_type, str)): return cls._to_ascii(v, errors=errors)
return cls.to_ntext(v.encode('ascii', errors))
raise cls._type_err(v, 'ascii') @classmethod
def is_print_ascii(cls, v):
# type: (Union[text_type, str]) -> bool
return cls._is_ascii(v, lambda x: x >= 32 and x <= 126)
@classmethod
def to_print_ascii(cls, v, errors='replace'):
# type: (Union[text_type, str], str) -> str
return cls._to_ascii(v, lambda x: x >= 32 and x <= 126, errors)
@classmethod @classmethod
def unique_seq(cls, seq): def unique_seq(cls, seq):
@ -1951,7 +1981,15 @@ class Utils(object):
return tuple(x for x in seq if x not in seen and not _seen_add(x)) return tuple(x for x in seq if x not in seen and not _seen_add(x))
else: else:
return [x for x in seq if x not in seen and not _seen_add(x)] return [x for x in seq if x not in seen and not _seen_add(x)]
@classmethod
def ctoi(cls, c):
# type: (Union[text_type, str, int]) -> int
if isinstance(c, (text_type, str)):
return ord(c[0])
else:
return c
@staticmethod @staticmethod
def parse_int(v): def parse_int(v):
# type: (Any) -> int # type: (Any) -> int

218
test/test_utils.py Normal file
View File

@ -0,0 +1,218 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import pytest
# pylint: disable=attribute-defined-outside-init
class TestUtils(object):
@pytest.fixture(autouse=True)
def init(self, ssh_audit):
self.utils = ssh_audit.Utils
self.PY3 = sys.version_info >= (3,)
def test_to_bytes_py2(self):
if self.PY3:
return
# binary_type (native str, bytes as str)
assert self.utils.to_bytes('fran\xc3\xa7ais') == 'fran\xc3\xa7ais'
assert self.utils.to_bytes(b'fran\xc3\xa7ais') == 'fran\xc3\xa7ais'
# text_type (unicode)
assert self.utils.to_bytes(u'fran\xe7ais') == 'fran\xc3\xa7ais'
# other
with pytest.raises(TypeError):
self.utils.to_bytes(123)
def test_to_bytes_py3(self):
if not self.PY3:
return
# binary_type (bytes)
assert self.utils.to_bytes(b'fran\xc3\xa7ais') == b'fran\xc3\xa7ais'
# text_type (native str as unicode, unicode)
assert self.utils.to_bytes('fran\xe7ais') == b'fran\xc3\xa7ais'
assert self.utils.to_bytes(u'fran\xe7ais') == b'fran\xc3\xa7ais'
# other
with pytest.raises(TypeError):
self.utils.to_bytes(123)
def test_to_utext_py2(self):
if self.PY3:
return
# binary_type (native str, bytes as str)
assert self.utils.to_utext('fran\xc3\xa7ais') == u'fran\xe7ais'
assert self.utils.to_utext(b'fran\xc3\xa7ais') == u'fran\xe7ais'
# text_type (unicode)
assert self.utils.to_utext(u'fran\xe7ais') == u'fran\xe7ais'
# other
with pytest.raises(TypeError):
self.utils.to_utext(123)
def test_to_utext_py3(self):
if not self.PY3:
return
# binary_type (bytes)
assert self.utils.to_utext(b'fran\xc3\xa7ais') == u'fran\xe7ais'
# text_type (native str as unicode, unicode)
assert self.utils.to_utext('fran\xe7ais') == 'fran\xe7ais'
assert self.utils.to_utext(u'fran\xe7ais') == u'fran\xe7ais'
# other
with pytest.raises(TypeError):
self.utils.to_utext(123)
def test_to_ntext_py2(self):
if self.PY3:
return
# str (native str, bytes as str)
assert self.utils.to_ntext('fran\xc3\xa7ais') == 'fran\xc3\xa7ais'
assert self.utils.to_ntext(b'fran\xc3\xa7ais') == 'fran\xc3\xa7ais'
# text_type (unicode)
assert self.utils.to_ntext(u'fran\xe7ais') == 'fran\xc3\xa7ais'
# other
with pytest.raises(TypeError):
self.utils.to_ntext(123)
def test_to_ntext_py3(self):
if not self.PY3:
return
# str (native str)
assert self.utils.to_ntext('fran\xc3\xa7ais') == 'fran\xc3\xa7ais'
assert self.utils.to_ntext(u'fran\xe7ais') == 'fran\xe7ais'
# binary_type (bytes)
assert self.utils.to_utext(b'fran\xc3\xa7ais') == 'fran\xe7ais'
# other
with pytest.raises(TypeError):
self.utils.to_ntext(123)
def test_is_ascii_py2(self):
if self.PY3:
return
# text_type (unicode)
assert self.utils.is_ascii(u'francais') is True
assert self.utils.is_ascii(u'fran\xe7ais') is False
# str
assert self.utils.is_ascii('francais') is True
assert self.utils.is_ascii('fran\xc3\xa7ais') is False
# other
assert self.utils.is_ascii(123) is False
def test_is_ascii_py3(self):
if not self.PY3:
return
# text_type (str)
assert self.utils.is_ascii('francais') is True
assert self.utils.is_ascii(u'francais') is True
assert self.utils.is_ascii('fran\xe7ais') is False
assert self.utils.is_ascii(u'fran\xe7ais') is False
# other
assert self.utils.is_ascii(123) is False
def test_to_ascii_py2(self):
if self.PY3:
return
# text_type (unicode)
assert self.utils.to_ascii(u'francais') == 'francais'
assert self.utils.to_ascii(u'fran\xe7ais') == 'fran?ais'
assert self.utils.to_ascii(u'fran\xe7ais', 'ignore') == 'franais'
# str
assert self.utils.to_ascii('francais') == 'francais'
assert self.utils.to_ascii('fran\xc3\xa7ais') == 'fran??ais'
assert self.utils.to_ascii('fran\xc3\xa7ais', 'ignore') == 'franais'
with pytest.raises(TypeError):
self.utils.to_ascii(123)
def test_to_ascii_py3(self):
if not self.PY3:
return
# text_type (str)
assert self.utils.to_ascii('francais') == 'francais'
assert self.utils.to_ascii(u'francais') == 'francais'
assert self.utils.to_ascii('fran\xe7ais') == 'fran?ais'
assert self.utils.to_ascii('fran\xe7ais', 'ignore') == 'franais'
assert self.utils.to_ascii(u'fran\xe7ais') == 'fran?ais'
assert self.utils.to_ascii(u'fran\xe7ais', 'ignore') == 'franais'
with pytest.raises(TypeError):
self.utils.to_ascii(123)
def test_is_print_ascii_py2(self):
if self.PY3:
return
# text_type (unicode)
assert self.utils.is_print_ascii(u'francais') is True
assert self.utils.is_print_ascii(u'francais\n') is False
assert self.utils.is_print_ascii(u'fran\xe7ais') is False
assert self.utils.is_print_ascii(u'fran\xe7ais\n') is False
# str
assert self.utils.is_print_ascii('francais') is True
assert self.utils.is_print_ascii('francais\n') is False
assert self.utils.is_print_ascii('fran\xc3\xa7ais') is False
# other
assert self.utils.is_print_ascii(123) is False
def test_is_print_ascii_py3(self):
if not self.PY3:
return
# text_type (str)
assert self.utils.is_print_ascii('francais') is True
assert self.utils.is_print_ascii('francais\n') is False
assert self.utils.is_print_ascii(u'francais') is True
assert self.utils.is_print_ascii(u'francais\n') is False
assert self.utils.is_print_ascii('fran\xe7ais') is False
assert self.utils.is_print_ascii(u'fran\xe7ais') is False
# other
assert self.utils.is_print_ascii(123) is False
def test_to_print_ascii_py2(self):
if self.PY3:
return
# text_type (unicode)
assert self.utils.to_print_ascii(u'francais') == 'francais'
assert self.utils.to_print_ascii(u'francais\n') == 'francais?'
assert self.utils.to_print_ascii(u'fran\xe7ais') == 'fran?ais'
assert self.utils.to_print_ascii(u'fran\xe7ais\n') == 'fran?ais?'
assert self.utils.to_print_ascii(u'fran\xe7ais', 'ignore') == 'franais'
assert self.utils.to_print_ascii(u'fran\xe7ais\n', 'ignore') == 'franais'
# str
assert self.utils.to_print_ascii('francais') == 'francais'
assert self.utils.to_print_ascii('francais\n') == 'francais?'
assert self.utils.to_print_ascii('fran\xc3\xa7ais') == 'fran??ais'
assert self.utils.to_print_ascii('fran\xc3\xa7ais\n') == 'fran??ais?'
assert self.utils.to_print_ascii('fran\xc3\xa7ais', 'ignore') == 'franais'
assert self.utils.to_print_ascii('fran\xc3\xa7ais\n', 'ignore') == 'franais'
with pytest.raises(TypeError):
self.utils.to_print_ascii(123)
def test_to_print_ascii_py3(self):
if not self.PY3:
return
# text_type (str)
assert self.utils.to_print_ascii('francais') == 'francais'
assert self.utils.to_print_ascii('francais\n') == 'francais?'
assert self.utils.to_print_ascii(u'francais') == 'francais'
assert self.utils.to_print_ascii(u'francais\n') == 'francais?'
assert self.utils.to_print_ascii('fran\xe7ais') == 'fran?ais'
assert self.utils.to_print_ascii('fran\xe7ais\n') == 'fran?ais?'
assert self.utils.to_print_ascii('fran\xe7ais', 'ignore') == 'franais'
assert self.utils.to_print_ascii('fran\xe7ais\n', 'ignore') == 'franais'
assert self.utils.to_print_ascii(u'fran\xe7ais') == 'fran?ais'
assert self.utils.to_print_ascii(u'fran\xe7ais\n') == 'fran?ais?'
assert self.utils.to_print_ascii(u'fran\xe7ais', 'ignore') == 'franais'
assert self.utils.to_print_ascii(u'fran\xe7ais\n', 'ignore') == 'franais'
with pytest.raises(TypeError):
self.utils.to_print_ascii(123)
def test_ctoi(self):
assert self.utils.ctoi(123) == 123
assert self.utils.ctoi('ABC') == 65
def test_parse_int(self):
assert self.utils.parse_int(123) == 123
assert self.utils.parse_int('123') == 123
assert self.utils.parse_int(-123) == -123
assert self.utils.parse_int('-123') == -123
assert self.utils.parse_int('abc') == 0
def test_unique_seq(self):
assert self.utils.unique_seq((1, 2, 2, 3, 3, 3)) == (1, 2, 3)
assert self.utils.unique_seq((3, 3, 3, 2, 2, 1)) == (3, 2, 1)
assert self.utils.unique_seq([1, 2, 2, 3, 3, 3]) == [1, 2, 3]
assert self.utils.unique_seq([3, 3, 3, 2, 2, 1]) == [3, 2, 1]