From 1b43b51599e4a3b39662b069af0140bf24ac3a43 Mon Sep 17 00:00:00 2001 From: Paul Kehrer Date: Wed, 11 Oct 2017 11:47:46 +0800 Subject: backwards incompatible change to UniformResourceIdentifier (#3954) * backwards incompatible change to UniformResourceIdentifier During this release cycle we decided to officially deprecate passing U-labels to our GeneralName constructors. At first we tried changing this in a purely backwards compatible way but get_values_for_type made that untenable. This PR modifies URI to accept two types: U-label strings (which raises a deprecation warning) and A-label strings (the new preferred type). There is also a constructor for URI that bypasses validation so we can parse garbage out of certificates (and round trip it if necessary) * nonsense empty commit 2.6 and codecov are the worst --- .../hazmat/backends/openssl/decode_asn1.py | 12 +++- .../hazmat/backends/openssl/encode_asn1.py | 7 +- src/cryptography/x509/general_name.py | 75 ++++++---------------- 3 files changed, 33 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/cryptography/hazmat/backends/openssl/decode_asn1.py b/src/cryptography/hazmat/backends/openssl/decode_asn1.py index 86f8f8d4..24eb55b1 100644 --- a/src/cryptography/hazmat/backends/openssl/decode_asn1.py +++ b/src/cryptography/hazmat/backends/openssl/decode_asn1.py @@ -97,8 +97,16 @@ def _decode_general_name(backend, gn): # when a certificate (against the RFC) contains them. return x509.DNSName._init_without_validation(data) elif gn.type == backend._lib.GEN_URI: - data = _asn1_string_to_bytes(backend, gn.d.uniformResourceIdentifier) - return x509.UniformResourceIdentifier(data) + # Convert to bytes and then decode to utf8. We don't use + # asn1_string_to_utf8 here because it doesn't properly convert + # utf8 from ia5strings. + data = _asn1_string_to_bytes( + backend, gn.d.uniformResourceIdentifier + ).decode("utf8") + # We don't use the constructor for URI so we can bypass validation + # This allows us to create URI objects that have unicode chars + # when a certificate (against the RFC) contains them. + return x509.UniformResourceIdentifier._init_without_validation(data) elif gn.type == backend._lib.GEN_RID: oid = _obj2txt(backend, gn.d.registeredID) return x509.RegisteredID(x509.ObjectIdentifier(oid)) diff --git a/src/cryptography/hazmat/backends/openssl/encode_asn1.py b/src/cryptography/hazmat/backends/openssl/encode_asn1.py index 3f0a4c8c..89b2f7f7 100644 --- a/src/cryptography/hazmat/backends/openssl/encode_asn1.py +++ b/src/cryptography/hazmat/backends/openssl/encode_asn1.py @@ -443,9 +443,10 @@ def _encode_general_name(backend, name): elif isinstance(name, x509.UniformResourceIdentifier): gn = backend._lib.GENERAL_NAME_new() backend.openssl_assert(gn != backend._ffi.NULL) - asn1_str = _encode_asn1_str( - backend, name.bytes_value, len(name.bytes_value) - ) + # ia5strings are supposed to be ITU T.50 but to allow round-tripping + # of broken certs that encode utf8 we'll encode utf8 here too. + data = name.value.encode("utf8") + asn1_str = _encode_asn1_str(backend, data, len(data)) gn.type = backend._lib.GEN_URI gn.d.uniformResourceIdentifier = asn1_str else: diff --git a/src/cryptography/x509/general_name.py b/src/cryptography/x509/general_name.py index 6f7fa7a7..776219e2 100644 --- a/src/cryptography/x509/general_name.py +++ b/src/cryptography/x509/general_name.py @@ -163,30 +163,29 @@ class UniformResourceIdentifier(object): def __init__(self, value): if isinstance(value, six.text_type): try: - value = value.encode("ascii") + value.encode("ascii") except UnicodeEncodeError: value = self._idna_encode(value) warnings.warn( - "UniformResourceIdentifier values should be passed as " - "bytes with the hostname idna encoded, not strings. " - "Support for passing unicode strings will be removed in a " - "future version.", - utils.DeprecatedIn21, - stacklevel=2, - ) - else: - warnings.warn( - "UniformResourceIdentifier values should be passed as " - "bytes with the hostname idna encoded, not strings. " - "Support for passing unicode strings will be removed in a " - "future version.", + "URI values should be passed as an A-label string. " + "This means unicode characters should be encoded via " + "idna. Support for passing unicode strings (aka U-label) " + " will be removed in a future version.", utils.DeprecatedIn21, stacklevel=2, ) - elif not isinstance(value, bytes): - raise TypeError("value must be bytes") + else: + raise TypeError("value must be string") + + self._value = value - self._bytes_value = value + value = utils.read_only_property("_value") + + @classmethod + def _init_without_validation(cls, value): + instance = cls.__new__(cls) + instance._value = value + return instance def _idna_encode(self, value): parsed = urllib_parse.urlparse(value) @@ -208,58 +207,22 @@ class UniformResourceIdentifier(object): parsed.params, parsed.query, parsed.fragment - )).encode("ascii") - - @property - def value(self): - warnings.warn( - "UniformResourceIdentifier.bytes_value should be used instead of " - "UniformResourceIdentifier.value; it contains the name as raw " - "bytes, instead of as an idna-decoded unicode string. " - "UniformResourceIdentifier.value will be removed in a future " - "version.", - utils.DeprecatedIn21, - stacklevel=2 - ) - parsed = urllib_parse.urlparse(self.bytes_value) - if not parsed.hostname: - # There's no idna here so we can immediately return - return self.bytes_value.decode("utf-8") - elif parsed.port: - netloc = idna.decode(parsed.hostname) + ":{0}".format(parsed.port) - else: - netloc = idna.decode(parsed.hostname) - - # Note that building a URL in this fashion means it should be - # semantically indistinguishable from the original but is not - # guaranteed to be exactly the same. - return urllib_parse.urlunparse(( - parsed.scheme.decode('utf8'), - netloc, - parsed.path.decode('utf8'), - parsed.params.decode('utf8'), - parsed.query.decode('utf8'), - parsed.fragment.decode('utf8') )) - bytes_value = utils.read_only_property("_bytes_value") - def __repr__(self): - return "".format( - self.bytes_value - ) + return "".format(self.value) def __eq__(self, other): if not isinstance(other, UniformResourceIdentifier): return NotImplemented - return self.bytes_value == other.bytes_value + return self.value == other.value def __ne__(self, other): return not self == other def __hash__(self): - return hash(self.bytes_value) + return hash(self.value) @utils.register_interface(GeneralName) -- cgit v1.2.3