# Source code for astropy.utils.xml.check
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
A collection of functions for checking various XML-related strings for
standards compliance.
"""
import re
import urllib.parse
[docs]
def check_id(ID):
    """
    Returns `True` if *ID* is a valid XML ID.

    An ID is accepted when it starts with an ASCII letter or underscore
    and every following character is an ASCII letter, digit, underscore,
    period or hyphen.

    Parameters
    ----------
    ID : str
        The string to check.

    Returns
    -------
    bool
        `True` if the whole of *ID* matches, `False` otherwise (including
        for the empty string).
    """
    # ``fullmatch`` instead of ``match`` + ``$``: ``$`` also matches just
    # before a trailing newline, which would let "name\n" through.
    return re.fullmatch(r"[A-Za-z_][A-Za-z0-9_\.\-]*", ID) is not None
[docs]
def fix_id(ID):
    """
    Given an arbitrary string, create one that can be used as an xml
    id.  This is rather simplistic at the moment, since it just
    replaces non-valid characters with underscores.

    Parameters
    ----------
    ID : str
        The string to turn into an ID.

    Returns
    -------
    str
        *ID* itself when it is already valid; otherwise a copy that is
        prefixed with an underscore if the first character may not start
        an ID, and in which every other invalid character is replaced by
        an underscore.  The empty string is returned unchanged.
    """
    if ID == "":
        return ""

    # The first character must be an ASCII letter or underscore.  Keep the
    # offending character (it may still be legal further in, e.g. a digit)
    # and put an underscore in front of it.
    if re.fullmatch(r"[A-Za-z_]", ID[0]) is None:
        ID = "_" + ID

    # Substituting over the whole tail, rather than first testing with
    # ``match`` + ``$``, also catches a trailing newline.
    return ID[0] + re.sub(r"[^A-Za-z0-9_\.\-]", "_", ID[1:])
# Regex source for token-like text: it must not start with CR, LF, TAB or
# space and contains no CR, LF or TAB.  Line feed is written ``\n``; ``\l`` is
# not an escape that the ``re`` module understands and made the pattern fail
# to compile.
# NOTE(review): nothing in the visible module uses this constant --
# check_token carries its own pattern -- confirm before relying on it.
_token_regex = r"(?![\r\n\t ])[^\r\n\t]*(?![\r\n\t ])"
[docs]
def check_token(token):
    """
    Returns `True` if *token* is a valid XML token, as defined by XML
    Schema Part 2.

    A token contains no carriage return, line feed or tab, has no
    leading or trailing space, and never has two or more spaces in a
    row.  The empty string is a valid token.

    Parameters
    ----------
    token : str
        The string to check.

    Returns
    -------
    bool
        `True` if *token* is a valid token, `False` otherwise.
    """
    # One or more space-free words joined by single spaces.  Requiring a
    # word before the first space rejects a leading space, and ``fullmatch``
    # (unlike ``match`` + ``$``) rejects a trailing newline.
    return token == "" or (
        re.fullmatch(r"[^\r\n\t ]+(?: [^\r\n\t ]+)*", token) is not None
    )
[docs]
def check_mime_content_type(content_type):
    """
    Returns `True` if *content_type* is a valid MIME content type
    (syntactically at least), as defined by RFC 2045.

    Only the bare ``type/subtype`` form is accepted; parameters such as
    ``; charset=utf-8`` are not.

    Parameters
    ----------
    content_type : str
        The string to check.

    Returns
    -------
    bool
        `True` if *content_type* is exactly two RFC 2045 tokens separated
        by a single ``/``, `False` otherwise.
    """
    # RFC 2045 token: one or more US-ASCII characters other than SPACE,
    # control characters (including DEL) and the "tspecials" below.  Note
    # that backslash is one of the tspecials.
    tspecials = '()<>@,;:\\"/[]?='
    token_chars = "".join(
        chr(code) for code in range(0x21, 0x7F) if chr(code) not in tspecials
    )
    token_regex = f"[{re.escape(token_chars)}]+"

    # ``fullmatch`` so that nothing -- not even a trailing newline -- may
    # follow the subtype.
    return re.fullmatch(f"{token_regex}/{token_regex}", content_type) is not None
[docs]
def check_anyuri(uri):
    """
    Returns `True` if *uri* is a valid URI as defined in RFC 2396.

    The check is purely syntactic: the whole string must consist of an
    optional scheme, up to two slashes, characters that RFC 2396 allows
    in a URI and an optional ``#fragment``; it must also be accepted by
    `urllib.parse.urlparse`.  Square brackets are allowed as well so that
    IPv6 literal hosts such as ``http://[::1]/`` are accepted.  The empty
    string is accepted.

    Parameters
    ----------
    uri : str
        The string to check.

    Returns
    -------
    bool
        `True` if *uri* passes both checks, `False` otherwise.
    """
    # RFC 2396 reserved and unreserved characters (the comma included), "%"
    # for escapes, and "[" / "]" for IPv6 literals.
    uri_chars = r"[0-9a-zA-Z;/?:@&=+$,.\-_!~*'()%\[\]]"
    pattern = (
        r"(?:(?:[a-zA-Z][0-9a-zA-Z+\-.]*:)?/{0,2}" + uri_chars + r"+)?"
        r"(?:#" + uri_chars + r"*)?"
    )

    # Every part of the pattern is optional, so it has to be anchored to the
    # whole string; an unanchored ``match`` succeeds on any input at all.
    if re.fullmatch(pattern, uri) is None:
        return False

    try:
        # Rejects what the character check cannot see, e.g. unbalanced
        # brackets in the host part.
        urllib.parse.urlparse(uri)
    except ValueError:
        return False
    return True