urlstd.parse#

Python implementation of the WHATWG URL Standard

class urlstd.parse.BasicURLParser#

Bases: object

An implementation of the basic URL parser in Python.

classmethod parse(urlstring: str, base: URLRecord | None = None, encoding: str = 'utf-8', url: URLRecord | None = None, state_override: URLParserState | None = None, **kwargs) → URLRecord#

Parses a string urlstring against a base URL base.

Parameters:

urlstring – A string to parse.
base – A base URL.
encoding – The encoding used to encode the URL’s query. Characters that cannot be encoded are replaced with the appropriate XML character reference.
url – An input URL record. It will be replaced with the parsing result.
state_override – URLParserState enum.

Returns:

If url is specified, it is updated and returned; otherwise, a new URL record is created and returned.

Raises:

urlstd.error.URLParseError – Raised when URL parsing fails.

Examples

To parse a string as a whole URL:

>>> url = BasicURLParser.parse('http://example.org/foo/bar')
>>> str(url)
'http://example.org/foo/bar'

To replace a URL’s scheme with a string:

>>> url = BasicURLParser.parse('a://example.net')
>>> str(url)
'a://example.net'
>>> BasicURLParser.parse('B:', url=url,
...     state_override=URLParserState.SCHEME_START_STATE)
>>> str(url)
'b://example.net'

To replace a URL’s username, password, and host with a string:

>>> url = BasicURLParser.parse('http://example.org/foo/bar')
>>> str(url)
'http://example.org/foo/bar'
>>> BasicURLParser.parse('user:pass@example.net', url=url,
...     state_override=URLParserState.AUTHORITY_STATE)
>>> str(url)
'http://user:pass@example.net/foo/bar'

To replace a URL’s host and port with a string:

>>> url = BasicURLParser.parse(
...     'http://user:pass@example.net/foo/bar')
>>> str(url)
'http://user:pass@example.net/foo/bar'
>>> BasicURLParser.parse('0x7F000001:8080', url=url,
...     state_override=URLParserState.HOST_STATE)
>>> str(url)
'http://user:pass@127.0.0.1:8080/foo/bar'

To replace a URL’s port with a string:

>>> url = BasicURLParser.parse(
...     'http://user:pass@example.net:8080/foo/bar')
>>> str(url)
'http://user:pass@example.net:8080/foo/bar'
>>> BasicURLParser.parse('80', url=url,
...     state_override=URLParserState.PORT_STATE)
>>> str(url)
'http://user:pass@example.net/foo/bar'

To replace a URL’s path with a string:

>>> url = BasicURLParser.parse('http://example.org/foo/bar')
>>> str(url)
'http://example.org/foo/bar'
>>> if not url.has_opaque_path():
...     url.path = []
...     BasicURLParser.parse('?', url=url,
...         state_override=URLParserState.PATH_START_STATE)
>>> str(url)
'http://example.org/%3F'

To replace a URL’s query with a string:

>>> url = BasicURLParser.parse(
...     'http://example.net/foo/bar?a=1')
>>> str(url)
'http://example.net/foo/bar?a=1'
>>> url.query = ''
>>> BasicURLParser.parse('baz=2', url=url,
...     state_override=URLParserState.QUERY_STATE)
>>> str(url)
'http://example.net/foo/bar?baz=2'

To replace a URL’s fragment with a string:

>>> url = BasicURLParser.parse('http://example.org/foo/bar#nav')
>>> str(url)
'http://example.org/foo/bar#nav'
>>> url.fragment = ''
>>> BasicURLParser.parse('main', url=url,
...     state_override=URLParserState.FRAGMENT_STATE)
>>> str(url)
'http://example.org/foo/bar#main'

class urlstd.parse.Host#

Bases: object

Utility class for hosts (domains and IP addresses).

classmethod parse(host: str, is_not_special: bool = False, **kwargs) → str | int | tuple[int, ...]#

Parses a string host, and returns a domain, IP address, opaque host, or empty host.

Parameters:

host – A host string to parse.
is_not_special – True if a URL’s scheme is not a special scheme, False otherwise.

Returns:

str – A domain, an opaque host, or an empty host.
int – An IPv4 address.
tuple[int, …] – An IPv6 address.

Raises:

urlstd.error.HostParseError – Raised when a host string is not valid.
urlstd.error.IDNAError – Raised when IDNA processing fails.
urlstd.error.IPv4AddressParseError – Raised when IPv4 address parsing fails.
urlstd.error.IPv6AddressParseError – Raised when IPv6 address parsing fails.

classmethod serialize(host: str | int | Sequence[int]) → str#

Returns a string representation of a host.

Parameters:: host – A domain, an IP address, an opaque host, or an empty host.
Returns:: A host string.

class urlstd.parse.HostValidator#

Bases: object

Validates a host string.

Examples

>>> HostValidator.is_valid('a..b')
False
>>> HostValidator.is_valid('127.0.0x0.1')
False
>>> HostValidator.is_valid('[1::1::1]')
False

>>> validity = ValidityState()
>>> HostValidator.is_valid('a..b', validity=validity)
False
>>> validity
ValidityState(valid=False, error_types=['domain-to-ASCII'],
descriptions=["domain-to-ASCII: Unicode ToASCII records an error: domain='a..b' errors=UIDNA_ERROR_EMPTY_LABEL (0x0001)"],
validation_errors=1, disable_logging=True)

>>> HostValidator.is_valid('127.0.0x0.1', validity=validity)
False
>>> validity
ValidityState(valid=False, error_types=['IPv4-non-decimal-part'],
descriptions=["IPv4-non-decimal-part: IPv4 address contains numbers expressed using hexadecimal or octal digits: '0x0' in '127.0.0x0.1'"],
validation_errors=1, disable_logging=True)

>>> HostValidator.is_valid('[1::1::1]', validity=validity)
False
>>> validity
ValidityState(valid=False, error_types=['IPv6-multiple-compression'],
descriptions=["IPv6-multiple-compression: IPv6 address is compressed in more than one spot: '1::1::1'"],
validation_errors=1, disable_logging=True)

classmethod is_valid(host: str, **kwargs) → bool#

Returns True if host is a valid host string (a domain string or an IP address string).

Parameters:: host – A host string (a domain string or an IP address string) to verify.
Keyword Arguments:: validity – A ValidityState object that stores validation results.
Returns:: True if host is a valid host, False otherwise.

classmethod is_valid_domain(domain: str, **kwargs) → bool#

Returns True if domain is a valid domain string.

Parameters:: domain – A domain string to verify.
Keyword Arguments:: validity – A ValidityState object that stores validation results.
Returns:: True if domain is a valid domain, False otherwise.

classmethod is_valid_ipv4_address(address: str, **kwargs) → bool#

Returns True if address is a valid IPv4-address string.

Parameters:: address – An IPv4-address string to verify.
Keyword Arguments:: validity – A ValidityState object that stores validation results.
Returns:: True if address is a valid IPv4-address, False otherwise.

classmethod is_valid_ipv6_address(address: str, **kwargs) → bool#

Returns True if address is a valid IPv6-address string.

Parameters:: address – An IPv6-address string to verify.
Keyword Arguments:: validity – A ValidityState object that stores validation results.
Returns:: True if address is a valid IPv6-address, False otherwise.

classmethod is_valid_opaque_host(host: str, **kwargs) → bool#

Returns True if host is a valid opaque-host string.

Parameters:: host – An opaque-host string to verify.
Keyword Arguments:: validity – A ValidityState object that stores validation results.
Returns:: True if host is a valid opaque-host, False otherwise.

class urlstd.parse.IDNA#

Bases: object

Utility class for IDNA processing.

classmethod domain_to_ascii(domain: str, be_strict: bool = False, **kwargs) → str#

Converts a domain name to IDNA ASCII form.

Parameters:

domain – A domain name.
be_strict – If True, set UseSTD3ASCIIRules flag and VerifyDnsLength flag to true. See RFC 3490 for more details.

Returns:

A domain name in IDNA ASCII form.

Raises:

urlstd.error.HostParseError – Raised when a domain name is not valid. See UIDNA_ERROR_* constants in uidna.h for more details on IDNA processing errors.
urlstd.error.IDNAError – Raised when IDNA processing fails.

classmethod domain_to_unicode(domain: str, be_strict: bool = False, **kwargs) → str#

Converts a domain name to IDNA Unicode form.

Parameters:

domain – A domain name.
be_strict – If True, set UseSTD3ASCIIRules flag to true. See RFC 3490 for more details.

Returns:

A domain name in IDNA Unicode form.

Raises:

urlstd.error.IDNAError – Raised when IDNA processing fails.

class urlstd.parse.Origin#

Bases: NamedTuple

A named tuple that represents the origin of the URL.

scheme: str#: A URL’s scheme.

host: str | int | tuple[int, ...] | None#: A URL’s host.

port: int | None#: A URL’s port.

domain: str | None#: A URL’s domain.

__str__() → str#

Returns a string representation of the origin.

Returns:: A string representation of the origin.

is_same_origin(other: Origin) → bool#

Returns True if other can be said to be of same origin as this object.

Parameters:: other – The Origin to compare to this one.
Returns:: True if the schemes, hosts, and ports of this object and other are identical, False otherwise.

is_same_origin_domain(other: Origin) → bool#

Returns True if other can be said to be of same origin-domain as this object.

Parameters:

other – The Origin to compare to this one.

Returns:

True if the schemes of this object and other are identical and their domains are identical and not None, or if this object and other are same origin and their domains are both None; False otherwise.

class urlstd.parse.URL(url: str, base: str | URL | None = None)#

Bases: object

Parses a string url against a base URL base.

Parameters:

url – An absolute-URL or a relative-URL. If url is a relative-URL, base is required.
base – An absolute-URL for a relative-URL url.

Raises:

urlstd.error.URLParseError – Raised when URL parsing fails.

Examples

To parse a string into a URL:

>>> URL('http://user:pass@foo:21/bar;par?b#c')
<URL(href='http://user:pass@foo:21/bar;par?b#c', origin='http://foo:21',
protocol='http:', username='user', password='pass', host='foo:21',
hostname='foo', port='21', pathname='/bar;par', search='?b', hash='#c')>

To parse a string into a URL using a base URL:

>>> URL('//foo/bar', base='http://example.org/foo/bar')
<URL(href='http://foo/bar', origin='http://foo', protocol='http:',
username='', password='', host='foo', hostname='foo', port='',
pathname='/bar', search='', hash='')>

>>> URL('/', base='http://example.org/foo/bar')
<URL(href='http://example.org/', origin='http://example.org',
protocol='http:', username='', password='', host='example.org',
hostname='example.org', port='', pathname='/', search='', hash='')>

>>> URL('https://test:@test', base='about:blank')
<URL(href='https://test@test/', origin='https://test',
protocol='https:', username='test', password='', host='test',
hostname='test', port='', pathname='/', search='', hash='')>

>>> URL('?a=b&c=d', base='http://example.org/foo/bar')
<URL(href='http://example.org/foo/bar?a=b&c=d',
origin='http://example.org', protocol='http:', username='', password='',
host='example.org', hostname='example.org', port='',
pathname='/foo/bar', search='?a=b&c=d', hash='')>

>>> URL('#β', base='http://example.org/foo/bar')
<URL(href='http://example.org/foo/bar#%CE%B2',
origin='http://example.org', protocol='http:', username='', password='',
host='example.org', hostname='example.org', port='',
pathname='/foo/bar', search='', hash='#%CE%B2')>

>>> URL('', base='http://example.org/foo/bar')
<URL(href='http://example.org/foo/bar', origin='http://example.org',
protocol='http:', username='', password='', host='example.org',
hostname='example.org', port='', pathname='/foo/bar', search='',
hash='')>

>>> URL('https://x/\ufffd?\ufffd#\ufffd', base='about:blank')
<URL(href='https://x/%EF%BF%BD?%EF%BF%BD#%EF%BF%BD', origin='https://x',
protocol='https:', username='', password='', host='x', hostname='x',
port='', pathname='/%EF%BF%BD', search='?%EF%BF%BD', hash='#%EF%BF%BD')>

__eq__(other: Any) → bool#

Returns True if other is equal to this object.

This is equivalent to equals(other).

Parameters:: other – A URL to compare to this one.
Returns:: True if other is equal to this object, False otherwise.

__str__() → str#

Returns a string representation of a URL.

This is equivalent to href.

Returns:: A string representation of a URL.

classmethod can_parse(url: str, base: str | URL | None = None, **kwargs) → bool#

Returns True if url against a base URL base is parsable.

Parameters:

url – An absolute-URL or a relative-URL. If url is a relative-URL, base is required.
base – An absolute-URL for a relative-URL url.

Keyword Arguments:

validity – A ValidityState object that stores validation results.

Returns:

True if url against a base URL base is parsable, False otherwise.

See also

URLValidator.is_valid()

equals(other: URL, exclude_fragments: bool = False) → bool#

Returns True if other is equal to this object.

Parameters:

other – A URL to compare to this one.
exclude_fragments – If True, the fragment is excluded from the comparison.

Returns:

True if other is equal to this object, False otherwise.

property hash: str#

A URL’s fragment (includes leading U+0023 (#) if non-empty).

Examples

>>> url = URL('http://example.net')
>>> str(url)
'http://example.net/'
>>> url.hash
''
>>> url.hash = '%c3%89té'
>>> url.hash
'#%c3%89t%C3%A9'
>>> str(url)
'http://example.net/#%c3%89t%C3%A9'

property host: str#

A URL’s host and, if the URL’s port differs from the default port for the URL’s scheme, U+003A (:) followed by the URL’s port.

If a URL has an opaque path, setting the value has no effect.

Examples

>>> url = URL('http://example.net')
>>> str(url)
'http://example.net/'
>>> url.host
'example.net'
>>> url.host = 'example.com:8080'
>>> url.host
'example.com:8080'
>>> str(url)
'http://example.com:8080/'

property hostname: str#

A URL’s host.

If a URL has an opaque path, setting the value has no effect.

Examples

>>> url = URL('http://example.net:8080')
>>> str(url)
'http://example.net:8080/'
>>> url.hostname
'example.net'
>>> url.hostname = 'example.com'
>>> url.hostname
'example.com'
>>> str(url)
'http://example.com:8080/'

property href: str#

A string representation of a URL.

Must be an absolute-URL when setting a value.

Examples

>>> url = URL('http://example.org/foo/bar')
>>> url.href
'http://example.org/foo/bar'
>>> url.href = 'http:/example.com/'
>>> url.href
'http://example.com/'

property origin: str#

Returns a string representation of a URL’s origin.

Examples

>>> URL('blob:https://example.com:443/').origin
'https://example.com'

>>> URL('blob:d3958f5c-0777-0845-9dcf-2cb28783acaf').origin
'null'

>>> URL('http://example.org:82/foo/bar').origin
'http://example.org:82'

>>> URL('non-special://test/x').origin
'null'

property password: str#

A URL’s password.

If a URL can’t have a username/password/port, setting the value has no effect.

Examples

>>> url = URL('http://example.net')
>>> str(url)
'http://example.net/'
>>> url.password
''
>>> url.password = '%c3%89té'
>>> url.password
'%c3%89t%C3%A9'
>>> str(url)
'http://:%c3%89t%C3%A9@example.net/'

property pathname: str#

A URL’s path.

If a URL has an opaque path, setting the value has no effect.

Examples

>>> url = URL('http://example.net')
>>> str(url)
'http://example.net/'
>>> url.pathname
'/'
>>> url.pathname = '%2e%2E%c3%89té'
>>> url.pathname
'/%2e%2E%c3%89t%C3%A9'
>>> str(url)
'http://example.net/%2e%2E%c3%89t%C3%A9'

property port: str#

A URL’s port.

If a URL can’t have a username/password/port, setting the value has no effect.

Examples

>>> url = URL('http://example.net:8080')
>>> str(url)
'http://example.net:8080/'
>>> url.port
'8080'
>>> url.port = '80'
>>> url.port
''
>>> str(url)
'http://example.net/'

property protocol: str#

A URL’s scheme, followed by U+003A (:).

Examples

>>> url = URL('a://example.net')
>>> str(url)
'a://example.net'
>>> url.protocol
'a:'
>>> url.protocol = 'B'
>>> url.protocol
'b:'
>>> str(url)
'b://example.net'

property search: str#

A URL’s query (includes leading U+003F (?) if non-empty).

Examples

>>> url = URL('http://example.net')
>>> str(url)
'http://example.net/'
>>> url.search
''
>>> url.search = '%c3%89té'
>>> url.search
'?%c3%89t%C3%A9'
>>> str(url)
'http://example.net/?%c3%89t%C3%A9'

property search_params: URLSearchParams#

Returns a URLSearchParams object associated with this URL object.

Examples

>>> url = URL('http://example.net/file')
>>> str(url)
'http://example.net/file'
>>> url.search
''
>>> params = url.search_params
>>> params.append('a', '1')
>>> params.append('b', '2')
>>> params.append('a', '3')
>>> list(params)
[('a', '1'), ('b', '2'), ('a', '3')]
>>> url.search
'?a=1&b=2&a=3'
>>> str(url)
'http://example.net/file?a=1&b=2&a=3'

property username: str#

A URL’s username.

If a URL can’t have a username/password/port, setting the value has no effect.

Examples

>>> url = URL('http://example.net')
>>> str(url)
'http://example.net/'
>>> url.username
''
>>> url.username = '%c3%89té'
>>> url.username
'%c3%89t%C3%A9'
>>> str(url)
'http://%c3%89t%C3%A9@example.net/'

class urlstd.parse.URLParserState(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)#

Bases: IntEnum

State machine enums for the basic URL parser.

EOF = -1#

AUTHORITY_STATE = 1#

FILE_HOST_STATE = 2#

FILE_SLASH_STATE = 3#

FILE_STATE = 4#

FRAGMENT_STATE = 5#

HOSTNAME_STATE = 6#

HOST_STATE = 7#

NO_SCHEME_STATE = 8#

OPAQUE_PATH_STATE = 9#

PATH_OR_AUTHORITY_STATE = 10#

PATH_START_STATE = 11#

PATH_STATE = 12#

PORT_STATE = 13#

QUERY_STATE = 14#

RELATIVE_SLASH_STATE = 15#

RELATIVE_STATE = 16#

SCHEME_START_STATE = 17#

SCHEME_STATE = 18#

SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE = 19#

SPECIAL_AUTHORITY_SLASHES_STATE = 20#

SPECIAL_RELATIVE_OR_AUTHORITY_STATE = 21#

class urlstd.parse.URLRecord#

Bases: object

A data class that represents a universal identifier.

scheme: str = ''#: A URL’s scheme.

username: str = ''#: A URL’s username.

password: str = ''#: A URL’s password.

host: str | int | tuple[int, ...] | None = None#: A URL’s host.

port: int | None = None#: A URL’s port.

path: list[str] | str#: A URL’s path.

query: str | None = None#: A URL’s query.

fragment: str | None = None#: A URL’s fragment.

blob_url_entry: str | None = None#: A URL’s blob URL entry. (unused)

__eq__(other: Any) → bool#

Returns True if other is equal to this object.

This is equivalent to equals(other).

Parameters:: other – The URL record to compare to this one.
Returns:: True if other is equal to this object, False otherwise.

__str__() → str#

Returns a string representation of a URL.

This is equivalent to href.

Returns:: A string representation of a URL.

cannot_have_username_password_port() → bool#

Returns True if a URL’s host is None or the empty string, or if its scheme is “file”.

Returns:: True if a URL’s host is None or the empty string, or if its scheme is “file”; False otherwise.

equals(other: URLRecord, exclude_fragments: bool = False) → bool#

Returns True if other is equal to this object.

Parameters:

other – The URL record to compare to this one.
exclude_fragments – If True, the fragment is excluded from the comparison.

Returns:

True if other is equal to this object, False otherwise.

has_opaque_path() → bool#

Returns True if a URL has an opaque path.

Returns:: True if a URL’s path is a string, False otherwise.

property href: str#

Returns a string representation of a URL.

This is equivalent to serialize_url().

includes_credentials() → bool#

Returns True if a URL’s username or password is not the empty string.

Returns:: True if a URL’s username or password is not the empty string, False otherwise.

is_not_special() → bool#

Returns True if a URL’s scheme is not a special scheme (“ftp”, “file”, “http”, “https”, “ws”, or “wss”).

Returns:: True if a URL’s scheme is not a special scheme (“ftp”, “file”, “http”, “https”, “ws”, or “wss”), False otherwise.

is_special() → bool#

Returns True if a URL’s scheme is a special scheme (“ftp”, “file”, “http”, “https”, “ws”, or “wss”).

Returns:: True if a URL’s scheme is a special scheme (“ftp”, “file”, “http”, “https”, “ws”, or “wss”), False otherwise.

property origin: Origin | None#

Returns a URL’s origin, or None if the origin is opaque.

Examples

>>> parse_url('blob:https://example.com:443/').origin
Origin(scheme='https', host='example.com', port=None, domain=None)

>>> parse_url('blob:d3958f5c-0777-0845-9dcf-2cb28783acaf').origin  # → None

>>> parse_url('http://example.org:82/foo/bar').origin
Origin(scheme='http', host='example.org', port=82, domain=None)

>>> parse_url('non-special://test/x').origin  # → None

serialize_host() → str#

Returns a string representation of a URL’s host.

Returns:: A string representation of a URL’s host.

serialize_path() → str#

Returns a string representation of a URL’s path.

Returns:: A string representation of a URL’s path.

serialize_url(exclude_fragment: bool = False) → str#

Returns a string representation of a URL.

Parameters:: exclude_fragment – If True, fragment identifiers will be removed from the output string.
Returns:: A string representation of a URL.

shorten_path() → None#: Shortens a URL’s path.

class urlstd.parse.URLSearchParams(init: str)#

class urlstd.parse.URLSearchParams(init: Sequence[Sequence[str | int | float]])

class urlstd.parse.URLSearchParams(init: dict[str, str | int | float])

class urlstd.parse.URLSearchParams(init: URLRecord)

class urlstd.parse.URLSearchParams(init: URLSearchParams)

class urlstd.parse.URLSearchParams

Bases: Collection

Parses and manipulates URL’s query.

Parameters:: init – One of: a string in application/x-www-form-urlencoded form, a sequence of name-value pairs, a dictionary containing name-value pairs, a URLRecord object, or a URLSearchParams object.

See also

URL.search_params

Examples

To create a URLSearchParams:

>>> params = URLSearchParams('?a=1&b=2&a=3')
>>> list(params)
[('a', '1'), ('b', '2'), ('a', '3')]

>>> params = URLSearchParams([('a', '1'), ('b', '2'), ('a', '3')])
>>> list(params)
[('a', '1'), ('b', '2'), ('a', '3')]

>>> params = URLSearchParams({'a': '1', 'b': '2', 'a': '3'})
>>> list(params)
[('a', '3'), ('b', '2')]

>>> new_params = URLSearchParams(params)
>>> list(new_params)
[('a', '3'), ('b', '2')]

__add__(other: Any) → str#

Returns a string in application/x-www-form-urlencoded form concatenated with other.

other must be a string.

Parameters:: other – A string to concatenate.
Returns:: A string in application/x-www-form-urlencoded form concatenated with other.

__contains__(item: Any) → bool#

Returns True if a name-value pair with the specified item exists, False otherwise.

item must be a string.

This is equivalent to has(item).

Parameters:: item – The name of parameter to find.
Returns:: True if a name-value pair with the specified item exists, False otherwise.

__eq__(other: Any) → bool#

Returns True if other is equal to this object.

Parameters:: other – The URLSearchParams to compare to this one.
Returns:: True if other is equal to this object, False otherwise.

__getitem__(key: int | slice) → tuple[str, str] | list[tuple[str, str]]#

Returns the name-value pair(s) specified by key.

Parameters:: key – An index that specifies the position to return, or a slice object that specifies the range to return.
Returns:: A name-value pair or a list of name-value pairs.

__iter__() → Iterator[tuple[str, str]]#

Returns a new iterator of this object’s items ((name, value) pairs).

This is equivalent to entries().

Returns:: An iterator of this object’s items ((name, value) pairs).

__len__() → int#

Returns the number of name-value pairs.

Returns:: The number of name-value pairs.

__str__() → str#

Returns a string in application/x-www-form-urlencoded form.

Returns:: A string in application/x-www-form-urlencoded form.

Examples

>>> params = URLSearchParams()
>>> params.append('a', '1')
>>> params.append('b', '2')
>>> params.append('a', '3')
>>> str(params)
'a=1&b=2&a=3'

append(name: str, value: str | int | float) → None#

Appends a new name-value pair as a new search parameter.

Parameters:

name – The name of parameter to append.
value – The value of parameter to append.

Examples

>>> params = URLSearchParams()
>>> params.append('a', '1')
>>> params.append('b', '2')
>>> params.append('a', '3')
>>> list(params)
[('a', '1'), ('b', '2'), ('a', '3')]

attach(init: URLRecord) → None#

Associates a URL record init with this URLSearchParams object.

Parameters:: init – The URL record to associate with.

delete(name: str, value: str | int | float | None = None) → None#

Removes all name-value pairs whose name is name and value is value.

Parameters:

name – The name of parameter to delete.
value – The value of parameter to delete.

Examples

>>> params = URLSearchParams('a=1&b=2&a=3')
>>> list(params)
[('a', '1'), ('b', '2'), ('a', '3')]
>>> params.delete('a')
>>> list(params)
[('b', '2')]

>>> params = URLSearchParams('a=1&b=2&a=3')
>>> list(params)
[('a', '1'), ('b', '2'), ('a', '3')]
>>> params.delete('a', '3')
>>> list(params)
[('a', '1'), ('b', '2')]

entries() → Iterator[tuple[str, str]]#

Returns a new iterator of this object’s items ((name, value) pairs).

This is equivalent to __iter__().

Returns:: An iterator of this object’s items ((name, value) pairs).

get(name: str) → str | None#

Returns the value of the first name-value pair whose name is name.

Parameters:: name – The name of parameter to return.
Returns:: The value of the first name-value pair whose name is name, or None if no such pair exists.

Examples

>>> params = URLSearchParams('a=1&b=2&a=3')
>>> params.get('a')
'1'
>>> params.get('c')  # → None

get_all(name: str) → tuple[str, ...]#

Returns the values of all name-value pairs whose name is name.

Parameters:: name – The name of parameter to return.
Returns:: The values of all name-value pairs whose name is name, or an empty tuple if no such pair exists.

Examples

>>> params = URLSearchParams('a=1&b=2&a=3')
>>> params.get_all('a')
('1', '3')
>>> params.get_all('c')
()

has(name: str, value: str | int | float | None = None) → bool#

Returns True if a name-value pair with the specified name and value exists.

Parameters:

name – The name of parameter to find.
value – The value of parameter to find.

Returns:

True if a name-value pair with the specified name and value exists, False otherwise.

keys() → Iterator[str]#

Returns a new iterator of this object’s names.

Returns:: An iterator of this object’s names.

set(name: str, value: str | int | float) → None#

If a name-value pair with the specified name exists, sets the value of the first name-value pair whose name is name to value and removes the other pairs with that name. Otherwise, appends a new name-value pair.

Parameters:

name – The name of parameter to set.
value – The value of parameter to set.

Examples

>>> params = URLSearchParams('a=1&b=2&a=3')
>>> list(params)
[('a', '1'), ('b', '2'), ('a', '3')]
>>> params.set('a', '4')
>>> list(params)
[('a', '4'), ('b', '2')]

sort() → None#

Sorts all name-value pairs by comparison of code units.

The relative order between name-value pairs with equal names will be preserved.

Examples

>>> params = URLSearchParams('ﬃ&🌈')
>>> list(params)
[('ﬃ', ''), ('🌈', '')]
>>> params.sort()
# code point: 'ﬃ' (0xFB03) < '🌈' (0x1F308), but
# code units: '🌈' (0xD83C, 0xDF08) < 'ﬃ' (0xFB03)
>>> list(params)
[('🌈', ''), ('ﬃ', '')]

values() → Iterator[str]#

Returns a new iterator of this object’s values.

Returns:: An iterator of this object’s values.

class urlstd.parse.URLValidator#

Bases: object

Validates a URL string.

Examples

>>> URL.can_parse('https://user:password@example.org/')
True
>>> URLValidator.is_valid('https://user:password@example.org/')
False
>>> URL.can_parse('file:///C|/demo')
True
>>> URLValidator.is_valid('file:///C|/demo')
False

>>> validity = ValidityState()
>>> URLValidator.is_valid('https://user:password@example.org/', validity=validity)
False
>>> validity
ValidityState(valid=False, error_types=['invalid-credentials'],
descriptions=["invalid-credentials: input includes credentials: 'https://user:password@example.org/' at position 21"],
validation_errors=1, disable_logging=True)

>>> URLValidator.is_valid('file:///C|/demo', validity=validity)
False
>>> validity
ValidityState(valid=False, error_types=['invalid-URL-unit'],
descriptions=["invalid-URL-unit: code point is found that is not a URL unit: U+007C (|) in 'file:///C|/demo' at position 9"],
validation_errors=1, disable_logging=True)

classmethod is_valid(urlstring: str, base: str | URLRecord | None = None, encoding: str = 'utf-8', **kwargs) → bool#

Returns True if urlstring against a base URL base is a valid URL.

Parameters:

urlstring – An absolute-URL or a relative-URL to verify. If urlstring is a relative-URL, base is required.
base – An absolute-URL for a relative-URL urlstring.
encoding – The encoding used to encode the URL’s query. Characters that cannot be encoded are replaced with the appropriate XML character reference.

Keyword Arguments:

validity – A ValidityState object that stores validation results.

Returns:

True if urlstring against a base URL base is a valid URL, False otherwise.

See also

URL.can_parse()

classmethod is_valid_url_scheme(value: str, **kwargs) → bool#

Returns True if value is a valid URL-scheme that is registered in the IANA URI Schemes registry.

Parameters:: value – A URL-scheme to verify.
Keyword Arguments:: validity – A ValidityState object that stores validation results.
Returns:: True if value is a valid URL-scheme, False otherwise.

Examples

>>> URLValidator.is_valid_url_scheme('aaa')  # diameter protocol
True
>>> URLValidator.is_valid_url_scheme('aaaa')  # unknown scheme
False

class urlstd.parse.ValidityState(valid: bool = True, error_types: list[str] = <factory>, descriptions: list[str] = <factory>, validation_errors: int = 0, disable_logging: bool = True)#

Bases: object

A validation status.

See also

HostValidator, URLValidator

Examples

>>> URL.can_parse('https://example/%?%#%')
True
>>> validity = ValidityState()
>>> URLValidator.is_valid('https://example/%?%#%', validity=validity)
False
>>> validity.valid
False
>>> validity.validation_errors
3
>>> validity.descriptions[0]
"invalid-URL-unit: incorrect percent encoding is found: '%' in 'https://example/%?%#%' at position 20"
>>> validity.descriptions[1]
"invalid-URL-unit: incorrect percent encoding is found: '%#%' in 'https://example/%?%#%' at position 18"
>>> validity.descriptions[2]
"invalid-URL-unit: incorrect percent encoding is found: '%?%' in 'https://example/%?%#%' at position 16"

valid: bool = True#: True if there are no validation errors, False otherwise.

error_types: list[str]#: A list of error type names.

descriptions: list[str]#: A list of error descriptions.

validation_errors: int = 0#: The number of validation errors.

disable_logging: bool = True#: True to disable logging, False otherwise.

__add__(other: Any) → ValidityState#: This API is for internal use only.

__iadd__(other: Any) → Self#: This API is for internal use only.

prepend(msg: str, *args) → None#: This API is for internal use only.

reset() → None#: This API is for internal use only.

urlstd.parse.parse_qsl(query: bytes) → list[tuple[str, str]]#

An alternative to urllib.parse.parse_qsl().

Parses a byte sequence in the form application/x-www-form-urlencoded, and returns a list of utf-8 decoded name-value pairs.

Invalid byte sequences (including invalid surrogates) will be replaced with U+FFFD.

Parameters: query – A byte sequence to parse.
Returns: A list of utf-8 decoded name-value pairs.

Examples

>>> parse_qsl(b'a=a&a=b&a=c')
[('a', 'a'), ('a', 'b'), ('a', 'c')]

>>> parse_qsl(b'%61+%4d%4D=')
[('a MM', '')]

>>> parse_qsl(b'%FE%FF')
[('\ufffd\ufffd', '')]

urlstd.parse.parse_url(urlstring: str, base: str | URLRecord | None = None, encoding: str = 'utf-8', **kwargs) → URLRecord#

Parses a string urlstring against a base URL base using the basic URL parser, and returns a URLRecord.

Parameters:

urlstring – An absolute-URL or a relative-URL. If urlstring is a relative-URL, base is required.
base – An absolute-URL for a relative-URL urlstring.
encoding – The encoding used to encode the URL’s query. Characters that cannot be encoded are replaced with the appropriate XML character reference.

Returns:

A URL record.

Raises:

urlstd.error.URLParseError – Raised when URL parsing fails.

urlstd.parse.string_percent_decode(s: str) → bytes#

Returns a percent-decoded byte sequence after encoding with utf-8.

Invalid surrogates will be replaced with U+FFFD.

Parameters: s – A string to percent-decode.
Returns: A percent-decoded byte sequence after encoding with utf-8.

Examples

>>> string_percent_decode('%f0%9f%8c%88').decode()
'🌈'

>>> string_percent_decode('\U0001f308').decode()
'🌈'

>>> string_percent_decode('\ud83c\udf08').decode()
'🌈'

>>> string_percent_decode('\udf08\ud83c').decode()
'\ufffd\ufffd'

urlstd.parse.string_percent_encode(s: str, safe: str, encoding: str = 'utf-8', space_as_plus: bool = False) → str#

Returns a percent-encoded string after encoding with encoding.

Invalid surrogates will be replaced with U+FFFD. Also, characters that cannot be encoded with encoding will be replaced with the appropriate XML character reference.

Parameters:

s – A string to percent-encode.
safe – ASCII characters that should not be percent-encoded.
encoding – The encoding to encode s.
space_as_plus – If True, replace 0x20 (space) with U+002B (plus sign).

Returns:

A percent-encoded string after encoding with encoding.

Examples

>>> string_percent_encode('/El Niño/', '/')
'/El%20Ni%C3%B1o/'

>>> string_percent_encode('\U0001f308', '')
'%F0%9F%8C%88'

>>> string_percent_encode('\ud83c\udf08', '')
'%F0%9F%8C%88'

>>> string_percent_encode('\ud83c', '')
'%EF%BF%BD'  # → '\ufffd'

>>> string_percent_encode('\U0001f308', '', encoding='windows-1252')
'%26%23127752%3B'  # → '&#127752;'

urlstd.parse.urlencode(query: Sequence[tuple[str, str]], encoding: str = 'utf-8') → str#

An alternative to urllib.parse.urlencode().

Converts a sequence of tuples of name-value pairs into a percent-encoded ASCII text string in the form application/x-www-form-urlencoded.

Invalid surrogates will be replaced with U+FFFD. Also, characters that cannot be encoded with encoding will be replaced with the appropriate XML character reference.

Parameters:

query – A sequence of tuples of name-value pairs to percent-encode.
encoding – The encoding to encode query.

Returns:

A string in the form application/x-www-form-urlencoded.

Examples

>>> urlencode([('a', 'a'), ('a', 'b'), ('a', 'c')])
'a=a&a=b&a=c'

>>> urlencode([('🌈', 'a')])
'%F0%9F%8C%88=a'

>>> urlencode([('🌈', 'a')], encoding="windows-1252")
'%26%23127752%3B=a'  # → '&#127752;=a'

>>> urlencode([('\ud83c\udf08', 'a')])
'%F0%9F%8C%88=a'

>>> urlencode([('\ud83c', 'a')])
'%EF%BF%BD=a'  # → '\ufffd=a'

urlstd.parse.urlparse(urlstring: str, base: str | None = None, encoding: str = 'utf-8', allow_fragments: bool = True) → ParseResult#

An alternative to urllib.parse.urlparse().

Parses a string urlstring against a base URL base using the basic URL parser, and returns a urllib.parse.ParseResult.

Parameters:

urlstring – An absolute-URL or a relative-URL. If urlstring is a relative-URL, base is required.
base – An absolute-URL for a relative-URL urlstring.
encoding – The encoding used to encode the URL’s query. Characters that cannot be encoded are replaced with the appropriate XML character reference.
allow_fragments – If False, fragment identifiers are not recognized.

Returns:

A named tuple urllib.parse.ParseResult.

Raises:

urlstd.error.URLParseError – Raised when URL parsing fails.

Examples

>>> urlparse('http://user:pass@foo:21/bar;par?b#c')
ParseResult(scheme='http', netloc='user:pass@foo:21', path='/bar',
params='par', query='b', fragment='c')

>>> urlparse('?🌈=a#c', base='http://user:pass@foo:21/bar;par?b#c')
ParseResult(scheme='http', netloc='user:pass@foo:21', path='/bar',
params='par', query='%F0%9F%8C%88=a', fragment='c')

>>> urlparse('?🌈=a#c', base='http://user:pass@foo:21/bar;par?b#c',
...     encoding='windows-1252')
ParseResult(scheme='http', netloc='user:pass@foo:21', path='/bar',
params='par', query='%26%23127752%3B=a', fragment='c')

urlstd.parse.utf8_decode(b: bytes) → str#

Decodes a byte sequence with utf-8 and returns its string.

Byte sequences that cannot be decoded will be replaced with U+FFFD.

Parameters: b – A byte sequence to decode with utf-8.
Returns: A utf-8 decoded string.

urlstd.parse.utf8_encode(s: str) → bytes#

Encodes a string with utf-8 and returns its byte sequence.

Invalid surrogates will be replaced with U+FFFD.

Parameters: s – A string to encode with utf-8.
Returns: A utf-8 encoded byte sequence.

Examples

>>> utf8_encode('\ud83c\udf08').decode()  # surrogate pair
'🌈'
>>> utf8_encode('\udf08\ud83c').decode()  # invalid surrogates
'��'  # '\ufffd\ufffd'

urlstd.parse.utf8_percent_encode(s: str, safe: str, space_as_plus: bool = False) → str#

Returns a percent-encoded string after encoding with utf-8.

Invalid surrogates will be replaced with U+FFFD. Also, characters that cannot be encoded will be replaced with the appropriate XML character reference.

This is equivalent to string_percent_encode(s, safe, encoding='utf-8', space_as_plus=space_as_plus).

Parameters:

s – A string to percent-encode.
safe – ASCII characters that should not be percent-encoded.
space_as_plus – If True, replace 0x20 (space) with U+002B (plus sign).

Returns:

A percent-encoded string after encoding with utf-8.