You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
bazarr/libs/sqlalchemy/ext/horizontal_shard.py

485 lines
16 KiB

# ext/horizontal_shard.py
# Copyright (C) 2005-2023 the SQLAlchemy authors and contributors
# <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: https://www.opensource.org/licenses/mit-license.php
"""Horizontal sharding support.
Defines a rudimental 'horizontal sharding' system which allows a Session to
distribute queries and persistence operations across multiple databases.
For a usage example, see the :ref:`examples_sharding` example included in
the source distribution.
.. deepalchemy:: The horizontal sharding extension is an advanced feature,
involving a complex statement -> database interaction as well as
use of semi-public APIs for non-trivial cases. Simpler approaches to
refering to multiple database "shards", most commonly using a distinct
:class:`_orm.Session` per "shard", should always be considered first
before using this more complex and less-production-tested system.
"""
from __future__ import annotations
from typing import Any
from typing import Callable
from typing import Dict
from typing import Iterable
from typing import Optional
from typing import Tuple
from typing import Type
from typing import TYPE_CHECKING
from typing import TypeVar
from typing import Union
from .. import event
from .. import exc
from .. import inspect
from .. import util
from ..orm import PassiveFlag
from ..orm._typing import OrmExecuteOptionsParameter
from ..orm.interfaces import ORMOption
from ..orm.mapper import Mapper
from ..orm.query import Query
from ..orm.session import _BindArguments
from ..orm.session import _PKIdentityArgument
from ..orm.session import Session
from ..util.typing import Protocol
from ..util.typing import Self
if TYPE_CHECKING:
from ..engine.base import Connection
from ..engine.base import Engine
from ..engine.base import OptionEngine
from ..engine.result import IteratorResult
from ..engine.result import Result
from ..orm import LoaderCallableStatus
from ..orm._typing import _O
from ..orm.bulk_persistence import BulkUDCompileState
from ..orm.context import QueryContext
from ..orm.session import _EntityBindKey
from ..orm.session import _SessionBind
from ..orm.session import ORMExecuteState
from ..orm.state import InstanceState
from ..sql import Executable
from ..sql._typing import _TP
from ..sql.elements import ClauseElement
__all__ = ["ShardedSession", "ShardedQuery"]
_T = TypeVar("_T", bound=Any)
ShardIdentifier = str
class ShardChooser(Protocol):
def __call__(
self,
mapper: Optional[Mapper[_T]],
instance: Any,
clause: Optional[ClauseElement],
) -> Any:
...
class IdentityChooser(Protocol):
def __call__(
self,
mapper: Mapper[_T],
primary_key: _PKIdentityArgument,
*,
lazy_loaded_from: Optional[InstanceState[Any]],
execution_options: OrmExecuteOptionsParameter,
bind_arguments: _BindArguments,
**kw: Any,
) -> Any:
...
class ShardedQuery(Query[_T]):
"""Query class used with :class:`.ShardedSession`.
.. legacy:: The :class:`.ShardedQuery` is a subclass of the legacy
:class:`.Query` class. The :class:`.ShardedSession` now supports
2.0 style execution via the :meth:`.ShardedSession.execute` method.
"""
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
assert isinstance(self.session, ShardedSession)
self.identity_chooser = self.session.identity_chooser
self.execute_chooser = self.session.execute_chooser
self._shard_id = None
def set_shard(self, shard_id: ShardIdentifier) -> Self:
"""Return a new query, limited to a single shard ID.
All subsequent operations with the returned query will
be against the single shard regardless of other state.
The shard_id can be passed for a 2.0 style execution to the
bind_arguments dictionary of :meth:`.Session.execute`::
results = session.execute(
stmt,
bind_arguments={"shard_id": "my_shard"}
)
"""
return self.execution_options(_sa_shard_id=shard_id)
class ShardedSession(Session):
shard_chooser: ShardChooser
identity_chooser: IdentityChooser
execute_chooser: Callable[[ORMExecuteState], Iterable[Any]]
def __init__(
self,
shard_chooser: ShardChooser,
identity_chooser: Optional[IdentityChooser] = None,
execute_chooser: Optional[
Callable[[ORMExecuteState], Iterable[Any]]
] = None,
shards: Optional[Dict[str, Any]] = None,
query_cls: Type[Query[_T]] = ShardedQuery,
*,
id_chooser: Optional[
Callable[[Query[_T], Iterable[_T]], Iterable[Any]]
] = None,
query_chooser: Optional[Callable[[Executable], Iterable[Any]]] = None,
**kwargs: Any,
) -> None:
"""Construct a ShardedSession.
:param shard_chooser: A callable which, passed a Mapper, a mapped
instance, and possibly a SQL clause, returns a shard ID. This id
may be based off of the attributes present within the object, or on
some round-robin scheme. If the scheme is based on a selection, it
should set whatever state on the instance to mark it in the future as
participating in that shard.
:param identity_chooser: A callable, passed a Mapper and primary key
argument, which should return a list of shard ids where this
primary key might reside.
.. versionchanged:: 2.0 The ``identity_chooser`` parameter
supersedes the ``id_chooser`` parameter.
:param execute_chooser: For a given :class:`.ORMExecuteState`,
returns the list of shard_ids
where the query should be issued. Results from all shards returned
will be combined together into a single listing.
.. versionchanged:: 1.4 The ``execute_chooser`` parameter
supersedes the ``query_chooser`` parameter.
:param shards: A dictionary of string shard names
to :class:`~sqlalchemy.engine.Engine` objects.
"""
super().__init__(query_cls=query_cls, **kwargs)
event.listen(
self, "do_orm_execute", execute_and_instances, retval=True
)
self.shard_chooser = shard_chooser
if id_chooser:
_id_chooser = id_chooser
util.warn_deprecated(
"The ``id_chooser`` parameter is deprecated; "
"please use ``identity_chooser``.",
"2.0",
)
def _legacy_identity_chooser(
mapper: Mapper[_T],
primary_key: _PKIdentityArgument,
*,
lazy_loaded_from: Optional[InstanceState[Any]],
execution_options: OrmExecuteOptionsParameter,
bind_arguments: _BindArguments,
**kw: Any,
) -> Any:
q = self.query(mapper)
if lazy_loaded_from:
q = q._set_lazyload_from(lazy_loaded_from)
return _id_chooser(q, primary_key)
self.identity_chooser = _legacy_identity_chooser
elif identity_chooser:
self.identity_chooser = identity_chooser
else:
raise exc.ArgumentError(
"identity_chooser or id_chooser is required"
)
if query_chooser:
_query_chooser = query_chooser
util.warn_deprecated(
"The ``query_chooser`` parameter is deprecated; "
"please use ``execute_chooser``.",
"1.4",
)
if execute_chooser:
raise exc.ArgumentError(
"Can't pass query_chooser and execute_chooser "
"at the same time."
)
def _default_execute_chooser(
orm_context: ORMExecuteState,
) -> Iterable[Any]:
return _query_chooser(orm_context.statement)
if execute_chooser is None:
execute_chooser = _default_execute_chooser
if execute_chooser is None:
raise exc.ArgumentError(
"execute_chooser or query_chooser is required"
)
self.execute_chooser = execute_chooser
self.__shards: Dict[ShardIdentifier, _SessionBind] = {}
if shards is not None:
for k in shards:
self.bind_shard(k, shards[k])
def _identity_lookup(
self,
mapper: Mapper[_O],
primary_key_identity: Union[Any, Tuple[Any, ...]],
identity_token: Optional[Any] = None,
passive: PassiveFlag = PassiveFlag.PASSIVE_OFF,
lazy_loaded_from: Optional[InstanceState[Any]] = None,
execution_options: OrmExecuteOptionsParameter = util.EMPTY_DICT,
bind_arguments: Optional[_BindArguments] = None,
**kw: Any,
) -> Union[Optional[_O], LoaderCallableStatus]:
"""override the default :meth:`.Session._identity_lookup` method so
that we search for a given non-token primary key identity across all
possible identity tokens (e.g. shard ids).
.. versionchanged:: 1.4 Moved :meth:`.Session._identity_lookup` from
the :class:`_query.Query` object to the :class:`.Session`.
"""
if identity_token is not None:
obj = super()._identity_lookup(
mapper,
primary_key_identity,
identity_token=identity_token,
**kw,
)
return obj
else:
for shard_id in self.identity_chooser(
mapper,
primary_key_identity,
lazy_loaded_from=lazy_loaded_from,
execution_options=execution_options,
bind_arguments=dict(bind_arguments) if bind_arguments else {},
):
obj2 = super()._identity_lookup(
mapper,
primary_key_identity,
identity_token=shard_id,
lazy_loaded_from=lazy_loaded_from,
**kw,
)
if obj2 is not None:
return obj2
return None
def _choose_shard_and_assign(
self,
mapper: Optional[_EntityBindKey[_O]],
instance: Any,
**kw: Any,
) -> Any:
if instance is not None:
state = inspect(instance)
if state.key:
token = state.key[2]
assert token is not None
return token
elif state.identity_token:
return state.identity_token
assert isinstance(mapper, Mapper)
shard_id = self.shard_chooser(mapper, instance, **kw)
if instance is not None:
state.identity_token = shard_id
return shard_id
def connection_callable( # type: ignore [override]
self,
mapper: Optional[Mapper[_T]] = None,
instance: Optional[Any] = None,
shard_id: Optional[ShardIdentifier] = None,
**kw: Any,
) -> Connection:
"""Provide a :class:`_engine.Connection` to use in the unit of work
flush process.
"""
if shard_id is None:
shard_id = self._choose_shard_and_assign(mapper, instance)
if self.in_transaction():
trans = self.get_transaction()
assert trans is not None
return trans.connection(mapper, shard_id=shard_id)
else:
bind = self.get_bind(
mapper=mapper, shard_id=shard_id, instance=instance
)
if isinstance(bind, Engine):
return bind.connect(**kw)
else:
assert isinstance(bind, Connection)
return bind
def get_bind(
self,
mapper: Optional[_EntityBindKey[_O]] = None,
*,
shard_id: Optional[ShardIdentifier] = None,
instance: Optional[Any] = None,
clause: Optional[ClauseElement] = None,
**kw: Any,
) -> _SessionBind:
if shard_id is None:
shard_id = self._choose_shard_and_assign(
mapper, instance=instance, clause=clause
)
assert shard_id is not None
return self.__shards[shard_id]
def bind_shard(
self, shard_id: ShardIdentifier, bind: Union[Engine, OptionEngine]
) -> None:
self.__shards[shard_id] = bind
class set_shard_id(ORMOption):
"""a loader option for statements to apply a specific shard id to the
primary query as well as for additional relationship and column
loaders.
The :class:`_horizontal.set_shard_id` option may be applied using
the :meth:`_sql.Executable.options` method of any executable statement::
stmt = (
select(MyObject).
where(MyObject.name == 'some name').
options(set_shard_id("shard1"))
)
Above, the statement when invoked will limit to the "shard1" shard
identifier for the primary query as well as for all relationship and
column loading strategies, including eager loaders such as
:func:`_orm.selectinload`, deferred column loaders like :func:`_orm.defer`,
and the lazy relationship loader :func:`_orm.lazyload`.
In this way, the :class:`_horizontal.set_shard_id` option has much wider
scope than using the "shard_id" argument within the
:paramref:`_orm.Session.execute.bind_arguments` dictionary.
.. versionadded:: 2.0.0
"""
__slots__ = ("shard_id", "propagate_to_loaders")
def __init__(
self, shard_id: ShardIdentifier, propagate_to_loaders: bool = True
):
"""Construct a :class:`_horizontal.set_shard_id` option.
:param shard_id: shard identifier
:param propagate_to_loaders: if left at its default of ``True``, the
shard option will take place for lazy loaders such as
:func:`_orm.lazyload` and :func:`_orm.defer`; if False, the option
will not be propagated to loaded objects. Note that :func:`_orm.defer`
always limits to the shard_id of the parent row in any case, so the
parameter only has a net effect on the behavior of the
:func:`_orm.lazyload` strategy.
"""
self.shard_id = shard_id
self.propagate_to_loaders = propagate_to_loaders
def execute_and_instances(
orm_context: ORMExecuteState,
) -> Union[Result[_T], IteratorResult[_TP]]:
active_options: Union[
None,
QueryContext.default_load_options,
Type[QueryContext.default_load_options],
BulkUDCompileState.default_update_options,
Type[BulkUDCompileState.default_update_options],
]
if orm_context.is_select:
active_options = orm_context.load_options
elif orm_context.is_update or orm_context.is_delete:
active_options = orm_context.update_delete_options
else:
active_options = None
session = orm_context.session
assert isinstance(session, ShardedSession)
def iter_for_shard(
shard_id: ShardIdentifier,
) -> Union[Result[_T], IteratorResult[_TP]]:
bind_arguments = dict(orm_context.bind_arguments)
bind_arguments["shard_id"] = shard_id
orm_context.update_execution_options(identity_token=shard_id)
return orm_context.invoke_statement(bind_arguments=bind_arguments)
for orm_opt in orm_context._non_compile_orm_options:
# TODO: if we had an ORMOption that gets applied at ORM statement
# execution time, that would allow this to be more generalized.
# for now just iterate and look for our options
if isinstance(orm_opt, set_shard_id):
shard_id = orm_opt.shard_id
break
else:
if active_options and active_options._identity_token is not None:
shard_id = active_options._identity_token
elif "_sa_shard_id" in orm_context.execution_options:
shard_id = orm_context.execution_options["_sa_shard_id"]
elif "shard_id" in orm_context.bind_arguments:
shard_id = orm_context.bind_arguments["shard_id"]
else:
shard_id = None
if shard_id is not None:
return iter_for_shard(shard_id)
else:
partial = []
for shard_id in session.execute_chooser(orm_context):
result_ = iter_for_shard(shard_id)
partial.append(result_)
return partial[0].merge(*partial[1:])