You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
485 lines
16 KiB
485 lines
16 KiB
2 years ago
|
# ext/horizontal_shard.py
|
||
|
# Copyright (C) 2005-2023 the SQLAlchemy authors and contributors
|
||
|
# <see AUTHORS file>
|
||
|
#
|
||
|
# This module is part of SQLAlchemy and is released under
|
||
|
# the MIT License: https://www.opensource.org/licenses/mit-license.php
|
||
|
|
||
|
"""Horizontal sharding support.
|
||
|
|
||
|
Defines a rudimental 'horizontal sharding' system which allows a Session to
|
||
|
distribute queries and persistence operations across multiple databases.
|
||
|
|
||
|
For a usage example, see the :ref:`examples_sharding` example included in
|
||
|
the source distribution.
|
||
|
|
||
|
.. deepalchemy:: The horizontal sharding extension is an advanced feature,
|
||
|
involving a complex statement -> database interaction as well as
|
||
|
use of semi-public APIs for non-trivial cases. Simpler approaches to
|
||
|
refering to multiple database "shards", most commonly using a distinct
|
||
|
:class:`_orm.Session` per "shard", should always be considered first
|
||
|
before using this more complex and less-production-tested system.
|
||
|
|
||
|
|
||
|
|
||
|
"""
|
||
|
from __future__ import annotations
|
||
|
|
||
|
from typing import Any
|
||
|
from typing import Callable
|
||
|
from typing import Dict
|
||
|
from typing import Iterable
|
||
|
from typing import Optional
|
||
|
from typing import Tuple
|
||
|
from typing import Type
|
||
|
from typing import TYPE_CHECKING
|
||
|
from typing import TypeVar
|
||
|
from typing import Union
|
||
|
|
||
|
from .. import event
|
||
|
from .. import exc
|
||
|
from .. import inspect
|
||
|
from .. import util
|
||
|
from ..orm import PassiveFlag
|
||
|
from ..orm._typing import OrmExecuteOptionsParameter
|
||
|
from ..orm.interfaces import ORMOption
|
||
|
from ..orm.mapper import Mapper
|
||
|
from ..orm.query import Query
|
||
|
from ..orm.session import _BindArguments
|
||
|
from ..orm.session import _PKIdentityArgument
|
||
|
from ..orm.session import Session
|
||
|
from ..util.typing import Protocol
|
||
|
from ..util.typing import Self
|
||
|
|
||
|
if TYPE_CHECKING:
|
||
|
from ..engine.base import Connection
|
||
|
from ..engine.base import Engine
|
||
|
from ..engine.base import OptionEngine
|
||
|
from ..engine.result import IteratorResult
|
||
|
from ..engine.result import Result
|
||
|
from ..orm import LoaderCallableStatus
|
||
|
from ..orm._typing import _O
|
||
|
from ..orm.bulk_persistence import BulkUDCompileState
|
||
|
from ..orm.context import QueryContext
|
||
|
from ..orm.session import _EntityBindKey
|
||
|
from ..orm.session import _SessionBind
|
||
|
from ..orm.session import ORMExecuteState
|
||
|
from ..orm.state import InstanceState
|
||
|
from ..sql import Executable
|
||
|
from ..sql._typing import _TP
|
||
|
from ..sql.elements import ClauseElement
|
||
|
|
||
|
__all__ = ["ShardedSession", "ShardedQuery"]
|
||
|
|
||
|
_T = TypeVar("_T", bound=Any)
|
||
|
|
||
|
|
||
|
ShardIdentifier = str
|
||
|
|
||
|
|
||
|
class ShardChooser(Protocol):
|
||
|
def __call__(
|
||
|
self,
|
||
|
mapper: Optional[Mapper[_T]],
|
||
|
instance: Any,
|
||
|
clause: Optional[ClauseElement],
|
||
|
) -> Any:
|
||
|
...
|
||
|
|
||
|
|
||
|
class IdentityChooser(Protocol):
|
||
|
def __call__(
|
||
|
self,
|
||
|
mapper: Mapper[_T],
|
||
|
primary_key: _PKIdentityArgument,
|
||
|
*,
|
||
|
lazy_loaded_from: Optional[InstanceState[Any]],
|
||
|
execution_options: OrmExecuteOptionsParameter,
|
||
|
bind_arguments: _BindArguments,
|
||
|
**kw: Any,
|
||
|
) -> Any:
|
||
|
...
|
||
|
|
||
|
|
||
|
class ShardedQuery(Query[_T]):
|
||
|
"""Query class used with :class:`.ShardedSession`.
|
||
|
|
||
|
.. legacy:: The :class:`.ShardedQuery` is a subclass of the legacy
|
||
|
:class:`.Query` class. The :class:`.ShardedSession` now supports
|
||
|
2.0 style execution via the :meth:`.ShardedSession.execute` method.
|
||
|
|
||
|
"""
|
||
|
|
||
|
def __init__(self, *args: Any, **kwargs: Any) -> None:
|
||
|
super().__init__(*args, **kwargs)
|
||
|
assert isinstance(self.session, ShardedSession)
|
||
|
|
||
|
self.identity_chooser = self.session.identity_chooser
|
||
|
self.execute_chooser = self.session.execute_chooser
|
||
|
self._shard_id = None
|
||
|
|
||
|
def set_shard(self, shard_id: ShardIdentifier) -> Self:
|
||
|
"""Return a new query, limited to a single shard ID.
|
||
|
|
||
|
All subsequent operations with the returned query will
|
||
|
be against the single shard regardless of other state.
|
||
|
|
||
|
The shard_id can be passed for a 2.0 style execution to the
|
||
|
bind_arguments dictionary of :meth:`.Session.execute`::
|
||
|
|
||
|
results = session.execute(
|
||
|
stmt,
|
||
|
bind_arguments={"shard_id": "my_shard"}
|
||
|
)
|
||
|
|
||
|
"""
|
||
|
return self.execution_options(_sa_shard_id=shard_id)
|
||
|
|
||
|
|
||
|
class ShardedSession(Session):
|
||
|
shard_chooser: ShardChooser
|
||
|
identity_chooser: IdentityChooser
|
||
|
execute_chooser: Callable[[ORMExecuteState], Iterable[Any]]
|
||
|
|
||
|
def __init__(
|
||
|
self,
|
||
|
shard_chooser: ShardChooser,
|
||
|
identity_chooser: Optional[IdentityChooser] = None,
|
||
|
execute_chooser: Optional[
|
||
|
Callable[[ORMExecuteState], Iterable[Any]]
|
||
|
] = None,
|
||
|
shards: Optional[Dict[str, Any]] = None,
|
||
|
query_cls: Type[Query[_T]] = ShardedQuery,
|
||
|
*,
|
||
|
id_chooser: Optional[
|
||
|
Callable[[Query[_T], Iterable[_T]], Iterable[Any]]
|
||
|
] = None,
|
||
|
query_chooser: Optional[Callable[[Executable], Iterable[Any]]] = None,
|
||
|
**kwargs: Any,
|
||
|
) -> None:
|
||
|
"""Construct a ShardedSession.
|
||
|
|
||
|
:param shard_chooser: A callable which, passed a Mapper, a mapped
|
||
|
instance, and possibly a SQL clause, returns a shard ID. This id
|
||
|
may be based off of the attributes present within the object, or on
|
||
|
some round-robin scheme. If the scheme is based on a selection, it
|
||
|
should set whatever state on the instance to mark it in the future as
|
||
|
participating in that shard.
|
||
|
|
||
|
:param identity_chooser: A callable, passed a Mapper and primary key
|
||
|
argument, which should return a list of shard ids where this
|
||
|
primary key might reside.
|
||
|
|
||
|
.. versionchanged:: 2.0 The ``identity_chooser`` parameter
|
||
|
supersedes the ``id_chooser`` parameter.
|
||
|
|
||
|
:param execute_chooser: For a given :class:`.ORMExecuteState`,
|
||
|
returns the list of shard_ids
|
||
|
where the query should be issued. Results from all shards returned
|
||
|
will be combined together into a single listing.
|
||
|
|
||
|
.. versionchanged:: 1.4 The ``execute_chooser`` parameter
|
||
|
supersedes the ``query_chooser`` parameter.
|
||
|
|
||
|
:param shards: A dictionary of string shard names
|
||
|
to :class:`~sqlalchemy.engine.Engine` objects.
|
||
|
|
||
|
"""
|
||
|
super().__init__(query_cls=query_cls, **kwargs)
|
||
|
|
||
|
event.listen(
|
||
|
self, "do_orm_execute", execute_and_instances, retval=True
|
||
|
)
|
||
|
self.shard_chooser = shard_chooser
|
||
|
|
||
|
if id_chooser:
|
||
|
_id_chooser = id_chooser
|
||
|
util.warn_deprecated(
|
||
|
"The ``id_chooser`` parameter is deprecated; "
|
||
|
"please use ``identity_chooser``.",
|
||
|
"2.0",
|
||
|
)
|
||
|
|
||
|
def _legacy_identity_chooser(
|
||
|
mapper: Mapper[_T],
|
||
|
primary_key: _PKIdentityArgument,
|
||
|
*,
|
||
|
lazy_loaded_from: Optional[InstanceState[Any]],
|
||
|
execution_options: OrmExecuteOptionsParameter,
|
||
|
bind_arguments: _BindArguments,
|
||
|
**kw: Any,
|
||
|
) -> Any:
|
||
|
q = self.query(mapper)
|
||
|
if lazy_loaded_from:
|
||
|
q = q._set_lazyload_from(lazy_loaded_from)
|
||
|
return _id_chooser(q, primary_key)
|
||
|
|
||
|
self.identity_chooser = _legacy_identity_chooser
|
||
|
elif identity_chooser:
|
||
|
self.identity_chooser = identity_chooser
|
||
|
else:
|
||
|
raise exc.ArgumentError(
|
||
|
"identity_chooser or id_chooser is required"
|
||
|
)
|
||
|
|
||
|
if query_chooser:
|
||
|
_query_chooser = query_chooser
|
||
|
util.warn_deprecated(
|
||
|
"The ``query_chooser`` parameter is deprecated; "
|
||
|
"please use ``execute_chooser``.",
|
||
|
"1.4",
|
||
|
)
|
||
|
if execute_chooser:
|
||
|
raise exc.ArgumentError(
|
||
|
"Can't pass query_chooser and execute_chooser "
|
||
|
"at the same time."
|
||
|
)
|
||
|
|
||
|
def _default_execute_chooser(
|
||
|
orm_context: ORMExecuteState,
|
||
|
) -> Iterable[Any]:
|
||
|
return _query_chooser(orm_context.statement)
|
||
|
|
||
|
if execute_chooser is None:
|
||
|
execute_chooser = _default_execute_chooser
|
||
|
|
||
|
if execute_chooser is None:
|
||
|
raise exc.ArgumentError(
|
||
|
"execute_chooser or query_chooser is required"
|
||
|
)
|
||
|
self.execute_chooser = execute_chooser
|
||
|
self.__shards: Dict[ShardIdentifier, _SessionBind] = {}
|
||
|
if shards is not None:
|
||
|
for k in shards:
|
||
|
self.bind_shard(k, shards[k])
|
||
|
|
||
|
def _identity_lookup(
|
||
|
self,
|
||
|
mapper: Mapper[_O],
|
||
|
primary_key_identity: Union[Any, Tuple[Any, ...]],
|
||
|
identity_token: Optional[Any] = None,
|
||
|
passive: PassiveFlag = PassiveFlag.PASSIVE_OFF,
|
||
|
lazy_loaded_from: Optional[InstanceState[Any]] = None,
|
||
|
execution_options: OrmExecuteOptionsParameter = util.EMPTY_DICT,
|
||
|
bind_arguments: Optional[_BindArguments] = None,
|
||
|
**kw: Any,
|
||
|
) -> Union[Optional[_O], LoaderCallableStatus]:
|
||
|
"""override the default :meth:`.Session._identity_lookup` method so
|
||
|
that we search for a given non-token primary key identity across all
|
||
|
possible identity tokens (e.g. shard ids).
|
||
|
|
||
|
.. versionchanged:: 1.4 Moved :meth:`.Session._identity_lookup` from
|
||
|
the :class:`_query.Query` object to the :class:`.Session`.
|
||
|
|
||
|
"""
|
||
|
|
||
|
if identity_token is not None:
|
||
|
obj = super()._identity_lookup(
|
||
|
mapper,
|
||
|
primary_key_identity,
|
||
|
identity_token=identity_token,
|
||
|
**kw,
|
||
|
)
|
||
|
|
||
|
return obj
|
||
|
else:
|
||
|
for shard_id in self.identity_chooser(
|
||
|
mapper,
|
||
|
primary_key_identity,
|
||
|
lazy_loaded_from=lazy_loaded_from,
|
||
|
execution_options=execution_options,
|
||
|
bind_arguments=dict(bind_arguments) if bind_arguments else {},
|
||
|
):
|
||
|
obj2 = super()._identity_lookup(
|
||
|
mapper,
|
||
|
primary_key_identity,
|
||
|
identity_token=shard_id,
|
||
|
lazy_loaded_from=lazy_loaded_from,
|
||
|
**kw,
|
||
|
)
|
||
|
if obj2 is not None:
|
||
|
return obj2
|
||
|
|
||
|
return None
|
||
|
|
||
|
def _choose_shard_and_assign(
|
||
|
self,
|
||
|
mapper: Optional[_EntityBindKey[_O]],
|
||
|
instance: Any,
|
||
|
**kw: Any,
|
||
|
) -> Any:
|
||
|
if instance is not None:
|
||
|
state = inspect(instance)
|
||
|
if state.key:
|
||
|
token = state.key[2]
|
||
|
assert token is not None
|
||
|
return token
|
||
|
elif state.identity_token:
|
||
|
return state.identity_token
|
||
|
|
||
|
assert isinstance(mapper, Mapper)
|
||
|
shard_id = self.shard_chooser(mapper, instance, **kw)
|
||
|
if instance is not None:
|
||
|
state.identity_token = shard_id
|
||
|
return shard_id
|
||
|
|
||
|
def connection_callable( # type: ignore [override]
|
||
|
self,
|
||
|
mapper: Optional[Mapper[_T]] = None,
|
||
|
instance: Optional[Any] = None,
|
||
|
shard_id: Optional[ShardIdentifier] = None,
|
||
|
**kw: Any,
|
||
|
) -> Connection:
|
||
|
"""Provide a :class:`_engine.Connection` to use in the unit of work
|
||
|
flush process.
|
||
|
|
||
|
"""
|
||
|
|
||
|
if shard_id is None:
|
||
|
shard_id = self._choose_shard_and_assign(mapper, instance)
|
||
|
|
||
|
if self.in_transaction():
|
||
|
trans = self.get_transaction()
|
||
|
assert trans is not None
|
||
|
return trans.connection(mapper, shard_id=shard_id)
|
||
|
else:
|
||
|
bind = self.get_bind(
|
||
|
mapper=mapper, shard_id=shard_id, instance=instance
|
||
|
)
|
||
|
|
||
|
if isinstance(bind, Engine):
|
||
|
return bind.connect(**kw)
|
||
|
else:
|
||
|
assert isinstance(bind, Connection)
|
||
|
return bind
|
||
|
|
||
|
def get_bind(
|
||
|
self,
|
||
|
mapper: Optional[_EntityBindKey[_O]] = None,
|
||
|
*,
|
||
|
shard_id: Optional[ShardIdentifier] = None,
|
||
|
instance: Optional[Any] = None,
|
||
|
clause: Optional[ClauseElement] = None,
|
||
|
**kw: Any,
|
||
|
) -> _SessionBind:
|
||
|
if shard_id is None:
|
||
|
shard_id = self._choose_shard_and_assign(
|
||
|
mapper, instance=instance, clause=clause
|
||
|
)
|
||
|
assert shard_id is not None
|
||
|
return self.__shards[shard_id]
|
||
|
|
||
|
def bind_shard(
|
||
|
self, shard_id: ShardIdentifier, bind: Union[Engine, OptionEngine]
|
||
|
) -> None:
|
||
|
self.__shards[shard_id] = bind
|
||
|
|
||
|
|
||
|
class set_shard_id(ORMOption):
|
||
|
"""a loader option for statements to apply a specific shard id to the
|
||
|
primary query as well as for additional relationship and column
|
||
|
loaders.
|
||
|
|
||
|
The :class:`_horizontal.set_shard_id` option may be applied using
|
||
|
the :meth:`_sql.Executable.options` method of any executable statement::
|
||
|
|
||
|
stmt = (
|
||
|
select(MyObject).
|
||
|
where(MyObject.name == 'some name').
|
||
|
options(set_shard_id("shard1"))
|
||
|
)
|
||
|
|
||
|
Above, the statement when invoked will limit to the "shard1" shard
|
||
|
identifier for the primary query as well as for all relationship and
|
||
|
column loading strategies, including eager loaders such as
|
||
|
:func:`_orm.selectinload`, deferred column loaders like :func:`_orm.defer`,
|
||
|
and the lazy relationship loader :func:`_orm.lazyload`.
|
||
|
|
||
|
In this way, the :class:`_horizontal.set_shard_id` option has much wider
|
||
|
scope than using the "shard_id" argument within the
|
||
|
:paramref:`_orm.Session.execute.bind_arguments` dictionary.
|
||
|
|
||
|
|
||
|
.. versionadded:: 2.0.0
|
||
|
|
||
|
"""
|
||
|
|
||
|
__slots__ = ("shard_id", "propagate_to_loaders")
|
||
|
|
||
|
def __init__(
|
||
|
self, shard_id: ShardIdentifier, propagate_to_loaders: bool = True
|
||
|
):
|
||
|
"""Construct a :class:`_horizontal.set_shard_id` option.
|
||
|
|
||
|
:param shard_id: shard identifier
|
||
|
:param propagate_to_loaders: if left at its default of ``True``, the
|
||
|
shard option will take place for lazy loaders such as
|
||
|
:func:`_orm.lazyload` and :func:`_orm.defer`; if False, the option
|
||
|
will not be propagated to loaded objects. Note that :func:`_orm.defer`
|
||
|
always limits to the shard_id of the parent row in any case, so the
|
||
|
parameter only has a net effect on the behavior of the
|
||
|
:func:`_orm.lazyload` strategy.
|
||
|
|
||
|
"""
|
||
|
self.shard_id = shard_id
|
||
|
self.propagate_to_loaders = propagate_to_loaders
|
||
|
|
||
|
|
||
|
def execute_and_instances(
|
||
|
orm_context: ORMExecuteState,
|
||
|
) -> Union[Result[_T], IteratorResult[_TP]]:
|
||
|
active_options: Union[
|
||
|
None,
|
||
|
QueryContext.default_load_options,
|
||
|
Type[QueryContext.default_load_options],
|
||
|
BulkUDCompileState.default_update_options,
|
||
|
Type[BulkUDCompileState.default_update_options],
|
||
|
]
|
||
|
|
||
|
if orm_context.is_select:
|
||
|
active_options = orm_context.load_options
|
||
|
|
||
|
elif orm_context.is_update or orm_context.is_delete:
|
||
|
active_options = orm_context.update_delete_options
|
||
|
else:
|
||
|
active_options = None
|
||
|
|
||
|
session = orm_context.session
|
||
|
assert isinstance(session, ShardedSession)
|
||
|
|
||
|
def iter_for_shard(
|
||
|
shard_id: ShardIdentifier,
|
||
|
) -> Union[Result[_T], IteratorResult[_TP]]:
|
||
|
|
||
|
bind_arguments = dict(orm_context.bind_arguments)
|
||
|
bind_arguments["shard_id"] = shard_id
|
||
|
|
||
|
orm_context.update_execution_options(identity_token=shard_id)
|
||
|
return orm_context.invoke_statement(bind_arguments=bind_arguments)
|
||
|
|
||
|
for orm_opt in orm_context._non_compile_orm_options:
|
||
|
# TODO: if we had an ORMOption that gets applied at ORM statement
|
||
|
# execution time, that would allow this to be more generalized.
|
||
|
# for now just iterate and look for our options
|
||
|
if isinstance(orm_opt, set_shard_id):
|
||
|
shard_id = orm_opt.shard_id
|
||
|
break
|
||
|
else:
|
||
|
if active_options and active_options._identity_token is not None:
|
||
|
shard_id = active_options._identity_token
|
||
|
elif "_sa_shard_id" in orm_context.execution_options:
|
||
|
shard_id = orm_context.execution_options["_sa_shard_id"]
|
||
|
elif "shard_id" in orm_context.bind_arguments:
|
||
|
shard_id = orm_context.bind_arguments["shard_id"]
|
||
|
else:
|
||
|
shard_id = None
|
||
|
|
||
|
if shard_id is not None:
|
||
|
return iter_for_shard(shard_id)
|
||
|
else:
|
||
|
partial = []
|
||
|
for shard_id in session.execute_chooser(orm_context):
|
||
|
result_ = iter_for_shard(shard_id)
|
||
|
partial.append(result_)
|
||
|
return partial[0].merge(*partial[1:])
|