bazarr/libs/textdistance/benchmark.py

from __future__ import annotations

# built-in
import json
import math
from collections import defaultdict
from timeit import timeit
from typing import Iterable, Iterator, NamedTuple

# external
from tabulate import tabulate

# app
from .libraries import LIBRARIES_PATH, prototype


# Run the benchmark by executing this module as a script:
# python3 -m textdistance.benchmark


# Module-level object obtained from `prototype.clone()`; the Benchmark methods
# below query it for the known algorithms and the libraries of each algorithm.
libraries = prototype.clone()


class Lib(NamedTuple):
    """A single (algorithm, library) candidate together with its timing."""

    # name of the algorithm the candidate implements
    algorithm: str
    # dotted module name of the library providing the function
    library: str
    # name of the function inside that library
    function: str
    # measured `timeit` result; infinite while the candidate is not measured
    time: float
    # setup snippet handed to `timeit` before the timed statement
    setup: str

    @property
    def row(self) -> tuple[str, ...]:
        """Return the record as a table row: (algorithm, top-level package, time).

        The time column is an empty string for an infinite time, otherwise
        the time formatted with five decimal places.
        """
        package = self.library.partition('.')[0]
        if math.isinf(self.time):
            shown = ''
        else:
            shown = f'{self.time:0.05f}'
        return (self.algorithm, package, shown)


# Setup-snippet template: imports a textdistance class (the `{}` placeholder)
# and binds `func` to an instance created with `external=False`.
# NOTE(review): not referenced in the visible part of this module
# (get_internal_benchmark builds its own setup string) -- confirm before removing.
INTERNAL_SETUP = """
from textdistance import {} as cls
func = cls(external=False)
"""

# Statement timed by `timeit` for every candidate: three calls of `func`
# (the name bound by the setup snippet) on short string pairs.
STMT = """
func('text', 'test')
func('qwer', 'asdf')
func('a' * 15, 'b' * 15)
"""

# Number of times `timeit` executes STMT for one measurement.
RUNS = 4000


class Benchmark:
    """Measure external libraries against textdistance's own implementations.

    `run` is the entry point: it lists the loadable libraries, times them
    together with the internal implementations, prints both tables and saves
    the libraries that were faster than the internal implementation.
    """

    @staticmethod
    def get_installed() -> Iterator[Lib]:
        """Yield an unmeasured `Lib` for every library whose function loads.

        Libraries for which `get_function()` returns a falsy value are
        reported with a warning on stdout and skipped.
        """
        for algorithm in libraries.get_algorithms():
            for candidate in libraries.get_libs(algorithm):
                if candidate.get_function():
                    # time stays infinite until the candidate is measured
                    yield Lib(
                        algorithm=algorithm,
                        library=candidate.module_name,
                        function=candidate.func_name,
                        time=float('Inf'),
                        setup=candidate.setup,
                    )
                else:
                    print(f'WARNING: cannot get func for {candidate}')

    @staticmethod
    def get_external_benchmark(installed: Iterable[Lib]) -> Iterator[Lib]:
        """Yield a copy of each given `Lib` with `time` set to its timeit result."""
        for entry in installed:
            elapsed = timeit(stmt=STMT, setup=entry.setup, number=RUNS)
            yield entry._replace(time=elapsed)

    @staticmethod
    def get_internal_benchmark() -> Iterator[Lib]:
        """Yield a measured `Lib` for textdistance's own version of each algorithm."""
        for algorithm in libraries.get_algorithms():
            setup = f'func = __import__("textdistance").{algorithm}(external=False)'
            elapsed = timeit(stmt=STMT, setup=setup, number=RUNS)
            yield Lib(
                algorithm=algorithm,
                library='**textdistance**',
                function=algorithm,
                time=elapsed,
                setup=setup,
            )

    @staticmethod
    def filter_benchmark(
        external: Iterable[Lib],
        internal: Iterable[Lib],
    ) -> Iterator[Lib]:
        """Keep only the `external` entries strictly faster than the internal one.

        `internal` is consumed immediately to build the per-algorithm time
        limits; `external` is filtered lazily.
        """
        limits = {entry.algorithm: entry.time for entry in internal}
        return (
            entry for entry in external
            if entry.time < limits[entry.algorithm]
        )

    @staticmethod
    def get_table(libs: list[Lib]) -> str:
        """Render the rows of `libs` as a github-style table with a total line."""
        rows = [entry.row for entry in libs]
        body = tabulate(
            rows,
            headers=['algorithm', 'library', 'time'],
            tablefmt='github',
        )
        return body + f'\nTotal: {len(libs)} libs.\n\n'

    @staticmethod
    def save(libs: Iterable[Lib]) -> None:
        """Write `[library, function]` pairs grouped by algorithm to LIBRARIES_PATH as JSON."""
        grouped = {}
        for entry in libs:
            grouped.setdefault(entry.algorithm, []).append(
                [entry.library, entry.function],
            )
        with LIBRARIES_PATH.open('w', encoding='utf8') as stream:
            json.dump(grouped, stream, indent=2, sort_keys=True)

    @classmethod
    def run(cls) -> None:
        """Print the installed and benchmark tables, then save the faster libraries."""
        print('# Installed libraries:\n')
        installed = sorted(cls.get_installed())
        print(cls.get_table(installed))

        print('# Benchmarks (with textdistance):\n')
        external = list(cls.get_external_benchmark(installed))
        internal = list(cls.get_internal_benchmark())
        # order by algorithm first, fastest candidate first within an algorithm
        combined = sorted(
            external + internal,
            key=lambda entry: (entry.algorithm, entry.time),
        )
        print(cls.get_table(combined))

        # internal entries never pass the strict `<` check against themselves
        faster = list(cls.filter_benchmark(combined, internal))
        cls.save(faster)


# Run the whole benchmark when this module is executed as a script.
if __name__ == '__main__':
    Benchmark.run()
Improved global search function

* Use Hamming textdistance library: used Hamming textdistance to sort by closest match.
* Global search UI improvements: increased dropdown height to show more results initially (further results can be scrolled into view). Scrollbars will appear automatically as needed. Remove dropdown when Search box is cleared.
* Added textdistance 4.6.2 library

5 months ago
			`from __future__ import annotations`

			`# built-in`
			`import json`
			`import math`
			`from collections import defaultdict`
			`from timeit import timeit`
			`from typing import Iterable, Iterator, NamedTuple`

			`# external`
			`from tabulate import tabulate`

			`# app`
			`from .libraries import LIBRARIES_PATH, prototype`


			`# python3 -m textdistance.benchmark`


			`libraries = prototype.clone()`


			`class Lib(NamedTuple):`
			`algorithm: str`
			`library: str`
			`function: str`
			`time: float`
			`setup: str`

			`@property`
			`def row(self) -> tuple[str, ...]:`
			`time = '' if math.isinf(self.time) else f'{self.time:0.05f}'`
			`return (self.algorithm, self.library.split('.')[0], time)`


			`INTERNAL_SETUP = """`
			`from textdistance import {} as cls`
			`func = cls(external=False)`
			`"""`

			`STMT = """`
			`func('text', 'test')`
			`func('qwer', 'asdf')`
			`func('a' * 15, 'b' * 15)`
			`"""`

			`RUNS = 4000`


			`class Benchmark:`
			`@staticmethod`
			`def get_installed() -> Iterator[Lib]:`
			`for alg in libraries.get_algorithms():`
			`for lib in libraries.get_libs(alg):`
			`# try load function`
			`if not lib.get_function():`
			`print(f'WARNING: cannot get func for {lib}')`
			`continue`
			`# return library info`
			`yield Lib(`
			`algorithm=alg,`
			`library=lib.module_name,`
			`function=lib.func_name,`
			`time=float('Inf'),`
			`setup=lib.setup,`
			`)`

			`@staticmethod`
			`def get_external_benchmark(installed: Iterable[Lib]) -> Iterator[Lib]:`
			`for lib in installed:`
			`time = timeit(`
			`stmt=STMT,`
			`setup=lib.setup,`
			`number=RUNS,`
			`)`
			`yield lib._replace(time=time)`

			`@staticmethod`
			`def get_internal_benchmark() -> Iterator[Lib]:`
			`for alg in libraries.get_algorithms():`
			`setup = f'func = __import__("textdistance").{alg}(external=False)'`
			`yield Lib(`
			`algorithm=alg,`
			`library='**textdistance**',`
			`function=alg,`
			`time=timeit(`
			`stmt=STMT,`
			`setup=setup,`
			`number=RUNS,`
			`),`
			`setup=setup,`
			`)`

			`@staticmethod`
			`def filter_benchmark(`
			`external: Iterable[Lib],`
			`internal: Iterable[Lib],`
			`) -> Iterator[Lib]:`
			`limits = {i.algorithm: i.time for i in internal}`
			`return filter(lambda x: x.time < limits[x.algorithm], external)`

			`@staticmethod`
			`def get_table(libs: list[Lib]) -> str:`
			`table = tabulate(`
			`[lib.row for lib in libs],`
			`headers=['algorithm', 'library', 'time'],`
			`tablefmt='github',`
			`)`
			`table += f'\nTotal: {len(libs)} libs.\n\n'`
			`return table`

			`@staticmethod`
			`def save(libs: Iterable[Lib]) -> None:`
			`data = defaultdict(list)`
			`for lib in libs:`
			`data[lib.algorithm].append([lib.library, lib.function])`
			`with LIBRARIES_PATH.open('w', encoding='utf8') as f:`
			`json.dump(obj=data, fp=f, indent=2, sort_keys=True)`

			`@classmethod`
			`def run(cls) -> None:`
			`print('# Installed libraries:\n')`
			`installed = list(cls.get_installed())`
			`installed.sort()`
			`print(cls.get_table(installed))`

			`print('# Benchmarks (with textdistance):\n')`
			`benchmark = list(cls.get_external_benchmark(installed))`
			`benchmark_internal = list(cls.get_internal_benchmark())`
			`benchmark += benchmark_internal`
			`benchmark.sort(key=lambda x: (x.algorithm, x.time))`
			`print(cls.get_table(benchmark))`

			`benchmark = list(cls.filter_benchmark(benchmark, benchmark_internal))`
			`cls.save(benchmark)`


			`if __name__ == '__main__':`
			`Benchmark.run()`