You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
208 lines
7.2 KiB
208 lines
7.2 KiB
6 years ago
|
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||
|
#
|
||
|
# This module is part of GitDB and is released under
|
||
|
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||
|
"""Module containing a database to deal with packs"""
|
||
|
from gitdb.db.base import (
|
||
|
FileDBBase,
|
||
|
ObjectDBR,
|
||
|
CachingDB
|
||
|
)
|
||
|
|
||
|
from gitdb.util import LazyMixin
|
||
|
|
||
|
from gitdb.exc import (
|
||
|
BadObject,
|
||
|
UnsupportedOperation,
|
||
|
AmbiguousObjectName
|
||
|
)
|
||
|
|
||
|
from gitdb.pack import PackEntity
|
||
|
from gitdb.utils.compat import xrange
|
||
|
|
||
|
from functools import reduce
|
||
|
|
||
|
import os
|
||
|
import glob
|
||
|
|
||
|
__all__ = ('PackedDB', )
|
||
|
|
||
|
#{ Utilities
|
||
|
|
||
|
|
||
|
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):

    """A database operating on a set of object packs.

    Every known pack is tracked in ``self._entities`` as a three-item list
    ``[hits, entity, sha_to_index]``. The list is re-sorted by descending hit
    count from time to time so that packs which answered many requests are
    queried first."""

    # sort the priority list every N queries
    # Higher values are better, performance tests don't show this has
    # any effect, but it should have one
    _sort_interval = 500

    def __init__(self, root_path):
        """Initialize the database.

        :param root_path: handed unchanged to the base class initializer.
            ``update_cache`` later searches ``self.root_path()`` for files
            matching ``pack-*.pack``"""
        super(PackedDB, self).__init__(root_path)
        # self._entities is intentionally not assigned here; it is created by
        # _set_cache_ (presumably triggered by LazyMixin on first access - TODO confirm).
        # It is a list of lists with three items:
        # * hits - number of times the pack was hit with a request
        # * entity - Pack entity instance
        # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
        self._hit_count = 0             # amount of hits
        self._st_mtime = 0              # last modification date of our root path

    def _set_cache_(self, attr):
        """Create the value of a lazily initialized attribute.

        Only ``_entities`` is handled: it starts out as an empty list which is
        then filled by a forced ``update_cache`` run. Any other name is ignored.

        :param attr: name of the attribute to initialize"""
        if attr == '_entities':
            self._entities = list()
            self.update_cache(force=True)
        # END handle entities initialization

    def _sort_entities(self):
        """Sort the entity list in place, highest hit count first"""
        self._entities.sort(key=lambda item: item[0], reverse=True)

    def _pack_info(self, sha):
        """:return: tuple(entity, index) for an item at the given sha
        :param sha: 20 or 40 byte sha
        :raise BadObject: if none of the known packs contains the sha
        **Note:** This method is not thread-safe, but may be hit in multi-threaded
            operation. The worst thing that can happen though is a counter that
            was not incremented, or the list being in wrong order. So we save
            the time for locking here, let's see how that goes"""
        # presort ?
        # NOTE: the hit count only advances on successful lookups, so this
        # condition stays true across consecutive misses.
        if self._hit_count % self._sort_interval == 0:
            self._sort_entities()
        # END update sorting

        for item in self._entities:
            index = item[2](sha)
            if index is not None:
                item[0] += 1            # one hit for you
                self._hit_count += 1    # general hit count
                return (item[1], index)
            # END index found in pack
        # END for each item

        # no hit, see whether we have to update packs
        # NOTE: considering packs don't change very often, we save this call
        # and leave it to the super-caller to trigger that
        raise BadObject(sha)

    #{ Object DB Read

    def has_object(self, sha):
        """:return: True if one of the known packs contains the given sha,
            False otherwise
        :param sha: sha as accepted by ``_pack_info``"""
        try:
            self._pack_info(sha)
        except BadObject:
            return False
        # END exception handling
        return True

    def info(self, sha):
        """:return: result of ``info_at_index`` of the pack entity holding the sha
        :param sha: sha as accepted by ``_pack_info``
        :raise BadObject: if none of the known packs contains the sha"""
        entity, index = self._pack_info(sha)
        return entity.info_at_index(index)

    def stream(self, sha):
        """:return: result of ``stream_at_index`` of the pack entity holding the sha
        :param sha: sha as accepted by ``_pack_info``
        :raise BadObject: if none of the known packs contains the sha"""
        entity, index = self._pack_info(sha)
        return entity.stream_at_index(index)

    def sha_iter(self):
        """:return: iterator yielding, pack by pack, the value of the pack
            index's ``sha`` method for every position in that index"""
        for entity in self.entities():
            index = entity.index()
            sha_by_index = index.sha
            # use a separate name for the position so the index object
            # is not rebound while we iterate over it
            for position in xrange(index.size()):
                yield sha_by_index(position)
            # END for each index
        # END for each entity

    def size(self):
        """:return: sum of the sizes reported by the indices of all known packs,
            0 if there are no packs"""
        return sum(item[1].index().size() for item in self._entities)

    #} END object db read

    #{ object db write

    def store(self, istream):
        """Storing individual objects is not feasible as a pack is designed to
        hold multiple objects. Writing or rewriting packs for single objects is
        inefficient

        :raise UnsupportedOperation: always"""
        raise UnsupportedOperation()

    #} END object db write

    #{ Interface

    def update_cache(self, force=False):
        """
        Update our cache with the actually existing packs on disk. Add new ones,
        and remove deleted ones. We keep the unchanged ones

        :param force: If True, the cache will be updated even though the directory
            does not appear to have changed according to its modification timestamp.
        :return: True if the packs have been updated so there is new information,
            False if there was no change to the pack database"""
        root = self.root_path()
        stat = os.stat(root)
        if not force and stat.st_mtime <= self._st_mtime:
            return False
        # END abort early on no change
        self._st_mtime = stat.st_mtime

        # packs are supposed to be prefixed with pack- by git-convention
        # get all pack files, figure out what changed
        pack_files = set(glob.glob(os.path.join(root, "pack-*.pack")))
        our_pack_files = set(item[1].pack().path() for item in self._entities)

        # new packs
        for pack_file in (pack_files - our_pack_files):
            # init the hit-counter/priority with the size, a good measure for hit-
            # probability. It's implemented so that only 12 bytes will be read
            entity = PackEntity(pack_file)
            self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
        # END for each new packfile

        # removed packs
        # Filter the list in place in a single pass. This keeps the list object
        # itself intact and does not rely on an assert (stripped under -O) to
        # protect against deleting the wrong entry.
        removed_pack_files = our_pack_files - pack_files
        if removed_pack_files:
            self._entities[:] = [item for item in self._entities
                                 if item[1].pack().path() not in removed_pack_files]
        # END handle removed packs

        # reinitialize priorities
        self._sort_entities()
        return True

    def entities(self):
        """:return: list of pack entities operated upon by this database"""
        return [item[1] for item in self._entities]

    def partial_to_complete_sha(self, partial_binsha, canonical_length):
        """:return: 20 byte sha as inferred by the given partial binary sha
        :param partial_binsha: binary sha with less than 20 bytes
        :param canonical_length: length of the corresponding canonical representation.
            It is required as binary sha's cannot display whether the original hex sha
            had an odd or even number of characters
        :raise AmbiguousObjectName: if the packs yield different full shas for
            the partial sha
        :raise BadObject: if no pack yields a full sha for the partial sha"""
        candidate = None
        for item in self._entities:
            pack_index = item[1].index()
            item_index = pack_index.partial_sha_to_index(partial_binsha, canonical_length)
            if item_index is not None:
                sha = pack_index.sha(item_index)
                # the same object may live in several packs, only differing
                # shas make the name ambiguous
                if candidate is not None and candidate != sha:
                    raise AmbiguousObjectName(partial_binsha)
                candidate = sha
            # END handle full sha could be found
        # END for each entity

        # still not found ?
        if candidate is None:
            raise BadObject(partial_binsha)

        return candidate

    #} END interface
|