from collections.abc import Mapping, Hashable from itertools import chain from pyrsistent._pvector import pvector from pyrsistent._transformations import transform class PMap(object): """ Persistent map/dict. Tries to follow the same naming conventions as the built in dict where feasible. Do not instantiate directly, instead use the factory functions :py:func:`m` or :py:func:`pmap` to create an instance. Was originally written as a very close copy of the Clojure equivalent but was later rewritten to closer re-assemble the python dict. This means that a sparse vector (a PVector) of buckets is used. The keys are hashed and the elements inserted at position hash % len(bucket_vector). Whenever the map size exceeds 2/3 of the containing vectors size the map is reallocated to a vector of double the size. This is done to avoid excessive hash collisions. This structure corresponds most closely to the built in dict type and is intended as a replacement. Where the semantics are the same (more or less) the same function names have been used but for some cases it is not possible, for example assignments and deletion of values. PMap implements the Mapping protocol and is Hashable. It also supports dot-notation for element access. Random access and insert is log32(n) where n is the size of the map. The following are examples of some common operations on persistent maps >>> m1 = m(a=1, b=3) >>> m2 = m1.set('c', 3) >>> m3 = m2.remove('a') >>> m1 == {'a': 1, 'b': 3} True >>> m2 == {'a': 1, 'b': 3, 'c': 3} True >>> m3 == {'b': 3, 'c': 3} True >>> m3['c'] 3 >>> m3.c 3 """ __slots__ = ('_size', '_buckets', '__weakref__', '_cached_hash') def __new__(cls, size, buckets): self = super(PMap, cls).__new__(cls) self._size = size self._buckets = buckets return self @staticmethod def _get_bucket(buckets, key): index = hash(key) % len(buckets) bucket = buckets[index] return index, bucket @staticmethod def _getitem(buckets, key): _, bucket = PMap._get_bucket(buckets, key) if bucket: for k, v in bucket: if k == key: return v raise KeyError(key) def __getitem__(self, key): return PMap._getitem(self._buckets, key) @staticmethod def _contains(buckets, key): _, bucket = PMap._get_bucket(buckets, key) if bucket: for k, _ in bucket: if k == key: return True return False return False def __contains__(self, key): return self._contains(self._buckets, key) get = Mapping.get def __iter__(self): return self.iterkeys() def __getattr__(self, key): try: return self[key] except KeyError as e: raise AttributeError( "{0} has no attribute '{1}'".format(type(self).__name__, key) ) from e def iterkeys(self): for k, _ in self.iteritems(): yield k # These are more efficient implementations compared to the original # methods that are based on the keys iterator and then calls the # accessor functions to access the value for the corresponding key def itervalues(self): for _, v in self.iteritems(): yield v def iteritems(self): for bucket in self._buckets: if bucket: for k, v in bucket: yield k, v def values(self): return pvector(self.itervalues()) def keys(self): return pvector(self.iterkeys()) def items(self): return pvector(self.iteritems()) def __len__(self): return self._size def __repr__(self): return 'pmap({0})'.format(str(dict(self))) def __eq__(self, other): if self is other: return True if not isinstance(other, Mapping): return NotImplemented if len(self) != len(other): return False if isinstance(other, PMap): if (hasattr(self, '_cached_hash') and hasattr(other, '_cached_hash') and self._cached_hash != other._cached_hash): return False if self._buckets == other._buckets: return True return dict(self.iteritems()) == dict(other.iteritems()) elif isinstance(other, dict): return dict(self.iteritems()) == other return dict(self.iteritems()) == dict(other.items()) __ne__ = Mapping.__ne__ def __lt__(self, other): raise TypeError('PMaps are not orderable') __le__ = __lt__ __gt__ = __lt__ __ge__ = __lt__ def __str__(self): return self.__repr__() def __hash__(self): if not hasattr(self, '_cached_hash'): self._cached_hash = hash(frozenset(self.iteritems())) return self._cached_hash def set(self, key, val): """ Return a new PMap with key and val inserted. >>> m1 = m(a=1, b=2) >>> m2 = m1.set('a', 3) >>> m3 = m1.set('c' ,4) >>> m1 == {'a': 1, 'b': 2} True >>> m2 == {'a': 3, 'b': 2} True >>> m3 == {'a': 1, 'b': 2, 'c': 4} True """ return self.evolver().set(key, val).persistent() def remove(self, key): """ Return a new PMap without the element specified by key. Raises KeyError if the element is not present. >>> m1 = m(a=1, b=2) >>> m1.remove('a') pmap({'b': 2}) """ return self.evolver().remove(key).persistent() def discard(self, key): """ Return a new PMap without the element specified by key. Returns reference to itself if element is not present. >>> m1 = m(a=1, b=2) >>> m1.discard('a') pmap({'b': 2}) >>> m1 is m1.discard('c') True """ try: return self.remove(key) except KeyError: return self def update(self, *maps): """ Return a new PMap with the items in Mappings inserted. If the same key is present in multiple maps the rightmost (last) value is inserted. >>> m1 = m(a=1, b=2) >>> m1.update(m(a=2, c=3), {'a': 17, 'd': 35}) == {'a': 17, 'b': 2, 'c': 3, 'd': 35} True """ return self.update_with(lambda l, r: r, *maps) def update_with(self, update_fn, *maps): """ Return a new PMap with the items in Mappings maps inserted. If the same key is present in multiple maps the values will be merged using merge_fn going from left to right. >>> from operator import add >>> m1 = m(a=1, b=2) >>> m1.update_with(add, m(a=2)) == {'a': 3, 'b': 2} True The reverse behaviour of the regular merge. Keep the leftmost element instead of the rightmost. >>> m1 = m(a=1) >>> m1.update_with(lambda l, r: l, m(a=2), {'a':3}) pmap({'a': 1}) """ evolver = self.evolver() for map in maps: for key, value in map.items(): evolver.set(key, update_fn(evolver[key], value) if key in evolver else value) return evolver.persistent() def __add__(self, other): return self.update(other) __or__ = __add__ def __reduce__(self): # Pickling support return pmap, (dict(self),) def transform(self, *transformations): """ Transform arbitrarily complex combinations of PVectors and PMaps. A transformation consists of two parts. One match expression that specifies which elements to transform and one transformation function that performs the actual transformation. >>> from pyrsistent import freeze, ny >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'}, ... {'author': 'Steve', 'content': 'A slightly longer article'}], ... 'weather': {'temperature': '11C', 'wind': '5m/s'}}) >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c) >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c) >>> very_short_news.articles[0].content 'A short article' >>> very_short_news.articles[1].content 'A slightly long...' When nothing has been transformed the original data structure is kept >>> short_news is news_paper True >>> very_short_news is news_paper False >>> very_short_news.articles[0] is news_paper.articles[0] True """ return transform(self, transformations) def copy(self): return self class _Evolver(object): __slots__ = ('_buckets_evolver', '_size', '_original_pmap') def __init__(self, original_pmap): self._original_pmap = original_pmap self._buckets_evolver = original_pmap._buckets.evolver() self._size = original_pmap._size def __getitem__(self, key): return PMap._getitem(self._buckets_evolver, key) def __setitem__(self, key, val): self.set(key, val) def set(self, key, val): if len(self._buckets_evolver) < 0.67 * self._size: self._reallocate(2 * len(self._buckets_evolver)) kv = (key, val) index, bucket = PMap._get_bucket(self._buckets_evolver, key) if bucket: for k, v in bucket: if k == key: if v is not val: new_bucket = [(k2, v2) if k2 != k else (k2, val) for k2, v2 in bucket] self._buckets_evolver[index] = new_bucket return self new_bucket = [kv] new_bucket.extend(bucket) self._buckets_evolver[index] = new_bucket self._size += 1 else: self._buckets_evolver[index] = [kv] self._size += 1 return self def _reallocate(self, new_size): new_list = new_size * [None] buckets = self._buckets_evolver.persistent() for k, v in chain.from_iterable(x for x in buckets if x): index = hash(k) % new_size if new_list[index]: new_list[index].append((k, v)) else: new_list[index] = [(k, v)] # A reallocation should always result in a dirty buckets evolver to avoid # possible loss of elements when doing the reallocation. self._buckets_evolver = pvector().evolver() self._buckets_evolver.extend(new_list) def is_dirty(self): return self._buckets_evolver.is_dirty() def persistent(self): if self.is_dirty(): self._original_pmap = PMap(self._size, self._buckets_evolver.persistent()) return self._original_pmap def __len__(self): return self._size def __contains__(self, key): return PMap._contains(self._buckets_evolver, key) def __delitem__(self, key): self.remove(key) def remove(self, key): index, bucket = PMap._get_bucket(self._buckets_evolver, key) if bucket: new_bucket = [(k, v) for (k, v) in bucket if k != key] if len(bucket) > len(new_bucket): self._buckets_evolver[index] = new_bucket if new_bucket else None self._size -= 1 return self raise KeyError('{0}'.format(key)) def evolver(self): """ Create a new evolver for this pmap. For a discussion on evolvers in general see the documentation for the pvector evolver. Create the evolver and perform various mutating updates to it: >>> m1 = m(a=1, b=2) >>> e = m1.evolver() >>> e['c'] = 3 >>> len(e) 3 >>> del e['a'] The underlying pmap remains the same: >>> m1 == {'a': 1, 'b': 2} True The changes are kept in the evolver. An updated pmap can be created using the persistent() function on the evolver. >>> m2 = e.persistent() >>> m2 == {'b': 2, 'c': 3} True The new pmap will share data with the original pmap in the same way that would have been done if only using operations on the pmap. """ return self._Evolver(self) Mapping.register(PMap) Hashable.register(PMap) def _turbo_mapping(initial, pre_size): if pre_size: size = pre_size else: try: size = 2 * len(initial) or 8 except Exception: # Guess we can't figure out the length. Give up on length hinting, # we can always reallocate later. size = 8 buckets = size * [None] if not isinstance(initial, Mapping): # Make a dictionary of the initial data if it isn't already, # that will save us some job further down since we can assume no # key collisions initial = dict(initial) for k, v in initial.items(): h = hash(k) index = h % size bucket = buckets[index] if bucket: bucket.append((k, v)) else: buckets[index] = [(k, v)] return PMap(len(initial), pvector().extend(buckets)) _EMPTY_PMAP = _turbo_mapping({}, 0) def pmap(initial={}, pre_size=0): """ Create new persistent map, inserts all elements in initial into the newly created map. The optional argument pre_size may be used to specify an initial size of the underlying bucket vector. This may have a positive performance impact in the cases where you know beforehand that a large number of elements will be inserted into the map eventually since it will reduce the number of reallocations required. >>> pmap({'a': 13, 'b': 14}) == {'a': 13, 'b': 14} True """ if not initial and pre_size == 0: return _EMPTY_PMAP return _turbo_mapping(initial, pre_size) def m(**kwargs): """ Creates a new persistent map. Inserts all key value arguments into the newly created map. >>> m(a=13, b=14) == {'a': 13, 'b': 14} True """ return pmap(kwargs)