You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
433 lines
13 KiB
433 lines
13 KiB
6 years ago
|
# Tweepy
|
||
|
# Copyright 2009-2010 Joshua Roesslein
|
||
|
# See LICENSE for details.
|
||
|
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import time
|
||
|
import datetime
|
||
|
import hashlib
|
||
|
import threading
|
||
|
import os
|
||
|
import logging
|
||
|
|
||
|
try:
|
||
|
import cPickle as pickle
|
||
|
except ImportError:
|
||
|
import pickle
|
||
|
|
||
|
try:
|
||
|
import fcntl
|
||
|
except ImportError:
|
||
|
# Probably on a windows system
|
||
|
# TODO: use win32file
|
||
|
pass
|
||
|
|
||
|
log = logging.getLogger('tweepy.cache')
|
||
|
|
||
|
class Cache(object):
|
||
|
"""Cache interface"""
|
||
|
|
||
|
def __init__(self, timeout=60):
|
||
|
"""Initialize the cache
|
||
|
timeout: number of seconds to keep a cached entry
|
||
|
"""
|
||
|
self.timeout = timeout
|
||
|
|
||
|
def store(self, key, value):
|
||
|
"""Add new record to cache
|
||
|
key: entry key
|
||
|
value: data of entry
|
||
|
"""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def get(self, key, timeout=None):
|
||
|
"""Get cached entry if exists and not expired
|
||
|
key: which entry to get
|
||
|
timeout: override timeout with this value [optional]
|
||
|
"""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def count(self):
|
||
|
"""Get count of entries currently stored in cache"""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def cleanup(self):
|
||
|
"""Delete any expired entries in cache."""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def flush(self):
|
||
|
"""Delete all cached entries"""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
|
||
|
class MemoryCache(Cache):
|
||
|
"""In-memory cache"""
|
||
|
|
||
|
def __init__(self, timeout=60):
|
||
|
Cache.__init__(self, timeout)
|
||
|
self._entries = {}
|
||
|
self.lock = threading.Lock()
|
||
|
|
||
|
def __getstate__(self):
|
||
|
# pickle
|
||
|
return {'entries': self._entries, 'timeout': self.timeout}
|
||
|
|
||
|
def __setstate__(self, state):
|
||
|
# unpickle
|
||
|
self.lock = threading.Lock()
|
||
|
self._entries = state['entries']
|
||
|
self.timeout = state['timeout']
|
||
|
|
||
|
def _is_expired(self, entry, timeout):
|
||
|
return timeout > 0 and (time.time() - entry[0]) >= timeout
|
||
|
|
||
|
def store(self, key, value):
|
||
|
self.lock.acquire()
|
||
|
self._entries[key] = (time.time(), value)
|
||
|
self.lock.release()
|
||
|
|
||
|
def get(self, key, timeout=None):
|
||
|
self.lock.acquire()
|
||
|
try:
|
||
|
# check to see if we have this key
|
||
|
entry = self._entries.get(key)
|
||
|
if not entry:
|
||
|
# no hit, return nothing
|
||
|
return None
|
||
|
|
||
|
# use provided timeout in arguments if provided
|
||
|
# otherwise use the one provided during init.
|
||
|
if timeout is None:
|
||
|
timeout = self.timeout
|
||
|
|
||
|
# make sure entry is not expired
|
||
|
if self._is_expired(entry, timeout):
|
||
|
# entry expired, delete and return nothing
|
||
|
del self._entries[key]
|
||
|
return None
|
||
|
|
||
|
# entry found and not expired, return it
|
||
|
return entry[1]
|
||
|
finally:
|
||
|
self.lock.release()
|
||
|
|
||
|
def count(self):
|
||
|
return len(self._entries)
|
||
|
|
||
|
def cleanup(self):
|
||
|
self.lock.acquire()
|
||
|
try:
|
||
|
for k, v in dict(self._entries).items():
|
||
|
if self._is_expired(v, self.timeout):
|
||
|
del self._entries[k]
|
||
|
finally:
|
||
|
self.lock.release()
|
||
|
|
||
|
def flush(self):
|
||
|
self.lock.acquire()
|
||
|
self._entries.clear()
|
||
|
self.lock.release()
|
||
|
|
||
|
|
||
|
class FileCache(Cache):
|
||
|
"""File-based cache"""
|
||
|
|
||
|
# locks used to make cache thread-safe
|
||
|
cache_locks = {}
|
||
|
|
||
|
def __init__(self, cache_dir, timeout=60):
|
||
|
Cache.__init__(self, timeout)
|
||
|
if os.path.exists(cache_dir) is False:
|
||
|
os.mkdir(cache_dir)
|
||
|
self.cache_dir = cache_dir
|
||
|
if cache_dir in FileCache.cache_locks:
|
||
|
self.lock = FileCache.cache_locks[cache_dir]
|
||
|
else:
|
||
|
self.lock = threading.Lock()
|
||
|
FileCache.cache_locks[cache_dir] = self.lock
|
||
|
|
||
|
if os.name == 'posix':
|
||
|
self._lock_file = self._lock_file_posix
|
||
|
self._unlock_file = self._unlock_file_posix
|
||
|
elif os.name == 'nt':
|
||
|
self._lock_file = self._lock_file_win32
|
||
|
self._unlock_file = self._unlock_file_win32
|
||
|
else:
|
||
|
log.warning('FileCache locking not supported on this system!')
|
||
|
self._lock_file = self._lock_file_dummy
|
||
|
self._unlock_file = self._unlock_file_dummy
|
||
|
|
||
|
def _get_path(self, key):
|
||
|
md5 = hashlib.md5()
|
||
|
md5.update(key.encode('utf-8'))
|
||
|
return os.path.join(self.cache_dir, md5.hexdigest())
|
||
|
|
||
|
def _lock_file_dummy(self, path, exclusive=True):
|
||
|
return None
|
||
|
|
||
|
def _unlock_file_dummy(self, lock):
|
||
|
return
|
||
|
|
||
|
def _lock_file_posix(self, path, exclusive=True):
|
||
|
lock_path = path + '.lock'
|
||
|
if exclusive is True:
|
||
|
f_lock = open(lock_path, 'w')
|
||
|
fcntl.lockf(f_lock, fcntl.LOCK_EX)
|
||
|
else:
|
||
|
f_lock = open(lock_path, 'r')
|
||
|
fcntl.lockf(f_lock, fcntl.LOCK_SH)
|
||
|
if os.path.exists(lock_path) is False:
|
||
|
f_lock.close()
|
||
|
return None
|
||
|
return f_lock
|
||
|
|
||
|
def _unlock_file_posix(self, lock):
|
||
|
lock.close()
|
||
|
|
||
|
def _lock_file_win32(self, path, exclusive=True):
|
||
|
# TODO: implement
|
||
|
return None
|
||
|
|
||
|
def _unlock_file_win32(self, lock):
|
||
|
# TODO: implement
|
||
|
return
|
||
|
|
||
|
def _delete_file(self, path):
|
||
|
os.remove(path)
|
||
|
if os.path.exists(path + '.lock'):
|
||
|
os.remove(path + '.lock')
|
||
|
|
||
|
def store(self, key, value):
|
||
|
path = self._get_path(key)
|
||
|
self.lock.acquire()
|
||
|
try:
|
||
|
# acquire lock and open file
|
||
|
f_lock = self._lock_file(path)
|
||
|
datafile = open(path, 'wb')
|
||
|
|
||
|
# write data
|
||
|
pickle.dump((time.time(), value), datafile)
|
||
|
|
||
|
# close and unlock file
|
||
|
datafile.close()
|
||
|
self._unlock_file(f_lock)
|
||
|
finally:
|
||
|
self.lock.release()
|
||
|
|
||
|
def get(self, key, timeout=None):
|
||
|
return self._get(self._get_path(key), timeout)
|
||
|
|
||
|
def _get(self, path, timeout):
|
||
|
if os.path.exists(path) is False:
|
||
|
# no record
|
||
|
return None
|
||
|
self.lock.acquire()
|
||
|
try:
|
||
|
# acquire lock and open
|
||
|
f_lock = self._lock_file(path, False)
|
||
|
datafile = open(path, 'rb')
|
||
|
|
||
|
# read pickled object
|
||
|
created_time, value = pickle.load(datafile)
|
||
|
datafile.close()
|
||
|
|
||
|
# check if value is expired
|
||
|
if timeout is None:
|
||
|
timeout = self.timeout
|
||
|
if timeout > 0:
|
||
|
if (time.time() - created_time) >= timeout:
|
||
|
# expired! delete from cache
|
||
|
value = None
|
||
|
self._delete_file(path)
|
||
|
|
||
|
# unlock and return result
|
||
|
self._unlock_file(f_lock)
|
||
|
return value
|
||
|
finally:
|
||
|
self.lock.release()
|
||
|
|
||
|
def count(self):
|
||
|
c = 0
|
||
|
for entry in os.listdir(self.cache_dir):
|
||
|
if entry.endswith('.lock'):
|
||
|
continue
|
||
|
c += 1
|
||
|
return c
|
||
|
|
||
|
def cleanup(self):
|
||
|
for entry in os.listdir(self.cache_dir):
|
||
|
if entry.endswith('.lock'):
|
||
|
continue
|
||
|
self._get(os.path.join(self.cache_dir, entry), None)
|
||
|
|
||
|
def flush(self):
|
||
|
for entry in os.listdir(self.cache_dir):
|
||
|
if entry.endswith('.lock'):
|
||
|
continue
|
||
|
self._delete_file(os.path.join(self.cache_dir, entry))
|
||
|
|
||
|
|
||
|
class MemCacheCache(Cache):
|
||
|
"""Cache interface"""
|
||
|
|
||
|
def __init__(self, client, timeout=60):
|
||
|
"""Initialize the cache
|
||
|
client: The memcache client
|
||
|
timeout: number of seconds to keep a cached entry
|
||
|
"""
|
||
|
self.client = client
|
||
|
self.timeout = timeout
|
||
|
|
||
|
def store(self, key, value):
|
||
|
"""Add new record to cache
|
||
|
key: entry key
|
||
|
value: data of entry
|
||
|
"""
|
||
|
self.client.set(key, value, time=self.timeout)
|
||
|
|
||
|
def get(self, key, timeout=None):
|
||
|
"""Get cached entry if exists and not expired
|
||
|
key: which entry to get
|
||
|
timeout: override timeout with this value [optional].
|
||
|
DOES NOT WORK HERE
|
||
|
"""
|
||
|
return self.client.get(key)
|
||
|
|
||
|
def count(self):
|
||
|
"""Get count of entries currently stored in cache. RETURN 0"""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def cleanup(self):
|
||
|
"""Delete any expired entries in cache. NO-OP"""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def flush(self):
|
||
|
"""Delete all cached entries. NO-OP"""
|
||
|
raise NotImplementedError
|
||
|
|
||
|
|
||
|
class RedisCache(Cache):
|
||
|
"""Cache running in a redis server"""
|
||
|
|
||
|
def __init__(self, client,
|
||
|
timeout=60,
|
||
|
keys_container='tweepy:keys',
|
||
|
pre_identifier='tweepy:'):
|
||
|
Cache.__init__(self, timeout)
|
||
|
self.client = client
|
||
|
self.keys_container = keys_container
|
||
|
self.pre_identifier = pre_identifier
|
||
|
|
||
|
def _is_expired(self, entry, timeout):
|
||
|
# Returns true if the entry has expired
|
||
|
return timeout > 0 and (time.time() - entry[0]) >= timeout
|
||
|
|
||
|
def store(self, key, value):
|
||
|
"""Store the key, value pair in our redis server"""
|
||
|
# Prepend tweepy to our key,
|
||
|
# this makes it easier to identify tweepy keys in our redis server
|
||
|
key = self.pre_identifier + key
|
||
|
# Get a pipe (to execute several redis commands in one step)
|
||
|
pipe = self.client.pipeline()
|
||
|
# Set our values in a redis hash (similar to python dict)
|
||
|
pipe.set(key, pickle.dumps((time.time(), value)))
|
||
|
# Set the expiration
|
||
|
pipe.expire(key, self.timeout)
|
||
|
# Add the key to a set containing all the keys
|
||
|
pipe.sadd(self.keys_container, key)
|
||
|
# Execute the instructions in the redis server
|
||
|
pipe.execute()
|
||
|
|
||
|
def get(self, key, timeout=None):
|
||
|
"""Given a key, returns an element from the redis table"""
|
||
|
key = self.pre_identifier + key
|
||
|
# Check to see if we have this key
|
||
|
unpickled_entry = self.client.get(key)
|
||
|
if not unpickled_entry:
|
||
|
# No hit, return nothing
|
||
|
return None
|
||
|
|
||
|
entry = pickle.loads(unpickled_entry)
|
||
|
# Use provided timeout in arguments if provided
|
||
|
# otherwise use the one provided during init.
|
||
|
if timeout is None:
|
||
|
timeout = self.timeout
|
||
|
|
||
|
# Make sure entry is not expired
|
||
|
if self._is_expired(entry, timeout):
|
||
|
# entry expired, delete and return nothing
|
||
|
self.delete_entry(key)
|
||
|
return None
|
||
|
# entry found and not expired, return it
|
||
|
return entry[1]
|
||
|
|
||
|
def count(self):
|
||
|
"""Note: This is not very efficient,
|
||
|
since it retreives all the keys from the redis
|
||
|
server to know how many keys we have"""
|
||
|
return len(self.client.smembers(self.keys_container))
|
||
|
|
||
|
def delete_entry(self, key):
|
||
|
"""Delete an object from the redis table"""
|
||
|
pipe = self.client.pipeline()
|
||
|
pipe.srem(self.keys_container, key)
|
||
|
pipe.delete(key)
|
||
|
pipe.execute()
|
||
|
|
||
|
def cleanup(self):
|
||
|
"""Cleanup all the expired keys"""
|
||
|
keys = self.client.smembers(self.keys_container)
|
||
|
for key in keys:
|
||
|
entry = self.client.get(key)
|
||
|
if entry:
|
||
|
entry = pickle.loads(entry)
|
||
|
if self._is_expired(entry, self.timeout):
|
||
|
self.delete_entry(key)
|
||
|
|
||
|
def flush(self):
|
||
|
"""Delete all entries from the cache"""
|
||
|
keys = self.client.smembers(self.keys_container)
|
||
|
for key in keys:
|
||
|
self.delete_entry(key)
|
||
|
|
||
|
|
||
|
class MongodbCache(Cache):
|
||
|
"""A simple pickle-based MongoDB cache sytem."""
|
||
|
|
||
|
def __init__(self, db, timeout=3600, collection='tweepy_cache'):
|
||
|
"""Should receive a "database" cursor from pymongo."""
|
||
|
Cache.__init__(self, timeout)
|
||
|
self.timeout = timeout
|
||
|
self.col = db[collection]
|
||
|
self.col.create_index('created', expireAfterSeconds=timeout)
|
||
|
|
||
|
def store(self, key, value):
|
||
|
from bson.binary import Binary
|
||
|
|
||
|
now = datetime.datetime.utcnow()
|
||
|
blob = Binary(pickle.dumps(value))
|
||
|
|
||
|
self.col.insert({'created': now, '_id': key, 'value': blob})
|
||
|
|
||
|
def get(self, key, timeout=None):
|
||
|
if timeout:
|
||
|
raise NotImplementedError
|
||
|
obj = self.col.find_one({'_id': key})
|
||
|
if obj:
|
||
|
return pickle.loads(obj['value'])
|
||
|
|
||
|
def count(self):
|
||
|
return self.col.find({}).count()
|
||
|
|
||
|
def delete_entry(self, key):
|
||
|
return self.col.remove({'_id': key})
|
||
|
|
||
|
def cleanup(self):
|
||
|
"""MongoDB will automatically clear expired keys."""
|
||
|
pass
|
||
|
|
||
|
def flush(self):
|
||
|
self.col.drop()
|
||
|
self.col.create_index('created', expireAfterSeconds=self.timeout)
|