Make Redis backend shardable

This commit is contained in:
Andrew Godwin 2015-11-06 07:57:26 -08:00
parent 5106c7822c
commit a41516fa6b
5 changed files with 136 additions and 41 deletions

View File

@ -1,9 +1,14 @@
import time
import json
import datetime
import math
import redis
import random
import binascii
import uuid
from django.utils import six
from .base import BaseChannelBackend
@ -13,41 +18,81 @@ class RedisChannelBackend(BaseChannelBackend):
multiple processes fine, but it's going to be pretty bad at throughput.
"""
def __init__(self, routing, expiry=60, hosts=None, prefix="django-channels:"):
    """
    Configures the backend against one or more Redis servers.

    ``hosts`` is a list of ``(host, port)`` pairs; if it is empty or None
    a single server at ``("localhost", 6379)`` is used. ``prefix`` is
    prepended to the keys this backend writes.
    """
    super(RedisChannelBackend, self).__init__(routing=routing, expiry=expiry)
    # Make sure they provided some hosts, or provide a default
    if not hosts:
        hosts = [("localhost", 6379)]
    for host, port in hosts:
        assert isinstance(host, six.string_types)
        assert int(port)
    self.hosts = hosts
    self.prefix = prefix
    # Precalculate some values for ring selection: the 0-4095 hash space
    # is split into ring_size equal slices of ring_divisor values each.
    self.ring_size = len(self.hosts)
    self.ring_divisor = int(math.ceil(4096 / float(self.ring_size)))
def consistent_hash(self, value):
    """
    Maps the value to a node value between 0 and 4095
    using CRC32, then down to one of the ring nodes.

    Accepts either text or bytes; text is encoded as UTF-8 first so the
    same channel name always hashes to the same server index.
    """
    # crc32 only accepts bytes on Python 3
    if not isinstance(value, bytes):
        value = value.encode("utf-8")
    # Mask to an unsigned 32-bit value (crc32 may be signed on Python 2)
    bigval = binascii.crc32(value) & 0xffffffff
    # Keep the top 12 bits (0-4095), then scale down to a host index
    return (bigval // 0x100000) // self.ring_divisor
def random_index(self):
    """
    Returns a random position within ``self.hosts``.
    """
    host_count = len(self.hosts)
    return random.randrange(host_count)
def connection(self, index):
    """
    Returns a Redis connection for the server at position ``index`` in
    ``self.hosts``.

    Pass an index (for example the result of ``consistent_hash``) to use
    a specific server; pass None to use a random server instead.

    Raises ValueError if the index does not match a configured host.
    """
    # If index is explicitly None, pick a random server
    if index is None:
        index = self.random_index()
    # Catch bad indexes
    if not (0 <= index < self.ring_size):
        raise ValueError("There are only %s hosts - you asked for %s!" % (self.ring_size, index))
    host, port = self.hosts[index]
    return redis.Redis(host=host, port=port)
@property
def connections(self):
    """
    Yields a connection for every configured host, in ``self.hosts`` order.
    """
    for index in range(len(self.hosts)):
        # yield, not return: a return here would stop after the first host
        yield self.connection(index)
def send(self, channel, message):
    """
    Sends ``message`` (JSON-serialised) on the named channel.

    The message body is stored under its own expiring key, and that key is
    appended to the channel's list on the chosen server.
    """
    # If channel is not a str (i.e. it is bytes), convert it
    if not isinstance(channel, str):
        channel = channel.decode('utf-8')
    # Pick a connection to the right server - consistent for response
    # channels, random for normal channels
    if channel.startswith("!"):
        # Hash the channel name itself so the reader derives the same server
        index = self.consistent_hash(channel)
        connection = self.connection(index)
    else:
        connection = self.connection(None)
    # Write out message into expiring key (avoids big items in list)
    # TODO: Use extended set, drop support for older redis?
    key = self.prefix + uuid.uuid4().hex
    connection.set(
        key,
        json.dumps(message),
    )
    connection.expire(
        key,
        self.expiry + 10,
    )
    # Add key to list
    connection.rpush(
        self.prefix + channel,
        key,
    )
    # Set list to expire when message does (any later messages will bump this)
    connection.expire(
        self.prefix + channel,
        self.expiry + 10,
    )
@ -56,13 +101,27 @@ class RedisChannelBackend(BaseChannelBackend):
def receive_many(self, channels):
    """
    Blocks until a message is available on one of ``channels`` and returns
    a ``(channel, message)`` tuple.

    Response channels (leading ``!``) are read from the server given by
    ``consistent_hash``; all other channels are read from one randomly
    chosen server for the duration of this call.

    Raises ValueError if ``channels`` is empty.
    """
    if not channels:
        raise ValueError("Cannot receive on empty channel list!")
    # Work out what servers to listen on for the given channels
    indexes = {}
    random_index = self.random_index()
    for channel in channels:
        if channel.startswith("!"):
            indexes.setdefault(self.consistent_hash(channel), []).append(channel)
        else:
            indexes.setdefault(random_index, []).append(channel)
    # random.choice needs a sequence; dict views are not indexable on Python 3
    server_indexes = list(indexes)
    # Get a message from one of our channels
    while True:
        # Select a random connection to use
        # TODO: Would we be better trying to do this truly async?
        index = random.choice(server_indexes)
        connection = self.connection(index)
        server_channels = indexes[index]
        # Shuffle channels to avoid the first ones starving others of workers
        random.shuffle(server_channels)
        # Pop off any waiting message
        result = connection.blpop([self.prefix + channel for channel in server_channels], timeout=1)
        if result:
            content = connection.get(result[1])
            # Message key was missing (get returned None); try again
            if content is None:
                continue
            return result[0][len(self.prefix):].decode("utf-8"), json.loads(content.decode("utf-8"))
@ -75,7 +134,7 @@ class RedisChannelBackend(BaseChannelBackend):
seconds (expiry defaults to message expiry if not provided).
"""
key = "%s:group:%s" % (self.prefix, group)
self.connection.zadd(
self.connection(self.consistent_hash(group)).zadd(
key,
**{channel: time.time() + (expiry or self.expiry)}
)
@ -86,7 +145,7 @@ class RedisChannelBackend(BaseChannelBackend):
does nothing otherwise (does not error)
"""
key = "%s:group:%s" % (self.prefix, group)
self.connection.zrem(
self.connection(self.consistent_hash(group)).zrem(
key,
channel,
)
@ -96,10 +155,11 @@ class RedisChannelBackend(BaseChannelBackend):
Returns an iterable of all channels in the group.
"""
key = "%s:group:%s" % (self.prefix, group)
connection = self.connection(self.consistent_hash(group))
# Discard old channels
self.connection.zremrangebyscore(key, 0, int(time.time()) - 10)
connection.zremrangebyscore(key, 0, int(time.time()) - 10)
# Return current lot
return self.connection.zrange(
return connection.zrange(
key,
0,
-1,
@ -113,14 +173,14 @@ class RedisChannelBackend(BaseChannelBackend):
obtained, False if lock not obtained.
"""
key = "%s:lock:%s" % (self.prefix, channel)
return bool(self.connection.setnx(key, "1"))
return bool(self.connection(self.consistent_hash(channel)).setnx(key, "1"))
def unlock_channel(self, channel):
    """
    Unlocks the named channel. Always succeeds.

    Deletes the channel's lock key on the server selected by hashing the
    channel name.
    """
    key = "%s:lock:%s" % (self.prefix, channel)
    self.connection(self.consistent_hash(channel)).delete(key)
def __str__(self):
    """
    Describes the backend by class name and its configured host list.
    """
    # The sharded backend stores a list of (host, port) pairs in
    # self.hosts; there is no single self.host / self.port to report.
    return "%s(hosts=%s)" % (self.__class__.__name__, self.hosts)

View File

@ -5,6 +5,50 @@ Multiple choices of backend are available, to fill different tradeoffs of
complexity, throughput and scalability. You can also write your own backend if
you wish; the API is very simple and documented below.
Redis
-----
The Redis backend is the recommended backend to run Channels with, as it
supports both high throughput on a single Redis server and the ability
to run against a set of Redis servers in a sharded mode.
To use the Redis backend you have to install the redis package::
pip install -U redis
By default, it will attempt to connect to a Redis server on ``localhost:6379``,
but you can override this with the ``HOSTS`` setting::
CHANNEL_BACKENDS = {
"default": {
"BACKEND": "channels.backends.redis.RedisChannelBackend",
"HOSTS": [("redis-channel-1", 6379), ("redis-channel-2", 6379)],
},
}
Sharding
~~~~~~~~
The sharding model is based on consistent hashing - in particular,
:ref:`response channels <channel-types>` are hashed and used to pick a single
Redis server that both the interface server and the worker will use.
For normal channels, since any worker can service any channel request, messages
are simply distributed randomly among all possible servers, and workers will
pick a single server to listen to. Note that if you run more Redis servers than
workers, it's very likely that some servers will not have workers listening to
them; we recommend you always have at least ten workers for each Redis server
to ensure good distribution. Workers will, however, change server periodically
(every five seconds or so) so queued messages should eventually get a response.
Note that if you change the set of sharding servers you will need to restart
all interface servers and workers with the new set before anything works,
and any in-flight messages will be lost (even with persistence, some will);
the consistent hashing model relies on all running clients having the same
settings. Any misconfigured interface server or worker will drop some or all
messages.
In-memory
---------
@ -18,23 +62,7 @@ This backend provides no network transparency or non-blocking guarantees.
Database
--------
Redis
-----
To use the Redis backend you have to install the redis package::
pip install -U redis
Also you need to set the following in the ``CHANNEL_BACKENDS`` setting::
CHANNEL_BACKENDS = {
"default": {
"BACKEND": "channels.backends.redis_py.RedisChannelBackend",
"HOST": "redis-hostname",
},
}
=======
Writing Custom Backends
-----------------------

View File

@ -36,8 +36,8 @@ here's an example for a remote Redis server::
CHANNEL_BACKENDS = {
"default": {
"BACKEND": "channels.backends.redis_py.RedisChannelBackend",
"HOST": "redis-channel",
"BACKEND": "channels.backends.redis.RedisChannelBackend",
"HOSTS": [("redis-channel", 6379)],
},
}

0
docs/faqs.rst Normal file → Executable file
View File

7
docs/scaling.rst Normal file → Executable file
View File

@ -28,3 +28,10 @@ That's why Channels labels any *response channel* with a leading ``!``, letting
you know that only one server is listening for it, and thus letting you scale
and shard the two different types of channels accordingly (for more on
the difference, see :ref:`channel-types`).
This is the underlying theory behind Channels' sharding model - normal channels
are sent to random Redis servers, while response channels are sent to a
predictable server that both the interface server and worker can derive.
Currently, sharding is implemented as part of the Redis backend only;
see the :doc:`backend documentation <backends>` for more information.