#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author : Yohan Bataille (ING)
# redis-manager
version = "3"
# Requires:
# redis-cli
# python 2 (2.6 and 2.7 tested)
import sys
import os
import json
import time
import copy
from subprocess import PIPE, Popen
import argparse
import SocketServer
import socket
from BaseHTTPServer import BaseHTTPRequestHandler
import threading
import traceback
import pprint
import urllib2
from random import randint
print("redis manager version: " + version)
# positional : REDIS_PATH, REDIS_NODES, ENV, HTTP_PORT (in this order, all mandatory, ENV : "DEV" or "PROD")
# optional : time (-t), dry_run (-n)
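# Example invocation (hypothetical paths and addresses, shown only to illustrate the argument
# layout defined below):
#   python redis-manager.py /opt/redis/bin/redis-cli \
#       '10.0.0.1:7000, 10.0.0.1:7001, 10.0.0.1:7002 / 10.0.0.2:7003, 10.0.0.2:7004, 10.0.0.2:7005' \
#       PROD 8080 -m 'manager2.example:8080' -t 3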
parser = argparse.ArgumentParser()
parser.add_argument("REDIS_PATH", help="Path to redis-cli binary. Example: '/opt/redis/bin/redis-cli'")
parser.add_argument("REDIS_NODES", help="Please specify a list of nodes as a single string, grouping nodes by datacenter and using '/' to separate datacenters if there is more than one. Example: '10.166.119.49:7000, 10.166.119.49:7001, 10.166.119.49:7002 / 10.166.119.48:7003, 10.166.119.48:7004, 10.166.119.48:7005'.")
parser.add_argument("ENV", choices=['DEV', 'PROD'], help="Please specify 'DEV' or 'PROD'.")
parser.add_argument("HTTP_PORT", help="Listen to request on this HTTP port.")
parser.add_argument("-m", "--other_managers", help="List of other managers as a single string. Example: 'slzuss3vmq00.sres.stech:8080, slzuss3vmq01.sres.stech:8080'.")
parser.add_argument("-t", "--time", help="Time to wait between checks, in seconds. Default: 3")
parser.add_argument("-w", "--failover_max_wait", help="How long to wait for Redis cluster to obey a failover command, in seconds. Default: 30")
parser.add_argument("-u", "--unresponsive_timeout", help="How long to wait before assuming the manager main loop is stuck, in seconds. Default: 30")
parser.add_argument("-n", "--dry_run", help="Do not send modifications commands to the cluster.", action="store_true")
args = parser.parse_args()
# if environment >= NEHOM ==> all_checks needs to be True
all_checks = True
if args.ENV == "DEV":
    all_checks = False
other_managers = list()
if args.other_managers:
    # Clean the string
    other_managers = "".join(args.other_managers.split())
    other_managers = other_managers.split(',')
loop_time = 3
if args.time:
    loop_time = int(args.time)
failover_max_wait = 30
if args.failover_max_wait:
    failover_max_wait = int(args.failover_max_wait)
unresponsive_timeout = 30
if args.unresponsive_timeout:
    unresponsive_timeout = int(args.unresponsive_timeout)
test = False
if args.dry_run:
    test = True
redis_cli = args.REDIS_PATH
http_port = int(args.HTTP_PORT)
print("HTTP_PORT: " + str(http_port))
print("REDIS_NODES: " + args.REDIS_NODES)
print("all_checks: " + str(all_checks))
print("loop_time: " + str(loop_time))
print("failover_max_wait: " + str(failover_max_wait))
print("other_managers: " + str(other_managers))
print("redis_cli: " + redis_cli)
print("test: " + str(test))
#[root@slzuss3vmq00 ~]# /opt/ZUTA0/Logiciel/RDI/bin/redis-cli -h 10.166.119.48 -p 7002 --raw CLUSTER INFO
#cluster_state:fail
#[root@slzuss3vmq00 ~]# /opt/ZUTA0/Logiciel/RDI/bin/redis-cli -h 10.166.119.48 -p 7002 --raw CLUSTER NODES
#bd6aef93f187bab16d12236cce2faf0ac40ad727 10.166.119.48:7004 master,fail? - 1496355476275 1496355472467 11 disconnected 5461-10922
#7d134a073e5be16f2d2a73e27cc63b062e74c91f 10.166.119.48:7005 master,fail? - 1496355476275 1496355475475 9 disconnected 10923-16383
#af5f4f889bf6ee6573ec57625216e7d4416e37ae 10.166.119.48:7001 slave bd6aef93f187bab16d12236cce2faf0ac40ad727 0 1496357907477 11 connected
#0585d0b297d3a15013472ab83187d5b0422a4f23 10.166.119.48:7002 myself,slave 7d134a073e5be16f2d2a73e27cc63b062e74c91f 0 0 8 connected
#bea1cec63063542b0cc003b58e198b68c7993545 10.166.119.48:7000 slave e44f67740aa2be4a568587d8ac7d4914614934fb 0 1496357910486 10 connected
#e44f67740aa2be4a568587d8ac7d4914614934fb 10.166.119.48:7003 master - 0 1496357909482 10 connected 0-5460
#[root@slzuss3vmq00 ~]# /opt/ZUTA0/Logiciel/RDI/bin/redis-cli -h 10.166.119.48 -p 7002 --raw INFO
## Replication
#role:slave
#master_host:10.166.119.48
#master_port:7005
#master_link_status:down
#[root@slzuss3vmq00 ~]# /opt/ZUTA0/Logiciel/RDI/bin/redis-cli -h 10.166.119.48 -p 7003 --raw INFO
## Replication
#role:master
#connected_slaves:1
#slave0:ip=10.166.119.48,port=7000,state=online,offset=3809,lag=1
def api_help():
    return """
### HTTP API description:
# /cluster_status: 'manager disabled'
# 'Unknown state'
# 'KO'
# 'Unknown cluster'
# 'At risk'
# 'Imbalanced'
# 'OK'
# /manager_status: 'active'
# 'active asleep'
# 'active asleep repartition disabled'
# 'active repartition disabled'
# 'passive'
# 'passive asleep'
# 'passive asleep repartition disabled'
# 'passive repartition disabled'
# 'failed'
# 'starting'
# /help : This message
# /debug/enable
# /debug/disable
# /sleep?seconds=<seconds>
# /prepare_for_reboot/<IP>&duration=<seconds>:
# 'SIMILAR REQUEST ALREADY IN PROGRESS'
# 'WAIT'
# 'DONE'
# 'CANNOT PROCEED: passive manager'
# example : /prepare_for_reboot/10.166.20.120&duration=300
# /version : redis manager version
"""
# TODO: Remove global variables.
def main():
    startup_nodes_list, startup_nodes_by_datacenter, startup_nodes_by_server = cluster_startup_topo()
    global failover_without_quorum_requested
    failover_without_quorum_requested = list()
    global failover_with_quorum_requested
    failover_with_quorum_requested = list()
    global plan
    plan = dict()
    global cluster_state
    global manager_status
    global request_active
    global sleep_until
    global no_repartition_until
    global no_repartition_duration
    global slave_only_engine
    global raw_topo
    global last_loop_epoch
    current_cluster_topo = list()
    if all_checks:
        print("PROD mode enabled: master imbalance correction by server and datacenter activated.")
    else:
        print("DEV mode enabled: master imbalance correction by server and datacenter deactivated.")
    while True:
        last_loop_epoch = time.mktime(time.gmtime())
        num_manager_active = 0
        sleep = False
        for manager in other_managers:
            try:
                r = urllib2.urlopen('http://' + manager + '/manager_status', None, 1)
                l = r.readlines()
                if len(l) == 1 and (l[0].split()[0] == 'passive' or l[0].split()[0] == 'starting'):
                    if request_active:
                        r2 = urllib2.urlopen('http://' + manager + '/request_active', None, 1)
                        l2 = r2.readlines()
                        if len(l2) == 1 and l2[0] == 'yes':
                            print("other has requested activation")
                            request_active = False
                            sleep = True
                elif len(l) == 1 and l[0].split()[0] == 'active':
                    if num_manager_active == 0:
                        num_manager_active += 1
                    else:
                        print("Too many active managers!")
                elif len(l) == 1 and l[0].split()[0] == 'failed':
                    print("manager " + manager + " is KO.")
                else:
                    print("manager " + manager + " has answered with garbage: " + str(l))
            except:
                print("manager " + manager + " is not responding.")
        if num_manager_active == 0 and (manager_status == 'passive' or manager_status == 'starting'):
            if request_active:
                print("Becoming active!")
                manager_status = 'active'
            elif sleep:
                print("Sleeping to let another manager activate.")
                time.sleep(randint(1, 10))
                continue
            else:
                request_active = True
                print("Manager election in progress.")
                time.sleep(randint(1, 10))
                continue
        elif num_manager_active == 1 and manager_status == 'starting':
            print("Manager election finished, we are passive!")
            manager_status = 'passive'
        elif num_manager_active >= 1 and manager_status == 'active':
            print("Becoming passive!")
            manager_status = 'passive'
        if sleep_until != 0:
            delta = sleep_until - time.mktime(time.gmtime())
            if delta <= 0:
                sleep_until = 0
            else:
                print("Sleeping as requested. " + str(delta) + " seconds remaining.")
                time.sleep(loop_time)
                continue
        raw_topo, raw_topo_str = cluster_topo(startup_nodes_list, startup_nodes_by_datacenter)
        if raw_topo is None:
            cluster_state = 'Unknown state'
            print('Critical failure: cannot get cluster topology. Doing nothing.')
            time.sleep(loop_time)
            continue
        bool_cluster_online = cluster_online(startup_nodes_list)
        if bool_cluster_online:
            pass
        else:
            cluster_state = 'KO'
            print('Cluster failure.')
            # print(raw_topo_str)
        if not same_cluster(startup_nodes_list, raw_topo):
            pprint.pprint(raw_topo, width=300)
            cluster_state = 'Unknown cluster'
            print('Not the exact cluster we know. Doing nothing.')
            time.sleep(loop_time)
            continue
        for node in raw_topo:
            if node_status(node) == 'cluster still assessing':
                print('Something is going on but Redis Cluster is still assessing the situation. Doing nothing.')
                pprint.pprint(raw_topo, width=300)
                time.sleep(loop_time)
                continue
        # Loop over nodes, detect slaves with offline master and promote one slave (keep track of treated failed masters)
        if not bool_cluster_online:
            pprint.pprint(raw_topo, width=300)
            if has_quorum(raw_topo):
                print("Cluster has quorum and can recover by itself. Doing nothing.")
            else:
                failed_masters = list()
                new_failover_without_quorum_requested = list()
                for node in raw_topo:
                    if node_role(node) == 'slave' and node_status(node) == 'ok':
                        if master_status_of_slave(raw_topo, node) != 'ok':
                            masterid = masterid_of_slave(node)
                            if masterid in failed_masters:
                                print("Slave " + node_name(node) + " does not see master " + node_name_from_id(raw_topo, masterid) + ", but a slave has already been promoted. Doing nothing.")
                            elif manager_status == 'active':
                                if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                                    if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"]:
                                        print("Cluster did not comply with our previous failover request. Waiting.")
                                else:
                                    print("Failed master: " + node_name_from_id(raw_topo, masterid) + ". Promoting slave " + node_name(node) + ".")
                                    failover_without_quorum(node_ip(node), node_port(node))
                                    new_failover_without_quorum_requested.append({'slave': node_object_from_node_name(node_name(node)), 'epoch': time.mktime(time.gmtime())})
                                    failed_masters.append(masterid)
                failover_without_quorum_requested = failover_without_quorum_requested + new_failover_without_quorum_requested
                if failed_masters == list():
                    print("Critical failure : no slave remaining, cannot do anything.")
        else:
            # Detect risky situations
            failure = False
            for node in raw_topo:
                if node_role(node) == 'master' and node_status(node) == 'ok':
                    # Detect master without slave
                    if not master_has_at_least_one_slave(raw_topo, node):
                        print("Master " + node_name(node) + " has no slave !")
                        failure = True
                elif node_role(node) == 'slave' and all_checks:
                    # Detect slave on same server as master.
                    if node_ip(node) == node_ip_from_id(raw_topo, masterid_of_slave(node)):
                        print("Slave " + node_name(node) + " is on the same server as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
                        failure = True
                    # Detect slave on same datacenter as master.
                    if node_datacenter(node) == node_datacenter_from_id(raw_topo, masterid_of_slave(node)):
                        print("Slave " + node_name(node) + " is on the same datacenter as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
                        failure = True
            if failure:
                cluster_state = 'At risk'
                time.sleep(loop_time)
                continue
        if current_cluster_topo == list():
            pprint.pprint(raw_topo, width=300)
            current_cluster_topo = raw_topo
        elif cluster_has_changed(current_cluster_topo, raw_topo):
            print("Cluster topology has changed")
            pprint.pprint(raw_topo, width=300)
            current_cluster_topo = raw_topo
        if plan != dict() and manager_status == 'active':
            #pprint.pprint(plan, width=300)
            steps = [int(key) for key in plan.keys()]
            steps.remove(0)
            current_step = min(steps)
            if not cluster_has_changed(current_cluster_topo, plan['0']['starting_topo']):
                if failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
                    print("Cluster did not comply with our previous failover request. We reached the timeout. Plan Failed. Forget it.")
                    plan = dict()
                else:
                    print("Still waiting for the cluster to proceed with the failover.")
            elif cluster_has_changed(current_cluster_topo, plan[str(current_step)]['target_topo']):
                print("Cluster topology is not what we would expect. Something happened. Plan failed. Forget it.")
                plan = dict()
            else:
                if len(steps) > 1:
                    print "Step " + str(current_step) + " succeeded."
                    del plan[str(current_step)]
                    print "Launching step " + str(current_step + 1) + "."
                    slave = plan[str(current_step + 1)]['slave']
                    master = plan[str(current_step + 1)]['master']
                    print("Slave " + slave + " will replace his master " + master)
                    node_object = node_object_from_node_name(slave)
                    failover_with_quorum(node_object['host'], node_object['port'])
                    failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
                else:
                    print("Final step succeeded. The cluster is now balanced.")
                    print("I love it when a plan comes together!")
                    plan = dict()
            time.sleep(loop_time)
            continue
        if slave_only_engine is not None and manager_status == 'active':
            if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
                    print("Cluster did not comply with our previous failover request. Waiting.")
            else:
                # Failover all master nodes on this engine
                for node in raw_topo:
                    if node_role(node) == 'master' and node_status(node) == 'ok' and node_ip(node) == slave_only_engine:
                        slave = get_one_slave(raw_topo, node)
                        if slave is None:
                            print("Master " + node_name(node) + " has no slave !")
                        else:
                            failover_with_quorum(node_ip(slave), node_port(slave))
                            failover_with_quorum_requested.append({'slave': node_object_from_node_name(node_name(slave)), 'epoch': time.mktime(time.gmtime())})
            # Engine has already only slaves, starting the clock.
            if not has_master(slave_only_engine, raw_topo) and no_repartition_until == 0:
                no_repartition_until = time.mktime(time.gmtime()) + no_repartition_duration
        if no_repartition_until != 0:
            delta = no_repartition_until - time.mktime(time.gmtime())
            if delta <= 0:
                # We reached the requested duration, resetting.
                no_repartition_until = 0
                slave_only_engine = None
                no_repartition_duration = 0
            else:
                print("Skipping master imbalance correction as requested " + str(delta) + " seconds remaining.")
                time.sleep(loop_time)
                continue
        if slave_only_engine is not None:
            print("Still trying to remove slaves from " + slave_only_engine)
            time.sleep(loop_time)
            continue
        # Loop over nodes, detect imbalanced master repartition and promote slaves accordingly
        imbalanced = False
        if not all_checks:
            pass
        elif len(startup_nodes_by_server) < 2:
            print("Only one server: skipping master imbalance correction.")
        else:
            server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo)
            datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo)
            # Detect too many masters on a server.
            name, master_count, master_total_count = detect_imbalance(server_master_repartition_dict)
            if name is not None:
                cluster_state = 'Imbalanced'
                imbalanced = True
                print server_master_repartition_dict
                #pprint.pprint(raw_topo, width=300)
                print("Too many masters on server " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
                if manager_status == 'active':
                    master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter)
                    if master is None or slave is None:
                        print("Could not find a failover solution.")
                    else:
                        if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                            if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
                                print("Cluster did not comply with our previous failover request. Waiting.")
                        else:
                            print("Slave " + slave + " will replace his master " + master)
                            node_object = node_object_from_node_name(slave)
                            failover_with_quorum(node_object['host'], node_object['port'])
                            failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
                time.sleep(loop_time)
                continue
            if len(startup_nodes_by_datacenter) < 2:
                print("Only one datacenter: skipping master imbalance correction by datacenter.")
            else:
                # Detect too many masters on a datacenter.
                # It is possible to have no imbalance by server but an imbalance by datacenter (+1 master on each server of a datacenter compared to the other and at least 2 servers by datacenter).
                name, master_count, master_total_count = detect_imbalance(datacenter_master_repartition_dict)
                if name is not None:
                    cluster_state = 'Imbalanced'
                    imbalanced = True
                    print("Too many masters on datacenter " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
                    if manager_status == 'active':
                        master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter)
                        if master is None or slave is None:
                            print("Could not find a failover solution.")
                        else:
                            if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                                if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
                                    print("Cluster did not comply with our previous failover request. Waiting.")
                            else:
                                print("Slave " + slave + " will replace his master " + master)
                                node_object = node_object_from_node_name(slave)
                                failover_with_quorum(node_object['host'], node_object['port'])
                                failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
                    time.sleep(loop_time)
                    continue
        if not imbalanced:
            cluster_state = 'OK'
        time.sleep(loop_time)
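# The two helpers below walk the failover requests this manager has issued and report:
# - "one_did_not_happen": at least one requested slave has not been promoted yet (still within failover_max_wait),
# - "one_cleared": at least one request exceeded failover_max_wait and was dropped.
# Requests whose slave is now seen as a master are removed from the pending list.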
def failover_without_quorum_did_not_happen(raw_topo):
    global failover_without_quorum_requested
    one_did_not_happen = False
    one_cleared = False
    for slave_dict in failover_without_quorum_requested:
        found_node_role = None
        for node in raw_topo:
            if node_name_from_node_object(slave_dict['slave']) == node_name(node):
                found_node_role = node_role(node)
                break
        if found_node_role == 'master':
            failover_without_quorum_requested.remove(slave_dict)
        elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait:
            print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
            failover_without_quorum_requested.remove(slave_dict)
            one_cleared = True
        else:
            one_did_not_happen = True
    return {"one_did_not_happen": one_did_not_happen, "one_cleared": one_cleared}
def failover_with_quorum_did_not_happen(raw_topo):
    global failover_with_quorum_requested
    one_did_not_happen = False
    one_cleared = False
    for slave_dict in failover_with_quorum_requested:
        found_node_role = None
        for node in raw_topo:
            if node_name_from_node_object(slave_dict['slave']) == node_name(node):
                found_node_role = node_role(node)
                break
        if found_node_role == 'master':
            failover_with_quorum_requested.remove(slave_dict)
        elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait:
            print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
            failover_with_quorum_requested.remove(slave_dict)
            one_cleared = True
        else:
            one_did_not_happen = True
    return {"one_did_not_happen": one_did_not_happen, "one_cleared": one_cleared}
def has_quorum(raw_topo):
    masters_ok_count = masters_ok_count_for_cluster(raw_topo)
    all_masters_count = masters_count_for_cluster(raw_topo)
    if masters_ok_count < all_masters_count / 2 + 1:
        return False
    return True
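# Example: with 3 known masters, 3 / 2 + 1 == 2 (integer division), so at least 2 masters must
# be 'ok' to keep the quorum; with 4 known masters, 3 must be 'ok'.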
def detect_imbalance(master_repartition_dict):
    master_total_count = sum(master_repartition_dict.values())
    set_count = len(master_repartition_dict)
    for set_name, master_count in master_repartition_dict.iteritems():
        # sets can be datacenters or servers
        # If we have only 2 sets and an even number of masters, we at least try not to have more than half the masters on one set
        # If we have only 2 sets and an odd number of masters, we at least try not to have more than half the masters + 1 on one set
        if set_count == 2:
            if master_total_count % 2 == 0:
                if master_count > master_total_count / 2:
                    return set_name, master_count, master_total_count
            elif master_total_count % 2 != 0:
                if master_count > master_total_count / 2 + 1:
                    return set_name, master_count, master_total_count
        # If we have 3 sets and 4 masters, we will have 2 masters on one set
        elif set_count == 3:
            if master_count > master_total_count / 2:
                return set_name, master_count, master_total_count
        else:
            if master_total_count % 2 == 0:
                if master_count > master_total_count / 2 - 1:
                    return set_name, master_count, master_total_count
            elif master_total_count % 2 != 0:
                if master_count > master_total_count / 2:
                    return set_name, master_count, master_total_count
    return None, None, None
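# Example (hypothetical servers): detect_imbalance({'10.0.0.1': 3, '10.0.0.2': 1}) sees 2 sets and
# 4 masters, flags anything above 4 / 2 == 2 and returns ('10.0.0.1', 3, 4), while
# detect_imbalance({'10.0.0.1': 2, '10.0.0.2': 2}) returns (None, None, None).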
# Find the solution which minimizes the number of steps. The constraints are server_master_repartition and datacenter_master_repartition.
def find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter):
    global plan
    plan = dict()
    max_steps = 3
    solution_steps_chain = {'0': raw_topo}
    master_slave_steps_chain = dict()
    raw_topo_permut_dict = copy.deepcopy(solution_steps_chain)
    for i in range(0, max_steps):
        if debug:
            print(i)
        j = 0
        raw_topo_1_permutations = dict()
        for position, raw_topo_permut in raw_topo_permut_dict.iteritems():
            if debug:
                print("start position: ")
                pprint.pprint(raw_topo_permut, width=300)
                server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_permut)
                print server_master_repartition_dict
                datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_permut)
                print datacenter_master_repartition_dict
            # This only returns masters and slaves with node_status(master) == 'ok' or 'cluster still assessing' and node_status(slave) == 'ok' or 'cluster still assessing':
            master_slaves_dict = master_slaves_topo(raw_topo_permut)
            # generate all 1-permutation sets
            for master in master_slaves_dict:
                if debug:
                    print("master: " + str(master))
                for slave in master_slaves_dict[master]:
                    raw_topo_copy = copy.deepcopy(raw_topo_permut)
                    raw_topo_1_permutation = simul_failover(master, slave, raw_topo_copy)
                    if debug:
                        print("slave: " + str(slave))
                        server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation)
                        print server_master_repartition_dict
                        datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation)
                        print datacenter_master_repartition_dict
                        pprint.pprint(raw_topo_1_permutation, width=300)
                    j += 1
                    if not raw_topo_1_permutation in solution_steps_chain.values():
                        #print "not already stored"
                        if solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_by_datacenter):
                            print("Found a solution: ")
                            pprint.pprint(raw_topo_1_permutation, width=300)
                            # return the first step
                            if i == 0:
                                print("Sounds like a plan !")
                                print "only one step : " + str([master, slave])
                                plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)}
                                plan['1'] = {'master': master, 'slave': slave, 'target_topo': raw_topo_1_permutation}
                                return master, slave
                            else:
                                #print("first step position: " + position)
                                first = position.split('.')[1]
                                #print("first step: ")
                                #pprint.pprint(solution_steps_chain['0.'+first], width=300)
                                #print("master: "+master_slave_steps_chain['0.'+first][0])
                                #print("slave: "+master_slave_steps_chain['0.'+first][1])
                                step_key = '0'
                                step_number = 1
                                print("Sounds like a plan !")
                                end_position = position+'.'+str(j)
                                solution_steps_chain[end_position] = raw_topo_1_permutation
                                master_slave_steps_chain[end_position] = [master, slave]
                                plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)}
                                for step in end_position.split('.')[1:]:
                                    step_key += '.'+step
                                    print "step "+str(step_number) + ": " + str(master_slave_steps_chain[step_key])
                                    plan[str(step_number)] = {'master': master_slave_steps_chain[step_key][0], 'slave': master_slave_steps_chain[step_key][1], 'target_topo': solution_steps_chain[step_key]}
                                    step_number += 1
                                return master_slave_steps_chain['0.'+first][0], master_slave_steps_chain['0.'+first][1]
                        else:
                            if debug:
                                print "============== store permutation ============="
                            solution_steps_chain[position+'.'+str(j)] = raw_topo_1_permutation
                            master_slave_steps_chain[position+'.'+str(j)] = [master, slave]
                            raw_topo_1_permutations[position+'.'+str(j)] = raw_topo_1_permutation
        raw_topo_permut_dict = copy.deepcopy(raw_topo_1_permutations)
    return None, None
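# find_failover_candidate() explores simulated failovers breadth-first, up to max_steps deep, and
# when it finds a balanced topology it records the whole path in the global 'plan' dict:
# plan['0'] holds the starting topology, plan['1']..plan['N'] each hold the master/slave pair to
# fail over and the 'target_topo' expected after that step. The main loop then runs one step at a
# time, checking the observed topology against 'target_topo' before launching the next step.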
def solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_by_datacenter):
    server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation)
    datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation)
    if debug:
        print "solver_check"
        pprint.pprint(raw_topo_1_permutation, width=300)
        print server_master_repartition_dict
        print datacenter_master_repartition_dict
    name_server, master_count_server, master_total_count = detect_imbalance(server_master_repartition_dict)
    name_datacenter, master_count_datacenter, master_total_count = detect_imbalance(datacenter_master_repartition_dict)
    if name_server is None and name_datacenter is None:
        return True
    return False
def simul_failover(master, slave, raw_topo):
    raw_topo_copy = copy.deepcopy(raw_topo)
    for node in raw_topo:
        if node_name(node) == master:
            switch_role(node)
        elif node_name(node) == slave:
            switch_role(node)
    if raw_topo_copy == raw_topo:
        print("Failed")
        #print("raw_topo_copy: " + str(raw_topo_copy))
        #print("raw_topo: " + str(raw_topo))
    return raw_topo
def master_slaves_topo(raw_topo):
    master_slaves_dict = dict()
    for master in raw_topo:
        if node_role(master) == 'master' and node_status(master) in ['ok', 'cluster still assessing']:
            master_slaves_dict[node_name(master)] = list()
            for slave in raw_topo:
                if node_id(master) == masterid_of_slave(slave):
                    if node_role(slave) == 'slave' and node_status(slave) in ['ok', 'cluster still assessing']:
                        master_slaves_dict[node_name(master)].append(node_name(slave))
    return master_slaves_dict
def has_master(engine, raw_topo):
    for node in raw_topo:
        if node_role(node) == 'master' and node_ip(node) == engine:
            return True
    return False
def cluster_online(startup_nodes_list):
    for startup_node in startup_nodes_list:
        proc = Popen(["timeout", "1", redis_cli, "-h", startup_node['host'], "-p", startup_node['port'], "--raw", "CLUSTER", "INFO"], stdout=PIPE)
        result = proc.communicate()[0].split()
        if isinstance(result, list) and len(result) > 0 and result[0] == 'cluster_state:ok':
            return True
    return False
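# cluster_online() asks each startup node in turn for CLUSTER INFO (through 'timeout 1') and
# reports the cluster as online as soon as one reachable node answers 'cluster_state:ok'.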
def cluster_topo(startup_nodes_list, startup_nodes_by_datacenter):
    for startup_node in startup_nodes_list:
        proc = Popen(["timeout", "1", redis_cli, "-h", startup_node['host'], "-p", startup_node['port'], "--raw", "CLUSTER", "NODES"], stdout=PIPE)
        result_str = proc.communicate()[0]
        if not isinstance(result_str, str) or result_str == '':
            continue
        result = result_str.strip('\n').split('\n')
        result = [string.split(" ") for string in result]
        result_bak = copy.deepcopy(result)
        for node in result:
            if len(node) < 9:
                node.append('-')
        if isinstance(result, list) and len(result) > 0:
            result = append_datacenter(result, startup_nodes_by_datacenter)
            i = 0
            tmp = ''
            for line in result_str.strip('\n').split('\n'):
                tmp += line
                if len(result_bak[i]) < 9:
                    tmp += " -"
                tmp += " " + str(node_datacenter(result[i])) + '\n'
                i += 1
            result_str = tmp
            return result, result_str
    return None, None
def append_datacenter(raw_topo, startup_nodes_by_datacenter):
    for node in raw_topo:
        datacenter_index = get_datacenter_for_node(node, startup_nodes_by_datacenter)
        node.append(datacenter_index)
    return raw_topo
def same_cluster(startup_nodes_list, raw_topo):
    if len(startup_nodes_list) != len(raw_topo):
        print('Found a different number of nodes.')
        return False
    for node in raw_topo:
        # Use a distinct comprehension variable so it does not shadow 'node' (Python 2 leaks it).
        if node_name(node) not in [n['host'] + ':' + n['port'] for n in startup_nodes_list]:
            print(node_name(node) + ' found but unknown.')
            return False
    return True
def cluster_has_changed(current_cluster_topo, raw_topo):
    if len(current_cluster_topo) != len(raw_topo):
        print('Found a different number of nodes.')
        return True
    for node in raw_topo:
        found = False
        for node2 in current_cluster_topo:
            if node_name(node) == node_name(node2):
                found = True
                if node_role(node) != node_role(node2):
                    return True
                break
        if not found:
            return True
    return False
def datacenter_master_repartition(int_datacenter_count, raw_topo):
    datacenter_master_repartition_dict = dict()
    for i in range(0, int_datacenter_count):
        datacenter_master_repartition_dict[str(i)] = master_count_for_datacenter(i, raw_topo)
    return datacenter_master_repartition_dict
def server_master_repartition(servers, raw_topo):
    server_master_repartition_dict = dict()
    for server in servers:
        server_master_repartition_dict[server] = master_count_for_server(server, raw_topo)
    return server_master_repartition_dict
def datacenter_count(startup_nodes_by_datacenter):
    return len(startup_nodes_by_datacenter)
def server_count(startup_nodes_by_server):
    return len(startup_nodes_by_server)
def server_list(startup_nodes_by_server):
    return startup_nodes_by_server.keys()
def master_count_for_datacenter(datacenter_index, raw_topo):
    count = 0
    for node in raw_topo:
        if node_role(node) == 'master' and node_status(node) in ['ok', 'cluster still assessing'] and node_datacenter(node) == datacenter_index:
            count += 1
    return count
def master_count_for_server(server, raw_topo):
    count = 0
    for node in raw_topo:
        if node_role(node) == 'master' and node_status(node) in ['ok', 'cluster still assessing'] and node_ip(node) == server:
            count += 1
    return count
def masters_ok_count_for_cluster(raw_topo):
    count = 0
    for node in raw_topo:
        if node_role(node) == 'master' and node_status(node) == 'ok':
            count += 1
    return count
def masters_count_for_cluster(raw_topo):
    count = 0
    seen = list()
    for node in raw_topo:
        if node_role(node) == 'master' and node_name(node) not in seen:
            count += 1
            seen.append(node_name(node))
    return count
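# The accessors below index into one parsed CLUSTER NODES line (one list per node):
#   [0] node id, [1] ip:port, [2] flags ('master', 'slave', 'myself,...', '...,fail?'),
#   [3] master id for slaves ('-' for masters), [7] link state ('connected'/'disconnected'),
#   [9] datacenter index appended by append_datacenter().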
def node_datacenter(node):
    return node[9]
def node_role(node):
    if 'slave' in node[2]:
        return 'slave'
    return 'master'
def switch_role(node):
    if 'slave' in node[2]:
        node[2] = node[2].replace('slave', 'master')
    else:
        node[2] = node[2].replace('master', 'slave')
def master_status_of_slave(raw_topo, slave):
    return node_status_from_id(raw_topo, masterid_of_slave(slave))
def masterid_of_slave(slave):
    return slave[3]
def node_id(node):
    return node[0]
def node_name(node):
    return node[1]
def node_status(node):
    if node[2] in ['myself,slave', 'myself,master', 'slave', 'master'] and node[7] == 'connected':
        return 'ok'
    elif node[2] in ['slave,fail?', 'master,fail?']:
        return 'cluster still assessing'
    return 'failed'
def node_status_from_id(raw_topo, nodeid):
    for node in raw_topo:
        if nodeid == node_id(node):
            return node_status(node)
def node_name_from_id(raw_topo, nodeid):
    for node in raw_topo:
        if nodeid == node_id(node):
            return node_name(node)
def node_ip_from_id(raw_topo, nodeid):
    for node in raw_topo:
        if nodeid == node_id(node):
            return node_ip(node)
def node_datacenter_from_id(raw_topo, nodeid):
    for node in raw_topo:
        if nodeid == node_id(node):
            return node_datacenter(node)
def node_object_from_node_name(node_name):
    ip = node_name.split(':')[0]
    port = node_name.split(':')[1]
    return {"host": ip, "port": port}
def node_name_from_node_object(node_object):
    return node_object["host"] + ":" + node_object["port"]
def cluster_startup_topo():
    startup_nodes = args.REDIS_NODES
    # Clean the string
    startup_nodes = "".join(startup_nodes.split())
    startup_nodes = startup_nodes.split('/')
    startup_nodes_list = list()
    # TODO: startup_nodes_by_datacenter should be a dict
    startup_nodes_by_datacenter = list()
    startup_nodes_by_server = dict()
    for datacenter in startup_nodes:
        tmp = datacenter.split(',')
        for node in tmp:
            node_dict = node_object_from_node_name(node)
            ip = node_dict['host']
            port = node_dict['port']
            startup_nodes_list.append(node_dict)
            if ip in startup_nodes_by_server.keys():
                startup_nodes_by_server[ip].append(port)
            else:
                startup_nodes_by_server[ip] = [port]
        startup_nodes_by_datacenter.append(tmp)
    #print(startup_nodes_by_server)
    #print(startup_nodes_by_datacenter)
    #print(startup_nodes_list)
    return startup_nodes_list, startup_nodes_by_datacenter, startup_nodes_by_server
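# Example (hypothetical addresses): with REDIS_NODES = '10.0.0.1:7000,10.0.0.1:7001/10.0.0.2:7002'
# this returns:
#   startup_nodes_list          = [{'host': '10.0.0.1', 'port': '7000'}, {'host': '10.0.0.1', 'port': '7001'}, {'host': '10.0.0.2', 'port': '7002'}]
#   startup_nodes_by_datacenter = [['10.0.0.1:7000', '10.0.0.1:7001'], ['10.0.0.2:7002']]
#   startup_nodes_by_server     = {'10.0.0.1': ['7000', '7001'], '10.0.0.2': ['7002']}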
def get_datacenter_for_node(node, startup_nodes_by_datacenter):
    i = 0
    for datacenter in startup_nodes_by_datacenter:
        if node_name(node) in datacenter:
            return i
        i += 1
    return None
# ip, port of the slave that will replace his master
def failover_without_quorum(ip, port):
    print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER TAKEOVER")
    if not test:
        proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER", "TAKEOVER"], stdout=PIPE)
        result = proc.communicate()[0].split()
# ip, port of the slave that will replace his master
def failover_with_quorum(ip, port):
    print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER")
    if not test:
        proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER"], stdout=PIPE)
        result = proc.communicate()[0].split()
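# Note on the two commands above: plain CLUSTER FAILOVER is a coordinated handoff (the slave waits
# until it has caught up with its reachable master), so it is used for rebalancing a healthy
# cluster; CLUSTER FAILOVER TAKEOVER promotes the slave unilaterally, without the master or the
# other masters agreeing, which is why it is reserved for the case where the cluster has lost its
# quorum.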
def node_ip(node):
    return node_object_from_node_name(node_name(node))['host']
def node_port(node):
    return node_object_from_node_name(node_name(node))['port']
def master_has_at_least_one_slave(raw_topo, master):
    for node in raw_topo:
        if node_id(master) == masterid_of_slave(node):
            return True
    return False
def get_one_slave(raw_topo, master):
    for node in raw_topo:
        if node_id(master) == masterid_of_slave(node):
            return node
    return None
class MyHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        global sleep_until
        global slave_only_engine
        global no_repartition_duration
        global cluster_state
        global request_active
        global debug
        if self.path == '/debug/enable':
            self.send_response(200)
            debug = True
        elif self.path == '/debug/disable':
            self.send_response(200)
            debug = False
        elif self.path == '/version':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write(version+'\n')
        elif self.path == '/help':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write(api_help())
        elif self.path == '/cluster_status':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write(cluster_state+'\n')
        elif self.path == '/manager_status':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            delta = time.mktime(time.gmtime()) - last_loop_epoch
            if delta > unresponsive_timeout:
                print("manager main loop is unresponsive!")
                answer = "failed"
                cluster_state = 'Unknown state'
                request_active = False
            else:
                answer = manager_status
                if sleep_until != 0:
                    answer += " asleep"
                if no_repartition_until != 0:
                    answer += " repartition disabled"
            self.wfile.write(answer)
        elif self.path == '/request_active':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            if request_active:
                self.wfile.write("yes")
            else:
                self.wfile.write("no")
        elif "/sleep" in self.path:
            try:
                sleep_duration = int(self.path.split("=")[1])
                sleep_until = time.mktime(time.gmtime()) + sleep_duration
                self.send_response(200)
                self.send_header('Content-type', 'text/plain')
                self.end_headers()
                self.wfile.write("OK")
                cluster_state = 'manager disabled'
            except:
                self.send_response(400)
        elif "/prepare_for_reboot" in self.path:
            bad_request = False
            try:
                no_repartition_duration_req = int(self.path.split("duration=")[1])
                slave_only_engine_req = self.path.split("/")[2].split("&")[0]
                if not slave_only_engine_req in [node_ip(node) for node in raw_topo]:
                    self.send_response(400)
                    bad_request = True
            except:
                self.send_response(400)
                bad_request = True
            if not bad_request:
                if manager_status == 'passive':
                    self.send_response(200)
                    self.send_header('Content-type', 'text/plain')
                    self.end_headers()
                    self.wfile.write("CANNOT PROCEED: passive manager")
                elif manager_status == 'starting':
                    self.send_response(200)
                    self.send_header('Content-type', 'text/plain')
                    self.end_headers()
                    self.wfile.write("CANNOT PROCEED: manager is starting")
                elif time.mktime(time.gmtime()) - last_loop_epoch > 10:
                    self.send_response(500)
                    self.send_header('Content-type', 'text/plain')
                    self.end_headers()
                    self.wfile.write("CANNOT PROCEED: failed manager")
                elif no_repartition_duration != 0 and slave_only_engine != slave_only_engine_req:
                    print("A request has already been made to only have slaves on engine " + slave_only_engine + ".")
                    self.send_response(403)
                    self.send_header('Content-type', 'text/plain')
                    self.end_headers()
                    self.wfile.write("SIMILAR REQUEST ALREADY IN PROGRESS")
                elif no_repartition_duration != 0 and slave_only_engine == slave_only_engine_req and not has_master(slave_only_engine, raw_topo):
                    self.send_response(200)
                    self.send_header('Content-type', 'text/plain')
                    self.end_headers()
                    self.wfile.write("DONE")
                else:
                    slave_only_engine = slave_only_engine_req
                    no_repartition_duration = no_repartition_duration_req
                    self.send_response(200)
                    self.send_header('Content-type', 'text/plain')
                    self.end_headers()
                    self.wfile.write("WAIT")
        else:
            self.send_response(404)
    def log_message(self, format, *args):
        if debug:
            sys.stderr.write("%s - - [%s] %s\n" %
                             (self.address_string(),
                              self.log_date_time_string(),
                              format%args))
class WebThread(threading.Thread):
    def run(self):
        httpd.serve_forever()
if __name__ == "__main__":
    global cluster_state
    cluster_state = 'manager disabled'
    global manager_status
    manager_status = 'starting'
    global request_active
    request_active = False
    global sleep_until
    sleep_until = 0
    global no_repartition_until
    no_repartition_until = 0
    global no_repartition_duration
    no_repartition_duration = 0
    global slave_only_engine
    slave_only_engine = None
    global raw_topo
    raw_topo = None
    global last_loop_epoch
    # Initialise so the HTTP handler can compute a delta before the main loop has run once.
    last_loop_epoch = time.mktime(time.gmtime())
    global debug
    debug = False
    httpd = SocketServer.TCPServer(("", http_port), MyHandler, bind_and_activate=False)
    httpd.allow_reuse_address = True
    httpd.server_bind()
    httpd.server_activate()
    webserver_thread = WebThread()
    webserver_thread.start()
    print(api_help())
    try:
        main()
    except KeyboardInterrupt:
        print 'Interrupted'
        httpd.shutdown()
        httpd.server_close()
        sys.exit(0)
    except:
        print 'We crashed!'
        print traceback.format_exc()
        httpd.shutdown()
        httpd.server_close()
        sys.exit(0)
# vim: set ts=4 sw=4 sts=4 et :