Lock protection for thread-safe print.

Flush on each print.
Do not keep request_active set indefinitely when already active.
Bumped version to 4.
yohan 2019-06-12 19:31:50 +02:00
parent f64f75190a
commit e053b7f38e

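The heart of the change is the xprint wrapper added near the top of the script (the -23,8 +25,17 hunk below): a module-level Lock serializes concurrent printers and stdout is flushed after every message. A minimal standalone sketch of that pattern follows; the worker threads around it are purely illustrative and not part of redis-manager.

from __future__ import print_function
import sys
import threading

xprint_lock = threading.Lock()

def xprint(*args, **kwargs):
    """Thread safe print: one writer at a time, flushed immediately."""
    with xprint_lock:
        print(*args, **kwargs)
        sys.stdout.flush()

def worker(n):
    # Without the lock, output from concurrent threads can interleave mid-line;
    # without the flush, messages may sit in the stdout buffer for a long time.
    for i in range(3):
        xprint("worker " + str(n) + ", message " + str(i))

if __name__ == "__main__":
    threads = [threading.Thread(target=worker, args=(n,)) for n in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()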

@@ -2,7 +2,9 @@
# -*- coding: utf-8 -*-
# Author : Yohan Bataille (ING)
# redis-manager
version = "3"
from __future__ import print_function
version = "4"
# Requires:
# redis-cli
@@ -23,8 +25,17 @@ import traceback
import pprint
import urllib2
from random import randint
from threading import Lock
print("redis manager version: " + version)
xprint_lock = Lock()
def xprint(*args, **kwargs):
"""Thread safe print function"""
with xprint_lock:
print(*args, **kwargs)
sys.stdout.flush()
xprint("redis manager version: " + version)
# positional : REDIS_PATH, REDIS_NODES, ENV, HTTP_PORT (in this order, all mandatory, ENV : "DEV" or "PROD")
# optional : time (-t), dry_run (-n)
@@ -65,14 +76,14 @@ if args.dry_run:
redis_cli = args.REDIS_PATH
http_port = int(args.HTTP_PORT)
print("HTTP_PORT: " + str(http_port))
print("REDIS_NODES: " + args.REDIS_NODES)
print("all_checks: " + str(all_checks))
print("loop_time: " + str(loop_time))
print("failover_max_wait: " + str(failover_max_wait))
print("other_managers: " + str(other_managers))
print("redis_cli: " + redis_cli)
print("test: " + str(test))
xprint("HTTP_PORT: " + str(http_port))
xprint("REDIS_NODES: " + args.REDIS_NODES)
xprint("all_checks: " + str(all_checks))
xprint("loop_time: " + str(loop_time))
xprint("failover_max_wait: " + str(failover_max_wait))
xprint("other_managers: " + str(other_managers))
xprint("redis_cli: " + redis_cli)
xprint("test: " + str(test))
#[root@slzuss3vmq00 ~]# /opt/ZUTA0/Logiciel/RDI/bin/redis-cli -h 10.166.119.48 -p 7002 --raw CLUSTER INFO
#cluster_state:fail
@@ -158,9 +169,9 @@ def main():
current_cluster_topo = list()
if all_checks:
print("PROD mode enabled: master imbalance correction by server and datacenter activated.")
xprint("PROD mode enabled: master imbalance correction by server and datacenter activated.")
else:
print("DEV mode enabled: master imbalance correction by server and datacenter deactivated.")
xprint("DEV mode enabled: master imbalance correction by server and datacenter deactivated.")
while True:
last_loop_epoch = time.mktime(time.gmtime())
@@ -175,38 +186,39 @@ def main():
r2 = urllib2.urlopen('http://' + manager + '/request_active', None, 1)
l2 = r2.readlines()
if len(l2) == 1 and l2[0] == 'yes':
print("other has requested activation")
xprint("other has requested activation")
request_active = False
sleep = True
elif len(l) == 1 and l[0].split()[0] == 'active':
if num_manager_active == 0:
num_manager_active += 1
else:
print("Too many active managers!")
xprint("Too many active managers!")
elif len(l) == 1 and l[0].split()[0] == 'failed':
print("manager " + manager + " is KO.")
xprint("manager " + manager + " is KO.")
else:
print("manager " + manager + " has answered with garbage: " + str(l))
xprint("manager " + manager + " has answered with garbage: " + str(l))
except:
print("manager " + manager + " is not responding.")
xprint("manager " + manager + " is not responding.")
if num_manager_active == 0 and (manager_status == 'passive' or manager_status == 'starting'):
if request_active:
print("Becoming active!")
xprint("Becoming active!")
manager_status = 'active'
request_active = False
elif sleep:
print("Sleeping to let another manager activate.")
xprint("Sleeping to let another manager activate.")
time.sleep(randint(1, 10))
continue
else:
request_active = True
print("Manager election in progress.")
xprint("Manager election in progress.")
time.sleep(randint(1, 10))
continue
elif num_manager_active == 1 and manager_status == 'starting':
print("Manager election finished, we are passive!")
xprint("Manager election finished, we are passive!")
manager_status = 'passive'
elif num_manager_active >= 1 and manager_status == 'active':
print("Becoming passive!")
xprint("Becoming passive!")
manager_status = 'passive'
if sleep_until != 0:
@@ -214,14 +226,14 @@ def main():
if delta <= 0:
sleep_until = 0
else:
print("Sleeping as requested. " + str(delta) + " seconds remaining.")
xprint("Sleeping as requested. " + str(delta) + " seconds remaining.")
time.sleep(loop_time)
continue
raw_topo, raw_topo_str = cluster_topo(startup_nodes_list, startup_nodes_by_datacenter)
if raw_topo is None:
cluster_state = 'Unknown state'
print('Critical failure: cannot get cluster topology. Doing nothing.')
xprint('Critical failure: cannot get cluster topology. Doing nothing.')
time.sleep(loop_time)
continue
bool_cluster_online = cluster_online(startup_nodes_list)
@@ -229,17 +241,17 @@ def main():
pass
else:
cluster_state = 'KO'
print('Cluster failure.')
xprint('Cluster failure.')
# print(raw_topo_str)
if not same_cluster(startup_nodes_list, raw_topo):
pprint.pprint(raw_topo, width=300)
cluster_state = 'Unknown cluster'
print('Not the exact cluster we know. Doing nothing.')
xprint('Not the exact cluster we know. Doing nothing.')
time.sleep(loop_time)
continue
for node in raw_topo:
if node_status(node) == 'cluster still assessing':
print('Something is going on but Redis Cluster is still assessing the situation. Doing nothing.')
xprint('Something is going on but Redis Cluster is still assessing the situation. Doing nothing.')
pprint.pprint(raw_topo, width=300)
time.sleep(loop_time)
continue
@@ -247,7 +259,7 @@ def main():
if not bool_cluster_online:
pprint.pprint(raw_topo, width=300)
if has_quorum(raw_topo):
print("Cluster has quorum and can recover by itself. Doing nothing.")
xprint("Cluster has quorum and can recover by itself. Doing nothing.")
else:
failed_masters = list()
new_failover_without_quorum_requested = list()
@@ -256,19 +268,19 @@ def main():
if master_status_of_slave(raw_topo, node) != 'ok':
masterid = masterid_of_slave(node)
if masterid in failed_masters:
print("Slave " + node_name(node) + " does not see master " + node_name_from_id(raw_topo, masterid) + ", but a slave has already been promoted. Doing nothing.")
xprint("Slave " + node_name(node) + " does not see master " + node_name_from_id(raw_topo, masterid) + ", but a slave has already been promoted. Doing nothing.")
elif manager_status == 'active':
if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"]:
print("Cluster did not comply with our previous failover request. Waiting.")
xprint("Cluster did not comply with our previous failover request. Waiting.")
else:
print("Failed master: " + node_name_from_id(raw_topo, masterid) + ". Promoting slave " + node_name(node) + ".")
xprint("Failed master: " + node_name_from_id(raw_topo, masterid) + ". Promoting slave " + node_name(node) + ".")
failover_without_quorum(node_ip(node), node_port(node))
new_failover_without_quorum_requested.append({'slave': node_object_from_node_name(node_name(node)), 'epoch': time.mktime(time.gmtime())})
failed_masters.append(masterid)
failover_without_quorum_requested = failover_without_quorum_requested + new_failover_without_quorum_requested
if failed_masters == list():
print("Critical failure : no slave remaining, cannot do anything.")
xprint("Critical failure : no slave remaining, cannot do anything.")
else:
# Detect risky situations
failure = False
@@ -276,16 +288,16 @@ def main():
if node_role(node) == 'master' and node_status(node) == 'ok':
# Detect master without slave
if not master_has_at_least_one_slave(raw_topo, node):
print("Master " + node_name(node) + " has no slave !")
xprint("Master " + node_name(node) + " has no slave !")
failure = True
elif node_role(node) == 'slave' and all_checks:
# Detect slave on same server as master.
if node_ip(node) == node_ip_from_id(raw_topo, masterid_of_slave(node)):
print("Slave " + node_name(node) + " is on the same server as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
xprint("Slave " + node_name(node) + " is on the same server as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
failure = True
# Detect slave on same datacenter as master.
if node_datacenter(node) == node_datacenter_from_id(raw_topo, masterid_of_slave(node)):
print("Slave " + node_name(node) + " is on the same datacenter as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
xprint("Slave " + node_name(node) + " is on the same datacenter as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
failure = True
if failure:
cluster_state = 'At risk'
@@ -295,7 +307,7 @@ def main():
pprint.pprint(raw_topo, width=300)
current_cluster_topo = raw_topo
elif cluster_has_changed(current_cluster_topo, raw_topo):
print("Cluster topology has changed")
xprint("Cluster topology has changed")
pprint.pprint(raw_topo, width=300)
current_cluster_topo = raw_topo
if plan != dict() and manager_status == 'active':
@@ -305,41 +317,41 @@ def main():
current_step = min(steps)
if not cluster_has_changed(current_cluster_topo, plan['0']['starting_topo']):
if failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
print("Cluster did not comply with our previous failover request. We reached the timeout. Plan Failed. Forget it.")
xprint("Cluster did not comply with our previous failover request. We reached the timeout. Plan Failed. Forget it.")
plan = dict()
else:
print("Still waiting for the cluster to proceed with the failover.")
xprint("Still waiting for the cluster to proceed with the failover.")
elif cluster_has_changed(current_cluster_topo, plan[str(current_step)]['target_topo']):
print("Cluster topology is not what we would expect. Something happened. Plan failed. Forget it.")
xprint("Cluster topology is not what we would expect. Something happened. Plan failed. Forget it.")
plan = dict()
else:
if len(steps) > 1:
print "Step " + str(current_step) + " succeeded."
xprint("Step " + str(current_step) + " succeeded.")
del plan[str(current_step)]
print "Launching step " + str(current_step + 1) + "."
xprint("Launching step " + str(current_step + 1) + ".")
slave = plan[str(current_step + 1)]['slave']
master = plan[str(current_step + 1)]['master']
print("Slave " + slave + " will replace his master " + master)
xprint("Slave " + slave + " will replace his master " + master)
node_object = node_object_from_node_name(slave)
failover_with_quorum(node_object['host'], node_object['port'])
failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
else:
print("Final step succeeded. The cluster is now balanced.")
print("I love it when a plan comes together!")
xprint("Final step succeeded. The cluster is now balanced.")
xprint("I love it when a plan comes together!")
plan = dict()
time.sleep(loop_time)
continue
if slave_only_engine is not None and manager_status == 'active':
if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
print("Cluster did not comply with our previous failover request. Waiting.")
xprint("Cluster did not comply with our previous failover request. Waiting.")
else:
# Failover all master nodes on this engine
for node in raw_topo:
if node_role(node) == 'master' and node_status(node) == 'ok' and node_ip(node) == slave_only_engine:
slave = get_one_slave(raw_topo, node)
if slave is None:
print("Master " + node_name(node) + " has no slave !")
xprint("Master " + node_name(node) + " has no slave !")
else:
failover_with_quorum(node_ip(slave), node_port(slave))
failover_with_quorum_requested.append({'slave': node_object_from_node_name(node_name(slave)), 'epoch': time.mktime(time.gmtime())})
@@ -354,11 +366,11 @@ def main():
slave_only_engine = None
no_repartition_duration = 0
else:
print("Skipping master imbalance correction as requested " + str(delta) + " seconds remaining.")
xprint("Skipping master imbalance correction as requested " + str(delta) + " seconds remaining.")
time.sleep(loop_time)
continue
if slave_only_engine is not None:
print("Still trying to remove slaves from " + slave_only_engine)
xprint("Still trying to remove slaves from " + slave_only_engine)
time.sleep(loop_time)
continue
@@ -367,7 +379,7 @@ def main():
if not all_checks:
pass
elif len(startup_nodes_by_server) < 2:
print("Only one server: skipping master imbalance correction.")
xprint("Only one server: skipping master imbalance correction.")
else:
server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo)
datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo)
@@ -377,26 +389,26 @@ def main():
if name is not None:
cluster_state = 'Imbalanced'
imbalanced = True
print server_master_repartition_dict
xprint(server_master_repartition_dict)
#pprint.pprint(raw_topo, width=300)
print("Too many masters on server " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
xprint("Too many masters on server " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
if manager_status == 'active':
master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter)
if master is None or slave is None:
print("Could not find a failover solution.")
xprint("Could not find a failover solution.")
else:
if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
print("Cluster did not comply with our previous failover request. Waiting.")
xprint("Cluster did not comply with our previous failover request. Waiting.")
else:
print("Slave " + slave + " will replace his master " + master)
xprint("Slave " + slave + " will replace his master " + master)
node_object = node_object_from_node_name(slave)
failover_with_quorum(node_object['host'], node_object['port'])
failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
time.sleep(loop_time)
continue
if len(startup_nodes_by_datacenter) < 2:
print("Only one datacenter: skipping master imbalance correction by datacenter.")
xprint("Only one datacenter: skipping master imbalance correction by datacenter.")
else:
# Detect too many masters on a datacenter.
# It is possible to have no imbalance by server but an imbalance by datacenter (+1 master on each server of a datacenter compared to the other and at least 2 servers by datacenter).
@@ -404,17 +416,17 @@ def main():
if name is not None:
cluster_state = 'Imbalanced'
imbalanced = True
print("Too many masters on datacenter " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
xprint("Too many masters on datacenter " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
if manager_status == 'active':
master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter)
if master is None or slave is None:
print("Could not find a failover solution.")
xprint("Could not find a failover solution.")
else:
if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
print("Cluster did not comply with our previous failover request. Waiting.")
xprint("Cluster did not comply with our previous failover request. Waiting.")
else:
print("Slave " + slave + " will replace his master " + master)
xprint("Slave " + slave + " will replace his master " + master)
node_object = node_object_from_node_name(slave)
failover_with_quorum(node_object['host'], node_object['port'])
failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
@@ -438,7 +450,7 @@ def failover_without_quorum_did_not_happen(raw_topo):
if found_node_role == 'master':
failover_without_quorum_requested.remove(slave_dict)
elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait:
print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
xprint("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
failover_without_quorum_requested.remove(slave_dict)
one_cleared = True
else:
@@ -459,7 +471,7 @@ def failover_with_quorum_did_not_happen(raw_topo):
if found_node_role == 'master':
failover_with_quorum_requested.remove(slave_dict)
elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait:
print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
xprint("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
failover_with_quorum_requested.remove(slave_dict)
one_cleared = True
else:
@@ -513,43 +525,43 @@ def find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter
raw_topo_permut_dict = copy.deepcopy(solution_steps_chain)
for i in range(0, max_steps):
if debug:
print(i)
xprint(i)
j = 0
raw_topo_1_permutations = dict()
for position, raw_topo_permut in raw_topo_permut_dict.iteritems():
if debug:
print("start position: ")
xprint("start position: ")
pprint.pprint(raw_topo_permut, width=300)
server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_permut)
print server_master_repartition_dict
xprint(server_master_repartition_dict)
datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_permut)
print datacenter_master_repartition_dict
xprint(datacenter_master_repartition_dict)
# This only returns masters and slaves with node_status(master) == 'ok' or 'cluster still assessing' and node_status(slave) == 'ok' or 'cluster still assessing':
master_slaves_dict = master_slaves_topo(raw_topo_permut)
# generate all 1-permutation sets
for master in master_slaves_dict:
if debug:
print("master: " + str(master))
xprint("master: " + str(master))
for slave in master_slaves_dict[master]:
raw_topo_copy = copy.deepcopy(raw_topo_permut)
raw_topo_1_permutation = simul_failover(master, slave, raw_topo_copy)
if debug:
print("slave: " + str(slave))
xprint("slave: " + str(slave))
server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation)
print server_master_repartition_dict
xprint(server_master_repartition_dict)
datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation)
print datacenter_master_repartition_dict
xprint(datacenter_master_repartition_dict)
pprint.pprint(raw_topo_1_permutation, width=300)
j += 1
if not raw_topo_1_permutation in solution_steps_chain.values():
#print "not already stored"
if solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_by_datacenter):
print("Found a solution: ")
xprint("Found a solution: ")
pprint.pprint(raw_topo_1_permutation, width=300)
# return the first step
if i == 0:
print("Sounds like a plan !")
print "only one step : " + str([master, slave])
xprint("Sounds like a plan !")
xprint("only one step : " + str([master, slave]))
plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)}
plan['1'] = {'master': master, 'slave': slave, 'target_topo': raw_topo_1_permutation}
return master, slave
@@ -562,20 +574,20 @@ def find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter
#print("slave: "+master_slave_steps_chain['0.'+first][1])
step_key = '0'
step_number = 1
print("Sounds like a plan !")
xprint("Sounds like a plan !")
end_position = position+'.'+str(j)
solution_steps_chain[end_position] = raw_topo_1_permutation
master_slave_steps_chain[end_position] = [master, slave]
plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)}
for step in end_position.split('.')[1:]:
step_key += '.'+step
print "step "+str(step_number) + ": " + str(master_slave_steps_chain[step_key])
xprint("step "+str(step_number) + ": " + str(master_slave_steps_chain[step_key]))
plan[str(step_number)] = {'master': master_slave_steps_chain[step_key][0], 'slave': master_slave_steps_chain[step_key][1], 'target_topo': solution_steps_chain[step_key]}
step_number += 1
return master_slave_steps_chain['0.'+first][0], master_slave_steps_chain['0.'+first][1]
else:
if debug:
print "============== store permutation ============="
xprint("============== store permutation =============")
solution_steps_chain[position+'.'+str(j)] = raw_topo_1_permutation
master_slave_steps_chain[position+'.'+str(j)] = [master, slave]
raw_topo_1_permutations[position+'.'+str(j)] = raw_topo_1_permutation
@@ -587,10 +599,10 @@ def solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_
server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation)
datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation)
if debug:
print "solver_check"
xprint("solver_check")
pprint.pprint(raw_topo_1_permutation, width=300)
print server_master_repartition_dict
print datacenter_master_repartition_dict
xprint(server_master_repartition_dict)
xprint(datacenter_master_repartition_dict)
name_server, master_count_server, master_total_count = detect_imbalance(server_master_repartition_dict)
name_datacenter, master_count_datacenter, master_total_count = detect_imbalance(datacenter_master_repartition_dict)
if name_server is None and name_datacenter is None:
@@ -606,7 +618,7 @@ def simul_failover(master, slave, raw_topo):
elif node_name(node) == slave:
switch_role(node)
if raw_topo_copy == raw_topo:
print("Failed")
xprint("Failed")
#print("raw_topo_copy: " + str(raw_topo_copy))
#print("raw_topo: " + str(raw_topo))
return raw_topo
@@ -676,18 +688,18 @@ def append_datacenter(raw_topo, startup_nodes_by_datacenter):
def same_cluster(startup_nodes_list, raw_topo):
if len(startup_nodes_list) != len(raw_topo):
print('Found a different number of nodes.')
xprint('Found a different number of nodes.')
return False
for node in raw_topo:
if node_name(node) not in [node['host'] + ':' + node['port'] for node in startup_nodes_list]:
print(node_name(node) + ' found but unknown.')
xprint(node_name(node) + ' found but unknown.')
return False
return True
def cluster_has_changed(current_cluster_topo, raw_topo):
if len(current_cluster_topo) != len(raw_topo):
print('Found a different number of nodes.')
xprint('Found a different number of nodes.')
return True
for node in raw_topo:
found = False
@@ -877,7 +889,7 @@ def get_datacenter_for_node(node, startup_nodes_by_datacenter):
# ip, port of the slave that will replace his master
def failover_without_quorum(ip, port):
print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER TAKEOVER")
xprint(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER TAKEOVER")
if not test:
proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER", "TAKEOVER"], stdout=PIPE)
result = proc.communicate()[0].split()
@@ -885,7 +897,7 @@ def failover_without_quorum(ip, port):
# ip, port of the slave that will replace his master
def failover_with_quorum(ip, port):
print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER")
xprint(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER")
if not test:
proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER"], stdout=PIPE)
result = proc.communicate()[0].split()
@@ -948,7 +960,7 @@ class MyHandler(BaseHTTPRequestHandler):
self.end_headers()
delta = time.mktime(time.gmtime()) - last_loop_epoch
if delta > unresponsive_timeout:
print("manager main loop is unresponsive!")
xprint("manager main loop is unresponsive!")
answer = "failed"
cluster_state = 'Unknown state'
request_active = False
@@ -1006,7 +1018,7 @@ class MyHandler(BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write("CANNOT PROCEED: failed manager")
elif no_repartition_duration != 0 and slave_only_engine != slave_only_engine_req:
print("A request has already been made to only have slaves on engine " + slave_only_engine + ".")
xprint("A request has already been made to only have slaves on engine " + slave_only_engine + ".")
self.send_response(403)
self.send_header('Content-type', 'text/plain')
self.end_headers()
@@ -1027,7 +1039,7 @@ class MyHandler(BaseHTTPRequestHandler):
self.send_response(404)
def log_message(self, format, *args):
if debug:
sys.stderr.write("%s - - [%s] %s\n" %
xprint("%s - - [%s] %s" %
(self.address_string(),
self.log_date_time_string(),
format%args))
@@ -1066,17 +1078,17 @@ if __name__ == "__main__":
webserver_thread = WebThread()
webserver_thread.start()
print(api_help())
xprint(api_help())
try:
main()
except KeyboardInterrupt:
print 'Interrupted'
xprint('Interrupted')
httpd.shutdown()
httpd.server_close()
sys.exit(0)
except:
print 'We crashed!'
print traceback.format_exc()
xprint('We crashed!')
xprint(traceback.format_exc())
httpd.shutdown()
httpd.server_close()
sys.exit(0)
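For the "do not keep request_active set" part of the message, the hunk at -175,38 +186,39 polls each other manager's /status and /request_active endpoints and, once this manager wins the election or defers to a peer, clears its own request_active flag rather than leaving it set, which appears to be what that line of the commit message refers to. Below is a simplified, self-contained sketch of that handshake as it reads in the diff; election_round, peers and ask_peer are hypothetical stand-ins, ask_peer replacing the urllib2 calls made with a 1-second timeout, and the real script also logs the "Becoming active!", "Sleeping to let another manager activate." and "Manager election in progress." messages and handles unreachable or garbled peers.

import random
import time

def election_round(manager_status, request_active, peers, ask_peer):
    # ask_peer(manager) is assumed to return (status, wants_active),
    # e.g. ('active', False), standing in for the HTTP calls to the
    # /status and /request_active endpoints of the other managers.
    num_manager_active = 0
    sleep = False
    for manager in peers:
        status, wants_active = ask_peer(manager)
        if wants_active:
            # Another manager asked to activate first: back off this round.
            request_active = False
            sleep = True
        elif status == 'active':
            num_manager_active += 1
    if num_manager_active == 0 and manager_status in ('passive', 'starting'):
        if request_active:
            manager_status = 'active'
            request_active = False   # clear the request once we are active
        elif sleep:
            time.sleep(random.randint(1, 10))
        else:
            request_active = True    # ask for activation, then wait a bit
            time.sleep(random.randint(1, 10))
    elif num_manager_active == 1 and manager_status == 'starting':
        manager_status = 'passive'
    elif num_manager_active >= 1 and manager_status == 'active':
        manager_status = 'passive'
    return manager_status, request_active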