From e053b7f38e9a54066c3b98e06423afc614750199 Mon Sep 17 00:00:00 2001
From: yohan <783b8c87@scimetis.net>
Date: Wed, 12 Jun 2019 19:31:50 +0200
Subject: [PATCH] Lock protection for thread-safe print. Flush on each print.
 Do not request_active eternally when already active. Bumped version to 4.

---
 redis-manager.py | 194 +++++++++++++++++++++++++----------------------
 1 file changed, 103 insertions(+), 91 deletions(-)

diff --git a/redis-manager.py b/redis-manager.py
index 551fb8d..1f27662 100755
--- a/redis-manager.py
+++ b/redis-manager.py
@@ -2,7 +2,9 @@
 # -*- coding: utf-8 -*-
 # Author : Yohan Bataille (ING)
 # redis-manager
-version = "3"
+from __future__ import print_function
+
+version = "4"
 
 # Requires:
 # redis-cli
@@ -23,8 +25,17 @@ import traceback
 import pprint
 import urllib2
 from random import randint
+from threading import Lock
 
-print("redis manager version: " + version)
+xprint_lock = Lock()
+
+def xprint(*args, **kwargs):
+    """Thread-safe print: hold xprint_lock while printing, then flush the stream that was written to."""
+    with xprint_lock:
+        print(*args, **kwargs)
+        (kwargs.get('file') or sys.stdout).flush()  # honour file=...; None/absent means sys.stdout
+
+xprint("redis manager version: " + version)
 
 # positional : REDIS_PATH, REDIS_NODES, ENV, HTTP_PORT (in this order, all mandatory, ENV : "DEV" or "PROD")
 # optional : time (-t), dry_run (-n)
@@ -65,14 +76,14 @@ if args.dry_run:
 redis_cli = args.REDIS_PATH
 http_port = int(args.HTTP_PORT)
 
-print("HTTP_PORT: " + str(http_port))
-print("REDIS_NODES: " + args.REDIS_NODES)
-print("all_checks: " + str(all_checks))
-print("loop_time: " + str(loop_time))
-print("failover_max_wait: " + str(failover_max_wait))
-print("other_managers: " + str(other_managers))
-print("redis_cli: " + redis_cli)
-print("test: " + str(test))
+xprint("HTTP_PORT: " + str(http_port))
+xprint("REDIS_NODES: " + args.REDIS_NODES)
+xprint("all_checks: " + str(all_checks))
+xprint("loop_time: " + str(loop_time))
+xprint("failover_max_wait: " + str(failover_max_wait))
+xprint("other_managers: " + str(other_managers))
+xprint("redis_cli: " + redis_cli)
+xprint("test: " + str(test))
 
 #[root@slzuss3vmq00 ~]# /opt/ZUTA0/Logiciel/RDI/bin/redis-cli -h 10.166.119.48 -p 7002 --raw CLUSTER INFO
 #cluster_state:fail
@@ -158,9 +169,9 @@ def main():
     current_cluster_topo = list()
 
     if all_checks:
-        print("PROD mode enabled: master imbalance correction by server and datacenter activated.")
+        xprint("PROD mode enabled: master imbalance correction by server and datacenter activated.")
     else:
-        print("DEV mode enabled: master imbalance correction by server and datacenter deactivated.")
+        xprint("DEV mode enabled: master imbalance correction by server and datacenter deactivated.")
 
     while True:
         last_loop_epoch = time.mktime(time.gmtime())
@@ -175,38 +186,39 @@ def main():
                         r2 = urllib2.urlopen('http://' + manager + '/request_active', None, 1)
                         l2 = r2.readlines()
                         if len(l2) == 1 and l2[0] == 'yes':
-                            print("other has requested activation")
+                            xprint("other has requested activation")
                             request_active = False
                             sleep = True
                 elif len(l) == 1 and l[0].split()[0] == 'active':
                     if num_manager_active == 0:
                         num_manager_active += 1
                     else:
-                        print("Too many active managers!")
+                        xprint("Too many active managers!")
                 elif len(l) == 1 and l[0].split()[0] == 'failed':
-                    print("manager " + manager + " is KO.")
+                    xprint("manager " + manager + " is KO.")
                 else:
-                    print("manager " + manager + " has answered with garbage: " + str(l))
+                    xprint("manager " + manager + " has answered with garbage: " + str(l))
             except:
-                print("manager " + manager + " is not responding.")
+                xprint("manager " + manager + " is not responding.")
         if num_manager_active == 0 and (manager_status == 'passive' or manager_status == 'starting'):
             if request_active:
-                print("Becoming active!")
+                xprint("Becoming active!")
                 manager_status = 'active'
+                request_active = False
             elif sleep:
-                print("Sleeping to let another manager activate.")
+                xprint("Sleeping to let another manager activate.")
                 time.sleep(randint(1, 10))
                 continue
             else:
                 request_active = True
-                print("Manager election in progress.")
+                xprint("Manager election in progress.")
                 time.sleep(randint(1, 10))
                 continue
         elif num_manager_active == 1 and manager_status == 'starting':
-            print("Manager election finished, we are passive!")
+            xprint("Manager election finished, we are passive!")
             manager_status = 'passive'
         elif num_manager_active >= 1 and manager_status == 'active':
-            print("Becoming passive!")
+            xprint("Becoming passive!")
             manager_status = 'passive'
 
         if sleep_until != 0:
@@ -214,14 +226,14 @@ def main():
             if delta <= 0:
                 sleep_until = 0
             else:
-                print("Sleeping as requested. " + str(delta) + " seconds remaining.")
+                xprint("Sleeping as requested. " + str(delta) + " seconds remaining.")
                 time.sleep(loop_time)
                 continue
 
         raw_topo, raw_topo_str = cluster_topo(startup_nodes_list, startup_nodes_by_datacenter)
         if raw_topo is None:
             cluster_state = 'Unknown state'
-            print('Critical failure: cannot get cluster topology. Doing nothing.')
+            xprint('Critical failure: cannot get cluster topology. Doing nothing.')
             time.sleep(loop_time)
             continue
         bool_cluster_online = cluster_online(startup_nodes_list)
@@ -229,17 +241,17 @@ def main():
             pass
         else:
             cluster_state = 'KO'
-            print('Cluster failure.')
+            xprint('Cluster failure.')
         # print(raw_topo_str)
         if not same_cluster(startup_nodes_list, raw_topo):
             pprint.pprint(raw_topo, width=300)
             cluster_state = 'Unknown cluster'
-            print('Not the exact cluster we know. Doing nothing.')
+            xprint('Not the exact cluster we know. Doing nothing.')
             time.sleep(loop_time)
             continue
         for node in raw_topo:
             if node_status(node) == 'cluster still assessing':
-                print('Something is going on but Redis Cluster is still assessing the situation. Doing nothing.')
+                xprint('Something is going on but Redis Cluster is still assessing the situation. Doing nothing.')
                 pprint.pprint(raw_topo, width=300)
                 time.sleep(loop_time)
                 continue
@@ -247,7 +259,7 @@ def main():
         if not bool_cluster_online:
             pprint.pprint(raw_topo, width=300)
             if has_quorum(raw_topo):
-                print("Cluster has quorum and can recover by itself. Doing nothing.")
+                xprint("Cluster has quorum and can recover by itself. Doing nothing.")
             else:
                 failed_masters = list()
                 new_failover_without_quorum_requested = list()
@@ -256,19 +268,19 @@ def main():
                         if master_status_of_slave(raw_topo, node) != 'ok':
                             masterid = masterid_of_slave(node)
                             if masterid in failed_masters:
-                                print("Slave " + node_name(node) + " does not see master " + node_name_from_id(raw_topo, masterid) + ", but a slave has already been promoted. Doing nothing.")
+                                xprint("Slave " + node_name(node) + " does not see master " + node_name_from_id(raw_topo, masterid) + ", but a slave has already been promoted. Doing nothing.")
                             elif manager_status == 'active':
                                 if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                                     if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"]:
-                                        print("Cluster did not comply with our previous failover request. Waiting.")
+                                        xprint("Cluster did not comply with our previous failover request. Waiting.")
                                 else:
-                                    print("Failed master: " + node_name_from_id(raw_topo, masterid) + ". Promoting slave " + node_name(node) + ".")
+                                    xprint("Failed master: " + node_name_from_id(raw_topo, masterid) + ". Promoting slave " + node_name(node) + ".")
                                     failover_without_quorum(node_ip(node), node_port(node))
                                     new_failover_without_quorum_requested.append({'slave': node_object_from_node_name(node_name(node)), 'epoch': time.mktime(time.gmtime())})
                                 failed_masters.append(masterid)
                 failover_without_quorum_requested = failover_without_quorum_requested + new_failover_without_quorum_requested
                 if failed_masters == list():
-                    print("Critical failure : no slave remaining, cannot do anything.")
+                    xprint("Critical failure : no slave remaining, cannot do anything.")
         else:
             # Detect risky situations
             failure = False
@@ -276,16 +288,16 @@ def main():
                 if node_role(node) == 'master' and node_status(node) == 'ok':
                     # Detect master without slave
                     if not master_has_at_least_one_slave(raw_topo, node):
-                        print("Master " + node_name(node) + " has no slave !")
+                        xprint("Master " + node_name(node) + " has no slave !")
                         failure = True
                 elif node_role(node) == 'slave' and all_checks:
                     # Detect slave on same server as master.
                     if node_ip(node) == node_ip_from_id(raw_topo, masterid_of_slave(node)):
-                        print("Slave " + node_name(node) + " is on the same server as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
+                        xprint("Slave " + node_name(node) + " is on the same server as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
                         failure = True
                     # Detect slave on same datacenter as master.
                     if node_datacenter(node) == node_datacenter_from_id(raw_topo, masterid_of_slave(node)):
-                        print("Slave " + node_name(node) + " is on the same datacenter as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
+                        xprint("Slave " + node_name(node) + " is on the same datacenter as master " + node_name_from_id(raw_topo, masterid_of_slave(node)))
                         failure = True
             if failure:
                 cluster_state = 'At risk'
@@ -295,7 +307,7 @@ def main():
                 pprint.pprint(raw_topo, width=300)
                 current_cluster_topo = raw_topo
             elif cluster_has_changed(current_cluster_topo, raw_topo):
-                print("Cluster topology has changed")
+                xprint("Cluster topology has changed")
                 pprint.pprint(raw_topo, width=300)
                 current_cluster_topo = raw_topo
             if plan != dict() and manager_status == 'active':
@@ -305,41 +317,41 @@ def main():
                 current_step = min(steps)
                 if not cluster_has_changed(current_cluster_topo, plan['0']['starting_topo']):
                     if failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
-                        print("Cluster did not comply with our previous failover request. We reached the timeout. Plan Failed. Forget it.")
+                        xprint("Cluster did not comply with our previous failover request. We reached the timeout. Plan Failed. Forget it.")
                         plan = dict()
                     else:
-                        print("Still waiting for the cluster to proceed with the failover.")
+                        xprint("Still waiting for the cluster to proceed with the failover.")
                 elif cluster_has_changed(current_cluster_topo, plan[str(current_step)]['target_topo']):
-                    print("Cluster topology is not what we would expect. Something happened. Plan failed. Forget it.")
+                    xprint("Cluster topology is not what we would expect. Something happened. Plan failed. Forget it.")
                     plan = dict()
                 else:
                     if len(steps) > 1:
-                        print "Step " + str(current_step) + " succeeded."
+                        xprint("Step " + str(current_step) + " succeeded.")
                         del plan[str(current_step)]
-                        print "Launching step " + str(current_step + 1) + "."
+                        xprint("Launching step " + str(current_step + 1) + ".")
                         slave = plan[str(current_step + 1)]['slave']
                         master = plan[str(current_step + 1)]['master']
-                        print("Slave " + slave + " will replace his master " + master)
+                        xprint("Slave " + slave + " will replace his master " + master)
                         node_object = node_object_from_node_name(slave)
                         failover_with_quorum(node_object['host'], node_object['port'])
                         failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
                     else:
-                        print("Final step succeeded. The cluster is now balanced.")
-                        print("I love it when a plan comes together!")
+                        xprint("Final step succeeded. The cluster is now balanced.")
+                        xprint("I love it when a plan comes together!")
                         plan = dict()
                 time.sleep(loop_time)
                 continue
             if slave_only_engine is not None and manager_status == 'active':
                 if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                     if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
-                        print("Cluster did not comply with our previous failover request. Waiting.")
+                        xprint("Cluster did not comply with our previous failover request. Waiting.")
                 else:
                     # Failover all master nodes on this engine
                     for node in raw_topo:
                         if node_role(node) == 'master' and node_status(node) == 'ok' and node_ip(node) == slave_only_engine:
                             slave = get_one_slave(raw_topo, node)
                             if slave is None:
-                                print("Master " + node_name(node) + " has no slave !")
+                                xprint("Master " + node_name(node) + " has no slave !")
                             else:
                                 failover_with_quorum(node_ip(slave), node_port(slave))
                                 failover_with_quorum_requested.append({'slave': node_object_from_node_name(node_name(slave)), 'epoch': time.mktime(time.gmtime())})
@@ -354,11 +366,11 @@ def main():
                         slave_only_engine = None
                         no_repartition_duration = 0
                     else:
-                        print("Skipping master imbalance correction as requested " + str(delta) + " seconds remaining.")
+                        xprint("Skipping master imbalance correction as requested " + str(delta) + " seconds remaining.")
                         time.sleep(loop_time)
                         continue
                 if slave_only_engine is not None:
-                    print("Still trying to remove slaves from " + slave_only_engine)
+                    xprint("Still trying to remove slaves from " + slave_only_engine)
                     time.sleep(loop_time)
                     continue
 
@@ -367,7 +379,7 @@ def main():
             if not all_checks:
                 pass
             elif len(startup_nodes_by_server) < 2:
-                print("Only one server: skipping master imbalance correction.")
+                xprint("Only one server: skipping master imbalance correction.")
             else:
                 server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo)
                 datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo)
@@ -377,26 +389,26 @@ def main():
                 if name is not None:
                     cluster_state = 'Imbalanced'
                     imbalanced = True
-                    print server_master_repartition_dict
+                    xprint(server_master_repartition_dict)
                     #pprint.pprint(raw_topo, width=300)
-                    print("Too many masters on server " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
+                    xprint("Too many masters on server " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
                     if manager_status == 'active':
                         master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter)
                         if master is None or slave is None:
-                            print("Could not find a failover solution.")
+                            xprint("Could not find a failover solution.")
                         else:
                             if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                                 if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
-                                    print("Cluster did not comply with our previous failover request. Waiting.")
+                                    xprint("Cluster did not comply with our previous failover request. Waiting.")
                             else:
-                                print("Slave " + slave + " will replace his master " + master)
+                                xprint("Slave " + slave + " will replace his master " + master)
                                 node_object = node_object_from_node_name(slave)
                                 failover_with_quorum(node_object['host'], node_object['port'])
                                 failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
                     time.sleep(loop_time)
                     continue
                 if len(startup_nodes_by_datacenter) < 2:
-                    print("Only one datacenter: skipping master imbalance correction by datacenter.")
+                    xprint("Only one datacenter: skipping master imbalance correction by datacenter.")
                 else:
                     # Detect too many masters on a datacenter.
                     # It is possible to have no imbalance by server but an imbalance by datacenter (+1 master on each server of a datacenter compared to the other and at least 2 servers by datacenter).
@@ -404,17 +416,17 @@ def main():
                     if name is not None:
                         cluster_state = 'Imbalanced'
                         imbalanced = True
-                        print("Too many masters on datacenter " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
+                        xprint("Too many masters on datacenter " + str(name) + ": " + str(master_count) + "/" + str(master_total_count))
                         if manager_status == 'active':
                             master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter)
                             if master is None or slave is None:
-                                print("Could not find a failover solution.")
+                                xprint("Could not find a failover solution.")
                             else:
                                 if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]:
                                     if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]:
-                                        print("Cluster did not comply with our previous failover request. Waiting.")
+                                        xprint("Cluster did not comply with our previous failover request. Waiting.")
                                 else:
-                                    print("Slave " + slave + " will replace his master " + master)
+                                    xprint("Slave " + slave + " will replace his master " + master)
                                     node_object = node_object_from_node_name(slave)
                                     failover_with_quorum(node_object['host'], node_object['port'])
                                     failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())})
@@ -438,7 +450,7 @@ def failover_without_quorum_did_not_happen(raw_topo):
         if found_node_role == 'master':
             failover_without_quorum_requested.remove(slave_dict)
         elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait:
-            print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
+            xprint("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
             failover_without_quorum_requested.remove(slave_dict)
             one_cleared = True
         else:
@@ -459,7 +471,7 @@ def failover_with_quorum_did_not_happen(raw_topo):
         if found_node_role == 'master':
             failover_with_quorum_requested.remove(slave_dict)
         elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait:
-            print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
+            xprint("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.")
             failover_with_quorum_requested.remove(slave_dict)
             one_cleared = True
         else:
@@ -513,43 +525,43 @@ def find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter
     raw_topo_permut_dict = copy.deepcopy(solution_steps_chain)
     for i in range(0, max_steps):
         if debug:
-            print(i)
+            xprint(i)
         j = 0
         raw_topo_1_permutations = dict()
         for position, raw_topo_permut in raw_topo_permut_dict.iteritems():
             if debug:
-                print("start position: ")
+                xprint("start position: ")
                 pprint.pprint(raw_topo_permut, width=300)
                 server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_permut)
-                print server_master_repartition_dict
+                xprint(server_master_repartition_dict)
                 datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_permut)
-                print datacenter_master_repartition_dict
+                xprint(datacenter_master_repartition_dict)
             # This only returns masters and slaves with node_status(master) == 'ok' or 'cluster still assessing' and node_status(slave) == 'ok' or 'cluster still assessing':
             master_slaves_dict = master_slaves_topo(raw_topo_permut)
             # generate all 1-permutation sets
             for master in master_slaves_dict:
                 if debug:
-                    print("master: " + str(master))
+                    xprint("master: " + str(master))
                 for slave in master_slaves_dict[master]:
                     raw_topo_copy = copy.deepcopy(raw_topo_permut)
                     raw_topo_1_permutation = simul_failover(master, slave, raw_topo_copy)
                     if debug:
-                        print("slave: " + str(slave))
+                        xprint("slave: " + str(slave))
                         server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation)
-                        print server_master_repartition_dict
+                        xprint(server_master_repartition_dict)
                         datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation)
-                        print datacenter_master_repartition_dict
+                        xprint(datacenter_master_repartition_dict)
                         pprint.pprint(raw_topo_1_permutation, width=300)
                     j += 1
                     if not raw_topo_1_permutation in solution_steps_chain.values():
                         #print "not already stored"
                         if solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_by_datacenter):
-                            print("Found a solution: ")
+                            xprint("Found a solution: ")
                             pprint.pprint(raw_topo_1_permutation, width=300)
                             # return the first step
                             if i == 0:
-                                print("Sounds like a plan !")
-                                print "only one step : " + str([master, slave])
+                                xprint("Sounds like a plan !")
+                                xprint("only one step : " + str([master, slave]))
                                 plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)}
                                 plan['1'] = {'master': master, 'slave': slave, 'target_topo': raw_topo_1_permutation}
                                 return master, slave
@@ -562,20 +574,20 @@ def find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter
                                 #print("slave: "+master_slave_steps_chain['0.'+first][1])
                                 step_key = '0'
                                 step_number = 1
-                                print("Sounds like a plan !")
+                                xprint("Sounds like a plan !")
                                 end_position = position+'.'+str(j)
                                 solution_steps_chain[end_position] = raw_topo_1_permutation
                                 master_slave_steps_chain[end_position] = [master, slave]
                                 plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)}
                                 for step in end_position.split('.')[1:]:
                                     step_key += '.'+step
-                                    print "step "+str(step_number) + ": " + str(master_slave_steps_chain[step_key])
+                                    xprint("step "+str(step_number) + ": " + str(master_slave_steps_chain[step_key]))
                                     plan[str(step_number)] = {'master': master_slave_steps_chain[step_key][0], 'slave': master_slave_steps_chain[step_key][1], 'target_topo': solution_steps_chain[step_key]}
                                     step_number += 1
                                 return master_slave_steps_chain['0.'+first][0], master_slave_steps_chain['0.'+first][1]
                         else:
                             if debug:
-                                print "============== store permutation ============="
+                                xprint("============== store permutation =============")
                             solution_steps_chain[position+'.'+str(j)] = raw_topo_1_permutation
                             master_slave_steps_chain[position+'.'+str(j)] = [master, slave]
                             raw_topo_1_permutations[position+'.'+str(j)] = raw_topo_1_permutation
@@ -587,10 +599,10 @@ def solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_
     server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation)
     datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation)
     if debug:
-        print "solver_check"
+        xprint("solver_check")
         pprint.pprint(raw_topo_1_permutation, width=300)
-        print server_master_repartition_dict
-        print datacenter_master_repartition_dict
+        xprint(server_master_repartition_dict)
+        xprint(datacenter_master_repartition_dict)
     name_server, master_count_server, master_total_count = detect_imbalance(server_master_repartition_dict)
     name_datacenter, master_count_datacenter, master_total_count = detect_imbalance(datacenter_master_repartition_dict)
     if name_server is None and name_datacenter is None:
@@ -606,7 +618,7 @@ def simul_failover(master, slave, raw_topo):
         elif node_name(node) == slave:
             switch_role(node)
     if raw_topo_copy == raw_topo:
-        print("Failed")
+        xprint("Failed")
     #print("raw_topo_copy: " + str(raw_topo_copy))
     #print("raw_topo: " + str(raw_topo))
     return raw_topo
@@ -676,18 +688,18 @@ def append_datacenter(raw_topo, startup_nodes_by_datacenter):
 
 def same_cluster(startup_nodes_list, raw_topo):
     if len(startup_nodes_list) != len(raw_topo):
-        print('Found a different number of nodes.')
+        xprint('Found a different number of nodes.')
         return False
     for node in raw_topo:
         if node_name(node) not in [node['host'] + ':' + node['port'] for node in startup_nodes_list]:
-            print(node_name(node) + ' found but unknown.')
+            xprint(node_name(node) + ' found but unknown.')
             return False
     return True
 
 
 def cluster_has_changed(current_cluster_topo, raw_topo):
     if len(current_cluster_topo) != len(raw_topo):
-        print('Found a different number of nodes.')
+        xprint('Found a different number of nodes.')
         return True
     for node in raw_topo:
         found = False
@@ -877,7 +889,7 @@ def get_datacenter_for_node(node, startup_nodes_by_datacenter):
 
 # ip, port of the slave that will replace his master
 def failover_without_quorum(ip, port):
-    print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER TAKEOVER")
+    xprint(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER TAKEOVER")
     if not test:
         proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER", "TAKEOVER"], stdout=PIPE)
         result = proc.communicate()[0].split()
@@ -885,7 +897,7 @@ def failover_without_quorum(ip, port):
 
 # ip, port of the slave that will replace his master
 def failover_with_quorum(ip, port):
-    print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER")
+    xprint(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER")
     if not test:
         proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER"], stdout=PIPE)
         result = proc.communicate()[0].split()
@@ -948,7 +960,7 @@ class MyHandler(BaseHTTPRequestHandler):
             self.end_headers()
             delta = time.mktime(time.gmtime()) - last_loop_epoch
             if delta > unresponsive_timeout:
-                print("manager main loop is unresponsive!")
+                xprint("manager main loop is unresponsive!")
                 answer = "failed"
                 cluster_state = 'Unknown state'
                 request_active = False
@@ -1006,7 +1018,7 @@ class MyHandler(BaseHTTPRequestHandler):
                     self.end_headers()
                     self.wfile.write("CANNOT PROCEED: failed manager")
                 elif no_repartition_duration != 0 and slave_only_engine != slave_only_engine_req:
-                    print("A request has already been made to only have slaves on engine " + slave_only_engine + ".")
+                    xprint("A request has already been made to only have slaves on engine " + slave_only_engine + ".")
                     self.send_response(403)
                     self.send_header('Content-type', 'text/plain')
                     self.end_headers()
@@ -1027,7 +1039,7 @@ class MyHandler(BaseHTTPRequestHandler):
             self.send_response(404)
     def log_message(self, format, *args):
         if debug:
-            sys.stderr.write("%s - - [%s] %s\n" %
+            xprint("%s - - [%s] %s" %
                  (self.address_string(),
                   self.log_date_time_string(),
                   format%args))
@@ -1066,17 +1078,17 @@ if __name__ == "__main__":
     webserver_thread = WebThread()
     webserver_thread.start()
     
-    print(api_help())
+    xprint(api_help())
     try:
         main()
     except KeyboardInterrupt:
-        print 'Interrupted'
+        xprint('Interrupted')
         httpd.shutdown()
         httpd.server_close()
         sys.exit(0)
     except:
-        print 'We crashed!'
-        print traceback.format_exc()
+        xprint('We crashed!')
+        xprint(traceback.format_exc())
         httpd.shutdown()
         httpd.server_close()
         sys.exit(0)