From e053b7f38e9a54066c3b98e06423afc614750199 Mon Sep 17 00:00:00 2001 From: yohan <783b8c87@scimetis.net> Date: Wed, 12 Jun 2019 19:31:50 +0200 Subject: [PATCH] Lock protection for thread-safe print. Flush on each print. Do not request_active eternally when already active. Bumped version to 4. --- redis-manager.py | 194 +++++++++++++++++++++++++---------------------- 1 file changed, 103 insertions(+), 91 deletions(-) diff --git a/redis-manager.py b/redis-manager.py index 551fb8d..1f27662 100755 --- a/redis-manager.py +++ b/redis-manager.py @@ -2,7 +2,9 @@ # -*- coding: utf-8 -*- # Author : Yohan Bataille (ING) # redis-manager -version = "3" +from __future__ import print_function + +version = "4" # Requires: # redis-cli @@ -23,8 +25,17 @@ import traceback import pprint import urllib2 from random import randint +from threading import Lock -print("redis manager version: " + version) +xprint_lock = Lock() + +def xprint(*args, **kwargs): + """Thread safe print function""" + with xprint_lock: + print(*args, **kwargs) + sys.stdout.flush() + +xprint("redis manager version: " + version) # positional : REDIS_PATH, REDIS_NODES, ENV, HTTP_PORT (in this order, all mandatory, ENV : "DEV" or "PROD") # optional : time (-t), dry_run (-n) @@ -65,14 +76,14 @@ if args.dry_run: redis_cli = args.REDIS_PATH http_port = int(args.HTTP_PORT) -print("HTTP_PORT: " + str(http_port)) -print("REDIS_NODES: " + args.REDIS_NODES) -print("all_checks: " + str(all_checks)) -print("loop_time: " + str(loop_time)) -print("failover_max_wait: " + str(failover_max_wait)) -print("other_managers: " + str(other_managers)) -print("redis_cli: " + redis_cli) -print("test: " + str(test)) +xprint("HTTP_PORT: " + str(http_port)) +xprint("REDIS_NODES: " + args.REDIS_NODES) +xprint("all_checks: " + str(all_checks)) +xprint("loop_time: " + str(loop_time)) +xprint("failover_max_wait: " + str(failover_max_wait)) +xprint("other_managers: " + str(other_managers)) +xprint("redis_cli: " + redis_cli) +xprint("test: " + str(test)) #[root@slzuss3vmq00 ~]# /opt/ZUTA0/Logiciel/RDI/bin/redis-cli -h 10.166.119.48 -p 7002 --raw CLUSTER INFO #cluster_state:fail @@ -158,9 +169,9 @@ def main(): current_cluster_topo = list() if all_checks: - print("PROD mode enabled: master imbalance correction by server and datacenter activated.") + xprint("PROD mode enabled: master imbalance correction by server and datacenter activated.") else: - print("DEV mode enabled: master imbalance correction by server and datacenter deactivated.") + xprint("DEV mode enabled: master imbalance correction by server and datacenter deactivated.") while True: last_loop_epoch = time.mktime(time.gmtime()) @@ -175,38 +186,39 @@ def main(): r2 = urllib2.urlopen('http://' + manager + '/request_active', None, 1) l2 = r2.readlines() if len(l2) == 1 and l2[0] == 'yes': - print("other has requested activation") + xprint("other has requested activation") request_active = False sleep = True elif len(l) == 1 and l[0].split()[0] == 'active': if num_manager_active == 0: num_manager_active += 1 else: - print("Too many active managers!") + xprint("Too many active managers!") elif len(l) == 1 and l[0].split()[0] == 'failed': - print("manager " + manager + " is KO.") + xprint("manager " + manager + " is KO.") else: - print("manager " + manager + " has answered with garbage: " + str(l)) + xprint("manager " + manager + " has answered with garbage: " + str(l)) except: - print("manager " + manager + " is not responding.") + xprint("manager " + manager + " is not responding.") if num_manager_active == 0 and (manager_status == 'passive' or manager_status == 'starting'): if request_active: - print("Becoming active!") + xprint("Becoming active!") manager_status = 'active' + request_active = False elif sleep: - print("Sleeping to let another manager activate.") + xprint("Sleeping to let another manager activate.") time.sleep(randint(1, 10)) continue else: request_active = True - print("Manager election in progress.") + xprint("Manager election in progress.") time.sleep(randint(1, 10)) continue elif num_manager_active == 1 and manager_status == 'starting': - print("Manager election finished, we are passive!") + xprint("Manager election finished, we are passive!") manager_status = 'passive' elif num_manager_active >= 1 and manager_status == 'active': - print("Becoming passive!") + xprint("Becoming passive!") manager_status = 'passive' if sleep_until != 0: @@ -214,14 +226,14 @@ def main(): if delta <= 0: sleep_until = 0 else: - print("Sleeping as requested. " + str(delta) + " seconds remaining.") + xprint("Sleeping as requested. " + str(delta) + " seconds remaining.") time.sleep(loop_time) continue raw_topo, raw_topo_str = cluster_topo(startup_nodes_list, startup_nodes_by_datacenter) if raw_topo is None: cluster_state = 'Unknown state' - print('Critical failure: cannot get cluster topology. Doing nothing.') + xprint('Critical failure: cannot get cluster topology. Doing nothing.') time.sleep(loop_time) continue bool_cluster_online = cluster_online(startup_nodes_list) @@ -229,17 +241,17 @@ def main(): pass else: cluster_state = 'KO' - print('Cluster failure.') + xprint('Cluster failure.') # print(raw_topo_str) if not same_cluster(startup_nodes_list, raw_topo): pprint.pprint(raw_topo, width=300) cluster_state = 'Unknown cluster' - print('Not the exact cluster we know. Doing nothing.') + xprint('Not the exact cluster we know. Doing nothing.') time.sleep(loop_time) continue for node in raw_topo: if node_status(node) == 'cluster still assessing': - print('Something is going on but Redis Cluster is still assessing the situation. Doing nothing.') + xprint('Something is going on but Redis Cluster is still assessing the situation. Doing nothing.') pprint.pprint(raw_topo, width=300) time.sleep(loop_time) continue @@ -247,7 +259,7 @@ def main(): if not bool_cluster_online: pprint.pprint(raw_topo, width=300) if has_quorum(raw_topo): - print("Cluster has quorum and can recover by itself. Doing nothing.") + xprint("Cluster has quorum and can recover by itself. Doing nothing.") else: failed_masters = list() new_failover_without_quorum_requested = list() @@ -256,19 +268,19 @@ def main(): if master_status_of_slave(raw_topo, node) != 'ok': masterid = masterid_of_slave(node) if masterid in failed_masters: - print("Slave " + node_name(node) + " does not see master " + node_name_from_id(raw_topo, masterid) + ", but a slave has already been promoted. Doing nothing.") + xprint("Slave " + node_name(node) + " does not see master " + node_name_from_id(raw_topo, masterid) + ", but a slave has already been promoted. Doing nothing.") elif manager_status == 'active': if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"]: if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"]: - print("Cluster did not comply with our previous failover request. Waiting.") + xprint("Cluster did not comply with our previous failover request. Waiting.") else: - print("Failed master: " + node_name_from_id(raw_topo, masterid) + ". Promoting slave " + node_name(node) + ".") + xprint("Failed master: " + node_name_from_id(raw_topo, masterid) + ". Promoting slave " + node_name(node) + ".") failover_without_quorum(node_ip(node), node_port(node)) new_failover_without_quorum_requested.append({'slave': node_object_from_node_name(node_name(node)), 'epoch': time.mktime(time.gmtime())}) failed_masters.append(masterid) failover_without_quorum_requested = failover_without_quorum_requested + new_failover_without_quorum_requested if failed_masters == list(): - print("Critical failure : no slave remaining, cannot do anything.") + xprint("Critical failure : no slave remaining, cannot do anything.") else: # Detect risky situations failure = False @@ -276,16 +288,16 @@ def main(): if node_role(node) == 'master' and node_status(node) == 'ok': # Detect master without slave if not master_has_at_least_one_slave(raw_topo, node): - print("Master " + node_name(node) + " has no slave !") + xprint("Master " + node_name(node) + " has no slave !") failure = True elif node_role(node) == 'slave' and all_checks: # Detect slave on same server as master. if node_ip(node) == node_ip_from_id(raw_topo, masterid_of_slave(node)): - print("Slave " + node_name(node) + " is on the same server as master " + node_name_from_id(raw_topo, masterid_of_slave(node))) + xprint("Slave " + node_name(node) + " is on the same server as master " + node_name_from_id(raw_topo, masterid_of_slave(node))) failure = True # Detect slave on same datacenter as master. if node_datacenter(node) == node_datacenter_from_id(raw_topo, masterid_of_slave(node)): - print("Slave " + node_name(node) + " is on the same datacenter as master " + node_name_from_id(raw_topo, masterid_of_slave(node))) + xprint("Slave " + node_name(node) + " is on the same datacenter as master " + node_name_from_id(raw_topo, masterid_of_slave(node))) failure = True if failure: cluster_state = 'At risk' @@ -295,7 +307,7 @@ def main(): pprint.pprint(raw_topo, width=300) current_cluster_topo = raw_topo elif cluster_has_changed(current_cluster_topo, raw_topo): - print("Cluster topology has changed") + xprint("Cluster topology has changed") pprint.pprint(raw_topo, width=300) current_cluster_topo = raw_topo if plan != dict() and manager_status == 'active': @@ -305,41 +317,41 @@ def main(): current_step = min(steps) if not cluster_has_changed(current_cluster_topo, plan['0']['starting_topo']): if failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]: - print("Cluster did not comply with our previous failover request. We reached the timeout. Plan Failed. Forget it.") + xprint("Cluster did not comply with our previous failover request. We reached the timeout. Plan Failed. Forget it.") plan = dict() else: - print("Still waiting for the cluster to proceed with the failover.") + xprint("Still waiting for the cluster to proceed with the failover.") elif cluster_has_changed(current_cluster_topo, plan[str(current_step)]['target_topo']): - print("Cluster topology is not what we would expect. Something happened. Plan failed. Forget it.") + xprint("Cluster topology is not what we would expect. Something happened. Plan failed. Forget it.") plan = dict() else: if len(steps) > 1: - print "Step " + str(current_step) + " succeeded." + xprint("Step " + str(current_step) + " succeeded.") del plan[str(current_step)] - print "Launching step " + str(current_step + 1) + "." + xprint("Launching step " + str(current_step + 1) + ".") slave = plan[str(current_step + 1)]['slave'] master = plan[str(current_step + 1)]['master'] - print("Slave " + slave + " will replace his master " + master) + xprint("Slave " + slave + " will replace his master " + master) node_object = node_object_from_node_name(slave) failover_with_quorum(node_object['host'], node_object['port']) failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())}) else: - print("Final step succeeded. The cluster is now balanced.") - print("I love it when a plan comes together!") + xprint("Final step succeeded. The cluster is now balanced.") + xprint("I love it when a plan comes together!") plan = dict() time.sleep(loop_time) continue if slave_only_engine is not None and manager_status == 'active': if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]: if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]: - print("Cluster did not comply with our previous failover request. Waiting.") + xprint("Cluster did not comply with our previous failover request. Waiting.") else: # Failover all master nodes on this engine for node in raw_topo: if node_role(node) == 'master' and node_status(node) == 'ok' and node_ip(node) == slave_only_engine: slave = get_one_slave(raw_topo, node) if slave is None: - print("Master " + node_name(node) + " has no slave !") + xprint("Master " + node_name(node) + " has no slave !") else: failover_with_quorum(node_ip(slave), node_port(slave)) failover_with_quorum_requested.append({'slave': node_object_from_node_name(node_name(slave)), 'epoch': time.mktime(time.gmtime())}) @@ -354,11 +366,11 @@ def main(): slave_only_engine = None no_repartition_duration = 0 else: - print("Skipping master imbalance correction as requested " + str(delta) + " seconds remaining.") + xprint("Skipping master imbalance correction as requested " + str(delta) + " seconds remaining.") time.sleep(loop_time) continue if slave_only_engine is not None: - print("Still trying to remove slaves from " + slave_only_engine) + xprint("Still trying to remove slaves from " + slave_only_engine) time.sleep(loop_time) continue @@ -367,7 +379,7 @@ def main(): if not all_checks: pass elif len(startup_nodes_by_server) < 2: - print("Only one server: skipping master imbalance correction.") + xprint("Only one server: skipping master imbalance correction.") else: server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo) datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo) @@ -377,26 +389,26 @@ def main(): if name is not None: cluster_state = 'Imbalanced' imbalanced = True - print server_master_repartition_dict + xprint(server_master_repartition_dict) #pprint.pprint(raw_topo, width=300) - print("Too many masters on server " + str(name) + ": " + str(master_count) + "/" + str(master_total_count)) + xprint("Too many masters on server " + str(name) + ": " + str(master_count) + "/" + str(master_total_count)) if manager_status == 'active': master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter) if master is None or slave is None: - print("Could not find a failover solution.") + xprint("Could not find a failover solution.") else: if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]: if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]: - print("Cluster did not comply with our previous failover request. Waiting.") + xprint("Cluster did not comply with our previous failover request. Waiting.") else: - print("Slave " + slave + " will replace his master " + master) + xprint("Slave " + slave + " will replace his master " + master) node_object = node_object_from_node_name(slave) failover_with_quorum(node_object['host'], node_object['port']) failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())}) time.sleep(loop_time) continue if len(startup_nodes_by_datacenter) < 2: - print("Only one datacenter: skipping master imbalance correction by datacenter.") + xprint("Only one datacenter: skipping master imbalance correction by datacenter.") else: # Detect too many masters on a datacenter. # It is possible to have no imbalance by server but an imbalance by datacenter (+1 master on each server of a datacenter compared to the other and at least 2 servers by datacenter). @@ -404,17 +416,17 @@ def main(): if name is not None: cluster_state = 'Imbalanced' imbalanced = True - print("Too many masters on datacenter " + str(name) + ": " + str(master_count) + "/" + str(master_total_count)) + xprint("Too many masters on datacenter " + str(name) + ": " + str(master_count) + "/" + str(master_total_count)) if manager_status == 'active': master, slave = find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter_master_repartition_dict, startup_nodes_by_server, startup_nodes_by_datacenter) if master is None or slave is None: - print("Could not find a failover solution.") + xprint("Could not find a failover solution.") else: if failover_without_quorum_did_not_happen(raw_topo)["one_did_not_happen"] or failover_with_quorum_did_not_happen(raw_topo)["one_did_not_happen"]: if not failover_without_quorum_did_not_happen(raw_topo)["one_cleared"] and not failover_with_quorum_did_not_happen(raw_topo)["one_cleared"]: - print("Cluster did not comply with our previous failover request. Waiting.") + xprint("Cluster did not comply with our previous failover request. Waiting.") else: - print("Slave " + slave + " will replace his master " + master) + xprint("Slave " + slave + " will replace his master " + master) node_object = node_object_from_node_name(slave) failover_with_quorum(node_object['host'], node_object['port']) failover_with_quorum_requested.append({'slave': node_object, 'epoch': time.mktime(time.gmtime())}) @@ -438,7 +450,7 @@ def failover_without_quorum_did_not_happen(raw_topo): if found_node_role == 'master': failover_without_quorum_requested.remove(slave_dict) elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait: - print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.") + xprint("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.") failover_without_quorum_requested.remove(slave_dict) one_cleared = True else: @@ -459,7 +471,7 @@ def failover_with_quorum_did_not_happen(raw_topo): if found_node_role == 'master': failover_with_quorum_requested.remove(slave_dict) elif time.mktime(time.gmtime()) - slave_dict['epoch'] > failover_max_wait: - print("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.") + xprint("Cluster has not performed failover for slave " + node_name_from_node_object(slave_dict['slave']) + " requested " + str(failover_max_wait) + " seconds ago. Removing the failover request.") failover_with_quorum_requested.remove(slave_dict) one_cleared = True else: @@ -513,43 +525,43 @@ def find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter raw_topo_permut_dict = copy.deepcopy(solution_steps_chain) for i in range(0, max_steps): if debug: - print(i) + xprint(i) j = 0 raw_topo_1_permutations = dict() for position, raw_topo_permut in raw_topo_permut_dict.iteritems(): if debug: - print("start position: ") + xprint("start position: ") pprint.pprint(raw_topo_permut, width=300) server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_permut) - print server_master_repartition_dict + xprint(server_master_repartition_dict) datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_permut) - print datacenter_master_repartition_dict + xprint(datacenter_master_repartition_dict) # This only returns masters and slaves with node_status(master) == 'ok' or 'cluster still assessing' and node_status(slave) == 'ok' or 'cluster still assessing': master_slaves_dict = master_slaves_topo(raw_topo_permut) # generate all 1-permutation sets for master in master_slaves_dict: if debug: - print("master: " + str(master)) + xprint("master: " + str(master)) for slave in master_slaves_dict[master]: raw_topo_copy = copy.deepcopy(raw_topo_permut) raw_topo_1_permutation = simul_failover(master, slave, raw_topo_copy) if debug: - print("slave: " + str(slave)) + xprint("slave: " + str(slave)) server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation) - print server_master_repartition_dict + xprint(server_master_repartition_dict) datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation) - print datacenter_master_repartition_dict + xprint(datacenter_master_repartition_dict) pprint.pprint(raw_topo_1_permutation, width=300) j += 1 if not raw_topo_1_permutation in solution_steps_chain.values(): #print "not already stored" if solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_by_datacenter): - print("Found a solution: ") + xprint("Found a solution: ") pprint.pprint(raw_topo_1_permutation, width=300) # return the first step if i == 0: - print("Sounds like a plan !") - print "only one step : " + str([master, slave]) + xprint("Sounds like a plan !") + xprint("only one step : " + str([master, slave])) plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)} plan['1'] = {'master': master, 'slave': slave, 'target_topo': raw_topo_1_permutation} return master, slave @@ -562,20 +574,20 @@ def find_failover_candidate(raw_topo, server_master_repartition_dict, datacenter #print("slave: "+master_slave_steps_chain['0.'+first][1]) step_key = '0' step_number = 1 - print("Sounds like a plan !") + xprint("Sounds like a plan !") end_position = position+'.'+str(j) solution_steps_chain[end_position] = raw_topo_1_permutation master_slave_steps_chain[end_position] = [master, slave] plan['0'] = {'starting_topo': copy.deepcopy(raw_topo)} for step in end_position.split('.')[1:]: step_key += '.'+step - print "step "+str(step_number) + ": " + str(master_slave_steps_chain[step_key]) + xprint("step "+str(step_number) + ": " + str(master_slave_steps_chain[step_key])) plan[str(step_number)] = {'master': master_slave_steps_chain[step_key][0], 'slave': master_slave_steps_chain[step_key][1], 'target_topo': solution_steps_chain[step_key]} step_number += 1 return master_slave_steps_chain['0.'+first][0], master_slave_steps_chain['0.'+first][1] else: if debug: - print "============== store permutation =============" + xprint("============== store permutation =============") solution_steps_chain[position+'.'+str(j)] = raw_topo_1_permutation master_slave_steps_chain[position+'.'+str(j)] = [master, slave] raw_topo_1_permutations[position+'.'+str(j)] = raw_topo_1_permutation @@ -587,10 +599,10 @@ def solver_check(raw_topo_1_permutation, startup_nodes_by_server, startup_nodes_ server_master_repartition_dict = server_master_repartition(server_list(startup_nodes_by_server), raw_topo_1_permutation) datacenter_master_repartition_dict = datacenter_master_repartition(datacenter_count(startup_nodes_by_datacenter), raw_topo_1_permutation) if debug: - print "solver_check" + xprint("solver_check") pprint.pprint(raw_topo_1_permutation, width=300) - print server_master_repartition_dict - print datacenter_master_repartition_dict + xprint(server_master_repartition_dict) + xprint(datacenter_master_repartition_dict) name_server, master_count_server, master_total_count = detect_imbalance(server_master_repartition_dict) name_datacenter, master_count_datacenter, master_total_count = detect_imbalance(datacenter_master_repartition_dict) if name_server is None and name_datacenter is None: @@ -606,7 +618,7 @@ def simul_failover(master, slave, raw_topo): elif node_name(node) == slave: switch_role(node) if raw_topo_copy == raw_topo: - print("Failed") + xprint("Failed") #print("raw_topo_copy: " + str(raw_topo_copy)) #print("raw_topo: " + str(raw_topo)) return raw_topo @@ -676,18 +688,18 @@ def append_datacenter(raw_topo, startup_nodes_by_datacenter): def same_cluster(startup_nodes_list, raw_topo): if len(startup_nodes_list) != len(raw_topo): - print('Found a different number of nodes.') + xprint('Found a different number of nodes.') return False for node in raw_topo: if node_name(node) not in [node['host'] + ':' + node['port'] for node in startup_nodes_list]: - print(node_name(node) + ' found but unknown.') + xprint(node_name(node) + ' found but unknown.') return False return True def cluster_has_changed(current_cluster_topo, raw_topo): if len(current_cluster_topo) != len(raw_topo): - print('Found a different number of nodes.') + xprint('Found a different number of nodes.') return True for node in raw_topo: found = False @@ -877,7 +889,7 @@ def get_datacenter_for_node(node, startup_nodes_by_datacenter): # ip, port of the slave that will replace his master def failover_without_quorum(ip, port): - print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER TAKEOVER") + xprint(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER TAKEOVER") if not test: proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER", "TAKEOVER"], stdout=PIPE) result = proc.communicate()[0].split() @@ -885,7 +897,7 @@ def failover_without_quorum(ip, port): # ip, port of the slave that will replace his master def failover_with_quorum(ip, port): - print(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER") + xprint(redis_cli + " -h " + ip + " -p " + port + " --raw CLUSTER FAILOVER") if not test: proc = Popen(["timeout", "1", redis_cli, "-h", ip, "-p", port, "--raw", "CLUSTER", "FAILOVER"], stdout=PIPE) result = proc.communicate()[0].split() @@ -948,7 +960,7 @@ class MyHandler(BaseHTTPRequestHandler): self.end_headers() delta = time.mktime(time.gmtime()) - last_loop_epoch if delta > unresponsive_timeout: - print("manager main loop is unresponsive!") + xprint("manager main loop is unresponsive!") answer = "failed" cluster_state = 'Unknown state' request_active = False @@ -1006,7 +1018,7 @@ class MyHandler(BaseHTTPRequestHandler): self.end_headers() self.wfile.write("CANNOT PROCEED: failed manager") elif no_repartition_duration != 0 and slave_only_engine != slave_only_engine_req: - print("A request has already been made to only have slaves on engine " + slave_only_engine + ".") + xprint("A request has already been made to only have slaves on engine " + slave_only_engine + ".") self.send_response(403) self.send_header('Content-type', 'text/plain') self.end_headers() @@ -1027,7 +1039,7 @@ class MyHandler(BaseHTTPRequestHandler): self.send_response(404) def log_message(self, format, *args): if debug: - sys.stderr.write("%s - - [%s] %s\n" % + xprint("%s - - [%s] %s" % (self.address_string(), self.log_date_time_string(), format%args)) @@ -1066,17 +1078,17 @@ if __name__ == "__main__": webserver_thread = WebThread() webserver_thread.start() - print(api_help()) + xprint(api_help()) try: main() except KeyboardInterrupt: - print 'Interrupted' + xprint('Interrupted') httpd.shutdown() httpd.server_close() sys.exit(0) except: - print 'We crashed!' - print traceback.format_exc() + xprint('We crashed!') + xprint(traceback.format_exc()) httpd.shutdown() httpd.server_close() sys.exit(0)