Commit 7e9c2645 authored by root
Browse files

Fixes and logs to ease troubleshooting

parent f4ea2447
......@@ -34,7 +34,7 @@ CONF.register_opts(THRESH_OPTS, group="thresholds")
# Path of the YAML file that wake_up_server()/sleep_server() open to find
# the IPMI settings of a hypervisor.
SERVERS_FILE = "./servers.yml"
# Define a default handler at INFO logging level
# NOTE(review): the two basicConfig() lines below look like the removed
# (DEBUG) and added (INFO) sides of this diff hunk rather than two calls that
# both exist in the file -- confirm against the committed source.
logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)
class NotificationEndpoint(object):
......@@ -64,20 +64,42 @@ class NotificationEndpoint(object):
LOG.error(event_type)
LOG.error(payload)
def get_corrected_stats(self):
    """Return aggregate hypervisor statistics with powered-down hosts removed.

    Fetches the cloud-wide statistics from Nova and subtracts the ``vcpus``,
    ``free_ram_mb`` and ``free_disk_gb`` of every hypervisor reported with
    ``state='down'`` and ``status='enabled'``.  The adjustment is applied in
    place to the statistics object, which is then returned.

    NOTE(review): only enabled-but-down hypervisors are subtracted.  An
    earlier variant also subtracted disabled hosts; presumably those are
    already left out of Nova's aggregate -- confirm against the Nova release
    in use.
    """
    stats = self.nova_client.hypervisor_stats.statistics()
    down_enabled = self.nova_client.hypervisors.findall(
        state='down', status='enabled')
    for server in down_enabled:
        stats.vcpus -= server.vcpus
        stats.free_ram_mb -= server.free_ram_mb
        stats.free_disk_gb -= server.free_disk_gb
    # Pass the attribute dict as a logging argument so the message is only
    # rendered when an INFO record is actually emitted.
    LOG.info("Corrected stats: %s", vars(stats))
    return stats
def cloud_over_thresholds(self):
    """Tell whether spare capacity is at or above every configured threshold.

    Works on the corrected statistics returned by ``get_corrected_stats()``
    and compares available vCPUs (``vcpus - vcpus_used``), ``free_ram_mb``
    and ``free_disk_gb`` with the matching ``CONF.thresholds`` options.

    Returns:
        False as soon as one resource is below its threshold (the resource
        is named in the log), True when none of them is.
    """
    stats = self.get_corrected_stats()
    available_vcpus = stats.vcpus - stats.vcpus_used
    # Every failing check returns, so plain guard clauses are enough; the
    # former trailing ``else: True`` was an expression with no effect.
    if available_vcpus < CONF.thresholds.vcpus:
        LOG.info("Cloud under the VCPU threshold")
        return False
    if stats.free_ram_mb < CONF.thresholds.free_ram_mb:
        LOG.info("Cloud under the RAM threshold")
        return False
    if stats.free_disk_gb < CONF.thresholds.free_disk_gb:
        LOG.info("Cloud under the Disk threshold")
        return False
    LOG.info("Cloud over thresholds")
    return True
class Waker(NotificationEndpoint):
......@@ -87,53 +109,60 @@ class Waker(NotificationEndpoint):
)
def info(self, ctxt, publisher_id, event_type, payload, metadata):
# Notification handler of the waker endpoint: logs the incoming notification
# and, when cloud_over_thresholds() is false, goes through the powered-down
# hypervisors and calls wake_up_server() for those that pass the checks below.
# NOTE(review): this span is a rendered diff whose indentation and +/- markers
# were stripped, so superseded and replacement lines are interleaved; it is
# not one linear method body.  The "presumably" notes mark the guessed sides.
LOG.debug("\n\nINFO")
# presumably superseded DEBUG logging, followed by its INFO replacement
LOG.debug(publisher_id)
LOG.debug(event_type)
LOG.info(publisher_id)
LOG.info(event_type)
LOG.debug(payload)
# presumably superseded: guard without `not`, and a client-side filter over
# hypervisors.list() that also required running_vms == 0
if self.cloud_over_thresholds():
hypervisors = self.nova_client.hypervisors.list()
slept_servers = [
hyp for hyp in hypervisors if hyp.running_vms == 0 and
hyp.state == "down" and
hyp.status == "enabled"
]
# presumably the replacement: act only when the cloud is NOT over thresholds,
# and select down/enabled hypervisors through findall()
if not self.cloud_over_thresholds():
LOG.info("Starting wake up procedure...")
slept_servers = self.nova_client.hypervisors.findall(
state= "down",
status="enabled"
)
for server in slept_servers:
# presumably superseded: projection computed from the raw statistics by
# subtracting the server's resources, with DEBUG-level dump
stats = self.nova_client.hypervisors.statistics()
new_vcpus = stats.vcpus - server.vcpus
new_free_ram_mb = stats.free_ram_mb - server.free_ram_mb
new_free_disk_gb = stats.free_disk_gb - server.free_disk_gb
LOG.debug("\n\n======================================")
LOG.debug("INFO")
LOG.debug("new_vcpus: %s" % new_vcpus)
LOG.debug("new_free_ram_mb: %s" % new_free_ram_mb)
LOG.debug("new_free_disk_gb: %s" % new_free_disk_gb)
LOG.debug("======================================\n\n")
# presumably the replacement: capacity projected for the case where this
# server is powered on, i.e. corrected stats plus the server's resources.
# The stats are fetched again on every loop iteration.
stats = self.get_corrected_stats()
new_vcpus = stats.vcpus - stats.vcpus_used + server.vcpus
new_free_ram_mb = stats.free_ram_mb + server.free_ram_mb
new_free_disk_gb = stats.free_disk_gb + server.free_disk_gb
# TODO(thiagop): what is disk_available_least?
# Each threshold test appears twice below (`>` then `<`); presumably the `>`
# form is the superseded line and the `<` form its replacement, which skips a
# server when even the projected value stays below the threshold.
if new_vcpus > CONF.thresholds.vcpus:
if new_vcpus < CONF.thresholds.vcpus:
continue
elif new_free_ram_mb > CONF.thresholds.free_ram_mb:
elif new_free_ram_mb < CONF.thresholds.free_ram_mb:
continue
elif new_free_disk_gb > CONF.thresholds.free_disk_gb:
elif new_free_disk_gb < CONF.thresholds.free_disk_gb:
continue
else:
LOG.info("")
LOG.info("")
LOG.info("======================================")
LOG.info("Waking up server: %s" % server.hypervisor_hostname)
LOG.info("new_vcpus: %s" % new_vcpus)
LOG.info("new_free_ram_mb: %s" % new_free_ram_mb)
LOG.info("new_free_disk_gb: %s" % new_free_disk_gb)
LOG.info("======================================")
LOG.info("")
LOG.info("")
# NOTE(review): no `break` follows this call, so every server that passes the
# checks is woken during a single notification -- confirm that is intended.
self.wake_up_server(server)
def wake_up_server(self, server):
    """Power on *server* over IPMI using its entry in the servers file.

    The file at ``SERVERS_FILE`` is read as a mapping from hypervisor
    hostname to a list whose first item provides ``mgmt_if_addr``,
    ``username`` and ``password``.  When the hostname has no entry a single
    "not found" message is logged and nothing is executed.

    Args:
        server: hypervisor object; only ``hypervisor_hostname`` is read.
    """
    import subprocess

    hostname = server.hypervisor_hostname
    LOG.info("'%s' selected for wake up." % hostname)
    with open(SERVERS_FILE, 'r') as y:
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary objects and is rejected by recent PyYAML releases;
        # consider yaml.safe_load() if servers.yml is plain data.
        server_data_dict = yaml.load(y)

    # The hostname is the mapping key, so look it up directly instead of
    # scanning every entry (which logged "not found" once per other host).
    entries = (server_data_dict or {}).get(hostname)
    if not entries:
        LOG.info("IPMI config for '%s' not found on servers.yml" % hostname)
        return

    s_data = entries[0]
    LOG.info("Found IPMI conf for '%s'" % hostname)
    # Argument list instead of a shell string: values read from the file can
    # no longer be interpreted by a shell.
    # NOTE(review): the password is still visible in the process list via
    # -P; ipmitool's -E / -f options avoid that -- consider switching.
    cmd = [
        "ipmitool", "-I", "lanplus",
        "-H", str(s_data.get("mgmt_if_addr")),
        "-U", str(s_data.get("username")),
        "-P", str(s_data.get("password")),
        "power", "on",
    ]
    LOG.info("Powering on '%s'" % hostname)
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        # os.system() never raised when the binary was missing; keep the
        # call best-effort and report the problem instead.
        LOG.error("Could not run ipmitool for '%s': %s" % (hostname, err))
        return
    if status != 0:
        LOG.error("ipmitool exited with status %s for '%s'"
                  % (status, hostname))
......@@ -144,30 +173,24 @@ class Morpheus(NotificationEndpoint):
)
def info(self, ctxt, publisher_id, event_type, payload, metadata):
# Notification handler of the Morpheus endpoint: logs the incoming
# notification and, when cloud_over_thresholds() is true, goes through the
# empty, running hypervisors and calls sleep_server() for those whose removal
# keeps every projected value at or above its threshold.
# NOTE(review): this span is a rendered diff whose indentation and +/- markers
# were stripped, so superseded and replacement lines are interleaved, and a
# hunk header further down hides at least one line of the method.  The
# "presumably" notes mark the guessed sides.
LOG.debug("\n\nINFO")
# presumably superseded DEBUG logging, followed by its INFO replacement
LOG.debug(publisher_id)
LOG.debug(event_type)
LOG.info(publisher_id)
LOG.info(event_type)
LOG.debug(payload)
# presumably superseded: guard with `not`, and a client-side filter over
# hypervisors.list()
if not self.cloud_over_thresholds():
hypervisors = self.nova_client.hypervisors.list()
empty_servers = [
hyp for hyp in hypervisors
if hyp.running_vms == 0 and
hyp.state == "up" and
hyp.status == "enabled"
]
# NOTE(review): cloud_over_thresholds() is called twice in a row here (once
# for logging, once for the guard); each call fetches statistics from Nova
# again and writes its own log lines -- consider storing the result once.
LOG.info(self.cloud_over_thresholds())
if self.cloud_over_thresholds():
LOG.info("Start looking for servers to shutdown...")
empty_servers = self.nova_client.hypervisors.findall(
running_vms=0,
state= "up",
status="enabled"
)
for server in empty_servers:
# presumably superseded: projection taken from the raw statistics
stats = self.nova_client.hypervisors.statistics()
new_vcpus = stats.vcpus - server.vcpus
# presumably the replacement: capacity projected for the case where this
# server is shut down, i.e. corrected stats minus the server's resources.
# The stats are fetched again on every loop iteration.
stats = self.get_corrected_stats()
new_vcpus = stats.vcpus - stats.vcpus_used - server.vcpus
new_free_ram_mb = stats.free_ram_mb - server.free_ram_mb
new_free_disk_gb = stats.free_disk_gb - server.free_disk_gb
# TODO(thiagop): what is disk_available_least?
# presumably superseded DEBUG dump of the projection (the INFO dump in the
# `else` branch below takes its place)
LOG.debug("\n\n======================================")
LOG.debug("INFO")
LOG.debug("new_vcpus: %s" % new_vcpus)
LOG.debug("new_free_ram_mb: %s" % new_free_ram_mb)
LOG.debug("new_free_disk_gb: %s" % new_free_disk_gb)
LOG.debug("======================================\n\n")
# Skip the server when shutting it down would push a projected value below
# its threshold.
if new_vcpus < CONF.thresholds.vcpus:
continue
elif new_free_ram_mb < CONF.thresholds.free_ram_mb:
......@@ -175,23 +198,37 @@ class Morpheus(NotificationEndpoint):
elif new_free_disk_gb < CONF.thresholds.free_disk_gb:
continue
else:
LOG.info("")
LOG.info("")
LOG.info("======================================")
LOG.info("Shutting down server: %s" % server.hypervisor_hostname)
LOG.info("new_vcpus: %s" % new_vcpus)
LOG.info("new_free_ram_mb: %s" % new_free_ram_mb)
LOG.info("new_free_disk_gb: %s" % new_free_disk_gb)
LOG.info("======================================")
LOG.info("")
LOG.info("")
# NOTE(review): no `break` follows this call, so every server that passes the
# checks is shut down during a single notification -- confirm that is intended.
self.sleep_server(server)
def sleep_server(self, server):
    """Power off *server* over IPMI using its entry in the servers file.

    The file at ``SERVERS_FILE`` is read as a mapping from hypervisor
    hostname to a list whose first item provides ``mgmt_if_addr``,
    ``username`` and ``password``.  When the hostname has no entry a single
    "not found" message is logged and nothing is executed.

    Args:
        server: hypervisor object; only ``hypervisor_hostname`` is read.
    """
    import subprocess

    hostname = server.hypervisor_hostname
    with open(SERVERS_FILE, 'r') as y:
        LOG.info("'%s' selected for shutdown." % hostname)
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary objects and is rejected by recent PyYAML releases;
        # consider yaml.safe_load() if servers.yml is plain data.
        server_data_dict = yaml.load(y)

    # The hostname is the mapping key, so look it up directly instead of
    # scanning every entry (which logged "not found" once per other host).
    entries = (server_data_dict or {}).get(hostname)
    if not entries:
        LOG.info("IPMI config for '%s' not found on servers.yml" % hostname)
        return

    s_data = entries[0]
    LOG.info("Found IPMI conf for '%s'" % hostname)
    # Argument list instead of a shell string: values read from the file can
    # no longer be interpreted by a shell.
    # NOTE(review): the password is still visible in the process list via
    # -P; ipmitool's -E / -f options avoid that -- consider switching.
    cmd = [
        "ipmitool", "-I", "lanplus",
        "-H", str(s_data.get("mgmt_if_addr")),
        "-U", str(s_data.get("username")),
        "-P", str(s_data.get("password")),
        "power", "off",
    ]
    LOG.info("Shutting down '%s'" % hostname)
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        # os.system() never raised when the binary was missing; keep the
        # call best-effort and report the problem instead.
        LOG.error("Could not run ipmitool for '%s': %s" % (hostname, err))
        return
    if status != 0:
        LOG.error("ipmitool exited with status %s for '%s'"
                  % (status, hostname))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment