Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Lucas Cavalcante
autonomic-actuator
Commits
7e9c2645
Commit
7e9c2645
authored
Dec 19, 2017
by
root
Browse files
Fixes and logs to ease troubleshooting
parent
f4ea2447
Changes
1
Hide whitespace changes
Inline
Side-by-side
actuator.py
View file @
7e9c2645
...
...
@@ -34,7 +34,7 @@ CONF.register_opts(THRESH_OPTS, group="thresholds")
SERVERS_FILE
=
"./servers.yml"
# Define a default handler at INFO logging level
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
logging
.
basicConfig
(
level
=
logging
.
INFO
)
class
NotificationEndpoint
(
object
):
...
...
@@ -64,20 +64,42 @@ class NotificationEndpoint(object):
LOG
.
error
(
event_type
)
LOG
.
error
(
payload
)
def
get_corrected_stats
(
self
):
stats
=
self
.
nova_client
.
hypervisor_stats
.
statistics
()
down_enabled
=
self
.
nova_client
.
hypervisors
.
findall
(
state
=
'down'
,
status
=
'enabled'
)
# disabled_up = self.nova_client.hypervisors.findall(
# state='up', status='disabled')
# down_disabled = self.nova_client.hypervisors.findall(
# state='down', status='disabled')
# down_or_disabled = set(down_enabled + disabled_up + down_disabled)
# for server in down_or_disabled:
for
server
in
down_enabled
:
stats
.
vcpus
-=
server
.
vcpus
stats
.
free_ram_mb
-=
server
.
free_ram_mb
stats
.
free_disk_gb
-=
server
.
free_disk_gb
LOG
.
info
(
"Corrected stats: %s"
%
str
(
stats
.
__dict__
))
return
stats
def
cloud_over_thresholds
(
self
):
stats
=
self
.
nova_client
.
hypervisors
.
statistic
s
()
available_vcpu
=
stats
.
vcpus
-
stats
.
vcpus_used
if
available_vcpu
<
CONF
.
thresholds
.
vcpus
:
LOG
.
info
(
"Cloud
ov
er the VCPU threshold"
)
stats
=
self
.
get_corrected_stat
s
()
available_vcpu
s
=
stats
.
vcpus
-
stats
.
vcpus_used
if
available_vcpu
s
<
CONF
.
thresholds
.
vcpus
:
LOG
.
info
(
"Cloud
und
er the VCPU threshold"
)
return
False
if
stats
.
free_ram_mb
<
CONF
.
thresholds
.
free_ram_mb
:
LOG
.
info
(
"Cloud
ov
er the RAM threshold"
)
el
if
stats
.
free_ram_mb
<
CONF
.
thresholds
.
free_ram_mb
:
LOG
.
info
(
"Cloud
und
er the RAM threshold"
)
return
False
if
stats
.
free_disk_gb
<
CONF
.
thresholds
.
free_disk_gb
:
LOG
.
info
(
"Cloud
ov
er the Disk threshold"
)
el
if
stats
.
free_disk_gb
<
CONF
.
thresholds
.
free_disk_gb
:
LOG
.
info
(
"Cloud
und
er the Disk threshold"
)
return
False
else
:
True
LOG
.
info
(
"Cloud over thresholds"
)
return
True
class
Waker
(
NotificationEndpoint
):
...
...
@@ -87,53 +109,60 @@ class Waker(NotificationEndpoint):
)
def
info
(
self
,
ctxt
,
publisher_id
,
event_type
,
payload
,
metadata
):
LOG
.
debug
(
"
\n\n
INFO"
)
LOG
.
debug
(
publisher_id
)
LOG
.
debug
(
event_type
)
LOG
.
info
(
publisher_id
)
LOG
.
info
(
event_type
)
LOG
.
debug
(
payload
)
if
self
.
cloud_over_thresholds
():
hypervisors
=
self
.
nova_client
.
hypervisors
.
list
(
)
slept_servers
=
[
hyp
for
hyp
in
hypervisors
if
hyp
.
running_vms
==
0
and
hyp
.
state
==
"down"
and
hyp
.
status
==
"enabled"
]
if
not
self
.
cloud_over_thresholds
():
LOG
.
info
(
"Starting wake up procedure..."
)
slept_servers
=
self
.
nova_client
.
hypervisors
.
findall
(
state
=
"down"
,
status
=
"enabled"
)
for
server
in
slept_servers
:
stats
=
self
.
nova_client
.
hypervisors
.
statistics
()
new_vcpus
=
stats
.
vcpus
-
server
.
vcpus
new_free_ram_mb
=
stats
.
free_ram_mb
-
server
.
free_ram_mb
new_free_disk_gb
=
stats
.
free_disk_gb
-
server
.
free_disk_gb
LOG
.
debug
(
"
\n\n
======================================"
)
LOG
.
debug
(
"INFO"
)
LOG
.
debug
(
"new_vcpus: %s"
%
new_vcpus
)
LOG
.
debug
(
"new_free_ram_mb: %s"
%
new_free_ram_mb
)
LOG
.
debug
(
"new_free_disk_gb: %s"
%
new_free_disk_gb
)
LOG
.
debug
(
"======================================
\n\n
"
)
stats
=
self
.
get_corrected_stats
()
new_vcpus
=
stats
.
vcpus
-
stats
.
vcpus_used
+
server
.
vcpus
new_free_ram_mb
=
stats
.
free_ram_mb
+
server
.
free_ram_mb
new_free_disk_gb
=
stats
.
free_disk_gb
+
server
.
free_disk_gb
# TODO(thiagop): what is disk_available_least?
if
new_vcpus
>
CONF
.
thresholds
.
vcpus
:
if
new_vcpus
<
CONF
.
thresholds
.
vcpus
:
continue
elif
new_free_ram_mb
>
CONF
.
thresholds
.
free_ram_mb
:
elif
new_free_ram_mb
<
CONF
.
thresholds
.
free_ram_mb
:
continue
elif
new_free_disk_gb
>
CONF
.
thresholds
.
free_disk_gb
:
elif
new_free_disk_gb
<
CONF
.
thresholds
.
free_disk_gb
:
continue
else
:
LOG
.
info
(
""
)
LOG
.
info
(
""
)
LOG
.
info
(
"======================================"
)
LOG
.
info
(
"Waking up server: %s"
%
server
.
hypervisor_hostname
)
LOG
.
info
(
"new_vcpus: %s"
%
new_vcpus
)
LOG
.
info
(
"new_free_ram_mb: %s"
%
new_free_ram_mb
)
LOG
.
info
(
"new_free_disk_gb: %s"
%
new_free_disk_gb
)
LOG
.
info
(
"======================================"
)
LOG
.
info
(
""
)
LOG
.
info
(
""
)
self
.
wake_up_server
(
server
)
def
wake_up_server
(
self
,
server
):
LOG
.
info
(
"'%s' selected for wake up."
%
server
.
hypervisor_hostname
)
with
open
(
SERVERS_FILE
,
'r'
)
as
y
:
server_data_dict
=
yaml
.
load
(
y
)
for
s_name
,
s_data
in
server_data_dict
.
items
():
s_data
=
s_data
[
0
]
if
s_data
.
get
(
"hostname"
)
!=
server
.
hypervisor_hostname
:
if
s_name
!=
server
.
hypervisor_hostname
:
LOG
.
info
(
"IPMI config for '%s' not found on servers.yml"
%
server
.
hypervisor_hostname
)
continue
LOG
.
info
(
"Found IPMI conf for '%s'"
%
server
.
hypervisor_hostname
)
mgmt_if_addr
=
s_data
.
get
(
"mgmt_if_addr"
)
username
=
s_data
.
get
(
"username"
)
password
=
s_data
.
get
(
"password"
)
cmd
=
(
"ipmitool -I lanplus -H %s -U %s -P %s power on"
%
(
mgmt_if_addr
,
username
,
password
))
LOG
.
info
(
"Powering on '%s'"
%
server
.
hypervisor_hostname
)
os
.
system
(
cmd
)
...
...
@@ -144,30 +173,24 @@ class Morpheus(NotificationEndpoint):
)
def
info
(
self
,
ctxt
,
publisher_id
,
event_type
,
payload
,
metadata
):
LOG
.
debug
(
"
\n\n
INFO"
)
LOG
.
debug
(
publisher_id
)
LOG
.
debug
(
event_type
)
LOG
.
info
(
publisher_id
)
LOG
.
info
(
event_type
)
LOG
.
debug
(
payload
)
if
not
self
.
cloud_over_thresholds
():
hypervisors
=
self
.
nova_client
.
hypervisors
.
list
()
empty_servers
=
[
hyp
for
hyp
in
hypervisors
if
hyp
.
running_vms
==
0
and
hyp
.
state
==
"up"
and
hyp
.
status
==
"enabled"
]
LOG
.
info
(
self
.
cloud_over_thresholds
())
if
self
.
cloud_over_thresholds
():
LOG
.
info
(
"Start looking for servers to shutdown..."
)
empty_servers
=
self
.
nova_client
.
hypervisors
.
findall
(
running_vms
=
0
,
state
=
"up"
,
status
=
"enabled"
)
for
server
in
empty_servers
:
stats
=
self
.
nova_client
.
hypervisors
.
statistic
s
()
new_vcpus
=
stats
.
vcpus
-
server
.
vcpus
stats
=
self
.
get_corrected_stat
s
()
new_vcpus
=
stats
.
vcpus
-
stats
.
vcpus_used
-
server
.
vcpus
new_free_ram_mb
=
stats
.
free_ram_mb
-
server
.
free_ram_mb
new_free_disk_gb
=
stats
.
free_disk_gb
-
server
.
free_disk_gb
# TODO(thiagop): what is disk_available_least?
LOG
.
debug
(
"
\n\n
======================================"
)
LOG
.
debug
(
"INFO"
)
LOG
.
debug
(
"new_vcpus: %s"
%
new_vcpus
)
LOG
.
debug
(
"new_free_ram_mb: %s"
%
new_free_ram_mb
)
LOG
.
debug
(
"new_free_disk_gb: %s"
%
new_free_disk_gb
)
LOG
.
debug
(
"======================================
\n\n
"
)
if
new_vcpus
<
CONF
.
thresholds
.
vcpus
:
continue
elif
new_free_ram_mb
<
CONF
.
thresholds
.
free_ram_mb
:
...
...
@@ -175,23 +198,37 @@ class Morpheus(NotificationEndpoint):
elif
new_free_disk_gb
<
CONF
.
thresholds
.
free_disk_gb
:
continue
else
:
LOG
.
info
(
""
)
LOG
.
info
(
""
)
LOG
.
info
(
"======================================"
)
LOG
.
info
(
"Shutting down server: %s"
%
server
.
hypervisor_hostname
)
LOG
.
info
(
"new_vcpus: %s"
%
new_vcpus
)
LOG
.
info
(
"new_free_ram_mb: %s"
%
new_free_ram_mb
)
LOG
.
info
(
"new_free_disk_gb: %s"
%
new_free_disk_gb
)
LOG
.
info
(
"======================================"
)
LOG
.
info
(
""
)
LOG
.
info
(
""
)
self
.
sleep_server
(
server
)
def
sleep_server
(
self
,
server
):
with
open
(
SERVERS_FILE
,
'r'
)
as
y
:
LOG
.
info
(
"'%s' selected for shutdown."
%
server
.
hypervisor_hostname
)
server_data_dict
=
yaml
.
load
(
y
)
for
s_name
,
s_data
in
server_data_dict
.
items
():
s_data
=
s_data
[
0
]
if
s_data
.
get
(
"hostname"
)
!=
server
.
hypervisor_hostname
:
if
s_name
!=
server
.
hypervisor_hostname
:
LOG
.
info
(
"IPMI config for '%s' not found on servers.yml"
%
server
.
hypervisor_hostname
)
continue
LOG
.
info
(
"Found IPMI conf for '%s'"
%
server
.
hypervisor_hostname
)
mgmt_if_addr
=
s_data
.
get
(
"mgmt_if_addr"
)
username
=
s_data
.
get
(
"username"
)
password
=
s_data
.
get
(
"password"
)
cmd
=
(
"ipmitool -I lanplus -H %s -U %s -P %s power off"
%
(
mgmt_if_addr
,
username
,
password
))
LOG
.
info
(
"Shutting down '%s'"
%
server
.
hypervisor_hostname
)
os
.
system
(
cmd
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment