Skip to content

Commit

Permalink
zuul: managerless: add better logging
Browse files Browse the repository at this point in the history
Signed-off-by: Dr. Jens Harbott <[email protected]>
  • Loading branch information
osfrickler committed Jun 14, 2024
1 parent 16a35ad commit 2b2e88e
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 5 deletions.
8 changes: 4 additions & 4 deletions playbooks/managerless/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
-e @/opt/configuration/environments/images.yml \
-e @/opt/configuration/environments/configuration.yml \
-i localhost, \
/tmp/managerless-part-2.yml
/tmp/managerless-part-2.yml &> /tmp/logs/managerless/part-2.log
changed_when: true

- name: Run managerless part 3
Expand All @@ -37,7 +37,7 @@
-e @/opt/configuration/environments/secrets.yml \
-i testbed-manager.testbed.osism.xyz, \
--vault-password-file /opt/configuration/environments/.vault_pass \
/tmp/managerless-part-3.yml
/tmp/managerless-part-3.yml &> /tmp/logs/managerless/part-3.log
changed_when: true

- name: Run managerless part 4
Expand All @@ -55,7 +55,7 @@
-e @/opt/configuration/environments/secrets.yml \
-i testbed-manager.testbed.osism.xyz, \
--vault-password-file /opt/configuration/environments/.vault_pass \
/tmp/managerless-part-4.yml
/tmp/managerless-part-4.yml &> /tmp/logs/managerless/part-4.log
changed_when: true

- name: Fetch node_0_host address
Expand Down Expand Up @@ -149,5 +149,5 @@
-e @/opt/configuration/environments/secrets.yml \
-i testbed-manager.testbed.osism.xyz, \
--vault-password-file /opt/configuration/environments/.vault_pass \
/tmp/managerless-part-5.yml
/tmp/managerless-part-5.yml &> /tmp/logs/managerless/part-5.log
changed_when: true
2 changes: 2 additions & 0 deletions playbooks/managerless/post.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

vars:
stage_dir: "{{ ansible_user_dir }}/zuul-output"
zuul_copy_output:
/tmp/logs/.*/.*: logs_txt

roles:
- stage-output
29 changes: 28 additions & 1 deletion playbooks/managerless/pre.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,33 @@
cmd: "sed -i 's/cloud_env: .*/cloud_env: {{ cloud_env }}/g' /tmp/managerless-part-4.yml"
changed_when: true

# Root directory for the log files collected by this job.
- name: Ensure /tmp/logs/ dir
  ansible.builtin.file:
    state: directory
    path: /tmp/logs

# Create the per-stage sub-directories below /tmp/logs.
# "managerless" is included because the "Run managerless part N" tasks
# redirect their output to /tmp/logs/managerless/part-N.log and a shell
# redirect cannot create missing parent directories.
- name: Ensure node directories
  ansible.builtin.file:
    path: "/tmp/logs/{{ item }}"
    state: "directory"
    # Quoted so the value is always handled as an octal mode string.
    mode: "0777"
  with_items:
    - "ansible"
    - "managerless"
    - "pre"
    - "terraform"

# Run scripts/get_logs.sh with LOG_DIR pointing at the "pre" log directory.
# failed_when is false, so a failing script never fails the play; the
# registered result is consumed by the following debug task.
- name: Run diagnostics script
  ansible.builtin.script: "{{ zuul.project.src_dir }}/scripts/get_logs.sh"
  environment:
    LOG_DIR: /tmp/logs/pre
  become: true
  register: get_logs_result
  failed_when: false

# Show what the diagnostics script printed. The script task is best effort
# (failed_when: false), so fall back to an empty string when it produced no
# stdout instead of failing here on an undefined attribute.
- name: Print get_logs output
  ansible.builtin.debug:
    msg: "{{ get_logs_result.stdout | default('') }}"

- name: Create infrastructure
hosts: all

Expand Down Expand Up @@ -218,7 +245,7 @@
tasks:
# The command module does not pass its input through a shell, so a
# redirection operator would be handed to ansible-playbook as a literal
# argument. Use the shell module, and the POSIX "> file 2>&1" form because
# the default shell is /bin/sh, where "&>" is not a redirection.
- name: Run managerless part 1
  ansible.builtin.shell: |
    ansible-playbook -i localhost, /tmp/managerless-part-1.yml > /tmp/logs/managerless/part-1.log 2>&1
  changed_when: true

- name: Install ansible collections
Expand Down
143 changes: 143 additions & 0 deletions scripts/get_logs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/bin/bash
# Diagnostics collector: defines copy_logs() below and runs it once.
# Output is written below ${LOG_DIR} (defaults to /tmp/logs when unset).

# Explicitly disable exit-on-error: collection is best effort, so a failing
# command must not prevent the remaining diagnostics from being gathered.
set +o errexit

#######################################
# Collect host, container and service diagnostics below ${LOG_DIR}.
#
# Every step is best effort: a missing tool or path produces an error
# message for that step only and collection continues.
#
# Globals:
#   LOG_DIR (read and written) - destination directory; defaults to
#                                /tmp/logs when unset or empty.
# Arguments:
#   None
# Outputs:
#   Files below ${LOG_DIR}; verbose copy output and the final size of
#   ${LOG_DIR} in MiB (du -sm) on stdout.
# Returns:
#   Exit status of the final du call.
#######################################
copy_logs() {
    # Deliberately not local: keeps the original behaviour of exposing the
    # resolved directory to whatever runs after this function.
    LOG_DIR=${LOG_DIR:-/tmp/logs}

    local volumes_dir="/var/lib/docker/volumes"
    local logs_tail_parameter="all"
    local host service container log_file
    local -a container_ids=() container_names=()

    host=$(hostname)

    # Create every destination up front. Redirections and cp cannot create
    # missing parent directories, so without this nothing would be stored.
    mkdir -p -- "${LOG_DIR}"/{kolla,kolla_configs,system_configs,system_logs,container_logs}

    cp -rnL -- "${volumes_dir}"/kolla_logs/_data/* "${LOG_DIR}/kolla/"
    cp -rnL -- /etc/kolla/* "${LOG_DIR}/kolla_configs/"
    # Don't save the IPA images.
    rm -f -- "${LOG_DIR}"/kolla_configs/config/ironic/ironic-agent.{kernel,initramfs}
    cp -rL -- /etc/{hostname,hosts,host.conf,resolv.conf,nsswitch.conf,systemd} "${LOG_DIR}/system_configs/"
    # copy docker configs if used
    cp -rL -- /etc/docker/ "${LOG_DIR}/system_configs/"
    # Remove /var/log/kolla link to not double the data uploaded
    if [[ -L /var/log/kolla ]]; then
        unlink /var/log/kolla
    fi
    cp -rvnL -- /var/log/* "${LOG_DIR}/system_logs/"

    journalctl --no-pager > "${LOG_DIR}/system_logs/syslog.txt"
    journalctl --no-pager -u docker.service > "${LOG_DIR}/system_logs/docker.log"
    journalctl --no-pager -u containerd.service > "${LOG_DIR}/system_logs/containerd.log"

    cp -r -- /etc/sudoers.d "${LOG_DIR}/system_logs/"
    cp -- /etc/sudoers "${LOG_DIR}/system_logs/sudoers.txt"

    df -h > "${LOG_DIR}/system_logs/df.txt"
    free > "${LOG_DIR}/system_logs/free.txt"
    lsblk > "${LOG_DIR}/system_logs/lsblk.txt"
    mount > "${LOG_DIR}/system_logs/mount.txt"
    env > "${LOG_DIR}/system_logs/env.txt"
    systemctl status > "${LOG_DIR}/system_logs/systemctl_status.txt"
    systemctl list-units --all > "${LOG_DIR}/system_logs/systemctl_units.txt"
    systemctl list-unit-files > "${LOG_DIR}/system_logs/systemctl_unit_files.txt"

    # The subshells below enable tracing so that each command line is
    # recorded next to its output; &> captures the trace (stderr) as well.
    (
        set -x
        ip a
        ip m
        ip l
        ip r
        ip -6 r
        ip neigh
        ping -c 4 "${host}"
        ping6 -c 4 "${host}"
    ) &> "${LOG_DIR}/system_logs/ip.txt"

    (
        set -x
        iptables -t raw -v -n -L
        iptables -t mangle -v -n -L
        iptables -t nat -v -n -L
        iptables -t filter -v -n -L
    ) &> "${LOG_DIR}/system_logs/iptables.txt"

    (
        set -x
        ip6tables -t raw -v -n -L
        ip6tables -t mangle -v -n -L
        ip6tables -t nat -v -n -L
        ip6tables -t filter -v -n -L
    ) &> "${LOG_DIR}/system_logs/ip6tables.txt"

    ss -nep > "${LOG_DIR}/system_logs/ss.txt"

    ss -nep -l > "${LOG_DIR}/system_logs/ss_l.txt"

    (
        set -x
        getent ahostsv4 "${host}"
        getent ahostsv6 "${host}"
    ) &> "${LOG_DIR}/system_logs/getent_ahostsvX.txt"

    sysctl -a &> "${LOG_DIR}/system_logs/sysctl.txt"
    lsmod &> "${LOG_DIR}/system_logs/lsmod.txt"

    if command -v dpkg > /dev/null; then
        dpkg -l > "${LOG_DIR}/system_logs/dpkg-l.txt"
    fi
    if command -v rpm > /dev/null; then
        rpm -qa > "${LOG_DIR}/system_logs/rpm-qa.txt"
    fi

    # final memory usage and process list
    ps -eo user,pid,ppid,lwp,%cpu,%mem,size,rss,cmd > "${LOG_DIR}/system_logs/ps.txt"

    # container engine related information
    mapfile -t container_ids < <(docker ps -aq)
    (
        docker info &&
            docker images &&
            docker ps -a &&
            docker network ls &&
            # "docker inspect" without arguments is an error, so only call
            # it when at least one container exists.
            if ((${#container_ids[@]} > 0)); then
                docker inspect "${container_ids[@]}"
            fi
    ) > "${LOG_DIR}/system_logs/docker-info.txt"

    # save dbus services
    dbus-send --system --print-reply --dest=org.freedesktop.DBus /org/freedesktop/DBus org.freedesktop.DBus.ListNames > "${LOG_DIR}/system_logs/dbus-services.txt"

    # cephadm related logs
    if command -v cephadm > /dev/null; then
        mkdir -p -- "${LOG_DIR}/ceph"
        sudo cp /etc/ceph/ceph.conf "${LOG_DIR}/ceph"
        sudo cp /var/run/ceph/*/cluster.yml "${LOG_DIR}/ceph/cluster.yml"
        sudo cp /var/log/ceph/cephadm.log* "${LOG_DIR}/ceph/"
        sudo cephadm shell -- ceph --connect-timeout 5 -s > "${LOG_DIR}/ceph/ceph_s.txt"
        sudo cephadm shell -- ceph --connect-timeout 5 osd tree > "${LOG_DIR}/ceph/ceph_osd_tree.txt"
    fi

    # bifrost related logs
    if [[ -n "$(docker ps --filter name=bifrost_deploy --format "{{.Names}}")" ]]; then
        for service in dnsmasq ironic ironic-api ironic-conductor ironic-inspector mariadb nginx; do
            mkdir -p -- "${LOG_DIR}/kolla/${service}"
            docker exec bifrost_deploy systemctl status "${service}" > "${LOG_DIR}/kolla/${service}/systemd-status-${service}.txt"
        done
        docker exec bifrost_deploy journalctl -u mariadb > "${LOG_DIR}/kolla/mariadb/mariadb.txt"
    fi

    # haproxy related logs
    if [[ -n "$(docker ps --filter name=haproxy --format "{{.Names}}")" ]]; then
        mkdir -p -- "${LOG_DIR}/kolla/haproxy"
        docker exec haproxy bash -c 'echo show stat | socat stdio /var/lib/kolla/haproxy/haproxy.sock' > "${LOG_DIR}/kolla/haproxy/stats.txt"
    fi

    # FIXME: remove
    if [[ -n "$(docker ps -a --filter name=ironic_inspector --format "{{.Names}}")" ]]; then
        mkdir -p -- "${LOG_DIR}/kolla/ironic-inspector"
        ls -lR "${volumes_dir}/ironic_inspector_dhcp_hosts" > "${LOG_DIR}/kolla/ironic-inspector/var-lib-ls.txt"
    fi

    mapfile -t container_names < <(docker ps -a --format "{{.Names}}")
    for container in "${container_names[@]}"; do
        docker logs --timestamps --tail="${logs_tail_parameter}" "${container}" &> "${LOG_DIR}/container_logs/${container}.txt"
    done

    # Rename files to .txt; this is so that when displayed via
    # logs.openstack.org clicking results in the browser shows the
    # files, rather than trying to send it to another app or make you
    # download it, etc.

    # Rename all .log files to .txt files. Only the trailing ".log" is
    # replaced (a ".log" earlier in the path is left alone), and names are
    # passed NUL-delimited so whitespace in file names is handled.
    while IFS= read -r -d '' log_file; do
        mv -- "${log_file}" "${log_file%.log}.txt"
    done < <(find "${LOG_DIR}"/{system_logs,kolla,container_logs} -type f -name '*.log' -print0)

    chmod -R 777 "${LOG_DIR}"

    du -sm "${LOG_DIR}"
}

# Entry point: collect the diagnostics as soon as the script is executed.
copy_logs

0 comments on commit 2b2e88e

Please sign in to comment.