Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Managerless logging #2253

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions .zuul.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,9 @@
check:
jobs:
- ansible-lint
- flake8
- python-black
- yamllint
label:
jobs:
- testbed-deploy
- testbed-deploy-stable
- testbed-upgrade
- testbed-upgrade-stable
- testbed-deploy-managerless
gate:
jobs:
- ansible-lint
Expand Down
144 changes: 144 additions & 0 deletions playbooks/managerless/files/get_logs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/bin/bash

set +o errexit

# Gather host, container and service diagnostics below ${LOG_DIR}.
#
# Globals:
#   LOG_DIR (read/written) - destination directory, defaults to /tmp/logs.
# Outputs:
#   Network diagnostics, verbose copy output and the final size of LOG_DIR
#   go to stdout; everything else is written to files below LOG_DIR.
# Notes:
#   Every step is best effort. A missing tool or file only produces an error
#   message for that step; later steps still run.
function copy_logs {
    LOG_DIR=${LOG_DIR:-/tmp/logs}

    local volumes_dir="/var/lib/docker/volumes"
    local logs_tail_parameter="all"
    local system_logs="${LOG_DIR}/system_logs"
    local host service container logfile

    # Create every directory written to below. Without this, all redirects
    # into a missing directory fail and nothing is collected.
    mkdir -p \
        "${LOG_DIR}/kolla" \
        "${LOG_DIR}/kolla_configs" \
        "${LOG_DIR}/system_configs" \
        "${system_logs}" \
        "${LOG_DIR}/container_logs"

    cp -rnL "${volumes_dir}/kolla_logs/_data/"* "${LOG_DIR}/kolla/"
    cp -rnL /etc/kolla/* "${LOG_DIR}/kolla_configs/"
    # Don't save the IPA images.
    rm -f -- "${LOG_DIR}/kolla_configs/config/ironic/ironic-agent."{kernel,initramfs}
    cp -rL /etc/{hostname,hosts,host.conf,resolv.conf,nsswitch.conf,systemd} "${LOG_DIR}/system_configs/"
    # copy docker configs if used
    cp -rL /etc/docker/ "${LOG_DIR}/system_configs/"
    # Remove /var/log/kolla link to not double the data uploaded.
    # Only touch it when it really is a symlink.
    if [[ -L /var/log/kolla ]]; then
        unlink /var/log/kolla
    fi
    cp -rvnL /var/log/* "${system_logs}/"

    journalctl --no-pager > "${system_logs}/syslog.txt"
    journalctl --no-pager -u docker.service > "${system_logs}/docker.log"
    journalctl --no-pager -u containerd.service > "${system_logs}/containerd.log"

    cp -r /etc/sudoers.d "${system_logs}/"
    cp /etc/sudoers "${system_logs}/sudoers.txt"

    df -h > "${system_logs}/df.txt"
    free > "${system_logs}/free.txt"
    lsblk > "${system_logs}/lsblk.txt"
    mount > "${system_logs}/mount.txt"
    env > "${system_logs}/env.txt"
    systemctl status > "${system_logs}/systemctl_status.txt"
    systemctl list-units --all > "${system_logs}/systemctl_units.txt"
    systemctl list-unit-files > "${system_logs}/systemctl_unit_files.txt"

    host=$(hostname)

    # Network state is still printed, and additionally saved next to the
    # other diagnostics so that it ends up in the collected logs.
    (set -x
    ip a
    ip m
    ip l
    ip r
    ip -6 r
    ip neigh
    ping -c 4 "${host}"
    ping6 -c 4 "${host}") 2>&1 | tee "${system_logs}/ip.txt"

    (set -x
    iptables -t raw -v -n -L
    iptables -t mangle -v -n -L
    iptables -t nat -v -n -L
    iptables -t filter -v -n -L) &> "${system_logs}/iptables.txt"

    (set -x
    ip6tables -t raw -v -n -L
    ip6tables -t mangle -v -n -L
    ip6tables -t nat -v -n -L
    ip6tables -t filter -v -n -L) &> "${system_logs}/ip6tables.txt"

    ss -nep > "${system_logs}/ss.txt"

    ss -nep -l > "${system_logs}/ss_l.txt"

    (set -x
    getent ahostsv4 "${host}"
    getent ahostsv6 "${host}") &> "${system_logs}/getent_ahostsvX.txt"

    sysctl -a &> "${system_logs}/sysctl.txt"
    lsmod &> "${system_logs}/lsmod.txt"

    if command -v dpkg > /dev/null; then
        dpkg -l > "${system_logs}/dpkg-l.txt"
    fi
    if command -v rpm > /dev/null; then
        rpm -qa > "${system_logs}/rpm-qa.txt"
    fi

    # final memory usage and process list
    ps -eo user,pid,ppid,lwp,%cpu,%mem,size,rss,cmd > "${system_logs}/ps.txt"

    # container engine related information
    # shellcheck disable=SC2046 # one argument per container ID is intended
    (docker info &&
    docker images &&
    docker ps -a &&
    docker network ls &&
    docker inspect $(docker ps -aq)) > "${system_logs}/docker-info.txt"

    # save dbus services
    dbus-send --system --print-reply --dest=org.freedesktop.DBus /org/freedesktop/DBus org.freedesktop.DBus.ListNames > "${system_logs}/dbus-services.txt"

    # cephadm related logs
    if command -v cephadm > /dev/null; then
        mkdir -p "${LOG_DIR}/ceph"
        sudo cp /etc/ceph/ceph.conf "${LOG_DIR}/ceph"
        sudo cp /var/run/ceph/*/cluster.yml "${LOG_DIR}/ceph/cluster.yml"
        sudo cp /var/log/ceph/cephadm.log* "${LOG_DIR}/ceph/"
        sudo cephadm shell -- ceph --connect-timeout 5 -s > "${LOG_DIR}/ceph/ceph_s.txt"
        sudo cephadm shell -- ceph --connect-timeout 5 osd tree > "${LOG_DIR}/ceph/ceph_osd_tree.txt"
    fi

    # bifrost related logs
    if [[ -n $(docker ps --filter name=bifrost_deploy --format "{{.Names}}") ]]; then
        for service in dnsmasq ironic ironic-api ironic-conductor ironic-inspector mariadb nginx; do
            mkdir -p "${LOG_DIR}/kolla/${service}"
            docker exec bifrost_deploy systemctl status "${service}" > "${LOG_DIR}/kolla/${service}/systemd-status-${service}.txt"
        done
        docker exec bifrost_deploy journalctl -u mariadb > "${LOG_DIR}/kolla/mariadb/mariadb.txt"
    fi

    # haproxy related logs
    if [[ -n $(docker ps --filter name=haproxy --format "{{.Names}}") ]]; then
        mkdir -p "${LOG_DIR}/kolla/haproxy"
        docker exec haproxy bash -c 'echo show stat | socat stdio /var/lib/kolla/haproxy/haproxy.sock' > "${LOG_DIR}/kolla/haproxy/stats.txt"
    fi

    # FIXME: remove
    if [[ -n $(docker ps -a --filter name=ironic_inspector --format "{{.Names}}") ]]; then
        mkdir -p "${LOG_DIR}/kolla/ironic-inspector"
        ls -lR "${volumes_dir}/ironic_inspector_dhcp_hosts" > "${LOG_DIR}/kolla/ironic-inspector/var-lib-ls.txt"
    fi

    for container in $(docker ps -a --format "{{.Names}}"); do
        docker logs --timestamps --tail="${logs_tail_parameter}" "${container}" &> "${LOG_DIR}/container_logs/${container}.txt"
    done

    # Rename files to .txt; this is so that when displayed via
    # logs.openstack.org clicking results in the browser shows the
    # files, rather than trying to send it to another app or make you
    # download it, etc.

    # Rename all .log files to .txt files. Only the trailing ".log" suffix is
    # replaced, so a ".log" inside a directory name is left alone.
    # NUL-delimited find output keeps unusual file names intact.
    while IFS= read -r -d '' logfile; do
        mv -- "${logfile}" "${logfile%.log}.txt"
    done < <(find "${system_logs}" "${LOG_DIR}/kolla" "${LOG_DIR}/container_logs" \
        -type f -name "*.log" -print0)

    chmod -R 777 "${LOG_DIR}"

    du -sm "${LOG_DIR}"

}

copy_logs
2 changes: 2 additions & 0 deletions playbooks/managerless/post.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

vars:
stage_dir: "{{ ansible_user_dir }}/zuul-output"
zuul_copy_output:
/tmp/logs/.*/.*: logs_txt

roles:
- stage-output
34 changes: 34 additions & 0 deletions playbooks/managerless/pre.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
loop:
- osism.commons
- osism.services
- "community.docker>=3.10.2"
changed_when: true

# we run local synchronisation here
Expand Down Expand Up @@ -116,6 +117,33 @@
cmd: "sed -i 's/cloud_env: .*/cloud_env: {{ cloud_env }}/g' /tmp/managerless-part-4.yml"
changed_when: true

# Prepare the log staging area and collect a first set of diagnostics.
- name: Ensure /tmp/logs/ dir
  ansible.builtin.file:
    path: "/tmp/logs"
    state: "directory"

- name: Ensure node directories
  ansible.builtin.file:
    path: "/tmp/logs/{{ item }}"
    state: "directory"
    # Quoted so the mode is passed as an octal string, not a YAML integer.
    mode: "0777"
  loop:
    - "ansible"
    - "pre"
    - "terraform"

- name: Run diagnostics script
  environment:
    LOG_DIR: "/tmp/logs/pre"
  ansible.builtin.script: "get_logs.sh"
  register: get_logs_result
  become: true
  # Log collection is best effort and must never fail the job.
  failed_when: false

- name: Print get_logs output
  ansible.builtin.debug:
    # stdout may be missing when the script task itself errored; the error is
    # masked by failed_when above, so fall back to an empty string here.
    msg: "{{ get_logs_result.stdout | default('') }}"

- name: Create infrastructure
hosts: all

Expand Down Expand Up @@ -220,6 +248,11 @@
ansible.builtin.command: |
ansible-playbook -i localhost, /tmp/managerless-part-1.yml
changed_when: true
register: managerless_part1

- name: Print managerless part 1 output
ansible.builtin.debug:
msg: "{{ managerless_part1.stdout }}"

- name: Install ansible collections
become: true
Expand All @@ -229,6 +262,7 @@
loop:
- osism.commons
- osism.services
- "community.docker>=3.10.2"
changed_when: true

- name: Create configuration directory
Expand Down