3.1. Relevant Information for Bug Reporting¶
In case you cannot investigate and resolve the issue by yourself using this document, make sure you open a ticket on your 6WIND Customer Zone with the relevant troubleshooting information.
This information can be generated and exported using the
troubleshooting-report.sh
script.
Note
If you are in an OpenStack environment, call the script providing as arguments:
--controller
inside a controller node
--compute
inside a compute node
--network
inside a network node
Also make sure the credentials for accessing nova are exported in the environment
(e.g. OS_USERNAME, OS_PASSWORD, OS_PROJECT_DOMAIN_ID, etc.).
If possible, reproduce your issue with debug info enabled. Enable debug for the shortest time possible, as it produces a very large amount of logs.
To enable debug, look at the OpenStack logs section.
We recommend installing the sos
package before calling the following script,
to leverage the distribution bug reporting mechanism (sosreport
). Once
installed, troubleshooting-report.sh
will detect it and run it.
troubleshooting-report.sh
is provided here for information:
#!/bin/sh
#
# Copyright 2016 6WIND S.A.
# Remove the temporary working directory used to build the report.
# Globals: TMPDIR (read) - path of the directory to delete. Nothing is
#          removed when it is empty or does not name a directory.
cleanup()
{
    # Quote the path and end option parsing so that a directory whose name
    # contains whitespace (or starts with '-') is removed as a single path.
    if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then
        rm -rf -- "$TMPDIR"
    fi
}
# Print one line of the option summary: a tab, the option flag, optionally
# the placeholder of its argument, then two tabs and the description.
#   $1 - option flag
#   $2 - description of the option
#   $3 - optional argument placeholder, printed right after the flag
usage_option()
{
    local flag=$1 text=$2 placeholder=$3

    if [ -n "$placeholder" ]; then
        printf "\t%s %s\t\t%s\n" "$flag" "$placeholder" "$text"
    else
        printf "\t%s\t\t%s\n" "$flag" "$text"
    fi
}
# Print the command synopsis followed by one summary line per option.
# Everything is written to stdout; each option line is formatted by
# usage_option (flag, description, optional argument placeholder).
usage()
{
    printf "%s\n\n" "$0 [-h|--help] [-c|--compute] [-C|--controller] [-N|--network] [-f|--file <file path>] [-o|--no-core-file] [-O|--clean-core-files]"
    usage_option -h "display this help"
    usage_option -c "Use this for OpenStack compute nodes"
    usage_option -C "Use this for OpenStack controller nodes"
    usage_option -N "Use this for OpenStack network nodes"
    # -f is the only option taking an argument: show its placeholder so the
    # option list agrees with the synopsis above.
    usage_option -f "Output file path" "<file path>"
    usage_option -o "Do not archive core files"
    usage_option -O "Cleanup core files after generating the report"
}
# Parse the command-line arguments given to the script.
# Long options are first rewritten into their short equivalents, then the
# resulting list is handed to getopts.
# Globals written: COMPUTE, CONTROLLER, NETWORK, OPENSTACK, SUFFIX, ARCHIVE,
#                  NO_CORE_FILE, CLEAN_CORE_FILES, OPTIND.
# Prints the usage and exits with 0 on -h, with 1 on an unknown option.
parse_args()
{
    local word opt remaining=$#

    # Rotate through the positional parameters exactly once: take the first
    # one, translate it if it is a long option, and append it at the end.
    while [ "$remaining" -gt 0 ]; do
        word=$1
        shift
        case "$word" in
            --help)             word="-h" ;;
            --controller)       word="-C" ;;
            --compute)          word="-c" ;;
            --network)          word="-N" ;;
            --file)             word="-f" ;;
            --no-core-file)     word="-o" ;;
            --clean-core-files) word="-O" ;;
        esac
        set -- "$@" "$word"
        remaining=$((remaining - 1))
    done

    OPTIND=1
    while getopts hcCNf:oO opt; do
        case "$opt" in
            c)
                COMPUTE="yes"
                OPENSTACK="yes"
                SUFFIX="${SUFFIX}_compute"
                ;;
            C)
                CONTROLLER="yes"
                OPENSTACK="yes"
                SUFFIX="${SUFFIX}_controller"
                ;;
            N)
                NETWORK="yes"
                OPENSTACK="yes"
                SUFFIX="${SUFFIX}_network"
                ;;
            f) ARCHIVE=${OPTARG} ;;
            o) NO_CORE_FILE="yes" ;;
            O) CLEAN_CORE_FILES="yes" ;;
            h) usage && exit 0 ;;
            *) usage && exit 1 ;;
        esac
    done
}
# Run a command only when its executable is available, prefixing it with the
# optional timeout wrapper.
# Globals:   TIMEOUT (read) - optional "<timeout binary> <N>s" prefix. It is
#            expanded unquoted on purpose so that it splits into the wrapper
#            and its duration, and vanishes when empty.
# Arguments: the command line to run, optionally prefixed with
#            `ip netns exec <netns>`.
# Returns:   non-zero, without running anything, when the executable cannot
#            be found; otherwise the status of the command (or of timeout).
exec_cmd()
{
    # Command to look up: the first word, or the word following a leading
    # `ip netns exec <netns>` prefix when such a prefix is present.
    local cmd=$1

    if [ "$#" -ge 5 ] && [ "$1" = ip ] && [ "$2" = netns ] && [ "$3" = exec ]; then
        cmd=$5
    fi

    # "$@" keeps every argument intact (whitespace, glob characters).
    [ -x "$(command -v -- "$cmd")" ] && $TIMEOUT "$@"
}
# Collect Linux networking state (links, addresses, routes, neighbours, IPsec,
# sockets, Netfilter, bridges) for one network namespace.
# Arguments: $1 - optional network namespace name. When given, every command
#                 is run through `ip netns exec $1`; when omitted, commands
#                 run in the current namespace and files are prefixed "vrf0".
# Globals:   BUGDIR (read) - directory receiving the "<name>-*.txt" files.
get_linux_info()
{
    # $ns is expanded unquoted on purpose: it is either empty or the
    # multi-word prefix "ip netns exec <name>".
    local ns= name=$1 iface

    if [ -n "$name" ]; then
        ns="ip netns exec $name"
    else
        name=vrf0
    fi

    # ethtool on all known links.
    # `ip link show` prints veth/vlan-like links as "name@peer": keep only the
    # part before '@', which is the actual device name ethtool expects.
    for iface in $($ns ip link show \
            | sed -n 's/^[^:][^:]*: \([^:@][^:@]*\)[:@].*$/\1/p'); do
        $ns ethtool -S "$iface" > "$BUGDIR/${name}-ethtool_S_${iface}.txt" 2>&1
        $ns ethtool -l "$iface" > "$BUGDIR/${name}-ethtool_l_${iface}.txt" 2>&1
        $ns ethtool -k "$iface" > "$BUGDIR/${name}-ethtool_k_${iface}.txt" 2>&1
        $ns ethtool -a "$iface" > "$BUGDIR/${name}-ethtool_a_${iface}.txt" 2>&1
        # NOTE(review): -s is ethtool's "change settings" option; without any
        # parameter it presumably changes nothing - confirm whether plain
        # `ethtool <iface>` (show settings) was intended.
        $ns ethtool -s "$iface" > "$BUGDIR/${name}-ethtool_s_${iface}.txt" 2>&1
    done

    # interfaces, addresses, routes, neighbours and IPsec
    $ns ip -detail -statistics link > "$BUGDIR/${name}-ip_link.txt"
    $ns ip -detail address > "$BUGDIR/${name}-ip_address.txt"
    $ns ip -0 -detail route > "$BUGDIR/${name}-ip_route.txt"
    $ns ip neigh > "$BUGDIR/${name}-ip_neigh.txt"
    $ns ip -6 neigh > "$BUGDIR/${name}-ip_neigh6.txt"
    $ns ip -statistics xfrm policy > "$BUGDIR/${name}-ip_xfrm_policy.txt"
    $ns ip -statistics xfrm state > "$BUGDIR/${name}-ip_xfrm_state.txt"

    # active network connections (output file name spelling kept as is, so
    # that generated reports keep the same layout)
    $ns netstat -anp > "$BUGDIR/${name}-nestat_anp.txt"

    # Netfilter
    exec_cmd $ns iptables-save > "$BUGDIR/${name}-iptables_save.txt"
    exec_cmd $ns ip6tables-save > "$BUGDIR/${name}-ip6tables_save.txt"
    exec_cmd $ns ebtables-save > "$BUGDIR/${name}-ebtables_save.txt"

    # Check bridge info
    exec_cmd $ns brctl show > "$BUGDIR/${name}-brctl_show.txt"
    exec_cmd $ns ovs-vsctl show > "$BUGDIR/${name}-ovs_vsctl_show.txt" 2>&1
}
# Collect host-wide information: hardware topology, CPU, memory, PCI, DMI,
# kernel, distribution, services, logs, processes, devices, IRQ affinity,
# mounts and VNB nodes.
# Globals: BUGDIR (read) - directory receiving the collected files. All
#          destinations are quoted so that a path containing whitespace works.
# Optional tools are run through exec_cmd; the other commands are run
# directly.
get_system_info()
{
    # system topology
    exec_cmd lstopo --output-format xml > "$BUGDIR/lstopo.xml"

    # processors hierarchy
    cp /proc/cpuinfo "$BUGDIR/cpuinfo.txt"
    exec_cmd lscpu > "$BUGDIR/lscpu.txt"

    # interrupts
    cp /proc/interrupts "$BUGDIR/interrupts.txt"

    # memory record
    cp /proc/meminfo "$BUGDIR/meminfo.txt"
    exec_cmd vmstat -ws > "$BUGDIR/vmstat_ws.txt"
    exec_cmd numastat -zm > "$BUGDIR/numastat_zm.txt"
    exec_cmd numastat -zs > "$BUGDIR/numastat_zs.txt"

    # PCI peripherals
    exec_cmd lspci -vvv > "$BUGDIR/lspci.txt"

    # DMI/SMBIOS
    exec_cmd dmidecode > "$BUGDIR/dmidecode.txt"

    # kernel version, logs, cmdline and loaded modules
    uname -a > "$BUGDIR/uname.txt"
    dmesg > "$BUGDIR/dmesg.txt"
    cp /proc/cmdline "$BUGDIR/cmdline.txt"
    lsmod > "$BUGDIR/lsmod.txt"

    # distribution
    exec_cmd lsb_release -a > "$BUGDIR/lsb_release.txt" 2>&1

    # services list
    exec_cmd service --status-all > "$BUGDIR/service_status_all.txt" 2>&1
    exec_cmd systemctl list-units > "$BUGDIR/systemctl_list_units.txt" 2>&1

    # logs
    exec_cmd journalctl --no-pager --this-boot > "$BUGDIR/journal.txt"
    [ -f "/var/log/syslog" ] && cp /var/log/syslog "$BUGDIR/syslog.txt"
    [ -d "/var/log/libvirt" ] && cp -r /var/log/libvirt "$BUGDIR/libvirt"

    # processes list
    ps auxww > "$BUGDIR/ps_auxww.txt"

    # cpuset (errors are silenced: the copy is best-effort)
    [ -d "/dev/cpuset" ] && cp -r /dev/cpuset "$BUGDIR/cpuset" 2>/dev/null

    # /dev
    ls -al /dev > "$BUGDIR/ls_al_dev.txt"

    # IRQ affinity: for each IRQ directory, print its path then its mask.
    # -type d must come before -print, otherwise plain files directly under
    # /proc/irq are printed as headers with no mask after them.
    find /proc/irq -maxdepth 1 -mindepth 1 -type d -print -exec \
        cat '{}/smp_affinity' \; > "$BUGDIR/proc_irq_smp_affinity.txt"

    # mounted partitions
    exec_cmd mount > "$BUGDIR/mount.txt"

    # VNB
    exec_cmd ngctl list > "$BUGDIR/ngctl_list.txt" 2>&1
}
# Archive the core files found in the crash directory into the report and,
# on request, delete the originals.
# Arguments: $1 - optional crash directory to scan (default: /var/crash).
# Globals:   NO_CORE_FILE (read)     - "yes" skips the whole step.
#            CLEAN_CORE_FILES (read) - "yes" removes the core files once they
#                                      have been copied successfully.
#            BUGDIR (read)           - report directory; cores are copied to
#                                      $BUGDIR/coredump.
# Returns:   0 when nothing had to be done or the copy succeeded, 1 when the
#            copy failed (the original core files are then left untouched).
get_coredump()
{
    local crash_dir="${1:-/var/crash}"

    [ "$NO_CORE_FILE" != 'yes' ] || return 0

    # Let the shell expand the pattern into the positional parameters; when
    # nothing matches, the pattern stays literal and names no existing file.
    set -- "$crash_dir"/*.core*
    [ -e "$1" ] || [ -L "$1" ] || return 0

    if ! { mkdir -p "$BUGDIR/coredump" && cp -- "$@" "$BUGDIR/coredump/"; }; then
        printf 'Failed to archive core files from %s; leaving them in place.\n' \
            "$crash_dir" >&2
        return 1
    fi

    # Only delete what has actually been saved into the report.
    if [ "$CLEAN_CORE_FILES" = 'yes' ]; then
        rm -f -- "$@"
    fi
}
# Collect fast path information: configuration, package version, logs,
# status and, when the fast path is running, its runtime state through
# fp-cli and related tools.
# Globals: BUGDIR (read)      - directory receiving the collected files.
#          TIMEOUT (read)     - optional timeout prefix, expanded unquoted on
#                               purpose so that it splits into words.
#          FP_RUNNING (write) - "true"/"false" according to the output of
#                               `fast-path.sh status`; left untouched when
#                               fast-path.sh is not available.
get_fp_info()
{
    if [ -x "$(command -v fast-path.sh)" ]; then
        # Record your fp configuration:
        exec_cmd fp-conf-tool -DSFv > "$BUGDIR/fp_config.txt" \
            2> "$BUGDIR/fp_config.log"

        # Record what 6windgate version is used.
        # Both package managers append to the same file: a plain '>' on the
        # second command would truncate what the first one wrote, even when
        # the second tool is not installed.
        : > "$BUGDIR/fp_version.txt"
        exec_cmd dpkg -s 6windgate-fp >> "$BUGDIR/fp_version.txt" 2>/dev/null
        exec_cmd yum info 6windgate-fp >> "$BUGDIR/fp_version.txt" 2>/dev/null

        # in buildroot, /etc/issue contains 6WG version
        [ -f "/etc/issue" ] && cp /etc/issue "$BUGDIR/etc_issue.txt"

        # Copy fp logs
        [ -f "/var/log/fast-path.log" ] && \
            cp /var/log/fast-path.log "$BUGDIR/fast-path.log"
        [ -f "/var/log/messages" ] && \
            cp /var/log/messages "$BUGDIR/messages.log"

        exec_cmd linux-fp-sync.sh status > "$BUGDIR/linux_fp_sync_status.txt"
        fast-path.sh status > "$BUGDIR/fast_path_status.txt"

        if grep -q "fp-.*not running" "$BUGDIR/fast_path_status.txt"; then
            FP_RUNNING="false"
        else
            FP_RUNNING="true"
        fi
    fi

    # Copy fp info
    if [ "$FP_RUNNING" = "true" ]; then
        # compilation options used for your FP:
        $TIMEOUT fp-cli conf compiled > "$BUGDIR/fp_compil_options.txt" 2>&1
        $TIMEOUT fp-cli dump-stats percore non-zero > "$BUGDIR/fp_cli_dump_stats.txt"
        $TIMEOUT fp-cli fp-state > "$BUGDIR/fp_cli_fp_state.txt"
        $TIMEOUT fp-cli iface > "$BUGDIR/fp_cli_iface.txt"
        $TIMEOUT fp-cli neigh4 > "$BUGDIR/fp_cli_neigh4.txt"
        $TIMEOUT fp-cli neigh6 > "$BUGDIR/fp_cli_neigh6.txt"
        $TIMEOUT fp-cli route4 > "$BUGDIR/fp_cli_route4.txt"
        $TIMEOUT fp-cli route6 > "$BUGDIR/fp_cli_route6.txt"

        # bridge
        $TIMEOUT fp-cli bridge > "$BUGDIR/fp_cli_bridge.txt"
        $TIMEOUT fp-cli fp-vswitch-flows > "$BUGDIR/fp_cli_fpvs_flows.txt"
        $TIMEOUT fp-cli fp-vswitch-port > "$BUGDIR/fp_cli_fpvs_ports.txt"
        $TIMEOUT fp-cli fp-vswitch-stats > "$BUGDIR/fp_cli_fpvs_stats.txt"

        # Netfilter
        $TIMEOUT fp-cli nf4-table filter all > "$BUGDIR/fp_cli_nf4_table.txt"
        $TIMEOUT fp-cli nf4-table mangle all >> "$BUGDIR/fp_cli_nf4_table.txt"
        $TIMEOUT fp-cli nf4-table nat all >> "$BUGDIR/fp_cli_nf4_table.txt"
        $TIMEOUT fp-cli nf6-table filter all > "$BUGDIR/fp_cli_nf6_table.txt"
        $TIMEOUT fp-cli nf6-table mangle all >> "$BUGDIR/fp_cli_nf6_table.txt"
        $TIMEOUT fp-cli filter-bridge broute all > "$BUGDIR/fp_cli_ebtables.txt"
        $TIMEOUT fp-cli filter-bridge filter all >> "$BUGDIR/fp_cli_ebtables.txt"

        # IPsec
        $TIMEOUT fp-cli ipsec4-spd all > "$BUGDIR/fp_cli_ipsec4_spd_all.txt"
        $TIMEOUT fp-cli ipsec4-sad all > "$BUGDIR/fp_cli_ipsec4_sad_all.txt"
        $TIMEOUT fp-cli ipsec6-spd all > "$BUGDIR/fp_cli_ipsec6_spd_all.txt"
        $TIMEOUT fp-cli ipsec6-sad all > "$BUGDIR/fp_cli_ipsec6_sad_all.txt"

        # FP-VNB
        exec_cmd fpngctl list > "$BUGDIR/fpngctl_list.txt"

        # shmem-ports
        exec_cmd fp-shmem-ports -d -e all > "$BUGDIR/fp_shmem_ports_d.txt"
        exec_cmd fp-shmem-ports -s -e all > "$BUGDIR/fp_shmem_ports_s.txt"

        # dpvi
        [ -f "/proc/sys/dpvi_shmem/list_shm" ] && \
            cp /proc/sys/dpvi_shmem/list_shm "$BUGDIR/list_shm.txt"
        [ -f "/proc/sys/dpvi/list_interfaces" ] && \
            cp /proc/sys/dpvi/list_interfaces "$BUGDIR/dpvi_list_interfaces.txt"
        [ -f "/proc/sys/dpvi/running_fastpath" ] && \
            cp /proc/sys/dpvi/running_fastpath "$BUGDIR/dpvi_running_fastpath.txt"
        exec_cmd fp-shmem-dpvi > "$BUGDIR/fp_shmem_dpvi.txt"
    fi
}
# Collect OpenStack configuration, logs and runtime state.
# The node roles come from the command-line flags and are completed by
# looking for running nova-compute / nova-api / neutron-server processes.
# Globals: COMPUTE, CONTROLLER, NETWORK, OPENSTACK (read/write) - node roles.
#          BUGDIR (read)  - directory receiving the collected files.
#          TIMEOUT (read) - optional timeout prefix, expanded unquoted on
#                           purpose so that it splits into words.
# Returns early (after printing a hint) when `nova list` fails on a
# controller node, which typically means credentials are not exported.
get_openstack_info()
{
    local failure id

    # Try setting Openstack if the user forgot telling it in arguments.
    # Only working when Openstack processes are still running.
    if [ -n "$(ps auxww | grep nova-compute | grep -v 'grep')" ]; then
        COMPUTE=yes
        OPENSTACK=yes
    fi
    if [ -n "$(ps auxww | grep nova-api | grep -v 'grep')" ]; then
        CONTROLLER=yes
        OPENSTACK=yes
    fi
    if [ -n "$(ps auxww | grep neutron-server | grep -v 'grep')" ]; then
        NETWORK=yes
        OPENSTACK=yes
    fi

    [ "$OPENSTACK" != 'yes' ] && return

    [ -d "/etc/nova" ] && cp -r /etc/nova "$BUGDIR/etc_nova"
    [ -d "/etc/neutron" ] && cp -r /etc/neutron "$BUGDIR/etc_neutron"
    [ -d "/var/log/nova" ] && cp -r /var/log/nova "$BUGDIR/log_nova"
    [ -d "/var/log/neutron" ] && cp -r /var/log/neutron "$BUGDIR/log_neutron"

    if [ "$CONTROLLER" = 'yes' ]; then
        # Check whether OpenStack credentials are correctly set
        if ! failure=$($TIMEOUT nova list 2>&1); then
            # The error text is passed as an argument, never as the printf
            # format, so that '%' or '\' in it are printed verbatim.
            printf '`nova list` failed with this error: %s\n' "$failure"
            printf 'Make sure your credentials are properly exported, and re-run the script.\n'
            return
        fi

        # Depending on the client version, the version string is printed on
        # stdout or on stderr: capture both.
        $TIMEOUT nova --version > "$BUGDIR/nova_version.txt" 2>&1
        $TIMEOUT nova list > "$BUGDIR/nova_list.txt"
        $TIMEOUT nova host-list > "$BUGDIR/nova_host_list.txt"
        $TIMEOUT nova hypervisor-list > "$BUGDIR/nova_hypervisor_list.txt"
        $TIMEOUT nova flavor-list > "$BUGDIR/nova_flavor_list.txt"
        $TIMEOUT nova image-list > "$BUGDIR/nova_image_list.txt"
        $TIMEOUT nova network-list > "$BUGDIR/nova_network_list.txt"
        $TIMEOUT nova service-list > "$BUGDIR/nova_service_list.txt"

        # One `nova show` per instance: skip the 3 header lines of the table
        # and take the second column (the ID).
        for id in $($TIMEOUT nova list --minimal | tail -n +4 | awk '{ print $2 }'); do
            [ -n "${id}" ] || continue
            printf '##### %s #####\n' "${id}" >> "$BUGDIR/nova_show.txt" &&
                $TIMEOUT nova show "${id}" >> "$BUGDIR/nova_show.txt"
        done
    fi

    if [ "$NETWORK" = 'yes' ]; then
        $TIMEOUT neutron agent-list > "$BUGDIR/neutron_agent_list.txt"
        $TIMEOUT neutron router-list > "$BUGDIR/neutron_router_list.txt"
        $TIMEOUT neutron net-list > "$BUGDIR/neutron_net_list.txt"
        # Subnets get their own file instead of overwriting the network list.
        $TIMEOUT neutron subnet-list > "$BUGDIR/neutron_subnet_list.txt"
    fi
}
# ---- Main ----
# Create the working directory first and only then install the cleanup trap:
# cleanup removes $TMPDIR, which before this assignment could still hold a
# value inherited from the environment.
TMPDIR=$(mktemp -d) || {
    printf 'Cannot create a temporary directory\n' >&2
    exit 1
}
trap cleanup EXIT
# On interruption, stop the script; the EXIT trap then performs the cleanup.
trap 'exit 130' INT
trap 'exit 131' QUIT

SUFFIX=$(hostname)
TIMEOUT_S=10
# BUGDIR is computed before parse_args runs, so it uses the bare host name;
# the role suffixes appended by parse_args only show up in the archive name.
BUGDIR=$TMPDIR/bug_info/$SUFFIX
mkdir -p "$BUGDIR" || exit 1

# TIMEOUT stays empty when the timeout utility is not available.
[ -x "$(command -v timeout)" ] && TIMEOUT="$(command -v timeout) ${TIMEOUT_S}s"

# "$@" keeps arguments containing whitespace (e.g. the -f path) intact.
parse_args "$@"
ARCHIVE=${ARCHIVE:-"/tmp/troubleshooting-report_${SUFFIX}.tar.gz"}

printf 'Gathering information. This may take some time...\n'

# octeon-specific info
# (cp takes the destination as an argument; redirecting its stdout left it
# without a destination operand, so nothing was ever copied.)
[ -f "/proc/octeon_ethernet_stats" ] && \
    cp /proc/octeon_ethernet_stats "$BUGDIR/octeon_ethernet_stats.txt"
[ -f "/proc/octeon_info" ] && cp /proc/octeon_info "$BUGDIR/octeon_info.txt"

# NOTE(review): when at least one network namespace is listed, only the
# listed namespaces are inspected and the current namespace is not -
# confirm this is intended.
if [ -n "$(ip netns)" ]; then
    for vrf in $(ip netns | cut -d " " -f 1); do
        get_linux_info "$vrf"
    done
else
    get_linux_info
fi

get_system_info
get_coredump
get_fp_info
get_openstack_info

##### External SOSREPORT results
if [ -x "$(command -v sosreport)" ]; then
    SOSDIR="$BUGDIR/sosreport"
    mkdir -p "$SOSDIR"
    sosreport -a --batch --build --tmp-dir="$SOSDIR" > "$BUGDIR/sosreport.txt"
fi

if ! tar -czf "${ARCHIVE}" -C "${TMPDIR}" .; then
    printf 'Failed to create %s\n' "${ARCHIVE}" >&2
    exit 1
fi
printf 'Saved into %s\n' "${ARCHIVE}"
Send the troubleshooting-report*.tar.gz
files along with any other information
you deem relevant.
Note
The script makes use of the following commands if available:
brctl
dmidecode
ebtables-save
ip6tables-save
iptables-save
lsb_release
lscpu
lspci
lstopo
mount
numastat
ovs-vsctl
sosreport
timeout
vmstat
For better results, make sure these tools are available on your system.