3.1. Relevant Information for Bug Reporting

In case you cannot investigate and resolve the issue by yourself using this document, make sure you open a ticket on your 6WIND Customer Zone with the relevant troubleshooting information.

This information can be generated and exported using the troubleshooting-report.sh script.

Note

If you are in an OpenStack environment, call the script providing as arguments:

  • --controller inside a controller node

  • --compute inside a compute node

  • --network inside a network node

Also make sure the credentials for accessing nova are exported in the environment (e.g. OS_USERNAME, OS_PASSWORD, OS_PROJECT_DOMAIN_ID).

If possible, reproduce your issue with debug info enabled. Enable debug for the shortest time possible, as it produces a very large amount of log output.

To enable debug, look at the OpenStack logs section.

We recommend installing the sos package before calling the following script, to leverage the distribution bug reporting mechanism (sosreport). Once installed, troubleshooting-report.sh will detect it and run it.

troubleshooting-report.sh is provided here for information:

#!/bin/sh
#
# Copyright 2016 6WIND S.A.

# Remove the temporary working directory, if it exists.
# Globals: TMPDIR (read) - directory to delete.
# Returns non-zero when TMPDIR is not an existing directory.
cleanup()
{
	# Quote the path so that a directory name containing whitespace or
	# glob characters is removed as one operand; "--" guards against a
	# name starting with a dash.
	[ -d "$TMPDIR" ] && rm -rf -- "$TMPDIR"
}

# Print one help line describing a single command-line option.
#   $1 - option flag
#   $2 - description text
#   $3 - optional text printed right after the flag, separated by a space
usage_option()
{
	local flag=$1 desc=$2 extra=$3

	if [ -n "$extra" ]; then
		printf "\t%s %s\t\t%s\n" "$flag" "$extra" "$desc"
	else
		printf "\t%s\t\t%s\n" "$flag" "$desc"
	fi
}

# Print the command synopsis followed by one help line per option.
# Everything is written to stdout; the caller chooses the exit status.
usage()
{
	printf "%s\n\n" "$0 [-h|--help] [-c|--compute] [-C|--controller] [-N|--network] [-f|--file <file path>] [-o|--no-core-file] [-O|--clean-core-files] [-e|--extra <file path>]"
	usage_option -h "display this help"
	usage_option -c "Use this for OpenStack compute nodes"
	usage_option -C "Use this for OpenStack controller nodes"
	usage_option -N "Use this for OpenStack network nodes"
	# -f and -e take a mandatory argument: pass the placeholder as the
	# third parameter so the per-option help shows it, as the synopsis does.
	usage_option -f "Output file path" "<file path>"
	usage_option -o "Do not archive core files"
	usage_option -O "Cleanup core files after generating the report"
	usage_option -e "Add an external file in the report archive" "<file path>"
}

# Parse the command line given as "$@".
# Long options are first rewritten into their short equivalents so that
# the getopts builtin can process them.
# Globals written: COMPUTE, CONTROLLER, NETWORK, OPENSTACK, SUFFIX,
#                  ARCHIVE, NO_CORE_FILE, CLEAN_CORE_FILES, EXTRAFILES
# Prints the usage and exits 0 for -h, or exits 1 when getopts rejects
# an option.
parse_args()
{
	local short

	# Walk the positional parameters once: drop the head, then append its
	# (possibly translated) value at the tail.
	for arg in "$@"; do
		shift
		case "$arg" in
		--help)             short="-h" ;;
		--controller)       short="-C" ;;
		--compute)          short="-c" ;;
		--network)          short="-N" ;;
		--file)             short="-f" ;;
		--no-core-file)     short="-o" ;;
		--clean-core-files) short="-O" ;;
		--extra)            short="-e" ;;
		*)                  short=$arg ;;
		esac
		set -- "$@" "$short"
	done

	OPTIND=1
	while getopts hcCNf:oOe: name; do
		case "$name" in
		c)
			COMPUTE="yes"
			OPENSTACK="yes"
			SUFFIX="${SUFFIX}_compute"
			;;
		C)
			CONTROLLER="yes"
			OPENSTACK="yes"
			SUFFIX="${SUFFIX}_controller"
			;;
		N)
			NETWORK="yes"
			OPENSTACK="yes"
			SUFFIX="${SUFFIX}_network"
			;;
		f)
			ARCHIVE=${OPTARG}
			;;
		o)
			NO_CORE_FILE="yes"
			;;
		O)
			CLEAN_CORE_FILES="yes"
			;;
		e)
			# Newest file first, space separated.
			EXTRAFILES="${OPTARG} ${EXTRAFILES}"
			;;
		h)
			usage && exit 0
			;;
		*)
			usage && exit 1
			;;
		esac
	done
}

# Run the given command line only if its program resolves to an executable
# file, prefixing it with $TIMEOUT (the timeout utility and its duration)
# when that variable is set.
#   $@ - command and arguments, optionally prefixed by
#        `ip netns exec <netns>`
# Globals: TIMEOUT (read)
# Returns non-zero without running anything when the program is not found;
# otherwise returns the status of the command.
exec_cmd()
{
	local netns_cmd cmd

	# Program to look for, stripped from any leading `ip netns exec XXX`.
	# printf is used instead of echo so that a first argument such as -n
	# is not taken as an option, and the pattern avoids the non-POSIX \+.
	netns_cmd=$(printf '%s\n' "$*" |
		sed -n 's/^ip netns exec [^ ][^ ]* \([^ ][^ ]*\).*$/\1/p')
	cmd=${netns_cmd:-$1}

	# $TIMEOUT stays unquoted on purpose: it is either empty or
	# "<timeout path> <duration>" and must split into words. "$@" is
	# quoted so that every argument reaches the command unchanged.
	[ -x "$(command -v "$cmd")" ] && $TIMEOUT "$@"
}

# Collect Linux networking state for one network namespace.
#   $1 - optional network namespace name. When omitted, commands run in
#        the current namespace and output files are prefixed with "main".
# Globals: BUGDIR (read) - directory receiving the <name>-*.txt files.
get_linux_info()
{
	local ns= name=$1 iface
	if [ -n "$name" ]; then
		ns="ip netns exec $name"
	else
		name=main
	fi
	# NOTE: $ns is deliberately expanded unquoted below. It is either
	# empty (and must vanish) or the prefix `ip netns exec <name>` (and
	# must split into separate words).

	# information on all known links
	# `ip link show` displays stacked devices as "name@parent"; only the
	# part before '@' is the interface name, so the capture stops at the
	# first ':' or '@'.
	for iface in $($ns ip link show |
		sed -n 's/^[^:][^:]*: \([^:@][^:@]*\)[:@].*$/\1/p'); do
		$ns fp-cli dpdk-port-stats "$iface" > "$BUGDIR/${name}-fp-cli_dpdk-port-stats_${iface}.txt" 2>&1
		$ns fp-cli dpdk-port-offload "$iface" > "$BUGDIR/${name}-fp-cli_dpdk-port-offload_${iface}.txt" 2>&1
		$ns fp-cli dpdk-port-advertise "$iface" > "$BUGDIR/${name}-fp-cli_dpdk-port-advertise_${iface}.txt" 2>&1
	done
	# interfaces, addresses, routes, neighbours and IPsec
	$ns ip -detail -statistics link > "$BUGDIR/${name}-ip_link.txt"
	$ns ip -detail address > "$BUGDIR/${name}-ip_address.txt"
	$ns ip -detail route > "$BUGDIR/${name}-ip_route.txt"
	$ns ip -6 -detail route > "$BUGDIR/${name}-ip_route6.txt"
	$ns ip neigh > "$BUGDIR/${name}-ip_neigh.txt"
	$ns ip -6 neigh > "$BUGDIR/${name}-ip_neigh6.txt"
	$ns ip -statistics xfrm policy > "$BUGDIR/${name}-ip_xfrm_policy.txt"
	$ns ip -statistics xfrm state > "$BUGDIR/${name}-ip_xfrm_state.txt"
	# get fpn0 stats
	if [ "$name" = "vrf0" ]; then
		fpexec ethtool -S fpn0 > "$BUGDIR/${name}-ethtool_S_fpn0.txt" 2>&1
	fi
	# active network connections
	# (the "nestat" spelling of the file name is kept so that existing
	# report consumers still find it)
	$ns netstat -anp > "$BUGDIR/${name}-nestat_anp.txt"
	# Netfilter
	exec_cmd $ns iptables-save > "$BUGDIR/${name}-iptables_save.txt"
	exec_cmd $ns ip6tables-save > "$BUGDIR/${name}-ip6tables_save.txt"
	exec_cmd $ns ebtables-save > "$BUGDIR/${name}-ebtables_save.txt"
	exec_cmd $ns ipset save > "$BUGDIR/${name}-ipset_save.txt"
	exec_cmd $ns ipset list > "$BUGDIR/${name}-ipset_list.txt"
	# Check bridge info
	exec_cmd $ns brctl show > "$BUGDIR/${name}-brctl_show.txt"
	exec_cmd $ns ovs-vsctl show > "$BUGDIR/${name}-ovs_vsctl_show.txt" 2>&1
}

# Collect hardware, kernel, distribution, service, log and process
# information about the host.
# Globals: BUGDIR (read) - directory receiving the output files.
# Optional tools are invoked through exec_cmd, so a missing tool yields an
# empty output file instead of an error.
get_system_info()
{
	# system topology
	exec_cmd lstopo-no-graphics --output-format xml > "$BUGDIR/lstopo.xml"
	# processors hierarchy
	cp /proc/cpuinfo "$BUGDIR/cpuinfo.txt"
	exec_cmd lscpu > "$BUGDIR/lscpu.txt"
	# interrupts
	cp /proc/interrupts "$BUGDIR/interrupts.txt"
	# memory record
	cp /proc/meminfo "$BUGDIR/meminfo.txt"
	exec_cmd vmstat -ws > "$BUGDIR/vmstat_ws.txt"
	exec_cmd numastat -zm > "$BUGDIR/numastat_zm.txt"
	exec_cmd numastat -zs > "$BUGDIR/numastat_zs.txt"
	# PCI peripherals
	exec_cmd lspci -vvv > "$BUGDIR/lspci.txt"
	# DMI/SMBIOS
	exec_cmd dmidecode > "$BUGDIR/dmidecode.txt"

	# kernel version, logs, cmdline and loaded modules
	uname -a > "$BUGDIR/uname.txt"
	dmesg > "$BUGDIR/dmesg.txt"
	cp /proc/cmdline "$BUGDIR/cmdline.txt"
	lsmod > "$BUGDIR/lsmod.txt"

	# distribution
	exec_cmd lsb_release -a > "$BUGDIR/lsb_release.txt" 2>&1

	# services list
	exec_cmd service --status-all > "$BUGDIR/service_status_all.txt" 2>&1
	exec_cmd systemctl list-units > "$BUGDIR/systemctl_list_units.txt" 2>&1

	# logs
	exec_cmd journalctl --all --no-pager --this-boot > "$BUGDIR/journal.txt"
	[ -f "/var/log/syslog" ] && cp /var/log/syslog "$BUGDIR/syslog.txt"
	[ -d "/var/log/libvirt" ] && cp -r /var/log/libvirt "$BUGDIR/libvirt"
	[ -f "/var/log/netlimits.log" ] && cp /var/log/netlimits.log \
		"$BUGDIR/6WIND-vRouter-license-usage-report.txt"
	[ -f "/var/log/netlimits.log.sig" ] && cp /var/log/netlimits.log.sig \
		"$BUGDIR/6WIND-vRouter-license-usage-report.txt.sig"

	# processes list
	ps auxww > "$BUGDIR/ps_auxww.txt"
	# cpuset
	[ -d "/dev/cpuset" ] && cp -r /dev/cpuset "$BUGDIR/cpuset" 2>/dev/null
	# /dev
	ls -al /dev > "$BUGDIR/ls_al_dev.txt"
	# IRQ affinity
	# -type d must come before -print: otherwise the plain files located
	# directly under /proc/irq are listed too, with no affinity mask after
	# their name.
	find /proc/irq -maxdepth 1 -mindepth 1 -type d -print -exec \
		cat '{}/smp_affinity' \; > "$BUGDIR/proc_irq_smp_affinity.txt"

	# mounted partitions
	exec_cmd mount > "$BUGDIR/mount.txt"

	# VNB
	exec_cmd ngctl list > "$BUGDIR/ngctl_list.txt" 2>&1

	exec_cmd vrfctl list > "$BUGDIR/vrf_list.txt" 2>&1
}

# Archive the core files into $BUGDIR/coredump, together with the output
# of `coredumpctl info`.
#   $1 - optional directory holding the core* files
#        (default: /var/lib/systemd/coredump)
# Globals: BUGDIR (read), NO_CORE_FILE (read; "yes" skips the collection),
#          CLEAN_CORE_FILES (read; "yes" deletes the originals afterwards)
get_coredump()
{
	local coredir=${1:-/var/lib/systemd/coredump} copied=no

	[ "$NO_CORE_FILE" = 'yes' ] && return 0
	# ls is used only for its exit status: does at least one core* exist?
	ls "$coredir"/core* > /dev/null 2>&1 || return 0

	mkdir -p "$BUGDIR/coredump"
	cp "$coredir"/core* "$BUGDIR/coredump/" && copied=yes
	coredumpctl info > "$BUGDIR/coredump/coredumpctl_info.txt"

	[ "$CLEAN_CORE_FILES" = 'yes' ] || return 0
	# Never delete the originals when the copy failed (e.g. disk full):
	# they would be lost for good.
	if [ "$copied" = 'yes' ]; then
		rm -f "$coredir"/core*
	else
		printf 'Core files could not be copied; leaving them in %s\n' \
			"$coredir" >&2
	fi
}

# Collect fast path configuration, version, logs and runtime state.
# Globals: BUGDIR (read)     - directory receiving the output files
#          TIMEOUT (read)    - optional command prefix, expanded unquoted
#          FP_RUNNING (read/written) - set to "true"/"false" from the
#              output of `fast-path.sh status` when fast-path.sh is
#              installed; the runtime dump below only runs when "true".
get_fp_info()
{
	local cryptolib compiled="$BUGDIR/fp_compil_options.txt"

	if [ -x "$(command -v fast-path.sh)" ]; then
		# Record your fp configuration:
		exec_cmd fp-conf-tool -DSFv > "$BUGDIR/fp_config.txt" \
			2>"$BUGDIR/fp_config.log"

		# Record what 6windgate version is used
		if [ -x "$(command -v dpkg)" ]; then
			exec_cmd dpkg -s 6windgate-fp > "$BUGDIR/fp_version.txt" 2>/dev/null
		else
			exec_cmd yum info 6windgate-fp > "$BUGDIR/fp_version.txt" 2>/dev/null
		fi

		# in buildroot, /etc/issue contains 6WG version
		[ -f "/etc/issue" ] && cp /etc/issue "$BUGDIR/etc_issue.txt"

		# Copy fp logs
		[ -f "/var/log/fast-path.log" ] && \
			cp /var/log/fast-path.log "$BUGDIR/fast-path.log"
		[ -f "/var/log/messages" ] && \
			cp /var/log/messages "$BUGDIR/messages.log"

		exec_cmd linux-fp-sync.sh status > "$BUGDIR/linux_fp_sync_status.txt"
		fast-path.sh status > "$BUGDIR/fast_path_status.txt"
		if grep -q "fp-.*not running" "$BUGDIR/fast_path_status.txt"; then
			FP_RUNNING="false"
		else
			FP_RUNNING="true"
		fi
	fi

	# Copy fp info
	if [ "$FP_RUNNING" = "true" ]; then
		# compilation options used for your FP; the saved file is reused
		# below for every CONFIG_* check instead of re-running fp-cli.
		$TIMEOUT fp-cli conf compiled > "$compiled" 2>&1
		$TIMEOUT fp-cli stats percore > "$BUGDIR/fp_cli_dump_stats.txt"
		$TIMEOUT fp-cli fp-state > "$BUGDIR/fp_cli_fp_state.txt"
		$TIMEOUT fp-cli filling > "$BUGDIR/fp_cli_filling.txt"
		if grep -q 'CONFIG_MCORE_ARCH_DPDK=y' "$compiled"; then
			$TIMEOUT fp-cli dpdk-debug-pool | grep common_pool_count > "$BUGDIR/fp_cli_dpdk_pool.txt"
		fi

		$TIMEOUT fp-cli iface > "$BUGDIR/fp_cli_iface.txt"
		$TIMEOUT fp-cli neigh4 > "$BUGDIR/fp_cli_neigh4.txt"
		$TIMEOUT fp-cli neigh6 > "$BUGDIR/fp_cli_neigh6.txt"
		$TIMEOUT fp-cli route4 > "$BUGDIR/fp_cli_route4.txt"
		$TIMEOUT fp-cli route6 > "$BUGDIR/fp_cli_route6.txt"
		$TIMEOUT fp-cli crypto-lib > "$BUGDIR/fp_cli_crypto_lib.txt"
		# The command substitution is unquoted on purpose: one iteration
		# per whitespace-separated word of the listing.
		for cryptolib in $(grep -v "Available crypto" "$BUGDIR/fp_cli_crypto_lib.txt"); do
			$TIMEOUT fp-cli stats-crypto "$cryptolib" > "$BUGDIR/fp_cli_stats_crypto_${cryptolib}.txt"
		done
		$TIMEOUT fp-cli crypto-offload-stats > "$BUGDIR/fp_cli_crypto_offload_stats.txt"
		# bridge
		$TIMEOUT fp-cli bridge > "$BUGDIR/fp_cli_bridge.txt"
		$TIMEOUT fp-cli fp-vswitch-flows > "$BUGDIR/fp_cli_fpvs_flows.txt"
		$TIMEOUT fp-cli fp-vswitch-port > "$BUGDIR/fp_cli_fpvs_ports.txt"
		$TIMEOUT fp-cli fp-vswitch-stats > "$BUGDIR/fp_cli_fpvs_stats.txt"
		# Netfilter
		$TIMEOUT fp-cli nf4-table filter all > "$BUGDIR/fp_cli_nf4_table.txt"
		$TIMEOUT fp-cli nf4-table mangle all >> "$BUGDIR/fp_cli_nf4_table.txt"
		$TIMEOUT fp-cli nf4-table nat all >> "$BUGDIR/fp_cli_nf4_table.txt"
		$TIMEOUT fp-cli nf4-rules filter > "$BUGDIR/fp_cli_nf4_rules.txt"
		$TIMEOUT fp-cli nf4-rules mangle >> "$BUGDIR/fp_cli_nf4_rules.txt"
		$TIMEOUT fp-cli nf4-rules nat >> "$BUGDIR/fp_cli_nf4_rules.txt"
		$TIMEOUT fp-cli nf6-table filter all > "$BUGDIR/fp_cli_nf6_table.txt"
		$TIMEOUT fp-cli nf6-table mangle all >> "$BUGDIR/fp_cli_nf6_table.txt"
		$TIMEOUT fp-cli nf6-rules filter > "$BUGDIR/fp_cli_nf6_rules.txt"
		$TIMEOUT fp-cli nf6-rules mangle >> "$BUGDIR/fp_cli_nf6_rules.txt"
		$TIMEOUT fp-cli filter-bridge broute all > "$BUGDIR/fp_cli_ebtables.txt"
		$TIMEOUT fp-cli filter-bridge filter all >> "$BUGDIR/fp_cli_ebtables.txt"
		$TIMEOUT fp-cli vrf-exec all nf-ipset > "$BUGDIR/fp_cli_nf_ipset.txt"
		# IPsec
		$TIMEOUT fp-cli ipsec4-spd all > "$BUGDIR/fp_cli_ipsec4_spd_all.txt"
		$TIMEOUT fp-cli ipsec4-sad all > "$BUGDIR/fp_cli_ipsec4_sad_all.txt"
		$TIMEOUT fp-cli ipsec6-spd all > "$BUGDIR/fp_cli_ipsec6_spd_all.txt"
		$TIMEOUT fp-cli ipsec6-sad all > "$BUGDIR/fp_cli_ipsec6_sad_all.txt"
		# License
		if grep -q 'CONFIG_MCORE_LICENSE=y' "$compiled"; then
			$TIMEOUT fp-cli license > "$BUGDIR/fp_cli_license.txt"
		fi

		# FP-VNB
		exec_cmd fpngctl list > "$BUGDIR/fpngctl_list.txt"
		# shmem-ports
		exec_cmd fp-shmem-ports -d -e all > "$BUGDIR/fp_shmem_ports_d.txt"
		exec_cmd fp-shmem-ports -s -e all > "$BUGDIR/fp_shmem_ports_s.txt"

		exec_cmd fp-cpu-usage > "$BUGDIR/fp_cpu_usage.txt"

		# dpvi
		[ -f "/proc/sys/dpvi_shmem/list_shm" ] && \
			cp /proc/sys/dpvi_shmem/list_shm "$BUGDIR/list_shm.txt"
		[ -f "/proc/sys/dpvi/list_interfaces" ] && \
			cp /proc/sys/dpvi/list_interfaces "$BUGDIR/dpvi_list_interfaces.txt"
		[ -f "/proc/sys/dpvi/running_fastpath" ] && \
			cp /proc/sys/dpvi/running_fastpath "$BUGDIR/dpvi_running_fastpath.txt"

		# Collected unconditionally; exec_cmd skips the tool when it is
		# not installed.
		exec_cmd fp-shmem-dpvi > "$BUGDIR/fp_shmem_dpvi.txt"

		# npf
		if [ -x "$(command -v fp-npfctl)" ]; then
			exec_cmd fp-npfctl vrf-exec all show > "$BUGDIR/fp_npf_conf.txt" 2>&1
			exec_cmd fp-npfctl stats > "$BUGDIR/fp_npf_stats.txt" 2>&1
			exec_cmd fp-npfctl show-params > "$BUGDIR/fp_npf_params.txt" 2>&1
			exec_cmd fp-npfctl pool-usage > "$BUGDIR/fp_npf_pool_usage.txt" 2>&1
			exec_cmd fp-npfctl htable-stats > "$BUGDIR/fp_npf_htable_stats.txt" 2>&1
			exec_cmd fp-npfctl vrf-exec all cgnat all conntrack-stats > "$BUGDIR/fp_cgnat_conn_stats.txt" 2>&1
			exec_cmd fp-npfctl vrf-exec all cgnat all block-stats > "$BUGDIR/fp_cgnat_block_stats.txt" 2>&1
			# NOTE(review): same sub-command as the line above but saved
			# under a "port_stats" name - confirm whether a different
			# sub-command was intended.
			exec_cmd fp-npfctl vrf-exec all cgnat all block-stats > "$BUGDIR/fp_cgnat_port_stats.txt" 2>&1
			exec_cmd fp-npfctl vrf-exec all cgnat all ip-stats > "$BUGDIR/fp_cgnat_ip_stats.txt" 2>&1
		fi
	fi
}

# Collect OpenStack configuration, logs and nova/neutron listings.
# Globals: BUGDIR (read), TIMEOUT (read, expanded unquoted on purpose),
#          COMPUTE, CONTROLLER, NETWORK, OPENSTACK (read; also set to
#          "yes" here when the matching OpenStack process is running).
# Does nothing when the node is not an OpenStack node. Stops early, after
# printing a hint, when `nova list` fails on a controller node.
get_openstack_info()
{
	local failure id

	# Try setting Openstack if the user forgot telling it in arguments.
	# Only working when Openstack processes are still running.
	if ps auxww | grep -v 'grep' | grep -q nova-compute; then
		COMPUTE=yes
		OPENSTACK=yes
	fi
	if ps auxww | grep -v 'grep' | grep -q nova-api; then
		CONTROLLER=yes
		OPENSTACK=yes
	fi
	if ps auxww | grep -v 'grep' | grep -q neutron-server; then
		NETWORK=yes
		OPENSTACK=yes
	fi

	[ "$OPENSTACK" != 'yes' ] && return

	[ -d "/etc/nova" ] && cp -r /etc/nova "$BUGDIR/etc_nova"
	[ -d "/etc/neutron" ] && cp -r /etc/neutron "$BUGDIR/etc_neutron"
	[ -d "/var/log/nova" ] && cp -r /var/log/nova "$BUGDIR/log_nova"
	[ -d "/var/log/neutron" ] && cp -r /var/log/neutron "$BUGDIR/log_neutron"

	if [ "$CONTROLLER" = 'yes' ]; then
		# Check whether OpenStack credentials are correctly set
		if ! failure=$($TIMEOUT nova list 2>&1); then
			# The error text is passed as an argument, never as the
			# printf format, so a '%' in it cannot corrupt the output.
			printf '`nova list` failed with this error: %s\n' "$failure"
			printf 'Make sure your credentials are properly exported, and re-run the script.\n'
			return
		fi

		$TIMEOUT nova --version 2> "$BUGDIR/nova_version.txt"
		$TIMEOUT nova list > "$BUGDIR/nova_list.txt"
		$TIMEOUT nova host-list > "$BUGDIR/nova_host_list.txt"
		$TIMEOUT nova hypervisor-list > "$BUGDIR/nova_hypervisor_list.txt"
		$TIMEOUT nova flavor-list > "$BUGDIR/nova_flavor_list.txt"
		$TIMEOUT nova image-list > "$BUGDIR/nova_image_list.txt"
		$TIMEOUT nova network-list > "$BUGDIR/nova_network_list.txt"
		$TIMEOUT nova service-list > "$BUGDIR/nova_service_list.txt"
		# Skip the three header lines of the table and take the second
		# column; word splitting drops the empty value produced by the
		# closing border line.
		for id in $($TIMEOUT nova list --minimal | tail -n +4 | awk '{ print $2 }'); do
			printf '##### %s #####\n' "$id" >> "$BUGDIR/nova_show.txt" &&
			$TIMEOUT nova show "$id" >> "$BUGDIR/nova_show.txt"
		done
	fi

	if [ "$NETWORK" = 'yes' ]; then
		$TIMEOUT neutron agent-list > "$BUGDIR/neutron_agent_list.txt"
		$TIMEOUT neutron router-list > "$BUGDIR/neutron_router_list.txt"
		$TIMEOUT neutron net-list > "$BUGDIR/neutron_net_list.txt"
		# Own file for the subnets, so the network list is not overwritten.
		$TIMEOUT neutron subnet-list > "$BUGDIR/neutron_subnet_list.txt"
	fi
}

# Save the sysrepo configuration directory and export the "vrouter"
# module from the startup and running datastores as JSON.
# Globals: BUGDIR (read) - directory receiving the copies.
# Does nothing when /etc/sysrepo does not exist.
get_management_info()
{
	[ -d /etc/sysrepo ] || return 0

	cp -a /etc/sysrepo "$BUGDIR/etc_sysrepo"
	# The destination is glued to -X; quoting the whole word keeps a
	# BUGDIR containing whitespace in a single argument.
	exec_cmd sysrepocfg "-X$BUGDIR/vrouter_startup.json" \
		-m vrouter -d startup -f json
	exec_cmd sysrepocfg "-X$BUGDIR/vrouter_running.json" \
		-m vrouter -d running -f json
}

# Save the output of vrl-status when that command is available.
# Globals: BUGDIR (read) - directory receiving vrl_status.txt.
get_license_info()
{
	command -v vrl-status > /dev/null || return 0
	# Quoted so that a BUGDIR containing whitespace is still a valid
	# redirection target.
	vrl-status > "$BUGDIR/vrl_status.txt"
}

# Remove the temporary tree whenever the script exits. INT and QUIT are
# turned into a regular exit: trapping them with cleanup alone would
# delete the working directory and then let the script carry on.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 131' QUIT

# Without a working directory nothing below can be trusted (BUGDIR would
# end up directly under /), so give up immediately.
TMPDIR=$(mktemp -d) || {
	printf 'Cannot create a temporary directory\n' >&2
	exit 1
}
SUFFIX=$(hostname)
TIMEOUT_S=10
BUGDIR="$TMPDIR/bug_info/$SUFFIX"
EXTRAFILES=""
mkdir -p "$BUGDIR" || exit 1
[ -x "$(command -v timeout)" ] && TIMEOUT="$(command -v timeout) ${TIMEOUT_S}s"

# "$@" keeps arguments containing whitespace (e.g. file paths) intact.
parse_args "$@"

ARCHIVE=${ARCHIVE:-"/tmp/troubleshooting-report_${SUFFIX}.tar.gz"}

printf 'Gathering information. This may take some time...\n'

# octeon-specific info
[ -f "/proc/octeon_ethernet_stats" ] && \
	cp /proc/octeon_ethernet_stats "$BUGDIR/octeon_ethernet_stats.txt"
[ -f "/proc/octeon_info" ] && cp /proc/octeon_info "$BUGDIR/octeon_info.txt"

# One pass per network namespace (first column of `ip netns`), then the
# current namespace.
for vrf in $(ip netns | cut -d " " -f 1); do
	get_linux_info "$vrf"
done
get_linux_info

# va-specific files [SF13646] [SF13686]
[ -f "/tmp/conf.old" ] && cp /tmp/conf.old "$BUGDIR/conf.old"
[ -f "/tmp/conf.json" ] && cp /tmp/conf.json "$BUGDIR/conf.json"

get_system_info
get_coredump
get_fp_info
get_openstack_info
get_management_info
get_license_info

##### External SOSREPORT results
if [ -x "$(command -v sosreport)" ]; then
	SOSDIR="$BUGDIR/sosreport"
	mkdir -p "$SOSDIR"
	sosreport -a --batch --build --tmp-dir="$SOSDIR" > "$BUGDIR/sosreport.txt"
fi

# Add extra files
# EXTRAFILES is a space-separated list, hence the unquoted expansion.
for file in ${EXTRAFILES}; do
	cp "${file}" "$BUGDIR/$(basename "${file}")"
done

# Only announce the archive when tar actually produced it.
if tar -czf "${ARCHIVE}" -C "${TMPDIR}" .; then
	printf 'Saved into %s\n' "${ARCHIVE}"
else
	printf 'Failed to create %s\n' "${ARCHIVE}" >&2
	exit 1
fi

Send the troubleshooting-report*.tar.gz files along with any other information you deem relevant.

Note

The script makes use of the following commands if available:

brctl dmidecode ebtables-save ip6tables-save iptables-save lsb_release lscpu lspci lstopo mount numastat ovs-vsctl sosreport timeout vmstat

For better results, make sure these tools are available on your system.