watchdog: use dig instead of check_dns (#6685)

* watchdog: use dig instead of check_dns

check_dns is slower and uses more system resources,
dig wrapped in a script is a more performant approach and uses
fewer system resources

* added debug mode + compose image bump

---------

Co-authored-by: maxi322 <maxi322@users.noreply.github.com>
Co-authored-by: DerLinkman <niklas.meyer@servercow.de>
This commit is contained in:
maxi322 2025-08-28 12:56:37 +02:00 committed by GitHub
parent 4d88e19106
commit 5e66ffa366
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 50 additions and 4 deletions

View File

@ -16,7 +16,6 @@ RUN apk add --update \
fcgi \
openssl \
nagios-plugins-mysql \
nagios-plugins-dns \
nagios-plugins-disk \
bind-tools \
redis \
@ -32,9 +31,11 @@ RUN apk add --update \
tzdata \
whois \
&& curl https://raw.githubusercontent.com/mludvig/smtp-cli/v3.10/smtp-cli -o /smtp-cli \
&& chmod +x smtp-cli
&& chmod +x smtp-cli \
&& mkdir /usr/lib/mailcow
COPY watchdog.sh /watchdog.sh
COPY check_mysql_slavestatus.sh /usr/lib/nagios/plugins/check_mysql_slavestatus.sh
COPY check_dns.sh /usr/lib/mailcow/check_dns.sh
CMD ["/watchdog.sh"]

View File

@ -0,0 +1,39 @@
#!/bin/sh
while getopts "H:s:" opt; do
case "$opt" in
H) HOST="$OPTARG" ;;
s) SERVER="$OPTARG" ;;
*) echo "Usage: $0 -H host -s server"; exit 3 ;;
esac
done
if [ -z "$SERVER" ]; then
echo "No DNS Server provided"
exit 3
fi
if [ -z "$HOST" ]; then
echo "No host to test provided"
exit 3
fi
# run dig and measure the time it takes to run
START_TIME=$(date +%s%3N)
dig_output=$(dig +short +timeout=2 +tries=1 "$HOST" @"$SERVER" 2>/dev/null)
dig_rc=$?
dig_output_ips=$(echo "$dig_output" | grep -E '^[0-9.]+$' | sort | paste -sd ',' -)
END_TIME=$(date +%s%3N)
ELAPSED_TIME=$((END_TIME - START_TIME))
# validate and perform nagios like output and exit codes
if [ $dig_rc -ne 0 ] || [ -z "$dig_output" ]; then
echo "Domain $HOST was not found by the server"
exit 2
elif [ $dig_rc -eq 0 ]; then
echo "DNS OK: $ELAPSED_TIME ms response time. $HOST returns $dig_output_ips"
exit 0
else
echo "Unknown error"
exit 3
fi

View File

@ -1,5 +1,10 @@
#!/bin/bash
if [ "${DEV_MODE}" != "n" ]; then
echo -e "\e[31mEnabled Debug Mode\e[0m"
set -x
fi
trap "exit" INT TERM
trap "kill 0" EXIT
@ -297,7 +302,7 @@ unbound_checks() {
touch /tmp/unbound-mailcow; echo "$(tail -50 /tmp/unbound-mailcow)" > /tmp/unbound-mailcow
host_ip=$(get_container_ip unbound-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_dns -s ${host_ip} -H stackoverflow.com 2>> /tmp/unbound-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
/usr/lib/mailcow/check_dns.sh -s ${host_ip} -H stackoverflow.com 2>> /tmp/unbound-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
DNSSEC=$(dig com +dnssec | egrep 'flags:.+ad')
if [[ -z ${DNSSEC} ]]; then
echo "DNSSEC failure" 2>> /tmp/unbound-mailcow 1>&2

View File

@ -521,7 +521,7 @@ services:
- /lib/modules:/lib/modules:ro
watchdog-mailcow:
image: ghcr.io/mailcow/watchdog:2.08
image: ghcr.io/mailcow/watchdog:2.09
dns:
- ${IPV4_NETWORK:-172.22.1}.254
tmpfs:
@ -588,6 +588,7 @@ services:
- OLEFY_THRESHOLD=${OLEFY_THRESHOLD:-5}
- MAILQ_THRESHOLD=${MAILQ_THRESHOLD:-20}
- MAILQ_CRIT=${MAILQ_CRIT:-30}
- DEV_MODE=${DEV_MODE:-n}
networks:
mailcow-network:
aliases: