
pi@wdog:~ $ cat .syspy/wdog.py
#!/home/pi/.syspy/bin/python
# coding: utf-8
'''
 Python script running on a Raspberry Pi acting as a hardware watchdog of a
 second Raspberry Pi server. The server must toggle the state of a
 "heartbeat" GPIO output pin at regular intervals to signal that it is alive;
 that pin is monitored by an input GPIO pin of the watchdog Pi. If heartbeats
 are not received during a period of time equal to or greater than the
 watchdog timeout, the watchdog will pulse low two output GPIO pins. The first
 must be connected to a GPIO input pin bound to the gpio_shutdown kernel module
 of the server Pi or to a custom shutdown script that also ensures that the
 server Pi is shut down correctly. The second watchdog output pin must be
 connected to the RUN input of the server Raspberry Pi.

 https://sigmdel.ca/michel/ha/rpi/hardware_watchdog_04_en.html

 Version: 0.3

 A normally open push button should be connected to GPIO3 on the watchdog Pi
 and identified as the Power button.
    one short press (< 3 seconds):   reboot the server and watchdog
    long press (> 3 seconds):        shutdown the server and watchdog

 Added logging facilities and email notification

 (c) Copyright Feb. 2020, Michel Deslierres

 https://sigmdel.ca/michel/about_en.html#about_copyright

'''

# Needed syslog module here because LOG_XXX constants are used in some user
# settable constants.
#
from syslog import *

### User settable values #####################################################

# Timing constants (seconds, except START_COUNT which is a count)
CHECK_INTERVAL = 10      # seconds between two runs of checkAlive()
WATCHDOG_TIMEOUT = 45    # seconds without an alive signal before rebooting the server
PULSE_TIME   = 0.3       # length (seconds) of the low pulse sent to the server shutdown and reset pins
SHUTDOWN_DELAY = 25      # time allowed (seconds) for the server to shut down
RESET_DELAY  = 5         # time allowed (seconds) for the server to cold boot
START_COUNT = 4          # initial heartbeats to count; the watchdog starts once the count exceeds this value
BUTTON_BOUNCE = 0.08     # seconds of debounce time for power button


# Watchdog GPIO connections (RPi.GPIO is switched to BCM numbering further down)
POWER_BUTTON_GPIO = 3     # watchdog input connected to watchdog power button
HEARTBEAT_GPIO = 17       # watchdog input connected to server's alive pin
SERVER_SHUTDOWN_GPIO = 27 # watchdog output connected to server's shutdown pin
SERVER_RESET_GPIO = 22    # watchdog output connected to server's RUN pin

# Logging constants
VERBOSE  = 1                 # 0 quiet, 1 to echo log messages to the console
CONSOLELOG_LEVEL = LOG_INFO  #   only messages whose priority value is <= this level are echoed
SYSLOG_LEVEL = LOG_INFO      # syslog mask: messages up to this priority are sent to syslog
SEND_NOTIFICATION = True     # if True an email is sent when rebooting or shutting down

# Notification settings. These are defined unconditionally: the callers
# pass REBOOT_MSG / SHUTDOWN_MSG to sendNotification() even when
# SEND_NOTIFICATION is False, so the names must always exist or those
# callers would fail with a NameError.
# The "{}" placeholder in the messages is replaced by the local time.
EMAIL_DESTINATION = "domo.bocjour@gmail.com"    # eg. me@gmail.com
EMAIL_SUBJECT = "Home Automation Watchdog"
REBOOT_MSG = "Rebooting the home automation server at {} local time."
SHUTDOWN_MSG = "Shutting down the home automation server and watchdog at {} local time."
##############################################################################

## Global variables (shared between the timer thread and the GPIO callbacks) ##

startCount = 0              # Count of heartbeats received while the watchdog is not active
aliveTime = 0.0             # time.time() stamp of the last alive signal received from server
watchdogActive = False      # True = watchdog was started by the initial heartbeats
buttonPressedTime = None    # time.time() stamp of the last power button press, None until pressed

## Required modules ##

import RPi.GPIO as GPIO
from gpiozero import Button
from threading import Timer
from signal import pause
from subprocess import check_call
import os
import time
if SEND_NOTIFICATION:
  from pymail import postmail

# Sends a message to syslog and, when console echo is enabled, prints it
# with a local time stamp.
#   level: syslog priority (LOG_xxx constant) of the message
#   msg:   text of the message
#
def log(level, msg):
  syslog(level, msg)
  # Console echo is skipped when VERBOSE is off or when the priority value
  # is above CONSOLELOG_LEVEL
  if not VERBOSE or level > CONSOLELOG_LEVEL:
    return
  stamp = time.strftime('%Y-%m-%d %H:%M:%S ', time.localtime())
  print(stamp + msg)

# Sends an e-mail notification, does nothing if SEND_NOTIFICATION is off.
#   msg: message template; its "{}" placeholder receives the local time
# Any failure while formatting or posting the mail is logged and swallowed
# so that the caller always continues.
#
def sendNotification(msg):
  if not SEND_NOTIFICATION:
    return
  try:
    log(LOG_INFO, 'Sending e-mail notification')
    stamp = time.strftime('%Y-%m-%d %H:%M:%S ', time.localtime())
    body = msg.format(stamp)
    postmail(EMAIL_SUBJECT, body, EMAIL_DESTINATION)
    log(LOG_INFO, 'E-mail notification sent')
  except BaseException as error:
    log(LOG_ERR, 'An exception occurred in postmail: ' + str(error))


# Drives a normally HIGH output pin LOW for PULSE_TIME seconds, then HIGH
# again. The watchdog is marked inactive before the pulse is sent.
#   aPin:      GPIO number of the output pin to pulse
#   sleepTime: optional number of seconds to sleep after the pulse,
#              None (default) returns right after the pulse
#
def pulseServerPin(aPin, sleepTime=None):
  global watchdogActive
  watchdogActive = False

  log(LOG_DEBUG, "Pulsing pin {} low".format(aPin))
  GPIO.output(aPin, GPIO.LOW)
  time.sleep(PULSE_TIME)
  GPIO.output(aPin, GPIO.HIGH)

  if sleepTime is None:
    return
  log(LOG_DEBUG, "Waiting {} seconds after pulse".format(sleepTime))
  time.sleep(sleepTime)

# Puts the watchdog back in its initial state: inactive and with the
# heartbeat count at zero, so that it waits again for the initial
# heartbeats before starting
#
def initWatchdog():
  global watchdogActive, startCount
  log(LOG_INFO, "Resetting watchdog")
  watchdogActive, startCount = False, 0

# Shuts down the server by activating its shutdown pin and,
# after a delay to allow a proper shutdown of the OS, it
# restarts the server by activating its RUN pin. Sleeps
# to allow the boot process to complete on the server, then
# resets the watchdog.
#
# Does nothing (apart from logging) when the watchdog is not active, which
# is also the state the watchdog is in while a reboot is in progress.
#
def rebootServer():
  global watchdogActive
  global startCount

  log(LOG_INFO, "Rebooting server")
  if not watchdogActive:
    log(LOG_INFO, "Already rebooting")
    return

  # Disarm before doing anything slow. This routine can be entered from
  # the timer thread (checkAlive) and from the power button callback; if
  # the flag were only cleared by pulseServerPin(), a second caller could
  # pass the guard above while the notification is still being sent.
  # The heartbeat count is cleared as well, otherwise the very next
  # heartbeat would re-arm the watchdog (see aliveCallback).
  watchdogActive = False
  startCount = 0

  sendNotification(REBOOT_MSG)

  # Shutdown the server properly and then wake it up
  pulseServerPin(SERVER_SHUTDOWN_GPIO, sleepTime=SHUTDOWN_DELAY)
  pulseServerPin(SERVER_RESET_GPIO, sleepTime=RESET_DELAY)

  # Reset the watchdog
  initWatchdog()


# Timer subclass that keeps calling its function every `interval` seconds
# instead of calling it only once. The loop ends when cancel() is called,
# which sets the `finished` event the loop waits on.
# Reference:
#   right2clicky on StackOverflow: https://stackoverflow.com/a/48741004
#
class RepeatTimer(Timer):
  def run(self):
    while True:
      # wait() returns True as soon as the timer has been cancelled and
      # False when the interval elapsed without a cancel
      if self.finished.wait(self.interval):
        break
      self.function(*self.args, **self.kwargs)

# Periodic check run by the timer (every CHECK_INTERVAL seconds).
# When the watchdog is active and the last heartbeat is older than
# WATCHDOG_TIMEOUT seconds, the time out is logged and the server
# is rebooted.
#
def checkAlive():
  if not watchdogActive:
    return
  if time.time() - aliveTime <= WATCHDOG_TIMEOUT:
    return
  silence = time.time() - aliveTime
  log(LOG_INFO, "Watchdog timed out after {0:.2f} seconds".format(silence))
  rebootServer()


# Callback for the edge detected on the server heartbeat input.
# Every call records the current time as the time of the last heartbeat.
# While the watchdog is not active, the call is also counted and the
# watchdog is started once the count exceeds START_COUNT.
#   channel: GPIO channel that triggered the event (not used)
#
def aliveCallback(channel):
  global watchdogActive, startCount, aliveTime

  aliveTime = time.time()
  if watchdogActive:
    return

  startCount = startCount + 1
  if startCount <= START_COUNT:
    return

  log(LOG_INFO, "Watchdog started")
  watchdogActive = True

# Power button press callback: remembers when the button went down so
# that the release callback can work out how long it was held
#
def buttonPressed():
  global buttonPressedTime
  pressedAt = time.time()
  buttonPressedTime = pressedAt

# Callback routine when power button is released
#   held for more than 3 seconds: notify, pulse the server shutdown pin
#                                 and power off the watchdog
#   otherwise:                    reboot the server, then the watchdog
# A release for which no press time was recorded (buttonPressedTime is
# still None, e.g. the button was already down when the script started)
# is ignored instead of failing on the time subtraction.
#
def buttonReleased():
  global buttonPressedTime

  if buttonPressedTime is None:
    log(LOG_DEBUG, "Power button released without a recorded press, ignored")
    return

  elapsed = time.time() - buttonPressedTime
  # Each release consumes its press time stamp so that a stale value is
  # never reused by a later release
  buttonPressedTime = None
  log(LOG_DEBUG, "Power button pressed for {0:.2f}".format(elapsed))
  if elapsed > 3:
    log(LOG_INFO, "Power button pressed to shutdown the server and watchdog")
    sendNotification(SHUTDOWN_MSG)
    pulseServerPin(SERVER_SHUTDOWN_GPIO)
    check_call(['/sbin/poweroff'])         # must be root for this to work
  else:
    log(LOG_INFO, "Power button pressed to reboot the server and watchdog")
    rebootServer()
    check_call(['/sbin/reboot'])           # must be root for this to work


# Setup syslog: messages are tagged 'PiWatchdog' and only those up to
# SYSLOG_LEVEL are let through
openlog(ident='PiWatchdog')
setlogmask(LOG_UPTO(SYSLOG_LEVEL))

# Setup the GPIO pins (BCM numbering)
GPIO.setwarnings(False)
GPIO.setmode(GPIO.BCM)

# Both server control outputs idle HIGH, pulseServerPin() pulses them LOW
GPIO.setup(SERVER_SHUTDOWN_GPIO, GPIO.OUT, initial=GPIO.HIGH)
GPIO.setup(SERVER_RESET_GPIO, GPIO.OUT, initial=GPIO.HIGH)

# Heartbeat input with pull-up, every falling edge counts as one heartbeat
GPIO.setup(HEARTBEAT_GPIO, GPIO.IN, pull_up_down=GPIO.PUD_UP)
GPIO.add_event_detect(HEARTBEAT_GPIO, GPIO.FALLING, callback=aliveCallback)

# Setup the power button
button = Button(POWER_BUTTON_GPIO, bounce_time=BUTTON_BOUNCE)
button.when_pressed = buttonPressed
button.when_released = buttonReleased

# Setup the timer
aliveTime = time.time()
timer = RepeatTimer(CHECK_INTERVAL, checkAlive)

# Run the watchdog
timer.start()
log(LOG_INFO, "Watchdog loaded")

try:
  # Block the main thread; the work is done by the timer thread and by
  # the GPIO / button callbacks
  pause()
finally:
  #GPIO.cleanup() - no longer needed, gpiozero has already cleaned up ?
  # Thread.isAlive() was removed in Python 3.9, is_alive() is the
  # supported spelling
  if timer.is_alive():
    timer.cancel()
  log(LOG_INFO, "Watchdog terminated")
pi@wdog:~ $ 
