#!/bin/sh

# Compare two remote directories to which you have SSH access.
# (To Noel Taylor, with the compliments of the chef.)
#
# Usage:
#
#   $ ./compare-remote-dirs remote1.com:some/path remote2.com:another/path

# Both REMOTE:PATH arguments are mandatory.  Complain on stderr and
# exit non-zero as soon as one of them is missing.
if [ "${1}" = "" ]; then
  echo "ERROR: Please supply two arguments of the form REMOTE:PATH" >&2
  exit 1
fi

if [ "${2}" = "" ]; then
  echo "ERROR: Please supply the second REMOTE:PATH argument" >&2
  exit 1
fi

# Each argument must also have the REMOTE:PATH shape: a non-empty
# remote name, then a colon.  The path after the colon may be empty,
# in which case the remote 'cd' is run without an argument.  Rejecting
# malformed arguments here avoids launching ssh with a host and path
# that were never really specified.
for ARG_SPEC in "${1}" "${2}"; do
  case "${ARG_SPEC}" in
    [!:]*:*)
      # First character is not a colon, and a colon follows: accept.
      ;;
    *)
      echo "ERROR: '${ARG_SPEC}' is not of the form REMOTE:PATH" >&2
      exit 1
      ;;
  esac
done

# Split a REMOTE:PATH argument at its FIRST colon, so that a path which
# itself contains colons is kept whole instead of being truncated at
# the second colon.  Parameter expansion is used rather than
# 'echo | cut' so that arguments beginning with '-' or containing
# backslashes pass through untouched.
#
# If the argument contains no colon at all, both helpers print the
# whole argument (the same result 'cut' gives for input that lacks
# the delimiter).

# Print the REMOTE part of "$1": everything before the first colon.
remote_server_of() {
  printf '%s\n' "${1%%:*}"
}

# Print the PATH part of "$1": everything after the first colon.
remote_path_of() {
  printf '%s\n' "${1#*:}"
}

REMOTE_SERVER_1=$(remote_server_of "${1}")
REMOTE_PATH_1=$(remote_path_of "${1}")
REMOTE_SERVER_2=$(remote_server_of "${2}")
REMOTE_PATH_2=$(remote_path_of "${2}")

# A few words about the use of the 'find' command below:
#
# By using 'find' instead of 'ls', we get the behavior we want:
# printing out a recursive directory listing with each file prefixed
# by its full relative path.  We ensured that those paths would be
# relative, rather than absolute, by cd'ing into the target directory
# before running 'find'.
#
# The '-print' option to 'find' is, strictly speaking, unnecessary.
# It used to be needed, but in modern 'find' commands printing is the
# default behavior and there's no need to specify it via an option.
# However, I still add it out of habit (it's still allowed), and if
# you use it then your 'find' invocations will work even with old
# implementations of 'find'.  Also, a lot of documentation on the Net
# about the 'find' command still includes the '-print' option, so
# don't be confused if you see it in examples.  (See the 'find' man
# page for why you might want to consider using '-print0' instead, by
# the way.  It's a great read.  Won the 1998 Booker Prize, I think.)
#
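# The '.' argument is also, strictly speaking, unnecessary with GNU
# 'find': the dot (current directory) is assumed if no target is
# specified.  POSIX, however, requires at least one starting path, and
# not every 'find' implementation supplies a default.
#
# In other words, with GNU 'find' you could write just 'find' instead
# of 'find . -print' below and everything would still work; spelling
# it out keeps the command portable to other implementations.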

# The "$$" below expands to the process number of the current shell
# (the one that's running this script).  The reason we use it is to
# ensure that the temporary filenames we create are unique -- it would
# be unseemly for this script to accidentally trample on other files.
# We could go further and place them in /tmp, but I wanted the script
# to still work even if /tmp doesn't exist (although it always should).

# Local scratch files holding the sorted listing of each remote directory.
LISTING_1="compdirs-$$-1.out"
LISTING_2="compdirs-$$-2.out"

# Delete the scratch files.  Uses 'rm -f' so it is harmless to call
# more than once or before the files have been created.
cleanup_listings() {
  rm -f "${LISTING_1}" "${LISTING_2}"
}

# Make sure the scratch files never outlive the script: clean up on
# any exit, and turn the usual termination signals into a normal exit
# so that the EXIT trap fires for them too.
trap cleanup_listings EXIT
trap 'exit 1' HUP INT TERM

# Write a sorted recursive listing of a remote directory to a local file.
#
# Arguments: $1 - remote server, as given to ssh
#            $2 - directory on that server
#            $3 - local file that receives the sorted listing
#
# NOTE: $2 is deliberately passed to the remote shell unquoted, so that
# forms such as '~/dir' are still expanded on the remote side.  The
# flip side is that the remote shell interprets the path: characters
# that are special to it (spaces, quotes, ';', ...) must be escaped by
# the caller, and the path must come from a trusted source.
#
# The listing is captured before sorting, rather than piped straight
# into 'sort', so that the exit status of ssh is not lost.  A listing
# that worked always contains at least the '.' entry, so an empty one
# means the connection or the remote 'cd' failed; in that case the
# script stops instead of diffing against nothing.
fetch_listing() {
  ssh "${1}" "cd ${2} && find . -print" > "${3}"
  FETCH_STATUS=$?

  if [ ! -s "${3}" ]; then
    echo "ERROR: Could not list ${1}:${2} (exit status ${FETCH_STATUS})" >&2
    exit 1
  fi

  if [ "${FETCH_STATUS}" -ne 0 ]; then
    # Some output arrived, but the remote side reported a problem
    # (for example, 'find' could not read a subdirectory).
    echo "WARNING: Listing of ${1}:${2} may be incomplete (exit status ${FETCH_STATUS})" >&2
  fi

  sort -o "${3}" "${3}"
}

fetch_listing "${REMOTE_SERVER_1}" "${REMOTE_PATH_1}" "${LISTING_1}"
fetch_listing "${REMOTE_SERVER_2}" "${REMOTE_PATH_2}" "${LISTING_2}"

diff -u "${LISTING_1}" "${LISTING_2}"

# Remove the temporary files, now that the diff is done.  Doing this as
# the final command also keeps the exit status of a completed
# comparison independent of whether 'diff' found differences.
cleanup_listings