#!/usr/bin/python
#
# Publish (install) files to a destination, checking to ensure that
# the destination files are unchanged from the last publication round.
# File integrity is checked using NIST's SHA secure hash algorithm.
# When a target file is first published, its current SHA hash is stored
# in a dotfile in the same directory; whenever later we touch it, we
# check to see that the stored SHA hash matches the computed SHA hash.
# (Whenever we republish a new version, we refresh the stored hash.)
#
# Use -h for quick usage, --help for long help.
#
# Files to be published are supplied on either the command line as
# source/destination pairs or from a configuration file, which lists
# source/destination pairs one pair to a line separated by whitespace.
# A configuration file can have blank lines and comment lines (they
# must start with a '#' as the first non-whitespace character).
# In control/configuration files (and only in them), you can add
# optional arguments after the file name pair. These may be:
#	-u USER or -o USER	USER will be the target file's owner
#	-g GROUP		GROUP will be the target file's group
#	-m MODE			MODE will be the target file's mode
#				(in numeric notation only)
# -m takes numbers. -u/-o and -g take either numbers or user names.
# (No spaces allowed in any arguments.)
# For all of them, the usual conversion rules apply; a leading '0'
# means the value is in octal. (As in '-m 0755'.) You can also specify
# hex, if that floats your boat.
# If unspecified, the target file will have the owner and group of
# whatever happens when the current UID writes to a file in the
# target directory and the mode of the source file.
# Times are not copied; the target file will have the time that it
# was last written to.
#
# Copyright: GPL. See the end of this file.
# Requirements: Unix and Python 1.5.2 (at least). It is believed to run
# fine under Python 2.2.
# Tested on: Red Hat 7.3 & 7.1.

import sys, string, os
import stat
import sha

# This is our IO blocksize. 128K seems to be sensible, although perhaps
# it should be larger.
BLOCKSIZE = 1024*128

# How we do error reporting:
# All errors that we expect and want to handle are instances of PubError
# (or of a class derived from it), so that a single 'except PubError'
# catches every one of them.
# The standard payload is the tuple (filename, error message); str() of
# the exception formats it as 'filename: error message'.
# PubError is derived from Exception so that it behaves like any other
# exception: generic 'except Exception' handlers see it, and raising it
# does not depend on the interpreter accepting arbitrary classes.
class PubError(Exception):
	# fn is the name of the file the error is about, msg says what
	# went wrong with it.
	def __init__(_, fn, msg):
		Exception.__init__(_, fn, msg)
		_.a = (fn, msg)
	# Index into the payload: 0 is the filename, 1 is the message.
	def __getitem__(_, i):		return _.a[i]
	def __str__(_):			return '%s: %s' % (_.a[0], _.a[1])
	# The name of the file this error is about.
	def getfn(_):			return _.a[0]
# Errors that originate from an IOError or OSError.
# Constructed as (filename, message, IOError/OSError object); the extra
# message may be None or otherwise empty, in which case it is left out
# of the formatted text.
class PubIOError(PubError):
	def __init__(_, fn, msg, ieo):
		_.a = (fn, msg, ieo)
	def __str__(_):
		(fn, msg, ieo) = _.a
		reason = ieo.strerror
		culprit = ieo.filename
		# Name the file the failing call was working on whenever the
		# exception records one that differs from the file we blame.
		where = fn
		if culprit and culprit != fn:
			where = '%s (working on %s)' % (fn, culprit)
		if not msg:
			return '%s: %s' % (where, reason)
		return '%s: %s: %s' % (where, msg, reason)

# Make a SHA checksum store name from a filename.
def shaname(fn):
	n = string.split(fn, '/')
	n[-1] = '.cksum.' + n[-1]
	return string.join(n, '/')
# derive the temporary target file name for a destination file.
# This temporary target must be in the same directory as the destination.
# It is considered okay to use a temporary name that is only unique per
# the target, since multiple publish instances running against the same
# file are broken anyways.
def tmpdstname(fn):
	n = string.split(fn, '/')
	n[-1] = '.pubtemp.' + n[-1]
	return string.join(n, '/')

# Return either the first line of the file or None, if it cannot be
# read.
def getfileline(fn):
	try:
		fh = open(fn, 'r')
		l = fh.readline()
		fh.close()
		return string.strip(l)
	except IOError, t:
		raise PubIOError, (fn, "can't read first line", t)

# Does this file usefully exist?
# To usefully exist, the file must: be a regular file, and be openable
# for reading.
# Quietly lstat() a file: returns the stat result, or None when the
# lstat fails with OSError.
def qstat(fn):
	try:
		st = os.lstat(fn)
	except OSError:
		st = None
	return st
# Insist that fn exists and is a regular file; raises PubError
# otherwise. Returns nothing.
def realfile(fn):
	st = qstat(fn)
	if st and stat.S_ISREG(st[stat.ST_MODE]):
		return
	if not st:
		raise PubError(fn, "doesn't exist")
	raise PubError(fn, "not a regular file")
def goodfile(fn):
	realfile(fn)
	try:
		fh = open(fn, 'r')
		fh.close()
	except IOError, t:
		raise PubIOError, (fn, "failed readability check", t)
	
# Calculate the SHA checksum of a file. It is returned in hexdigit form.
def filesha(fn):
	d = sha.new()
	try:
		fh = open(fn, 'r')
		block = fh.read(BLOCKSIZE)
		while block:
			d.update(block)
			block = fh.read(BLOCKSIZE)
	       	fh.close()
	except IOError, t:
		raise PubIOError, (fn, "can't calculate SHA", t)
	return d.hexdigest()

# This returns file state information in an odd way.
# It takes an additional tuple that is the uid/gid/mode that we care
# about. If we don't care about one in particular, that value is None,
# and we return None in the return tuple.
def filestate(fn, ugm):
	try:
		(uid, gid, mode) = ugm
		st = os.lstat(fn)
		if uid != None:
			uid = st[stat.ST_UID]
		if gid != None:
			gid = st[stat.ST_GID]
		if mode != None:
			mode = stat.S_IMODE(st[stat.ST_MODE])
		return (uid, gid, mode)
	except OSError, t:
		raise PubIOError, (fn, "cannot stat", t)
def filemode(fn):
	try:
		st = os.lstat(fn)
		return stat.S_IMODE(st[stat.ST_MODE])
	except OSError, t:
		raise PubIOError, (fn, "cannot stat", t)
def filesize(fn):
	try:
		st = os.lstat(fn)
		return st[stat.ST_SIZE]
	except OSError, t:
		raise PubIOError, (fn, "cannot stat", t)


# copy the permissions and other state of file A that we care about
# to file B. (Right now this is just 'permissions', but may add times
# in the future.)
def copystate(fa, fb):
	f = fa; msg = "reading permissions"
	try:
		st = os.lstat(fa)
		f = fb; msg = "setting permissions"
		os.chmod(fb, stat.S_IMODE(st[stat.ST_MODE]))
	except OSError, t:
		raise PubIOError, (f, msg, t)

# Open a new file for writing with minimal permissions on the file.
# If the optional second argument is true, we fail if the file already
# exists.
def opennewfile(fn, mustbenew = 0):
	emode = 0
	if mustbenew:	emode = os.O_EXCL
	try:
		fd = os.open(fn, os.O_CREAT | os.O_WRONLY | os.O_TRUNC | emode,
			     0600)
		fh = os.fdopen(fd, "w")
		return fh
	except OSError, t:
		raise PubIOError, (fn, "opening file to write", t)

# A Cleanup class instance functions as a central recorder (and executor)
# of a sequence of things that must be done (and whose failure can and
# must be ignored, except for potential logging of this).

# Perform an IO operation (one that can fail with OSError or IOError)
# and gag the error.
def voidioop(fn, *args):
	try:				apply(fn, args)
	except (OSError, IOError):	pass
# Records operations that must be done later and runs them on request.
# _.cl is the list of pending operations; every entry is one
# (function, argument tuple) pair.
class Cleanup:
	def __init__(_):		_.cl = []
	# The primitive operations. Each one ignores IO failures.
	def _del(_, fn):		voidioop(os.unlink, fn)
	def _ln(_, fsrc, fdst):		voidioop(os.link, fsrc, fdst)
	def _mv(_, fsrc, fdst):		voidioop(os.rename, fsrc, fdst)
	# The functions one calls to add an operation to be done.
	# The (function, arguments) pair is built explicitly because
	# list.append() takes exactly one argument; handing it two is a
	# TypeError on Python 2.0 and later.
	def rm(_, *args):		_.cl.append((_._del, args))
	def ln(_, *args):		_.cl.append((_._ln, args))
	def mv(_, *args):		_.cl.append((_._mv, args))
	# Call an arbitrary function with arguments.
	def callfunc(_, func, *args):	_.cl.append((func, args))

	# Execute our list of pending cleanups in undefined (but currently
	# LIFO) order.
	def cleanup(_):
		while _.cl:
			(op, args) = _.cl[-1]
			apply(op, args)
			# Only pop after the operation has not blown up.
			_.cl.pop()
	# Forget any and all pending cleanups without running them.
	def cancel(_):
		_.cl = []
			
# Given a string and a target filename, make the file's contents be
# that string.
def recordline(fn, str):
	fh = opennewfile(fn)
	cl = Cleanup()
	cl.rm(fn)
	try:
		fh.write(str)
		fh.close()
	except IOError, t:
		cl.cleanup()
		raise PubIOError, (fn, "writing line to file", t)

# Copy file A to file B
def copyfile(fa, fb, uid, gid, mode):
	co = Cleanup()
	# First: create the temporary.
	ft = tmpdstname(fb)
	fo = opennewfile(ft, 1)
	# Record that we ought to remove the temporary if we go boom
	co.rm(ft)
	try:
		# Now: copy input file to the temporary.
		fi = open(fa, "r")
		while 1:
			r = fi.read(BLOCKSIZE)
			if not r:
				break
			fo.write(r)
		fi.close(); fo.close()
		# Fix owner and group
		if uid != None or gid != None:
			if uid == None:		uid = -1
			if gid == None:		gid = -1
			try:		os.chown(ft, uid, gid)
			except OSError, t:
				raise PubIOError, (fb, "chown on temporary file", t)
		# Fix permissions
		if mode != None:
			os.chmod(ft, mode)
		else:
			# Copy file a's permissions to the temporary.
			copystate(fa, ft)
		# Make the temporary the real.
		os.rename(ft, fb)
	except PubError:
		co.cleanup()
		raise
	except (IOError, OSError), t:
		co.cleanup()
		raise PubIOError, (fa, "during copy to "+fb, t)

# Our concept is file pairs: the source and the target.
# Associated with the target is a stored SHA checksum, stashed in
# a file called '.cksum.<filename>' in the target's directory.
class FilePair:
	# Creating a pair validates it on the spot, so nobody has to
	# remember to do that later: the source must pass goodfile(), and
	# so must the target if it exists at all.
	def __init__(_, src, dst):
		_.src = src
		_.dst = dst
		# Cached checksums: of the source, of the target, and the
		# one read from the target's signature file.
		_.ssha = None
		_.dsha = None
		_.dfsha = None
		# Explicitly requested owner, group and mode for the target
		# (None: nothing requested).
		_.duid = None
		_.dgid = None
		_.dmode = None
		goodfile(_.src)
		if qstat(_.dst):
			goodfile(_.dst)

	def setuid(_, uid):
		_.duid = uid
	def setgid(_, gid):
		_.dgid = gid
	def setmode(_, mode):
		_.dmode = mode

	# The SHA information is cached, so that each checksum is only
	# calculated (or read) once. These fill the cache when needed.
	# The target's checksum stays unset while the target is missing.
	def _dsha(_):
		if not _.dsha and qstat(_.dst):
			_.dsha = filesha(_.dst)
	def _ssha(_):
		if not _.ssha:
			_.ssha = filesha(_.src)
	# The stored checksum stays unset while there is no signature file.
	def _dfsha(_):
		if _.dfsha:
			return
		sigfile = shaname(_.dst)
		if qstat(sigfile):
			_.dfsha = getfileline(sigfile)

	# Is the destination inconsistent: either mismatching SHAs between
	# the stored value and the calculated value, or missing target file
	# with an SHA signature file?
	# Returns a string describing the problem, or None if all is well.
	def isdstinconsist(_):
		_._dfsha()
		_._dsha()
		problem = None
		# Without a stored SHA we are always consistent.
		if _.dfsha:
			if not _.dsha:
				# A stored SHA, but no target to compute one from.
				problem = "target file has been removed"
			elif _.dfsha != _.dsha:
				problem = "target file has been edited"
		return problem

	# Do we have a valid signature file?
	# Returns the stored checksum (true) or an empty value (false).
	def validsig(_):
		_._dfsha()
		return _.dfsha

	# Do we need to update the destination?
	# To avoid a copy, all 'file attributes' must match:
	# SHA checksums, file size, UID/GID/MODE values if specified, etc.
	# Returns 1 if a copy is needed and 0 if not.
	def needupdate(_):
		_._dsha()
		_._ssha()
		# SHA mismatch?
		if _.dsha != _.ssha:
			return 1
		# Double-check: file sizes should be the same.
		if filesize(_.src) != filesize(_.dst):
			return 1
		# Whatever UID/GID/MODE was set explicitly must be what the
		# target currently has.
		wanted = (_.duid, _.dgid, _.dmode)
		if filestate(_.dst, wanted) != wanted:
			return 1
		# With no explicit mode, the target must have the source's.
		if _.dmode is None and filemode(_.src) != filemode(_.dst):
			return 1
		return 0

	# Store the source's checksum as the target's signature and drop
	# the cached target-side checksums, which are now stale.
	def _writesig(_):
		recordline(shaname(_.dst), _.ssha + '\n')
		_.dfsha = None
		_.dsha = None

	# Perform the actual update of the file pair.
	# This does the copy and then writes the new SHA hash.
	def copy(_):
		_._ssha()
		copyfile(_.src, _.dst, _.duid, _.dgid, _.dmode)
		_._writesig()

	# Just write the SHA hash of the source as the target's signature,
	# without copying anything. No check is made for an existing
	# signature.
	def copysig(_):
		_._ssha()
		_._writesig()

	# Return information about additional magic things that will
	# happen during the copy: a phrase listing the uid, gid and mode
	# that will be set, or None if none of them will be.
	def copyinfo(_):
		inf = []
		for (fmt, val) in (('uid to %d', _.duid),
				   ('gid to %d', _.dgid),
				   ('file mode to 0%o', _.dmode)):
			if val is not None:
				inf.append(fmt % (val,))
		if not inf:
			return None
		# Two items are joined by 'and'; three get commas as well.
		if len(inf) < 3:
			return string.join(inf, ' and ')
		return string.join(inf[:-1], ", ") + ", and " + inf[-1]


# ----------------------------
# Structural guts of the program.

# Get our pairs from a configuration file.
# The configuration file is a set of filename pairs separated by whitespace,
# one pair per line. Comments and blank lines are allowed.
import pwd, grp
def satoi(str):
	try:
		res = string.atoi(str, 0)
		return res
	except ValueError:
		return None
# Parse the per-line options of a control file entry and apply them to
# a file pair.
# fp is the FilePair to configure; n is the list of words that followed
# the source and target names on the line.
# Returns None on success, or a string describing the problem.
def parselopts(fp, n):
	try:
		opts, args = getopt.getopt(n, "m:o:u:g:")
	except getopt.error, (cause):
		return str(cause)
	# getopt stops at the first word that is not an option and hands
	# back the rest unparsed. Accepting that would silently drop every
	# option after the stray word, so it is reported as a problem.
	if args:
		return "unexpected argument '%s'" % (args[0],)
	for o, a in opts:
		if o == '-m':
			mode = satoi(a)
			if mode == None:
				return "bad value for -m option"
			fp.setmode(mode)
		elif o in ('-o', '-u'):
			# A number is taken as the UID itself; anything else
			# is looked up as a user name.
			try:
				uid = satoi(a)
				if uid == None:
					pw = pwd.getpwnam(a)
					uid = pw[2]
				fp.setuid(uid)
			except KeyError:
				# Name the option that was actually used.
				return "unknown username in %s option" % (o,)
		elif o == '-g':
			# Likewise: a number is the GID, otherwise a group name.
			try:
				gid = satoi(a)
				if gid == None:
					gr = grp.getgrnam(a)
					gid = gr[2]
				fp.setgid(gid)
			except KeyError:
				return "unknown group in -g option"
		else:
			die("Internal error: unhandled cf line option %s" % (o,))
	return None
					
def readcffile(fn):
	pairs = []
	lnum = 0
	try:
		fh = open(fn, "r")
		for l in fh.readlines():
			lnum = lnum + 1
			l = string.strip(l)
			if not l or l[0] == "#":
				continue
			n = string.split(l)
			if len(n) < 2:
				raise PubError, (fn,
						 "unparseable control file line at line %d" % (lnum, ))
			fp = FilePair(n[0], n[1])
			# Remaining arguments must be a set of options.
			# Parse them.
			re = parselopts(fp, n[2:])
			if re:
				raise PubError, (fn,
						 "parse problem in control file line %d: %s" % (lnum, re))
			pairs.append(fp)
		fh.close()
	except IOError, t:
		raise PubIOError, (fn, "reading control file", t)
	return pairs

# Global options state. Everything starts out switched off.
overwrite = initcksum = dryrun = forcecopy = quiet = verbose = 0
cffile = None

# Our list of file pairs to operate on.
fpairs = []

# What is the convenient name of our program?
# It is the last path component of how we were invoked, with
# "publish" as the fallback when there is no argv.
progname = "publish"
if sys.argv:
	progname = os.path.basename(sys.argv[0])

# Write a string to standard error, followed by a newline.
def eprint(string):
	err = sys.stderr
	err.write(string + '\n')
# Report a message on standard error, prefixed with the program name.
def warn(string):
	prefix = progname + ': '
	eprint(prefix + string)
# Report a message the way warn() does, then exit with status 1.
def die(string):
	warn(string)
	sys.exit(1)
# Warn, unless we are being quiet and the condition is an accepted one.
# isok is true when the thing warned about is acceptable to the user;
# the warning is only suppressed when quiet is set as well.
def qwarn(isok, string):
	if not quiet or not isok:
		warn(string)
# Report a message only if the verbosity is at least lvl.
def verb(lvl, str):
	if lvl > verbose:
		return
	warn(str)

# And: operation!
import getopt

# Print the one-line usage summary on standard error.
def uprint():
	synopsis = "usage: publish [-IOF] [-vnq] [-f controlfile] [[file1 file2] ...]"
	eprint(synopsis)
# Print the usage summary and exit with status 1; this is what we do
# when the command line is wrong.
def usage():
	uprint()
	status = 1
	sys.exit(status)
# Print the long help on standard error: the usage summary followed by
# a description of every option. Does not exit.
def help():
	text = """Usage:
	-I, --init	Publish even if some targets lack checksum files.
	-O, --overwrite	Publish even if some targets have been edited.
	-F, --force	Both --init and --overwrite.
	--forcecopy	Always copy the source files to the targets, even
			if the target seems fully up to date. This does not
			imply --force (or --init, or --overwrite).
	-v		Verbose. May be repeated for more verbosity.
	-n, --dry-run	Do not perform any actions, just say what would
			have been done.
	-f, --file	Take source/target pairs from the named file.
	-q, --quiet	Do not warn about missing checksum files or edited
			target files if --init and/or --overwrite are given.
			May also suppress other messages.
	-h		Usage
	--help		You're reading it

 If no -f or --file argument is given, the command line arguments are
the source/target pairs."""
	uprint()
	eprint(text)

# Parse the command line into the global option state; whatever is
# left over in args is the list of source/target names.
try:
	opts, args = getopt.getopt(sys.argv[1:], "IOFvnqhf:",
				   ["init", "force", "overwrite", "dry-run",
				    "file=", "forcecopy", "quiet", "help"])
except getopt.error, (cause):
	# warn() concatenates strings, and on Python 2 the getopt error
	# is an exception object rather than a string, so convert it.
	warn(str(cause))
	usage()
for o, a in opts:
	if o in ("-I", "--init"):
		initcksum = 1
	elif o in ("-O", "--overwrite"):
		overwrite = 1
	elif o in ("-F", "--force"):
		initcksum = 1
		overwrite = 1
	elif o == "--forcecopy":
		forcecopy = 1
	elif o in ("-n", "--dry-run"):
		dryrun = 1
		# A dry run that says nothing is pointless, so it implies
		# at least one level of verbosity.
		if verbose == 0:
			verbose = 1
	elif o == '-v':
		verbose = verbose + 1
	elif o in ("-f", "--file"):
		cffile = a
	elif o in ("-q", "--quiet"):
		quiet = 1
	elif o in ('-h', '-?', '?'):
		uprint()
		sys.exit(0)
	elif o == '--help':
		help()
		sys.exit(0)
	else:
		# getopt accepted an option that has no branch above.
		# die() exits, so nothing more is needed here.
		die("Chris failed to properly parse option: %s" % (o,))
# A control file and command line pairs are mutually exclusive.
if len(args) != 0 and cffile:
	warn("Can't specify both a control file and additional arguments")
	usage()
if (len(args) % 2) != 0:
	die("Uneven number of arguments: must be pairs of source and target file")

# We now operate inside a MONSTROUS try statement that catches our
# explosions: any PubError raised below ends the program through die().
# The work is done in three passes over the file pairs: check that every
# target has a signature, check that every target is consistent with
# its signature, and only then copy files and write signatures.
try:
	# Turn arguments into filepairs.
	# Read config file if any.
	if cffile:
		fpairs = readcffile(cffile)
		if not fpairs:
			verb(2, "no files specified in control file, exiting")
			sys.exit(0)
	else:
		if not args:
			# No work; goodbye.
			verb(2, "no work to perform, exiting")
			sys.exit(0)
		# The arguments are consumed two at a time: source, target.
		while args:
			src = args.pop(0); dst = args.pop(0)
			fpairs.append(FilePair(src, dst))

	# Check for the presence of checksum files.
	# Every target is looked at, so that all missing signatures are
	# reported and not just the first one.
	cksumok = 1
	for fp in fpairs:
		if not fp.validsig():
			qwarn(initcksum,
			      fp.dst +": does not have a signature file")
			cksumok = 0
	# We do not exit here even if signatures are missing; the
	# consistency check below runs first and the decision to exit is
	# made once, after both checks.
	# Check for target consistency.
	allok = 1
	for fp in fpairs:
		r = fp.isdstinconsist()
		if r:
			allok = 0
			qwarn(overwrite, fp.dst + ": " + r)
	# An inconsistent target is only acceptable with overwrite set and
	# a missing signature only with initcksum set; otherwise we stop
	# before touching anything.
	if (not allok and not overwrite) or (not cksumok and not initcksum):
		sys.exit(1)

	# Now we execute the operation.
	# For each pair the first case that applies wins: copy the file,
	# establish a missing signature, refresh an inconsistent signature,
	# or do nothing. With dryrun set the action is only announced.
	for fp in fpairs:
		if forcecopy or fp.needupdate():
			verb(1, "copying %s to %s" % (fp.src, fp.dst))
			ri = fp.copyinfo()
			if ri:
				verb(1, "also setting "+ri)
			if not dryrun:
				fp.copy()
		elif not fp.validsig():
			verb(1, "establishing signature for %s" % (fp.dst,))
			if not dryrun:
				fp.copysig()
		elif fp.isdstinconsist():
			verb(1, "updating signature for %s (%s is identical to %s, but signature inconsistent)" % (fp.dst, fp.src, fp.dst))
			if not dryrun:
				fp.copysig()
		else:
			verb(2, "%s is identical to %s" % (fp.src, fp.dst))
	# We are now all done.
except PubError, t:
	# Problem? Okay, we're gone gone gone.
	die("error: " +str(t))
sys.exit(0)

#
# Copyright (C) 2001 Chris Siebenmann <cks@hawkwind.utcs.toronto.edu>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA