#!/usr/bin/python
#
# Publish (install) files to a destination, checking to ensure that
# the destination files are unchanged from the last publication round.
# File integrity is checked using NIST's SHA secure hash algorithm.
# When a target file is first published, its current SHA hash is stored
# in a dotfile in the same directory; whenever later we touch it, we
# check to see that the stored SHA hash matches the computed SHA hash.
# (Whenever we republish a new version, we refresh the stored hash.)
#
# Use -h for quick usage, --help for long help.
#
# Files to be published are supplied on either the command line as
# source/destination pairs or from a configuration file, which lists
# source/destination pairs one pair to a line separated by whitespace.
# A configuration file can have blank lines and comment lines (they
# must start with a '#' as the first non-whitespace character).
# In control/configuration files (and only in them), you can add
# optional arguments after the file name pair. These may be:
#     -u USER or -o USER    USER will be the target file's owner
#     -g GROUP              GROUP will be the target file's group
#     -m MODE               MODE will be the target file's mode
#                           (in numeric notation only)
# -m takes numbers. -u/-o and -g take either numbers or user or group
# names. (No spaces allowed in any arguments.)
# For all of them, the usual conversion rules apply; a leading '0'
# means the value is in octal. (As in '-m 0755'.) You can also specify
# hex, if that floats your boat.
# If unspecified, the target file will have the owner and group of
# whatever happens when the current UID writes to a file in the
# target directory and the mode of the source file.
# Times are not copied; the target file will have the time that it
# was last written to.
#
# Copyright: GPL. See the end of this file.
# Requirements: Unix and Python 1.5.2 (at least). It is believed to run
# fine under Python 2.2.
# Tested on: Red Hat 7.3 & 7.1.
import os
import stat
import string
import sys

# The 'sha' module is not available on every interpreter (it was
# deprecated in favour of hashlib and later removed), so a missing module
# must not stop the program from loading.
try:
    import sha
except ImportError:
    sha = None

# NOTE: the definitions below use 'except ... as' and call-style 'raise',
# which need Python 2.6 or later (and also work on Python 3).

# This is our IO blocksize. 128K seems to be sensible, although perhaps
# it should be larger.
BLOCKSIZE = 1024*128

# How we do error reporting:
# All errors that we expect and want to handle throw a specific class
# (or set of classes).
# We have a standard encoding and formatting for them.
# The standard is (filename, error message)


class PubError(Exception):
    """An expected, reportable error about one file.

    The payload is kept in self.a as (filename, message); indexing the
    exception indexes that tuple, and str() gives 'filename: message'.
    """

    def __init__(self, fn, msg):
        # Derive from Exception so the class can be raised and caught on
        # interpreters that refuse to raise arbitrary classes.
        Exception.__init__(self, fn, msg)
        self.a = (fn, msg)

    def __getitem__(self, i):
        return self.a[i]

    def __str__(self):
        return '%s: %s' % (self.a[0], self.a[1])

    def getfn(self):
        """Return the name of the file the error is about."""
        return self.a[0]


class PubIOError(PubError):
    """A PubError that wraps an IOError/OSError.

    Invoked as (filename, message, IOError/OSError object); the bonus
    message may be None or otherwise null.
    """

    def __init__(self, fn, msg, ieo):
        Exception.__init__(self, fn, msg, ieo)
        self.a = (fn, msg, ieo)

    def __str__(self):
        fn, msg, ieo = self.a
        # Fall back to str() of the wrapped error when it carries no
        # strerror, rather than printing the word 'None'.
        em = getattr(ieo, 'strerror', None) or str(ieo)
        ef = getattr(ieo, 'filename', None)
        # Mention the file the OS complained about only when it is not
        # the file we were nominally working on.
        if not ef or ef == fn:
            strf = fn
        else:
            strf = '%s (working on %s)' % (fn, ef)
        if msg:
            return '%s: %s: %s' % (strf, msg, em)
        return '%s: %s' % (strf, em)


def _prefixlast(fn, prefix):
    """Return fn with prefix inserted in front of its last '/' component."""
    head, sep, tail = fn.rpartition('/')
    return head + sep + prefix + tail


# Make a SHA checksum store name from a filename.
def shaname(fn):
    """Return the name of the checksum dotfile kept alongside fn."""
    return _prefixlast(fn, '.cksum.')


# derive the temporary target file name for a destination file.
# This temporary target must be in the same directory as the destination.
# It is considered okay to use a temporary name that is only unique per
# the target, since multiple publish instances running against the same
# file are broken anyways.
def tmpdstname(fn):
    """Return the name of the temporary file used while writing fn."""
    return _prefixlast(fn, '.pubtemp.')


# Return the first line of the file, stripped of surrounding whitespace.
# If the file cannot be read, PubIOError is raised.
def getfileline(fn):
    """Return the stripped first line of fn; raise PubIOError on failure."""
    try:
        fh = open(fn, 'r')
        try:
            line = fh.readline()
        finally:
            # Close the handle even when the read itself fails.
            fh.close()
    except (IOError, OSError) as t:
        raise PubIOError(fn, "can't read first line", t)
    return line.strip()

# Does this file usefully exist?
# To usefully exist, the file must: be a regular file, and be openable
# for reading.
def qstat(fn):
    """Return os.lstat(fn), or None if the lstat fails."""
    try:
        return os.lstat(fn)
    except OSError:
        return None


def realfile(fn):
    """Raise PubError unless fn exists and is a regular file.

    lstat is used, so a symbolic link is reported as not a regular file.
    """
    st = qstat(fn)
    if not st:
        raise PubError(fn, "doesn't exist")
    if not stat.S_ISREG(st[stat.ST_MODE]):
        raise PubError(fn, "not a regular file")


def goodfile(fn):
    """Raise PubError/PubIOError unless fn is a readable regular file."""
    realfile(fn)
    try:
        fh = open(fn, 'r')
        fh.close()
    except (IOError, OSError) as t:
        raise PubIOError(fn, "failed readability check", t)


# Calculate the SHA checksum of a file. It is returned in hexdigit form.
def filesha(fn):
    """Return the SHA-1 digest of fn's contents as a hex string.

    Raises PubIOError if the file cannot be opened or read.
    """
    # hashlib's sha1 computes the same digest as the old sha module, so
    # checksums stored by earlier runs stay valid.
    import hashlib
    d = hashlib.sha1()
    try:
        # Binary mode: the digest must be over the file's raw bytes.
        fh = open(fn, 'rb')
        try:
            block = fh.read(BLOCKSIZE)
            while block:
                d.update(block)
                block = fh.read(BLOCKSIZE)
        finally:
            # Do not leak the handle when a read fails part way.
            fh.close()
    except (IOError, OSError) as t:
        raise PubIOError(fn, "can't calculate SHA", t)
    return d.hexdigest()


# This returns file state information in an odd way.
# It takes an additional tuple that is the uid/gid/mode that we care
# about. If we don't care about one in particular, that value is None,
# and we return None in the return tuple.
def filestate(fn, ugm):
    """Return fn's (uid, gid, mode), with None wherever ugm has None.

    Raises PubIOError if fn cannot be lstat'ed.
    """
    (uid, gid, mode) = ugm
    try:
        st = os.lstat(fn)
    except OSError as t:
        raise PubIOError(fn, "cannot stat", t)
    if uid is not None:
        uid = st[stat.ST_UID]
    if gid is not None:
        gid = st[stat.ST_GID]
    if mode is not None:
        mode = stat.S_IMODE(st[stat.ST_MODE])
    return (uid, gid, mode)


def filemode(fn):
    """Return the permission bits of fn; raise PubIOError on failure."""
    try:
        st = os.lstat(fn)
    except OSError as t:
        raise PubIOError(fn, "cannot stat", t)
    return stat.S_IMODE(st[stat.ST_MODE])


def filesize(fn):
    """Return the size of fn in bytes; raise PubIOError on failure."""
    try:
        st = os.lstat(fn)
    except OSError as t:
        raise PubIOError(fn, "cannot stat", t)
    return st[stat.ST_SIZE]


# copy the permissions and other state of file A that we care about
# to file B. (Right now this is just 'permissions', but may add times
# in the future.)
def copystate(fa, fb):
    """Give fb the permission bits of fa; raise PubIOError on failure."""
    # f and msg track which file and which step we are on, so that the
    # error raised names the operation that actually failed.
    f = fa
    msg = "reading permissions"
    try:
        st = os.lstat(fa)
        f = fb
        msg = "setting permissions"
        os.chmod(fb, stat.S_IMODE(st[stat.ST_MODE]))
    except OSError as t:
        raise PubIOError(f, msg, t)

# Open a new file for writing with minimal permissions on the file.
# If the optional second argument is true, we fail if the file already
# exists.
def opennewfile(fn, mustbenew = 0, binary = 0):
    """Create or truncate fn with mode 0600 and return a writable file.

    mustbenew -- if true, fail when fn already exists (O_EXCL).
    binary    -- if true, the returned file object is opened in binary
                 mode; the default is text mode, as before.
    Raises PubIOError if the file cannot be opened.
    """
    flags = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
    if mustbenew:
        flags = flags | os.O_EXCL
    if binary:
        fmode = "wb"
    else:
        fmode = "w"
    try:
        fd = os.open(fn, flags, 0o600)
        return os.fdopen(fd, fmode)
    except OSError as t:
        raise PubIOError(fn, "opening file to write", t)


# Perform an IO operation (one that can fail with OSError or IOError)
# and gag the error.
def voidioop(fn, *args):
    """Call fn(*args), silently ignoring OSError and IOError."""
    try:
        fn(*args)
    except (OSError, IOError):
        pass


# A Cleanup class instance functions as a central recorder (and executor)
# of a sequence of things that must be done (and whose failure can and
# must be ignored, except for potential logging of this).
class Cleanup:
    """Records pending cleanup operations and runs them on request.

    self.cl is a list of (callable, argument-tuple) entries.
    """

    def __init__(self):
        self.cl = []

    # The primitive operations; each one gags IO errors.
    def _del(self, fn):
        voidioop(os.unlink, fn)

    def _ln(self, fsrc, fdst):
        voidioop(os.link, fsrc, fdst)

    def _mv(self, fsrc, fdst):
        voidioop(os.rename, fsrc, fdst)

    # The functions one calls to add an operation to be done.
    # Each entry is appended as ONE tuple: list.append() takes exactly
    # one argument, so passing the callable and its arguments separately
    # raises TypeError.
    def rm(self, *args):
        self.cl.append((self._del, args))

    def ln(self, *args):
        self.cl.append((self._ln, args))

    def mv(self, *args):
        self.cl.append((self._mv, args))

    # Call an arbitrary function with arguments.
    def callfunc(self, func, *args):
        self.cl.append((func, args))

    # Execute our list of pending cleanups in undefined (but currently
    # LIFO) order.
    def cleanup(self):
        while self.cl:
            (op, args) = self.cl[-1]
            op(*args)
            # Only pop after the operation has not blown up.
            self.cl.pop()

    # Clear any and all pending cleanups
    def cancel(self):
        self.cl = []


# Given a string and a target filename, make the file's contents be
# that string.
def recordline(fn, str):
    """Write str to fn, replacing any previous contents.

    If the write fails the file is removed again and PubIOError is
    raised. (Failure to open the file also raises PubIOError.)
    """
    fh = opennewfile(fn)
    cl = Cleanup()
    cl.rm(fn)
    try:
        try:
            fh.write(str)
        finally:
            # Always release the handle, even after a failed write.
            fh.close()
    except (IOError, OSError) as t:
        cl.cleanup()
        raise PubIOError(fn, "writing line to file", t)


# Copy file A to file B
def copyfile(fa, fb, uid, gid, mode):
    """Copy fa to fb by way of a temporary file in fb's directory.

    uid, gid -- owner and group for the result; None leaves that one
                alone.
    mode     -- permission bits for the result; None means use fa's
                permission bits.
    The temporary is renamed over fb only once it is complete; on any
    failure it is removed and PubError/PubIOError is raised.
    """
    co = Cleanup()
    # First: create the temporary.
    ft = tmpdstname(fb)
    fo = opennewfile(ft, 1, 1)
    # Record that we ought to remove the temporary if we go boom
    co.rm(ft)
    try:
        # Now: copy input file to the temporary. Both ends are binary so
        # the bytes are copied unmodified, and both handles are closed
        # whether or not the copy succeeds.
        try:
            fi = open(fa, "rb")
            try:
                while 1:
                    r = fi.read(BLOCKSIZE)
                    if not r:
                        break
                    fo.write(r)
            finally:
                fi.close()
        finally:
            fo.close()
        # Fix owner and group
        if uid is not None or gid is not None:
            # -1 tells chown to leave that id unchanged.
            if uid is None:
                uid = -1
            if gid is None:
                gid = -1
            try:
                os.chown(ft, uid, gid)
            except OSError as t:
                raise PubIOError(fb, "chown on temporary file", t)
        # Fix permissions
        if mode is not None:
            os.chmod(ft, mode)
        else:
            # Copy file a's permissions to the temporary.
            copystate(fa, ft)
        # Make the temporary the real.
        os.rename(ft, fb)
    except PubError:
        co.cleanup()
        raise
    except (IOError, OSError) as t:
        co.cleanup()
        raise PubIOError(fa, "during copy to "+fb, t)
    # The temporary has become the target; nothing is left to remove.
    co.cancel()


# Our concept is file pairs: the source and the target.
# Associated with the target is a stored SHA checksum, stashed in
# a file called '.cksum.<name>' in the target's directory.
class FilePair:
    """One source file together with the target it is published to."""

    def __init__(self, src, dst):
        self.src = src
        self.dst = dst
        # Cached checksums: of the source, of the target, and the one
        # stored in the target's checksum file. None means 'not loaded'.
        self.ssha = None
        self.dsha = None
        self.dfsha = None
        # Explicitly requested owner, group and mode for the target;
        # None means 'not specified'.
        self.duid = None
        self.dgid = None
        self.dmode = None
        # Now, we validate this pair on the spot.
        # This saves us having to remember to do this next.
        goodfile(self.src)
        if qstat(self.dst):
            goodfile(self.dst)

    def setuid(self, uid):
        self.duid = uid

    def setgid(self, gid):
        self.dgid = gid

    def setmode(self, mode):
        self.dmode = mode

    # We maintain a cache of the SHA information, so we only have
    # to calculate it once.
    def _dsha(self):
        # Nothing to do if already cached or if there is no target.
        if self.dsha or not qstat(self.dst):
            return
        self.dsha = filesha(self.dst)

    def _ssha(self):
        if self.ssha:
            return
        self.ssha = filesha(self.src)

    def _dfsha(self):
        if self.dfsha:
            return
        ft = shaname(self.dst)
        if not qstat(ft):
            return
        self.dfsha = getfileline(ft)

    # Is the destination inconsistent: either mismatching SHAs between
    # the stored value and the calculated value, or missing target file
    # with an SHA signature file?
    def isdstinconsist(self):
        """Return a description of the inconsistency, or None if fine."""
        self._dfsha()
        self._dsha()
        # If we do not have a stored SHA, we are always consistent
        if not self.dfsha:
            return None
        # If we have a stored SHA but not a target file to compute
        # the SHA of, we have a problem.
        if not self.dsha:
            return "target file has been removed"
        # Otherwise, we must have a matching SHA checksum.
        if self.dfsha != self.dsha:
            return "target file has been edited"
        return None

    # Do we have a valid signature file?
    # It suffices to load the cache with it.
    def validsig(self):
        """Return the stored checksum, or None if there is none."""
        self._dfsha()
        return self.dfsha

    # Do we need to update the destination?
    # To avoid a copy, all 'file attributes' must match:
    # SHA checksums, file size, UID/GID/MODE values if specified, etc.
    def needupdate(self):
        """Return 1 if the target must be (re)copied, 0 otherwise."""
        self._dsha()
        self._ssha()
        # SHA mismatch? (This also covers a missing target, whose
        # checksum stays None.)
        if self.dsha != self.ssha:
            return 1
        # Double-check: file sizes should be the same.
        if filesize(self.src) != filesize(self.dst):
            return 1
        # if we have set a UID/GID/MODE explicitly, the target
        # state there must match those settings.
        t0 = (self.duid, self.dgid, self.dmode)
        t1 = filestate(self.dst, t0)
        if t0 != t1:
            return 1
        # if we have no set mode, the target state must match
        # the source's mode.
        if self.dmode is None and filemode(self.src) != filemode(self.dst):
            return 1
        return 0

    # Perform the actual update of the file pair.
    # This does the copy and then writes the new SHA hash.
    def copy(self):
        self._ssha()
        copyfile(self.src, self.dst, self.duid, self.dgid, self.dmode)
        recordline(shaname(self.dst), self.ssha + '\n')
        # The target and its stored checksum changed; drop the caches.
        self.dfsha = None
        self.dsha = None

    # Just write the SHA hash of the source as the target's stored
    # checksum, without copying anything. Any existing stored checksum
    # is overwritten.
    def copysig(self):
        self._ssha()
        recordline(shaname(self.dst), self.ssha + '\n')
        self.dfsha = None
        self.dsha = None

    # Return information about additional magic things that will
    # happen during the copy.
    def copyinfo(self):
        """Return a phrase describing the explicit uid/gid/mode settings.

        Returns None when none of them has been set.
        """
        inf = []
        if self.duid is not None:
            inf.append('uid to %d' % (self.duid,))
        if self.dgid is not None:
            inf.append('gid to %d' % (self.dgid,))
        if self.dmode is not None:
            inf.append('file mode to 0%o' % (self.dmode,))
        if not inf:
            return None
        # polish the returned information excessively.
        if len(inf) < 3:
            return ' and '.join(inf)
        return ", ".join(inf[:-1]) + ", and " + inf[-1]

# ----------------------------
# Structural guts of the program.

# Get our pairs from a configuration file.
# The configuration file is a set of filename pairs separated by whitespace,
# one pair per line. Comments and blank lines are allowed.
# getopt is imported here as well because parselopts() below needs it.
import getopt
import grp
import pwd


def satoi(str):
    """Convert a numeric string to an integer, or return None.

    The base is taken from the prefix: '0x' is hex and a leading '0' is
    octal (as in '0755'); anything else is decimal.
    """
    text = str
    try:
        return int(text, 0)
    except ValueError:
        pass
    # Some interpreters reject the traditional leading-zero octal form
    # ('0755') in base-0 conversion. It is a documented input format for
    # control files, so convert it explicitly.
    digits = text.strip()
    sign = 1
    if digits[:1] in ('+', '-'):
        if digits[0] == '-':
            sign = -1
        digits = digits[1:]
    if len(digits) > 1 and digits[0] == '0' and digits.isdigit():
        try:
            return sign * int(digits, 8)
        except ValueError:
            return None
    return None


def parselopts(fp, n):
    """Apply the per-line options in the word list n to the pair fp.

    Recognised options are -m MODE, -o/-u USER and -g GROUP; the values
    are handed to fp.setmode(), fp.setuid() and fp.setgid().
    Returns None on success, or a string describing the problem.
    """
    try:
        opts, args = getopt.getopt(n, "m:o:u:g:")
    except getopt.error as cause:
        return "%s" % (cause,)
    for o, a in opts:
        if o == '-m':
            mode = satoi(a)
            if mode is None:
                return "bad value for -m option"
            fp.setmode(mode)
        elif o in ('-o', '-u'):
            # Numbers are taken as-is; anything else is looked up as a
            # user name.
            uid = satoi(a)
            if uid is None:
                try:
                    uid = pwd.getpwnam(a)[2]
                except KeyError:
                    # Name the option that was actually given.
                    return "unknown username in %s option" % (o,)
            fp.setuid(uid)
        elif o == '-g':
            gid = satoi(a)
            if gid is None:
                try:
                    gid = grp.getgrnam(a)[2]
                except KeyError:
                    return "unknown group in -g option"
            fp.setgid(gid)
        else:
            die("Internal error: unhandled cf line option %s" % (o,))
    return None


def readcffile(fn):
    """Read control file fn and return its list of FilePair objects.

    Blank lines and lines whose first non-blank character is '#' are
    skipped. Raises PubIOError if the file cannot be read and PubError
    for a line that cannot be parsed (or whose files are not usable).
    """
    try:
        fh = open(fn, "r")
        try:
            lines = fh.readlines()
        finally:
            # Read everything first and close the file before parsing, so
            # the handle is released even when a line is rejected.
            fh.close()
    except (IOError, OSError) as t:
        raise PubIOError(fn, "reading control file", t)

    pairs = []
    lnum = 0
    for l in lines:
        lnum = lnum + 1
        l = l.strip()
        if not l or l[0] == "#":
            continue
        n = l.split()
        if len(n) < 2:
            raise PubError(fn, "unparseable control file line at line %d" % (lnum, ))
        fp = FilePair(n[0], n[1])
        # Remaining arguments must be a set of options.
        # Parse them.
        re = parselopts(fp, n[2:])
        if re:
            raise PubError(fn, "parse problem in control file line %d: %s" % (lnum, re))
        pairs.append(fp)
    return pairs


# Global options state.
overwrite = 0
initcksum = 0
dryrun = 0
forcecopy = 0
cffile = None
quiet = 0
verbose = 0

# Our list of file pairs to operate on.
fpairs = []

# What is the convenient name of our program?
progname = "publish"
if sys.argv:
    progname = sys.argv[0].split('/')[-1]


def eprint(string):
    """Write string and a newline to standard error."""
    sys.stderr.write(string + '\n')


def warn(string):
    """Print string on standard error, prefixed with the program name."""
    eprint(progname + ': ' + string)


def die(string):
    """Print string like warn() and exit with status 1."""
    eprint(progname + ': ' + string)
    sys.exit(1)


def qwarn(isok, string):
    """Warn, unless the condition is allowed (isok) and we are quiet."""
    if not (quiet and isok):
        warn(string)


def verb(lvl, str):
    """Print str if the verbosity level is at least lvl."""
    if lvl <= verbose:
        warn(str)

# And: operation!
import getopt


def uprint():
    """Print the one-line usage summary on standard error."""
    eprint("usage: publish [-IOF] [-vnq] [-f controlfile] [[file1 file2] ...]")


def usage():
    """Print the usage summary and exit with status 1."""
    uprint()
    sys.exit(1)


def help():
    """Print the usage summary followed by the long option help."""
    uprint()
    eprint("""Usage:
 -I, --init       Publish even if some targets lack checksum files.
 -O, --overwrite  Publish even if some targets have been edited.
 -F, --force      Both --init and --overwrite.
 --forcecopy      Always copy the source files to the targets, even
                  if the target seems fully up to date. This does not
                  imply --force (or --init, or --overwrite).
 -v               Verbose. May be repeated for more verbosity.
 -n, --dry-run    Do not perform any actions, just say what would
                  have been done.
 -f, --file       Take source/target pairs from the named file.
 -q, --quiet      Do not warn about missing checksum files or edited
                  target files if --init and/or --overwrite are given.
                  May also suppress other messages.
 -h               Usage
 --help           You're reading it

If no -f or --file argument is given, the command line arguments are
the source/target pairs.""")


try:
    opts, args = getopt.getopt(sys.argv[1:], "IOFvnqhf:",
                               ["init", "force", "overwrite", "dry-run",
                                "file=", "forcecopy", "quiet", "help"])
except getopt.error as cause:
    # warn() concatenates strings, so hand it the message text and not
    # the exception object.
    warn(str(cause))
    usage()

for o, a in opts:
    if o in ("-I", "--init"):
        initcksum = 1
    elif o in ("-O", "--overwrite"):
        overwrite = 1
    elif o in ("-F", "--force"):
        initcksum = 1
        overwrite = 1
    elif o == "--forcecopy":
        forcecopy = 1
    elif o in ("-n", "--dry-run"):
        dryrun = 1
        # A dry run that says nothing is useless, so make sure the
        # would-be actions get reported.
        if verbose == 0:
            verbose = 1
    elif o == '-v':
        verbose = verbose + 1
    elif o in ("-f", "--file"):
        cffile = a
    elif o in ("-q", "--quiet"):
        quiet = 1
    elif o in ('-h', '-?', '?'):
        uprint()
        sys.exit(0)
    elif o == '--help':
        help()
        sys.exit(0)
    else:
        # die() exits with status 1.
        die("Chris failed to properly parse option: %s" % (o,))

if len(args) != 0 and cffile:
    warn("Can't specify both a control file and additional arguments")
    usage()
if (len(args) % 2) != 0:
    die("Uneven number of arguments: must be pairs of source and target file")

# We now operate inside a MONSTROUS try statement that catches our
# explosions:
try:
    # Turn arguments into filepairs.
    # Read config file if any.
    if cffile:
        fpairs = readcffile(cffile)
        if not fpairs:
            verb(2, "no files specified in control file, exiting")
            sys.exit(0)
    else:
        if not args:
            # No work; goodbye.
            verb(2, "no work to perform, exiting")
            sys.exit(0)
        while args:
            src = args.pop(0)
            dst = args.pop(0)
            fpairs.append(FilePair(src, dst))

    # Check for the presence of checksum files.
    cksumok = 1
    for fp in fpairs:
        if not fp.validsig():
            qwarn(initcksum, fp.dst +": does not have a signature file")
            cksumok = 0

    # Check for target consistency.
    allok = 1
    for fp in fpairs:
        r = fp.isdstinconsist()
        if r:
            allok = 0
            qwarn(overwrite, fp.dst + ": " + r)

    # Every problem has been reported by now; give up unless the
    # matching override option was given.
    if (not allok and not overwrite) or (not cksumok and not initcksum):
        sys.exit(1)

    # Now we execute the operation.
    for fp in fpairs:
        if forcecopy or fp.needupdate():
            verb(1, "copying %s to %s" % (fp.src, fp.dst))
            ri = fp.copyinfo()
            if ri:
                verb(1, "also setting "+ri)
            if not dryrun:
                fp.copy()
        elif not fp.validsig():
            verb(1, "establishing signature for %s" % (fp.dst,))
            if not dryrun:
                fp.copysig()
        elif fp.isdstinconsist():
            verb(1, "updating signature for %s (%s is identical to %s, but signature inconsistent)" % (fp.dst, fp.src, fp.dst))
            if not dryrun:
                fp.copysig()
        else:
            verb(2, "%s is identical to %s" % (fp.src, fp.dst))
    # We are now all done.
except PubError as t:
    # Problem? Okay, we're gone gone gone.
    die("error: " +str(t))

sys.exit(0)

#
# Copyright (C) 2001 Chris Siebenmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA