#conform.py (version 0.1, partly tested only)
#smart directory copying in Python
#This solves the following problem:
#there are two directories Master and Slave, presumably on different drives.
#Slave is meant to be a copy of Master and remain so. They both contain huge files (for example bitmaps).
#From time to time Master is reorganised (files are moved) and some new files are added,
# files may even be deleted.
#Filenames (without the leading directory) are assumed to be unique in general:
# two files should not bear the same name in different subdirectories, unless they are part of a list of exceptions.
# Exceptions are due to a common phenomenon: readme.txt files and the like might have the same name in lots of subdirectories.
# This set of exceptions can be furnished at the start of the script call.
#This script maintains the conformity of Master and Slave.
#How does it work?
# A dictionary of files from the Master is created. The keys are file names. The values are relative directories.
# Errors can be found: duplicate files. The process stops when all duplicate files are found.
# A dictionary of files from the Slave is created. Same structure.
# directories in master that are not in slave are created in slave
# files are moved within slave to make the slave look like the structure in master.
# new files from master are copied into slave
# old files are removed from slave.
# old directories are removed from the slave.
import os.path
import os
import stat
import sys
# Debug flag; it is set here but not read anywhere in the visible code.
dbg = 1
#returns an int
def file_date(filepath):
    """Return the last-modification time of filepath.

    The value is the ST_MTIME field of os.stat(filepath), i.e. whole
    seconds as an integer. os.stat raises OSError when the path cannot
    be examined.
    """
    return os.stat(filepath)[stat.ST_MTIME]
def copy_file(from_file, to, block_size=4096):
    """Copy the bytes of from_file into to, one block at a time.

    from_file  -- path of the file to read (opened in binary mode)
    to         -- path of the file to write; it is created or truncated
    block_size -- number of bytes read per iteration (default 4096)

    Only the content is copied; the destination gets a fresh modification
    time. Errors from open/read/write propagate to the caller.
    """
    # The with-blocks close both handles even when opening the destination
    # or a read/write fails part-way through, so no handle is leaked.
    with open(from_file, "rb") as ifh:
        with open(to, "wb") as ofh:
            while 1:
                block = ifh.read(block_size)
                if not block:
                    # an empty read means end of file
                    break
                ofh.write(block)
class path_info(object):
    """Index of the files found below one root directory.

    Attributes:
      name              -- label given by the caller (e.g. "master")
      root              -- the root directory path
      rootlen           -- len(root), kept for slicing walked paths
      files_dir_dict    -- basename -> relative directory (one per basename)
      duplicates        -- basename -> list of every relative directory it
                           was seen in; only basenames seen more than once
      has_duplicates    -- 1 once any basename was seen twice, else 0
      exceptional_files -- basenames which can appear in lots of subdirs
    """

    def __init__(self, name, path, exceptional_files):
        self.name = name
        self.root = path
        self.rootlen = len(path)
        self.files_dir_dict = {}
        self.duplicates = {}
        self.has_duplicates = 0
        self.exceptional_files = exceptional_files

    def add_file(self, basename, rel_dir):
        """Record that basename was found in the relative directory rel_dir.

        A basename that is already known is also entered in self.duplicates,
        whose list starts with the directory recorded first and grows by one
        entry per further sighting. files_dir_dict always ends up holding
        the directory passed in the latest call.
        """
        known = self.files_dir_dict
        if basename in known:
            self.has_duplicates = 1
            # setdefault seeds the list with the previously recorded
            # directory the first time this basename turns out to repeat.
            self.duplicates.setdefault(basename, [known[basename]]).append(rel_dir)
        known[basename] = rel_dir
# Sample list of basenames that may repeat across subdirectories.
# NOTE(review): indentation was lost in this copy; this is assumed to be a
# module-level name. Nothing in the visible code reads it.
exceptional_files = ["readme.txt"]#just a sample list
#used as a "visit" function for os.path.walk
def dir_add_dict(arg, dirname, names):
    """Register every regular file of one visited directory in arg.

    arg     -- a path_info instance; its rootlen, exceptional_files and
               add_file members are used
    dirname -- the directory being visited (arg's root or a path below it)
    names   -- the entry names found in dirname

    Entries that are not regular files, and basenames listed in
    arg.exceptional_files, are skipped.
    """
    # Strip the root prefix and then any leading separator. Slicing at
    # rootlen+1 would swallow the first character of the relative directory
    # whenever the root itself already ends with a separator.
    separators = os.sep + (os.altsep or "")
    relative_dir = dirname[arg.rootlen:].lstrip(separators)
    for basename in names:
        if basename in arg.exceptional_files:
            continue
        if os.path.isfile(os.path.join(dirname, basename)):
            arg.add_file(basename, relative_dir)
def build_dic_master(rootpath, exceptions):
    """Index the files below the master root.

    rootpath   -- root directory of the master tree
    exceptions -- basenames that are not indexed (they may repeat freely)

    Returns the filled path_info when every indexed basename is unique.
    Otherwise a report of the duplicated basenames and their directories is
    printed and None is returned.
    """
    master_info = path_info("master", rootpath, exceptions)
    # os.walk is used because os.path.walk no longer exists in Python 3.
    # The visitor still gets every entry name and filters out non-files.
    for dirpath, dirnames, filenames in os.walk(rootpath):
        dir_add_dict(master_info, dirpath, dirnames + filenames)
    if not master_info.duplicates:
        return master_info
    # Single-argument print(...) produces the same text on Python 2 and 3.
    print("Duplicates in master")
    # sorted() makes the report order reproducible
    for basename in sorted(master_info.duplicates):
        print("file: %s" % basename)
        print("found in these directories:")
        for dup_dir in master_info.duplicates[basename]:
            sys.stdout.write('"%s" ' % dup_dir)
        print("")
    return None
def build_dic_slave(rootpath,exceptions):
    """Index the files below the slave root.

    rootpath   -- root directory of the slave tree
    exceptions -- basenames that are not indexed (they may repeat freely)

    Returns the filled path_info when every indexed basename is unique.
    Otherwise the duplicated basenames and their directories are printed
    and None is returned.
    """
    slave_info = path_info("slave", rootpath, exceptions)
    # os.walk is used because os.path.walk no longer exists in Python 3.
    # The visitor still gets every entry name and filters out non-files.
    for dirpath, dirnames, filenames in os.walk(rootpath):
        dir_add_dict(slave_info, dirpath, dirnames + filenames)
    if not slave_info.duplicates:
        return slave_info
    # Single-argument print(...) produces the same text on Python 2 and 3.
    print("Duplicates in slave")
    # sorted() makes the report order reproducible
    for basename in sorted(slave_info.duplicates):
        print(basename)
        for dup_dir in slave_info.duplicates[basename]:
            sys.stdout.write("%s " % dup_dir)
        print("")
    return None
def create_subdirs(arg, curr_indir, entries):
    """Mirror the sub-directories of one input directory into the output tree.

    arg        -- tuple (out_rootpath, in_rootpath_len): the output root and
                  the length of the input root path string
    curr_indir -- the input directory being visited
    entries    -- entry names found in curr_indir; non-directories are
                  ignored

    For every entry that is a directory, the directory with the same
    relative path is created under out_rootpath unless it already exists.
    """
    (out_rootpath, in_rootpath_len) = arg
    # Strip the input root and then any leading separator; this also works
    # when the root was given with a trailing separator.
    separators = os.sep + (os.altsep or "")
    relative_dir = curr_indir[in_rootpath_len:].lstrip(separators)
    for entry in entries:
        # os.path.join instead of a literal backslash keeps the directory
        # test working on every platform.
        if not os.path.isdir(os.path.join(curr_indir, entry)):
            continue
        out_dir = os.path.join(out_rootpath, relative_dir, entry)
        if not os.path.exists(out_dir):
            # makedirs also creates a missing output root or parent
            os.makedirs(out_dir)
def copy_directory_structure(in_rootpath, out_rootpath):
    """Recreate the directory tree of in_rootpath below out_rootpath.

    Only directories are created; no files are copied. Directories that
    already exist in the output tree are left alone.
    """
    in_rootpath_len = len(in_rootpath)
    # Make sure the output root exists so sub-directories can be created
    # inside it; nothing is created when the input root is not a directory.
    if os.path.isdir(in_rootpath) and not os.path.isdir(out_rootpath):
        os.makedirs(out_rootpath)
    # os.walk is used because os.path.walk no longer exists in Python 3.
    # It visits parents before children, so parents are created first.
    for curr_indir, subdirs, _files in os.walk(in_rootpath):
        create_subdirs((out_rootpath, in_rootpath_len), curr_indir, subdirs)
def _ensure_parent_dir(filepath):
    """Create the directory that will contain filepath if it is missing."""
    parent = os.path.dirname(filepath)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)


def move_files(master_info,slave_info):
    """Bring the files of the slave tree in line with the master tree.

    master_info, slave_info -- objects with a root path (.root) and a
    basename -> relative directory mapping (.files_dir_dict), as built by
    build_dic_master / build_dic_slave.

    Pass 1: every indexed slave file is moved to the relative directory it
    has in the master, or deleted when the master no longer has it.
    Pass 2: every indexed master file that is missing in the slave, or
    whose slave copy has an older modification time, is copied over.

    Basenames left out of the indexes (the exception list) are not touched.
    """
    dict_master = master_info.files_dir_dict
    dict_slave = slave_info.files_dir_dict
    #first move/delete files in slave
    for basename in dict_slave:
        old_file = os.path.join(slave_info.root, dict_slave[basename], basename)
        if basename not in dict_master:
            os.remove(old_file)#rather brutal, are you sure? you could create a batch of files to remove, or
            #you could put them aside....
            continue
        new_file = os.path.join(slave_info.root, dict_master[basename], basename)
        # A file that did not move needs no rename; renaming a path onto
        # itself is pointless.
        if new_file != old_file:
            _ensure_parent_dir(new_file)
            os.rename(old_file, new_file)
    #now copy new files from master
    for basename in dict_master:
        rel_dir = dict_master[basename]
        full_path_master = os.path.join(master_info.root, rel_dir, basename)
        full_path_slave = os.path.join(slave_info.root, rel_dir, basename)
        # copy when the slave lacks the file or holds an older version
        if (not os.path.exists(full_path_slave)
                or file_date(full_path_slave) < file_date(full_path_master)):
            _ensure_parent_dir(full_path_slave)
            copy_file(full_path_master, full_path_slave)
def remove_excess_slave_dirs1(arg, curr_slavedir, entries):
    """Remove sub-directories of one slave directory that the master lacks.

    arg           -- tuple (master_rootpath, slave_rootpath_len): the master
                     root and the length of the SLAVE root path string. The
                     length is used to cut the slave root off curr_slavedir.
    curr_slavedir -- the slave directory being visited
    entries       -- entry names found in curr_slavedir; non-directories
                     are ignored

    A directory that cannot be removed (for instance because it still holds
    files) is reported on stderr and left in place.
    """
    (master_rootpath, slave_rootpath_len) = arg
    separators = os.sep + (os.altsep or "")
    relative_dir = curr_slavedir[slave_rootpath_len:].lstrip(separators)
    for entry in entries:
        # The entry must be tested relative to the visited directory, not
        # relative to the current working directory.
        slave_dir = os.path.join(curr_slavedir, entry)
        if not os.path.isdir(slave_dir):
            continue
        master_dir = os.path.join(master_rootpath, relative_dir, entry)
        if os.path.exists(master_dir):
            continue
        try:
            os.rmdir(slave_dir)
        except OSError as err:
            sys.stderr.write('could not remove "%s": %s\n' % (slave_dir, err))


def remove_excess_slave_dirs(master_rootpath, slave_rootpath):
    """Delete slave directories that have no counterpart in the master."""
    arg = (master_rootpath, len(slave_rootpath))
    # Bottom-up traversal: the children of an excess directory are removed
    # before the directory itself, so nested excess directories disappear
    # in one run.
    for curr_slavedir, subdirs, _files in os.walk(slave_rootpath, topdown=False):
        remove_excess_slave_dirs1(arg, curr_slavedir, subdirs)
#top level call
def conform(master_rootpath, slave_rootpath, exceptions):
    """Make the slave tree conform to the master tree.

    master_rootpath -- root directory of the master tree
    slave_rootpath  -- root directory of the slave tree
    exceptions      -- basenames that may repeat in several directories;
                       they are left out of the file indexes

    Both trees are indexed first. If either index reports duplicate
    basenames (the index builder returns None), the function returns
    without touching the slave. Otherwise missing directories are created
    in the slave, files are moved/deleted/copied, and slave directories
    unknown to the master are removed.
    """
    master_info = build_dic_master(master_rootpath, exceptions)
    if master_info is None:
        return
    slave_info = build_dic_slave(slave_rootpath, exceptions)
    if slave_info is None:
        return
    # The slave is only modified from here on, i.e. after both duplicate
    # checks have passed.
    copy_directory_structure(master_rootpath, slave_rootpath)
    move_files(master_info, slave_info)#bug shows up here...
    remove_excess_slave_dirs(master_rootpath, slave_rootpath)
#modify this
# Runs as soon as the module is executed: master root, slave root and the
# basenames that may repeat across subdirectories are hard-coded below.
conform(
    r"c:\hdef\test\master",
    r"c:\hdef\test\slave",
    ["readme.txt"],
)
# single-argument form prints the same line on Python 2 and Python 3
print("finished processing")