# getrefs1 -
# Get all references to a name in the files of a directory and collect
# each file line that contains a reference.
# Given a directory and a regular expression representing
# all the different forms of a name to search for,
# iterate over the directory, opening each file and
# reading it line by line, trying to match the regex on each line.
# If a line matches, a reference record is added to the list of matches;
# the record includes the file name, the line number within the file,
# the character offset of the regex match within the line,
# and the contents of the whole line.
# Example reference record: brcrc10.txt(1343,32):line
import glob, re, string, sys, os.path
#----------------------------------------------------------------------
def get_references_infile(dir,file,name,matches):
    """Append a record to *matches* for each line of a file that matches *name*.

    dir     -- directory containing the file; it is joined to *file* with
               os.path.join, so a trailing path separator is optional
    file    -- name of the file inside *dir*
    name    -- regular expression to search for (matched case-insensitively)
    matches -- list that is extended in place with one record per matching
               line, formatted as "path(lineno,charno):line"

    Only the first match on a line is recorded.  lineno and charno are
    1-based, and the line is stored with its trailing whitespace removed.
    Nothing is returned; errors from open() propagate to the caller.
    """
    filename = os.path.join(dir, file)
    re_name = re.compile("(%s)" % (name), re.IGNORECASE)
    # the with-block closes the file even if reading or matching raises
    with open(filename, 'r') as infile:
        for lineno, line in enumerate(infile, 1):
            match = re_name.search(line)
            if match:
                # report a 1-based character position
                charno = match.start() + 1
                match_record = "%s(%s,%s):%s" % (filename, lineno, charno, line.rstrip())
                matches.append(match_record)
def get_references_indirectory(dir,name):
    """Return the match records for *name* from every "*.*" file in *dir*.

    dir  -- directory to scan; a trailing path separator is optional
    name -- regular expression passed on to get_references_infile

    Only entries whose names match the glob pattern "*.*" are examined,
    and the directory is not searched recursively.  Returns the list of
    records collected by get_references_infile.
    """
    # os.path.join(dir, '') appends a separator only when one is missing,
    # so both the glob pattern and the per-file paths are built correctly
    dir = os.path.join(dir, '')
    matches = []
    for path in glob.glob(dir + '*.*'):
        # "*.*" also matches sub-directories with a dot in their name;
        # those cannot be opened as files, so skip them
        if not os.path.isfile(path):
            continue
        get_references_infile(dir, os.path.basename(path), name, matches)
    return matches
#----------------------------------------------------------------------
# main program:
if __name__ == "__main__":
    # Expected invocation: <script> directory name_regex
    if len(sys.argv) == 3:
        dir = sys.argv[1]
        name = sys.argv[2]
    else:
        # wrong number of arguments: fall back to the built-in defaults
        dir = "c:\\Balzac\\balzacbooksnofront\\"
        name = "Bianchon"
    # Report the results for both the command-line and the default case;
    # previously matches found for command-line arguments were never shown.
    # Each print takes one pre-formatted string so the statements behave
    # the same under the Python 2 print statement and the print function.
    print("----------Find '%s' in files-----" % (name))
    matches = get_references_indirectory(dir, name)
    for match in matches:
        print(match)
    print("%s occurences have been found" % (len(matches)))
Text file Source (historic): geocities.com/soho/square/3472
geocities.com/soho/squaregeocities.com/soho
(to report bad content: archivehelp @ gmail)
|
|
|
|
|