# wordcount00 -
# iterates through all Gutenberg text files in a directory
# creates a string containing a DOS command calling a C++ program
# to count the number of words in the file, e.g.:
# C:\python20\wordcount0>wordcount0.exe adieu10.txt >adieu10count.txt
# and then executes this command from Python
#=================================================================
import re, sys, string, glob, os.path, os
def make_wordcounts(text_dir, wordlist_dir,
                    program="c:\\python20\\wordcount0.exe"):
    """Run the word-count program on every text file in a directory.

    For each regular file in text_dir whose name matches '*.*', a shell
    command of the form

        <program> "<text file>" >"<wordlist_dir>/<same file name>"

    is built, printed and executed with os.system.

    Arguments:
        text_dir     -- directory holding the input text files; a trailing
                        path separator is optional.
        wordlist_dir -- directory that receives one output file per input
                        file; a trailing path separator is optional.  It is
                        not created here.
        program      -- command used to count the words.  It is inserted
                        into the command line as-is (unquoted), so it must
                        not contain spaces.

    Returns:
        The list of output file paths, in the (sorted) order in which the
        commands were executed.  The exit status of each command is not
        checked.
    """
    # os.path.join accepts directories with or without a trailing
    # separator; plain string concatenation matched nothing without one.
    fileset = glob.glob(os.path.join(text_dir, '*.*'))
    fileset.sort()  # glob order is arbitrary; make the run reproducible

    wordlists = []
    for text in fileset:
        # '*.*' also matches directories with a dot in their name.
        if not os.path.isfile(text):
            continue
        filename = os.path.basename(text)
        wordlist = os.path.join(wordlist_dir, filename)
        # Quote both paths so that names containing spaces survive the shell.
        cmd = '%s "%s" >"%s"' % (program, text, wordlist)
        print(cmd)
        os.system(cmd)
        wordlists.append(wordlist)
    return wordlists
#----------------------------------------------------------------------
# main program:
# Expects two arguments: the directory of text files and the directory
# for the word-count output.  With any other argument count the script
# does not print a usage message; it falls back to the built-in Balzac
# directories below.
if __name__ == "__main__":
    if len(sys.argv) == 3:
        text_dir, wordcount_dir = sys.argv[1], sys.argv[2]
    else:
        text_dir = "c:\\Balzac\\balzacbooksnofront\\"
        wordcount_dir = "c:\\Balzac\\balzacbooksnofront\\wordcounts\\"
    wordcounts = make_wordcounts(text_dir, wordcount_dir)
# Text file source (historic): geocities.com/soho/square/3472
# geocities.com/soho/square
# geocities.com/soho
# (to report bad content: archivehelp @ gmail)