"""
#By Erik Nelson, June 12, 2006
#This is an experiment in random sentence generators.
#The data structure is a list of lists, with the substitutable thing at the
#head of each one and things it can turn into on that list.
#The next thing to do: make it a recursive grammar.
#This entails I must make the substitution loop into a function
#so I can cycle it through several times.
"""
import random
########## getvocab(filename) returns listoflistofstrings ######
def getvocab(filename):
    """Read a vocabulary file into the list-of-lists format replacetokens uses.

    The file is a series of groups of lines separated by one or more blank
    lines.  Every non-blank line becomes one string of the current group,
    with its line ending removed; a blank line closes the current group.
    By the convention used for the built-in vocabulary, the first string
    of a group is the substitutable token and the rest are the things it
    can turn into.

    filename -- path of the file to read.  It is passed to open() as
                given, so a relative name only works if the current
                directory is the right one.

    Returns a list of lists of strings.  Empty groups are never produced,
    so consecutive blank lines and a missing blank line at the end of the
    file are both harmless.

    Raises whatever open() raises (IOError) if the file cannot be opened.
    """
    buildbiglist = []
    buildsublist = []
    # The with-block closes the file even if reading fails part-way.
    with open(filename, 'r') as infile:
        for line in infile:
            # Strip the line ending BEFORE testing for blankness, so that a
            # one-character last line without a newline is not mistaken for
            # a blank line, and a "\r\n" blank line is not mistaken for data.
            cleanline = line.rstrip('\r\n')
            if cleanline:
                buildsublist.append(cleanline)
            elif buildsublist:
                buildbiglist.append(buildsublist)
                buildsublist = []
    # The last group has no blank line after it unless the file ends with one.
    if buildsublist:
        buildbiglist.append(buildsublist)
    return buildbiglist
######## getvocab() function definition ends here ################
# The built-in vocabulary.  Each inner list starts with a substitutable
# token; the remaining strings are the things that token can turn into.
# Inside the strings, '<' asks for the punctuation before it to be removed,
# '>' asks for the letter after it to be de-capitalized, and '^' protects
# a capital (such as "I") from de-capitalization.
vocab = [
    ["#p",
     "\n It can be shown that > #s This is true even though the #n says > #s However, > #s",
     "\n In spite of it all, > #s #s <, but even a #n will #v <.",
     "\n In conclusion, > #s <, even though > #s ",
     "\n #s #s But > #s Furthermore, the #n doesn't believe that > #s",
     "\n #s #p",
     "#p #p",
     "\n All this is meaningless and #a because the #n says > #s",
     "\n To say > #s < would be an oversimplification."],
    ["#s",
     "The #a #n likes to #v .",
     "A #n can't #v if it has a #n !",
     "Also, the #a #n is a #n <.",
     "It isn't true that > #s",
     "A #n is a #a #n that can #v .",
     "^I have decided to become a #n even though I don't #v .",
     "People who #v do so at their own risk because of a #n .",
     "^I can't #v with a #n .",
     "A #n can't #v because of a #n .",
     "The #n will never learn not to #v .",
     "A #n and a #n can't #v together if one of them is more #a than the other.",
     "^I am a #n .",
     "The #n can #v .",
     'The #n says "So, > #s <, eh?"',
     '" < #s <," quoth the #n .'],
    ["#n",
     "apple", "banana", "car", "dog", "eel", "frog", "grape",
     "#n that isn't #a", "#n that can't #v",
     "rock", "hammer", "snowball", "brick", "map", "thing", "house",
     "#n -eater"],
    ["#v",
     "amble", "bark at a #n", "cry", "dump on a #n until it turns #a",
     "eat a #n", "fly like a #n", "absquatulate", "vomit",
     "#v until the #n comes home", "fail to #v", "#v like a #n",
     "become #a", "be a #n", "walk", "write", "speak",
     "say a #n is like a #n", "ride a #n", "run", "go shopping"],
    ["#a",
     "able", "big", "circular", "dry", "eerie", "fluid", "un- #a",
     "green", "jumpy", "fetid", "squamous", "raucous", "wet", "#n -like",
     "flat", "round", "#n -shaped", "strange", "enormous", "overt"],
]

# The starting template that the substitution passes expand.
teststring = "#p #p \n #s But > #s Furthermore, the #n doesn't believe that > #s"
"""The following four functions splice sentences by removing period, exc. mark and question mark where the workstring contains < after it, and de-capitalizing where the workstring has a > before the capital letter. """
def decapitalize(thestring):
    """Return thestring with its first character converted to lower case.

    Only the first character is touched; the rest of the string is copied
    unchanged.  An empty string comes back as an empty string.
    """
    # Slicing (rather than indexing) is safe on empty and one-character
    # strings, so no special cases are needed.
    return thestring[:1].lower() + thestring[1:]
def capeater(thestring):
    """Remove every '>' marker and de-capitalize the text that follows it.

    The string is cut at each '>'.  The part before the first marker is
    kept as it is; every later part has its leading whitespace stripped
    and its first character lower-cased, and the parts are then glued
    back together with nothing in between.
    """
    pieces = thestring.split(">")
    spliced = [pieces[0]] + [decapitalize(part.lstrip()) for part in pieces[1:]]
    return "".join(spliced)
def depunct(thestring):
    """Return thestring without its last character if that is '.', '!' or '?'.

    At most one character is removed.  Strings that are empty or that end
    in anything else are returned unchanged.
    """
    # The emptiness check guards the [-1] index; a one-character string
    # needs no separate treatment.
    if thestring and thestring[-1] in ".?!":
        return thestring[:-1]
    return thestring
def puncteater(thestring):
    """Remove every '<' marker and the sentence punctuation just before it.

    The string is cut at each '<'.  Every part that stood in front of a
    marker has its trailing whitespace stripped and then loses one final
    '.', '!' or '?' if it has one; the part after the last marker is kept
    as it is.  The parts are glued back together with nothing in between.
    """
    pieces = thestring.split("<")
    trimmed = [depunct(part.rstrip()) for part in pieces[:-1]]
    # split() always yields at least one piece, so this slice is the tail.
    return "".join(trimmed + pieces[-1:])
print "This program will use the vocabulary in the file that you name."
print "It will use the old vocabulary if you don't name a filename."
print "What file do you want to read from? Just hit return for default vocabulary."
filechoice = raw_input()
if filechoice !='':
vocab = getvocab(filechoice)
print teststring, "\n The above is the ORIGINAL STRING"
def replacetokens(instring, vocabulary=None):
    """Do one substitution pass over instring and return the new string.

    instring is split on single spaces.  Every resulting word that is
    exactly equal to the head of a vocabulary entry is replaced by a
    randomly chosen alternative from that entry; all other words are kept
    as they are.  The words are then joined with single spaces again, so
    the spacing of the input is preserved.  Alternatives may themselves
    contain tokens; those are only expanded by a later pass.

    instring   -- the string to expand.
    vocabulary -- optional list of lists of strings, each with the token
                  first and its alternatives after it.  When omitted, the
                  module-level vocab is used.

    Returns the expanded string.
    """
    if vocabulary is None:
        vocabulary = vocab
    # Build the token -> alternatives table once per pass instead of
    # scanning the whole vocabulary for every word.  If two entries share
    # a head, the later one wins.  Entries without any alternative are
    # skipped, so their token is left in place rather than making
    # random.choice fail on an empty list.
    alternatives = {}
    for entry in vocabulary:
        if len(entry) > 1:
            alternatives[entry[0]] = entry[1:]
    buildlist = []
    for word in instring.split(" "):
        if word in alternatives:
            word = random.choice(alternatives[word])
        buildlist.append(word)
    return " ".join(buildlist)
evolvestring = teststring
for i in range(1,20):
teststring = replacetokens(teststring)
print "\nThe ", i, "th iteration is============\n\n",teststring
print "\n"
teststring = capeater( puncteater( teststring) )
# The above functions fix the punctuation and capitalization
# when the template calls for a sentence splice.
teststring = teststring.replace("^","")
# We use '^' to protect things like "I" from the above rule.
# Then the ^ disappears.
teststring = teststring.replace( " ." , "." )
# Eliminate spaces before periods (which are there because codetokens
# need to be separated from everything else by spaces.
teststring = teststring.replace( " -" , "-" )
teststring = teststring.replace( "- " , "-" )
teststring = teststring.replace(" ," , "," )
teststring = teststring.replace(" !" , "!")
teststring = teststring.replace(" ?", "?")
teststring = teststring.replace(' " ' , ' "')
# And similarly for hyphens, exclamation marks, question marks, commas
# The double quote call eliminates space after (lefthand) quote
# Since a lefthand quote looks the same as a righthand quote, be careful.
# Capeater will work between a punctuation mark and a (construed as right)
# quote.
print "***The final version with spliced punctuation and capitalization: **\n"
print teststring
# (geocities.com/eriknelson2002)