# stoplist -
# loads a stop list of words into a dictionary,
# creates a test dictionary with a random selection of the stoplist words,
# and then applies the stop list to the test dictionary,
# i.e. eliminates all the words in the stoplist from the test dictionary
import string, random
def load_stoplist(filename):
    """Load a stop list file into a dictionary keyed by word.

    Every line of the file is stripped of leading and trailing whitespace
    and the result is stored as a key mapped to 1.  Each line is treated
    as a single word.  Duplicate lines collapse into one key, and a blank
    line yields the empty-string key.

    filename -- path of the text file to read.

    Returns the dictionary of stop words.  Errors raised by open() (for
    example a missing file) propagate to the caller.
    """
    stoplist = {}
    # The with-block closes the file even if reading fails part-way.
    with open(filename, 'r') as infile:
        # Iterate the file directly instead of building a list of all lines.
        for line in infile:
            # str.strip() is used because the string.strip() function
            # does not exist in Python 3.
            stoplist[line.strip()] = 1
    return stoplist
def apply_stoplist(stoplist, dict):
    """Remove from a dictionary every key that is in the stop list.

    stoplist -- container of stop words; only membership (``in``) is used.
    dict     -- dictionary to filter.  It is modified in place.

    Returns None.
    """
    # Collect the doomed keys first: deleting entries while iterating over
    # the dictionary itself is not allowed.
    # The ``in`` operator replaces has_key(), which Python 3 removed.
    todelete = [key for key in dict if key in stoplist]
    for key in todelete:
        del dict[key]
def make_random_selection(dict, count=99):
    """Pick keys at random, with replacement, from a dictionary.

    dict  -- dictionary whose keys are sampled.
    count -- number of picks to make.  Defaults to 99, which keeps the
             size of the result the same as before this parameter
             existed.

    Returns a list of ``count`` keys (repeats are possible), or an empty
    list when the dictionary has no keys.
    """
    # Copy the keys into a list so they can be indexed on Python 3 too,
    # where dict.keys() is a non-subscriptable view.
    keys = list(dict)
    if not keys:
        # Nothing to choose from; random.choice would raise on an empty list.
        return []
    # random.choice considers every key, including the last one, and works
    # for a single-key dictionary.
    return [random.choice(keys) for _ in range(count)]
def list_to_dict(list):
    """Build a dictionary that maps every item of a sequence to 1.

    list -- iterable of hashable items; duplicates collapse into one key.

    Returns a new dictionary.
    """
    # dict.fromkeys does the same job as a manual loop and avoids
    # rebinding the builtin name ``dict`` to a local variable.
    return dict.fromkeys(list, 1)
# Demo driver: load the stop list, build a test dictionary from a random
# selection of its words, then strip the stop words back out of it.
filename = 'stoplist3.txt'
stoplist = load_stoplist(filename)
# print(stoplist)  # uncomment to inspect the loaded stop list
words = make_random_selection(stoplist)
words_dict = list_to_dict(words)
# A single parenthesized, %-formatted argument prints the same text under
# both the Python 2 print statement and the Python 3 print function.
print("words_dict:  %s" % (words_dict,))
# apply_stoplist mutates words_dict in place.
apply_stoplist(stoplist, words_dict)
print("w/ stop words eliminated:  %s" % (words_dict,))
# (geocities.com/soho/square) (geocities.com/soho)