# matchline2 -
# find all matches of the pattern within the string
# return a list of tuples with three items for each match: (match,start,end)
import re
def findall2(pattern, text):
    """Find every non-overlapping match of *pattern* in *text*.

    The pattern is compiled case-insensitively (``re.IGNORECASE``) and the
    text is scanned left to right, each search resuming where the previous
    match ended.

    Args:
        pattern: Regular expression source (or an already compiled pattern
            accepted by ``re.compile``).
        text: The string to scan.

    Returns:
        A list of ``(match, start, end)`` tuples, one per match, in order of
        appearance.  ``match`` is the text captured by the first group of the
        pattern (``None`` if that group did not take part in the match), or
        the whole matched text when the pattern has no capturing group.
        ``start`` and ``end`` are the span of the *whole* match, so they may
        cover more characters than ``match`` itself (e.g. a trailing
        delimiter that sits outside the group).
    """
    re_pattern = re.compile(pattern, re.IGNORECASE)
    # Patterns without a capturing group report the whole match instead of
    # failing with an IndexError on an empty groups() tuple.
    group_index = 1 if re_pattern.groups else 0

    matches = []
    pos = 0
    while pos < len(text):
        match = re_pattern.search(text, pos)
        if match is None:
            break
        start, end = match.span()
        matches.append((match.group(group_index), start, end))
        # A zero-width match would leave pos unchanged and loop forever;
        # step one character past it so the scan always makes progress.
        pos = end if end > start else end + 1
    return matches
# Demo: extract the words of a sample line.  The group captures a word
# (optionally containing one apostrophe or one hyphen); the character class
# after it requires a whitespace or punctuation delimiter, which is part of
# the reported span but not of the captured word.
# Raw string so that \w, \s and \- reach the regex engine untouched instead
# of being treated as (invalid) string escapes.
pattern = r"(\w+\'\w+|\w+\-\w+|\w+)[\s,.!?;]"
line = "\" 'This morning,' he said, 'I had only two amounts to collect; the\n"
matches = findall2(pattern, line)
# Single-argument parenthesised form prints the same text under Python 2
# and Python 3 (the old statement emitted "matches: " + " " + str(matches)).
print("matches:  %s" % (matches,))
# Text file source (historic): geocities.com/soho/square/3472
# Parent paths: geocities.com/soho/square, geocities.com/soho
# (To report bad content: archivehelp @ gmail)