# textwindow4 -
import re, string
class WordContext:
    """Pull a few "words" of context from the start or end of a string.

    A word is whatever the regular expression ``re_word`` matches; matching
    is case-insensitive.  The context methods return a slice of the input
    running from the start of one matched word to the end of another, so
    any delimiters consumed by the pattern are included in the result.
    """

    def __init__(self, re_word):
        """Store the regular expression (a pattern string) that defines a word."""
        self.re_word = re_word

    def get_word_indices(self, str):
        """Return ``(start, end)`` spans of successive non-overlapping words.

        The parameter keeps its original name ``str`` (which shadows the
        builtin inside this method) so existing keyword callers still work.

        Zero-width matches are skipped: they do not describe a word, and the
        previous search-from-``pos`` loop never advanced past one.
        """
        pattern = re.compile(self.re_word, re.IGNORECASE)
        return [m.span() for m in pattern.finditer(str) if m.end() > m.start()]

    def get_left_context(self, s, nwords=4):
        """Return the slice of ``s`` covering its first ``nwords`` words.

        If ``s`` holds fewer words than requested, all of them are used.
        Returns ``''`` when no word is found or ``nwords`` is less than 1.
        """
        spans = self.get_word_indices(s)
        if not spans or nwords < 1:
            return ''
        last = min(nwords, len(spans)) - 1
        return s[spans[0][0]:spans[last][1]]

    def get_right_context(self, s, nwords=4):
        """Return the slice of ``s`` covering its last ``nwords`` words.

        If ``s`` holds fewer words than requested, all of them are used.
        Returns ``''`` when no word is found or ``nwords`` is less than 1.
        """
        spans = self.get_word_indices(s)
        if not spans or nwords < 1:
            return ''
        # Clamp at 0 so a short input cannot wrap around to a negative index.
        first = max(0, len(spans) - nwords)
        return s[spans[first][0]:spans[-1][1]]
class TextWindow:
    """Three-line sliding window that looks for a pattern in the middle line.

    Lines are stored in ``self.window``, a fixed list of three strings used
    as a ring: ``self.front`` is the slot the next ``insert`` overwrites,
    which (once the window is full) is also the oldest line.
    """

    def __init__(self, pattern, re_word):
        """Remember the search ``pattern`` and the word regex ``re_word``."""
        self.pattern = pattern
        self.re_word = re_word
        self.window = ['', '', '']
        self.front = 0

    def insert(self, s):
        """Store ``s`` (trailing whitespace replaced by one space) in the ring."""
        # str.rstrip works on Python 2 and 3; string.rstrip is Python 2 only.
        self.window[self.front] = s.rstrip() + ' '
        self.front = (self.front + 1) % 3

    def buffer_full(self):
        """Return True once all three slots hold a non-empty string."""
        return all(self.window)

    def _ordered_lines(self):
        """Return the stored lines oldest-first, each right-stripped."""
        size = len(self.window)
        return [self.window[(self.front + i) % size].rstrip()
                for i in range(size)]

    def search(self):
        """Search the middle line and return ``(front, middle, back)``.

        ``middle`` is the text matched by ``self.pattern`` in the middle
        (second-oldest) line.  ``front`` and ``back`` are the word context
        immediately before and after it, taken from the three lines joined
        by single spaces.  All three are ``''`` when the window is not yet
        full or the middle line does not match.
        """
        front = middle = back = ''
        if not self.buffer_full():
            return front, middle, back

        before, current, after = self._ordered_lines()
        # Match against the line plus its trailing space, as insert() stores it.
        match = re.search(self.pattern, current + ' ')
        if not match:
            return front, middle, back

        lines = before + ' ' + current + ' ' + after
        # Shift the middle-line span into the joined text so the reported
        # hit is the one in the middle line, never an earlier one in `before`.
        offset = len(before) + 1
        start = match.start() + offset
        end = match.end() + offset

        left = lines[:start]
        middle = lines[start:end]
        # Keep everything after the match; slicing to -1 lost the last character.
        right = lines[end:]

        wc = WordContext(self.re_word)
        front = wc.get_right_context(left)
        back = wc.get_left_context(right)
        return front, middle, back
#===================================================================
# main:
# Demo: feed three consecutive lines into a TextWindow and show the words
# surrounding the first occurrence of 'face' in the middle line.
l1 = '"I think that you have secrets from me, Julie.--You love," he went on\n'
l2 = 'quickly, as he saw the color rise to her face. "Oh! I hoped that you\n'
l3 = 'would stay with your old father until he died. I hoped to keep you\n'

# A word is a plain, apostrophised or hyphenated run of word characters,
# with optional leading and mandatory trailing punctuation/whitespace.
# Raw literal: same runtime value as before, without invalid string escapes.
re_word = r"""[\s,.!?;\-"']*(\w+'\w+|\w+\-\w+|\w+)[\s,.!?;\-"']+"""

tw = TextWindow('face', re_word)

# buffer_full() only becomes true after the third insert.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(tw.buffer_full())
tw.insert(l1)
print(tw.buffer_full())
tw.insert(l2)
print(tw.buffer_full())
tw.insert(l3)
print(tw.buffer_full())

front, middle, back = tw.search()
print("%s | %s | %s" % (front, middle, back))
Text file Source (historic): geocities.com/soho/square/3472
geocities.com/soho/squaregeocities.com/soho
(to report bad content: archivehelp @ gmail)
|
|
|
|
|