linuxOS_AP05/debian/test/usr/lib/python3/dist-packages/Onboard/TextChanges.py
2025-09-26 09:40:02 +08:00

542 lines
17 KiB
Python

# -*- coding: utf-8 -*-
# Copyright © 2012-2013, 2015-2016 marmuta <marmvta@gmail.com>
#
# This file is part of Onboard.
#
# Onboard is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Onboard is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals
import time
### Logging ###
import logging
_logger = logging.getLogger("TextChanges")
###############
class TextSpan:
"""
Span of text
Doctests:
>>> span = TextSpan(3, 2, "0123456789")
>>> span.get_span_text()
'34'
"""
def __init__(self, pos = 0, length = 0, text = "", text_pos = 0):
self.pos = pos # document caret position
self.length = length # span length
self.text = text # text that includes span, but may be larger
self.text_pos = text_pos # document position of text begin
self.last_modified = None
def copy(self):
return TextSpan(self.pos, self.length, self.text, self.text_pos)
def begin(self):
return self.pos
def end(self):
return self.pos + self.length
def text_begin(self):
return self.text_pos
def is_empty(self):
return self.length == 0
def contains(self, pos):
return self.pos <= pos < self.pos + self.length
def intersects(self, span):
return not self.intersection(span).is_empty()
def intersection(self, span):
p0 = max(self.pos, span.pos)
p1 = min(self.pos + self.length, span.pos + span.length)
if p0 > p1:
return TextSpan()
else:
return TextSpan(p0, p1 - p0)
def union_inplace(self, span):
"""
Join two spans, result in self.
Doctests:
- adjacent spans
>>> a = TextSpan(2, 3, "0123456789")
>>> b = TextSpan(5, 2, "0123456789")
>>> a.union_inplace(b) # doctest: +ELLIPSIS
TextSpan(2, 5, '23456', ...
>>> a.get_text()
'0123456789'
- intersecting spans
>>> a = TextSpan(2, 3, "0123456789")
>>> b = TextSpan(4, 2, "0123456789")
>>> a.union_inplace(b) # doctest: +ELLIPSIS
TextSpan(2, 4, '2345', ...
>>> a.get_text()
'0123456789'
"""
begin = min(self.begin(), span.begin())
end = max(self.end(), span.end())
length = end - begin
middle = length // 2
self.text = self.text[:middle - self.text_pos] + \
span.text[middle - span.text_pos:]
self.pos = begin
self.length = length
self.last_modified = max(self.last_modified if self.last_modified else 0,
span.last_modified if span.last_modified else 0)
return self
def get_text(self, begin = None, end = None):
""" Return the whole available text """
if begin is None and end is None:
return self.text
if begin is None:
begin = self.pos
if end is None:
end = self.end()
return self.text[begin - self.text_pos : end - self.text_pos]
def get_span_text(self):
""" Return just the span's part of the text. """
return self.get_text(self.pos, self.end())
def get_text_until_span(self):
"""
Return the beginning of the whole available text,
ending with and including the span.
Doctests:
>>> span = TextSpan(3, 2, "0123456789")
>>> span.get_text_until_span()
'01234'
"""
return self.text[:self.end() - self.text_pos]
def get_text_from_span(self):
"""
Return the end of the whole available text,
starting from and including the span.
Doctests:
>>> span = TextSpan(3, 2, "0123456789")
>>> span.get_text_from_span()
'3456789'
"""
return self.text[self.pos - self.text_pos:]
def get_text_after_span(self):
"""
Return the remaining available text after the span.
Doctests:
>>> span = TextSpan(3, 2, "0123456789")
>>> span.get_text_after_span()
'56789'
"""
return self.text[self.end() - self.text_pos:]
def get_char_before_span(self):
"""
Character right before the span.
Doctests:
>>> span = TextSpan(0, 0, "0123456789", 0)
>>> span.get_char_before_span()
''
>>> span = TextSpan(9, 1, "0123456789", 0)
>>> span.get_char_before_span()
'8'
>>> span = TextSpan(5, 2, "3456789", 3)
>>> span.get_char_before_span()
'4'
"""
pos = self.pos - self.text_pos
return self.text[pos - 1 : pos]
def get_last_char_in_span(self):
"""
Character right before the span.
Doctests:
>>> span = TextSpan(0, 0, "0123456789", 0)
>>> span.get_last_char_in_span()
''
>>> span = TextSpan(9, 1, "0123456789", 0)
>>> span.get_last_char_in_span()
'9'
>>> span = TextSpan(5, 2, "3456789", 3)
>>> span.get_last_char_in_span()
'6'
"""
pos = self.end() - self.text_pos
return self.text[pos - 1 : pos]
def _escape(self, text):
return text.replace("\n", "\\n")
def __repr__(self):
return "TextSpan({}, {}, '{}', {}, {})" \
.format(self.pos, self.length,
self._escape(self.get_span_text()),
self.text_begin(),
self.last_modified)
class TextChanges:
__doc__ = """
Collection of text spans yet to be learned.
Example:
>>> c = TextChanges()
>>> c.insert(0, 1) # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 1]]
Doctests:
# insert and extend span
>>> c = TextChanges()
>>> c.insert(0, 1) # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 1]]
>>> c.insert(0, 1) # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 2]]
# extend at beginning and end
>>> c = TextChanges()
>>> c.insert(0, 1); c.insert(1, 1); c.insert(0, 3) # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 5]]
# insert separated by at least one character -> multiple spans
>>> c = TextChanges()
>>> c.insert(1, 1); c.insert(0, 1) # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 1], [2, 1]]
# add and delete inside single span
>>> c = TextChanges()
>>> c.insert(0, 9); # IGNORE_RESULT
>>> c.delete(2, 1); # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 8]]
# join spans when deleting
>>> c = TextChanges()
>>> c.insert(0, 1); c.insert(2, 1) # IGNORE_RESULT
>>> c.delete(2, 1); # IGNORE_RESULT
>>> c.delete(1, 1); # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 1]]
# remove spans fully contained in the deleted range
>>> c = TextChanges()
>>> c.insert(2, 1); c.insert(4, 1) # IGNORE_RESULT
>>> c.delete(0, 5); # IGNORE_RESULT
>>> c.get_span_ranges()
[[0, 0]]
# partially delete span, with and without recording empty spans
# ins del res with res without
>>> tests = [ # deletion before span
... [[2, 3], [0, 5], [[0, 0]], [[0, 0]] ],
... [[3, 3], [0, 5], [[0, 1]], [[0, 1]] ],
... [[4, 3], [0, 5], [[0, 2]], [[0, 2]] ],
... [[5, 3], [0, 5], [[0, 3]], [[0, 3]] ],
... [[6, 3], [0, 5], [[0, 0], [1, 3]], [[1, 3]] ],
... # deletion after span
... [[0, 3], [4, 5], [[0, 3], [4, 0]], [[0, 3]] ],
... [[1, 3], [4, 5], [[1, 3]], [[1, 3]] ],
... [[2, 3], [4, 5], [[2, 2]], [[2, 2]] ],
... [[3, 3], [4, 5], [[3, 1]], [[3, 1]] ],
... # deletion completely inside of span
... [[4, 3], [4, 5], [[4, 0]], [[4, 0]] ],
... [[0, 9], [2, 3], [[0, 6]], [[0, 6]] ] ]
>>> for test in tests:
... c = TextChanges()
... _ = c.insert(*test[0]); _ = c.delete(test[1][0], test[1][1], True)
... if c.get_span_ranges() != test[2]:
... "test1: " + repr(test) + " result: " + repr(c.get_span_ranges())
... c = TextChanges()
... _ = c.insert(*test[0]); _ = c.delete(test[1][0], test[1][1], False)
... if c.get_span_ranges() != test[3]:
... "test2: " + repr(test) + " result: " + repr(c.get_span_ranges())
# insert excluded span, include_length=0 to always insert an empty span
# ins del result
>>> tests = [[[5, 5], [2, 3], [[2, 0], [8, 5]] ], # insert before span
... [[0, 5], [6, 3], [[0, 5], [6, 0]] ], # insert after span
... [[0, 5], [2, 3], [[0, 2], [5, 3]] ], # insert inside span
... [[0, 5], [3, 4], [[0, 3], [7, 2]] ] ] # insert at span end
>>> for test in tests:
... c = TextChanges()
... _= c.insert(*test[0]); _ = c.insert(test[1][0], test[1][1], 0)
... if c.get_span_ranges() != test[2]:
... "test: " + repr(test) + " result: " + repr(c.get_span_ranges())
""".replace('IGNORE_RESULT', 'doctest: +ELLIPSIS\n [...')
def __init__(self, spans = None):
self.clear()
if spans:
self._spans = spans
def clear(self):
self._spans = []
# some counts for book-keeping, not used by this class itself.
self.insert_count = 0
self.delete_count = 0
def is_empty(self):
return len(self._spans) == 0
def get_spans(self):
return self._spans
def remove_span(self, span):
self._spans.remove(span)
def get_change_count(self):
return self.insert_count + self.delete_count
def insert(self, pos, length, include_length = -1):
"""
Record insertion up to <include_length> characters,
counted from the start of the insertion. The remaining
inserted characters are excluded from spans. This may split
an existing span.
A small but non-zero <include_length> allows to skip over
possible whitespace at the start of the insertion and
will often result in including the very first word(s) for learning.
include_length = -1: include length
include_length = +n: include n
include_length = None: include nothing, don't record
zero length span either
"""
end = pos + length
spans_to_update = []
# shift all existing spans after position
for span in self._spans:
if span.pos > pos:
span.pos += length
spans_to_update.append(span)
if include_length == -1:
# include all of the insertion
span = self.find_span_at(pos)
if span:
span.length += length
else:
span = TextSpan(pos, length);
self._spans.append(span)
spans_to_update.append(span)
else:
# include the insertion up to include_length only
max_include = min(length, include_length or 0)
span = self.find_span_at(pos)
if span:
# cut existing span
old_length = span.length
span.length = pos - span.pos + max_include
spans_to_update.append(span)
# new span for the cut part
l = old_length - span.length
if l > 0 or \
l == 0 and include_length is None:
span2 = TextSpan(pos + length, l)
self._spans.append(span2)
spans_to_update.append(span2)
elif not include_length is None:
span = TextSpan(pos, max_include)
self._spans.append(span)
spans_to_update.append(span)
t = time.time()
for span in spans_to_update:
span.last_modified = t
if spans_to_update:
self.insert_count += 1
return spans_to_update
def delete(self, pos, length, record_empty_spans = True):
"""
Record deletion.
record_empty_spans = True: record extra zero length spans
at deletion point
record_empty_spans = False: no extra new spans, but keep existing ones
that become zero length (terminal scrolling)
"""
begin = pos
end = pos + length
spans_to_update = []
#from pudb import set_trace; set_trace()
# cut/remove existing spans
for span in list(self._spans):
if span.pos <= pos: # span begins before deletion point?
k = min(span.end() - begin, length) # intersecting length
if k >= 0:
span.length -= k
spans_to_update.append(span)
else: # span begins after deletion point
k = end - span.begin() # intersecting length
if k >= 0:
span.pos += k
span.length -= k
span.pos -= length # shift by deleted length
# remove spans fully contained in the deleted range
if span.length < 0:
self._spans.remove(span)
else:
spans_to_update.append(span)
# Add new empty span
if record_empty_spans:
span = self.find_span_excluding(pos)
if not span:
# Create empty span when deleting too, because this
# is still a change that can result in a word to learn.
span = TextSpan(pos, 0);
self._spans.append(span)
self._spans, span = self.consolidate_spans(self._spans, span)
spans_to_update.append(span)
if spans_to_update:
self.delete_count += 1
return spans_to_update
@staticmethod
def consolidate_spans(spans, tracked_span = None):
"""
join touching or intersecting text spans
Doctests:
# Join touching spans
>>> spans = [TextSpan(0, 1),
... TextSpan(2, 4),
... TextSpan(1, 1),
... TextSpan(10, 3),
... TextSpan(8, 2)]
>>> spans, _span = TextChanges.consolidate_spans(spans)
>>> TextChanges.to_span_ranges(spans)
[[0, 6], [8, 5]]
# Join overlapping spans
>>> spans = [TextSpan(2, 5),
... TextSpan(4, 10),
... TextSpan(12, 8)]
>>> spans, _span = TextChanges.consolidate_spans(spans)
>>> TextChanges.to_span_ranges(spans)
[[2, 18]]
# Join contained spans
>>> spans = [TextSpan(5, 1),
... TextSpan(2, 10),
... TextSpan(3, 4)]
>>> spans, _span = TextChanges.consolidate_spans(spans)
>>> TextChanges.to_span_ranges(spans)
[[2, 10]]
"""
spans = sorted(spans, key=lambda x: (x.begin(), x.end()))
new_spans = []
slast = None
for s in spans:
if slast and \
slast.end() >= s.begin():
slast.union_inplace(s)
if tracked_span is s:
tracked_span = slast
else:
new_spans.append(s)
slast = s
return new_spans, tracked_span
def find_span_at(self, pos):
"""
Doctests:
- find empty spans (text deleted):
>>> c = TextChanges()
>>> c.insert(0, 0) # doctest: +ELLIPSIS
[TextSpan(...
>>> c.find_span_at(0) # doctest: +ELLIPSIS
TextSpan(0, 0,...
"""
for span in self._spans:
if span.pos <= pos <= span.pos + span.length:
return span
return None
def find_span_excluding(self, pos):
"""
Doctests:
- find empty spans (text deleted):
>>> c = TextChanges()
>>> c.insert(0, 0) # doctest: +ELLIPSIS
[TextSpan(...
>>> c.find_span_excluding(0) # doctest: +ELLIPSIS
TextSpan(0, 0,...
- don't match the end
>>> c = TextChanges()
>>> c.insert(0, 1) # doctest: +ELLIPSIS
[TextSpan(...
>>> c.find_span_excluding(1) # doctest: +ELLIPSIS
"""
for span in self._spans:
if span.pos == pos or \
span.pos <= pos < span.pos + span.length:
return span
return None
def get_span_ranges(self):
return self.to_span_ranges(self._spans)
@staticmethod
def to_span_ranges(spans):
return sorted([[span.pos, span.length] for span in spans])
def __repr__(self):
return "TextChanges " + repr([str(span) for span in self._spans])