aboutsummaryrefslogtreecommitdiff
path: root/pypy
diff options
context:
space:
mode:
author: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2021-02-25 09:53:25 +0100
committer: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2021-02-25 09:53:25 +0100
commit: 1714320d105280624ab1b0ea3274668377b80ad4 (patch)
tree: 5e3c411f2c93a64de2bc2a86d142d3cdf3073ee8 /pypy
parent: more upstream syncing - move inttypes.h into pyport.h (issue 3407) (diff)
download: pypy-1714320d105280624ab1b0ea3274668377b80ad4.tar.gz
pypy-1714320d105280624ab1b0ea3274668377b80ad4.tar.bz2
pypy-1714320d105280624ab1b0ea3274668377b80ad4.zip
copy an optimization from CPython: when the search string of str.replace and
str.split doesn't occur in the string, don't create a copy but just reuse self.
Diffstat (limited to 'pypy')
-rw-r--r-- pypy/objspace/std/bytesobject.py 22
-rw-r--r-- pypy/objspace/std/test/test_bytesobject.py 4
-rw-r--r-- pypy/objspace/std/test/test_unicodeobject.py 4
-rw-r--r-- pypy/objspace/std/unicodeobject.py 2
4 files changed, 30 insertions, 2 deletions
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
index 6315c5d6cf..2316f6e513 100644
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -690,15 +690,33 @@ class W_BytesObject(W_AbstractBytesObject):
self_as_unicode._utf8.find(w_sub._utf8) >= 0)
return self._StringMethods_descr_contains(space, w_sub)
- _StringMethods_descr_replace = descr_replace
@unwrap_spec(count=int)
def descr_replace(self, space, w_old, w_new, count=-1):
+ from rpython.rlib.rstring import replace
old_is_unicode = space.isinstance_w(w_old, space.w_unicode)
new_is_unicode = space.isinstance_w(w_new, space.w_unicode)
if old_is_unicode or new_is_unicode:
self_as_uni = unicode_from_encoded_object(space, self, None, None)
return self_as_uni.descr_replace(space, w_old, w_new, count)
- return self._StringMethods_descr_replace(space, w_old, w_new, count)
+
+ # almost copy of StringMethods.descr_replace :-(
+ input = self._value
+
+ sub = self._op_val(space, w_old)
+ by = self._op_val(space, w_new)
+ # the following two lines are for being bug-to-bug compatible
+ # with CPython: see issue #2448
+ if count >= 0 and len(input) == 0:
+ return self._empty()
+ try:
+ res = replace(input, sub, by, count)
+ except OverflowError:
+ raise oefmt(space.w_OverflowError, "replace string is too long")
+ # difference: reuse self if no replacement was done
+ if type(self) is W_BytesObject and res is input:
+ return self
+
+ return self._new(res)
_StringMethods_descr_join = descr_join
def descr_join(self, space, w_list):
diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py
index cc15f97d54..2feca7ab5e 100644
--- a/pypy/objspace/std/test/test_bytesobject.py
+++ b/pypy/objspace/std/test/test_bytesobject.py
@@ -342,6 +342,10 @@ class AppTestBytesObject:
assert 'one'.replace(buffer('o'), buffer('n'), 1) == 'nne'
assert 'one'.replace(buffer('o'), buffer('n')) == 'nne'
+ def test_replace_no_occurrence(self):
+ x = b"xyz"
+ assert x.replace(b"a", b"b") is x
+
def test_strip(self):
s = " a b "
assert s.strip() == "a b"
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index 51faff763d..6b1c7315da 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1303,3 +1303,7 @@ class AppTestUnicodeString:
def test_newlist_utf8_non_ascii(self):
'ä'.split("\n")[0] # does not crash
+
+ def test_replace_no_occurrence(self):
+ x = u"xyz"
+ assert x.replace(u"a", u"b") is x
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index 1dcd415912..1b7a8d07b4 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -880,6 +880,8 @@ class W_UnicodeObject(W_Root):
count, isutf8=True)
except OverflowError:
raise oefmt(space.w_OverflowError, "replace string is too long")
+ if type(self) is W_UnicodeObject and replacements == 0:
+ return self
newlength = self._length + replacements * (w_by._length - w_sub._length)
return W_UnicodeObject(res, newlength)