| From dbf52e02f18dac6f5f0a64f78932f3dc6efc056b Mon Sep 17 00:00:00 2001 |
| From: Benjamin Peterson <benjamin@python.org> |
| Date: Tue, 2 Jan 2018 09:25:41 -0800 |
| Subject: [PATCH] bpo-31530: fix crash when multiple threads iterate over a |
| file, round 2 (#5060) |
| |
| Multiple threads iterating over a file can corrupt the file's internal readahead |
| buffer resulting in crashes. To fix this, cache buffer state thread-locally for |
| the duration of a file_iternext call and only update the file's internal state |
| after reading completes. |
| |
| No attempt is made to define or provide "reasonable" semantics for iterating |
| over a file on multiple threads. (Non-crashing) races are still |
| present. Duplicated, corrupt, and missing data will happen. |
| |
| This was originally fixed by 6401e5671781eb217ee1afb4603cc0d1b0367ae6, which |
| raised an exception from seek() and next() when concurrent operations were |
| detected. Alas, this simpler solution breaks legitimate use cases such as |
| capturing the standard streams when multiple threads are logging. |
| |
| CVE: CVE-2018-1000030 |
| Upstream-Status: Backport [https://github.com/python/cpython/commit/dbf52e02f18dac6f5f0a64f78932f3dc6efc056b] |
| |
| Signed-off-by: Jagadeesh Krishnanjanappa <jkrishnanjanappa@mvista.com> |
| |
| --- |
| Lib/test/test_file2k.py | 27 ++--- |
| .../2017-09-20-18-28-09.bpo-31530.CdLOM7.rst | 3 - |
| Objects/fileobject.c | 118 ++++++++++++--------- |
| 3 files changed, 78 insertions(+), 70 deletions(-) |
| |
| diff --git a/Lib/test/test_file2k.py b/Lib/test/test_file2k.py |
| index d8966e034e..c73e8d8dc4 100644 |
| --- a/Lib/test/test_file2k.py |
| +++ b/Lib/test/test_file2k.py |
| @@ -653,18 +653,15 @@ class FileThreadingTests(unittest.TestCase): |
| self._test_close_open_io(io_func) |
| |
| def test_iteration_torture(self): |
| - # bpo-31530: Crash when concurrently iterate over a file. |
| + # bpo-31530 |
| with open(self.filename, "wb") as fp: |
| for i in xrange(2**20): |
| fp.write(b"0"*50 + b"\n") |
| with open(self.filename, "rb") as f: |
| - def iterate(): |
| - try: |
| - for l in f: |
| - pass |
| - except IOError: |
| + def it(): |
| + for l in f: |
| pass |
| - self._run_workers(iterate, 10) |
| + self._run_workers(it, 10) |
| |
| def test_iteration_seek(self): |
| # bpo-31530: Crash when concurrently seek and iterate over a file. |
| @@ -674,17 +671,15 @@ class FileThreadingTests(unittest.TestCase): |
| with open(self.filename, "rb") as f: |
| it = iter([1] + [0]*10) # one thread reads, others seek |
| def iterate(): |
| - try: |
| - if next(it): |
| - for l in f: |
| - pass |
| - else: |
| - for i in range(100): |
| - f.seek(i*100, 0) |
| - except IOError: |
| - pass |
| + if next(it): |
| + for l in f: |
| + pass |
| + else: |
| + for i in xrange(100): |
| + f.seek(i*100, 0) |
| self._run_workers(iterate, 10) |
| |
| + |
| @unittest.skipUnless(os.name == 'posix', 'test requires a posix system.') |
| class TestFileSignalEINTR(unittest.TestCase): |
| def _test_reading(self, data_to_write, read_and_verify_code, method_name, |
| diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst |
| index a6cb6c9e9b..beb09b5ae6 100644 |
| --- a/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst |
| +++ b/Misc/NEWS.d/next/Core and Builtins/2017-09-20-18-28-09.bpo-31530.CdLOM7.rst |
| @@ -1,4 +1 @@ |
| Fixed crashes when iterating over a file on multiple threads. |
| -seek() and next() methods of file objects now raise an exception during |
| -concurrent operation on the same file object. |
| -A lock can be used to prevent the error. |
| diff --git a/Objects/fileobject.c b/Objects/fileobject.c |
| index 8d1c5812f0..270b28264a 100644 |
| --- a/Objects/fileobject.c |
| +++ b/Objects/fileobject.c |
| @@ -609,7 +609,12 @@ err_iterbuffered(void) |
| return NULL; |
| } |
| |
| -static void drop_readahead(PyFileObject *); |
| +static void |
| +drop_file_readahead(PyFileObject *f) |
| +{ |
| + PyMem_FREE(f->f_buf); |
| + f->f_buf = NULL; |
| +} |
| |
| /* Methods */ |
| |
| @@ -632,7 +637,7 @@ file_dealloc(PyFileObject *f) |
| Py_XDECREF(f->f_mode); |
| Py_XDECREF(f->f_encoding); |
| Py_XDECREF(f->f_errors); |
| - drop_readahead(f); |
| + drop_file_readahead(f); |
| Py_TYPE(f)->tp_free((PyObject *)f); |
| } |
| |
| @@ -767,13 +772,7 @@ file_seek(PyFileObject *f, PyObject *args) |
| |
| if (f->f_fp == NULL) |
| return err_closed(); |
| - if (f->unlocked_count > 0) { |
| - PyErr_SetString(PyExc_IOError, |
| - "seek() called during concurrent " |
| - "operation on the same file object"); |
| - return NULL; |
| - } |
| - drop_readahead(f); |
| + drop_file_readahead(f); |
| whence = 0; |
| if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence)) |
| return NULL; |
| @@ -2242,12 +2241,16 @@ static PyGetSetDef file_getsetlist[] = { |
| {0}, |
| }; |
| |
| +typedef struct { |
| + char *buf, *bufptr, *bufend; |
| +} readaheadbuffer; |
| + |
| static void |
| -drop_readahead(PyFileObject *f) |
| +drop_readaheadbuffer(readaheadbuffer *rab) |
| { |
| - if (f->f_buf != NULL) { |
| - PyMem_Free(f->f_buf); |
| - f->f_buf = NULL; |
| + if (rab->buf != NULL) { |
| + PyMem_FREE(rab->buf); |
| + rab->buf = NULL; |
| } |
| } |
| |
| @@ -2255,36 +2258,34 @@ drop_readahead(PyFileObject *f) |
| (unless at EOF) and no more than bufsize. Returns negative value on |
| error, will set MemoryError if bufsize bytes cannot be allocated. */ |
| static int |
| -readahead(PyFileObject *f, Py_ssize_t bufsize) |
| +readahead(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t bufsize) |
| { |
| Py_ssize_t chunksize; |
| |
| - assert(f->unlocked_count == 0); |
| - if (f->f_buf != NULL) { |
| - if( (f->f_bufend - f->f_bufptr) >= 1) |
| + if (rab->buf != NULL) { |
| + if ((rab->bufend - rab->bufptr) >= 1) |
| return 0; |
| else |
| - drop_readahead(f); |
| + drop_readaheadbuffer(rab); |
| } |
| - if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) { |
| + if ((rab->buf = PyMem_MALLOC(bufsize)) == NULL) { |
| PyErr_NoMemory(); |
| return -1; |
| } |
| FILE_BEGIN_ALLOW_THREADS(f) |
| errno = 0; |
| - chunksize = Py_UniversalNewlineFread( |
| - f->f_buf, bufsize, f->f_fp, (PyObject *)f); |
| + chunksize = Py_UniversalNewlineFread(rab->buf, bufsize, f->f_fp, (PyObject *)f); |
| FILE_END_ALLOW_THREADS(f) |
| if (chunksize == 0) { |
| if (ferror(f->f_fp)) { |
| PyErr_SetFromErrno(PyExc_IOError); |
| clearerr(f->f_fp); |
| - drop_readahead(f); |
| + drop_readaheadbuffer(rab); |
| return -1; |
| } |
| } |
| - f->f_bufptr = f->f_buf; |
| - f->f_bufend = f->f_buf + chunksize; |
| + rab->bufptr = rab->buf; |
| + rab->bufend = rab->buf + chunksize; |
| return 0; |
| } |
| |
| @@ -2294,51 +2295,43 @@ readahead(PyFileObject *f, Py_ssize_t bufsize) |
| logarithmic buffer growth to about 50 even when reading a 1gb line. */ |
| |
| static PyStringObject * |
| -readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize) |
| +readahead_get_line_skip(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t skip, Py_ssize_t bufsize) |
| { |
| PyStringObject* s; |
| char *bufptr; |
| char *buf; |
| Py_ssize_t len; |
| |
| - if (f->unlocked_count > 0) { |
| - PyErr_SetString(PyExc_IOError, |
| - "next() called during concurrent " |
| - "operation on the same file object"); |
| - return NULL; |
| - } |
| - if (f->f_buf == NULL) |
| - if (readahead(f, bufsize) < 0) |
| + if (rab->buf == NULL) |
| + if (readahead(f, rab, bufsize) < 0) |
| return NULL; |
| |
| - len = f->f_bufend - f->f_bufptr; |
| + len = rab->bufend - rab->bufptr; |
| if (len == 0) |
| - return (PyStringObject *) |
| - PyString_FromStringAndSize(NULL, skip); |
| - bufptr = (char *)memchr(f->f_bufptr, '\n', len); |
| + return (PyStringObject *)PyString_FromStringAndSize(NULL, skip); |
| + bufptr = (char *)memchr(rab->bufptr, '\n', len); |
| if (bufptr != NULL) { |
| bufptr++; /* Count the '\n' */ |
| - len = bufptr - f->f_bufptr; |
| - s = (PyStringObject *) |
| - PyString_FromStringAndSize(NULL, skip + len); |
| + len = bufptr - rab->bufptr; |
| + s = (PyStringObject *)PyString_FromStringAndSize(NULL, skip + len); |
| if (s == NULL) |
| return NULL; |
| - memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len); |
| - f->f_bufptr = bufptr; |
| - if (bufptr == f->f_bufend) |
| - drop_readahead(f); |
| + memcpy(PyString_AS_STRING(s) + skip, rab->bufptr, len); |
| + rab->bufptr = bufptr; |
| + if (bufptr == rab->bufend) |
| + drop_readaheadbuffer(rab); |
| } else { |
| - bufptr = f->f_bufptr; |
| - buf = f->f_buf; |
| - f->f_buf = NULL; /* Force new readahead buffer */ |
| + bufptr = rab->bufptr; |
| + buf = rab->buf; |
| + rab->buf = NULL; /* Force new readahead buffer */ |
| assert(len <= PY_SSIZE_T_MAX - skip); |
| - s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2)); |
| + s = readahead_get_line_skip(f, rab, skip + len, bufsize + (bufsize>>2)); |
| if (s == NULL) { |
| - PyMem_Free(buf); |
| + PyMem_FREE(buf); |
| return NULL; |
| } |
| memcpy(PyString_AS_STRING(s) + skip, bufptr, len); |
| - PyMem_Free(buf); |
| + PyMem_FREE(buf); |
| } |
| return s; |
| } |
| @@ -2356,7 +2349,30 @@ file_iternext(PyFileObject *f) |
| if (!f->readable) |
| return err_mode("reading"); |
| |
| - l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE); |
| + { |
| + /* |
| + Multiple threads can enter this method while the GIL is released |
| + during file read and wreak havoc on the file object's readahead |
| + buffer. To avoid dealing with cross-thread coordination issues, we |
| + cache the file buffer state locally and only set it back on the file |
| + object when we're done. |
| + */ |
| + readaheadbuffer rab = {f->f_buf, f->f_bufptr, f->f_bufend}; |
| + f->f_buf = NULL; |
| + l = readahead_get_line_skip(f, &rab, 0, READAHEAD_BUFSIZE); |
| + /* |
| + Make sure the file's internal read buffer is cleared out. This will |
| + only do anything if some other thread interleaved with us during |
| + readahead. We want to drop any changeling buffer, so we don't leak |
| + memory. We may lose data, but that's what you get for reading the same |
| + file object in multiple threads. |
| + */ |
| + drop_file_readahead(f); |
| + f->f_buf = rab.buf; |
| + f->f_bufptr = rab.bufptr; |
| + f->f_bufend = rab.bufend; |
| + } |
| + |
| if (l == NULL || PyString_GET_SIZE(l) == 0) { |
| Py_XDECREF(l); |
| return NULL; |
| -- |
| 2.13.3 |
| |