Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Doc/library/http.cookiejar.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
--------------

The :mod:`http.cookiejar` module defines classes for automatic handling of HTTP
cookies. It is useful for accessing web sites that require small pieces of data
cookies. It is useful for accessing websites that require small pieces of data
-- :dfn:`cookies` -- to be set on the client machine by an HTTP response from a
web server, and then returned to the server in later HTTP requests.

Expand Down
2 changes: 1 addition & 1 deletion Doc/library/urllib.robotparser.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

This module provides a single class, :class:`RobotFileParser`, which answers
questions about whether or not a particular user agent can fetch a URL on the
web site that published the :file:`robots.txt` file. For more details on the
website that published the :file:`robots.txt` file. For more details on the
structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.


Expand Down
2 changes: 1 addition & 1 deletion Doc/tutorial/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ together with its interpreted nature, make it an ideal language for scripting
and rapid application development in many areas on most platforms.

The Python interpreter and the extensive standard library are freely available
in source or binary form for all major platforms from the Python web site,
in source or binary form for all major platforms from the Python website,
https://www.python.org/, and may be freely distributed. The same site also
contains distributions of and pointers to many free third party Python modules,
programs and tools, and additional documentation.
Expand Down
2 changes: 1 addition & 1 deletion Doc/tutorial/whatnow.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ the set are:

More Python resources:

* https://www.python.org: The major Python web site. It contains code,
* https://www.python.org: The major Python website. It contains code,
documentation, and pointers to Python-related pages around the web.

* https://docs.python.org: Fast access to Python's documentation.
Expand Down
9 changes: 7 additions & 2 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,13 @@ extern void _PyLineTable_InitAddressRange(
/** API for traversing the line number table. */
PyAPI_FUNC(int) _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
// This is used in dump_frame() in traceback.c without an attached tstate.
extern int _PyCode_Addr2LineNoTstate(PyCodeObject *co, int addr);

// Similar to PyCode_Addr2Line(), but returns -1 if the code object is invalid,
// and can be called without an attached tstate. Used by dump_frame() in
// Python/traceback.c. The function uses heuristics to detect freed memory;
// it is not 100% reliable.
extern int _PyCode_SafeAddr2Line(PyCodeObject *co, int addr);


/** API for executors */
extern void _PyCode_Clear_Executors(PyCodeObject *code);
Expand Down
55 changes: 55 additions & 0 deletions Include/internal/pycore_interpframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,36 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) {
return (PyCodeObject *)executable;
}

// Similar to _PyFrame_GetCode(), but returns NULL if the frame is invalid or
// freed. Used by dump_frame() in Python/traceback.c. The function uses
// heuristics to detect freed memory; it is not 100% reliable.
static inline PyCodeObject*
_PyFrame_SafeGetCode(_PyInterpreterFrame *f)
{
    // A legitimate frame may have NULL globals or builtins, but that is
    // unlikely; either one being NULL is treated as a sign of an invalid
    // frame.
    if (!(f->f_globals != NULL && f->f_builtins != NULL)) {
        return NULL;
    }
    if (PyStackRef_IsNull(f->f_executable)) {
        return NULL;
    }

    // Look at the raw bits of the stack reference first, so that an obviously
    // bogus value is rejected before it is interpreted as an object.
    void *raw_bits;
    memcpy(&raw_bits, &f->f_executable, sizeof(f->f_executable));
    if (_PyMem_IsPtrFreed(raw_bits)) {
        return NULL;
    }

    // Only now borrow the object; it must look alive and be a code object.
    PyObject *candidate = PyStackRef_AsPyObjectBorrow(f->f_executable);
    if (_PyObject_IsFreed(candidate) || !PyCode_Check(candidate)) {
        return NULL;
    }
    return (PyCodeObject *)candidate;
}

static inline _Py_CODEUNIT *
_PyFrame_GetBytecode(_PyInterpreterFrame *f)
{
Expand All @@ -37,6 +67,31 @@ _PyFrame_GetBytecode(_PyInterpreterFrame *f)
#endif
}

// Similar to PyUnstable_InterpreterFrame_GetLasti(), but return -1 if the
// frame is invalid or freed. Used by dump_frame() in Python/traceback.c. The
// function uses heuristics to detect freed memory; it is not 100% reliable.
//
// On success, return the distance between f->instr_ptr and the start of the
// frame's bytecode, scaled by sizeof(_Py_CODEUNIT).
static inline int
_PyFrame_SafeGetLasti(struct _PyInterpreterFrame *f)
{
    // Code based on _PyFrame_GetBytecode() but replace _PyFrame_GetCode()
    // with _PyFrame_SafeGetCode().
    PyCodeObject *co = _PyFrame_SafeGetCode(f);
    if (co == NULL) {
        return -1;
    }

    _Py_CODEUNIT *bytecode;
#ifdef Py_GIL_DISABLED
    _PyCodeArray *tlbc = _PyCode_GetTLBCArray(co);
    // Check the index at runtime rather than with assert(): the frame may be
    // corrupted, and an out-of-range index must be reported as an invalid
    // frame instead of aborting or reading outside the array.
    if (f->tlbc_index < 0 || f->tlbc_index >= tlbc->size) {
        return -1;
    }
    bytecode = (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index];
#else
    bytecode = _PyCode_CODE(co);
#endif

    return (int)(f->instr_ptr - bytecode) * sizeof(_Py_CODEUNIT);
}

static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) {
PyObject *func = PyStackRef_AsPyObjectBorrow(f->f_funcobj);
assert(PyFunction_Check(func));
Expand Down
10 changes: 6 additions & 4 deletions Include/internal/pycore_pymem.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,17 @@ static inline int _PyMem_IsPtrFreed(const void *ptr)
{
uintptr_t value = (uintptr_t)ptr;
#if SIZEOF_VOID_P == 8
return (value == 0
return (value <= 0xff // NULL, 0x1, 0x2, ..., 0xff
|| value == (uintptr_t)0xCDCDCDCDCDCDCDCD
|| value == (uintptr_t)0xDDDDDDDDDDDDDDDD
|| value == (uintptr_t)0xFDFDFDFDFDFDFDFD);
|| value == (uintptr_t)0xFDFDFDFDFDFDFDFD
|| value >= (uintptr_t)0xFFFFFFFFFFFFFF00); // -0xff, ..., -2, -1
#elif SIZEOF_VOID_P == 4
return (value == 0
return (value <= 0xff
|| value == (uintptr_t)0xCDCDCDCD
|| value == (uintptr_t)0xDDDDDDDD
|| value == (uintptr_t)0xFDFDFDFD);
|| value == (uintptr_t)0xFDFDFDFD
|| value >= (uintptr_t)0xFFFFFF00);
#else
# error "unknown pointer size"
#endif
Expand Down
5 changes: 4 additions & 1 deletion Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,12 @@ def __init__(self, lexicon, flags=0):
s = _parser.State()
s.flags = flags
for phrase, action in lexicon:
sub_pattern = _parser.parse(phrase, flags)
if sub_pattern.state.groups != 1:
raise ValueError("Cannot use capturing groups in re.Scanner")
gid = s.opengroup()
p.append(_parser.SubPattern(s, [
(SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))),
(SUBPATTERN, (gid, 0, 0, sub_pattern)),
]))
s.closegroup(gid, p[-1])
p = _parser.SubPattern(s, [(BRANCH, (None, p))])
Expand Down
18 changes: 18 additions & 0 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,24 @@ def s_int(scanner, token): return int(token)
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
'op+', 'bar'], ''))

def test_bug_gh140797(self):
    """gh140797: re.Scanner rejects lexicon patterns with capturing groups."""
    expected_error = r"Cannot use capturing groups in re\.Scanner"

    # A plain capturing group and a named group must both be refused when
    # the Scanner is constructed.
    for pattern in ("(a)b", "(?P<name>a)"):
        with self.assertRaisesRegex(ValueError, expected_error):
            Scanner([(pattern, None)])

    # Non-capturing groups should pass normally.
    def echo(scanner, token):
        return token

    tokens, remainder = Scanner([("(?:a)b", echo)]).scan("ab")
    self.assertEqual(tokens, ['ab'])
    self.assertEqual(remainder, '')

def test_bug_448951(self):
# bug 448951 (similar to 429357, but with single char match)
# (Also test greedy matches.)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`faulthandler` now detects if a frame or a code object is invalid or
freed. Patch by Victor Stinner.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The undocumented :class:`!re.Scanner` class now forbids regular expressions containing capturing groups in its lexicon patterns. Patterns using capturing groups could
previously lead to crashes with a segmentation fault. Use non-capturing groups ``(?:...)`` instead.
23 changes: 20 additions & 3 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1005,8 +1005,8 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
* source location tracking (co_lines/co_positions)
******************/

int
_PyCode_Addr2LineNoTstate(PyCodeObject *co, int addrq)
static int
_PyCode_Addr2Line(PyCodeObject *co, int addrq)
{
if (addrq < 0) {
return co->co_firstlineno;
Expand All @@ -1020,12 +1020,29 @@ _PyCode_Addr2LineNoTstate(PyCodeObject *co, int addrq)
return _PyCode_CheckLineNumber(addrq, &bounds);
}

/* Map the bytecode byte offset addrq to a line number for the code object co.

   Return co->co_firstlineno if addrq is negative. Return -1 if addrq is not
   smaller than _PyCode_NBYTES(co). Otherwise return the line number found
   through the monitoring line data when present, or through the address
   range table. */
int
_PyCode_SafeAddr2Line(PyCodeObject *co, int addrq)
{
    if (addrq < 0) {
        return co->co_firstlineno;
    }
    // Reject out-of-range offsets before any lookup is indexed with them,
    // including the monitoring line lookup below. addrq is known to be
    // non-negative at this point.
    if (addrq >= _PyCode_NBYTES(co)) {
        return -1;
    }
    if (co->_co_monitoring && co->_co_monitoring->lines) {
        return _Py_Instrumentation_GetLine(co, addrq/sizeof(_Py_CODEUNIT));
    }
    PyCodeAddressRange bounds;
    _PyCode_InitAddressRange(co, &bounds);
    return _PyCode_CheckLineNumber(addrq, &bounds);
}

int
PyCode_Addr2Line(PyCodeObject *co, int addrq)
{
int lineno;
Py_BEGIN_CRITICAL_SECTION(co);
lineno = _PyCode_Addr2LineNoTstate(co, addrq);
lineno = _PyCode_Addr2Line(co, addrq);
Py_END_CRITICAL_SECTION();
return lineno;
}
Expand Down
60 changes: 37 additions & 23 deletions Python/traceback.c
Original file line number Diff line number Diff line change
Expand Up @@ -1028,44 +1028,61 @@ _Py_DumpWideString(int fd, wchar_t *str)

/* Write a frame into the file fd: "File "xxx", line xxx in xxx".

This function is signal safe. */
This function is signal safe.

static void
Return 0 on success. Return -1 if the frame is invalid. */

static int
dump_frame(int fd, _PyInterpreterFrame *frame)
{
assert(frame->owner < FRAME_OWNED_BY_INTERPRETER);
if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
/* Ignore trampoline frame */
return 0;
}

PyCodeObject *code =_PyFrame_GetCode(frame);
PyCodeObject *code = _PyFrame_SafeGetCode(frame);
if (code == NULL) {
return -1;
}

int res = 0;
PUTS(fd, " File ");
if (code->co_filename != NULL
&& PyUnicode_Check(code->co_filename))
{
PUTS(fd, "\"");
_Py_DumpASCII(fd, code->co_filename);
PUTS(fd, "\"");
} else {
}
else {
PUTS(fd, "???");
res = -1;
}
int lasti = PyUnstable_InterpreterFrame_GetLasti(frame);
int lineno = _PyCode_Addr2LineNoTstate(code, lasti);

PUTS(fd, ", line ");
int lasti = _PyFrame_SafeGetLasti(frame);
int lineno = -1;
if (lasti >= 0) {
lineno = _PyCode_SafeAddr2Line(code, lasti);
}
if (lineno >= 0) {
_Py_DumpDecimal(fd, (size_t)lineno);
}
else {
PUTS(fd, "???");
res = -1;
}
PUTS(fd, " in ");

if (code->co_name != NULL
&& PyUnicode_Check(code->co_name)) {
PUTS(fd, " in ");
if (code->co_name != NULL && PyUnicode_Check(code->co_name)) {
_Py_DumpASCII(fd, code->co_name);
}
else {
PUTS(fd, "???");
res = -1;
}

PUTS(fd, "\n");
return res;
}

static int
Expand Down Expand Up @@ -1108,17 +1125,6 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header)

unsigned int depth = 0;
while (1) {
if (frame->owner == FRAME_OWNED_BY_INTERPRETER) {
/* Trampoline frame */
frame = frame->previous;
if (frame == NULL) {
break;
}

/* Can't have more than one shim frame in a row */
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
}

if (MAX_FRAME_DEPTH <= depth) {
if (MAX_FRAME_DEPTH < depth) {
PUTS(fd, "plus ");
Expand All @@ -1128,7 +1134,15 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header)
break;
}

dump_frame(fd, frame);
if (_PyMem_IsPtrFreed(frame)) {
PUTS(fd, " <freed frame>\n");
break;
}
if (dump_frame(fd, frame) < 0) {
PUTS(fd, " <invalid frame>\n");
break;
}

frame = frame->previous;
if (frame == NULL) {
break;
Expand Down
Loading