From ea3cd0498c443e93be441736c804258e93d21edd Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 25 Jan 2024 06:10:51 -0500 Subject: [PATCH 001/127] gh-114312: Collect stats for unlikely events (GH-114493) --- Include/cpython/pystats.h | 14 ++++++ Include/internal/pycore_code.h | 2 + Include/internal/pycore_interp.h | 29 ++++++++++++ Lib/test/test_optimizer.py | 75 ++++++++++++++++++++++++++++++++ Modules/_testinternalcapi.c | 16 +++++++ Objects/funcobject.c | 9 ++++ Objects/typeobject.c | 3 ++ Python/pylifecycle.c | 18 ++++++++ Python/pystate.c | 1 + Python/specialize.c | 11 +++++ Tools/scripts/summarize_stats.py | 22 +++++++++- 11 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 Lib/test/test_optimizer.py diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index ba67eefef3e37a..bf0cfe4cb695b4 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -122,11 +122,25 @@ typedef struct _optimization_stats { uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; } OptimizationStats; +typedef struct _rare_event_stats { + /* Setting an object's class, obj.__class__ = ... */ + uint64_t set_class; + /* Setting the bases of a class, cls.__bases__ = ... */ + uint64_t set_bases; + /* Setting the PEP 523 frame eval function, _PyInterpreterState_SetFrameEvalFunc() */ + uint64_t set_eval_frame_func; + /* Modifying the builtins, __builtins__.__dict__[var] = ... */ + uint64_t builtin_dict; + /* Modifying a function, e.g. func.__defaults__ = ..., etc. */ + uint64_t func_modification; +} RareEventStats; + typedef struct _stats { OpcodeStats opcode_stats[256]; CallStats call_stats; ObjectStats object_stats; OptimizationStats optimization_stats; + RareEventStats rare_event_stats; GCStats *gc_stats; } PyStats; diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 73df6c3568ffe0..fdd5918228455d 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -295,6 +295,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co); _Py_stats->optimization_stats.name[bucket]++; \ } \ } while (0) +#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0) // Export for '_opcode' shared extension PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); @@ -313,6 +314,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define UOP_STAT_INC(opname, name) ((void)0) #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) #define OPT_HIST(length, name) ((void)0) +#define RARE_EVENT_STAT_INC(name) ((void)0) #endif // !Py_STATS // Utility functions for reading/writing 32/64-bit values in the inline caches. diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index f953b8426e180a..662a18d93f329d 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -60,6 +60,21 @@ struct _stoptheworld_state { /* cross-interpreter data registry */ +/* Tracks some rare events per-interpreter, used by the optimizer to turn on/off + specific optimizations. */ +typedef struct _rare_events { + /* Setting an object's class, obj.__class__ = ... */ + uint8_t set_class; + /* Setting the bases of a class, cls.__bases__ = ... */ + uint8_t set_bases; + /* Setting the PEP 523 frame eval function, _PyInterpreterState_SetFrameEvalFunc() */ + uint8_t set_eval_frame_func; + /* Modifying the builtins, __builtins__.__dict__[var] = ... */ + uint8_t builtin_dict; + int builtins_dict_watcher_id; + /* Modifying a function, e.g. func.__defaults__ = ..., etc. */ + uint8_t func_modification; +} _rare_events; /* interpreter state */ @@ -217,6 +232,7 @@ struct _is { uint16_t optimizer_resume_threshold; uint16_t optimizer_backedge_threshold; uint32_t next_func_version; + _rare_events rare_events; _Py_GlobalMonitors monitors; bool sys_profile_initialized; @@ -347,6 +363,19 @@ PyAPI_FUNC(PyStatus) _PyInterpreterState_New( PyInterpreterState **pinterp); +#define RARE_EVENT_INTERP_INC(interp, name) \ + do { \ + /* saturating add */ \ + if (interp->rare_events.name < UINT8_MAX) interp->rare_events.name++; \ + RARE_EVENT_STAT_INC(name); \ + } while (0); \ + +#define RARE_EVENT_INC(name) \ + do { \ + PyInterpreterState *interp = PyInterpreterState_Get(); \ + RARE_EVENT_INTERP_INC(interp, name); \ + } while (0); \ + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_optimizer.py b/Lib/test/test_optimizer.py new file mode 100644 index 00000000000000..b56bf3cfd9560e --- /dev/null +++ b/Lib/test/test_optimizer.py @@ -0,0 +1,75 @@ +import _testinternalcapi +import unittest +import types + + +class TestRareEventCounters(unittest.TestCase): + def test_set_class(self): + class A: + pass + class B: + pass + a = A() + + orig_counter = _testinternalcapi.get_rare_event_counters()["set_class"] + a.__class__ = B + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["set_class"] + ) + + def test_set_bases(self): + class A: + pass + class B: + pass + class C(B): + pass + + orig_counter = _testinternalcapi.get_rare_event_counters()["set_bases"] + C.__bases__ = (A,) + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["set_bases"] + ) + + def test_set_eval_frame_func(self): + orig_counter = _testinternalcapi.get_rare_event_counters()["set_eval_frame_func"] + _testinternalcapi.set_eval_frame_record([]) + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["set_eval_frame_func"] + ) + _testinternalcapi.set_eval_frame_default() + + def test_builtin_dict(self): + orig_counter = _testinternalcapi.get_rare_event_counters()["builtin_dict"] + if isinstance(__builtins__, types.ModuleType): + builtins = __builtins__.__dict__ + else: + builtins = __builtins__ + builtins["FOO"] = 42 + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["builtin_dict"] + ) + del builtins["FOO"] + + def test_func_modification(self): + def func(x=0): + pass + + for attribute in ( + "__code__", + "__defaults__", + "__kwdefaults__" + ): + orig_counter = _testinternalcapi.get_rare_event_counters()["func_modification"] + setattr(func, attribute, getattr(func, attribute)) + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["func_modification"] + ) + +if __name__ == "__main__": + unittest.main() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 7d277df164d3ec..2c32c691afa583 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1635,6 +1635,21 @@ get_type_module_name(PyObject *self, PyObject *type) return _PyType_GetModuleName((PyTypeObject *)type); } +static PyObject * +get_rare_event_counters(PyObject *self, PyObject *type) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + + return Py_BuildValue( + "{sksksksksk}", + "set_class", interp->rare_events.set_class, + "set_bases", interp->rare_events.set_bases, + "set_eval_frame_func", interp->rare_events.set_eval_frame_func, + "builtin_dict", interp->rare_events.builtin_dict, + "func_modification", interp->rare_events.func_modification + ); +} + #ifdef Py_GIL_DISABLED static PyObject * @@ -1711,6 +1726,7 @@ static PyMethodDef module_functions[] = { {"restore_crossinterp_data", restore_crossinterp_data, METH_VARARGS}, _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF {"get_type_module_name", get_type_module_name, METH_O}, + {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS}, #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 2620dc69bfd79b..08b2823d8cf024 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -53,6 +53,15 @@ handle_func_event(PyFunction_WatchEvent event, PyFunctionObject *func, if (interp->active_func_watchers) { notify_func_watchers(interp, event, func, new_value); } + switch (event) { + case PyFunction_EVENT_MODIFY_CODE: + case PyFunction_EVENT_MODIFY_DEFAULTS: + case PyFunction_EVENT_MODIFY_KWDEFAULTS: + RARE_EVENT_INTERP_INC(interp, func_modification); + break; + default: + break; + } } int diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 3a35a5b5975898..a8c3b8896d36eb 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1371,6 +1371,7 @@ type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) res = 0; } + RARE_EVENT_INC(set_bases); Py_DECREF(old_bases); Py_DECREF(old_base); @@ -5842,6 +5843,8 @@ object_set_class(PyObject *self, PyObject *value, void *closure) Py_SET_TYPE(self, newto); if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) Py_DECREF(oldto); + + RARE_EVENT_INC(set_class); return 0; } else { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0d5eec06e9b458..261622adc4cc77 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -605,6 +605,12 @@ init_interp_create_gil(PyThreadState *tstate, int gil) _PyEval_InitGIL(tstate, own_gil); } +static int +builtins_dict_watcher(PyDict_WatchEvent event, PyObject *dict, PyObject *key, PyObject *new_value) +{ + RARE_EVENT_INC(builtin_dict); + return 0; +} static PyStatus pycore_create_interpreter(_PyRuntimeState *runtime, @@ -1266,6 +1272,14 @@ init_interp_main(PyThreadState *tstate) } } + if ((interp->rare_events.builtins_dict_watcher_id = PyDict_AddWatcher(&builtins_dict_watcher)) == -1) { + return _PyStatus_ERR("failed to add builtin dict watcher"); + } + + if (PyDict_Watch(interp->rare_events.builtins_dict_watcher_id, interp->builtins) != 0) { + return _PyStatus_ERR("failed to set builtin dict watcher"); + } + assert(!_PyErr_Occurred(tstate)); return _PyStatus_OK(); @@ -1592,6 +1606,10 @@ static void finalize_modules(PyThreadState *tstate) { PyInterpreterState *interp = tstate->interp; + + // Stop collecting stats on __builtin__ modifications during teardown + PyDict_Unwatch(interp->rare_events.builtins_dict_watcher_id, interp->builtins); + PyObject *modules = _PyImport_GetModules(interp); if (modules == NULL) { // Already done diff --git a/Python/pystate.c b/Python/pystate.c index 548c77b7dc7ebb..c9b521351444a7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2616,6 +2616,7 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, if (eval_frame != NULL) { _Py_Executors_InvalidateAll(interp); } + RARE_EVENT_INC(set_eval_frame_func); interp->eval_frame = eval_frame; } diff --git a/Python/specialize.c b/Python/specialize.c index 13e0440dd9dd0d..a9efbe0453b94e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -267,6 +267,16 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) } } +static void +print_rare_event_stats(FILE *out, RareEventStats *stats) +{ + fprintf(out, "Rare event (set_class): %" PRIu64 "\n", stats->set_class); + fprintf(out, "Rare event (set_bases): %" PRIu64 "\n", stats->set_bases); + fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n", stats->set_eval_frame_func); + fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n", stats->builtin_dict); + fprintf(out, "Rare event (func_modification): %" PRIu64 "\n", stats->func_modification); +} + static void print_stats(FILE *out, PyStats *stats) { @@ -275,6 +285,7 @@ print_stats(FILE *out, PyStats *stats) print_object_stats(out, &stats->object_stats); print_gc_stats(out, stats->gc_stats); print_optimization_stats(out, &stats->optimization_stats); + print_rare_event_stats(out, &stats->rare_event_stats); } void diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 1e9dc07bae8981..9b7e7b999ea7c7 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -412,6 +412,14 @@ def get_histogram(self, prefix: str) -> list[tuple[int, int]]: rows.sort() return rows + def get_rare_events(self) -> list[tuple[str, int]]: + prefix = "Rare event " + return [ + (key[len(prefix) + 1:-1], val) + for key, val in self._data.items() + if key.startswith(prefix) + ] + class Count(int): def markdown(self) -> str: @@ -1064,6 +1072,17 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) ) +def rare_event_section() -> Section: + def calc_rare_event_table(stats: Stats) -> Table: + return [(x, Count(y)) for x, y in stats.get_rare_events()] + + return Section( + "Rare events", + "Counts of rare/unlikely events", + [Table(("Event", "Count:"), calc_rare_event_table, JoinMode.CHANGE)], + ) + + def meta_stats_section() -> Section: def calc_rows(stats: Stats) -> Rows: return [("Number of data files", Count(stats.get("__nfiles__")))] @@ -1085,6 +1104,7 @@ def calc_rows(stats: Stats) -> Rows: object_stats_section(), gc_stats_section(), optimization_section(), + rare_event_section(), meta_stats_section(), ] @@ -1162,7 +1182,7 @@ def output_stats(inputs: list[Path], json_output=str | None): case 1: data = load_raw_data(Path(inputs[0])) if json_output is not None: - with open(json_output, 'w', encoding='utf-8') as f: + with open(json_output, "w", encoding="utf-8") as f: save_raw_data(data, f) # type: ignore stats = Stats(data) output_markdown(sys.stdout, LAYOUT, stats) From 0315941441f1e5f944a758c67eb1763b00e6e462 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 25 Jan 2024 12:54:19 +0000 Subject: [PATCH 002/127] gh-114265: remove i_loc_propagated, jump threading does not consider line numbers anymore (#114535) --- Lib/test/test_peepholer.py | 3 +- Python/flowgraph.c | 88 ++++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 76a6f25c34bbd3..2ea186c85c8823 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -1150,10 +1150,11 @@ def get_insts(lno1, lno2, op1, op2): lno1, lno2 = (4, 5) with self.subTest(lno = (lno1, lno2), ops = (op1, op2)): insts = get_insts(lno1, lno2, op1, op2) + op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT' expected_insts = [ ('LOAD_NAME', 0, 10), ('NOP', 0, 4), - (op2, 0, 5), + (op, 0, 5), ] self.cfg_optimization_test(insts, expected_insts, consts=list(range(5))) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 2fc90b8877b475..de831358eb9ac8 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -29,7 +29,6 @@ typedef struct _PyCfgInstruction { int i_opcode; int i_oparg; _PyCompilerSrcLocation i_loc; - unsigned i_loc_propagated : 1; /* location was set by propagate_line_numbers */ struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */ struct _PyCfgBasicblock *i_except; /* target block when exception is raised */ } cfg_instr; @@ -146,6 +145,16 @@ basicblock_next_instr(basicblock *b) return b->b_iused++; } +static cfg_instr * +basicblock_last_instr(const basicblock *b) { + assert(b->b_iused >= 0); + if (b->b_iused > 0) { + assert(b->b_instr != NULL); + return &b->b_instr[b->b_iused - 1]; + } + return NULL; +} + /* Allocate a new block and return a pointer to it. Returns NULL on error. */ @@ -186,6 +195,22 @@ basicblock_addop(basicblock *b, int opcode, int oparg, location loc) return SUCCESS; } +static int +basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc) +{ + cfg_instr *last = basicblock_last_instr(b); + if (last && is_jump(last)) { + return ERROR; + } + + RETURN_IF_ERROR( + basicblock_addop(b, opcode, target->b_label.id, loc)); + last = basicblock_last_instr(b); + assert(last && last->i_opcode == opcode); + last->i_target = target; + return SUCCESS; +} + static inline int basicblock_append_instructions(basicblock *target, basicblock *source) { @@ -199,16 +224,6 @@ basicblock_append_instructions(basicblock *target, basicblock *source) return SUCCESS; } -static cfg_instr * -basicblock_last_instr(const basicblock *b) { - assert(b->b_iused >= 0); - if (b->b_iused > 0) { - assert(b->b_instr != NULL); - return &b->b_instr[b->b_iused - 1]; - } - return NULL; -} - static inline int basicblock_nofallthrough(const basicblock *b) { cfg_instr *last = basicblock_last_instr(b); @@ -560,8 +575,8 @@ normalize_jumps_in_block(cfg_builder *g, basicblock *b) { if (backwards_jump == NULL) { return ERROR; } - basicblock_addop(backwards_jump, JUMP, target->b_label.id, last->i_loc); - backwards_jump->b_instr[0].i_target = target; + RETURN_IF_ERROR( + basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc)); last->i_opcode = reversed_opcode; last->i_target = b->b_next; @@ -1141,13 +1156,7 @@ remove_redundant_jumps(cfg_builder *g) { basicblock *next = next_nonempty_block(b->b_next); if (jump_target == next) { changes++; - if (last->i_loc_propagated) { - b->b_iused--; - } - else { - assert(last->i_loc.lineno != -1); - INSTR_SET_OP0(last, NOP); - } + INSTR_SET_OP0(last, NOP); } } } @@ -1184,23 +1193,23 @@ inline_small_exit_blocks(basicblock *bb) { // target->i_target using the provided opcode. Return whether or not the // optimization was successful. static bool -jump_thread(cfg_instr *inst, cfg_instr *target, int opcode) +jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode) { assert(is_jump(inst)); assert(is_jump(target)); + assert(inst == basicblock_last_instr(bb)); // bpo-45773: If inst->i_target == target->i_target, then nothing actually // changes (and we fall into an infinite loop): - if (inst->i_loc.lineno == -1) assert(inst->i_loc_propagated); - if (target->i_loc.lineno == -1) assert(target->i_loc_propagated); - if ((inst->i_loc.lineno == target->i_loc.lineno || - inst->i_loc_propagated || target->i_loc_propagated) && - inst->i_target != target->i_target) - { - inst->i_target = target->i_target; - inst->i_opcode = opcode; - if (inst->i_loc_propagated && !target->i_loc_propagated) { - inst->i_loc = target->i_loc; - } + if (inst->i_target != target->i_target) { + /* Change inst to NOP and append a jump to target->i_target. The + * NOP will be removed later if it's not needed for the lineno. + */ + INSTR_SET_OP0(inst, NOP); + + RETURN_IF_ERROR( + basicblock_add_jump( + bb, opcode, target->i_target, target->i_loc)); + return true; } return false; @@ -1673,29 +1682,29 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) case POP_JUMP_IF_NONE: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, inst->i_opcode); + i -= jump_thread(bb, inst, target, inst->i_opcode); } break; case POP_JUMP_IF_FALSE: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, POP_JUMP_IF_FALSE); + i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE); } break; case POP_JUMP_IF_TRUE: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, POP_JUMP_IF_TRUE); + i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE); } break; case JUMP: case JUMP_NO_INTERRUPT: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, JUMP); + i -= jump_thread(bb, inst, target, JUMP); continue; case JUMP_NO_INTERRUPT: - i -= jump_thread(inst, target, opcode); + i -= jump_thread(bb, inst, target, opcode); continue; } break; @@ -1707,7 +1716,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) * of FOR_ITER. */ /* - i -= jump_thread(inst, target, FOR_ITER); + i -= jump_thread(bb, inst, target, FOR_ITER); */ } break; @@ -2410,7 +2419,6 @@ propagate_line_numbers(basicblock *entryblock) { for (int i = 0; i < b->b_iused; i++) { if (b->b_instr[i].i_loc.lineno < 0) { b->b_instr[i].i_loc = prev_location; - b->b_instr[i].i_loc_propagated = 1; } else { prev_location = b->b_instr[i].i_loc; @@ -2420,7 +2428,6 @@ propagate_line_numbers(basicblock *entryblock) { if (b->b_next->b_iused > 0) { if (b->b_next->b_instr[0].i_loc.lineno < 0) { b->b_next->b_instr[0].i_loc = prev_location; - b->b_next->b_instr[0].i_loc_propagated = 1; } } } @@ -2429,7 +2436,6 @@ propagate_line_numbers(basicblock *entryblock) { if (target->b_predecessors == 1) { if (target->b_instr[0].i_loc.lineno < 0) { target->b_instr[0].i_loc = prev_location; - target->b_instr[0].i_loc_propagated = 1; } } } From e721adf4bd47b20ba0a93ad6471084de31bf20c7 Mon Sep 17 00:00:00 2001 From: AN Long Date: Thu, 25 Jan 2024 22:35:05 +0800 Subject: [PATCH 003/127] gh-77465: Increase test coverage for the numbers module (GH-111738) Co-authored-by: Serhiy Storchaka --- Lib/test/test_abstract_numbers.py | 158 +++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_abstract_numbers.py b/Lib/test/test_abstract_numbers.py index 2e06f0d16fdd05..72232b670cdb89 100644 --- a/Lib/test/test_abstract_numbers.py +++ b/Lib/test/test_abstract_numbers.py @@ -1,14 +1,34 @@ """Unit tests for numbers.py.""" +import abc import math import operator import unittest -from numbers import Complex, Real, Rational, Integral +from numbers import Complex, Real, Rational, Integral, Number + + +def concretize(cls): + def not_implemented(*args, **kwargs): + raise NotImplementedError() + + for name in dir(cls): + try: + value = getattr(cls, name) + if value.__isabstractmethod__: + setattr(cls, name, not_implemented) + except AttributeError: + pass + abc.update_abstractmethods(cls) + return cls + class TestNumbers(unittest.TestCase): def test_int(self): self.assertTrue(issubclass(int, Integral)) + self.assertTrue(issubclass(int, Rational)) + self.assertTrue(issubclass(int, Real)) self.assertTrue(issubclass(int, Complex)) + self.assertTrue(issubclass(int, Number)) self.assertEqual(7, int(7).real) self.assertEqual(0, int(7).imag) @@ -18,8 +38,11 @@ def test_int(self): self.assertEqual(1, int(7).denominator) def test_float(self): + self.assertFalse(issubclass(float, Integral)) self.assertFalse(issubclass(float, Rational)) self.assertTrue(issubclass(float, Real)) + self.assertTrue(issubclass(float, Complex)) + self.assertTrue(issubclass(float, Number)) self.assertEqual(7.3, float(7.3).real) self.assertEqual(0, float(7.3).imag) @@ -27,8 +50,11 @@ def test_float(self): self.assertEqual(-7.3, float(-7.3).conjugate()) def test_complex(self): + self.assertFalse(issubclass(complex, Integral)) + self.assertFalse(issubclass(complex, Rational)) self.assertFalse(issubclass(complex, Real)) self.assertTrue(issubclass(complex, Complex)) + self.assertTrue(issubclass(complex, Number)) c1, c2 = complex(3, 2), complex(4,1) # XXX: This is not ideal, but see the comment in math_trunc(). @@ -40,5 +66,135 @@ def test_complex(self): self.assertRaises(TypeError, int, c1) +class TestNumbersDefaultMethods(unittest.TestCase): + def test_complex(self): + @concretize + class MyComplex(Complex): + def __init__(self, real, imag): + self.r = real + self.i = imag + + @property + def real(self): + return self.r + + @property + def imag(self): + return self.i + + def __add__(self, other): + if isinstance(other, Complex): + return MyComplex(self.imag + other.imag, + self.real + other.real) + raise NotImplementedError + + def __neg__(self): + return MyComplex(-self.real, -self.imag) + + def __eq__(self, other): + if isinstance(other, Complex): + return self.imag == other.imag and self.real == other.real + if isinstance(other, Number): + return self.imag == 0 and self.real == other.real + + # test __bool__ + self.assertTrue(bool(MyComplex(1, 1))) + self.assertTrue(bool(MyComplex(0, 1))) + self.assertTrue(bool(MyComplex(1, 0))) + self.assertFalse(bool(MyComplex(0, 0))) + + # test __sub__ + self.assertEqual(MyComplex(2, 3) - complex(1, 2), MyComplex(1, 1)) + + # test __rsub__ + self.assertEqual(complex(2, 3) - MyComplex(1, 2), MyComplex(1, 1)) + + def test_real(self): + @concretize + class MyReal(Real): + def __init__(self, n): + self.n = n + + def __pos__(self): + return self.n + + def __float__(self): + return float(self.n) + + def __floordiv__(self, other): + return self.n // other + + def __rfloordiv__(self, other): + return other // self.n + + def __mod__(self, other): + return self.n % other + + def __rmod__(self, other): + return other % self.n + + # test __divmod__ + self.assertEqual(divmod(MyReal(3), 2), (1, 1)) + + # test __rdivmod__ + self.assertEqual(divmod(3, MyReal(2)), (1, 1)) + + # test __complex__ + self.assertEqual(complex(MyReal(1)), 1+0j) + + # test real + self.assertEqual(MyReal(3).real, 3) + + # test imag + self.assertEqual(MyReal(3).imag, 0) + + # test conjugate + self.assertEqual(MyReal(123).conjugate(), 123) + + + def test_rational(self): + @concretize + class MyRational(Rational): + def __init__(self, numerator, denominator): + self.n = numerator + self.d = denominator + + @property + def numerator(self): + return self.n + + @property + def denominator(self): + return self.d + + # test__float__ + self.assertEqual(float(MyRational(5, 2)), 2.5) + + + def test_integral(self): + @concretize + class MyIntegral(Integral): + def __init__(self, n): + self.n = n + + def __pos__(self): + return self.n + + def __int__(self): + return self.n + + # test __index__ + self.assertEqual(operator.index(MyIntegral(123)), 123) + + # test __float__ + self.assertEqual(float(MyIntegral(123)), 123.0) + + # test numerator + self.assertEqual(MyIntegral(123).numerator, 123) + + # test denominator + self.assertEqual(MyIntegral(123).denominator, 1) + + if __name__ == "__main__": unittest.main() From 07ef63fb6a0fb996d5f56c79f4ccd7a1887a6b2b Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Thu, 25 Jan 2024 09:38:43 -0500 Subject: [PATCH 004/127] Doc/library/sys.monitoring.rst: remove contradictory paragraph. (GH-113619) --- Doc/library/sys.monitoring.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 762581b7eda7f1..4980227c60b21e 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -75,9 +75,6 @@ following IDs are pre-defined to make co-operation of tools easier:: sys.monitoring.PROFILER_ID = 2 sys.monitoring.OPTIMIZER_ID = 5 -There is no obligation to set an ID, nor is there anything preventing a tool -from using an ID even it is already in use. -However, tools are encouraged to use a unique ID and respect other tools. Events ------ From 8278fa2f5625b41be91191d18ee8eeab904a54ff Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 25 Jan 2024 08:48:50 -0800 Subject: [PATCH 005/127] gh-111051: Check if file is modifed during debugging in `pdb` (#111052) --- Lib/pdb.py | 21 +++++ Lib/test/test_pdb.py | 81 +++++++++++++++++++ ...-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst | 1 + 3 files changed, 103 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 68f810620f8826..6f7719eb9ba6c5 100755 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -233,6 +233,8 @@ class Pdb(bdb.Bdb, cmd.Cmd): # but in case there are recursions, we stop at 999. MAX_CHAINED_EXCEPTION_DEPTH = 999 + _file_mtime_table = {} + def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, nosigint=False, readrc=True): bdb.Bdb.__init__(self, skip=skip) @@ -437,6 +439,20 @@ def _cmdloop(self): except KeyboardInterrupt: self.message('--KeyboardInterrupt--') + def _validate_file_mtime(self): + """Check if the source file of the current frame has been modified since + the last time we saw it. If so, give a warning.""" + try: + filename = self.curframe.f_code.co_filename + mtime = os.path.getmtime(filename) + except Exception: + return + if (filename in self._file_mtime_table and + mtime != self._file_mtime_table[filename]): + self.message(f"*** WARNING: file '{filename}' was edited, " + "running stale code until the program is rerun") + self._file_mtime_table[filename] = mtime + # Called before loop, handles display expressions # Set up convenience variable containers def preloop(self): @@ -681,6 +697,7 @@ def onecmd(self, line): a breakpoint command list definition. """ if not self.commands_defining: + self._validate_file_mtime() return cmd.Cmd.onecmd(self, line) else: return self.handle_command_def(line) @@ -2021,6 +2038,10 @@ def _run(self, target: Union[_ModuleTarget, _ScriptTarget]): __main__.__dict__.clear() __main__.__dict__.update(target.namespace) + # Clear the mtime table for program reruns, assume all the files + # are up to date. + self._file_mtime_table.clear() + self.run(target.code) def _format_exc(self, exc: BaseException): diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 03487aa6ffd81f..c64df62c761471 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -3056,6 +3056,87 @@ def test_blocks_at_first_code_line(self): self.assertTrue(any("__main__.py(4)()" in l for l in stdout.splitlines()), stdout) + def test_file_modified_after_execution(self): + script = """ + print("hello") + """ + + commands = """ + filename = $_frame.f_code.co_filename + f = open(filename, "w") + f.write("print('goodbye')") + f.close() + ll + """ + + stdout, stderr = self.run_pdb_script(script, commands) + self.assertIn("WARNING:", stdout) + self.assertIn("was edited", stdout) + + def test_file_modified_after_execution_with_multiple_instances(self): + script = """ + import pdb; pdb.Pdb().set_trace() + with open(__file__, "w") as f: + f.write("print('goodbye')\\n" * 5) + import pdb; pdb.Pdb().set_trace() + """ + + commands = """ + continue + continue + """ + + filename = 'main.py' + with open(filename, 'w') as f: + f.write(textwrap.dedent(script)) + self.addCleanup(os_helper.unlink, filename) + self.addCleanup(os_helper.rmtree, '__pycache__') + cmd = [sys.executable, filename] + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE, + stderr=subprocess.STDOUT, + env = {**os.environ, 'PYTHONIOENCODING': 'utf-8'}, + ) as proc: + stdout, _ = proc.communicate(str.encode(commands)) + stdout = stdout and bytes.decode(stdout) + + self.assertEqual(proc.returncode, 0) + self.assertIn("WARNING:", stdout) + self.assertIn("was edited", stdout) + + def test_file_modified_after_execution_with_restart(self): + script = """ + import random + # Any code with a source to step into so this script is not checked + # for changes when it's being changed + random.randint(1, 4) + print("hello") + """ + + commands = """ + ll + n + s + filename = $_frame.f_back.f_code.co_filename + def change_file(content, filename): + with open(filename, "w") as f: + f.write(f"print({content})") + + change_file('world', filename) + restart + ll + """ + + stdout, stderr = self.run_pdb_script(script, commands) + # Make sure the code is running correctly and the file is edited + self.assertIn("hello", stdout) + self.assertIn("world", stdout) + # The file was edited, but restart should clear the state and consider + # the file as up to date + self.assertNotIn("WARNING:", stdout) + def test_relative_imports(self): self.module_name = 't_main' os_helper.rmtree(self.module_name) diff --git a/Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst b/Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst new file mode 100644 index 00000000000000..adb3241b89ae3e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst @@ -0,0 +1 @@ +Added check for file modification during debugging with :mod:`pdb` From 4850410b60183dac021ded219a49be140fe5fefe Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 25 Jan 2024 09:34:03 -0800 Subject: [PATCH 006/127] gh-112075: Add try-incref functions from nogil branch for use in dict thread safety (#114512) * Bring in a subset of biased reference counting: https://github.com/colesbury/nogil/commit/b6b12a9a94e The NoGIL branch has functions for attempting to do an incref on an object which may or may not be in flight. This just brings those functions over so that they will be usable from in the dict implementation to get items w/o holding a lock. There's a handful of small simple modifications: Adding inline to the force inline functions to avoid a warning, and switching from _Py_ALWAYS_INLINE to Py_ALWAYS_INLINE as that's available Remove _Py_REF_LOCAL_SHIFT as it doesn't exist yet (and is currently 0 in the 3.12 nogil branch anyway) ob_ref_shared is currently Py_ssize_t and not uint32_t, so use that _PY_LIKELY doesn't exist, so drop it _Py_ThreadLocal becomes _Py_IsOwnedByCurrentThread Add '_PyInterpreterState_GET()' to _Py_IncRefTotal calls. Co-Authored-By: Sam Gross --- Include/internal/pycore_object.h | 136 +++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index f413b8451e5ab4..ec14c07a2991ff 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -376,6 +376,142 @@ static inline void _PyObject_GC_UNTRACK( _PyObject_GC_UNTRACK(__FILE__, __LINE__, _PyObject_CAST(op)) #endif +#ifdef Py_GIL_DISABLED + +/* Tries to increment an object's reference count + * + * This is a specialized version of _Py_TryIncref that only succeeds if the + * object is immortal or local to this thread. It does not handle the case + * where the reference count modification requires an atomic operation. This + * allows call sites to specialize for the immortal/local case. + */ +static inline int +_Py_TryIncrefFast(PyObject *op) { + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + local += 1; + if (local == 0) { + // immortal + return 1; + } + if (_Py_IsOwnedByCurrentThread(op)) { + _Py_INCREF_STAT_INC(); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif + return 1; + } + return 0; +} + +static inline int +_Py_TryIncRefShared(PyObject *op) +{ + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + for (;;) { + // If the shared refcount is zero and the object is either merged + // or may not have weak references, then we cannot incref it. + if (shared == 0 || shared == _Py_REF_MERGED) { + return 0; + } + + if (_Py_atomic_compare_exchange_ssize( + &op->ob_ref_shared, + &shared, + shared + (1 << _Py_REF_SHARED_SHIFT))) { +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif + _Py_INCREF_STAT_INC(); + return 1; + } + } +} + +/* Tries to incref the object op and ensures that *src still points to it. */ +static inline int +_Py_TryIncref(PyObject **src, PyObject *op) +{ + if (_Py_TryIncrefFast(op)) { + return 1; + } + if (!_Py_TryIncRefShared(op)) { + return 0; + } + if (op != _Py_atomic_load_ptr(src)) { + Py_DECREF(op); + return 0; + } + return 1; +} + +/* Loads and increfs an object from ptr, which may contain a NULL value. + Safe with concurrent (atomic) updates to ptr. + NOTE: The writer must set maybe-weakref on the stored object! */ +static inline PyObject * +_Py_XGetRef(PyObject **ptr) +{ + for (;;) { + PyObject *value = _Py_atomic_load_ptr(ptr); + if (value == NULL) { + return value; + } + if (_Py_TryIncref(ptr, value)) { + return value; + } + } +} + +/* Attempts to loads and increfs an object from ptr. Returns NULL + on failure, which may be due to a NULL value or a concurrent update. */ +static inline PyObject * +_Py_TryXGetRef(PyObject **ptr) +{ + PyObject *value = _Py_atomic_load_ptr(ptr); + if (value == NULL) { + return value; + } + if (_Py_TryIncref(ptr, value)) { + return value; + } + return NULL; +} + +/* Like Py_NewRef but also optimistically sets _Py_REF_MAYBE_WEAKREF + on objects owned by a different thread. */ +static inline PyObject * +_Py_NewRefWithLock(PyObject *op) +{ + if (_Py_TryIncrefFast(op)) { + return op; + } + _Py_INCREF_STAT_INC(); + for (;;) { + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + Py_ssize_t new_shared = shared + (1 << _Py_REF_SHARED_SHIFT); + if ((shared & _Py_REF_SHARED_FLAG_MASK) == 0) { + new_shared |= _Py_REF_MAYBE_WEAKREF; + } + if (_Py_atomic_compare_exchange_ssize( + &op->ob_ref_shared, + &shared, + new_shared)) { + return op; + } + } +} + +static inline PyObject * +_Py_XNewRefWithLock(PyObject *obj) +{ + if (obj == NULL) { + return NULL; + } + return _Py_NewRefWithLock(obj); +} + +#endif + #ifdef Py_REF_DEBUG extern void _PyInterpreterState_FinalizeRefTotal(PyInterpreterState *); extern void _Py_FinalizeRefTotal(_PyRuntimeState *); From b52fc70d1ab3be7866ab71065bae61a03a28bfae Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 25 Jan 2024 13:27:36 -0500 Subject: [PATCH 007/127] gh-112529: Implement GC for free-threaded builds (#114262) * gh-112529: Implement GC for free-threaded builds This implements a mark and sweep GC for the free-threaded builds of CPython. The implementation relies on mimalloc to find GC tracked objects (i.e., "containers"). --- Include/internal/pycore_freelist.h | 10 + Include/internal/pycore_gc.h | 35 +- Include/internal/pycore_object.h | 8 + Include/internal/pycore_object_stack.h | 84 + Include/object.h | 4 +- Lib/gzip.py | 2 +- Lib/test/test_gc.py | 13 +- Lib/test/test_io.py | 4 +- Makefile.pre.in | 3 + ...-01-18-20-20-37.gh-issue-112529.oVNvDG.rst | 3 + PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 + Python/gc.c | 8 +- Python/gc_free_threading.c | 1701 ++++++++++++++++- Python/object_stack.c | 87 + Python/pystate.c | 4 +- 18 files changed, 1952 insertions(+), 22 deletions(-) create mode 100644 Include/internal/pycore_object_stack.h create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst create mode 100644 Python/object_stack.c diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index 4ab93ee2bf6c32..dfb12839affedf 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -20,6 +20,7 @@ extern "C" { # define PyFloat_MAXFREELIST 100 # define PyContext_MAXFREELIST 255 # define _PyAsyncGen_MAXFREELIST 80 +# define _PyObjectStackChunk_MAXFREELIST 4 #else # define PyTuple_NFREELISTS 0 # define PyTuple_MAXFREELIST 0 @@ -27,6 +28,7 @@ extern "C" { # define PyFloat_MAXFREELIST 0 # define PyContext_MAXFREELIST 0 # define _PyAsyncGen_MAXFREELIST 0 +# define _PyObjectStackChunk_MAXFREELIST 0 #endif struct _Py_list_state { @@ -93,6 +95,13 @@ struct _Py_async_gen_state { #endif }; +struct _PyObjectStackChunk; + +struct _Py_object_stack_state { + struct _PyObjectStackChunk *free_list; + Py_ssize_t numfree; +}; + typedef struct _Py_freelist_state { struct _Py_float_state float_state; struct _Py_tuple_state tuple_state; @@ -100,6 +109,7 @@ typedef struct _Py_freelist_state { struct _Py_slice_state slice_state; struct _Py_context_state context_state; struct _Py_async_gen_state async_gen_state; + struct _Py_object_stack_state object_stack_state; } _PyFreeListState; #ifdef __cplusplus diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index d53de97709a782..b362a294a59042 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -37,10 +37,22 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { } +/* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) */ +#ifdef Py_GIL_DISABLED +# define _PyGC_BITS_TRACKED (1) +# define _PyGC_BITS_FINALIZED (2) +# define _PyGC_BITS_UNREACHABLE (4) +# define _PyGC_BITS_FROZEN (8) +#endif + /* True if the object is currently tracked by the GC. */ static inline int _PyObject_GC_IS_TRACKED(PyObject *op) { +#ifdef Py_GIL_DISABLED + return (op->ob_gc_bits & _PyGC_BITS_TRACKED) != 0; +#else PyGC_Head *gc = _Py_AS_GC(op); return (gc->_gc_next != 0); +#endif } #define _PyObject_GC_IS_TRACKED(op) _PyObject_GC_IS_TRACKED(_Py_CAST(PyObject*, op)) @@ -107,24 +119,29 @@ static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) { gc->_gc_prev = ((gc->_gc_prev & ~_PyGC_PREV_MASK) | uprev); } -static inline int _PyGCHead_FINALIZED(PyGC_Head *gc) { - return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0); -} -static inline void _PyGCHead_SET_FINALIZED(PyGC_Head *gc) { - gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED; -} - static inline int _PyGC_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + return (op->ob_gc_bits & _PyGC_BITS_FINALIZED) != 0; +#else PyGC_Head *gc = _Py_AS_GC(op); - return _PyGCHead_FINALIZED(gc); + return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0); +#endif } static inline void _PyGC_SET_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + op->ob_gc_bits |= _PyGC_BITS_FINALIZED; +#else PyGC_Head *gc = _Py_AS_GC(op); - _PyGCHead_SET_FINALIZED(gc); + gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED; +#endif } static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + op->ob_gc_bits &= ~_PyGC_BITS_FINALIZED; +#else PyGC_Head *gc = _Py_AS_GC(op); gc->_gc_prev &= ~_PyGC_PREV_MASK_FINALIZED; +#endif } diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index ec14c07a2991ff..4e52ffc77c5956 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -322,6 +322,9 @@ static inline void _PyObject_GC_TRACK( "object is in generation which is garbage collected", filename, lineno, __func__); +#ifdef Py_GIL_DISABLED + op->ob_gc_bits |= _PyGC_BITS_TRACKED; +#else PyInterpreterState *interp = _PyInterpreterState_GET(); PyGC_Head *generation0 = interp->gc.generation0; PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); @@ -329,6 +332,7 @@ static inline void _PyObject_GC_TRACK( _PyGCHead_SET_PREV(gc, last); _PyGCHead_SET_NEXT(gc, generation0); generation0->_gc_prev = (uintptr_t)gc; +#endif } /* Tell the GC to stop tracking this object. @@ -352,6 +356,9 @@ static inline void _PyObject_GC_UNTRACK( "object not tracked by the garbage collector", filename, lineno, __func__); +#ifdef Py_GIL_DISABLED + op->ob_gc_bits &= ~_PyGC_BITS_TRACKED; +#else PyGC_Head *gc = _Py_AS_GC(op); PyGC_Head *prev = _PyGCHead_PREV(gc); PyGC_Head *next = _PyGCHead_NEXT(gc); @@ -359,6 +366,7 @@ static inline void _PyObject_GC_UNTRACK( _PyGCHead_SET_PREV(next, prev); gc->_gc_next = 0; gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED; +#endif } // Macros to accept any type for the parameter, and to automatically pass diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h new file mode 100644 index 00000000000000..1dc1c1591525de --- /dev/null +++ b/Include/internal/pycore_object_stack.h @@ -0,0 +1,84 @@ +#ifndef Py_INTERNAL_OBJECT_STACK_H +#define Py_INTERNAL_OBJECT_STACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// _PyObjectStack is a stack of Python objects implemented as a linked list of +// fixed size buffers. + +// Chosen so that _PyObjectStackChunk is a power-of-two size. +#define _Py_OBJECT_STACK_CHUNK_SIZE 254 + +typedef struct _PyObjectStackChunk { + struct _PyObjectStackChunk *prev; + Py_ssize_t n; + PyObject *objs[_Py_OBJECT_STACK_CHUNK_SIZE]; +} _PyObjectStackChunk; + +typedef struct _PyObjectStack { + _PyObjectStackChunk *head; +} _PyObjectStack; + + +extern _PyObjectStackChunk * +_PyObjectStackChunk_New(void); + +extern void +_PyObjectStackChunk_Free(_PyObjectStackChunk *); + +extern void +_PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization); + +// Push an item onto the stack. Return -1 on allocation failure, 0 on success. +static inline int +_PyObjectStack_Push(_PyObjectStack *stack, PyObject *obj) +{ + _PyObjectStackChunk *buf = stack->head; + if (buf == NULL || buf->n == _Py_OBJECT_STACK_CHUNK_SIZE) { + buf = _PyObjectStackChunk_New(); + if (buf == NULL) { + return -1; + } + buf->prev = stack->head; + buf->n = 0; + stack->head = buf; + } + + assert(buf->n >= 0 && buf->n < _Py_OBJECT_STACK_CHUNK_SIZE); + buf->objs[buf->n] = obj; + buf->n++; + return 0; +} + +// Pop the top item from the stack. Return NULL if the stack is empty. +static inline PyObject * +_PyObjectStack_Pop(_PyObjectStack *stack) +{ + _PyObjectStackChunk *buf = stack->head; + if (buf == NULL) { + return NULL; + } + assert(buf->n > 0 && buf->n <= _Py_OBJECT_STACK_CHUNK_SIZE); + buf->n--; + PyObject *obj = buf->objs[buf->n]; + if (buf->n == 0) { + stack->head = buf->prev; + _PyObjectStackChunk_Free(buf); + } + return obj; +} + +// Remove all items from the stack +extern void +_PyObjectStack_Clear(_PyObjectStack *stack); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_OBJECT_STACK_H diff --git a/Include/object.h b/Include/object.h index 48f1ddf7510887..ef3fb721c2b012 100644 --- a/Include/object.h +++ b/Include/object.h @@ -212,7 +212,9 @@ struct _object { struct _PyMutex { uint8_t v; }; struct _object { - uintptr_t ob_tid; // thread id (or zero) + // ob_tid stores the thread id (or zero). It is also used by the GC to + // store linked lists and the computed "gc_refs" refcount. + uintptr_t ob_tid; uint16_t _padding; struct _PyMutex ob_mutex; // per-object lock uint8_t ob_gc_bits; // gc-related state diff --git a/Lib/gzip.py b/Lib/gzip.py index 177f9080dc5af8..fda93e0261e028 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -349,7 +349,7 @@ def closed(self): def close(self): fileobj = self.fileobj - if fileobj is None: + if fileobj is None or self._buffer.closed: return try: if self.mode == WRITE: diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 1d71dd9e262a6a..b01f344cb14a1a 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1,7 +1,7 @@ import unittest import unittest.mock from test.support import (verbose, refcount_test, - cpython_only, requires_subprocess) + cpython_only, requires_subprocess, Py_GIL_DISABLED) from test.support.import_helper import import_module from test.support.os_helper import temp_dir, TESTFN, unlink from test.support.script_helper import assert_python_ok, make_script @@ -815,6 +815,15 @@ def test_freeze(self): self.assertEqual(gc.get_freeze_count(), 0) def test_get_objects(self): + gc.collect() + l = [] + l.append(l) + self.assertTrue( + any(l is element for element in gc.get_objects()) + ) + + @unittest.skipIf(Py_GIL_DISABLED, 'need generational GC') + def test_get_objects_generations(self): gc.collect() l = [] l.append(l) @@ -1225,7 +1234,7 @@ def test_refcount_errors(self): p.stderr.close() # Verify that stderr has a useful error message: self.assertRegex(stderr, - br'gc\.c:[0-9]+: gc_decref: Assertion "gc_get_refs\(g\) > 0" failed.') + br'gc.*\.c:[0-9]+: .*: Assertion "gc_get_refs\(.+\) .*" failed.') self.assertRegex(stderr, br'refcount is too small') # "address : 0x7fb5062efc18" diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 936edea3cad70c..9e28b936e00bd5 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3652,10 +3652,8 @@ def _check_create_at_shutdown(self, **kwargs): codecs.lookup('utf-8') class C: - def __init__(self): - self.buf = io.BytesIO() def __del__(self): - io.TextIOWrapper(self.buf, **{kwargs}) + io.TextIOWrapper(io.BytesIO(), **{kwargs}) print("ok") c = C() """.format(iomod=iomod, kwargs=kwargs) diff --git a/Makefile.pre.in b/Makefile.pre.in index 21b122ae0fcd9f..37a8b06987c710 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -439,6 +439,7 @@ PYTHON_OBJS= \ Python/modsupport.o \ Python/mysnprintf.o \ Python/mystrtoul.o \ + Python/object_stack.o \ Python/optimizer.o \ Python/optimizer_analysis.o \ Python/parking_lot.o \ @@ -1832,6 +1833,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_frame.h \ $(srcdir)/Include/internal/pycore_freelist.h \ $(srcdir)/Include/internal/pycore_function.h \ + $(srcdir)/Include/internal/pycore_gc.h \ $(srcdir)/Include/internal/pycore_genobject.h \ $(srcdir)/Include/internal/pycore_getopt.h \ $(srcdir)/Include/internal/pycore_gil.h \ @@ -1853,6 +1855,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_namespace.h \ $(srcdir)/Include/internal/pycore_object.h \ $(srcdir)/Include/internal/pycore_object_alloc.h \ + $(srcdir)/Include/internal/pycore_object_stack.h \ $(srcdir)/Include/internal/pycore_object_state.h \ $(srcdir)/Include/internal/pycore_obmalloc.h \ $(srcdir)/Include/internal/pycore_obmalloc_init.h \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst new file mode 100644 index 00000000000000..b3aa43801da488 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst @@ -0,0 +1,3 @@ +The free-threaded build now has its own thread-safe GC implementation that +uses mimalloc to find GC tracked objects. It is non-generational, unlike the +existing GC implementation. diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 610581bc96cb1a..dde801fc0fd525 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -230,6 +230,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 3141913c043869..90ccb954b424bc 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -289,6 +289,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 57275fb2039ee0..e0b9fc137457a0 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -591,6 +591,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 51cbb079b5b550..fd79436f5add97 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -1355,6 +1355,9 @@ Python + + Python + Python diff --git a/Python/gc.c b/Python/gc.c index 14870505ef1308..46646760291526 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -15,6 +15,8 @@ #include "pycore_weakref.h" // _PyWeakref_ClearRef() #include "pydtrace.h" +#ifndef Py_GIL_DISABLED + typedef struct _gc_runtime_state GCState; #ifdef Py_DEBUG @@ -964,10 +966,10 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) PyGC_Head *gc = GC_NEXT(collectable); PyObject *op = FROM_GC(gc); gc_list_move(gc, &seen); - if (!_PyGCHead_FINALIZED(gc) && + if (!_PyGC_FINALIZED(op) && (finalize = Py_TYPE(op)->tp_finalize) != NULL) { - _PyGCHead_SET_FINALIZED(gc); + _PyGC_SET_FINALIZED(op); Py_INCREF(op); finalize(op); assert(!_PyErr_Occurred(tstate)); @@ -1942,3 +1944,5 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) done: gcstate->enabled = origenstate; } + +#endif // Py_GIL_DISABLED diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 207a43b68d21f5..f2cd84981461a4 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1,9 +1,1704 @@ +// Cyclic garbage collector implementation for free-threaded build. #include "Python.h" -#include "pycore_pystate.h" // _PyFreeListState_GET() -#include "pycore_tstate.h" // _PyThreadStateImpl +#include "pycore_ceval.h" // _Py_set_eval_breaker_bit() +#include "pycore_context.h" +#include "pycore_dict.h" // _PyDict_MaybeUntrack() +#include "pycore_initconfig.h" +#include "pycore_interp.h" // PyInterpreterState.gc +#include "pycore_object.h" +#include "pycore_object_alloc.h" // _PyObject_MallocWithType() +#include "pycore_object_stack.h" +#include "pycore_pyerrors.h" +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_tstate.h" // _PyThreadStateImpl +#include "pycore_weakref.h" // _PyWeakref_ClearRef() +#include "pydtrace.h" #ifdef Py_GIL_DISABLED +typedef struct _gc_runtime_state GCState; + +#ifdef Py_DEBUG +# define GC_DEBUG +#endif + +// Automatically choose the generation that needs collecting. +#define GENERATION_AUTO (-1) + +// A linked-list of objects using the `ob_tid` field as the next pointer. +struct worklist { + uintptr_t head; +}; + +struct worklist_iter { + uintptr_t *ptr; // pointer to current object + uintptr_t *next; // next value of ptr +}; + +struct visitor_args { + size_t offset; // offset of PyObject from start of block +}; + +// Per-collection state +struct collection_state { + struct visitor_args base; + PyInterpreterState *interp; + GCState *gcstate; + Py_ssize_t collected; + Py_ssize_t uncollectable; + struct worklist unreachable; + struct worklist legacy_finalizers; + struct worklist wrcb_to_call; + struct worklist objs_to_decref; +}; + +// iterate over a worklist +#define WORKSTACK_FOR_EACH(stack, op) \ + for ((op) = (PyObject *)(stack)->head; (op) != NULL; (op) = (PyObject *)(op)->ob_tid) + +// iterate over a worklist with support for removing the current object +#define WORKSTACK_FOR_EACH_ITER(stack, iter, op) \ + for (worklist_iter_init((iter), &(stack)->head), (op) = (PyObject *)(*(iter)->ptr); \ + (op) != NULL; \ + worklist_iter_init((iter), (iter)->next), (op) = (PyObject *)(*(iter)->ptr)) + +static void +worklist_push(struct worklist *worklist, PyObject *op) +{ + assert(op->ob_tid == 0); + op->ob_tid = worklist->head; + worklist->head = (uintptr_t)op; +} + +static PyObject * +worklist_pop(struct worklist *worklist) +{ + PyObject *op = (PyObject *)worklist->head; + if (op != NULL) { + worklist->head = op->ob_tid; + op->ob_tid = 0; + } + return op; +} + +static void +worklist_iter_init(struct worklist_iter *iter, uintptr_t *next) +{ + iter->ptr = next; + PyObject *op = (PyObject *)*(iter->ptr); + if (op) { + iter->next = &op->ob_tid; + } +} + +static void +worklist_remove(struct worklist_iter *iter) +{ + PyObject *op = (PyObject *)*(iter->ptr); + *(iter->ptr) = op->ob_tid; + op->ob_tid = 0; + iter->next = iter->ptr; +} + +static inline int +gc_is_unreachable(PyObject *op) +{ + return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0; +} + +static void +gc_set_unreachable(PyObject *op) +{ + op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE; +} + +static void +gc_clear_unreachable(PyObject *op) +{ + op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE; +} + +// Initialize the `ob_tid` field to zero if the object is not already +// initialized as unreachable. +static void +gc_maybe_init_refs(PyObject *op) +{ + if (!gc_is_unreachable(op)) { + gc_set_unreachable(op); + op->ob_tid = 0; + } +} + +static inline Py_ssize_t +gc_get_refs(PyObject *op) +{ + return (Py_ssize_t)op->ob_tid; +} + +static inline void +gc_add_refs(PyObject *op, Py_ssize_t refs) +{ + assert(_PyObject_GC_IS_TRACKED(op)); + op->ob_tid += refs; +} + +static inline void +gc_decref(PyObject *op) +{ + op->ob_tid -= 1; +} + +// Merge refcounts while the world is stopped. +static void +merge_refcount(PyObject *op, Py_ssize_t extra) +{ + assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); + + Py_ssize_t refcount = Py_REFCNT(op); + refcount += extra; + +#ifdef Py_REF_DEBUG + _Py_AddRefTotal(_PyInterpreterState_GET(), extra); +#endif + + // No atomics necessary; all other threads in this interpreter are paused. + op->ob_tid = 0; + op->ob_ref_local = 0; + op->ob_ref_shared = _Py_REF_SHARED(refcount, _Py_REF_MERGED); +} + +static void +gc_restore_tid(PyObject *op) +{ + mi_segment_t *segment = _mi_ptr_segment(op); + if (_Py_REF_IS_MERGED(op->ob_ref_shared)) { + op->ob_tid = 0; + } + else { + // NOTE: may change ob_tid if the object was re-initialized by + // a different thread or its segment was abandoned and reclaimed. + // The segment thread id might be zero, in which case we should + // ensure the refcounts are now merged. + op->ob_tid = segment->thread_id; + if (op->ob_tid == 0) { + merge_refcount(op, 0); + } + } +} + +static void +gc_restore_refs(PyObject *op) +{ + if (gc_is_unreachable(op)) { + gc_restore_tid(op); + gc_clear_unreachable(op); + } +} + +// Given a mimalloc memory block return the PyObject stored in it or NULL if +// the block is not allocated or the object is not tracked or is immortal. +static PyObject * +op_from_block(void *block, void *arg, bool include_frozen) +{ + struct visitor_args *a = arg; + if (block == NULL) { + return NULL; + } + PyObject *op = (PyObject *)((char*)block + a->offset); + assert(PyObject_IS_GC(op)); + if (!_PyObject_GC_IS_TRACKED(op)) { + return NULL; + } + if (!include_frozen && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) { + return NULL; + } + return op; +} + +static int +gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor, + struct visitor_args *arg) +{ + // Offset of PyObject header from start of memory block. + Py_ssize_t offset_base = sizeof(PyGC_Head); + if (_PyMem_DebugEnabled()) { + // The debug allocator adds two words at the beginning of each block. + offset_base += 2 * sizeof(size_t); + } + + // Objects with Py_TPFLAGS_PREHEADER have two extra fields + Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*); + + // visit each thread's heaps for GC objects + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; + + arg->offset = offset_base; + if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true, + visitor, arg)) { + return -1; + } + arg->offset = offset_pre; + if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC_PRE], true, + visitor, arg)) { + return -1; + } + } + + // visit blocks in the per-interpreter abandoned pool (from dead threads) + mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool; + arg->offset = offset_base; + if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC, true, + visitor, arg)) { + return -1; + } + arg->offset = offset_pre; + if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC_PRE, true, + visitor, arg)) { + return -1; + } + return 0; +} + +// Visits all GC objects in the interpreter's heaps. +// NOTE: It is not safe to allocate or free any mimalloc managed memory while +// this function is running. +static int +gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor, + struct visitor_args *arg) +{ + // Other threads in the interpreter must be paused so that we can safely + // traverse their heaps. + assert(interp->stoptheworld.world_stopped); + + int err; + HEAD_LOCK(&_PyRuntime); + err = gc_visit_heaps_lock_held(interp, visitor, arg); + HEAD_UNLOCK(&_PyRuntime); + return err; +} + +// Subtract an incoming reference from the computed "gc_refs" refcount. +static int +visit_decref(PyObject *op, void *arg) +{ + if (_PyObject_GC_IS_TRACKED(op) && !_Py_IsImmortal(op)) { + // If update_refs hasn't reached this object yet, mark it + // as (tentatively) unreachable and initialize ob_tid to zero. + gc_maybe_init_refs(op); + gc_decref(op); + } + return 0; +} + +// Compute the number of external references to objects in the heap +// by subtracting internal references from the refcount. The difference is +// computed in the ob_tid field (we restore it later). +static bool +update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + // Exclude immortal objects from garbage collection + if (_Py_IsImmortal(op)) { + op->ob_tid = 0; + _PyObject_GC_UNTRACK(op); + gc_clear_unreachable(op); + return true; + } + + // Untrack tuples and dicts as necessary in this pass. + if (PyTuple_CheckExact(op)) { + _PyTuple_MaybeUntrack(op); + if (!_PyObject_GC_IS_TRACKED(op)) { + gc_restore_refs(op); + return true; + } + } + else if (PyDict_CheckExact(op)) { + _PyDict_MaybeUntrack(op); + if (!_PyObject_GC_IS_TRACKED(op)) { + gc_restore_refs(op); + return true; + } + } + + Py_ssize_t refcount = Py_REFCNT(op); + _PyObject_ASSERT(op, refcount >= 0); + + // Add the actual refcount to ob_tid. + gc_maybe_init_refs(op); + gc_add_refs(op, refcount); + + // Subtract internal references from ob_tid. Objects with ob_tid > 0 + // are directly reachable from outside containers, and so can't be + // collected. + Py_TYPE(op)->tp_traverse(op, visit_decref, NULL); + return true; +} + +static int +visit_clear_unreachable(PyObject *op, _PyObjectStack *stack) +{ + if (gc_is_unreachable(op)) { + _PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op)); + gc_clear_unreachable(op); + return _PyObjectStack_Push(stack, op); + } + return 0; +} + +// Transitively clear the unreachable bit on all objects reachable from op. +static int +mark_reachable(PyObject *op) +{ + _PyObjectStack stack = { NULL }; + do { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + if (traverse(op, (visitproc)&visit_clear_unreachable, &stack) < 0) { + _PyObjectStack_Clear(&stack); + return -1; + } + op = _PyObjectStack_Pop(&stack); + } while (op != NULL); + return 0; +} + +#ifdef GC_DEBUG +static bool +validate_gc_objects(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + _PyObject_ASSERT(op, gc_is_unreachable(op)); + _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0, + "refcount is too small"); + return true; +} +#endif + +static bool +mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + if (gc_is_unreachable(op) && gc_get_refs(op) != 0) { + // Object is reachable but currently marked as unreachable. + // Mark it as reachable and traverse its pointers to find + // any other object that may be directly reachable from it. + gc_clear_unreachable(op); + + // Transitively mark reachable objects by clearing the unreachable flag. + if (mark_reachable(op) < 0) { + return false; + } + } + + return true; +} + +/* Return true if object has a pre-PEP 442 finalization method. */ +static int +has_legacy_finalizer(PyObject *op) +{ + return Py_TYPE(op)->tp_del != NULL; +} + +static bool +scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + struct collection_state *state = (struct collection_state *)args; + if (gc_is_unreachable(op)) { + // Merge and add one to the refcount to prevent deallocation while we + // are holding on to it in a worklist. + merge_refcount(op, 1); + + if (has_legacy_finalizer(op)) { + // would be unreachable, but has legacy finalizer + gc_clear_unreachable(op); + worklist_push(&state->legacy_finalizers, op); + } + else { + worklist_push(&state->unreachable, op); + } + } + else { + // object is reachable, restore `ob_tid`; we're done with these objects + gc_restore_tid(op); + state->gcstate->long_lived_total++; + } + + return true; +} + +static int +move_legacy_finalizer_reachable(struct collection_state *state); + +static int +deduce_unreachable_heap(PyInterpreterState *interp, + struct collection_state *state) +{ + // Identify objects that are directly reachable from outside the GC heap + // by computing the difference between the refcount and the number of + // incoming references. + gc_visit_heaps(interp, &update_refs, &state->base); + +#ifdef GC_DEBUG + // Check that all objects are marked as unreachable and that the computed + // reference count difference (stored in `ob_tid`) is non-negative. + gc_visit_heaps(interp, &validate_gc_objects, &state->base); +#endif + + // Transitively mark reachable objects by clearing the + // _PyGC_BITS_UNREACHABLE flag. + if (gc_visit_heaps(interp, &mark_heap_visitor, &state->base) < 0) { + return -1; + } + + // Identify remaining unreachable objects and push them onto a stack. + // Restores ob_tid for reachable objects. + gc_visit_heaps(interp, &scan_heap_visitor, &state->base); + + if (state->legacy_finalizers.head) { + // There may be objects reachable from legacy finalizers that are in + // the unreachable set. We need to mark them as reachable. + if (move_legacy_finalizer_reachable(state) < 0) { + return -1; + } + } + + return 0; +} + +static int +move_legacy_finalizer_reachable(struct collection_state *state) +{ + // Clear the reachable bit on all objects transitively reachable + // from the objects with legacy finalizers. + PyObject *op; + WORKSTACK_FOR_EACH(&state->legacy_finalizers, op) { + if (mark_reachable(op) < 0) { + return -1; + } + } + + // Move the reachable objects from the unreachable worklist to the legacy + // finalizer worklist. + struct worklist_iter iter; + WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) { + if (!gc_is_unreachable(op)) { + worklist_remove(&iter); + worklist_push(&state->legacy_finalizers, op); + } + } + + return 0; +} + +// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are +// enqueued in `wrcb_to_call`, but not invoked yet. +static void +clear_weakrefs(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (PyWeakref_Check(op)) { + // Clear weakrefs that are themselves unreachable to ensure their + // callbacks will not be executed later from a `tp_clear()` + // inside delete_garbage(). That would be unsafe: it could + // resurrect a dead object or access a an already cleared object. + // See bpo-38006 for one example. + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. March over the list, clear + // all the weakrefs, and enqueue the weakrefs with callbacks + // that must be called into wrcb_to_call. + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + // _PyWeakref_ClearRef clears the weakref but leaves + // the callback pointer intact. Obscure: it also + // changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + + // We do not invoke callbacks for weakrefs that are themselves + // unreachable. This is partly for historical reasons: weakrefs + // predate safe object finalization, and a weakref that is itself + // unreachable may have a callback that resurrects other + // unreachable objects. + if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) { + continue; + } + + // Create a new reference so that wr can't go away before we can + // process it again. + merge_refcount((PyObject *)wr, 1); + + // Enqueue weakref to be called later. + worklist_push(&state->wrcb_to_call, (PyObject *)wr); + } + } +} + +static void +call_weakref_callbacks(struct collection_state *state) +{ + // Invoke the callbacks we decided to honor. + PyObject *op; + while ((op = worklist_pop(&state->wrcb_to_call)) != NULL) { + _PyObject_ASSERT(op, PyWeakref_Check(op)); + + PyWeakReference *wr = (PyWeakReference *)op; + PyObject *callback = wr->wr_callback; + _PyObject_ASSERT(op, callback != NULL); + + /* copy-paste of weakrefobject.c's handle_callback() */ + PyObject *temp = PyObject_CallOneArg(callback, (PyObject *)wr); + if (temp == NULL) { + PyErr_WriteUnraisable(callback); + } + else { + Py_DECREF(temp); + } + + gc_restore_tid(op); + Py_DECREF(op); // drop worklist reference + } +} + + +static GCState * +get_gc_state(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return &interp->gc; +} + + +void +_PyGC_InitState(GCState *gcstate) +{ +} + + +PyStatus +_PyGC_Init(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + + gcstate->garbage = PyList_New(0); + if (gcstate->garbage == NULL) { + return _PyStatus_NO_MEMORY(); + } + + gcstate->callbacks = PyList_New(0); + if (gcstate->callbacks == NULL) { + return _PyStatus_NO_MEMORY(); + } + + return _PyStatus_OK(); +} + +static void +debug_cycle(const char *msg, PyObject *op) +{ + PySys_FormatStderr("gc: %s <%s %p>\n", + msg, Py_TYPE(op)->tp_name, op); +} + +/* Run first-time finalizers (if any) on all the objects in collectable. + * Note that this may remove some (or even all) of the objects from the + * list, due to refcounts falling to 0. + */ +static void +finalize_garbage(struct collection_state *state) +{ + // NOTE: the unreachable worklist holds a strong reference to the object + // to prevent it from being deallocated while we are holding on to it. + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (!_PyGC_FINALIZED(op)) { + destructor finalize = Py_TYPE(op)->tp_finalize; + if (finalize != NULL) { + _PyGC_SET_FINALIZED(op); + finalize(op); + assert(!_PyErr_Occurred(_PyThreadState_GET())); + } + } + } +} + +// Break reference cycles by clearing the containers involved. +static void +delete_garbage(struct collection_state *state) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = state->gcstate; + + assert(!_PyErr_Occurred(tstate)); + + PyObject *op; + while ((op = worklist_pop(&state->objs_to_decref)) != NULL) { + Py_DECREF(op); + } + + while ((op = worklist_pop(&state->unreachable)) != NULL) { + _PyObject_ASSERT(op, gc_is_unreachable(op)); + + // Clear the unreachable flag. + gc_clear_unreachable(op); + + if (!_PyObject_GC_IS_TRACKED(op)) { + // Object might have been untracked by some other tp_clear() call. + Py_DECREF(op); // drop the reference from the worklist + continue; + } + + state->collected++; + + if (gcstate->debug & _PyGC_DEBUG_SAVEALL) { + assert(gcstate->garbage != NULL); + if (PyList_Append(gcstate->garbage, op) < 0) { + _PyErr_Clear(tstate); + } + } + else { + inquiry clear = Py_TYPE(op)->tp_clear; + if (clear != NULL) { + (void) clear(op); + if (_PyErr_Occurred(tstate)) { + PyErr_FormatUnraisable("Exception ignored in tp_clear of %s", + Py_TYPE(op)->tp_name); + } + } + } + + Py_DECREF(op); // drop the reference from the worklist + } +} + +static void +handle_legacy_finalizers(struct collection_state *state) +{ + GCState *gcstate = state->gcstate; + assert(gcstate->garbage != NULL); + + PyObject *op; + while ((op = worklist_pop(&state->legacy_finalizers)) != NULL) { + state->uncollectable++; + + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + debug_cycle("uncollectable", op); + } + + if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) { + if (PyList_Append(gcstate->garbage, op) < 0) { + PyErr_Clear(); + } + } + Py_DECREF(op); // drop worklist reference + } +} + +// Show stats for objects in each generations +static void +show_stats_each_generations(GCState *gcstate) +{ + // TODO +} + +// Traversal callback for handle_resurrected_objects. +static int +visit_decref_unreachable(PyObject *op, void *data) +{ + if (gc_is_unreachable(op) && _PyObject_GC_IS_TRACKED(op)) { + op->ob_ref_local -= 1; + } + return 0; +} + +// Handle objects that may have resurrected after a call to 'finalize_garbage'. +static int +handle_resurrected_objects(struct collection_state *state) +{ + // First, find externally reachable objects by computing the reference + // count difference in ob_ref_local. We can't use ob_tid here because + // that's already used to store the unreachable worklist. + PyObject *op; + struct worklist_iter iter; + WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) { + assert(gc_is_unreachable(op)); + assert(_Py_REF_IS_MERGED(op->ob_ref_shared)); + + if (!_PyObject_GC_IS_TRACKED(op)) { + // Object was untracked by a finalizer. Schedule it for a Py_DECREF + // after we finish with the stop-the-world pause. + gc_clear_unreachable(op); + worklist_remove(&iter); + worklist_push(&state->objs_to_decref, op); + continue; + } + + Py_ssize_t refcount = (op->ob_ref_shared >> _Py_REF_SHARED_SHIFT); + if (refcount > INT32_MAX) { + // The refcount is too big to fit in `ob_ref_local`. Mark the + // object as immortal and bail out. + gc_clear_unreachable(op); + worklist_remove(&iter); + _Py_SetImmortal(op); + continue; + } + + op->ob_ref_local += (uint32_t)refcount; + + // Subtract one to account for the reference from the worklist. + op->ob_ref_local -= 1; + + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + (visitproc)visit_decref_unreachable, + NULL); + } + + // Find resurrected objects + bool any_resurrected = false; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + int32_t gc_refs = (int32_t)op->ob_ref_local; + op->ob_ref_local = 0; // restore ob_ref_local + + _PyObject_ASSERT(op, gc_refs >= 0); + + if (gc_is_unreachable(op) && gc_refs > 0) { + // Clear the unreachable flag on any transitively reachable objects + // from this one. + any_resurrected = true; + gc_clear_unreachable(op); + if (mark_reachable(op) < 0) { + return -1; + } + } + } + + if (any_resurrected) { + // Remove resurrected objects from the unreachable list. + WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) { + if (!gc_is_unreachable(op)) { + _PyObject_ASSERT(op, Py_REFCNT(op) > 1); + worklist_remove(&iter); + merge_refcount(op, -1); // remove worklist reference + } + } + } + +#ifdef GC_DEBUG + WORKSTACK_FOR_EACH(&state->unreachable, op) { + _PyObject_ASSERT(op, gc_is_unreachable(op)); + _PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op)); + _PyObject_ASSERT(op, op->ob_ref_local == 0); + _PyObject_ASSERT(op, _Py_REF_IS_MERGED(op->ob_ref_shared)); + } +#endif + + return 0; +} + + +/* Invoke progress callbacks to notify clients that garbage collection + * is starting or stopping + */ +static void +invoke_gc_callback(PyThreadState *tstate, const char *phase, + int generation, Py_ssize_t collected, + Py_ssize_t uncollectable) +{ + assert(!_PyErr_Occurred(tstate)); + + /* we may get called very early */ + GCState *gcstate = &tstate->interp->gc; + if (gcstate->callbacks == NULL) { + return; + } + + /* The local variable cannot be rebound, check it for sanity */ + assert(PyList_CheckExact(gcstate->callbacks)); + PyObject *info = NULL; + if (PyList_GET_SIZE(gcstate->callbacks) != 0) { + info = Py_BuildValue("{sisnsn}", + "generation", generation, + "collected", collected, + "uncollectable", uncollectable); + if (info == NULL) { + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + } + + PyObject *phase_obj = PyUnicode_FromString(phase); + if (phase_obj == NULL) { + Py_XDECREF(info); + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + + PyObject *stack[] = {phase_obj, info}; + for (Py_ssize_t i=0; icallbacks); i++) { + PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); + Py_INCREF(cb); /* make sure cb doesn't go away */ + r = PyObject_Vectorcall(cb, stack, 2, NULL); + if (r == NULL) { + PyErr_WriteUnraisable(cb); + } + else { + Py_DECREF(r); + } + Py_DECREF(cb); + } + Py_DECREF(phase_obj); + Py_XDECREF(info); + assert(!_PyErr_Occurred(tstate)); +} + + +/* Find the oldest generation (highest numbered) where the count + * exceeds the threshold. Objects in the that generation and + * generations younger than it will be collected. */ +static int +gc_select_generation(GCState *gcstate) +{ + for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + if (gcstate->generations[i].count > gcstate->generations[i].threshold) { + /* Avoid quadratic performance degradation in number + of tracked objects (see also issue #4074): + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + + long_lived_pending / long_lived_total + + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). + + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html + */ + if (i == NUM_GENERATIONS - 1 + && gcstate->long_lived_pending < gcstate->long_lived_total / 4) + { + continue; + } + return i; + } + } + return -1; +} + +static void +cleanup_worklist(struct worklist *worklist) +{ + PyObject *op; + while ((op = worklist_pop(worklist)) != NULL) { + gc_restore_tid(op); + gc_clear_unreachable(op); + Py_DECREF(op); + } +} + +static void +gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) +{ + _PyEval_StopTheWorld(interp); + // Find unreachable objects + int err = deduce_unreachable_heap(interp, state); + if (err < 0) { + _PyEval_StartTheWorld(interp); + goto error; + } + + // Print debugging information. + if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) { + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + debug_cycle("collectable", op); + } + } + + // Clear weakrefs and enqueue callbacks (but do not call them). + clear_weakrefs(state); + _PyEval_StartTheWorld(interp); + + // Call weakref callbacks and finalizers after unpausing other threads to + // avoid potential deadlocks. + call_weakref_callbacks(state); + finalize_garbage(state); + + // Handle any objects that may have resurrected after the finalization. + _PyEval_StopTheWorld(interp); + err = handle_resurrected_objects(state); + _PyEval_StartTheWorld(interp); + + if (err < 0) { + goto error; + } + + // Call tp_clear on objects in the unreachable set. This will cause + // the reference cycles to be broken. It may also cause some objects + // to be freed. + delete_garbage(state); + + // Append objects with legacy finalizers to the "gc.garbage" list. + handle_legacy_finalizers(state); + return; + +error: + cleanup_worklist(&state->unreachable); + cleanup_worklist(&state->legacy_finalizers); + cleanup_worklist(&state->wrcb_to_call); + cleanup_worklist(&state->objs_to_decref); + PyErr_NoMemory(); + PyErr_FormatUnraisable("Out of memory during garbage collection"); +} + +/* This is the main function. Read this to understand how the + * collection process works. */ +static Py_ssize_t +gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) +{ + int i; + Py_ssize_t m = 0; /* # objects collected */ + Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ + _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ + GCState *gcstate = &tstate->interp->gc; + + // gc_collect_main() must not be called before _PyGC_Init + // or after _PyGC_Fini() + assert(gcstate->garbage != NULL); + assert(!_PyErr_Occurred(tstate)); + + int expected = 0; + if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { + // Don't start a garbage collection if one is already in progress. + return 0; + } + + if (generation == GENERATION_AUTO) { + // Select the oldest generation that needs collecting. We will collect + // objects from that generation and all generations younger than it. + generation = gc_select_generation(gcstate); + if (generation < 0) { + // No generation needs to be collected. + _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; + } + } + + assert(generation >= 0 && generation < NUM_GENERATIONS); + +#ifdef Py_STATS + if (_Py_stats) { + _Py_stats->object_stats.object_visits = 0; + } +#endif + GC_STAT_ADD(generation, collections, 1); + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "start", generation, 0, 0); + } + + if (gcstate->debug & _PyGC_DEBUG_STATS) { + PySys_WriteStderr("gc: collecting generation %d...\n", generation); + show_stats_each_generations(gcstate); + t1 = _PyTime_GetPerfCounter(); + } + + if (PyDTrace_GC_START_ENABLED()) { + PyDTrace_GC_START(generation); + } + + /* update collection and allocation counters */ + if (generation+1 < NUM_GENERATIONS) { + gcstate->generations[generation+1].count += 1; + } + for (i = 0; i <= generation; i++) { + gcstate->generations[i].count = 0; + } + + PyInterpreterState *interp = tstate->interp; + + struct collection_state state = { + .interp = interp, + .gcstate = gcstate, + }; + + gc_collect_internal(interp, &state); + + m = state.collected; + n = state.uncollectable; + + if (gcstate->debug & _PyGC_DEBUG_STATS) { + double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); + PySys_WriteStderr( + "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", + n+m, n, d); + } + + // Clear free lists in all threads + _PyGC_ClearAllFreeLists(interp); + + if (_PyErr_Occurred(tstate)) { + if (reason == _Py_GC_REASON_SHUTDOWN) { + _PyErr_Clear(tstate); + } + else { + PyErr_FormatUnraisable("Exception ignored in garbage collection"); + } + } + + /* Update stats */ + struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; + stats->collections++; + stats->collected += m; + stats->uncollectable += n; + + GC_STAT_ADD(generation, objects_collected, m); +#ifdef Py_STATS + if (_Py_stats) { + GC_STAT_ADD(generation, object_visits, + _Py_stats->object_stats.object_visits); + _Py_stats->object_stats.object_visits = 0; + } +#endif + + if (PyDTrace_GC_DONE_ENABLED()) { + PyDTrace_GC_DONE(n + m); + } + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "stop", generation, m, n); + } + + assert(!_PyErr_Occurred(tstate)); + _Py_atomic_store_int(&gcstate->collecting, 0); + return n + m; +} + +struct get_referrers_args { + struct visitor_args base; + PyObject *objs; + struct worklist results; +}; + +static int +referrersvisit(PyObject* obj, void *arg) +{ + PyObject *objs = arg; + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(objs); i++) { + if (PyTuple_GET_ITEM(objs, i) == obj) { + return 1; + } + } + return 0; +} + +static bool +visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op == NULL) { + return true; + } + + struct get_referrers_args *arg = (struct get_referrers_args *)args; + if (Py_TYPE(op)->tp_traverse(op, referrersvisit, arg->objs)) { + op->ob_tid = 0; // we will restore the refcount later + worklist_push(&arg->results, op); + } + + return true; +} + +PyObject * +_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) +{ + PyObject *result = PyList_New(0); + if (!result) { + return NULL; + } + + _PyEval_StopTheWorld(interp); + + // Append all objects to a worklist. This abuses ob_tid. We will restore + // it later. NOTE: We can't append to the PyListObject during + // gc_visit_heaps() because PyList_Append() may reclaim an abandoned + // mimalloc segments while we are traversing them. + struct get_referrers_args args = { .objs = objs }; + gc_visit_heaps(interp, &visit_get_referrers, &args.base); + + bool error = false; + PyObject *op; + while ((op = worklist_pop(&args.results)) != NULL) { + gc_restore_tid(op); + if (op != objs && PyList_Append(result, op) < 0) { + error = true; + break; + } + } + + // In case of error, clear the remaining worklist + while ((op = worklist_pop(&args.results)) != NULL) { + gc_restore_tid(op); + } + + _PyEval_StartTheWorld(interp); + + if (error) { + Py_DECREF(result); + return NULL; + } + + return result; +} + +struct get_objects_args { + struct visitor_args base; + struct worklist objects; +}; + +static bool +visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op == NULL) { + return true; + } + + struct get_objects_args *arg = (struct get_objects_args *)args; + op->ob_tid = 0; // we will restore the refcount later + worklist_push(&arg->objects, op); + + return true; +} + +PyObject * +_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +{ + PyObject *result = PyList_New(0); + if (!result) { + return NULL; + } + + _PyEval_StopTheWorld(interp); + + // Append all objects to a worklist. This abuses ob_tid. We will restore + // it later. NOTE: We can't append to the list during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segment + // while we are traversing it. + struct get_objects_args args = { 0 }; + gc_visit_heaps(interp, &visit_get_objects, &args.base); + + bool error = false; + PyObject *op; + while ((op = worklist_pop(&args.objects)) != NULL) { + gc_restore_tid(op); + if (op != result && PyList_Append(result, op) < 0) { + error = true; + break; + } + } + + // In case of error, clear the remaining worklist + while ((op = worklist_pop(&args.objects)) != NULL) { + gc_restore_tid(op); + } + + _PyEval_StartTheWorld(interp); + + if (error) { + Py_DECREF(result); + return NULL; + } + + return result; +} + +static bool +visit_freeze(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op != NULL) { + op->ob_gc_bits |= _PyGC_BITS_FROZEN; + } + return true; +} + +void +_PyGC_Freeze(PyInterpreterState *interp) +{ + struct visitor_args args; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &visit_freeze, &args); + _PyEval_StartTheWorld(interp); +} + +static bool +visit_unfreeze(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op != NULL) { + op->ob_gc_bits &= ~_PyGC_BITS_FROZEN; + } + return true; +} + +void +_PyGC_Unfreeze(PyInterpreterState *interp) +{ + struct visitor_args args; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &visit_unfreeze, &args); + _PyEval_StartTheWorld(interp); +} + +struct count_frozen_args { + struct visitor_args base; + Py_ssize_t count; +}; + +static bool +visit_count_frozen(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op != NULL && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) { + struct count_frozen_args *arg = (struct count_frozen_args *)args; + arg->count++; + } + return true; +} + +Py_ssize_t +_PyGC_GetFreezeCount(PyInterpreterState *interp) +{ + struct count_frozen_args args = { .count = 0 }; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &visit_count_frozen, &args.base); + _PyEval_StartTheWorld(interp); + return args.count; +} + +/* C API for controlling the state of the garbage collector */ +int +PyGC_Enable(void) +{ + GCState *gcstate = get_gc_state(); + int old_state = gcstate->enabled; + gcstate->enabled = 1; + return old_state; +} + +int +PyGC_Disable(void) +{ + GCState *gcstate = get_gc_state(); + int old_state = gcstate->enabled; + gcstate->enabled = 0; + return old_state; +} + +int +PyGC_IsEnabled(void) +{ + GCState *gcstate = get_gc_state(); + return gcstate->enabled; +} + +/* Public API to invoke gc.collect() from C */ +Py_ssize_t +PyGC_Collect(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = &tstate->interp->gc; + + if (!gcstate->enabled) { + return 0; + } + + Py_ssize_t n; + PyObject *exc = _PyErr_GetRaisedException(tstate); + n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); + _PyErr_SetRaisedException(tstate, exc); + + return n; +} + +Py_ssize_t +_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) +{ + return gc_collect_main(tstate, generation, reason); +} + +Py_ssize_t +_PyGC_CollectNoFail(PyThreadState *tstate) +{ + /* Ideally, this function is only called on interpreter shutdown, + and therefore not recursively. Unfortunately, when there are daemon + threads, a daemon thread can start a cyclic garbage collection + during interpreter shutdown (and then never finish it). + See http://bugs.python.org/issue8713#msg195178 for an example. + */ + return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); +} + +void +_PyGC_DumpShutdownStats(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL) + && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) { + const char *message; + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + message = "gc: %zd uncollectable objects at shutdown"; + } + else { + message = "gc: %zd uncollectable objects at shutdown; " \ + "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them"; + } + /* PyErr_WarnFormat does too many things and we are at shutdown, + the warnings module's dependencies (e.g. linecache) may be gone + already. */ + if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, + "gc", NULL, message, + PyList_GET_SIZE(gcstate->garbage))) + { + PyErr_WriteUnraisable(NULL); + } + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + PyObject *repr = NULL, *bytes = NULL; + repr = PyObject_Repr(gcstate->garbage); + if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) { + PyErr_WriteUnraisable(gcstate->garbage); + } + else { + PySys_WriteStderr( + " %s\n", + PyBytes_AS_STRING(bytes) + ); + } + Py_XDECREF(repr); + Py_XDECREF(bytes); + } + } +} + + +void +_PyGC_Fini(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + Py_CLEAR(gcstate->garbage); + Py_CLEAR(gcstate->callbacks); + + /* We expect that none of this interpreters objects are shared + with other interpreters. + See https://github.com/python/cpython/issues/90228. */ +} + +/* for debugging */ + +#ifdef Py_DEBUG +static int +visit_validate(PyObject *op, void *parent_raw) +{ + PyObject *parent = _PyObject_CAST(parent_raw); + if (_PyObject_IsFreed(op)) { + _PyObject_ASSERT_FAILED_MSG(parent, + "PyObject_GC_Track() object is not valid"); + } + return 0; +} +#endif + + +/* extension modules might be compiled with GC support so these + functions must always be available */ + +void +PyObject_GC_Track(void *op_raw) +{ + PyObject *op = _PyObject_CAST(op_raw); + if (_PyObject_GC_IS_TRACKED(op)) { + _PyObject_ASSERT_FAILED_MSG(op, + "object already tracked " + "by the garbage collector"); + } + _PyObject_GC_TRACK(op); + +#ifdef Py_DEBUG + /* Check that the object is valid: validate objects traversed + by tp_traverse() */ + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void)traverse(op, visit_validate, op); +#endif +} + +void +PyObject_GC_UnTrack(void *op_raw) +{ + PyObject *op = _PyObject_CAST(op_raw); + /* Obscure: the Py_TRASHCAN mechanism requires that we be able to + * call PyObject_GC_UnTrack twice on an object. + */ + if (_PyObject_GC_IS_TRACKED(op)) { + _PyObject_GC_UNTRACK(op); + } +} + +int +PyObject_IS_GC(PyObject *obj) +{ + return _PyObject_IS_GC(obj); +} + +void +_Py_ScheduleGC(PyInterpreterState *interp) +{ + _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); +} + +void +_PyObject_GC_Link(PyObject *op) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = &tstate->interp->gc; + gcstate->generations[0].count++; /* number of allocated GC objects */ + if (gcstate->generations[0].count > gcstate->generations[0].threshold && + gcstate->enabled && + gcstate->generations[0].threshold && + !_Py_atomic_load_int_relaxed(&gcstate->collecting) && + !_PyErr_Occurred(tstate)) + { + _Py_ScheduleGC(tstate->interp); + } +} + +void +_Py_RunGC(PyThreadState *tstate) +{ + gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); +} + +static PyObject * +gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize) +{ + PyThreadState *tstate = _PyThreadState_GET(); + if (basicsize > PY_SSIZE_T_MAX - presize) { + return _PyErr_NoMemory(tstate); + } + size_t size = presize + basicsize; + char *mem = _PyObject_MallocWithType(tp, size); + if (mem == NULL) { + return _PyErr_NoMemory(tstate); + } + ((PyObject **)mem)[0] = NULL; + ((PyObject **)mem)[1] = NULL; + PyObject *op = (PyObject *)(mem + presize); + _PyObject_GC_Link(op); + return op; +} + +PyObject * +_PyObject_GC_New(PyTypeObject *tp) +{ + size_t presize = _PyType_PreHeaderSize(tp); + PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp), presize); + if (op == NULL) { + return NULL; + } + _PyObject_Init(op, tp); + return op; +} + +PyVarObject * +_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) +{ + PyVarObject *op; + + if (nitems < 0) { + PyErr_BadInternalCall(); + return NULL; + } + size_t presize = _PyType_PreHeaderSize(tp); + size_t size = _PyObject_VAR_SIZE(tp, nitems); + op = (PyVarObject *)gc_alloc(tp, size, presize); + if (op == NULL) { + return NULL; + } + _PyObject_InitVar(op, tp, nitems); + return op; +} + +PyObject * +PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size) +{ + size_t presize = _PyType_PreHeaderSize(tp); + PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp) + extra_size, presize); + if (op == NULL) { + return NULL; + } + memset(op, 0, _PyObject_SIZE(tp) + extra_size); + _PyObject_Init(op, tp); + return op; +} + +PyVarObject * +_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) +{ + const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); + const size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); + _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op)); + if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) { + return (PyVarObject *)PyErr_NoMemory(); + } + char *mem = (char *)op - presize; + mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize); + if (mem == NULL) { + return (PyVarObject *)PyErr_NoMemory(); + } + op = (PyVarObject *) (mem + presize); + Py_SET_SIZE(op, nitems); + return op; +} + +void +PyObject_GC_Del(void *op) +{ + size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); + if (_PyObject_GC_IS_TRACKED(op)) { +#ifdef Py_DEBUG + PyObject *exc = PyErr_GetRaisedException(); + if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, + "gc", NULL, "Object of type %s is not untracked before destruction", + ((PyObject*)op)->ob_type->tp_name)) { + PyErr_WriteUnraisable(NULL); + } + PyErr_SetRaisedException(exc); +#endif + } + GCState *gcstate = get_gc_state(); + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; + } + PyObject_Free(((char *)op)-presize); +} + +int +PyObject_GC_IsTracked(PyObject* obj) +{ + if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) { + return 1; + } + return 0; +} + +int +PyObject_GC_IsFinalized(PyObject *obj) +{ + if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) { + return 1; + } + return 0; +} + +struct custom_visitor_args { + struct visitor_args base; + gcvisitobjects_t callback; + void *arg; +}; + +static bool +custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + struct custom_visitor_args *wrapper = (struct custom_visitor_args *)args; + if (!wrapper->callback(op, wrapper->arg)) { + return false; + } + + return true; +} + +void +PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct custom_visitor_args wrapper = { + .callback = callback, + .arg = arg, + }; + + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base); + _PyEval_StartTheWorld(interp); +} + /* Clear all free lists * All free lists are cleared during the collection of the highest generation. * Allocated items in the free list may keep a pymalloc arena occupied. @@ -25,4 +1720,4 @@ _PyGC_ClearAllFreeLists(PyInterpreterState *interp) HEAD_UNLOCK(&_PyRuntime); } -#endif +#endif // Py_GIL_DISABLED diff --git a/Python/object_stack.c b/Python/object_stack.c new file mode 100644 index 00000000000000..66b37bcbb44475 --- /dev/null +++ b/Python/object_stack.c @@ -0,0 +1,87 @@ +// Stack of Python objects + +#include "Python.h" +#include "pycore_freelist.h" +#include "pycore_pystate.h" +#include "pycore_object_stack.h" + +extern _PyObjectStackChunk *_PyObjectStackChunk_New(void); +extern void _PyObjectStackChunk_Free(_PyObjectStackChunk *); + +static struct _Py_object_stack_state * +get_state(void) +{ + _PyFreeListState *state = _PyFreeListState_GET(); + return &state->object_stack_state; +} + +_PyObjectStackChunk * +_PyObjectStackChunk_New(void) +{ + _PyObjectStackChunk *buf; + struct _Py_object_stack_state *state = get_state(); + if (state->numfree > 0) { + buf = state->free_list; + state->free_list = buf->prev; + state->numfree--; + } + else { + // NOTE: we use PyMem_RawMalloc() here because this is used by the GC + // during mimalloc heap traversal. In that context, it is not safe to + // allocate mimalloc memory, such as via PyMem_Malloc(). + buf = PyMem_RawMalloc(sizeof(_PyObjectStackChunk)); + if (buf == NULL) { + return NULL; + } + } + buf->prev = NULL; + buf->n = 0; + return buf; +} + +void +_PyObjectStackChunk_Free(_PyObjectStackChunk *buf) +{ + assert(buf->n == 0); + struct _Py_object_stack_state *state = get_state(); + if (state->numfree >= 0 && + state->numfree < _PyObjectStackChunk_MAXFREELIST) + { + buf->prev = state->free_list; + state->free_list = buf; + state->numfree++; + } + else { + PyMem_RawFree(buf); + } +} + +void +_PyObjectStack_Clear(_PyObjectStack *queue) +{ + while (queue->head != NULL) { + _PyObjectStackChunk *buf = queue->head; + buf->n = 0; + queue->head = buf->prev; + _PyObjectStackChunk_Free(buf); + } +} + +void +_PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalization) +{ + if (!is_finalization) { + // Ignore requests to clear the free list during GC. We use object + // stacks during GC, so emptying the free-list is counterproductive. + return; + } + + struct _Py_object_stack_state *state = &free_lists->object_stack_state; + while (state->numfree > 0) { + _PyObjectStackChunk *buf = state->free_list; + state->free_list = buf->prev; + state->numfree--; + PyMem_RawFree(buf); + } + state->numfree = -1; +} diff --git a/Python/pystate.c b/Python/pystate.c index c9b521351444a7..5ad5b6f3fcc634 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -10,6 +10,7 @@ #include "pycore_frame.h" #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyType_InitCache() +#include "pycore_object_stack.h" // _PyObjectStackChunk_ClearFreeList() #include "pycore_parking_lot.h" // _PyParkingLot_AfterFork() #include "pycore_pyerrors.h" // _PyErr_Clear() #include "pycore_pylifecycle.h" // _PyAST_Fini() @@ -1468,6 +1469,7 @@ _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization) _PyList_ClearFreeList(state, is_finalization); _PyContext_ClearFreeList(state, is_finalization); _PyAsyncGen_ClearFreeLists(state, is_finalization); + _PyObjectStackChunk_ClearFreeList(state, is_finalization); } void @@ -2055,7 +2057,6 @@ start_the_world(struct _stoptheworld_state *stw) HEAD_LOCK(runtime); stw->requested = 0; stw->world_stopped = 0; - stw->requester = NULL; // Switch threads back to the detached state. PyInterpreterState *i; PyThreadState *t; @@ -2066,6 +2067,7 @@ start_the_world(struct _stoptheworld_state *stw) _PyParkingLot_UnparkAll(&t->state); } } + stw->requester = NULL; HEAD_UNLOCK(runtime); if (stw->is_global) { _PyRWMutex_Unlock(&runtime->stoptheworld_mutex); From d96358ff9de646dbf64dfdfed46d510da7ec4803 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Thu, 25 Jan 2024 22:46:32 +0300 Subject: [PATCH 008/127] gh-114315: Make `threading.Lock` a real class, not a factory function (#114479) `threading.Lock` is now the underlying class and is constructable rather than the old factory function. This allows for type annotations to refer to it which had no non-ugly way to be expressed prior to this. --------- Co-authored-by: Alex Waygood Co-authored-by: Gregory P. Smith --- Doc/library/threading.rst | 7 ++-- Lib/test/test_threading.py | 20 ++++++++--- Lib/threading.py | 4 +-- ...-01-23-14-11-49.gh-issue-114315.KeVdzl.rst | 2 ++ Modules/_threadmodule.c | 33 ++++++++++++++++--- 5 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index b85b7f008d1594..5fbf9379b8202c 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -534,9 +534,10 @@ All methods are executed atomically. lock, subsequent attempts to acquire it block, until it is released; any thread may release it. - Note that ``Lock`` is actually a factory function which returns an instance - of the most efficient version of the concrete Lock class that is supported - by the platform. + .. versionchanged:: 3.13 + ``Lock`` is now a class. In earlier Pythons, ``Lock`` was a factory + function which returned an instance of the underlying private lock + type. .. method:: acquire(blocking=True, timeout=-1) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index dbdc46fff1e313..1ab223b81e939e 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -171,11 +171,21 @@ def test_args_argument(self): t.start() t.join() - @cpython_only - def test_disallow_instantiation(self): - # Ensure that the type disallows instantiation (bpo-43916) - lock = threading.Lock() - test.support.check_disallow_instantiation(self, type(lock)) + def test_lock_no_args(self): + threading.Lock() # works + self.assertRaises(TypeError, threading.Lock, 1) + self.assertRaises(TypeError, threading.Lock, a=1) + self.assertRaises(TypeError, threading.Lock, 1, 2, a=1, b=2) + + def test_lock_no_subclass(self): + # Intentionally disallow subclasses of threading.Lock because they have + # never been allowed, so why start now just because the type is public? + with self.assertRaises(TypeError): + class MyLock(threading.Lock): pass + + def test_lock_or_none(self): + import types + self.assertIsInstance(threading.Lock | None, types.UnionType) # Create a bunch of threads, let each do some work, wait until all are # done. diff --git a/Lib/threading.py b/Lib/threading.py index ecf799bc26ab06..00b95f8d92a1f0 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -5,7 +5,6 @@ import _thread import functools import warnings -import _weakref from time import monotonic as _time from _weakrefset import WeakSet @@ -37,6 +36,7 @@ _start_joinable_thread = _thread.start_joinable_thread _daemon_threads_allowed = _thread.daemon_threads_allowed _allocate_lock = _thread.allocate_lock +_LockType = _thread.LockType _set_sentinel = _thread._set_sentinel get_ident = _thread.get_ident _is_main_interpreter = _thread._is_main_interpreter @@ -115,7 +115,7 @@ def gettrace(): # Synchronization classes -Lock = _allocate_lock +Lock = _LockType def RLock(*args, **kwargs): """Factory function that returns a new reentrant lock. diff --git a/Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst b/Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst new file mode 100644 index 00000000000000..a8a19fc525d019 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst @@ -0,0 +1,2 @@ +Make :class:`threading.Lock` a real class, not a factory function. Add +``__new__`` to ``_thread.lock`` type. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 99f97eb6d0adcc..5cceb84658deb7 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -5,6 +5,7 @@ #include "Python.h" #include "pycore_interp.h" // _PyInterpreterState.threads.count #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_pylifecycle.h" #include "pycore_pystate.h" // _PyThreadState_SetCurrent() #include "pycore_sysmodule.h" // _PySys_GetAttr() @@ -349,6 +350,27 @@ lock__at_fork_reinit(lockobject *self, PyObject *Py_UNUSED(args)) } #endif /* HAVE_FORK */ +static lockobject *newlockobject(PyObject *module); + +static PyObject * +lock_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + // convert to AC? + if (!_PyArg_NoKeywords("lock", kwargs)) { + goto error; + } + if (!_PyArg_CheckPositional("lock", PyTuple_GET_SIZE(args), 0, 0)) { + goto error; + } + + PyObject *module = PyType_GetModuleByDef(type, &thread_module); + assert(module != NULL); + return (PyObject *)newlockobject(module); + +error: + return NULL; +} + static PyMethodDef lock_methods[] = { {"acquire_lock", _PyCFunction_CAST(lock_PyThread_acquire_lock), @@ -398,6 +420,7 @@ static PyType_Slot lock_type_slots[] = { {Py_tp_methods, lock_methods}, {Py_tp_traverse, lock_traverse}, {Py_tp_members, lock_type_members}, + {Py_tp_new, lock_new}, {0, 0} }; @@ -405,7 +428,7 @@ static PyType_Spec lock_type_spec = { .name = "_thread.lock", .basicsize = sizeof(lockobject), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE), + Py_TPFLAGS_IMMUTABLETYPE), .slots = lock_type_slots, }; @@ -1442,8 +1465,6 @@ A subthread can use this function to interrupt the main thread.\n\ Note: the default signal handler for SIGINT raises ``KeyboardInterrupt``." ); -static lockobject *newlockobject(PyObject *module); - static PyObject * thread_PyThread_allocate_lock(PyObject *module, PyObject *Py_UNUSED(ignored)) { @@ -1841,10 +1862,14 @@ thread_module_exec(PyObject *module) } // Lock - state->lock_type = (PyTypeObject *)PyType_FromSpec(&lock_type_spec); + state->lock_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &lock_type_spec, NULL); if (state->lock_type == NULL) { return -1; } + if (PyModule_AddType(module, state->lock_type) < 0) { + return -1; + } + // Old alias: lock -> LockType if (PyDict_SetItemString(d, "LockType", (PyObject *)state->lock_type) < 0) { return -1; } From 33ae9895d4ac0d88447e529038bc4725ddd8c291 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 25 Jan 2024 23:00:52 +0300 Subject: [PATCH 009/127] gh-114561: Mark some tests in ``test_wincosoleio`` with `requires_resource('console')` decorator (GH-114565) --- Lib/test/test_winconsoleio.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py index 72ff9606908ed5..209e4464e1a5c0 100644 --- a/Lib/test/test_winconsoleio.py +++ b/Lib/test/test_winconsoleio.py @@ -6,7 +6,7 @@ import sys import tempfile import unittest -from test.support import os_helper +from test.support import os_helper, requires_resource if sys.platform != 'win32': raise unittest.SkipTest("test only relevant on win32") @@ -150,6 +150,7 @@ def assertStdinRoundTrip(self, text): sys.stdin = old_stdin self.assertEqual(actual, text) + @requires_resource('console') def test_input(self): # ASCII self.assertStdinRoundTrip('abc123') @@ -164,6 +165,7 @@ def test_input_nonbmp(self): # Non-BMP self.assertStdinRoundTrip('\U00100000\U0010ffff\U0010fffd') + @requires_resource('console') def test_partial_reads(self): # Test that reading less than 1 full character works when stdin # contains multibyte UTF-8 sequences @@ -199,6 +201,7 @@ def test_partial_surrogate_reads(self): self.assertEqual(actual, expected, 'stdin.read({})'.format(read_count)) + @requires_resource('console') def test_ctrl_z(self): with open('CONIN$', 'rb', buffering=0) as stdin: source = '\xC4\x1A\r\n'.encode('utf-16-le') From ac5e53e15057bc0326a03f56e400ce345d1cebeb Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 25 Jan 2024 20:06:48 +0000 Subject: [PATCH 010/127] gh-107901: compiler replaces POP_BLOCK instruction by NOPs before optimisations (#114530) --- Python/flowgraph.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index de831358eb9ac8..96610b3cb11a43 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -903,6 +903,7 @@ label_exception_targets(basicblock *entryblock) { } else if (instr->i_opcode == POP_BLOCK) { handler = pop_except_block(except_stack); + INSTR_SET_OP0(instr, NOP); } else if (is_jump(instr)) { instr->i_except = handler; @@ -2313,7 +2314,7 @@ convert_pseudo_ops(cfg_builder *g) for (basicblock *b = entryblock; b != NULL; b = b->b_next) { for (int i = 0; i < b->b_iused; i++) { cfg_instr *instr = &b->b_instr[i]; - if (is_block_push(instr) || instr->i_opcode == POP_BLOCK) { + if (is_block_push(instr)) { INSTR_SET_OP0(instr, NOP); } else if (instr->i_opcode == LOAD_CLOSURE) { From 841eacd07646e643f87d7f063106633a25315910 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Fri, 26 Jan 2024 05:49:37 +0900 Subject: [PATCH 011/127] Add CODEOWNERS for dbm (gh-114555) --- .github/CODEOWNERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9587b3996a9ac2..4984170f0d17ff 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -42,6 +42,9 @@ Lib/test/test_type_*.py @JelleZijlstra Lib/test/test_capi/test_misc.py @markshannon @gvanrossum Tools/c-analyzer/ @ericsnowcurrently +# dbm +**/*dbm* @corona10 @erlend-aasland @serhiy-storchaka + # Exceptions Lib/traceback.py @iritkatriel Lib/test/test_except*.py @iritkatriel From b69548a0f52418b8a2cf7c7a885fdd7d3bfb1b0b Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 26 Jan 2024 01:12:46 +0000 Subject: [PATCH 012/127] GH-73435: Add `pathlib.PurePath.full_match()` (#114350) In 49f90ba we added support for the recursive wildcard `**` in `pathlib.PurePath.match()`. This should allow arbitrary prefix and suffix matching, like `p.match('foo/**')` or `p.match('**/foo')`, but there's a problem: for relative patterns only, `match()` implicitly inserts a `**` token on the left hand side, causing all patterns to match from the right. As a result, it's impossible to match relative patterns from the left: `PurePath('foo/bar').match('bar/**')` is true! This commit reverts the changes to `match()`, and instead adds a new `full_match()` method that: - Allows empty patterns - Supports the recursive wildcard `**` - Matches the *entire* path when given a relative pattern --- Doc/library/glob.rst | 5 +- Doc/library/pathlib.rst | 60 +++++++------- Doc/whatsnew/3.13.rst | 3 +- Lib/pathlib/__init__.py | 7 ++ Lib/pathlib/_abc.py | 54 +++++++++---- Lib/test/test_pathlib/test_pathlib_abc.py | 98 +++++++++++++++++------ 6 files changed, 155 insertions(+), 72 deletions(-) diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 6e4f72c19ff4c9..19a0bbba8966ba 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -147,8 +147,9 @@ The :mod:`glob` module defines the following functions: .. seealso:: - :meth:`pathlib.PurePath.match` and :meth:`pathlib.Path.glob` methods, - which call this function to implement pattern matching and globbing. + :meth:`pathlib.PurePath.full_match` and :meth:`pathlib.Path.glob` + methods, which call this function to implement pattern matching and + globbing. .. versionadded:: 3.13 diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index fcbc0bf489b344..2f4ff4efec47f8 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -559,55 +559,55 @@ Pure paths provide the following methods and properties: PureWindowsPath('c:/Program Files') -.. method:: PurePath.match(pattern, *, case_sensitive=None) +.. method:: PurePath.full_match(pattern, *, case_sensitive=None) Match this path against the provided glob-style pattern. Return ``True`` - if matching is successful, ``False`` otherwise. - - If *pattern* is relative, the path can be either relative or absolute, - and matching is done from the right:: + if matching is successful, ``False`` otherwise. For example:: - >>> PurePath('a/b.py').match('*.py') - True - >>> PurePath('/a/b/c.py').match('b/*.py') + >>> PurePath('a/b.py').full_match('a/*.py') True - >>> PurePath('/a/b/c.py').match('a/*.py') + >>> PurePath('a/b.py').full_match('*.py') False + >>> PurePath('/a/b/c.py').full_match('/a/**') + True + >>> PurePath('/a/b/c.py').full_match('**/*.py') + True - If *pattern* is absolute, the path must be absolute, and the whole path - must match:: + As with other methods, case-sensitivity follows platform defaults:: - >>> PurePath('/a.py').match('/*.py') - True - >>> PurePath('a/b.py').match('/*.py') + >>> PurePosixPath('b.py').full_match('*.PY') False + >>> PureWindowsPath('b.py').full_match('*.PY') + True - The *pattern* may be another path object; this speeds up matching the same - pattern against multiple files:: + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. - >>> pattern = PurePath('*.py') - >>> PurePath('a/b.py').match(pattern) - True + .. versionadded:: 3.13 - .. versionchanged:: 3.12 - Accepts an object implementing the :class:`os.PathLike` interface. - As with other methods, case-sensitivity follows platform defaults:: +.. method:: PurePath.match(pattern, *, case_sensitive=None) - >>> PurePosixPath('b.py').match('*.PY') - False - >>> PureWindowsPath('b.py').match('*.PY') + Match this path against the provided non-recursive glob-style pattern. + Return ``True`` if matching is successful, ``False`` otherwise. + + This method is similar to :meth:`~PurePath.full_match`, but empty patterns + aren't allowed (:exc:`ValueError` is raised), the recursive wildcard + "``**``" isn't supported (it acts like non-recursive "``*``"), and if a + relative pattern is provided, then matching is done from the right:: + + >>> PurePath('a/b.py').match('*.py') + True + >>> PurePath('/a/b/c.py').match('b/*.py') True + >>> PurePath('/a/b/c.py').match('a/*.py') + False - Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + .. versionchanged:: 3.12 + The *pattern* parameter accepts a :term:`path-like object`. .. versionchanged:: 3.12 The *case_sensitive* parameter was added. - .. versionchanged:: 3.13 - Support for the recursive wildcard "``**``" was added. In previous - versions, it acted like the non-recursive wildcard "``*``". - .. method:: PurePath.relative_to(other, walk_up=False) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 40f0cd37fe9318..8c2bb05920d5b6 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -336,7 +336,8 @@ pathlib object from a 'file' URI (``file:/``). (Contributed by Barney Gale in :gh:`107465`.) -* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`. +* Add :meth:`pathlib.PurePath.full_match` for matching paths with + shell-style wildcards, including the recursive wildcard "``**``". (Contributed by Barney Gale in :gh:`73435`.) * Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`, diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index b043aed12b3849..eee82ef26bc7e7 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -490,6 +490,13 @@ def _pattern_stack(self): parts.reverse() return parts + @property + def _pattern_str(self): + """The path expressed as a string, for use in pattern-matching.""" + # The string representation of an empty path is a single dot ('.'). Empty + # paths shouldn't match wildcards, so we change it to the empty string. + path_str = str(self) + return '' if path_str == '.' else path_str # Subclassing os.PathLike makes isinstance() checks slower, # which in turn makes Path construction slower. Register instead! diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 553e1a399061d3..6303a18680befc 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -47,8 +47,8 @@ def _is_case_sensitive(pathmod): re = glob = None -@functools.lru_cache(maxsize=256) -def _compile_pattern(pat, sep, case_sensitive): +@functools.lru_cache(maxsize=512) +def _compile_pattern(pat, sep, case_sensitive, recursive=True): """Compile given glob pattern to a re.Pattern object (observing case sensitivity).""" global re, glob @@ -56,10 +56,7 @@ def _compile_pattern(pat, sep, case_sensitive): import re, glob flags = re.NOFLAG if case_sensitive else re.IGNORECASE - regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep) - # The string representation of an empty path is a single dot ('.'). Empty - # paths shouldn't match wildcards, so we consume it with an atomic group. - regex = r'(\.\Z)?+' + regex + regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep) return re.compile(regex, flags=flags).match @@ -441,23 +438,48 @@ def _pattern_stack(self): raise NotImplementedError("Non-relative patterns are unsupported") return parts + @property + def _pattern_str(self): + """The path expressed as a string, for use in pattern-matching.""" + return str(self) + def match(self, path_pattern, *, case_sensitive=None): """ - Return True if this path matches the given pattern. + Return True if this path matches the given pattern. If the pattern is + relative, matching is done from the right; otherwise, the entire path + is matched. The recursive wildcard '**' is *not* supported by this + method. """ if not isinstance(path_pattern, PurePathBase): path_pattern = self.with_segments(path_pattern) if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) sep = path_pattern.pathmod.sep - if path_pattern.anchor: - pattern_str = str(path_pattern) - elif path_pattern.parts: - pattern_str = str('**' / path_pattern) - else: + path_parts = self.parts[::-1] + pattern_parts = path_pattern.parts[::-1] + if not pattern_parts: raise ValueError("empty pattern") - match = _compile_pattern(pattern_str, sep, case_sensitive) - return match(str(self)) is not None + if len(path_parts) < len(pattern_parts): + return False + if len(path_parts) > len(pattern_parts) and path_pattern.anchor: + return False + for path_part, pattern_part in zip(path_parts, pattern_parts): + match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False) + if match(path_part) is None: + return False + return True + + def full_match(self, pattern, *, case_sensitive=None): + """ + Return True if this path matches the given glob-style pattern. The + pattern is matched against the entire path. + """ + if not isinstance(pattern, PurePathBase): + pattern = self.with_segments(pattern) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self.pathmod) + match = _compile_pattern(pattern._pattern_str, pattern.pathmod.sep, case_sensitive) + return match(self._pattern_str) is not None @@ -781,8 +803,8 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): if filter_paths: # Filter out paths that don't match pattern. prefix_len = len(str(self._make_child_relpath('_'))) - 1 - match = _compile_pattern(str(pattern), sep, case_sensitive) - paths = (path for path in paths if match(str(path), prefix_len)) + match = _compile_pattern(pattern._pattern_str, sep, case_sensitive) + paths = (path for path in paths if match(path._pattern_str, prefix_len)) return paths def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 199718a8a69c5a..364f776dbb1413 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -249,29 +249,8 @@ def test_match_common(self): self.assertFalse(P('/ab.py').match('/a/*.py')) self.assertFalse(P('/a/b/c.py').match('/a/*.py')) # Multi-part glob-style pattern. - self.assertTrue(P('a').match('**')) - self.assertTrue(P('c.py').match('**')) - self.assertTrue(P('a/b/c.py').match('**')) - self.assertTrue(P('/a/b/c.py').match('**')) - self.assertTrue(P('/a/b/c.py').match('/**')) - self.assertTrue(P('/a/b/c.py').match('/a/**')) - self.assertTrue(P('/a/b/c.py').match('**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/**/*.py')) + self.assertFalse(P('/a/b/c.py').match('/**/*.py')) self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py')) - self.assertFalse(P('c.py').match('**/a.py')) - self.assertFalse(P('c.py').match('c/**')) - self.assertFalse(P('a/b/c.py').match('**/a')) - self.assertFalse(P('a/b/c.py').match('**/a/b')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c.')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**')) - self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py')) - self.assertRaises(ValueError, P('a').match, '**a/b/c') - self.assertRaises(ValueError, P('a').match, 'a/b/c**') # Case-sensitive flag self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) @@ -279,9 +258,82 @@ def test_match_common(self): self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) # Matching against empty path self.assertFalse(P('').match('*')) - self.assertTrue(P('').match('**')) + self.assertFalse(P('').match('**')) self.assertFalse(P('').match('**/*')) + def test_full_match_common(self): + P = self.cls + # Simple relative pattern. + self.assertTrue(P('b.py').full_match('b.py')) + self.assertFalse(P('a/b.py').full_match('b.py')) + self.assertFalse(P('/a/b.py').full_match('b.py')) + self.assertFalse(P('a.py').full_match('b.py')) + self.assertFalse(P('b/py').full_match('b.py')) + self.assertFalse(P('/a.py').full_match('b.py')) + self.assertFalse(P('b.py/c').full_match('b.py')) + # Wildcard relative pattern. + self.assertTrue(P('b.py').full_match('*.py')) + self.assertFalse(P('a/b.py').full_match('*.py')) + self.assertFalse(P('/a/b.py').full_match('*.py')) + self.assertFalse(P('b.pyc').full_match('*.py')) + self.assertFalse(P('b./py').full_match('*.py')) + self.assertFalse(P('b.py/c').full_match('*.py')) + # Multi-part relative pattern. + self.assertTrue(P('ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('/d/ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('a.py').full_match('a*/*.py')) + self.assertFalse(P('/dab/c.py').full_match('a*/*.py')) + self.assertFalse(P('ab/c.py/d').full_match('a*/*.py')) + # Absolute pattern. + self.assertTrue(P('/b.py').full_match('/*.py')) + self.assertFalse(P('b.py').full_match('/*.py')) + self.assertFalse(P('a/b.py').full_match('/*.py')) + self.assertFalse(P('/a/b.py').full_match('/*.py')) + # Multi-part absolute pattern. + self.assertTrue(P('/a/b.py').full_match('/a/*.py')) + self.assertFalse(P('/ab.py').full_match('/a/*.py')) + self.assertFalse(P('/a/b/c.py').full_match('/a/*.py')) + # Multi-part glob-style pattern. + self.assertTrue(P('a').full_match('**')) + self.assertTrue(P('c.py').full_match('**')) + self.assertTrue(P('a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('/**')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**')) + self.assertTrue(P('/a/b/c.py').full_match('**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/b/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/**/**/**/*.py')) + self.assertFalse(P('c.py').full_match('**/a.py')) + self.assertFalse(P('c.py').full_match('c/**')) + self.assertFalse(P('a/b/c.py').full_match('**/a')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c.')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('/a/b/c.py/**')) + self.assertFalse(P('a/b/c.py').full_match('/**/a/b/c.py')) + self.assertRaises(ValueError, P('a').full_match, '**a/b/c') + self.assertRaises(ValueError, P('a').full_match, 'a/b/c**') + # Case-sensitive flag + self.assertFalse(P('A.py').full_match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').full_match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').full_match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').full_match('/A/*/*.Py', case_sensitive=False)) + # Matching against empty path + self.assertFalse(P('').full_match('*')) + self.assertTrue(P('').full_match('**')) + self.assertFalse(P('').full_match('**/*')) + # Matching with empty pattern + self.assertTrue(P('').full_match('')) + self.assertTrue(P('.').full_match('.')) + self.assertFalse(P('/').full_match('')) + self.assertFalse(P('/').full_match('.')) + self.assertFalse(P('foo').full_match('')) + self.assertFalse(P('foo').full_match('.')) + def test_parts_common(self): # `parts` returns a tuple. sep = self.sep From 456e274578dc9863f42ab24d62adc0d8c511b50f Mon Sep 17 00:00:00 2001 From: Mariusz Felisiak Date: Fri, 26 Jan 2024 09:33:13 +0100 Subject: [PATCH 013/127] gh-112451: Prohibit subclassing of datetime.timezone. (#114190) This is consistent with C-extension datetime.timezone. --- Lib/_pydatetime.py | 3 +++ Lib/test/datetimetester.py | 4 ++++ .../Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst | 2 ++ 3 files changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index bca2acf1fc88cf..355145387e355b 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -2347,6 +2347,9 @@ def __new__(cls, offset, name=_Omitted): "timedelta(hours=24).") return cls._create(offset, name) + def __init_subclass__(cls): + raise TypeError("type 'datetime.timezone' is not an acceptable base type") + @classmethod def _create(cls, offset, name=None): self = tzinfo.__new__(cls) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 8bda17358db87f..53ad5e57ada017 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -301,6 +301,10 @@ def test_inheritance(self): self.assertIsInstance(timezone.utc, tzinfo) self.assertIsInstance(self.EST, tzinfo) + def test_cannot_subclass(self): + with self.assertRaises(TypeError): + class MyTimezone(timezone): pass + def test_utcoffset(self): dummy = self.DT for h in [0, 1.5, 12]: diff --git a/Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst b/Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst new file mode 100644 index 00000000000000..126ca36a3b7cb1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst @@ -0,0 +1,2 @@ +Prohibit subclassing pure-Python :class:`datetime.timezone`. This is consistent +with C-extension implementation. Patch by Mariusz Felisiak. From 582d95e8bb0b78bf1b6b9a12371108b9993d3b84 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 26 Jan 2024 03:48:13 -0600 Subject: [PATCH 014/127] gh-114250: Fetch metadata for pip and its vendored dependencies from PyPI (#114450) --- Misc/sbom.spdx.json | 624 +++++++++++++++++++++++++++++++++++ Tools/build/generate_sbom.py | 263 ++++++++++++--- 2 files changed, 837 insertions(+), 50 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 5b3cd04ffa7f74..94566772338b10 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -1695,6 +1695,510 @@ "primaryPackagePurpose": "SOURCE", "versionInfo": "2.5.1" }, + { + "SPDXID": "SPDXRef-PACKAGE-cachecontrol", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "95dedbec849f46dda3137866dc28b9d133fc9af55f5b805ab1291833e4457aa4" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/1d/e3/a22348e6226dcd585d5a4b5f0175b3a16dabfd3912cbeb02f321d00e56c7/cachecontrol-0.13.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/cachecontrol@0.13.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "cachecontrol", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.13.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-colorama", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/colorama@0.4.6", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "colorama", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.4.6" + }, + { + "SPDXID": "SPDXRef-PACKAGE-distlib", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/76/cb/6bbd2b10170ed991cf64e8c8b85e01f2fb38f95d1bc77617569e0b0b26ac/distlib-0.3.6-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/distlib@0.3.6", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "distlib", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.3.6" + }, + { + "SPDXID": "SPDXRef-PACKAGE-distro", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "99522ca3e365cac527b44bde033f64c6945d90eb9f769703caaec52b09bbd3ff" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/f4/2c/c90a3adaf0ddb70afe193f5ebfb539612af57cffe677c3126be533df3098/distro-1.8.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/distro@1.8.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "distro", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.8.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-msgpack", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/9f/4a/36d936e54cf71e23ad276564465f6a54fb129e3d61520b76e13e0bb29167/msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/msgpack@1.0.5", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "msgpack", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.0.5" + }, + { + "SPDXID": "SPDXRef-PACKAGE-packaging", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/05/8e/8de486cbd03baba4deef4142bd643a3e7bbe954a784dc1bb17142572d127/packaging-21.3-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/packaging@21.3", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "packaging", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "21.3" + }, + { + "SPDXID": "SPDXRef-PACKAGE-platformdirs", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/9e/d8/563a9fc17153c588c8c2042d2f0f84a89057cdb1c30270f589c88b42d62c/platformdirs-3.8.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/platformdirs@3.8.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "platformdirs", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.8.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-pyparsing", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "d554a96d1a7d3ddaf7183104485bc19fd80543ad6ac5bdb6426719d766fb06c1" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/a4/24/6ae4c9c45cf99d96b06b5d99e25526c060303171fb0aea9da2bfd7dbde93/pyparsing-3.1.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/pyparsing@3.1.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "pyparsing", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.1.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-pyproject-hooks", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "283c11acd6b928d2f6a7c73fa0d01cb2bdc5f07c57a2eeb6e83d5e56b97976f8" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/d5/ea/9ae603de7fbb3df820b23a70f6aff92bf8c7770043254ad8d2dc9d6bcba4/pyproject_hooks-1.0.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/pyproject-hooks@1.0.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "pyproject-hooks", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.0.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-requests", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/requests@2.31.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "requests", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2.31.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-certifi", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/certifi@2023.7.22", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "certifi", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2023.7.22" + }, + { + "SPDXID": "SPDXRef-PACKAGE-chardet", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "362777fb014af596ad31334fde1e8c327dfdb076e1960d1694662d46a6917ab9" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/74/8f/8fc49109009e8d2169d94d72e6b1f4cd45c13d147ba7d6170fb41f22b08f/chardet-5.1.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/chardet@5.1.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "chardet", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "5.1.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-idna", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/idna@3.4", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "idna", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.4" + }, + { + "SPDXID": "SPDXRef-PACKAGE-rich", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "8f87bc7ee54675732fa66a05ebfe489e27264caeeff3728c945d25971b6485ec" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/fc/1e/482e5eec0b89b593e81d78f819a9412849814e22225842b598908e7ac560/rich-13.4.2-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/rich@13.4.2", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "rich", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "13.4.2" + }, + { + "SPDXID": "SPDXRef-PACKAGE-pygments", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/34/a7/37c8d68532ba71549db4212cb036dbd6161b40e463aba336770e80c72f84/Pygments-2.15.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/pygments@2.15.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "pygments", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2.15.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-typing-extensions", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/typing_extensions@4.7.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "typing_extensions", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "4.7.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-resolvelib", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "d2da45d1a8dfee81bdd591647783e340ef3bcb104b54c383f70d422ef5cc7dbf" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/d2/fc/e9ccf0521607bcd244aa0b3fbd574f71b65e9ce6a112c83af988bbbe2e23/resolvelib-1.0.1-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/resolvelib@1.0.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "resolvelib", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.0.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-setuptools", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/c7/42/be1c7bbdd83e1bfb160c94b9cafd8e25efc7400346cf7ccdbdb452c467fa/setuptools-68.0.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/setuptools@68.0.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "setuptools", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "68.0.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-six", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/six@1.16.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "six", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.16.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-tenacity", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/e7/b0/c23bd61e1b32c9b96fbca996c87784e196a812da8d621d8d04851f6c8181/tenacity-8.2.2-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/tenacity@8.2.2", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "tenacity", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "8.2.2" + }, + { + "SPDXID": "SPDXRef-PACKAGE-tomli", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/tomli@2.0.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "tomli", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2.0.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-truststore", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "e37a5642ae9fc48caa8f120b6283d77225d600d224965a672c9e8ef49ce4bb4c" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/20/56/7811d5439b6a56374f274a8672d8f18b4deadadeb3a9f0c86424b98b6f96/truststore-0.8.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/truststore@0.8.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "truststore", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.8.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-webencodings", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/webencodings@0.5.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "webencodings", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.5.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-urllib3", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/48/fe/a5c6cc46e9fe9171d7ecf0f33ee7aae14642f8d74baa7af4d7840f9358be/urllib3-1.26.17-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/urllib3@1.26.17", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "urllib3", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.26.17" + }, { "SPDXID": "SPDXRef-PACKAGE-pip", "checksums": [ @@ -1724,6 +2228,126 @@ } ], "relationships": [ + { + "relatedSpdxElement": "SPDXRef-PACKAGE-cachecontrol", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-certifi", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-chardet", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-colorama", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-distlib", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-distro", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-idna", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-msgpack", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-packaging", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-platformdirs", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-pygments", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-pyparsing", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-pyproject-hooks", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-requests", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-resolvelib", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-rich", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-setuptools", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-six", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-tenacity", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-tomli", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-truststore", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-typing-extensions", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-urllib3", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-webencodings", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING", "relationshipType": "CONTAINS", diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 317d48fee3a9d4..aceb13f141cba4 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -8,6 +8,7 @@ import subprocess import sys import typing +import zipfile from urllib.request import urlopen CPYTHON_ROOT_DIR = pathlib.Path(__file__).parent.parent.parent @@ -16,10 +17,16 @@ # the license expression is a valid SPDX license expression: # See: https://spdx.org/licenses ALLOWED_LICENSE_EXPRESSIONS = { - "MIT", - "CC0-1.0", "Apache-2.0", + "Apache-2.0 OR BSD-2-Clause", "BSD-2-Clause", + "BSD-3-Clause", + "CC0-1.0", + "ISC", + "LGPL-2.1-only", + "MIT", + "MPL-2.0", + "Python-2.0.1", } # Properties which are required for our purposes. @@ -31,14 +38,13 @@ "checksums", "licenseConcluded", "externalRefs", - "originator", "primaryPackagePurpose", ]) class PackageFiles(typing.NamedTuple): """Structure for describing the files of a package""" - include: list[str] + include: list[str] | None exclude: list[str] | None = None @@ -118,62 +124,209 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]: return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")]) +def fetch_package_metadata_from_pypi(project: str, version: str, filename: str | None = None) -> tuple[str, str] | None: + """ + Fetches the SHA256 checksum and download location from PyPI. + If we're given a filename then we match with that, otherwise we use wheels. + """ + # Get pip's download location from PyPI. Check that the checksum is correct too. + try: + raw_text = urlopen(f"https://pypi.org/pypi/{project}/{version}/json").read() + release_metadata = json.loads(raw_text) + url: dict[str, typing.Any] + + # Look for a matching artifact filename and then check + # its remote checksum to the local one. + for url in release_metadata["urls"]: + # pip can only use Python-only dependencies, so there's + # no risk of picking the 'incorrect' wheel here. + if ( + (filename is None and url["packagetype"] == "bdist_wheel") + or (filename is not None and url["filename"] == filename) + ): + break + else: + raise ValueError(f"No matching filename on PyPI for '{filename}'") + + # Successfully found the download URL for the matching artifact. + download_url = url["url"] + checksum_sha256 = url["digests"]["sha256"] + return download_url, checksum_sha256 + + except (OSError, ValueError) as e: + # Fail if we're running in CI where we should have an internet connection. + error_if( + "CI" in os.environ, + f"Couldn't fetch metadata for project '{project}' from PyPI: {e}" + ) + return None + + +def find_ensurepip_pip_wheel() -> pathlib.Path | None: + """Try to find the pip wheel bundled in ensurepip. If missing return None""" + + ensurepip_bundled_dir = CPYTHON_ROOT_DIR / "Lib/ensurepip/_bundled" + + pip_wheels = [] + try: + for wheel_filename in os.listdir(ensurepip_bundled_dir): + if wheel_filename.startswith("pip-"): + pip_wheels.append(wheel_filename) + else: + print(f"Unexpected wheel in ensurepip: '{wheel_filename}'") + sys.exit(1) + + # Ignore this error, likely caused by downstream distributors + # deleting the 'ensurepip/_bundled' directory. + except FileNotFoundError: + pass + + if len(pip_wheels) == 0: + return None + elif len(pip_wheels) > 1: + print("Multiple pip wheels detected in 'Lib/ensurepip/_bundled'") + sys.exit(1) + # Otherwise return the one pip wheel. + return ensurepip_bundled_dir / pip_wheels[0] + + +def maybe_remove_pip_and_deps_from_sbom(sbom_data: dict[str, typing.Any]) -> None: + """ + Removes pip and its dependencies from the SBOM data + if the pip wheel is removed from ensurepip. This is done + by redistributors of Python and pip. + """ + + # If there's a wheel we don't remove anything. + if find_ensurepip_pip_wheel() is not None: + return + + # Otherwise we traverse the relationships + # to find dependent packages to remove. + sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip") + sbom_spdx_ids_to_remove = {sbom_pip_spdx_id} + + # Find all package SPDXIDs that pip depends on. + for sbom_relationship in sbom_data["relationships"]: + if ( + sbom_relationship["relationshipType"] == "DEPENDS_ON" + and sbom_relationship["spdxElementId"] == sbom_pip_spdx_id + ): + sbom_spdx_ids_to_remove.add(sbom_relationship["relatedSpdxElement"]) + + # Remove all the packages and relationships. + sbom_data["packages"] = [ + sbom_package for sbom_package in sbom_data["packages"] + if sbom_package["SPDXID"] not in sbom_spdx_ids_to_remove + ] + sbom_data["relationships"] = [ + sbom_relationship for sbom_relationship in sbom_data["relationships"] + if sbom_relationship["relatedSpdxElement"] not in sbom_spdx_ids_to_remove + ] + + def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: """pip is a part of a packaging ecosystem (Python, surprise!) so it's actually automatable to discover the metadata we need like the version and checksums - so let's do that on behalf of our friends at the PyPA. + so let's do that on behalf of our friends at the PyPA. This function also + discovers vendored packages within pip and fetches their metadata. """ global PACKAGE_TO_FILES - ensurepip_bundled_dir = CPYTHON_ROOT_DIR / "Lib/ensurepip/_bundled" - pip_wheels = [] - - # Find the hopefully one pip wheel in the bundled directory. - for wheel_filename in os.listdir(ensurepip_bundled_dir): - if wheel_filename.startswith("pip-"): - pip_wheels.append(wheel_filename) - if len(pip_wheels) != 1: - print("Zero or multiple pip wheels detected in 'Lib/ensurepip/_bundled'") - sys.exit(1) - pip_wheel_filename = pip_wheels[0] + pip_wheel_filepath = find_ensurepip_pip_wheel() + if pip_wheel_filepath is None: + return # There's no pip wheel, nothing to discover. # Add the wheel filename to the list of files so the SBOM file # and relationship generator can work its magic on the wheel too. PACKAGE_TO_FILES["pip"] = PackageFiles( - include=[f"Lib/ensurepip/_bundled/{pip_wheel_filename}"] + include=[str(pip_wheel_filepath.relative_to(CPYTHON_ROOT_DIR))] ) # Wheel filename format puts the version right after the project name. - pip_version = pip_wheel_filename.split("-")[1] + pip_version = pip_wheel_filepath.name.split("-")[1] pip_checksum_sha256 = hashlib.sha256( - (ensurepip_bundled_dir / pip_wheel_filename).read_bytes() + pip_wheel_filepath.read_bytes() ).hexdigest() - # Get pip's download location from PyPI. Check that the checksum is correct too. - try: - raw_text = urlopen(f"https://pypi.org/pypi/pip/{pip_version}/json").read() - pip_release_metadata = json.loads(raw_text) - url: dict[str, typing.Any] + pip_metadata = fetch_package_metadata_from_pypi( + project="pip", + version=pip_version, + filename=pip_wheel_filepath.name, + ) + # We couldn't fetch any metadata from PyPI, + # so we give up on verifying if we're not in CI. + if pip_metadata is None: + return + + pip_download_url, pip_actual_sha256 = pip_metadata + if pip_actual_sha256 != pip_checksum_sha256: + raise ValueError("Unexpected") + + # Parse 'pip/_vendor/vendor.txt' from the wheel for sub-dependencies. + with zipfile.ZipFile(pip_wheel_filepath) as whl: + vendor_txt_data = whl.read("pip/_vendor/vendor.txt").decode() + + # With this version regex we're assuming that pip isn't using pre-releases. + # If any version doesn't match we get a failure below, so we're safe doing this. + version_pin_re = re.compile(r"^([a-zA-Z0-9_.-]+)==([0-9.]*[0-9])$") + sbom_pip_dependency_spdx_ids = set() + for line in vendor_txt_data.splitlines(): + line = line.partition("#")[0].strip() # Strip comments and whitespace. + if not line: # Skip empty lines. + continue + + # Non-empty lines we must be able to match. + match = version_pin_re.match(line) + error_if(match is None, f"Couldn't parse line from pip vendor.txt: '{line}'") + assert match is not None # Make mypy happy. + + # Parse out and normalize the project name. + project_name, project_version = match.groups() + project_name = project_name.lower() + + # At this point if pip's metadata fetch succeeded we should + # expect this request to also succeed. + project_metadata = ( + fetch_package_metadata_from_pypi(project_name, project_version) + ) + assert project_metadata is not None + project_download_url, project_checksum_sha256 = project_metadata + + # Update our SBOM data with what we received from PyPI. + # Don't overwrite any existing values. + sbom_project_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{project_name}") + sbom_pip_dependency_spdx_ids.add(sbom_project_spdx_id) + for package in sbom_data["packages"]: + if package["SPDXID"] != sbom_project_spdx_id: + continue - # Look for a matching artifact filename and then check - # its remote checksum to the local one. - for url in pip_release_metadata["urls"]: - if url["filename"] == pip_wheel_filename: + # Only thing missing from this blob is the `licenseConcluded`, + # that needs to be triaged by human maintainers if the list changes. + package.update({ + "SPDXID": sbom_project_spdx_id, + "name": project_name, + "versionInfo": project_version, + "downloadLocation": project_download_url, + "checksums": [ + {"algorithm": "SHA256", "checksumValue": project_checksum_sha256} + ], + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": f"pkg:pypi/{project_name}@{project_version}", + "referenceType": "purl", + }, + ], + "primaryPackagePurpose": "SOURCE" + }) break - else: - raise ValueError(f"No matching filename on PyPI for '{pip_wheel_filename}'") - if url["digests"]["sha256"] != pip_checksum_sha256: - raise ValueError(f"Local pip checksum doesn't match artifact on PyPI") - - # Successfully found the download URL for the matching artifact. - pip_download_url = url["url"] - except (OSError, ValueError) as e: - print(f"Couldn't fetch pip's metadata from PyPI: {e}") - sys.exit(1) + PACKAGE_TO_FILES[project_name] = PackageFiles(include=None) # Remove pip from the existing SBOM packages if it's there # and then overwrite its entry with our own generated one. + sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip") sbom_data["packages"] = [ sbom_package for sbom_package in sbom_data["packages"] @@ -181,7 +334,7 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: ] sbom_data["packages"].append( { - "SPDXID": spdx_id("SPDXRef-PACKAGE-pip"), + "SPDXID": sbom_pip_spdx_id, "name": "pip", "versionInfo": pip_version, "originator": "Organization: Python Packaging Authority", @@ -205,12 +358,27 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: "primaryPackagePurpose": "SOURCE", } ) + for sbom_dep_spdx_id in sorted(sbom_pip_dependency_spdx_ids): + sbom_data["relationships"].append({ + "spdxElementId": sbom_pip_spdx_id, + "relatedSpdxElement": sbom_dep_spdx_id, + "relationshipType": "DEPENDS_ON" + }) def main() -> None: sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json" sbom_data = json.loads(sbom_path.read_bytes()) + # Check if pip should be removed if the wheel is missing. + # We can't reset the SBOM relationship data until checking this. + maybe_remove_pip_and_deps_from_sbom(sbom_data) + + # We regenerate all of this information. Package information + # should be preserved though since that is edited by humans. + sbom_data["files"] = [] + sbom_data["relationships"] = [] + # Insert pip's SBOM metadata from the wheel. discover_pip_sbom_package(sbom_data) @@ -227,9 +395,10 @@ def main() -> None: "name" not in package, "Package is missing the 'name' field" ) + missing_required_keys = REQUIRED_PROPERTIES_PACKAGE - set(package.keys()) error_if( - set(package.keys()) != REQUIRED_PROPERTIES_PACKAGE, - f"Package '{package['name']}' is missing required fields", + bool(missing_required_keys), + f"Package '{package['name']}' is missing required fields: {missing_required_keys}", ) error_if( package["SPDXID"] != spdx_id(f"SPDXRef-PACKAGE-{package['name']}"), @@ -257,15 +426,11 @@ def main() -> None: f"License identifier '{license_concluded}' not in SBOM tool allowlist" ) - # Regenerate file information from current data. - sbom_files = [] - sbom_relationships = [] - # We call 'sorted()' here a lot to avoid filesystem scan order issues. for name, files in sorted(PACKAGE_TO_FILES.items()): package_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{name}") exclude = files.exclude or () - for include in sorted(files.include): + for include in sorted(files.include or ()): # Find all the paths and then filter them through .gitignore. paths = glob.glob(include, root_dir=CPYTHON_ROOT_DIR, recursive=True) paths = filter_gitignored_paths(paths) @@ -285,7 +450,7 @@ def main() -> None: checksum_sha256 = hashlib.sha256(data).hexdigest() file_spdx_id = spdx_id(f"SPDXRef-FILE-{path}") - sbom_files.append({ + sbom_data["files"].append({ "SPDXID": file_spdx_id, "fileName": path, "checksums": [ @@ -295,15 +460,13 @@ def main() -> None: }) # Tie each file back to its respective package. - sbom_relationships.append({ + sbom_data["relationships"].append({ "spdxElementId": package_spdx_id, "relatedSpdxElement": file_spdx_id, "relationshipType": "CONTAINS", }) # Update the SBOM on disk - sbom_data["files"] = sbom_files - sbom_data["relationships"] = sbom_relationships sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True)) From 01d970c1b8acf3ccf199d5de151a635ffd9d8c61 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 26 Jan 2024 12:55:22 +0300 Subject: [PATCH 015/127] gh-101100: Fix sphinx warnings in `c-api/file.rst` (#114546) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/c-api/file.rst | 9 +++++++-- Doc/c-api/object.rst | 8 ++++++++ Doc/tools/.nitignore | 2 -- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index b36c800e00444a..0a03841e467cad 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -65,8 +65,13 @@ the :mod:`io` APIs instead. Overrides the normal behavior of :func:`io.open_code` to pass its parameter through the provided handler. - The handler is a function of type :c:expr:`PyObject *(\*)(PyObject *path, - void *userData)`, where *path* is guaranteed to be :c:type:`PyUnicodeObject`. + The handler is a function of type: + + .. c:type:: Py_OpenCodeHookFunction + + Equivalent of :c:expr:`PyObject *(\*)(PyObject *path, + void *userData)`, where *path* is guaranteed to be + :c:type:`PyUnicodeObject`. The *userData* pointer is passed into the hook function. Since hook functions may be called from different runtimes, this pointer should not diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 8a179690d048e3..4f656779c80b1a 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -19,6 +19,14 @@ Object Protocol to NotImplemented and return it). +.. c:macro:: Py_PRINT_RAW + + Flag to be used with multiple functions that print the object (like + :c:func:`PyObject_Print` and :c:func:`PyFile_WriteObject`). + If passed, these function would use the :func:`str` of the object + instead of the :func:`repr`. + + .. c:function:: int PyObject_Print(PyObject *o, FILE *fp, int flags) Print an object *o*, on file *fp*. Returns ``-1`` on error. The flags argument diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 00b4b6919ff14a..d56a44ad09a6f8 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -4,7 +4,6 @@ Doc/c-api/descriptor.rst Doc/c-api/exceptions.rst -Doc/c-api/file.rst Doc/c-api/float.rst Doc/c-api/gcsupport.rst Doc/c-api/init.rst @@ -12,7 +11,6 @@ Doc/c-api/init_config.rst Doc/c-api/intro.rst Doc/c-api/memoryview.rst Doc/c-api/module.rst -Doc/c-api/object.rst Doc/c-api/stable.rst Doc/c-api/sys.rst Doc/c-api/type.rst From 06c5de36f222b926bbc94831536096b974bd5e77 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 11:05:08 +0100 Subject: [PATCH 016/127] Docs: reword dbm.gnu introduction (#114548) Also... - consistently spell GDBM as GDBM - silence gdbm class refs - improve accuracy of dbm.gdbm.open() spec --- Doc/library/dbm.rst | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index eca1c25602a018..0a9d28a41c6d7a 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -137,27 +137,28 @@ then prints out the contents of the database:: The individual submodules are described in the following sections. -:mod:`dbm.gnu` --- GNU's reinterpretation of dbm ------------------------------------------------- +:mod:`dbm.gnu` --- GNU database manager +--------------------------------------- .. module:: dbm.gnu :platform: Unix - :synopsis: GNU's reinterpretation of dbm. + :synopsis: GNU database manager **Source code:** :source:`Lib/dbm/gnu.py` -------------- -This module is quite similar to the :mod:`dbm` module, but uses the GNU library -``gdbm`` instead to provide some additional functionality. Please note that the -file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible. +The :mod:`dbm.gnu` module provides an interface to the :abbr:`GDBM (GNU dbm)` +library, similar to the :mod:`dbm.ndbm` module, but with additional +functionality like crash tolerance. -The :mod:`dbm.gnu` module provides an interface to the GNU DBM library. -``dbm.gnu.gdbm`` objects behave like mappings (dictionaries), except that keys and -values are always converted to bytes before storing. Printing a ``gdbm`` -object doesn't print the -keys and values, and the :meth:`items` and :meth:`values` methods are not -supported. +:class:`!gdbm` objects behave similar to :term:`mappings `, +except that keys and values are always converted to :class:`bytes` before storing, +and the :meth:`!items` and :meth:`!values` methods are not supported. + +.. note:: + The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are + incompatible and can not be used interchangeably. .. exception:: error @@ -165,9 +166,9 @@ supported. raised for general mapping errors like specifying an incorrect key. -.. function:: open(filename[, flag[, mode]]) +.. function:: open(filename, flag="r", mode=0o666, /) - Open a ``gdbm`` database and return a :class:`gdbm` object. The *filename* + Open a GDBM database and return a :class:`!gdbm` object. The *filename* argument is the name of the database file. The optional *flag* argument can be: @@ -196,14 +197,14 @@ supported. | ``'u'`` | Do not lock database. | +---------+--------------------------------------------+ - Not all flags are valid for all versions of ``gdbm``. The module constant + Not all flags are valid for all versions of GDBM. The module constant :const:`open_flags` is a string of supported flag characters. The exception :exc:`error` is raised if an invalid flag is specified. The optional *mode* argument is the Unix mode of the file, used only when the database has to be created. It defaults to octal ``0o666``. - In addition to the dictionary-like methods, ``gdbm`` objects have the + In addition to the dictionary-like methods, :class:`gdbm` objects have the following methods: .. versionchanged:: 3.11 @@ -212,7 +213,7 @@ supported. .. method:: gdbm.firstkey() It's possible to loop over every key in the database using this method and the - :meth:`nextkey` method. The traversal is ordered by ``gdbm``'s internal + :meth:`nextkey` method. The traversal is ordered by GDBM's internal hash values, and won't be sorted by the key values. This method returns the starting key. @@ -230,7 +231,7 @@ supported. .. method:: gdbm.reorganize() If you have carried out a lot of deletions and would like to shrink the space - used by the ``gdbm`` file, this routine will reorganize the database. ``gdbm`` + used by the GDBM file, this routine will reorganize the database. :class:`!gdbm` objects will not shorten the length of a database file except by using this reorganization; otherwise, deleted file space will be kept and reused as new (key, value) pairs are added. @@ -242,11 +243,11 @@ supported. .. method:: gdbm.close() - Close the ``gdbm`` database. + Close the GDBM database. .. method:: gdbm.clear() - Remove all items from the ``gdbm`` database. + Remove all items from the GDBM database. .. versionadded:: 3.13 From d0f7f5c41d71758c59f9372a192e927d73cf7c27 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 26 Jan 2024 05:10:03 -0500 Subject: [PATCH 017/127] gh-114312: Fix rare event counter tests on aarch64 (GH-114554) --- Modules/_testinternalcapi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 2c32c691afa583..c4a648a1816392 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1642,11 +1642,11 @@ get_rare_event_counters(PyObject *self, PyObject *type) return Py_BuildValue( "{sksksksksk}", - "set_class", interp->rare_events.set_class, - "set_bases", interp->rare_events.set_bases, - "set_eval_frame_func", interp->rare_events.set_eval_frame_func, - "builtin_dict", interp->rare_events.builtin_dict, - "func_modification", interp->rare_events.func_modification + "set_class", (unsigned long)interp->rare_events.set_class, + "set_bases", (unsigned long)interp->rare_events.set_bases, + "set_eval_frame_func", (unsigned long)interp->rare_events.set_eval_frame_func, + "builtin_dict", (unsigned long)interp->rare_events.builtin_dict, + "func_modification", (unsigned long)interp->rare_events.func_modification ); } From dcd28b5c35dda8e2cb7c5f66450f2aff0948c001 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 11:11:35 +0100 Subject: [PATCH 018/127] gh-114569: Use PyMem_* APIs for most non-PyObject uses (#114574) Fix usage in Modules, Objects, and Parser subdirectories. --- Modules/_elementtree.c | 23 +++++++++++++---------- Modules/_sre/sre_lib.h | 4 ++-- Modules/mathmodule.c | 12 ++++++------ Modules/pyexpat.c | 2 +- Objects/bytearrayobject.c | 14 +++++++------- Objects/typeobject.c | 10 +++++----- Parser/lexer/lexer.c | 4 ++-- 7 files changed, 36 insertions(+), 33 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index b574c96d3f9625..54451081211654 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -267,7 +267,7 @@ typedef struct { LOCAL(int) create_extra(ElementObject* self, PyObject* attrib) { - self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); + self->extra = PyMem_Malloc(sizeof(ElementObjectExtra)); if (!self->extra) { PyErr_NoMemory(); return -1; @@ -295,10 +295,11 @@ dealloc_extra(ElementObjectExtra *extra) for (i = 0; i < extra->length; i++) Py_DECREF(extra->children[i]); - if (extra->children != extra->_children) - PyObject_Free(extra->children); + if (extra->children != extra->_children) { + PyMem_Free(extra->children); + } - PyObject_Free(extra); + PyMem_Free(extra); } LOCAL(void) @@ -495,14 +496,16 @@ element_resize(ElementObject* self, Py_ssize_t extra) * "children", which needs at least 4 bytes. Although it's a * false alarm always assume at least one child to be safe. */ - children = PyObject_Realloc(self->extra->children, - size * sizeof(PyObject*)); - if (!children) + children = PyMem_Realloc(self->extra->children, + size * sizeof(PyObject*)); + if (!children) { goto nomemory; + } } else { - children = PyObject_Malloc(size * sizeof(PyObject*)); - if (!children) + children = PyMem_Malloc(size * sizeof(PyObject*)); + if (!children) { goto nomemory; + } /* copy existing children from static area to malloc buffer */ memcpy(children, self->extra->children, self->extra->length * sizeof(PyObject*)); @@ -3044,7 +3047,7 @@ _elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag, #define EXPAT(st, func) ((st)->expat_capi->func) static XML_Memory_Handling_Suite ExpatMemoryHandler = { - PyObject_Malloc, PyObject_Realloc, PyObject_Free}; + PyMem_Malloc, PyMem_Realloc, PyMem_Free}; typedef struct { PyObject_HEAD diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index f5497d9ff2b93f..97fbb0a75e54b6 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1122,7 +1122,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* install new repeat context */ /* TODO(https://github.com/python/cpython/issues/67877): Fix this * potential memory leak. */ - ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep)); + ctx->u.rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(*ctx->u.rep)); if (!ctx->u.rep) { PyErr_NoMemory(); RETURN_FAILURE; @@ -1136,7 +1136,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) state->ptr = ptr; DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); state->repeat = ctx->u.rep->prev; - PyObject_Free(ctx->u.rep); + PyMem_Free(ctx->u.rep); if (ret) { RETURN_ON_ERROR(ret); diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 2a796c1c55d2f0..0be46b1574c1fe 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2570,7 +2570,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) goto error_exit; } if (n > NUM_STACK_ELEMS) { - diffs = (double *) PyObject_Malloc(n * sizeof(double)); + diffs = (double *) PyMem_Malloc(n * sizeof(double)); if (diffs == NULL) { PyErr_NoMemory(); goto error_exit; @@ -2590,7 +2590,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) } result = vector_norm(n, diffs, max, found_nan); if (diffs != diffs_on_stack) { - PyObject_Free(diffs); + PyMem_Free(diffs); } if (p_allocated) { Py_DECREF(p); @@ -2602,7 +2602,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) error_exit: if (diffs != diffs_on_stack) { - PyObject_Free(diffs); + PyMem_Free(diffs); } if (p_allocated) { Py_DECREF(p); @@ -2626,7 +2626,7 @@ math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs) double *coordinates = coord_on_stack; if (nargs > NUM_STACK_ELEMS) { - coordinates = (double *) PyObject_Malloc(nargs * sizeof(double)); + coordinates = (double *) PyMem_Malloc(nargs * sizeof(double)); if (coordinates == NULL) { return PyErr_NoMemory(); } @@ -2643,13 +2643,13 @@ math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } result = vector_norm(nargs, coordinates, max, found_nan); if (coordinates != coord_on_stack) { - PyObject_Free(coordinates); + PyMem_Free(coordinates); } return PyFloat_FromDouble(result); error_exit: if (coordinates != coord_on_stack) { - PyObject_Free(coordinates); + PyMem_Free(coordinates); } return NULL; } diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index ec44892d101e44..7c08eda83e66b2 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -21,7 +21,7 @@ module pyexpat #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION) static XML_Memory_Handling_Suite ExpatMemoryHandler = { - PyObject_Malloc, PyObject_Realloc, PyObject_Free}; + PyMem_Malloc, PyMem_Realloc, PyMem_Free}; enum HandlerTypes { StartElement, diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 659de7d3dd5a99..acc59b926448ca 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -132,7 +132,7 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) } else { alloc = size + 1; - new->ob_bytes = PyObject_Malloc(alloc); + new->ob_bytes = PyMem_Malloc(alloc); if (new->ob_bytes == NULL) { Py_DECREF(new); return PyErr_NoMemory(); @@ -221,17 +221,17 @@ PyByteArray_Resize(PyObject *self, Py_ssize_t requested_size) } if (logical_offset > 0) { - sval = PyObject_Malloc(alloc); + sval = PyMem_Malloc(alloc); if (sval == NULL) { PyErr_NoMemory(); return -1; } memcpy(sval, PyByteArray_AS_STRING(self), Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self))); - PyObject_Free(obj->ob_bytes); + PyMem_Free(obj->ob_bytes); } else { - sval = PyObject_Realloc(obj->ob_bytes, alloc); + sval = PyMem_Realloc(obj->ob_bytes, alloc); if (sval == NULL) { PyErr_NoMemory(); return -1; @@ -951,7 +951,7 @@ bytearray_repr(PyByteArrayObject *self) } newsize += 6 + length * 4; - buffer = PyObject_Malloc(newsize); + buffer = PyMem_Malloc(newsize); if (buffer == NULL) { PyErr_NoMemory(); return NULL; @@ -1008,7 +1008,7 @@ bytearray_repr(PyByteArrayObject *self) } v = PyUnicode_FromStringAndSize(buffer, p - buffer); - PyObject_Free(buffer); + PyMem_Free(buffer); return v; } @@ -1088,7 +1088,7 @@ bytearray_dealloc(PyByteArrayObject *self) PyErr_Print(); } if (self->ob_bytes != 0) { - PyObject_Free(self->ob_bytes); + PyMem_Free(self->ob_bytes); } Py_TYPE(self)->tp_free((PyObject *)self); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a8c3b8896d36eb..114cf21f95e744 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3493,7 +3493,7 @@ type_new_set_doc(PyTypeObject *type) // Silently truncate the docstring if it contains a null byte Py_ssize_t size = strlen(doc_str) + 1; - char *tp_doc = (char *)PyObject_Malloc(size); + char *tp_doc = (char *)PyMem_Malloc(size); if (tp_doc == NULL) { PyErr_NoMemory(); return -1; @@ -4166,12 +4166,12 @@ _PyType_FromMetaclass_impl( goto finally; } if (slot->pfunc == NULL) { - PyObject_Free(tp_doc); + PyMem_Free(tp_doc); tp_doc = NULL; } else { size_t len = strlen(slot->pfunc)+1; - tp_doc = PyObject_Malloc(len); + tp_doc = PyMem_Malloc(len); if (tp_doc == NULL) { PyErr_NoMemory(); goto finally; @@ -4501,7 +4501,7 @@ _PyType_FromMetaclass_impl( Py_CLEAR(res); } Py_XDECREF(bases); - PyObject_Free(tp_doc); + PyMem_Free(tp_doc); Py_XDECREF(ht_name); PyMem_Free(_ht_tpname); return (PyObject*)res; @@ -5099,7 +5099,7 @@ type_dealloc(PyObject *self) /* A type's tp_doc is heap allocated, unlike the tp_doc slots * of most other objects. It's okay to cast it to char *. */ - PyObject_Free((char *)type->tp_doc); + PyMem_Free((char *)type->tp_doc); PyHeapTypeObject *et = (PyHeapTypeObject *)type; Py_XDECREF(et->ht_name); diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index ebf7686773ff45..82b0e4ee352d62 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -129,7 +129,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { if (hash_detected) { Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); + char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char)); if (!result) { return -1; } @@ -154,7 +154,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { result[j] = '\0'; // Null-terminate the result string res = PyUnicode_DecodeUTF8(result, j, NULL); - PyObject_Free(result); + PyMem_Free(result); } else { res = PyUnicode_DecodeUTF8( tok_mode->last_expr_buffer, From 65cf5dce11a38e327b9b0abfca279d650452b34f Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 11:15:34 +0100 Subject: [PATCH 019/127] Docs: rework dbm introduction (#114551) - add refs to other parts of the docs (dict, bytes, etc.) - clarify whichdb() return value by using list markup - silence refs to example or generic submodule methods (keys, get, etc.) --- Doc/library/dbm.rst | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 0a9d28a41c6d7a..1a8e0158fcdbd5 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -28,10 +28,11 @@ the Oracle Berkeley DB. available --- :mod:`dbm.gnu`, :mod:`dbm.ndbm` or :mod:`dbm.dumb` --- should be used to open a given file. - Returns one of the following values: ``None`` if the file can't be opened - because it's unreadable or doesn't exist; the empty string (``''``) if the - file's format can't be guessed; or a string containing the required module - name, such as ``'dbm.ndbm'`` or ``'dbm.gnu'``. + Return one of the following values: + + * ``None`` if the file can't be opened because it's unreadable or doesn't exist + * the empty string (``''``) if the file's format can't be guessed + * a string containing the required module name, such as ``'dbm.ndbm'`` or ``'dbm.gnu'`` .. versionchanged:: 3.11 Accepts :term:`path-like object` for filename. @@ -74,13 +75,13 @@ the Oracle Berkeley DB. modified by the prevailing umask). -The object returned by :func:`.open` supports the same basic functionality as -dictionaries; keys and their corresponding values can be stored, retrieved, and -deleted, and the :keyword:`in` operator and the :meth:`keys` method are -available, as well as :meth:`get` and :meth:`setdefault`. +The object returned by :func:`open` supports the same basic functionality as a +:class:`dict`; keys and their corresponding values can be stored, retrieved, and +deleted, and the :keyword:`in` operator and the :meth:`!keys` method are +available, as well as :meth:`!get` and :meth:`!setdefault`. .. versionchanged:: 3.2 - :meth:`get` and :meth:`setdefault` are now available in all database modules. + :meth:`!get` and :meth:`!setdefault` are now available in all database modules. .. versionchanged:: 3.8 Deleting a key from a read-only database raises database module specific error @@ -89,7 +90,7 @@ available, as well as :meth:`get` and :meth:`setdefault`. .. versionchanged:: 3.11 Accepts :term:`path-like object` for file. -Key and values are always stored as bytes. This means that when +Key and values are always stored as :class:`bytes`. This means that when strings are used they are implicitly converted to the default encoding before being stored. From 4cf068ed0879cccf86a45f06fb274b350b89e911 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 13:35:56 +0100 Subject: [PATCH 020/127] Docs: reword dbm.ndbm introduction (#114549) - add abbreviation directives for NDBM and GDBM - consistently spell NDBM as NDBM - silence broken ndbm class refs - improve accuracy of dbm.ndbm.open() spec - use replacement text for NDBM/GDBM file format incompatibility note --- Doc/library/dbm.rst | 47 +++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 1a8e0158fcdbd5..bc5bb0cec0cef7 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -52,6 +52,10 @@ the Oracle Berkeley DB. .. |flag_n| replace:: Always create a new, empty database, open for reading and writing. +.. |incompat_note| replace:: + The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible + and can not be used interchangeably. + .. function:: open(file, flag='r', mode=0o666) Open the database file *file* and return a corresponding object. @@ -157,9 +161,7 @@ functionality like crash tolerance. except that keys and values are always converted to :class:`bytes` before storing, and the :meth:`!items` and :meth:`!values` methods are not supported. -.. note:: - The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are - incompatible and can not be used interchangeably. +.. note:: |incompat_note| .. exception:: error @@ -253,29 +255,31 @@ and the :meth:`!items` and :meth:`!values` methods are not supported. .. versionadded:: 3.13 -:mod:`dbm.ndbm` --- Interface based on ndbm -------------------------------------------- +:mod:`dbm.ndbm` --- New Database Manager +---------------------------------------- .. module:: dbm.ndbm :platform: Unix - :synopsis: The standard "database" interface, based on ndbm. + :synopsis: The New Database Manager **Source code:** :source:`Lib/dbm/ndbm.py` -------------- -The :mod:`dbm.ndbm` module provides an interface to the Unix "(n)dbm" library. -Dbm objects behave like mappings (dictionaries), except that keys and values are -always stored as bytes. Printing a ``dbm`` object doesn't print the keys and -values, and the :meth:`items` and :meth:`values` methods are not supported. +The :mod:`dbm.ndbm` module provides an interface to the +:abbr:`NDBM (New Database Manager)` library. +:class:`!ndbm` objects behave similar to :term:`mappings `, +except that keys and values are always stored as :class:`bytes`, +and the :meth:`!items` and :meth:`!values` methods are not supported. + +This module can be used with the "classic" NDBM interface or the +:abbr:`GDBM (GNU dbm)` compatibility interface. -This module can be used with the "classic" ndbm interface or the GNU GDBM -compatibility interface. On Unix, the :program:`configure` script will attempt -to locate the appropriate header file to simplify building this module. +.. note:: |incompat_note| .. warning:: - The ndbm library shipped as part of macOS has an undocumented limitation on the + The NDBM library shipped as part of macOS has an undocumented limitation on the size of values, which can result in corrupted database files when storing values larger than this limit. Reading such corrupted files can result in a hard crash (segmentation fault). @@ -288,13 +292,14 @@ to locate the appropriate header file to simplify building this module. .. data:: library - Name of the ``ndbm`` implementation library used. + Name of the NDBM implementation library used. -.. function:: open(filename[, flag[, mode]]) +.. function:: open(filename, flag="r", mode=0o666, /) - Open a dbm database and return a ``ndbm`` object. The *filename* argument is the - name of the database file (without the :file:`.dir` or :file:`.pag` extensions). + Open an NDBM database and return an :class:`!ndbm` object. + The *filename* argument is the name of the database file + (without the :file:`.dir` or :file:`.pag` extensions). The optional *flag* argument must be one of these values: @@ -310,7 +315,7 @@ to locate the appropriate header file to simplify building this module. database has to be created. It defaults to octal ``0o666`` (and will be modified by the prevailing umask). - In addition to the dictionary-like methods, ``ndbm`` objects + In addition to the dictionary-like methods, :class:`!ndbm` objects provide the following method: .. versionchanged:: 3.11 @@ -318,11 +323,11 @@ to locate the appropriate header file to simplify building this module. .. method:: ndbm.close() - Close the ``ndbm`` database. + Close the NDBM database. .. method:: ndbm.clear() - Remove all items from the ``ndbm`` database. + Remove all items from the NDBM database. .. versionadded:: 3.13 From 8710faeac28e65c65862359413e8341492f529af Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 13:36:37 +0100 Subject: [PATCH 021/127] Docs: fix versionchanged directives for dbm.open() and dbm.whichdb() (#114594) --- Doc/library/dbm.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index bc5bb0cec0cef7..55846e996b5d26 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -34,8 +34,8 @@ the Oracle Berkeley DB. * the empty string (``''``) if the file's format can't be guessed * a string containing the required module name, such as ``'dbm.ndbm'`` or ``'dbm.gnu'`` -.. versionchanged:: 3.11 - Accepts :term:`path-like object` for filename. + .. versionchanged:: 3.11 + *filename* accepts a :term:`path-like object`. .. Substitutions for the open() flag param docs; all submodules use the same text. @@ -78,6 +78,9 @@ the Oracle Berkeley DB. database has to be created. It defaults to octal ``0o666`` (and will be modified by the prevailing umask). + .. versionchanged:: 3.11 + *file* accepts a :term:`path-like object`. + The object returned by :func:`open` supports the same basic functionality as a :class:`dict`; keys and their corresponding values can be stored, retrieved, and @@ -91,9 +94,6 @@ available, as well as :meth:`!get` and :meth:`!setdefault`. Deleting a key from a read-only database raises database module specific error instead of :exc:`KeyError`. -.. versionchanged:: 3.11 - Accepts :term:`path-like object` for file. - Key and values are always stored as :class:`bytes`. This means that when strings are used they are implicitly converted to the default encoding before being stored. From 30b7b4f73cd876732244de06d079a2caf2a7b95c Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Fri, 26 Jan 2024 08:42:49 -0500 Subject: [PATCH 022/127] Docs: 'still' is a better word than 'nonetheless' (#114598) --- Doc/library/dataclasses.rst | 2 +- Doc/library/imaplib.rst | 2 +- Doc/using/cmdline.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index cde147d1cbb501..88f2e0251b1e51 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -141,7 +141,7 @@ Module contents then :func:`dataclass` *may* add an implicit :meth:`~object.__hash__` method. Although not recommended, you can force :func:`dataclass` to create a :meth:`~object.__hash__` method with ``unsafe_hash=True``. This might be the case - if your class is logically immutable but can nonetheless be mutated. + if your class is logically immutable but can still be mutated. This is a specialized use case and should be considered carefully. Here are the rules governing implicit creation of a :meth:`~object.__hash__` diff --git a/Doc/library/imaplib.rst b/Doc/library/imaplib.rst index 1f774e64b0eae3..d5c868def3b64f 100644 --- a/Doc/library/imaplib.rst +++ b/Doc/library/imaplib.rst @@ -531,7 +531,7 @@ An :class:`IMAP4` instance has the following methods: allowed creation of such tags, and popular IMAP servers, such as Gmail, accept and produce such flags. There are non-Python programs which also create such tags. Although it is an RFC violation and IMAP clients and - servers are supposed to be strict, imaplib nonetheless continues to allow + servers are supposed to be strict, imaplib still continues to allow such tags to be created for backward compatibility reasons, and as of Python 3.6, handles them if they are sent from the server, since this improves real-world compatibility. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index df8b07c6118599..53c95ca1a05c9b 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -623,7 +623,7 @@ Setting the environment variable ``TERM`` to ``dumb`` will disable color. If the environment variable ``FORCE_COLOR`` is set, then color will be enabled regardless of the value of TERM. This is useful on CI systems which -aren’t terminals but can none-the-less display ANSI escape sequences. +aren’t terminals but can still display ANSI escape sequences. If the environment variable ``NO_COLOR`` is set, Python will disable all color in the output. This takes precedence over ``FORCE_COLOR``. From 442a299af06d0dfe89484a841451666503479e2e Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 26 Jan 2024 14:38:24 +0000 Subject: [PATCH 023/127] gh-114272: Allow _wmi audit test to succeed even if it times out (GH-114602) --- Lib/test/audit-tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py index ce4a11b119c900..de7d0da560a1c7 100644 --- a/Lib/test/audit-tests.py +++ b/Lib/test/audit-tests.py @@ -487,7 +487,13 @@ def hook(event, args): print(event, args[0]) sys.addaudithook(hook) - _wmi.exec_query("SELECT * FROM Win32_OperatingSystem") + try: + _wmi.exec_query("SELECT * FROM Win32_OperatingSystem") + except WindowsError as e: + # gh-112278: WMI may be slow response when first called, but we still + # get the audit event, so just ignore the timeout + if e.winerror != 258: + raise def test_syslog(): import syslog From 0bd8297a2208125f76807cdf01f72abe5c94136b Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 16:11:45 +0100 Subject: [PATCH 024/127] Docs: mark up dbm.open() with param list (#114601) Also consolidate following paragraphs regarding database objects. --- Doc/library/dbm.rst | 52 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 55846e996b5d26..3a7dad1a0736a0 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -58,41 +58,33 @@ the Oracle Berkeley DB. .. function:: open(file, flag='r', mode=0o666) - Open the database file *file* and return a corresponding object. + Open a database and return the corresponding database object. - If the database file already exists, the :func:`whichdb` function is used to - determine its type and the appropriate module is used; if it does not exist, - the first module listed above that can be imported is used. + :param file: + The database file to open. - The optional *flag* argument can be: + If the database file already exists, the :func:`whichdb` function is used to + determine its type and the appropriate module is used; if it does not exist, + the first submodule listed above that can be imported is used. + :type file: :term:`path-like object` - .. csv-table:: - :header: "Value", "Meaning" + :param str flag: + * ``'r'`` (default), |flag_r| + * ``'w'``, |flag_w| + * ``'c'``, |flag_c| + * ``'n'``, |flag_n| - ``'r'`` (default), |flag_r| - ``'w'``, |flag_w| - ``'c'``, |flag_c| - ``'n'``, |flag_n| - - The optional *mode* argument is the Unix mode of the file, used only when the - database has to be created. It defaults to octal ``0o666`` (and will be - modified by the prevailing umask). + :param int mode: + The Unix file access mode of the file (default: octal ``0o666``), + used only when the database has to be created. .. versionchanged:: 3.11 *file* accepts a :term:`path-like object`. - -The object returned by :func:`open` supports the same basic functionality as a +The object returned by :func:`~dbm.open` supports the same basic functionality as a :class:`dict`; keys and their corresponding values can be stored, retrieved, and deleted, and the :keyword:`in` operator and the :meth:`!keys` method are -available, as well as :meth:`!get` and :meth:`!setdefault`. - -.. versionchanged:: 3.2 - :meth:`!get` and :meth:`!setdefault` are now available in all database modules. - -.. versionchanged:: 3.8 - Deleting a key from a read-only database raises database module specific error - instead of :exc:`KeyError`. +available, as well as :meth:`!get` and :meth:`!setdefault` methods. Key and values are always stored as :class:`bytes`. This means that when strings are used they are implicitly converted to the default encoding before @@ -101,9 +93,17 @@ being stored. These objects also support being used in a :keyword:`with` statement, which will automatically close them when done. +.. versionchanged:: 3.2 + :meth:`!get` and :meth:`!setdefault` methods are now available for all + :mod:`dbm` backends. + .. versionchanged:: 3.4 Added native support for the context management protocol to the objects - returned by :func:`.open`. + returned by :func:`~dbm.open`. + +.. versionchanged:: 3.8 + Deleting a key from a read-only database raises a database module specific exception + instead of :exc:`KeyError`. The following example records some hostnames and a corresponding title, and then prints out the contents of the database:: From 504334c7be5a56237df2598d338cd494a42fca4c Mon Sep 17 00:00:00 2001 From: Rito Takeuchi Date: Sat, 27 Jan 2024 00:19:41 +0900 Subject: [PATCH 025/127] gh-77749: Fix inconsistent behavior of non-ASCII handling in EmailPolicy.fold() (GH-6986) It now always encodes non-ASCII characters in headers if utf8 is false. Co-authored-by: Serhiy Storchaka --- Lib/email/policy.py | 9 ++++++++- Lib/test/test_email/test_policy.py | 17 +++++++++++++++++ ...024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst | 2 ++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 611deb50bb5290..8816c84ed175a7 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -210,8 +210,15 @@ def _fold(self, name, value, refold_binary=False): self.refold_source == 'long' and (lines and len(lines[0])+len(name)+2 > maxlen or any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): + + if not refold: + if not self.utf8: + refold = not value.isascii() + elif refold_binary: + refold = _has_surrogates(value) + if refold: return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index e87c275549406d..c6b9c80efe1b54 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -135,6 +135,23 @@ def test_policy_addition(self): for attr, value in expected.items(): self.assertEqual(getattr(added, attr), value) + def test_fold_utf8(self): + expected_ascii = 'Subject: =?utf-8?q?=C3=A1?=\n' + expected_utf8 = 'Subject: á\n' + + msg = email.message.EmailMessage() + s = 'á' + msg['Subject'] = s + + p_ascii = email.policy.default.clone() + p_utf8 = email.policy.default.clone(utf8=True) + + self.assertEqual(p_ascii.fold('Subject', msg['Subject']), expected_ascii) + self.assertEqual(p_utf8.fold('Subject', msg['Subject']), expected_utf8) + + self.assertEqual(p_ascii.fold('Subject', s), expected_ascii) + self.assertEqual(p_utf8.fold('Subject', s), expected_utf8) + def test_fold_zero_max_line_length(self): expected = 'Subject: =?utf-8?q?=C3=A1?=\n' diff --git a/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst b/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst new file mode 100644 index 00000000000000..f1c99c09d2dfe1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst @@ -0,0 +1,2 @@ +:meth:`email.policy.EmailPolicy.fold` now always encodes non-ASCII characters +in headers if :attr:`~email.policy.EmailPolicy.utf8` is false. From 699779256ec4d4b8afb8211de08ef1382c78c370 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 27 Jan 2024 00:25:16 +0900 Subject: [PATCH 026/127] gh-111968: Unify freelist naming schema to Eric's suggestion (gh-114581) --- Include/internal/pycore_freelist.h | 14 +++++++------- Objects/floatobject.c | 4 ++-- Objects/genobject.c | 4 ++-- Objects/listobject.c | 4 ++-- Objects/sliceobject.c | 14 +++++++------- Objects/tupleobject.c | 2 +- Python/context.c | 4 ++-- Python/object_stack.c | 4 ++-- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index dfb12839affedf..b91d2bc066b783 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -103,13 +103,13 @@ struct _Py_object_stack_state { }; typedef struct _Py_freelist_state { - struct _Py_float_state float_state; - struct _Py_tuple_state tuple_state; - struct _Py_list_state list_state; - struct _Py_slice_state slice_state; - struct _Py_context_state context_state; - struct _Py_async_gen_state async_gen_state; - struct _Py_object_stack_state object_stack_state; + struct _Py_float_state floats; + struct _Py_tuple_state tuples; + struct _Py_list_state lists; + struct _Py_slice_state slices; + struct _Py_context_state contexts; + struct _Py_async_gen_state async_gens; + struct _Py_object_stack_state object_stacks; } _PyFreeListState; #ifdef __cplusplus diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 912c450a5e1055..b7611d5f96ac3b 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -32,7 +32,7 @@ get_float_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); assert(state != NULL); - return &state->float_state; + return &state->floats; } #endif @@ -1993,7 +1993,7 @@ void _PyFloat_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_float_state *state = &freelist_state->float_state; + struct _Py_float_state *state = &freelist_state->floats; PyFloatObject *f = state->free_list; while (f != NULL) { PyFloatObject *next = (PyFloatObject*) Py_TYPE(f); diff --git a/Objects/genobject.c b/Objects/genobject.c index e9aeb7ab9a9fa8..f47197330fdd80 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -1633,7 +1633,7 @@ static struct _Py_async_gen_state * get_async_gen_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); - return &state->async_gen_state; + return &state->async_gens; } #endif @@ -1659,7 +1659,7 @@ void _PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_async_gen_state *state = &freelist_state->async_gen_state; + struct _Py_async_gen_state *state = &freelist_state->async_gens; while (state->value_numfree > 0) { _PyAsyncGenWrappedValue *o; diff --git a/Objects/listobject.c b/Objects/listobject.c index 401d1026133f4e..1e885f9cb80c4c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -26,7 +26,7 @@ get_list_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); assert(state != NULL); - return &state->list_state; + return &state->lists; } #endif @@ -124,7 +124,7 @@ void _PyList_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_list_state *state = &freelist_state->list_state; + struct _Py_list_state *state = &freelist_state->lists; while (state->numfree > 0) { PyListObject *op = state->free_list[--state->numfree]; assert(PyList_CheckExact(op)); diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 440c1da30620c3..8b9d6bbfd858b7 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -106,9 +106,9 @@ PyObject _Py_EllipsisObject = _PyObject_HEAD_INIT(&PyEllipsis_Type); void _PySlice_ClearCache(_PyFreeListState *state) { #ifdef WITH_FREELISTS - PySliceObject *obj = state->slice_state.slice_cache; + PySliceObject *obj = state->slices.slice_cache; if (obj != NULL) { - state->slice_state.slice_cache = NULL; + state->slices.slice_cache = NULL; PyObject_GC_Del(obj); } #endif @@ -132,9 +132,9 @@ _PyBuildSlice_Consume2(PyObject *start, PyObject *stop, PyObject *step) PySliceObject *obj; #ifdef WITH_FREELISTS _PyFreeListState *state = _PyFreeListState_GET(); - if (state->slice_state.slice_cache != NULL) { - obj = state->slice_state.slice_cache; - state->slice_state.slice_cache = NULL; + if (state->slices.slice_cache != NULL) { + obj = state->slices.slice_cache; + state->slices.slice_cache = NULL; _Py_NewReference((PyObject *)obj); } else @@ -370,8 +370,8 @@ slice_dealloc(PySliceObject *r) Py_DECREF(r->stop); #ifdef WITH_FREELISTS _PyFreeListState *state = _PyFreeListState_GET(); - if (state->slice_state.slice_cache == NULL) { - state->slice_state.slice_cache = r; + if (state->slices.slice_cache == NULL) { + state->slices.slice_cache = r; } else #endif diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index e1b8e4004c6163..b9bf6cd48f6129 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1125,7 +1125,7 @@ tuple_iter(PyObject *seq) * freelists * *************/ -#define STATE (state->tuple_state) +#define STATE (state->tuples) #define FREELIST_FINALIZED (STATE.numfree[0] < 0) static inline PyTupleObject * diff --git a/Python/context.c b/Python/context.c index 1e90811c374ec6..294485e5b407df 100644 --- a/Python/context.c +++ b/Python/context.c @@ -69,7 +69,7 @@ static struct _Py_context_state * get_context_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); - return &state->context_state; + return &state->contexts; } #endif @@ -1270,7 +1270,7 @@ void _PyContext_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_context_state *state = &freelist_state->context_state; + struct _Py_context_state *state = &freelist_state->contexts; for (; state->numfree > 0; state->numfree--) { PyContext *ctx = state->freelist; state->freelist = (PyContext *)ctx->ctx_weakreflist; diff --git a/Python/object_stack.c b/Python/object_stack.c index 66b37bcbb44475..8544892eb71dcb 100644 --- a/Python/object_stack.c +++ b/Python/object_stack.c @@ -12,7 +12,7 @@ static struct _Py_object_stack_state * get_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); - return &state->object_stack_state; + return &state->object_stacks; } _PyObjectStackChunk * @@ -76,7 +76,7 @@ _PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalizat return; } - struct _Py_object_stack_state *state = &free_lists->object_stack_state; + struct _Py_object_stack_state *state = &free_lists->object_stacks; while (state->numfree > 0) { _PyObjectStackChunk *buf = state->free_list; state->free_list = buf->prev; From f9c505698a1ac27f5a380780767665ffd2fb8ebc Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 27 Jan 2024 01:20:21 +0900 Subject: [PATCH 027/127] gh-112087: Make list_repr and list_length to be thread-safe (gh-114582) --- Include/cpython/listobject.h | 4 ++++ Objects/listobject.c | 27 +++++++++++++++++---------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/Include/cpython/listobject.h b/Include/cpython/listobject.h index 8ade1b164681f9..49f5e8d6d1a0d6 100644 --- a/Include/cpython/listobject.h +++ b/Include/cpython/listobject.h @@ -29,7 +29,11 @@ typedef struct { static inline Py_ssize_t PyList_GET_SIZE(PyObject *op) { PyListObject *list = _PyList_CAST(op); +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_ssize_relaxed(&(_PyVarObject_CAST(list)->ob_size)); +#else return Py_SIZE(list); +#endif } #define PyList_GET_SIZE(op) PyList_GET_SIZE(_PyObject_CAST(op)) diff --git a/Objects/listobject.c b/Objects/listobject.c index 1e885f9cb80c4c..56785e5f37a450 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -383,18 +383,11 @@ list_dealloc(PyObject *self) } static PyObject * -list_repr(PyObject *self) +list_repr_impl(PyListObject *v) { - PyListObject *v = (PyListObject *)self; - Py_ssize_t i; PyObject *s; _PyUnicodeWriter writer; - - if (Py_SIZE(v) == 0) { - return PyUnicode_FromString("[]"); - } - - i = Py_ReprEnter((PyObject*)v); + Py_ssize_t i = Py_ReprEnter((PyObject*)v); if (i != 0) { return i > 0 ? PyUnicode_FromString("[...]") : NULL; } @@ -439,10 +432,24 @@ list_repr(PyObject *self) return NULL; } +static PyObject * +list_repr(PyObject *self) +{ + if (PyList_GET_SIZE(self) == 0) { + return PyUnicode_FromString("[]"); + } + PyListObject *v = (PyListObject *)self; + PyObject *ret = NULL; + Py_BEGIN_CRITICAL_SECTION(v); + ret = list_repr_impl(v); + Py_END_CRITICAL_SECTION(); + return ret; +} + static Py_ssize_t list_length(PyObject *a) { - return Py_SIZE(a); + return PyList_GET_SIZE(a); } static int From 102569d150b690efe94c13921e93da66081ba1cf Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 26 Jan 2024 17:27:29 +0000 Subject: [PATCH 028/127] Use Unicode unconditionally for _winapi.CreateFile (GH-114611) Currently it switches based on build settings, but argument clinic does not handle it correctly. --- Modules/_winapi.c | 17 +++++++++-------- Modules/clinic/_winapi.c.h | 13 ++++++++----- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 26302b559817b3..5e5eb123c4ccff 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -441,7 +441,7 @@ _winapi_ConnectNamedPipe_impl(PyObject *module, HANDLE handle, /*[clinic input] _winapi.CreateFile -> HANDLE - file_name: LPCTSTR + file_name: LPCWSTR desired_access: DWORD share_mode: DWORD security_attributes: LPSECURITY_ATTRIBUTES @@ -452,12 +452,12 @@ _winapi.CreateFile -> HANDLE [clinic start generated code]*/ static HANDLE -_winapi_CreateFile_impl(PyObject *module, LPCTSTR file_name, +_winapi_CreateFile_impl(PyObject *module, LPCWSTR file_name, DWORD desired_access, DWORD share_mode, LPSECURITY_ATTRIBUTES security_attributes, DWORD creation_disposition, DWORD flags_and_attributes, HANDLE template_file) -/*[clinic end generated code: output=417ddcebfc5a3d53 input=6423c3e40372dbd5]*/ +/*[clinic end generated code: output=818c811e5e04d550 input=1fa870ed1c2e3d69]*/ { HANDLE handle; @@ -468,14 +468,15 @@ _winapi_CreateFile_impl(PyObject *module, LPCTSTR file_name, } Py_BEGIN_ALLOW_THREADS - handle = CreateFile(file_name, desired_access, - share_mode, security_attributes, - creation_disposition, - flags_and_attributes, template_file); + handle = CreateFileW(file_name, desired_access, + share_mode, security_attributes, + creation_disposition, + flags_and_attributes, template_file); Py_END_ALLOW_THREADS - if (handle == INVALID_HANDLE_VALUE) + if (handle == INVALID_HANDLE_VALUE) { PyErr_SetFromWindowsErr(0); + } return handle; } diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 3a3231c051ef71..d1052f38919dde 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -162,7 +162,7 @@ PyDoc_STRVAR(_winapi_CreateFile__doc__, {"CreateFile", _PyCFunction_CAST(_winapi_CreateFile), METH_FASTCALL, _winapi_CreateFile__doc__}, static HANDLE -_winapi_CreateFile_impl(PyObject *module, LPCTSTR file_name, +_winapi_CreateFile_impl(PyObject *module, LPCWSTR file_name, DWORD desired_access, DWORD share_mode, LPSECURITY_ATTRIBUTES security_attributes, DWORD creation_disposition, @@ -172,7 +172,7 @@ static PyObject * _winapi_CreateFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - LPCTSTR file_name; + LPCWSTR file_name = NULL; DWORD desired_access; DWORD share_mode; LPSECURITY_ATTRIBUTES security_attributes; @@ -181,8 +181,8 @@ _winapi_CreateFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HANDLE template_file; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "skk" F_POINTER "kk" F_HANDLE ":CreateFile", - &file_name, &desired_access, &share_mode, &security_attributes, &creation_disposition, &flags_and_attributes, &template_file)) { + if (!_PyArg_ParseStack(args, nargs, "O&kk" F_POINTER "kk" F_HANDLE ":CreateFile", + _PyUnicode_WideCharString_Converter, &file_name, &desired_access, &share_mode, &security_attributes, &creation_disposition, &flags_and_attributes, &template_file)) { goto exit; } _return_value = _winapi_CreateFile_impl(module, file_name, desired_access, share_mode, security_attributes, creation_disposition, flags_and_attributes, template_file); @@ -195,6 +195,9 @@ _winapi_CreateFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for file_name */ + PyMem_Free((void *)file_name); + return return_value; } @@ -1479,4 +1482,4 @@ _winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO return return_value; } -/*[clinic end generated code: output=e1a9908bb82a6379 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2350d4f2275d3a6f input=a9049054013a1b77]*/ From d91ddff5de61447844f1dac575d2e670c8d7e26b Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 26 Jan 2024 17:33:44 +0000 Subject: [PATCH 029/127] gh-114435: Allow test_stat_inaccessible_file() to have matching ino/dev (GH-114571) This may occur if Windows allows reading stat information from a file even if the current user does not have access. --- Lib/test/test_os.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index ed1f304c6c8cac..e6f0dfde8cb4ae 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -3129,10 +3129,9 @@ def cleanup(): if support.verbose: print(" without access:", stat2) - # We cannot get st_dev/st_ino, so ensure those are 0 or else our test - # is not set up correctly - self.assertEqual(0, stat2.st_dev) - self.assertEqual(0, stat2.st_ino) + # We may not get st_dev/st_ino, so ensure those are 0 or match + self.assertIn(stat2.st_dev, (0, stat1.st_dev)) + self.assertIn(stat2.st_ino, (0, stat1.st_ino)) # st_mode and st_size should match (for a normal file, at least) self.assertEqual(stat1.st_mode, stat2.st_mode) From 3f62bf32caf04cedb2c59579a0ce835d1e793d4d Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 26 Jan 2024 20:44:45 +0300 Subject: [PATCH 030/127] Document PyOS_strtoul and PyOS_strtol (GH-114048) --- Doc/c-api/conversion.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index c5350123dfdfdc..4aaf3905e81c8a 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -48,6 +48,42 @@ The return value (*rv*) for these functions should be interpreted as follows: The following functions provide locale-independent string to number conversions. +.. c:function:: unsigned long PyOS_strtoul(const char *str, char **ptr, int base) + + Convert the initial part of the string in ``str`` to an :c:expr:`unsigned + long` value according to the given ``base``, which must be between ``2`` and + ``36`` inclusive, or be the special value ``0``. + + Leading white space and case of characters are ignored. If ``base`` is zero + it looks for a leading ``0b``, ``0o`` or ``0x`` to tell which base. If + these are absent it defaults to ``10``. Base must be 0 or between 2 and 36 + (inclusive). If ``ptr`` is non-``NULL`` it will contain a pointer to the + end of the scan. + + If the converted value falls out of range of corresponding return type, + range error occurs (:c:data:`errno` is set to :c:macro:`!ERANGE`) and + :c:macro:`!ULONG_MAX` is returned. If no conversion can be performed, ``0`` + is returned. + + See also the Unix man page :manpage:`strtoul(3)`. + + .. versionadded:: 3.2 + + +.. c:function:: long PyOS_strtol(const char *str, char **ptr, int base) + + Convert the initial part of the string in ``str`` to an :c:expr:`long` value + according to the given ``base``, which must be between ``2`` and ``36`` + inclusive, or be the special value ``0``. + + Same as :c:func:`PyOS_strtoul`, but return a :c:expr:`long` value instead + and :c:macro:`LONG_MAX` on overflows. + + See also the Unix man page :manpage:`strtol(3)`. + + .. versionadded:: 3.2 + + .. c:function:: double PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) Convert a string ``s`` to a :c:expr:`double`, raising a Python From 6c2b419fb91c8d7daa769d39f73768114b5eb45a Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 19:12:48 +0100 Subject: [PATCH 031/127] Docs: rework the dbm.dumb introduction (#114550) - consistently use correct parameter markup - consistently use submodule name as database name - improve accuracy of the dbm.dumb.open() spec - remove dumbdbm class refs and replace them with generic "database object" - use parameter list for dbm.dumb.open() --- Doc/library/dbm.rst | 66 ++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 3a7dad1a0736a0..076e86143d06a6 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -351,13 +351,14 @@ This module can be used with the "classic" NDBM interface or the -------------- -The :mod:`dbm.dumb` module provides a persistent dictionary-like interface which -is written entirely in Python. Unlike other modules such as :mod:`dbm.gnu` no -external library is required. As with other persistent mappings, the keys and -values are always stored as bytes. - -The module defines the following: +The :mod:`dbm.dumb` module provides a persistent :class:`dict`-like +interface which is written entirely in Python. +Unlike other :mod:`dbm` backends, such as :mod:`dbm.gnu`, no +external library is required. +As with other :mod:`dbm` backends, +the keys and values are always stored as :class:`bytes`. +The :mod:`!dbm.dumb` module defines the following: .. exception:: error @@ -365,26 +366,33 @@ The module defines the following: raised for general mapping errors like specifying an incorrect key. -.. function:: open(filename[, flag[, mode]]) +.. function:: open(filename, flag="c", mode=0o666) - Open a ``dumbdbm`` database and return a dumbdbm object. The *filename* argument is - the basename of the database file (without any specific extensions). When a - dumbdbm database is created, files with :file:`.dat` and :file:`.dir` extensions - are created. + Open a :mod:`!dbm.dumb` database. + The returned database object behaves similar to a :term:`mapping`, + in addition to providing :meth:`~dumbdbm.sync` and :meth:`~dumbdbm.close` + methods. - The optional *flag* argument can be: + :param filename: + The basename of the database file (without extensions). + A new database creates the following files: - .. csv-table:: - :header: "Value", "Meaning" + - :file:`{filename}.dat` + - :file:`{filename}.dir` + :type database: :term:`path-like object` - ``'r'``, |flag_r| - ``'w'``, |flag_w| - ``'c'`` (default), |flag_c| - ``'n'``, |flag_n| + :param str flag: + .. csv-table:: + :header: "Value", "Meaning" - The optional *mode* argument is the Unix mode of the file, used only when the - database has to be created. It defaults to octal ``0o666`` (and will be modified - by the prevailing umask). + ``'r'``, |flag_r| + ``'w'``, |flag_w| + ``'c'`` (default), |flag_c| + ``'n'``, |flag_n| + + :param int mode: + The Unix file access mode of the file (default: ``0o666``), + used only when the database has to be created. .. warning:: It is possible to crash the Python interpreter when loading a database @@ -392,20 +400,18 @@ The module defines the following: Python's AST compiler. .. versionchanged:: 3.5 - :func:`.open` always creates a new database when the flag has the value - ``'n'``. + :func:`open` always creates a new database when *flag* is ``'n'``. .. versionchanged:: 3.8 - A database opened with flags ``'r'`` is now read-only. Opening with - flags ``'r'`` and ``'w'`` no longer creates a database if it does not - exist. + A database opened read-only if *flag* is ``'r'``. + A database is not created if it does not exist if *flag* is ``'r'`` or ``'w'``. .. versionchanged:: 3.11 - Accepts :term:`path-like object` for filename. + *filename* accepts a :term:`path-like object`. In addition to the methods provided by the - :class:`collections.abc.MutableMapping` class, :class:`dumbdbm` objects - provide the following methods: + :class:`collections.abc.MutableMapping` class, + the following methods are provided: .. method:: dumbdbm.sync() @@ -414,5 +420,5 @@ The module defines the following: .. method:: dumbdbm.close() - Close the ``dumbdbm`` database. + Close the database. From 7e31d6dea276ac91402aefb023c58d239dfd9246 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 26 Jan 2024 18:14:24 +0000 Subject: [PATCH 032/127] gh-88569: add `ntpath.isreserved()` (#95486) Add `ntpath.isreserved()`, which identifies reserved pathnames such as "NUL", "AUX" and "CON". Deprecate `pathlib.PurePath.is_reserved()`. --------- Co-authored-by: Eryk Sun Co-authored-by: Brett Cannon Co-authored-by: Steve Dower --- Doc/library/os.path.rst | 22 ++++++++ Doc/library/pathlib.rst | 13 ++--- Doc/whatsnew/3.13.rst | 15 +++++ Lib/ntpath.py | 40 ++++++++++++- Lib/pathlib/__init__.py | 28 +++------- Lib/test/test_ntpath.py | 56 +++++++++++++++++++ Lib/test/test_pathlib/test_pathlib.py | 48 ++-------------- ...2-07-31-01-24-40.gh-issue-88569.eU0--b.rst | 4 ++ 8 files changed, 154 insertions(+), 72 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 3cab7a260df008..34bc76b231de92 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -326,6 +326,28 @@ the :mod:`glob` module.) .. versionadded:: 3.12 +.. function:: isreserved(path) + + Return ``True`` if *path* is a reserved pathname on the current system. + + On Windows, reserved filenames include those that end with a space or dot; + those that contain colons (i.e. file streams such as "name:stream"), + wildcard characters (i.e. ``'*?"<>'``), pipe, or ASCII control characters; + as well as DOS device names such as "NUL", "CON", "CONIN$", "CONOUT$", + "AUX", "PRN", "COM1", and "LPT1". + + .. note:: + + This function approximates rules for reserved paths on most Windows + systems. These rules change over time in various Windows releases. + This function may be updated in future Python releases as changes to + the rules become broadly available. + + .. availability:: Windows. + + .. versionadded:: 3.13 + + .. function:: join(path, *paths) Join one or more path segments intelligently. The return value is the diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 2f4ff4efec47f8..f1aba793fda03e 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -535,14 +535,13 @@ Pure paths provide the following methods and properties: reserved under Windows, ``False`` otherwise. With :class:`PurePosixPath`, ``False`` is always returned. - >>> PureWindowsPath('nul').is_reserved() - True - >>> PurePosixPath('nul').is_reserved() - False - - File system calls on reserved paths can fail mysteriously or have - unintended effects. + .. versionchanged:: 3.13 + Windows path names that contain a colon, or end with a dot or a space, + are considered reserved. UNC paths may be reserved. + .. deprecated-removed:: 3.13 3.15 + This method is deprecated; use :func:`os.path.isreserved` to detect + reserved paths on Windows. .. method:: PurePath.joinpath(*pathsegments) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 8c2bb05920d5b6..985e34b453f63a 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -321,6 +321,9 @@ os os.path ------- +* Add :func:`os.path.isreserved` to check if a path is reserved on the current + system. This function is only available on Windows. + (Contributed by Barney Gale in :gh:`88569`.) * On Windows, :func:`os.path.isabs` no longer considers paths starting with exactly one (back)slash to be absolute. (Contributed by Barney Gale and Jon Foster in :gh:`44626`.) @@ -498,6 +501,12 @@ Deprecated security and functionality bugs. This includes removal of the ``--cgi`` flag to the ``python -m http.server`` command line in 3.15. +* :mod:`pathlib`: + + * :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for + removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved + paths on Windows. + * :mod:`sys`: :func:`sys._enablelegacywindowsfsencoding` function. Replace it with :envvar:`PYTHONLEGACYWINDOWSFSENCODING` environment variable. (Contributed by Inada Naoki in :gh:`73427`.) @@ -709,6 +718,12 @@ Pending Removal in Python 3.15 :func:`locale.getlocale()` instead. (Contributed by Hugo van Kemenade in :gh:`111187`.) +* :mod:`pathlib`: + + * :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for + removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved + paths on Windows. + * :class:`typing.NamedTuple`: * The undocumented keyword argument syntax for creating NamedTuple classes diff --git a/Lib/ntpath.py b/Lib/ntpath.py index aa0e018eb668c2..e7cbfe17ecb3c8 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -26,8 +26,8 @@ __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", - "ismount", "expanduser","expandvars","normpath","abspath", - "curdir","pardir","sep","pathsep","defpath","altsep", + "ismount","isreserved","expanduser","expandvars","normpath", + "abspath","curdir","pardir","sep","pathsep","defpath","altsep", "extsep","devnull","realpath","supports_unicode_filenames","relpath", "samefile", "sameopenfile", "samestat", "commonpath", "isjunction"] @@ -330,6 +330,42 @@ def ismount(path): return False +_reserved_chars = frozenset( + {chr(i) for i in range(32)} | + {'"', '*', ':', '<', '>', '?', '|', '/', '\\'} +) + +_reserved_names = frozenset( + {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | + {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | + {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} +) + +def isreserved(path): + """Return true if the pathname is reserved by the system.""" + # Refer to "Naming Files, Paths, and Namespaces": + # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file + path = os.fsdecode(splitroot(path)[2]).replace(altsep, sep) + return any(_isreservedname(name) for name in reversed(path.split(sep))) + +def _isreservedname(name): + """Return true if the filename is reserved by the system.""" + # Trailing dots and spaces are reserved. + if name.endswith(('.', ' ')) and name not in ('.', '..'): + return True + # Wildcards, separators, colon, and pipe (*?"<>/\:|) are reserved. + # ASCII control characters (0-31) are reserved. + # Colon is reserved for file streams (e.g. "name:stream[:type]"). + if _reserved_chars.intersection(name): + return True + # DOS device names are reserved (e.g. "nul" or "nul .txt"). The rules + # are complex and vary across Windows versions. On the side of + # caution, return True for names that may not be reserved. + if name.partition('.')[0].rstrip(' ').upper() in _reserved_names: + return True + return False + + # Expand paths beginning with '~' or '~user'. # '~' means $HOME; '~user' means that user's home directory. # If the path doesn't begin with '~', or if the user or $HOME is unknown, diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index eee82ef26bc7e7..cc159edab5796f 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -33,15 +33,6 @@ ] -# Reference for Windows paths can be found at -# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . -_WIN_RESERVED_NAMES = frozenset( - {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | - {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | - {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} -) - - class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" @@ -433,18 +424,13 @@ def is_absolute(self): def is_reserved(self): """Return True if the path contains one of the special names reserved by the system, if any.""" - if self.pathmod is not ntpath or not self.name: - return False - - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not - # exist). We err on the side of caution and return True for paths - # which are not considered reserved by Windows. - if self.drive.startswith('\\\\'): - # UNC paths are never reserved. - return False - name = self.name.partition('.')[0].partition(':')[0].rstrip(' ') - return name.upper() in _WIN_RESERVED_NAMES + msg = ("pathlib.PurePath.is_reserved() is deprecated and scheduled " + "for removal in Python 3.15. Use os.path.isreserved() to " + "detect reserved paths on Windows.") + warnings.warn(msg, DeprecationWarning, stacklevel=2) + if self.pathmod is ntpath: + return self.pathmod.isreserved(self) + return False def as_uri(self): """Return the path as a URI.""" diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index aefcb98f1c30eb..9cb03e3cd5de8d 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -981,6 +981,62 @@ def test_ismount(self): self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$")) self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$\\")) + def test_isreserved(self): + self.assertFalse(ntpath.isreserved('')) + self.assertFalse(ntpath.isreserved('.')) + self.assertFalse(ntpath.isreserved('..')) + self.assertFalse(ntpath.isreserved('/')) + self.assertFalse(ntpath.isreserved('/foo/bar')) + # A name that ends with a space or dot is reserved. + self.assertTrue(ntpath.isreserved('foo.')) + self.assertTrue(ntpath.isreserved('foo ')) + # ASCII control characters are reserved. + self.assertTrue(ntpath.isreserved('\foo')) + # Wildcard characters, colon, and pipe are reserved. + self.assertTrue(ntpath.isreserved('foo*bar')) + self.assertTrue(ntpath.isreserved('foo?bar')) + self.assertTrue(ntpath.isreserved('foo"bar')) + self.assertTrue(ntpath.isreserved('foobar')) + self.assertTrue(ntpath.isreserved('foo:bar')) + self.assertTrue(ntpath.isreserved('foo|bar')) + # Case-insensitive DOS-device names are reserved. + self.assertTrue(ntpath.isreserved('nul')) + self.assertTrue(ntpath.isreserved('aux')) + self.assertTrue(ntpath.isreserved('prn')) + self.assertTrue(ntpath.isreserved('con')) + self.assertTrue(ntpath.isreserved('conin$')) + self.assertTrue(ntpath.isreserved('conout$')) + # COM/LPT + 1-9 or + superscript 1-3 are reserved. + self.assertTrue(ntpath.isreserved('COM1')) + self.assertTrue(ntpath.isreserved('LPT9')) + self.assertTrue(ntpath.isreserved('com\xb9')) + self.assertTrue(ntpath.isreserved('com\xb2')) + self.assertTrue(ntpath.isreserved('lpt\xb3')) + # DOS-device name matching ignores characters after a dot or + # a colon and also ignores trailing spaces. + self.assertTrue(ntpath.isreserved('NUL.txt')) + self.assertTrue(ntpath.isreserved('PRN ')) + self.assertTrue(ntpath.isreserved('AUX .txt')) + self.assertTrue(ntpath.isreserved('COM1:bar')) + self.assertTrue(ntpath.isreserved('LPT9 :bar')) + # DOS-device names are only matched at the beginning + # of a path component. + self.assertFalse(ntpath.isreserved('bar.com9')) + self.assertFalse(ntpath.isreserved('bar.lpt9')) + # The entire path is checked, except for the drive. + self.assertTrue(ntpath.isreserved('c:/bar/baz/NUL')) + self.assertTrue(ntpath.isreserved('c:/NUL/bar/baz')) + self.assertFalse(ntpath.isreserved('//./NUL')) + # Bytes are supported. + self.assertFalse(ntpath.isreserved(b'')) + self.assertFalse(ntpath.isreserved(b'.')) + self.assertFalse(ntpath.isreserved(b'..')) + self.assertFalse(ntpath.isreserved(b'/')) + self.assertFalse(ntpath.isreserved(b'/foo/bar')) + self.assertTrue(ntpath.isreserved(b'foo.')) + self.assertTrue(ntpath.isreserved(b'nul')) + def assertEqualCI(self, s1, s2): """Assert that two strings are equal ignoring case differences.""" self.assertEqual(s1.lower(), s2.lower()) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index bdbe92369639ef..2da3afdd198015 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -349,6 +349,12 @@ def test_is_relative_to_several_args(self): with self.assertWarns(DeprecationWarning): p.is_relative_to('a', 'b') + def test_is_reserved_deprecated(self): + P = self.cls + p = P('a/b') + with self.assertWarns(DeprecationWarning): + p.is_reserved() + def test_match_empty(self): P = self.cls self.assertRaises(ValueError, P('a').match, '') @@ -414,13 +420,6 @@ def test_is_absolute(self): self.assertTrue(P('//a').is_absolute()) self.assertTrue(P('//a/b').is_absolute()) - def test_is_reserved(self): - P = self.cls - self.assertIs(False, P('').is_reserved()) - self.assertIs(False, P('/').is_reserved()) - self.assertIs(False, P('/foo/bar').is_reserved()) - self.assertIs(False, P('/dev/con/PRN/NUL').is_reserved()) - def test_join(self): P = self.cls p = P('//a') @@ -1082,41 +1081,6 @@ def test_div(self): self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) self.assertEqual(p / P('E:d:s'), P('E:d:s')) - def test_is_reserved(self): - P = self.cls - self.assertIs(False, P('').is_reserved()) - self.assertIs(False, P('/').is_reserved()) - self.assertIs(False, P('/foo/bar').is_reserved()) - # UNC paths are never reserved. - self.assertIs(False, P('//my/share/nul/con/aux').is_reserved()) - # Case-insensitive DOS-device names are reserved. - self.assertIs(True, P('nul').is_reserved()) - self.assertIs(True, P('aux').is_reserved()) - self.assertIs(True, P('prn').is_reserved()) - self.assertIs(True, P('con').is_reserved()) - self.assertIs(True, P('conin$').is_reserved()) - self.assertIs(True, P('conout$').is_reserved()) - # COM/LPT + 1-9 or + superscript 1-3 are reserved. - self.assertIs(True, P('COM1').is_reserved()) - self.assertIs(True, P('LPT9').is_reserved()) - self.assertIs(True, P('com\xb9').is_reserved()) - self.assertIs(True, P('com\xb2').is_reserved()) - self.assertIs(True, P('lpt\xb3').is_reserved()) - # DOS-device name mataching ignores characters after a dot or - # a colon and also ignores trailing spaces. - self.assertIs(True, P('NUL.txt').is_reserved()) - self.assertIs(True, P('PRN ').is_reserved()) - self.assertIs(True, P('AUX .txt').is_reserved()) - self.assertIs(True, P('COM1:bar').is_reserved()) - self.assertIs(True, P('LPT9 :bar').is_reserved()) - # DOS-device names are only matched at the beginning - # of a path component. - self.assertIs(False, P('bar.com9').is_reserved()) - self.assertIs(False, P('bar.lpt9').is_reserved()) - # Only the last path component matters. - self.assertIs(True, P('c:/baz/con/NUL').is_reserved()) - self.assertIs(False, P('c:/NUL/con/baz').is_reserved()) - class PurePathSubclassTest(PurePathTest): class cls(pathlib.PurePath): diff --git a/Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst b/Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst new file mode 100644 index 00000000000000..31dd985bb5c3b6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst @@ -0,0 +1,4 @@ +Add :func:`os.path.isreserved`, which identifies reserved pathnames such +as "NUL", "AUX" and "CON". This function is only available on Windows. + +Deprecate :meth:`pathlib.PurePath.is_reserved`. From df17b5264378f38f49b16343b5016a8882212a8a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 26 Jan 2024 11:33:02 -0700 Subject: [PATCH 033/127] Add More Entries to CODEOWNERS (#114617) --- .github/CODEOWNERS | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4984170f0d17ff..ae915423ece955 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -45,6 +45,30 @@ Tools/c-analyzer/ @ericsnowcurrently # dbm **/*dbm* @corona10 @erlend-aasland @serhiy-storchaka +# runtime state/lifecycle +**/*pylifecycle* @ericsnowcurrently +**/*pystate* @ericsnowcurrently +**/*preconfig* @ericsnowcurrently +**/*initconfig* @ericsnowcurrently +**/*pathconfig* @ericsnowcurrently +**/*sysmodule* @ericsnowcurrently +**/*bltinmodule* @ericsnowcurrently +**/*gil* @ericsnowcurrently +Include/internal/pycore_runtime.h @ericsnowcurrently +Include/internal/pycore_interp.h @ericsnowcurrently +Include/internal/pycore_tstate.h @ericsnowcurrently +Include/internal/pycore_*_state.h @ericsnowcurrently +Include/internal/pycore_*_init.h @ericsnowcurrently +Include/internal/pycore_atexit.h @ericsnowcurrently +Include/internal/pycore_freelist.h @ericsnowcurrently +Include/internal/pycore_global_objects.h @ericsnowcurrently +Include/internal/pycore_obmalloc.h @ericsnowcurrently +Include/internal/pycore_pymem.h @ericsnowcurrently +Modules/main.c @ericsnowcurrently +Programs/_bootstrap_python.c @ericsnowcurrently +Programs/python.c @ericsnowcurrently +Tools/build/generate_global_objects.py @ericsnowcurrently + # Exceptions Lib/traceback.py @iritkatriel Lib/test/test_except*.py @iritkatriel @@ -79,7 +103,20 @@ Modules/_hacl/** @gpshead # Import (including importlib). **/*import* @brettcannon @ericsnowcurrently @ncoghlan @warsaw /Python/import.c @kumaraditya303 -**/*importlib/resources/* @jaraco @warsaw @FFY00 +Python/dynload_*.c @ericsnowcurrently +**/*freeze* @ericsnowcurrently +**/*frozen* @ericsnowcurrently +**/*modsupport* @ericsnowcurrently +**/*modulefinder* @ericsnowcurrently +**/*moduleobject* @ericsnowcurrently +**/*multiphase* @ericsnowcurrently +**/*pkgutil* @ericsnowcurrently +**/*pythonrun* @ericsnowcurrently +**/*runpy* @ericsnowcurrently +**/*singlephase* @ericsnowcurrently +Lib/test/test_module/ @ericsnowcurrently +Doc/c-api/module.rst @ericsnowcurrently +**/*importlib/resources/* @jaraco @warsaw @FFY00 **/importlib/metadata/* @jaraco @warsaw # Dates and times @@ -198,6 +235,8 @@ Doc/c-api/stable.rst @encukou Doc/howto/clinic.rst @erlend-aasland # Subinterpreters +**/*interpreteridobject.* @ericsnowcurrently +**/*crossinterp* @ericsnowcurrently Lib/test/support/interpreters/ @ericsnowcurrently Modules/_xx*interp*module.c @ericsnowcurrently Lib/test/test_interpreters/ @ericsnowcurrently From 07236f5b39a2e534cf190cd4f7c73300d209520b Mon Sep 17 00:00:00 2001 From: Tristan Pank Date: Fri, 26 Jan 2024 14:48:22 -0500 Subject: [PATCH 034/127] gh-114494: Change logging docstring to bool for exec_info (GH=114558) --- Lib/logging/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index eb7e020d1edfc0..684b58d5548f91 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -1493,7 +1493,7 @@ def debug(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.debug("Houston, we have a %s", "thorny problem", exc_info=1) + logger.debug("Houston, we have a %s", "thorny problem", exc_info=True) """ if self.isEnabledFor(DEBUG): self._log(DEBUG, msg, args, **kwargs) @@ -1505,7 +1505,7 @@ def info(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.info("Houston, we have a %s", "notable problem", exc_info=1) + logger.info("Houston, we have a %s", "notable problem", exc_info=True) """ if self.isEnabledFor(INFO): self._log(INFO, msg, args, **kwargs) @@ -1517,7 +1517,7 @@ def warning(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1) + logger.warning("Houston, we have a %s", "bit of a problem", exc_info=True) """ if self.isEnabledFor(WARNING): self._log(WARNING, msg, args, **kwargs) @@ -1529,7 +1529,7 @@ def error(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.error("Houston, we have a %s", "major problem", exc_info=1) + logger.error("Houston, we have a %s", "major problem", exc_info=True) """ if self.isEnabledFor(ERROR): self._log(ERROR, msg, args, **kwargs) @@ -1547,7 +1547,7 @@ def critical(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.critical("Houston, we have a %s", "major disaster", exc_info=1) + logger.critical("Houston, we have a %s", "major disaster", exc_info=True) """ if self.isEnabledFor(CRITICAL): self._log(CRITICAL, msg, args, **kwargs) @@ -1565,7 +1565,7 @@ def log(self, level, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.log(level, "We have a %s", "mysterious problem", exc_info=1) + logger.log(level, "We have a %s", "mysterious problem", exc_info=True) """ if not isinstance(level, int): if raiseExceptions: From b5c7c84673b96bfdd7c877521a970f7a4beafece Mon Sep 17 00:00:00 2001 From: Aiden Fox Ivey Date: Fri, 26 Jan 2024 15:36:50 -0500 Subject: [PATCH 035/127] gh-114490: Add check for Mach-O linkage in Lib/platform.py (#114491) ``platform.architecture()`` now returns the format of binaries (e.g. Mach-O) instead of the default empty string. Co-authored-by: AN Long --- Lib/platform.py | 2 ++ .../next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst | 1 + 2 files changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst diff --git a/Lib/platform.py b/Lib/platform.py index 75aa55510858fd..b56472235ee9e4 100755 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -752,6 +752,8 @@ def architecture(executable=sys.executable, bits='', linkage=''): # Linkage if 'ELF' in fileout: linkage = 'ELF' + elif 'Mach-O' in fileout: + linkage = "Mach-O" elif 'PE' in fileout: # E.g. Windows uses this format if 'Windows' in fileout: diff --git a/Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst b/Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst new file mode 100644 index 00000000000000..abd296f8608518 --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst @@ -0,0 +1 @@ +Add Mach-O linkage support for :func:`platform.architecture()`. From 7a9727e10c14a82e8e20f5b85e368a6f937db203 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 26 Jan 2024 22:29:28 +0000 Subject: [PATCH 036/127] pathlib tests: annotate tests needing symlinks with decorator (#114625) Add `@needs_symlinks` decorator for tests that require symlink support in the path class. Also add `@needs_windows` and `@needs_posix` decorators for tests that require a specific a specific path flavour. These aren't much used yet, but will be later. --- Lib/test/test_pathlib/test_pathlib.py | 17 ++--- Lib/test/test_pathlib/test_pathlib_abc.py | 86 +++++++++++++---------- 2 files changed, 55 insertions(+), 48 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2da3afdd198015..2cef3b295559d8 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -19,6 +19,7 @@ from test.support import os_helper from test.support.os_helper import TESTFN, FakePath from test.test_pathlib import test_pathlib_abc +from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows, needs_symlinks try: import grp, pwd @@ -26,11 +27,6 @@ grp = pwd = None -only_nt = unittest.skipIf(os.name != 'nt', - 'test requires a Windows-compatible system') -only_posix = unittest.skipIf(os.name == 'nt', - 'test requires a POSIX-compatible system') - root_in_posix = False if hasattr(os, 'geteuid'): root_in_posix = (os.geteuid() == 0) @@ -1268,7 +1264,7 @@ def test_chmod(self): self.assertEqual(p.stat().st_mode, new_mode) # On Windows, os.chmod does not follow symlinks (issue #15411) - @only_posix + @needs_posix @os_helper.skip_unless_working_chmod def test_chmod_follow_symlinks_true(self): p = self.cls(self.base) / 'linkA' @@ -1537,7 +1533,7 @@ def test_mkdir_exist_ok_root(self): self.cls('/').resolve().mkdir(exist_ok=True) self.cls('/').resolve().mkdir(parents=True, exist_ok=True) - @only_nt # XXX: not sure how to test this on POSIX. + @needs_windows # XXX: not sure how to test this on POSIX. def test_mkdir_with_unknown_drive(self): for d in 'ZYXWVUTSRQPONMLKJIHGFEDCBA': p = self.cls(d + ':\\') @@ -1602,9 +1598,8 @@ def my_mkdir(path, mode=0o777): self.assertNotIn(str(p12), concurrently_created) self.assertTrue(p.exists()) + @needs_symlinks def test_symlink_to(self): - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls(self.base) target = P / 'fileA' # Symlinking a path target. @@ -1848,7 +1843,7 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) -@only_posix +@unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') class PosixPathTest(PathTest, PurePosixPathTest): cls = pathlib.PosixPath @@ -2024,7 +2019,7 @@ def test_from_uri_pathname2url(self): self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar')) -@only_nt +@unittest.skipIf(os.name != 'nt', 'test requires a Windows-compatible system') class WindowsPathTest(PathTest, PureWindowsPathTest): cls = pathlib.WindowsPath diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 364f776dbb1413..b19e9b40419c7a 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -11,6 +11,27 @@ from test.support.os_helper import TESTFN +_tests_needing_posix = set() +_tests_needing_windows = set() +_tests_needing_symlinks = set() + + +def needs_posix(fn): + """Decorator that marks a test as requiring a POSIX-flavoured path class.""" + _tests_needing_posix.add(fn.__name__) + return fn + +def needs_windows(fn): + """Decorator that marks a test as requiring a Windows-flavoured path class.""" + _tests_needing_windows.add(fn.__name__) + return fn + +def needs_symlinks(fn): + """Decorator that marks a test as requiring a path class that supports symlinks.""" + _tests_needing_symlinks.add(fn.__name__) + return fn + + class UnsupportedOperationTest(unittest.TestCase): def test_is_notimplemented(self): self.assertTrue(issubclass(UnsupportedOperation, NotImplementedError)) @@ -115,6 +136,11 @@ class DummyPurePathTest(unittest.TestCase): base = f'/this/path/kills/fascists/{TESTFN}' def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_posix and self.cls.pathmod is not posixpath: + self.skipTest('requires POSIX-flavoured path class') + if name in _tests_needing_windows and self.cls.pathmod is posixpath: + self.skipTest('requires Windows-flavoured path class') p = self.cls('a') self.pathmod = p.pathmod self.sep = self.pathmod.sep @@ -888,6 +914,9 @@ class DummyPathTest(DummyPurePathTest): def setUp(self): super().setUp() + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') pathmod = self.cls.pathmod p = self.cls(self.base) p.mkdir(parents=True) @@ -1045,9 +1074,8 @@ def test_iterdir(self): expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] self.assertEqual(paths, { P(self.base, q) for q in expected }) + @needs_symlinks def test_iterdir_symlink(self): - if not self.can_symlink: - self.skipTest("symlinks required") # __iter__ on a symlink to a directory. P = self.cls p = P(self.base, 'linkB') @@ -1116,9 +1144,8 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", True, []) _check(path, "dirb/file*", False, ["dirB/fileB"]) + @needs_symlinks def test_glob_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.glob(glob, follow_symlinks=True) if path.parts.count("linkD") <= 1} # exclude symlink loop. @@ -1144,9 +1171,8 @@ def _check(path, glob, expected): _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) + @needs_symlinks def test_glob_no_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.glob(glob, follow_symlinks=False)} self.assertEqual(actual, { P(self.base, q) for q in expected }) @@ -1210,9 +1236,8 @@ def _check(glob, expected): _check(p.rglob("*.txt"), ["dirC/novel.txt"]) _check(p.rglob("*.*"), ["dirC/novel.txt"]) + @needs_symlinks def test_rglob_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.rglob(glob, follow_symlinks=True) if path.parts.count("linkD") <= 1} # exclude symlink loop. @@ -1243,9 +1268,8 @@ def _check(path, glob, expected): _check(p, "*.txt", ["dirC/novel.txt"]) _check(p, "*.*", ["dirC/novel.txt"]) + @needs_symlinks def test_rglob_no_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.rglob(glob, follow_symlinks=False)} self.assertEqual(actual, { P(self.base, q) for q in expected }) @@ -1269,10 +1293,9 @@ def _check(path, glob, expected): _check(p, "*.txt", ["dirC/novel.txt"]) _check(p, "*.*", ["dirC/novel.txt"]) + @needs_symlinks def test_rglob_symlink_loop(self): # Don't get fooled by symlink loops (Issue #26012). - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls p = P(self.base) given = set(p.rglob('*')) @@ -1302,10 +1325,9 @@ def test_glob_dotdot(self): self.assertEqual(set(p.glob("xyzzy/..")), set()) self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) + @needs_symlinks def test_glob_permissions(self): # See bpo-38894 - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls base = P(self.base) / 'permissions' base.mkdir() @@ -1322,19 +1344,17 @@ def test_glob_permissions(self): self.assertEqual(len(set(base.glob("*/fileC"))), 50) self.assertEqual(len(set(base.glob("*/file*"))), 50) + @needs_symlinks def test_glob_long_symlink(self): # See gh-87695 - if not self.can_symlink: - self.skipTest("symlinks required") base = self.cls(self.base) / 'long_symlink' base.mkdir() bad_link = base / 'bad_link' bad_link.symlink_to("bad" * 200) self.assertEqual(sorted(base.glob('**/*')), [bad_link]) + @needs_symlinks def test_readlink(self): - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls(self.base) self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) self.assertEqual((P / 'brokenLink').readlink(), @@ -1358,9 +1378,8 @@ def _check_resolve(self, p, expected, strict=True): # This can be used to check both relative and absolute resolutions. _check_resolve_relative = _check_resolve_absolute = _check_resolve + @needs_symlinks def test_resolve_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls p = P(self.base, 'foo') with self.assertRaises(OSError) as cm: @@ -1419,10 +1438,9 @@ def test_resolve_common(self): # resolves to 'dirB/..' first before resolving to parent of dirB. self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + @needs_symlinks def test_resolve_dot(self): # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ - if not self.can_symlink: - self.skipTest("symlinks required") pathmod = self.pathmod p = self.cls(self.base) p.joinpath('0').symlink_to('.', target_is_directory=True) @@ -1441,11 +1459,9 @@ def _check_symlink_loop(self, *args): path.resolve(strict=True) self.assertEqual(cm.exception.errno, errno.ELOOP) + @needs_posix + @needs_symlinks def test_resolve_loop(self): - if not self.can_symlink: - self.skipTest("symlinks required") - if self.cls.pathmod is not posixpath: - self.skipTest("symlink loops work differently with concrete Windows paths") # Loops with relative symlinks. self.cls(self.base, 'linkX').symlink_to('linkX/inside') self._check_symlink_loop(self.base, 'linkX') @@ -1487,9 +1503,8 @@ def test_stat(self): self.assertEqual(statA.st_dev, statC.st_dev) # other attributes not used by pathlib. + @needs_symlinks def test_stat_no_follow_symlinks(self): - if not self.can_symlink: - self.skipTest("symlinks required") p = self.cls(self.base) / 'linkA' st = p.stat() self.assertNotEqual(st, p.stat(follow_symlinks=False)) @@ -1499,9 +1514,8 @@ def test_stat_no_follow_symlinks_nosymlink(self): st = p.stat() self.assertEqual(st, p.stat(follow_symlinks=False)) + @needs_symlinks def test_lstat(self): - if not self.can_symlink: - self.skipTest("symlinks required") p = self.cls(self.base)/ 'linkA' st = p.stat() self.assertNotEqual(st, p.lstat()) @@ -1634,9 +1648,6 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\x00').is_char_device(), False) def _check_complex_symlinks(self, link0_target): - if not self.can_symlink: - self.skipTest("symlinks required") - # Test solving a non-looping chain of symlinks (issue #19887). pathmod = self.pathmod P = self.cls(self.base) @@ -1682,12 +1693,15 @@ def _check_complex_symlinks(self, link0_target): finally: os.chdir(old_path) + @needs_symlinks def test_complex_symlinks_absolute(self): self._check_complex_symlinks(self.base) + @needs_symlinks def test_complex_symlinks_relative(self): self._check_complex_symlinks('.') + @needs_symlinks def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(self.pathmod.join('dirA', '..')) @@ -1803,9 +1817,8 @@ def test_walk_bottom_up(self): raise AssertionError(f"Unexpected path: {path}") self.assertTrue(seen_testfn) + @needs_symlinks def test_walk_follow_symlinks(self): - if not self.can_symlink: - self.skipTest("symlinks required") self.setUpWalk() walk_it = self.walk_path.walk(follow_symlinks=True) for root, dirs, files in walk_it: @@ -1816,9 +1829,8 @@ def test_walk_follow_symlinks(self): else: self.fail("Didn't follow symlink with follow_symlinks=True") + @needs_symlinks def test_walk_symlink_location(self): - if not self.can_symlink: - self.skipTest("symlinks required") self.setUpWalk() # Tests whether symlinks end up in filenames or dirnames depending # on the `follow_symlinks` argument. From fe5905e21ac90a2fb5ebd62779f56bcd87b5f7a0 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 27 Jan 2024 01:30:25 +0000 Subject: [PATCH 037/127] Cover OS-specific behaviour in `PurePath` and `Path` tests (#114632) Test Posix- and Windows-specific behaviour from `PurePathTest` and `PathTest`. --- Lib/test/test_pathlib/test_pathlib.py | 198 +++++++++++++++++--------- 1 file changed, 128 insertions(+), 70 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2cef3b295559d8..b0067c25d208b9 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -234,8 +234,10 @@ def test_eq_common(self): self.assertNotEqual(P(), {}) self.assertNotEqual(P(), int) - def test_equivalences(self): - for k, tuples in self.equivalences.items(): + def test_equivalences(self, equivalences=None): + if equivalences is None: + equivalences = self.equivalences + for k, tuples in equivalences.items(): canon = k.replace('/', self.sep) posix = k.replace(self.sep, '/') if canon != posix: @@ -356,11 +358,8 @@ def test_match_empty(self): self.assertRaises(ValueError, P('a').match, '') self.assertRaises(ValueError, P('a').match, '.') - -class PurePosixPathTest(PurePathTest): - cls = pathlib.PurePosixPath - - def test_parse_path(self): + @needs_posix + def test_parse_path_posix(self): check = self._check_parse_path # Collapsing of excess leading slashes, except for the double-slash # special case. @@ -372,25 +371,29 @@ def test_parse_path(self): check('c:\\a', '', '', ['c:\\a',]) check('\\a', '', '', ['\\a',]) - def test_root(self): + @needs_posix + def test_root_posix(self): P = self.cls self.assertEqual(P('/a/b').root, '/') self.assertEqual(P('///a/b').root, '/') # POSIX special case for two leading slashes. self.assertEqual(P('//a/b').root, '//') - def test_eq(self): + @needs_posix + def test_eq_posix(self): P = self.cls self.assertNotEqual(P('a/b'), P('A/b')) self.assertEqual(P('/a'), P('///a')) self.assertNotEqual(P('/a'), P('//a')) - def test_as_uri(self): + @needs_posix + def test_as_uri_posix(self): P = self.cls self.assertEqual(P('/').as_uri(), 'file:///') self.assertEqual(P('/a/b.c').as_uri(), 'file:///a/b.c') self.assertEqual(P('/a/b%#c').as_uri(), 'file:///a/b%25%23c') + @needs_posix def test_as_uri_non_ascii(self): from urllib.parse import quote_from_bytes P = self.cls @@ -401,11 +404,13 @@ def test_as_uri_non_ascii(self): self.assertEqual(P('/a/b\xe9').as_uri(), 'file:///a/b' + quote_from_bytes(os.fsencode('\xe9'))) - def test_match(self): + @needs_posix + def test_match_posix(self): P = self.cls self.assertFalse(P('A.py').match('a.PY')) - def test_is_absolute(self): + @needs_posix + def test_is_absolute_posix(self): P = self.cls self.assertFalse(P().is_absolute()) self.assertFalse(P('a').is_absolute()) @@ -416,7 +421,8 @@ def test_is_absolute(self): self.assertTrue(P('//a').is_absolute()) self.assertTrue(P('//a/b').is_absolute()) - def test_join(self): + @needs_posix + def test_join_posix(self): P = self.cls p = P('//a') pp = p.joinpath('b') @@ -426,7 +432,8 @@ def test_join(self): pp = P('//a').joinpath('/c') self.assertEqual(pp, P('/c')) - def test_div(self): + @needs_posix + def test_div_posix(self): # Basically the same as joinpath(). P = self.cls p = P('//a') @@ -437,18 +444,14 @@ def test_div(self): pp = P('//a') / '/c' self.assertEqual(pp, P('/c')) + @needs_posix def test_parse_windows_path(self): P = self.cls p = P('c:', 'a', 'b') pp = P(pathlib.PureWindowsPath('c:\\a\\b')) self.assertEqual(p, pp) - -class PureWindowsPathTest(PurePathTest): - cls = pathlib.PureWindowsPath - - equivalences = PurePathTest.equivalences.copy() - equivalences.update({ + windows_equivalences = { './a:b': [ ('./a:b',) ], 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ], 'c:/a': [ @@ -459,9 +462,14 @@ class PureWindowsPathTest(PurePathTest): '//a/b/c': [ ('//a/b', 'c'), ('//a/b/', 'c'), ], - }) + } + + @needs_windows + def test_equivalences_windows(self): + self.test_equivalences(self.windows_equivalences) - def test_parse_path(self): + @needs_windows + def test_parse_path_windows(self): check = self._check_parse_path # First part is anchored. check('c:', 'c:', '', []) @@ -509,7 +517,8 @@ def test_parse_path(self): check('D:a/c:b', 'D:', '', ['a', 'c:b']) check('D:/a/c:b', 'D:', '\\', ['a', 'c:b']) - def test_str(self): + @needs_windows + def test_str_windows(self): p = self.cls('a/b/c') self.assertEqual(str(p), 'a\\b\\c') p = self.cls('c:/a/b/c') @@ -521,7 +530,8 @@ def test_str(self): p = self.cls('//a/b/c/d') self.assertEqual(str(p), '\\\\a\\b\\c\\d') - def test_str_subclass(self): + @needs_windows + def test_str_subclass_windows(self): self._check_str_subclass('.\\a:b') self._check_str_subclass('c:') self._check_str_subclass('c:a') @@ -533,7 +543,8 @@ def test_str_subclass(self): self._check_str_subclass('\\\\some\\share\\a') self._check_str_subclass('\\\\some\\share\\a\\b.txt') - def test_eq(self): + @needs_windows + def test_eq_windows(self): P = self.cls self.assertEqual(P('c:a/b'), P('c:a/b')) self.assertEqual(P('c:a/b'), P('c:', 'a', 'b')) @@ -546,7 +557,8 @@ def test_eq(self): self.assertEqual(P('//Some/SHARE/a/B'), P('//somE/share/A/b')) self.assertEqual(P('\u0130'), P('i\u0307')) - def test_as_uri(self): + @needs_windows + def test_as_uri_windows(self): P = self.cls with self.assertRaises(ValueError): P('/a/b').as_uri() @@ -562,7 +574,8 @@ def test_as_uri(self): self.assertEqual(P('//some/share/a/b%#c\xe9').as_uri(), 'file://some/share/a/b%25%23c%C3%A9') - def test_match(self): + @needs_windows + def test_match_windows(self): P = self.cls # Absolute patterns. self.assertTrue(P('c:/b.py').match('*:/*.py')) @@ -589,7 +602,8 @@ def test_match(self): self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' - def test_ordering_common(self): + @needs_windows + def test_ordering_windows(self): # Case-insensitivity. def assertOrderedEqual(a, b): self.assertLessEqual(a, b) @@ -606,7 +620,8 @@ def assertOrderedEqual(a, b): self.assertFalse(p < q) self.assertFalse(p > q) - def test_parts(self): + @needs_windows + def test_parts_windows(self): P = self.cls p = P('c:a/b') parts = p.parts @@ -618,7 +633,8 @@ def test_parts(self): parts = p.parts self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) - def test_parent(self): + @needs_windows + def test_parent_windows(self): # Anchored P = self.cls p = P('z:a/b/c') @@ -636,7 +652,8 @@ def test_parent(self): self.assertEqual(p.parent.parent, P('//a/b')) self.assertEqual(p.parent.parent.parent, P('//a/b')) - def test_parents(self): + @needs_windows + def test_parents_windows(self): # Anchored P = self.cls p = P('z:a/b/') @@ -682,7 +699,8 @@ def test_parents(self): with self.assertRaises(IndexError): par[2] - def test_drive(self): + @needs_windows + def test_drive_windows(self): P = self.cls self.assertEqual(P('c:').drive, 'c:') self.assertEqual(P('c:a/b').drive, 'c:') @@ -693,7 +711,8 @@ def test_drive(self): self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') self.assertEqual(P('./c:a').drive, '') - def test_root(self): + @needs_windows + def test_root_windows(self): P = self.cls self.assertEqual(P('c:').root, '') self.assertEqual(P('c:a/b').root, '') @@ -703,7 +722,8 @@ def test_root(self): self.assertEqual(P('//a/b/').root, '\\') self.assertEqual(P('//a/b/c/d').root, '\\') - def test_anchor(self): + @needs_windows + def test_anchor_windows(self): P = self.cls self.assertEqual(P('c:').anchor, 'c:') self.assertEqual(P('c:a/b').anchor, 'c:') @@ -713,7 +733,8 @@ def test_anchor(self): self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') - def test_name(self): + @needs_windows + def test_name_windows(self): P = self.cls self.assertEqual(P('c:').name, '') self.assertEqual(P('c:/').name, '') @@ -724,7 +745,8 @@ def test_name(self): self.assertEqual(P('//My.py/Share.php').name, '') self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') - def test_suffix(self): + @needs_windows + def test_suffix_windows(self): P = self.cls self.assertEqual(P('c:').suffix, '') self.assertEqual(P('c:/').suffix, '') @@ -743,7 +765,8 @@ def test_suffix(self): self.assertEqual(P('//My.py/Share.php').suffix, '') self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') - def test_suffixes(self): + @needs_windows + def test_suffixes_windows(self): P = self.cls self.assertEqual(P('c:').suffixes, []) self.assertEqual(P('c:/').suffixes, []) @@ -762,7 +785,8 @@ def test_suffixes(self): self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) - def test_stem(self): + @needs_windows + def test_stem_windows(self): P = self.cls self.assertEqual(P('c:').stem, '') self.assertEqual(P('c:.').stem, '') @@ -776,7 +800,8 @@ def test_stem(self): self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, 'Some name. Ending with a dot.') - def test_with_name(self): + @needs_windows + def test_with_name_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) @@ -792,7 +817,8 @@ def test_with_name(self): self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') - def test_with_stem(self): + @needs_windows + def test_with_stem_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) @@ -808,7 +834,8 @@ def test_with_stem(self): self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') - def test_with_suffix(self): + @needs_windows + def test_with_suffix_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) @@ -832,7 +859,8 @@ def test_with_suffix(self): self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') - def test_relative_to(self): + @needs_windows + def test_relative_to_windows(self): P = self.cls p = P('C:Foo/Bar') self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) @@ -937,7 +965,8 @@ def test_relative_to(self): self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) - def test_is_relative_to(self): + @needs_windows + def test_is_relative_to_windows(self): P = self.cls p = P('C:Foo/Bar') self.assertTrue(p.is_relative_to(P('c:'))) @@ -990,7 +1019,8 @@ def test_is_relative_to(self): self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) - def test_is_absolute(self): + @needs_windows + def test_is_absolute_windows(self): P = self.cls # Under NT, only paths with both a drive and a root are absolute. self.assertFalse(P().is_absolute()) @@ -1015,7 +1045,8 @@ def test_is_absolute(self): self.assertTrue(P('//?/UNC/').is_absolute()) self.assertTrue(P('//?/UNC/spam').is_absolute()) - def test_join(self): + @needs_windows + def test_join_windows(self): P = self.cls p = P('C:/a/b') pp = p.joinpath('x/y') @@ -1052,7 +1083,8 @@ def test_join(self): pp = P('//./BootPartition').joinpath('Windows') self.assertEqual(pp, P('//./BootPartition/Windows')) - def test_div(self): + @needs_windows + def test_div_windows(self): # Basically the same as joinpath(). P = self.cls p = P('C:/a/b') @@ -1078,6 +1110,14 @@ def test_div(self): self.assertEqual(p / P('E:d:s'), P('E:d:s')) +class PurePosixPathTest(PurePathTest): + cls = pathlib.PurePosixPath + + +class PureWindowsPathTest(PurePathTest): + cls = pathlib.PureWindowsPath + + class PurePathSubclassTest(PurePathTest): class cls(pathlib.PurePath): pass @@ -1842,12 +1882,8 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(P(pattern)))) self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) - -@unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') -class PosixPathTest(PathTest, PurePosixPathTest): - cls = pathlib.PosixPath - - def test_absolute(self): + @needs_posix + def test_absolute_posix(self): P = self.cls self.assertEqual(str(P('/').absolute()), '/') self.assertEqual(str(P('/a').absolute()), '/a') @@ -1862,6 +1898,7 @@ def test_absolute(self): is_emscripten or is_wasi, "umask is not implemented on Emscripten/WASI." ) + @needs_posix def test_open_mode(self): old_mask = os.umask(0) self.addCleanup(os.umask, old_mask) @@ -1876,6 +1913,7 @@ def test_open_mode(self): st = os.stat(self.pathmod.join(self.base, 'other_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o644) + @needs_posix def test_resolve_root(self): current_directory = os.getcwd() try: @@ -1889,6 +1927,7 @@ def test_resolve_root(self): is_emscripten or is_wasi, "umask is not implemented on Emscripten/WASI." ) + @needs_posix def test_touch_mode(self): old_mask = os.umask(0) self.addCleanup(os.umask, old_mask) @@ -1904,7 +1943,8 @@ def test_touch_mode(self): st = os.stat(self.pathmod.join(self.base, 'masked_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o750) - def test_glob(self): + @needs_posix + def test_glob_posix(self): P = self.cls p = P(self.base) given = set(p.glob("FILEa")) @@ -1912,7 +1952,8 @@ def test_glob(self): self.assertEqual(given, expect) self.assertEqual(set(p.glob("FILEa*")), set()) - def test_rglob(self): + @needs_posix + def test_rglob_posix(self): P = self.cls p = P(self.base, "dirC") given = set(p.rglob("FILEd")) @@ -1924,7 +1965,8 @@ def test_rglob(self): 'pwd module does not expose getpwall()') @unittest.skipIf(sys.platform == "vxworks", "no home directory on VxWorks") - def test_expanduser(self): + @needs_posix + def test_expanduser_posix(self): P = self.cls import_helper.import_module('pwd') import pwd @@ -1979,6 +2021,7 @@ def test_expanduser(self): @unittest.skipIf(sys.platform != "darwin", "Bad file descriptor in /dev/fd affects only macOS") + @needs_posix def test_handling_bad_descriptor(self): try: file_descriptors = list(pathlib.Path('/dev/fd').rglob("*"))[3:] @@ -2000,7 +2043,8 @@ def test_handling_bad_descriptor(self): self.fail("Bad file descriptor not handled.") raise - def test_from_uri(self): + @needs_posix + def test_from_uri_posix(self): P = self.cls self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar')) self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar')) @@ -2013,17 +2057,14 @@ def test_from_uri(self): self.assertRaises(ValueError, P.from_uri, 'file:foo/bar') self.assertRaises(ValueError, P.from_uri, 'http://foo/bar') - def test_from_uri_pathname2url(self): + @needs_posix + def test_from_uri_pathname2url_posix(self): P = self.cls self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar')) self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar')) - -@unittest.skipIf(os.name != 'nt', 'test requires a Windows-compatible system') -class WindowsPathTest(PathTest, PureWindowsPathTest): - cls = pathlib.WindowsPath - - def test_absolute(self): + @needs_windows + def test_absolute_windows(self): P = self.cls # Simple absolute paths. @@ -2068,7 +2109,8 @@ def test_absolute(self): self.assertEqual(str(P(other_drive).absolute()), other_cwd) self.assertEqual(str(P(other_drive + 'foo').absolute()), other_cwd + '\\foo') - def test_glob(self): + @needs_windows + def test_glob_windows(self): P = self.cls p = P(self.base) self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) @@ -2077,14 +2119,16 @@ def test_glob(self): self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) - def test_rglob(self): + @needs_windows + def test_rglob_windows(self): P = self.cls p = P(self.base, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) - def test_expanduser(self): + @needs_windows + def test_expanduser_windows(self): P = self.cls with os_helper.EnvironmentVarGuard() as env: env.pop('HOME', None) @@ -2137,7 +2181,8 @@ def check(): env['HOME'] = 'C:\\Users\\eve' check() - def test_from_uri(self): + @needs_windows + def test_from_uri_windows(self): P = self.cls # DOS drive paths self.assertEqual(P.from_uri('file:c:/path/to/file'), P('c:/path/to/file')) @@ -2158,22 +2203,35 @@ def test_from_uri(self): self.assertRaises(ValueError, P.from_uri, 'file:foo/bar') self.assertRaises(ValueError, P.from_uri, 'http://foo/bar') - def test_from_uri_pathname2url(self): + @needs_windows + def test_from_uri_pathname2url_windows(self): P = self.cls self.assertEqual(P.from_uri('file:' + pathname2url(r'c:\path\to\file')), P('c:/path/to/file')) self.assertEqual(P.from_uri('file:' + pathname2url(r'\\server\path\to\file')), P('//server/path/to/file')) - def test_owner(self): + @needs_windows + def test_owner_windows(self): P = self.cls with self.assertRaises(pathlib.UnsupportedOperation): P('c:/').owner() - def test_group(self): + @needs_windows + def test_group_windows(self): P = self.cls with self.assertRaises(pathlib.UnsupportedOperation): P('c:/').group() +@unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') +class PosixPathTest(PathTest, PurePosixPathTest): + cls = pathlib.PosixPath + + +@unittest.skipIf(os.name != 'nt', 'test requires a Windows-compatible system') +class WindowsPathTest(PathTest, PureWindowsPathTest): + cls = pathlib.WindowsPath + + class PathSubclassTest(PathTest): class cls(pathlib.Path): pass From 2d08af34b873d5e6b4df5082dfc30a37ef59c346 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 27 Jan 2024 02:16:17 +0000 Subject: [PATCH 038/127] Cover OS-specific behaviour in `PurePathBase` and `PathBase` tests. (#114633) Wherever possible, move tests for OS-specific behaviour from `PurePathTest` and `PathTest` to `DummyPurePathTest` and `DummyPathTest`. --- Lib/test/test_pathlib/test_pathlib.py | 627 ---------------------- Lib/test/test_pathlib/test_pathlib_abc.py | 625 +++++++++++++++++++++ 2 files changed, 625 insertions(+), 627 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b0067c25d208b9..9c2b26d41d73f8 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -371,14 +371,6 @@ def test_parse_path_posix(self): check('c:\\a', '', '', ['c:\\a',]) check('\\a', '', '', ['\\a',]) - @needs_posix - def test_root_posix(self): - P = self.cls - self.assertEqual(P('/a/b').root, '/') - self.assertEqual(P('///a/b').root, '/') - # POSIX special case for two leading slashes. - self.assertEqual(P('//a/b').root, '//') - @needs_posix def test_eq_posix(self): P = self.cls @@ -404,46 +396,6 @@ def test_as_uri_non_ascii(self): self.assertEqual(P('/a/b\xe9').as_uri(), 'file:///a/b' + quote_from_bytes(os.fsencode('\xe9'))) - @needs_posix - def test_match_posix(self): - P = self.cls - self.assertFalse(P('A.py').match('a.PY')) - - @needs_posix - def test_is_absolute_posix(self): - P = self.cls - self.assertFalse(P().is_absolute()) - self.assertFalse(P('a').is_absolute()) - self.assertFalse(P('a/b/').is_absolute()) - self.assertTrue(P('/').is_absolute()) - self.assertTrue(P('/a').is_absolute()) - self.assertTrue(P('/a/b/').is_absolute()) - self.assertTrue(P('//a').is_absolute()) - self.assertTrue(P('//a/b').is_absolute()) - - @needs_posix - def test_join_posix(self): - P = self.cls - p = P('//a') - pp = p.joinpath('b') - self.assertEqual(pp, P('//a/b')) - pp = P('/a').joinpath('//c') - self.assertEqual(pp, P('//c')) - pp = P('//a').joinpath('/c') - self.assertEqual(pp, P('/c')) - - @needs_posix - def test_div_posix(self): - # Basically the same as joinpath(). - P = self.cls - p = P('//a') - pp = p / 'b' - self.assertEqual(pp, P('//a/b')) - pp = P('/a') / '//c' - self.assertEqual(pp, P('//c')) - pp = P('//a') / '/c' - self.assertEqual(pp, P('/c')) - @needs_posix def test_parse_windows_path(self): P = self.cls @@ -517,32 +469,6 @@ def test_parse_path_windows(self): check('D:a/c:b', 'D:', '', ['a', 'c:b']) check('D:/a/c:b', 'D:', '\\', ['a', 'c:b']) - @needs_windows - def test_str_windows(self): - p = self.cls('a/b/c') - self.assertEqual(str(p), 'a\\b\\c') - p = self.cls('c:/a/b/c') - self.assertEqual(str(p), 'c:\\a\\b\\c') - p = self.cls('//a/b') - self.assertEqual(str(p), '\\\\a\\b\\') - p = self.cls('//a/b/c') - self.assertEqual(str(p), '\\\\a\\b\\c') - p = self.cls('//a/b/c/d') - self.assertEqual(str(p), '\\\\a\\b\\c\\d') - - @needs_windows - def test_str_subclass_windows(self): - self._check_str_subclass('.\\a:b') - self._check_str_subclass('c:') - self._check_str_subclass('c:a') - self._check_str_subclass('c:a\\b.txt') - self._check_str_subclass('c:\\') - self._check_str_subclass('c:\\a') - self._check_str_subclass('c:\\a\\b.txt') - self._check_str_subclass('\\\\some\\share') - self._check_str_subclass('\\\\some\\share\\a') - self._check_str_subclass('\\\\some\\share\\a\\b.txt') - @needs_windows def test_eq_windows(self): P = self.cls @@ -574,34 +500,6 @@ def test_as_uri_windows(self): self.assertEqual(P('//some/share/a/b%#c\xe9').as_uri(), 'file://some/share/a/b%25%23c%C3%A9') - @needs_windows - def test_match_windows(self): - P = self.cls - # Absolute patterns. - self.assertTrue(P('c:/b.py').match('*:/*.py')) - self.assertTrue(P('c:/b.py').match('c:/*.py')) - self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive - self.assertFalse(P('b.py').match('/*.py')) - self.assertFalse(P('b.py').match('c:*.py')) - self.assertFalse(P('b.py').match('c:/*.py')) - self.assertFalse(P('c:b.py').match('/*.py')) - self.assertFalse(P('c:b.py').match('c:/*.py')) - self.assertFalse(P('/b.py').match('c:*.py')) - self.assertFalse(P('/b.py').match('c:/*.py')) - # UNC patterns. - self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) - self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) - # Case-insensitivity. - self.assertTrue(P('B.py').match('b.PY')) - self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) - self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) - # Path anchor doesn't match pattern anchor - self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' - self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' - self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' - @needs_windows def test_ordering_windows(self): # Case-insensitivity. @@ -620,495 +518,6 @@ def assertOrderedEqual(a, b): self.assertFalse(p < q) self.assertFalse(p > q) - @needs_windows - def test_parts_windows(self): - P = self.cls - p = P('c:a/b') - parts = p.parts - self.assertEqual(parts, ('c:', 'a', 'b')) - p = P('c:/a/b') - parts = p.parts - self.assertEqual(parts, ('c:\\', 'a', 'b')) - p = P('//a/b/c/d') - parts = p.parts - self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) - - @needs_windows - def test_parent_windows(self): - # Anchored - P = self.cls - p = P('z:a/b/c') - self.assertEqual(p.parent, P('z:a/b')) - self.assertEqual(p.parent.parent, P('z:a')) - self.assertEqual(p.parent.parent.parent, P('z:')) - self.assertEqual(p.parent.parent.parent.parent, P('z:')) - p = P('z:/a/b/c') - self.assertEqual(p.parent, P('z:/a/b')) - self.assertEqual(p.parent.parent, P('z:/a')) - self.assertEqual(p.parent.parent.parent, P('z:/')) - self.assertEqual(p.parent.parent.parent.parent, P('z:/')) - p = P('//a/b/c/d') - self.assertEqual(p.parent, P('//a/b/c')) - self.assertEqual(p.parent.parent, P('//a/b')) - self.assertEqual(p.parent.parent.parent, P('//a/b')) - - @needs_windows - def test_parents_windows(self): - # Anchored - P = self.cls - p = P('z:a/b/') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('z:a')) - self.assertEqual(par[1], P('z:')) - self.assertEqual(par[0:1], (P('z:a'),)) - self.assertEqual(par[:-1], (P('z:a'),)) - self.assertEqual(par[:2], (P('z:a'), P('z:'))) - self.assertEqual(par[1:], (P('z:'),)) - self.assertEqual(par[::2], (P('z:a'),)) - self.assertEqual(par[::-1], (P('z:'), P('z:a'))) - self.assertEqual(list(par), [P('z:a'), P('z:')]) - with self.assertRaises(IndexError): - par[2] - p = P('z:/a/b/') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('z:/a')) - self.assertEqual(par[1], P('z:/')) - self.assertEqual(par[0:1], (P('z:/a'),)) - self.assertEqual(par[0:-1], (P('z:/a'),)) - self.assertEqual(par[:2], (P('z:/a'), P('z:/'))) - self.assertEqual(par[1:], (P('z:/'),)) - self.assertEqual(par[::2], (P('z:/a'),)) - self.assertEqual(par[::-1], (P('z:/'), P('z:/a'),)) - self.assertEqual(list(par), [P('z:/a'), P('z:/')]) - with self.assertRaises(IndexError): - par[2] - p = P('//a/b/c/d') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('//a/b/c')) - self.assertEqual(par[1], P('//a/b')) - self.assertEqual(par[0:1], (P('//a/b/c'),)) - self.assertEqual(par[0:-1], (P('//a/b/c'),)) - self.assertEqual(par[:2], (P('//a/b/c'), P('//a/b'))) - self.assertEqual(par[1:], (P('//a/b'),)) - self.assertEqual(par[::2], (P('//a/b/c'),)) - self.assertEqual(par[::-1], (P('//a/b'), P('//a/b/c'))) - self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) - with self.assertRaises(IndexError): - par[2] - - @needs_windows - def test_drive_windows(self): - P = self.cls - self.assertEqual(P('c:').drive, 'c:') - self.assertEqual(P('c:a/b').drive, 'c:') - self.assertEqual(P('c:/').drive, 'c:') - self.assertEqual(P('c:/a/b/').drive, 'c:') - self.assertEqual(P('//a/b').drive, '\\\\a\\b') - self.assertEqual(P('//a/b/').drive, '\\\\a\\b') - self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') - self.assertEqual(P('./c:a').drive, '') - - @needs_windows - def test_root_windows(self): - P = self.cls - self.assertEqual(P('c:').root, '') - self.assertEqual(P('c:a/b').root, '') - self.assertEqual(P('c:/').root, '\\') - self.assertEqual(P('c:/a/b/').root, '\\') - self.assertEqual(P('//a/b').root, '\\') - self.assertEqual(P('//a/b/').root, '\\') - self.assertEqual(P('//a/b/c/d').root, '\\') - - @needs_windows - def test_anchor_windows(self): - P = self.cls - self.assertEqual(P('c:').anchor, 'c:') - self.assertEqual(P('c:a/b').anchor, 'c:') - self.assertEqual(P('c:/').anchor, 'c:\\') - self.assertEqual(P('c:/a/b/').anchor, 'c:\\') - self.assertEqual(P('//a/b').anchor, '\\\\a\\b\\') - self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') - self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') - - @needs_windows - def test_name_windows(self): - P = self.cls - self.assertEqual(P('c:').name, '') - self.assertEqual(P('c:/').name, '') - self.assertEqual(P('c:a/b').name, 'b') - self.assertEqual(P('c:/a/b').name, 'b') - self.assertEqual(P('c:a/b.py').name, 'b.py') - self.assertEqual(P('c:/a/b.py').name, 'b.py') - self.assertEqual(P('//My.py/Share.php').name, '') - self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') - - @needs_windows - def test_suffix_windows(self): - P = self.cls - self.assertEqual(P('c:').suffix, '') - self.assertEqual(P('c:/').suffix, '') - self.assertEqual(P('c:a/b').suffix, '') - self.assertEqual(P('c:/a/b').suffix, '') - self.assertEqual(P('c:a/b.py').suffix, '.py') - self.assertEqual(P('c:/a/b.py').suffix, '.py') - self.assertEqual(P('c:a/.hgrc').suffix, '') - self.assertEqual(P('c:/a/.hgrc').suffix, '') - self.assertEqual(P('c:a/.hg.rc').suffix, '.rc') - self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') - self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('//My.py/Share.php').suffix, '') - self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') - - @needs_windows - def test_suffixes_windows(self): - P = self.cls - self.assertEqual(P('c:').suffixes, []) - self.assertEqual(P('c:/').suffixes, []) - self.assertEqual(P('c:a/b').suffixes, []) - self.assertEqual(P('c:/a/b').suffixes, []) - self.assertEqual(P('c:a/b.py').suffixes, ['.py']) - self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) - self.assertEqual(P('c:a/.hgrc').suffixes, []) - self.assertEqual(P('c:/a/.hgrc').suffixes, []) - self.assertEqual(P('c:a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('c:/a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('c:a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('//My.py/Share.php').suffixes, []) - self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) - - @needs_windows - def test_stem_windows(self): - P = self.cls - self.assertEqual(P('c:').stem, '') - self.assertEqual(P('c:.').stem, '') - self.assertEqual(P('c:..').stem, '..') - self.assertEqual(P('c:/').stem, '') - self.assertEqual(P('c:a/b').stem, 'b') - self.assertEqual(P('c:a/b.py').stem, 'b') - self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') - self.assertEqual(P('c:a/.hg.rc').stem, '.hg') - self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, - 'Some name. Ending with a dot.') - - @needs_windows - def test_with_name_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) - self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) - self.assertEqual(P('c:a/Dot ending.').with_name('d.xml'), P('c:a/d.xml')) - self.assertEqual(P('c:/a/Dot ending.').with_name('d.xml'), P('c:/a/d.xml')) - self.assertRaises(ValueError, P('c:').with_name, 'd.xml') - self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') - self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') - self.assertEqual(str(P('a').with_name('d:')), '.\\d:') - self.assertEqual(str(P('a').with_name('d:e')), '.\\d:e') - self.assertEqual(P('c:a/b').with_name('d:'), P('c:a/d:')) - self.assertEqual(P('c:a/b').with_name('d:e'), P('c:a/d:e')) - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') - self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') - - @needs_windows - def test_with_stem_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) - self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) - self.assertRaises(ValueError, P('c:').with_stem, 'd') - self.assertRaises(ValueError, P('c:/').with_stem, 'd') - self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') - self.assertEqual(str(P('a').with_stem('d:')), '.\\d:') - self.assertEqual(str(P('a').with_stem('d:e')), '.\\d:e') - self.assertEqual(P('c:a/b').with_stem('d:'), P('c:a/d:')) - self.assertEqual(P('c:a/b').with_stem('d:e'), P('c:a/d:e')) - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') - self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') - - @needs_windows - def test_with_suffix_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) - self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) - self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) - self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) - # Path doesn't have a "filename" component. - self.assertRaises(ValueError, P('').with_suffix, '.gz') - self.assertRaises(ValueError, P('.').with_suffix, '.gz') - self.assertRaises(ValueError, P('/').with_suffix, '.gz') - self.assertRaises(ValueError, P('//My/Share').with_suffix, '.gz') - # Invalid suffix. - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '/') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '/.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c/d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c\\d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') - - @needs_windows - def test_relative_to_windows(self): - P = self.cls - p = P('C:Foo/Bar') - self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:foO')), P('Bar')) - self.assertEqual(p.relative_to('c:foO'), P('Bar')) - self.assertEqual(p.relative_to('c:foO/'), P('Bar')) - self.assertEqual(p.relative_to(P('c:foO/baR')), P()) - self.assertEqual(p.relative_to('c:foO/baR'), P()) - self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:foO'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:foO', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:foO/', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('c:foO/baR'), walk_up=True), P()) - self.assertEqual(p.relative_to('c:foO/baR', walk_up=True), P()) - self.assertEqual(p.relative_to(P('C:Foo/Bar/Baz'), walk_up=True), P('..')) - self.assertEqual(p.relative_to(P('C:Foo/Baz'), walk_up=True), P('../Bar')) - self.assertEqual(p.relative_to(P('C:Baz/Bar'), walk_up=True), P('../../Foo/Bar')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, P()) - self.assertRaises(ValueError, p.relative_to, '') - self.assertRaises(ValueError, p.relative_to, P('d:')) - self.assertRaises(ValueError, p.relative_to, P('/')) - self.assertRaises(ValueError, p.relative_to, P('Foo')) - self.assertRaises(ValueError, p.relative_to, P('/Foo')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) - self.assertRaises(ValueError, p.relative_to, P(), walk_up=True) - self.assertRaises(ValueError, p.relative_to, '', walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) - p = P('C:/Foo/Bar') - self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) - self.assertEqual(p.relative_to('c:/foO'), P('Bar')) - self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) - self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) - self.assertEqual(p.relative_to('c:/foO/baR'), P()) - self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:/foO'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:/foO', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:/foO/', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('c:/foO/baR'), walk_up=True), P()) - self.assertEqual(p.relative_to('c:/foO/baR', walk_up=True), P()) - self.assertEqual(p.relative_to('C:/Baz', walk_up=True), P('../Foo/Bar')) - self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) - self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, 'c:') - self.assertRaises(ValueError, p.relative_to, P('c:')) - self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo')) - self.assertRaises(ValueError, p.relative_to, P('d:')) - self.assertRaises(ValueError, p.relative_to, P('d:/')) - self.assertRaises(ValueError, p.relative_to, P('/')) - self.assertRaises(ValueError, p.relative_to, P('/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) - self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//C/Foo'), walk_up=True) - # UNC paths. - p = P('//Server/Share/Foo/Bar') - self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare'), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo')), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo'), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) - self.assertEqual(p.relative_to(P('//sErver/sHare'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar'), walk_up=True), P()) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar', walk_up=True), P()) - self.assertEqual(p.relative_to(P('//sErver/sHare/bar'), walk_up=True), P('../Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/bar', walk_up=True), P('../Foo/Bar')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) - self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) - - @needs_windows - def test_is_relative_to_windows(self): - P = self.cls - p = P('C:Foo/Bar') - self.assertTrue(p.is_relative_to(P('c:'))) - self.assertTrue(p.is_relative_to('c:')) - self.assertTrue(p.is_relative_to(P('c:foO'))) - self.assertTrue(p.is_relative_to('c:foO')) - self.assertTrue(p.is_relative_to('c:foO/')) - self.assertTrue(p.is_relative_to(P('c:foO/baR'))) - self.assertTrue(p.is_relative_to('c:foO/baR')) - # Unrelated paths. - self.assertFalse(p.is_relative_to(P())) - self.assertFalse(p.is_relative_to('')) - self.assertFalse(p.is_relative_to(P('d:'))) - self.assertFalse(p.is_relative_to(P('/'))) - self.assertFalse(p.is_relative_to(P('Foo'))) - self.assertFalse(p.is_relative_to(P('/Foo'))) - self.assertFalse(p.is_relative_to(P('C:/Foo'))) - self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) - self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) - p = P('C:/Foo/Bar') - self.assertTrue(p.is_relative_to(P('c:/'))) - self.assertTrue(p.is_relative_to(P('c:/foO'))) - self.assertTrue(p.is_relative_to('c:/foO/')) - self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) - self.assertTrue(p.is_relative_to('c:/foO/baR')) - # Unrelated paths. - self.assertFalse(p.is_relative_to('c:')) - self.assertFalse(p.is_relative_to(P('C:/Baz'))) - self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) - self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) - self.assertFalse(p.is_relative_to(P('C:Foo'))) - self.assertFalse(p.is_relative_to(P('d:'))) - self.assertFalse(p.is_relative_to(P('d:/'))) - self.assertFalse(p.is_relative_to(P('/'))) - self.assertFalse(p.is_relative_to(P('/Foo'))) - self.assertFalse(p.is_relative_to(P('//C/Foo'))) - # UNC paths. - p = P('//Server/Share/Foo/Bar') - self.assertTrue(p.is_relative_to(P('//sErver/sHare'))) - self.assertTrue(p.is_relative_to('//sErver/sHare')) - self.assertTrue(p.is_relative_to('//sErver/sHare/')) - self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo'))) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo')) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/')) - self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo/Bar'))) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/Bar')) - # Unrelated paths. - self.assertFalse(p.is_relative_to(P('/Server/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('c:/Server/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) - - @needs_windows - def test_is_absolute_windows(self): - P = self.cls - # Under NT, only paths with both a drive and a root are absolute. - self.assertFalse(P().is_absolute()) - self.assertFalse(P('a').is_absolute()) - self.assertFalse(P('a/b/').is_absolute()) - self.assertFalse(P('/').is_absolute()) - self.assertFalse(P('/a').is_absolute()) - self.assertFalse(P('/a/b/').is_absolute()) - self.assertFalse(P('c:').is_absolute()) - self.assertFalse(P('c:a').is_absolute()) - self.assertFalse(P('c:a/b/').is_absolute()) - self.assertTrue(P('c:/').is_absolute()) - self.assertTrue(P('c:/a').is_absolute()) - self.assertTrue(P('c:/a/b/').is_absolute()) - # UNC paths are absolute by definition. - self.assertTrue(P('//').is_absolute()) - self.assertTrue(P('//a').is_absolute()) - self.assertTrue(P('//a/b').is_absolute()) - self.assertTrue(P('//a/b/').is_absolute()) - self.assertTrue(P('//a/b/c').is_absolute()) - self.assertTrue(P('//a/b/c/d').is_absolute()) - self.assertTrue(P('//?/UNC/').is_absolute()) - self.assertTrue(P('//?/UNC/spam').is_absolute()) - - @needs_windows - def test_join_windows(self): - P = self.cls - p = P('C:/a/b') - pp = p.joinpath('x/y') - self.assertEqual(pp, P('C:/a/b/x/y')) - pp = p.joinpath('/x/y') - self.assertEqual(pp, P('C:/x/y')) - # Joining with a different drive => the first path is ignored, even - # if the second path is relative. - pp = p.joinpath('D:x/y') - self.assertEqual(pp, P('D:x/y')) - pp = p.joinpath('D:/x/y') - self.assertEqual(pp, P('D:/x/y')) - pp = p.joinpath('//host/share/x/y') - self.assertEqual(pp, P('//host/share/x/y')) - # Joining with the same drive => the first path is appended to if - # the second path is relative. - pp = p.joinpath('c:x/y') - self.assertEqual(pp, P('C:/a/b/x/y')) - pp = p.joinpath('c:/x/y') - self.assertEqual(pp, P('C:/x/y')) - # Joining with files with NTFS data streams => the filename should - # not be parsed as a drive letter - pp = p.joinpath(P('./d:s')) - self.assertEqual(pp, P('C:/a/b/d:s')) - pp = p.joinpath(P('./dd:s')) - self.assertEqual(pp, P('C:/a/b/dd:s')) - pp = p.joinpath(P('E:d:s')) - self.assertEqual(pp, P('E:d:s')) - # Joining onto a UNC path with no root - pp = P('//').joinpath('server') - self.assertEqual(pp, P('//server')) - pp = P('//server').joinpath('share') - self.assertEqual(pp, P('//server/share')) - pp = P('//./BootPartition').joinpath('Windows') - self.assertEqual(pp, P('//./BootPartition/Windows')) - - @needs_windows - def test_div_windows(self): - # Basically the same as joinpath(). - P = self.cls - p = P('C:/a/b') - self.assertEqual(p / 'x/y', P('C:/a/b/x/y')) - self.assertEqual(p / 'x' / 'y', P('C:/a/b/x/y')) - self.assertEqual(p / '/x/y', P('C:/x/y')) - self.assertEqual(p / '/x' / 'y', P('C:/x/y')) - # Joining with a different drive => the first path is ignored, even - # if the second path is relative. - self.assertEqual(p / 'D:x/y', P('D:x/y')) - self.assertEqual(p / 'D:' / 'x/y', P('D:x/y')) - self.assertEqual(p / 'D:/x/y', P('D:/x/y')) - self.assertEqual(p / 'D:' / '/x/y', P('D:/x/y')) - self.assertEqual(p / '//host/share/x/y', P('//host/share/x/y')) - # Joining with the same drive => the first path is appended to if - # the second path is relative. - self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) - self.assertEqual(p / 'c:/x/y', P('C:/x/y')) - # Joining with files with NTFS data streams => the filename should - # not be parsed as a drive letter - self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s')) - self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) - self.assertEqual(p / P('E:d:s'), P('E:d:s')) - class PurePosixPathTest(PurePathTest): cls = pathlib.PurePosixPath @@ -1943,24 +1352,6 @@ def test_touch_mode(self): st = os.stat(self.pathmod.join(self.base, 'masked_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o750) - @needs_posix - def test_glob_posix(self): - P = self.cls - p = P(self.base) - given = set(p.glob("FILEa")) - expect = set() if not os_helper.fs_is_case_insensitive(self.base) else given - self.assertEqual(given, expect) - self.assertEqual(set(p.glob("FILEa*")), set()) - - @needs_posix - def test_rglob_posix(self): - P = self.cls - p = P(self.base, "dirC") - given = set(p.rglob("FILEd")) - expect = set() if not os_helper.fs_is_case_insensitive(self.base) else given - self.assertEqual(given, expect) - self.assertEqual(set(p.rglob("FILEd*")), set()) - @unittest.skipUnless(hasattr(pwd, 'getpwall'), 'pwd module does not expose getpwall()') @unittest.skipIf(sys.platform == "vxworks", @@ -2109,24 +1500,6 @@ def test_absolute_windows(self): self.assertEqual(str(P(other_drive).absolute()), other_cwd) self.assertEqual(str(P(other_drive + 'foo').absolute()), other_cwd + '\\foo') - @needs_windows - def test_glob_windows(self): - P = self.cls - p = P(self.base) - self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) - self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") }) - self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") }) - self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) - self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) - - @needs_windows - def test_rglob_windows(self): - P = self.cls - p = P(self.base, "dirC") - self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) - self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) - self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) - @needs_windows def test_expanduser_windows(self): P = self.cls diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index b19e9b40419c7a..ea70931eaa2c7e 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -174,6 +174,19 @@ def test_str_subclass_common(self): self._check_str_subclass('a/b.txt') self._check_str_subclass('/a/b.txt') + @needs_windows + def test_str_subclass_windows(self): + self._check_str_subclass('.\\a:b') + self._check_str_subclass('c:') + self._check_str_subclass('c:a') + self._check_str_subclass('c:a\\b.txt') + self._check_str_subclass('c:\\') + self._check_str_subclass('c:\\a') + self._check_str_subclass('c:\\a\\b.txt') + self._check_str_subclass('\\\\some\\share') + self._check_str_subclass('\\\\some\\share\\a') + self._check_str_subclass('\\\\some\\share\\a\\b.txt') + def test_with_segments_common(self): class P(self.cls): def __init__(self, *pathsegments, session_id): @@ -206,6 +219,55 @@ def test_join_common(self): pp = p.joinpath('/c') self.assertEqual(pp, P('/c')) + @needs_posix + def test_join_posix(self): + P = self.cls + p = P('//a') + pp = p.joinpath('b') + self.assertEqual(pp, P('//a/b')) + pp = P('/a').joinpath('//c') + self.assertEqual(pp, P('//c')) + pp = P('//a').joinpath('/c') + self.assertEqual(pp, P('/c')) + + @needs_windows + def test_join_windows(self): + P = self.cls + p = P('C:/a/b') + pp = p.joinpath('x/y') + self.assertEqual(pp, P('C:/a/b/x/y')) + pp = p.joinpath('/x/y') + self.assertEqual(pp, P('C:/x/y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + pp = p.joinpath('D:x/y') + self.assertEqual(pp, P('D:x/y')) + pp = p.joinpath('D:/x/y') + self.assertEqual(pp, P('D:/x/y')) + pp = p.joinpath('//host/share/x/y') + self.assertEqual(pp, P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. + pp = p.joinpath('c:x/y') + self.assertEqual(pp, P('C:/a/b/x/y')) + pp = p.joinpath('c:/x/y') + self.assertEqual(pp, P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + pp = p.joinpath(P('./d:s')) + self.assertEqual(pp, P('C:/a/b/d:s')) + pp = p.joinpath(P('./dd:s')) + self.assertEqual(pp, P('C:/a/b/dd:s')) + pp = p.joinpath(P('E:d:s')) + self.assertEqual(pp, P('E:d:s')) + # Joining onto a UNC path with no root + pp = P('//').joinpath('server') + self.assertEqual(pp, P('//server')) + pp = P('//server').joinpath('share') + self.assertEqual(pp, P('//server/share')) + pp = P('//./BootPartition').joinpath('Windows') + self.assertEqual(pp, P('//./BootPartition/Windows')) + def test_div_common(self): # Basically the same as joinpath(). P = self.cls @@ -222,6 +284,44 @@ def test_div_common(self): pp = p/ '/c' self.assertEqual(pp, P('/c')) + @needs_posix + def test_div_posix(self): + # Basically the same as joinpath(). + P = self.cls + p = P('//a') + pp = p / 'b' + self.assertEqual(pp, P('//a/b')) + pp = P('/a') / '//c' + self.assertEqual(pp, P('//c')) + pp = P('//a') / '/c' + self.assertEqual(pp, P('/c')) + + @needs_windows + def test_div_windows(self): + # Basically the same as joinpath(). + P = self.cls + p = P('C:/a/b') + self.assertEqual(p / 'x/y', P('C:/a/b/x/y')) + self.assertEqual(p / 'x' / 'y', P('C:/a/b/x/y')) + self.assertEqual(p / '/x/y', P('C:/x/y')) + self.assertEqual(p / '/x' / 'y', P('C:/x/y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + self.assertEqual(p / 'D:x/y', P('D:x/y')) + self.assertEqual(p / 'D:' / 'x/y', P('D:x/y')) + self.assertEqual(p / 'D:/x/y', P('D:/x/y')) + self.assertEqual(p / 'D:' / '/x/y', P('D:/x/y')) + self.assertEqual(p / '//host/share/x/y', P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. + self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) + self.assertEqual(p / 'c:/x/y', P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s')) + self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) + self.assertEqual(p / P('E:d:s'), P('E:d:s')) + def _check_str(self, expected, args): p = self.cls(*args) self.assertEqual(str(p), expected.replace('/', self.sep)) @@ -232,6 +332,19 @@ def test_str_common(self): self._check_str(pathstr, (pathstr,)) # Other tests for str() are in test_equivalences(). + @needs_windows + def test_str_windows(self): + p = self.cls('a/b/c') + self.assertEqual(str(p), 'a\\b\\c') + p = self.cls('c:/a/b/c') + self.assertEqual(str(p), 'c:\\a\\b\\c') + p = self.cls('//a/b') + self.assertEqual(str(p), '\\\\a\\b\\') + p = self.cls('//a/b/c') + self.assertEqual(str(p), '\\\\a\\b\\c') + p = self.cls('//a/b/c/d') + self.assertEqual(str(p), '\\\\a\\b\\c\\d') + def test_as_posix_common(self): P = self.cls for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): @@ -287,6 +400,39 @@ def test_match_common(self): self.assertFalse(P('').match('**')) self.assertFalse(P('').match('**/*')) + @needs_posix + def test_match_posix(self): + P = self.cls + self.assertFalse(P('A.py').match('a.PY')) + + @needs_windows + def test_match_windows(self): + P = self.cls + # Absolute patterns. + self.assertTrue(P('c:/b.py').match('*:/*.py')) + self.assertTrue(P('c:/b.py').match('c:/*.py')) + self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('b.py').match('c:*.py')) + self.assertFalse(P('b.py').match('c:/*.py')) + self.assertFalse(P('c:b.py').match('/*.py')) + self.assertFalse(P('c:b.py').match('c:/*.py')) + self.assertFalse(P('/b.py').match('c:*.py')) + self.assertFalse(P('/b.py').match('c:/*.py')) + # UNC patterns. + self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) + self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) + # Case-insensitivity. + self.assertTrue(P('B.py').match('b.PY')) + self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) + self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) + # Path anchor doesn't match pattern anchor + self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' + self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' + self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' + def test_full_match_common(self): P = self.cls # Simple relative pattern. @@ -372,6 +518,19 @@ def test_parts_common(self): parts = p.parts self.assertEqual(parts, (sep, 'a', 'b')) + @needs_windows + def test_parts_windows(self): + P = self.cls + p = P('c:a/b') + parts = p.parts + self.assertEqual(parts, ('c:', 'a', 'b')) + p = P('c:/a/b') + parts = p.parts + self.assertEqual(parts, ('c:\\', 'a', 'b')) + p = P('//a/b/c/d') + parts = p.parts + self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) + def test_parent_common(self): # Relative P = self.cls @@ -387,6 +546,25 @@ def test_parent_common(self): self.assertEqual(p.parent.parent.parent, P('/')) self.assertEqual(p.parent.parent.parent.parent, P('/')) + @needs_windows + def test_parent_windows(self): + # Anchored + P = self.cls + p = P('z:a/b/c') + self.assertEqual(p.parent, P('z:a/b')) + self.assertEqual(p.parent.parent, P('z:a')) + self.assertEqual(p.parent.parent.parent, P('z:')) + self.assertEqual(p.parent.parent.parent.parent, P('z:')) + p = P('z:/a/b/c') + self.assertEqual(p.parent, P('z:/a/b')) + self.assertEqual(p.parent.parent, P('z:/a')) + self.assertEqual(p.parent.parent.parent, P('z:/')) + self.assertEqual(p.parent.parent.parent.parent, P('z:/')) + p = P('//a/b/c/d') + self.assertEqual(p.parent, P('//a/b/c')) + self.assertEqual(p.parent.parent, P('//a/b')) + self.assertEqual(p.parent.parent.parent, P('//a/b')) + def test_parents_common(self): # Relative P = self.cls @@ -434,12 +612,71 @@ def test_parents_common(self): with self.assertRaises(IndexError): par[3] + @needs_windows + def test_parents_windows(self): + # Anchored + P = self.cls + p = P('z:a/b/') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:a')) + self.assertEqual(par[1], P('z:')) + self.assertEqual(par[0:1], (P('z:a'),)) + self.assertEqual(par[:-1], (P('z:a'),)) + self.assertEqual(par[:2], (P('z:a'), P('z:'))) + self.assertEqual(par[1:], (P('z:'),)) + self.assertEqual(par[::2], (P('z:a'),)) + self.assertEqual(par[::-1], (P('z:'), P('z:a'))) + self.assertEqual(list(par), [P('z:a'), P('z:')]) + with self.assertRaises(IndexError): + par[2] + p = P('z:/a/b/') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:/a')) + self.assertEqual(par[1], P('z:/')) + self.assertEqual(par[0:1], (P('z:/a'),)) + self.assertEqual(par[0:-1], (P('z:/a'),)) + self.assertEqual(par[:2], (P('z:/a'), P('z:/'))) + self.assertEqual(par[1:], (P('z:/'),)) + self.assertEqual(par[::2], (P('z:/a'),)) + self.assertEqual(par[::-1], (P('z:/'), P('z:/a'),)) + self.assertEqual(list(par), [P('z:/a'), P('z:/')]) + with self.assertRaises(IndexError): + par[2] + p = P('//a/b/c/d') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('//a/b/c')) + self.assertEqual(par[1], P('//a/b')) + self.assertEqual(par[0:1], (P('//a/b/c'),)) + self.assertEqual(par[0:-1], (P('//a/b/c'),)) + self.assertEqual(par[:2], (P('//a/b/c'), P('//a/b'))) + self.assertEqual(par[1:], (P('//a/b'),)) + self.assertEqual(par[::2], (P('//a/b/c'),)) + self.assertEqual(par[::-1], (P('//a/b'), P('//a/b/c'))) + self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) + with self.assertRaises(IndexError): + par[2] + def test_drive_common(self): P = self.cls self.assertEqual(P('a/b').drive, '') self.assertEqual(P('/a/b').drive, '') self.assertEqual(P('').drive, '') + @needs_windows + def test_drive_windows(self): + P = self.cls + self.assertEqual(P('c:').drive, 'c:') + self.assertEqual(P('c:a/b').drive, 'c:') + self.assertEqual(P('c:/').drive, 'c:') + self.assertEqual(P('c:/a/b/').drive, 'c:') + self.assertEqual(P('//a/b').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') + self.assertEqual(P('./c:a').drive, '') + def test_root_common(self): P = self.cls sep = self.sep @@ -448,6 +685,24 @@ def test_root_common(self): self.assertEqual(P('/').root, sep) self.assertEqual(P('/a/b').root, sep) + @needs_posix + def test_root_posix(self): + P = self.cls + self.assertEqual(P('/a/b').root, '/') + # POSIX special case for two leading slashes. + self.assertEqual(P('//a/b').root, '//') + + @needs_windows + def test_root_windows(self): + P = self.cls + self.assertEqual(P('c:').root, '') + self.assertEqual(P('c:a/b').root, '') + self.assertEqual(P('c:/').root, '\\') + self.assertEqual(P('c:/a/b/').root, '\\') + self.assertEqual(P('//a/b').root, '\\') + self.assertEqual(P('//a/b/').root, '\\') + self.assertEqual(P('//a/b/c/d').root, '\\') + def test_anchor_common(self): P = self.cls sep = self.sep @@ -456,6 +711,17 @@ def test_anchor_common(self): self.assertEqual(P('/').anchor, sep) self.assertEqual(P('/a/b').anchor, sep) + @needs_windows + def test_anchor_windows(self): + P = self.cls + self.assertEqual(P('c:').anchor, 'c:') + self.assertEqual(P('c:a/b').anchor, 'c:') + self.assertEqual(P('c:/').anchor, 'c:\\') + self.assertEqual(P('c:/a/b/').anchor, 'c:\\') + self.assertEqual(P('//a/b').anchor, '\\\\a\\b\\') + self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') + self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') + def test_name_empty(self): P = self.cls self.assertEqual(P('').name, '') @@ -470,6 +736,18 @@ def test_name_common(self): self.assertEqual(P('a/b.py').name, 'b.py') self.assertEqual(P('/a/b.py').name, 'b.py') + @needs_windows + def test_name_windows(self): + P = self.cls + self.assertEqual(P('c:').name, '') + self.assertEqual(P('c:/').name, '') + self.assertEqual(P('c:a/b').name, 'b') + self.assertEqual(P('c:/a/b').name, 'b') + self.assertEqual(P('c:a/b.py').name, 'b.py') + self.assertEqual(P('c:/a/b.py').name, 'b.py') + self.assertEqual(P('//My.py/Share.php').name, '') + self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') + def test_suffix_common(self): P = self.cls self.assertEqual(P('').suffix, '') @@ -490,6 +768,26 @@ def test_suffix_common(self): self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') + @needs_windows + def test_suffix_windows(self): + P = self.cls + self.assertEqual(P('c:').suffix, '') + self.assertEqual(P('c:/').suffix, '') + self.assertEqual(P('c:a/b').suffix, '') + self.assertEqual(P('c:/a/b').suffix, '') + self.assertEqual(P('c:a/b.py').suffix, '.py') + self.assertEqual(P('c:/a/b.py').suffix, '.py') + self.assertEqual(P('c:a/.hgrc').suffix, '') + self.assertEqual(P('c:/a/.hgrc').suffix, '') + self.assertEqual(P('c:a/.hg.rc').suffix, '.rc') + self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') + self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('//My.py/Share.php').suffix, '') + self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') + def test_suffixes_common(self): P = self.cls self.assertEqual(P('').suffixes, []) @@ -509,6 +807,26 @@ def test_suffixes_common(self): self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) + @needs_windows + def test_suffixes_windows(self): + P = self.cls + self.assertEqual(P('c:').suffixes, []) + self.assertEqual(P('c:/').suffixes, []) + self.assertEqual(P('c:a/b').suffixes, []) + self.assertEqual(P('c:/a/b').suffixes, []) + self.assertEqual(P('c:a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:a/.hgrc').suffixes, []) + self.assertEqual(P('c:/a/.hgrc').suffixes, []) + self.assertEqual(P('c:a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:/a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('//My.py/Share.php').suffixes, []) + self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) + self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) + def test_stem_empty(self): P = self.cls self.assertEqual(P('').stem, '') @@ -526,6 +844,20 @@ def test_stem_common(self): self.assertEqual(P('a/Some name. Ending with a dot.').stem, 'Some name. Ending with a dot.') + @needs_windows + def test_stem_windows(self): + P = self.cls + self.assertEqual(P('c:').stem, '') + self.assertEqual(P('c:.').stem, '') + self.assertEqual(P('c:..').stem, '..') + self.assertEqual(P('c:/').stem, '') + self.assertEqual(P('c:a/b').stem, 'b') + self.assertEqual(P('c:a/b.py').stem, 'b') + self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') + self.assertEqual(P('c:a/.hg.rc').stem, '.hg') + self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') + self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, + 'Some name. Ending with a dot.') def test_with_name_common(self): P = self.cls self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) @@ -535,6 +867,23 @@ def test_with_name_common(self): self.assertEqual(P('a/Dot ending.').with_name('d.xml'), P('a/d.xml')) self.assertEqual(P('/a/Dot ending.').with_name('d.xml'), P('/a/d.xml')) + @needs_windows + def test_with_name_windows(self): + P = self.cls + self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) + self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) + self.assertEqual(P('c:a/Dot ending.').with_name('d.xml'), P('c:a/d.xml')) + self.assertEqual(P('c:/a/Dot ending.').with_name('d.xml'), P('c:/a/d.xml')) + self.assertRaises(ValueError, P('c:').with_name, 'd.xml') + self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') + self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') + self.assertEqual(str(P('a').with_name('d:')), '.\\d:') + self.assertEqual(str(P('a').with_name('d:e')), '.\\d:e') + self.assertEqual(P('c:a/b').with_name('d:'), P('c:a/d:')) + self.assertEqual(P('c:a/b').with_name('d:e'), P('c:a/d:e')) + self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') + self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') + def test_with_name_empty(self): P = self.cls self.assertEqual(P('').with_name('d.xml'), P('d.xml')) @@ -559,6 +908,23 @@ def test_with_stem_common(self): self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) + @needs_windows + def test_with_stem_windows(self): + P = self.cls + self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) + self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) + self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) + self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) + self.assertRaises(ValueError, P('c:').with_stem, 'd') + self.assertRaises(ValueError, P('c:/').with_stem, 'd') + self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') + self.assertEqual(str(P('a').with_stem('d:')), '.\\d:') + self.assertEqual(str(P('a').with_stem('d:e')), '.\\d:e') + self.assertEqual(P('c:a/b').with_stem('d:'), P('c:a/d:')) + self.assertEqual(P('c:a/b').with_stem('d:e'), P('c:a/d:e')) + self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') + self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') + def test_with_stem_empty(self): P = self.cls self.assertEqual(P('').with_stem('d'), P('d')) @@ -583,6 +949,31 @@ def test_with_suffix_common(self): self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) + @needs_windows + def test_with_suffix_windows(self): + P = self.cls + self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) + self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) + # Path doesn't have a "filename" component. + self.assertRaises(ValueError, P('').with_suffix, '.gz') + self.assertRaises(ValueError, P('.').with_suffix, '.gz') + self.assertRaises(ValueError, P('/').with_suffix, '.gz') + self.assertRaises(ValueError, P('//My/Share').with_suffix, '.gz') + # Invalid suffix. + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c\\d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') + def test_with_suffix_empty(self): P = self.cls # Path doesn't have a "filename" component. @@ -677,6 +1068,112 @@ def test_relative_to_common(self): self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) + @needs_windows + def test_relative_to_windows(self): + P = self.cls + p = P('C:Foo/Bar') + self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO')), P('Bar')) + self.assertEqual(p.relative_to('c:foO'), P('Bar')) + self.assertEqual(p.relative_to('c:foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:foO/baR')), P()) + self.assertEqual(p.relative_to('c:foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('c:foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to(P('C:Foo/Bar/Baz'), walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('C:Foo/Baz'), walk_up=True), P('../Bar')) + self.assertEqual(p.relative_to(P('C:Baz/Bar'), walk_up=True), P('../../Foo/Bar')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, P()) + self.assertRaises(ValueError, p.relative_to, '') + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) + self.assertRaises(ValueError, p.relative_to, P(), walk_up=True) + self.assertRaises(ValueError, p.relative_to, '', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) + p = P('C:/Foo/Bar') + self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) + self.assertEqual(p.relative_to('c:/foO'), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) + self.assertEqual(p.relative_to('c:/foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('c:/foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:/foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to('C:/Baz', walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) + self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, 'c:') + self.assertRaises(ValueError, p.relative_to, P('c:')) + self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo')) + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('d:/')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) + self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo'), walk_up=True) + # UNC paths. + p = P('//Server/Share/Foo/Bar') + self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare'), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo')), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo'), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar'), walk_up=True), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar', walk_up=True), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare/bar'), walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/bar', walk_up=True), P('../Foo/Bar')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) + def test_is_relative_to_common(self): P = self.cls p = P('a/b') @@ -709,6 +1206,98 @@ def test_is_relative_to_common(self): self.assertFalse(p.is_relative_to('')) self.assertFalse(p.is_relative_to(P('a'))) + @needs_windows + def test_is_relative_to_windows(self): + P = self.cls + p = P('C:Foo/Bar') + self.assertTrue(p.is_relative_to(P('c:'))) + self.assertTrue(p.is_relative_to('c:')) + self.assertTrue(p.is_relative_to(P('c:foO'))) + self.assertTrue(p.is_relative_to('c:foO')) + self.assertTrue(p.is_relative_to('c:foO/')) + self.assertTrue(p.is_relative_to(P('c:foO/baR'))) + self.assertTrue(p.is_relative_to('c:foO/baR')) + # Unrelated paths. + self.assertFalse(p.is_relative_to(P())) + self.assertFalse(p.is_relative_to('')) + self.assertFalse(p.is_relative_to(P('d:'))) + self.assertFalse(p.is_relative_to(P('/'))) + self.assertFalse(p.is_relative_to(P('Foo'))) + self.assertFalse(p.is_relative_to(P('/Foo'))) + self.assertFalse(p.is_relative_to(P('C:/Foo'))) + self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) + self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) + p = P('C:/Foo/Bar') + self.assertTrue(p.is_relative_to(P('c:/'))) + self.assertTrue(p.is_relative_to(P('c:/foO'))) + self.assertTrue(p.is_relative_to('c:/foO/')) + self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) + self.assertTrue(p.is_relative_to('c:/foO/baR')) + # Unrelated paths. + self.assertFalse(p.is_relative_to('c:')) + self.assertFalse(p.is_relative_to(P('C:/Baz'))) + self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) + self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) + self.assertFalse(p.is_relative_to(P('C:Foo'))) + self.assertFalse(p.is_relative_to(P('d:'))) + self.assertFalse(p.is_relative_to(P('d:/'))) + self.assertFalse(p.is_relative_to(P('/'))) + self.assertFalse(p.is_relative_to(P('/Foo'))) + self.assertFalse(p.is_relative_to(P('//C/Foo'))) + # UNC paths. + p = P('//Server/Share/Foo/Bar') + self.assertTrue(p.is_relative_to(P('//sErver/sHare'))) + self.assertTrue(p.is_relative_to('//sErver/sHare')) + self.assertTrue(p.is_relative_to('//sErver/sHare/')) + self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo'))) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo')) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/')) + self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo/Bar'))) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/Bar')) + # Unrelated paths. + self.assertFalse(p.is_relative_to(P('/Server/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('c:/Server/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) + + @needs_posix + def test_is_absolute_posix(self): + P = self.cls + self.assertFalse(P('').is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertTrue(P('/').is_absolute()) + self.assertTrue(P('/a').is_absolute()) + self.assertTrue(P('/a/b/').is_absolute()) + self.assertTrue(P('//a').is_absolute()) + self.assertTrue(P('//a/b').is_absolute()) + + @needs_windows + def test_is_absolute_windows(self): + P = self.cls + # Under NT, only paths with both a drive and a root are absolute. + self.assertFalse(P().is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertFalse(P('/').is_absolute()) + self.assertFalse(P('/a').is_absolute()) + self.assertFalse(P('/a/b/').is_absolute()) + self.assertFalse(P('c:').is_absolute()) + self.assertFalse(P('c:a').is_absolute()) + self.assertFalse(P('c:a/b/').is_absolute()) + self.assertTrue(P('c:/').is_absolute()) + self.assertTrue(P('c:/a').is_absolute()) + self.assertTrue(P('c:/a/b/').is_absolute()) + # UNC paths are absolute by definition. + self.assertTrue(P('//').is_absolute()) + self.assertTrue(P('//a').is_absolute()) + self.assertTrue(P('//a/b').is_absolute()) + self.assertTrue(P('//a/b/').is_absolute()) + self.assertTrue(P('//a/b/c').is_absolute()) + self.assertTrue(P('//a/b/c/d').is_absolute()) + self.assertTrue(P('//?/UNC/').is_absolute()) + self.assertTrue(P('//?/UNC/spam').is_absolute()) + # # Tests for the virtual classes. @@ -1124,6 +1713,25 @@ def _check(glob, expected): else: _check(p.glob("*/"), ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) + @needs_posix + def test_glob_posix(self): + P = self.cls + p = P(self.base) + given = set(p.glob("FILEa")) + expect = set() + self.assertEqual(given, expect) + self.assertEqual(set(p.glob("FILEa*")), set()) + + @needs_windows + def test_glob_windows(self): + P = self.cls + p = P(self.base) + self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) + self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") }) + self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") }) + self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) + self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) + def test_glob_empty_pattern(self): def _check(glob, expected): self.assertEqual(set(glob), { P(self.base, q) for q in expected }) @@ -1236,6 +1844,23 @@ def _check(glob, expected): _check(p.rglob("*.txt"), ["dirC/novel.txt"]) _check(p.rglob("*.*"), ["dirC/novel.txt"]) + @needs_posix + def test_rglob_posix(self): + P = self.cls + p = P(self.base, "dirC") + given = set(p.rglob("FILEd")) + expect = set() + self.assertEqual(given, expect) + self.assertEqual(set(p.rglob("FILEd*")), set()) + + @needs_windows + def test_rglob_windows(self): + P = self.cls + p = P(self.base, "dirC") + self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) + self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) + self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) + @needs_symlinks def test_rglob_follow_symlinks_common(self): def _check(path, glob, expected): From 7a7bce5a0ab249407e866a1e955d21fa2b0c8506 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 26 Jan 2024 19:38:14 -0800 Subject: [PATCH 039/127] gh-113055: Use pointer for interp->obmalloc state (gh-113412) For interpreters that share state with the main interpreter, this points to the same static memory structure. For interpreters with their own obmalloc state, it is heap allocated. Add free_obmalloc_arenas() which will free the obmalloc arenas and radix tree structures for interpreters with their own obmalloc state. Co-authored-by: Eric Snow --- Include/internal/pycore_interp.h | 12 +- Include/internal/pycore_obmalloc.h | 2 + Include/internal/pycore_obmalloc_init.h | 7 - Include/internal/pycore_runtime_init.h | 1 - ...-12-22-13-21-39.gh-issue-113055.47xBMF.rst | 5 + Objects/obmalloc.c | 121 +++++++++++++++++- Python/pylifecycle.c | 16 +++ Python/pystate.c | 14 +- Tools/c-analyzer/cpython/ignored.tsv | 3 +- 9 files changed, 157 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 662a18d93f329d..04e75940dcb573 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -203,7 +203,17 @@ struct _is { struct _mimalloc_interp_state mimalloc; #endif - struct _obmalloc_state obmalloc; + // Per-interpreter state for the obmalloc allocator. For the main + // interpreter and for all interpreters that don't have their + // own obmalloc state, this points to the static structure in + // obmalloc.c obmalloc_state_main. For other interpreters, it is + // heap allocated by _PyMem_init_obmalloc() and freed when the + // interpreter structure is freed. In the case of a heap allocated + // obmalloc state, it is not safe to hold on to or use memory after + // the interpreter is freed. The obmalloc state corresponding to + // that allocated memory is gone. See free_obmalloc_arenas() for + // more comments. + struct _obmalloc_state *obmalloc; PyObject *audit_hooks; PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS]; diff --git a/Include/internal/pycore_obmalloc.h b/Include/internal/pycore_obmalloc.h index 17572dba65487d..9140d8f08f0af1 100644 --- a/Include/internal/pycore_obmalloc.h +++ b/Include/internal/pycore_obmalloc.h @@ -686,6 +686,8 @@ extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void); _Py_GetGlobalAllocatedBlocks() extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *); extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *); +extern int _PyMem_init_obmalloc(PyInterpreterState *interp); +extern bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp); #ifdef WITH_PYMALLOC diff --git a/Include/internal/pycore_obmalloc_init.h b/Include/internal/pycore_obmalloc_init.h index 8ee72ff2d4126f..e6811b7aeca73c 100644 --- a/Include/internal/pycore_obmalloc_init.h +++ b/Include/internal/pycore_obmalloc_init.h @@ -59,13 +59,6 @@ extern "C" { .dump_debug_stats = -1, \ } -#define _obmalloc_state_INIT(obmalloc) \ - { \ - .pools = { \ - .used = _obmalloc_pools_INIT(obmalloc.pools), \ - }, \ - } - #ifdef __cplusplus } diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index b4806ab09fd145..0a5c92bb84b524 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -155,7 +155,6 @@ extern PyTypeObject _PyExc_MemoryError; { \ .id_refcount = -1, \ .imports = IMPORTS_INIT, \ - .obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \ .ceval = { \ .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ }, \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst new file mode 100644 index 00000000000000..90f49272218c96 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst @@ -0,0 +1,5 @@ +Make interp->obmalloc a pointer. For interpreters that share state with the +main interpreter, this points to the same static memory structure. For +interpreters with their own obmalloc state, it is heap allocated. Add +free_obmalloc_arenas() which will free the obmalloc arenas and radix tree +structures for interpreters with their own obmalloc state. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 16d5bcb53e7eb7..bea4ea85332bdd 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -7,6 +7,7 @@ #include "pycore_pyerrors.h" // _Py_FatalErrorFormat() #include "pycore_pymem.h" #include "pycore_pystate.h" // _PyInterpreterState_GET +#include "pycore_obmalloc_init.h" #include // malloc() #include @@ -1016,6 +1017,13 @@ static int running_on_valgrind = -1; typedef struct _obmalloc_state OMState; +/* obmalloc state for main interpreter and shared by all interpreters without + * their own obmalloc state. By not explicitly initalizing this structure, it + * will be allocated in the BSS which is a small performance win. The radix + * tree arrays are fairly large but are sparsely used. */ +static struct _obmalloc_state obmalloc_state_main; +static bool obmalloc_state_initialized; + static inline int has_own_state(PyInterpreterState *interp) { @@ -1028,10 +1036,8 @@ static inline OMState * get_state(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!has_own_state(interp)) { - interp = _PyInterpreterState_Main(); - } - return &interp->obmalloc; + assert(interp->obmalloc != NULL); // otherwise not initialized or freed + return interp->obmalloc; } // These macros all rely on a local "state" variable. @@ -1094,7 +1100,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp) "the interpreter doesn't have its own allocator"); } #endif - OMState *state = &interp->obmalloc; + OMState *state = interp->obmalloc; + + if (state == NULL) { + return 0; + } Py_ssize_t n = raw_allocated_blocks; /* add up allocated blocks for used pools */ @@ -1116,6 +1126,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp) return n; } +static void free_obmalloc_arenas(PyInterpreterState *interp); + void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp) { @@ -1124,10 +1136,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp) return; } #endif - if (has_own_state(interp)) { + if (has_own_state(interp) && interp->obmalloc != NULL) { Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp); assert(has_own_state(interp) || leaked == 0); interp->runtime->obmalloc.interpreter_leaks += leaked; + if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) { + // free the obmalloc arenas and radix tree nodes. If leaked > 0 + // then some of the memory allocated by obmalloc has not been + // freed. It might be safe to free the arenas in that case but + // it's possible that extension modules are still using that + // memory. So, it is safer to not free and to leak. Perhaps there + // should be warning when this happens. It should be possible to + // use a tool like "-fsanitize=address" to track down these leaks. + free_obmalloc_arenas(interp); + } } } @@ -2717,9 +2739,96 @@ _PyDebugAllocatorStats(FILE *out, (void)printone(out, buf2, num_blocks * sizeof_block); } +// Return true if the obmalloc state structure is heap allocated, +// by PyMem_RawCalloc(). For the main interpreter, this structure +// allocated in the BSS. Allocating that way gives some memory savings +// and a small performance win (at least on a demand paged OS). On +// 64-bit platforms, the obmalloc structure is 256 kB. Most of that +// memory is for the arena_map_top array. Since normally only one entry +// of that array is used, only one page of resident memory is actually +// used, rather than the full 256 kB. +bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp) +{ +#if WITH_PYMALLOC + return interp->obmalloc && interp->obmalloc != &obmalloc_state_main; +#else + return false; +#endif +} + +#ifdef WITH_PYMALLOC +static void +init_obmalloc_pools(PyInterpreterState *interp) +{ + // initialize the obmalloc->pools structure. This must be done + // before the obmalloc alloc/free functions can be called. + poolp temp[OBMALLOC_USED_POOLS_SIZE] = + _obmalloc_pools_INIT(interp->obmalloc->pools); + memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp)); +} +#endif /* WITH_PYMALLOC */ + +int _PyMem_init_obmalloc(PyInterpreterState *interp) +{ +#ifdef WITH_PYMALLOC + /* Initialize obmalloc, but only for subinterpreters, + since the main interpreter is initialized statically. */ + if (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_HasFeature(interp, + Py_RTFLAGS_USE_MAIN_OBMALLOC)) { + interp->obmalloc = &obmalloc_state_main; + if (!obmalloc_state_initialized) { + init_obmalloc_pools(interp); + obmalloc_state_initialized = true; + } + } else { + interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state)); + if (interp->obmalloc == NULL) { + return -1; + } + init_obmalloc_pools(interp); + } +#endif /* WITH_PYMALLOC */ + return 0; // success +} + #ifdef WITH_PYMALLOC +static void +free_obmalloc_arenas(PyInterpreterState *interp) +{ + OMState *state = interp->obmalloc; + for (uint i = 0; i < maxarenas; ++i) { + // free each obmalloc memory arena + struct arena_object *ao = &allarenas[i]; + _PyObject_Arena.free(_PyObject_Arena.ctx, + (void *)ao->address, ARENA_SIZE); + } + // free the array containing pointers to all arenas + PyMem_RawFree(allarenas); +#if WITH_PYMALLOC_RADIX_TREE +#ifdef USE_INTERIOR_NODES + // Free the middle and bottom nodes of the radix tree. These are allocated + // by arena_map_mark_used() but not freed when arenas are freed. + for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) { + arena_map_mid_t *mid = arena_map_root.ptrs[i1]; + if (mid == NULL) { + continue; + } + for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) { + arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2]; + if (bot == NULL) { + continue; + } + PyMem_RawFree(bot); + } + PyMem_RawFree(mid); + } +#endif +#endif +} + #ifdef Py_DEBUG /* Is target in the list? The list is traversed via the nextpool pointers. * The list may be NULL-terminated, or circular. Return 1 if target is in diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 261622adc4cc77..fff64dd63d6b21 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -32,6 +32,7 @@ #include "pycore_typevarobject.h" // _Py_clear_generic_types() #include "pycore_unicodeobject.h" // _PyUnicode_InitTypes() #include "pycore_weakref.h" // _PyWeakref_GET_REF() +#include "pycore_obmalloc.h" // _PyMem_init_obmalloc() #include "opcode.h" @@ -645,6 +646,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // initialize the interp->obmalloc state. This must be done after + // the settings are loaded (so that feature_flags are set) but before + // any calls are made to obmalloc functions. + if (_PyMem_init_obmalloc(interp) < 0) { + return _PyStatus_NO_MEMORY(); + } + PyThreadState *tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP); if (tstate == NULL) { @@ -2144,6 +2152,14 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config) goto error; } + // initialize the interp->obmalloc state. This must be done after + // the settings are loaded (so that feature_flags are set) but before + // any calls are made to obmalloc functions. + if (_PyMem_init_obmalloc(interp) < 0) { + status = _PyStatus_NO_MEMORY(); + goto error; + } + tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP); if (tstate == NULL) { status = _PyStatus_NO_MEMORY(); diff --git a/Python/pystate.c b/Python/pystate.c index 5ad5b6f3fcc634..8e097c848cf4a1 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -18,6 +18,7 @@ #include "pycore_pystate.h" #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() /* -------------------------------------------------------------------------- CAUTION @@ -553,6 +554,11 @@ free_interpreter(PyInterpreterState *interp) // The main interpreter is statically allocated so // should not be freed. if (interp != &_PyRuntime._main_interpreter) { + if (_PyMem_obmalloc_state_on_heap(interp)) { + // interpreter has its own obmalloc state, free it + PyMem_RawFree(interp->obmalloc); + interp->obmalloc = NULL; + } PyMem_RawFree(interp); } } @@ -595,14 +601,6 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - /* Initialize obmalloc, but only for subinterpreters, - since the main interpreter is initialized statically. */ - if (interp != &runtime->_main_interpreter) { - poolp temp[OBMALLOC_USED_POOLS_SIZE] = \ - _obmalloc_pools_INIT(interp->obmalloc.pools); - memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp)); - } - PyStatus status = _PyObject_InitState(interp); if (_PyStatus_EXCEPTION(status)) { return status; diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 2f9e80d6ab6737..c75aff8c1723c1 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -325,7 +325,8 @@ Objects/obmalloc.c - _PyMem_Debug - Objects/obmalloc.c - _PyMem_Raw - Objects/obmalloc.c - _PyObject - Objects/obmalloc.c - last_final_leaks - -Objects/obmalloc.c - usedpools - +Objects/obmalloc.c - obmalloc_state_main - +Objects/obmalloc.c - obmalloc_state_initialized - Objects/typeobject.c - name_op - Objects/typeobject.c - slotdefs - Objects/unicodeobject.c - stripfuncnames - From 926881dc10ebf77069e02e66eea3e0d3ba500fe5 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 27 Jan 2024 10:55:33 +0300 Subject: [PATCH 040/127] gh-113445: Amend PyObject_RichCompareBool() docs (GH-113891) --- Doc/c-api/object.rst | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 4f656779c80b1a..12476412799a4f 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -229,12 +229,8 @@ Object Protocol .. c:function:: int PyObject_RichCompareBool(PyObject *o1, PyObject *o2, int opid) Compare the values of *o1* and *o2* using the operation specified by *opid*, - which must be one of :c:macro:`Py_LT`, :c:macro:`Py_LE`, :c:macro:`Py_EQ`, - :c:macro:`Py_NE`, :c:macro:`Py_GT`, or :c:macro:`Py_GE`, corresponding to ``<``, - ``<=``, ``==``, ``!=``, ``>``, or ``>=`` respectively. Returns ``-1`` on error, - ``0`` if the result is false, ``1`` otherwise. This is the equivalent of the - Python expression ``o1 op o2``, where ``op`` is the operator corresponding to - *opid*. + like :c:func:`PyObject_RichCompare`, but returns ``-1`` on error, ``0`` if + the result is false, ``1`` otherwise. .. note:: If *o1* and *o2* are the same object, :c:func:`PyObject_RichCompareBool` From 547c135d70760f974ed0476a32a6809e708bfe4d Mon Sep 17 00:00:00 2001 From: NewUserHa <32261870+NewUserHa@users.noreply.github.com> Date: Sat, 27 Jan 2024 16:29:38 +0800 Subject: [PATCH 041/127] Simplify concurrent.futures.process code by using itertools.batched() (GH-114221) --- Lib/concurrent/futures/process.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index ffaffdb8b3d0aa..ca843e11eeb83d 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -190,16 +190,6 @@ def _on_queue_feeder_error(self, e, obj): super()._on_queue_feeder_error(e, obj) -def _get_chunks(*iterables, chunksize): - """ Iterates over zip()ed iterables in chunks. """ - it = zip(*iterables) - while True: - chunk = tuple(itertools.islice(it, chunksize)) - if not chunk: - return - yield chunk - - def _process_chunk(fn, chunk): """ Processes a chunk of an iterable passed to map. @@ -847,7 +837,7 @@ def map(self, fn, *iterables, timeout=None, chunksize=1): raise ValueError("chunksize must be >= 1.") results = super().map(partial(_process_chunk, fn), - _get_chunks(*iterables, chunksize=chunksize), + itertools.batched(zip(*iterables), chunksize), timeout=timeout) return _chain_from_iterable_of_lists(results) From 23fb9f0777b054526b3b32f58e60b2a03132bf45 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 27 Jan 2024 11:45:07 +0300 Subject: [PATCH 042/127] Fix `c-api/file.rst` indexes (GH-114608) --- Doc/c-api/file.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index 0a03841e467cad..d3a78c588454e8 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -65,9 +65,10 @@ the :mod:`io` APIs instead. Overrides the normal behavior of :func:`io.open_code` to pass its parameter through the provided handler. - The handler is a function of type: + The *handler* is a function of type: - .. c:type:: Py_OpenCodeHookFunction + .. c:namespace:: NULL + .. c:type:: PyObject * (*Py_OpenCodeHookFunction)(PyObject *, void *) Equivalent of :c:expr:`PyObject *(\*)(PyObject *path, void *userData)`, where *path* is guaranteed to be From 6a8944acb61d0a2c210ab8066cdcec8602110e2f Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 27 Jan 2024 11:45:40 +0300 Subject: [PATCH 043/127] gh-101100: Fix sphinx warnings in `library/email.mime.rst` (GH-114635) --- Doc/library/email.mime.rst | 16 ++++++++-------- Doc/tools/.nitignore | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst index d7c0d203d191f8..dc0dd3b9eebde6 100644 --- a/Doc/library/email.mime.rst +++ b/Doc/library/email.mime.rst @@ -28,7 +28,7 @@ make things easier. Here are the classes: -.. currentmodule:: email.mime.base +.. module:: email.mime.base .. class:: MIMEBase(_maintype, _subtype, *, policy=compat32, **_params) @@ -58,7 +58,7 @@ Here are the classes: Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.nonmultipart +.. module:: email.mime.nonmultipart .. class:: MIMENonMultipart() @@ -72,7 +72,7 @@ Here are the classes: is called, a :exc:`~email.errors.MultipartConversionError` exception is raised. -.. currentmodule:: email.mime.multipart +.. module:: email.mime.multipart .. class:: MIMEMultipart(_subtype='mixed', boundary=None, _subparts=None, \ *, policy=compat32, **_params) @@ -104,7 +104,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.application +.. module:: email.mime.application .. class:: MIMEApplication(_data, _subtype='octet-stream', \ _encoder=email.encoders.encode_base64, \ @@ -135,7 +135,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.audio +.. module:: email.mime.audio .. class:: MIMEAudio(_audiodata, _subtype=None, \ _encoder=email.encoders.encode_base64, \ @@ -169,7 +169,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.image +.. module:: email.mime.image .. class:: MIMEImage(_imagedata, _subtype=None, \ _encoder=email.encoders.encode_base64, \ @@ -205,7 +205,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.message +.. module:: email.mime.message .. class:: MIMEMessage(_msg, _subtype='rfc822', *, policy=compat32) @@ -225,7 +225,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.text +.. module:: email.mime.text .. class:: MIMEText(_text, _subtype='plain', _charset=None, *, policy=compat32) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index d56a44ad09a6f8..f48506e3a21df5 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -33,7 +33,6 @@ Doc/library/decimal.rst Doc/library/email.charset.rst Doc/library/email.compat32-message.rst Doc/library/email.errors.rst -Doc/library/email.mime.rst Doc/library/email.parser.rst Doc/library/email.policy.rst Doc/library/enum.rst From 11c582235d86b6020710eff282eeb381a7bf7bb7 Mon Sep 17 00:00:00 2001 From: Charlie Zhao Date: Sat, 27 Jan 2024 17:53:01 +0800 Subject: [PATCH 044/127] gh-113560: Improve docstrings for set.issubset() and set.issuperset() (GH-113562) --- Objects/setobject.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 88d20019bfb4a7..93de8e84f2ddf9 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1770,7 +1770,11 @@ set_issubset(PySetObject *so, PyObject *other) Py_RETURN_TRUE; } -PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set."); +PyDoc_STRVAR(issubset_doc, +"issubset($self, other, /)\n\ +--\n\ +\n\ +Test whether every element in the set is in other."); static PyObject * set_issuperset(PySetObject *so, PyObject *other) @@ -1802,7 +1806,11 @@ set_issuperset(PySetObject *so, PyObject *other) Py_RETURN_TRUE; } -PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set."); +PyDoc_STRVAR(issuperset_doc, +"issuperset($self, other, /)\n\ +--\n\ +\n\ +Test whether every element in other is in the set."); static PyObject * set_richcompare(PySetObject *v, PyObject *w, int op) From b6623d61d4e07aefd15d910ef67837c66bceaf32 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 27 Jan 2024 15:06:59 +0200 Subject: [PATCH 045/127] gh-101100: Fix Sphinx warnings in `whatsnew/3.11.rst` and related (#114531) --- Doc/whatsnew/2.7.rst | 2 +- Doc/whatsnew/3.11.rst | 96 ++++++++++++++++++++-------------------- Doc/whatsnew/3.8.rst | 4 +- Misc/NEWS.d/3.11.0a2.rst | 2 +- Misc/NEWS.d/3.11.0a4.rst | 6 +-- Misc/NEWS.d/3.11.0a6.rst | 2 +- Misc/NEWS.d/3.11.0a7.rst | 4 +- Misc/NEWS.d/3.11.0b1.rst | 4 +- Misc/NEWS.d/3.12.0a4.rst | 4 +- Misc/NEWS.d/3.12.0a6.rst | 2 +- Misc/NEWS.d/3.12.0a7.rst | 2 +- Misc/NEWS.d/3.12.0b1.rst | 2 +- Misc/NEWS.d/3.8.0a1.rst | 4 +- Misc/NEWS.d/3.9.0a1.rst | 4 +- Misc/NEWS.d/3.9.0a5.rst | 2 +- 15 files changed, 70 insertions(+), 70 deletions(-) diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index 241d58720399af..524967b4524234 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -2130,7 +2130,7 @@ Changes to Python's build process and to the C API include: only the filename, function name, and first line number are required. This is useful for extension modules that are attempting to construct a more useful traceback stack. Previously such - extensions needed to call :c:func:`PyCode_New`, which had many + extensions needed to call :c:func:`!PyCode_New`, which had many more arguments. (Added by Jeffrey Yasskin.) * New function: :c:func:`PyErr_NewExceptionWithDoc` creates a new diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index cb646a54df3607..a8a7dfda0c5309 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -672,7 +672,7 @@ enum * Changed :meth:`Enum.__format__() ` (the default for :func:`format`, :meth:`str.format` and :term:`f-string`\s) to always produce - the same result as :meth:`Enum.__str__()`: for enums inheriting from + the same result as :meth:`Enum.__str__() `: for enums inheriting from :class:`~enum.ReprEnum` it will be the member's value; for all other enums it will be the enum and member name (e.g. ``Color.RED``). @@ -1604,7 +1604,7 @@ raw, adaptive bytecode containing quickened data. New opcodes ----------- -* :opcode:`ASYNC_GEN_WRAP`, :opcode:`RETURN_GENERATOR` and :opcode:`SEND`, +* :opcode:`!ASYNC_GEN_WRAP`, :opcode:`RETURN_GENERATOR` and :opcode:`SEND`, used in generators and co-routines. * :opcode:`COPY_FREE_VARS`, @@ -1615,7 +1615,7 @@ New opcodes * :opcode:`MAKE_CELL`, to create :ref:`cell-objects`. -* :opcode:`CHECK_EG_MATCH` and :opcode:`PREP_RERAISE_STAR`, +* :opcode:`CHECK_EG_MATCH` and :opcode:`!PREP_RERAISE_STAR`, to handle the :ref:`new exception groups and except* ` added in :pep:`654`. @@ -1630,38 +1630,38 @@ New opcodes Replaced opcodes ---------------- -+------------------------------------+-----------------------------------+-----------------------------------------+ -| Replaced Opcode(s) | New Opcode(s) | Notes | -+====================================+===================================+=========================================+ -| | :opcode:`!BINARY_*` | :opcode:`BINARY_OP` | Replaced all numeric binary/in-place | -| | :opcode:`!INPLACE_*` | | opcodes with a single opcode | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!CALL_FUNCTION` | | :opcode:`CALL` | Decouples argument shifting for methods | -| | :opcode:`!CALL_FUNCTION_KW` | | :opcode:`KW_NAMES` | from handling of keyword arguments; | -| | :opcode:`!CALL_METHOD` | | :opcode:`PRECALL` | allows better specialization of calls | -| | | :opcode:`PUSH_NULL` | | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!DUP_TOP` | | :opcode:`COPY` | Stack manipulation instructions | -| | :opcode:`!DUP_TOP_TWO` | | :opcode:`SWAP` | | -| | :opcode:`!ROT_TWO` | | | -| | :opcode:`!ROT_THREE` | | | -| | :opcode:`!ROT_FOUR` | | | -| | :opcode:`!ROT_N` | | | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!JUMP_IF_NOT_EXC_MATCH` | | :opcode:`CHECK_EXC_MATCH` | Now performs check but doesn't jump | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!JUMP_ABSOLUTE` | | :opcode:`JUMP_BACKWARD` | See [#bytecode-jump]_; | -| | :opcode:`!POP_JUMP_IF_FALSE` | | :opcode:`POP_JUMP_BACKWARD_IF_* | ``TRUE``, ``FALSE``, | -| | :opcode:`!POP_JUMP_IF_TRUE` | ` | ``NONE`` and ``NOT_NONE`` variants | -| | | :opcode:`POP_JUMP_FORWARD_IF_* | for each direction | -| | ` | | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!SETUP_WITH` | :opcode:`BEFORE_WITH` | :keyword:`with` block setup | -| | :opcode:`!SETUP_ASYNC_WITH` | | | -+------------------------------------+-----------------------------------+-----------------------------------------+ ++------------------------------------+------------------------------------+-----------------------------------------+ +| Replaced Opcode(s) | New Opcode(s) | Notes | ++====================================+====================================+=========================================+ +| | :opcode:`!BINARY_*` | :opcode:`BINARY_OP` | Replaced all numeric binary/in-place | +| | :opcode:`!INPLACE_*` | | opcodes with a single opcode | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!CALL_FUNCTION` | | :opcode:`CALL` | Decouples argument shifting for methods | +| | :opcode:`!CALL_FUNCTION_KW` | | :opcode:`!KW_NAMES` | from handling of keyword arguments; | +| | :opcode:`!CALL_METHOD` | | :opcode:`!PRECALL` | allows better specialization of calls | +| | | :opcode:`PUSH_NULL` | | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!DUP_TOP` | | :opcode:`COPY` | Stack manipulation instructions | +| | :opcode:`!DUP_TOP_TWO` | | :opcode:`SWAP` | | +| | :opcode:`!ROT_TWO` | | | +| | :opcode:`!ROT_THREE` | | | +| | :opcode:`!ROT_FOUR` | | | +| | :opcode:`!ROT_N` | | | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!JUMP_IF_NOT_EXC_MATCH` | | :opcode:`CHECK_EXC_MATCH` | Now performs check but doesn't jump | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!JUMP_ABSOLUTE` | | :opcode:`JUMP_BACKWARD` | See [#bytecode-jump]_; | +| | :opcode:`!POP_JUMP_IF_FALSE` | | :opcode:`!POP_JUMP_BACKWARD_IF_*`| ``TRUE``, ``FALSE``, | +| | :opcode:`!POP_JUMP_IF_TRUE` | | :opcode:`!POP_JUMP_FORWARD_IF_*` | ``NONE`` and ``NOT_NONE`` variants | +| | | for each direction | +| | | | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!SETUP_WITH` | :opcode:`BEFORE_WITH` | :keyword:`with` block setup | +| | :opcode:`!SETUP_ASYNC_WITH` | | | ++------------------------------------+------------------------------------+-----------------------------------------+ .. [#bytecode-jump] All jump opcodes are now relative, including the - existing :opcode:`JUMP_IF_TRUE_OR_POP` and :opcode:`JUMP_IF_FALSE_OR_POP`. + existing :opcode:`!JUMP_IF_TRUE_OR_POP` and :opcode:`!JUMP_IF_FALSE_OR_POP`. The argument is now an offset from the current instruction rather than an absolute location. @@ -1789,13 +1789,13 @@ Standard Library and will be removed in a future Python version, due to not supporting resources located within package subdirectories: - * :func:`importlib.resources.contents` - * :func:`importlib.resources.is_resource` - * :func:`importlib.resources.open_binary` - * :func:`importlib.resources.open_text` - * :func:`importlib.resources.read_binary` - * :func:`importlib.resources.read_text` - * :func:`importlib.resources.path` + * :func:`!importlib.resources.contents` + * :func:`!importlib.resources.is_resource` + * :func:`!importlib.resources.open_binary` + * :func:`!importlib.resources.open_text` + * :func:`!importlib.resources.read_binary` + * :func:`!importlib.resources.read_text` + * :func:`!importlib.resources.path` * The :func:`locale.getdefaultlocale` function is deprecated and will be removed in Python 3.15. Use :func:`locale.setlocale`, @@ -1803,7 +1803,7 @@ Standard Library :func:`locale.getlocale` functions instead. (Contributed by Victor Stinner in :gh:`90817`.) -* The :func:`locale.resetlocale` function is deprecated and will be +* The :func:`!locale.resetlocale` function is deprecated and will be removed in Python 3.13. Use ``locale.setlocale(locale.LC_ALL, "")`` instead. (Contributed by Victor Stinner in :gh:`90817`.) @@ -1967,7 +1967,7 @@ Removed C APIs are :ref:`listed separately `. (Contributed by Victor Stinner in :issue:`45085`.) -* Removed the :mod:`distutils` ``bdist_msi`` command deprecated in Python 3.9. +* Removed the :mod:`!distutils` ``bdist_msi`` command deprecated in Python 3.9. Use ``bdist_wheel`` (wheel packages) instead. (Contributed by Hugo van Kemenade in :issue:`45124`.) @@ -2295,7 +2295,7 @@ Porting to Python 3.11 as its second parameter, instead of ``PyFrameObject*``. See :pep:`523` for more details of how to use this function pointer type. -* :c:func:`PyCode_New` and :c:func:`PyCode_NewWithPosOnlyArgs` now take +* :c:func:`!PyCode_New` and :c:func:`!PyCode_NewWithPosOnlyArgs` now take an additional ``exception_table`` argument. Using these functions should be avoided, if at all possible. To get a custom code object: create a code object using the compiler, @@ -2402,7 +2402,7 @@ Porting to Python 3.11 been included directly, consider including ``Python.h`` instead. (Contributed by Victor Stinner in :issue:`35134`.) -* The :c:func:`PyUnicode_CHECK_INTERNED` macro has been excluded from the +* The :c:func:`!PyUnicode_CHECK_INTERNED` macro has been excluded from the limited C API. It was never usable there, because it used internal structures which are not available in the limited C API. (Contributed by Victor Stinner in :issue:`46007`.) @@ -2465,7 +2465,7 @@ Porting to Python 3.11 Debuggers that accessed the :attr:`~frame.f_locals` directly *must* call :c:func:`PyFrame_GetLocals` instead. They no longer need to call - :c:func:`PyFrame_FastToLocalsWithError` or :c:func:`PyFrame_LocalsToFast`, + :c:func:`!PyFrame_FastToLocalsWithError` or :c:func:`!PyFrame_LocalsToFast`, in fact they should not call those functions. The necessary updating of the frame is now managed by the virtual machine. @@ -2604,8 +2604,8 @@ and will be removed in Python 3.12. * :c:func:`!PyUnicode_GET_DATA_SIZE` * :c:func:`!PyUnicode_GET_SIZE` * :c:func:`!PyUnicode_GetSize` -* :c:func:`PyUnicode_IS_COMPACT` -* :c:func:`PyUnicode_IS_READY` +* :c:func:`!PyUnicode_IS_COMPACT` +* :c:func:`!PyUnicode_IS_READY` * :c:func:`PyUnicode_READY` * :c:func:`!PyUnicode_WSTR_LENGTH` * :c:func:`!_PyUnicode_AsUnicode` @@ -2660,7 +2660,7 @@ Removed (Contributed by Victor Stinner in :issue:`45474`.) * Exclude :c:func:`PyWeakref_GET_OBJECT` from the limited C API. It never - worked since the :c:type:`PyWeakReference` structure is opaque in the + worked since the :c:type:`!PyWeakReference` structure is opaque in the limited C API. (Contributed by Victor Stinner in :issue:`35134`.) diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index d373fa163ff737..304d1b4ef4efe8 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -1623,8 +1623,8 @@ Build and C API Changes allocation or deallocation may need to be adjusted. (Contributed by Eddie Elizondo in :issue:`35810`.) -* The new function :c:func:`PyCode_NewWithPosOnlyArgs` allows to create - code objects like :c:func:`PyCode_New`, but with an extra *posonlyargcount* +* The new function :c:func:`!PyCode_NewWithPosOnlyArgs` allows to create + code objects like :c:func:`!PyCode_New`, but with an extra *posonlyargcount* parameter for indicating the number of positional-only arguments. (Contributed by Pablo Galindo in :issue:`37221`.) diff --git a/Misc/NEWS.d/3.11.0a2.rst b/Misc/NEWS.d/3.11.0a2.rst index eb1456f1bcf353..a6b5fe54b391c5 100644 --- a/Misc/NEWS.d/3.11.0a2.rst +++ b/Misc/NEWS.d/3.11.0a2.rst @@ -1189,7 +1189,7 @@ context objects can now be disabled. .. section: C API Exclude :c:func:`PyWeakref_GET_OBJECT` from the limited C API. It never -worked since the :c:type:`PyWeakReference` structure is opaque in the +worked since the :c:type:`!PyWeakReference` structure is opaque in the limited C API. .. diff --git a/Misc/NEWS.d/3.11.0a4.rst b/Misc/NEWS.d/3.11.0a4.rst index 5abacd8473f394..78b682f7a22cc6 100644 --- a/Misc/NEWS.d/3.11.0a4.rst +++ b/Misc/NEWS.d/3.11.0a4.rst @@ -161,7 +161,7 @@ faster due to reference-counting optimizations. Patch by Dennis Sweeney. .. nonce: 7oGp-I .. section: Core and Builtins -:opcode:`PREP_RERAISE_STAR` no longer pushes ``lasti`` to the stack. +:opcode:`!PREP_RERAISE_STAR` no longer pushes ``lasti`` to the stack. .. @@ -170,7 +170,7 @@ faster due to reference-counting optimizations. Patch by Dennis Sweeney. .. nonce: IKx4v6 .. section: Core and Builtins -Remove :opcode:`POP_EXCEPT_AND_RERAISE` and replace it by an equivalent +Remove :opcode:`!POP_EXCEPT_AND_RERAISE` and replace it by an equivalent sequence of other opcodes. .. @@ -1171,7 +1171,7 @@ Replaced deprecated usage of :c:func:`PyImport_ImportModuleNoBlock` with .. nonce: sMgDLz .. section: C API -The :c:func:`PyUnicode_CHECK_INTERNED` macro has been excluded from the +The :c:func:`!PyUnicode_CHECK_INTERNED` macro has been excluded from the limited C API. It was never usable there, because it used internal structures which are not available in the limited C API. Patch by Victor Stinner. diff --git a/Misc/NEWS.d/3.11.0a6.rst b/Misc/NEWS.d/3.11.0a6.rst index 2b50b7773492cb..2fdceef7746d4e 100644 --- a/Misc/NEWS.d/3.11.0a6.rst +++ b/Misc/NEWS.d/3.11.0a6.rst @@ -248,7 +248,7 @@ Don't un-adapt :opcode:`COMPARE_OP` when collecting specialization stats. .. nonce: RX_AzJ .. section: Core and Builtins -Fix specialization stats gathering for :opcode:`PRECALL` instructions. +Fix specialization stats gathering for :opcode:`!PRECALL` instructions. .. diff --git a/Misc/NEWS.d/3.11.0a7.rst b/Misc/NEWS.d/3.11.0a7.rst index 76699632db223a..ec99bd0294ceca 100644 --- a/Misc/NEWS.d/3.11.0a7.rst +++ b/Misc/NEWS.d/3.11.0a7.rst @@ -138,7 +138,7 @@ Replaced :opcode:`JUMP_ABSOLUTE` by the relative jump .. nonce: SwrrFO .. section: Core and Builtins -:c:func:`PyFrame_FastToLocalsWithError` and :c:func:`PyFrame_LocalsToFast` +:c:func:`!PyFrame_FastToLocalsWithError` and :c:func:`!PyFrame_LocalsToFast` are no longer called during profiling nor tracing. C code can access the ``f_locals`` attribute of :c:type:`PyFrameObject` by calling :c:func:`PyFrame_GetLocals`. @@ -295,7 +295,7 @@ oparg) as an adaptive counter. .. nonce: O12Pba .. section: Core and Builtins -Use inline caching for :opcode:`PRECALL` and :opcode:`CALL`, and remove the +Use inline caching for :opcode:`!PRECALL` and :opcode:`CALL`, and remove the internal machinery for managing the (now unused) non-inline caches. .. diff --git a/Misc/NEWS.d/3.11.0b1.rst b/Misc/NEWS.d/3.11.0b1.rst index 2c30dc6e084bfb..f9296679655573 100644 --- a/Misc/NEWS.d/3.11.0b1.rst +++ b/Misc/NEWS.d/3.11.0b1.rst @@ -403,8 +403,8 @@ so this led to crashes. The problem is now fixed. .. nonce: 6S_uoU .. section: Core and Builtins -Make opcodes :opcode:`JUMP_IF_TRUE_OR_POP` and -:opcode:`JUMP_IF_FALSE_OR_POP` relative rather than absolute. +Make opcodes :opcode:`!JUMP_IF_TRUE_OR_POP` and +:opcode:`!JUMP_IF_FALSE_OR_POP` relative rather than absolute. .. diff --git a/Misc/NEWS.d/3.12.0a4.rst b/Misc/NEWS.d/3.12.0a4.rst index ce2814bbe2e5ab..82faa5ad0b2031 100644 --- a/Misc/NEWS.d/3.12.0a4.rst +++ b/Misc/NEWS.d/3.12.0a4.rst @@ -13,8 +13,8 @@ Fix misleading default value in :func:`input`'s ``__text_signature__``. .. nonce: cmGwxv .. section: Core and Builtins -Remove :opcode:`UNARY_POSITIVE`, :opcode:`ASYNC_GEN_WRAP` and -:opcode:`LIST_TO_TUPLE`, replacing them with intrinsics. +Remove :opcode:`!UNARY_POSITIVE`, :opcode:`!ASYNC_GEN_WRAP` and +:opcode:`!LIST_TO_TUPLE`, replacing them with intrinsics. .. diff --git a/Misc/NEWS.d/3.12.0a6.rst b/Misc/NEWS.d/3.12.0a6.rst index 5bd600cd8b6fc0..cf28bdb9258820 100644 --- a/Misc/NEWS.d/3.12.0a6.rst +++ b/Misc/NEWS.d/3.12.0a6.rst @@ -170,7 +170,7 @@ all as not all platform C libraries generate an error. .. section: Core and Builtins Add :opcode:`CALL_INTRINSIC_2` and use it instead of -:opcode:`PREP_RERAISE_STAR`. +:opcode:`!PREP_RERAISE_STAR`. .. diff --git a/Misc/NEWS.d/3.12.0a7.rst b/Misc/NEWS.d/3.12.0a7.rst index f22050b0dc377b..a859be8a047456 100644 --- a/Misc/NEWS.d/3.12.0a7.rst +++ b/Misc/NEWS.d/3.12.0a7.rst @@ -24,7 +24,7 @@ Reduce the number of inline :opcode:`CACHE` entries for .. nonce: PRkGca .. section: Core and Builtins -Removed :opcode:`JUMP_IF_FALSE_OR_POP` and :opcode:`JUMP_IF_TRUE_OR_POP` +Removed :opcode:`!JUMP_IF_FALSE_OR_POP` and :opcode:`!JUMP_IF_TRUE_OR_POP` instructions. .. diff --git a/Misc/NEWS.d/3.12.0b1.rst b/Misc/NEWS.d/3.12.0b1.rst index 007a6ad4ffd4d4..211513d05d0040 100644 --- a/Misc/NEWS.d/3.12.0b1.rst +++ b/Misc/NEWS.d/3.12.0b1.rst @@ -1008,7 +1008,7 @@ Update the bundled copy of pip to version 23.1.2. .. nonce: pst8iT .. section: Library -Make :mod:`dis` display the value of oparg of :opcode:`KW_NAMES`. +Make :mod:`dis` display the value of oparg of :opcode:`!KW_NAMES`. .. diff --git a/Misc/NEWS.d/3.8.0a1.rst b/Misc/NEWS.d/3.8.0a1.rst index 11b303e89ad04f..bd9061601fe190 100644 --- a/Misc/NEWS.d/3.8.0a1.rst +++ b/Misc/NEWS.d/3.8.0a1.rst @@ -3395,8 +3395,8 @@ Zackery Spytz. .. nonce: S0Irst .. section: Library -Fix parsing non-ASCII identifiers in :mod:`lib2to3.pgen2.tokenize` (PEP -3131). +Fix parsing non-ASCII identifiers in :mod:`!lib2to3.pgen2.tokenize` +(:pep:`3131`). .. diff --git a/Misc/NEWS.d/3.9.0a1.rst b/Misc/NEWS.d/3.9.0a1.rst index 0772a0fed20652..66d7fc1f32e705 100644 --- a/Misc/NEWS.d/3.9.0a1.rst +++ b/Misc/NEWS.d/3.9.0a1.rst @@ -5715,8 +5715,8 @@ The :c:macro:`METH_FASTCALL` calling convention has been documented. .. nonce: 4tClQT .. section: C API -The new function :c:func:`PyCode_NewWithPosOnlyArgs` allows to create code -objects like :c:func:`PyCode_New`, but with an extra *posonlyargcount* +The new function :c:func:`!PyCode_NewWithPosOnlyArgs` allows to create code +objects like :c:func:`!PyCode_New`, but with an extra *posonlyargcount* parameter for indicating the number of positonal-only arguments. .. diff --git a/Misc/NEWS.d/3.9.0a5.rst b/Misc/NEWS.d/3.9.0a5.rst index b4594aade3b3ed..f0015ac54df307 100644 --- a/Misc/NEWS.d/3.9.0a5.rst +++ b/Misc/NEWS.d/3.9.0a5.rst @@ -1122,7 +1122,7 @@ a different condition than the GIL. .. nonce: Nbl7lF .. section: Tools/Demos -Added support to fix ``getproxies`` in the :mod:`lib2to3.fixes.fix_urllib` +Added support to fix ``getproxies`` in the :mod:`!lib2to3.fixes.fix_urllib` module. Patch by José Roberto Meza Cabrera. .. From a384b20c0ce5aa520fa91ae0233d53642925525b Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 27 Jan 2024 17:30:21 +0300 Subject: [PATCH 046/127] gh-101100: Fix sphinx warnings in `reference/import.rst` (#114646) --- Doc/reference/import.rst | 7 ++++--- Doc/tools/.nitignore | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/reference/import.rst b/Doc/reference/import.rst index a7beeea29b4556..f8c9724114da9e 100644 --- a/Doc/reference/import.rst +++ b/Doc/reference/import.rst @@ -327,14 +327,15 @@ modules, and one that knows how to import modules from an :term:`import path` finders replaced :meth:`!find_module`, which is now deprecated. While it will continue to work without change, the import machinery will try it only if the finder does not implement - ``find_spec()``. + :meth:`~importlib.abc.MetaPathFinder.find_spec`. .. versionchanged:: 3.10 Use of :meth:`!find_module` by the import system now raises :exc:`ImportWarning`. .. versionchanged:: 3.12 - ``find_module()`` has been removed. Use :meth:`find_spec` instead. + :meth:`!find_module` has been removed. + Use :meth:`~importlib.abc.MetaPathFinder.find_spec` instead. Loading @@ -812,7 +813,7 @@ attributes on package objects are also used. These provide additional ways that the import machinery can be customized. :data:`sys.path` contains a list of strings providing search locations for -modules and packages. It is initialized from the :data:`PYTHONPATH` +modules and packages. It is initialized from the :envvar:`PYTHONPATH` environment variable and various other installation- and implementation-specific defaults. Entries in :data:`sys.path` can name directories on the file system, zip files, and potentially other "locations" diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index f48506e3a21df5..8b6847ef2a7d76 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -87,7 +87,6 @@ Doc/library/xmlrpc.server.rst Doc/library/zlib.rst Doc/reference/compound_stmts.rst Doc/reference/datamodel.rst -Doc/reference/import.rst Doc/tutorial/datastructures.rst Doc/using/windows.rst Doc/whatsnew/2.0.rst From 7a470541e2bbc6f3e87a6d813e2ec42cf726de7a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 27 Jan 2024 18:38:17 +0200 Subject: [PATCH 047/127] gh-114100: Remove superfluous writing to fd 1 in test_pty (GH-114647) --- Lib/test/test_pty.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pty.py b/Lib/test/test_pty.py index f31a68c5d84e03..51e3a46d0df178 100644 --- a/Lib/test/test_pty.py +++ b/Lib/test/test_pty.py @@ -1,4 +1,5 @@ from test.support import verbose, reap_children +from test.support.os_helper import TESTFN, unlink from test.support.import_helper import import_module # Skip these tests if termios or fcntl are not available @@ -292,7 +293,26 @@ def test_master_read(self): self.assertEqual(data, b"") def test_spawn_doesnt_hang(self): - pty.spawn([sys.executable, '-c', 'print("hi there")']) + self.addCleanup(unlink, TESTFN) + with open(TESTFN, 'wb') as f: + STDOUT_FILENO = 1 + dup_stdout = os.dup(STDOUT_FILENO) + os.dup2(f.fileno(), STDOUT_FILENO) + buf = b'' + def master_read(fd): + nonlocal buf + data = os.read(fd, 1024) + buf += data + return data + try: + pty.spawn([sys.executable, '-c', 'print("hi there")'], + master_read) + finally: + os.dup2(dup_stdout, STDOUT_FILENO) + os.close(dup_stdout) + self.assertEqual(buf, b'hi there\r\n') + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), b'hi there\r\n') class SmallPtyTests(unittest.TestCase): """These tests don't spawn children or hang.""" From 823a38a960c245cbf309ef29120d3690ba1bcd2c Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 27 Jan 2024 19:59:51 +0000 Subject: [PATCH 048/127] GH-79634: Speed up pathlib globbing by removing `joinpath()` call. (#114623) Remove `self.joinpath('')` call that should have been removed in 6313cdde. This makes `PathBase.glob('')` yield itself *without* adding a trailing slash. It's hard to say whether this is more or less correct, but at least everything else is faster, and there's no behaviour change in the public classes where empty glob patterns are disallowed. --- Lib/pathlib/_abc.py | 2 +- Lib/test/test_pathlib/test_pathlib.py | 2 ++ Lib/test/test_pathlib/test_pathlib_abc.py | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 6303a18680befc..ad5684829ebc80 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -771,7 +771,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): filter_paths = False deduplicate_paths = False sep = self.pathmod.sep - paths = iter([self.joinpath('')] if self.is_dir() else []) + paths = iter([self] if self.is_dir() else []) while stack: part = stack.pop() if part in specials: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 9c2b26d41d73f8..5ce3b605c58e63 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1232,6 +1232,8 @@ def test_glob_empty_pattern(self): list(p.glob('')) with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): list(p.glob('.')) + with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): + list(p.glob('./')) def test_glob_many_open_files(self): depth = 30 diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index ea70931eaa2c7e..ab989cb5503f99 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1733,12 +1733,11 @@ def test_glob_windows(self): self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) def test_glob_empty_pattern(self): - def _check(glob, expected): - self.assertEqual(set(glob), { P(self.base, q) for q in expected }) P = self.cls p = P(self.base) - _check(p.glob(""), [""]) - _check(p.glob("."), ["."]) + self.assertEqual(list(p.glob("")), [p]) + self.assertEqual(list(p.glob(".")), [p / "."]) + self.assertEqual(list(p.glob("./")), [p / "./"]) def test_glob_case_sensitive(self): P = self.cls From a768e12f094a9b14a9a1680fb50330e1050716c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Skytt=C3=A4?= Date: Sat, 27 Jan 2024 23:47:55 +0200 Subject: [PATCH 049/127] Use bool in fileinput.input() docstring and tests for the inplace argument (GH-111998) The `.rst` docs, most tests, and typeshed already use bool for it. --- Lib/fileinput.py | 2 +- Lib/test/test_fileinput.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/fileinput.py b/Lib/fileinput.py index 1b25f28f3d3432..3dba3d2fbfa967 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -53,7 +53,7 @@ sequence must be accessed in strictly sequential order; sequence access and readline() cannot be mixed. -Optional in-place filtering: if the keyword argument inplace=1 is +Optional in-place filtering: if the keyword argument inplace=True is passed to input() or to the FileInput constructor, the file is moved to a backup file and standard output is directed to the input file. This makes it possible to write a filter that rewrites its input file diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index 786d9186634305..b3ad41d2588c4c 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -151,7 +151,7 @@ def test_buffer_sizes(self): print('6. Inplace') savestdout = sys.stdout try: - fi = FileInput(files=(t1, t2, t3, t4), inplace=1, encoding="utf-8") + fi = FileInput(files=(t1, t2, t3, t4), inplace=True, encoding="utf-8") for line in fi: line = line[:-1].upper() print(line) @@ -256,7 +256,7 @@ def test_detached_stdin_binary_mode(self): def test_file_opening_hook(self): try: # cannot use openhook and inplace mode - fi = FileInput(inplace=1, openhook=lambda f, m: None) + fi = FileInput(inplace=True, openhook=lambda f, m: None) self.fail("FileInput should raise if both inplace " "and openhook arguments are given") except ValueError: From 5ecfd750b4f511f270c38f0d748da9cffa279295 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Sun, 28 Jan 2024 08:51:25 -0600 Subject: [PATCH 050/127] Correction Skip Montanaro's email address (#114677) --- Doc/library/atexit.rst | 4 ++-- Doc/library/csv.rst | 2 +- Doc/library/readline.rst | 2 +- Doc/library/urllib.robotparser.rst | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/atexit.rst b/Doc/library/atexit.rst index 3dbef69580d9b3..43a8bd2d7cd133 100644 --- a/Doc/library/atexit.rst +++ b/Doc/library/atexit.rst @@ -4,8 +4,8 @@ .. module:: atexit :synopsis: Register and execute cleanup functions. -.. moduleauthor:: Skip Montanaro -.. sectionauthor:: Skip Montanaro +.. moduleauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro -------------- diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 7a5589e68b3052..07f38f5690bb54 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -4,7 +4,7 @@ .. module:: csv :synopsis: Write and read tabular data to and from delimited files. -.. sectionauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro **Source code:** :source:`Lib/csv.py` diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst index 1adafcaa02eab9..54c6d9f3b32b1a 100644 --- a/Doc/library/readline.rst +++ b/Doc/library/readline.rst @@ -5,7 +5,7 @@ :platform: Unix :synopsis: GNU readline support for Python. -.. sectionauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro -------------- diff --git a/Doc/library/urllib.robotparser.rst b/Doc/library/urllib.robotparser.rst index f063e463753e0b..b5a49d9c592387 100644 --- a/Doc/library/urllib.robotparser.rst +++ b/Doc/library/urllib.robotparser.rst @@ -5,7 +5,7 @@ :synopsis: Load a robots.txt file and answer questions about fetchability of other URLs. -.. sectionauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro **Source code:** :source:`Lib/urllib/robotparser.py` From d00fbed68ffcd5823acbb32a0e47e2e5f9732ff7 Mon Sep 17 00:00:00 2001 From: Bhushan Mohanraj <50306448+bhushan-mohanraj@users.noreply.github.com> Date: Sun, 28 Jan 2024 15:10:32 -0500 Subject: [PATCH 051/127] Fix indentation in `__post_init__` documentation. (gh-114666) --- Doc/library/dataclasses.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index 88f2e0251b1e51..4ada69d63abada 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -538,8 +538,8 @@ that has to be called, it is common to call this method in a class Rectangle: def __init__(self, height, width): - self.height = height - self.width = width + self.height = height + self.width = width @dataclass class Square(Rectangle): From 3bb6912d8832e6e0a98c74de360dc1b23906c4b3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sun, 28 Jan 2024 22:28:25 +0200 Subject: [PATCH 052/127] gh-100734: Add 'Notable change in 3.11.x' to `whatsnew/3.11.rst` (#114657) Co-authored-by: Serhiy Storchaka --- Doc/whatsnew/3.11.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index a8a7dfda0c5309..4f4c1de8d8d596 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -2701,4 +2701,30 @@ Removed (Contributed by Inada Naoki in :issue:`44029`.) +Notable changes in 3.11.4 +========================= + +tarfile +------- + +* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, + have a new a *filter* argument that allows limiting tar features than may be + surprising or dangerous, such as creating files outside the destination + directory. + See :ref:`tarfile-extraction-filter` for details. + In Python 3.12, use without the *filter* argument will show a + :exc:`DeprecationWarning`. + In Python 3.14, the default will switch to ``'data'``. + (Contributed by Petr Viktorin in :pep:`706`.) + + +Notable changes in 3.11.5 +========================= + +OpenSSL +------- + +* Windows builds and macOS installers from python.org now use OpenSSL 3.0. + + .. _libb2: https://www.blake2.net/ From f7c05d7ad3075a1dbeed86b6b12903032e4afba6 Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Mon, 29 Jan 2024 02:05:29 +0300 Subject: [PATCH 053/127] gh-55664: Add warning when creating a type using a namespace dictionary with non-string keys. (GH-105338) Co-authored-by: Daniel Urban --- Lib/test/test_descr.py | 17 ++++++++++++++++- ...023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst | 1 + Objects/typeobject.c | 11 +++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index fd0af9b30a0a71..beeab6cb7f254c 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4734,6 +4734,20 @@ class X(object): with self.assertRaises(AttributeError): del X.__abstractmethods__ + def test_gh55664(self): + # gh-55664: issue a warning when the + # __dict__ of a class contains non-string keys + with self.assertWarnsRegex(RuntimeWarning, 'MyClass'): + MyClass = type('MyClass', (), {1: 2}) + + class meta(type): + def __new__(mcls, name, bases, ns): + ns[1] = 2 + return super().__new__(mcls, name, bases, ns) + + with self.assertWarnsRegex(RuntimeWarning, 'MyClass'): + MyClass = meta('MyClass', (), {}) + def test_proxy_call(self): class FakeStr: __class__ = str @@ -5151,7 +5165,8 @@ class Base2(object): mykey = 'from Base2' mykey2 = 'from Base2' - X = type('X', (Base,), {MyKey(): 5}) + with self.assertWarnsRegex(RuntimeWarning, 'X'): + X = type('X', (Base,), {MyKey(): 5}) # mykey is read from Base self.assertEqual(X.mykey, 'from Base') # mykey2 is read from Base2 because MyKey.__eq__ has set __bases__ diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst new file mode 100644 index 00000000000000..438be985496650 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst @@ -0,0 +1 @@ +Add warning when creating :class:`type` using a namespace dictionary with non-string keys. Patched by Daniel Urban and Furkan Onder. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 114cf21f95e744..a850473cad813d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3828,6 +3828,17 @@ type_new_impl(type_new_ctx *ctx) // Put the proper slots in place fixup_slot_dispatchers(type); + if (!_PyDict_HasOnlyStringKeys(type->tp_dict)) { + if (PyErr_WarnFormat( + PyExc_RuntimeWarning, + 1, + "non-string key in the __dict__ of class %.200s", + type->tp_name) == -1) + { + goto error; + } + } + if (type_new_set_names(type) < 0) { goto error; } From f6d9e5926b6138994eaa60d1c36462e36105733d Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 28 Jan 2024 18:48:48 -0800 Subject: [PATCH 054/127] GH-113464: Add a JIT backend for tier 2 (GH-113465) Add an option (--enable-experimental-jit for configure-based builds or --experimental-jit for PCbuild-based ones) to build an *experimental* just-in-time compiler, based on copy-and-patch (https://fredrikbk.com/publications/copy-and-patch.pdf). See Tools/jit/README.md for more information on how to install the required build-time tooling. --- .github/workflows/jit.yml | 112 +++++ .github/workflows/mypy.yml | 2 + .gitignore | 1 + Include/cpython/optimizer.h | 2 + Include/internal/pycore_jit.h | 25 ++ Include/internal/pycore_object.h | 4 +- Makefile.pre.in | 11 +- ...-12-24-03-25-28.gh-issue-113464.dvjQmA.rst | 4 + PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/build.bat | 3 + PCbuild/pythoncore.vcxproj | 3 + PCbuild/pythoncore.vcxproj.filters | 6 + PCbuild/regen.targets | 23 +- Python/ceval.c | 20 +- Python/jit.c | 369 ++++++++++++++++ Python/optimizer.c | 12 + Python/pylifecycle.c | 7 + Tools/jit/README.md | 46 ++ Tools/jit/_llvm.py | 99 +++++ Tools/jit/_schema.py | 99 +++++ Tools/jit/_stencils.py | 220 ++++++++++ Tools/jit/_targets.py | 394 ++++++++++++++++++ Tools/jit/_writer.py | 95 +++++ Tools/jit/build.py | 28 ++ Tools/jit/mypy.ini | 5 + Tools/jit/template.c | 98 +++++ configure | 31 ++ configure.ac | 20 + 29 files changed, 1738 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/jit.yml create mode 100644 Include/internal/pycore_jit.h create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst create mode 100644 Python/jit.c create mode 100644 Tools/jit/README.md create mode 100644 Tools/jit/_llvm.py create mode 100644 Tools/jit/_schema.py create mode 100644 Tools/jit/_stencils.py create mode 100644 Tools/jit/_targets.py create mode 100644 Tools/jit/_writer.py create mode 100644 Tools/jit/build.py create mode 100644 Tools/jit/mypy.ini create mode 100644 Tools/jit/template.c diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml new file mode 100644 index 00000000000000..e137fd21b0a0dd --- /dev/null +++ b/.github/workflows/jit.yml @@ -0,0 +1,112 @@ +name: JIT +on: + pull_request: + paths: '**jit**' + push: + paths: '**jit**' + workflow_dispatch: +jobs: + jit: + name: ${{ matrix.target }} (${{ matrix.debug && 'Debug' || 'Release' }}) + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + target: + - i686-pc-windows-msvc/msvc + - x86_64-pc-windows-msvc/msvc + - x86_64-apple-darwin/clang + - x86_64-unknown-linux-gnu/gcc + - x86_64-unknown-linux-gnu/clang + - aarch64-unknown-linux-gnu/gcc + - aarch64-unknown-linux-gnu/clang + debug: + - true + - false + llvm: + - 16 + include: + - target: i686-pc-windows-msvc/msvc + architecture: Win32 + runner: windows-latest + compiler: msvc + - target: x86_64-pc-windows-msvc/msvc + architecture: x64 + runner: windows-latest + compiler: msvc + - target: x86_64-apple-darwin/clang + architecture: x86_64 + runner: macos-latest + compiler: clang + exclude: test_embed + - target: x86_64-unknown-linux-gnu/gcc + architecture: x86_64 + runner: ubuntu-latest + compiler: gcc + - target: x86_64-unknown-linux-gnu/clang + architecture: x86_64 + runner: ubuntu-latest + compiler: clang + - target: aarch64-unknown-linux-gnu/gcc + architecture: aarch64 + runner: ubuntu-latest + compiler: gcc + # These fail because of emulation, not because of the JIT: + exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv + - target: aarch64-unknown-linux-gnu/clang + architecture: aarch64 + runner: ubuntu-latest + compiler: clang + # These fail because of emulation, not because of the JIT: + exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv + env: + CC: ${{ matrix.compiler }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Windows + if: runner.os == 'Windows' + run: | + choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }} + ./PCbuild/build.bat --experimental-jit ${{ matrix.debug && '-d' || '--pgo' }} -p ${{ matrix.architecture }} + ./PCbuild/rt.bat ${{ matrix.debug && '-d' }} -p ${{ matrix.architecture }} -q --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 + + - name: macOS + if: runner.os == 'macOS' + run: | + brew install llvm@${{ matrix.llvm }} + export SDKROOT="$(xcrun --show-sdk-path)" + ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} + make all --jobs 3 + ./python.exe -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 + + - name: Native Linux + if: runner.os == 'Linux' && matrix.architecture == 'x86_64' + run: | + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} + make all --jobs 4 + ./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 + - name: Emulated Linux + if: runner.os == 'Linux' && matrix.architecture != 'x86_64' + run: | + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + ./configure --prefix="$(pwd)/../build" + make install --jobs 4 + make clean --jobs 4 + export HOST=${{ matrix.architecture }}-linux-gnu + sudo apt install --yes "gcc-$HOST" qemu-user + ${{ !matrix.debug && matrix.compiler == 'clang' && './configure --enable-optimizations' || '' }} + ${{ !matrix.debug && matrix.compiler == 'clang' && 'make profile-run-stamp --jobs 4' || '' }} + export CC="${{ matrix.compiler == 'clang' && 'clang --target=$HOST' || '$HOST-gcc' }}" + export CPP="$CC --preprocess" + export HOSTRUNNER=qemu-${{ matrix.architecture }} + export QEMU_LD_PREFIX="/usr/$HOST" + ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} --build=x86_64-linux-gnu --host="$HOST" --with-build-python=../build/bin/python3 --with-pkg-config=no ac_cv_buggy_getaddrinfo=no ac_cv_file__dev_ptc=no ac_cv_file__dev_ptmx=yes + make all --jobs 4 + ./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 11928e72b9b43a..b766785de405d2 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -12,6 +12,7 @@ on: - "Tools/build/generate_sbom.py" - "Tools/cases_generator/**" - "Tools/clinic/**" + - "Tools/jit/**" - "Tools/peg_generator/**" - "Tools/requirements-dev.txt" - "Tools/wasm/**" @@ -38,6 +39,7 @@ jobs: "Tools/build/", "Tools/cases_generator", "Tools/clinic", + "Tools/jit", "Tools/peg_generator", "Tools/wasm", ] diff --git a/.gitignore b/.gitignore index c424a894c2a6e0..18eb2a9f0632ce 100644 --- a/.gitignore +++ b/.gitignore @@ -126,6 +126,7 @@ Tools/unicode/data/ # hendrikmuhs/ccache-action@v1 /.ccache /cross-build/ +/jit_stencils.h /platform /profile-clean-stamp /profile-run-stamp diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 96e829f8fbe97d..ecf3cae4cbc3f1 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -39,6 +39,8 @@ typedef struct { typedef struct _PyExecutorObject { PyObject_VAR_HEAD _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + void *jit_code; + size_t jit_size; _PyUOpInstruction trace[1]; } _PyExecutorObject; diff --git a/Include/internal/pycore_jit.h b/Include/internal/pycore_jit.h new file mode 100644 index 00000000000000..0b71eb6f758ac6 --- /dev/null +++ b/Include/internal/pycore_jit.h @@ -0,0 +1,25 @@ +#ifndef Py_INTERNAL_JIT_H +#define Py_INTERNAL_JIT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef _Py_JIT + +typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate); + +int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length); +void _PyJIT_Free(_PyExecutorObject *executor); + +#endif // _Py_JIT + +#ifdef __cplusplus +} +#endif + +#endif // !Py_INTERNAL_JIT_H diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 4e52ffc77c5956..e32ea2f528940a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -178,7 +178,7 @@ _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) } _Py_DECREF_STAT_INC(); #ifdef Py_REF_DEBUG - _Py_DEC_REFTOTAL(_PyInterpreterState_GET()); + _Py_DEC_REFTOTAL(PyInterpreterState_Get()); #endif if (--op->ob_refcnt != 0) { assert(op->ob_refcnt > 0); @@ -199,7 +199,7 @@ _Py_DECREF_NO_DEALLOC(PyObject *op) } _Py_DECREF_STAT_INC(); #ifdef Py_REF_DEBUG - _Py_DEC_REFTOTAL(_PyInterpreterState_GET()); + _Py_DEC_REFTOTAL(PyInterpreterState_Get()); #endif op->ob_refcnt--; #ifdef Py_DEBUG diff --git a/Makefile.pre.in b/Makefile.pre.in index 37a8b06987c710..fff3d3c4914e7a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -433,6 +433,7 @@ PYTHON_OBJS= \ Python/initconfig.o \ Python/instrumentation.o \ Python/intrinsics.o \ + Python/jit.o \ Python/legacy_tracing.o \ Python/lock.o \ Python/marshal.o \ @@ -1365,7 +1366,7 @@ regen-unicodedata: regen-all: regen-cases regen-typeslots \ regen-token regen-ast regen-keyword regen-sre regen-frozen \ regen-pegen-metaparser regen-pegen regen-test-frozenmain \ - regen-test-levenshtein regen-global-objects regen-sbom + regen-test-levenshtein regen-global-objects regen-sbom regen-jit @echo @echo "Note: make regen-stdlib-module-names, make regen-limited-abi, " @echo "make regen-configure and make regen-unicodedata should be run manually" @@ -1846,6 +1847,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_initconfig.h \ $(srcdir)/Include/internal/pycore_interp.h \ $(srcdir)/Include/internal/pycore_intrinsics.h \ + $(srcdir)/Include/internal/pycore_jit.h \ $(srcdir)/Include/internal/pycore_list.h \ $(srcdir)/Include/internal/pycore_llist.h \ $(srcdir)/Include/internal/pycore_lock.h \ @@ -2641,6 +2643,12 @@ config.status: $(srcdir)/configure Python/asm_trampoline.o: $(srcdir)/Python/asm_trampoline.S $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< +Python/jit.o: regen-jit + +.PHONY: regen-jit +regen-jit: + @REGEN_JIT_COMMAND@ + # Some make's put the object file in the current directory .c.o: $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< @@ -2733,6 +2741,7 @@ clean-retain-profile: pycremoval -rm -f Python/deepfreeze/*.[co] -rm -f Python/frozen_modules/*.h -rm -f Python/frozen_modules/MANIFEST + -rm -f jit_stencils.h -find build -type f -a ! -name '*.gc??' -exec rm -f {} ';' -rm -f Include/pydtrace_probes.h -rm -f profile-gen-stamp diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst new file mode 100644 index 00000000000000..bdee4d645f61c8 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst @@ -0,0 +1,4 @@ +Add an option (``--enable-experimental-jit`` for ``configure``-based builds +or ``--experimental-jit`` for ``PCbuild``-based ones) to build an +*experimental* just-in-time compiler, based on `copy-and-patch +`_ diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index dde801fc0fd525..35788ec4503e8f 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -224,6 +224,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 90ccb954b424bc..7a44179e356105 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -250,6 +250,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/build.bat b/PCbuild/build.bat index e61267b5852a8f..83b50db4467033 100644 --- a/PCbuild/build.bat +++ b/PCbuild/build.bat @@ -36,6 +36,7 @@ echo. overrides -c and -d echo. --disable-gil Enable experimental support for running without the GIL. echo. --test-marker Enable the test marker within the build. echo. --regen Regenerate all opcodes, grammar and tokens. +echo. --experimental-jit Enable the experimental just-in-time compiler. echo. echo.Available flags to avoid building certain modules. echo.These flags have no effect if '-e' is not given: @@ -85,6 +86,7 @@ if "%~1"=="--disable-gil" (set UseDisableGil=true) & shift & goto CheckOpts if "%~1"=="--test-marker" (set UseTestMarker=true) & shift & goto CheckOpts if "%~1"=="-V" shift & goto Version if "%~1"=="--regen" (set Regen=true) & shift & goto CheckOpts +if "%~1"=="--experimental-jit" (set UseJIT=true) & shift & goto CheckOpts rem These use the actual property names used by MSBuild. We could just let rem them in through the environment, but we specify them on the command line rem anyway for visibility so set defaults after this @@ -176,6 +178,7 @@ echo on /p:IncludeSSL=%IncludeSSL% /p:IncludeTkinter=%IncludeTkinter%^ /p:DisableGil=%UseDisableGil%^ /p:UseTestMarker=%UseTestMarker% %GITProperty%^ + /p:UseJIT=%UseJIT%^ %1 %2 %3 %4 %5 %6 %7 %8 %9 @echo off diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index e0b9fc137457a0..e1ff97659659ee 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -104,6 +104,7 @@ $(zlibDir);%(AdditionalIncludeDirectories) _USRDLL;Py_BUILD_CORE;Py_BUILD_CORE_BUILTIN;Py_ENABLE_SHARED;MS_DLL_ID="$(SysWinVer)";%(PreprocessorDefinitions) _Py_HAVE_ZLIB;%(PreprocessorDefinitions) + _Py_JIT;%(PreprocessorDefinitions) version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies) @@ -247,6 +248,7 @@ + @@ -585,6 +587,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index fd79436f5add97..4c55f23006b2f0 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -669,6 +669,9 @@ Include\cpython + + Include\internal + Include\internal @@ -1337,6 +1340,9 @@ Source Files + + Python + Source Files diff --git a/PCbuild/regen.targets b/PCbuild/regen.targets index cc9469c7ddd726..a90620d6ca8b7d 100644 --- a/PCbuild/regen.targets +++ b/PCbuild/regen.targets @@ -28,6 +28,9 @@ <_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" /> <_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" /> + + <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(GeneratedPyConfigDir)pyconfig.h;$(PySourcePath)Tools\jit\**"/> + <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils.h"/> @@ -76,10 +79,28 @@ + + + + aarch64-pc-windows-msvc + i686-pc-windows-msvc + x86_64-pc-windows-msvc + $(JITArgs) --debug + + + - + + + diff --git a/Python/ceval.c b/Python/ceval.c index 49388cd20377c0..4f208009086191 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -11,6 +11,7 @@ #include "pycore_function.h" #include "pycore_instruments.h" #include "pycore_intrinsics.h" +#include "pycore_jit.h" #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_moduleobject.h" // PyModuleObject #include "pycore_object.h" // _PyObject_GC_TRACK() @@ -955,9 +956,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int -// The Tier 2 interpreter is also here! +// Tier 2 is also here! enter_tier_two: +#ifdef _Py_JIT + + ; // ;) + jit_func jitted = current_executor->jit_code; + next_instr = jitted(frame, stack_pointer, tstate); + frame = tstate->current_frame; + Py_DECREF(current_executor); + if (next_instr == NULL) { + goto resume_with_error; + } + stack_pointer = _PyFrame_GetStackPointer(frame); + DISPATCH(); + +#else + #undef LOAD_IP #define LOAD_IP(UNUSED) (void)0 @@ -1073,6 +1089,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_DECREF(current_executor); DISPATCH(); +#endif // _Py_JIT + } #if defined(__GNUC__) # pragma GCC diagnostic pop diff --git a/Python/jit.c b/Python/jit.c new file mode 100644 index 00000000000000..22949c082da05a --- /dev/null +++ b/Python/jit.c @@ -0,0 +1,369 @@ +#ifdef _Py_JIT + +#include "Python.h" + +#include "pycore_abstract.h" +#include "pycore_call.h" +#include "pycore_ceval.h" +#include "pycore_dict.h" +#include "pycore_intrinsics.h" +#include "pycore_long.h" +#include "pycore_opcode_metadata.h" +#include "pycore_opcode_utils.h" +#include "pycore_optimizer.h" +#include "pycore_pyerrors.h" +#include "pycore_setobject.h" +#include "pycore_sliceobject.h" +#include "pycore_jit.h" + +#include "jit_stencils.h" + +// Memory management stuff: //////////////////////////////////////////////////// + +#ifndef MS_WINDOWS + #include +#endif + +static size_t +get_page_size(void) +{ +#ifdef MS_WINDOWS + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#else + return sysconf(_SC_PAGESIZE); +#endif +} + +static void +jit_error(const char *message) +{ +#ifdef MS_WINDOWS + int hint = GetLastError(); +#else + int hint = errno; +#endif + PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, hint); +} + +static char * +jit_alloc(size_t size) +{ + assert(size); + assert(size % get_page_size() == 0); +#ifdef MS_WINDOWS + int flags = MEM_COMMIT | MEM_RESERVE; + char *memory = VirtualAlloc(NULL, size, flags, PAGE_READWRITE); + int failed = memory == NULL; +#else + int flags = MAP_ANONYMOUS | MAP_PRIVATE; + char *memory = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + int failed = memory == MAP_FAILED; +#endif + if (failed) { + jit_error("unable to allocate memory"); + return NULL; + } + return memory; +} + +static int +jit_free(char *memory, size_t size) +{ + assert(size); + assert(size % get_page_size() == 0); +#ifdef MS_WINDOWS + int failed = !VirtualFree(memory, 0, MEM_RELEASE); +#else + int failed = munmap(memory, size); +#endif + if (failed) { + jit_error("unable to free memory"); + return -1; + } + return 0; +} + +static int +mark_executable(char *memory, size_t size) +{ + if (size == 0) { + return 0; + } + assert(size % get_page_size() == 0); + // Do NOT ever leave the memory writable! Also, don't forget to flush the + // i-cache (I cannot begin to tell you how horrible that is to debug): +#ifdef MS_WINDOWS + if (!FlushInstructionCache(GetCurrentProcess(), memory, size)) { + jit_error("unable to flush instruction cache"); + return -1; + } + int old; + int failed = !VirtualProtect(memory, size, PAGE_EXECUTE_READ, &old); +#else + __builtin___clear_cache((char *)memory, (char *)memory + size); + int failed = mprotect(memory, size, PROT_EXEC | PROT_READ); +#endif + if (failed) { + jit_error("unable to protect executable memory"); + return -1; + } + return 0; +} + +static int +mark_readable(char *memory, size_t size) +{ + if (size == 0) { + return 0; + } + assert(size % get_page_size() == 0); +#ifdef MS_WINDOWS + DWORD old; + int failed = !VirtualProtect(memory, size, PAGE_READONLY, &old); +#else + int failed = mprotect(memory, size, PROT_READ); +#endif + if (failed) { + jit_error("unable to protect readable memory"); + return -1; + } + return 0; +} + +// JIT compiler stuff: ///////////////////////////////////////////////////////// + +// Warning! AArch64 requires you to get your hands dirty. These are your gloves: + +// value[value_start : value_start + len] +static uint32_t +get_bits(uint64_t value, uint8_t value_start, uint8_t width) +{ + assert(width <= 32); + return (value >> value_start) & ((1ULL << width) - 1); +} + +// *loc[loc_start : loc_start + width] = value[value_start : value_start + width] +static void +set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, + uint8_t width) +{ + assert(loc_start + width <= 32); + // Clear the bits we're about to patch: + *loc &= ~(((1ULL << width) - 1) << loc_start); + assert(get_bits(*loc, loc_start, width) == 0); + // Patch the bits: + *loc |= get_bits(value, value_start, width) << loc_start; + assert(get_bits(*loc, loc_start, width) == get_bits(value, value_start, width)); +} + +// See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions +// for instruction encodings: +#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000) +#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000) +#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) +#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) +#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) + +// Fill all of stencil's holes in the memory pointed to by base, using the +// values in patches. +static void +patch(char *base, const Stencil *stencil, uint64_t *patches) +{ + for (uint64_t i = 0; i < stencil->holes_size; i++) { + const Hole *hole = &stencil->holes[i]; + void *location = base + hole->offset; + uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend; + uint32_t *loc32 = (uint32_t *)location; + uint64_t *loc64 = (uint64_t *)location; + // LLD is a great reference for performing relocations... just keep in + // mind that Tools/jit/build.py does filtering and preprocessing for us! + // Here's a good place to start for each platform: + // - aarch64-apple-darwin: + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.h + // - aarch64-unknown-linux-gnu: + // - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/AArch64.cpp + // - i686-pc-windows-msvc: + // - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp + // - x86_64-apple-darwin: + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/X86_64.cpp + // - x86_64-pc-windows-msvc: + // - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp + // - x86_64-unknown-linux-gnu: + // - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/X86_64.cpp + switch (hole->kind) { + case HoleKind_IMAGE_REL_I386_DIR32: + // 32-bit absolute address. + // Check that we're not out of range of 32 unsigned bits: + assert(value < (1ULL << 32)); + *loc32 = (uint32_t)value; + continue; + case HoleKind_ARM64_RELOC_UNSIGNED: + case HoleKind_IMAGE_REL_AMD64_ADDR64: + case HoleKind_R_AARCH64_ABS64: + case HoleKind_X86_64_RELOC_UNSIGNED: + case HoleKind_R_X86_64_64: + // 64-bit absolute address. + *loc64 = value; + continue; + case HoleKind_R_AARCH64_CALL26: + case HoleKind_R_AARCH64_JUMP26: + // 28-bit relative branch. + assert(IS_AARCH64_BRANCH(*loc32)); + value -= (uint64_t)location; + // Check that we're not out of range of 28 signed bits: + assert((int64_t)value >= -(1 << 27)); + assert((int64_t)value < (1 << 27)); + // Since instructions are 4-byte aligned, only use 26 bits: + assert(get_bits(value, 0, 2) == 0); + set_bits(loc32, 0, value, 2, 26); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G0_NC: + // 16-bit low part of an absolute address. + assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 0 of 3"): + assert(get_bits(*loc32, 21, 2) == 0); + set_bits(loc32, 5, value, 0, 16); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G1_NC: + // 16-bit middle-low part of an absolute address. + assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 1 of 3"): + assert(get_bits(*loc32, 21, 2) == 1); + set_bits(loc32, 5, value, 16, 16); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G2_NC: + // 16-bit middle-high part of an absolute address. + assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 2 of 3"): + assert(get_bits(*loc32, 21, 2) == 2); + set_bits(loc32, 5, value, 32, 16); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G3: + // 16-bit high part of an absolute address. + assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 3 of 3"): + assert(get_bits(*loc32, 21, 2) == 3); + set_bits(loc32, 5, value, 48, 16); + continue; + case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21: + // 21-bit count of pages between this page and an absolute address's + // page... I know, I know, it's weird. Pairs nicely with + // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). + assert(IS_AARCH64_ADRP(*loc32)); + // Number of pages between this page and the value's page: + value = (value >> 12) - ((uint64_t)location >> 12); + // Check that we're not out of range of 21 signed bits: + assert((int64_t)value >= -(1 << 20)); + assert((int64_t)value < (1 << 20)); + // value[0:2] goes in loc[29:31]: + set_bits(loc32, 29, value, 0, 2); + // value[2:21] goes in loc[5:26]: + set_bits(loc32, 5, value, 2, 19); + continue; + case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: + // 12-bit low part of an absolute address. Pairs nicely with + // ARM64_RELOC_GOT_LOAD_PAGE21 (above). + assert(IS_AARCH64_LDR_OR_STR(*loc32) || IS_AARCH64_ADD_OR_SUB(*loc32)); + // There might be an implicit shift encoded in the instruction: + uint8_t shift = 0; + if (IS_AARCH64_LDR_OR_STR(*loc32)) { + shift = (uint8_t)get_bits(*loc32, 30, 2); + // If both of these are set, the shift is supposed to be 4. + // That's pretty weird, and it's never actually been observed... + assert(get_bits(*loc32, 23, 1) == 0 || get_bits(*loc32, 26, 1) == 0); + } + value = get_bits(value, 0, 12); + assert(get_bits(value, 0, shift) == 0); + set_bits(loc32, 10, value, shift, 12); + continue; + } + Py_UNREACHABLE(); + } +} + +static void +copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches) +{ + memcpy(base, stencil->body, stencil->body_size); + patch(base, stencil, patches); +} + +static void +emit(const StencilGroup *group, uint64_t patches[]) +{ + copy_and_patch((char *)patches[HoleValue_CODE], &group->code, patches); + copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches); +} + +// Compiles executor in-place. Don't forget to call _PyJIT_Free later! +int +_PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length) +{ + // Loop once to find the total compiled size: + size_t code_size = 0; + size_t data_size = 0; + for (size_t i = 0; i < length; i++) { + _PyUOpInstruction *instruction = &trace[i]; + const StencilGroup *group = &stencil_groups[instruction->opcode]; + code_size += group->code.body_size; + data_size += group->data.body_size; + } + // Round up to the nearest page (code and data need separate pages): + size_t page_size = get_page_size(); + assert((page_size & (page_size - 1)) == 0); + code_size += page_size - (code_size & (page_size - 1)); + data_size += page_size - (data_size & (page_size - 1)); + char *memory = jit_alloc(code_size + data_size); + if (memory == NULL) { + return -1; + } + // Loop again to emit the code: + char *code = memory; + char *data = memory + code_size; + for (size_t i = 0; i < length; i++) { + _PyUOpInstruction *instruction = &trace[i]; + const StencilGroup *group = &stencil_groups[instruction->opcode]; + // Think of patches as a dictionary mapping HoleValue to uint64_t: + uint64_t patches[] = GET_PATCHES(); + patches[HoleValue_CODE] = (uint64_t)code; + patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size; + patches[HoleValue_DATA] = (uint64_t)data; + patches[HoleValue_EXECUTOR] = (uint64_t)executor; + patches[HoleValue_OPARG] = instruction->oparg; + patches[HoleValue_OPERAND] = instruction->operand; + patches[HoleValue_TARGET] = instruction->target; + patches[HoleValue_TOP] = (uint64_t)memory; + patches[HoleValue_ZERO] = 0; + emit(group, patches); + code += group->code.body_size; + data += group->data.body_size; + } + if (mark_executable(memory, code_size) || + mark_readable(memory + code_size, data_size)) + { + jit_free(memory, code_size + data_size); + return -1; + } + executor->jit_code = memory; + executor->jit_size = code_size + data_size; + return 0; +} + +void +_PyJIT_Free(_PyExecutorObject *executor) +{ + char *memory = (char *)executor->jit_code; + size_t size = executor->jit_size; + if (memory) { + executor->jit_code = NULL; + executor->jit_size = 0; + if (jit_free(memory, size)) { + PyErr_WriteUnraisable(NULL); + } + } +} + +#endif // _Py_JIT diff --git a/Python/optimizer.c b/Python/optimizer.c index db615068ff517f..0d04b09fef1e84 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -7,6 +7,7 @@ #include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_ids.h" +#include "pycore_jit.h" #include "cpython/optimizer.h" #include #include @@ -227,6 +228,9 @@ static PyMethodDef executor_methods[] = { static void uop_dealloc(_PyExecutorObject *self) { _Py_ExecutorClear(self); +#ifdef _Py_JIT + _PyJIT_Free(self); +#endif PyObject_Free(self); } @@ -789,6 +793,14 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) executor->trace[i].operand); } } +#endif +#ifdef _Py_JIT + executor->jit_code = NULL; + executor->jit_size = 0; + if (_PyJIT_Compile(executor, executor->trace, Py_SIZE(executor))) { + Py_DECREF(executor); + return NULL; + } #endif return executor; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index fff64dd63d6b21..372f60602375b6 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1240,12 +1240,19 @@ init_interp_main(PyThreadState *tstate) // Turn on experimental tier 2 (uops-based) optimizer if (is_main_interp) { +#ifndef _Py_JIT + // No JIT, maybe use the tier two interpreter: char *envvar = Py_GETENV("PYTHON_UOPS"); int enabled = envvar != NULL && *envvar > '0'; if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } if (enabled) { +#else + // Always enable tier two for JIT builds (ignoring the environment + // variable and command-line option above): + if (true) { +#endif PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer(); if (opt == NULL) { return _PyStatus_ERR("can't initialize optimizer"); diff --git a/Tools/jit/README.md b/Tools/jit/README.md new file mode 100644 index 00000000000000..04a6c0780bf972 --- /dev/null +++ b/Tools/jit/README.md @@ -0,0 +1,46 @@ +The JIT Compiler +================ + +This version of CPython can be built with an experimental just-in-time compiler. While most everything you already know about building and using CPython is unchanged, you will probably need to install a compatible version of LLVM first. + +## Installing LLVM + +The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). + +LLVM version 16 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-16`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. + +It's easy to install all of the required tools: + +### Linux + +Install LLVM 16 on Ubuntu/Debian: + +```sh +wget https://apt.llvm.org/llvm.sh +chmod +x llvm.sh +sudo ./llvm.sh 16 +``` + +### macOS + +Install LLVM 16 with [Homebrew](https://brew.sh): + +```sh +brew install llvm@16 +``` + +Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them. + +### Windows + +Install LLVM 16 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=16), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** + +## Building + +For `PCbuild`-based builds, pass the new `--experimental-jit` option to `build.bat`. + +For all other builds, pass the new `--enable-experimental-jit` option to `configure`. + +Otherwise, just configure and build as you normally would. Cross-compiling "just works", since the JIT is built for the host platform. + +[^why-llvm]: Clang is specifically needed because it's the only C compiler with support for guaranteed tail calls (`musttail`), which are required by CPython's continuation-passing-style approach to JIT compilation. Since LLVM also includes other functionalities we need (namely, object file parsing and disassembly), it's convenient to only support one toolchain at this time. diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py new file mode 100644 index 00000000000000..603bbef59ba2e6 --- /dev/null +++ b/Tools/jit/_llvm.py @@ -0,0 +1,99 @@ +"""Utilities for invoking LLVM tools.""" +import asyncio +import functools +import os +import re +import shlex +import subprocess +import typing + +_LLVM_VERSION = 16 +_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\s+") + +_P = typing.ParamSpec("_P") +_R = typing.TypeVar("_R") +_C = typing.Callable[_P, typing.Awaitable[_R]] + + +def _async_cache(f: _C[_P, _R]) -> _C[_P, _R]: + cache = {} + lock = asyncio.Lock() + + @functools.wraps(f) + async def wrapper( + *args: _P.args, **kwargs: _P.kwargs # pylint: disable = no-member + ) -> _R: + async with lock: + if args not in cache: + cache[args] = await f(*args, **kwargs) + return cache[args] + + return wrapper + + +_CORES = asyncio.BoundedSemaphore(os.cpu_count() or 1) + + +async def _run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str | None: + command = [tool, *args] + async with _CORES: + if echo: + print(shlex.join(command)) + try: + process = await asyncio.create_subprocess_exec( + *command, stdout=subprocess.PIPE + ) + except FileNotFoundError: + return None + out, _ = await process.communicate() + if process.returncode: + raise RuntimeError(f"{tool} exited with return code {process.returncode}") + return out.decode() + + +@_async_cache +async def _check_tool_version(name: str, *, echo: bool = False) -> bool: + output = await _run(name, ["--version"], echo=echo) + return bool(output and _LLVM_VERSION_PATTERN.search(output)) + + +@_async_cache +async def _get_brew_llvm_prefix(*, echo: bool = False) -> str | None: + output = await _run("brew", ["--prefix", f"llvm@{_LLVM_VERSION}"], echo=echo) + return output and output.removesuffix("\n") + + +@_async_cache +async def _find_tool(tool: str, *, echo: bool = False) -> str | None: + # Unversioned executables: + path = tool + if await _check_tool_version(path, echo=echo): + return path + # Versioned executables: + path = f"{tool}-{_LLVM_VERSION}" + if await _check_tool_version(path, echo=echo): + return path + # Homebrew-installed executables: + prefix = await _get_brew_llvm_prefix(echo=echo) + if prefix is not None: + path = os.path.join(prefix, "bin", tool) + if await _check_tool_version(path, echo=echo): + return path + # Nothing found: + return None + + +async def maybe_run( + tool: str, args: typing.Iterable[str], echo: bool = False +) -> str | None: + """Run an LLVM tool if it can be found. Otherwise, return None.""" + path = await _find_tool(tool, echo=echo) + return path and await _run(path, args, echo=echo) + + +async def run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str: + """Run an LLVM tool if it can be found. Otherwise, raise RuntimeError.""" + output = await maybe_run(tool, args, echo=echo) + if output is None: + raise RuntimeError(f"Can't find {tool}-{_LLVM_VERSION}!") + return output diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py new file mode 100644 index 00000000000000..8eeb78e6cd69ee --- /dev/null +++ b/Tools/jit/_schema.py @@ -0,0 +1,99 @@ +"""Schema for the JSON produced by llvm-readobj --elf-output-style=JSON.""" +import typing + +HoleKind: typing.TypeAlias = typing.Literal[ + "ARM64_RELOC_GOT_LOAD_PAGE21", + "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + "ARM64_RELOC_UNSIGNED", + "IMAGE_REL_AMD64_ADDR64", + "IMAGE_REL_I386_DIR32", + "R_AARCH64_ABS64", + "R_AARCH64_CALL26", + "R_AARCH64_JUMP26", + "R_AARCH64_MOVW_UABS_G0_NC", + "R_AARCH64_MOVW_UABS_G1_NC", + "R_AARCH64_MOVW_UABS_G2_NC", + "R_AARCH64_MOVW_UABS_G3", + "R_X86_64_64", + "X86_64_RELOC_UNSIGNED", +] + + +class COFFRelocation(typing.TypedDict): + """A COFF object file relocation record.""" + + Type: dict[typing.Literal["Value"], HoleKind] + Symbol: str + Offset: int + + +class ELFRelocation(typing.TypedDict): + """An ELF object file relocation record.""" + + Addend: int + Offset: int + Symbol: dict[typing.Literal["Value"], str] + Type: dict[typing.Literal["Value"], HoleKind] + + +class MachORelocation(typing.TypedDict): + """A Mach-O object file relocation record.""" + + Offset: int + Section: typing.NotRequired[dict[typing.Literal["Value"], str]] + Symbol: typing.NotRequired[dict[typing.Literal["Value"], str]] + Type: dict[typing.Literal["Value"], HoleKind] + + +class _COFFSymbol(typing.TypedDict): + Name: str + Value: int + + +class _ELFSymbol(typing.TypedDict): + Name: dict[typing.Literal["Value"], str] + Value: int + + +class _MachOSymbol(typing.TypedDict): + Name: dict[typing.Literal["Value"], str] + Value: int + + +class COFFSection(typing.TypedDict): + """A COFF object file section.""" + + Characteristics: dict[ + typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]] + ] + Number: int + RawDataSize: int + Relocations: list[dict[typing.Literal["Relocation"], COFFRelocation]] + SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] + Symbols: list[dict[typing.Literal["Symbol"], _COFFSymbol]] + + +class ELFSection(typing.TypedDict): + """An ELF object file section.""" + + Flags: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]] + Index: int + Info: int + Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]] + SectionData: dict[typing.Literal["Bytes"], list[int]] + Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]] + Type: dict[typing.Literal["Value"], str] + + +class MachOSection(typing.TypedDict): + """A Mach-O object file section.""" + + Address: int + Attributes: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]] + Index: int + Name: dict[typing.Literal["Value"], str] + Relocations: typing.NotRequired[ + list[dict[typing.Literal["Relocation"], MachORelocation]] + ] + SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] + Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]] diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py new file mode 100644 index 00000000000000..71c678e04fbfd5 --- /dev/null +++ b/Tools/jit/_stencils.py @@ -0,0 +1,220 @@ +"""Core data structures for compiled code templates.""" +import dataclasses +import enum +import sys + +import _schema + + +@enum.unique +class HoleValue(enum.Enum): + """ + Different "base" values that can be patched into holes (usually combined with the + address of a symbol and/or an addend). + """ + + # The base address of the machine code for the current uop (exposed as _JIT_ENTRY): + CODE = enum.auto() + # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE): + CONTINUE = enum.auto() + # The base address of the read-only data for this uop: + DATA = enum.auto() + # The address of the current executor (exposed as _JIT_EXECUTOR): + EXECUTOR = enum.auto() + # The base address of the "global" offset table located in the read-only data. + # Shouldn't be present in the final stencils, since these are all replaced with + # equivalent DATA values: + GOT = enum.auto() + # The current uop's oparg (exposed as _JIT_OPARG): + OPARG = enum.auto() + # The current uop's operand (exposed as _JIT_OPERAND): + OPERAND = enum.auto() + # The current uop's target (exposed as _JIT_TARGET): + TARGET = enum.auto() + # The base address of the machine code for the first uop (exposed as _JIT_TOP): + TOP = enum.auto() + # A hardcoded value of zero (used for symbol lookups): + ZERO = enum.auto() + + +@dataclasses.dataclass +class Hole: + """ + A "hole" in the stencil to be patched with a computed runtime value. + + Analogous to relocation records in an object file. + """ + + offset: int + kind: _schema.HoleKind + # Patch with this base value: + value: HoleValue + # ...plus the address of this symbol: + symbol: str | None + # ...plus this addend: + addend: int + # Convenience method: + replace = dataclasses.replace + + def as_c(self) -> str: + """Dump this hole as an initialization of a C Hole struct.""" + parts = [ + f"{self.offset:#x}", + f"HoleKind_{self.kind}", + f"HoleValue_{self.value.name}", + f"&{self.symbol}" if self.symbol else "NULL", + _format_addend(self.addend), + ] + return f"{{{', '.join(parts)}}}" + + +@dataclasses.dataclass +class Stencil: + """ + A contiguous block of machine code or data to be copied-and-patched. + + Analogous to a section or segment in an object file. + """ + + body: bytearray = dataclasses.field(default_factory=bytearray, init=False) + holes: list[Hole] = dataclasses.field(default_factory=list, init=False) + disassembly: list[str] = dataclasses.field(default_factory=list, init=False) + + def pad(self, alignment: int) -> None: + """Pad the stencil to the given alignment.""" + offset = len(self.body) + padding = -offset % alignment + self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") + self.body.extend([0] * padding) + + def emit_aarch64_trampoline(self, hole: Hole) -> None: + """Even with the large code model, AArch64 Linux insists on 28-bit jumps.""" + base = len(self.body) + where = slice(hole.offset, hole.offset + 4) + instruction = int.from_bytes(self.body[where], sys.byteorder) + instruction &= 0xFC000000 + instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF + self.body[where] = instruction.to_bytes(4, sys.byteorder) + self.disassembly += [ + f"{base + 4 * 0: x}: d2800008 mov x8, #0x0", + f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", + f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16", + f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}", + f"{base + 4 * 2:x}: f2c00008 movk x8, #0x0, lsl #32", + f"{base + 4 * 2:016x}: R_AARCH64_MOVW_UABS_G2_NC {hole.symbol}", + f"{base + 4 * 3:x}: f2e00008 movk x8, #0x0, lsl #48", + f"{base + 4 * 3:016x}: R_AARCH64_MOVW_UABS_G3 {hole.symbol}", + f"{base + 4 * 4:x}: d61f0100 br x8", + ] + for code in [ + 0xD2800008.to_bytes(4, sys.byteorder), + 0xF2A00008.to_bytes(4, sys.byteorder), + 0xF2C00008.to_bytes(4, sys.byteorder), + 0xF2E00008.to_bytes(4, sys.byteorder), + 0xD61F0100.to_bytes(4, sys.byteorder), + ]: + self.body.extend(code) + for i, kind in enumerate( + [ + "R_AARCH64_MOVW_UABS_G0_NC", + "R_AARCH64_MOVW_UABS_G1_NC", + "R_AARCH64_MOVW_UABS_G2_NC", + "R_AARCH64_MOVW_UABS_G3", + ] + ): + self.holes.append(hole.replace(offset=base + 4 * i, kind=kind)) + + +@dataclasses.dataclass +class StencilGroup: + """ + Code and data corresponding to a given micro-opcode. + + Analogous to an entire object file. + """ + + code: Stencil = dataclasses.field(default_factory=Stencil, init=False) + data: Stencil = dataclasses.field(default_factory=Stencil, init=False) + symbols: dict[int | str, tuple[HoleValue, int]] = dataclasses.field( + default_factory=dict, init=False + ) + _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False) + + def process_relocations(self, *, alignment: int = 1) -> None: + """Fix up all GOT and internal relocations for this stencil group.""" + self.code.pad(alignment) + self.data.pad(8) + for stencil in [self.code, self.data]: + holes = [] + for hole in stencil.holes: + if hole.value is HoleValue.GOT: + assert hole.symbol is not None + hole.value = HoleValue.DATA + hole.addend += self._global_offset_table_lookup(hole.symbol) + hole.symbol = None + elif hole.symbol in self.symbols: + hole.value, addend = self.symbols[hole.symbol] + hole.addend += addend + hole.symbol = None + elif ( + hole.kind in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26"} + and hole.value is HoleValue.ZERO + ): + self.code.emit_aarch64_trampoline(hole) + continue + holes.append(hole) + stencil.holes[:] = holes + self.code.pad(alignment) + self._emit_global_offset_table() + self.code.holes.sort(key=lambda hole: hole.offset) + self.data.holes.sort(key=lambda hole: hole.offset) + + def _global_offset_table_lookup(self, symbol: str) -> int: + return len(self.data.body) + self._got.setdefault(symbol, 8 * len(self._got)) + + def _emit_global_offset_table(self) -> None: + got = len(self.data.body) + for s, offset in self._got.items(): + if s in self.symbols: + value, addend = self.symbols[s] + symbol = None + else: + value, symbol = symbol_to_value(s) + addend = 0 + self.data.holes.append( + Hole(got + offset, "R_X86_64_64", value, symbol, addend) + ) + value_part = value.name if value is not HoleValue.ZERO else "" + if value_part and not symbol and not addend: + addend_part = "" + else: + addend_part = f"&{symbol}" if symbol else "" + addend_part += _format_addend(addend, signed=symbol is not None) + if value_part: + value_part += "+" + self.data.disassembly.append( + f"{len(self.data.body):x}: {value_part}{addend_part}" + ) + self.data.body.extend([0] * 8) + + +def symbol_to_value(symbol: str) -> tuple[HoleValue, str | None]: + """ + Convert a symbol name to a HoleValue and a symbol name. + + Some symbols (starting with "_JIT_") are special and are converted to their + own HoleValues. + """ + if symbol.startswith("_JIT_"): + try: + return HoleValue[symbol.removeprefix("_JIT_")], None + except KeyError: + pass + return HoleValue.ZERO, symbol + + +def _format_addend(addend: int, signed: bool = False) -> str: + addend %= 1 << 64 + if addend & (1 << 63): + addend -= 1 << 64 + return f"{addend:{'+#x' if signed else '#x'}}" diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py new file mode 100644 index 00000000000000..51b091eb246413 --- /dev/null +++ b/Tools/jit/_targets.py @@ -0,0 +1,394 @@ +"""Target-specific code generation, parsing, and processing.""" +import asyncio +import dataclasses +import hashlib +import json +import os +import pathlib +import re +import sys +import tempfile +import typing + +import _llvm +import _schema +import _stencils +import _writer + +if sys.version_info < (3, 11): + raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!") + +TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve() +TOOLS_JIT = TOOLS_JIT_BUILD.parent +TOOLS = TOOLS_JIT.parent +CPYTHON = TOOLS.parent +PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h" +TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c" + + +_S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection) +_R = typing.TypeVar( + "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation +) + + +@dataclasses.dataclass +class _Target(typing.Generic[_S, _R]): + triple: str + _: dataclasses.KW_ONLY + alignment: int = 1 + prefix: str = "" + debug: bool = False + force: bool = False + verbose: bool = False + + def _compute_digest(self, out: pathlib.Path) -> str: + hasher = hashlib.sha256() + hasher.update(self.triple.encode()) + hasher.update(self.alignment.to_bytes()) + hasher.update(self.prefix.encode()) + # These dependencies are also reflected in _JITSources in regen.targets: + hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) + hasher.update((out / "pyconfig.h").read_bytes()) + for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): + for filename in filenames: + hasher.update(pathlib.Path(dirpath, filename).read_bytes()) + return hasher.hexdigest() + + async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: + group = _stencils.StencilGroup() + args = ["--disassemble", "--reloc", f"{path}"] + output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose) + if output is not None: + group.code.disassembly.extend( + line.expandtabs().strip() + for line in output.splitlines() + if not line.isspace() + ) + args = [ + "--elf-output-style=JSON", + "--expand-relocs", + # "--pretty-print", + "--section-data", + "--section-relocations", + "--section-symbols", + "--sections", + f"{path}", + ] + output = await _llvm.run("llvm-readobj", args, echo=self.verbose) + # --elf-output-style=JSON is only *slightly* broken on Mach-O... + output = output.replace("PrivateExtern\n", "\n") + output = output.replace("Extern\n", "\n") + # ...and also COFF: + output = output[output.index("[", 1, None) :] + output = output[: output.rindex("]", None, -1) + 1] + sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output) + for wrapped_section in sections: + self._handle_section(wrapped_section["Section"], group) + assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0) + if group.data.body: + line = f"0: {str(bytes(group.data.body)).removeprefix('b')}" + group.data.disassembly.append(line) + group.process_relocations() + return group + + def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None: + raise NotImplementedError(type(self)) + + def _handle_relocation( + self, base: int, relocation: _R, raw: bytes + ) -> _stencils.Hole: + raise NotImplementedError(type(self)) + + async def _compile( + self, opname: str, c: pathlib.Path, tempdir: pathlib.Path + ) -> _stencils.StencilGroup: + o = tempdir / f"{opname}.o" + args = [ + f"--target={self.triple}", + "-DPy_BUILD_CORE", + "-D_DEBUG" if self.debug else "-DNDEBUG", + f"-D_JIT_OPCODE={opname}", + "-D_PyJIT_ACTIVE", + "-D_Py_JIT", + "-I.", + f"-I{CPYTHON / 'Include'}", + f"-I{CPYTHON / 'Include' / 'internal'}", + f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", + f"-I{CPYTHON / 'Python'}", + "-O3", + "-c", + "-fno-asynchronous-unwind-tables", + # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds: + "-fno-jump-tables", + # Position-independent code adds indirection to every load and jump: + "-fno-pic", + # Don't make calls to weird stack-smashing canaries: + "-fno-stack-protector", + # We have three options for code model: + # - "small": the default, assumes that code and data reside in the + # lowest 2GB of memory (128MB on aarch64) + # - "medium": assumes that code resides in the lowest 2GB of memory, + # and makes no assumptions about data (not available on aarch64) + # - "large": makes no assumptions about either code or data + "-mcmodel=large", + "-o", + f"{o}", + "-std=c11", + f"{c}", + ] + await _llvm.run("clang", args, echo=self.verbose) + return await self._parse(o) + + async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: + generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() + opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases)) + tasks = [] + with tempfile.TemporaryDirectory() as tempdir: + work = pathlib.Path(tempdir).resolve() + async with asyncio.TaskGroup() as group: + for opname in opnames: + coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work) + tasks.append(group.create_task(coro, name=opname)) + return {task.get_name(): task.result() for task in tasks} + + def build(self, out: pathlib.Path, *, comment: str = "") -> None: + """Build jit_stencils.h in the given directory.""" + digest = f"// {self._compute_digest(out)}\n" + jit_stencils = out / "jit_stencils.h" + if ( + not self.force + and jit_stencils.exists() + and jit_stencils.read_text().startswith(digest) + ): + return + stencil_groups = asyncio.run(self._build_stencils()) + with jit_stencils.open("w") as file: + file.write(digest) + if comment: + file.write(f"// {comment}\n") + file.write("") + for line in _writer.dump(stencil_groups): + file.write(f"{line}\n") + + +class _COFF( + _Target[_schema.COFFSection, _schema.COFFRelocation] +): # pylint: disable = too-few-public-methods + def _handle_section( + self, section: _schema.COFFSection, group: _stencils.StencilGroup + ) -> None: + flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]} + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data, seen with printf debugging calls: + section_data_bytes = [0] * section["RawDataSize"] + if "IMAGE_SCN_MEM_EXECUTE" in flags: + value = _stencils.HoleValue.CODE + stencil = group.code + elif "IMAGE_SCN_MEM_READ" in flags: + value = _stencils.HoleValue.DATA + stencil = group.data + else: + return + base = len(stencil.body) + group.symbols[section["Number"]] = value, base + stencil.body.extend(section_data_bytes) + for wrapped_symbol in section["Symbols"]: + symbol = wrapped_symbol["Symbol"] + offset = base + symbol["Value"] + name = symbol["Name"] + name = name.removeprefix(self.prefix) + group.symbols[name] = value, offset + for wrapped_relocation in section["Relocations"]: + relocation = wrapped_relocation["Relocation"] + hole = self._handle_relocation(base, relocation, stencil.body) + stencil.holes.append(hole) + + def _handle_relocation( + self, base: int, relocation: _schema.COFFRelocation, raw: bytes + ) -> _stencils.Hole: + match relocation: + case { + "Offset": offset, + "Symbol": s, + "Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = int.from_bytes(raw[offset : offset + 8], "little") + case { + "Offset": offset, + "Symbol": s, + "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = int.from_bytes(raw[offset : offset + 4], "little") + case _: + raise NotImplementedError(relocation) + return _stencils.Hole(offset, kind, value, symbol, addend) + + +class _ELF( + _Target[_schema.ELFSection, _schema.ELFRelocation] +): # pylint: disable = too-few-public-methods + def _handle_section( + self, section: _schema.ELFSection, group: _stencils.StencilGroup + ) -> None: + section_type = section["Type"]["Value"] + flags = {flag["Name"] for flag in section["Flags"]["Flags"]} + if section_type == "SHT_RELA": + assert "SHF_INFO_LINK" in flags, flags + assert not section["Symbols"] + value, base = group.symbols[section["Info"]] + if value is _stencils.HoleValue.CODE: + stencil = group.code + else: + assert value is _stencils.HoleValue.DATA + stencil = group.data + for wrapped_relocation in section["Relocations"]: + relocation = wrapped_relocation["Relocation"] + hole = self._handle_relocation(base, relocation, stencil.body) + stencil.holes.append(hole) + elif section_type == "SHT_PROGBITS": + if "SHF_ALLOC" not in flags: + return + if "SHF_EXECINSTR" in flags: + value = _stencils.HoleValue.CODE + stencil = group.code + else: + value = _stencils.HoleValue.DATA + stencil = group.data + group.symbols[section["Index"]] = value, len(stencil.body) + for wrapped_symbol in section["Symbols"]: + symbol = wrapped_symbol["Symbol"] + offset = len(stencil.body) + symbol["Value"] + name = symbol["Name"]["Value"] + name = name.removeprefix(self.prefix) + group.symbols[name] = value, offset + stencil.body.extend(section["SectionData"]["Bytes"]) + assert not section["Relocations"] + else: + assert section_type in { + "SHT_GROUP", + "SHT_LLVM_ADDRSIG", + "SHT_NULL", + "SHT_STRTAB", + "SHT_SYMTAB", + }, section_type + + def _handle_relocation( + self, base: int, relocation: _schema.ELFRelocation, raw: bytes + ) -> _stencils.Hole: + match relocation: + case { + "Addend": addend, + "Offset": offset, + "Symbol": {"Value": s}, + "Type": {"Value": kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + case _: + raise NotImplementedError(relocation) + return _stencils.Hole(offset, kind, value, symbol, addend) + + +class _MachO( + _Target[_schema.MachOSection, _schema.MachORelocation] +): # pylint: disable = too-few-public-methods + def _handle_section( + self, section: _schema.MachOSection, group: _stencils.StencilGroup + ) -> None: + assert section["Address"] >= len(group.code.body) + assert "SectionData" in section + flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} + name = section["Name"]["Value"] + name = name.removeprefix(self.prefix) + if "SomeInstructions" in flags: + value = _stencils.HoleValue.CODE + stencil = group.code + start_address = 0 + group.symbols[name] = value, section["Address"] - start_address + else: + value = _stencils.HoleValue.DATA + stencil = group.data + start_address = len(group.code.body) + group.symbols[name] = value, len(group.code.body) + base = section["Address"] - start_address + group.symbols[section["Index"]] = value, base + stencil.body.extend( + [0] * (section["Address"] - len(group.code.body) - len(group.data.body)) + ) + stencil.body.extend(section["SectionData"]["Bytes"]) + assert "Symbols" in section + for wrapped_symbol in section["Symbols"]: + symbol = wrapped_symbol["Symbol"] + offset = symbol["Value"] - start_address + name = symbol["Name"]["Value"] + name = name.removeprefix(self.prefix) + group.symbols[name] = value, offset + assert "Relocations" in section + for wrapped_relocation in section["Relocations"]: + relocation = wrapped_relocation["Relocation"] + hole = self._handle_relocation(base, relocation, stencil.body) + stencil.holes.append(hole) + + def _handle_relocation( + self, base: int, relocation: _schema.MachORelocation, raw: bytes + ) -> _stencils.Hole: + symbol: str | None + match relocation: + case { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "ARM64_RELOC_GOT_LOAD_PAGE21" + | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.HoleValue.GOT, s + addend = 0 + case { + "Offset": offset, + "Section": {"Value": s}, + "Type": {"Value": kind}, + } | { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": {"Value": kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = 0 + case _: + raise NotImplementedError(relocation) + # Turn Clang's weird __bzero calls into normal bzero calls: + if symbol == "__bzero": + symbol = "bzero" + return _stencils.Hole(offset, kind, value, symbol, addend) + + +def get_target(host: str) -> _COFF | _ELF | _MachO: + """Build a _Target for the given host "triple" and options.""" + if re.fullmatch(r"aarch64-apple-darwin.*", host): + return _MachO(host, alignment=8, prefix="_") + if re.fullmatch(r"aarch64-.*-linux-gnu", host): + return _ELF(host, alignment=8) + if re.fullmatch(r"i686-pc-windows-msvc", host): + return _COFF(host, prefix="_") + if re.fullmatch(r"x86_64-apple-darwin.*", host): + return _MachO(host, prefix="_") + if re.fullmatch(r"x86_64-pc-windows-msvc", host): + return _COFF(host) + if re.fullmatch(r"x86_64-.*-linux-gnu", host): + return _ELF(host) + raise ValueError(host) diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py new file mode 100644 index 00000000000000..8a2a42e75cfb9b --- /dev/null +++ b/Tools/jit/_writer.py @@ -0,0 +1,95 @@ +"""Utilities for writing StencilGroups out to a C header file.""" +import typing + +import _schema +import _stencils + + +def _dump_header() -> typing.Iterator[str]: + yield "typedef enum {" + for kind in typing.get_args(_schema.HoleKind): + yield f" HoleKind_{kind}," + yield "} HoleKind;" + yield "" + yield "typedef enum {" + for value in _stencils.HoleValue: + yield f" HoleValue_{value.name}," + yield "} HoleValue;" + yield "" + yield "typedef struct {" + yield " const uint64_t offset;" + yield " const HoleKind kind;" + yield " const HoleValue value;" + yield " const void *symbol;" + yield " const uint64_t addend;" + yield "} Hole;" + yield "" + yield "typedef struct {" + yield " const size_t body_size;" + yield " const unsigned char * const body;" + yield " const size_t holes_size;" + yield " const Hole * const holes;" + yield "} Stencil;" + yield "" + yield "typedef struct {" + yield " const Stencil code;" + yield " const Stencil data;" + yield "} StencilGroup;" + yield "" + + +def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]: + yield "#define INIT_STENCIL(STENCIL) { \\" + yield " .body_size = Py_ARRAY_LENGTH(STENCIL##_body) - 1, \\" + yield " .body = STENCIL##_body, \\" + yield " .holes_size = Py_ARRAY_LENGTH(STENCIL##_holes) - 1, \\" + yield " .holes = STENCIL##_holes, \\" + yield "}" + yield "" + yield "#define INIT_STENCIL_GROUP(OP) { \\" + yield " .code = INIT_STENCIL(OP##_code), \\" + yield " .data = INIT_STENCIL(OP##_data), \\" + yield "}" + yield "" + yield "static const StencilGroup stencil_groups[512] = {" + for opname in opnames: + yield f" [{opname}] = INIT_STENCIL_GROUP({opname})," + yield "};" + yield "" + yield "#define GET_PATCHES() { \\" + for value in _stencils.HoleValue: + yield f" [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\" + yield "}" + + +def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator[str]: + yield f"// {opname}" + for part, stencil in [("code", group.code), ("data", group.data)]: + for line in stencil.disassembly: + yield f"// {line}" + if stencil.body: + size = len(stencil.body) + 1 + yield f"static const unsigned char {opname}_{part}_body[{size}] = {{" + for i in range(0, len(stencil.body), 8): + row = " ".join(f"{byte:#04x}," for byte in stencil.body[i : i + 8]) + yield f" {row}" + yield "};" + else: + yield f"static const unsigned char {opname}_{part}_body[1];" + if stencil.holes: + size = len(stencil.holes) + 1 + yield f"static const Hole {opname}_{part}_holes[{size}] = {{" + for hole in stencil.holes: + yield f" {hole.as_c()}," + yield "};" + else: + yield f"static const Hole {opname}_{part}_holes[1];" + yield "" + + +def dump(groups: dict[str, _stencils.StencilGroup]) -> typing.Iterator[str]: + """Yield a JIT compiler line-by-line as a C header file.""" + yield from _dump_header() + for opname, group in groups.items(): + yield from _dump_stencil(opname, group) + yield from _dump_footer(groups) diff --git a/Tools/jit/build.py b/Tools/jit/build.py new file mode 100644 index 00000000000000..4d4ace14ebf26c --- /dev/null +++ b/Tools/jit/build.py @@ -0,0 +1,28 @@ +"""Build an experimental just-in-time compiler for CPython.""" +import argparse +import pathlib +import shlex +import sys + +import _targets + +if __name__ == "__main__": + comment = f"$ {shlex.join([sys.executable] + sys.argv)}" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "target", type=_targets.get_target, help="a PEP 11 target triple to compile for" + ) + parser.add_argument( + "-d", "--debug", action="store_true", help="compile for a debug build of Python" + ) + parser.add_argument( + "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="echo commands as they are run" + ) + args = parser.parse_args() + args.target.debug = args.debug + args.target.force = args.force + args.target.verbose = args.verbose + args.target.build(pathlib.Path.cwd(), comment=comment) diff --git a/Tools/jit/mypy.ini b/Tools/jit/mypy.ini new file mode 100644 index 00000000000000..768d0028516abd --- /dev/null +++ b/Tools/jit/mypy.ini @@ -0,0 +1,5 @@ +[mypy] +files = Tools/jit +pretty = True +python_version = 3.11 +strict = True diff --git a/Tools/jit/template.c b/Tools/jit/template.c new file mode 100644 index 00000000000000..12303a550d8879 --- /dev/null +++ b/Tools/jit/template.c @@ -0,0 +1,98 @@ +#include "Python.h" + +#include "pycore_call.h" +#include "pycore_ceval.h" +#include "pycore_dict.h" +#include "pycore_emscripten_signal.h" +#include "pycore_intrinsics.h" +#include "pycore_jit.h" +#include "pycore_long.h" +#include "pycore_opcode_metadata.h" +#include "pycore_opcode_utils.h" +#include "pycore_range.h" +#include "pycore_setobject.h" +#include "pycore_sliceobject.h" + +#include "ceval_macros.h" + +#undef CURRENT_OPARG +#define CURRENT_OPARG() (_oparg) + +#undef CURRENT_OPERAND +#define CURRENT_OPERAND() (_operand) + +#undef DEOPT_IF +#define DEOPT_IF(COND, INSTNAME) \ + do { \ + if ((COND)) { \ + goto deoptimize; \ + } \ + } while (0) + +#undef ENABLE_SPECIALIZATION +#define ENABLE_SPECIALIZATION (0) + +#undef GOTO_ERROR +#define GOTO_ERROR(LABEL) \ + do { \ + goto LABEL ## _tier_two; \ + } while (0) + +#undef LOAD_IP +#define LOAD_IP(UNUSED) \ + do { \ + } while (0) + +#define PATCH_VALUE(TYPE, NAME, ALIAS) \ + extern void ALIAS; \ + TYPE NAME = (TYPE)(uint64_t)&ALIAS; + +#define PATCH_JUMP(ALIAS) \ + extern void ALIAS; \ + __attribute__((musttail)) \ + return ((jit_func)&ALIAS)(frame, stack_pointer, tstate); + +_Py_CODEUNIT * +_JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate) +{ + // Locals that the instruction implementations expect to exist: + PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR) + int oparg; + int opcode = _JIT_OPCODE; + _PyUOpInstruction *next_uop; + // Other stuff we need handy: + PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG) + PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND) + PATCH_VALUE(uint32_t, _target, _JIT_TARGET) + // The actual instruction definitions (only one will be used): + if (opcode == _JUMP_TO_TOP) { + CHECK_EVAL_BREAKER(); + PATCH_JUMP(_JIT_TOP); + } + switch (opcode) { +#include "executor_cases.c.h" + default: + Py_UNREACHABLE(); + } + PATCH_JUMP(_JIT_CONTINUE); + // Labels that the instruction implementations expect to exist: +unbound_local_error_tier_two: + _PyEval_FormatExcCheckArg( + tstate, PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG, + PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)); + goto error_tier_two; +pop_4_error_tier_two: + STACK_SHRINK(1); +pop_3_error_tier_two: + STACK_SHRINK(1); +pop_2_error_tier_two: + STACK_SHRINK(1); +pop_1_error_tier_two: + STACK_SHRINK(1); +error_tier_two: + _PyFrame_SetStackPointer(frame, stack_pointer); + return NULL; +deoptimize: + _PyFrame_SetStackPointer(frame, stack_pointer); + return _PyCode_CODE(_PyFrame_GetCode(frame)) + _target; +} diff --git a/configure b/configure index b1153df4d7ec52..c563c3f5d3c7e6 100755 --- a/configure +++ b/configure @@ -920,6 +920,7 @@ LLVM_AR PROFILE_TASK DEF_MAKE_RULE DEF_MAKE_ALL_RULE +REGEN_JIT_COMMAND ABIFLAGS LN MKDIR_P @@ -1074,6 +1075,7 @@ with_pydebug with_trace_refs enable_pystats with_assertions +enable_experimental_jit enable_optimizations with_lto enable_bolt @@ -1801,6 +1803,9 @@ Optional Features: --disable-gil enable experimental support for running without the GIL (default is no) --enable-pystats enable internal statistics gathering (default is no) + --enable-experimental-jit + build the experimental just-in-time compiler + (default is no) --enable-optimizations enable expensive, stable optimizations (PGO, etc.) (default is no) --enable-bolt enable usage of the llvm-bolt post-link optimizer @@ -7997,6 +8002,32 @@ else printf "%s\n" "no" >&6; } fi +# Check for --enable-experimental-jit: +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --enable-experimental-jit" >&5 +printf %s "checking for --enable-experimental-jit... " >&6; } +# Check whether --enable-experimental-jit was given. +if test ${enable_experimental_jit+y} +then : + enableval=$enable_experimental_jit; +else $as_nop + enable_experimental_jit=no +fi + +if test "x$enable_experimental_jit" = xno +then : + +else $as_nop + as_fn_append CFLAGS_NODIST " -D_Py_JIT" + REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host" + if test "x$Py_DEBUG" = xtrue +then : + as_fn_append REGEN_JIT_COMMAND " --debug" +fi +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_experimental_jit" >&5 +printf "%s\n" "$enable_experimental_jit" >&6; } + # Enable optimization flags diff --git a/configure.ac b/configure.ac index 9587e6d63499aa..13c46b3e80151d 100644 --- a/configure.ac +++ b/configure.ac @@ -1579,6 +1579,26 @@ else AC_MSG_RESULT([no]) fi +# Check for --enable-experimental-jit: +AC_MSG_CHECKING([for --enable-experimental-jit]) +AC_ARG_ENABLE([experimental-jit], + [AS_HELP_STRING([--enable-experimental-jit], + [build the experimental just-in-time compiler (default is no)])], + [], + [enable_experimental_jit=no]) +AS_VAR_IF([enable_experimental_jit], + [no], + [], + [AS_VAR_APPEND([CFLAGS_NODIST], [" -D_Py_JIT"]) + AS_VAR_SET([REGEN_JIT_COMMAND], + ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host"]) + AS_VAR_IF([Py_DEBUG], + [true], + [AS_VAR_APPEND([REGEN_JIT_COMMAND], [" --debug"])], + [])]) +AC_SUBST([REGEN_JIT_COMMAND]) +AC_MSG_RESULT([$enable_experimental_jit]) + # Enable optimization flags AC_SUBST([DEF_MAKE_ALL_RULE]) AC_SUBST([DEF_MAKE_RULE]) From a16a9f978f42b8a09297c1efbf33877f6388c403 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 28 Jan 2024 18:52:58 -0800 Subject: [PATCH 055/127] GH-113464: A JIT backend for tier 2 (GH-113465) Add an option (--enable-experimental-jit for configure-based builds or --experimental-jit for PCbuild-based ones) to build an *experimental* just-in-time compiler, based on copy-and-patch (https://fredrikbk.com/publications/copy-and-patch.pdf). See Tools/jit/README.md for more information, including how to install the required build-time tooling. Merry JIT-mas! ;) From d7d0d13cd37651990586d31d8974c59bd25e1045 Mon Sep 17 00:00:00 2001 From: Stanley <46876382+slateny@users.noreply.github.com> Date: Mon, 29 Jan 2024 01:19:22 -0800 Subject: [PATCH 056/127] gh-89159: Add some TarFile attribute types (GH-114520) Co-authored-by: Stanley <46876382+slateny@users.noreply.github.com> --- Doc/library/tarfile.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 34a738a7f1c41f..2134293a0bb0de 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -673,6 +673,7 @@ be finalized; only the internally used file object will be closed. See the .. attribute:: TarFile.pax_headers + :type: dict A dictionary containing key-value pairs of pax global headers. @@ -838,26 +839,31 @@ A ``TarInfo`` object has the following public data attributes: attribute. .. attribute:: TarInfo.chksum + :type: int Header checksum. .. attribute:: TarInfo.devmajor + :type: int Device major number. .. attribute:: TarInfo.devminor + :type: int Device minor number. .. attribute:: TarInfo.offset + :type: int The tar header starts here. .. attribute:: TarInfo.offset_data + :type: int The file's data starts here. From 2124a3ddcc0e274521f74d239f0e94060e17dd7f Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 29 Jan 2024 01:30:22 -0800 Subject: [PATCH 057/127] gh-109653: Improve import time of importlib.metadata / email.utils (#114664) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My criterion for delayed imports is that they're only worth it if the majority of users of the module would benefit from it, otherwise you're just moving latency around unpredictably. mktime_tz is not used anywhere in the standard library and grep.app indicates it's not got much use in the ecosystem either. Distribution.files is not nearly as widely used as other importlib.metadata APIs, so we defer the csv import. Before: ``` λ hyperfine -w 8 './python -c "import importlib.metadata"' Benchmark 1: ./python -c "import importlib.metadata" Time (mean ± σ): 65.1 ms ± 0.5 ms [User: 55.3 ms, System: 9.8 ms] Range (min … max): 64.4 ms … 66.4 ms 44 runs ``` After: ``` λ hyperfine -w 8 './python -c "import importlib.metadata"' Benchmark 1: ./python -c "import importlib.metadata" Time (mean ± σ): 62.0 ms ± 0.3 ms [User: 52.5 ms, System: 9.6 ms] Range (min … max): 61.3 ms … 62.8 ms 46 runs ``` for about a 3ms saving with warm disk cache, maybe 7-11ms with cold disk cache. --- Lib/email/_parseaddr.py | 5 ++++- Lib/importlib/metadata/__init__.py | 5 ++++- .../Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index febe411355d6be..0f1bf8e4253ec4 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -13,7 +13,7 @@ 'quote', ] -import time, calendar +import time SPACE = ' ' EMPTYSTRING = '' @@ -194,6 +194,9 @@ def mktime_tz(data): # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: + # Delay the import, since mktime_tz is rarely used + import calendar + t = calendar.timegm(data) return t - data[9] diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 7b142e786e829e..c612fbefee2e80 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -1,7 +1,6 @@ import os import re import abc -import csv import sys import json import email @@ -478,6 +477,10 @@ def make_file(name, hash=None, size_str=None): @pass_none def make_files(lines): + # Delay csv import, since Distribution.files is not as widely used + # as other parts of importlib.metadata + import csv + return starmap(make_file, csv.reader(lines)) @pass_none diff --git a/Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst b/Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst new file mode 100644 index 00000000000000..fb3382098853b3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst @@ -0,0 +1 @@ +Improve import time of :mod:`importlib.metadata` and :mod:`email.utils`. From 1ac1b2f9536a581f1656f0ac9330a7382420cda1 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 29 Jan 2024 12:37:06 +0300 Subject: [PATCH 058/127] gh-114685: Fix incorrect use of PyBUF_READ in import.c (GH-114686) --- Python/import.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/import.c b/Python/import.c index 2dd95d8364a0be..2fd0c08a6bb5ae 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3544,7 +3544,7 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name, struct frozen_info info = {0}; Py_buffer buf = {0}; if (PyObject_CheckBuffer(dataobj)) { - if (PyObject_GetBuffer(dataobj, &buf, PyBUF_READ) != 0) { + if (PyObject_GetBuffer(dataobj, &buf, PyBUF_SIMPLE) != 0) { return NULL; } info.data = (const char *)buf.buf; From 3b86891fd69093b60141300862f278614ba80613 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 29 Jan 2024 01:37:28 -0800 Subject: [PATCH 059/127] gh-110893: Improve the documentation for __future__ module (#114642) nedbat took issue with the phrasing "real module". I'm actually fine with that phrasing, but I do think the `__future__` page should be clear about the way in which the `__future__` module is special. (Yes, there was a footnote linking to the future statements part of the reference, but there should be upfront discussion). I'm sympathetic to nedbat's claim that no one really cares about `__future__._Feature`, so I've moved the interesting table up to the top. --- Doc/library/__future__.rst | 98 ++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/Doc/library/__future__.rst b/Doc/library/__future__.rst index d261e4a4f338a5..762f8b4695b3dd 100644 --- a/Doc/library/__future__.rst +++ b/Doc/library/__future__.rst @@ -8,20 +8,68 @@ -------------- -:mod:`__future__` is a real module, and serves three purposes: +Imports of the form ``from __future__ import feature`` are called +:ref:`future statements `. These are special-cased by the Python compiler +to allow the use of new Python features in modules containing the future statement +before the release in which the feature becomes standard. + +While these future statements are given additional special meaning by the +Python compiler, they are still executed like any other import statement and +the :mod:`__future__` exists and is handled by the import system the same way +any other Python module would be. This design serves three purposes: * To avoid confusing existing tools that analyze import statements and expect to find the modules they're importing. -* To ensure that :ref:`future statements ` run under releases prior to - 2.1 at least yield runtime exceptions (the import of :mod:`__future__` will - fail, because there was no module of that name prior to 2.1). - * To document when incompatible changes were introduced, and when they will be --- or were --- made mandatory. This is a form of executable documentation, and can be inspected programmatically via importing :mod:`__future__` and examining its contents. +* To ensure that :ref:`future statements ` run under releases prior to + Python 2.1 at least yield runtime exceptions (the import of :mod:`__future__` + will fail, because there was no module of that name prior to 2.1). + +Module Contents +--------------- + +No feature description will ever be deleted from :mod:`__future__`. Since its +introduction in Python 2.1 the following features have found their way into the +language using this mechanism: + ++------------------+-------------+--------------+---------------------------------------------+ +| feature | optional in | mandatory in | effect | ++==================+=============+==============+=============================================+ +| nested_scopes | 2.1.0b1 | 2.2 | :pep:`227`: | +| | | | *Statically Nested Scopes* | ++------------------+-------------+--------------+---------------------------------------------+ +| generators | 2.2.0a1 | 2.3 | :pep:`255`: | +| | | | *Simple Generators* | ++------------------+-------------+--------------+---------------------------------------------+ +| division | 2.2.0a2 | 3.0 | :pep:`238`: | +| | | | *Changing the Division Operator* | ++------------------+-------------+--------------+---------------------------------------------+ +| absolute_import | 2.5.0a1 | 3.0 | :pep:`328`: | +| | | | *Imports: Multi-Line and Absolute/Relative* | ++------------------+-------------+--------------+---------------------------------------------+ +| with_statement | 2.5.0a1 | 2.6 | :pep:`343`: | +| | | | *The "with" Statement* | ++------------------+-------------+--------------+---------------------------------------------+ +| print_function | 2.6.0a2 | 3.0 | :pep:`3105`: | +| | | | *Make print a function* | ++------------------+-------------+--------------+---------------------------------------------+ +| unicode_literals | 2.6.0a2 | 3.0 | :pep:`3112`: | +| | | | *Bytes literals in Python 3000* | ++------------------+-------------+--------------+---------------------------------------------+ +| generator_stop | 3.5.0b1 | 3.7 | :pep:`479`: | +| | | | *StopIteration handling inside generators* | ++------------------+-------------+--------------+---------------------------------------------+ +| annotations | 3.7.0b1 | TBD [1]_ | :pep:`563`: | +| | | | *Postponed evaluation of annotations* | ++------------------+-------------+--------------+---------------------------------------------+ + +.. XXX Adding a new entry? Remember to update simple_stmts.rst, too. + .. _future-classes: .. class:: _Feature @@ -65,43 +113,6 @@ dynamically compiled code. This flag is stored in the :attr:`_Feature.compiler_flag` attribute on :class:`_Feature` instances. -No feature description will ever be deleted from :mod:`__future__`. Since its -introduction in Python 2.1 the following features have found their way into the -language using this mechanism: - -+------------------+-------------+--------------+---------------------------------------------+ -| feature | optional in | mandatory in | effect | -+==================+=============+==============+=============================================+ -| nested_scopes | 2.1.0b1 | 2.2 | :pep:`227`: | -| | | | *Statically Nested Scopes* | -+------------------+-------------+--------------+---------------------------------------------+ -| generators | 2.2.0a1 | 2.3 | :pep:`255`: | -| | | | *Simple Generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| division | 2.2.0a2 | 3.0 | :pep:`238`: | -| | | | *Changing the Division Operator* | -+------------------+-------------+--------------+---------------------------------------------+ -| absolute_import | 2.5.0a1 | 3.0 | :pep:`328`: | -| | | | *Imports: Multi-Line and Absolute/Relative* | -+------------------+-------------+--------------+---------------------------------------------+ -| with_statement | 2.5.0a1 | 2.6 | :pep:`343`: | -| | | | *The "with" Statement* | -+------------------+-------------+--------------+---------------------------------------------+ -| print_function | 2.6.0a2 | 3.0 | :pep:`3105`: | -| | | | *Make print a function* | -+------------------+-------------+--------------+---------------------------------------------+ -| unicode_literals | 2.6.0a2 | 3.0 | :pep:`3112`: | -| | | | *Bytes literals in Python 3000* | -+------------------+-------------+--------------+---------------------------------------------+ -| generator_stop | 3.5.0b1 | 3.7 | :pep:`479`: | -| | | | *StopIteration handling inside generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| annotations | 3.7.0b1 | TBD [1]_ | :pep:`563`: | -| | | | *Postponed evaluation of annotations* | -+------------------+-------------+--------------+---------------------------------------------+ - -.. XXX Adding a new entry? Remember to update simple_stmts.rst, too. - .. [1] ``from __future__ import annotations`` was previously scheduled to become mandatory in Python 3.10, but the Python Steering Council @@ -115,3 +126,6 @@ language using this mechanism: :ref:`future` How the compiler treats future imports. + + :pep:`236` - Back to the __future__ + The original proposal for the __future__ mechanism. From 97fb2480e4807a34b8197243ad57566ed7769e24 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 29 Jan 2024 12:56:11 +0300 Subject: [PATCH 060/127] gh-101100: Fix sphinx warnings in `Doc/c-api/memoryview.rst` (GH-114669) --- Doc/c-api/memoryview.rst | 13 +++++++++++++ Doc/tools/.nitignore | 1 - 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/memoryview.rst b/Doc/c-api/memoryview.rst index 2aa43318e7a455..f6038032805259 100644 --- a/Doc/c-api/memoryview.rst +++ b/Doc/c-api/memoryview.rst @@ -20,6 +20,17 @@ any other object. read/write, otherwise it may be either read-only or read/write at the discretion of the exporter. + +.. c:macro:: PyBUF_READ + + Flag to request a readonly buffer. + + +.. c:macro:: PyBUF_WRITE + + Flag to request a writable buffer. + + .. c:function:: PyObject *PyMemoryView_FromMemory(char *mem, Py_ssize_t size, int flags) Create a memoryview object using *mem* as the underlying buffer. @@ -41,6 +52,8 @@ any other object. original memory. Otherwise, a copy is made and the memoryview points to a new bytes object. + *buffertype* can be one of :c:macro:`PyBUF_READ` or :c:macro:`PyBUF_WRITE`. + .. c:function:: int PyMemoryView_Check(PyObject *obj) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 8b6847ef2a7d76..b8b7c2299ca9f4 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -9,7 +9,6 @@ Doc/c-api/gcsupport.rst Doc/c-api/init.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst -Doc/c-api/memoryview.rst Doc/c-api/module.rst Doc/c-api/stable.rst Doc/c-api/sys.rst From b7a12ab2146f946ae57e2d8019372cafe94d8375 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 29 Jan 2024 15:12:19 +0200 Subject: [PATCH 061/127] gh-101100: Fix Sphinx warnings in `whatsnew/2.2.rst` (#112366) Co-authored-by: Hugo van Kemenade --- Doc/tools/.nitignore | 1 - Doc/whatsnew/2.2.rst | 140 +++++++++++++++++++++---------------------- 2 files changed, 70 insertions(+), 71 deletions(-) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index b8b7c2299ca9f4..763503205e1670 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -90,7 +90,6 @@ Doc/tutorial/datastructures.rst Doc/using/windows.rst Doc/whatsnew/2.0.rst Doc/whatsnew/2.1.rst -Doc/whatsnew/2.2.rst Doc/whatsnew/2.4.rst Doc/whatsnew/2.5.rst Doc/whatsnew/2.6.rst diff --git a/Doc/whatsnew/2.2.rst b/Doc/whatsnew/2.2.rst index 6efc23a82de923..968bd7a126bdf0 100644 --- a/Doc/whatsnew/2.2.rst +++ b/Doc/whatsnew/2.2.rst @@ -53,9 +53,9 @@ A long time ago I wrote a web page listing flaws in Python's design. One of the most significant flaws was that it's impossible to subclass Python types implemented in C. In particular, it's not possible to subclass built-in types, so you can't just subclass, say, lists in order to add a single useful method to -them. The :mod:`UserList` module provides a class that supports all of the +them. The :mod:`!UserList` module provides a class that supports all of the methods of lists and that can be subclassed further, but there's lots of C code -that expects a regular Python list and won't accept a :class:`UserList` +that expects a regular Python list and won't accept a :class:`!UserList` instance. Python 2.2 fixes this, and in the process adds some exciting new capabilities. @@ -69,7 +69,7 @@ A brief summary: * It's also possible to automatically call methods on accessing or setting an instance attribute by using a new mechanism called :dfn:`properties`. Many uses - of :meth:`__getattr__` can be rewritten to use properties instead, making the + of :meth:`!__getattr__` can be rewritten to use properties instead, making the resulting code simpler and faster. As a small side benefit, attributes can now have docstrings, too. @@ -120,7 +120,7 @@ added so if no built-in type is suitable, you can just subclass This means that :keyword:`class` statements that don't have any base classes are always classic classes in Python 2.2. (Actually you can also change this by -setting a module-level variable named :attr:`__metaclass__` --- see :pep:`253` +setting a module-level variable named :attr:`!__metaclass__` --- see :pep:`253` for the details --- but it's easier to just subclass :class:`object`.) The type objects for the built-in types are available as built-ins, named using @@ -134,8 +134,8 @@ type objects that behave as factories when called. :: 123 To make the set of types complete, new type objects such as :func:`dict` and -:func:`file` have been added. Here's a more interesting example, adding a -:meth:`lock` method to file objects:: +:func:`!file` have been added. Here's a more interesting example, adding a +:meth:`!lock` method to file objects:: class LockableFile(file): def lock (self, operation, length=0, start=0, whence=0): @@ -146,7 +146,7 @@ To make the set of types complete, new type objects such as :func:`dict` and The now-obsolete :mod:`!posixfile` module contained a class that emulated all of a file object's methods and also added a :meth:`!lock` method, but this class couldn't be passed to internal functions that expected a built-in file, -something which is possible with our new :class:`LockableFile`. +something which is possible with our new :class:`!LockableFile`. Descriptors @@ -154,11 +154,11 @@ Descriptors In previous versions of Python, there was no consistent way to discover what attributes and methods were supported by an object. There were some informal -conventions, such as defining :attr:`__members__` and :attr:`__methods__` +conventions, such as defining :attr:`!__members__` and :attr:`!__methods__` attributes that were lists of names, but often the author of an extension type or a class wouldn't bother to define them. You could fall back on inspecting the :attr:`~object.__dict__` of an object, but when class inheritance or an arbitrary -:meth:`__getattr__` hook were in use this could still be inaccurate. +:meth:`!__getattr__` hook were in use this could still be inaccurate. The one big idea underlying the new class model is that an API for describing the attributes of an object using :dfn:`descriptors` has been formalized. @@ -171,7 +171,7 @@ attributes of their own: * :attr:`~definition.__name__` is the attribute's name. -* :attr:`__doc__` is the attribute's docstring. +* :attr:`!__doc__` is the attribute's docstring. * ``__get__(object)`` is a method that retrieves the attribute value from *object*. @@ -186,7 +186,7 @@ are:: descriptor = obj.__class__.x descriptor.__get__(obj) -For methods, :meth:`descriptor.__get__` returns a temporary object that's +For methods, :meth:`!descriptor.__get__` returns a temporary object that's callable, and wraps up the instance and the method to be called on it. This is also why static methods and class methods are now possible; they have descriptors that wrap up just the method, or the method and the class. As a @@ -204,7 +204,7 @@ methods are defined like this:: ... g = classmethod(g) -The :func:`staticmethod` function takes the function :func:`f`, and returns it +The :func:`staticmethod` function takes the function :func:`!f`, and returns it wrapped up in a descriptor so it can be stored in the class object. You might expect there to be special syntax for creating such methods (``def static f``, ``defstatic f()``, or something like that) but no such syntax has been defined @@ -232,10 +232,10 @@ like this:: f = eiffelmethod(f, pre_f, post_f) -Note that a person using the new :func:`eiffelmethod` doesn't have to understand +Note that a person using the new :func:`!eiffelmethod` doesn't have to understand anything about descriptors. This is why I think the new features don't increase the basic complexity of the language. There will be a few wizards who need to -know about it in order to write :func:`eiffelmethod` or the ZODB or whatever, +know about it in order to write :func:`!eiffelmethod` or the ZODB or whatever, but most users will just write code on top of the resulting libraries and ignore the implementation details. @@ -263,10 +263,10 @@ from :pep:`253` by Guido van Rossum):: The lookup rule for classic classes is simple but not very smart; the base classes are searched depth-first, going from left to right. A reference to -:meth:`D.save` will search the classes :class:`D`, :class:`B`, and then -:class:`A`, where :meth:`save` would be found and returned. :meth:`C.save` -would never be found at all. This is bad, because if :class:`C`'s :meth:`save` -method is saving some internal state specific to :class:`C`, not calling it will +:meth:`!D.save` will search the classes :class:`!D`, :class:`!B`, and then +:class:`!A`, where :meth:`!save` would be found and returned. :meth:`!C.save` +would never be found at all. This is bad, because if :class:`!C`'s :meth:`!save` +method is saving some internal state specific to :class:`!C`, not calling it will result in that state never getting saved. New-style classes follow a different algorithm that's a bit more complicated to @@ -276,22 +276,22 @@ produces more useful results for really complicated inheritance graphs.) #. List all the base classes, following the classic lookup rule and include a class multiple times if it's visited repeatedly. In the above example, the list - of visited classes is [:class:`D`, :class:`B`, :class:`A`, :class:`C`, - :class:`A`]. + of visited classes is [:class:`!D`, :class:`!B`, :class:`!A`, :class:`!C`, + :class:`!A`]. #. Scan the list for duplicated classes. If any are found, remove all but one occurrence, leaving the *last* one in the list. In the above example, the list - becomes [:class:`D`, :class:`B`, :class:`C`, :class:`A`] after dropping + becomes [:class:`!D`, :class:`!B`, :class:`!C`, :class:`!A`] after dropping duplicates. -Following this rule, referring to :meth:`D.save` will return :meth:`C.save`, +Following this rule, referring to :meth:`!D.save` will return :meth:`!C.save`, which is the behaviour we're after. This lookup rule is the same as the one followed by Common Lisp. A new built-in function, :func:`super`, provides a way to get at a class's superclasses without having to reimplement Python's algorithm. The most commonly used form will be ``super(class, obj)``, which returns a bound superclass object (not the actual class object). This form will be used in methods to call a method in the superclass; for example, -:class:`D`'s :meth:`save` method would look like this:: +:class:`!D`'s :meth:`!save` method would look like this:: class D (B,C): def save (self): @@ -309,7 +309,7 @@ Attribute Access ---------------- A fair number of sophisticated Python classes define hooks for attribute access -using :meth:`__getattr__`; most commonly this is done for convenience, to make +using :meth:`~object.__getattr__`; most commonly this is done for convenience, to make code more readable by automatically mapping an attribute access such as ``obj.parent`` into a method call such as ``obj.get_parent``. Python 2.2 adds some new ways of controlling attribute access. @@ -321,22 +321,22 @@ instance's dictionary. New-style classes also support a new method, ``__getattribute__(attr_name)``. The difference between the two methods is -that :meth:`__getattribute__` is *always* called whenever any attribute is -accessed, while the old :meth:`__getattr__` is only called if ``foo`` isn't +that :meth:`~object.__getattribute__` is *always* called whenever any attribute is +accessed, while the old :meth:`~object.__getattr__` is only called if ``foo`` isn't found in the instance's dictionary. However, Python 2.2's support for :dfn:`properties` will often be a simpler way -to trap attribute references. Writing a :meth:`__getattr__` method is +to trap attribute references. Writing a :meth:`!__getattr__` method is complicated because to avoid recursion you can't use regular attribute accesses inside them, and instead have to mess around with the contents of -:attr:`~object.__dict__`. :meth:`__getattr__` methods also end up being called by Python -when it checks for other methods such as :meth:`__repr__` or :meth:`__coerce__`, +:attr:`~object.__dict__`. :meth:`~object.__getattr__` methods also end up being called by Python +when it checks for other methods such as :meth:`~object.__repr__` or :meth:`!__coerce__`, and so have to be written with this in mind. Finally, calling a function on every attribute access results in a sizable performance loss. :class:`property` is a new built-in type that packages up three functions that get, set, or delete an attribute, and a docstring. For example, if you want to -define a :attr:`size` attribute that's computed, but also settable, you could +define a :attr:`!size` attribute that's computed, but also settable, you could write:: class C(object): @@ -355,9 +355,9 @@ write:: "Storage size of this instance") That is certainly clearer and easier to write than a pair of -:meth:`__getattr__`/:meth:`__setattr__` methods that check for the :attr:`size` +:meth:`!__getattr__`/:meth:`!__setattr__` methods that check for the :attr:`!size` attribute and handle it specially while retrieving all other attributes from the -instance's :attr:`~object.__dict__`. Accesses to :attr:`size` are also the only ones +instance's :attr:`~object.__dict__`. Accesses to :attr:`!size` are also the only ones which have to perform the work of calling a function, so references to other attributes run at their usual speed. @@ -447,7 +447,7 @@ an iterator for the object *obj*, while ``iter(C, sentinel)`` returns an iterator that will invoke the callable object *C* until it returns *sentinel* to signal that the iterator is done. -Python classes can define an :meth:`__iter__` method, which should create and +Python classes can define an :meth:`!__iter__` method, which should create and return a new iterator for the object; if the object is its own iterator, this method can just return ``self``. In particular, iterators will usually be their own iterators. Extension types implemented in C can implement a :c:member:`~PyTypeObject.tp_iter` @@ -478,7 +478,7 @@ there are no more values to be returned, calling :meth:`next` should raise the In 2.2, Python's :keyword:`for` statement no longer expects a sequence; it expects something for which :func:`iter` will return an iterator. For backward compatibility and convenience, an iterator is automatically constructed for -sequences that don't implement :meth:`__iter__` or a :c:member:`~PyTypeObject.tp_iter` slot, so +sequences that don't implement :meth:`!__iter__` or a :c:member:`~PyTypeObject.tp_iter` slot, so ``for i in [1,2,3]`` will still work. Wherever the Python interpreter loops over a sequence, it's been changed to use the iterator protocol. This means you can do things like this:: @@ -510,8 +510,8 @@ Iterator support has been added to some of Python's basic types. Calling Oct 10 That's just the default behaviour. If you want to iterate over keys, values, or -key/value pairs, you can explicitly call the :meth:`iterkeys`, -:meth:`itervalues`, or :meth:`iteritems` methods to get an appropriate iterator. +key/value pairs, you can explicitly call the :meth:`!iterkeys`, +:meth:`!itervalues`, or :meth:`!iteritems` methods to get an appropriate iterator. In a minor related change, the :keyword:`in` operator now works on dictionaries, so ``key in dict`` is now equivalent to ``dict.has_key(key)``. @@ -580,7 +580,7 @@ allowed inside the :keyword:`!try` block of a :keyword:`try`...\ :keyword:`finally` statement; read :pep:`255` for a full explanation of the interaction between :keyword:`!yield` and exceptions.) -Here's a sample usage of the :func:`generate_ints` generator:: +Here's a sample usage of the :func:`!generate_ints` generator:: >>> gen = generate_ints(3) >>> gen @@ -641,7 +641,7 @@ like:: sentence := "Store it in the neighboring harbor" if (i := find("or", sentence)) > 5 then write(i) -In Icon the :func:`find` function returns the indexes at which the substring +In Icon the :func:`!find` function returns the indexes at which the substring "or" is found: 3, 23, 33. In the :keyword:`if` statement, ``i`` is first assigned a value of 3, but 3 is less than 5, so the comparison fails, and Icon retries it with the second value of 23. 23 is greater than 5, so the comparison @@ -671,7 +671,7 @@ PEP 237: Unifying Long Integers and Integers In recent versions, the distinction between regular integers, which are 32-bit values on most machines, and long integers, which can be of arbitrary size, was becoming an annoyance. For example, on platforms that support files larger than -``2**32`` bytes, the :meth:`tell` method of file objects has to return a long +``2**32`` bytes, the :meth:`!tell` method of file objects has to return a long integer. However, there were various bits of Python that expected plain integers and would raise an error if a long integer was provided instead. For example, in Python 1.5, only regular integers could be used as a slice index, and @@ -752,7 +752,7 @@ Here are the changes 2.2 introduces: 0.5. Without the ``__future__`` statement, ``/`` still means classic division. The default meaning of ``/`` will not change until Python 3.0. -* Classes can define methods called :meth:`__truediv__` and :meth:`__floordiv__` +* Classes can define methods called :meth:`~object.__truediv__` and :meth:`~object.__floordiv__` to overload the two division operators. At the C level, there are also slots in the :c:type:`PyNumberMethods` structure so extension types can define the two operators. @@ -785,17 +785,17 @@ support.) When built to use UCS-4 (a "wide Python"), the interpreter can natively handle Unicode characters from U+000000 to U+110000, so the range of legal values for -the :func:`unichr` function is expanded accordingly. Using an interpreter +the :func:`!unichr` function is expanded accordingly. Using an interpreter compiled to use UCS-2 (a "narrow Python"), values greater than 65535 will still -cause :func:`unichr` to raise a :exc:`ValueError` exception. This is all +cause :func:`!unichr` to raise a :exc:`ValueError` exception. This is all described in :pep:`261`, "Support for 'wide' Unicode characters"; consult it for further details. Another change is simpler to explain. Since their introduction, Unicode strings -have supported an :meth:`encode` method to convert the string to a selected +have supported an :meth:`!encode` method to convert the string to a selected encoding such as UTF-8 or Latin-1. A symmetric ``decode([*encoding*])`` method has been added to 8-bit strings (though not to Unicode strings) in 2.2. -:meth:`decode` assumes that the string is in the specified encoding and decodes +:meth:`!decode` assumes that the string is in the specified encoding and decodes it, returning whatever is returned by the codec. Using this new feature, codecs have been added for tasks not directly related to @@ -819,10 +819,10 @@ encoding, and compression with the :mod:`zlib` module:: >>> "sheesh".encode('rot-13') 'furrfu' -To convert a class instance to Unicode, a :meth:`__unicode__` method can be -defined by a class, analogous to :meth:`__str__`. +To convert a class instance to Unicode, a :meth:`!__unicode__` method can be +defined by a class, analogous to :meth:`!__str__`. -:meth:`encode`, :meth:`decode`, and :meth:`__unicode__` were implemented by +:meth:`!encode`, :meth:`!decode`, and :meth:`!__unicode__` were implemented by Marc-André Lemburg. The changes to support using UCS-4 internally were implemented by Fredrik Lundh and Martin von Löwis. @@ -859,7 +859,7 @@ doesn't work:: return g(value-1) + 1 ... -The function :func:`g` will always raise a :exc:`NameError` exception, because +The function :func:`!g` will always raise a :exc:`NameError` exception, because the binding of the name ``g`` isn't in either its local namespace or in the module-level namespace. This isn't much of a problem in practice (how often do you recursively define interior functions like this?), but this also made using @@ -915,7 +915,7 @@ To make the preceding explanation a bit clearer, here's an example:: Line 4 containing the ``exec`` statement is a syntax error, since ``exec`` would define a new local variable named ``x`` whose value should -be accessed by :func:`g`. +be accessed by :func:`!g`. This shouldn't be much of a limitation, since ``exec`` is rarely used in most Python code (and when it is used, it's often a sign of a poor design @@ -933,7 +933,7 @@ anyway). New and Improved Modules ======================== -* The :mod:`xmlrpclib` module was contributed to the standard library by Fredrik +* The :mod:`!xmlrpclib` module was contributed to the standard library by Fredrik Lundh, providing support for writing XML-RPC clients. XML-RPC is a simple remote procedure call protocol built on top of HTTP and XML. For example, the following snippet retrieves a list of RSS channels from the O'Reilly Network, @@ -956,7 +956,7 @@ New and Improved Modules # 'description': 'A utility which converts HTML to XSL FO.', # 'title': 'html2fo 0.3 (Default)'}, ... ] - The :mod:`SimpleXMLRPCServer` module makes it easy to create straightforward + The :mod:`!SimpleXMLRPCServer` module makes it easy to create straightforward XML-RPC servers. See http://xmlrpc.scripting.com/ for more information about XML-RPC. * The new :mod:`hmac` module implements the HMAC algorithm described by @@ -964,9 +964,9 @@ New and Improved Modules * Several functions that originally returned lengthy tuples now return pseudo-sequences that still behave like tuples but also have mnemonic attributes such - as memberst_mtime or :attr:`tm_year`. The enhanced functions include - :func:`stat`, :func:`fstat`, :func:`statvfs`, and :func:`fstatvfs` in the - :mod:`os` module, and :func:`localtime`, :func:`gmtime`, and :func:`strptime` in + as :attr:`!memberst_mtime` or :attr:`!tm_year`. The enhanced functions include + :func:`~os.stat`, :func:`~os.fstat`, :func:`~os.statvfs`, and :func:`~os.fstatvfs` in the + :mod:`os` module, and :func:`~time.localtime`, :func:`~time.gmtime`, and :func:`~time.strptime` in the :mod:`time` module. For example, to obtain a file's size using the old tuples, you'd end up writing @@ -999,7 +999,7 @@ New and Improved Modules underlying the :mod:`re` module. For example, the :func:`re.sub` and :func:`re.split` functions have been rewritten in C. Another contributed patch speeds up certain Unicode character ranges by a factor of two, and a new - :meth:`finditer` method that returns an iterator over all the non-overlapping + :meth:`~re.finditer` method that returns an iterator over all the non-overlapping matches in a given string. (SRE is maintained by Fredrik Lundh. The BIGCHARSET patch was contributed by Martin von Löwis.) @@ -1012,33 +1012,33 @@ New and Improved Modules new extensions: the NAMESPACE extension defined in :rfc:`2342`, SORT, GETACL and SETACL. (Contributed by Anthony Baxter and Michel Pelletier.) -* The :mod:`rfc822` module's parsing of email addresses is now compliant with +* The :mod:`!rfc822` module's parsing of email addresses is now compliant with :rfc:`2822`, an update to :rfc:`822`. (The module's name is *not* going to be changed to ``rfc2822``.) A new package, :mod:`email`, has also been added for parsing and generating e-mail messages. (Contributed by Barry Warsaw, and arising out of his work on Mailman.) -* The :mod:`difflib` module now contains a new :class:`Differ` class for +* The :mod:`difflib` module now contains a new :class:`!Differ` class for producing human-readable lists of changes (a "delta") between two sequences of - lines of text. There are also two generator functions, :func:`ndiff` and - :func:`restore`, which respectively return a delta from two sequences, or one of + lines of text. There are also two generator functions, :func:`!ndiff` and + :func:`!restore`, which respectively return a delta from two sequences, or one of the original sequences from a delta. (Grunt work contributed by David Goodger, from ndiff.py code by Tim Peters who then did the generatorization.) -* New constants :const:`ascii_letters`, :const:`ascii_lowercase`, and - :const:`ascii_uppercase` were added to the :mod:`string` module. There were - several modules in the standard library that used :const:`string.letters` to +* New constants :const:`!ascii_letters`, :const:`!ascii_lowercase`, and + :const:`!ascii_uppercase` were added to the :mod:`string` module. There were + several modules in the standard library that used :const:`!string.letters` to mean the ranges A-Za-z, but that assumption is incorrect when locales are in - use, because :const:`string.letters` varies depending on the set of legal + use, because :const:`!string.letters` varies depending on the set of legal characters defined by the current locale. The buggy modules have all been fixed - to use :const:`ascii_letters` instead. (Reported by an unknown person; fixed by + to use :const:`!ascii_letters` instead. (Reported by an unknown person; fixed by Fred L. Drake, Jr.) * The :mod:`mimetypes` module now makes it easier to use alternative MIME-type - databases by the addition of a :class:`MimeTypes` class, which takes a list of + databases by the addition of a :class:`~mimetypes.MimeTypes` class, which takes a list of filenames to be parsed. (Contributed by Fred L. Drake, Jr.) -* A :class:`Timer` class was added to the :mod:`threading` module that allows +* A :class:`~threading.Timer` class was added to the :mod:`threading` module that allows scheduling an activity to happen at some future time. (Contributed by Itamar Shtull-Trauring.) @@ -1114,7 +1114,7 @@ code, none of the changes described here will affect you very much. * Two new wrapper functions, :c:func:`PyOS_snprintf` and :c:func:`PyOS_vsnprintf` were added to provide cross-platform implementations for the relatively new :c:func:`snprintf` and :c:func:`vsnprintf` C lib APIs. In contrast to the standard - :c:func:`sprintf` and :c:func:`vsprintf` functions, the Python versions check the + :c:func:`sprintf` and :c:func:`!vsprintf` functions, the Python versions check the bounds of the buffer used to protect against buffer overruns. (Contributed by M.-A. Lemburg.) @@ -1212,12 +1212,12 @@ Some of the more notable changes are: * The :file:`Tools/scripts/ftpmirror.py` script now parses a :file:`.netrc` file, if you have one. (Contributed by Mike Romberg.) -* Some features of the object returned by the :func:`xrange` function are now +* Some features of the object returned by the :func:`!xrange` function are now deprecated, and trigger warnings when they're accessed; they'll disappear in - Python 2.3. :class:`xrange` objects tried to pretend they were full sequence + Python 2.3. :class:`!xrange` objects tried to pretend they were full sequence types by supporting slicing, sequence multiplication, and the :keyword:`in` operator, but these features were rarely used and therefore buggy. The - :meth:`tolist` method and the :attr:`start`, :attr:`stop`, and :attr:`step` + :meth:`!tolist` method and the :attr:`!start`, :attr:`!stop`, and :attr:`!step` attributes are also being deprecated. At the C level, the fourth argument to the :c:func:`!PyRange_New` function, ``repeat``, has also been deprecated. From e8b8f5e9c2da6a436360ce648061c90bdfcba863 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Mon, 29 Jan 2024 08:43:44 -0600 Subject: [PATCH 062/127] gh-101100: Fix datetime reference warnings (GH-114661) Co-authored-by: Serhiy Storchaka --- Doc/conf.py | 5 ++ Doc/library/datetime.rst | 106 +++++++++++++++++++-------------------- Doc/tools/.nitignore | 1 - 3 files changed, 58 insertions(+), 54 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index 458954370debe2..a96e7787d167a3 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -89,20 +89,25 @@ nitpick_ignore = [ # Standard C functions ('c:func', 'calloc'), + ('c:func', 'ctime'), ('c:func', 'dlopen'), ('c:func', 'exec'), ('c:func', 'fcntl'), ('c:func', 'fork'), ('c:func', 'free'), + ('c:func', 'gettimeofday'), ('c:func', 'gmtime'), + ('c:func', 'localeconv'), ('c:func', 'localtime'), ('c:func', 'main'), ('c:func', 'malloc'), + ('c:func', 'mktime'), ('c:func', 'printf'), ('c:func', 'realloc'), ('c:func', 'snprintf'), ('c:func', 'sprintf'), ('c:func', 'stat'), + ('c:func', 'strftime'), ('c:func', 'system'), ('c:func', 'time'), ('c:func', 'vsnprintf'), diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index b36f8c19cd6040..47ecb0ba331bdc 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -14,7 +14,7 @@ .. XXX what order should the types be discussed in? -The :mod:`datetime` module supplies classes for manipulating dates and times. +The :mod:`!datetime` module supplies classes for manipulating dates and times. While date and time arithmetic is supported, the focus of the implementation is on efficient attribute extraction for output formatting and manipulation. @@ -70,7 +70,7 @@ These :class:`tzinfo` objects capture information about the offset from UTC time, the time zone name, and whether daylight saving time is in effect. Only one concrete :class:`tzinfo` class, the :class:`timezone` class, is -supplied by the :mod:`datetime` module. The :class:`timezone` class can +supplied by the :mod:`!datetime` module. The :class:`timezone` class can represent simple timezones with fixed offsets from UTC, such as UTC itself or North American EST and EDT timezones. Supporting timezones at deeper levels of detail is up to the application. The rules for time adjustment across the @@ -80,7 +80,7 @@ standard suitable for every application aside from UTC. Constants --------- -The :mod:`datetime` module exports the following constants: +The :mod:`!datetime` module exports the following constants: .. data:: MINYEAR @@ -631,7 +631,7 @@ Notes: date2.toordinal()``. Date comparison raises :exc:`TypeError` if the other comparand isn't also a :class:`date` object. However, ``NotImplemented`` is returned instead if the other comparand has a - :meth:`timetuple` attribute. This hook gives other kinds of date objects a + :attr:`~date.timetuple` attribute. This hook gives other kinds of date objects a chance at implementing mixed-type comparison. If not, when a :class:`date` object is compared to an object of a different type, :exc:`TypeError` is raised unless the comparison is ``==`` or ``!=``. The latter cases return @@ -1215,7 +1215,7 @@ Supported operations: object addresses, datetime comparison normally raises :exc:`TypeError` if the other comparand isn't also a :class:`.datetime` object. However, ``NotImplemented`` is returned instead if the other comparand has a - :meth:`timetuple` attribute. This hook gives other kinds of date objects a + :attr:`~.datetime.timetuple` attribute. This hook gives other kinds of date objects a chance at implementing mixed-type comparison. If not, when a :class:`.datetime` object is compared to an object of a different type, :exc:`TypeError` is raised unless the comparison is ``==`` or ``!=``. The latter cases return @@ -1347,22 +1347,22 @@ Instance methods: where ``yday = d.toordinal() - date(d.year, 1, 1).toordinal() + 1`` is the day number within the current year starting with ``1`` for January - 1st. The :attr:`tm_isdst` flag of the result is set according to the + 1st. The :attr:`~time.struct_time.tm_isdst` flag of the result is set according to the :meth:`dst` method: :attr:`.tzinfo` is ``None`` or :meth:`dst` returns - ``None``, :attr:`tm_isdst` is set to ``-1``; else if :meth:`dst` returns a - non-zero value, :attr:`tm_isdst` is set to ``1``; else :attr:`tm_isdst` is + ``None``, :attr:`!tm_isdst` is set to ``-1``; else if :meth:`dst` returns a + non-zero value, :attr:`!tm_isdst` is set to ``1``; else :attr:`!tm_isdst` is set to ``0``. .. method:: datetime.utctimetuple() If :class:`.datetime` instance *d* is naive, this is the same as - ``d.timetuple()`` except that :attr:`tm_isdst` is forced to 0 regardless of what + ``d.timetuple()`` except that :attr:`~.time.struct_time.tm_isdst` is forced to 0 regardless of what ``d.dst()`` returns. DST is never in effect for a UTC time. If *d* is aware, *d* is normalized to UTC time, by subtracting ``d.utcoffset()``, and a :class:`time.struct_time` for the - normalized time is returned. :attr:`tm_isdst` is forced to 0. Note + normalized time is returned. :attr:`!tm_isdst` is forced to 0. Note that an :exc:`OverflowError` may be raised if *d*.year was ``MINYEAR`` or ``MAXYEAR`` and UTC adjustment spills over a year boundary. @@ -1550,7 +1550,7 @@ Instance methods: Examples of Usage: :class:`.datetime` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Examples of working with :class:`~datetime.datetime` objects: +Examples of working with :class:`.datetime` objects: .. doctest:: @@ -1761,9 +1761,9 @@ is aware, :exc:`TypeError` is raised if an order comparison is attempted. For eq comparisons, naive instances are never equal to aware instances. If both comparands are aware, and have -the same :attr:`~time.tzinfo` attribute, the common :attr:`~time.tzinfo` attribute is +the same :attr:`~.time.tzinfo` attribute, the common :attr:`!tzinfo` attribute is ignored and the base times are compared. If both comparands are aware and -have different :attr:`~time.tzinfo` attributes, the comparands are first adjusted by +have different :attr:`!tzinfo` attributes, the comparands are first adjusted by subtracting their UTC offsets (obtained from ``self.utcoffset()``). In order to stop mixed-type comparisons from falling back to the default comparison by object address, when a :class:`.time` object is compared to an object of a @@ -1771,7 +1771,7 @@ different type, :exc:`TypeError` is raised unless the comparison is ``==`` or ``!=``. The latter cases return :const:`False` or :const:`True`, respectively. .. versionchanged:: 3.3 - Equality comparisons between aware and naive :class:`~datetime.time` instances + Equality comparisons between aware and naive :class:`.time` instances don't raise :exc:`TypeError`. In Boolean contexts, a :class:`.time` object is always considered to be true. @@ -1981,7 +1981,7 @@ Examples of working with a :class:`.time` object:: You need to derive a concrete subclass, and (at least) supply implementations of the standard :class:`tzinfo` methods needed by the - :class:`.datetime` methods you use. The :mod:`datetime` module provides + :class:`.datetime` methods you use. The :mod:`!datetime` module provides :class:`timezone`, a simple concrete subclass of :class:`tzinfo` which can represent timezones with fixed offset from UTC such as UTC itself or North American EST and EDT. @@ -1994,7 +1994,7 @@ Examples of working with a :class:`.time` object:: A concrete subclass of :class:`tzinfo` may need to implement the following methods. Exactly which methods are needed depends on the uses made of aware - :mod:`datetime` objects. If in doubt, simply implement all of them. + :mod:`!datetime` objects. If in doubt, simply implement all of them. .. method:: tzinfo.utcoffset(dt) @@ -2035,7 +2035,7 @@ Examples of working with a :class:`.time` object:: already been added to the UTC offset returned by :meth:`utcoffset`, so there's no need to consult :meth:`dst` unless you're interested in obtaining DST info separately. For example, :meth:`datetime.timetuple` calls its :attr:`~.datetime.tzinfo` - attribute's :meth:`dst` method to determine how the :attr:`tm_isdst` flag + attribute's :meth:`dst` method to determine how the :attr:`~time.struct_time.tm_isdst` flag should be set, and :meth:`tzinfo.fromutc` calls :meth:`dst` to account for DST changes when crossing time zones. @@ -2051,7 +2051,7 @@ Examples of working with a :class:`.time` object:: relies on this, but cannot detect violations; it's the programmer's responsibility to ensure it. If a :class:`tzinfo` subclass cannot guarantee this, it may be able to override the default implementation of - :meth:`tzinfo.fromutc` to work correctly with :meth:`astimezone` regardless. + :meth:`tzinfo.fromutc` to work correctly with :meth:`~.datetime.astimezone` regardless. Most implementations of :meth:`dst` will probably look like one of these two:: @@ -2080,7 +2080,7 @@ Examples of working with a :class:`.time` object:: .. method:: tzinfo.tzname(dt) Return the time zone name corresponding to the :class:`.datetime` object *dt*, as - a string. Nothing about string names is defined by the :mod:`datetime` module, + a string. Nothing about string names is defined by the :mod:`!datetime` module, and there's no requirement that it mean anything in particular. For example, "GMT", "UTC", "-500", "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. Return ``None`` if a string name isn't known. Note that this is @@ -2128,7 +2128,7 @@ There is one more :class:`tzinfo` method that a subclass may wish to override: different years. An example of a time zone the default :meth:`fromutc` implementation may not handle correctly in all cases is one where the standard offset (from UTC) depends on the specific date and time passed, which can happen - for political reasons. The default implementations of :meth:`astimezone` and + for political reasons. The default implementations of :meth:`~.datetime.astimezone` and :meth:`fromutc` may not produce the result you want if the result is one of the hours straddling the moment the standard offset changes. @@ -2194,10 +2194,10 @@ hour that can't be spelled unambiguously in local wall time: the last hour of daylight time. In Eastern, that's times of the form 5:MM UTC on the day daylight time ends. The local wall clock leaps from 1:59 (daylight time) back to 1:00 (standard time) again. Local times of the form 1:MM are ambiguous. -:meth:`astimezone` mimics the local clock's behavior by mapping two adjacent UTC +:meth:`~.datetime.astimezone` mimics the local clock's behavior by mapping two adjacent UTC hours into the same local hour then. In the Eastern example, UTC times of the form 5:MM and 6:MM both map to 1:MM when converted to Eastern, but earlier times -have the :attr:`~datetime.fold` attribute set to 0 and the later times have it set to 1. +have the :attr:`~.datetime.fold` attribute set to 0 and the later times have it set to 1. For example, at the Fall back transition of 2016, we get:: >>> u0 = datetime(2016, 11, 6, 4, tzinfo=timezone.utc) @@ -2212,10 +2212,10 @@ For example, at the Fall back transition of 2016, we get:: 07:00:00 UTC = 02:00:00 EST 0 Note that the :class:`.datetime` instances that differ only by the value of the -:attr:`~datetime.fold` attribute are considered equal in comparisons. +:attr:`~.datetime.fold` attribute are considered equal in comparisons. Applications that can't bear wall-time ambiguities should explicitly check the -value of the :attr:`~datetime.fold` attribute or avoid using hybrid +value of the :attr:`~.datetime.fold` attribute or avoid using hybrid :class:`tzinfo` subclasses; there are no ambiguities when using :class:`timezone`, or any other fixed-offset :class:`tzinfo` subclass (such as a class representing only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). @@ -2223,7 +2223,7 @@ only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). .. seealso:: :mod:`zoneinfo` - The :mod:`datetime` module has a basic :class:`timezone` class (for + The :mod:`!datetime` module has a basic :class:`timezone` class (for handling arbitrary fixed offsets from UTC) and its :attr:`timezone.utc` attribute (a UTC timezone instance). @@ -2241,7 +2241,7 @@ only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). .. _datetime-timezone: :class:`timezone` Objects --------------------------- +------------------------- The :class:`timezone` class is a subclass of :class:`tzinfo`, each instance of which represents a timezone defined by a fixed offset from @@ -2316,8 +2316,8 @@ Class attributes: .. _strftime-strptime-behavior: -:meth:`strftime` and :meth:`strptime` Behavior ----------------------------------------------- +:meth:`~.datetime.strftime` and :meth:`~.datetime.strptime` Behavior +-------------------------------------------------------------------- :class:`date`, :class:`.datetime`, and :class:`.time` objects all support a ``strftime(format)`` method, to create a string representing the time under the @@ -2327,8 +2327,8 @@ Conversely, the :meth:`datetime.strptime` class method creates a :class:`.datetime` object from a string representing a date and time and a corresponding format string. -The table below provides a high-level comparison of :meth:`strftime` -versus :meth:`strptime`: +The table below provides a high-level comparison of :meth:`~.datetime.strftime` +versus :meth:`~.datetime.strptime`: +----------------+--------------------------------------------------------+------------------------------------------------------------------------------+ | | ``strftime`` | ``strptime`` | @@ -2345,8 +2345,8 @@ versus :meth:`strptime`: .. _format-codes: -:meth:`strftime` and :meth:`strptime` Format Codes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:meth:`~.datetime.strftime` and :meth:`~.datetime.strptime` Format Codes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These methods accept format codes that can be used to parse and format dates:: @@ -2485,13 +2485,13 @@ convenience. These parameters all correspond to ISO 8601 date values. | | naive). | -03:07:12.345216 | | +-----------+--------------------------------+------------------------+-------+ -These may not be available on all platforms when used with the :meth:`strftime` +These may not be available on all platforms when used with the :meth:`~.datetime.strftime` method. The ISO 8601 year and ISO 8601 week directives are not interchangeable -with the year and week number directives above. Calling :meth:`strptime` with +with the year and week number directives above. Calling :meth:`~.datetime.strptime` with incomplete or ambiguous ISO 8601 directives will raise a :exc:`ValueError`. The full set of format codes supported varies across platforms, because Python -calls the platform C library's :func:`strftime` function, and platform +calls the platform C library's :c:func:`strftime` function, and platform variations are common. To see the full set of format codes supported on your platform, consult the :manpage:`strftime(3)` documentation. There are also differences between platforms in handling of unsupported format specifiers. @@ -2507,9 +2507,9 @@ Technical Detail Broadly speaking, ``d.strftime(fmt)`` acts like the :mod:`time` module's ``time.strftime(fmt, d.timetuple())`` although not all objects support a -:meth:`timetuple` method. +:meth:`~date.timetuple` method. -For the :meth:`datetime.strptime` class method, the default value is +For the :meth:`.datetime.strptime` class method, the default value is ``1900-01-01T00:00:00.000``: any components not specified in the format string will be pulled from the default value. [#]_ @@ -2544,27 +2544,27 @@ Notes: contain non-ASCII characters. (2) - The :meth:`strptime` method can parse years in the full [1, 9999] range, but + The :meth:`~.datetime.strptime` method can parse years in the full [1, 9999] range, but years < 1000 must be zero-filled to 4-digit width. .. versionchanged:: 3.2 - In previous versions, :meth:`strftime` method was restricted to + In previous versions, :meth:`~.datetime.strftime` method was restricted to years >= 1900. .. versionchanged:: 3.3 - In version 3.2, :meth:`strftime` method was restricted to + In version 3.2, :meth:`~.datetime.strftime` method was restricted to years >= 1000. (3) - When used with the :meth:`strptime` method, the ``%p`` directive only affects + When used with the :meth:`~.datetime.strptime` method, the ``%p`` directive only affects the output hour field if the ``%I`` directive is used to parse the hour. (4) - Unlike the :mod:`time` module, the :mod:`datetime` module does not support + Unlike the :mod:`time` module, the :mod:`!datetime` module does not support leap seconds. (5) - When used with the :meth:`strptime` method, the ``%f`` directive + When used with the :meth:`~.datetime.strptime` method, the ``%f`` directive accepts from one to six digits and zero pads on the right. ``%f`` is an extension to the set of format characters in the C standard (but implemented separately in datetime objects, and therefore always @@ -2577,7 +2577,7 @@ Notes: For an aware object: ``%z`` - :meth:`utcoffset` is transformed into a string of the form + :meth:`~.datetime.utcoffset` is transformed into a string of the form ``±HHMM[SS[.ffffff]]``, where ``HH`` is a 2-digit string giving the number of UTC offset hours, ``MM`` is a 2-digit string giving the number of UTC offset minutes, ``SS`` is a 2-digit string giving the number of UTC offset @@ -2585,14 +2585,14 @@ Notes: offset microseconds. The ``ffffff`` part is omitted when the offset is a whole number of seconds and both the ``ffffff`` and the ``SS`` part is omitted when the offset is a whole number of minutes. For example, if - :meth:`utcoffset` returns ``timedelta(hours=-3, minutes=-30)``, ``%z`` is + :meth:`~.datetime.utcoffset` returns ``timedelta(hours=-3, minutes=-30)``, ``%z`` is replaced with the string ``'-0330'``. .. versionchanged:: 3.7 The UTC offset is not restricted to a whole number of minutes. .. versionchanged:: 3.7 - When the ``%z`` directive is provided to the :meth:`strptime` method, + When the ``%z`` directive is provided to the :meth:`~.datetime.strptime` method, the UTC offsets can have a colon as a separator between hours, minutes and seconds. For example, ``'+01:00:00'`` will be parsed as an offset of one hour. @@ -2603,11 +2603,11 @@ Notes: hours, minutes and seconds. ``%Z`` - In :meth:`strftime`, ``%Z`` is replaced by an empty string if - :meth:`tzname` returns ``None``; otherwise ``%Z`` is replaced by the + In :meth:`~.datetime.strftime`, ``%Z`` is replaced by an empty string if + :meth:`~.datetime.tzname` returns ``None``; otherwise ``%Z`` is replaced by the returned value, which must be a string. - :meth:`strptime` only accepts certain values for ``%Z``: + :meth:`~.datetime.strptime` only accepts certain values for ``%Z``: 1. any value in ``time.tzname`` for your machine's locale 2. the hard-coded values ``UTC`` and ``GMT`` @@ -2617,23 +2617,23 @@ Notes: invalid values. .. versionchanged:: 3.2 - When the ``%z`` directive is provided to the :meth:`strptime` method, an + When the ``%z`` directive is provided to the :meth:`~.datetime.strptime` method, an aware :class:`.datetime` object will be produced. The ``tzinfo`` of the result will be set to a :class:`timezone` instance. (7) - When used with the :meth:`strptime` method, ``%U`` and ``%W`` are only used + When used with the :meth:`~.datetime.strptime` method, ``%U`` and ``%W`` are only used in calculations when the day of the week and the calendar year (``%Y``) are specified. (8) Similar to ``%U`` and ``%W``, ``%V`` is only used in calculations when the day of the week and the ISO year (``%G``) are specified in a - :meth:`strptime` format string. Also note that ``%G`` and ``%Y`` are not + :meth:`~.datetime.strptime` format string. Also note that ``%G`` and ``%Y`` are not interchangeable. (9) - When used with the :meth:`strptime` method, the leading zero is optional + When used with the :meth:`~.datetime.strptime` method, the leading zero is optional for formats ``%d``, ``%m``, ``%H``, ``%I``, ``%M``, ``%S``, ``%j``, ``%U``, ``%W``, and ``%V``. Format ``%y`` does require a leading zero. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 763503205e1670..bba4fe0d5f2425 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -26,7 +26,6 @@ Doc/library/asyncio-subprocess.rst Doc/library/bdb.rst Doc/library/collections.rst Doc/library/csv.rst -Doc/library/datetime.rst Doc/library/dbm.rst Doc/library/decimal.rst Doc/library/email.charset.rst From c87233fd3fa77067013c35328f8c4884f0567a59 Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 29 Jan 2024 07:08:23 -0800 Subject: [PATCH 063/127] gh-112050: Adapt collections.deque to Argument Clinic (#113963) --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + ...-01-11-22-58-45.gh-issue-112050.hDuvDW.rst | 1 + Modules/_collectionsmodule.c | 412 ++++++++++------- Modules/clinic/_collectionsmodule.c.h | 418 +++++++++++++++++- 7 files changed, 685 insertions(+), 152 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index e92707051c12b7..57505b5388fd6c 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1049,6 +1049,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(max_length)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxdigits)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxevents)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxlen)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxmem)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxsplit)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxvalue)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index eb60b80c964d42..0f4f3b61910241 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -538,6 +538,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(max_length) STRUCT_FOR_ID(maxdigits) STRUCT_FOR_ID(maxevents) + STRUCT_FOR_ID(maxlen) STRUCT_FOR_ID(maxmem) STRUCT_FOR_ID(maxsplit) STRUCT_FOR_ID(maxvalue) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 9b39de1d69c6c7..63a2b54c839a4b 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1047,6 +1047,7 @@ extern "C" { INIT_ID(max_length), \ INIT_ID(maxdigits), \ INIT_ID(maxevents), \ + INIT_ID(maxlen), \ INIT_ID(maxmem), \ INIT_ID(maxsplit), \ INIT_ID(maxvalue), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 898d386f4cfd05..bf8cdd85e4be5c 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1455,6 +1455,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(maxevents); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(maxlen); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(maxmem); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst new file mode 100644 index 00000000000000..e5f3d5ea0cea25 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst @@ -0,0 +1 @@ +Convert :class:`collections.deque` to use Argument Clinic. diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index c8cd53de5e2262..ef77d34b10e47b 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -44,8 +44,11 @@ find_module_state_by_def(PyTypeObject *type) /*[clinic input] module _collections class _tuplegetter "_tuplegetterobject *" "clinic_state()->tuplegetter_type" +class _collections.deque "dequeobject *" "clinic_state()->deque_type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7356042a89862e0e]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a033cc2a8476b3f1]*/ + +typedef struct dequeobject dequeobject; /* We can safely assume type to be the defining class, * since tuplegetter is not a base type */ @@ -53,6 +56,12 @@ class _tuplegetter "_tuplegetterobject *" "clinic_state()->tuplegetter_type" #include "clinic/_collectionsmodule.c.h" #undef clinic_state +/*[python input] +class dequeobject_converter(self_converter): + type = "dequeobject *" +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=b6ae4a3ff852be2f]*/ + /* collections module implementation of a deque() datatype Written and maintained by Raymond D. Hettinger */ @@ -121,7 +130,7 @@ typedef struct BLOCK { struct BLOCK *rightlink; } block; -typedef struct { +struct dequeobject { PyObject_VAR_HEAD block *leftblock; block *rightblock; @@ -132,7 +141,7 @@ typedef struct { Py_ssize_t numfreeblocks; block *freeblocks[MAXFREEBLOCKS]; PyObject *weakreflist; -} dequeobject; +}; /* For debug builds, add error checking to track the endpoints * in the chain of links. The goal is to make sure that link @@ -219,8 +228,17 @@ deque_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)deque; } +/*[clinic input] +_collections.deque.pop as deque_pop + + deque: dequeobject + +Remove and return the rightmost element. +[clinic start generated code]*/ + static PyObject * -deque_pop(dequeobject *deque, PyObject *unused) +deque_pop_impl(dequeobject *deque) +/*[clinic end generated code: output=2e5f7890c4251f07 input=eb6e6d020f877dec]*/ { PyObject *item; block *prevblock; @@ -254,10 +272,17 @@ deque_pop(dequeobject *deque, PyObject *unused) return item; } -PyDoc_STRVAR(pop_doc, "Remove and return the rightmost element."); +/*[clinic input] +_collections.deque.popleft as deque_popleft + + deque: dequeobject + +Remove and return the leftmost element. +[clinic start generated code]*/ static PyObject * -deque_popleft(dequeobject *deque, PyObject *unused) +deque_popleft_impl(dequeobject *deque) +/*[clinic end generated code: output=62b154897097ff68 input=acb41b9af50a9d9b]*/ { PyObject *item; block *prevblock; @@ -292,8 +317,6 @@ deque_popleft(dequeobject *deque, PyObject *unused) return item; } -PyDoc_STRVAR(popleft_doc, "Remove and return the leftmost element."); - /* The deque's size limit is d.maxlen. The limit can be zero or positive. * If there is no limit, then d.maxlen == -1. * @@ -326,7 +349,7 @@ deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) deque->rightindex++; deque->rightblock->data[deque->rightindex] = item; if (NEEDS_TRIM(deque, maxlen)) { - PyObject *olditem = deque_popleft(deque, NULL); + PyObject *olditem = deque_popleft_impl(deque); Py_DECREF(olditem); } else { deque->state++; @@ -334,16 +357,25 @@ deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) return 0; } +/*[clinic input] +_collections.deque.append as deque_append + + deque: dequeobject + item: object + / + +Add an element to the right side of the deque. +[clinic start generated code]*/ + static PyObject * deque_append(dequeobject *deque, PyObject *item) +/*[clinic end generated code: output=507b13efc4853ecc input=f112b83c380528e3]*/ { if (deque_append_internal(deque, Py_NewRef(item), deque->maxlen) < 0) return NULL; Py_RETURN_NONE; } -PyDoc_STRVAR(append_doc, "Add an element to the right side of the deque."); - static inline int deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) { @@ -362,7 +394,7 @@ deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) deque->leftindex--; deque->leftblock->data[deque->leftindex] = item; if (NEEDS_TRIM(deque, deque->maxlen)) { - PyObject *olditem = deque_pop(deque, NULL); + PyObject *olditem = deque_pop_impl(deque); Py_DECREF(olditem); } else { deque->state++; @@ -370,16 +402,25 @@ deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) return 0; } +/*[clinic input] +_collections.deque.appendleft as deque_appendleft + + deque: dequeobject + item: object + / + +Add an element to the left side of the deque. +[clinic start generated code]*/ + static PyObject * deque_appendleft(dequeobject *deque, PyObject *item) +/*[clinic end generated code: output=de0335a64800ffd8 input=bbdaa60a3e956062]*/ { if (deque_appendleft_internal(deque, Py_NewRef(item), deque->maxlen) < 0) return NULL; Py_RETURN_NONE; } -PyDoc_STRVAR(appendleft_doc, "Add an element to the left side of the deque."); - static PyObject* finalize_iterator(PyObject *it) { @@ -410,8 +451,19 @@ consume_iterator(PyObject *it) return finalize_iterator(it); } +/*[clinic input] +_collections.deque.extend as deque_extend + + deque: dequeobject + iterable: object + / + +Extend the right side of the deque with elements from the iterable. +[clinic start generated code]*/ + static PyObject * deque_extend(dequeobject *deque, PyObject *iterable) +/*[clinic end generated code: output=a3a6e74d17063f8d input=cfebfd34d5383339]*/ { PyObject *it, *item; PyObject *(*iternext)(PyObject *); @@ -454,11 +506,19 @@ deque_extend(dequeobject *deque, PyObject *iterable) return finalize_iterator(it); } -PyDoc_STRVAR(extend_doc, -"Extend the right side of the deque with elements from the iterable"); +/*[clinic input] +_collections.deque.extendleft as deque_extendleft + + deque: dequeobject + iterable: object + / + +Extend the left side of the deque with elements from the iterable. +[clinic start generated code]*/ static PyObject * deque_extendleft(dequeobject *deque, PyObject *iterable) +/*[clinic end generated code: output=2dba946c50498c67 input=f4820e695a6f9416]*/ { PyObject *it, *item; PyObject *(*iternext)(PyObject *); @@ -501,9 +561,6 @@ deque_extendleft(dequeobject *deque, PyObject *iterable) return finalize_iterator(it); } -PyDoc_STRVAR(extendleft_doc, -"Extend the left side of the deque with elements from the iterable"); - static PyObject * deque_inplace_concat(dequeobject *deque, PyObject *other) { @@ -517,8 +574,17 @@ deque_inplace_concat(dequeobject *deque, PyObject *other) return (PyObject *)deque; } +/*[clinic input] +_collections.deque.copy as deque_copy + + deque: dequeobject + +Return a shallow copy of a deque. +[clinic start generated code]*/ + static PyObject * -deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) +deque_copy_impl(dequeobject *deque) +/*[clinic end generated code: output=6409b3d1ad2898b5 input=0e22f138bc1fcbee]*/ { PyObject *result; dequeobject *old_deque = (dequeobject *)deque; @@ -537,7 +603,7 @@ deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) PyObject *item = old_deque->leftblock->data[old_deque->leftindex]; rv = deque_append(new_deque, item); } else { - rv = deque_extend(new_deque, deque); + rv = deque_extend(new_deque, (PyObject *)deque); } if (rv != NULL) { Py_DECREF(rv); @@ -547,7 +613,8 @@ deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) return NULL; } if (old_deque->maxlen < 0) - result = PyObject_CallOneArg((PyObject *)(Py_TYPE(deque)), deque); + result = PyObject_CallOneArg((PyObject *)(Py_TYPE(deque)), + (PyObject *)deque); else result = PyObject_CallFunction((PyObject *)(Py_TYPE(deque)), "Oi", deque, old_deque->maxlen, NULL); @@ -561,7 +628,18 @@ deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) return result; } -PyDoc_STRVAR(copy_doc, "Return a shallow copy of a deque."); +/*[clinic input] +_collections.deque.__copy__ as deque___copy__ = _collections.deque.copy + +Return a shallow copy of a deque. +[clinic start generated code]*/ + +static PyObject * +deque___copy___impl(dequeobject *deque) +/*[clinic end generated code: output=7c5821504342bf23 input=fce05df783e7912b]*/ +{ + return deque_copy_impl(deque); +} static PyObject * deque_concat(dequeobject *deque, PyObject *other) @@ -580,7 +658,7 @@ deque_concat(dequeobject *deque, PyObject *other) return NULL; } - new_deque = deque_copy((PyObject *)deque, NULL); + new_deque = deque_copy_impl(deque); if (new_deque == NULL) return NULL; result = deque_extend((dequeobject *)new_deque, other); @@ -669,22 +747,29 @@ deque_clear(dequeobject *deque) alternate_method: while (Py_SIZE(deque)) { - item = deque_pop(deque, NULL); + item = deque_pop_impl(deque); assert (item != NULL); Py_DECREF(item); } return 0; } +/*[clinic input] +_collections.deque.clear as deque_clearmethod + + deque: dequeobject + +Remove all elements from the deque. +[clinic start generated code]*/ + static PyObject * -deque_clearmethod(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +deque_clearmethod_impl(dequeobject *deque) +/*[clinic end generated code: output=79b2513e097615c1 input=20488eb932f89f9e]*/ { deque_clear(deque); Py_RETURN_NONE; } -PyDoc_STRVAR(clear_doc, "Remove all elements from the deque."); - static PyObject * deque_inplace_repeat(dequeobject *deque, Py_ssize_t n) { @@ -768,7 +853,7 @@ deque_repeat(dequeobject *deque, Py_ssize_t n) dequeobject *new_deque; PyObject *rv; - new_deque = (dequeobject *)deque_copy((PyObject *) deque, NULL); + new_deque = (dequeobject *)deque_copy_impl(deque); if (new_deque == NULL) return NULL; rv = deque_inplace_repeat(new_deque, n); @@ -925,36 +1010,36 @@ _deque_rotate(dequeobject *deque, Py_ssize_t n) return rv; } -static PyObject * -deque_rotate(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) -{ - Py_ssize_t n=1; +/*[clinic input] +_collections.deque.rotate as deque_rotate - if (!_PyArg_CheckPositional("deque.rotate", nargs, 0, 1)) { - return NULL; - } - if (nargs) { - PyObject *index = _PyNumber_Index(args[0]); - if (index == NULL) { - return NULL; - } - n = PyLong_AsSsize_t(index); - Py_DECREF(index); - if (n == -1 && PyErr_Occurred()) { - return NULL; - } - } + deque: dequeobject + n: Py_ssize_t = 1 + / +Rotate the deque n steps to the right. If n is negative, rotates left. +[clinic start generated code]*/ + +static PyObject * +deque_rotate_impl(dequeobject *deque, Py_ssize_t n) +/*[clinic end generated code: output=96c2402a371eb15d input=d22070f49cc06c76]*/ +{ if (!_deque_rotate(deque, n)) Py_RETURN_NONE; return NULL; } -PyDoc_STRVAR(rotate_doc, -"Rotate the deque n steps to the right (default n=1). If n is negative, rotates left."); +/*[clinic input] +_collections.deque.reverse as deque_reverse + + deque: dequeobject + +Reverse *IN PLACE*. +[clinic start generated code]*/ static PyObject * -deque_reverse(dequeobject *deque, PyObject *unused) +deque_reverse_impl(dequeobject *deque) +/*[clinic end generated code: output=bdeebc2cf8c1f064 input=f139787f406101c9]*/ { block *leftblock = deque->leftblock; block *rightblock = deque->rightblock; @@ -991,11 +1076,19 @@ deque_reverse(dequeobject *deque, PyObject *unused) Py_RETURN_NONE; } -PyDoc_STRVAR(reverse_doc, -"D.reverse() -- reverse *IN PLACE*"); +/*[clinic input] +_collections.deque.count as deque_count + + deque: dequeobject + value as v: object + / + +Return number of occurrences of value. +[clinic start generated code]*/ static PyObject * deque_count(dequeobject *deque, PyObject *v) +/*[clinic end generated code: output=7405d289d94d7b9b input=1892925260ff5d78]*/ { block *b = deque->leftblock; Py_ssize_t index = deque->leftindex; @@ -1030,9 +1123,6 @@ deque_count(dequeobject *deque, PyObject *v) return PyLong_FromSsize_t(count); } -PyDoc_STRVAR(count_doc, -"D.count(value) -- return number of occurrences of value"); - static int deque_contains(dequeobject *deque, PyObject *v) { @@ -1071,22 +1161,33 @@ deque_len(dequeobject *deque) return Py_SIZE(deque); } +/*[clinic input] +@text_signature "($self, value, [start, [stop]])" +_collections.deque.index as deque_index + + deque: dequeobject + value as v: object + start: object(converter='_PyEval_SliceIndexNotNone', type='Py_ssize_t', c_default='0') = NULL + stop: object(converter='_PyEval_SliceIndexNotNone', type='Py_ssize_t', c_default='Py_SIZE(deque)') = NULL + / + +Return first index of value. + +Raises ValueError if the value is not present. +[clinic start generated code]*/ + static PyObject * -deque_index(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +deque_index_impl(dequeobject *deque, PyObject *v, Py_ssize_t start, + Py_ssize_t stop) +/*[clinic end generated code: output=df45132753175ef9 input=140210c099830f64]*/ { - Py_ssize_t i, n, start=0, stop=Py_SIZE(deque); - PyObject *v, *item; + Py_ssize_t i, n; + PyObject *item; block *b = deque->leftblock; Py_ssize_t index = deque->leftindex; size_t start_state = deque->state; int cmp; - if (!_PyArg_ParseStack(args, nargs, "O|O&O&:index", &v, - _PyEval_SliceIndexNotNone, &start, - _PyEval_SliceIndexNotNone, &stop)) { - return NULL; - } - if (start < 0) { start += Py_SIZE(deque); if (start < 0) @@ -1138,10 +1239,6 @@ deque_index(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) return NULL; } -PyDoc_STRVAR(index_doc, -"D.index(value, [start, [stop]]) -- return first index of value.\n" -"Raises ValueError if the value is not present."); - /* insert(), remove(), and delitem() are implemented in terms of rotate() for simplicity and reasonable performance near the end points. If for some reason these methods become popular, it is not @@ -1150,18 +1247,24 @@ PyDoc_STRVAR(index_doc, boost (by moving each pointer only once instead of twice). */ +/*[clinic input] +_collections.deque.insert as deque_insert + + deque: dequeobject + index: Py_ssize_t + value: object + / + +Insert value before index. +[clinic start generated code]*/ + static PyObject * -deque_insert(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +deque_insert_impl(dequeobject *deque, Py_ssize_t index, PyObject *value) +/*[clinic end generated code: output=ef4d2c15d5532b80 input=3e5c1c120d70c0e6]*/ { - Py_ssize_t index; Py_ssize_t n = Py_SIZE(deque); - PyObject *value; PyObject *rv; - if (!_PyArg_ParseStack(args, nargs, "nO:insert", &index, &value)) { - return NULL; - } - if (deque->maxlen == Py_SIZE(deque)) { PyErr_SetString(PyExc_IndexError, "deque already at its maximum size"); return NULL; @@ -1184,12 +1287,6 @@ deque_insert(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) Py_RETURN_NONE; } -PyDoc_STRVAR(insert_doc, -"D.insert(index, object) -- insert object before index"); - -PyDoc_STRVAR(remove_doc, -"D.remove(value) -- remove first occurrence of value."); - static int valid_index(Py_ssize_t i, Py_ssize_t limit) { @@ -1246,15 +1343,26 @@ deque_del_item(dequeobject *deque, Py_ssize_t i) assert (i >= 0 && i < Py_SIZE(deque)); if (_deque_rotate(deque, -i)) return -1; - item = deque_popleft(deque, NULL); + item = deque_popleft_impl(deque); rv = _deque_rotate(deque, i); assert (item != NULL); Py_DECREF(item); return rv; } +/*[clinic input] +_collections.deque.remove as deque_remove + + deque: dequeobject + value: object + / + +Remove first occurrence of value. +[clinic start generated code]*/ + static PyObject * deque_remove(dequeobject *deque, PyObject *value) +/*[clinic end generated code: output=49e1666d612fe911 input=d972f32d15990880]*/ { PyObject *item; block *b = deque->leftblock; @@ -1375,8 +1483,17 @@ deque_traverse(dequeobject *deque, visitproc visit, void *arg) return 0; } +/*[clinic input] +_collections.deque.__reduce__ as deque___reduce__ + + deque: dequeobject + +Return state information for pickling. +[clinic start generated code]*/ + static PyObject * -deque_reduce(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +deque___reduce___impl(dequeobject *deque) +/*[clinic end generated code: output=cb85d9e0b7d2c5ad input=991a933a5bc7a526]*/ { PyObject *state, *it; @@ -1510,26 +1627,23 @@ deque_richcompare(PyObject *v, PyObject *w, int op) return NULL; } +/*[clinic input] +@text_signature "([iterable[, maxlen]])" +_collections.deque.__init__ as deque_init + + deque: dequeobject + iterable: object = NULL + maxlen as maxlenobj: object = NULL + +A list-like sequence optimized for data accesses near its endpoints. +[clinic start generated code]*/ + static int -deque_init(dequeobject *deque, PyObject *args, PyObject *kwdargs) +deque_init_impl(dequeobject *deque, PyObject *iterable, PyObject *maxlenobj) +/*[clinic end generated code: output=7084a39d71218dcd input=5ebdffc48a2d27ae]*/ + { - PyObject *iterable = NULL; - PyObject *maxlenobj = NULL; Py_ssize_t maxlen = -1; - char *kwlist[] = {"iterable", "maxlen", 0}; - - if (kwdargs == NULL && PyTuple_GET_SIZE(args) <= 2) { - if (PyTuple_GET_SIZE(args) > 0) { - iterable = PyTuple_GET_ITEM(args, 0); - } - if (PyTuple_GET_SIZE(args) > 1) { - maxlenobj = PyTuple_GET_ITEM(args, 1); - } - } else { - if (!PyArg_ParseTupleAndKeywords(args, kwdargs, "|OO:deque", kwlist, - &iterable, &maxlenobj)) - return -1; - } if (maxlenobj != NULL && maxlenobj != Py_None) { maxlen = PyLong_AsSsize_t(maxlenobj); if (maxlen == -1 && PyErr_Occurred()) @@ -1551,8 +1665,17 @@ deque_init(dequeobject *deque, PyObject *args, PyObject *kwdargs) return 0; } +/*[clinic input] +_collections.deque.__sizeof__ as deque___sizeof__ + + deque: dequeobject + +Return the size of the deque in memory, in bytes. +[clinic start generated code]*/ + static PyObject * -deque_sizeof(dequeobject *deque, void *unused) +deque___sizeof___impl(dequeobject *deque) +/*[clinic end generated code: output=4d36e9fb4f30bbaf input=4e7c9a00c03c3290]*/ { size_t res = _PyObject_SIZE(Py_TYPE(deque)); size_t blocks; @@ -1563,9 +1686,6 @@ deque_sizeof(dequeobject *deque, void *unused) return PyLong_FromSize_t(res); } -PyDoc_STRVAR(sizeof_doc, -"D.__sizeof__() -- size of D in memory, in bytes"); - static PyObject * deque_get_maxlen(dequeobject *deque, void *Py_UNUSED(ignored)) { @@ -1574,6 +1694,22 @@ deque_get_maxlen(dequeobject *deque, void *Py_UNUSED(ignored)) return PyLong_FromSsize_t(deque->maxlen); } +static PyObject *deque_reviter(dequeobject *deque); + +/*[clinic input] +_collections.deque.__reversed__ as deque___reversed__ + + deque: dequeobject + +Return a reverse iterator over the deque. +[clinic start generated code]*/ + +static PyObject * +deque___reversed___impl(dequeobject *deque) +/*[clinic end generated code: output=3e7e7e715883cf2e input=3d494c25a6fe5c7e]*/ +{ + return deque_reviter(deque); +} /* deque object ********************************************************/ @@ -1584,47 +1720,26 @@ static PyGetSetDef deque_getset[] = { }; static PyObject *deque_iter(dequeobject *deque); -static PyObject *deque_reviter(dequeobject *deque, PyObject *Py_UNUSED(ignored)); -PyDoc_STRVAR(reversed_doc, - "D.__reversed__() -- return a reverse iterator over the deque"); static PyMethodDef deque_methods[] = { - {"append", (PyCFunction)deque_append, - METH_O, append_doc}, - {"appendleft", (PyCFunction)deque_appendleft, - METH_O, appendleft_doc}, - {"clear", (PyCFunction)deque_clearmethod, - METH_NOARGS, clear_doc}, - {"__copy__", deque_copy, - METH_NOARGS, copy_doc}, - {"copy", deque_copy, - METH_NOARGS, copy_doc}, - {"count", (PyCFunction)deque_count, - METH_O, count_doc}, - {"extend", (PyCFunction)deque_extend, - METH_O, extend_doc}, - {"extendleft", (PyCFunction)deque_extendleft, - METH_O, extendleft_doc}, - {"index", _PyCFunction_CAST(deque_index), - METH_FASTCALL, index_doc}, - {"insert", _PyCFunction_CAST(deque_insert), - METH_FASTCALL, insert_doc}, - {"pop", (PyCFunction)deque_pop, - METH_NOARGS, pop_doc}, - {"popleft", (PyCFunction)deque_popleft, - METH_NOARGS, popleft_doc}, - {"__reduce__", (PyCFunction)deque_reduce, - METH_NOARGS, reduce_doc}, - {"remove", (PyCFunction)deque_remove, - METH_O, remove_doc}, - {"__reversed__", (PyCFunction)deque_reviter, - METH_NOARGS, reversed_doc}, - {"reverse", (PyCFunction)deque_reverse, - METH_NOARGS, reverse_doc}, - {"rotate", _PyCFunction_CAST(deque_rotate), - METH_FASTCALL, rotate_doc}, - {"__sizeof__", (PyCFunction)deque_sizeof, - METH_NOARGS, sizeof_doc}, + DEQUE_APPEND_METHODDEF + DEQUE_APPENDLEFT_METHODDEF + DEQUE_CLEARMETHOD_METHODDEF + DEQUE___COPY___METHODDEF + DEQUE_COPY_METHODDEF + DEQUE_COUNT_METHODDEF + DEQUE_EXTEND_METHODDEF + DEQUE_EXTENDLEFT_METHODDEF + DEQUE_INDEX_METHODDEF + DEQUE_INSERT_METHODDEF + DEQUE_POP_METHODDEF + DEQUE_POPLEFT_METHODDEF + DEQUE___REDUCE___METHODDEF + DEQUE_REMOVE_METHODDEF + DEQUE___REVERSED___METHODDEF + DEQUE_REVERSE_METHODDEF + DEQUE_ROTATE_METHODDEF + DEQUE___SIZEOF___METHODDEF {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} /* sentinel */ @@ -1635,17 +1750,12 @@ static PyMemberDef deque_members[] = { {NULL}, }; -PyDoc_STRVAR(deque_doc, -"deque([iterable[, maxlen]]) --> deque object\n\ -\n\ -A list-like sequence optimized for data accesses near its endpoints."); - static PyType_Slot deque_slots[] = { {Py_tp_dealloc, deque_dealloc}, {Py_tp_repr, deque_repr}, {Py_tp_hash, PyObject_HashNotImplemented}, {Py_tp_getattro, PyObject_GenericGetAttr}, - {Py_tp_doc, (void *)deque_doc}, + {Py_tp_doc, (void *)deque_init__doc__}, {Py_tp_traverse, deque_traverse}, {Py_tp_clear, deque_clear}, {Py_tp_richcompare, deque_richcompare}, @@ -1834,7 +1944,7 @@ static PyType_Spec dequeiter_spec = { /*********************** Deque Reverse Iterator **************************/ static PyObject * -deque_reviter(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +deque_reviter(dequeobject *deque) { dequeiterobject *it; collections_state *state = find_module_state_by_def(Py_TYPE(deque)); @@ -1889,7 +1999,7 @@ dequereviter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; assert(type == state->dequereviter_type); - it = (dequeiterobject*)deque_reviter((dequeobject *)deque, NULL); + it = (dequeiterobject *)deque_reviter((dequeobject *)deque); if (!it) return NULL; /* consume items from the queue */ diff --git a/Modules/clinic/_collectionsmodule.c.h b/Modules/clinic/_collectionsmodule.c.h index 591ab50c76a8e8..60fb12a2231619 100644 --- a/Modules/clinic/_collectionsmodule.c.h +++ b/Modules/clinic/_collectionsmodule.c.h @@ -2,9 +2,425 @@ preserve [clinic start generated code]*/ +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif #include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() +PyDoc_STRVAR(deque_pop__doc__, +"pop($self, /)\n" +"--\n" +"\n" +"Remove and return the rightmost element."); + +#define DEQUE_POP_METHODDEF \ + {"pop", (PyCFunction)deque_pop, METH_NOARGS, deque_pop__doc__}, + +static PyObject * +deque_pop_impl(dequeobject *deque); + +static PyObject * +deque_pop(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_pop_impl(deque); +} + +PyDoc_STRVAR(deque_popleft__doc__, +"popleft($self, /)\n" +"--\n" +"\n" +"Remove and return the leftmost element."); + +#define DEQUE_POPLEFT_METHODDEF \ + {"popleft", (PyCFunction)deque_popleft, METH_NOARGS, deque_popleft__doc__}, + +static PyObject * +deque_popleft_impl(dequeobject *deque); + +static PyObject * +deque_popleft(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_popleft_impl(deque); +} + +PyDoc_STRVAR(deque_append__doc__, +"append($self, item, /)\n" +"--\n" +"\n" +"Add an element to the right side of the deque."); + +#define DEQUE_APPEND_METHODDEF \ + {"append", (PyCFunction)deque_append, METH_O, deque_append__doc__}, + +PyDoc_STRVAR(deque_appendleft__doc__, +"appendleft($self, item, /)\n" +"--\n" +"\n" +"Add an element to the left side of the deque."); + +#define DEQUE_APPENDLEFT_METHODDEF \ + {"appendleft", (PyCFunction)deque_appendleft, METH_O, deque_appendleft__doc__}, + +PyDoc_STRVAR(deque_extend__doc__, +"extend($self, iterable, /)\n" +"--\n" +"\n" +"Extend the right side of the deque with elements from the iterable."); + +#define DEQUE_EXTEND_METHODDEF \ + {"extend", (PyCFunction)deque_extend, METH_O, deque_extend__doc__}, + +PyDoc_STRVAR(deque_extendleft__doc__, +"extendleft($self, iterable, /)\n" +"--\n" +"\n" +"Extend the left side of the deque with elements from the iterable."); + +#define DEQUE_EXTENDLEFT_METHODDEF \ + {"extendleft", (PyCFunction)deque_extendleft, METH_O, deque_extendleft__doc__}, + +PyDoc_STRVAR(deque_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a shallow copy of a deque."); + +#define DEQUE_COPY_METHODDEF \ + {"copy", (PyCFunction)deque_copy, METH_NOARGS, deque_copy__doc__}, + +static PyObject * +deque_copy_impl(dequeobject *deque); + +static PyObject * +deque_copy(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_copy_impl(deque); +} + +PyDoc_STRVAR(deque___copy____doc__, +"__copy__($self, /)\n" +"--\n" +"\n" +"Return a shallow copy of a deque."); + +#define DEQUE___COPY___METHODDEF \ + {"__copy__", (PyCFunction)deque___copy__, METH_NOARGS, deque___copy____doc__}, + +static PyObject * +deque___copy___impl(dequeobject *deque); + +static PyObject * +deque___copy__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___copy___impl(deque); +} + +PyDoc_STRVAR(deque_clearmethod__doc__, +"clear($self, /)\n" +"--\n" +"\n" +"Remove all elements from the deque."); + +#define DEQUE_CLEARMETHOD_METHODDEF \ + {"clear", (PyCFunction)deque_clearmethod, METH_NOARGS, deque_clearmethod__doc__}, + +static PyObject * +deque_clearmethod_impl(dequeobject *deque); + +static PyObject * +deque_clearmethod(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_clearmethod_impl(deque); +} + +PyDoc_STRVAR(deque_rotate__doc__, +"rotate($self, n=1, /)\n" +"--\n" +"\n" +"Rotate the deque n steps to the right. If n is negative, rotates left."); + +#define DEQUE_ROTATE_METHODDEF \ + {"rotate", _PyCFunction_CAST(deque_rotate), METH_FASTCALL, deque_rotate__doc__}, + +static PyObject * +deque_rotate_impl(dequeobject *deque, Py_ssize_t n); + +static PyObject * +deque_rotate(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t n = 1; + + if (!_PyArg_CheckPositional("rotate", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + n = ival; + } +skip_optional: + return_value = deque_rotate_impl(deque, n); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque_reverse__doc__, +"reverse($self, /)\n" +"--\n" +"\n" +"Reverse *IN PLACE*."); + +#define DEQUE_REVERSE_METHODDEF \ + {"reverse", (PyCFunction)deque_reverse, METH_NOARGS, deque_reverse__doc__}, + +static PyObject * +deque_reverse_impl(dequeobject *deque); + +static PyObject * +deque_reverse(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_reverse_impl(deque); +} + +PyDoc_STRVAR(deque_count__doc__, +"count($self, value, /)\n" +"--\n" +"\n" +"Return number of occurrences of value."); + +#define DEQUE_COUNT_METHODDEF \ + {"count", (PyCFunction)deque_count, METH_O, deque_count__doc__}, + +PyDoc_STRVAR(deque_index__doc__, +"index($self, value, [start, [stop]])\n" +"--\n" +"\n" +"Return first index of value.\n" +"\n" +"Raises ValueError if the value is not present."); + +#define DEQUE_INDEX_METHODDEF \ + {"index", _PyCFunction_CAST(deque_index), METH_FASTCALL, deque_index__doc__}, + +static PyObject * +deque_index_impl(dequeobject *deque, PyObject *v, Py_ssize_t start, + Py_ssize_t stop); + +static PyObject * +deque_index(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *v; + Py_ssize_t start = 0; + Py_ssize_t stop = Py_SIZE(deque); + + if (!_PyArg_CheckPositional("index", nargs, 1, 3)) { + goto exit; + } + v = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndexNotNone(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndexNotNone(args[2], &stop)) { + goto exit; + } +skip_optional: + return_value = deque_index_impl(deque, v, start, stop); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque_insert__doc__, +"insert($self, index, value, /)\n" +"--\n" +"\n" +"Insert value before index."); + +#define DEQUE_INSERT_METHODDEF \ + {"insert", _PyCFunction_CAST(deque_insert), METH_FASTCALL, deque_insert__doc__}, + +static PyObject * +deque_insert_impl(dequeobject *deque, Py_ssize_t index, PyObject *value); + +static PyObject * +deque_insert(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t index; + PyObject *value; + + if (!_PyArg_CheckPositional("insert", nargs, 2, 2)) { + goto exit; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + index = ival; + } + value = args[1]; + return_value = deque_insert_impl(deque, index, value); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque_remove__doc__, +"remove($self, value, /)\n" +"--\n" +"\n" +"Remove first occurrence of value."); + +#define DEQUE_REMOVE_METHODDEF \ + {"remove", (PyCFunction)deque_remove, METH_O, deque_remove__doc__}, + +PyDoc_STRVAR(deque___reduce____doc__, +"__reduce__($self, /)\n" +"--\n" +"\n" +"Return state information for pickling."); + +#define DEQUE___REDUCE___METHODDEF \ + {"__reduce__", (PyCFunction)deque___reduce__, METH_NOARGS, deque___reduce____doc__}, + +static PyObject * +deque___reduce___impl(dequeobject *deque); + +static PyObject * +deque___reduce__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___reduce___impl(deque); +} + +PyDoc_STRVAR(deque_init__doc__, +"deque([iterable[, maxlen]])\n" +"--\n" +"\n" +"A list-like sequence optimized for data accesses near its endpoints."); + +static int +deque_init_impl(dequeobject *deque, PyObject *iterable, PyObject *maxlenobj); + +static int +deque_init(PyObject *deque, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(iterable), &_Py_ID(maxlen), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"iterable", "maxlen", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "deque", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *iterable = NULL; + PyObject *maxlenobj = NULL; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 2, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + iterable = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + maxlenobj = fastargs[1]; +skip_optional_pos: + return_value = deque_init_impl((dequeobject *)deque, iterable, maxlenobj); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n" +"Return the size of the deque in memory, in bytes."); + +#define DEQUE___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)deque___sizeof__, METH_NOARGS, deque___sizeof____doc__}, + +static PyObject * +deque___sizeof___impl(dequeobject *deque); + +static PyObject * +deque___sizeof__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___sizeof___impl(deque); +} + +PyDoc_STRVAR(deque___reversed____doc__, +"__reversed__($self, /)\n" +"--\n" +"\n" +"Return a reverse iterator over the deque."); + +#define DEQUE___REVERSED___METHODDEF \ + {"__reversed__", (PyCFunction)deque___reversed__, METH_NOARGS, deque___reversed____doc__}, + +static PyObject * +deque___reversed___impl(dequeobject *deque); + +static PyObject * +deque___reversed__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___reversed___impl(deque); +} + PyDoc_STRVAR(_collections__count_elements__doc__, "_count_elements($module, mapping, iterable, /)\n" "--\n" @@ -72,4 +488,4 @@ tuplegetter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=c896a72f8c45930d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3633a5cbc23e8440 input=a9049054013a1b77]*/ From 15fe8cea174772060b24c96d335a498aba3b8ed4 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 29 Jan 2024 16:45:31 +0100 Subject: [PATCH 064/127] gh-91325: Skip Stable ABI checks with Py_TRACE_REFS special build (GH-92046) Skip Stable ABI checks with Py_TRACE_REFS special build This build is not compatible with Py_LIMITED_API nor with the stable ABI. --- Lib/test/test_stable_abi_ctypes.py | 36 +++++++++++++++++++--------- Misc/stable_abi.toml | 4 ++++ Modules/_testcapi_feature_macros.inc | 9 +++++++ Tools/build/stable_abi.py | 19 ++++++++------- 4 files changed, 48 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 4976ac3642bbe4..90d45272838420 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -9,6 +9,13 @@ from _testcapi import get_feature_macros feature_macros = get_feature_macros() + +# Stable ABI is incompatible with Py_TRACE_REFS builds due to PyObject +# layout differences. +# See https://github.com/python/cpython/issues/88299#issuecomment-1113366226 +if feature_macros['Py_TRACE_REFS']: + raise unittest.SkipTest("incompatible with Py_TRACE_REFS.") + ctypes_test = import_module('ctypes') class TestStableABIAvailability(unittest.TestCase): @@ -441,7 +448,9 @@ def test_windows_feature_macros(self): "PyModule_AddObjectRef", "PyModule_AddStringConstant", "PyModule_AddType", + "PyModule_Create2", "PyModule_ExecDef", + "PyModule_FromDefAndSpec2", "PyModule_GetDef", "PyModule_GetDict", "PyModule_GetFilename", @@ -911,6 +920,13 @@ def test_windows_feature_macros(self): "_Py_TrueStruct", "_Py_VaBuildValue_SizeT", ) +if feature_macros['HAVE_FORK']: + SYMBOL_NAMES += ( + 'PyOS_AfterFork', + 'PyOS_AfterFork_Child', + 'PyOS_AfterFork_Parent', + 'PyOS_BeforeFork', + ) if feature_macros['MS_WINDOWS']: SYMBOL_NAMES += ( 'PyErr_SetExcFromWindowsErr', @@ -926,17 +942,6 @@ def test_windows_feature_macros(self): 'PyUnicode_DecodeMBCSStateful', 'PyUnicode_EncodeCodePage', ) -if feature_macros['HAVE_FORK']: - SYMBOL_NAMES += ( - 'PyOS_AfterFork', - 'PyOS_AfterFork_Child', - 'PyOS_AfterFork_Parent', - 'PyOS_BeforeFork', - ) -if feature_macros['USE_STACKCHECK']: - SYMBOL_NAMES += ( - 'PyOS_CheckStack', - ) if feature_macros['PY_HAVE_THREAD_NATIVE_ID']: SYMBOL_NAMES += ( 'PyThread_get_thread_native_id', @@ -946,14 +951,23 @@ def test_windows_feature_macros(self): '_Py_NegativeRefcount', '_Py_RefTotal', ) +if feature_macros['Py_TRACE_REFS']: + SYMBOL_NAMES += ( + ) +if feature_macros['USE_STACKCHECK']: + SYMBOL_NAMES += ( + 'PyOS_CheckStack', + ) EXPECTED_FEATURE_MACROS = set(['HAVE_FORK', 'MS_WINDOWS', 'PY_HAVE_THREAD_NATIVE_ID', 'Py_REF_DEBUG', + 'Py_TRACE_REFS', 'USE_STACKCHECK']) WINDOWS_FEATURE_MACROS = {'HAVE_FORK': False, 'MS_WINDOWS': True, 'PY_HAVE_THREAD_NATIVE_ID': True, 'Py_REF_DEBUG': 'maybe', + 'Py_TRACE_REFS': 'maybe', 'USE_STACKCHECK': 'maybe'} diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 22b25dd0ec141f..2e6b0fff9cd770 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -78,6 +78,10 @@ [feature_macro.Py_REF_DEBUG] doc = 'when Python is compiled in debug mode (with Py_REF_DEBUG)' windows = 'maybe' +[feature_macro.Py_TRACE_REFS] + # nb. This mode is not compatible with Stable ABI/Limited API. + doc = 'when Python is compiled with Py_TRACE_REFS' + windows = 'maybe' # Mentioned in PEP 384: diff --git a/Modules/_testcapi_feature_macros.inc b/Modules/_testcapi_feature_macros.inc index a076e714980074..f5f3524f2c0177 100644 --- a/Modules/_testcapi_feature_macros.inc +++ b/Modules/_testcapi_feature_macros.inc @@ -38,6 +38,15 @@ if (res) { Py_DECREF(result); return NULL; } +#ifdef Py_TRACE_REFS + res = PyDict_SetItemString(result, "Py_TRACE_REFS", Py_True); +#else + res = PyDict_SetItemString(result, "Py_TRACE_REFS", Py_False); +#endif +if (res) { + Py_DECREF(result); return NULL; +} + #ifdef USE_STACKCHECK res = PyDict_SetItemString(result, "USE_STACKCHECK", Py_True); #else diff --git a/Tools/build/stable_abi.py b/Tools/build/stable_abi.py index 85c437d521a15a..83146622c74f94 100644 --- a/Tools/build/stable_abi.py +++ b/Tools/build/stable_abi.py @@ -278,6 +278,13 @@ def gen_ctypes_test(manifest, args, outfile): from _testcapi import get_feature_macros feature_macros = get_feature_macros() + + # Stable ABI is incompatible with Py_TRACE_REFS builds due to PyObject + # layout differences. + # See https://github.com/python/cpython/issues/88299#issuecomment-1113366226 + if feature_macros['Py_TRACE_REFS']: + raise unittest.SkipTest("incompatible with Py_TRACE_REFS.") + ctypes_test = import_module('ctypes') class TestStableABIAvailability(unittest.TestCase): @@ -308,16 +315,11 @@ def test_windows_feature_macros(self): {'function', 'data'}, include_abi_only=True, ) - optional_items = {} + feature_macros = list(manifest.select({'feature_macro'})) + optional_items = {m.name: [] for m in feature_macros} for item in items: - if item.name in ( - # Some symbols aren't exported on all platforms. - # This is a bug: https://bugs.python.org/issue44133 - 'PyModule_Create2', 'PyModule_FromDefAndSpec2', - ): - continue if item.ifdef: - optional_items.setdefault(item.ifdef, []).append(item.name) + optional_items[item.ifdef].append(item.name) else: write(f' "{item.name}",') write(")") @@ -328,7 +330,6 @@ def test_windows_feature_macros(self): write(f" {name!r},") write(" )") write("") - feature_macros = list(manifest.select({'feature_macro'})) feature_names = sorted(m.name for m in feature_macros) write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})") From 0f54ee4c6cdba74492183eb2dd142393c7dba403 Mon Sep 17 00:00:00 2001 From: Steven Ward Date: Mon, 29 Jan 2024 11:00:15 -0500 Subject: [PATCH 065/127] Remove limit in calendar CLI help message for year arg (GH-114719) The limit was removed in 66c88ce30ca2b23daa37038e1a3c0de98f241f50 (GH-4109). --- Lib/calendar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/calendar.py b/Lib/calendar.py index 03469d8ac96bcd..3c79540f986b63 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -737,7 +737,7 @@ def main(args=None): parser.add_argument( "year", nargs='?', type=int, - help="year number (1-9999)" + help="year number" ) parser.add_argument( "month", From e351ca3c205860e94cad5da25c74bd76933f5f11 Mon Sep 17 00:00:00 2001 From: Soumendra Ganguly <67527439+8vasu@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:10:28 +0100 Subject: [PATCH 066/127] gh-85984: Add POSIX pseudo-terminal functions. (GH-102413) Signed-off-by: Soumendra Ganguly Co-authored-by: Gregory P. Smith Co-authored-by: Petr Viktorin --- Doc/conf.py | 5 + Doc/library/os.rst | 59 ++++++ Lib/test/test_os.py | 45 ++++- ...3-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst | 2 + Modules/clinic/posixmodule.c.h | 168 +++++++++++++++++- Modules/posixmodule.c | 147 +++++++++++++++ configure | 30 ++++ configure.ac | 8 +- pyconfig.h.in | 15 ++ 9 files changed, 468 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst diff --git a/Doc/conf.py b/Doc/conf.py index a96e7787d167a3..e12128ad356e1b 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -97,12 +97,16 @@ ('c:func', 'free'), ('c:func', 'gettimeofday'), ('c:func', 'gmtime'), + ('c:func', 'grantpt'), ('c:func', 'localeconv'), ('c:func', 'localtime'), ('c:func', 'main'), ('c:func', 'malloc'), ('c:func', 'mktime'), + ('c:func', 'posix_openpt'), ('c:func', 'printf'), + ('c:func', 'ptsname'), + ('c:func', 'ptsname_r'), ('c:func', 'realloc'), ('c:func', 'snprintf'), ('c:func', 'sprintf'), @@ -110,6 +114,7 @@ ('c:func', 'strftime'), ('c:func', 'system'), ('c:func', 'time'), + ('c:func', 'unlockpt'), ('c:func', 'vsnprintf'), # Standard C types ('c:type', 'FILE'), diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 0008ec6a40c76f..cc9f3e75a80c51 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1122,6 +1122,20 @@ as internal buffering of data. .. versionchanged:: 3.12 Added support for pipes on Windows. + +.. function:: grantpt(fd, /) + + Grant access to the slave pseudo-terminal device associated with the + master pseudo-terminal device to which the file descriptor *fd* refers. + The file descriptor *fd* is not closed upon failure. + + Calls the C standard library function :c:func:`grantpt`. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: isatty(fd, /) Return ``True`` if the file descriptor *fd* is open and connected to a @@ -1429,6 +1443,23 @@ or `the MSDN `_ on Windo .. versionadded:: 3.3 +.. function:: posix_openpt(oflag, /) + + Open and return a file descriptor for a master pseudo-terminal device. + + Calls the C standard library function :c:func:`posix_openpt`. The *oflag* + argument is used to set file status flags and file access modes as + specified in the manual page of :c:func:`posix_openpt` of your system. + + The returned file descriptor is :ref:`non-inheritable `. + If the value :data:`O_CLOEXEC` is available on the system, it is added to + *oflag*. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: preadv(fd, buffers, offset, flags=0, /) Read from a file descriptor *fd* at a position of *offset* into mutable @@ -1486,6 +1517,21 @@ or `the MSDN `_ on Windo .. versionadded:: 3.7 +.. function:: ptsname(fd, /) + + Return the name of the slave pseudo-terminal device associated with the + master pseudo-terminal device to which the file descriptor *fd* refers. + The file descriptor *fd* is not closed upon failure. + + Calls the reentrant C standard library function :c:func:`ptsname_r` if + it is available; otherwise, the C standard library function + :c:func:`ptsname`, which is not guaranteed to be thread-safe, is called. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: pwrite(fd, str, offset, /) Write the bytestring in *str* to file descriptor *fd* at position of @@ -1738,6 +1784,19 @@ or `the MSDN `_ on Windo .. availability:: Unix. +.. function:: unlockpt(fd, /) + + Unlock the slave pseudo-terminal device associated with the master + pseudo-terminal device to which the file descriptor *fd* refers. + The file descriptor *fd* is not closed upon failure. + + Calls the C standard library function :c:func:`unlockpt`. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: write(fd, str, /) Write the bytestring in *str* to file descriptor *fd*. diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index e6f0dfde8cb4ae..ed79a2c24ef30b 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -4536,13 +4536,46 @@ def test_dup2(self): self.assertEqual(os.dup2(fd, fd3, inheritable=False), fd3) self.assertFalse(os.get_inheritable(fd3)) - @unittest.skipUnless(hasattr(os, 'openpty'), "need os.openpty()") +@unittest.skipUnless(hasattr(os, 'openpty'), "need os.openpty()") +class PseudoterminalTests(unittest.TestCase): + def open_pty(self): + """Open a pty fd-pair, and schedule cleanup for it""" + main_fd, second_fd = os.openpty() + self.addCleanup(os.close, main_fd) + self.addCleanup(os.close, second_fd) + return main_fd, second_fd + def test_openpty(self): - master_fd, slave_fd = os.openpty() - self.addCleanup(os.close, master_fd) - self.addCleanup(os.close, slave_fd) - self.assertEqual(os.get_inheritable(master_fd), False) - self.assertEqual(os.get_inheritable(slave_fd), False) + main_fd, second_fd = self.open_pty() + self.assertEqual(os.get_inheritable(main_fd), False) + self.assertEqual(os.get_inheritable(second_fd), False) + + @unittest.skipUnless(hasattr(os, 'ptsname'), "need os.ptsname()") + @unittest.skipUnless(hasattr(os, 'O_RDWR'), "need os.O_RDWR") + @unittest.skipUnless(hasattr(os, 'O_NOCTTY'), "need os.O_NOCTTY") + def test_open_via_ptsname(self): + main_fd, second_fd = self.open_pty() + second_path = os.ptsname(main_fd) + reopened_second_fd = os.open(second_path, os.O_RDWR|os.O_NOCTTY) + self.addCleanup(os.close, reopened_second_fd) + os.write(reopened_second_fd, b'foo') + self.assertEqual(os.read(main_fd, 3), b'foo') + + @unittest.skipUnless(hasattr(os, 'posix_openpt'), "need os.posix_openpt()") + @unittest.skipUnless(hasattr(os, 'grantpt'), "need os.grantpt()") + @unittest.skipUnless(hasattr(os, 'unlockpt'), "need os.unlockpt()") + @unittest.skipUnless(hasattr(os, 'ptsname'), "need os.ptsname()") + @unittest.skipUnless(hasattr(os, 'O_RDWR'), "need os.O_RDWR") + @unittest.skipUnless(hasattr(os, 'O_NOCTTY'), "need os.O_NOCTTY") + def test_posix_pty_functions(self): + mother_fd = os.posix_openpt(os.O_RDWR|os.O_NOCTTY) + self.addCleanup(os.close, mother_fd) + os.grantpt(mother_fd) + os.unlockpt(mother_fd) + son_path = os.ptsname(mother_fd) + son_fd = os.open(son_path, os.O_RDWR|os.O_NOCTTY) + self.addCleanup(os.close, son_fd) + self.assertEqual(os.ptsname(mother_fd), os.ttyname(son_fd)) @unittest.skipUnless(hasattr(os, 'spawnl'), "need os.openpty()") def test_pipe_spawnl(self): diff --git a/Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst b/Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst new file mode 100644 index 00000000000000..0e54a1fe3c8a1c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst @@ -0,0 +1,2 @@ +Add POSIX pseudo-terminal functions :func:`os.posix_openpt`, +:func:`os.grantpt`, :func:`os.unlockpt`, and :func:`os.ptsname`. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index ba3e1cfa8dbc21..1373bdef03ba5e 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -4465,6 +4465,156 @@ os_sched_getaffinity(PyObject *module, PyObject *arg) #endif /* defined(HAVE_SCHED_H) && defined(HAVE_SCHED_SETAFFINITY) */ +#if defined(HAVE_POSIX_OPENPT) + +PyDoc_STRVAR(os_posix_openpt__doc__, +"posix_openpt($module, oflag, /)\n" +"--\n" +"\n" +"Open and return a file descriptor for a master pseudo-terminal device.\n" +"\n" +"Performs a posix_openpt() C function call. The oflag argument is used to\n" +"set file status flags and file access modes as specified in the manual page\n" +"of posix_openpt() of your system."); + +#define OS_POSIX_OPENPT_METHODDEF \ + {"posix_openpt", (PyCFunction)os_posix_openpt, METH_O, os_posix_openpt__doc__}, + +static int +os_posix_openpt_impl(PyObject *module, int oflag); + +static PyObject * +os_posix_openpt(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int oflag; + int _return_value; + + oflag = PyLong_AsInt(arg); + if (oflag == -1 && PyErr_Occurred()) { + goto exit; + } + _return_value = os_posix_openpt_impl(module, oflag); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromLong((long)_return_value); + +exit: + return return_value; +} + +#endif /* defined(HAVE_POSIX_OPENPT) */ + +#if defined(HAVE_GRANTPT) + +PyDoc_STRVAR(os_grantpt__doc__, +"grantpt($module, fd, /)\n" +"--\n" +"\n" +"Grant access to the slave pseudo-terminal device.\n" +"\n" +" fd\n" +" File descriptor of a master pseudo-terminal device.\n" +"\n" +"Performs a grantpt() C function call."); + +#define OS_GRANTPT_METHODDEF \ + {"grantpt", (PyCFunction)os_grantpt, METH_O, os_grantpt__doc__}, + +static PyObject * +os_grantpt_impl(PyObject *module, int fd); + +static PyObject * +os_grantpt(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int fd; + + if (!_PyLong_FileDescriptor_Converter(arg, &fd)) { + goto exit; + } + return_value = os_grantpt_impl(module, fd); + +exit: + return return_value; +} + +#endif /* defined(HAVE_GRANTPT) */ + +#if defined(HAVE_UNLOCKPT) + +PyDoc_STRVAR(os_unlockpt__doc__, +"unlockpt($module, fd, /)\n" +"--\n" +"\n" +"Unlock a pseudo-terminal master/slave pair.\n" +"\n" +" fd\n" +" File descriptor of a master pseudo-terminal device.\n" +"\n" +"Performs an unlockpt() C function call."); + +#define OS_UNLOCKPT_METHODDEF \ + {"unlockpt", (PyCFunction)os_unlockpt, METH_O, os_unlockpt__doc__}, + +static PyObject * +os_unlockpt_impl(PyObject *module, int fd); + +static PyObject * +os_unlockpt(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int fd; + + if (!_PyLong_FileDescriptor_Converter(arg, &fd)) { + goto exit; + } + return_value = os_unlockpt_impl(module, fd); + +exit: + return return_value; +} + +#endif /* defined(HAVE_UNLOCKPT) */ + +#if (defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R)) + +PyDoc_STRVAR(os_ptsname__doc__, +"ptsname($module, fd, /)\n" +"--\n" +"\n" +"Return the name of the slave pseudo-terminal device.\n" +"\n" +" fd\n" +" File descriptor of a master pseudo-terminal device.\n" +"\n" +"If the ptsname_r() C function is available, it is called;\n" +"otherwise, performs a ptsname() C function call."); + +#define OS_PTSNAME_METHODDEF \ + {"ptsname", (PyCFunction)os_ptsname, METH_O, os_ptsname__doc__}, + +static PyObject * +os_ptsname_impl(PyObject *module, int fd); + +static PyObject * +os_ptsname(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int fd; + + if (!_PyLong_FileDescriptor_Converter(arg, &fd)) { + goto exit; + } + return_value = os_ptsname_impl(module, fd); + +exit: + return return_value; +} + +#endif /* (defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R)) */ + #if (defined(HAVE_OPENPTY) || defined(HAVE__GETPTY) || defined(HAVE_DEV_PTMX)) PyDoc_STRVAR(os_openpty__doc__, @@ -11991,6 +12141,22 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #define OS_SCHED_GETAFFINITY_METHODDEF #endif /* !defined(OS_SCHED_GETAFFINITY_METHODDEF) */ +#ifndef OS_POSIX_OPENPT_METHODDEF + #define OS_POSIX_OPENPT_METHODDEF +#endif /* !defined(OS_POSIX_OPENPT_METHODDEF) */ + +#ifndef OS_GRANTPT_METHODDEF + #define OS_GRANTPT_METHODDEF +#endif /* !defined(OS_GRANTPT_METHODDEF) */ + +#ifndef OS_UNLOCKPT_METHODDEF + #define OS_UNLOCKPT_METHODDEF +#endif /* !defined(OS_UNLOCKPT_METHODDEF) */ + +#ifndef OS_PTSNAME_METHODDEF + #define OS_PTSNAME_METHODDEF +#endif /* !defined(OS_PTSNAME_METHODDEF) */ + #ifndef OS_OPENPTY_METHODDEF #define OS_OPENPTY_METHODDEF #endif /* !defined(OS_OPENPTY_METHODDEF) */ @@ -12422,4 +12588,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=18c128534c355d84 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=43e4e557c771358a input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 007fc1cb116f84..40ff131b119d66 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -8358,6 +8358,149 @@ os_sched_getaffinity_impl(PyObject *module, pid_t pid) #endif /* HAVE_SCHED_H */ +#ifdef HAVE_POSIX_OPENPT +/*[clinic input] +os.posix_openpt -> int + + oflag: int + / + +Open and return a file descriptor for a master pseudo-terminal device. + +Performs a posix_openpt() C function call. The oflag argument is used to +set file status flags and file access modes as specified in the manual page +of posix_openpt() of your system. +[clinic start generated code]*/ + +static int +os_posix_openpt_impl(PyObject *module, int oflag) +/*[clinic end generated code: output=ee0bc2624305fc79 input=0de33d0e29693caa]*/ +{ + int fd; + +#if defined(O_CLOEXEC) + oflag |= O_CLOEXEC; +#endif + + fd = posix_openpt(oflag); + if (fd == -1) { + posix_error(); + return -1; + } + + // Just in case, likely a no-op given O_CLOEXEC above. + if (_Py_set_inheritable(fd, 0, NULL) < 0) { + close(fd); + return -1; + } + + return fd; +} +#endif /* HAVE_POSIX_OPENPT */ + +#ifdef HAVE_GRANTPT +/*[clinic input] +os.grantpt + + fd: fildes + File descriptor of a master pseudo-terminal device. + / + +Grant access to the slave pseudo-terminal device. + +Performs a grantpt() C function call. +[clinic start generated code]*/ + +static PyObject * +os_grantpt_impl(PyObject *module, int fd) +/*[clinic end generated code: output=dfd580015cf548ab input=0668e3b96760e849]*/ +{ + int ret; + int saved_errno; + PyOS_sighandler_t sig_saved; + + sig_saved = PyOS_setsig(SIGCHLD, SIG_DFL); + + ret = grantpt(fd); + if (ret == -1) + saved_errno = errno; + + PyOS_setsig(SIGCHLD, sig_saved); + + if (ret == -1) { + errno = saved_errno; + return posix_error(); + } + + Py_RETURN_NONE; +} +#endif /* HAVE_GRANTPT */ + +#ifdef HAVE_UNLOCKPT +/*[clinic input] +os.unlockpt + + fd: fildes + File descriptor of a master pseudo-terminal device. + / + +Unlock a pseudo-terminal master/slave pair. + +Performs an unlockpt() C function call. +[clinic start generated code]*/ + +static PyObject * +os_unlockpt_impl(PyObject *module, int fd) +/*[clinic end generated code: output=e08d354dec12d30c input=de7ab1f59f69a2b4]*/ +{ + if (unlockpt(fd) == -1) + return posix_error(); + + Py_RETURN_NONE; +} +#endif /* HAVE_UNLOCKPT */ + +#if defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R) +/*[clinic input] +os.ptsname + + fd: fildes + File descriptor of a master pseudo-terminal device. + / + +Return the name of the slave pseudo-terminal device. + +If the ptsname_r() C function is available, it is called; +otherwise, performs a ptsname() C function call. +[clinic start generated code]*/ + +static PyObject * +os_ptsname_impl(PyObject *module, int fd) +/*[clinic end generated code: output=ef300fadc5675872 input=1369ccc0546f3130]*/ +{ +#ifdef HAVE_PTSNAME_R + int ret; + char name[MAXPATHLEN+1]; + + ret = ptsname_r(fd, name, sizeof(name)); + if (ret != 0) { + errno = ret; + return posix_error(); + } +#else + char *name; + + name = ptsname(fd); + /* POSIX manpage: Upon failure, ptsname() shall return a null pointer and may set errno. + *MAY* set errno? Hmm... */ + if (name == NULL) + return posix_error(); +#endif /* HAVE_PTSNAME_R */ + + return PyUnicode_DecodeFSDefault(name); +} +#endif /* defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R) */ + /* AIX uses /dev/ptc but is otherwise the same as /dev/ptmx */ #if defined(HAVE_DEV_PTC) && !defined(HAVE_DEV_PTMX) # define DEV_PTY_FILE "/dev/ptc" @@ -16275,6 +16418,10 @@ static PyMethodDef posix_methods[] = { OS_SCHED_YIELD_METHODDEF OS_SCHED_SETAFFINITY_METHODDEF OS_SCHED_GETAFFINITY_METHODDEF + OS_POSIX_OPENPT_METHODDEF + OS_GRANTPT_METHODDEF + OS_UNLOCKPT_METHODDEF + OS_PTSNAME_METHODDEF OS_OPENPTY_METHODDEF OS_LOGIN_TTY_METHODDEF OS_FORKPTY_METHODDEF diff --git a/configure b/configure index c563c3f5d3c7e6..adc5a8f014c795 100755 --- a/configure +++ b/configure @@ -17637,6 +17637,12 @@ if test "x$ac_cv_func_getwd" = xyes then : printf "%s\n" "#define HAVE_GETWD 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "grantpt" "ac_cv_func_grantpt" +if test "x$ac_cv_func_grantpt" = xyes +then : + printf "%s\n" "#define HAVE_GRANTPT 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "if_nameindex" "ac_cv_func_if_nameindex" if test "x$ac_cv_func_if_nameindex" = xyes @@ -17823,6 +17829,12 @@ if test "x$ac_cv_func_posix_fallocate" = xyes then : printf "%s\n" "#define HAVE_POSIX_FALLOCATE 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "posix_openpt" "ac_cv_func_posix_openpt" +if test "x$ac_cv_func_posix_openpt" = xyes +then : + printf "%s\n" "#define HAVE_POSIX_OPENPT 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "posix_spawn" "ac_cv_func_posix_spawn" if test "x$ac_cv_func_posix_spawn" = xyes @@ -17877,6 +17889,18 @@ if test "x$ac_cv_func_pthread_kill" = xyes then : printf "%s\n" "#define HAVE_PTHREAD_KILL 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "ptsname" "ac_cv_func_ptsname" +if test "x$ac_cv_func_ptsname" = xyes +then : + printf "%s\n" "#define HAVE_PTSNAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ptsname_r" "ac_cv_func_ptsname_r" +if test "x$ac_cv_func_ptsname_r" = xyes +then : + printf "%s\n" "#define HAVE_PTSNAME_R 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite" if test "x$ac_cv_func_pwrite" = xyes @@ -18285,6 +18309,12 @@ if test "x$ac_cv_func_unlinkat" = xyes then : printf "%s\n" "#define HAVE_UNLINKAT 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "unlockpt" "ac_cv_func_unlockpt" +if test "x$ac_cv_func_unlockpt" = xyes +then : + printf "%s\n" "#define HAVE_UNLOCKPT 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "utimensat" "ac_cv_func_utimensat" if test "x$ac_cv_func_utimensat" = xyes diff --git a/configure.ac b/configure.ac index 13c46b3e80151d..f5fa17fd8b7d0d 100644 --- a/configure.ac +++ b/configure.ac @@ -4791,12 +4791,12 @@ AC_CHECK_FUNCS([ \ getgrnam_r getgrouplist getgroups gethostname getitimer getloadavg getlogin \ getpeername getpgid getpid getppid getpriority _getpty \ getpwent getpwnam_r getpwuid getpwuid_r getresgid getresuid getrusage getsid getspent \ - getspnam getuid getwd if_nameindex initgroups kill killpg lchown linkat \ + getspnam getuid getwd grantpt if_nameindex initgroups kill killpg lchown linkat \ lockf lstat lutimes madvise mbrtowc memrchr mkdirat mkfifo mkfifoat \ mknod mknodat mktime mmap mremap nice openat opendir pathconf pause pipe \ - pipe2 plock poll posix_fadvise posix_fallocate posix_spawn posix_spawnp \ + pipe2 plock poll posix_fadvise posix_fallocate posix_openpt posix_spawn posix_spawnp \ posix_spawn_file_actions_addclosefrom_np \ - pread preadv preadv2 pthread_condattr_setclock pthread_init pthread_kill \ + pread preadv preadv2 pthread_condattr_setclock pthread_init pthread_kill ptsname ptsname_r \ pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ sched_setparam sched_setscheduler sem_clockwait sem_getvalue sem_open \ @@ -4806,7 +4806,7 @@ AC_CHECK_FUNCS([ \ sigfillset siginterrupt sigpending sigrelse sigtimedwait sigwait \ sigwaitinfo snprintf splice strftime strlcpy strsignal symlinkat sync \ sysconf system tcgetpgrp tcsetpgrp tempnam timegm times tmpfile \ - tmpnam tmpnam_r truncate ttyname umask uname unlinkat utimensat utimes vfork \ + tmpnam tmpnam_r truncate ttyname umask uname unlinkat unlockpt utimensat utimes vfork \ wait wait3 wait4 waitid waitpid wcscoll wcsftime wcsxfrm wmemcmp writev \ ]) diff --git a/pyconfig.h.in b/pyconfig.h.in index d8a9f68951afbd..02e33c7007196d 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -601,6 +601,9 @@ bcopy. */ #undef HAVE_GLIBC_MEMMOVE_BUG +/* Define to 1 if you have the `grantpt' function. */ +#undef HAVE_GRANTPT + /* Define to 1 if you have the header file. */ #undef HAVE_GRP_H @@ -899,6 +902,9 @@ /* Define to 1 if you have the `posix_fallocate' function. */ #undef HAVE_POSIX_FALLOCATE +/* Define to 1 if you have the `posix_openpt' function. */ +#undef HAVE_POSIX_OPENPT + /* Define to 1 if you have the `posix_spawn' function. */ #undef HAVE_POSIX_SPAWN @@ -951,6 +957,12 @@ /* Define if platform requires stubbed pthreads support */ #undef HAVE_PTHREAD_STUBS +/* Define to 1 if you have the `ptsname' function. */ +#undef HAVE_PTSNAME + +/* Define to 1 if you have the `ptsname_r' function. */ +#undef HAVE_PTSNAME_R + /* Define to 1 if you have the header file. */ #undef HAVE_PTY_H @@ -1459,6 +1471,9 @@ /* Define to 1 if you have the `unlinkat' function. */ #undef HAVE_UNLINKAT +/* Define to 1 if you have the `unlockpt' function. */ +#undef HAVE_UNLOCKPT + /* Define to 1 if you have the `unshare' function. */ #undef HAVE_UNSHARE From 39c766b579cabc71a4a50773d299d4350221a70b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 29 Jan 2024 18:20:13 +0200 Subject: [PATCH 067/127] Fix more references to datetime and time classes (GH-114717) They could be confused with references to datetime and time modules. --- Doc/library/datetime.rst | 8 ++++---- Doc/library/mailbox.rst | 8 ++++---- Doc/whatsnew/3.8.rst | 4 ++-- Misc/NEWS.d/3.13.0a1.rst | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 47ecb0ba331bdc..4ff049c8709289 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -1255,7 +1255,7 @@ Instance methods: ``tzinfo=None`` can be specified to create a naive datetime from an aware datetime with no conversion of date and time data. - :class:`datetime` objects are also supported by generic function + :class:`.datetime` objects are also supported by generic function :func:`copy.replace`. .. versionchanged:: 3.6 @@ -1678,7 +1678,7 @@ Usage of ``KabulTz`` from above:: :class:`.time` Objects ---------------------- -A :class:`time` object represents a (local) time of day, independent of any particular +A :class:`.time` object represents a (local) time of day, independent of any particular day, and subject to adjustment via a :class:`tzinfo` object. .. class:: time(hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0) @@ -1836,7 +1836,7 @@ Instance methods: ``tzinfo=None`` can be specified to create a naive :class:`.time` from an aware :class:`.time`, without conversion of the time data. - :class:`time` objects are also supported by generic function + :class:`.time` objects are also supported by generic function :func:`copy.replace`. .. versionchanged:: 3.6 @@ -2522,7 +2522,7 @@ information, which are supported in ``datetime.strptime`` but are discarded by ``time.strptime``. For :class:`.time` objects, the format codes for year, month, and day should not -be used, as :class:`time` objects have no such values. If they're used anyway, +be used, as :class:`!time` objects have no such values. If they're used anyway, ``1900`` is substituted for the year, and ``1`` for the month and day. For :class:`date` objects, the format codes for hours, minutes, seconds, and diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst index fa5b273093f583..a613548c9e518e 100644 --- a/Doc/library/mailbox.rst +++ b/Doc/library/mailbox.rst @@ -1136,8 +1136,8 @@ When a :class:`!MaildirMessage` instance is created based upon a leading "From " or trailing newline. For convenience, *time_* may be specified and will be formatted appropriately and appended to *from_*. If *time_* is specified, it should be a :class:`time.struct_time` instance, a - tuple suitable for passing to :meth:`time.strftime`, or ``True`` (to use - :meth:`time.gmtime`). + tuple suitable for passing to :func:`time.strftime`, or ``True`` (to use + :func:`time.gmtime`). .. method:: get_flags() @@ -1508,8 +1508,8 @@ When a :class:`!BabylMessage` instance is created based upon an leading "From " or trailing newline. For convenience, *time_* may be specified and will be formatted appropriately and appended to *from_*. If *time_* is specified, it should be a :class:`time.struct_time` instance, a - tuple suitable for passing to :meth:`time.strftime`, or ``True`` (to use - :meth:`time.gmtime`). + tuple suitable for passing to :func:`time.strftime`, or ``True`` (to use + :func:`time.gmtime`). .. method:: get_flags() diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 304d1b4ef4efe8..b041e592d61ed1 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -754,8 +754,8 @@ datetime -------- Added new alternate constructors :meth:`datetime.date.fromisocalendar` and -:meth:`datetime.datetime.fromisocalendar`, which construct :class:`date` and -:class:`datetime` objects respectively from ISO year, week number, and weekday; +:meth:`datetime.datetime.fromisocalendar`, which construct :class:`~datetime.date` and +:class:`~datetime.datetime` objects respectively from ISO year, week number, and weekday; these are the inverse of each class's ``isocalendar`` method. (Contributed by Paul Ganssle in :issue:`36004`.) diff --git a/Misc/NEWS.d/3.13.0a1.rst b/Misc/NEWS.d/3.13.0a1.rst index 102bddcee5c5c2..d385b6a4504f97 100644 --- a/Misc/NEWS.d/3.13.0a1.rst +++ b/Misc/NEWS.d/3.13.0a1.rst @@ -2276,7 +2276,7 @@ creation. .. nonce: m2H5Bk .. section: Library -Remove unnecessary extra ``__slots__`` in :py:class:`datetime`\'s pure +Remove unnecessary extra ``__slots__`` in :class:`~datetime.datetime`\'s pure python implementation to reduce memory size, as they are defined in the superclass. Patch by James Hilton-Balfe From 2c089b09ac0872e08d146c55ed60d754154761c3 Mon Sep 17 00:00:00 2001 From: Steven Ward Date: Mon, 29 Jan 2024 11:58:21 -0500 Subject: [PATCH 068/127] gh-112240: Add option to calendar module CLI to specify the weekday to start each week (GH-112241) --- Doc/library/calendar.rst | 8 +++++++- Lib/calendar.py | 7 +++++++ .../2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst diff --git a/Doc/library/calendar.rst b/Doc/library/calendar.rst index 6586f539a8da4f..c4dcf5641d6066 100644 --- a/Doc/library/calendar.rst +++ b/Doc/library/calendar.rst @@ -586,10 +586,16 @@ The following options are accepted: or as an HTML document. +.. option:: --first-weekday WEEKDAY, -f WEEKDAY + + The weekday to start each week. + Must be a number between 0 (Monday) and 6 (Sunday). + Defaults to 0. + + .. option:: year The year to print the calendar for. - Must be a number between 1 and 9999. Defaults to the current year. diff --git a/Lib/calendar.py b/Lib/calendar.py index 3c79540f986b63..833ce331b14a0c 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -734,6 +734,11 @@ def main(args=None): choices=("text", "html"), help="output type (text or html)" ) + parser.add_argument( + "-f", "--first-weekday", + type=int, default=0, + help="weekday (0 is Monday, 6 is Sunday) to start each week (default 0)" + ) parser.add_argument( "year", nargs='?', type=int, @@ -761,6 +766,7 @@ def main(args=None): cal = LocaleHTMLCalendar(locale=locale) else: cal = HTMLCalendar() + cal.setfirstweekday(options.first_weekday) encoding = options.encoding if encoding is None: encoding = sys.getdefaultencoding() @@ -775,6 +781,7 @@ def main(args=None): cal = LocaleTextCalendar(locale=locale) else: cal = TextCalendar() + cal.setfirstweekday(options.first_weekday) optdict = dict(w=options.width, l=options.lines) if options.month is None: optdict["c"] = options.spacing diff --git a/Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst b/Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst new file mode 100644 index 00000000000000..686f0311e80dcb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst @@ -0,0 +1,2 @@ +Add option to calendar module CLI to specify the weekday to start each week. +Patch by Steven Ward. From 3d716655d22dc14e79ac0d30f33eef0a49efdac0 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 29 Jan 2024 09:38:03 -0800 Subject: [PATCH 069/127] gh-112075: Use PyMem_* for allocating dict keys objects (#114543) Use PyMem_* for keys allocation --- Objects/dictobject.c | 66 +++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index e608b91679b568..c5477ab15f8dc9 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -262,7 +262,7 @@ _PyDict_ClearFreeList(PyInterpreterState *interp) PyObject_GC_Del(op); } while (state->keys_numfree) { - PyObject_Free(state->keys_free_list[--state->keys_numfree]); + PyMem_Free(state->keys_free_list[--state->keys_numfree]); } #endif } @@ -332,6 +332,22 @@ dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk) _Py_DecRefTotal(_PyInterpreterState_GET()); #endif if (--dk->dk_refcnt == 0) { + if (DK_IS_UNICODE(dk)) { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dk); + Py_ssize_t i, n; + for (i = 0, n = dk->dk_nentries; i < n; i++) { + Py_XDECREF(entries[i].me_key); + Py_XDECREF(entries[i].me_value); + } + } + else { + PyDictKeyEntry *entries = DK_ENTRIES(dk); + Py_ssize_t i, n; + for (i = 0, n = dk->dk_nentries; i < n; i++) { + Py_XDECREF(entries[i].me_key); + Py_XDECREF(entries[i].me_value); + } + } free_keys_object(interp, dk); } } @@ -640,9 +656,9 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) else #endif { - dk = PyObject_Malloc(sizeof(PyDictKeysObject) - + ((size_t)1 << log2_bytes) - + entry_size * usable); + dk = PyMem_Malloc(sizeof(PyDictKeysObject) + + ((size_t)1 << log2_bytes) + + entry_size * usable); if (dk == NULL) { PyErr_NoMemory(); return NULL; @@ -666,23 +682,6 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) static void free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) { - assert(keys != Py_EMPTY_KEYS); - if (DK_IS_UNICODE(keys)) { - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - Py_ssize_t i, n; - for (i = 0, n = keys->dk_nentries; i < n; i++) { - Py_XDECREF(entries[i].me_key); - Py_XDECREF(entries[i].me_value); - } - } - else { - PyDictKeyEntry *entries = DK_ENTRIES(keys); - Py_ssize_t i, n; - for (i = 0, n = keys->dk_nentries; i < n; i++) { - Py_XDECREF(entries[i].me_key); - Py_XDECREF(entries[i].me_value); - } - } #if PyDict_MAXFREELIST > 0 struct _Py_dict_state *state = get_dict_state(interp); #ifdef Py_DEBUG @@ -697,7 +696,7 @@ free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) return; } #endif - PyObject_Free(keys); + PyMem_Free(keys); } static inline PyDictValues* @@ -798,7 +797,7 @@ clone_combined_dict_keys(PyDictObject *orig) assert(orig->ma_keys->dk_refcnt == 1); size_t keys_size = _PyDict_KeysSize(orig->ma_keys); - PyDictKeysObject *keys = PyObject_Malloc(keys_size); + PyDictKeysObject *keys = PyMem_Malloc(keys_size); if (keys == NULL) { PyErr_NoMemory(); return NULL; @@ -1544,32 +1543,13 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, } } - // We can not use free_keys_object here because key's reference - // are moved already. if (oldkeys != Py_EMPTY_KEYS) { #ifdef Py_REF_DEBUG _Py_DecRefTotal(_PyInterpreterState_GET()); #endif assert(oldkeys->dk_kind != DICT_KEYS_SPLIT); assert(oldkeys->dk_refcnt == 1); -#if PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = get_dict_state(interp); -#ifdef Py_DEBUG - // dictresize() must not be called after _PyDict_Fini() - assert(state->keys_numfree != -1); -#endif - if (DK_LOG_SIZE(oldkeys) == PyDict_LOG_MINSIZE && - DK_IS_UNICODE(oldkeys) && - state->keys_numfree < PyDict_MAXFREELIST) - { - state->keys_free_list[state->keys_numfree++] = oldkeys; - OBJECT_STAT_INC(to_freelist); - } - else -#endif - { - PyObject_Free(oldkeys); - } + free_keys_object(interp, oldkeys); } } From 0cd9bacb8ad41fe86f95b326e9199caa749539eb Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 29 Jan 2024 09:47:54 -0800 Subject: [PATCH 070/127] gh-112075: Dictionary global version counter should use atomic increments (#114568) Dictionary global version counter should use atomic increments --- Include/internal/pycore_dict.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index d96870e9197bbf..b4e1f8cf1e320b 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -209,8 +209,14 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_VERSION_INCREMENT (1 << DICT_MAX_WATCHERS) #define DICT_VERSION_MASK (DICT_VERSION_INCREMENT - 1) +#ifdef Py_GIL_DISABLED +#define DICT_NEXT_VERSION(INTERP) \ + (_Py_atomic_add_uint64(&(INTERP)->dict_state.global_version, DICT_VERSION_INCREMENT) + DICT_VERSION_INCREMENT) + +#else #define DICT_NEXT_VERSION(INTERP) \ ((INTERP)->dict_state.global_version += DICT_VERSION_INCREMENT) +#endif void _PyDict_SendEvent(int watcher_bits, From aa3402ad451777d8dd3ec560e14cb16dc8540c0e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 29 Jan 2024 19:58:31 +0200 Subject: [PATCH 071/127] gh-114678: Fix incorrect deprecation warning for 'N' specifier in Decimal format (GH-114683) Co-authored-by: Stefan Krah --- Lib/test/test_decimal.py | 10 +++++++++- .../2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst | 3 +++ Modules/_decimal/_decimal.c | 14 ++++++++------ 3 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 7a5fe62b467372..1423bc61c7f690 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -41,6 +41,7 @@ darwin_malloc_err_warning, is_emscripten) from test.support.import_helper import import_fresh_module from test.support import threading_helper +from test.support import warnings_helper import random import inspect import threading @@ -1237,7 +1238,14 @@ def test_deprecated_N_format(self): else: self.assertRaises(ValueError, format, h, 'N') self.assertRaises(ValueError, format, h, '010.3N') - + with warnings_helper.check_no_warnings(self): + self.assertEqual(format(h, 'N>10.3'), 'NN6.63E-34') + self.assertEqual(format(h, 'N>10.3n'), 'NN6.63e-34') + self.assertEqual(format(h, 'N>10.3e'), 'N6.626e-34') + self.assertEqual(format(h, 'N>10.3f'), 'NNNNN0.000') + self.assertRaises(ValueError, format, h, '>Nf') + self.assertRaises(ValueError, format, h, '10Nf') + self.assertRaises(ValueError, format, h, 'Nx') @run_with_locale('LC_ALL', 'ps_AF') def test_wide_char_separator_decimal_point(self): diff --git a/Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst b/Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst new file mode 100644 index 00000000000000..2306af4a39dcf6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst @@ -0,0 +1,3 @@ +Ensure that deprecation warning for 'N' specifier in :class:`~decimal.Decimal` +format is not raised for cases where 'N' appears in other places +in the format specifier. Based on patch by Stefan Krah. diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 8b93f8e2cbcf0b..127f5f2887d4cd 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -3446,6 +3446,14 @@ dec_format(PyObject *dec, PyObject *args) if (fmt == NULL) { return NULL; } + + if (size > 0 && fmt[size-1] == 'N') { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "Format specifier 'N' is deprecated", 1) < 0) { + return NULL; + } + } + /* NOTE: If https://github.com/python/cpython/pull/29438 lands, the * format string manipulation below can be eliminated by enhancing * the forked mpd_parse_fmt_str(). */ @@ -3593,12 +3601,6 @@ dec_format(PyObject *dec, PyObject *args) if (replace_fillchar) { dec_replace_fillchar(decstring); } - if (strchr(fmt, 'N') != NULL) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "Format specifier 'N' is deprecated", 1) < 0) { - goto finish; - } - } result = PyUnicode_DecodeUTF8(decstring, size, NULL); From 29952c86f3f8a972203a1ccd8381448efe145ada Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Mon, 29 Jan 2024 21:12:33 +0200 Subject: [PATCH 072/127] TaskGroup: Use explicit None check for cancellation error (#114708) --- Lib/asyncio/taskgroups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index e1c56d140bef7d..f322b1f6653f6a 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -132,7 +132,7 @@ async def __aexit__(self, et, exc, tb): # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. - if propagate_cancellation_error and not self._errors: + if propagate_cancellation_error is not None and not self._errors: raise propagate_cancellation_error if et is not None and not issubclass(et, exceptions.CancelledError): From 53d921ed96e1c57b2e42f984d3a5ca8347fedb81 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Mon, 29 Jan 2024 21:48:49 +0100 Subject: [PATCH 073/127] gh-114569: Use PyMem_* APIs for non-PyObjects in unicodeobject.c (#114690) --- Objects/unicodeobject.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4b03cc3f4da5fa..b236ddba9cdc69 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -996,7 +996,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) new_size = (struct_size + (length + 1) * char_size); if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_UTF8(unicode)); + PyMem_Free(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } @@ -1049,7 +1049,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_UTF8(unicode)); + PyMem_Free(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } @@ -1590,10 +1590,10 @@ unicode_dealloc(PyObject *unicode) return; } if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_UTF8(unicode)); + PyMem_Free(_PyUnicode_UTF8(unicode)); } if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) { - PyObject_Free(_PyUnicode_DATA_ANY(unicode)); + PyMem_Free(_PyUnicode_DATA_ANY(unicode)); } Py_TYPE(unicode)->tp_free(unicode); @@ -5203,7 +5203,7 @@ unicode_fill_utf8(PyObject *unicode) PyBytes_AS_STRING(writer.buffer); Py_ssize_t len = end - start; - char *cache = PyObject_Malloc(len + 1); + char *cache = PyMem_Malloc(len + 1); if (cache == NULL) { _PyBytesWriter_Dealloc(&writer); PyErr_NoMemory(); @@ -14674,7 +14674,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) PyErr_NoMemory(); goto onError; } - data = PyObject_Malloc((length + 1) * char_size); + data = PyMem_Malloc((length + 1) * char_size); if (data == NULL) { PyErr_NoMemory(); goto onError; From 3996cbdd33a479b7e59757b81489cbb3370f85e5 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:24:21 +0200 Subject: [PATCH 074/127] Set `hosted_on` for Read the Docs builds (#114697) --- Doc/conf.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index e12128ad356e1b..c2d57696aeeaa3 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -6,7 +6,9 @@ # The contents of this file are pickled, so don't put values in the namespace # that aren't pickleable (module imports are okay, they're removed automatically). -import sys, os, time +import os +import sys +import time sys.path.append(os.path.abspath('tools/extensions')) sys.path.append(os.path.abspath('includes')) @@ -55,7 +57,7 @@ # General substitutions. project = 'Python' -copyright = '2001-%s, Python Software Foundation' % time.strftime('%Y') +copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" # We look for the Include/patchlevel.h file in the current Python source tree # and replace the values accordingly. @@ -302,6 +304,9 @@ 'root_include_title': False # We use the version switcher instead. } +if os.getenv("READTHEDOCS"): + html_theme_options["hosted_on"] = 'Read the Docs' + # Override stylesheet fingerprinting for Windows CHM htmlhelp to fix GH-91207 # https://github.com/python/cpython/issues/91207 if any('htmlhelp' in arg for arg in sys.argv): @@ -310,7 +315,7 @@ print("It may be removed in the future\n") # Short title used e.g. for HTML tags. -html_short_title = '%s Documentation' % release +html_short_title = f'{release} Documentation' # Deployment preview information # (See .readthedocs.yml and https://docs.readthedocs.io/en/stable/reference/environment-variables.html) @@ -359,12 +364,9 @@ latex_engine = 'xelatex' -# Get LaTeX to handle Unicode correctly latex_elements = { -} - -# Additional stuff for the LaTeX preamble. -latex_elements['preamble'] = r''' + # For the LaTeX preamble. + 'preamble': r''' \authoraddress{ \sphinxstrong{Python Software Foundation}\\ Email: \sphinxemail{docs@python.org} @@ -372,13 +374,12 @@ \let\Verbatim=\OriginalVerbatim \let\endVerbatim=\endOriginalVerbatim \setcounter{tocdepth}{2} -''' - -# The paper size ('letter' or 'a4'). -latex_elements['papersize'] = 'a4' - -# The font size ('10pt', '11pt' or '12pt'). -latex_elements['pointsize'] = '10pt' +''', + # The paper size ('letter' or 'a4'). + 'papersize': 'a4', + # The font size ('10pt', '11pt' or '12pt'). + 'pointsize': '10pt', +} # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). @@ -441,9 +442,9 @@ # Regexes to find C items in the source files. coverage_c_regexes = { - 'cfunction': (r'^PyAPI_FUNC\(.*\)\s+([^_][\w_]+)'), - 'data': (r'^PyAPI_DATA\(.*\)\s+([^_][\w_]+)'), - 'macro': (r'^#define ([^_][\w_]+)\(.*\)[\s|\\]'), + 'cfunction': r'^PyAPI_FUNC\(.*\)\s+([^_][\w_]+)', + 'data': r'^PyAPI_DATA\(.*\)\s+([^_][\w_]+)', + 'macro': r'^#define ([^_][\w_]+)\(.*\)[\s|\\]', } # The coverage checker will ignore all C items whose names match these regexes From 8612230c1cacab6d48bfbeb9e17d04ef5a9acf21 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" <erlend@python.org> Date: Tue, 30 Jan 2024 00:04:34 +0100 Subject: [PATCH 075/127] gh-114569: Use PyMem_* APIs for non-PyObjects in compiler (#114587) --- Python/compile.c | 25 ++++++++++++------------- Python/flowgraph.c | 6 +++--- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 7cf05dd0683119..4c1d3bb2d2b475 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -160,7 +160,7 @@ _PyCompile_EnsureArrayLargeEnough(int idx, void **array, int *alloc, if (idx >= new_alloc) { new_alloc = idx + default_alloc; } - arr = PyObject_Calloc(new_alloc, item_size); + arr = PyMem_Calloc(new_alloc, item_size); if (arr == NULL) { PyErr_NoMemory(); return ERROR; @@ -181,7 +181,7 @@ _PyCompile_EnsureArrayLargeEnough(int idx, void **array, int *alloc, } assert(newsize > 0); - void *tmp = PyObject_Realloc(arr, newsize); + void *tmp = PyMem_Realloc(arr, newsize); if (tmp == NULL) { PyErr_NoMemory(); return ERROR; @@ -282,10 +282,10 @@ instr_sequence_insert_instruction(instr_sequence *seq, int pos, static void instr_sequence_fini(instr_sequence *seq) { - PyObject_Free(seq->s_labelmap); + PyMem_Free(seq->s_labelmap); seq->s_labelmap = NULL; - PyObject_Free(seq->s_instrs); + PyMem_Free(seq->s_instrs); seq->s_instrs = NULL; } @@ -690,7 +690,7 @@ compiler_unit_free(struct compiler_unit *u) Py_CLEAR(u->u_metadata.u_cellvars); Py_CLEAR(u->u_metadata.u_fasthidden); Py_CLEAR(u->u_private); - PyObject_Free(u); + PyMem_Free(u); } static int @@ -1262,8 +1262,7 @@ compiler_enter_scope(struct compiler *c, identifier name, struct compiler_unit *u; - u = (struct compiler_unit *)PyObject_Calloc(1, sizeof( - struct compiler_unit)); + u = (struct compiler_unit *)PyMem_Calloc(1, sizeof(struct compiler_unit)); if (!u) { PyErr_NoMemory(); return ERROR; @@ -6657,7 +6656,7 @@ ensure_fail_pop(struct compiler *c, pattern_context *pc, Py_ssize_t n) return SUCCESS; } Py_ssize_t needed = sizeof(jump_target_label) * size; - jump_target_label *resized = PyObject_Realloc(pc->fail_pop, needed); + jump_target_label *resized = PyMem_Realloc(pc->fail_pop, needed); if (resized == NULL) { PyErr_NoMemory(); return ERROR; @@ -6696,13 +6695,13 @@ emit_and_reset_fail_pop(struct compiler *c, location loc, USE_LABEL(c, pc->fail_pop[pc->fail_pop_size]); if (codegen_addop_noarg(INSTR_SEQUENCE(c), POP_TOP, loc) < 0) { pc->fail_pop_size = 0; - PyObject_Free(pc->fail_pop); + PyMem_Free(pc->fail_pop); pc->fail_pop = NULL; return ERROR; } } USE_LABEL(c, pc->fail_pop[0]); - PyObject_Free(pc->fail_pop); + PyMem_Free(pc->fail_pop); pc->fail_pop = NULL; return SUCCESS; } @@ -7206,7 +7205,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) Py_DECREF(pc->stores); *pc = old_pc; Py_INCREF(pc->stores); - // Need to NULL this for the PyObject_Free call in the error block. + // Need to NULL this for the PyMem_Free call in the error block. old_pc.fail_pop = NULL; // No match. Pop the remaining copy of the subject and fail: if (codegen_addop_noarg(INSTR_SEQUENCE(c), POP_TOP, LOC(p)) < 0 || @@ -7252,7 +7251,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) diff: compiler_error(c, LOC(p), "alternative patterns bind different names"); error: - PyObject_Free(old_pc.fail_pop); + PyMem_Free(old_pc.fail_pop); Py_DECREF(old_pc.stores); Py_XDECREF(control); return ERROR; @@ -7453,7 +7452,7 @@ compiler_match(struct compiler *c, stmt_ty s) pattern_context pc; pc.fail_pop = NULL; int result = compiler_match_inner(c, s, &pc); - PyObject_Free(pc.fail_pop); + PyMem_Free(pc.fail_pop); return result; } diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 96610b3cb11a43..bfc23a298ff492 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -162,7 +162,7 @@ basicblock_last_instr(const basicblock *b) { static basicblock * cfg_builder_new_block(cfg_builder *g) { - basicblock *b = (basicblock *)PyObject_Calloc(1, sizeof(basicblock)); + basicblock *b = (basicblock *)PyMem_Calloc(1, sizeof(basicblock)); if (b == NULL) { PyErr_NoMemory(); return NULL; @@ -437,10 +437,10 @@ _PyCfgBuilder_Free(cfg_builder *g) basicblock *b = g->g_block_list; while (b != NULL) { if (b->b_instr) { - PyObject_Free((void *)b->b_instr); + PyMem_Free((void *)b->b_instr); } basicblock *next = b->b_list; - PyObject_Free((void *)b); + PyMem_Free((void *)b); b = next; } PyMem_Free(g); From 742ba6081c92744ba30f16a0bb17ef9d9e809611 Mon Sep 17 00:00:00 2001 From: Brandt Bucher <brandtbucher@microsoft.com> Date: Mon, 29 Jan 2024 16:29:54 -0800 Subject: [PATCH 076/127] GH-113464: Make Brandt a codeowner for JIT stuff (GH-114739) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ae915423ece955..f4d0411504a832 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -21,6 +21,7 @@ configure* @erlend-aasland @corona10 **/*context* @1st1 **/*genobject* @markshannon **/*hamt* @1st1 +**/*jit* @brandtbucher Objects/set* @rhettinger Objects/dict* @methane @markshannon Objects/typevarobject.c @JelleZijlstra @@ -37,7 +38,6 @@ Python/ast_opt.c @isidentical Python/bytecodes.c @markshannon @gvanrossum Python/optimizer*.c @markshannon @gvanrossum Lib/test/test_patma.py @brandtbucher -Lib/test/test_peepholer.py @brandtbucher Lib/test/test_type_*.py @JelleZijlstra Lib/test/test_capi/test_misc.py @markshannon @gvanrossum Tools/c-analyzer/ @ericsnowcurrently From 963904335e579bfe39101adf3fd6a0cf705975ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sviatoslav=20Sydorenko=20=28=D0=A1=D0=B2=D1=8F=D1=82=D0=BE?= =?UTF-8?q?=D1=81=D0=BB=D0=B0=D0=B2=20=D0=A1=D0=B8=D0=B4=D0=BE=D1=80=D0=B5?= =?UTF-8?q?=D0=BD=D0=BA=D0=BE=29?= <wk@sydorenko.org.ua> Date: Tue, 30 Jan 2024 02:25:31 +0100 Subject: [PATCH 077/127] GH-80789: Get rid of the ``ensurepip`` infra for many wheels (#109245) Co-authored-by: vstinner@python.org Co-authored-by: Pradyun Gedam <pradyunsg@gmail.com> Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com> --- Lib/ensurepip/__init__.py | 129 ++++++++++--------------- Lib/test/test_ensurepip.py | 46 ++++----- Tools/build/verify_ensurepip_wheels.py | 6 +- 3 files changed, 73 insertions(+), 108 deletions(-) diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index a09bf3201e1fb7..80ee125cfd4ed3 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -1,78 +1,64 @@ -import collections import os -import os.path import subprocess import sys import sysconfig import tempfile +from contextlib import nullcontext from importlib import resources +from pathlib import Path +from shutil import copy2 __all__ = ["version", "bootstrap"] -_PACKAGE_NAMES = ('pip',) _PIP_VERSION = "23.3.2" -_PROJECTS = [ - ("pip", _PIP_VERSION, "py3"), -] - -# Packages bundled in ensurepip._bundled have wheel_name set. -# Packages from WHEEL_PKG_DIR have wheel_path set. -_Package = collections.namedtuple('Package', - ('version', 'wheel_name', 'wheel_path')) # Directory of system wheel packages. Some Linux distribution packaging # policies recommend against bundling dependencies. For example, Fedora # installs wheel packages in the /usr/share/python-wheels/ directory and don't # install the ensurepip._bundled package. -_WHEEL_PKG_DIR = sysconfig.get_config_var('WHEEL_PKG_DIR') +if (_pkg_dir := sysconfig.get_config_var('WHEEL_PKG_DIR')) is not None: + _WHEEL_PKG_DIR = Path(_pkg_dir).resolve() +else: + _WHEEL_PKG_DIR = None + +def _find_wheel_pkg_dir_pip(): + if _WHEEL_PKG_DIR is None: + # NOTE: The compile-time `WHEEL_PKG_DIR` is unset so there is no place + # NOTE: for looking up the wheels. + return None -def _find_packages(path): - packages = {} + dist_matching_wheels = _WHEEL_PKG_DIR.glob('pip-*.whl') try: - filenames = os.listdir(path) - except OSError: - # Ignore: path doesn't exist or permission error - filenames = () - # Make the code deterministic if a directory contains multiple wheel files - # of the same package, but don't attempt to implement correct version - # comparison since this case should not happen. - filenames = sorted(filenames) - for filename in filenames: - # filename is like 'pip-21.2.4-py3-none-any.whl' - if not filename.endswith(".whl"): - continue - for name in _PACKAGE_NAMES: - prefix = name + '-' - if filename.startswith(prefix): - break - else: - continue - - # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' - version = filename.removeprefix(prefix).partition('-')[0] - wheel_path = os.path.join(path, filename) - packages[name] = _Package(version, None, wheel_path) - return packages - - -def _get_packages(): - global _PACKAGES, _WHEEL_PKG_DIR - if _PACKAGES is not None: - return _PACKAGES - - packages = {} - for name, version, py_tag in _PROJECTS: - wheel_name = f"{name}-{version}-{py_tag}-none-any.whl" - packages[name] = _Package(version, wheel_name, None) - if _WHEEL_PKG_DIR: - dir_packages = _find_packages(_WHEEL_PKG_DIR) - # only used the wheel package directory if all packages are found there - if all(name in dir_packages for name in _PACKAGE_NAMES): - packages = dir_packages - _PACKAGES = packages - return packages -_PACKAGES = None + last_matching_dist_wheel = sorted(dist_matching_wheels)[-1] + except IndexError: + # NOTE: `WHEEL_PKG_DIR` does not contain any wheel files for `pip`. + return None + + return nullcontext(last_matching_dist_wheel) + + +def _get_pip_whl_path_ctx(): + # Prefer pip from the wheel package directory, if present. + if (alternative_pip_wheel_path := _find_wheel_pkg_dir_pip()) is not None: + return alternative_pip_wheel_path + + return resources.as_file( + resources.files('ensurepip') + / '_bundled' + / f'pip-{_PIP_VERSION}-py3-none-any.whl' + ) + + +def _get_pip_version(): + with _get_pip_whl_path_ctx() as bundled_wheel_path: + wheel_name = bundled_wheel_path.name + return ( + # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' + wheel_name. + removeprefix('pip-'). + partition('-')[0] + ) def _run_pip(args, additional_paths=None): @@ -105,7 +91,7 @@ def version(): """ Returns a string specifying the bundled version of pip. """ - return _get_packages()['pip'].version + return _get_pip_version() def _disable_pip_configuration_settings(): @@ -167,24 +153,10 @@ def _bootstrap(*, root=None, upgrade=False, user=False, with tempfile.TemporaryDirectory() as tmpdir: # Put our bundled wheels into a temporary directory and construct the # additional paths that need added to sys.path - additional_paths = [] - for name, package in _get_packages().items(): - if package.wheel_name: - # Use bundled wheel package - wheel_name = package.wheel_name - wheel_path = resources.files("ensurepip") / "_bundled" / wheel_name - whl = wheel_path.read_bytes() - else: - # Use the wheel package directory - with open(package.wheel_path, "rb") as fp: - whl = fp.read() - wheel_name = os.path.basename(package.wheel_path) - - filename = os.path.join(tmpdir, wheel_name) - with open(filename, "wb") as fp: - fp.write(whl) - - additional_paths.append(filename) + tmpdir_path = Path(tmpdir) + with _get_pip_whl_path_ctx() as bundled_wheel_path: + tmp_wheel_path = tmpdir_path / bundled_wheel_path.name + copy2(bundled_wheel_path, tmp_wheel_path) # Construct the arguments to be passed to the pip command args = ["install", "--no-cache-dir", "--no-index", "--find-links", tmpdir] @@ -197,7 +169,8 @@ def _bootstrap(*, root=None, upgrade=False, user=False, if verbosity: args += ["-" + "v" * verbosity] - return _run_pip([*args, *_PACKAGE_NAMES], additional_paths) + return _run_pip([*args, "pip"], [os.fsdecode(tmp_wheel_path)]) + def _uninstall_helper(*, verbosity=0): """Helper to support a clean default uninstall process on Windows @@ -227,7 +200,7 @@ def _uninstall_helper(*, verbosity=0): if verbosity: args += ["-" + "v" * verbosity] - return _run_pip([*args, *reversed(_PACKAGE_NAMES)]) + return _run_pip([*args, "pip"]) def _main(argv=None): diff --git a/Lib/test/test_ensurepip.py b/Lib/test/test_ensurepip.py index 69ab2a4feaa938..a4b36a90d8815e 100644 --- a/Lib/test/test_ensurepip.py +++ b/Lib/test/test_ensurepip.py @@ -6,6 +6,8 @@ import test.support import unittest import unittest.mock +from importlib.resources.abc import Traversable +from pathlib import Path import ensurepip import ensurepip._uninstall @@ -20,41 +22,35 @@ def test_version(self): # Test version() with tempfile.TemporaryDirectory() as tmpdir: self.touch(tmpdir, "pip-1.2.3b1-py2.py3-none-any.whl") - with (unittest.mock.patch.object(ensurepip, '_PACKAGES', None), - unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', tmpdir)): + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', Path(tmpdir)): self.assertEqual(ensurepip.version(), '1.2.3b1') - def test_get_packages_no_dir(self): - # Test _get_packages() without a wheel package directory - with (unittest.mock.patch.object(ensurepip, '_PACKAGES', None), - unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', None)): - packages = ensurepip._get_packages() - - # when bundled wheel packages are used, we get _PIP_VERSION + def test_version_no_dir(self): + # Test version() without a wheel package directory + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', None): + # when the bundled pip wheel is used, we get _PIP_VERSION self.assertEqual(ensurepip._PIP_VERSION, ensurepip.version()) - # use bundled wheel packages - self.assertIsNotNone(packages['pip'].wheel_name) + def test_selected_wheel_path_no_dir(self): + pip_filename = f'pip-{ensurepip._PIP_VERSION}-py3-none-any.whl' + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', None): + with ensurepip._get_pip_whl_path_ctx() as bundled_wheel_path: + self.assertEqual(pip_filename, bundled_wheel_path.name) - def test_get_packages_with_dir(self): - # Test _get_packages() with a wheel package directory + def test_selected_wheel_path_with_dir(self): + # Test _get_pip_whl_path_ctx() with a wheel package directory pip_filename = "pip-20.2.2-py2.py3-none-any.whl" with tempfile.TemporaryDirectory() as tmpdir: self.touch(tmpdir, pip_filename) - # not used, make sure that it's ignored + # not used, make sure that they're ignored + self.touch(tmpdir, "pip-1.2.3-py2.py3-none-any.whl") self.touch(tmpdir, "wheel-0.34.2-py2.py3-none-any.whl") + self.touch(tmpdir, "pip-script.py") - with (unittest.mock.patch.object(ensurepip, '_PACKAGES', None), - unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', tmpdir)): - packages = ensurepip._get_packages() - - self.assertEqual(packages['pip'].version, '20.2.2') - self.assertEqual(packages['pip'].wheel_path, - os.path.join(tmpdir, pip_filename)) - - # wheel package is ignored - self.assertEqual(sorted(packages), ['pip']) + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', Path(tmpdir)): + with ensurepip._get_pip_whl_path_ctx() as bundled_wheel_path: + self.assertEqual(pip_filename, bundled_wheel_path.name) class EnsurepipMixin: @@ -69,7 +65,7 @@ def setUp(self): real_devnull = os.devnull os_patch = unittest.mock.patch("ensurepip.os") patched_os = os_patch.start() - # But expose os.listdir() used by _find_packages() + # But expose os.listdir() used by _find_wheel_pkg_dir_pip() patched_os.listdir = os.listdir self.addCleanup(os_patch.stop) patched_os.devnull = real_devnull diff --git a/Tools/build/verify_ensurepip_wheels.py b/Tools/build/verify_ensurepip_wheels.py index 29897425da6c03..a37da2f70757e5 100755 --- a/Tools/build/verify_ensurepip_wheels.py +++ b/Tools/build/verify_ensurepip_wheels.py @@ -14,7 +14,6 @@ from pathlib import Path from urllib.request import urlopen -PACKAGE_NAMES = ("pip",) ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip" WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled" ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8") @@ -97,8 +96,5 @@ def verify_wheel(package_name: str) -> bool: if __name__ == "__main__": - exit_status = 0 - for package_name in PACKAGE_NAMES: - if not verify_wheel(package_name): - exit_status = 1 + exit_status = int(not verify_wheel("pip")) raise SystemExit(exit_status) From 58f883b91bd8dd4cac38b58a026397363104a129 Mon Sep 17 00:00:00 2001 From: Victor Stinner <vstinner@python.org> Date: Tue, 30 Jan 2024 11:47:58 +0100 Subject: [PATCH 078/127] gh-103323: Remove current_fast_get() unused parameter (#114593) The current_fast_get() static inline function doesn't use its 'runtime' parameter, so just remove it. --- Python/pystate.c | 50 +++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index 8e097c848cf4a1..430121a6a35d7f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -67,7 +67,7 @@ _Py_thread_local PyThreadState *_Py_tss_tstate = NULL; #endif static inline PyThreadState * -current_fast_get(_PyRuntimeState *Py_UNUSED(runtime)) +current_fast_get(void) { #ifdef HAVE_THREAD_LOCAL return _Py_tss_tstate; @@ -101,14 +101,14 @@ current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime)) } #define tstate_verify_not_active(tstate) \ - if (tstate == current_fast_get((tstate)->interp->runtime)) { \ + if (tstate == current_fast_get()) { \ _Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate); \ } PyThreadState * _PyThreadState_GetCurrent(void) { - return current_fast_get(&_PyRuntime); + return current_fast_get(); } @@ -360,10 +360,9 @@ holds_gil(PyThreadState *tstate) // XXX Fall back to tstate->interp->runtime->ceval.gil.last_holder // (and tstate->interp->runtime->ceval.gil.locked). assert(tstate != NULL); - _PyRuntimeState *runtime = tstate->interp->runtime; /* Must be the tstate for this thread */ - assert(tstate == gilstate_tss_get(runtime)); - return tstate == current_fast_get(runtime); + assert(tstate == gilstate_tss_get(tstate->interp->runtime)); + return tstate == current_fast_get(); } @@ -723,7 +722,7 @@ PyInterpreterState * PyInterpreterState_New(void) { // tstate can be NULL - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); PyInterpreterState *interp; PyStatus status = _PyInterpreterState_New(tstate, &interp); @@ -882,7 +881,7 @@ PyInterpreterState_Clear(PyInterpreterState *interp) // Use the current Python thread state to call audit hooks and to collect // garbage. It can be different than the current Python thread state // of 'interp'. - PyThreadState *current_tstate = current_fast_get(interp->runtime); + PyThreadState *current_tstate = current_fast_get(); _PyImport_ClearCore(interp); interpreter_clear(interp, current_tstate); } @@ -908,7 +907,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp) // XXX Clearing the "current" thread state should happen before // we start finalizing the interpreter (or the current thread state). - PyThreadState *tcur = current_fast_get(runtime); + PyThreadState *tcur = current_fast_get(); if (tcur != NULL && interp == tcur->interp) { /* Unset current thread. After this, many C API calls become crashy. */ _PyThreadState_Detach(tcur); @@ -1010,7 +1009,7 @@ _PyInterpreterState_SetRunningMain(PyInterpreterState *interp) if (_PyInterpreterState_FailIfRunningMain(interp) < 0) { return -1; } - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); if (tstate->interp != interp) { PyErr_SetString(PyExc_RuntimeError, @@ -1025,7 +1024,7 @@ void _PyInterpreterState_SetNotRunningMain(PyInterpreterState *interp) { PyThreadState *tstate = interp->threads.main; - assert(tstate == current_fast_get(&_PyRuntime)); + assert(tstate == current_fast_get()); if (tstate->on_delete != NULL) { // The threading module was imported for the first time in this @@ -1178,7 +1177,7 @@ PyInterpreterState_GetDict(PyInterpreterState *interp) PyInterpreterState* PyInterpreterState_Get(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { @@ -1474,7 +1473,7 @@ void PyThreadState_Clear(PyThreadState *tstate) { assert(tstate->_status.initialized && !tstate->_status.cleared); - assert(current_fast_get(&_PyRuntime)->interp == tstate->interp); + assert(current_fast_get()->interp == tstate->interp); // XXX assert(!tstate->_status.bound || tstate->_status.unbound); tstate->_status.finalizing = 1; // just in case @@ -1656,7 +1655,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate) void PyThreadState_DeleteCurrent(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _PyThreadState_DeleteCurrent(tstate); } @@ -1732,7 +1731,7 @@ _PyThreadState_GetDict(PyThreadState *tstate) PyObject * PyThreadState_GetDict(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); if (tstate == NULL) { return NULL; } @@ -1853,7 +1852,7 @@ _PyThreadState_Attach(PyThreadState *tstate) #endif _Py_EnsureTstateNotNULL(tstate); - if (current_fast_get(&_PyRuntime) != NULL) { + if (current_fast_get() != NULL) { Py_FatalError("non-NULL old thread state"); } @@ -1883,7 +1882,7 @@ detach_thread(PyThreadState *tstate, int detached_state) { // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate)); assert(tstate->state == _Py_THREAD_ATTACHED); - assert(tstate == current_fast_get(&_PyRuntime)); + assert(tstate == current_fast_get()); if (tstate->critical_section != 0) { _PyCriticalSection_SuspendAll(tstate); } @@ -2168,14 +2167,14 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) PyThreadState * PyThreadState_GetUnchecked(void) { - return current_fast_get(&_PyRuntime); + return current_fast_get(); } PyThreadState * PyThreadState_Get(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); return tstate; } @@ -2183,7 +2182,7 @@ PyThreadState_Get(void) PyThreadState * _PyThreadState_Swap(_PyRuntimeState *runtime, PyThreadState *newts) { - PyThreadState *oldts = current_fast_get(runtime); + PyThreadState *oldts = current_fast_get(); if (oldts != NULL) { _PyThreadState_Detach(oldts); } @@ -2278,7 +2277,7 @@ PyObject * _PyThread_CurrentFrames(void) { _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); if (_PySys_Audit(tstate, "sys._current_frames", NULL) < 0) { return NULL; } @@ -2339,7 +2338,7 @@ PyObject * _PyThread_CurrentExceptions(void) { _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); @@ -2481,7 +2480,7 @@ PyGILState_Check(void) return 1; } - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); if (tstate == NULL) { return 0; } @@ -2579,7 +2578,7 @@ PyGILState_Release(PyGILState_STATE oldstate) * races; see bugs 225673 and 1061968 (that nasty bug has a * habit of coming back). */ - assert(current_fast_get(runtime) == tstate); + assert(current_fast_get() == tstate); _PyThreadState_DeleteCurrent(tstate); } /* Release the lock if necessary */ @@ -2645,9 +2644,8 @@ _PyInterpreterState_GetConfigCopy(PyConfig *config) const PyConfig* _Py_GetConfig(void) { - _PyRuntimeState *runtime = &_PyRuntime; assert(PyGILState_Check()); - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); return _PyInterpreterState_GetConfig(tstate->interp); } From ea30a28c3e89b69a214c536e61402660242c0f2a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka <storchaka@gmail.com> Date: Tue, 30 Jan 2024 14:21:12 +0200 Subject: [PATCH 079/127] gh-113732: Fix support of QUOTE_NOTNULL and QUOTE_STRINGS in csv.reader (GH-113738) --- Doc/whatsnew/3.12.rst | 2 +- Lib/test/test_csv.py | 25 ++++++++++ ...-01-05-16-27-34.gh-issue-113732.fgDRXA.rst | 2 + Modules/_csv.c | 46 ++++++++++++------- 4 files changed, 57 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 77b12f9284ba0d..100312a5940b79 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -690,7 +690,7 @@ csv * Add :const:`csv.QUOTE_NOTNULL` and :const:`csv.QUOTE_STRINGS` flags to provide finer grained control of ``None`` and empty strings by - :class:`csv.writer` objects. + :class:`~csv.reader` and :class:`~csv.writer` objects. dis --- diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 69fef5945ae66f..21a4cb586ff665 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -392,10 +392,26 @@ def test_read_quoting(self): # will this fail where locale uses comma for decimals? self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]], quoting=csv.QUOTE_NONNUMERIC) + self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']], + quoting=csv.QUOTE_NOTNULL) + self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]], + quoting=csv.QUOTE_STRINGS) + + self._read_test([',,"",'], [['', '', '', '']]) + self._read_test([',,"",'], [['', '', '', '']], + quoting=csv.QUOTE_NONNUMERIC) + self._read_test([',,"",'], [[None, None, '', None]], + quoting=csv.QUOTE_NOTNULL) + self._read_test([',,"",'], [[None, None, '', None]], + quoting=csv.QUOTE_STRINGS) + self._read_test(['"a\nb", 7'], [['a\nb', ' 7']]) self.assertRaises(ValueError, self._read_test, ['abc,3'], [[]], quoting=csv.QUOTE_NONNUMERIC) + self.assertRaises(ValueError, self._read_test, + ['abc,3'], [[]], + quoting=csv.QUOTE_STRINGS) self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@') self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0') @@ -403,6 +419,15 @@ def test_read_skipinitialspace(self): self._read_test(['no space, space, spaces,\ttab'], [['no space', 'space', 'spaces', '\ttab']], skipinitialspace=True) + self._read_test([' , , '], + [['', '', '']], + skipinitialspace=True) + self._read_test([' , , '], + [[None, None, None]], + skipinitialspace=True, quoting=csv.QUOTE_NOTNULL) + self._read_test([' , , '], + [[None, None, None]], + skipinitialspace=True, quoting=csv.QUOTE_STRINGS) def test_read_bigfield(self): # This exercises the buffer realloc functionality and field size diff --git a/Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst b/Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst new file mode 100644 index 00000000000000..7582603dcf95f5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst @@ -0,0 +1,2 @@ +Fix support of :data:`~csv.QUOTE_NOTNULL` and :data:`~csv.QUOTE_STRINGS` in +:func:`csv.reader`. diff --git a/Modules/_csv.c b/Modules/_csv.c index 929c21584ac2ef..3aa648b8e9cec4 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -131,7 +131,7 @@ typedef struct { Py_UCS4 *field; /* temporary buffer */ Py_ssize_t field_size; /* size of allocated buffer */ Py_ssize_t field_len; /* length of current field */ - int numeric_field; /* treat field as numeric */ + bool unquoted_field; /* true if no quotes around the current field */ unsigned long line_num; /* Source-file line number */ } ReaderObj; @@ -644,22 +644,33 @@ _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs) static int parse_save_field(ReaderObj *self) { + int quoting = self->dialect->quoting; PyObject *field; - field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, - (void *) self->field, self->field_len); - if (field == NULL) - return -1; - self->field_len = 0; - if (self->numeric_field) { - PyObject *tmp; - - self->numeric_field = 0; - tmp = PyNumber_Float(field); - Py_DECREF(field); - if (tmp == NULL) + if (self->unquoted_field && + self->field_len == 0 && + (quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS)) + { + field = Py_NewRef(Py_None); + } + else { + field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, + (void *) self->field, self->field_len); + if (field == NULL) { return -1; - field = tmp; + } + if (self->unquoted_field && + self->field_len != 0 && + (quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS)) + { + PyObject *tmp = PyNumber_Float(field); + Py_DECREF(field); + if (tmp == NULL) { + return -1; + } + field = tmp; + } + self->field_len = 0; } if (PyList_Append(self->fields, field) < 0) { Py_DECREF(field); @@ -721,6 +732,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) /* fallthru */ case START_FIELD: /* expecting field */ + self->unquoted_field = true; if (c == '\n' || c == '\r' || c == EOL) { /* save empty field - return [fields] */ if (parse_save_field(self) < 0) @@ -730,10 +742,12 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) else if (c == dialect->quotechar && dialect->quoting != QUOTE_NONE) { /* start quoted field */ + self->unquoted_field = false; self->state = IN_QUOTED_FIELD; } else if (c == dialect->escapechar) { /* possible escaped character */ + self->unquoted_field = false; self->state = ESCAPED_CHAR; } else if (c == ' ' && dialect->skipinitialspace) @@ -746,8 +760,6 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) } else { /* begin new unquoted field */ - if (dialect->quoting == QUOTE_NONNUMERIC) - self->numeric_field = 1; if (parse_add_char(self, module_state, c) < 0) return -1; self->state = IN_FIELD; @@ -892,7 +904,7 @@ parse_reset(ReaderObj *self) return -1; self->field_len = 0; self->state = START_RECORD; - self->numeric_field = 0; + self->unquoted_field = false; return 0; } From e21754d7f8336d4647e28f355d8a3dbd5a2c7545 Mon Sep 17 00:00:00 2001 From: Vinay Sajip <vinay_sajip@yahoo.co.uk> Date: Tue, 30 Jan 2024 12:34:18 +0000 Subject: [PATCH 080/127] gh-114706: Allow QueueListener.stop() to be called more than once. (GH-114748) --- Lib/logging/handlers.py | 7 ++++--- Lib/test/test_logging.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py index 9840b7b0aeba88..e7f1322e4ba3d9 100644 --- a/Lib/logging/handlers.py +++ b/Lib/logging/handlers.py @@ -1586,6 +1586,7 @@ def stop(self): Note that if you don't call this before your application exits, there may be some records still left on the queue, which won't be processed. """ - self.enqueue_sentinel() - self._thread.join() - self._thread = None + if self._thread: # see gh-114706 - allow calling this more than once + self.enqueue_sentinel() + self._thread.join() + self._thread = None diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 908e242b85f5e7..888523227c2ac4 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -4089,6 +4089,7 @@ def test_queue_listener(self): self.que_logger.critical(self.next_message()) finally: listener.stop() + listener.stop() # gh-114706 - ensure no crash if called again self.assertTrue(handler.matches(levelno=logging.WARNING, message='1')) self.assertTrue(handler.matches(levelno=logging.ERROR, message='2')) self.assertTrue(handler.matches(levelno=logging.CRITICAL, message='3')) From 809eed48058ea7391df57ead09dff53bcc5d81e9 Mon Sep 17 00:00:00 2001 From: Barney Gale <barney.gale@gmail.com> Date: Tue, 30 Jan 2024 14:25:16 +0000 Subject: [PATCH 081/127] GH-114610: Fix `pathlib._abc.PurePathBase.with_suffix('.ext')` handling of stems (#114613) Raise `ValueError` if `with_suffix('.ext')` is called on a path without a stem. Paths may only have a non-empty suffix if they also have a non-empty stem. ABC-only bugfix; no effect on public classes. --- Lib/pathlib/_abc.py | 7 +++++-- Lib/test/test_pathlib/test_pathlib.py | 7 ------- Lib/test/test_pathlib/test_pathlib_abc.py | 5 ++--- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index ad5684829ebc80..580d631cbf3b53 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -299,10 +299,13 @@ def with_suffix(self, suffix): has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. """ + stem = self.stem if not suffix: - return self.with_name(self.stem) + return self.with_name(stem) + elif not stem: + raise ValueError(f"{self!r} has an empty name") elif suffix.startswith('.') and len(suffix) > 1: - return self.with_name(self.stem + suffix) + return self.with_name(stem + suffix) else: raise ValueError(f"Invalid suffix {suffix!r}") diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 5ce3b605c58e63..a8cc30ef0ab63f 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -327,13 +327,6 @@ def test_with_stem_empty(self): self.assertRaises(ValueError, P('a/b').with_stem, '') self.assertRaises(ValueError, P('a/b').with_stem, '.') - def test_with_suffix_empty(self): - # Path doesn't have a "filename" component. - P = self.cls - self.assertRaises(ValueError, P('').with_suffix, '.gz') - self.assertRaises(ValueError, P('.').with_suffix, '.gz') - self.assertRaises(ValueError, P('/').with_suffix, '.gz') - def test_relative_to_several_args(self): P = self.cls p = P('a/b') diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index ab989cb5503f99..18a612f6b81bac 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -977,9 +977,8 @@ def test_with_suffix_windows(self): def test_with_suffix_empty(self): P = self.cls # Path doesn't have a "filename" component. - self.assertEqual(P('').with_suffix('.gz'), P('.gz')) - self.assertEqual(P('.').with_suffix('.gz'), P('..gz')) - self.assertEqual(P('/').with_suffix('.gz'), P('/.gz')) + self.assertRaises(ValueError, P('').with_suffix, '.gz') + self.assertRaises(ValueError, P('/').with_suffix, '.gz') def test_with_suffix_seps(self): P = self.cls From 4287e8608bcabcc5bde851d838d4709db5d69089 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 30 Jan 2024 17:12:11 +0200 Subject: [PATCH 082/127] gh-109975: Copyedit "What's New in Python 3.13" (#114401) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com> --- Doc/whatsnew/3.13.rst | 218 +++++++++++++++++++++--------------------- 1 file changed, 108 insertions(+), 110 deletions(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 985e34b453f63a..fec1e55e0daf0e 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -146,14 +146,6 @@ New Modules Improved Modules ================ -ast ---- - -* :func:`ast.parse` now accepts an optional argument ``optimize`` - which is passed on to the :func:`compile` built-in. This makes it - possible to obtain an optimized ``AST``. - (Contributed by Irit Katriel in :gh:`108113`). - array ----- @@ -161,6 +153,14 @@ array It can be used instead of ``'u'`` type code, which is deprecated. (Contributed by Inada Naoki in :gh:`80480`.) +ast +--- + +* :func:`ast.parse` now accepts an optional argument ``optimize`` + which is passed on to the :func:`compile` built-in. This makes it + possible to obtain an optimized ``AST``. + (Contributed by Irit Katriel in :gh:`108113`.) + asyncio ------- @@ -180,6 +180,13 @@ copy any user classes which define the :meth:`!__replace__` method. (Contributed by Serhiy Storchaka in :gh:`108751`.) +dbm +--- + +* Add :meth:`dbm.gnu.gdbm.clear` and :meth:`dbm.ndbm.ndbm.clear` methods that remove all items + from the database. + (Contributed by Donghee Na in :gh:`107122`.) + dis --- @@ -189,13 +196,6 @@ dis the ``show_offsets`` parameter. (Contributed by Irit Katriel in :gh:`112137`.) -dbm ---- - -* Add :meth:`dbm.gnu.gdbm.clear` and :meth:`dbm.ndbm.ndbm.clear` methods that remove all items - from the database. - (Contributed by Donghee Na in :gh:`107122`.) - doctest ------- @@ -213,7 +213,7 @@ email parameter to these two functions: use ``strict=False`` to get the old behavior, accept malformed inputs. ``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to - check if the *strict* paramater is available. + check if the *strict* parameter is available. (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve the CVE-2023-27043 fix.) @@ -223,7 +223,7 @@ fractions * Formatting for objects of type :class:`fractions.Fraction` now supports the standard format specification mini-language rules for fill, alignment, sign handling, minimum width and grouping. (Contributed by Mark Dickinson - in :gh:`111320`) + in :gh:`111320`.) glob ---- @@ -297,17 +297,17 @@ os the new environment variable :envvar:`PYTHON_CPU_COUNT` or the new command-line option :option:`-X cpu_count <-X>`. This option is useful for users who need to limit CPU resources of a container system without having to modify the container (application code). - (Contributed by Donghee Na in :gh:`109595`) + (Contributed by Donghee Na in :gh:`109595`.) * Add support of :func:`os.lchmod` and the *follow_symlinks* argument in :func:`os.chmod` on Windows. Note that the default value of *follow_symlinks* in :func:`!os.lchmod` is ``False`` on Windows. - (Contributed by Serhiy Storchaka in :gh:`59616`) + (Contributed by Serhiy Storchaka in :gh:`59616`.) * Add support of :func:`os.fchmod` and a file descriptor in :func:`os.chmod` on Windows. - (Contributed by Serhiy Storchaka in :gh:`113191`) + (Contributed by Serhiy Storchaka in :gh:`113191`.) * :func:`os.posix_spawn` now accepts ``env=None``, which makes the newly spawned process use the current process environment. @@ -357,7 +357,7 @@ pdb the new ``exceptions [exc_number]`` command for Pdb. (Contributed by Matthias Bussonnier in :gh:`106676`.) -* Expressions/Statements whose prefix is a pdb command are now correctly +* Expressions/statements whose prefix is a pdb command are now correctly identified and executed. (Contributed by Tian Gao in :gh:`108464`.) @@ -487,34 +487,69 @@ Deprecated Replace ``ctypes.ARRAY(item_type, size)`` with ``item_type * size``. (Contributed by Victor Stinner in :gh:`105733`.) +* :mod:`decimal`: Deprecate non-standard format specifier "N" for + :class:`decimal.Decimal`. + It was not documented and only supported in the C implementation. + (Contributed by Serhiy Storchaka in :gh:`89902`.) + +* :mod:`dis`: The ``dis.HAVE_ARGUMENT`` separator is deprecated. Check + membership in :data:`~dis.hasarg` instead. + (Contributed by Irit Katriel in :gh:`109319`.) + * :mod:`getopt` and :mod:`optparse` modules: They are now - :term:`soft deprecated`: the :mod:`argparse` should be used for new projects. + :term:`soft deprecated`: the :mod:`argparse` module should be used for new projects. Previously, the :mod:`optparse` module was already deprecated, its removal was not scheduled, and no warnings was emitted: so there is no change in practice. (Contributed by Victor Stinner in :gh:`106535`.) +* :mod:`gettext`: Emit deprecation warning for non-integer numbers in + :mod:`gettext` functions and methods that consider plural forms even if the + translation was not found. + (Contributed by Serhiy Storchaka in :gh:`88434`.) + * :mod:`http.server`: :class:`http.server.CGIHTTPRequestHandler` now emits a - :exc:`DeprecationWarning` as it will be removed in 3.15. Process based CGI - http servers have been out of favor for a very long time. This code was + :exc:`DeprecationWarning` as it will be removed in 3.15. Process-based CGI + HTTP servers have been out of favor for a very long time. This code was outdated, unmaintained, and rarely used. It has a high potential for both security and functionality bugs. This includes removal of the ``--cgi`` flag to the ``python -m http.server`` command line in 3.15. * :mod:`pathlib`: + :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for + removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved + paths on Windows. + +* :mod:`pydoc`: Deprecate undocumented :func:`!pydoc.ispackage` function. + (Contributed by Zackery Spytz in :gh:`64020`.) + +* :mod:`sqlite3`: Passing more than one positional argument to + :func:`sqlite3.connect` and the :class:`sqlite3.Connection` constructor is + deprecated. The remaining parameters will become keyword-only in Python 3.15. + + Deprecate passing name, number of arguments, and the callable as keyword + arguments for the following :class:`sqlite3.Connection` APIs: + + * :meth:`~sqlite3.Connection.create_function` + * :meth:`~sqlite3.Connection.create_aggregate` + + Deprecate passing the callback callable by keyword for the following + :class:`sqlite3.Connection` APIs: + + * :meth:`~sqlite3.Connection.set_authorizer` + * :meth:`~sqlite3.Connection.set_progress_handler` + * :meth:`~sqlite3.Connection.set_trace_callback` + + The affected parameters will become positional-only in Python 3.15. - * :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for - removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved - paths on Windows. + (Contributed by Erlend E. Aasland in :gh:`107948` and :gh:`108278`.) * :mod:`sys`: :func:`sys._enablelegacywindowsfsencoding` function. - Replace it with :envvar:`PYTHONLEGACYWINDOWSFSENCODING` environment variable. + Replace it with the :envvar:`PYTHONLEGACYWINDOWSFSENCODING` environment variable. (Contributed by Inada Naoki in :gh:`73427`.) -* :mod:`traceback`: - - * The field *exc_type* of :class:`traceback.TracebackException` is - deprecated. Use *exc_type_str* instead. +* :mod:`traceback`: The field *exc_type* of :class:`traceback.TracebackException` + is deprecated. Use *exc_type_str* instead. * :mod:`typing`: @@ -550,39 +585,6 @@ Deprecated They will be removed in Python 3.15. (Contributed by Victor Stinner in :gh:`105096`.) -* Passing more than one positional argument to :func:`sqlite3.connect` and the - :class:`sqlite3.Connection` constructor is deprecated. The remaining - parameters will become keyword-only in Python 3.15. - - Deprecate passing name, number of arguments, and the callable as keyword - arguments, for the following :class:`sqlite3.Connection` APIs: - - * :meth:`~sqlite3.Connection.create_function` - * :meth:`~sqlite3.Connection.create_aggregate` - - Deprecate passing the callback callable by keyword for the following - :class:`sqlite3.Connection` APIs: - - * :meth:`~sqlite3.Connection.set_authorizer` - * :meth:`~sqlite3.Connection.set_progress_handler` - * :meth:`~sqlite3.Connection.set_trace_callback` - - The affected parameters will become positional-only in Python 3.15. - - (Contributed by Erlend E. Aasland in :gh:`107948` and :gh:`108278`.) - -* The ``dis.HAVE_ARGUMENT`` separator is deprecated. Check membership - in :data:`~dis.hasarg` instead. - (Contributed by Irit Katriel in :gh:`109319`.) - -* Deprecate non-standard format specifier "N" for :class:`decimal.Decimal`. - It was not documented and only supported in the C implementation. - (Contributed by Serhiy Storchaka in :gh:`89902`.) - -* Emit deprecation warning for non-integer numbers in :mod:`gettext` functions - and methods that consider plural forms even if the translation was not found. - (Contributed by Serhiy Storchaka in :gh:`88434`.) - * Calling :meth:`frame.clear` on a suspended frame raises :exc:`RuntimeError` (as has always been the case for an executing frame). (Contributed by Irit Katriel in :gh:`79932`.) @@ -593,9 +595,6 @@ Deprecated coroutine. (Contributed by Irit Katriel in :gh:`81137`.) -* Deprecate undocumented :func:`!pydoc.ispackage` function. - (Contributed by Zackery Spytz in :gh:`64020`.) - Pending Removal in Python 3.14 ------------------------------ @@ -657,11 +656,11 @@ Pending Removal in Python 3.14 :func:`~multiprocessing.set_start_method` APIs to explicitly specify when your code *requires* ``'fork'``. See :ref:`multiprocessing-start-methods`. -* :mod:`pathlib`: :meth:`~pathlib.PurePath.is_relative_to`, +* :mod:`pathlib`: :meth:`~pathlib.PurePath.is_relative_to` and :meth:`~pathlib.PurePath.relative_to`: passing additional arguments is deprecated. -* :func:`pkgutil.find_loader` and :func:`pkgutil.get_loader` +* :mod:`pkgutil`: :func:`~pkgutil.find_loader` and :func:`~pkgutil.get_loader` now raise :exc:`DeprecationWarning`; use :func:`importlib.util.find_spec` instead. (Contributed by Nikita Sobolev in :gh:`97850`.) @@ -719,10 +718,16 @@ Pending Removal in Python 3.15 (Contributed by Hugo van Kemenade in :gh:`111187`.) * :mod:`pathlib`: + :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for + removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved + paths on Windows. - * :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for - removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved - paths on Windows. +* :mod:`threading`: + Passing any arguments to :func:`threading.RLock` is now deprecated. + C version allows any numbers of args and kwargs, + but they are just ignored. Python version does not allow any arguments. + All arguments will be removed from :func:`threading.RLock` in Python 3.15. + (Contributed by Nikita Sobolev in :gh:`102029`.) * :class:`typing.NamedTuple`: @@ -749,12 +754,6 @@ Pending Removal in Python 3.15 They will be removed in Python 3.15. (Contributed by Victor Stinner in :gh:`105096`.) -* Passing any arguments to :func:`threading.RLock` is now deprecated. - C version allows any numbers of args and kwargs, - but they are just ignored. Python version does not allow any arguments. - All arguments will be removed from :func:`threading.RLock` in Python 3.15. - (Contributed by Nikita Sobolev in :gh:`102029`.) - Pending Removal in Python 3.16 ------------------------------ @@ -801,6 +800,9 @@ although there is currently no date scheduled for their removal. :data:`calendar.FEBRUARY`. (Contributed by Prince Roshan in :gh:`103636`.) +* :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method + instead. + * :mod:`datetime`: * :meth:`~datetime.datetime.utcnow`: @@ -836,11 +838,13 @@ although there is currently no date scheduled for their removal. underscore. (Contributed by Serhiy Storchaka in :gh:`91760`.) +* :mod:`!sre_compile`, :mod:`!sre_constants` and :mod:`!sre_parse` modules. + * :mod:`ssl` options and protocols: * :class:`ssl.SSLContext` without protocol argument is deprecated. * :class:`ssl.SSLContext`: :meth:`~ssl.SSLContext.set_npn_protocols` and - :meth:`!~ssl.SSLContext.selected_npn_protocol` are deprecated: use ALPN + :meth:`!selected_npn_protocol` are deprecated: use ALPN instead. * ``ssl.OP_NO_SSL*`` options * ``ssl.OP_NO_TLS*`` options @@ -853,13 +857,6 @@ although there is currently no date scheduled for their removal. * ``ssl.TLSVersion.TLSv1`` * ``ssl.TLSVersion.TLSv1_1`` -* :mod:`!sre_compile`, :mod:`!sre_constants` and :mod:`!sre_parse` modules. - -* :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method - instead. - -* :class:`typing.Text` (:gh:`92332`). - * :func:`sysconfig.is_python_build` *check_home* parameter is deprecated and ignored. @@ -874,14 +871,10 @@ although there is currently no date scheduled for their removal. * :meth:`!threading.currentThread`: use :meth:`threading.current_thread`. * :meth:`!threading.activeCount`: use :meth:`threading.active_count`. -* :class:`unittest.IsolatedAsyncioTestCase`: it is deprecated to return a value - that is not None from a test case. - -* :mod:`urllib.request`: :class:`~urllib.request.URLopener` and - :class:`~urllib.request.FancyURLopener` style of invoking requests is - deprecated. Use newer :func:`~urllib.request.urlopen` functions and methods. +* :class:`typing.Text` (:gh:`92332`). -* :func:`!urllib.parse.to_bytes`. +* :class:`unittest.IsolatedAsyncioTestCase`: it is deprecated to return a value + that is not ``None`` from a test case. * :mod:`urllib.parse` deprecated functions: :func:`~urllib.parse.urlparse` instead @@ -895,6 +888,11 @@ although there is currently no date scheduled for their removal. * ``splittype()`` * ``splituser()`` * ``splitvalue()`` + * ``to_bytes()`` + +* :mod:`urllib.request`: :class:`~urllib.request.URLopener` and + :class:`~urllib.request.FancyURLopener` style of invoking requests is + deprecated. Use newer :func:`~urllib.request.urlopen` functions and methods. * :mod:`wsgiref`: ``SimpleHandler.stdout.write()`` should not do partial writes. @@ -1190,10 +1188,10 @@ Changes in the Python API * Functions :c:func:`PyDict_GetItem`, :c:func:`PyDict_GetItemString`, :c:func:`PyMapping_HasKey`, :c:func:`PyMapping_HasKeyString`, :c:func:`PyObject_HasAttr`, :c:func:`PyObject_HasAttrString`, and - :c:func:`PySys_GetObject`, which clear all errors occurred during calling - the function, report now them using :func:`sys.unraisablehook`. - You can consider to replace these functions with other functions as - recomended in the documentation. + :c:func:`PySys_GetObject`, which clear all errors which occurred when calling + them, now report them using :func:`sys.unraisablehook`. + You may replace them with other functions as + recommended in the documentation. (Contributed by Serhiy Storchaka in :gh:`106672`.) * An :exc:`OSError` is now raised by :func:`getpass.getuser` for any failure to @@ -1202,7 +1200,7 @@ Changes in the Python API * The :mod:`threading` module now expects the :mod:`!_thread` module to have an ``_is_main_interpreter`` attribute. It is a function with no - arguments that returns ``True`` if the current interpreter is the + arguments that return ``True`` if the current interpreter is the main interpreter. Any library or application that provides a custom ``_thread`` module @@ -1225,7 +1223,7 @@ Build Changes (Contributed by Erlend Aasland in :gh:`105875`.) * Python built with :file:`configure` :option:`--with-trace-refs` (tracing - references) is now ABI compatible with Python release build and + references) is now ABI compatible with the Python release build and :ref:`debug build <debug-build>`. (Contributed by Victor Stinner in :gh:`108634`.) @@ -1252,7 +1250,7 @@ New Features (Contributed by Inada Naoki in :gh:`104922`.) * The *keywords* parameter of :c:func:`PyArg_ParseTupleAndKeywords` and - :c:func:`PyArg_VaParseTupleAndKeywords` has now type :c:expr:`char * const *` + :c:func:`PyArg_VaParseTupleAndKeywords` now has type :c:expr:`char * const *` in C and :c:expr:`const char * const *` in C++, instead of :c:expr:`char **`. It makes these functions compatible with arguments of type :c:expr:`const char * const *`, :c:expr:`const char **` or @@ -1309,14 +1307,14 @@ New Features always steals a reference to the value. (Contributed by Serhiy Storchaka in :gh:`86493`.) -* Added :c:func:`PyDict_GetItemRef` and :c:func:`PyDict_GetItemStringRef` +* Add :c:func:`PyDict_GetItemRef` and :c:func:`PyDict_GetItemStringRef` functions: similar to :c:func:`PyDict_GetItemWithError` but returning a :term:`strong reference` instead of a :term:`borrowed reference`. Moreover, these functions return -1 on error and so checking ``PyErr_Occurred()`` is not needed. (Contributed by Victor Stinner in :gh:`106004`.) -* Added :c:func:`PyDict_ContainsString` function: same as +* Add :c:func:`PyDict_ContainsString` function: same as :c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*` UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`. (Contributed by Victor Stinner in :gh:`108314`.) @@ -1374,7 +1372,7 @@ New Features (Contributed by Victor Stinner in :gh:`85283`.) * Add :c:func:`PyErr_FormatUnraisable` function: similar to - :c:func:`PyErr_WriteUnraisable`, but allow to customize the warning mesage. + :c:func:`PyErr_WriteUnraisable`, but allow customizing the warning message. (Contributed by Serhiy Storchaka in :gh:`108082`.) * Add :c:func:`PyList_Extend` and :c:func:`PyList_Clear` functions: similar to @@ -1384,7 +1382,7 @@ New Features * Add :c:func:`PyDict_Pop` and :c:func:`PyDict_PopString` functions: remove a key from a dictionary and optionally return the removed value. This is similar to :meth:`dict.pop`, but without the default value and not raising - :exc:`KeyError` if the key missing. + :exc:`KeyError` if the key is missing. (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.) * Add :c:func:`Py_HashPointer` function to hash a pointer. @@ -1497,7 +1495,7 @@ Removed ------- * Removed chained :class:`classmethod` descriptors (introduced in - :issue:`19072`). This can no longer be used to wrap other descriptors + :gh:`63272`). This can no longer be used to wrap other descriptors such as :class:`property`. The core design of this feature was flawed and caused a number of downstream problems. To "pass-through" a :class:`classmethod`, consider using the :attr:`!__wrapped__` @@ -1511,14 +1509,14 @@ Removed add ``cc @vstinner`` to the issue to notify Victor Stinner. (Contributed by Victor Stinner in :gh:`106320`.) -* Remove functions deprecated in Python 3.9. +* Remove functions deprecated in Python 3.9: * ``PyEval_CallObject()``, ``PyEval_CallObjectWithKeywords()``: use :c:func:`PyObject_CallNoArgs` or :c:func:`PyObject_Call` instead. Warning: :c:func:`PyObject_Call` positional arguments must be a - :class:`tuple` and must not be *NULL*, keyword arguments must be a - :class:`dict` or *NULL*, whereas removed functions checked arguments type - and accepted *NULL* positional and keyword arguments. + :class:`tuple` and must not be ``NULL``, keyword arguments must be a + :class:`dict` or ``NULL``, whereas removed functions checked arguments type + and accepted ``NULL`` positional and keyword arguments. To replace ``PyEval_CallObjectWithKeywords(func, NULL, kwargs)`` with :c:func:`PyObject_Call`, pass an empty tuple as positional arguments using :c:func:`PyTuple_New(0) <PyTuple_New>`. From 1f515e8a109204f7399d85b7fd806135166422d9 Mon Sep 17 00:00:00 2001 From: Eugene Toder <eltoder@users.noreply.github.com> Date: Tue, 30 Jan 2024 10:19:46 -0500 Subject: [PATCH 083/127] gh-112919: Speed-up datetime, date and time.replace() (GH-112921) Use argument clinic and call new_* functions directly. This speeds up these functions 6x to 7.5x when calling with keyword arguments. --- .../pycore_global_objects_fini_generated.h | 8 + Include/internal/pycore_global_strings.h | 8 + .../internal/pycore_runtime_init_generated.h | 8 + .../internal/pycore_unicodeobject_generated.h | 24 ++ ...-12-09-23-31-17.gh-issue-112919.S5k9QN.rst | 2 + Modules/_datetimemodule.c | 170 ++++----- Modules/clinic/_datetimemodule.c.h | 352 +++++++++++++++++- 7 files changed, 476 insertions(+), 96 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-09-23-31-17.gh-issue-112919.S5k9QN.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 57505b5388fd6c..dd09ff40f39fe6 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -876,6 +876,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(d)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(data)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(database)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(day)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(default)); @@ -945,6 +946,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fix_imports)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flush)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(from_param)); @@ -975,6 +977,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(headers)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hi)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hook)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hour)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(id)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(identity_hint)); @@ -1059,11 +1062,14 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(metaclass)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(metadata)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(method)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(microsecond)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(minute)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mod)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(module)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(module_globals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(modules)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(month)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mro)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(msg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mycmp)); @@ -1168,6 +1174,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(salt)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sched_priority)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(scheduler)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(second)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seek)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seekable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(selectors)); @@ -1244,6 +1251,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(type)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(type_params)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tz)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tzinfo)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tzname)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(uid)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(unlink)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 0f4f3b61910241..79d6509abcdfd9 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -365,6 +365,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(d) STRUCT_FOR_ID(data) STRUCT_FOR_ID(database) + STRUCT_FOR_ID(day) STRUCT_FOR_ID(decode) STRUCT_FOR_ID(decoder) STRUCT_FOR_ID(default) @@ -434,6 +435,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(fix_imports) STRUCT_FOR_ID(flags) STRUCT_FOR_ID(flush) + STRUCT_FOR_ID(fold) STRUCT_FOR_ID(follow_symlinks) STRUCT_FOR_ID(format) STRUCT_FOR_ID(from_param) @@ -464,6 +466,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(headers) STRUCT_FOR_ID(hi) STRUCT_FOR_ID(hook) + STRUCT_FOR_ID(hour) STRUCT_FOR_ID(id) STRUCT_FOR_ID(ident) STRUCT_FOR_ID(identity_hint) @@ -548,11 +551,14 @@ struct _Py_global_strings { STRUCT_FOR_ID(metaclass) STRUCT_FOR_ID(metadata) STRUCT_FOR_ID(method) + STRUCT_FOR_ID(microsecond) + STRUCT_FOR_ID(minute) STRUCT_FOR_ID(mod) STRUCT_FOR_ID(mode) STRUCT_FOR_ID(module) STRUCT_FOR_ID(module_globals) STRUCT_FOR_ID(modules) + STRUCT_FOR_ID(month) STRUCT_FOR_ID(mro) STRUCT_FOR_ID(msg) STRUCT_FOR_ID(mycmp) @@ -657,6 +663,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(salt) STRUCT_FOR_ID(sched_priority) STRUCT_FOR_ID(scheduler) + STRUCT_FOR_ID(second) STRUCT_FOR_ID(seek) STRUCT_FOR_ID(seekable) STRUCT_FOR_ID(selectors) @@ -733,6 +740,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(type) STRUCT_FOR_ID(type_params) STRUCT_FOR_ID(tz) + STRUCT_FOR_ID(tzinfo) STRUCT_FOR_ID(tzname) STRUCT_FOR_ID(uid) STRUCT_FOR_ID(unlink) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 63a2b54c839a4b..f3c55acfb3c282 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -874,6 +874,7 @@ extern "C" { INIT_ID(d), \ INIT_ID(data), \ INIT_ID(database), \ + INIT_ID(day), \ INIT_ID(decode), \ INIT_ID(decoder), \ INIT_ID(default), \ @@ -943,6 +944,7 @@ extern "C" { INIT_ID(fix_imports), \ INIT_ID(flags), \ INIT_ID(flush), \ + INIT_ID(fold), \ INIT_ID(follow_symlinks), \ INIT_ID(format), \ INIT_ID(from_param), \ @@ -973,6 +975,7 @@ extern "C" { INIT_ID(headers), \ INIT_ID(hi), \ INIT_ID(hook), \ + INIT_ID(hour), \ INIT_ID(id), \ INIT_ID(ident), \ INIT_ID(identity_hint), \ @@ -1057,11 +1060,14 @@ extern "C" { INIT_ID(metaclass), \ INIT_ID(metadata), \ INIT_ID(method), \ + INIT_ID(microsecond), \ + INIT_ID(minute), \ INIT_ID(mod), \ INIT_ID(mode), \ INIT_ID(module), \ INIT_ID(module_globals), \ INIT_ID(modules), \ + INIT_ID(month), \ INIT_ID(mro), \ INIT_ID(msg), \ INIT_ID(mycmp), \ @@ -1166,6 +1172,7 @@ extern "C" { INIT_ID(salt), \ INIT_ID(sched_priority), \ INIT_ID(scheduler), \ + INIT_ID(second), \ INIT_ID(seek), \ INIT_ID(seekable), \ INIT_ID(selectors), \ @@ -1242,6 +1249,7 @@ extern "C" { INIT_ID(type), \ INIT_ID(type_params), \ INIT_ID(tz), \ + INIT_ID(tzinfo), \ INIT_ID(tzname), \ INIT_ID(uid), \ INIT_ID(unlink), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index bf8cdd85e4be5c..2e9572382fe033 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -936,6 +936,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(database); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(day); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(decode); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1143,6 +1146,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(flush); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(fold); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(follow_symlinks); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1233,6 +1239,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(hook); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(hour); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(id); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1485,6 +1494,12 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(method); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(microsecond); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(minute); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(mod); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1500,6 +1515,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(modules); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(month); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(mro); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1812,6 +1830,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(scheduler); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(second); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(seek); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -2040,6 +2061,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(tz); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(tzinfo); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(tzname); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Misc/NEWS.d/next/Library/2023-12-09-23-31-17.gh-issue-112919.S5k9QN.rst b/Misc/NEWS.d/next/Library/2023-12-09-23-31-17.gh-issue-112919.S5k9QN.rst new file mode 100644 index 00000000000000..3e99d480139cbe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-09-23-31-17.gh-issue-112919.S5k9QN.rst @@ -0,0 +1,2 @@ +Speed-up :func:`datetime.datetime.replace`, :func:`datetime.date.replace` and +:func:`datetime.time.replace`. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index cb5403e8461ff0..9b8e0a719d9048 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -61,16 +61,6 @@ static datetime_state _datetime_global_state; #define STATIC_STATE() (&_datetime_global_state) -/*[clinic input] -module datetime -class datetime.datetime "PyDateTime_DateTime *" "&PyDateTime_DateTimeType" -class datetime.date "PyDateTime_Date *" "&PyDateTime_DateType" -class datetime.IsoCalendarDate "PyDateTime_IsoCalendarDate *" "&PyDateTime_IsoCalendarDateType" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=81bec0fa19837f63]*/ - -#include "clinic/_datetimemodule.c.h" - /* We require that C int be at least 32 bits, and use int virtually * everywhere. In just a few cases we use a temp long, where a Python * API returns a C long. In such cases, we have to ensure that the @@ -161,6 +151,17 @@ static PyTypeObject PyDateTime_TimeZoneType; static int check_tzinfo_subclass(PyObject *p); +/*[clinic input] +module datetime +class datetime.datetime "PyDateTime_DateTime *" "&PyDateTime_DateTimeType" +class datetime.date "PyDateTime_Date *" "&PyDateTime_DateType" +class datetime.time "PyDateTime_Time *" "&PyDateTime_TimeType" +class datetime.IsoCalendarDate "PyDateTime_IsoCalendarDate *" "&PyDateTime_IsoCalendarDateType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6f65a48dd22fa40f]*/ + +#include "clinic/_datetimemodule.c.h" + /* --------------------------------------------------------------------------- * Math utilities. @@ -3466,24 +3467,22 @@ date_timetuple(PyDateTime_Date *self, PyObject *Py_UNUSED(ignored)) 0, 0, 0, -1); } +/*[clinic input] +datetime.date.replace + + year: int(c_default="GET_YEAR(self)") = unchanged + month: int(c_default="GET_MONTH(self)") = unchanged + day: int(c_default="GET_DAY(self)") = unchanged + +Return date with new specified fields. +[clinic start generated code]*/ + static PyObject * -date_replace(PyDateTime_Date *self, PyObject *args, PyObject *kw) +datetime_date_replace_impl(PyDateTime_Date *self, int year, int month, + int day) +/*[clinic end generated code: output=2a9430d1e6318aeb input=0d1f02685b3e90f6]*/ { - PyObject *clone; - PyObject *tuple; - int year = GET_YEAR(self); - int month = GET_MONTH(self); - int day = GET_DAY(self); - - if (! PyArg_ParseTupleAndKeywords(args, kw, "|iii:replace", date_kws, - &year, &month, &day)) - return NULL; - tuple = Py_BuildValue("iii", year, month, day); - if (tuple == NULL) - return NULL; - clone = date_new(Py_TYPE(self), tuple, NULL); - Py_DECREF(tuple); - return clone; + return new_date_ex(year, month, day, Py_TYPE(self)); } static Py_hash_t @@ -3596,10 +3595,9 @@ static PyMethodDef date_methods[] = { PyDoc_STR("Return the day of the week represented by the date.\n" "Monday == 0 ... Sunday == 6")}, - {"replace", _PyCFunction_CAST(date_replace), METH_VARARGS | METH_KEYWORDS, - PyDoc_STR("Return date with new specified fields.")}, + DATETIME_DATE_REPLACE_METHODDEF - {"__replace__", _PyCFunction_CAST(date_replace), METH_VARARGS | METH_KEYWORDS}, + {"__replace__", _PyCFunction_CAST(datetime_date_replace), METH_FASTCALL | METH_KEYWORDS}, {"__reduce__", (PyCFunction)date_reduce, METH_NOARGS, PyDoc_STR("__reduce__() -> (cls, state)")}, @@ -4573,36 +4571,28 @@ time_hash(PyDateTime_Time *self) return self->hashcode; } +/*[clinic input] +datetime.time.replace + + hour: int(c_default="TIME_GET_HOUR(self)") = unchanged + minute: int(c_default="TIME_GET_MINUTE(self)") = unchanged + second: int(c_default="TIME_GET_SECOND(self)") = unchanged + microsecond: int(c_default="TIME_GET_MICROSECOND(self)") = unchanged + tzinfo: object(c_default="HASTZINFO(self) ? self->tzinfo : Py_None") = unchanged + * + fold: int(c_default="TIME_GET_FOLD(self)") = unchanged + +Return time with new specified fields. +[clinic start generated code]*/ + static PyObject * -time_replace(PyDateTime_Time *self, PyObject *args, PyObject *kw) +datetime_time_replace_impl(PyDateTime_Time *self, int hour, int minute, + int second, int microsecond, PyObject *tzinfo, + int fold) +/*[clinic end generated code: output=0b89a44c299e4f80 input=9b6a35b1e704b0ca]*/ { - PyObject *clone; - PyObject *tuple; - int hh = TIME_GET_HOUR(self); - int mm = TIME_GET_MINUTE(self); - int ss = TIME_GET_SECOND(self); - int us = TIME_GET_MICROSECOND(self); - PyObject *tzinfo = HASTZINFO(self) ? self->tzinfo : Py_None; - int fold = TIME_GET_FOLD(self); - - if (! PyArg_ParseTupleAndKeywords(args, kw, "|iiiiO$i:replace", - time_kws, - &hh, &mm, &ss, &us, &tzinfo, &fold)) - return NULL; - if (fold != 0 && fold != 1) { - PyErr_SetString(PyExc_ValueError, - "fold must be either 0 or 1"); - return NULL; - } - tuple = Py_BuildValue("iiiiO", hh, mm, ss, us, tzinfo); - if (tuple == NULL) - return NULL; - clone = time_new(Py_TYPE(self), tuple, NULL); - if (clone != NULL) { - TIME_SET_FOLD(clone, fold); - } - Py_DECREF(tuple); - return clone; + return new_time_ex2(hour, minute, second, microsecond, tzinfo, fold, + Py_TYPE(self)); } static PyObject * @@ -4732,10 +4722,9 @@ static PyMethodDef time_methods[] = { {"dst", (PyCFunction)time_dst, METH_NOARGS, PyDoc_STR("Return self.tzinfo.dst(self).")}, - {"replace", _PyCFunction_CAST(time_replace), METH_VARARGS | METH_KEYWORDS, - PyDoc_STR("Return time with new specified fields.")}, + DATETIME_TIME_REPLACE_METHODDEF - {"__replace__", _PyCFunction_CAST(time_replace), METH_VARARGS | METH_KEYWORDS}, + {"__replace__", _PyCFunction_CAST(datetime_time_replace), METH_FASTCALL | METH_KEYWORDS}, {"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS, PyDoc_STR("string -> time from a string in ISO 8601 format")}, @@ -6042,40 +6031,32 @@ datetime_hash(PyDateTime_DateTime *self) return self->hashcode; } +/*[clinic input] +datetime.datetime.replace + + year: int(c_default="GET_YEAR(self)") = unchanged + month: int(c_default="GET_MONTH(self)") = unchanged + day: int(c_default="GET_DAY(self)") = unchanged + hour: int(c_default="DATE_GET_HOUR(self)") = unchanged + minute: int(c_default="DATE_GET_MINUTE(self)") = unchanged + second: int(c_default="DATE_GET_SECOND(self)") = unchanged + microsecond: int(c_default="DATE_GET_MICROSECOND(self)") = unchanged + tzinfo: object(c_default="HASTZINFO(self) ? self->tzinfo : Py_None") = unchanged + * + fold: int(c_default="DATE_GET_FOLD(self)") = unchanged + +Return datetime with new specified fields. +[clinic start generated code]*/ + static PyObject * -datetime_replace(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) +datetime_datetime_replace_impl(PyDateTime_DateTime *self, int year, + int month, int day, int hour, int minute, + int second, int microsecond, PyObject *tzinfo, + int fold) +/*[clinic end generated code: output=00bc96536833fddb input=9b38253d56d9bcad]*/ { - PyObject *clone; - PyObject *tuple; - int y = GET_YEAR(self); - int m = GET_MONTH(self); - int d = GET_DAY(self); - int hh = DATE_GET_HOUR(self); - int mm = DATE_GET_MINUTE(self); - int ss = DATE_GET_SECOND(self); - int us = DATE_GET_MICROSECOND(self); - PyObject *tzinfo = HASTZINFO(self) ? self->tzinfo : Py_None; - int fold = DATE_GET_FOLD(self); - - if (! PyArg_ParseTupleAndKeywords(args, kw, "|iiiiiiiO$i:replace", - datetime_kws, - &y, &m, &d, &hh, &mm, &ss, &us, - &tzinfo, &fold)) - return NULL; - if (fold != 0 && fold != 1) { - PyErr_SetString(PyExc_ValueError, - "fold must be either 0 or 1"); - return NULL; - } - tuple = Py_BuildValue("iiiiiiiO", y, m, d, hh, mm, ss, us, tzinfo); - if (tuple == NULL) - return NULL; - clone = datetime_new(Py_TYPE(self), tuple, NULL); - if (clone != NULL) { - DATE_SET_FOLD(clone, fold); - } - Py_DECREF(tuple); - return clone; + return new_datetime_ex2(year, month, day, hour, minute, second, + microsecond, tzinfo, fold, Py_TYPE(self)); } static PyObject * @@ -6597,10 +6578,9 @@ static PyMethodDef datetime_methods[] = { {"dst", (PyCFunction)datetime_dst, METH_NOARGS, PyDoc_STR("Return self.tzinfo.dst(self).")}, - {"replace", _PyCFunction_CAST(datetime_replace), METH_VARARGS | METH_KEYWORDS, - PyDoc_STR("Return datetime with new specified fields.")}, + DATETIME_DATETIME_REPLACE_METHODDEF - {"__replace__", _PyCFunction_CAST(datetime_replace), METH_VARARGS | METH_KEYWORDS}, + {"__replace__", _PyCFunction_CAST(datetime_datetime_replace), METH_FASTCALL | METH_KEYWORDS}, {"astimezone", _PyCFunction_CAST(datetime_astimezone), METH_VARARGS | METH_KEYWORDS, PyDoc_STR("tz -> convert to local time in new timezone tz\n")}, diff --git a/Modules/clinic/_datetimemodule.c.h b/Modules/clinic/_datetimemodule.c.h index 1ee50fc2a13762..48499e0aaf7783 100644 --- a/Modules/clinic/_datetimemodule.c.h +++ b/Modules/clinic/_datetimemodule.c.h @@ -82,6 +82,207 @@ iso_calendar_date_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) return return_value; } +PyDoc_STRVAR(datetime_date_replace__doc__, +"replace($self, /, year=unchanged, month=unchanged, day=unchanged)\n" +"--\n" +"\n" +"Return date with new specified fields."); + +#define DATETIME_DATE_REPLACE_METHODDEF \ + {"replace", _PyCFunction_CAST(datetime_date_replace), METH_FASTCALL|METH_KEYWORDS, datetime_date_replace__doc__}, + +static PyObject * +datetime_date_replace_impl(PyDateTime_Date *self, int year, int month, + int day); + +static PyObject * +datetime_date_replace(PyDateTime_Date *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(year), &_Py_ID(month), &_Py_ID(day), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"year", "month", "day", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "replace", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + int year = GET_YEAR(self); + int month = GET_MONTH(self); + int day = GET_DAY(self); + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 3, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + year = PyLong_AsInt(args[0]); + if (year == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[1]) { + month = PyLong_AsInt(args[1]); + if (month == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + day = PyLong_AsInt(args[2]); + if (day == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = datetime_date_replace_impl(self, year, month, day); + +exit: + return return_value; +} + +PyDoc_STRVAR(datetime_time_replace__doc__, +"replace($self, /, hour=unchanged, minute=unchanged, second=unchanged,\n" +" microsecond=unchanged, tzinfo=unchanged, *, fold=unchanged)\n" +"--\n" +"\n" +"Return time with new specified fields."); + +#define DATETIME_TIME_REPLACE_METHODDEF \ + {"replace", _PyCFunction_CAST(datetime_time_replace), METH_FASTCALL|METH_KEYWORDS, datetime_time_replace__doc__}, + +static PyObject * +datetime_time_replace_impl(PyDateTime_Time *self, int hour, int minute, + int second, int microsecond, PyObject *tzinfo, + int fold); + +static PyObject * +datetime_time_replace(PyDateTime_Time *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 6 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(hour), &_Py_ID(minute), &_Py_ID(second), &_Py_ID(microsecond), &_Py_ID(tzinfo), &_Py_ID(fold), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"hour", "minute", "second", "microsecond", "tzinfo", "fold", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "replace", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[6]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + int hour = TIME_GET_HOUR(self); + int minute = TIME_GET_MINUTE(self); + int second = TIME_GET_SECOND(self); + int microsecond = TIME_GET_MICROSECOND(self); + PyObject *tzinfo = HASTZINFO(self) ? self->tzinfo : Py_None; + int fold = TIME_GET_FOLD(self); + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 5, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + hour = PyLong_AsInt(args[0]); + if (hour == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[1]) { + minute = PyLong_AsInt(args[1]); + if (minute == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[2]) { + second = PyLong_AsInt(args[2]); + if (second == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[3]) { + microsecond = PyLong_AsInt(args[3]); + if (microsecond == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[4]) { + tzinfo = args[4]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: + if (!noptargs) { + goto skip_optional_kwonly; + } + fold = PyLong_AsInt(args[5]); + if (fold == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_kwonly: + return_value = datetime_time_replace_impl(self, hour, minute, second, microsecond, tzinfo, fold); + +exit: + return return_value; +} + PyDoc_STRVAR(datetime_datetime_now__doc__, "now($type, /, tz=None)\n" "--\n" @@ -146,4 +347,153 @@ datetime_datetime_now(PyTypeObject *type, PyObject *const *args, Py_ssize_t narg exit: return return_value; } -/*[clinic end generated code: output=562813dd3e164794 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(datetime_datetime_replace__doc__, +"replace($self, /, year=unchanged, month=unchanged, day=unchanged,\n" +" hour=unchanged, minute=unchanged, second=unchanged,\n" +" microsecond=unchanged, tzinfo=unchanged, *, fold=unchanged)\n" +"--\n" +"\n" +"Return datetime with new specified fields."); + +#define DATETIME_DATETIME_REPLACE_METHODDEF \ + {"replace", _PyCFunction_CAST(datetime_datetime_replace), METH_FASTCALL|METH_KEYWORDS, datetime_datetime_replace__doc__}, + +static PyObject * +datetime_datetime_replace_impl(PyDateTime_DateTime *self, int year, + int month, int day, int hour, int minute, + int second, int microsecond, PyObject *tzinfo, + int fold); + +static PyObject * +datetime_datetime_replace(PyDateTime_DateTime *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 9 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(year), &_Py_ID(month), &_Py_ID(day), &_Py_ID(hour), &_Py_ID(minute), &_Py_ID(second), &_Py_ID(microsecond), &_Py_ID(tzinfo), &_Py_ID(fold), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"year", "month", "day", "hour", "minute", "second", "microsecond", "tzinfo", "fold", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "replace", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[9]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + int year = GET_YEAR(self); + int month = GET_MONTH(self); + int day = GET_DAY(self); + int hour = DATE_GET_HOUR(self); + int minute = DATE_GET_MINUTE(self); + int second = DATE_GET_SECOND(self); + int microsecond = DATE_GET_MICROSECOND(self); + PyObject *tzinfo = HASTZINFO(self) ? self->tzinfo : Py_None; + int fold = DATE_GET_FOLD(self); + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 8, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + year = PyLong_AsInt(args[0]); + if (year == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[1]) { + month = PyLong_AsInt(args[1]); + if (month == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[2]) { + day = PyLong_AsInt(args[2]); + if (day == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[3]) { + hour = PyLong_AsInt(args[3]); + if (hour == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[4]) { + minute = PyLong_AsInt(args[4]); + if (minute == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[5]) { + second = PyLong_AsInt(args[5]); + if (second == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[6]) { + microsecond = PyLong_AsInt(args[6]); + if (microsecond == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[7]) { + tzinfo = args[7]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: + if (!noptargs) { + goto skip_optional_kwonly; + } + fold = PyLong_AsInt(args[8]); + if (fold == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_kwonly: + return_value = datetime_datetime_replace_impl(self, year, month, day, hour, minute, second, microsecond, tzinfo, fold); + +exit: + return return_value; +} +/*[clinic end generated code: output=c7a04b865b1e0890 input=a9049054013a1b77]*/ From 39d102c2ee8eec8ab0bacbcd62d62a72742ecc7c Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado <Pablogsal@gmail.com> Date: Tue, 30 Jan 2024 16:21:30 +0000 Subject: [PATCH 084/127] gh-113744: Add a new IncompleteInputError exception to improve incomplete input detection in the codeop module (#113745) Signed-off-by: Pablo Galindo <pablogsal@gmail.com> --- Doc/data/stable_abi.dat | 1 + Include/pyerrors.h | 1 + Lib/codeop.py | 5 +++-- Lib/test/exception_hierarchy.txt | 1 + Lib/test/test_pickle.py | 3 ++- Lib/test/test_stable_abi_ctypes.py | 1 + Misc/stable_abi.toml | 2 ++ Objects/exceptions.c | 6 ++++++ PC/python3dll.c | 1 + Parser/pegen.c | 2 +- Tools/c-analyzer/cpython/globals-to-fix.tsv | 2 ++ 11 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 811b1bd84d2417..da28a2be60bc1b 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -220,6 +220,7 @@ var,PyExc_GeneratorExit,3.2,, var,PyExc_IOError,3.2,, var,PyExc_ImportError,3.2,, var,PyExc_ImportWarning,3.2,, +var,PyExc_IncompleteInputError,3.13,, var,PyExc_IndentationError,3.2,, var,PyExc_IndexError,3.2,, var,PyExc_InterruptedError,3.7,, diff --git a/Include/pyerrors.h b/Include/pyerrors.h index 5d0028c116e2d8..68d7985dac8876 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -108,6 +108,7 @@ PyAPI_DATA(PyObject *) PyExc_NotImplementedError; PyAPI_DATA(PyObject *) PyExc_SyntaxError; PyAPI_DATA(PyObject *) PyExc_IndentationError; PyAPI_DATA(PyObject *) PyExc_TabError; +PyAPI_DATA(PyObject *) PyExc_IncompleteInputError; PyAPI_DATA(PyObject *) PyExc_ReferenceError; PyAPI_DATA(PyObject *) PyExc_SystemError; PyAPI_DATA(PyObject *) PyExc_SystemExit; diff --git a/Lib/codeop.py b/Lib/codeop.py index 91146be2c438e2..6ad60e7f85098d 100644 --- a/Lib/codeop.py +++ b/Lib/codeop.py @@ -65,9 +65,10 @@ def _maybe_compile(compiler, source, filename, symbol): try: compiler(source + "\n", filename, symbol) return None + except IncompleteInputError as e: + return None except SyntaxError as e: - if "incomplete input" in str(e): - return None + pass # fallthrough return compiler(source, filename, symbol, incomplete_input=False) diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt index 1eca123be0fecb..217ee15d4c8af5 100644 --- a/Lib/test/exception_hierarchy.txt +++ b/Lib/test/exception_hierarchy.txt @@ -44,6 +44,7 @@ BaseException ├── StopAsyncIteration ├── StopIteration ├── SyntaxError + │ └── IncompleteInputError │ └── IndentationError │ └── TabError ├── SystemError diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index b2245ddf72f708..5e187e5189d117 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -567,7 +567,8 @@ def test_exceptions(self): RecursionError, EncodingWarning, BaseExceptionGroup, - ExceptionGroup): + ExceptionGroup, + IncompleteInputError): continue if exc is not OSError and issubclass(exc, OSError): self.assertEqual(reverse_mapping('builtins', name), diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 90d45272838420..054e7f0feb1a19 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -261,6 +261,7 @@ def test_windows_feature_macros(self): "PyExc_IOError", "PyExc_ImportError", "PyExc_ImportWarning", + "PyExc_IncompleteInputError", "PyExc_IndentationError", "PyExc_IndexError", "PyExc_InterruptedError", diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 2e6b0fff9cd770..ae19d25809ec86 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2485,3 +2485,5 @@ [function._Py_SetRefcnt] added = '3.13' abi_only = true +[data.PyExc_IncompleteInputError] + added = '3.13' diff --git a/Objects/exceptions.c b/Objects/exceptions.c index a685ed803cd02d..cff55d05163b6b 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2566,6 +2566,11 @@ MiddlingExtendsException(PyExc_SyntaxError, IndentationError, SyntaxError, MiddlingExtendsException(PyExc_IndentationError, TabError, SyntaxError, "Improper mixture of spaces and tabs."); +/* + * IncompleteInputError extends SyntaxError + */ +MiddlingExtendsException(PyExc_SyntaxError, IncompleteInputError, SyntaxError, + "incomplete input."); /* * LookupError extends Exception @@ -3635,6 +3640,7 @@ static struct static_exception static_exceptions[] = { // Level 4: Other subclasses ITEM(IndentationError), // base: SyntaxError(Exception) + ITEM(IncompleteInputError), // base: SyntaxError(Exception) ITEM(IndexError), // base: LookupError(Exception) ITEM(KeyError), // base: LookupError(Exception) ITEM(ModuleNotFoundError), // base: ImportError(Exception) diff --git a/PC/python3dll.c b/PC/python3dll.c index 07aa84c91f9fc7..09ecf98fe56ea6 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -830,6 +830,7 @@ EXPORT_DATA(PyExc_FutureWarning) EXPORT_DATA(PyExc_GeneratorExit) EXPORT_DATA(PyExc_ImportError) EXPORT_DATA(PyExc_ImportWarning) +EXPORT_DATA(PyExc_IncompleteInputError) EXPORT_DATA(PyExc_IndentationError) EXPORT_DATA(PyExc_IndexError) EXPORT_DATA(PyExc_InterruptedError) diff --git a/Parser/pegen.c b/Parser/pegen.c index 7766253a76066f..3d3e64559403b1 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -844,7 +844,7 @@ _PyPegen_run_parser(Parser *p) if (res == NULL) { if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) { PyErr_Clear(); - return RAISE_SYNTAX_ERROR("incomplete input"); + return _PyPegen_raise_error(p, PyExc_IncompleteInputError, 0, "incomplete input"); } if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) { return NULL; diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index e3a1b5d532bda2..0b02ad01d39983 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -197,6 +197,7 @@ Objects/exceptions.c - _PyExc_AttributeError - Objects/exceptions.c - _PyExc_SyntaxError - Objects/exceptions.c - _PyExc_IndentationError - Objects/exceptions.c - _PyExc_TabError - +Objects/exceptions.c - _PyExc_IncompleteInputError - Objects/exceptions.c - _PyExc_LookupError - Objects/exceptions.c - _PyExc_IndexError - Objects/exceptions.c - _PyExc_KeyError - @@ -261,6 +262,7 @@ Objects/exceptions.c - PyExc_AttributeError - Objects/exceptions.c - PyExc_SyntaxError - Objects/exceptions.c - PyExc_IndentationError - Objects/exceptions.c - PyExc_TabError - +Objects/exceptions.c - PyExc_IncompleteInputError - Objects/exceptions.c - PyExc_LookupError - Objects/exceptions.c - PyExc_IndexError - Objects/exceptions.c - PyExc_KeyError - From 0990d55725cb649e74739c983b67cf08c58e8439 Mon Sep 17 00:00:00 2001 From: Dino Viehland <dinoviehland@meta.com> Date: Tue, 30 Jan 2024 09:33:36 -0800 Subject: [PATCH 085/127] gh-112075: refactor dictionary lookup functions for better re-usability (#114629) Refactor dict lookup functions to use force inline helpers --- Objects/dictobject.c | 192 +++++++++++++++++++++---------------------- 1 file changed, 95 insertions(+), 97 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c5477ab15f8dc9..23d7e9b5e38a35 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -874,11 +874,11 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) Py_UNREACHABLE(); } -// Search non-Unicode key from Unicode table -static Py_ssize_t -unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +static inline Py_ALWAYS_INLINE Py_ssize_t +do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, + Py_ssize_t (*check_lookup)(PyDictObject *, PyDictKeysObject *, void *, Py_ssize_t ix, PyObject *key, Py_hash_t)) { - PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk); + void *ep0 = _DK_ENTRIES(dk); size_t mask = DK_MASK(dk); size_t perturb = hash; size_t i = (size_t)hash & mask; @@ -886,73 +886,26 @@ unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key for (;;) { ix = dictkeys_get_index(dk, i); if (ix >= 0) { - PyDictUnicodeEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key) { + Py_ssize_t cmp = check_lookup(mp, dk, ep0, ix, key, hash); + if (cmp < 0) { + return cmp; + } else if (cmp) { return ix; } - if (unicode_get_hash(ep->me_key) == hash) { - PyObject *startkey = ep->me_key; - Py_INCREF(startkey); - int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) { - return DKIX_ERROR; - } - if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - return ix; - } - } - else { - /* The dict was mutated, restart */ - return DKIX_KEY_CHANGED; - } - } } else if (ix == DKIX_EMPTY) { return DKIX_EMPTY; } perturb >>= PERTURB_SHIFT; i = mask & (i*5 + perturb + 1); - } - Py_UNREACHABLE(); -} -// Search Unicode key from Unicode table. -static Py_ssize_t _Py_HOT_FUNCTION -unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) -{ - PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk); - size_t mask = DK_MASK(dk); - size_t perturb = hash; - size_t i = (size_t)hash & mask; - Py_ssize_t ix; - for (;;) { - ix = dictkeys_get_index(dk, i); - if (ix >= 0) { - PyDictUnicodeEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { - return ix; - } - } - else if (ix == DKIX_EMPTY) { - return DKIX_EMPTY; - } - perturb >>= PERTURB_SHIFT; - i = mask & (i*5 + perturb + 1); // Manual loop unrolling ix = dictkeys_get_index(dk, i); if (ix >= 0) { - PyDictUnicodeEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { + Py_ssize_t cmp = check_lookup(mp, dk, ep0, ix, key, hash); + if (cmp < 0) { + return cmp; + } else if (cmp) { return ix; } } @@ -965,49 +918,94 @@ unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) Py_UNREACHABLE(); } -// Search key from Generic table. +static inline Py_ALWAYS_INLINE Py_ssize_t +compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +{ + PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); + assert(!PyUnicode_CheckExact(key)); + // TODO: Thread safety + + if (unicode_get_hash(ep->me_key) == hash) { + PyObject *startkey = ep->me_key; + Py_INCREF(startkey); + int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) { + return DKIX_ERROR; + } + if (dk == mp->ma_keys && ep->me_key == startkey) { + return cmp; + } + else { + /* The dict was mutated, restart */ + return DKIX_KEY_CHANGED; + } + } + return 0; +} + +// Search non-Unicode key from Unicode table static Py_ssize_t -dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) { - PyDictKeyEntry *ep0 = DK_ENTRIES(dk); - size_t mask = DK_MASK(dk); - size_t perturb = hash; - size_t i = (size_t)hash & mask; - Py_ssize_t ix; - for (;;) { - ix = dictkeys_get_index(dk, i); - if (ix >= 0) { - PyDictKeyEntry *ep = &ep0[ix]; - assert(ep->me_key != NULL); - if (ep->me_key == key) { - return ix; - } - if (ep->me_hash == hash) { - PyObject *startkey = ep->me_key; - Py_INCREF(startkey); - int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) { - return DKIX_ERROR; - } - if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - return ix; - } - } - else { - /* The dict was mutated, restart */ - return DKIX_KEY_CHANGED; - } - } + return do_lookup(mp, dk, key, hash, compare_unicode_generic); +} + +static inline Py_ALWAYS_INLINE Py_ssize_t +compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +{ + PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == key || + (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { + return 1; + } + return 0; +} + +static Py_ssize_t _Py_HOT_FUNCTION +unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +{ + return do_lookup(NULL, dk, key, hash, compare_unicode_unicode); +} + +static inline Py_ALWAYS_INLINE Py_ssize_t +compare_generic(PyDictObject *mp, PyDictKeysObject *dk, + void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) +{ + PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix]; + assert(ep->me_key != NULL); + if (ep->me_key == key) { + return 1; + } + if (ep->me_hash == hash) { + PyObject *startkey = ep->me_key; + Py_INCREF(startkey); + int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) { + return DKIX_ERROR; } - else if (ix == DKIX_EMPTY) { - return DKIX_EMPTY; + if (dk == mp->ma_keys && ep->me_key == startkey) { + return cmp; + } + else { + /* The dict was mutated, restart */ + return DKIX_KEY_CHANGED; } - perturb >>= PERTURB_SHIFT; - i = mask & (i*5 + perturb + 1); } - Py_UNREACHABLE(); + return 0; +} + +static Py_ssize_t +dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) +{ + return do_lookup(mp, dk, key, hash, compare_generic); } /* Lookup a string in a (all unicode) dict keys. From a1332a99cf1eb9b879d4b1f28761b096b5749a0d Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy <tjreedy@udel.edu> Date: Tue, 30 Jan 2024 13:40:54 -0500 Subject: [PATCH 086/127] Clarify one-item tuple (#114745) A 'single tuple' means 'one typle, of whatever length. Remove the unneeded and slight distracting parenthetical 'singleton' comment. --- Doc/reference/expressions.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 87ebdc1ca1c9c6..50e0f97a6534af 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1890,8 +1890,9 @@ the unpacking. .. index:: pair: trailing; comma -The trailing comma is required only to create a single tuple (a.k.a. a -*singleton*); it is optional in all other cases. A single expression without a +A trailing comma is required only to create a one-item tuple, +such as ``1,``; it is optional in all other cases. +A single expression without a trailing comma doesn't create a tuple, but rather yields the value of that expression. (To create an empty tuple, use an empty pair of parentheses: ``()``.) From 6de8aa31f39b3d8dbfba132e6649724eb07b8348 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora <kirill.bast9@mail.ru> Date: Tue, 30 Jan 2024 21:44:09 +0300 Subject: [PATCH 087/127] ``importlib/_bootstrap.py``: Reduce size of ``_List`` instances (GH-114747) Reduce size of _List instances --- Lib/importlib/_bootstrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index d942045f3de666..6d6292f9559253 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -53,7 +53,7 @@ def _new_module(name): # For a list that can have a weakref to it. class _List(list): - pass + __slots__ = ("__weakref__",) # Copied from weakref.py with some simplifications and modifications unique to From fda7445ca50b892955fc31bd72a3615fef1d70c6 Mon Sep 17 00:00:00 2001 From: Barney Gale <barney.gale@gmail.com> Date: Tue, 30 Jan 2024 19:52:53 +0000 Subject: [PATCH 088/127] GH-70303: Make `pathlib.Path.glob('**')` return both files and directories (#114684) Return files and directories from `pathlib.Path.glob()` if the pattern ends with `**`. This is more compatible with `PurePath.full_match()` and with other glob implementations such as bash and `glob.glob()`. Users can add a trailing slash to match only directories. In my previous patch I added a `FutureWarning` with the intention of fixing this in Python 3.15. Upon further reflection I think this was an unnecessarily cautious remedy to a clear bug. --- Doc/library/pathlib.rst | 5 ++--- Doc/whatsnew/3.13.rst | 10 +++++++++ Lib/pathlib/__init__.py | 8 ------- Lib/test/test_pathlib/test_pathlib.py | 12 ----------- Lib/test/test_pathlib/test_pathlib_abc.py | 21 +++++++++++++++++++ ...4-01-28-18-38-18.gh-issue-70303._Lt_pj.rst | 2 ++ 6 files changed, 35 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f1aba793fda03e..f94b6fb3805684 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1038,9 +1038,8 @@ call fails (for example because the path doesn't exist). The *follow_symlinks* parameter was added. .. versionchanged:: 3.13 - Emits :exc:`FutureWarning` if the pattern ends with "``**``". In a - future Python release, patterns with this ending will match both files - and directories. Add a trailing slash to match only directories. + Return files and directories if *pattern* ends with "``**``". In + previous versions, only directories were returned. .. versionchanged:: 3.13 The *pattern* parameter accepts a :term:`path-like object`. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index fec1e55e0daf0e..b33203efbb05c0 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -350,6 +350,11 @@ pathlib (Contributed by Barney Gale in :gh:`77609` and :gh:`105793`, and Kamil Turek in :gh:`107962`). +* Return files and directories from :meth:`pathlib.Path.glob` and + :meth:`~pathlib.Path.rglob` when given a pattern that ends with "``**``". In + earlier versions, only directories were returned. + (Contributed by Barney Gale in :gh:`70303`). + pdb --- @@ -1211,6 +1216,11 @@ Changes in the Python API * :class:`mailbox.Maildir` now ignores files with a leading dot. (Contributed by Zackery Spytz in :gh:`65559`.) +* :meth:`pathlib.Path.glob` and :meth:`~pathlib.Path.rglob` now return both + files and directories if a pattern that ends with "``**``" is given, rather + than directories only. Users may add a trailing slash to match only + directories. + Build Changes ============= diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index cc159edab5796f..4447f98e3e8689 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -465,14 +465,6 @@ def _pattern_stack(self): elif pattern[-1] in (self.pathmod.sep, self.pathmod.altsep): # GH-65238: pathlib doesn't preserve trailing slash. Add it back. parts.append('') - elif parts[-1] == '**': - # GH-70303: '**' only matches directories. Add trailing slash. - warnings.warn( - "Pattern ending '**' will match files and directories in a " - "future Python release. Add a trailing slash to match only " - "directories and remove this warning.", - FutureWarning, 4) - parts.append('') parts.reverse() return parts diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index a8cc30ef0ab63f..3bee9b8c762b80 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1258,18 +1258,6 @@ def test_glob_above_recursion_limit(self): with set_recursion_limit(recursion_limit): list(base.glob('**/')) - def test_glob_recursive_no_trailing_slash(self): - P = self.cls - p = P(self.base) - with self.assertWarns(FutureWarning): - p.glob('**') - with self.assertWarns(FutureWarning): - p.glob('*/**') - with self.assertWarns(FutureWarning): - p.rglob('**') - with self.assertWarns(FutureWarning): - p.rglob('*/**') - def test_glob_pathlike(self): P = self.cls p = P(self.base) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 18a612f6b81bac..0e12182c162c14 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1765,16 +1765,26 @@ def _check(path, glob, expected): _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) + _check(p, "dir*/**", [ + "dirA", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", + "dirB", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", + "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE"]) _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", "dirB/linkD/..", "dirA/linkC/linkD/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", [ + "dirA/linkC", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", + "dirB/linkD", "dirB/linkD/fileB", + "dirC/dirD", "dirC/dirD/fileD"]) _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", "dirB/linkD/..", "dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD", "dirC/dirD/fileD"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) @needs_symlinks @@ -1791,12 +1801,20 @@ def _check(path, glob, expected): _check(p, "*/fileB", ["dirB/fileB"]) _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/"]) _check(p, "dir*/*/..", ["dirC/dirD/.."]) + _check(p, "dir*/**", [ + "dirA", "dirA/linkC", + "dirB", "dirB/fileB", "dirB/linkD", + "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE"]) _check(p, "dir*/**/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", ["dirC/dirD", "dirC/dirD/fileD"]) _check(p, "dir*/*/**/", ["dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) + _check(p, "dir*/*/../dirD/**", ["dirC/dirD/../dirD", "dirC/dirD/../dirD/fileD"]) _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD", "dirC/dirD/fileD"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) def test_rglob_common(self): @@ -1833,10 +1851,13 @@ def _check(glob, expected): "dirC/dirD", "dirC/dirD/fileD"]) _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p.rglob("dir*/**"), ["dirC/dirD", "dirC/dirD/fileD"]) _check(p.rglob("dir*/**/"), ["dirC/dirD/"]) _check(p.rglob("*/*"), ["dirC/dirD/fileD"]) _check(p.rglob("*/"), ["dirC/dirD/"]) _check(p.rglob(""), ["dirC/", "dirC/dirD/"]) + _check(p.rglob("**"), [ + "dirC", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt"]) _check(p.rglob("**/"), ["dirC/", "dirC/dirD/"]) # gh-91616, a re module regression _check(p.rglob("*.txt"), ["dirC/novel.txt"]) diff --git a/Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst b/Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst new file mode 100644 index 00000000000000..dedda24b481241 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-28-18-38-18.gh-issue-70303._Lt_pj.rst @@ -0,0 +1,2 @@ +Return both files and directories from :meth:`pathlib.Path.glob` if a +pattern ends with "``**``". Previously only directories were returned. From e5e186609fdd74bc53e8478da22b76440d996baa Mon Sep 17 00:00:00 2001 From: Matt Prodani <mattp@nyu.edu> Date: Tue, 30 Jan 2024 16:22:17 -0500 Subject: [PATCH 089/127] gh-112606: Use pthread_cond_timedwait_relative_np() in parking_lot.c when available (#112616) Add a configure define for HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE_NP and replaces pthread_cond_timedwait() with pthread_cond_timedwait_relative_np() for relative time when supported in semaphore waiting logic. --- Python/parking_lot.c | 6 +++++- configure | 6 ++++++ configure.ac | 4 ++-- pyconfig.h.in | 4 ++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Python/parking_lot.c b/Python/parking_lot.c index d44c1b4b93b4d2..c83d7443e289c5 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -158,11 +158,15 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, _PyTime_t timeout) if (sema->counter == 0) { if (timeout >= 0) { struct timespec ts; - +#if defined(HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE_NP) + _PyTime_AsTimespec_clamp(timeout, &ts); + err = pthread_cond_timedwait_relative_np(&sema->cond, &sema->mutex, &ts); +#else _PyTime_t deadline = _PyTime_Add(_PyTime_GetSystemClock(), timeout); _PyTime_AsTimespec_clamp(deadline, &ts); err = pthread_cond_timedwait(&sema->cond, &sema->mutex, &ts); +#endif // HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE_NP } else { err = pthread_cond_wait(&sema->cond, &sema->mutex); diff --git a/configure b/configure index adc5a8f014c795..7b2119ff7f4f78 100755 --- a/configure +++ b/configure @@ -17871,6 +17871,12 @@ if test "x$ac_cv_func_preadv2" = xyes then : printf "%s\n" "#define HAVE_PREADV2 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "pthread_cond_timedwait_relative_np" "ac_cv_func_pthread_cond_timedwait_relative_np" +if test "x$ac_cv_func_pthread_cond_timedwait_relative_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE_NP 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "pthread_condattr_setclock" "ac_cv_func_pthread_condattr_setclock" if test "x$ac_cv_func_pthread_condattr_setclock" = xyes diff --git a/configure.ac b/configure.ac index f5fa17fd8b7d0d..5bef2351c987e8 100644 --- a/configure.ac +++ b/configure.ac @@ -4796,8 +4796,8 @@ AC_CHECK_FUNCS([ \ mknod mknodat mktime mmap mremap nice openat opendir pathconf pause pipe \ pipe2 plock poll posix_fadvise posix_fallocate posix_openpt posix_spawn posix_spawnp \ posix_spawn_file_actions_addclosefrom_np \ - pread preadv preadv2 pthread_condattr_setclock pthread_init pthread_kill ptsname ptsname_r \ - pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ + pread preadv preadv2 pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ + pthread_kill ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ sched_setparam sched_setscheduler sem_clockwait sem_getvalue sem_open \ sem_timedwait sem_unlink sendfile setegid seteuid setgid sethostname \ diff --git a/pyconfig.h.in b/pyconfig.h.in index 02e33c7007196d..b22740710bcbee 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -936,6 +936,10 @@ /* Define to 1 if you have the `pthread_condattr_setclock' function. */ #undef HAVE_PTHREAD_CONDATTR_SETCLOCK +/* Define to 1 if you have the `pthread_cond_timedwait_relative_np' function. + */ +#undef HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE_NP + /* Defined for Solaris 2.6 bug in pthread header. */ #undef HAVE_PTHREAD_DESTRUCTOR From 3911b42cc0d404e0eac87fce30b740b08618ff06 Mon Sep 17 00:00:00 2001 From: Skip Montanaro <skip.montanaro@gmail.com> Date: Tue, 30 Jan 2024 15:54:37 -0600 Subject: [PATCH 090/127] gh-101100: Fix references in csv docs (GH-114658) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/csv.rst | 14 +++++++------- Doc/tools/.nitignore | 1 - 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 07f38f5690bb54..66888c22b7cc28 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -88,7 +88,7 @@ The :mod:`csv` module defines the following functions: Return a writer object responsible for converting the user's data into delimited strings on the given file-like object. *csvfile* can be any object with a - :func:`write` method. If *csvfile* is a file object, it should be opened with + :meth:`~io.TextIOBase.write` method. If *csvfile* is a file object, it should be opened with ``newline=''`` [1]_. An optional *dialect* parameter can be given which is used to define a set of parameters specific to a particular CSV dialect. It may be an instance of a subclass of the @@ -197,10 +197,10 @@ The :mod:`csv` module defines the following classes: Create an object which operates like a regular writer but maps dictionaries onto output rows. The *fieldnames* parameter is a :mod:`sequence <collections.abc>` of keys that identify the order in which values in the - dictionary passed to the :meth:`writerow` method are written to file + dictionary passed to the :meth:`~csvwriter.writerow` method are written to file *f*. The optional *restval* parameter specifies the value to be written if the dictionary is missing a key in *fieldnames*. If the - dictionary passed to the :meth:`writerow` method contains a key not found in + dictionary passed to the :meth:`~csvwriter.writerow` method contains a key not found in *fieldnames*, the optional *extrasaction* parameter indicates what action to take. If it is set to ``'raise'``, the default value, a :exc:`ValueError` @@ -374,8 +374,8 @@ Dialects and Formatting Parameters To make it easier to specify the format of input and output records, specific formatting parameters are grouped together into dialects. A dialect is a -subclass of the :class:`Dialect` class having a set of specific methods and a -single :meth:`validate` method. When creating :class:`reader` or +subclass of the :class:`Dialect` class containing various attributes +describing the format of the CSV file. When creating :class:`reader` or :class:`writer` objects, the programmer can specify a string or a subclass of the :class:`Dialect` class as the dialect parameter. In addition to, or instead of, the *dialect* parameter, the programmer can also specify individual @@ -492,9 +492,9 @@ DictReader objects have the following public attribute: Writer Objects -------------- -:class:`Writer` objects (:class:`DictWriter` instances and objects returned by +:class:`writer` objects (:class:`DictWriter` instances and objects returned by the :func:`writer` function) have the following public methods. A *row* must be -an iterable of strings or numbers for :class:`Writer` objects and a dictionary +an iterable of strings or numbers for :class:`writer` objects and a dictionary mapping fieldnames to strings or numbers (by passing them through :func:`str` first) for :class:`DictWriter` objects. Note that complex numbers are written out surrounded by parens. This may cause some problems for other programs which diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index bba4fe0d5f2425..7eacb46d6299b3 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -25,7 +25,6 @@ Doc/library/asyncio-policy.rst Doc/library/asyncio-subprocess.rst Doc/library/bdb.rst Doc/library/collections.rst -Doc/library/csv.rst Doc/library/dbm.rst Doc/library/decimal.rst Doc/library/email.charset.rst From 348a72ce3fda91de5e9ac1bf6b8c4387ec3007a5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher <brandtbucher@microsoft.com> Date: Tue, 30 Jan 2024 14:08:53 -0800 Subject: [PATCH 091/127] GH-113464: Add aarch64-apple-darwin/clang to JIT CI (GH-114759) --- .github/workflows/jit.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index e137fd21b0a0dd..3da72919181255 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -16,6 +16,7 @@ jobs: - i686-pc-windows-msvc/msvc - x86_64-pc-windows-msvc/msvc - x86_64-apple-darwin/clang + - aarch64-apple-darwin/clang - x86_64-unknown-linux-gnu/gcc - x86_64-unknown-linux-gnu/clang - aarch64-unknown-linux-gnu/gcc @@ -36,9 +37,12 @@ jobs: compiler: msvc - target: x86_64-apple-darwin/clang architecture: x86_64 - runner: macos-latest + runner: macos-13 + compiler: clang + - target: aarch64-apple-darwin/clang + architecture: aarch64 + runner: macos-14 compiler: clang - exclude: test_embed - target: x86_64-unknown-linux-gnu/gcc architecture: x86_64 runner: ubuntu-latest @@ -80,7 +84,7 @@ jobs: brew install llvm@${{ matrix.llvm }} export SDKROOT="$(xcrun --show-sdk-path)" ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} - make all --jobs 3 + make all --jobs 4 ./python.exe -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 - name: Native Linux @@ -91,6 +95,7 @@ jobs: ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} make all --jobs 4 ./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 + - name: Emulated Linux if: runner.os == 'Linux' && matrix.architecture != 'x86_64' run: | From dc4cd2c9ba60e2ee7e534e2f6e93c4c135df23b9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka <storchaka@gmail.com> Date: Wed, 31 Jan 2024 00:15:33 +0200 Subject: [PATCH 092/127] gh-106392: Fix inconsistency in deprecation warnings in datetime module (GH-114761) --- Lib/_pydatetime.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 355145387e355b..54c12d3b2f3f16 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -1809,7 +1809,7 @@ def fromtimestamp(cls, timestamp, tz=None): def utcfromtimestamp(cls, t): """Construct a naive UTC datetime from a POSIX timestamp.""" import warnings - warnings.warn("datetime.utcfromtimestamp() is deprecated and scheduled " + warnings.warn("datetime.datetime.utcfromtimestamp() is deprecated and scheduled " "for removal in a future version. Use timezone-aware " "objects to represent datetimes in UTC: " "datetime.datetime.fromtimestamp(t, datetime.UTC).", @@ -1827,8 +1827,8 @@ def now(cls, tz=None): def utcnow(cls): "Construct a UTC datetime from time.time()." import warnings - warnings.warn("datetime.utcnow() is deprecated and scheduled for " - "removal in a future version. Instead, Use timezone-aware " + warnings.warn("datetime.datetime.utcnow() is deprecated and scheduled for " + "removal in a future version. Use timezone-aware " "objects to represent datetimes in UTC: " "datetime.datetime.now(datetime.UTC).", DeprecationWarning, From a06b606462740058b5d52fefdcdcd679d4f40260 Mon Sep 17 00:00:00 2001 From: Diego Russo <diego.russo@arm.com> Date: Tue, 30 Jan 2024 23:53:04 +0000 Subject: [PATCH 093/127] gh-110190: Fix ctypes structs with array on Windows ARM64 (GH-114753) --- .../next/Library/2024-01-30-15-34-08.gh-issue-110190.Z5PQQX.rst | 1 + Modules/_ctypes/stgdict.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-30-15-34-08.gh-issue-110190.Z5PQQX.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-30-15-34-08.gh-issue-110190.Z5PQQX.rst b/Misc/NEWS.d/next/Library/2024-01-30-15-34-08.gh-issue-110190.Z5PQQX.rst new file mode 100644 index 00000000000000..af77e409963e04 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-30-15-34-08.gh-issue-110190.Z5PQQX.rst @@ -0,0 +1 @@ +Fix ctypes structs with array on Windows ARM64 platform by setting ``MAX_STRUCT_SIZE`` to 32 in stgdict. Patch by Diego Russo diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index 2397015ba65889..deafa696fdd0d0 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -707,7 +707,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct /* * The value of MAX_STRUCT_SIZE depends on the platform Python is running on. */ -#if defined(__aarch64__) || defined(__arm__) +#if defined(__aarch64__) || defined(__arm__) || defined(_M_ARM64) # define MAX_STRUCT_SIZE 32 #elif defined(__powerpc64__) # define MAX_STRUCT_SIZE 64 From 1667c2868633a1091b3519594103ca7662d64d75 Mon Sep 17 00:00:00 2001 From: Barney Gale <barney.gale@gmail.com> Date: Wed, 31 Jan 2024 00:38:01 +0000 Subject: [PATCH 094/127] pathlib ABCs: raise `UnsupportedOperation` directly. (#114776) Raise `UnsupportedOperation` directly, rather than via an `_unsupported()` helper, to give human readers and IDEs/typecheckers/etc a bigger hint that these methods are abstract. --- Lib/pathlib/__init__.py | 5 ++-- Lib/pathlib/_abc.py | 59 ++++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 4447f98e3e8689..65ce836765c42b 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -514,9 +514,8 @@ class Path(_abc.PathBase, PurePath): as_uri = PurePath.as_uri @classmethod - def _unsupported(cls, method_name): - msg = f"{cls.__name__}.{method_name}() is unsupported on this system" - raise UnsupportedOperation(msg) + def _unsupported_msg(cls, attribute): + return f"{cls.__name__}.{attribute} is unsupported on this system" def __init__(self, *args, **kwargs): if kwargs: diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 580d631cbf3b53..85884bc4b4cb47 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -149,39 +149,39 @@ class PathModuleBase: """ @classmethod - def _unsupported(cls, attr): - raise UnsupportedOperation(f"{cls.__name__}.{attr} is unsupported") + def _unsupported_msg(cls, attribute): + return f"{cls.__name__}.{attribute} is unsupported" @property def sep(self): """The character used to separate path components.""" - self._unsupported('sep') + raise UnsupportedOperation(self._unsupported_msg('sep')) def join(self, path, *paths): """Join path segments.""" - self._unsupported('join()') + raise UnsupportedOperation(self._unsupported_msg('join()')) def split(self, path): """Split the path into a pair (head, tail), where *head* is everything before the final path separator, and *tail* is everything after. Either part may be empty. """ - self._unsupported('split()') + raise UnsupportedOperation(self._unsupported_msg('split()')) def splitdrive(self, path): """Split the path into a 2-item tuple (drive, tail), where *drive* is a device name or mount point, and *tail* is everything after the drive. Either part may be empty.""" - self._unsupported('splitdrive()') + raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) def normcase(self, path): """Normalize the case of the path.""" - self._unsupported('normcase()') + raise UnsupportedOperation(self._unsupported_msg('normcase()')) def isabs(self, path): """Returns whether the path is absolute, i.e. unaffected by the current directory or drive.""" - self._unsupported('isabs()') + raise UnsupportedOperation(self._unsupported_msg('isabs()')) class PurePathBase: @@ -505,16 +505,15 @@ class PathBase(PurePathBase): _max_symlinks = 40 @classmethod - def _unsupported(cls, method_name): - msg = f"{cls.__name__}.{method_name}() is unsupported" - raise UnsupportedOperation(msg) + def _unsupported_msg(cls, attribute): + return f"{cls.__name__}.{attribute} is unsupported" def stat(self, *, follow_symlinks=True): """ Return the result of the stat() system call on this path, like os.stat() does. """ - self._unsupported("stat") + raise UnsupportedOperation(self._unsupported_msg('stat()')) def lstat(self): """ @@ -703,7 +702,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ - self._unsupported("open") + raise UnsupportedOperation(self._unsupported_msg('open()')) def read_bytes(self): """ @@ -744,7 +743,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - self._unsupported("iterdir") + raise UnsupportedOperation(self._unsupported_msg('iterdir()')) def _scandir(self): # Emulate os.scandir(), which returns an object that can be used as a @@ -871,7 +870,7 @@ def absolute(self): Use resolve() to resolve symlinks and remove '..' segments. """ - self._unsupported("absolute") + raise UnsupportedOperation(self._unsupported_msg('absolute()')) @classmethod def cwd(cls): @@ -886,7 +885,7 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - self._unsupported("expanduser") + raise UnsupportedOperation(self._unsupported_msg('expanduser()')) @classmethod def home(cls): @@ -898,7 +897,7 @@ def readlink(self): """ Return the path to which the symbolic link points. """ - self._unsupported("readlink") + raise UnsupportedOperation(self._unsupported_msg('readlink()')) readlink._supported = False def resolve(self, strict=False): @@ -973,7 +972,7 @@ def symlink_to(self, target, target_is_directory=False): Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - self._unsupported("symlink_to") + raise UnsupportedOperation(self._unsupported_msg('symlink_to()')) def hardlink_to(self, target): """ @@ -981,19 +980,19 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ - self._unsupported("hardlink_to") + raise UnsupportedOperation(self._unsupported_msg('hardlink_to()')) def touch(self, mode=0o666, exist_ok=True): """ Create this file with the given access mode, if it doesn't exist. """ - self._unsupported("touch") + raise UnsupportedOperation(self._unsupported_msg('touch()')) def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ - self._unsupported("mkdir") + raise UnsupportedOperation(self._unsupported_msg('mkdir()')) def rename(self, target): """ @@ -1005,7 +1004,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ - self._unsupported("rename") + raise UnsupportedOperation(self._unsupported_msg('rename()')) def replace(self, target): """ @@ -1017,13 +1016,13 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ - self._unsupported("replace") + raise UnsupportedOperation(self._unsupported_msg('replace()')) def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). """ - self._unsupported("chmod") + raise UnsupportedOperation(self._unsupported_msg('chmod()')) def lchmod(self, mode): """ @@ -1037,31 +1036,31 @@ def unlink(self, missing_ok=False): Remove this file or link. If the path is a directory, use rmdir() instead. """ - self._unsupported("unlink") + raise UnsupportedOperation(self._unsupported_msg('unlink()')) def rmdir(self): """ Remove this directory. The directory must be empty. """ - self._unsupported("rmdir") + raise UnsupportedOperation(self._unsupported_msg('rmdir()')) def owner(self, *, follow_symlinks=True): """ Return the login name of the file owner. """ - self._unsupported("owner") + raise UnsupportedOperation(self._unsupported_msg('owner()')) def group(self, *, follow_symlinks=True): """ Return the group name of the file gid. """ - self._unsupported("group") + raise UnsupportedOperation(self._unsupported_msg('group()')) @classmethod def from_uri(cls, uri): """Return a new path from the given 'file' URI.""" - cls._unsupported("from_uri") + raise UnsupportedOperation(cls._unsupported_msg('from_uri()')) def as_uri(self): """Return the path as a URI.""" - self._unsupported("as_uri") + raise UnsupportedOperation(self._unsupported_msg('as_uri()')) From 574291963f6b0eb7da3fde1ae9763236e7ece306 Mon Sep 17 00:00:00 2001 From: Barney Gale <barney.gale@gmail.com> Date: Wed, 31 Jan 2024 00:59:33 +0000 Subject: [PATCH 095/127] pathlib ABCs: drop partial, broken, untested support for `bytes` paths. (#114777) Methods like `full_match()`, `glob()`, etc, are difficult to make work with byte paths, and it's not worth the effort. This patch makes `PurePathBase` raise `TypeError` when given non-`str` path segments. --- Lib/pathlib/_abc.py | 7 +++---- Lib/test/test_pathlib/test_pathlib.py | 18 +--------------- Lib/test/test_pathlib/test_pathlib_abc.py | 25 +++++++++++++++++++++++ 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 85884bc4b4cb47..91f5cd6c01e9d0 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -207,6 +207,9 @@ class PurePathBase: def __init__(self, path, *paths): self._raw_path = self.pathmod.join(path, *paths) if paths else path + if not isinstance(self._raw_path, str): + raise TypeError( + f"path should be a str, not {type(self._raw_path).__name__!r}") self._resolving = False def with_segments(self, *pathsegments): @@ -321,8 +324,6 @@ def relative_to(self, other, *, walk_up=False): other = self.with_segments(other) anchor0, parts0 = self._stack anchor1, parts1 = other._stack - if isinstance(anchor0, str) != isinstance(anchor1, str): - raise TypeError(f"{self._raw_path!r} and {other._raw_path!r} have different types") if anchor0 != anchor1: raise ValueError(f"{self._raw_path!r} and {other._raw_path!r} have different anchors") while parts0 and parts1 and parts0[-1] == parts1[-1]: @@ -346,8 +347,6 @@ def is_relative_to(self, other): other = self.with_segments(other) anchor0, parts0 = self._stack anchor1, parts1 = other._stack - if isinstance(anchor0, str) != isinstance(anchor1, str): - raise TypeError(f"{self._raw_path!r} and {other._raw_path!r} have different types") if anchor0 != anchor1: return False while parts0 and parts1 and parts0[-1] == parts1[-1]: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 3bee9b8c762b80..2b166451243775 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -189,7 +189,7 @@ def test_fspath_common(self): self._check_str(p.__fspath__(), ('a/b',)) self._check_str(os.fspath(p), ('a/b',)) - def test_bytes(self): + def test_bytes_exc_message(self): P = self.cls message = (r"argument should be a str or an os\.PathLike object " r"where __fspath__ returns a str, not 'bytes'") @@ -199,22 +199,6 @@ def test_bytes(self): P(b'a', 'b') with self.assertRaisesRegex(TypeError, message): P('a', b'b') - with self.assertRaises(TypeError): - P('a').joinpath(b'b') - with self.assertRaises(TypeError): - P('a') / b'b' - with self.assertRaises(TypeError): - b'a' / P('b') - with self.assertRaises(TypeError): - P('a').match(b'b') - with self.assertRaises(TypeError): - P('a').relative_to(b'b') - with self.assertRaises(TypeError): - P('a').with_name(b'b') - with self.assertRaises(TypeError): - P('a').with_stem(b'b') - with self.assertRaises(TypeError): - P('a').with_suffix(b'b') def test_as_bytes_common(self): sep = os.fsencode(self.sep) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 0e12182c162c14..207579ccbf443b 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -155,6 +155,31 @@ def test_constructor_common(self): P('a/b/c') P('/a/b/c') + def test_bytes(self): + P = self.cls + with self.assertRaises(TypeError): + P(b'a') + with self.assertRaises(TypeError): + P(b'a', 'b') + with self.assertRaises(TypeError): + P('a', b'b') + with self.assertRaises(TypeError): + P('a').joinpath(b'b') + with self.assertRaises(TypeError): + P('a') / b'b' + with self.assertRaises(TypeError): + b'a' / P('b') + with self.assertRaises(TypeError): + P('a').match(b'b') + with self.assertRaises(TypeError): + P('a').relative_to(b'b') + with self.assertRaises(TypeError): + P('a').with_name(b'b') + with self.assertRaises(TypeError): + P('a').with_stem(b'b') + with self.assertRaises(TypeError): + P('a').with_suffix(b'b') + def _check_str_subclass(self, *args): # Issue #21127: it should be possible to construct a PurePath object # from a str subclass instance, and it then gets converted to From 2ed8f924ee05ec17ce0639a424e5ca7661b09a6b Mon Sep 17 00:00:00 2001 From: Brett Cannon <brett@python.org> Date: Tue, 30 Jan 2024 17:49:27 -0800 Subject: [PATCH 096/127] GH-114743: Set a low recursion limit for `test_main_recursion_error()` in `test_runpy` (GH-114772) This can fail under a debug build of WASI when directly executing test.test_runpy. --- .gitignore | 2 +- Lib/test/test_runpy.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 18eb2a9f0632ce..6ed7197e3ab626 100644 --- a/.gitignore +++ b/.gitignore @@ -159,5 +159,5 @@ Python/frozen_modules/MANIFEST /python !/Python/ -# main branch only: ABI files are not checked/maintained +# main branch only: ABI files are not checked/maintained. Doc/data/python*.abi diff --git a/Lib/test/test_runpy.py b/Lib/test/test_runpy.py index 57fe859e366b5b..9d76764c75be3e 100644 --- a/Lib/test/test_runpy.py +++ b/Lib/test/test_runpy.py @@ -12,7 +12,8 @@ import textwrap import unittest import warnings -from test.support import no_tracing, verbose, requires_subprocess, requires_resource +from test.support import (infinite_recursion, no_tracing, verbose, + requires_subprocess, requires_resource) from test.support.import_helper import forget, make_legacy_pyc, unload from test.support.os_helper import create_empty_file, temp_dir from test.support.script_helper import make_script, make_zip_script @@ -743,7 +744,8 @@ def test_main_recursion_error(self): "runpy.run_path(%r)\n") % dummy_dir script_name = self._make_test_script(script_dir, mod_name, source) zip_name, fname = make_zip_script(script_dir, 'test_zip', script_name) - self.assertRaises(RecursionError, run_path, zip_name) + with infinite_recursion(25): + self.assertRaises(RecursionError, run_path, zip_name) def test_encoding(self): with temp_dir() as script_dir: From c8cf5d7d148944f2850f25b02334400dd0238cb0 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" <erlend@python.org> Date: Wed, 31 Jan 2024 07:59:34 +0100 Subject: [PATCH 097/127] Docs: mark up dbm.gnu.open() and dbm.ndbm.open() using param list (#114762) --- Doc/library/dbm.rst | 117 +++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 61 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 076e86143d06a6..9bb5e5f8950956 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -52,6 +52,10 @@ the Oracle Berkeley DB. .. |flag_n| replace:: Always create a new, empty database, open for reading and writing. +.. |mode_param_doc| replace:: + The Unix file access mode of the file (default: octal ``0o666``), + used only when the database has to be created. + .. |incompat_note| replace:: The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible and can not be used interchangeably. @@ -69,14 +73,13 @@ the Oracle Berkeley DB. :type file: :term:`path-like object` :param str flag: - * ``'r'`` (default), |flag_r| - * ``'w'``, |flag_w| - * ``'c'``, |flag_c| - * ``'n'``, |flag_n| + * ``'r'`` (default): |flag_r| + * ``'w'``: |flag_w| + * ``'c'``: |flag_c| + * ``'n'``: |flag_n| :param int mode: - The Unix file access mode of the file (default: octal ``0o666``), - used only when the database has to be created. + |mode_param_doc| .. versionchanged:: 3.11 *file* accepts a :term:`path-like object`. @@ -171,47 +174,45 @@ and the :meth:`!items` and :meth:`!values` methods are not supported. .. function:: open(filename, flag="r", mode=0o666, /) - Open a GDBM database and return a :class:`!gdbm` object. The *filename* - argument is the name of the database file. - - The optional *flag* argument can be: + Open a GDBM database and return a :class:`!gdbm` object. - .. csv-table:: - :header: "Value", "Meaning" + :param filename: + The database file to open. + :type filename: :term:`path-like object` - ``'r'`` (default), |flag_r| - ``'w'``, |flag_w| - ``'c'``, |flag_c| - ``'n'``, |flag_n| + :param str flag: + * ``'r'`` (default): |flag_r| + * ``'w'``: |flag_w| + * ``'c'``: |flag_c| + * ``'n'``: |flag_n| - The following additional characters may be appended to the flag to control - how the database is opened: + The following additional characters may be appended + to control how the database is opened: - +---------+--------------------------------------------+ - | Value | Meaning | - +=========+============================================+ - | ``'f'`` | Open the database in fast mode. Writes | - | | to the database will not be synchronized. | - +---------+--------------------------------------------+ - | ``'s'`` | Synchronized mode. This will cause changes | - | | to the database to be immediately written | - | | to the file. | - +---------+--------------------------------------------+ - | ``'u'`` | Do not lock database. | - +---------+--------------------------------------------+ + * ``'f'``: Open the database in fast mode. + Writes to the database will not be synchronized. + * ``'s'``: Synchronized mode. + Changes to the database will be written immediately to the file. + * ``'u'``: Do not lock database. - Not all flags are valid for all versions of GDBM. The module constant - :const:`open_flags` is a string of supported flag characters. The exception - :exc:`error` is raised if an invalid flag is specified. + Not all flags are valid for all versions of GDBM. + See the :data:`open_flags` member for a list of supported flag characters. - The optional *mode* argument is the Unix mode of the file, used only when the - database has to be created. It defaults to octal ``0o666``. + :param int mode: + |mode_param_doc| - In addition to the dictionary-like methods, :class:`gdbm` objects have the - following methods: + :raises error: + If an invalid *flag* argument is passed. .. versionchanged:: 3.11 - Accepts :term:`path-like object` for filename. + *filename* accepts a :term:`path-like object`. + + .. data:: open_flags + + A string of characters the *flag* parameter of :meth:`~dbm.gnu.open` supports. + + In addition to the dictionary-like methods, :class:`gdbm` objects have the + following methods and attributes: .. method:: gdbm.firstkey() @@ -298,22 +299,20 @@ This module can be used with the "classic" NDBM interface or the .. function:: open(filename, flag="r", mode=0o666, /) Open an NDBM database and return an :class:`!ndbm` object. - The *filename* argument is the name of the database file - (without the :file:`.dir` or :file:`.pag` extensions). - - The optional *flag* argument must be one of these values: - .. csv-table:: - :header: "Value", "Meaning" + :param filename: + The basename of the database file + (without the :file:`.dir` or :file:`.pag` extensions). + :type filename: :term:`path-like object` - ``'r'`` (default), |flag_r| - ``'w'``, |flag_w| - ``'c'``, |flag_c| - ``'n'``, |flag_n| + :param str flag: + * ``'r'`` (default): |flag_r| + * ``'w'``: |flag_w| + * ``'c'``: |flag_c| + * ``'n'``: |flag_n| - The optional *mode* argument is the Unix mode of the file, used only when the - database has to be created. It defaults to octal ``0o666`` (and will be - modified by the prevailing umask). + :param int mode: + |mode_param_doc| In addition to the dictionary-like methods, :class:`!ndbm` objects provide the following method: @@ -382,17 +381,13 @@ The :mod:`!dbm.dumb` module defines the following: :type database: :term:`path-like object` :param str flag: - .. csv-table:: - :header: "Value", "Meaning" - - ``'r'``, |flag_r| - ``'w'``, |flag_w| - ``'c'`` (default), |flag_c| - ``'n'``, |flag_n| + * ``'r'``: |flag_r| + * ``'w'``: |flag_w| + * ``'c'`` (default): |flag_c| + * ``'n'``: |flag_n| :param int mode: - The Unix file access mode of the file (default: ``0o666``), - used only when the database has to be created. + |mode_param_doc| .. warning:: It is possible to crash the Python interpreter when loading a database @@ -400,7 +395,7 @@ The :mod:`!dbm.dumb` module defines the following: Python's AST compiler. .. versionchanged:: 3.5 - :func:`open` always creates a new database when *flag* is ``'n'``. + :func:`~dbm.dumb.open` always creates a new database when *flag* is ``'n'``. .. versionchanged:: 3.8 A database opened read-only if *flag* is ``'r'``. From 5e390a0fc825f21952beb158e2bda3c5e007fac9 Mon Sep 17 00:00:00 2001 From: Daniel Hollas <danekhollas@gmail.com> Date: Wed, 31 Jan 2024 09:29:44 +0000 Subject: [PATCH 098/127] gh-109653: Speedup import of threading module (#114509) Avoiding an import of functools leads to 50% speedup of import time. Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com> --- Lib/threading.py | 4 +--- .../Library/2024-01-23-23-13-47.gh-issue-109653.KLBHmT.rst | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-23-23-13-47.gh-issue-109653.KLBHmT.rst diff --git a/Lib/threading.py b/Lib/threading.py index 00b95f8d92a1f0..75a08e5aac97d6 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -3,7 +3,6 @@ import os as _os import sys as _sys import _thread -import functools import warnings from time import monotonic as _time @@ -1630,8 +1629,7 @@ def _register_atexit(func, *arg, **kwargs): if _SHUTTING_DOWN: raise RuntimeError("can't register atexit after shutdown") - call = functools.partial(func, *arg, **kwargs) - _threading_atexits.append(call) + _threading_atexits.append(lambda: func(*arg, **kwargs)) from _thread import stack_size diff --git a/Misc/NEWS.d/next/Library/2024-01-23-23-13-47.gh-issue-109653.KLBHmT.rst b/Misc/NEWS.d/next/Library/2024-01-23-23-13-47.gh-issue-109653.KLBHmT.rst new file mode 100644 index 00000000000000..76074df9c76fa6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-23-23-13-47.gh-issue-109653.KLBHmT.rst @@ -0,0 +1 @@ +Reduce the import time of :mod:`threading` module by ~50%. Patch by Daniel Hollas. From 7a93db44257c0404dc407ff2ddc997f4bb8890ed Mon Sep 17 00:00:00 2001 From: Skip Montanaro <skip.montanaro@gmail.com> Date: Wed, 31 Jan 2024 03:33:10 -0600 Subject: [PATCH 099/127] gh-101100: Fix class reference in library/test.rst (GH-114769) The text clearly seems to be referencing `TestFuncAcceptsSequencesMixin`, for which no target is available. Name the class properly and suppress the dangling reference. --- Doc/library/test.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 9173db07fd0071..cad1023021a512 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -143,7 +143,7 @@ guidelines to be followed: arg = (1, 2, 3) When using this pattern, remember that all classes that inherit from - :class:`unittest.TestCase` are run as tests. The :class:`Mixin` class in the example above + :class:`unittest.TestCase` are run as tests. The :class:`!TestFuncAcceptsSequencesMixin` class in the example above does not have any data and so can't be run by itself, thus it does not inherit from :class:`unittest.TestCase`. From b7688ef71eddcaf14f71b1c22ff2f164f34b2c74 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka <storchaka@gmail.com> Date: Wed, 31 Jan 2024 13:11:35 +0200 Subject: [PATCH 100/127] gh-114685: Check flags in PyObject_GetBuffer() (GH-114707) PyObject_GetBuffer() now raises a SystemError if called with PyBUF_READ or PyBUF_WRITE as flags. These flags should only be used with the PyMemoryView_* C API. --- Lib/test/test_buffer.py | 6 ++++++ .../C API/2024-01-29-12-13-24.gh-issue-114685.B07RME.rst | 3 +++ Modules/_testcapi/buffer.c | 6 ++++-- Objects/abstract.c | 6 ++++++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-01-29-12-13-24.gh-issue-114685.B07RME.rst diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py index 72a06d6af450e3..535b795f508a24 100644 --- a/Lib/test/test_buffer.py +++ b/Lib/test/test_buffer.py @@ -4585,6 +4585,12 @@ def test_c_buffer(self): buf.__release_buffer__(mv) self.assertEqual(buf.references, 0) + @unittest.skipIf(_testcapi is None, "requires _testcapi") + def test_c_buffer_invalid_flags(self): + buf = _testcapi.testBuf() + self.assertRaises(SystemError, buf.__buffer__, PyBUF_READ) + self.assertRaises(SystemError, buf.__buffer__, PyBUF_WRITE) + def test_inheritance(self): class A(bytearray): def __buffer__(self, flags): diff --git a/Misc/NEWS.d/next/C API/2024-01-29-12-13-24.gh-issue-114685.B07RME.rst b/Misc/NEWS.d/next/C API/2024-01-29-12-13-24.gh-issue-114685.B07RME.rst new file mode 100644 index 00000000000000..55b02d1d8e1e9f --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-01-29-12-13-24.gh-issue-114685.B07RME.rst @@ -0,0 +1,3 @@ +:c:func:`PyObject_GetBuffer` now raises a :exc:`SystemError` if called with +:c:macro:`PyBUF_READ` or :c:macro:`PyBUF_WRITE` as flags. These flags should +only be used with the ``PyMemoryView_*`` C API. diff --git a/Modules/_testcapi/buffer.c b/Modules/_testcapi/buffer.c index 942774156c6c47..7e2f6e5e29482c 100644 --- a/Modules/_testcapi/buffer.c +++ b/Modules/_testcapi/buffer.c @@ -54,8 +54,10 @@ static int testbuf_getbuf(testBufObject *self, Py_buffer *view, int flags) { int buf = PyObject_GetBuffer(self->obj, view, flags); - Py_SETREF(view->obj, Py_NewRef(self)); - self->references++; + if (buf == 0) { + Py_SETREF(view->obj, Py_NewRef(self)); + self->references++; + } return buf; } diff --git a/Objects/abstract.c b/Objects/abstract.c index 1ec5c5b8c3dc2f..daf04eb4ab2cda 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -425,6 +425,12 @@ PyObject_AsWriteBuffer(PyObject *obj, int PyObject_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { + if (flags != PyBUF_SIMPLE) { /* fast path */ + if (flags == PyBUF_READ || flags == PyBUF_WRITE) { + PyErr_BadInternalCall(); + return -1; + } + } PyBufferProcs *pb = Py_TYPE(obj)->tp_as_buffer; if (pb == NULL || pb->bf_getbuffer == NULL) { From 66f95ea6a65deff547cab0d312b8c8c8a4cf8beb Mon Sep 17 00:00:00 2001 From: Sam Gross <colesbury@gmail.com> Date: Wed, 31 Jan 2024 06:22:24 -0500 Subject: [PATCH 101/127] gh-114737: Revert change to ElementTree.iterparse "root" attribute (GH-114755) Prior to gh-114269, the iterator returned by ElementTree.iterparse was initialized with the root attribute as None. This restores the previous behavior. --- Lib/test/test_xml_etree.py | 2 ++ Lib/xml/etree/ElementTree.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index b9e7937b0bbc00..221545b315fa44 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -536,7 +536,9 @@ def test_iterparse(self): iterparse = ET.iterparse context = iterparse(SIMPLE_XMLFILE) + self.assertIsNone(context.root) action, elem = next(context) + self.assertIsNone(context.root) self.assertEqual((action, elem.tag), ('end', 'element')) self.assertEqual([(action, elem.tag) for action, elem in context], [ ('end', 'element'), diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index ae6575028be11c..bb7362d1634a72 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1256,8 +1256,8 @@ def __del__(self): source.close() it = IterParseIterator() + it.root = None wr = weakref.ref(it) - del IterParseIterator return it From 25ce7f872df661de9392122df17111c75c77dee0 Mon Sep 17 00:00:00 2001 From: Alex Waygood <Alex.Waygood@Gmail.com> Date: Wed, 31 Jan 2024 11:28:23 +0000 Subject: [PATCH 102/127] Remove Alex Waygood as an Argument Clinic CODEOWNER (#114796) --- .github/CODEOWNERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f4d0411504a832..7933d319550576 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -230,8 +230,8 @@ Doc/c-api/stable.rst @encukou **/*zipfile/_path/* @jaraco # Argument Clinic -/Tools/clinic/** @erlend-aasland @AlexWaygood -/Lib/test/test_clinic.py @erlend-aasland @AlexWaygood +/Tools/clinic/** @erlend-aasland +/Lib/test/test_clinic.py @erlend-aasland Doc/howto/clinic.rst @erlend-aasland # Subinterpreters From 1c2ea8b33c6b1f995db0aca0b223a9cc22426708 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev <mail@sobolevn.me> Date: Wed, 31 Jan 2024 14:32:27 +0300 Subject: [PATCH 103/127] gh-114790: Do not execute `workflows/require-pr-label.yml` on forks (#114791) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/workflows/require-pr-label.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/require-pr-label.yml b/.github/workflows/require-pr-label.yml index 080204bcfd3b94..ff5cbdf3eda749 100644 --- a/.github/workflows/require-pr-label.yml +++ b/.github/workflows/require-pr-label.yml @@ -11,6 +11,7 @@ permissions: jobs: label: name: DO-NOT-MERGE / unresolved review + if: github.repository_owner == 'python' runs-on: ubuntu-latest timeout-minutes: 10 From 765b9ce9fb357bdb79a50ce51207c827fbd13dd8 Mon Sep 17 00:00:00 2001 From: Tian Gao <gaogaotiantian@hotmail.com> Date: Wed, 31 Jan 2024 05:03:05 -0800 Subject: [PATCH 104/127] gh-59013: Set breakpoint on the first executable line of function when using `break func` in pdb (#112470) --- Lib/pdb.py | 51 ++++++++++++------- Lib/test/test_pdb.py | 31 +++++++++-- ...3-11-27-19-54-43.gh-issue-59013.chpQ0e.rst | 1 + 3 files changed, 61 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-27-19-54-43.gh-issue-59013.chpQ0e.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 6f7719eb9ba6c5..0754e8b628cf57 100755 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -97,17 +97,47 @@ class Restart(Exception): __all__ = ["run", "pm", "Pdb", "runeval", "runctx", "runcall", "set_trace", "post_mortem", "help"] + +def find_first_executable_line(code): + """ Try to find the first executable line of the code object. + + Equivalently, find the line number of the instruction that's + after RESUME + + Return code.co_firstlineno if no executable line is found. + """ + prev = None + for instr in dis.get_instructions(code): + if prev is not None and prev.opname == 'RESUME': + if instr.positions.lineno is not None: + return instr.positions.lineno + return code.co_firstlineno + prev = instr + return code.co_firstlineno + def find_function(funcname, filename): cre = re.compile(r'def\s+%s\s*[(]' % re.escape(funcname)) try: fp = tokenize.open(filename) except OSError: return None + funcdef = "" + funcstart = None # consumer of this info expects the first line to be 1 with fp: for lineno, line in enumerate(fp, start=1): if cre.match(line): - return funcname, filename, lineno + funcstart, funcdef = lineno, line + elif funcdef: + funcdef += line + + if funcdef: + try: + funccode = compile(funcdef, filename, 'exec').co_consts[0] + except SyntaxError: + continue + lineno_offset = find_first_executable_line(funccode) + return funcname, filename, funcstart + lineno_offset - 1 return None def lasti2lineno(code, lasti): @@ -975,7 +1005,7 @@ def do_break(self, arg, temporary = 0): #use co_name to identify the bkpt (function names #could be aliased, but co_name is invariant) funcname = code.co_name - lineno = self._find_first_executable_line(code) + lineno = find_first_executable_line(code) filename = code.co_filename except: # last thing to try @@ -1078,23 +1108,6 @@ def checkline(self, filename, lineno): return 0 return lineno - def _find_first_executable_line(self, code): - """ Try to find the first executable line of the code object. - - Equivalently, find the line number of the instruction that's - after RESUME - - Return code.co_firstlineno if no executable line is found. - """ - prev = None - for instr in dis.get_instructions(code): - if prev is not None and prev.opname == 'RESUME': - if instr.positions.lineno is not None: - return instr.positions.lineno - return code.co_firstlineno - prev = instr - return code.co_firstlineno - def do_enable(self, arg): """enable bpnumber [bpnumber ...] diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index c64df62c761471..b2283cff6cb462 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -2661,7 +2661,7 @@ def quux(): pass """.encode(), 'bœr', - ('bœr', 4), + ('bœr', 5), ) def test_find_function_found_with_encoding_cookie(self): @@ -2678,7 +2678,7 @@ def quux(): pass """.encode('iso-8859-15'), 'bœr', - ('bœr', 5), + ('bœr', 6), ) def test_find_function_found_with_bom(self): @@ -2688,9 +2688,34 @@ def bœr(): pass """.encode(), 'bœr', - ('bœr', 1), + ('bœr', 2), ) + def test_find_function_first_executable_line(self): + code = textwrap.dedent("""\ + def foo(): pass + + def bar(): + pass # line 4 + + def baz(): + # comment + pass # line 8 + + def mul(): + # code on multiple lines + code = compile( # line 12 + 'def f()', + '<string>', + 'exec', + ) + """).encode() + + self._assert_find_function(code, 'foo', ('foo', 1)) + self._assert_find_function(code, 'bar', ('bar', 4)) + self._assert_find_function(code, 'baz', ('baz', 8)) + self._assert_find_function(code, 'mul', ('mul', 12)) + def test_issue7964(self): # open the file as binary so we can force \r\n newline with open(os_helper.TESTFN, 'wb') as f: diff --git a/Misc/NEWS.d/next/Library/2023-11-27-19-54-43.gh-issue-59013.chpQ0e.rst b/Misc/NEWS.d/next/Library/2023-11-27-19-54-43.gh-issue-59013.chpQ0e.rst new file mode 100644 index 00000000000000..a2be2fb8eacf17 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-27-19-54-43.gh-issue-59013.chpQ0e.rst @@ -0,0 +1 @@ +Set breakpoint on the first executable line of the function, instead of the line of function definition when the user do ``break func`` using :mod:`pdb` From b905fad83819ec9102ecfb97e3d8ab0aaddd9784 Mon Sep 17 00:00:00 2001 From: Nachtalb <9467802+Nachtalb@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:33:46 +0100 Subject: [PATCH 105/127] gh-111741: Recognise image/webp as a standard format in the mimetypes module (GH-111742) Previously it was supported as a non-standard type. --- Lib/mimetypes.py | 2 +- Lib/test/test_mimetypes.py | 3 +-- .../Library/2023-11-04-22-32-27.gh-issue-111741.f1ufr8.rst | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-04-22-32-27.gh-issue-111741.f1ufr8.rst diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 37228de4828de5..51b99701c9d727 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -528,6 +528,7 @@ def _default_mime_types(): '.tiff' : 'image/tiff', '.tif' : 'image/tiff', '.ico' : 'image/vnd.microsoft.icon', + '.webp' : 'image/webp', '.ras' : 'image/x-cmu-raster', '.pnm' : 'image/x-portable-anymap', '.pbm' : 'image/x-portable-bitmap', @@ -587,7 +588,6 @@ def _default_mime_types(): '.pict': 'image/pict', '.pct' : 'image/pict', '.pic' : 'image/pict', - '.webp': 'image/webp', '.xul' : 'text/xul', } diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index d64aee71fc48b1..01bba0ac2eed5a 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -96,14 +96,12 @@ def test_non_standard_types(self): # First try strict eq(self.db.guess_type('foo.xul', strict=True), (None, None)) eq(self.db.guess_extension('image/jpg', strict=True), None) - eq(self.db.guess_extension('image/webp', strict=True), None) # And then non-strict eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None)) eq(self.db.guess_type('foo.XUL', strict=False), ('text/xul', None)) eq(self.db.guess_type('foo.invalid', strict=False), (None, None)) eq(self.db.guess_extension('image/jpg', strict=False), '.jpg') eq(self.db.guess_extension('image/JPG', strict=False), '.jpg') - eq(self.db.guess_extension('image/webp', strict=False), '.webp') def test_filename_with_url_delimiters(self): # bpo-38449: URL delimiters cases should be handled also. @@ -183,6 +181,7 @@ def check_extensions(): self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl') self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3') self.assertEqual(mimetypes.guess_extension('image/avif'), '.avif') + self.assertEqual(mimetypes.guess_extension('image/webp'), '.webp') self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg') self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff') self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml') diff --git a/Misc/NEWS.d/next/Library/2023-11-04-22-32-27.gh-issue-111741.f1ufr8.rst b/Misc/NEWS.d/next/Library/2023-11-04-22-32-27.gh-issue-111741.f1ufr8.rst new file mode 100644 index 00000000000000..e43f93a270ce9c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-04-22-32-27.gh-issue-111741.f1ufr8.rst @@ -0,0 +1 @@ +Recognise ``image/webp`` as a standard format in the :mod:`mimetypes` module. From 78c254582b1757c15098ae65e97a2589ae663cd7 Mon Sep 17 00:00:00 2001 From: Albert Zeyer <albzey@gmail.com> Date: Wed, 31 Jan 2024 20:14:44 +0100 Subject: [PATCH 106/127] gh-113939: Frame clear, clear locals (#113940) --- Lib/test/test_frame.py | 22 +++++++++++++++++++ ...-01-12-16-40-07.gh-issue-113939.Yi3L-e.rst | 4 ++++ Objects/frameobject.c | 1 + 3 files changed, 27 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-12-16-40-07.gh-issue-113939.Yi3L-e.rst diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index 7f17666a8d9697..244ce8af7cdf08 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -55,6 +55,28 @@ class C: # The reference was released by .clear() self.assertIs(None, wr()) + def test_clear_locals_after_f_locals_access(self): + # see gh-113939 + class C: + pass + + wr = None + def inner(): + nonlocal wr + c = C() + wr = weakref.ref(c) + 1/0 + + try: + inner() + except ZeroDivisionError as exc: + support.gc_collect() + self.assertIsNotNone(wr()) + print(exc.__traceback__.tb_next.tb_frame.f_locals) + exc.__traceback__.tb_next.tb_frame.clear() + support.gc_collect() + self.assertIsNone(wr()) + def test_clear_does_not_clear_specials(self): class C: pass diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-12-16-40-07.gh-issue-113939.Yi3L-e.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-12-16-40-07.gh-issue-113939.Yi3L-e.rst new file mode 100644 index 00000000000000..28b8e4bdda6be4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-12-16-40-07.gh-issue-113939.Yi3L-e.rst @@ -0,0 +1,4 @@ +frame.clear(): +Clear frame.f_locals as well, and not only the fast locals. +This is relevant once frame.f_locals was accessed, +which would contain also references to all the locals. diff --git a/Objects/frameobject.c b/Objects/frameobject.c index cafe4ef6141d9a..a914c61aac2fd5 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -926,6 +926,7 @@ frame_tp_clear(PyFrameObject *f) Py_CLEAR(locals[i]); } f->f_frame->stacktop = 0; + Py_CLEAR(f->f_frame->f_locals); return 0; } From b25b7462d520f38049d25888f220f20f759bc077 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev <mail@sobolevn.me> Date: Wed, 31 Jan 2024 22:51:18 +0300 Subject: [PATCH 107/127] gh-114788: Do not run JIT workflow on unrelated changes (#114789) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/workflows/jit.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 3da72919181255..22e0bdba53ffd6 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -1,10 +1,19 @@ name: JIT on: pull_request: - paths: '**jit**' + paths: + - '**jit**' + - 'Python/bytecodes.c' push: - paths: '**jit**' + paths: + - '**jit**' + - 'Python/bytecodes.c' workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + jobs: jit: name: ${{ matrix.target }} (${{ matrix.debug && 'Debug' || 'Release' }}) From 1836f674c0d86ec3375189a550c8f4a52ff89ae8 Mon Sep 17 00:00:00 2001 From: Bradley Reynolds <bradley.reynolds@darbia.dev> Date: Wed, 31 Jan 2024 15:33:28 -0600 Subject: [PATCH 108/127] Add note to `sys.orig_argv` clarifying the difference from `sys.argv` (#114630) Co-authored-by: Ned Batchelder <ned@nedbatchelder.com> --- Doc/library/sys.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index abf2c393a44928..a97a369b77b88a 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1293,7 +1293,10 @@ always available. The list of the original command line arguments passed to the Python executable. - See also :data:`sys.argv`. + The elements of :data:`sys.orig_argv` are the arguments to the Python interpreter, + while the elements of :data:`sys.argv` are the arguments to the user's program. + Arguments consumed by the interpreter itself will be present in :data:`sys.orig_argv` + and missing from :data:`sys.argv`. .. versionadded:: 3.10 From 7b9d406729e7e7adc482b5b8c920de1874c234d0 Mon Sep 17 00:00:00 2001 From: Donghee Na <donghee.na@python.org> Date: Thu, 1 Feb 2024 08:58:08 +0900 Subject: [PATCH 109/127] gh-112087: Make PyList_{Append,Size,GetSlice} to be thread-safe (gh-114651) --- Include/internal/pycore_list.h | 3 ++- Include/object.h | 4 ++++ Objects/listobject.c | 22 +++++++++++++++------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 6c29d882335512..4536f90e414493 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -24,12 +24,13 @@ extern void _PyList_Fini(_PyFreeListState *); extern int _PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem); +// In free-threaded build: self should be locked by the caller, if it should be thread-safe. static inline int _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem) { assert(self != NULL && newitem != NULL); assert(PyList_Check(self)); - Py_ssize_t len = PyList_GET_SIZE(self); + Py_ssize_t len = Py_SIZE(self); Py_ssize_t allocated = self->allocated; assert((size_t)len + 1 < PY_SSIZE_T_MAX); if (allocated > len) { diff --git a/Include/object.h b/Include/object.h index ef3fb721c2b012..568d315d7606c4 100644 --- a/Include/object.h +++ b/Include/object.h @@ -428,7 +428,11 @@ static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { assert(ob->ob_base.ob_type != &PyLong_Type); assert(ob->ob_base.ob_type != &PyBool_Type); +#ifdef Py_GIL_DISABLED + _Py_atomic_store_ssize_relaxed(&ob->ob_size, size); +#else ob->ob_size = size; +#endif } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_SET_SIZE(ob, size) Py_SET_SIZE(_PyVarObject_CAST(ob), (size)) diff --git a/Objects/listobject.c b/Objects/listobject.c index 56785e5f37a450..80a1f1da55b8bc 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -30,7 +30,6 @@ get_list_state(void) } #endif - /* Ensure ob_item has room for at least newsize elements, and set * ob_size to newsize. If newsize > ob_size on entry, the content * of the new slots at exit is undefined heap trash; it's the caller's @@ -221,8 +220,9 @@ PyList_Size(PyObject *op) PyErr_BadInternalCall(); return -1; } - else - return Py_SIZE(op); + else { + return PyList_GET_SIZE(op); + } } static inline int @@ -328,7 +328,7 @@ PyList_Insert(PyObject *op, Py_ssize_t where, PyObject *newitem) int _PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem) { - Py_ssize_t len = PyList_GET_SIZE(self); + Py_ssize_t len = Py_SIZE(self); assert(self->allocated == -1 || self->allocated == len); if (list_resize(self, len + 1) < 0) { Py_DECREF(newitem); @@ -342,7 +342,11 @@ int PyList_Append(PyObject *op, PyObject *newitem) { if (PyList_Check(op) && (newitem != NULL)) { - return _PyList_AppendTakeRef((PyListObject *)op, Py_NewRef(newitem)); + int ret; + Py_BEGIN_CRITICAL_SECTION(op); + ret = _PyList_AppendTakeRef((PyListObject *)op, Py_NewRef(newitem)); + Py_END_CRITICAL_SECTION(); + return ret; } PyErr_BadInternalCall(); return -1; @@ -473,7 +477,7 @@ static PyObject * list_item(PyObject *aa, Py_ssize_t i) { PyListObject *a = (PyListObject *)aa; - if (!valid_index(i, Py_SIZE(a))) { + if (!valid_index(i, PyList_GET_SIZE(a))) { PyErr_SetObject(PyExc_IndexError, &_Py_STR(list_err)); return NULL; } @@ -511,6 +515,8 @@ PyList_GetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh) PyErr_BadInternalCall(); return NULL; } + PyObject *ret; + Py_BEGIN_CRITICAL_SECTION(a); if (ilow < 0) { ilow = 0; } @@ -523,7 +529,9 @@ PyList_GetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh) else if (ihigh > Py_SIZE(a)) { ihigh = Py_SIZE(a); } - return list_slice((PyListObject *)a, ilow, ihigh); + ret = list_slice((PyListObject *)a, ilow, ihigh); + Py_END_CRITICAL_SECTION(); + return ret; } static PyObject * From 80aa7b3688b8fdc85cd53d4113cb5f6ce5500027 Mon Sep 17 00:00:00 2001 From: Jamie Phan <jamie@ordinarylab.dev> Date: Thu, 1 Feb 2024 07:42:17 +0700 Subject: [PATCH 110/127] gh-109534: fix reference leak when SSL handshake fails (#114074) --- Lib/asyncio/selector_events.py | 4 ++++ Lib/asyncio/sslproto.py | 1 + .../Library/2024-01-15-18-42-44.gh-issue-109534.wYaLMZ.rst | 3 +++ 3 files changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-15-18-42-44.gh-issue-109534.wYaLMZ.rst diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index dcd5e0aa345029..10fbdd76e93f79 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -235,6 +235,10 @@ async def _accept_connection2( await waiter except BaseException: transport.close() + # gh-109534: When an exception is raised by the SSLProtocol object the + # exception set in this future can keep the protocol object alive and + # cause a reference cycle. + waiter = None raise # It's now up to the protocol to handle the connection. diff --git a/Lib/asyncio/sslproto.py b/Lib/asyncio/sslproto.py index 599e91ba0003d1..fa99d4533aa0a6 100644 --- a/Lib/asyncio/sslproto.py +++ b/Lib/asyncio/sslproto.py @@ -579,6 +579,7 @@ def _on_handshake_complete(self, handshake_exc): peercert = sslobj.getpeercert() except Exception as exc: + handshake_exc = None self._set_state(SSLProtocolState.UNWRAPPED) if isinstance(exc, ssl.CertificateError): msg = 'SSL handshake failed on verifying the certificate' diff --git a/Misc/NEWS.d/next/Library/2024-01-15-18-42-44.gh-issue-109534.wYaLMZ.rst b/Misc/NEWS.d/next/Library/2024-01-15-18-42-44.gh-issue-109534.wYaLMZ.rst new file mode 100644 index 00000000000000..fc9a765a230037 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-15-18-42-44.gh-issue-109534.wYaLMZ.rst @@ -0,0 +1,3 @@ +Fix a reference leak in +:class:`asyncio.selector_events.BaseSelectorEventLoop` when SSL handshakes +fail. Patch contributed by Jamie Phan. From a79a27242f75fc33416d4d135a4a542898d140e5 Mon Sep 17 00:00:00 2001 From: Aidan Holm <alfh@google.com> Date: Thu, 1 Feb 2024 08:42:38 +0800 Subject: [PATCH 111/127] gh-111112: Avoid potential confusion in TCP server example. (#111113) Improve misleading TCP server docs and example. socket.recv(), as documented by the Python reference documentation, returns at most `bufsize` bytes, and the underlying TCP protocol means there is no guaranteed correspondence between what is sent by the client and what is received by the server. This conflation could mislead readers into thinking that TCP is datagram-based or has similar semantics, which will likely appear to work for simple cases, but introduce difficult to reproduce bugs. --- Doc/library/socketserver.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 5fd213fa613c8d..864b1dadb78562 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -494,7 +494,7 @@ This is the server side:: def handle(self): # self.request is the TCP socket connected to the client self.data = self.request.recv(1024).strip() - print("{} wrote:".format(self.client_address[0])) + print("Received from {}:".format(self.client_address[0])) print(self.data) # just send back the same data, but upper-cased self.request.sendall(self.data.upper()) @@ -525,8 +525,9 @@ objects that simplify communication by providing the standard file interface):: The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the -single ``recv()`` call in the first handler will just return what has been sent -from the client in one ``sendall()`` call. +single ``recv()`` call in the first handler will just return what has been +received so far from the client's ``sendall()`` call (typically all of it, but +this is not guaranteed by the TCP protocol). This is the client side:: From 854e2bc42340b22cdeea5d16ac8b1ef3762c6909 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 1 Feb 2024 03:35:48 +0200 Subject: [PATCH 112/127] CI: Test on macOS M1 (#114766) Test on macOS M1 --- .github/workflows/reusable-macos.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index c24b6e963ddfd6..28e9dc52fd50ee 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -12,20 +12,27 @@ on: jobs: build_macos: name: 'build and test' - runs-on: macos-latest timeout-minutes: 60 env: HOMEBREW_NO_ANALYTICS: 1 HOMEBREW_NO_AUTO_UPDATE: 1 HOMEBREW_NO_INSTALL_CLEANUP: 1 PYTHONSTRICTEXTENSIONBUILD: 1 + strategy: + fail-fast: false + matrix: + os: [ + "macos-14", # M1 + "macos-13", # Intel + ] + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Restore config.cache uses: actions/cache@v3 with: path: config.cache - key: ${{ github.job }}-${{ runner.os }}-${{ inputs.config_hash }} + key: ${{ github.job }}-${{ matrix.os }}-${{ inputs.config_hash }} - name: Install Homebrew dependencies run: brew install pkg-config openssl@3.0 xz gdbm tcl-tk - name: Configure CPython From ff8939e5abaad7cd87f4d1f07ca7f6d176090f6c Mon Sep 17 00:00:00 2001 From: Pradyot Ranjan <99216956+prady0t@users.noreply.github.com> Date: Thu, 1 Feb 2024 07:18:39 +0530 Subject: [PATCH 113/127] gh-114811: Change '\*' to '*' in warnings.rst (#114819) Regression in 3.12. --- Doc/library/warnings.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst index a9c469707e8227..500398636e11ae 100644 --- a/Doc/library/warnings.rst +++ b/Doc/library/warnings.rst @@ -396,7 +396,7 @@ Available Functions ------------------- -.. function:: warn(message, category=None, stacklevel=1, source=None, \*, skip_file_prefixes=None) +.. function:: warn(message, category=None, stacklevel=1, source=None, *, skip_file_prefixes=None) Issue a warning, or maybe ignore it or raise an exception. The *category* argument, if given, must be a :ref:`warning category class <warning-categories>`; it From 586057e9f80d57f16334c0eee8431931e4aa8cff Mon Sep 17 00:00:00 2001 From: Skip Montanaro <skip.montanaro@gmail.com> Date: Wed, 31 Jan 2024 21:11:16 -0600 Subject: [PATCH 114/127] gh-67230: Add versionadded notes for QUOTE_NOTNULL and QUOTE_STRINGS (#114816) As @GPHemsley pointed out, #29469 omitted `versionadded` notes for the 2 new items. --- Doc/library/csv.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 66888c22b7cc28..fd62b225fcebb8 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -351,6 +351,8 @@ The :mod:`csv` module defines the following constants: Instructs :class:`reader` objects to interpret an empty (unquoted) field as None and to otherwise behave as :data:`QUOTE_ALL`. + .. versionadded:: 3.12 + .. data:: QUOTE_STRINGS Instructs :class:`writer` objects to always place quotes around fields @@ -360,6 +362,8 @@ The :mod:`csv` module defines the following constants: Instructs :class:`reader` objects to interpret an empty (unquoted) string as ``None`` and to otherwise behave as :data:`QUOTE_NONNUMERIC`. + .. versionadded:: 3.12 + The :mod:`csv` module defines the following exception: From 57c3e775df5a5ca0982adf15010ed80a158b1b80 Mon Sep 17 00:00:00 2001 From: srinivasan <shivnaren@gmail.com> Date: Thu, 1 Feb 2024 09:16:49 +0530 Subject: [PATCH 115/127] gh-114648: Add IndexError exception to tutorial datastructures list.pop entry (#114681) Remove redundant explanation of optional argument. --- Doc/tutorial/datastructures.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 87614d082a1d4e..de2827461e2f24 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -48,10 +48,9 @@ objects: :noindex: Remove the item at the given position in the list, and return it. If no index - is specified, ``a.pop()`` removes and returns the last item in the list. (The - square brackets around the *i* in the method signature denote that the parameter - is optional, not that you should type square brackets at that position. You - will see this notation frequently in the Python Library Reference.) + is specified, ``a.pop()`` removes and returns the last item in the list. + It raises an :exc:`IndexError` if the list is empty or the index is + outside the list range. .. method:: list.clear() From 5ce193e65a7e6f239337a8c5305895cf8a4d2726 Mon Sep 17 00:00:00 2001 From: technillogue <wisepoison@gmail.com> Date: Thu, 1 Feb 2024 01:03:58 -0500 Subject: [PATCH 116/127] gh-114364: Fix awkward wording about mmap.mmap.seekable (#114374) --------- Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru> Co-authored-by: Terry Jan Reedy <tjreedy@udel.edu> Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> --- Doc/whatsnew/3.13.rst | 4 ++-- Misc/NEWS.d/3.13.0a2.rst | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index b33203efbb05c0..6b94a3771406fa 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -259,8 +259,8 @@ mmap ---- * The :class:`mmap.mmap` class now has an :meth:`~mmap.mmap.seekable` method - that can be used where it requires a file-like object with seekable and - the :meth:`~mmap.mmap.seek` method return the new absolute position. + that can be used when a seekable file-like object is required. + The :meth:`~mmap.mmap.seek` method now returns the new absolute position. (Contributed by Donghee Na and Sylvie Liberman in :gh:`111835`.) * :class:`mmap.mmap` now has a *trackfd* parameter on Unix; if it is ``False``, the file descriptor specified by *fileno* will not be duplicated. diff --git a/Misc/NEWS.d/3.13.0a2.rst b/Misc/NEWS.d/3.13.0a2.rst index d4be4fb8a3d3ab..e5841e14c02efb 100644 --- a/Misc/NEWS.d/3.13.0a2.rst +++ b/Misc/NEWS.d/3.13.0a2.rst @@ -565,9 +565,9 @@ part of a :exc:`BaseExceptionGroup`, in addition to the recent support for .. section: Library The :class:`mmap.mmap` class now has an :meth:`~mmap.mmap.seekable` method -that can be used where it requires a file-like object with seekable and the -:meth:`~mmap.mmap.seek` method return the new absolute position. Patch by -Donghee Na. +that can be used when a seekable file-like object is required. +The :meth:`~mmap.mmap.seek` method now returns the new absolute position. +Patch by Donghee Na. .. From e6d6d5dcc00af50446761b0c4d20bd6e92380135 Mon Sep 17 00:00:00 2001 From: Sam Gross <colesbury@gmail.com> Date: Thu, 1 Feb 2024 04:26:23 -0500 Subject: [PATCH 117/127] gh-114746: Avoid quadratic behavior in free-threaded GC (GH-114817) The free-threaded build's GC implementation is non-generational, but was scheduled as if it were collecting a young generation leading to quadratic behavior. This increases the minimum threshold and scales it to the number of live objects as we do for the old generation in the default build. Note that the scheduling is still not thread-safe without the GIL. Those changes will come in later PRs. A few tests, like "test_sneaky_frame_object" rely on prompt scheduling of the GC. For now, to keep that test passing, we disable the scaled threshold after calls like `gc.set_threshold(1, 0, 0)`. --- Python/gc_free_threading.c | 102 +++++++++++-------------------------- 1 file changed, 29 insertions(+), 73 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index f2cd84981461a4..a6513a2c4aba2a 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -46,6 +46,7 @@ struct collection_state { GCState *gcstate; Py_ssize_t collected; Py_ssize_t uncollectable; + Py_ssize_t long_lived_total; struct worklist unreachable; struct worklist legacy_finalizers; struct worklist wrcb_to_call; @@ -443,7 +444,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, else { // object is reachable, restore `ob_tid`; we're done with these objects gc_restore_tid(op); - state->gcstate->long_lived_total++; + state->long_lived_total++; } return true; @@ -605,6 +606,8 @@ get_gc_state(void) void _PyGC_InitState(GCState *gcstate) { + // TODO: move to pycore_runtime_init.h once the incremental GC lands. + gcstate->generations[0].threshold = 2000; } @@ -885,62 +888,6 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, assert(!_PyErr_Occurred(tstate)); } - -/* Find the oldest generation (highest numbered) where the count - * exceeds the threshold. Objects in the that generation and - * generations younger than it will be collected. */ -static int -gc_select_generation(GCState *gcstate) -{ - for (int i = NUM_GENERATIONS-1; i >= 0; i--) { - if (gcstate->generations[i].count > gcstate->generations[i].threshold) { - /* Avoid quadratic performance degradation in number - of tracked objects (see also issue #4074): - - To limit the cost of garbage collection, there are two strategies; - - make each collection faster, e.g. by scanning fewer objects - - do less collections - This heuristic is about the latter strategy. - - In addition to the various configurable thresholds, we only trigger a - full collection if the ratio - - long_lived_pending / long_lived_total - - is above a given value (hardwired to 25%). - - The reason is that, while "non-full" collections (i.e., collections of - the young and middle generations) will always examine roughly the same - number of objects -- determined by the aforementioned thresholds --, - the cost of a full collection is proportional to the total number of - long-lived objects, which is virtually unbounded. - - Indeed, it has been remarked that doing a full collection every - <constant number> of object creations entails a dramatic performance - degradation in workloads which consist in creating and storing lots of - long-lived objects (e.g. building a large list of GC-tracked objects would - show quadratic performance, instead of linear as expected: see issue #4074). - - Using the above ratio, instead, yields amortized linear performance in - the total number of objects (the effect of which can be summarized - thusly: "each full garbage collection is more and more costly as the - number of objects grows, but we do fewer and fewer of them"). - - This heuristic was suggested by Martin von Löwis on python-dev in - June 2008. His original analysis and proposal can be found at: - http://mail.python.org/pipermail/python-dev/2008-June/080579.html - */ - if (i == NUM_GENERATIONS - 1 - && gcstate->long_lived_pending < gcstate->long_lived_total / 4) - { - continue; - } - return i; - } - } - return -1; -} - static void cleanup_worklist(struct worklist *worklist) { @@ -952,6 +899,21 @@ cleanup_worklist(struct worklist *worklist) } } +static bool +gc_should_collect(GCState *gcstate) +{ + int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count); + int threshold = gcstate->generations[0].threshold; + if (count <= threshold || threshold == 0 || !gcstate->enabled) { + return false; + } + // Avoid quadratic behavior by scaling threshold to the number of live + // objects. A few tests rely on immediate scheduling of the GC so we ignore + // the scaled threshold if generations[1].threshold is set to zero. + return (count > gcstate->long_lived_total / 4 || + gcstate->generations[1].threshold == 0); +} + static void gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) { @@ -1029,15 +991,10 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) return 0; } - if (generation == GENERATION_AUTO) { - // Select the oldest generation that needs collecting. We will collect - // objects from that generation and all generations younger than it. - generation = gc_select_generation(gcstate); - if (generation < 0) { - // No generation needs to be collected. - _Py_atomic_store_int(&gcstate->collecting, 0); - return 0; - } + if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(gcstate)) { + // Don't collect if the threshold is not exceeded. + _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; } assert(generation >= 0 && generation < NUM_GENERATIONS); @@ -1082,6 +1039,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) m = state.collected; n = state.uncollectable; + gcstate->long_lived_total = state.long_lived_total; if (gcstate->debug & _PyGC_DEBUG_STATS) { double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); @@ -1523,12 +1481,10 @@ _PyObject_GC_Link(PyObject *op) { PyThreadState *tstate = _PyThreadState_GET(); GCState *gcstate = &tstate->interp->gc; - gcstate->generations[0].count++; /* number of allocated GC objects */ - if (gcstate->generations[0].count > gcstate->generations[0].threshold && - gcstate->enabled && - gcstate->generations[0].threshold && - !_Py_atomic_load_int_relaxed(&gcstate->collecting) && - !_PyErr_Occurred(tstate)) + gcstate->generations[0].count++; + + if (gc_should_collect(gcstate) && + !_Py_atomic_load_int_relaxed(&gcstate->collecting)) { _Py_ScheduleGC(tstate->interp); } @@ -1537,7 +1493,7 @@ _PyObject_GC_Link(PyObject *op) void _Py_RunGC(PyThreadState *tstate) { - gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); + gc_collect_main(tstate, 0, _Py_GC_REASON_HEAP); } static PyObject * From de6f97cd3519c5d8528d8ca1bb00fce4e9969671 Mon Sep 17 00:00:00 2001 From: Christophe Nanteuil <35002064+christopheNan@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:34:04 +0100 Subject: [PATCH 118/127] Fix typos in ElementTree documentation (GH-108848) PI objects instead of comment objects. --- Doc/library/xml.etree.elementtree.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index fe92400fb08dfd..bb6773c361a9b4 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -664,7 +664,7 @@ Functions given. Returns an element instance, representing a processing instruction. Note that :class:`XMLParser` skips over processing instructions - in the input instead of creating comment objects for them. An + in the input instead of creating PI objects for them. An :class:`ElementTree` will only contain processing instruction nodes if they have been inserted into to the tree using one of the :class:`Element` methods. @@ -1302,8 +1302,8 @@ TreeBuilder Objects .. method:: pi(target, text) - Creates a comment with the given *target* name and *text*. If - ``insert_pis`` is true, this will also add it to the tree. + Creates a process instruction with the given *target* name and *text*. + If ``insert_pis`` is true, this will also add it to the tree. .. versionadded:: 3.8 From 21c01a009f970ccf73d2d3e4176b61fc8986adfa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 09:52:05 +0000 Subject: [PATCH 119/127] build(deps-dev): bump types-setuptools from 69.0.0.0 to 69.0.0.20240125 in /Tools (#114853) build(deps-dev): bump types-setuptools in /Tools Bumps [types-setuptools](https://github.com/python/typeshed) from 69.0.0.0 to 69.0.0.20240125. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-setuptools dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Tools/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index b89f86a35d6115..11cce505f60d16 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -4,4 +4,4 @@ mypy==1.8.0 # needed for peg_generator: types-psutil==5.9.5.17 -types-setuptools==69.0.0.0 +types-setuptools==69.0.0.20240125 From 93bfaa858c22742b7229897ae7c15e9b6e456fc3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:42:35 +0000 Subject: [PATCH 120/127] build(deps): bump hypothesis from 6.92.2 to 6.97.4 in /Tools (#114851) Bumps [hypothesis](https://github.com/HypothesisWorks/hypothesis) from 6.92.2 to 6.97.4. - [Release notes](https://github.com/HypothesisWorks/hypothesis/releases) - [Commits](https://github.com/HypothesisWorks/hypothesis/compare/hypothesis-python-6.92.2...hypothesis-python-6.97.4) --- updated-dependencies: - dependency-name: hypothesis dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Tools/requirements-hypothesis.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt index 0e6e16ae198162..064731a236ee86 100644 --- a/Tools/requirements-hypothesis.txt +++ b/Tools/requirements-hypothesis.txt @@ -1,4 +1,4 @@ # Requirements file for hypothesis that # we use to run our property-based tests in CI. -hypothesis==6.92.2 +hypothesis==6.97.4 From d4c5ec24c2bbb1e1d02d17b75709028aca84398e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:49:07 +0200 Subject: [PATCH 121/127] build(deps): bump actions/cache from 3 to 4 (#114856) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 16 ++++++++-------- .github/workflows/reusable-docs.yml | 2 +- .github/workflows/reusable-macos.yml | 2 +- .github/workflows/reusable-ubuntu.yml | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cc5ecc09fbc592..949c4ae95da07f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -132,7 +132,7 @@ jobs: with: python-version: '3.x' - name: Restore config.cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: config.cache key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }}-${{ env.pythonLocation }} @@ -259,7 +259,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Restore config.cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: config.cache key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} @@ -274,7 +274,7 @@ jobs: echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV - name: 'Restore OpenSSL build' id: cache-openssl - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ./multissl/openssl/${{ env.OPENSSL_VER }} key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} @@ -319,7 +319,7 @@ jobs: echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV - name: 'Restore OpenSSL build' id: cache-openssl - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ./multissl/openssl/${{ env.OPENSSL_VER }} key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} @@ -342,7 +342,7 @@ jobs: - name: Bind mount sources read-only run: sudo mount --bind -o ro $GITHUB_WORKSPACE $CPYTHON_RO_SRCDIR - name: Restore config.cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ${{ env.CPYTHON_BUILDDIR }}/config.cache key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} @@ -375,7 +375,7 @@ jobs: ./python -m venv $VENV_LOC && $VENV_PYTHON -m pip install -r ${GITHUB_WORKSPACE}/Tools/requirements-hypothesis.txt - name: 'Restore Hypothesis database' id: cache-hypothesis-database - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ./hypothesis key: hypothesis-database-${{ github.head_ref || github.run_id }} @@ -421,7 +421,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Restore config.cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: config.cache key: ${{ github.job }}-${{ runner.os }}-${{ needs.check_source.outputs.config_hash }} @@ -440,7 +440,7 @@ jobs: echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV - name: 'Restore OpenSSL build' id: cache-openssl - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ./multissl/openssl/${{ env.OPENSSL_VER }} key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index e534751ee1011d..cea8f93d67b29c 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -89,7 +89,7 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v4 - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ~/.cache/pip key: ubuntu-doc-${{ hashFiles('Doc/requirements.txt') }} diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 28e9dc52fd50ee..cad619b78ce5f2 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -29,7 +29,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Restore config.cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: config.cache key: ${{ github.job }}-${{ matrix.os }}-${{ inputs.config_hash }} diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index c2194280c0a50f..ef52d99c15191b 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -29,7 +29,7 @@ jobs: echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> $GITHUB_ENV - name: 'Restore OpenSSL build' id: cache-openssl - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ./multissl/openssl/${{ env.OPENSSL_VER }} key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} @@ -53,7 +53,7 @@ jobs: - name: Bind mount sources read-only run: sudo mount --bind -o ro $GITHUB_WORKSPACE $CPYTHON_RO_SRCDIR - name: Restore config.cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ${{ env.CPYTHON_BUILDDIR }}/config.cache key: ${{ github.job }}-${{ runner.os }}-${{ inputs.config_hash }} From 59ae215387d69119f0e77b2776e8214ca4bb8a5e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:50:08 +0000 Subject: [PATCH 122/127] build(deps-dev): bump types-psutil from 5.9.5.17 to 5.9.5.20240106 in /Tools (#114852) build(deps-dev): bump types-psutil in /Tools Bumps [types-psutil](https://github.com/python/typeshed) from 5.9.5.17 to 5.9.5.20240106. - [Commits](https://github.com/python/typeshed/commits) --- updated-dependencies: - dependency-name: types-psutil dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Tools/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 11cce505f60d16..c0a63b40ff4155 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -3,5 +3,5 @@ mypy==1.8.0 # needed for peg_generator: -types-psutil==5.9.5.17 +types-psutil==5.9.5.20240106 types-setuptools==69.0.0.20240125 From 0bf42dae7e73febc76ea96fd58af6b765a12b8a7 Mon Sep 17 00:00:00 2001 From: Tomas R <tomas.roun8@gmail.com> Date: Thu, 1 Feb 2024 12:49:01 +0100 Subject: [PATCH 123/127] gh-107461 ctypes: Add a testcase for nested `_as_parameter_` lookup (GH-107462) --- Lib/test/test_ctypes/test_as_parameter.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/test/test_ctypes/test_as_parameter.py b/Lib/test/test_ctypes/test_as_parameter.py index a1a8745e737fa2..ca75e748256083 100644 --- a/Lib/test/test_ctypes/test_as_parameter.py +++ b/Lib/test/test_ctypes/test_as_parameter.py @@ -221,5 +221,16 @@ class AsParamPropertyWrapperTestCase(BasicWrapTestCase): wrap = AsParamPropertyWrapper +class AsParamNestedWrapperTestCase(BasicWrapTestCase): + """Test that _as_parameter_ is evaluated recursively. + + The _as_parameter_ attribute can be another object which + defines its own _as_parameter_ attribute. + """ + + def wrap(self, param): + return AsParamWrapper(AsParamWrapper(AsParamWrapper(param))) + + if __name__ == '__main__': unittest.main() From 4dbb198d279a06fed74ea4c38f93d658baf38170 Mon Sep 17 00:00:00 2001 From: Ayappan Perumal <ayappap2@in.ibm.com> Date: Thu, 1 Feb 2024 17:22:54 +0530 Subject: [PATCH 124/127] gh-105089: Fix test_create_directory_with_write test failure in AIX (GH-105228) --- Lib/test/test_zipfile/test_core.py | 2 +- .../next/Tests/2023-06-02-05-04-15.gh-issue-105089.KaZFtU.rst | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Tests/2023-06-02-05-04-15.gh-issue-105089.KaZFtU.rst diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 9bdb08aeabb781..a177044d735bed 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2959,7 +2959,7 @@ def test_create_directory_with_write(self): directory = os.path.join(TESTFN2, "directory2") os.mkdir(directory) - mode = os.stat(directory).st_mode + mode = os.stat(directory).st_mode & 0xFFFF zf.write(directory, arcname="directory2/") zinfo = zf.filelist[1] self.assertEqual(zinfo.filename, "directory2/") diff --git a/Misc/NEWS.d/next/Tests/2023-06-02-05-04-15.gh-issue-105089.KaZFtU.rst b/Misc/NEWS.d/next/Tests/2023-06-02-05-04-15.gh-issue-105089.KaZFtU.rst new file mode 100644 index 00000000000000..d04ef435dd572d --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2023-06-02-05-04-15.gh-issue-105089.KaZFtU.rst @@ -0,0 +1,4 @@ +Fix +``test.test_zipfile.test_core.TestWithDirectory.test_create_directory_with_write`` +test in AIX by doing a bitwise AND of 0xFFFF on mode , so that it will be in +sync with ``zinfo.external_attr`` From 84e0e32184f658b8174b400e6ca9c418bfe8e0fc Mon Sep 17 00:00:00 2001 From: Anders Kaseorg <andersk@mit.edu> Date: Thu, 1 Feb 2024 11:26:22 -0500 Subject: [PATCH 125/127] Remove unused Py_XDECREF from _PyFrame_ClearExceptCode (GH-106158) frame->frame_obj was set to NULL a few lines earlier. Signed-off-by: Anders Kaseorg <andersk@mit.edu> --- Python/frame.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/frame.c b/Python/frame.c index 2865b2eab603c2..ddf6ef6ba5465c 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -139,7 +139,6 @@ _PyFrame_ClearExceptCode(_PyInterpreterFrame *frame) for (int i = 0; i < frame->stacktop; i++) { Py_XDECREF(frame->localsplus[i]); } - Py_XDECREF(frame->frame_obj); Py_XDECREF(frame->f_locals); Py_DECREF(frame->f_funcobj); } From 2dea1cf7fd9b1f6a914e363ecb17a853f4b99b6b Mon Sep 17 00:00:00 2001 From: Guido van Rossum <guido@python.org> Date: Thu, 1 Feb 2024 08:54:44 -0800 Subject: [PATCH 126/127] Write about Tier 2 and JIT in "what's new 3.13" (#114826) (This will soon be superseded by Ken Jin's much more detailed version.) --- Doc/whatsnew/3.13.rst | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 6b94a3771406fa..887c3009f88504 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -81,6 +81,13 @@ Important deprecations, removals or restrictions: * Python 3.13 and later have two years of full support, followed by three years of security fixes. +Interpreter improvements: + +* A basic :ref:`JIT compiler <whatsnew313-jit-compiler>` was added. + It is currently disabled by default (though we may turn it on later). + Performance improvements are modest -- we expect to be improving this + over the next few releases. + New Features ============ @@ -477,6 +484,46 @@ Optimizations FreeBSD and Solaris. See the ``subprocess`` section above for details. (Contributed by Jakub Kulik in :gh:`113117`.) +.. _whatsnew313-jit-compiler: + +Experimental JIT Compiler +========================= + +When CPython is configured using the ``--enable-experimental-jit`` option, +a just-in-time compiler is added which can speed up some Python programs. + +The internal architecture is roughly as follows. + +* We start with specialized *Tier 1 bytecode*. + See :ref:`What's new in 3.11 <whatsnew311-pep659>` for details. + +* When the Tier 1 bytecode gets hot enough, it gets translated + to a new, purely internal *Tier 2 IR*, a.k.a. micro-ops ("uops"). + +* The Tier 2 IR uses the same stack-based VM as Tier 1, but the + instruction format is better suited to translation to machine code. + +* We have several optimization passes for Tier 2 IR, which are applied + before it is interpreted or translated to machine code. + +* There is a Tier 2 interpreter, but it is mostly intended for debugging + the earlier stages of the optimization pipeline. If the JIT is not + enabled, the Tier 2 interpreter can be invoked by passing Python the + ``-X uops`` option or by setting the ``PYTHON_UOPS`` environment + variable to ``1``. + +* When the ``--enable-experimental-jit`` option is used, the optimized + Tier 2 IR is translated to machine code, which is then executed. + This does not require additional runtime options. + +* The machine code translation process uses an architecture called + *copy-and-patch*. It has no runtime dependencies, but there is a new + build-time dependency on LLVM. + +(JIT by Brandt Bucher, inspired by a paper by Haoran Xu and Fredrik Kjolstad. +Tier 2 IR by Mark Shannon and Guido van Rossum. +Tier 2 optimizer by Ken Jin.) + Deprecated ========== From 6d7ad57385e6c18545f19714b8f520644d305715 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora <kirill.bast9@mail.ru> Date: Thu, 1 Feb 2024 19:56:24 +0300 Subject: [PATCH 127/127] Update outdated info in ``Tools/cases_generator/README.md`` (#114844) --- Tools/cases_generator/README.md | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/Tools/cases_generator/README.md b/Tools/cases_generator/README.md index ed802e44f31ad5..7fec8a882336cd 100644 --- a/Tools/cases_generator/README.md +++ b/Tools/cases_generator/README.md @@ -5,16 +5,30 @@ Documentation for the instruction definitions in `Python/bytecodes.c` What's currently here: +- `analyzer.py`: code for converting `AST` generated by `Parser` + to more high-level structure for easier interaction - `lexer.py`: lexer for C, originally written by Mark Shannon - `plexer.py`: OO interface on top of lexer.py; main class: `PLexer` -- `parsing.py`: Parser for instruction definition DSL; main class `Parser` -- `generate_cases.py`: driver script to read `Python/bytecodes.c` and +- `parsing.py`: Parser for instruction definition DSL; main class: `Parser` +- `parser.py` helper for interactions with `parsing.py` +- `tierN_generator.py`: a couple of driver scripts to read `Python/bytecodes.c` and write `Python/generated_cases.c.h` (and several other files) -- `analysis.py`: `Analyzer` class used to read the input files -- `flags.py`: abstractions related to metadata flags for instructions -- `formatting.py`: `Formatter` class used to write the output files -- `instructions.py`: classes to analyze and write instructions -- `stacking.py`: code to handle generalized stack effects +- `stack.py`: code to handle generalized stack effects +- `cwriter.py`: code which understands tokens and how to format C code; + main class: `CWriter` +- `generators_common.py`: helpers for generators +- `opcode_id_generator.py`: generate a list of opcodes and write them to + `Include/opcode_ids.h` +- `opcode_metadata_generator.py`: reads the instruction definitions and + write the metadata to `Include/internal/pycore_opcode_metadata.h` +- `py_metadata_generator.py`: reads the instruction definitions and + write the metadata to `Lib/_opcode_metadata.py` +- `target_generator.py`: generate targets for computed goto dispatch and + write them to `Python/opcode_targets.h` +- `uop_id_generator.py`: generate a list of uop IDs and write them to + `Include/internal/pycore_uop_ids.h` +- `uop_metadata_generator.py`: reads the instruction definitions and + write the metadata to `Include/internal/pycore_uop_metadata.h` Note that there is some dummy C code at the top and bottom of `Python/bytecodes.c`