diff --git a/src/backend/cdb/endpoint/cdbendpointutils.c b/src/backend/cdb/endpoint/cdbendpointutils.c index 56cd405c9d1..b8a302473d7 100644 --- a/src/backend/cdb/endpoint/cdbendpointutils.c +++ b/src/backend/cdb/endpoint/cdbendpointutils.c @@ -599,3 +599,16 @@ enable_parallel_retrieve_cursor_check_timeout(void) GP_PARALLEL_RETRIEVE_CURSOR_CHECK_PERIOD_MS); } } + +/* + * Cancel the periodic parallel retrieve cursor check timeout. + * + * Only acts when Gp_role is GP_ROLE_DISPATCH; otherwise does nothing. Safe to + * call unconditionally; disable_timeout() is a no-op when it is not active. + */ +void +disable_parallel_retrieve_cursor_check_timeout(void) +{ + if (Gp_role == GP_ROLE_DISPATCH) + disable_timeout(GP_PARALLEL_RETRIEVE_CURSOR_CHECK_TIMEOUT, false); +} diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c index fd71782e9ee..cee6923557d 100644 --- a/src/backend/commands/portalcmds.c +++ b/src/backend/commands/portalcmds.c @@ -393,8 +393,8 @@ PortalCleanup(Portal portal) } } - /* - * If resource scheduling is enabled, release the resource lock. + /* + * If resource scheduling is enabled, release the resource lock. */ if (IsResQueueLockedForPortal(portal)) { @@ -410,6 +410,18 @@ PortalCleanup(Portal portal) { BackoffBackendEntryExit(); } + + /* + * If this was a PARALLEL RETRIEVE CURSOR and no others remain, stop the + * periodic check timer now rather than leaving the timeout handler to + * find a zero count on a later firing. This relies on queryDesc having + * been set to NULL earlier in this function so that this portal is no + * longer counted by GetNumOfParallelRetrieveCursors() -- TODO confirm. 
+ */ + if (PortalIsParallelRetrieveCursor(portal) && + Gp_role == GP_ROLE_DISPATCH && + GetNumOfParallelRetrieveCursors() == 0) + disable_parallel_retrieve_cursor_check_timeout(); } /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index b769c3b249f..ac2d18be0bb 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -1894,20 +1894,22 @@ GpParallelRetrieveCursorCheckTimeoutHandler(void) /* It calls cdbdisp_checkForCancel(), which doesn't raise error */ gp_check_parallel_retrieve_cursor_error(); - int num = GetNumOfParallelRetrieveCursors(); - - /* Reset the alarm to check after a timeout */ - if (num > 0) - { - elog(DEBUG1, "There are still %d parallel retrieve cursors alive", num); - enable_parallel_retrieve_cursor_check_timeout(); - } } else { elog(DEBUG1, "DoingCommandRead is false, check parallel cursor timeout delay"); - enable_parallel_retrieve_cursor_check_timeout(); } + + /* + * Re-arm only while parallel retrieve cursors are still alive. The old + * !DoingCommandRead branch re-armed regardless of the count, so SIGALRM + * kept firing every GP_PARALLEL_RETRIEVE_CURSOR_CHECK_PERIOD_MS and + * truncated fault-injection 'sleep' windows used by isolation2 tests. + * NOTE(review): the count is now also taken when !DoingCommandRead; + * confirm GetNumOfParallelRetrieveCursors() is safe in that context. 
+ */ + if (GetNumOfParallelRetrieveCursors() > 0) + enable_parallel_retrieve_cursor_check_timeout(); } /* diff --git a/src/include/cdb/cdbendpoint.h b/src/include/cdb/cdbendpoint.h index a5a45bd7623..ae4c94d4e60 100644 --- a/src/include/cdb/cdbendpoint.h +++ b/src/include/cdb/cdbendpoint.h @@ -137,6 +137,7 @@ extern void AtAbort_EndpointExecState(void); extern void allocEndpointExecState(void); extern bool gp_check_parallel_retrieve_cursor_error(void); extern void enable_parallel_retrieve_cursor_check_timeout(void); +extern void disable_parallel_retrieve_cursor_check_timeout(void); /* * Below functions should run on Endpoints(QE/Entry DB).