From 798e080366a94db78d86ed681d8f988782fa9191 Mon Sep 17 00:00:00 2001 From: Mohamed Hallumi Date: Tue, 12 May 2026 16:13:25 +0300 Subject: [PATCH 1/2] RD-3960 run docker/podman with --init so PID-1 reaps zombies skipper-entrypoint.sh runs the user command via `bash -c "$@"` with no `exec`, so bash stays PID-1 inside the build container. Bash as PID-1 does not forward SIGTERM to its children. When the docker/podman client receives SIGTERM (e.g. Jenkins pipeline cancellation, manual `docker stop`), bash swallows it and the user command (mindthegap, packer, etc.) survives until SIGKILL. Any descendant that detached from the controlling group survives even SIGKILL of bash and is left running on the host as a zombie holding registry connections, file handles, etc. Reproduced locally with debian + bash entrypoint mimicking skipper-entrypoint.sh: `docker stop -t 5` took the full 5s grace (SIGTERM ignored, SIGKILL after) without --init; with --init it took 0s (SIGTERM forwarded clean). `docker run --init` uses Docker's built-in tini as PID-1. `podman run --init` uses catatonit. Both are tiny, purpose-built init systems that forward signals and reap zombies. No image changes required from skipper consumers. This is a more general fix than per-consumer entrypoint override (#191): every skipper user benefits without any change on their side. Co-Authored-By: Claude Opus 4.7 (1M context) --- skipper/runner.py | 7 +++++++ tests/test_runner.py | 9 +++++++++ tests/test_runner_podman.py | 5 +++++ 3 files changed, 21 insertions(+) diff --git a/skipper/runner.py b/skipper/runner.py index 9dca49d..2681eb9 100644 --- a/skipper/runner.py +++ b/skipper/runner.py @@ -70,6 +70,13 @@ def _run_nested(fqdn_image, environment, command, interactive, name, net, publis else: cmd += ['--rm'] + # Run a real init (tini on docker, catatonit on podman) as PID-1 inside the + # build container so SIGTERM from `docker stop` (e.g. Jenkins pipeline + # cancellation) propagates to user commands and orphaned children are + # reaped. Without this, bash inside skipper-entrypoint.sh swallows SIGTERM + # and long-running children survive as host-visible zombies. + cmd += ['--init'] + for cmd_limit in utils.SKIPPER_ULIMIT: cmd += cmd_limit diff --git a/tests/test_runner.py b/tests/test_runner.py index 707d101..7e172ef 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -93,6 +93,7 @@ def test_run_simple_command_nested_network_exist( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -163,6 +164,7 @@ def test_run_simple_command_nested_network_not_exist( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -233,6 +235,7 @@ def test_run_simple_command_nested_with_env( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -307,6 +310,7 @@ def test_run_simple_command_nested_with_env_file( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -379,6 +383,7 @@ def test_run_simple_command_nested_with_multiple_env_files( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -457,6 +462,7 @@ def test_run_simple_command_nested_interactive( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -530,6 +536,7 @@ def test_run_complex_command_nested( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -602,6 +609,7 @@ def test_run_complex_command_nested_with_env( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -688,6 +696,7 @@ def test_run_complex_command_nested_with_special_case_verification( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", diff --git a/tests/test_runner_podman.py b/tests/test_runner_podman.py index e93e0cc..b555bb0 100644 --- a/tests/test_runner_podman.py +++ b/tests/test_runner_podman.py @@ -78,6 +78,7 @@ def test_run_simple_command_nested_network_exist( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -141,6 +142,7 @@ def test_run_simple_command_nested_network_not_exist( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -202,6 +204,7 @@ def test_run_complex_command_nested(self, resource_filename_mock, check_output_m "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -268,6 +271,7 @@ def test_run_non_existent_unauthorized_volume( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -331,6 +335,7 @@ def test_run_complex_command_nested_with_env( "-t", "-e", "KEEP_CONTAINERS=True", + "--init", "--ulimit", "nofile=65536:65536", "--privileged", From 957d9de68f1311b488161cc3d6ef084c30b7b3c3 Mon Sep 17 00:00:00 2001 From: Mohamed Hallumi Date: Sun, 17 May 2026 13:55:55 +0300 Subject: [PATCH 2/2] RD-3960 make --init opt-in via `init: true` in skipper.yaml Address review concern that an always-on `--init` could change behaviour for consumers whose entrypoints depend on the current no-init semantics (custom init systems, daemon-spawning workflows, PID-1 asserts in tests). Introduce a new `init` config field defaulting to false. When set to true in skipper.yaml, `--init` is added to the docker/podman run command. When unset or false, behaviour is identical to today. Existing consumers see no change. Consumers that want PID-1 signal forwarding and zombie reaping set `init: true` in their skipper.yaml. Co-Authored-By: Claude Opus 4.7 (1M context) --- skipper/cli.py | 4 +++ skipper/runner.py | 18 +++++------ tests/test_cli.py | 63 +++++++++++++++++++++++++++++++++++++ tests/test_runner.py | 53 +++++++++++++++++++++++++------ tests/test_runner_podman.py | 5 --- 5 files changed, 120 insertions(+), 23 deletions(-) diff --git a/skipper/cli.py b/skipper/cli.py index 0fe4fe0..4e301a3 100644 --- a/skipper/cli.py +++ b/skipper/cli.py @@ -91,6 +91,7 @@ def cli( ctx.obj["env"] = ctx.default_map.get("env", {}) ctx.obj["containers"] = ctx.default_map.get("containers") ctx.obj["volumes"] = ctx.default_map.get("volumes") + ctx.obj["init"] = ctx.default_map.get("init", False) ctx.obj["workdir"] = ctx.default_map.get("workdir") ctx.obj["workspace"] = ctx.default_map.get("workspace", None) ctx.obj["container_context"] = ctx.default_map.get("container_context") @@ -269,6 +270,7 @@ def run(ctx, interactive, name, env, publish, cache, command): net=ctx.obj["build_container_net"], publish=publish, volumes=ctx.obj.get("volumes"), + init=ctx.obj.get("init", False), workdir=ctx.obj.get("workdir"), use_cache=cache, workspace=ctx.obj.get("workspace"), @@ -310,6 +312,7 @@ def make(ctx, interactive, name, env, makefile, cache, publish, make_params): net=ctx.obj["build_container_net"], publish=publish, volumes=ctx.obj.get("volumes"), + init=ctx.obj.get("init", False), workdir=ctx.obj.get("workdir"), use_cache=cache, workspace=ctx.obj.get("workspace"), @@ -347,6 +350,7 @@ def shell(ctx, env, name, cache, publish): net=ctx.obj["build_container_net"], publish=publish, volumes=ctx.obj.get("volumes"), + init=ctx.obj.get("init", False), workdir=ctx.obj.get("workdir"), use_cache=cache, workspace=ctx.obj.get("workspace"), diff --git a/skipper/runner.py b/skipper/runner.py index 2681eb9..70df65d 100644 --- a/skipper/runner.py +++ b/skipper/runner.py @@ -21,14 +21,14 @@ def get_default_net(): # pylint: disable=too-many-arguments # pylint: disable=too-many-positional-arguments def run(command, fqdn_image=None, environment=None, interactive=False, name=None, net=None, publish=(), volumes=None, - workdir=None, use_cache=False, workspace=None, env_file=(), stdout_to_stderr=False): + workdir=None, use_cache=False, workspace=None, env_file=(), stdout_to_stderr=False, init=False): if not net: net = get_default_net() if fqdn_image is not None: return _run_nested(fqdn_image, environment, command, interactive, name, net, publish, volumes, - workdir, use_cache, workspace, env_file) + workdir, use_cache, workspace, env_file, init) return _run(command, stdout_to_stderr=stdout_to_stderr) @@ -50,7 +50,7 @@ def _run(cmd_args, stdout_to_stderr=False): # pylint: disable=too-many-branches # pylint: disable=too-many-arguments # pylint: disable=too-many-positional-arguments -def _run_nested(fqdn_image, environment, command, interactive, name, net, publish, volumes, workdir, use_cache, workspace, env_file): +def _run_nested(fqdn_image, environment, command, interactive, name, net, publish, volumes, workdir, use_cache, workspace, env_file, init=False): cwd = os.getcwd() if workspace is None: workspace = os.path.dirname(cwd) @@ -70,12 +70,12 @@ def _run_nested(fqdn_image, environment, command, interactive, name, net, publis else: cmd += ['--rm'] - # Run a real init (tini on docker, catatonit on podman) as PID-1 inside the - # build container so SIGTERM from `docker stop` (e.g. Jenkins pipeline - # cancellation) propagates to user commands and orphaned children are - # reaped. Without this, bash inside skipper-entrypoint.sh swallows SIGTERM - # and long-running children survive as host-visible zombies. - cmd += ['--init'] + if init: + # Opt-in: run a real init (tini on docker, catatonit on podman) as + # PID-1 inside the build container so SIGTERM from `docker stop` + # propagates to user commands and orphaned children are reaped. + # Off by default to preserve existing behaviour for all consumers. + cmd += ['--init'] for cmd_limit in utils.SKIPPER_ULIMIT: cmd += cmd_limit diff --git a/tests/test_cli.py b/tests/test_cli.py index 85bf0fa..151f0be 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -113,6 +113,16 @@ ], } +SKIPPER_CONF_WITH_INIT = { + "registry": REGISTRY, + "build_container_image": SKIPPER_CONF_BUILD_CONTAINER_IMAGE, + "build_container_tag": SKIPPER_CONF_BUILD_CONTAINER_TAG, + "make": { + "makefile": SKIPPER_CONF_MAKEFILE, + }, + "init": True, +} + SKIPPER_CONF_WITH_WORKDIR = { "registry": REGISTRY, "build_container_image": SKIPPER_CONF_BUILD_CONTAINER_IMAGE, @@ -335,6 +345,7 @@ def test_make_without_build_container_tag_with_context(self, skipper_runner_run_ workdir=None, use_cache=False, workspace=None, + init=False, env_file=(), ), ] @@ -1107,6 +1118,7 @@ def test_run_with_existing_local_build_container(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1139,6 +1151,7 @@ def test_run_with_existing_remote_build_container(self, skipper_runner_run_mock, workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1177,6 +1190,7 @@ def test_run_with_defaults_from_config_file(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1247,6 +1261,7 @@ def test_run_with_env_overriding_config_file(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1275,6 +1290,7 @@ def test_run_with_env_list(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1303,6 +1319,7 @@ def test_run_with_env_list_get_from_env(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1338,6 +1355,7 @@ def test_run_with_env(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1361,6 +1379,7 @@ def test_run_interactive_from_environment(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) del os.environ["SKIPPER_INTERACTIVE"] @@ -1384,6 +1403,7 @@ def test_run_non_interactive_from_environment(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1406,6 +1426,7 @@ def test_run_non_interactive(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1444,6 +1465,7 @@ def test_run_without_build_container_tag(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ), ] @@ -1472,6 +1494,7 @@ def test_run_without_build_container_tag_cached(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=True, + init=False, env_file=(), ), ] @@ -1498,6 +1521,7 @@ def test_run_with_non_default_net(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1528,6 +1552,7 @@ def test_run_with_publish_single_port(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1558,6 +1583,7 @@ def test_run_with_publish_multiple_ports(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1588,6 +1614,7 @@ def test_run_with_publish_port_range(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1680,6 +1707,32 @@ def test_run_with_defaults_from_config_file_including_volumes(self, skipper_runn workspace=None, workdir=None, use_cache=False, + init=False, + env_file=(), + ) + + @mock.patch("os.path.exists", mock.MagicMock(autospec=True, return_value=True)) + @mock.patch("skipper.config.load_defaults", mock.MagicMock(autospec=True, return_value=SKIPPER_CONF_WITH_INIT)) + @mock.patch("subprocess.check_output", mock.MagicMock(autospec=True, return_value="1234567\n")) + @mock.patch("skipper.runner.run", autospec=True) + def test_run_with_defaults_from_config_file_including_init(self, skipper_runner_run_mock): + command = ["ls", "-l"] + run_params = command + self._invoke_cli(defaults=config.load_defaults(), subcmd="run", subcmd_params=run_params) + expected_fqdn_image = "skipper-conf-build-container-image:skipper-conf-build-container-tag" + skipper_runner_run_mock.assert_called_once_with( + command, + fqdn_image=expected_fqdn_image, + environment=[], + interactive=False, + name=None, + net=None, + publish=(), + volumes=None, + workspace=None, + workdir=None, + use_cache=False, + init=True, env_file=(), ) @@ -1704,6 +1757,7 @@ def test_run_with_defaults_from_config_file_including_workdir(self, skipper_runn workdir="test-workdir", workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1728,6 +1782,7 @@ def test_run_with_defaults_from_config_file_including_workspace(self, skipper_ru workdir=None, workspace="/test/workspace", use_cache=False, + init=False, env_file=(), ) @@ -1753,6 +1808,7 @@ def test_run_with_config_including_git_revision_with_uncommitted_changes(self, s workdir=None, use_cache=False, workspace=None, + init=False, env_file=(), ) @@ -1778,6 +1834,7 @@ def test_run_with_config_including_git_revision_without_uncommitted_changes(self workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1802,6 +1859,7 @@ def test_make(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1826,6 +1884,7 @@ def test_make_with_default_params(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1850,6 +1909,7 @@ def test_make_with_additional_make_params(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1876,6 +1936,7 @@ def test_make_with_defaults_from_config_file(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) @@ -1915,6 +1976,7 @@ def test_make_without_build_container_tag(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ), ] @@ -1940,6 +2002,7 @@ def test_shell(self, skipper_runner_run_mock): workdir=None, workspace=None, use_cache=False, + init=False, env_file=(), ) diff --git a/tests/test_runner.py b/tests/test_runner.py index 7e172ef..4536c16 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -67,6 +67,50 @@ def test_run_complex_command_not_nested(self, popen_mock): runner.run(command) popen_mock.assert_called_once_with([self.runtime] + command) + @mock.patch("os.path.exists", mock.MagicMock(autospec=True, return_value=True)) + @mock.patch("getpass.getuser", mock.MagicMock(autospec=True, return_value="testuser")) + @mock.patch("os.getcwd", mock.MagicMock(autospec=True, return_value=PROJECT_DIR)) + @mock.patch("os.path.expanduser", mock.MagicMock(autospec=True, return_value=HOME_DIR)) + @mock.patch("os.getuid", autospec=True) + @mock.patch("grp.getgrnam", autospec=True) + @mock.patch("subprocess.Popen", autospec=False) + @mock.patch("subprocess.check_output", autospec=False) + @mock.patch("pkg_resources.resource_filename", autospec=False) + def test_run_nested_with_init( + self, resource_filename_mock, check_output_mock, popen_mock, grp_getgrnam_mock, os_getuid_mock + ): + resource_filename_mock.return_value = "entrypoint.sh" + check_output_mock.side_effect = [self.NET_LS, ""] + popen_mock.return_value.stdout.readline.side_effect = [""] + popen_mock.return_value.poll.return_value = -1 + grp_getgrnam_mock.return_value.gr_gid = 978 + os_getuid_mock.return_value = USER_ID + runner.run(["pwd"], FQDN_IMAGE, init=True) + nested_command = popen_mock.call_args[0][0] + assert "--init" in nested_command, nested_command + + @mock.patch("os.path.exists", mock.MagicMock(autospec=True, return_value=True)) + @mock.patch("getpass.getuser", mock.MagicMock(autospec=True, return_value="testuser")) + @mock.patch("os.getcwd", mock.MagicMock(autospec=True, return_value=PROJECT_DIR)) + @mock.patch("os.path.expanduser", mock.MagicMock(autospec=True, return_value=HOME_DIR)) + @mock.patch("os.getuid", autospec=True) + @mock.patch("grp.getgrnam", autospec=True) + @mock.patch("subprocess.Popen", autospec=False) + @mock.patch("subprocess.check_output", autospec=False) + @mock.patch("pkg_resources.resource_filename", autospec=False) + def test_run_nested_without_init_by_default( + self, resource_filename_mock, check_output_mock, popen_mock, grp_getgrnam_mock, os_getuid_mock + ): + resource_filename_mock.return_value = "entrypoint.sh" + check_output_mock.side_effect = [self.NET_LS, ""] + popen_mock.return_value.stdout.readline.side_effect = [""] + popen_mock.return_value.poll.return_value = -1 + grp_getgrnam_mock.return_value.gr_gid = 978 + os_getuid_mock.return_value = USER_ID + runner.run(["pwd"], FQDN_IMAGE) + nested_command = popen_mock.call_args[0][0] + assert "--init" not in nested_command, nested_command + @mock.patch("os.path.exists", mock.MagicMock(autospec=True, return_value=True)) @mock.patch("getpass.getuser", mock.MagicMock(autospec=True, return_value="testuser")) @mock.patch("os.getcwd", mock.MagicMock(autospec=True, return_value=PROJECT_DIR)) @@ -93,7 +137,6 @@ def test_run_simple_command_nested_network_exist( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -164,7 +207,6 @@ def test_run_simple_command_nested_network_not_exist( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -235,7 +277,6 @@ def test_run_simple_command_nested_with_env( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -310,7 +351,6 @@ def test_run_simple_command_nested_with_env_file( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -383,7 +423,6 @@ def test_run_simple_command_nested_with_multiple_env_files( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -462,7 +501,6 @@ def test_run_simple_command_nested_interactive( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -536,7 +574,6 @@ def test_run_complex_command_nested( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -609,7 +646,6 @@ def test_run_complex_command_nested_with_env( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -696,7 +732,6 @@ def test_run_complex_command_nested_with_special_case_verification( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", diff --git a/tests/test_runner_podman.py b/tests/test_runner_podman.py index b555bb0..e93e0cc 100644 --- a/tests/test_runner_podman.py +++ b/tests/test_runner_podman.py @@ -78,7 +78,6 @@ def test_run_simple_command_nested_network_exist( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -142,7 +141,6 @@ def test_run_simple_command_nested_network_not_exist( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -204,7 +202,6 @@ def test_run_complex_command_nested(self, resource_filename_mock, check_output_m "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -271,7 +268,6 @@ def test_run_non_existent_unauthorized_volume( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged", @@ -335,7 +331,6 @@ def test_run_complex_command_nested_with_env( "-t", "-e", "KEEP_CONTAINERS=True", - "--init", "--ulimit", "nofile=65536:65536", "--privileged",