Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/Agent.Sdk/Knob/AgentKnobs.cs
Original file line number Diff line number Diff line change
Expand Up @@ -814,5 +814,19 @@ public class AgentKnobs
new EnvironmentKnobSource("AGENT_CDN_CONNECTIVITY_FAIL_WARNING"),
new PipelineFeatureSource("AgentCDNConnectivityFailWarning"),
new BuiltInDefaultKnobSource("false"));

// Opt-in switch for the container check performed by the task retry logic:
// when a task targeting a container is retried, the agent verifies the container
// is still running and starts it again if it is not.
public static readonly Knob StartContainerInRetryHelper = new Knob(
    nameof(StartContainerInRetryHelper),
    "If true, when a task is retried the agent will check that the target container is still running and start it again before re-running the task.",
    new RuntimeKnobSource("AZP_AGENT_START_CONTAINER_IN_RETRYHELPER"),
    new EnvironmentKnobSource("AZP_AGENT_START_CONTAINER_IN_RETRYHELPER"),
    new BuiltInDefaultKnobSource("false"));

// Companion to StartContainerInRetryHelper: an integer number of milliseconds
// (default 5000) the retry logic waits after the container check before the task
// is executed again.
public static readonly Knob StartContainerInRetryHelperSleepMs = new Knob(
    nameof(StartContainerInRetryHelperSleepMs),
    "Time in milliseconds the agent waits after checking (and, if needed, restarting) the target container before re-running a retried task.",
    new RuntimeKnobSource("START_CONTAINER_RETRYHELPER_SLEEP_MS"),
    new EnvironmentKnobSource("START_CONTAINER_RETRYHELPER_SLEEP_MS"),
    new BuiltInDefaultKnobSource("5000"));
}
}
56 changes: 53 additions & 3 deletions src/Agent.Worker/TaskRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ private async Task RunAsyncInternal()
// Run the task.
int retryCount = this.Task.RetryCountOnTaskFailure;

bool triedOnce = false;

if (retryCount > 0)
{
if (retryCount > RetryCountOnTaskFailureLimit)
Expand All @@ -439,8 +441,56 @@ private async Task RunAsyncInternal()
retryCount = RetryCountOnTaskFailureLimit;
}

bool startContainerInRetryHelper = false;
int startContainerInRetryHelperSleepMs = 5000;

try
{
startContainerInRetryHelper = AgentKnobs.StartContainerInRetryHelper.GetValue(ExecutionContext).AsBoolean();
startContainerInRetryHelperSleepMs = AgentKnobs.StartContainerInRetryHelperSleepMs.GetValue(ExecutionContext).AsInt();
}
catch (Exception ex)
{
ExecutionContext.Warning("exception getting retry helper knobs" + ex);
}

// Run the handler through the retry helper. The handler is invoked exactly once per
// attempt, from inside this single RetryStep call, so a retry never executes the task twice.
RetryHelper rh = new RetryHelper(ExecutionContext, retryCount);
await rh.RetryStep(async () =>
{
    if (startContainerInRetryHelper)
    {
        if (!triedOnce)
        {
            // First attempt: nothing to repair yet, just remember that we have run once.
            triedOnce = true;
        }
        else if (stepTarget is ContainerInfo containerTarget)
        {
            // This is a retry of a container-targeted task: make sure the container is
            // still running before handing control back to the handler.
            ExecutionContext.Debug($"Retrying task; checking whether target container '{containerTarget.ContainerId}' is still running.");

            IDockerCommandManager dockerManager = HostContext.GetService<IDockerCommandManager>();
            bool isContainerRunning = await dockerManager.IsContainerRunning(ExecutionContext, containerTarget.ContainerId);

            if (!isContainerRunning)
            {
                ExecutionContext.Debug($"Target container '{containerTarget.ContainerId}' is not running; starting it again.");

                int startExitCode = await dockerManager.DockerStart(ExecutionContext, containerTarget.ContainerId);

                // Only a non-zero exit code is a failure; a successful start must fall
                // through to the delay and the task run below.
                if (startExitCode != 0)
                {
                    throw new InvalidOperationException($"Docker start fail with exit code {startExitCode}");
                }
            }

            // Wait the configured time after the container check before re-running the task.
            await System.Threading.Tasks.Task.Delay(startContainerInRetryHelperSleepMs);
        }
    }

    await handler.RunAsync();
}, RetryHelper.ExponentialDelay);
}
else
{
Expand All @@ -449,7 +499,7 @@ private async Task RunAsyncInternal()
}
}

private Dictionary<string, string> LoadDefaultInputs(Definition definition)
private Dictionary<string, string> LoadDefaultInputs(Definition definition)
{
Trace.Verbose("Loading default inputs.");
var inputs = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
Expand All @@ -471,7 +521,7 @@ private Dictionary<string, string> LoadDefaultInputs(Definition definition)

return inputs;
}

public async Task VerifyTask(ITaskManager taskManager, Definition definition)
{
// Verify task signatures if a fingerprint is configured for the Agent.
Expand Down
Loading