Mesos源码分析(12): Mesos-Slave接收到RunTask消息

时间:2023-03-09 20:05:43
Mesos源码分析(12): Mesos-Slave接收到RunTask消息

在前文Mesos源码分析(8): Mesos-Slave的初始化中,Mesos-Slave接收到RunTaskMessage消息,会调用Slave::runTask.

 

  1. void Slave::runTask(
  2.     const UPID& from,
  3.     const FrameworkInfo& frameworkInfo,
  4.     const FrameworkID& frameworkId_,
  5.     const UPID& pid,
  6.     TaskInfo task)
  7. {
  8. ……
  9.   // Create frameworkId alias to use in the rest of the function.
  10.   const FrameworkID frameworkId = frameworkInfo.id();
  11.   LOG(INFO) << "Got assigned task " << task.task_id()
  12.             << " for framework " << frameworkId;
  13. ……
  14.   CHECK(state == RECOVERING || state == DISCONNECTED ||
  15.         state == RUNNING || state == TERMINATING)
  16.     << state;
  17.   // TODO(bmahler): Also ignore if we're DISCONNECTED.
  18.   if (state == RECOVERING || state == TERMINATING) {
  19.     LOG(WARNING) << "Ignoring task " << task.task_id()
  20.                  << " because the slave is " << state;
  21.     // TODO(vinod): Consider sending a TASK_LOST here.
  22.     // Currently it is tricky because 'statusUpdate()'
  23.     // ignores updates for unknown frameworks.
  24.     return;
  25.   }
  26.   Future<bool> unschedule = true;
  27.   // If we are about to create a new framework, unschedule the work
  28.   // and meta directories from getting gc'ed.
  29.   Framework* framework = getFramework(frameworkId);
  30.   if (framework == NULL) {
  31.     // Unschedule framework work directory.
  32.     string path = paths::getFrameworkPath(
  33.         flags.work_dir, info.id(), frameworkId);
  34.     if (os::exists(path)) {
  35.       unschedule = unschedule.then(defer(self(), &Self::unschedule, path));
  36.     }
  37.     // Unschedule framework meta directory.
  38.     path = paths::getFrameworkPath(metaDir, info.id(), frameworkId);
  39.     if (os::exists(path)) {
  40.       unschedule = unschedule.then(defer(self(), &Self::unschedule, path));
  41.     }
  42.     Option<UPID> frameworkPid = None();
  43.     if (pid != UPID()) {
  44.       frameworkPid = pid;
  45.     }
  46.     framework = new Framework(this, frameworkInfo, frameworkPid);
  47.     frameworks[frameworkId] = framework;
  48.     if (frameworkInfo.checkpoint()) {
  49.       framework->checkpointFramework();
  50.     }
  51.     // Is this same framework in completedFrameworks? If so, move the completed
  52.     // executors to this framework and remove it from that list.
  53.     // TODO(brenden): Consider using stout/cache.hpp instead of boost
  54.     // circular_buffer.
  55.     for (auto it = completedFrameworks.begin(), end = completedFrameworks.end();
  56.          it != end;
  57.          ++it) {
  58.       if ((*it)->id() == frameworkId) {
  59.         framework->completedExecutors = (*it)->completedExecutors;
  60.         completedFrameworks.erase(it);
  61.         break;
  62.       }
  63.     }
  64.   }
  65.   const ExecutorInfo executorInfo = getExecutorInfo(frameworkInfo, task);
  66.   const ExecutorID& executorId = executorInfo.executor_id();
  67.   if (HookManager::hooksAvailable()) {
  68.     // Set task labels from run task label decorator.
  69.     task.mutable_labels()->CopyFrom(HookManager::slaveRunTaskLabelDecorator(
  70.         task, executorInfo, frameworkInfo, info));
  71.   }
  72.   // We add the task to 'pending' to ensure the framework is not
  73.   // removed and the framework and top level executor directories
  74.   // are not scheduled for deletion before '_runTask()' is called.
  75.   CHECK_NOTNULL(framework);
  76.   framework->pending[executorId][task.task_id()] = task;
  77.   // If we are about to create a new executor, unschedule the top
  78.   // level work and meta directories from getting gc'ed.
  79.   Executor* executor = framework->getExecutor(executorId);
  80.   if (executor == NULL) {
  81.     // Unschedule executor work directory.
  82.     string path = paths::getExecutorPath(
  83.         flags.work_dir, info.id(), frameworkId, executorId);
  84.     if (os::exists(path)) {
  85.       unschedule = unschedule.then(defer(self(), &Self::unschedule, path));
  86.     }
  87.     // Unschedule executor meta directory.
  88.     path = paths::getExecutorPath(metaDir, info.id(), frameworkId, executorId);
  89.     if (os::exists(path)) {
  90.       unschedule = unschedule.then(defer(self(), &Self::unschedule, path));
  91.     }
  92.   }
  93.   // Run the task after the unschedules are done.
  94.   unschedule.onAny(
  95.       defer(self(), &Self::_runTask, lambda::_1, frameworkInfo, task));
  96. }

 

最终调用Slave::_runTask

  1. void Slave::_runTask(
  2.     const Future<bool>& future,
  3.     const FrameworkInfo& frameworkInfo,
  4.     const TaskInfo& task)
  5. {
  6.   const FrameworkID frameworkId = frameworkInfo.id();
  7.   LOG(INFO) << "Launching task " << task.task_id()
  8.             << " for framework " << frameworkId;
  9.   Framework* framework = getFramework(frameworkId);
  10.   const ExecutorInfo executorInfo = getExecutorInfo(frameworkInfo, task);
  11.   const ExecutorID& executorId = executorInfo.executor_id();
  12.   if (framework->pending.contains(executorId) &&
  13.       framework->pending[executorId].contains(task.task_id())) {
  14.     framework->pending[executorId].erase(task.task_id());
  15.     if (framework->pending[executorId].empty()) {
  16.       framework->pending.erase(executorId);
  17.       // NOTE: Ideally we would perform the following check here:
  18.       //
  19.       // if (framework->executors.empty() &&
  20.       // framework->pending.empty()) {
  21.       // removeFramework(framework);
  22.       // }
  23.       //
  24.       // However, we need 'framework' to stay valid for the rest of
  25.       // this function. As such, we perform the check before each of
  26.       // the 'return' statements below.
  27.     }
  28.   } else {
  29.     LOG(WARNING) << "Ignoring run task " << task.task_id()
  30.                  << " of framework " << frameworkId
  31.                  << " because the task has been killed in the meantime";
  32.     return;
  33.   }
  34.   // We don't send a status update here because a terminating
  35.   // framework cannot send acknowledgements.
  36.   if (framework->state == Framework::TERMINATING) {
  37.     LOG(WARNING) << "Ignoring run task " << task.task_id()
  38.                  << " of framework " << frameworkId
  39.                  << " because the framework is terminating";
  40.     // Refer to the comment after 'framework->pending.erase' above
  41.     // for why we need this.
  42.     if (framework->executors.empty() && framework->pending.empty()) {
  43.       removeFramework(framework);
  44.     }
  45.     return;
  46.   }
  47.   if (!future.isReady()) {
  48.     LOG(ERROR) << "Failed to unschedule directories scheduled for gc: "
  49.                << (future.isFailed() ? future.failure() : "future discarded");
  50.     const StatusUpdate update = protobuf::createStatusUpdate(
  51.         frameworkId,
  52.         info.id(),
  53.         task.task_id(),
  54.         TASK_LOST,
  55.         TaskStatus::SOURCE_SLAVE,
  56.         UUID::random(),
  57.         "Could not launch the task because we failed to unschedule directories"
  58.         " scheduled for gc",
  59.         TaskStatus::REASON_GC_ERROR);
  60.     // TODO(vinod): Ensure that the status update manager reliably
  61.     // delivers this update. Currently, we don't guarantee this
  62.     // because removal of the framework causes the status update
  63.     // manager to stop retrying for its un-acked updates.
  64.     statusUpdate(update, UPID());
  65.     // Refer to the comment after 'framework->pending.erase' above
  66.     // for why we need this.
  67.     if (framework->executors.empty() && framework->pending.empty()) {
  68.       removeFramework(framework);
  69.     }
  70.     return;
  71.   }
  72.   // NOTE: If the task or executor uses resources that are
  73.   // checkpointed on the slave (e.g. persistent volumes), we should
  74.   // already know about it. If the slave doesn't know about them (e.g.
  75.   // CheckpointResourcesMessage was dropped or came out of order),
  76.   // we send TASK_LOST status updates here since restarting the task
  77.   // may succeed in the event that CheckpointResourcesMessage arrives
  78.   // out of order.
  79.   Resources checkpointedTaskResources =
  80.     Resources(task.resources()).filter(needCheckpointing);
  81.   foreach (const Resource& resource, checkpointedTaskResources) {
  82.     if (!checkpointedResources.contains(resource)) {
  83.       LOG(WARNING) << "Unknown checkpointed resource " << resource
  84.                    << " for task " << task.task_id()
  85.                    << " of framework " << frameworkId;
  86.       const StatusUpdate update = protobuf::createStatusUpdate(
  87.           frameworkId,
  88.           info.id(),
  89.           task.task_id(),
  90.           TASK_LOST,
  91.           TaskStatus::SOURCE_SLAVE,
  92.           UUID::random(),
  93.           "The checkpointed resources being used by the task are unknown to "
  94.           "the slave",
  95.           TaskStatus::REASON_RESOURCES_UNKNOWN);
  96.       statusUpdate(update, UPID());
  97.       // Refer to the comment after 'framework->pending.erase' above
  98.       // for why we need this.
  99.       if (framework->executors.empty() && framework->pending.empty()) {
  100.         removeFramework(framework);
  101.       }
  102.       return;
  103.     }
  104.   }
  105.   if (task.has_executor()) {
  106.     Resources checkpointedExecutorResources =
  107.       Resources(task.executor().resources()).filter(needCheckpointing);
  108.     foreach (const Resource& resource, checkpointedExecutorResources) {
  109.       if (!checkpointedResources.contains(resource)) {
  110.         LOG(WARNING) << "Unknown checkpointed resource " << resource
  111.                      << " for executor '" << task.executor().executor_id()
  112.                      << "' of framework " << frameworkId;
  113.         const StatusUpdate update = protobuf::createStatusUpdate(
  114.             frameworkId,
  115.             info.id(),
  116.             task.task_id(),
  117.             TASK_LOST,
  118.             TaskStatus::SOURCE_SLAVE,
  119.             UUID::random(),
  120.             "The checkpointed resources being used by the executor are unknown "
  121.             "to the slave",
  122.             TaskStatus::REASON_RESOURCES_UNKNOWN,
  123.             task.executor().executor_id());
  124.         statusUpdate(update, UPID());
  125.         // Refer to the comment after 'framework->pending.erase' above
  126.         // for why we need this.
  127.         if (framework->executors.empty() && framework->pending.empty()) {
  128.           removeFramework(framework);
  129.         }
  130.         return;
  131.       }
  132.     }
  133.   }
  134.   // NOTE: The slave cannot be in 'RECOVERING' because the task would
  135.   // have been rejected in 'runTask()' in that case.
  136.   CHECK(state == DISCONNECTED || state == RUNNING || state == TERMINATING)
  137.     << state;
  138.   if (state == TERMINATING) {
  139.     LOG(WARNING) << "Ignoring run task " << task.task_id()
  140.                  << " of framework " << frameworkId
  141.                  << " because the slave is terminating";
  142.     // Refer to the comment after 'framework->pending.erase' above
  143.     // for why we need this.
  144.     if (framework->executors.empty() && framework->pending.empty()) {
  145.       removeFramework(framework);
  146.     }
  147.     // We don't send a TASK_LOST here because the slave is
  148.     // terminating.
  149.     return;
  150.   }
  151.   CHECK(framework->state == Framework::RUNNING) << framework->state;
  152.   // Either send the task to an executor or start a new executor
  153.   // and queue the task until the executor has started.
  154.   Executor* executor = framework->getExecutor(executorId);
  155.   if (executor == NULL) {
  156.     executor = framework->launchExecutor(executorInfo, task);
  157.   }
  158.   CHECK_NOTNULL(executor);
  159.   switch (executor->state) {
  160.     case Executor::TERMINATING:
  161.     case Executor::TERMINATED: {
  162.       LOG(WARNING) << "Asked to run task '" << task.task_id()
  163.                    << "' for framework " << frameworkId
  164.                    << " with executor '" << executorId
  165.                    << "' which is terminating/terminated";
  166.       const StatusUpdate update = protobuf::createStatusUpdate(
  167.           frameworkId,
  168.           info.id(),
  169.           task.task_id(),
  170.           TASK_LOST,
  171.           TaskStatus::SOURCE_SLAVE,
  172.           UUID::random(),
  173.           "Executor terminating/terminated",
  174.           TaskStatus::REASON_EXECUTOR_TERMINATED);
  175.       statusUpdate(update, UPID());
  176.       break;
  177.     }
  178.     case Executor::REGISTERING:
  179.       // Checkpoint the task before we do anything else.
  180.       if (executor->checkpoint) {
  181.         executor->checkpointTask(task);
  182.       }
  183.       // Queue task if the executor has not yet registered.
  184.       LOG(INFO) << "Queuing task '" << task.task_id()
  185.                 << "' for executor " << *executor;
  186.       executor->queuedTasks[task.task_id()] = task;
  187.       break;
  188.     case Executor::RUNNING: {
  189.       // Checkpoint the task before we do anything else.
  190.       if (executor->checkpoint) {
  191.         executor->checkpointTask(task);
  192.       }
  193.       // Queue task until the containerizer is updated with new
  194.       // resource limits (MESOS-998).
  195.       LOG(INFO) << "Queuing task '" << task.task_id()
  196.                 << "' for executor " << *executor;
  197.       executor->queuedTasks[task.task_id()] = task;
  198.       // Update the resource limits for the container. Note that the
  199.       // resource limits include the currently queued tasks because we
  200.       // want the container to have enough resources to hold the
  201.       // upcoming tasks.
  202.       Resources resources = executor->resources;
  203.       // TODO(jieyu): Use foreachvalue instead once LinkedHashmap
  204.       // supports it.
  205.       foreach (const TaskInfo& task, executor->queuedTasks.values()) {
  206.         resources += task.resources();
  207.       }
  208.       containerizer->update(executor->containerId, resources)
  209.         .onAny(defer(self(),
  210.                      &Self::runTasks,
  211.                      lambda::_1,
  212.                      frameworkId,
  213.                      executorId,
  214.                      executor->containerId,
  215.                      list<TaskInfo>({task})));
  216.       break;
  217.     }
  218.     default:
  219.       LOG(FATAL) << "Executor " << *executor << " is in unexpected state "
  220.                  << executor->state;
  221.       break;
  222.   }
  223.   // We don't perform the checks for 'removeFramework' here since
  224.   // we're guaranteed by 'launchExecutor' that 'framework->executors'
  225.   // will be non-empty.
  226.   CHECK(!framework->executors.empty());
  227. }

 

在这个函数中,会调用Framework::launchExecutor去运行一个Executor

  1. // Create and launch an executor.
  2. Executor* Framework::launchExecutor(
  3.     const ExecutorInfo& executorInfo,
  4.     const TaskInfo& taskInfo)
  5. {
  6.   // Generate an ID for the executor's container.
  7.   // TODO(idownes) This should be done by the containerizer but we
  8.   // need the ContainerID to create the executor's directory. Fix
  9.   // this when 'launchExecutor()' is handled asynchronously.
  10.   ContainerID containerId;
  11.   containerId.set_value(UUID::random().toString());
  12.   Option<string> user = None();
  13.   // Create a directory for the executor.
  14.   const string directory = paths::createExecutorDirectory(
  15.       slave->flags.work_dir,
  16.       slave->info.id(),
  17.       id(),
  18.       executorInfo.executor_id(),
  19.       containerId,
  20.       user);
  21.   Executor* executor = new Executor(
  22.       slave, id(), executorInfo, containerId, directory, info.checkpoint());
  23.   if (executor->checkpoint) {
  24.     executor->checkpointExecutor();
  25.   }
  26.   CHECK(!executors.contains(executorInfo.executor_id()))
  27.     << "Unknown executor " << executorInfo.executor_id();
  28.   executors[executorInfo.executor_id()] = executor;
  29.   LOG(INFO) << "Launching executor " << executorInfo.executor_id()
  30.             << " of framework " << id()
  31.             << " with resources " << executorInfo.resources()
  32.             << " in work directory '" << directory << "'";
  33.   slave->files->attach(executor->directory, executor->directory)
  34.     .onAny(defer(slave, &Slave::fileAttached, lambda::_1, executor->directory));
  35.   // Tell the containerizer to launch the executor.
  36.   // NOTE: We modify the ExecutorInfo to include the task's
  37.   // resources when launching the executor so that the containerizer
  38.   // has non-zero resources to work with when the executor has
  39.   // no resources. This should be revisited after MESOS-600.
  40.   ExecutorInfo executorInfo_ = executor->info;
  41.   Resources resources = executorInfo_.resources();
  42.   resources += taskInfo.resources();
  43.   executorInfo_.mutable_resources()->CopyFrom(resources);
  44.   // Launch the container.
  45.   Future<bool> launch;
  46.   if (!executor->isCommandExecutor()) {
  47.     // If the executor is _not_ a command executor, this means that
  48.     // the task will include the executor to run. The actual task to
  49.     // run will be enqueued and subsequently handled by the executor
  50.     // when it has registered to the slave.
  51.     launch = slave->containerizer->launch(
  52.         containerId,
  53.         executorInfo_, // Modified to include the task's resources, see above.
  54.         executor->directory,
  55.         user,
  56.         slave->info.id(),
  57.         slave->self(),
  58.         info.checkpoint());
  59.   } else {
  60.     // An executor has _not_ been provided by the task and will
  61.     // instead define a command and/or container to run. Right now,
  62.     // these tasks will require an executor anyway and the slave
  63.     // creates a command executor. However, it is up to the
  64.     // containerizer how to execute those tasks and the generated
  65.     // executor info works as a placeholder.
  66.     // TODO(nnielsen): Obsolete the requirement for executors to run
  67.     // one-off tasks.
  68.     launch = slave->containerizer->launch(
  69.         containerId,
  70.         taskInfo,
  71.         executorInfo_, // Modified to include the task's resources, see above.
  72.         executor->directory,
  73.         user,
  74.         slave->info.id(),
  75.         slave->self(),
  76.         info.checkpoint());
  77.   }
  78.   launch.onAny(defer(slave,
  79.                      &Slave::executorLaunched,
  80.                      id(),
  81.                      executor->id,
  82.                      containerId,
  83.                      lambda::_1));
  84.   // Make sure the executor registers within the given timeout.
  85.   delay(slave->flags.executor_registration_timeout,
  86.         slave,
  87.         &Slave::registerExecutorTimeout,
  88.         id(),
  89.         executor->id,
  90.         containerId);
  91.   return executor;
  92. }

 

会给Task创建一个文件夹,里面保存有Framework和Executor的信息。

最终会调用containerizer->launch。

根据前面Mesos源码分析(7): Mesos-Slave的启动中所说,这里的containerizer是指ComposingContainerizer。

  1. Future<bool> ComposingContainerizer::launch(
  2.     const ContainerID& containerId,
  3.     const TaskInfo& taskInfo,
  4.     const ExecutorInfo& executorInfo,
  5.     const string& directory,
  6.     const Option<string>& user,
  7.     const SlaveID& slaveId,
  8.     const PID<Slave>& slavePid,
  9.     bool checkpoint)
  10. {
  11.   return dispatch(process,
  12.                   &ComposingContainerizerProcess::launch,
  13.                   containerId,
  14.                   taskInfo,
  15.                   executorInfo,
  16.                   directory,
  17.                   user,
  18.                   slaveId,
  19.                   slavePid,
  20.                   checkpoint);
  21. }

 

ComposingContainerizer调用ComposingContainerizerProcess::launch

  1. Future<bool> ComposingContainerizerProcess::launch(
  2.     const ContainerID& containerId,
  3.     const TaskInfo& taskInfo,
  4.     const ExecutorInfo& executorInfo,
  5.     const string& directory,
  6.     const Option<string>& user,
  7.     const SlaveID& slaveId,
  8.     const PID<Slave>& slavePid,
  9.     bool checkpoint)
  10. {
  11.   if (containers_.contains(containerId)) {
  12.     return Failure("Container '" + stringify(containerId) +
  13.                    "' is already launching");
  14.   }
  15.   // Try each containerizer. If none of them handle the
  16.   // TaskInfo/ExecutorInfo then return a Failure.
  17.   vector<Containerizer*>::iterator containerizer = containerizers_.begin();
  18.   Container* container = new Container();
  19.   container->state = LAUNCHING;
  20.   container->containerizer = *containerizer;
  21.   containers_[containerId] = container;
  22.   return (*containerizer)->launch(
  23.       containerId,
  24.       taskInfo,
  25.       executorInfo,
  26.       directory,
  27.       user,
  28.       slaveId,
  29.       slavePid,
  30.       checkpoint)
  31.     .then(defer(self(),
  32.                 &Self::_launch,
  33.                 containerId,
  34.                 taskInfo,
  35.                 executorInfo,
  36.                 directory,
  37.                 user,
  38.                 slaveId,
  39.                 slavePid,
  40.                 checkpoint,
  41.                 containerizer,
  42.                 lambda::_1));
  43. }

 

上面这个函数会从第一个containerizer开始,调用它的launch函数,然后在其完成后回调ComposingContainerizerProcess::_launch函数。

  1. Future<bool> ComposingContainerizerProcess::_launch(
  2.     const ContainerID& containerId,
  3.     const Option<TaskInfo>& taskInfo,
  4.     const ExecutorInfo& executorInfo,
  5.     const string& directory,
  6.     const Option<string>& user,
  7.     const SlaveID& slaveId,
  8.     const PID<Slave>& slavePid,
  9.     bool checkpoint,
  10.     vector<Containerizer*>::iterator containerizer,
  11.     bool launched)
  12. {
  13.   // The container struct won't be cleaned up by destroy because
  14.   // in destroy we only forward the destroy, and wait until the
  15.   // launch returns and clean up here.
  16.   CHECK(containers_.contains(containerId));
  17.   Container* container = containers_[containerId];
  18.   if (container->state == DESTROYED) {
  19.     containers_.erase(containerId);
  20.     delete container;
  21.     return Failure("Container was destroyed while launching");
  22.   }
  23.   if (launched) {
  24.     container->state = LAUNCHED;
  25.     return true;
  26.   }
  27.   // Try the next containerizer.
  28.   ++containerizer;
  29.   if (containerizer == containerizers_.end()) {
  30.     containers_.erase(containerId);
  31.     delete container;
  32.     return false;
  33.   }
  34.   container->containerizer = *containerizer;
  35.   Future<bool> f = taskInfo.isSome() ?
  36.       (*containerizer)->launch(
  37.           containerId,
  38.           taskInfo.get(),
  39.           executorInfo,
  40.           directory,
  41.           user,
  42.           slaveId,
  43.           slavePid,
  44.           checkpoint) :
  45.       (*containerizer)->launch(
  46.           containerId,
  47.           executorInfo,
  48.           directory,
  49.           user,
  50.           slaveId,
  51.           slavePid,
  52.           checkpoint);
  53.   return f.then(
  54.       defer(self(),
  55.             &Self::_launch,
  56.             containerId,
  57.             taskInfo,
  58.             executorInfo,
  59.             directory,
  60.             user,
  61.             slaveId,
  62.             slavePid,
  63.             checkpoint,
  64.             containerizer,
  65.             lambda::_1));
  66. }

 

ComposingContainerizerProcess::_launch函数会在前一个Containerizer启动失败时调用下一个Containerizer的launch函数,直到某个Containerizer启动成功(返回true),或者所有containerizer都尝试完毕(返回false)为止。