Skip to content

vllm.model_executor.layers.fused_moe.runner.moe_runner

MoERunner

Bases: ABC

Abstract base class for Mixture of Experts (MoE) runners.

This class defines the interface that all MoE runner implementations must follow. MoE runners are responsible for executing the forward pass of MoE layers, handling expert routing, and managing tensor parallel operations.

Source code in vllm/model_executor/layers/fused_moe/runner/moe_runner.py
class MoERunner(ABC):
    """
    Abstract base class for Mixture of Experts (MoE) runners.

    This class defines the interface that all MoE runner implementations must follow.
    MoE runners are responsible for executing the forward pass of MoE layers, handling
    expert routing, and managing tensor parallel operations.
    """

    @abstractmethod
    def forward(
        self,
        hidden_states: torch.Tensor,
        router_logits: torch.Tensor,
    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
        raise NotImplementedError

    @abstractmethod
    def must_reduce_shared_expert_outputs(self) -> bool:
        raise NotImplementedError

    @abstractmethod
    def maybe_all_reduce_tensor_model_parallel(
        self,
        final_hidden_states: torch.Tensor,
    ):
        raise NotImplementedError

forward abstractmethod

forward(
    hidden_states: Tensor, router_logits: Tensor
) -> Tensor | tuple[Tensor, Tensor]
Source code in vllm/model_executor/layers/fused_moe/runner/moe_runner.py
@abstractmethod
def forward(
    self,
    hidden_states: torch.Tensor,
    router_logits: torch.Tensor,
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
    raise NotImplementedError

maybe_all_reduce_tensor_model_parallel abstractmethod

maybe_all_reduce_tensor_model_parallel(
    final_hidden_states: Tensor,
)
Source code in vllm/model_executor/layers/fused_moe/runner/moe_runner.py
@abstractmethod
def maybe_all_reduce_tensor_model_parallel(
    self,
    final_hidden_states: torch.Tensor,
):
    """
    Hook that receives the final hidden states tensor.

    NOTE(review): judging by the name, implementations conditionally
    all-reduce the tensor across tensor model parallel ranks. No return
    annotation is declared, so the result type is unspecified here --
    confirm against a concrete runner.

    Args:
        final_hidden_states: Tensor handed to the hook.

    Raises:
        NotImplementedError: Always, when this implementation runs.
    """
    raise NotImplementedError()

must_reduce_shared_expert_outputs abstractmethod

must_reduce_shared_expert_outputs() -> bool
Source code in vllm/model_executor/layers/fused_moe/runner/moe_runner.py
@abstractmethod
def must_reduce_shared_expert_outputs(self) -> bool:
    """
    Return a boolean flag that concrete runners must provide.

    NOTE(review): judging by the name, this reports whether shared expert
    outputs need a reduction step -- confirm against a concrete runner.

    Raises:
        NotImplementedError: Always, when this implementation runs.
    """
    raise NotImplementedError()