etils.auto_tx

`get_optimizer_and_scheduler(optimizer, scheduler, steps, learning_rate=1e-05, learning_rate_end=1e-05, gradient_accumulation_steps=1, extra_optimizer_kwargs=None, weight_decay=0.02, warmup_steps=0)`

The get_optimizer_and_scheduler function is a helper function that returns an optimizer and scheduler based on the parameters passed to it.

Parameters:

Name	Type	Description	Default
`optimizer`	`AVAILABLE_OPTIMIZERS`	AVAILABLE_OPTIMIZERS: Choose the optimizer	required
`scheduler`	`AVAILABLE_SCHEDULERS`	AVAILABLE_SCHEDULERS: Determine the learning rate scheduler	required
`steps`	`int`	int: Specify the number of steps in the training process	required
`learning_rate`	`float`	float: Set the learning rate for the optimizer	`1e-05`
`learning_rate_end`	`float`	float: Set the final learning rate	`1e-05`
`gradient_accumulation_steps`	`int`	int: Accumulate the gradients before updating the weights	`1`
`extra_optimizer_kwargs`	`Optional[dict]`	dict \| None: Pass extra arguments to the optimizer	`None`
`weight_decay`	`float`	float: Set the weight decay for adamw optimizer	`0.02`
`warmup_steps`	`int`	int: Specify the number of steps to warm up the learning rate	`0`

Returns:

Type	Description
	A tuple of two objects: (Optimizer and scheduler)

Source code in src/python/easydel/etils/auto_tx.py

def get_optimizer_and_scheduler(
        optimizer: AVAILABLE_OPTIMIZERS,
        scheduler: AVAILABLE_SCHEDULERS,
        steps: int,
        learning_rate: float = 1e-5,
        learning_rate_end: float = 1e-5,
        gradient_accumulation_steps: int = 1,
        extra_optimizer_kwargs: Optional[dict] = None,
        weight_decay: float = 0.02,
        warmup_steps: int = 0
):
    """
    The get_optimizer_and_scheduler function is a helper function that returns an optimizer and scheduler
        based on the parameters passed to it.

    :param optimizer: AVAILABLE_OPTIMIZERS: Choose the optimizer
    :param scheduler: AVAILABLE_SCHEDULERS: Determine the learning rate scheduler
    :param steps: int: Specify the number of steps in the training process
    :param learning_rate: float: Set the learning rate for the optimizer
    :param learning_rate_end: float: Set the final learning rate
    :param gradient_accumulation_steps: int: Accumulate the gradients before updating the weights
    :param extra_optimizer_kwargs: dict | None: Pass extra arguments to the optimizer
    :param weight_decay: float: Set the weight decay for adamw optimizer
    :param warmup_steps: int: Specify the number of steps to warm up the learning rate
    :return: A tuple of two objects: (Optimizer and scheduler)
    """
    if extra_optimizer_kwargs is None:
        extra_optimizer_kwargs = {}
    if optimizer == EasyDeLOptimizers.ADAFACTOR:
        if scheduler == EasyDeLSchedulers.LINEAR:
            tx, sc = fjformer.optimizers.get_adafactor_with_linear_scheduler(
                learning_rate_start=learning_rate,
                learning_rate_end=learning_rate_end,
                gradient_accumulation_steps=gradient_accumulation_steps,
                steps=steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.COSINE:
            tx, sc = fjformer.optimizers.get_adafactor_with_cosine_scheduler(
                learning_rate=learning_rate,
                steps=steps,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.NONE:
            tx, sc = fjformer.optimizers.get_adafactor_with_linear_scheduler(
                learning_rate_start=learning_rate,
                learning_rate_end=learning_rate,
                steps=steps,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.WARM_UP_COSINE:
            tx, sc = fjformer.optimizers.get_adafactor_with_warm_up_cosine_scheduler(
                learning_rate=learning_rate,
                steps=steps,
                weight_decay=weight_decay,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.WARM_UP_LINEAR:
            tx, sc = fjformer.optimizers.get_adafactor_with_warmup_linear_scheduler(
                learning_rate_start=learning_rate,
                steps=steps,
                learning_rate_end=learning_rate_end,
                gradient_accumulation_steps=gradient_accumulation_steps,
                warmup_steps=warmup_steps,
                **extra_optimizer_kwargs

            )

        else:
            raise ValueError(
                "seems like you have choose wrong type or unavailable scheduler"
            )
    elif optimizer == EasyDeLOptimizers.LION:
        if scheduler == EasyDeLSchedulers.LINEAR:
            tx, sc = fjformer.optimizers.get_lion_with_linear_scheduler(
                learning_rate_start=learning_rate,
                learning_rate_end=learning_rate_end,
                steps=steps,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.COSINE:
            tx, sc = fjformer.optimizers.get_lion_with_cosine_scheduler(
                learning_rate=learning_rate,
                gradient_accumulation_steps=gradient_accumulation_steps,
                steps=steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.NONE:
            tx, sc = fjformer.optimizers.get_lion_with_linear_scheduler(
                learning_rate_start=learning_rate,
                learning_rate_end=learning_rate,
                steps=steps,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.WARM_UP_COSINE:
            tx, sc = fjformer.optimizers.get_lion_with_warm_up_cosine_scheduler(
                learning_rate=learning_rate,
                steps=steps,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )

        elif scheduler == EasyDeLSchedulers.WARM_UP_LINEAR:
            tx, sc = fjformer.optimizers.get_lion_with_with_warmup_linear_scheduler(
                learning_rate_start=learning_rate,
                steps=steps,
                learning_rate_end=learning_rate_end,
                gradient_accumulation_steps=gradient_accumulation_steps,
                warmup_steps=warmup_steps,
                **extra_optimizer_kwargs
            )
        else:
            raise ValueError(
                "seems like you have choose wrong type or unavailable scheduler")
    elif optimizer == EasyDeLOptimizers.ADAMW:
        if scheduler == EasyDeLSchedulers.LINEAR:
            tx, sc = fjformer.optimizers.get_adamw_with_linear_scheduler(
                learning_rate_start=learning_rate,
                learning_rate_end=learning_rate_end,
                steps=steps,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.COSINE:
            tx, sc = fjformer.optimizers.get_adamw_with_cosine_scheduler(
                learning_rate=learning_rate,
                gradient_accumulation_steps=gradient_accumulation_steps,
                steps=steps,
                weight_decay=weight_decay,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.NONE:
            tx, sc = fjformer.optimizers.get_adamw_with_linear_scheduler(
                learning_rate_start=learning_rate,
                learning_rate_end=learning_rate,
                gradient_accumulation_steps=gradient_accumulation_steps,
                steps=steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.WARM_UP_COSINE:
            tx, sc = fjformer.optimizers.get_adamw_with_warm_up_cosine_scheduler(
                learning_rate=learning_rate,
                steps=steps,
                weight_decay=weight_decay,
                gradient_accumulation_steps=gradient_accumulation_steps,
                **extra_optimizer_kwargs
            )
        elif scheduler == EasyDeLSchedulers.WARM_UP_LINEAR:
            tx, sc = fjformer.optimizers.get_adamw_with_warmup_linear_scheduler(
                learning_rate_start=learning_rate,
                steps=steps,
                weight_decay=weight_decay,
                learning_rate_end=learning_rate_end,
                gradient_accumulation_steps=gradient_accumulation_steps,
                warmup_steps=warmup_steps,
                **extra_optimizer_kwargs
            )
        else:
            raise ValueError(
                "seems like you have choose wrong type or unavailable scheduler"
            )
    else:
        raise ValueError(
            f"seems like you have choose wrong type or unavailable optimizer {optimizer} and scheduler {scheduler}"
        )
    return tx, sc