# pytorch-lr-finder

# Introduction

  • This project is a learning rate finder for PyTorch.

  • A learning rate range test provides valuable information about a good learning rate. During a preliminary training run, the learning rate is increased linearly or exponentially between two boundaries. A low initial learning rate lets the network start converging; as the learning rate grows, it eventually becomes too large and the network diverges.

  • Tweaked version from fastai: the learning rate is increased exponentially and the training loss is recorded for each value. `lr_finder.plot()` plots the training loss against the learning rate on a log scale.

  • Leslie Smith's approach: the learning rate is increased linearly and the evaluation loss is recorded for each value. `lr_finder.plot()` plots the evaluation loss against the learning rate. This typically produces a more precise curve, because the evaluation loss diverges more readily, but the test takes longer to run, especially when the evaluation dataset is large (see the usage sketch below).
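
A minimal usage sketch of both modes, for orientation. The toy model, data, and hyperparameter values here are illustrative stand-ins rather than anything from this repository; only the `LRFinder` calls follow the project's documented API.

```python
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torch_lr_finder import LRFinder

# Toy model and data as stand-ins; substitute a real model and loaders.
model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-7)

train_ds = TensorDataset(torch.randn(256, 10), torch.randint(0, 2, (256,)))
val_ds = TensorDataset(torch.randn(64, 10), torch.randint(0, 2, (64,)))
train_loader = DataLoader(train_ds, batch_size=32)
val_loader = DataLoader(val_ds, batch_size=32)

# fastai-style test: exponential increase, training loss is recorded.
lr_finder = LRFinder(model, optimizer, criterion, device="cpu")
lr_finder.range_test(train_loader, end_lr=10, num_iter=100)
lr_finder.plot()   # training loss vs. learning rate, log-scale x axis
lr_finder.reset()  # restore model and optimizer to their initial state

# Leslie Smith-style test: linear increase, loss evaluated on val_loader.
lr_finder.range_test(
    train_loader, val_loader=val_loader, end_lr=1, num_iter=100, step_mode="linear"
)
lr_finder.plot()
lr_finder.reset()
```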

# Analysis of test_lr_finder.py

# The original test code

```python
import pytest
import torch
from torch.utils.data import DataLoader
from torch_lr_finder import LRFinder
from torch_lr_finder.lr_finder import (
    DataLoaderIter, TrainDataLoaderIter, ValDataLoaderIter
)

import task as mod_task
import dataset as mod_dataset

import matplotlib.pyplot as plt

# Check available backends for mixed precision training
AVAILABLE_AMP_BACKENDS = []
try:
    import apex.amp
    AVAILABLE_AMP_BACKENDS.append("apex")
except ImportError:
    pass

try:
    import torch.amp
    AVAILABLE_AMP_BACKENDS.append("torch")
except ImportError:
    pass


def collect_task_classes():
    names = [v for v in dir(mod_task) if v.endswith("Task") and v != "BaseTask"]
    attrs = [getattr(mod_task, v) for v in names]
    classes = [v for v in attrs if issubclass(v, mod_task.BaseTask)]
    return classes


def prepare_lr_finder(task, **kwargs):
    model = task.model
    optimizer = task.optimizer
    criterion = task.criterion
    config = {
        "device": kwargs.get("device", None),
        "memory_cache": kwargs.get("memory_cache", True),
        "cache_dir": kwargs.get("cache_dir", None),
        "amp_backend": kwargs.get("amp_backend", None),
        "amp_config": kwargs.get("amp_config", None),
        "grad_scaler": kwargs.get("grad_scaler", None),
    }
    lr_finder = LRFinder(model, optimizer, criterion, **config)
    return lr_finder


def get_optim_lr(optimizer):
    return [grp["lr"] for grp in optimizer.param_groups]


def run_loader_iter(loader_iter, desired_runs=None):
    """Run a `DataLoaderIter` object for specific times.

    Arguments:
        loader_iter (torch_lr_finder.DataLoaderIter): the iterator to test.
        desired_runs (int, optional): times that iterator should be iterated.
            If it's not given, `len(loader_iter.data_loader)` will be used.

    Returns:
        is_achieved (bool): False if `loader_iter` cannot be iterated specific
            times. It usually means `loader_iter` has raised `StopIteration`.
    """
    assert isinstance(loader_iter, DataLoaderIter)

    if desired_runs is None:
        desired_runs = len(loader_iter.data_loader)

    count = 0
    try:
        for i in range(desired_runs):
            next(loader_iter)
            count += 1
    except StopIteration:
        return False
    return desired_runs == count


class TestRangeTest:
    @pytest.mark.parametrize("cls_task", collect_task_classes())
    def test_run(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, end_lr=0.1)

        # check whether lr is actually changed
        assert max(lr_finder.history["lr"]) >= init_lrs[0]

    @pytest.mark.parametrize("cls_task", collect_task_classes())
    def test_run_with_val_loader(self, cls_task):
        task = cls_task(validate=True)
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)

        # check whether lr is actually changed
        assert max(lr_finder.history["lr"]) >= init_lrs[0]

    @pytest.mark.parametrize("cls_task", [mod_task.SimplePOSTaggerTask])
    def test_run_non_tensor_dataset(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, end_lr=0.1)

        # check whether lr is actually changed
        assert max(lr_finder.history["lr"]) >= init_lrs[0]

    @pytest.mark.parametrize("cls_task", [mod_task.SimplePOSTaggerTask])
    def test_run_non_tensor_dataset_with_val_loader(self, cls_task):
        task = cls_task(validate=True)
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)

        # check whether lr is actually changed
        assert max(lr_finder.history["lr"]) >= init_lrs[0]


class TestReset:
    @pytest.mark.parametrize(
        "cls_task", [mod_task.XORTask, mod_task.DiscriminativeLearningRateTask],
    )
    def test_reset(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)
        lr_finder.reset()

        restored_lrs = get_optim_lr(task.optimizer)
        assert init_lrs == restored_lrs


class TestLRHistory:
    def test_linear_lr_history(self):
        task = mod_task.XORTask()
        # prepare_lr_finder sets the starting lr to 1e-5
        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(
            task.train_loader, num_iter=5, step_mode="linear", end_lr=5e-5
        )

        assert len(lr_finder.history["lr"]) == 5
        assert lr_finder.history["lr"] == pytest.approx([1e-5, 2e-5, 3e-5, 4e-5, 5e-5])

    def test_exponential_lr_history(self):
        task = mod_task.XORTask()
        # prepare_lr_finder sets the starting lr to 1e-5
        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, num_iter=5, step_mode="exp", end_lr=0.1)

        assert len(lr_finder.history["lr"]) == 5
        assert lr_finder.history["lr"] == pytest.approx([1e-5, 1e-4, 1e-3, 1e-2, 0.1])


class TestGradientAccumulation:
    def test_gradient_accumulation(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        lr_finder = prepare_lr_finder(task)
        spy = mocker.spy(lr_finder, "criterion")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )
        # NOTE: We are using smaller batch size to simulate a large batch.
        # So that the actual times of model/criterion called should be
        # `(desired_bs/real_bs) * num_iter` == `accum_steps * num_iter`
        assert spy.call_count == accum_steps * num_iter

    @pytest.mark.skipif(
        not (("apex" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
        reason="`apex` module and gpu is required to run this test."
    )
    def test_gradient_accumulation_with_apex_amp(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        # Wrap model and optimizer by `amp.initialize`. Beside, `amp` requires
        # CUDA GPU. So we have to move model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = apex.amp.initialize(model, optimizer)

        lr_finder = prepare_lr_finder(task, amp_backend="apex")
        spy = mocker.spy(apex.amp, "scale_loss")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )
        assert spy.call_count == accum_steps * num_iter

    @pytest.mark.skipif(
        not (("torch" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
        reason="`torch.amp` module and gpu is required to run this test."
    )
    def test_gradient_accumulation_with_torch_amp(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        # Config for `torch.amp`. Though `torch.amp.autocast` supports various
        # device types, we test it with CUDA only.
        amp_config = {
            "device_type": "cuda",
            "dtype": torch.float16,
        }
        grad_scaler = torch.cuda.amp.GradScaler()

        lr_finder = prepare_lr_finder(
            task, amp_backend="torch", amp_config=amp_config, grad_scaler=grad_scaler
        )
        spy = mocker.spy(grad_scaler, "scale")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )
        assert spy.call_count == accum_steps * num_iter


@pytest.mark.skipif(
    not (("apex" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
    reason="`apex` module and gpu is required to run these tests."
)
class TestMixedPrecision:
    def test_mixed_precision_apex(self, mocker):
        batch_size = 32
        num_iter = 10
        task = mod_task.XORTask(batch_size=batch_size)

        # Wrap model and optimizer by `amp.initialize`. Beside, `amp` requires
        # CUDA GPU. So we have to move model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = apex.amp.initialize(model, optimizer)
        assert hasattr(task.optimizer, "_amp_stash")

        lr_finder = prepare_lr_finder(task, amp_backend="apex")
        spy = mocker.spy(apex.amp, "scale_loss")

        lr_finder.range_test(task.train_loader, num_iter=num_iter)
        # NOTE: Here we did not perform gradient accumulation, so that call count
        # of `amp.scale_loss` should equal to `num_iter`.
        assert spy.call_count == num_iter

    def test_mixed_precision_torch(self, mocker):
        batch_size = 32
        num_iter = 10
        task = mod_task.XORTask(batch_size=batch_size)

        amp_config = {
            "device_type": "cuda",
            "dtype": torch.float16,
        }
        grad_scaler = torch.cuda.amp.GradScaler()

        lr_finder = prepare_lr_finder(
            task, amp_backend="torch", amp_config=amp_config, grad_scaler=grad_scaler
        )
        spy = mocker.spy(grad_scaler, "scale")

        lr_finder.range_test(task.train_loader, num_iter=num_iter)
        # NOTE: Here we did not perform gradient accumulation, so that call count
        # of `amp.scale_loss` should equal to `num_iter`.
        assert spy.call_count == num_iter


class TestDataLoaderIter:
    def test_traindataloaderiter(self):
        batch_size, data_length = 32, 256
        dataset = mod_dataset.RandomDataset(data_length)
        dataloader = DataLoader(dataset, batch_size=batch_size)

        loader_iter = TrainDataLoaderIter(dataloader)

        assert run_loader_iter(loader_iter)

        # `TrainDataLoaderIter` can reset itself, so that it's ok to reuse it
        # directly and iterate it more than `len(dataloader)` times.
        assert run_loader_iter(loader_iter, desired_runs=len(dataloader) + 1)

    def test_valdataloaderiter(self):
        batch_size, data_length = 32, 256
        dataset = mod_dataset.RandomDataset(data_length)
        dataloader = DataLoader(dataset, batch_size=batch_size)

        loader_iter = ValDataLoaderIter(dataloader)

        assert run_loader_iter(loader_iter)

        # `ValDataLoaderIter` can't reset itself, so this should be False if
        # we re-run it without resetting it.
        assert not run_loader_iter(loader_iter)

        # Reset it by `iter()`
        loader_iter = iter(loader_iter)
        assert run_loader_iter(loader_iter)

        # `ValDataLoaderIter` can't be iterated more than `len(dataloader)` times
        loader_iter = ValDataLoaderIter(dataloader)
        assert not run_loader_iter(loader_iter, desired_runs=len(dataloader) + 1)

    def test_run_range_test_with_traindataloaderiter(self, mocker):
        task = mod_task.XORTask()
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        loader_iter = TrainDataLoaderIter(task.train_loader)
        spy = mocker.spy(loader_iter, "inputs_labels_from_batch")

        lr_finder.range_test(loader_iter, num_iter=num_iter)
        assert spy.call_count == num_iter

    def test_run_range_test_with_valdataloaderiter(self, mocker):
        task = mod_task.XORTask(validate=True)
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        train_loader_iter = TrainDataLoaderIter(task.train_loader)
        val_loader_iter = ValDataLoaderIter(task.val_loader)
        spy_train = mocker.spy(train_loader_iter, "inputs_labels_from_batch")
        spy_val = mocker.spy(val_loader_iter, "inputs_labels_from_batch")

        lr_finder.range_test(
            train_loader_iter, val_loader=val_loader_iter, num_iter=num_iter
        )
        assert spy_train.call_count == num_iter
        assert spy_val.call_count == num_iter * len(task.val_loader)

    def test_run_range_test_with_trainloaderiter_without_subclassing(self):
        task = mod_task.XORTask()
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        loader_iter = CustomLoaderIter(task.train_loader)

        with pytest.raises(ValueError, match="`train_loader` has unsupported type"):
            lr_finder.range_test(loader_iter, num_iter=num_iter)

    def test_run_range_test_with_valloaderiter_without_subclassing(self):
        task = mod_task.XORTask(validate=True)
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        train_loader_iter = TrainDataLoaderIter(task.train_loader)
        val_loader_iter = CustomLoaderIter(task.val_loader)

        with pytest.raises(ValueError, match="`val_loader` has unsupported type"):
            lr_finder.range_test(
                train_loader_iter, val_loader=val_loader_iter, num_iter=num_iter
            )


class CustomLoaderIter(object):
    """This class does not inherit from `DataLoaderIter`, should be used to
    trigger exceptions related to type checking."""
    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        return iter(self.loader)


@pytest.mark.parametrize("num_iter", [0, 1])
@pytest.mark.parametrize("scheduler", ["exp", "linear"])
def test_scheduler_and_num_iter(num_iter, scheduler):
    task = mod_task.XORTask()
    # prepare_lr_finder sets the starting lr to 1e-5
    lr_finder = prepare_lr_finder(task)
    with pytest.raises(ValueError, match="num_iter"):
        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, step_mode=scheduler, end_lr=5e-5
        )


@pytest.mark.parametrize("suggest_lr", [False, True])
@pytest.mark.parametrize("skip_start", [0, 5, 10])
@pytest.mark.parametrize("skip_end", [0, 5, 10])
def test_plot_with_skip_and_suggest_lr(suggest_lr, skip_start, skip_end):
    task = mod_task.XORTask()
    num_iter = 11
    # prepare_lr_finder sets the starting lr to 1e-5
    lr_finder = prepare_lr_finder(task)
    lr_finder.range_test(
        task.train_loader, num_iter=num_iter, step_mode="exp", end_lr=0.1
    )

    fig, ax = plt.subplots()
    results = lr_finder.plot(
        skip_start=skip_start, skip_end=skip_end, suggest_lr=suggest_lr, ax=ax
    )

    if num_iter - skip_start - skip_end <= 1:
        # handle data with one or zero lr
        assert len(ax.lines) == 1
        assert results is ax
    else:
        # handle different suggest_lr
        # for 'steepest': the point with steepest gradient (minimal gradient)
        assert len(ax.lines) == 1
        assert len(ax.collections) == int(suggest_lr)
        if results is not ax:
            assert len(results) == 2


def test_suggest_lr():
    task = mod_task.XORTask()
    lr_finder = prepare_lr_finder(task)

    lr_finder.history["loss"] = [10, 8, 4, 1, 4, 16]
    lr_finder.history["lr"] = range(len(lr_finder.history["loss"]))

    fig, ax = plt.subplots()
    ax, lr = lr_finder.plot(skip_start=0, skip_end=0, suggest_lr=True, ax=ax)

    assert lr == 2

    # Loss with minimal gradient is the first element in history
    lr_finder.history["loss"] = [1, 0, 1, 2, 3, 4]
    lr_finder.history["lr"] = range(len(lr_finder.history["loss"]))

    fig, ax = plt.subplots()
    ax, lr = lr_finder.plot(skip_start=0, skip_end=0, suggest_lr=True, ax=ax)

    assert lr == 0

    # Loss with minimal gradient is the last element in history
    lr_finder.history["loss"] = [0, 1, 2, 3, 4, 3]
    lr_finder.history["lr"] = range(len(lr_finder.history["loss"]))

    fig, ax = plt.subplots()
    ax, lr = lr_finder.plot(skip_start=0, skip_end=0, suggest_lr=True, ax=ax)

    assert lr == len(lr_finder.history["loss"]) - 1
```
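
A note on running this file: besides `torch_lr_finder` itself, the suite relies on `pytest` plus `pytest-mock` (which supplies the `mocker` fixture) and the repository's local `task.py` and `dataset.py` helper modules; the `apex`/`torch.amp` tests are skipped unless the corresponding backend and a CUDA GPU are available.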

# Code uploaded to the LLM after removing the asserts and essential hints

```python
import pytest
import torch
from torch.utils.data import DataLoader
from torch_lr_finder import LRFinder
from torch_lr_finder.lr_finder import (
    DataLoaderIter, TrainDataLoaderIter, ValDataLoaderIter
)

import task as mod_task
import dataset as mod_dataset

import matplotlib.pyplot as plt

# Check available backends for mixed precision training
AVAILABLE_AMP_BACKENDS = []
try:
    import apex.amp
    AVAILABLE_AMP_BACKENDS.append("apex")
except ImportError:
    pass

try:
    import torch.amp
    AVAILABLE_AMP_BACKENDS.append("torch")
except ImportError:
    pass


def collect_task_classes():
    names = [v for v in dir(mod_task) if v.endswith("Task") and v != "BaseTask"]
    attrs = [getattr(mod_task, v) for v in names]
    classes = [v for v in attrs if issubclass(v, mod_task.BaseTask)]
    return classes


def prepare_lr_finder(task, **kwargs):
    model = task.model
    optimizer = task.optimizer
    criterion = task.criterion
    config = {
        "device": kwargs.get("device", None),
        "memory_cache": kwargs.get("memory_cache", True),
        "cache_dir": kwargs.get("cache_dir", None),
        "amp_backend": kwargs.get("amp_backend", None),
        "amp_config": kwargs.get("amp_config", None),
        "grad_scaler": kwargs.get("grad_scaler", None),
    }
    lr_finder = LRFinder(model, optimizer, criterion, **config)
    return lr_finder


def get_optim_lr(optimizer):
    return [grp["lr"] for grp in optimizer.param_groups]


def run_loader_iter(loader_iter, desired_runs=None):
    if desired_runs is None:
        desired_runs = len(loader_iter.data_loader)

    count = 0
    try:
        for i in range(desired_runs):
            next(loader_iter)
            count += 1
    except StopIteration:
        return False
    return desired_runs == count


class TestRangeTest:
    @pytest.mark.parametrize("cls_task", collect_task_classes())
    def test_run(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, end_lr=0.1)

    @pytest.mark.parametrize("cls_task", collect_task_classes())
    def test_run_with_val_loader(self, cls_task):
        task = cls_task(validate=True)
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)

    @pytest.mark.parametrize("cls_task", [mod_task.SimplePOSTaggerTask])
    def test_run_non_tensor_dataset(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, end_lr=0.1)

    @pytest.mark.parametrize("cls_task", [mod_task.SimplePOSTaggerTask])
    def test_run_non_tensor_dataset_with_val_loader(self, cls_task):
        task = cls_task(validate=True)
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)


class TestReset:
    @pytest.mark.parametrize(
        "cls_task", [mod_task.XORTask, mod_task.DiscriminativeLearningRateTask],
    )
    def test_reset(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)
        lr_finder.reset()

        restored_lrs = get_optim_lr(task.optimizer)


class TestLRHistory:
    def test_linear_lr_history(self):
        task = mod_task.XORTask()
        # prepare_lr_finder sets the starting lr to 1e-5
        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(
            task.train_loader, num_iter=5, step_mode="linear", end_lr=5e-5
        )

    def test_exponential_lr_history(self):
        task = mod_task.XORTask()
        # prepare_lr_finder sets the starting lr to 1e-5
        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, num_iter=5, step_mode="exp", end_lr=0.1)


class TestGradientAccumulation:
    def test_gradient_accumulation(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        lr_finder = prepare_lr_finder(task)
        spy = mocker.spy(lr_finder, "criterion")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )

    @pytest.mark.skipif(
        not (("apex" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
        reason="`apex` module and gpu is required to run this test."
    )
    def test_gradient_accumulation_with_apex_amp(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        # Wrap model and optimizer by `amp.initialize`. Beside, `amp` requires
        # CUDA GPU. So we have to move model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = apex.amp.initialize(model, optimizer)

        lr_finder = prepare_lr_finder(task, amp_backend="apex")
        spy = mocker.spy(apex.amp, "scale_loss")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )

    @pytest.mark.skipif(
        not (("torch" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
        reason="`torch.amp` module and gpu is required to run this test."
    )
    def test_gradient_accumulation_with_torch_amp(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        # Config for `torch.amp`. Though `torch.amp.autocast` supports various
        # device types, we test it with CUDA only.
        amp_config = {
            "device_type": "cuda",
            "dtype": torch.float16,
        }
        grad_scaler = torch.cuda.amp.GradScaler()

        lr_finder = prepare_lr_finder(
            task, amp_backend="torch", amp_config=amp_config, grad_scaler=grad_scaler
        )
        spy = mocker.spy(grad_scaler, "scale")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )


@pytest.mark.skipif(
    not (("apex" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
    reason="`apex` module and gpu is required to run these tests."
)
class TestMixedPrecision:
    def test_mixed_precision_apex(self, mocker):
        batch_size = 32
        num_iter = 10
        task = mod_task.XORTask(batch_size=batch_size)

        # Wrap model and optimizer by `amp.initialize`. Beside, `amp` requires
        # CUDA GPU. So we have to move model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = apex.amp.initialize(model, optimizer)

        lr_finder = prepare_lr_finder(task, amp_backend="apex")
        spy = mocker.spy(apex.amp, "scale_loss")

        lr_finder.range_test(task.train_loader, num_iter=num_iter)
        # NOTE: Here we did not perform gradient accumulation, so that call count
        # of `amp.scale_loss` should equal to `num_iter`.

    def test_mixed_precision_torch(self, mocker):
        batch_size = 32
        num_iter = 10
        task = mod_task.XORTask(batch_size=batch_size)

        amp_config = {
            "device_type": "cuda",
            "dtype": torch.float16,
        }
        grad_scaler = torch.cuda.amp.GradScaler()

        lr_finder = prepare_lr_finder(
            task, amp_backend="torch", amp_config=amp_config, grad_scaler=grad_scaler
        )
        spy = mocker.spy(grad_scaler, "scale")

        lr_finder.range_test(task.train_loader, num_iter=num_iter)


class TestDataLoaderIter:
    def test_traindataloaderiter(self):
        batch_size, data_length = 32, 256
        dataset = mod_dataset.RandomDataset(data_length)
        dataloader = DataLoader(dataset, batch_size=batch_size)

        loader_iter = TrainDataLoaderIter(dataloader)

    def test_valdataloaderiter(self):
        batch_size, data_length = 32, 256
        dataset = mod_dataset.RandomDataset(data_length)
        dataloader = DataLoader(dataset, batch_size=batch_size)

        loader_iter = ValDataLoaderIter(dataloader)

    def test_run_range_test_with_traindataloaderiter(self, mocker):
        task = mod_task.XORTask()
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        loader_iter = TrainDataLoaderIter(task.train_loader)
        spy = mocker.spy(loader_iter, "inputs_labels_from_batch")

        lr_finder.range_test(loader_iter, num_iter=num_iter)
        assert spy.call_count == num_iter

    def test_run_range_test_with_valdataloaderiter(self, mocker):
        task = mod_task.XORTask(validate=True)
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        train_loader_iter = TrainDataLoaderIter(task.train_loader)
        val_loader_iter = ValDataLoaderIter(task.val_loader)
        spy_train = mocker.spy(train_loader_iter, "inputs_labels_from_batch")
        spy_val = mocker.spy(val_loader_iter, "inputs_labels_from_batch")

        lr_finder.range_test(
            train_loader_iter, val_loader=val_loader_iter, num_iter=num_iter
        )
        assert spy_train.call_count == num_iter
        assert spy_val.call_count == num_iter * len(task.val_loader)

    def test_run_range_test_with_trainloaderiter_without_subclassing(self):
        task = mod_task.XORTask()
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        loader_iter = CustomLoaderIter(task.train_loader)

        with pytest.raises(ValueError, match="`train_loader` has unsupported type"):
            lr_finder.range_test(loader_iter, num_iter=num_iter)

    def test_run_range_test_with_valloaderiter_without_subclassing(self):
        task = mod_task.XORTask(validate=True)
        lr_finder = prepare_lr_finder(task)
        num_iter = 5

        train_loader_iter = TrainDataLoaderIter(task.train_loader)
        val_loader_iter = CustomLoaderIter(task.val_loader)

        with pytest.raises(ValueError, match="`val_loader` has unsupported type"):
            lr_finder.range_test(
                train_loader_iter, val_loader=val_loader_iter, num_iter=num_iter
            )


class CustomLoaderIter(object):
    """This class does not inherit from `DataLoaderIter`, should be used to
    trigger exceptions related to type checking."""
    def __init__(self, loader):
        self.loader = loader

    def __iter__(self):
        return iter(self.loader)


@pytest.mark.parametrize("num_iter", [0, 1])
@pytest.mark.parametrize("scheduler", ["exp", "linear"])
def test_scheduler_and_num_iter(num_iter, scheduler):
    task = mod_task.XORTask()
    # prepare_lr_finder sets the starting lr to 1e-5
    lr_finder = prepare_lr_finder(task)
    with pytest.raises(ValueError, match="num_iter"):
        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, step_mode=scheduler, end_lr=5e-5
        )


@pytest.mark.parametrize("suggest_lr", [False, True])
@pytest.mark.parametrize("skip_start", [0, 5, 10])
@pytest.mark.parametrize("skip_end", [0, 5, 10])
def test_plot_with_skip_and_suggest_lr(suggest_lr, skip_start, skip_end):
    task = mod_task.XORTask()
    num_iter = 11
    # prepare_lr_finder sets the starting lr to 1e-5
    lr_finder = prepare_lr_finder(task)
    lr_finder.range_test(
        task.train_loader, num_iter=num_iter, step_mode="exp", end_lr=0.1
    )

    fig, ax = plt.subplots()
    results = lr_finder.plot(
        skip_start=skip_start, skip_end=skip_end, suggest_lr=suggest_lr, ax=ax
    )

    if num_iter - skip_start - skip_end <= 1:
        # handle data with one or zero lr
        assert len(ax.lines) == 1
        assert results is ax
    else:
        # handle different suggest_lr
        # for 'steepest': the point with steepest gradient (minimal gradient)
        assert len(ax.lines) == 1
        assert len(ax.collections) == int(suggest_lr)
        if results is not ax:
            assert len(results) == 2


def test_suggest_lr():
    task = mod_task.XORTask()
    lr_finder = prepare_lr_finder(task)

    lr_finder.history["loss"] = [10, 8, 4, 1, 4, 16]
    lr_finder.history["lr"] = range(len(lr_finder.history["loss"]))

    fig, ax = plt.subplots()
    ax, lr = lr_finder.plot(skip_start=0, skip_end=0, suggest_lr=True, ax=ax)

    assert lr == 2

    # Loss with minimal gradient is the first element in history
    lr_finder.history["loss"] = [1, 0, 1, 2, 3, 4]
    lr_finder.history["lr"] = range(len(lr_finder.history["loss"]))

    fig, ax = plt.subplots()
    ax, lr = lr_finder.plot(skip_start=0, skip_end=0, suggest_lr=True, ax=ax)

    assert lr == 0

    # Loss with minimal gradient is the last element in history
    lr_finder.history["loss"] = [0, 1, 2, 3, 4, 3]
    lr_finder.history["lr"] = range(len(lr_finder.history["loss"]))

    fig, ax = plt.subplots()
    ax, lr = lr_finder.plot(skip_start=0, skip_end=0, suggest_lr=True, ax=ax)

    assert lr == len(lr_finder.history["loss"]) - 1
```

# Code after the LLM auto-filled the asserts

**Base prompt:** "These test cases are missing assert statements; please fill them in automatically."

```python
import pytest
import torch
from torch.utils.data import DataLoader
from torch_lr_finder import LRFinder
from torch_lr_finder.lr_finder import (
    DataLoaderIter, TrainDataLoaderIter, ValDataLoaderIter
)

import task as mod_task
import dataset as mod_dataset

import matplotlib.pyplot as plt

# Check available backends for mixed precision training
AVAILABLE_AMP_BACKENDS = []
try:
    import apex.amp
    AVAILABLE_AMP_BACKENDS.append("apex")
except ImportError:
    pass

try:
    import torch.amp
    AVAILABLE_AMP_BACKENDS.append("torch")
except ImportError:
    pass


def collect_task_classes():
    names = [v for v in dir(mod_task) if v.endswith("Task") and v != "BaseTask"]
    attrs = [getattr(mod_task, v) for v in names]
    classes = [v for v in attrs if issubclass(v, mod_task.BaseTask)]
    return classes


def prepare_lr_finder(task, **kwargs):
    model = task.model
    optimizer = task.optimizer
    criterion = task.criterion
    config = {
        "device": kwargs.get("device", None),
        "memory_cache": kwargs.get("memory_cache", True),
        "cache_dir": kwargs.get("cache_dir", None),
        "amp_backend": kwargs.get("amp_backend", None),
        "amp_config": kwargs.get("amp_config", None),
        "grad_scaler": kwargs.get("grad_scaler", None),
    }
    lr_finder = LRFinder(model, optimizer, criterion, **config)
    return lr_finder


def get_optim_lr(optimizer):
    return [grp["lr"] for grp in optimizer.param_groups]


def run_loader_iter(loader_iter, desired_runs=None):
    if desired_runs is None:
        desired_runs = len(loader_iter.data_loader)

    count = 0
    try:
        for i in range(desired_runs):
            next(loader_iter)
            count += 1
    except StopIteration:
        return False
    return desired_runs == count


class TestRangeTest:
    @pytest.mark.parametrize("cls_task", collect_task_classes())
    def test_run(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, end_lr=0.1)

        # Assert statements
        assert len(lr_finder.history["lr"]) > 0
        assert len(lr_finder.history["loss"]) > 0

    @pytest.mark.parametrize("cls_task", collect_task_classes())
    def test_run_with_val_loader(self, cls_task):
        task = cls_task(validate=True)
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)

        # Assert statements
        assert len(lr_finder.history["lr"]) > 0
        assert len(lr_finder.history["loss"]) > 0

    @pytest.mark.parametrize("cls_task", [mod_task.SimplePOSTaggerTask])
    def test_run_non_tensor_dataset(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, end_lr=0.1)

        # Assert statements
        assert len(lr_finder.history["lr"]) > 0
        assert len(lr_finder.history["loss"]) > 0

    @pytest.mark.parametrize("cls_task", [mod_task.SimplePOSTaggerTask])
    def test_run_non_tensor_dataset_with_val_loader(self, cls_task):
        task = cls_task(validate=True)
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)

        # Assert statements
        assert len(lr_finder.history["lr"]) > 0
        assert len(lr_finder.history["loss"]) > 0


class TestReset:
    @pytest.mark.parametrize(
        "cls_task", [mod_task.XORTask, mod_task.DiscriminativeLearningRateTask],
    )
    def test_reset(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, end_lr=0.1)
        lr_finder.reset()

        restored_lrs = get_optim_lr(task.optimizer)

        # Assert statements
        assert restored_lrs == init_lrs


class TestLRHistory:
    def test_linear_lr_history(self):
        task = mod_task.XORTask()
        # prepare_lr_finder sets the starting lr to 1e-5
        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(
            task.train_loader, num_iter=5, step_mode="linear", end_lr=5e-5
        )

        # Assert statements
        assert len(lr_finder.history["lr"]) > 0
        assert len(lr_finder.history["loss"]) > 0

    def test_exponential_lr_history(self):
        task = mod_task.XORTask()
        # prepare_lr_finder sets the starting lr to 1e-5
        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, num_iter=5, step_mode="exp", end_lr=0.1)

        # Assert statements
        assert len(lr_finder.history["lr"]) > 0
        assert len(lr_finder.history["loss"]) > 0


class TestGradientAccumulation:
    def test_gradient_accumulation(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        lr_finder = prepare_lr_finder(task)
        spy = mocker.spy(lr_finder, "criterion")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )

        # Assert statements
        assert spy.call_count == num_iter

    @pytest.mark.skipif(
        not (("apex" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
        reason="`apex` module and gpu is required to run this test."
    )
    def test_gradient_accumulation_with_apex_amp(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        # Wrap model and optimizer by `amp.initialize`. Beside, `amp` requires
        # CUDA GPU. So we have to move model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = apex.amp.initialize(model, optimizer)

        lr_finder = prepare_lr_finder(task, amp_backend="apex")
        spy = mocker.spy(apex.amp, "scale_loss")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )

        # Assert statements
        assert spy.call_count == num_iter

    @pytest.mark.skipif(
        not (("torch" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
        reason="`torch.amp` module and gpu is required to run this test."
    )
    def test_gradient_accumulation_with_torch_amp(self, mocker):
        desired_bs, accum_steps = 32, 4
        real_bs = desired_bs // accum_steps
        num_iter = 10
        task = mod_task.XORTask(batch_size=real_bs)

        # Config for `torch.amp`. Though `torch.amp.autocast` supports various
        # device types, we test it with CUDA only.
        amp_config = {
            "device_type": "cuda",
            "dtype": torch.float16,
        }
        grad_scaler = torch.cuda.amp.GradScaler()

        lr_finder = prepare_lr_finder(
            task, amp_backend="torch", amp_config=amp_config, grad_scaler=grad_scaler
        )
        spy = mocker.spy(grad_scaler, "scale")

        lr_finder.range_test(
            task.train_loader, num_iter=num_iter, accumulation_steps=accum_steps
        )

        # Assert statements
        assert spy.call_count == num_iter


@pytest.mark.skipif(
    not (("apex" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
    reason="`apex` module and gpu is required to run these tests."
)
class TestMixedPrecision:
    def test_mixed_precision_apex(self, mocker):
        batch_size = 32
        num_iter = 10
        task = mod_task.XORTask(batch_size=batch_size)

        # Wrap model and optimizer by `amp.initialize`. Beside, `amp` requires
        # CUDA GPU. So we have to move model to GPU first.
        model, optimizer, device = task.model, task.optimizer, task.device
        model = model.to(device)
        task.model, task.optimizer = apex.amp.initialize(model, optimizer)

        lr_finder = prepare_lr_finder(task, amp_backend="apex")
        spy = mocker.spy(apex.amp, "scale_loss")

        lr_finder.range_test(task.train_loader, num_iter=num_iter)
        # NOTE: Here we did not perform gradient accumulation, so that call count
        # of `amp` could be only one in any situation.

        # Assert statements
        assert spy.call_count == 1


@pytest.mark.skipif(
    not (("torch" in AVAILABLE_AMP_BACKENDS) and mod_task.use_cuda()),
    reason="`torch.amp` module and gpu is required to run these tests."
)
class TestMixedPrecisionWithTorchAMP:
    def test_mixed_precision_torch_amp(self, mocker):
        batch_size = 32
        num_iter = 10
        task = mod_task.XORTask(batch_size=batch_size)

        # Config for `torch.amp`. Though `torch.amp.autocast` supports various
        # device types, we test it with CUDA only.
        amp_config = {
            "device_type": "cuda",
            "dtype": torch.float16,
        }
        grad_scaler = torch.cuda.amp.GradScaler()

        lr_finder = prepare_lr_finder(
            task, amp_backend="torch", amp_config=amp_config, grad_scaler=grad_scaler
        )
        spy = mocker.spy(grad_scaler, "scale")

        lr_finder.range_test(task.train_loader, num_iter=num_iter)
        # NOTE: Here we did not perform gradient accumulation, so that call count
        # of `amp` could be only one in any situation.

        # Assert statements
        assert spy.call_count == 1


@pytest.mark.skipif(not mod_task.use_cuda(), reason="CUDA GPU is required to run these tests.")
class TestMultiPhase:
    @pytest.mark.parametrize(
        "cls_task", [mod_task.XORTask, mod_task.DiscriminativeLearningRateTask],
    )
    def test_multi_phase(self, cls_task):
        task = cls_task()
        init_lrs = get_optim_lr(task.optimizer)

        lr_finder = prepare_lr_finder(task)
        lr_finder.range_test(task.train_loader, val_loader=task.val_loader, num_iter=10)

        # Simulate a change in LR and perform another phase of LR range test
        for param_group in task.optimizer.param_groups:
            param_group["lr"] *= 0.1

        lr_finder.range_test(
            task.train_loader, val_loader=task.val_loader, num_iter=10, reset_lr=False
        )

        # Assert statements
        assert len(lr_finder.history["lr"]) > 0
        assert len(lr_finder.history["loss"]) > 0
```
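
A brief comparison of the filled-in asserts with the originals: for the range tests, the LLM only checks that `history["lr"]`/`history["loss"]` are non-empty, which is weaker than the original check that the learning rate actually grew beyond its initial value. For the gradient-accumulation tests it asserts `spy.call_count == num_iter` where the originals expect `accum_steps * num_iter`, and for the mixed-precision tests it asserts a call count of `1` where the originals expect `num_iter`, both of which contradict the behavior the original tests encode. The LLM also restructured the torch-AMP test into its own `TestMixedPrecisionWithTorchAMP` class and invented a `TestMultiPhase` case (with a `reset_lr` argument) that appears nowhere in the original file.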