Learning Rate WarmUp is a well-known technique used in many papers.
With WarmUp, the learning rate changes over time as shown in the figure below: it ramps up linearly over the first few epochs and then follows a decay schedule.
For the implementation, refer to the two versions below (a scheduler version and a callback version), and adjust hyperparameters such as decay_fn to suit your setup.
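Before looking at the Keras code, here is a rough sketch of the schedule's shape only. The values and the linear decay are made up for illustration; the actual implementations below use tf.keras and PolynomialDecay.

    # Illustrative only: warmup ramp-up followed by a decay.
    # init_lr, warmup_epochs, total_epochs, min_lr are example values.
    def toy_lr(epoch, init_lr=0.1, warmup_epochs=10, total_epochs=100, min_lr=1e-6):
        if epoch + 1 < warmup_epochs:                       # linear ramp-up
            return init_lr * (epoch + 1) / warmup_epochs
        # linear decay from init_lr down toward min_lr over the remaining epochs
        progress = (epoch - warmup_epochs) / max(total_epochs - warmup_epochs, 1)
        return max(init_lr * (1.0 - progress), min_lr)

    print([round(toy_lr(e), 4) for e in range(0, 100, 10)])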
Scheduler version
import tensorflow as tf

class LRSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, init_lr, warmup_epoch,
                 steps_per_epoch,
                 decay_fn, *,
                 continue_epoch = 0):
        super().__init__()
        self.init_lr = init_lr
        self.decay_fn = decay_fn
        self.warmup_epoch = warmup_epoch
        self.continue_epoch = continue_epoch
        self.steps_per_epoch = steps_per_epoch

    def on_epoch_begin(self, epoch):
        epoch = tf.cast(epoch, tf.float64)
        global_epoch = tf.cast(epoch + 1, tf.float64)
        warmup_epoch_float = tf.cast(self.warmup_epoch, tf.float64)

        # Linear ramp-up during warmup, then hand over to the decay function.
        lr = tf.cond(
            global_epoch < warmup_epoch_float,
            lambda: tf.cast(self.init_lr * (global_epoch / warmup_epoch_float), tf.float64),
            lambda: tf.cast(self.decay_fn(epoch - warmup_epoch_float), tf.float64)
        )
        self.lr = lr

    def __call__(self, step):
        # Convert the global step into an epoch index; continue_epoch offsets
        # the count when training is resumed from a checkpoint.
        epoch = step // self.steps_per_epoch + self.continue_epoch
        self.on_epoch_begin(epoch)
        return self.lr
def get_steps(x_size, batch_size):
    # Number of steps per epoch; round up when the data size
    # is not an exact multiple of the batch size.
    if x_size % batch_size == 0:
        return x_size // batch_size
    else:
        return x_size // batch_size + 1
# data_size: size of the training set
data_size = 100000
BATCH_SIZE = 512
EPOCHS = 100

warmup_epoch = int(EPOCHS * 0.1)
init_lr = 0.1
min_lr = 1e-6
power = 1.

# decay_fn is driven with epoch counts, so decay_steps is given in epochs.
lr_scheduler = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate = init_lr,
    decay_steps = EPOCHS - warmup_epoch,
    end_learning_rate = min_lr,
    power = power
)

# get_steps: number of steps per epoch
lr_schedule = LRSchedule(init_lr, warmup_epoch,
                         steps_per_epoch = get_steps(data_size, BATCH_SIZE),
                         decay_fn = lr_scheduler,
                         continue_epoch = 0)

# Usage example
optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule)
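As a quick sanity check, you can evaluate the schedule at the first step of each epoch before handing it to the optimizer. This sketch assumes the lr_schedule, data_size, BATCH_SIZE, EPOCHS, and warmup_epoch defined above and runs in eager mode:

    # Sample the schedule once per epoch and inspect the resulting values.
    steps_per_epoch = get_steps(data_size, BATCH_SIZE)
    sampled = [float(lr_schedule(epoch * steps_per_epoch)) for epoch in range(EPOCHS)]

    print(sampled[:warmup_epoch + 5])  # ramps up over the warmup epochs, then follows the decay curve
    print(sampled[-5:])                # tail of the decay toward min_lr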
Callback version
class LRSchedule(tf.keras.callbacks.Callback):
    def __init__(self, init_lr, warmup_epoch, decay_fn):
        super().__init__()
        self.init_lr = init_lr
        self.decay_fn = decay_fn
        self.warmup_epoch = warmup_epoch
        self.lrs = []

    def on_epoch_begin(self, epoch, logs = None):
        global_epoch = tf.cast(epoch + 1, tf.float64)
        warmup_epoch_float = tf.cast(self.warmup_epoch, tf.float64)

        # Linear ramp-up during warmup, then hand over to the decay function.
        lr = tf.cond(
            global_epoch < warmup_epoch_float,
            lambda: self.init_lr * (global_epoch / warmup_epoch_float),
            lambda: self.decay_fn(global_epoch - warmup_epoch_float),
        )

        tf.print('learning rate: ', lr)
        tf.keras.backend.set_value(self.model.optimizer.lr, lr)
        self.lrs.append(lr)
epochs = 1000
warmup_epoch = int(epochs * 0.1)
init_lr = 0.1
min_lr = 1e-6
power = 1.

lr_scheduler = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate = init_lr,
    decay_steps = epochs - warmup_epoch,
    end_learning_rate = min_lr,
    power = power
)

# lr_schedule = LRSchedule(init_lr = init_lr,
#                          warmup_epoch = warmup_epoch,
#                          decay_fn = lr_scheduler)
# for i in range(epochs):
#     lr_schedule.on_epoch_begin(i)

# Usage example
model.fit(..., callbacks = [LRSchedule(init_lr = init_lr,
                                       warmup_epoch = warmup_epoch,
                                       decay_fn = lr_scheduler)],
          initial_epoch = 0)
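One note on resuming: model.fit passes the absolute epoch index to on_epoch_begin, so setting the right initial_epoch keeps the callback on the decay portion of the curve instead of repeating warmup. A hypothetical resume sketch (x_train, y_train, and the checkpointed model are assumed, not from the code above):

    # Hypothetical example: resume training from epoch 30 with the same schedule.
    # on_epoch_begin receives epochs 30, 31, ..., so warmup is not repeated.
    model.fit(x_train, y_train,
              epochs = epochs,
              initial_epoch = 30,
              callbacks = [LRSchedule(init_lr = init_lr,
                                      warmup_epoch = warmup_epoch,
                                      decay_fn = lr_scheduler)])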