jhfjhfj1 / autokeras / build 1863

21 Mar 2019 - 20:22: coverage decreased (-0.09%) to 93.566%

Pull Request #595: Text Regressor using Google AI's BERT
Commit: Initial commit (web-flow)
CI: travis-ci

17 of 24 new or added lines in 1 file covered. (70.83%)

2 existing lines in 2 files now uncovered.

4741 of 5067 relevant lines covered (93.57%)

0.94 hits per line

Source File

/autokeras/nn/model_trainer.py: 93.48% of lines covered
Lines the report shows as unexecuted are marked below with a trailing "# not covered" comment.
# coding=utf-8
# Original work Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Modified work Copyright 2019 The AutoKeras team.
# Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
import os
import sys
import time
from copy import deepcopy
from functools import reduce

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler
from torchvision import utils as vutils
from tqdm import tqdm, trange

from autokeras.constant import Constant
from autokeras.text.pretrained_bert.optimization import BertAdam, warmup_linear
from autokeras.utils import get_device


class ModelTrainerBase(abc.ABC):
    """A base class all model trainers inherit from.
    Attributes:
        device: A string indicating the device to use, 'cuda' or 'cpu'.
        train_loader: Training data wrapped in batches in a PyTorch DataLoader.
        test_loader: Testing data wrapped in batches in a PyTorch DataLoader.
        loss_function: A function with two parameters (prediction, target).
            There is no specific requirement for the types of the parameters,
            as long as they are compatible with the model and the data loaders.
            The prediction should be the output of the model for a batch.
            The target should be a batch of targets packed in the data loaders.
        metric: A subclass of autokeras.metric.Metric.
            In its compute(prediction, target) function, prediction and target are
            numpy arrays converted from the output of the model and the targets packed in the data loaders.
            (An illustrative loss_function/metric pair is sketched after this class.)
        verbose: Verbosity mode.
    """

    def __init__(self,
                 loss_function,
                 train_data,
                 test_data=None,
                 metric=None,
                 verbose=False,
                 device=None):
        if device:
            self.device = device
        else:
            self.device = get_device()
        self.metric = metric
        self.verbose = verbose
        self.loss_function = loss_function
        self.train_loader = train_data
        self.test_loader = test_data
        self._timeout = None

    @abc.abstractmethod
    def train_model(self,
                    max_iter_num=None,
                    max_no_improvement_num=None,
                    timeout=None):
        """Train the model.
        Args:
            timeout: Timeout in seconds.
            max_iter_num: An integer, the maximum number of iterations.
            max_no_improvement_num: If the model still makes no improvement
                after max_no_improvement_num iterations, finish training.
        """
        pass  # not covered
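To make the loss_function and metric contract above concrete, here is a minimal illustrative pair. It is not part of this file: the names IllustrativeMse and illustrative_mse_loss are hypothetical, and the metric is duck-typed rather than derived from autokeras.metric.Metric, since the trainers below only ever call metric.compute(prediction, target) on numpy arrays.

import numpy as np
import torch.nn.functional as F


class IllustrativeMse:
    """Hypothetical metric: compute() receives numpy arrays, as described above."""

    @classmethod
    def compute(cls, prediction, target):
        return float(np.mean((prediction - target) ** 2))


def illustrative_mse_loss(prediction, target):
    """Hypothetical loss: takes the (prediction, target) batches straight from the loaders."""
    return F.mse_loss(prediction, target)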
class ModelTrainer(ModelTrainerBase):
    """A class that is used to train the model.
    This class can train a PyTorch model with the given data loaders.
    The metric, loss_function, and model must be compatible with each other.
    Please see the details in the Attributes.
    (A usage sketch follows the class body.)
    Attributes:
        temp_model_path: The path where the temporary model is stored.
        model: An instance of a PyTorch Module. The model that will be trained.
        early_stop: An instance of class EarlyStop.
        optimizer: The optimizer used for training (SGD with momentum in the current implementation).
        current_epoch: Records the current epoch.
    """

    def __init__(self, model, path, **kwargs):
        super().__init__(**kwargs)
        self.model = model
        if torch.cuda.device_count() > 1:
            self.model = torch.nn.DataParallel(self.model)  # not covered
        self.model.to(self.device)
        self.optimizer = None
        self.early_stop = None
        self.scheduler = None
        self.current_epoch = 0
        self.current_metric_value = 0
        self.temp_model_path = os.path.join(path, 'temp_model')

    def train_model(self,
                    lr=0.001,
                    max_iter_num=None,
                    max_no_improvement_num=None,
                    timeout=None):
        """Train the model.
        Train the model until max_iter_num or max_no_improvement_num is met.
        Args:
            lr: Learning rate of the training.
            timeout: Timeout in seconds.
            max_iter_num: An integer. The maximum number of epochs to train the model.
                The training will stop when this number is reached.
            max_no_improvement_num: An integer. The maximum number of epochs when the loss value doesn't decrease.
                The training will stop when this number is reached.
        Returns:
            A tuple of (test loss, metric value), each averaged over the last epochs.
        """
        if max_iter_num is None:
            max_iter_num = Constant.MAX_ITER_NUM  # not covered

        if max_no_improvement_num is None:
            max_no_improvement_num = Constant.MAX_NO_IMPROVEMENT_NUM

        self.early_stop = EarlyStop(max_no_improvement_num)
        self.early_stop.on_train_begin()
        self._timeout = time.time() + timeout if timeout is not None else sys.maxsize

        test_metric_value_list = []
        test_loss_list = []
        self.optimizer = torch.optim.SGD(
            self.model.parameters(),
            lr=lr,
            momentum=0.9,
            weight_decay=3e-4)
        # self.optimizer = torch.optim.Adam(self.model.parameters())
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, max_iter_num)

        for epoch in range(max_iter_num):
            self.scheduler.step()
            self._train()
            test_loss, metric_value = self._test()
            self.current_metric_value = metric_value
            test_metric_value_list.append(metric_value)
            test_loss_list.append(test_loss)
            decreasing = self.early_stop.on_epoch_end(test_loss)

            if self.early_stop.no_improvement_count == 0:
                self._save_model()

            if not decreasing:
                if self.verbose:  # not covered
                    print('\nNo loss decrease after {} epochs.\n'.format(max_no_improvement_num))  # not covered
                self._load_model()  # not covered
                break  # not covered

        last_num = min(max_no_improvement_num, max_iter_num)
        return (sum(test_loss_list[-last_num:]) / last_num,
                sum(test_metric_value_list[-last_num:]) / last_num)

    def _train(self):
        """Where the actual training proceeds."""
        self.model.train()
        loader = self.train_loader
        self.current_epoch += 1

        if self.verbose:
            progress_bar = self.init_progress_bar(len(loader))
        else:
            progress_bar = None

        for batch_idx, (inputs, targets) in enumerate(deepcopy(loader)):
            if time.time() >= self._timeout:
                raise TimeoutError  # not covered
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.loss_function(outputs, targets)
            loss.backward()
            self.optimizer.step()
            if self.verbose:
                if batch_idx % 10 == 0:
                    progress_bar.update(10)
        if self.verbose:
            progress_bar.close()

    def _test(self):
        """Function for evaluation."""
        self.model.eval()
        test_loss = 0
        all_targets = []
        all_predicted = []
        loader = self.test_loader

        if self.verbose:
            progress_bar = self.init_progress_bar(len(loader))
        else:
            progress_bar = None

        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(deepcopy(loader)):
                if time.time() >= self._timeout:
                    raise TimeoutError
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                outputs = self.model(inputs)
                # cast tensor to float
                test_loss += float(self.loss_function(outputs, targets))

                all_predicted.append(outputs.cpu().numpy())
                all_targets.append(targets.cpu().numpy())
                if self.verbose:
                    if batch_idx % 10 == 0:
                        progress_bar.update(10)

        if self.verbose:
            progress_bar.close()

        all_predicted = reduce(lambda x, y: np.concatenate((x, y)), all_predicted)
        all_targets = reduce(lambda x, y: np.concatenate((x, y)), all_targets)
        return test_loss, self.metric.compute(all_predicted, all_targets)

    def _save_model(self):
        torch.save(self.model.state_dict(), self.temp_model_path)

    def _load_model(self):
        self.model.load_state_dict(torch.load(self.temp_model_path))  # not covered

    def init_progress_bar(self, loader_len):
        return tqdm(total=loader_len,
                    desc='Epoch-'
                         + str(self.current_epoch)
                         + ', Current Metric - '
                         + str(self.current_metric_value),
                    file=sys.stdout,
                    leave=False,
                    ncols=100,
                    position=0,
                    unit=' batch')
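A minimal end-to-end sketch of driving the ModelTrainer above (illustrative only: the toy linear model, random tensors, and the /tmp path are made up, and IllustrativeMse / illustrative_mse_loss are the hypothetical helpers sketched after ModelTrainerBase):

import torch
from torch.utils.data import DataLoader, TensorDataset

# Toy regression data packed as the (inputs, targets) batches the trainer expects.
x = torch.randn(64, 10)
y = x.sum(dim=1, keepdim=True)
train_loader = DataLoader(TensorDataset(x[:48], y[:48]), batch_size=16)
test_loader = DataLoader(TensorDataset(x[48:], y[48:]), batch_size=16)

trainer = ModelTrainer(model=torch.nn.Linear(10, 1),
                       path='/tmp',                       # temp_model checkpoints are written here
                       loss_function=illustrative_mse_loss,
                       train_data=train_loader,
                       test_data=test_loader,
                       metric=IllustrativeMse,
                       verbose=False)

# Returns the test loss and metric value, each averaged over the last epochs.
test_loss, metric_value = trainer.train_model(max_iter_num=3, max_no_improvement_num=2)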
class GANModelTrainer(ModelTrainerBase):
    """A ModelTrainer specialized for GANs.
    (A usage sketch follows the class body.)
    Attributes:
        d_model: A discriminator model.
        g_model: A generator model.
        out_f: Output directory for intermediate generated images.
        out_size: Number of sample images to generate.
        optimizer_d: Optimizer for the discriminator.
        optimizer_g: Optimizer for the generator.
    """

    def __init__(self,
                 g_model,
                 d_model,
                 train_data,
                 loss_function,
                 verbose,
                 gen_training_result=None,
                 device=None):
        """Initialize the GANModelTrainer.
        Args:
            g_model: The generator model to be trained.
            d_model: The discriminator model to be trained.
            train_data: The training data.
            loss_function: The loss function for both discriminator and generator.
            verbose: Whether to print progress output.
            gen_training_result: A tuple (out_f, out_size); if provided, intermediate
                generated images are saved while training.
        """
        super().__init__(loss_function, train_data, verbose=verbose, device=device)
        self.d_model = d_model
        self.g_model = g_model
        self.d_model.to(self.device)
        self.g_model.to(self.device)
        self.out_f = None
        self.out_size = 0
        if gen_training_result is not None:
            self.out_f, self.out_size = gen_training_result
            self.sample_noise = torch.randn(self.out_size,
                                            self.g_model.nz,
                                            1, 1, device=self.device)
        self.optimizer_d = None
        self.optimizer_g = None

    def train_model(self,
                    max_iter_num=None,
                    max_no_improvement_num=None,
                    timeout=None):
        if max_iter_num is None:
            max_iter_num = Constant.MAX_ITER_NUM  # not covered
        self.optimizer_d = torch.optim.Adam(self.d_model.parameters())
        self.optimizer_g = torch.optim.Adam(self.g_model.parameters())
        if self.verbose:
            progress_bar = tqdm(total=max_iter_num,
                                desc='     Model     ',
                                file=sys.stdout,
                                ncols=75,
                                position=1,
                                unit=' epoch')
        else:
            progress_bar = None  # not covered
        for epoch in range(max_iter_num):
            self._train(epoch)
            if self.verbose:
                progress_bar.update(1)
        if self.verbose:
            progress_bar.close()

    def _train(self, epoch):
        """Perform the actual training for one epoch."""
        # put model into train mode
        self.d_model.train()
        # TODO: why?
        cp_loader = deepcopy(self.train_loader)
        if self.verbose:
            progress_bar = tqdm(total=len(cp_loader),
                                desc='Current Epoch',
                                file=sys.stdout,
                                leave=False,
                                ncols=75,
                                position=0,
                                unit=' Batch')
        else:
            progress_bar = None  # not covered
        real_label = 1
        fake_label = 0
        for batch_idx, inputs in enumerate(cp_loader):
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            # train with real
            self.optimizer_d.zero_grad()
            inputs = inputs.to(self.device)
            batch_size = inputs.size(0)
            outputs = self.d_model(inputs)

            label = torch.full((batch_size,), real_label, device=self.device)
            loss_d_real = self.loss_function(outputs, label)
            loss_d_real.backward()

            # train with fake
            noise = torch.randn((batch_size, self.g_model.nz, 1, 1,), device=self.device)
            fake_outputs = self.g_model(noise)
            label.fill_(fake_label)
            outputs = self.d_model(fake_outputs.detach())
            loss_g_fake = self.loss_function(outputs, label)
            loss_g_fake.backward()
            self.optimizer_d.step()
            # (2) Update G network: maximize log(D(G(z)))
            self.g_model.zero_grad()
            label.fill_(real_label)
            outputs = self.d_model(fake_outputs)
            loss_g = self.loss_function(outputs, label)
            loss_g.backward()
            self.optimizer_g.step()

            if self.verbose:
                if batch_idx % 10 == 0:
                    progress_bar.update(10)
            if self.out_f is not None and batch_idx % 100 == 0:
                fake = self.g_model(self.sample_noise)  # not covered
                vutils.save_image(  # not covered
                    fake.detach(),
                    '%s/fake_samples_epoch_%03d.png' % (self.out_f, epoch),
                    normalize=True)
        if self.verbose:
            progress_bar.close()
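A rough usage sketch for GANModelTrainer (illustrative only: the tiny generator and discriminator are hypothetical stand-ins; the only structural requirements taken from the code above are that the generator exposes an nz attribute and maps (batch, nz, 1, 1) noise to images, and that the discriminator maps an image batch to one score per sample. It also assumes the PyTorch version this code targeted, where torch.full() with an integer fill value yields float labels):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader


class TinyGenerator(nn.Module):
    def __init__(self, nz=8):
        super().__init__()
        self.nz = nz                                             # GANModelTrainer reads g_model.nz
        self.deconv = nn.ConvTranspose2d(nz, 1, kernel_size=4)  # (batch, nz, 1, 1) -> (batch, 1, 4, 4)

    def forward(self, noise):
        return torch.tanh(self.deconv(noise))


class TinyDiscriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(16, 1)

    def forward(self, images):
        return torch.sigmoid(self.fc(images.view(images.size(0), -1))).view(-1)


real_images = torch.rand(32, 1, 4, 4)           # stand-in "dataset" of 4x4 images
loader = DataLoader(real_images, batch_size=8)

trainer = GANModelTrainer(g_model=TinyGenerator(),
                          d_model=TinyDiscriminator(),
                          train_data=loader,
                          loss_function=nn.BCELoss(),
                          verbose=False,
                          gen_training_result=None)   # pass (out_dir, n_samples) to save samples
trainer.train_model(max_iter_num=1)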
class BERTTrainer(ModelTrainerBase):
    """A ModelTrainer for Google AI's BERT model. Currently supports only the classification task.
    (A calling sketch follows the class body.)

    Attributes:
        model: The BERT model to be used for the task, e.g. uncased, cased, etc.
        output_model_file: File location to save the trained model.
        num_labels: Number of output labels for the classification task.
    """

    def __init__(self, train_data, model, output_model_file, num_labels, loss_function=None):
        """Initialize the BERTTrainer.

        Args:
            train_data: The training data.
            model: The BERT model to be used for the task, e.g. uncased, cased, etc.
            output_model_file: File location to save the trained model.
            num_labels: Number of output labels for the classification task.
            loss_function: The loss function for the classifier.
        """
        super().__init__(loss_function, train_data, verbose=True)

        self.train_data = train_data
        self.model = model
        self.output_model_file = output_model_file
        self.num_labels = num_labels

        # Training params
        self.global_step = 0
        self.gradient_accumulation_steps = 1
        self.learning_rate = 5e-5
        self.nb_tr_steps = 1
        self.num_train_epochs = Constant.BERT_TRAINER_EPOCHS
        self.tr_loss = 0
        self.train_batch_size = Constant.BERT_TRAINER_BATCH_SIZE
        self.warmup_proportion = 0.1
        self.train_data_size = self.train_data.__len__()
        self.num_train_steps = int(self.train_data_size /
                                   self.train_batch_size /
                                   self.gradient_accumulation_steps *
                                   self.num_train_epochs)

    def train_model(self,
                    max_iter_num=None,
                    max_no_improvement_num=None,
                    timeout=None):
        """Train the model.

        Train the model with max_iter_num.

        Args:
            timeout: Timeout in seconds.
            max_iter_num: An integer. The maximum number of epochs to train the model.
            max_no_improvement_num: An integer. The maximum number of epochs when the loss value doesn't decrease.

        Returns:
            Training loss.
        """
        if max_iter_num is not None:
            self.num_train_epochs = max_iter_num  # not covered

        self.model.to(self.device)

        # Prepare optimizer
        param_optimizer = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        # Add bert adam
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=self.learning_rate,
                             warmup=self.warmup_proportion,
                             t_total=self.num_train_steps)

        train_sampler = RandomSampler(self.train_data)
        train_dataloader = DataLoader(self.train_data, sampler=train_sampler, batch_size=self.train_batch_size)

        if self.verbose:
            print("***** Running training *****")
            print("Num examples = %d" % self.train_data_size)
            print("Batch size = %d" % self.train_batch_size)
            print("Num steps = %d" % self.num_train_steps)

        self.model.train()
        for _ in trange(int(self.num_train_epochs), desc="Epoch"):
            tr_loss = self._train(optimizer, train_dataloader)

        if self.verbose:
            print("Training loss = %f" % tr_loss)

        self._save_model()
        return tr_loss

    def _train(self, optimizer, dataloader):
        """Actual training is performed here."""
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(tqdm(dataloader, desc="Iteration")):
            batch = tuple(t.to(self.device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            loss = self.model(input_ids, segment_ids, input_mask, label_ids)
            if self.gradient_accumulation_steps > 1:
                loss = loss / self.gradient_accumulation_steps  # not covered

            loss.backward()

            tr_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % self.gradient_accumulation_steps == 0:
                # modify learning rate with special warm up BERT uses
                lr_this_step = self.learning_rate * warmup_linear(self.global_step / self.num_train_steps,
                                                                  self.warmup_proportion)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                optimizer.step()
                optimizer.zero_grad()
                self.global_step += 1

        return tr_loss

    def _save_model(self):
        """Save the trained model to disk."""
        model_to_save = self.model.module if hasattr(self.model, 'module') else self.model  # unwrap DataParallel and save only the model itself
        torch.save(model_to_save.state_dict(), self.output_model_file)
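A rough calling sketch for BERTTrainer (illustrative only: the TensorDataset layout of (input_ids, input_mask, segment_ids, label_ids) is inferred from _train() above, the random ids and labels are made up, and the import path and from_pretrained() call for the classification head are assumptions based on the pretrained_bert package this file already imports from):

import torch
from torch.utils.data import TensorDataset

# Assumed import path; the vendored package is referenced above for BertAdam.
from autokeras.text.pretrained_bert.modeling import BertForSequenceClassification

num_examples, max_seq_len, num_labels = 32, 64, 2

# Each batch must unpack as (input_ids, input_mask, segment_ids, label_ids), as _train() expects.
input_ids = torch.randint(0, 30000, (num_examples, max_seq_len))
input_mask = torch.ones_like(input_ids)
segment_ids = torch.zeros_like(input_ids)
label_ids = torch.randint(0, num_labels, (num_examples,))
train_data = TensorDataset(input_ids, input_mask, segment_ids, label_ids)

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
trainer = BERTTrainer(train_data=train_data,
                      model=model,
                      output_model_file='/tmp/bert_classifier.bin',
                      num_labels=num_labels)
tr_loss = trainer.train_model()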
class EarlyStop:
    """A class that checks for the early-stop condition.
    (An illustrative driving loop follows the class body.)
    Attributes:
        training_losses: Records all the training losses.
        minimum_loss: The minimum loss achieved so far. Compared against to determine the no-improvement condition.
        no_improvement_count: Current no-improvement count.
        _max_no_improvement_num: The maximum number specified.
        _done: Whether the condition has been met.
        _min_loss_dec: A threshold for loss improvement.
    """

    def __init__(self, max_no_improvement_num=None, min_loss_dec=None):
        super().__init__()
        self.training_losses = []
        self.minimum_loss = None
        self.no_improvement_count = 0
        self._max_no_improvement_num = max_no_improvement_num if max_no_improvement_num is not None \
            else Constant.MAX_NO_IMPROVEMENT_NUM
        self._done = False
        self._min_loss_dec = min_loss_dec if min_loss_dec is not None else Constant.MIN_LOSS_DEC

    def on_train_begin(self):
        """Initialize the early-stop state.
        Called every time training begins.
        """
        self.training_losses = []
        self.no_improvement_count = 0
        self._done = False
        self.minimum_loss = float('inf')

    def on_epoch_end(self, loss):
        """Check the early-stop condition.
        Called at the end of every training epoch.
        Args:
            loss: The loss value achieved by the epoch.
        Returns:
            False if the early-stop condition has been met (training should stop), otherwise True.
        """
        self.training_losses.append(loss)
        if self._done and loss > (self.minimum_loss - self._min_loss_dec):
            return False  # not covered

        if loss > (self.minimum_loss - self._min_loss_dec):
            self.no_improvement_count += 1
        else:
            self.no_improvement_count = 0
            self.minimum_loss = loss

        if self.no_improvement_count > self._max_no_improvement_num:
            self._done = True  # not covered

        return True
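Finally, a short illustrative loop that makes the EarlyStop protocol explicit (this mirrors how ModelTrainer.train_model drives it above; the loss sequence is made up):

stopper = EarlyStop(max_no_improvement_num=2, min_loss_dec=0.001)
stopper.on_train_begin()

# A made-up loss curve: it improves for three epochs and then plateaus.
for epoch, loss in enumerate([1.0, 0.8, 0.7, 0.71, 0.72, 0.73, 0.74]):
    keep_going = stopper.on_epoch_end(loss)
    print(epoch, loss, stopper.no_improvement_count, keep_going)
    if not keep_going:   # False means the early-stop condition has been met
        break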