• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

barseghyanartur / faker-file / 4032787010

pending completion
4032787010

push

github

Artur Barseghyan
Bring back MD files

1228 of 1230 relevant lines covered (99.84%)

6.27 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0
/src/faker_file/providers/augment_file_from_dir/augmenters/nlpaug_augmenter.py
1
import nlpaug.augmenter.word as naw
4✔
2

3
from .base import BaseTextAugmenter
4✔
4

5
__author__ = "Artur Barseghyan <artur.barseghyan@gmail.com>"
__copyright__ = "2022-2023 Artur Barseghyan"
__license__ = "MIT"
# Public names of this module (what ``from ... import *`` exposes).
__all__ = (
    "ContextualWordEmbeddingsAugmenter",
    "DEFAULT_ACTION",
    "DEFAULT_MODEL_PATH",
)

# Value of ``ContextualWordEmbeddingsAugmenter.model_path`` unless overridden
# through the ``model_path`` keyword argument.
DEFAULT_MODEL_PATH = "bert-base-multilingual-cased"
# Value of ``ContextualWordEmbeddingsAugmenter.action`` unless overridden
# through the ``action`` keyword argument.
DEFAULT_ACTION = "substitute"
16

17

18
class ContextualWordEmbeddingsAugmenter(BaseTextAugmenter):
    """Text augmenter based on `ContextualWordEmbsAug` of `nlpaug`.

    Usage example:

        from faker import Faker
        from faker_file.providers.augment_file_from_dir import (
            AugmentFileFromDirProvider,
        )
        from faker_file.providers.augment_file_from_dir.augmenters import (
            nlpaug_augmenter,
        )

        FAKER = Faker()

        file = AugmentFileFromDirProvider(FAKER).augment_file_from_dir(
            text_augmenter_cls=(
                nlpaug_augmenter.ContextualWordEmbeddingsAugmenter
            ),
            text_augmenter_kwargs={
                "model_path": "bert-base-uncased",
                "action": "substitute",
            }
        )

    Refer to `nlpaug` official documentation and check examples
    for `Textual augmenters`:

        https://nlpaug.readthedocs.io/en/latest/example/example.html

    Some well working options for `model_path` are:

        - bert-base-multilingual-cased
        - bert-base-multilingual-uncased
        - bert-base-cased
        - bert-base-uncased
        - bert-base-german-cased
        - GroNLP/bert-base-dutch-cased

    Options for `action` are:

        - insert
        - substitute
    """

    # Model handed to ``naw.ContextualWordEmbsAug`` as ``model_path``.
    model_path: str = DEFAULT_MODEL_PATH
    # Augmentation action handed to ``naw.ContextualWordEmbsAug`` as ``action``.
    action: str = DEFAULT_ACTION

    def handle_kwargs(
        self: "ContextualWordEmbeddingsAugmenter", **kwargs
    ) -> None:
        """Handle kwargs.

        Override the class-level defaults with the ``model_path`` and/or
        ``action`` values supplied by the caller. Keys other than these two
        are ignored; keys that are absent leave the current value untouched.
        """
        if "model_path" in kwargs:
            self.model_path = kwargs["model_path"]
        if "action" in kwargs:
            self.action = kwargs["action"]

    def augment(
        self: "ContextualWordEmbeddingsAugmenter",
        text: str,
    ) -> str:
        """Augment text.

        Build a ``ContextualWordEmbsAug`` augmenter from ``self.model_path``
        and ``self.action`` and run it over ``text``.

        :param text: Text to augment.
        :return: The (first) augmented variant of ``text``. If the augmenter
            produces no output (which ``nlpaug`` does for empty input), the
            original ``text`` is returned unchanged instead of raising
            ``IndexError``.
        """
        aug = naw.ContextualWordEmbsAug(
            model_path=self.model_path,
            action=self.action,
        )
        augmented = aug.augment(text)

        # Older ``nlpaug`` releases return a plain string for string input;
        # indexing it would yield only the first character.
        if isinstance(augmented, str):
            return augmented

        # Nothing was produced (e.g. empty input): fall back to the original
        # text rather than failing on ``[0]``.
        if not augmented:
            return text

        return augmented[0]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc