• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

dg / texy / 21501721037

30 Jan 2026 02:00AM UTC coverage: 91.159% (-1.3%) from 92.426%
21501721037

push

github

dg
wip

2681 of 2941 relevant lines covered (91.16%)

0.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.18
/src/Texy/Modules/LongWordsModule.php
1
<?php
2

3
/**
4
 * This file is part of the Texy! (https://texy.nette.org)
5
 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6
 */
7

8
declare(strict_types=1);
9

10
namespace Texy\Modules;
11

12
use Texy;
13
use Texy\Syntax;
14
use function array_column, array_flip, array_pop, array_splice, count, end, iconv_strlen, implode, ord;
15

16

17
/**
 * Breaks long words with soft hyphens for better line wrapping.
 *
 * Every run of at least $wordLimit non-separator characters is cut into
 * chunks by simple consonant/vowel rules (the tables below cover ASCII and
 * Czech letters) and the chunks are joined with U+00AD SOFT HYPHEN.
 */
final class LongWordsModule extends Texy\Module
{
        private const
                Dont = 0, // don't hyphenate
                Here = 1, // hyphenate here
                After = 2; // hyphenate after

        /** Words longer than this many characters are returned untouched. */
        private const SafeLimit = 1000;

        /** Minimum distance between two breaks, as a fraction of $wordLimit. */
        private const MinChunkRatio = 0.6;

        /** Minimum length (in characters) a word must have to be hyphenated. */
        public int $wordLimit = 20;

        /**
         * Consonants. Declared as a list, flipped by the constructor into
         * a char => index map so that membership is a plain isset().
         * @var array<string, int>|string[]
         */
        private array $consonants = [
                'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z',
                'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Z',
                "\u{10D}", "\u{10F}", "\u{148}", "\u{159}", "\u{161}", "\u{165}", "\u{17E}", // Czech UTF-8
                "\u{10C}", "\u{10E}", "\u{147}", "\u{158}", "\u{160}", "\u{164}", "\u{17D}",
        ];

        /**
         * Vowels (flipped to a lookup map by the constructor).
         * @var array<string, int>|string[]
         */
        private array $vowels = [
                'a', 'e', 'i', 'o', 'u', 'y',
                'A', 'E', 'I', 'O', 'U', 'Y',
                "\u{E1}", "\u{E9}", "\u{11B}", "\u{ED}", "\u{F3}", "\u{FA}", "\u{16F}", "\u{FD}", // Czech UTF-8
                "\u{C1}", "\u{C9}", "\u{11A}", "\u{CD}", "\u{D3}", "\u{DA}", "\u{16E}", "\u{DD}",
        ];

        /**
         * Consonants that stay together with a following "r"
         * (flipped to a lookup map by the constructor).
         * @var array<string, int>|string[]
         */
        private array $before_r = [
                'b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g', 'G', 'k', 'K', 'p', 'P', 'r', 'R', 't', 'T', 'v', 'V',
                "\u{10D}", "\u{10C}", "\u{10F}", "\u{10E}", "\u{159}", "\u{158}", "\u{165}", "\u{164}", // Czech UTF-8
        ];

        /**
         * Consonants that stay together with a following "l"
         * (flipped to a lookup map by the constructor).
         * @var array<string, int>|string[]
         */
        private array $before_l = [
                'b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g', 'G', 'k', 'K', 'l', 'L', 'p', 'P', 't', 'T', 'v', 'V',
                "\u{10D}", "\u{10C}", "\u{10F}", "\u{10E}", "\u{165}", "\u{164}", // Czech UTF-8
        ];

        /**
         * Consonants that form an unbreakable pair with a following "h"
         * (flipped to a lookup map by the constructor).
         * @var array<string, int>|string[]
         */
        private array $before_h = ['c', 'C', 's', 'S'];

        /**
         * Vowels after which a "u" is kept in the same chunk
         * (flipped to a lookup map by the constructor).
         * @var array<string, int>|string[]
         */
        private array $doubleVowels = ['a', 'A', 'o', 'O'];


        /**
         * Converts the character lists into lookup maps and registers
         * postLine() as a post-line handler for Syntax::Hyphenation.
         */
        public function __construct(Texy\Texy $texy)
        {
                $this->consonants = array_flip($this->consonants);
                $this->vowels = array_flip($this->vowels);
                $this->before_r = array_flip($this->before_r);
                $this->before_l = array_flip($this->before_l);
                $this->before_h = array_flip($this->before_h);
                $this->doubleVowels = array_flip($this->doubleVowels);

                $texy->registerPostLine($this->postLine(...), Syntax::Hyphenation);
        }


        /**
         * Passes every run of at least $wordLimit characters to pattern().
         *
         * A run ends at a space, newline, tab, the control characters
         * \x14-\x16, an en dash, an em dash, a soft hyphen or a hyphen.
         */
        public function postLine(string $text): string
        {
                return Texy\Regexp::replace(
                        $text,
                        '~[^ \n\t\x14\x15\x16\x{2013}\x{2014}\x{ad}-]{' . $this->wordLimit . ',}~',
                        $this->pattern(...),
                );
        }


        /**
         * Parses long words.
         *
         * Returns the word unchanged when it exceeds SafeLimit characters or
         * when it has fewer than $wordLimit chunks; otherwise returns it with
         * soft hyphens inserted.
         * @param  array<?string>  $matches
         */
        private function pattern(array $matches): string
        {
                [$mWord] = $matches;
                // [0] => lllloooonnnnggggwwwoorrdddd

                if (iconv_strlen($mWord, 'UTF-8') > self::SafeLimit) {
                        return $mWord;
                }

                // one item per character; a run of MARK characters is a single item
                $chars = Texy\Regexp::matchAll(
                        $mWord,
                        '~[' . Texy\Patterns::MARK . ']+|.~',
                );

                $chars = array_column($chars, 0);
                if (count($chars) < $this->wordLimit) {
                        return $mWord;
                }

                // $s holds the items taking part in hyphenation, padded by an empty
                // sentinel on both sides so that $s[$a - 1] and $s[$a + 1] always exist;
                // $trans maps an index in $s back to the index in $chars.
                $s = [''];
                $trans = [-1];
                foreach ($chars as $key => $char) {
                        if (ord($char[0]) < 32) {
                                continue; // item starts with a control byte - not a letter
                        }

                        $s[] = $char;
                        $trans[] = $key;
                }

                $s[] = '';
                $len = count($s) - 2;
                $minChunk = $this->wordLimit * self::MinChunkRatio;

                $positions = [];
                $a = 0;
                $last = 1;

                while (++$a < $len) {
                        if ($s[$a] === "\u{A0}") {
                                $a++;
                                continue;  // here and after never
                        }

                        $hyphen = $this->getHyphen($s[$a], $s[$a - 1], $s[$a + 1]);

                        if (
                                $hyphen === self::Here
                                || ($hyphen === self::Dont && $a - $last > $minChunk) // chunk too long - force a break
                        ) {
                                $positions[] = $last = $a - 1; // Hyphenate here

                        } elseif ($hyphen === self::After) {
                                $positions[] = $last = $a;
                                $a++; // Hyphenate after
                        }
                }

                // do not leave a single trailing consonant behind the last break
                $lastPosition = end($positions);
                if (($lastPosition === $len - 1) && isset($this->consonants[$s[$len]])) {
                        array_pop($positions);
                }

                // keep only breaks that are far enough from the previously used one
                $syllables = [];
                $last = 0;
                foreach ($positions as $pos) {
                        if ($pos - $last > $minChunk) {
                                $syllables[] = implode('', array_splice($chars, 0, $trans[$pos] - $trans[$last]));
                                $last = $pos;
                        }
                }

                $syllables[] = implode('', $chars);
                return implode("\u{AD}", $syllables);
        }


        /**
         * Decides whether a break belongs before $ch (Here), after it (After)
         * or nowhere around it (Dont), based on its neighbours.
         *
         * Letter comparisons accept both cases, matching the lookup tables, so
         * that an upper-case word is hyphenated exactly like its lower-case form
         * (e.g. "CH" is not split, same as "ch").
         */
        private function getHyphen(string $ch, string $prev, string $next): int
        {
                if ($ch === '.') {
                        return self::Here;
                }

                if (isset($this->consonants[$ch])) { // consonants
                        if (isset($this->vowels[$next])) {
                                return isset($this->vowels[$prev]) ? self::Here : self::Dont;
                        }

                        if (!isset($this->consonants[$next])) {
                                return self::Dont;
                        }

                        // "ns" followed by a consonant: break after the "s"
                        if (($ch === 's' || $ch === 'S') && ($prev === 'n' || $prev === 'N')) {
                                return self::After;
                        }

                        if (!isset($this->vowels[$prev])) {
                                return self::Dont;
                        }

                        // vowel + consonant + consonant
                        return match ($next) {
                                'r', 'R' => isset($this->before_r[$ch]) ? self::Here : self::After,
                                'l', 'L' => isset($this->before_l[$ch]) ? self::Here : self::After,
                                'h', 'H' => isset($this->before_h[$ch]) ? self::Dont : self::After, // CH
                                default => self::After,
                        };
                }

                if (($ch === 'u' || $ch === 'U') && isset($this->doubleVowels[$prev])) {
                        return self::After;
                }

                if (isset($this->vowels[$ch], $this->vowels[$prev])) {
                        return self::Here;
                }

                return self::Dont; // Do not hyphenate
        }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc