• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 15507594683

07 Jun 2025 11:54AM UTC coverage: 30.951% (-30.4%) from 61.309%
15507594683

push

github

alerque
chore(tooling): Add post-checkout hook to clear makedeps on branch switch

6363 of 20558 relevant lines covered (30.95%)

3445.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.11
/languages/base-hyphenator.lua
1
-- Note: based on Knuth-Liang algorithm, formerly known in the SILE code base as liang-hyphenator
2

3
local module = require("types.module")
48✔
4
-- Hyphenator class, created by calling pl.class with the required "types.module" value.
local hyphenator = pl.class(module)
48✔
5
-- Type tag stored on the class table.
hyphenator.type = "hyphenator"
48✔
6

7
--- Set up a hyphenator for one language and load its patterns.
-- @tparam table language Language object; its `_name` field is copied onto the hyphenator
function hyphenator:_init (language)
   self.language = language
   self._name = language._name

   -- Pattern data, populated by loadPatterns()
   self.trie = {} -- Trie built from the patterns
   self.exceptions = {} -- Explicit hyphenation exceptions

   -- Limits (may be overridden by the minima declared in the pattern file)
   self.minWord = 5 -- Words shorter than this are never hyphenated
   self.leftmin = 2 -- Fewest characters allowed before a hyphen (TeX default)
   self.rightmin = 2 -- Fewest characters allowed after a hyphen (TeX default)

   -- The fields above are set before the base initializer runs, then the patterns are loaded
   module._init(self)
   self:loadPatterns()
end
18

19
--- Load hyphenation patterns, exceptions and minima for this hyphenator's language.
-- Tries the module `languages.<code>.hyphens` first and falls back to
-- `languages.<code>.hyphens-tex`. If neither can be required, a warning is issued and the
-- hyphenator keeps its empty trie and default minima.
function hyphenator:loadPatterns ()
   local code = self.language:_getLegacyCode()
   local status, hyphens = pcall(require, ("languages.%s.hyphens"):format(code))
   if not status then
      status, hyphens = pcall(require, ("languages.%s.hyphens-tex"):format(code))
   end
   if not status then
      -- After a failed pcall, `hyphens` holds the error message rather than a pattern table,
      -- so stop here instead of going on to index it for hyphenmins.
      SU.warn("No hyphenation patterns for language " .. code)
      return
   end
   for _, pattern in ipairs(hyphens.patterns or {}) do
      self:addPattern(pattern)
   end
   for _, exception in ipairs(hyphens.exceptions or {}) do
      self:registerException(exception)
   end
   local hyphenmins = hyphens.hyphenmins
   -- TODO: We ought to have a way to set these according to users' preferences
   -- For now, we just set them to the conventional values based on the pattern files, or TeX defaults
   -- Yet, if available, we use the typesetting convention.
   -- For the record, the generation minima are the real minima below which the pattern file is not
   -- applicable. (So even users' preferences should not go below these values.)
   if hyphenmins then
      if hyphenmins.typesetting then
         self.leftmin = hyphenmins.typesetting.left or 2
         self.rightmin = hyphenmins.typesetting.right or 2
         SU.debug("hyphenator", "Setting hyphenation minima for typesetting:", self.leftmin, self.rightmin)
      elseif hyphenmins.generation then
         self.leftmin = hyphenmins.generation.left or 2
         self.rightmin = hyphenmins.generation.right or 2
         SU.debug("hyphenator", "Setting hyphenation minima from generation:", self.leftmin, self.rightmin)
      end
   end
end
53

54
-- Module-level guard shared by all hyphenator instances: the commands below are registered
-- only by the first call to _registerCommands().
local _registered_base_commands = false

--- Register the commands provided by the base hyphenator.
-- Subsequent calls (from any instance) return immediately.
function hyphenator:_registerCommands ()
   if _registered_base_commands then
      return
   end
   _registered_base_commands = true
   -- hyphenator:add-exceptions: every token of content[1] that has a `string` field is
   -- registered as an exception with the hyphenator of the language given by the `lang`
   -- option, or of the current document.language setting when the option is absent.
   self.commands:register("hyphenator:add-exceptions", function (options, content)
      local lang = options.lang or self.settings:get("document.language")
      local language = SILE.typesetter:_cacheLanguage(lang)
      for token in SU.gtoke(content[1]) do
         if token.string then
            language.hyphenator:registerException(token.string)
         end
      end
   end, "Add exceptions to the language's hyphenation rules")
end
71

72
--- Insert one hyphenation pattern into the trie.
-- The non-digit characters of the pattern form the path through the trie; the node reached at
-- the end of that path gets a "_" entry holding the list of weights. A digit contributes its
-- numeric value; a non-digit contributes 0 unless it directly follows a digit.
-- @tparam string pattern Pattern mixing characters and single-digit weights
function hyphenator:addPattern (pattern)
   local node = self.trie
   local weights = {}
   local afterDigit = false
   for _, char in ipairs(SU.splitUtf8(pattern)) do
      if char:find("%d") then
         weights[#weights + 1] = tonumber(char)
         afterDigit = true
      else
         -- Descend into the trie, creating the branch on first use
         local child = node[char]
         if not child then
            child = {}
            node[char] = child
         end
         node = child
         if afterDigit then
            -- The slot in front of this character was already filled by the digit
            afterDigit = false
         else
            weights[#weights + 1] = 0
         end
      end
   end
   node["_"] = weights
end
98

99
--- Register a hyphenation exception.
-- The exception is stored under the word with its "-" markers removed, as a table of break
-- flags: entry k + 1 is 1 when a break is allowed after the k-th character, 0 otherwise.
-- @tparam string exception Word with "-" at each allowed break point (e.g. "ta-ble")
function hyphenator:registerException (exception)
   -- _segment() looks exceptions up by the lowercased word, so the key has to be case-folded
   -- as well; otherwise an exception containing uppercase letters could never match.
   -- The extra parentheses drop gsub's second return value (the replacement count).
   local text = luautf8.lower((exception:gsub("%-", "")))
   local points = {}
   local j = 1
   for _, bit in ipairs(SU.splitUtf8(exception)) do
      if bit == "-" then
         -- Flip the slot of the preceding character into a break point
         points[j] = 1
      else
         j = j + 1
         points[j] = 0
      end
   end
   self.exceptions[text] = points
end
114

115
--- Split a word into the pieces between its hyphenation points.
-- Words shorter than self.minWord are returned whole. A registered exception (looked up by
-- the lowercased word) is used as is; otherwise the break weights are computed from the
-- pattern trie and the leftmin/rightmin limits are applied.
-- @tparam string text Word to segment
-- @treturn table List of string pieces
function hyphenator:_segment (text)
   if luautf8.len(text) < self.minWord then
      return { text }
   end
   local word = SU.splitUtf8(text)
   local points = self.exceptions[luautf8.lower(text)]
   if not points then
      -- One weight per character, plus a leading slot: points[k + 1] is the weight of a
      -- break after the k-th character
      points = SU.map(function ()
         return 0
      end, word)
      table.insert(points, 1, 0)
      -- Lowercased characters, with "." added at both ends
      local work = SU.map(luautf8.lower, word)
      work[#work + 1] = "."
      table.insert(work, 1, ".")
      for start = 1, #work do
         -- Follow the trie along the characters starting at `start`
         local node = self.trie
         for pos = start, #work do
            node = node[work[pos]]
            if not node then
               break
            end
            local weights = node["_"]
            if weights then
               -- Keep the highest weight seen for each slot
               for k = 1, #weights do
                  local slot = start + k - 2
                  local current = points[slot]
                  if current and current < weights[k] then
                     points[slot] = weights[k]
                  end
               end
            end
         end
      end
      -- The minima only apply to pattern-based results, not to exceptions
      for i = 1, self.leftmin do
         points[i] = 0
      end
      for i = #points - self.rightmin + 1, #points do
         points[i] = 0
      end
   end
   -- An odd weight after a character starts a new piece
   local pieces = { "" }
   local last = 1
   for i, char in ipairs(word) do
      pieces[last] = pieces[last] .. char
      local weight = points[i + 1]
      if weight and weight % 2 == 1 then
         last = last + 1
         pieces[last] = ""
      end
   end
   return pieces
end
164

165
--- Build the discretionary used between two segments of a hyphenated node.
-- The base implementation shapes the "font.hyphenchar" setting with the node's options and
-- uses it as the prebreak; the segments are handed back unchanged.
-- @tparam table node Node being hyphenated
-- @tparam table segments Segments of the node's text
-- @param _ Unused by the base implementation
-- @return The discretionary node, then the segments
function hyphenator:hyphenateSegments (node, segments, _)
   local hyphenchar = self.settings:get("font.hyphenchar")
   local prebreak = SILE.shaper:createNnodes(hyphenchar, node.options)
   local discretionary = SILE.types.node.discretionary({ prebreak = prebreak })
   return discretionary, segments
end
169

170
--- Join the segments of a word with the "font.hyphenchar" setting.
-- @tparam string word Word to segment
-- @param[opt] lang Language to use; defaults to the "document.language" setting
-- @return Whatever SU.concat returns for the segments and the hyphen character
function hyphenator:showHyphenationPoints (word, lang)
   if not lang then
      lang = self.settings:get("document.language")
   end
   local target = SILE.typesetter:_cacheLanguage(lang).hyphenator
   local pieces = target:_segment(word)
   return SU.concat(pieces, self.settings:get("font.hyphenchar"))
end
175

176
--- Hyphenate every node of a list.
-- Nodes without a `language` field are copied over untouched; the others are replaced by
-- whatever the hyphenator of their own language returns for them.
-- @tparam table nodelist List of nodes
-- @treturn table New list of nodes
function hyphenator:hyphenate (nodelist)
   local result = {}
   for _, node in ipairs(nodelist) do
      if not node.language then
         result[#result + 1] = node
      else
         -- Each node is handled by the hyphenator of its own language
         local owner = self.language.typesetter:_cacheLanguage(node.language).hyphenator
         for _, piece in ipairs(owner:hyphenateNode(node)) do
            result[#result + 1] = piece
         end
      end
   end
   return result
end
191

192
--- Split a single node at its hyphenation points.
-- @tparam table node Node to hyphenate
-- @treturn table List of nodes: `{ node }` when the node cannot or need not be split,
-- otherwise the shaped segments interleaved with discretionaries (also stored in node.children)
function hyphenator:hyphenateNode (node)
   if not node.language or not node.is_nnode or not node.text then
      -- Return a list like every other path does: hyphenate() walks the result with ipairs(),
      -- so a bare node would not be carried over as a one-element list.
      return { node }
   end
   local segments = self:_segment(node.text)
   if #segments <= 1 then
      return { node }
   end
   local newnodes = {}
   local hyphen
   for j, segment in ipairs(segments) do
      if segment == "" then
         SU.dump({ j, segments })
         SU.error("No hyphenation segment should ever be empty", true)
      end
      -- hyphenateSegments() may hand back a modified segment list, which is used from here on
      hyphen, segments = self:hyphenateSegments(node, segments, j)
      for _, newNode in ipairs(SILE.shaper:createNnodes(segments[j], node.options)) do
         if newNode.is_nnode then
            newNode.parent = node
            table.insert(newnodes, newNode)
         end
      end
      -- A discretionary goes between segments, not after the last one
      if j < #segments then
         hyphen.parent = node
         table.insert(newnodes, hyphen)
      end
   end
   node.children = newnodes
   node.hyphenated = false
   node.done = false
   return newnodes
end
224

225
-- The module's value is the hyphenator class itself.
return hyphenator
48✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc