• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 9269100476

28 May 2024 12:15PM UTC coverage: 57.433% (-8.8%) from 66.229%
9269100476

push

github

web-flow
Merge b5fac49d3 into b2cc0841f

1433 of 2405 new or added lines in 107 files covered. (59.58%)

1576 existing lines in 69 files now uncovered.

9295 of 16184 relevant lines covered (57.43%)

4034.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.39
/languages/unicode.lua
1
local icu = require("justenoughicu")
33✔
2

3
local chardata = require("char-def")
33✔
4

5
-- Register the "languages.fixedNbsp" boolean setting, disabled by default.
-- The node makers in this file only emit their own non-breaking-space node
-- while this setting is false (see isActiveNonBreakingSpace).
SILE.settings:declare({
  parameter = "languages.fixedNbsp",
  help = "Whether to treat U+00A0 (NO-BREAK SPACE) as a fixed-width space",
  default = false,
  type = "boolean",
})
11

12
--- Abstract base class for node makers.
-- A node maker gathers items into a pending token and hands finished nodes to
-- its consumer through coroutine.yield, so these methods are expected to run
-- inside a coroutine. Subclasses supply iterator() and the lookup tables that
-- the is*() predicates index (breakingTypes, spaceTypes, quoteTypes,
-- wordTypes and the punctuation table).
SILE.nodeMakers.base = pl.class({

  --- Start with an empty token buffer and no history.
  -- @param options Stored as self.options and passed through to the shaper.
  _init = function (self, options)
    self.contents = {}
    self.options = options
    self.token = ""
    self.lastnode = false
    self.lasttype = false
  end,

  --- Flush the pending token, if any, as a node formed by the shaper.
  -- Does nothing when no items have been accumulated.
  makeToken = function (self)
    if #self.contents > 0 then
      coroutine.yield(SILE.shaper:formNnode(self.contents, self.token, self.options))
      SU.debug("tokenizer", "Token:", self.token)
      self.contents = {}
      self.token = ""
      self.lastnode = "nnode"
    end
  end,

  --- Append one item to the pending token.
  -- @param char Text appended to the token string.
  -- @param item Item appended to the token's contents list.
  addToken = function (self, char, item)
    self.token = self.token .. char
    table.insert(self.contents, item)
  end,

  --- Yield a space node for item.
  -- A space directly following another glue is dropped unless the
  -- "typesetter.obeyspaces" setting is on.
  makeGlue = function (self, item)
    if SILE.settings:get("typesetter.obeyspaces") or self.lastnode ~= "glue" then
      SU.debug("tokenizer", "Space node")
      coroutine.yield(SILE.shaper:makeSpaceNode(self.options, item))
    end
    self.lastnode = "glue"
    self.lasttype = "sp"
  end,

  --- Yield a penalty node unless the previous node was a penalty or glue.
  -- @param p Penalty value; defaults to 0.
  makePenalty = function (self, p)
    if self.lastnode ~= "penalty" and self.lastnode ~= "glue" then
      coroutine.yield(SILE.types.node.penalty({ penalty = p or 0 }))
    end
    self.lastnode = "penalty"
  end,

  --- Yield a kern as wide as the shaper's measured space.
  makeNonBreakingSpace = function (self)
    -- Unicode Line Breaking Algorithm (UAX 14) specifies that U+00A0
    -- (NO-BREAK SPACE) is expanded or compressed like a normal space.
    coroutine.yield(SILE.types.node.kern(SILE.shaper:measureSpace(self.options)))
    -- Recorded as glue so that a following space or penalty is suppressed.
    self.lastnode = "glue"
    self.lasttype = "sp"
  end,

  --- Abstract: subclasses must override this; the base version raises an error.
  iterator = function (_, _)
    SU.error("Abstract function nodemaker:iterator called", true)
  end,

  --- Look up the char-def entry for a character.
  -- @param char Character text, converted with SU.codepoint.
  -- @return The entry table, or a fresh empty table when there is none.
  charData = function (_, char)
    return chardata[SU.codepoint(char)] or {}
  end,

  --- True-ish when char is a no-break space and "languages.fixedNbsp" is off.
  isActiveNonBreakingSpace = function (self, char)
    return self:isNonBreakingSpace(char) and not SILE.settings:get("languages.fixedNbsp")
  end,

  --- Whether char's linebreak class is listed in self.breakingTypes.
  isBreaking = function (self, char)
    return self.breakingTypes[self:charData(char).linebreak]
  end,

  --- Whether char's char-def contextname is "nobreakspace".
  isNonBreakingSpace = function (self, char)
    local c = self:charData(char)
    return c.contextname and c.contextname == "nobreakspace"
  end,

  --- Whether char's category is listed in the punctuation table.
  isPunctuation = function (self, char)
    -- The table has historically been spelled "puctuationTypes". Prefer the
    -- correctly spelled field when a class defines it, and fall back to the
    -- legacy name so existing subclasses keep working.
    local punctuation = self.punctuationTypes or self.puctuationTypes
    return punctuation[self:charData(char).category]
  end,

  --- Whether char's linebreak class is listed in self.spaceTypes.
  isSpace = function (self, char)
    return self.spaceTypes[self:charData(char).linebreak]
  end,

  --- Whether char's linebreak class is listed in self.quoteTypes.
  isQuote = function (self, char)
    return self.quoteTypes[self:charData(char).linebreak]
  end,

  --- Whether char's linebreak class is listed in self.wordTypes.
  isWord = function (self, char)
    return self.wordTypes[self:charData(char).linebreak]
  end,

})
33✔
101

102
-- Generic Unicode node maker: derives from the base node maker and supplies
-- the lookup tables its predicates index. breakingTypes, quoteTypes,
-- spaceTypes and wordTypes are keyed by linebreak class; the punctuation
-- table is keyed by character category.
do
  local unicode = pl.class(SILE.nodeMakers.base)

  unicode.breakingTypes = { ba = true, zw = true }
  -- (sic) The misspelled field name is the one isPunctuation reads.
  unicode.puctuationTypes = { po = true }
  -- Quote linebreak category is ambiguous depending on the language, so no
  -- class is treated as a quote here.
  unicode.quoteTypes = {}
  unicode.spaceTypes = { sp = true }
  unicode.wordTypes = { cm = true }

  SILE.nodeMakers.unicode = unicode
end
33✔
109

110
--- Process one item at a position that is not an ICU breakpoint.
-- Depending on the character's class, the item ends the pending token, becomes
-- glue or a penalty, or is appended to the pending token.
-- @param item Item whose text field holds the character to classify.
function SILE.nodeMakers.unicode:dealWith (item)
  local text = item.text
  local entry = chardata[SU.codepoint(text)]
  local linebreak = entry and entry.linebreak

  if self:isSpace(text) then
    -- A space closes the current token and becomes glue.
    self:makeToken()
    self:makeGlue(item)
  elseif self:isActiveNonBreakingSpace(text) then
    self:makeToken()
    self:makeNonBreakingSpace()
  elseif self:isBreaking(text) then
    -- A breaking character stays with its token and is followed by a zero penalty.
    self:addToken(text, item)
    self:makeToken()
    self:makePenalty(0)
  elseif self:isQuote(text) then
    self:addToken(text, item)
    self:makeToken()
  else
    -- NOTE(review): isWord() looks up a character, yet it is handed a
    -- linebreak class here -- confirm whether that is intended.
    local classChanged = self.lasttype
      and (linebreak and linebreak ~= self.lasttype and not self:isWord(linebreak))
    -- Letterspacing is only attempted when the class did not change.
    if not classChanged then
      self:letterspace()
    end
    self:addToken(text, item)
  end

  -- Overrides any lasttype set by the helpers called above.
  self.lasttype = linebreak
end
135

136
--- Turn the leading run of space items into glue.
-- @param items Sequence of items, each with a text field.
-- @return Index of the first item that is not a space (#items + 1 when every
--   item is a space), followed by the items sequence itself.
function SILE.nodeMakers.unicode:handleInitialGlue (items)
  local first = 1
  while first <= #items and self:isSpace(items[first].text) do
    self:makeGlue(items[first])
    first = first + 1
  end
  return first, items
end
145

146
--- Insert letterspacing before the next character when it is enabled.
-- Does nothing unless the "document.letterspaceglue" setting is set. Otherwise
-- the pending token is flushed and, unless nothing has been emitted yet or the
-- previous node was glue, a kern of the configured width is yielded.
function SILE.nodeMakers.unicode:letterspace ()
  if not SILE.settings:get("document.letterspaceglue") then
    return
  end
  -- NOTE(review): self.token is initialised to "", which is truthy in Lua, so
  -- this guard looks like it always passes; makeToken() itself ignores an
  -- empty buffer.
  if self.token then
    self:makeToken()
  end
  if not self.lastnode or self.lastnode == "glue" then
    return
  end
  local width = SILE.settings:get("document.letterspaceglue").width
  SU.debug("tokenizer", "Letter space glue:", width)
  coroutine.yield(SILE.types.node.kern({ width = width }))
  -- Recorded as glue so that the spacing is not doubled up.
  self.lastnode = "glue"
  self.lasttype = "sp"
end
157

158
--- Tell whether item has reached the next pending breakpoint.
-- @param chunks Pending breakpoints; only the first entry is examined.
-- @param item Item whose index is compared with that breakpoint's index.
-- @return A falsy value when no breakpoint is pending, otherwise a boolean.
function SILE.nodeMakers.unicode.isICUBreakHere (_, chunks, item)
  local pending = chunks[1]
  return pending and (item.index >= pending.index)
end
161

162
--- Handle a breakpoint that the ICU library reported at this item.
-- @param chunks Pending breakpoints; entries at or before item.index are
--   removed in place.
-- @param item The item sitting at the breakpoint.
-- @return The same chunks table, now starting at the next breakpoint.
function SILE.nodeMakers.unicode:handleICUBreak (chunks, item)
  -- Remember the breakpoint being handled before it is discarded.
  local current = chunks[1]

  -- Discard it, along with any out-of-order ones, so that chunks[1] becomes
  -- the next index to compare against the string.
  while true do
    local head = chunks[1]
    if not head or not (item.index >= head.index) then
      break
    end
    table.remove(chunks, 1)
  end

  -- Dispatch on the kind of breakpoint; other kinds are ignored.
  local kind = current.type
  if kind == "word" then
    self:handleWordBreak(item)
  elseif kind == "line" then
    self:handleLineBreak(item, current.subtype)
  end

  return chunks
end
181

182
--- Handle a word boundary: flush the pending token, then place the item.
-- @param item The item sitting at the word boundary.
function SILE.nodeMakers.unicode:handleWordBreak (item)
  self:makeToken()

  local text = item.text
  if self:isSpace(text) then
    -- Spacing word break.
    self:makeGlue(item)
    return
  end
  if self:isActiveNonBreakingSpace(text) then
    -- Non-breaking space word break.
    self:makeNonBreakingSpace()
    return
  end
  -- A word break which isn't a space: the item starts the next token.
  self:addToken(text, item)
end
195

196
--- Word-break handler variant that repeats an explicit hyphen.
-- According to some language rules, when a break occurs at an explicit hyphen,
-- the hyphen gets repeated at the beginning of the new line. Any other item is
-- passed to the regular unicode handleWordBreak.
-- @param item The item sitting at the word boundary.
function SILE.nodeMakers.unicode:_handleWordBreakRepeatHyphen (item)
  if item.text == "-" then
    -- Keep the hyphen with the word before it.
    self:addToken(item.text, item)
    self:makeToken()
    if self.lastnode ~= "discretionary" then
      -- A discretionary with only a postbreak: the extra hyphen shows up
      -- only if the line actually breaks here. It is built through
      -- SILE.types.node like every other node in this file.
      coroutine.yield(SILE.types.node.discretionary({
        postbreak = SILE.shaper:createNnodes("-", self.options),
      }))
      self.lastnode = "discretionary"
    end
  else
    SILE.nodeMakers.unicode.handleWordBreak(self, item)
  end
end
212

213
--- Handle a line-break opportunity reported for item.
-- @param item The item sitting at the breakpoint.
-- @param subtype "soft" yields a penalty of 0; anything else yields -1000.
function SILE.nodeMakers.unicode:handleLineBreak (item, subtype)
  local text = item.text

  -- Because we are in charge of paragraphing, space-type line breaks are
  -- overridden and treated just as ordinary word spaces.
  local spaceLike = self:isSpace(text) or self:isActiveNonBreakingSpace(text)
  if spaceLike then
    self:handleWordBreak(item)
    return
  end

  -- Explicit line breaks are turned into soft and hard breaks.
  local penalty = -1000
  if subtype == "soft" then
    penalty = 0
  end
  self:makeToken()
  self:makePenalty(penalty)

  -- The item itself starts the next token; record its linebreak class.
  self:addToken(text, item)
  local entry = chardata[SU.codepoint(text)]
  self.lasttype = entry and entry.linebreak
end
230

231
--- Line-break handler variant used together with the repeated-hyphen rule.
-- @param item The item sitting at the breakpoint.
-- @param subtype Passed through to the regular unicode handleLineBreak.
function SILE.nodeMakers.unicode:_handleLineBreakRepeatHyphen (item, subtype)
  if self.lastnode ~= "discretionary" then
    SILE.nodeMakers.unicode.handleLineBreak(self, item, subtype)
    return
  end
  -- Initial word boundary after a discretionary:
  -- bypass it and just deal with the token.
  self:dealWith(item)
end
240

241
--- Build the node iterator for a sequence of items.
-- The text of all items is joined and passed to icu.breakpoints together with
-- self.options.language. The returned function then walks the items, handling
-- those that have reached a breakpoint with handleICUBreak and all others
-- with dealWith.
-- @param items Sequence of items, each with text and index fields.
-- @return A coroutine-wrapped function; each call resumes the walk until the
--   next node is yielded.
function SILE.nodeMakers.unicode:iterator (items)
  -- Join the pieces once with table.concat rather than growing a string with
  -- ".." on every iteration.
  local pieces = {}
  for i = 1, #items do
    pieces[i] = items[i].text
  end
  local fulltext = table.concat(pieces)

  local chunks = { icu.breakpoints(fulltext, self.options.language) }
  -- The first reported breakpoint is always discarded
  -- (presumably the one at the very start of the text -- TODO confirm).
  table.remove(chunks, 1)

  return coroutine.wrap(function ()
    local i
    -- Leading spaces become glue before the main walk starts.
    i, self.items = self:handleInitialGlue(items)
    for j = i, #items do
      -- Position and item are kept on self, not just in locals.
      self.i = j
      self.item = self.items[self.i]
      if self:isICUBreakHere(chunks, self.item) then
        chunks = self:handleICUBreak(chunks, self.item)
      else
        self:dealWith(self.item)
      end
    end
    -- Flush whatever is still pending at the end of the input.
    self:makeToken()
  end)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc