• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 7054606668

01 Dec 2023 01:43AM UTC coverage: 70.141% (-4.2%) from 74.329%
7054606668

push

github

web-flow
Merge 14837a0c3 into a6c229613

11050 of 15754 relevant lines covered (70.14%)

3938.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.78
/languages/unicode.lua
1
local icu = require("justenoughicu")
90✔
2

3
local chardata = require("char-def")
90✔
4

5
-- Base node maker. It buffers incoming items into a pending token and, from
-- inside a coroutine, yields nnodes, space nodes and penalties. The character
-- predicates below read `self.is*Type` lookup tables which this class does
-- not define itself; `iterator` is abstract and must be overridden.
SILE.nodeMakers.base = pl.class({

    -- Start with an empty token buffer and no record of a previous node.
    -- `options` is stored as-is and handed to the shaper later.
    _init = function (self, options)
      self.options = options
      self.contents = {}
      self.token = ""
      self.lasttype = false
      self.lastnode = false
    end,

    -- Flush the pending token (if there is one) as an nnode, then reset
    -- the buffer. Does nothing when no items have been collected.
    makeToken = function (self)
      if #self.contents == 0 then return end
      coroutine.yield(SILE.shaper:formNnode(self.contents, self.token, self.options))
      SU.debug("tokenizer", "Token:", self.token)
      self.contents, self.token = {}, ""
      self.lastnode = "nnode"
    end,

    -- Append `char` to the pending token text and remember its `item`.
    addToken = function (self, char, item)
      table.insert(self.contents, item)
      self.token = self.token .. char
    end,

    -- Yield a space node for `item`. Consecutive glue is collapsed unless
    -- the "typesetter.obeyspaces" setting is truthy. The glue state is
    -- recorded even when nothing was yielded.
    makeGlue = function (self, item)
      local obeyspaces = SILE.settings:get("typesetter.obeyspaces")
      if obeyspaces or self.lastnode ~= "glue" then
        SU.debug("tokenizer", "Space node")
        coroutine.yield(SILE.shaper:makeSpaceNode(self.options, item))
      end
      self.lasttype = "sp"
      self.lastnode = "glue"
    end,

    -- Yield a penalty node with value `p` (0 when omitted), unless the
    -- previous node was already a penalty or glue.
    makePenalty = function (self, p)
      local previous = self.lastnode
      if not (previous == "penalty" or previous == "glue") then
        coroutine.yield( SILE.nodefactory.penalty({ penalty = p or 0 }) )
      end
      self.lastnode = "penalty"
    end,

    -- Abstract: always raises an error in the base class.
    iterator = function (_, _)
      SU.error("Abstract function nodemaker:iterator called", true)
    end,

    -- Return the char-def entry for `char`, or a fresh empty table when
    -- the codepoint has no entry.
    charData = function (_, char)
      return chardata[SU.codepoint(char)] or {}
    end,

    -- True-ish when the character's `category` is listed in isPunctuationType.
    isPunctuation = function (self, char)
      local category = self:charData(char).category
      return self.isPunctuationType[category]
    end,

    -- True-ish when the character's `linebreak` class is listed in isSpaceType.
    isSpace = function (self, char)
      local class = self:charData(char).linebreak
      return self.isSpaceType[class]
    end,

    -- True-ish when the character's `linebreak` class is listed in isBreakingType.
    isBreaking = function (self, char)
      local class = self:charData(char).linebreak
      return self.isBreakingType[class]
    end,

    -- True-ish when the character's `linebreak` class is listed in isQuoteType.
    isQuote = function (self, char)
      local class = self:charData(char).linebreak
      return self.isQuoteType[class]
    end

  })
90✔
72

73
-- Unicode-aware node maker, derived from the base node maker.
SILE.nodeMakers.unicode = pl.class(SILE.nodeMakers.base)

-- Lookup tables consulted by the character predicates and by dealWith.
-- isPunctuationType is keyed by char-def `category`; the others are keyed
-- by char-def `linebreak` class.
do
  local unicode = SILE.nodeMakers.unicode
  unicode.isWordType = { cm = true }
  unicode.isSpaceType = { sp = true }
  unicode.isBreakingType = { ba = true, zw = true }
  unicode.isPunctuationType = { po = true }
  unicode.isQuoteType = {} -- quote linebreak category is ambiguous depending on the language
end
90✔
80

81
-- Handle one item at a position that is not an ICU breakpoint.
-- Spaces flush the token and emit glue; breaking characters join the token,
-- flush it and emit a zero penalty; quote characters join the token and
-- flush it; everything else is appended to the pending token, with letter
-- spacing applied unless the linebreak class just changed to a non-word one.
function SILE.nodeMakers.unicode:dealWith (item)
  local char = item.text
  local entry = chardata[SU.codepoint(char)]
  local thistype = entry and entry.linebreak

  if self:isSpace(char) then
    self:makeToken()
    self:makeGlue(item)
  elseif self:isBreaking(char) then
    self:addToken(char, item)
    self:makeToken()
    self:makePenalty(0)
  elseif self:isQuote(char) then
    self:addToken(char, item)
    self:makeToken()
  else
    -- A change of linebreak class (to anything not listed in isWordType)
    -- skips the letterspace step; the character is appended either way.
    local classChanged = self.lasttype
      and thistype
      and thistype ~= self.lasttype
      and not self.isWordType[thistype]
    if not classChanged then
      self:letterspace()
    end
    self:addToken(char, item)
  end

  self.lasttype = thistype
end
103

104
-- Consume leading space items, emitting glue for each one.
-- Returns the index of the first non-space item (or #items + 1 when every
-- item is a space) followed by the `items` table itself.
function SILE.nodeMakers.unicode:handleInitialGlue (items)
  local first = 1
  while first <= #items do
    local candidate = items[first]
    if not self:isSpace(candidate.text) then
      break
    end
    self:makeGlue(candidate)
    first = first + 1
  end
  return first, items
end
113

114
-- When the "document.letterspaceglue" setting is truthy, flush the pending
-- token and yield a kern of that setting's width, unless there is no
-- previous node or the previous node was glue.
function SILE.nodeMakers.unicode:letterspace ()
  if not SILE.settings:get("document.letterspaceglue") then
    return
  end

  -- NOTE(review): self.token is initialised to "" (truthy in Lua), so this
  -- guard normally always passes; makeToken itself is a no-op when nothing
  -- has been collected.
  if self.token then
    self:makeToken()
  end

  local previous = self.lastnode
  if not previous or previous == "glue" then
    return
  end

  local w = SILE.settings:get("document.letterspaceglue").width
  SU.debug("tokenizer", "Letter space glue:", w)
  coroutine.yield(SILE.nodefactory.kern({ width = w }))
  self.lastnode = "glue"
  self.lasttype = "sp"
end
125

126
-- Report whether `item` has reached the next pending breakpoint, i.e. its
-- index is at or beyond the index of chunks[1]. Returns a falsy value when
-- no breakpoints remain.
function SILE.nodeMakers.unicode.isICUBreakHere (_, chunks, item)
  local pending = chunks[1]
  return pending and (item.index >= pending.index)
end
129

130
-- Process an ICU breakpoint that falls on `item`.
-- The head of `chunks` is taken as the breakpoint to act on; it and any
-- further entries whose index is not beyond item.index are removed, so that
-- chunks[1] is again the next breakpoint to compare against. The breakpoint
-- is then dispatched on its type ("word" or "line"; other types are
-- ignored). Returns the same, now shortened, `chunks` table.
function SILE.nodeMakers.unicode:handleICUBreak (chunks, item)
  local breakpoint = chunks[1]

  -- Drop this breakpoint together with any out of order ones.
  while chunks[1] and item.index >= chunks[1].index do
    table.remove(chunks, 1)
  end

  local kind = breakpoint.type
  if kind == "word" then
    self:handleWordBreak(item)
  elseif kind == "line" then
    self:handleLineBreak(item, breakpoint.subtype)
  end

  return chunks
end
149

150
-- Handle a word-type breakpoint: flush the pending token, then either start
-- a new token with this item (non-space) or emit glue for it (space).
function SILE.nodeMakers.unicode:handleWordBreak (item)
  self:makeToken()
  if not self:isSpace(item.text) then
    -- A word break which isn't a space: the character opens the next token.
    self:addToken(item.text, item)
  else
    -- Spacing word break.
    self:makeGlue(item)
  end
end
159

160
-- Handle a line-type breakpoint.
-- Space-type line breaks are overridden and treated as ordinary word spaces,
-- because paragraphing is done here. Any other line break flushes the
-- pending token and emits a penalty: 0 when `subtype` is "soft", -1000
-- otherwise. The breaking character then starts the next token.
function SILE.nodeMakers.unicode:handleLineBreak (item, subtype)
  local char = item.text

  if self:isSpace(char) then
    self:handleWordBreak(item)
    return
  end

  local penalty
  if subtype == "soft" then
    penalty = 0
  else
    penalty = -1000
  end

  self:makeToken()
  self:makePenalty(penalty)
  self:addToken(char, item)

  -- Remember this character's linebreak class for the next dealWith call.
  local entry = chardata[SU.codepoint(char)]
  self.lasttype = entry and entry.linebreak
end
177

178
-- Build a coroutine-based iterator over the nodes produced from `items`.
-- The texts of all items are joined and passed to ICU (together with
-- self.options.language) to obtain breakpoints; the first returned
-- breakpoint is discarded. The returned function yields nodes as the items
-- are walked: leading spaces become glue, items at an ICU breakpoint go
-- through handleICUBreak, all others through dealWith, and any pending
-- token is flushed at the end.
function SILE.nodeMakers.unicode:iterator (items)
  -- Collect the pieces and join once instead of repeated `..`, which is
  -- quadratic in the number of items. Explicit bounds make table.concat
  -- raise an error on a missing text rather than silently stopping at a hole.
  local count = #items
  local pieces = {}
  for i = 1, count do
    pieces[i] = items[i].text
  end
  local fulltext = table.concat(pieces, "", 1, count)

  local chunks = { icu.breakpoints(fulltext, self.options.language) }
  table.remove(chunks, 1)

  return coroutine.wrap(function ()
    local start
    start, self.items = self:handleInitialGlue(items)
    for j = start, #items do
      self.i = j
      self.item = self.items[self.i]
      if self:isICUBreakHere(chunks, self.item) then
        chunks = self:handleICUBreak(chunks, self.item)
      else
        self:dealWith(self.item)
      end
    end
    self:makeToken()
  end)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc