• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

sile-typesetter / sile / 12313034533

13 Dec 2024 09:28AM UTC coverage: 60.234% (-0.7%) from 60.941%
12313034533

push

github

web-flow
Merge 5a7694dff into d737b2656

9 of 25 new or added lines in 5 files covered. (36.0%)

145 existing lines in 16 files now uncovered.

12801 of 21252 relevant lines covered (60.23%)

2545.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

44.83
/core/utilities/numbers.lua
1
--- Number formatting utilities.
2
--- @module SU.numbers
3

4
local icu = require("justenoughicu")
97✔
5

6
--- @type formatNumber
-- Table of number formatters, indexed first by language code and then by
-- format style (or numbering system) name.
-- Language-specific number formatters add functions to this table,
-- see e.g. languages/eo.lua
local formatNumber = {
   -- Fallback formatters, tried when no language-specific hook applies.
   und = {

      -- Alpha is a special case (a numbering system, though this table is for
      -- formatting style hooks normally)
      --- Format a number as a sequence of lowercase ASCII letters.
      -- This is bijective base 26: 1 is "a", 26 is "z", 27 is "aa", 28 is "ab"...
      -- The input is not validated; values below 1 do not yield a meaningful
      -- sequence.
      -- @tparam number num Number to format
      -- @treturn string The corresponding letters
      alpha = function (num)
         local out = ""
         local a = string.byte("a")
         repeat
            -- Shift to zero-based so that each "digit" falls in 0..25.
            num = num - 1
            out = string.char(num % 26 + a) .. out
            -- Integer division by 26, written to work on all Lua versions.
            num = (num - num % 26) / 26
         until num < 1
         return out
      end,
      -- Greek is another special case:
      -- There are books where one wants to number items with Greek letters in
      -- sequence, e.g. annotations in biblical material etc.
      -- as in "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ σ τ υ φ χ ψ ω".
      -- We can't use ICU "grek" or "greklow" numbering systems because they are
      -- non-arithmetical, e.g. 6 is a digamma (ϝ´), 11 is iota alpha (ια´), etc.
      -- and they are also all followed by a numeric marker ("keraia").
      --- Format a number as a single lowercase Greek letter.
      -- @tparam number num Number to format, from 1 (alpha) to 24 (omega)
      -- @treturn string The corresponding letter (reports an error via
      -- SU.error when the number is out of range)
      greek = function (num)
         local out = ""
         if num < 1 or num >= 25 then
            -- Don't try to be too clever: there is no letter before alpha,
            -- and nothing sensible after omega.
            SU.error("Greek numbering is only supported from 1 to 24")
            return out
         end
         local a = SU.codepoint("α") -- alpha
         if num < 18 then
            -- alpha to rho
            out = luautf8.char(num + a - 1)
         else
            -- sigma to omega (unicode has two sigmas here, we skip one)
            out = luautf8.char(num + a)
         end
         return out
      end,
   },
}
48

49
-- Decent subset from unum.h
-- Maps the style names accepted in options.style to ICU's numeric style enum.
local icuStyles = {
   default = 0, -- UNUM_PATTERN_DECIMAL
   decimal = 1, -- UNUM_DECIMAL
   string = 5, -- UNUM_SPELLOUT
   ordinal = 6, -- UNUM_ORDINAL
}

-- Numbering system for which we _know_ that ICU doesn't have a
-- default(0) format style rule (i.e. spits out latin)
-- This table is just an optimization to avoid calling ICU twice when this
-- occurs, e.g. "roman" may be quite frequent as a numbering system.
local icuStyleBypass = {
   roman = true,
}

--- Format a number through ICU.
-- The options table is only read, never modified, so callers can safely
-- reuse it (including the original casing of options.system).
-- @tparam number num Number to format
-- @tparam[opt] string lang ICU locale, e.g. "en", "en-US", "sr-Latn"
-- @tparam table options Formatting options:
--   system (numbering system specifier, case-insensitive) and
--   style (one of the keys of icuStyles, "default" when absent)
-- @treturn string The formatted number, or the plain number as a string when
-- formatting is not possible (a warning is issued in that case)
local icuFormat = function (num, lang, options)
   local system = options.system and options.system:lower()

   -- Consistency: further below we'll concatenate those, and an empty
   -- string is likely a user mistake.
   if not lang and not system then
      SU.warn("Number formatting needs a language or a numbering system")
      -- Return a string here too, like every other exit path.
      return tostring(num)
   end

   -- ICU format style (enum)
   local style = options.style or "default"
   local icustyle = icuStyles[style]
   if not icustyle then
      SU.warn("Number formatting style is unrecognized (using default as fallback)")
      icustyle = 0
   end

   -- ICU locale: see  https://unicode-org.github.io/icu/userguide/locale/
   -- Ex. "en", "en-US", "sr-Latn"...
   local iculocale = lang or ""
   -- ICU keyword for a numbering system specifier: @numbers=xxxx
   -- The specifiers are defined here:
   -- https://github.com/unicode-org/cldr/blob/main/common/bcp47/number.xml
   if system then
      iculocale = iculocale .. "@numbers=" .. system
      if icuStyleBypass[system] then
         icustyle = 1
      end
   end

   -- ICU may raise on bad input: trap it so that we can degrade gracefully.
   local ok, result = pcall(icu.format_number, num, iculocale, icustyle)
   if ok and system and icustyle == 0 and system ~= "latn" and result == tostring(num) then
      -- There are valid cases where "@numbers=xxxx" with default(0) and decimal(1) styles both work.
      -- Typically, with num=1234
      --   "@numbers=arab" in default(0) --> ١٢٣٤
      --   "@numbers=arab", in decimal(1) --> ١٬٢٣٤
      -- But in many cases, ICU may fallback to latin, e.g. take "roman" (or "grek")
      --   "@numbers=roman" in default(0) --> 1234
      --   "@numbers=roman" in decimal(1) --> MCCXXXIV
      -- Be user friendly and attempt honoring the script.
      ok, result = pcall(icu.format_number, num, "@numbers=" .. system, 1)
   end
   if not ok then
      SU.warn("Number formatting failed: " .. tostring(result))
   end
   return tostring(ok and result or num)
end
112

113
-- Guess the expected casing from how a name is written:
-- "roman" gives lower, "Roman" gives title, anything else (e.g. "ROMAN") gives upper.
local function guessCase (name)
   if name:match("^%l") then
      return "lower"
   elseif name:match("^.%l") then
      return "title"
   end
   return "upper"
end

setmetatable(formatNumber, {
   --- Format a number according to options, and optional case.
   -- Options:
   -- - system: a numbering system string, e.g. "latn" (= "arabic"), "roman", "arab", etc.
   --   With the addition of "alpha" and "greek".
   --   Casing is guessed from the system (e.g. roman, Roman, ROMAN) unless specified
   -- - style: a format style string, i.e. "default", "decimal", "ordinal", "string")
   --   E.g. in English and latin script:   1234        1,234     1,234th    one thousand...
   --   Possibly extended by additional language-specific formatting rules.
   -- Obviously, some combinations of system, style and case won't do anything worthwhile.
   -- @tparam table self The formatNumber table itself
   -- @tparam number num Number to format
   -- @tparam[opt] table options Formatting options (a string is still accepted
   -- for the deprecated former syntax)
   -- @tparam[opt] string case "lower", "title" or "upper"
   -- @return The formatted number, as returned by icu.case
   __call = function (self, num, options, case)
      if math.abs(num) > 9223372036854775807 then
         SU.warn("Integers larger than 64 bits do not reproduce properly in all formats")
      end
      options = options or {}

      -- BEGIN COMPATIBILITY SHIM
      if type(options) ~= "table" then
         -- It used to be a string aggregating both concepts.
         SU.deprecated(
            "Previous syntax of SU.formatNumber",
            "new syntax for SU.formatNumber",
            "0.14.6",
            "0.16.0",
            [[
               Previous syntax was SU.formatNumber(num, format[, case]) with a format string
               New syntax is SU.formatNumber(num, options[, case]) with an options table,
               possibly containing:

                 - system: a numbering system string

                   e.g. "latn" (= "arabic"), "roman", "arab", etc. with the addition of
                   "alpha" and "greek". Casing is taken into account (e.g. roman, Roman,
                   ROMAN) unless specified.

                 - style: a format style string

                   i.e. "default", "decimal", "ordinal", "string"). E.g. in English and latin
                   script: 1234    1,234    1,124th    one thousand    ...
                   Possibly extended by additional language-specific formatting rules.

               Note that the new syntax doesn't handle casing on the format style, for
               separation of concerns.
            ]]
         )
         if not case then
            case = guessCase(options)
         end
         if options:lower() == "nth" then
            SU.deprecated("Format 'nth' in SU.formatNumber", "'ordinal' in SU.formatNumber", "0.14.6", "0.16.0")
            options = { style = "ordinal" }
         elseif options:lower() == "string" then
            options = { style = "string" }
         elseif options:lower() == "ordinal" and SILE.settings:get("document.language") == "tr" then
            SU.deprecated(
               "Format 'ordinal' in Turkish in SU.formatNumber",
               "'ordinal-string' in SU.formatNumber",
               "0.14.6",
               "0.16.0"
            )
            options = { style = "ordinal-string" }
         else
            options = { system = options }
         end
      end
      -- END COMPATIBILITY SHIM

      if options.system == "arabic" then
         -- "arabic" is a weird name, but quite a friendly one, used e.g. in
         -- counters and in several other places, let's keep it as a
         -- compatibility alias.
         -- The options table is updated in place, so the formatting hooks
         -- called below see the canonical "latn" name.
         options.system = "latn"
      end

      local system = options.system
      if not case then
         -- Without a system to guess from, default to lowercase.
         case = system and guessCase(system) or "lower"
      end
      system = system and system:lower()

      -- Roman numbers are cased according to Latin rules, whatever the document language.
      local lang = system and system == "roman" and "la" or SILE.settings:get("document.language")
      local style = options.style
      local result
      if self[lang] and style and type(self[lang][style]) == "function" then
         -- Language specific hooks exists, use them...
         result = self[lang][style](num, options)
      elseif style and type(self.und[style]) == "function" then
         -- Global specific hooks exists: use them...
         -- (Looked up by style, the same key the condition above has checked.)
         result = self.und[style](num, options)
      elseif system and type(self.und[system]) == "function" then
         -- TRICK: Notably, special case for "alpha" and "greek"
         result = self.und[system](num, options)
      else
         --- Otherwise, rely on ICU...
         result = icuFormat(num, lang, options)
      end
      return icu.case(result, lang, case)
   end,
})
227

228
return formatNumber
97✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc