benwbrum / fromthepage, build 21491602864
29 Jan 2026 07:19PM UTC. Coverage: 68.395% (down 0.03% from 68.42%).
Pull Request #4896: 4880 - Latex export refactor (merge cc04b646a into 89911d8f6; github / web-flow)
2241 of 3800 branches covered (58.97%); branch coverage is included in the aggregate percentage.
169 of 183 new or added lines in 4 files covered (92.35%).
376 existing lines in 13 files are now uncovered.
9263 of 13020 relevant lines covered (71.14%), 136.31 hits per line.

Source File

/app/models/xml_source_processor.rb (file coverage: 91.21%)
module XmlSourceProcessor
  def validate_source
    if self.source_text.blank?
      return
    end
    # Skip subject linking validation for field-based collections
    # and collections with subjects disabled
    if self.collection&.field_based || self.collection&.subjects_disabled
      return
    end
    validate_links(self.source_text)
  end

  def validate_source_translation
    if self.source_translation.blank?
      return
    end
    # Skip subject linking validation for field-based collections
    # and collections with subjects disabled
    if self.collection&.field_based || self.collection&.subjects_disabled
      return
    end
    validate_links(self.source_translation)
  end

  # check the text for problems or typos with the subject links
  def validate_links(text)
    error_scope = [:activerecord, :errors, :models, :xml_source_processor]
    # split on all begin-braces
    tags = text.split('[[')
    # remove the initial string which occurs before the first tag
    debug("validate_source: tags to process are #{tags.inspect}")
    tags = tags - [tags[0]]
    debug("validate_source: massaged tags to process are #{tags.inspect}")
    for tag in tags
      debug(tag)

      if tag.include?(']]]')
        errors.add(:base, I18n.t('subject_linking_error', scope: error_scope) + I18n.t('tags_should_not_use_3_brackets', scope: error_scope))
        return
      end
      unless tag.include?(']]')
        tag = tag.strip
        errors.add(:base, I18n.t('subject_linking_error', scope: error_scope) + I18n.t('wrong_number_of_closing_braces', tag: '"[['+tag+'"', scope: error_scope))
      end

      # just pull the pieces between the braces
      inner_tag = tag.split(']]')[0]
      if inner_tag =~ /^\s*$/
        errors.add(:base, I18n.t('subject_linking_error', scope: error_scope) + I18n.t('blank_tag_in', tag: '"[['+tag+'"', scope: error_scope))
      end

      # check for unclosed single bracket
      if inner_tag.include?('[')
        unless inner_tag.include?(']')
          errors.add(:base, I18n.t('subject_linking_error', scope: error_scope) + I18n.t('unclosed_bracket_within', tag: '"'+inner_tag+'"', scope: error_scope))
        end
      end
      # check for blank title or display name with pipes
      if inner_tag.include?('|')
        tag_parts = inner_tag.split('|')
        debug("validate_source: inner tag parts are #{tag_parts.inspect}")
        if tag_parts[0] =~ /^\s*$/
          errors.add(:base, I18n.t('subject_linking_error', scope: error_scope) + I18n.t('blank_subject_in', tag: '"[['+inner_tag+']]"', scope: error_scope))
        end
        if tag_parts[1] =~ /^\s*$/
          errors.add(:base, I18n.t('subject_linking_error', scope: error_scope) + I18n.t('blank_text_in', tag: '"[['+inner_tag+']]"', scope: error_scope))
        end
      end
    end
    #    return errors.size > 0
  end

  def source_text=(text)
    self.source_text_will_change!
    super
  end

  def source_translation=(text)
    self.source_translation_will_change!
    super
  end

  ##############################################
  # All code to convert transcriptions from source
  # format to canonical xml format belongs here.
  ##############################################
  def process_source
    if source_text_changed?
      self.xml_text = wiki_to_xml(self, Page::TEXT_TYPE::TRANSCRIPTION)
    end

    if self.respond_to?(:source_translation) && source_translation_changed?
      self.xml_translation = wiki_to_xml(self, Page::TEXT_TYPE::TRANSLATION)
    end
  end

  def wiki_to_xml(page, text_type, preview_mode = false)
    subjects_disabled = page.collection.subjects_disabled

    source_text = case text_type
    when Page::TEXT_TYPE::TRANSCRIPTION
                    page.source_text
    when Page::TEXT_TYPE::TRANSLATION
                    page.source_translation
    else
                    ''
    end

    xml_string = String.new(source_text)
    xml_string = process_latex_snippets(xml_string)
    xml_string = clean_bad_braces(xml_string)
    xml_string = clean_script_tags(xml_string)
    xml_string = process_square_braces(xml_string) unless subjects_disabled
    xml_string = process_initial_whitespace(xml_string)
    xml_string = process_linewise_markup(xml_string)
    xml_string = process_line_breaks(xml_string, !page.collection.field_based?)
    xml_string = valid_xml_from_source(xml_string)
    xml_string = update_links_and_xml(xml_string, preview_mode, text_type)
    xml_string = postprocess_xml_markup(xml_string)
    postprocess_sections
    xml_string
  end


  # remove script tags from HTML to prevent javascript injection
  def clean_script_tags(text)
    # text.gsub(/<script.*?<\/script>/m, '')
    text.gsub(/<\/?script.*?>/m, '')
  end

  BAD_SHIFT_REGEX = /\[\[([[[:alpha:]][[:blank:]]|,\(\)\-[[:digit:]]]+)\}\}/
  def clean_bad_braces(text)
    text.gsub BAD_SHIFT_REGEX, '[[\\1]]'
  end

  BRACE_REGEX = /\[\[.*?\]\]/m
  def process_square_braces(text)
    # find all the links
    wikilinks = text.scan(BRACE_REGEX)
    wikilinks.each do |wikilink_contents|
      # strip braces
      munged = wikilink_contents.sub('[[', '')
      munged = munged.sub(']]', '')

      # extract the title and display
      if munged.include? '|'
        parts = munged.split '|'
        title = parts[0]
        verbatim = parts[1]
      else
        title = munged
        verbatim = munged
      end

      title = canonicalize_title(title)

      replacement = "<link target_title=\"#{title}\">#{verbatim}</link>"
      text.sub!(wikilink_contents, replacement)
    end

    text
  end

  def remove_square_braces(text)
    new_text = text.scan(BRACE_REGEX)
    new_text.each do |results|
      changed = results
      # remove title
      if results.include?('|')
        changed = results.sub(/\[\[.*?\|/, '')
      end
      changed = changed.sub('[[', '')
      changed = changed.sub(']]', '')

      text.sub!(results, changed)
    end
    text
  end

  LATEX_SNIPPET = /(\{\{tex:?(.*?):?tex\}\})/m
  def process_latex_snippets(text)
    return text unless self.respond_to? :tex_figures
    replacements = {}
    figures = self.tex_figures.to_a

    text.scan(LATEX_SNIPPET).each_with_index do |pair, i|
      with_tags = pair[0]
      contents = pair[1]

      replacements[with_tags] = "<texFigure position=\"#{i+1}\"/>" # position attribute in acts as list starts with 1

      figure = figures[i] || TexFigure.new
      figure.source = contents unless figure.source == contents
      figures[i] = figure
    end

    self.tex_figures = figures
    replacements.each_pair do |s, r|
      text.sub!(s, r)
    end

    text
  end

  HEADER = /\s\|\s/
  SEPARATOR = /---.*\|/
  ROW = HEADER

  def process_linewise_markup(text)
    @tables = []
    @sections = []
    new_lines = []
    current_table = nil
    text.lines.each do |line|
      # first deal with any sections
      line = process_any_sections(line)
      # look for a header
      if !current_table
        if line.match(HEADER)
          line = line.chomp
          current_table = { header: [], rows: [], section: @sections.last }
          # fill the header
          cells = line.split(/\s*\|\s*/)
          cells.shift if line.match(/^\|/) # remove leading pipe

          # trim whitespace from each header cell
          cells = cells.map(&:strip)

          current_table[:header] = cells.map { |cell_title| cell_title.sub(/^!\s*/, '') }
          heading = cells.map do |cell|
            if cell.match(/^!/)
              "<th class=\"bang\">#{cell.sub(/^!\s*/, '')}</th>"
            else
              "<th>#{cell}</th>"
            end
          end.join(' ')
          new_lines << "<table class=\"tabular\">\n<thead>\n<tr>#{heading}</tr></thead>"
        else
          # no current table, no table contents -- NO-OP
          new_lines << line
        end
      else
        # this is either an end or a separator
        if line.match(SEPARATOR)
          # NO-OP
        elsif line.match(ROW)
          # handle initial blank cells - if line starts with whitespace followed by pipe, preserve empty cell
          line_chomp = line.chomp
          has_initial_empty_cell = line_chomp.match(/^\s+\|/)

          # remove leading and trailing delimiters
          clean_line = line_chomp.sub(/^\s*\|/, '').sub(/\|\s*$/, '')
          # fill the row
          cells = clean_line.split(/\s*\|\s*/, -1) # -1 means "don't prune empty values at the end"

          # trim whitespace from each cell
          cells = cells.map(&:strip)

          # if there was initial whitespace before pipe, add empty cell at beginning
          cells.unshift('') if has_initial_empty_cell
          current_table[:rows] << cells
          rowline = ''
          cells.each_with_index do |cell, _i|
            rowline += "<td>#{cell}</td> "
          end

          if current_table[:rows].size == 1
            new_lines << '<tbody>'
          end
          new_lines << "<tr>#{rowline}</tr>"
        else
          # finished the last row
          unless current_table[:rows].empty? # only process tables with bodies
            @tables << current_table
            new_lines << '</tbody>'
          end
          new_lines << '</table><lb/>'
          current_table = nil
        end
      end
    end

    if current_table
      # unclosed table
      @tables << current_table
      unless current_table[:rows].empty? # only process tables with bodies
        @tables << current_table
        new_lines << '</tbody>'
      end
      new_lines << '</table><lb/>'
    end
    # do something with the table data
    new_lines.join(' ')
  end

  def process_any_sections(line)
    6.downto(2) do |depth|
      line.scan(/(={#{depth}}([^=]+)={#{depth}})/).each do |section_match|
        wiki_title = section_match[1].strip
        if wiki_title.length > 0
          verbatim = XmlSourceProcessor.cell_to_plaintext(wiki_title)
          safe_verbatim = verbatim.gsub(/"/, '&quot;')
          line = line.sub(section_match.first, "<entryHeading title=\"#{safe_verbatim}\" depth=\"#{depth}\" >#{wiki_title}</entryHeading>")
          @sections << Section.new(title: wiki_title, depth: depth)
        end
      end
    end

    line
  end

  def postprocess_sections
    @sections.each do |section|
      doc = XmlSourceProcessor.cell_to_xml(section.title)
      doc.elements.each('//link') do |e|
        title = e.attributes['target_title']
        article = collection.articles.where(title: title).first
        if article
          e.add_attribute('target_id', article.id.to_s)
        end
      end
      section.title = XmlSourceProcessor.xml_to_cell(doc)
    end
  end


  def canonicalize_title(title)
    # kill all tags
    title = title.gsub(/<.*?>/, '')
    # linebreaks -> spaces
    title = title.gsub(/\n/, ' ')
    # multiple spaces -> single spaces
    title = title.gsub(/\s+/, ' ')
    # change double quotes to proper xml
    title = title.gsub(/\"/, '&quot;')
    title
  end

  # Process initial whitespace at the beginning of lines
  # Replaces leading spaces with <indent> elements containing a spaces attribute
  def process_initial_whitespace(text)
    # Beginning of text
    text = text.gsub(/\A( +)(?=\S)/) do
      "<indent spaces=\"#{$1.length}\"/>"
    end

    # Handle the \r (old Mac) line ending case, which ^ doesn't match
    text = text.gsub(/(\r?\n|\r)( +)(?=\S)/) do
      line_break = $1
      spaces = $2.length
      "#{line_break}<indent spaces=\"#{spaces}\"/>"
    end

    text
  end

  # transformations converting source mode transcription to xml
  def process_line_breaks(text, add_paragraph_tags = true)
    if add_paragraph_tags
      text="<p>#{text}</p>"
      text = text.gsub(/\s*\n\s*\n\s*/, '</p><p>')
    else
      text = text.gsub(/\s*\n\s*\n\s*/, '<lb/><lb/>')
    end
    text = text.gsub(/([[:word:]]+)-\r\n\s*/, '\1<lb break="no" />')
    text = text.gsub(/\r\n\s*/, '<lb/>')
    text = text.gsub(/([[:word:]]+)-\n\s*/, '\1<lb break="no" />')
    text = text.gsub(/\n\s*/, '<lb/>')
    text = text.gsub(/([[:word:]]+)-\r\s*/, '\1<lb break="no" />')
    text = text.gsub(/\r\s*/, '<lb/>')
    text
  end

  def valid_xml_from_source(source)
    source = source || ''
    safe = source.gsub /\&/, '&amp;'
    safe.gsub! /\&amp;amp;/, '&amp;'
    safe.gsub! /[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\u10000-\u10FFFF]/, ' '

    string = <<EOF
    <?xml version="1.0" encoding="UTF-8"?>
      <page>
        #{safe}
      </page>
EOF
  end

  def update_links_and_xml(xml_string, preview_mode = false, text_type)
    # first clear out the existing links
    # log the count of articles before and after
    clear_links(text_type) unless preview_mode


    articles_by_title = collection.articles.index_by(&:title) # optimize for common case
    candidate_articles = collection.articles.left_joins(:article_versions) # fall-back
    page_update_timestamp = 1.hour.ago

    processed = ''
    # process it
    doc = REXML::Document.new xml_string
    doc.elements.each('//link') do |element|
      # default the title to the text if it's not specified
      if !(title = element.attributes['target_title'])
        title = element.text
      end
      # display_text = element.text
      display_text = ''
      element.children.each do |e|
        display_text += e.to_s
      end
      debug("link display_text = #{display_text}")
      # change the xml version of quotes back to double quotes for article title
      title = title.gsub('&quot;', '"')

      article = articles_by_title[title]

      if article.nil?
        article = candidate_articles.where('article_versions.title': title)
                                    .where('article_versions.created_on > ?', page_update_timestamp)
                                    .first
        if article.present?
          display_text = article.title
          title = article.title
        end
      end

      # create new blank articles if they don't exist already
      if article.nil?
        article = Article.new
        article.title = title
        article.collection = collection
        article.created_by_id = Current.user.id if Current.user.present?
        article.save! unless preview_mode
        # add the new article to the hash
        articles_by_title[title] = article
      end

      link_id = create_link(article, display_text, text_type) unless preview_mode
      # now update the attribute
      link_element = REXML::Element.new('link')
      element.children.each { |c| link_element.add(c) }
      link_element.add_attribute('target_title', title)
      debug('element='+link_element.inspect)
      debug('article='+article.inspect)
      link_element.add_attribute('target_id', article.id.to_s) unless preview_mode
      link_element.add_attribute('link_id', link_id.to_s) unless preview_mode
      element.replace_with(link_element)
    end
    doc.write(processed)
    processed
  end

  # handle XML-dependent post-processing
  def postprocess_xml_markup(xml_string)
    doc = REXML::Document.new xml_string
    processed = ''
    doc.elements.each('//lb') do |element|
      if element.previous_element && element.previous_sibling.node_type == :element && element.previous_element.name == 'lb'
        pre = doc.to_s
        element.parent.elements.delete(element)
      end
    end
    doc.write(processed)
    processed
  end


  CELL_PREFIX = "<?xml version='1.0' encoding='UTF-8'?><cell>"
1✔
470
  CELL_SUFFIX = '</cell>'
1✔
471

472
  def self.cell_to_xml(cell)
1✔
473
    REXML::Document.new(CELL_PREFIX + cell.gsub('&', '&amp;') + CELL_SUFFIX)
98✔
474
  end
475

476
  def self.xml_to_cell(doc)
1✔
477
    text = ''
43✔
478
    doc.write(text)
43✔
479
    text.sub(CELL_PREFIX, '').sub(CELL_SUFFIX, '')
43✔
480
  end
481

482
  def self.cell_to_plaintext(cell)
1✔
483
    doc = cell_to_xml(cell)
51✔
484
    doc.each_element('.//text()') { |e| p e.text }.join
51✔
485
  end
486

487
  def self.cell_to_subject(cell)
1✔
488
    doc = cell_to_xml(cell)
2✔
489
    subjects = ''
2✔
490
    doc.elements.each('//link') do |e|
2✔
UNCOV
491
      title = e.attributes['target_title']
×
UNCOV
492
      subjects << title
×
UNCOV
493
      subjects << "\n"
×
494
    end
495
    subjects
2✔
496
  end
497

498
  def self.cell_to_category(cell)
1✔
499
    doc = cell_to_xml(cell)
2✔
500
    categories = ''
2✔
501
    doc.elements.each('//link') do |e|
2✔
UNCOV
502
      id = e.attributes['target_id']
×
UNCOV
503
      if id
×
UNCOV
504
        article = Article.find(id)
×
UNCOV
505
        article.categories.each do |category|
×
UNCOV
506
          categories << category.title
×
UNCOV
507
          categories << "\n"
×
508
        end
509
      end
510
    end
511
    categories
2✔
512
  end
513

514
  ##############################################
515
  # Code to rename links within the text.
516
  # This assumes that the name change has already
517
  # taken place within the article table in the DB
518
  ##############################################
519
  def rename_article_links(old_title, new_title)
1✔
520
    title_regex =
521
      Regexp.escape(old_title)
13✔
522
        .gsub('\\ ', ' ') # Regexp.escape converts ' ' to '\\ ' for some reason -- undo this
523
        .gsub(/\s+/, '\s+') # convert multiple whitespaces into 1+n space characters
524

525
    self.source_text = rename_link_in_text(source_text, title_regex, new_title)
13✔
526

527
    # Articles don't have translations, but we still need to update pages.source_translation
528
    if has_attribute?(:source_translation) && !source_translation.nil?
13✔
529
      self.source_translation = rename_link_in_text(source_translation, title_regex, new_title)
5✔
530
    end
531
  end
532

533
  def rename_link_in_text(text, title_regex, new_title)
1✔
534
    if new_title == ''
18✔
535
      # Link deleted, remove [[ ]] but keep the original title text
536

537
      # Handle links of the form [[Old Title|Display Text]] => Display Text
3✔
538
      text = text.gsub(/\[\[#{title_regex}\|([^\]]+)\]\]/i, '\1')
3✔
539
      # Handle links of the form [[Old Title]] => Old Title
540
      text = text.gsub(/\[\[(#{title_regex})\]\]/i, '\1')
3✔
541
    else
542
      # Replace the title part in [[Old Title|Display Text]]
15✔
543
      text = text.gsub(/\[\[#{title_regex}\|/i, "[[#{new_title}|")
15✔
544
      # Replace [[Old Title]] with [[New Title|Old Title]]
545
      text = text.gsub(/\[\[(#{title_regex})\]\]/i, "[[#{new_title}|\\1]]")
15✔
546
    end
547

548
    text
18✔
549
  end
550

551

  def pipe_tables_formatting(text)
    # since Pandoc Pipe Tables extension requires pipe characters at the beginning and end of each line we must add them
    # to the beginning and end of each line
    text.split("\n").map { |line| "|#{line}|" }.join("\n")
  end

  def xml_table_to_markdown_table(table_element, pandoc_format = false, plaintext_export = false)
    text_table = ''

    # clean up in-cell line-breaks
    table_element.xpath('//lb').each { |n| n.replace(' ') }

    # Sanitize single quotes with backticks
    # table_element.xpath('//*').each { |n| n.content.gsub("'", '`') }

    # calculate the widths of each column based on max(header, cell[0...end])
    column_count = ([table_element.xpath('//th').count] + table_element.xpath('//tr').map { |e| e.xpath('td').count }).max
    column_widths = {}
    1.upto(column_count) do |column_index|
      longest_cell = (table_element.xpath("//tr/td[position()=#{column_index}]").map { |e| e.text().length }.max || 0)
      corresponding_heading = heading_length = table_element.xpath("//th[position()=#{column_index}]").first
      heading_length = corresponding_heading.nil? ? 0 : corresponding_heading.text().length
      column_widths[column_index] = [longest_cell, heading_length].max
    end

    # print the header as markdown
    cell_strings = []
    table_element.xpath('//th').each_with_index do |e, i|
      cell_strings << e.text.rjust(column_widths[i+1], ' ')
    end
    text_table << cell_strings.join(' | ') << "\n"

    # print the separator
    text_table << column_count.times.map { |i| ''.rjust(column_widths[i+1], '-') }.join(' | ') << "\n"

    # print each row as markdown
    table_element.xpath('//tr').each do |row_element|
      text_table << row_element.xpath('td').map do |e|
        width = 80 # default for hand-coded tables
        index = e.path.match(/.*td\[(\d+)\]/)
        if index
          width = column_widths[index[1].to_i] || 80
        else
          width = column_widths.values.first
        end

        if plaintext_export
          e.text.rjust(width, ' ')
        else
          inner_html = xml_to_pandoc_md(e.to_s.gsub("'", '&#39;'), false, false, nil, false).gsub("\n", '')
          inner_html.rjust(width, ' ')
        end
      end.join(' | ') << "\n"
    end
    if pandoc_format
      text_table = pipe_tables_formatting(text_table)
    end

    "#{text_table}\n\n"
  end



  def debug(msg)
    logger.debug("DEBUG: #{msg}")
  end
end
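
For orientation, here is a minimal sketch of how the string-processing helpers above behave in isolation. The host class below is a throwaway stub invented for this example; in the application the module is mixed into an ActiveRecord model (a Page-like record that supplies source_text, collection, tex_figures, and so on), and only methods that need no database are called here.

# Illustrative stub only: not part of xml_source_processor.rb or the FromThePage models.
class MarkupScratchpad
  include XmlSourceProcessor
end

pad = MarkupScratchpad.new

# Repair mismatched closing braces left by keyboard shift errors.
pad.clean_bad_braces('[[John Smith}} wrote to us')
# => "[[John Smith]] wrote to us"

# Strip <script> tags to block javascript injection (inner text is kept).
pad.clean_script_tags('<script>alert(1)</script>Dear Sir')
# => "alert(1)Dear Sir"

# Double newlines become paragraph breaks; single newlines become <lb/> elements.
pad.process_line_breaks("line one\nline two\n\nnew paragraph")
# => "<p>line one<lb/>line two</p><p>new paragraph</p>"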