• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

benwbrum / fromthepage / 21263598330

22 Jan 2026 08:21PM UTC coverage: 68.448% (-0.004%) from 68.452%
21263598330

push

github

web-flow
Merge pull request #5224 from benwbrum/copilot/support-white-space-indentation

Support whitespace indentation in transcripts

2201 of 3725 branches covered (59.09%)

Branch coverage included in aggregate %.

15 of 18 new or added lines in 2 files covered. (83.33%)

2 existing lines in 1 file now uncovered.

9121 of 12816 relevant lines covered (71.17%)

133.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.52
/app/models/xml_source_processor.rb
1
module XmlSourceProcessor
1✔
2
  # Validates the subject links found in +source_text+.
  #
  # Nothing is checked when the text is blank, or when the collection is
  # field-based or has subjects disabled; otherwise the text is handed to
  # +validate_links+.
  def validate_source
    return if source_text.blank?
    return if collection&.field_based || collection&.subjects_disabled

    validate_links(source_text)
  end
13

14
  # Validates the subject links found in +source_translation+.
  #
  # Nothing is checked when the translation is blank, or when the collection
  # is field-based or has subjects disabled; otherwise the translation is
  # handed to +validate_links+.
  def validate_source_translation
    return if source_translation.blank?
    return if collection&.field_based || collection&.subjects_disabled

    validate_links(source_translation)
  end
25

26
  # check the text for problems or typos with the subject links
27
  # Checks +text+ for malformed subject links ([[Title]] or [[Title|display]])
  # and records every problem found with +errors.add(:base, ...)+.
  #
  # The text is split on "[[" and each fragment after the first is inspected
  # for: three closing brackets (validation stops at the first such tag), a
  # missing "]]", a blank tag, an unclosed single "[" inside the tag, and a
  # blank title or blank display text around a "|".
  def validate_links(text)
    error_scope = [:activerecord, :errors, :models, :xml_source_processor]
    report = lambda do |key, **options|
      errors.add(:base, I18n.t('subject_linking_error', scope: error_scope) + I18n.t(key, **options, scope: error_scope))
    end

    # split on all begin-braces
    tags = text.split('[[')
    debug("validate_source: tags to process are #{tags.inspect}")
    # Remove only the leading fragment (the text before the first tag).
    # Array difference would also discard every later tag that happens to be
    # equal to that leading text, leaving those tags unvalidated.
    tags = tags.drop(1)
    debug("validate_source: massaged tags to process are #{tags.inspect}")

    tags.each do |tag|
      debug(tag)

      if tag.include?(']]]')
        report.call('tags_should_not_use_3_brackets')
        return
      end

      unless tag.include?(']]')
        tag = tag.strip
        report.call('wrong_number_of_closing_braces', tag: "\"[[#{tag}\"")
      end

      # Just pull the piece before the closing braces. partition always yields
      # a string, whereas split(']]')[0] is nil for "]]" or "" and crashed below.
      inner_tag = tag.partition(']]').first
      report.call('blank_tag_in', tag: "\"[[#{tag}\"") if inner_tag.match?(/^\s*$/)

      # check for unclosed single bracket
      if inner_tag.include?('[') && !inner_tag.include?(']')
        report.call('unclosed_bracket_within', tag: "\"#{inner_tag}\"")
      end

      # check for blank title or display name with pipes
      next unless inner_tag.include?('|')

      # limit -1 keeps trailing empty parts so "Title|" is seen as a blank display text
      tag_parts = inner_tag.split('|', -1)
      debug("validate_source: inner tag parts are #{tag_parts.inspect}")
      report.call('blank_subject_in', tag: "\"[[#{inner_tag}]]\"") if tag_parts[0].match?(/^\s*$/)
      report.call('blank_text_in', tag: "\"[[#{inner_tag}]]\"") if tag_parts[1].match?(/^\s*$/)
    end
    #    return errors.size > 0
  end
73

74
# Assigns +source_text+, first calling +source_text_will_change!+ and then
# delegating the assignment itself to the next implementation up the
# ancestor chain.
# NOTE(review): presumably this forces the attribute to be flagged dirty so
# that process_source regenerates the XML -- confirm against the including model.
def source_text=(text)
  source_text_will_change!
  super(text)
end
78

79
# Assigns +source_translation+, first calling +source_translation_will_change!+
# and then delegating the assignment itself to the next implementation up the
# ancestor chain.
# NOTE(review): presumably this forces the attribute to be flagged dirty so
# that process_source regenerates the XML -- confirm against the including model.
def source_translation=(text)
  source_translation_will_change!
  super(text)
end
83

84
  ##############################################
85
  # All code to convert transcriptions from source
86
  # format to canonical xml format belongs here.
87
  ##############################################
88
  # Regenerates the canonical XML for whichever source fields have changed.
  #
  # +xml_text+ is rebuilt when +source_text+ changed; +xml_translation+ is
  # rebuilt when the object has a +source_translation+ and it changed.
  def process_source
    self.xml_text = wiki_to_xml(self, Page::TEXT_TYPE::TRANSCRIPTION) if source_text_changed?

    return unless respond_to?(:source_translation) && source_translation_changed?

    self.xml_translation = wiki_to_xml(self, Page::TEXT_TYPE::TRANSLATION)
  end
97

98
  # Converts a page's wiki-style source into the canonical XML string.
  #
  # page         - object supplying +collection+, +source_text+ and +source_translation+
  # text_type    - Page::TEXT_TYPE::TRANSCRIPTION or Page::TEXT_TYPE::TRANSLATION;
  #                any other value is processed as an empty string
  # preview_mode - passed through to +update_links_and_xml+
  #
  # The steps run in a fixed order: LaTeX snippets, brace clean-up, script-tag
  # removal, subject links (skipped when the collection has subjects disabled),
  # line-wise markup, leading whitespace, line breaks, XML wrapping, link
  # resolution and XML post-processing. +postprocess_sections+ is called last.
  # Returns the resulting XML string.
  def wiki_to_xml(page, text_type, preview_mode = false)
    subjects_disabled = page.collection.subjects_disabled

    raw_source =
      case text_type
      when Page::TEXT_TYPE::TRANSCRIPTION then page.source_text
      when Page::TEXT_TYPE::TRANSLATION then page.source_translation
      else ''
      end

    # Work on a copy: several of the steps below modify their argument in place.
    xml = String.new(raw_source)
    xml = process_latex_snippets(xml)
    xml = clean_bad_braces(xml)
    xml = clean_script_tags(xml)
    xml = process_square_braces(xml) unless subjects_disabled
    xml = process_linewise_markup(xml)
    xml = process_initial_whitespace(xml)
    # paragraph tags are only added for collections that are not field-based
    xml = process_line_breaks(xml, !page.collection.field_based?)
    xml = valid_xml_from_source(xml)
    xml = update_links_and_xml(xml, preview_mode, text_type)
    xml = postprocess_xml_markup(xml)
    postprocess_sections
    xml
  end
124

125

126
  # remove script tags from HTML to prevent javascript injection
127
  # Removes opening and closing <script ...> tags from +text+ to prevent
  # javascript injection. Only the tags are removed; text between them stays.
  #
  # The removal is repeated until the text stops changing, because a single
  # pass lets nested input such as "<scr<script>ipt>" collapse into a fresh
  # "<script>" tag. Matching is case-insensitive so "<SCRIPT>" is removed too.
  #
  # Returns a new string; +text+ itself is not modified.
  def clean_script_tags(text)
    cleaned = text
    loop do
      stripped = cleaned.gsub(/<\/?script.*?>/mi, '')
      return stripped if stripped == cleaned

      cleaned = stripped
    end
  end
131

132
  # Matches a link opened with "[[" but closed with "}}", capturing the text in
  # between (letters, blanks, digits and the characters | , ( ) -).
  BAD_SHIFT_REGEX = /\[\[([[[:alpha:]][[:blank:]]|,\(\)\-[[:digit:]]]+)\}\}/

  # Rewrites links mistakenly closed with "}}" so that they close with "]]".
  # Returns a new string.
  def clean_bad_braces(text)
    text.gsub(BAD_SHIFT_REGEX) { "[[#{Regexp.last_match(1)}]]" }
  end
136

137
  # Matches one wiki link, from "[[" to the nearest "]]" (may span lines).
  BRACE_REGEX = /\[\[.*?\]\]/m

  # Replaces every [[Title]] / [[Title|display]] link in +text+ with a
  # <link target_title="...">display</link> element. The title is passed
  # through +canonicalize_title+; without a pipe the link text serves as both
  # title and display text.
  #
  # +text+ is modified in place and returned.
  def process_square_braces(text)
    text.scan(BRACE_REGEX).each do |wikilink|
      # strip braces
      inner = wikilink.sub('[[', '').sub(']]', '')

      # extract the title and display
      title, verbatim = inner.include?('|') ? inner.split('|') : [inner, inner]

      replacement = "<link target_title=\"#{canonicalize_title(title)}\">#{verbatim}</link>"
      # Block form: a replacement *string* would have its backslash sequences
      # (\0, \1, \\) interpreted, corrupting link text that contains backslashes.
      text.sub!(wikilink) { replacement }
    end

    text
  end
164

165
  # Strips wiki link markup from +text+, leaving only the display text:
  # [[Title|display]] becomes "display" and [[Title]] becomes "Title".
  #
  # +text+ is modified in place and returned.
  def remove_square_braces(text)
    text.scan(BRACE_REGEX).each do |wikilink|
      # Remove the title part. /m lets a title that spans lines be removed,
      # consistent with BRACE_REGEX, which matches links across lines.
      display = wikilink.include?('|') ? wikilink.sub(/\[\[.*?\|/m, '') : wikilink
      display = display.sub('[[', '').sub(']]', '')

      # Block form so backslashes in the display text are inserted literally
      # instead of being interpreted as back-references.
      text.sub!(wikilink) { display }
    end
    text
  end
180

181
  # Matches a {{tex: ... :tex}} snippet; group 1 is the whole snippet and
  # group 2 the contents between the markers.
  LATEX_SNIPPET = /(\{\{tex:?(.*?):?tex\}\})/m

  # Replaces each LaTeX snippet in +text+ with a <texFigure position="N"/>
  # placeholder (N counts from 1) and stores the snippet contents as the
  # +source+ of the N-th entry of +tex_figures+, creating a TexFigure when
  # none exists at that position.
  #
  # Objects that do not respond to +tex_figures+ get +text+ back untouched.
  # Otherwise +text+ is modified in place and returned, and +tex_figures+ is
  # reassigned even when no snippet was found.
  def process_latex_snippets(text)
    return text unless respond_to?(:tex_figures)

    figures = tex_figures.to_a
    position = 0

    # Replace occurrence by occurrence. Keying the replacements on the snippet
    # text (as a hash) merges identical snippets, leaving later copies unreplaced.
    if text.match?(LATEX_SNIPPET)
      text.gsub!(LATEX_SNIPPET) do
        contents = Regexp.last_match(2)
        figure = figures[position] || TexFigure.new
        figure.source = contents unless figure.source == contents
        figures[position] = figure
        position += 1
        "<texFigure position=\"#{position}\"/>" # position attribute in acts as list starts with 1
      end
    end

    self.tex_figures = figures
    text
  end
205

206
  # A table header or row is any line containing a pipe surrounded by whitespace.
  HEADER = /\s\|\s/
  # A separator line contains three dashes followed later by a pipe.
  SEPARATOR = /---.*\|/
  ROW = HEADER

  # Processes +text+ line by line: section headings are handled by
  # +process_any_sections+, and pipe-delimited tables are converted to
  # <table class="tabular"> markup.
  #
  # Resets and fills @sections and @tables. Each table is recorded as a hash
  # with :header (cell titles), :rows (arrays of cell strings) and :section
  # (the last section seen when the table started).
  #
  # Returns the processed lines joined with a single space.
  def process_linewise_markup(text)
    @tables = []
    @sections = []
    new_lines = []
    current_table = nil

    text.lines.each do |line|
      # first deal with any sections
      line = process_any_sections(line)

      if current_table.nil?
        # look for a header
        unless line.match?(HEADER)
          # no current table, no table contents -- NO-OP
          new_lines << line
          next
        end

        line = line.chomp
        current_table = { header: [], rows: [], section: @sections.last }
        # fill the header
        cells = line.split(/\s*\|\s*/)
        cells.shift if line.match?(/^\|/) # remove leading pipe
        # trim whitespace from each header cell
        cells = cells.map(&:strip)

        current_table[:header] = cells.map { |cell_title| cell_title.sub(/^!\s*/, '') }
        heading = cells.map do |cell|
          if cell.match?(/^!/)
            "<th class=\"bang\">#{cell.sub(/^!\s*/, '')}</th>"
          else
            "<th>#{cell}</th>"
          end
        end.join(' ')
        new_lines << "<table class=\"tabular\">\n<thead>\n<tr>#{heading}</tr></thead>"
      elsif line.match?(SEPARATOR)
        # separator line -- NO-OP
      elsif line.match?(ROW)
        line_chomp = line.chomp
        # if the line starts with whitespace followed by a pipe, preserve the empty first cell
        has_initial_empty_cell = line_chomp.match?(/^\s+\|/)

        # remove leading and trailing delimiters
        clean_line = line_chomp.sub(/^\s*\|/, '').sub(/\|\s*$/, '')
        # fill the row; -1 means "don't prune empty values at the end"
        cells = clean_line.split(/\s*\|\s*/, -1).map(&:strip)
        cells.unshift('') if has_initial_empty_cell
        current_table[:rows] << cells

        new_lines << '<tbody>' if current_table[:rows].size == 1
        new_lines << "<tr>#{cells.map { |cell| "<td>#{cell}</td> " }.join}</tr>"
      else
        # finished the last row
        # NOTE(review): the line that ends the table is not added to the
        # output; confirm that dropping it is intended when it is not blank.
        unless current_table[:rows].empty? # only process tables with bodies
          @tables << current_table
          new_lines << '</tbody>'
        end
        new_lines << '</table><lb/>'
        current_table = nil
      end
    end

    if current_table
      # Unclosed table: record it exactly once. It used to be appended a
      # second time whenever it had rows, duplicating it in @tables.
      # NOTE(review): header-only tables are still recorded here, unlike
      # tables closed inside the loop -- confirm which behaviour is wanted.
      @tables << current_table
      new_lines << '</tbody>' unless current_table[:rows].empty?
      new_lines << '</table><lb/>'
    end

    new_lines.join(' ')
  end
296

297
  # Converts wiki-style headings in +line+ (a title wrapped in 2 to 6 "="
  # characters on each side) into <entryHeading> elements and appends a
  # Section for each one to @sections. Deeper headings are tried first.
  #
  # Returns the (possibly rewritten) line.
  def process_any_sections(line)
    6.downto(2) do |depth|
      line.scan(/(={#{depth}}([^=]+)={#{depth}})/).each do |markup, raw_title|
        wiki_title = raw_title.strip
        next if wiki_title.empty?

        verbatim = XmlSourceProcessor.cell_to_plaintext(wiki_title)
        safe_verbatim = verbatim.gsub(/"/, '&quot;')
        heading = "<entryHeading title=\"#{safe_verbatim}\" depth=\"#{depth}\" >#{wiki_title}</entryHeading>"
        # Block form keeps backslashes in the heading literal; a replacement
        # string would treat sequences such as \0 or \1 as back-references.
        line = line.sub(markup) { heading }
        @sections << Section.new(title: wiki_title, depth: depth)
      end
    end

    line
  end
312

313
  # Adds a target_id attribute to every <link> inside each section title in
  # @sections whose target_title matches an article title in the collection,
  # then stores the rewritten title back on the section.
  def postprocess_sections
    @sections.each do |section|
      doc = XmlSourceProcessor.cell_to_xml(section.title)
      doc.elements.each('//link') do |link|
        article = collection.articles.where(title: link.attributes['target_title']).first
        link.add_attribute('target_id', article.id.to_s) if article
      end
      section.title = XmlSourceProcessor.xml_to_cell(doc)
    end
  end
326

327

328
  # Normalises a link title: strips tags, turns line breaks into spaces,
  # collapses runs of whitespace into one space and replaces double quotes
  # with &quot;. Returns a new string.
  def canonicalize_title(title)
    title
      .gsub(/<.*?>/, '')     # kill all tags
      .gsub(/\n/, ' ')       # linebreaks -> spaces
      .gsub(/\s+/, ' ')      # multiple spaces -> single spaces
      .gsub(/\"/, '&quot;')  # change double quotes to proper xml
  end
339

340
  # Process initial whitespace at the beginning of lines
341
  # Replaces leading spaces with <indent> elements containing a spaces attribute
342
  # Replaces the run of spaces at the start of each line with an
  # <indent spaces="N"/> element, N being the number of spaces.
  def process_initial_whitespace(text)
    # ^ matches at the start of the text and after \n (so also after \r\n),
    # but not after a lone \r.
    indented = text.gsub(/^( +)/) { |leading| "<indent spaces=\"#{leading.length}\"/>" }

    # Lines terminated by a lone \r (old Mac) are handled separately; the \r
    # itself is kept in front of the indent element.
    indented.gsub(/(\r)( +)(?!\n)/) do
      carriage_return = Regexp.last_match(1)
      width = Regexp.last_match(2).length
      "#{carriage_return}<indent spaces=\"#{width}\"/>"
    end
  end
356

357
  # transformations converting source mode transcription to xml
358
  # Transforms the line breaks of source-mode text into XML markup.
  #
  # Blank lines become paragraph boundaries (the text is wrapped in <p>...</p>)
  # when +add_paragraph_tags+ is true, or a pair of <lb/> elements otherwise.
  # Every remaining line break becomes <lb/>; one that directly follows a
  # hyphenated word becomes <lb break="no" /> and the hyphen is dropped.
  def process_line_breaks(text, add_paragraph_tags = true)
    text =
      if add_paragraph_tags
        "<p>#{text}</p>".gsub(/\s*\n\s*\n\s*/, '</p><p>')
      else
        text.gsub(/\s*\n\s*\n\s*/, '<lb/><lb/>')
      end

    # Order matters: \r\n must be consumed before the lone \n and \r forms,
    # and each hyphenated form before its plain counterpart.
    [
      [/([[:word:]]+)-\r\n\s*/, '\1<lb break="no" />'],
      [/\r\n\s*/, '<lb/>'],
      [/([[:word:]]+)-\n\s*/, '\1<lb break="no" />'],
      [/\n\s*/, '<lb/>'],
      [/([[:word:]]+)-\r\s*/, '\1<lb break="no" />'],
      [/\r\s*/, '<lb/>']
    ].reduce(text) { |current, (pattern, markup)| current.gsub(pattern, markup) }
  end
373

374
  # Wraps +source+ (nil is treated as an empty string) in an XML declaration
  # and a <page> element, after:
  #   * escaping every "&" as "&amp;" (an "&amp;" already present in the
  #     source is left as a single "&amp;"), and
  #   * replacing every character outside the XML 1.0 Char ranges with a space.
  #
  # The supplementary planes are written as \u{10000}-\u{10FFFF}. The previous
  # \u10000-\u10FFFF was read as \u1000, the range 0-\u10FF and two literal F
  # characters, so characters above U+FFFF were replaced with spaces.
  #
  # Returns the XML document as a string.
  def valid_xml_from_source(source)
    safe = (source || '').gsub(/\&/, '&amp;')
    safe.gsub!(/\&amp;amp;/, '&amp;')
    safe.gsub!(/[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/, ' ')

    <<EOF
    <?xml version="1.0" encoding="UTF-8"?>
      <page>
        #{safe}
      </page>
EOF
  end
387

388
  # Resolves every <link> element of +xml_string+ against the collection's
  # articles and returns the rewritten XML as a string.
  #
  # Per link: the target title is taken from the target_title attribute (or
  # the link text when absent); an article with that title is looked up, then
  # one having an article version with that title created within the last
  # hour, and otherwise a new Article is built. Unless +preview_mode+ is set,
  # existing links are cleared first, new articles are saved, a link record
  # is created via +create_link+, and target_id / link_id attributes are
  # written to the element.
  def update_links_and_xml(xml_string, preview_mode = false, text_type)
    # first clear out the existing links
    clear_links(text_type) unless preview_mode

    candidate_articles = collection.articles.left_joins(:article_versions)
    page_update_timestamp = 1.hour.ago

    doc = REXML::Document.new xml_string
    doc.elements.each('//link') do |element|
      # default the title to the text if it's not specified
      title = element.attributes['target_title'] || element.text

      display_text = element.children.map(&:to_s).join
      debug("link display_text = #{display_text}")
      # change the xml version of quotes back to double quotes for article title
      title = title.gsub('&quot;', '"')

      article = candidate_articles.find_by(title: title)

      if article.nil?
        # NOTE(review): presumably this catches an article renamed within the
        # last hour; when found, its current title replaces both the link
        # title and the display text.
        article = candidate_articles.where('article_versions.title': title)
                                    .where('article_versions.created_on > ?', page_update_timestamp)
                                    .first
        if article.present?
          display_text = article.title
          title = article.title
        end
      end

      # create new blank articles if they don't exist already
      if article.nil?
        article = Article.new
        article.title = title
        article.collection = collection
        article.created_by_id = Current.user.id if Current.user.present?
        article.save! unless preview_mode
      end

      link_id = create_link(article, display_text, text_type) unless preview_mode

      # now update the attribute
      link_element = REXML::Element.new('link')
      element.children.each { |child| link_element.add(child) }
      link_element.add_attribute('target_title', title)
      debug("element=#{link_element.inspect}")
      debug("article=#{article.inspect}")
      unless preview_mode
        link_element.add_attribute('target_id', article.id.to_s)
        link_element.add_attribute('link_id', link_id.to_s)
      end
      element.replace_with(link_element)
    end

    processed = ''
    doc.write(processed)
    processed
  end
448

449
  # handle XML-dependent post-processing
450
  # Handles XML-dependent post-processing: removes every <lb> element whose
  # immediately preceding sibling node is itself an <lb> element, so runs of
  # adjacent line breaks collapse to one. Returns the document as a string.
  def postprocess_xml_markup(xml_string)
    doc = REXML::Document.new(xml_string)

    doc.elements.each('//lb') do |element|
      previous = element.previous_element
      # previous_sibling must be an element too, otherwise text sits between the two breaks
      next unless previous && element.previous_sibling.node_type == :element && previous.name == 'lb'

      # (an unused serialisation of the whole document used to run here for every removal)
      element.parent.elements.delete(element)
    end

    processed = ''
    doc.write(processed)
    processed
  end
462

463

464
  # Wrapper placed around a cell's contents so it parses as an XML document.
  CELL_PREFIX = "<?xml version='1.0' encoding='UTF-8'?><cell>"
  CELL_SUFFIX = '</cell>'

  # Parses +cell+ as the content of a <cell> element and returns the
  # REXML::Document. Every "&" in +cell+ is escaped as "&amp;" first.
  def self.cell_to_xml(cell)
    escaped = cell.gsub('&', '&amp;')
    REXML::Document.new("#{CELL_PREFIX}#{escaped}#{CELL_SUFFIX}")
  end
470

471
  # Serialises +doc+ and removes the first occurrence of CELL_PREFIX and of
  # CELL_SUFFIX, returning the cell contents as a string.
  def self.xml_to_cell(doc)
    serialized = ''
    doc.write(serialized)
    without_prefix = serialized.sub(CELL_PREFIX, '')
    without_prefix.sub(CELL_SUFFIX, '')
  end
476

477
  # Returns the text of +cell+ with all markup removed: the cell is parsed
  # with +cell_to_xml+ and its text nodes are joined in the order the XPath
  # match returns them.
  #
  # Replaces a form that passed a block containing a debugging `p` call to
  # each_element; that block could never run because each_element only yields
  # Element nodes, and the value returned was already this list of text nodes.
  def self.cell_to_plaintext(cell)
    doc = cell_to_xml(cell)
    REXML::XPath.match(doc, './/text()').join
  end
481

482
  # Returns the target_title of every <link> in +cell+, each followed by a
  # newline, concatenated into one string.
  def self.cell_to_subject(cell)
    subjects = ''
    cell_to_xml(cell).elements.each('//link') do |link|
      subjects << link.attributes['target_title'] << "\n"
    end
    subjects
  end
492

493
  # Returns the titles of the categories of every article linked from +cell+,
  # each followed by a newline. Only <link> elements carrying a target_id are
  # considered; the article is loaded with Article.find.
  def self.cell_to_category(cell)
    categories = ''
    cell_to_xml(cell).elements.each('//link') do |link|
      article_id = link.attributes['target_id']
      next unless article_id

      Article.find(article_id).categories.each do |category|
        categories << category.title << "\n"
      end
    end
    categories
  end
508

509
  ##############################################
510
  # Code to rename links within the text.
511
  # This assumes that the name change has already
512
  # taken place within the article table in the DB
513
  ##############################################
514
  # Rewrites the links to +old_title+ in +source_text+ (and in
  # +source_translation+, when that attribute exists and is not nil) so that
  # they point at +new_title+. The actual rewriting is done by
  # +rename_link_in_text+.
  def rename_article_links(old_title, new_title)
    escaped_title = Regexp.escape(old_title)
    title_regex =
      escaped_title
        .gsub('\\ ', ' ')   # Regexp.escape turns ' ' into '\\ ' -- undo this
        .gsub(/\s+/, '\s+') # any run of whitespace matches one or more whitespace characters

    self.source_text = rename_link_in_text(source_text, title_regex, new_title)

    # Articles don't have translations, but we still need to update pages.source_translation
    return unless has_attribute?(:source_translation) && !source_translation.nil?

    self.source_translation = rename_link_in_text(source_translation, title_regex, new_title)
  end
527

528
  # Rewrites links in +text+ whose title matches +title_regex+ (regexp source,
  # matched case-insensitively).
  #
  # When +new_title+ is '' the link was deleted: the markup is removed and the
  # visible text kept ([[Old|Shown]] => Shown, [[Old]] => Old). Otherwise the
  # title is replaced and the original text kept as the display text
  # ([[Old|Shown]] => [[New|Shown]], [[Old]] => [[New|Old]]).
  #
  # Returns the rewritten string; +text+ itself is not modified.
  def rename_link_in_text(text, title_regex, new_title)
    bare_link = /\[\[(#{title_regex})\]\]/i

    if new_title == ''
      # Link deleted, remove [[ ]] but keep the original title text
      text
        .gsub(/\[\[#{title_regex}\|([^\]]+)\]\]/i, '\1') # [[Old Title|Display Text]] => Display Text
        .gsub(bare_link, '\1')                           # [[Old Title]] => Old Title
    else
      # Block replacements insert new_title literally. Interpolating it into a
      # replacement string would interpret any backslash sequence it contains.
      text
        .gsub(/\[\[#{title_regex}\|/i) { "[[#{new_title}|" }              # [[Old Title|Display Text]]
        .gsub(bare_link) { "[[#{new_title}|#{Regexp.last_match(1)}]]" }   # [[Old Title]] => [[New Title|Old Title]]
    end
  end
545

546

547
  # Wraps every line of +text+ in pipe characters, since the Pandoc Pipe
  # Tables extension requires a pipe at the beginning and end of each line.
  def pipe_tables_formatting(text)
    piped_rows = text.split("\n").map { |row| "|#{row}|" }
    piped_rows.join("\n")
  end
552

553
  # Renders +table_element+ as a markdown-style text table: a header line, a
  # dashed separator line and one line per <tr>, cells joined by " | " and
  # right-justified to their column width.
  #
  # pandoc_format    - when true, every line is additionally wrapped in pipes
  #                    (see +pipe_tables_formatting+)
  # plaintext_export - when true, cells contain their plain text; otherwise
  #                    each cell is converted with +xml_to_pandoc_md+
  #
  # <lb> elements in the table are replaced by spaces as a side effect.
  # Returns the table text followed by two newlines.
  # NOTE(review): the XPath expressions start with "//" -- confirm that
  # table_element never shares a document with other tables.
  def xml_table_to_markdown_table(table_element, pandoc_format = false, plaintext_export = false)
    text_table = ''

    # clean up in-cell line-breaks
    table_element.xpath('//lb').each { |n| n.replace(' ') }

    # Sanitize single quotes with backticks
    # table_element.xpath('//*').each { |n| n.content.gsub("'", '`') }

    # calculate the widths of each column based on max(header, cell[0...end])
    cells_per_row = table_element.xpath('//tr').map { |row| row.xpath('td').count }
    column_count = ([table_element.xpath('//th').count] + cells_per_row).max
    column_widths = (1..column_count).to_h do |column_index|
      longest_cell = table_element.xpath("//tr/td[position()=#{column_index}]").map { |e| e.text().length }.max || 0
      heading = table_element.xpath("//th[position()=#{column_index}]").first
      heading_length = heading.nil? ? 0 : heading.text().length
      [column_index, [longest_cell, heading_length].max]
    end

    # print the header as markdown
    header_cells = table_element.xpath('//th').each_with_index.map do |th, i|
      th.text.rjust(column_widths[i + 1], ' ')
    end
    text_table << header_cells.join(' | ') << "\n"

    # print the separator
    dashes = column_count.times.map { |i| ''.rjust(column_widths[i + 1], '-') }
    text_table << dashes.join(' | ') << "\n"

    # print each row as markdown
    table_element.xpath('//tr').each do |row_element|
      row_cells = row_element.xpath('td').map do |cell|
        # The cell's position is read from its XPath; a path without an index
        # falls back to the first column's width, an unknown index to 80.
        position = cell.path.match(/.*td\[(\d+)\]/)
        width = position ? (column_widths[position[1].to_i] || 80) : column_widths.values.first

        content =
          if plaintext_export
            cell.text
          else
            xml_to_pandoc_md(cell.to_s.gsub("'", '&#39;'), false, false, nil, false).gsub("\n", '')
          end
        content.rjust(width, ' ')
      end
      text_table << row_cells.join(' | ') << "\n"
    end

    text_table = pipe_tables_formatting(text_table) if pandoc_format

    "#{text_table}\n\n"
  end
607

608

609

610
  # Writes +msg+ to the logger at debug level, prefixed with "DEBUG: ".
  def debug(msg)
    prefixed = "DEBUG: #{msg}"
    logger.debug(prefixed)
  end
613
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc