• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

benwbrum / fromthepage / 21598438235

02 Feb 2026 04:29PM UTC coverage: 68.413% (-0.1%) from 68.529%
21598438235

push

github

web-flow
Merge pull request #4896 from benwbrum/4880-latex-export-refactor

4880 - Latex export refactor

2240 of 3800 branches covered (58.95%)

Branch coverage included in aggregate %.

169 of 183 new or added lines in 4 files covered. (92.35%)

186 existing lines in 4 files now uncovered.

9280 of 13039 relevant lines covered (71.17%)

150.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.79
/app/models/xml_source_processor.rb
1
module XmlSourceProcessor
1✔
2
  # Validates the subject links found in the transcription text.
  #
  # Nothing is checked when the text is blank, or when the collection is
  # field-based or has subjects disabled; otherwise the text is handed to
  # validate_links.
  def validate_source
    transcription = source_text
    return if transcription.blank?
    # Subject linking does not apply to these collections.
    return if collection&.field_based || collection&.subjects_disabled

    validate_links(transcription)
  end
13

14
  # Validates the subject links found in the translation text.
  #
  # Nothing is checked when the translation is blank, or when the collection
  # is field-based or has subjects disabled; otherwise the translation is
  # handed to validate_links.
  def validate_source_translation
    translation = source_translation
    return if translation.blank?
    # Subject linking does not apply to these collections.
    return if collection&.field_based || collection&.subjects_disabled

    validate_links(translation)
  end
25

26
  # check the text for problems or typos with the subject links
27
  # Checks +text+ for malformed subject links ([[Subject]] or
  # [[Subject|display text]]) and records every problem on errors[:base].
  #
  # Problems reported: three closing brackets (stops validation immediately),
  # a missing "]]", a blank tag, an unclosed single "[" inside a tag, and a
  # blank subject or blank display text around a pipe.
  #
  # text - the String to inspect.
  def validate_links(text)
    error_scope = [:activerecord, :errors, :models, :xml_source_processor]
    add_error = lambda do |key, **details|
      errors.add(
        :base,
        I18n.t('subject_linking_error', scope: error_scope) + I18n.t(key, **details, scope: error_scope)
      )
    end

    # split on all begin-braces
    tags = text.split('[[')
    debug("validate_source: tags to process are #{tags.inspect}")
    # Remove only the leading chunk (the text before the first tag).
    # Array#- would also discard every later chunk that happens to equal it.
    tags = tags.drop(1)
    debug("validate_source: massaged tags to process are #{tags.inspect}")

    tags.each do |tag|
      debug(tag)

      if tag.include?(']]]')
        add_error.call('tags_should_not_use_3_brackets')
        return
      end

      unless tag.include?(']]')
        tag = tag.strip
        add_error.call('wrong_number_of_closing_braces', tag: "\"[[#{tag}\"")
      end

      # just pull the pieces between the braces; split yields no elements
      # for "" or "]]", so fall back to an empty string instead of nil
      inner_tag = tag.split(']]').first.to_s
      add_error.call('blank_tag_in', tag: "\"[[#{tag}\"") if inner_tag =~ /^\s*$/

      # check for unclosed single bracket
      if inner_tag.include?('[') && !inner_tag.include?(']')
        add_error.call('unclosed_bracket_within', tag: "\"#{inner_tag}\"")
      end

      # check for blank title or display name with pipes
      next unless inner_tag.include?('|')

      # limit -1 keeps a trailing empty part so "[[Subject|]]" is detected
      tag_parts = inner_tag.split('|', -1)
      debug("validate_source: inner tag parts are #{tag_parts.inspect}")
      quoted_tag = "\"[[#{inner_tag}]]\""
      add_error.call('blank_subject_in', tag: quoted_tag) if tag_parts[0] =~ /^\s*$/
      add_error.call('blank_text_in', tag: quoted_tag) if tag_parts[1] =~ /^\s*$/
    end
  end
73

74
  # Setter for the transcription text. Calls source_text_will_change! before
  # delegating to the inherited writer (presumably ActiveModel dirty tracking,
  # so the attribute is flagged as changed on every assignment -- confirm
  # against the including model).
  def source_text=(text)
    source_text_will_change!
    super(text)
  end
78

79
  # Setter for the translation text. Calls source_translation_will_change!
  # before delegating to the inherited writer (presumably ActiveModel dirty
  # tracking, so the attribute is flagged as changed on every assignment --
  # confirm against the including model).
  def source_translation=(text)
    source_translation_will_change!
    super(text)
  end
83

84
  ##############################################
85
  # All code to convert transcriptions from source
86
  # format to canonical xml format belongs here.
87
  ##############################################
88
  # Regenerates the canonical XML for whichever source texts have changed.
  #
  # xml_text is rebuilt when the transcription changed; xml_translation is
  # rebuilt when the receiver has a source_translation and it changed.
  def process_source
    self.xml_text = wiki_to_xml(self, Page::TEXT_TYPE::TRANSCRIPTION) if source_text_changed?

    return unless respond_to?(:source_translation) && source_translation_changed?

    self.xml_translation = wiki_to_xml(self, Page::TEXT_TYPE::TRANSLATION)
  end
97

98
  # Converts the wiki-style source text of +page+ into the canonical XML.
  #
  # page         - object providing collection, source_text and
  #                source_translation.
  # text_type    - Page::TEXT_TYPE::TRANSCRIPTION or ::TRANSLATION; any other
  #                value processes an empty string.
  # preview_mode - passed through to update_links_and_xml.
  #
  # The text is run through a fixed sequence of transformations; after each
  # one the elapsed time since the start is written to the log. Subject link
  # markup is only expanded when the collection has subjects enabled.
  #
  # Returns the resulting XML String.
  def wiki_to_xml(page, text_type, preview_mode = false)
    subjects_disabled = page.collection.subjects_disabled

    raw_text =
      case text_type
      when Page::TEXT_TYPE::TRANSCRIPTION then page.source_text
      when Page::TEXT_TYPE::TRANSLATION then page.source_translation
      else ''
      end

    markup = String.new(raw_text)
    started_at = Time.now
    log_step = lambda do |step|
      logger.info("wiki_to_xml: #{step} finished at #{Time.now - started_at} seconds")
    end

    markup = process_latex_snippets(markup)
    log_step.call('process_latex_snippets')

    markup = clean_bad_braces(markup)
    log_step.call('clean_bad_braces')

    markup = clean_script_tags(markup)
    log_step.call('clean_script_tags')

    markup = process_square_braces(markup) unless subjects_disabled
    log_step.call('process_square_braces')

    markup = process_initial_whitespace(markup)
    log_step.call('process_initial_whitespace')

    markup = process_linewise_markup(markup)
    log_step.call('process_linewise_markup')

    markup = process_line_breaks(markup, is_field_based: page.collection.field_based?)
    log_step.call('process_line_breaks')

    markup = valid_xml_from_source(markup)
    log_step.call('valid_xml_from_source')

    markup = update_links_and_xml(markup, preview_mode, text_type)
    log_step.call('update_links_and_xml')

    markup = postprocess_xml_markup(markup)
    log_step.call('postprocess_xml_markup')

    postprocess_sections
    log_step.call('postprocess_sections')

    markup
  end
136

137
  # remove script tags from HTML to prevent javascript injection
138
  # Strips opening and closing <script> tags from +text+; whatever stood
  # between the tags is kept. Returns a new String.
  def clean_script_tags(text)
    script_tag = /<\/?script.*?>/m
    text.gsub(script_tag, '')
  end
142

143
  # A subject link opened with "[[" but closed with "}}" (a slipped shift key).
  BAD_SHIFT_REGEX = /\[\[([[[:alpha:]][[:blank:]]|,\(\)\-[[:digit:]]]+)\}\}/

  # Rewrites links mistakenly closed with "}}" so that they end with "]]".
  # Returns a new String.
  def clean_bad_braces(text)
    text.gsub(BAD_SHIFT_REGEX) { "[[#{Regexp.last_match(1)}]]" }
  end
147

148
  # A complete wiki link: "[[" up to the nearest "]]", possibly across lines.
  BRACE_REGEX = /\[\[.*?\]\]/m

  # Replaces every [[Title]] / [[Title|display text]] wiki link in +text+
  # with a <link target_title="...">display text</link> element.
  #
  # The title is normalised with canonicalize_title; without a pipe the link
  # text serves as both title and display text.
  #
  # text - the String to convert; it is modified in place.
  #
  # Returns +text+.
  def process_square_braces(text)
    # The block form inserts the replacement literally. A replacement string
    # would have backslash sequences from the transcription (\0, \1, \\)
    # interpreted as back-references.
    text.gsub!(BRACE_REGEX) do |wikilink|
      # strip braces
      inner = wikilink.sub('[[', '').sub(']]', '')

      # extract the title and display
      if inner.include?('|')
        title, verbatim = inner.split('|')
      else
        title = verbatim = inner
      end

      "<link target_title=\"#{canonicalize_title(title)}\">#{verbatim}</link>"
    end

    text
  end
175

176
  # Replaces every wiki link in +text+ with its visible text:
  # [[Title|display text]] becomes "display text" and [[Title]] becomes
  # "Title".
  #
  # text - the String to convert; it is modified in place.
  #
  # Returns +text+.
  def remove_square_braces(text)
    # The block form inserts the replacement literally, so backslashes in the
    # link text are not interpreted as back-references.
    text.gsub!(BRACE_REGEX) do |wikilink|
      # remove title
      visible = wikilink.include?('|') ? wikilink.sub(/\[\[.*?\|/, '') : wikilink
      visible.sub('[[', '').sub(']]', '')
    end

    text
  end
191

192
  # A LaTeX snippet: {{tex: ... :tex}}. Group 1 is the whole snippet
  # including its delimiters, group 2 the LaTeX source between them.
  LATEX_SNIPPET = /(\{\{tex:?(.*?):?tex\}\})/m

  # Replaces each LaTeX snippet in +text+ with a <texFigure position="N"/>
  # placeholder and stores the snippet source in the receiver's tex_figures.
  #
  # Positions start at 1. The Nth snippet updates the Nth existing figure,
  # or a new TexFigure when there is none. Receivers that do not respond to
  # tex_figures get the text back unchanged.
  #
  # text - the String to convert; it is modified in place.
  #
  # Returns +text+.
  def process_latex_snippets(text)
    return text unless respond_to?(:tex_figures)

    figures = tex_figures.to_a
    position = 0

    # Replace while scanning so each occurrence gets its own placeholder.
    # Keying replacements by snippet text collapses identical snippets into
    # one entry, leaving later copies in the text and mis-numbering the first.
    text.gsub!(LATEX_SNIPPET) do
      contents = Regexp.last_match(2)

      figure = figures[position] || TexFigure.new
      figure.source = contents unless figure.source == contents
      figures[position] = figure

      position += 1 # position attribute in acts as list starts with 1
      "<texFigure position=\"#{position}\"/>"
    end

    self.tex_figures = figures
    text
  end
216

217
  # A table header or row is any line with a pipe surrounded by whitespace.
  HEADER = /\s\|\s/
  # A separator line such as "--- | ---".
  SEPARATOR = /---.*\|/
  ROW = HEADER

  # Converts line-oriented wiki markup (section headings and pipe tables)
  # in +text+ into XML/HTML markup.
  #
  # Every line is first passed through process_any_sections. The first line
  # matching HEADER starts a table. Later lines are separators (ignored),
  # rows, or anything else, which closes the table.
  #
  # Side effects: resets @sections and @tables. @tables receives one Hash
  # (:header, :rows, :section) for each table that has at least one row.
  #
  # Returns the processed lines joined with single spaces.
  def process_linewise_markup(text)
    @tables = []
    @sections = []
    new_lines = []
    current_table = nil

    text.lines.each do |line|
      # first deal with any sections
      line = process_any_sections(line)

      if current_table.nil?
        # look for a header
        if line.match?(HEADER)
          line = line.chomp
          current_table = { header: [], rows: [], section: @sections.last }

          # fill the header
          cells = line.split(/\s*\|\s*/)
          cells.shift if line.match?(/^\|/) # remove leading pipe
          cells = cells.map(&:strip)

          # a leading "!" marks a "bang" column; it is not part of the title
          current_table[:header] = cells.map { |cell_title| cell_title.sub(/^!\s*/, '') }
          heading = cells.map do |cell|
            if cell.match?(/^!/)
              "<th class=\"bang\">#{cell.sub(/^!\s*/, '')}</th>"
            else
              "<th>#{cell}</th>"
            end
          end.join(' ')
          new_lines << "<table class=\"tabular\">\n<thead>\n<tr>#{heading}</tr></thead>"
        else
          # no current table, no table contents -- pass the line through
          new_lines << line
        end
      elsif line.match?(SEPARATOR)
        # separator line -- produces no output
      elsif line.match?(ROW)
        line_chomp = line.chomp
        # whitespace followed by a pipe at the start means an initial blank cell
        has_initial_empty_cell = line_chomp.match?(/^\s+\|/)

        # remove leading and trailing delimiters
        clean_line = line_chomp.sub(/^\s*\|/, '').sub(/\|\s*$/, '')
        # -1 means "don't prune empty values at the end"
        cells = clean_line.split(/\s*\|\s*/, -1).map(&:strip)
        cells.unshift('') if has_initial_empty_cell

        current_table[:rows] << cells
        new_lines << '<tbody>' if current_table[:rows].size == 1
        new_lines << "<tr>#{cells.map { |cell| "<td>#{cell}</td> " }.join}</tr>"
      else
        # finished the last row
        unless current_table[:rows].empty? # only process tables with bodies
          @tables << current_table
          new_lines << '</tbody>'
        end
        new_lines << '</table><lb/>'
        current_table = nil
        # The terminating line is not part of the table. Keep it unless it
        # is blank, otherwise text directly after a table would be lost.
        new_lines << line unless line.strip.empty?
      end
    end

    if current_table
      # unclosed table: record it once, and only when it has a body
      unless current_table[:rows].empty?
        @tables << current_table
        new_lines << '</tbody>'
      end
      new_lines << '</table><lb/>'
    end

    new_lines.join(' ')
  end
307

308
  # Converts wiki headings (== Title == up to ====== Title ======) within
  # +line+ into <entryHeading> elements, checking the deepest level first.
  #
  # For every non-blank heading a Section (title and depth) is appended to
  # @sections. The title attribute holds the plain-text form of the heading
  # with double quotes escaped.
  #
  # Returns the line with its headings replaced.
  def process_any_sections(line)
    6.downto(2) do |depth|
      line.scan(/(={#{depth}}([^=]+)={#{depth}})/).each do |matched_text, raw_title|
        wiki_title = raw_title.strip
        next if wiki_title.empty?

        plain_title = XmlSourceProcessor.cell_to_plaintext(wiki_title)
        title_attribute = plain_title.gsub(/"/, '&quot;')
        heading = "<entryHeading title=\"#{title_attribute}\" depth=\"#{depth}\" >#{wiki_title}</entryHeading>"

        line = line.sub(matched_text, heading)
        @sections << Section.new(title: wiki_title, depth: depth)
      end
    end

    line
  end
323

324
  # Resolves the subject links inside each collected section title.
  #
  # Each title in @sections is parsed as a cell. Every <link> whose
  # target_title matches an article title in the collection receives a
  # target_id attribute with that article's id. The title is then replaced
  # by the re-serialised cell.
  def postprocess_sections
    @sections.each do |section|
      doc = XmlSourceProcessor.cell_to_xml(section.title)

      doc.elements.each('//link') do |link|
        article = collection.articles.where(title: link.attributes['target_title']).first
        link.add_attribute('target_id', article.id.to_s) if article
      end

      section.title = XmlSourceProcessor.xml_to_cell(doc)
    end
  end
337

338

339
  # Normalises a link title for use in a target_title attribute: markup
  # tags are removed, line breaks and runs of whitespace become a single
  # space, and double quotes are escaped as &quot;.
  def canonicalize_title(title)
    title
      .gsub(/<.*?>/, '')     # kill all tags
      .gsub(/\n/, ' ')       # linebreaks -> spaces
      .gsub(/\s+/, ' ')      # multiple spaces -> single spaces
      .gsub(/\"/, '&quot;')  # change double quotes to proper xml
  end
350

351
  # Process initial whitespace at the beginning of lines
352
  # Replaces leading spaces with <indent> elements containing a spaces attribute
353
  # Turns the leading spaces of each line into an <indent spaces="N"/>
  # element, N being the number of spaces. Only spaces followed by a
  # non-whitespace character are converted. Returns a new String.
  def process_initial_whitespace(text)
    # Start of the text
    indented = text.gsub(/\A( +)(?=\S)/) do
      "<indent spaces=\"#{Regexp.last_match(1).length}\"/>"
    end

    # After any line ending, including the bare \r (old Mac) one that ^ doesn't match
    indented.gsub(/(\r?\n|\r)( +)(?=\S)/) do
      found = Regexp.last_match
      "#{found[1]}<indent spaces=\"#{found[2].length}\"/>"
    end
  end
368

369
  # transformations converting source mode transcription to xml
370
  # Converts the line endings of the source text into markup.
  #
  # For field-based text every newline becomes <lb/>. Otherwise the text is
  # wrapped in <p>...</p> and blank lines become paragraph boundaries. The
  # remaining \r\n, \n and \r line endings (with following whitespace)
  # become <lb/>, or <lb break="no" /> when the line ends in a hyphenated
  # word, in which case the hyphen is dropped.
  def process_line_breaks(text, is_field_based: false)
    marked =
      if is_field_based
        text.gsub(/\n/, '<lb/>')
      else
        "<p>#{text}</p>".gsub(/\s*\n\s*\n\s*/, '</p><p>')
      end

    marked
      .gsub(/([[:word:]]+)-\r\n\s*/, '\1<lb break="no" />')
      .gsub(/\r\n\s*/, '<lb/>')
      .gsub(/([[:word:]]+)-\n\s*/, '\1<lb break="no" />')
      .gsub(/\n\s*/, '<lb/>')
      .gsub(/([[:word:]]+)-\r\s*/, '\1<lb break="no" />')
      .gsub(/\r\s*/, '<lb/>')
  end
385

386
  # Wraps +source+ in an XML document with a <page> root element.
  #
  # Ampersands are escaped as &amp; (an already escaped &amp; is left as
  # it is), and characters outside the ranges listed in the pattern below
  # are replaced with a space.
  #
  # source - the String to wrap; nil is treated as an empty string.
  #
  # Returns the XML document as a String.
  def valid_xml_from_source(source)
    safe = (source || '').gsub(/&/, '&amp;')
    safe.gsub!(/&amp;amp;/, '&amp;')
    # \u takes exactly four hex digits, so the supplementary planes must be
    # written as \u{10000}-\u{10FFFF}. Written as \u10000-\u10FFFF, the range
    # did not cover them and those characters were replaced with spaces.
    safe.gsub!(/[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/, ' ')

    <<EOF
    <?xml version="1.0" encoding="UTF-8"?>
      <page>
        #{safe}
      </page>
EOF
  end
399

400
  # Resolves every <link> element in +xml_string+ to an article and returns
  # the updated XML.
  #
  # For each link the article is looked up by title among the collection's
  # articles, then among article versions created within the last hour (in
  # which case the link adopts that article's current title as both title
  # and display text); a new Article is built when neither lookup succeeds.
  # Unless +preview_mode+ is set, existing links are cleared first, new
  # articles are saved, a link record is created through create_link, and
  # the element receives target_id and link_id attributes.
  #
  # xml_string   - XML String produced by valid_xml_from_source.
  # preview_mode - when true nothing is persisted.
  # text_type    - passed to clear_links and create_link.
  def update_links_and_xml(xml_string, preview_mode = false, text_type)
    # first clear out the existing links
    clear_links(text_type) unless preview_mode

    articles_by_title = collection.articles.index_by(&:title) # optimize for common case
    candidate_articles = collection.articles.left_joins(:article_versions) # fall-back
    page_update_timestamp = 1.hour.ago

    doc = REXML::Document.new(xml_string)
    doc.elements.each('//link') do |element|
      # default the title to the text if it's not specified
      title = element.attributes['target_title'] || element.text

      display_text = element.children.each_with_object(+'') { |child, buffer| buffer << child.to_s }
      debug("link display_text = #{display_text}")

      # change the xml version of quotes back to double quotes for article title
      title = title.gsub('&quot;', '"')

      article = articles_by_title[title]

      if article.nil?
        article = candidate_articles.where('article_versions.title': title)
                                    .where('article_versions.created_on > ?', page_update_timestamp)
                                    .first
        if article.present?
          display_text = article.title
          title = article.title
        end
      end

      # create new blank articles if they don't exist already
      if article.nil?
        article = Article.new
        article.title = title
        article.collection = collection
        article.created_by_id = Current.user.id if Current.user.present?
        article.save! unless preview_mode
        # add the new article to the hash
        articles_by_title[title] = article
      end

      link_id = create_link(article, display_text, text_type) unless preview_mode

      # now rebuild the element with the resolved attributes
      link_element = REXML::Element.new('link')
      element.children.each { |child| link_element.add(child) }
      link_element.add_attribute('target_title', title)
      debug("element=#{link_element.inspect}")
      debug("article=#{article.inspect}")
      unless preview_mode
        link_element.add_attribute('target_id', article.id.to_s)
        link_element.add_attribute('link_id', link_id.to_s)
      end
      element.replace_with(link_element)
    end

    processed = ''
    doc.write(processed)
    processed
  end
464

465
  # handle XML-dependent post-processing
466
  # Handles XML-dependent post-processing: removes every <lb/> element whose
  # immediately preceding sibling is also an <lb/> element, so a run of
  # adjacent line breaks collapses into one.
  #
  # xml_string - the XML document as a String.
  #
  # Returns the re-serialised document.
  def postprocess_xml_markup(xml_string)
    doc = REXML::Document.new(xml_string)

    doc.elements.each('//lb') do |element|
      previous = element.previous_sibling
      # Text or other nodes between two <lb/> elements keep both of them.
      next unless previous && previous.node_type == :element && previous.name == 'lb'

      element.parent.elements.delete(element)
    end

    processed = ''
    doc.write(processed)
    processed
  end
478

479

480
  # Wrapper placed around a cell's markup so it can be parsed as a document.
  CELL_PREFIX = "<?xml version='1.0' encoding='UTF-8'?><cell>"
  CELL_SUFFIX = '</cell>'

  # Parses the markup of a single cell, with its ampersands escaped, into a
  # REXML::Document whose root element is <cell>.
  def self.cell_to_xml(cell)
    escaped = cell.gsub('&', '&amp;')
    REXML::Document.new("#{CELL_PREFIX}#{escaped}#{CELL_SUFFIX}")
  end

  # Serialises a document built by cell_to_xml and removes the first
  # occurrence of the wrapper prefix and suffix again.
  def self.xml_to_cell(doc)
    serialized = +''
    doc.write(serialized)
    serialized.sub(CELL_PREFIX, '').sub(CELL_SUFFIX, '')
  end
492

493
  # Returns the text content of a cell with all markup removed: the text
  # nodes of the parsed cell, concatenated. Each text node contributes its
  # to_s form.
  #
  # The former `{ |e| p e.text }` block was leftover debug code that never
  # ran for text nodes; the XPath match is now joined directly.
  def self.cell_to_plaintext(cell)
    doc = cell_to_xml(cell)
    REXML::XPath.match(doc, './/text()').join
  end
497

498
  # Lists the subjects linked from a cell: the target_title of every <link>
  # element, each followed by a newline. Returns '' when the cell has no
  # links.
  def self.cell_to_subject(cell)
    doc = cell_to_xml(cell)
    subjects = +''
    doc.elements.each('//link') do |link|
      subjects << link.attributes['target_title'] << "\n"
    end
    subjects
  end
508

509
  # Lists the categories of the articles linked from a cell: for every
  # <link> carrying a target_id, the title of each of that article's
  # categories followed by a newline. Links without a target_id are skipped.
  def self.cell_to_category(cell)
    doc = cell_to_xml(cell)
    categories = +''
    doc.elements.each('//link') do |link|
      target_id = link.attributes['target_id']
      next unless target_id

      Article.find(target_id).categories.each do |category|
        categories << category.title << "\n"
      end
    end
    categories
  end
524

525
  ##############################################
526
  # Code to rename links within the text.
527
  # This assumes that the name change has already
528
  # taken place within the article table in the DB
529
  ##############################################
530
  # Rewrites the links pointing at +old_title+ in source_text, and in
  # source_translation when the receiver has that attribute and it is set.
  #
  # The old title is turned into a regex source in which any run of
  # whitespace matches one or more whitespace characters; the rewriting
  # itself is done by rename_link_in_text.
  def rename_article_links(old_title, new_title)
    # Regexp.escape converts ' ' to '\\ ' -- undo this, then let any run of
    # whitespace match 1+n whitespace characters
    escaped_title = Regexp.escape(old_title)
    title_regex = escaped_title.gsub('\\ ', ' ').gsub(/\s+/, '\s+')

    self.source_text = rename_link_in_text(source_text, title_regex, new_title)

    # Articles don't have translations, but we still need to update pages.source_translation
    return unless has_attribute?(:source_translation) && !source_translation.nil?

    self.source_translation = rename_link_in_text(source_translation, title_regex, new_title)
  end
543

544
  # Rewrites the wiki links in +text+ whose title matches +title_regex+
  # (case-insensitively).
  #
  # text        - the String to rewrite (not modified).
  # title_regex - regex source matching the old title.
  # new_title   - the replacement title. An empty string means the subject
  #               was deleted: the brackets are removed and the visible text
  #               is kept.
  #
  # Returns the rewritten String.
  def rename_link_in_text(text, title_regex, new_title)
    bare_link = /\[\[(#{title_regex})\]\]/i

    if new_title == ''
      # Link deleted, remove [[ ]] but keep the original title text

      # Handle links of the form [[Old Title|Display Text]] => Display Text
      text = text.gsub(/\[\[#{title_regex}\|([^\]]+)\]\]/i, '\1')
      # Handle links of the form [[Old Title]] => Old Title
      text.gsub(bare_link, '\1')
    else
      # The block form inserts new_title literally. Inside a replacement
      # string a backslash sequence in the title (\1, \\) would be expanded.

      # Replace the title part in [[Old Title|Display Text]]
      text = text.gsub(/\[\[#{title_regex}\|/i) { "[[#{new_title}|" }
      # Replace [[Old Title]] with [[New Title|Old Title]]
      text.gsub(bare_link) { "[[#{new_title}|#{Regexp.last_match(1)}]]" }
    end
  end
561

562

563
  # Adds a pipe character to the beginning and end of every line, as the
  # Pandoc Pipe Tables extension requires. The lines are re-joined with
  # newlines.
  def pipe_tables_formatting(text)
    rows = text.split("\n")
    piped_rows = rows.map { |row| "|#{row}|" }
    piped_rows.join("\n")
  end
568

569
  # Renders an XML table as a text table: a header line, a dashed separator
  # line and one line per <tr>, with cells right-justified to the column
  # width and joined by " | ".
  #
  # table_element    - node answering xpath (presumably a Nokogiri node --
  #                    confirm against the caller). Its <lb> elements are
  #                    replaced with a space.
  # pandoc_format    - when true every line is additionally wrapped in pipes
  #                    via pipe_tables_formatting.
  # plaintext_export - when true cells are rendered as plain text, otherwise
  #                    through xml_to_pandoc_md.
  #
  # Returns the table text followed by two newlines.
  def xml_table_to_markdown_table(table_element, pandoc_format = false, plaintext_export = false)
    # clean up in-cell line-breaks
    table_element.xpath('//lb').each { |line_break| line_break.replace(' ') }

    # calculate the widths of each column based on max(header, cell[0...end])
    cells_per_row = table_element.xpath('//tr').map { |row| row.xpath('td').count }
    column_count = ([table_element.xpath('//th').count] + cells_per_row).max
    column_widths = {}
    (1..column_count).each do |column_index|
      cell_lengths = table_element.xpath("//tr/td[position()=#{column_index}]").map { |cell| cell.text.length }
      longest_cell = cell_lengths.max || 0
      heading = table_element.xpath("//th[position()=#{column_index}]").first
      heading_length = heading.nil? ? 0 : heading.text.length
      column_widths[column_index] = [longest_cell, heading_length].max
    end

    text_table = +''

    # print the header as markdown
    header_cells = []
    table_element.xpath('//th').each_with_index do |heading, i|
      header_cells << heading.text.rjust(column_widths[i + 1], ' ')
    end
    text_table << header_cells.join(' | ') << "\n"

    # print the separator
    dashes = column_count.times.map { |i| ''.rjust(column_widths[i + 1], '-') }
    text_table << dashes.join(' | ') << "\n"

    # print each row as markdown
    table_element.xpath('//tr').each do |row_element|
      rendered_cells = row_element.xpath('td').map do |cell|
        position = cell.path.match(/.*td\[(\d+)\]/)
        width =
          if position
            column_widths[position[1].to_i] || 80 # 80 is the default for hand-coded tables
          else
            column_widths.values.first
          end

        content =
          if plaintext_export
            cell.text
          else
            xml_to_pandoc_md(cell.to_s.gsub("'", '&#39;'), false, false, nil, false).gsub("\n", '')
          end
        content.rjust(width, ' ')
      end
      text_table << rendered_cells.join(' | ') << "\n"
    end

    text_table = pipe_tables_formatting(text_table) if pandoc_format

    "#{text_table}\n\n"
  end
623

624

625

626
  # Writes +msg+ to the log at debug level, prefixed with "DEBUG: ".
  def debug(msg)
    entry = "DEBUG: #{msg}"
    logger.debug(entry)
  end
629
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc