• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

benwbrum / fromthepage / 17387282326

01 Sep 2025 09:13PM UTC coverage: 64.405%. Remained the same
17387282326

push

github

web-flow
4857 - Require rubocop step in CI (#4858)

* 4857 - Require rubocop step in CI

* 4865 - Organize gemfiles

1790 of 3303 branches covered (54.19%)

Branch coverage included in aggregate %.

839 of 1497 new or added lines in 133 files covered. (56.05%)

43 existing lines in 29 files now uncovered.

7928 of 11786 relevant lines covered (67.27%)

103.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.0
/app/controllers/abstract_xml_controller.rb
1
module AbstractXmlController
1✔
2
  # TODO rename this
3

4

5
  ##############################################
6
  # All code to manipulate source transcription
7
  # belongs here.
8
  ##############################################
9

10
  # Words to ignore when autolinking: honorifics, pronouns and kinship terms
  # that must not be turned into links even when they have been used as the
  # display text of an article link.
  STOPWORDS = [ 'Mrs', 'Mrs.', 'Mr.', 'Mr', 'Dr.', 'Dr', 'Miss', 'he', 'she', 'it',
    'wife', 'husband', 'I', 'him', 'her', 'son', 'daughter' ].freeze
  # Display texts consisting of a single word character with an optional
  # trailing period (e.g. "J" or "J.") are ignored as well. The end anchor
  # must be unescaped: an escaped "\$" demands a literal dollar sign, which
  # would keep the pattern from ever matching a bare initial. \A and \z anchor
  # the whole string rather than a single line.
  STOPREGEX = /\A\w\.?\z/
1✔
14

15

16
  # Wraps mentions of known article subjects in +text+ with [[wiki link]]
  # markup.
  #
  # Candidate phrases are every display text already used in a
  # page_article_links row for an article of the current collection, plus
  # every article title in that collection. Longer phrases are tried first;
  # among phrases of equal length the most recently referenced wins.
  #
  # text - String to scan. It is modified in place (gsub!) and also returned.
  #
  # Reads @collection; when it is a DocumentSet, the id of its collection is
  # used to find the articles.
  #
  # Returns the same String object that was passed in.
  def autolink(text)
    # find the list of articles
    id = @collection.is_a?(DocumentSet) ? @collection.collection.id : @collection.id

    sql = 'select article_id, '+
          'display_text, '+
          'max(page_article_links.created_on) last_reference '+
          'from page_article_links ' +
          'inner join articles a '+
          'on a.id = article_id ' +
          "where a.collection_id = #{id} "+
          'group by article_id, display_text '+
          'union '+
          'select id article_id, '+
          'title display_text, '+
          'created_on last_reference '+
          'from articles '+
          "where collection_id = #{id}"

    matches = Page.connection.select_all(sql).to_a
    # A blank display text would compile to /\b\b/ and match at every word
    # boundary (and a nil one would break the sort below), so drop such rows.
    matches.reject! { |match| match['display_text'].to_s.strip.empty? }
    # longest phrase first, then most recent reference first
    matches.sort! { |x, y| [ y['display_text'].length, y['last_reference'] ] <=> [ x['display_text'].length, x['last_reference'] ] }

    # for each article, check text to see if it needs to be linked
    matches.each do |match|
      display_text = match['display_text']
      match_regex = Regexp.new('\b'+Regexp.escape(display_text)+'\b', Regexp::IGNORECASE)
      logger.debug("DEBUG looking for #{match_regex}")

      # if the match is a stopword, ignore it and move to the next match
      next if display_text.in?(STOPWORDS) || display_text.match?(STOPREGEX)

      # Loaded lazily and at most once per candidate, instead of once per
      # replacement; candidates that never produce a link cost no lookup.
      article = nil

      # find the matches and substitute in as long as the text isn't already in a link
      text.gsub!(match_regex) do |m|
        # offset of this occurrence, needed to inspect its surroundings
        position = Regexp.last_match.offset(0)[0]

        if word_not_okay(text, position, m) || within_link(text, position)
          # part of a longer word, or already inside a link: leave untouched
          m
        else
          article ||= Article.find(match['article_id'].to_i)

          # Bug 19 -- simplify when possible: the short form is used only when
          # the occurrence equals the display text exactly (including case) and
          # the display text is the article title; otherwise the occurrence is
          # kept as the visible label.
          if m == display_text && article.title == display_text
            "[[#{article.title}]]"
          else
            "[[#{article.title}|#{m}]]"
          end
        end
      end
    end

    text
  end
86

87
  # check for word boundaries on preceding and following sides
88
  # Secondary word-boundary check for a candidate autolink occurrence.
  #
  # text         - the full String being scanned
  # index        - zero-based offset in text at which the candidate starts
  # display_text - the candidate substring; currently unused, kept so the
  #                existing call signature stays valid
  #
  # Returns true when the character immediately before the candidate is a
  # word character (the candidate is the tail of a longer word), otherwise
  # false.
  def word_not_okay(text, index, display_text)
    # index > 0 (not > 1) so that a candidate starting at offset 1 is also
    # checked against the single character in front of it.
    if index > 0
      preceding = text[index - 1, 1]
      return true if preceding&.match?(/\w/)
    end

    # No check is made on the characters following the candidate: the earlier
    # look-ahead here (meant for inflectional endings, i.e. "Mr" shouldn't
    # link "Mrs") returned false on every path, so dropping it changes no
    # result.
    false
  end
112

113
  # Reports whether offset +index+ of +text+ lies inside a "[[" link that has
  # not been closed yet.
  #
  # text  - the String being scanned
  # index - zero-based offset to test
  #
  # Only markers starting at or before index are considered (String#rindex
  # searches backwards from index).
  #
  # Returns true when a "[[" precedes index and no "]]" follows that "[[",
  # false otherwise.
  def within_link(text, index)
    open_link = text.rindex('[[', index)
    # no open-link precedes this, so we cannot be inside one
    return false unless open_link

    close_link = text.rindex(']]', index)
    # Inside a link when nothing has been closed yet, or when the nearest
    # close-link is older than the nearest open-link.
    close_link.nil? || open_link >= close_link
  end
136
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc