17387282326

Committed 01 Sep 2025 09:13PM UTC coverage: 64.405%. Remained the same

Build # 17387282326

Build Type

push

github

Committed by

web-flow

Commit Message

4857 - Require rubocop step in CI (#4858)

* 4857 - Require rubocop step in CI

* 4865 - Organize gemfiles

Coverage Stats

1790 of 3303 branches covered (54.19%)

Branch coverage included in aggregate %.

839 of 1497 new or added lines in 133 files covered. (56.05%)

43 existing lines in 29 files now uncovered.

7928 of 11786 relevant lines covered (67.27%)

103.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.0

/app/controllers/abstract_xml_controller.rb

module AbstractXmlController
  # TODO: rename this

  ##############################################
  # All code to manipulate source transcription
  # belongs here.
  ##############################################

  # Words that are never autolinked, even when an article or an existing
  # link uses exactly this text.
  STOPWORDS = %w[
    Mrs Mrs. Mr. Mr Dr. Dr Miss he she it
    wife husband I him her son daughter
  ].freeze

  # Matches a display text that is a single word character optionally
  # followed by a period (e.g. "J" or "J."). Anchored on the whole string
  # with \A / \z; the previous pattern escaped the trailing "$" and so only
  # matched a letter followed by a literal dollar sign.
  STOPREGEX = /\A\w\.?\z/

  # Wraps occurrences of known subjects in +text+ with [[wiki link]] markup.
  #
  # Candidate link texts are read from the database for the current
  # collection (the parent collection when @collection is a DocumentSet):
  # every display_text recorded in page_article_links for articles of that
  # collection, unioned with every article title of that collection.
  # Candidates are tried longest first, ties broken by most recent
  # last_reference first.
  #
  # @param text [String] transcription text; it is modified in place by gsub!
  # @return [String] the same +text+ object, after substitution
  def autolink(text)
    # find the list of articles
    id = @collection.is_a?(DocumentSet) ? @collection.collection.id : @collection.id

    sql = 'select article_id, ' \
          'display_text, ' \
          'max(page_article_links.created_on) last_reference ' \
          'from page_article_links ' \
          'inner join articles a ' \
          'on a.id = article_id ' \
          "where a.collection_id = #{id} " \
          'group by article_id, display_text ' \
          'union ' \
          'select id article_id, ' \
          'title display_text, ' \
          'created_on last_reference ' \
          'from articles ' \
          "where collection_id = #{id}"

    matches = Page.connection.select_all(sql).to_a
    # descending by display_text length, then by last_reference
    matches.sort! do |x, y|
      [ y['display_text'].length, y['last_reference'] ] <=> [ x['display_text'].length, x['last_reference'] ]
    end

    # for each candidate, check text to see if it needs to be linked
    matches.each do |match|
      display_text = match['display_text']
      match_regex = Regexp.new('\b' + Regexp.escape(display_text) + '\b', Regexp::IGNORECASE)
      logger.debug("DEBUG looking for #{match_regex}")

      # if the candidate is a stopword, ignore it and move to the next one
      next if STOPWORDS.include?(display_text) || display_text.match?(STOPREGEX)

      # looked up at most once per candidate, and only if a substitution happens
      article = nil

      text.gsub!(match_regex) do |m|
        # start offset of this occurrence within text
        position = Regexp.last_match.offset(0)[0]

        # leave the occurrence untouched when it is glued to a preceding
        # word character or already sits inside a [[...]] link
        next m if word_not_okay(text, position, m) || within_link(text, position)

        article ||= Article.find(match['article_id'].to_i)

        # Bug 19 -- simplify when possible: when the matched text is exactly
        # the article title, the short form is enough; otherwise keep the
        # matched text (with its original case) as the link's display text.
        if article.title == m
          "[[#{article.title}]]"
        else
          "[[#{article.title}|#{m}]]"
        end
      end
    end

    text
  end

  # Checks the character immediately before a prospective match.
  #
  # Earlier revisions also inspected the two characters following the match,
  # but every path through that code returned false, so it has been removed;
  # the trailing side is only constrained by the \b in autolink's regex.
  #
  # @param text [String] the text being scanned
  # @param index [Integer] start offset of the match within +text+
  # @param display_text [String] the matched text (currently unused; kept so
  #   existing callers keep working)
  # @return [Boolean] true when the preceding character is a word character,
  #   meaning the match must not be linked
  def word_not_okay(text, index, display_text)
    # index 0 has no preceding character; any later index does
    return true if index > 0 && text[index - 1, 1].match?(/\w/)

    false
  end

  # Tells whether +index+ falls inside an unclosed [[...]] link.
  #
  # Looks backwards from +index+ for the nearest "[[" and the nearest "]]".
  #
  # @param text [String] the text being scanned
  # @param index [Integer] offset to test
  # @return [Boolean] true when a "[[" precedes +index+ and no "]]" occurs
  #   between that "[[" and +index+
  def within_link(text, index)
    open_link = text.rindex('[[', index)
    # no open-link precedes this
    return false unless open_link

    close_link = text.rindex(']]', index)
    # inside a link unless a close-link is more recent than the open-link
    close_link.nil? || close_link < open_link
  end
end

1	module AbstractXmlController	1✔
2	# TODO rename this
3
4
5	##############################################
6	# All code to manipulate source transcription
7	# belongs here.
8	##############################################
9
10	# constant - words to ignore when autolinking
11	STOPWORDS = [ 'Mrs', 'Mrs.', 'Mr.', 'Mr', 'Dr.', 'Dr', 'Miss', 'he', 'she', 'it',	1✔
12	'wife', 'husband', 'I', 'him', 'her', 'son', 'daughter' ]
13	STOPREGEX = /^\w\.?\$/	1✔
14
15
16	def autolink(text)	1✔
17	# find the list of articles
18	if @collection.is_a?(DocumentSet)	7✔
19	id = @collection.collection.id	1✔
20	else	6✔
21	id = @collection.id	6✔
22	end
23
24	sql = 'select article_id, '+	7✔
25	'display_text, '+
26	'max(page_article_links.created_on) last_reference '+
27	'from page_article_links ' +
28	'inner join articles a '+
29	'on a.id = article_id ' +
30	"where a.collection_id = #{id} "+
31	'group by article_id, display_text '+
32	'union '+
33	'select id article_id, '+
34	'title display_text, '+
35	'created_on last_reference '+
36	'from articles '+
37	"where collection_id = #{id}"
38
39
40	matches = Page.connection.select_all(sql).to_a	7✔
41	matches.sort! { \|x, y\| [ y['display_text'].length, y['last_reference'] ] <=> [ x['display_text'].length, x['last_reference'] ] }	86✔
42	# for each article, check text to see if it needs to be linked
43	for match in matches	7✔
44	match_regex = Regexp.new('\b'+Regexp.escape(match['display_text'])+'\b', Regexp::IGNORECASE)	43✔
45	display_text = match['display_text']	43✔
46	logger.debug("DEBUG looking for #{match_regex}")	43✔
47
48	# if the match is a stopword, ignore it and move to the next match
49	if display_text.in?(STOPWORDS) \|\| display_text.match(STOPREGEX)	43✔
50	# skip this one
51	else
52	# find the matches and substitute in as long as the text isn't already in a link	42✔
53	text.gsub! match_regex do \|m\|	42✔
54	# find the index of the match to check if it's with a larger link
55	position = Regexp.last_match.offset(0)[0]	4✔
56	# check to see if the regex is already within a link, from each index
57	if word_not_okay(text, position, m) \|\| within_link(text, position)	4✔
58	m	2✔
59
60	else
61	# not within a link, so create a new one	2✔
62	article = Article.find(match['article_id'].to_i)	2✔
63
64	# check if regex match is exact (including case)
65	if m == display_text	2✔
66	# Bug 19 -- simplify when possible
67	# if yes, use display text	2✔
68	if article.title == display_text	2✔
69	"[[#{article.title}]]"	2✔
70	else	×
71	"[[#{article.title}\|#{display_text}]]"	×
72	end
73	# if not, use regex match
74	else	×
75	"[[#{article.title}\|#{m}]]"	×
76	end
77
78
79	end
80	end
81	end
82	end
83
84	text	7✔
85	end
86
87	# check for word boundaries on preceding and following sides
88	def word_not_okay(text, index, display_text)	1✔
89	# test for characters before the display text
90	if index > 1	4✔
91	if text[(index-1), 1].match /\w/	2!
92	return true	×
93	end
94	end
95	# possibly do something for after the match.
96	# reject word boundaries that aren't inflectional, plus special cases
97	# i.e. (Mr shouldn't link Mrs)
98	if index + display_text.size + 2 < text.size	4!
99	next_two = text[index + display_text.size, 2]	×
100	unless next_two.match /\w/	×
101	# we're not in a word boundary
102	# check for inflectional endings that might pass	×
NEW 103	unless next_two.match(/.+s/) \|\| next_two.match(/.+d/)	×
104	return false	×
105	end
106	end
107	end
108
109	# consider rejecting some words
110	false	4✔
111	end
112
113	def within_link(text, index)	1✔
114	if open_link = text.rindex('[[', index)	4✔
115	# a begin-link precedes this	2✔
116	if close_link = text.rindex(']]', index)	2✔
117	# a close-link precedes this	×
118	if open_link < close_link	×
119	# close link was more recent than open, so we're not inside
120	# a link already	×
121	false	×
122	else
123	# we're inside a link	×
124	true	×
125	end
126	else
127	# no close-link precedes this, but a begin-link does
128	# therefore we're inside a link: do nothing	2✔
129	true	2✔
130	end
131	else
132	# no open_link precedes this	2✔
133	false	2✔
134	end
135	end
136	end

benwbrum / fromthepage / 17387282326

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous