hazendaz / sitemesh2, build 59

22 Mar 2026 02:30AM UTC. Coverage: 40.347% (remained the same).
Triggered by a push (GitHub) from hazendaz: "[mvn] Update maven wrapper"

698 of 1891 branches covered (36.91%). Branch coverage is included in the aggregate %.
1555 of 3693 relevant lines covered (42.11%), at 0.42 hits per line.
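The aggregate figure combines line and branch coverage, as the note above says. The short sketch below reproduces the 40.347% value from the reported counts; the combination rule (covered lines plus covered branches over relevant lines plus total branches) is inferred from these numbers, not taken from Coveralls documentation, and the class name is illustrative.

// Reproduces the aggregate coverage figure from the build summary above.
// The combination rule is inferred from the reported numbers, not from Coveralls docs.
public class AggregateCoverage {
    public static void main(String[] args) {
        int coveredLines = 1555, relevantLines = 3693;
        int coveredBranches = 698, totalBranches = 1891;

        double lineCoverage = 100.0 * coveredLines / relevantLines;       // 42.11%
        double branchCoverage = 100.0 * coveredBranches / totalBranches;  // 36.91%
        double aggregate = 100.0 * (coveredLines + coveredBranches)
                / (relevantLines + totalBranches);                        // 40.347%

        System.out.printf("line %.2f%%, branch %.2f%%, aggregate %.3f%%%n",
                lineCoverage, branchCoverage, aggregate);
    }
}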

Source File

/src/main/java/com/opensymphony/module/sitemesh/mapper/RobotDecoratorMapper.java: 0.0% covered
/*
 * SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2026 Hazendaz
 */
/*
 * Title:        RobotDecoratorMapper
 * Description:
 *
 * This software is published under the terms of the OpenSymphony Software
 * License version 1.1, of which a copy has been included with this
 * distribution in the LICENSE.txt file.
 */

package com.opensymphony.module.sitemesh.mapper;

import com.opensymphony.module.sitemesh.Config;
import com.opensymphony.module.sitemesh.Decorator;
import com.opensymphony.module.sitemesh.DecoratorMapper;
import com.opensymphony.module.sitemesh.Page;
import com.opensymphony.module.sitemesh.RequestConstants;

import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpSession;

import java.util.Properties;

/**
 * The RobotDecoratorMapper will use the specified decorator when the requester is identified as a robot (also known as
 * a spider, crawler, or ferret) of a search engine.
 * <p>
 * The name of this decorator should be supplied in the <code>decorator</code> property.
 *
 * @author <a href="mailto:pathos@pandora.be">Mathias Bogaert</a>
 *
 * @see com.opensymphony.module.sitemesh.DecoratorMapper
 */
public class RobotDecoratorMapper extends AbstractDecoratorMapper {

    /** The decorator name. */
    private String decoratorName = null;

    /** All known robot hosts (list can be found <a href="http://www.spiderhunter.com">here</a>). */
    private static final String[] botHosts = { "alltheweb.com", "alta-vista.net", "altavista.com", "atext.com",
            "euroseek.net", "excite.com", "fast-search.net", "google.com", "googlebot.com", "infoseek.co.jp",
            "infoseek.com", "inktomi.com", "inktomisearch.com", "linuxtoday.com.au", "lycos.com", "lycos.com",
            "northernlight.com", "pa-x.dec.com" };

    /**
     * All known robot user-agent headers (list can be found
     * <a href="http://www.robotstxt.org/wc/active.html">here</a>).
     * <p>
     * NOTE: To avoid bad detection:
     * </p>
     * <ul>
     * <li>Robots whose ID is only 2 letters long were removed</li>
     * <li>The robot called "webs" was removed</li>
     * <li>directhit was changed to direct_hit (its real id)</li>
     * </ul>
     */
    private static final String[] botAgents = { "acme.spider", "ahoythehomepagefinder", "alkaline", "appie",
            "arachnophilia", "architext", "aretha", "ariadne", "aspider", "atn.txt", "atomz", "auresys", "backrub",
            "bigbrother", "bjaaland", "blackwidow", "blindekuh", "bloodhound", "brightnet", "bspider",
            "cactvschemistryspider", "calif", "cassandra", "cgireader", "checkbot", "churl", "cmc", "collective",
            "combine", "conceptbot", "core", "cshkust", "cusco", "cyberspyder", "deweb", "dienstspider", "diibot",
            "direct_hit", "dnabot", "download_express", "dragonbot", "dwcp", "ebiness", "eit", "emacs", "emcspider",
            "esther", "evliyacelebi", "fdse", "felix", "ferret", "fetchrover", "fido", "finnish", "fireball", "fish",
            "fouineur", "francoroute", "freecrawl", "funnelweb", "gazz", "gcreep", "getbot", "geturl", "golem",
            "googlebot", "grapnel", "griffon", "gromit", "gulliver", "hambot", "harvest", "havindex", "hometown",
            "wired-digital", "htdig", "htmlgobble", "hyperdecontextualizer", "ibm", "iconoclast", "ilse", "imagelock",
            "incywincy", "informant", "infoseek", "infoseeksidewinder", "infospider", "inspectorwww", "intelliagent",
            "iron33", "israelisearch", "javabee", "jcrawler", "jeeves", "jobot", "joebot", "jubii", "jumpstation",
            "katipo", "kdd", "kilroy", "ko_yappo_robot", "labelgrabber.txt", "larbin", "legs", "linkscan", "linkwalker",
            "lockon", "logo_gif", "lycos", "macworm", "magpie", "mediafox", "merzscope", "meshexplorer", "mindcrawler",
            "moget", "momspider", "monster", "motor", "muscatferret", "mwdsearch", "myweb", "netcarta", "netmechanic",
            "netscoop", "newscan-online", "nhse", "nomad", "northstar", "nzexplorer", "occam", "octopus", "orb_search",
            "packrat", "pageboy", "parasite", "patric", "perignator", "perlcrawler", "phantom", "piltdownman",
            "pioneer", "pitkow", "pjspider", "pka", "plumtreewebaccessor", "poppi", "portalb", "puu", "python", "raven",
            "rbse", "resumerobot", "rhcs", "roadrunner", "robbie", "robi", "roverbot", "safetynetrobot", "scooter",
            "search_au", "searchprocess", "senrigan", "sgscout", "shaggy", "shaihulud", "sift", "simbot", "site-valet",
            "sitegrabber", "sitetech", "slurp", "smartspider", "snooper", "solbot", "spanner", "speedy",
            "spider_monkey", "spiderbot", "spiderman", "spry", "ssearcher", "suke", "sven", "tach_bw", "tarantula",
            "tarspider", "tcl", "techbot", "templeton", "titin", "titan", "tkwww", "tlspider", "ucsd", "udmsearch",
            "urlck", "valkyrie", "victoria", "visionsearch", "voyager", "vwbot", "w3index", "w3m2", "wanderer",
            "webbandit", "webcatcher", "webcopy", "webfetcher", "webfoot", "weblayers", "weblinker", "webmirror",
            "webmoose", "webquest", "webreader", "webreaper", "websnarf", "webspider", "webvac", "webwalk", "webwalker",
            "webwatch", "wget", "whowhere", "wmir", "wolp", "wombat", "worm", "wwwc", "wz101", "xget",
            "nederland.zoek" };

    @Override
    public void init(Config config, Properties properties, DecoratorMapper parent) throws InstantiationException {
        super.init(config, properties, parent);
        decoratorName = properties.getProperty("decorator");
    }

    @Override
    public Decorator getDecorator(HttpServletRequest request, Page page) {
        Decorator result = null;

        if (decoratorName != null && isBot(request)) {
            result = getNamedDecorator(request, decoratorName);
        }

        return result == null ? super.getDecorator(request, page) : result;
    }

    /**
     * Check whether the current request came from a robot (also known as a spider, crawler, or ferret).
     *
     * @param request
     *            the request
     *
     * @return true if the request came from a bot
     */
    private static boolean isBot(HttpServletRequest request) {
        if (request == null) {
            return false;
        }

        // force creation of a session
        HttpSession session = request.getSession(true);

        if (Boolean.FALSE.equals(session.getAttribute(RequestConstants.ROBOT))) {
            return false;
        }
        if (Boolean.TRUE.equals(session.getAttribute(RequestConstants.ROBOT))) {
            // a key was found in the session indicating it is a robot
            return true;
        } else if ("robots.txt".indexOf(request.getRequestURI()) != -1) {
            // there is a specific request for the robots.txt file, so we assume
            // it must be a robot (only robots request robots.txt)

            // set a key in the session, so the next time we don't have to manually
            // detect the robot again
            session.setAttribute(RequestConstants.ROBOT, Boolean.TRUE);
            return true;
        } else {
            String userAgent = request.getHeader("User-Agent");

            if (userAgent != null && userAgent.trim().length() > 2) {
                // first check for common user-agent headers, so that we can speed
                // this thing up, hopefully clever spiders will not send a fake header
                if (userAgent.indexOf("MSIE") != -1 || userAgent.indexOf("Gecko") != -1 // MSIE and Mozilla
                        || userAgent.indexOf("Opera") != -1 || userAgent.indexOf("iCab") != -1 // Opera and iCab
                        // (mac browser)
                        || userAgent.indexOf("Konqueror") != -1 || userAgent.indexOf("KMeleon") != -1 // Konqueror
                        // and KMeleon
                        || userAgent.indexOf("4.7") != -1 || userAgent.indexOf("Lynx") != -1) { // NS 4.78 and Lynx
                    // indicate this session is not a robot
                    session.setAttribute(RequestConstants.ROBOT, Boolean.FALSE);
                    return false;
                }

                for (String botAgent : botAgents) {
                    if (userAgent.indexOf(botAgent) != -1) {
                        // set a key in the session, so the next time we don't have to manually
                        // detect the robot again
                        session.setAttribute(RequestConstants.ROBOT, Boolean.TRUE);
                        return true;
                    }
                }
            }

            // detect the robot from the host or user-agent
            String remoteHost = request.getRemoteHost(); // requires one DNS lookup

            // if the DNS server didn't return a hostname, getRemoteHost returns the
            // IP address, which is ignored here (the last char is checked, because some
            // remote hosts begin with the IP)
            if (remoteHost != null && remoteHost.length() > 0 && remoteHost.charAt(remoteHost.length() - 1) > 64) {
                for (String botHost : botHosts) {
                    if (remoteHost.indexOf(botHost) != -1) {
                        // set a key in the session, so the next time we don't have to manually
                        // detect the robot again
                        session.setAttribute(RequestConstants.ROBOT, Boolean.TRUE);
                        return true;
                    }
                }
            }

            // remote host and user agent are not in the predefined list,
            // so it must be an unknown robot or not a robot

            // indicate this session is not a robot
            session.setAttribute(RequestConstants.ROBOT, Boolean.FALSE);
            return false;
        }
    }
}
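The file shows 0.0% coverage: nothing currently exercises the robot-detection path. Below is a minimal JUnit 5 + Mockito sketch of the kind of test that would drive getDecorator through that path. It is an assumption-laden illustration, not project code: it assumes Mockito and JUnit are on the test classpath, that AbstractDecoratorMapper.init tolerates a null Config, and that its getNamedDecorator delegates to the parent mapper; the test class name and the "robot" decorator name are hypothetical. Note that the User-Agent scan uses a case-sensitive indexOf against lowercase entries such as "googlebot", so the sketch supplies a lowercase agent string.

package com.opensymphony.module.sitemesh.mapper;

import static org.junit.jupiter.api.Assertions.assertSame;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.opensymphony.module.sitemesh.Decorator;
import com.opensymphony.module.sitemesh.DecoratorMapper;
import com.opensymphony.module.sitemesh.RequestConstants;

import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpSession;

import java.util.Properties;

import org.junit.jupiter.api.Test;

/** Hypothetical test sketch; names, setup, and assumptions are illustrative only. */
class RobotDecoratorMapperTest {

    @Test
    void botUserAgentIsMappedToTheConfiguredDecorator() throws Exception {
        // A request that looks like a crawler: lowercase "googlebot" matches the case-sensitive scan.
        HttpServletRequest request = mock(HttpServletRequest.class);
        HttpSession session = mock(HttpSession.class);
        when(request.getSession(true)).thenReturn(session);
        when(request.getRequestURI()).thenReturn("/index.html");
        when(request.getHeader("User-Agent")).thenReturn("googlebot/2.1 (+http://www.google.com/bot.html)");
        when(request.getRemoteHost()).thenReturn("crawler.example.org");

        // Parent mapper in the chain; getNamedDecorator is assumed to delegate to it.
        DecoratorMapper parent = mock(DecoratorMapper.class);
        Decorator robotDecorator = mock(Decorator.class);
        when(parent.getNamedDecorator(request, "robot")).thenReturn(robotDecorator);

        Properties properties = new Properties();
        properties.setProperty("decorator", "robot"); // the "decorator" property read in init()

        RobotDecoratorMapper mapper = new RobotDecoratorMapper();
        mapper.init(null, properties, parent); // assumes the abstract base accepts a null Config

        Decorator result = mapper.getDecorator(request, null);

        assertSame(robotDecorator, result);
        // The mapper caches the detection result in the session for later requests.
        verify(session).setAttribute(RequestConstants.ROBOT, Boolean.TRUE);
    }
}

In a real deployment the "robot" name would refer to a decorator defined in the application's SiteMesh decorator configuration, and the mapper would typically be registered, with its decorator property, in the decorator-mapper chain; the exact wiring depends on the project's SiteMesh setup.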