• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Gallopsled / pwntools / 781b1967170151f34ef8334c55b13a3fe6c70e11

pending completion
781b1967170151f34ef8334c55b13a3fe6c70e11

push

github-actions

Arusekk
Use global env

3903 of 6420 branches covered (60.79%)

12247 of 16698 relevant lines covered (73.34%)

0.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.79
/pwnlib/libcdb.py
1
"""
2
Fetch a LIBC binary based on some heuristics.
3
"""
4
from __future__ import absolute_import
1✔
5
from __future__ import division
1✔
6

7
import codecs
1✔
8
import json
1✔
9
import os
1✔
10
import tempfile
1✔
11

12
from pwnlib.context import context
1✔
13
from pwnlib.elf import ELF
1✔
14
from pwnlib.log import getLogger
1✔
15
from pwnlib.tubes.process import process
1✔
16
from pwnlib.util.fiddling import b64d
1✔
17
from pwnlib.util.fiddling import enhex
1✔
18
from pwnlib.util.fiddling import hexdump
1✔
19
from pwnlib.util.misc import read
1✔
20
from pwnlib.util.misc import which
1✔
21
from pwnlib.util.misc import write
1✔
22
from pwnlib.util.safeeval import const
1✔
23
from pwnlib.util.web import wget
1✔
24

25
log = getLogger(__name__)
1✔
26

27
# Hash types accepted by ``search_by_hash``.
HASHES = ['build_id', 'sha1', 'sha256', 'md5']

# debuginfod servers that are queried, in order, for separate debug info.
DEBUGINFOD_SERVERS = [
    'https://debuginfod.elfutils.org/',
]

# Servers listed in the ``DEBUGINFOD_URLS`` environment variable are tried
# before the built-in default.
if 'DEBUGINFOD_URLS' in os.environ:
    # ``split()`` without an argument collapses runs of whitespace and drops
    # leading/trailing blanks, so no empty "server" entries are produced.
    urls = os.environ['DEBUGINFOD_URLS'].split()
    DEBUGINFOD_SERVERS = urls + DEBUGINFOD_SERVERS
×
35

36
# https://gitlab.com/libcdb/libcdb wasn't updated after 2019,
# but still is a massive database of older libc binaries.
def provider_libcdb(hex_encoded_id, hash_type):
    """
    Try to download the libc with the given hash from the libcdb GitLab
    repository.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded hash of the wanted library.
        hash_type(str):
            Name of the hash directory to look in (e.g. ``'build_id'``).

    Returns:
        The downloaded ELF data, or an empty/``None`` value if nothing
        usable could be fetched.
    """
    # Deferred import because it's slow
    import requests
    from six.moves import urllib

    # Upper bound on the number of requests (the first download plus the
    # symlink hops that follow), so that a misbehaving server cannot keep
    # us looping forever.
    max_requests = 8

    # Build the URL using the requested hash type
    url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type
    url      = urllib.parse.urljoin(url_base, hex_encoded_id)

    data     = b""
    try:
        for _ in range(max_requests):
            log.debug("Downloading data from LibcDB: %s", url)
            data = wget(url, timeout=20)

            if not data:
                log.warn_once("Could not fetch libc for %s %s from libcdb", hash_type, hex_encoded_id)
                break

            if data.startswith(b'\x7fELF'):
                break

            # GitLab apparently serves symlinks as small files that contain
            # the relative path of the link target. Anything else that is
            # not an ELF file (e.g. an error page) is unusable; retrying the
            # same URL would just return the same thing again.
            if not data.startswith(b'..'):
                log.warn_once("Unexpected data for %s %s from libcdb", hash_type, hex_encoded_id)
                data = b""
                break

            # Keep the URL a text string on every hop; ``urljoin`` resolves
            # the relative target against the directory of the current URL.
            target = data.decode('utf-8', 'replace').strip()
            url = urllib.parse.urljoin(url, target)
        else:
            log.warn_once("Too many symlinks while fetching libc for %s %s from libcdb", hash_type, hex_encoded_id)
            data = b""
    except requests.RequestException as e:
        log.warn_once("Failed to fetch libc for %s %s from libcdb: %s", hash_type, hex_encoded_id, e)
    return data
1✔
64

65
# https://libc.rip/
def provider_libc_rip(hex_encoded_id, hash_type):
    """
    Ask the libc.rip search API for a libc with the given hash and download
    the first match.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded hash of the wanted library.
        hash_type(str):
            Kind of hash that ``hex_encoded_id`` is (e.g. ``'build_id'``).

    Returns:
        The downloaded data, :const:`None` if there was no match or the
        download came back empty, or ``b""`` if a request failed.
    """
    # Deferred import because it's slow
    import requests

    # The API names the build id field without an underscore.
    # https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml
    if hash_type == 'build_id':
        hash_type = 'buildid'
    query = {hash_type: hex_encoded_id}

    data = b""
    try:
        response = requests.post("https://libc.rip/api/find", json=query, timeout=20)
        response.raise_for_status()
        matches = response.json()
        if not matches:
            log.warn_once("Could not find libc for %s %s on libc.rip", hash_type, hex_encoded_id)
            return None

        if len(matches) > 1:
            log.debug("Received multiple matches. Choosing the first match and discarding the others.")
            log.debug("%r", matches)

        download_url = matches[0]['download_url']
        log.debug("Downloading data from libc.rip: %s", download_url)
        data = wget(download_url, timeout=20)

        if not data:
            log.warn_once("Could not fetch libc for %s %s from libc.rip", hash_type, hex_encoded_id)
            return None
    except requests.RequestException as e:
        # Fall through and hand back whatever ``data`` holds at this point.
        log.warn_once("Failed to fetch libc for %s %s from libc.rip: %s", hash_type, hex_encoded_id, e)
    return data
1✔
100

101
# Download back-ends, tried in order until one of them returns an ELF file.
PROVIDERS = [provider_libcdb, provider_libc_rip]

def search_by_hash(hex_encoded_id, hash_type='build_id', unstrip=True):
    """
    Look up a libc by hash, using the local cache first and the entries of
    ``PROVIDERS`` otherwise.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded hash of the wanted library.
        hash_type(str):
            Kind of hash, one of ``HASHES``.
        unstrip(bool):
            Try to fetch debug info for the libc and apply it to the downloaded file.

    Returns:
        Path to the cached library on disk, or :const:`None` if no provider
        returned an ELF file.
    """
    assert hash_type in HASHES, hash_type

    # Ensure that the libcdb cache directory exists
    cache, cache_valid = _check_elf_cache('libcdb', hex_encoded_id, hash_type)
    if cache_valid:
        return cache

    # Start from "nothing found" so that an empty provider list does not
    # leave ``data`` unbound below.
    data = None

    # Run through all available libc database providers to see if we have a match.
    for provider in PROVIDERS:
        data = provider(hex_encoded_id, hash_type)
        if data and data.startswith(b'\x7FELF'):
            break

    if not data:
        log.warn_once("Could not find libc for %s %s anywhere", hash_type, hex_encoded_id)

    # Save whatever we got to the cache, even on a miss; the cache check
    # only accepts files that start with the ELF magic.
    write(cache, data or b'')

    # Return ``None`` if we did not get a valid ELF file
    if not data or not data.startswith(b'\x7FELF'):
        return None

    # Try to find debug info for this libc.
    if unstrip:
        unstrip_libc(cache)

    return cache
1✔
132

133
def _search_debuginfo_by_hash(base_url, hex_encoded_id):
    """
    Fetch the separate debug info for a build id from one debuginfod server.

    Arguments:
        base_url(str):
            URL of the debuginfod server to query.
        hex_encoded_id(str):
            Hex-encoded Build ID of the binary.

    Returns:
        Path to the cached debug info file, or :const:`None` if the server
        did not deliver an ELF file.
    """
    # Deferred import because it's slow
    import requests
    from six.moves import urllib

    # A previous successful download for this build id is reused as-is.
    cache_path, is_cached = _check_elf_cache('libcdb_dbg', hex_encoded_id, 'build_id')
    if is_cached:
        return cache_path

    # Location of the separate debuginfo on the server.
    debuginfo_url = urllib.parse.urljoin(base_url, '/buildid/{}/debuginfo'.format(hex_encoded_id))
    log.debug("Downloading data from debuginfod: %s", debuginfo_url)

    data = b""
    try:
        data = wget(debuginfo_url, timeout=20)
    except requests.RequestException as e:
        log.warn_once("Failed to fetch libc debuginfo for build_id %s from %s: %s", hex_encoded_id, base_url, e)

    # The result is written to the cache even when it is empty or invalid.
    write(cache_path, data or b'')

    # Only something that starts with the ELF magic counts as a hit.
    if data and data.startswith(b'\x7FELF'):
        return cache_path

    log.warn_once("Could not fetch libc debuginfo for build_id %s from %s", hex_encoded_id, base_url)
    return None
1✔
162

163
def _check_elf_cache(cache_type, hex_encoded_id, hash_type):
    """
    Check if there already is an ELF file for this hash in the cache.

    The cache directory ``<context.cache_dir>/<cache_type>/<hash_type>`` is
    created if it does not exist yet.

    Arguments:
        cache_type(str):
            Name of the cache sub-directory (e.g. ``'libcdb'``).
        hex_encoded_id(str):
            Hex-encoded hash; used as the file name inside the cache.
        hash_type(str):
            Kind of hash; used as the innermost directory name.

    Returns:
        A tuple ``(path, valid)``. ``path`` is where the file is (or would
        be) cached, ``valid`` is :const:`True` only if that file exists and
        starts with the ELF magic.

    >>> cache, _ = _check_elf_cache('libcdb', '2d1c5e0b85cb06ff47fa6fa088ec22cb6e06074e', 'build_id')
    >>> os.unlink(cache) if os.path.exists(cache) else None
    >>> filename = search_by_hash('2d1c5e0b85cb06ff47fa6fa088ec22cb6e06074e', 'build_id', unstrip=False)
    >>> hex(ELF(filename).symbols.read)
    '0xe56c0'
    >>> filename == cache
    True
    """
    # Ensure that the cache directory exists
    cache_dir = os.path.join(context.cache_dir, cache_type, hash_type)

    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)

    # If we already downloaded the file, and it looks even passingly like
    # a valid ELF file, return it.
    cache = os.path.join(cache_dir, hex_encoded_id)

    if not os.path.exists(cache):
        return cache, False

    log.debug("Found existing cached ELF at %r", cache)

    # A cache file without the ELF magic records an earlier failed lookup.
    data = read(cache)
    if not data.startswith(b'\x7FELF'):
        log.info_once("Skipping unavailable ELF %s", hex_encoded_id)
        return cache, False

    log.info_once("Using cached data from %r", cache)
    return cache, True
1✔
197

198
def unstrip_libc(filename):
    """
    Download the debug info that matches a libc binary and merge it back
    into that binary with ``eu-unstrip``.

    The file at ``filename`` is overwritten with the result.

    Arguments:
        filename(str):
            Path to the libc binary to unstrip.

    Returns:
        :const:`True` if binary was unstripped, :const:`False` otherwise.

    Examples:
        >>> filename = search_by_build_id('69389d485a9793dbe873f0ea2c93e02efaa9aa3d', unstrip=False)
        >>> libc = ELF(filename)
        >>> 'main_arena' in libc.symbols
        False
        >>> unstrip_libc(filename)
        True
        >>> libc = ELF(filename)
        >>> hex(libc.symbols.main_arena)
        '0x219c80'
        >>> unstrip_libc(which('python3'))
        False
        >>> filename = search_by_build_id('d1704d25fbbb72fa95d517b883131828c0883fe9', unstrip=True)
        >>> 'main_arena' in ELF(filename).symbols
        True
    """
    # Nothing can be done without the elfutils tool.
    if not which('eu-unstrip'):
        log.warn_once('Couldn\'t find "eu-unstrip" in PATH. Install elfutils first.')
        return False

    # The build id is the key used to look up debug info.
    libc = ELF(filename, checksec=False)
    if not libc.buildid:
        log.warn_once('Given libc does not have a buildid. Cannot look for debuginfo to unstrip.')
        return False

    log.debug('Trying debuginfod servers: %r', DEBUGINFOD_SERVERS)

    # Ask the servers in order and stop at the first one that has a match.
    build_id_hex = enhex(libc.buildid)
    debug_file = None
    for server in DEBUGINFOD_SERVERS:
        debug_file = _search_debuginfo_by_hash(server, build_id_hex)
        if debug_file:
            break

    if not debug_file:
        log.warn_once('Couldn\'t find debug info for libc with build_id %s on any debuginfod server.', build_id_hex)
        return False

    # Add debug info to given libc binary inplace.
    unstrip_proc = process(['eu-unstrip', '-o', filename, filename, debug_file])
    tool_output = unstrip_proc.recvall()
    unstrip_proc.close()

    # Any output from the tool is treated as a failure.
    if tool_output:
        log.error('Failed to unstrip libc binary: %r', tool_output)
        return False

    return True
1✔
257

258
def _handle_multiple_matching_libcs(matching_libcs):
    """
    Print every candidate libc with its hashes and symbols and let the user
    pick one interactively.

    Arguments:
        matching_libcs(list):
            Match dictionaries; the keys ``id``, ``buildid``, ``md5``,
            ``sha1``, ``sha256`` and ``symbols`` are read from each.

    Returns:
        The entry of ``matching_libcs`` that the user selected.
    """
    from pwnlib.term import text
    from pwnlib.ui import options

    # Label / dictionary key pairs, in the order they are printed.
    hash_rows = (
        ('BuildID:', 'buildid'),
        ('MD5:', 'md5'),
        ('SHA1:', 'sha1'),
        ('SHA256:', 'sha256'),
    )

    log.info('Multiple matching libc libraries for requested symbols:')
    for number, candidate in enumerate(matching_libcs, 1):
        log.info('%d. %s', number, text.red(candidate['id']))
        for label, key in hash_rows:
            log.indented('\t%-20s %s', text.green(label), candidate[key])
        log.indented('\t%s', text.green('Symbols:'))
        for symbol, address in candidate['symbols'].items():
            log.indented('\t%25s = %s', symbol, address)

    choices = [candidate['id'] for candidate in matching_libcs]
    chosen = options("Select the libc version to use:", choices)
    return matching_libcs[chosen]
×
274

275
def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False):
    """
    Lookup possible matching libc versions based on leaked function addresses.

    The leaked function addresses have to be provided as a dict mapping the
    function name to the leaked value. Only the lower 3 nibbles are relevant
    for the lookup.

    If there are multiple matches you are presented with a list to select one
    interactively, unless the ``select_index`` or ``return_as_list`` arguments
    are used.

    Arguments:
        symbols(dict):
            Dictionary mapping symbol names to their addresses.
            Integer addresses are converted to hex strings for the request;
            the passed dictionary itself is not modified.
        select_index(int):
            The libc to select if there are multiple matches (starting at 1).
        unstrip(bool):
            Try to fetch debug info for the libc and apply it to the downloaded file.
        return_as_list(bool):
            Return a list of build ids of all matching libc versions
            instead of a path to a downloaded file.

    Returns:
        Path to the downloaded library on disk, or :const:`None`.
        If the ``return_as_list`` argument is :const:`True`, a list of build ids
        is returned instead.

    Examples:
        >>> filename = search_by_symbol_offsets({'puts': 0x420, 'printf': 0xc90}, select_index=1)
        >>> libc = ELF(filename)
        >>> libc.sym.system == 0x52290
        True
        >>> matched_libcs = search_by_symbol_offsets({'__libc_start_main_ret': '7f89ad926550'}, return_as_list=True)
        >>> len(matched_libcs) > 1
        True
        >>> for buildid in matched_libcs: # doctest: +SKIP
        ...     libc = ELF(search_by_build_id(buildid)) # doctest: +SKIP
    """
    # Deferred import because it's slow
    import requests

    # Normalize the addresses on a copy so that the caller's dictionary is
    # left untouched.
    query = {}
    for symbol, address in symbols.items():
        query[symbol] = hex(address) if isinstance(address, int) else address

    try:
        params = {'symbols': query}
        url    = "https://libc.rip/api/find"
        log.debug('Request: %s', params)
        result = requests.post(url, json=params, timeout=20)
        result.raise_for_status()

        matching_libcs = result.json()
        log.debug('Result: %s', matching_libcs)
        if len(matching_libcs) == 0:
            log.warn_once("No matching libc for symbols %r on libc.rip", query)
            return None

        if return_as_list:
            return [libc['buildid'] for libc in matching_libcs]

        # A single match needs no selection at all.
        if len(matching_libcs) == 1:
            return search_by_build_id(matching_libcs[0]['buildid'], unstrip=unstrip)

        if select_index is not None:
            # ``select_index`` is 1-based.
            if 1 <= select_index <= len(matching_libcs):
                return search_by_build_id(matching_libcs[select_index - 1]['buildid'], unstrip=unstrip)

            log.error('Invalid selected libc index. %d is not in the range of 1-%d.', select_index, len(matching_libcs))
            return None

        # Several matches and no preselection: ask the user.
        selected_libc = _handle_multiple_matching_libcs(matching_libcs)
        return search_by_build_id(selected_libc['buildid'], unstrip=unstrip)
    except requests.RequestException as e:
        log.warn_once("Failed to lookup libc for symbols %r from libc.rip: %s", query, e)
        return None
×
349

350
def search_by_build_id(hex_encoded_id, unstrip=True):
    """
    Look up and download the libc that has the given hex-encoded Build ID.

    This is :func:`search_by_hash` with the hash type fixed to ``'build_id'``.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded Build ID (e.g. 'ABCDEF...') of the library
        unstrip(bool):
            Try to fetch debug info for the libc and apply it to the downloaded file.

    Returns:
        Path to the downloaded library on disk, or :const:`None`.

    Examples:
        >>> filename = search_by_build_id('fe136e485814fee2268cf19e5c124ed0f73f4400')
        >>> hex(ELF(filename).symbols.read)
        '0xda260'
        >>> None == search_by_build_id('XX')
        True
        >>> filename = search_by_build_id('a5a3c3f65fd94f4c7f323a175707c3a79cbbd614')
        >>> hex(ELF(filename).symbols.read)
        '0xeef40'
    """
    return search_by_hash(hex_encoded_id, hash_type='build_id', unstrip=unstrip)
1✔
374

375
def search_by_md5(hex_encoded_id, unstrip=True):
    """
    Look up and download the libc that has the given hex-encoded md5sum.

    This is :func:`search_by_hash` with the hash type fixed to ``'md5'``.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded md5sum (e.g. 'ABCDEF...') of the library
        unstrip(bool):
            Try to fetch debug info for the libc and apply it to the downloaded file.

    Returns:
        Path to the downloaded library on disk, or :const:`None`.

    Examples:
        >>> filename = search_by_md5('7a71dafb87606f360043dcd638e411bd')
        >>> hex(ELF(filename).symbols.read)
        '0xda260'
        >>> None == search_by_md5('XX')
        True
        >>> filename = search_by_md5('74f2d3062180572fc8bcd964b587eeae')
        >>> hex(ELF(filename).symbols.read)
        '0xeef40'
    """
    return search_by_hash(hex_encoded_id, hash_type='md5', unstrip=unstrip)
1✔
399

400
def search_by_sha1(hex_encoded_id, unstrip=True):
    """
    Look up and download the libc that has the given hex-encoded sha1sum.

    This is :func:`search_by_hash` with the hash type fixed to ``'sha1'``.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded sha1sum (e.g. 'ABCDEF...') of the library
        unstrip(bool):
            Try to fetch debug info for the libc and apply it to the downloaded file.

    Returns:
        Path to the downloaded library on disk, or :const:`None`.

    Examples:
        >>> filename = search_by_sha1('34471e355a5e71400b9d65e78d2cd6ce7fc49de5')
        >>> hex(ELF(filename).symbols.read)
        '0xda260'
        >>> None == search_by_sha1('XX')
        True
        >>> filename = search_by_sha1('0041d2f397bc2498f62aeb4134d522c5b2635e87')
        >>> hex(ELF(filename).symbols.read)
        '0xeef40'
    """
    return search_by_hash(hex_encoded_id, hash_type='sha1', unstrip=unstrip)
1✔
424

425

426
def search_by_sha256(hex_encoded_id, unstrip=True):
    """
    Look up and download the libc that has the given hex-encoded sha256sum.

    This is :func:`search_by_hash` with the hash type fixed to ``'sha256'``.

    Arguments:
        hex_encoded_id(str):
            Hex-encoded sha256sum (e.g. 'ABCDEF...') of the library
        unstrip(bool):
            Try to fetch debug info for the libc and apply it to the downloaded file.

    Returns:
        Path to the downloaded library on disk, or :const:`None`.

    Examples:
        >>> filename = search_by_sha256('5e877a8272da934812d2d1f9ee94f73c77c790cbc5d8251f5322389fc9667f21')
        >>> hex(ELF(filename).symbols.read)
        '0xda260'
        >>> None == search_by_sha256('XX')
        True
        >>> filename = search_by_sha256('5d78fc60054df18df20480c71f3379218790751090f452baffb62ac6b2aff7ee')
        >>> hex(ELF(filename).symbols.read)
        '0xeef40'
    """
    return search_by_hash(hex_encoded_id, hash_type='sha256', unstrip=unstrip)
1✔
450

451

452

453

454
def get_build_id_offsets():
    """
    Return the candidate file offsets of the Build ID note for ELF files of
    the architecture selected in ``context.arch``.

    An empty list is returned for architectures without a known offset.
    """
    # Given the corpus of almost all libc to have been released with
    # RedHat, Fedora, Ubuntu, Debian, etc. over the past several years,
    # we can say with 99% certainty that the GNU Build ID section will
    # be at one of the specified addresses.
    #
    # The point here is to get an easy win by reading less DWORDs than would
    # have otherwise been required to walk the section table and the string
    # table.
    #
    # function check_arch() {
    # readelf -n $(file -L * | grep -i "$1" | cut -d ':' -f 1) \
    #       | grep -B3 BUILD_ID \
    #       | grep offset \
    #       | sort \
    #       | uniq -c
    # }

    offsets_by_arch = {
        # $ check_arch 80386
        #     181 Displaying notes found at file offset 0x00000174 with length 0x00000024:
        'i386': [0x174, 0x1b4, 0x1d4],
        # $ check_arch "ARM, EABI5"
        #      69 Displaying notes found at file offset 0x00000174 with length 0x00000024:
        'arm':  [0x174],
        'thumb':  [0x174],
        # $ check_arch "ARM aarch64"
        #       1 Displaying notes found at file offset 0x00000238 with length 0x00000024:
        'aarch64': [0x238],
        # $ check_arch "x86-64"
        #       6 Displaying notes found at file offset 0x00000174 with length 0x00000024:
        #      82 Displaying notes found at file offset 0x00000270 with length 0x00000024:
        'amd64': [0x270, 0x174, 0x2e0, 0x370],
        # $ check_arch "PowerPC or cisco"
        #      88 Displaying notes found at file offset 0x00000174 with length 0x00000024:
        'powerpc': [0x174],
        # $ check_arch "64-bit PowerPC"
        #      30 Displaying notes found at file offset 0x00000238 with length 0x00000024:
        'powerpc64': [0x238],
        # $ check_arch "SPARC32"
        #      32 Displaying notes found at file offset 0x00000174 with length 0x00000024:
        'sparc': [0x174],
        # $ check_arch "SPARC V9"
        #      33 Displaying notes found at file offset 0x00000270 with length 0x00000024:
        'sparc64': [0x270],
    }

    return offsets_by_arch.get(context.arch, [])
504

505

506
# Names exported by ``from pwnlib.libcdb import *``; helpers that start with
# an underscore, the provider functions and ``search_by_hash`` are left out.
__all__ = ['get_build_id_offsets', 'search_by_build_id', 'search_by_sha1', 'search_by_sha256', 'search_by_md5', 'unstrip_libc', 'search_by_symbol_offsets']
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc