• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Gallopsled / pwntools / 5500924378

pending completion
5500924378

push

github-actions

peace-maker
Update CHANGELOG

3968 of 6659 branches covered (59.59%)

12136 of 16977 relevant lines covered (71.48%)

0.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

42.74
/pwnlib/dynelf.py
1
"""
2
Resolve symbols in loaded, dynamically-linked ELF binaries.
3
Given a function which can leak data at an arbitrary address,
4
any symbol in any loaded library can be resolved.
5

6
Example
7
^^^^^^^^
8

9
::
10

11
    # Assume a process or remote connection
12
    p = process('./pwnme')
13

14
    # Declare a function that takes a single address, and
15
    # leaks at least one byte at that address.
16
    def leak(address):
17
        data = p.read(address, 4)
18
        log.debug("%#x => %s", address, enhex(data or ''))
19
        return data
20

21
    # For the sake of this example, let's say that we
22
    # have any of these pointers.  One is a pointer into
23
    # the target binary, the other two are pointers into libc
24
    main   = 0xfeedf4ce
25
    libc   = 0xdeadb000
26
    system = 0xdeadbeef
27

28
    # With our leaker, and a pointer into our target binary,
29
    # we can resolve the address of anything.
30
    #
31
    # We do not actually need to have a copy of the target
32
    # binary for this to work.
33
    d = DynELF(leak, main)
34
    assert d.lookup(None,     'libc') == libc
35
    assert d.lookup('system', 'libc') == system
36

37
    # However, if we *do* have a copy of the target binary,
38
    # we can speed up some of the steps.
39
    d = DynELF(leak, main, elf=ELF('./pwnme'))
40
    assert d.lookup(None,     'libc') == libc
41
    assert d.lookup('system', 'libc') == system
42

43
    # Alternately, we can resolve symbols inside another library,
44
    # given a pointer into it.
45
    d = DynELF(leak, libc + 0x1234)
46
    assert d.lookup('system')      == system
47

48
DynELF
49
"""
50
from __future__ import absolute_import
1✔
51
from __future__ import division
1✔
52

53
import ctypes
1✔
54

55
from elftools.elf.enums import ENUM_D_TAG
1✔
56

57
from pwnlib import elf
1✔
58
from pwnlib import libcdb
1✔
59
from pwnlib.context import context
1✔
60
from pwnlib.elf import ELF
1✔
61
from pwnlib.elf import constants
1✔
62
from pwnlib.log import getLogger
1✔
63
from pwnlib.memleak import MemLeak
1✔
64
from pwnlib.util.fiddling import enhex
1✔
65
from pwnlib.util.packing import _need_bytes
1✔
66

67
log    = getLogger(__name__)
1✔
68
sizeof = ctypes.sizeof
1✔
69

70
def sysv_hash(symbol):
    """sysv_hash(str) -> int

    Compute the SYSV-style ELF hash of a symbol name.

    Arguments:
        symbol(str): Symbol name; converted with ``_need_bytes`` before
            being hashed byte by byte.

    Returns:
        The hash value, masked to 32 bits.
    """
    acc = 0
    for byte in bytearray(_need_bytes(symbol, 4, 0x80)):
        acc = (acc << 4) + byte
        # Fold the top nibble back into the low bits, then clear it.
        top = acc & 0xf0000000
        acc = (acc ^ (top >> 24)) & ~top
    return acc & 0xffffffff
×
83

84
def gnu_hash(s):
    """gnu_hash(str) -> int

    Compute the GNU-style ELF hash of a symbol name.

    Arguments:
        s(str): Symbol name; converted with ``_need_bytes`` before being
            hashed byte by byte.

    Returns:
        The hash value, masked to 32 bits.
    """
    digest = 5381
    for byte in bytearray(_need_bytes(s, 4, 0x80)):
        # digest * 33 + byte, kept within 32 bits at every step
        digest = ((digest << 5) + digest + byte) & 0xffffffff
    return digest & 0xffffffff
×
94

95
class DynELF(object):
1✔
96
    '''
97
    DynELF knows how to resolve symbols in remote processes via an infoleak or
98
    memleak vulnerability encapsulated by :class:`pwnlib.memleak.MemLeak`.
99

100
    Implementation Details:
101

102
        Resolving Functions:
103

104
            In all ELFs which export symbols for importing by other libraries,
105
            (e.g. ``libc.so``) there are a series of tables which give exported
106
            symbol names, exported symbol addresses, and the ``hash`` of those
107
            exported symbols.  By applying a hash function to the name of the
108
            desired symbol (e.g., ``'printf'``), it can be located in the hash
109
            table.  Its location in the hash table provides an index into the
110
            string name table (strtab_), and the symbol address (symtab_).
111

112
            Assuming we have the base address of ``libc.so``, the way to resolve
113
            the address of ``printf`` is to locate the ``symtab``, ``strtab``,
114
            and hash table. The string ``"printf"`` is hashed according to the
115
            style of the hash table (SYSV_ or GNU_), and the hash table is
116
            walked until a matching entry is located. We can verify an exact
117
            match by checking the string table, and then get the offset into
118
            ``libc.so`` from the ``symtab``.
119

120
        Resolving Library Addresses:
121

122
            If we have a pointer into a dynamically-linked executable, we can
123
            leverage an internal linker structure called the `link map`_. This
124
            is a linked list structure which contains information about each
125
            loaded library, including its full path and base address.
126

127
            A pointer to the ``link map`` can be found in two ways.  Both are
128
            referenced from entries in the DYNAMIC_ array.
129

130
            - In non-RELRO binaries, a pointer is placed in the `.got.plt`_ area
131
              in the binary. This is marked by finding the DT_PLTGOT_ area in the
132
              binary.
133
            - In all binaries, a pointer can be found in the area described by
134
              the DT_DEBUG_ area.  This exists even in stripped binaries.
135

136
            For maximum flexibility, both mechanisms are used exhaustively.
137

138
    .. _symtab:    https://refspecs.linuxbase.org/elf/gabi4+/ch4.symtab.html
139
    .. _strtab:    https://refspecs.linuxbase.org/elf/gabi4+/ch4.strtab.html
140
    .. _.got.plt:  https://refspecs.linuxbase.org/LSB_3.1.1/LSB-Core-generic/LSB-Core-generic/specialsections.html
141
    .. _DYNAMIC:   http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#dynamic_section
142
    .. _SYSV:      https://refspecs.linuxbase.org/elf/gabi4+/ch5.dynamic.html#hash
143
    .. _GNU:       https://blogs.oracle.com/solaris/post/gnu-hash-elf-sections
144
    .. _DT_DEBUG:  https://reverseengineering.stackexchange.com/questions/6525/elf-link-map-when-linked-as-relro
145
    .. _link map:  https://sourceware.org/git/?p=glibc.git;a=blob;f=elf/link.h;h=eaca8028e45a859ac280301a6e955a14eed1b887;hb=HEAD#l84
146
    .. _DT_PLTGOT: https://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_zSeries/x2251.html
147
    '''
148

149
    def __init__(self, leak, pointer=None, elf=None, libcdb=True):
        '''
        Instantiates an object which can resolve symbols in a running binary
        given a :class:`pwnlib.memleak.MemLeak` leaker and a pointer inside
        the binary.

        Arguments:
            leak(MemLeak): Instance of pwnlib.memleak.MemLeak for leaking memory.
                Anything else is wrapped with ``MemLeak(leak)``.
            pointer(int):  A pointer into a loaded ELF file.  If omitted,
                ``elf.address`` is used instead.
            elf(str,ELF):  Path to the ELF file on disk, or a loaded :class:`pwnlib.elf.ELF`.
            libcdb(bool):  Attempt to use libcdb to speed up libc lookups
        '''
        self.libcdb    = libcdb
        self._elfclass = None
        self._elftype  = None
        self._link_map = None
        self._waitfor  = None
        self._bases    = {}
        self._dynamic  = None

        # A path is documented as acceptable, but the code below and
        # _find_base_optimized() use ELF attributes (.address, .search),
        # so turn a path string into an ELF object up front.
        if elf and isinstance(elf, (bytes, type(u''))):
            with context.local(log_level='error'):
                elf = ELF(elf)

        if not (pointer or (elf and elf.address)):
            log.error("Must specify either a pointer into a module and/or an ELF file with a valid base address")

        pointer = pointer or elf.address

        if not isinstance(leak, MemLeak):
            leak = MemLeak(leak)

        if not elf:
            log.warn_once("No ELF provided.  Leaking is much faster if you have a copy of the ELF being leaked.")

        self.elf     = elf
        self.leak    = leak
        self.libbase = self._find_base(pointer)

        if elf:
            self._find_linkmap_assisted(elf)
1✔
186

187
    @classmethod
1✔
188
    def for_one_lib_only(cls, leak, ptr):
1✔
189
        return cls(leak, ptr)
×
190

191
    @classmethod
1✔
192
    def from_lib_ptr(cls, leak, ptr):
1✔
193
        return cls(leak, ptr)
×
194

195
    @staticmethod
    def find_base(leak, ptr):
        """Given a :class:`pwnlib.memleak.MemLeak` object and a pointer into a
        library, find its base address.

        A temporary :class:`DynELF` is constructed and its ``libbase`` is
        returned.
        """
        resolver = DynELF(leak, ptr)
        return resolver.libbase
×
201

202
    @property
1✔
203
    def elfclass(self):
1✔
204
        """32 or 64"""
205
        if not self._elfclass:
1✔
206
            elfclass = self.leak.field(self.libbase, elf.Elf_eident.EI_CLASS)
1✔
207
            self._elfclass =  {constants.ELFCLASS32: 32,
1✔
208
                              constants.ELFCLASS64: 64}[elfclass]
209
        return self._elfclass
1✔
210

211
    @property
1✔
212
    def elftype(self):
1✔
213
        """e_type from the elf header. In practice the value will almost always
214
        be 'EXEC' or 'DYN'. If the value is architecture-specific (between
215
        ET_LOPROC and ET_HIPROC) or invalid, KeyError is raised.
216
        """
217
        if not self._elftype:
1✔
218
            Ehdr  = {32: elf.Elf32_Ehdr, 64: elf.Elf64_Ehdr}[self.elfclass]
1✔
219
            elftype = self.leak.field(self.libbase, Ehdr.e_type)
1✔
220
            self._elftype = {constants.ET_NONE: 'NONE',
1✔
221
                             constants.ET_REL: 'REL',
222
                             constants.ET_EXEC: 'EXEC',
223
                             constants.ET_DYN: 'DYN',
224
                             constants.ET_CORE: 'CORE'}[elftype]
225
        return self._elftype
1✔
226

227
    @property
1✔
228
    def link_map(self):
1✔
229
        """Pointer to the runtime link_map object"""
230
        if not self._link_map:
1!
231
            self._link_map = self._find_linkmap()
1✔
232
        return self._link_map
1✔
233

234
    @property
1✔
235
    def dynamic(self):
1✔
236
        """
237
        Returns:
238
            Pointer to the ``.DYNAMIC`` area.
239
        """
240
        if not self._dynamic:
1✔
241
            self._dynamic = self._find_dynamic_phdr()
1✔
242
        return self._dynamic
1✔
243

244
    def _find_linkmap_assisted(self, path):
        """Uses an ELF file to assist in finding the link_map.

        Arguments:
            path(str,ELF): Path of the ELF on disk, or an ELF object whose
                ``path`` attribute is used.
        """
        disk_path = path.path if isinstance(path, ELF) else path

        # Work on a private copy of the ELF, rebased to the leaked base.
        with context.local(log_level='error'):
            local_elf = ELF(disk_path)
        local_elf.address = self.libbase

        progress = self.waitfor("Loading from %r" % local_elf.path)

        # Keep the real leaker around so it can be put back afterwards.
        remote_leak = self.leak

        # Serve reads from the local copy when possible.  Reads the local
        # copy rejects with ValueError (e.g. the linkmap or GOT) fall back
        # to the real leaker.
        @MemLeak
        def fake_leak(address):
            try:
                return local_elf.read(address, 4)
            except ValueError:
                return remote_leak.b(address)

        # Walk the DYNAMIC array through the local-file leaker ...
        self.leak = fake_leak

        progress.status("Searching for DT_PLTGOT")
        pltgot = self._find_dt(constants.DT_PLTGOT)

        progress.status("Searching for DT_DEBUG")
        debug = self._find_dt(constants.DT_DEBUG)

        # ... then switch back to the real leaker.
        self.leak = remote_leak

        # NOTE(review): the value returned by _find_linkmap() is not stored
        # here, so the ``link_map`` property resolves it again on first use.
        self._find_linkmap(pltgot, debug)
        self.success('Done')
1✔
286

287
    def _find_base(self, ptr):
1✔
288
        page_size = 0x1000
1✔
289
        page_mask = ~(page_size - 1)
1✔
290

291
        ptr &= page_mask
1✔
292
        w = None
1✔
293

294
        while True:
295
            if self.leak.compare(ptr, b'\x7fELF'):
1✔
296
                break
1✔
297

298
            # See if we can short circuit the search
299
            fast = self._find_base_optimized(ptr)
1✔
300
            if fast:
1!
301
                ptr = fast
1✔
302
                continue
1✔
303

304
            ptr -= page_size
×
305

306
            if ptr < 0:
×
307
                raise ValueError("Address is negative, something is wrong!")
×
308

309
            # Defer creating the spinner in the event that 'ptr'
310
            # is already the base address
311
            w = w or self.waitfor("Finding base address")
×
312
            self.status('%#x' % ptr)
×
313

314
        # If we created a spinner, print the success message
315
        if w:
1!
316
            self.success('%#x' % ptr)
×
317

318
        return ptr
1✔
319

320
    def _find_base_optimized(self, ptr):
1✔
321
        if not self.elf:
1!
322
            return None
×
323

324
        # If we have an ELF< we can probably speed this up a little bit?
325
        # Note that we add +0x20 onto the offset in order to avoid needing
326
        # to leak any bytes which contain '\r\n\t\b '
327
        ptr += 0x20
1✔
328
        data = self.leak.n(ptr, 32)
1✔
329
        if not data:
1!
330
            return None
×
331

332
        # Do not permit multiple matches
333
        matches = list(self.elf.search(data))
1✔
334
        if len(matches) != 1:
1!
335
            return None
×
336

337
        candidate = matches[0]
1✔
338
        candidate -= self.elf.address
1✔
339

340
        # The match should have the same page-alignment as our leaked data.
341
        if candidate & 0xfff != 0x20:
1!
342
            return None
×
343

344
        # Adjust based on the original pointer we got, and the ELF's address.
345
        ptr -= candidate
1✔
346
        return ptr
1✔
347

348
    def _find_dynamic_phdr(self):
        """
        Finds the first Program Header with the type PT_DYNAMIC.

        Returns:
            The ``p_vaddr`` of that header after ``_make_absolute_ptr``, or
            None if no such header exists.
        """
        reader = self.leak
        image  = self.libbase

        # Header layouts for this ELF class
        Ehdr = {32: elf.Elf32_Ehdr, 64: elf.Elf64_Ehdr}[self.elfclass]
        Phdr = {32: elf.Elf32_Phdr, 64: elf.Elf64_Phdr}[self.elfclass]

        self.status("PT_DYNAMIC")

        cursor = image + reader.field(image, Ehdr.e_phoff)
        self.status("PT_DYNAMIC header = %#x" % cursor)

        remaining = reader.field(image, Ehdr.e_phnum)
        self.status("PT_DYNAMIC count = %#x" % remaining)

        # Step through the program headers until PT_DYNAMIC shows up or
        # the table is exhausted.
        while remaining > 0 and not reader.field_compare(cursor, Phdr.p_type, constants.PT_DYNAMIC):
            cursor += sizeof(Phdr)
            remaining -= 1

        if remaining <= 0:
            self.failure("Could not find Program Header of type PT_DYNAMIC")
            return None

        vaddr = reader.field(cursor, Phdr.p_vaddr)
        self.status("PT_DYNAMIC @ %#x" % vaddr)

        return self._make_absolute_ptr(vaddr)
1✔
382

383
    def _find_dt(self, tag):
        """
        Find an entry in the DYNAMIC array.

        Arguments:
            tag(int): Single tag to find

        Returns:
            Pointer to the data described by the specified entry (its
            ``d_ptr`` after ``_make_absolute_ptr``), or None if the end of
            the array is reached first.
        """
        reader   = self.leak
        entry    = self.dynamic
        tag_name = next(k for k, v in ENUM_D_TAG.items() if v == tag)

        Dyn = {32: elf.Elf32_Dyn, 64: elf.Elf64_Dyn}[self.elfclass]

        # An entry with a DT_NULL tag marks the end of the DYNAMIC array.
        while True:
            if reader.field_compare(entry, Dyn.d_tag, constants.DT_NULL):
                self.failure("Could not find tag %s" % tag_name)
                return None
            if reader.field_compare(entry, Dyn.d_tag, tag):
                break
            entry += sizeof(Dyn)

        self.status("Found %s at %#x" % (tag_name, entry))
        value = reader.field(entry, Dyn.d_ptr)

        return self._make_absolute_ptr(value)
1✔
416

417

418
    def _find_linkmap(self, pltgot=None, debug=None):
        """
        The linkmap is a chained structure created by the loader at runtime
        which contains information on the names and load addresses of all
        libraries.

        For non-RELRO binaries, a pointer to this is stored in the .got.plt
        area.

        For RELRO binaries, a pointer is additionally stored in the DT_DEBUG
        area.

        Arguments:
            pltgot(int): Already-known DT_PLTGOT pointer, if any.
            debug(int):  Already-known DT_DEBUG pointer, if any.

        Returns:
            The linkmap pointer after ``_make_absolute_ptr``, or None.
        """
        progress = self.waitfor("Finding linkmap")

        Got     = {32: elf.Elf_i386_GOT, 64: elf.Elf_x86_64_GOT}[self.elfclass]
        r_debug = {32: elf.Elf32_r_debug, 64: elf.Elf64_r_debug}[self.elfclass]

        found = None

        # First choice: the linkmap slot in the GOT.
        if not pltgot:
            progress.status("Finding linkmap: DT_PLTGOT")
            pltgot = self._find_dt(constants.DT_PLTGOT)

        if pltgot:
            progress.status("GOT.linkmap")
            found = self.leak.field(pltgot, Got.linkmap)
            progress.status("GOT.linkmap %#x" % found)

        # Second choice: r_debug.r_map via DT_DEBUG.
        if not found:
            if not debug:
                debug = self._find_dt(constants.DT_DEBUG)
            if debug:
                progress.status("r_debug.linkmap")
                found = self.leak.field(debug, r_debug.r_map)
                progress.status("r_debug.linkmap %#x" % found)

        if not found:
            progress.failure("Could not find DT_PLTGOT or DT_DEBUG")
            return None

        found = self._make_absolute_ptr(found)

        progress.success('%#x' % found)
        return found
1✔
461

462
    def waitfor(self, msg):
1✔
463
        if not self._waitfor:
1✔
464
            self._waitfor = log.waitfor(msg)
1✔
465
        else:
466
            self.status(msg)
1✔
467
        return self._waitfor
1✔
468

469
    def failure(self, msg):
1✔
470
        if not self._waitfor:
×
471
            log.failure(msg)
×
472
        else:
473
            self._waitfor.failure(msg)
×
474
            self._waitfor = None
×
475

476
    def success(self, msg):
1✔
477
        if not self._waitfor:
1!
478
            log.success(msg)
×
479
        else:
480
            self._waitfor.success(msg)
1✔
481
            self._waitfor = None
1✔
482

483
    def status(self, msg):
1✔
484
        if not self._waitfor:
1!
485
            log.info(msg)
×
486
        else:
487
            self._waitfor.status(msg)
1✔
488

489
    @property
1✔
490
    def libc(self):
1✔
491
        """libc(self) -> ELF
492

493
        Leak the Build ID of the remote libc.so, download the file,
494
        and load an ``ELF`` object with the correct base address.
495

496
        Returns:
497
            An ELF object, or None.
498
        """
499
        libc = b'libc.so'
×
500

501
        with self.waitfor('Downloading libc'):
×
502
            dynlib = self._dynamic_load_dynelf(libc)
×
503

504
            self.status("Trying lookup based on Build ID")
×
505
            build_id = dynlib._lookup_build_id(libc)
×
506

507
            if not build_id:
×
508
                return None
×
509

510
            self.status("Trying lookup based on Build ID: %s" % build_id)
×
511
            path = libcdb.search_by_build_id(build_id)
×
512

513
            if not path:
×
514
                return None
×
515

516
            libc = ELF(path)
×
517
            libc.address = dynlib.libbase
×
518
            return libc
×
519

520
    def lookup (self, symb = None, lib = None):
1✔
521
        """lookup(symb = None, lib = None) -> int
522

523
        Find the address of ``symbol``, which is found in ``lib``.
524

525
        Arguments:
526
            symb(str): Named routine to look up
527
              If omitted, the base address of the library will be returned.
528
            lib(str): Substring to match for the library name.
529
              If omitted, the current library is searched.
530
              If set to ``'libc'``, ``'libc.so'`` is assumed.
531

532
        Returns:
533
            Address of the named symbol, or :const:`None`.
534
        """
535
        result = None
1✔
536

537
        if lib == 'libc':
1!
538
            lib = 'libc.so'
1✔
539

540
        if symb:
1!
541
            symb = _need_bytes(symb, min_wrong=0x80)
1✔
542

543
        #
544
        # Get a pretty name for the symbol to show the user
545
        #
546
        if symb and lib:
1!
547
            pretty = '%r in %r' % (symb, lib)
1✔
548
        else:
549
            pretty = repr(symb or lib)
×
550

551
        if not pretty:
1!
552
            self.failure("Must specify a library or symbol")
×
553

554
        self.waitfor('Resolving %s' % pretty)
1✔
555

556
        #
557
        # If we are loading from a different library, create
558
        # a DynELF instance for it.
559
        #
560
        if lib is not None: dynlib = self._dynamic_load_dynelf(lib)
1!
561
        else:   dynlib = self
×
562

563
        if dynlib is None:
×
564
            log.failure("Could not find %r", lib)
×
565
            return None
×
566

567
        #
568
        # If we are resolving a symbol in the library, find it.
569
        #
570
        if symb and self.libcdb:
×
571
            # Try a quick lookup by build ID
572
            self.status("Trying lookup based on Build ID")
×
573
            build_id = dynlib._lookup_build_id(lib=lib)
×
574
            if build_id:
×
575
                log.info("Trying lookup based on Build ID: %s", build_id)
×
576
                path = libcdb.search_by_build_id(build_id)
×
577
                if path:
×
578
                    with context.local(log_level='error'):
×
579
                        e = ELF(path)
×
580
                        e.address = dynlib.libbase
×
581
                        result = e.symbols[symb]
×
582
        if symb and not result:
×
583
            self.status("Trying remote lookup")
×
584
            result = dynlib._lookup(symb)
×
585
        if not symb:
×
586
            result = dynlib.libbase
×
587

588
        #
589
        # Did we win?
590
        #
591
        if result: self.success("%#x" % result)
×
592
        else:      self.failure("Could not find %s" % pretty)
×
593

594
        return result
×
595

596
    def bases(self):
1✔
597
        '''Resolve base addresses of all loaded libraries.
598

599
        Return a dictionary mapping library path to its base address.
600
        '''
601
        if not self._bases:
×
602
            leak    = self.leak
×
603
            LinkMap = {32: elf.Elf32_Link_Map, 64: elf.Elf64_Link_Map}[self.elfclass]
×
604

605
            cur = self.link_map
×
606

607
            # make sure we rewind to the beginning!
608
            while leak.field(cur, LinkMap.l_prev):
×
609
                cur = leak.field(cur, LinkMap.l_prev)
×
610

611
            while cur:
×
612
                p_name = leak.field(cur, LinkMap.l_name)
×
613
                name   = leak.s(p_name)
×
614
                addr   = leak.field(cur, LinkMap.l_addr)
×
615
                cur    = leak.field(cur, LinkMap.l_next)
×
616

617
                log.debug('Found %r @ %#x', name, addr)
×
618

619
                self._bases[name] = addr
×
620

621
        return self._bases
×
622

623
    def _dynamic_load_dynelf(self, libname):
        """_dynamic_load_dynelf(libname) -> DynELF

        Looks up information about a loaded library via the link map.

        Arguments:
            libname(str):  Name of the library to resolve, or a substring (e.g. 'libc.so')

        Returns:
            A DynELF instance for the loaded library, or None.
        """
        node    = self.link_map
        reader  = self.leak
        LinkMap = {32: elf.Elf32_Link_Map, 64: elf.Elf64_Link_Map}[self.elfclass]

        # make sure we rewind to the beginning!
        while reader.field(node, LinkMap.l_prev):
            node = reader.field(node, LinkMap.l_prev)

        libname = _need_bytes(libname, 2, 0x80)

        # Walk forward until an entry's name contains the requested substring.
        while node:
            self.status("link_map entry %#x" % node)
            name_ptr = reader.field(node, LinkMap.l_name)
            name     = reader.s(name_ptr)

            if libname in name:
                break

            if name:
                self.status('Skipping %s' % name)

            node = reader.field(node, LinkMap.l_next)

        # A falsy node means the list ended without a match.
        if not node:
            self.failure("Could not find library with name containing %r" % libname)
            return None

        libbase = reader.field(node, LinkMap.l_addr)

        self.status("Resolved library %r at %#x" % (libname, libbase))

        # The new instance reuses the l_ld pointer as its DYNAMIC pointer
        # and shares this instance's progress logger.
        resolved = DynELF(reader, libbase)
        resolved._dynamic = reader.field(node, LinkMap.l_ld)
        resolved._waitfor = self._waitfor
        return resolved
×
668

669
    def _lookup(self, symb):
        """Performs the actual symbol lookup within one ELF file.

        Arguments:
            symb(bytes): Name of the symbol to resolve.

        Returns:
            Whatever the hash-specific resolver returns for the symbol, or
            None when the string, symbol or hash table cannot be located.
        """
        self.status('.gnu.hash/.hash, .strtab and .symtab offsets')

        #
        # We need all three of the hash, string table, and symbol table.
        #
        hshtab  = self._find_dt(constants.DT_GNU_HASH)
        strtab  = self._find_dt(constants.DT_STRTAB)
        symtab  = self._find_dt(constants.DT_SYMTAB)

        # Assume GNU hash will hit, since it is the default for GCC.
        if hshtab:
            hshtype = 'gnu'
        else:
            hshtab  = self._find_dt(constants.DT_HASH)
            hshtype = 'sysv'

        if not all([strtab, symtab, hshtab]):
            self.failure("Could not find all tables")
            # Nothing can be resolved without every table; stop here rather
            # than handing missing pointers to the resolver.
            return None

        strtab = self._make_absolute_ptr(strtab)
        symtab = self._make_absolute_ptr(symtab)
        hshtab = self._make_absolute_ptr(hshtab)

        #
        # Perform the hash lookup
        #
        routine = {'sysv': self._resolve_symbol_sysv,
                   'gnu':  self._resolve_symbol_gnu}[hshtype]
        return routine(self.libbase, symb, hshtab, strtab, symtab)
×
704

705
    def _resolve_symbol_sysv(self, libbase, symb, hshtab, strtab, symtab):
        """
        Internal Documentation:
            See the ELF manual for more information.  Search for the phrase
            "A hash table of Elf32_Word objects supports symbol table access", or see:
            https://docs.oracle.com/cd/E19504-01/802-6319/6ia12qkfo/index.html#chapter6-48031

            .. code-block:: c

                struct Elf_Hash {
                    uint32_t nbucket;
                    uint32_t nchain;
                    uint32_t bucket[nbucket];
                    uint32_t chain[nchain];
                }

            You can force an ELF to use this type of symbol table by compiling
            with 'gcc -Wl,--hash-style=sysv'

        Returns:
            ``libbase`` plus the matching symbol's ``st_value``, or None.
        """
        self.status('.hash parms')
        reader = self.leak
        Sym    = {32: elf.Elf32_Sym, 64: elf.Elf64_Sym}[self.elfclass]

        nbucket     = reader.field(hshtab, elf.Elf_HashTable.nbucket)
        bucket_base = hshtab + sizeof(elf.Elf_HashTable)
        chain_base  = bucket_base + (nbucket * 4)

        self.status('hashmap')
        hsh = sysv_hash(symb) % nbucket

        # The bucket holds the first symbol index for this hash
        idx = reader.d(bucket_base, hsh)

        while idx != constants.STN_UNDEF:
            sym  = symtab + (idx * sizeof(Sym))
            kind = reader.field(sym, Sym.st_info) & 0xf

            # We only care about functions
            if kind == constants.STT_FUNC:
                # Leak the candidate's name and compare it with the request
                name = reader.s(strtab + reader.field(sym, Sym.st_name))

                if name == symb:
                    return libbase + reader.field(sym, Sym.st_value)

                self.status("%r (hash collision)" % name)

            # Not a match (or not a function): follow the chain.
            idx = reader.d(chain_base, idx)

        # The chain ended without a matching name.
        self.failure('Could not find a SYSV hash that matched %#x' % hsh)
        return None
×
764

765
    def _resolve_symbol_gnu(self, libbase, symb, hshtab, strtab, symtab):
        """
        Internal Documentation:
            The GNU hash structure is a bit more complex than the normal hash
            structure.

            Again, Oracle has good documentation.
            https://blogs.oracle.com/solaris/post/gnu-hash-elf-sections

            You can force an ELF to use this type of symbol table by compiling
            with 'gcc -Wl,--hash-style=gnu'

        Returns:
            ``libbase`` plus the matching symbol's ``st_value``, or None.
        """
        self.status('.gnu.hash parms')
        reader = self.leak
        Sym    = {32: elf.Elf32_Sym, 64: elf.Elf64_Sym}[self.elfclass]

        # Header fields: bucket count, index of the first hashed symbol,
        # and the number of bloom filter words.
        nbuckets  = reader.field(hshtab, elf.GNU_HASH.nbuckets)
        symndx    = reader.field(hshtab, elf.GNU_HASH.symndx)
        maskwords = reader.field(hshtab, elf.GNU_HASH.maskwords)

        # The bloom filter is not consulted, only skipped: the buckets
        # follow it, and the chains follow the buckets.
        word_size = self.elfclass // 8
        buckets   = hshtab + sizeof(elf.GNU_HASH) + (word_size * maskwords)
        chains    = buckets + (4 * nbuckets)

        self.status('hash chain index')

        # Hash the symbol, find its bucket
        hsh    = gnu_hash(symb)
        bucket = hsh % nbuckets

        # Get the first index in the chain for that bucket
        ndx = reader.d(buckets, bucket)
        if ndx == 0:
            self.failure('Empty chain')
            return None

        # Chain numbering effectively starts at 'symndx'.
        chain = chains + 4 * (ndx - symndx)

        self.status('hash chain')

        # The least significant bit of a chain entry is a stopper bit: it is
        # set on the last symbol of a chain, so it is masked off for the
        # comparison.
        hsh &= ~1

        step = 0
        while True:
            entry = reader.d(chain, step)

            if hsh == (entry & ~1):
                # Hash matches; confirm by comparing the actual name
                sym  = symtab + sizeof(Sym) * (ndx + step)
                name = reader.s(strtab + reader.field(sym, Sym.st_name))

                if name == symb:
                    return reader.field(sym, Sym.st_value) + libbase

                self.status("%r (hash collision)" % name)

            # Collision or no match, continue to the next item
            step += 1

            if entry & 1:
                break

        self.failure('Could not find a GNU hash that matched %#x' % hsh)
        return None
×
845

846
    def _lookup_build_id(self, lib = None):
1✔
847

848
        libbase = self.libbase
×
849
        if not self.link_map:
×
850
            self.status("No linkmap found")
×
851
            return None
×
852

853
        if lib is not None:
×
854
            libbase = self.lookup(symb = None, lib = lib)
×
855

856
        if not libbase:
×
857
            self.status("Couldn't find libc base")
×
858
            return None
×
859

860
        for offset in libcdb.get_build_id_offsets():
×
861
            address = libbase + offset
×
862
            if self.leak.compare(address + 0xC, b"GNU\x00"):
×
863
                return enhex(b''.join(self.leak.raw(address + 0x10, 20)))
×
864
            else:
865
                self.status("Build ID not found at offset %#x" % offset)
×
866
                pass
×
867

868
    def _make_absolute_ptr(self, ptr_or_offset):
1✔
869
        """For shared libraries (or PIE executables), many ELF fields may
870
        contain offsets rather than actual pointers. If the ELF type is 'DYN',
871
        the argument may be an offset. It will not necessarily be an offset,
872
        because the run-time linker may have fixed it up to be a real pointer
873
        already. In this case an educated guess is made, and the ELF base
874
        address is added to the value if it is determined to be an offset.
875
        """
876
        if_ptr = ptr_or_offset
1✔
877
        if_offset = ptr_or_offset + self.libbase
1✔
878

879
        # if the ELF type is not DYN, the value is a pointer
880

881
        if self.elftype != 'DYN':
1!
882
            return if_ptr
×
883

884
        # if the ELF type may be DYN, guess
885

886
        if 0 < ptr_or_offset < self.libbase:
1✔
887
            return if_offset
1✔
888
        else:
889
            return if_ptr
1✔
890

891
    def stack(self):
        """Finds a pointer to the stack via __environ, which is an exported
        symbol in libc, which points to the environment block.

        The aliases ``environ``, ``_environ`` and ``__environ`` are tried in
        that order; the first one that resolves is dereferenced and the
        resulting pointer is returned.
        """
        environ = None

        for alias in ('environ', '_environ', '__environ'):
            environ = self.lookup(alias, 'libc')
            if environ:
                break

        # Still false here means every alias failed to resolve
        if not environ:
            log.error("Could not find the stack")

        pointer = self.leak.p(environ)
        self.success('*environ: %#x' % pointer)
        return pointer
×
910

911
    def heap(self):
        """Finds the beginning of the heap via __curbrk, an exported symbol
        that is looked up in libc and which points to the current brk.

        Returns:
            The pointer-sized value leaked from the address of ``__curbrk``.

        If the symbol cannot be resolved, the failure is reported through
        ``log.error`` (the same way :meth:`stack` reports a missing
        ``environ``) instead of dereferencing an invalid address.
        """
        curbrk = self.lookup('__curbrk', 'libc')

        # A failed lookup yields a false value; leaking from it would only
        # produce an obscure error further down, so bail out here.
        if not curbrk:
            log.error("Could not find the heap")

        brk = self.leak.p(curbrk)

        self.success('*curbrk: %#x' % brk)

        return brk
×
921

922
    def _find_mapped_pages(self, readonly = False, page_size = 0x1000):
        """
        A generator over the loadable segments described by the Program
        Headers, which are read through the leaker.

        Arguments:
            readonly(bool, optional): When False (the default), only segments
                whose ``p_flags`` has bit ``0x02`` set are produced.  When
                True, the flags are not consulted at all.
            page_size(int, optional): Only segments whose ``p_align`` compares
                equal to this value are produced.

        Yields tuples of the form: (virtual address, memory size)
        """
        leak = self.leak
        base = self.libbase

        Ehdr, Phdr = {
            32: (elf.Elf32_Ehdr, elf.Elf32_Phdr),
            64: (elf.Elf64_Ehdr, elf.Elf64_Phdr),
        }[self.elfclass]

        # The ELF header at the base says where the table is and how long
        table  = base + leak.field(base, Ehdr.e_phoff)
        count  = leak.field(base, Ehdr.e_phnum)
        stride = sizeof(Phdr)

        for index in range(count):
            entry = table + index * stride

            if not leak.field_compare(entry, Phdr.p_type, constants.PT_LOAD):
                continue

            # the interesting segments are those that are aligned to PAGE_SIZE
            if not leak.field_compare(entry, Phdr.p_align, page_size):
                continue

            # unless asked for everything, require flag bit 0x02
            if (not readonly) and (leak.field(entry, Phdr.p_flags) & 0x02) == 0:
                continue

            vaddr = leak.field(entry, Phdr.p_vaddr)
            memsz = leak.field(entry, Phdr.p_memsz)

            # an address below the base is taken to be a relative offset
            yield (vaddr + base if vaddr < base else vaddr), memsz
×
949

950
    def dump(self, libs = False, readonly = False):
        """dump(libs = False, readonly = False)

        Leaks the ELF's mapped segments, whole pages at a time, so that they
        can be analysed offline.

        Arguments:
            libs(bool, optional): True if should dump the libraries too (False by default)
            readonly(bool, optional): True if should dump read-only pages (False by default)

        Returns:
            a dictionary of the form: { address : bytes }
        """
        page_size = 0x1000
        read      = self.leak.n
        result    = {}

        for start, length in self._find_mapped_pages(readonly, page_size):
            # Widen the range downwards to the start of its first page...
            slack   = start % page_size
            start  -= slack
            length += slack

            # ...and upwards to the end of its last page.
            length += -length % page_size

            result[start] = read(start, length)

        if not libs:
            return result

        for name in self.bases():
            # entries with an empty name are skipped
            if len(name) == 0:
                continue

            other = self._dynamic_load_dynelf(name)
            if other is None:
                continue

            result.update(other.dump(readonly = readonly))

        return result
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc