• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Gallopsled / pwntools / 10356727828

12 Aug 2024 05:27PM UTC coverage: 73.972% (-0.05%) from 74.017%
10356727828

push

github

peace-maker
Release 4.14.0beta0

4533 of 7366 branches covered (61.54%)

1 of 1 new or added line in 1 file covered. (100.0%)

10 existing lines in 3 files now uncovered.

13187 of 17827 relevant lines covered (73.97%)

0.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.53
/pwnlib/dynelf.py
1
"""
2
Resolve symbols in loaded, dynamically-linked ELF binaries.
3
Given a function which can leak data at an arbitrary address,
4
any symbol in any loaded library can be resolved.
5

6
Example
7
^^^^^^^^
8

9
::
10

11
    # Assume a process or remote connection
12
    p = process('./pwnme')
13

14
    # Declare a function that takes a single address, and
15
    # leaks at least one byte at that address.
16
    def leak(address):
17
        data = p.read(address, 4)
18
        log.debug("%#x => %s", address, enhex(data or ''))
19
        return data
20

21
    # For the sake of this example, let's say that we
22
    # have any of these pointers.  One is a pointer into
23
    # the target binary, the other two are pointers into libc
24
    main   = 0xfeedf4ce
25
    libc   = 0xdeadb000
26
    system = 0xdeadbeef
27

28
    # With our leaker, and a pointer into our target binary,
29
    # we can resolve the address of anything.
30
    #
31
    # We do not actually need to have a copy of the target
32
    # binary for this to work.
33
    d = DynELF(leak, main)
34
    assert d.lookup(None,     'libc') == libc
35
    assert d.lookup('system', 'libc') == system
36

37
    # However, if we *do* have a copy of the target binary,
38
    # we can speed up some of the steps.
39
    d = DynELF(leak, main, elf=ELF('./pwnme'))
40
    assert d.lookup(None,     'libc') == libc
41
    assert d.lookup('system', 'libc') == system
42

43
    # Alternately, we can resolve symbols inside another library,
44
    # given a pointer into it.
45
    d = DynELF(leak, libc + 0x1234)
46
    assert d.lookup('system')      == system
47

48
DynELF
49
"""
50
from __future__ import absolute_import
1✔
51
from __future__ import division
1✔
52

53
import ctypes
1✔
54

55
from elftools.elf.enums import ENUM_D_TAG
1✔
56

57
from pwnlib import elf
1✔
58
from pwnlib import libcdb
1✔
59
from pwnlib.context import context
1✔
60
from pwnlib.elf import ELF
1✔
61
from pwnlib.elf import constants
1✔
62
from pwnlib.log import getLogger
1✔
63
from pwnlib.memleak import MemLeak
1✔
64
from pwnlib.util.fiddling import enhex
1✔
65
from pwnlib.util.packing import _need_bytes
1✔
66

67
log    = getLogger(__name__)
1✔
68
sizeof = ctypes.sizeof
1✔
69

70
def sysv_hash(symbol):
    """sysv_hash(str) -> int

    Compute the SYSV-style ELF hash of a symbol name.

    Arguments:
        symbol(str): Name to hash; converted to bytes before hashing.

    Returns:
        The hash value, truncated to 32 bits.
    """
    value = 0
    for byte in bytearray(_need_bytes(symbol, 4, 0x80)):
        value = (value << 4) + byte
        top_nibble = value & 0xf0000000
        # Fold the top nibble back into the low bits, then clear it.
        value = (value ^ (top_nibble >> 24)) & ~top_nibble
    return value & 0xffffffff
×
83

84
def gnu_hash(s):
    """gnu_hash(str) -> int

    Compute the GNU-style ELF hash of a symbol name.

    Arguments:
        s(str): Name to hash; converted to bytes before hashing.

    Returns:
        The hash value, truncated to 32 bits.
    """
    value = 5381
    for byte in bytearray(_need_bytes(s, 4, 0x80)):
        # Same as ``value * 33 + byte``.
        value = (value << 5) + value + byte
    return value & 0xffffffff
1✔
94

95
class DynELF(object):
1✔
96
    '''
97
    DynELF knows how to resolve symbols in remote processes via an infoleak or
98
    memleak vulnerability encapsulated by :class:`pwnlib.memleak.MemLeak`.
99

100
    Implementation Details:
101

102
        Resolving Functions:
103

104
            In all ELFs which export symbols for importing by other libraries,
105
            (e.g. ``libc.so``) there are a series of tables which give exported
106
            symbol names, exported symbol addresses, and the ``hash`` of those
107
            exported symbols.  By applying a hash function to the name of the
108
            desired symbol (e.g., ``'printf'``), it can be located in the hash
109
            table.  Its location in the hash table provides an index into the
110
            string name table (strtab_), and the symbol address (symtab_).
111

112
            Assuming we have the base address of ``libc.so``, the way to resolve
113
            the address of ``printf`` is to locate the ``symtab``, ``strtab``,
114
            and hash table. The string ``"printf"`` is hashed according to the
115
            style of the hash table (SYSV_ or GNU_), and the hash table is
116
            walked until a matching entry is located. We can verify an exact
117
            match by checking the string table, and then get the offset into
118
            ``libc.so`` from the ``symtab``.
119

120
        Resolving Library Addresses:
121

122
            If we have a pointer into a dynamically-linked executable, we can
123
            leverage an internal linker structure called the `link map`_. This
124
            is a linked list structure which contains information about each
125
            loaded library, including its full path and base address.
126

127
            A pointer to the ``link map`` can be found in two ways.  Both are
128
            referenced from entries in the DYNAMIC_ array.
129

130
            - In non-RELRO binaries, a pointer is placed in the `.got.plt`_ area
131
              in the binary. This is marked by finding the DT_PLTGOT_ area in the
132
              binary.
133
            - In all binaries, a pointer can be found in the area described by
134
              the DT_DEBUG_ area.  This exists even in stripped binaries.
135

136
            For maximum flexibility, both mechanisms are used exhaustively.
137

138
    .. _symtab:    https://refspecs.linuxbase.org/elf/gabi4+/ch4.symtab.html
139
    .. _strtab:    https://refspecs.linuxbase.org/elf/gabi4+/ch4.strtab.html
140
    .. _.got.plt:  https://refspecs.linuxbase.org/LSB_3.1.1/LSB-Core-generic/LSB-Core-generic/specialsections.html
141
    .. _DYNAMIC:   http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#dynamic_section
142
    .. _SYSV:      https://refspecs.linuxbase.org/elf/gabi4+/ch5.dynamic.html#hash
143
    .. _GNU:       https://blogs.oracle.com/solaris/post/gnu-hash-elf-sections
144
    .. _DT_DEBUG:  https://reverseengineering.stackexchange.com/questions/6525/elf-link-map-when-linked-as-relro
145
    .. _link map:  https://sourceware.org/git/?p=glibc.git;a=blob;f=elf/link.h;h=eaca8028e45a859ac280301a6e955a14eed1b887;hb=HEAD#l84
146
    .. _DT_PLTGOT: https://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_zSeries/x2251.html
147
    '''
148

149
    def __init__(self, leak, pointer=None, elf=None, libcdb=True):
        '''
        Instantiates an object which can resolve symbols in a running binary
        given a :class:`pwnlib.memleak.MemLeak` leaker and a pointer inside
        the binary.

        Arguments:
            leak(MemLeak): Instance of pwnlib.memleak.MemLeak for leaking memory
            pointer(int):  A pointer into a loaded ELF file
            elf(str,ELF):  Path to the ELF file on disk, or a loaded :class:`pwnlib.elf.ELF`.
            libcdb(bool):  Attempt to use libcdb to speed up libc lookups

        If ``pointer`` is omitted, the address of ``elf`` is used as the
        starting point for the base-address search.
        '''
        self.libcdb    = libcdb
        self._elfclass = None
        self._elftype  = None
        self._link_map = None
        self._waitfor  = None
        self._bases    = {}
        self._dynamic  = None
        self.elf = None

        # Address to fall back on when no explicit pointer was given.
        elf_address = None

        if elf:
            if isinstance(elf, ELF):
                path = elf.path
                # Use the address of the object the caller handed in, since
                # the fresh copy loaded below starts from the file's default.
                elf_address = elf.address
            else:
                path = elf

            # Load a fresh copy of the ELF
            with context.local(log_level='error'):
                w = self.waitfor("Loading from %r" % path)
                self.elf = ELF(path)
                w.success("[LOADED]")

            # 'elf' was a path: only the freshly loaded copy has an address.
            if elf_address is None:
                elf_address = self.elf.address

        if not (pointer or elf_address):
            log.error("Must specify either a pointer into a module and/or an ELF file with a valid base address")

        pointer = pointer or elf_address

        if not isinstance(leak, MemLeak):
            leak = MemLeak(leak)

        if not elf:
            log.warn_once("No ELF provided.  Leaking is much faster if you have a copy of the ELF being leaked.")

        self.leak    = leak
        self.libbase = self._find_base(pointer)

        if elf:
            self._elftype = self.elf.elftype
            self._elfclass = self.elf.elfclass
            self.elf.address = self.libbase
            self._dynamic = self.elf.get_section_by_name('.dynamic').header.sh_addr
            self._dynamic = self._make_absolute_ptr(self._dynamic)
1✔
201

202
    @classmethod
1✔
203
    def for_one_lib_only(cls, leak, ptr):
1✔
204
        return cls(leak, ptr)
×
205

206
    @classmethod
1✔
207
    def from_lib_ptr(cls, leak, ptr):
1✔
208
        return cls(leak, ptr)
×
209

210
    @staticmethod
    def find_base(leak, ptr):
        """Return the base address of the library that ``ptr`` points into.

        Arguments:
            leak(MemLeak): Leaker used to read the target's memory
            ptr(int): A pointer into the library

        Returns:
            The ``libbase`` of a :class:`DynELF` built from the arguments.
        """
        resolver = DynELF(leak, ptr)
        return resolver.libbase
×
216

217
    @property
1✔
218
    def elfclass(self):
1✔
219
        """32 or 64"""
220
        if not self._elfclass:
1✔
221
            elfclass = self.leak.field(self.libbase, elf.Elf_eident.EI_CLASS)
1✔
222
            self._elfclass =  {constants.ELFCLASS32: 32,
1✔
223
                              constants.ELFCLASS64: 64}[elfclass]
224
        return self._elfclass
1✔
225

226
    @property
1✔
227
    def elftype(self):
1✔
228
        """e_type from the elf header. In practice the value will almost always
229
        be 'EXEC' or 'DYN'. If the value is architecture-specific (between
230
        ET_LOPROC and ET_HIPROC) or invalid, KeyError is raised.
231
        """
232
        if not self._elftype:
1✔
233
            Ehdr  = {32: elf.Elf32_Ehdr, 64: elf.Elf64_Ehdr}[self.elfclass]
1✔
234
            elftype = self.leak.field(self.libbase, Ehdr.e_type)
1✔
235
            self._elftype = {constants.ET_NONE: 'NONE',
1✔
236
                             constants.ET_REL: 'REL',
237
                             constants.ET_EXEC: 'EXEC',
238
                             constants.ET_DYN: 'DYN',
239
                             constants.ET_CORE: 'CORE'}[elftype]
240
        return self._elftype
1✔
241

242
    @property
1✔
243
    def link_map(self):
1✔
244
        """Pointer to the runtime link_map object"""
245
        if not self._link_map:
1✔
246
            self._link_map = self._find_linkmap()
1✔
247
        return self._link_map
1✔
248

249
    @property
1✔
250
    def dynamic(self):
1✔
251
        """
252
        Returns:
253
            Pointer to the ``.DYNAMIC`` area.
254
        """
255
        if not self._dynamic:
1!
256
            self._dynamic = self._find_dynamic_phdr()
×
257
        return self._dynamic
1✔
258

259
    def _find_base(self, ptr):
1✔
260
        page_size = 0x1000
1✔
261
        page_mask = ~(page_size - 1)
1✔
262

263
        ptr &= page_mask
1✔
264
        w = None
1✔
265

266
        while True:
1✔
267
            if self.leak.compare(ptr, b'\x7fELF'):
1✔
268
                break
1✔
269

270
            # See if we can short circuit the search
271
            fast = self._find_base_optimized(ptr)
1✔
272
            if fast:
1!
273
                ptr = fast
1✔
274
                continue
1✔
275

UNCOV
276
            ptr -= page_size
×
277

UNCOV
278
            if ptr < 0:
×
279
                raise ValueError("Address is negative, something is wrong!")
×
280

281
            # Defer creating the spinner in the event that 'ptr'
282
            # is already the base address
UNCOV
283
            w = w or self.waitfor("Finding base address")
×
UNCOV
284
            self.status('%#x' % ptr)
×
285

286
        # If we created a spinner, print the success message
287
        if w:
1!
UNCOV
288
            self.success('%#x' % ptr)
×
289

290
        return ptr
1✔
291

292
    def _find_base_optimized(self, ptr):
1✔
293
        if not self.elf:
1!
294
            return None
×
295

296
        # If we have an ELF< we can probably speed this up a little bit?
297
        # Note that we add +0x20 onto the offset in order to avoid needing
298
        # to leak any bytes which contain '\r\n\t\b '
299
        ptr += 0x20
1✔
300
        data = self.leak.n(ptr, 32)
1✔
301
        if not data:
1!
302
            return None
×
303

304
        # Do not permit multiple matches
305
        matches = list(self.elf.search(data))
1✔
306
        if len(matches) != 1:
1!
UNCOV
307
            return None
×
308

309
        candidate = matches[0]
1✔
310
        candidate -= self.elf.address
1✔
311

312
        # The match should have the same page-alignment as our leaked data.
313
        if candidate & 0xfff != 0x20:
1!
314
            return None
×
315

316
        # Adjust based on the original pointer we got, and the ELF's address.
317
        ptr -= candidate
1✔
318
        return ptr
1✔
319

320
    def _find_dynamic_phdr(self):
        """
        Walks the program headers of the leaked ELF looking for the first
        one of type PT_DYNAMIC.

        Returns:
            The absolute form of that header's ``p_vaddr``, or None if no
            such header was found.
        """
        leak = self.leak
        base = self.libbase

        # First find PT_DYNAMIC
        Ehdr = {32: elf.Elf32_Ehdr, 64: elf.Elf64_Ehdr}[self.elfclass]
        Phdr = {32: elf.Elf32_Phdr, 64: elf.Elf64_Phdr}[self.elfclass]

        self.status("PT_DYNAMIC")

        cursor = base + leak.field(base, Ehdr.e_phoff)
        self.status("PT_DYNAMIC header = %#x" % cursor)

        count = leak.field(base, Ehdr.e_phnum)
        self.status("PT_DYNAMIC count = %#x" % count)

        found = False
        for _ in range(count):
            if leak.field_compare(cursor, Phdr.p_type, constants.PT_DYNAMIC):
                found = True
                break
            cursor += sizeof(Phdr)

        if not found:
            self.failure("Could not find Program Header of type PT_DYNAMIC")
            return None

        vaddr = leak.field(cursor, Phdr.p_vaddr)
        self.status("PT_DYNAMIC @ %#x" % vaddr)

        return self._make_absolute_ptr(vaddr)
×
354

355
    def _find_dt_optimized(self, name):
1✔
356
        """
357
        Find an entry in the DYNAMIC array through an ELF
358

359
        Arguments:
360
            name(str): Name of the tag to find ('DT_DEBUG', 'DT_PLTGOT', ...)
361

362
        Returns:
363
            Pointer to the data described by the specified entry.
364
        """
365
        if not self.elf:
1✔
366
            return None
1✔
367

368
        ptr = self.elf.dynamic_value_by_tag(name)
1✔
369
        if ptr:
1!
370
            ptr = self._make_absolute_ptr(ptr)
1✔
371
            self.success("Found %s at %#x" % (name, ptr))
1✔
372
            return ptr
1✔
373
        return None
×
374

375

376
    def _find_dt(self, tag):
        """
        Find an entry in the DYNAMIC array.

        The local ELF is consulted first; only if that yields nothing is the
        DYNAMIC array of the target walked through the leaker.

        Arguments:
            tag(int): Single tag to find

        Returns:
            Pointer to the data described by the specified entry, or None
            if the DYNAMIC array or the tag could not be found.
        """
        leak = self.leak
        name = next(k for k,v in ENUM_D_TAG.items() if v == tag)

        # Read directly from the ELF if possible
        ptr = self._find_dt_optimized(name)
        if ptr:
            return ptr

        dynamic = self.dynamic
        if dynamic is None:
            # The DYNAMIC array could not be located, so there is nothing
            # to walk.
            return None

        Dyn = {32: elf.Elf32_Dyn, 64: elf.Elf64_Dyn}[self.elfclass]

        # Walk the DYNAMIC array looking for the requested tag.
        # An entry with a DT_NULL tag marks the end of the DYNAMIC array.
        while not leak.field_compare(dynamic, Dyn.d_tag, constants.DT_NULL):
            if leak.field_compare(dynamic, Dyn.d_tag, tag):
                break
            dynamic += sizeof(Dyn)
        else:
            self.failure("Could not find tag %s" % name)
            return None

        ptr = leak.field(dynamic, Dyn.d_ptr)

        ptr = self._make_absolute_ptr(ptr)
        self.status("Found %s at %#x" % (name, ptr))

        return ptr
1✔
414

415

416
    def _find_linkmap(self, pltgot=None, debug=None):
        """
        The linkmap is a chained structure created by the loader at runtime
        which contains information on the names and load addresses of all
        libraries.

        The pointer is first read from the GOT located via DT_PLTGOT.  If
        that yields nothing, it is read from the structure located via
        DT_DEBUG.

        Arguments:
            pltgot(int): Known address of the GOT, skips the DT_PLTGOT search
            debug(int): Known address of the debug structure, skips the
                DT_DEBUG search

        Returns:
            Pointer to the linkmap, or None if it could not be found.
        """
        progress = self.waitfor("Finding linkmap")

        Got     = {32: elf.Elf_i386_GOT, 64: elf.Elf_x86_64_GOT}[self.elfclass]
        r_debug = {32: elf.Elf32_r_debug, 64: elf.Elf64_r_debug}[self.elfclass]

        found = None

        # First choice: the pointer stored in the GOT.
        if not pltgot:
            progress.status("Finding linkmap: DT_PLTGOT")
            pltgot = self._find_dt(constants.DT_PLTGOT)

        if pltgot:
            progress.status("GOT.linkmap")
            found = self.leak.field(pltgot, Got.linkmap)
            progress.status("GOT.linkmap %#x" % found)

        # Second choice: the pointer stored in the debug structure.
        if not found:
            debug = debug or self._find_dt(constants.DT_DEBUG)
            if debug:
                progress.status("r_debug.linkmap")
                found = self.leak.field(debug, r_debug.r_map)
                progress.status("r_debug.linkmap %#x" % found)

        if not found:
            progress.failure("Could not find DT_PLTGOT or DT_DEBUG")
            return None

        found = self._make_absolute_ptr(found)

        progress.success('%#x' % found)
        return found
1✔
459

460
    def waitfor(self, msg):
1✔
461
        if not self._waitfor:
1✔
462
            self._waitfor = log.waitfor(msg)
1✔
463
        else:
464
            self.status(msg)
1✔
465
        return self._waitfor
1✔
466

467
    def failure(self, msg):
1✔
468
        if not self._waitfor:
1!
469
            log.failure(msg)
1✔
470
        else:
471
            self._waitfor.failure(msg)
×
472
            self._waitfor = None
×
473

474
    def success(self, msg):
1✔
475
        if not self._waitfor:
1✔
476
            log.success(msg)
1✔
477
        else:
478
            self._waitfor.success(msg)
1✔
479
            self._waitfor = None
1✔
480

481
    def status(self, msg):
1✔
482
        if not self._waitfor:
1✔
483
            log.info(msg)
1✔
484
        else:
485
            self._waitfor.status(msg)
1✔
486

487
    @property
    def libc(self):
        """libc(self) -> ELF

        Leak the Build ID of the remote libc.so, download the file,
        and load an ``ELF`` object with the correct base address.

        Returns:
            An ELF object, or None if the library, its Build ID, or a
            matching file could not be found.
        """
        libname = b'libc.so'

        with self.waitfor('Downloading libc'):
            dynlib = self._dynamic_load_dynelf(libname)

            # The library may not be present in the link map at all.
            if dynlib is None:
                return None

            self.status("Trying lookup based on Build ID")
            build_id = dynlib._lookup_build_id(libname)

            if not build_id:
                return None

            self.status("Trying lookup based on Build ID: %s" % build_id)
            path = libcdb.search_by_build_id(build_id)

            if not path:
                return None

            libc = ELF(path)
            libc.address = dynlib.libbase
            return libc
×
517

518
    def lookup(self, symb=None, lib=None):
        """lookup(symb = None, lib = None) -> int

        Find the address of ``symbol``, which is found in ``lib``.

        Arguments:
            symb(str): Named routine to look up
              If omitted, the base address of the library will be returned.
            lib(str): Substring to match for the library name.
              If omitted, the current library is searched.
              If set to ``'libc'``, ``'libc.so'`` is assumed.

        Returns:
            Address of the named symbol, or :const:`None`.
        """
        result = None

        if lib == 'libc':
            lib = 'libc.so'

        if symb:
            symb = _need_bytes(symb, min_wrong=0x80)

        #
        # Get a pretty name for the symbol to show the user
        #
        if symb and lib:
            pretty = '%r in %r' % (symb, lib)
        else:
            pretty = repr(symb or lib)

        self.waitfor('Resolving %s' % pretty)

        #
        # If we are loading from a different library, create
        # a DynELF instance for it.
        #
        if lib is not None:
            dynlib = self._dynamic_load_dynelf(lib)
        else:
            dynlib = self

        if dynlib is None:
            log.failure("Could not find %r", lib)
            return None

        #
        # If we are resolving a symbol in the library, find it.
        #
        if symb and self.libcdb:
            # Try a quick lookup by build ID
            self.status("Trying lookup based on Build ID")
            build_id = dynlib._lookup_build_id(lib=lib)
            if build_id:
                log.info("Trying lookup based on Build ID: %s", build_id)
                path = libcdb.search_by_build_id(build_id)
                if path:
                    with context.local(log_level='error'):
                        e = ELF(path)
                        e.address = dynlib.libbase
                        # 'symb' is bytes at this point, while the symbol
                        # table is indexed by decoded names.  A miss leaves
                        # 'result' empty so the remote lookup below runs.
                        result = e.symbols.get(symb.decode('utf-8', 'replace'))
        if symb and not result:
            self.status("Trying remote lookup")
            result = dynlib._lookup(symb)
        if not symb:
            result = dynlib.libbase

        #
        # Did we win?
        #
        if result:
            self.success("%#x" % result)
        else:
            self.failure("Could not find %s" % pretty)

        return result
1✔
593

594
    def bases(self):
1✔
595
        '''Resolve base addresses of all loaded libraries.
596

597
        Return a dictionary mapping library path to its base address.
598
        '''
599
        if not self._bases:
×
600
            if self.link_map is None:
×
601
                self.failure("Cannot determine bases without linkmap")
×
602
                return {}
×
603
                
604
            leak    = self.leak
×
605
            LinkMap = {32: elf.Elf32_Link_Map, 64: elf.Elf64_Link_Map}[self.elfclass]
×
606

607
            cur = self.link_map
×
608

609
            # make sure we rewind to the beginning!
610
            while leak.field(cur, LinkMap.l_prev):
×
611
                cur = leak.field(cur, LinkMap.l_prev)
×
612

613
            while cur:
×
614
                p_name = leak.field(cur, LinkMap.l_name)
×
615
                name   = leak.s(p_name)
×
616
                addr   = leak.field(cur, LinkMap.l_addr)
×
617
                cur    = leak.field(cur, LinkMap.l_next)
×
618

619
                log.debug('Found %r @ %#x', name, addr)
×
620

621
                self._bases[name] = addr
×
622

623
        return self._bases
×
624

625
    def _dynamic_load_dynelf(self, libname):
        """_dynamic_load_dynelf(libname) -> DynELF

        Looks up information about a loaded library via the link map.

        Arguments:
            libname(str):  Name of the library to resolve, or a substring (e.g. 'libc.so')

        Returns:
            A DynELF instance for the loaded library, or None if the link
            map is unavailable or no entry matches ``libname``.
        """
        cur     = self.link_map
        leak    = self.leak

        # Without a link map there is nothing to walk.
        if cur is None:
            self.failure("Cannot resolve library %r without linkmap" % libname)
            return None

        LinkMap = {32: elf.Elf32_Link_Map, 64: elf.Elf64_Link_Map}[self.elfclass]

        # make sure we rewind to the beginning!
        while leak.field(cur, LinkMap.l_prev):
            cur = leak.field(cur, LinkMap.l_prev)

        libname = _need_bytes(libname, 2, 0x80)

        while cur:
            self.status("link_map entry %#x" % cur)
            p_name = leak.field(cur, LinkMap.l_name)
            name   = leak.s(p_name)

            if libname in name:
                break

            if name:
                self.status('Skipping %s' % name)

            cur = leak.field(cur, LinkMap.l_next)
        else:
            self.failure("Could not find library with name containing %r" % libname)
            return None

        libbase = leak.field(cur, LinkMap.l_addr)

        self.status("Resolved library %r at %#x" % (libname, libbase))

        lib = DynELF(leak, libbase)
        # The link map entry already points at the library's DYNAMIC array.
        lib._dynamic = leak.field(cur, LinkMap.l_ld)
        # Share the progress logger so the library reports on the same line.
        lib._waitfor = self._waitfor
        return lib
1✔
670

671
    def _rel_lookup(self, symb, strtab=None, symtab=None, jmprel=None):
        """Performs slower symbol lookup using DT_JMPREL(.rela.plt)

        Arguments:
            symb(bytes): Name of the symbol to find
            strtab(int): Address of the string table, looked up if omitted
            symtab(int): Address of the symbol table, looked up if omitted
            jmprel(int): Address of the PLT relocations, looked up if omitted

        Returns:
            The address recorded for the symbol (from the local ELF's symbol
            table if it is listed there, otherwise the absolute form of the
            matching relocation's ``r_offset``), or None if it was not found.
        """
        leak = self.leak
        elf_obj = self.elf
        symb_name = symb.decode()

        # If elf is available look for the symbol in it
        if elf_obj and symb_name in elf_obj.symbols:
            self.success("Symbol '%s' found in ELF!" % symb_name)
            return elf_obj.symbols[symb_name]

        log.warning("Looking up symbol through DT_JMPREL. This might be slower...")

        strtab  = strtab or self._find_dt(constants.DT_STRTAB)
        symtab  = symtab or self._find_dt(constants.DT_SYMTAB)
        jmprel  = jmprel or self._find_dt(constants.DT_JMPREL) # .rela.plt

        # Any of the lookups above may have come back empty.
        if not all([strtab, symtab, jmprel]):
            self.failure("Could not find all tables")
            return None

        strtab = self._make_absolute_ptr(strtab)
        symtab = self._make_absolute_ptr(symtab)
        jmprel = self._make_absolute_ptr(jmprel)

        w = self.waitfor("Looking for %s in .rel.plt" % symb)
        # We look for the symbol by iterating through each relocation entry.
        # For each entry, get the symbol table entry it refers to, then
        # compare that symbol's name with the name we are looking for.

        Rel = {32: elf.Elf32_Rel, 64: elf.Elf64_Rel}[self.elfclass]
        Sym = {32: elf.Elf32_Sym, 64: elf.Elf64_Sym}[self.elfclass]

        # The symbol index lives in the upper part of r_info:
        # ELF32_R_SYM(i) is (i >> 8), ELF64_R_SYM(i) is (i >> 32).
        sym_shift = {32: 8, 64: 32}[self.elfclass]

        rel_addr = jmprel
        rel_entry = None
        while True:
            rel_entry = leak.struct(rel_addr, Rel)

            # We ran out of entries in DT_JMPREL
            if rel_entry.r_offset == 0:
                return None

            sym_idx = rel_entry.r_info >> sym_shift
            sym_entry_address = symtab + ( sym_idx * sizeof(Sym) )
            sym_str_off = leak.field(sym_entry_address, Sym.st_name)
            symb_str = leak.s(strtab+sym_str_off)

            if symb_str == symb:
                w.success("Found matching Elf64_Rel entry!")
                break

            rel_addr += sizeof(Rel)

        symbol_address = self._make_absolute_ptr(rel_entry.r_offset)

        return symbol_address
×
724

725

726

727
    def _lookup(self, symb):
        """Performs the actual symbol lookup within one ELF file.

        The hash table (GNU style if present, otherwise SYSV) is tried
        first; if that finds nothing, the DT_JMPREL relocations are searched.

        Arguments:
            symb(bytes): Name of the symbol to resolve

        Returns:
            Address of the symbol, or None if the required tables or the
            symbol itself could not be found.
        """
        self.status('.gnu.hash/.hash, .strtab and .symtab offsets')

        #
        # We need all three of the hash, string table, and symbol table.
        #
        hshtab  = self._find_dt(constants.DT_GNU_HASH)
        strtab  = self._find_dt(constants.DT_STRTAB)
        symtab  = self._find_dt(constants.DT_SYMTAB)

        # Assume GNU hash will hit, since it is the default for GCC.
        if hshtab:
            hshtype = 'gnu'
        else:
            hshtab  = self._find_dt(constants.DT_HASH)
            hshtype = 'sysv'

        if not all([strtab, symtab, hshtab]):
            self.failure("Could not find all tables")
            # Nothing below can work with a missing table.
            return None

        strtab = self._make_absolute_ptr(strtab)
        symtab = self._make_absolute_ptr(symtab)
        hshtab = self._make_absolute_ptr(hshtab)

        #
        # Perform the hash lookup
        #

        # Save off our real leaker in case we use the fake leaker
        real_leak = self.leak
        if self.elf:

            # Create a fake leaker which just leaks out of the 'loaded' ELF
            # However, we may load things which are outside of the ELF (e.g.
            # the linkmap or GOT) so we need to fall back on the real leak.
            @MemLeak
            def fake_leak(address):
                try:
                    return self.elf.read(address, 4)
                except ValueError:
                    return real_leak.b(address)
            # Use fake leaker since ELF is available
            self.leak = fake_leak

        try:
            routine = {'sysv': self._resolve_symbol_sysv,
                       'gnu':  self._resolve_symbol_gnu}[hshtype]
            resolved_addr = routine(self.libbase, symb, hshtab, strtab, symtab)

            # If the hash table did not have the symbol, try DT_JMPREL
            if not resolved_addr:
                resolved_addr = self._rel_lookup(symb, strtab, symtab)
        finally:
            # Restore the original leaker, even if resolution raised
            self.leak = real_leak

        return resolved_addr
1✔
792

793
    def _resolve_symbol_sysv(self, libbase, symb, hshtab, strtab, symtab):
        """
        Resolve a function symbol through a SYSV-style hash table.

        Internal Documentation:
            See the ELF manual for more information.  Search for the phrase
            "A hash table of Elf32_Word objects supports symbol table access", or see:
            https://docs.oracle.com/cd/E19504-01/802-6319/6ia12qkfo/index.html#chapter6-48031

            .. code-block:: c

                struct Elf_Hash {
                    uint32_t nbucket;
                    uint32_t nchain;
                    uint32_t bucket[nbucket];
                    uint32_t chain[nchain];
                }

            You can force an ELF to use this type of symbol table by compiling
            with 'gcc -Wl,--hash-style=sysv'

        Returns:
            ``libbase`` plus the symbol's ``st_value``, or None if the chain
            for the symbol's hash holds no function with that name.
        """
        self.status('.hash parms')
        leak = self.leak
        Sym  = {32: elf.Elf32_Sym, 64: elf.Elf64_Sym}[self.elfclass]

        nbucket  = leak.field(hshtab, elf.Elf_HashTable.nbucket)
        buckets  = hshtab + sizeof(elf.Elf_HashTable)
        # The chain array follows the 4-byte bucket entries.
        chains   = buckets + (nbucket * 4)

        self.status('hashmap')
        hsh = sysv_hash(symb) % nbucket

        # Get the index out of the bucket for the hash we computed
        index = leak.d(buckets, hsh)

        while True:
            if index == constants.STN_UNDEF:
                # End of the chain without a match.
                break

            # Look up the symbol corresponding to the specified index
            entry = symtab + (index * sizeof(Sym))
            kind  = leak.field(entry, Sym.st_info) & 0xf

            # We only care about functions
            if kind == constants.STT_FUNC:
                # Leak the name of the function from the string table
                candidate = leak.s(strtab + leak.field(entry, Sym.st_name))

                # Make sure it matches the name of the symbol we were looking for.
                if candidate == symb:
                    return libbase + leak.field(entry, Sym.st_value)

                self.status("%r (hash collision)" % candidate)

            # The name did not match what we were looking for, or we assume
            # it did not since it was not a function.
            # Follow the chain for this particular hash.
            index = leak.d(chains, index)

        self.failure('Could not find a SYSV hash that matched %#x' % hsh)
        return None
×
852

853
    def _resolve_symbol_gnu(self, libbase, symb, hshtab, strtab, symtab):
        """
        Resolve ``symb`` by walking a ``.gnu.hash`` section through the
        leaker.

        Arguments:
            libbase(int): Value added to the matching symbol's ``st_value``
                to form the returned address.
            symb: Symbol name; hashed with ``gnu_hash`` and compared for
                equality against names leaked from the string table.
            hshtab(int): Address of the GNU hash table.
            strtab(int): Address of the string table.
            symtab(int): Address of the symbol table.

        Returns:
            The resolved address, or ``None`` if the table has no buckets,
            the selected bucket is empty, or the chain ends without a
            symbol of that name.

        Internal Documentation:
            The GNU hash structure is a bit more complex than the normal hash
            structure.

            Again, Oracle has good documentation.
            https://blogs.oracle.com/solaris/post/gnu-hash-elf-sections

            You can force an ELF to use this type of symbol table by compiling
            with 'gcc -Wl,--hash-style=gnu'
        """
        self.status('.gnu.hash parms')
        leak = self.leak
        Sym  = {32: elf.Elf32_Sym, 64: elf.Elf64_Sym}[self.elfclass]

        # The number of hash buckets (hash % nbuckets)
        nbuckets  = leak.field(hshtab, elf.GNU_HASH.nbuckets)
        if not nbuckets:
            # A table without buckets cannot hold the symbol, and the modulo
            # below would raise ZeroDivisionError.
            # NOTE(review): this also catches a falsy result from a failed
            # leak (presumably None) - confirm against the leaker.
            self.failure('Empty GNU hash table')
            return None

        # Index of the first accessible symbol in the hash table
        # Numbering doesn't start at zero, it starts at symndx
        symndx    = leak.field(hshtab, elf.GNU_HASH.symndx)

        # Number of things in the bloom filter.
        # We don't care about the contents, but we have to skip over it.
        maskwords = leak.field(hshtab, elf.GNU_HASH.maskwords)

        # Skip over the bloom filter to get to the buckets
        elfword = self.elfclass // 8
        buckets = hshtab + sizeof(elf.GNU_HASH) + (elfword * maskwords)

        # The chains come after the buckets
        chains  = buckets + (4 * nbuckets)

        self.status('hash chain index')

        # Hash the symbol, find its bucket
        hsh    = gnu_hash(symb)
        bucket = hsh % nbuckets

        # Get the first index in the chain for that bucket
        ndx    = leak.d(buckets, bucket)
        if ndx == 0:
            self.failure('Empty chain')
            return None

        # Find the start of the chain, taking into account that numbering
        # effectively starts at 'symndx' within the chains.
        chain  = chains + 4 * (ndx - symndx)

        self.status('hash chain')

        # Iteratively get the I'th entry from the hash chain, until we find
        # one that matches.
        i    = 0
        hsh &= ~1

        # The least significant bit is used as a stopper bit.
        # It is set to 1 when a symbol is the last symbol in a given hash chain.
        hsh2 = 0
        while not hsh2 & 1:
            hsh2 = leak.d(chain, i)
            if hsh == (hsh2 & ~1):
                # Check for collision on hash values
                sym  = symtab + sizeof(Sym) * (ndx + i)
                name = leak.s(strtab + leak.field(sym, Sym.st_name))

                if name == symb:
                    # No collision, get offset and calculate address
                    return leak.field(sym, Sym.st_value) + libbase

                self.status("%r (hash collision)" % name)

            # Collision or no match, continue to the next item
            i += 1

        # The stopper bit was reached without a match.
        self.failure('Could not find a GNU hash that matched %#x' % hsh)
        return None
1✔
933

934
    def _lookup_build_id(self, lib = None):
1✔
935

936
        libbase = self.libbase
×
937
        if not self.link_map:
×
938
            self.status("No linkmap found")
×
939
            return None
×
940

941
        if lib is not None:
×
942
            libbase = self.lookup(symb = None, lib = lib)
×
943

944
        if not libbase:
×
945
            self.status("Couldn't find libc base")
×
946
            return None
×
947

948
        for offset in libcdb.get_build_id_offsets():
×
949
            address = libbase + offset
×
950
            if self.leak.compare(address + 0xC, b"GNU\x00"):
×
951
                return enhex(b''.join(self.leak.raw(address + 0x10, 20)))
×
952
            else:
953
                self.status("Build ID not found at offset %#x" % offset)
×
954
                pass
×
955

956
    def _make_absolute_ptr(self, ptr_or_offset):
1✔
957
        """For shared libraries (or PIE executables), many ELF fields may
958
        contain offsets rather than actual pointers. If the ELF type is 'DYN',
959
        the argument may be an offset. It will not necessarily be an offset,
960
        because the run-time linker may have fixed it up to be a real pointer
961
        already. In this case an educated guess is made, and the ELF base
962
        address is added to the value if it is determined to be an offset.
963
        """
964
        if_ptr = ptr_or_offset
1✔
965
        if_offset = ptr_or_offset + self.libbase
1✔
966

967
        # if the ELF type is not DYN, the value is a pointer
968

969
        if self.elftype != 'DYN':
1!
970
            return if_ptr
×
971

972
        # if the ELF type may be DYN, guess
973

974
        if 0 < ptr_or_offset < self.libbase:
1✔
975
            return if_offset
1✔
976
        else:
977
            return if_ptr
1✔
978

979
    def stack(self):
        """Leak a stack pointer through libc's exported environment pointer.

        The names ``environ``, ``_environ`` and ``__environ`` are looked up
        in 'libc' in that order; the first one that resolves is dereferenced
        with the leaker and the pointer read from it is returned.  If none
        of them resolves, the failure is reported through ``log.error``.
        """
        environ = None

        for candidate in ('environ', '_environ', '__environ'):
            environ = self.lookup(candidate, 'libc')
            if environ:
                break

        if not environ:
            log.error("Could not find the stack")

        # Read the pointer stored in the resolved variable.
        pointer = self.leak.p(environ)
        self.success('*environ: %#x' % pointer)
        return pointer
1✔
998

999
    def heap(self):
        """Leaks the current program break via __curbrk, which is looked up
        as an exported symbol of libc and points to the current brk.

        If the symbol cannot be resolved, the failure is reported through
        ``log.error`` (as ``stack`` does) instead of handing an empty
        address to the leaker.

        NOTE(review): the program break marks the end of the brk-managed
        heap rather than its first byte - confirm before treating the
        returned value as the heap start.
        """
        curbrk = self.lookup('__curbrk', 'libc')

        # Never dereference a failed lookup.
        if not curbrk:
            log.error("Could not find the heap")

        brk    = self.leak.p(curbrk)

        self.success('*curbrk: %#x' % brk)

        return brk
1✔
1009

1010
    def _find_mapped_pages(self, readonly = False, page_size = 0x1000):
        """
        Generate the mapped segments described by the Program Headers.

        Only ``PT_LOAD`` headers whose ``p_align`` matches ``page_size``
        (as judged by ``leak.field_compare``) are considered.  Unless
        ``readonly`` is true, a header is additionally skipped when bit
        0x02 of its ``p_flags`` is clear.

        Yields tuples of the form: (virtual address, memory size)
        """
        leak     = self.leak
        base     = self.libbase
        elfclass = self.elfclass

        Ehdr = {32: elf.Elf32_Ehdr, 64: elf.Elf64_Ehdr}[elfclass]
        Phdr = {32: elf.Elf32_Phdr, 64: elf.Elf64_Phdr}[elfclass]

        # The header table lives e_phoff bytes past the base and holds
        # e_phnum entries of sizeof(Phdr) bytes each.
        table = base + leak.field(base, Ehdr.e_phoff)
        count = leak.field(base, Ehdr.e_phnum)

        for index in range(count):
            phdr = table + index * sizeof(Phdr)

            if not leak.field_compare(phdr, Phdr.p_type, constants.PT_LOAD):
                continue

            # the interesting pages are those that are aligned to PAGE_SIZE
            if not leak.field_compare(phdr, Phdr.p_align, page_size):
                continue

            # The flags are only leaked when read-only segments are unwanted.
            if not readonly and leak.field(phdr, Phdr.p_flags) & 0x02 == 0:
                continue

            start = leak.field(phdr, Phdr.p_vaddr)
            size  = leak.field(phdr, Phdr.p_memsz)

            # fix relative offsets
            if start < base:
                start += base

            yield start, size
×
1037

1038
    def dump(self, libs = False, readonly = False):
        """dump(libs = False, readonly = False)

        Leak the contents of the ELF's mapped segments for offline analysis.

        Each segment reported by ``_find_mapped_pages`` is widened to whole
        0x1000-byte pages (start rounded down, length rounded up) and read
        with a single ``leak.n`` call.

        Arguments:
            libs(bool, optional): True if should dump the libraries too (False by default)
            readonly(bool, optional): True if should dump read-only pages (False by default)

        Returns:
            a dictionary of the form: { address : bytes }
        """
        read      = self.leak.n
        page_size = 0x1000
        result    = {}

        for start, length in self._find_mapped_pages(readonly, page_size):
            # Move the start down to its page boundary, growing the length
            # by the same amount so the end of the range stays put.
            slack   = start % page_size
            start  -= slack
            length += slack

            # Round the length up to a whole number of pages.
            length += -length % page_size

            result[start] = read(start, length)

        if not libs:
            return result

        for lib_name in self.bases():
            # Entries without a name are skipped.
            if len(lib_name) > 0:
                loaded = self._dynamic_load_dynelf(lib_name)
                if loaded is not None:
                    result.update(loaded.dump(readonly = readonly))

        return result
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc