• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Gallopsled / pwntools / 14217040731

02 Apr 2025 10:09AM UTC coverage: 73.958% (+0.04%) from 73.922%
14217040731

Pull #2574

github

web-flow
Merge 55c335b36 into 6954b039c
Pull Request #2574: Allow creating an ELF from in-memory bytes

3815 of 6424 branches covered (59.39%)

9 of 11 new or added lines in 3 files covered. (81.82%)

99 existing lines in 1 file now uncovered.

13362 of 18067 relevant lines covered (73.96%)

0.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.99
/pwnlib/elf/corefile.py
1
# -*- coding: utf-8 -*-
2
"""Read information from Core Dumps.
3

4
Core dumps are extremely useful when writing exploits, even outside of
5
the normal act of debugging things.
6

7
Using Corefiles to Automate Exploitation
8
----------------------------------------
9

10
For example, if you have a trivial buffer overflow and don't want to
11
open up a debugger or calculate offsets, you can use a generated core
12
dump to extract the relevant information.
13

14
.. code-block:: c
15

16
    #include <string.h>
17
    #include <stdlib.h>
18
    #include <unistd.h>
19
    void win() {
20
        system("sh");
21
    }
22
    int main(int argc, char** argv) {
23
        char buffer[64];
24
        strcpy(buffer, argv[1]);
25
    }
26

27
.. code-block:: shell
28

29
    $ gcc crash.c -m32 -o crash -fno-stack-protector
30

31
.. code-block:: python
32

33
    from pwn import *
34

35
    # Generate a cyclic pattern so that we can auto-find the offset
36
    payload = cyclic(128)
37

38
    # Run the process once so that it crashes
39
    process(['./crash', payload]).wait()
40

41
    # Get the core dump
42
    core = Coredump('./core')
43

44
    # Our cyclic pattern should have been used as the crashing address
45
    assert pack(core.eip) in payload
46

47
    # Cool! Now let's just replace that value with the address of 'win'
48
    crash = ELF('./crash')
49
    payload = fit({
50
        cyclic_find(core.eip): crash.symbols.win
51
    })
52

53
    # Get a shell!
54
    io = process(['./crash', payload])
55
    io.sendline(b'id')
56
    print(io.recvline())
57
    # uid=1000(user) gid=1000(user) groups=1000(user)
58

59
Module Members
60
----------------------------------------
61

62
"""
63
from __future__ import absolute_import
1✔
64
from __future__ import division
1✔
65

66
import collections
1✔
67
import ctypes
1✔
68
import glob
1✔
69
import gzip
1✔
70
import re
1✔
71
import os
1✔
72
import socket
1✔
73
import subprocess
1✔
74
import tempfile
1✔
75

76
from io import BytesIO, StringIO
1✔
77

78
import elftools
1✔
79
from elftools.common.utils import roundup
1✔
80
from elftools.common.utils import struct_parse
1✔
81
from elftools.construct import CString
1✔
82

83
from pwnlib import atexit
1✔
84
from pwnlib.context import context
1✔
85
from pwnlib.elf.datatypes import *
1✔
86
from pwnlib.elf.elf import ELF
1✔
87
from pwnlib.log import getLogger
1✔
88
from pwnlib.tubes.process import process
1✔
89
from pwnlib.tubes.ssh import ssh_channel
1✔
90
from pwnlib.tubes.tube import tube
1✔
91
from pwnlib.util.fiddling import b64d
1✔
92
from pwnlib.util.fiddling import enhex
1✔
93
from pwnlib.util.fiddling import unhex
1✔
94
from pwnlib.util.misc import read
1✔
95
from pwnlib.util.misc import write
1✔
96
from pwnlib.util.packing import pack
1✔
97
from pwnlib.util.packing import unpack_many
1✔
98

99
log = getLogger(__name__)
1✔
100

101
prstatus_types = {
1✔
102
    'i386': elf_prstatus_i386,
103
    'amd64': elf_prstatus_amd64,
104
    'arm': elf_prstatus_arm,
105
    'aarch64': elf_prstatus_aarch64
106
}
107

108
siginfo_types = {
1✔
109
    32: elf_siginfo_32,
110
    64: elf_siginfo_64
111
}
112

113

114
class Mapping(object):
    """Encapsulates information about a memory mapping in a :class:`Corefile`.

    Arguments:
        core: Object the mapping's bytes are read from.  Only its
            ``read(address, count)`` method is used by this class.
        name(str): Name of the mapping; a falsy value is stored as ``''``.
        start(int): First mapped address.
        stop(int): First address after the end of the mapping.
        flags(int): Permission bits (``4`` read, ``2`` write, ``1`` execute).
        page_offset(int): Offset in pages in the mapped file; a falsy
            value is stored as ``0``.
    """
    def __init__(self, core, name, start, stop, flags, page_offset):
        self._core=core

        #: :class:`str`: Name of the mapping, e.g. ``'/bin/bash'`` or ``'[vdso]'``.
        self.name = name or ''

        #: :class:`int`: First mapped byte in the mapping
        self.start = start

        #: :class:`int`: First byte after the end of the mapping
        self.stop = stop

        #: :class:`int`: Size of the mapping, in bytes
        self.size = stop-start

        #: :class:`int`: Offset in pages in the mapped file
        self.page_offset = page_offset or 0

        #: :class:`int`: Mapping flags, using e.g. ``PROT_READ`` and so on.
        self.flags = flags

    @property
    def path(self):
        """:class:`str`: Alias for :attr:`.Mapping.name`"""
        return self.name

    @property
    def address(self):
        """:class:`int`: Alias for :data:`Mapping.start`."""
        return self.start

    @property
    def permstr(self):
        """:class:`str`: Human-readable memory permission string, e.g. ``r-xp``."""
        flags = self.flags
        return ''.join(['r' if flags & 4 else '-',
                        'w' if flags & 2 else '-',
                        'x' if flags & 1 else '-',
                        'p'])

    def __str__(self):
        return '%x-%x %s %x %s' % (self.start,self.stop,self.permstr,self.size,self.name)

    def __repr__(self):
        return '%s(%r, start=%#x, stop=%#x, size=%#x, flags=%#x, page_offset=%#x)' \
            % (self.__class__.__name__,
               self.name,
               self.start,
               self.stop,
               self.size,
               self.flags,
               self.page_offset)

    def __int__(self):
        return self.start

    @property
    def data(self):
        """:class:`bytes`: Memory of the mapping."""
        return self._core.read(self.start, self.size)

    def __getitem__(self, item):
        """Read memory by virtual address.

        A slice reads ``[start:stop]`` (both virtual addresses, defaulting
        to the bounds of the mapping; negative values are taken relative to
        :attr:`stop`).  Any other ``item`` is treated as a single address
        and one byte is read from it.
        """
        if isinstance(item, slice):
            start = int(item.start or self.start)
            stop  = int(item.stop or self.stop)

            # Negative slices...
            if start < 0:
                start += self.stop
            if stop < 0:
                stop += self.stop

            if not (self.start <= start <= stop <= self.stop):
                log.error("Byte range [%#x:%#x] not within range [%#x:%#x]",
                          start, stop, self.start, self.stop)

            data = self._core.read(start, stop-start)

            if item.step == 1:
                return data
            return data[::item.step]

        return self._core.read(item, 1)

    def __contains__(self, item):
        """Test whether an address, or a whole other :class:`Mapping`,
        lies inside this mapping."""
        if isinstance(item, Mapping):
            return (self.start <= item.start) and (item.stop <= self.stop)
        return self.start <= item < self.stop

    def _search(self, method, sub, start, end):
        """Shared implementation of :meth:`find` and :meth:`rfind`.

        ``method`` is the name of the search method to invoke on
        :attr:`data` (``'find'`` or ``'rfind'``).  ``start`` and ``end``
        are virtual addresses and default to the bounds of the mapping.
        Returns the virtual address of the match, or ``-1``.
        """
        if start is None:
            start = self.start
        if end is None:
            end = self.stop

        # The search runs on the mapping's contents, so translate the
        # virtual addresses into offsets relative to the mapping base.
        base = self.address
        result = getattr(self.data, method)(sub, start-base, end-base)

        if result == -1:
            return result

        return result + base

    def find(self, sub, start=None, end=None):
        """Similar to str.find() but works on our address space"""
        return self._search('find', sub, start, end)

    def rfind(self, sub, start=None, end=None):
        """Similar to str.rfind() but works on our address space"""
        return self._search('rfind', sub, start, end)
1✔
232

233
class Corefile(ELF):
1✔
234
    r"""Enhances the information available about a corefile (which is an extension
235
    of the ELF format) by permitting extraction of information about the mapped
236
    data segments, and register state.
237

238
    Registers can be accessed directly, e.g. via ``core_obj.eax`` and enumerated
239
    via :data:`Corefile.registers`.
240

241
    Memory can be accessed directly via :meth:`pwnlib.elf.elf.ELF.read` or :meth:`pwnlib.elf.elf.ELF.write`, and also
242
    via :meth:`pwnlib.elf.elf.ELF.pack` or :meth:`pwnlib.elf.elf.ELF.unpack` or even :meth:`.string`.
243

244
    Arguments:
245
        core: Path to the core file.  Alternately, may be a :class:`.process` instance,
246
              and the core file will be located automatically.
247

248
    ::
249

250
        >>> c = Corefile('./core')
251
        >>> hex(c.eax)
252
        '0xfff5f2e0'
253
        >>> c.registers
254
        {'eax': 4294308576,
255
         'ebp': 1633771891,
256
         'ebx': 4151132160,
257
         'ecx': 4294311760,
258
         'edi': 0,
259
         'edx': 4294308700,
260
         'eflags': 66050,
261
         'eip': 1633771892,
262
         'esi': 0,
263
         'esp': 4294308656,
264
         'orig_eax': 4294967295,
265
         'xcs': 35,
266
         'xds': 43,
267
         'xes': 43,
268
         'xfs': 0,
269
         'xgs': 99,
270
         'xss': 43}
271

272
    Mappings can be iterated in order via :attr:`Corefile.mappings`.
273

274
    ::
275

276
        >>> Corefile('./core').mappings
277
        [Mapping('/home/user/pwntools/crash', start=0x8048000, stop=0x8049000, size=0x1000, flags=0x5, page_offset=0x0),
278
         Mapping('/home/user/pwntools/crash', start=0x8049000, stop=0x804a000, size=0x1000, flags=0x4, page_offset=0x1),
279
         Mapping('/home/user/pwntools/crash', start=0x804a000, stop=0x804b000, size=0x1000, flags=0x6, page_offset=0x2),
280
         Mapping(None, start=0xf7528000, stop=0xf7529000, size=0x1000, flags=0x6, page_offset=0x0),
281
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf7529000, stop=0xf76d1000, size=0x1a8000, flags=0x5, page_offset=0x0),
282
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d1000, stop=0xf76d2000, size=0x1000, flags=0x0, page_offset=0x1a8),
283
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d2000, stop=0xf76d4000, size=0x2000, flags=0x4, page_offset=0x1a9),
284
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d4000, stop=0xf76d5000, size=0x1000, flags=0x6, page_offset=0x1aa),
285
         Mapping(None, start=0xf76d5000, stop=0xf76d8000, size=0x3000, flags=0x6, page_offset=0x0),
286
         Mapping(None, start=0xf76ef000, stop=0xf76f1000, size=0x2000, flags=0x6, page_offset=0x0),
287
         Mapping('[vdso]', start=0xf76f1000, stop=0xf76f2000, size=0x1000, flags=0x5, page_offset=0x0),
288
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf76f2000, stop=0xf7712000, size=0x20000, flags=0x5, page_offset=0x0),
289
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf7712000, stop=0xf7713000, size=0x1000, flags=0x4, page_offset=0x20),
290
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf7713000, stop=0xf7714000, size=0x1000, flags=0x6, page_offset=0x21),
291
         Mapping('[stack]', start=0xfff3e000, stop=0xfff61000, size=0x23000, flags=0x6, page_offset=0x0)]
292

293
    Examples:
294

295
        Let's build an example binary which should eat ``R0=0xdeadbeef``
296
        and ``PC=0xcafebabe``.
297

298
        If we run the binary and then wait for it to exit, we can get its
299
        core file.
300

301
        >>> context.clear(arch='arm')
302
        >>> shellcode = shellcraft.mov('r0', 0xdeadbeef)
303
        >>> shellcode += shellcraft.mov('r1', 0xcafebabe)
304
        >>> shellcode += 'bx r1'
305
        >>> address = 0x41410000
306
        >>> elf = ELF.from_assembly(shellcode, vma=address)
307
        >>> io = elf.process(env={'HELLO': 'WORLD'})
308
        >>> io.poll(block=True)
309
        -11
310

311
        You can specify a full path a la ``Corefile('/path/to/core')``,
312
        but you can also just access the :attr:`.process.corefile` attribute.
313

314
        There's a lot of behind-the-scenes logic to locate the corefile for
315
        a given process, but it's all handled transparently by Pwntools.
316

317
        >>> core = io.corefile
318

319
        The core file has a :attr:`exe` property, which is a :class:`.Mapping`
320
        object.  Each mapping can be accessed with virtual addresses via subscript, or
321
        contents can be examined via the :attr:`.Mapping.data` attribute.
322

323
        >>> core.exe # doctest: +ELLIPSIS
324
        Mapping('/.../step3', start=..., stop=..., size=0x1000, flags=0x..., page_offset=...)
325
        >>> hex(core.exe.address)
326
        '0x41410000'
327

328
        The core file also has registers which can be accessed directly.
329
        Pseudo-registers :attr:`pc` and :attr:`sp` are available on all architectures,
330
        to make writing architecture-agnostic code more simple.
331
        If this were an amd64 corefile, we could access e.g. ``core.rax``.
332

333
        >>> core.pc == 0xcafebabe
334
        True
335
        >>> core.r0 == 0xdeadbeef
336
        True
337
        >>> core.sp == core.r13
338
        True
339

340
        We may not always know which signal caused the core dump, or what address
341
        caused a segmentation fault.  Instead of accessing registers directly, we
342
        can also extract this information from the core dump via :attr:`fault_addr`
343
        and :attr:`signal`.
344

345
        On QEMU-generated core dumps, this information is unavailable, so we
346
        substitute the value of PC.  In our example, that's correct anyway.
347

348
        >>> core.fault_addr == 0xcafebabe
349
        True
350
        >>> core.signal
351
        11
352

353
        Core files can also be generated from running processes.
354
        This requires GDB to be installed, and can only be done with native processes.
355
        Getting a "complete" corefile requires GDB 7.11 or better.
356

357
        >>> elf = ELF(which('bash-static'))
358
        >>> context.clear(binary=elf)
359
        >>> env = dict(os.environ)
360
        >>> env['HELLO'] = 'WORLD'
361
        >>> io = process(elf.path, env=env)
362
        >>> io.sendline(b'echo hello')
363
        >>> io.recvline()
364
        b'hello\n'
365

366
        The process is still running, but accessing its :attr:`.process.corefile` property
367
        automatically invokes GDB to attach and dump a corefile.
368

369
        >>> core = io.corefile
370
        >>> io.close()
371

372
        The corefile can be inspected and read from, and even exposes various mappings
373

374
        >>> core.exe # doctest: +ELLIPSIS
375
        Mapping('.../bin/bash-static', start=..., stop=..., size=..., flags=..., page_offset=...)
376
        >>> core.exe.data[0:4]
377
        b'\x7fELF'
378

379
        It also supports all of the features of :class:`ELF`, so you can :meth:`pwnlib.elf.elf.ELF.read`
380
        or :meth:`pwnlib.elf.elf.ELF.write` or even the helpers like :meth:`pwnlib.elf.elf.ELF.pack` or :meth:`pwnlib.elf.elf.ELF.unpack`.
381

382
        Don't forget to call :meth:`.ELF.save` to save the changes to disk.
383

384
        >>> core.read(elf.address, 4)
385
        b'\x7fELF'
386
        >>> core.pack(core.sp, 0xdeadbeef)
387
        >>> core.save()
388

389
        Let's re-load it as a new :attr:`Corefile` object and have a look!
390

391
        >>> core2 = Corefile(core.path)
392
        >>> hex(core2.unpack(core2.sp))
393
        '0xdeadbeef'
394

395
        Various other mappings are available by name, for the first segment of:
396

397
        * :attr:`.exe` the executable
398
        * :attr:`.libc` the loaded libc, if any
399
        * :attr:`.stack` the stack mapping
400
        * :attr:`.vvar`
401
        * :attr:`.vdso`
402
        * :attr:`.vsyscall`
403

404
        On Linux, 32-bit Intel binaries should have a VDSO section via :attr:`vdso`.  
405
        Since our ELF is statically linked, there is no libc which gets mapped.
406

407
        >>> core.vdso.data[:4]
408
        b'\x7fELF'
409
        >>> core.libc
410

411
        But if we dump a corefile from a dynamically-linked binary, the :attr:`.libc`
412
        will be loaded.
413

414
        >>> process('bash').corefile.libc # doctest: +ELLIPSIS
415
        Mapping('.../libc...so...', start=0x..., stop=0x..., size=0x..., flags=..., page_offset=...)
416

417
        The corefile also contains a :attr:`.stack` property, which gives
418
        us direct access to the stack contents.  On Linux, the very top of the stack
419
        should contain two pointer-widths of NULL bytes, preceded by the NULL-
420
        terminated path to the executable (as passed via the first arg to ``execve``).
421

422
        >>> core.stack # doctest: +ELLIPSIS
423
        Mapping('[stack]', start=0x..., stop=0x..., size=0x..., flags=0x6, page_offset=0x0)
424

425
        When creating a process, the kernel puts the absolute path of the binary and some
426
        padding bytes at the end of the stack.  We can look at those by looking at 
427
        ``core.stack.data``.
428

429
        >>> size = len('/bin/bash-static') + 8
430
        >>> core.stack.data[-size:]
431
        b'bin/bash-static\x00\x00\x00\x00\x00\x00\x00\x00\x00'
432

433
        We can also directly access the environment variables and arguments, via
434
        :attr:`.argc`, :attr:`.argv`, and :attr:`.env`.
435

436
        >>> 'HELLO' in core.env
437
        True
438
        >>> core.string(core.env['HELLO'])
439
        b'WORLD'
440
        >>> core.getenv('HELLO')
441
        b'WORLD'
442
        >>> core.argc
443
        1
444
        >>> core.argv[0] in core.stack
445
        True
446
        >>> core.string(core.argv[0]) # doctest: +ELLIPSIS
447
        b'.../bin/bash-static'
448

449
        Corefiles can also be pulled from remote machines via SSH!
450

451
        >>> s = ssh(user='travis', host='example.pwnme', password='demopass')
452
        >>> _ = s.set_working_directory()
453
        >>> elf = ELF.from_assembly(shellcraft.trap())
454
        >>> path = s.upload(elf.path)
455
        >>> _ =s.chmod('+x', path)
456
        >>> io = s.process(path)
457
        >>> io.wait(1)
458
        -1
459
        >>> io.corefile.signal == signal.SIGTRAP # doctest: +SKIP
460
        True
461

462
        Make sure fault_addr synthesis works for amd64 on ret.
463

464
        >>> context.clear(arch='amd64')
465
        >>> elf = ELF.from_assembly('push 1234; ret')
466
        >>> io = elf.process()
467
        >>> io.wait(1)
468
        >>> io.corefile.fault_addr
469
        1234
470

471
        Corefile.getenv() works correctly, even if the environment variable's
472
        value contains embedded '='. Corefile is able to find the stack, even
473
        if the stack pointer doesn't point at the stack.
474

475
        >>> elf = ELF.from_assembly(shellcraft.crash())
476
        >>> io = elf.process(env={'FOO': 'BAR=BAZ'})
477
        >>> io.wait(1)
478
        >>> core = io.corefile
479
        >>> core.getenv('FOO')
480
        b'BAR=BAZ'
481
        >>> core.sp == 0
482
        True
483
        >>> core.sp in core.stack
484
        False
485

486
        Corefile gracefully handles the stack being filled with garbage, including
487
        argc / argv / envp being overwritten.
488

489
        >>> context.clear(arch='i386')
490
        >>> assembly = '''
491
        ... LOOP:
492
        ...   mov dword ptr [esp], 0x41414141
493
        ...   pop eax
494
        ...   jmp LOOP
495
        ... '''
496
        >>> elf = ELF.from_assembly(assembly)
497
        >>> io = elf.process()
498
        >>> io.wait(2)
499
        >>> core = io.corefile
500
        >>> core.argc, core.argv, core.env
501
        (0, [], {})
502
        >>> core.stack.data.endswith(b'AAAA')
503
        True
504
        >>> core.fault_addr == core.sp
505
        True
506
    """
507

508
    _fill_gaps = False
1✔
509

510
    def __init__(self, *a, **kw):
        """Load a corefile and parse its notes, mappings and stack.

        All positional and keyword arguments are forwarded unchanged to the
        parent class constructor.  An :class:`IOError` raised by it is
        re-raised after a warning about a missing corefile is logged.
        """
        #: The NT_PRSTATUS object.
        self.prstatus = None

        #: The NT_PRPSINFO object
        self.prpsinfo = None

        #: The NT_SIGINFO object
        self.siginfo = None

        #: :class:`list`: A list of :class:`.Mapping` objects for each loaded memory region
        self.mappings = []

        #: :class:`Mapping`: The mapping corresponding to the stack, or
        #: ``None`` if it could not be located
        self.stack    = None

        #: :class:`dict`: Environment variables read from the stack.
        #: Keys are the environment variable name, values are the memory
        #: address of the variable.
        #:
        #: Use :meth:`.getenv` or :meth:`.string` to retrieve the textual value.
        #:
        #: Note: If ``FOO=BAR`` is in the environment, ``self.env['FOO']`` is the address of the string ``"BAR\x00"``.
        self.env = {}

        #: :class:`int`: Pointer to envp on the stack
        self.envp_address = 0

        #: :class:`list`: List of addresses of arguments on the stack.
        self.argv = []

        #: :class:`int`: Pointer to argv on the stack
        self.argv_address = 0

        #: :class:`int`: Number of arguments passed
        self.argc = 0

        #: :class:`int`: Pointer to argc on the stack
        self.argc_address = 0

        # Pointer to the executable filename on the stack
        self.at_execfn = 0

        # Pointer to the entry point
        self.at_entry = 0

        # Pointer to the vdso
        self.at_sysinfo_ehdr = None

        try:
            super(Corefile, self).__init__(*a, **kw)
        except IOError:
            log.warning("No corefile.  Have you set /proc/sys/kernel/core_pattern?")
            raise

        self.load_addr = 0
        self._address  = 0

        if self.elftype != 'CORE':
            log.error("%s is not a valid corefile" % self.file.name)

        if self.arch not in prstatus_types:
            log.warn_once("%s does not use a supported corefile architecture, registers are unavailable" % self.file.name)

        # Either lookup yields None when the architecture / word size has
        # no known structure layout; the note handlers below must cope.
        prstatus_type = prstatus_types.get(self.arch)
        siginfo_type = siginfo_types.get(self.bits)

        with log.waitfor("Parsing corefile..."):
            self._load_mappings()

            for segment in self.segments:
                if not isinstance(segment, elftools.elf.segments.NoteSegment):
                    continue

                for note in segment.iter_notes():
                    # Try to find NT_PRSTATUS.
                    if note.n_type == 'NT_PRSTATUS':
                        self.NT_PRSTATUS = note
                        # Without a known layout the registers stay
                        # unavailable (already warned about above) instead
                        # of failing with an AttributeError on None.
                        if prstatus_type is not None:
                            self.prstatus = prstatus_type.from_buffer_copy(note.n_desc)

                    # Try to find NT_PRPSINFO
                    if note.n_type == 'NT_PRPSINFO':
                        self.NT_PRPSINFO = note
                        self.prpsinfo = note.n_desc

                    # Try to find NT_SIGINFO so we can see the fault
                    if note.n_type == 'NT_SIGINFO':
                        self.NT_SIGINFO = note
                        # Same guard as for NT_PRSTATUS: skip decoding when
                        # no layout is known for this word size.
                        if siginfo_type is not None:
                            self.siginfo = siginfo_type.from_buffer_copy(note.n_desc)

                    # Try to find the list of mapped files
                    if note.n_type == 'NT_FILE':
                        with context.local(bytes=self.bytes):
                            self._parse_nt_file(note)

                    # Try to find the auxiliary vector, which will tell us
                    # where the top of the stack is.
                    if note.n_type == 'NT_AUXV':
                        self.NT_AUXV = note
                        with context.local(bytes=self.bytes):
                            self._parse_auxv(note)

            # No stack address found so far: guess the end of the last
            # mapping, skipping over a trailing mapping at the vsyscall
            # address.
            if not self.stack and self.mappings:
                self.stack = self.mappings[-1].stop
                if self.mappings[-1].start == 0xffffffffff600000 and len(self.mappings) > 1:
                    self.stack = self.mappings[-2].stop

            # Turn the stack *address* into the Mapping which contains it
            # (or which ends exactly at it).
            if self.stack and self.mappings:
                for mapping in self.mappings:
                    if self.stack in mapping or self.stack == mapping.stop:
                        mapping.name = '[stack]'
                        self.stack   = mapping
                        break
                else:
                    log.warn('Could not find the stack!')
                    self.stack = None

            with context.local(bytes=self.bytes):
                try:
                    self._parse_stack()
                except ValueError:
                    # If there are no environment variables, we die by running
                    # off the end of the stack.
                    pass

            # Corefiles generated by QEMU do not have a name for the
            # main module mapping.
            # Fetching self.exe will cause this to be auto-populated,
            # and is a no-op in other cases.
            self.exe

            # Print out the nice display for the user
            self._describe_core()
1✔
646

647
    def _parse_nt_file(self, note):
        """Name the loaded mappings using the ``NT_FILE`` note.

        Every mapping whose start address matches an entry of the note
        receives that entry's filename and page offset.  The mappings are
        then sorted by start address, and the mappings which are still
        unnamed are checked, from the highest address down, against
        heuristics for ``[vsyscall]``, ``[vdso]`` and ``[vvar]``.
        """
        for vma, filename in zip(note.n_desc.Elf_Nt_File_Entry, note.n_desc.filename):
            if not isinstance(filename, str):
                filename = filename.decode('utf-8', 'surrogateescape')
            for mapping in self.mappings:
                if mapping.start == vma.vm_start:
                    mapping.name = filename
                    mapping.page_offset = vma.page_offset

        self.mappings = sorted(self.mappings, key=lambda m: m.start)

        # Each special mapping is assigned by heuristic at most once.
        vvar = vdso = vsyscall = False
        for mapping in reversed(self.mappings):
            if mapping.name:
                continue

            if not vsyscall and mapping.start == 0xffffffffff600000:
                mapping.name = '[vsyscall]'
                vsyscall = True
                continue

            # Either the mapping starts at self.at_sysinfo_ehdr, or it is a
            # small r-x mapping which begins with the ELF magic.
            if mapping.start == self.at_sysinfo_ehdr \
            or (not vdso and mapping.size in [0x1000, 0x2000]
                and mapping.flags == 5
                and self.read(mapping.start, 4) == b'\x7fELF'):
                mapping.name = '[vdso]'
                vdso = True
                continue

            if not vvar and mapping.size == 0x2000 and mapping.flags == 4:
                mapping.name = '[vvar]'
                vvar = True
×
683

684
    @property
    def vvar(self):
        """:class:`Mapping`: The first mapping named ``[vvar]``, or ``None``"""
        return next((region for region in self.mappings
                     if region.name == '[vvar]'), None)
×
690

691
    @property
    def vdso(self):
        """:class:`Mapping`: The first mapping named ``[vdso]``, or ``None``"""
        return next((region for region in self.mappings
                     if region.name == '[vdso]'), None)
1✔
697

698
    @property
    def vsyscall(self):
        """:class:`Mapping`: The first mapping named ``[vsyscall]``, or ``None``"""
        return next((region for region in self.mappings
                     if region.name == '[vsyscall]'), None)
×
704

705
    @property
    def libc(self):
        """:class:`Mapping`: First mapping whose file name looks like ``libc.so``

        Unnamed mappings are skipped.  ``None`` is returned when nothing
        matches.
        """
        pattern = r'^libc\b.*so(?:\.6)?$'

        for region in self.mappings:
            # Only the file name is matched, not the directory part.
            if region.name and re.match(pattern, os.path.basename(region.name)):
                return region

        return None
1✔
718

719
    @property
    def exe(self):
        """:class:`Mapping`: First mapping for the executable file.

        ``None`` is returned when the entry point is unknown, or when no
        mapping contains it.
        """

        # Finding the executable mapping requires knowing the entry point
        # from the auxv
        if not self.at_entry:
            return None

        # The entry point may not be in the first segment of a given file,
        # but we want to find the first segment of the file -- not the segment that
        # contains the entrypoint.
        first_segment_for_name = {}

        for m in self.mappings:
            first_segment_for_name.setdefault(m.name, m)

        # Find which segment contains the entry point
        for m in self.mappings:
            if m.start <= self.at_entry < m.stop:

                # An unnamed mapping takes its name from the string at
                # self.at_execfn.
                if not m.name and self.at_execfn:
                    m.name = self.string(self.at_execfn)
                    if not isinstance(m.name, str):
                        # Same error handler as _parse_nt_file uses for
                        # file names: a path which is not valid UTF-8 must
                        # not raise while the corefile is being loaded.
                        m.name = m.name.decode('utf-8', 'surrogateescape')

                return first_segment_for_name.get(m.name, m)
1✔
746

747
    @property
    def pid(self):
        """:class:`int`: PID of the process which created the core dump.

        ``None`` if no prstatus information was loaded.
        """
        status = self.prstatus
        return int(status.pr_pid) if status else None
1✔
752

753
    @property
    def ppid(self):
        """:class:`int`: Parent PID of the process which created the core dump.

        ``None`` if no prstatus information was loaded.
        """
        status = self.prstatus
        return int(status.pr_ppid) if status else None
×
758

759
    @property
    def signal(self):
        """:class:`int`: Signal which caused the core to be dumped.

        The siginfo structure is preferred; the prstatus structure is the
        fallback.  ``None`` if neither was loaded.

        Example:

            >>> elf = ELF.from_assembly(shellcraft.trap())
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.signal == signal.SIGTRAP
            True

            >>> elf = ELF.from_assembly(shellcraft.crash())
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.signal == signal.SIGSEGV
            True
        """
        info = self.siginfo
        if info:
            return int(info.si_signo)

        status = self.prstatus
        if status:
            return int(status.pr_cursig)

        return None
1✔
781

782
    @property
    def fault_addr(self):
        """:class:`int`: Address which generated the fault, for the signals
            SIGILL, SIGFPE, SIGSEGV, SIGBUS.  This is only available in native
            core dumps created by the kernel.  If the information is unavailable,
            this returns the address of the instruction pointer.


        Example:

            >>> elf = ELF.from_assembly('mov eax, 0xdeadbeef; jmp eax', arch='i386')
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.fault_addr == io.corefile.eax == 0xdeadbeef
            True
        """
        info = self.siginfo

        if not info:
            # No embedded siginfo structure, so just return the
            # current instruction pointer.
            return getattr(self, 'pc', 0)

        address = int(info.sigfault_addr)

        # Anything but a zero address with si_code 0x80 is reported as-is.
        if address != 0 or info.si_code != 0x80:
            return address

        # The fault_addr is zero if the crash occurs due to a
        # "protection fault", e.g. a dereference of 0x4141414141414141
        # because this is technically a kernel address.
        #
        # A protection fault does not set "fault_addr" in the siginfo.
        # (http://elixir.free-electrons.com/linux/v4.14-rc8/source/kernel/signal.c#L1052)
        #
        # Since a common use for corefiles is to spray the stack with a
        # cyclic pattern to find the offset to get control of $PC,
        # check for a "ret" instruction ("\xc3").
        #
        # If we find a RET at $PC, extract the "return address" from the
        # top of the stack.
        try:
            if self.read(self.pc, 1) == b'\xc3':
                address = self.unpack(self.sp)
        except Exception:
            # Could not read $rsp or $rip
            pass

        return address
830

831
    @property
1✔
832
    def _pc_register(self):
1✔
833
        name = {
1✔
834
            'i386': 'eip',
835
            'amd64': 'rip',
836
        }.get(self.arch, 'pc')
837
        return name
1✔
838

839
    @property
1✔
840
    def pc(self):
        """:class:`int`: The program counter for the Corefile

        This is a cross-platform way to get e.g. ``core.eip``, ``core.rip``, etc.
        ``None`` when the register is not present in :attr:`registers`.
        """
        available = self.registers
        return available.get(self._pc_register)
1✔
846

847
    @property
1✔
848
    def _sp_register(self):
1✔
849
        name = {
1✔
850
            'i386': 'esp',
851
            'amd64': 'rsp',
852
        }.get(self.arch, 'sp')
853
        return name
1✔
854

855
    @property
1✔
856
    def sp(self):
        """:class:`int`: The stack pointer for the Corefile

        This is a cross-platform way to get e.g. ``core.esp``, ``core.rsp``, etc.
        ``None`` when the register is not present in :attr:`registers`.
        """
        available = self.registers
        return available.get(self._sp_register)
1✔
862

863
    def _describe(self):
1✔
864
        pass
1✔
865

866
    def _describe_core(self):
        """Log a one-time, multi-line summary of this corefile.

        The summary lists the path, an ``arch-bits-endian`` triplet, the
        program counter and stack pointer, and - when known - the executable
        (with its load address) and the faulting address.
        """
        triplet = '-'.join(str(part) for part in (self.arch, self.bits, self.endian))

        summary = [repr(self.path)]
        summary.append('%-10s %s' % ('Arch:', triplet))
        # Missing registers are displayed as zero
        summary.append('%-10s %#x' % ('%s:' % self._pc_register.upper(), self.pc or 0))
        summary.append('%-10s %#x' % ('%s:' % self._sp_register.upper(), self.sp or 0))

        exe = self.exe
        if exe and exe.name:
            location = '%r (%#x)' % (exe.name, exe.address)
            summary.append('%-10s %s' % ('Exe:', location))

        fault = self.fault_addr
        if fault:
            summary.append('%-10s %#x' % ('Fault:', fault))

        log.info_once('\n'.join(summary))
1✔
887

888
    def _load_mappings(self):
        """Append one :class:`Mapping` to ``self.mappings`` per ``PT_LOAD`` segment.

        Each mapping spans ``p_vaddr`` to ``p_vaddr + p_memsz`` and carries
        the segment's ``p_flags``; the other two ``Mapping`` arguments are
        passed as ``None``.
        """
        for segment in self.segments:
            header = segment.header
            if header.p_type == 'PT_LOAD':
                self.mappings.append(Mapping(self,
                                             None,
                                             header.p_vaddr,
                                             header.p_vaddr + header.p_memsz,
                                             header.p_flags,
                                             None))
1✔
900

901
    def _parse_auxv(self, note):
        """Record the interesting entries of an auxiliary-vector note.

        Arguments:
            note: Note whose ``n_desc`` holds ``n_descsz`` bytes of
                (key, value) pairs, each field being ``context.bytes`` wide.

        Sets ``at_execfn`` and ``stack`` (from ``AT_EXECFN``), ``at_entry``,
        ``at_phdr``, ``at_base`` and ``at_sysinfo_ehdr`` for the keys found.
        """
        stream = tube()
        stream.unrecv(note.n_desc)

        pair_size = context.bytes * 2
        for _ in range(0, note.n_descsz, pair_size):
            key = stream.unpack()
            value = stream.unpack()

            if key == constants.AT_EXECFN:
                # The AT_EXECFN entry is a pointer to the executable's filename
                # at the very top of the stack, followed by a word's width of
                # NULL bytes.  For example, on a 64-bit system...
                #
                # 0x7fffffffefe8  53 3d 31 34  33 00 2f 62  69 6e 2f 62  61 73 68 00  |S=14|3./b|in/b|ash.|
                # 0x7fffffffeff8  00 00 00 00  00 00 00 00                            |....|....|    |    |
                #
                # Rounding up to the next page boundary gives the stack's end.
                self.at_execfn = value
                self.stack = (value & ~0xfff) + 0x1000
            elif key == constants.AT_ENTRY:
                self.at_entry = value
            elif key == constants.AT_PHDR:
                self.at_phdr = value
            elif key == constants.AT_BASE:
                self.at_base = value
            elif key == constants.AT_SYSINFO_EHDR:
                self.at_sysinfo_ehdr = value
1✔
933

934
    def _parse_stack(self):
        """Recover ``argc``, ``argv`` and the environment from the stack mapping.

        Working backwards from ``AT_EXECFN`` (the executable's filename near
        the top of the stack), this locates the last environment string, the
        ``envp[]`` pointer array that references it, and then the ``argv[]``
        array that precedes ``envp[]``.

        On success it fills ``self.env`` (name -> address of the value),
        ``self.envp_address``, ``self.argc_address``, ``self.argc``,
        ``self.argv_address`` and ``self.argv``.  If the stack does not look
        as expected, a warning is logged and parsing stops early.
        """
        # Get a copy of the stack mapping
        stack = self.stack

        if not stack:
            return

        # If the stack does not end with zeroes, something is very wrong.
        stack_tail = stack.data[-context.bytes:]
        if stack_tail != b'\x00' * context.bytes:
            # Show the bytes that were actually tested (the end of the
            # stack), not the end of the corefile.
            log.warn_once("End of the stack is corrupted, skipping stack parsing (got: %s)",
                          enhex(stack_tail))
            return

        # AT_EXECFN is the start of the filename, e.g. '/bin/sh'
        # Immediately preceding is a NULL-terminated environment variable string.
        # We want to find the beginning of it
        if not self.at_execfn:
            # No AT_EXECFN entry: guess it by skipping the trailing NULL
            # words and searching backwards for the previous terminator.
            address = stack.stop
            address -= 2*self.bytes
            address -= 1
            address = stack.rfind(b'\x00', None, address)
            address += 1
            self.at_execfn = address

        address = self.at_execfn-1

        # Sanity check!
        try:
            if stack[address] != b'\x00':
                log.warning("Error parsing corefile stack: Could not find end of environment")
                return
        except ValueError:
            log.warning("Error parsing corefile stack: Address out of bounds")
            return

        # address is currently set to the NULL terminator of the last
        # environment variable.
        address = stack.rfind(b'\x00', None, address)

        # We've found the beginning of the last environment variable.
        # We should be able to search up the stack for the envp[] array to
        # find a pointer to this address, followed by a NULL.
        last_env_addr = address + 1
        p_last_env_addr = stack.find(pack(last_env_addr), None, last_env_addr)
        if p_last_env_addr < 0:
            # Something weird is happening.  Just don't touch it.
            log.warn_once("Error parsing corefile stack: Found bad environment at %#x", last_env_addr)
            return

        # Sanity check that we did correctly find the envp NULL terminator.
        envp_nullterm = p_last_env_addr+context.bytes
        if self.unpack(envp_nullterm) != 0:
            log.warning("Error parsing corefile stack: Could not find end of environment variables")
            return

        # We've successfully located the end of the envp[] array.
        #
        # It comes immediately after the argv[] array, which itself
        # is NULL-terminated.
        #
        # Now let's find the end of argv
        p_end_of_argv = stack.rfind(pack(0), None, p_last_env_addr)
        if p_end_of_argv < 0:
            # Same failure convention as the find() above: without the argv
            # terminator every address derived below would be garbage.
            log.warning("Error parsing corefile stack: Could not find end of argv")
            return

        self.envp_address = p_end_of_argv + self.bytes

        # Now we can fill in the environment
        env_pointer_data = stack[self.envp_address:p_last_env_addr+self.bytes]
        for pointer in unpack_many(env_pointer_data):

            # If the stack is corrupted, the pointer will be outside of
            # the stack.
            if pointer not in stack:
                continue

            try:
                name_value = self.string(pointer)
            except Exception:
                continue

            # A string without '=' is not a NAME=value pair; skip it rather
            # than failing on it.
            name, separator, _ = name_value.partition(b'=')
            if not separator:
                continue

            # "end" points at the byte after the null terminator
            end = pointer + len(name_value) + 1

            # Do not mark things as environment variables if they point
            # outside of the stack itself, or we had to cross into a different
            # mapping (after the stack) to read it.
            # This may occur when the entire stack is filled with non-NUL bytes,
            # and we NULL-terminate on a read failure in .string().
            if end not in stack:
                continue

            if not isinstance(name, str):
                name = name.decode('utf-8', 'surrogateescape')
            self.env[name] = pointer + len(name) + len('=')

        # May as well grab the arguments off the stack as well.
        # argc comes immediately before argv[0] on the stack, but
        # we don't know what argc is.
        #
        # It is unlikely that argc is a valid stack address.
        address = p_end_of_argv - self.bytes
        while self.unpack(address) in stack:
            address -= self.bytes

        # address now points at argc
        self.argc_address = address
        self.argc = self.unpack(self.argc_address)

        # we can extract all of the arguments as well
        self.argv_address = self.argc_address + self.bytes
        self.argv = unpack_many(stack[self.argv_address: p_end_of_argv])
1✔
1047

1048
    @property
1✔
1049
    def maps(self):
        """:class:`str`: A printable string which is similar to /proc/xx/maps.

        One line per entry of ``self.mappings``, in order.

        ::

            >>> print(Corefile('./core').maps)
            8048000-8049000 r-xp 1000 /home/user/pwntools/crash
            8049000-804a000 r--p 1000 /home/user/pwntools/crash
            804a000-804b000 rw-p 1000 /home/user/pwntools/crash
            f7528000-f7529000 rw-p 1000 None
            f7529000-f76d1000 r-xp 1a8000 /lib/i386-linux-gnu/libc-2.19.so
            f76d1000-f76d2000 ---p 1000 /lib/i386-linux-gnu/libc-2.19.so
            f76d2000-f76d4000 r--p 2000 /lib/i386-linux-gnu/libc-2.19.so
            f76d4000-f76d5000 rw-p 1000 /lib/i386-linux-gnu/libc-2.19.so
            f76d5000-f76d8000 rw-p 3000 None
            f76ef000-f76f1000 rw-p 2000 None
            f76f1000-f76f2000 r-xp 1000 [vdso]
            f76f2000-f7712000 r-xp 20000 /lib/i386-linux-gnu/ld-2.19.so
            f7712000-f7713000 r--p 1000 /lib/i386-linux-gnu/ld-2.19.so
            f7713000-f7714000 rw-p 1000 /lib/i386-linux-gnu/ld-2.19.so
            fff3e000-fff61000 rw-p 23000 [stack]
        """
        rendered = [str(mapping) for mapping in self.mappings]
        return '\n'.join(rendered)
×
1072

1073
    def getenv(self, name):
        """getenv(name) -> bytes

        Read an environment variable off the stack, and return its contents.

        Arguments:
            name(str): Name of the environment variable to read.  A bytes
                name is decoded as UTF-8 (``surrogateescape``) first.

        Returns:
            The string stored at the variable's address, as returned by
            ``self.string``.  An unknown name is reported via ``log.error``.

        Example:

            >>> elf = ELF.from_assembly(shellcraft.trap())
            >>> io = elf.process(env={'GREETING': 'Hello!'})
            >>> io.wait(1)
            >>> io.corefile.getenv('GREETING')
            b'Hello!'
        """
        key = name if isinstance(name, str) else name.decode('utf-8', 'surrogateescape')

        if key not in self.env:
            log.error("Environment variable %r not set" % key)

        value_address = self.env[key]
        return self.string(value_address)
1✔
1098

1099
    @property
1✔
1100
    def registers(self):
        """:class:`dict`: All available registers in the coredump.

        Maps every public attribute of ``prstatus.pr_reg`` that converts to
        :class:`int` onto its value; empty when there is no ``prstatus``.

        Example:

            >>> elf = ELF.from_assembly('mov eax, 0xdeadbeef;' + shellcraft.trap(), arch='i386')
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.registers['eax'] == 0xdeadbeef
            True
        """
        if not self.prstatus:
            return {}

        values = {}

        for reg_name in dir(self.prstatus.pr_reg):
            if reg_name[:1] == '_':
                continue

            try:
                values[reg_name] = int(getattr(self.prstatus.pr_reg, reg_name))
            except Exception:
                # Not an integer-like attribute (e.g. a method); leave it out
                continue

        return values
1✔
1126

1127
    def debug(self):
        """Open the corefile under a debugger.

        Attaches via ``pwnlib.gdb.attach``, passing the path of ``self.exe``
        as the executable.
        """
        # Imported here rather than at module scope
        import pwnlib.gdb

        exe_path = self.exe.path
        pwnlib.gdb.attach(self, exe=exe_path)
×
1131

1132
    def __getattr__(self, attribute):
1✔
1133
        if attribute.startswith('_') or not self.prstatus:
1!
1134
            raise AttributeError(attribute)
×
1135

1136
        if hasattr(self.prstatus, attribute):
1!
1137
            return getattr(self.prstatus, attribute)
×
1138

1139
        return getattr(self.prstatus.pr_reg, attribute)
1✔
1140

1141
    # Override routines which don't make sense for Corefiles
1142
    def _populate_got(*a): pass
1✔
1143
    def _populate_plt(*a): pass
1✔
1144

1145
class Core(Corefile):
    """Alias for :class:`.Corefile`.

    The subclass adds no attributes or methods of its own.
    """
1147

1148
class Coredump(Corefile):
    """Alias for :class:`.Corefile`.

    The subclass adds no attributes or methods of its own.
    """
1150

1151
class CorefileFinder(object):
1✔
1152
    def __init__(self, proc):
        """Locate the corefile left behind by an exited process.

        Arguments:
            proc: Process-like object which has already exited.  ``poll()``,
                ``pid``, ``suid``, ``sgid``, ``executable`` and ``cwd`` are
                read from it.

        When this returns, ``self.core_path`` is the path of the corefile
        which was found, or ``None`` if the search came up empty.
        """
        if proc.poll() is None:
            log.error("Process %i has not exited" % (proc.pid))

        self.process = proc
        self.pid = proc.pid
        self.uid = proc.suid
        self.gid = proc.sgid
        self.exe = proc.executable
        self.basename = os.path.basename(self.exe)
        self.cwd = proc.cwd

        # XXX: Should probably break out all of this logic into
        #      its own class, so that we can support "file ops"
        #      locally, via SSH, and over ADB, in a transparent way.
        # NOTE(review): for any other kind of ``proc`` neither ``self.read``
        # nor ``self.unlink`` is assigned here.
        if isinstance(proc, process):
            self.read = read
            self.unlink = os.unlink
        elif isinstance(proc, ssh_channel):
            self.read = proc.parent.read
            self.unlink = proc.parent.unlink

        # Kernel settings which decide where a native core dump is written
        self.kernel_core_pattern = self.read('/proc/sys/kernel/core_pattern').strip()
        self.kernel_core_uses_pid = bool(int(self.read('/proc/sys/kernel/core_uses_pid')))

        log.debug("core_pattern: %r" % self.kernel_core_pattern)
        log.debug("core_uses_pid: %r" % self.kernel_core_uses_pid)

        self.interpreter = self.binfmt_lookup()

        log.debug("interpreter: %r" % self.interpreter)

        # If we have already located the corefile, we will
        # have renamed it to 'core.<pid>'
        core_path = 'core.%i' % (proc.pid)
        self.core_path = None

        if os.path.isfile(core_path):
            log.debug("Found core immediately: %r" % core_path)
            self.core_path = core_path

        # Try QEMU first, since it's unlikely to be a false-positive unless
        # there is a PID *and* filename collision.
        if not self.core_path:
            log.debug("Looking for QEMU corefile")
            self.core_path = self.qemu_corefile()

        # Check for native coredumps as a last resort
        if not self.core_path:
            log.debug("Looking for native corefile")
            self.core_path = self.native_corefile()

        # Nothing was found by any strategy
        if not self.core_path:
            return

        # PID recorded inside the candidate corefile
        core_pid = self.load_core_check_pid()

        # Move the corefile if we're configured that way
        if context.rename_corefiles:
            new_path = 'core.%i' % core_pid
            if core_pid > 0 and new_path != self.core_path:
                # Copy the contents to the new name, then try to remove the
                # original; failing to remove it is only a warning.
                write(new_path, self.read(self.core_path))
                try:
                    self.unlink(self.core_path)
                except (IOError, OSError):
                    log.warn("Could not delete %r" % self.core_path)
                self.core_path = new_path

        # Check the PID
        if core_pid != self.pid:
            log.warn("Corefile PID does not match! (got %i)" % core_pid)

        # Register the corefile for removal only if it's an exact match
        elif context.delete_corefiles:
            atexit.register(lambda: os.unlink(self.core_path))
×
1227

1228

1229
    def load_core_check_pid(self):
        """Return the PID recorded in the candidate corefile.

        Speculatively load the corefile at ``self.core_path`` without
        informing the user, so that the caller can check whether it belongs
        to the process we are looking for.  The contents are fetched with
        ``self.read`` and parsed from a local temporary copy.

        Returns:
            :class:`int`: The PID stored in the corefile, or ``-1`` if the
            file could not be loaded or does not record a PID.
        """
        try:
            with context.quiet:
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(self.read(self.core_path))
                    tmp.flush()
                    pid = Corefile(tmp.name).pid
        except Exception as e:
            # Best-effort probe: an unreadable or malformed candidate simply
            # does not match.
            log.debug("Could not load corefile %r: %s" % (self.core_path, e))
            return -1

        # Callers format and compare the result as an integer, so a corefile
        # without a PID is reported with the same sentinel as a failed load.
        if pid is None:
            return -1

        return pid
×
1252

1253
    def apport_corefile(self):
        """Find the apport crash for the process, and extract the core file.

        Returns:
            Raw core file contents, or ``None`` when no crash data was read.
        """
        crash_data = self.apport_read_crash_data()

        log.debug("Apport Crash Data:\n%s" % crash_data)

        if not crash_data:
            return None

        return self.apport_crash_extract_corefile(crash_data)
×
1268

1269
    def apport_crash_extract_corefile(self, crashfile_data):
        """Extract a corefile from an apport crash file contents.

        The crash report is scanned once, top to bottom: first for a
        `` Pid:`` line naming ``self.pid``, then for the ``CoreDump: base64``
        header, and finally for the space-indented base64 lines after it.

        Arguments:
            crashfile_data(str): Crash file contents

        Returns:
            Raw binary data for the core file, or ``None`` when either the
            matching PID or the CoreDump section is missing.
        """
        # A single shared iterator, so each stage resumes where the last stopped
        lines = iter(StringIO(crashfile_data))

        def names_our_pid(text):
            return text.startswith(' Pid:') and int(text.split()[-1]) == self.pid

        # Find the pid of the crashfile
        if not any(names_our_pid(text) for text in lines):
            return None

        # Find the CoreDump section
        if not any(text.startswith('CoreDump: base64') for text in lines):
            return None

        # Get all of the base64'd lines; the section ends at the first line
        # which is not indented.
        decoded = []
        for text in lines:
            if text[:1] != ' ':
                break
            decoded.append(b64d(text))

        # Smush everything together, then extract it
        archive = gzip.GzipFile(fileobj=BytesIO(b''.join(decoded)))
        return archive.read()
×
1313

1314
    def apport_read_crash_data(self):
        """Find the apport crash for the process

        The report is looked up at ``/var/crash/<exe with '/' as '_'>.<uid>.crash``
        and removed after a successful read.

        Returns:
            `str`: Raw contents of the crash file or ``None``.
        """
        crash_path = '/var/crash/%s.%i.crash' % (self.exe.replace('/', '_'), self.uid)

        try:
            log.debug("Looking for Apport crash at %r" % crash_path)
            contents = self.read(crash_path)
        except Exception:
            return None

        # Remove the crash file, so that future crashes will be captured
        try:
            self.unlink(crash_path)
        except Exception:
            pass

        # Convert bytes-like object to string
        if isinstance(contents, bytes):
            return contents.decode('utf-8')

        return contents
×
1342

1343
    def systemd_coredump_corefile(self):
        """Find the systemd-coredump crash for the process and dump it to a file.

        Runs ``coredumpctl dump`` filtered by ``self.pid`` and has it write
        the core to ``core.<basename>.<pid>.coredumpctl``.

        Returns:
            `str`: Filename of core file, if coredump was found; ``None`` if
            ``coredumpctl`` failed or could not be executed.
        """
        filename = "core.%s.%i.coredumpctl" % (self.basename, self.pid)
        command = [
            "coredumpctl",
            "dump",
            "--output=%s" % filename,
            # Filter coredump by pid
            str(self.pid),
        ]

        try:
            # The context manager closes the devnull handle again instead of
            # leaving one open for every lookup.
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(
                    command,
                    stdout=devnull,
                    stderr=subprocess.STDOUT,
                    shell=False,
                )
        except subprocess.CalledProcessError as e:
            log.debug("coredumpctl failed with status: %d" % e.returncode)
            return None
        except OSError as e:
            # e.g. coredumpctl is not installed: report "not found" instead
            # of aborting the whole corefile search.
            log.debug("coredumpctl could not be executed: %s" % e)
            return None

        return filename
×
1369

1370
    def native_corefile(self):
        """Find the corefile for a native crash.

        A ``core_pattern`` starting with ``|`` is handled by
        :meth:`native_corefile_pipe`; anything else by
        :meth:`native_corefile_pattern`.

        Returns:
            `str`: Filename of core file.
        """
        piped = self.kernel_core_pattern.startswith(b'|')

        if not piped:
            log.debug("Checking for corefile (pattern)")
            return self.native_corefile_pattern()

        log.debug("Checking for corefile (piped)")
        return self.native_corefile_pipe()
×
1385

1386
    def native_corefile_pipe(self):
        """Find the corefile for a piped core_pattern

        Supports apport and systemd-coredump.

        For apport, the crash report is tried first (its core is written to
        ``core.<basename>.<pid>.apport``), then apport's coredump directory,
        and finally a plain ``core`` pattern is assumed.

        Returns:
            `str`: Filename of core file, or ``None`` if nothing was found
            or the core_pattern is unsupported.
        """
        if b'/apport' in self.kernel_core_pattern:
            log.debug("Found apport in core_pattern")
            core_bytes = self.apport_corefile()

            if core_bytes:
                # Write the corefile to the local directory
                destination = 'core.%s.%i.apport' % (self.basename, self.pid)
                with open(destination, 'wb+') as out:
                    out.write(core_bytes)
                return destination

            found = self.apport_coredump()
            if not found:
                # Pretend core_pattern was just 'core', and see if we come up with anything
                self.kernel_core_pattern = 'core'
                found = self.native_corefile_pattern()
            return found

        if b'systemd-coredump' in self.kernel_core_pattern:
            log.debug("Found systemd-coredump in core_pattern")
            return self.systemd_coredump_corefile()

        log.warn_once("Unsupported core_pattern: %r", self.kernel_core_pattern)
        return None
×
1421

1422
    def native_corefile_pattern(self):
        """Expand ``core_pattern`` into a path and check that it is readable.

        The template specifiers understood by the kernel are::

            %%  a single % character
            %c  core file size soft resource limit of crashing process (since Linux 2.6.24)
            %d  dump mode - same as value returned by prctl(2) PR_GET_DUMPABLE (since Linux 3.7)
            %e  executable filename (without path prefix)
            %E  pathname of executable, with slashes ('/') replaced by exclamation marks ('!') (since Linux 3.0).
            %g  (numeric) real GID of dumped process
            %h  hostname (same as nodename returned by uname(2))
            %i  TID of thread that triggered core dump, as seen in the PID namespace in which the thread resides (since Linux 3.18)
            %I  TID of thread that triggered core dump, as seen in the initial PID namespace (since Linux 3.18)
            %p  PID of dumped process, as seen in the PID namespace in which the process resides
            %P  PID of dumped process, as seen in the initial PID namespace (since Linux 3.12)
            %s  number of signal causing dump
            %t  time of dump, expressed as seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC)
            %u  (numeric) real UID of dumped process

        ``%c``, ``%d`` and ``%t`` are not substituted here.  ``.<pid>`` is
        appended when ``core_uses_pid`` is set, and a relative result is
        resolved against the working directory of the process.

        Returns:
            `str`: Path of the corefile if ``self.read`` can read it,
            otherwise ``None``.
        """
        specifiers = {
            '%%': '%',
            '%e': os.path.basename(self.interpreter) or self.basename,
            '%E': self.exe.replace('/', '!'),
            '%g': str(self.gid),
            '%h': socket.gethostname(),
            '%i': str(self.pid),
            '%I': str(self.pid),
            '%p': str(self.pid),
            '%P': str(self.pid),
            # poll() is expected to be the negated signal number here
            '%s': str(-self.process.poll()),
            '%u': str(self.uid)
        }
        matcher = re.compile("|".join(re.escape(spec) for spec in specifiers))

        # The pattern is read from /proc as bytes; work on text from here on
        if not hasattr(self.kernel_core_pattern, 'encode'):
            self.kernel_core_pattern = self.kernel_core_pattern.decode('utf-8')

        corefile_path = matcher.sub(lambda m: specifiers[m.group(0)],
                                    self.kernel_core_pattern)

        if self.kernel_core_uses_pid:
            corefile_path += '.%i' % self.pid

        # Resolve every relative path against the process cwd.  (The
        # previous test looked for os.pathsep, the ':' used to separate
        # $PATH entries, so a relative name containing ':' was left as-is.)
        if not os.path.isabs(corefile_path):
            corefile_path = os.path.join(self.cwd, corefile_path)

        log.debug("Trying corefile_path: %r" % corefile_path)

        try:
            self.read(corefile_path)
        except Exception as e:
            log.debug("No dice: %s" % e)
            return None

        return corefile_path
×
1472

1473
    def qemu_corefile(self):
        """qemu_corefile() -> str

        Retrieves the path to a QEMU core dump, or ``None`` if there is none
        in the working directory of the process.
        """

        # QEMU doesn't follow anybody else's rules
        # https://github.com/qemu/qemu/blob/stable-2.6/linux-user/elfload.c#L2710-L2744
        #
        #     qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core
        #
        # The date and time are wildcarded, since the PID should be unique
        # enough that we can just glob.
        corefile_name = 'qemu_{basename}_*_{pid}.core'.format(basename=self.basename,
                                                              pid=self.pid)

        # Get the full path
        corefile_path = os.path.join(self.cwd, corefile_name)

        log.debug("Trying corefile_path: %r" % corefile_path)

        # Glob all of them, return the *most recent* based on numeric sort order.
        candidates = sorted(glob.glob(corefile_path), reverse=True)
        return candidates[0] if candidates else None
1✔
1500

1501
    def apport_coredump(self):
        """Find new-style apport coredump of executables not belonging
        to a system package

        Returns the newest matching file under ``/var/lib/apport/coredump``,
        or ``None`` if there is none.
        """
        # Apport stores these cores as
        #
        #     core.<_path_to_target_binary>.<uid>.<boot_id>.<pid>.<timestamp>
        #
        # The timestamp is wildcarded, since the PID should be unique enough
        # that we can just glob.

        boot_id = read('/proc/sys/kernel/random/boot_id').strip().decode()

        # Use the absolute path of the executable
        # Apport uses the executable's path to determine the core dump filename
        #
        # Reference source:
        # https://github.com/canonical/apport/blob/4bbb179b8f92989bf7c1ee3692074f35d70ef3e8/data/apport#L110
        # https://github.com/canonical/apport/blob/4bbb179b8f92989bf7c1ee3692074f35d70ef3e8/apport/fileutils.py#L599
        #
        # Apport calls `get_core_path` with `options.executable_path`, which corresponds to
        # the executable's pathname, as specified by the `%E` placeholder
        # in the core pattern (see `man core` and `apport --help`).
        flattened = os.path.abspath(self.exe)
        for separator in ('/', '.'):
            flattened = flattened.replace(separator, '_')

        # Format the name
        corefile_name = 'core.{0}.{1}.{2}.{3}.*'.format(flattened, self.uid, boot_id, self.pid)

        # Get the full path
        corefile_path = os.path.join('/var/lib/apport/coredump', corefile_name)

        log.debug("Trying corefile_path: %r" % corefile_path)

        # Glob all of them, return the *most recent* based on numeric sort order.
        candidates = sorted(glob.glob(corefile_path), reverse=True)
        return candidates[0] if candidates else None
×
1547

1548
    def binfmt_lookup(self):
        """Parses /proc/sys/fs/binfmt_misc to find the interpreter for a file

        Every enabled entry with a ``magic`` is compared against the bytes of
        ``self.exe`` at the entry's ``offset``, using the kernel's rule
        ``(data ^ magic) & mask == 0``.

        Returns:
            `str`: The interpreter of the first matching entry, the
            process's QEMU path when it has one, or ``''`` if nothing matches.
        """

        binfmt_misc = '/proc/sys/fs/binfmt_misc'

        if not isinstance(self.process, process):
            log.debug("Not a process")
            return ''

        if self.process._qemu:
            return self.process._qemu

        if not os.path.isdir(binfmt_misc):
            log.debug("No binfmt_misc dir")
            return ''

        exe_data = bytearray(self.read(self.exe))

        for entry in os.listdir(binfmt_misc):
            path = os.path.join(binfmt_misc, entry)

            try:
                lines = self.read(path).decode().splitlines()
            except Exception:
                # e.g. the write-only 'register' file
                continue

            # The first line of an entry is its status; the kernel does not
            # use disabled entries, so neither should we.
            if lines and lines[0].strip() == 'disabled':
                continue

            # Remaining lines of interest are "<key> <value>" pairs
            keys = {}
            for line in lines:
                parts = line.split(None)
                if len(parts) == 2:
                    keys[parts[0]] = parts[1]

            if 'magic' not in keys or 'interpreter' not in keys:
                continue

            try:
                offset = int(keys.get('offset', 0))
            except ValueError:
                offset = 0

            magic = bytearray(unhex(keys['magic']))
            if 'mask' in keys:
                mask = bytearray(unhex(keys['mask']))
            else:
                mask = bytearray(b'\xff' * len(magic))

            # A file (or mask) too short to cover the magic cannot match;
            # skip it instead of indexing past the end.
            window = exe_data[offset:offset + len(magic)]
            if len(window) < len(magic) or len(mask) < len(magic):
                continue

            if all(((byte ^ mag) & bits) == 0
                   for byte, mag, bits in zip(window, magic, mask)):
                return keys['interpreter']

        return ''
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc