• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Gallopsled / pwntools / 6cd1b9709b5114b16f40861d2278a4a19c34d94c

01 Nov 2023 10:17PM UTC coverage: 73.399% (+1.9%) from 71.502%
6cd1b9709b5114b16f40861d2278a4a19c34d94c

push

github-actions

Arusekk
shellcraft.aarch64: Fix atexit SEGV in loader

Fixes #2289

3901 of 6416 branches covered (0.0%)

12254 of 16695 relevant lines covered (73.4%)

0.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.81
/pwnlib/elf/corefile.py
1
# -*- coding: utf-8 -*-
2
"""Read information from Core Dumps.
1✔
3

4
Core dumps are extremely useful when writing exploits, even outside of
5
the normal act of debugging things.
6

7
Using Corefiles to Automate Exploitation
8
----------------------------------------
9

10
For example, if you have a trivial buffer overflow and don't want to
11
open up a debugger or calculate offsets, you can use a generated core
12
dump to extract the relevant information.
13

14
.. code-block:: c
15

16
    #include <string.h>
17
    #include <stdlib.h>
18
    #include <unistd.h>
19
    void win() {
20
        system("sh");
21
    }
22
    int main(int argc, char** argv) {
23
        char buffer[64];
24
        strcpy(buffer, argv[1]);
25
    }
26

27
.. code-block:: shell
28

29
    $ gcc crash.c -m32 -o crash -fno-stack-protector
30

31
.. code-block:: python
32

33
    from pwn import *
34

35
    # Generate a cyclic pattern so that we can auto-find the offset
36
    payload = cyclic(128)
37

38
    # Run the process once so that it crashes
39
    process(['./crash', payload]).wait()
40

41
    # Get the core dump
42
    core = Coredump('./core')
43

44
    # Our cyclic pattern should have been used as the crashing address
45
    assert pack(core.eip) in payload
46

47
    # Cool! Now let's just replace that value with the address of 'win'
48
    crash = ELF('./crash')
49
    payload = fit({
50
        cyclic_find(core.eip): crash.symbols.win
51
    })
52

53
    # Get a shell!
54
    io = process(['./crash', payload])
55
    io.sendline(b'id')
56
    print(io.recvline())
57
    # uid=1000(user) gid=1000(user) groups=1000(user)
58

59
Module Members
60
----------------------------------------
61

62
"""
63
from __future__ import absolute_import
1✔
64
from __future__ import division
1✔
65

66
import collections
1✔
67
import ctypes
1✔
68
import glob
1✔
69
import gzip
1✔
70
import re
1✔
71
import os
1✔
72
import socket
1✔
73
import subprocess
1✔
74
import tempfile
1✔
75

76
from io import BytesIO, StringIO
1✔
77

78
import elftools
1✔
79
from elftools.common.utils import roundup
1✔
80
from elftools.common.utils import struct_parse
1✔
81
from elftools.construct import CString
1✔
82

83
from pwnlib import atexit
1✔
84
from pwnlib.context import context
1✔
85
from pwnlib.elf.datatypes import *
1✔
86
from pwnlib.elf.elf import ELF
1✔
87
from pwnlib.log import getLogger
1✔
88
from pwnlib.tubes.process import process
1✔
89
from pwnlib.tubes.ssh import ssh_channel
1✔
90
from pwnlib.tubes.tube import tube
1✔
91
from pwnlib.util.fiddling import b64d
1✔
92
from pwnlib.util.fiddling import enhex
1✔
93
from pwnlib.util.fiddling import unhex
1✔
94
from pwnlib.util.misc import read
1✔
95
from pwnlib.util.misc import write
1✔
96
from pwnlib.util.packing import _decode
1✔
97
from pwnlib.util.packing import pack
1✔
98
from pwnlib.util.packing import unpack_many
1✔
99

100
log = getLogger(__name__)
1✔
101

102
# NT_PRSTATUS layout for each supported architecture, looked up by the
# corefile's ``arch``.  Architectures missing here get no register access.
prstatus_types = {
    'i386': elf_prstatus_i386,
    'amd64': elf_prstatus_amd64,
    'arm': elf_prstatus_arm,
    'aarch64': elf_prstatus_aarch64,
}

# NT_PRPSINFO layout, looked up by the corefile's ``bits``.
prpsinfo_types = {
    32: elf_prpsinfo_32,
    64: elf_prpsinfo_64,
}

# NT_SIGINFO layout, looked up by the corefile's ``bits``.
siginfo_types = {
    32: elf_siginfo_32,
    64: elf_siginfo_64,
}
118

119
# Slightly modified copy of the pyelftools version of the same function,
120
# until they fix this issue:
121
# https://github.com/eliben/pyelftools/issues/93
122
def iter_notes(self):
    """Yield each note stored in this note segment.

    Every yielded note is the parsed ``Elf_Nhdr`` header extended with:

    * ``n_offset`` -- file offset at which the note header starts
    * ``n_name``   -- the note name, decoded as latin-1
    * ``n_desc``   -- the note descriptor, decoded as latin-1
    * ``n_size``   -- total on-disk size of the note, padding included
    """
    header_struct = self.elffile.structs.Elf_Nhdr
    stream = self.stream

    cursor = self['p_offset']
    limit = cursor + self['p_filesz']

    while cursor < limit:
        note = struct_parse(header_struct, stream, stream_pos=cursor)
        note['n_offset'] = cursor

        # Skip over the fixed-size header to reach the name.
        cursor += header_struct.sizeof()
        stream.seek(cursor)

        # Name and descriptor are both padded on disk to a 4-byte boundary
        # (``roundup(..., 2)`` rounds up to a multiple of 2**2).
        padded_name_size = roundup(note['n_namesz'], 2)

        with context.local(encoding='latin-1'):
            raw_name = stream.read(padded_name_size)
            note['n_name'] = _decode(CString('').parse(raw_name))

            # Only the unpadded descriptor is read; the padding is accounted
            # for below when advancing the cursor.
            note['n_desc'] = _decode(stream.read(note['n_descsz']))

        cursor += padded_name_size + roundup(note['n_descsz'], 2)
        note['n_size'] = cursor - note['n_offset']
        yield note
1✔
147

148
class Mapping(object):
    """One memory region recorded in a :class:`Corefile`.

    Subscripts, ``in`` checks, :meth:`find` and :meth:`rfind` all work with
    virtual addresses rather than offsets relative to the mapping.
    """
    def __init__(self, core, name, start, stop, flags, page_offset):
        # Owning corefile; every memory access is delegated to its ``read``.
        self._core = core

        #: :class:`str`: Name of the mapping, e.g. ``'/bin/bash'`` or ``'[vdso]'``.
        self.name = name if name else ''

        #: :class:`int`: First mapped byte in the mapping
        self.start = start

        #: :class:`int`: First byte after the end of the mapping
        self.stop = stop

        #: :class:`int`: Size of the mapping, in bytes
        self.size = stop - start

        #: :class:`int`: Offset in pages in the mapped file
        self.page_offset = page_offset if page_offset else 0

        #: :class:`int`: Mapping flags, using e.g. ``PROT_READ`` and so on.
        self.flags = flags

    @property
    def path(self):
        """:class:`str`: Alias for :attr:`.Mapping.name`"""
        return self.name

    @property
    def address(self):
        """:class:`int`: Alias for :data:`Mapping.start`."""
        return self.start

    @property
    def permstr(self):
        """:class:`str`: Human-readable memory permission string, e.g. ``r-xp``."""
        bits = self.flags
        letters = [letter if bits & mask else '-'
                   for mask, letter in ((4, 'r'), (2, 'w'), (1, 'x'))]
        # The last column is always reported as private.
        letters.append('p')
        return ''.join(letters)

    def __str__(self):
        fields = (self.start, self.stop, self.permstr, self.size, self.name)
        return '%x-%x %s %x %s' % fields

    def __repr__(self):
        fields = (
            self.__class__.__name__,
            self.name,
            self.start,
            self.stop,
            self.size,
            self.flags,
            self.page_offset,
        )
        return '%s(%r, start=%#x, stop=%#x, size=%#x, flags=%#x, page_offset=%#x)' % fields

    def __int__(self):
        return self.start

    @property
    def data(self):
        """Raw contents of the whole mapping, as read from the corefile."""
        return self._core.read(self.start, self.size)

    def __getitem__(self, item):
        """Read memory by virtual address.

        A plain index reads one byte at that address.  A slice reads the
        address range ``[start:stop]``; omitted bounds default to the bounds
        of the mapping and negative bounds are taken relative to
        :attr:`stop`.  A range outside the mapping is reported through
        ``log.error``.
        """
        if not isinstance(item, slice):
            return self._core.read(item, 1)

        lo = int(item.start or self.start)
        hi = int(item.stop or self.stop)

        # Negative bounds count backwards from the end of the mapping.
        if lo < 0:
            lo += self.stop
        if hi < 0:
            hi += self.stop

        in_bounds = self.start <= lo <= hi <= self.stop
        if not in_bounds:
            log.error("Byte range [%#x:%#x] not within range [%#x:%#x]",
                      lo, hi, self.start, self.stop)

        chunk = self._core.read(lo, hi - lo)
        return chunk if item.step == 1 else chunk[::item.step]

    def __contains__(self, item):
        """Whether an address, or a whole other :class:`Mapping`, lies inside."""
        if not isinstance(item, Mapping):
            return self.start <= item < self.stop
        return (self.start <= item.start) and (item.stop <= self.stop)

    def find(self, sub, start=None, end=None):
        """Similar to str.find() but works on our address space"""
        base = self.address
        lo = self.start if start is None else start
        hi = self.stop if end is None else end

        index = self.data.find(sub, lo - base, hi - base)

        # -1 means "not found" and must not be turned into an address.
        return index if index == -1 else index + base

    def rfind(self, sub, start=None, end=None):
        """Similar to str.rfind() but works on our address space"""
        base = self.address
        lo = self.start if start is None else start
        hi = self.stop if end is None else end

        index = self.data.rfind(sub, lo - base, hi - base)

        # -1 means "not found" and must not be turned into an address.
        return index if index == -1 else index + base
1✔
266

267
class Corefile(ELF):
1✔
268
    r"""Enhances the information available about a corefile (which is an extension
269
    of the ELF format) by permitting extraction of information about the mapped
270
    data segments, and register state.
271

272
    Registers can be accessed directly, e.g. via ``core_obj.eax`` and enumerated
273
    via :data:`Corefile.registers`.
274

275
    Memory can be accessed directly via :meth:`.read` or :meth:`.write`, and also
276
    via :meth:`.pack` or :meth:`.unpack` or even :meth:`.string`.
277

278
    Arguments:
279
        core: Path to the core file.  Alternately, may be a :class:`.process` instance,
280
              and the core file will be located automatically.
281

282
    ::
283

284
        >>> c = Corefile('./core')
285
        >>> hex(c.eax)
286
        '0xfff5f2e0'
287
        >>> c.registers
288
        {'eax': 4294308576,
289
         'ebp': 1633771891,
290
         'ebx': 4151132160,
291
         'ecx': 4294311760,
292
         'edi': 0,
293
         'edx': 4294308700,
294
         'eflags': 66050,
295
         'eip': 1633771892,
296
         'esi': 0,
297
         'esp': 4294308656,
298
         'orig_eax': 4294967295,
299
         'xcs': 35,
300
         'xds': 43,
301
         'xes': 43,
302
         'xfs': 0,
303
         'xgs': 99,
304
         'xss': 43}
305

306
    Mappings can be iterated in order via :attr:`Corefile.mappings`.
307

308
    ::
309

310
        >>> Corefile('./core').mappings
311
        [Mapping('/home/user/pwntools/crash', start=0x8048000, stop=0x8049000, size=0x1000, flags=0x5, page_offset=0x0),
312
         Mapping('/home/user/pwntools/crash', start=0x8049000, stop=0x804a000, size=0x1000, flags=0x4, page_offset=0x1),
313
         Mapping('/home/user/pwntools/crash', start=0x804a000, stop=0x804b000, size=0x1000, flags=0x6, page_offset=0x2),
314
         Mapping(None, start=0xf7528000, stop=0xf7529000, size=0x1000, flags=0x6, page_offset=0x0),
315
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf7529000, stop=0xf76d1000, size=0x1a8000, flags=0x5, page_offset=0x0),
316
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d1000, stop=0xf76d2000, size=0x1000, flags=0x0, page_offset=0x1a8),
317
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d2000, stop=0xf76d4000, size=0x2000, flags=0x4, page_offset=0x1a9),
318
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d4000, stop=0xf76d5000, size=0x1000, flags=0x6, page_offset=0x1aa),
319
         Mapping(None, start=0xf76d5000, stop=0xf76d8000, size=0x3000, flags=0x6, page_offset=0x0),
320
         Mapping(None, start=0xf76ef000, stop=0xf76f1000, size=0x2000, flags=0x6, page_offset=0x0),
321
         Mapping('[vdso]', start=0xf76f1000, stop=0xf76f2000, size=0x1000, flags=0x5, page_offset=0x0),
322
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf76f2000, stop=0xf7712000, size=0x20000, flags=0x5, page_offset=0x0),
323
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf7712000, stop=0xf7713000, size=0x1000, flags=0x4, page_offset=0x20),
324
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf7713000, stop=0xf7714000, size=0x1000, flags=0x6, page_offset=0x21),
325
         Mapping('[stack]', start=0xfff3e000, stop=0xfff61000, size=0x23000, flags=0x6, page_offset=0x0)]
326

327
    Examples:
328

329
        Let's build an example binary which should eat ``R0=0xdeadbeef``
330
        and ``PC=0xcafebabe``.
331

332
        If we run the binary and then wait for it to exit, we can get its
333
        core file.
334

335
        >>> context.clear(arch='arm')
336
        >>> shellcode = shellcraft.mov('r0', 0xdeadbeef)
337
        >>> shellcode += shellcraft.mov('r1', 0xcafebabe)
338
        >>> shellcode += 'bx r1'
339
        >>> address = 0x41410000
340
        >>> elf = ELF.from_assembly(shellcode, vma=address)
341
        >>> io = elf.process(env={'HELLO': 'WORLD'})
342
        >>> io.poll(block=True)
343
        -11
344

345
        You can specify a full path a la ``Corefile('/path/to/core')``,
346
        but you can also just access the :attr:`.process.corefile` attribute.
347

348
        There's a lot of behind-the-scenes logic to locate the corefile for
349
        a given process, but it's all handled transparently by Pwntools.
350

351
        >>> core = io.corefile
352

353
        The core file has a :attr:`exe` property, which is a :class:`.Mapping`
354
        object.  Each mapping can be accessed with virtual addresses via subscript, or
355
        contents can be examined via the :attr:`.Mapping.data` attribute.
356

357
        >>> core.exe # doctest: +ELLIPSIS
358
        Mapping('/.../step3', start=..., stop=..., size=0x1000, flags=0x..., page_offset=...)
359
        >>> hex(core.exe.address)
360
        '0x41410000'
361

362
        The core file also has registers which can be accessed directly.
363
        Pseudo-registers :attr:`pc` and :attr:`sp` are available on all architectures,
364
        to make writing architecture-agnostic code more simple.
365
        If this were an amd64 corefile, we could access e.g. ``core.rax``.
366

367
        >>> core.pc == 0xcafebabe
368
        True
369
        >>> core.r0 == 0xdeadbeef
370
        True
371
        >>> core.sp == core.r13
372
        True
373

374
        We may not always know which signal caused the core dump, or what address
375
        caused a segmentation fault.  Instead of accessing registers directly, we
376
        can also extract this information from the core dump via :attr:`fault_addr`
377
        and :attr:`signal`.
378

379
        On QEMU-generated core dumps, this information is unavailable, so we
380
        substitute the value of PC.  In our example, that's correct anyway.
381

382
        >>> core.fault_addr == 0xcafebabe
383
        True
384
        >>> core.signal
385
        11
386

387
        Core files can also be generated from running processes.
388
        This requires GDB to be installed, and can only be done with native processes.
389
        Getting a "complete" corefile requires GDB 7.11 or better.
390

391
        >>> elf = ELF(which('bash-static'))
392
        >>> context.clear(binary=elf)
393
        >>> env = dict(os.environ)
394
        >>> env['HELLO'] = 'WORLD'
395
        >>> io = process(elf.path, env=env)
396
        >>> io.sendline(b'echo hello')
397
        >>> io.recvline()
398
        b'hello\n'
399

400
        The process is still running, but accessing its :attr:`.process.corefile` property
401
        automatically invokes GDB to attach and dump a corefile.
402

403
        >>> core = io.corefile
404
        >>> io.close()
405

406
        The corefile can be inspected and read from, and even exposes various mappings
407

408
        >>> core.exe # doctest: +ELLIPSIS
409
        Mapping('.../bin/bash-static', start=..., stop=..., size=..., flags=..., page_offset=...)
410
        >>> core.exe.data[0:4]
411
        b'\x7fELF'
412

413
        It also supports all of the features of :class:`ELF`, so you can :meth:`.read`
414
        or :meth:`.write` or even the helpers like :meth:`.pack` or :meth:`.unpack`.
415

416
        Don't forget to call :meth:`.ELF.save` to save the changes to disk.
417

418
        >>> core.read(elf.address, 4)
419
        b'\x7fELF'
420
        >>> core.pack(core.sp, 0xdeadbeef)
421
        >>> core.save()
422

423
        Let's re-load it as a new :attr:`Corefile` object and have a look!
424

425
        >>> core2 = Corefile(core.path)
426
        >>> hex(core2.unpack(core2.sp))
427
        '0xdeadbeef'
428

429
        Various other mappings are available by name, for the first segment of:
430

431
        * :attr:`.exe` the executable
432
        * :attr:`.libc` the loaded libc, if any
433
        * :attr:`.stack` the stack mapping
434
        * :attr:`.vvar`
435
        * :attr:`.vdso`
436
        * :attr:`.vsyscall`
437

438
        On Linux, 32-bit Intel binaries should have a VDSO section via :attr:`vdso`.  
439
        Since our ELF is statically linked, there is no libc which gets mapped.
440

441
        >>> core.vdso.data[:4]
442
        b'\x7fELF'
443
        >>> core.libc
444

445
        But if we dump a corefile from a dynamically-linked binary, the :attr:`.libc`
446
        will be loaded.
447

448
        >>> process('bash').corefile.libc # doctest: +ELLIPSIS
449
        Mapping('.../libc...so...', start=0x..., stop=0x..., size=0x..., flags=..., page_offset=...)
450

451
        The corefile also contains a :attr:`.stack` property, which gives
452
        us direct access to the stack contents.  On Linux, the very top of the stack
453
        should contain two pointer-widths of NULL bytes, preceded by the NULL-
454
        terminated path to the executable (as passed via the first arg to ``execve``).
455

456
        >>> core.stack # doctest: +ELLIPSIS
457
        Mapping('[stack]', start=0x..., stop=0x..., size=0x..., flags=0x6, page_offset=0x0)
458

459
        When creating a process, the kernel puts the absolute path of the binary and some
460
        padding bytes at the end of the stack.  We can look at those by looking at 
461
        ``core.stack.data``.
462

463
        >>> size = len('/bin/bash-static') + 8
464
        >>> core.stack.data[-size:]
465
        b'bin/bash-static\x00\x00\x00\x00\x00\x00\x00\x00\x00'
466

467
        We can also directly access the environment variables and arguments, via
468
        :attr:`.argc`, :attr:`.argv`, and :attr:`.env`.
469

470
        >>> 'HELLO' in core.env
471
        True
472
        >>> core.string(core.env['HELLO'])
473
        b'WORLD'
474
        >>> core.getenv('HELLO')
475
        b'WORLD'
476
        >>> core.argc
477
        1
478
        >>> core.argv[0] in core.stack
479
        True
480
        >>> core.string(core.argv[0]) # doctest: +ELLIPSIS
481
        b'.../bin/bash-static'
482

483
        Corefiles can also be pulled from remote machines via SSH!
484

485
        >>> s = ssh(user='travis', host='example.pwnme', password='demopass')
486
        >>> _ = s.set_working_directory()
487
        >>> elf = ELF.from_assembly(shellcraft.trap())
488
        >>> path = s.upload(elf.path)
489
        >>> _ =s.chmod('+x', path)
490
        >>> io = s.process(path)
491
        >>> io.wait(1)
492
        -1
493
        >>> io.corefile.signal == signal.SIGTRAP # doctest: +SKIP
494
        True
495

496
        Make sure fault_addr synthesis works for amd64 on ret.
497

498
        >>> context.clear(arch='amd64')
499
        >>> elf = ELF.from_assembly('push 1234; ret')
500
        >>> io = elf.process()
501
        >>> io.wait(1)
502
        >>> io.corefile.fault_addr
503
        1234
504

505
        Corefile.getenv() works correctly, even if the environment variable's
506
        value contains embedded '='. Corefile is able to find the stack, even
507
        if the stack pointer doesn't point at the stack.
508

509
        >>> elf = ELF.from_assembly(shellcraft.crash())
510
        >>> io = elf.process(env={'FOO': 'BAR=BAZ'})
511
        >>> io.wait(1)
512
        >>> core = io.corefile
513
        >>> core.getenv('FOO')
514
        b'BAR=BAZ'
515
        >>> core.sp == 0
516
        True
517
        >>> core.sp in core.stack
518
        False
519

520
        Corefile gracefully handles the stack being filled with garbage, including
521
        argc / argv / envp being overwritten.
522

523
        >>> context.clear(arch='i386')
524
        >>> assembly = '''
525
        ... LOOP:
526
        ...   mov dword ptr [esp], 0x41414141
527
        ...   pop eax
528
        ...   jmp LOOP
529
        ... '''
530
        >>> elf = ELF.from_assembly(assembly)
531
        >>> io = elf.process()
532
        >>> io.wait(2)
533
        >>> core = io.corefile
534
        [!] End of the stack is corrupted, skipping stack parsing (got: 41414141)
535
        >>> core.argc, core.argv, core.env
536
        (0, [], {})
537
        >>> core.stack.data.endswith(b'AAAA')
538
        True
539
        >>> core.fault_addr == core.sp
540
        True
541
    """
542

543
    _fill_gaps = False
1✔
544

545
    def __init__(self, *a, **kw):
        """Load a corefile and parse its notes, memory mappings and stack.

        Arguments:
            a, kw: Passed through unchanged to the :class:`.ELF` constructor.

        Raises:
            IOError: Re-raised from the :class:`.ELF` constructor, after a
                hint about ``core_pattern`` has been logged.
        """
        #: The NT_PRSTATUS object.
        self.prstatus = None

        #: The NT_PRPSINFO object
        self.prpsinfo = None

        #: The NT_SIGINFO object
        self.siginfo = None

        #: :class:`list`: A list of :class:`.Mapping` objects for each loaded memory region
        self.mappings = []

        #: :class:`Mapping`: The mapping corresponding to the stack, or ``None``
        #: if it could not be located.
        self.stack    = None

        #: :class:`dict`: Environment variables read from the stack.
        #: Keys are the environment variable name, values are the memory
        #: address of the variable.
        #:
        #: Use :meth:`.getenv` or :meth:`.string` to retrieve the textual value.
        #:
        #: Note: If ``FOO=BAR`` is in the environment, ``self.env['FOO']`` is
        #: the address of the string ``"BAR\x00"``.
        self.env = {}

        #: :class:`int`: Pointer to envp on the stack
        self.envp_address = 0

        #: :class:`list`: List of addresses of arguments on the stack.
        self.argv = []

        #: :class:`int`: Pointer to argv on the stack
        self.argv_address = 0

        #: :class:`int`: Number of arguments passed
        self.argc = 0

        #: :class:`int`: Pointer to argc on the stack
        self.argc_address = 0

        # Pointer to the executable filename on the stack
        self.at_execfn = 0

        # Pointer to the entry point
        self.at_entry = 0

        try:
            super(Corefile, self).__init__(*a, **kw)
        except IOError:
            log.warning("No corefile.  Have you set /proc/sys/kernel/core_pattern?")
            raise

        self.load_addr = 0
        self._address  = 0

        if self.elftype != 'CORE':
            log.error("%s is not a valid corefile" % self.file.name)

        if self.arch not in prstatus_types:
            log.warn_once("%s does not use a supported corefile architecture, registers are unavailable" % self.file.name)

        # Any of these may be None when the architecture / word size has no
        # known layout; the corresponding note is then skipped below.
        prstatus_type = prstatus_types.get(self.arch)
        prpsinfo_type = prpsinfo_types.get(self.bits)
        siginfo_type = siginfo_types.get(self.bits)

        with log.waitfor("Parsing corefile..."):
            self._load_mappings()

            for segment in self.segments:
                if not isinstance(segment, elftools.elf.segments.NoteSegment):
                    continue

                # Note that older versions of pyelftools (<=0.24) are missing enum values
                # for NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, etc.
                # For this reason, we have to check if note.n_type is any of several values.
                for note in iter_notes(segment):
                    # iter_notes() decodes the descriptor as latin-1; turn it
                    # back into raw bytes before handing it to ctypes.
                    if not isinstance(note.n_desc, bytes):
                        note['n_desc'] = note.n_desc.encode('latin1')

                    # Try to find NT_PRSTATUS.
                    if prstatus_type and \
                       note.n_descsz == ctypes.sizeof(prstatus_type) and \
                       note.n_type in ('NT_GNU_ABI_TAG', 'NT_PRSTATUS'):
                        self.NT_PRSTATUS = note
                        self.prstatus = prstatus_type.from_buffer_copy(note.n_desc)

                    # Try to find NT_PRPSINFO
                    if prpsinfo_type and \
                       note.n_descsz == ctypes.sizeof(prpsinfo_type) and \
                       note.n_type in ('NT_GNU_ABI_TAG', 'NT_PRPSINFO'):
                        self.NT_PRPSINFO = note
                        self.prpsinfo = prpsinfo_type.from_buffer_copy(note.n_desc)

                    # Try to find NT_SIGINFO so we can see the fault.
                    # Guarded like the two checks above so that a missing
                    # layout skips the note instead of failing on ``None``.
                    if siginfo_type and \
                       note.n_type in (0x53494749, 'NT_SIGINFO'):
                        self.NT_SIGINFO = note
                        self.siginfo = siginfo_type.from_buffer_copy(note.n_desc)

                    # Try to find the list of mapped files
                    if note.n_type in (constants.NT_FILE, 'NT_FILE'):
                        with context.local(bytes=self.bytes):
                            self._parse_nt_file(note)

                    # Try to find the auxiliary vector, which will tell us
                    # where the top of the stack is.
                    if note.n_type in (constants.NT_AUXV, 'NT_AUXV'):
                        self.NT_AUXV = note
                        with context.local(bytes=self.bytes):
                            self._parse_auxv(note)

            # At this point ``self.stack`` is an address, not yet a Mapping.
            # If nothing has set it, fall back to the end of the last mapping.
            if not self.stack and self.mappings:
                self.stack = self.mappings[-1].stop

            # Replace the address with the mapping that contains it (or that
            # ends exactly at it) and label that mapping as the stack.
            if self.stack and self.mappings:
                for mapping in self.mappings:
                    if self.stack in mapping or self.stack == mapping.stop:
                        mapping.name = '[stack]'
                        self.stack   = mapping
                        break
                else:
                    log.warning('Could not find the stack!')
                    self.stack = None

            with context.local(bytes=self.bytes, log_level='warn'):
                try:
                    self._parse_stack()
                except ValueError:
                    # If there are no environment variables, we die by running
                    # off the end of the stack.
                    pass

            # Corefiles generated by QEMU do not have a name for the
            # main module mapping.
            # Fetching self.exe will cause this to be auto-populated,
            # and is a no-op in other cases.
            self.exe

            # Print out the nice display for the user
            self._describe_core()
1✔
686

687
    def _parse_nt_file(self, note):
        """Apply the contents of an ``NT_FILE`` note to :attr:`mappings`.

        The descriptor is read as: an entry count, a page size, then
        ``count`` triples of ``(start, end, file offset)``, followed by
        ``count`` NUL-terminated file names in the same order.

        Every mapping whose ``start`` matches an entry receives that entry's
        file name and offset.  The mappings are then sorted by start address
        and the still-unnamed ones are checked for ``[vsyscall]``,
        ``[vdso]`` and ``[vvar]``.

        Arguments:
            note: The ``NT_FILE`` note, with ``n_desc`` holding raw bytes.
        """
        t = tube()
        t.unrecv(note.n_desc)

        count = t.unpack()
        t.unpack()  # page size: must be consumed, but is not used here

        # (start, offset) per entry; the end address is read and discarded.
        starts = []
        for _ in range(count):
            start = t.unpack()
            t.unpack()
            offset = t.unpack()
            starts.append((start, offset))

        # The file names follow the address table, one per entry, in order.
        for start, offset in starts:
            filename = t.recvuntil(b'\x00', drop=True)
            if not isinstance(filename, str):
                filename = filename.decode('utf-8')

            for mapping in self.mappings:
                if mapping.start == start:
                    mapping.name = filename
                    mapping.page_offset = offset

        self.mappings = sorted(self.mappings, key=lambda m: m.start)

        # ``at_sysinfo_ehdr`` is not initialised in ``__init__``; default to
        # None so that handling this note before the value is known does not
        # raise AttributeError.
        # NOTE(review): presumably assigned while parsing NT_AUXV -- confirm.
        sysinfo_ehdr = getattr(self, 'at_sysinfo_ehdr', None)

        # Walk the unnamed mappings from the highest address down and label
        # the special kernel-provided regions, at most once each.
        vvar = vdso = vsyscall = False
        for mapping in reversed(self.mappings):
            if mapping.name:
                continue

            if not vsyscall and mapping.start == 0xffffffffff600000:
                mapping.name = '[vsyscall]'
                vsyscall = True
                continue

            # Either the address matches the recorded one, or the mapping
            # looks like a small read+execute ELF image.
            if mapping.start == sysinfo_ehdr \
            or (not vdso and mapping.size in [0x1000, 0x2000]
                and mapping.flags == 5
                and self.read(mapping.start, 4) == b'\x7fELF'):
                mapping.name = '[vdso]'
                vdso = True
                continue

            if not vvar and mapping.size == 0x2000 and mapping.flags == 4:
                mapping.name = '[vvar]'
                vvar = True
×
738

739
    @property
    def vvar(self):
        """:class:`Mapping`: First mapping named ``[vvar]``, or ``None`` if there is none."""
        candidates = (region for region in self.mappings
                      if region.name == '[vvar]')
        return next(candidates, None)
×
745

746
    @property
    def vdso(self):
        """:class:`Mapping`: First mapping named ``[vdso]``, or ``None`` if there is none."""
        candidates = (region for region in self.mappings
                      if region.name == '[vdso]')
        return next(candidates, None)
1✔
752

753
    @property
    def vsyscall(self):
        """:class:`Mapping`: First mapping named ``[vsyscall]``, or ``None`` if there is none."""
        candidates = (region for region in self.mappings
                      if region.name == '[vsyscall]')
        return next(candidates, None)
×
759

760
    @property
    def libc(self):
        """:class:`Mapping`: First mapping whose file name looks like ``libc.so``,
        or ``None`` if no mapping matches."""
        pattern = re.compile(r'^libc\b.*so(?:\.6)?$')

        # Only named mappings can match; the test is on the base name so the
        # directory the library was loaded from does not matter.
        named = (region for region in self.mappings if region.name)
        for region in named:
            if pattern.match(os.path.basename(region.name)):
                return region

        return None
1✔
773

774
    @property
    def exe(self):
        """:class:`Mapping`: First mapping for the executable file.

        Returns ``None`` when the entry point is unknown or when no mapping
        contains it.  As a side effect, an unnamed mapping that contains the
        entry point is given the name stored at ``at_execfn``.
        """
        # Without the entry point from the auxv there is nothing to go on.
        entry = self.at_entry
        if not entry:
            return None

        # The entry point need not live in the first segment of its file,
        # so remember the earliest mapping seen for every name.
        earliest = {}
        for region in self.mappings:
            if region.name not in earliest:
                earliest[region.name] = region

        # Find the segment which contains the entry point.
        for region in self.mappings:
            if not (region.start <= entry < region.stop):
                continue

            if not region.name and self.at_execfn:
                region.name = self.string(self.at_execfn)
                if not isinstance(region.name, str):
                    region.name = region.name.decode('utf-8')

            return earliest.get(region.name, region)

        return None
1✔
801

802
    @property
    def pid(self):
        """:class:`int`: PID of the process which created the core dump.

        ``None`` when the core carries no ``prstatus`` note.
        """
        status = self.prstatus
        if not status:
            return None
        return int(status.pr_pid)
807

808
    @property
    def ppid(self):
        """:class:`int`: Parent PID of the process which created the core dump.

        ``None`` when the core carries no ``prstatus`` note.
        """
        status = self.prstatus
        if not status:
            return None
        return int(status.pr_ppid)
813

814
    @property
    def signal(self):
        """:class:`int`: Signal which caused the core to be dumped.

        The ``siginfo`` note is preferred; ``prstatus`` is the fallback.
        ``None`` is returned when neither is available.

        Example:

            >>> elf = ELF.from_assembly(shellcraft.trap())
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.signal == signal.SIGTRAP
            True

            >>> elf = ELF.from_assembly(shellcraft.crash())
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.signal == signal.SIGSEGV
            True
        """
        info = self.siginfo
        if info:
            return int(info.si_signo)

        status = self.prstatus
        if status:
            return int(status.pr_cursig)

        return None
836

837
    @property
    def fault_addr(self):
        """:class:`int`: Address which generated the fault, for the signals
        SIGILL, SIGFPE, SIGSEGV, SIGBUS.  This is only available in native
        core dumps created by the kernel.  If the information is unavailable,
        this returns the address of the instruction pointer.

        Example:

            >>> elf = ELF.from_assembly('mov eax, 0xdeadbeef; jmp eax', arch='i386')
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.fault_addr == io.corefile.eax == 0xdeadbeef
            True
        """
        info = self.siginfo

        # No embedded siginfo structure, so just return the
        # current instruction pointer.
        if not info:
            return getattr(self, 'pc', 0)

        address = int(info.sigfault_addr)

        # The fault address is zero if the crash occurs due to a
        # "protection fault", e.g. a dereference of 0x4141414141414141
        # because this is technically a kernel address.
        #
        # A protection fault does not set "fault_addr" in the siginfo.
        # (http://elixir.free-electrons.com/linux/v4.14-rc8/source/kernel/signal.c#L1052)
        if not (address == 0 and info.si_code == 0x80):
            return address

        # Since a common use for corefiles is to spray the stack with a
        # cyclic pattern to find the offset to get control of $PC,
        # check for a "ret" instruction ("\xc3").
        #
        # If we find a RET at $PC, extract the "return address" from the
        # top of the stack.
        try:
            if self.read(self.pc, 1) == b'\xc3':
                address = self.unpack(self.sp)
        except Exception:
            # Could not read $rsp or $rip
            pass

        return address
885

886
    @property
    def _pc_register(self):
        """Name of the program-counter register for ``self.arch``.

        ``eip`` on i386, ``rip`` on amd64, and ``pc`` for anything else.
        """
        arch = self.arch
        if arch == 'i386':
            return 'eip'
        if arch == 'amd64':
            return 'rip'
        return 'pc'
893

894
    @property
    def pc(self):
        """:class:`int`: The program counter for the Corefile

        This is a cross-platform way to get e.g. ``core.eip``, ``core.rip``, etc.
        ``None`` is returned when the register is not available.
        """
        available = self.registers
        return available.get(self._pc_register, None)
901

902
    @property
    def _sp_register(self):
        """Name of the stack-pointer register for ``self.arch``.

        ``esp`` on i386, ``rsp`` on amd64, and ``sp`` for anything else.
        """
        arch = self.arch
        if arch == 'i386':
            return 'esp'
        if arch == 'amd64':
            return 'rsp'
        return 'sp'
909

910
    @property
    def sp(self):
        """:class:`int`: The stack pointer for the Corefile

        This is a cross-platform way to get e.g. ``core.esp``, ``core.rsp``, etc.
        ``None`` is returned when the register is not available.
        """
        available = self.registers
        return available.get(self._sp_register, None)
917

918
    def _describe(self):
        """Intentionally do nothing.

        NOTE(review): presumably this silences a summary printed by the
        parent class, with :meth:`_describe_core` logging the
        corefile-specific one instead -- confirm against the base class.
        """
        return None
920

921
    def _describe_core(self):
        """Log a one-time summary of the core file.

        The summary holds the path, the ``arch-bits-endian`` triplet, the
        program counter and stack pointer (printed as 0 when unavailable)
        and, when known, the executable mapping and the fault address.
        """
        gnu_triplet = '-'.join(map(str, (self.arch, self.bits, self.endian)))

        fields = [
            repr(self.path),
            '%-10s %s' % ('Arch:', gnu_triplet),
            '%-10s %#x' % ('%s:' % self._pc_register.upper(), self.pc or 0),
            '%-10s %#x' % ('%s:' % self._sp_register.upper(), self.sp or 0),
        ]

        # ``exe`` walks every mapping on each access and ``fault_addr`` may
        # read memory, so evaluate each property once and reuse the result.
        exe = self.exe
        if exe and exe.name:
            fields.append(
                '%-10s %s' % ('Exe:', '%r (%#x)' % (exe.name, exe.address))
            )

        fault_addr = self.fault_addr
        if fault_addr:
            fields.append('%-10s %#x' % ('Fault:', fault_addr))

        log.info_once('\n'.join(fields))
942

943
    def _load_mappings(self):
        """Append one unnamed :class:`Mapping` per ``PT_LOAD`` segment.

        Each mapping spans ``p_vaddr`` to ``p_vaddr + p_memsz`` and carries
        the segment's ``p_flags``.  Other segment types are ignored.
        """
        loadable = (
            segment.header for segment in self.segments
            if segment.header.p_type == 'PT_LOAD'
        )

        for header in loadable:
            start = header.p_vaddr
            stop = header.p_vaddr + header.p_memsz
            self.mappings.append(
                Mapping(self, None, start, stop, header.p_flags, None)
            )
955

956
    def _parse_auxv(self, note):
        """Record the auxiliary-vector entries of interest from ``note``.

        The note descriptor is consumed as consecutive (key, value) word
        pairs.  ``AT_EXECFN``, ``AT_ENTRY``, ``AT_PHDR``, ``AT_BASE`` and
        ``AT_SYSINFO_EHDR`` are stored on the instance; ``AT_EXECFN`` also
        sets ``self.stack`` to the start of the page following it.
        """
        stream = tube()
        stream.unrecv(note.n_desc)

        pair_size = context.bytes * 2

        for _ in range(0, note.n_descsz, pair_size):
            key = stream.unpack()
            value = stream.unpack()

            if key == constants.AT_EXECFN:
                # The AT_EXECFN entry is a pointer to the executable's filename
                # at the very top of the stack, followed by a word's width of
                # NULL bytes.  For example, on a 64-bit system...
                #
                # 0x7fffffffefe8  53 3d 31 34  33 00 2f 62  69 6e 2f 62  61 73 68 00  |S=14|3./b|in/b|ash.|
                # 0x7fffffffeff8  00 00 00 00  00 00 00 00                            |....|....|    |    |
                self.at_execfn = value
                # Round up to the next page boundary
                self.stack = (value & ~0xfff) + 0x1000
            elif key == constants.AT_ENTRY:
                self.at_entry = value
            elif key == constants.AT_PHDR:
                self.at_phdr = value
            elif key == constants.AT_BASE:
                self.at_base = value
            elif key == constants.AT_SYSINFO_EHDR:
                self.at_sysinfo_ehdr = value
988

989
    def _parse_stack(self):
        """Recover the environment and argument vectors from the stack.

        Works backwards from ``AT_EXECFN`` to the last environment string,
        then to the ``envp[]`` and ``argv[]`` pointer arrays.  On success
        this fills ``self.env`` (name -> address of the value),
        ``self.envp_address``, ``self.argc_address``, ``self.argc``,
        ``self.argv_address`` and ``self.argv``.

        Whenever the stack does not look as expected, a message is logged
        and the method returns early, leaving the remaining fields untouched.
        """
        # Get a copy of the stack mapping
        stack = self.stack

        if not stack:
            return

        # If the stack does not end with zeroes, something is very wrong.
        if not stack.data.endswith(b'\x00' * context.bytes):
            # Report the bytes that failed the check above, i.e. the tail of
            # the stack mapping -- not the tail of the whole core file.
            log.warn_once("End of the stack is corrupted, skipping stack parsing (got: %s)",
                          enhex(stack.data[-context.bytes:]))
            return

        # AT_EXECFN is the start of the filename, e.g. '/bin/sh'
        # Immediately preceding is a NULL-terminated environment variable string.
        # We want to find the beginning of it
        if not self.at_execfn:
            address = stack.stop
            address -= 2*self.bytes
            address -= 1
            address = stack.rfind(b'\x00', None, address)
            address += 1
            self.at_execfn = address

        address = self.at_execfn-1

        # Sanity check!
        try:
            if stack[address] != b'\x00':
                log.warning("Error parsing corefile stack: Could not find end of environment")
                return
        except ValueError:
            log.warning("Error parsing corefile stack: Address out of bounds")
            return

        # address is currently set to the NULL terminator of the last
        # environment variable.
        address = stack.rfind(b'\x00', None, address)

        # We've found the beginning of the last environment variable.
        # We should be able to search up the stack for the envp[] array to
        # find a pointer to this address, followed by a NULL.
        last_env_addr = address + 1
        p_last_env_addr = stack.find(pack(last_env_addr), None, last_env_addr)
        if p_last_env_addr < 0:
            # Something weird is happening.  Just don't touch it.
            log.warn_once("Error parsing corefile stack: Found bad environment at %#x", last_env_addr)
            return

        # Sanity check that we did correctly find the envp NULL terminator.
        envp_nullterm = p_last_env_addr+context.bytes
        if self.unpack(envp_nullterm) != 0:
            log.warning("Error parsing corefile stack: Could not find end of environment variables")
            return

        # We've successfully located the end of the envp[] array.
        #
        # It comes immediately after the argv[] array, which itself
        # is NULL-terminated.
        #
        # Now let's find the end of argv
        p_end_of_argv = stack.rfind(pack(0), None, p_last_env_addr)

        self.envp_address = p_end_of_argv + self.bytes

        # Now we can fill in the environment
        env_pointer_data = stack[self.envp_address:p_last_env_addr+self.bytes]
        for pointer in unpack_many(env_pointer_data):

            # If the stack is corrupted, the pointer will be outside of
            # the stack.
            if pointer not in stack:
                continue

            try:
                name_value = self.string(pointer)
            except Exception:
                continue

            # A corrupted entry may lack the '=' separator altogether;
            # skip it instead of failing the whole parse.
            name, separator, _ = name_value.partition(b'=')
            if not separator:
                continue

            # "end" points at the byte after the null terminator
            end = pointer + len(name_value) + 1

            # Do not mark things as environment variables if they point
            # outside of the stack itself, or we had to cross into a different
            # mapping (after the stack) to read it.
            # This may occur when the entire stack is filled with non-NUL bytes,
            # and we NULL-terminate on a read failure in .string().
            if end not in stack:
                continue

            # Compute the value's address from the *byte* length of the name;
            # after decoding, a multi-byte name is shorter than it is in memory.
            value_address = pointer + len(name) + len(separator)

            if not isinstance(name, str):
                name = name.decode('utf-8', 'surrogateescape')
            self.env[name] = value_address

        # May as well grab the arguments off the stack as well.
        # argc comes immediately before argv[0] on the stack, but
        # we don't know what argc is.
        #
        # It is unlikely that argc is a valid stack address.
        address = p_end_of_argv - self.bytes
        while self.unpack(address) in stack:
            address -= self.bytes

        # address now points at argc
        self.argc_address = address
        self.argc = self.unpack(self.argc_address)

        # we can extract all of the arguments as well
        self.argv_address = self.argc_address + self.bytes
        self.argv = unpack_many(stack[self.argv_address: p_end_of_argv])
1102

1103
    @property
    def maps(self):
        """:class:`str`: A printable string which is similar to /proc/xx/maps.

        One line per entry of ``self.mappings``, in order.

        ::

            >>> print(Corefile('./core').maps)
            8048000-8049000 r-xp 1000 /home/user/pwntools/crash
            8049000-804a000 r--p 1000 /home/user/pwntools/crash
            804a000-804b000 rw-p 1000 /home/user/pwntools/crash
            f7528000-f7529000 rw-p 1000 None
            f7529000-f76d1000 r-xp 1a8000 /lib/i386-linux-gnu/libc-2.19.so
            f76d1000-f76d2000 ---p 1000 /lib/i386-linux-gnu/libc-2.19.so
            f76d2000-f76d4000 r--p 2000 /lib/i386-linux-gnu/libc-2.19.so
            f76d4000-f76d5000 rw-p 1000 /lib/i386-linux-gnu/libc-2.19.so
            f76d5000-f76d8000 rw-p 3000 None
            f76ef000-f76f1000 rw-p 2000 None
            f76f1000-f76f2000 r-xp 1000 [vdso]
            f76f2000-f7712000 r-xp 20000 /lib/i386-linux-gnu/ld-2.19.so
            f7712000-f7713000 r--p 1000 /lib/i386-linux-gnu/ld-2.19.so
            f7713000-f7714000 rw-p 1000 /lib/i386-linux-gnu/ld-2.19.so
            fff3e000-fff61000 rw-p 23000 [stack]
        """
        return '\n'.join(str(entry) for entry in self.mappings)
1127

1128
    def getenv(self, name):
        """getenv(name) -> bytes

        Read an environment variable off the stack, and return its contents.

        Arguments:
            name(str): Name of the environment variable to read.  Anything
                that is not a ``str`` is decoded as UTF-8 first.

        Returns:
            The string found at the variable's recorded address, as
            returned by ``self.string`` (see the example below).

        An error is logged through ``log.error`` when the variable is not set.

        Example:

            >>> elf = ELF.from_assembly(shellcraft.trap())
            >>> io = elf.process(env={'GREETING': 'Hello!'})
            >>> io.wait(1)
            >>> io.corefile.getenv('GREETING')
            b'Hello!'
        """
        key = name if isinstance(name, str) else name.decode('utf-8', 'surrogateescape')

        if key not in self.env:
            log.error("Environment variable %r not set" % key)

        return self.string(self.env[key])
1153

1154
    @property
    def registers(self):
        """:class:`dict`: All available registers in the coredump.

        Maps every public attribute of ``prstatus.pr_reg`` that converts to
        ``int`` onto its value; attributes that do not convert are left out.
        The dictionary is empty when there is no ``prstatus`` note.

        Example:

            >>> elf = ELF.from_assembly('mov eax, 0xdeadbeef;' + shellcraft.trap(), arch='i386')
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.registers['eax'] == 0xdeadbeef
            True
        """
        if not self.prstatus:
            return {}

        values = {}
        public = (
            field for field in dir(self.prstatus.pr_reg)
            if not field.startswith('_')
        )

        for field in public:
            try:
                values[field] = int(getattr(self.prstatus.pr_reg, field))
            except Exception:
                # Not a plain register value (e.g. a method); skip it
                pass

        return values
1181

1182
    def debug(self):
        """Open the corefile under a debugger.

        The executable path is passed along when the executable mapping
        could be determined.
        """
        import pwnlib.gdb

        # ``exe`` is None when the core carries no usable entry point, so do
        # not dereference it blindly; let the debugger helper decide what to
        # do without an executable.
        exe = self.exe
        exe_path = exe.path if exe is not None else None
        pwnlib.gdb.attach(self, exe=exe_path)
1186

1187
    def __getattr__(self, attribute):
1✔
1188
        if attribute.startswith('_') or not self.prstatus:
1!
1189
            raise AttributeError(attribute)
×
1190

1191
        if hasattr(self.prstatus, attribute):
1!
1192
            return getattr(self.prstatus, attribute)
×
1193

1194
        return getattr(self.prstatus.pr_reg, attribute)
1✔
1195

1196
    # Override routines which don't make sense for Corefiles
1197
    def _populate_got(*a):
        """No-op override; this routine does not make sense for Corefiles."""
        return None
1198
    def _populate_plt(*a):
        """No-op override; this routine does not make sense for Corefiles."""
        return None
1199

1200
class Core(Corefile):
    """Alias for :class:`.Corefile`

    Adds no behavior of its own; it only provides a shorter name.
    """
1202

1203
class Coredump(Corefile):
    """Alias for :class:`.Corefile`

    Adds no behavior of its own; it only provides an alternative name.
    """
1205

1206
class CorefileFinder(object):
1✔
1207
    def __init__(self, proc):
        """Locate the core dump left behind by ``proc``.

        The search order is: an already-renamed ``core.<pid>`` in the
        current directory, then a QEMU core, then a native core.  The
        result is stored in ``self.core_path`` (``None`` if nothing was
        found).

        When a core is found its PID is checked.  Depending on
        ``context.rename_corefiles`` the core is copied to ``core.<pid>``,
        and depending on ``context.delete_corefiles`` it is registered for
        removal at interpreter exit.

        Arguments:
            proc: The exited process (local ``process`` or ``ssh_channel``)
                whose core dump should be found.
        """
        # The process must have terminated for a core dump to exist
        if proc.poll() is None:
            log.error("Process %i has not exited" % (proc.pid))

        self.process = proc
        self.pid = proc.pid
        self.uid = proc.suid
        self.gid = proc.sgid
        self.exe = proc.executable
        self.basename = os.path.basename(self.exe)
        self.cwd = proc.cwd

        # XXX: Should probably break out all of this logic into
        #      its own class, so that we can support "file ops"
        #      locally, via SSH, and over ADB, in a transparent way.
        # NOTE(review): for any other type of ``proc`` neither ``self.read``
        # nor ``self.unlink`` is set, and the next statement fails.
        if isinstance(proc, process):
            self.read = read
            self.unlink = os.unlink
        elif isinstance(proc, ssh_channel):
            self.read = proc.parent.read
            self.unlink = proc.parent.unlink

        # Kernel settings which control where native cores are written
        self.kernel_core_pattern = self.read('/proc/sys/kernel/core_pattern').strip()
        self.kernel_core_uses_pid = bool(int(self.read('/proc/sys/kernel/core_uses_pid')))

        log.debug("core_pattern: %r" % self.kernel_core_pattern)
        log.debug("core_uses_pid: %r" % self.kernel_core_uses_pid)

        # Interpreter registered for the executable ('' if there is none)
        self.interpreter = self.binfmt_lookup()

        log.debug("interpreter: %r" % self.interpreter)

        # If we have already located the corefile, we will
        # have renamed it to 'core.<pid>'
        core_path = 'core.%i' % (proc.pid)
        self.core_path = None

        if os.path.isfile(core_path):
            log.debug("Found core immediately: %r" % core_path)
            self.core_path = core_path

        # Try QEMU first, since it's unlikely to be a false-positive unless
        # there is a PID *and* filename collision.
        if not self.core_path:
            log.debug("Looking for QEMU corefile")
            self.core_path = self.qemu_corefile()

        # Check for native coredumps as a last resort
        if not self.core_path:
            log.debug("Looking for native corefile")
            self.core_path = self.native_corefile()

        # Nothing found; leave ``self.core_path`` as None
        if not self.core_path:
            return

        # PID recorded inside the core (-1 if it could not be loaded)
        core_pid = self.load_core_check_pid()

        # Move the corefile if we're configured that way
        if context.rename_corefiles:
            new_path = 'core.%i' % core_pid
            if core_pid > 0 and new_path != self.core_path:
                # Copy first, then remove the original on a best-effort basis
                write(new_path, self.read(self.core_path))
                try:
                    self.unlink(self.core_path)
                except (IOError, OSError):
                    log.warn("Could not delete %r" % self.core_path)
                self.core_path = new_path

        # Check the PID
        if core_pid != self.pid:
            log.warn("Corefile PID does not match! (got %i)" % core_pid)

        # Register the corefile for removal only if it's an exact match
        elif context.delete_corefiles:
            atexit.register(lambda: os.unlink(self.core_path))
1282

1283

1284
    def load_core_check_pid(self):
        """Return the PID recorded in the located corefile.

        Speculatively load the Corefile at ``self.core_path`` without
        informing the user, so that the caller can check whether it matches
        the process we're looking for.  The contents are fetched with
        ``self.read`` and loaded from a temporary local copy.

        Returns:
            `int`: The PID stored in the corefile, or ``-1`` when the
            corefile could not be loaded or records no PID.
        """
        try:
            with context.quiet:
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(self.read(self.core_path))
                    tmp.flush()
                    pid = Corefile(tmp.name).pid
        except Exception:
            # Best effort: any failure just means "no usable PID"
            pid = None

        # ``Corefile.pid`` is None when the core has no prstatus note.  The
        # caller formats the result with '%i' and compares it to integers,
        # so always hand back an int.
        if pid is None:
            return -1
        return pid
1307

1308
    def apport_corefile(self):
        """Find the apport crash for the process, and extract the core file.

        Returns:
            Raw core file contents, or ``None`` when no crash data could be
            read for this process.
        """
        crash_data = self.apport_read_crash_data()

        log.debug("Apport Crash Data:\n%s" % crash_data)

        if not crash_data:
            return None

        return self.apport_crash_extract_corefile(crash_data)
1323

1324
    def apport_crash_extract_corefile(self, crashfile_data):
        """Extract a corefile from an apport crash file contents.

        Arguments:
            crashfile_data(bytes or str): Crash file contents

        Returns:
            `bytes`: Raw binary data for the core file, or ``None`` when the
            crash file does not belong to ``self.pid`` or contains no
            ``CoreDump`` section.
        """
        import base64

        # The crash file is fetched with ``self.read``, which yields raw
        # bytes, while the line-based parsing below works on text.
        if isinstance(crashfile_data, bytes):
            crashfile_data = crashfile_data.decode('utf-8', 'replace')

        stream = StringIO(crashfile_data)

        # Find the pid of the crashfile
        for line in stream:
            if line.startswith(' Pid:'):
                pid = int(line.split()[-1])

                if pid == self.pid:
                    break
        else:
            # Could not find a matching " Pid:" line
            return None

        # Find the CoreDump section
        for line in stream:
            if line.startswith('CoreDump: base64'):
                break
        else:
            # Could not find the coredump data
            return None

        # Get all of the base64'd lines; the section ends at the first line
        # which is not indented.
        chunks = []
        for line in stream:
            if not line.startswith(' '):
                break
            chunks.append(base64.b64decode(line.strip()))

        # Smush everything together, then extract it
        compressed_file = BytesIO(b''.join(chunks))
        with gzip.GzipFile(fileobj=compressed_file) as gzip_file:
            return gzip_file.read()
1368

1369
    def apport_read_crash_data(self):
        """Find the apport crash for the process

        The crash file is looked up under ``/var/crash`` and removed after
        a successful read.

        Returns:
            Raw contents of the crash file or ``None``.
        """
        crash_path = '/var/crash/%s.%i.crash' % (self.exe.replace('/', '_'), self.uid)

        try:
            log.debug("Looking for Apport crash at %r" % crash_path)
            data = self.read(crash_path)
        except Exception:
            return None

        # Remove the crash file, so that future crashes will be captured.
        # Failing to remove it is not fatal.
        try:
            self.unlink(crash_path)
        except Exception:
            pass

        return data
1393

1394
    def systemd_coredump_corefile(self):
        """Find the systemd-coredump crash for the process and dump it to a file.

        Runs ``coredumpctl dump`` filtered by ``self.pid`` and writes the
        result to ``core.<basename>.<pid>.coredumpctl`` in the current
        directory.

        Returns:
            `str`: Filename of core file, if coredump was found; ``None``
            when ``coredumpctl`` fails or cannot be executed.
        """
        filename = "core.%s.%i.coredumpctl" % (self.basename, self.pid)
        command = [
            "coredumpctl",
            "dump",
            "--output=%s" % filename,
            # Filter coredump by pid
            str(self.pid),
        ]

        try:
            # Close the devnull handle again instead of leaking it
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(
                    command,
                    stdout=devnull,
                    stderr=subprocess.STDOUT,
                    shell=False,
                )
        except subprocess.CalledProcessError as e:
            log.debug("coredumpctl failed with status: %d" % e.returncode)
            return None
        except OSError as e:
            # e.g. coredumpctl is not installed
            log.debug("coredumpctl could not be executed: %s" % e)
            return None

        return filename
1420

1421
    def native_corefile(self):
        """Find the corefile for a native crash.

        A ``core_pattern`` starting with ``|`` is handled as a pipe to a
        helper program; anything else is treated as a filename template.

        Returns:
            `str`: Filename of core file.
        """
        piped = self.kernel_core_pattern.startswith(b'|')

        if not piped:
            log.debug("Checking for corefile (pattern)")
            return self.native_corefile_pattern()

        log.debug("Checking for corefile (piped)")
        return self.native_corefile_pipe()
1436

1437
    def native_corefile_pipe(self):
        """Find the corefile for a piped core_pattern

        Supports apport and systemd-coredump.

        Returns:
            `str`: Filename of core file, or ``None`` for an unsupported
            pipe helper.
        """
        if b'/apport' not in self.kernel_core_pattern:
            if b'systemd-coredump' in self.kernel_core_pattern:
                log.debug("Found systemd-coredump in core_pattern")
                return self.systemd_coredump_corefile()

            log.warn_once("Unsupported core_pattern: %r", self.kernel_core_pattern)
            return None

        log.debug("Found apport in core_pattern")
        apport_core = self.apport_corefile()

        if apport_core:
            # Write the corefile to the local directory
            filename = 'core.%s.%i.apport' % (self.basename, self.pid)
            with open(filename, 'wb+') as f:
                f.write(apport_core)
            return filename

        # No crash report; look for a coredump stored by apport instead
        filename = self.apport_coredump()
        if filename:
            return filename

        # Pretend core_pattern was just 'core', and see if we come up with anything
        self.kernel_core_pattern = 'core'
        return self.native_corefile_pattern()
1472

1473
    def native_corefile_pattern(self):
        """Find the corefile named by a filename-style ``core_pattern``.

        Expands the template specifiers handled below, appends ``.<pid>``
        when ``core_uses_pid`` is set, and resolves a relative result
        against the process's working directory.

        %%  a single % character
        %c  core file size soft resource limit of crashing process (since Linux 2.6.24)
        %d  dump mode—same as value returned by prctl(2) PR_GET_DUMPABLE (since Linux 3.7)
        %e  executable filename (without path prefix)
        %E  pathname of executable, with slashes ('/') replaced by exclamation marks ('!') (since Linux 3.0).
        %g  (numeric) real GID of dumped process
        %h  hostname (same as nodename returned by uname(2))
        %i  TID of thread that triggered core dump, as seen in the PID namespace in which the thread resides (since Linux 3.18)
        %I  TID of thread that triggered core dump, as seen in the initial PID namespace (since Linux 3.18)
        %p  PID of dumped process, as seen in the PID namespace in which the process resides
        %P  PID of dumped process, as seen in the initial PID namespace (since Linux 3.12)
        %s  number of signal causing dump
        %t  time of dump, expressed as seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC)
        %u  (numeric) real UID of dumped process

        ``%c``, ``%d`` and ``%t`` are not expanded.

        Returns:
            `str`: Path of the core file if it could be read, else ``None``.
        """
        replace = {
            '%%': '%',
            '%e': os.path.basename(self.interpreter) or self.basename,
            '%E': self.exe.replace('/', '!'),
            '%g': str(self.gid),
            '%h': socket.gethostname(),
            '%i': str(self.pid),
            '%I': str(self.pid),
            '%p': str(self.pid),
            '%P': str(self.pid),
            '%s': str(-self.process.poll()),
            '%u': str(self.uid)
        }
        pattern = re.compile("|".join(re.escape(k) for k in replace))

        # The pattern is read from /proc as bytes; work on text from here on
        if not hasattr(self.kernel_core_pattern, 'encode'):
            self.kernel_core_pattern = self.kernel_core_pattern.decode('utf-8')
        core_pattern = self.kernel_core_pattern
        corefile_path = pattern.sub(lambda m: replace[m.group(0)], core_pattern)

        if self.kernel_core_uses_pid:
            corefile_path += '.%i' % self.pid

        # A relative name is resolved against the working directory.
        # (os.pathsep is the PATH-list separator and says nothing about
        # whether a path is absolute.)
        if not os.path.isabs(corefile_path):
            corefile_path = os.path.join(self.cwd, corefile_path)

        log.debug("Trying corefile_path: %r" % corefile_path)

        try:
            self.read(corefile_path)
            return corefile_path
        except Exception as e:
            log.debug("No dice: %s" % e)

        return None
1523

1524
    def qemu_corefile(self):
        """qemu_corefile() -> str

        Retrieves the path to a QEMU core dump, or ``None`` if there is none.
        """

        # QEMU uses its own naming scheme
        # https://github.com/qemu/qemu/blob/stable-2.6/linux-user/elfload.c#L2710-L2744
        #
        #     qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core
        #
        # The date and time are wildcarded, since the PID should be unique
        # enough that we can just glob.
        corefile_name = 'qemu_{basename}_*_{pid}.core'.format(
            basename=self.basename,
            pid=self.pid,
        )

        # Get the full path
        corefile_path = os.path.join(self.cwd, corefile_name)

        log.debug("Trying corefile_path: %r" % corefile_path)

        # Glob all of them, return the *most recent* based on sort order.
        candidates = sorted(glob.glob(corefile_path), reverse=True)
        return candidates[0] if candidates else None
1551

1552
    def apport_coredump(self):
        """Find new-style apport coredump of executables not belonging
        to a system package

        Returns the matching path under ``/var/lib/apport/coredump``, or
        ``None`` if there is none.
        """
        # Apport stores these cores under its own naming scheme:
        #
        #     core.<_path_to_target_binary>.<uid>.<boot_id>.<pid>.<timestamp>
        #
        # The timestamp is wildcarded, since the PID should be unique
        # enough that we can just glob.

        boot_id = read('/proc/sys/kernel/random/boot_id').strip().decode()

        # Format the name
        corefile_name = 'core.{path}.{uid}.{boot_id}.{pid}.*'.format(
            path=self.exe.replace('/', '_'),
            uid=self.uid,
            boot_id=boot_id,
            pid=self.pid,
        )

        # Get the full path
        corefile_path = os.path.join('/var/lib/apport/coredump', corefile_name)

        log.debug("Trying corefile_path: %r" % corefile_path)

        # Glob all of them, return the *most recent* based on sort order.
        candidates = sorted(glob.glob(corefile_path), reverse=True)
        return candidates[0] if candidates else None
1587

1588
    def binfmt_lookup(self):
        """Parses /proc/sys/fs/binfmt_misc to find the interpreter for a file

        Every entry in the binfmt_misc directory is read and split into
        ``key value`` lines.  Entries that carry a ``magic`` are compared
        against the bytes of ``self.exe`` starting at the entry's ``offset``
        (``0`` when absent), considering only the bits selected by ``mask``
        (all bits when absent).

        Returns:
            ``self.process._qemu`` if it is set, otherwise the ``interpreter``
            of the first matching entry.  The empty string is returned when
            ``self.process`` is not a ``process``, when the binfmt_misc
            directory does not exist, or when no entry matches.
        """

        binfmt_misc = '/proc/sys/fs/binfmt_misc'

        if not isinstance(self.process, process):
            log.debug("Not a process")
            return ''

        if self.process._qemu:
            return self.process._qemu

        if not os.path.isdir(binfmt_misc):
            log.debug("No binfmt_misc dir")
            return ''

        exe_data = bytearray(self.read(self.exe))

        for entry in os.listdir(binfmt_misc):
            keys = {}

            path = os.path.join(binfmt_misc, entry)

            # Some nodes (e.g. control files) may be unreadable or not
            # decodable; those are skipped on a best-effort basis.
            try:
                data = self.read(path).decode()
            except Exception:
                continue

            for line in data.splitlines():
                # Split on the first run of whitespace only, so that a value
                # containing spaces (an interpreter path) is kept whole.
                # Single-word lines such as "enabled" are ignored.
                try:
                    k, v = line.split(None, 1)
                except ValueError:
                    continue

                keys[k] = v.strip()

            if 'magic' not in keys or 'interpreter' not in keys:
                continue

            # The magic is located ``offset`` bytes into the file.
            try:
                offset = int(keys.get('offset', 0))
            except ValueError:
                continue

            if offset < 0:
                continue

            magic = bytearray(unhex(keys['magic']))

            if 'mask' in keys:
                mask = bytearray(unhex(keys['mask']))
            else:
                mask = bytearray(b'\xff' * len(magic))

            window = exe_data[offset:offset + len(magic)]

            # A file too short to contain the magic, or a mask that does not
            # cover the magic, cannot match; skip rather than index past
            # the end.
            if len(window) < len(magic) or len(mask) < len(magic):
                continue

            # Only bits selected by the mask take part in the comparison,
            # on both the file data and the magic.
            for byte, bits, mag in zip(window, mask, magic):
                if (byte ^ mag) & bits:
                    break
            else:
                return keys['interpreter']

        return ''
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc