• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Gallopsled / pwntools / a9bfe94a5c5087bad5a1d28eddc3071dea90ca3d

pending completion
a9bfe94a5c5087bad5a1d28eddc3071dea90ca3d

push

github-actions

GitHub
Merge branch 'dev' into rop_raw_list

3886 of 6368 branches covered (61.02%)

102 of 102 new or added lines in 15 files covered. (100.0%)

12212 of 16604 relevant lines covered (73.55%)

0.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.34
/pwnlib/elf/corefile.py
1
# -*- coding: utf-8 -*-
2
"""Read information from Core Dumps.
1✔
3

4
Core dumps are extremely useful when writing exploits, even outside of
5
the normal act of debugging things.
6

7
Using Corefiles to Automate Exploitation
8
----------------------------------------
9

10
For example, if you have a trivial buffer overflow and don't want to
11
open up a debugger or calculate offsets, you can use a generated core
12
dump to extract the relevant information.
13

14
.. code-block:: c
15

16
    #include <string.h>
17
    #include <stdlib.h>
18
    #include <unistd.h>
19
    void win() {
20
        system("sh");
21
    }
22
    int main(int argc, char** argv) {
23
        char buffer[64];
24
        strcpy(buffer, argv[1]);
25
    }
26

27
.. code-block:: shell
28

29
    $ gcc crash.c -m32 -o crash -fno-stack-protector
30

31
.. code-block:: python
32

33
    from pwn import *
34

35
    # Generate a cyclic pattern so that we can auto-find the offset
36
    payload = cyclic(128)
37

38
    # Run the process once so that it crashes
39
    process(['./crash', payload]).wait()
40

41
    # Get the core dump
42
    core = Coredump('./core')
43

44
    # Our cyclic pattern should have been used as the crashing address
45
    assert pack(core.eip) in payload
46

47
    # Cool! Now let's just replace that value with the address of 'win'
48
    crash = ELF('./crash')
49
    payload = fit({
50
        cyclic_find(core.eip): crash.symbols.win
51
    })
52

53
    # Get a shell!
54
    io = process(['./crash', payload])
55
    io.sendline(b'id')
56
    print(io.recvline())
57
    # uid=1000(user) gid=1000(user) groups=1000(user)
58

59
Module Members
60
----------------------------------------
61

62
"""
63
from __future__ import absolute_import
1✔
64
from __future__ import division
1✔
65

66
import collections
1✔
67
import ctypes
1✔
68
import glob
1✔
69
import gzip
1✔
70
import re
1✔
71
import os
1✔
72
import socket
1✔
73
import subprocess
1✔
74
import tempfile
1✔
75

76
from io import BytesIO, StringIO
1✔
77

78
import elftools
1✔
79
from elftools.common.py3compat import bytes2str
1✔
80
from elftools.common.utils import roundup
1✔
81
from elftools.common.utils import struct_parse
1✔
82
from elftools.construct import CString
1✔
83

84
from pwnlib import atexit
1✔
85
from pwnlib.context import context
1✔
86
from pwnlib.elf.datatypes import *
1✔
87
from pwnlib.elf.elf import ELF
1✔
88
from pwnlib.log import getLogger
1✔
89
from pwnlib.tubes.process import process
1✔
90
from pwnlib.tubes.ssh import ssh_channel
1✔
91
from pwnlib.tubes.tube import tube
1✔
92
from pwnlib.util.fiddling import b64d
1✔
93
from pwnlib.util.fiddling import enhex
1✔
94
from pwnlib.util.fiddling import unhex
1✔
95
from pwnlib.util.misc import read
1✔
96
from pwnlib.util.misc import write
1✔
97
from pwnlib.util.packing import pack
1✔
98
from pwnlib.util.packing import unpack_many
1✔
99

100
log = getLogger(__name__)
1✔
101

102
prstatus_types = {
1✔
103
    'i386': elf_prstatus_i386,
104
    'amd64': elf_prstatus_amd64,
105
    'arm': elf_prstatus_arm,
106
    'aarch64': elf_prstatus_aarch64
107
}
108

109
prpsinfo_types = {
1✔
110
    32: elf_prpsinfo_32,
111
    64: elf_prpsinfo_64,
112
}
113

114
siginfo_types = {
1✔
115
    32: elf_siginfo_32,
116
    64: elf_siginfo_64
117
}
118

119
# Slightly modified copy of the pyelftools version of the same function,
120
# until they fix this issue:
121
# https://github.com/eliben/pyelftools/issues/93
122
def iter_notes(self):
    """Iterate over the notes stored in a note segment.

    This is a module-level stand-in for the pyelftools method of the same
    name (see the issue linked above), so the segment is passed explicitly
    as ``self``.

    Each yielded note is the parsed ``Elf_Nhdr`` header with these extra keys:

    * ``n_offset``: file offset at which the note header starts
    * ``n_name``:   the note name, converted with ``bytes2str``
    * ``n_desc``:   the raw descriptor, converted with ``bytes2str``
    * ``n_size``:   total on-disk size of the note, padding included
    """
    # Loop invariants: the header layout and its size never change.
    nhdr = self.elffile.structs.Elf_Nhdr
    nhdr_size = nhdr.sizeof()

    offset = self['p_offset']
    end = self['p_offset'] + self['p_filesz']

    while offset < end:
        note = struct_parse(nhdr, self.stream, stream_pos=offset)
        note['n_offset'] = offset

        offset += nhdr_size
        self.stream.seek(offset)

        # The name field is padded on disk to a 4-byte boundary
        # (roundup's second argument is presumably a power-of-two exponent).
        disk_namesz = roundup(note['n_namesz'], 2)
        note['n_name'] = bytes2str(
            CString('').parse(self.stream.read(disk_namesz)))
        offset += disk_namesz

        # The descriptor follows the name directly; only n_descsz bytes are
        # read, but the offset advances by the padded size.
        note['n_desc'] = bytes2str(self.stream.read(note['n_descsz']))
        offset += roundup(note['n_descsz'], 2)

        note['n_size'] = offset - note['n_offset']
        yield note
1✔
146

147
class Mapping(object):
    """Encapsulates information about a memory mapping in a :class:`Corefile`.

    Addresses used with this class (subscripts, ``in`` tests, :meth:`find`,
    :meth:`rfind`) are virtual addresses, not offsets into the mapping.
    """
    def __init__(self, core, name, start, stop, flags, page_offset):
        # Owning core object; every memory read is delegated to its ``read``.
        self._core = core

        #: :class:`str`: Name of the mapping, e.g. ``'/bin/bash'`` or ``'[vdso]'``.
        self.name = name or ''

        #: :class:`int`: First mapped byte in the mapping
        self.start = start

        #: :class:`int`: First byte after the end of the mapping
        self.stop = stop

        #: :class:`int`: Size of the mapping, in bytes
        self.size = stop - start

        #: :class:`int`: Offset in pages in the mapped file
        self.page_offset = page_offset or 0

        #: :class:`int`: Mapping flags, using e.g. ``PROT_READ`` and so on.
        self.flags = flags

    @property
    def path(self):
        """:class:`str`: Alias for :attr:`.Mapping.name`"""
        return self.name

    @property
    def address(self):
        """:class:`int`: Alias for :data:`Mapping.start`."""
        return self.start

    @property
    def permstr(self):
        """:class:`str`: Human-readable memory permission string, e.g. ``r-xp``."""
        flags = self.flags
        # Bit 4 is read, bit 2 is write, bit 1 is execute; the trailing
        # 'p' is always emitted.
        return ''.join(['r' if flags & 4 else '-',
                        'w' if flags & 2 else '-',
                        'x' if flags & 1 else '-',
                        'p'])

    def __str__(self):
        return '%x-%x %s %x %s' % (self.start, self.stop, self.permstr,
                                   self.size, self.name)

    def __repr__(self):
        return '%s(%r, start=%#x, stop=%#x, size=%#x, flags=%#x, page_offset=%#x)' \
            % (self.__class__.__name__,
               self.name,
               self.start,
               self.stop,
               self.size,
               self.flags,
               self.page_offset)

    def __int__(self):
        return self.start

    @property
    def data(self):
        """Memory of the mapping, exactly as returned by the core's ``read``."""
        return self._core.read(self.start, self.size)

    def __getitem__(self, item):
        """Read memory by virtual address.

        ``mapping[addr]`` reads a single byte at ``addr``.

        ``mapping[lo:hi]`` reads the bytes in ``[lo, hi)``.  Omitted bounds
        default to the mapping's own bounds, and negative bounds are taken
        relative to :attr:`stop`.  The range must lie inside the mapping,
        otherwise an error is logged via ``log.error``.  A slice step is
        applied to the bytes after they have been read.
        """
        if isinstance(item, slice):
            start = int(item.start or self.start)
            stop  = int(item.stop or self.stop)

            # Negative bounds count back from the end of the mapping.
            if start < 0:
                start += self.stop
            if stop < 0:
                stop += self.stop

            if not (self.start <= start <= stop <= self.stop):
                log.error("Byte range [%#x:%#x] not within range [%#x:%#x]",
                          start, stop, self.start, self.stop)

            data = self._core.read(start, stop - start)

            if item.step == 1:
                return data
            return data[::item.step]

        return self._core.read(item, 1)

    def __contains__(self, item):
        """Test whether an address, or a whole :class:`Mapping`, lies inside this one."""
        if isinstance(item, Mapping):
            return (self.start <= item.start) and (item.stop <= self.stop)
        return self.start <= item < self.stop

    def find(self, sub, start=None, end=None):
        """Similar to str.find() but works on our address space

        ``start`` and ``end`` are virtual addresses that default to the
        bounds of the mapping.  Returns the virtual address of the first
        match, or ``-1`` if ``sub`` is not found.
        """
        if start is None:
            start = self.start
        if end is None:
            end = self.stop

        # Search in mapping-relative offsets, then translate back.
        result = self.data.find(sub, start - self.address, end - self.address)

        if result == -1:
            return result

        return result + self.address

    def rfind(self, sub, start=None, end=None):
        """Similar to str.rfind() but works on our address space

        ``start`` and ``end`` are virtual addresses that default to the
        bounds of the mapping.  Returns the virtual address of the last
        match, or ``-1`` if ``sub`` is not found.
        """
        if start is None:
            start = self.start
        if end is None:
            end = self.stop

        # Search in mapping-relative offsets, then translate back.
        result = self.data.rfind(sub, start - self.address, end - self.address)

        if result == -1:
            return result

        return result + self.address
1✔
265

266
class Corefile(ELF):
1✔
267
    r"""Enhances the information available about a corefile (which is an extension
268
    of the ELF format) by permitting extraction of information about the mapped
269
    data segments, and register state.
270

271
    Registers can be accessed directly, e.g. via ``core_obj.eax`` and enumerated
272
    via :data:`Corefile.registers`.
273

274
    Memory can be accessed directly via :meth:`.read` or :meth:`.write`, and also
275
    via :meth:`.pack` or :meth:`.unpack` or even :meth:`.string`.
276

277
    Arguments:
278
        core: Path to the core file.  Alternately, may be a :class:`.process` instance,
279
              and the core file will be located automatically.
280

281
    ::
282

283
        >>> c = Corefile('./core')
284
        >>> hex(c.eax)
285
        '0xfff5f2e0'
286
        >>> c.registers
287
        {'eax': 4294308576,
288
         'ebp': 1633771891,
289
         'ebx': 4151132160,
290
         'ecx': 4294311760,
291
         'edi': 0,
292
         'edx': 4294308700,
293
         'eflags': 66050,
294
         'eip': 1633771892,
295
         'esi': 0,
296
         'esp': 4294308656,
297
         'orig_eax': 4294967295,
298
         'xcs': 35,
299
         'xds': 43,
300
         'xes': 43,
301
         'xfs': 0,
302
         'xgs': 99,
303
         'xss': 43}
304

305
    Mappings can be iterated in order via :attr:`Corefile.mappings`.
306

307
    ::
308

309
        >>> Corefile('./core').mappings
310
        [Mapping('/home/user/pwntools/crash', start=0x8048000, stop=0x8049000, size=0x1000, flags=0x5, page_offset=0x0),
311
         Mapping('/home/user/pwntools/crash', start=0x8049000, stop=0x804a000, size=0x1000, flags=0x4, page_offset=0x1),
312
         Mapping('/home/user/pwntools/crash', start=0x804a000, stop=0x804b000, size=0x1000, flags=0x6, page_offset=0x2),
313
         Mapping(None, start=0xf7528000, stop=0xf7529000, size=0x1000, flags=0x6, page_offset=0x0),
314
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf7529000, stop=0xf76d1000, size=0x1a8000, flags=0x5, page_offset=0x0),
315
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d1000, stop=0xf76d2000, size=0x1000, flags=0x0, page_offset=0x1a8),
316
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d2000, stop=0xf76d4000, size=0x2000, flags=0x4, page_offset=0x1a9),
317
         Mapping('/lib/i386-linux-gnu/libc-2.19.so', start=0xf76d4000, stop=0xf76d5000, size=0x1000, flags=0x6, page_offset=0x1aa),
318
         Mapping(None, start=0xf76d5000, stop=0xf76d8000, size=0x3000, flags=0x6, page_offset=0x0),
319
         Mapping(None, start=0xf76ef000, stop=0xf76f1000, size=0x2000, flags=0x6, page_offset=0x0),
320
         Mapping('[vdso]', start=0xf76f1000, stop=0xf76f2000, size=0x1000, flags=0x5, page_offset=0x0),
321
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf76f2000, stop=0xf7712000, size=0x20000, flags=0x5, page_offset=0x0),
322
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf7712000, stop=0xf7713000, size=0x1000, flags=0x4, page_offset=0x20),
323
         Mapping('/lib/i386-linux-gnu/ld-2.19.so', start=0xf7713000, stop=0xf7714000, size=0x1000, flags=0x6, page_offset=0x21),
324
         Mapping('[stack]', start=0xfff3e000, stop=0xfff61000, size=0x23000, flags=0x6, page_offset=0x0)]
325

326
    Examples:
327

328
        Let's build an example binary which should eat ``R0=0xdeadbeef``
329
        and ``PC=0xcafebabe``.
330

331
        If we run the binary and then wait for it to exit, we can get its
332
        core file.
333

334
        >>> context.clear(arch='arm')
335
        >>> shellcode = shellcraft.mov('r0', 0xdeadbeef)
336
        >>> shellcode += shellcraft.mov('r1', 0xcafebabe)
337
        >>> shellcode += 'bx r1'
338
        >>> address = 0x41410000
339
        >>> elf = ELF.from_assembly(shellcode, vma=address)
340
        >>> io = elf.process(env={'HELLO': 'WORLD'})
341
        >>> io.poll(block=True)
342
        -11
343

344
        You can specify a full path a la ``Corefile('/path/to/core')``,
345
        but you can also just access the :attr:`.process.corefile` attribute.
346

347
        There's a lot of behind-the-scenes logic to locate the corefile for
348
        a given process, but it's all handled transparently by Pwntools.
349

350
        >>> core = io.corefile
351

352
        The core file has a :attr:`exe` property, which is a :class:`.Mapping`
353
        object.  Each mapping can be accessed with virtual addresses via subscript, or
354
        contents can be examined via the :attr:`.Mapping.data` attribute.
355

356
        >>> core.exe # doctest: +ELLIPSIS
357
        Mapping('/.../step3', start=..., stop=..., size=0x1000, flags=0x..., page_offset=...)
358
        >>> hex(core.exe.address)
359
        '0x41410000'
360

361
        The core file also has registers which can be accessed directly.
362
        Pseudo-registers :attr:`pc` and :attr:`sp` are available on all architectures,
363
        to make writing architecture-agnostic code more simple.
364
        If this were an amd64 corefile, we could access e.g. ``core.rax``.
365

366
        >>> core.pc == 0xcafebabe
367
        True
368
        >>> core.r0 == 0xdeadbeef
369
        True
370
        >>> core.sp == core.r13
371
        True
372

373
        We may not always know which signal caused the core dump, or what address
374
        caused a segmentation fault.  Instead of accessing registers directly, we
375
        can also extract this information from the core dump via :attr:`fault_addr`
376
        and :attr:`signal`.
377

378
        On QEMU-generated core dumps, this information is unavailable, so we
379
        substitute the value of PC.  In our example, that's correct anyway.
380

381
        >>> core.fault_addr == 0xcafebabe
382
        True
383
        >>> core.signal
384
        11
385

386
        Core files can also be generated from running processes.
387
        This requires GDB to be installed, and can only be done with native processes.
388
        Getting a "complete" corefile requires GDB 7.11 or better.
389

390
        >>> elf = ELF(which('bash-static'))
391
        >>> context.clear(binary=elf)
392
        >>> env = dict(os.environ)
393
        >>> env['HELLO'] = 'WORLD'
394
        >>> io = process(elf.path, env=env)
395
        >>> io.sendline(b'echo hello')
396
        >>> io.recvline()
397
        b'hello\n'
398

399
        The process is still running, but accessing its :attr:`.process.corefile` property
400
        automatically invokes GDB to attach and dump a corefile.
401

402
        >>> core = io.corefile
403
        >>> io.close()
404

405
        The corefile can be inspected and read from, and even exposes various mappings
406

407
        >>> core.exe # doctest: +ELLIPSIS
408
        Mapping('.../bin/bash-static', start=..., stop=..., size=..., flags=..., page_offset=...)
409
        >>> core.exe.data[0:4]
410
        b'\x7fELF'
411

412
        It also supports all of the features of :class:`ELF`, so you can :meth:`.read`
413
        or :meth:`.write` or even the helpers like :meth:`.pack` or :meth:`.unpack`.
414

415
        Don't forget to call :meth:`.ELF.save` to save the changes to disk.
416

417
        >>> core.read(elf.address, 4)
418
        b'\x7fELF'
419
        >>> core.pack(core.sp, 0xdeadbeef)
420
        >>> core.save()
421

422
        Let's re-load it as a new :attr:`Corefile` object and have a look!
423

424
        >>> core2 = Corefile(core.path)
425
        >>> hex(core2.unpack(core2.sp))
426
        '0xdeadbeef'
427

428
        Various other mappings are available by name, for the first segment of:
429

430
        * :attr:`.exe` the executable
431
        * :attr:`.libc` the loaded libc, if any
432
        * :attr:`.stack` the stack mapping
433
        * :attr:`.vvar`
434
        * :attr:`.vdso`
435
        * :attr:`.vsyscall`
436

437
        On Linux, 32-bit Intel binaries should have a VDSO section via :attr:`vdso`.  
438
        Since our ELF is statically linked, there is no libc which gets mapped.
439

440
        >>> core.vdso.data[:4]
441
        b'\x7fELF'
442
        >>> core.libc
443

444
        But if we dump a corefile from a dynamically-linked binary, the :attr:`.libc`
445
        will be loaded.
446

447
        >>> process('bash').corefile.libc # doctest: +ELLIPSIS
448
        Mapping('.../libc...so...', start=0x..., stop=0x..., size=0x..., flags=..., page_offset=...)
449

450
        The corefile also contains a :attr:`.stack` property, which gives
451
        us direct access to the stack contents.  On Linux, the very top of the stack
452
        should contain two pointer-widths of NULL bytes, preceded by the NULL-
453
        terminated path to the executable (as passed via the first arg to ``execve``).
454

455
        >>> core.stack # doctest: +ELLIPSIS
456
        Mapping('[stack]', start=0x..., stop=0x..., size=0x..., flags=0x6, page_offset=0x0)
457

458
        When creating a process, the kernel puts the absolute path of the binary and some
459
        padding bytes at the end of the stack.  We can look at those by looking at 
460
        ``core.stack.data``.
461

462
        >>> size = len('/bin/bash-static') + 8
463
        >>> core.stack.data[-size:]
464
        b'bin/bash-static\x00\x00\x00\x00\x00\x00\x00\x00\x00'
465

466
        We can also directly access the environment variables and arguments, via
467
        :attr:`.argc`, :attr:`.argv`, and :attr:`.env`.
468

469
        >>> 'HELLO' in core.env
470
        True
471
        >>> core.string(core.env['HELLO'])
472
        b'WORLD'
473
        >>> core.getenv('HELLO')
474
        b'WORLD'
475
        >>> core.argc
476
        1
477
        >>> core.argv[0] in core.stack
478
        True
479
        >>> core.string(core.argv[0]) # doctest: +ELLIPSIS
480
        b'.../bin/bash-static'
481

482
        Corefiles can also be pulled from remote machines via SSH!
483

484
        >>> s = ssh(user='travis', host='example.pwnme', password='demopass')
485
        >>> _ = s.set_working_directory()
486
        >>> elf = ELF.from_assembly(shellcraft.trap())
487
        >>> path = s.upload(elf.path)
488
        >>> _ =s.chmod('+x', path)
489
        >>> io = s.process(path)
490
        >>> io.wait(1)
491
        -1
492
        >>> io.corefile.signal == signal.SIGTRAP # doctest: +SKIP
493
        True
494

495
        Make sure fault_addr synthesis works for amd64 on ret.
496

497
        >>> context.clear(arch='amd64')
498
        >>> elf = ELF.from_assembly('push 1234; ret')
499
        >>> io = elf.process()
500
        >>> io.wait(1)
501
        >>> io.corefile.fault_addr
502
        1234
503

504
        Corefile.getenv() works correctly, even if the environment variable's
505
        value contains embedded '='. Corefile is able to find the stack, even
506
        if the stack pointer doesn't point at the stack.
507

508
        >>> elf = ELF.from_assembly(shellcraft.crash())
509
        >>> io = elf.process(env={'FOO': 'BAR=BAZ'})
510
        >>> io.wait(1)
511
        >>> core = io.corefile
512
        >>> core.getenv('FOO')
513
        b'BAR=BAZ'
514
        >>> core.sp == 0
515
        True
516
        >>> core.sp in core.stack
517
        False
518

519
        Corefile gracefully handles the stack being filled with garbage, including
520
        argc / argv / envp being overwritten.
521

522
        >>> context.clear(arch='i386')
523
        >>> assembly = '''
524
        ... LOOP:
525
        ...   mov dword ptr [esp], 0x41414141
526
        ...   pop eax
527
        ...   jmp LOOP
528
        ... '''
529
        >>> elf = ELF.from_assembly(assembly)
530
        >>> io = elf.process()
531
        >>> io.wait(2)
532
        >>> core = io.corefile
533
        [!] End of the stack is corrupted, skipping stack parsing (got: 41414141)
534
        >>> core.argc, core.argv, core.env
535
        (0, [], {})
536
        >>> core.stack.data.endswith(b'AAAA')
537
        True
538
        >>> core.fault_addr == core.sp
539
        True
540
    """
541

542
    _fill_gaps = False
1✔
543

544
    def __init__(self, *a, **kw):
        """Load a core dump and extract the process state it records.

        All arguments are forwarded unchanged to the parent initializer.

        After the parent has loaded the file, this walks every note segment
        to populate :attr:`prstatus`, :attr:`prpsinfo` and :attr:`siginfo`,
        names the memory mappings, locates the stack mapping and parses the
        stack contents.

        An :class:`IOError` raised by the parent initializer is re-raised
        after a warning about ``core_pattern`` has been logged.
        """
        # NOTE: these defaults are assigned *before* the parent initializer
        # runs; keep that order.

        #: The NT_PRSTATUS object.
        self.prstatus = None

        #: The NT_PRPSINFO object
        self.prpsinfo = None

        #: The NT_SIGINFO object
        self.siginfo = None

        #: :class:`list`: A list of :class:`.Mapping` objects for each loaded memory region
        self.mappings = []

        #: :class:`Mapping`: The mapping corresponding to the stack
        self.stack    = None

        #: :class:`dict`: Environment variables read from the stack.
        #: Keys are the environment variable name, values are the memory
        #: address of the variable.
        #:
        #: Use :meth:`.getenv` or :meth:`.string` to retrieve the textual value.
        #:
        #: Note: If ``FOO=BAR`` is in the environment, ``self.env['FOO']`` is
        #: the address of the NUL-terminated string ``"BAR"``.
        self.env = {}

        #: :class:`int`: Pointer to envp on the stack
        self.envp_address = 0

        #: :class:`list`: List of addresses of arguments on the stack.
        self.argv = []

        #: :class:`int`: Pointer to argv on the stack
        self.argv_address = 0

        #: :class:`int`: Number of arguments passed
        self.argc = 0

        #: :class:`int`: Pointer to argc on the stack
        self.argc_address = 0

        # Pointer to the executable filename on the stack
        self.at_execfn = 0

        # Pointer to the entry point
        self.at_entry = 0

        try:
            super(Corefile, self).__init__(*a, **kw)
        except IOError:
            log.warning("No corefile.  Have you set /proc/sys/kernel/core_pattern?")
            raise

        self.load_addr = 0
        self._address  = 0

        if self.elftype != 'CORE':
            log.error("%s is not a valid corefile" % self.file.name)

        if self.arch not in prstatus_types:
            log.warn_once("%s does not use a supported corefile architecture, registers are unavailable" % self.file.name)

        # Any of these may be None when the architecture or word size has no
        # entry in the lookup tables; each use below is guarded accordingly.
        prstatus_type = prstatus_types.get(self.arch)
        prpsinfo_type = prpsinfo_types.get(self.bits)
        siginfo_type = siginfo_types.get(self.bits)

        with log.waitfor("Parsing corefile..."):
            self._load_mappings()

            for segment in self.segments:
                if not isinstance(segment, elftools.elf.segments.NoteSegment):
                    continue

                # Note that older versions of pyelftools (<=0.24) are missing enum values
                # for NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, etc.
                # For this reason, we have to check if note.n_type is any of several values.
                for note in iter_notes(segment):
                    # iter_notes() may hand the descriptor back as text;
                    # latin1 maps each character back to one byte.
                    if not isinstance(note.n_desc, bytes):
                        note['n_desc'] = note.n_desc.encode('latin1')

                    # Try to find NT_PRSTATUS.  The descriptor size must
                    # match the structure exactly.
                    if prstatus_type and \
                       note.n_descsz == ctypes.sizeof(prstatus_type) and \
                       note.n_type in ('NT_GNU_ABI_TAG', 'NT_PRSTATUS'):
                        self.NT_PRSTATUS = note
                        self.prstatus = prstatus_type.from_buffer_copy(note.n_desc)

                    # Try to find NT_PRPSINFO
                    if prpsinfo_type and \
                       note.n_descsz == ctypes.sizeof(prpsinfo_type) and \
                       note.n_type in ('NT_GNU_ABI_TAG', 'NT_PRPSINFO'):
                        self.NT_PRPSINFO = note
                        self.prpsinfo = prpsinfo_type.from_buffer_copy(note.n_desc)

                    # Try to find NT_SIGINFO so we can see the fault.
                    # Guarded like the two lookups above, so an unsupported
                    # word size does not fail on ``None.from_buffer_copy``.
                    if siginfo_type and \
                       note.n_type in (0x53494749, 'NT_SIGINFO'):
                        self.NT_SIGINFO = note
                        self.siginfo = siginfo_type.from_buffer_copy(note.n_desc)

                    # Try to find the list of mapped files
                    if note.n_type in (constants.NT_FILE, 'NT_FILE'):
                        with context.local(bytes=self.bytes):
                            self._parse_nt_file(note)

                    # Try to find the auxiliary vector, which will tell us
                    # where the top of the stack is.
                    if note.n_type in (constants.NT_AUXV, 'NT_AUXV'):
                        self.NT_AUXV = note
                        with context.local(bytes=self.bytes):
                            self._parse_auxv(note)

            # If nothing above supplied a stack address, fall back to the
            # end of the last mapping.
            if not self.stack and self.mappings:
                self.stack = self.mappings[-1].stop

            # Replace the stack *address* with the mapping that contains it
            # (or that ends exactly at it).
            if self.stack and self.mappings:
                for mapping in self.mappings:
                    if self.stack in mapping or self.stack == mapping.stop:
                        mapping.name = '[stack]'
                        self.stack   = mapping
                        break
                else:
                    log.warning('Could not find the stack!')
                    self.stack = None

            with context.local(bytes=self.bytes, log_level='warn'):
                try:
                    self._parse_stack()
                except ValueError:
                    # If there are no environment variables, we die by running
                    # off the end of the stack.
                    pass

            # Corefiles generated by QEMU do not have a name for the
            # main module mapping.
            # Fetching self.exe will cause this to be auto-populated,
            # and is a no-op in other cases.
            self.exe

            # Print out the nice display for the user
            self._describe_core()
1✔
685

686
    def _parse_nt_file(self, note):
        """Name the entries of :attr:`mappings` using an ``NT_FILE`` note.

        The descriptor is read as: an entry count, a page size, ``count``
        records of ``(start, end, offset)``, then ``count`` NUL-terminated
        file names in the same order.  Every mapping whose ``start`` matches
        a record receives that record's file name and offset.

        Afterwards :attr:`mappings` is sorted by start address, and mappings
        that are still unnamed are labelled ``[vsyscall]``, ``[vdso]`` or
        ``[vvar]`` where the heuristics below recognise them.

        Arguments:
            note: The ``NT_FILE`` note; only ``note.n_desc`` is read.
        """
        t = tube()
        t.unrecv(note.n_desc)

        count = t.unpack()
        t.unpack()  # page size: unused, but must be consumed from the stream

        starts = []
        for _ in range(count):
            start = t.unpack()
            t.unpack()  # end address: unused, but must be consumed
            offset = t.unpack()
            starts.append((start, offset))

        for start, offset in starts:
            filename = t.recvuntil(b'\x00', drop=True)
            if not isinstance(filename, str):
                filename = filename.decode('utf-8')

            for mapping in self.mappings:
                if mapping.start == start:
                    mapping.name = filename
                    mapping.page_offset = offset

        self.mappings = sorted(self.mappings, key=lambda m: m.start)

        # NOTE(review): nothing in view initialises ``at_sysinfo_ehdr``
        # (presumably the auxv parser sets it -- confirm).  Reading it with
        # a default keeps this method usable when it has not been set.
        at_sysinfo_ehdr = getattr(self, 'at_sysinfo_ehdr', None)

        # Walk from the highest address down so that, for each special name,
        # the highest-addressed candidate wins.
        vvar = vdso = vsyscall = False
        for mapping in reversed(self.mappings):
            if mapping.name:
                continue

            if not vsyscall and mapping.start == 0xffffffffff600000:
                mapping.name = '[vsyscall]'
                vsyscall = True
                continue

            # Either the address recorded as AT_SYSINFO_EHDR, or a small
            # r-x mapping that starts with an ELF header.
            if mapping.start == at_sysinfo_ehdr \
            or (not vdso and mapping.size in [0x1000, 0x2000]
                and mapping.flags == 5
                and self.read(mapping.start, 4) == b'\x7fELF'):
                mapping.name = '[vdso]'
                vdso = True
                continue

            if not vvar and mapping.size == 0x2000 and mapping.flags == 4:
                mapping.name = '[vvar]'
                vvar = True
×
737

738
    @property
    def vvar(self):
        """:class:`Mapping`: Mapping for the vvar section.

        This is the first entry of :attr:`mappings` named ``[vvar]``, or
        ``None`` when there is no such mapping.
        """
        return next((m for m in self.mappings if m.name == '[vvar]'), None)
×
744

745
    @property
    def vdso(self):
        """:class:`Mapping`: Mapping for the vdso section.

        This is the first entry of :attr:`mappings` named ``[vdso]``, or
        ``None`` when there is no such mapping.
        """
        return next((m for m in self.mappings if m.name == '[vdso]'), None)
1✔
751

752
    @property
    def vsyscall(self):
        """:class:`Mapping`: Mapping for the vsyscall section.

        This is the first entry of :attr:`mappings` named ``[vsyscall]``, or
        ``None`` when there is no such mapping.
        """
        return next((m for m in self.mappings if m.name == '[vsyscall]'), None)
×
758

759
    @property
    def libc(self):
        """:class:`Mapping`: First mapping for ``libc.so``

        Only the base name of each mapping's name is examined; unnamed
        mappings are skipped.  Returns ``None`` when nothing matches.
        """
        expr = r'^libc\b.*so(?:\.6)?$'

        for m in self.mappings:
            if not m.name:
                continue

            if re.match(expr, os.path.basename(m.name)):
                return m

        return None
1✔
772

773
    @property
    def exe(self):
        """:class:`Mapping`: First mapping for the executable file.

        Returns ``None`` when the entry point is unknown (``at_entry`` is
        zero) or when no mapping contains it.

        Side effect: if the mapping containing the entry point has no name
        and ``at_execfn`` is set, the mapping is named after the string
        stored at ``at_execfn``.
        """

        # Finding the executable mapping requires knowing the entry point
        # from the auxv
        if not self.at_entry:
            return None

        # The entry point may not be in the first segment of a given file,
        # but we want to find the first segment of the file -- not the segment that
        # contains the entrypoint.
        first_segment_for_name = {}

        for m in self.mappings:
            first_segment_for_name.setdefault(m.name, m)

        # Find which segment contains the entry point
        for m in self.mappings:
            if m.start <= self.at_entry < m.stop:

                if not m.name and self.at_execfn:
                    m.name = self.string(self.at_execfn)
                    if not isinstance(m.name, str):
                        m.name = m.name.decode('utf-8')

                # A name assigned just above is not in the table, which was
                # built beforehand; the default then returns ``m`` itself.
                return first_segment_for_name.get(m.name, m)

        return None
1✔
800

801
    @property
    def pid(self):
        """:class:`int`: PID of the process which created the core dump.

        ``None`` when no ``NT_PRSTATUS`` note was loaded.
        """
        if self.prstatus:
            return int(self.prstatus.pr_pid)
        return None
1✔
806

807
    @property
    def ppid(self):
        """:class:`int`: Parent PID of the process which created the core dump."""
        status = self.prstatus
        if not status:
            # No NT_PRSTATUS note available.
            return None
        return int(status.pr_ppid)
×
812

813
    @property
    def signal(self):
        """:class:`int`: Signal which caused the core to be dumped.

        Example:

            >>> elf = ELF.from_assembly(shellcraft.trap())
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.signal == signal.SIGTRAP
            True

            >>> elf = ELF.from_assembly(shellcraft.crash())
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.signal == signal.SIGSEGV
            True
        """
        # Prefer the siginfo record; fall back to the prstatus record.
        info = self.siginfo
        if info:
            return int(info.si_signo)

        status = self.prstatus
        if status:
            return int(status.pr_cursig)

        return None
1✔
835

836
    @property
    def fault_addr(self):
        """:class:`int`: Address which generated the fault, for the signals
            SIGILL, SIGFPE, SIGSEGV, SIGBUS.  This is only available in native
            core dumps created by the kernel.  If the information is unavailable,
            this returns the address of the instruction pointer.


        Example:

            >>> elf = ELF.from_assembly('mov eax, 0xdeadbeef; jmp eax', arch='i386')
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.fault_addr == io.corefile.eax == 0xdeadbeef
            True
        """
        if not self.siginfo:
            # No embedded siginfo structure, so just return the
            # current instruction pointer.
            return getattr(self, 'pc', 0)

        fault_addr = int(self.siginfo.sigfault_addr)

        # The fault_addr is zero if the crash occurs due to a
        # "protection fault", e.g. a dereference of 0x4141414141414141
        # because this is technically a kernel address.
        #
        # A protection fault does not set "fault_addr" in the siginfo.
        # (http://elixir.free-electrons.com/linux/v4.14-rc8/source/kernel/signal.c#L1052)
        #
        # Since a common use for corefiles is to spray the stack with a
        # cyclic pattern to find the offset to get control of $PC,
        # check for a "ret" instruction ("\xc3").
        #
        # If we find a RET at $PC, extract the "return address" from the
        # top of the stack.
        #
        # "\xc3" is the x86 near-return opcode, so the heuristic is only
        # applied on i386/amd64; on other architectures a 0xc3 byte at $PC
        # would be a coincidence and the stack top is not a return address.
        is_x86 = self.arch in ('i386', 'amd64')
        if fault_addr == 0 and self.siginfo.si_code == 0x80 and is_x86:
            try:
                if self.read(self.pc, 1) == b'\xc3':
                    fault_addr = self.unpack(self.sp)
            except Exception:
                # Could not read $rsp or $rip; keep the reported address.
                pass

        return fault_addr
884

885
    @property
    def _pc_register(self):
        """:class:`str`: Name of the program counter register for ``self.arch``."""
        arch = self.arch
        if arch == 'i386':
            return 'eip'
        if arch == 'amd64':
            return 'rip'
        # Every other architecture uses the generic name.
        return 'pc'
1✔
892

893
    @property
    def pc(self):
        """:class:`int`: The program counter for the Corefile

        This is a cross-platform way to get e.g. ``core.eip``, ``core.rip``, etc.
        """
        register_values = self.registers
        # ``None`` when the register is not present in the dump.
        return register_values.get(self._pc_register)
1✔
900

901
    @property
    def _sp_register(self):
        """:class:`str`: Name of the stack pointer register for ``self.arch``."""
        arch = self.arch
        if arch == 'i386':
            return 'esp'
        if arch == 'amd64':
            return 'rsp'
        # Every other architecture uses the generic name.
        return 'sp'
1✔
908

909
    @property
    def sp(self):
        """:class:`int`: The stack pointer for the Corefile

        This is a cross-platform way to get e.g. ``core.esp``, ``core.rsp``, etc.
        """
        register_values = self.registers
        # ``None`` when the register is not present in the dump.
        return register_values.get(self._sp_register)
1✔
916

917
    def _describe(self):
        """Do nothing; the corefile summary is logged by ``_describe_core``.

        NOTE(review): presumably overrides a description hook on the base
        class -- confirm against the parent.
        """
        return None
1✔
919

920
    def _describe_core(self):
        """Log (once) a short summary of the corefile.

        The summary holds the path, the arch/bits/endian triplet, the program
        counter and stack pointer, and -- when known -- the executable mapping
        and the faulting address.
        """
        triplet = '-'.join(str(part) for part in (self.arch, self.bits, self.endian))
        pc_label = '%s:' % self._pc_register.upper()
        sp_label = '%s:' % self._sp_register.upper()

        lines = [
            repr(self.path),
            '%-10s %s' % ('Arch:', triplet),
            # A missing register is printed as zero.
            '%-10s %#x' % (pc_label, self.pc or 0),
            '%-10s %#x' % (sp_label, self.sp or 0),
        ]

        if self.exe and self.exe.name:
            exe_summary = '%r (%#x)' % (self.exe.name, self.exe.address)
            lines.append('%-10s %s' % ('Exe:', exe_summary))

        if self.fault_addr:
            lines.append('%-10s %#x' % ('Fault:', self.fault_addr))

        log.info_once('\n'.join(lines))
1✔
941

942
    def _load_mappings(self):
        """Append one unnamed :class:`Mapping` per ``PT_LOAD`` segment."""
        loadable = (seg for seg in self.segments if seg.header.p_type == 'PT_LOAD')

        for seg in loadable:
            begin = seg.header.p_vaddr
            end = seg.header.p_vaddr + seg.header.p_memsz
            # Name and the trailing argument are not known at this point.
            self.mappings.append(
                Mapping(self, None, begin, end, seg.header.p_flags, None)
            )
1✔
954

955
    def _parse_auxv(self, note):
        """Record the interesting entries of the auxiliary vector note.

        The note body is read as consecutive (key, value) word pairs.  The
        values for ``AT_EXECFN``, ``AT_ENTRY``, ``AT_PHDR``, ``AT_BASE`` and
        ``AT_SYSINFO_EHDR`` are stored on ``self``.

        Arguments:
            note: Note whose ``n_desc`` holds the raw auxv bytes and whose
                ``n_descsz`` is their length.
        """
        stream = tube()
        stream.unrecv(note.n_desc)

        pair_size = context.bytes * 2
        for _ in range(0, note.n_descsz, pair_size):
            key = stream.unpack()
            value = stream.unpack()

            if key == constants.AT_EXECFN:
                # The AT_EXECFN entry is a pointer to the executable's filename
                # at the very top of the stack, followed by a word's width of
                # NULL bytes.  For example, on a 64-bit system...
                #
                # 0x7fffffffefe8  53 3d 31 34  33 00 2f 62  69 6e 2f 62  61 73 68 00  |S=14|3./b|in/b|ash.|
                # 0x7fffffffeff8  00 00 00 00  00 00 00 00                            |....|....|    |    |
                self.at_execfn = value
                # Round up to the next page boundary: the end of the stack.
                self.stack = (value & ~0xfff) + 0x1000
            elif key == constants.AT_ENTRY:
                self.at_entry = value
            elif key == constants.AT_PHDR:
                self.at_phdr = value
            elif key == constants.AT_BASE:
                self.at_base = value
            elif key == constants.AT_SYSINFO_EHDR:
                self.at_sysinfo_ehdr = value
1✔
987

988
    def _parse_stack(self):
        """Recover ``argc``, ``argv`` and the environment from the stack mapping.

        Working backwards from ``AT_EXECFN`` near the top of the stack, this
        locates the last environment string, then the ``envp[]`` and
        ``argv[]`` pointer arrays.  On success it fills in ``self.env``
        (name -> address of the value), ``self.envp_address``,
        ``self.argc_address``, ``self.argc``, ``self.argv_address`` and
        ``self.argv``.  If ``self.at_execfn`` was unset it is also guessed.

        Parsing is best-effort: when a sanity check fails a warning is logged
        and the method returns without touching the remaining attributes.
        """
        # Get a copy of the stack mapping
        stack = self.stack

        if not stack:
            return

        # If the stack does not end with zeroes, something is very wrong.
        if not stack.data.endswith(b'\x00' * context.bytes):
            # Report the tail of the *stack*, which is what was just checked
            # (self.data would be the tail of the whole core file).
            log.warn_once("End of the stack is corrupted, skipping stack parsing (got: %s)",
                          enhex(stack.data[-context.bytes:]))
            return

        # AT_EXECFN is the start of the filename, e.g. '/bin/sh'
        # Immediately preceding is a NULL-terminated environment variable string.
        # We want to find the beginning of it
        if not self.at_execfn:
            address = stack.stop
            address -= 2*self.bytes
            address -= 1
            address = stack.rfind(b'\x00', None, address)
            address += 1
            self.at_execfn = address

        address = self.at_execfn-1

        # Sanity check!
        try:
            if stack[address] != b'\x00':
                log.warning("Error parsing corefile stack: Could not find end of environment")
                return
        except ValueError:
            log.warning("Error parsing corefile stack: Address out of bounds")
            return

        # address is currently set to the NULL terminator of the last
        # environment variable.
        address = stack.rfind(b'\x00', None, address)

        # We've found the beginning of the last environment variable.
        # We should be able to search up the stack for the envp[] array to
        # find a pointer to this address, followed by a NULL.
        last_env_addr = address + 1
        p_last_env_addr = stack.find(pack(last_env_addr), None, last_env_addr)
        if p_last_env_addr < 0:
            # Something weird is happening.  Just don't touch it.
            log.warn_once("Error parsing corefile stack: Found bad environment at %#x", last_env_addr)
            return

        # Sanity check that we did correctly find the envp NULL terminator.
        envp_nullterm = p_last_env_addr+context.bytes
        if self.unpack(envp_nullterm) != 0:
            log.warning("Error parsing corefile stack: Could not find end of environment variables")
            return

        # We've successfully located the end of the envp[] array.
        #
        # It comes immediately after the argv[] array, which itself
        # is NULL-terminated.
        #
        # Now let's find the end of argv
        p_end_of_argv = stack.rfind(pack(0), None, p_last_env_addr)

        self.envp_address = p_end_of_argv + self.bytes

        # Now we can fill in the environment
        env_pointer_data = stack[self.envp_address:p_last_env_addr+self.bytes]
        for pointer in unpack_many(env_pointer_data):

            # If the stack is corrupted, the pointer will be outside of
            # the stack.
            if pointer not in stack:
                continue

            try:
                name_value = self.string(pointer)
            except Exception:
                continue

            # A string without '=' is not a NAME=value pair (e.g. a corrupted
            # stack); skip it instead of aborting the whole parse.
            name, separator, _ = name_value.partition(b'=')
            if not separator:
                continue

            # "end" points at the byte after the null terminator
            end = pointer + len(name_value) + 1

            # Do not mark things as environment variables if they point
            # outside of the stack itself, or we had to cross into a different
            # mapping (after the stack) to read it.
            # This may occur when the entire stack is filled with non-NUL bytes,
            # and we NULL-terminate on a read failure in .string().
            if end not in stack:
                continue

            if not isinstance(name, str):
                name = name.decode('utf-8', 'surrogateescape')
            self.env[name] = pointer + len(name) + len('=')

        # May as well grab the arguments off the stack as well.
        # argc comes immediately before argv[0] on the stack, but
        # we don't know what argc is.
        #
        # It is unlikely that argc is a valid stack address.
        address = p_end_of_argv - self.bytes
        while self.unpack(address) in stack:
            address -= self.bytes

        # address now points at argc
        self.argc_address = address
        self.argc = self.unpack(self.argc_address)

        # we can extract all of the arguments as well
        self.argv_address = self.argc_address + self.bytes
        self.argv = unpack_many(stack[self.argv_address: p_end_of_argv])
1✔
1101

1102
    @property
    def maps(self):
        """:class:`str`: A printable string which is similar to /proc/xx/maps.

        ::

            >>> print(Corefile('./core').maps)
            8048000-8049000 r-xp 1000 /home/user/pwntools/crash
            8049000-804a000 r--p 1000 /home/user/pwntools/crash
            804a000-804b000 rw-p 1000 /home/user/pwntools/crash
            f7528000-f7529000 rw-p 1000 None
            f7529000-f76d1000 r-xp 1a8000 /lib/i386-linux-gnu/libc-2.19.so
            f76d1000-f76d2000 ---p 1000 /lib/i386-linux-gnu/libc-2.19.so
            f76d2000-f76d4000 r--p 2000 /lib/i386-linux-gnu/libc-2.19.so
            f76d4000-f76d5000 rw-p 1000 /lib/i386-linux-gnu/libc-2.19.so
            f76d5000-f76d8000 rw-p 3000 None
            f76ef000-f76f1000 rw-p 2000 None
            f76f1000-f76f2000 r-xp 1000 [vdso]
            f76f2000-f7712000 r-xp 20000 /lib/i386-linux-gnu/ld-2.19.so
            f7712000-f7713000 r--p 1000 /lib/i386-linux-gnu/ld-2.19.so
            f7713000-f7714000 rw-p 1000 /lib/i386-linux-gnu/ld-2.19.so
            fff3e000-fff61000 rw-p 23000 [stack]
        """
        # One line per mapping, using each mapping's own string form.
        rendered = [str(mapping) for mapping in self.mappings]
        return '\n'.join(rendered)
×
1126

1127
    def getenv(self, name):
        """getenv(name) -> bytes

        Read an environment variable off the stack, and return its contents.

        Arguments:
            name(str): Name of the environment variable to read.  A bytes
                name is decoded as UTF-8 first.

        Returns:
            :class:`bytes`: The contents of the environment variable.

        Example:

            >>> elf = ELF.from_assembly(shellcraft.trap())
            >>> io = elf.process(env={'GREETING': 'Hello!'})
            >>> io.wait(1)
            >>> io.corefile.getenv('GREETING')
            b'Hello!'
        """
        key = name if isinstance(name, str) else name.decode('utf-8', 'surrogateescape')

        if key not in self.env:
            log.error("Environment variable %r not set" % key)

        # self.env maps each name to the address of its value on the stack.
        value_address = self.env[key]
        return self.string(value_address)
1✔
1152

1153
    @property
    def registers(self):
        """:class:`dict`: All available registers in the coredump.

        Example:

            >>> elf = ELF.from_assembly('mov eax, 0xdeadbeef;' + shellcraft.trap(), arch='i386')
            >>> io = elf.process()
            >>> io.wait(1)
            >>> io.corefile.registers['eax'] == 0xdeadbeef
            True
        """
        if not self.prstatus:
            return {}

        reg_set = self.prstatus.pr_reg
        values = {}

        public_fields = (field for field in dir(reg_set) if not field.startswith('_'))
        for field in public_fields:
            try:
                values[field] = int(getattr(reg_set, field))
            except Exception:
                # Anything that cannot be converted to an integer is skipped.
                continue

        return values
1✔
1180

1181
    def debug(self):
        """Open the corefile under a debugger."""
        # Imported here rather than at module level.
        import pwnlib.gdb

        executable_path = self.exe.path
        pwnlib.gdb.attach(self, exe=executable_path)
×
1185

1186
    def __getattr__(self, attribute):
1✔
1187
        if attribute.startswith('_') or not self.prstatus:
1!
1188
            raise AttributeError(attribute)
×
1189

1190
        if hasattr(self.prstatus, attribute):
1!
1191
            return getattr(self.prstatus, attribute)
×
1192

1193
        return getattr(self.prstatus.pr_reg, attribute)
1✔
1194

1195
    # Override routines which don't make sense for Corefiles
1196
    def _populate_got(*a):
        """No-op: populating the GOT does not make sense for a corefile."""
        return None
1✔
1197
    def _populate_plt(*a):
        """No-op: populating the PLT does not make sense for a corefile."""
        return None
1✔
1198

1199
class Core(Corefile):
    """Alias for :class:`.Corefile`; adds no behavior of its own."""
1201

1202
class Coredump(Corefile):
    """Alias for :class:`.Corefile`; adds no behavior of its own."""
1204

1205
class CorefileFinder(object):
1✔
1206
    def __init__(self, proc):
1✔
1207
        if proc.poll() is None:
1!
1208
            log.error("Process %i has not exited" % (proc.pid))
×
1209

1210
        self.process = proc
1✔
1211
        self.pid = proc.pid
1✔
1212
        self.uid = proc.suid
1✔
1213
        self.gid = proc.sgid
1✔
1214
        self.exe = proc.executable
1✔
1215
        self.basename = os.path.basename(self.exe)
1✔
1216
        self.cwd = proc.cwd
1✔
1217

1218
        # XXX: Should probably break out all of this logic into
1219
        #      its own class, so that we can support "file ops"
1220
        #      locally, via SSH, and over ADB, in a transparent way.
1221
        if isinstance(proc, process):
1!
1222
            self.read = read
1✔
1223
            self.unlink = os.unlink
1✔
1224
        elif isinstance(proc, ssh_channel):
×
1225
            self.read = proc.parent.read
×
1226
            self.unlink = proc.parent.unlink
×
1227

1228
        self.kernel_core_pattern = self.read('/proc/sys/kernel/core_pattern').strip()
1✔
1229
        self.kernel_core_uses_pid = bool(int(self.read('/proc/sys/kernel/core_uses_pid')))
1✔
1230

1231
        log.debug("core_pattern: %r" % self.kernel_core_pattern)
1✔
1232
        log.debug("core_uses_pid: %r" % self.kernel_core_uses_pid)
1✔
1233

1234
        self.interpreter = self.binfmt_lookup()
1✔
1235

1236
        log.debug("interpreter: %r" % self.interpreter)
1✔
1237

1238
        # If we have already located the corefile, we will
1239
        # have renamed it to 'core.<pid>'
1240
        core_path = 'core.%i' % (proc.pid)
1✔
1241
        self.core_path = None
1✔
1242

1243
        if os.path.isfile(core_path):
1✔
1244
            log.debug("Found core immediately: %r" % core_path)
1✔
1245
            self.core_path = core_path
1✔
1246

1247
        # Try QEMU first, since it's unlikely to be a false-positive unless
1248
        # there is a PID *and* filename collision.
1249
        if not self.core_path:
1✔
1250
            log.debug("Looking for QEMU corefile")
1✔
1251
            self.core_path = self.qemu_corefile()
1✔
1252

1253
        # Check for native coredumps as a last resort
1254
        if not self.core_path:
1✔
1255
            log.debug("Looking for native corefile")
1✔
1256
            self.core_path = self.native_corefile()
1✔
1257

1258
        if not self.core_path:
1!
1259
            return
×
1260

1261
        core_pid = self.load_core_check_pid()
1✔
1262

1263
        # Move the corefile if we're configured that way
1264
        if context.rename_corefiles:
1!
1265
            new_path = 'core.%i' % core_pid
1✔
1266
            if core_pid > 0 and new_path != self.core_path:
1✔
1267
                write(new_path, self.read(self.core_path))
1✔
1268
                try:
1✔
1269
                    self.unlink(self.core_path)
1✔
1270
                except (IOError, OSError):
1✔
1271
                    log.warn("Could not delete %r" % self.core_path)
1✔
1272
                self.core_path = new_path
1✔
1273

1274
        # Check the PID
1275
        if core_pid != self.pid:
1!
1276
            log.warn("Corefile PID does not match! (got %i)" % core_pid)
×
1277

1278
        # Register the corefile for removal only if it's an exact match
1279
        elif context.delete_corefiles:
1!
1280
            atexit.register(lambda: os.unlink(self.core_path))
×
1281

1282

1283
    def load_core_check_pid(self):
1✔
1284
        """Test whether a Corefile matches our process
1285

1286
        Speculatively load a Corefile without informing the user, so that we
1287
        can check if it matches the process we're looking for.
1288

1289
        Arguments:
1290
            path(str): Path to the corefile on disk
1291

1292
        Returns:
1293
            `bool`: ``True`` if the Corefile matches, ``False`` otherwise.
1294
        """
1295

1296
        try:
1✔
1297
            with context.quiet:
1✔
1298
                with tempfile.NamedTemporaryFile() as tmp:
1✔
1299
                    tmp.write(self.read(self.core_path))
1✔
1300
                    tmp.flush()
1✔
1301
                    return Corefile(tmp.name).pid
1✔
1302
        except Exception:
×
1303
            pass
×
1304

1305
        return -1
×
1306

1307
    def apport_corefile(self):
1✔
1308
        """Find the apport crash for the process, and extract the core file.
1309

1310
        Arguments:
1311
            process(process): Process object we're looking for.
1312

1313
        Returns:
1314
            `str`: Raw core file contents
1315
        """
1316
        crash_data = self.apport_read_crash_data()
1✔
1317

1318
        log.debug("Apport Crash Data:\n%s" % crash_data)
1✔
1319

1320
        if crash_data:
1!
1321
            return self.apport_crash_extract_corefile(crash_data)
×
1322

1323
    def apport_crash_extract_corefile(self, crashfile_data):
1✔
1324
        """Extract a corefile from an apport crash file contents.
1325

1326
        Arguments:
1327
            crashfile_data(str): Crash file contents
1328

1329
        Returns:
1330
            `str`: Raw binary data for the core file, or ``None``.
1331
        """
1332
        file = StringIO(crashfile_data)
×
1333

1334
        # Find the pid of the crashfile
1335
        for line in file:
×
1336
            if line.startswith(' Pid:'):
×
1337
                pid = int(line.split()[-1])
×
1338

1339
                if pid == self.pid:
×
1340
                    break
×
1341
        else:
1342
            # Could not find a " Pid:" line
1343
            return
×
1344

1345
        # Find the CoreDump section
1346
        for line in file:
×
1347
            if line.startswith('CoreDump: base64'):
×
1348
                break
×
1349
        else:
1350
            # Could not find the coredump data
1351
            return
×
1352

1353
        # Get all of the base64'd lines
1354
        chunks = []
×
1355
        for line in file:
×
1356
            if not line.startswith(' '):
×
1357
                break
×
1358
            chunks.append(b64d(line))
×
1359

1360
        # Smush everything together, then extract it
1361
        compressed_data = b''.join(chunks)
×
1362
        compressed_file = BytesIO(compressed_data)
×
1363
        gzip_file = gzip.GzipFile(fileobj=compressed_file)
×
1364
        core_data = gzip_file.read()
×
1365

1366
        return core_data
×
1367

1368
    def apport_read_crash_data(self):
1✔
1369
        """Find the apport crash for the process
1370

1371
        Returns:
1372
            `str`: Raw contents of the crash file or ``None``.
1373
        """
1374
        uid = self.uid
1✔
1375
        crash_name = self.exe.replace('/', '_')
1✔
1376

1377
        crash_path = '/var/crash/%s.%i.crash' % (crash_name, uid)
1✔
1378

1379
        try:
1✔
1380
            log.debug("Looking for Apport crash at %r" % crash_path)
1✔
1381
            data = self.read(crash_path)
1✔
1382
        except Exception:
1✔
1383
            return None
1✔
1384

1385
        # Remove the crash file, so that future crashes will be captured
1386
        try:
×
1387
            self.unlink(crash_path)
×
1388
        except Exception:
×
1389
            pass
×
1390

1391
        return data
×
1392

1393
    def systemd_coredump_corefile(self):
1✔
1394
        """Find the systemd-coredump crash for the process and dump it to a file.
1395

1396
        Arguments:
1397
            process(process): Process object we're looking for.
1398

1399
        Returns:
1400
            `str`: Filename of core file, if coredump was found.
1401
        """
1402
        filename = "core.%s.%i.coredumpctl" % (self.basename, self.pid)
×
1403
        try:
×
1404
            subprocess.check_call(
×
1405
                [
1406
                    "coredumpctl",
1407
                    "dump",
1408
                    "--output=%s" % filename,
1409
                    # Filter coredump by pid
1410
                    str(self.pid),
1411
                ],
1412
                stdout=open(os.devnull, 'w'),
1413
                stderr=subprocess.STDOUT,
1414
                shell=False,
1415
            )
1416
            return filename
×
1417
        except subprocess.CalledProcessError as e:
×
1418
            log.debug("coredumpctl failed with status: %d" % e.returncode)
×
1419

1420
    def native_corefile(self):
1✔
1421
        """Find the corefile for a native crash.
1422

1423
        Arguments:
1424
            process(process): Process whose crash we should find.
1425

1426
        Returns:
1427
            `str`: Filename of core file.
1428
        """
1429
        if self.kernel_core_pattern.startswith(b'|'):
1!
1430
            log.debug("Checking for corefile (piped)")
1✔
1431
            return self.native_corefile_pipe()
1✔
1432

1433
        log.debug("Checking for corefile (pattern)")
×
1434
        return self.native_corefile_pattern()
×
1435

1436
    def native_corefile_pipe(self):
1✔
1437
        """Find the corefile for a piped core_pattern
1438

1439
        Supports apport and systemd-coredump.
1440

1441
        Arguments:
1442
            process(process): Process whose crash we should find.
1443

1444
        Returns:
1445
            `str`: Filename of core file.
1446
        """
1447
        if b'/apport' in self.kernel_core_pattern:
1!
1448
            log.debug("Found apport in core_pattern")
1✔
1449
            apport_core = self.apport_corefile()
1✔
1450

1451
            if apport_core:
1!
1452
                # Write the corefile to the local directory
1453
                filename = 'core.%s.%i.apport' % (self.basename, self.pid)
×
1454
                with open(filename, 'wb+') as f:
×
1455
                    f.write(apport_core)
×
1456
                return filename
×
1457

1458
            filename = self.apport_coredump()
1✔
1459
            if filename:
1!
1460
                return filename
1✔
1461

1462
            # Pretend core_pattern was just 'core', and see if we come up with anything
1463
            self.kernel_core_pattern = 'core'
×
1464
            return self.native_corefile_pattern()
×
1465
        elif b'systemd-coredump' in self.kernel_core_pattern:
×
1466
            log.debug("Found systemd-coredump in core_pattern")
×
1467
            return self.systemd_coredump_corefile()
×
1468
        else:
1469
            log.warn_once("Unsupported core_pattern: %r", self.kernel_core_pattern)
×
1470
            return None
×
1471

1472
    def native_corefile_pattern(self):
1✔
1473
        """
1474
        %%  a single % character
1475
        %c  core file size soft resource limit of crashing process (since Linux 2.6.24)
1476
        %d  dump mode—same as value returned by prctl(2) PR_GET_DUMPABLE (since Linux 3.7)
1477
        %e  executable filename (without path prefix)
1478
        %E  pathname of executable, with slashes ('/') replaced by exclamation marks ('!') (since Linux 3.0).
1479
        %g  (numeric) real GID of dumped process
1480
        %h  hostname (same as nodename returned by uname(2))
1481
        %i  TID of thread that triggered core dump, as seen in the PID namespace in which the thread resides (since Linux 3.18)
1482
        %I  TID of thread that triggered core dump, as seen in the initial PID namespace (since Linux 3.18)
1483
        %p  PID of dumped process, as seen in the PID namespace in which the process resides
1484
        %P  PID of dumped process, as seen in the initial PID namespace (since Linux 3.12)
1485
        %s  number of signal causing dump
1486
        %t  time of dump, expressed as seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC)
1487
        %u  (numeric) real UID of dumped process
1488
        """
1489
        replace = {
×
1490
            '%%': '%',
1491
            '%e': os.path.basename(self.interpreter) or self.basename,
1492
            '%E': self.exe.replace('/', '!'),
1493
            '%g': str(self.gid),
1494
            '%h': socket.gethostname(),
1495
            '%i': str(self.pid),
1496
            '%I': str(self.pid),
1497
            '%p': str(self.pid),
1498
            '%P': str(self.pid),
1499
            '%s': str(-self.process.poll()),
1500
            '%u': str(self.uid)
1501
        }
1502
        replace = dict((re.escape(k), v) for k, v in replace.items())
×
1503
        pattern = re.compile("|".join(replace.keys()))
×
1504
        if not hasattr(self.kernel_core_pattern, 'encode'):
×
1505
            self.kernel_core_pattern = self.kernel_core_pattern.decode('utf-8')
×
1506
        core_pattern = self.kernel_core_pattern
×
1507
        corefile_path = pattern.sub(lambda m: replace[re.escape(m.group(0))], core_pattern)
×
1508

1509
        if self.kernel_core_uses_pid:
×
1510
            corefile_path += '.%i' % self.pid
×
1511

1512
        if os.pathsep not in corefile_path:
×
1513
            corefile_path = os.path.join(self.cwd, corefile_path)
×
1514

1515
        log.debug("Trying corefile_path: %r" % corefile_path)
×
1516

1517
        try:
×
1518
            self.read(corefile_path)
×
1519
            return corefile_path
×
1520
        except Exception as e:
×
1521
            log.debug("No dice: %s" % e)
×
1522

1523
    def qemu_corefile(self):
1✔
1524
        """qemu_corefile() -> str
1525

1526
        Retrieves the path to a QEMU core dump.
1527
        """
1528

1529
        # QEMU doesn't follow anybody else's rules
1530
        # https://github.com/qemu/qemu/blob/stable-2.6/linux-user/elfload.c#L2710-L2744
1531
        #
1532
        #     qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core
1533
        #
1534
        # Note that we don't give any fucks about the date and time, since the PID
1535
        # should be unique enough that we can just glob.
1536
        corefile_name = 'qemu_{basename}_*_{pid}.core'
1✔
1537

1538
        # Format the name
1539
        corefile_name = corefile_name.format(basename=self.basename,
1✔
1540
                                             pid=self.pid)
1541

1542
        # Get the full path
1543
        corefile_path = os.path.join(self.cwd, corefile_name)
1✔
1544

1545
        log.debug("Trying corefile_path: %r" % corefile_path)
1✔
1546

1547
        # Glob all of them, return the *most recent* based on numeric sort order.
1548
        for corefile in sorted(glob.glob(corefile_path), reverse=True):
1✔
1549
            return corefile
1✔
1550

1551
    def apport_coredump(self):
        """Find new-style apport coredump of executables not belonging
        to a system package.

        The file is looked up under ``/var/lib/apport/coredump`` using the
        naming scheme::

            core.<_path_to_target_binary>.<uid>.<boot_id>.<pid>.<timestamp>

        The timestamp component is matched with a wildcard; the remaining
        fields (path, uid, boot id, pid) are filled in from this object and
        from ``/proc/sys/kernel/random/boot_id``.

        Returns:
            The matching path that sorts last (string order), or ``None``
            when the glob matches nothing.
        """
        current_boot = read('/proc/sys/kernel/random/boot_id').strip().decode()

        # The file name embeds the executable path with '/' replaced by '_'.
        flattened_exe = self.exe.replace('/', '_')

        pattern = os.path.join(
            '/var/lib/apport/coredump',
            'core.{path}.{uid}.{boot_id}.{pid}.*'.format(
                path=flattened_exe,
                uid=self.uid,
                boot_id=current_boot,
                pid=self.pid,
            ),
        )

        log.debug("Trying corefile_path: %r" % pattern)

        # Descending sort puts the highest-sorting (latest timestamp suffix)
        # candidate first; that is the one handed back.
        candidates = sorted(glob.glob(pattern), reverse=True)
        return candidates[0] if candidates else None
1✔
1586

1587
    def binfmt_lookup(self):
        """Parses /proc/sys/fs/binfmt_misc to find the interpreter for a file

        Each entry under ``/proc/sys/fs/binfmt_misc`` is read and parsed into
        ``key value`` pairs.  An entry matches when the bytes of ``self.exe``
        at the entry's ``offset`` agree with its ``magic`` on every bit that
        is set in its ``mask`` (all bits when no mask is given).

        Returns:
            ``self.process._qemu`` if it is set, otherwise the ``interpreter``
            of the first matching entry, otherwise ``''`` (also returned when
            ``self.process`` is not a ``process`` or the binfmt_misc directory
            does not exist).
        """

        binfmt_misc = '/proc/sys/fs/binfmt_misc'

        if not isinstance(self.process, process):
            log.debug("Not a process")
            return ''

        if self.process._qemu:
            return self.process._qemu

        if not os.path.isdir(binfmt_misc):
            log.debug("No binfmt_misc dir")
            return ''

        exe_data = bytearray(self.read(self.exe))

        for entry in os.listdir(binfmt_misc):
            path = os.path.join(binfmt_misc, entry)

            # Some files in this directory (e.g. write-only control files)
            # cannot be read or decoded; they are simply not candidates.
            try:
                data = self.read(path).decode()
            except Exception:
                continue

            lines = data.splitlines()

            # The first line of an entry is its status; a disabled entry is
            # never used to launch anything, so do not report it.
            if lines and lines[0].strip() == 'disabled':
                continue

            keys = {}
            for line in lines:
                # Split on the first run of whitespace only, so a value that
                # itself contains spaces (an interpreter path) stays intact.
                fields = line.strip().split(None, 1)
                if len(fields) == 2:
                    keys[fields[0]] = fields[1]

            # Extension-based entries have no magic; entries without an
            # interpreter cannot produce a result.
            if 'magic' not in keys or 'interpreter' not in keys:
                continue

            magic = bytearray(unhex(keys['magic']))

            if 'mask' in keys:
                mask = bytearray(unhex(keys['mask']))
            else:
                mask = bytearray(b'\xff' * len(magic))

            # The magic is matched at ``offset`` bytes into the file
            # (defaults to the start of the file).
            try:
                offset = int(keys.get('offset', 0))
            except ValueError:
                continue

            if offset < 0:
                continue

            window = exe_data[offset:offset + len(magic)]

            # Not enough file data or mask bytes to compare the whole magic:
            # treat as a non-match instead of raising IndexError.
            if len(window) < len(magic) or len(mask) < len(magic):
                continue

            # Only bits selected by the mask take part in the comparison.
            if all((byte ^ mag) & bits == 0
                   for byte, mag, bits in zip(window, magic, mask)):
                return keys['interpreter']

        return ''
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc