• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

christophevg / yoker / 25117204947

29 Apr 2026 03:11PM UTC coverage: 79.818% (+0.8%) from 78.972%
25117204947

push

github

christophevg
feat(tools): implement ExistenceTool for file/folder existence checks

Adds ExistenceTool for checking if files or folders exist with security hardening:
- Structured JSON output with exists, type, and path fields
- Symlink rejection to prevent path traversal attacks
- PathGuardrail integration for path containment validation
- Generic error messages to prevent information disclosure
- Expanded default blocked patterns (3 to 13)
- Updated ToolResult type to support structured results
- Added 28 comprehensive unit tests

🤖 Implemented together with a coding agent.

716 of 876 branches covered (81.74%)

Branch coverage included in aggregate %.

60 of 64 new or added lines in 6 files covered. (93.75%)

267 existing lines in 12 files now uncovered.

4370 of 5496 relevant lines covered (79.51%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.11
/src/yoker/context/basic.py
1
"""Basic persistence context manager implementation.
2

3
Provides JSONL-based context persistence with secure file handling.
4

5
Note:
6
  File locking uses fcntl (Unix-only). On Windows, the fcntl import
7
  in _atomic_write_jsonl fails, so records cannot be written there.
8
  For production use on Windows, consider adding a cross-platform
9
  file locking library.
10
"""
11

12
import json
1✔
13
import os
1✔
14
from datetime import datetime
1✔
15
from pathlib import Path
1✔
16
from typing import Any
1✔
17

18
from yoker.context.interface import ContextStatistics
1✔
19
from yoker.context.validator import is_safe_path, validate_session_id, validate_storage_path
1✔
20
from yoker.exceptions import ContextCorruptionError, SessionNotFoundError
1✔
21
from yoker.logging import get_logger
1✔
22

23
log = get_logger(__name__)
1✔
24

25
# File permissions
26
DIR_MODE = 0o700  # Owner-only for directories
1✔
27
FILE_MODE = 0o600  # Owner-only for files
1✔
28

29

30
class BasicPersistenceContextManager:
1✔
31
  """Context manager with JSONL persistence.
32

33
  Stores conversation history in JSONL (JSON Lines) format with:
34
  - Atomic writes for crash safety
35
  - Secure file permissions
36
  - Session lifecycle tracking
37

38
  Record types:
39
  - session_start: Session metadata
40
  - message: User/assistant/system message
41
  - tool_result: Tool execution result
42
  - turn: Turn boundary marker
43
  - session_end: Session termination marker
44
  """
45

46
  def __init__(
1✔
47
    self,
48
    storage_path: Path | str,
49
    session_id: str = "auto",
50
  ) -> None:
51
    """Initialize context manager.
52

53
    Args:
54
      storage_path: Directory for storing context files.
55
      session_id: Session ID or "auto" to generate.
56

57
    Raises:
58
      ValidationError: If storage_path or session_id is invalid.
59
    """
60
    # Validate and resolve storage path
61
    self._storage_path = validate_storage_path(Path(storage_path), "context.storage_path")
1✔
62

63
    # Validate session ID
64
    self._session_id = validate_session_id(session_id, "context.session_id")
1✔
65

66
    # In-memory context: ordered sequence of all items
67
    # Each item is {"type": "message"|"tool_result", "data": {...}}
68
    self._sequence: list[dict[str, Any]] = []
1✔
69

70
    # Statistics
71
    self._start_time = datetime.now()
1✔
72
    self._last_turn_time: datetime | None = None
1✔
73
    self._tool_call_count = 0
1✔
74

75
    # File path
76
    self._file_path = self._storage_path / f"{self._session_id}.jsonl"
1✔
77

78
    log.debug(
1✔
79
      "context_initialized",
80
      session_id=self._session_id,
81
      storage_path=str(self._storage_path),
82
    )
83

84
  def get_session_id(self) -> str:
1✔
85
    """Get the unique session identifier."""
86
    return self._session_id
1✔
87

88
  def add_message(
1✔
89
    self,
90
    role: str,
91
    content: str,
92
    metadata: dict[str, Any] | None = None,
93
    thinking: str | None = None,
94
  ) -> None:
95
    """Add a message to the context.
96

97
    Args:
98
      role: Message role ("user", "assistant", "system").
99
      content: Message content.
100
      metadata: Optional metadata (e.g., images, files).
101
      thinking: Optional thinking/reasoning content (for assistant messages).
102
    """
103
    message: dict[str, Any] = {
1✔
104
      "role": role,
105
      "content": content,
106
    }
107
    if metadata:
1✔
UNCOV
108
      message["metadata"] = metadata
×
109
    if thinking:
1✔
UNCOV
110
      message["thinking"] = thinking
×
111

112
    self._sequence.append({"type": "message", "data": message})
1✔
113
    self._append_record("message", message)
1✔
114

115
  def add_tool_result(
1✔
116
    self,
117
    tool_name: str,
118
    tool_id: str,
119
    result: str,
120
    success: bool = True,
121
  ) -> None:
122
    """Add a tool execution result to the context."""
123
    tool_result: dict[str, Any] = {
1✔
124
      "tool_name": tool_name,
125
      "tool_id": tool_id,
126
      "result": result,
127
      "success": success,
128
    }
129

130
    self._sequence.append({"type": "tool_result", "data": tool_result})
1✔
131
    self._tool_call_count += 1
1✔
132
    self._append_record("tool_result", tool_result)
1✔
133

134
  def add_tool_calls(
1✔
135
    self,
136
    tool_calls: list[dict[str, Any]],
137
    thinking: str | None = None,
138
  ) -> None:
139
    """Add an assistant message with tool calls to the context.
140

141
    This must be called BEFORE add_tool_result() for each tool call.
142
    The assistant message with tool_calls is required by the LLM API
143
    to understand what tools were called before receiving results.
144

145
    Args:
146
      tool_calls: List of tool call dictionaries with 'name' and 'arguments'.
147
      thinking: Optional thinking/reasoning content from the assistant.
148
    """
149
    # Store as a special assistant message with tool_calls
UNCOV
150
    assistant_msg: dict[str, Any] = {
×
151
      "role": "assistant",
152
      "tool_calls": tool_calls,
153
      "content": "",  # Empty content when only tool calls
154
    }
UNCOV
155
    if thinking:
×
UNCOV
156
      assistant_msg["thinking"] = thinking
×
157

UNCOV
158
    self._sequence.append({"type": "message", "data": assistant_msg})
×
UNCOV
159
    self._append_record("tool_call_message", {"tool_calls": tool_calls, "thinking": thinking})
×
160

161
  def get_context(self) -> list[dict[str, Any]]:
1✔
162
    """Get the full context for backend submission.
163

164
    Returns messages and tool results in the correct order for the LLM API:
165
    - User messages
166
    - Assistant messages (with tool_calls if present)
167
    - Tool result messages (after the assistant message)
168
    """
169
    context: list[dict[str, Any]] = []
1✔
170

171
    for item in self._sequence:
1✔
172
      if item["type"] == "message":
1✔
173
        msg = item["data"]
1✔
174
        # Build message dict, including tool_calls and thinking if present
175
        message: dict[str, Any] = {
1✔
176
          "role": msg["role"],
177
          "content": msg.get("content", ""),
178
        }
179
        # Include tool_calls for assistant messages
180
        if msg["role"] == "assistant" and "tool_calls" in msg:
1✔
UNCOV
181
          message["tool_calls"] = msg["tool_calls"]
×
182
        # Include thinking for assistant messages
183
        if msg["role"] == "assistant" and "thinking" in msg:
1✔
UNCOV
184
          message["thinking"] = msg["thinking"]
×
185
        context.append(message)
1✔
186
      elif item["type"] == "tool_result":
1✔
187
        tr = item["data"]
1✔
188
        context.append(
1✔
189
          {
190
            "role": "tool",
191
            "name": tr["tool_name"],
192
            "content": tr["result"],
193
          }
194
        )
195

196
    return context
1✔
197

198
  def get_messages(self) -> list[dict[str, Any]]:
1✔
199
    """Get all recorded messages (excludes tool results)."""
200
    messages: list[dict[str, Any]] = []
1✔
201
    for item in self._sequence:
1✔
202
      if item["type"] == "message":
1✔
203
        messages.append(item["data"])
1✔
204
    return messages
1✔
205

206
  def start_turn(self, user_message: str) -> None:
1✔
207
    """Start a new conversation turn."""
208
    turn_record: dict[str, Any] = {
1✔
209
      "user_message": user_message,
210
      "start_time": datetime.now().isoformat(),
211
    }
212
    self._sequence.append({"type": "turn", "data": turn_record})
1✔
213
    self._append_record("turn_start", turn_record)
1✔
214

215
    # Add user message
216
    self.add_message("user", user_message)
1✔
217

218
  def end_turn(self, assistant_message: str, thinking: str | None = None) -> None:
1✔
219
    """End the current conversation turn.
220

221
    Args:
222
      assistant_message: The assistant's response content.
223
      thinking: Optional thinking/reasoning content from the assistant.
224
    """
225
    self._last_turn_time = datetime.now()
1✔
226

227
    # Add assistant message with optional thinking
228
    self.add_message("assistant", assistant_message, thinking=thinking)
1✔
229

230
    # Append turn end record
231
    self._append_record(
1✔
232
      "turn_end",
233
      {
234
        "assistant_message": assistant_message,
235
      },
236
    )
237

238
  def save(self) -> None:
1✔
239
    """Persist context to storage.
240

241
    Creates storage directory if needed, writes the session_start record if the file does not exist yet, and syncs the file to disk.
242
    """
243
    self._ensure_storage_directory()
1✔
244

245
    # Write session_start if file doesn't exist
246
    if not self._file_path.exists():
1✔
247
      self._write_session_start()
1✔
248

249
    # Flush any buffered writes
250
    self._flush_pending_records()
1✔
251

252
  def load(self) -> bool:
1✔
253
    """Load context from storage.
254

255
    Returns:
256
      True if context was loaded, False if no stored context exists.
257

258
    Raises:
259
      ContextCorruptionError: If stored context is corrupted.
260
    """
261
    if not self._file_path.exists():
1✔
262
      return False
1✔
263

264
    # Check if path is safe
265
    if not is_safe_path(self._storage_path, self._file_path):
1✔
UNCOV
266
      raise SessionNotFoundError(self._session_id)
×
267

268
    try:
1✔
269
      self._load_from_file()
1✔
270
      return True
1✔
271
    except json.JSONDecodeError as e:
1✔
UNCOV
272
      raise ContextCorruptionError(
×
273
        str(self._file_path),
274
        e.lineno or 0,
275
        f"Invalid JSON: {e.msg}",
276
      ) from None
277

278
  def clear(self) -> None:
1✔
279
    """Clear in-memory context (does not delete from storage)."""
280
    self._sequence.clear()
1✔
281
    self._tool_call_count = 0
1✔
282
    self._last_turn_time = None
1✔
283

284
  def delete(self) -> None:
1✔
285
    """Delete stored context from disk.
286

287
    Raises:
288
      SessionNotFoundError: If session doesn't exist.
289
    """
290
    if not self._file_path.exists():
1✔
291
      raise SessionNotFoundError(self._session_id)
1✔
292

293
    try:
1✔
294
      self._file_path.unlink()
1✔
295
      log.debug("context_deleted", path=str(self._file_path))
1✔
UNCOV
296
    except OSError as e:
×
UNCOV
297
      log.error("context_delete_failed", path=str(self._file_path), error=str(e))
×
UNCOV
298
      raise
×
299

300
  def get_statistics(self) -> ContextStatistics:
1✔
301
    """Get statistics about context usage."""
302
    # Count items in sequence
303
    message_count = sum(1 for item in self._sequence if item["type"] == "message")
1✔
304
    turn_count = sum(1 for item in self._sequence if item["type"] == "turn")
1✔
305

306
    return ContextStatistics(
1✔
307
      message_count=message_count,
308
      turn_count=turn_count,
309
      tool_call_count=self._tool_call_count,
310
      start_time=self._start_time,
311
      last_turn_time=self._last_turn_time,
312
    )
313

314
  def close(self) -> None:
1✔
315
    """Release resources and flush any pending writes."""
316
    self._append_record(
1✔
317
      "session_end",
318
      {
319
        "end_time": datetime.now().isoformat(),
320
      },
321
    )
322

323
  # Private methods
324

325
  def _ensure_storage_directory(self) -> None:
1✔
326
    """Create storage directory with secure permissions if it doesn't exist."""
327
    if not self._storage_path.exists():
1✔
328
      self._storage_path.mkdir(parents=True, mode=DIR_MODE)
1✔
329
      log.debug("storage_created", path=str(self._storage_path))
1✔
330
    else:
331
      # Ensure correct permissions
332
      try:
1✔
333
        self._storage_path.chmod(DIR_MODE)
1✔
UNCOV
334
      except OSError:
×
UNCOV
335
        pass  # Ignore permission errors on existing directories
×
336

337
  def _append_record(self, record_type: str, data: dict[str, Any]) -> None:
1✔
338
    """Append a record to the JSONL file.
339

340
    Uses atomic write for crash safety.
341

342
    Args:
343
      record_type: Type of record (session_start, message, etc.).
344
      data: Record data dictionary.
345
    """
346
    self._ensure_storage_directory()
1✔
347

348
    record = {
1✔
349
      "type": record_type,
350
      "timestamp": datetime.now().isoformat(),
351
      "data": data,
352
    }
353

354
    self._atomic_write_jsonl(record)
1✔
355

356
  def _atomic_write_jsonl(self, record: dict[str, Any]) -> None:
1✔
357
    """Write a record atomically to the JSONL file.
358

359
    Uses file locking for atomic appends with secure permissions.
360

361
    Args:
362
      record: The record dictionary to write.
363
    """
364
    import fcntl
1✔
365

366
    # Ensure storage directory exists
367
    self._ensure_storage_directory()
1✔
368

369
    # Create file if needed with secure permissions
370
    if not self._file_path.exists():
1✔
371
      self._file_path.touch(mode=FILE_MODE)
1✔
372
    else:
373
      # Ensure permissions on existing file
374
      try:
1✔
375
        self._file_path.chmod(FILE_MODE)
1✔
UNCOV
376
      except OSError:
×
UNCOV
377
        pass
×
378

379
    # Write with file locking for atomic append
380
    with open(self._file_path, "a") as f:
1✔
381
      # Acquire exclusive lock
382
      fcntl.flock(f.fileno(), fcntl.LOCK_EX)
1✔
383
      try:
1✔
384
        json.dump(record, f)
1✔
385
        f.write("\n")
1✔
386
        f.flush()
1✔
387
        os.fsync(f.fileno())
1✔
388
      finally:
389
        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
1✔
390

391
  def _write_session_start(self) -> None:
1✔
392
    """Write session_start record."""
393
    record = {
1✔
394
      "type": "session_start",
395
      "timestamp": self._start_time.isoformat(),
396
      "data": {
397
        "session_id": self._session_id,
398
        "start_time": self._start_time.isoformat(),
399
      },
400
    }
401
    self._atomic_write_jsonl(record)
1✔
402

403
  def _flush_pending_records(self) -> None:
1✔
404
    """Flush any pending writes to disk.
405

406
    Records are already written immediately, so this only fsyncs the existing file.
407
    """
408
    # Sync file to disk
409
    if self._file_path.exists():
1✔
410
      with open(self._file_path, "a") as f:
1✔
411
        os.fsync(f.fileno())
1✔
412

413
  def _load_from_file(self) -> None:
1✔
414
    """Load context from JSONL file.
415

416
    Parses all records and reconstructs in-memory state.
417

418
    Raises:
419
      ContextCorruptionError: If file is corrupted.
420
    """
421
    self.clear()
1✔
422

423
    line_num = 0
1✔
424
    try:
1✔
425
      with open(self._file_path) as f:
1✔
426
        for line_num, line in enumerate(f, start=1):
1✔
427
          line = line.strip()
1✔
428
          if not line:
1✔
UNCOV
429
            continue
×
430

431
          record = json.loads(line)
1✔
432
          self._process_record(record, line_num)
1✔
433

434
    except json.JSONDecodeError as e:
1✔
435
      raise ContextCorruptionError(
1✔
436
        str(self._file_path),
437
        line_num,
438
        f"Invalid JSON: {e.msg}",
439
      ) from None
440

441
  def _process_record(self, record: dict[str, Any], line_num: int) -> None:
1✔
442
    """Process a single JSONL record.
443

444
    Args:
445
      record: The parsed record dictionary.
446
      line_num: Line number for error messages.
447

448
    Raises:
449
      ContextCorruptionError: If record is malformed.
450
    """
451
    if "type" not in record:
1✔
UNCOV
452
      raise ContextCorruptionError(
×
453
        str(self._file_path),
454
        line_num,
455
        "Missing record type",
456
      )
457

458
    record_type = record.get("type")
1✔
459
    data = record.get("data", {})
1✔
460

461
    if record_type == "session_start":
1✔
462
      # Already handled during initialization
463
      pass
1✔
464

465
    elif record_type == "message":
1✔
466
      if "role" not in data or "content" not in data:
1✔
UNCOV
467
        raise ContextCorruptionError(
×
468
          str(self._file_path),
469
          line_num,
470
          "Missing message fields",
471
        )
472
      self._sequence.append({"type": "message", "data": data})
1✔
473

474
    elif record_type == "tool_result":
1✔
UNCOV
475
      if "tool_id" not in data:
×
UNCOV
476
        raise ContextCorruptionError(
×
477
          str(self._file_path),
478
          line_num,
479
          "Missing tool_id",
480
        )
UNCOV
481
      self._sequence.append({"type": "tool_result", "data": data})
×
UNCOV
482
      self._tool_call_count += 1
×
483

484
    elif record_type == "tool_call_message":
1✔
485
      # Record containing assistant message with tool_calls
UNCOV
486
      if "tool_calls" not in data:
×
UNCOV
487
        raise ContextCorruptionError(
×
488
          str(self._file_path),
489
          line_num,
490
          "Missing tool_calls",
491
        )
492
      # Convert to message format
UNCOV
493
      self._sequence.append(
×
494
        {
495
          "type": "message",
496
          "data": {
497
            "role": "assistant",
498
            "tool_calls": data["tool_calls"],
499
            "content": "",
500
          },
501
        }
502
      )
503

504
    elif record_type == "turn_start":
1✔
UNCOV
505
      self._sequence.append({"type": "turn", "data": data})
×
506

507
    elif record_type == "turn_end":
1✔
508
      # Turn end - nothing special to do, statistics will be computed
UNCOV
509
      pass
×
510

511
    elif record_type == "session_end":
1✔
512
      # Session ended, nothing special to do
513
      pass
1✔
514

515
    else:
UNCOV
516
      log.warning(
×
517
        "unknown_record_type",
518
        record_type=record_type,
519
        line=line_num,
520
      )
521

522

523
__all__ = [
1✔
524
  "BasicPersistenceContextManager",
525
]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc